Introduction
Freezing layers helps keep some parts of a model fixed so they don't change during training. This saves time and keeps learned knowledge safe.
Jump into concepts and practice - no test required
# Freeze one layer: turn off gradient tracking for every parameter in `model.layer`.
for param in model.layer.parameters():
    param.requires_grad = False
# Freeze the first convolution: turn off gradient tracking for `model.conv1`.
for param in model.conv1.parameters():
    param.requires_grad = False
# Freeze every parameter except those whose qualified name contains 'fc'.
for name, param in model.named_parameters():
    # Substring test: any parameter name containing 'fc' stays trainable.
    if 'fc' not in name:
        param.requires_grad = False
# Step 1: freeze the whole model.
for param in model.parameters():
    param.requires_grad = False

# Step 2: re-enable gradients for the `fc` layer only, so it alone is trained.
for param in model.fc.parameters():
    param.requires_grad = True
# Complete example: freeze the first layer of a small model, run one training
# step, and inspect which layers received gradients.
import torch
import torch.nn as nn
import torch.optim as optim


# Simple model with two linear layers
class SimpleModel(nn.Module):
    """Two linear layers (5 -> 3 -> 1) with a ReLU between them."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(5, 3)
        self.fc2 = nn.Linear(3, 1)

    def forward(self, x):
        """Apply fc1, then ReLU, then fc2, and return the result."""
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x


model = SimpleModel()

# Freeze first layer
for param in model.fc1.parameters():
    param.requires_grad = False

# Optimizer only updates parameters with requires_grad=True
optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=0.1)

# Dummy data
inputs = torch.randn(4, 5)
targets = torch.randn(4, 1)

criterion = nn.MSELoss()

# Training step
model.train()
# Clear stale gradients before the backward pass; a no-op on this first step,
# but required as soon as the step is repeated in a loop.
optimizer.zero_grad()
outputs = model(inputs)
loss = criterion(outputs, targets)
loss.backward()
optimizer.step()

# Check which parameters received gradients: the frozen fc1 weight has no
# gradient (None), while the trainable fc2 weight has a gradient tensor.
fc1_grad = model.fc1.weight.grad
fc2_grad = model.fc2.weight.grad

print("fc1 weight grad:", fc1_grad)
print("fc2 weight grad:", fc2_grad)
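Which code correctly freezes every parameter of model?
Set requires_grad = False for each parameter. Only the option that sets param.requires_grad = False is correct. Others are invalid or incorrect.
What does the following code print?
import torch.nn as nn
model = nn.Sequential(
    nn.Linear(10, 5),
    nn.ReLU(),
    nn.Linear(5, 2)
)
for param in model[0].parameters():
    param.requires_grad = False
trainable_params = [p for p in model.parameters() if p.requires_grad]
print(len(trainable_params))
It prints 2: only the weight and bias of the last Linear layer remain trainable.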
Why does the following code fail to freeze layer1?
for param in model.layer1.parameters():
    param.grad = False
param.grad holds gradient values; it is a tensor or None, not a flag to enable/disable gradients.
The correct statement is param.requires_grad = False. Setting param.grad = False is invalid and does not freeze.
A model has three layers: layer1, layer2, and layer3. You want to freeze layer1 and layer2 but train layer3. Which code correctly freezes only the first two layers?
Set requires_grad = False on each parameter in the layers to freeze.
The correct option loops over the layer1 and layer2 parameters and freezes them correctly.
for param in model.parameters():
    param.requires_grad = False
for param in model.layer3.parameters():
    param.requires_grad = False
incorrectly freezes all parameters including layer3.
model.layer1.requires_grad = False
model.layer2.requires_grad = False
tries to set requires_grad on layers (invalid): this only assigns an attribute on the layer objects, so their parameters stay trainable.
model.freeze_layers(['layer1', 'layer2'])
calls a non-existent method.