Chapter 4: error updating gradients (implementing a Learner)

from fastai.vision.all import *  # provides untar_data, URLs, tensor, Image, DataLoader, and torch

# create DLs - DataLoaders
# URLs.MNIST_SAMPLE only has images of 3s and 7s
# URLs.MNIST has all the digits from 0-9
path = untar_data(URLs.MNIST_SAMPLE)
threes_train = (path/'train'/'3').ls().sorted()
sevens_train = (path/'train'/'7').ls().sorted()

threes_valid = (path/'valid'/'3').ls().sorted()
sevens_valid = (path/'valid'/'7').ls().sorted()

threes_tensors = [tensor(Image.open(o)) for o in threes_train]
sevens_tensors = [tensor(Image.open(o)) for o in sevens_train]

stacked_threes_train = torch.stack(threes_tensors).float()/255
stacked_sevens_train = torch.stack(sevens_tensors).float()/255

stacked_threes_valid = torch.stack([tensor(Image.open(o)) for o in threes_valid]).float()/255
stacked_sevens_valid = torch.stack([tensor(Image.open(o)) for o in sevens_valid]).float()/255

print("Training data shape: ", stacked_threes_train.shape, stacked_sevens_train.shape)
print("Validation data shape: ", stacked_threes_valid.shape, stacked_sevens_valid.shape)

train_x = torch.cat([stacked_threes_train, stacked_sevens_train]).view(-1, 28*28)
train_y = tensor([1]*len(threes_train) + [0]*len(sevens_train)).unsqueeze(1)
print("train_x.shape, train_y.shape: ", train_x.shape, train_y.shape)

valid_x = torch.cat([stacked_threes_valid, stacked_sevens_valid]).view(-1, 28*28)
valid_y = tensor([1]*len(threes_valid) + [0]*len(sevens_valid)).unsqueeze(1)

dset_train = list(zip(train_x, train_y))
dset_valid = list(zip(valid_x, valid_y))

dl_train = DataLoader(dset_train, batch_size=256)
dl_valid = DataLoader(dset_valid, batch_size=256)
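# As a quick sanity check (not from the book, just to confirm the batching),
# the first batch should hold 256 flattened images and 256 labels.
# `first` comes from fastcore, which fastai re-exports.
xb, yb = first(dl_train)
print(xb.shape, yb.shape)  # expected: torch.Size([256, 784]) torch.Size([256, 1])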

# Create model (it multiplies the inputs (x) by the weights and adds a bias);
# initialize the weights and bias
# from PAGE - 164
def init_params(size, std=1.0):
  return (torch.randn(size)*std).requires_grad_()

# from PAGE - 165
def linear1(xb, weights, bias):
  """
  Returns predictions
  """
  return xb@weights + bias

# Create loss function
# from PAGE - 169
def mnist_loss(predictions, targets):
  """
  Returns the loss value (used by SGD to optimize the weights).
  The loss function measures the distance between predictions and targets.
  """
  predictions = predictions.sigmoid()
  return torch.where(targets==1, 1-predictions, predictions).mean()
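# A quick illustration with hypothetical values (not from the book):
# confidently correct predictions contribute almost nothing to the loss,
# while an uncertain prediction (logit 0, sigmoid 0.5) contributes 0.5.
preds = tensor([[6.0], [-6.0], [0.0]])  # raw model outputs (logits)
targs = tensor([[1],   [0],    [1]])
print(mnist_loss(preds, targs))  # roughly 0.168: sigmoid(6)≈0.998, sigmoid(-6)≈0.002, sigmoid(0)=0.5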

# create SGD optimization step: calculate gradients and update the weights
# using the learning rate
# PAGE - 172
def calc_grad(xb, yb, model, weights, bias):
  preds = model(xb, weights, bias)
  loss = mnist_loss(predictions=preds, targets=yb)
  loss.backward()

def train_epoch(dl, model, lr, params, bias):
  for xb, yb in dl:
    calc_grad(xb, yb, model, params, bias)
    for p in params:
      p.data -= p.grad*lr
      p.grad.zero_()


# create metric function (in this case, "batch_accuracy")
def batch_accuracy(model_preds, labels):
  preds = model_preds.sigmoid()
  correct = (preds>0.5) == labels
  return correct.float().mean()

def validate_epoch(model, valid_dl, weights, bias):
  accs = [batch_accuracy(model(xb, weights, bias), yb) for xb, yb in valid_dl]
  return round(torch.stack(accs).mean().item(), 4)
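# Another hedged sanity check (the variable names w, b are just for illustration):
# with freshly initialized random parameters, accuracy should hover around 0.5.
w, b = init_params((28*28, 1)), init_params(1)
print(validate_epoch(linear1, dl_valid, w, b))  # expect something near 0.5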

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 

# let's train our model for 20 epochs
epochs = 20
lr = 1.0
model_params = init_params((28*28, 1))
model_bias = init_params(1)
print(model_params.shape, model_bias.shape)

for i in range(epochs):
  print('epoch ===== ', i)
  # def train_epoch(dl, model, lr, params, bias):
  train_epoch(dl_train, linear1, lr, model_params, model_bias)
  print(validate_epoch(linear1, dl_valid, model_params, model_bias), end="   ")

I spent a lot of time debugging this code. I'm trying to write my own Learner from scratch, which is basically re-writing what we have in the book.
I am getting an error on the line p.data -= p.grad*lr:
TypeError: unsupported operand type(s) for *: 'NoneType' and 'float'

I am not sure why it's None, since I am calculating the gradient. I'm not sure what concept or piece of the code I'm misunderstanding here.

Hello,

In train_epoch, both the weights and the bias should be updated, but you are currently looping through the weights (i.e., params) only. Since params is a single tensor, for p in params: actually iterates over its rows, and those row views have a .grad of None, which is where the TypeError comes from. Replacing for p in params: with for p in (params, bias): would fix the error.

Bear in mind that it's best practice to pack all of the model's parameters together in a tuple or iterable of some sort, so it might be worthwhile to do, e.g., params = (model_params, model_bias) and pass params to the various training functions in lieu of model_params and model_bias (this is what chapter 4 does too).
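As a sketch of what that refactor might look like (names and structure are just one option, not the book's exact code):

def train_epoch(dl, model, lr, params):
  # params is an iterable of leaf tensors, e.g., (weights, bias)
  for xb, yb in dl:
    preds = model(xb, *params)      # forward pass
    loss = mnist_loss(preds, yb)    # measure how far off the predictions are
    loss.backward()                 # fills in .grad on every leaf tensor in params
    for p in params:                # p is now a whole parameter tensor, not a row of one
      p.data -= p.grad * lr         # SGD step
      p.grad.zero_()                # reset gradients so they don't accumulate

params = (init_params((28*28, 1)), init_params(1))
for i in range(20):
  train_epoch(dl_train, linear1, 1.0, params)
  print(validate_epoch(linear1, dl_valid, *params), end="   ")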