Hello!
I’m implementing my own basic version of a Learner class, in a similar vein to the one in the fastai library. It’s one of the challenges provided in the fastbook.
My class is shown below.
import torch


class Learner:
    """A simple attempt to model a learner. Works with PyTorch models."""

    def __init__(self, train_dataloader, valid_dataloader, model,
                 loss_function, metric_function, learning_rate=1.0):
        self._train_dataloader = train_dataloader
        self._valid_dataloader = valid_dataloader
        self._model = model
        self._loss_function = loss_function
        self._metric_function = metric_function
        self._learning_rate = learning_rate
        self._parameters = list(self._model.parameters())
        self._accuracy = None

    # Private methods.
    def _make_predictions(self, inputs):
        return self._model(inputs)

    def _calculate_loss(self, predictions, targets):
        return self._loss_function(predictions, targets)

    def _calculate_gradients(self, loss):
        loss.backward()

    def _update_parameters(self):
        # Step each parameter in the direction opposite its gradient.
        for parameter in self._parameters:
            parameter.data -= parameter.grad.data * self._learning_rate
        # Reset gradients.
        for parameter in self._parameters:
            parameter.grad = None

    def _calculate_accuracy(self):
        self._accuracy = [self._metric_function(inputs, targets)
                          for inputs, targets in self._valid_dataloader]
        self._accuracy = round(torch.stack(self._accuracy).mean().item(), 4)

    def _output_accuracy(self, epoch):
        print(f"Epoch: {epoch}; Accuracy: {self._accuracy * 100:.2f}%")

    # Public methods.
    def get_parameters(self):
        return self._parameters

    def train_model(self, epochs):
        for epoch in range(epochs):
            for x_batch, y_batch in self._train_dataloader:
                predictions = self._make_predictions(x_batch)
                loss = self._calculate_loss(predictions, y_batch)
                self._calculate_gradients(loss)
                self._update_parameters()
            self._calculate_accuracy()
            self._output_accuracy(epoch)
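For context, the _calculate_gradients/_update_parameters pair is meant to replicate by hand what the usual PyTorch optimizer pattern does. A rough equivalent using torch.optim.SGD (shown only to illustrate the intent; my class deliberately avoids an optimizer) would be:

import torch

# Illustration only: the optimizer-based version of my manual update step.
model = torch.nn.Linear(1, 1, bias=False)
optimizer = torch.optim.SGD(model.parameters(), lr=1.0)

inputs = torch.tensor([[1.0], [2.0]])
targets = torch.tensor([[4.0], [8.0]])

loss = torch.nn.functional.l1_loss(model(inputs), targets)
loss.backward()        # populate parameter.grad
optimizer.step()       # parameter -= lr * parameter.grad
optimizer.zero_grad()  # reset gradients, like my parameter.grad = None loop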
However, when testing this class with a very simple model, the gradients of my parameters remain None despite backward being called and requires_grad_ being set to True.
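For reference, here is a minimal standalone snippet, mirroring my training data, that shows the behaviour I expect from backward:

import torch

weight = torch.tensor([7.0], requires_grad=True)
inputs = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0])
targets = 4 * inputs  # same y = 4x relationship as my training data

loss = torch.nn.functional.l1_loss(weight * inputs, targets)
loss.backward()
print(weight.grad)  # prints tensor([3.]) for me, not None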
I am using PyTorch’s Linear class with a single parameter of value 7 and no bias; the targets follow y = 4x, so the goal is to drive the parameter to 4.
Below is the setUp method of my unittest class.
import unittest

import torch


class TestLearner(unittest.TestCase):
    """Tests for Learner."""

    def setUp(self):
        """Create an instance of the Learner class."""
        # Create data (targets follow y = 4x).
        train_x = torch.tensor([[1], [2], [3], [4], [5]]).float()
        train_y = torch.tensor([4, 8, 12, 16, 20]).float()
        valid_x = torch.tensor([[6], [7], [8], [9], [10]]).float()
        valid_y = torch.tensor([24, 28, 32, 36, 40]).float()
        # Create datasets.
        train_dataset = list(zip(train_x, train_y))
        valid_dataset = list(zip(valid_x, valid_y))
        # Create dataloaders.
        self.train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                                            batch_size=64)
        self.valid_dataloader = torch.utils.data.DataLoader(valid_dataset,
                                                            batch_size=64)
        # Create model: a single weight initialised to 7, no bias.
        linear_model = torch.nn.Linear(1, 1, bias=False)
        linear_model.weight = torch.nn.Parameter(torch.tensor([7]).float())

        # Define loss function.
        def l1_norm(predictions, targets):
            return torch.nn.functional.l1_loss(predictions, targets)

        # Define metric function.
        def accuracy(inputs, targets):
            predictions = linear_model(inputs).sigmoid()
            correct_predictions = (predictions > 0.5) == targets
            return correct_predictions.float().mean()

        # Define learner.
        self.learner = Learner(
            self.train_dataloader,
            self.valid_dataloader,
            linear_model,
            l1_norm,
            accuracy,
        )
Gradients are calculated as shown below.
def test__calculate_gradients(self):
    """Tests whether the gradient for each weight is correctly
    calculated."""
    # TODO: Why is the gradient 3?
    # Make predictions and calculate the loss.
    for x_batch, y_batch in self.train_dataloader:
        predictions = self.learner._make_predictions(x_batch)
        loss = self.learner._calculate_loss(predictions, y_batch)
    # Calculate gradients.
    loss.backward()
    for parameter in self.learner.get_parameters():
        print(parameter.grad.data)
In the above test case, the parameter’s gradient comes out as 3.
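Working that value through by hand, 3 checks out: with w = 7 the loss is mean(|w·x − 4x|) over x = [1, 2, 3, 4, 5], so

dL/dw = mean(x · sign(w·x − 4x)) = mean(x · sign(3x)) = mean(x) = 3.

I update the parameters as shown below…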
def test__update_parameters(self):
    for parameter in self.learner.get_parameters():
        print(parameter.grad.data)
    self.learner._update_parameters()
…and get the following error; the parameter’s gradient is None.
...E
======================================================================
ERROR: test__update_parameters (__main__.TestLearner)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/Users/[REDACTED]/PycharmProjects/simpleLearner/test_learner.py", line 96, in test__update_parameters
print(parameter.grad.data)
AttributeError: 'NoneType' object has no attribute 'data'
----------------------------------------------------------------------
Ran 4 tests in 0.037s
FAILED (errors=1)
Does anyone have any idea why my parameter’s gradient isn’t being stored? I would really appreciate any suggestions!