Hi, I have been trying to implement a custom net based on the lessons from chapter 4.
class LinearModel:
    """A simple linear model."""
    def __init__(self, in_features, out_features):
        self.weights = init_params((in_features, out_features))  # torch.Size([in_features, out_features])
        self.bias = init_params(out_features)                    # torch.Size([out_features])

    def parameters(self):
        return (self.weights, self.bias)

    def __call__(self, xb):
        return (xb @ self.weights) + self.bias
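For reference, init_params is the helper from the chapter 4 notebook (reproduced here so the snippet is self-contained; double-check against your copy):

def init_params(size, std=1.0):
    """Random Gaussian tensor, scaled by std, that tracks gradients."""
    return (torch.randn(size) * std).requires_grad_()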
class SimpleNet:
    """A simple multi-layer neural network."""
    def __init__(self, in_features, out_features):
        self.layer1 = LinearModel(in_features, 30)
        self.layer2 = lambda xb: xb.max(tensor(0.0))  # ReLU
        self.layer3 = LinearModel(30, out_features)

    def parameters(self):
        w1, b1 = self.layer1.parameters()
        w2, b2 = self.layer3.parameters()
        return (w1, b1, w2, b2)

    def __call__(self, xb):
        res = self.layer1(xb)
        res = self.layer2(res)
        res = self.layer3(res)
        return res

model = SimpleNet(28*28, 1)
But while LinearModel works fine with BasicOptim, SimpleNet doesn’t.
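For reference, BasicOptim is the minimal SGD optimizer from the chapter (copied as I have it; SimpleLearner is just my wrapper around it):

class BasicOptim:
    """Minimal SGD: step() updates parameters in place, zero_grad() clears gradients."""
    def __init__(self, params, lr):
        self.params, self.lr = list(params), lr
    def step(self, *args, **kwargs):
        for p in self.params:
            p.data -= p.grad.data * self.lr
    def zero_grad(self, *args, **kwargs):
        for p in self.params:
            p.grad = None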
learner = SimpleLearner(data_loaders, LinearModel(28*28, 1))
learner.train_model(20, learning_rate=1.0)
# 0.6362 0.7891 0.916 0.9473 0.959 0.9639 0.9658 0.9663 0.9688 0.9697 0.9702 0.9727 0.9736 0.9746 0.9736 0.9736 0.9746 0.9751 0.9751 0.9756
learner = SimpleLearner(data_loaders, SimpleNet(28*28, 1))
learner.train_model(20, learning_rate=1.0)
# 0.5068 0.5068 0.5068 0.5068 0.5068 0.5068 0.5068 0.5068 0.5068 0.5068 0.5068 0.5068 0.5068 0.5068 0.5068 0.5068 0.5068 0.5068 0.5068 0.5068
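For what it's worth, the accuracy is frozen at 0.5068 from the first epoch, so the updates don't seem to be doing anything. Here is a quick probe of the activations and gradients (using a unit-Gaussian stand-in batch rather than real MNIST rows):

probe = SimpleNet(28*28, 1)
xb = torch.randn(64, 28*28)       # stand-in batch; the real inputs are flattened 28*28 images
acts = probe.layer1(xb)
print(acts.std().item())          # ~28 here: std=1.0 weights sum 784 unit-Gaussian terms
probe(xb).mean().backward()
print([p.grad.abs().mean().item() for p in probe.parameters()])  # are all four gradients non-zero?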
On further inspection, I found that nn.Linear’s parameters have a different shape than my custom LinearModel’s.
[p.size() for p in nn.Linear(28*28,30).parameters()]
# [torch.Size([30, 784]), torch.Size([30])]
[p.size() for p in LinearModel(28*28,30).parameters()]
# [torch.Size([784, 30]), torch.Size([30])]
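As far as I understand it, nn.Linear stores its weight as (out_features, in_features) and transposes it inside forward, so the two layouts should compute the same thing; a quick check:

import torch
import torch.nn as nn

lin = nn.Linear(28*28, 30)
xb = torch.randn(4, 28*28)
manual = xb @ lin.weight.t() + lin.bias  # nn.Linear's forward, done by hand
print(torch.allclose(lin(xb), manual))   # True

So the layout difference alone shouldn’t break training, as long as __call__ matches how the weights are stored.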
While I’m not sure whether this is the reason, I tried transposing the weights anyway, but that version doesn’t even run.
Any idea what I could be missing?