Hello,
In Chapter 4, “Under the Hood: Training a Digit Classifier”, under the section “Putting It All Together”, the following code is used to classify an image as either a 3 or a 7:
from fastai.vision.all import *
# Fetch the MNIST_SAMPLE dataset with fastai's untar_data; `path` is used below as the dataset root.
path = untar_data(URLs.MNIST_SAMPLE)
# Open every file listed under train/3 and train/7, divide the pixel values by 255,
# and stack each digit's images into one tensor (presumably scaling 8-bit pixels to [0, 1]).
stacked_threes = torch.stack([tensor(Image.open(o))/255 for o in (path/'train/3').ls()])
stacked_sevens = torch.stack([tensor(Image.open(o))/255 for o in (path/'train/7').ls()])
stacked_threes.shape, stacked_sevens.shape
# Output as pasted: 6131 threes and 6265 sevens, each image 28x28.
(torch.Size([6131, 28, 28]), torch.Size([6265, 28, 28]))
# Same procedure for the valid/3 and valid/7 folders.
valid_stacked_threes = torch.stack([tensor(Image.open(o))/255 for o in (path/'valid/3').ls()])
valid_stacked_sevens = torch.stack([tensor(Image.open(o))/255 for o in (path/'valid/7').ls()])
valid_stacked_threes.shape, valid_stacked_sevens.shape
# Output as pasted: 1010 threes and 1028 sevens, each image 28x28.
(torch.Size([1010, 28, 28]), torch.Size([1028, 28, 28]))
# Join threes and sevens (threes first) and flatten each 28x28 image into a row of 784 values.
train_x = concat(stacked_threes,stacked_sevens).view(-1,28*28)
# Labels in the same order: 1 for every three, 0 for every seven.
# unsqueeze(1) adds a trailing dimension so the labels have shape (N, 1).
train_y = concat(tensor([1]*len(stacked_threes)),tensor([0]*len(stacked_sevens))).unsqueeze(1)
train_x.shape, train_y.shape
# Output as pasted: 12396 training rows of 784 pixels, with one label per row.
(torch.Size([12396, 784]), torch.Size([12396, 1]))
# Validation inputs and labels are built the same way.
valid_x = concat(valid_stacked_threes,valid_stacked_sevens).view(-1,28*28)
valid_y = concat(tensor([1]*len(valid_stacked_threes)),tensor([0]*len(valid_stacked_sevens))).unsqueeze(1)
valid_x.shape, valid_y.shape
# Output as pasted: 2038 validation rows of 784 pixels, with one label per row.
(torch.Size([2038, 784]), torch.Size([2038, 1]))
def linear1(xb):
    """Apply the linear model to a batch of inputs.

    Computes ``xb @ weights + bias`` using the module-level ``weights`` and
    ``bias`` tensors, so the same single set of parameters is used for every
    row of ``xb`` regardless of which digit that row shows.

    Args:
        xb: batch of flattened images; its last dimension must match the
            first dimension of ``weights``.

    Returns:
        The raw (pre-sigmoid) prediction for each row of ``xb``.
    """
    return xb@weights + bias
def init_params(size, std=1.0):
    """Create a randomly initialised tensor that tracks gradients.

    Args:
        size: shape passed to ``torch.randn`` (an int or a tuple of ints).
        std: factor the standard-normal samples are multiplied by.

    Returns:
        A tensor of the requested shape with ``requires_grad`` switched on,
        so that ``backward()`` will populate its ``.grad``.
    """
    return (torch.randn(size)*std).requires_grad_()
# One weight per pixel (a 784 x 1 matrix) plus a single bias, both randomly initialised.
weights = init_params((28*28,1))
bias = init_params(1)
# Pair each flattened image with its label so a dataset item is an (x, y) tuple.
dset = list(zip(train_x,train_y))
valid_dset = list(zip(valid_x,valid_y))
# Serve the training and validation items in mini-batches of 256.
dl = DataLoader(dset,batch_size=256)
valid_dl = DataLoader(valid_dset,batch_size=256)
def mnist_loss(preds, targets):
    """Mean distance between the sigmoid of the predictions and the targets.

    Args:
        preds: raw model outputs; they are squashed with a sigmoid here.
        targets: tensor of labels, where 1 marks the positive class.

    Returns:
        A scalar tensor: the mean of ``1 - p`` where the target is 1 and of
        ``p`` everywhere else, with ``p = sigmoid(preds)``.
    """
    preds = preds.sigmoid()
    # Where the target is 1 the loss is how far p is from 1; otherwise it is
    # how far p is from 0.
    return torch.where(targets == 1, 1 - preds, preds).mean()
def calc_grad(xb, yb):
    """Run one forward/backward pass for a mini-batch.

    Predicts with ``linear1``, scores the predictions with ``mnist_loss``
    and calls ``backward()`` on the loss so that the tensors used by
    ``linear1`` receive their gradients in ``.grad``.

    Args:
        xb: batch of inputs passed to ``linear1``.
        yb: batch of targets passed to ``mnist_loss``.

    Returns:
        None; the result is the side effect on the parameters' ``.grad``.
    """
    preds = linear1(xb)
    loss = mnist_loss(preds, yb)
    loss.backward()
# The tensors that the training step iterates over and updates.
params = weights,bias
def train_epoch(lr=0.1):
    """Train for one pass over ``dl``, taking one gradient step per batch.

    For every mini-batch, ``calc_grad`` fills in the gradients and each
    tensor in ``params`` is then moved against its gradient. A batch taken
    from ``dl`` can contain rows with either label, and all rows contribute
    to the same gradient of the single shared parameter set.

    Args:
        lr: step size multiplied with the gradient (defaults to 0.1, the
            value that was previously hard-coded).
    """
    for xb, yb in dl:
        calc_grad(xb, yb)
        for p in params:
            # Update through .data so the step itself is not tracked by autograd.
            p.data -= p.grad*lr
            # Reset the gradient so the next batch starts from zero.
            p.grad.zero_()
def batch_accuracy(xb, yb):
    """Fraction of predictions in a batch that match their targets.

    Args:
        xb: raw model outputs for the batch (a sigmoid is applied here).
        yb: targets with the same shape as ``xb``; a prediction counts as
            correct when ``sigmoid(xb) > 0.5`` equals the target.

    Returns:
        A scalar tensor holding the mean of the per-item correctness.
    """
    preds = xb.sigmoid()
    corrects = (preds > 0.5) == yb
    return corrects.float().mean()
def validate_epoch():
    """Return the mean of the per-batch accuracies over ``valid_dl``.

    Each validation batch is run through ``linear1`` and scored with
    ``batch_accuracy``; the per-batch results are stacked and averaged.

    Returns:
        A scalar tensor with the averaged accuracy.
    """
    accur = [batch_accuracy(linear1(xb), yb) for xb, yb in valid_dl]
    return torch.stack(accur).mean()
# Train for 10 epochs, printing the validation accuracy after each one.
for _ in range(10):
    train_epoch()
    print(validate_epoch())
tensor(0.9447)
tensor(0.9462)
tensor(0.9496)
tensor(0.9511)
tensor(0.9540)
tensor(0.9555)
tensor(0.9579)
tensor(0.9589)
tensor(0.9594)
tensor(0.9609)
I understand what is happening here; my only confusion is this: when the parameters get updated, for which images' pixels are they updated (the images of the digit 3 or the images of the digit 7)? As I understand it, after initializing the weights we make predictions, then calculate the loss and, based on this loss, update the parameters.
Now, at the point of updating the parameters, there is only one set of parameters, i.e. a matrix of shape (28*28, 1). These parameters should be updated in such a way that, when they are used on a “3” image, the prediction is near 1 for a low loss; but when the same parameters are used on a “7” image, the prediction should be less than 0 for a low loss.
How are the parameters updated only for images of the digit “3”?
If the parameters are also updated for images of the digit “7”, shouldn't we then have two sets of parameters?
Am I missing something here, or is my understanding wrong?
kindly guide
Thank You