Context:
I have a small dataset of 5,500 images that I trained on in two separate notebooks.
Notebook 1: custom PyTorch Dataset + custom training loop --> training time per epoch > 5 minutes
Notebook 2: fast.ai DataBunch + Learner --> training time per epoch = 30 seconds
I would like to understand what mistake I made in the custom code in notebook 1.
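(To make the comparison concrete, the timings can be reproduced with a plain wall-clock measurement like the sketch below; learn, trainer, dataloader and model refer to the code that follows.)
import time

start = time.time()
learn.fit(1)                      # fast.ai notebook, one epoch
print(f"fast.ai epoch: {time.time() - start:.1f} s")

start = time.time()
trainer(dataloader, 1, model)     # custom notebook, one epoch
print(f"custom loop epoch: {time.time() - start:.1f} s")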
Code:
Fast.ai code
# Databunch
from fastai.vision import *   # fastai v1
import pandas as pd

hot_df = pd.read_csv("ratings.csv")
hot_databunch = ImageList.from_folder("./SCUT-FBP5500_v2/Images", inner_df=hot_df)\
    .split_by_rand_pct(valid_pct=0.2)\
    .label_from_df(cols=2)\
    .transform()\
    .databunch()
### Model
import torchvision.models as models
import torch.nn as nn

model = models.mobilenet_v2(pretrained=True, progress=True)
new_head = nn.Sequential(nn.Dropout(p=0.2, inplace=False),
                         nn.Linear(in_features=1280, out_features=1000, bias=True),
                         nn.ReLU(),
                         nn.Linear(in_features=1000, out_features=1, bias=True),
                         nn.Sigmoid())
model.classifier = new_head   # swap in the custom head (1280 -> 1000 -> 1)
## Learner
learn = Learner(hot_databunch, model, metrics=[accuracy])
learn.fit(1)
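To compare what the two pipelines feed the model, the databunch can be inspected with fastai v1's one_batch() (a quick sanity-check sketch, not part of the original notebook):
xb, yb = hot_databunch.one_batch()
print(xb.shape, xb.dtype)   # image batch produced by the databunch
print(yb.shape, yb.dtype)   # corresponding labels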
PyTorch dataset
import os
import cv2
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

class HotOrNotDataset(Dataset):
    def __init__(self, csv_file, img_root, transform=None):
        self.ratings = pd.read_csv(csv_file)
        self.img_root = img_root
        self.transform = transform

    def __len__(self):
        return len(self.ratings)

    def __getitem__(self, idx):
        img_name = os.path.join(self.img_root,
                                self.ratings.iloc[idx]['Filename'])
        image = cv2.imread(img_name)
        image = image / 255.0
        target = self.ratings.iloc[idx]['Hot']
        sample = (image, target)

        if self.transform:
            sample = self.transform(sample)

        return sample
class ToTensor(object):
    """Convert ndarrays in sample to Tensors."""
    def __call__(self, sample):
        image, label = sample
        # numpy image (H x W x C) -> torch image (C x H x W)
        image = image.transpose((2, 0, 1))
        # BCELoss expects float targets, so the label is cast to float rather than long
        return (torch.from_numpy(image).float(),
                torch.from_numpy(np.array([label])).float())
class Normalize(object):
    def __init__(self):
        self.normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                              std=[0.229, 0.224, 0.225])

    def __call__(self, x):
        image, label = x
        return (self.normalize(image), label)
dataset = HotOrNotDataset(csv_file='ratings.csv',
                          img_root="./SCUT-FBP5500_v2/Images",
                          transform=transforms.Compose([ToTensor(), Normalize()]))

dataloader = DataLoader(dataset, batch_size=64,
                        shuffle=True, num_workers=2)
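The corresponding check for the custom pipeline (again a sketch); note that no resize is applied, so H and W are whatever size cv2.imread returns:
images, labels = next(iter(dataloader))
print(images.shape, images.dtype)   # expected: [64, 3, H, W], torch.float32
print(labels.shape, labels.dtype)   # expected: [64, 1], torch.float32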
PyTorch training loop
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import precision_score, recall_score, f1_score

def trainer(dataloader, epochs, net):
    criterion = nn.BCELoss()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

    for epoch in range(epochs):  # loop over the dataset multiple times
        running_loss = 0.0
        total = 0.0
        correct = 0.0
        for i, data in enumerate(dataloader, 0):
            # get the inputs; data is a tuple of (inputs, labels)
            inputs, labels = data

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # accumulate statistics
            running_loss += loss.item()
            outputs_np = (outputs.detach().numpy() > 0.5).astype(float)
            labels_np = labels.detach().numpy()
            total += len(labels)
            correct += np.sum(outputs_np == labels_np)
            # batch-level metrics (only the last batch's values are printed below)
            precision = precision_score(labels_np, outputs_np)
            recall = recall_score(labels_np, outputs_np)
            f1 = f1_score(labels_np, outputs_np)

        # epoch summary
        print('[%d, %5d] loss: %.3f' %
              (epoch + 1, i + 1, running_loss / (i + 1)))
        print(f'cum acc: {correct/total} prec_score: {round(precision,3)} '
              f'recall_score: {round(recall,3)} f1_score: {round(f1,3)}')

    print('Finished Training')
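Since the slowdown could come either from the data pipeline or from the forward/backward pass, a rough way to split one epoch between the two (a hypothetical diagnostic, not part of the original notebook; time_split_epoch is a made-up helper name):
import time

def time_split_epoch(dataloader, net, criterion, optimizer):
    """Rough split of one epoch into data-loading time and compute time."""
    load_time = compute_time = 0.0
    t0 = time.time()
    for inputs, labels in dataloader:
        t1 = time.time()
        load_time += t1 - t0              # time spent waiting for the next batch
        optimizer.zero_grad()
        loss = criterion(net(inputs), labels)
        loss.backward()
        optimizer.step()
        t0 = time.time()
        compute_time += t0 - t1           # forward/backward/step time
    print(f"data loading: {load_time:.1f} s, compute: {compute_time:.1f} s")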
Question:
What did I do wrong in my PyTorch notebook to cause such a severe performance issue?