Hi everyone,
I need to create a databunch for text classification whose train dataloader is stable, i.e. it returns the same batches in the same order every time.
After reading the documentation for the fastai library, I made the following changes:
Code
class CustomTextClasDataBunch(TextDataBunch):
    "Create a `TextDataBunch` for training an RNN classifier with a non-shuffled train `DataLoader`."

    @classmethod
    def create(cls, train_ds, valid_ds, test_ds=None, path:PathOrStr='.', bs:int=32, val_bs:int=None, pad_idx=1,
               pad_first=True, device:torch.device=None, no_check:bool=False, backwards:bool=False,
               dl_tfms:Optional[Collection[Callable]]=None, **dl_kwargs) -> DataBunch:
        """Transform the `datasets` into a `DataBunch` for classification.

        The training `DataLoader` is built with `shuffle=False` and no sampler,
        so it walks the training dataset in index order. Every remaining
        dataset (validation, and test when given) is served through a
        `SortSampler` keyed on the length of each item in `ds.x.items`.

        Args:
            train_ds, valid_ds, test_ds: datasets handed to `cls._init_ds`.
            path: forwarded to the `DataBunch` constructor.
            bs: batch size of the training loader.
            val_bs: batch size of the other loaders; falls back to `bs` when `None`.
            pad_idx, pad_first, backwards: forwarded to `pad_collate`.
            device, no_check, dl_tfms: forwarded to the `DataBunch` constructor.
            **dl_kwargs: passed on to every `DataLoader()`.

        Returns:
            An instance of `cls` wrapping the created dataloaders.
        """
        datasets = cls._init_ds(train_ds, valid_ds, test_ds)
        val_bs = ifnone(val_bs, bs)
        # Batches hold texts of different lengths, so they need the padding collate.
        collate_fn = partial(pad_collate, pad_idx=pad_idx, pad_first=pad_first, backwards=backwards)

        # Deliberately no sampler here: keep the training order fixed across epochs.
        train_dl = DataLoader(datasets[0], batch_size=bs, shuffle=False, **dl_kwargs)
        dataloaders = [train_dl]
        for ds in datasets[1:]:
            lengths = [len(t) for t in ds.x.items]
            sampler = SortSampler(ds.x, key=lengths.__getitem__)
            dataloaders.append(DataLoader(ds, batch_size=val_bs, sampler=sampler, **dl_kwargs))

        # Hand over the padding collate built above; the earlier `collate_fn=None`
        # discarded it and left the loaders without a way to batch ragged texts.
        return cls(*dataloaders, path=path, device=device, dl_tfms=dl_tfms, collate_fn=collate_fn,
                   no_check=no_check)
# Build the classification databunch from the three dataframes, reusing the
# vocabulary of the language-model databunch so token ids stay aligned.
# NOTE(review): verify that `from_df` really ends up calling
# `CustomTextClasDataBunch.create`. If the library picks the databunch class
# from the item list rather than from `cls`, the override is never used and
# the stock train sampler stays in effect — TODO confirm against the
# installed fastai version.
data_clas = CustomTextClasDataBunch.from_df(
    dataset_location,
    train_df=df_train,
    valid_df=df_dev,
    test_df=df_test,
    vocab=data_lm.train_ds.vocab,
    bs=bs,
)

# Classifier learner on top of the AWD-LSTM architecture.
learn = text_classifier_learner(data_clas, AWD_LSTM, drop_mult=0.5)

# Swap the train loader for a copy requested with shuffling off and no worker
# processes.
learn.data.train_dl = learn.data.train_dl.new(
    shuffle=False,
    num_workers=0,
)
I hoped this would do the job, but my test cases for it are failing: every time I print the first batch, the entries are different.
Can anyone point out the error and tell me how to do it?