Hi All,
I’m trying to implement a custom model for question response matching, but I’m new to fastai and pytorch (keras background), and I’m having a problem with my model.
I’ve successfully finetuned the language model to my custom technical dataset.
I have a problem with the final fit function after completing all subcomponents, and I hope that you can help me with finding the problem.
Also, I explored that similar words produce similar embeddings, by retrieving the nearest neighbor, and I was happy to see that the low level layer learnt to embed similar words in the same space, also i saw some valid sentence completion (as a validation for the language model learning successfully).
My current problem
I have a set of query and responses, I implemented a dataset class and a created a dataloader, with a custom collate function.
I validated the output of the dataloader to have a fixed length batch_size for queries and responses.
P.S, I tried to implement a custom itemlist, but i wasn’t able to understand from the documentation how to do so (will try to do so also), So i decided to work with pytorch modules and understand how to integrate them with fastai.
My dataset implementation is as follows
Note:
- the length function returns double the contents as i would like to randomly choose the wrong match from the response.
- if the requested id is even, i divide by two and select the question and response, and if it’s odd, i divide by two (integer division) to get the query, and choose the response randomly.
- I return three an input tuple (question_ids, response_ids) and an output label.
class dataset_qr(Dataset):
def __init__(self, questions, responses, vocab):
## vocab from language model
self.q_tok = Tokenizer().process_all(questions)
self.r_tok = Tokenizer().process_all(responses)
self.q_tok_ids = [[vocab.stoi[word] for word in sentence] for sentence in self.q_tok]
self.r_tok_ids = [[vocab.stoi[word] for word in sentence] for sentence in self.r_tok]
def __getitem__(self, index):
if index % 2 == 0:
y_label = 1
r = self.r_tok_ids[index//2]
else:
y_label = 0
r = np.random.choice(self.r_tok_ids)
q = self.q_tok_ids[index//2]
return (q,r),y_label
def __len__(self):
return len(self.q_tok_ids * 2)
def collate_fn_qr(batch, pad_idx:int=1):
qr = [x[0] for x in batch]
y = [x[1] for x in batch]
max_q_len = max([len(x[0]) for x in qr])
max_r_len = max([len(x[1]) for x in qr])
qs = torch.zeros(len(batch), max_q_len).long() + pad_idx
rs = torch.zeros(len(batch), max_r_len).long() + pad_idx
for i,s in enumerate(qr):
qs[i,-len(s[0]):] = LongTensor(s[0])
rs[i,-len(s[1]):] = LongTensor(s[1])
return (qs,rs),tensor(np.array(y))
and this is how i create the databunch from the dataloader (i think there should be a mistake here)
## each of questions , responses is an array of strings
ds = dataset_qr(questions, responses, data_lm.vocab)
dl = DataLoader(dataset = ds, batch_size=64, shuffle=True,collate_fn= collate_fn_qr, # use custom collate function here
pin_memory=True)
## for testing i'm passing the same dataloader
## for validation and training, will modify it later
db = DataBunch(train_dl = dl,valid_dl = dl,collate_fn=collate_fn_qr)
and this is how i validated that the databunch is returing a valid batch
x,y = next(iter(db.train_dl))
print(ids_to_sent(x[0][0])) ## produce valid padded sentence
## x[0] is the queries batch
## x[0][0] is the first query ids
Model Creation
I slightly modified the code for MultiBatchEncoder and for Pooling Classifier (mainly the forward functions) to be as follows
class MultiBatchEncoder_(Module):
"Create an encoder over `module` that can process a full sentence."
def __init__(self, bptt:int, max_len:int, module:nn.Module, pad_idx:int=1):
self.max_len,self.bptt,self.module,self.pad_idx = max_len,bptt,module,pad_idx
def concat(self, arrs:Collection[Tensor])->Tensor:
"Concatenate the `arrs` along the batch dimension."
return [torch.cat([l[si] for l in arrs], dim=1) for si in range_of(arrs[0])]
def reset(self):
if hasattr(self.module, 'reset'): self.module.reset()
def forward(self, input:Tuple[LongTensor, LongTensor])->Tuple[Tensor,Tensor]:
result = []
print(type(input), type(input[0]))
for i in range(2):
bs,sl = input[i].size()
input_ = input[i]
self.reset()
raw_outputs, outputs, masks = [],[],[]
for i in range(0, sl, self.bptt):
r, o = self.module(input_[:,i: min(i+self.bptt, sl)])
if i>(sl-self.max_len):
masks.append(input_[:,i: min(i+self.bptt, sl)] == self.pad_idx)
raw_outputs.append(r)
outputs.append(o)
result.append((self.concat(raw_outputs), self.concat(outputs), torch.cat(masks,dim=1)))
## 3rd output is an array for each input with ones corresponding to pad term
return result
class PoolingLinearClassifier_(Module):
"Create a linear classifier with pooling."
def __init__(self, layers:Collection[int], drops:Collection[float]):
mod_layers = []
if len(drops) != len(layers)-1: raise ValueError("Number of layers and dropout values do not match.")
activs = [nn.ReLU(inplace=True)] * (len(layers) - 2) + [nn.Sigmoid()] # None
for n_in, n_out, p, actn in zip(layers[:-1], layers[1:], drops, activs):
mod_layers += bn_drop_lin(n_in, n_out, p=p, actn=actn)
self.layers = nn.Sequential(*mod_layers)
def forward(self, input:Tuple[Tensor,Tensor, Tensor])->Tuple[Tensor,Tensor,Tensor]:
print(type(input))
def masked_concat_pool(outputs, mask):
"Pool MultiBatchEncoder outputs into one vector [last_hidden, max_pool, avg_pool]."
output = outputs[-1]
avg_pool = output.masked_fill(mask[:, :, None], 0).mean(dim=1)
avg_pool *= output.size(1) / (output.size(1)-mask.type(avg_pool.dtype).sum(dim=1))[:,None]
max_pool = output.masked_fill(mask[:,:,None], -float('inf')).max(dim=1)[0]
x = torch.cat([output[:,-1], max_pool, avg_pool], 1)
return x
inter_out = []
for i in range(2):
raw_outputs,outputs,mask = input[i]
x = masked_concat_pool(outputs, mask)
inter_out.append(x)
x = torch.cat(inter_out, 1)
x = self.layers(x)
return x, raw_outputs, outputs
and this is the code for creating the learner and the sequential model
config = None
lin_ftrs = None
ps = None
n_class = 1
## meta for AWD_LSTM
meta = {'hid_name':'emb_sz', 'url':URLs.WT103_FWD, 'url_bwd':URLs.WT103_BWD,
'config_lm':awd_lstm_lm_config, 'split_lm': awd_lstm_lm_split,
'config_clas':awd_lstm_clas_config, 'split_clas': awd_lstm_clas_split}
config = ifnone(config, meta['config_clas']).copy()
for k in config.keys():
if k.endswith('_p'): config[k] *= 1.
if lin_ftrs is None: lin_ftrs = [50]
if ps is None: ps = [0.1]*len(lin_ftrs)
layers = [config[meta['hid_name']] * 6] + lin_ftrs + [n_class]
ps = [config.pop('output_p')] + ps
init = config.pop('init') if 'init' in config else None
#encoder = MultiBatchEncoder(bptt, max_len, arch(vocab_sz, **config), pad_idx=pad_idx)
#print(layers)
enc = MultiBatchEncoder_(bptt= 70,max_len= 1400, module=learn_2.model[0])
model = PoolingLinearClassifier_(layers, ps)
m = SequentialRNN(enc,model)
learn = RNNLearner(db, m, split_func=meta['split_clas'])
The problem is that when i try to learn
learn.lr_find()
I have the following error log
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-58-76198e84df68> in <module>
----> 1 learn.lr_find()
~/anaconda3/envs/fastai/lib/python3.6/site-packages/fastai/train.py in lr_find(learn, start_lr, end_lr, num_it, stop_div, wd)
30 cb = LRFinder(learn, start_lr, end_lr, num_it, stop_div)
31 epochs = int(np.ceil(num_it/len(learn.data.train_dl)))
---> 32 learn.fit(epochs, start_lr, callbacks=[cb], wd=wd)
33
34 def to_fp16(learn:Learner, loss_scale:float=None, max_noskip:int=1000, dynamic:bool=True, clip:float=None,
~/anaconda3/envs/fastai/lib/python3.6/site-packages/fastai/basic_train.py in fit(self, epochs, lr, wd, callbacks)
204 callbacks = [cb(self) for cb in self.callback_fns + listify(defaults.extra_callback_fns)] + listify(callbacks)
205 if defaults.extra_callbacks is not None: callbacks += defaults.extra_callbacks
--> 206 fit(epochs, self, metrics=self.metrics, callbacks=self.callbacks+callbacks)
207
208 def create_opt(self, lr:Floats, wd:Floats=0.)->None:
~/anaconda3/envs/fastai/lib/python3.6/site-packages/fastai/basic_train.py in fit(epochs, learn, callbacks, metrics)
105 xb, yb = cb_handler.on_batch_begin(xb, yb)
106 print('after callback', len(xb), xb)
--> 107 loss = loss_batch(learn.model, xb, yb, learn.loss_func, learn.opt, cb_handler)
108 if cb_handler.on_batch_end(loss): break
109
~/anaconda3/envs/fastai/lib/python3.6/site-packages/fastai/basic_train.py in loss_batch(model, xb, yb, loss_func, opt, cb_handler)
28 print(type(xb),type(xb[0]),xb[0].shape, type(xb[1]), xb[1].shape)
29 print(xb)
---> 30 out = model(*xb)
31 out = cb_handler.on_loss_begin(out)
32
~/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
491 result = self._slow_forward(*input, **kwargs)
492 else:
--> 493 result = self.forward(*input, **kwargs)
494 for hook in self._forward_hooks.values():
495 hook_result = hook(self, input, result)
TypeError: forward() takes 2 positional arguments but 3 were given
When i tried to debug the error problem:
I tried to manually feed an input batch to the model by this
## db is the databunch
x,y = next(iter(db.train_dl))
## model is the SequentialRNN class
# produce the final sigmoid output along with rawoutputs and output
model(x)
# but
# this is causing the error
# this is how the fastai loss_batch() mentioned in the above error log is working.
model(*x)