Seq2seq language model

Is there a notebook or tutorial that demonstrates training a seq2seq language model with fastai v2? The model I ultimately want to train is similar to a language translation model, so I’ve been trying to train a translation model on the MT_ENG_FRA dataset.

I am aware that the nlp course provides an example of this for fastai v1, but unfortunately I have not been able to get it to work: the kernel I am using keeps dying with no indication of what went wrong.

In my attempts to train a translation model with fastai v2, I have so far managed to construct a DataLoaders:

from fastai.text.all import *
path = untar_data(URLs.MT_ENG_FRA)

df = pd.read_csv(path / 'questions_easy.csv')

# intended mapping: English as x, French as y; note that both TextBlocks
# tokenize into a column named 'text' by default (see the note below)
seq2seq_block = DataBlock(
    blocks=(TextBlock.from_df('en', seq_len=72), TextBlock.from_df('fr', seq_len=72)),
    get_x=ColReader('text'), get_y=ColReader('text'), splitter=ColSplitter())

dls = seq2seq_block.dataloaders(df, bs=4)
dls.show_batch(max_n=2)

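One thing I am unsure about in this block: as far as I can tell, TextBlock.from_df writes its tokenized output into a column named 'text' by default, so with two blocks the second may overwrite the first, and get_x/get_y would then read the same column. Below is a sketch of how I think the two columns could be kept separate via the tok_text_col argument; the column names en_tok/fr_tok are my own, and I swapped ColSplitter for RandomSplitter since I am not sure questions_easy.csv has an is_valid column:

seq2seq_block = DataBlock(
    blocks=(TextBlock.from_df('en', seq_len=72, tok_text_col='en_tok'),   # tokenized English -> 'en_tok'
            TextBlock.from_df('fr', seq_len=72, tok_text_col='fr_tok')),  # tokenized French  -> 'fr_tok'
    get_x=ColReader('en_tok'),
    get_y=ColReader('fr_tok'),
    splitter=RandomSplitter(valid_pct=0.1, seed=42))

dls = seq2seq_block.dataloaders(df, bs=4)
dls.show_batch(max_n=2)
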
However, I do not know how to choose an appropriate model or how to construct a suitable Learner for this task. I’ve tried using the language_model_learner function:

learn = language_model_learner(
    dls, AWD_LSTM, drop_mult=0.5, metrics=[accuracy, Perplexity()])

learn.fit_one_cycle(1, 1e-2)

but attempting to train with this code fails:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
/tmp/ipykernel_2529/906929582.py in <module>
----> 1 learn.fit_one_cycle(1, 1e-2)

~/anaconda3/lib/python3.9/site-packages/fastai/callback/schedule.py in fit_one_cycle(self, n_epoch, lr_max, div, div_final, pct_start, wd, moms, cbs, reset_opt)
    114     scheds = {'lr': combined_cos(pct_start, lr_max/div, lr_max, lr_max/div_final),
    115               'mom': combined_cos(pct_start, *(self.moms if moms is None else moms))}
--> 116     self.fit(n_epoch, cbs=ParamScheduler(scheds)+L(cbs), reset_opt=reset_opt, wd=wd)
    117 
    118 # Cell

~/anaconda3/lib/python3.9/site-packages/fastai/learner.py in fit(self, n_epoch, lr, wd, cbs, reset_opt)
    220             self.opt.set_hypers(lr=self.lr if lr is None else lr)
    221             self.n_epoch = n_epoch
--> 222             self._with_events(self._do_fit, 'fit', CancelFitException, self._end_cleanup)
    223 
    224     def _end_cleanup(self): self.dl,self.xb,self.yb,self.pred,self.loss = None,(None,),(None,),None,None

~/anaconda3/lib/python3.9/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    162 
    163     def _with_events(self, f, event_type, ex, final=noop):
--> 164         try: self(f'before_{event_type}');  f()
    165         except ex: self(f'after_cancel_{event_type}')
    166         self(f'after_{event_type}');  final()

~/anaconda3/lib/python3.9/site-packages/fastai/learner.py in __call__(self, event_name)
    140 
    141     def ordered_cbs(self, event): return [cb for cb in self.cbs.sorted('order') if hasattr(cb, event)]
--> 142     def __call__(self, event_name): L(event_name).map(self._call_one)
    143 
    144     def _call_one(self, event_name):

~/anaconda3/lib/python3.9/site-packages/fastcore/foundation.py in map(self, f, gen, *args, **kwargs)
    152     def range(cls, a, b=None, step=None): return cls(range_of(a, b=b, step=step))
    153 
--> 154     def map(self, f, *args, gen=False, **kwargs): return self._new(map_ex(self, f, *args, gen=gen, **kwargs))
    155     def argwhere(self, f, negate=False, **kwargs): return self._new(argwhere(self, f, negate, **kwargs))
    156     def argfirst(self, f, negate=False): return first(i for i,o in self.enumerate() if f(o))

~/anaconda3/lib/python3.9/site-packages/fastcore/basics.py in map_ex(iterable, f, gen, *args, **kwargs)
    664     res = map(g, iterable)
    665     if gen: return res
--> 666     return list(res)
    667 
    668 # Cell

~/anaconda3/lib/python3.9/site-packages/fastcore/basics.py in __call__(self, *args, **kwargs)
    649             if isinstance(v,_Arg): kwargs[k] = args.pop(v.i)
    650         fargs = [args[x.i] if isinstance(x, _Arg) else x for x in self.pargs] + args[self.maxi+1:]
--> 651         return self.func(*fargs, **kwargs)
    652 
    653 # Cell

~/anaconda3/lib/python3.9/site-packages/fastai/learner.py in _call_one(self, event_name)
    144     def _call_one(self, event_name):
    145         if not hasattr(event, event_name): raise Exception(f'missing {event_name}')
--> 146         for cb in self.cbs.sorted('order'): cb(event_name)
    147 
    148     def _bn_bias_state(self, with_bias): return norm_bias_params(self.model, with_bias).map(self.opt.state)

~/anaconda3/lib/python3.9/site-packages/fastai/callback/core.py in __call__(self, event_name)
     55         res = None
     56         if self.run and _run:
---> 57             try: res = getattr(self, event_name, noop)()
     58             except (CancelBatchException, CancelEpochException, CancelFitException, CancelStepException, CancelTrainException, CancelValidException): raise
     59             except Exception as e:

~/anaconda3/lib/python3.9/site-packages/fastai/callback/core.py in before_fit(self)
     84         self.learn.train_iter,self.learn.pct_train = 0,0.
     85         device = getattr(self.dls, 'device', default_device())
---> 86         self.model.to(device)
     87         if isinstance(self.loss_func, (nn.Module, BaseLoss)): self.loss_func.to(device)
     88         if hasattr(self.model, 'reset'): self.model.reset()

~/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py in to(self, *args, **kwargs)
    897             return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None, non_blocking)
    898 
--> 899         return self._apply(convert)
    900 
    901     def register_backward_hook(

~/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py in _apply(self, fn)
    568     def _apply(self, fn):
    569         for module in self.children():
--> 570             module._apply(fn)
    571 
    572         def compute_should_use_set_data(tensor, tensor_applied):

~/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py in _apply(self, fn)
    568     def _apply(self, fn):
    569         for module in self.children():
--> 570             module._apply(fn)
    571 
    572         def compute_should_use_set_data(tensor, tensor_applied):

~/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py in _apply(self, fn)
    591             # `with torch.no_grad():`
    592             with torch.no_grad():
--> 593                 param_applied = fn(param)
    594             should_use_set_data = compute_should_use_set_data(param, param_applied)
    595             if should_use_set_data:

~/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py in convert(t)
    895                 return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None,
    896                             non_blocking, memory_format=convert_to_format)
--> 897             return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None, non_blocking)
    898 
    899         return self._apply(convert)

RuntimeError: Exception occured in `TrainEvalCallback` when calling event `before_fit`:
	CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
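
From what I understand, a device-side assert that surfaces during model.to(device) usually means an earlier CUDA kernel failed asynchronously, and with embedding-based models this is often an out-of-range token index (for example, a mismatch between the model’s vocab size and the ids produced by the DataLoaders). I also suspect that language_model_learner may simply be the wrong tool here, since as far as I can tell it builds a next-token predictor over a single text stream (normally fed by a DataBlock with is_lm=True) rather than an encoder-decoder. Here is a sketch of how I have been trying to surface the underlying error; note that CUDA_LAUNCH_BLOCKING must be set before CUDA is initialized, and the CPU fallback below is my assumption of the quickest way to get a readable traceback:

import os
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'  # make CUDA errors synchronous; set this before any CUDA work

# Alternatively, run a single batch on the CPU, where index errors raise a normal Python exception
learn.dls.to('cpu')
learn.model.to('cpu')
xb, yb = learn.dls.one_batch()
out = learn.model(xb)  # with a vocab mismatch, I would expect an IndexError from the embedding layer here

Any pointers on how to set up a proper seq2seq Learner in fastai v2 would be much appreciated.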