Hi all,
I am trying to do distributed training using fastai v2. Running the example script directly with
python train_imagenette.py
works.
But when I launch it with the command python -m fastai.launch train_imagenette.py, I get the following error
(AttributeError: 'XResNet' object has no attribute 'module'):
File "train_imagenette.py", line 79, in main
learn.fit_flat_cos(epochs, lr, wd=1e-2, cbs=cbs)
File "/envs/mainpy3.7/lib/python3.7/site-packages/fastai2/callback/schedule.py", line 112, in fit_flat_cos
self.fit(n_epoch, cbs=ParamScheduler(scheds)+L(cbs), reset_opt=reset_opt, wd=wd)
File "/envs/mainpy3.7/lib/python3.7/site-packages/fastai2/learner.py", line 300, in fit
finally: self('after_fit')
File "/envs/mainpy3.7/lib/python3.7/site-packages/fastai2/learner.py", line 228, in __call__
def __call__(self, event_name): L(event_name).map(self._call_one)
File "/envs/mainpy3.7/lib/python3.7/site-packages/fastcore/foundation.py", line 362, in map
return self._new(map(g, self))
File "/envs/mainpy3.7/lib/python3.7/site-packages/fastcore/foundation.py", line 315, in _new
def _new(self, items, *args, **kwargs): return type(self)(items, *args, use_list=None, **kwargs)
File "/envs/mainpy3.7/lib/python3.7/site-packages/fastcore/foundation.py", line 41, in __call__
res = super().__call__(*((x,) + args), **kwargs)
File "/envs/mainpy3.7/lib/python3.7/site-packages/fastcore/foundation.py", line 306, in __init__
items = list(items) if use_list else _listify(items)
File "/envs/mainpy3.7/lib/python3.7/site-packages/fastcore/foundation.py", line 242, in _listify
if is_iter(o): return list(o)
File "/envs/mainpy3.7/lib/python3.7/site-packages/fastcore/foundation.py", line 208, in __call__
return self.fn(*fargs, **kwargs)
File "/envs/mainpy3.7/lib/python3.7/site-packages/fastai2/learner.py", line 231, in _call_one
[cb(event_name) for cb in sort_by_run(self.cbs)]
File "/envs/mainpy3.7/lib/python3.7/site-packages/fastai2/learner.py", line 231, in <listcomp>
[cb(event_name) for cb in sort_by_run(self.cbs)]
File "/envs/mainpy3.7/lib/python3.7/site-packages/fastai2/learner.py", line 25, in __call__
if self.run and _run: getattr(self, event_name, noop)()
File "/envs/mainpy3.7/lib/python3.7/site-packages/fastai2/distributed.py", line 106, in after_fit
self.learn.model = self.learn.model.module
File "/envs/mainpy3.7/lib/python3.7/site-packages/torch/nn/modules/module.py", line 585, in __getattr__
type(self).__name__, name))
AttributeError: 'XResNet' object has no attribute 'module'