RTX 3090 / Torch 1.8 (Nightly) Compatibility

As of today (16 October 2020), you need to install the PyTorch nightly build to use this card. This can be done with `conda install pytorch torchvision cudatoolkit=11.0 -c pytorch-nightly`. See this issue for more details.
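
Once installed, a quick sanity check like the one below should confirm that the nightly build actually sees the card and ships a CUDA 11 runtime (the exact version strings will vary by nightly):

```python
import torch

print(torch.__version__)              # e.g. a 1.8.0.dev... nightly build
print(torch.version.cuda)             # should report 11.0
print(torch.cuda.is_available())      # True if the driver and toolkit match
print(torch.cuda.get_device_name(0))  # e.g. 'GeForce RTX 3090'
```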

When running the pets tutorial, I run into an error when trying to call dls.show_batch(). My setup follows the standard pets tutorial, roughly as in the sketch below (from memory; the exact arguments and transforms may differ slightly):
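
```python
from fastai.vision.all import *

# Roughly the standard pets setup from the tutorial (a sketch, not verbatim)
path = untar_data(URLs.PETS)/'images'

def is_cat(x): return x[0].isupper()  # label function used in the tutorial

dls = ImageDataLoaders.from_name_func(
    path, get_image_files(path), valid_pct=0.2, seed=42,
    label_func=is_cat, item_tfms=Resize(224))

dls.show_batch(max_n=9)  # this call raises the error below
```

The call fails with the following traceback: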

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-69-ad768ac4dd9a> in <module>
----> 1 tst_dl.show_batch(max_n=9)

~/git/fastai/fastai/data/core.py in show_batch(self, b, max_n, ctxs, show, unique, **kwargs)
     98             old_get_idxs = self.get_idxs
     99             self.get_idxs = lambda: Inf.zeros
--> 100         if b is None: b = self.one_batch()
    101         if not show: return self._pre_show_batch(b, max_n=max_n)
    102         show_batch(*self._pre_show_batch(b, max_n=max_n), ctxs=ctxs, max_n=max_n, **kwargs)

~/git/fastai/fastai/data/load.py in one_batch(self)
    134     def one_batch(self):
    135         if self.n is not None and len(self)==0: raise ValueError(f'This DataLoader does not contain any batches')
--> 136         with self.fake_l.no_multiproc(): res = first(self)
    137         if hasattr(self, 'it'): delattr(self, 'it')
    138         return res

~/git/fastcore/fastcore/foundation.py in first(x)
    236 def first(x):
    237     "First element of `x`, or None if missing"
--> 238     try: return next(iter(x))
    239     except StopIteration: return None
    240 

~/git/fastai/fastai/data/load.py in __iter__(self)
    100         self.before_iter()
    101         self.__idxs=self.get_idxs() # called in context of main process (not workers/subprocesses)
--> 102         for b in _loaders[self.fake_l.num_workers==0](self.fake_l):
    103             if self.device is not None: b = to_device(b, self.device)
    104             yield self.after_batch(b)

~/miniconda3/envs/bleeding-edge/lib/python3.7/site-packages/torch/utils/data/dataloader.py in __init__(self, loader)
    467 class _SingleProcessDataLoaderIter(_BaseDataLoaderIter):
    468     def __init__(self, loader):
--> 469         super(_SingleProcessDataLoaderIter, self).__init__(loader)
    470         assert self._timeout == 0
    471         assert self._num_workers == 0

~/miniconda3/envs/bleeding-edge/lib/python3.7/site-packages/torch/utils/data/dataloader.py in __init__(self, loader)
    416         self._sampler_iter = iter(self._index_sampler)
    417         self._base_seed = torch.empty((), dtype=torch.int64).random_(generator=loader.generator).item()
--> 418         self._persistent_workers = loader.persistent_workers
    419         self._num_yielded = 0
    420 

AttributeError: '_FakeLoader' object has no attribute 'persistent_workers'

I also tried the vision tutorial and ran into the same error, with a different traceback, when calling learn.fine_tune():

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-11-4b9dee10b079> in <module>
      1 learn = cnn_learner(dls, resnet34, metrics=error_rate)
----> 2 learn.fine_tune(1)

~/git/fastcore/fastcore/logargs.py in _f(*args, **kwargs)
     54         init_args.update(log)
     55         setattr(inst, 'init_args', init_args)
---> 56         return inst if to_return else f(*args, **kwargs)
     57     return _f

~/git/fastai/fastai/callback/schedule.py in fine_tune(self, epochs, base_lr, freeze_epochs, lr_mult, pct_start, div, **kwargs)
    159     "Fine tune with `freeze` for `freeze_epochs` then with `unfreeze` from `epochs` using discriminative LR"
    160     self.freeze()
--> 161     self.fit_one_cycle(freeze_epochs, slice(base_lr), pct_start=0.99, **kwargs)
    162     base_lr /= 2
    163     self.unfreeze()

~/git/fastcore/fastcore/logargs.py in _f(*args, **kwargs)
     54         init_args.update(log)
     55         setattr(inst, 'init_args', init_args)
---> 56         return inst if to_return else f(*args, **kwargs)
     57     return _f

~/git/fastai/fastai/callback/schedule.py in fit_one_cycle(self, n_epoch, lr_max, div, div_final, pct_start, wd, moms, cbs, reset_opt)
    111     scheds = {'lr': combined_cos(pct_start, lr_max/div, lr_max, lr_max/div_final),
    112               'mom': combined_cos(pct_start, *(self.moms if moms is None else moms))}
--> 113     self.fit(n_epoch, cbs=ParamScheduler(scheds)+L(cbs), reset_opt=reset_opt, wd=wd)
    114 
    115 # Cell

~/git/fastcore/fastcore/logargs.py in _f(*args, **kwargs)
     54         init_args.update(log)
     55         setattr(inst, 'init_args', init_args)
---> 56         return inst if to_return else f(*args, **kwargs)
     57     return _f

~/git/fastai/fastai/learner.py in fit(self, n_epoch, lr, wd, cbs, reset_opt)
    205             self.opt.set_hypers(lr=self.lr if lr is None else lr)
    206             self.n_epoch = n_epoch
--> 207             self._with_events(self._do_fit, 'fit', CancelFitException, self._end_cleanup)
    208 
    209     def _end_cleanup(self): self.dl,self.xb,self.yb,self.pred,self.loss = None,(None,),(None,),None,None

~/git/fastai/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    153 
    154     def _with_events(self, f, event_type, ex, final=noop):
--> 155         try:       self(f'before_{event_type}')       ;f()
    156         except ex: self(f'after_cancel_{event_type}')
    157         finally:   self(f'after_{event_type}')        ;final()

~/git/fastai/fastai/learner.py in _do_fit(self)
    195         for epoch in range(self.n_epoch):
    196             self.epoch=epoch
--> 197             self._with_events(self._do_epoch, 'epoch', CancelEpochException)
    198 
    199     @log_args(but='cbs')

~/git/fastai/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    153 
    154     def _with_events(self, f, event_type, ex, final=noop):
--> 155         try:       self(f'before_{event_type}')       ;f()
    156         except ex: self(f'after_cancel_{event_type}')
    157         finally:   self(f'after_{event_type}')        ;final()

~/git/fastai/fastai/learner.py in _do_epoch(self)
    189 
    190     def _do_epoch(self):
--> 191         self._do_epoch_train()
    192         self._do_epoch_validate()
    193 

~/git/fastai/fastai/learner.py in _do_epoch_train(self)
    181     def _do_epoch_train(self):
    182         self.dl = self.dls.train
--> 183         self._with_events(self.all_batches, 'train', CancelTrainException)
    184 
    185     def _do_epoch_validate(self, ds_idx=1, dl=None):

~/git/fastai/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    153 
    154     def _with_events(self, f, event_type, ex, final=noop):
--> 155         try:       self(f'before_{event_type}')       ;f()
    156         except ex: self(f'after_cancel_{event_type}')
    157         finally:   self(f'after_{event_type}')        ;final()

~/git/fastai/fastai/learner.py in all_batches(self)
    159     def all_batches(self):
    160         self.n_iter = len(self.dl)
--> 161         for o in enumerate(self.dl): self.one_batch(*o)
    162 
    163     def _do_one_batch(self):

~/git/fastai/fastai/data/load.py in __iter__(self)
    100         self.before_iter()
    101         self.__idxs=self.get_idxs() # called in context of main process (not workers/subprocesses)
--> 102         for b in _loaders[self.fake_l.num_workers==0](self.fake_l):
    103             if self.device is not None: b = to_device(b, self.device)
    104             yield self.after_batch(b)

~/miniconda3/envs/bleeding-edge/lib/python3.7/site-packages/torch/utils/data/dataloader.py in __init__(self, loader)
    764 
    765     def __init__(self, loader):
--> 766         super(_MultiProcessingDataLoaderIter, self).__init__(loader)
    767 
    768         assert self._num_workers > 0

~/miniconda3/envs/bleeding-edge/lib/python3.7/site-packages/torch/utils/data/dataloader.py in __init__(self, loader)
    416         self._sampler_iter = iter(self._index_sampler)
    417         self._base_seed = torch.empty((), dtype=torch.int64).random_(generator=loader.generator).item()
--> 418         self._persistent_workers = loader.persistent_workers
    419         self._num_yielded = 0
    420 

AttributeError: '_FakeLoader' object has no attribute 'persistent_workers'

There seem to be some new additions in PyTorch's torch/utils/data/dataloader.py module that cause these errors.

Any clue how to fix this, or is it a trivial fix? Is there a hacky workaround?
Thanks!

See #2878

This branch resolved the missing-attribute issue, but I then ran into a missing positional argument error from new_ones. I suspect it occurs because fastai.torch_core._f is not passing the argument through to torch.Tensor.new_ones. You can replicate this by running `import fastai; fastai.dataloaders._FakeLoader.persistent_workers = None` and then running the code again.
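
For anyone wanting to replicate this (or just get past the first AttributeError), the step above looks roughly like the sketch below. Note that the tracebacks point at fastai/data/load.py, so the class should be importable from fastai.data.load:

```python
# Sketch of the monkey-patch described above. _FakeLoader is fastai's stand-in
# object handed to PyTorch's DataLoader iterators (see fastai/data/load.py in
# the tracebacks); giving it the attribute the new iterator expects gets past
# the AttributeError, but as noted above it then exposes the new_ones error.
from fastai.data.load import _FakeLoader

_FakeLoader.persistent_workers = None  # or False, mirroring the DataLoader default

# ...then re-run the failing code, e.g. dls.show_batch() or learn.fine_tune(1)
```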

Thanks for the response @paulclou! I think that error is misleading; I responded in more detail on GitHub.