Fastai v2 vision

muellerzr · November 1, 2019, 12:16am

I’m trying to run image regression and this sticky issue is showing up again in colab. I thought it was tied to the warp transform but it does not seem that way, as leaving max_warp=0 worked (and still does) for the PETs notebook.

dblock = DataBlock(blocks=(ImageBlock, PointBlock),
                   get_items=get_image_files,
                   splitter=splitter,
                   get_y=get_ctr)
item_tfms=[]
batch_tfms = [*aug_transforms(size=(120,160), max_warp=0), Normalize(*imagenet_stats)]

dbunch = dblock.databunch(path, path=path, bs=64, batch_tfms=batch_tfms)
dbunch.show_batch(max_n=9, figsize=(9,6))

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-22-70822119f58e> in <module>()
----> 1 dbunch.show_batch(max_n=9, figsize=(9,6))

14 frames
/usr/local/lib/python3.6/dist-packages/fastai2/data/core.py in show_batch(self, b, max_n, ctxs, show, **kwargs)
     76 
     77     def show_batch(self, b=None, max_n=10, ctxs=None, show=True, **kwargs):
---> 78         if b is None: b = self.one_batch()
     79         if not show: return self._pre_show_batch(b, max_n=max_n)
     80         show_batch(*self._pre_show_batch(b, max_n=max_n), ctxs=ctxs, max_n=max_n, **kwargs)

/usr/local/lib/python3.6/dist-packages/fastai2/data/load.py in one_batch(self)
    120     def do_batch(self, b): return self.retain(self.create_batch(self.before_batch(b)), b)
    121     def one_batch(self):
--> 122         with self.fake_l.no_multiproc(): return first(self)

/usr/local/lib/python3.6/dist-packages/fastai2/core/utils.py in first(x)
    148 def first(x):
    149     "First element of `x`; i.e. a shortcut for `next(iter(x))`"
--> 150     return next(iter(x))
    151 
    152 #Cell

/usr/local/lib/python3.6/dist-packages/fastai2/data/load.py in __iter__(self)
     90         self.randomize()
     91         self.before_iter()
---> 92         for b in _loaders[self.fake_l.num_workers==0](self.fake_l): yield self.after_batch(b)
     93         self.after_iter()
     94         if hasattr(self, 'it'): delattr(self, 'it')

/usr/local/lib/python3.6/dist-packages/fastai2/core/transform.py in __call__(self, o)
    199         self.fs.append(t)
    200 
--> 201     def __call__(self, o): return compose_tfms(o, tfms=self.fs, split_idx=self.split_idx)
    202     def __repr__(self): return f"Pipeline: {self.fs}"
    203     def __getitem__(self,i): return self.fs[i]

/usr/local/lib/python3.6/dist-packages/fastai2/core/transform.py in compose_tfms(x, tfms, is_enc, reverse, **kwargs)
    147     for f in tfms:
    148         if not is_enc: f = f.decode
--> 149         x = f(x, **kwargs)
    150     return x
    151 

/usr/local/lib/python3.6/dist-packages/fastai2/vision/augment.py in __call__(self, b, split_idx, **kwargs)
     31     def __call__(self, b, split_idx=None, **kwargs):
     32         self.before_call(b, split_idx=split_idx)
---> 33         return super().__call__(b, split_idx=split_idx, **kwargs) if self.do else b
     34 
     35 #Cell

/usr/local/lib/python3.6/dist-packages/fastai2/core/transform.py in __call__(self, x, **kwargs)
     85     @property
     86     def use_as_item(self): return ifnone(self.as_item_force, self.as_item)
---> 87     def __call__(self, x, **kwargs): return self._call('encodes', x, **kwargs)
     88     def decode  (self, x, **kwargs): return self._call('decodes', x, **kwargs)
     89     def setup(self, items=None): return self.setups(items)

/usr/local/lib/python3.6/dist-packages/fastai2/core/transform.py in _call(self, fn, x, split_idx, **kwargs)
     94         f = getattr(self, fn)
     95         if self.use_as_item or not is_listy(x): return self._do_call(f, x, **kwargs)
---> 96         res = tuple(self._do_call(f, x_, **kwargs) for x_ in x)
     97         return retain_type(res, x)
     98 

/usr/local/lib/python3.6/dist-packages/fastai2/core/transform.py in <genexpr>(.0)
     94         f = getattr(self, fn)
     95         if self.use_as_item or not is_listy(x): return self._do_call(f, x, **kwargs)
---> 96         res = tuple(self._do_call(f, x_, **kwargs) for x_ in x)
     97         return retain_type(res, x)
     98 

/usr/local/lib/python3.6/dist-packages/fastai2/core/transform.py in _do_call(self, f, x, **kwargs)
     98 
     99     def _do_call(self, f, x, **kwargs):
--> 100         return x if f is None else retain_type(f(x, **kwargs), x, f.returns_none(x))
    101 
    102 add_docs(Transform, decode="Delegate to `decodes` to undo transform", setup="Delegate to `setups` to set up transform")

/usr/local/lib/python3.6/dist-packages/fastai2/core/dispatch.py in __call__(self, *args, **kwargs)
     96         if not f: return args[0]
     97         if self.inst is not None: f = types.MethodType(f, self.inst)
---> 98         return f(*args, **kwargs)
     99 
    100     def __get__(self, inst, owner):

/usr/local/lib/python3.6/dist-packages/fastai2/vision/augment.py in encodes(self, x)
    315     def encodes(self, x:TensorImage): return self._encode(x, self.mode)
    316     def encodes(self, x:TensorMask):  return self._encode(x, self.mode_mask)
--> 317     def encodes(self, x:(TensorPoint, TensorBBox)): return self._encode(x, self.mode, reverse=True)
    318 
    319 #Cell

/usr/local/lib/python3.6/dist-packages/fastai2/vision/augment.py in _encode(self, x, mode, reverse)
    311     def _encode(self, x, mode, reverse=False):
    312         coord_func = None if len(self.coord_fs)==0 or self.split_idx else partial(compose_tfms, tfms=self.coord_fs, reverse=reverse)
--> 313         return x.affine_coord(self.mat, coord_func, sz=self.size, mode=mode, pad_mode=self.pad_mode)
    314 
    315     def encodes(self, x:TensorImage): return self._encode(x, self.mode)

/usr/local/lib/python3.6/dist-packages/fastai2/vision/augment.py in affine_coord(x, mat, coord_tfm, sz, mode, pad_mode)
    260     if sz is None: sz = x._meta.get('sz', None)
    261     if coord_tfm is not None: x = coord_tfm(x, invert=True)
--> 262     if mat is not None: x = (x - mat[:,:,2].unsqueeze(1)) @ torch.inverse(mat[:,:,:2].transpose(1,2))
    263     return TensorPoint(x, sz=sz)
    264 

RuntimeError: inverse_cuda: For batch 0: U(17437184,17437184) is zero, singular U.

Thoughts?

@fanyi how did you go about doing that?

Only issue I have with that is these are the AffineCoordTfms:

[AffineCoordTfm: False (TensorBBox,object) -> encodes
 (TensorPoint,object) -> encodes
 (TensorImage,object) -> encodes
 (TensorMask,object) -> encodes ]

Which I don’t want to leave out.

fanyi · November 1, 2019, 6:26am

Hi, @muellerzr, I switched back to CPU mode to avoid this issue, since I am focusing on going through my code for object recognition rather than on performance. That way I can avoid this issue for the time being, until it is fixed by @jeremy and the other masters.

jeremy · November 2, 2019, 4:12am

It’s not something we can fix - you need to update colab to the latest pytorch.

muellerzr · November 2, 2019, 10:59pm

Got it, thanks Jeremy. I opened up an issue to see what may be going on, as the pytorch versions are up to date. (1.3.0+cu100, 0.4.1+cu100)

muellerzr · November 4, 2019, 8:55pm

An update on the Colab issue, it seems to be because of magma, they’re working on a fix:

muellerzr · November 4, 2019, 11:43pm

The issue will be fixed tonight/tomorrow:

Finally resolved the issue today.
It turns out that the CMake for magma 2.5.1 added the line set(CUDA_SEPARABLE_COMPILATION ON), and doing separable compilation somehow triggers some deep nvcc interaction with CentOS7 / RHEL7 linkers, and all of that doesn’t go down very well.

After setting CUDA_SEPARABLE_COMPILATION to OFF, we can successfully build static binaries that work correctly on a K80.

We did not root-cause this bug, which would need significantly more time and effort because of its subtle nature. We do not intend to root-cause it for now, as the cost-benefit isn’t very high.

The commit that fixes the issue is pytorch/builder@dd7edfd

The nightlies from tomorrow morning will carry the fix.

The fix will also naturally be part of v1.3.1, after which we will request an update to Google Colab, which will fix the originally flagged bug report.

immarried · November 5, 2019, 9:46am

I have a tuple of tensors, and each tensor can be transformed into a PILMask by applying PILMask.create to it. How do I create a TupleTransform that can be applied directly to the tuple so that each tensor is converted to a PILMask? Neither TupleTransform(PILMask.create) nor TupleTransform(PILMask.create.encodes) is allowed. I’d ultimately like to stick this TupleTransform in the tfms that will be passed to DataSource. Perhaps I’m going about this the wrong way.

immarried · November 5, 2019, 10:06am

Wait, is the answer:

masktuple = TupleTransform(lambda o: PILMask.create(o))

?

This also works:

class TupleMaskTfm(Transform):
    def encodes(self, o: np.uint8): return PILMask.create(o)

sgugger · November 5, 2019, 1:24pm

You can also just pass the function PILMask.create as an item_tfms (in DataBlock) or after_item (in DataSource).

immarried · November 8, 2019, 3:26am

I’m trying Kaggle’s Understanding Clouds competition, and would like to have, in the modeling data, the image as the independent variable and four masks, one for each cloud type, as the dependent variables.

Right now, I’m passing a tfms containing 5 transform pipelines to DataSource. The first one for the image, and the other four for the masks.

tfms = [[PILImage.create], 
        [RLE_Decode('fish', annots, IMG_SHAPE), PILMask.create],
        [RLE_Decode('flower', annots, IMG_SHAPE), PILMask.create],
        [RLE_Decode('gravel', annots, IMG_SHAPE), PILMask.create],
        [RLE_Decode('sugar', annots, IMG_SHAPE), PILMask.create]]

dsrc = DataSource(items, tfms=tfms)

and from this, the DataBunch is created, with:

after_item_tfms = [ToTensor,]
after_batch_tfms = [Cuda(), IntToFloatTensor()]
dbch = dsrc.databunch(after_item=after_item_tfms, after_batch=after_batch_tfms, bs=4)

which seems to load things of the desired shapes:

So, the next step will be to pass the DataBunch to Learner along with the model, loss, etc. However, in this whole process I haven’t told Fastai which of the tuple of 5 things are the independent variables and which are the dependent variables. How will it know to treat just the first tensor as the input to the model, and the rest as the targets?

My notebook for this can be found here.

immarried · November 10, 2019, 6:20am

There is a keyword argument called n_inp for DataSource. This is probably the number of independent variables. DataBlock has it, too.

sgugger · November 10, 2019, 2:42pm

Yes, that’s exactly it. Pass along n_inp=1 because by default, the library considers everything but the last tensor as the input. Note that your loss function will need to take one output (which will very likely be a list of 4 things or a huge tensor that you will split in 4) and 4 targets.

s.s.o · November 11, 2019, 1:32pm

When I use cnn_learner with [accuracy,RocAuc] or [accuracy,RocAucMulti] as metrics, I get an error on Windows. Is this the wrong usage? How can I fix it?

cnn_learner(den_dbn, xresnet50, opt_func=opt_func, metrics=[accuracy,RocAuc])

Some more error messages:
…
d:\codes\fastai_dev\dev\local\learner.py in accumulate(self, learn)
399 def accumulate(self, learn):
400 bs = find_bs(learn.yb)
–> 401 self.total += to_detach(_maybe_reduce(self.func(learn.pred, *learn.yb)))*bs
402 self.count += bs
403 @property

TypeError: unsupported operand type(s) for *: ‘AccumMetric’ and ‘int’

sgugger · November 11, 2019, 2:20pm

You need to instantiate your RocAuc metric: pass RocAuc()

s.s.o · November 11, 2019, 2:26pm

Thank you for your support. After correcting it I receive:

d:\codes\fastai_dev\dev\local\learner.py in _maybe_item(t)
434
435 def _maybe_item(t):
–> 436 t = t.value
437 return t.item() if isinstance(t, Tensor) and t.numel()==1 else t
438

d:\codes\fastai_dev\dev\local\metrics.py in value(self)
41 preds,targs = torch.cat(self.preds),torch.cat(self.targs)
42 if self.to_np: preds,targs = preds.numpy(),targs.numpy()
—> 43 return self.func(targs, preds, **self.kwargs) if self.invert_args else self.func(preds, targs, **self.kwargs)
44
45 #Cell

TypeError: recall_score() got an unexpected keyword argument ‘laverage’

sgugger · November 11, 2019, 2:28pm

Oh this one is a typo. Fixing now.

s.s.o · November 11, 2019, 2:28pm

ok thank you

s.s.o · November 11, 2019, 7:14pm

@sgugger, sorry to bother you again … but this time I get this error:

d:\codes\fastai_dev\dev\local\metrics.py in value(self)
42 preds,targs = torch.cat(self.preds),torch.cat(self.targs)
43 if self.to_np: preds,targs = preds.numpy(),targs.numpy()
—> 44 return self.func(targs, preds, **self.kwargs) if self.invert_args else self.func(preds, targs, **self.kwargs)
45
46 #Cell

TypeError: recall_score() got an unexpected keyword argument ‘max_fpr’

sgugger · November 11, 2019, 8:35pm

Ah yes, copy-pasting error. The function called was recall_score and not auc_roc_score. Fixed now.

muellerzr · November 12, 2019, 6:36am

I’m attempting to do segmentation right now, I have the model trained and I’m trying to call show_results like so (assume up until this point I have been following CAMVID):

learn.show_results(max_n=4, vmin=1, vmax=30, figsize=(15,6))

local variable 'old_shuffle' referenced before assignment

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/fastai2/learner.py in _do_epoch_validate(self, ds_idx, dl)
    254         try:
--> 255             dl.shuffle,old_shuffle = False,dl.shuffle
    256             dl.drop_last,old_drop = False,dl.drop_last

AttributeError: 'list' object has no attribute 'shuffle'

Along with this:

<ipython-input-51-67d4997d7834> in <module>()
----> 1 learn.show_results(max_n=4, figsize=(15,6))

/usr/local/lib/python3.6/dist-packages/fastai2/learner.py in show_results(self, ds_idx, dl, max_n, **kwargs)
    318         if dl is None: dl = self.dbunch.dls[ds_idx]
    319         b = dl.one_batch()
--> 320         _,_,preds = self.get_preds(dl=[b], with_decoded=True)
    321         self.dbunch.show_results(b, preds, max_n=max_n, **kwargs)
    322 

/usr/local/lib/python3.6/dist-packages/fastai2/learner.py in get_preds(self, ds_idx, dl, with_input, with_loss, with_decoded, act, save_preds, save_targs)
    294         with self.no_logging(), self.added_cbs(cb), self.loss_not_reduced(), self.no_mbar():
    295             self(_before_epoch)
--> 296             self._do_epoch_validate(ds_idx, dl)
    297             self(_after_epoch)
    298             if act is None: act = getattr(self.loss_func, 'activation', noop)

/usr/local/lib/python3.6/dist-packages/fastai2/learner.py in _do_epoch_validate(self, ds_idx, dl)
    259         except CancelValidException:                         self('after_cancel_validate')
    260         finally:
--> 261             dl.shuffle,dl.drop_last = old_shuffle,old_drop;  self('after_validate')
    262 
    263     def fit(self, n_epoch, lr=None, wd=defaults.wd, cbs=None, reset_opt=False):

UnboundLocalError: local variable 'old_shuffle' referenced before assignment

Perhaps it’s grabbing both of the DataLoaders instead of one?