Unet_binary segmentation

I want to apply the learning of Lesson3 to a different dataset with binary labels.
But I am getting this error and don't know why. Everything before this step works fine.

lr_find(learn)
---------------------------------------------------------------------------

RuntimeError                              Traceback (most recent call last)

<ipython-input-27-29bd39493606> in <module>
----> 1 lr_find(learn)


/opt/anaconda3/lib/python3.6/site-packages/fastai/train.py in lr_find(learn, start_lr, end_lr, num_it, stop_div, **kwargs)
     28     cb = LRFinder(learn, start_lr, end_lr, num_it, stop_div)
     29     a = int(np.ceil(num_it/len(learn.data.train_dl)))
---> 30     learn.fit(a, start_lr, callbacks=[cb], **kwargs)
     31 
     32 def to_fp16(learn:Learner, loss_scale:float=512., flat_master:bool=False)->Learner:


/opt/anaconda3/lib/python3.6/site-packages/fastai/basic_train.py in fit(self, epochs, lr, wd, callbacks)
    160         callbacks = [cb(self) for cb in self.callback_fns] + listify(callbacks)
    161         fit(epochs, self.model, self.loss_func, opt=self.opt, data=self.data, metrics=self.metrics,
--> 162             callbacks=self.callbacks+callbacks)
    163 
    164     def create_opt(self, lr:Floats, wd:Floats=0.)->None:


/opt/anaconda3/lib/python3.6/site-packages/fastai/basic_train.py in fit(epochs, model, loss_func, opt, data, callbacks, metrics)
     72     cb_handler = CallbackHandler(callbacks, metrics)
     73     pbar = master_bar(range(epochs))
---> 74     cb_handler.on_train_begin(epochs, pbar=pbar, metrics=metrics)
     75 
     76     exception=False


/opt/anaconda3/lib/python3.6/site-packages/fastai/callback.py in on_train_begin(self, epochs, pbar, metrics)
    192         self.state_dict['n_epochs'],self.state_dict['pbar'],self.state_dict['metrics'] = epochs,pbar,metrics
    193         names = [(met.name if hasattr(met, 'name') else camel2snake(met.__class__.__name__)) for met in self.metrics]
--> 194         self('train_begin', metrics_names=names)
    195 
    196     def on_epoch_begin(self)->None:


/opt/anaconda3/lib/python3.6/site-packages/fastai/callback.py in __call__(self, cb_name, call_mets, **kwargs)
    185         "Call through to all of the `CallbakHandler` functions."
    186         if call_mets: [getattr(met, f'on_{cb_name}')(**self.state_dict, **kwargs) for met in self.metrics]
--> 187         return [getattr(cb, f'on_{cb_name}')(**self.state_dict, **kwargs) for cb in self.callbacks]
    188 
    189     def on_train_begin(self, epochs:int, pbar:PBar, metrics:MetricFuncList)->None:


/opt/anaconda3/lib/python3.6/site-packages/fastai/callback.py in <listcomp>(.0)
    185         "Call through to all of the `CallbakHandler` functions."
    186         if call_mets: [getattr(met, f'on_{cb_name}')(**self.state_dict, **kwargs) for met in self.metrics]
--> 187         return [getattr(cb, f'on_{cb_name}')(**self.state_dict, **kwargs) for cb in self.callbacks]
    188 
    189     def on_train_begin(self, epochs:int, pbar:PBar, metrics:MetricFuncList)->None:


/opt/anaconda3/lib/python3.6/site-packages/fastai/callbacks/fp16.py in on_train_begin(self, **kwargs)
     69             self.learn.data.test_dl.add_tfm(to_half)
     70         #Get a copy of the model params in FP32
---> 71         self.model_params, self.master_params = get_master(self.learn.layer_groups, self.flat_master)
     72         #Changes the optimizer so that the optimization step is done in FP32.
     73         opt = self.learn.opt


/opt/anaconda3/lib/python3.6/site-packages/fastai/callbacks/fp16.py in get_master(layer_groups, flat_master)
     23         return model_params, master_params
     24     else:
---> 25         master_params = [[param.clone().float().detach() for param in lg] for lg in model_params]
     26         for mp in master_params:
     27             for param in mp: param.requires_grad = True


/opt/anaconda3/lib/python3.6/site-packages/fastai/callbacks/fp16.py in <listcomp>(.0)
     23         return model_params, master_params
     24     else:
---> 25         master_params = [[param.clone().float().detach() for param in lg] for lg in model_params]
     26         for mp in master_params:
     27             for param in mp: param.requires_grad = True


/opt/anaconda3/lib/python3.6/site-packages/fastai/callbacks/fp16.py in <listcomp>(.0)
     23         return model_params, master_params
     24     else:
---> 25         master_params = [[param.clone().float().detach() for param in lg] for lg in model_params]
     26         for mp in master_params:
     27             for param in mp: param.requires_grad = True


RuntimeError: cuda runtime error (59) : device-side assert triggered at /opt/conda/conda-bld/pytorch-nightly_1540411964561/work/aten/src/THC/THCTensorCopy.cu:102

5 Likes

I am also trying to run this on the Salt Segmentation challenge(also binary) and getting the exact same error at lr find. I tried searching but nothing worked for me. I am getting

RuntimeError: cuda runtime error (59) : device-side assert triggered at /opt/conda/conda-bld/pytorch-nightly_1540036376816/work/aten/src/THC/generic/THCTensorCopy.cpp:20

Here is my notebook: https://github.com/bluesky314/Salt-Segmentation/blob/master/salt_segmentation.ipynb (ignore stuff below the error)

Once I get this error, even creating a learner gives me an error.

1 Like

lr_find(learn)? Not learn.lr_find()? I guess you can use both, but I am not used to the way you used it.

just saw that this was the way shown in the last lesson… but it is new to me.

Oops - that wasn’t intentional. learn.lr_find is recommended, but both are fine.

learn.lr_find gives the same error:

How might we debug this? I read somewhere we should convert the tensors to CPU and try. I tried doing that for the dataset and data bunch objects but got an error saying they have no attribute .cpu()

1 Like

I think I have spotted the error. After visualising more of my data I see it is empty somehow:

I changed from bs 8 to 4 and now the finder gave me:

Scrolling all the way at the bottom of the error is :

So I guess there is some mismatch of shapes, but I don't know where this happened or why the data was not read fully. There is an accompanying csv; I don't know whether that is supplementary or of any use for this challenge.

Maybe we should shift this to fastai users

Did you check that your masks don't have values of 0 and 255? That happens sometimes. If that's the case, you should change the function that opens them to divide by 255. This is done by adding

...datasets(SegmentationDataset)
  .setattr(mask_opener, partial(open_mask, div=True))

in the data block API. You are changing the function used to open the masks (mask_opener) by changing the value of one of its argument (hence the partial) that’s called div to True.

7 Likes

Since there are only two classes, the classes in the masks are exactly 0 and 255…

I was also running into this exact device-side assert error with a binary segmentation problem.

If your binary masks have values of 0 and 255 for the 2 classes like mine do, I can confirm that @sgugger’s suggestion to change the default behavior of mask_opener to div=True fixes that particular problem (by converting class “255” to class “1”).

A few other small points I’ve discovered while fixing this:

  • If you get a cuda runtime error (59) : device-side assert triggered, you have to restart the kernel or you’ll keep getting the same error when you try to create the Learner even after you’ve fixed the label problem.

  • in fastai v1.0.21, the exact data block .set_attr() line that works for me is:
    .set_attr(mask_opener=partial(open_mask, div=True))

  • The order of the functions called in the data block API matters. This works:

data = (src.datasets(SegmentationDataset, classes=codes)
        .set_attr(mask_opener=partial(open_mask, div=True))
        .transform(get_transforms(), size=size, tfm_y=True)
        .databunch(bs=bs)
        .normalize(imagenet_stats))

While this doesn’t:

data = (src.datasets(SegmentationDataset, classes=codes)
        .transform(get_transforms(), size=size, tfm_y=True)
        .set_attr(mask_opener=partial(open_mask, div=True))
        .databunch(bs=bs)
        .normalize(imagenet_stats))
7 Likes

I just pushed div as an argument to SegmentDataset since it seems a lot of people need it. So now,

data = (src.datasets(SegmentationDataset, classes=codes, div=True)
        .transform(get_transforms(), size=size, tfm_y=True)
        .databunch(bs=bs)
        .normalize(imagenet_stats))

should work.

And hi Dave, nice to see you again :wink:

11 Likes

Yup this worked! Thanks

1 Like

That error bothered me for a while. It works now. Thanks

Did anyone try to give a single class, since it is binary (it is supposed to predict 0 or 1 for each pixel)?

hello

I am trying to do binary as well.
I am getting this error…Thanks

src = (ImageFileList.from_folder(path_img)
.label_from_func(get_y_fn))
data = (src.datasets(SegmentationDataset, classes=codes)

    .transform(get_transforms(), size=size, tfm_y=True)

    .databunch(bs=bs)

    .normalize(imagenet_stats))

AttributeError Traceback (most recent call last)
in
----> 1 data = (src.datasets(SegmentationDataset, classes=codes)
2 .transform(get_transforms(), size=size, tfm_y=True)
3 .databunch(bs=bs)
4 .normalize(imagenet_stats))

AttributeError: ‘ImageLabelList’ object has no attribute ‘datasets’

1 Like

@sgugger My mask has 0 and 255 as its values(binary segmentation). How do i use div = True for open_mask with SegmentationItemList for that 255 is converted to 1.

1 Like

Just slightly change the source code of SegmentationLabelList in your custom SegmentationLL

class SegmentationLL(ImageItemList):
    """Segmentation label list for binary masks stored as 0/255 images.

    Mirrors fastai's SegmentationLabelList, except masks are opened with
    ``div=True`` so pixel value 255 is rescaled to class index 1.
    """

    def __init__(self, items:Iterator, classes:Collection=None, **kwargs):
        super().__init__(items, **kwargs)
        self.classes = classes
        self.loss_func = CrossEntropyFlat()
        # Dividing by 255 maps the {0, 255} mask values onto {0, 1}.
        self.create_func = partial(open_mask, div=True)
        self.c = len(self.classes)

    def new(self, items, classes=None, **kwargs):
        """Build a sibling list, falling back to this list's classes."""
        chosen = classes if classes is not None else self.classes
        return self.__class__(items, chosen, **kwargs)

Or even shorter:

class SegmentationLL(SegmentationItemList):
    """SegmentationItemList whose masks are opened with ``div=True``.

    Use this when the two classes are encoded as pixel values 0 and 255:
    dividing by 255 turns them into valid class indices 0 and 1.
    """

    def __init__(self, items:Iterator, classes:Collection=None, **kwargs):
        super().__init__(items, classes, **kwargs)
        # Swap in a mask opener that rescales 255 -> 1.
        self.create_func = partial(open_mask, div=True)
3 Likes

@sgugger I defined a new class in my code as suggested. But i am getting name 'kwarg' is not defined error. Where can i find the definition of kwarg ? What should i do?

It should be kwargs instead kwarg

1 Like