Multi Object Detection - Error lr_find() fastai v2

The stack trace error is:

UserWarning: Using a target size (torch.Size([64, 2, 4])) that is different to the input size (torch.Size([64, 8])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size. ret = func(*args, **kwargs)

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_18456\2451463369.py in <module>
      1 # Find a good learning rate with the learning rate finder
      2 
----> 3 learn.lr_find()
      4 # NOTE: when running the above line an error is returned: running_mean should contain 4096 elements not 8192
      5 # What is running_mean and why it should have 4096 elements?


~\AppData\Local\Programs\Python\Python37\lib\site-packages\fastai\callback\schedule.py in lr_find(self, start_lr, end_lr, num_it, stop_div, show_plot, suggest_funcs)
    283     n_epoch = num_it//len(self.dls.train) + 1
    284     cb=LRFinder(start_lr=start_lr, end_lr=end_lr, num_it=num_it, stop_div=stop_div)
--> 285     with self.no_logging(): self.fit(n_epoch, cbs=cb)
    286     if suggest_funcs is not None:
    287         lrs, losses = tensor(self.recorder.lrs[num_it//10:-5]), tensor(self.recorder.losses[num_it//10:-5])

~\AppData\Local\Programs\Python\Python37\lib\site-packages\fastai\learner.py in fit(self, n_epoch, lr, wd, cbs, reset_opt)
    219             self.opt.set_hypers(lr=self.lr if lr is None else lr)
    220             self.n_epoch = n_epoch
--> 221             self._with_events(self._do_fit, 'fit', CancelFitException, self._end_cleanup)
    222 
    223     def _end_cleanup(self): self.dl,self.xb,self.yb,self.pred,self.loss = None,(None,),(None,),None,None

~\AppData\Local\Programs\Python\Python37\lib\site-packages\fastai\learner.py in _with_events(self, f, event_type, ex, final)
    161 
    162     def _with_events(self, f, event_type, ex, final=noop):
--> 163         try: self(f'before_{event_type}');  f()
    164         except ex: self(f'after_cancel_{event_type}')
    165         self(f'after_{event_type}');  final()

~\AppData\Local\Programs\Python\Python37\lib\site-packages\fastai\learner.py in _do_fit(self)
    210         for epoch in range(self.n_epoch):
    211             self.epoch=epoch
--> 212             self._with_events(self._do_epoch, 'epoch', CancelEpochException)
    213 
    214     def fit(self, n_epoch, lr=None, wd=None, cbs=None, reset_opt=False):

~\AppData\Local\Programs\Python\Python37\lib\site-packages\fastai\learner.py in _with_events(self, f, event_type, ex, final)
    161 
    162     def _with_events(self, f, event_type, ex, final=noop):
--> 163         try: self(f'before_{event_type}');  f()
    164         except ex: self(f'after_cancel_{event_type}')
    165         self(f'after_{event_type}');  final()

~\AppData\Local\Programs\Python\Python37\lib\site-packages\fastai\learner.py in _do_epoch(self)
    204 
    205     def _do_epoch(self):
--> 206         self._do_epoch_train()
    207         self._do_epoch_validate()
    208 

~\AppData\Local\Programs\Python\Python37\lib\site-packages\fastai\learner.py in _do_epoch_train(self)
    196     def _do_epoch_train(self):
    197         self.dl = self.dls.train
--> 198         self._with_events(self.all_batches, 'train', CancelTrainException)
    199 
    200     def _do_epoch_validate(self, ds_idx=1, dl=None):

~\AppData\Local\Programs\Python\Python37\lib\site-packages\fastai\learner.py in _with_events(self, f, event_type, ex, final)
    161 
    162     def _with_events(self, f, event_type, ex, final=noop):
--> 163         try: self(f'before_{event_type}');  f()
    164         except ex: self(f'after_cancel_{event_type}')
    165         self(f'after_{event_type}');  final()

~\AppData\Local\Programs\Python\Python37\lib\site-packages\fastai\learner.py in all_batches(self)
    167     def all_batches(self):
    168         self.n_iter = len(self.dl)
--> 169         for o in enumerate(self.dl): self.one_batch(*o)
    170 
    171     def _do_one_batch(self):

~\AppData\Local\Programs\Python\Python37\lib\site-packages\fastai\learner.py in one_batch(self, i, b)
    192         b = self._set_device(b)
    193         self._split(b)
--> 194         self._with_events(self._do_one_batch, 'batch', CancelBatchException)
    195 
    196     def _do_epoch_train(self):

~\AppData\Local\Programs\Python\Python37\lib\site-packages\fastai\learner.py in _with_events(self, f, event_type, ex, final)
    161 
    162     def _with_events(self, f, event_type, ex, final=noop):
--> 163         try: self(f'before_{event_type}');  f()
    164         except ex: self(f'after_cancel_{event_type}')
    165         self(f'after_{event_type}');  final()

~\AppData\Local\Programs\Python\Python37\lib\site-packages\fastai\learner.py in _do_one_batch(self)
    173         self('after_pred')
    174         if len(self.yb):
--> 175             self.loss_grad = self.loss_func(self.pred, *self.yb)
    176             self.loss = self.loss_grad.clone()
    177         self('after_loss')

<ipython-input-59-bf592cbdc4b7> in loss_fn(preds, targs, class_idxs)
      1 def loss_fn(preds, targs, class_idxs):
----> 2     return L1Loss()(preds, targs.squeeze())

~\AppData\Local\Programs\Python\Python37\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
   1100         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1101                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1102             return forward_call(*input, **kwargs)
   1103         # Do not call functions when jit is used
   1104         full_backward_hooks, non_full_backward_hooks = [], []

~\AppData\Local\Programs\Python\Python37\lib\site-packages\torch\nn\modules\loss.py in forward(self, input, target)
     94 
     95     def forward(self, input: Tensor, target: Tensor) -> Tensor:
---> 96         return F.l1_loss(input, target, reduction=self.reduction)
     97 
     98 

~\AppData\Local\Programs\Python\Python37\lib\site-packages\torch\nn\functional.py in l1_loss(input, target, size_average, reduce, reduction)
   3066     if has_torch_function_variadic(input, target):
   3067         return handle_torch_function(
-> 3068             l1_loss, (input, target), input, target, size_average=size_average, reduce=reduce, reduction=reduction
   3069         )
   3070     if not (target.size() == input.size()):

~\AppData\Local\Programs\Python\Python37\lib\site-packages\torch\overrides.py in handle_torch_function(public_api, relevant_args, *args, **kwargs)
   1353         # Use `public_api` instead of `implementation` so __torch_function__
   1354         # implementations can do equality/identity comparisons.
-> 1355         result = torch_func_method(public_api, types, args, kwargs)
   1356 
   1357         if result is not NotImplemented:

~\AppData\Local\Programs\Python\Python37\lib\site-packages\fastai\torch_core.py in __torch_function__(self, func, types, args, kwargs)
    338         convert=False
    339         if _torch_handled(args, self._opt, func): convert,types = type(self),(torch.Tensor,)
--> 340         res = super().__torch_function__(func, types, args=args, kwargs=kwargs)
    341         if convert: res = convert(res)
    342         if isinstance(res, TensorBase): res.set_meta(self, as_copy=True)

~\AppData\Local\Programs\Python\Python37\lib\site-packages\torch\_tensor.py in __torch_function__(cls, func, types, args, kwargs)
   1049 
   1050         with _C.DisableTorchFunction():
-> 1051             ret = func(*args, **kwargs)
   1052             if func in get_default_nowrap_functions():
   1053                 return ret

~\AppData\Local\Programs\Python\Python37\lib\site-packages\torch\nn\functional.py in l1_loss(input, target, size_average, reduce, reduction)
   3078         reduction = _Reduction.legacy_get_string(size_average, reduce)
   3079 
-> 3080     expanded_input, expanded_target = torch.broadcast_tensors(input, target)
   3081     return torch._C._nn.l1_loss(expanded_input, expanded_target, _Reduction.get_enum(reduction))
   3082 

~\AppData\Local\Programs\Python\Python37\lib\site-packages\torch\functional.py in broadcast_tensors(*tensors)
     70     if has_torch_function(tensors):
     71         return handle_torch_function(broadcast_tensors, tensors, *tensors)
---> 72     return _VF.broadcast_tensors(tensors)  # type: ignore[attr-defined]
     73 
     74 

RuntimeError: The size of tensor a (8) must match the size of tensor b (4) at non-singleton dimension 2

I think you may be able to resolve the pred/target shape issue by creating a custom ‘block’ based on BBoxBlock, specifically by writing your own version of the bb_pad function so that it outputs the 2 bounding boxes as a 1x8 vector instead of a 2x4 matrix.

(For reference, the original reply pointed to the fastai definitions of BBoxBlock and bb_pad here.)

Adding bbox = torch.reshape(bbox,-1) right after the line bbox = torch.cat([bbox,bbox.new_zeros(max_len-bbox.shape[0], 4)]) should flatten the two padded boxes into a single vector.
https://pytorch.org/docs/stable/generated/torch.reshape.html#torch.reshape
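A minimal sketch of what that modified padding function might look like; the name bb_pad_flat and the unlabeled (image, bbox) samples are assumptions for illustration (fastai's actual bb_pad also handles the label tensor and clipping, so check the source of your installed version):

import torch

def bb_pad_flat(samples, pad_idx=0):
    "Pad each sample to `max_len` boxes, then flatten the boxes into one 1-D vector."
    max_len = max(len(s[1]) for s in samples)
    def _f(img, bbox):
        bbox = torch.cat([bbox, bbox.new_zeros(max_len - bbox.shape[0], 4)])
        bbox = torch.reshape(bbox, (-1,))  # (max_len, 4) -> (max_len*4,); note the shape must be a tuple
        return img, bbox
    return [_f(*s) for s in samples]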

Alternatively, you should be able to make your model output a 2x4 tensor, but you’ll have to make sure your loss function supports that shape; if it doesn’t, write a custom version of the loss you’re using that does.
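One related option (not exactly what is described above, but it gets the shapes to agree without touching the data pipeline): keep the flat (bs, 8) model output and reshape it inside the loss to match the (bs, 2, 4) targets. A sketch, assuming those two shapes:

import torch
from torch.nn import L1Loss

def loss_fn(preds, targs, class_idxs=None):
    preds = preds.view(targs.shape[0], -1, 4)  # (bs, 8) -> (bs, 2, 4) to match the targets
    return L1Loss()(preds, targs)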


Later today I’ll modify the bb_pad() function the way you suggested.

If the error persists I’ll try the other option, which is to make the model output shape (2,4) by creating a custom create_head() function. Then, if necessary, I’d write a custom loss function that accepts a 2D model output.

I’ll inform you if I’ve managed to solve the error.

I created a custom bb_pad function.
I reshaped the TensorBBox to convert its shape from (2,4) to (8,). To do the reshape you told me to add the line "bbox = torch.reshape(bbox,-1)", but when that line was executed I got an error saying the second parameter of reshape (-1) can’t be an int, it must be a tuple of ints. The page you linked (torch.reshape — PyTorch 1.11.0 documentation) also indicates that the second parameter (the new shape) must be a tuple of ints.
I changed "bbox = torch.reshape(bbox,-1)" to "bbox = torch.reshape(bbox,(-1,))" and that error was fixed.
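For reference, the difference is between the functional form and the Tensor method; a quick standalone check (nothing thread-specific assumed):

import torch

t = torch.zeros(2, 4)
# torch.reshape(t, -1)          # TypeError: the functional form wants the shape as a tuple of ints
flat = torch.reshape(t, (-1,))  # works: shape (8,)
flat = t.reshape(-1)            # the Tensor method also accepts bare ints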

I also created a custom BBoxBlock with no labels. I don’t need the bboxes to be labeled, and keeping the labels would also be a problem: for each image the bboxes now have size (8,) while the labels list has size (2,), so the sizes don’t match. To create the custom BBoxBlock I followed the example shown at Share your V2 projects here - #376 by idraja.

I deleted every line of the original bb_pad() function related to the labels, because I only need that function to return a TensorImage and a TensorBBox. I also don’t use the clip_remove_empty() function, because it splits a TensorBBox of size (2,4) into 2 TensorBBoxes of size (1,4).

These are the modifications I made to the code:

class NoLabelBBoxLabeler(Transform):
    """ Bounding box labeler with no label """
    def setups(self, x): noop
    def decode (self, x, **kwargs):
        self.bbox,self.lbls = None,None
        return self._call('decodes', x, **kwargs)

    def decodes(self, x:TensorBBox):
        self.bbox = x
        return self.bbox if self.lbls is None else LabeledBBox(self.bbox, self.lbls)
'''
def clip_remove_empty(bbox):
    "Clip bounding boxes with image border and label background the empty ones"
    "Splits bbox of size (2,4) into to bboxes of size (1,4)"
    bbox = torch.clamp(bbox, -1, 1)
    empty = ((bbox[...,2] - bbox[...,0])*(bbox[...,3] - bbox[...,1]) <= 0.)
    return (bbox[~empty])
'''

def custom_bb_pad(samples, pad_idx=0):
    "Function that collect `samples` of bboxes and adds padding with `pad_idx`."
    #samples = [(s[0], *clip_remove_empty(*s[1:])) for s in samples] # s[0] is a tuple of TensorImage & TensorBbox, TensorBbox size is (2,4)
    #max_len = max([len(s[2]) for s in samples]) # equals to 4 (number of bbox coordinates)
    def _f(img,bbox):
        #bbox = torch.cat([bbox,bbox.new_zeros(2, 4)])
        bbox = torch.reshape(bbox,(-1,)) # convert bboxes tensor to a 1D tensor
        return img,bbox
    return [_f(*s) for s in samples]


CustomBboxBlock = TransformBlock(type_tfms=TensorBBox.create, 
                             item_tfms=[PointScaler, NoLabelBBoxLabeler], dls_kwargs = {'before_batch': custom_bb_pad})
data = DataBlock(
    blocks=(ImageBlock, CustomBboxBlock), # ImageBlock: the inputs are images; CustomBboxBlock: the targets are unlabeled bboxes
    get_items=get_image_files,
    n_inp=1, # number of inputs; it's 1 because the only inputs are the rx images (ImageBlock)
    get_y=[lambda o: get_bboxes(o.name)], # get_y = targets (the bboxes); get_x = inputs 
    splitter = RandomSplitter (0.1), # split training/validation; parameter 0.1 means there will be 10% of validation images 
    batch_tfms= [*aug_transforms(do_flip=False, size=(120,160)), Normalize.from_stats(*imagenet_stats)] 
)

I’m stuck on an error that is produced when the line "dls.show_batch(max_n=20, figsize=(9,6))" is executed. The error is: "IndexError: Dimension out of range (expected to be in range of [-2, 1], but got 2)".

As you can see in the stack trace I posted at the end, the error doesn’t point to any of the code I changed; it points to an internal function. I don’t know what is causing it.

I also thought about making the CNN produce a 2D output, as we discussed, but I don’t know how to do it; I haven’t found any documentation on creating a model with a 2D output. I also tried to find examples of binary object detection (which is my case) on GitHub, or a repository about lung detection, but I haven’t found anything.

I hope you, or anybody else, can tell me what is causing the error and whether the changes I made are right.
I would also appreciate it if @idraja, the user who created the example using the BBoxBlock with no labels, could help me solve the show_batch error.

The stack trace is:

---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_14288\2938148043.py in <module>
      2 
      3 dls = data.dataloaders(path_dl, path=path_dl, bs = 64) # bs: how many samples per batch to load
----> 4 dls.show_batch(max_n=20, figsize=(9,6)) # NOTE: what do the values of figsize represent?
      5 # NOTE: The batch shown contains validation and training images?

~\AppData\Local\Programs\Python\Python37\lib\site-packages\fastai\data\core.py in show_batch(self, b, max_n, ctxs, show, unique, **kwargs)
     98             old_get_idxs = self.get_idxs
     99             self.get_idxs = lambda: Inf.zeros
--> 100         if b is None: b = self.one_batch()
    101         if not show: return self._pre_show_batch(b, max_n=max_n)
    102         show_batch(*self._pre_show_batch(b, max_n=max_n), ctxs=ctxs, max_n=max_n, **kwargs)

~\AppData\Local\Programs\Python\Python37\lib\site-packages\fastai\data\load.py in one_batch(self)
    146     def one_batch(self):
    147         if self.n is not None and len(self)==0: raise ValueError(f'This DataLoader does not contain any batches')
--> 148         with self.fake_l.no_multiproc(): res = first(self)
    149         if hasattr(self, 'it'): delattr(self, 'it')
    150         return res

~\AppData\Local\Programs\Python\Python37\lib\site-packages\fastcore\basics.py in first(x, f, negate, **kwargs)
    553     x = iter(x)
    554     if f: x = filter_ex(x, f=f, negate=negate, gen=True, **kwargs)
--> 555     return next(x, None)
    556 
    557 # Cell

~\AppData\Local\Programs\Python\Python37\lib\site-packages\fastai\data\load.py in __iter__(self)
    109         for b in _loaders[self.fake_l.num_workers==0](self.fake_l):
    110             if self.device is not None: b = to_device(b, self.device)
--> 111             yield self.after_batch(b)
    112         self.after_iter()
    113         if hasattr(self, 'it'): del(self.it)

~\AppData\Local\Programs\Python\Python37\lib\site-packages\fastcore\transform.py in __call__(self, o)
    198         self.fs = self.fs.sorted(key='order')
    199 
--> 200     def __call__(self, o): return compose_tfms(o, tfms=self.fs, split_idx=self.split_idx)
    201     def __repr__(self): return f"Pipeline: {' -> '.join([f.name for f in self.fs if f.name != 'noop'])}"
    202     def __getitem__(self,i): return self.fs[i]

~\AppData\Local\Programs\Python\Python37\lib\site-packages\fastcore\transform.py in compose_tfms(x, tfms, is_enc, reverse, **kwargs)
    148     for f in tfms:
    149         if not is_enc: f = f.decode
--> 150         x = f(x, **kwargs)
    151     return x
    152 

~\AppData\Local\Programs\Python\Python37\lib\site-packages\fastai\vision\augment.py in __call__(self, b, split_idx, **kwargs)
     33     def __call__(self, b, split_idx=None, **kwargs):
     34         self.before_call(b, split_idx=split_idx)
---> 35         return super().__call__(b, split_idx=split_idx, **kwargs) if self.do else b
     36 
     37 # Cell

~\AppData\Local\Programs\Python\Python37\lib\site-packages\fastcore\transform.py in __call__(self, x, **kwargs)
     71     @property
     72     def name(self): return getattr(self, '_name', _get_name(self))
---> 73     def __call__(self, x, **kwargs): return self._call('encodes', x, **kwargs)
     74     def decode  (self, x, **kwargs): return self._call('decodes', x, **kwargs)
     75     def __repr__(self): return f'{self.name}:\nencodes: {self.encodes}decodes: {self.decodes}'

~\AppData\Local\Programs\Python\Python37\lib\site-packages\fastcore\transform.py in _call(self, fn, x, split_idx, **kwargs)
     81     def _call(self, fn, x, split_idx=None, **kwargs):
     82         if split_idx!=self.split_idx and self.split_idx is not None: return x
---> 83         return self._do_call(getattr(self, fn), x, **kwargs)
     84 
     85     def _do_call(self, f, x, **kwargs):

~\AppData\Local\Programs\Python\Python37\lib\site-packages\fastcore\transform.py in _do_call(self, f, x, **kwargs)
     88             ret = f.returns(x) if hasattr(f,'returns') else None
     89             return retain_type(f(x, **kwargs), x, ret)
---> 90         res = tuple(self._do_call(f, x_, **kwargs) for x_ in x)
     91         return retain_type(res, x)
     92 

~\AppData\Local\Programs\Python\Python37\lib\site-packages\fastcore\transform.py in <genexpr>(.0)
     88             ret = f.returns(x) if hasattr(f,'returns') else None
     89             return retain_type(f(x, **kwargs), x, ret)
---> 90         res = tuple(self._do_call(f, x_, **kwargs) for x_ in x)
     91         return retain_type(res, x)
     92 

~\AppData\Local\Programs\Python\Python37\lib\site-packages\fastcore\transform.py in _do_call(self, f, x, **kwargs)
     87             if f is None: return x
     88             ret = f.returns(x) if hasattr(f,'returns') else None
---> 89             return retain_type(f(x, **kwargs), x, ret)
     90         res = tuple(self._do_call(f, x_, **kwargs) for x_ in x)
     91         return retain_type(res, x)

~\AppData\Local\Programs\Python\Python37\lib\site-packages\fastcore\dispatch.py in __call__(self, *args, **kwargs)
    116         elif self.inst is not None: f = MethodType(f, self.inst)
    117         elif self.owner is not None: f = MethodType(f, self.owner)
--> 118         return f(*args, **kwargs)
    119 
    120     def __get__(self, inst, owner):

~\AppData\Local\Programs\Python\Python37\lib\site-packages\fastai\vision\augment.py in encodes(self, x)
    399     def encodes(self, x:TensorImage): return self._encode(x, self.mode)
    400     def encodes(self, x:TensorMask):  return self._encode(x, self.mode_mask)
--> 401     def encodes(self, x:(TensorPoint, TensorBBox)): return self._encode(x, self.mode, reverse=True)
    402 
    403 # Cell

~\AppData\Local\Programs\Python\Python37\lib\site-packages\fastai\vision\augment.py in _encode(self, x, mode, reverse)
    395     def _encode(self, x, mode, reverse=False):
    396         coord_func = None if len(self.coord_fs)==0 or self.split_idx else partial(compose_tfms, tfms=self.coord_fs, reverse=reverse)
--> 397         return x.affine_coord(self.mat, coord_func, sz=self.size, mode=mode, pad_mode=self.pad_mode, align_corners=self.align_corners)
    398 
    399     def encodes(self, x:TensorImage): return self._encode(x, self.mode)

~\AppData\Local\Programs\Python\Python37\lib\site-packages\fastai\vision\augment.py in affine_coord(x, mat, coord_tfm, sz, mode, pad_mode, align_corners)
    347     if sz is None: sz = getattr(x, "img_size", None)
    348     bs,n = x.shape[:2]
--> 349     pnts = stack([x[...,:2], stack([x[...,0],x[...,3]],dim=2),
    350                   stack([x[...,2],x[...,1]],dim=2), x[...,2:]], dim=2)
    351     pnts = TensorPoint(pnts.view(bs, 4*n, 2), img_size=sz).affine_coord(mat, coord_tfm, sz, mode, pad_mode)

~\AppData\Local\Programs\Python\Python37\lib\site-packages\fastai\torch_core.py in __torch_function__(self, func, types, args, kwargs)
    338         convert=False
    339         if _torch_handled(args, self._opt, func): convert,types = type(self),(torch.Tensor,)
--> 340         res = super().__torch_function__(func, types, args=args, kwargs=kwargs)
    341         if convert: res = convert(res)
    342         if isinstance(res, TensorBase): res.set_meta(self, as_copy=True)

~\AppData\Local\Programs\Python\Python37\lib\site-packages\torch\_tensor.py in __torch_function__(cls, func, types, args, kwargs)
   1140 
   1141         with _C.DisableTorchFunction():
-> 1142             ret = func(*args, **kwargs)
   1143             if func in get_default_nowrap_functions():
   1144                 return ret

IndexError: Dimension out of range (expected to be in range of [-2, 1], but got 2)

I just had a thought: reshaping the BBox in custom_bb_pad, which runs before the batch transforms, may break some of the aug_transforms if you are using any of the ones that modify the geometry of the images (warping, rotation, etc.). You may want to keep the 2x4 BBox shape as it was originally in custom_bb_pad and instead change the shape in a batch transform placed last, after Normalize.from_stats(*imagenet_stats). You should be able to view the batch by calling one_batch. I think this may fix show_batch as well; if not, you should be able to manually decode the results of one_batch to make sure it’s working properly (see the quick check after the class below).

class BBoxReshape(DisplayedTransform):
    "Normalize/denorm batch of `TensorImage`"
    parameters,order = L(),100
    def __init__(self): 
        noop

    def setups(self, dl:DataLoader):
        noop

    def encodes(self, x:TensorBBox): return torch.reshape(x,(x.shape[0],8))
    def decodes(self, x:TensorBBox): return torch.reshape(x,(x.shape[0],2,4))
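
A quick way to sanity-check this, as suggested above. This is only a sketch: it assumes dls are the DataLoaders built from your DataBlock and that BBoxReshape is already in your batch_tfms.

xb, yb = dls.one_batch()
print(xb.shape, yb.shape)         # yb should come out as (bs, 8) after BBoxReshape.encodes
dec = dls.decode_batch((xb, yb))  # decoding runs the transforms in reverse, so the boxes go back to (2, 4)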

My question is how to use the BBoxReshape class.
On the DataBlock object I have the following transforms:
batch_tfms= [*aug_transforms(do_flip=False, size=(120,160)), Normalize.from_stats(*imagenet_stats)]

How do I create an instance of BBoxReshape and add it to batch_tfms?

Did you try?

batch_tfms= [*aug_transforms(do_flip=False, size=(120,160)), Normalize.from_stats(*imagenet_stats), BBoxReshape()]
or
batch_tfms= [*aug_transforms(do_flip=False, size=(120,160)), Normalize.from_stats(*imagenet_stats), BBoxReshape]

I don’t think it should matter in this case if you do the instantiation in the list or not because there is nothing being done in the init or setup methods.

When I asked you I hadn’t tried it yet; after you replied, I changed the batch_tfms on the DataBlock object to the following:
batch_tfms= [*aug_transforms(do_flip=False, size=(120,160)), Normalize.from_stats(*imagenet_stats), BBoxReshape]. I didn’t get any error.

I can also report that I managed to solve the show_batch error. As I told you, I changed batch_tfms, and I also changed the custom_bb_pad function I had created. Below is the current state of custom_bb_pad:


def custom_bb_pad(samples, pad_idx=0):
    "Function that collect `samples` of bboxes and adds padding with `pad_idx`."
    #samples = [(s[0], *clip_remove_empty(*s[1:])) for s in samples] # s[0] is a tuple of TensorImage & TensorBbox, TensorBbox size is (2,4)
    max_len = max([len(s[1]) for s in samples]) # equals to 4 (number of bbox coordinates)
    def _f(img,bbox):
        bbox = torch.cat([bbox,bbox.new_zeros(max_len-bbox.shape[0], 4)])        
        return img,bbox
    return [_f(*s) for s in samples]

Since you told me to keep the bbox shape at 2x4 in the bb_pad function, I added back the line "bbox = torch.cat([bbox,bbox.new_zeros(max_len-bbox.shape[0], 4)])" from the original bb_pad function, which I had previously removed.

Now I can run the notebook with no errors until the line "learn.export(‘trained_roi_detetor_resnet_export.pkl’)", where I get the error: "PicklingError: Can’t pickle <function <lambda> at 0x0000020C8F432708>: attribute lookup <lambda> on __main__ failed".
From the research I’ve done, I understand this error is produced because learn.export() isn’t compatible with lambda functions in the code (lambdas can’t be pickled).

I tried the solution proposed by user asoellinger in the topic:
Learn.export() failing "PicklingError: Can't pickle <function <lambda> at 0x7f60e83e6bf8>: attribute lookup <lambda> on __main__ failed" - #3 by andandandand.
I installed the "dill" package, added the line "import dill", and changed the export call to learn.export(‘trained_roi_detetor_resnet_export.pkl’, pickle_protocol=dill).
I got an error saying "TypeError: an integer is required (got type module)".

As this solution didn’t work for me, I tried to find an alternative to the lambda. The line causing the export error is "get_y=[lambda o: get_bboxes(o.name)]" in the DataBlock object.
So I changed "get_y=[lambda o: get_bboxes(o.name)]" to "get_y=[get_bboxes(o.name) for o in get_image_files]", but I got an error saying "TypeError: ‘function’ object is not iterable".

I don’t know how to solve this learn.export() error.

I also want to ask whether it’s normal that the progress bar of completed epochs shown in the output of learn.lr_find doesn’t reach 100% when lr_find finishes.
It only happens the second time I use learn.lr_find, which is after calling learn.unfreeze(). The first time I call learn.lr_find() is after calling learn.freeze(-1), and in that case no progress bar is shown at the end of the lr_find output.
The following image shows the progress-bar issue I mentioned:

That said, in both learn.lr_find() cases the tables produced by fit_one_cycle are complete. In the first stage, after freezing the model, I call fit_one_cycle(n_epochs=100) and the table has 100 entries; in the second stage, after unfreezing, I call fit_one_cycle(n_epochs=200) and the table has 200 entries. I mention this because I thought that if the epoch progress bar didn’t reach 100% in the second stage, the fit_one_cycle table might not be complete, but that’s not the case.

I hope you know how I can solve the learn.export() error, and I’d also like to understand whether it’s a problem that the epoch progress bar doesn’t reach 100%, or whether that’s normal.

Thank you so much for all your help. I’m glad that at least today the show_batch error got fixed, although as you can see I’m dealing with a lot of errors because I’m adapting a notebook from my professors that was written to predict just one bbox per image, whereas for my project I need to predict 2 bboxes per image. Hopefully I can make the whole notebook functional soon.

get_y doesn’t need to be a list of functions anymore because you only have 1 dependent variable, the bounding boxes; you no longer have the classes for the bounding boxes. You can just modify your get_bboxes code so that it pulls the name property from the pathlib object it receives, which is what is being passed:

def get_bboxes(f):
  f = f.name
  ...
...
get_y=get_bboxes,
...

Hopefully my last answer gets you past your learn.export error. If all you’re doing is experimentation, you can just use the learn.save and learn.load functions (Learner, Metrics, and Basic Callbacks | fastai). Those functions save and load the model weights so you can come back to them later. learn.export is more for packaging up your model for inference in a production-like setting, and it isn’t needed if all you’re doing is experimenting and you just want to save your model weights. You can still run inference from a model saved with learn.save; it’s just slightly slower to get up and running and takes more resources than loading an exported Learner, which only matters if you’re deploying the model and startup time and resource use are important.
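A quick sketch of the two workflows using standard fastai calls (the file names here are placeholders):

learn.save('lung-detector-stage1')          # writes the weights (and optimizer state) to models/lung-detector-stage1.pth
learn.load('lung-detector-stage1')          # reloads those weights into the same Learner/architecture

learn.export('lung-detector.pkl')           # pickles the whole Learner for inference (no lambdas allowed)
learn_inf = load_learner('lung-detector.pkl')  # load_learner comes from fastai.learner / fastai.vision.all
pred = learn_inf.predict(img)               # img: a path to (or PILImage of) the image to run inference on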

This is normal. The LR finder essentially tries a series of learning rates to give you a good starting point for training. It does this by trying a new (ever-increasing) learning rate on each batch and recording the loss. At some point the learning rate becomes too high and the loss explodes; when the LR finder detects this, it stops, because there is no point in trying even higher learning rates. That’s why the progress bar doesn’t reach 100%.
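A sketch of the typical flow (the valley attribute is the default suggestion in recent fastai versions; the field names change if you pass other suggest_funcs):

suggested = learn.lr_find()                       # the mock training described above; the bar stops early on purpose
learn.fit_one_cycle(10, lr_max=suggested.valley)  # use the suggested learning rate for real training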


No problem! Good luck on your project. Hopefully you’re getting close to getting everything working.

It’s helpful to others when you find a post or reply helpful to click on the heart icon at the bottom of the post. This lets others know that the post or reply was helpful to you and therefore may be helpful to them.

I applied your proposed code and the learn.export() error is now solved.


For the moment, I’ll keep the learn.export() call from the original code, placed after the call to learn.save(). If, after I show my professors the current code, they tell me learn.export() isn’t necessary, I’ll remove it.

Alright.

I agree; all the posts of yours that helped me solve the errors now have a like.


To clarify, I want to let @idraja and @muellerzr, whom I had asked for help on this topic, know that the problems I contacted them about have been solved by applying the solutions @matdmiller proposed.

Hi @matdmiller,

I’ve been making progress on the project for which I created this topic. For context, my project is about analysing lungs in X-ray images to detect COVID-19.
I’ve trained a lung-detection model.
Now I need to run inference to predict the bounding boxes on images from a different dataset.
When I tried to execute "model.predict" I got an error related to the "custom_bb_pad" function. That’s the function you suggested I implement to fix the shape mismatch between the CNN output (shape 1x8) and the target bounding boxes (shape 2x4).

I debugged "custom_bb_pad" to check what it was doing, and I didn’t notice any change in the shape of the "bbox" parameter by the end of the debugging session.
Since "custom_bb_pad" didn’t seem necessary, I commented it out. After rerunning the notebook I was still able to train the lung-detection model and I got the same metric results (IoU) as when "custom_bb_pad" was active. Also, if I run model.predict() I no longer get an error.

I’d like to know why I would need to use "custom_bb_pad" at all.

Below is the code you sent me to make the shapes of the CNN output and the target bounding boxes match.

Thank you.

class NoLabelBBoxLabeler(Transform):
    """ Bounding box labeler with no label """
    def setups(self, x): noop
    def decode (self, x, **kwargs):
        self.bbox,self.lbls = None,None
        return self._call('decodes', x, **kwargs)

    def decodes(self, x:TensorBBox):
        self.bbox = x
        return self.bbox if self.lbls is None else LabeledBBox(self.bbox, self.lbls)
'''
def custom_bb_pad(samples, pad_idx=0):
    "Function that collect `samples` of bboxes and adds padding with `pad_idx`."
    # 'samples' is a list that contains a tuple with 2 elements: a TensorImage & a TensorBBox. TensorBBox size is (2,4)
    #   TensorImage size is [3, width, height]
    max_len = max([len(s[1]) for s in samples]) # s[1] is a TensorBBox. max_len equals to 2 (number of bboxes associated to a TensorBBox)
    def _f(img,bbox): # img is a TensorImage, bbox is a TensorBBox
        bbox = torch.cat([bbox,bbox.new_zeros(max_len-bbox.shape[0], 4)])        
        return img,bbox
    return [_f(*s) for s in samples] # _f function receives a TensorImage as first parameter (img) and a TensorBBox as second parameter (bbox)

CustomBboxBlock = TransformBlock(type_tfms=TensorBBox.create, 
                             item_tfms=[PointScaler, NoLabelBBoxLabeler], dls_kwargs = {'before_batch': custom_bb_pad})               
'''

CustomBboxBlock = TransformBlock(type_tfms=TensorBBox.create, 
                             item_tfms=[PointScaler, NoLabelBBoxLabeler])               

class BBoxReshape(DisplayedTransform):
    "Normalize/denorm batch of `TensorImage`"
    parameters,order = L(),100
    def __init__(self): 
        noop

    def setups(self, dl:DataLoader):
        noop

    def encodes(self, x:TensorBBox): return torch.reshape(x,(x.shape[0],8)) # x.shape original is (64,2,4) and it gets converted to (64,8)
    def decodes(self, x:TensorBBox): return torch.reshape(x,(x.shape[0],2,4)) # x.shape original is (64,8) and it gets converted to (64,2,4)

The reshaping was moved here, into the BBoxReshape batch transform, so custom_bb_pad no longer changes the bbox shape.

Because you always have exactly 2 bounding boxes per image, you probably don’t: the padding in custom_bb_pad only matters when the number of boxes varies between images.


Sorry for the delay.

Alright.
From the experiments I’ve done, I can confirm I don’t need the "custom_bb_pad" function.