How to save the object detection dataset patches?

Hey everyone,
I would like to ask how we can save the images (patches) that are created by the databunch.
I have created the databunch from an MS COCO-style dataset, with a .json file for the annotations and the images in a folder.
Here is the code I use to create my databunch:

batch_size = 64

do_flip = True
flip_vert = True 
max_rotate = 90 
max_zoom = 1.1 
max_lighting = 0.2
max_warp = 0.2
p_affine = 0.75 
p_lighting = 0.75 

tfms = get_transforms(do_flip=do_flip,
                      flip_vert=flip_vert,
                      max_rotate=max_rotate,
                      max_zoom=max_zoom,
                      max_lighting=max_lighting,
                      max_warp=max_warp,
                      p_affine=p_affine,
                      p_lighting=p_lighting)
train, valid = ObjectItemListSlide(train_images) ,ObjectItemListSlide(valid_images)
item_list = ItemLists(".", train, valid)
lls = item_list.label_from_func(lambda x: x.y, label_cls=SlideObjectCategoryList)
lls = lls.transform(tfms, tfm_y=True, size=patch_size)
data = lls.databunch(bs=batch_size, collate_fn=bb_pad_collate,num_workers=0).normalize()

I then tried saving the databunch using data.save, but it throws a ctypes error.
The error:

ValueError                                Traceback (most recent call last)
<ipython-input-22-88cd1dd3e50b> in <module>
----> 1 data.save("test_data_bs8.pkl")

3 frames
/usr/local/lib/python3.7/dist-packages/fastai/basic_data.py in save(self, file)
    153             warn("Serializing the `DataBunch` only works when you created it using the data block API.")
    154             return
--> 155         try_save(self.label_list, self.path, file)
    156 
    157     def add_test(self, items:Iterator, label:Any=None, tfms=None, tfm_y=None)->None:

/usr/local/lib/python3.7/dist-packages/fastai/torch_core.py in try_save(state, path, file)
    414             #To avoid the warning that come from PyTorch about model not being checked
    415             warnings.simplefilter("ignore")
--> 416             torch.save(state, target)
    417     except OSError as e:
    418         raise Exception(f"{e}\n Can't write {path/file}. Pass an absolute writable pathlib obj `fname`.")

/usr/local/lib/python3.7/dist-packages/torch/serialization.py in save(obj, f, pickle_module, pickle_protocol, _use_new_zipfile_serialization)
    378         if _use_new_zipfile_serialization:
    379             with _open_zipfile_writer(opened_file) as opened_zipfile:
--> 380                 _save(obj, opened_zipfile, pickle_module, pickle_protocol)
    381                 return
    382         _legacy_save(obj, opened_file, pickle_module, pickle_protocol)

/usr/local/lib/python3.7/dist-packages/torch/serialization.py in _save(obj, zip_file, pickle_module, pickle_protocol)
    587     pickler = pickle_module.Pickler(data_buf, protocol=pickle_protocol)
    588     pickler.persistent_id = persistent_id
--> 589     pickler.dump(obj)
    590     data_value = data_buf.getvalue()
    591     zip_file.write_record('data.pkl', data_value, len(data_value))

ValueError: ctypes objects containing pointers cannot be pickled

Hi,

Is there any particular reason you are using an older version of fastai?
The new version of fastai has DataBlocks.
It would be easier to help if you use an up to date version of fastai.

I am using the older version because it is the only one supporting the object detection library,
as shown here GitHub - ChristianMarzahl/ObjectDetection: Some experiments with object detection in PyTorch
Even if I upgrade my fastai version, it won't be supported by the object detection library that Christian Marzahl created for fastai.

Oh, okay.

I wish I could help, but I am not familiar with the older version of fastai. Maybe somebody else can help.

Can you share how we can implement the same in fastai v2? I would be happy to switch to v2 if it is able to load my dataset.
Here is the link to the dataset I use:
https://imig.science/midog2021/download-dataset/
You can see in the notebook at the bottom of that page that they use the object detection library by Christian Marzahl, which is based on fastai v1.
Thank you in advance,
Harshit

Have you checked out the Data block tutorial?
Although I cannot help you with that, this tutorial may help.

1 Like

Yes, I have checked this tutorial, and it gives me an error; maybe now you can help me.
Here is the code that I am using to create dataloader for my dataset:

coco = DataBlock(blocks=(ImageBlock, BBoxBlock, BBoxLblBlock),
                 get_items=get_image_files,
                 splitter=RandomSplitter(),
                 get_y=[lambda o: img2bbox[o.name][0], lambda o: img2bbox[o.name][1]], 
                 item_tfms=Resize(256),
                 batch_tfms=aug_transforms(),
                 n_inp=1)

dls = coco.dataloaders(midog_folder)
dls.show_batch(max_n=9)

I get this error:

KeyError                                  Traceback (most recent call last)
/tmp/ipykernel_28/2260717954.py in <module>
      7                  n_inp=1)
      8 
----> 9 dls = coco.dataloaders(midog_folder)
     10 dls.show_batch(max_n=9)

/opt/conda/lib/python3.7/site-packages/fastai/data/block.py in dataloaders(self, source, path, verbose, **kwargs)

/opt/conda/lib/python3.7/site-packages/fastai/data/block.py in datasets(self, source, verbose)

/opt/conda/lib/python3.7/site-packages/fastai/data/core.py in __init__(self, items, tfms, tls, n_inp, dl_type, **kwargs)

/opt/conda/lib/python3.7/site-packages/fastai/data/core.py in <listcomp>(.0)

/opt/conda/lib/python3.7/site-packages/fastcore/foundation.py in __call__(cls, x, *args, **kwargs)
     96     def __call__(cls, x=None, *args, **kwargs):
     97         if not args and not kwargs and x is not None and isinstance(x,cls): return x
---> 98         return super().__call__(x, *args, **kwargs)
     99 
    100 # %% ../nbs/02_foundation.ipynb 46

/opt/conda/lib/python3.7/site-packages/fastai/data/core.py in __init__(self, items, tfms, use_list, do_setup, split_idx, train_setup, splits, types, verbose, dl_type)

/opt/conda/lib/python3.7/site-packages/fastai/data/core.py in setup(self, train_setup)

/opt/conda/lib/python3.7/site-packages/fastcore/transform.py in setup(self, items, train_setup)
    198         tfms = self.fs[:]
    199         self.fs.clear()
--> 200         for t in tfms: self.add(t,items, train_setup)
    201 
    202     def add(self,ts, items=None, train_setup=False):

/opt/conda/lib/python3.7/site-packages/fastcore/transform.py in add(self, ts, items, train_setup)
    202     def add(self,ts, items=None, train_setup=False):
    203         if not is_listy(ts): ts=[ts]
--> 204         for t in ts: t.setup(items, train_setup)
    205         self.fs+=ts
    206         self.fs = self.fs.sorted(key='order')

/opt/conda/lib/python3.7/site-packages/fastcore/transform.py in setup(self, items, train_setup)
     85     def setup(self, items=None, train_setup=False):
     86         train_setup = train_setup if self.train_setup is None else self.train_setup
---> 87         return self.setups(getattr(items, 'train', items) if train_setup else items)
     88 
     89     def _call(self, fn, x, split_idx=None, **kwargs):

/opt/conda/lib/python3.7/site-packages/fastcore/dispatch.py in __call__(self, *args, **kwargs)
    118         elif self.inst is not None: f = MethodType(f, self.inst)
    119         elif self.owner is not None: f = MethodType(f, self.owner)
--> 120         return f(*args, **kwargs)
    121 
    122     def __get__(self, inst, owner):

/opt/conda/lib/python3.7/site-packages/fastai/data/transforms.py in setups(self, dsets)

/opt/conda/lib/python3.7/site-packages/fastai/data/core.py in <genexpr>(.0)

/opt/conda/lib/python3.7/site-packages/fastai/data/core.py in __getitem__(self, idx)

/opt/conda/lib/python3.7/site-packages/fastai/data/core.py in _after_item(self, o)

/opt/conda/lib/python3.7/site-packages/fastcore/transform.py in __call__(self, o)
    206         self.fs = self.fs.sorted(key='order')
    207 
--> 208     def __call__(self, o): return compose_tfms(o, tfms=self.fs, split_idx=self.split_idx)
    209     def __repr__(self): return f"Pipeline: {' -> '.join([f.name for f in self.fs if f.name != 'noop'])}"
    210     def __getitem__(self,i): return self.fs[i]

/opt/conda/lib/python3.7/site-packages/fastcore/transform.py in compose_tfms(x, tfms, is_enc, reverse, **kwargs)
    156     for f in tfms:
    157         if not is_enc: f = f.decode
--> 158         x = f(x, **kwargs)
    159     return x
    160 

/opt/conda/lib/python3.7/site-packages/fastcore/transform.py in __call__(self, x, **kwargs)
     79     @property
     80     def name(self): return getattr(self, '_name', _get_name(self))
---> 81     def __call__(self, x, **kwargs): return self._call('encodes', x, **kwargs)
     82     def decode  (self, x, **kwargs): return self._call('decodes', x, **kwargs)
     83     def __repr__(self): return f'{self.name}:\nencodes: {self.encodes}decodes: {self.decodes}'

/opt/conda/lib/python3.7/site-packages/fastcore/transform.py in _call(self, fn, x, split_idx, **kwargs)
     89     def _call(self, fn, x, split_idx=None, **kwargs):
     90         if split_idx!=self.split_idx and self.split_idx is not None: return x
---> 91         return self._do_call(getattr(self, fn), x, **kwargs)
     92 
     93     def _do_call(self, f, x, **kwargs):

/opt/conda/lib/python3.7/site-packages/fastcore/transform.py in _do_call(self, f, x, **kwargs)
     95             if f is None: return x
     96             ret = f.returns(x) if hasattr(f,'returns') else None
---> 97             return retain_type(f(x, **kwargs), x, ret)
     98         res = tuple(self._do_call(f, x_, **kwargs) for x_ in x)
     99         return retain_type(res, x)

/opt/conda/lib/python3.7/site-packages/fastcore/dispatch.py in __call__(self, *args, **kwargs)
    118         elif self.inst is not None: f = MethodType(f, self.inst)
    119         elif self.owner is not None: f = MethodType(f, self.owner)
--> 120         return f(*args, **kwargs)
    121 
    122     def __get__(self, inst, owner):

/tmp/ipykernel_28/2260717954.py in <lambda>(o)
      2                  get_items=get_image_files,
      3                  splitter=RandomSplitter(),
----> 4                  get_y=[lambda o: img2bbox[o.name][0], lambda o: img2bbox[o.name][1]],
      5                  item_tfms=Resize(256),
      6                  batch_tfms=aug_transforms(),

KeyError: 'mf.png'

I don’t know what this error signifies. Can anyone tell me what it means and how I can load my object detection dataset?

P.S. I don’t have any images named mf.png; in fact, all my images are in .tiff format.