Error with databunch.add_test_folder("test")

Has anyone had success with add_test_folder?

I am able to create a databunch using

data = ImageDataBunch.from_folder(path, bs=bs)

but when i try to run

data.add_test_folder("test")

with a folder under path called test that contains images i get this error:

data.add_test_folder("test")
File "/home/matt/src/fastai/fastai/basic_data.py", line 98, in __getattr__
def __getattr__(self,k:int)->Any: return getattr(self.train_dl, k)
File "/home/matt/src/fastai/fastai/basic_data.py", line 24, in __getattr__
def __getattr__(self,k:str)->Any: return getattr(self.dl, k)
File "/home/matt/src/fastai/fastai/basic_data.py", line 8, in DataLoader___getattr__
def DataLoader___getattr__(dl, k:str)->Any: return getattr(dl.dataset, k)
File "/home/matt/src/fastai/fastai/data_block.py", line 377, in __getattr__
return res if res is not None else getattr(self.y, k)
AttributeError: 'CategoryList' object has no attribute 'add_test_folder'

Doesn’t make sense to me.

I’m using 1.0.25

Could you show all the code of what you’re running? Here’s an example from our test suite fyi:

def test_vision_datasets():
    "Build MNIST_TINY from folders, add the test folder, and check class alignment and test size."
    mnist_tiny = untar_data(URLs.MNIST_TINY)
    # Same pipeline as a single fluent chain, spelled out one step at a time.
    all_items = ImageItemList.from_folder(mnist_tiny)
    split_items = all_items.split_by_idx([0])
    labelled = split_items.label_from_folder()
    sds = labelled.add_test_folder()
    train_classes, valid_classes = sds.train.classes, sds.valid.classes
    assert np.array_equal(train_classes, valid_classes), 'train/valid classes same'
    assert len(sds.test)==20, "test_ds is correct size"

Hmm… I have a lot of code, but when I boil it down it's really only running these 2 lines:

data = ImageDataBunch.from_folder(path, bs=bs)
data.add_test_folder("test")

Here is a dropbox link so you can look around and try it out:
Removed link

I was actually able to replicate the bug with just this code:

error:

In theory you should be able to download that whole dropbox folder and replicate the problem. or point me to my error…

thanks

Oh you’re trying to add a test folder to a DataBunch? I don’t think you can do that. You’ll need to either use the data blocks API, or pass ‘test=’ to your databunch init.

I tried to create a databunch by using ImageItemList by following the documentation
(https://docs.fast.ai/data_block.html)

Here is the extract:

I could create dataloader for train_dl and train_ds but not test_dl and test_ds.

The full error message for data is:

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
~/anaconda3/envs/fastai/lib/python3.6/site-packages/PIL/Image.py in open(fp, mode)
   2612     try:
-> 2613         fp.seek(0)
   2614     except (AttributeError, io.UnsupportedOperation):

AttributeError: 'Image' object has no attribute 'seek'

During handling of the above exception, another exception occurred:

AttributeError                            Traceback (most recent call last)
<timed exec> in <module>

~/fastai/fastai/data_block.py in add_test_folder(self, test_folder, label)
    380         "Add test set containing items from folder `test_folder` and an arbitrary `label`."
    381         items = self.x.__class__.from_folder(self.path/test_folder)
--> 382         return self.add_test(items.items, label=label)
    383 
    384 class LabelList(Dataset):

~/fastai/fastai/data_block.py in add_test(self, items, label)
    374         labels = [label for _ in range_of(items)]
    375         if isinstance(items, ItemList): self.test = self.valid.new(items.items, labels, xtra=items.xtra)
--> 376         else: self.test = self.valid.new(items, labels)
    377         return self
    378 

~/fastai/fastai/data_block.py in new(self, x, y, **kwargs)
    405             return self.__class__(x, y, tfms=self.tfms, tfm_y=self.tfm_y, **self.tfmargs)
    406         else:
--> 407             return self.new(self.x.new(x, **kwargs), self.y.new(y, **kwargs)).process()
    408 
    409     def __getattr__(self,k:str)->Any:

~/fastai/fastai/data_block.py in new(self, x, y, **kwargs)
    405             return self.__class__(x, y, tfms=self.tfms, tfm_y=self.tfm_y, **self.tfmargs)
    406         else:
--> 407             return self.new(self.x.new(x, **kwargs), self.y.new(y, **kwargs)).process()
    408 
    409     def __getattr__(self,k:str)->Any:

~/fastai/fastai/data_block.py in new(self, x, y, **kwargs)
    405             return self.__class__(x, y, tfms=self.tfms, tfm_y=self.tfm_y, **self.tfmargs)
    406         else:
--> 407             return self.new(self.x.new(x, **kwargs), self.y.new(y, **kwargs)).process()
    408 
    409     def __getattr__(self,k:str)->Any:

~/fastai/fastai/data_block.py in new(self, items, processor, **kwargs)
     77     def new(self, items:Iterator, processor:PreProcessor=None, **kwargs)->'ItemList':
     78         processor = ifnone(processor, self.processor)
---> 79         return self.__class__(items=items, processor=processor, path=self.path, x=self.x, **kwargs)
     80 
     81     def __getitem__(self,idxs:int)->Any:

~/fastai/fastai/data_block.py in __init__(self, items, path, label_cls, xtra, processor, x, **kwargs)
     43         self.path = Path(path)
     44         self.num_parts = len(self.path.parts)
---> 45         self.items,self.x = array(items, dtype=object),x
     46         self.label_cls,self.xtra,self.processor = ifnone(label_cls,self._label_cls),xtra,processor
     47         self._label_list,self._split = LabelList,ItemLists

~/fastai/fastai/core.py in array(a, *args, **kwargs)
    218     "Same as `np.array` but also handles generators"
    219     if not isinstance(a, collections.Sized): a = list(a)
--> 220     return np.array(a, *args, **kwargs)
    221 
    222 class Category(ItemBase):

~/fastai/fastai/data_block.py in __getitem__(self, idxs)
     80 
     81     def __getitem__(self,idxs:int)->Any:
---> 82         if isinstance(try_int(idxs), int): return self.get(idxs)
     83         else: return self.new(self.items[idxs], xtra=index_row(self.xtra, idxs))
     84 

~/fastai/fastai/vision/data.py in get(self, i)
    288     def get(self, i):
    289         fn = super().get(i)
--> 290         res = self.open(fn)
    291         self.sizes[i] = res.size
    292         return res

~/fastai/fastai/vision/data.py in open(self, fn)
    284         self.sizes={}
    285 
--> 286     def open(self, fn): return open_image(fn)
    287 
    288     def get(self, i):

~/fastai/fastai/vision/image.py in open_image(fn, div, convert_mode, cls)
    440     "Return `Image` object created from image in file `fn`."
    441     #fn = getattr(fn, 'path', fn)
--> 442     x = PIL.Image.open(fn).convert(convert_mode)
    443     x = pil2tensor(x,np.float32)
    444     if div: x.div_(255)

~/anaconda3/envs/fastai/lib/python3.6/site-packages/PIL/Image.py in open(fp, mode)
   2613         fp.seek(0)
   2614     except (AttributeError, io.UnsupportedOperation):
-> 2615         fp = io.BytesIO(fp.read())
   2616         exclusive_fp = True
   2617 

AttributeError: 'Image' object has no attribute 'read'

I have the “train” and “test” folders as required.

PS. I did a git pull a few minutes before running this code (v1.0.28).

1 Like

I don’t manage to reproduce your bug. Can you try your same code on our tiny mnist sample? (with path = untar_data(URLs.MNIST_SAMPLE)).

Thanks for looking into it.

src = (ImageItemList.from_csv(path,'labels.csv')
       .random_split_by_pct(valid_pct=0.10)  
       .label_from_df(cols='label')
       .add_test_folder('valid')  # the folder named 'valid' is passed as the test folder here; see remarks below
       .transform(tfms, size=224))  

Remarks:

  • Here the “valid” folder of the MNIST_SAMPLE dataset is used as the “test” folder. Calling .add_test_folder() with no argument also works, but only if the folder is called “test”.

Then, data loader works for both training and test dataset.

Note: During the last two days, I did not do any conda env update for fastai library but I updated Linux packages. :thinking:

1 Like

Hello, I am sorry for commenting here, but I asked this question on the forum and no one has responded:
I am working on a multi-label medical image diagnosis task where I have images and a dataframe.
The dataframe contains: img_name, set, label, bbx.
The set column contains: Train, Valid and Test.
I am having a problem adding the test set of the dataset; my dataloader is below:

def get_chestxray8(path:PathOrStr, bs:int, img_sz:int, valid_only_bbx:bool=False, tfms:bool=False, convert_mode:str='RGB',
                   normalize:bool=True, norm_stats:Tuple[Floats, Floats]=imagenet_stats, processor:Optional[Callable]=None,
                   **kwargs:Any)->DataBunch:
    '''
    Build a `DataBunch` from the dataframe pickled at `path/'full_ds_bbx.pkl'` and the images under `path/'images'`.

    Every row whose `set` column is anything other than 'Train' is flagged `is_valid`, so all of
    those rows end up in the validation split. No test set is attached by this function.

    - `bs`: batch size passed to `databunch`.
    - `img_sz`: `size` passed to `transform`; only used when `tfms` is true.
    - `valid_only_bbx`: when true, keep only the rows whose `set` is 'Train'. The filter runs after
      `is_valid` is computed, so every row it keeps has `is_valid == False`.
      NOTE(review): the parameter name suggests a different intent -- confirm with the author.
    - `tfms`: when true, apply `get_transforms(**kwargs)` with `tfm_y=True`.
    - `convert_mode`: forwarded to `CustomObjectItemList.from_df`.
    - `normalize`, `norm_stats`: when `normalize` is true, `normalize(stats=norm_stats)` is called on the result.
    - `processor`: optional callable; it receives the dataframe and its return value replaces it.
    - `kwargs`: forwarded to `get_transforms`.
    '''
    root = Path(path)
    frame = pd.read_pickle(root/'full_ds_bbx.pkl')
    # `is_valid` is computed before the optional Train-only filter below.
    frame['is_valid'] = frame.set!='Train'
    if valid_only_bbx:
        frame = frame[(frame.set=='Train')]
    if processor is not None:
        frame = processor(frame)

    # Map each file name to its label so the labelling function is a plain lookup.
    file_label_cols = frame[['file','label']]
    lbl_dict = file_label_cols.set_index('file')['label'].to_dict()

    def bbox_label_func(fn:str)->list:
        # The lookup key is the bare file name, without any directory part.
        return lbl_dict[Path(fn).name]

    lbls = ['No finding', 'Atelectasis', 'Cardiomegaly', 'Consolidation', 'Infiltration', 
    'Lung Opacity', 'Mass', 'Pleural effusion', 'Pleural thickening', 'Pneumothorax', 'Pulmonary fibrosis']

    item_list = CustomObjectItemList.from_df(frame, root / 'images', cols='file', convert_mode=convert_mode)
    split_lists = item_list.split_from_df('is_valid')
    src = split_lists.label_from_func(bbox_label_func, classes=lbls)
    if tfms:
        src = src.transform(get_transforms(**kwargs), size=img_sz, tfm_y=True)

    data = src.databunch(bs=bs, collate_fn=multiclass_bb_pad_collate)
    if not normalize:
        return data
    return data.normalize(stats=norm_stats)

and I get this databunch, where test and valid are combined, giving me only valid.
My databunch looks like this:

ImageDataBunch;

Train: LabelList (12662 items)
x: CustomObjectItemList
Image (3, 512, 512),Image (3, 512, 512),Image (3, 512, 512),Image (3, 512, 512),Image (3, 512, 512)
y: CustomObjectCategoryList
ImageBBox (512, 512),ImageBBox (512, 512),ImageBBox (512, 512),ImageBBox (512, 512),ImageBBox (512, 512)
Path: /home/ali/Desktop/CX Product/RpSalWeaklyDet/images;

Valid: LabelList (2129 items)
x: CustomObjectItemList
Image (3, 512, 512),Image (3, 512, 512),Image (3, 512, 512),Image (3, 512, 512),Image (3, 512, 512)
y: CustomObjectCategoryList
ImageBBox (512, 512),ImageBBox (512, 512),ImageBBox (512, 512),ImageBBox (512, 512),ImageBBox (512, 512)
Path: /home/ali/Desktop/CX Product/RpSalWeaklyDet/images;

Test: None

Please suggest how I could add the test set too, without combining test + valid.
Thanks