Error using TfmdDS (Image classification)

Hello everyone!

I’m learning v2 along with code walk-thrus from Jeremy. It’s beautiful how much thinking and how many iterations went into writing v2. As a pet project, I wanted to do a bit of Image classification myself and have created a List of items as shown

and the transforms are as below

from PIL import Image
# Open the image file at `fn` with PIL and return it as-is (no explicit load or copy).
# NOTE(review): `Image.open` is lazy and returns a file-backed plugin class
# (the traceback in this post shows `TiffImagePlugin`), not a plain `Image.Image`.
def open_img(fn:Path)->Image.Image: return Image.open(fn)
def _rescale_and_pad(im:Image.Image, sz)->Image.Image:
    """Fit `im` inside `sz` keeping its aspect ratio, then pad to `sz` with white.

    `sz` is either an int (square target) or a `(width, height)` pair.
    Returns a new mode-"L" image of exactly `sz`, with the resized image in the
    top-left corner and the remaining area filled with 255.
    """
    if isinstance(sz, int): sz = (sz,sz)
    w,h = im.size
    W,H = sz
    # Single scale factor for both axes so the aspect ratio is preserved
    f = min(W/w, H/h)
    # Clamp to 1px: `int()` truncation can yield 0 for extreme aspect ratios
    _w,_h = max(1, int(w*f)), max(1, int(h*f))
    im = im.resize((_w,_h))
    IM = Image.new("L", (W,H), color=255)
    # Paste at the top-left corner. The previous `im.getbbox()` box is the bounds
    # of the non-zero pixels, which may be smaller than `im` (paste then raises
    # "images do not match") or None for an all-zero image.
    IM.paste(im, (0,0))
    return IM

def RescaleAndPad(size):
    'Build a one-argument callable that applies `_rescale_and_pad` with `size` fixed'
    def _inner(im):
        # `size` is captured from the enclosing call
        return _rescale_and_pad(im, size)
    return _inner

# One pipeline per output of the dataset:
#   x: take item[0], open it, rescale and pad to (width, height) = (300, 40), then img2tensor and Cuda
#   y: take item[1] and categorize it
# Uses `RescaleAndPad`, the factory defined above (no `Rescale` is defined in this post).
tfms = [[operator.itemgetter(0), open_img, RescaleAndPad((300,40)), img2tensor, Cuda()],
        [operator.itemgetter(1), Categorize()]]
train_ds = TfmdDS(train, tfms)

However, when I call train_ds[0] I get the stack trace shown at the end of this post (which looks kinda scary), and I’m not sure what I’m doing wrong. The code works fine if I cut down the train transforms to

# Reduced pipelines: x only opens the image, y categorizes the label
tfms = [[operator.itemgetter(0), open_img],
        [operator.itemgetter(1), Categorize()]]

So I inferred it has something to do with the RescaleAndPad transform, but that transform works fine if I run it step by step. Please help.

Stack Trace

AttributeError                            Traceback (most recent call last)
<ipython-input-28-856230469917> in <module>
----> 1 train_ds[0]

~notebooks/data/OCR/MAIN/tmp/fastai_dev/dev/local/core.py in __getitem__(self, idx)
    260         super().__init__(items)
    261 
--> 262     def __getitem__(self, idx): return L(self._gets(idx), use_list=None) if is_iter(idx) else self._get(idx)
    263     def _get(self, i): return getattr(self.items,'iloc',self.items)[i]
    264     def _gets(self, i):

~notebooks/data/OCR/MAIN/tmp/fastai_dev/dev/local/data/pipeline.py in _get(self, it)
    155         self.tls = [TfmdList(items, t, do_setup=do_setup, filt=filt, use_list=use_list) for t in L(tfms)]
    156 
--> 157     def _get(self, it): return tuple(tl._get(it) for tl in self.tls)
    158     def __repr__(self): return coll_repr(self)
    159     def decode(self, o): return tuple(it.decode(o_) for o_,it in zip(o,self.tls))

~notebooks/data/OCR/MAIN/tmp/fastai_dev/dev/local/data/pipeline.py in <genexpr>(.0)
    155         self.tls = [TfmdList(items, t, do_setup=do_setup, filt=filt, use_list=use_list) for t in L(tfms)]
    156 
--> 157     def _get(self, it): return tuple(tl._get(it) for tl in self.tls)
    158     def __repr__(self): return coll_repr(self)
    159     def decode(self, o): return tuple(it.decode(o_) for o_,it in zip(o,self.tls))

~notebooks/data/OCR/MAIN/tmp/fastai_dev/dev/local/data/pipeline.py in _get(self, i)
    133 
    134     def _new(self, items, *args, **kwargs): return super()._new(items, tfms=self.tfms, do_setup=False, use_list=None, filt=self.filt)
--> 135     def _get (self, i): return self.tfms(super()._get(i))
    136     def __repr__(self): return f"{self.__class__.__name__}: {self.items}\ntfms - {self.tfms.fs}"
    137 

~notebooks/data/OCR/MAIN/tmp/fastai_dev/dev/local/data/pipeline.py in __call__(self, o)
     93         self.fs.append(t)
     94 
---> 95     def __call__(self, o): return compose_tfms(o, tfms=self.fs, filt=self.filt)
     96     def decode  (self, o): return compose_tfms(o, tfms=self.fs, is_enc=False, reverse=True, filt=self.filt)
     97     def __repr__(self): return f"Pipeline: {self.fs}"

~notebooks/data/OCR/MAIN/tmp/fastai_dev/dev/local/data/pipeline.py in compose_tfms(x, tfms, is_enc, reverse, **kwargs)
     55     for f in tfms:
     56         if not is_enc: f = f.decode
---> 57         x = f(x, **kwargs)
     58     return x
     59 

~notebooks/data/OCR/MAIN/tmp/fastai_dev/dev/local/data/transform.py in __call__(self, x, **kwargs)
    149     @property
    150     def use_as_item(self): return ifnone(self.as_item_force, self.as_item)
--> 151     def __call__(self, x, **kwargs): return self._call('encodes', x, **kwargs)
    152     def decode  (self, x, **kwargs): return self._call('decodes', x, **kwargs)
    153     def __repr__(self): return f'{self.__class__.__name__}: {self.use_as_item} {self.encodes} {self.decodes}'

~notebooks/data/OCR/MAIN/tmp/fastai_dev/dev/local/data/transform.py in _call(self, fn, x, filt, **kwargs)
    156         if filt!=self.filt and self.filt is not None: return x
    157         f = getattr(self, fn)
--> 158         if self.use_as_item: return self._do_call(f, x, **kwargs)
    159         res = tuple(self._do_call(f, x_, **kwargs) for x_ in x)
    160         return retain_type(res, x)

~notebooks/data/OCR/MAIN/tmp/fastai_dev/dev/local/data/transform.py in _do_call(self, f, x, **kwargs)
    161 
    162     def _do_call(self, f, x, **kwargs):
--> 163         return x if f is None else retain_type(f(x, **kwargs), x, f.returns(x))
    164 
    165 add_docs(Transform, decode="Delegate to `decodes` to undo transform")

~notebooks/data/OCR/MAIN/tmp/fastai_dev/dev/local/core.py in retain_type(new, old, typ)
    525         typ = old if isinstance(old,type) else type(old)
    526     # Do nothing the new type is already an instance of requested type (i.e. same type)
--> 527     return typ(new) if typ!=NoneType and not isinstance(new, typ) else new
    528 
    529 #Cell 170

~usr/local/lib/python3.6/dist-packages/PIL/ImageFile.py in __init__(self, fp, filename)
    101 
    102         try:
--> 103             self._open()
    104         except (IndexError,  # end of data
    105                 TypeError,  # end of data (ord)

~usr/local/lib/python3.6/dist-packages/PIL/TiffImagePlugin.py in _open(self)
    967 
    968         # Header
--> 969         ifh = self.fp.read(8)
    970 
    971         # image file directory (tag dictionary)

AttributeError: 'Image' object has no attribute 'read'

This is a followup error for the same code. If I do cut down the transforms, I am able to index into the TfmdDS fine.

But there’s an error when I call

# Display the first item of the dataset; per the traceback below, `show_at`
# indexes the dataset and forwards the keyword arguments to `show`.
ix = 0
train_ds.show_at(ix, cmap="gray", figsize=(10,4))

Stack trace

TypeError                                 Traceback (most recent call last)
<ipython-input-11-bae04c8be184> in <module>
      1 ix = random.choice(range(len(train_ds)))
      2 ix = 0
----> 3 train_ds.show_at(ix, cmap="gray", figsize=(10,4))

~notebooks/data/OCR/MAIN/tmp/fastai_dev/dev/local/data/pipeline.py in show_at(self, idx, **kwargs)
    120     def subset(self, idxs): return self._new(super()._gets(idxs))
    121     def decode_at(self, idx): return self.decode(self[idx])
--> 122     def show_at(self, idx, **kwargs): return self.show(self[idx], **kwargs)
    123 
    124 #Cell 59

~notebooks/data/OCR/MAIN/tmp/fastai_dev/dev/local/data/pipeline.py in show(self, o, ctx, **kwargs)
    159     def decode(self, o): return tuple(it.decode(o_) for o_,it in zip(o,self.tls))
    160     def show(self, o, ctx=None, **kwargs):
--> 161         for o_,it in zip(o,self.tls): ctx = it.show(o_, ctx=ctx, **kwargs)
    162         return ctx
    163 

~notebooks/data/OCR/MAIN/tmp/fastai_dev/dev/local/data/pipeline.py in show(self, o, **kwargs)
    137 
    138     # Delegating to `self.tfms`
--> 139     def show(self, o, **kwargs): return self.tfms.show(o, **kwargs)
    140     def setup(self): self.tfms.setup(self)
    141     def decode(self, x, **kwargs): return self.tfms.decode(x, **kwargs)

~notebooks/data/OCR/MAIN/tmp/fastai_dev/dev/local/data/pipeline.py in show(self, o, ctx, **kwargs)
    103     def show(self, o, ctx=None, **kwargs):
    104         for f in reversed(self.fs):
--> 105             res = self._show(o, ctx, **kwargs)
    106             if res is not None: return res
    107             o = f.decode(o, filt=self.filt)

~notebooks/data/OCR/MAIN/tmp/fastai_dev/dev/local/data/pipeline.py in _show(self, o, ctx, **kwargs)
    111         o1 = [o] if self.as_item else o
    112         if not all(hasattr(o_, 'show') for o_ in o1): return
--> 113         for o_ in o1: ctx = o_.show(ctx=ctx, **kwargs)
    114         return 1 if ctx is None else ctx
    115 

TypeError: show() got an unexpected keyword argument 'ctx'

Any help would be appreciated.

Okay I fixed the error. In an older notebook I had open_img as

# Open the image file at `fn` and return a copy of it instead of the opened file object.
# NOTE(review): presumably `.copy()` forces the pixel data to load and yields a plain
# `Image.Image` rather than the file-backed plugin class — confirm against the Pillow docs.
def open_img(fn:Path)->Image.Image: return Image.open(fn).copy()

instead of

# Same as above but without `.copy()`: returns the object produced by `Image.open` directly.
def open_img(fn:Path)->Image.Image: return Image.open(fn)

So I reused the first definition instead of the second (as is the case in the first post) and it fixed both the errors. I’m curious why I used .copy() and what makes the error go away :thinking::thinking:

You should use load_image. But actually you don’t need the return type annotation at all (because you’re not changing the type that the method returns) - and removing it should fix the problem AFAICT.

The source of the problem is that the constructor for PIL.Image.Image when passed an Image assumes that the image is fully loaded, but Image.open only reads the image metadata.