Fit_one_cycle throws up PIL bug with some images

I’m trying to build an Indian snake classifier using this dataset, which I pulled from GitHub. I’ve just started with lesson 1 and decided to get my hands dirty with what I’ve learnt.

I managed to use ImageDataBunch to load the data, but I encountered an error when I ran learn.fit_one_cycle(). The error happened in the first epoch and produced the following error log:

---------------------------------------------------------------------------
UnboundLocalError                         Traceback (most recent call last)
<ipython-input-16-495233eaf2b4> in <module>
----> 1 learn.fit_one_cycle(4)

/opt/anaconda3/lib/python3.7/site-packages/fastai/train.py in fit_one_cycle(learn, cyc_len, max_lr, moms, div_factor, pct_start, final_div, wd, callbacks, tot_epochs, start_epoch)
     20     callbacks.append(OneCycleScheduler(learn, max_lr, moms=moms, div_factor=div_factor, pct_start=pct_start,
     21                                        final_div=final_div, tot_epochs=tot_epochs, start_epoch=start_epoch))
---> 22     learn.fit(cyc_len, max_lr, wd=wd, callbacks=callbacks)
     23  
     24 def lr_find(learn:Learner, start_lr:Floats=1e-7, end_lr:Floats=10, num_it:int=100, stop_div:bool=True, wd:float=None):

/opt/anaconda3/lib/python3.7/site-packages/fastai/basic_train.py in fit(self, epochs, lr, wd, callbacks)
    200         callbacks = [cb(self) for cb in self.callback_fns + listify(defaults.extra_callback_fns)] + listify(callbacks)
    201         self.cb_fns_registered = True
--> 202         fit(epochs, self, metrics=self.metrics, callbacks=self.callbacks+callbacks)
    203 
    204     def create_opt(self, lr:Floats, wd:Floats=0.)->None:

/opt/anaconda3/lib/python3.7/site-packages/fastai/basic_train.py in fit(epochs, learn, callbacks, metrics)
     97             cb_handler.set_dl(learn.data.train_dl)
     98             cb_handler.on_epoch_begin()
---> 99             for xb,yb in progress_bar(learn.data.train_dl, parent=pbar):
    100                 xb, yb = cb_handler.on_batch_begin(xb, yb) 
    101                 loss = loss_batch(learn.model, xb, yb, learn.loss_func, learn.opt, cb_handler)

/opt/anaconda3/lib/python3.7/site-packages/fastprogress/fastprogress.py in __iter__(self)
     70         self.update(0)
     71         try:
---> 72             for i,o in enumerate(self._gen):
     73                 if i >= self.total: break
     74                 yield o

/opt/anaconda3/lib/python3.7/site-packages/fastai/basic_data.py in __iter__(self)
     73     def __iter__(self):
     74         "Process and returns items from `DataLoader`."
---> 75         for b in self.dl: yield self.proc_batch(b)
     76  
     77     @classmethod

/opt/anaconda3/lib/python3.7/site-packages/torch/utils/data/dataloader.py in __next__(self)
    799             if len(self.task_info[self.rcvd_idx]) == 2:
    800                 data = self.task_info.pop(self.rcvd_idx)[1]
--> 801                 return self._process_data(data)
    802 
    803             assert not self.shutdown and self.tasks_outstanding > 0 

/opt/anaconda3/lib/python3.7/site-packages/torch/utils/data/dataloader.py in _process_data(self, data)
    844         self._try_put_index()
    845         if isinstance(data, ExceptionWrapper):
--> 846             data.reraise()
    847         return data
    848 


/opt/anaconda3/lib/python3.7/site-packages/torch/_utils.py in reraise(self)
    367             # (https://bugs.python.org/issue2651), so we work around it.
    368             msg = KeyErrorMessage(msg)
--> 369         raise self.exc_type(msg)

UnboundLocalError: Caught UnboundLocalError in DataLoader worker process 7.
Original Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.7/site-packages/torch/utils/data/_utils/worker.py", line 178, in _worker_loop
    data = fetcher.fetch(index)
  File "/opt/anaconda3/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/opt/anaconda3/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 44, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/opt/anaconda3/lib/python3.7/site-packages/fastai/data_block.py", line 648, in __getitem__
    if self.item is None: x,y = self.x[idxs],self.y[idxs]
  File "/opt/anaconda3/lib/python3.7/site-packages/fastai/data_block.py", line 118, in __getitem__
    if isinstance(idxs, Integral): return self.get(idxs)
  File "/opt/anaconda3/lib/python3.7/site-packages/fastai/vision/data.py", line 271, in get
    res = self.open(fn)
  File "/opt/anaconda3/lib/python3.7/site-packages/fastai/vision/data.py", line 267, in open
    return open_image(fn, convert_mode=self.convert_mode, after_open=self.after_open)
  File "/opt/anaconda3/lib/python3.7/site-packages/fastai/vision/image.py", line 393, in open_image
    x = PIL.Image.open(fn).convert(convert_mode)
  File "/opt/anaconda3/lib/python3.7/site-packages/PIL/Image.py", line 2690, in open
    im = _open_core(fp, filename, prefix)
  File "/opt/anaconda3/lib/python3.7/site-packages/PIL/Image.py", line 2676, in _open_core
    im = factory(fp, filename)
  File "/opt/anaconda3/lib/python3.7/site-packages/PIL/JpegImagePlugin.py", line 783, in jpeg_factory
    im = JpegImageFile(fp, filename)
  File "/opt/anaconda3/lib/python3.7/site-packages/PIL/ImageFile.py", line 103, in __init__
    self._open()
  File "/opt/anaconda3/lib/python3.7/site-packages/PIL/JpegImagePlugin.py", line 373, in _open
    handler(self, i)
  File "/opt/anaconda3/lib/python3.7/site-packages/PIL/JpegImagePlugin.py", line 139, in APP
    self.info["photoshop"] = photoshop
UnboundLocalError: local variable 'photoshop' referenced before assignment

A bit of probing led me to this PR, which seems to address the same problem.

Since I’m new to all of this, I edited /opt/anaconda3/lib/python3.7/site-packages/PIL/JpegImagePlugin.py at line 139 to apply the fix from that PR (a simple indentation change), and it now works.

Can someone from the community let me know if my analysis is correct?

I checked which PIL packages are installed on the GCP instance and got the following:

jupyter@my-fastai-instance:~$ conda list PIL
# packages in environment at /opt/anaconda3:
#
# Name                    Version                   Build  Channel
pillow                    6.1.0            py37h34e0f95_0  
pillow-simd               6.0.0.post0              pypi_0    pypi

I think the fix should already be included in 6.1.0, but I could be wrong.