Losing images after cleaning (errors)

After I run the cleaner
cleaner = ImageClassifierCleaner(learn)

I delete some files from the training and validation sets, which is fine, but every step after that fails. This is the error I get when I re-create my learner and run fine_tune:

learn = vision_learner(dls, resnet18, metrics=error_rate)

FileNotFoundError Traceback (most recent call last)
Cell In[57], line 2
1 learn = vision_learner(dls, resnet18, metrics=error_rate)
----> 2 learn.fine_tune(4)

File /opt/conda/lib/python3.10/site-packages/fastai/callback/schedule.py:165, in fine_tune(self, epochs, base_lr, freeze_epochs, lr_mult, pct_start, div, **kwargs)
163 “Fine tune with Learner.freeze for freeze_epochs, then with Learner.unfreeze for epochs, using discriminative LR.”
164 self.freeze()
→ 165 self.fit_one_cycle(freeze_epochs, slice(base_lr), pct_start=0.99, **kwargs)
166 base_lr /= 2
167 self.unfreeze()

File /opt/conda/lib/python3.10/site-packages/fastai/callback/schedule.py:119, in fit_one_cycle(self, n_epoch, lr_max, div, div_final, pct_start, wd, moms, cbs, reset_opt, start_epoch)
116 lr_max = np.array([h[‘lr’] for h in self.opt.hypers])
117 scheds = {‘lr’: combined_cos(pct_start, lr_max/div, lr_max, lr_max/div_final),
118 ‘mom’: combined_cos(pct_start, *(self.moms if moms is None else moms))}
→ 119 self.fit(n_epoch, cbs=ParamScheduler(scheds)+L(cbs), reset_opt=reset_opt, wd=wd, start_epoch=start_epoch)

File /opt/conda/lib/python3.10/site-packages/fastai/learner.py:264, in Learner.fit(self, n_epoch, lr, wd, cbs, reset_opt, start_epoch)
262 self.opt.set_hypers(lr=self.lr if lr is None else lr)
263 self.n_epoch = n_epoch
→ 264 self._with_events(self._do_fit, ‘fit’, CancelFitException, self._end_cleanup)

File /opt/conda/lib/python3.10/site-packages/fastai/learner.py:199, in Learner._with_events(self, f, event_type, ex, final)
198 def _with_events(self, f, event_type, ex, final=noop):
→ 199 try: self(f'before_{event_type}'); f()
200 except ex: self(f'after_cancel_{event_type}')
201 self(f'after_{event_type}'); final()

File /opt/conda/lib/python3.10/site-packages/fastai/learner.py:247, in Learner._do_epoch(self)
246 def _do_epoch(self):
→ 247 self._do_epoch_train()
248 self._do_epoch_validate()

File /opt/conda/lib/python3.10/site-packages/fastai/learner.py:239, in Learner._do_epoch_train(self)
237 def _do_epoch_train(self):
238 self.dl = self.dls.train
→ 239 self._with_events(self.all_batches, ‘train’, CancelTrainException)

File /opt/conda/lib/python3.10/site-packages/fastai/learner.py:199, in Learner._with_events(self, f, event_type, ex, final)
198 def _with_events(self, f, event_type, ex, final=noop):
→ 199 try: self(f'before_{event_type}'); f()
200 except ex: self(f'after_cancel_{event_type}')
201 self(f'after_{event_type}'); final()

File /opt/conda/lib/python3.10/site-packages/fastai/learner.py:205, in Learner.all_batches(self)
203 def all_batches(self):
204 self.n_iter = len(self.dl)
→ 205 for o in enumerate(self.dl): self.one_batch(*o)

File /opt/conda/lib/python3.10/site-packages/fastai/data/load.py:127, in DataLoader.__iter__(self)
125 self.before_iter()
126 self.__idxs=self.get_idxs() # called in context of main process (not workers/subprocesses)
→ 127 for b in _loaders[self.fake_l.num_workers==0](self.fake_l):
128 # pin_memory causes tuples to be converted to lists, so convert them back to tuples
129 if self.pin_memory and type(b) == list: b = tuple(b)
130 if self.device is not None: b = to_device(b, self.device)

File /opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py:634, in _BaseDataLoaderIter.__next__(self)
631 if self._sampler_iter is None:
632 # TODO(https://github.com/pytorch/pytorch/issues/76750)
633 self._reset() # type: ignore[call-arg]
→ 634 data = self._next_data()
635 self._num_yielded += 1
636 if self._dataset_kind == _DatasetKind.Iterable and \
637 self._IterableDataset_len_called is not None and \
638 self._num_yielded > self._IterableDataset_len_called:

File /opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py:1346, in _MultiProcessingDataLoaderIter._next_data(self)
1344 else:
1345 del self._task_info[idx]
→ 1346 return self._process_data(data)

File /opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py:1372, in _MultiProcessingDataLoaderIter._process_data(self, data)
1370 self._try_put_index()
1371 if isinstance(data, ExceptionWrapper):
→ 1372 data.reraise()
1373 return data

File /opt/conda/lib/python3.10/site-packages/torch/_utils.py:644, in ExceptionWrapper.reraise(self)
640 except TypeError:
641 # If the exception takes multiple arguments, don’t try to
642 # instantiate since we don’t know how to
643 raise RuntimeError(msg) from None
→ 644 raise exception

FileNotFoundError: Caught FileNotFoundError in DataLoader worker process 0.
Original Traceback (most recent call last):
File “/opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py”, line 308, in _worker_loop
data = fetcher.fetch(index)
File “/opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py”, line 41, in fetch
data = next(self.dataset_iter)
File “/opt/conda/lib/python3.10/site-packages/fastai/data/load.py”, line 138, in create_batches
yield from map(self.do_batch, self.chunkify(res))
File “/opt/conda/lib/python3.10/site-packages/fastcore/basics.py”, line 230, in chunked
res = list(itertools.islice(it, chunk_sz))
File “/opt/conda/lib/python3.10/site-packages/fastai/data/load.py”, line 153, in do_item
try: return self.after_item(self.create_item(s))
File “/opt/conda/lib/python3.10/site-packages/fastai/data/load.py”, line 160, in create_item
if self.indexed: return self.dataset[s or 0]
File "/opt/conda/lib/python3.10/site-packages/fastai/data/core.py", line 458, in __getitem__
res = tuple([tl[it] for tl in self.tls])
File "/opt/conda/lib/python3.10/site-packages/fastai/data/core.py", line 458, in <listcomp>
res = tuple([tl[it] for tl in self.tls])
File "/opt/conda/lib/python3.10/site-packages/fastai/data/core.py", line 417, in __getitem__
return self._after_item(res) if is_indexer(idx) else res.map(self._after_item)
File "/opt/conda/lib/python3.10/site-packages/fastai/data/core.py", line 377, in _after_item
def _after_item(self, o): return self.tfms(o)
File "/opt/conda/lib/python3.10/site-packages/fastcore/transform.py", line 208, in __call__
def __call__(self, o): return compose_tfms(o, tfms=self.fs, split_idx=self.split_idx)
File "/opt/conda/lib/python3.10/site-packages/fastcore/transform.py", line 158, in compose_tfms
x = f(x, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/fastcore/transform.py", line 81, in __call__
def __call__(self, x, **kwargs): return self._call('encodes', x, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/fastcore/transform.py", line 91, in _call
return self._do_call(getattr(self, fn), x, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/fastcore/transform.py", line 97, in _do_call
return retain_type(f(x, **kwargs), x, ret)
File "/opt/conda/lib/python3.10/site-packages/fastcore/dispatch.py", line 120, in __call__
return f(*args, **kwargs)
File “/opt/conda/lib/python3.10/site-packages/fastai/vision/core.py”, line 125, in create
return cls(load_image(fn, **merge(cls._open_args, kwargs)))
File “/opt/conda/lib/python3.10/site-packages/fastai/vision/core.py”, line 98, in load_image
im = Image.open(fn)
File “/opt/conda/lib/python3.10/site-packages/PIL/Image.py”, line 3236, in open
fp = builtins.open(filename, “rb”)
FileNotFoundError: [Errno 2] No such file or directory: ‘/kaggle/working/dogs/big/b101214a-a983-4903-8fc3-234868c5a176.jpg’

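The issue is that the image file is gone. Cleaning removes files under `path`, but your existing `dls` still references the old list of files, so the DataLoader tries to open an image that no longer exists. Re-run the step that builds your DataLoaders from the path before creating the learner again: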
dls = bears.dataloaders(path)
learn = vision_learner(dls, resnet18, metrics=error_rate)