Since this library is somewhat out of date and the book version is from 2020, I've been using ChatGPT to help with errors and troubleshooting (I'm writing this in September 2025). That's where I learned of FiftyOne. This is the code GPT spit out to get around the image cleaner issues.
import torch
import numpy as np
import fiftyone as fo
# 1) Get predictions + losses from your learner
probs, targets, losses = learn.get_preds(with_loss=True)
preds = probs.argmax(dim=1)
# 2) Sort by highest loss
topk = 50 # adjust how many you want to see
idxs = torch.topk(losses, min(topk, len(losses))).indices  # guard against datasets smaller than topk
# 3) Map indices back to file paths
fns = np.array(learn.dls.valid_ds.items)[idxs.cpu().numpy()]
true_lbls = targets[idxs].cpu().numpy()
pred_lbls = preds[idxs].cpu().numpy()
# 4) Build a FiftyOne dataset from just these samples
samples = []
for fn, tl, pl in zip(fns, true_lbls, pred_lbls):
    samples.append(
        fo.Sample(
            filepath=str(fn),
            ground_truth=fo.Classification(label=learn.dls.vocab[tl]),
            prediction=fo.Classification(label=learn.dls.vocab[pl]),
        )
    )
dataset = fo.Dataset("top_losses", overwrite=True)
dataset.add_samples(samples)
session = fo.launch_app(dataset)
session
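In the App, select the bad images and tag them "delete" (the App has a tag button for whatever is currently selected). You can also tag from code; here's a minimal sketch, assuming you've selected samples in the UI so that session.selected is populated:
# Tag whatever is currently selected in the App with "delete"
dataset.select(session.selected).tag_samples("delete")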
Then, once everything bad is tagged, run this code to move the files for the samples marked for deletion out of the training folders:
from pathlib import Path
import shutil
# Either use fastai's dataset root:
# root = Path(learn.dls.path)
# Or hardcode it:
root = Path(<root of path>)
# Optional safety: move deletions into a trash folder instead of permanently unlinking
trash_dir = root / "_trash"
trash_dir.mkdir(parents=True, exist_ok=True)
delete_count = 0
for sample in dataset:  # 'dataset' is your FiftyOne dataset
    if "delete" in (sample.tags or []):
        src = Path(sample.filepath)
        if src.exists():
            # move to trash instead of permanent delete
            dst = trash_dir / src.name
            i = 1
            while dst.exists():
                dst = trash_dir / f"{src.stem}_{i}{src.suffix}"
                i += 1
            shutil.move(str(src), dst)
            delete_count += 1
print(f"Moved {delete_count} images to {trash_dir}")
Now your “delete” images are no longer in the training folders, and you can reload your DataLoaders with:
dls = ImageDataLoaders.from_folder(root, valid_pct=0.2, seed=42, item_tfms=Resize(224))
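One caveat: _trash still sits under root, and from_folder with valid_pct recurses into every subfolder, so the trashed images come straight back in as a _trash class (the same folder would likewise show up as a class when you rebuild the FiftyOne dataset below). Either create the trash folder outside root, or filter it out. A minimal sketch using fastai's DataBlock (the helper name get_items_no_trash is my own):
from fastai.vision.all import *

# get_image_files recurses into every subfolder, so drop _trash here
def get_items_no_trash(path):
    return [f for f in get_image_files(path) if "_trash" not in f.parts]

dblock = DataBlock(
    blocks=(ImageBlock, CategoryBlock),
    get_items=get_items_no_trash,
    get_y=parent_label,
    splitter=RandomSplitter(valid_pct=0.2, seed=42),
    item_tfms=Resize(224),
)
dls = dblock.dataloaders(root)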
Then you can optionally rebuild the dataset from disk to see your changes in the FiftyOne UI:
import fiftyone as fo
clean_root = <dataset root>
dataset.delete() # if you want to replace it
dataset = fo.Dataset.from_dir(
    dataset_dir=clean_root,
    dataset_type=fo.types.ImageClassificationDirectoryTree,
)
session.dataset = dataset
Then retrain on the cleaned data and resume running learn.export() as normal in the next cell.
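For completeness, a minimal sketch of that last stretch, assuming roughly the book's chapter 2 setup (swap in whatever architecture, epochs, and metrics you were actually using):
from fastai.vision.all import *

# Retrain on the cleaned data; the old learner was fit on the
# uncleaned files (resnet18 / fine_tune(3) are just book-ish defaults)
learn = vision_learner(dls, resnet18, metrics=error_rate)
learn.fine_tune(3)
learn.export()  # writes export.pkl under learn.path by default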