Is there a way to create a super simple quick and dirty item_tfms in fastai2?
I find it inconvenient in fastai2 to handle those PILImage objects, I can’t find a way to convert them to a tensor, modify it and re-convert it to PILImage without the dataloader crashing.
I just want to add simple random noise to my inputs but couldn’t find a simple way to do it.
You can pass any function you want as transform.
Well, passing something as simple as this does not work:
def RandomNoise(x):
    """Add random normal noise (scaled by 10) to a `PILImage`.

    Image inputs are converted to a NumPy array, perturbed, clipped to
    [0, 255] and returned as that NumPy array (not as a `PILImage`).
    Any other input is returned unchanged.
    """
    # Guard clause: leave everything that is not a PILImage untouched.
    if not isinstance(x, PILImage):
        return x
    pixels = np.array(x)
    noise = np.random.randn(*pixels.shape) * 10
    # NOTE(review): the noise is cast to uint8 before the in-place add, so
    # negative noise values are not preserved as negatives — confirm intent.
    pixels += noise.astype('uint8')
    return np.clip(pixels, 0, 255)
I get this message:
“Could not do one pass in your dataloader, there is something wrong in it”
and then when asking for one_batch I get:
“IndexError: tuple index out of range”
As explained in multiple topics, no one can help you if you don’t post all the code you executed and the full stack traces for the errors.
Sorry about that.
Here is the full error trace I get:
IndexError Traceback (most recent call last)
<ipython-input-37-90634fcc3c9e> in <module>
----> 1 dls.show_batch()
~/miniconda3/envs/fastai2/lib/python3.7/site-packages/fastai2/data/core.py in show_batch(self, b, max_n, ctxs, show, unique, **kwargs)
95 old_get_idxs = self.get_idxs
96 self.get_idxs = lambda: Inf.zeros
---> 97 if b is None: b = self.one_batch()
98 if not show: return self._pre_show_batch(b, max_n=max_n)
99 show_batch(*self._pre_show_batch(b, max_n=max_n), ctxs=ctxs, max_n=max_n, **kwargs)
~/miniconda3/envs/fastai2/lib/python3.7/site-packages/fastai2/data/load.py in one_batch(self)
130 def one_batch(self):
131 if self.n is not None and len(self)==0: raise ValueError(f'This DataLoader does not contain any batches')
--> 132 with self.fake_l.no_multiproc(): res = first(self)
133 if hasattr(self, 'it'): delattr(self, 'it')
134 return res
~/miniconda3/envs/fastai2/lib/python3.7/site-packages/fastcore/utils.py in first(x)
182 def first(x):
183 "First element of `x`, or None if missing"
--> 184 try: return next(iter(x))
185 except StopIteration: return None
186
~/miniconda3/envs/fastai2/lib/python3.7/site-packages/fastai2/data/load.py in __iter__(self)
96 self.randomize()
97 self.before_iter()
---> 98 for b in _loaders[self.fake_l.num_workers==0](self.fake_l):
99 if self.device is not None: b = to_device(b, self.device)
100 yield self.after_batch(b)
~/miniconda3/envs/fastai2/lib/python3.7/site-packages/torch/utils/data/dataloader.py in __next__(self)
343
344 def __next__(self):
--> 345 data = self._next_data()
346 self._num_yielded += 1
347 if self._dataset_kind == _DatasetKind.Iterable and \
~/miniconda3/envs/fastai2/lib/python3.7/site-packages/torch/utils/data/dataloader.py in _next_data(self)
383 def _next_data(self):
384 index = self._next_index() # may raise StopIteration
--> 385 data = self._dataset_fetcher.fetch(index) # may raise StopIteration
386 if self._pin_memory:
387 data = _utils.pin_memory.pin_memory(data)
~/miniconda3/envs/fastai2/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py in fetch(self, possibly_batched_index)
32 raise StopIteration
33 else:
---> 34 data = next(self.dataset_iter)
35 return self.collate_fn(data)
36
~/miniconda3/envs/fastai2/lib/python3.7/site-packages/fastai2/data/load.py in create_batches(self, samps)
105 self.it = iter(self.dataset) if self.dataset is not None else None
106 res = filter(lambda o:o is not None, map(self.do_item, samps))
--> 107 yield from map(self.do_batch, self.chunkify(res))
108
109 def new(self, dataset=None, cls=None, **kwargs):
~/miniconda3/envs/fastai2/lib/python3.7/site-packages/fastcore/utils.py in chunked(it, cs, drop_last)
276 if not isinstance(it, Iterator): it = iter(it)
277 while True:
--> 278 res = list(itertools.islice(it, cs))
279 if res and (len(res)==cs or not drop_last): yield res
280 if len(res)<cs: return
~/miniconda3/envs/fastai2/lib/python3.7/site-packages/fastai2/data/load.py in do_item(self, s)
118 def prebatched(self): return self.bs is None
119 def do_item(self, s):
--> 120 try: return self.after_item(self.create_item(s))
121 except SkipItemException: return None
122 def chunkify(self, b): return b if self.prebatched else chunked(b, self.bs, self.drop_last)
~/miniconda3/envs/fastai2/lib/python3.7/site-packages/fastcore/transform.py in __call__(self, o)
185 self.fs.append(t)
186
--> 187 def __call__(self, o): return compose_tfms(o, tfms=self.fs, split_idx=self.split_idx)
188 def __repr__(self): return f"Pipeline: {' -> '.join([f.name for f in self.fs if f.name != 'noop'])}"
189 def __getitem__(self,i): return self.fs[i]
~/miniconda3/envs/fastai2/lib/python3.7/site-packages/fastcore/transform.py in compose_tfms(x, tfms, is_enc, reverse, **kwargs)
138 for f in tfms:
139 if not is_enc: f = f.decode
--> 140 x = f(x, **kwargs)
141 return x
142
~/miniconda3/envs/fastai2/lib/python3.7/site-packages/fastai2/vision/augment.py in __call__(self, b, split_idx, **kwargs)
30
31 def __call__(self, b, split_idx=None, **kwargs):
---> 32 self.before_call(b, split_idx=split_idx)
33 return super().__call__(b, split_idx=split_idx, **kwargs) if self.do else b
34
~/miniconda3/envs/fastai2/lib/python3.7/site-packages/fastai2/vision/augment.py in before_call(self, b, split_idx)
169 self.orig_sz = _get_sz(b)
170 if split_idx: self.tl = (self.orig_sz-self.size)//2
--> 171 else: self.tl = Tuple(random.randint(0,self.orig_sz[0]-self.size[0]), random.randint(0,self.orig_sz[1]-self.size[1]))
172
173 def encodes(self, x:(Image.Image,TensorBBox,TensorPoint)):
IndexError: tuple index out of range
How did you build the DataBlock/DataLoaders as well?
Here is the DL building:
# Batch size: 8 when `cuda` is truthy, otherwise 2
# (presumably `cuda` flags GPU availability — confirm where it is defined).
bs = 8 if cuda else 2
def RandomNoise(x):
    """Return a noisy NumPy copy of `x`.

    `x` is copied into a NumPy array, random normal noise scaled by 10 is
    cast to uint8 and added in place to that copy, and the result is clipped
    to [0, 255]. The return value is a NumPy array, whatever type `x` was.
    """
    pixels = np.array(x)
    noise = np.random.randn(*pixels.shape) * 10
    # NOTE(review): casting to uint8 before adding discards the sign of the
    # noise — confirm this is intended.
    pixels += noise.astype('uint8')
    return np.clip(pixels, 0, 255)
# Image-to-image pipeline: an ImageBlock input paired with an ImageBlock
# target that is created through PILImageBW.
blocks = DataBlock(
    blocks=(ImageBlock, ImageBlock(cls=PILImageBW)),
    # Only .jpg files under the train/ and val/ folders; `pct` is 1.0 when
    # `cuda` is truthy and 0.1 otherwise.
    get_items=partial(
        get_files_subset,
        extensions=['.jpg'],
        folders=['train', 'val'],
        pct=1. if cuda else .1,
    ),
    splitter=ParentSplitter(),
    get_y=partial(get_edges, s=-1),
    # NOTE(review): list position may not be the execution order; check with
    # DataBlock.summary and each transform's `order` attribute.
    item_tfms=[RandomCrop(size=256), RandomNoise],
    batch_tfms=[],
)
dls = blocks.dataloaders(p, bs=bs, device='cuda' if cuda else 'cpu')
(get_edges func extracts a 2d np.array from .mat ground truth files)
You should use the `DataBlock.summary` method to see in which order your transforms are executed. My first guess is that `RandomNoise` is executed too early in the stack. You can set its order with its `order` attribute.
Great I did not know this method.
I finally got it to work: I added an `order` attribute to the function so that it is executed after ToTensor, and then rewrote the function for a tensor instead of an np.array:
def RandomNoise(x):
    """Add random normal noise (scaled by 20, truncated to int) to a `TensorImage`.

    The noisy result is clamped to [0, 255]. Inputs that are not a
    `TensorImage` are returned unchanged.
    """
    # Guard clause: only TensorImage inputs are perturbed.
    if not isinstance(x, TensorImage):
        return x
    jitter = (torch.randn(*x.shape) * 20).int()
    return torch.clamp(x + jitter, 0, 255)


# High `order` so that the transform is executed after ToTensor.
RandomNoise.order = 99
The error came from
torch.randn_like(x)
RuntimeError: "norma_cpu" not implemented for 'Byte'
but I checked x.dtype was uint8 so I don’t know why this ‘byte’ error…
Yes, uint8 is `Byte` for PyTorch. We convert tensors to float on the GPU only, that’s why you had it.
Note that, seeing your transform, you probably want to have it on the GPU too, to be faster (and pass it as a batch transform).
Hi,
I’m new to fastai but I’ve been using Pytorch for the last year. I’m training an image classifier with 250x250 grayscale images. Each image contains a white square in the bottom left and right corners with a timestamp.
Before feeding the model with my images, I wanted to create a simple item transform: I think this operation should be done when loading the image, before applying all the “real” (batch) transforms.
So after searching on the forums and the tutorials, this is the transform I have:
def remove_timestamps(x):
    """Zero out the two timestamp boxes of an image.

    `x` is converted to a NumPy array, rows 215-248 are set to 0 over
    columns 1-59 and columns 195-248, and the result is wrapped with
    `PILImage.create`.
    """
    pixels = np.array(x)
    # Both boxes share the same row band; only the column range differs.
    for cols in (slice(1, 60), slice(195, 249)):
        pixels[215:249, cols] = 0
    return PILImage.create(pixels)
# Wrap the plain function in a `Transform`.
# Called directly on a single image, this works perfectly.
RemoveTimestamps = Transform(remove_timestamps)
# Load the first training file through PILImageBW.
img = PILImageBW.create(os.path.join(PATH_DATA,train_files[0]))
# Apply the transform to that one image.
RemoveTimestamps(img)
I found another way to implement a transform. It also works as long as you don’t use it in a datablock:
class RemoveTimes(Transform):
    """Transform that zeroes the two timestamp regions of an image."""

    def encodes(self, img:(PILImage, PILImageBW)):
        """Return a `PILImageBW` built from `img` with both timestamp boxes set to 0.

        The annotation restricts this method to `PILImage` / `PILImageBW`
        inputs (the original author was unsure whether listing both is needed).
        """
        pixels = np.array(img)
        # Rows 215-248 hold both boxes; blank columns 1-59 and 195-248.
        for cols in (slice(1, 60), slice(195, 249)):
            pixels[215:249, cols] = 0
        return PILImageBW.create(pixels)
# This also works: apply the transform to an (image, label) tuple.
tfm = RemoveTimes()
img = PILImageBW.create(os.path.join(PATH_DATA, train_files[0]))
# The result is unpacked back into the image and its label.
a, b = tfm((img,'class0'))
# Display the transformed image in grayscale, with the label as title.
show_image(a, title=b, cmap='gray')
I have all the data in a CSV file, but I can’t use the usual folder structure, so I created a column to indicate which image is for validation. Then I created the DataBlock:
# This works if item_tfms contains only Resize(224).
dblock = DataBlock(
    blocks=(ImageBlock, CategoryBlock),
    # Validation membership is read from the 'valid_col' column.
    splitter=ColSplitter(col='valid_col'),
    # Column 2 is read for the input, prefixed with PATH_DATA + separator.
    get_x=ColReader(cols=2, pref=PATH_DATA + os.path.sep),
    # Column 3 is read for the target.
    get_y=ColReader(cols=3),
    item_tfms=[RemoveTimestamps, Resize(224)],
    batch_tfms=aug_transforms(),
)
dls = dblock.dataloaders(df, bs=256).cuda()  # df is the dataframe
dls.show_batch(max_n=16)
I’ve been able to train a classifier without the custom transform. But when I add it to the `item_tfms` list I get the error:
Could not do one pass in your dataloader, there is something wrong in it
The error occurs in this line of `remove_timestamps`:
x[215:249,1:60] = 0
It says: too many indices for array: array is 0-dimensional, but 2 were indexed
What am I missing? I’ve really tried to understand everything I read on the forums, but I’m finding the transition to fastai a bit more difficult than I expected…
Thanks in advance!
EDIT: After taking a closer look at the albumentations tutorial I found out that I had to use ItemTransform. I’ve changed my custom transform to become an ItemTransform and now it works fine!