I’m trying to create a Transform for my y-block in the data-loader but the decode-function is never called but the encode function works fine and is called correctly.
I is decoding when I’m calling it separately but not when it’s used with the data loader.
class CustomText(str):
"""Helper function to be able to show the label of the data"""
def show(self, ctx=None, **kwargs):
return show_title(self, ctx=ctx, **kwargs)
class CustomTokenizer(Transform):
""" Converts characters to numbers and vise verse"""*
def __init__(self, df, char_limit=100, str_max_len=12):
self.df = df
self.str_max_len = str_max_len+2 # need to add start and stop
self.tokenstats = dict()
df['text'].apply(self.count_letters)
chars = [ c for c in sorted(self.tokenstats.keys()) if self.tokenstats[c]>char_limit]
all_chars = ['#pad#','#stop#','#unk#'] + chars
self.o2i = {c:i for i, c in enumerate(all_chars)}
self.vocab = {self.o2i[c]:c for c in self.o2i.keys()}
def count_letters(self, st):
for c in st:
n = self.tokenstats.get(c, 0) +1
self.tokenstats[c]=n
def encodes(self, x:CustomText):
print('encodes')
tokens = np.array([self.o2i.get(c, self.o2i['#unk#']) for c in x])
tokens = np.pad(plate, pad_width=(1, self.str_max_len-len(plate)-1), constant_values=self.o2i['#pad#'])
tokens[len(x)+1] = self.o2i['#stop#']
# how to add endchar?
return TensorText(tokens)
def decodes(self, x):
print('decodes')
encoded = [self.vocab.get(n, '#unk#') for n in x.cpu().detach().numpy() if n != self.o2i['#pad#'] and n!= self.o2i["#stop#"]]
return CustomText(''.join(encoded))
class Yblock():
def __init__(self,df):
self.df= df
def __call__(self):
ltok = LicenseTokenizer(self.df)
return TransformBlock(item_tfms=[ltok])
yblock = Yblock(df)
data = DataBlock(blocks=(ImageBlock, yblock),
get_items=get_items,
get_x=get_x, get_y=get_y,
item_tfms = [Resize((80,224), method='pad', pad_mode='border')],
splitter=RandomSplitter())
dls = data.dataloaders(df,bs=4)
batch = dls.train.one_batch()
decoded = dls.train.decode(b)
The decoded batch is of type TextTensor but I’m expecting it to be of type “str” and the decode function never prints out “decodes”.
The data.summary() function does not throw any errors.
How do I make it to be called correctly or ideas of how to debug this problem?