Possibly just removing your data/iceberg/tmp directory is all you need.
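E.g. from inside your notebook (a quick sketch; adjust the path if your data lives elsewhere):

import shutil

# delete the cached/precomputed files so they get rebuilt on the next run
shutil.rmtree('data/iceberg/tmp', ignore_errors=True)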
I think that fixed that issue and immediately brought up a new error for me to explore. I should have thought of that. Thanks for the help.
It seems that tqdm might be tripping me up. Here is what I'm running:
learn = ConvLearner.pretrained(arch, data, precompute=True)
Output:
0%| | 0/41 [00:00<?, ?it/s]
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-35-0e8771e4fad3> in <module>()
----> 1 learn = ConvLearner.pretrained(arch, data, precompute=True)
~/fastaip1v2/fastai/courses/dl1/fastai/conv_learner.py in pretrained(self, f, data, ps, xtra_fc, xtra_cut, **kwargs)
90 def pretrained(self, f, data, ps=None, xtra_fc=None, xtra_cut=0, **kwargs):
91 models = ConvnetBuilder(f, data.c, data.is_multi, data.is_reg, ps=ps, xtra_fc=xtra_fc, xtra_cut=xtra_cut)
---> 92 return self(data, models, **kwargs)
93
94 @property
~/fastaip1v2/fastai/courses/dl1/fastai/conv_learner.py in __init__(self, data, models, precompute, **kwargs)
83 elif self.metrics is None:
84 self.metrics = [accuracy_multi] if self.data.is_multi else [accuracy]
---> 85 if precompute: self.save_fc1()
86 self.freeze()
87 self.precompute = precompute
~/fastaip1v2/fastai/courses/dl1/fastai/conv_learner.py in save_fc1(self)
124 if len(self.activations[0])==0:
125 m=self.models.top_model
--> 126 predict_to_bcolz(m, self.data.fix_dl, act)
127 predict_to_bcolz(m, self.data.val_dl, val_act)
128 if self.data.test_dl: predict_to_bcolz(m, self.data.test_dl, test_act)
~/fastaip1v2/fastai/courses/dl1/fastai/model.py in predict_to_bcolz(m, gen, arr, workers)
11 lock=threading.Lock()
12 m.eval()
---> 13 for x,*_ in tqdm(gen):
14 y = to_np(m(VV(x)).data)
15 with lock:
~/anaconda3/envs/fastai/lib/python3.6/site-packages/tqdm/_tqdm.py in __iter__(self)
951 """, fp_write=getattr(self.fp, 'write', sys.stderr.write))
952
--> 953 for obj in iterable:
954 yield obj
955 # Update and possibly print the progressbar.
~/fastaip1v2/fastai/courses/dl1/fastai/dataset.py in __next__(self)
226 if self.i>=len(self.dl): raise StopIteration
227 self.i+=1
--> 228 return next(self.it)
229
230 @property
~/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/dataloader.py in __next__(self)
199 self.reorder_dict[idx] = batch
200 continue
--> 201 return self._process_next_batch(batch)
202
203 next = __next__ # Python 2 compatibility
~/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/dataloader.py in _process_next_batch(self, batch)
219 self._put_indices()
220 if isinstance(batch, ExceptionWrapper):
--> 221 raise batch.exc_type(batch.exc_msg)
222 return batch
223
TypeError: Traceback (most recent call last):
File "/home/kbird/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 40, in _worker_loop
samples = collate_fn([dataset[i] for i in batch_indices])
File "/home/kbird/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 40, in <listcomp>
samples = collate_fn([dataset[i] for i in batch_indices])
File "/home/kbird/fastaip1v2/fastai/courses/dl1/fastai/dataset.py", line 99, in __getitem__
return self.get(self.transform, x, y)
File "/home/kbird/fastaip1v2/fastai/courses/dl1/fastai/dataset.py", line 104, in get
return (x,y) if tfm is None else tfm(x,y)
File "/home/kbird/fastaip1v2/fastai/courses/dl1/fastai/transforms.py", line 488, in __call__
def __call__(self, im, y): return compose(im, y, self.tfms)
File "/home/kbird/fastaip1v2/fastai/courses/dl1/fastai/transforms.py", line 469, in compose
im, y =fn(im, y)
File "/home/kbird/fastaip1v2/fastai/courses/dl1/fastai/transforms.py", line 246, in __call__
x,y = ((self.transform(x),y) if self.tfm_y==TfmType.NO
File "/home/kbird/fastaip1v2/fastai/courses/dl1/fastai/transforms.py", line 254, in transform
x = self.do_transform(x)
File "/home/kbird/fastaip1v2/fastai/courses/dl1/fastai/transforms.py", line 348, in do_transform
return scale_min(x, self.sz)
File "/home/kbird/fastaip1v2/fastai/courses/dl1/fastai/transforms.py", line 16, in scale_min
r,c = im.size
TypeError: 'int' object is not iterable
So it's in the self.save_fc1() function again where the problem starts, but this time it is predict_to_bcolz(m, self.data.fix_dl, act)
where the problem shows up. If you keep digging into it, it looks like the problem goes down into the tqdm package, which I believe is just the progress bar. I am really hoping I am just missing something simple. I already deleted my tmp directory and reran, and now I am not really sure where to go once again.
Ah, the problem is transformations - they assume a PIL image object, and you have an array. Interesting problem! I guess we should turn them back into objects when we read them... You'll need to skip the resizing step in transforms for now.
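If you want to keep going before a proper fix lands, one possible stopgap (just a sketch, not the library's fix) is to turn each array back into a PIL Image object before it reaches the transforms, since the resize step expects something with a PIL-style .size:

import numpy as np
from PIL import Image

# hypothetical helper: convert a float array in [0, 1] back into a PIL Image
def array_to_pil(x):
    return Image.fromarray((np.asarray(x) * 255).astype(np.uint8))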
I was able to figure out what the issue was that required me to clear my tmp. I think the only issue is that the code under save_fc1 in conv_learner.py has the following:
def save_fc1(self):
    self.get_activations()
    act, val_act, test_act = self.activations
    if len(self.activations[0])==0:
        m=self.models.top_model
        predict_to_bcolz(m, self.data.fix_dl, act)
        predict_to_bcolz(m, self.data.val_dl, val_act)
        if self.data.test_dl: predict_to_bcolz(m, self.data.test_dl, test_act)
    self.fc_data = ImageClassifierData.from_arrays(self.data.path,
        (act, self.data.trn_y), (val_act, self.data.val_y), self.data.bs, classes=self.data.classes,
        test = test_act if self.data.test_dl else None, num_workers=8)
So after diving into this I figured out that the first line creates blank files if there are no files currently in tmp, and otherwise just uses the files that are there. Then if you keep going down, there is a test condition that says if len(self.activations[0])==0:. Since this already had data in it and was not 0, it wasn't going into that block, so the issue only occurs if you run
learn = ConvLearner.pretrained(arch, data, precompute=True)
without a test_name identified in your ImageClassifierData.from_paths (and probably similarly for the other constructors) and then try to add that in afterwards. This could potentially be fixed by checking the length of each activation and running the whole block if any of them are 0, but I think it would be best to put each activation in its own if statement, similar to this:
def save_fc1(self):
    self.get_activations()
    act, val_act, test_act = self.activations
    m=self.models.top_model
    if len(self.activations[0])==0:
        predict_to_bcolz(m, self.data.fix_dl, act)
    if len(self.activations[1])==0:
        predict_to_bcolz(m, self.data.val_dl, val_act)
    if len(self.activations[2])==0:
        if self.data.test_dl: predict_to_bcolz(m, self.data.test_dl, test_act)
    self.fc_data = ImageClassifierData.from_arrays(self.data.path,
        (act, self.data.trn_y), (val_act, self.data.val_y), self.data.bs, classes=self.data.classes,
        test = test_act if self.data.test_dl else None, num_workers=8)
This says that if any of the activations are empty, go grab that activation. Does this seem like a viable fix, or is it even something you would consider an issue, since all you have to do is delete the tmp files and it fixes itself?
Yes it absolutely does - and awesome debugging skills on display there!
It so happens I suggested this exact fix to @yinterian this afternoon, and she's planning to implement it. However I know she's got meetings all day tomorrow, so if you feel like having a go yourself, I'm sure she'd appreciate a PR!
Thanks, I felt like I was going in circles, but eventually it started to make sense what was actually going on. I definitely learned a lot from the experience, and hopefully next time I will be able to find the solution much quicker. I have a lot of lines where I was basically just trying to figure out how everything was actually linking together.
Try using pdb inside jupyter notebook, if you haven't already - it makes this kind of thing much easier.
Oh I haven't. I will check it out. Trying to get a first submission in tonight for the dog breed competition. I am going to go back to the iceberg challenge, but I needed to step back and make sure I could do it on something more similar to cats vs dogs before I tried that.
So here's what you need to know: https://davidhamann.de/2017/04/22/debugging-jupyter-notebooks/ . And then just look up the docs for pdb. I think you'll find it a life-changer!
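For anyone new to it, two standard entry points from inside a notebook (plain IPython/pdb features, nothing fastai-specific):

# drop into the debugger right after an exception, at the failing frame
%debug

# or pause execution inside any function you want to inspect
from IPython.core.debugger import set_trace

def some_function(x):
    set_trace()   # then use n (next), s (step), c (continue), p <var> (print)
    return x * 2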
I always appreciate a tool recommendation. Especially a life-changing tool recommendation.
Just want to pop in and say this brought a long while of debugging to an end.
Couldn't get ConvLearner to work with a from_arrays(..) call after first training it w/o a test array. Deleting the tmp/ directory fixed that right away (iceberg kaggle comp.).
Also, I'm excited to see how the fastai library will develop. Any teething problems will be worth it, for a general-purpose deep learning library.
Since some of us are working on Statoil, I thought I would bring this up. I am trying to use the ImageClassifierData.from_arrays API since the data is given in numerical array format.
OK, so after using train_test_split() from sklearn I have the following shapes:
X_tr - (1074, 75, 75, 3)
y_tr - (1074,)
X_vl - (530, 75, 75, 3)
y_vl - (530,)
After that I used the API as in lesson 1:
f_model = resnet18
def get_data(sz,bs):
    tfms = tfms_from_model(f_model, sz, aug_tfms=transforms_top_down, max_zoom=1.00)
    data = ImageClassifierData.from_arrays(PATH, (X_tr,y_tr), (X_vl,y_vl), test=X_test, bs=bs, classes=2)
    return data
I then used
sz = 32
bs = 32
data = get_data(sz, bs)
data.sz
The value of data.sz is 75 not 32, odd! I believe the resizing did not happen properly.
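One thing worth double-checking (just a guess): get_data above builds tfms but never passes them into from_arrays, which defaults to tfms=(None,None), so no resize transform would be applied. Passing them explicitly would look something like this (data.sz may still report the raw array size, since it appears to be read from the array shape):

def get_data(sz, bs):
    tfms = tfms_from_model(f_model, sz, aug_tfms=transforms_top_down, max_zoom=1.00)
    # note tfms=tfms here; without it from_arrays falls back to its (None, None) default
    data = ImageClassifierData.from_arrays(PATH, (X_tr, y_tr), (X_vl, y_vl),
                                           test=X_test, bs=bs, classes=2, tfms=tfms)
    return data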
learn = ConvLearner.pretrained(f_model, data)
It compiled no problem.
The problem, I believe, starts from the next step, when I tried to find a suitable learning rate.
lrf=learn.lr_find()
The line above produces this beautiful error message -
A Jupyter Widget
0%| | 0/34 [00:00<?, ?it/s]
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-63-e391e26ed47c> in <module>()
----> 1 lrf=learn.lr_find()
~/fastai/courses/dl1/fastai/learner.py in lr_find(self, start_lr, end_lr, wds)
215 layer_opt = self.get_layer_opt(start_lr, wds)
216 self.sched = LR_Finder(layer_opt, len(self.data.trn_dl), end_lr)
--> 217 self.fit_gen(self.model, self.data, layer_opt, 1)
218 self.load('tmp')
219
~/fastai/courses/dl1/fastai/learner.py in fit_gen(self, model, data, layer_opt, n_cycle, cycle_len, cycle_mult, cycle_save_name, metrics, callbacks, **kwargs)
124 n_epoch = sum_geom(cycle_len if cycle_len else 1, cycle_mult, n_cycle)
125 fit(model, data, n_epoch, layer_opt.opt, self.crit,
--> 126 metrics=metrics, callbacks=callbacks, reg_fn=self.reg_fn, clip=self.clip, **kwargs)
127
128 def get_layer_groups(self): return self.models.get_layer_groups()
~/fastai/courses/dl1/fastai/model.py in fit(model, data, epochs, opt, crit, metrics, callbacks, **kwargs)
80 stepper.reset(True)
81 t = tqdm(iter(data.trn_dl), leave=False, total=len(data.trn_dl))
---> 82 for (*x,y) in t:
83 batch_num += 1
84 loss = stepper.step(V(x),V(y))
~/anaconda3/lib/python3.6/site-packages/tqdm/_tqdm.py in __iter__(self)
951 """, fp_write=getattr(self.fp, 'write', sys.stderr.write))
952
--> 953 for obj in iterable:
954 yield obj
955 # Update and possibly print the progressbar.
~/fastai/courses/dl1/fastai/dataset.py in __next__(self)
222 if self.i>=len(self.dl): raise StopIteration
223 self.i+=1
--> 224 return next(self.it)
225
226 @property
~/fastai/courses/dl1/fastai/dataloader.py in __iter__(self)
64 def __iter__(self):
65 with ThreadPoolExecutor(max_workers=self.num_workers) as e:
---> 66 for batch in e.map(self.get_batch, iter(self.batch_sampler)):
67 yield get_tensor(batch, self.pin_memory)
68
~/anaconda3/lib/python3.6/concurrent/futures/_base.py in result_iterator()
584 # Careful not to keep a reference to the popped future
585 if timeout is None:
--> 586 yield fs.pop().result()
587 else:
588 yield fs.pop().result(end_time - time.time())
~/anaconda3/lib/python3.6/concurrent/futures/_base.py in result(self, timeout)
423 raise CancelledError()
424 elif self._state == FINISHED:
--> 425 return self.__get_result()
426
427 self._condition.wait(timeout)
~/anaconda3/lib/python3.6/concurrent/futures/_base.py in __get_result(self)
382 def __get_result(self):
383 if self._exception:
--> 384 raise self._exception
385 else:
386 return self._result
~/anaconda3/lib/python3.6/concurrent/futures/thread.py in run(self)
54
55 try:
---> 56 result = self.fn(*self.args, **self.kwargs)
57 except BaseException as exc:
58 self.future.set_exception(exc)
~/fastai/courses/dl1/fastai/dataloader.py in get_batch(self, indices)
60 def __len__(self): return len(self.batch_sampler)
61
---> 62 def get_batch(self, indices): return self.collate_fn([self.dataset[i] for i in indices])
63
64 def __iter__(self):
~/fastai/courses/dl1/fastai/dataloader.py in <listcomp>(.0)
60 def __len__(self): return len(self.batch_sampler)
61
---> 62 def get_batch(self, indices): return self.collate_fn([self.dataset[i] for i in indices])
63
64 def __iter__(self):
~/fastai/courses/dl1/fastai/dataset.py in __getitem__(self, idx)
92
93 def __getitem__(self, idx):
---> 94 x,y = self.get_x(idx),self.get_y(idx)
95 return self.get(self.transform, x, y)
96
~/fastai/courses/dl1/fastai/dataset.py in get_y(self, i)
168 super().__init__(transform)
169 def get_x(self, i): return self.x[i]
--> 170 def get_y(self, i): return self.y[i]
171 def get_n(self): return len(self.y)
172 def get_sz(self): return self.x.shape[1]
~/anaconda3/lib/python3.6/site-packages/pandas/core/series.py in __getitem__(self, key)
621 key = com._apply_if_callable(key, self)
622 try:
--> 623 result = self.index.get_value(self, key)
624
625 if not is_scalar(result):
~/anaconda3/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_value(self, series, key)
2555 try:
2556 return self._engine.get_value(s, k,
-> 2557 tz=getattr(series.dtype, 'tz', None))
2558 except KeyError as e1:
2559 if len(self) > 0 and self.inferred_type in ['integer', 'boolean']:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 944
So, if anybody has tried to solve the Statoil challenge using train_test_split and the .from_arrays API, please let me know.
According to that error you have a pandas DataFrame, not a numpy array?
Just to reiterate, I have my training data samples as a numpy array and I should convert it to a pandas DataFrame... something along that line...
No the opposite - you need a numpy array. But your error message shows you have a pandas dataframe.
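If y_tr / y_vl came straight out of a train_test_split on a DataFrame column, they are likely pandas Series whose index is no longer 0..n-1, which is consistent with the KeyError above. A quick way to get plain numpy arrays (a sketch, assuming the variable names from the earlier post):

import numpy as np

# positional indexing like y[i] fails on a shuffled pandas Series index,
# so convert the labels (and, if needed, the images) to plain numpy arrays
y_tr = np.asarray(y_tr)
y_vl = np.asarray(y_vl)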
I fixed my initial issue - but I now have another one when I run the next line -
learn.fit( 0.01,1,cycle_len=1,wds = wd)
The code runs a couple of epochs but then gets stuck and produces this error:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
in ()
1 wd=5e-4
----> 2 learn.fit( 0.01,1,cycle_len=1,wds = wd)
~/fastai/courses/dl1/fastai/learner.py in fit(self, lrs, n_cycle, wds, **kwargs)
177 self.sched = None
178 layer_opt = self.get_layer_opt(lrs, wds)
--> 179 self.fit_gen(self.model, self.data, layer_opt, n_cycle, **kwargs)
180
181 def lr_find(self, start_lr=1e-5, end_lr=10, wds=None):
~/fastai/courses/dl1/fastai/learner.py in fit_gen(self, model, data, layer_opt, n_cycle, cycle_len, cycle_mult, cycle_save_name, metrics, callbacks, **kwargs)
124 n_epoch = sum_geom(cycle_len if cycle_len else 1, cycle_mult, n_cycle)
125 fit(model, data, n_epoch, layer_opt.opt, self.crit,
--> 126 metrics=metrics, callbacks=callbacks, reg_fn=self.reg_fn, clip=self.clip, **kwargs)
127
128 def get_layer_groups(self): return self.models.get_layer_groups()
~/fastai/courses/dl1/fastai/model.py in fit(model, data, epochs, opt, crit, metrics, callbacks, **kwargs)
90 if stop: return
91
---> 92 vals = validate(stepper, data.val_dl, metrics)
93 print(np.round([epoch, debias_loss] + vals, 6))
94 stop=False
~/fastai/courses/dl1/fastai/model.py in validate(stepper, dl, metrics)
100 stepper.reset(False)
101 for (*x,y) in iter(dl):
--> 102 preds,l = stepper.evaluate(VV(x), VV(y))
103 loss.append(to_np(l))
104 res.append([f(to_np(preds),to_np(y)) for f in metrics])
~/fastai/courses/dl1/fastai/model.py in evaluate(self, xs, y)
50
51 def evaluate(self, xs, y):
---> 52 preds = self.m(*xs)
53 if isinstance(preds,(tuple,list)): preds=preds[0]
54 return preds, self.crit(preds,y)
~/anaconda3/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
222 for hook in self._forward_pre_hooks.values():
223 hook(self, input)
--> 224 result = self.forward(*input, **kwargs)
225 for hook in self._forward_hooks.values():
226 hook_result = hook(self, input, result)
~/anaconda3/lib/python3.6/site-packages/torch/nn/modules/container.py in forward(self, input)
65 def forward(self, input):
66 for module in self._modules.values():
---> 67 input = module(input)
68 return input
69
~/anaconda3/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
222 for hook in self._forward_pre_hooks.values():
223 hook(self, input)
--> 224 result = self.forward(*input, **kwargs)
225 for hook in self._forward_hooks.values():
226 hook_result = hook(self, input, result)
~/anaconda3/lib/python3.6/site-packages/torch/nn/modules/conv.py in forward(self, input)
252 def forward(self, input):
253 return F.conv2d(input, self.weight, self.bias, self.stride,
--> 254 self.padding, self.dilation, self.groups)
255
256
~/anaconda3/lib/python3.6/site-packages/torch/nn/functional.py in conv2d(input, weight, bias, stride, padding, dilation, groups)
50 f = ConvNd(_pair(stride), _pair(padding), _pair(dilation), False,
51 _pair(0), groups, torch.backends.cudnn.benchmark, torch.backends.cudnn.enabled)
---> 52 return f(input, weight, bias)
53
54
RuntimeError: expected Double tensor (got Float tensor)
The response to this error is shown in two GitHub issues on PyTorch, namely here and here.
Should I start tinkering with the source code, or is there a workaround?
I am trying to understand how the v2 Lesson 1 model identifies the correctly classified images.
Can anyone explain how the labels (if not labels, then what) are stored in this array? i.e. does a 1 mean a dog or a cat?
data.val_y
In these functions we are comparing the prediction (probability) with this array.
def rand_by_mask(mask): return np.random.choice(np.where(mask)[0], 4, replace=False)
def rand_by_correct(is_correct): return rand_by_mask((preds == data.val_y)==is_correct)
How can we compare labels directly with probabilities?
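For what it's worth, as far as I remember the lesson 1 notebook (worth checking against your own copy), preds is not a probability but the argmax over the predicted log-probabilities, so it holds class indices that line up with data.classes and can be compared to data.val_y directly. Roughly:

log_preds = learn.predict()              # log-probabilities, shape (n_images, n_classes)
preds = np.argmax(log_preds, axis=1)     # predicted class index per image
probs = np.exp(log_preds[:, 1])          # probability of the class at index 1 (e.g. 'dogs' if data.classes == ['cats', 'dogs'])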
In the ImageClassifierData class, this property (val_y) is found only inside the from_csv function, but we still access it via the object (i.e. data.val_y).
class ImageClassifierData(ImageData):
    @property
    def is_multi(self): return self.trn_dl.dataset.is_multi

    @staticmethod
    def get_ds(fn, trn, val, tfms, test=None, **kwargs):
        res = [
            fn(trn[0], trn[1], tfms[0], **kwargs), # train
            fn(val[0], val[1], tfms[1], **kwargs), # val
            fn(trn[0], trn[1], tfms[1], **kwargs), # fix
            fn(val[0], val[1], tfms[0], **kwargs)  # aug
        ]
        if test is not None:
            test_lbls = np.zeros((len(test),1))
            res += [
                fn(test, test_lbls, tfms[1], **kwargs), # test
                fn(test, test_lbls, tfms[0], **kwargs)  # test_aug
            ]
        else: res += [None,None]
        return res

    @classmethod
    def from_arrays(cls, path, trn, val, bs=64, tfms=(None,None), classes=None, num_workers=4, test=None):
        """ Read in images and their labels given as numpy arrays

        Arguments:
            path: a root path of the data (used for storing trained models, precomputed values, etc)
            trn: a tuple of training data matrix and target label/classification array (e.g. `trn=(x,y)` where `x` has the
                shape of `(5000, 784)` and `y` has the shape of `(5000,)`)
            val: a tuple of validation data matrix and target label/classification array.
            bs: batch size
            tfms: transformations (for data augmentations). e.g. output of `tfms_from_model`
            classes: a list of all labels/classifications
            num_workers: a number of workers
            test: a matrix of test data (the shape should match `trn[0]`)

        Returns:
            ImageClassifierData
        """
        datasets = cls.get_ds(ArraysIndexDataset, trn, val, tfms, test=test)
        return cls(path, datasets, bs, num_workers, classes=classes)

    @classmethod
    def from_paths(cls, path, bs=64, tfms=(None,None), trn_name='train', val_name='valid', test_name=None, num_workers=8):
        """ Read in images and their labels given as sub-folder names

        Arguments:
            path: a root path of the data (used for storing trained models, precomputed values, etc)
            bs: batch size
            tfms: transformations (for data augmentations). e.g. output of `tfms_from_model`
            trn_name: a name of the folder that contains training images.
            val_name: a name of the folder that contains validation images.
            test_name: a name of the folder that contains test images.
            num_workers: number of workers

        Returns:
            ImageClassifierData
        """
        trn,val = [folder_source(path, o) for o in (trn_name, val_name)]
        test_fnames = read_dir(path, test_name) if test_name else None
        datasets = cls.get_ds(FilesIndexArrayDataset, trn, val, tfms, path=path, test=test_fnames)
        return cls(path, datasets, bs, num_workers, classes=trn[2])

    @classmethod
    def from_csv(cls, path, folder, csv_fname, bs=64, tfms=(None,None),
                 val_idxs=None, suffix='', test_name=None, continuous=False, skip_header=True, num_workers=8):
        """ Read in images and their labels given as a CSV file.

        This method should be used when training image labels are given in an CSV file as opposed to
        sub-directories with label names.

        Arguments:
            path: a root path of the data (used for storing trained models, precomputed values, etc)
            folder: a name of the folder in which training images are contained.
            csv_fname: a name of the CSV file which contains target labels.
            bs: batch size
            tfms: transformations (for data augmentations). e.g. output of `tfms_from_model`
            val_idxs: index of images to be used for validation. e.g. output of `get_cv_idxs`
            suffix: suffix to add to image names in CSV file (sometimes CSV only contains the file name without file
                extension e.g. '.jpg' - in which case, you can set suffix as '.jpg')
            test_name: a name of the folder which contains test images.
            continuous: TODO
            skip_header: skip the first row of the CSV file.
            num_workers: number of workers

        Returns:
            ImageClassifierData
        """
        fnames,y,classes = csv_source(folder, csv_fname, skip_header, suffix, continuous=continuous)
        ((val_fnames,trn_fnames),(val_y,trn_y)) = split_by_idx(val_idxs, np.array(fnames), y)
        test_fnames = read_dir(path, test_name) if test_name else None
        if continuous:
            f = FilesIndexArrayRegressionDataset
        else:
            f = FilesIndexArrayDataset if len(trn_y.shape)==1 else FilesNhotArrayDataset
        datasets = cls.get_ds(f, (trn_fnames,trn_y), (val_fnames,val_y), tfms,
            path=path, test=test_fnames)
        return cls(path, datasets, bs, num_workers, classes=classes)
File: ~/fastai/courses/dl1/fastai/dataset.py
Type: type
@mmr, I ran into similar errors as you did, also while trying to use ImageClassifierData.from_arrays to load trn/val folds for the iceberg competition.
Have you found a fix in the source code?
Looking at http://pytorch.org/docs/master/tensors.html, a workaround may be changing your inputs via numpy to the corresponding expected float or int types.
For instance, for "expected Double tensor (got Float tensor)", setting your X_tr, X_vl data to float64 (DoubleTensor) before loading it into ImageClassifierData.from_arrays:
X_tr = np.float64(X_tr)
X_vl = np.float64(X_vl)
I encountered the reverse problem where it was expecting X_tr and X_vl to be float32 while I originally had them as float64. I also had to make sure my y_tr and y_vl are of type int64 (LongTensor).
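In other words, which dtype you need depends on what the model ends up expecting. A sketch of the conversions for that reverse case, with float32 inputs and int64 labels (assuming the same variable names as above):

import numpy as np

# 32-bit float images (FloatTensor) and 64-bit integer labels (LongTensor)
X_tr = X_tr.astype(np.float32)
X_vl = X_vl.astype(np.float32)
y_tr = y_tr.astype(np.int64)
y_vl = y_vl.astype(np.int64)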
Hi there,
I am trying the ImageClassifierData.from_arrays method on Kaggle's Digit Recognizer competition. The images are 28x28 pixels, and Kaggle gives you a matrix where each image is flattened into a 784-element array (so each image is a row, each column is a pixel).
When I build my learner via ConvLearner.pretrained(...), I receive the following error (full text of error at the bottom of this post):
RuntimeError: Given groups=1, weight[64, 3, 7, 7], so expected input[32, 28, 28, 1] to have 3 channels, but got 28 channels instead
Does anyone have any idea on how to debug? I've included relevant detail from my code and the full text of the error below. (Have searched around the forums to see if this has been addressed, but no luck - thank you in advance for any help!!!)
Chloe
Here are my steps / attempts at troubleshooting:
- I used code from the Lesson 1 notebook and from the from_arrays method docstring to create my data object and then my initial learner. I used the resnet50 model because this performed well on small images per this link Jeremy posted in another thread. Relevant code excerpts:
sz = 28
arch=resnet50
bs=32
data = ImageClassifierData.from_arrays(PATH, trn=(X_train, y_train), val=(X_val, y_val), bs=bs, classes=labels, num_workers=4, test=test)
learn = ConvLearner.pretrained(arch, data, precompute=True)
The last line (the learner) is what generates the error.
- The dimensions of trn and val are as follows:
X_train: (11200, 28, 28, 1)
X_val: (2800, 28, 28, 1)
y_train: (33600,)
y_val: (8400,)
- Other X_train & X_val dimensions I tried: I also tried the code with the initial data format intact (n x 784) and received the following error:
Expected 4D tensor as input, got 2D tensor instead
which is why I reshaped to n x 28 x 28 x 1. I also tried n x 28 x 28 x 3 but received an AssertionError when creating my data object because the dims of x and y do not match.
- I've cleared my tmp folder each time as well, via shutil.rmtree(f'{PATH}tmp', ignore_errors=True).
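One more thing I may try (just a sketch, not something from the course notebooks): since the pretrained ImageNet models expect 3-channel inputs, the single grayscale channel can be replicated three times. Unlike reshaping straight into 3 channels, stacking keeps the number of rows equal to the number of labels:

import numpy as np

# flat (n, 784) rows -> (n, 28, 28) grayscale images
X_train = X_train.reshape(-1, 28, 28)
X_val = X_val.reshape(-1, 28, 28)

# replicate the grayscale channel three times -> (n, 28, 28, 3)
X_train = np.stack([X_train] * 3, axis=-1)
X_val = np.stack([X_val] * 3, axis=-1)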
Full text of error:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-141-bfd96ca26d21> in <module>()
----> 1 learn = ConvLearner.pretrained(arch, data, precompute=True)
~/fastai/courses/dl1/fastai/conv_learner.py in pretrained(cls, f, data, ps, xtra_fc, xtra_cut, precompute, **kwargs)
96 def pretrained(cls, f, data, ps=None, xtra_fc=None, xtra_cut=0, precompute=False, **kwargs):
97 models = ConvnetBuilder(f, data.c, data.is_multi, data.is_reg, ps=ps, xtra_fc=xtra_fc, xtra_cut=xtra_cut)
---> 98 return cls(data, models, precompute, **kwargs)
99
100 @property
~/fastai/courses/dl1/fastai/conv_learner.py in __init__(self, data, models, precompute, **kwargs)
89 elif self.metrics is None:
90 self.metrics = [accuracy_thresh(0.5)] if self.data.is_multi else [accuracy]
---> 91 if precompute: self.save_fc1()
92 self.freeze()
93 self.precompute = precompute
~/fastai/courses/dl1/fastai/conv_learner.py in save_fc1(self)
141 m=self.models.top_model
142 if len(self.activations[0])!=len(self.data.trn_ds):
--> 143 predict_to_bcolz(m, self.data.fix_dl, act)
144 if len(self.activations[1])!=len(self.data.val_ds):
145 predict_to_bcolz(m, self.data.val_dl, val_act)
~/fastai/courses/dl1/fastai/model.py in predict_to_bcolz(m, gen, arr, workers)
12 m.eval()
13 for x,*_ in tqdm(gen):
---> 14 y = to_np(m(VV(x)).data)
15 with lock:
16 arr.append(y)
~/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
355 result = self._slow_forward(*input, **kwargs)
356 else:
--> 357 result = self.forward(*input, **kwargs)
358 for hook in self._forward_hooks.values():
359 hook_result = hook(self, input, result)
~/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/nn/modules/container.py in forward(self, input)
65 def forward(self, input):
66 for module in self._modules.values():
---> 67 input = module(input)
68 return input
69
~/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
355 result = self._slow_forward(*input, **kwargs)
356 else:
--> 357 result = self.forward(*input, **kwargs)
358 for hook in self._forward_hooks.values():
359 hook_result = hook(self, input, result)
~/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/nn/modules/conv.py in forward(self, input)
280 def forward(self, input):
281 return F.conv2d(input, self.weight, self.bias, self.stride,
--> 282 self.padding, self.dilation, self.groups)
283
284
~/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/nn/functional.py in conv2d(input, weight, bias, stride, padding, dilation, groups)
88 _pair(0), groups, torch.backends.cudnn.benchmark,
89 torch.backends.cudnn.deterministic, torch.backends.cudnn.enabled)
---> 90 return f(input, weight, bias)
91
92
RuntimeError: Given groups=1, weight[64, 3, 7, 7], so expected input[32, 28, 28, 1] to have 3 channels, but got 28 channels instead