ImageClassifierData.from_arrays

Possibly just removing your data/iceberg/tmp directory is all you need.

1 Like

I think that fixed that issue and immediately brought up a new error for me to explore. I should have thought of that. Thanks for the help.

It seems that tqdm might be tripping me up. Here is what I’m running:

learn = ConvLearner.pretrained(arch, data, precompute=True)

Output:

  0%|          | 0/41 [00:00<?, ?it/s]

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-35-0e8771e4fad3> in <module>()
----> 1 learn = ConvLearner.pretrained(arch, data, precompute=True)

~/fastaip1v2/fastai/courses/dl1/fastai/conv_learner.py in pretrained(self, f, data, ps, xtra_fc, xtra_cut, **kwargs)
     90     def pretrained(self, f, data, ps=None, xtra_fc=None, xtra_cut=0, **kwargs):
     91         models = ConvnetBuilder(f, data.c, data.is_multi, data.is_reg, ps=ps, xtra_fc=xtra_fc, xtra_cut=xtra_cut)
---> 92         return self(data, models, **kwargs)
     93 
     94     @property

~/fastaip1v2/fastai/courses/dl1/fastai/conv_learner.py in __init__(self, data, models, precompute, **kwargs)
     83         elif self.metrics is None:
     84             self.metrics = [accuracy_multi] if self.data.is_multi else [accuracy]
---> 85         if precompute: self.save_fc1()
     86         self.freeze()
     87         self.precompute = precompute

~/fastaip1v2/fastai/courses/dl1/fastai/conv_learner.py in save_fc1(self)
    124         if len(self.activations[0])==0:
    125             m=self.models.top_model
--> 126             predict_to_bcolz(m, self.data.fix_dl, act)
    127             predict_to_bcolz(m, self.data.val_dl, val_act)
    128             if self.data.test_dl: predict_to_bcolz(m, self.data.test_dl, test_act)

~/fastaip1v2/fastai/courses/dl1/fastai/model.py in predict_to_bcolz(m, gen, arr, workers)
     11     lock=threading.Lock()
     12     m.eval()
---> 13     for x,*_ in tqdm(gen):
     14         y = to_np(m(VV(x)).data)
     15         with lock:

~/anaconda3/envs/fastai/lib/python3.6/site-packages/tqdm/_tqdm.py in __iter__(self)
    951 """, fp_write=getattr(self.fp, 'write', sys.stderr.write))
    952 
--> 953             for obj in iterable:
    954                 yield obj
    955                 # Update and possibly print the progressbar.

~/fastaip1v2/fastai/courses/dl1/fastai/dataset.py in __next__(self)
    226         if self.i>=len(self.dl): raise StopIteration
    227         self.i+=1
--> 228         return next(self.it)
    229 
    230     @property

~/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/dataloader.py in __next__(self)
    199                 self.reorder_dict[idx] = batch
    200                 continue
--> 201             return self._process_next_batch(batch)
    202 
    203     next = __next__  # Python 2 compatibility

~/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/dataloader.py in _process_next_batch(self, batch)
    219         self._put_indices()
    220         if isinstance(batch, ExceptionWrapper):
--> 221             raise batch.exc_type(batch.exc_msg)
    222         return batch
    223 

TypeError: Traceback (most recent call last):
  File "/home/kbird/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 40, in _worker_loop
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "/home/kbird/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 40, in <listcomp>
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "/home/kbird/fastaip1v2/fastai/courses/dl1/fastai/dataset.py", line 99, in __getitem__
    return self.get(self.transform, x, y)
  File "/home/kbird/fastaip1v2/fastai/courses/dl1/fastai/dataset.py", line 104, in get
    return (x,y) if tfm is None else tfm(x,y)
  File "/home/kbird/fastaip1v2/fastai/courses/dl1/fastai/transforms.py", line 488, in __call__
    def __call__(self, im, y): return compose(im, y, self.tfms)
  File "/home/kbird/fastaip1v2/fastai/courses/dl1/fastai/transforms.py", line 469, in compose
    im, y =fn(im, y)
  File "/home/kbird/fastaip1v2/fastai/courses/dl1/fastai/transforms.py", line 246, in __call__
    x,y = ((self.transform(x),y) if self.tfm_y==TfmType.NO
  File "/home/kbird/fastaip1v2/fastai/courses/dl1/fastai/transforms.py", line 254, in transform
    x = self.do_transform(x)
  File "/home/kbird/fastaip1v2/fastai/courses/dl1/fastai/transforms.py", line 348, in do_transform
    return scale_min(x, self.sz)
  File "/home/kbird/fastaip1v2/fastai/courses/dl1/fastai/transforms.py", line 16, in scale_min
    r,c = im.size
TypeError: 'int' object is not iterable




So the problem again starts in the self.save_fc1() function, but this time it shows up at predict_to_bcolz(m, self.data.fix_dl, act), and if you keep digging into it, the problem seems to go into the tqdm package, which looks like it is the loading bar. I am really hoping I am just missing something simple. I already deleted my tmp directory and reran, and now I am once again not really sure where to go.

Ah, the problem is transformations - they assume a PIL image object, and you have an array. Interesting problem! I guess we should turn them back into objects when we read them… You’ll need to skip the resizing step in transforms for now.

1 Like

I was able to figure out what the issue is that required me to clear my tmp. I think the only issue is that the code under save_fc1 in conv_learner.py has the following:

def save_fc1(self):
        """Precompute activations of ``self.models.top_model`` and expose them as ``self.fc_data``.

        The activation arrays are filled with ``predict_to_bcolz`` from the fix,
        validation and (if present) test data loaders, and ``self.fc_data`` is then
        built from those arrays with ``ImageClassifierData.from_arrays``.
        """
        # presumably opens or creates the cached activation arrays -- confirm in get_activations
        self.get_activations()
        act, val_act, test_act = self.activations

        # NOTE(review): only the first (training) activation array is checked here, and
        # that single check guards all three predict_to_bcolz calls. If it already holds
        # data, the validation and test arrays are never filled, even when they are empty.
        if len(self.activations[0])==0:
            m=self.models.top_model
            predict_to_bcolz(m, self.data.fix_dl, act)
            predict_to_bcolz(m, self.data.val_dl, val_act)
            # The test activations are only computed when a test data loader exists.
            if self.data.test_dl: predict_to_bcolz(m, self.data.test_dl, test_act)

        # Wrap the activation arrays (with the original labels) as a new data object.
        self.fc_data = ImageClassifierData.from_arrays(self.data.path,
                (act, self.data.trn_y), (val_act, self.data.val_y), self.data.bs, classes=self.data.classes,
                test = test_act if self.data.test_dl else None, num_workers=8)

So after diving into this, I figured out that the first line creates blank files if there are no files currently in tmp, and otherwise just uses the files that are there. Then, if you keep going down, there is a test condition that says if len(self.activations[0])==0:. Since this already had data in it and its length was not 0, the code wasn’t going into that if block, so the issue only occurs if you run
learn = ConvLearner.pretrained(arch, data, precompute=True)
without a test_name specified in your ImageClassifierData.from_paths (and probably similarly for the other constructors), and then try to add one afterwards. This could potentially be fixed by checking the length of each activation and running the whole block if any of them is 0, but I think it would be best to give each activation its own if statement, similar to this:

    def save_fc1(self):
        """Fill any empty activation array, then expose the activations as ``self.fc_data``.

        The training ("fix"), validation and test activation arrays are each checked
        on their own, so an array that is still empty gets computed even when the
        others already hold data. The test array is only computed when
        ``self.data.test_dl`` is set.
        """
        self.get_activations()
        act, val_act, test_act = self.activations
        top_model = self.models.top_model

        if len(self.activations[0]) == 0:
            predict_to_bcolz(top_model, self.data.fix_dl, act)
        if len(self.activations[1]) == 0:
            predict_to_bcolz(top_model, self.data.val_dl, val_act)
        if len(self.activations[2]) == 0 and self.data.test_dl:
            predict_to_bcolz(top_model, self.data.test_dl, test_act)

        # Pair each activation array with the labels of the split it came from.
        trn = (act, self.data.trn_y)
        val = (val_act, self.data.val_y)
        test = test_act if self.data.test_dl else None
        self.fc_data = ImageClassifierData.from_arrays(
            self.data.path, trn, val, self.data.bs,
            classes=self.data.classes, test=test, num_workers=8)

This says that if any of the activations are empty, go grab that activation. Does this seem like a viable fix to this issue or is it even something you would consider an issue since all you have to do is delete the tmp files and it fixes itself?

Yes it absolutely does - and awesome debugging skills on display there! :smiley:

It so happens I suggested this exact fix to @yinterian this afternoon, and she’s planning to implement it. However I know she’s got meetings all day tomorrow, so if you feel like having a go yourself, I’m sure she’d appreciate a PR!

Thanks, I felt like I was going in circles, but eventually it started to make sense what was actually going on. I definitely learned a lot from the experience, and hopefully next time I will be able to find the solution much quicker. I have a lot of lines where I was basically just trying to figure out how everything was actually linking together.

Try using pdb inside jupyter notebook, if you haven’t already - it makes this kind of thing much easier.

Oh I haven’t. I will check it out. Trying to get a first submission tonight for dog breed competition. I am going to go back to the iceberg challenge, but I needed to step back and make sure I could do it on something more similar to cats vs dogs before I tried that.

So here’s what you need to know: https://davidhamann.de/2017/04/22/debugging-jupyter-notebooks/ . And then just look up the docs for pdb. I think you’ll find it a life-changer! :smiley:

2 Likes

I always appreciate a tool recommendation. Especially a life-changing tool recommendation. :slight_smile:

Just want to pop in and say this brought a long while of debugging to an end.


Couldn’t get ConvLearner to work with a from_arrays(..) after first training it w/o a test array. Deleting the tmp/ directory fixed that right away (iceberg kaggle comp.).

Also, I’m excited to see how the fastai library will develop. Any teething problems will be worth it, for a general-purpose deep learning library.

1 Like

Since some of us are working on statoil I thought I would bring this up. I am trying to use ImageClassifierData.from_arrays api since the data is given in numerical array format .
Ok so after using train_test_split( ) from sklearn I have the following shapes -

X_tr - (1074, 75, 75, 3)
y_tr - (1074,)
X_vl - (530, 75, 75, 3)
y_vl - (530,)

After that I used the api as in lesson 1

f_model = resnet18
def get_data(sz,bs):
    """Build the ImageClassifierData for image size `sz` and batch size `bs`.

    Uses the notebook-level arrays (X_tr, y_tr, X_vl, y_vl, X_test), PATH and f_model.
    """
    tfms = tfms_from_model(f_model, sz, aug_tfms=transforms_top_down, max_zoom=1.00)
    # The transforms have to be handed to from_arrays; if they are only built and not
    # passed on, `sz` and the augmentations never reach the datasets.
    data = ImageClassifierData.from_arrays(PATH, (X_tr,y_tr), (X_vl,y_vl), test=X_test, bs=bs,
                                           classes=2, tfms=tfms)
    return data

I then used

sz = 32
bs  = 32
data = get_data(sz, bs)
data.sz

The value of data.sz is 75 not 32, odd! I believe the resizing did not happen properly.

learn = ConvLearner.pretrained(f_model, data)

It compiled no problem.
The problem I believe starts from the next step , when I tried to find a suitable learning rate.

lrf=learn.lr_find()

The line above produces this beautiful error message -

A Jupyter Widget


  0%|          | 0/34 [00:00<?, ?it/s]
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-63-e391e26ed47c> in <module>()
----> 1 lrf=learn.lr_find()

~/fastai/courses/dl1/fastai/learner.py in lr_find(self, start_lr, end_lr, wds)
    215         layer_opt = self.get_layer_opt(start_lr, wds)
    216         self.sched = LR_Finder(layer_opt, len(self.data.trn_dl), end_lr)
--> 217         self.fit_gen(self.model, self.data, layer_opt, 1)
    218         self.load('tmp')
    219 

~/fastai/courses/dl1/fastai/learner.py in fit_gen(self, model, data, layer_opt, n_cycle, cycle_len, cycle_mult, cycle_save_name, metrics, callbacks, **kwargs)
    124         n_epoch = sum_geom(cycle_len if cycle_len else 1, cycle_mult, n_cycle)
    125         fit(model, data, n_epoch, layer_opt.opt, self.crit,
--> 126             metrics=metrics, callbacks=callbacks, reg_fn=self.reg_fn, clip=self.clip, **kwargs)
    127 
    128     def get_layer_groups(self): return self.models.get_layer_groups()

~/fastai/courses/dl1/fastai/model.py in fit(model, data, epochs, opt, crit, metrics, callbacks, **kwargs)
     80         stepper.reset(True)
     81         t = tqdm(iter(data.trn_dl), leave=False, total=len(data.trn_dl))
---> 82         for (*x,y) in t:
     83             batch_num += 1
     84             loss = stepper.step(V(x),V(y))

~/anaconda3/lib/python3.6/site-packages/tqdm/_tqdm.py in __iter__(self)
    951 """, fp_write=getattr(self.fp, 'write', sys.stderr.write))
    952 
--> 953             for obj in iterable:
    954                 yield obj
    955                 # Update and possibly print the progressbar.

~/fastai/courses/dl1/fastai/dataset.py in __next__(self)
    222         if self.i>=len(self.dl): raise StopIteration
    223         self.i+=1
--> 224         return next(self.it)
    225 
    226     @property

~/fastai/courses/dl1/fastai/dataloader.py in __iter__(self)
     64     def __iter__(self):
     65         with ThreadPoolExecutor(max_workers=self.num_workers) as e:
---> 66             for batch in e.map(self.get_batch, iter(self.batch_sampler)):
     67                 yield get_tensor(batch, self.pin_memory)
     68 

~/anaconda3/lib/python3.6/concurrent/futures/_base.py in result_iterator()
    584                     # Careful not to keep a reference to the popped future
    585                     if timeout is None:
--> 586                         yield fs.pop().result()
    587                     else:
    588                         yield fs.pop().result(end_time - time.time())

~/anaconda3/lib/python3.6/concurrent/futures/_base.py in result(self, timeout)
    423                 raise CancelledError()
    424             elif self._state == FINISHED:
--> 425                 return self.__get_result()
    426 
    427             self._condition.wait(timeout)

~/anaconda3/lib/python3.6/concurrent/futures/_base.py in __get_result(self)
    382     def __get_result(self):
    383         if self._exception:
--> 384             raise self._exception
    385         else:
    386             return self._result

~/anaconda3/lib/python3.6/concurrent/futures/thread.py in run(self)
     54 
     55         try:
---> 56             result = self.fn(*self.args, **self.kwargs)
     57         except BaseException as exc:
     58             self.future.set_exception(exc)

~/fastai/courses/dl1/fastai/dataloader.py in get_batch(self, indices)
     60     def __len__(self): return len(self.batch_sampler)
     61 
---> 62     def get_batch(self, indices): return self.collate_fn([self.dataset[i] for i in indices])
     63 
     64     def __iter__(self):

~/fastai/courses/dl1/fastai/dataloader.py in <listcomp>(.0)
     60     def __len__(self): return len(self.batch_sampler)
     61 
---> 62     def get_batch(self, indices): return self.collate_fn([self.dataset[i] for i in indices])
     63 
     64     def __iter__(self):

~/fastai/courses/dl1/fastai/dataset.py in __getitem__(self, idx)
     92 
     93     def __getitem__(self, idx):
---> 94         x,y = self.get_x(idx),self.get_y(idx)
     95         return self.get(self.transform, x, y)
     96 

~/fastai/courses/dl1/fastai/dataset.py in get_y(self, i)
    168         super().__init__(transform)
    169     def get_x(self, i): return self.x[i]
--> 170     def get_y(self, i): return self.y[i]
    171     def get_n(self): return len(self.y)
    172     def get_sz(self): return self.x.shape[1]

~/anaconda3/lib/python3.6/site-packages/pandas/core/series.py in __getitem__(self, key)
    621         key = com._apply_if_callable(key, self)
    622         try:
--> 623             result = self.index.get_value(self, key)
    624 
    625             if not is_scalar(result):

~/anaconda3/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_value(self, series, key)
   2555         try:
   2556             return self._engine.get_value(s, k,
-> 2557                                           tz=getattr(series.dtype, 'tz', None))
   2558         except KeyError as e1:
   2559             if len(self) > 0 and self.inferred_type in ['integer', 'boolean']:

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_value()

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_value()

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()

KeyError: 944 

So , if anybody tried to solve statoil challenge using train_test_split and .from_arrays api - please let me know.

According to that error you have a pandas dataframe, not a numpy array?..

Just to reiterate, I have my training data samples as numpy array and I should convert it to pandas DataFrame … something along that line …

No the opposite - you need a numpy array. But your error message shows you have a pandas dataframe.

I fixed my initial issue - but I now have another one when I run the next line -
learn.fit( 0.01,1,cycle_len=1,wds = wd)

The code runs for a couple of epochs, but then it gets stuck and produces this error:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
in ()
1 wd=5e-4
----> 2 learn.fit( 0.01,1,cycle_len=1,wds = wd)

~/fastai/courses/dl1/fastai/learner.py in fit(self, lrs, n_cycle, wds, **kwargs)
    177         self.sched = None
    178         layer_opt = self.get_layer_opt(lrs, wds)
--> 179         self.fit_gen(self.model, self.data, layer_opt, n_cycle, **kwargs)
    180 
    181     def lr_find(self, start_lr=1e-5, end_lr=10, wds=None):

~/fastai/courses/dl1/fastai/learner.py in fit_gen(self, model, data, layer_opt, n_cycle, cycle_len, cycle_mult, cycle_save_name, metrics, callbacks, **kwargs)
    124         n_epoch = sum_geom(cycle_len if cycle_len else 1, cycle_mult, n_cycle)
    125         fit(model, data, n_epoch, layer_opt.opt, self.crit,
--> 126             metrics=metrics, callbacks=callbacks, reg_fn=self.reg_fn, clip=self.clip, **kwargs)
    127 
    128     def get_layer_groups(self): return self.models.get_layer_groups()

~/fastai/courses/dl1/fastai/model.py in fit(model, data, epochs, opt, crit, metrics, callbacks, **kwargs)
     90             if stop: return
     91 
---> 92         vals = validate(stepper, data.val_dl, metrics)
     93         print(np.round([epoch, debias_loss] + vals, 6))
     94         stop=False

~/fastai/courses/dl1/fastai/model.py in validate(stepper, dl, metrics)
    100     stepper.reset(False)
    101     for (*x,y) in iter(dl):
--> 102         preds,l = stepper.evaluate(VV(x), VV(y))
    103         loss.append(to_np(l))
    104         res.append([f(to_np(preds),to_np(y)) for f in metrics])

~/fastai/courses/dl1/fastai/model.py in evaluate(self, xs, y)
     50 
     51     def evaluate(self, xs, y):
---> 52         preds = self.m(*xs)
     53         if isinstance(preds,(tuple,list)): preds=preds[0]
     54         return preds, self.crit(preds,y)

~/anaconda3/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    222         for hook in self._forward_pre_hooks.values():
    223             hook(self, input)
--> 224         result = self.forward(*input, **kwargs)
    225         for hook in self._forward_hooks.values():
    226             hook_result = hook(self, input, result)

~/anaconda3/lib/python3.6/site-packages/torch/nn/modules/container.py in forward(self, input)
     65     def forward(self, input):
     66         for module in self._modules.values():
---> 67             input = module(input)
     68         return input
     69 

~/anaconda3/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    222         for hook in self._forward_pre_hooks.values():
    223             hook(self, input)
--> 224         result = self.forward(*input, **kwargs)
    225         for hook in self._forward_hooks.values():
    226             hook_result = hook(self, input, result)

~/anaconda3/lib/python3.6/site-packages/torch/nn/modules/conv.py in forward(self, input)
    252     def forward(self, input):
    253         return F.conv2d(input, self.weight, self.bias, self.stride,
--> 254                         self.padding, self.dilation, self.groups)
    255 
    256 

~/anaconda3/lib/python3.6/site-packages/torch/nn/functional.py in conv2d(input, weight, bias, stride, padding, dilation, groups)
     50     f = ConvNd(_pair(stride), _pair(padding), _pair(dilation), False,
     51                _pair(0), groups, torch.backends.cudnn.benchmark, torch.backends.cudnn.enabled)
---> 52     return f(input, weight, bias)
     53 
     54 

RuntimeError: expected Double tensor (got Float tensor)

The response to this error is shown in two github posts on Pytorch namely here and here.
Should I start tinkering with the source code, or is there a workaround?

I am trying to understand how the v2 Lesson 1 model identifies the correctly classified images.

Can anyone explain how the labels (or, if not labels, whatever it holds) are stored in this array? I.e., does a 1 mean a dog or a cat?

data.val_y

In these functions we are comparing the prediction (probability) with this array.

def rand_by_mask(mask):
    """Return 4 distinct, randomly chosen indices at which `mask` is true."""
    candidates = np.where(mask)[0]
    return np.random.choice(candidates, 4, replace=False)
def rand_by_correct(is_correct):
    """Sample indices where the correctness of the predictions (preds == data.val_y) equals `is_correct`."""
    # The comparison below is the one the question that follows asks about.
    return rand_by_mask((preds == data.val_y)==is_correct)

How can we compare labels directly with probabilities?

In the ImageClassifierData class, this property (val_y) is found only inside the from_csv function, but we still access it via the object (i.e. data.val_y).

class ImageClassifierData(ImageData):
    """Image classification data built from arrays, class sub-folders, or a CSV label file."""

    @property
    def is_multi(self):
        """The `is_multi` flag of the dataset behind the training data loader."""
        return self.trn_dl.dataset.is_multi

    @staticmethod
    def get_ds(fn, trn, val, tfms, test=None, **kwargs):
        """Create the datasets used by the data object by calling `fn` once per dataset.

        The returned list is ordered: train, val, fix (training data with the
        validation transform), aug (validation data with the training transform),
        test, test_aug. The last two entries are None when `test` is None; otherwise
        the test datasets get an all-zero label array of shape (len(test), 1).

        Arguments:
            fn: dataset constructor, called as `fn(x, y, tfm, **kwargs)`
            trn: training data; element 0 is the inputs, element 1 the labels
            val: validation data, laid out like `trn`
            tfms: pair of transforms; element 0 for training, element 1 for validation
            test: test inputs, or None
            **kwargs: forwarded to every `fn` call
        """
        pairings = [
            (trn, tfms[0]),  # train
            (val, tfms[1]),  # val
            (trn, tfms[1]),  # fix
            (val, tfms[0]),  # aug
        ]
        datasets = [fn(src[0], src[1], tfm, **kwargs) for src, tfm in pairings]

        if test is None:
            datasets += [None, None]
        else:
            test_lbls = np.zeros((len(test), 1))
            datasets += [
                fn(test, test_lbls, tfms[1], **kwargs),  # test
                fn(test, test_lbls, tfms[0], **kwargs),  # test_aug
            ]
        return datasets

    @classmethod
    def from_arrays(cls, path, trn, val, bs=64, tfms=(None,None), classes=None, num_workers=4, test=None):
        """ Build the data object from images and labels already held in numpy arrays

        Arguments:
            path: root path of the data (used for storing trained models, precomputed values, etc)
            trn: tuple of training data matrix and label array (e.g. `trn=(x,y)` with `x` of
                shape `(5000, 784)` and `y` of shape `(5000,)`)
            val: tuple of validation data matrix and label array
            bs: batch size
            tfms: transformations (for data augmentations), e.g. the output of `tfms_from_model`
            classes: list of all labels/classifications
            num_workers: number of workers
            test: matrix of test data (its shape should match `trn[0]`)

        Returns:
            ImageClassifierData
        """
        return cls(
            path,
            cls.get_ds(ArraysIndexDataset, trn, val, tfms, test=test),
            bs,
            num_workers,
            classes=classes,
        )

    @classmethod
    def from_paths(cls, path, bs=64, tfms=(None,None), trn_name='train', val_name='valid', test_name=None, num_workers=8):
        """ Build the data object from images whose labels are their sub-folder names

        Arguments:
            path: root path of the data (used for storing trained models, precomputed values, etc)
            bs: batch size
            tfms: transformations (for data augmentations), e.g. the output of `tfms_from_model`
            trn_name: name of the folder that contains the training images
            val_name: name of the folder that contains the validation images
            test_name: name of the folder that contains the test images; no test set when falsy
            num_workers: number of workers

        Returns:
            ImageClassifierData
        """
        trn = folder_source(path, trn_name)
        val = folder_source(path, val_name)

        test_fnames = None
        if test_name:
            test_fnames = read_dir(path, test_name)

        datasets = cls.get_ds(FilesIndexArrayDataset, trn, val, tfms, path=path, test=test_fnames)
        # The third element of the training folder source supplies the classes.
        return cls(path, datasets, bs, num_workers, classes=trn[2])

    @classmethod
    def from_csv(cls, path, folder, csv_fname, bs=64, tfms=(None,None),
               val_idxs=None, suffix='', test_name=None, continuous=False, skip_header=True, num_workers=8):
        """ Build the data object from images whose labels are listed in a CSV file.

        Use this when the training labels come in a CSV file rather than as
        sub-directories named after the labels.

        Arguments:
            path: root path of the data (used for storing trained models, precomputed values, etc)
            folder: name of the folder in which the training images are contained
            csv_fname: name of the CSV file which contains the target labels
            bs: batch size
            tfms: transformations (for data augmentations), e.g. the output of `tfms_from_model`
            val_idxs: indexes of the images to be used for validation, e.g. the output of `get_cv_idxs`
            suffix: suffix to add to the image names in the CSV file (sometimes the CSV only holds the
                    file name without the extension, e.g. '.jpg', in which case set suffix to '.jpg')
            test_name: name of the folder which contains the test images; no test set when falsy
            continuous: forwarded to `csv_source`; when true the regression dataset class is used
            skip_header: skip the first row of the CSV file
            num_workers: number of workers

        Returns:
            ImageClassifierData
        """
        fnames, y, classes = csv_source(folder, csv_fname, skip_header, suffix, continuous=continuous)
        (val_fnames, trn_fnames), (val_y, trn_y) = split_by_idx(val_idxs, np.array(fnames), y)

        test_fnames = None
        if test_name:
            test_fnames = read_dir(path, test_name)

        # Regression targets, one label per image, or one-row-per-image label arrays.
        if continuous:
            dataset_cls = FilesIndexArrayRegressionDataset
        elif len(trn_y.shape) == 1:
            dataset_cls = FilesIndexArrayDataset
        else:
            dataset_cls = FilesNhotArrayDataset

        datasets = cls.get_ds(dataset_cls, (trn_fnames, trn_y), (val_fnames, val_y), tfms,
                              path=path, test=test_fnames)
        return cls(path, datasets, bs, num_workers, classes=classes)
File:           ~/fastai/courses/dl1/fastai/dataset.py
Type:           type
2 Likes

@mmr, I ran into similar errors as you did, also while trying to use ImageClassifierData.from_arrays to load trn/val folds for the iceberg competition.

Have you found a fix in the source code?

Looking at http://pytorch.org/docs/master/tensors.html, a workaround may be changing your inputs via numpy to the corresponding expected float or int types.

For instance, for “expected Double tensor (got Float tensor)”, setting your X_tr, X_vl data to float64 (DoubleTensor) before loading it into ImageClassifierData.from_arrays:

X_tr = np.float64(X_tr)
X_vl = np.float64(X_vl)

I encountered the reverse problem where it was expecting X_tr and X_vl to be float32 while I originally had them as float64. I also had to make sure my y_tr and y_vl are of type int64 (LongTensor).

2 Likes

Hi there,

I am trying the ImageClassifierData.from_arrays method on Kaggle’s Digit Recognizer competition. The images are 28x28 pixels, and Kaggle gives you a matrix where each image is flattened into a 784-element array (so each image is a row, each column is a pixel).

When I build my learner via ConvLearner.pretrained(...), I receive the following error (full text of error at the bottom of this post):

RuntimeError: Given groups=1, weight[64, 3, 7, 7], so expected input[32, 28, 28, 1] to have 3 channels, but got 28 channels instead

Does anyone have any idea on how to debug? I’ve included relevant detail from my code and the full text of the error below. (Have searched around the forums to see if this has been addressed, but no luck - thank you in advance for any help!!!)

Chloe

Here are my steps / attempts at troubleshooting:

  1. I used code from the Lesson 1 notebook and from the from_arrays method docstring to create my data object and then my initial learner. I used the resnet50 model because this performed well on small images per this link Jeremy posted in another thread. Relevant code excerpts:
sz = 28
arch=resnet50
bs=32

data = ImageClassifierData.from_arrays(PATH, trn=(X_train, y_train), val=(X_val, y_val), bs=bs, classes=labels, num_workers=4, test=test)

learn = ConvLearner.pretrained(arch, data, precompute=True)

The last line (the learner) is what generates the error.

  1. The dimensions of trn and val are as follows :
X_train: (11200, 28, 28, 1)
X_val: (2800, 28, 28, 1)
y_train: (33600,)
y_val: (8400,)
  1. Other X_train & X_val dimensions I tried: I also tried the code with the initial data format intact (n x 784) – I received the following error: Expected 4D tensor as input, got 2D tensor instead - which is why I reshaped to n x 28 x 28 x 1. I also tried n x 28 x 28 x 3 but received an AssertionError when creating my data object because the dims of x and y do not match.

  2. I’ve cleared my tmp folder each time as well via shutil.rmtree(f'{PATH}tmp', ignore_errors=True).

Full text of error:


[details="Summary"]
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-141-bfd96ca26d21> in <module>()
----> 1 learn = ConvLearner.pretrained(arch, data, precompute=True)

~/fastai/courses/dl1/fastai/conv_learner.py in pretrained(cls, f, data, ps, xtra_fc, xtra_cut, precompute, **kwargs)
     96     def pretrained(cls, f, data, ps=None, xtra_fc=None, xtra_cut=0, precompute=False, **kwargs):
     97         models = ConvnetBuilder(f, data.c, data.is_multi, data.is_reg, ps=ps, xtra_fc=xtra_fc, xtra_cut=xtra_cut)
---> 98         return cls(data, models, precompute, **kwargs)
     99 
    100     @property

~/fastai/courses/dl1/fastai/conv_learner.py in __init__(self, data, models, precompute, **kwargs)
     89         elif self.metrics is None:
     90             self.metrics = [accuracy_thresh(0.5)] if self.data.is_multi else [accuracy]
---> 91         if precompute: self.save_fc1()
     92         self.freeze()
     93         self.precompute = precompute

~/fastai/courses/dl1/fastai/conv_learner.py in save_fc1(self)
    141         m=self.models.top_model
    142         if len(self.activations[0])!=len(self.data.trn_ds):
--> 143             predict_to_bcolz(m, self.data.fix_dl, act)
    144         if len(self.activations[1])!=len(self.data.val_ds):
    145             predict_to_bcolz(m, self.data.val_dl, val_act)

~/fastai/courses/dl1/fastai/model.py in predict_to_bcolz(m, gen, arr, workers)
     12     m.eval()
     13     for x,*_ in tqdm(gen):
---> 14         y = to_np(m(VV(x)).data)
     15         with lock:
     16             arr.append(y)

~/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    355             result = self._slow_forward(*input, **kwargs)
    356         else:
--> 357             result = self.forward(*input, **kwargs)
    358         for hook in self._forward_hooks.values():
    359             hook_result = hook(self, input, result)

~/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/nn/modules/container.py in forward(self, input)
     65     def forward(self, input):
     66         for module in self._modules.values():
---> 67             input = module(input)
     68         return input
     69 

~/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    355             result = self._slow_forward(*input, **kwargs)
    356         else:
--> 357             result = self.forward(*input, **kwargs)
    358         for hook in self._forward_hooks.values():
    359             hook_result = hook(self, input, result)

~/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/nn/modules/conv.py in forward(self, input)
    280     def forward(self, input):
    281         return F.conv2d(input, self.weight, self.bias, self.stride,
--> 282                         self.padding, self.dilation, self.groups)
    283 
    284 

~/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/nn/functional.py in conv2d(input, weight, bias, stride, padding, dilation, groups)
     88                 _pair(0), groups, torch.backends.cudnn.benchmark,
     89                 torch.backends.cudnn.deterministic, torch.backends.cudnn.enabled)
---> 90     return f(input, weight, bias)
     91 
     92 

RuntimeError: Given groups=1, weight[64, 3, 7, 7], so expected input[32, 28, 28, 1] to have 3 channels, but got 28 channels instead
[/details]

1 Like