Fast_book 01_intro error tabular learner

edwincv0 · March 27, 2020, 12:22am

Hi,

I ran into this error when running Lesson 01 from fast book.

fastai/fastbook/blob/master/01_intro.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#hide\n",
    "from utils import *"
   ]
  },
  {
   "cell_type": "raw",
   "metadata": {},
   "source": [
    "[[chapter_intro]]"
   ]
  },
  {

This file has been truncated. show original

Looking at the source, I saw that the problem was the `path` parameter, so I tried fixing that in the Jupyter notebook.

Ran into a new error.

I'm using the latest fastai2 code and the latest fastbook.

Any suggestions?

edwincv0 · March 27, 2020, 12:26am

It looks like I'm also getting it on the collaborative filtering cell below.

muellerzr · March 27, 2020, 12:32am

Can you show the entire stack trace? We need the whole thing to know what’s going on.

edwincv0 · March 27, 2020, 3:24am

Hi @muellerzr,

```
TypeError                                 Traceback (most recent call last)
<ipython-input-...> in <module>
      6                  'relationship', 'race'],
      7     cont_names = ['age', 'fnlwgt', 'education-num'],
----> 8     procs = [Categorify, FillMissing, Normalize])
      9
     10 learn = tabular_learner(dls, metrics=accuracy)

~/Developer/fastai2/fastai2/tabular/data.py in from_csv(cls, csv, **kwargs)
     25     def from_csv(cls, csv, **kwargs):
     26         "Create from `csv` file in `path` using `procs`"
---> 27         return cls.from_df(pd.read_csv(csv), **kwargs)
     28
     29     @delegates(TabDataLoader.__init__)

~/Developer/fastai2/fastai2/tabular/data.py in from_df(cls, df, path, procs, cat_names, cont_names, y_names, y_block, valid_idx, **kwargs)
     19         if cont_names is None: cont_names = list(set(df)-set(cat_names)-set(y_names))
     20         splits = RandomSplitter()(df) if valid_idx is None else IndexSplitter(valid_idx)(df)
---> 21         to = TabularPandas(df, procs, cat_names, cont_names, y_names, splits=splits, y_block=y_block)
     22         return to.dataloaders(path=path, **kwargs)
     23

~/Developer/fastai2/fastai2/tabular/core.py in __init__(self, df, procs, cat_names, cont_names, y_names, y_block, splits, do_setup, device, inplace, reduce_memory)
    115         if y_block is None and self.y_names:
    116             # Make ys categorical if they're not numeric
--> 117             ys = df[self.y_names]
    118             if len(ys.select_dtypes(include='number').columns)!=len(ys.columns): y_block = CategoryBlock()
    119             else: y_block = RegressionBlock()

~/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py in __getitem__(self, key)
   2686             return self._getitem_multilevel(key)
   2687         else:
-> 2688             return self._getitem_column(key)
   2689
   2690     def _getitem_column(self, key):

~/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py in _getitem_column(self, key)
   2693         # get column
   2694         if self.columns.is_unique:
-> 2695             return self._get_item_cache(key)
   2696
   2697         # duplicate columns & possible reduce dimensionality

~/anaconda3/lib/python3.7/site-packages/pandas/core/generic.py in _get_item_cache(self, item)
   2485         """Return the cached item, item represents a label indexer."""
   2486         cache = self._item_cache
-> 2487         res = cache.get(item)
   2488         if res is None:
   2489             values = self._data.get(item)

TypeError: unhashable type: 'L'
```

edwincv0 · March 27, 2020, 3:24am

```
TypeError                                 Traceback (most recent call last)
<ipython-input-...> in <module>
      1 from fastai2.collab import *
      2 path = untar_data(URLs.ML_SAMPLE)
----> 3 dls = CollabDataLoaders.from_csv(path/'ratings.csv')
      4 learn = collab_learner(dls, y_range=(0.5,5.5))
      5 learn.fine_tune(10)

~/Developer/fastai2/fastai2/collab.py in from_csv(cls, csv, **kwargs)
     29     def from_csv(cls, csv, **kwargs):
     30         "Create a DataLoaders suitable for collaborative filtering from csv."
---> 31         return cls.from_df(pd.read_csv(csv), **kwargs)
     32
     33 CollabDataLoaders.from_csv = delegates(to=CollabDataLoaders.from_df)(CollabDataLoaders.from_csv)

~/Developer/fastai2/fastai2/collab.py in from_df(cls, ratings, valid_pct, user_name, item_name, rating_name, seed, path, **kwargs)
     23         cat_names = [user_name,item_name]
     24         splits = RandomSplitter(valid_pct=valid_pct, seed=seed)(range_of(ratings))
---> 25         to = TabularCollab(ratings, [Categorify], cat_names, y_names=[rating_name], y_block=TransformBlock(), splits=splits)
     26         return to.dataloaders(path=path, **kwargs)
     27

~/Developer/fastai2/fastai2/tabular/core.py in __init__(self, df, procs, cat_names, cont_names, y_names, y_block, splits, do_setup, device, inplace, reduce_memory)
    124         self.split = len(df) if splits is None else len(splits[0])
    125         if reduce_memory:
--> 126             if len(self.cat_names) > 0: self.reduce_cats()
    127             if len(self.cont_names) > 0: self.reduce_conts()
    128         if do_setup: self.setup()

~/Developer/fastai2/fastai2/tabular/core.py in reduce_cats(self)
    136     def decode(self): return self.procs.decode(self)
    137     def decode_row(self, row): return self.new(pd.DataFrame(row).T).decode().items.iloc[0]
--> 138     def reduce_cats(self): self.train[self.cat_names] = self.train[self.cat_names].astype('category')
    139     def reduce_conts(self): self[self.cont_names] = self[self.cont_names].astype(np.float32)
    140     def show(self, max_n=10, **kwargs): display_df(self.new(self.all_cols[:max_n]).decode().items)

~/anaconda3/lib/python3.7/site-packages/fastcore/foundation.py in __getitem__(self, k)
    276     def __init__(self, items): self.items = items
    277     def __len__(self): return len(self.items)
--> 278     def __getitem__(self, k): return self.items[k]
    279     def __setitem__(self, k, v): self.items[list(k) if isinstance(k,CollBase) else k] = v
    280     def __delitem__(self, i): del(self.items[i])

~/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py in __getitem__(self, key)
   2686             return self._getitem_multilevel(key)
   2687         else:
-> 2688             return self._getitem_column(key)
   2689
   2690     def _getitem_column(self, key):

~/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py in _getitem_column(self, key)
   2693         # get column
   2694         if self.columns.is_unique:
-> 2695             return self._get_item_cache(key)
   2696
   2697         # duplicate columns & possible reduce dimensionality

~/anaconda3/lib/python3.7/site-packages/pandas/core/generic.py in _get_item_cache(self, item)
   2485         """Return the cached item, item represents a label indexer."""
   2486         cache = self._item_cache
-> 2487         res = cache.get(item)
   2488         if res is None:
   2489             values = self._data.get(item)

TypeError: unhashable type: 'L'
```

sgugger · March 27, 2020, 1:08pm

You need to put the stack trace between two ``` for it to be properly formatted, it’s very hard to read otherwise.

In this case, I have fixed the example and confirmed it works on my side.

edwincv0 · March 27, 2020, 3:21pm

Hi @sgugger,

Thanks, I just did a pull and saw that you fixed the path error on the parameter.

Still getting this though when running.

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-3-d44927d4a8d6> in <module>
      6                  'relationship', 'race'],
      7     cont_names = ['age', 'fnlwgt', 'education-num'],
----> 8     procs = [Categorify, FillMissing, Normalize])
      9 
     10 learn = tabular_learner(dls, metrics=accuracy)

~/Developer/fastai2/fastai2/tabular/data.py in from_csv(cls, csv, **kwargs)
     25     def from_csv(cls, csv, **kwargs):
     26         "Create from `csv` file in `path` using `procs`"
---> 27         return cls.from_df(pd.read_csv(csv), **kwargs)
     28 
     29     @delegates(TabDataLoader.__init__)

~/Developer/fastai2/fastai2/tabular/data.py in from_df(cls, df, path, procs, cat_names, cont_names, y_names, y_block, valid_idx, **kwargs)
     19         if cont_names is None: cont_names = list(set(df)-set(cat_names)-set(y_names))
     20         splits = RandomSplitter()(df) if valid_idx is None else IndexSplitter(valid_idx)(df)
---> 21         to = TabularPandas(df, procs, cat_names, cont_names, y_names, splits=splits, y_block=y_block)
     22         return to.dataloaders(path=path, **kwargs)
     23 

~/Developer/fastai2/fastai2/tabular/core.py in __init__(self, df, procs, cat_names, cont_names, y_names, y_block, splits, do_setup, device, inplace, reduce_memory)
    115         if y_block is None and self.y_names:
    116             # Make ys categorical if they're not numeric
--> 117             ys = df[self.y_names]
    118             if len(ys.select_dtypes(include='number').columns)!=len(ys.columns): y_block = CategoryBlock()
    119             else: y_block = RegressionBlock()

~/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py in __getitem__(self, key)
   2686             return self._getitem_multilevel(key)
   2687         else:
-> 2688             return self._getitem_column(key)
   2689 
   2690     def _getitem_column(self, key):

~/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py in _getitem_column(self, key)
   2693         # get column
   2694         if self.columns.is_unique:
-> 2695             return self._get_item_cache(key)
   2696 
   2697         # duplicate columns & possible reduce dimensionality

~/anaconda3/lib/python3.7/site-packages/pandas/core/generic.py in _get_item_cache(self, item)
   2485         """Return the cached item, item represents a label indexer."""
   2486         cache = self._item_cache
-> 2487         res = cache.get(item)
   2488         if res is None:
   2489             values = self._data.get(item)

TypeError: unhashable type: 'L'

Going to give it a try on another system, will update thread after.

sgugger · March 27, 2020, 3:35pm

Are you on fastai2 master? It works there.

wyquek · March 27, 2020, 4:05pm

I’m curious: I don’t see `salary` as a column in `adult.csv`.

sgugger · March 27, 2020, 4:15pm

You have an old version of the dataset then, you should remove it locally and re-download it.

edwincv0 · March 28, 2020, 12:13am

@sgugger I did a fresh install of the fastai2 conda environment and can confirm it’s working now. Thanks for fixing the typo in the path.

DanielLam · March 28, 2020, 9:29pm

Hi,

I’m running into an error on the TabularDataLoaders in NB1. It worked fine last week, but I tried it again today, and am running into an error. I just git pulled today. Is anyone else running into this?

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-17-1e20bc7da2d6> in <module>
----> 1 from fastai2.tabular.all import *
      2 path = untar_data(URLs.ADULT_SAMPLE)
      3 
      4 dls = TabularDataLoaders.from_csv(path/'adult.csv', path, y_names="salary",
      5     cat_names = ['workclass', 'education', 'marital-status', 'occupation',

~/Desktop/python/fastai2/fastai2/tabular/all.py in <module>
      2 from ..callback.all import *
      3 from .core import *
----> 4 from .data import *
      5 from .model import *
      6 from .learner import *

~/Desktop/python/fastai2/fastai2/tabular/data.py in <module>
      9 
     10 # Cell
---> 11 class TabularDataLoaders(DataLoaders):
     12     "Basic wrapper around several `DataLoader`s with factory methods for tabular data"
     13     @classmethod

~/Desktop/python/fastai2/fastai2/tabular/data.py in TabularDataLoaders()
     12     "Basic wrapper around several `DataLoader`s with factory methods for tabular data"
     13     @classmethod
---> 14     @delegates(Tabular.dataloaders, but=["dl_type", "dl_kwargs"])
     15     def from_df(cls, df, path='.', procs=None, cat_names=None, cont_names=None, y_names=None, y_block=None,
     16                 valid_idx=None, **kwargs):

TypeError: delegates() got an unexpected keyword argument 'but'

sgugger · March 28, 2020, 10:13pm

You need the latest version of fastcore too, not just fastai.

DanielLam · March 28, 2020, 10:19pm

Thanks, it worked.