Hello. I am trying to build a language model from blogs, and I am calling the fastai Datasets API to do so:
dsets = Datasets(df_p, [tfms],dl_type=LMDataLoader)
df_p is a pandas DataFrame that contains the blogs. When we print its first rows:
print(df_p.head())
Result is:
0 ['12 years ago, the smartphone was in its infancy. Neither Airbnb nor Uber, Lyft or TaskRabbit yet existed. Saying the phrase ‘sharing economy’ usually resulted in raised eyebrows: was that some kind of hippie bartering, or a new take on philanthropy?', 'What’s happened since then is nothing short of extraordinary: today there are tens of thousands of self-professed sharing economy platforms worldwide, many featuring blistering valuations, wobbly IPOs, and fierce debates about the sharing economy’s benefits, pitfalls and potential. I’ve been at the fifty-yard line of this transformation, a...
Name: paragraphs, dtype: object
I am getting an AttributeError:
AttributeError Traceback (most recent call last)
<ipython-input-28-bab5db2d3986> in <module>
----> 1 dsets = Datasets(df_p, [tfms],dl_type=LMDataLoader)
/opt/conda/envs/fastai/lib/python3.8/site-packages/fastai/data/core.py in __init__(self, items, tfms, tls, n_inp, dl_type, **kwargs)
308 def __init__(self, items=None, tfms=None, tls=None, n_inp=None, dl_type=None, **kwargs):
309 super().__init__(dl_type=dl_type)
--> 310 self.tls = L(tls if tls else [TfmdLists(items, t, **kwargs) for t in L(ifnone(tfms,[None]))])
311 self.n_inp = ifnone(n_inp, max(1, len(self.tls)-1))
312
/opt/conda/envs/fastai/lib/python3.8/site-packages/fastai/data/core.py in <listcomp>(.0)
308 def __init__(self, items=None, tfms=None, tls=None, n_inp=None, dl_type=None, **kwargs):
309 super().__init__(dl_type=dl_type)
--> 310 self.tls = L(tls if tls else [TfmdLists(items, t, **kwargs) for t in L(ifnone(tfms,[None]))])
311 self.n_inp = ifnone(n_inp, max(1, len(self.tls)-1))
312
/opt/conda/envs/fastai/lib/python3.8/site-packages/fastcore/foundation.py in __call__(cls, x, *args, **kwargs)
161 def __call__(cls, x=None, *args, **kwargs):
162 if not args and not kwargs and x is not None and isinstance(x,cls): return x
--> 163 return super().__call__(x, *args, **kwargs)
164
165 # Cell
/opt/conda/envs/fastai/lib/python3.8/site-packages/fastai/data/core.py in __init__(self, items, tfms, use_list, do_setup, split_idx, train_setup, splits, types, verbose, dl_type)
234 if do_setup:
235 pv(f"Setting up {self.tfms}", verbose)
--> 236 self.setup(train_setup=train_setup)
237
238 def _new(self, items, split_idx=None, **kwargs):
/opt/conda/envs/fastai/lib/python3.8/site-packages/fastai/data/core.py in setup(self, train_setup)
250
251 def setup(self, train_setup=True):
--> 252 self.tfms.setup(self, train_setup)
253 if len(self) != 0:
254 x = super().__getitem__(0) if self.splits is None else super().__getitem__(self.splits[0])[0]
/opt/conda/envs/fastai/lib/python3.8/site-packages/fastcore/transform.py in setup(self, items, train_setup)
190 tfms = self.fs[:]
191 self.fs.clear()
--> 192 for t in tfms: self.add(t,items, train_setup)
193
194 def add(self,t, items=None, train_setup=False):
/opt/conda/envs/fastai/lib/python3.8/site-packages/fastcore/transform.py in add(self, t, items, train_setup)
193
194 def add(self,t, items=None, train_setup=False):
--> 195 t.setup(items, train_setup)
196 self.fs.append(t)
197
/opt/conda/envs/fastai/lib/python3.8/site-packages/fastcore/transform.py in setup(self, items, train_setup)
77 def setup(self, items=None, train_setup=False):
78 train_setup = train_setup if self.train_setup is None else self.train_setup
---> 79 return self.setups(getattr(items, 'train', items) if train_setup else items)
80
81 def _call(self, fn, x, split_idx=None, **kwargs):
/opt/conda/envs/fastai/lib/python3.8/site-packages/fastcore/dispatch.py in __call__(self, *args, **kwargs)
115 elif self.inst is not None: f = MethodType(f, self.inst)
116 elif self.owner is not None: f = MethodType(f, self.owner)
--> 117 return f(*args, **kwargs)
118
119 def __get__(self, inst, owner):
/opt/conda/envs/fastai/lib/python3.8/site-packages/fastai/text/data.py in setups(self, dsets)
40 if dsets is None: return
41 if self.vocab is None:
---> 42 count = dsets.counter if getattr(dsets, 'counter', None) is not None else Counter(p for o in dsets for p in o)
43 if self.special_toks is None and hasattr(dsets, 'special_toks'):
44 self.special_toks = dsets.special_toks
/opt/conda/envs/fastai/lib/python3.8/collections/__init__.py in __init__(self, iterable, **kwds)
550 '''
551 super(Counter, self).__init__()
--> 552 self.update(iterable, **kwds)
553
554 def __missing__(self, key):
/opt/conda/envs/fastai/lib/python3.8/collections/__init__.py in update(self, iterable, **kwds)
635 super(Counter, self).update(iterable) # fast path when counter is empty
636 else:
--> 637 _count_elements(self, iterable)
638 if kwds:
639 self.update(kwds)
/opt/conda/envs/fastai/lib/python3.8/site-packages/fastai/text/data.py in <genexpr>(.0)
40 if dsets is None: return
41 if self.vocab is None:
---> 42 count = dsets.counter if getattr(dsets, 'counter', None) is not None else Counter(p for o in dsets for p in o)
43 if self.special_toks is None and hasattr(dsets, 'special_toks'):
44 self.special_toks = dsets.special_toks
/opt/conda/envs/fastai/lib/python3.8/site-packages/fastai/data/core.py in <genexpr>(.0)
242 def _after_item(self, o): return self.tfms(o)
243 def __repr__(self): return f"{self.__class__.__name__}: {self.items}\ntfms - {self.tfms.fs}"
--> 244 def __iter__(self): return (self[i] for i in range(len(self)))
245 def show(self, o, **kwargs): return self.tfms.show(o, **kwargs)
246 def decode(self, o, **kwargs): return self.tfms.decode(o, **kwargs)
/opt/conda/envs/fastai/lib/python3.8/site-packages/fastai/data/core.py in __getitem__(self, idx)
278 res = super().__getitem__(idx)
279 if self._after_item is None: return res
--> 280 return self._after_item(res) if is_indexer(idx) else res.map(self._after_item)
281
282 # Cell
/opt/conda/envs/fastai/lib/python3.8/site-packages/fastai/data/core.py in _after_item(self, o)
240 return super()._new(items, tfms=self.tfms, do_setup=False, types=self.types, split_idx=split_idx, **kwargs)
241 def subset(self, i): return self._new(self._get(self.splits[i]), split_idx=i)
--> 242 def _after_item(self, o): return self.tfms(o)
243 def __repr__(self): return f"{self.__class__.__name__}: {self.items}\ntfms - {self.tfms.fs}"
244 def __iter__(self): return (self[i] for i in range(len(self)))
/opt/conda/envs/fastai/lib/python3.8/site-packages/fastcore/transform.py in __call__(self, o)
196 self.fs.append(t)
197
--> 198 def __call__(self, o): return compose_tfms(o, tfms=self.fs, split_idx=self.split_idx)
199 def __repr__(self): return f"Pipeline: {' -> '.join([f.name for f in self.fs if f.name != 'noop'])}"
200 def __getitem__(self,i): return self.fs[i]
/opt/conda/envs/fastai/lib/python3.8/site-packages/fastcore/transform.py in compose_tfms(x, tfms, is_enc, reverse, **kwargs)
148 for f in tfms:
149 if not is_enc: f = f.decode
--> 150 x = f(x, **kwargs)
151 return x
152
/opt/conda/envs/fastai/lib/python3.8/site-packages/fastcore/transform.py in __call__(self, x, **kwargs)
71 @property
72 def name(self): return getattr(self, '_name', _get_name(self))
---> 73 def __call__(self, x, **kwargs): return self._call('encodes', x, **kwargs)
74 def decode (self, x, **kwargs): return self._call('decodes', x, **kwargs)
75 def __repr__(self): return f'{self.name}:\nencodes: {self.encodes}decodes: {self.decodes}'
/opt/conda/envs/fastai/lib/python3.8/site-packages/fastcore/transform.py in _call(self, fn, x, split_idx, **kwargs)
81 def _call(self, fn, x, split_idx=None, **kwargs):
82 if split_idx!=self.split_idx and self.split_idx is not None: return x
---> 83 return self._do_call(getattr(self, fn), x, **kwargs)
84
85 def _do_call(self, f, x, **kwargs):
/opt/conda/envs/fastai/lib/python3.8/site-packages/fastcore/transform.py in _do_call(self, f, x, **kwargs)
87 if f is None: return x
88 ret = f.returns_none(x) if hasattr(f,'returns_none') else None
---> 89 return retain_type(f(x, **kwargs), x, ret)
90 res = tuple(self._do_call(f, x_, **kwargs) for x_ in x)
91 return retain_type(res, x)
/opt/conda/envs/fastai/lib/python3.8/site-packages/fastcore/dispatch.py in __call__(self, *args, **kwargs)
115 elif self.inst is not None: f = MethodType(f, self.inst)
116 elif self.owner is not None: f = MethodType(f, self.owner)
--> 117 return f(*args, **kwargs)
118
119 def __get__(self, inst, owner):
AttributeError: 'str' object has no attribute 'paragraphs'
I was trying to follow the wikitext tutorial (https://docs.fast.ai/tutorial.wikitext.html),
which also uses a similar dataset from IMDB.