Hello, I have a problem while loading my data.
In my last project I tried fastai vision, and loading my 11GB image dataset worked fine.
Right now I am trying to create my own language model (LM) using fastai text.
I want to fit the idwiki dataset, which has a total size of 1.7GB (350,000 data points),
but I get a memory error when I put more than 200MB (70,000 data points) in my dataset folder.
I tried changing the bs and chunk size to small numbers (e.g. 1, 2, 4), but I still get a memory error.
Thanks for your time.
fastai : 1.0.33
torch : 1.0.0 dev
cuda : 10
gpu : 1080ti
ram : 16GB
from fastai import *
from fastai.text import *
data_lm = TextLMDataBunch.from_folder("./dataset",bs=1)
---------------------------------------------------------------------------
MemoryError Traceback (most recent call last)
<ipython-input-2-33f13487510f> in <module>
----> 1 data_lm = TextLMDataBunch.from_folder("./dataset",bs=1)
~/.local/lib/python3.6/site-packages/fastai/text/data.py in from_folder(cls, path, train, valid, test, classes, tokenizer, vocab, **kwargs)
194 src = (TextList.from_folder(path, processor=processor)
195 .split_by_folder(train=train, valid=valid))
--> 196 src = src.label_for_lm() if cls==TextLMDataBunch else src.label_from_folder(classes=classes)
197 if test is not None: src.add_test_folder(path/test)
198 return src.databunch(**kwargs)
~/.local/lib/python3.6/site-packages/fastai/data_block.py in _inner(*args, **kwargs)
370 self.valid = fv(*args, **kwargs)
371 self.__class__ = LabelLists
--> 372 self.process()
373 return self
374 return _inner
~/.local/lib/python3.6/site-packages/fastai/data_block.py in process(self)
417 "Process the inner datasets."
418 xp,yp = self.get_processors()
--> 419 for i,ds in enumerate(self.lists): ds.process(xp, yp, filter_missing_y=i==0)
420 return self
421
~/.local/lib/python3.6/site-packages/fastai/data_block.py in process(self, xp, yp, filter_missing_y)
516 filt = array([o is None for o in self.y])
517 if filt.sum()>0: self.x,self.y = self.x[~filt],self.y[~filt]
--> 518 self.x.process(xp)
519 return self
520
~/.local/lib/python3.6/site-packages/fastai/data_block.py in process(self, processor)
65 if processor is not None: self.processor = processor
66 self.processor = listify(self.processor)
---> 67 for p in self.processor: p.process(self)
68 return self
69
~/.local/lib/python3.6/site-packages/fastai/data_block.py in process(self, ds)
36 def __init__(self, ds:Collection=None): self.ref_ds = ds
37 def process_one(self, item:Any): return item
---> 38 def process(self, ds:Collection): ds.items = array([self.process_one(item) for item in ds.items])
39
40 class ItemList():
~/.local/lib/python3.6/site-packages/fastai/core.py in array(a, *args, **kwargs)
244 if not isinstance(a, collections.Sized) and not getattr(a,'__array_interface__',False):
245 a = list(a)
--> 246 return np.array(a, *args, **kwargs)
247
248 class EmptyLabel(ItemBase):
MemoryError: