Trying to run notebook with over 300,000 text files after running this code:
CLASSES = ['l', 'r']
def get_texts(path):
texts,labels = [],[]
for idx,label in enumerate(CLASSES):
for fname in (path/label).glob('*.*'):
texts.append(fname.open('r').read())
labels.append(idx)
return np.array(texts),np.array(labels)
trn_texts,trn_labels = get_texts(PATH/'train')
val_texts,val_labels = get_texts(PATH/'test')
I get this error:
---------------------------------------------------------------------------
MemoryError Traceback (most recent call last)
<ipython-input-4-5fa80fc7c50a> in <module>()
9 return np.array(texts),np.array(labels)
10
---> 11 trn_texts,trn_labels = get_texts(PATH/'train')
12 val_texts,val_labels = get_texts(PATH/'test')
<ipython-input-4-5fa80fc7c50a> in get_texts(path)
7 texts.append(fname.open('r').read())
8 labels.append(idx)
----> 9 return np.array(texts),np.array(labels)
10
11 trn_texts,trn_labels = get_texts(PATH/'train')
MemoryError:
Any ideas?