Error with TextClasDataBunch.from_df() in Google Colab

(Monique Monteiro) #1

Hi all,

Since roughly yesterday, the following code has started raising the error below in Google Colab:

data_clas = TextClasDataBunch.from_df(path,
train_df= df[train_bool],
valid_df= df[~train_bool],
tokenizer=tokenizer,
text_cols=0,
bs=24,
vocab=data_lm.vocab,
max_vocab=35000,
label_cols=1)

BrokenProcessPool Traceback (most recent call last)
in ()
7 vocab=data_lm.vocab,
8 max_vocab=35000,
----> 9 label_cols=1)

10 frames
/usr/local/lib/python3.6/dist-packages/fastai/text/data.py in from_df(cls, path, train_df, valid_df, test_df, tokenizer, vocab, classes, text_cols, label_cols, label_delim, chunksize, max_vocab, min_freq, mark_fields, include_bos, include_eos, **kwargs)
202 else:
203 if label_delim is not None: src = src.label_from_df(cols=label_cols, classes=classes, label_delim=label_delim)
--> 204 else: src = src.label_from_df(cols=label_cols, classes=classes)
205 if test_df is not None: src.add_test(TextList.from_df(test_df, path, cols=text_cols))
206 return src.databunch(**kwargs)

/usr/local/lib/python3.6/dist-packages/fastai/data_block.py in _inner(*args, **kwargs)
475 self.valid = fv(*args, from_item_lists=True, **kwargs)
476 self.__class__ = LabelLists
--> 477 self.process()
478 return self
479 return _inner

/usr/local/lib/python3.6/dist-packages/fastai/data_block.py in process(self)
529 "Process the inner datasets."
530 xp,yp = self.get_processors()
--> 531 for ds,n in zip(self.lists, ['train','valid','test']): ds.process(xp, yp, name=n)
532 #progress_bar clear the outputs so in some case warnings issued during processing disappear.
533 for ds in self.lists:

/usr/local/lib/python3.6/dist-packages/fastai/data_block.py in process(self, xp, yp, name)
709 p.warns = []
710 self.x,self.y = self.x[~filt],self.y[~filt]
--> 711 self.x.process(xp)
712 return self
713

/usr/local/lib/python3.6/dist-packages/fastai/data_block.py in process(self, processor)
81 if processor is not None: self.processor = processor
82 self.processor = listify(self.processor)
---> 83 for p in self.processor: p.process(self)
84 return self
85

/usr/local/lib/python3.6/dist-packages/fastai/text/data.py in process(self, ds)
294 tokens = []
295 for i in progress_bar(range(0,len(ds),self.chunksize), leave=False):
--> 296 tokens += self.tokenizer.process_all(ds.items[i:i+self.chunksize])
297 ds.items = tokens
298

/usr/local/lib/python3.6/dist-packages/fastai/text/transform.py in process_all(self, texts)
118 if self.n_cpus <= 1: return self._process_all_1(texts)
119 with ProcessPoolExecutor(self.n_cpus) as e:
--> 120 return sum(e.map(self._process_all_1, partition_by_cores(texts, self.n_cpus)), [])
121
122 class Vocab():

/usr/lib/python3.6/concurrent/futures/process.py in _chain_from_iterable_of_lists(iterable)
364 careful not to keep references to yielded objects.
365 """
--> 366 for element in iterable:
367 element.reverse()
368 while element:

/usr/lib/python3.6/concurrent/futures/_base.py in result_iterator()
584 # Careful not to keep a reference to the popped future
585 if timeout is None:
--> 586 yield fs.pop().result()
587 else:
588 yield fs.pop().result(end_time - time.monotonic())

/usr/lib/python3.6/concurrent/futures/_base.py in result(self, timeout)
430 raise CancelledError()
431 elif self._state == FINISHED:
--> 432 return self.__get_result()
433 else:
434 raise TimeoutError()

/usr/lib/python3.6/concurrent/futures/_base.py in __get_result(self)
382 def __get_result(self):
383 if self._exception:
--> 384 raise self._exception
385 else:
386 return self._result

BrokenProcessPool: A process in the process pool was terminated abruptly while the future was running or pending.

The first cell in the notebook contains:

!curl -s https://course.fast.ai/setup/colab | bash

Is it possible that a recent update caused this issue?

Thanks in advance,
Monique

0 Likes