When trying to run get_all to tokenize the data in lesson 10, I run into this error on os.fork().
The `0` printed before the traceback is the first loop index from the `print(i)` call inside `get_all`, so the loop starts but crashes on the very first `get_texts` call — the OSError is raised when `ProcessPoolExecutor` tries to spawn its worker processes via `os.fork()`. Googling the error hasn't helped. While it wouldn't fix the underlying bug, a workaround would be if someone could send me the file containing the tokens for this lesson. I'm pretty perplexed and have spent a long while on this bug… no clue where to go from here.
0
OSError                                   Traceback (most recent call last)
 in ()
----> 1 tok_trn, trn_labels = get_all(df_trn, 1)
2 tok_val, val_labels = get_all(df_val, 1)
 in get_all(df, n_lbls)
3     for i, r in enumerate(df):
4         print(i)
----> 5         tok_, labels_ = get_texts(r, n_lbls)
6         labels += labels_
7     return tok, labels
 in get_texts(df, n_lbls)
5     texts = texts.apply(fixup).values.astype(str)
6
----> 7     tok = Tokenizer().proc_all_mp(partition_by_cores(texts))
8     return tok, list(labels)
~/fastai/courses/dl2/fastai/text.py in proc_all_mp(ss, lang, ncpus)
99         ncpus = ncpus or num_cpus()//2
100         with ProcessPoolExecutor(ncpus) as e:
–> 101             return sum(e.map(Tokenizer.proc_all, ss, [lang]*len(ss)), [])
102
103
~/anaconda3/envs/fastai/lib/python3.6/concurrent/futures/process.py in map(self, fn, timeout, chunksize, *iterables)
494         results = super().map(partial(_process_chunk, fn),
495                               _get_chunks(*iterables, chunksize=chunksize),
–> 496                               timeout=timeout)
497         return _chain_from_iterable_of_lists(results)
498
~/anaconda3/envs/fastai/lib/python3.6/concurrent/futures/_base.py in map(self, fn, timeout, chunksize, *iterables)
573             end_time = timeout + time.time()
574
–> 575         fs = [self.submit(fn, *args) for args in zip(*iterables)]
576
577         # Yield must be hidden in closure so that the futures are submitted
~/anaconda3/envs/fastai/lib/python3.6/concurrent/futures/_base.py in (.0)
573             end_time = timeout + time.time()
574
–> 575         fs = [self.submit(fn, *args) for args in zip(*iterables)]
576
577         # Yield must be hidden in closure so that the futures are submitted
~/anaconda3/envs/fastai/lib/python3.6/concurrent/futures/process.py in submit(self, fn, *args, **kwargs)
464             self._result_queue.put(None)
465
–> 466             self._start_queue_management_thread()
467             return f
468     submit.doc = _base.Executor.submit.doc
~/anaconda3/envs/fastai/lib/python3.6/concurrent/futures/process.py in _start_queue_management_thread(self)
425         if self._queue_management_thread is None:
426             # Start the processes so that their sentinels are known.
–> 427             self._adjust_process_count()
428             self._queue_management_thread = threading.Thread(
429                     target=_queue_management_worker,
~/anaconda3/envs/fastai/lib/python3.6/concurrent/futures/process.py in _adjust_process_count(self)
444                     args=(self._call_queue,
445                           self._result_queue))
–> 446             p.start()
447             self._processes[p.pid] = p
448
~/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py in start(self)
103                ‘daemonic processes are not allowed to have children’
104         _cleanup()
–> 105         self._popen = self._Popen(self)
106         self._sentinel = self._popen.sentinel
107         # Avoid a refcycle if the target function holds an indirect
~/anaconda3/envs/fastai/lib/python3.6/multiprocessing/context.py in _Popen(process_obj)
221     @staticmethod
222     def _Popen(process_obj):
–> 223         return _default_context.get_context().Process._Popen(process_obj)
224
225 class DefaultContext(BaseContext):
~/anaconda3/envs/fastai/lib/python3.6/multiprocessing/context.py in _Popen(process_obj)
275         def _Popen(process_obj):
276             from .popen_fork import Popen
–> 277             return Popen(process_obj)
278
279     class SpawnProcess(process.BaseProcess):
~/anaconda3/envs/fastai/lib/python3.6/multiprocessing/popen_fork.py in init(self, process_obj)
17         util._flush_std_streams()
18         self.returncode = None
—> 19         self._launch(process_obj)
20
21     def duplicate_for_child(self, fd):
~/anaconda3/envs/fastai/lib/python3.6/multiprocessing/popen_fork.py in _launch(self, process_obj)
64         code = 1
65         parent_r, child_w = os.pipe()
—> 66         self.pid = os.fork()
67         if self.pid == 0:
68             try:
OSError: [Errno 22] Invalid argument