When trying to run get_all to tokenize the data in lesson 10, I run into this error on os.fork().
The `0` printed before the traceback is the first loop index from the `print(i)` call inside `get_all`, so the loop starts but crashes on the very first `get_texts` call — the OSError is raised when `ProcessPoolExecutor` tries to spawn its worker processes via `os.fork()`. Googling the error hasn't helped. While it wouldn't fix the underlying bug, a workaround would be if someone could send me the file containing the tokens for this lesson. I'm pretty perplexed and have spent a long while on this bug… no clue where to go from here.
0
OSError                                   Traceback (most recent call last)
 in ()
----> 1 tok_trn, trn_labels = get_all(df_trn, 1)
2 tok_val, val_labels = get_all(df_val, 1)
 in get_all(df, n_lbls)
3     for i, r in enumerate(df):
4         print(i)
----> 5         tok_, labels_ = get_texts(r, n_lbls)
6         labels += labels_
7     return tok, labels
 in get_texts(df, n_lbls)
5     texts = texts.apply(fixup).values.astype(str)
6
----> 7     tok = Tokenizer().proc_all_mp(partition_by_cores(texts))
8     return tok, list(labels)
~/fastai/courses/dl2/fastai/text.py in proc_all_mp(ss, lang, ncpus)
99         ncpus = ncpus or num_cpus()//2
100         with ProcessPoolExecutor(ncpus) as e:
–> 101             return sum(e.map(Tokenizer.proc_all, ss, [lang]*len(ss)), [])
102
103
~/anaconda3/envs/fastai/lib/python3.6/concurrent/futures/process.py in map(self, fn, timeout, chunksize, *iterables)
494         results = super().map(partial(_process_chunk, fn),
495                               _get_chunks(*iterables, chunksize=chunksize),
–> 496                               timeout=timeout)
497         return _chain_from_iterable_of_lists(results)
498
~/anaconda3/envs/fastai/lib/python3.6/concurrent/futures/_base.py in map(self, fn, timeout, chunksize, *iterables)
573             end_time = timeout + time.time()
574
–> 575         fs = [self.submit(fn, *args) for args in zip(*iterables)]
576
577         # Yield must be hidden in closure so that the futures are submitted
~/anaconda3/envs/fastai/lib/python3.6/concurrent/futures/_base.py in (.0)
573             end_time = timeout + time.time()
574
–> 575         fs = [self.submit(fn, *args) for args in zip(*iterables)]
576
577         # Yield must be hidden in closure so that the futures are submitted
~/anaconda3/envs/fastai/lib/python3.6/concurrent/futures/process.py in submit(self, fn, *args, **kwargs)
464             self._result_queue.put(None)
465
–> 466             self._start_queue_management_thread()
467             return f
468     submit.doc = _base.Executor.submit.doc
~/anaconda3/envs/fastai/lib/python3.6/concurrent/futures/process.py in _start_queue_management_thread(self)
425         if self._queue_management_thread is None:
426             # Start the processes so that their sentinels are known.
–> 427             self._adjust_process_count()
428             self._queue_management_thread = threading.Thread(
429                     target=_queue_management_worker,
~/anaconda3/envs/fastai/lib/python3.6/concurrent/futures/process.py in _adjust_process_count(self)
444                     args=(self._call_queue,
445                           self._result_queue))
–> 446             p.start()
447             self._processes[p.pid] = p
448
~/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py in start(self)
103                ‘daemonic processes are not allowed to have children’
104         _cleanup()
–> 105         self._popen = self._Popen(self)
106         self._sentinel = self._popen.sentinel
107         # Avoid a refcycle if the target function holds an indirect
~/anaconda3/envs/fastai/lib/python3.6/multiprocessing/context.py in _Popen(process_obj)
221     @staticmethod
222     def _Popen(process_obj):
–> 223         return _default_context.get_context().Process._Popen(process_obj)
224
225 class DefaultContext(BaseContext):
~/anaconda3/envs/fastai/lib/python3.6/multiprocessing/context.py in _Popen(process_obj)
275         def _Popen(process_obj):
276             from .popen_fork import Popen
–> 277             return Popen(process_obj)
278
279     class SpawnProcess(process.BaseProcess):
~/anaconda3/envs/fastai/lib/python3.6/multiprocessing/popen_fork.py in init(self, process_obj)
17         util._flush_std_streams()
18         self.returncode = None
—> 19         self._launch(process_obj)
20
21     def duplicate_for_child(self, fd):
~/anaconda3/envs/fastai/lib/python3.6/multiprocessing/popen_fork.py in _launch(self, process_obj)
64         code = 1
65         parent_r, child_w = os.pipe()
—> 66         self.pid = os.fork()
67         if self.pid == 0:
68             try:
OSError: [Errno 22] Invalid argument