Num_workers>0 causes AttributeError: '_FakeLoader' object has no attribute 'noops'

There are existing topics about a similar error, but the posters there appear to have been using other tools alongside fast.ai, and the solutions proposed haven't fixed the error I'm encountering. My model trains as long as I set num_workers=0, but as soon as I comment out that setting and allow multiprocessing, I get the errors in the stack trace below: the AttributeError is repeated once for each worker process fast.ai spins up, followed by further errors from the processes crashing.

I've run some other, simpler code without issues, so I suspect the problem is in how I'm defining my DataBlock and its custom blocks, but it's not clear what needs to be added to make them compatible with multiprocessing.

Package versions:
torch: 1.6.0
fast.ai: 2.0.13
python: 3.8.3 (have tried on 3.7 with the same result)
CUDA: 11.0.182

Code:

from fastai.vision.all import *

def _parent_idxs(items, name):
    """Gets the indexes for the items based on the parent folder name"""
    def _inner(items, name):
        return mask2idxs(Path(o).parent.name == name for o in items)
    return [i for n in L(name) for i in _inner(items, n)]


def ParentSplitter(train_name='train', valid_name='valid'):
    "Split `items` from the parent folder names (`train_name` and `valid_name`)."
    def _inner(o, **kwargs):
        return _parent_idxs(o, train_name), _parent_idxs(o, valid_name)
    return _inner


class TorchTensor(TensorBase): 
    @classmethod
    def create(cls, fn: (Path, str)) -> None:
        # this reshaping is to make the data compatible with resnet34
        tens = torch.reshape(torch.load(fn)[1:4, :, :], (3, 512, 512)).cpu().clone()
        return cls(tens)


class TorchTensorMask(TensorMask): 
    @classmethod
    def create(cls, fn: (Path, str)) -> None:
        tens = torch.load(fn).cpu().clone()
        return cls(tens)


def TensorBlock():
    return TransformBlock(type_tfms=TorchTensor.create, batch_tfms=IntToFloatTensor)


def TensorMaskBlock(codes=None):
    return TransformBlock(type_tfms=TorchTensorMask.create, item_tfms=AddMaskCodes(codes=codes), batch_tfms=IntToFloatTensor)


def get_msk(o):
    "Map an image path to the corresponding mask path by swapping the 'images' folder for 'masks'."
    new_path = ['masks' if piece == 'images' else piece for piece in o.parts]
    return Path(os.sep.join(new_path))


class custom_dice(Metric):
    "Dice coefficient metric (DSC) class for a binary target in segmentation"
    def __init__(self, axis=1, object_class=1):
        self.axis = axis
        self.object_class = object_class

    def reset(self):
        self.inter, self.union = 0, 0

    def accumulate(self, learn):
        preds, targs = learn.pred, learn.y
        preds = torch.softmax(preds,dim=1).float()
        pred = preds[:, self.object_class, ...]
        targ = (targs==self.object_class).float()
        self.inter+=(pred*targ).sum()
        self.union+=(pred+targ).sum()

    @property
    def value(self):
        return 2.*self.inter/self.union if self.union > 0 else None


class m_dice(custom_dice):
    "Dice metric for the foreground class (object_class=1)."
    def __init__(self):
        super().__init__(object_class=1)


if __name__ == '__main__':
    # 'spawn' is needed here to avoid a CUDA re-initialization error in the worker processes
    torch.multiprocessing.set_start_method('spawn')

    # directories for data
    path = "path to the data"


    datablock = DataBlock(
        blocks=(TensorBlock, TensorMaskBlock(codes=['mask'])),
        get_items=FileGetter(extensions='.pt', folders=['images']),
        get_y=get_msk,
        splitter=ParentSplitter(train_name='train_data', valid_name='test_data'))

    dataloaders = datablock.dataloaders(Path(path), bs=4) #, num_workers=0)

    dice_1 = m_dice()

    modelsavepath = "save directory for model"
    modelsavedir = "fastai/"

    learn = unet_learner(
        dls=dataloaders, arch=resnet34, metrics=[foreground_acc, dice_1], n_out=2,
        loss_func=CrossEntropyLossFlat(axis=1), path=modelsavepath, model_dir=modelsavedir)

    learn.fit(1)

Error Stack Trace:

Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/home/anaconda3/envs/fastai/lib/python3.8/multiprocessing/spawn.py", line 116, in spawn_main
    exitcode = _main(fd, parent_sentinel)
  File "/home/anaconda3/envs/fastai/lib/python3.8/multiprocessing/spawn.py", line 126, in _main
    self = reduction.pickle.load(from_parent)
AttributeError: '_FakeLoader' object has no attribute 'noops'
0         nan         01:40                                                                           
Traceback (most recent call last):
  File "/home/anaconda3/envs/fastai/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 779, in _try_get_data
    data = self._data_queue.get(timeout=timeout)
  File "/home/anaconda3/envs/fastai/lib/python3.8/multiprocessing/queues.py", line 107, in get
    if not self._poll(timeout):
  File "/home/anaconda3/envs/fastai/lib/python3.8/multiprocessing/connection.py", line 257, in poll
    return self._poll(timeout)
  File "/home/anaconda3/envs/fastai/lib/python3.8/multiprocessing/connection.py", line 424, in _poll
    r = wait([self], timeout)
  File "/home/anaconda3/envs/fastai/lib/python3.8/multiprocessing/connection.py", line 931, in wait
    ready = selector.select(timeout)
  File "/home/anaconda3/envs/fastai/lib/python3.8/selectors.py", line 415, in select
    fd_event_list = self._selector.poll(timeout)
  File "/home/anaconda3/envs/fastai/lib/python3.8/site-packages/torch/utils/data/_utils/signal_handling.py", line 
66, in handler
    _error_if_any_worker_fails()
RuntimeError: DataLoader worker (pid 1473) exited unexpectedly with exit code 1. Details are lost due to multiprocessing. Rerunning with num_workers=0 may give better error trace.

Setting num_workers=2 results in the same errors as leaving num_workers at its default. Does anyone have an idea of what might be going wrong?

Ok, I think we figured this error out too. My data was saved as CUDA tensors, so CUDA had to be re-initialized in every worker process just for torch to load the data, and that re-initialization is what forced me to use the 'spawn' start method for torch multiprocessing in the first place. Some combination of those things was producing the error; re-saving my data as CPU tensors and commenting out torch.multiprocessing.set_start_method('spawn') has resolved it.
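
In case it's useful to anyone else, here is a rough sketch of the one-off conversion I mean. The data_root path and the *.pt glob are just placeholders for your own layout; the key piece is torch.load(..., map_location='cpu'), which brings the saved CUDA tensors onto the CPU so they can be re-saved as plain CPU tensors:

import torch
from pathlib import Path

# Placeholder for the folder that holds the saved .pt image and mask tensors.
data_root = Path("path to the data")

# Re-save every tensor as a CPU tensor so DataLoader workers can unpickle them
# without touching CUDA (and without needing the 'spawn' start method).
for fn in data_root.rglob('*.pt'):
    tens = torch.load(fn, map_location='cpu')
    torch.save(tens, fn)

An alternative that avoids re-saving anything would be to pass map_location='cpu' to the torch.load calls inside the create methods above, which should also keep CUDA out of the worker processes.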