Hello,
@pipeline_def
def dali_pipeline(image_paths, mask_paths, image_ext="jpg", mask_ext="jpg", shuffle_seed=42):
    """Read, decode and resize images and masks stored in WebDataset archives.

    Args:
        image_paths: Path(s) to the WebDataset tar archive(s) with the images.
        mask_paths: Path(s) to the WebDataset tar archive(s) with the masks.
        image_ext: Extension of the image component inside each sample.
            Defaults to "jpg" -- assumes the tar members end in ".jpg";
            adjust if they are stored as e.g. ".jpeg" or ".png".
        mask_ext: Extension of the mask component; same caveat as image_ext.
        shuffle_seed: Seed handed to BOTH readers so they shuffle alike.

    Returns:
        A tuple (images, masks): images are decoded to RGB, resized to
        256x140 and normalized to float; masks are decoded to RGB and
        resized to 256x140.
    """
    # `ext` selects which component of every sample becomes the reader output.
    # A single extension string yields a single output stream, which is what
    # decoders.image expects (encoded bytes stored as uint8).
    # NOTE(review): readers that return several outputs (e.g. readers.file
    # returns data and labels) must be unpacked before decoding; passing a
    # non-uint8 output to the decoder would explain the
    # "Input must be stored as uint8 data" assertion -- confirm.
    #
    # Both readers shuffle. Without a common seed each one picks its own order
    # and image/mask pairs no longer line up.
    # Assumes both archives hold the same number of samples in the same
    # order -- TODO confirm.
    images_jpg = fn.readers.webdataset(
        paths=image_paths,
        ext=image_ext,
        shard_id=0,
        num_shards=1,
        dtypes=types.UINT8,
        random_shuffle=True,
        seed=shuffle_seed,
    )
    masks_jpg = fn.readers.webdataset(
        paths=mask_paths,
        ext=mask_ext,
        shard_id=0,
        num_shards=1,
        dtypes=types.UINT8,
        random_shuffle=True,
        seed=shuffle_seed,
    )

    # 'mixed' takes the encoded bytes on the CPU and outputs decoded data on the GPU.
    images = fn.decoders.image(images_jpg, device='mixed', output_type=types.RGB)
    masks = fn.decoders.image(masks_jpg, device='mixed', output_type=types.RGB)

    # Nearest-neighbour interpolation for both streams, as in the original.
    # Use the same `types` alias as the rest of the function.
    images = fn.resize(images, device='gpu', resize_x=256, resize_y=140, interp_type=types.INTERP_NN)
    masks = fn.resize(masks, device='gpu', resize_x=256, resize_y=140, interp_type=types.INTERP_NN)

    # Only the images are normalized; the masks are returned as resized.
    images = fn.normalize(images, dtype=types.FLOAT)
    return images, masks
# Instantiate the pipeline; batch_size, num_threads and device_id are passed
# alongside the arguments of the decorated function.
pipe = dali_pipeline(
    image_paths=image_paths,
    mask_paths=mask_paths,
    batch_size=64,
    num_threads=2,
    device_id=0,
)
pipe.build()

# Expose the two pipeline outputs to PyTorch under the keys 'image' and 'mask'.
# When the iterator prepares its first batch on construction, errors raised
# while running the pipeline surface on this line.
dali_dl = DALIGenericIterator(pipelines=pipe, output_map=['image', 'mask'])
I have the code above and I get the following error, which I could not resolve even after switching to another type of reader (instead of readers.file) or adding dtypes=types.UINT8:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
Cell In[16], line 18
16 pipe = dali_pipeline(image_paths=image_paths, mask_paths=mask_paths, batch_size=64, num_threads=2, device_id=0)
17 pipe.build()
---> 18 dali_dl = DALIGenericIterator(pipe, ['image', 'mask'])
File /opt/conda/lib/python3.10/site-packages/nvidia/dali/plugin/pytorch.py:194, in DALIGenericIterator.__init__(self, pipelines, output_map, size, reader_name, auto_reset, fill_last_batch, dynamic_shape, last_batch_padded, last_batch_policy, prepare_first_batch)
192 if self._prepare_first_batch:
193 try:
--> 194 self._first_batch = DALIGenericIterator.__next__(self)
195 # call to `next` sets _ever_consumed to True but if we are just calling it from
196 # here we should set if to False again
197 self._ever_consumed = False
File /opt/conda/lib/python3.10/site-packages/nvidia/dali/plugin/pytorch.py:211, in DALIGenericIterator.__next__(self)
208 return batch
210 # Gather outputs
--> 211 outputs = self._get_outputs()
213 data_batches = [None for i in range(self._num_gpus)]
214 for i in range(self._num_gpus):
File /opt/conda/lib/python3.10/site-packages/nvidia/dali/plugin/base_iterator.py:298, in _DaliBaseIterator._get_outputs(self)
296 for p in self._pipes:
297 with p._check_api_type_scope(types.PipelineAPIType.ITERATOR):
--> 298 outputs.append(p.share_outputs())
299 except StopIteration as e:
300 # in case ExternalSource returns StopIteration
301 if self._size < 0 and self._auto_reset == "yes":
File /opt/conda/lib/python3.10/site-packages/nvidia/dali/pipeline.py:1003, in Pipeline.share_outputs(self)
1001 self._batches_to_consume -= 1
1002 self._gpu_batches_to_consume -= 1
-> 1003 return self._pipe.ShareOutputs()
RuntimeError: Critical error in pipeline:
Error when executing CPU operator decoders__Image, instance name: "__Image_53", encountered:
Error in thread 0: [/opt/dali/dali/operators/decoder/host/host_decoder.cc:31] Assert on "IsType<uint8>(input.type())" failed: Input must be stored as uint8 data.
Stacktrace (7 entries):
[frame 0]: /opt/conda/lib/python3.10/site-packages/nvidia/dali/libdali_operators.so(+0x686e3e) [0x7b2235038e3e]
[frame 1]: /opt/conda/lib/python3.10/site-packages/nvidia/dali/libdali_operators.so(+0xa87601) [0x7b2235439601]
[frame 2]: /opt/conda/lib/python3.10/site-packages/nvidia/dali/libdali_operators.so(+0x6880e0) [0x7b223503a0e0]
[frame 3]: /opt/conda/lib/python3.10/site-packages/nvidia/dali/libdali.so(dali::ThreadPool::ThreadMain(int, int, bool, std::string const&)+0x1e6) [0x7b227e9e9e86]
[frame 4]: /opt/conda/lib/python3.10/site-packages/nvidia/dali/libdali.so(+0x769dd0) [0x7b227ef99dd0]
[frame 5]: /lib/x86_64-linux-gnu/libc.so.6(+0x94b43) [0x7b237cde6b43]
[frame 6]: /lib/x86_64-linux-gnu/libc.so.6(clone+0x44) [0x7b237ce77bb4]
Current pipeline object is no longer valid.
My dataset consists of a tar file containing RGB JPEG (.jpg) images.
Does anyone know how to solve this?