Hi Everyone,
I converted the object-type columns of my data to categories using:
train_cats(df_raw)
I am then writing df_raw to Feather format:
os.makedirs('tmp', exist_ok=True)
feather.write_dataframe(df_raw, 'test.feather')
But while writing it to Feather format I get the error below. I don't understand what this error means; could anyone please suggest where I made a mistake?
ArrowInvalid Traceback (most recent call last)
in ()
1 os.makedirs(‘tmp’, exist_ok=True)
----> 2 feather.write_dataframe(df_raw, ‘test.feather’)
~/anaconda3/lib/python3.6/site-packages/pyarrow/feather.py in write_feather(df, dest)
176 writer = FeatherWriter(dest)
177 try:
–> 178 writer.write(df)
179 except Exception:
180 # Try to make sure the resource is closed
~/anaconda3/lib/python3.6/site-packages/pyarrow/feather.py in write(self, df)
89 # TODO(wesm): Remove this length check, see ARROW-1732
90 if len(df.columns) > 0:
—> 91 batch = RecordBatch.from_pandas(df, preserve_index=False)
92 for i, name in enumerate(batch.schema.names):
93 col = batch[i]
~/anaconda3/lib/python3.6/site-packages/pyarrow/table.pxi in pyarrow.lib.RecordBatch.from_pandas()
~/anaconda3/lib/python3.6/site-packages/pyarrow/pandas_compat.py in dataframe_to_arrays(df, schema, preserve_index, nthreads, columns, safe)
385 arrays = list(executor.map(convert_column,
386 columns_to_convert,
–> 387 convert_types))
388
389 types = [x.type for x in arrays]
~/anaconda3/lib/python3.6/concurrent/futures/_base.py in result_iterator()
584 # Careful not to keep a reference to the popped future
585 if timeout is None:
–> 586 yield fs.pop().result()
587 else:
588 yield fs.pop().result(end_time - time.time())
~/anaconda3/lib/python3.6/concurrent/futures/_base.py in result(self, timeout)
430 raise CancelledError()
431 elif self._state == FINISHED:
–> 432 return self.__get_result()
433 else:
434 raise TimeoutError()
~/anaconda3/lib/python3.6/concurrent/futures/_base.py in __get_result(self)
382 def __get_result(self):
383 if self._exception:
–> 384 raise self._exception
385 else:
386 return self._result
~/anaconda3/lib/python3.6/concurrent/futures/thread.py in run(self)
54
55 try:
—> 56 result = self.fn(*self.args, **self.kwargs)
57 except BaseException as exc:
58 self.future.set_exception(exc)
~/anaconda3/lib/python3.6/site-packages/pyarrow/pandas_compat.py in convert_column(col, ty)
374 e.args += (“Conversion failed for column {0!s} with type {1!s}”
375 .format(col.name, col.dtype),)
–> 376 raise e
377
378 if nthreads == 1:
~/anaconda3/lib/python3.6/site-packages/pyarrow/pandas_compat.py in convert_column(col, ty)
368 def convert_column(col, ty):
369 try:
–> 370 return pa.array(col, type=ty, from_pandas=True, safe=safe)
371 except (pa.ArrowInvalid,
372 pa.ArrowNotImplementedError,
~/anaconda3/lib/python3.6/site-packages/pyarrow/array.pxi in pyarrow.lib.array()
~/anaconda3/lib/python3.6/site-packages/pyarrow/array.pxi in pyarrow.lib.DictionaryArray.from_arrays()
~/anaconda3/lib/python3.6/site-packages/pyarrow/array.pxi in pyarrow.lib.array()
~/anaconda3/lib/python3.6/site-packages/pyarrow/array.pxi in pyarrow.lib._ndarray_to_array()
~/anaconda3/lib/python3.6/site-packages/pyarrow/error.pxi in pyarrow.lib.check_status()
ArrowInvalid: ('Could not convert # with type str: tried to convert to double', 'Conversion failed for column Booking Rep with type category')
Thanks