Errors when I run lesson3-rossman


(Xu Zhang) #1

When I run this cell,
df, y, nas, mapper = proc_df(joined_samp, 'Sales', do_scale=True)
yl = np.log(y)

I received the following error messages:
ValueError Traceback (most recent call last)
in ()
----> 1 df, y, nas, mapper = proc_df(joined_samp, 'Sales', do_scale=True)
2 yl = np.log(y)

/media/projects/fastai/courses/dl1/fastai/structured.py in proc_df(df, y_fld, skip_flds, ignore_flds, do_scale, na_dict, preproc_fn, max_n_cat, subset, mapper)
435 if na_dict is None: na_dict = {}
436 for n,c in df.items(): na_dict = fix_missing(df, c, n, na_dict)
--> 437 if do_scale: mapper = scale_vars(df, mapper)
438 for n,c in df.items(): numericalize(df, c, n, max_n_cat)
439 df = pd.get_dummies(df, dummy_na=True)

/media/projects/fastai/courses/dl1/fastai/structured.py in scale_vars(df, mapper)
323 if mapper is None:
324 map_f = [([n],StandardScaler()) for n in df.columns if is_numeric_dtype(df[n])]
--> 325 mapper = DataFrameMapper(map_f).fit(df)
326 df[mapper.transformed_names_] = mapper.transform(df)
327 return mapper

~/anaconda3/envs/fastai/lib/python3.6/site-packages/sklearn_pandas/dataframe_mapper.py in fit(self, X, y)
212 with add_column_names_to_exception(columns):
213 Xt = self._get_col_subset(X, columns, input_df)
--> 214 _call_fit(transformers.fit, Xt, y)
215
216 # handle features not explicitly selected

~/anaconda3/envs/fastai/lib/python3.6/site-packages/sklearn_pandas/pipeline.py in _call_fit(fit_method, X, y, **kwargs)
22 """
23 try:
---> 24 return fit_method(X, y, **kwargs)
25 except TypeError:
26 # fit takes only one argument

~/anaconda3/envs/fastai/lib/python3.6/site-packages/sklearn/preprocessing/data.py in fit(self, X, y)
588 # Reset internal state before fitting
589 self._reset()
--> 590 return self.partial_fit(X, y)
591
592 def partial_fit(self, X, y=None):

~/anaconda3/envs/fastai/lib/python3.6/site-packages/sklearn/preprocessing/data.py in partial_fit(self, X, y)
610 """
611 X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy,
--> 612 warn_on_dtype=True, estimator=self, dtype=FLOAT_DTYPES)
613
614 # Even in the case of with_mean=False, we update the mean anyway

~/anaconda3/envs/fastai/lib/python3.6/site-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
451 % (array.ndim, estimator_name))
452 if force_all_finite:
--> 453 _assert_all_finite(array)
454
455 shape_repr = _shape_repr(array.shape)

~/anaconda3/envs/fastai/lib/python3.6/site-packages/sklearn/utils/validation.py in _assert_all_finite(X)
42 and not np.isfinite(X).all()):
43 raise ValueError("Input contains NaN, infinity"
---> 44 " or a value too large for %r." % X.dtype)
45
46

ValueError: ['AfterStateHoliday']: Input contains NaN, infinity or a value too large for dtype('float32').


#2

I got the same error. Did you resolve this?


(Xu Zhang) #3

No. I am new and need help from experts.


#4

I think I finally fixed this. My guess is that my earlier mistake was running the concat_csvs function at the top of the notebook; I don’t think that is necessary once the tgz file has been extracted.

At least, now that I skip that step, I have a model that seems to be running.