Getting a RuntimeError for cross entropy. What should be changed, and why is it happening?

I am testing on the MovieLens dataset.

dls=CollabDataLoaders.from_df(ratings,item_name='Title',bs=64)

class deeplearn(Module):
    def __init__(self,user_size,movie_size,n_act=50,y_range=(0,5.5)):
        self.user_factors=create_params(user_size)
        self.movie_factors=create_params(movie_size)
        self.layers=nn.Sequential(
                    nn.Linear(user_size[1]+movie_size[1],n_act),
                    nn.ReLU(),
                    nn.Linear(n_act,5)
        )
        self.y_range=y_range
        
    def forward(self,x):
        embs=self.user_factors[x[:,0]],self.movie_factors[x[:,1]]
        x1=self.layers(torch.cat(embs,dim=1))
        return sigmoid_range(x1,*self.y_range)

embs=get_emb_sz(dls)        
n_users=6041
n_movies=3707

model = deeplearn(*embs)
learn3 = Learner(dls, model, loss_func=CrossEntropyLossFlat)
learn3.fit_one_cycle(5, 5e-3, wd=0.01)

After executing, I get:
RuntimeError: Boolean value of Tensor with more than one value is ambiguous

RuntimeError                              Traceback (most recent call last)
<ipython-input-145-b697f71a53c1> in <module>
      3 model = deeplearn(*embs)
      4 learn3 = Learner(dls, model, loss_func=CrossEntropyLossFlat)
----> 5 learn3.fit_one_cycle(5, 5e-3, wd=0.01)

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/fastai/callback/schedule.py in fit_one_cycle(self, n_epoch, lr_max, div, div_final, pct_start, wd, moms, cbs, reset_opt)
    110     scheds = {'lr': combined_cos(pct_start, lr_max/div, lr_max, lr_max/div_final),
    111               'mom': combined_cos(pct_start, *(self.moms if moms is None else moms))}
--> 112     self.fit(n_epoch, cbs=ParamScheduler(scheds)+L(cbs), reset_opt=reset_opt, wd=wd)
    113 
    114 # Cell

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/fastai/learner.py in fit(self, n_epoch, lr, wd, cbs, reset_opt)
    209             self.opt.set_hypers(lr=self.lr if lr is None else lr)
    210             self.n_epoch = n_epoch
--> 211             self._with_events(self._do_fit, 'fit', CancelFitException, self._end_cleanup)
    212 
    213     def _end_cleanup(self): self.dl,self.xb,self.yb,self.pred,self.loss = None,(None,),(None,),None,None

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    158 
    159     def _with_events(self, f, event_type, ex, final=noop):
--> 160         try: self(f'before_{event_type}');  f()
    161         except ex: self(f'after_cancel_{event_type}')
    162         self(f'after_{event_type}');  final()

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/fastai/learner.py in _do_fit(self)
    200         for epoch in range(self.n_epoch):
    201             self.epoch=epoch
--> 202             self._with_events(self._do_epoch, 'epoch', CancelEpochException)
    203 
    204     def fit(self, n_epoch, lr=None, wd=None, cbs=None, reset_opt=False):

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    158 
    159     def _with_events(self, f, event_type, ex, final=noop):
--> 160         try: self(f'before_{event_type}');  f()
    161         except ex: self(f'after_cancel_{event_type}')
    162         self(f'after_{event_type}');  final()

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/fastai/learner.py in _do_epoch(self)
    194 
    195     def _do_epoch(self):
--> 196         self._do_epoch_train()
    197         self._do_epoch_validate()
    198 

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/fastai/learner.py in _do_epoch_train(self)
    186     def _do_epoch_train(self):
    187         self.dl = self.dls.train
--> 188         self._with_events(self.all_batches, 'train', CancelTrainException)
    189 
    190     def _do_epoch_validate(self, ds_idx=1, dl=None):

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    158 
    159     def _with_events(self, f, event_type, ex, final=noop):
--> 160         try: self(f'before_{event_type}');  f()
    161         except ex: self(f'after_cancel_{event_type}')
    162         self(f'after_{event_type}');  final()

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/fastai/learner.py in all_batches(self)
    164     def all_batches(self):
    165         self.n_iter = len(self.dl)
--> 166         for o in enumerate(self.dl): self.one_batch(*o)
    167 
    168     def _do_one_batch(self):

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/fastai/learner.py in one_batch(self, i, b)
    182         self.iter = i
    183         self._split(b)
--> 184         self._with_events(self._do_one_batch, 'batch', CancelBatchException)
    185 
    186     def _do_epoch_train(self):

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    158 
    159     def _with_events(self, f, event_type, ex, final=noop):
--> 160         try: self(f'before_{event_type}');  f()
    161         except ex: self(f'after_cancel_{event_type}')
    162         self(f'after_{event_type}');  final()

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/fastai/learner.py in _do_one_batch(self)
    170         self('after_pred')
    171         if len(self.yb):
--> 172             self.loss_grad = self.loss_func(self.pred, *self.yb)
    173             self.loss = self.loss_grad.clone()
    174         self('after_loss')

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/fastai/losses.py in __init__(self, axis, *args, **kwargs)
     41     y_int = True
     42     @use_kwargs_dict(keep=True, weight=None, ignore_index=-100, reduction='mean')
---> 43     def __init__(self, *args, axis=-1, **kwargs): super().__init__(nn.CrossEntropyLoss, *args, axis=axis, **kwargs)
     44     def decodes(self, x):    return x.argmax(dim=self.axis)
     45     def activation(self, x): return F.softmax(x, dim=self.axis)

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/fastai/losses.py in __init__(self, loss_cls, axis, flatten, floatify, is_2d, *args, **kwargs)
     16     def __init__(self, loss_cls, *args, axis=-1, flatten=True, floatify=False, is_2d=True, **kwargs):
     17         store_attr("axis,flatten,floatify,is_2d")
---> 18         self.func = loss_cls(*args,**kwargs)
     19         functools.update_wrapper(self, self.func)
     20 

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/torch/nn/modules/loss.py in __init__(self, weight, size_average, ignore_index, reduce, reduction)
    955     def __init__(self, weight: Optional[Tensor] = None, size_average=None, ignore_index: int = -100,
    956                  reduce=None, reduction: str = 'mean') -> None:
--> 957         super(CrossEntropyLoss, self).__init__(weight, size_average, reduce, reduction)
    958         self.ignore_index = ignore_index
    959 

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/torch/nn/modules/loss.py in __init__(self, weight, size_average, reduce, reduction)
     23 class _WeightedLoss(_Loss):
     24     def __init__(self, weight: Optional[Tensor] = None, size_average=None, reduce=None, reduction: str = 'mean') -> None:
---> 25         super(_WeightedLoss, self).__init__(size_average, reduce, reduction)
     26         self.register_buffer('weight', weight)
     27 

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/torch/nn/modules/loss.py in __init__(self, size_average, reduce, reduction)
     16         super(_Loss, self).__init__()
     17         if size_average is not None or reduce is not None:
---> 18             self.reduction = _Reduction.legacy_get_string(size_average, reduce)
     19         else:
     20             self.reduction = reduction

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/torch/nn/_reduction.py in legacy_get_string(size_average, reduce, emit_warning)
     35         reduce = True
     36 
---> 37     if size_average and reduce:
     38         ret = 'mean'
     39     elif reduce:

RuntimeError: Boolean value of Tensor with more than one value is ambiguous

You should use CrossEntropyLossFlat() (an instance, with the parentheses), not the class itself.
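
To see why passing the class produces that particular message: fastai calls loss_func(pred, targ) (see the loss_grad line in the traceback above), so passing the class turns that into a constructor call, and the prediction tensor lands in nn.CrossEntropyLoss's weight parameter while the target tensor lands in size_average, which PyTorch then evaluates as a boolean. A minimal reproduction with plain PyTorch (shapes are illustrative):

import torch
import torch.nn as nn

pred = torch.randn(64, 5)              # a batch of logits
targ = torch.randint(0, 5, (64,))      # a batch of class indices

# Passing the class makes fastai effectively run CrossEntropyLossFlat(pred, targ),
# i.e. a constructor call: pred becomes `weight`, targ becomes `size_average`,
# and `if size_average and reduce:` asks for the boolean value of a whole tensor.
try:
    nn.CrossEntropyLoss(pred, targ)
except RuntimeError as e:
    print(e)  # Boolean value of Tensor with more than one value is ambiguous

loss = nn.CrossEntropyLoss()(pred, targ)  # instantiate first, then call: fine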


I used it, but it gives me another error: RuntimeError: CUDA error: device-side assert triggered

RuntimeError                              Traceback (most recent call last)
<ipython-input-110-db86421c4928> in <module>
      4 loss_entropy=CrossEntropyLossFlat()
      5 learn3 = Learner(dls, model, loss_func=CrossEntropyLossFlat())
----> 6 learn3.fit_one_cycle(5, 5e-3, wd=0.01)

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/fastai/callback/schedule.py in fit_one_cycle(self, n_epoch, lr_max, div, div_final, pct_start, wd, moms, cbs, reset_opt)
    110     scheds = {'lr': combined_cos(pct_start, lr_max/div, lr_max, lr_max/div_final),
    111               'mom': combined_cos(pct_start, *(self.moms if moms is None else moms))}
--> 112     self.fit(n_epoch, cbs=ParamScheduler(scheds)+L(cbs), reset_opt=reset_opt, wd=wd)
    113 
    114 # Cell

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/fastai/learner.py in fit(self, n_epoch, lr, wd, cbs, reset_opt)
    209             self.opt.set_hypers(lr=self.lr if lr is None else lr)
    210             self.n_epoch = n_epoch
--> 211             self._with_events(self._do_fit, 'fit', CancelFitException, self._end_cleanup)
    212 
    213     def _end_cleanup(self): self.dl,self.xb,self.yb,self.pred,self.loss = None,(None,),(None,),None,None

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    158 
    159     def _with_events(self, f, event_type, ex, final=noop):
--> 160         try: self(f'before_{event_type}');  f()
    161         except ex: self(f'after_cancel_{event_type}')
    162         self(f'after_{event_type}');  final()

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/fastai/learner.py in _do_fit(self)
    200         for epoch in range(self.n_epoch):
    201             self.epoch=epoch
--> 202             self._with_events(self._do_epoch, 'epoch', CancelEpochException)
    203 
    204     def fit(self, n_epoch, lr=None, wd=None, cbs=None, reset_opt=False):

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    158 
    159     def _with_events(self, f, event_type, ex, final=noop):
--> 160         try: self(f'before_{event_type}');  f()
    161         except ex: self(f'after_cancel_{event_type}')
    162         self(f'after_{event_type}');  final()

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/fastai/learner.py in _do_epoch(self)
    194 
    195     def _do_epoch(self):
--> 196         self._do_epoch_train()
    197         self._do_epoch_validate()
    198 

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/fastai/learner.py in _do_epoch_train(self)
    186     def _do_epoch_train(self):
    187         self.dl = self.dls.train
--> 188         self._with_events(self.all_batches, 'train', CancelTrainException)
    189 
    190     def _do_epoch_validate(self, ds_idx=1, dl=None):

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    158 
    159     def _with_events(self, f, event_type, ex, final=noop):
--> 160         try: self(f'before_{event_type}');  f()
    161         except ex: self(f'after_cancel_{event_type}')
    162         self(f'after_{event_type}');  final()

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/fastai/learner.py in all_batches(self)
    164     def all_batches(self):
    165         self.n_iter = len(self.dl)
--> 166         for o in enumerate(self.dl): self.one_batch(*o)
    167 
    168     def _do_one_batch(self):

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/fastai/learner.py in one_batch(self, i, b)
    182         self.iter = i
    183         self._split(b)
--> 184         self._with_events(self._do_one_batch, 'batch', CancelBatchException)
    185 
    186     def _do_epoch_train(self):

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    158 
    159     def _with_events(self, f, event_type, ex, final=noop):
--> 160         try: self(f'before_{event_type}');  f()
    161         except ex: self(f'after_cancel_{event_type}')
    162         self(f'after_{event_type}');  final()

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/fastai/learner.py in _do_one_batch(self)
    171         if len(self.yb):
    172             self.loss_grad = self.loss_func(self.pred, *self.yb)
--> 173             self.loss = self.loss_grad.clone()
    174         self('after_loss')
    175         if not self.training or not len(self.yb): return

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/fastai/torch_core.py in __torch_function__(self, func, types, args, kwargs)
    327         convert=False
    328         if _torch_handled(args, self._opt, func): convert,types = type(self),(torch.Tensor,)
--> 329         res = super().__torch_function__(func, types, args=args, kwargs=kwargs)
    330         if convert: res = convert(res)
    331         if isinstance(res, TensorBase): res.set_meta(self, as_copy=True)

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/torch/tensor.py in __torch_function__(cls, func, types, args, kwargs)
    993 
    994         with _C.DisableTorchFunction():
--> 995             ret = func(*args, **kwargs)
    996             return _convert(ret, cls)
    997 

RuntimeError: CUDA error: device-side assert triggered

Anything else that I can try, @muellerzr?

CrossEntropyLossFlat() should do the trick, but you will probably need to restart the kernel, and maybe even the server (or your computer, if you are using your own rig).

I tried a lot on my own rig, but it is not working there. Now I will check on Colab and update the result here. Thanks @ali_baba

So when dealing with those, it generally means some y value was not included in the training set. Try rerunning it fully on the CPU (so pass device="cpu" to your .dataloaders() call).

It will give you a better answer.
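
A related debugging option, standard PyTorch practice rather than anything fastai-specific, is to make CUDA kernel launches synchronous so the device-side assert surfaces at the operation that actually failed:

import os

# Must be set before any CUDA work happens in the process (e.g. in the
# first notebook cell). Training becomes slower, but the device-side
# assert then comes with a traceback pointing at the failing op.
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'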

I gave device="cpu", @muellerzr, like below:
dls=CollabDataLoaders.from_df(ratings,item_name='Title',device="cpu",bs=64)

but I still get the same error:

RuntimeError: Boolean value of Tensor with more than one value is ambiguous

Can you post how you made the Learner in that instance and how you called fit? (Even if it's repeating what you said above.)

@AnkurBassi Or let's try it a different way. Try this:

dls=CollabDataLoaders.from_df(ratings,item_name='Title',bs=64, device='cpu')

class deeplearn(Module):
    def __init__(self,user_size,movie_size,n_act=50,y_range=(0,5.5)):
        self.user_factors=create_params(user_size)
        self.movie_factors=create_params(movie_size)
        self.layers=nn.Sequential(
                    nn.Linear(user_size[1]+movie_size[1],n_act),
                    nn.ReLU(),
                    nn.Linear(n_act,5)
        )
        self.y_range=y_range
        
    def forward(self,x):
        embs=self.user_factors[x[:,0]],self.movie_factors[x[:,1]]
        x1=self.layers(torch.cat(embs,dim=1))
        return sigmoid_range(x1,*self.y_range)

embs=get_emb_sz(dls)        
n_users=6041
n_movies=3707

model = deeplearn(*embs)
learn3 = Learner(dls, model, loss_func=CrossEntropyLossFlat())
learn3.fit_one_cycle(5, 5e-3, wd=0.01)

IndexError                                Traceback (most recent call last)
<ipython-input-…> in <module>
     23 model = deeplearn(*embs)
     24 learn3 = Learner(dls, model, loss_func=CrossEntropyLossFlat())
---> 25 learn3.fit_one_cycle(5, 5e-3, wd=0.01)

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/fastai/callback/schedule.py in fit_one_cycle(self, n_epoch, lr_max, div, div_final, pct_start, wd, moms, cbs, reset_opt)
    110     scheds = {'lr': combined_cos(pct_start, lr_max/div, lr_max, lr_max/div_final),
    111               'mom': combined_cos(pct_start, *(self.moms if moms is None else moms))}
--> 112     self.fit(n_epoch, cbs=ParamScheduler(scheds)+L(cbs), reset_opt=reset_opt, wd=wd)
    113 
    114 # Cell

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/fastai/learner.py in fit(self, n_epoch, lr, wd, cbs, reset_opt)
    209             self.opt.set_hypers(lr=self.lr if lr is None else lr)
    210             self.n_epoch = n_epoch
--> 211             self._with_events(self._do_fit, 'fit', CancelFitException, self._end_cleanup)
    212 
    213     def _end_cleanup(self): self.dl,self.xb,self.yb,self.pred,self.loss = None,(None,),(None,),None,None

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    158 
    159     def _with_events(self, f, event_type, ex, final=noop):
--> 160         try: self(f'before_{event_type}');  f()
    161         except ex: self(f'after_cancel_{event_type}')
    162         self(f'after_{event_type}');  final()

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/fastai/learner.py in _do_fit(self)
    200         for epoch in range(self.n_epoch):
    201             self.epoch=epoch
--> 202             self._with_events(self._do_epoch, 'epoch', CancelEpochException)
    203 
    204     def fit(self, n_epoch, lr=None, wd=None, cbs=None, reset_opt=False):

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    158 
    159     def _with_events(self, f, event_type, ex, final=noop):
--> 160         try: self(f'before_{event_type}');  f()
    161         except ex: self(f'after_cancel_{event_type}')
    162         self(f'after_{event_type}');  final()

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/fastai/learner.py in _do_epoch(self)
    194 
    195     def _do_epoch(self):
--> 196         self._do_epoch_train()
    197         self._do_epoch_validate()
    198 

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/fastai/learner.py in _do_epoch_train(self)
    186     def _do_epoch_train(self):
    187         self.dl = self.dls.train
--> 188         self._with_events(self.all_batches, 'train', CancelTrainException)
    189 
    190     def _do_epoch_validate(self, ds_idx=1, dl=None):

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    158 
    159     def _with_events(self, f, event_type, ex, final=noop):
--> 160         try: self(f'before_{event_type}');  f()
    161         except ex: self(f'after_cancel_{event_type}')
    162         self(f'after_{event_type}');  final()

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/fastai/learner.py in all_batches(self)
    164     def all_batches(self):
    165         self.n_iter = len(self.dl)
--> 166         for o in enumerate(self.dl): self.one_batch(*o)
    167 
    168     def _do_one_batch(self):

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/fastai/learner.py in one_batch(self, i, b)
    182         self.iter = i
    183         self._split(b)
--> 184         self._with_events(self._do_one_batch, 'batch', CancelBatchException)
    185 
    186     def _do_epoch_train(self):

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    158 
    159     def _with_events(self, f, event_type, ex, final=noop):
--> 160         try: self(f'before_{event_type}');  f()
    161         except ex: self(f'after_cancel_{event_type}')
    162         self(f'after_{event_type}');  final()

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/fastai/learner.py in _do_one_batch(self)
    170         self('after_pred')
    171         if len(self.yb):
--> 172             self.loss_grad = self.loss_func(self.pred, *self.yb)
    173             self.loss = self.loss_grad.clone()
    174         self('after_loss')

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/fastai/losses.py in __call__(self, inp, targ, **kwargs)
     33         if targ.dtype in [torch.int8, torch.int16, torch.int32]: targ = targ.long()
     34         if self.flatten: inp = inp.view(-1,inp.shape[-1]) if self.is_2d else inp.view(-1)
---> 35         return self.func.__call__(inp, targ.view(-1) if self.flatten else targ, **kwargs)
     36 
     37 # Cell

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/torch/nn/modules/loss.py in forward(self, input, target)
    960     def forward(self, input: Tensor, target: Tensor) -> Tensor:
    961         return F.cross_entropy(input, target, weight=self.weight,
--> 962                                ignore_index=self.ignore_index, reduction=self.reduction)
    963 
    964 

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/torch/nn/functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction)
   2463                 cross_entropy, tens_ops, input, target, weight=weight,
   2464                 size_average=size_average, ignore_index=ignore_index, reduce=reduce,
-> 2465                 reduction=reduction)
   2466     if size_average is not None or reduce is not None:
   2467         reduction = _Reduction.legacy_get_string(size_average, reduce)

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/torch/overrides.py in handle_torch_function(public_api, relevant_args, *args, **kwargs)
   1058         # Use `public_api` instead of `implementation` so __torch_function__
   1059         # implementations can do equality/identity comparisons.
-> 1060         result = overloaded_arg.__torch_function__(public_api, types, args, kwargs)
   1061 
   1062         if result is not NotImplemented:

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/fastai/torch_core.py in __torch_function__(self, func, types, args, kwargs)
    327         convert=False
    328         if _torch_handled(args, self._opt, func): convert,types = type(self),(torch.Tensor,)
--> 329         res = super().__torch_function__(func, types, args=args, kwargs=kwargs)
    330         if convert: res = convert(res)
    331         if isinstance(res, TensorBase): res.set_meta(self, as_copy=True)

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/torch/tensor.py in __torch_function__(cls, func, types, args, kwargs)
    993 
    994         with _C.DisableTorchFunction():
--> 995             ret = func(*args, **kwargs)
    996             return _convert(ret, cls)
    997 

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/torch/nn/functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction)
   2466     if size_average is not None or reduce is not None:
   2467         reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 2468     return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
   2469 
   2470 

~/anaconda3/envs/tf-gpu/lib/python3.6/site-packages/torch/nn/functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce, reduction)
   2262                          .format(input.size(0), target.size(0)))
   2263     if dim == 2:
-> 2264         ret = torch._C._nn.nll_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
   2265     elif dim == 4:
   2266         ret = torch._C._nn.nll_loss2d(input, target, weight, _Reduction.get_enum(reduction), ignore_index)

IndexError: Target 5 is out of bounds.

It says out of bounds, but now at least we have narrowed the scope. So what this means is that class 5 is not part of your training set, but it is in the validation set. You may need to either oversample your data or go a bit further with the DataBlock API. We can "fix" the problem by passing in a list of classes; however, if your data for class 5 is so rare that it only shows up in the validation set, it may be better to drop all instances of class 5 instead.

(I'm also wondering why, when you built the DataLoaders, an exception wasn't raised stating this…)

I am also not sure about this either, but your solution was a big help, thanks for that. I used nn.Linear(n_act,6) instead of 5 and it is working now. Any idea why the discrepancy?


Because the classes are numbered 0-n, class 5 was essentially your 6th class, needing a sixth activation in your model :slight_smile:
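
A minimal sketch of that indexing rule with plain PyTorch (shapes and values are illustrative):

import torch
import torch.nn.functional as F

targets = torch.tensor([1, 3, 5, 2])       # raw ratings used as class indices

logits5 = torch.randn(4, 5)                # 5 activations -> valid indices 0..4
# F.cross_entropy(logits5, targets)        # IndexError: Target 5 is out of bounds

logits6 = torch.randn(4, 6)                # 6 activations -> valid indices 0..5
loss = F.cross_entropy(logits6, targets)   # works, even though index 0 never occurs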

Yes, that is the answer. I was fairly certain of this too, but I wanted to cross-check, and to my surprise I found there is no class 0 in my target variable.

ratings.Rating.value_counts()
4    348971
3    261197
5    226310
2    107557
1     56174
Name: Rating, dtype: int64

Now I am confused about where we missed something.

I imagine this is due to an API thing; I wouldn't be surprised if fastai also expected there to be 0-star ratings (which is realistic).

I'm not too familiar with the recommendation system API/section of the DataBlock, so this is a hunch.

Yeah, that might be the reason.

Is there any other way of making the DataLoaders for this dataset without using the recommendation API?

You can make it via the DataBlock API.
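
A minimal sketch, assuming the DataFrame has 'User', 'Title' and 'Rating' columns (the split parameters are illustrative too):

from fastai.data.block import DataBlock, CategoryBlock
from fastai.data.transforms import ColReader, RandomSplitter

dblock = DataBlock(
    blocks=(CategoryBlock, CategoryBlock, CategoryBlock),  # user, movie, rating
    get_x=[ColReader('User'), ColReader('Title')],
    get_y=ColReader('Rating'),
    n_inp=2,                                # the first two blocks are inputs
    splitter=RandomSplitter(valid_pct=0.2, seed=42))
dls = dblock.dataloaders(ratings, bs=64)

# With CategoryBlock as the target, ratings 1-5 are vocab-encoded to class
# indices 0-4, so five output activations line up with the targets without
# any manual shifting. Note that forward() would then receive the user and
# title tensors as two separate arguments rather than one stacked tensor.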

Cool, so the old way. Big thanks to you @muellerzr, I appreciate your time and knowledge.


Hi,
Another nice solution is adjusting the loss and accuracy functions instead of changing the last layer to 6. And if you build the DataLoaders via the DataBlock API, there is no need for any adjustment at all. I have summarized all the different solutions and results in the following blog post:
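
As a minimal sketch of that loss-and-accuracy adjustment idea, assuming ratings take the values 1-5 as shown earlier in the thread (the helper names here are illustrative):

from fastai.losses import CrossEntropyLossFlat
from fastai.metrics import accuracy

# Shift ratings 1-5 down to class indices 0-4 inside the loss, so the model
# can keep nn.Linear(n_act, 5):
_ce = CrossEntropyLossFlat()
def shifted_ce(pred, targ): return _ce(pred, targ - 1)

# Match the metric to the same shifted encoding:
def shifted_accuracy(pred, targ): return accuracy(pred, targ - 1)

# learn = Learner(dls, model, loss_func=shifted_ce, metrics=shifted_accuracy)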