Part 1: Lesson3 predictions from learn.TTA() return weird shape

MaheshBhosale · March 5, 2018, 5:30pm

I was following the dog breed classification example from Kaggle, as Jeremy explains in lesson 3 of Part 1.
log_preds, y = learn.TTA(is_test = True)
log_preds.shape
(5, 10357, 120)
when it should actually be:
(10357, 120)

This is what my code looks like:

# coding: utf-8

# In[1]:


# Notebook boilerplate kept at the top of every notebook: automatic
# reloading of modules and inline plotting.
_shell = get_ipython()
_shell.run_line_magic('reload_ext', 'autoreload')
_shell.run_line_magic('autoreload', '2')
_shell.run_line_magic('matplotlib', 'inline')


# In[2]:


# This file contains all the main external libs we'll use
from fastai.imports import *


# In[3]:


from fastai.transforms import *
from fastai.conv_learner import *
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *
import torch
import matplotlib.pyplot as plt


# In[4]:


# Root directory of the dog-breed dataset (labels.csv plus the train/ and
# test/ folders referenced by the cells below).
PATH = "C:\\Users\\Mahesh.Bhosale\\fastai\\data\\dogs_breed\\"


# In[5]:


# Sanity check: True only when both CUDA and cuDNN are usable.
torch.cuda.is_available() and torch.backends.cudnn.enabled


# In[38]:


sz = 224        # image size passed to the transforms
arch = resnet50 # pretrained architecture
bs = 58         # batch size


# In[83]:


label_csv = f'{PATH}labels.csv'
# Number of lines in the labels file minus one (presumably the header row).
# The `with` block guarantees the file handle is closed after counting, and
# the generator avoids materialising every line in a list.
with open(label_csv) as label_file:
    n = sum(1 for _ in label_file) - 1
# Indices held out for validation, drawn from the n labelled rows.
val_idxs = get_cv_idxs(n)


# In[40]:


# Show what the dataset directory contains.
os.listdir(PATH)


# In[41]:


# Load the labels file into a DataFrame for inspection.
label_df = pd.read_csv(label_csv)


# In[42]:


# Peek at the first few rows.
label_df.head()


# In[43]:


# Count rows per breed (aggfunc=len) and list breeds from most to least
# frequent, ordering by the resulting 'id' count column.
label_df.pivot_table(index = 'breed', aggfunc=len).sort_values('id', ascending = False)


# In[44]:


# Transforms for `arch` at size `sz` with side-on augmentation, then the data
# object built from labels.csv using the held-out validation indices and the
# 'test' folder as the test set.
tfms = tfms_from_model(arch, sz, aug_tfms=transforms_side_on, max_zoom=1.1)
data = ImageClassifierData.from_csv(
    PATH, 'train', f'{PATH}labels.csv',
    test_name='test', val_idxs=val_idxs, suffix='.jpg',
    tfms=tfms, bs=bs,
)


# In[45]:


# Full path of the first training image, used as a sample.
fn = PATH + data.trn_ds.fnames[0]; fn


# In[46]:


img = PIL.Image.open(fn); img


# In[47]:


img.size


# In[48]:


# Map every training file name to the size PIL reports for it. Each image is
# opened inside a `with` block so its file handle is released as soon as the
# size has been read, instead of leaving thousands of handles to the garbage
# collector.
size_d = {}
for k in data.trn_ds.fnames:
    with PIL.Image.open(PATH + k) as im:
        size_d[k] = im.size
# NOTE(review): PIL's `size` is (width, height), so `row_sz` actually holds
# widths and `col_sz` heights; the names are kept because later cells use them.
row_sz, col_sz = list(zip(*size_d.values()))


# In[49]:


row_sz = np.array(row_sz); col_sz = np.array(col_sz)


# In[50]:


# Distribution of the first size component across the training images.
plt.hist(row_sz)


# In[51]:


# Distribution of the second size component across the training images.
plt.hist(col_sz)


# In[52]:


def get_data(sz, bs):
    """Build the image classifier data object for a given size and batch size.

    Reads the module-level `arch`, `PATH` and `val_idxs`.

    Args:
        sz: image size passed to `tfms_from_model`.
        bs: batch size passed to `ImageClassifierData.from_csv`.

    Returns:
        The data object itself when `sz` is greater than 300; otherwise the
        result of `data.resize(340, 'tmp')`.
    """
    transforms = tfms_from_model(arch, sz, aug_tfms=transforms_side_on, max_zoom=1.1)
    dataset = ImageClassifierData.from_csv(
        PATH, 'train', f'{PATH}labels.csv',
        test_name='test', num_workers=4, val_idxs=val_idxs,
        suffix='.jpg', tfms=transforms, bs=bs,
    )
    if sz > 300:
        return dataset
    return dataset.resize(340, 'tmp')


# In[53]:


# Rebuild `data` through get_data at the initial size `sz`; this replaces the
# `data` object created by the earlier from_csv cell.
data = get_data(sz, bs)


# In[54]:


# First learner: pretrained `arch` on `data` with precompute=True.
learn = ConvLearner.pretrained(arch, data, precompute=True)


# In[55]:


learn.fit(1e-2, 5)


# In[56]:


# NOTE(review): this rebinds `learn`, so the learner fitted in the previous
# cell is discarded. The new learner adds ps=0.5 (presumably the dropout
# probability; confirm against the fastai docs).
learn = ConvLearner.pretrained(arch, data, precompute = True, ps=0.5)


# In[57]:


learn.fit(1e-2, 10)


# In[58]:


# Turn precompute off before further training (presumably so that data
# augmentation takes effect; confirm against the fastai docs).
learn.precompute=False


# In[62]:


learn.fit(1e-2, 5, cycle_len=1)


# In[64]:


# Swap in data built at size 299 and freeze the learner before continuing.
# NOTE(review): the module-level `data` still refers to the size-`sz` object;
# only the learner's data is replaced here.
learn.set_data(get_data(299, bs))
learn.freeze()


# In[65]:


learn.fit(1e-2, 1, cycle_len=1)


# In[66]:


learn.fit(1e-2, 1, cycle_len=1, cycle_mult=2)


# In[75]:


# Class names, used later as the column headers of the submission frame.
data.classes


# In[76]:


# Test-set file names, used later to derive the submission ids.
data.test_ds.fnames


# In[84]:


# Test-time-augmentation predictions on the test set.
# NOTE(review): as reported in this thread, log_preds comes back with shape
# (5, 10357, 120), i.e. with an extra leading axis (presumably one entry per
# augmented pass; confirm), so it must be averaged over axis 0 before use.
# The thread also reports that `y` is all zeros here (presumably because the
# test set has no labels), so it cannot be used to compute a metric.
log_preds, y = learn.TTA(is_test = True)


# In[87]:


y.shape


# In[79]:


log_preds.shape


# In[ ]:


# Plain (non-TTA) predictions on the test set, exponentiated.
preds = np.exp(learn.predict(is_test = True))


# In[ ]:


preds.shape


# In[71]:


# learn.TTA returns log-predictions with an extra leading axis (the thread
# reports shape (5, 10357, 120)). Exponentiate and average over that axis so
# `probs` has one row per test image and one column per class. Without
# `axis=0`, np.mean collapses everything to a single scalar.
probs = np.mean(np.exp(log_preds), axis=0)
probs.shape


# In[72]:


# Submission frame: one probability column per class, plus an 'id' column.
df = pd.DataFrame(probs)
df.columns = data.classes
# Drop the first 5 and last 4 characters of each file name (presumably the
# 'test/' prefix and the '.jpg' suffix) to obtain the image id.
df.insert(0, 'id', [o[5:-4] for o in data.test_ds.fnames])
df.head()
# Index of the most probable class for each test image. Without `axis=1`,
# argmax would return one index into the flattened array.
np.argmax(probs, axis=1)

MaheshBhosale · March 6, 2018, 4:02am

Is it due to n_aug=4, which is the default value, and to this line: preds1=[preds1]*math.ceil(n_aug/4)?

laphi · March 6, 2018, 4:12am

See this thread

and use the solution by eduardopoleo (take the average).

MaheshBhosale · March 6, 2018, 4:59am

Thanks, that solved the problem above. But the y I get here is all zeros, so
metrics.log_loss(y, probabilities) raises an error:

    ---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-99-65d3a268010c> in <module>()
----> 1 metrics.log_loss(y, probabilities)

~\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\sklearn\metrics\classification.py in log_loss(y_true, y_pred, eps, normalize, sample_weight, labels)
   1652             raise ValueError('y_true contains only one label ({0}). Please '
   1653                              'provide the true labels explicitly through the '
-> 1654                              'labels argument.'.format(lb.classes_[0]))
   1655         else:
   1656             raise ValueError('The labels array needs to contain at least two '

ValueError: y_true contains only one label (0.0). Please provide the true labels explicitly through the labels argument.