Weird test accuracy for VGG-like training

Hi Everyone,
I have trained my model many times with 2,900 images per class for 5 classes. I split the data 80% for training and 20% for validation, with shuffling.

Every time, I get zero validation (test) accuracy, as shown in this figure, even though the training accuracy keeps improving.

What do you think the problem is? In my opinion, this is not an overfitting problem.

Here is my code:

import csv
import cv2
import numpy as np
import matplotlib.pyplot as plt
import glob

def plot_CNN(history): # summarize history for accuracy
    fig1 = plt.figure()
    plt.plot(history.history['acc'], figure = fig1)
    plt.plot(history.history['val_acc'], figure = fig1)
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    fig1.savefig('results/pipeline_accuracy.png')
    
    fig2 = plt.figure() # summarize history for loss
    plt.plot(history.history['loss'], figure = fig2)
    plt.plot(history.history['val_loss'], figure = fig2)
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    fig2.savefig('results/pipeline_loss.png')
    
    

print("************ Preparing DATA **************")
class1 = glob.glob('class1/*jpg') # use a.extend() for adding training folder
class2 = glob.glob('class2/*jpg')
class3 = glob.glob('class3/*jpg')
class4 = glob.glob('class4/*jpg')
class5 = glob.glob('class5/*jpg')


all_lists = [class1, class2, class3, class4, class5]
print("all_lists length: ", len(all_lists))
images = [] 
targets = [] # target labels 0-4, one per image, matching the images list

current_target = 0


for image_list in all_lists:
    print("current_target ", current_target)
    print("current_list ", len(image_list))
    for i in range(len(image_list)):
        # reading an image in grayscale (the 0 flag makes cv2.imread load a single channel)
        image = cv2.imread(image_list[i], 0)
        # converting color space
        #image = cv2.cvtColor(srcBGR, cv2.COLOR_BGR2RGB)
        # appending to images list
        images.append(image)
        targets.append(current_target)
        # Pre-processing .. Flipping
        flipped_image = cv2.flip(image, 1)
        
        # appending the flipped image .. so data will be doubled
        images.append(flipped_image)
        targets.append(current_target)
        
    current_target += 1
        

print("number of images: ", len(images))
print("number of targets: ", len(targets))


#X_train = np.array(images)
#Y_train = np.array(targets)

X_train = np.stack(images, axis=0)
Y_train = np.stack(targets, axis=0)


print('****** DATA IS READY *******')
classes_output = 5
nb_epochs = 10
scale = 1


print('X_train shape before: ',X_train.shape)
print('Y_train shape before: ',Y_train.shape)
if scale ==1:
    X_train = X_train.reshape((X_train.shape[0], 256, 256, scale))
    print('X_train reshape: ',X_train.shape)


from keras.models import Sequential
from keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.layers import Activation, Dropout, Flatten, Dense, Lambda, Cropping2D
from keras.models import model_from_json

def Model():
    print("******** RUNNING MY MODEL **********")
    model = Sequential()
    #model.add(Lambda(lambda x: x/255.0 - 0.5, input_shape=(256, 256, scale))) # normalizing by dividing each pixel by 255, giving values in the range (0, 1), then shifting the mean from 0.5 to 0
    #model.add(Cropping2D(cropping=((70,25),(0,0))))
    
    #model.add(Convolution2D(32, 3, 3,activation='relu'))
    #model.add(Convolution2D(32, 3, 3,activation='relu'))
    model.add(Convolution2D(32, (3, 3), activation='relu', input_shape=(256, 256, scale)))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    
    #model.add(Convolution2D(64, 3, 3,activation='relu'))
    #model.add(Convolution2D(64, 3, 3,activation='relu'))
    model.add(Convolution2D(64, (3, 3),activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    
    #model.add(Convolution2D(128, 3, 3,activation='relu'))
    #model.add(Convolution2D(128, 3, 3,activation='relu'))
    model.add(Convolution2D(128, (3, 3),activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    
    #model.add(Convolution2D(64, 3, 3))
    #model.add(Convolution2D(64, 3, 3))
    
    
    #  fully-connected layers
    model.add(Flatten())  # this converts our 3D feature maps to 1D feature vectors
    model.add(Dense(100))
    model.add(Dropout(0.5))
    #model.add(Dense(50))
    #model.add(Dropout(0.5))
    model.add(Dense(10))
    model.add(Dense(classes_output))
    model.add(Activation('sigmoid'))
    print("******** MODEL IS BUILT **********")
    return model


X_normalized = X_train / 255.0 - 0.5 # normalizing data to roughly [-0.5, 0.5]
from sklearn.preprocessing import LabelBinarizer
label_binarizer = LabelBinarizer()
y_one_hot = label_binarizer.fit_transform(Y_train)

myModel = Model() 
myModel.compile('adam', 'categorical_crossentropy', ['accuracy'])
history = myModel.fit(X_normalized, y_one_hot, batch_size=32, validation_split=0.2, shuffle=True, epochs=nb_epochs)#, verbose=2)
print("Model is compiled and fitted with ", nb_epochs , " epochs")

model_json = myModel.to_json()
with open("results/model.json", "w") as json_file:
    json_file.write(model_json)
    
myModel.save_weights('results/model.h5')

print('******** MODEL IS SAVED *********')
print(history.history.keys())
print(myModel.count_params())
print(myModel.summary())

plot_CNN(history)

It would be better to validate this. Can you add one more dropout layer before flattening and check?

i.e. after

model.add(Convolution2D(128, (3, 3),activation='relu'))
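
A minimal sketch of that change, assuming the rest of your Model() function stays as posted (the 0.25 rate is only an example):

model.add(Convolution2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))  # extra dropout before flattening; the rate is illustrative
model.add(Flatten())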

  1. Use softmax activation instead of sigmoid (see the sketch after this list).

  2. Please show us the full log with train/val losses and accuracies. Can you make the model overfit (get close to 100% training accuracy)? Does the validation accuracy stay at 0 the whole time? It should be close to 1/5 at the beginning.
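
For point 1, a minimal sketch of the change at the end of Model(), assuming everything else stays as you posted:

model.add(Dense(classes_output))
model.add(Activation('softmax'))  # softmax gives a probability distribution over the 5 classes, matching categorical_crossentropy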

P.S.
Is it just me, or is the figure missing? (You wrote "as shown in this figure".)


Thanks @SakvaUA for your reply.

I used “softmax”; nothing changed.

I am sorry, I just noticed that I did not upload the accuracy figure; I have updated my post now. I think there is a bug in my code that causes the zero accuracy on the validation data. Do you think there is a problem in how the data is prepared?

Hi @anandsaha
I am going to add this dropout. However, I still think the problem is not overfitting, which is what a dropout layer would address.
I updated my post by adding the figure. It is really weird that the validation accuracy is always zero. Do you think there is a problem in how the data is prepared?

What about validation loss? Does it change over time?

Here it is; the validation loss goes roughly from 10 to 16:

Train on 23200 samples, validate on 5800 samples
Epoch 1/10
loss: 1.1621 - acc: 0.4698 - val_loss: 10.8291 - val_acc: 0.0000e+00

Epoch 2/10
loss: 0.9558 - acc: 0.6066 - val_loss: 13.5940 - val_acc: 0.0000e+00

Epoch 3/10
loss: 0.7511 - acc: 0.6953 - val_loss: 14.4218 - val_acc: 0.0000e+00

Epoch 4/10
loss: 0.5217 - acc: 0.7941 - val_loss: 15.6497 - val_acc: 0.0000e+00

Epoch 5/10
loss: 0.3343 - acc: 0.8678 - val_loss: 15.9435 - val_acc: 0.0000e+00

Epoch 6/10
loss: 0.2258 - acc: 0.9126 - val_loss: 16.0631 - val_acc: 0.0000e+00

Epoch 7/10
loss: 0.1757 - acc: 0.9325 - val_loss: 16.0811 - val_acc: 0.0000e+00

Epoch 8/10
loss: 0.1365 - acc: 0.9485 - val_loss: 16.1091 - val_acc: 0.0000e+00

Epoch 9/10
loss: 0.1161 - acc: 0.9554 - val_loss: 16.1084 - val_acc: 0.0000e+00

Epoch 10/10
loss: 0.0990 - acc: 0.9623 - val_loss: 16.1156 - val_acc: 0.0000e+00

@SakvaUA @anandsaha
Do you think I have a problem with the validation_split argument and shuffle=True?

https://keras.io/getting-started/faq/#how-is-the-validation-split-computed

It seems you will need to shuffle your data manually.

Shuffle X_normalized and y_one_hot before calling fit.

I just finished training my model after adding what you suggested. It is the same problem.

How do I shuffle them before fit?

Also, I found this link, but I did not understand how it was solved:
https://github.com/fchollet/keras/issues/4298

Something like

data = np.array(np.arange(1, 20, 1))
print('Data\n', data)
np.random.shuffle(data)
print('Shuffled Data\n', data)
Data
[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]
Shuffled Data
[10  5 13 11  2 14  8  9  6 17 18 16 12 15  4  3 19  7  1]

But wait, you have to shuffle the data and labels in the same order, otherwise they will go out of sync. One way is:

data = np.array(np.arange(1, 20, 1))
labels = np.array(np.arange(11, 30, 1))

length = len(data)
mask = np.random.choice(length, length, replace=False)
# Use the same mask to maintain the shuffling sequence between data and labels
shuffled_data = data[mask]
shuffled_labels = labels[mask]

print('Data and Label\n', data, '\n', labels)
print('Shuffled Data and Label\n', shuffled_data, '\n', shuffled_labels)
Data and Label
 [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19] 
 [11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29]
Shuffled Data and Label
 [ 4 10  3 14  2  6 12 13 18 11  8  5 16  9 15  7 17  1 19] 
 [14 20 13 24 12 16 22 23 28 21 18 15 26 19 25 17 27 11 29]

@anandsaha

That means you are asking me to split the data into train and validation after shuffling, right?
Just to remind you, I keep the samples and targets in separate arrays, i.e. X_normalized and y_one_hot. Shuffling them independently would mismatch the data.

Yep, I just posted a follow-up reply.

No, the splitting is done by Keras when you pass validation_split=0.2. You just have to make sure the elements in X_normalized and y_one_hot are shuffled.

Keras takes the bottom x% of your data as validation data, so we need to make sure the arrays we are passing are shuffled well.
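
To check whether that is what is hurting you here, a hypothetical diagnostic (assuming Y_train from your script is still in scope) is to look at which classes end up in the bottom 20%; since you append the images class by class, the tail is likely a single class:

n_val = int(0.2 * len(Y_train))  # validation_split=0.2 holds out the last 20% of the arrays
print(np.unique(Y_train[-n_val:], return_counts=True))
# if this prints only one class, the validation set is made of a class the model rarely or never
# saw during training, which would explain val_acc staying at 0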


One more thing: you should pass replace=False to the choice function, so that the same element is not repeated in the mask.

mask = np.random.choice(length, length, replace=False)

I think this is simpler and cleaner:

mask = np.random.permutation(length)
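
Applied to your arrays, a minimal sketch (assuming X_normalized and y_one_hot as built in your script):

perm = np.random.permutation(len(X_normalized))  # one shared shuffling order
X_normalized = X_normalized[perm]
y_one_hot = y_one_hot[perm]
# now validation_split=0.2 in fit() holds out a random 20% instead of the last class in the arrays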

It looks like the model is outputting all zeros. It has little capacity and has a hard time learning a useful representation. Try removing the dropout and see how it goes.

Check this out:

It has almost the same divergence characteristics as yours, except that there is more randomness in it than in yours.

(And the cause there is attributed to overfitting.) But the validation accuracy staying constantly at zero really looks like a software bug, doesn't it? Maybe some floating-point truncation error somewhere?