Hi there,
I’m trying to do the assignment by creating and training the network myself, just to check my understanding. However, my network does not converge (it is stuck at 0.5 accuracy forever). Would anyone be able to give any hints as to why?
I’ve tested it on MNIST and, aside from being overkill, it works fine. I’ve tried scaling the pixel data to both [0, 1] and [-1, 1]; it doesn’t make any difference.
Any help would be greatly appreciated!
The model is below. I’m running it on my own machine, with the tensorflow-gpu backend and tf dimension ordering.
# Batch size and input geometry shared by the data generators and the model.
BATCH_SIZE = 32  # the original comment notes 8 as an alternative value
IMG_SIZE = 56  # the original comment notes 224 as an alternative value
N_CHANNELS, N_OUTPUTS = 3, 2

# (height, width, channels) tuple used as the input shape of the first layer.
IMG_SHAPE = (IMG_SIZE,) * 2 + (N_CHANNELS,)
# Directory-backed batch generators for training and validation.
# Both yield batches of BATCH_SIZE images resized to IMG_SIZE x IMG_SIZE with
# one-hot ('categorical') labels.
#
# rescale=1/255 maps pixel values into [0, 1] (assuming ordinary 8-bit
# images); feeding raw 0-255 values into a randomly initialised deep ReLU
# network tends to produce huge activations and a loss that never moves.
# The same scaling is applied to both generators so that validation data is
# distributed like the training data.
train_datagen = image.ImageDataGenerator(rescale=1.0 / 255)
train_generator = train_datagen.flow_from_directory(
    'data\\train',
    target_size=(IMG_SIZE, IMG_SIZE), class_mode='categorical',
    batch_size=BATCH_SIZE, shuffle=True)

valid_datagen = image.ImageDataGenerator(rescale=1.0 / 255)
valid_generator = valid_datagen.flow_from_directory(
    'data\\valid',  # the opening quote was missing here (syntax error)
    target_size=(IMG_SIZE, IMG_SIZE), class_mode='categorical',
    batch_size=BATCH_SIZE, shuffle=True)
# VGG-style classifier assembled from a compact description instead of one
# add() call per layer. The resulting layer sequence is unchanged.
model = Sequential()

# Each stage is (number of filters, number of padded 3x3 convolutions) and is
# followed by a 2x2 max-pool with stride 2.
conv_stages = [(64, 2), (128, 2), (256, 3)]

needs_input_shape = True  # only the very first layer declares the input shape
for n_filters, n_convs in conv_stages:
    for _ in range(n_convs):
        if needs_input_shape:
            model.add(ZeroPadding2D(1, input_shape=IMG_SHAPE))
            needs_input_shape = False
        else:
            model.add(ZeroPadding2D(1))
        model.add(Conv2D(n_filters, (3, 3), activation='relu'))
    model.add(MaxPooling2D(2, 2))

# Classifier head: two dropout-regularised fully connected layers followed by
# a softmax over the N_OUTPUTS classes.
model.add(Flatten())
for _ in range(2):
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))
model.add(Dense(N_OUTPUTS, activation='softmax'))
# Compile and train the model for 40 epochs, validating after every epoch.
#
# NOTE(review): assumes the file uses the standalone `keras` package (the
# post mentions a selectable backend); switch this import to
# `tensorflow.keras.optimizers` if the rest of the file uses tf.keras.
from keras.optimizers import Adam

# Number of batches needed to see every sample once. Integer ceiling division
# avoids the Python 2 pitfall where `samples / BATCH_SIZE` floors *before*
# np.ceil is applied and silently drops the final partial batch.
train_steps = (train_generator.samples + BATCH_SIZE - 1) // BATCH_SIZE
valid_steps = (valid_generator.samples + BATCH_SIZE - 1) // BATCH_SIZE

# The 'adam' string uses Keras's default learning rate, which is commonly too
# large for a deep VGG-style network without batch normalisation: the
# network collapses to predicting a single class (accuracy stuck at 0.5).
# A smaller step size lets training get started. The rate is passed
# positionally because the keyword name differs between Keras versions
# (`lr` vs `learning_rate`).
model.compile(loss='categorical_crossentropy',
              optimizer=Adam(1e-4),
              metrics=['accuracy'])

model.fit_generator(
    train_generator,
    steps_per_epoch=train_steps,
    epochs=40,
    validation_data=valid_generator,
    validation_steps=valid_steps)