Python – Keras models get constant loss and accuracy

Keras models get constant loss and accuracy… here is a solution to the problem.

Keras models get constant loss and accuracy

I’m trying to train a keras CNN on a Street View house number dataset. You can find the project here .
The problem is that neither loss nor accuracy changes over time during training. I’ve tried using 1 channel (grayscale) images, RGB (3 channel) images, wider (50,50) and smaller (28,28) images, more or less filters in convolutional layers, patches in wider and smaller pooling layers, with and without loss, with larger and smaller batches, with smaller and smaller optimizer learning steps, with different optimizer ,…

Training still insists on constant loss of

accuracy and accuracy

This is how I prepare the data

from PIL import Image
from PIL import ImageFilter
train_folders = 'sv_train/train'
test_folders = 'test'
extra_folders = 'extra'
SV_IMG_SIZE = 28
SV_CHANNELS = 3
train_imsize = np.ndarray([len(train_data),2])
k = 500
sv_images = []
max_images = 20000#len(train_data)
max_digits = 5
sv_labels = np.ones([max_images, max_digits], dtype=int) * 10 # init to 10 cause it would be no digit
nboxes = [[] for i in range(max_images)]
print ("%d to load" % len(train_data))
def getBBox(i,perc):

boxes = train_data[i]['boxes'] 
    x_min=9990
    y_min=9990
    x_max=0
    y_max=0
    for bid,b in enumerate(boxes):
        x_min = b['left'] if b['left'] <= x_min else x_min
        y_min = b['top'] if b['top'] <= y_min else y_min
        x_max = b['left']+b['width'] if  b['left']+b['width'] >= x_max else x_max
        y_max = b['top']+b['height'] if b['top']+b['height'] >= y_max else y_max

dy = y_max-y_min
    dx = x_max-x_min
    dpy = dy*perc
    dpx = dx*perc
    nboxes[i]=[dpx,dpy,dx,dy]
    return x_min-dpx, y_min-dpy, x_max+dpx, y_max+dpy

for i in range(max_images):
    print (" \r%d" % i ,end="")
    filename = train_data[i]['filename']
    fullname = os.path.join(train_folders, filename)
    boxes = train_data[i]['boxes']
    label = [10,10,10,10,10]
    lb = len(boxes)
    if lb <= max_digits:
        im = Image.open(fullname)
        x_min, y_min, x_max, y_max = getBBox(i,0.3)
        im = im.crop([x_min,y_min,x_max,y_max])
        owidth, oheight = im.size
        wr = SV_IMG_SIZE/float(owidth)
        hr = SV_IMG_SIZE/float(oheight)
        for bid,box in  enumerate(boxes):
            sv_labels[i][max_digits-lb+bid] = int(box['label'])

box = nboxes[i]
        box[0]*=wr
        box[1]*=wr
        box[2]*=hr
        box[3]*=hr
        im = im.resize((SV_IMG_SIZE,SV_IMG_SIZE),Image.ANTIALIAS)
        array = np.asarray(im)
        array =  array.reshape((SV_IMG_SIZE,SV_IMG_SIZE,SV_CHANNELS)).astype(np.float32)
        na = np.zeros([SV_IMG_SIZE,SV_IMG_SIZE,SV_CHANNELS],dtype=int)
        sv_images.append(array.astype(np.float32))

This is the model

from keras.optimizers import Adam
from keras.utils.np_utils import to_categorical

adam = Adam(lr=0.5)

model = Sequential()
x = Input((SV_IMG_SIZE, SV_IMG_SIZE,SV_CHANNELS))

y = Convolution2D(16, 3, 3, activation='relu', border_mode='same')(x)
y = Convolution2D(32, 3, 3, activation='relu', border_mode='valid')(y)
y = MaxPooling2D((2, 2))(y)
y = Convolution2D(128, 3, 3, activation='relu', border_mode='valid')(y)
y = MaxPooling2D((2, 2))(y)
y = Flatten()(y)
y = Dense(512, activation='relu')(y)

digit1 = Dense(11, activation="softmax")(y)
digit2 = Dense(11, activation="softmax")(y)
digit3 = Dense(11, activation="softmax")(y)
digit4 = Dense(11, activation="softmax")(y)
digit5 = Dense(11, activation="softmax")(y)
model = Model(input=x, output=[digit1, digit2, digit3,digit4,digit5])

model.compile(optimizer=adam,
          loss='categorical_crossentropy',
          metrics=['accuracy'])

sv_train_labels = [to_categorical(svt_labels[:,0]),
                   to_categorical(svt_labels[:,1]),
                   to_categorical(svt_labels[:,2]),
                   to_categorical(svt_labels[:,3]),
                   to_categorical(svt_labels[:,4])]
sv_validation_labels = [to_categorical(svv_labels[:,0]),
                        to_categorical(svv_labels[:,1]),
                        to_categorical(svv_labels[:,2]),
                        to_categorical(svv_labels[:,3]),
                        to_categorical(svv_labels[:,4])]

model.fit(sv_train, sv_train_labels, nb_epoch=50, batch_size=8,validation_data=(sv_validation, sv_validation_labels))

Solution

As I commented above, I recommend avoiding training a model to predict 5-digit number combinations. Training a model to predict a single number is more efficient. I tried building a quick example based on the Keras example cifar10_cnn.py in MNIST SHVN format 2 (cropped digits) :

import numpy as np
import scipy.io as sio
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils.np_utils import to_categorical

# parameters
nb_epoch = 10
batch_size = 32

# load data
nb_classes = 10
train_data = sio.loadmat('train_32x32.mat')
test_data = sio.loadmat('test_32x32.mat')
X_train = train_data['X']. T / 255
X_test = test_data['X']. T / 255
y_train = to_categorical(train_data['y'] % nb_classes)
y_test = to_categorical(test_data['y'] % nb_classes)

# model
model = Sequential()
model.add(Convolution2D(32, 3, 3, border_mode='same', input_shape=X_train.shape[1:]))
model.add(Activation('relu'))
model.add(Convolution2D(32, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Convolution2D(64, 3, 3, border_mode='same'))
model.add(Activation('relu'))
model.add(Convolution2D(64, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(nb_classes))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

# train
model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch, validation_data=(X_test, y_test), shuffle=True)

After training the model, train another model to recognize/extract each number from an image uses libraries such as OpenCV

Related Problems and Solutions