Building a CNN
Posted on Wed 18 March 2020 in deep learning
This notebook describes how to create a CNN for classifying dogs and cats from scratch. The following material contains my notes from Deep Learning with Python.
from __future__ import print_function
import numpy as np
from keras import layers
from keras import models
from keras import optimizers
from keras.models import load_model
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
import matplotlib.pyplot as plt
import os
# Model input geometry (all images are resized to this shape before
# entering the network) and a flag controlling whether the trained
# model is written to disk at the end of training.
input_h = 150 # height
input_w = 150 # width
input_c = 3 # number of channels
save_model = True # flag
In this case, the images of the dataset will be fed into the model using a generator. There are several advantages of using generators:
- Instead of loading the whole dataset and feeding it into the model at once, the images are loaded in batches.
- The images can be preprocessed (rescale, grayscale, etc.) before the training step.
- The number of images can be increased via data augmentation.
# --- paths ---
# train (cats): /media/data/dogs_vs_cats_small/train/cats
# train (dogs): /media/data/dogs_vs_cats_small/train/dogs
# validation (cats): /media/data/dogs_vs_cats_small/validation/cats
# validation (dogs): /media/data/dogs_vs_cats_small/validation/dogs
# test (cats): /media/data/dogs_vs_cats_small/test/cats
# test (dogs): /media/data/dogs_vs_cats_small/test/dogs
# --- data preprocessing ---
# Both generators only rescale pixel values from [0, 255] to [0, 1].
train_datagen = ImageDataGenerator(rescale=1./255)
validation_datagen = ImageDataGenerator(rescale=1./255)

train_path = "/media/data/dogs_vs_cats_small/train"
validation_path = "/media/data/dogs_vs_cats_small/validation"

# Directory iterators: resize every image to (input_h, input_w) and
# yield (images, binary labels) in batches of 20.
train_generator = train_datagen.flow_from_directory(
    train_path,
    target_size=(input_h, input_w),
    batch_size=20,
    class_mode="binary",
)
validation_generator = validation_datagen.flow_from_directory(
    validation_path,
    target_size=(input_h, input_w),
    batch_size=20,
    class_mode="binary",
)

# check generator: draw one batch and report its shape
data_batch, labels_batch = next(train_generator)
print("data batch shape: ", data_batch.shape)
print("labels batch shape:", labels_batch.shape)
In this case, the generator yields a batch of 20 images each time. The shape of each image is (150, 150, 3), that is, height=150, width=150, and channels=3. Notice that the generators modify the input images in two ways:
- The range of the pixel values changes from [0, 255] to [0, 1] by using `rescale=1./255`.
- The shape of the images changes to (150, 150) by using `target_size=(input_h, input_w)`.
The generators also assign a label to each image automatically. Notice that such labels are encoded in the class_indices
property:
# check labels
# class_indices maps subdirectory name -> integer label assigned by
# flow_from_directory (e.g. {"cats": 0, "dogs": 1})
print("train mapping:", train_generator.class_indices)
print("validation mapping:", validation_generator.class_indices)
# mapping
# kept at module level: reused later to decode the model's predictions
class_indices = train_generator.class_indices
It is worth mentioning that this property will be needed later when classifying unseen images.
The next block shows how to build the model:
# --- build model ---
# Four Conv/MaxPool stages followed by a dense classifier with a single
# sigmoid unit (binary output: cat vs dog).
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation="relu",
                  input_shape=(input_h, input_w, input_c)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation="relu"),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation="relu"),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation="relu"),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(512, activation="relu"),
    layers.Dense(1, activation="sigmoid"),
])
The next block displays a summary of the model. Notice that the shape of the convolution/max-pooling layers changes from shallow to deep layers. From [1] page 123, the convolution layers operate on feature maps, which are 3D tensors of shape (height, width, number_of_filters). Here, `number_of_filters` is given when the layer is defined. For instance, the feature map of the first layer has shape (148, 148, 32), that is, there are 32 filters.
# check the model
# prints layer-by-layer output shapes and parameter counts
model.summary()
In addition, we can create a figure of the model:
from keras.utils import plot_model
# render the architecture as a diagram with per-layer output shapes
# NOTE(review): plot_model needs pydot + graphviz installed — confirm
plot_model(model, show_shapes=True)
# --- compile the model ---
# Binary classification -> binary cross-entropy loss, accuracy metric.
# NOTE(review): `lr=` and `fit_generator` are the legacy Keras spellings;
# newer Keras uses `learning_rate=` and plain `model.fit` — confirm the
# installed version before renaming.
model.compile(loss="binary_crossentropy",
optimizer=optimizers.RMSprop(lr=1e-4),
metrics=["acc"])
# Listing 5.8 Fitting the model using a batch generator
# --- training ---
# 100 steps/epoch x batch_size 20 = 2000 training images per epoch;
# 50 validation steps x 20 = 1000 validation images per epoch.
history = model.fit_generator(
train_generator,
steps_per_epoch=100,
epochs=30,
validation_data=validation_generator,
validation_steps=50
)
# Listing 5.9 Saving the model
# --- save model ---
# Persist the trained weights/architecture so the later classification
# sections can reload them without retraining.
if save_model:
    model_file = "dogs_vs_cats_v1.h5"
    model.save(model_file)
    print("model saved:", model_file)
The next block shows the accuracy and loss after training.
# --- plotting ---
# Per-epoch curves recorded by Keras during training.
hist = history.history
acc = hist["acc"]
val_acc = hist["val_acc"]
loss = hist["loss"]
val_loss = hist["val_loss"]
epochs = range(1, len(acc) + 1)
# Two figures: accuracy first, then loss ("bo" = training dots,
# "b" = validation line).
for fig_idx, (train_vals, val_vals, train_lbl, val_lbl, fig_title) in enumerate([
        (acc, val_acc, "Training Acc", "Validation Acc",
         "Training and validation accuracy"),
        (loss, val_loss, "Training Loss", "Validation Loss",
         "Training and validation loss"),
]):
    if fig_idx:
        plt.figure()
    plt.plot(epochs, train_vals, "bo", label=train_lbl)
    plt.plot(epochs, val_vals, "b", label=val_lbl)
    plt.title(fig_title)
    plt.legend()
plt.show()
From the first figure, it seems that the model starts to overfit after the 10-th epoch.
In the following, we will classify a few images.
# --- paths ----
# train (cats): /media/data/dogs_vs_cats_small/train/cats
# train (dogs): /media/data/dogs_vs_cats_small/train/dogs
# validation (cats): /media/data/dogs_vs_cats_small/validation/cats
# validation (dogs): /media/data/dogs_vs_cats_small/validation/dogs
# test (cats): /media/data/dogs_vs_cats_small/test/cats
# test (dogs): /media/data/dogs_vs_cats_small/test/dogs
# --- display images (they are not preprocessed yet) ---
test_dir_cats = "/media/data/dogs_vs_cats_small/test/cats"
test_dir_dogs = "/media/data/dogs_vs_cats_small/test/dogs"
fnames_dogs = [os.path.join(test_dir_dogs, fname) for fname in os.listdir(test_dir_dogs)]
fnames_cats = [os.path.join(test_dir_cats, fname) for fname in os.listdir(test_dir_cats)]

def _show_image(img_path, rescale=False):
    """Load one image file and draw it on a new figure.

    rescale=False: PIL -> np.array cast to int in the range [0, 255].
    rescale=True:  PIL -> np.array as float32 in the range [0, 1].
    Returns the converted array.
    """
    img = image.load_img(img_path)
    x = image.img_to_array(img)
    x = x * 1./255 if rescale else x.astype(int)
    plt.figure()
    plt.imshow(x)
    return x

# The original repeated the load/convert/plot sequence four times;
# show one int-valued and one float-valued example per class.
x = _show_image(fnames_dogs[4], rescale=False)
x = _show_image(fnames_dogs[5], rescale=True)
x = _show_image(fnames_cats[4], rescale=False)
x = _show_image(fnames_cats[5], rescale=True)
plt.show()
Image classification¶
%matplotlib inline
def display_batch(batch, label="", limit=10):
    """Plot up to `limit` images from `batch`, one figure per image.

    Each figure is titled with the class label and the image index.
    Fix: the original only stopped when (i+1) == limit, so limit=0 (or
    any negative value) silently displayed the whole batch; `limit` is
    now a true upper bound on the number of figures.
    """
    for i, img in enumerate(batch):
        if i >= limit:
            break
        plt.figure()
        plt.imshow(img)
        title = "class: %s id: %d" % (label, i)
        plt.title(title)
def create_batch(fnames, target_size=(150, 150)):
    """Load images from a list of file paths into a single tensor.

    The images are preprocessed to match what the model expects:
    - loaded as RGB and resized to `target_size` (default (150, 150),
      the model's input size — now a parameter instead of hard-coded)
    - pixel values rescaled as float32 in the range [0, 1]

    Returns a tensor of shape (n_samples, height, width, n_channels).
    """
    images = []
    for fname in fnames:
        img = image.load_img(fname,
                             color_mode="rgb",
                             target_size=target_size)
        # convert PIL to np.array (as np.float32 in the range [0, 1])
        images.append(image.img_to_array(img) * 1./255)
    return np.array(images)
# gather the test-set file paths for each class
test_dir_cats = "/media/data/dogs_vs_cats_small/test/cats"
test_dir_dogs = "/media/data/dogs_vs_cats_small/test/dogs"
fnames_dogs = [os.path.join(test_dir_dogs, name) for name in os.listdir(test_dir_dogs)]
fnames_cats = [os.path.join(test_dir_cats, name) for name in os.listdir(test_dir_cats)]
# build one preprocessed batch of (at most) 32 images per class
batch_dogs = create_batch(fnames_dogs[:32])
batch_cats = create_batch(fnames_cats[:32])
# show the first five images of each batch
display_batch(batch_dogs, "dogs", limit=5)
display_batch(batch_cats, "cats", limit=5)
batch_dogs.shape
# load model
model = load_model("dogs_vs_cats_v1.h5")
# prediction: one sigmoid score in [0, 1] per image
pred_dogs = model.predict(batch_dogs)
pred_cats = model.predict(batch_cats)
# cast to labels: threshold each score at 0.5 -> flat int32 vector
pred_dogs_classes = (pred_dogs.ravel() > 0.5).astype("int32")
pred_cats_classes = (pred_cats.ravel() > 0.5).astype("int32")
These are the predictions:
# predicted integer classes for the dog batch
pred_dogs_classes
# predicted integer classes for the cat batch
pred_cats_classes
Remember the mapping:
class_indices
# count how many predictions disagree with each class's true label
# (runtime strings kept exactly as in the original output)
n_wrong_dogs = (pred_dogs_classes != class_indices["dogs"]).sum()
print("missclassified dogs:", n_wrong_dogs)
n_wrong_cats = (pred_cats_classes != class_indices["cats"]).sum()
print("missclassified cats:", n_wrong_cats)
Single image classification¶
Now, rather than classify a batch of images, let's classify a single image at a time.
# load model
# reload the saved network so this section is self-contained
model = load_model("dogs_vs_cats_v1.h5")
Remember the mapping:
# mapping
class_indices
# reversed mapping: integer class -> name, used to print predictions
rev_class_indices = dict((idx, name) for name, idx in class_indices.items())
def classify_single_image(img_path):
    """Classify one image file as cat or dog and plot it with the result.

    The image is preprocessed exactly like the training data (RGB,
    resized to 150x150, rescaled to [0, 1]) before being passed to the
    global `model`; the original full-resolution image is what gets
    plotted. Prints the raw sigmoid score, the thresholded class, and
    the class name from the global `rev_class_indices`.
    """
    # keep an unprocessed copy for display
    img_original = image.load_img(img_path)
    # preprocessed copy at the model's input size
    img = image.load_img(img_path,
                         color_mode="rgb",
                         target_size=(150, 150))
    # convert PIL to np.array (as np.float32 in the range [0, 1])
    x = image.img_to_array(img) * 1./255
    # add the batch dimension: (1, height, width, n_channels)
    # (renamed from `input`, which shadowed the builtin)
    x_batch = np.array([x])
    # predict
    pred = model.predict(x_batch)
    # cast prediction to label; [0] picks the first and only image
    pred_label = (pred > 0.5).astype("int32").flatten()[0]
    pred_value = pred.flatten()[0]
    # results
    label = rev_class_indices[pred_label]
    print("prediction (float): %.4f, class (int): %d, label: %s" % (pred_value, pred_label, label))
    # plot the original image titled with the prediction
    fig = plt.figure()
    ax = fig.add_subplot(111)
    title = "Prediction: %s" % label
    ax.set_title(title)
    ax.imshow(img_original)
# classify two unseen images from outside the dataset
classify_single_image("hachi.jpg")
classify_single_image("sheldon.jpg")
So, it turns out that Hachi is indeed a dog, and Sheldon is a kind of cat.