Deep Learning for Computer Vision I by Stephen Mariga

Deep Learning for Computer Vision I

Stephen Mariga

"""Compare CNN training regimes on two MNIST subsets.

Three experiments:
  1. Base model: train on 30,000 examples of digits 0-4.
  2. Naive approach: train the same architecture from scratch on only
     50 examples of digits 5-9.
  3. Data augmentation: repeat experiment 2 with random augmentation
     layers prepended, to see whether augmentation helps the tiny set.
Each run checkpoints the best weights (by validation loss) and reports
test accuracy on the matching held-out subset.
"""

import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.utils import shuffle

# Load the MNIST dataset and normalize pixel values into [0, 1].
# Images are reshaped to (H, W, 1) so Conv2D sees a single grayscale channel.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
train_images = train_images.reshape((60000, 28, 28, 1)).astype("float32") / 255
test_images = test_images.reshape((10000, 28, 28, 1)).astype("float32") / 255

# Subset 1: digits 0-4, a comfortably large training set (30,000 examples).
train_mask_1 = np.isin(train_labels, [0, 1, 2, 3, 4])
test_mask_1 = np.isin(test_labels, [0, 1, 2, 3, 4])
train_images_1 = train_images[train_mask_1][:30000]
train_labels_1 = train_labels[train_mask_1][:30000]
test_images_1 = test_images[test_mask_1][:5000]
test_labels_1 = test_labels[test_mask_1][:5000]

# Subset 2: digits 5-9, deliberately tiny (first 50 matching examples; the
# digit distribution within those 50 is whatever order MNIST happens to use).
train_mask_2 = np.isin(train_labels, [5, 6, 7, 8, 9])
test_mask_2 = np.isin(test_labels, [5, 6, 7, 8, 9])
train_images_2 = train_images[train_mask_2][:50]
train_labels_2 = train_labels[train_mask_2][:50]
test_images_2 = test_images[test_mask_2][:5000]
test_labels_2 = test_labels[test_mask_2][:5000]

# Shuffle both training sets so validation_split does not carve off a
# label-ordered slice; random_state pins the shuffle for reproducibility.
train_images_1, train_labels_1 = shuffle(train_images_1, train_labels_1, random_state=42)
train_images_2, train_labels_2 = shuffle(train_images_2, train_labels_2, random_state=42)


def create_model(augmentation=None):
    """Build and compile the shared 3-conv-layer CNN classifier.

    Args:
        augmentation: optional Keras layer/model applied to the inputs
            before the convolutional stack (e.g. a Sequential of random
            augmentation layers). None (the default) means no augmentation,
            preserving the original behavior.

    Returns:
        A compiled keras.Model mapping (28, 28, 1) images to 10 softmax
        class probabilities, using integer labels
        (sparse categorical crossentropy).
    """
    inputs = keras.Input(shape=(28, 28, 1))  # MNIST grayscale image shape
    x = augmentation(inputs) if augmentation is not None else inputs
    x = layers.Conv2D(filters=32, kernel_size=3, activation="relu")(x)
    x = layers.MaxPooling2D(pool_size=2)(x)  # halve spatial dimensions
    x = layers.Conv2D(filters=64, kernel_size=3, activation="relu")(x)
    x = layers.MaxPooling2D(pool_size=2)(x)
    x = layers.Conv2D(filters=128, kernel_size=3, activation="relu")(x)
    x = layers.Flatten()(x)  # 2D feature maps -> 1D vector for the classifier head
    outputs = layers.Dense(10, activation="softmax")(x)  # 10 digit classes
    model = keras.Model(inputs=inputs, outputs=outputs)
    # Labels are plain integers, so we need the *sparse* crossentropy variant.
    model.compile(optimizer="rmsprop",
                  loss="sparse_categorical_crossentropy",
                  metrics=["accuracy"])
    return model


# --- Experiment 1: base model on digits 0-4 ---------------------------------
model_1 = create_model()
checkpoint_1 = ModelCheckpoint("model_1.keras", save_best_only=True, monitor="val_loss")
history_1 = model_1.fit(train_images_1, train_labels_1, epochs=20,
                        validation_split=0.2, callbacks=[checkpoint_1])
# Evaluate the *best* checkpointed weights, not the last epoch's -- the
# checkpoint is pointless if we never load it back.
model_1 = keras.models.load_model("model_1.keras")
test_loss_1, test_acc_1 = model_1.evaluate(test_images_1, test_labels_1)
print(f"Base Model Test Accuracy (Set 1): {test_acc_1:.3f}")

# --- Experiment 2: naive training on the 50-example digits 5-9 set ----------
model_2 = create_model()
checkpoint_2 = ModelCheckpoint("model_2.keras", save_best_only=True, monitor="val_loss")
history_2 = model_2.fit(train_images_2, train_labels_2, epochs=20,
                        validation_split=0.2, callbacks=[checkpoint_2])
model_2 = keras.models.load_model("model_2.keras")
test_loss_2, test_acc_2 = model_2.evaluate(test_images_2, test_labels_2)
print(f"Naive Approach Test Accuracy (Set 2): {test_acc_2:.3f}")

# --- Experiment 3: same tiny set, with data augmentation --------------------
# NOTE: horizontal flips are deliberately NOT used. Mirroring a digit changes
# its identity (a flipped "2" is no longer a valid "2"), so flip augmentation
# corrupts the labels. Small shifts, rotations, and zooms are label-preserving.
data_augmentation = keras.Sequential([
    layers.RandomTranslation(0.1, 0.1),  # shift up to 10% in each direction
    layers.RandomRotation(0.1),          # rotate up to ~36 degrees (10% of 2*pi)
    layers.RandomZoom(0.2),              # zoom up to 20%
])

# Reuse the shared architecture instead of duplicating it inline.
model_3 = create_model(augmentation=data_augmentation)
checkpoint_3 = ModelCheckpoint("model_3.keras", save_best_only=True, monitor="val_loss")
history_3 = model_3.fit(train_images_2, train_labels_2, epochs=20,
                        validation_split=0.2, callbacks=[checkpoint_3])
model_3 = keras.models.load_model("model_3.keras")
test_loss_3, test_acc_3 = model_3.evaluate(test_images_2, test_labels_2)
print(f"Data Augmentation Approach Test Accuracy (Set 2): {test_acc_3:.3f}")
Like this project

Posted Oct 23, 2024

This project builds and trains convolutional neural network (CNN) models to classify digits from the MNIST dataset, divided into two subsets: digits 0-4 and digits 5-9.

Likes

0

Views

0