import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.utils import shuffle
# Load the MNIST dataset
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
# Preprocess the images: reshape and normalize pixel values to be between 0 and 1
train_images = train_images.reshape((60000, 28, 28, 1)).astype("float32") / 255
test_images = test_images.reshape((10000, 28, 28, 1)).astype("float32") / 255
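# Optional sanity check (a small sketch, not required for the rest of the script):
# confirm the reshaped dimensions and that pixel values now fall in [0, 1]
print(train_images.shape, test_images.shape)   # expected: (60000, 28, 28, 1) (10000, 28, 28, 1)
print(train_images.min(), train_images.max())  # expected: 0.0 1.0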
# Create training set 1 with digits 0 to 4, keeping 30,000 examples
train_mask_1 = np.isin(train_labels, [0, 1, 2, 3, 4])
test_mask_1 = np.isin(test_labels, [0, 1, 2, 3, 4])
train_images_1 = train_images[train_mask_1][:30000]
train_labels_1 = train_labels[train_mask_1][:30000]
test_images_1 = test_images[test_mask_1][:5000]
test_labels_1 = test_labels[test_mask_1][:5000]
# Create training set 2 with digits 5 to 9
train_mask_2 = np.isin(train_labels, [5, 6, 7, 8, 9])
test_mask_2 = np.isin(test_labels, [5, 6, 7, 8, 9])
train_images_2 = train_images[train_mask_2][:50] # Keep only the first 50 examples, a deliberately tiny training set for digits 5-9
train_labels_2 = train_labels[train_mask_2][:50]
test_images_2 = test_images[test_mask_2][:5000]
test_labels_2 = test_labels[test_mask_2][:5000]
# Shuffle the training sets to ensure randomness during training
train_images_1, train_labels_1 = shuffle(train_images_1, train_labels_1)
train_images_2, train_labels_2 = shuffle(train_images_2, train_labels_2)
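# Note: sklearn's shuffle accepts a random_state argument, so passing a fixed seed
# (e.g. random_state=42, an arbitrary choice) would make these shuffles reproducible:
# train_images_1, train_labels_1 = shuffle(train_images_1, train_labels_1, random_state=42)
# train_images_2, train_labels_2 = shuffle(train_images_2, train_labels_2, random_state=42)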
# Function to create the model structure
def create_model():
    inputs = keras.Input(shape=(28, 28, 1))  # Input shape matches MNIST image dimensions (28x28x1 for grayscale)
    x = layers.Conv2D(filters=32, kernel_size=3, activation="relu")(inputs)  # First convolutional layer with 32 filters
    x = layers.MaxPooling2D(pool_size=2)(x)  # Reduce the spatial dimensions with max pooling
    x = layers.Conv2D(filters=64, kernel_size=3, activation="relu")(x)  # Second convolutional layer with 64 filters
    x = layers.MaxPooling2D(pool_size=2)(x)  # Another max pooling layer
    x = layers.Conv2D(filters=128, kernel_size=3, activation="relu")(x)  # Third convolutional layer with 128 filters
    x = layers.Flatten()(x)  # Flatten the 2D feature maps to a 1D vector to feed into the dense layer
    outputs = layers.Dense(10, activation="softmax")(x)  # Final dense layer for 10 classes (digits 0-9)
    # Compile the model with the RMSprop optimizer and sparse categorical crossentropy
    # (the labels are integer class indices, not one-hot vectors)
    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer="rmsprop", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
    return model
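# Optional: create_model() builds a fresh, untrained instance each time it is called,
# so the architecture can be inspected before training with:
# create_model().summary()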
# Train the model on Training Set 1 (digits 0-4)
model_1 = create_model()
# Save the best model during training based on validation loss using ModelCheckpoint
checkpoint_1 = ModelCheckpoint("model_1.keras", save_best_only=True, monitor="val_loss")
# Train the model using 80% of the training set, leaving 20% for validation
history_1 = model_1.fit(train_images_1, train_labels_1, epochs=20, validation_split=0.2, callbacks=[checkpoint_1])
# Evaluate the model on Test Set 1 (digits 0-4)
test_loss_1, test_acc_1 = model_1.evaluate(test_images_1, test_labels_1)
print(f"Base Model Test Accuracy (Set 1): {test_acc_1:.3f}")
# Now, we move on to the Naive Approach: Training on Training Set 2 (digits 5-9 only)
model_2 = create_model()
# Again, we'll save the best model during training using validation loss as the monitor
checkpoint_2 = ModelCheckpoint("model_2.keras", save_best_only=True, monitor="val_loss")
# Train the model on Training Set 2 with 80% training and 20% validation split
history_2 = model_2.fit(train_images_2, train_labels_2, epochs=20, validation_split=0.2, callbacks=[checkpoint_2])
# Evaluate the model on Test Set 2 (digits 5-9)
test_loss_2, test_acc_2 = model_2.evaluate(test_images_2, test_labels_2)
print(f"Naive Approach Test Accuracy (Set 2): {test_acc_2:.3f}")
# Let's now apply data augmentation to the images to help the model generalize better
# This layer applies random transformations to images (flips, rotations, zoom) to make the model more robust
data_augmentation = keras.Sequential([
layers.RandomFlip("horizontal"), # Randomly flip the images horizontally
layers.RandomRotation(0.1), # Randomly rotate by up to ±10% of a full turn (about ±36 degrees)
layers.RandomZoom(0.2), # Randomly zoom in or out by up to 20%
])
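# Optional sketch: preview a few augmented versions of one image from the small training set
# to see what the pipeline above produces (assumes matplotlib is installed; it is not used elsewhere here).
import matplotlib.pyplot as plt
sample = train_images_2[:1]  # one image, shape (1, 28, 28, 1)
plt.figure(figsize=(8, 2))
for i in range(4):
    augmented = data_augmentation(sample, training=True)  # random transforms only apply in training mode
    plt.subplot(1, 4, i + 1)
    plt.imshow(augmented.numpy()[0, :, :, 0], cmap="gray")
    plt.axis("off")
plt.show()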
# Apply data augmentation to the new model
inputs = keras.Input(shape=(28, 28, 1)) # Input layer for MNIST image size
x = data_augmentation(inputs)
x = layers.Conv2D(filters=32, kernel_size=3, activation="relu")(x) # First convolutional layer
x = layers.MaxPooling2D(pool_size=2)(x) # Max pooling
x = layers.Conv2D(filters=64, kernel_size=3, activation="relu")(x) # Second convolutional layer
x = layers.MaxPooling2D(pool_size=2)(x) # Max pooling
x = layers.Conv2D(filters=128, kernel_size=3, activation="relu")(x) # Third convolutional layer
x = layers.Flatten()(x) # Flatten the output
outputs = layers.Dense(10, activation="softmax")(x) # Dense layer for classification
# Compile the model with the same optimizer and loss function
model_3 = keras.Model(inputs=inputs, outputs=outputs)
model_3.compile(optimizer="rmsprop", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
# Use ModelCheckpoint to save the best version of the model during training
checkpoint_3 = ModelCheckpoint("model_3.keras", save_best_only=True, monitor="val_loss")
# Train the model with data augmentation applied, using the same 80/20 train/validation split
history_3 = model_3.fit(train_images_2, train_labels_2, epochs=20, validation_split=0.2, callbacks=[checkpoint_3])
# Evaluate the model on Test Set 2 (digits 5-9) after applying data augmentation
test_loss_3, test_acc_3 = model_3.evaluate(test_images_2, test_labels_2)
print(f"Data Augmentation Approach Test Accuracy (Set 2): {test_acc_3:.3f}")