import tensorflow as tf
from tensorflow.keras import layers, models

# Load MNIST (training split only), add a channel axis, scale pixels to [0, 1].
(x_train, y_train), _ = tf.keras.datasets.mnist.load_data()
x_train = x_train[..., tf.newaxis] / 255.0

# Data augmentation layer
data_augmentation = tf.keras.Sequential([
    layers.RandomRotation(0.1),
    layers.RandomTranslation(0.1, 0.1),
])

# Model definition
model = models.Sequential([
    layers.Input(shape=(28, 28, 1)),
    data_augmentation,
    layers.Conv2D(16, 3, activation='relu'),
    layers.Flatten(),
    layers.Dense(10, activation='softmax'),
])

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Single quiet epoch; record the resulting training accuracy.
history = model.fit(x_train, y_train, epochs=1, batch_size=128, verbose=0)
train_acc = history.history['accuracy'][0]
Data augmentation creates new variations of the input images, making the training data more diverse. Early in training, this usually makes it harder for the model to fit perfectly, so training accuracy tends to be lower compared to training without augmentation.
Data augmentation creates new, varied training examples by transforming existing data. This helps the model learn more general features and reduces overfitting, which is why it acts as a regularizer.
Excessive augmentation can distort images so much that the model struggles to learn useful features, leading to underfitting.
Higher training loss with lower validation loss suggests the model is not memorizing training data but generalizing better, which is a sign of effective regularization.
data_augmentation = tf.keras.Sequential([
layers.RandomRotation(1.5), # 1.5 radians ~ 86 degrees
layers.RandomZoom(0.5)
])
model = tf.keras.Sequential([
layers.Input(shape=(28, 28, 1)),
data_augmentation,
layers.Conv2D(32, 3, activation='relu'),
layers.Flatten(),
layers.Dense(10, activation='softmax')
])RandomRotation with 1.5 radians (~86 degrees) is very large and can rotate digits almost sideways, making them hard to recognize. This harms learning and causes accuracy to drop to random guessing.