import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
# Define simple CNN model
class SimpleCNN(nn.Module):
    """Small two-convolution image classifier.

    The network applies two 3x3 convolutions (stride 1, no padding), each
    followed by ReLU and 2x2 max-pooling, then two fully connected layers.
    The first fully connected layer expects ``32 * 5 * 5`` features, which
    is what a 28x28 input (such as the MNIST images this file loads)
    produces after the convolution/pooling stack.

    Args:
        in_channels: Number of channels in the input images.
        num_classes: Number of output logits (one per class).
    """

    def __init__(self, in_channels: int = 1, num_classes: int = 10):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, 16, 3, 1)
        self.conv2 = nn.Conv2d(16, 32, 3, 1)
        # Spatial size for a 28x28 input:
        # 28 -> conv 26 -> pool 13 -> conv 11 -> pool 5, with 32 channels.
        self.fc1 = nn.Linear(32 * 5 * 5, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return unnormalized class scores (logits) for a batch of images.

        Args:
            x: Batch of images; the channel dimension must match the
                ``in_channels`` the model was built with.

        Returns:
            Tensor with one row per input image and ``num_classes`` columns.
        """
        x = nn.functional.relu(self.conv1(x))
        x = nn.functional.max_pool2d(x, 2)
        x = nn.functional.relu(self.conv2(x))
        x = nn.functional.max_pool2d(x, 2)
        # Keep the batch dimension, collapse channels and spatial dims.
        x = torch.flatten(x, 1)
        x = nn.functional.relu(self.fc1(x))
        # No softmax here: the output is returned as raw fc2 activations.
        return self.fc2(x)
# Normalization constants shared by the training and validation pipelines.
# NOTE(review): presumably the MNIST training-set mean/std — confirm.
MNIST_MEAN = (0.1307,)
MNIST_STD = (0.3081,)

# Data augmentation transforms for training.
# Horizontal flipping is deliberately NOT used: a mirrored digit is not a
# valid sample of the same class, so flipping would feed mislabeled images
# to the model. Small rotations and translations keep the label intact.
train_transforms = transforms.Compose([
    transforms.RandomRotation(15),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
    transforms.ToTensor(),
    transforms.Normalize(MNIST_MEAN, MNIST_STD),
])

# Validation transforms (no augmentation), same normalization as training.
val_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(MNIST_MEAN, MNIST_STD),
])
# MNIST splits: train=True selects the training split, train=False the
# held-out split used here for validation. Files are stored under ./data.
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=train_transforms,
)
val_dataset = datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=val_transforms,
)

# Batch iterators: training batches are reshuffled, validation order is fixed.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=1000, shuffle=False)

# Model, loss function and optimizer used by the training loop.
model = SimpleCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)
# Training loop
NUM_EPOCHS = 10


def _run_epoch(model, loader, criterion, optimizer=None):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy)``.

    When ``optimizer`` is given, the model is put in training mode and its
    parameters are updated after every batch. When it is ``None``, the model
    is put in evaluation mode and gradients are disabled.

    Args:
        model: Module mapping a batch of inputs to per-class scores.
        loader: Iterable yielding ``(data, target)`` batches.
        criterion: Loss callable taking ``(output, target)``.
        optimizer: Optimizer to step after each batch, or ``None`` to
            evaluate only.

    Returns:
        Tuple of the per-sample mean loss and the accuracy in percent, both
        computed over the samples actually yielded by ``loader``.
    """
    training = optimizer is not None
    model.train(training)  # train(False) is the same as eval()

    loss_sum = 0.0
    num_correct = 0
    num_seen = 0
    with torch.set_grad_enabled(training):
        for data, target in loader:
            if training:
                optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            if training:
                loss.backward()
                optimizer.step()

            batch_size = data.size(0)
            # The loss is weighted by batch size so that the final division
            # yields a per-sample mean even if the last batch is smaller.
            loss_sum += loss.item() * batch_size
            num_correct += output.argmax(dim=1).eq(target).sum().item()
            num_seen += batch_size

    # Normalize by samples seen rather than len(loader.dataset): the two
    # differ when the loader drops the last batch or samples a subset.
    if num_seen == 0:
        return 0.0, 0.0
    return loss_sum / num_seen, 100.0 * num_correct / num_seen


for epoch in range(NUM_EPOCHS):
    train_loss, train_acc = _run_epoch(model, train_loader, criterion, optimizer)
    val_loss, val_acc = _run_epoch(model, val_loader, criterion)
    print(f'Epoch {epoch+1}: Train loss {train_loss:.4f}, Train acc {train_acc:.2f}%, Val loss {val_loss:.4f}, Val acc {val_acc:.2f}%')