import torch
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.transforms import functional as F
from torch.utils.data import DataLoader
from torchvision.datasets import VOCDetection
import torchvision.transforms as T
# Define transforms with data augmentation
class Transform:
    """Joint image/target transform for detection training.

    Converts the image to a tensor, normalises the target into the
    ``{'boxes', 'labels'}`` dict of tensors, and applies a random horizontal
    flip to the image and its boxes together.

    Two target layouts are accepted:

    * a dict that already carries ``'boxes'`` (``(N, 4)`` tensor in
      ``xmin, ymin, xmax, ymax`` order) -- passed through as-is;
    * a dict with an ``'annotation'`` key holding a parsed PASCAL VOC XML
      tree, whose ``'object'`` entry lists objects with ``'name'`` and
      ``'bndbox'`` fields -- converted to ``'boxes'`` / ``'labels'`` tensors.

    Args:
        flip_prob: probability of flipping a sample horizontally. ``0.0``
            disables the augmentation (useful for validation data).
    """

    # Label ids are ``index + 1`` because id 0 is reserved for background.
    VOC_CLASSES = (
        'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car',
        'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike',
        'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
    )

    def __init__(self, flip_prob=0.5):
        self.flip_prob = flip_prob

    @classmethod
    def _target_from_voc(cls, annotation):
        """Build ``{'boxes', 'labels'}`` tensors from a parsed VOC annotation.

        Raises ``ValueError`` (from ``tuple.index``) for an unknown class name.
        """
        objects = annotation.get('object', [])
        if isinstance(objects, dict):
            # A lone object may be stored as a dict instead of a 1-element list.
            objects = [objects]
        boxes = []
        labels = []
        for obj in objects:
            bndbox = obj['bndbox']
            # Coordinates arrive as strings from the XML; convert explicitly.
            boxes.append([float(bndbox[key]) for key in ('xmin', 'ymin', 'xmax', 'ymax')])
            labels.append(cls.VOC_CLASSES.index(obj['name']) + 1)
        return {
            # Explicit (len, 4) shape keeps images without objects at (0, 4).
            'boxes': torch.tensor(boxes, dtype=torch.float32).reshape(len(boxes), 4),
            'labels': torch.tensor(labels, dtype=torch.int64),
        }

    def __call__(self, image, target):
        """Return the transformed ``(image, target)`` pair.

        ``image`` may be anything ``F.to_tensor`` accepts, or an existing
        tensor whose last dimension is the width. The caller's ``target``
        dict and box tensor are never modified in place.
        """
        if not isinstance(image, torch.Tensor):
            image = F.to_tensor(image)
        if 'annotation' in target:
            target = self._target_from_voc(target['annotation'])
        if torch.rand(1).item() < self.flip_prob:
            # Flipping the last (width) axis is a horizontal flip.
            image = image.flip(-1)
            width = image.shape[-1]
            # Work on copies so the caller's dict/tensor stay untouched.
            target = dict(target)
            boxes = target['boxes'].clone()
            # New xmin = W - old xmax and new xmax = W - old xmin, so the
            # invariant xmin <= xmax still holds after mirroring.
            boxes[:, [0, 2]] = width - boxes[:, [2, 0]]
            target['boxes'] = boxes
        return image, target
# Load dataset with transforms
def _collate_detection_batch(batch):
    """Turn a list of (image, target) samples into (images, targets) tuples.

    Detection samples differ in image size and box count, so they are kept
    as tuples rather than stacked. A named module-level function is used
    instead of a lambda so the DataLoader can pickle it for worker processes.
    """
    return tuple(zip(*batch))


train_dataset = VOCDetection('./data', year='2007', image_set='train', download=True, transforms=Transform())
# NOTE(review): Transform() applies a random horizontal flip, so validation
# samples are augmented as well and the validation loss is not deterministic;
# consider a flip-free transform for the validation split.
val_dataset = VOCDetection('./data', year='2007', image_set='val', download=True, transforms=Transform())
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=_collate_detection_batch)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, collate_fn=_collate_detection_batch)
# Build a COCO-pretrained Faster R-CNN and swap its box head for a VOC-sized one
num_classes = 21  # 20 classes + background
model = fasterrcnn_resnet50_fpn(pretrained=True)

# The replacement predictor must accept the same feature width as the
# classification layer it replaces.
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(
    in_features,
    num_classes,
)

# Prefer the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
model.to(device)
# SGD with momentum; weight decay acts as regularization
# Only parameters that still require gradients are handed to the optimizer.
params = list(filter(lambda param: param.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(
    params,
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005,
)
# Multiply the learning rate by 0.1 after every 3 scheduler steps
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=3,
    gamma=0.1,
)
num_epochs = 10
for epoch in range(num_epochs):
    # ---- Training pass: one optimizer step per batch ----
    model.train()
    train_loss = 0.0
    for images, targets in train_loader:
        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        # In training mode the model returns a dict of loss components.
        loss_dict = model(images, targets)
        losses = sum(loss_dict.values())
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
        train_loss += losses.item()
    # The scheduler is stepped once per epoch, so step_size counts epochs.
    lr_scheduler.step()

    # ---- Validation pass ----
    # torchvision detection models only return the loss dict in training
    # mode; after model.eval() they return a list of detections, on which
    # `.values()` fails. The model therefore stays in train mode and
    # torch.no_grad() guarantees no gradients are built and no weights change.
    # NOTE(review): train mode also keeps the RPN/RoI proposal sampling
    # active, so this is a "training-style" loss on validation data;
    # presumably the pretrained backbone's batch-norm layers are frozen so
    # no statistics are updated here -- confirm for the installed torchvision.
    model.train()
    val_loss = 0.0
    with torch.no_grad():
        for images, targets in val_loader:
            images = [img.to(device) for img in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
            loss_dict = model(images, targets)
            losses = sum(loss_dict.values())
            val_loss += losses.item()
    print(f"Epoch {epoch+1}, Train Loss: {train_loss/len(train_loader):.4f}, Val Loss: {val_loss/len(val_loader):.4f}")