Image datasets help computers learn to recognize pictures by showing many examples. CIFAR-10 and ImageNet are popular collections of images used to teach machines about different objects.
Image datasets (CIFAR-10, ImageNet) in Computer Vision
from torchvision.datasets import CIFAR10

# CIFAR-10 training split; download=True asks torchvision to fetch the data
# into ./data.
cifar10_train = CIFAR10(
    root='./data',
    download=True,
    train=True,
)

# ImageNet must be downloaded and set up by hand before it can be loaded,
# so this example is left commented out.
# from torchvision.datasets import ImageNet
# imagenet_train = ImageNet(root='./data/imagenet', split='train')
CIFAR-10 is small and easy to download, with 60,000 images (50,000 for training and 10,000 for testing) in 10 classes.
ImageNet is very large with over 1 million images in 1000 classes and needs manual setup.
from torchvision.datasets import CIFAR10

# Training split of CIFAR-10, stored under ./data (download requested).
train_data = CIFAR10(root='./data', download=True, train=True)

# Show how many images the training split contains.
num_images = len(train_data)
print(num_images)
from torchvision.datasets import CIFAR10

# Test split of CIFAR-10 (train=False), stored under ./data.
test_data = CIFAR10(root='./data', download=True, train=False)

# Indexing the dataset yields an (image, label) pair; look at the first one.
first_sample = test_data[0]
image, label = first_sample
print(f'Label: {label}')
This program loads CIFAR-10 images, trains a simple model for one batch, and checks accuracy on one batch of test images.
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR10
from torchvision.transforms import ToTensor

# Load the CIFAR-10 training and test splits, converting each image to a tensor.
train_data = CIFAR10(root='./data', train=True, download=True, transform=ToTensor())
test_data = CIFAR10(root='./data', train=False, download=True, transform=ToTensor())

# Data loaders: batches of 64; only the training data is shuffled.
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(test_data, batch_size=64, shuffle=False)

# ResNet18 with randomly initialised weights (no pretrained weights are loaded).
# Calling resnet18() with no arguments avoids the deprecated `pretrained`
# keyword while still working on older torchvision releases.
model = models.resnet18()
# Replace the final layer so the network outputs 10 scores, one per CIFAR-10 class.
model.fc = nn.Linear(model.fc.in_features, 10)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Train on a single batch (quick demo). Remove the `break` to run a full epoch.
model.train()
for images, labels in train_loader:
    optimizer.zero_grad()
    outputs = model(images)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
    break  # only 1 batch for quick demo

# Evaluate on a single batch of test data, with gradient tracking disabled.
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        # The predicted class is the index of the highest score in each row.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        break  # only 1 batch for quick demo

accuracy = 100 * correct / total
print(f'Accuracy on 1 batch of test images: {accuracy:.2f}%')
CIFAR-10 images are small (32x32 pixels), so models train quickly, but the images contain limited detail.
ImageNet is much bigger and better for advanced models but needs more computing power.
Always check if datasets need manual download or special setup before use.
CIFAR-10 and ImageNet are popular image datasets for teaching computers to recognize pictures.
CIFAR-10 is small and easy to use; ImageNet is large and detailed.
These datasets help test and compare image recognition models in machine learning.