This code creates a simple dataset and splits it into train, validation, and test sets using PyTorch's random_split. It then prints the number of samples in each set.
import torch
from torch.utils.data import DataLoader, random_split, TensorDataset
# Toy data: 100 rows of 5 normally-distributed features plus one 0/1 label
# per row. Features are drawn first, then labels; no seed is set here for
# the data itself.
X = torch.randn(100, 5)
y = torch.randint(0, 2, (100,))
dataset = TensorDataset(X, y)

# Subset sizes for train / validation / test (70 + 15 + 15 = 100 samples).
train_size, val_size, test_size = 70, 15, 15

# A dedicated generator seeded with 42 drives the split, so the partition
# does not depend on the global RNG state.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset, test_dataset = random_split(
    dataset,
    [train_size, val_size, test_size],
    generator=split_generator,
)

# One loader per subset with a batch size of 10; no other options are
# passed, so DataLoader's defaults apply.
train_loader, val_loader, test_loader = (
    DataLoader(subset, batch_size=10)
    for subset in (train_dataset, val_dataset, test_dataset)
)

# Sanity check: report how many samples landed in each subset.
print(
    f"Train samples: {len(train_dataset)}",
    f"Validation samples: {len(val_dataset)}",
    f"Test samples: {len(test_dataset)}",
    sep="\n",
)