from transformers import T5Tokenizer, T5ForConditionalGeneration, T5Config, Trainer, TrainingArguments, EarlyStoppingCallback
import torch
# Load tokenizer and model
model_name = 't5-small'
tokenizer = T5Tokenizer.from_pretrained(model_name)
# Load config with increased dropout to reduce overfitting
config = T5Config.from_pretrained(model_name)
config.dropout_rate = 0.3
model = T5ForConditionalGeneration.from_pretrained(model_name, config=config)
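# Quick sanity check (illustrative, not required for training): confirm the dropout
# override was picked up by the loaded model.
print(model.config.dropout_rate)  # expected: 0.3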
# Prepare dummy dataset (replace with real dataset in practice)
class DummyDataset(torch.utils.data.Dataset):
    def __init__(self, tokenizer):
        self.inputs = ["summarize: The quick brown fox jumps over the lazy dog."] * 100
        self.targets = ["A fox jumps over a dog."] * 100
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        input_enc = self.tokenizer(self.inputs[idx], truncation=True, padding='max_length', max_length=32, return_tensors='pt')
        target_enc = self.tokenizer(self.targets[idx], truncation=True, padding='max_length', max_length=16, return_tensors='pt')
        input_ids = input_enc.input_ids.squeeze(0)
        attention_mask = input_enc.attention_mask.squeeze(0)
        labels = target_enc.input_ids.squeeze(0)
        labels[labels == self.tokenizer.pad_token_id] = -100  # ignore padding positions in the loss
        return {"input_ids": input_ids, "attention_mask": attention_mask, "labels": labels}
train_dataset = DummyDataset(tokenizer)
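# Optional sanity check (illustrative sketch, not part of training): decode one encoded
# example to verify that inputs and labels line up as expected before training starts.
sample = train_dataset[0]
print(tokenizer.decode(sample["input_ids"], skip_special_tokens=True))
print(tokenizer.decode(sample["labels"][sample["labels"] != -100], skip_special_tokens=True))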
# Define training arguments with a lower learning rate, per-epoch evaluation, and early stopping
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    evaluation_strategy='epoch',
    save_strategy='epoch',              # must match evaluation_strategy for load_best_model_at_end
    learning_rate=3e-5,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    load_best_model_at_end=True,        # required by EarlyStoppingCallback
    metric_for_best_model='eval_loss'
)
# Define Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=train_dataset,  # using train as eval for demo; replace with a real validation set
    callbacks=[EarlyStoppingCallback(early_stopping_patience=2)]  # stop when eval loss stops improving
)
# Train model
trainer.train()
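# Inference after fine-tuning (illustrative sketch; max_length and num_beams below are
# assumed demo settings, not tuned values).
model.eval()
sample_text = "summarize: The quick brown fox jumps over the lazy dog."
inputs = tokenizer(sample_text, return_tensors='pt').to(model.device)
with torch.no_grad():
    generated_ids = model.generate(**inputs, max_length=16, num_beams=4)
print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))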