import torch
import torch.nn as nn
import torch.optim as optim


class AttentionRNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.rnn = nn.GRU(input_dim, hidden_dim, batch_first=True)
        self.attention = nn.Linear(hidden_dim, 1)  # scores each timestep
        self.classifier = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # x shape: (batch_size, seq_len, input_dim)
        rnn_out, _ = self.rnn(x)  # (batch_size, seq_len, hidden_dim)
        # Attention weights: score each timestep, then normalize over the sequence dimension
        attn_weights = torch.softmax(self.attention(rnn_out).squeeze(-1), dim=1)  # (batch_size, seq_len)
        # Context vector: attention-weighted sum of the RNN outputs
        context = torch.sum(rnn_out * attn_weights.unsqueeze(-1), dim=1)  # (batch_size, hidden_dim)
        output = self.classifier(context)  # (batch_size, output_dim)
        return output
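

# Quick sanity check (illustrative only; the batch size and sequence length
# below are arbitrary, and `_demo_model` / `_demo_out` exist just for this check):
# a dummy batch of 4 sequences of length 10 should yield one logit vector per
# sequence, i.e. shape (batch_size, output_dim).
_demo_model = AttentionRNN(input_dim=100, hidden_dim=64, output_dim=2)
_demo_out = _demo_model(torch.randn(4, 10, 100))
assert _demo_out.shape == (4, 2)  # (batch_size, output_dim)
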
# Example training loop (simplified)
input_dim = 100 # e.g., word embedding size
hidden_dim = 64
output_dim = 2 # e.g., binary classification
model = AttentionRNN(input_dim, hidden_dim, output_dim)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Dummy data for demonstration
X_train = torch.randn(64, 50, input_dim) # batch_size=64, seq_len=50
y_train = torch.randint(0, 2, (64,))
X_val = torch.randn(64, 50, input_dim)
y_val = torch.randint(0, 2, (64,))
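# Note (illustrative sketch): for real datasets the full-batch updates in the
# loop below are usually replaced by mini-batches via a DataLoader, e.g.:
#   from torch.utils.data import TensorDataset, DataLoader
#   train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=16, shuffle=True)
#   for xb, yb in train_loader:
#       optimizer.zero_grad()
#       loss = criterion(model(xb), yb)
#       loss.backward()
#       optimizer.step()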
for epoch in range(10):
    # Training step (single full-batch update, kept simple for the example)
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()

    # Validation step (no gradients needed)
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_val)
        val_loss = criterion(val_outputs, y_val)
        val_preds = val_outputs.argmax(dim=1)
        val_acc = (val_preds == y_val).float().mean()

    print(f"Epoch {epoch+1}: Train Loss={loss.item():.3f}, "
          f"Val Loss={val_loss.item():.3f}, Val Acc={val_acc.item()*100:.2f}%")