# This code trains a simple neural language model to predict the next word
# from a small set of words. It shows how the model learns and then predicts
# the next word after 'hello'.
import torch
import torch.nn as nn
import torch.optim as optim
# Simple neural language model for predicting next word from a small vocabulary
class SimpleLanguageModel(nn.Module):
    """A minimal next-word model: embedding lookup followed by a linear projection.

    Given a tensor of word indices, produces one score per vocabulary word
    (unnormalized logits) for each input position.
    """

    def __init__(self, vocab_size, embed_dim):
        """Create the embedding table (vocab_size x embed_dim) and the
        output projection back to vocabulary-sized logits."""
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.linear = nn.Linear(embed_dim, vocab_size)

    def forward(self, x):
        """Map word indices `x` to per-word logits of shape (..., vocab_size)."""
        return self.linear(self.embedding(x))
# Vocabulary and data
# Vocabulary and its word -> index lookup table
vocab = ['hello', 'world', 'good', 'morning']
word_to_ix = {word: idx for idx, word in enumerate(vocab)}

# Training data: pairs of (input_word, target_word)
data = [
    ('hello', 'world'),
    ('good', 'morning'),
    ('hello', 'good'),
    ('morning', 'world'),
]

# Encode the pairs as parallel index tensors for the model
source_words, target_words = zip(*data)
inputs = torch.tensor([word_to_ix[w] for w in source_words], dtype=torch.long)
targets = torch.tensor([word_to_ix[w] for w in target_words], dtype=torch.long)

# Model, loss, optimizer
model = SimpleLanguageModel(vocab_size=len(vocab), embed_dim=5)
loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)
# Train for 100 epochs of full-batch gradient descent.
# `loss` intentionally survives the loop so the final value can be reported later.
model.train()
for _ in range(100):
    optimizer.zero_grad()
    logits = model(inputs)
    loss = loss_function(logits, targets)
    loss.backward()
    optimizer.step()
# Test prediction for input 'hello': pick the highest-scoring vocabulary word.
model.eval()
with torch.no_grad():
    query = torch.tensor([word_to_ix['hello']], dtype=torch.long)
    output = model(query)
    predicted_ix = output.argmax(dim=1).item()
    predicted_word = vocab[predicted_ix]
print(f"Input word: 'hello'")
print(f"Predicted next word: '{predicted_word}'")
print(f"Final training loss: {loss.item():.4f}")