"""Train a small RNN on a tiny text sample: it learns to predict the next word given the previous two words, then generates three new words starting from 'hello world'."""
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
from tensorflow.keras.utils import to_categorical
# Sample text data
# Sample corpus — tiny on purpose; the model simply memorizes it.
text = "hello world hello machine learning"

# Word-level vocabulary. sorted() gives a deterministic word -> index mapping.
words = sorted(set(text.split()))
word_to_index = {w: i for i, w in enumerate(words)}
index_to_word = {i: w for w, i in word_to_index.items()}

# Slide a window of (sequence_length input words + 1 target word) over the
# token stream and encode each window as word indices.
sequence_length = 2
sequences = []
tokens = text.split()
for i in range(len(tokens) - sequence_length):
    window = tokens[i:i + sequence_length + 1]
    sequences.append([word_to_index[w] for w in window])
sequences = np.array(sequences)

# X: the first sequence_length columns (inputs); y: the last column (targets).
X, y = sequences[:, :-1], sequences[:, -1]
# One-hot encode the target word indices for categorical cross-entropy.
y = to_categorical(y, num_classes=len(word_to_index))
# Build the model
model = Sequential()
model.add(Embedding(input_dim=len(words), output_dim=10, input_length=sequence_length))
model.add(SimpleRNN(20))
model.add(Dense(len(words), activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# Train the model
model.fit(X, y, epochs=100, verbose=0)
# Generate text function
def generate_text(seed_text, next_words=3):
    """Greedily extend *seed_text* by *next_words* model-predicted words.

    Each step feeds the last ``sequence_length`` words of the running result
    to the trained model and appends the highest-probability next word.
    Assumes every word in seed_text is present in ``word_to_index`` (raises
    KeyError otherwise) and that seed_text contains at least
    ``sequence_length`` words.
    """
    result = seed_text.split()
    for _ in range(next_words):
        # Encode the most recent window of words as a (1, sequence_length) batch.
        encoded = [word_to_index[w] for w in result[-sequence_length:]]
        encoded = np.array(encoded).reshape(1, sequence_length)
        # pred has shape (1, vocab_size); argmax over it picks the likeliest word.
        pred = model.predict(encoded, verbose=0)
        next_index = int(np.argmax(pred))
        result.append(index_to_word[next_index])
    return ' '.join(result)
# Demo: extend the seed phrase 'hello world' by three predicted words.
output_text = generate_text('hello world')
print(output_text)