import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding
from tensorflow.keras.utils import to_categorical
# Sample text data
text = "hello world hello machine learning world hello ai world"

# Create character mapping: each distinct character gets a stable integer id
# (sorted so the mapping is deterministic across runs).
chars = sorted(set(text))
char_to_idx = {c: i for i, c in enumerate(chars)}
idx_to_char = {i: c for i, c in enumerate(chars)}

# Prepare training pairs: every window of `seq_length` characters predicts
# the character that immediately follows it.
seq_length = 5
sequences = [text[i:i + seq_length] for i in range(len(text) - seq_length)]
next_chars = [text[i + seq_length] for i in range(len(text) - seq_length)]

# Encode inputs as integer-id rows and targets as one-hot rows.
# Vectorized one-hot assignment replaces the original per-character loop.
X = np.array([[char_to_idx[ch] for ch in seq] for seq in sequences], dtype=int)
y = np.zeros((len(sequences), len(chars)), dtype=int)
y[np.arange(len(sequences)), [char_to_idx[ch] for ch in next_chars]] = 1
# Build model: embed character ids, run a single LSTM layer, and emit a
# probability distribution over the vocabulary for the next character.
model = Sequential()
model.add(Embedding(len(chars), 10, input_length=seq_length))
model.add(LSTM(50))
model.add(Dense(len(chars), activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train model (verbose=0 suppresses per-epoch logging).
model.fit(X, y, epochs=50, batch_size=8, verbose=0)
# Text generation function with temperature and top-k sampling
def sample(preds, temperature=1.0, top_k=None):
    """Sample a class index from a probability distribution.

    Applies temperature scaling in log space, then (optionally) restricts
    sampling to the ``top_k`` most likely classes before renormalizing.

    Args:
        preds: 1-D array-like of class probabilities (e.g. a softmax output).
        temperature: Sampling temperature; values < 1 sharpen the
            distribution, values > 1 flatten it. Must be > 0.
        top_k: If given, only the ``top_k`` highest-probability classes are
            eligible; values larger than the vocabulary size are clamped
            (the original code raised IndexError in that case).

    Returns:
        int: the sampled class index.
    """
    preds = np.asarray(preds).astype('float64')
    # Temperature scaling: softmax(log(p) / T). The epsilon guards log(0).
    preds = np.log(preds + 1e-8) / temperature
    # Subtract the max before exp for numerical stability; the softmax
    # result is mathematically unchanged.
    exp_preds = np.exp(preds - np.max(preds))
    preds = exp_preds / np.sum(exp_preds)
    if top_k is not None:
        # Clamp so a top_k larger than the vocabulary cannot raise.
        k = min(int(top_k), preds.size)
        # np.partition finds the k-th largest value without a full sort.
        threshold = np.partition(preds, -k)[-k]
        preds[preds < threshold] = 0
        preds = preds / np.sum(preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)
# Generate text: start from a seed and repeatedly predict the next
# character, feeding the most recent `seq_length` characters back in.
seed_text = "hello"
generated = seed_text
for _ in range(50):
    window = generated[-seq_length:]
    encoded = np.array([[char_to_idx[c] for c in window]])
    probabilities = model.predict(encoded, verbose=0)[0]
    chosen = sample(probabilities, temperature=0.8, top_k=3)
    generated += idx_to_char[chosen]
print("Generated text:", generated)