import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
# Sample data preparation (dummy example): a tiny corpus for next-word prediction.
texts = ["hello how are you", "how are you doing", "hello what is your name", "what is your favorite color"]
# Tokenization and sequence preparation
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)

# Build incremental n-gram prefixes: "hello how are you" yields
# [hello, how], [hello, how, are], [hello, how, are, you] (as integer ids).
# NOTE: the original source had these loop bodies un-indented, which is a
# SyntaxError in Python; the indentation below restores the intended logic.
sequences = []
for line in texts:
    encoded = tokenizer.texts_to_sequences([line])[0]
    for i in range(1, len(encoded)):
        sequences.append(encoded[:i + 1])

# Left-pad ('pre') so the tokens nearest the prediction target stay at the end.
# pad_sequences already returns a numpy array, so no extra np.array() is needed.
max_len = max(len(seq) for seq in sequences)
sequences = pad_sequences(sequences, maxlen=max_len, padding='pre')

# Inputs are all tokens but the last; the target is the final token of each
# prefix, one-hot encoded. +1 because Tokenizer ids start at 1 (0 = padding).
X, y = sequences[:, :-1], sequences[:, -1]
y = tf.keras.utils.to_categorical(y, num_classes=len(tokenizer.word_index) + 1)
# Model with dropout and a small LSTM to limit overfitting on this tiny corpus.
vocab_size = len(tokenizer.word_index) + 1  # +1 for the reserved padding id 0
model = Sequential([
    # `input_length` is deprecated in Keras 2 and removed in Keras 3; the
    # sequence length is inferred from the data at fit time, so it is omitted
    # here — behavior is unchanged and the code stays Keras-3 compatible.
    Embedding(input_dim=vocab_size, output_dim=10),
    LSTM(32, return_sequences=False),
    Dropout(0.3),
    Dense(vocab_size, activation='softmax'),
])
model.compile(
    loss='categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    metrics=['accuracy'],
)

# Early stopping: halt when validation loss stops improving for 3 epochs and
# restore the weights from the best epoch seen.
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
history = model.fit(
    X, y,
    epochs=30,
    batch_size=4,
    validation_split=0.2,
    callbacks=[early_stop],
    verbose=0,
)
# Report the last recorded epoch's metrics for both the training and
# validation splits (accuracies as percentages, losses raw).
final = {metric: values[-1] for metric, values in history.history.items()}
print(
    f"Training accuracy: {final['accuracy'] * 100:.2f}%, "
    f"Validation accuracy: {final['val_accuracy'] * 100:.2f}%"
)
print(
    f"Training loss: {final['loss']:.4f}, "
    f"Validation loss: {final['val_loss']:.4f}"
)