import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
# Legacy tf.keras.preprocessing text utilities (available through TF 2.15 / Keras 2)
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
# Sample data (for demonstration, replace with real data)
texts = ['Hello world', 'How are you', 'Good morning', 'Nice to meet you']
labels = np.array([1, 0, 1, 0])  # binary labels as a NumPy array, the safest input type for model.fit
# Tokenize the texts and pad every sequence to a fixed length of 5
tokenizer = Tokenizer(num_words=1000)  # cap the vocabulary at the 1,000 most frequent words
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)  # each text becomes a list of word indices
data = pad_sequences(sequences, maxlen=5)  # zero-pads at the front by default
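# Quick sanity check (illustrative): inspect the learned vocabulary and the padded
# array. Index values depend on word frequency, so the exact mapping may differ.
print(tokenizer.word_index)  # word -> integer index, most frequent words first
print(data.shape)            # (4, 5): four samples, each padded/truncated to length 5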
# Model with dropout to reduce overfitting
model = Sequential([
    Embedding(input_dim=1000, output_dim=64, input_length=5),  # word indices -> 64-d vectors
    LSTM(32, return_sequences=False),  # summarize the sequence as a single 32-d vector
    Dropout(0.5),  # randomly zero half the units during training
    Dense(16, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')  # probability of the positive class
])
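# Optional: print a layer-by-layer summary with parameter counts
# (the model is already built here because input_length fixes the input shape)
model.summary()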
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Train, holding out 25% of the samples for validation
history = model.fit(data, labels, epochs=20, batch_size=2, validation_split=0.25, verbose=0)
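# A minimal diagnostic-plot sketch, assuming matplotlib is installed:
# diverging training/validation loss curves are the usual sign of overfitting.
import matplotlib.pyplot as plt
plt.plot(history.history['loss'], label='training loss')
plt.plot(history.history['val_loss'], label='validation loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend()
plt.show()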
# Report the final-epoch metrics from the training history
train_acc = history.history['accuracy'][-1] * 100
val_acc = history.history['val_accuracy'][-1] * 100
train_loss = history.history['loss'][-1]
val_loss = history.history['val_loss'][-1]
print(f'Training accuracy: {train_acc:.2f}%')
print(f'Validation accuracy: {val_acc:.2f}%')
print(f'Training loss: {train_loss:.3f}')
print(f'Validation loss: {val_loss:.3f}')
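# A minimal inference sketch on unseen texts (new_texts below are made-up examples).
# With only four training samples the predictions are not meaningful; this just shows
# that the same tokenizer and maxlen must be reused at inference time. Note that words
# never seen by fit_on_texts are silently dropped, since no oov_token was set.
new_texts = ['Hello there', 'How is it going']
new_data = pad_sequences(tokenizer.texts_to_sequences(new_texts), maxlen=5)
probs = model.predict(new_data, verbose=0)   # shape (2, 1), sigmoid outputs in [0, 1]
preds = (probs > 0.5).astype(int).flatten()  # threshold at 0.5 -> class 0 or 1
print(preds)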