import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import EarlyStopping
# Toy sentiment dataset: ten short sentences with binary labels.
sentences = [
    'I love this movie',
    'This film is terrible',
    'What a great day',
    'I hate this song',
    'This is amazing',
    'I dislike this book',
    'Such a wonderful experience',
    'This is bad',
    'I enjoy this',
    'This is awful',
]
labels = [1, 0, 1, 0, 1, 0, 1, 0, 1, 0]  # 1 = positive, 0 = negative

# Encode each sentence as a fixed-length integer sequence.
# Unseen words map to the '<OOV>' token; shorter sequences are
# zero-padded at the end to length 5.
tokenizer = Tokenizer(num_words=1000, oov_token='<OOV>')
tokenizer.fit_on_texts(sentences)
encoded = tokenizer.texts_to_sequences(sentences)
padded = pad_sequences(encoded, padding='post', maxlen=5)

# Hold out the last two examples (one of each class) for validation.
split_at = 8
X_train, X_val = padded[:split_at], padded[split_at:]
y_train, y_val = labels[:split_at], labels[split_at:]
# Build a small LSTM classifier. Dropout and the deliberately small layer
# sizes guard against overfitting on this tiny (10-sample) dataset.
# NOTE: `input_length` is intentionally NOT passed to Embedding — it was
# always optional in TF2 (the LSTM that follows needs no static length)
# and the argument was removed in Keras 3, where passing it is an error.
model = Sequential([
    Embedding(input_dim=1000, output_dim=16),
    LSTM(16, return_sequences=False),
    Dropout(0.5),
    Dense(8, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Stop when validation loss plateaus for 3 epochs and roll back to the
# best-performing weights rather than the last ones.
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Train silently (verbose=0); `history` keeps the per-epoch loss/accuracy
# curves for both splits.
history = model.fit(
    X_train,
    y_train,
    epochs=30,
    batch_size=2,
    validation_data=(X_val, y_val),
    callbacks=[early_stop],
    verbose=0,
)
# Compare fit quality on the data the model saw against the held-out split;
# a large gap between the two accuracies would indicate overfitting.
train_metrics = model.evaluate(X_train, y_train, verbose=0)
val_metrics = model.evaluate(X_val, y_val, verbose=0)
train_loss, train_acc = train_metrics
val_loss, val_acc = val_metrics
print(f'Training accuracy: {train_acc*100:.2f}%')
print(f'Validation accuracy: {val_acc*100:.2f}%')