import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
# Sample data (replace with actual movie reviews and labels).
# `texts[i]` is the review whose sentiment is `labels[i]`.
texts = [
    "I loved this movie, it was fantastic!",
    "This film was terrible and boring.",
    "An excellent performance but the story was dull.",
    "I didn't like the movie, but the soundtrack was good.",
    "A masterpiece with subtle emotions and great acting.",
    "Worst movie ever, I want my time back.",
    "It was okay, not great but not bad either.",
    "The plot was confusing but the visuals were stunning.",
    "I enjoyed the film, very touching and well made.",
    "Not my type of movie, but it had some good moments.",
]
# 1=positive, 0=negative.
# Stored as a NumPy array rather than a plain list: Keras `fit`/`evaluate`
# expect array-like targets, and a bare Python list of ints (which is what a
# list would remain after `train_test_split`) is rejected by its data adapters.
labels = np.array([1, 0, 1, 0, 1, 0, 0, 1, 1, 0])
# Turn the raw review strings into fixed-length integer sequences.
# `max_words` is handed to the tokenizer as its vocabulary cap (`num_words`);
# `max_len` is the length every sequence is padded/truncated to (`maxlen`).
max_words, max_len = 1000, 20

tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(texts)

# One list of word indices per review, then normalised to `max_len` columns.
sequences = tokenizer.texts_to_sequences(texts)
data = pad_sequences(
    sequences,
    maxlen=max_len,
)
# Hold out 30% of the samples for validation; the fixed seed keeps the split
# repeatable between runs.
X_train, X_val, y_train, y_val = train_test_split(
    data,
    labels,
    test_size=0.3,
    random_state=42,
)
# Build model: embedding -> LSTM -> dropout -> single sigmoid unit for
# binary (positive/negative) classification.
model = Sequential([
    # Declare the input shape explicitly instead of passing `input_length` to
    # Embedding: that argument is deprecated in Keras 3, while an Input entry
    # is accepted by both TF2 Keras and Keras 3.
    tf.keras.Input(shape=(max_len,), dtype="int32"),
    Embedding(input_dim=max_words, output_dim=50),
    # Only the final LSTM state is needed to classify the whole review.
    LSTM(64, return_sequences=False),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Fit the network silently for 10 epochs, scoring the validation split after
# each epoch; the per-epoch metrics are kept in `history`.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=10,
    batch_size=32,
    verbose=0,
)
# Score the trained model on the validation split and report the result.
# `evaluate` yields the loss followed by the compiled metric (accuracy).
val_loss, val_acc = model.evaluate(
    x=X_val,
    y=y_val,
    verbose=0,
)
print(f"Validation accuracy: {val_acc*100:.2f}%")
print(f"Validation loss: {val_loss:.4f}")