import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
# Sample data (replace with real dataset in practice).
# Each entry pairs a review text with its sentiment label (1 = positive, 0 = negative).
_labelled_samples = [
    ("I love this movie", 1),
    ("This movie is terrible", 0),
    ("Amazing film", 1),
    ("Worst movie ever", 0),
    ("I enjoyed it", 1),
    ("Not good", 0),
]
texts = [text for text, _ in _labelled_samples]
labels = [label for _, label in _labelled_samples]

# Vocabulary cap passed to the tokenizer, and the fixed sequence length
# passed to pad_sequences.
max_words = 1000
max_len = 10

# Tokenize texts: fit the vocabulary on the corpus, then map every text to a
# list of integer word indices.
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

# Model inputs (index sequences brought to length max_len) and targets.
x_data = pad_sequences(sequences, maxlen=max_len)
y_data = np.array(labels)
# Load GloVe embeddings into a {word: vector} lookup.
# Each line of the file is parsed as a token followed by its vector
# components, separated by whitespace.
embedding_index = {}
with open('glove.6B.100d.txt', encoding='utf8') as f:
    for line in f:
        values = line.split()
        if not values:
            # Blank line (e.g. a trailing newline at end of file): there is no
            # token to index, and values[0] would raise IndexError.
            continue
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embedding_index[word] = coefs
# Prepare embedding matrix: row i receives the GloVe vector of the word whose
# tokenizer index is i. Words with no GloVe entry keep an all-zero row.
embedding_dim = 100
word_index = tokenizer.word_index
# NOTE(review): the "+ 1" presumably leaves room for index 0, which the
# tokenizer does not appear to assign to any word - confirm against its docs.
num_words = min(max_words, len(word_index) + 1)
embedding_matrix = np.zeros((num_words, embedding_dim))
for word, i in word_index.items():
    # Only indices below the vocabulary cap have a row in the matrix.
    if i < max_words:
        embedding_vector = embedding_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector
# Build model with pre-trained embeddings.
# The embedding layer is created frozen (trainable=False), built explicitly,
# and then loaded with the GloVe matrix via set_weights. This replaces the
# legacy `weights=[...]` / `input_length=...` constructor arguments
# (`input_length` is deprecated in Keras 3) and also makes Keras check that
# the matrix shape matches (num_words, embedding_dim).
embedding_layer = Embedding(num_words, embedding_dim, trainable=False)
embedding_layer.build((1,))
embedding_layer.set_weights([embedding_matrix])

model = Sequential()
# Explicit input spec takes over the role of `input_length`: each sample is a
# sequence of max_len integer word indices.
model.add(tf.keras.Input(shape=(max_len,), dtype='int32'))
model.add(embedding_layer)
model.add(LSTM(32))
model.add(Dropout(0.5))
# Single sigmoid unit paired with binary cross-entropy for the 0/1 labels.
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Train model silently (verbose=0), with 20% of the data held out for validation.
history = model.fit(
    x_data,
    y_data,
    epochs=10,
    batch_size=32,
    validation_split=0.2,
    verbose=0,
)

# Extract final metrics: the last recorded value of each per-epoch series.
_per_epoch = history.history
train_acc, val_acc, train_loss, val_loss = (
    _per_epoch[key][-1] for key in ('accuracy', 'val_accuracy', 'loss', 'val_loss')
)
# Accuracies are reported as percentages; losses are left as-is.
train_acc *= 100
val_acc *= 100

metrics_report = f"Training accuracy: {train_acc:.2f}%, Validation accuracy: {val_acc:.2f}%, Training loss: {train_loss:.3f}, Validation loss: {val_loss:.3f}"
print(metrics_report)