import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
# Toy corpus: three short sentences, each paired with a 0/1 label.
texts = [
    'I love machine learning',
    'Deep learning is fun',
    'Natural language processing with embeddings',
]
labels = [1, 1, 0]
y_data = np.asarray(labels)

# Vocabulary cap handed to the tokenizer, and the fixed length every
# sequence is padded (or truncated) to.
max_words = 10000
max_len = 10

# Learn the vocabulary from the corpus, then encode each sentence as a list
# of integer word indices.
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

# Stack the variable-length index lists into one array with max_len columns.
x_data = pad_sequences(sequences, maxlen=max_len)
# Load GloVe embeddings
# Builds embeddings_index: token -> float32 vector with embedding_dim entries.
# Every usable line of the file is expected to hold a token followed by
# exactly embedding_dim whitespace-separated numbers.
embedding_dim = 100
embeddings_index = {}
with open('glove.6B.100d.txt', encoding='utf8') as f:
    for line in f:
        # Split from the right into at most embedding_dim coefficients so a
        # token that itself contains whitespace stays in one piece instead
        # of leaking into the numeric fields.
        values = line.strip().rsplit(None, embedding_dim)
        if len(values) != embedding_dim + 1:
            # Blank or truncated line: skip it rather than crash on
            # values[0] or store a vector of the wrong length.
            continue
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs
# Prepare embedding matrix
# Row i holds the pre-trained vector of the word whose tokenizer index is i.
# Rows for words missing from embeddings_index (and row 0, which no word in
# word_index maps to in this loop) stay all-zero.
word_index = tokenizer.word_index
num_words = min(max_words, len(word_index) + 1)
embedding_matrix = np.zeros((num_words, embedding_dim))
for token, row in word_index.items():
    # Indices at or beyond the vocabulary cap have no row in the matrix.
    if row < max_words:
        vector = embeddings_index.get(token)
        if vector is not None:
            embedding_matrix[row] = vector
# Build model
# Frozen pre-trained embeddings -> dropout -> LSTM -> single sigmoid unit,
# compiled for binary classification with accuracy tracking.
model = Sequential([
    # State the input shape explicitly rather than through Embedding's
    # legacy `input_length=` keyword.
    tf.keras.Input(shape=(max_len,), dtype='int32'),
    Embedding(
        num_words,
        embedding_dim,
        # Seed the lookup table through the initializer instead of the
        # legacy `weights=[...]` constructor keyword; the resulting initial
        # weights are the same matrix.
        embeddings_initializer=tf.keras.initializers.Constant(embedding_matrix),
        # Keep the pre-trained vectors fixed during training.
        trainable=False,
    ),
    Dropout(0.3),
    LSTM(32),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Train model
# Fit silently (verbose=0) for 10 epochs in batches of 2, with 30% of the
# data requested as a validation split.
history = model.fit(
    x_data,
    y_data,
    epochs=10,
    batch_size=2,
    validation_split=0.3,
    verbose=0,
)

# Extract metrics
# Keep only the value recorded for the last epoch of each tracked series.
final_epoch = {name: series[-1] for name, series in history.history.items()}
train_loss = final_epoch['loss']
val_loss = final_epoch['val_loss']
# Accuracies are reported as percentages.
train_acc = final_epoch['accuracy'] * 100
val_acc = final_epoch['val_accuracy'] * 100

print(f'Training accuracy: {train_acc:.2f}%, Validation accuracy: {val_acc:.2f}%')
print(f'Training loss: {train_loss:.4f}, Validation loss: {val_loss:.4f}')