import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GlobalAveragePooling1D, Dense
# Sample data
texts = [
    'I love machine learning',
    'Deep learning is fun',
    'Natural language processing with neural networks',
    'Machine learning models can overfit',
    'Vocabulary size affects model performance',
    'Control vocabulary size to reduce overfitting',
    'Neural networks learn from data',
    'Text classification with neural networks',
    'Overfitting happens when model is too complex',
    'Validation accuracy is important'
]
labels = np.array([1, 1, 1, 0, 0, 0, 1, 1, 0, 0])  # binary class labels as an array for model.fit
# Cap the vocabulary: only the most frequent tokens keep their own indices;
# anything rarer maps to the <OOV> token during texts_to_sequences
vocab_size = 20
tokenizer = Tokenizer(num_words=vocab_size, oov_token='<OOV>')
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
padded = pad_sequences(sequences, padding='post')  # zero-pad each sequence to the longest one
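# Sanity check: the full corpus vocabulary exceeds the cap. With num_words=20,
# only word indices below 20 survive tokenization (index 0 is reserved for
# padding, index 1 for <OOV>), so rare words collapse into <OOV>.
print(f'Full vocabulary: {len(tokenizer.word_index)} unique tokens, '
      f'capped at {vocab_size} for the model')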
# Build model: an embedding sized to the capped vocabulary, mean-pooled
model = Sequential([
    Embedding(vocab_size, 16, input_length=padded.shape[1]),
    GlobalAveragePooling1D(),  # average the word vectors into one fixed-size vector
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')  # single probability for binary classification
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
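# Optional: inspect parameter counts. The embedding table alone holds
# vocab_size * 16 = 320 weights, so the vocabulary cap directly bounds
# model capacity, which is the lever this example is about.
model.summary()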
# Train; validation_split holds out the last 20% of the samples (2 here)
# before shuffling, so validation metrics on a set this small are noisy
history = model.fit(padded, labels, epochs=30, validation_split=0.2, verbose=0)
# Print final metrics
train_acc = history.history['accuracy'][-1] * 100
val_acc = history.history['val_accuracy'][-1] * 100
train_loss = history.history['loss'][-1]
val_loss = history.history['val_loss'][-1]
print(f'Training accuracy: {train_acc:.2f}%')
print(f'Validation accuracy: {val_acc:.2f}%')
print(f'Training loss: {train_loss:.4f}')
print(f'Validation loss: {val_loss:.4f}')