import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
# Sample data (replace with actual dataset loading)
texts = [
    "Free entry in 2 a wkly comp to win FA Cup final tkts 21st May 2005.",
    "Nah I don't think he goes to usf, he lives around here though",
    "WINNER!! As a valued network customer you have been selected to receivea £900 prize reward!",
    "Had your mobile 11 months or more? You are entitled to update to the latest colour mobiles with camera for free!",
    "I'm gonna be home soon and i don't want to talk about this stuff anymore tonight",
]
# Binary targets aligned index-for-index with `texts`: 1 = spam, 0 = not spam
labels = [1, 0, 1, 1, 0]

# Turn the raw messages into a bag-of-words count matrix; the Keras Dense
# layers need a dense array, so expand the sparse document-term matrix.
vectorizer = CountVectorizer()
term_counts = vectorizer.fit_transform(texts)
X = term_counts.toarray()
y = np.array(labels)
# Split data into train/validation sets.
# stratify=y preserves the spam/ham ratio in both splits — without it, a
# random 60/40 split of only 5 samples can easily yield a single-class
# validation set, which makes val_loss/val_accuracy meaningless and breaks
# the EarlyStopping signal below.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.4, random_state=42, stratify=y
)
# Small MLP classifier: two narrow hidden layers with heavy dropout to
# curb overfitting on the tiny bag-of-words feature matrix.
model = Sequential()
model.add(Dense(16, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dropout(0.5))
model.add(Dense(8, activation='relu'))
model.add(Dropout(0.5))
# Sigmoid output: probability that the message is spam.
model.add(Dense(1, activation='sigmoid'))

# Binary cross-entropy with a modest Adam learning rate.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
# Stop training once validation loss has not improved for 5 epochs,
# then roll back to the best weights seen so far.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

# Train silently (verbose=0); `history` keeps the per-epoch loss/accuracy
# curves should they be needed for plotting later.
history = model.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=2,
    validation_data=(X_val, y_val),
    callbacks=[early_stop],
    verbose=0,
)
# Evaluate on both splits so the train/validation gap (overfitting) is visible.
train_metrics = model.evaluate(X_train, y_train, verbose=0)
val_metrics = model.evaluate(X_val, y_val, verbose=0)
train_loss, train_acc = train_metrics
val_loss, val_acc = val_metrics
print(f"Training accuracy: {train_acc*100:.2f}%, Validation accuracy: {val_acc*100:.2f}%")
print(f"Training loss: {train_loss:.4f}, Validation loss: {val_loss:.4f}")