import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
# Dummy data: each row is a 100-d sentence embedding; label 1 marks a
# sentence that belongs in the summary, 0 otherwise.
rng = np.random.default_rng(42)  # seeded so the demo is reproducible
X = rng.random((1000, 100))
y = rng.integers(0, 2, 1000)
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42,
)

# Small MLP classifier: two ReLU hidden layers with dropout, sigmoid head
# producing a per-sentence "belongs in summary" probability.
model = Sequential([
    Dense(64, activation='relu', input_shape=(100,)),
    Dropout(0.5),
    Dense(32, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Stop when val_loss plateaus for 3 epochs and roll back to the best weights.
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
history = model.fit(
    X_train, y_train,
    epochs=30,
    batch_size=32,
    validation_data=(X_val, y_val),
    callbacks=[early_stop],
)

# Select summary sentences with a confidence threshold.
# NOTE: predict() returns an (n_samples, 1) array, so flatten before
# counting. The previous np.sum(..., axis=1) summed across a single-column
# matrix, yielding a 0/1 per sentence — its mean was the *selection rate*,
# mislabeled as an "average summary length".
preds = model.predict(X_val).ravel()
selected = preds > 0.7
num_selected = int(selected.sum())
selection_rate = float(selected.mean())

print(f'Validation accuracy: {history.history["val_accuracy"][-1]*100:.2f}%')
print(f'Sentences selected for summary: {num_selected} '
      f'({selection_rate:.1%} of validation sentences)')