import tensorflow as tf
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout, Embedding
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping
# Sample data placeholders (replace with actual data loading).
# X_* are integer-tokenized source sequences, shape (num_samples, max_input_len);
# y_* are tokenized target summaries that include start/end tokens, shape
# (num_samples, max_target_len), so the shifted slices in fit() below line up.
X_train, y_train = ...  # tokenized input sequences and target summaries
X_val, y_val = ...
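# One plausible way to produce those arrays, shown commented out because it is a
# sketch under assumptions, not the original pipeline: `articles` and `summaries`
# are hypothetical lists of raw text pairs, and the targets get <start>/<end>
# markers so the shifted slices used in fit() align.
#
#   from tensorflow.keras.preprocessing.text import Tokenizer
#   from tensorflow.keras.preprocessing.sequence import pad_sequences
#
#   tokenizer = Tokenizer(num_words=vocab_size, filters='', oov_token='<unk>')
#   summaries_marked = ['<start> ' + s + ' <end>' for s in summaries]
#   tokenizer.fit_on_texts(articles + summaries_marked)
#   X_train = pad_sequences(tokenizer.texts_to_sequences(articles), padding='post')
#   y_train = pad_sequences(tokenizer.texts_to_sequences(summaries_marked), padding='post')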
# Model parameters
vocab_size = 5000    # shared source/target vocabulary size
embedding_dim = 128  # token embedding dimension
latent_dim = 256     # LSTM hidden-state size
# Encoder: reads the source sequence and compresses it into the final LSTM states
encoder_inputs = Input(shape=(None,))
encoder_embedding = Embedding(vocab_size, embedding_dim)(encoder_inputs)
encoder_lstm = LSTM(latent_dim, return_state=True)
_, state_h, state_c = encoder_lstm(encoder_embedding)  # the output itself is unused
encoder_states = [state_h, state_c]  # context handed to the decoder
# Decoder: predicts each next token, conditioned on the encoder states.
# The embedding layer is kept as a named object so it can be reused at inference.
decoder_inputs = Input(shape=(None,))
decoder_embedding_layer = Embedding(vocab_size, embedding_dim)
decoder_embedding = decoder_embedding_layer(decoder_inputs)
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_embedding, initial_state=encoder_states)
decoder_dropout = Dropout(0.5)(decoder_outputs)  # regularize before the output projection
decoder_dense = Dense(vocab_size, activation='softmax')  # per-step distribution over the vocabulary
decoder_outputs = decoder_dense(decoder_dropout)
# Training model: teacher forcing, so the decoder is fed the gold previous token
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
# Compile with a learning rate below the Adam default of 1e-3
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)
model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# Stop when val_loss has not improved for 3 epochs and roll back to the best weights
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
# Train with teacher forcing: the decoder input drops the final target token,
# the label drops the start token, so each step predicts the next token.
model.fit(
    [X_train, y_train[:, :-1]],
    y_train[:, 1:, None],  # trailing axis gives labels shape (batch, steps, 1) for the sparse loss
    batch_size=32,  # smaller batch size
    epochs=30,
    validation_data=([X_val, y_val[:, :-1]], y_val[:, 1:, None]),
    callbacks=[early_stopping]
)
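# The training graph above relies on teacher forcing, so it cannot generate
# summaries by itself. A common pattern is to build separate inference models
# that reuse the trained layers and decode greedily, one token at a time. This
# is a minimal sketch, not part of the original setup: start_token_id,
# end_token_id, and max_len are hypothetical values that depend on whatever
# tokenizer produced the data.
import numpy as np

# Encoder inference model: source tokens in, final LSTM states out
encoder_model = Model(encoder_inputs, encoder_states)

# Decoder inference model: one token plus previous states in, probabilities
# plus updated states out. Dropout is identity at inference, so it is omitted.
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
decoder_emb_inf = decoder_embedding_layer(decoder_inputs)
decoder_out_inf, state_h_inf, state_c_inf = decoder_lstm(
    decoder_emb_inf, initial_state=decoder_states_inputs)
decoder_probs_inf = decoder_dense(decoder_out_inf)
decoder_model = Model([decoder_inputs] + decoder_states_inputs,
                      [decoder_probs_inf, state_h_inf, state_c_inf])

def decode_sequence(input_seq, start_token_id, end_token_id, max_len=50):
    # Encode the source once, then emit tokens greedily until the end token.
    states = encoder_model.predict(input_seq, verbose=0)
    target = np.array([[start_token_id]])
    decoded = []
    for _ in range(max_len):
        probs, h, c = decoder_model.predict([target] + states, verbose=0)
        next_id = int(np.argmax(probs[0, -1, :]))
        if next_id == end_token_id:
            break
        decoded.append(next_id)
        target = np.array([[next_id]])  # feed the prediction back in
        states = [h, c]
    return decoded

# Example call (hypothetical token ids):
#   decode_sequence(X_val[:1], start_token_id=1, end_token_id=2)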