import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout, TimeDistributed, Bidirectional
from tensorflow.keras.callbacks import EarlyStopping
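# Optional: fix random seeds so the dummy-data run below is reproducible.
# (A convenience for this sketch; drop or parameterize these for real training.)
np.random.seed(42)
tf.random.set_seed(42)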
# Parameters
max_chunks = 5 # number of chunks per document
chunk_size = 100 # words per chunk
embedding_dim = 50
lstm_units = 64
num_classes = 3
# Dummy data generation: random arrays standing in for pre-computed word
# embeddings, just to make the script runnable end to end.
# X shape: (num_samples, max_chunks, chunk_size, embedding_dim)
num_samples = 1000
X_train = np.random.rand(num_samples, max_chunks, chunk_size, embedding_dim).astype(np.float32)
y_train = tf.keras.utils.to_categorical(np.random.randint(0, num_classes, num_samples), num_classes)
X_val = np.random.rand(200, max_chunks, chunk_size, embedding_dim).astype(np.float32)
y_val = tf.keras.utils.to_categorical(np.random.randint(0, num_classes, 200), num_classes)
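# Sanity check (optional): confirm the generated tensors have the shapes the
# model below expects before training starts.
assert X_train.shape == (num_samples, max_chunks, chunk_size, embedding_dim)
assert y_train.shape == (num_samples, num_classes)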
# Model definition
# Per-sample input shape: (max_chunks, chunk_size, embedding_dim);
# the batch dimension is implicit.
input_layer = Input(shape=(max_chunks, chunk_size, embedding_dim))
# Encode each chunk with a shared bidirectional LSTM: TimeDistributed applies
# the same encoder weights to every chunk independently.
# Output shape: (batch, max_chunks, 2 * lstm_units)
chunk_encoder = TimeDistributed(Bidirectional(LSTM(lstm_units, return_sequences=False)))(input_layer)
chunk_encoder = Dropout(0.3)(chunk_encoder)
# Combine the chunk encodings with a second bidirectional LSTM running over
# the chunk axis. Output shape: (batch, 2 * lstm_units)
hierarchical_lstm = Bidirectional(LSTM(lstm_units, return_sequences=False))(chunk_encoder)
hierarchical_lstm = Dropout(0.3)(hierarchical_lstm)
# Output layer: softmax over the document classes
output_layer = Dense(num_classes, activation='softmax')(hierarchical_lstm)
model = Model(inputs=input_layer, outputs=output_layer)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
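# Print the architecture as a quick check that the layer output shapes match
# the comments above (e.g. the TimeDistributed block should report
# (None, max_chunks, 2 * lstm_units)).
model.summary()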
# Early stopping to prevent overfitting
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
# Train model
history = model.fit(
    X_train, y_train,
    epochs=30,
    batch_size=32,
    validation_data=(X_val, y_val),
    callbacks=[early_stop],
)
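# Minimal inference sketch after training. X_test is more random placeholder
# data here; with real documents it would hold their pre-computed chunk
# embeddings, shaped exactly like the training input.
X_test = np.random.rand(10, max_chunks, chunk_size, embedding_dim).astype(np.float32)
probs = model.predict(X_test)                 # (10, num_classes) class probabilities
predicted_classes = np.argmax(probs, axis=1)  # integer class label per document
print(predicted_classes)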