import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
# --- Sample data placeholders (replace with actual data loading) ---
# NOTE(review): shapes below are inferred from the model definition that
# consumes these values — confirm against the real preprocessing pipeline.
X_train = ... # tokenized and padded sequences for training — presumably int array of shape (n_train, max_len)
y_train = ... # one-hot encoded labels for training — presumably shape (n_train, max_len, num_classes), one label per timestep
X_val = ... # tokenized and padded sequences for validation — same layout as X_train
y_val = ... # one-hot encoded labels for validation — same layout as y_train
vocab_size = 10000 # example vocabulary size (number of distinct token indices fed to the Embedding)
embedding_dim = 64 # dimensionality of the learned token embeddings
max_len = 100 # max sequence length; shorter sequences are assumed padded to this length
num_classes = 10 # number of entity classes including 'O' (the outside/non-entity tag)
# BiLSTM sequence-labeling model: emits a per-timestep softmax over the
# entity classes, output shape (batch, max_len, num_classes).
#
# Fix: mask_zero=True makes the Embedding produce a mask so padded timesteps
# (token index 0) are ignored by the LSTMs and excluded from the loss and
# accuracy — without it, padding dominates the objective and inflates the
# reported accuracy on padded NER sequences.
# NOTE(review): this assumes token index 0 is reserved for padding (the Keras
# Tokenizer / pad_sequences default) — confirm against the preprocessing code.
model = Sequential([
    Embedding(vocab_size, embedding_dim, input_length=max_len, mask_zero=True),
    Bidirectional(LSTM(64, return_sequences=True)),
    Dropout(0.5),
    Bidirectional(LSTM(32, return_sequences=True)),
    Dropout(0.5),
    # Dense on 3-D input is applied independently at every timestep
    # (implicit TimeDistributed), giving one class distribution per token.
    Dense(num_classes, activation='softmax')
])
# Configure the optimizer, per-timestep cross-entropy loss, and the
# accuracy metric used for monitoring.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
# Halt training once val_loss has failed to improve for 3 consecutive
# epochs, and roll the model back to the weights of its best epoch.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)
# Train for at most 20 epochs; the EarlyStopping callback typically
# terminates training sooner based on validation loss.
training_callbacks = [early_stop]
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=32,
    epochs=20,
    callbacks=training_callbacks,
)
# Re-score the validation set after training; since EarlyStopping restored
# the best-epoch weights, this reflects the best model seen during training.
evaluation = model.evaluate(X_val, y_val)
val_loss, val_accuracy = evaluation
print('Validation accuracy: {:.2f}%'.format(val_accuracy * 100))