import tensorflow as tf
from tensorflow.keras.layers import Input, LSTM, Dense, Embedding, Layer
from tensorflow.keras.models import Model
import numpy as np
# Sample data preparation (toy example).
# NOTE(review): the second French target previously contained mojibake
# ('comment รงa va' — UTF-8 'ç' mis-decoded as Thai); restored to
# 'comment ça va' so the target vocabulary is clean.
input_texts = ['hello', 'how are you', 'good morning', 'thank you']
target_texts = ['bonjour', 'comment ça va', 'bon matin', 'merci']

# Character-level vocabularies. Token ids start at 1 because id 0 is
# reserved for padding (the embeddings below use mask_zero=True).
input_characters = sorted(set(''.join(input_texts)))
target_characters = sorted(set(''.join(target_texts)))
num_encoder_tokens = len(input_characters) + 1   # +1 for padding id 0
num_decoder_tokens = len(target_characters) + 1  # +1 for padding id 0
max_encoder_seq_length = max(len(txt) for txt in input_texts)
# +1 so the shifted-by-one targets fit one step past the longest text.
max_decoder_seq_length = max(len(txt) for txt in target_texts) + 1

input_token_index = {char: i + 1 for i, char in enumerate(input_characters)}
target_token_index = {char: i + 1 for i, char in enumerate(target_characters)}

# Integer-id inputs (zero-padded) and one-hot, shifted targets.
encoder_input_data = np.zeros((len(input_texts), max_encoder_seq_length), dtype='int32')
decoder_input_data = np.zeros((len(input_texts), max_decoder_seq_length), dtype='int32')
decoder_target_data = np.zeros((len(input_texts), max_decoder_seq_length, num_decoder_tokens), dtype='float32')
for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)):
    for t, char in enumerate(input_text):
        encoder_input_data[i, t] = input_token_index[char]
    for t, char in enumerate(target_text):
        decoder_input_data[i, t] = target_token_index[char]
        if t > 0:
            # Teacher forcing: the target at step t-1 is the character
            # that is fed to the decoder at step t.
            decoder_target_data[i, t - 1, target_token_index[char]] = 1.0
# NOTE(review): there is no start-of-sequence token (so the first target
# character is never predicted) and no end-of-sequence token — fine for
# this toy demo, but both are needed for real inference-time decoding.
# Define Bahdanau Attention Layer
class BahdanauAttention(Layer):
    """Additive (Bahdanau-style) attention layer.

    Scores each encoder timestep against the current decoder state with
    ``score = V(tanh(W1(values) + W2(query)))`` and returns the
    softmax-weighted sum of ``values`` together with the weights.

    Fix(review): the original did not forward ``**kwargs`` to ``Layer``,
    did not store ``units``, and lacked ``get_config`` — so the layer
    could not be serialized via ``model.save`` / ``load_model``.
    """

    def __init__(self, units, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.W1 = Dense(units)
        self.W2 = Dense(units)
        self.V = Dense(1)

    def call(self, query, values):
        # query:  (batch_size, hidden)           — decoder hidden state
        # values: (batch_size, max_len, hidden)  — encoder output sequence
        # NOTE(review): any padding mask from mask_zero=True embeddings is
        # ignored here, so padded encoder steps can still receive weight.
        query_with_time_axis = tf.expand_dims(query, 1)  # (batch, 1, hidden)
        # Broadcast-add over the time axis, then collapse to one score per step.
        score = self.V(tf.nn.tanh(self.W1(values) + self.W2(query_with_time_axis)))
        attention_weights = tf.nn.softmax(score, axis=1)  # softmax over time
        # Weighted sum over time: (batch, hidden).
        context_vector = tf.reduce_sum(attention_weights * values, axis=1)
        return context_vector, attention_weights

    def get_config(self):
        # Required for layer (de)serialization alongside save/load.
        config = super().get_config()
        config['units'] = self.units
        return config
# Model parameters
embedding_dim = 64
units = 64

# --- Encoder ---
# Variable-length integer sequences; mask_zero=True makes id 0 a padding
# marker whose mask propagates into the LSTM.
encoder_inputs = Input(shape=(None,), name='encoder_inputs')
encoder_embedding = Embedding(num_encoder_tokens, embedding_dim, mask_zero=True)(encoder_inputs)
encoder_lstm = LSTM(units, return_sequences=True, return_state=True, dropout=0.3)
# encoder_outputs: per-timestep states for attention; state_h/state_c
# seed the decoder's initial state.
encoder_outputs, state_h, state_c = encoder_lstm(encoder_embedding)

# --- Decoder ---
decoder_inputs = Input(shape=(None,), name='decoder_inputs')
decoder_embedding = Embedding(num_decoder_tokens, embedding_dim, mask_zero=True)(decoder_inputs)
attention = BahdanauAttention(units)
# Prepare decoder LSTM (shared across all unrolled steps below).
decoder_lstm = LSTM(units, return_sequences=True, return_state=True, dropout=0.3)
dense = Dense(num_decoder_tokens, activation='softmax')

all_outputs = []
inputs = decoder_embedding
# Use teacher forcing for training: the decoder is unrolled one step at a
# time, re-attending over the encoder outputs with the latest hidden state.
# NOTE(review): this unrolled graph slices inputs[:, t:t+1, :] for
# t < max_decoder_seq_length, so decoder_inputs must be fed with exactly
# max_decoder_seq_length timesteps (decoder_input_data is built that wide).
for t in range(max_decoder_seq_length):
    # Get context vector from attention using the current hidden state.
    context_vector, attn_weights = attention(state_h, encoder_outputs)
    # Expand dims to concatenate: (batch, units) -> (batch, 1, units).
    context_vector = tf.expand_dims(context_vector, 1)
    # Concatenate context vector and decoder input at time t
    x = tf.concat([context_vector, inputs[:, t:t+1, :]], axis=-1)
    # Pass through LSTM; (state_h, state_c) are threaded between steps,
    # starting from the encoder's final states on the first iteration.
    output, state_h, state_c = decoder_lstm(x, initial_state=[state_h, state_c])
    # Output dense layer: distribution over the target vocabulary.
    output = dense(output)
    all_outputs.append(output)
# Concatenate all time steps: (batch, max_decoder_seq_length, num_decoder_tokens).
decoder_outputs = tf.concat(all_outputs, axis=1)

model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
# One-hot targets (decoder_target_data) pair with categorical_crossentropy.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# Train model. NOTE(review): with only 4 samples, validation_split=0.2
# holds out a single example — metrics will be very noisy.
model.fit([encoder_input_data, decoder_input_data], decoder_target_data, batch_size=2, epochs=20, validation_split=0.2, verbose=2)