import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
class TransformerBlock(layers.Layer):
    """A single post-norm Transformer encoder block.

    Multi-head self-attention followed by a position-wise feed-forward
    network; each sub-layer is wrapped with dropout, a residual
    connection, and layer normalization.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super().__init__()
        # NOTE(review): key_dim=embed_dim gives every head a full-width
        # projection rather than the classic embed_dim // num_heads split
        # — confirm this is intended.
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = models.Sequential(
            [
                layers.Dense(ff_dim, activation='relu'),
                layers.Dense(embed_dim),
            ]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training=None):
        # Attention sub-layer: attend, drop, add residual, normalize.
        attended = self.att(inputs, inputs)
        attended = self.dropout1(attended, training=training)
        normed = self.layernorm1(inputs + attended)
        # Feed-forward sub-layer: transform, drop, add residual, normalize.
        transformed = self.ffn(normed)
        transformed = self.dropout2(transformed, training=training)
        return self.layernorm2(normed + transformed)
# Build the classifier: transformer reasoning block -> pooling -> softmax head.
input_shape = (None, 128)  # sequence length unknown, embedding size 128
inputs = layers.Input(shape=input_shape)
# Tokens flow straight into the transformer block (no separate embedding here).
hidden = TransformerBlock(embed_dim=128, num_heads=4, ff_dim=256)(inputs)
# Collapse the (variable-length) sequence axis, then classify.
pooled = layers.GlobalAveragePooling1D()(hidden)
outputs = layers.Dense(10, activation='softmax')(pooled)  # 10 classes example
model = models.Model(inputs=inputs, outputs=outputs)
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
# Dummy data for demonstration: random float features, random integer labels.
# (numpy is imported at the top of the file, per PEP 8, rather than mid-script.)
X_train = np.random.rand(1000, 20, 128).astype(np.float32)  # 1000 samples, 20 tokens each
Y_train = np.random.randint(0, 10, 1000)  # integer class ids in [0, 10)
X_val = np.random.rand(200, 20, 128).astype(np.float32)
Y_val = np.random.randint(0, 10, 200)
# Train with a held-out validation set; `history` keeps per-epoch metrics.
history = model.fit(X_train, Y_train, epochs=10, batch_size=32, validation_data=(X_val, Y_val))