import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# Load the dataset arrays from disk.
# X: images, y: labels, skin_tones: 0 for lighter, 1 for darker.
# Expected shapes: X is (num_samples, 64, 64, 3); y and skin_tones are
# (num_samples,).
X, y, skin_tones = (
    np.load(filename)
    for filename in ('face_images.npy', 'face_labels.npy', 'skin_tones.npy')
)

# Hold out 20% of the samples for testing. The split is stratified on the
# class label only; the skin-tone array is carried along so it stays aligned
# with the images and labels in both partitions.
X_train, X_test, y_train, y_test, skin_train, skin_test = train_test_split(
    X,
    y,
    skin_tones,
    stratify=y,
    test_size=0.2,
    random_state=42,
)
# Random-transform generator used to synthesize extra darker-skin-tone
# training images: small rotations, small horizontal/vertical shifts, and
# left-right mirroring.
datagen = ImageDataGenerator(
    horizontal_flip=True,
    rotation_range=10,
    height_shift_range=0.1,
    width_shift_range=0.1,
)
# Oversample the darker-skin-tone training images (skin_train == 1) with
# randomly augmented copies. Only the training split is augmented.
# NOTE: a fixed number of copies per image increases the darker-skin share of
# the training set but does not guarantee the two groups end up equal in size.
AUGMENTED_COPIES_PER_IMAGE = 3

dark_indices = np.where(skin_train == 1)[0]
X_dark = X_train[dark_indices]
y_dark = y_train[dark_indices]

augmented_images = []
augmented_labels = []
for image, label in zip(X_dark, y_dark):
    # flow() expects a batch, so give the single image a leading batch axis.
    aug_iter = datagen.flow(image[np.newaxis, ...], batch_size=1)
    for _ in range(AUGMENTED_COPIES_PER_IMAGE):
        augmented_images.append(next(aug_iter)[0])
        augmented_labels.append(label)

# Combine original and augmented data (augmented samples go at the end).
if augmented_images:
    X_train_balanced = np.concatenate([X_train, np.stack(augmented_images)])
    y_train_balanced = np.concatenate(
        [y_train, np.asarray(augmented_labels, dtype=y_train.dtype)]
    )
    # Keep the skin-tone array in its original dtype instead of letting a
    # float64 block of ones upcast it.
    skin_train_balanced = np.concatenate(
        [skin_train, np.ones(len(augmented_images), dtype=skin_train.dtype)]
    )
else:
    # No darker-skin-tone samples in the training split: there is nothing to
    # append, and concatenating an empty 1-D array onto the 4-D image array
    # would raise, so pass the training data through unchanged.
    X_train_balanced = X_train
    y_train_balanced = y_train
    skin_train_balanced = skin_train
# One-hot encode the labels. The encoding width is the number of distinct
# labels in the full dataset, which is only valid when the labels are the
# integers 0..num_classes-1.
num_classes = len(np.unique(y))
y_train_cat = to_categorical(y_train_balanced, num_classes)
y_test_cat = to_categorical(y_test, num_classes)

# Compute 'balanced' class weights from the (augmented) training labels so
# under-represented classes contribute more to the loss.
train_classes = np.unique(y_train_balanced)
class_weights = class_weight.compute_class_weight(
    'balanced', classes=train_classes, y=y_train_balanced
)
# Key each weight by the label it was computed for. Keying by position
# (enumerate) would silently attach weights to the wrong classes if any label
# were absent from the training split.
class_weight_dict = {
    int(label): float(weight)
    for label, weight in zip(train_classes, class_weights)
}
# Define simple CNN model: two convolution + max-pooling stages feeding a
# small dense head with one softmax output per class.
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 3)))
model.add(MaxPooling2D(pool_size=2, strides=2))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=2, strides=2))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))

# Categorical cross-entropy matches the one-hot encoded targets.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
# Train model with class weights.
# Keras takes the `validation_split` fraction from the END of the arrays,
# before any shuffling. The augmented darker-skin-tone samples were appended
# at the end of the training arrays, so without reordering the validation set
# would consist mostly of augmented images from a single group. Shuffle once
# with a fixed seed so the validation slice is a representative sample.
# NOTE(review): augmented copies of a training image can still land in the
# validation slice; for a leak-free estimate, hold out validation data before
# augmenting.
shuffle_order = np.random.default_rng(42).permutation(len(X_train_balanced))
model.fit(
    X_train_balanced[shuffle_order],
    y_train_cat[shuffle_order],
    epochs=15,
    batch_size=32,
    class_weight=class_weight_dict,
    validation_split=0.1,
)
# Evaluate overall accuracy on the held-out test split.
loss, overall_acc = model.evaluate(X_test, y_test_cat, verbose=0)

# Evaluate accuracy by skin tone (0 = lighter, 1 = darker) using the
# highest-probability class as the prediction.
y_pred_probs = model.predict(X_test)
y_pred = np.argmax(y_pred_probs, axis=1)


def _group_accuracy(group_mask):
    """Return test accuracy over the samples selected by *group_mask*.

    Returns NaN when the mask selects no samples, since accuracy over an
    empty group is undefined.
    """
    if not np.any(group_mask):
        return float('nan')
    return accuracy_score(y_test[group_mask], y_pred[group_mask])


acc_lighter = _group_accuracy(skin_test == 0)
acc_darker = _group_accuracy(skin_test == 1)

print(f'Overall accuracy: {overall_acc*100:.2f}%')
print(f'Accuracy on lighter skin tones: {acc_lighter*100:.2f}%')
print(f'Accuracy on darker skin tones: {acc_darker*100:.2f}%')