import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# Simulated dataset: stand-in for a real image corpus.
X = np.random.rand(1000, 64, 64, 3)   # 1000 RGB images, 64x64, pixel values in [0, 1)
y = np.random.randint(0, 2, 1000)     # binary labels: 1 = person present, 0 = not
# Augmentation pipeline: random geometric jitter to reduce dataset bias.
datagen = ImageDataGenerator(
    rotation_range=15,       # rotate up to +/-15 degrees
    width_shift_range=0.1,   # horizontal translation up to 10% of width
    height_shift_range=0.1,  # vertical translation up to 10% of height
    horizontal_flip=True,    # random left/right mirroring
)
# Hold out 20% for validation; fixed seed keeps the split reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)
# Small CNN for binary person / no-person classification:
# two conv+pool stages, then a dense head with a sigmoid output.
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 3)))
model.add(MaxPooling2D(2, 2))
model.add(Dropout(0.25))   # regularize early feature maps
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(2, 2))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))    # heavier dropout on the dense head
model.add(Dense(1, activation='sigmoid'))  # probability of "person"
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Train on the augmented stream; validate on the untouched hold-out set.
batch_size = 32
augmented_batches = datagen.flow(X_train, y_train, batch_size=batch_size)
model.fit(augmented_batches, epochs=10, validation_data=(X_val, y_val))
# Predict class probabilities, then binarize with a tuned decision threshold.
y_pred_prob = model.predict(X_val).flatten()
threshold = 0.6  # raised above 0.5 to trade some recall for fewer false positives
y_pred = (y_pred_prob > threshold).astype(int)
# Metrics. Pin labels=[0, 1] so the confusion matrix is always 2x2 even when a
# class is absent from y_val or y_pred (unpinned, cm could be 1x1 and the
# original cm[0][1] indexing would be wrong or raise IndexError).
accuracy = accuracy_score(y_val, y_pred)
cm = confusion_matrix(y_val, y_pred, labels=[0, 1])
tn, fp, fn, tp = cm.ravel()
# False positive rate = FP / (FP + TN); guard the degenerate split with no
# true negatives, which previously raised ZeroDivisionError.
false_positives = fp / (fp + tn) if (fp + tn) > 0 else 0.0
# NOTE(review): "responsible CV" anonymization (e.g. blurring faces before
# prediction) is only described conceptually here — no privacy filter is
# actually applied to the input data in this script.
pct_accuracy = accuracy * 100
pct_fpr = false_positives * 100
print(f"Accuracy: {pct_accuracy:.2f}%")
print(f"False Positive Rate: {pct_fpr:.2f}%")