import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from fairlearn.metrics import MetricFrame, demographic_parity_difference, equalized_odds_difference
# Create a simple dataset and attach a synthetic sensitive feature
X, y = make_classification(n_samples=1000, n_features=5, random_state=42)
rng = np.random.default_rng(42)  # seeded so the group assignment is reproducible
sensitive_feature = rng.choice(['group_a', 'group_b'], size=1000)
# Split data
X_train, X_test, y_train, y_test, sf_train, sf_test = train_test_split(X, y, sensitive_feature, test_size=0.3, random_state=42)
# Train model
model = LogisticRegression(solver='liblinear')
model.fit(X_train, y_train)
# Predict
y_pred = model.predict(X_test)
# Calculate fairness metrics. MetricFrame slices the data by sensitive group,
# so it takes ordinary (y_true, y_pred) metrics such as accuracy. The
# disparity metrics already compare groups, so they cannot go inside
# MetricFrame; they are called directly with sensitive_features as a
# keyword argument.
metrics = MetricFrame(
    metrics={'accuracy': accuracy_score},
    y_true=y_test,
    y_pred=y_pred,
    sensitive_features=sf_test,
)
dpd = demographic_parity_difference(y_test, y_pred, sensitive_features=sf_test)
eod = equalized_odds_difference(y_test, y_pred, sensitive_features=sf_test)
print(f"Accuracy: {metrics.overall['accuracy']:.2f}")
print(f"Demographic Parity Difference: {dpd:.2f}")
print(f"Equalized Odds Difference: {eod:.2f}")
# Flag the model when either disparity exceeds a 0.1 tolerance
if dpd > 0.1 or eod > 0.1:
    print("The model shows some bias that needs attention.")
else:
    print("The model appears fair.")