from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.calibration import CalibratedClassifierCV, calibration_curve
from sklearn.metrics import brier_score_loss, accuracy_score
import matplotlib.pyplot as plt
# Create a synthetic binary classification problem.
X, y = make_classification(n_samples=10000, n_features=20, random_state=42)
# Hold out 30% of the samples as a test set used only for final evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# Split the training portion again into a fitting set and a calibration set.
# With cv='prefit' the calibrator must be fitted on samples the base model has
# never seen; calibrating on the model's own training data yields a biased
# calibration map because the model is over-confident on those samples.
X_fit, X_calib, y_fit, y_calib = train_test_split(
    X_train, y_train, test_size=0.3, random_state=42
)
# Train the base (uncalibrated) model on the fitting set only.
model = LogisticRegression(max_iter=1000, random_state=42)
model.fit(X_fit, y_fit)
# Positive-class probabilities before calibration.
probs_uncalibrated = model.predict_proba(X_test)[:, 1]
# Evaluate before calibration.
brier_uncalibrated = brier_score_loss(y_test, probs_uncalibrated)
acc_uncalibrated = accuracy_score(y_test, model.predict(X_test))
# NOTE: cv='prefit' tells CalibratedClassifierCV to reuse the already-fitted
# model instead of refitting it. Recent scikit-learn releases deprecate this
# option in favour of wrapping the model in sklearn.frozen.FrozenEstimator;
# it is kept here for compatibility with older releases.
# Calibrate the model using sigmoid (Platt scaling) on the held-out set.
calibrated_sigmoid = CalibratedClassifierCV(model, method='sigmoid', cv='prefit')
calibrated_sigmoid.fit(X_calib, y_calib)
probs_calibrated_sigmoid = calibrated_sigmoid.predict_proba(X_test)[:, 1]
brier_sigmoid = brier_score_loss(y_test, probs_calibrated_sigmoid)
acc_sigmoid = accuracy_score(y_test, calibrated_sigmoid.predict(X_test))
# Calibrate the model using isotonic regression on the same held-out set.
calibrated_isotonic = CalibratedClassifierCV(model, method='isotonic', cv='prefit')
calibrated_isotonic.fit(X_calib, y_calib)
probs_calibrated_isotonic = calibrated_isotonic.predict_proba(X_test)[:, 1]
brier_isotonic = brier_score_loss(y_test, probs_calibrated_isotonic)
acc_isotonic = accuracy_score(y_test, calibrated_isotonic.predict(X_test))
# Plot calibration (reliability) curves for each set of test-set probabilities.
plt.figure(figsize=(8, 6))
curves = [
    (probs_uncalibrated, 'Uncalibrated'),
    (probs_calibrated_sigmoid, 'Sigmoid'),
    (probs_calibrated_isotonic, 'Isotonic'),
]
for probs, label in curves:
    # Bin the predictions into 10 bins and compare the mean predicted
    # probability in each bin with the observed fraction of positives.
    fraction_of_positives, mean_predicted_value = calibration_curve(y_test, probs, n_bins=10)
    plt.plot(mean_predicted_value, fraction_of_positives, marker='o', label=label)
# Diagonal reference line: a perfectly calibrated model lies on y = x.
# Drawn once, after the loop, so it is not re-plotted for every curve.
plt.plot([0, 1], [0, 1], linestyle='--', color='gray')
plt.xlabel('Mean predicted probability')
plt.ylabel('Fraction of positives')
plt.title('Calibration Curves')
plt.legend()
plt.grid(True)
plt.show()
# Report Brier score loss and accuracy for each variant, one line per model.
report_lines = (
    f'Before calibration: Brier score loss = {brier_uncalibrated:.3f}, Accuracy = {acc_uncalibrated:.3f}',
    f'Sigmoid calibration: Brier score loss = {brier_sigmoid:.3f}, Accuracy = {acc_sigmoid:.3f}',
    f'Isotonic calibration: Brier score loss = {brier_isotonic:.3f}, Accuracy = {acc_isotonic:.3f}',
)
for line in report_lines:
    print(line)