import numpy as np
from sklearn.metrics import confusion_matrix, classification_report
# Sample true labels and predicted probabilities
true_labels = np.array([0, 1, 0, 1, 0, 1, 0, 1, 0, 1])  # 0=not spam, 1=spam
predicted_probs = np.array([0.1, 0.9, 0.4, 0.6, 0.3, 0.8, 0.2, 0.7, 0.5, 0.65])

# Fixed class order shared by every metric call. Passing it explicitly keeps
# the confusion matrix 2x2 and the display names aligned with the right class
# even if one class happens to be absent from the labels/predictions.
CLASS_LABELS = [0, 1]
CLASS_NAMES = ['Not Spam', 'Spam']


def _evaluate_at_threshold(y_true, probs, threshold):
    """Binarize *probs* at *threshold*, then print and return the metrics.

    A probability greater than or equal to *threshold* is labelled 1 (spam),
    anything below it is labelled 0 (not spam).

    Args:
        y_true: Array of ground-truth labels (0 or 1).
        probs: NumPy array of predicted probabilities, same length as y_true.
        threshold: Decision cut-off applied to *probs*.

    Returns:
        A ``(predicted_labels, confusion_matrix, report)`` tuple, where
        ``report`` is the text produced by ``classification_report``.
    """
    y_pred = (probs >= threshold).astype(int)

    matrix = confusion_matrix(y_true, y_pred, labels=CLASS_LABELS)
    print(f'Confusion Matrix at threshold {threshold}:')
    print(matrix)

    text_report = classification_report(
        y_true,
        y_pred,
        labels=CLASS_LABELS,
        target_names=CLASS_NAMES,
    )
    print(f'Classification Report at threshold {threshold}:')
    print(text_report)

    return y_pred, matrix, text_report


# Original threshold 0.5
threshold = 0.5
predicted_labels, cm, report = _evaluate_at_threshold(
    true_labels, predicted_probs, threshold
)

# Adjust threshold to 0.6 to reduce false positives: the sample scored exactly
# 0.5 is a true "not spam" and is no longer flagged at the higher cut-off.
threshold = 0.6
predicted_labels_adj, cm_adj, report_adj = _evaluate_at_threshold(
    true_labels, predicted_probs, threshold
)