from sklearn.metrics import f1_score
# Sample true and predicted answers (1 for correct, 0 for incorrect).
true_answers = [1, 0, 1, 1, 0, 1, 0, 0, 1, 0]
predicted_answers = [1, 0, 0, 1, 0, 1, 1, 0, 1, 0]

# Per-answer confidence scores, aligned index-for-index with predicted_answers.
confidence_scores = [0.9, 0.6, 0.4, 0.95, 0.5, 0.85, 0.3, 0.7, 0.9, 0.6]

# Predictions below this confidence are suppressed (forced to 0).
CONFIDENCE_THRESHOLD = 0.7


def _binary_f1(y_true, y_pred):
    """Return the binary F1-score of y_pred against y_true.

    Both arguments are equal-length sequences of 0/1 labels, with 1 as the
    positive class.  Returns 0.0 in the degenerate case where there are no
    true positives, false positives, or false negatives (equivalent to
    sklearn.metrics.f1_score with zero_division=0 for these inputs).
    """
    tp = sum(1 for t, p in zip(y_true, y_pred) if t == 1 and p == 1)
    fp = sum(1 for t, p in zip(y_true, y_pred) if t == 0 and p == 1)
    fn = sum(1 for t, p in zip(y_true, y_pred) if t == 1 and p == 0)
    # F1 = 2*TP / (2*TP + FP + FN); guard the all-negative corpus case.
    denominator = 2 * tp + fp + fn
    return 2 * tp / denominator if denominator else 0.0


# F1-score of the raw (unfiltered) predictions.
accuracy_f1 = _binary_f1(true_answers, predicted_answers)

# Zero out any prediction whose confidence falls below the threshold.
filtered_predictions = [
    pred if conf >= CONFIDENCE_THRESHOLD else 0
    for pred, conf in zip(predicted_answers, confidence_scores)
]

# F1-score after low-confidence predictions are suppressed.
filtered_accuracy_f1 = _binary_f1(true_answers, filtered_predictions)

# Relevance: percentage of surviving (non-zero) predictions that match the
# true answer.  Count the survivors once, rather than re-deriving the same
# generator sum three times in one expression.
kept_count = sum(1 for p in filtered_predictions if p != 0)
correct_filtered = sum(
    1 for t, p in zip(true_answers, filtered_predictions) if t == p and p != 0
)
relevance_score = correct_filtered / kept_count * 100 if kept_count > 0 else 0

print(f"Original F1 Accuracy: {accuracy_f1:.2f}")
print(f"Filtered F1 Accuracy: {filtered_accuracy_f1:.2f}")
print(f"Relevance Score after filtering: {relevance_score:.2f}%")