import numpy as np
from sklearn.metrics import mean_squared_error

def rmse(y_true, y_pred):
    """Root mean squared error between true and predicted ratings."""
    return np.sqrt(mean_squared_error(y_true, y_pred))
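
# Illustrative sanity check (toy values added here, not part of the original
# example): a constant error of 1 on every item should give an RMSE of exactly 1.
assert np.isclose(rmse(np.array([1, 2, 3]), np.array([2, 3, 4])), 1.0)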

def precision_at_k(y_true, y_scores, k):
    """Precision@k: fraction of the k highest-scored items that are relevant.

    y_true: binary relevance (1 if relevant, 0 if not)
    y_scores: predicted scores for items
    """
    # Sort indices by predicted score, descending, and take the top k
    idx_sorted = np.argsort(y_scores)[::-1]
    top_k = idx_sorted[:k]
    # Precision@k = (number of relevant items in the top k) / k
    relevant_at_k = y_true[top_k].sum()
    return relevant_at_k / k
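
# Illustrative sanity check for precision_at_k (toy values added here, not part
# of the original example): two of the three highest-scored items are relevant,
# so precision@3 should be 2/3.
assert np.isclose(
    precision_at_k(np.array([1, 0, 1, 0]), np.array([0.9, 0.8, 0.7, 0.1]), 3), 2 / 3
)
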
# Example data for one user
true_ratings = np.array([4, 5, 3, 0, 0, 2, 1, 5, 4, 0]) # Actual ratings
predicted_ratings = np.array([3.8, 4.9, 2.5, 0.1, 0.2, 2.0, 1.5, 4.8, 3.9, 0.3]) # Model predictions
# Convert true ratings to binary relevance (1 if rating >=4, else 0)
true_relevance = (true_ratings >= 4).astype(int)
# Keep only items whose predicted rating clears a recommendation threshold;
# the ranking metric below is computed over this filtered candidate set
threshold = 3.5
filtered_indices = np.where(predicted_ratings >= threshold)[0]
filtered_true_relevance = true_relevance[filtered_indices]
filtered_predicted_ratings = predicted_ratings[filtered_indices]
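# With the example data above, four items clear the 3.5 threshold
# (indices 0, 1, 7, and 8), and all four are relevant under the >= 4 rule.
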
# Calculate RMSE on all data
current_rmse = rmse(true_ratings, predicted_ratings)
# Calculate precision@5 on the filtered candidate set
k = 5
if len(filtered_predicted_ratings) >= k:
    current_precision_at_5 = precision_at_k(filtered_true_relevance, filtered_predicted_ratings, k)
elif len(filtered_predicted_ratings) > 0:
    # Fewer than k candidates passed the threshold; fall back to precision@len(candidates)
    current_precision_at_5 = precision_at_k(filtered_true_relevance, filtered_predicted_ratings, len(filtered_predicted_ratings))
else:
    # No candidates passed the threshold; report 0.0 rather than dividing by zero
    current_precision_at_5 = 0.0
print(f"RMSE: {current_rmse:.2f}")
print(f"Precision@5: {current_precision_at_5:.2f}")