import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.neighbors import NearestNeighbors
# Sample user-item rating matrix (rows: users, columns: movies).
# A value of 0 means "not rated"; the split and evaluation below rely on that.
ratings = np.array([
    [5, 3, 0, 1],
    [4, 0, 0, 1],
    [1, 1, 0, 5],
    [1, 0, 0, 4],
    [0, 1, 5, 4],
])

# Split data into train and test by hiding one *rated* entry per user.
# Sampling from every column could pick an unrated (0) cell, which would make
# the hold-out a no-op and drop that user from the `test > 0` evaluation mask.
np.random.seed(42)
train = ratings.copy()
test = np.zeros(ratings.shape)
for user in range(ratings.shape[0]):
    rated_items = np.flatnonzero(ratings[user] > 0)
    # Keep at least one rating in train so the user still has a profile
    # to compute similarities from.
    if rated_items.size < 2:
        continue
    test_indices = np.random.choice(rated_items, size=1, replace=False)
    test[user, test_indices] = ratings[user, test_indices]
    train[user, test_indices] = 0
# Function to predict ratings using user-based CF
def predict_user_based(train_matrix, k=2):
    """Predict ratings with user-based collaborative filtering.

    Each user's row is predicted as the cosine-similarity-weighted average
    of the ratings given by that user's ``k`` most similar users.

    Args:
        train_matrix: 2-D array of shape (n_users, n_items). A value of 0
            is treated as "not rated".
        k: Number of neighbouring users to use. Clamped to ``n_users - 1``.

    Returns:
        Float array with the same shape as ``train_matrix``. Cells for which
        no neighbour with positive total similarity has a rating stay 0.
    """
    train_matrix = np.asarray(train_matrix, dtype=float)
    pred = np.zeros(train_matrix.shape)

    n_users, n_items = train_matrix.shape
    # A user cannot be its own neighbour, so at most n_users - 1 are available.
    n_neighbors = min(k, n_users - 1)
    if n_neighbors < 1 or n_items == 0:
        return pred

    model = NearestNeighbors(metric='cosine', algorithm='brute')
    model.fit(train_matrix)

    # Querying without an explicit X returns the neighbours of every fitted
    # row and excludes the row itself. Dropping the first result instead is
    # unreliable when rows tie (duplicate or all-zero profiles), because the
    # query row is then not guaranteed to come first.
    distances, indices = model.kneighbors(n_neighbors=n_neighbors)
    sims = (1.0 - distances)[:, :, np.newaxis]  # cosine similarity

    neighbor_ratings = train_matrix[indices]  # (n_users, n_neighbors, n_items)
    weighted_sum = (sims * neighbor_ratings).sum(axis=1)
    # Normalise only over neighbours that actually rated the item, so that
    # missing ratings (0) do not drag the prediction toward zero.
    sim_sum = (sims * (neighbor_ratings > 0)).sum(axis=1)

    np.divide(weighted_sum, sim_sum, out=pred, where=sim_sum > 0)
    return pred
# Function to predict ratings using item-based CF
def predict_item_based(train_matrix, k=2):
    """Predict ratings with item-based collaborative filtering.

    Each item's column is predicted as the cosine-similarity-weighted
    average of the columns of that item's ``k`` most similar items, i.e. a
    user's predicted rating for an item is built from the same user's
    ratings of similar items.

    Args:
        train_matrix: 2-D array of shape (n_users, n_items). A value of 0
            is treated as "not rated".
        k: Number of neighbouring items to use. Clamped to ``n_items - 1``.

    Returns:
        Float array with the same shape as ``train_matrix``. Cells for which
        no neighbour with positive total similarity has a rating stay 0.
    """
    train_matrix = np.asarray(train_matrix, dtype=float)
    item_profiles = train_matrix.T  # one row per item
    item_pred = np.zeros(item_profiles.shape)

    n_items, n_users = item_profiles.shape
    # An item cannot be its own neighbour, so at most n_items - 1 are available.
    n_neighbors = min(k, n_items - 1)
    if n_neighbors < 1 or n_users == 0:
        return item_pred.T

    model = NearestNeighbors(metric='cosine', algorithm='brute')
    model.fit(item_profiles)

    # Querying without an explicit X returns the neighbours of every fitted
    # row and excludes the row itself. Dropping the first result instead is
    # unreliable when rows tie (duplicate or all-zero profiles), because the
    # query row is then not guaranteed to come first.
    distances, indices = model.kneighbors(n_neighbors=n_neighbors)
    sims = (1.0 - distances)[:, :, np.newaxis]  # cosine similarity

    neighbor_ratings = item_profiles[indices]  # (n_items, n_neighbors, n_users)
    weighted_sum = (sims * neighbor_ratings).sum(axis=1)
    # Normalise only over neighbour items the user actually rated, so that
    # missing ratings (0) do not drag the prediction toward zero.
    sim_sum = (sims * (neighbor_ratings > 0)).sum(axis=1)

    np.divide(weighted_sum, sim_sum, out=item_pred, where=sim_sum > 0)
    # Back to the (n_users, n_items) layout of the input.
    return item_pred.T
# Only the held-out cells (non-zero entries of `test`) are scored.
held_out = test > 0
test_masked = test[held_out]

# Predict and evaluate user-based
user_pred = predict_user_based(train, k=2)
user_pred_masked = user_pred[held_out]
user_rmse = np.sqrt(mean_squared_error(test_masked, user_pred_masked))

# Predict and evaluate item-based
item_pred = predict_item_based(train, k=2)
item_pred_masked = item_pred[held_out]
item_rmse = np.sqrt(mean_squared_error(test_masked, item_pred_masked))

# Report both errors side by side.
print(f"User-based CF RMSE: {user_rmse:.2f}")
print(f"Item-based CF RMSE: {item_rmse:.2f}")