# Spaces: Sleeping
# File size: 4,648 Bytes
# Revision: 0be6b17
import pandas as pd
import numpy as np
from surprise import SVD, Dataset, Reader, accuracy
from surprise.model_selection import train_test_split, cross_validate
from collections import defaultdict
class MovieRecommender:
    """SVD collaborative-filtering recommender built on the Surprise library.

    On construction the ratings and movies CSV files are loaded, a Surprise
    dataset is built from the ``userId``/``movieId``/``rating`` columns, and
    an SVD model is fitted on the full trainset.
    """

    # Hyperparameters shared by the serving model and by the throwaway models
    # trained inside evaluate(), so evaluation reflects the serving setup.
    _SVD_PARAMS = {
        'n_factors': 100,
        'n_epochs': 20,
        'lr_all': 0.005,
        'reg_all': 0.02,
    }

    def __init__(self, ratings_path, movies_path):
        """Load the data and fit the serving model.

        Args:
            ratings_path: Path of a CSV read with ``pd.read_csv``; the columns
                ``userId``, ``movieId`` and ``rating`` are used.
            movies_path: Path of a CSV read with ``pd.read_csv``; the columns
                ``movieId`` and ``title`` are used.
        """
        # Load data
        self.ratings = pd.read_csv(ratings_path)
        self.movies = pd.read_csv(movies_path)

        # Build Surprise dataset
        reader = Reader(rating_scale=(0.5, 5.0))
        self.data = Dataset.load_from_df(
            self.ratings[['userId', 'movieId', 'rating']],
            reader,
        )

        # Train the serving model on every available rating
        self.trainset = self.data.build_full_trainset()
        self.algo = self._new_algo()
        self.algo.fit(self.trainset)

    def _new_algo(self):
        """Return an unfitted SVD instance configured with ``_SVD_PARAMS``."""
        return SVD(**self._SVD_PARAMS)

    def recommend_movies(self, user_id, N):
        """Return the top-N unrated movies for a user by predicted rating.

        Args:
            user_id: User identifier, compared against ``ratings['userId']``
                and passed to ``self.algo.predict``.
            N: Maximum number of recommendations. A non-positive value
                yields an empty list.

        Returns:
            A list of at most N dicts with keys ``movieId``, ``title`` and
            ``predicted_rating`` (rounded to 2 decimals), ordered from the
            highest predicted rating to the lowest.
        """
        # A negative N would otherwise slice "all but the last |N|" items.
        if N <= 0:
            return []

        all_movie_ids = self.movies['movieId'].unique()

        # A set gives O(1) membership tests instead of scanning an array for
        # every candidate movie.
        user_rows = self.ratings['userId'] == user_id
        rated_movies = set(self.ratings.loc[user_rows, 'movieId'])

        # Predict a rating for every movie the user has not rated yet
        predictions = [
            (movie_id, self.algo.predict(user_id, movie_id).est)
            for movie_id in all_movie_ids
            if movie_id not in rated_movies
        ]

        # Highest predicted rating first; the sort is stable, so ties keep
        # the order in which the movies appear in the movies table.
        predictions.sort(key=lambda pair: pair[1], reverse=True)

        # One indexed lookup table instead of a full-frame scan per result.
        # Keeping the first row per movieId matches taking the first match.
        titles = self.movies.drop_duplicates('movieId').set_index('movieId')['title']

        return [
            {
                'movieId': movie_id,
                'title': titles.loc[movie_id],
                'predicted_rating': round(score, 2),
            }
            for movie_id, score in predictions[:N]
        ]

    @staticmethod
    def _precision_recall_at_k(predictions, k, threshold):
        """Return the per-user mean (precision@k, recall@k) of predictions.

        Each prediction must unpack as ``(uid, iid, true_r, est, details)``.
        An item counts as relevant when ``true_r >= threshold`` and as
        recommended when it is among the user's k highest estimates and
        ``est >= threshold``.
        """
        user_est_true = defaultdict(list)
        for uid, _, true_r, est, _ in predictions:
            user_est_true[uid].append((est, true_r))

        precisions = []
        recalls = []
        for user_ratings in user_est_true.values():
            user_ratings.sort(key=lambda pair: pair[0], reverse=True)
            top_k = user_ratings[:k]

            n_rel = sum(1 for _, true_r in user_ratings if true_r >= threshold)
            n_rec_k = sum(1 for est, _ in top_k if est >= threshold)
            n_rel_and_rec_k = sum(
                1 for est, true_r in top_k
                if true_r >= threshold and est >= threshold
            )

            # A user with nothing recommended / nothing relevant scores 0
            # rather than dividing by zero.
            precisions.append(n_rel_and_rec_k / n_rec_k if n_rec_k > 0 else 0)
            recalls.append(n_rel_and_rec_k / n_rel if n_rel > 0 else 0)

        return np.mean(precisions), np.mean(recalls)

    def evaluate(self, k=10, threshold=4.0):
        """Estimate rating accuracy and top-k ranking quality.

        Runs 5-fold cross-validation for RMSE/MAE, then trains on an 80/20
        ``train_test_split`` to compute Precision@k and Recall@k. All of the
        training happens on fresh SVD instances, so the serving model in
        ``self.algo`` stays fitted on the full trainset.

        Args:
            k: Cut-off of the per-user ranking used for precision/recall.
            threshold: Minimum rating for an item to count as relevant
                (true rating) or recommended (estimated rating).

        Returns:
            A dict with keys ``rmse``, ``mae``, ``precision@{k}`` and
            ``recall@{k}``.
        """
        # Cross-validation on a throwaway model: cross_validate fits the
        # algorithm it is given, which must not be the serving model.
        cv_results = cross_validate(
            self._new_algo(),
            self.data,
            measures=['RMSE', 'MAE'],
            cv=5,
            verbose=False,
        )

        # Custom metrics: Precision@K and Recall@K on a hold-out split
        trainset, testset = train_test_split(self.data, test_size=0.2)
        holdout_algo = self._new_algo()
        holdout_algo.fit(trainset)
        predictions = holdout_algo.test(testset)

        precision, recall = self._precision_recall_at_k(predictions, k, threshold)

        return {
            'rmse': np.mean(cv_results['test_rmse']),
            'mae': np.mean(cv_results['test_mae']),
            f'precision@{k}': precision,
            f'recall@{k}': recall,
        }
import gradio as gr
# Build the shared recommender once at import time: the constructor reads
# both CSV files and fits the model before the UI is defined.
recommender = MovieRecommender(
    ratings_path='ratings.csv',
    movies_path='movies.csv',
)
def recommend_interface(user_id, n_recommendations):
    """Format recommendations for a user as a numbered, newline-joined list.

    Both arguments are coerced with ``int()`` before being handed to the
    module-level ``recommender``. Any exception raised along the way is
    turned into an ``"Error: ..."`` string instead of propagating.
    """
    try:
        uid = int(user_id)
        count = int(n_recommendations)
        ranked = recommender.recommend_movies(uid, count)
        return "\n".join(
            f"{rank}. {item['title']} (Predicted: {item['predicted_rating']})"
            for rank, item in enumerate(ranked, start=1)
        )
    except Exception as exc:
        return f"Error: {str(exc)}"
# Create interface: a free-text user ID plus a 1-20 slider for the number of
# recommendations, rendered into a single multi-line text box.
demo = gr.Interface(
    fn=recommend_interface,
    inputs=[
        gr.Textbox(label="User ID", placeholder="Enter user ID"),
        gr.Slider(minimum=1, maximum=20, value=10, step=1, label="Number of Recommendations"),
    ],
    outputs=gr.Textbox(label="Recommendations", lines=15),
    title="MovieLens Recommendation System",
    description="Enter a user ID to get personalized movie recommendations",
)

# Start the Gradio app. The stray trailing "|" that followed this call was a
# syntax error (binary operator without a right operand) and is removed.
demo.launch()