"""MovieLens recommender built on Surprise's SVD, exposed through a Gradio UI."""

from collections import defaultdict

import gradio as gr
import numpy as np
import pandas as pd
from surprise import SVD, Dataset, Reader, accuracy
from surprise.model_selection import cross_validate, train_test_split


class MovieRecommender:
    """Train an SVD model on a ratings CSV and serve top-N recommendations.

    The ratings file must provide ``userId``, ``movieId`` and ``rating``
    columns; the movies file must provide ``movieId`` and ``title``.
    """

    # Hyperparameters shared by the production model and every throwaway
    # model built for evaluation, so metrics describe the same configuration.
    _SVD_PARAMS = {
        'n_factors': 100,
        'n_epochs': 20,
        'lr_all': 0.005,
        'reg_all': 0.02,
    }

    def __init__(self, ratings_path, movies_path):
        """Load both CSV files and fit the SVD model on all ratings.

        Args:
            ratings_path: Path to the ratings CSV.
            movies_path: Path to the movies CSV.
        """
        self.ratings = pd.read_csv(ratings_path)
        self.movies = pd.read_csv(movies_path)

        # Build the Surprise dataset from the (user, item, rating) triples.
        reader = Reader(rating_scale=(0.5, 5.0))
        self.data = Dataset.load_from_df(
            self.ratings[['userId', 'movieId', 'rating']],
            reader,
        )

        # The production model is trained on every available rating.
        self.trainset = self.data.build_full_trainset()
        self.algo = self._new_algo()
        self.algo.fit(self.trainset)

    @classmethod
    def _new_algo(cls):
        """Return an unfitted SVD configured with the shared hyperparameters."""
        return SVD(**cls._SVD_PARAMS)

    def recommend_movies(self, user_id, N):
        """Return the N unrated movies with the highest predicted rating.

        Args:
            user_id: Raw user id as it appears in the ratings file.
            N: Maximum number of recommendations; values <= 0 yield ``[]``.

        Returns:
            A list of dicts with keys ``movieId``, ``title`` and
            ``predicted_rating`` (rounded to 2 decimals), best first.
        """
        # A negative N would otherwise slice "all but the last |N|" items.
        if N <= 0:
            return []

        all_movie_ids = self.movies['movieId'].unique()

        # A set gives O(1) membership tests; checking against the raw array
        # made the filter below quadratic in practice.
        user_rows = self.ratings['userId'] == user_id
        already_rated = set(self.ratings.loc[user_rows, 'movieId'].values)
        unrated_movies = [m for m in all_movie_ids if m not in already_rated]

        predictions = [
            (movie_id, self.algo.predict(user_id, movie_id).est)
            for movie_id in unrated_movies
        ]
        # Stable sort: ties keep the order in which movies appear in the file.
        predictions.sort(key=lambda pair: pair[1], reverse=True)

        results = []
        for movie_id, score in predictions[:N]:
            # First matching row wins if the movies file has duplicate ids.
            title = self.movies[self.movies['movieId'] == movie_id]['title'].values[0]
            results.append({
                'movieId': movie_id,
                'title': title,
                'predicted_rating': round(score, 2),
            })
        return results

    def evaluate(self, k=10, threshold=4.0):
        """Estimate RMSE/MAE by 5-fold CV and Precision@k/Recall@k on a holdout.

        Fresh SVD instances are used throughout, so the production model in
        ``self.algo`` (trained on the full dataset) is never refitted on a
        partial split as a side effect of evaluating.

        Args:
            k: Cut-off for the ranking metrics.
            threshold: Minimum rating for an item to count as relevant (true
                rating) or as recommended (estimated rating).

        Returns:
            Dict with keys ``rmse``, ``mae``, ``precision@{k}``, ``recall@{k}``.
        """
        cv_results = cross_validate(
            self._new_algo(),
            self.data,
            measures=['RMSE', 'MAE'],
            cv=5,
            verbose=False,
        )

        # Ranking metrics (Precision@K, Recall@K) on a single 80/20 split.
        trainset, testset = train_test_split(self.data, test_size=0.2)
        holdout_algo = self._new_algo()
        holdout_algo.fit(trainset)
        predictions = holdout_algo.test(testset)

        precisions, recalls = self._precision_recall_at_k(
            predictions, k, threshold
        )

        return {
            'rmse': np.mean(cv_results['test_rmse']),
            'mae': np.mean(cv_results['test_mae']),
            f'precision@{k}': np.mean(precisions),
            f'recall@{k}': np.mean(recalls),
        }

    @staticmethod
    def _precision_recall_at_k(predictions, k, threshold):
        """Compute per-user Precision@k and Recall@k.

        Args:
            predictions: Iterable of 5-tuples
                ``(uid, iid, true_rating, estimate, details)``.
            k: Number of top-ranked items considered per user.
            threshold: Relevance / recommendation cut-off.

        Returns:
            ``(precisions, recalls)``: two lists with one entry per user.
        """
        user_est_true = defaultdict(list)
        for uid, _, true_r, est, _ in predictions:
            user_est_true[uid].append((est, true_r))

        precisions = []
        recalls = []
        for user_ratings in user_est_true.values():
            user_ratings.sort(key=lambda pair: pair[0], reverse=True)
            top_k = user_ratings[:k]

            n_rel = sum(1 for _, true_r in user_ratings if true_r >= threshold)
            n_rec_k = sum(1 for est, _ in top_k if est >= threshold)
            n_rel_and_rec_k = sum(
                1
                for est, true_r in top_k
                if true_r >= threshold and est >= threshold
            )

            # Both ratios are undefined on a zero denominator; report 0.
            precisions.append(n_rel_and_rec_k / n_rec_k if n_rec_k > 0 else 0)
            recalls.append(n_rel_and_rec_k / n_rel if n_rel > 0 else 0)

        return precisions, recalls


# Initialize recommender (loads the CSV files and trains the model at import).
recommender = MovieRecommender('ratings.csv', 'movies.csv')


def recommend_interface(user_id, n_recommendations):
    """Gradio callback: format top-N recommendations as numbered text lines.

    Any failure (non-numeric user id, lookup error, ...) is reported to the
    UI as an ``Error: ...`` string instead of being raised.
    """
    try:
        user_id = int(user_id)
        # The slider may deliver a float; the recommender slices with an int.
        n_recommendations = int(n_recommendations)
        recommendations = recommender.recommend_movies(user_id, n_recommendations)
        return "\n".join(
            f"{i}. {rec['title']} (Predicted: {rec['predicted_rating']})"
            for i, rec in enumerate(recommendations, 1)
        )
    except Exception as e:  # UI boundary: surface the message to the user.
        return f"Error: {str(e)}"


# Create interface
demo = gr.Interface(
    fn=recommend_interface,
    inputs=[
        gr.Textbox(label="User ID", placeholder="Enter user ID"),
        gr.Slider(minimum=1, maximum=20, value=10, step=1,
                  label="Number of Recommendations"),
    ],
    outputs=gr.Textbox(label="Recommendations", lines=15),
    title="MovieLens Recommendation System",
    description="Enter a user ID to get personalized movie recommendations",
)

demo.launch()