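# NOTE: the six items below are file-viewer page residue captured with the
# source (avatar caption, commit message, commit hash, viewer links, size);
# they are not part of the program.
#   LvMAC's picture | Create app.py | 0be6b17 verified | raw | history blame | 4.65 kB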
import pandas as pd
import numpy as np
from surprise import SVD, Dataset, Reader, accuracy
from surprise.model_selection import train_test_split, cross_validate
from collections import defaultdict
class MovieRecommender:
    """SVD collaborative-filtering recommender built on the Surprise library.

    Loads a ratings table and a movies table from CSV, fits an SVD model on
    every available rating, and offers top-N recommendations plus an offline
    evaluation routine.

    The ratings CSV must provide ``userId``, ``movieId`` and ``rating``
    columns; the movies CSV must provide ``movieId`` and ``title``.
    """

    # Hyperparameters shared by the serving model and by the throwaway
    # models that ``evaluate`` trains, so the reported metrics describe the
    # same configuration that serves recommendations.
    _SVD_PARAMS = {
        'n_factors': 100,
        'n_epochs': 20,
        'lr_all': 0.005,
        'reg_all': 0.02,
    }

    def __init__(self, ratings_path, movies_path):
        """Load both CSV files and fit the serving model on all ratings.

        Args:
            ratings_path: Path of the ratings CSV.
            movies_path: Path of the movies CSV.
        """
        self.ratings = pd.read_csv(ratings_path)
        self.movies = pd.read_csv(movies_path)

        # Surprise needs the rating bounds to interpret the rating column.
        reader = Reader(rating_scale=(0.5, 5.0))
        self.data = Dataset.load_from_df(
            self.ratings[['userId', 'movieId', 'rating']],
            reader,
        )

        # The serving model is trained on the full dataset (no hold-out).
        self.trainset = self.data.build_full_trainset()
        self.algo = self._new_algo()
        self.algo.fit(self.trainset)

    @classmethod
    def _new_algo(cls):
        """Return an unfitted SVD estimator with the class hyperparameters."""
        return SVD(**cls._SVD_PARAMS)

    def recommend_movies(self, user_id, N):
        """Return the ``N`` unrated movies with the highest predicted rating.

        Args:
            user_id: Identifier matched against the ``userId`` column.
            N: Maximum number of recommendations; values <= 0 give ``[]``.

        Returns:
            A list of dicts with keys ``movieId``, ``title`` and
            ``predicted_rating`` (rounded to two decimals), ordered from the
            highest to the lowest predicted rating.
        """
        # A negative N would otherwise slice from the end of the ranking and
        # return almost every movie.
        if N <= 0:
            return []

        # A set gives O(1) membership tests; scanning the ratings array once
        # per candidate movie is quadratic.
        user_rows = self.ratings['userId'] == user_id
        rated = set(self.ratings.loc[user_rows, 'movieId'])

        # Build the id -> title map once instead of filtering the DataFrame
        # per result; setdefault keeps the first title for a duplicated id.
        titles = {}
        for movie_id, title in zip(self.movies['movieId'], self.movies['title']):
            titles.setdefault(movie_id, title)

        candidates = [
            movie_id
            for movie_id in self.movies['movieId'].unique()
            if movie_id not in rated
        ]
        predictions = [
            (movie_id, self.algo.predict(user_id, movie_id).est)
            for movie_id in candidates
        ]
        predictions.sort(key=lambda pair: pair[1], reverse=True)

        return [
            {
                'movieId': movie_id,
                'title': titles[movie_id],
                'predicted_rating': round(score, 2),
            }
            for movie_id, score in predictions[:N]
        ]

    def evaluate(self):
        """Compute RMSE, MAE, precision@10 and recall@10.

        RMSE and MAE are averaged over a 5-fold cross-validation.
        Precision@10 and recall@10 are computed on a 20% hold-out split with
        a relevance threshold of 4.0. NDCG is not computed.

        Both steps train fresh estimators, so the serving model in
        ``self.algo`` (fitted on all ratings) is left untouched.

        Returns:
            A dict with keys ``rmse``, ``mae``, ``precision@10`` and
            ``recall@10``.
        """
        # A fresh estimator is passed so that cross-validation never trains
        # the serving model on a fold.
        cv_results = cross_validate(
            self._new_algo(),
            self.data,
            measures=['RMSE', 'MAE'],
            cv=5,
            verbose=False,
        )

        # Refitting self.algo here would leave the recommender serving a
        # model trained on only 80% of the ratings after every evaluation.
        trainset, testset = train_test_split(self.data, test_size=0.2)
        holdout_algo = self._new_algo()
        holdout_algo.fit(trainset)
        predictions = holdout_algo.test(testset)

        k = 10
        threshold = 4.0
        precisions, recalls = self._precision_recall_at_k(
            predictions, k, threshold
        )

        return {
            'rmse': np.mean(cv_results['test_rmse']),
            'mae': np.mean(cv_results['test_mae']),
            f'precision@{k}': np.mean(precisions),
            f'recall@{k}': np.mean(recalls),
        }

    @staticmethod
    def _precision_recall_at_k(predictions, k, threshold):
        """Return per-user precision@k and recall@k lists.

        Args:
            predictions: Iterable of 5-tuples
                ``(uid, iid, true_rating, estimate, details)``.
            k: Number of top-ranked items considered per user.
            threshold: Minimum rating for an item to count as relevant
                (true rating) or recommended (estimate).

        Returns:
            ``(precisions, recalls)``, each holding one value per user. A
            user with no recommended (resp. no relevant) items contributes 0.
        """
        user_est_true = defaultdict(list)
        for uid, _, true_r, est, _ in predictions:
            user_est_true[uid].append((est, true_r))

        precisions = []
        recalls = []
        for user_ratings in user_est_true.values():
            # Rank each user's test items by estimated rating, best first.
            user_ratings.sort(key=lambda pair: pair[0], reverse=True)
            top_k = user_ratings[:k]

            n_rel = sum(1 for _, true_r in user_ratings if true_r >= threshold)
            n_rec_k = sum(1 for est, _ in top_k if est >= threshold)
            n_rel_and_rec_k = sum(
                1
                for est, true_r in top_k
                if true_r >= threshold and est >= threshold
            )

            precisions.append(n_rel_and_rec_k / n_rec_k if n_rec_k > 0 else 0)
            recalls.append(n_rel_and_rec_k / n_rel if n_rel > 0 else 0)
        return precisions, recalls
import gradio as gr
# Build the shared recommender at import time; constructing it loads both
# CSV files and fits the model, so every request reuses this one instance.
RATINGS_CSV = 'ratings.csv'
MOVIES_CSV = 'movies.csv'
recommender = MovieRecommender(
    ratings_path=RATINGS_CSV,
    movies_path=MOVIES_CSV,
)
def recommend_interface(user_id, n_recommendations):
    """Format top-N recommendations for a user as numbered text lines.

    Both arguments are converted with ``int()`` before use. Any exception
    raised while converting, predicting or formatting is turned into an
    ``"Error: ..."`` string rather than propagated, so the UI always gets
    text back.

    Args:
        user_id: User identifier as entered in the UI.
        n_recommendations: Requested number of recommendations.

    Returns:
        One line per recommendation (``"<rank>. <title> (Predicted: <x>)"``)
        joined by newlines, or an error message string.
    """
    try:
        uid = int(user_id)
        top_n = int(n_recommendations)
        picks = recommender.recommend_movies(uid, top_n)
        return "\n".join(
            f"{rank}. {item['title']} (Predicted: {item['predicted_rating']})"
            for rank, item in enumerate(picks, start=1)
        )
    except Exception as exc:
        return f"Error: {str(exc)}"
# Assemble the Gradio UI: a free-text user ID plus a 1-20 slider go in,
# and the formatted recommendation list comes out in a text box.
user_id_box = gr.Textbox(label="User ID", placeholder="Enter user ID")
count_slider = gr.Slider(
    minimum=1,
    maximum=20,
    value=10,
    step=1,
    label="Number of Recommendations",
)
results_box = gr.Textbox(label="Recommendations", lines=15)

demo = gr.Interface(
    fn=recommend_interface,
    inputs=[user_id_box, count_slider],
    outputs=results_box,
    title="MovieLens Recommendation System",
    description="Enter a user ID to get personalized movie recommendations",
)

# Start the web server as soon as the module is executed.
demo.launch()