Spaces:

LvMAC
/

DataSynthis_ML_JobTask

Sleeping

App Files Files Community

DataSynthis_ML_JobTask / app.py

LvMAC

Create app.py

0be6b17 verified 6 months ago

raw

history blame

4.65 kB

	import pandas as pd
	import numpy as np
	from surprise import SVD, Dataset, Reader, accuracy
	from surprise.model_selection import train_test_split, cross_validate
	from collections import defaultdict

	class MovieRecommender:
	    """Movie recommender backed by a Surprise SVD model.

	    The constructor reads the ratings and movies CSV files with pandas, wraps
	    the ratings in a Surprise ``Dataset`` (rating scale 0.5-5.0) and fits an
	    SVD model on the full training set.

	    Instance fields set by ``__init__``:
	      ratings  -- DataFrame read from ``ratings_path``; the code uses its
	                  ``userId``, ``movieId`` and ``rating`` columns.
	      movies   -- DataFrame read from ``movies_path``; the code uses its
	                  ``movieId`` and ``title`` columns.
	      data     -- Surprise dataset built from ``ratings``.
	      trainset -- full trainset built from ``data``.
	      algo     -- the fitted SVD model used by ``recommend_movies``.
	    """

	    # One source of truth for the SVD hyperparameters, so the serving model
	    # and the throwaway models trained by ``evaluate`` are configured alike.
	    _SVD_PARAMS = {
	        'n_factors': 100,
	        'n_epochs': 20,
	        'lr_all': 0.005,
	        'reg_all': 0.02,
	    }

	    def __init__(self, ratings_path, movies_path):
	        """Load both CSV files and train the serving model.

	        Args:
	            ratings_path: path of the ratings CSV, passed to ``pd.read_csv``.
	            movies_path: path of the movies CSV, passed to ``pd.read_csv``.
	        """
	        # Load data
	        self.ratings = pd.read_csv(ratings_path)
	        self.movies = pd.read_csv(movies_path)

	        # Build Surprise dataset
	        reader = Reader(rating_scale=(0.5, 5.0))
	        self.data = Dataset.load_from_df(
	            self.ratings[['userId', 'movieId', 'rating']],
	            reader,
	        )

	        # Train the serving model on every available rating
	        self.trainset = self.data.build_full_trainset()
	        self.algo = self._new_model()
	        self.algo.fit(self.trainset)

	    def _new_model(self):
	        """Return an unfitted SVD configured with ``_SVD_PARAMS``."""
	        return SVD(**self._SVD_PARAMS)

	    def recommend_movies(self, user_id, N):
	        """Return the top ``N`` unrated movies for ``user_id``.

	        Every movie id in ``self.movies`` that has no row for ``user_id`` in
	        ``self.ratings`` is scored with ``self.algo.predict``; the highest
	        predicted ratings win. A user with no rows in ``self.ratings`` is
	        scored against every movie.

	        Args:
	            user_id: user id, compared against the ``userId`` column.
	            N: maximum number of recommendations; values <= 0 yield ``[]``.

	        Returns:
	            A list of dicts with keys ``movieId``, ``title`` and
	            ``predicted_rating`` (rounded to 2 decimals), ordered by
	            descending predicted rating.
	        """
	        # A negative N would otherwise slice from the end (predictions[:-1])
	        # and return almost the whole catalogue.
	        if N <= 0:
	            return []

	        # A set gives O(1) membership tests; scanning an array for every
	        # movie made the filter quadratic.
	        rated_mask = self.ratings['userId'] == user_id
	        rated_movies = set(self.ratings.loc[rated_mask, 'movieId'])

	        # Predict a rating for every movie the user has not rated yet
	        predictions = [
	            (movie_id, self.algo.predict(user_id, movie_id).est)
	            for movie_id in self.movies['movieId'].unique()
	            if movie_id not in rated_movies
	        ]

	        # Sort by predicted rating (stable, so ties keep catalogue order)
	        predictions.sort(key=lambda pair: pair[1], reverse=True)
	        top_n = predictions[:N]

	        # Resolve all titles in one pass; drop_duplicates keeps the first
	        # title for a repeated movieId, as a per-movie ``values[0]`` would.
	        top_ids = [movie_id for movie_id, _ in top_n]
	        wanted = self.movies[self.movies['movieId'].isin(top_ids)]
	        wanted = wanted.drop_duplicates(subset='movieId')
	        title_by_id = dict(zip(wanted['movieId'], wanted['title']))

	        return [
	            {
	                'movieId': movie_id,
	                'title': title_by_id[movie_id],
	                'predicted_rating': round(score, 2),
	            }
	            for movie_id, score in top_n
	        ]

	    @staticmethod
	    def _precision_recall_at_k(predictions, k, threshold):
	        """Compute per-user precision@k and recall@k.

	        Args:
	            predictions: iterable of 5-tuples
	                ``(uid, iid, true_rating, estimated_rating, details)``.
	            k: number of top-estimated items considered per user.
	            threshold: rating at or above which an item counts as relevant
	                (true rating) or recommended (estimated rating).

	        Returns:
	            ``(precisions, recalls)``: two lists with one entry per user, in
	            order of first appearance. A ratio whose denominator is zero is
	            recorded as 0.
	        """
	        per_user = defaultdict(list)
	        for uid, _, true_r, est, _ in predictions:
	            per_user[uid].append((est, true_r))

	        precisions = []
	        recalls = []
	        for user_ratings in per_user.values():
	            user_ratings.sort(key=lambda pair: pair[0], reverse=True)
	            top_k = user_ratings[:k]

	            n_rel = sum(true_r >= threshold for _, true_r in user_ratings)
	            n_rec_k = sum(est >= threshold for est, _ in top_k)
	            n_rel_and_rec_k = sum(
	                true_r >= threshold and est >= threshold
	                for est, true_r in top_k
	            )

	            precisions.append(n_rel_and_rec_k / n_rec_k if n_rec_k > 0 else 0)
	            recalls.append(n_rel_and_rec_k / n_rel if n_rel > 0 else 0)

	        return precisions, recalls

	    def evaluate(self, k=10, threshold=4.0):
	        """Measure RMSE/MAE by 5-fold CV and precision/recall@k on a hold-out.

	        Evaluation trains fresh SVD instances, so ``self.algo`` (the model
	        fitted on the full trainset and used for recommendations) is left
	        untouched.

	        Args:
	            k: cut-off for precision@k / recall@k.
	            threshold: rating at or above which an item counts as relevant.

	        Returns:
	            Dict with keys ``rmse``, ``mae``, ``precision@{k}`` and
	            ``recall@{k}``, each the mean over folds or users.
	        """
	        # Both steps below fit the estimator they are given. Handing them
	        # self.algo would leave the serving model trained on a partial
	        # split, so each step gets its own throwaway model.
	        cv_results = cross_validate(
	            self._new_model(),
	            self.data,
	            measures=['RMSE', 'MAE'],
	            cv=5,
	            verbose=False,
	        )

	        # Ranking metrics on a single 80/20 split
	        trainset, testset = train_test_split(self.data, test_size=0.2)
	        holdout_model = self._new_model()
	        holdout_model.fit(trainset)
	        predictions = holdout_model.test(testset)

	        precisions, recalls = self._precision_recall_at_k(
	            predictions, k, threshold
	        )

	        return {
	            'rmse': np.mean(cv_results['test_rmse']),
	            'mae': np.mean(cv_results['test_mae']),
	            f'precision@{k}': np.mean(precisions),
	            f'recall@{k}': np.mean(recalls),
	        }


	import gradio as gr

	# Initialize recommender once at module load. The constructor reads both CSV
	# files (relative paths) and fits the SVD model, so this statement does not
	# return until training has finished.
	recommender = MovieRecommender('ratings.csv', 'movies.csv')

	def recommend_interface(user_id, n_recommendations):
	    """Gradio callback: format recommendations for a user as numbered text.

	    Both arguments are coerced with ``int`` before being handed to the
	    module-level ``recommender``. Returns one line per recommendation joined
	    by newlines, or an ``"Error: ..."`` string if anything raises.
	    """
	    try:
	        picks = recommender.recommend_movies(
	            int(user_id),
	            int(n_recommendations),
	        )
	        numbered = [
	            f"{i}. {rec['title']} (Predicted: {rec['predicted_rating']})"
	            for i, rec in enumerate(picks, 1)
	        ]
	        return "\n".join(numbered)
	    except Exception as e:
	        # UI boundary: surface the failure as text instead of crashing the app
	        return f"Error: {str(e)}"

	# Create interface: the "User ID" textbox and the "Number of Recommendations"
	# slider (1-20, default 10, step 1) are the two inputs of recommend_interface,
	# in that order; the string it returns is shown in the "Recommendations" box.
	demo = gr.Interface(
	fn=recommend_interface,
	inputs=[
	gr.Textbox(label="User ID", placeholder="Enter user ID"),
	gr.Slider(minimum=1, maximum=20, value=10, step=1, label="Number of Recommendations")
	],
	outputs=gr.Textbox(label="Recommendations", lines=15),
	title="MovieLens Recommendation System",
	description="Enter a user ID to get personalized movie recommendations"
	)

	# No __main__ guard: launch() runs whenever this module is executed or imported.
	demo.launch()