import streamlit as st
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import joblib
@st.cache_data
def load_weights():
    """Load the precomputed recommender artifacts from the ``weights`` directory.

    Each artifact is read with ``joblib.load``. The result is a dict with the
    keys ``movies``, ``content_tfidf_matrix``, ``title_to_index``,
    ``content_vectorizer``, ``user_profiles``, ``user_tfidf_matrix``,
    ``movie_id_to_idx`` and ``train_ratings``.

    If a file is missing, or any other exception is raised while loading, the
    problem is reported with ``st.error`` and ``st.stop()`` is called instead
    of returning the dict.
    """
    weights_path = 'weights'
    # (result key, file stem) pairs, listed in the order the files are read.
    artifact_files = (
        ("content_tfidf_matrix", "content_tfidf_matrix"),
        ("title_to_index", "content_title_to_index"),
        ("content_vectorizer", "content_vectorizer"),
        ("movies", "movies_data"),
        ("user_profiles", "user_profiles"),
        ("user_tfidf_matrix", "user_tfidf_matrix"),
        ("movie_id_to_idx", "user_movie_id_to_idx"),
        ("train_ratings", "train_ratings"),
    )
    try:
        loaded = {
            key: joblib.load(f'{weights_path}/{stem}.joblib')
            for key, stem in artifact_files
        }
    except FileNotFoundError as missing:
        st.error(f"Missing file: {missing.filename}")
        st.stop()
    except Exception as failure:
        st.error(f"Error loading weights: {str(failure)}")
        st.stop()
    else:
        # "movies" leads the returned dict; the other keys keep read order.
        return {"movies": loaded.pop("movies"), **loaded}
# Content-based recommendation (real-time cosine similarity)
def get_similar_movies(title, tfidf_matrix, title_to_index, movies, N=5):
    """Return the ``N`` movies most cosine-similar to ``title``.

    Args:
        title: Key looked up in ``title_to_index``.
        tfidf_matrix: Feature matrix; the row at ``title_to_index[title]`` is
            compared against every row with ``cosine_similarity``.
        title_to_index: Mapping from title to a row position in
            ``tfidf_matrix``.
        movies: Table with a ``title`` column, indexed positionally with the
            same row numbers as ``tfidf_matrix`` (assumed aligned; confirm
            against the code that builds the weights).
        N: Maximum number of results; values below zero are treated as zero.

    Returns:
        A list of ``(title, similarity)`` tuples ordered from most to least
        similar, or ``None`` when ``title`` is not a key of
        ``title_to_index``.
    """
    # Only the title lookup is expected to fail; keeping the try this narrow
    # stops unrelated KeyErrors from being reported as "title not found".
    try:
        index = title_to_index[title]
    except KeyError:
        return None

    target_vector = tfidf_matrix[index]
    similarity_scores = cosine_similarity(target_vector, tfidf_matrix).flatten()
    ranked = similarity_scores.argsort()[::-1]

    # Remove the query row by position instead of assuming it sorts first:
    # when other rows tie with it on similarity, it can land anywhere among
    # them, and slicing off element 0 would drop a real neighbour while
    # recommending the query movie to itself.
    similar_indices = ranked[ranked != index][:max(N, 0)]

    similar_titles = movies['title'].iloc[similar_indices]
    similar_scores = similarity_scores[similar_indices]
    return list(zip(similar_titles, similar_scores))
# User profile-based recommendation
def get_top_n_recommendations(user_id, user_profiles, tfidf_matrix, movie_id_to_idx, movies, train_ratings, n=5):
    """Return up to ``n`` unrated movies ranked by similarity to a user profile.

    Args:
        user_id: Key into ``user_profiles``; also matched against the
            ``userId`` column of ``train_ratings``.
        user_profiles: Mapping from user id to a profile vector.
        tfidf_matrix: Feature matrix whose rows are compared with the profile
            via ``cosine_similarity``.
        movie_id_to_idx: Accepted for interface compatibility; not used here.
        movies: Table with ``id`` and ``title`` columns, indexed positionally
            with the same row numbers as ``tfidf_matrix`` (assumed aligned;
            confirm against the code that builds the weights).
        train_ratings: Table with ``userId`` and ``movieId`` columns; movies
            the user already rated there are skipped.
        n: Maximum number of recommendations. A non-positive value yields an
            empty list.

    Returns:
        ``None`` when ``user_id`` is not in ``user_profiles``; otherwise a
        list of ``(title, score)`` tuples, best first, where ``score`` is
        ``1 + 4 * similarity``.
    """
    if user_id not in user_profiles:
        return None

    user_profile = user_profiles[user_id]
    similarities = cosine_similarity(user_profile.reshape(1, -1), tfidf_matrix).flatten()
    movie_indices = np.argsort(similarities)[::-1]
    rated_movies = set(train_ratings[train_ratings['userId'] == user_id]['movieId'].values)

    # Pull the id column out once; per-row ``.iloc`` lookups over the whole
    # catalogue were the dominant cost of the previous implementation.
    movie_ids = movies['id'].to_numpy()

    recommendations = []
    for idx in movie_indices:
        # Stop as soon as enough candidates are collected instead of
        # filtering every movie and slicing afterwards.
        if len(recommendations) >= n:
            break
        if movie_ids[idx] in rated_movies:
            continue
        recommendations.append((movies['title'].iloc[idx], 1 + 4 * similarities[idx]))
    return recommendations
# Streamlit App
st.set_page_config(page_title="Movie Recommender", page_icon="🎬")
st.title("🎬 Movie Recommender System")
st.markdown("""
This app supports:
- **Content-Based Recommendations**: Find similar movies based on genre.
- **User Profile-Based Recommendations**: Personalized picks based on your ratings.
""")

# One call fetches every artifact the two recommenders need.
weights = load_weights()

# The sidebar radio decides which recommender the page shows.
recommendation_type = st.sidebar.radio(
    "Choose Recommendation Type",
    ["Content-Based", "User Profile-Based"],
)

if recommendation_type == "Content-Based":
    st.header("📽️ Content-Based Recommendations")

    # The blank first option means nothing is recommended until the user
    # actually picks a title.
    known_titles = sorted(weights["movies"]['title'].dropna().unique())
    movie_title = st.selectbox("Choose a Movie Title", [""] + known_titles)

    if movie_title:
        similar = get_similar_movies(
            movie_title,
            weights["content_tfidf_matrix"],
            weights["title_to_index"],
            weights["movies"],
            N=5,
        )
        if not similar:
            st.warning(f"No similar movies found for '{movie_title}'.")
        else:
            st.subheader(f"If you liked **{movie_title}**, you might enjoy:")
            for i, (movie, score) in enumerate(similar, start=1):
                st.write(f"{i}. {movie} (Similarity Score: {score:.2f})")
else:
    st.header("👤 User Profile-Based Recommendations")
    user_id = st.number_input("Enter your User ID", min_value=1, step=1, value=1)

    # Recommendations are computed only when the button is pressed.
    if st.button("Get Recommendations"):
        picks = get_top_n_recommendations(
            user_id,
            weights["user_profiles"],
            weights["user_tfidf_matrix"],
            weights["movie_id_to_idx"],
            weights["movies"],
            weights["train_ratings"],
            n=5,
        )
        if not picks:
            st.warning(f"User ID {user_id} not found or not enough ratings.")
        else:
            st.subheader(f"Top 5 picks for User ID {user_id}:")
            for i, (movie, pred_rating) in enumerate(picks, start=1):
                st.write(f"{i}. {movie} (Predicted Rating: {pred_rating:.2f})")
|