Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,15 +3,12 @@ import pandas as pd
|
|
| 3 |
import numpy as np
|
| 4 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 5 |
import joblib
|
| 6 |
-
import os
|
| 7 |
|
| 8 |
-
# Load precomputed weights
|
| 9 |
@st.cache_data
|
| 10 |
def load_weights():
|
| 11 |
try:
|
| 12 |
weights_path = 'weights'
|
| 13 |
content_tfidf_matrix = joblib.load(f'{weights_path}/content_tfidf_matrix.joblib')
|
| 14 |
-
content_similarity_matrix = joblib.load(f'{weights_path}/content_similarity_matrix.joblib')
|
| 15 |
title_to_index = joblib.load(f'{weights_path}/content_title_to_index.joblib')
|
| 16 |
content_vectorizer = joblib.load(f'{weights_path}/content_vectorizer.joblib')
|
| 17 |
movies = joblib.load(f'{weights_path}/movies_data.joblib')
|
|
@@ -22,7 +19,6 @@ def load_weights():
|
|
| 22 |
return {
|
| 23 |
"movies": movies,
|
| 24 |
"content_tfidf_matrix": content_tfidf_matrix,
|
| 25 |
-
"content_similarity_matrix": content_similarity_matrix,
|
| 26 |
"title_to_index": title_to_index,
|
| 27 |
"content_vectorizer": content_vectorizer,
|
| 28 |
"user_profiles": user_profiles,
|
|
@@ -31,17 +27,18 @@ def load_weights():
|
|
| 31 |
"train_ratings": train_ratings
|
| 32 |
}
|
| 33 |
except FileNotFoundError as e:
|
| 34 |
-
st.error(f"
|
| 35 |
st.stop()
|
| 36 |
except Exception as e:
|
| 37 |
-
st.error(f"
|
| 38 |
st.stop()
|
| 39 |
|
| 40 |
-
# Content-based recommendation
|
| 41 |
-
def get_similar_movies(title,
|
| 42 |
try:
|
| 43 |
index = title_to_index[title]
|
| 44 |
-
|
|
|
|
| 45 |
similar_indices = similarity_scores.argsort()[::-1][1:N+1]
|
| 46 |
similar_movies = movies.iloc[similar_indices][['title', 'genres']]
|
| 47 |
similar_scores = similarity_scores[similar_indices]
|
|
@@ -60,19 +57,20 @@ def get_top_n_recommendations(user_id, user_profiles, tfidf_matrix, movie_id_to_
|
|
| 60 |
top_n_indices = [idx for idx in movie_indices if movies['id'].iloc[idx] not in rated_movies][:n]
|
| 61 |
return [(movies['title'].iloc[idx], 1 + 4 * similarities[idx]) for idx in top_n_indices]
|
| 62 |
|
| 63 |
-
#
|
| 64 |
st.set_page_config(page_title="Movie Recommender", page_icon="🎬")
|
| 65 |
st.title("🎬 Movie Recommender System")
|
| 66 |
|
| 67 |
st.markdown("""
|
| 68 |
-
This app
|
| 69 |
-
- **Content-Based
|
| 70 |
-
- **User Profile-Based
|
| 71 |
""")
|
| 72 |
|
| 73 |
# Load all weights
|
| 74 |
weights = load_weights()
|
| 75 |
|
|
|
|
| 76 |
recommendation_type = st.sidebar.radio("Choose Recommendation Type", ["Content-Based", "User Profile-Based"])
|
| 77 |
|
| 78 |
if recommendation_type == "Content-Based":
|
|
@@ -82,17 +80,17 @@ if recommendation_type == "Content-Based":
|
|
| 82 |
if movie_title:
|
| 83 |
recommendations = get_similar_movies(
|
| 84 |
title=movie_title,
|
| 85 |
-
|
| 86 |
title_to_index=weights["title_to_index"],
|
| 87 |
movies=weights["movies"],
|
| 88 |
N=5
|
| 89 |
)
|
| 90 |
if recommendations:
|
| 91 |
-
st.subheader(f"
|
| 92 |
for i, (movie, score) in enumerate(recommendations, 1):
|
| 93 |
st.write(f"{i}. {movie} (Similarity Score: {score:.2f})")
|
| 94 |
else:
|
| 95 |
-
st.warning(f"
|
| 96 |
|
| 97 |
else:
|
| 98 |
st.header("👤 User Profile-Based Recommendations")
|
|
@@ -109,9 +107,8 @@ else:
|
|
| 109 |
n=5
|
| 110 |
)
|
| 111 |
if recommendations:
|
| 112 |
-
st.subheader(f"Top picks for User ID {user_id}:")
|
| 113 |
-
for i, (movie,
|
| 114 |
-
st.write(f"{i}. {movie} (Predicted Rating: {
|
| 115 |
else:
|
| 116 |
-
st.warning(f"
|
| 117 |
-
|
|
|
|
| 3 |
import numpy as np
|
| 4 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 5 |
import joblib
|
|
|
|
| 6 |
|
|
|
|
| 7 |
@st.cache_data
|
| 8 |
def load_weights():
|
| 9 |
try:
|
| 10 |
weights_path = 'weights'
|
| 11 |
content_tfidf_matrix = joblib.load(f'{weights_path}/content_tfidf_matrix.joblib')
|
|
|
|
| 12 |
title_to_index = joblib.load(f'{weights_path}/content_title_to_index.joblib')
|
| 13 |
content_vectorizer = joblib.load(f'{weights_path}/content_vectorizer.joblib')
|
| 14 |
movies = joblib.load(f'{weights_path}/movies_data.joblib')
|
|
|
|
| 19 |
return {
|
| 20 |
"movies": movies,
|
| 21 |
"content_tfidf_matrix": content_tfidf_matrix,
|
|
|
|
| 22 |
"title_to_index": title_to_index,
|
| 23 |
"content_vectorizer": content_vectorizer,
|
| 24 |
"user_profiles": user_profiles,
|
|
|
|
| 27 |
"train_ratings": train_ratings
|
| 28 |
}
|
| 29 |
except FileNotFoundError as e:
|
| 30 |
+
st.error(f"Missing file: {e.filename}")
|
| 31 |
st.stop()
|
| 32 |
except Exception as e:
|
| 33 |
+
st.error(f"Error loading weights: {str(e)}")
|
| 34 |
st.stop()
|
| 35 |
|
| 36 |
+
# Content-based recommendation (real-time cosine similarity)
|
| 37 |
+
def get_similar_movies(title, tfidf_matrix, title_to_index, movies, N=5):
|
| 38 |
try:
|
| 39 |
index = title_to_index[title]
|
| 40 |
+
target_vector = tfidf_matrix[index]
|
| 41 |
+
similarity_scores = cosine_similarity(target_vector, tfidf_matrix).flatten()
|
| 42 |
similar_indices = similarity_scores.argsort()[::-1][1:N+1]
|
| 43 |
similar_movies = movies.iloc[similar_indices][['title', 'genres']]
|
| 44 |
similar_scores = similarity_scores[similar_indices]
|
|
|
|
| 57 |
top_n_indices = [idx for idx in movie_indices if movies['id'].iloc[idx] not in rated_movies][:n]
|
| 58 |
return [(movies['title'].iloc[idx], 1 + 4 * similarities[idx]) for idx in top_n_indices]
|
| 59 |
|
| 60 |
+
# Streamlit App
|
| 61 |
st.set_page_config(page_title="Movie Recommender", page_icon="🎬")
|
| 62 |
st.title("🎬 Movie Recommender System")
|
| 63 |
|
| 64 |
st.markdown("""
|
| 65 |
+
This app supports:
|
| 66 |
+
- **Content-Based Recommendations**: Find similar movies based on genre.
|
| 67 |
+
- **User Profile-Based Recommendations**: Personalized picks based on your ratings.
|
| 68 |
""")
|
| 69 |
|
| 70 |
# Load all weights
|
| 71 |
weights = load_weights()
|
| 72 |
|
| 73 |
+
# Recommendation type selector
|
| 74 |
recommendation_type = st.sidebar.radio("Choose Recommendation Type", ["Content-Based", "User Profile-Based"])
|
| 75 |
|
| 76 |
if recommendation_type == "Content-Based":
|
|
|
|
| 80 |
if movie_title:
|
| 81 |
recommendations = get_similar_movies(
|
| 82 |
title=movie_title,
|
| 83 |
+
tfidf_matrix=weights["content_tfidf_matrix"],
|
| 84 |
title_to_index=weights["title_to_index"],
|
| 85 |
movies=weights["movies"],
|
| 86 |
N=5
|
| 87 |
)
|
| 88 |
if recommendations:
|
| 89 |
+
st.subheader(f"If you liked **{movie_title}**, you might enjoy:")
|
| 90 |
for i, (movie, score) in enumerate(recommendations, 1):
|
| 91 |
st.write(f"{i}. {movie} (Similarity Score: {score:.2f})")
|
| 92 |
else:
|
| 93 |
+
st.warning(f"No similar movies found for '{movie_title}'.")
|
| 94 |
|
| 95 |
else:
|
| 96 |
st.header("👤 User Profile-Based Recommendations")
|
|
|
|
| 107 |
n=5
|
| 108 |
)
|
| 109 |
if recommendations:
|
| 110 |
+
st.subheader(f"Top 5 picks for User ID {user_id}:")
|
| 111 |
+
for i, (movie, pred_rating) in enumerate(recommendations, 1):
|
| 112 |
+
st.write(f"{i}. {movie} (Predicted Rating: {pred_rating:.2f})")
|
| 113 |
else:
|
| 114 |
+
st.warning(f"User ID {user_id} not found or not enough ratings.")
|
|
|