Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -37,7 +37,7 @@ initial_user_history = pd.DataFrame({
|
|
| 37 |
synthetic_user_history = initial_user_history.copy()
|
| 38 |
|
| 39 |
# Load the new combined sampled data
|
| 40 |
-
combined_sampled_data = pd.read_csv('./
|
| 41 |
|
| 42 |
# Extract raw_ratings_df from the combined data
|
| 43 |
raw_ratings_df = combined_sampled_data[['userId', 'movieId', 'rating', 'timestamp']].copy()
|
|
@@ -76,9 +76,12 @@ for user_id in range(1, n_users + 1):
|
|
| 76 |
# Append mock ratings to the initial ratings_df
|
| 77 |
ratings_df = pd.concat([ratings_df, pd.DataFrame(mock_ratings)], ignore_index=True)
|
| 78 |
|
|
|
|
|
|
|
|
|
|
| 79 |
# Function to rebuild recommendation models based on current history
|
| 80 |
def rebuild_models():
|
| 81 |
-
global user_item_matrix, user_similarity_df, content_similarity
|
| 82 |
|
| 83 |
# Merge synthetic user into the ratings dataset
|
| 84 |
all_ratings = pd.concat([
|
|
@@ -114,6 +117,9 @@ def rebuild_models():
|
|
| 114 |
tfidf_matrix = tfidf.fit_transform(movies_db['genres'])
|
| 115 |
content_similarity = cosine_similarity(tfidf_matrix, tfidf_matrix)
|
| 116 |
|
|
|
|
|
|
|
|
|
|
| 117 |
# Initialize models
|
| 118 |
rebuild_models()
|
| 119 |
|
|
@@ -183,14 +189,20 @@ def get_top_movies(user_id, search_query=None, n=15, alpha=0.6):
|
|
| 183 |
if len(user_rated_movies) > 0:
|
| 184 |
# Find the positional index of the movie in movies_db for content_similarity
|
| 185 |
# Ensure movie_id exists in movies_db before proceeding
|
| 186 |
-
if movie_id in
|
| 187 |
-
movie_idx =
|
| 188 |
|
| 189 |
for rated_movie_id in user_rated_movies:
|
| 190 |
# Ensure rated_movie_id exists in movies_db before proceeding
|
| 191 |
-
if rated_movie_id in
|
| 192 |
-
rated_idx =
|
| 193 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 194 |
|
| 195 |
content_score = content_score / len(user_rated_movies)
|
| 196 |
|
|
|
|
| 37 |
synthetic_user_history = initial_user_history.copy()
|
| 38 |
|
| 39 |
# Load the new combined sampled data
|
| 40 |
+
combined_sampled_data = pd.read_csv('./sampled_movie_ratings_for_gradio.csv')
|
| 41 |
|
| 42 |
# Extract raw_ratings_df from the combined data
|
| 43 |
raw_ratings_df = combined_sampled_data[['userId', 'movieId', 'rating', 'timestamp']].copy()
|
|
|
|
| 76 |
# Append mock ratings to the initial ratings_df
|
| 77 |
ratings_df = pd.concat([ratings_df, pd.DataFrame(mock_ratings)], ignore_index=True)
|
| 78 |
|
| 79 |
+
# Global variable for movie_id to positional index mapping
|
| 80 |
+
movie_id_to_idx = {}
|
| 81 |
+
|
| 82 |
# Function to rebuild recommendation models based on current history
|
| 83 |
def rebuild_models():
|
| 84 |
+
global user_item_matrix, user_similarity_df, content_similarity, movie_id_to_idx
|
| 85 |
|
| 86 |
# Merge synthetic user into the ratings dataset
|
| 87 |
all_ratings = pd.concat([
|
|
|
|
| 117 |
tfidf_matrix = tfidf.fit_transform(movies_db['genres'])
|
| 118 |
content_similarity = cosine_similarity(tfidf_matrix, tfidf_matrix)
|
| 119 |
|
| 120 |
+
# Create a mapping from movieId to its 0-based positional index in movies_db
|
| 121 |
+
movie_id_to_idx = {movie_id: idx for idx, movie_id in enumerate(movies_db['movieId'])}
|
| 122 |
+
|
| 123 |
# Initialize models
|
| 124 |
rebuild_models()
|
| 125 |
|
|
|
|
| 189 |
if len(user_rated_movies) > 0:
|
| 190 |
# Find the positional index of the movie in movies_db for content_similarity
|
| 191 |
# Ensure movie_id exists in movies_db before proceeding
|
| 192 |
+
if movie_id in movie_id_to_idx:
|
| 193 |
+
movie_idx = movie_id_to_idx[movie_id]
|
| 194 |
|
| 195 |
for rated_movie_id in user_rated_movies:
|
| 196 |
# Ensure rated_movie_id exists in movies_db before proceeding
|
| 197 |
+
if rated_movie_id in movie_id_to_idx:
|
| 198 |
+
rated_idx = movie_id_to_idx[rated_movie_id]
|
| 199 |
+
# Ensure indices are within bounds of content_similarity
|
| 200 |
+
if rated_idx < content_similarity.shape[1] and movie_idx < content_similarity.shape[0]:
|
| 201 |
+
content_score += content_similarity[movie_idx, rated_idx]
|
| 202 |
+
else:
|
| 203 |
+
# Handle cases where index might still be out of bounds due to data inconsistencies
|
| 204 |
+
# This could happen if movies_db was somehow out of sync with content_similarity
|
| 205 |
+
print(f"Warning: Content similarity index out of bounds for movie_id={movie_id} or rated_movie_id={rated_movie_id}")
|
| 206 |
|
| 207 |
content_score = content_score / len(user_rated_movies)
|
| 208 |
|