prince4332 committed on
Commit
e356b06
·
verified ·
1 Parent(s): 4c1d72f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -7
app.py CHANGED
@@ -37,7 +37,7 @@ initial_user_history = pd.DataFrame({
37
  synthetic_user_history = initial_user_history.copy()
38
 
39
  # Load the new combined sampled data
40
- combined_sampled_data = pd.read_csv('./sampled_movie_ratings_500.csv')
41
 
42
  # Extract raw_ratings_df from the combined data
43
  raw_ratings_df = combined_sampled_data[['userId', 'movieId', 'rating', 'timestamp']].copy()
@@ -76,9 +76,12 @@ for user_id in range(1, n_users + 1):
76
  # Append mock ratings to the initial ratings_df
77
  ratings_df = pd.concat([ratings_df, pd.DataFrame(mock_ratings)], ignore_index=True)
78
 
 
 
 
79
  # Function to rebuild recommendation models based on current history
80
  def rebuild_models():
81
- global user_item_matrix, user_similarity_df, content_similarity
82
 
83
  # Merge synthetic user into the ratings dataset
84
  all_ratings = pd.concat([
@@ -114,6 +117,9 @@ def rebuild_models():
114
  tfidf_matrix = tfidf.fit_transform(movies_db['genres'])
115
  content_similarity = cosine_similarity(tfidf_matrix, tfidf_matrix)
116
 
 
 
 
117
  # Initialize models
118
  rebuild_models()
119
 
@@ -183,14 +189,20 @@ def get_top_movies(user_id, search_query=None, n=15, alpha=0.6):
183
  if len(user_rated_movies) > 0:
184
  # Find the positional index of the movie in movies_db for content_similarity
185
  # Ensure movie_id exists in movies_db before proceeding
186
- if movie_id in movies_db['movieId'].values:
187
- movie_idx = movies_db.index[movies_db['movieId'] == movie_id][0]
188
 
189
  for rated_movie_id in user_rated_movies:
190
  # Ensure rated_movie_id exists in movies_db before proceeding
191
- if rated_movie_id in movies_db['movieId'].values:
192
- rated_idx = movies_db.index[movies_db['movieId'] == rated_movie_id][0]
193
- content_score += content_similarity[movie_idx, rated_idx]
 
 
 
 
 
 
194
 
195
  content_score = content_score / len(user_rated_movies)
196
 
 
37
  synthetic_user_history = initial_user_history.copy()
38
 
39
  # Load the new combined sampled data
40
+ combined_sampled_data = pd.read_csv('./sampled_movie_ratings_for_gradio.csv')
41
 
42
  # Extract raw_ratings_df from the combined data
43
  raw_ratings_df = combined_sampled_data[['userId', 'movieId', 'rating', 'timestamp']].copy()
 
76
  # Append mock ratings to the initial ratings_df
77
  ratings_df = pd.concat([ratings_df, pd.DataFrame(mock_ratings)], ignore_index=True)
78
 
79
+ # Global variable for movie_id to positional index mapping
80
+ movie_id_to_idx = {}
81
+
82
  # Function to rebuild recommendation models based on current history
83
  def rebuild_models():
84
+ global user_item_matrix, user_similarity_df, content_similarity, movie_id_to_idx
85
 
86
  # Merge synthetic user into the ratings dataset
87
  all_ratings = pd.concat([
 
117
  tfidf_matrix = tfidf.fit_transform(movies_db['genres'])
118
  content_similarity = cosine_similarity(tfidf_matrix, tfidf_matrix)
119
 
120
+ # Create a mapping from movieId to its 0-based positional index in movies_db
121
+ movie_id_to_idx = {movie_id: idx for idx, movie_id in enumerate(movies_db['movieId'])}
122
+
123
  # Initialize models
124
  rebuild_models()
125
 
 
189
  if len(user_rated_movies) > 0:
190
  # Find the positional index of the movie in movies_db for content_similarity
191
  # Ensure movie_id exists in movies_db before proceeding
192
+ if movie_id in movie_id_to_idx:
193
+ movie_idx = movie_id_to_idx[movie_id]
194
 
195
  for rated_movie_id in user_rated_movies:
196
  # Ensure rated_movie_id exists in movies_db before proceeding
197
+ if rated_movie_id in movie_id_to_idx:
198
+ rated_idx = movie_id_to_idx[rated_movie_id]
199
+ # Ensure indices are within bounds of content_similarity
200
+ if rated_idx < content_similarity.shape[1] and movie_idx < content_similarity.shape[0]:
201
+ content_score += content_similarity[movie_idx, rated_idx]
202
+ else:
203
+ # Handle cases where index might still be out of bounds due to data inconsistencies
204
+ # This could happen if movies_db was somehow out of sync with content_similarity
205
+ print(f"Warning: Content similarity index out of bounds for movie_id={movie_id} or rated_movie_id={rated_movie_id}")
206
 
207
  content_score = content_score / len(user_rated_movies)
208