Janesh1 committed on
Commit
6696cc6
·
verified ·
1 Parent(s): 5f843c2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -109
app.py CHANGED
@@ -4,101 +4,52 @@ import numpy as np
4
  from sklearn.metrics.pairwise import cosine_similarity
5
  import joblib
6
  import os
7
- import glob
8
- import tempfile
9
- from huggingface_hub import hf_hub_download, HfApi
10
 
11
- # Force reinstall numpy to fix numpy._core issue
12
- try:
13
- st.write("Ensuring NumPy is installed correctly...")
14
- subprocess.check_call([sys.executable, "-m", "pip", "install", "--force-reinstall", "numpy==1.23.5"])
15
- import numpy._core
16
- st.write("NumPy imported successfully.")
17
- except ImportError as e:
18
- st.error(f"Failed to import numpy._core after reinstall: {str(e)}. Contact support or try restarting the Space.")
19
- st.stop()
20
- except subprocess.CalledProcessError as e:
21
- st.error(f"Failed to reinstall numpy: {str(e)}. Check network or Space environment.")
22
- st.stop()
23
-
24
- # Load weights from Hugging Face
25
  @st.cache_data
26
  def load_weights():
27
  try:
28
- temp_dir = tempfile.mkdtemp()
29
- repo_id = "Janesh1/Movie_recommendation" # Replace with your Hugging Face repo (e.g., jdoe/movie-recommender-weights)
30
- weight_files = [
31
- 'content_tfidf_matrix.joblib',
32
- 'content_title_to_index.joblib',
33
- 'content_vectorizer.joblib',
34
- 'movies_data.joblib',
35
- 'user_profiles.joblib',
36
- 'user_tfidf_matrix.joblib',
37
- 'user_movie_id_to_idx.joblib',
38
- 'train_ratings.joblib',
39
- 'test_ratings.joblib'
40
- ]
41
-
42
- reassembled_files = {}
43
- api = HfApi()
44
- for weight_file in weight_files:
45
- # Check for chunked files
46
- chunks = sorted([f for f in api.list_repo_files(repo_id=repo_id, repo_type="space")
47
- if f.startswith('weights/' + weight_file + '.part')])
48
- if chunks:
49
- temp_path = os.path.join(temp_dir, weight_file)
50
- with open(temp_path, 'wb') as outfile:
51
- for chunk in chunks:
52
- try:
53
- chunk_path = hf_hub_download(repo_id=repo_id, filename=chunk, repo_type="space")
54
- with open(chunk_path, 'rb') as infile:
55
- outfile.write(infile.read())
56
- except Exception as e:
57
- st.error(f"Failed to download chunk {chunk}: {str(e)}")
58
- raise
59
- reassembled_files[weight_file] = temp_path
60
- else:
61
- # Download single file
62
- try:
63
- temp_path = hf_hub_download(repo_id=repo_id, filename=f'weights/{weight_file}', repo_type="space")
64
- reassembled_files[weight_file] = temp_path
65
- except Exception as e:
66
- st.error(f"Failed to download {weight_file}: {str(e)}")
67
- raise
68
-
69
- # Load weights
70
- try:
71
- content_tfidf_matrix = joblib.load(reassembled_files['content_tfidf_matrix.joblib'])
72
- title_to_index = joblib.load(reassembled_files['content_title_to_index.joblib'])
73
- movies = joblib.load(reassembled_files['movies_data.joblib'])
74
- user_profiles = joblib.load(reassembled_files['user_profiles.joblib'])
75
- user_tfidf_matrix = joblib.load(reassembled_files['user_tfidf_matrix.joblib'])
76
- movie_id_to_idx = joblib.load(reassembled_files['user_movie_id_to_idx.joblib'])
77
- train_ratings = joblib.load(reassembled_files['train_ratings.joblib'])
78
- except Exception as e:
79
- st.error(f"Error deserializing weights with joblib: {str(e)}. Possible numpy or joblib incompatibility.")
80
- raise
81
-
82
- return (movies, content_tfidf_matrix, title_to_index,
83
- user_profiles, user_tfidf_matrix, movie_id_to_idx, train_ratings)
84
  except Exception as e:
85
- st.error(f"Error loading weights from Hugging Face: {str(e)}")
86
  st.stop()
87
 
88
- # Content-based recommendation function (on-the-fly similarity)
89
- def get_similar_movies(title, tfidf_matrix, title_to_index, movies, N=5):
90
  try:
91
  index = title_to_index[title]
92
- movie_vector = tfidf_matrix[index]
93
- similarity_scores = cosine_similarity(movie_vector, tfidf_matrix).flatten()
94
  similar_indices = similarity_scores.argsort()[::-1][1:N+1]
95
- similar_movies = movies['title'].iloc[similar_indices]
96
  similar_scores = similarity_scores[similar_indices]
97
- return list(zip(similar_movies, similar_scores))
98
  except KeyError:
99
  return None
100
 
101
- # User profile-based recommendation function
102
  def get_top_n_recommendations(user_id, user_profiles, tfidf_matrix, movie_id_to_idx, movies, train_ratings, n=5):
103
  if user_id not in user_profiles:
104
  return None
@@ -109,47 +60,58 @@ def get_top_n_recommendations(user_id, user_profiles, tfidf_matrix, movie_id_to_
109
  top_n_indices = [idx for idx in movie_indices if movies['id'].iloc[idx] not in rated_movies][:n]
110
  return [(movies['title'].iloc[idx], 1 + 4 * similarities[idx]) for idx in top_n_indices]
111
 
112
- # Streamlit app
113
- st.title("🎥 Movie Recommender System")
114
- st.write("Pick a way to find awesome movies! Either choose a movie you like or enter your user ID for personalized picks.")
 
 
 
 
 
 
115
 
116
- # Load weights from Hugging Face
117
- try:
118
- (movies, content_tfidf_matrix, title_to_index,
119
- user_profiles, user_tfidf_matrix, movie_id_to_idx, train_ratings) = load_weights()
120
- except Exception as e:
121
- st.error(f"Failed to initialize weights: {str(e)}")
122
- st.stop()
123
 
124
- # Sidebar for selecting recommendation type
125
- recommendation_type = st.sidebar.selectbox("Choose Recommendation Type", ["Content-Based", "User Profile-Based"])
126
 
127
  if recommendation_type == "Content-Based":
128
- st.header("Content-Based Movie Recommendations")
129
- st.write("Select a movie title to find similar movies based on genres.")
130
-
131
- movie_title = st.selectbox("Select a Movie", options=[""] + sorted(movies['title'].dropna().unique()))
132
 
133
  if movie_title:
134
- recommendations = get_similar_movies(movie_title, content_tfidf_matrix, title_to_index, movies, N=5)
 
 
 
 
 
 
135
  if recommendations:
136
- st.write(f"**Movies similar to '{movie_title}':**")
137
  for i, (movie, score) in enumerate(recommendations, 1):
138
  st.write(f"{i}. {movie} (Similarity Score: {score:.2f})")
139
  else:
140
- st.error(f"Oops! Movie '{movie_title}' not found. Try another title!")
141
 
142
  else:
143
- st.header("User Profile-Based Movie Recommendations")
144
- st.write("Enter your user ID to get personalized movie picks based on your ratings.")
145
-
146
- user_id = st.number_input("Enter User ID", min_value=1, step=1, value=1)
147
 
148
  if st.button("Get Recommendations"):
149
- recommendations = get_top_n_recommendations(user_id, user_profiles, user_tfidf_matrix, movie_id_to_idx, movies, train_ratings, n=5)
 
 
 
 
 
 
 
 
150
  if recommendations:
151
- st.write(f"**Top 5 recommendations for User {user_id}:**")
152
- for i, (movie, pred_rating) in enumerate(recommendations, 1):
153
- st.write(f"{i}. {movie} (Predicted Rating: {pred_rating:.2f})")
154
  else:
155
- st.error(f"Oops! User ID {user_id} not found or hasn't rated enough movies. Try another ID!")
 
 
4
  from sklearn.metrics.pairwise import cosine_similarity
5
  import joblib
6
  import os
 
 
 
7
 
8
+ # Load precomputed weights
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  @st.cache_data
10
  def load_weights():
11
  try:
12
+ weights_path = 'weights'
13
+ content_tfidf_matrix = joblib.load(f'{weights_path}/content_tfidf_matrix.joblib')
14
+ content_similarity_matrix = joblib.load(f'{weights_path}/content_similarity_matrix.joblib')
15
+ title_to_index = joblib.load(f'{weights_path}/content_title_to_index.joblib')
16
+ content_vectorizer = joblib.load(f'{weights_path}/content_vectorizer.joblib')
17
+ movies = joblib.load(f'{weights_path}/movies_data.joblib')
18
+ user_profiles = joblib.load(f'{weights_path}/user_profiles.joblib')
19
+ user_tfidf_matrix = joblib.load(f'{weights_path}/user_tfidf_matrix.joblib')
20
+ movie_id_to_idx = joblib.load(f'{weights_path}/user_movie_id_to_idx.joblib')
21
+ train_ratings = joblib.load(f'{weights_path}/train_ratings.joblib')
22
+ return {
23
+ "movies": movies,
24
+ "content_tfidf_matrix": content_tfidf_matrix,
25
+ "content_similarity_matrix": content_similarity_matrix,
26
+ "title_to_index": title_to_index,
27
+ "content_vectorizer": content_vectorizer,
28
+ "user_profiles": user_profiles,
29
+ "user_tfidf_matrix": user_tfidf_matrix,
30
+ "movie_id_to_idx": movie_id_to_idx,
31
+ "train_ratings": train_ratings
32
+ }
33
+ except FileNotFoundError as e:
34
+ st.error(f"Weight file missing: {e.filename}")
35
+ st.stop()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  except Exception as e:
37
+ st.error(f"An unexpected error occurred while loading weights: {str(e)}")
38
  st.stop()
39
 
40
+ # Content-based recommendation
41
+ def get_similar_movies(title, similarity_matrix, title_to_index, movies, N=5):
42
  try:
43
  index = title_to_index[title]
44
+ similarity_scores = similarity_matrix[index]
 
45
  similar_indices = similarity_scores.argsort()[::-1][1:N+1]
46
+ similar_movies = movies.iloc[similar_indices][['title', 'genres']]
47
  similar_scores = similarity_scores[similar_indices]
48
+ return list(zip(similar_movies['title'], similar_scores))
49
  except KeyError:
50
  return None
51
 
52
+ # User profile-based recommendation
53
  def get_top_n_recommendations(user_id, user_profiles, tfidf_matrix, movie_id_to_idx, movies, train_ratings, n=5):
54
  if user_id not in user_profiles:
55
  return None
 
60
  top_n_indices = [idx for idx in movie_indices if movies['id'].iloc[idx] not in rated_movies][:n]
61
  return [(movies['title'].iloc[idx], 1 + 4 * similarities[idx]) for idx in top_n_indices]
62
 
63
+ # --- Streamlit App ---
64
+ st.set_page_config(page_title="Movie Recommender", page_icon="🎬")
65
+ st.title("🎬 Movie Recommender System")
66
+
67
+ st.markdown("""
68
+ This app provides two types of movie recommendations:
69
+ - **Content-Based Filtering**: Suggests movies similar to one you like.
70
+ - **User Profile-Based Filtering**: Personalized recommendations based on your past ratings.
71
+ """)
72
 
73
+ # Load all weights
74
+ weights = load_weights()
 
 
 
 
 
75
 
76
+ recommendation_type = st.sidebar.radio("Choose Recommendation Type", ["Content-Based", "User Profile-Based"])
 
77
 
78
  if recommendation_type == "Content-Based":
79
+ st.header("📽️ Content-Based Recommendations")
80
+ movie_title = st.selectbox("Choose a Movie Title", [""] + sorted(weights["movies"]['title'].dropna().unique()))
 
 
81
 
82
  if movie_title:
83
+ recommendations = get_similar_movies(
84
+ title=movie_title,
85
+ similarity_matrix=weights["content_similarity_matrix"],
86
+ title_to_index=weights["title_to_index"],
87
+ movies=weights["movies"],
88
+ N=5
89
+ )
90
  if recommendations:
91
+ st.subheader(f"Because you liked **{movie_title}**, you might also enjoy:")
92
  for i, (movie, score) in enumerate(recommendations, 1):
93
  st.write(f"{i}. {movie} (Similarity Score: {score:.2f})")
94
  else:
95
+ st.warning(f"Could not find recommendations for **{movie_title}**.")
96
 
97
  else:
98
+ st.header("👤 User Profile-Based Recommendations")
99
+ user_id = st.number_input("Enter your User ID", min_value=1, step=1, value=1)
 
 
100
 
101
  if st.button("Get Recommendations"):
102
+ recommendations = get_top_n_recommendations(
103
+ user_id=user_id,
104
+ user_profiles=weights["user_profiles"],
105
+ tfidf_matrix=weights["user_tfidf_matrix"],
106
+ movie_id_to_idx=weights["movie_id_to_idx"],
107
+ movies=weights["movies"],
108
+ train_ratings=weights["train_ratings"],
109
+ n=5
110
+ )
111
  if recommendations:
112
+ st.subheader(f"Top picks for User ID {user_id}:")
113
+ for i, (movie, rating) in enumerate(recommendations, 1):
114
+ st.write(f"{i}. {movie} (Predicted Rating: {rating:.2f})")
115
  else:
116
+ st.warning(f"No recommendations available for User ID {user_id}. Try a different ID.")
117
+