"""Streamlit movie recommender: content-based and user-profile-based picks."""

from itertools import islice

import joblib
import numpy as np
import pandas as pd
import streamlit as st
from sklearn.metrics.pairwise import cosine_similarity

# Directory (relative to the working directory) holding the joblib artifacts.
WEIGHTS_DIR = 'weights'

# Result key -> artifact file name, listed in the order the files are loaded.
_WEIGHT_FILES = {
    "content_tfidf_matrix": "content_tfidf_matrix.joblib",
    "title_to_index": "content_title_to_index.joblib",
    "content_vectorizer": "content_vectorizer.joblib",
    "movies": "movies_data.joblib",
    "user_profiles": "user_profiles.joblib",
    "user_tfidf_matrix": "user_tfidf_matrix.joblib",
    "movie_id_to_idx": "user_movie_id_to_idx.joblib",
    "train_ratings": "train_ratings.joblib",
}

# Number of recommendations shown in either mode.
TOP_N = 5


@st.cache_data
def load_weights():
    """Load every precomputed artifact from ``WEIGHTS_DIR``.

    Returns:
        dict: one entry per key of ``_WEIGHT_FILES`` holding the object
        deserialized from the matching file.

    On any loading failure an error is rendered in the app and the script
    run is halted with ``st.stop()``, so callers never see a partial result.
    """
    # NOTE: joblib.load unpickles its input; the artifact files must come
    # from a trusted source.
    # NOTE(review): st.cache_data hands out a copy of the cached value; if
    # the artifacts are large, st.cache_resource may be cheaper -- confirm.
    try:
        return {
            key: joblib.load(f'{WEIGHTS_DIR}/{filename}')
            for key, filename in _WEIGHT_FILES.items()
        }
    except FileNotFoundError as e:
        st.error(f"Missing file: {e.filename}")
        st.stop()
    except Exception as e:
        st.error(f"Error loading weights: {str(e)}")
        st.stop()


# Content-based recommendation (real-time cosine similarity)
def get_similar_movies(title, tfidf_matrix, title_to_index, movies, N=5):
    """Return the ``N`` movies most similar to ``title``.

    Args:
        title: Movie title to look up in ``title_to_index``.
        tfidf_matrix: Feature matrix with one row per movie.
        title_to_index: Mapping from title to a row position of
            ``tfidf_matrix``.
        movies: DataFrame with a ``title`` column, indexed positionally in
            the same row order as ``tfidf_matrix``.
        N: Maximum number of recommendations to return.

    Returns:
        A list of ``(title, similarity)`` tuples in descending similarity
        order, or ``None`` when ``title`` is not in ``title_to_index``.
    """
    # Only the lookup is guarded, so an unrelated KeyError (e.g. a missing
    # DataFrame column) is not misreported as "title not found".
    try:
        index = title_to_index[title]
    except KeyError:
        return None

    # Presumably a duplicated title can yield several positions when the
    # mapping is a pandas Series; use the first one in that case.
    if np.ndim(index) > 0:
        index = np.asarray(index).ravel()[0]

    # Slicing (rather than integer indexing) keeps the query 2-D for both
    # sparse and dense matrices, as cosine_similarity requires.
    target_vector = tfidf_matrix[index:index + 1]
    similarity_scores = cosine_similarity(target_vector, tfidf_matrix).flatten()

    # Drop the queried movie by index, not by rank: movies with identical
    # feature vectors tie at the top, so the query is not guaranteed to be
    # the first entry of the ranking.
    ranked = similarity_scores.argsort()[::-1]
    similar_indices = ranked[ranked != index][:max(N, 0)]

    similar_titles = movies['title'].iloc[similar_indices]
    return list(zip(similar_titles, similarity_scores[similar_indices]))


# User profile-based recommendation
def get_top_n_recommendations(user_id, user_profiles, tfidf_matrix,
                              movie_id_to_idx, movies, train_ratings, n=5):
    """Return up to ``n`` unseen movies ranked by similarity to a user profile.

    Args:
        user_id: Key into ``user_profiles``.
        user_profiles: Mapping from user id to a profile vector.
        tfidf_matrix: Feature matrix with one row per movie.
        movie_id_to_idx: Accepted for interface compatibility; not used.
        movies: DataFrame with ``id`` and ``title`` columns, indexed
            positionally in the same row order as ``tfidf_matrix``.
        train_ratings: DataFrame with ``userId`` and ``movieId`` columns;
            movies the user already rated are excluded.
        n: Maximum number of recommendations to return.

    Returns:
        A list of ``(title, predicted_rating)`` tuples, or ``None`` when
        ``user_id`` has no profile.
    """
    if user_id not in user_profiles:
        return None

    # NOTE(review): movie_id_to_idx is never consulted; rows of tfidf_matrix
    # are assumed to line up positionally with `movies` -- confirm.
    user_profile = user_profiles[user_id]
    similarities = cosine_similarity(
        user_profile.reshape(1, -1), tfidf_matrix
    ).flatten()
    ranked_indices = np.argsort(similarities)[::-1]

    rated_movies = set(
        train_ratings.loc[train_ratings['userId'] == user_id, 'movieId'].values
    )

    # Pull the columns out once and stop as soon as n unseen movies are
    # found, instead of filtering the whole catalogue before slicing.
    movie_ids = movies['id'].to_numpy()
    titles = movies['title'].to_numpy()
    unseen = (idx for idx in ranked_indices if movie_ids[idx] not in rated_movies)
    top_n_indices = list(islice(unseen, max(n, 0)))

    # Map similarity onto the rating scale via 1 + 4 * sim; clip so that a
    # negative similarity cannot produce a rating below 1.
    return [
        (titles[idx], float(np.clip(1 + 4 * similarities[idx], 1.0, 5.0)))
        for idx in top_n_indices
    ]


# Streamlit App
st.set_page_config(page_title="Movie Recommender", page_icon="🎬")
st.title("🎬 Movie Recommender System")
st.markdown("""
This app supports:
- **Content-Based Recommendations**: Find similar movies based on genre.
- **User Profile-Based Recommendations**: Personalized picks based on your ratings.
""")

# Load all weights
weights = load_weights()

# Recommendation type selector
recommendation_type = st.sidebar.radio(
    "Choose Recommendation Type",
    ["Content-Based", "User Profile-Based"],
)

if recommendation_type == "Content-Based":
    st.header("📽️ Content-Based Recommendations")

    # The leading empty option means nothing is recommended until the user
    # actually picks a title.
    movie_title = st.selectbox(
        "Choose a Movie Title",
        [""] + sorted(weights["movies"]['title'].dropna().unique()),
    )

    if movie_title:
        recommendations = get_similar_movies(
            title=movie_title,
            tfidf_matrix=weights["content_tfidf_matrix"],
            title_to_index=weights["title_to_index"],
            movies=weights["movies"],
            N=TOP_N,
        )

        if recommendations:
            st.subheader(f"If you liked **{movie_title}**, you might enjoy:")
            for i, (movie, score) in enumerate(recommendations, 1):
                st.write(f"{i}. {movie} (Similarity Score: {score:.2f})")
        else:
            st.warning(f"No similar movies found for '{movie_title}'.")
else:
    st.header("👤 User Profile-Based Recommendations")
    user_id = st.number_input("Enter your User ID", min_value=1, step=1, value=1)

    if st.button("Get Recommendations"):
        recommendations = get_top_n_recommendations(
            user_id=user_id,
            user_profiles=weights["user_profiles"],
            tfidf_matrix=weights["user_tfidf_matrix"],
            movie_id_to_idx=weights["movie_id_to_idx"],
            movies=weights["movies"],
            train_ratings=weights["train_ratings"],
            n=TOP_N,
        )

        if recommendations:
            st.subheader(f"Top {TOP_N} picks for User ID {user_id}:")
            for i, (movie, pred_rating) in enumerate(recommendations, 1):
                st.write(f"{i}. {movie} (Predicted Rating: {pred_rating:.2f})")
        else:
            st.warning(f"User ID {user_id} not found or not enough ratings.")