# --- Web page header captured with the file (not part of the program) ---
# Janesh1's picture
# Update app.py
# 543d0f0 verified
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import joblib
# cache_resource (rather than cache_data) keeps ONE shared copy of the loaded
# artifacts in memory.  cache_data would pickle the whole dictionary and hand
# back a freshly deserialized copy on every script rerun, which is wasteful for
# large, read-only objects such as matrices and fitted vectorizers.
@st.cache_resource
def load_weights():
    """Load every pre-computed recommender artifact from the ``weights`` folder.

    Returns:
        dict: the loaded objects under the keys ``"movies"``,
        ``"content_tfidf_matrix"``, ``"title_to_index"``,
        ``"content_vectorizer"``, ``"user_profiles"``,
        ``"user_tfidf_matrix"``, ``"movie_id_to_idx"`` and
        ``"train_ratings"``.

    On failure nothing is returned: an error message is rendered with
    ``st.error`` and the script run is halted with ``st.stop()``.

    Notes:
        * The returned objects are shared between reruns; callers must treat
          them as read-only.
        * ``joblib.load`` unpickles the files, so only artifacts from a
          trusted source should ever be placed in the ``weights`` folder.
    """
    try:
        weights_path = 'weights'
        content_tfidf_matrix = joblib.load(f'{weights_path}/content_tfidf_matrix.joblib')
        title_to_index = joblib.load(f'{weights_path}/content_title_to_index.joblib')
        content_vectorizer = joblib.load(f'{weights_path}/content_vectorizer.joblib')
        movies = joblib.load(f'{weights_path}/movies_data.joblib')
        user_profiles = joblib.load(f'{weights_path}/user_profiles.joblib')
        user_tfidf_matrix = joblib.load(f'{weights_path}/user_tfidf_matrix.joblib')
        movie_id_to_idx = joblib.load(f'{weights_path}/user_movie_id_to_idx.joblib')
        train_ratings = joblib.load(f'{weights_path}/train_ratings.joblib')
        return {
            "movies": movies,
            "content_tfidf_matrix": content_tfidf_matrix,
            "title_to_index": title_to_index,
            "content_vectorizer": content_vectorizer,
            "user_profiles": user_profiles,
            "user_tfidf_matrix": user_tfidf_matrix,
            "movie_id_to_idx": movie_id_to_idx,
            "train_ratings": train_ratings,
        }
    except FileNotFoundError as e:
        # Name the missing artifact so the deployment can be fixed quickly.
        st.error(f"Missing file: {e.filename}")
        st.stop()
    except Exception as e:
        # Top-level boundary: any other load failure is shown to the user
        # instead of surfacing as a raw traceback.
        st.error(f"Error loading weights: {e}")
        st.stop()
# Content-based recommendation (real-time cosine similarity)
def get_similar_movies(title, tfidf_matrix, title_to_index, movies, N=5):
    """Return the ``N`` movies whose feature vectors are closest to ``title``.

    Args:
        title: Title to look up in ``title_to_index``.
        tfidf_matrix: Feature matrix; row ``title_to_index[title]`` is used as
            the query vector and every row is scored against it.
        title_to_index: Mapping from title to row position in ``tfidf_matrix``.
        movies: Table exposing a ``'title'`` column and positional ``.iloc``
            access. NOTE(review): its row order is assumed to match the rows
            of ``tfidf_matrix`` - confirm against the training pipeline.
        N: Maximum number of recommendations to return.

    Returns:
        ``None`` when ``title`` is not present in ``title_to_index``;
        otherwise a list of ``(title, similarity_score)`` tuples ordered from
        most to least similar (empty when ``N`` is not positive).  The query
        movie itself is never part of the result.
    """
    # Only the title lookup is allowed to signal "unknown movie"; a KeyError
    # raised further down (e.g. a missing column) is a data problem and must
    # not be silently reported as "no such title".
    try:
        index = title_to_index[title]
    except KeyError:
        return None

    if N <= 0:
        return []

    # A non-scalar lookup result (e.g. several rows sharing one title) would
    # break the single-query-vector logic below, so use the first position.
    if np.ndim(index) > 0:
        positions = np.asarray(index).ravel()
        if positions.size == 0:
            return None
        index = positions[0]

    target_vector = tfidf_matrix[index]
    similarity_scores = cosine_similarity(target_vector, tfidf_matrix).flatten()

    ranked = similarity_scores.argsort()[::-1]
    # Drop the query row explicitly instead of assuming it sorts first: when
    # other movies tie with it (identical feature vectors) the query can land
    # anywhere among the ties, and slicing off position 0 would then discard a
    # genuine match while recommending the movie to itself.
    similar_indices = ranked[ranked != index][:N]

    similar_titles = movies['title'].iloc[similar_indices]
    similar_scores = similarity_scores[similar_indices]
    return list(zip(similar_titles, similar_scores))
# User profile-based recommendation
def get_top_n_recommendations(user_id, user_profiles, tfidf_matrix, movie_id_to_idx, movies, train_ratings, n=5):
    """Recommend up to ``n`` movies the user has not rated yet.

    Args:
        user_id: Key identifying the user in ``user_profiles`` and in the
            ``'userId'`` column of ``train_ratings``.
        user_profiles: Mapping from user id to that user's profile vector.
        tfidf_matrix: Feature matrix; every row is scored against the profile.
        movie_id_to_idx: Accepted for interface compatibility; this
            implementation does not use it.
        movies: Table with ``'id'`` and ``'title'`` columns.
            NOTE(review): its row order is assumed to match the rows of
            ``tfidf_matrix`` - confirm against the training pipeline.
        train_ratings: Table with ``'userId'`` and ``'movieId'`` columns; the
            user's rows determine which movies are skipped.
        n: Maximum number of recommendations to return.

    Returns:
        ``None`` when ``user_id`` has no profile; otherwise a list of
        ``(title, predicted_rating)`` tuples, best match first (empty when
        ``n`` is not positive).  The predicted rating is
        ``1 + 4 * cosine_similarity``.
    """
    if user_id not in user_profiles:
        return None
    if n <= 0:
        return []

    user_profile = user_profiles[user_id]
    similarities = cosine_similarity(user_profile.reshape(1, -1), tfidf_matrix).flatten()
    movie_indices = np.argsort(similarities)[::-1]

    rated_movies = set(train_ratings[train_ratings['userId'] == user_id]['movieId'].values)

    # Hoist the id column out of the loop and stop as soon as n picks are
    # found, rather than filtering every movie through a per-row pandas lookup
    # only to keep the first n.
    movie_ids = movies['id'].values
    top_n_indices = []
    for idx in movie_indices:
        if movie_ids[idx] in rated_movies:
            continue
        top_n_indices.append(idx)
        if len(top_n_indices) >= n:
            break

    return [(movies['title'].iloc[idx], 1 + 4 * similarities[idx]) for idx in top_n_indices]
# ---------------------------------------------------------------------------
# Streamlit page: header, artifact loading, then one of two recommender flows
# ---------------------------------------------------------------------------
st.set_page_config(page_title="Movie Recommender", page_icon="🎬")
st.title("🎬 Movie Recommender System")
st.markdown("""
This app supports:
- **Content-Based Recommendations**: Find similar movies based on genre.
- **User Profile-Based Recommendations**: Personalized picks based on your ratings.
""")

# Every pre-computed artifact, keyed by name.
weights = load_weights()

# The sidebar radio decides which of the two flows is rendered below.
recommendation_type = st.sidebar.radio(
    "Choose Recommendation Type",
    ["Content-Based", "User Profile-Based"],
)

if recommendation_type != "Content-Based":
    # ---- Flow 2: recommendations from a stored user profile ----
    st.header("👤 User Profile-Based Recommendations")
    user_id = st.number_input("Enter your User ID", min_value=1, step=1, value=1)

    # Nothing is computed until the button is pressed.
    if st.button("Get Recommendations"):
        recommendations = get_top_n_recommendations(
            user_id=user_id,
            user_profiles=weights["user_profiles"],
            tfidf_matrix=weights["user_tfidf_matrix"],
            movie_id_to_idx=weights["movie_id_to_idx"],
            movies=weights["movies"],
            train_ratings=weights["train_ratings"],
            n=5,
        )
        if not recommendations:
            st.warning(f"User ID {user_id} not found or not enough ratings.")
        else:
            st.subheader(f"Top 5 picks for User ID {user_id}:")
            for rank, (picked_title, predicted) in enumerate(recommendations, start=1):
                st.write(f"{rank}. {picked_title} (Predicted Rating: {predicted:.2f})")
else:
    # ---- Flow 1: movies similar to a chosen title ----
    st.header("📽️ Content-Based Recommendations")

    # A leading empty option means no movie is selected on first render.
    known_titles = sorted(weights["movies"]['title'].dropna().unique())
    movie_title = st.selectbox("Choose a Movie Title", ["", *known_titles])

    if movie_title:
        recommendations = get_similar_movies(
            title=movie_title,
            tfidf_matrix=weights["content_tfidf_matrix"],
            title_to_index=weights["title_to_index"],
            movies=weights["movies"],
            N=5,
        )
        if not recommendations:
            st.warning(f"No similar movies found for '{movie_title}'.")
        else:
            st.subheader(f"If you liked **{movie_title}**, you might enjoy:")
            for rank, (similar_title, similarity) in enumerate(recommendations, start=1):
                st.write(f"{rank}. {similar_title} (Similarity Score: {similarity:.2f})")