File size: 4,989 Bytes
9d08616
 
 
 
005627d
9d08616
 
227ea11
005627d
6696cc6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
543d0f0
6696cc6
005627d
543d0f0
005627d
9d08616
543d0f0
 
9d08616
 
543d0f0
 
9d08616
6696cc6
9d08616
6696cc6
9d08616
 
 
6696cc6
9d08616
 
 
 
 
 
 
 
 
 
543d0f0
6696cc6
 
 
 
543d0f0
 
 
6696cc6
9d08616
6696cc6
 
9d08616
543d0f0
6696cc6
9d08616
 
6696cc6
 
9d08616
 
6696cc6
 
543d0f0
6696cc6
 
 
 
9d08616
543d0f0
9d08616
 
 
543d0f0
9d08616
 
6696cc6
 
9d08616
 
6696cc6
 
 
 
 
 
 
 
 
9d08616
543d0f0
 
 
9d08616
543d0f0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import joblib

@st.cache_resource
def load_weights():
    """Load every pre-computed recommender artifact from the ``weights`` folder.

    The result is cached with ``st.cache_resource`` so the artifacts are
    deserialized once and the same objects are reused on later reruns,
    instead of being pickled and copied on every call. Callers must treat
    the returned objects as read-only.

    Returns:
        dict: The loaded artifacts keyed by ``"movies"``,
        ``"content_tfidf_matrix"``, ``"title_to_index"``,
        ``"content_vectorizer"``, ``"user_profiles"``,
        ``"user_tfidf_matrix"``, ``"movie_id_to_idx"`` and
        ``"train_ratings"``.

    On any loading failure an error is shown in the app and the script run
    is halted with ``st.stop()``, so the function does not return normally.
    """
    weights_path = 'weights'
    # Result key -> artifact file name, listed in the order they are loaded.
    artifact_files = {
        "content_tfidf_matrix": "content_tfidf_matrix.joblib",
        "title_to_index": "content_title_to_index.joblib",
        "content_vectorizer": "content_vectorizer.joblib",
        "movies": "movies_data.joblib",
        "user_profiles": "user_profiles.joblib",
        "user_tfidf_matrix": "user_tfidf_matrix.joblib",
        "movie_id_to_idx": "user_movie_id_to_idx.joblib",
        "train_ratings": "train_ratings.joblib",
    }
    try:
        # SECURITY NOTE: joblib.load unpickles its input and can execute
        # arbitrary code; only ship artifact files from a trusted source.
        return {
            key: joblib.load(f'{weights_path}/{filename}')
            for key, filename in artifact_files.items()
        }
    except FileNotFoundError as e:
        st.error(f"Missing file: {e.filename}")
        st.stop()
    except Exception as e:
        # Top-level boundary: surface any other load failure to the user.
        st.error(f"Error loading weights: {str(e)}")
        st.stop()

# Content-based recommendation (real-time cosine similarity)
def get_similar_movies(title, tfidf_matrix, title_to_index, movies, N=5):
    """Return up to ``N`` movies most similar to ``title`` by cosine similarity.

    Args:
        title: Movie title to look up in ``title_to_index``.
        tfidf_matrix: Feature matrix; the row for ``title`` is compared
            against every row.
        title_to_index: Mapping from title to a row position in
            ``tfidf_matrix``.
        movies: DataFrame with a ``title`` column, addressed by position.
            NOTE(review): assumes its row order matches ``tfidf_matrix``;
            confirm against the code that builds the weights.
        N: Maximum number of recommendations to return.

    Returns:
        A list of ``(title, similarity)`` tuples ordered from most to least
        similar, ``[]`` when ``N`` is not positive, or ``None`` when
        ``title`` is not present in ``title_to_index``.
    """
    # Guard only the lookup so a KeyError raised elsewhere is not silently
    # reported as "unknown title".
    try:
        index = title_to_index[title]
    except KeyError:
        return None

    # A negative N would otherwise turn the slice below into "almost all".
    if N <= 0:
        return []

    # If the lookup yields several positions (e.g. a pandas Series holding
    # duplicate titles), fall back to the first one.
    if np.ndim(index) > 0:
        index = np.asarray(index).ravel()[0]

    target_vector = tfidf_matrix[index]
    similarity_scores = cosine_similarity(target_vector, tfidf_matrix).flatten()

    # Drop the queried movie explicitly instead of assuming it is ranked
    # first: rows with tied scores can be ordered ahead of it.
    ranked_indices = similarity_scores.argsort()[::-1]
    similar_indices = ranked_indices[ranked_indices != index][:N]

    similar_titles = movies['title'].iloc[similar_indices]
    similar_scores = similarity_scores[similar_indices]
    return list(zip(similar_titles, similar_scores))

# User profile-based recommendation
def get_top_n_recommendations(user_id, user_profiles, tfidf_matrix, movie_id_to_idx, movies, train_ratings, n=5):
    """Recommend up to ``n`` movies the user has not rated yet.

    Movies are ranked by cosine similarity between the user's profile
    vector and each row of ``tfidf_matrix``; movies whose ``id`` appears in
    the user's rows of ``train_ratings`` are skipped.

    Args:
        user_id: Key into ``user_profiles`` and value matched against
            ``train_ratings['userId']``.
        user_profiles: Mapping from user id to a profile vector.
        tfidf_matrix: Feature matrix compared against the profile.
        movie_id_to_idx: Unused; kept so existing callers keep working.
            NOTE(review): rows of ``tfidf_matrix`` are assumed to line up
            positionally with ``movies`` -- confirm that this mapping is
            not needed to translate between them.
        movies: DataFrame with ``id`` and ``title`` columns.
        train_ratings: DataFrame with ``userId`` and ``movieId`` columns.
        n: Maximum number of recommendations to return.

    Returns:
        A list of ``(title, predicted_rating)`` tuples, best first, where
        the predicted rating is ``1 + 4 * similarity``; ``[]`` when ``n``
        is not positive; ``None`` when ``user_id`` has no profile.
    """
    if user_id not in user_profiles:
        return None
    # A negative n would otherwise slice off only the tail of the ranking.
    if n <= 0:
        return []

    user_profile = user_profiles[user_id]
    similarities = cosine_similarity(user_profile.reshape(1, -1), tfidf_matrix).flatten()
    ranked_indices = np.argsort(similarities)[::-1]
    rated_movies = set(train_ratings[train_ratings['userId'] == user_id]['movieId'].values)

    movie_ids = movies['id']
    titles = movies['title']
    recommendations = []
    # Walk the ranking best-first and stop as soon as n unseen movies are
    # collected, rather than testing every movie before truncating.
    for idx in ranked_indices:
        if movie_ids.iloc[idx] in rated_movies:
            continue
        recommendations.append((titles.iloc[idx], 1 + 4 * similarities[idx]))
        if len(recommendations) == n:
            break
    return recommendations

# Streamlit App: page chrome first, then one of two recommender views.
st.set_page_config(page_title="Movie Recommender", page_icon="🎬")
st.title("🎬 Movie Recommender System")

st.markdown("""
This app supports:
- **Content-Based Recommendations**: Find similar movies based on genre.
- **User Profile-Based Recommendations**: Personalized picks based on your ratings.
""")

# Load all weights (halts the script with an error message on failure).
weights = load_weights()

# The sidebar radio decides which of the two views is rendered below.
CONTENT_MODE = "Content-Based"
PROFILE_MODE = "User Profile-Based"
recommendation_type = st.sidebar.radio(
    "Choose Recommendation Type",
    [CONTENT_MODE, PROFILE_MODE],
)

if recommendation_type == CONTENT_MODE:
    st.header("📽️ Content-Based Recommendations")
    known_titles = weights["movies"]['title'].dropna().unique()
    # The leading empty entry stands for "nothing selected yet".
    title_options = [""] + sorted(known_titles)
    movie_title = st.selectbox("Choose a Movie Title", title_options)

    if movie_title:
        similar = get_similar_movies(
            movie_title,
            weights["content_tfidf_matrix"],
            weights["title_to_index"],
            weights["movies"],
            N=5,
        )
        # None (unknown title) and an empty list both count as "no result".
        if not similar:
            st.warning(f"No similar movies found for '{movie_title}'.")
        else:
            st.subheader(f"If you liked **{movie_title}**, you might enjoy:")
            for rank, (movie, score) in enumerate(similar, start=1):
                st.write(f"{rank}. {movie} (Similarity Score: {score:.2f})")

else:
    st.header("👤 User Profile-Based Recommendations")
    user_id = st.number_input("Enter your User ID", min_value=1, step=1, value=1)

    # Recommendations are only computed once the button is pressed.
    if st.button("Get Recommendations"):
        picks = get_top_n_recommendations(
            user_id,
            weights["user_profiles"],
            weights["user_tfidf_matrix"],
            weights["movie_id_to_idx"],
            weights["movies"],
            weights["train_ratings"],
            n=5,
        )
        if not picks:
            st.warning(f"User ID {user_id} not found or not enough ratings.")
        else:
            st.subheader(f"Top 5 picks for User ID {user_id}:")
            for rank, (movie, pred_rating) in enumerate(picks, start=1):
                st.write(f"{rank}. {movie} (Predicted Rating: {pred_rating:.2f})")