File size: 2,189 Bytes
402108a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import streamlit as st
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# -----------------------
# 1. Load and prepare data
# -----------------------
@st.cache_data
def load_data():
    """Read the TMDB credits and movies CSVs and join them.

    Both files are read from the current working directory.

    Returns:
        A ``(merged, movies)`` tuple. ``merged`` is the credits table joined
        to the movies table on ``movie_id == id``, with missing ``overview``
        values replaced by a single space. ``movies`` is the movies table
        exactly as read from its CSV.
    """
    credits = pd.read_csv("tmdb_5000_credits.csv")
    movies = pd.read_csv("tmdb_5000_movies.csv")

    merged = credits.merge(movies, left_on="movie_id", right_on="id")
    # The vectorizer downstream needs strings, so blank out missing overviews.
    merged["overview"] = merged["overview"].fillna(" ")
    return merged, movies

# Load the data at import time (cached by the decorator on load_data):
# `df` is the merged credits+movies table, `df2` is the movies table alone.
df, df2 = load_data()

# -----------------------
# 2. Build TF-IDF and cosine similarity
# -----------------------
@st.cache_resource
def build_model(df):
    """Compute pairwise plot similarity for every row of ``df``.

    Args:
        df: DataFrame with a string-valued ``overview`` column.

    Returns:
        The cosine-similarity matrix of the TF-IDF vectors of the overviews;
        entry ``[i, j]`` compares row ``i`` of ``df`` with row ``j``.
    """
    vectorizer = TfidfVectorizer(stop_words="english")
    overview_vectors = vectorizer.fit_transform(df["overview"])
    return cosine_similarity(overview_vectors, overview_vectors)

# Similarity matrix over the merged table: row/column i is row i of `df`.
cosine_sim = build_model(df)

# -----------------------
# 3. Build reverse index mapping (title -> index)
# -----------------------
# NOTE(review): `cosine_sim` is indexed by rows of the merged `df`, while this
# mapping stores row labels of `df2`. The two agree only if the merge keeps
# every `df2` row in its original order -- confirm against the CSVs.
indices = pd.Series(df2.index, index=df2["title"])
# Series.drop_duplicates() compares the *values* (the row labels, which are
# already unique) and would leave repeated titles in place. Filter on the
# index instead so each title maps to exactly one row: its first occurrence.
indices = indices[~indices.index.duplicated(keep="first")]

# -----------------------
# 4. Recommendation function
# -----------------------
def get_recommendations(title, cosine_sim=cosine_sim):
    """Return the titles of the ten movies most similar to ``title``.

    Args:
        title: Movie title to look up in the module-level ``indices`` mapping.
        cosine_sim: Square similarity matrix; row ``i`` holds the similarity
            of movie ``i`` to every movie. Defaults to the module-level matrix.

    Returns:
        An empty list when ``title`` is unknown. Otherwise a pandas Series of
        up to ten titles taken from ``df2``, ordered from most to least
        similar and never containing the queried movie itself.
    """
    if title not in indices:
        return []

    idx = indices[title]
    # A title that occurs more than once makes the lookup return a Series of
    # positions; fall back to the first occurrence instead of crashing later.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    scores = np.asarray(cosine_sim[idx])
    # Stable descending order: ties keep ascending row order, matching what a
    # stable `sorted(..., reverse=True)` over (position, score) pairs yields.
    ranked = np.argsort(-scores, kind="stable")
    # Remove the queried movie explicitly. It is not guaranteed to rank first
    # (e.g. a blank overview gives an all-zero row, so every score ties).
    movie_indices = ranked[ranked != idx][:10]
    return df2["title"].iloc[movie_indices]

# -----------------------
# 5. Streamlit App UI
# -----------------------
# Page header.
st.title("🎬 Movie Recommendation Engine")
st.markdown("Get recommendations based on similar movie plots!")

# Offer every title from the movies table as a selectable option.
movie_list = df2["title"].values
selected_movie = st.selectbox("Choose a movie to get recommendations", movie_list)

# Run the recommender only when the button reports a click.
if st.button("Recommend"):
    recommendations = get_recommendations(selected_movie)
    # Use len() rather than truthiness: the result may be a pandas Series,
    # whose truth value is ambiguous.
    if len(recommendations) > 0:
        st.success("You might also like:")
        for suggestion in recommendations:
            st.write(f"- {suggestion}")
    else:
        st.warning("Movie not found in the database.")