# app.py -- uploaded by akashraut ("Create app.py", commit 402108a, verified)
import streamlit as st
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# -----------------------
# 1. Load and prepare data
# -----------------------
@st.cache_data
def load_data():
    """Read the TMDB credits and movies CSVs and join them per movie.

    Returns:
        A ``(merged, movies)`` tuple: the credits rows joined to the movies
        rows on ``movie_id == id`` with missing overviews replaced by a
        single space, and the movies table exactly as read from disk.
    """
    # Paths are relative to the working directory; adjust them if the CSVs
    # live elsewhere.
    credits_df = pd.read_csv("tmdb_5000_credits.csv")
    movies_df = pd.read_csv("tmdb_5000_movies.csv")
    merged = credits_df.merge(movies_df, left_on="movie_id", right_on="id")
    # Guarantee every row carries a string overview for the text model.
    merged["overview"] = merged["overview"].fillna(" ")
    return merged, movies_df


df, df2 = load_data()
# -----------------------
# 2. Build TF-IDF and cosine similarity
# -----------------------
@st.cache_resource
def build_model(df):
    """Return the pairwise cosine-similarity matrix of the movie overviews.

    Args:
        df: DataFrame with an ``overview`` text column.

    Returns:
        A square matrix whose entry ``[i, j]`` is the cosine similarity
        between the TF-IDF vectors of rows ``i`` and ``j`` of ``df``.
    """
    # English stop words are dropped before weighting the terms.
    vectorizer = TfidfVectorizer(stop_words="english")
    overview_vectors = vectorizer.fit_transform(df["overview"])
    return cosine_similarity(overview_vectors, overview_vectors)


cosine_sim = build_model(df)
# -----------------------
# 3. Build reverse index mapping (title -> index)
# -----------------------
# Map each title to the row label of its first occurrence in df2.
# Series.drop_duplicates() compares *values* (the already-unique row labels),
# so it never removed repeated titles; a repeated title then made
# `indices[title]` return a Series instead of a single row. Deduplicate on
# the index (the titles) instead.
indices = pd.Series(df2.index, index=df2["title"])
indices = indices[~indices.index.duplicated(keep="first")]
# -----------------------
# 4. Recommendation function
# -----------------------
def get_recommendations(title, cosine_sim=cosine_sim, top_n=10):
    """Return the titles of the movies most similar to ``title``.

    Args:
        title: Movie title to look up in the module-level ``indices`` map.
        cosine_sim: Square similarity matrix indexed by row position.
        top_n: Maximum number of recommendations to return (default 10).

    Returns:
        An empty list when ``title`` is unknown; otherwise a Series of up to
        ``top_n`` titles from ``df2`` ordered from most to least similar,
        never containing the queried movie itself.
    """
    if title not in indices:
        return []

    idx = indices[title]
    # A title that appears more than once yields a Series of rows; use the
    # first match so the lookup below stays one-dimensional.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    scores = np.asarray(cosine_sim[idx])
    # Stable descending order: ties keep their original row order, matching
    # sorted(..., reverse=True).
    ranked = np.argsort(-scores, kind="stable")
    # Drop the queried movie explicitly rather than assuming it sorts first:
    # with tied scores (identical or blank overviews) it may not be rank 0.
    movie_indices = ranked[ranked != idx][:top_n]
    return df2["title"].iloc[movie_indices]
# -----------------------
# 5. Streamlit App UI
# -----------------------
# Page header.
st.title("🎬 Movie Recommendation Engine")
st.markdown("Get recommendations based on similar movie plots!")

# Let the user pick any title present in the movies table.
movie_list = df2["title"].values
selected_movie = st.selectbox("Choose a movie to get recommendations", movie_list)

# Recommendations are only computed once the button is pressed.
if st.button("Recommend"):
    recommendations = get_recommendations(selected_movie)
    # len() is used on purpose: the result may be a pandas Series, whose
    # truth value is ambiguous.
    if len(recommendations) != 0:
        st.success("You might also like:")
        for suggested_title in recommendations:
            st.write(f"- {suggested_title}")
    else:
        st.warning("Movie not found in the database.")