import numpy as np
import pandas as pd
import streamlit as st
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


# -----------------------
# 1. Load and prepare data
# -----------------------
@st.cache_data
def load_data():
    """Read the credits and movies CSV files and join them.

    Returns:
        A tuple ``(df, df2)``: ``df`` is the credits table merged with the
        movies table (credits ``movie_id`` == movies ``id``) with missing
        ``overview`` values replaced by a single space; ``df2`` is the
        movies table exactly as read from disk.
    """
    # Replace with your file path or relative path
    df1 = pd.read_csv("tmdb_5000_credits.csv")
    df2 = pd.read_csv("tmdb_5000_movies.csv")
    df = pd.merge(df1, df2, left_on="movie_id", right_on="id")
    # TfidfVectorizer cannot handle NaN, so blank out missing overviews.
    df["overview"] = df["overview"].fillna(" ")
    return df, df2


# df2 is no longer read below; the name is kept so the module-level
# variables stay the same as before.
df, df2 = load_data()


# -----------------------
# 2. Build TF-IDF and cosine similarity
# -----------------------
@st.cache_resource
def build_model(df):
    """Return the pairwise cosine-similarity matrix of the overviews.

    Args:
        df: DataFrame with an ``overview`` column of strings.

    Returns:
        A square matrix whose entry ``[i, j]`` is the cosine similarity
        between the TF-IDF vectors of rows ``i`` and ``j`` of ``df``.
    """
    tfidf = TfidfVectorizer(stop_words="english")
    tfidf_matrix = tfidf.fit_transform(df["overview"])
    return cosine_similarity(tfidf_matrix, tfidf_matrix)


cosine_sim = build_model(df)

# -----------------------
# 3. Build reverse index mapping (title -> index)
# -----------------------
# Titles must come from the SAME frame the similarity matrix was built
# from, otherwise row positions can disagree whenever the merge drops or
# reorders rows. If both CSVs carry a "title" column, pandas suffixes the
# right-hand (movies) one as "title_y".
_title_col = "title_y" if "title_y" in df.columns else "title"
titles = df[_title_col].rename("title")

# Map each title to its row position in cosine_sim.
_positions = pd.Series(np.arange(len(titles)), index=titles)
# Series.drop_duplicates() would compare the (already unique) positions,
# not the titles, so duplicated titles are removed via the index instead.
# The first occurrence of each title wins.
indices = _positions[~_positions.index.duplicated(keep="first")]


# -----------------------
# 4. Recommendation function
# -----------------------
def get_recommendations(title, cosine_sim=cosine_sim, top_n=10):
    """Return the titles most similar to ``title``.

    Args:
        title: Movie title to look up in ``indices``.
        cosine_sim: Square similarity matrix aligned row-for-row with
            ``titles``. Defaults to the module-level matrix.
        top_n: Maximum number of recommendations to return.

    Returns:
        An empty list when ``title`` is unknown; otherwise a pandas Series
        of up to ``top_n`` titles ordered from most to least similar,
        never including the queried movie itself.
    """
    if title not in indices:
        return []

    idx = int(indices[title])
    scores = np.asarray(cosine_sim[idx])

    # Stable descending sort: ties keep their original row order, matching
    # what sorted(..., reverse=True) did on the enumerated list.
    order = np.argsort(-scores, kind="stable")

    # Drop the queried movie explicitly rather than assuming it sorts
    # first: an empty overview has zero self-similarity, and identical
    # overviews tie at 1.0.
    order = order[order != idx][:top_n]
    return titles.iloc[order]


# -----------------------
# 5. Streamlit App UI
# -----------------------
st.title("🎬 Movie Recommendation Engine")
st.markdown("Get recommendations based on similar movie plots!")

movie_list = titles.values
selected_movie = st.selectbox("Choose a movie to get recommendations", movie_list)

if st.button("Recommend"):
    recommendations = get_recommendations(selected_movie)
    # len() works for both the empty-list and the Series return values.
    if len(recommendations) == 0:
        st.warning("Movie not found in the database.")
    else:
        st.success("You might also like:")
        for rec in recommendations:
            st.write(f"- {rec}")