Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import numpy as np | |
| import pandas as pd | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| # ----------------------- | |
| # 1. Load and prepare data | |
| # ----------------------- | |
def load_data(
    credits_path="tmdb_5000_credits.csv",
    movies_path="tmdb_5000_movies.csv",
):
    """Read the credits and movies CSVs and join them on the movie id.

    Args:
        credits_path: Path of the credits CSV; it must have a ``movie_id``
            column. Defaults to the file name the app originally used.
        movies_path: Path of the movies CSV; it must have ``id`` and
            ``overview`` columns. Defaults to the file name the app
            originally used.

    Returns:
        A ``(df, df2)`` tuple. ``df`` is the inner join of credits and
        movies (``movie_id`` == ``id``) with missing overviews replaced by a
        single space. ``df2`` is the movies frame exactly as read.

    NOTE(review): this re-reads both files on every call. Streamlit reruns
    the script on each widget interaction, so consider wrapping it with
    Streamlit's data cache -- confirm the deployed version supports it.
    """
    df1 = pd.read_csv(credits_path)
    df2 = pd.read_csv(movies_path)
    df = pd.merge(df1, df2, left_on="movie_id", right_on="id")
    # Text vectorization downstream needs a string for every row, so movies
    # without a plot summary get a blank document instead of NaN.
    df["overview"] = df["overview"].fillna(" ")
    return df, df2
# Load once when the script runs: `df` (merged credits + movies) feeds the
# similarity model, `df2` (movies only) supplies the titles shown to the user.
df, df2 = load_data()
| # ----------------------- | |
| # 2. Build TF-IDF and cosine similarity | |
| # ----------------------- | |
def build_model(df):
    """Compute pairwise cosine similarity between the movie overviews.

    Args:
        df: Frame with an ``overview`` column of plot-summary strings.

    Returns:
        The square similarity matrix produced by ``cosine_similarity``;
        entry ``[i, j]`` compares the overviews in rows ``i`` and ``j``.
    """
    vectorizer = TfidfVectorizer(stop_words="english")
    overview_vectors = vectorizer.fit_transform(df["overview"])
    return cosine_similarity(overview_vectors, overview_vectors)
# Pairwise overview-similarity matrix, computed once when the script runs.
cosine_sim = build_model(df)

# -----------------------
# 3. Build reverse index mapping (title -> index)
# -----------------------
# Series.drop_duplicates() compares *values* (the row labels of df2, which are
# already unique), so it never removed repeated titles. De-duplicate on the
# index instead, keeping the first row for each title, so that
# `indices[title]` always yields a single row label rather than a Series.
indices = pd.Series(df2.index, index=df2["title"])
indices = indices[~indices.index.duplicated(keep="first")]
| # ----------------------- | |
| # 4. Recommendation function | |
| # ----------------------- | |
def get_recommendations(title, cosine_sim=cosine_sim):
    """Return the titles of the ten movies most similar to *title*.

    Args:
        title: Movie title to look up in the module-level ``indices`` mapping.
        cosine_sim: Square similarity matrix; row ``i`` holds the scores of
            movie ``i`` against every movie. Defaults to the module-level
            matrix.

    Returns:
        An empty list when *title* is not in ``indices``; otherwise a
        ``pandas.Series`` of up to ten titles from ``df2``, best match first.

    NOTE(review): rows of ``cosine_sim`` are ordered like the merged frame,
    while titles are read positionally from ``df2``. This assumes the merge
    keeps ``df2``'s row count and order -- confirm against the data.
    """
    if title not in indices:
        return []

    idx = indices[title]
    # A title that occurs more than once makes the lookup return a Series of
    # positions instead of a scalar; use the first occurrence.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    scores = cosine_sim[idx]
    # Exclude the queried movie explicitly instead of assuming it sorts first:
    # another movie can tie with it (identical overview), and a blank overview
    # scores 0 even against itself. sorted() is stable, so equal scores keep
    # ascending row order.
    ranked = sorted(
        (i for i in range(len(scores)) if i != idx),
        key=lambda i: scores[i],
        reverse=True,
    )
    return df2["title"].iloc[ranked[:10]]
| # ----------------------- | |
| # 5. Streamlit App UI | |
| # ----------------------- | |
# Page header.
st.title("🎬 Movie Recommendation Engine")
st.markdown("Get recommendations based on similar movie plots!")

# Let the user pick any title present in the movies frame.
movie_list = df2["title"].values
selected_movie = st.selectbox("Choose a movie to get recommendations", movie_list)

if st.button("Recommend"):
    recommendations = get_recommendations(selected_movie)
    # len() works for both return shapes (empty list or pandas Series);
    # plain truthiness would raise on a Series.
    if len(recommendations) != 0:
        st.success("You might also like:")
        for rec in recommendations:
            st.write(f"- {rec}")
    else:
        st.warning("Movie not found in the database.")