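# NOTE: the text "Spaces: Sleeping / Sleeping" that stood here is page-capture
# residue from the hosting site, not part of the program.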
import os

import pandas as pd
import requests
import spacy
import streamlit as st
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from textblob import TextBlob
# TMDB API key. Set the TMDB_API_KEY environment variable to supply your own
# key without editing this file; the literal below is only a fallback so that
# existing deployments keep working.
# NOTE(security): a key committed to source control is effectively public --
# rotate it and remove the fallback once the environment variable is in place.
API_KEY = os.environ.get("TMDB_API_KEY") or "bbb69cf69be036e363d9ab8996f7f4ee"
BASE_URL = "https://api.themoviedb.org/3"
# TMDB image base URL used to build poster links
IMAGE_BASE_URL = "https://image.tmdb.org/t/p/w500"
# Load the spaCy English NLP model
nlp = spacy.load("en_core_web_sm")
# ===========================
# STEP 1: FETCH MOVIE DATA
# ===========================
def fetch_movies(num_pages=2):
    """Fetch popular movies from the TMDB discover endpoint.

    Args:
        num_pages: Number of result pages to request, starting at page 1.

    Returns:
        A DataFrame with one row per movie and the columns ``id``, ``title``,
        ``overview``, ``vote_average`` and ``release_date``. The columns are
        present even when no movie could be fetched, so callers can index
        them unconditionally.
    """
    columns = ["id", "title", "overview", "vote_average", "release_date"]
    all_movies = []
    for page in range(1, num_pages + 1):
        response = requests.get(
            f"{BASE_URL}/discover/movie",
            params={
                "api_key": API_KEY,
                "language": "en-US",
                "sort_by": "popularity.desc",
                "page": page,
            },
            # Without a timeout a stalled connection would hang the app forever.
            timeout=10,
        )
        data = response.json()
        # Pages whose payload has no "results" key (e.g. an API error) are skipped.
        for movie in data.get("results", []):
            # A record without id/title cannot be enriched or displayed later.
            if "id" not in movie or "title" not in movie:
                continue
            # .get() so one record with a missing optional field does not
            # abort the whole fetch with a KeyError.
            all_movies.append({column: movie.get(column) for column in columns})
    return pd.DataFrame(all_movies, columns=columns)
# ===========================
# STEP 2: FETCH ADDITIONAL DETAILS (GENRES, CAST, DIRECTOR)
# ===========================
def fetch_genres():
    """Retrieve the TMDB movie genre list as a ``{genre_id: genre_name}`` dict.

    Returns:
        A dict mapping each genre ID to its name; empty when the response
        carries no ``genres`` entry (e.g. an API error payload).
    """
    response = requests.get(
        f"{BASE_URL}/genre/movie/list",
        params={"api_key": API_KEY, "language": "en-US"},
        # Without a timeout a stalled connection would hang the app forever.
        timeout=10,
    )
    data = response.json()
    return {genre["id"]: genre["name"] for genre in data.get("genres", [])}
def fetch_movie_details(movie_id):
    """Fetch the top 3 cast members and the director for a given movie.

    Args:
        movie_id: TMDB identifier of the movie.

    Returns:
        A ``(cast, director)`` tuple. ``cast`` is a comma-separated string of
        up to three names (empty when the response lists no cast);
        ``director`` is the name of the first crew member whose job is
        "Director", or "Unknown" when there is none.
    """
    # One request only: the credits payload holds both the cast and crew lists.
    response = requests.get(
        f"{BASE_URL}/movie/{movie_id}/credits",
        params={"api_key": API_KEY},
        timeout=10,
    )
    data = response.json()
    # Top 3 cast members, in the order the API lists them
    cast = ", ".join(member["name"] for member in data.get("cast", [])[:3])
    # .get("job") so a crew entry without a job field is simply not a match
    director = next(
        (
            crew["name"]
            for crew in data.get("crew", [])
            if crew.get("job") == "Director"
        ),
        "Unknown",
    )
    return cast, director
# ===========================
# STEP 3: ENRICH MOVIE DATA WITH CAST, DIRECTOR
# ===========================
def enhance_movie_data(movies_df):
    """Add top-cast and director columns to the movie dataset.

    Calls ``fetch_movie_details`` once per row of ``movies_df["id"]`` and
    stores the results in the new ``cast`` and ``director`` columns.

    Args:
        movies_df: DataFrame with an ``id`` column of TMDB movie IDs. It is
            modified in place.

    Returns:
        The same DataFrame, with ``cast`` and ``director`` columns added.
    """
    if movies_df.empty:
        # Unpacking zip(*[]) would raise; just create the empty columns.
        movies_df["cast"] = []
        movies_df["director"] = []
        return movies_df

    details = [fetch_movie_details(movie_id) for movie_id in movies_df["id"]]
    movies_df["cast"] = [cast for cast, _ in details]
    movies_df["director"] = [director for _, director in details]
    return movies_df
# ===========================
# STEP 4: FEATURE ENGINEERING (KEYWORDS & SENTIMENT)
# ===========================
def extract_keywords(text, num_keywords=5):
    """Extract up to ``num_keywords`` keywords from ``text`` using TF-IDF.

    The vectorizer is fitted on ``text`` alone, with English stop words
    removed and the vocabulary capped at 50 terms.

    Args:
        text: The text to analyse.
        num_keywords: Maximum number of keywords to return.

    Returns:
        A comma-separated string of keywords, ordered from lowest to highest
        score. An empty string when ``text`` holds no usable terms (blank or
        only stop words) or when ``num_keywords`` is not positive.
    """
    # Guard first: a [-0:] slice would otherwise return *every* term.
    if num_keywords <= 0 or not text.strip():
        return ""

    vectorizer = TfidfVectorizer(stop_words="english", max_features=50)
    try:
        tfidf_matrix = vectorizer.fit_transform([text])
    except ValueError:
        # scikit-learn raises when the vocabulary is empty, e.g. the text
        # consists solely of stop words.
        return ""

    feature_names = vectorizer.get_feature_names_out()
    scores = tfidf_matrix.toarray()[0]
    top_indices = scores.argsort()[-num_keywords:]
    return ", ".join(feature_names[i] for i in top_indices)
def get_sentiment(text):
    """Return the TextBlob polarity score (-1 to 1) of a movie description."""
    blob = TextBlob(text)
    sentiment = blob.sentiment
    return sentiment.polarity
# ===========================
# STEP 5: BUILD RECOMMENDER SYSTEM (CONTENT-BASED FILTERING)
# ===========================
def recommend_movies(movie_title, num_recommendations=5):
    """Recommend similar movies based on content similarity.

    Reads the module-level ``movies_df`` and ``cosine_sim``.

    Args:
        movie_title: Title of the movie to find neighbours for; the first row
            with this title is used.
        num_recommendations: Maximum number of titles to return.

    Returns:
        A list of titles ordered from most to least similar, never including
        the selected row itself; or the string "Movie not found in dataset!"
        when no row has the given title.
    """
    title_matches = (movies_df["title"] == movie_title).to_numpy()
    if not title_matches.any():
        return "Movie not found in dataset!"

    # Positional index of the first match, so it lines up with cosine_sim rows
    # regardless of the DataFrame's index labels.
    movie_position = int(title_matches.argmax())
    scores = cosine_sim[movie_position]

    # Exclude the selected movie explicitly rather than assuming it sorts
    # first: another movie with an identical overview also scores 1.0 and
    # could otherwise push the selection into its own recommendations.
    candidates = [i for i in range(len(scores)) if i != movie_position]
    candidates.sort(key=lambda i: scores[i], reverse=True)

    top = candidates[:max(num_recommendations, 0)]
    return [movies_df["title"].iloc[i] for i in top]
# ===========================
# STEP 6: FETCH MOVIE POSTER
# ===========================
def get_movie_poster(movie_title):
    """Fetch the poster URL of a movie from the TMDB API.

    Reads the module-level ``movies_df`` to resolve the title to a TMDB ID.

    Args:
        movie_title: Title to look up; the first row with this title is used.

    Returns:
        The full poster image URL, or ``None`` when the title is not in the
        dataset or the API response has no poster path.
    """
    matches = movies_df[movies_df["title"] == movie_title]
    if matches.empty:
        return None

    movie_id = matches["id"].iloc[0]
    response = requests.get(
        f"{BASE_URL}/movie/{movie_id}",
        params={"api_key": API_KEY},
        timeout=10,
    )
    # poster_path can be absent or null; concatenating it blindly would raise
    # a TypeError or yield the bare image base URL, which is not an image.
    poster_path = response.json().get("poster_path")
    if not poster_path:
        return None
    return IMAGE_BASE_URL + poster_path
# ===========================
# STEP 7: LOAD & PROCESS MOVIE DATA
# ===========================
# Pull two pages of movie data from TMDB
movies_df = fetch_movies(num_pages=2)
# Attach the cast and director columns
movies_df = enhance_movie_data(movies_df)

# Derive keyword and sentiment features from each overview (stringified first)
movies_df["keywords"] = [
    extract_keywords(str(overview)) for overview in movies_df["overview"]
]
movies_df["sentiment"] = [
    get_sentiment(str(overview)) for overview in movies_df["overview"]
]

# Text the recommender compares: the overview followed by its keywords
movies_df["combined_features"] = (
    movies_df["overview"].fillna("") + " " + movies_df["keywords"].fillna("")
)

# Vectorise the combined text with TF-IDF (English stop words removed)
tfidf_vectorizer = TfidfVectorizer(stop_words="english")
tfidf_matrix = tfidf_vectorizer.fit_transform(movies_df["combined_features"])

# Pairwise cosine similarity between every pair of movies
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
# ===========================
# STEP 8: STREAMLIT APP UI
# ===========================
st.title("🎬 Movie Recommendation System")

# Dropdown to select a movie
selected_movie = st.selectbox("Select a Movie", movies_df["title"].values)

# Recommend button
if st.button("Recommend"):
    recommendations = recommend_movies(selected_movie)
    # recommend_movies returns a string when the title is unknown, and can
    # return an empty list (e.g. a one-movie dataset). Both mean there is
    # nothing to show; st.columns(0) would raise, so require a non-empty list.
    if isinstance(recommendations, list) and recommendations:
        st.subheader(f"Movies similar to {selected_movie}:")
        # Display recommended movies in a horizontal layout, one column each
        cols = st.columns(len(recommendations))
        for col, movie in zip(cols, recommendations):
            poster_url = get_movie_poster(movie)
            with col:
                if poster_url:
                    st.image(poster_url, width=150)
                st.write(f"**{movie}**")
    else:
        st.error("No recommendations found.")