Spaces:
Sleeping
Sleeping
File size: 6,382 Bytes
ff74c4e 279fe6d ff74c4e 0aa905c ff74c4e 0aa905c ff74c4e 024d3eb ff74c4e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 |
import os

import pandas as pd
import requests
import spacy
import streamlit as st
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from textblob import TextBlob
# 🔹 TMDB API key. Prefer supplying it through the TMDB_API_KEY environment
# variable (for example a Space secret). The literal below is kept only as a
# backward-compatible fallback; because it is committed to source control it
# should be treated as exposed and rotated.
API_KEY = os.environ.get("TMDB_API_KEY") or "bbb69cf69be036e363d9ab8996f7f4ee"
BASE_URL = "https://api.themoviedb.org/3"
# 🔹 TMDB Image Base URL for posters
IMAGE_BASE_URL = "https://image.tmdb.org/t/p/w500"
# Load Spacy English NLP model
# NOTE(review): `nlp` is not referenced anywhere else in the visible code, so
# this load may be pure startup cost — confirm before removing.
nlp = spacy.load("en_core_web_sm")
# ===========================
# STEP 1: FETCH MOVIE DATA
# ===========================
def fetch_movies(num_pages=2):
    """Fetch popular movies from the TMDB discover endpoint.

    Args:
        num_pages: Number of result pages to request, starting at page 1.

    Returns:
        A DataFrame with the columns ``id``, ``title``, ``overview``,
        ``vote_average`` and ``release_date``. The columns are present even
        when no movie was retrieved, so callers can always index them.
    """
    columns = ["id", "title", "overview", "vote_average", "release_date"]
    all_movies = []
    for page in range(1, num_pages + 1):
        response = requests.get(
            f"{BASE_URL}/discover/movie",
            params={
                "api_key": API_KEY,
                "language": "en-US",
                "sort_by": "popularity.desc",
                "page": page,
            },
            # Without a timeout a stalled connection would block forever.
            timeout=10,
        )
        data = response.json()
        # A payload without a "results" key (e.g. an error body) is skipped.
        for movie in data.get("results", []):
            # Fields absent from a record become None instead of raising
            # KeyError and aborting the whole fetch.
            all_movies.append({column: movie.get(column) for column in columns})
    return pd.DataFrame(all_movies, columns=columns)
# ===========================
# STEP 2: FETCH ADDITIONAL DETAILS (GENRES, CAST, DIRECTOR)
# ===========================
def fetch_genres():
    """Retrieve the TMDB movie genre list as a mapping of genre ID to name.

    Returns:
        dict: ``{genre_id: genre_name}``. Empty when the response carries no
        ``genres`` entry (for example an error payload).
    """
    response = requests.get(
        f"{BASE_URL}/genre/movie/list",
        params={"api_key": API_KEY, "language": "en-US"},
        # Without a timeout a stalled connection would block forever.
        timeout=10,
    )
    genres = response.json().get("genres", [])
    return {genre["id"]: genre["name"] for genre in genres}
def fetch_movie_details(movie_id):
    """Fetch the top 3 cast members and the director for a given movie.

    Args:
        movie_id: TMDB movie identifier.

    Returns:
        tuple: ``(cast, director)``. ``cast`` is a comma-separated string of
        up to three names (empty when no cast is listed). ``director`` is the
        name of the first crew entry whose job is "Director", or "Unknown"
        when there is none.
    """
    # A single request is enough: the credits payload is read for both the
    # cast and the crew below.
    response = requests.get(
        f"{BASE_URL}/movie/{movie_id}/credits",
        params={"api_key": API_KEY},
        timeout=10,
    )
    data = response.json()

    # Get top 3 cast members
    cast = ", ".join(member["name"] for member in data.get("cast", [])[:3])

    # Get director; .get() tolerates crew entries that lack a "job" field.
    director = next(
        (
            crew["name"]
            for crew in data.get("crew", [])
            if crew.get("job") == "Director"
        ),
        "Unknown",
    )
    return cast, director
# ===========================
# STEP 3: ENRICH MOVIE DATA WITH GENRES, CAST, DIRECTOR
# ===========================
def enhance_movie_data(movies_df):
    """Add top cast and director information to the movie dataset.

    No genre column is produced, so no genre lookup is performed here.

    Args:
        movies_df: DataFrame with an ``id`` column of TMDB movie IDs. It is
            modified in place.

    Returns:
        The same DataFrame with ``cast`` and ``director`` columns added.
    """
    if movies_df.empty:
        # zip(*...) over zero rows yields nothing to unpack, so create the
        # (empty) columns directly instead of crashing.
        movies_df["cast"] = pd.Series(dtype="object")
        movies_df["director"] = pd.Series(dtype="object")
        return movies_df

    # One (cast, director) tuple per movie, transposed into two columns.
    details = movies_df["id"].apply(fetch_movie_details)
    movies_df["cast"], movies_df["director"] = zip(*details)
    return movies_df
# ===========================
# STEP 4: FEATURE ENGINEERING (KEYWORDS & SENTIMENT)
# ===========================
def extract_keywords(text, num_keywords=5):
    """Extract the top keywords from a text using TF-IDF weights.

    Args:
        text: The text to analyse.
        num_keywords: Maximum number of keywords to return.

    Returns:
        A comma-separated string of keywords ordered from lowest to highest
        weight. An empty string is returned when ``text`` is empty, contains
        no usable terms, or ``num_keywords`` is not positive.
    """
    # A non-positive count must yield nothing; slicing with [-0:] would
    # otherwise return every feature.
    if num_keywords <= 0 or not text or not text.strip():
        return ""

    vectorizer = TfidfVectorizer(stop_words="english", max_features=50)
    try:
        weights = vectorizer.fit_transform([text]).toarray()[0]
    except ValueError:
        # scikit-learn raises ValueError for an empty vocabulary, e.g. when
        # the text consists only of stop words.
        return ""

    feature_names = vectorizer.get_feature_names_out()
    top_indices = weights.argsort()[-num_keywords:]
    return ", ".join(feature_names[i] for i in top_indices)
def get_sentiment(text):
    """Return the TextBlob polarity score (-1 to 1) of a movie description."""
    blob = TextBlob(text)
    polarity = blob.sentiment.polarity
    return polarity
# ===========================
# STEP 5: BUILD RECOMMENDER SYSTEM (CONTENT-BASED FILTERING)
# ===========================
def recommend_movies(movie_title, num_recommendations=5, *, movies=None, similarity=None):
    """Recommend similar movies based on content similarity.

    Args:
        movie_title: Title of the movie to find neighbours for.
        num_recommendations: Maximum number of titles to return.
        movies: Optional DataFrame with a ``title`` column. Defaults to the
            module-level ``movies_df``.
        similarity: Optional square similarity matrix whose rows and columns
            follow the row order of ``movies``. Defaults to the module-level
            ``cosine_sim``.

    Returns:
        A list of recommended titles, most similar first, or the string
        "Movie not found in dataset!" when the title is unknown.
    """
    movies = movies_df if movies is None else movies
    similarity = cosine_sim if similarity is None else similarity

    titles = movies["title"].tolist()
    if movie_title not in titles:
        return "Movie not found in dataset!"

    # Work with row positions throughout, since the similarity matrix is
    # addressed by position rather than by index label.
    movie_pos = titles.index(movie_title)
    scores = similarity[movie_pos]

    # Exclude the queried movie explicitly. Dropping the first sorted entry is
    # not enough: another movie with an equal score can sort ahead of it, in
    # which case the queried movie itself would be recommended.
    candidates = [pos for pos in range(len(titles)) if pos != movie_pos]
    ranked = sorted(candidates, key=lambda pos: scores[pos], reverse=True)

    return [titles[pos] for pos in ranked[:max(num_recommendations, 0)]]
# ===========================
# STEP 6: FETCH MOVIE POSTER
# ===========================
def get_movie_poster(movie_title):
    """Fetch the poster URL of a movie from the TMDB API.

    Args:
        movie_title: Title to look up in the module-level ``movies_df``.

    Returns:
        The full poster image URL, or ``None`` when the title is not in the
        dataset or the API response has no poster path.
    """
    matches = movies_df[movies_df["title"] == movie_title]
    if matches.empty:
        return None

    movie_id = matches.iloc[0]["id"]
    response = requests.get(
        f"{BASE_URL}/movie/{movie_id}",
        params={"api_key": API_KEY},
        timeout=10,
    )
    poster_path = response.json().get("poster_path")

    # The field may be null or absent. Concatenating None would raise
    # TypeError, and an empty path would produce a bogus but truthy URL.
    if not poster_path:
        return None
    return IMAGE_BASE_URL + poster_path
# ===========================
# STEP 7: LOAD & PROCESS MOVIE DATA
# ===========================
# Pull the popular-movie catalogue, then attach cast and director columns.
movies_df = enhance_movie_data(fetch_movies(num_pages=2))

# Derive keywords and a sentiment score from each overview (stringified first).
overview_text = movies_df["overview"].apply(str)
movies_df["keywords"] = overview_text.apply(extract_keywords)
movies_df["sentiment"] = overview_text.apply(get_sentiment)

# Text used for similarity: the overview followed by its extracted keywords.
overview_filled = movies_df["overview"].fillna("")
keywords_filled = movies_df["keywords"].fillna("")
movies_df["combined_features"] = overview_filled + " " + keywords_filled

# Vectorise the combined text with TF-IDF.
tfidf_vectorizer = TfidfVectorizer(stop_words="english")
tfidf_matrix = tfidf_vectorizer.fit_transform(movies_df["combined_features"])

# Pairwise movie-to-movie cosine similarity.
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
# ===========================
# STEP 8: STREAMLIT APP UI
# ===========================
st.title("🎬 Movie Recommendation System")

# Dropdown to select a movie
selected_movie = st.selectbox("Select a Movie", movies_df["title"].values)

# Recommend button
if st.button("Recommend"):
    recommendations = recommend_movies(selected_movie)

    # recommend_movies returns a message string for an unknown title, and
    # st.columns() cannot be called with zero columns, so only a non-empty
    # list is rendered; everything else falls through to the error message.
    if isinstance(recommendations, list) and recommendations:
        st.subheader(f"Movies similar to {selected_movie}:")

        # Display recommended movies in a horizontal layout
        cols = st.columns(len(recommendations))
        for col, movie in zip(cols, recommendations):
            poster_url = get_movie_poster(movie)
            with col:
                if poster_url:
                    st.image(poster_url, width=150)
                st.write(f"**{movie}**")
    else:
        st.error("No recommendations found.")
|