LearnStreamlit / app.py
hfariborzi's picture
Update app.py
ff74c4e verified
import os

import pandas as pd
import requests
import spacy
import streamlit as st
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from textblob import TextBlob
# TMDB API key. Supply it through the TMDB_API_KEY environment variable
# (e.g. a deployment secret) instead of committing it to source control.
# The literal is kept only as a backward-compatible fallback and is also
# used when the variable is set but empty.
API_KEY = os.environ.get("TMDB_API_KEY") or "bbb69cf69be036e363d9ab8996f7f4ee"
BASE_URL = "https://api.themoviedb.org/3"
# TMDB image base URL for posters
IMAGE_BASE_URL = "https://image.tmdb.org/t/p/w500"
# Load spaCy English NLP model
nlp = spacy.load("en_core_web_sm")
# ===========================
# STEP 1: FETCH MOVIE DATA
# ===========================
def fetch_movies(num_pages=2):
    """Fetch popular movies from the TMDB discover endpoint.

    Args:
        num_pages: Number of result pages to request, starting at page 1.

    Returns:
        A DataFrame with the columns ``id``, ``title``, ``overview``,
        ``vote_average`` and ``release_date``, one row per movie. The
        columns exist even when no movie could be fetched, so callers can
        index them safely.

    Raises:
        requests.exceptions.RequestException: On a network failure or when
            a request exceeds the timeout.
    """
    columns = ["id", "title", "overview", "vote_average", "release_date"]
    all_movies = []
    for page in range(1, num_pages + 1):
        # Let requests build and encode the query string; the timeout keeps
        # a stalled connection from hanging the app indefinitely.
        response = requests.get(
            f"{BASE_URL}/discover/movie",
            params={
                "api_key": API_KEY,
                "language": "en-US",
                "sort_by": "popularity.desc",
                "page": page,
            },
            timeout=10,
        )
        data = response.json()
        # Responses without a "results" key contribute no rows.
        for movie in data.get("results", []):
            all_movies.append({
                "id": movie["id"],
                "title": movie["title"],
                # Optional fields may be absent or null for some entries.
                "overview": movie.get("overview") or "",
                "vote_average": movie.get("vote_average"),
                "release_date": movie.get("release_date") or "",
            })
    return pd.DataFrame(all_movies, columns=columns)
# ===========================
# STEP 2: FETCH ADDITIONAL DETAILS (GENRES, CAST, DIRECTOR)
# ===========================
def fetch_genres():
    """Retrieve the TMDB movie genre list.

    Returns:
        A dictionary mapping genre IDs to genre names. It is empty when the
        response carries no ``genres`` key.

    Raises:
        requests.exceptions.RequestException: On a network failure or when
            the request exceeds the timeout.
    """
    response = requests.get(
        f"{BASE_URL}/genre/movie/list",
        params={"api_key": API_KEY, "language": "en-US"},
        timeout=10,  # avoid hanging forever on a stalled connection
    )
    data = response.json()
    return {genre["id"]: genre["name"] for genre in data.get("genres", [])}
def fetch_movie_details(movie_id):
    """Fetch the top 3 cast members and the director for a given movie.

    Args:
        movie_id: TMDB identifier of the movie.

    Returns:
        A ``(cast, director)`` tuple. ``cast`` is a comma-separated string
        of up to three names taken from the start of the credits' cast
        list. ``director`` is the name of the first crew entry whose job is
        "Director", or "Unknown" when there is none.

    Raises:
        requests.exceptions.RequestException: On a network failure or when
            the request exceeds the timeout.
    """
    # A single request: the credits endpoint is called exactly once.
    response = requests.get(
        f"{BASE_URL}/movie/{movie_id}/credits",
        params={"api_key": API_KEY},
        timeout=10,
    )
    data = response.json()
    # Get top 3 cast members
    cast = ", ".join(member["name"] for member in data.get("cast", [])[:3])
    # Get director; crew entries lacking a "job" field are skipped
    director = next(
        (
            crew["name"]
            for crew in data.get("crew", [])
            if crew.get("job") == "Director"
        ),
        "Unknown",
    )
    return cast, director
# ===========================
# STEP 3: ENRICH MOVIE DATA WITH GENRES, CAST, DIRECTOR
# ===========================
def enhance_movie_data(movies_df):
    """Add top cast and director columns to the movie dataset.

    Calls ``fetch_movie_details`` once per value in the ``id`` column and
    stores the results in new ``cast`` and ``director`` columns. Genre
    names are not added here.

    Args:
        movies_df: DataFrame of movies; its ``id`` column is read.

    Returns:
        The same DataFrame object, modified in place with the two new
        columns. An empty frame is handled and simply gains empty columns.
    """
    # A frame without an "id" column yields no lookups instead of a KeyError.
    movie_ids = movies_df["id"] if "id" in movies_df else []
    details = [fetch_movie_details(movie_id) for movie_id in movie_ids]
    # Build the columns separately so that zero rows do not break unpacking.
    movies_df["cast"] = [cast for cast, _ in details]
    movies_df["director"] = [director for _, director in details]
    return movies_df
# ===========================
# STEP 4: FEATURE ENGINEERING (KEYWORDS & SENTIMENT)
# ===========================
def extract_keywords(text, num_keywords=5):
    """Extract top keywords from text using TF-IDF.

    The vectorizer is fitted on ``text`` alone, with English stop words
    removed and at most 50 features kept.

    Args:
        text: The text to analyse.
        num_keywords: Maximum number of keywords to return.

    Returns:
        A comma-separated string of up to ``num_keywords`` terms, ordered
        from lowest to highest TF-IDF weight. Returns an empty string when
        ``text`` is blank, contains no usable terms, or ``num_keywords`` is
        not positive.
    """
    # Without this guard, num_keywords == 0 would slice [-0:] and return
    # every feature instead of none.
    if num_keywords <= 0 or not text or not text.strip():
        return ""
    vectorizer = TfidfVectorizer(stop_words="english", max_features=50)
    try:
        tfidf_matrix = vectorizer.fit_transform([text])
    except ValueError:
        # Raised for an empty vocabulary, e.g. text made only of stop words.
        return ""
    feature_names = vectorizer.get_feature_names_out()
    top_indices = tfidf_matrix.toarray().argsort()[0][-num_keywords:]
    return ", ".join(feature_names[i] for i in top_indices)
def get_sentiment(text):
    """Return the TextBlob sentiment polarity (-1 to 1) of a description.

    Args:
        text: The movie description to analyse.

    Returns:
        The ``polarity`` component of TextBlob's sentiment for ``text``.
    """
    blob = TextBlob(text)
    sentiment = blob.sentiment
    return sentiment.polarity
# ===========================
# STEP 5: BUILD RECOMMENDER SYSTEM (CONTENT-BASED FILTERING)
# ===========================
def recommend_movies(movie_title, num_recommendations=5):
    """Recommend similar movies based on content similarity.

    Reads the module-level ``movies_df`` and ``cosine_sim``.

    Args:
        movie_title: Title of the movie to find neighbours for. When the
            title occurs more than once, the first occurrence is used.
        num_recommendations: Maximum number of titles to return.

    Returns:
        A list of recommended titles ordered by decreasing similarity, or
        the string "Movie not found in dataset!" when ``movie_title`` is
        not in the dataset.
    """
    title_matches = (movies_df["title"] == movie_title).to_numpy()
    if not title_matches.any():
        return "Movie not found in dataset!"
    # Use the positional index of the first match: both the rows of
    # cosine_sim and DataFrame.iloc are positional, not label-based.
    movie_index = int(title_matches.argmax())
    # Exclude the selected movie explicitly. Dropping the first sorted
    # entry is not reliable: on ties, or when the movie's feature vector is
    # all zeros, the movie itself is not guaranteed to rank first.
    scored = [
        (position, score)
        for position, score in enumerate(cosine_sim[movie_index])
        if position != movie_index
    ]
    scored.sort(key=lambda pair: pair[1], reverse=True)
    limit = max(num_recommendations, 0)
    # Get recommended movie titles
    return [movies_df.iloc[position]["title"] for position, _ in scored[:limit]]
# ===========================
# STEP 6: FETCH MOVIE POSTER
# ===========================
def get_movie_poster(movie_title):
    """Fetch the poster URL for a movie from the TMDB API.

    Looks up ``movie_title`` in the module-level ``movies_df`` and requests
    the details of the first matching movie.

    Args:
        movie_title: Title of the movie as stored in ``movies_df``.

    Returns:
        The full poster image URL, or ``None`` when the title is not in the
        dataset or the response carries no poster path.

    Raises:
        requests.exceptions.RequestException: On a network failure or when
            the request exceeds the timeout.
    """
    matches = movies_df[movies_df["title"] == movie_title]
    if matches.empty:
        return None
    movie_id = matches.iloc[0]["id"]
    response = requests.get(
        f"{BASE_URL}/movie/{movie_id}",
        params={"api_key": API_KEY},
        timeout=10,
    )
    poster_path = response.json().get("poster_path")
    # A null or missing poster_path must not be concatenated: None would
    # raise TypeError and "" would yield a URL that points at no image.
    if not poster_path:
        return None
    return IMAGE_BASE_URL + poster_path
# ===========================
# STEP 7: LOAD & PROCESS MOVIE DATA
# ===========================
# Streamlit re-executes this script on every user interaction. Caching the
# pipeline avoids repeating all TMDB requests and the TF-IDF fit each time.
# The one-hour TTL lets the popular-movie list refresh periodically.
@st.cache_data(ttl=3600)
def _load_movie_data(num_pages=2):
    """Fetch, enrich and vectorise the movie dataset.

    Args:
        num_pages: Number of TMDB result pages to fetch.

    Returns:
        A ``(movies_df, tfidf_vectorizer, tfidf_matrix, cosine_sim)`` tuple:
        the enriched DataFrame (with ``keywords``, ``sentiment`` and
        ``combined_features`` columns), the fitted vectorizer, the TF-IDF
        matrix of ``combined_features``, and the pairwise cosine similarity
        between movies.
    """
    df = fetch_movies(num_pages=num_pages)  # Fetch movie data
    df = enhance_movie_data(df)  # Add cast, director info
    # Normalise missing overviews to "" so they are not stringified into
    # the literal words "None" or "nan" before keyword extraction.
    overviews = df["overview"].fillna("").astype(str)
    # Apply feature extraction; blank overviews get no keywords rather than
    # being passed to the vectorizer.
    df["keywords"] = overviews.apply(
        lambda text: extract_keywords(text) if text.strip() else ""
    )
    df["sentiment"] = overviews.apply(get_sentiment)
    # Combine relevant text features for recommendation
    df["combined_features"] = overviews + " " + df["keywords"].fillna("")
    # Convert text into numerical vectors using TF-IDF
    vectorizer = TfidfVectorizer(stop_words="english")
    matrix = vectorizer.fit_transform(df["combined_features"])
    # Compute similarity scores between movies
    similarity = cosine_similarity(matrix, matrix)
    return df, vectorizer, matrix, similarity


movies_df, tfidf_vectorizer, tfidf_matrix, cosine_sim = _load_movie_data(num_pages=2)
# ===========================
# STEP 8: STREAMLIT APP UI
# ===========================
st.title("🎬 Movie Recommendation System")
# Dropdown to select a movie
selected_movie = st.selectbox("Select a Movie", movies_df["title"].values)
# Recommend button
if st.button("Recommend"):
    recommendations = recommend_movies(selected_movie)
    # recommend_movies returns a message string when the title is unknown.
    # An empty list must also take the error branch, because st.columns
    # rejects a column count of zero.
    if isinstance(recommendations, list) and recommendations:
        st.subheader(f"Movies similar to {selected_movie}:")
        # Display recommended movies in a horizontal layout
        columns = st.columns(len(recommendations))
        for column, movie in zip(columns, recommendations):
            poster_url = get_movie_poster(movie)
            with column:
                # The title is shown even when no poster is available.
                if poster_url:
                    st.image(poster_url, width=150)
                st.write(f"**{movie}**")
    else:
        st.error("No recommendations found.")