# Page-header residue captured from the file viewer (not part of the program):
# asiaao's picture / Update app.py / 7a9e095 verified
import os
import re

import nltk
import numpy as np
import requests
import streamlit as st
from nltk.stem import PorterStemmer, WordNetLemmatizer
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# Fetch the NLTK resources this script relies on (WordNet data for the
# WordNetLemmatizer used in preprocess_text).
for _nltk_resource in ('wordnet', 'omw-1.4'):
    nltk.download(_nltk_resource)
# Function to preprocess text
def preprocess_text(text):
    """Normalize free text into a space-separated string of processed tokens.

    Every character that is not an ASCII letter or whitespace is removed,
    the remainder is split on whitespace, words found (lowercased) in
    scikit-learn's ``ENGLISH_STOP_WORDS`` are dropped, and each surviving
    word is lowercased, Porter-stemmed and then WordNet-lemmatized.

    Args:
        text: The raw string to normalize.

    Returns:
        The processed tokens joined by single spaces.
    """
    porter = PorterStemmer()
    wordnet = WordNetLemmatizer()

    # Keep only letters and whitespace before tokenizing.
    letters_only = re.sub(r'[^a-zA-Z\s]', '', text)

    kept_tokens = []
    for token in letters_only.split():
        lowered = token.lower()
        if lowered in ENGLISH_STOP_WORDS:
            continue
        # Stem first, then lemmatize the stem.
        kept_tokens.append(wordnet.lemmatize(porter.stem(lowered)))
    return ' '.join(kept_tokens)
# Function to fetch popular movies from TMDB
def fetch_popular_movies(api_key, page=1):
    """Fetch one page of popular movies from TMDB, enriched with per-movie details.

    The popular-movies listing is requested first; then each movie's detail
    endpoint is queried and its JSON is merged into the movie dict (this is
    what supplies the ``genres`` list the rest of the script reads).

    Args:
        api_key: TMDB API key sent as the ``api_key`` query parameter.
        page: Page number of the popular listing to request (default 1).

    Returns:
        A list of movie dicts. Empty when the listing request does not
        succeed or contains no ``results``.

    Raises:
        requests.RequestException: If the listing request itself fails at
            the network level (connection error, timeout).
    """
    base_url = "https://api.themoviedb.org/3/movie"
    # Without a timeout a single stalled connection would hang the whole app.
    timeout_seconds = 10

    # One session reuses the underlying connection across the 1 + N requests.
    with requests.Session() as session:
        response = session.get(
            f"{base_url}/popular",
            params={"api_key": api_key, "language": "en-US", "page": page},
            timeout=timeout_seconds,
        )
        if not response.ok:
            # An error response carries no 'results'; report "no movies".
            return []
        movies = response.json().get('results', [])

        # Fetch detailed info for each movie to include genres.
        for movie in movies:
            try:
                details_response = session.get(
                    f"{base_url}/{movie['id']}",
                    params={"api_key": api_key},
                    timeout=timeout_seconds,
                )
                if details_response.ok:
                    movie.update(details_response.json())
                # A non-OK response is skipped so TMDB's error payload
                # (status_code / status_message) is never merged into the movie.
            except (requests.RequestException, ValueError):
                # Details are a best-effort enrichment: keep the listing
                # entry as-is when the call fails or returns invalid JSON.
                continue
    return movies
# Function to display movie details with expander
def display_movie_details(movie):
    """Render a single movie inside a collapsible Streamlit expander.

    The expander is labelled with the movie's title and lists its genre
    names, overview, rating and release date, followed by the poster image
    when the movie has a ``poster_path``.

    Args:
        movie: Movie dict; ``title`` is required, every other field is
            optional and falls back to a placeholder.
    """
    title = movie['title']
    with st.expander(f"{title}"):
        genre_names = [entry['name'] for entry in movie.get('genres', [])]
        st.write(f"**Genre**: {', '.join(genre_names)}")

        overview = movie.get('overview', 'No description available.')
        st.write(f"**Description**: {overview}")

        rating = movie.get('vote_average', 'N/A')
        st.write(f"**Rating**: {rating}")

        released = movie.get('release_date', 'N/A')
        st.write(f"**Release Date**: {released}")

        poster_path = movie.get('poster_path')
        if poster_path:
            st.image(f"https://image.tmdb.org/t/p/w200{movie['poster_path']}", width=200)
# Main application: ask for a keyword and a minimum rating, then recommend up
# to three popular TMDB movies whose overview/genres best match the keyword.


def _rank_movies_by_keyword(movies, keyword, threshold=0.1, pool_size=10, limit=3):
    """Return up to ``limit`` movies from ``movies`` that best match ``keyword``.

    Each movie's overview plus genre names is preprocessed and compared with
    the preprocessed keyword via TF-IDF cosine similarity. Only movies whose
    similarity is strictly above ``threshold`` are kept; they are ordered by
    similarity, then by rating, both descending.

    Args:
        movies: Movie dicts, each with a non-empty ``overview``. Row ``i`` of
            the similarity vector corresponds to ``movies[i]``, so the caller
            must not pass movies that would be skipped here.
        keyword: Raw keyword text entered by the user.
        threshold: Minimum similarity (exclusive) for a movie to qualify.
        pool_size: How many of the most similar movies are considered.
        limit: Maximum number of movies returned.

    Returns:
        A list of at most ``limit`` movie dicts; empty when nothing qualifies.
    """
    keyword_processed = preprocess_text(keyword)
    if not movies or not keyword_processed:
        # Nothing to compare: an empty keyword vector matches no movie.
        return []

    documents = [
        preprocess_text(f"{movie['overview']} {' '.join(genre['name'] for genre in movie.get('genres', []))}")
        for movie in movies
    ]
    try:
        tfidf_matrix = TfidfVectorizer(stop_words='english').fit_transform(documents + [keyword_processed])
    except ValueError:
        # scikit-learn raises ValueError when no usable terms remain
        # (empty vocabulary); treat that as "no match".
        return []

    # The keyword is the last row; compare it against every movie row.
    similarities = cosine_similarity(tfidf_matrix[-1], tfidf_matrix[:-1]).flatten()

    # Take a pool of the most similar movies, then re-sort so that equal
    # similarities are ordered by rating.
    top_indices = np.argsort(similarities)[-pool_size:][::-1]
    ranked = sorted(
        (idx for idx in top_indices if similarities[idx] > threshold),
        key=lambda idx: (similarities[idx], movies[idx].get('vote_average', 0)),
        reverse=True,
    )
    return [movies[idx] for idx in ranked[:limit]]


st.write("Enter a keyword and specify a minimum rating to search for related movies:")

# NOTE(review): a live API key is committed in this file. It is kept only as
# a backward-compatible fallback; set TMDB_API_KEY in the environment, then
# rotate this key and remove the literal.
api_key = os.environ.get('TMDB_API_KEY', 'ba40cfd0a5fa3bcd5ff0e94c3db114f3')

# Collect keyword and minimum rating from user
user_keyword = st.text_input("Enter a keyword:")
min_rating = st.slider("Select minimum rating (0-10):", 0.0, 10.0, 5.0)

if user_keyword and st.button("Get Recommendations"):
    # Fetch only when recommendations are requested: Streamlit reruns this
    # script on every widget interaction, and the fetch costs 1 + N HTTP calls.
    movies_data = fetch_popular_movies(api_key)

    # Filter movies based on the minimum rating
    filtered_movies = [movie for movie in movies_data if movie.get('vote_average', 0) >= min_rating]
    if not filtered_movies:
        st.write("No movies found that meet the minimum rating criteria.")
    else:
        # Rank only movies that have an overview, and index into this same
        # list, so similarity scores can never be attributed to the wrong movie.
        candidates = [movie for movie in filtered_movies if movie.get('overview')]

        st.write("Recommended Movies:")
        recommendations = _rank_movies_by_keyword(candidates, user_keyword)
        for movie in recommendations:
            display_movie_details(movie)

        if not recommendations:
            st.write("No movies found that match the keyword closely enough.")
            st.write("Fallback Recommendations:")
            # Display top 3 highest-rated movies as a fallback
            top_rated_movies = sorted(
                filtered_movies,
                key=lambda movie: movie.get('vote_average', 0),
                reverse=True,
            )[:3]
            for movie in top_rated_movies:
                display_movie_details(movie)