Spaces:
Build error
Build error
| import streamlit as st | |
| import requests | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| import numpy as np | |
| import nltk | |
| from nltk.stem import PorterStemmer, WordNetLemmatizer | |
| from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS | |
| import re | |
# Download the NLTK data packages ('wordnet', 'omw-1.4') if necessary;
# preprocess_text builds a WordNetLemmatizer further down in this file.
for _nltk_resource in ('wordnet', 'omw-1.4'):
    nltk.download(_nltk_resource)
# Function to preprocess text
def preprocess_text(text: str) -> str:
    """Normalize free text into a space-separated string of processed words.

    Non-alphabetic characters are replaced by spaces, the text is lower-cased
    and split on whitespace, English stop words are dropped, and every
    remaining word is Porter-stemmed and then passed through the WordNet
    lemmatizer.

    Args:
        text: Raw text, e.g. a movie overview plus genre names, or the
            keyword typed by the user.

    Returns:
        The processed words joined by single spaces; an empty string when no
        word survives the filtering.
    """
    stemmer = PorterStemmer()
    lemmatizer = WordNetLemmatizer()

    # Replace (rather than delete) punctuation and digits: deleting them fuses
    # words that are only separated by a hyphen, slash or dash
    # ("action/adventure" -> "actionadventure"), producing tokens that can
    # never match a keyword.
    cleaned = re.sub(r'[^a-zA-Z\s]', ' ', text)

    return ' '.join(
        lemmatizer.lemmatize(stemmer.stem(word))
        for word in cleaned.lower().split()
        if word not in ENGLISH_STOP_WORDS
    )
# Function to fetch popular movies from TMDB
def fetch_popular_movies(api_key: str, page: int = 1) -> list:
    """Fetch one page of popular movies from TMDB, enriched with details.

    The popular-movies listing is requested first; then one detail request is
    made per movie and its fields (genres and other details) are merged into
    the listing entry.

    Args:
        api_key: TMDB API key sent as the ``api_key`` query parameter.
        page: Page number of the popular-movies listing to request.

    Returns:
        A list of movie dicts. The list is empty when the listing response is
        not JSON or carries no ``results``. A movie whose detail request is
        unsuccessful keeps only the fields from the listing.

    Raises:
        requests.RequestException: If a request cannot be completed
            (connection failure, timeout, ...).
    """
    base_url = "https://api.themoviedb.org/3/movie"
    # Without an explicit timeout a stalled connection would block forever.
    timeout_seconds = 10

    # One session for the listing plus all detail calls lets the connection
    # to the API host be reused instead of reopened for every movie.
    with requests.Session() as session:
        response = session.get(
            f"{base_url}/popular",
            params={"api_key": api_key, "language": "en-US", "page": page},
            timeout=timeout_seconds,
        )
        try:
            payload = response.json()
        except ValueError:
            # Body was not JSON (e.g. an HTML error page): nothing to show.
            return []
        movies = (payload.get('results') or []) if isinstance(payload, dict) else []

        # Fetch detailed info for each movie to include genres
        for movie in movies:
            details_response = session.get(
                f"{base_url}/{movie['id']}",
                params={"api_key": api_key},
                timeout=timeout_seconds,
            )
            if not details_response.ok:
                # Never merge an error payload into the movie record.
                continue
            try:
                details = details_response.json()
            except ValueError:
                continue
            if isinstance(details, dict):
                movie.update(details)  # Include genres and other details

    return movies
# Function to display movie details with expander
def display_movie_details(movie: dict) -> None:
    """Render one movie inside a collapsible Streamlit expander.

    Shows genre names, description, rating and release date, followed by the
    poster image when the movie has a ``poster_path``.

    Args:
        movie: Movie dict. Reads the keys ``title``, ``genres`` (a list of
            dicts with a ``name`` key), ``overview``, ``vote_average``,
            ``release_date`` and ``poster_path``; all of them are optional.
    """
    # dict.get(key, default) only covers *missing* keys. Using `or` also
    # covers keys that are present but empty ('' or None), which would
    # otherwise render as a blank field.
    title = movie.get('title') or 'Untitled'
    genres = ', '.join(genre['name'] for genre in movie.get('genres') or []) or 'N/A'
    overview = movie.get('overview') or 'No description available.'
    release_date = movie.get('release_date') or 'N/A'

    # A rating of 0 is a legitimate value, so only None falls back to 'N/A'.
    rating = movie.get('vote_average')
    if rating is None:
        rating = 'N/A'

    with st.expander(f"{title}"):
        st.write(f"**Genre**: {genres}")
        st.write(f"**Description**: {overview}")
        st.write(f"**Rating**: {rating}")
        st.write(f"**Release Date**: {release_date}")
        if movie.get('poster_path'):
            poster_url = f"https://image.tmdb.org/t/p/w200{movie['poster_path']}"
            st.image(poster_url, width=200)
# Main application
import os  # kept local to the script section; only used for the API-key override


def _rank_movies_by_keyword(movies, keyword, similarity_threshold=0.1, limit=3):
    """Return up to `limit` movies whose text is most similar to `keyword`.

    Each movie's overview and genre names are preprocessed and compared with
    the preprocessed keyword using TF-IDF vectors and cosine similarity.
    Matches are ordered by similarity, then by rating.

    Args:
        movies: Candidate movie dicts; entries without an overview are skipped.
        keyword: Raw keyword typed by the user.
        similarity_threshold: Similarity a movie must exceed to be returned.
        limit: Maximum number of movies to return.

    Returns:
        A list of movie dicts (possibly empty), best match first.
    """
    # Only movies with an overview can be scored. They are kept in their own
    # list so that row i of the similarity vector always refers to
    # candidates[i]; indexing the unfiltered list would show the wrong movie
    # as soon as one entry lacks an overview.
    candidates = [movie for movie in movies if movie.get('overview')]

    movie_texts = []
    for movie in candidates:
        genre_names = ' '.join(genre['name'] for genre in movie.get('genres') or [])
        movie_texts.append(preprocess_text(f"{movie['overview']} {genre_names}"))

    keyword_text = preprocess_text(keyword)
    if not movie_texts or not keyword_text:
        # Nothing to compare (no scorable movies, or the keyword consisted
        # only of stop words / non-letters).
        return []

    # Vectorize the documents; the keyword is the last row.
    try:
        tfidf_matrix = TfidfVectorizer(stop_words='english').fit_transform(
            movie_texts + [keyword_text]
        )
    except ValueError:
        # Raised when no usable term remains in any document.
        return []

    # Calculate cosine similarity between the keyword and every movie.
    similarities = cosine_similarity(tfidf_matrix[-1], tfidf_matrix[:-1]).flatten()

    # Take the 10 most similar movies to get more options, then order them by
    # similarity and rating and keep the best `limit`.
    top_indices = np.argsort(similarities)[-10:][::-1]
    ranked = sorted(
        (idx for idx in top_indices if similarities[idx] > similarity_threshold),
        key=lambda idx: (similarities[idx], candidates[idx].get('vote_average') or 0),
        reverse=True,
    )[:limit]
    return [candidates[idx] for idx in ranked]


st.write("Enter a keyword and specify a minimum rating to search for related movies:")

# The TMDB_API_KEY environment variable, when set, overrides the bundled key.
# NOTE(review): a credential committed to source should be rotated and removed.
api_key = os.environ.get('TMDB_API_KEY') or 'ba40cfd0a5fa3bcd5ff0e94c3db114f3'

# Collect keyword and minimum rating from user
user_keyword = st.text_input("Enter a keyword:")
min_rating = st.slider("Select minimum rating (0-10):", 0.0, 10.0, 5.0)

# Fetch popular movies
# NOTE(review): this executes each time the script runs, issuing one request
# per movie; consider caching the result if that proves slow.
movies_data = fetch_popular_movies(api_key)

if user_keyword and st.button("Get Recommendations"):
    # Filter movies based on the minimum rating (a missing/None rating counts as 0)
    filtered_movies = [
        movie for movie in movies_data
        if (movie.get('vote_average') or 0) >= min_rating
    ]

    if not filtered_movies:
        st.write("No movies found that meet the minimum rating criteria.")
    else:
        recommendations = _rank_movies_by_keyword(filtered_movies, user_keyword)

        st.write("Recommended Movies:")
        for movie in recommendations:
            display_movie_details(movie)

        if not recommendations:
            st.write("No movies found that match the keyword closely enough.")
            st.write("Fallback Recommendations:")
            # Display top 3 highest-rated movies as a fallback
            top_rated_movies = sorted(
                filtered_movies,
                key=lambda movie: movie.get('vote_average') or 0,
                reverse=True,
            )[:3]
            for movie in top_rated_movies:
                display_movie_details(movie)