"""Streamlit app: search a movie catalogue and show Annoy-based recommendations.

The catalogue (CSV), the feature matrix (.npz) and the prebuilt Annoy index
are downloaded from the Hugging Face Hub. The first search hit is used as the
seed item for the nearest-neighbour recommendations.
"""

import html

import numpy as np
import pandas as pd
import streamlit as st
from annoy import AnnoyIndex
from huggingface_hub import hf_hub_download

PLACEHOLDER_POSTER = "https://upload.wikimedia.org/wikipedia/commons/3/3f/Placeholder_view_vector.svg"
REPO_ID = "Smiley0707/Movie-recommendation"
TMDB_POSTER_BASE = "https://image.tmdb.org/t/p/w342"
MAX_RESULTS = 5  # number of search hits / recommendations shown per row

# Page configuration is issued before any cached call so it is always the
# first Streamlit command executed on a rerun.
st.set_page_config(page_title="🎬 Movie Recommender", layout="wide")


@st.cache_resource(show_spinner=False)
def _fetch_artifact(filename):
    """Download ``filename`` from the Hub repo once and return its local path.

    Streamlit re-executes the whole script on every interaction; caching the
    path avoids calling ``hf_hub_download`` again on each rerun.
    """
    return hf_hub_download(repo_id=REPO_ID, filename=filename)


movies_path = _fetch_artifact("movies_df.csv")
feature_path = _fetch_artifact("feature_array.npz")
index_path = _fetch_artifact("my_index.ann")


# --- Caching Data Loading ---
@st.cache_data
def load_movie_data():
    """Load the movie dataframe and blank out missing titles / poster paths."""
    movies_df = pd.read_csv(movies_path)
    movies_df['title'] = movies_df['title'].fillna('')
    movies_df['poster_path'] = movies_df['poster_path'].fillna('')
    return movies_df


@st.cache_data
def load_feature_array():
    """Load the feature array stored under ``arr_0`` in the .npz file."""
    return np.load(feature_path)['arr_0']


@st.cache_resource
def load_annoy_index(_feature_array):
    """Load the prebuilt Annoy index from disk.

    ``_feature_array`` is only used for its second dimension, which gives the
    vector length the index is opened with. The leading underscore tells
    Streamlit not to hash the argument when computing the cache key.
    """
    dimension = _feature_array.shape[1]
    index = AnnoyIndex(dimension, 'angular')
    index.load(index_path)
    return index


# Main UI
st.title("🎬 Movie Recommendation System")

# Load data using cached functions
new_movies = load_movie_data()
feature_array = load_feature_array()
annoy_index = load_annoy_index(feature_array)


def get_poster(idx):
    """Return the poster URL for the movie at row position ``idx``.

    Falls back to ``PLACEHOLDER_POSTER`` when the stored poster path is empty
    or whitespace-only.
    """
    path = str(new_movies.iloc[idx]['poster_path']).strip()
    if not path:
        return PLACEHOLDER_POSTER
    return f'{TMDB_POSTER_BASE}{path}'


def _movie_card_html(idx):
    """Build the HTML card (poster image + title) for the movie at ``idx``.

    The title and URL are escaped because the markup is rendered with
    ``unsafe_allow_html=True`` and the values come straight from the dataset.
    """
    title = html.escape(str(new_movies.iloc[idx]['title']))
    poster_url = html.escape(get_poster(idx), quote=True)
    return (
        '<div style="text-align:center">'
        f'<img src="{poster_url}" alt="poster" style="width:100%;border-radius:8px">'
        f'<div>{title}</div>'
        '</div>'
    )


def _render_movie_row(indices):
    """Render one column per movie position in ``indices`` (must be non-empty)."""
    columns = st.columns(len(indices))
    for column, idx in zip(columns, indices):
        with column:
            st.markdown(_movie_card_html(idx), unsafe_allow_html=True)


# --- App Layout ---
query = st.text_input("Search for a movie:")

needle = query.strip().lower()
if needle:
    # regex=False: the query is user text, not a pattern. With the default
    # regex matching, input such as "(" or "*" raises re.error.
    mask = new_movies['title'].str.lower().str.contains(needle, regex=False, na=False)
    # Work with row *positions*: both .iloc and the Annoy item ids are
    # positional, so index labels must not be used here.
    movie_indices = np.flatnonzero(mask.to_numpy())[:MAX_RESULTS].tolist()

    if not movie_indices:
        st.info("No movies found.")
    else:
        st.subheader("Search Results:")
        _render_movie_row(movie_indices)

        recommend_for_idx = movie_indices[0]
        st.markdown(f"### Recommendations for {new_movies.iloc[recommend_for_idx]['title']}:")

        # Ask for one extra neighbour and drop the seed explicitly rather than
        # assuming it is always returned first.
        neighbours = annoy_index.get_nns_by_item(recommend_for_idx, MAX_RESULTS + 1)
        recs = [r for r in neighbours if r != recommend_for_idx][:MAX_RESULTS]

        if recs:
            _render_movie_row(recs)
        else:
            # st.columns() rejects a column count of zero.
            st.info("No recommendations available for this title.")