File size: 3,459 Bytes
9c1217e
 
3bef182
 
9c1217e
3bef182
9c1217e
3bef182
9c1217e
 
 
3bef182
b17635f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3bef182
9c1217e
 
 
3bef182
b17635f
 
 
 
3bef182
9c1217e
 
 
 
59c01b6
cb5bab3
9c1217e
3bef182
b17635f
 
9c1217e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b17635f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import html

import numpy as np
import pandas as pd
import streamlit as st
from annoy import AnnoyIndex
from huggingface_hub import hf_hub_download

# Image shown when a movie has no usable poster path.
PLACEHOLDER_POSTER = "https://upload.wikimedia.org/wikipedia/commons/3/3f/Placeholder_view_vector.svg"

# Hugging Face Hub repository that hosts every data artifact used by the app.
HF_REPO_ID = "Smiley0707/Movie-recommendation"


# Streamlit re-executes this script on every widget interaction; caching the
# download step avoids repeating three hub lookups per rerun. The spinner is
# disabled so nothing is drawn before the page is configured further down.
@st.cache_resource(show_spinner=False)
def _download_assets():
    """Fetch the dataset files from the hub and return their local paths.

    Returns:
        A ``(movies_csv, feature_npz, annoy_index)`` tuple of local file paths.
    """
    filenames = ("movies_df.csv", "feature_array.npz", "my_index.ann")
    return tuple(
        hf_hub_download(repo_id=HF_REPO_ID, filename=name) for name in filenames
    )


movies_path, feature_path, index_path = _download_assets()

# --- Caching Data Loading ---
@st.cache_data
def load_movie_data():
    """Read the movies CSV and blank out missing titles and poster paths.

    Returns:
        The dataframe read from ``movies_path`` with missing values in the
        ``title`` and ``poster_path`` columns replaced by empty strings.
    """
    frame = pd.read_csv(movies_path)
    # Both text columns are consumed as strings by the UI, so normalise NaN.
    for column in ('title', 'poster_path'):
        frame[column] = frame[column].fillna('')
    return frame

@st.cache_data
def load_feature_array():
    """Load the array stored under ``arr_0`` in the feature ``.npz`` archive.

    Returns:
        The array saved as ``arr_0`` in the file at ``feature_path``.
    """
    # np.load on an .npz returns a lazy NpzFile that keeps the archive open;
    # the context manager closes it once the array has been read into memory.
    with np.load(feature_path) as archive:
        return archive['arr_0']

@st.cache_resource
def load_annoy_index(_feature_array):
    """Create an angular Annoy index and populate it from the saved index file.

    Args:
        _feature_array: Array whose second dimension gives the vector length
            of the index. (The leading underscore presumably keeps Streamlit
            from hashing the array for the cache key -- confirm against the
            Streamlit caching docs.)

    Returns:
        The ``AnnoyIndex`` loaded from the file at ``index_path``.
    """
    index = AnnoyIndex(_feature_array.shape[1], 'angular')
    index.load(index_path)
    return index

# Main UI
# Page-level configuration (browser tab title, wide layout) followed by the
# visible page heading.
# NOTE(review): Streamlit has historically required set_page_config to run
# before other Streamlit commands -- keep it first unless confirmed otherwise.
st.set_page_config(page_title="🎬 Movie Recommender", layout="wide")
st.title("🎬 Movie Recommendation System")

# Load data using cached functions
# new_movies: movie dataframe with 'title' and 'poster_path' columns.
# feature_array: feature matrix; only its second dimension is used here, to
# size the Annoy index.
# annoy_index: nearest-neighbour index queried for recommendations below.
new_movies = load_movie_data()
feature_array = load_feature_array()
annoy_index = load_annoy_index(feature_array)

def get_poster(idx):
    """Return the poster image URL for the movie at row position ``idx``.

    Args:
        idx: Positional row number into the module-level ``new_movies`` frame.

    Returns:
        ``PLACEHOLDER_POSTER`` when the row's ``poster_path`` is missing,
        empty, or whitespace-only; otherwise the path appended to the TMDB
        ``w342`` image base URL.
    """
    raw_path = new_movies.iloc[idx]['poster_path']
    # NaN is truthy and str(NaN) == "nan", so test for it explicitly instead
    # of relying on truthiness (which would yield a bogus ".../w342nan" URL).
    if pd.isna(raw_path):
        return PLACEHOLDER_POSTER
    poster_path = str(raw_path).strip()
    if not poster_path:
        return PLACEHOLDER_POSTER
    return f'https://image.tmdb.org/t/p/w342{poster_path}'

# --- App Layout ---
def _render_card(column, pos):
    """Draw a poster-and-title card for the movie at row position ``pos``."""
    # The values are interpolated into raw HTML, so escape them to keep
    # characters such as "<", "&" or quotes from breaking the markup.
    safe_title = html.escape(str(new_movies.iloc[pos]['title']))
    safe_poster = html.escape(get_poster(pos))
    with column:
        st.markdown(
            f"""<div style="border-radius:15px;border:2px solid #eee;padding:10px;text-align:center;">
                <img src="{safe_poster}" alt="poster" style="width:100px;height:150px;border-radius:10px;"><br>
                <b>{safe_title}</b>
            </div>
            """,
            unsafe_allow_html=True
        )


query = st.text_input("Search for a movie:")
# Ignore whitespace-only input so a stray space does not match every title.
needle = query.strip().lower() if query else ""
if needle:
    # regex=False: match the query as a literal substring. With the default
    # regex matching, input such as "(" or "[" raises re.error.
    mask = new_movies['title'].str.lower().str.contains(needle, regex=False, na=False)
    # Work with row positions (not index labels): both .iloc lookups and the
    # Annoy item ids are positional. Keep only the first five matches.
    movie_indices = np.flatnonzero(mask.to_numpy())[:5].tolist()
    if not movie_indices:
        st.info("No movies found.")
    else:
        st.subheader("Search Results:")
        cols = st.columns(5)
        for col, idx in zip(cols, movie_indices):
            _render_card(col, idx)

        # Recommendations are based on the first search hit.
        recommend_for_idx = movie_indices[0]
        st.markdown(f"### Recommendations for {new_movies.iloc[recommend_for_idx]['title']}:")
        # Ask for one extra neighbour and drop the query item explicitly; an
        # approximate search does not guarantee it is returned first.
        neighbours = annoy_index.get_nns_by_item(recommend_for_idx, 6)
        recs = [item for item in neighbours if item != recommend_for_idx][:5]
        if recs:
            rec_cols = st.columns(len(recs))
            for col, r in zip(rec_cols, recs):
                _render_card(col, r)
        else:
            # Avoid creating zero columns when the index has no other items.
            st.info("No recommendations available.")