import difflib
import json
import os
import pickle
import random
from datetime import datetime

import pandas as pd
import requests
import streamlit as st
from keras.models import load_model

# Set before recommendation_utils is imported.
# NOTE(review): presumably the Surprise library reads this variable when it is
# first imported -- confirm before moving this line.
os.environ["SURPRISE_DATA_FOLDER"] = "/tmp/.surprise_data"

from recommendation_utils import (
    load_svd_model, load_trainset,
    recommend_with_svd
)
#from recommendation_utils import (
#    load_nn_model, load_svd_model, load_trainset,
#    recommend_with_nn, recommend_with_svd, load_encodings
#)
# encodings = load_encodings("/tmp/encodings.pkl")

st.set_page_config(layout="wide")

MOVIES_PATH = os.path.join(os.path.dirname(__file__), "movies.csv")
RATINGS_JSON_PATH = "/tmp/ratings.json"
POSTER_PLACEHOLDER = "https://via.placeholder.com/300x450.png?text=No+Poster"
# SECURITY: an API key should not live in source control. The environment
# variable takes precedence; the literal is kept only as a fallback so existing
# deployments keep working. Rotate the key and drop the fallback when possible.
TMDB_API_KEY = os.environ.get("TMDB_API_KEY", "d15fc170483ad01d6b3d59561432fefc")
# Upper bound (seconds) for a single TMDb request so a stalled connection
# cannot block page rendering indefinitely.
TMDB_TIMEOUT_SECONDS = 10


@st.cache_data(show_spinner=False, ttl=86400)  # 24h cache
def get_tmdb_data(title, year=None):
    """Look up a movie on TMDb and return its poster URL and TMDb page link.

    Args:
        title: Movie title used as the search query.
        year: Optional release year; sent as a filter only when truthy
            (``None`` and ``0`` are ignored).

    Returns:
        A ``(poster_url, tmdb_link)`` tuple built from the first search result.
        ``poster_url`` is ``POSTER_PLACEHOLDER`` when the result has no poster,
        and ``tmdb_link`` is ``None`` when the result has no id. When the
        lookup fails or yields no results, ``(POSTER_PLACEHOLDER, None)``.
    """
    url = "https://api.themoviedb.org/3/search/movie"
    params = {
        "api_key": TMDB_API_KEY,
        "query": title,
    }
    if year:
        params["year"] = year

    # NOTE(review): the fallback return value is cached for the full TTL as
    # well, so a transient network failure hides the poster for 24 hours.
    try:
        response = requests.get(url, params=params, timeout=TMDB_TIMEOUT_SECONDS)
        if response.status_code == 200:
            results = response.json().get("results")  # parse the body once
            if results:
                result = results[0]
                poster_path = result.get("poster_path")
                movie_id = result.get("id")
                poster_url = (
                    f"https://image.tmdb.org/t/p/w500{poster_path}"
                    if poster_path
                    else POSTER_PLACEHOLDER
                )
                tmdb_link = (
                    f"https://www.themoviedb.org/movie/{movie_id}"
                    if movie_id
                    else None
                )
                return poster_url, tmdb_link
    except (
        requests.RequestException,
        ValueError,
        LookupError,
        AttributeError,
        TypeError,
    ):
        # Best effort by design: network errors, invalid JSON or an unexpected
        # payload shape all fall through to the placeholder.
        pass
    return POSTER_PLACEHOLDER, None


def load_ratings_cached():
    """Return the list of rating entries stored at ``RATINGS_JSON_PATH``.

    Returns:
        The decoded JSON content of the file, or an empty list when the file
        does not exist or does not contain valid JSON.
    """
    if not os.path.exists(RATINGS_JSON_PATH):
        return []
    try:
        with open(RATINGS_JSON_PATH, "r", encoding="utf-8") as f:
            return json.load(f)
    except json.JSONDecodeError:
        # An empty or truncated file would otherwise crash the app at start-up.
        return []


def save_rating_to_json(entry):
    """Insert or replace one rating entry in the ratings file.

    Any stored entry with the same ``movie_id`` is dropped before ``entry`` is
    appended, so each movie has at most one rating.

    Args:
        entry: Mapping with at least a ``"movie_id"`` key.
    """
    all_ratings = [
        r for r in load_ratings_cached() if r["movie_id"] != entry["movie_id"]
    ]
    all_ratings.append(entry)
    with open(RATINGS_JSON_PATH, "w", encoding="utf-8") as f:
        json.dump(all_ratings, f, indent=2, default=str)

    # Invalidate the cache. load_ratings_cached is currently a plain function
    # without a ``clear`` attribute, so calling it unconditionally raised
    # AttributeError after every save; only call it when a cache wrapper exists.
    clear_cache = getattr(load_ratings_cached, "clear", None)
    if callable(clear_cache):
        clear_cache()
@st.cache_data(show_spinner=False)
def load_movies():
    """Load the movie catalogue from ``MOVIES_PATH`` and add derived columns.

    Returns:
        A DataFrame read from the CSV with two added columns: ``year`` (the
        four-digit number found in parentheses in ``title``, ``0`` when there
        is none) and ``clean_title`` (``title`` with that parenthesised year
        removed and surrounding whitespace stripped). Missing ``genres``
        values are replaced by ``"Unknown"``.
    """
    df = pd.read_csv(MOVIES_PATH)
    df["year"] = df["title"].str.extract(r'\((\d{4})\)').fillna("0").astype(int)
    df["clean_title"] = df["title"].str.replace(r'\(\d{4}\)', '', regex=True).str.strip()
    df["genres"] = df["genres"].fillna("Unknown")
    return df


movie_df = load_movies()
movie_titles = movie_df["title"].unique().tolist()
movie_id_to_title = dict(zip(movie_df["movieId"], movie_df["title"]))
title_to_movie_id = dict(zip(movie_df["title"], movie_df["movieId"]))

if "rated" not in st.session_state:
    st.session_state.rated = []
if "quiz_history" not in st.session_state:
    st.session_state.quiz_history = []

st.markdown(""" """, unsafe_allow_html=True)

# The page to render is selected through URL query parameters.
query_params = st.query_params
page = query_params.get("rateflow")
search_query = query_params.get("search")
movie_id = query_params.get("movie_id")


def render_star_rating(rating):
    """Return ``rating`` rendered as that many star characters."""
    return "★" * int(rating)


all_ratings_data = load_ratings_cached()

if movie_id:
    # ---- Movie detail page -------------------------------------------------
    try:
        movie_id = int(movie_id)
        match = movie_df[movie_df["movieId"] == movie_id]
        if match.empty:
            st.error(f"Movie with ID {movie_id} not found.")
            st.stop()
        movie_info = match.iloc[0]
        st.title(movie_info["clean_title"])
        poster_url, tmdb_link = get_tmdb_data(movie_info["clean_title"], movie_info["year"])
        col1, col2 = st.columns([1, 3])
        with col1:
            if poster_url and "placeholder.com" not in poster_url:
                st.image(poster_url, use_container_width=True)
            else:
                st.markdown("""

No picture available

""", unsafe_allow_html=True)
        # NOTE(review): the original nesting was lost; the rating widgets are
        # assumed to sit in the details column -- confirm against the layout.
        with col2:
            st.subheader("Details")
            st.write(f"**Genres:** {movie_info['genres']}")
            st.write(f"**Year:** {movie_info['year']}")
            if tmdb_link:
                # The link target was previously computed but never emitted.
                st.markdown(
                    f"<a href='{tmdb_link}' target='_blank'>View on TMDb</a>",
                    unsafe_allow_html=True,
                )
            st.markdown("### Your Rating")
            existing_rating = next(
                (r["rating"] for r in all_ratings_data if r["movie_id"] == movie_id),
                None,
            )
            # Pre-select the stored rating; the options are 1..5, so the index
            # is rating - 1.
            initial_index = (existing_rating - 1) if existing_rating else 0
            rating_key = f"detail_rating_{movie_id}"
            new_rating = st.radio(
                "Rate this movie:",
                [1, 2, 3, 4, 5],
                horizontal=True,
                index=initial_index,
                key=rating_key,
            )
            if st.button("Submit Rating", key=f"submit_rating_btn_{movie_id}"):
                save_rating_to_json({
                    "movie_id": int(movie_info["movieId"]),
                    "rating": new_rating,
                    "timestamp": datetime.now().isoformat()
                })
                st.success("Rating saved.")
                st.rerun()
    except Exception as e:
        st.error(f"Could not load movie details: {e}")

elif page:
    # ---- Rate random movies ------------------------------------------------
    st.title("Rate Random Movies")

    # Streamlit reruns the whole script when a button is clicked. Sampling a
    # new movie on every run changed the widget keys before the click could be
    # read, so ratings were never saved. The sampled movie is therefore kept in
    # session state until it has been rated or skipped.
    rateflow_key = "rateflow_movie_id"
    movie = None
    stored_movie_id = st.session_state.get(rateflow_key)
    if stored_movie_id is not None:
        stored_rows = movie_df[movie_df["movieId"] == stored_movie_id]
        if not stored_rows.empty:
            movie = stored_rows.iloc[0]
    if movie is None:
        movie = movie_df.sample(1).iloc[0]
        st.session_state[rateflow_key] = int(movie["movieId"])

    poster_url, tmdb_link = get_tmdb_data(movie["clean_title"], movie["year"])
    movie_id = int(movie["movieId"])  # cast explicitly so it is JSON-serialisable
    col1, col2 = st.columns([1, 2])
    with col1:
        if poster_url and "placeholder.com" not in poster_url:
            st.image(poster_url, width=200)
        else:
            st.markdown("""

No
Image

""", unsafe_allow_html=True)
    # NOTE(review): the original nesting was lost; the rating widgets are
    # assumed to sit in the details column -- confirm against the layout.
    with col2:
        st.subheader(movie["clean_title"])
        st.markdown(f"**Genres:** {movie['genres']}")
        st.markdown(f"**Year:** {movie['year']}")
        if tmdb_link:
            # The link target was previously computed but never emitted.
            st.markdown(
                f"<a href='{tmdb_link}' target='_blank'>View on TMDb</a>",
                unsafe_allow_html=True,
            )
        # Rating selection
        rating = st.radio(
            "Rate this movie:",
            [1, 2, 3, 4, 5],
            horizontal=True,
            key=f"rating_{movie_id}",
        )
        col_submit, col_skip = st.columns([1, 1])
        with col_submit:
            if st.button("Submit Rating", key=f"submit_{movie_id}"):
                save_rating_to_json({
                    "movie_id": movie_id,
                    "rating": rating,
                    "timestamp": datetime.now().isoformat()
                })
                del st.session_state[rateflow_key]  # next run samples a new movie
                st.success("Rating saved.")
                st.rerun()
        with col_skip:
            if st.button("Didn't Watch", key=f"skip_{movie_id}"):
                del st.session_state[rateflow_key]  # next run samples a new movie
                st.rerun()

elif search_query:
    # ---- Search results ----------------------------------------------------
    st.title(f"Search Results for '{search_query}'")
    search_clean = search_query.strip().lower()

    def title_match_score(title):
        """Score a title: 3 exact, 2 prefix, 1 substring, 0 no match."""
        title_lower = title.lower()
        if title_lower == search_clean:
            return 3
        if title_lower.startswith(search_clean):
            return 2
        if search_clean in title_lower:
            return 1
        return 0

    movie_df["match_score"] = movie_df["clean_title"].apply(title_match_score)
    strong_matches = movie_df[movie_df["match_score"] > 0].sort_values(
        "match_score", ascending=False
    )
    if strong_matches.empty:
        # No literal match: fall back to fuzzy matching on the raw query.
        close_titles = difflib.get_close_matches(
            search_query, movie_df["clean_title"], n=25, cutoff=0.5
        )
        filtered = movie_df[movie_df["clean_title"].isin(close_titles)].head(25)
    else:
        filtered = strong_matches.head(25)

    if filtered.empty:
        st.warning("No movies found.")
    else:
        st.markdown(""" """, unsafe_allow_html=True)
        for _, movie in filtered.iterrows():
            poster_url, _ = get_tmdb_data(movie["clean_title"], movie["year"])
            # The poster URL was previously tested but never placed in the markup.
            poster_html = (
                f"<img src='{poster_url}' alt='Poster'>\n\n"
                if poster_url and "placeholder.com" not in poster_url
                else "\n\nNo\nImage\n\n"
            )
            st.markdown(f"""

{poster_html}

{movie['clean_title']}

Genres: {movie['genres']}

Year: {movie['year']}

""", unsafe_allow_html=True)

else:
    # ---- Home: rating history and recommendations --------------------------
    st.title("Welcome to Movie Recommender")
    # Model selection
    model_choice = st.radio(
        "Choose Recommendation Model:",
        options=["SVD"],
        index=0,
        horizontal=True,
        key="model_selection"
    )

    @st.cache_resource
    def load_remote_pickle(url):
        """Download ``url`` and return the unpickled object.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        response = requests.get(url, timeout=60)
        response.raise_for_status()
        # SECURITY: unpickling executes arbitrary code supplied by the remote
        # host. Only point this at artifacts you control.
        return pickle.loads(response.content)

    @st.cache_resource
    def load_models():
        """Return ``(svd_model, trainset)`` downloaded from the model repository."""
        SVD_URL = "https://huggingface.co/lenawilli/App_models_Py/resolve/main/svd_model.pkl"
        TRAINSET_URL = "https://huggingface.co/lenawilli/App_models_Py/resolve/main/trainset.pkl"
        svd_model = load_remote_pickle(SVD_URL)
        trainset = load_remote_pickle(TRAINSET_URL)
        return svd_model, trainset

    svd_model, trainset = load_models()

    if not all_ratings_data:
        st.info("No ratings available yet. Start rating some movies!")
    else:
        ratings_df = pd.DataFrame(all_ratings_data)
        ratings_df["timestamp"] = pd.to_datetime(ratings_df["timestamp"])
        merged = pd.merge(ratings_df, movie_df, left_on="movie_id", right_on="movieId")

        def make_clickable_title(row):
            """Return the row's title as a link to that movie's detail page."""
            import html  # local import: used only for this markup helper

            title = html.escape(str(row["clean_title"]))
            # The detail page is selected by the ``movie_id`` query parameter.
            return f"<a href='?movie_id={int(row['movieId'])}' target='_self'>{title}</a>"

        def show_table(dataframe, label, checkbox_key):
            """Render ``dataframe`` as an HTML table under the heading ``label``.

            Only the first five rows are shown unless the "Show all"
            checkbox identified by ``checkbox_key`` is ticked.
            """
            show_all = st.checkbox(f"Show all in {label}", key=checkbox_key)
            st.subheader(label)
            display_df = dataframe.copy()
            if not show_all:
                display_df = display_df.head(5)
            if display_df.empty:
                st.caption("No entries.")
                return
            df_display = display_df[
                ["movieId", "clean_title", "rating", "genres", "year", "timestamp"]
            ].copy()
            df_display["Title"] = df_display.apply(make_clickable_title, axis=1)
            df_display["Rated"] = df_display["rating"].apply(render_star_rating)
            df_display["Date"] = df_display["timestamp"].dt.strftime("%Y-%m-%d")
            df_display = df_display[["Title", "Rated", "genres", "year", "Date"]]
            st.markdown(""" """, unsafe_allow_html=True)
            # escape=False so the anchor markup in "Title" is rendered as a link.
            html_table = df_display.to_html(classes='styled-table', escape=False, index=False)
            st.markdown(html_table, unsafe_allow_html=True)

        # Show all tables
        recent = merged.sort_values("timestamp", ascending=False)
        show_table(recent, "🕓 Recently Rated", checkbox_key="recently_rated")
        top = merged[merged["rating"] >= 4].sort_values(
            ["rating", "timestamp"], ascending=[False, False]
        )
        show_table(top, "🌟 Top Rated", checkbox_key="top_rated")
        worst = merged[merged["rating"] <= 2].sort_values(
            ["rating", "timestamp"], ascending=[True, False]
        )
        show_table(worst, "😞 Worst Rated", checkbox_key="worst_rated")

    # NOTE(review): the original nesting was lost; this section is placed at
    # the page level so that the "Rate a few movies" hint below is reachable.
    st.subheader("🎯 Recommended For You")
    user_ratings_dict = {r["movie_id"]: r["rating"] for r in all_ratings_data}
    if user_ratings_dict:
        ratings_full = pd.DataFrame(all_ratings_data)
        ratings_full["userId"] = 999999  # Dummy user
        ratings_full["rating"] = ratings_full["rating"].astype(float)
        with st.spinner("Loading recommendations..."):
            recommendations_full = recommend_with_svd(
                svd_model, trainset, ratings_full, user_ratings_dict, top_n=30
            )
        # Mix six picks from the ten best with four from ranks 11-30. The
        # sample sizes are capped because DataFrame.sample raises ValueError
        # when asked for more rows than are available.
        best_ten = recommendations_full.head(10)
        next_twenty = recommendations_full.iloc[10:30]
        top10 = best_ten.sample(n=min(6, len(best_ten)), random_state=42)
        top11_30 = next_twenty.sample(n=min(4, len(next_twenty)), random_state=99)
        combined = (
            pd.concat([top10, top11_30])
            .sample(frac=1, random_state=123)
            .reset_index(drop=True)
        )
        recommended_df = pd.merge(combined, movie_df, on="movieId", how="left")
        for _, movie in recommended_df.iterrows():
            poster_url, _ = get_tmdb_data(movie["clean_title"], movie["year"])
            # The poster URL was previously tested but never placed in the markup.
            poster_html = (
                f"<img src='{poster_url}'>\n\n"
                if poster_url and "placeholder.com" not in poster_url
                else "\n\nNo\nImage\n\n"
            )
            st.markdown(f"""

{poster_html}

{movie['clean_title']}
{movie['genres']} · {movie['year']}
Predicted Rating: {round(movie['rating'], 2)}

""", unsafe_allow_html=True)
    else:
        st.info("Rate a few movies to get recommendations.")