# Source: StreamlitApp / src/streamlit_app.py (Hugging Face Space)
# Author: lenawilli
# Commit message: Update src/streamlit_app.py
# Commit: 80461d1 (verified)
import streamlit as st
import pandas as pd
import random
import os
import json
from datetime import datetime
import requests
import difflib
import pickle
from keras.models import load_model
import requests
os.environ["SURPRISE_DATA_FOLDER"] = "/tmp/.surprise_data"
from recommendation_utils import (
load_svd_model, load_trainset,
recommend_with_svd
)
#from recommendation_utils import (
# load_nn_model, load_svd_model, load_trainset,
# recommend_with_nn, recommend_with_svd, load_encodings
#)
# encodings = load_encodings("/tmp/encodings.pkl")
# First Streamlit call of the script: use the full browser width.
st.set_page_config(layout="wide")

# Movie catalogue CSV, expected next to this script.
MOVIES_PATH = os.path.join(os.path.dirname(__file__), "movies.csv")
# All ratings are stored as one JSON list in this file.
RATINGS_JSON_PATH = "/tmp/ratings.json"
# Sentinel poster URL; the pages test for "placeholder.com" to detect a missing poster.
POSTER_PLACEHOLDER = "https://via.placeholder.com/300x450.png?text=No+Poster"
# SECURITY: an API key committed to source control is effectively public. The key
# can be supplied through the TMDB_API_KEY environment variable; the literal is
# kept only as a backward-compatible fallback and should be rotated and removed.
TMDB_API_KEY = os.environ.get("TMDB_API_KEY") or "d15fc170483ad01d6b3d59561432fefc"
@st.cache_data(show_spinner=False, ttl=86400)  # cache results for 24 hours
def get_tmdb_data(title, year=None):
    """Look up a movie on TMDb and return its poster URL and TMDb page link.

    Args:
        title: Movie title used as the search query.
        year: Optional release year. Falsy values (None, 0) are not sent.

    Returns:
        A ``(poster_url, tmdb_link)`` tuple built from the first search hit.
        ``poster_url`` is POSTER_PLACEHOLDER when the hit has no poster and
        ``tmdb_link`` is None when the hit has no id. When the request fails,
        times out, returns a non-200 status, an unexpected payload or no
        results, ``(POSTER_PLACEHOLDER, None)`` is returned.

    Note:
        The fallback result is a normal return value, so it is cached like any
        other result for the duration of the TTL.
    """
    params = {
        "api_key": TMDB_API_KEY,
        "query": title,
    }
    if year:
        params["year"] = year

    try:
        # A timeout keeps a stalled TMDb connection from blocking the page.
        response = requests.get(
            "https://api.themoviedb.org/3/search/movie",
            params=params,
            timeout=10,
        )
        if response.status_code != 200:
            return POSTER_PLACEHOLDER, None
        # Parse the body once and reuse it.
        results = response.json().get("results")
        if not results:
            return POSTER_PLACEHOLDER, None
        first_hit = results[0]
        poster_path = first_hit.get("poster_path")
        tmdb_id = first_hit.get("id")
    except (
        requests.RequestException,
        ValueError,
        KeyError,
        IndexError,
        TypeError,
        AttributeError,
    ):
        # Poster lookup is best-effort: network errors and malformed payloads
        # degrade to the placeholder instead of breaking the page.
        return POSTER_PLACEHOLDER, None

    poster_url = (
        f"https://image.tmdb.org/t/p/w500{poster_path}" if poster_path else POSTER_PLACEHOLDER
    )
    tmdb_link = f"https://www.themoviedb.org/movie/{tmdb_id}" if tmdb_id else None
    return poster_url, tmdb_link
def load_ratings_cached():
    """Read all stored ratings from RATINGS_JSON_PATH.

    Despite the name, no cache decorator is applied to this function, so every
    call re-reads the file.

    Returns:
        The list of rating entries stored in the file. An empty list is
        returned when the file is missing, cannot be read, is not valid JSON,
        or does not contain a JSON list.
    """
    try:
        with open(RATINGS_JSON_PATH, "r", encoding="utf-8") as f:
            stored = json.load(f)
    except (OSError, ValueError):
        # Missing or corrupt file: behave as "no ratings yet" instead of
        # crashing every page that loads the ratings.
        return []
    return stored if isinstance(stored, list) else []


def save_rating_to_json(entry):
    """Insert or replace one rating and rewrite the ratings file.

    Any stored entry with the same ``movie_id`` as ``entry`` is dropped, the
    new entry is appended, and the complete list is written back to
    RATINGS_JSON_PATH.

    Args:
        entry: Rating record; must contain a ``"movie_id"`` key. Values that
            are not JSON-serializable are written via ``str()``.
    """
    all_ratings = [r for r in load_ratings_cached() if r["movie_id"] != entry["movie_id"]]
    all_ratings.append(entry)
    with open(RATINGS_JSON_PATH, "w", encoding="utf-8") as f:
        json.dump(all_ratings, f, indent=2, default=str)

    # Invalidate the loader's cache only if it has one. The loader is a plain
    # function here, and calling ``.clear()`` on it unconditionally raised
    # AttributeError right after the file had been written.
    clear_cache = getattr(load_ratings_cached, "clear", None)
    if callable(clear_cache):
        clear_cache()
@st.cache_data(show_spinner=False)
def load_movies():
    """Load the movie catalogue from MOVIES_PATH and add derived columns.

    The CSV is read with its ``title`` and ``genres`` columns used as follows:

    * ``year``: the first four-digit number in parentheses found in the title,
      as an integer; 0 when the title contains none.
    * ``clean_title``: the title with every ``(dddd)`` group removed and
      surrounding whitespace stripped.
    * ``genres``: missing values replaced by ``"Unknown"``.

    Returns:
        The catalogue DataFrame including the columns above.
    """
    movies = pd.read_csv(MOVIES_PATH)
    raw_titles = movies["title"]

    extracted_year = raw_titles.str.extract(r'\((\d{4})\)')
    movies["year"] = extracted_year.fillna("0").astype(int)

    without_year = raw_titles.str.replace(r'\(\d{4}\)', '', regex=True)
    movies["clean_title"] = without_year.str.strip()

    movies["genres"] = movies["genres"].fillna("Unknown")
    return movies
# Catalogue and lookup structures derived from it.
movie_df = load_movies()
movie_titles = movie_df["title"].unique().tolist()
movie_id_to_title = {
    movie_key: movie_title
    for movie_key, movie_title in zip(movie_df["movieId"], movie_df["title"])
}
title_to_movie_id = {
    movie_title: movie_key
    for movie_title, movie_key in zip(movie_df["title"], movie_df["movieId"])
}

# Per-session lists; created empty the first time a session runs the script.
for _state_key in ("rated", "quiz_history"):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = []
# Inject the global dark-theme CSS and the top navigation bar.
# The bar contains a Home link (/?home=true), a GET search form that submits the
# text as the "search" query parameter, and a Rate link (/?rateflow=true).
# The query parameters are read further down to decide which page to render.
st.markdown("""
<style>
html, body, [class*="css"] {
background-color: #0b0b0b !important;
color: #f0f0f0 !important;
font-family: 'Georgia', serif;
}
.nav-bar {
display: flex;
justify-content: space-between;
align-items: center;
background-color: #1a1a1a;
padding: 1rem;
width: 100%;
border-bottom: 2px solid #5c1a1b;
}
.nav-left, .nav-right {
display: flex;
align-items: center;
}
.nav-item {
color: #f0f0f0;
font-size: 18px;
text-decoration: none;
margin-right: 1rem;
transition: color 0.3s;
}
.nav-item:hover {
color: #b32d2e;
}
.search-form {
display: flex;
width: 100%;
max-width: 700px;
}
.search-input {
flex: 1;
padding: 0.5rem;
font-size: 16px;
border: none;
border-radius: 4px 0 0 4px;
background-color: #333;
color: white;
}
.search-button {
padding: 0.5rem 1rem;
font-size: 16px;
background-color: #5c1a1b;
color: white;
border: none;
border-radius: 0 4px 4px 0;
cursor: pointer;
}
.search-button:hover {
background-color: #732323;
}
.star {
color: #d4af37;
font-size: 1.4em;
padding-right: 2px;
}
h1, h2, h3 {
color: #b32d2e !important;
}
.stButton>button {
background-color: #5c1a1b !important;
color: white !important;
border-radius: 5px;
border: none;
}
.stButton>button:hover {
background-color: #732323 !important;
}
</style>
<div class="nav-bar">
<div class="nav-left">
<a class="nav-item" href="/?home=true" target="_self">Home</a>
</div>
<form class="search-form" action="/" method="get">
<input type="text" name="search" class="search-input" placeholder="Search movies...">
<button type="submit" class="search-button">Search</button>
</form>
<div class="nav-right">
<a class="nav-item" href="/?rateflow=true" target="_self">Rate</a>
</div>
</div>
""", unsafe_allow_html=True)
# Routing inputs: the page to render is chosen from these URL query parameters.
# "rateflow" selects the rating flow, "search" carries the search text and
# "movie_id" selects a movie detail page.
query_params = st.query_params
page, search_query, movie_id = (
    query_params.get(param_name) for param_name in ("rateflow", "search", "movie_id")
)
def render_star_rating(rating):
    """Return ``rating`` star glyphs as HTML spans styled by the ``star`` CSS class.

    Args:
        rating: Number of stars to render (an integer). Zero or a negative
            value yields an empty string.

    Returns:
        The concatenated ``<span class='star'>`` elements.
    """
    # U+2605 BLACK STAR, written as an escape so the glyph survives re-encoding;
    # the literal previously contained the mis-decoded UTF-8 bytes of this character.
    return "".join("<span class='star'>\u2605</span>" for _ in range(rating))
# Ratings are read once per script run and shared by the pages below.
all_ratings_data = load_ratings_cached()

if movie_id:
    # Detail page for one movie, selected through ?movie_id=<id>.
    # NOTE(review): the flattened source does not show whether the rating
    # widgets belong inside the details column or below both columns; they are
    # placed inside the details column here -- confirm against the original.
    try:
        movie_id = int(movie_id)
        matching_rows = movie_df[movie_df["movieId"] == movie_id]
        if matching_rows.empty:
            st.error(f"Movie with ID {movie_id} not found.")
            st.stop()

        movie_info = matching_rows.iloc[0]
        st.title(movie_info["clean_title"])
        poster_url, tmdb_link = get_tmdb_data(movie_info["clean_title"], movie_info["year"])

        poster_col, details_col = st.columns([1, 3])
        with poster_col:
            poster_missing = not poster_url or "placeholder.com" in poster_url
            if poster_missing:
                st.markdown("""
<div style='width:100%;border:2px dashed gray;height:450px;display:flex;align-items:center;justify-content:center;color:gray;'>
No picture available
</div>
""", unsafe_allow_html=True)
            else:
                st.image(poster_url, use_container_width=True)

        with details_col:
            st.subheader("Details")
            st.write(f"**Genres:** {movie_info['genres']}")
            st.write(f"**Year:** {movie_info['year']}")
            if tmdb_link:
                st.markdown(f"<a href='{tmdb_link}' target='_blank'>View on TMDb</a>", unsafe_allow_html=True)

            st.markdown("### Your Rating")
            # Preselect the stored rating for this movie, if there is one.
            existing_rating = None
            for stored_entry in all_ratings_data:
                if stored_entry["movie_id"] == movie_id:
                    existing_rating = stored_entry["rating"]
                    break
            if existing_rating:
                initial_index = existing_rating - 1
            else:
                initial_index = 0

            rating_key = f"detail_rating_{movie_id}"
            new_rating = st.radio(
                "Rate this movie:",
                [1, 2, 3, 4, 5],
                horizontal=True,
                index=initial_index,
                key=rating_key,
            )
            submitted = st.button("Submit Rating", key=f"submit_rating_btn_{movie_id}")
            if submitted:
                rating_entry = {
                    "movie_id": int(movie_info["movieId"]),
                    "rating": new_rating,
                    "timestamp": datetime.now().isoformat()
                }
                save_rating_to_json(rating_entry)
                st.success("Rating saved.")
                st.rerun()
    except Exception as e:
        st.error(f"Could not load movie details: {e}")
elif page:
st.title("Rate Random Movies")
movie = movie_df.sample(1).iloc[0]
poster_url, tmdb_link = get_tmdb_data(movie["clean_title"], movie["year"])
movie_id = int(movie["movieId"]) # explizit casten!
col1, col2 = st.columns([1, 2])
with col1:
if poster_url and "placeholder.com" not in poster_url:
st.image(poster_url, width=200)
else:
st.markdown("""
<div style='width:200px;height:300px;border:2px dashed gray;display:flex;align-items:center;justify-content:center;color:gray;font-size:12px;'>
No<br>Image
</div>
""", unsafe_allow_html=True)
with col2:
st.subheader(movie["clean_title"])
st.markdown(f"**Genres:** {movie['genres']}")
st.markdown(f"**Year:** {movie['year']}")
if tmdb_link:
st.markdown(f"<a href='{tmdb_link}' target='_blank'>View on TMDb</a>", unsafe_allow_html=True)
# Bewertungsauswahl
rating = st.radio("Rate this movie:", [1, 2, 3, 4, 5], horizontal=True, key=f"rating_{movie_id}")
col_submit, col_skip = st.columns([1, 1])
with col_submit:
if st.button("Submit Rating", key=f"submit_{movie_id}"):
save_rating_to_json({
"movie_id": movie_id,
"rating": rating,
"timestamp": datetime.now().isoformat()
})
st.success("Rating saved.")
st.rerun()
with col_skip:
if st.button("Didn't Watch", key=f"skip_{movie_id}"):
st.rerun()
elif search_query:
st.title(f"Search Results for '{search_query}'")
search_clean = search_query.strip().lower()
def title_match_score(title):
title_lower = title.lower()
if title_lower == search_clean:
return 3
elif title_lower.startswith(search_clean):
return 2
elif search_clean in title_lower:
return 1
else:
return 0
movie_df["match_score"] = movie_df["clean_title"].apply(title_match_score)
strong_matches = movie_df[movie_df["match_score"] > 0].sort_values("match_score", ascending=False)
if strong_matches.empty:
close_titles = difflib.get_close_matches(search_query, movie_df["clean_title"], n=25, cutoff=0.5)
filtered = movie_df[movie_df["clean_title"].isin(close_titles)].head(25)
else:
filtered = strong_matches.head(25)
if filtered.empty:
st.warning("No movies found.")
else:
st.markdown("""
<style>
.poster {
width: 100px;
height: 150px;
flex-shrink: 0;
border-radius: 4px;
object-fit: cover;
background: #333;
}
.movie-box {
background-color: #1a1a1a;
padding: 15px;
border-radius: 10px;
margin-bottom: 15px;
display: flex;
align-items: flex-start;
gap: 20px;
}
.movie-box:hover {
background-color: #262626;
}
.poster {
width: 100px;
height: 150px;
flex-shrink: 0;
border-radius: 4px;
object-fit: cover;
background: #333;
}
.movie-content {
flex-grow: 1;
}
.movie-title {
font-size: 20px;
font-weight: bold;
color: #e63946;
margin-bottom: 0.5rem;
}
.movie-details {
color: #ccc;
font-size: 15px;
margin-bottom: 0.5rem;
}
a.movie-link {
color: #b32d2e;
text-decoration: none;
}
a.movie-link:hover {
text-decoration: underline;
}
</style>
""", unsafe_allow_html=True)
for _, movie in filtered.iterrows():
poster_url, _ = get_tmdb_data(movie["clean_title"], movie["year"])
poster_html = (
f"<img src='{poster_url}' class='poster' alt='Poster'>" if poster_url and "placeholder.com" not in poster_url
else "<div style='width:100px;height:150px;border:2px dashed gray;display:flex;align-items:center;justify-content:center;color:gray;font-size:12px;'>No<br>Image</div>"
)
st.markdown(f"""
<div class="movie-box">
{poster_html}
<div class="movie-content">
<div class="movie-title">
<a href='/?movie_id={movie["movieId"]}' class="movie-link">{movie['clean_title']}</a>
</div>
<div class="movie-details">
<p><strong>Genres:</strong> {movie['genres']}</p>
<p><strong>Year:</strong> {movie['year']}</p>
</div>
</div>
</div>
""", unsafe_allow_html=True)
else:
st.title("Welcome to Movie Recommender")
# Modell-Auswahl Dropdown
model_choice = st.radio(
"Choose Recommendation Model:",
options=["SVD"],
index=0,
horizontal=True,
key="model_selection"
)
@st.cache_resource
def load_remote_pickle(url):
response = requests.get(url)
response.raise_for_status()
return pickle.loads(response.content)
@st.cache_resource
def load_models():
SVD_URL = "https://huggingface.co/lenawilli/App_models_Py/resolve/main/svd_model.pkl"
TRAINSET_URL = "https://huggingface.co/lenawilli/App_models_Py/resolve/main/trainset.pkl"
svd_model = load_remote_pickle(SVD_URL)
trainset = load_remote_pickle(TRAINSET_URL)
return svd_model, trainset
svd_model, trainset = load_models()
if not all_ratings_data:
st.info("No ratings available yet. Start rating some movies!")
else:
ratings_df = pd.DataFrame(all_ratings_data)
ratings_df["timestamp"] = pd.to_datetime(ratings_df["timestamp"])
merged = pd.merge(ratings_df, movie_df, left_on="movie_id", right_on="movieId")
def make_clickable_title(row):
return f"<a href='/?movie_id={row['movieId']}' target='_self'>{row['clean_title']}</a>"
def show_table(dataframe, label, checkbox_key):
show_all = st.checkbox(f"Show all in {label}", key=checkbox_key)
st.subheader(label)
display_df = dataframe.copy()
if not show_all:
display_df = display_df.head(5)
if display_df.empty:
st.caption("No entries.")
return
df_display = display_df[["movieId", "clean_title", "rating", "genres", "year", "timestamp"]].copy()
df_display["Title"] = df_display.apply(
lambda row: f"<a href='/?movie_id={row['movieId']}' target='_self' style='color:#e63946;text-decoration:none;'>{row['clean_title']}</a>",
axis=1
)
df_display["Rated"] = df_display["rating"].apply(render_star_rating)
df_display["Date"] = df_display["timestamp"].dt.strftime("%Y-%m-%d")
df_display = df_display[["Title", "Rated", "genres", "year", "Date"]]
st.markdown("""
<style>
.styled-table {
width: 100%;
border-collapse: collapse;
font-size: 16px;
font-family: 'Segoe UI', sans-serif;
background-color: #1a1a1a;
color: #f0f0f0;
border-radius: 8px;
overflow: hidden;
margin-bottom: 2em;
}
.styled-table thead tr {
background-color: #5c1a1b;
text-align: left;
}
.styled-table th, .styled-table td {
padding: 12px 15px;
text-align: left;
}
.styled-table tbody tr {
border-bottom: 1px solid #333;
}
.styled-table tbody tr:hover {
background-color: #2a2a2a;
}
</style>
""", unsafe_allow_html=True)
html_table = df_display.to_html(classes='styled-table', escape=False, index=False)
st.markdown(html_table, unsafe_allow_html=True)
# Show all tables
recent = merged.sort_values("timestamp", ascending=False)
show_table(recent, "πŸ•“ Recently Rated", checkbox_key="recently_rated")
top = merged[merged["rating"] >= 4].sort_values(["rating", "timestamp"], ascending=[False, False])
show_table(top, "🌟 Top Rated", checkbox_key="top_rated")
worst = merged[merged["rating"] <= 2].sort_values(["rating", "timestamp"], ascending=[True, False])
show_table(worst, "😞 Worst Rated", checkbox_key="worst_rated")
st.subheader("🎯 Recommended For You")
user_ratings_dict = {r["movie_id"]: r["rating"] for r in all_ratings_data}
import random
if user_ratings_dict:
ratings_full = pd.DataFrame(all_ratings_data)
ratings_full["userId"] = 999999 # Dummy user
ratings_full["rating"] = ratings_full["rating"].astype(float)
with st.spinner("Loading recommendations..."):
recommendations_full = recommend_with_svd(svd_model, trainset, ratings_full, user_ratings_dict, top_n=30)
top10 = recommendations_full.head(10).sample(n=6, random_state=42)
top11_30 = recommendations_full.iloc[10:30].sample(n=4, random_state=99)
combined = pd.concat([top10, top11_30]).sample(frac=1, random_state=123).reset_index(drop=True)
recommended_df = pd.merge(combined, movie_df, on="movieId", how="left")
for _, movie in recommended_df.iterrows():
poster_url, _ = get_tmdb_data(movie["clean_title"], movie["year"])
poster_html = (
f"<img src='{poster_url}' width='100' style='border-radius:5px;' />"
if poster_url and "placeholder.com" not in poster_url
else "<div style='width:100px;height:150px;border:2px dashed gray;display:flex;align-items:center;justify-content:center;color:gray;font-size:12px;'>No<br>Image</div>"
)
st.markdown(f"""
<div style="margin-bottom:1em;padding:1em;border:1px solid #333;border-radius:10px;background:#1a1a1a;">
<div style="display:flex;gap:20px;">
<div>{poster_html}</div>
<div>
<a href='/?movie_id={movie["movieId"]}' style="font-size:18px;color:#e63946;font-weight:bold;">{movie['clean_title']}</a><br>
<span style="color:#ccc;">{movie['genres']} Β· {movie['year']}</span><br>
<span style="color:#d4af37;">Predicted Rating: {round(movie['rating'], 2)}</span>
</div>
</div>
</div>
""", unsafe_allow_html=True)
else:
st.info("Rate a few movies to get recommendations.")