Spaces:

lenawilli
/

StreamlitApp

Sleeping

App Files Files Community

StreamlitApp / src /streamlit_app.py

lenawilli

Update src/streamlit_app.py

80461d1 verified 12 months ago

raw

history blame contribute delete

19.2 kB

	import streamlit as st
	import pandas as pd
	import random
	import os
	import json
	from datetime import datetime
	import requests
	import difflib
	import pickle
	from keras.models import load_model
	import requests

	# Set the Surprise data folder to a path under /tmp. This runs before the
	# recommendation_utils import below.
	# NOTE(review): presumably recommendation_utils imports `surprise`, which reads
	# this variable at import time — confirm.
	os.environ["SURPRISE_DATA_FOLDER"] = "/tmp/.surprise_data"


	from recommendation_utils import (
	load_svd_model, load_trainset,
	recommend_with_svd
	)
	# Disabled variant of the import above that also pulled in the neural-network helpers.
	#from recommendation_utils import (
	# load_nn_model, load_svd_model, load_trainset,
	# recommend_with_nn, recommend_with_svd, load_encodings
	#)

	# encodings = load_encodings("/tmp/encodings.pkl")

	# Use the wide page layout.
	st.set_page_config(layout="wide")

	# Movie catalogue CSV, expected next to this script.
	MOVIES_PATH = os.path.join(os.path.dirname(__file__), "movies.csv")
	# Ratings are persisted as a JSON list in this file.
	RATINGS_JSON_PATH = "/tmp/ratings.json"
	# Returned by get_tmdb_data when no poster is available; callers detect it by
	# looking for "placeholder.com" in the URL.
	POSTER_PLACEHOLDER = "https://via.placeholder.com/300x450.png?text=No+Poster"
	# Prefer a key supplied through the TMDB_API_KEY environment variable so the
	# credential can be rotated without a code change. The literal is kept as a
	# fallback so existing deployments keep working.
	TMDB_API_KEY = os.environ.get("TMDB_API_KEY") or "d15fc170483ad01d6b3d59561432fefc"

	@st.cache_data(show_spinner=False, ttl=86400)  # cache results for 24 hours
	def get_tmdb_data(title, year=None):
	    """Search TMDb for a movie and return its poster URL and TMDb page link.

	    Args:
	        title: Movie title used as the search query.
	        year: Optional release year; falsy values (None, 0) are not sent.

	    Returns:
	        A ``(poster_url, tmdb_link)`` tuple built from the first search hit.
	        ``poster_url`` is ``POSTER_PLACEHOLDER`` when the hit has no poster and
	        ``tmdb_link`` is ``None`` when it has no id. ``(POSTER_PLACEHOLDER, None)``
	        is returned when the request fails, the status is not 200, or there
	        are no results.
	    """
	    # NOTE: the fallback tuple is a normal return value, so it is cached for
	    # the full TTL just like a successful lookup.
	    fallback = (POSTER_PLACEHOLDER, None)

	    params = {"api_key": TMDB_API_KEY, "query": title}
	    if year:
	        params["year"] = year

	    try:
	        # Without a timeout a stalled connection would block the page render.
	        response = requests.get(
	            "https://api.themoviedb.org/3/search/movie",
	            params=params,
	            timeout=10,
	        )
	        if response.status_code != 200:
	            return fallback
	        payload = response.json()  # parse the body once
	    except (requests.RequestException, ValueError):
	        # Best effort: network errors and non-JSON bodies degrade to the placeholder.
	        return fallback

	    results = payload.get("results") if isinstance(payload, dict) else None
	    if not isinstance(results, list) or not results:
	        return fallback
	    first_hit = results[0]
	    if not isinstance(first_hit, dict):
	        return fallback

	    poster_path = first_hit.get("poster_path")
	    tmdb_id = first_hit.get("id")
	    poster_url = f"https://image.tmdb.org/t/p/w500{poster_path}" if poster_path else POSTER_PLACEHOLDER
	    tmdb_link = f"https://www.themoviedb.org/movie/{tmdb_id}" if tmdb_id else None
	    return poster_url, tmdb_link

	def load_ratings_cached():
	    """Return the parsed contents of RATINGS_JSON_PATH, or [] when the file is absent.

	    Despite the name, no caching is applied here: the file is read on every call.
	    """
	    if not os.path.exists(RATINGS_JSON_PATH):
	        return []
	    with open(RATINGS_JSON_PATH, "r") as ratings_file:
	        stored_ratings = json.load(ratings_file)
	    return stored_ratings

	def save_rating_to_json(entry):
	    """Insert or replace the rating for ``entry["movie_id"]`` in the ratings file.

	    Args:
	        entry: Dict with at least a "movie_id" key. Any stored rating with the
	            same movie id is dropped before the new entry is appended.
	    """
	    kept = [r for r in load_ratings_cached() if r.get("movie_id") != entry["movie_id"]]
	    kept.append(entry)
	    with open(RATINGS_JSON_PATH, "w") as f:
	        # default=str stringifies any value the json module cannot encode itself.
	        json.dump(kept, f, indent=2, default=str)

	    # Invalidate the loader's cache only when it actually has one. The loader
	    # is a plain function in this file, so an unconditional .clear() raised
	    # AttributeError after every successful save.
	    clear_cache = getattr(load_ratings_cached, "clear", None)
	    if callable(clear_cache):
	        clear_cache()

	@st.cache_data(show_spinner=False)
	def load_movies():
	    """Read MOVIES_PATH and add the derived ``year`` and ``clean_title`` columns.

	    Returns:
	        The movie DataFrame with ``year`` (int, 0 when the title has no
	        "(YYYY)" group), ``clean_title`` (title without "(YYYY)" groups,
	        stripped) and ``genres`` with missing values set to "Unknown".
	    """
	    movies = pd.read_csv(MOVIES_PATH)
	    raw_titles = movies["title"]

	    # First four-digit group in parentheses; titles without one become 0.
	    extracted_year = raw_titles.str.extract(r'\((\d{4})\)')
	    movies["year"] = extracted_year.fillna("0").astype(int)

	    without_year = raw_titles.str.replace(r'\(\d{4}\)', '', regex=True)
	    movies["clean_title"] = without_year.str.strip()

	    movies["genres"] = movies["genres"].fillna("Unknown")
	    return movies

	# Catalogue plus lookup tables between movie ids and full titles.
	movie_df = load_movies()
	movie_titles = movie_df["title"].unique().tolist()
	movie_id_to_title = {
	    mid: full_title for mid, full_title in zip(movie_df["movieId"], movie_df["title"])
	}
	title_to_movie_id = {
	    full_title: mid for full_title, mid in zip(movie_df["title"], movie_df["movieId"])
	}

	# Per-session lists, created empty on the first run of a session.
	for state_key in ("rated", "quiz_history"):
	    if state_key not in st.session_state:
	        st.session_state[state_key] = []

	# Inject the global dark-theme CSS and a hand-written HTML navigation bar.
	# The nav links and the search form issue plain GET requests to "/", so page
	# selection is carried by the URL query parameters (home, search, rateflow).
	st.markdown("""
	<style>
	html, body, [class*="css"] {
	background-color: #0b0b0b !important;
	color: #f0f0f0 !important;
	font-family: 'Georgia', serif;
	}
	.nav-bar {
	display: flex;
	justify-content: space-between;
	align-items: center;
	background-color: #1a1a1a;
	padding: 1rem;
	width: 100%;
	border-bottom: 2px solid #5c1a1b;
	}
	.nav-left, .nav-right {
	display: flex;
	align-items: center;
	}
	.nav-item {
	color: #f0f0f0;
	font-size: 18px;
	text-decoration: none;
	margin-right: 1rem;
	transition: color 0.3s;
	}
	.nav-item:hover {
	color: #b32d2e;
	}
	.search-form {
	display: flex;
	width: 100%;
	max-width: 700px;
	}
	.search-input {
	flex: 1;
	padding: 0.5rem;
	font-size: 16px;
	border: none;
	border-radius: 4px 0 0 4px;
	background-color: #333;
	color: white;
	}
	.search-button {
	padding: 0.5rem 1rem;
	font-size: 16px;
	background-color: #5c1a1b;
	color: white;
	border: none;
	border-radius: 0 4px 4px 0;
	cursor: pointer;
	}
	.search-button:hover {
	background-color: #732323;
	}
	.star {
	color: #d4af37;
	font-size: 1.4em;
	padding-right: 2px;
	}
	h1, h2, h3 {
	color: #b32d2e !important;
	}
	.stButton>button {
	background-color: #5c1a1b !important;
	color: white !important;
	border-radius: 5px;
	border: none;
	}
	.stButton>button:hover {
	background-color: #732323 !important;
	}
	</style>
	<div class="nav-bar">
	<div class="nav-left">
	<a class="nav-item" href="/?home=true" target="_self">Home</a>
	</div>
	<form class="search-form" action="/" method="get">
	<input type="text" name="search" class="search-input" placeholder="Search movies...">
	<button type="submit" class="search-button">Search</button>
	</form>
	<div class="nav-right">
	<a class="nav-item" href="/?rateflow=true" target="_self">Rate</a>
	</div>
	</div>
	""", unsafe_allow_html=True)

	# Routing inputs: which page is rendered depends on these URL query parameters.
	query_params = st.query_params
	page, search_query, movie_id = (
	    query_params.get(param_name) for param_name in ("rateflow", "search", "movie_id")
	)

	def render_star_rating(rating):
	    """Return ``rating`` star ``<span class='star'>`` elements joined into one HTML string."""
	    return "<span class='star'>★</span>" * rating

	# Ratings are read from disk once per script run; the detail and home pages below use this list.
	all_ratings_data = load_ratings_cached()

	# --- Movie detail page: rendered when the URL carries ?movie_id=<id> ---
	if movie_id:
	try:
	# Query parameters arrive as strings; a non-numeric value makes int() raise
	# ValueError, which the except at the end of this branch reports.
	movie_id = int(movie_id)
	match = movie_df[movie_df["movieId"] == movie_id]

	if match.empty:
	st.error(f"Movie with ID {movie_id} not found.")
	st.stop()

	movie_info = match.iloc[0]
	st.title(movie_info["clean_title"])
	poster_url, tmdb_link = get_tmdb_data(movie_info["clean_title"], movie_info["year"])

	col1, col2 = st.columns([1, 3])
	with col1:
	# get_tmdb_data signals "no poster" with the placeholder URL; draw a local
	# dashed box instead of loading that image.
	if poster_url and "placeholder.com" not in poster_url:
	st.image(poster_url, use_container_width=True)
	else:
	st.markdown("""
	<div style='width:100%;border:2px dashed gray;height:450px;display:flex;align-items:center;justify-content:center;color:gray;'>
	No picture available
	</div>
	""", unsafe_allow_html=True)

	with col2:
	st.subheader("Details")
	st.write(f"Genres: {movie_info['genres']}")
	st.write(f"Year: {movie_info['year']}")
	if tmdb_link:
	st.markdown(f"<a href='{tmdb_link}' target='_blank'>View on TMDb</a>", unsafe_allow_html=True)

	st.markdown("### Your Rating")
	# Preselect the stored rating, if any. The radio options are 1..5, so the
	# option index is rating - 1; without a stored rating the first option is used.
	existing_rating = next((r["rating"] for r in all_ratings_data if r["movie_id"] == movie_id), None)
	initial_index = (existing_rating - 1) if existing_rating else 0

	rating_key = f"detail_rating_{movie_id}"
	new_rating = st.radio("Rate this movie:", [1, 2, 3, 4, 5], horizontal=True, index=initial_index, key=rating_key)

	if st.button("Submit Rating", key=f"submit_rating_btn_{movie_id}"):
	save_rating_to_json({
	"movie_id": int(movie_info["movieId"]),
	"rating": new_rating,
	"timestamp": datetime.now().isoformat()
	})

	# NOTE(review): st.rerun() follows immediately, so this message is probably
	# never visible to the user — confirm.
	st.success("Rating saved.")
	st.rerun()

	# Any error raised while building the page is shown to the user instead of a traceback.
	except Exception as e:
	st.error(f"Could not load movie details: {e}")


	elif page:
	st.title("Rate Random Movies")

	movie = movie_df.sample(1).iloc[0]
	poster_url, tmdb_link = get_tmdb_data(movie["clean_title"], movie["year"])
	movie_id = int(movie["movieId"]) # explizit casten!

	col1, col2 = st.columns([1, 2])

	with col1:
	if poster_url and "placeholder.com" not in poster_url:
	st.image(poster_url, width=200)
	else:
	st.markdown("""
	<div style='width:200px;height:300px;border:2px dashed gray;display:flex;align-items:center;justify-content:center;color:gray;font-size:12px;'>
	No<br>Image
	</div>
	""", unsafe_allow_html=True)

	with col2:
	st.subheader(movie["clean_title"])
	st.markdown(f"Genres: {movie['genres']}")
	st.markdown(f"Year: {movie['year']}")
	if tmdb_link:
	st.markdown(f"<a href='{tmdb_link}' target='_blank'>View on TMDb</a>", unsafe_allow_html=True)

	# Bewertungsauswahl
	rating = st.radio("Rate this movie:", [1, 2, 3, 4, 5], horizontal=True, key=f"rating_{movie_id}")

	col_submit, col_skip = st.columns([1, 1])

	with col_submit:
	if st.button("Submit Rating", key=f"submit_{movie_id}"):
	save_rating_to_json({
	"movie_id": movie_id,
	"rating": rating,
	"timestamp": datetime.now().isoformat()
	})
	st.success("Rating saved.")
	st.rerun()

	with col_skip:
	if st.button("Didn't Watch", key=f"skip_{movie_id}"):
	st.rerun()

	# --- Search results page: rendered when the URL carries ?search=<text> ---
	elif search_query:
	st.title(f"Search Results for '{search_query}'")

	search_clean = search_query.strip().lower()

	# Case-insensitive rank of a title against the query:
	# 3 = exact match, 2 = title starts with the query, 1 = query is a substring, 0 = no match.
	def title_match_score(title):
	title_lower = title.lower()
	if title_lower == search_clean:
	return 3
	elif title_lower.startswith(search_clean):
	return 2
	elif search_clean in title_lower:
	return 1
	else:
	return 0

	movie_df["match_score"] = movie_df["clean_title"].apply(title_match_score)
	strong_matches = movie_df[movie_df["match_score"] > 0].sort_values("match_score", ascending=False)

	# No substring hit at all: fall back to difflib fuzzy matching. The raw query
	# is used here (not the stripped, lower-cased one), so this comparison is
	# case-sensitive. At most 25 results are shown in either case.
	if strong_matches.empty:
	close_titles = difflib.get_close_matches(search_query, movie_df["clean_title"], n=25, cutoff=0.5)
	filtered = movie_df[movie_df["clean_title"].isin(close_titles)].head(25)
	else:
	filtered = strong_matches.head(25)

	if filtered.empty:
	st.warning("No movies found.")
	else:
	# Styles for the result cards.
	# NOTE(review): the .poster rule appears twice below with identical declarations.
	st.markdown("""
	<style>
	.poster {
	width: 100px;
	height: 150px;
	flex-shrink: 0;
	border-radius: 4px;
	object-fit: cover;
	background: #333;
	}
	.movie-box {
	background-color: #1a1a1a;
	padding: 15px;
	border-radius: 10px;
	margin-bottom: 15px;
	display: flex;
	align-items: flex-start;
	gap: 20px;
	}
	.movie-box:hover {
	background-color: #262626;
	}
	.poster {
	width: 100px;
	height: 150px;
	flex-shrink: 0;
	border-radius: 4px;
	object-fit: cover;
	background: #333;
	}
	.movie-content {
	flex-grow: 1;
	}
	.movie-title {
	font-size: 20px;
	font-weight: bold;
	color: #e63946;
	margin-bottom: 0.5rem;
	}
	.movie-details {
	color: #ccc;
	font-size: 15px;
	margin-bottom: 0.5rem;
	}
	a.movie-link {
	color: #b32d2e;
	text-decoration: none;
	}
	a.movie-link:hover {
	text-decoration: underline;
	}
	</style>
	""", unsafe_allow_html=True)

	# One card per hit, with one get_tmdb_data lookup per row.
	for _, movie in filtered.iterrows():
	poster_url, _ = get_tmdb_data(movie["clean_title"], movie["year"])
	poster_html = (
	f"<img src='{poster_url}' class='poster' alt='Poster'>" if poster_url and "placeholder.com" not in poster_url
	else "<div style='width:100px;height:150px;border:2px dashed gray;display:flex;align-items:center;justify-content:center;color:gray;font-size:12px;'>No<br>Image</div>"
	)

	# The title links to the detail page through ?movie_id=<id>.
	# NOTE(review): title and genres are interpolated into HTML without escaping.
	st.markdown(f"""
	<div class="movie-box">
	{poster_html}
	<div class="movie-content">
	<div class="movie-title">
	<a href='/?movie_id={movie["movieId"]}' class="movie-link">{movie['clean_title']}</a>
	</div>
	<div class="movie-details">
	<p><strong>Genres:</strong> {movie['genres']}</p>
	<p><strong>Year:</strong> {movie['year']}</p>
	</div>
	</div>
	</div>
	""", unsafe_allow_html=True)

	# --- Home page: rendered when none of movie_id / rateflow / search is set ---
	else:
	st.title("Welcome to Movie Recommender")

	# Model selection; "SVD" is the only option offered.
	model_choice = st.radio(
	"Choose Recommendation Model:",
	options=["SVD"],
	index=0,
	horizontal=True,
	key="model_selection"
	)

	@st.cache_resource
	def load_remote_pickle(url):
	    """Download ``url`` and return the unpickled response body.

	    Args:
	        url: HTTP(S) address of a pickle file.

	    Returns:
	        The object produced by unpickling the response content.

	    Raises:
	        requests.HTTPError: If the server answers with an error status.
	        requests.RequestException: On connection failures or timeouts.
	    """
	    # Bound the wait: without a timeout a stalled download blocks the page forever.
	    response = requests.get(url, timeout=60)
	    response.raise_for_status()
	    # SECURITY: unpickling executes code embedded in the payload. Only call
	    # this with URLs whose content is fully trusted.
	    return pickle.loads(response.content)

	@st.cache_resource
	def load_models():
	    """Download the pickled SVD model and trainset and return them as a pair."""
	    artifact_urls = (
	        "https://huggingface.co/lenawilli/App_models_Py/resolve/main/svd_model.pkl",
	        "https://huggingface.co/lenawilli/App_models_Py/resolve/main/trainset.pkl",
	    )
	    # Fetched in order: model first, then trainset.
	    model, training_data = [load_remote_pickle(artifact_url) for artifact_url in artifact_urls]
	    return model, training_data

	svd_model, trainset = load_models()

	# Rating overview tables; shown only when at least one rating is stored.
	if not all_ratings_data:
	st.info("No ratings available yet. Start rating some movies!")
	else:
	ratings_df = pd.DataFrame(all_ratings_data)
	ratings_df["timestamp"] = pd.to_datetime(ratings_df["timestamp"])
	# pd.merge defaults to an inner join, so ratings whose movie_id has no row in
	# movie_df are dropped here.
	merged = pd.merge(ratings_df, movie_df, left_on="movie_id", right_on="movieId")

	# NOTE(review): make_clickable_title is not called anywhere in the visible code.
	def make_clickable_title(row):
	return f"<a href='/?movie_id={row['movieId']}' target='_self'>{row['clean_title']}</a>"

	# Render `dataframe` as an HTML table under the heading `label`. Only the
	# first five rows are shown unless the checkbox identified by `checkbox_key`
	# is ticked.
	def show_table(dataframe, label, checkbox_key):
	show_all = st.checkbox(f"Show all in {label}", key=checkbox_key)
	st.subheader(label)
	display_df = dataframe.copy()
	if not show_all:
	display_df = display_df.head(5)

	if display_df.empty:
	st.caption("No entries.")
	return

	df_display = display_df[["movieId", "clean_title", "rating", "genres", "year", "timestamp"]].copy()
	# Title column: link to the detail page via ?movie_id=<id>.
	df_display["Title"] = df_display.apply(
	lambda row: f"<a href='/?movie_id={row['movieId']}' target='_self' style='color:#e63946;text-decoration:none;'>{row['clean_title']}</a>",
	axis=1
	)
	df_display["Rated"] = df_display["rating"].apply(render_star_rating)
	df_display["Date"] = df_display["timestamp"].dt.strftime("%Y-%m-%d")
	df_display = df_display[["Title", "Rated", "genres", "year", "Date"]]

	# The table stylesheet is emitted on every show_table call.
	st.markdown("""
	<style>
	.styled-table {
	width: 100%;
	border-collapse: collapse;
	font-size: 16px;
	font-family: 'Segoe UI', sans-serif;
	background-color: #1a1a1a;
	color: #f0f0f0;
	border-radius: 8px;
	overflow: hidden;
	margin-bottom: 2em;
	}
	.styled-table thead tr {
	background-color: #5c1a1b;
	text-align: left;
	}
	.styled-table th, .styled-table td {
	padding: 12px 15px;
	text-align: left;
	}
	.styled-table tbody tr {
	border-bottom: 1px solid #333;
	}
	.styled-table tbody tr:hover {
	background-color: #2a2a2a;
	}
	</style>
	""", unsafe_allow_html=True)

	# escape=False keeps the <a>/<span> markup built above intact; as a side
	# effect, title and genre text is not HTML-escaped either.
	html_table = df_display.to_html(classes='styled-table', escape=False, index=False)
	st.markdown(html_table, unsafe_allow_html=True)

	# Show all tables
	# Newest first.
	recent = merged.sort_values("timestamp", ascending=False)
	show_table(recent, "🕓 Recently Rated", checkbox_key="recently_rated")

	# Ratings of 4 or 5, highest first, newest first within the same rating.
	top = merged[merged["rating"] >= 4].sort_values(["rating", "timestamp"], ascending=[False, False])
	show_table(top, "🌟 Top Rated", checkbox_key="top_rated")

	# Ratings of 1 or 2, lowest first, newest first within the same rating.
	worst = merged[merged["rating"] <= 2].sort_values(["rating", "timestamp"], ascending=[True, False])
	show_table(worst, "😞 Worst Rated", checkbox_key="worst_rated")

	# Recommendation cards, built from the stored ratings with the SVD model.
	st.subheader("🎯 Recommended For You")

	user_ratings_dict = {r["movie_id"]: r["rating"] for r in all_ratings_data}

	if user_ratings_dict:
	    ratings_full = pd.DataFrame(all_ratings_data)
	    ratings_full["userId"] = 999999 # Dummy user
	    ratings_full["rating"] = ratings_full["rating"].astype(float)

	    with st.spinner("Loading recommendations..."):
	        # Assumes the result is a DataFrame with "movieId" and "rating"
	        # columns, as the merge and card rendering below use them — confirm.
	        recommendations_full = recommend_with_svd(svd_model, trainset, ratings_full, user_ratings_dict, top_n=30)

	    # Show six of the ten best predictions plus four from ranks 11-30.
	    # DataFrame.sample raises ValueError when n exceeds the number of rows,
	    # so clamp n in case fewer recommendations come back. The fixed seeds
	    # make the selection repeatable for the same input.
	    best_ten = recommendations_full.head(10)
	    runners_up = recommendations_full.iloc[10:30]
	    top10 = best_ten.sample(n=min(6, len(best_ten)), random_state=42)
	    top11_30 = runners_up.sample(n=min(4, len(runners_up)), random_state=99)

	    combined = pd.concat([top10, top11_30]).sample(frac=1, random_state=123).reset_index(drop=True)

	    recommended_df = pd.merge(combined, movie_df, on="movieId", how="left")

	    for _, movie in recommended_df.iterrows():
	        poster_url, _ = get_tmdb_data(movie["clean_title"], movie["year"])
	        # The placeholder URL means "no poster"; draw a dashed box instead.
	        poster_html = (
	            f"<img src='{poster_url}' width='100' style='border-radius:5px;' />"
	            if poster_url and "placeholder.com" not in poster_url
	            else "<div style='width:100px;height:150px;border:2px dashed gray;display:flex;align-items:center;justify-content:center;color:gray;font-size:12px;'>No<br>Image</div>"
	        )

	        st.markdown(f"""
	<div style="margin-bottom:1em;padding:1em;border:1px solid #333;border-radius:10px;background:#1a1a1a;">
	<div style="display:flex;gap:20px;">
	<div>{poster_html}</div>
	<div>
	<a href='/?movie_id={movie["movieId"]}' style="font-size:18px;color:#e63946;font-weight:bold;">{movie['clean_title']}</a><br>
	<span style="color:#ccc;">{movie['genres']} · {movie['year']}</span><br>
	<span style="color:#d4af37;">Predicted Rating: {round(movie['rating'], 2)}</span>
	</div>
	</div>
	</div>
	""", unsafe_allow_html=True)


	else:
	    st.info("Rate a few movies to get recommendations.")