# app.py - OPTIMIZED VERSION with Specialist Centroids & Normalized Ratings
# Key optimization: Uses pre-computed top_similar_items for O(1) lookups
# Layer 3 Normalization: Σ(Similarity × Rating) / Σ|Similarity| for proper collaborative filtering
import gradio as gr
import json
import os
import time
import requests
import litellm
from config import TMDB_API_KEY, LLM_MODEL_NAME, BASE_PROMPT, LOGIC_INJECTION, SOCIAL_INJECTION
# --- Configuration & Constants ---
# Directory that load_data() joins with each artifact file name.
PROCESSED_DIR = 'data/processed'
# Image URL returned by get_poster_url() whenever no TMDB poster can be
# resolved; also the initial image of the detail-view poster component.
POSTER_PLACEHOLDER = "https://placehold.co/150x225/000000/FFFFFF?text=Poster"
# --- Data Loading & Helpers ---
def load_data(filename, is_pickle=False):
    """Load one pre-computed artifact from ``PROCESSED_DIR``.

    Args:
        filename: File name, joined onto ``PROCESSED_DIR``.
        is_pickle: When true the file is opened in binary mode and decoded
            with ``pickle``; otherwise it is parsed as JSON text.

    Returns:
        The deserialized Python object.

    Raises:
        FileNotFoundError: If the file does not exist. The message names the
            missing file and points at ``data_prep.py``; the original OS error
            is attached as the cause.
    """
    import pickle

    path = os.path.join(PROCESSED_DIR, filename)
    try:
        if is_pickle:
            # NOTE(security): pickle.load can execute code embedded in the
            # file. Only load artifacts produced by a trusted pipeline.
            with open(path, 'rb') as f:
                return pickle.load(f)
        # Read JSON as UTF-8 explicitly instead of relying on the platform's
        # locale-dependent default encoding.
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError as err:
        raise FileNotFoundError(
            f"Could not find '{filename}'. Please ensure data_prep.py ran successfully."
        ) from err
def get_poster_url(movie_title):
    """Fetches a movie poster URL from the TMDB API.

    If the text between the last "(" and the last ")" of ``movie_title`` is
    all digits it is sent as the ``year`` search parameter, and only the part
    of the title before that "(" is used as the query.

    Args:
        movie_title: Display title, optionally ending in "(YYYY)".

    Returns:
        A ``https://image.tmdb.org/t/p/w500...`` URL built from the first
        search result's ``poster_path``, or ``POSTER_PLACEHOLDER`` when no API
        key is configured, the request fails, or no poster is available.
    """
    if not TMDB_API_KEY or TMDB_API_KEY == "YOUR_TMDB_API_KEY":
        return POSTER_PLACEHOLDER

    search_title = movie_title
    year = None
    open_idx = movie_title.rfind('(')
    close_idx = movie_title.rfind(')')
    if open_idx != -1 and close_idx != -1:
        year_str = movie_title[open_idx + 1:close_idx]
        if year_str.isdigit():
            year = int(year_str)
            search_title = movie_title[:open_idx].strip()

    search_params = {'api_key': TMDB_API_KEY, 'query': search_title}
    if year:
        search_params['year'] = year

    try:
        response = requests.get("https://api.themoviedb.org/3/search/movie",
                                params=search_params, timeout=3)
        response.raise_for_status()
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decode failures on requests versions where
        # the decode error is not a RequestException subclass.
        print(f"TMDB API call failed: {e}")
        return POSTER_PLACEHOLDER

    # Tolerate payloads without a usable 'results' list instead of raising
    # KeyError/IndexError into the UI callback that called us.
    results = data.get('results') if isinstance(data, dict) else None
    if results:
        poster_path = results[0].get('poster_path')
        if poster_path:
            return f"https://image.tmdb.org/t/p/w500{poster_path}"
    return POSTER_PLACEHOLDER
# Load data at startup
# Every artifact is read once, at import time, through load_data(); a missing
# file aborts startup with FileNotFoundError.
print("Loading pre-computed data...")
# Keyed by persona name; each entry is read for its 'consolidated_history'.
persona_archetypes = load_data('persona_archetypes.json')
# persona name -> list of recommendation dicts (each read for 'movie_id').
home_recommendations = load_data('home_recommendations.json')
# persona name -> {movie id string: hook text} used for the cached home page.
cached_hooks = load_data('cached_hooks.json')
# movie id string -> dict read for 'movie_title', 'genres' and optional 'year'.
movie_metadata = load_data('movie_metadata.json')
# movie id string -> {similar movie id: similarity score}.
top_similar_items = load_data('top_similar_items.pkl', is_pickle=True)
# NOTE(review): not referenced anywhere else in the visible part of this file.
user_avg_ratings = load_data('user_avg_ratings.json')
print(f"✅ Loaded pre-computed similarity lookups for {len(top_similar_items)} movies")
# --- LLM EXPLANATION FUNCTION ---
def call_llm_with_fallback(full_prompt, movie_data, persona_name):
    """Calls the LLM using LiteLLM and provides a fallback explanation.

    Args:
        full_prompt: Complete user-role prompt sent to the model.
        movie_data: Movie metadata dict; 'movie_title' is used for logging and
            'genres' for the fallback sentence.
        persona_name: Accepted for call compatibility; not used here.

    Returns:
        ``(text, is_fallback)``. ``is_fallback`` is False when the model
        produced a non-empty explanation, and True when the call failed or the
        model returned no text and the template sentence is returned instead.
    """
    try:
        print(f"DEBUG: Generating explanation for '{movie_data.get('movie_title', 'Unknown')}' using model: {LLM_MODEL_NAME}")
        start_ts = time.time()
        response = litellm.completion(
            model=LLM_MODEL_NAME,
            messages=[{"role": "user", "content": full_prompt}],
            max_tokens=120,
            temperature=0.1,
            stream=False
        )
        print(f"DEBUG: Explanation generated in {time.time() - start_ts:.4f}s")
        report = response.choices[0].message.content
        # A None/blank completion is not a usable explanation; route it to the
        # same fallback path as an API error rather than rendering nothing.
        if not report or not report.strip():
            raise ValueError("LLM returned an empty explanation")
        print("--- LLM Response ---")
        print(report)
        print("--------------------")
        return report, False
    except Exception as e:
        # Deliberate catch-all: this is the best-effort boundary for the UI,
        # any provider failure degrades to the template text below.
        error_message = f"LiteLLM API Error: {e}"
        print(f"⚠️ {error_message}")
        gr.Warning(error_message)
        # Fallback logic
        genres = movie_data.get('genres', ['Drama'])
        primary_genre = genres[0] if genres else "Drama"
        fallback_text = f"Recommended because you enjoyed similar {primary_genre} films. Highly rated by users with your preferences."
        return fallback_text, True
def generate_hook_on_the_fly(movie_title):
    """Ask the LLM for a short hook line for ``movie_title``.

    Returns the model's text with surrounding whitespace and all double
    quotes removed. If anything in the call or post-processing raises, the
    error is printed and a fixed default hook is returned instead.
    """
    messages = [{
        "role": "user",
        "content": f"Generate a 5-10 word snappy, atmospheric hook for the movie: {movie_title}.",
    }]
    try:
        print(f"DEBUG: Generating hook for '{movie_title}' using model: {LLM_MODEL_NAME}")
        started = time.time()
        completion = litellm.completion(
            model=LLM_MODEL_NAME,
            messages=messages,
            max_tokens=25,
            temperature=0.7,
            stream=False,
        )
        print(f"DEBUG: Hook generated in {time.time() - started:.4f}s")
        raw_text = completion.choices[0].message.content
        return raw_text.strip().replace('"', '')
    except Exception as e:
        print(f"LiteLLM API call for on-the-fly hook failed: {e}")
        return "A truly captivating film."
def generate_explanation(movie_id, persona_name, style):
    """Build the explanation prompt for a movie and send it to the LLM.

    ``style`` equal to "Logic-Driven" selects ``LOGIC_INJECTION``; any other
    value selects ``SOCIAL_INJECTION``. Returns whatever
    ``call_llm_with_fallback`` returns, i.e. ``(text, is_fallback)``.
    Raises KeyError if the movie id is not in ``movie_metadata``.
    """
    movie_info = movie_metadata[str(movie_id)]
    if style == "Logic-Driven":
        style_injection = LOGIC_INJECTION
    else:
        style_injection = SOCIAL_INJECTION
    title = movie_info['movie_title']
    genre_list = ', '.join(movie_info['genres'])
    # One consolidated prompt; continuation lines stay at column 0 so no
    # indentation leaks into the text sent to the model.
    full_prompt = f"""You are a digital movie concierge explaining a recommendation to a user.
Your explanation must be exactly 3 sentences and a maximum of 60 words.
Be punchy and engaging. Do not start with 'Based on your history'.
{style_injection}
Explain why the user with {persona_name} persona would like the movie '{title}'
(Genres: {genre_list}). But do not mention the persona directly."""
    return call_llm_with_fallback(full_prompt, movie_info, persona_name)
# Custom stylesheet handed to gr.Blocks(css=...). It sets the dark page
# colours, the shared ".view-container" panel, the movie-card classes
# (".movie-card-container", ".poster-*", ".movie-title", ".genre-pill", ...)
# and per-view overrides keyed on the elem_id values used in the UI below.
css = """
body { background-color: #0f172a !important; color: #f8fafc; }
.gradio-container { max-width: 1280px !important; margin: 0 auto !important; }
.view-container {
padding: 20px;
border: none;
border-radius: 8px;
margin-top: 20px;
background: #1e293b;
box-shadow: 0 10px 15px -3px rgb(0 0 0 / 0.5);
}
.movie-card-container {
background: #1e293b;
border-radius: 1rem;
overflow: hidden;
box-shadow: 0 4px 6px -1px rgb(0 0 0 / 0.3);
display: flex !important;
flex-direction: column !important;
height: 100%;
padding: 1rem;
gap: 1rem;
border: 1px solid #334155;
}
.movie-card-content { display: flex; flex-direction: column; flex-grow: 1; background-color: #1e293b;}
.poster-container { position: relative; aspect-ratio: 2/3; background: #0f172a; }
.poster-img { width: 100%; height: 100%; object-fit: cover; }
.movie-card-text-content {
display: flex;
flex-direction: column;
flex-grow: 1;
}
.movie-title {
font-weight: 700;
font-size: 1.125rem;
color: #f8fafc;
flex-shrink: 0;
height: 2.5em;
line-height: 1.25em;
overflow: hidden;
}
.movie-year { font-size: 0.875rem; color: #94a3b8; margin-top: 2px; flex-shrink: 0; }
.catchy-phrase {
font-style: italic;
color: #818cf8;
font-size: 1rem;
margin: 0.75rem 0;
text-align: left;
height: 2.5em;
line-height: 1.2em;
overflow: hidden;
}
.genre-pills-container { display: flex; flex-wrap: wrap; gap: 5px; margin-top: 0.5rem; flex-shrink: 0; }
.genre-pill { background: #334155; color: #e2e8f0; padding: 4px 8px; border-radius: 9999px; font-size: 0.7rem; font-weight: 500; }
.synthesis-pill {
position: absolute; top: 12px; left: 12px;
background: rgba(15, 23, 42, 0.8);
color: white; padding: 4px 10px;
border-radius: 9999px; font-size: 12px; font-weight: 600;
backdrop-filter: blur(4px);
border: 1px solid rgba(255, 255, 255, 0.1);
}
.gradio-button {
background-color: #3b82f6 !important;
color: white !important;
border: none !important;
border-radius: 0.5rem !important;
font-weight: 600 !important;
transition: background-color 0.2s ease-in-out !important;
padding: 10px !important;
}
.gradio-button:hover { background-color: #2563eb !important; }
#persona_selection_view {
max-width: 800px;
margin: 40px auto !important;
}
#persona_selection_view h2 { color: #f8fafc !important; }
#home_title h1, #home_title h2 { color: #f8fafc !important; }
#detail_view { padding: 20px; border: none; border-radius: 8px; margin-top: 20px; background: #1e293b; box-shadow: 0 10px 15px -3px rgb(0 0 0 / 0.5); }
#detail_poster img { max-height: 450px; }
#detail_title h2 { color: #f8fafc !important; }
#detail_explanation p { color: #e2e8f0 !important; }
#detail_explanation { border: 1px solid #334155; padding: 1rem; border-radius: 0.5rem; background: #0f172a; }
#detail_view label {
color: white !important;
font-weight: 600 !important;
font-size: 1rem !important;
padding: 0.5rem 0.5rem !important;
}
#detail_view .gr-radio {
margin: 1rem 0 !important;
padding: 1.5rem !important;
}
"""
def generate_card_html(movie_data, latency_s=0.02, is_live=False):
    """Generates HTML for a single movie card.

    The markup uses the card classes declared in the module stylesheet
    (``movie-card-content``, ``poster-container``/``poster-img``,
    ``synthesis-pill``, ``movie-title``, ``movie-year``,
    ``genre-pills-container``/``genre-pill``, ``catchy-phrase``) so those
    rules actually apply. Every dynamic value is HTML-escaped, because the
    hook may be LLM-generated text.

    Args:
        movie_data: Card dict. 'title' is required; 'year', 'genres',
            'poster' and 'hook' are optional. At most five genres are shown.
            The <img> element is omitted when no 'poster' URL is given.
        latency_s: Seconds shown in the synthesis badge, to two decimals.
        is_live: Accepted for call compatibility; not used for rendering.

    Returns:
        The card's HTML as a string.

    Raises:
        KeyError: If ``movie_data`` has no 'title'.
    """
    # Imported locally on purpose: the module-level name `html` is rebound to
    # a gr.HTML component during UI construction in this file.
    from html import escape

    badge_text = f"⚡️ Synthesis: {latency_s:.2f}s"
    title = escape(str(movie_data['title']))
    year = escape(str(movie_data.get('year', '')))
    hook = escape(str(movie_data.get('hook', 'A must-see film.')))
    genres_html = "".join(
        f'<span class="genre-pill">{escape(str(g))}</span>'
        for g in movie_data.get('genres', [])[:5]
    )
    poster = movie_data.get('poster')
    poster_html = (
        f'<img class="poster-img" src="{escape(str(poster))}" alt="{title}">'
        if poster else ''
    )
    return f"""
<div class="movie-card-content">
<div class="poster-container">
{poster_html}
<div class="synthesis-pill">{badge_text}</div>
</div>
<div class="movie-card-text-content">
<div class="movie-title">{title}</div>
<div class="movie-year">{year}</div>
<div class="genre-pills-container">{genres_html}</div>
<div class="catchy-phrase">"{hook}"</div>
</div>
</div>
"""
def update_home_cards_ui(persona_name, recommendations, is_live=False, time_taken=0.02):
    """Build the Gradio updates for the six home-page card slots.

    For each slot three values are appended, in this order: an update for the
    card HTML, an update for the card's button, and the value for the card's
    id state (the movie id string, or None for an empty slot). Slots beyond
    ``len(recommendations)`` are hidden.

    Cached hooks for ``persona_name`` are used unless ``is_live`` is true, in
    which case each recommendation's own 'hook' entry is used.
    """
    persona_hooks = {} if is_live else cached_hooks.get(persona_name, {})
    available = len(recommendations)
    updates = []
    for slot in range(6):
        if slot >= available:
            # Empty slot: hide HTML and button, clear the stored movie id.
            updates += [gr.update(visible=False), gr.update(visible=False), None]
            continue

        rec = recommendations[slot]
        movie_id = str(rec['movie_id'])
        info = movie_metadata[movie_id]
        title = info['movie_title']
        card = {
            "title": title,
            "year": info.get('year', ''),
            "genres": info.get('genres', []),
            "poster": get_poster_url(title),
        }
        if is_live:
            card["hook"] = rec.get('hook', "A movie worth watching!")
        else:
            card["hook"] = persona_hooks.get(movie_id, "A great movie awaits!")

        markup = generate_card_html(card, time_taken, is_live)
        updates += [
            gr.update(value=markup, visible=True),
            gr.update(visible=True),  # Show button
            movie_id,
        ]
    return updates
# --- UI Construction ---
# Root Blocks app. The js snippet adds the 'dark' class to <body> on load.
with gr.Blocks(css=css, js="() => document.body.classList.add('dark')") as demo:
# Session state: the chosen persona name and the movie id shown in the detail view.
selected_persona = gr.State(None)
selected_movie_id = gr.State(None)
gr.Markdown("# 🎬 AI-Powered Explainable Recommendations")
# --- Persona selection view (the only view visible initially) ---
with gr.Column(visible=True, elem_id="persona_selection_view", elem_classes="view-container") as persona_view:
gr.Markdown("## Choose a Persona")
# One button per persona, in persona_archetypes key order.
persona_buttons = [gr.Button(f"View Recommendations for {name}")
for name in persona_archetypes.keys()]
gr.Markdown("""
""")
# --- Home view: title, genre filter and the card grid ---
with gr.Column(visible=False, elem_id="home_view",
elem_classes="view-container") as home_view:
with gr.Row():
back_to_persona_button = gr.Button("⬅️ Change Persona")
home_title = gr.Markdown(elem_id="home_title")
# Dropdown choices: every distinct genre in movie_metadata, sorted, with the
# cached-home-page sentinel placed first.
all_genres = sorted(list(set(g for m in movie_metadata.values() for g in m['genres'])))
all_genres.insert(0, "All (Cached Home Page)")
genre_dropdown = gr.Dropdown(all_genres, label="Select Genre",
value="All (Cached Home Page)")
with gr.Column(elem_id="recommendations_grid") as recommendations_grid:
# 2 rows x 3 columns = 6 card slots; each slot keeps its HTML, button and id state.
home_cards = []
for i in range(2):
with gr.Row():
for j in range(3):
with gr.Column(elem_classes="movie-card-container"):
# NOTE: this rebinds the name `html` to a Gradio component.
html = gr.HTML()
btn = gr.Button("⚡ See Full Explanation")
movie_id_state = gr.State(None)
home_cards.append({"html": html, "btn": btn, "id": movie_id_state})
# Hidden by default; filter_by_genre shows it when a genre yields no candidates.
no_recommendations_message = gr.Markdown("""
🤷 No recommendations found
Try selecting a different genre for this persona.
""", visible=False)
# --- Detail view: poster, title, explanation and narrative-style selector ---
with gr.Column(visible=False, elem_id="detail_view",
elem_classes="view-container") as detail_view:
back_button = gr.Button("⬅️ Back to Recommendations")
with gr.Row():
detail_poster = gr.Image(POSTER_PLACEHOLDER, show_label=False, elem_id="detail_poster")
with gr.Column():
detail_title = gr.Markdown(elem_id="detail_title")
detail_explanation = gr.Markdown(elem_id="detail_explanation", label="The 'Why'")
narrative_style = gr.Radio(["Logic-Driven", "Social-Behavioral"],
label="Narrative Style", value="Logic-Driven")
# Shown only when an explanation callback reports that the fallback text was used.
detail_fallback_info = gr.Markdown(visible=False, elem_id="detail_fallback",
value="⚠️ LLM explanation failed or is disabled. Showing template-based reason.")
# Build output list
# Flattened per card as (html, btn, id) — the same order in which
# update_home_cards_ui() appends its three values per slot.
all_card_outputs = [item for card in home_cards for item in (card['html'], card['btn'], card['id'])]
# Outputs of the persona buttons: three view visibilities, persona state, title, then cards.
home_page_outputs = [persona_view, home_view, detail_view, selected_persona, home_title] + all_card_outputs
# Outputs of transition_to_detail_view (returned there as a component-keyed dict).
detail_outputs = [home_view, detail_view, selected_movie_id, detail_title,
detail_poster, detail_explanation, detail_fallback_info]
def select_persona_and_render_home(persona_name):
    """Switch to the home view and fill it with a persona's cached picks.

    Returns a flat list matching ``home_page_outputs``: visibility updates
    for the persona/home/detail views, the persona name for the state, the
    home title markdown, then the card updates from ``update_home_cards_ui``.
    """
    print(f'Loading Home page for persona: {persona_name}')
    started = time.time()
    cached_recs = home_recommendations.get(persona_name, [])
    view_updates = [
        gr.update(visible=False),               # persona_view
        gr.update(visible=True),                # home_view
        gr.update(visible=False),               # detail_view
        persona_name,                           # selected_persona
        f"# {persona_name}'s Recommendations",  # home_title
    ]
    card_updates = update_home_cards_ui(persona_name, cached_recs)
    elapsed = time.time() - started
    print(f"Home page for '{persona_name}' rendered in {elapsed:.4f}s ({elapsed * 1000:.2f}ms)")
    return view_updates + card_updates
def _rank_genre_candidates(history, genre, similar_items, metadata, limit=6):
    """Score unseen movies of ``genre`` against a persona history.

    Implements R = Σ(similarity × rating) / Σ|similarity| over the history
    entries rated at least 0.5. Movies already in ``history``, movies without
    metadata, and movies not tagged with ``genre`` are skipped.

    Returns up to ``limit`` ``(movie_id, score)`` pairs, best score first.
    """
    numerators = {}
    denominators = {}
    for seed_id, rating in history.items():
        if rating < 0.5:
            continue
        for sim_id, score in similar_items.get(str(seed_id), {}).items():
            if sim_id in history:
                continue
            meta = metadata.get(sim_id)
            if not meta or genre not in meta['genres']:
                continue
            numerators[sim_id] = numerators.get(sim_id, 0) + score * rating
            # Normalize by |similarity| as the formula states, so a negative
            # score cannot shrink the denominator or flip the score's sign.
            denominators[sim_id] = denominators.get(sim_id, 0) + abs(score)
    scores = {
        m_id: numerators[m_id] / denominators[m_id]
        for m_id in numerators if denominators[m_id] > 0
    }
    return sorted(scores.items(), key=lambda x: x[1], reverse=True)[:limit]


def filter_by_genre(genre, persona_name):
    """Filter recommendations by genre using Layer 3 normalization.

    Formula: R = Σ(Similarity × Normalized_Rating) / Σ|Similarity|
    - Uses the persona archetype's 'consolidated_history' as ratings
    - Normalizes by total absolute similarity to prevent hub-movie inflation

    Args:
        genre: Selected dropdown value. "All (Cached Home Page)" re-renders
            the cached home recommendations without any scoring.
        persona_name: Currently selected persona, or a falsy value if none.

    Returns:
        A flat list: home title markdown, visibility update for the
        recommendations grid, visibility update for the "no recommendations"
        message, followed by 18 card values (6 slots x html/button/id).

    Raises:
        KeyError: If ``persona_name`` is not a key of ``persona_archetypes``.
    """
    print(f"Starting genre filter for '{genre}'...")
    start_time = time.time()
    # Handle cached home page separately
    if genre == "All (Cached Home Page)":
        recs = home_recommendations.get(persona_name, [])
        title = f"# {persona_name}'s Recommendations"
        card_updates = update_home_cards_ui(persona_name, recs)
        print(f"Genre filter '{genre}' completed in {time.time() - start_time:.4f}s (cached)")
        return [title, gr.update(visible=True), gr.update(visible=False)] + card_updates
    if not persona_name:
        return ["# Error: No persona selected", gr.update(visible=False), gr.update(visible=False)] + [gr.update(visible=False)] * 18

    # Per the original author's note these ratings are normalized
    # (-2 to +2 scale); that is a property of the data, not enforced here.
    history = persona_archetypes[persona_name]['consolidated_history']
    sorted_ids = _rank_genre_candidates(history, genre, top_similar_items, movie_metadata)
    time_taken = time.time() - start_time
    print(f"Genre filter '{genre}' recommendations completed in {time_taken:.4f}s")

    # Generate live recommendations with on-the-fly hooks
    print(f"Starting hook generation for live recommendations for genre {genre}...")
    start_time = time.time()
    live_recs = []
    for m_id, _ in sorted_ids:
        title = movie_metadata[m_id]['movie_title']
        hook = generate_hook_on_the_fly(title)
        live_recs.append({
            "movie_id": int(m_id),
            "movie_title": title,
            "hook": hook
        })
        time.sleep(0.5)  # Gentle pause to prevent HF Backend Error 40001
    # From here on time_taken is the hook-generation time; it is what the
    # cards display in their synthesis badge.
    time_taken = time.time() - start_time
    print(f"Hook generation for genre {genre} completed in {time_taken:.4f}s")

    title_update = f"# {genre} Recommendations for {persona_name}"
    # Handle no recommendations: hide the grid, show the message, leave cards untouched
    if not live_recs:
        return [title_update, gr.update(visible=False), gr.update(visible=True)] + [gr.update()] * 18
    card_updates = update_home_cards_ui(persona_name, live_recs, is_live=True, time_taken=time_taken)
    return [title_update, gr.update(visible=True), gr.update(visible=False)] + card_updates
def transition_to_detail_view(movie_id):
    """Swap the home view for the detail view of ``movie_id``.

    Returns a dict keyed by the components in ``detail_outputs``. With a
    falsy ``movie_id`` every component gets a no-op update. Otherwise the
    home view is hidden, the detail view shown, the movie id stored, the
    title and poster filled in, the explanation set to a loading message and
    the fallback notice hidden.
    """
    if movie_id:
        movie_info = movie_metadata[str(movie_id)]
        title = movie_info['movie_title']
        poster_url = get_poster_url(title)
        # Placeholder text shown until the explanation callback finishes.
        loading_html = "\nGenerating personalized explanation...\n"

        updates = {}
        updates[home_view] = gr.update(visible=False)
        updates[detail_view] = gr.update(visible=True)
        updates[selected_movie_id] = movie_id
        updates[detail_title] = f"## {title}"
        updates[detail_poster] = poster_url
        updates[detail_explanation] = loading_html
        updates[detail_fallback_info] = gr.update(visible=False)
        return updates
    return {component: gr.update() for component in detail_outputs}
def generate_final_explanation(movie_id, persona_name, style):
    """Produce the explanation text and the fallback-notice visibility.

    Returns ``(text, update)`` where ``update`` makes the fallback notice
    visible only if the explanation came from the fallback path. A falsy
    ``movie_id`` yields an empty text and a hidden notice.
    """
    if movie_id:
        text, used_fallback = generate_explanation(movie_id, persona_name, style)
        return text, gr.update(visible=used_fallback)
    return "", gr.update(visible=False)
def back_to_home():
    """Show the home view again and hide the detail view."""
    show_home = gr.update(visible=True)
    hide_detail = gr.update(visible=False)
    return {home_view: show_home, detail_view: hide_detail}
def back_to_persona_selection():
    """Show the persona selection view again and hide the home view."""
    show_personas = gr.update(visible=True)
    hide_home = gr.update(visible=False)
    return {persona_view: show_personas, home_view: hide_home}
# Event wiring: Persona buttons
# persona_buttons was built in persona_archetypes key order, so index i maps
# each button back to its persona name.
for i, button in enumerate(persona_buttons):
persona_name = list(persona_archetypes.keys())[i]
# name=persona_name binds the current value as a default argument, so each
# button keeps its own persona instead of the loop's last one.
button.click(lambda name=persona_name: select_persona_and_render_home(name),
inputs=[], outputs=home_page_outputs)
# Event wiring: Genre filter
# The outputs order must match the list filter_by_genre returns:
# title, grid visibility, no-recommendations visibility, then the card values.
genre_dropdown.change(filter_by_genre,
inputs=[genre_dropdown, selected_persona],
outputs=[home_title, recommendations_grid, no_recommendations_message] + all_card_outputs)
# Event wiring: "See Full Explanation" buttons
# Two steps per card: first switch to the detail view, then chain the
# explanation call with .then() and write its result into the detail view.
for card in home_cards:
card['btn'].click(transition_to_detail_view,
inputs=[card['id']],
outputs=detail_outputs
).then(generate_final_explanation,
inputs=[card['id'], selected_persona, narrative_style],
outputs=[detail_explanation, detail_fallback_info])
# Event wiring: Narrative style change
def on_style_change(style, movie_id, persona_name):
    """Callback to regenerate explanation when the narrative style changes.

    Args:
        style: Newly selected narrative style.
        movie_id: Movie currently shown in the detail view, or a falsy value.
        persona_name: Currently selected persona.

    Returns:
        ``(text, update)`` exactly as ``generate_final_explanation`` returns
        it: the explanation text and the fallback-notice visibility update.
    """
    # Same contract as generate_final_explanation; only the argument order
    # differs because of how the inputs are wired. Delegate instead of
    # keeping a second copy of the logic.
    return generate_final_explanation(movie_id, persona_name, style)
# Positional arguments are (fn, inputs, outputs); the inputs order matches
# on_style_change(style, movie_id, persona_name).
narrative_style.change(on_style_change,
[narrative_style, selected_movie_id, selected_persona],
[detail_explanation, detail_fallback_info])
# Event wiring: Back buttons
# Both callbacks take no inputs and return component-keyed dicts covering
# exactly the two views listed as outputs.
back_button.click(back_to_home, [], [home_view, detail_view])
back_to_persona_button.click(back_to_persona_selection, [], [persona_view, home_view])
# Launch the Gradio app only when this file is run as a script.
if __name__ == "__main__":
demo.launch(debug=True)