# app.py - OPTIMIZED VERSION with Specialist Centroids & Normalized Ratings
# Key optimization: Uses pre-computed top_similar_items for O(1) lookups
# Layer 3 Normalization: Σ(Similarity × Rating) / Σ|Similarity| for proper collaborative filtering
import json
import os
import pickle
import time

import gradio as gr
import litellm
import requests

from config import TMDB_API_KEY, LLM_MODEL_NAME, BASE_PROMPT, LOGIC_INJECTION, SOCIAL_INJECTION

# --- Configuration & Constants ---
PROCESSED_DIR = 'data/processed'
POSTER_PLACEHOLDER = "https://placehold.co/150x225/000000/FFFFFF?text=Poster"
TMDB_SEARCH_URL = "https://api.themoviedb.org/3/search/movie"
TMDB_IMAGE_BASE_URL = "https://image.tmdb.org/t/p/w500"
TMDB_TIMEOUT_S = 3


# --- Data Loading & Helpers ---
def load_data(filename, is_pickle=False):
    """Load one pre-computed artifact from PROCESSED_DIR.

    Args:
        filename: File name relative to PROCESSED_DIR.
        is_pickle: When True the file is read with pickle, otherwise as JSON.

    Returns:
        The deserialized object.

    Raises:
        FileNotFoundError: If the file does not exist; the message points the
            operator at data_prep.py.
    """
    path = os.path.join(PROCESSED_DIR, filename)
    try:
        if is_pickle:
            # NOTE: pickle executes arbitrary code while loading; only ever
            # point this at artifacts produced by our own data pipeline.
            with open(path, 'rb') as f:
                return pickle.load(f)
        # Explicit encoding so the result does not depend on the platform locale.
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError as err:
        raise FileNotFoundError(
            f"Could not find '{filename}'. Please ensure data_prep.py ran successfully."
        ) from err


def _split_title_and_year(movie_title):
    """Split 'Title (1999)' into ('Title', 1999); return (title, None) if no year suffix."""
    open_idx = movie_title.rfind('(')
    close_idx = movie_title.rfind(')')
    if open_idx == -1 or close_idx == -1:
        return movie_title, None
    year_str = movie_title[open_idx + 1:close_idx]
    # isdecimal() (unlike isdigit()) only accepts characters int() can parse.
    if not year_str.isdecimal():
        return movie_title, None
    return movie_title[:open_idx].strip(), int(year_str)


def _redact_api_key(message):
    """Remove the TMDB API key from a message before it is logged."""
    if TMDB_API_KEY:
        return message.replace(str(TMDB_API_KEY), "***")
    return message


def get_poster_url(movie_title):
    """Fetches a movie poster URL from the TMDB API.

    Args:
        movie_title: Movie title, optionally ending in a '(YYYY)' release year.

    Returns:
        The URL of the first search hit's poster, or POSTER_PLACEHOLDER when the
        API key is not configured, the request fails, the response is malformed,
        or the first hit has no poster.
    """
    if not TMDB_API_KEY or TMDB_API_KEY == "YOUR_TMDB_API_KEY":
        return POSTER_PLACEHOLDER

    search_title, year = _split_title_and_year(movie_title)
    search_params = {'api_key': TMDB_API_KEY, 'query': search_title}
    if year:
        search_params['year'] = year

    try:
        response = requests.get(TMDB_SEARCH_URL, params=search_params, timeout=TMDB_TIMEOUT_S)
        response.raise_for_status()
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # requests embeds the full URL (including api_key=...) in its error
        # messages, so scrub the key before it reaches the logs.
        print(f"TMDB API call failed: {_redact_api_key(str(e))}")
        return POSTER_PLACEHOLDER

    # Tolerate payloads without a 'results' list instead of raising KeyError.
    results = data.get('results') if isinstance(data, dict) else None
    if results and results[0].get('poster_path'):
        return f"{TMDB_IMAGE_BASE_URL}{results[0]['poster_path']}"
    return POSTER_PLACEHOLDER


# Load data at startup
print("Loading pre-computed data...")
persona_archetypes = load_data('persona_archetypes.json')
home_recommendations = load_data('home_recommendations.json')
cached_hooks = load_data('cached_hooks.json')
movie_metadata = load_data('movie_metadata.json')
top_similar_items = load_data('top_similar_items.pkl', is_pickle=True)
user_avg_ratings = load_data('user_avg_ratings.json')
print(f"✅ Loaded pre-computed similarity lookups for {len(top_similar_items)} movies")

# Home grid geometry: each card slot contributes three outputs (HTML, button, id state).
GRID_ROWS = 2
GRID_COLS = 3
NUM_CARD_SLOTS = GRID_ROWS * GRID_COLS

# Dropdown entry that shows the pre-computed home page instead of live filtering.
CACHED_HOME_GENRE = "All (Cached Home Page)"

# Only history items rated at least this much (normalized scale) seed recommendations.
MIN_SEED_RATING = 0.5

# Pause between consecutive hook generations.
HOOK_PAUSE_S = 0.5

FALLBACK_HOOK = "A truly captivating film."

PERSONA_NOTE_MD = """

Note: Since this demo doesn't include user accounts, we use User Personas to showcase personalized recommendations. Each persona represents a unique set of movie preferences.

Learn more about how this demo works in our blog →
"""

NO_RECOMMENDATIONS_MD = """

🤷  No recommendations found

Try selecting a different genre for this persona.

"""


# --- LLM EXPLANATION FUNCTION ---
def call_llm_with_fallback(full_prompt, movie_data, persona_name):
    """Calls the LLM using LiteLLM and provides a fallback explanation.

    Args:
        full_prompt: Complete prompt sent as a single user message.
        movie_data: Metadata dict of the movie; 'movie_title' and 'genres' are read.
        persona_name: Accepted for interface compatibility; not used here.

    Returns:
        (text, is_fallback): the LLM explanation and False, or a template
        sentence and True when the call fails or yields no text.
    """
    try:
        print(f"DEBUG: Generating explanation for '{movie_data.get('movie_title', 'Unknown')}' using model: {LLM_MODEL_NAME}")
        start_ts = time.time()
        response = litellm.completion(
            model=LLM_MODEL_NAME,
            messages=[{"role": "user", "content": full_prompt}],
            max_tokens=120,
            temperature=0.1,
            stream=False,
        )
        print(f"DEBUG: Explanation generated in {time.time() - start_ts:.4f}s")
        report = response.choices[0].message.content
        # An empty completion is useless to the user; route it to the fallback.
        if not report or not report.strip():
            raise ValueError("LLM returned an empty explanation")
        print("--- LLM Response ---")
        print(report)
        print("--------------------")
        return report, False
    except Exception as e:  # UI boundary: any provider failure must degrade gracefully
        error_message = f"LiteLLM API Error: {e}"
        print(f"⚠️ {error_message}")
        gr.Warning(error_message)
        # Fallback logic
        genres = movie_data.get('genres', ['Drama'])
        primary_genre = genres[0] if genres else "Drama"
        fallback_text = f"Recommended because you enjoyed similar {primary_genre} films. Highly rated by users with your preferences."
        return fallback_text, True


def generate_hook_on_the_fly(movie_title):
    """Generates a short, snappy hook for a movie using LiteLLM.

    Returns the hook with double quotes removed, or a generic hook when the
    call fails or the model returns nothing.
    """
    prompt = f"Generate a 5-10 word snappy, atmospheric hook for the movie: {movie_title}."
    try:
        print(f"DEBUG: Generating hook for '{movie_title}' using model: {LLM_MODEL_NAME}")
        start_ts = time.time()
        response = litellm.completion(
            model=LLM_MODEL_NAME,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=25,
            temperature=0.7,
            stream=False,
        )
        print(f"DEBUG: Hook generated in {time.time() - start_ts:.4f}s")
        hook = response.choices[0].message.content.strip().replace('"', '')
        # Never hand an empty hook to the card template.
        return hook or FALLBACK_HOOK
    except Exception as e:  # UI boundary: a failed hook must not break the page
        print(f"LiteLLM API call for on-the-fly hook failed: {e}")
        return FALLBACK_HOOK


def generate_explanation(movie_id, persona_name, style):
    """Build the explanation prompt for a movie/persona/style and run it through the LLM.

    Args:
        movie_id: Movie id (any type whose str() is a key of movie_metadata).
        persona_name: Persona the explanation is tailored to.
        style: "Logic-Driven" selects LOGIC_INJECTION; anything else SOCIAL_INJECTION.

    Returns:
        (text, is_fallback) as returned by call_llm_with_fallback.

    Raises:
        KeyError: If the movie id is not present in movie_metadata.
    """
    movie_info = movie_metadata[str(movie_id)]
    style_injection = LOGIC_INJECTION if style == "Logic-Driven" else SOCIAL_INJECTION
    genres_text = ', '.join(movie_info.get('genres', []))

    # Create a single, consolidated prompt to be more direct
    full_prompt = f"""You are a digital movie concierge explaining a recommendation to a user.
Your explanation must be exactly 3 sentences and a maximum of 60 words. Be punchy and engaging.
Do not start with 'Based on your history'.

{style_injection}

Explain why the user with {persona_name} persona would like the movie '{movie_info['movie_title']}' (Genres: {genres_text}).
But do not mention the persona directly."""
    return call_llm_with_fallback(full_prompt, movie_info, persona_name)


css = """
body { background-color: #0f172a !important; color: #f8fafc; }
.gradio-container { max-width: 1280px !important; margin: 0 auto !important; }
.view-container { padding: 20px; border: none; border-radius: 8px; margin-top: 20px; background: #1e293b; box-shadow: 0 10px 15px -3px rgb(0 0 0 / 0.5); }
.movie-card-container { background: #1e293b; border-radius: 1rem; overflow: hidden; box-shadow: 0 4px 6px -1px rgb(0 0 0 / 0.3); display: flex !important; flex-direction: column !important; height: 100%; padding: 1rem; gap: 1rem; border: 1px solid #334155; }
.movie-card-content { display: flex; flex-direction: column; flex-grow: 1; background-color: #1e293b;}
.poster-container { position: relative; aspect-ratio: 2/3; background: #0f172a; }
.poster-img { width: 100%; height: 100%; object-fit: cover; }
.movie-card-text-content { display: flex; flex-direction: column; flex-grow: 1; }
.movie-title { font-weight: 700; font-size: 1.125rem; color: #f8fafc; flex-shrink: 0; height: 2.5em; line-height: 1.25em; overflow: hidden; }
.movie-year { font-size: 0.875rem; color: #94a3b8; margin-top: 2px; flex-shrink: 0; }
.catchy-phrase { font-style: italic; color: #818cf8; font-size: 1rem; margin: 0.75rem 0; text-align: left; height: 2.5em; line-height: 1.2em; overflow: hidden; }
.genre-pills-container { display: flex; flex-wrap: wrap; gap: 5px; margin-top: 0.5rem; flex-shrink: 0; }
.genre-pill { background: #334155; color: #e2e8f0; padding: 4px 8px; border-radius: 9999px; font-size: 0.7rem; font-weight: 500; }
.synthesis-pill { position: absolute; top: 12px; left: 12px; background: rgba(15, 23, 42, 0.8); color: white; padding: 4px 10px; border-radius: 9999px; font-size: 12px; font-weight: 600; backdrop-filter: blur(4px); border: 1px solid rgba(255, 255, 255, 0.1); }
.gradio-button { background-color: #3b82f6 !important; color: white !important; border: none !important; border-radius: 0.5rem !important; font-weight: 600 !important; transition: background-color 0.2s ease-in-out !important; padding: 10px !important; }
.gradio-button:hover { background-color: #2563eb !important; }
#persona_selection_view { max-width: 800px; margin: 40px auto !important; }
#persona_selection_view h2 { color: #f8fafc !important; }
#home_title h1, #home_title h2 { color: #f8fafc !important; }
#detail_view { padding: 20px; border: none; border-radius: 8px; margin-top: 20px; background: #1e293b; box-shadow: 0 10px 15px -3px rgb(0 0 0 / 0.5); }
#detail_poster img { max-height: 450px; }
#detail_title h2 { color: #f8fafc !important; }
#detail_explanation p { color: #e2e8f0 !important; }
#detail_explanation { border: 1px solid #334155; padding: 1rem; border-radius: 0.5rem; background: #0f172a; }
#detail_view label { color: white !important; font-weight: 600 !important; font-size: 1rem !important; padding: 0.5rem 0.5rem !important; }
#detail_view .gr-radio { margin: 1rem 0 !important; padding: 1.5rem !important; }
"""


def generate_card_html(movie_data, latency_s=0.02, is_live=False):
    """Generates HTML for a single movie card.

    Args:
        movie_data: Dict with 'title' (required) and optional 'genres',
            'poster' and 'hook'.
        latency_s: Seconds shown in the synthesis badge.
        is_live: Accepted for interface compatibility; does not affect the markup.

    Returns:
        An HTML fragment styled by the card classes declared in `css`.
    """
    from html import escape

    badge_text = f"⚡️ Synthesis: {latency_s:.2f}s"
    # Titles, genres and (LLM-generated) hooks are data, not markup: escape them
    # so stray '<' or '&' cannot break or inject into the card.
    genres_html = "".join(
        f'<span class="genre-pill">{escape(str(g))}</span>'
        for g in movie_data.get('genres', [])[:5]
    )
    title = escape(str(movie_data['title']))
    hook = escape(str(movie_data.get('hook', 'A must-see film.')))
    poster_url = escape(str(movie_data.get('poster', POSTER_PLACEHOLDER)))
    return f"""
<div class="movie-card-content">
  <div class="poster-container">
    <img class="poster-img" src="{poster_url}" alt="{title}">
    <div class="synthesis-pill">{badge_text}</div>
  </div>
  <div class="movie-card-text-content">
    <div class="movie-title">{title}</div>
    <div class="genre-pills-container">{genres_html}</div>
    <div class="catchy-phrase">"{hook}"</div>
  </div>
</div>
"""


def update_home_cards_ui(persona_name, recommendations, is_live=False, time_taken=0.02):
    """Updates UI with movie cards for given recommendations.

    Args:
        persona_name: Persona whose cached hooks are used when not live.
        recommendations: List of dicts with 'movie_id' (and 'hook' when live).
        is_live: True when hooks come from the recommendations themselves.
        time_taken: Seconds shown in each card's badge.

    Returns:
        A flat list with three entries per card slot (HTML update, button
        update, movie id or None), NUM_CARD_SLOTS slots in total. Slots beyond
        the available recommendations are hidden and their id cleared.
    """
    hooks = {} if is_live else cached_hooks.get(persona_name, {})
    updates = []
    for slot in range(NUM_CARD_SLOTS):
        if slot >= len(recommendations):
            # Empty slot: hide card and button, clear the stored id.
            updates.extend([gr.update(visible=False), gr.update(visible=False), None])
            continue

        rec = recommendations[slot]
        movie_id = str(rec['movie_id'])
        movie_info = movie_metadata[movie_id]
        if is_live:
            hook = rec.get('hook', "A movie worth watching!")
        else:
            hook = hooks.get(movie_id, "A great movie awaits!")
        card_data = {
            "title": movie_info['movie_title'],
            "year": movie_info.get('year', ''),
            "genres": movie_info.get('genres', []),
            "poster": get_poster_url(movie_info['movie_title']),
            "hook": hook,
        }
        html_content = generate_card_html(card_data, time_taken, is_live)
        updates.append(gr.update(value=html_content, visible=True))
        updates.append(gr.update(visible=True))  # Show button
        updates.append(movie_id)
    return updates


def _score_genre_candidates(history, genre, limit=NUM_CARD_SLOTS):
    """Rank unseen movies of `genre` by similarity-weighted rating.

    Score = Σ(similarity × rating) / Σ|similarity| over the history items
    rated at least MIN_SEED_RATING.

    Returns:
        Up to `limit` (movie_id, score) pairs, best first.
    """
    numerators = {}
    denominators = {}
    for seed_id, rating in history.items():
        if rating < MIN_SEED_RATING:
            continue
        for sim_id, score in top_similar_items.get(str(seed_id), {}).items():
            if sim_id in history:
                continue  # already seen by this persona
            meta = movie_metadata.get(sim_id)
            if not meta or genre not in meta.get('genres', []):
                continue
            numerators[sim_id] = numerators.get(sim_id, 0) + score * rating
            # abs() keeps the normalizer equal to Σ|similarity| even if a
            # similarity is negative, so terms cannot cancel towards zero.
            denominators[sim_id] = denominators.get(sim_id, 0) + abs(score)

    scores = {
        m_id: numerators[m_id] / denominators[m_id]
        for m_id in numerators
        if denominators[m_id] > 0
    }
    return sorted(scores.items(), key=lambda item: item[1], reverse=True)[:limit]


# --- UI Construction ---
with gr.Blocks(css=css, js="() => document.body.classList.add('dark')") as demo:
    selected_persona = gr.State(None)
    selected_movie_id = gr.State(None)

    gr.Markdown("# 🎬 AI-Powered Explainable Recommendations")

    with gr.Column(visible=True, elem_id="persona_selection_view", elem_classes="view-container") as persona_view:
        gr.Markdown("## Choose a Persona")
        persona_buttons = [gr.Button(f"View Recommendations for {name}") for name in persona_archetypes.keys()]
        gr.Markdown(PERSONA_NOTE_MD)

    with gr.Column(visible=False, elem_id="home_view", elem_classes="view-container") as home_view:
        with gr.Row():
            back_to_persona_button = gr.Button("⬅️ Change Persona")
            home_title = gr.Markdown(elem_id="home_title")

        all_genres = sorted({g for m in movie_metadata.values() for g in m.get('genres', [])})
        all_genres.insert(0, CACHED_HOME_GENRE)
        genre_dropdown = gr.Dropdown(all_genres, label="Select Genre", value=CACHED_HOME_GENRE)

        with gr.Column(elem_id="recommendations_grid") as recommendations_grid:
            home_cards = []
            for _row in range(GRID_ROWS):
                with gr.Row():
                    for _col in range(GRID_COLS):
                        with gr.Column(elem_classes="movie-card-container"):
                            card_html = gr.HTML()
                            card_btn = gr.Button("⚡ See Full Explanation")
                            card_id_state = gr.State(None)
                            home_cards.append({"html": card_html, "btn": card_btn, "id": card_id_state})

        no_recommendations_message = gr.Markdown(NO_RECOMMENDATIONS_MD, visible=False)

    with gr.Column(visible=False, elem_id="detail_view", elem_classes="view-container") as detail_view:
        back_button = gr.Button("⬅️ Back to Recommendations")
        with gr.Row():
            detail_poster = gr.Image(POSTER_PLACEHOLDER, show_label=False, elem_id="detail_poster")
            with gr.Column():
                detail_title = gr.Markdown(elem_id="detail_title")
                detail_explanation = gr.Markdown(elem_id="detail_explanation", label="The 'Why'")
                narrative_style = gr.Radio(["Logic-Driven", "Social-Behavioral"], label="Narrative Style", value="Logic-Driven")
                detail_fallback_info = gr.Markdown(
                    visible=False,
                    elem_id="detail_fallback",
                    value="⚠️ LLM explanation failed or is disabled. Showing template-based reason.",
                )

    # Build output list
    all_card_outputs = [item for card in home_cards for item in (card['html'], card['btn'], card['id'])]
    home_page_outputs = [persona_view, home_view, detail_view, selected_persona, home_title] + all_card_outputs
    detail_outputs = [home_view, detail_view, selected_movie_id, detail_title, detail_poster, detail_explanation, detail_fallback_info]

    def select_persona_and_render_home(persona_name):
        """Switch to the home view and fill the cards with the persona's cached recommendations."""
        print(f'Loading Home page for persona: {persona_name}')
        start_time = time.time()
        recs = home_recommendations.get(persona_name, [])
        home_page_updates = [
            gr.update(visible=False),   # persona_view
            gr.update(visible=True),    # home_view
            gr.update(visible=False),   # detail_view
            persona_name,               # selected_persona
            f"# {persona_name}'s Recommendations",
        ]
        card_updates = update_home_cards_ui(persona_name, recs)
        time_taken = time.time() - start_time
        print(f"Home page for '{persona_name}' rendered in {time_taken:.4f}s ({time_taken * 1000:.2f}ms)")
        return home_page_updates + card_updates

    def filter_by_genre(genre, persona_name):
        """Filter recommendations by genre using Layer 3 normalization.

        Formula: R = Σ(Similarity × Normalized_Rating) / Σ|Similarity|
        - Uses specialist centroid's consolidated history (normalized ratings)
        - Properly normalizes by total similarity to prevent hub-movie inflation

        Returns:
            [title, grid visibility update, empty-message visibility update]
            followed by one entry per card output.
        """
        print(f"Starting genre filter for '{genre}'...")
        start_time = time.time()

        # Guard first so a missing persona never renders "None's Recommendations".
        if not persona_name:
            return ["# Error: No persona selected", gr.update(visible=False), gr.update(visible=False)] + [gr.update(visible=False)] * len(all_card_outputs)

        # Handle cached home page separately
        if genre == CACHED_HOME_GENRE:
            recs = home_recommendations.get(persona_name, [])
            title = f"# {persona_name}'s Recommendations"
            card_updates = update_home_cards_ui(persona_name, recs)
            print(f"Genre filter '{genre}' completed in {time.time() - start_time:.4f}s (cached)")
            return [title, gr.update(visible=True), gr.update(visible=False)] + card_updates

        archetype = persona_archetypes[persona_name]
        history = archetype['consolidated_history']  # NORMALIZED ratings (-2 to +2 scale)
        sorted_ids = _score_genre_candidates(history, genre)

        time_taken = time.time() - start_time
        print(f"Genre filter '{genre}' recoomendations completed in {time_taken:.4f}s")

        # Generate live recommendations with on-the-fly hooks
        print(f"Starting hook generation for live recommendations for genre {genre}...")
        start_time = time.time()
        live_recs = []
        last_index = len(sorted_ids) - 1
        for index, (m_id, _score) in enumerate(sorted_ids):
            title = movie_metadata[m_id]['movie_title']
            hook = generate_hook_on_the_fly(title)
            live_recs.append({"movie_id": int(m_id), "movie_title": title, "hook": hook})
            if index < last_index:
                time.sleep(HOOK_PAUSE_S)  # Gentle pause to prevent HF Backend Error 40001
        time_taken = time.time() - start_time
        print(f"Hook generation for genre {genre} completed in {time_taken:.4f}s")

        title_update = f"# {genre} Recommendations for {persona_name}"

        # Handle no recommendations
        if not live_recs:
            return [title_update, gr.update(visible=False), gr.update(visible=True)] + [gr.update()] * len(all_card_outputs)

        card_updates = update_home_cards_ui(persona_name, live_recs, is_live=True, time_taken=time_taken)
        return [title_update, gr.update(visible=True), gr.update(visible=False)] + card_updates

    def transition_to_detail_view(movie_id):
        """Show the detail view for `movie_id` with a loading message in place of the explanation."""
        if not movie_id:
            return {k: gr.update() for k in detail_outputs}

        movie_info = movie_metadata[str(movie_id)]
        poster_url = get_poster_url(movie_info['movie_title'])

        # Loading message shown until the explanation arrives
        loading_html = """

Generating personalized explanation...

"""
        return {
            home_view: gr.update(visible=False),
            detail_view: gr.update(visible=True),
            selected_movie_id: movie_id,
            detail_title: f"## {movie_info['movie_title']}",
            detail_poster: poster_url,
            detail_explanation: loading_html,
            detail_fallback_info: gr.update(visible=False),
        }

    def generate_final_explanation(movie_id, persona_name, style):
        """Return (explanation text, fallback-notice visibility update) for the selected movie."""
        if not movie_id:
            return "", gr.update(visible=False)
        explanation, is_fallback = generate_explanation(movie_id, persona_name, style)
        return explanation, gr.update(visible=is_fallback)

    def back_to_home():
        """Return from the detail view to the home view."""
        return {home_view: gr.update(visible=True), detail_view: gr.update(visible=False)}

    def back_to_persona_selection():
        """Return from the home view to persona selection."""
        return {persona_view: gr.update(visible=True), home_view: gr.update(visible=False)}

    # Event wiring: Persona buttons (buttons were created in persona_archetypes key order)
    for button, persona_name in zip(persona_buttons, persona_archetypes):
        # Default argument binds the current name; a plain closure would see the last one.
        button.click(lambda name=persona_name: select_persona_and_render_home(name), inputs=[], outputs=home_page_outputs)

    # Event wiring: Genre filter
    genre_dropdown.change(
        filter_by_genre,
        inputs=[genre_dropdown, selected_persona],
        outputs=[home_title, recommendations_grid, no_recommendations_message] + all_card_outputs,
    )

    # Event wiring: "See Full Explanation" buttons
    for card in home_cards:
        card['btn'].click(
            transition_to_detail_view,
            inputs=[card['id']],
            outputs=detail_outputs,
        ).then(
            generate_final_explanation,
            inputs=[card['id'], selected_persona, narrative_style],
            outputs=[detail_explanation, detail_fallback_info],
        )

    # Event wiring: Narrative style change
    def on_style_change(style, movie_id, persona_name):
        """Callback to regenerate explanation when the narrative style changes."""
        return generate_final_explanation(movie_id, persona_name, style)

    narrative_style.change(on_style_change, [narrative_style, selected_movie_id, selected_persona], [detail_explanation, detail_fallback_info])

    # Event wiring: Back buttons
    back_button.click(back_to_home, [], [home_view, detail_view])
    back_to_persona_button.click(back_to_persona_selection, [], [persona_view, home_view])

if __name__ == "__main__":
    demo.launch(debug=True)