Spaces:

amoyakd
/

Recommendation-LLM-Explainer

Running

File size: 23,705 Bytes

# app.py - OPTIMIZED VERSION with Specialist Centroids & Normalized Ratings
# Key optimization: Uses pre-computed top_similar_items for O(1) lookups
# Layer 3 Normalization: Σ(Similarity × Rating) / Σ|Similarity| for proper collaborative filtering

import gradio as gr
import json
import os
import time
import requests
import litellm
from config import TMDB_API_KEY, LLM_MODEL_NAME, BASE_PROMPT, LOGIC_INJECTION, SOCIAL_INJECTION

# --- Configuration & Constants ---
PROCESSED_DIR = 'data/processed'
POSTER_PLACEHOLDER = "https://placehold.co/150x225/000000/FFFFFF?text=Poster"

# --- Data Loading & Helpers ---
def load_data(filename, is_pickle=False):
    """Load one pre-computed artifact from ``PROCESSED_DIR``.

    Args:
        filename: File name, relative to ``PROCESSED_DIR``.
        is_pickle: When true the file is opened in binary mode and unpickled;
            otherwise it is parsed as UTF-8 encoded JSON.

    Returns:
        The deserialized Python object.

    Raises:
        FileNotFoundError: If the file does not exist. The message names the
            missing file and the original error is chained as the cause.
    """
    import pickle

    path = os.path.join(PROCESSED_DIR, filename)
    try:
        if is_pickle:
            # NOTE(security): pickle.load can execute arbitrary code. Only use
            # this on artifacts produced by a trusted pipeline.
            with open(path, 'rb') as f:
                return pickle.load(f)
        # Read JSON as UTF-8 explicitly so non-ASCII text (e.g. accented movie
        # titles) does not depend on the platform's default locale encoding.
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError as err:
        raise FileNotFoundError(
            f"Could not find '{filename}'. Please ensure data_prep.py ran successfully."
        ) from err

def get_poster_url(movie_title):
    """Fetch a movie poster URL from the TMDB search API.

    Args:
        movie_title: Movie title, optionally ending in a parenthesised year
            such as ``"Toy Story (1995)"``. When the text between the last
            ``(`` and the last ``)`` is all digits, it is sent as the ``year``
            search parameter and stripped from the query.

    Returns:
        The ``w500`` image URL of the first search result that has a
        ``poster_path``; ``POSTER_PLACEHOLDER`` when no API key is configured,
        the request fails, or the response has no usable result.
    """
    if not TMDB_API_KEY or TMDB_API_KEY == "YOUR_TMDB_API_KEY":
        return POSTER_PLACEHOLDER

    search_title = movie_title
    year = None
    open_idx = movie_title.rfind('(')
    close_idx = movie_title.rfind(')')
    if open_idx != -1 and close_idx > open_idx:
        year_str = movie_title[open_idx + 1:close_idx]
        if year_str.isdigit():
            year = int(year_str)
            search_title = movie_title[:open_idx].strip()

    search_params = {'api_key': TMDB_API_KEY, 'query': search_title}
    if year:
        search_params['year'] = year

    try:
        response = requests.get("https://api.themoviedb.org/3/search/movie",
                                params=search_params, timeout=3)
        response.raise_for_status()
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose decode
        # error is not a RequestException subclass.
        print(f"TMDB API call failed: {e}")
        return POSTER_PLACEHOLDER

    # Tolerate unexpected payloads (e.g. an error object without 'results')
    # instead of raising KeyError into the UI callback.
    results = data.get('results') if isinstance(data, dict) else None
    if results and isinstance(results[0], dict):
        poster_path = results[0].get('poster_path')
        if poster_path:
            return f"https://image.tmdb.org/t/p/w500{poster_path}"

    return POSTER_PLACEHOLDER

# Load data at startup: every artifact below is read once, at import time.
print("Loading pre-computed data...")
persona_archetypes, home_recommendations, cached_hooks, movie_metadata = map(
    load_data,
    (
        'persona_archetypes.json',
        'home_recommendations.json',
        'cached_hooks.json',
        'movie_metadata.json',
    ),
)
# The similarity lookup is the only pickled artifact.
top_similar_items = load_data('top_similar_items.pkl', is_pickle=True)
user_avg_ratings = load_data('user_avg_ratings.json')
print(f"✅ Loaded pre-computed similarity lookups for {len(top_similar_items)} movies")

# --- LLM EXPLANATION FUNCTION ---
def call_llm_with_fallback(full_prompt, movie_data, persona_name):
    """Call the LLM through LiteLLM, falling back to a template explanation.

    Args:
        full_prompt: Complete prompt, sent as a single user message.
        movie_data: Movie metadata dict; ``movie_title`` is used for logging
            and ``genres`` for the fallback text.
        persona_name: Accepted for interface compatibility; not used here.

    Returns:
        A ``(text, is_fallback)`` tuple. ``is_fallback`` is ``False`` when the
        text came from the model and ``True`` when the template sentence was
        used because the call failed or produced no text.
    """
    try:
        print(f"DEBUG: Generating explanation for '{movie_data.get('movie_title', 'Unknown')}' using model: {LLM_MODEL_NAME}")
        start_ts = time.time()
        response = litellm.completion(
            model=LLM_MODEL_NAME,
            messages=[{"role": "user", "content": full_prompt}],
            max_tokens=120,
            temperature=0.1,
            stream=False
        )
        print(f"DEBUG: Explanation generated in {time.time() - start_ts:.4f}s")
        report = response.choices[0].message.content
        # A missing or blank completion is a failure: route it to the fallback
        # below rather than rendering an empty explanation as a success.
        if not report or not report.strip():
            raise ValueError("LLM returned an empty explanation")
        print("--- LLM Response ---")
        print(report)
        print("--------------------")
        return report, False

    except Exception as e:
        # Deliberately broad: any provider failure must degrade to the
        # template text instead of breaking the UI callback.
        error_message = f"LiteLLM API Error: {e}"
        print(f"⚠️ {error_message}")
        gr.Warning(error_message)
        # Fallback logic
        genres = movie_data.get('genres', ['Drama'])
        primary_genre = genres[0] if genres else "Drama"
        fallback_text = f"Recommended because you enjoyed similar {primary_genre} films. Highly rated by users with your preferences."
        return fallback_text, True

def generate_hook_on_the_fly(movie_title):
    """Generate a short, snappy hook for a movie using LiteLLM.

    Args:
        movie_title: Title interpolated into the hook prompt.

    Returns:
        The model's text with surrounding whitespace and all double quotes
        removed, or ``"A truly captivating film."`` when the call fails or the
        cleaned text is empty.
    """
    fallback_hook = "A truly captivating film."
    prompt = f"Generate a 5-10 word snappy, atmospheric hook for the movie: {movie_title}."
    try:
        print(f"DEBUG: Generating hook for '{movie_title}' using model: {LLM_MODEL_NAME}")
        start_ts = time.time()
        response = litellm.completion(
            model=LLM_MODEL_NAME,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=25,
            temperature=0.7,
            stream=False,
        )
        print(f"DEBUG: Hook generated in {time.time() - start_ts:.4f}s")
        hook = response.choices[0].message.content.strip().replace('"', '')
        # The card wraps the hook in quotes, so a blank hook would render as
        # an empty pair of quotes; use the fallback instead.
        return hook or fallback_hook
    except Exception as e:
        # Deliberately broad: a failed hook must never break card rendering.
        print(f"LiteLLM API call for on-the-fly hook failed: {e}")
        return fallback_hook

def generate_explanation(movie_id, persona_name, style):
    """Assemble the explanation prompt for one movie and send it to the LLM.

    Args:
        movie_id: Movie identifier; converted to ``str`` to index
            ``movie_metadata``.
        persona_name: Persona name interpolated into the prompt.
        style: ``"Logic-Driven"`` selects ``LOGIC_INJECTION``; any other value
            selects ``SOCIAL_INJECTION``.

    Returns:
        Whatever ``call_llm_with_fallback`` returns for the assembled prompt.
    """
    movie_info = movie_metadata[str(movie_id)]

    if style == "Logic-Driven":
        style_injection = LOGIC_INJECTION
    else:
        style_injection = SOCIAL_INJECTION

    # NOTE: the indentation inside this literal is part of the prompt text.
    full_prompt = f"""You are a digital movie concierge explaining a recommendation to a user.
                    Your explanation must be exactly 3 sentences and a maximum of 60 words.
                    Be punchy and engaging. Do not start with 'Based on your history'.
                    {style_injection}

            Explain why the user with {persona_name} persona would like the movie '{movie_info['movie_title']}' 
            (Genres: {', '.join(movie_info['genres'])}). But do not mention the persona directly."""

    return call_llm_with_fallback(full_prompt, movie_info, persona_name)

# Stylesheet passed to gr.Blocks(css=...). It sets the dark page palette and
# styles the view containers, the movie cards (poster, title, hook, genre
# pills, synthesis badge), buttons, and the persona-selection / detail views,
# which are targeted through their elem_id selectors.
css = """
body { background-color: #0f172a !important; color: #f8fafc; }
.gradio-container { max-width: 1280px !important; margin: 0 auto !important; }
.view-container { 
    padding: 20px; 
    border: none; 
    border-radius: 8px; 
    margin-top: 20px; 
    background: #1e293b; 
    box-shadow: 0 10px 15px -3px rgb(0 0 0 / 0.5); 
}

.movie-card-container {
    background: #1e293b; 
    border-radius: 1rem; 
    overflow: hidden; 
    box-shadow: 0 4px 6px -1px rgb(0 0 0 / 0.3);
    display: flex !important;
    flex-direction: column !important;
    height: 100%;
    padding: 1rem;
    gap: 1rem;
    border: 1px solid #334155;
}
.movie-card-content { display: flex; flex-direction: column; flex-grow: 1; background-color: #1e293b;}
.poster-container { position: relative; aspect-ratio: 2/3; background: #0f172a; }
.poster-img { width: 100%; height: 100%; object-fit: cover; }
.movie-card-text-content {
    display: flex;
    flex-direction: column;
    flex-grow: 1;
}
.movie-title { 
    font-weight: 700; 
    font-size: 1.125rem; 
    color: #f8fafc; 
    flex-shrink: 0;
    height: 2.5em; 
    line-height: 1.25em; 
    overflow: hidden;
}
.movie-year { font-size: 0.875rem; color: #94a3b8; margin-top: 2px; flex-shrink: 0; }
.catchy-phrase { 
    font-style: italic; 
    color: #818cf8; 
    font-size: 1rem; 
    margin: 0.75rem 0; 
    text-align: left;
    height: 2.5em; 
    line-height: 1.2em;
    overflow: hidden;
}
.genre-pills-container { display: flex; flex-wrap: wrap; gap: 5px; margin-top: 0.5rem; flex-shrink: 0; }
.genre-pill { background: #334155; color: #e2e8f0; padding: 4px 8px; border-radius: 9999px; font-size: 0.7rem; font-weight: 500; }
.synthesis-pill { 
    position: absolute; top: 12px; left: 12px; 
    background: rgba(15, 23, 42, 0.8);
    color: white; padding: 4px 10px; 
    border-radius: 9999px; font-size: 12px; font-weight: 600;
    backdrop-filter: blur(4px); 
    border: 1px solid rgba(255, 255, 255, 0.1);
}

.gradio-button {
    background-color: #3b82f6 !important;
    color: white !important;
    border: none !important;
    border-radius: 0.5rem !important;
    font-weight: 600 !important;
    transition: background-color 0.2s ease-in-out !important;
    padding: 10px !important;
}
.gradio-button:hover { background-color: #2563eb !important; }

#persona_selection_view { 
    max-width: 800px; 
    margin: 40px auto !important; 
}

#persona_selection_view h2 { color: #f8fafc !important; }
#home_title h1, #home_title h2 { color: #f8fafc !important; }
#detail_view { padding: 20px; border: none; border-radius: 8px; margin-top: 20px; background: #1e293b; box-shadow: 0 10px 15px -3px rgb(0 0 0 / 0.5); }
#detail_poster img { max-height: 450px; }
#detail_title h2 { color: #f8fafc !important; }
#detail_explanation p { color: #e2e8f0 !important; }
#detail_explanation { border: 1px solid #334155; padding: 1rem; border-radius: 0.5rem; background: #0f172a; }

#detail_view label {
    color: white !important;
    font-weight: 600 !important;
    font-size: 1rem !important;
    padding: 0.5rem 0.5rem !important;
}

#detail_view .gr-radio {
    margin: 1rem 0 !important;
    padding: 1.5rem !important;
}
"""

def generate_card_html(movie_data, latency_s=0.02, is_live=False):
    """Generate the HTML for a single movie card.

    All values taken from ``movie_data`` are HTML-escaped before being
    interpolated, so titles, genres, poster URLs and (possibly LLM-generated)
    hooks cannot inject markup into the page.

    Args:
        movie_data: Dict with required keys ``poster`` and ``title`` and
            optional keys ``genres`` (only the first five are rendered) and
            ``hook`` (defaults to ``'A must-see film.'``).
        latency_s: Seconds shown in the synthesis badge, with two decimals.
        is_live: Accepted for interface compatibility; not used here.

    Returns:
        The card markup as a string.

    Raises:
        KeyError: If ``movie_data`` lacks ``poster`` or ``title``.
    """
    # Function-scope import on purpose: this module later binds the global
    # name ``html`` to a Gradio component, which would shadow a top-level
    # ``import html``.
    from html import escape

    badge_text = f"⚡️ Synthesis: {latency_s:.2f}s"
    genres_html = "".join(
        f'<span class="genre-pill">{escape(str(g))}</span>'
        for g in movie_data.get('genres', [])[:5]
    )
    poster = escape(str(movie_data['poster']))
    title = escape(str(movie_data['title']))
    hook = escape(str(movie_data.get('hook', 'A must-see film.')))
    return f"""
    <div class="movie-card-content">
        <div class="poster-container">
            <img src="{poster}" class="poster-img" />
            <div class="synthesis-pill">{badge_text}</div>
        </div>
        <div class="movie-card-text-content">
            <div class="movie-title">{title}</div>
            <div class="genre-pills-container">{genres_html}</div>
            <div class="catchy-phrase">"{hook}"</div>
        </div>
    </div>
    """

def update_home_cards_ui(persona_name, recommendations, is_live=False, time_taken=0.02):
    """Build the Gradio updates for the six home-page card slots.

    Each slot contributes three consecutive entries to the result: the card
    HTML update, the button visibility update, and the movie id string (or
    ``None`` for an unused slot).

    Args:
        persona_name: Persona whose cached hooks are used when not live.
        recommendations: Sequence of dicts with a ``movie_id`` key and, for
            live results, an optional ``hook`` key.
        is_live: When true, hooks come from the recommendation entries instead
            of ``cached_hooks``.
        time_taken: Seconds forwarded to the card's synthesis badge.

    Returns:
        A flat list of 18 entries (3 per slot).
    """
    persona_hooks = {} if is_live else cached_hooks.get(persona_name, {})
    ui_updates = []

    for slot in range(6):
        if slot >= len(recommendations):
            # Unused slot: hide the card and its button, clear the stored id.
            ui_updates += [gr.update(visible=False), gr.update(visible=False), None]
            continue

        entry = recommendations[slot]
        movie_id = str(entry['movie_id'])
        info = movie_metadata[movie_id]
        title = info['movie_title']
        year = info.get('year', '')
        genres = info.get('genres', [])
        poster_url = get_poster_url(title)

        if is_live:
            hook_text = entry.get('hook', "A movie worth watching!")
        else:
            hook_text = persona_hooks.get(movie_id, "A great movie awaits!")

        card_markup = generate_card_html(
            {
                "title": title,
                "year": year,
                "genres": genres,
                "poster": poster_url,
                "hook": hook_text,
            },
            time_taken,
            is_live,
        )
        ui_updates += [
            gr.update(value=card_markup, visible=True),
            gr.update(visible=True),  # Show button
            movie_id,
        ]

    return ui_updates



# --- UI Construction ---
# The page is built from three mutually exclusive views (persona selection,
# home grid, detail); callbacks switch between them by toggling visibility.
# The js snippet adds the 'dark' class to <body> when the page loads.
with gr.Blocks(css=css, js="() => document.body.classList.add('dark')") as demo:
    # State holders for the currently selected persona and movie id.
    selected_persona = gr.State(None)
    selected_movie_id = gr.State(None)

    
    gr.Markdown("# 🎬 AI-Powered Explainable Recommendations")

    # View 1: persona selection (the only view visible at startup).
    with gr.Column(visible=True, elem_id="persona_selection_view", elem_classes="view-container") as persona_view:
        gr.Markdown("## Choose a Persona")
        # One button per persona, in persona_archetypes key order; the event
        # wiring later relies on this order.
        persona_buttons = [gr.Button(f"View Recommendations for {name}") 
                          for name in persona_archetypes.keys()]
        gr.Markdown("""
            <div style="margin-top: 1.5rem; padding: 1rem; background-color: #f1f5f9; border-radius: 0.75rem; border: 1px solid #e2e8f0;">
                <p style="color: #475569; font-size: 0.9rem; margin-bottom: 0.5rem; line-height: 1.5;">
                    <strong style="color: #1e293b;">Note:</strong> Since this demo doesn't include user accounts, we use <b style="color: #1e293b;">User Personas</b> to showcase personalized recommendations. Each persona represents a unique set of movie preferences.
                </p>
                <a href="https://blogs-amoydutta.hashnode.dev/explainable-recommendations-in-practice-a-demo-driven-approach" target="_blank" style="color: #4f46e5; font-size: 0.9rem; font-weight: 600; text-decoration: none;">
                    Learn more about how this demo works in our blog →
                </a>
            </div>
        """)

    # View 2: home page with the genre filter and the recommendation grid.
    with gr.Column(visible=False, elem_id="home_view", 
                   elem_classes="view-container") as home_view:
        with gr.Row():
            back_to_persona_button = gr.Button("⬅️ Change Persona")
            home_title = gr.Markdown(elem_id="home_title")

        # Dropdown choices: every genre present in the metadata, sorted, with
        # the cached-home-page option first.
        all_genres = sorted(list(set(g for m in movie_metadata.values() for g in m['genres'])))
        all_genres.insert(0, "All (Cached Home Page)")
        genre_dropdown = gr.Dropdown(all_genres, label="Select Genre", 
                                    value="All (Cached Home Page)")

        # Fixed grid of 2 rows x 3 cards; each card is an HTML body, a button
        # and a State holding the movie id it currently shows.
        with gr.Column(elem_id="recommendations_grid") as recommendations_grid:
            home_cards = []
            for i in range(2):
                with gr.Row():
                    for j in range(3):
                        with gr.Column(elem_classes="movie-card-container"):
                            html = gr.HTML()
                            btn = gr.Button("⚡ See Full Explanation")
                            movie_id_state = gr.State(None)
                            home_cards.append({"html": html, "btn": btn, "id": movie_id_state})

        # Hidden by default; shown in place of the grid when a genre filter
        # yields no recommendations.
        no_recommendations_message = gr.Markdown("""
        <div style="width: 100%; display: flex; justify-content: center; align-items: center; padding: 4rem 0;">
            <div style="text-align: center;">
                <h3 style="color: #1e293b;">🤷&nbsp; No recommendations found</h3>
                <p style="color: #64748b; font-size: 0.875rem;">Try selecting a different genre for this persona.</p>
            </div>
        </div>
""", visible=False)

    # View 3: detail page with poster, explanation and narrative-style switch.
    with gr.Column(visible=False, elem_id="detail_view", 
                   elem_classes="view-container") as detail_view:
        back_button = gr.Button("⬅️ Back to Recommendations")
        with gr.Row():
            detail_poster = gr.Image(POSTER_PLACEHOLDER, show_label=False, elem_id="detail_poster")
            with gr.Column():
                detail_title = gr.Markdown(elem_id="detail_title")
                detail_explanation = gr.Markdown(elem_id="detail_explanation", label="The 'Why'")
                narrative_style = gr.Radio(["Logic-Driven", "Social-Behavioral"], 
                                          label="Narrative Style", value="Logic-Driven")
                # Notice displayed only when the explanation is the fallback text.
                detail_fallback_info = gr.Markdown(visible=False, elem_id="detail_fallback", 
                                                  value="⚠️ LLM explanation failed or is disabled. Showing template-based reason.")

    # Build output list
    # Card outputs are flattened as (html, btn, id) per card, matching the
    # three-entries-per-slot layout returned by update_home_cards_ui.
    all_card_outputs = [item for card in home_cards for item in (card['html'], card['btn'], card['id'])]
    home_page_outputs = [persona_view, home_view, detail_view, selected_persona, home_title] + all_card_outputs
    detail_outputs = [home_view, detail_view, selected_movie_id, detail_title, 
                     detail_poster, detail_explanation, detail_fallback_info]

    def select_persona_and_render_home(persona_name):
        """Switch to the home view and fill the cards with cached picks.

        Returns the values for ``home_page_outputs``: three view-visibility
        updates, the persona name, the page title, then the card updates.
        """
        print(f'Loading Home page for persona: {persona_name}')
        started = time.time()

        cached_recs = home_recommendations.get(persona_name, [])
        view_updates = [
            gr.update(visible=False),  # persona selection view
            gr.update(visible=True),   # home view
            gr.update(visible=False),  # detail view
            persona_name,
            f"# {persona_name}'s Recommendations",
        ]
        card_updates = update_home_cards_ui(persona_name, cached_recs)

        time_taken = time.time() - started
        print(f"Home page for '{persona_name}' rendered in {time_taken:.4f}s ({time_taken * 1000:.2f}ms)")
        return [*view_updates, *card_updates]

    def filter_by_genre(genre, persona_name):
        """Filter recommendations by genre using Layer 3 normalization.

        Formula: R = Σ(Similarity × Normalized_Rating) / Σ|Similarity|
        - Uses specialist centroid's consolidated history (normalized ratings)
        - Properly normalizes by total similarity to prevent hub-movie inflation

        Args:
            genre: Selected dropdown value. ``"All (Cached Home Page)"``
                re-renders the cached home recommendations.
            persona_name: Currently selected persona, or a falsy value.

        Returns:
            A list of ``[title, grid update, no-results message update]``
            followed by one entry per card output.
        """
        print(f"Starting genre filter for '{genre}'...")
        start_time = time.time()
        # Handle cached home page separately
        if genre == "All (Cached Home Page)":
            recs = home_recommendations.get(persona_name, [])
            title = f"# {persona_name}'s Recommendations"
            card_updates = update_home_cards_ui(persona_name, recs)
            print(f"Genre filter '{genre}' completed in {time.time() - start_time:.4f}s (cached)")
            return [title, gr.update(visible=True), gr.update(visible=False)] + card_updates

        # Sized from the wired outputs rather than a hard-coded count, so the
        # return length always matches the event's output list.
        n_card_outputs = len(all_card_outputs)

        if not persona_name:
            return (["# Error: No persona selected", gr.update(visible=False), gr.update(visible=False)]
                    + [gr.update(visible=False)] * n_card_outputs)

        archetype = persona_archetypes[persona_name]
        history = archetype['consolidated_history']  # NORMALIZED ratings (-2 to +2 scale)
        candidate_numerators = {}
        candidate_denominators = {}

        for m_id_str, rating in history.items():
            # Only movies rated at least 0.5 act as seeds.
            if rating < 0.5:
                continue
            sims = top_similar_items.get(str(m_id_str), {})
            for sim_id, score in sims.items():
                # Normalize to str so the history and metadata lookups (which
                # are keyed by strings here) also work if the similarity
                # table stores ids as ints.
                sim_key = str(sim_id)
                if sim_key in history:
                    continue
                meta = movie_metadata.get(sim_key)
                if meta and genre in meta['genres']:
                    candidate_numerators[sim_key] = candidate_numerators.get(sim_key, 0) + (score * rating)
                    # |score| as in the documented formula, so negative
                    # similarities cannot cancel or flip the denominator.
                    candidate_denominators[sim_key] = candidate_denominators.get(sim_key, 0) + abs(score)

        candidate_scores = {
            m_id: candidate_numerators[m_id] / candidate_denominators[m_id]
            for m_id in candidate_numerators if candidate_denominators.get(m_id, 0) > 0
        }

        sorted_ids = sorted(candidate_scores.items(), key=lambda x: x[1], reverse=True)[:6]
        time_taken = time.time() - start_time
        print(f"Genre filter '{genre}' recoomendations completed in {time_taken:.4f}s")

        title_update = f"# {genre} Recommendations for {persona_name}"

        # Handle no recommendations: show the message, leave cards untouched
        if not sorted_ids:
            return [title_update, gr.update(visible=False), gr.update(visible=True)] + [gr.update()] * n_card_outputs

        # Generate live recommendations with on-the-fly hooks
        print(f"Starting hook generation for live recommendations for genre {genre}...")
        start_time = time.time()

        live_recs = []
        for idx, (m_id, _) in enumerate(sorted_ids):
            if idx:
                # Gentle pause between calls to prevent HF Backend Error 40001;
                # no pause is needed after the final call.
                time.sleep(0.5)
            title = movie_metadata[m_id]['movie_title']
            hook = generate_hook_on_the_fly(title)
            live_recs.append({
                "movie_id": int(m_id),
                "movie_title": title,
                "hook": hook
            })

        time_taken = time.time() - start_time
        print(f"Hook generation for genre {genre} completed in {time_taken:.4f}s")

        card_updates = update_home_cards_ui(persona_name, live_recs, is_live=True, time_taken=time_taken)
        return [title_update, gr.update(visible=True), gr.update(visible=False)] + card_updates

    def transition_to_detail_view(movie_id):
        """Open the detail view for a movie with a loading placeholder.

        Returns a dict keyed by the ``detail_outputs`` components. With a
        falsy ``movie_id`` every component gets a no-op update; otherwise the
        home view is hidden, the detail view is shown with the movie's title
        and poster, and the explanation area shows a spinner.
        """
        if not movie_id:
            return {component: gr.update() for component in detail_outputs}

        info = movie_metadata[str(movie_id)]
        title = info['movie_title']
        poster_url = get_poster_url(title)

        # Spinner markup displayed while the explanation is being generated.
        loading_html = """
        <div style="display: flex; align-items: center; justify-content: center; min-height: 200px; width: 100%; flex-direction: column; gap: 1rem; padding: 2rem;">
            <div class="loader"></div>
            <p style="color: #1e293b; font-size: 1.1rem; font-weight: 600;">Generating personalized explanation...</p>
        </div>
        <style>
        .loader {
            border: 4px solid #f3f3f3;
            border-top: 4px solid #3b82f6;
            border-radius: 50%;
            width: 40px;
            height: 40px;
            animation: spin 1s linear infinite;
        }
        @keyframes spin {
            0% { transform: rotate(0deg); }
            100% { transform: rotate(360deg); }
        }
        </style>
        """

        view_state = {}
        view_state[home_view] = gr.update(visible=False)
        view_state[detail_view] = gr.update(visible=True)
        view_state[selected_movie_id] = movie_id
        view_state[detail_title] = f"## {title}"
        view_state[detail_poster] = poster_url
        view_state[detail_explanation] = loading_html
        view_state[detail_fallback_info] = gr.update(visible=False)
        return view_state

    def generate_final_explanation(movie_id, persona_name, style):
        """Produce the explanation text and the fallback-notice visibility.

        With a falsy ``movie_id`` the text is empty and the notice is hidden;
        otherwise both come from ``generate_explanation``.
        """
        if movie_id:
            text, used_fallback = generate_explanation(movie_id, persona_name, style)
        else:
            text, used_fallback = "", False
        return text, gr.update(visible=used_fallback)

    def back_to_home():
        """Show the home view again and hide the detail view."""
        visibility = {}
        visibility[home_view] = gr.update(visible=True)
        visibility[detail_view] = gr.update(visible=False)
        return visibility

    def back_to_persona_selection():
        """Show the persona-selection view again and hide the home view."""
        visibility = {}
        visibility[persona_view] = gr.update(visible=True)
        visibility[home_view] = gr.update(visible=False)
        return visibility

    # Event wiring: Persona buttons
    # Buttons were created in persona_archetypes key order, so pairing them
    # with the keys gives each button its persona; the lambda default binds
    # the name per iteration.
    for button, persona_name in zip(persona_buttons, persona_archetypes):
        button.click(
            lambda name=persona_name: select_persona_and_render_home(name),
            inputs=[],
            outputs=home_page_outputs,
        )

    # Event wiring: Genre filter
    genre_filter_outputs = [home_title, recommendations_grid, no_recommendations_message] + all_card_outputs
    genre_dropdown.change(
        filter_by_genre,
        inputs=[genre_dropdown, selected_persona],
        outputs=genre_filter_outputs,
    )

    # Event wiring: "See Full Explanation" buttons
    # Step 1 swaps to the detail view with a spinner; step 2 replaces the
    # spinner with the generated explanation.
    for card in home_cards:
        open_detail = card['btn'].click(
            transition_to_detail_view,
            inputs=[card['id']],
            outputs=detail_outputs,
        )
        open_detail.then(
            generate_final_explanation,
            inputs=[card['id'], selected_persona, narrative_style],
            outputs=[detail_explanation, detail_fallback_info],
        )
    
    # Event wiring: Narrative style change
    def on_style_change(style, movie_id, persona_name):
        """Callback to regenerate explanation when the narrative style changes.

        Same contract as ``generate_final_explanation``, with the style passed
        first: returns the explanation text and the fallback-notice update.
        """
        return generate_final_explanation(movie_id, persona_name, style)

    narrative_style.change(
        fn=on_style_change,
        inputs=[narrative_style, selected_movie_id, selected_persona],
        outputs=[detail_explanation, detail_fallback_info],
    )

    # Event wiring: Back buttons
    back_button.click(fn=back_to_home, inputs=[], outputs=[home_view, detail_view])
    back_to_persona_button.click(
        fn=back_to_persona_selection,
        inputs=[],
        outputs=[persona_view, home_view],
    )

# Launch the Gradio app only when this file is executed as a script, not
# when it is imported.
if __name__ == "__main__":
    demo.launch(debug=True)