Spaces:

MLBench
/

getscenes

Running

File size: 33,356 Bytes

import gradio as gr
import openai
import json
from datetime import datetime, timedelta
import uuid
from typing import Dict

from config import OPENAI_API_KEY, DB_PATH, EMBED_MODEL
from utils import get_embedding, cosine_similarity, find_top_k_matches
from scraper import scrape_workshops_from_squarespace
from database import (
    fetch_all_embeddings, 
    fetch_row_by_id, 
    fetch_all_faq_embeddings, 
    get_session_state, 
    update_session_state, 
    log_question
)

# ============================================================================
# CONFIGURATION
# ============================================================================

# Abort at import time when no API key was provided by config; the LLM path in
# process_question() cannot work without it.
if not OPENAI_API_KEY:
    raise ValueError("OPENAI_API_KEY not found in .env file")

# Set the key on the openai module, which process_question() calls directly.
openai.api_key = OPENAI_API_KEY


# Store session ID for the conversation.
# NOTE: this is one module-level value; chat_with_bot() passes it for every
# message handled by this process until reset_session() replaces it.
session_id = str(uuid.uuid4())

# Cache for workshop data and embeddings, read and refreshed by
# get_current_workshops().
workshop_cache = {
    'data': [],            # workshop dicts that passed the confidence filter
    'embeddings': [],      # one embedding per entry in 'data', same order
    'last_updated': None,  # datetime of the last refresh, None until the first one
    'cache_duration': timedelta(hours=24)  # how long a refresh is considered fresh
}

# ============================================================================
# KEYWORD LISTS FOR ROUTING
# ============================================================================

# Substrings counted by detect_response_type(); a message is routed to
# "support" mode when at least one matches and they are not outnumbered by
# ACTION_KEYWORDS matches. Matching is plain substring containment, so
# overlapping entries (e.g. 'feel' and 'feeling') can both count for one word.
EMOTIONAL_KEYWORDS = [
    'stuck', 'frustrated', 'discouraged', 'overwhelmed', 'scared',
    'nervous', 'anxious', 'worried', 'fear', 'doubt', 'confidence',
    'insecure', 'lost', 'confused', 'struggling', 'hard time',
    'giving up', 'burnout', 'rejection', 'failed', 'can\'t',
    'feeling', 'feel', 'emotional', 'depressed', 'sad', 'unmotivated',
    'hopeless', 'stressed', 'pressure', 'imposter'
]

# Substrings counted against EMOTIONAL_KEYWORDS in detect_response_type() and
# also used by process_question() to decide whether a message is on-topic.
ACTION_KEYWORDS = [
    'get an agent', 'find agent', 'need agent', 'want agent', 'sign with agent',
    'more auditions', 'book', 'booking', 'callbacks', 'improve',
    'better', 'self-tape', 'materials', 'headshots', 'reel',
    'network', 'connections', 'industry', 'career', 'strategy',
    'agent prep', 'total agent prep', 'workshop', 'class', 'training',
    'results', 'success', 'grow', 'advance', 'level up'
]

# Whole words/phrases (matched with regex word boundaries in
# detect_policy_issue()) that make process_question() return the fixed
# "please email" reply instead of an answer.
POLICY_KEYWORDS = [
    'refund', 'refunds', 'money back',
    'attend', 'attendance', 'miss', 'missed', 'missing', 'absent',
    'late', 'lateness', 'tardy',
    'reschedule', 'change date', 'move class',
    'credit', 'credits',
    'cancel', 'cancellation', 'canceling',
    'policy', 'policies'
]

# Substrings that make process_question() request a detailed answer
# (wants_details=True) and that also count as on-topic in its off-topic guard.
DETAIL_SYNONYMS = [
    'detail', 'details', 'explain', 'elaborate', 'tell me more', 
    'more info', 'describe', 'thorough', 'comprehensive'
]

# Persona text placed at the top of every prompt built by
# build_enhanced_prompt().
PERSONA_INSTRUCTION = """
You are a warm, encouraging mentor at Get Scene Studios. Your goal is to help actors navigate their careers with confidence.
- Sound natural and human, not scripted or robotic. Use conversational transitions like "I'd suggest starting with..." or "A great way to approach this is..."
- Be encouraging but practical. Acknowledge that the acting journey is a marathon, not a sprint.
- Help the user THINK: Instead of just giving an answer, add a brief "mentorship flourish" that explains the value of a recommendation (e.g., "This workshop is great because it gets you comfortable with the pressure of a real callback.")
"""

# ============================================================================
# HELPER FUNCTIONS
# ============================================================================

def calculate_workshop_confidence(w: Dict) -> float:
    """Score how complete a workshop record is.

    Every field that is present and truthy contributes a fixed weight; the
    weights add up to 1.0 for a fully populated record.

    Args:
        w: Workshop record (dict) to inspect.

    Returns:
        Sum of the weights of the populated fields, rounded to two decimals.
    """
    field_weights = (
        ('title', 0.3),
        ('instructor_name', 0.3),
        ('date', 0.2),
        ('time', 0.1),
        ('source_url', 0.1),
    )
    total = 0.0
    for field, weight in field_weights:
        if w.get(field):
            total += weight
    return round(total, 2)

# ============================================================================
# WORKSHOP FUNCTIONS
# ============================================================================

def get_current_workshops():
    """Return current workshops and their embeddings, refreshing the cache when stale.

    The cached lists are returned while the cache is younger than
    ``workshop_cache['cache_duration']`` and non-empty. Otherwise both
    Squarespace pages are scraped, records scoring below 0.8 in
    ``calculate_workshop_confidence`` are dropped, embeddings are generated
    and the cache is replaced.

    A failure while scraping one page is logged and does not prevent the
    other page from being used. If no valid workshop is found at all, the
    previous (possibly expired) cache contents are returned when available.

    Returns:
        Tuple ``(workshops, embeddings)`` of two lists in matching order;
        ``([], [])`` when nothing is available.
    """
    now = datetime.now()

    # Check if cache is still valid
    cache_is_fresh = (
        workshop_cache['last_updated']
        and now - workshop_cache['last_updated'] < workshop_cache['cache_duration']
        and workshop_cache['data']
    )
    if cache_is_fresh:
        print("Using cached workshop data")
        return workshop_cache['data'], workshop_cache['embeddings']

    print("Fetching fresh workshop data...")

    # Scrape each page independently so that one failing page neither hides
    # the other page's results nor bypasses the stale-cache fallback below.
    all_workshops = []
    for page_url in (
        "https://www.getscenestudios.com/online",
        "https://www.getscenestudios.com/instudio",
    ):
        try:
            all_workshops.extend(scrape_workshops_from_squarespace(page_url) or [])
        except Exception as e:
            print(f"Error scraping {page_url}: {e}", flush=True)

    # Data Integrity: Validate and score workshops
    valid_workshops = []
    total_score = 0
    for w in all_workshops:
        conf = calculate_workshop_confidence(w)
        if conf >= 0.8:
            valid_workshops.append(w)
            total_score += conf
        else:
            print(f"⚠️ Rejecting weak record (Confidence: {conf}): {w.get('title', 'Unknown')}", flush=True)

    avg_conf = total_score / len(valid_workshops) if valid_workshops else 0
    print(f"📊 DATA INTEGRITY: Found {len(all_workshops)} total, {len(valid_workshops)} valid (Confidence >= 0.8)", flush=True)
    print(f"📈 Retrieval Confidence: {avg_conf:.2f} (Average)", flush=True)

    all_workshops = valid_workshops

    if not all_workshops:
        if workshop_cache['data']:
            # 'last_updated' is deliberately left alone, so the next call
            # attempts a fresh scrape again.
            print("Scraping failed, using cached data")
            return workshop_cache['data'], workshop_cache['embeddings']
        print("No workshop data available")
        return [], []

    # Generate embeddings for workshops
    workshop_embeddings = []
    for workshop in all_workshops:
        try:
            embedding = get_embedding(workshop['full_text'])
            workshop_embeddings.append(embedding)
        except Exception as e:
            print(f"Error generating embedding for workshop: {e}")
            # Placeholder keeps the embeddings list aligned with the workshops
            # list, which find_top_workshops() pairs up positionally.
            workshop_embeddings.append([0] * 1536)

    # Update cache
    workshop_cache['data'] = all_workshops
    workshop_cache['embeddings'] = workshop_embeddings
    workshop_cache['last_updated'] = now

    print(f"Cached {len(all_workshops)} workshops")
    return all_workshops, workshop_embeddings

def find_top_workshops(user_embedding, k=3):
    """Rank the current workshops by similarity to the user's embedding.

    Args:
        user_embedding: Embedding of the user's question.
        k: Maximum number of results to return.

    Returns:
        Up to ``k`` tuples ``(score, index, full_text, workshop)`` in
        descending order; an empty list when no workshops are available.
    """
    workshops, workshop_embeddings = get_current_workshops()
    if not workshops:
        return []

    ranked = []
    for position, pair in enumerate(zip(workshops, workshop_embeddings)):
        record, vector = pair
        try:
            similarity = cosine_similarity(user_embedding, vector)
            ranked.append((similarity, position, record['full_text'], record))
        except Exception as e:
            # A record that cannot be scored is left out of the ranking.
            print(f"Error calculating similarity: {e}")

    return sorted(ranked, reverse=True)[:k]

# ============================================================================
# PROMPT BUILDING FUNCTIONS
# ============================================================================

def generate_enriched_links(row):
    """Build a markdown link line for one podcast episode row.

    Args:
        row: Mapping read through ``.get()`` for ``youtube_url``,
            ``guest_name`` and ``highlight_json`` (JSON text holding a list
            of objects with a ``summary`` field). May be ``None``.

    Returns:
        A one-element list with the markdown line, or an empty list when the
        row is missing or has no URL to link to (callers treat an empty list
        as "no podcast available").
    """
    if not row:
        return []

    base_url = row.get("youtube_url")
    if not base_url:
        # Without a URL the markdown link would point at "None".
        return []

    guest_name = (row.get("guest_name") or "").strip()

    # highlight_json may be absent, NULL or malformed; treat all of these as
    # "no highlights" instead of failing the whole request.
    try:
        highlights = json.loads(row.get("highlight_json") or "[]")
    except (TypeError, ValueError):
        highlights = []

    summary = ""
    if isinstance(highlights, list) and highlights and isinstance(highlights[0], dict):
        summary = str(highlights[0].get("summary") or "")

    # Truncate summary to first sentence only
    if summary:
        first_sentence = summary.split('.')[0] + '.'
        if len(first_sentence) > 120:
            short_summary = first_sentence[:117] + "..."
        else:
            short_summary = first_sentence
    else:
        short_summary = "Industry insights for actors"

    if guest_name:
        link_text = f"Watch {guest_name}'s episode here"
    else:
        link_text = "Watch this episode here"

    markdown = f"🎧 [{link_text}]({base_url}) - {short_summary}"
    return [markdown]

def build_enhanced_prompt(user_question, context_results, top_workshops, user_preference=None, enriched_podcast_links=None, wants_details=False, current_topic=None):
    """Build the system prompt with strict formatting rules.

    Args:
        user_question: Raw text of the user's message.
        context_results: Not used by this function; kept for caller
            compatibility.
        top_workshops: Ranked tuples whose fourth element is a workshop dict
            (the shape returned by ``find_top_workshops``). Up to the first
            ten are inspected and at most three are listed.
        user_preference: Preferred format, ``'online'`` or ``'instudio'``
            (case, hyphens and spaces are ignored when comparing), or
            ``None`` when unknown.
        enriched_podcast_links: Markdown podcast links; only the first one is
            used. A generic YouTube channel line is used when empty.
        wants_details: When true, the prompt asks for a detailed answer
            instead of a brief one.
        current_topic: Topic remembered from the session; used only when the
            question itself contains no topic keyword.

    Returns:
        The prompt text. A shorter "support mode" prompt is returned when
        ``detect_response_type`` classifies the question as ``"support"``.
    """
    online_url = "https://www.getscenestudios.com/online"
    instudio_url = "https://www.getscenestudios.com/instudio"

    # Free classes are ONLY available online (never in-studio)
    free_class_url = online_url

    # Normalise the preference once. Stored values look like 'instudio' while
    # workshop labels look like 'In-Studio'; comparing them without removing
    # the hyphen would reject every in-studio workshop for in-studio users.
    pref_key = None
    if user_preference:
        pref_key = user_preference.lower().replace('-', '').replace(' ', '')
    pref_labels = {'online': 'Online', 'instudio': 'In-Studio'}

    # helper for clean links
    def format_workshop(w):
        """Return one markdown bullet for a workshop, or None to skip it."""
        if not w.get('title') or not w.get('instructor_name') or not w.get('date'):
            return None

        source_url = w.get('source_url') or ''
        link = instudio_url if "/instudio" in source_url else online_url

        # User Preference Filtering
        w_type = "Online" if "online" in source_url else "In-Studio"
        if pref_key and pref_key != w_type.lower().replace('-', ''):
            return None

        # Safety net: skip records the shared confidence score rates as weak.
        if calculate_workshop_confidence(w) < 0.70:
            return None

        # R2: Force format inclusion into the title link for robustness
        display_title = f"{w['title']} ({w_type})"
        line = f"- [{display_title}]({link}) with {w['instructor_name']} on {w['date']}"
        if w.get('time'):
            # Only mention a time when one is known, to avoid a dangling "at".
            line += f" at {w['time']}"
        return line

    # Prepare workshop list (Top 3 max to display, but check top 10 for better filtering)
    workshop_lines = []
    if top_workshops:
        for _, _, _, w_data in top_workshops[:10]:
            formatted = format_workshop(w_data)
            if formatted:
                workshop_lines.append(formatted)

    if workshop_lines:
        workshop_text = "\n".join(workshop_lines[:3])
    else:
        # Improved fallback to avoid generic/placeholder-like feeling
        label = f"{pref_labels.get(pref_key, user_preference.capitalize())} " if user_preference else ""
        link = instudio_url if pref_key == 'instudio' else online_url
        workshop_text = f"We are constantly updating our schedule! Check our current {label}availability and latest workshops at {link}"

    # Handle missing podcast data strictly
    if not enriched_podcast_links:
        single_podcast = "Our latest industry insights are available on YouTube: https://www.youtube.com/@GetSceneStudios"
    else:
        single_podcast = enriched_podcast_links[0]

    # --- EMOTIONAL / SUPPORT MODE CHECK ---
    is_emotional = detect_response_type(user_question) == "support"

    if is_emotional:
        prompt = f"""{PERSONA_INSTRUCTION}

You are acting in SUPPORT MODE. 
        
CRITICAL INSTRUCTIONS:
1. ACKNOWLEDGE their feelings first (e.g., "I hear how frustrating it is to feel stuck...").
2. Provide SUPPORTIVE language (2-3 sentences max).
3. Offer EXACTLY ONE gentle follow-up resource: either the podcast OR the free class. 
4. DO NOT suggest paid workshops or upsell in this response.
5. KEEP IT BRIEF (≤150 words).

USER'S QUESTION: {user_question}

REQUIRED RESPONSE FORMAT:
[Your empathetic, supportive acknowledgment]

Here's a free resource that might help you move forward:
[Pick ONE: {single_podcast} OR Free Class at {free_class_url}]

Questions? Contact info@getscenestudios.com"""
        return prompt

    # --- STANDARD LOGIC FOR CONTEXT SNIPPET ---
    question_lower = user_question.lower()

    # Priority 1: Direct Keywords in current question. Order matters: the
    # first topic with a matching substring wins.
    topic_keywords = (
        ('agent', ['agent', 'representation', 'rep', 'manager']),
        ('beginner', ['beginner', 'new', 'start', 'beginning']),
        ('audition', ['callback', 'audition', 'tape', 'self-tape', 'booking']),
        ('mentorship', ['mentorship', 'coaching']),
        ('pricing', ['price', 'cost', 'how much']),
        ('classes', ['class', 'workshop', 'training', 'learn']),
        ('membership', ['membership', 'gsp', 'plus']),
    )
    detected_topic = next(
        (topic for topic, words in topic_keywords
         if any(word in question_lower for word in words)),
        None,
    )

    # Priority 2: Fallback to session context if current question is ambiguous
    if not detected_topic and current_topic:
        topic_map = {
            'agent_seeking': 'agent',
            'beginner': 'beginner',
            'audition_help': 'audition',
            'mentorship': 'mentorship',
            'pricing': 'pricing',
            'classes': 'classes',
            'membership': 'membership'
        }
        detected_topic = topic_map.get(current_topic)

    # Assign snippet based on topic
    static_snippets = {
        'agent': "Get Scene Studios has helped 1000+ actors land representation. Total Agent Prep offers live practice with working agents (age 16+, limited to 12 actors).",
        'beginner': "Get Scene Studios specializes in getting actors audition-ready fast with camera technique and professional self-tape skills.",
        'audition': "Get Scene offers Crush the Callback (Zoom simulation) and Perfect Submission (self-tape mastery) for actors refining their technique.",
        'mentorship': "Working Actor Mentorship is a 6-month program ($3,000) with structured feedback and industry access.",
        'pricing': "Get Scene Studios pricing varies by program. Most workshops cap at 12-14 actors for personalized feedback.",
        'membership': "Get Scene Plus (GSP) is our membership program that provides ongoing access to industry pros and audition insights.",
    }
    if detected_topic == 'classes':
        link = online_url if pref_key == 'online' else instudio_url
        context_snippet = f"Get Scene Studios offers world-class {user_preference or ''} acting workshops. Our sessions focus on camera technique and industry readiness. Full details at {link}."
    else:
        context_snippet = static_snippets.get(
            detected_topic,
            "Get Scene Studios (founded by Jesse Malinowski) offers training for TV/film actors at all levels.",
        )

    if not user_preference:
        preference_instruction = """
IMPORTANT: We need to know if the user prefers "Online" or "In-Studio" workshops.
If their question implies a location or they haven't specified, ask: "Are you looking for Online or In-Studio training?" as part of your response.
"""
    else:
        preference_instruction = f"""
USER PREFERENCE KNOWN: {user_preference.upper()}
1. DO NOT ask "Online or In-Studio" again.
2. Ensure your recommendations align with {user_preference.upper()} where possible.
"""

    # Brevity & Cognitive Load: Direct instructions based on user intent
    detail_instruction = "Answer the user's question briefly (2-3 sentences max, ≤150 words total)."
    if wants_details:
        target = f" regarding {detected_topic or 'the current recommendations'}"
        detail_instruction = f"Provide a detailed and thorough explanation for the user's request{target}. Focus on being helpful and providing deep value as a mentor."

    prompt = f"""{PERSONA_INSTRUCTION}

{context_snippet}
 
CRITICAL INSTRUCTIONS:
- {detail_instruction}
- Use natural, human transitions between your answer and the recommendations.
- For each recommendation, add a tiny bit of "mentor advice" on why it helps.
- Then ALWAYS provide exactly these three numbered recommendations (1. 2. 3.):
- Use ONLY the provided links - do not invent recommendations
- Every workshop Title MUST be followed by its format in parentheses, e.g., "Workshop Name (Online)" or "Workshop Name (In-Studio)".
- Focus on clean, readable formatting.{preference_instruction}
 
USER'S QUESTION: {user_question}
 
REQUIRED RESPONSE FORMAT:
[Your brief answer to their question, ≤150 words total]
 
Here's your path forward:
1. Free class (start here, no credit card required): {free_class_url}
2. Recommended podcast episode:
{single_podcast}
3. Relevant paid workshop:
{workshop_text}
 
Questions? Contact info@getscenestudios.com"""

    return prompt

# ============================================================================
# DETECTION FUNCTIONS
# ============================================================================

def detect_question_category(question):
    """Return every category whose keywords occur in the question.

    Matching is case-insensitive substring containment, so a question can
    fall into several categories at once.

    Args:
        question: The user's message.

    Returns:
        List of category names, in the fixed order of the table below;
        empty when nothing matches.
    """
    text = question.lower()

    keyword_table = (
        ('agent_seeking', ('agent', 'representation', 'rep', 'manager', 'get an agent')),
        ('beginner', ('beginner', 'new', 'start', 'beginning', 'first time', 'never acted')),
        ('audition_help', ('audition', 'callback', 'tape', 'self-tape', 'submission')),
        ('mentorship', ('mentorship', 'coaching', 'intensive', 'mentor', 'one-on-one')),
        ('pricing', ('price', 'cost', 'pricing', '$', 'money', 'payment', 'fee')),
        ('classes', ('class', 'workshop', 'training', 'course', 'learn')),
        ('membership', ('membership', 'join', 'member', 'gsp', 'plus')),
        ('technical', ('self-tape', 'equipment', 'lighting', 'editing', 'camera')),
    )

    return [
        name
        for name, words in keyword_table
        if any(word in text for word in words)
    ]

def detect_response_type(question):
    """Classify a question as emotional ("support") or action-oriented ("standard").

    Counts how many EMOTIONAL_KEYWORDS and ACTION_KEYWORDS occur as
    substrings of the lower-cased question.

    Returns:
        "support" when at least one emotional keyword matches and emotional
        matches are not outnumbered by action matches; otherwise "standard".
    """
    text = question.lower()

    emotional_hits = len([kw for kw in EMOTIONAL_KEYWORDS if kw in text])
    action_hits = len([kw for kw in ACTION_KEYWORDS if kw in text])

    if emotional_hits == 0 or emotional_hits < action_hits:
        return "standard"
    return "support"

def detect_policy_issue(question):
    """Report whether the question mentions a hard-policy topic (refunds, attendance, etc.).

    Each entry of POLICY_KEYWORDS is searched for as a whole word/phrase
    (regex word boundaries), so that e.g. 'submission' does not trigger
    on 'miss'.

    Returns:
        True when any policy keyword is found, else False.
    """
    import re

    text = question.lower()
    return any(
        re.search(rf'\b{re.escape(term)}\b', text)
        for term in POLICY_KEYWORDS
    )

def detect_preference(question):
    """Infer the user's preferred class format from the question text.

    Returns:
        'online' when the text mentions online but not studio;
        'instudio' when it mentions studio, person or atlanta but not online;
        None when neither or both formats are mentioned.
    """
    text = question.lower()
    mentions_online = 'online' in text
    mentions_studio = 'studio' in text

    if mentions_online:
        # Mentioning both formats is treated as "no stated preference".
        return None if mentions_studio else 'online'

    if mentions_studio or 'person' in text or 'atlanta' in text:
        return 'instudio'
    return None

def get_contextual_business_info(categories):
    """Look up program information for the given question categories.

    Args:
        categories: Iterable of category names (as produced by
            detect_question_category).

    Returns:
        Dict mapping each known category to its info dict with the keys
        'programs', 'key_info' and 'journey'. Categories without an entry
        are left out.
    """
    info_by_category = {
        'agent_seeking': {
            'programs': ['Total Agent Prep', 'Working Actor Mentorship'],
            'key_info': 'Live pitch practice with real agents, Actors Access optimization',
            'journey': 'Total Agent Prep → GSP → Mentorship for sustained progress',
        },
        'beginner': {
            'programs': ['Free Classes', 'Get Scene 360', 'Get Scene Plus'],
            'key_info': 'Start with holistic foundation, build consistency',
            'journey': 'Free class → Get Scene 360 → GSP membership',
        },
        'audition_help': {
            'programs': ['Perfect Submission', 'Crush the Callback', 'Audition Insight'],
            'key_info': 'Self-tape mastery, callback simulation, pro feedback',
            'journey': 'Perfect Submission → GSP for ongoing Audition Insight',
        },
        'mentorship': {
            'programs': ['Working Actor Mentorship'],
            'key_info': '6-month intensive with structured feedback and accountability',
            'journey': 'Ready for commitment → WAM → Advanced workshops',
        },
    }

    return {
        name: info_by_category[name]
        for name in categories
        if name in info_by_category
    }

# ============================================================================
# MAIN CHATBOT LOGIC
# ============================================================================

def update_knowledge_from_question(session_id: str, question: str):
    """Extract format and topic from a question and store them on the session.

    Args:
        session_id: Session whose knowledge is updated.
        question: The user's message.

    Returns:
        Dict of the attributes that were stored ('format' and/or 'topic');
        empty when nothing was detected, in which case the session is not
        touched.
    """
    changes = {}

    # Extract Format
    detected_format = detect_preference(question)
    if detected_format:
        changes['format'] = detected_format

    # Extract Topic: specific topics win over generic ones; otherwise the
    # first detected category is used.
    categories = detect_question_category(question)
    if categories:
        preferred = ('agent_seeking', 'beginner', 'audition_help', 'mentorship', 'pricing')
        changes['topic'] = next(
            (topic for topic in preferred if topic in categories),
            categories[0],
        )

    if not changes:
        return {}

    update_session_state(session_id, knowledge_update=changes, increment_count=False)
    return changes

def process_question(question: str, current_session_id: str):
    """Produce the chatbot's reply to one user message.

    Processing order:
      0. Policy keywords -> fixed "please email" reply.
      1. Update and load the session's knowledge (format, topic).
      2. FAQ lookup by embedding similarity -> canned FAQ answer
         (optionally preceded by a "Did you mean" clarification).
      3. Off-topic guard -> fixed "not sure" reply.
      4. LLM answer built from the best-matching workshops and podcasts.

    Args:
        question: The user's message.
        current_session_id: Identifier of the conversation's session state.

    Returns:
        The reply text.
    """
    if not question:
        return "Question is required"

    # 0. HARD POLICY CHECK
    if detect_policy_issue(question):
        log_question(question, current_session_id)
        return "Please email info@getscenestudios.com."

    # 1. Handle Session & Knowledge State
    update_knowledge_from_question(current_session_id, question)

    session_state = get_session_state(current_session_id) or {}

    # The stored knowledge may be missing, NULL or not valid JSON; all of
    # these mean "nothing known yet".
    try:
        knowledge = json.loads(session_state.get('knowledge_context') or '{}')
    except (TypeError, ValueError):
        knowledge = {}
    if not isinstance(knowledge, dict):
        knowledge = {}

    user_preference = knowledge.get('format')
    current_topic = knowledge.get('topic')

    if not user_preference:
        user_preference = session_state.get('preference')

    update_session_state(current_session_id, increment_count=True)

    # Create embedding of user question
    user_embedding = get_embedding(question)

    # Check FAQ embeddings first
    faq_data = fetch_all_faq_embeddings()
    top_faqs = []

    for entry_id, question_text, answer_text, emb in faq_data:
        score = cosine_similarity(user_embedding, emb)
        top_faqs.append((score, entry_id, question_text, answer_text))
    top_faqs.sort(reverse=True)

    faq_threshold = 0.50       # Lowered from 0.85 to capture direct matches better
    ambiguous_threshold = 0.60 # Lowered from 0.70
    # NOTE(review): ambiguous_threshold is higher than faq_threshold, so any
    # score that would reach the "ambiguous" branch below is already handled
    # by the high-confidence branch; the ambiguous branch is currently
    # unreachable. The values are left as they are because changing them
    # alters which answers users receive - confirm the intended thresholds.

    # If high-confidence FAQ match found
    if top_faqs and top_faqs[0][0] >= faq_threshold:
        update_session_state(current_session_id, reset_clarification=True, increment_count=False)

        best_score, faq_id, question_text, answer_text = top_faqs[0]

        mentor_framing_start = "That's a great question! Here's the information on that:"
        mentor_framing_end = "I hope that clears things up! Remember, every bit of knowledge helps you steer your career in the right direction."

        enhanced_answer = f"{mentor_framing_start}\n\n{answer_text}"

        # (A keyword-based policy guard on FAQ answers used to live here and
        # is currently disabled.)
        categories = detect_question_category(question)
        contextual_info = get_contextual_business_info(categories)

        if contextual_info:
            next_steps = [
                f"A great next step for you: {info['journey']}"
                for info in contextual_info.values()
            ]
            enhanced_answer += "\n\n" + "\n".join(next_steps)

        # The closing line and contact footer belong to every FAQ answer,
        # not only to those that also have a suggested next step.
        enhanced_answer += f"\n\n{mentor_framing_end}\n\nQuestions? Contact info@getscenestudios.com"

        # Log question
        log_question(question, current_session_id, answer=enhanced_answer)

        return enhanced_answer

    elif top_faqs and top_faqs[0][0] >= ambiguous_threshold:
        # AMBIGUOUS ZONE
        needs_clarification = False
        best_match_q = top_faqs[0][2]

        # 1. Never clarify if the best match question is identical to the user question
        if question.lower().strip('?') != best_match_q.lower().strip('?'):
            # 2. Check Format logic (only if locational)
            is_locational = any(w in question.lower() for w in ['online', 'studio', 'person', 'atlanta', 'location', 'where'])
            if is_locational and not user_preference:
                needs_clarification = True

            # 3. Check Topic logic (only if generic)
            is_generic_query = any(w in question.lower() for w in ['price', 'cost', 'how much', 'schedule', 'when'])
            if is_generic_query and not current_topic:
                needs_clarification = True

            # 4. Force resolve if already asked once
            clarification_count = session_state.get('clarification_count', 0)
            if clarification_count > 0:
                needs_clarification = False

        if needs_clarification:
            update_session_state(current_session_id, increment_clarification=True, increment_count=False)
            return f"Did you mean: {best_match_q}?"

        # Auto-Resolve
        update_session_state(current_session_id, reset_clarification=True, increment_count=False)

        best_score, faq_id, question_text, answer_text = top_faqs[0]

        categories = detect_question_category(question)
        contextual_info = get_contextual_business_info(categories)

        enhanced_answer = answer_text
        if contextual_info:
            next_steps = [
                f"Next step: Consider {info['journey']}"
                for info in contextual_info.values()
            ]
            enhanced_answer += "\n\n" + "\n".join(next_steps)
            enhanced_answer += "\n\nQuestions? Contact info@getscenestudios.com"

        log_question(question, current_session_id, answer=enhanced_answer)

        return enhanced_answer

    else:
        # 3. HALLUCINATION GUARD: only continue to the LLM for messages that
        # look related to acting/the studio, or short follow-ups within an
        # existing conversation.
        categories = detect_question_category(question)

        has_session_context = (current_topic is not None) or (user_preference is not None)

        FOLLOWUP_KEYWORDS = ['yes', 'no', 'sure', 'okay', 'thanks', 'thank you', 'please', 'go ahead', 'continue', 'more']
        is_acting_related = (
            len(categories) > 0 or
            detect_response_type(question) == "support" or
            any(k in question.lower() for k in ACTION_KEYWORDS) or
            any(k in question.lower() for k in DETAIL_SYNONYMS) or
            any(k in question.lower() for k in ['class', 'workshop', 'coaching', 'studio', 'acting', 'online', 'person', 'atlanta', 'training', 'prefer', 'preference', 'format', 'recommendation', 'online class', 'online workshop','instudio class','instudio workshop', 'actor', 'scene', 'audition', 'theatre', 'film', 'tv', 'commercial', 'agent', 'rep', 'manager']) or
            (has_session_context and any(k == question.lower().strip('.!') for k in FOLLOWUP_KEYWORDS))
        )

        if not is_acting_related:
            return "I'm not exactly sure about that. Please email info@getscenestudios.com so a member of our team can get you the most accurate answer!"

    # 4. LLM PATH
    update_session_state(current_session_id, reset_clarification=True, increment_count=False)
    podcast_data = fetch_all_embeddings("podcast_episodes")
    top_workshops = find_top_workshops(user_embedding, k=10)
    top_podcasts = find_top_k_matches(user_embedding, podcast_data, k=3)

    enriched_podcast_links = []
    for _, podcast_id, _ in top_podcasts:
        row = fetch_row_by_id("podcast_episodes", podcast_id)
        if row:
            enriched_podcast_links.extend(generate_enriched_links(row))

    # Fall back to the first stored episode, but only when one exists; with
    # no podcast rows the prompt builder uses its generic channel link.
    if not enriched_podcast_links and podcast_data:
        fallback = fetch_row_by_id("podcast_episodes", podcast_data[0][0])
        if fallback:
            enriched_podcast_links = generate_enriched_links(fallback)

    # 5. Brevity & Detail Detection
    wants_details = any(syn in question.lower() for syn in DETAIL_SYNONYMS)

    final_prompt = build_enhanced_prompt(
        question,
        None,
        top_workshops,
        user_preference=user_preference,
        enriched_podcast_links=enriched_podcast_links,
        wants_details=wants_details,
        current_topic=current_topic
    )

    response = openai.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": final_prompt},
            {"role": "user", "content": question}
        ]
    )

    # The message content can be None; treat that as an empty reply.
    answer = (response.choices[0].message.content or "").strip()

    # Log question together with the answer, as the FAQ paths do.
    log_question(question, current_session_id, answer=answer)

    return answer

# ============================================================================
# GRADIO INTERFACE
# ============================================================================

def chat_with_bot(message, history):
    """Handle one chat turn by calling process_question directly (no Flask API).

    Args:
        message: User's current message.
        history: Chat history (list of message dictionaries); it is extended
            in place.

    Returns:
        The same history list, with the user message and the bot reply
        appended. Blank messages leave the history unchanged.
    """
    global session_id

    if not message.strip():
        return history

    try:
        # Process question directly
        reply_text = process_question(message, session_id)
    except Exception as exc:
        # Any failure is shown to the user as the assistant's reply.
        reply_text = f"❌ Error: {str(exc)}"

    # Append to history in Gradio 6.0 format
    history.extend([
        {"role": "user", "content": message},
        {"role": "assistant", "content": reply_text},
    ])
    return history

def reset_session():
    """Start a new conversation by replacing the module-level session ID.

    Returns:
        An empty list, which the reset button handler writes to the chatbot
        component to clear the displayed conversation.
    """
    global session_id
    fresh_id = uuid.uuid4()
    session_id = str(fresh_id)
    return []

# Create Gradio interface: a chat window, a message box with a send button,
# and two buttons that clear the display / start a new session.
with gr.Blocks(title="Get Scene Studios Chatbot") as demo:
    
    gr.Markdown(
        """
        # 🎬 Get Scene Studios AI Chatbot
        
        Ask questions about acting classes, workshops and more!
        """
    )
    
    # # Session info display
    # session_info = gr.Textbox(
    #     label="Current Session ID",
    #     value=f"Session: {session_id[:8]}...",
    #     interactive=False,
    #     scale=1
    # )
    
    # Chatbot interface
    chatbot = gr.Chatbot(
        label="Conversation",
        height=500
    )
    
    # Input area
    with gr.Row():
        msg = gr.Textbox(
            label="Your Message",
            lines=2,
            scale=4
        )
        submit_btn = gr.Button("Send 📤", scale=1, variant="primary")
    
    # Action buttons
    with gr.Row():
        clear_btn = gr.Button("Clear Chat 🗑️", scale=1)
        reset_btn = gr.Button("New Session 🔄", scale=1)
    
    # Example questions
    # gr.Examples(
    #     examples=[
    #         "How much does it cost?",
    #         "I want to get an agent",
    #         "I'm a beginner, where should I start?",
    #         "Tell me about your workshops",
    #         "Do you have online classes?",
    #         "What's the difference between Perfect Submission and Crush the Callback?",
    #         "I prefer in-studio training",
    #         "Tell me about mentorship programs"
    #     ],
    #     inputs=msg,
    #     label="💡 Try these example questions:"
    # )
    
    # Event handlers
    # Send button: run the chat turn, then empty the message box.
    submit_btn.click(
        fn=chat_with_bot,
        inputs=[msg, chatbot],
        outputs=[chatbot]
    ).then(
        fn=lambda: "",
        inputs=None,
        outputs=[msg]
    )
    
    # Submitting the textbox behaves exactly like the send button.
    msg.submit(
        fn=chat_with_bot,
        inputs=[msg, chatbot],
        outputs=[chatbot]
    ).then(
        fn=lambda: "",
        inputs=None,
        outputs=[msg]
    )
    
    # Clear only empties the displayed conversation; the session ID is kept.
    clear_btn.click(
        fn=lambda: [],
        inputs=None,
        outputs=[chatbot]
    )
    
    # New Session empties the display and also generates a new session ID.
    reset_btn.click(
        fn=reset_session,
        inputs=None,
        outputs=[chatbot] #, session_info]
    )

# Launch the app: print a startup banner, then start the Gradio server.
if __name__ == "__main__":
    banner_rule = "=" * 60

    print("\n" + banner_rule)
    print("🎬 Get Scene Studios Chatbot")
    print(banner_rule)
    print("\n✅ No Flask API needed - all processing is done directly!")
    print("🌐 Gradio interface will open in your browser")
    print(banner_rule + "\n")

    demo.launch()