File size: 33,356 Bytes
e7f736a 752c466 e7f736a 752c466 e7f736a c8fe250 e7f736a c8fe250 e7f736a c8fe250 e7f736a c8fe250 e7f736a d55ca03 e7f736a d55ca03 e7f736a d55ca03 752c466 e7f736a d55ca03 e7f736a d55ca03 e7f736a d55ca03 e7f736a 752c466 e7f736a 752c466 e7f736a 752c466 e7f736a d994162 e7f736a db358eb d994162 e7f736a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 |
import gradio as gr
import openai
import json
from datetime import datetime, timedelta
import uuid
from typing import Dict
from config import OPENAI_API_KEY, DB_PATH, EMBED_MODEL
from utils import get_embedding, cosine_similarity, find_top_k_matches
from scraper import scrape_workshops_from_squarespace
from database import (
fetch_all_embeddings,
fetch_row_by_id,
fetch_all_faq_embeddings,
get_session_state,
update_session_state,
log_question
)
# ============================================================================
# CONFIGURATION
# ============================================================================
# Fail fast at import time: every request path below needs the OpenAI key.
if not OPENAI_API_KEY:
    raise ValueError("OPENAI_API_KEY not found in .env file")
openai.api_key = OPENAI_API_KEY

# Identifier of the current conversation, replaced by reset_session().
# NOTE(review): this is module-level state, so it looks like it is shared by
# every visitor of the Gradio app -- confirm that is intended.
session_id = str(uuid.uuid4())

# In-memory cache of scraped workshops and their embeddings.
# 'data' and 'embeddings' are parallel lists; 'last_updated' stays None until
# the first successful refresh; 'cache_duration' is how long a refresh is trusted.
workshop_cache = dict(
    data=[],
    embeddings=[],
    last_updated=None,
    cache_duration=timedelta(hours=24),
)
# ============================================================================
# KEYWORD LISTS FOR ROUTING
# ============================================================================
# Phrases that signal an emotional / support-seeking message.
# detect_response_type() counts how many of these occur as plain substrings
# of the lower-cased question.
EMOTIONAL_KEYWORDS = [
    'stuck', 'frustrated', 'discouraged', 'overwhelmed', 'scared',
    'nervous', 'anxious', 'worried', 'fear', 'doubt', 'confidence',
    'insecure', 'lost', 'confused', 'struggling', 'hard time',
    'giving up', 'burnout', 'rejection', 'failed', 'can\'t',
    'feeling', 'feel', 'emotional', 'depressed', 'sad', 'unmotivated',
    'hopeless', 'stressed', 'pressure', 'imposter'
]
# Phrases that signal an action / results-oriented message.
# Counted the same way as EMOTIONAL_KEYWORDS and weighed against them in
# detect_response_type(); also used by process_question() as one of the
# signals that a question is acting-related.
ACTION_KEYWORDS = [
    'get an agent', 'find agent', 'need agent', 'want agent', 'sign with agent',
    'more auditions', 'book', 'booking', 'callbacks', 'improve',
    'better', 'self-tape', 'materials', 'headshots', 'reel',
    'network', 'connections', 'industry', 'career', 'strategy',
    'agent prep', 'total agent prep', 'workshop', 'class', 'training',
    'results', 'success', 'grow', 'advance', 'level up'
]
# Terms that route a question straight to the "please email us" reply.
# detect_policy_issue() matches these as whole words (regex \b boundaries),
# not as substrings.
POLICY_KEYWORDS = [
    'refund', 'refunds', 'money back',
    'attend', 'attendance', 'miss', 'missed', 'missing', 'absent',
    'late', 'lateness', 'tardy',
    'reschedule', 'change date', 'move class',
    'credit', 'credits',
    'cancel', 'cancellation', 'canceling',
    'policy', 'policies'
]
# Substrings that mean the user wants a longer answer; process_question()
# uses them to set the wants_details flag passed to the prompt builder.
DETAIL_SYNONYMS = [
    'detail', 'details', 'explain', 'elaborate', 'tell me more',
    'more info', 'describe', 'thorough', 'comprehensive'
]
# Persona preamble placed at the start of every system prompt produced by
# build_enhanced_prompt() (both support mode and standard mode).
PERSONA_INSTRUCTION = """
You are a warm, encouraging mentor at Get Scene Studios. Your goal is to help actors navigate their careers with confidence.
- Sound natural and human, not scripted or robotic. Use conversational transitions like "I'd suggest starting with..." or "A great way to approach this is..."
- Be encouraging but practical. Acknowledge that the acting journey is a marathon, not a sprint.
- Help the user THINK: Instead of just giving an answer, add a brief "mentorship flourish" that explains the value of a recommendation (e.g., "This workshop is great because it gets you comfortable with the pressure of a real callback.")
"""
# ============================================================================
# HELPER FUNCTIONS
# ============================================================================
def calculate_workshop_confidence(w: Dict) -> float:
    """Score how complete a workshop record is.

    Every field that is present and truthy adds its weight to the score:
    title 0.3, instructor_name 0.3, date 0.2, time 0.1, source_url 0.1.

    Args:
        w: Workshop record (dict-like, read with ``.get``).

    Returns:
        The summed weight rounded to two decimals, between 0.0 and 1.0.
    """
    field_weights = (
        ('title', 0.3),
        ('instructor_name', 0.3),
        ('date', 0.2),
        ('time', 0.1),
        ('source_url', 0.1),
    )
    total = 0.0
    for field_name, weight in field_weights:
        if w.get(field_name):
            total += weight
    return round(total, 2)
# ============================================================================
# WORKSHOP FUNCTIONS
# ============================================================================
def get_current_workshops():
    """Return ``(workshops, embeddings)`` for the current schedule, using a cache.

    The two returned lists are parallel: ``embeddings[i]`` belongs to
    ``workshops[i]``.

    Flow:
      1. If ``workshop_cache`` was refreshed within ``cache_duration`` and is
         non-empty, return it as-is.
      2. Otherwise scrape the online and in-studio pages, keep only records
         whose completeness score is at least 0.8, embed each record's
         ``full_text`` and store the result in ``workshop_cache``.
      3. If the refresh yields nothing usable, fall back to whatever is in the
         cache (even if stale), or to ``([], [])`` when the cache is empty.

    Returns:
        Tuple ``(list_of_workshop_dicts, list_of_embeddings)``.
    """
    # The cache dict is only mutated, never rebound, so no ``global`` is needed.
    now = datetime.now()
    cached_data = workshop_cache['data']
    last_updated = workshop_cache['last_updated']

    # Check if cache is still valid
    if (last_updated and
            now - last_updated < workshop_cache['cache_duration'] and
            cached_data):
        print("Using cached workshop data")
        return cached_data, workshop_cache['embeddings']

    print("Fetching fresh workshop data...")
    # Scrape each page independently so that a failure on one page neither
    # loses the other page's results nor prevents the stale-cache fallback.
    all_workshops = []
    for page_url in (
        "https://www.getscenestudios.com/online",
        "https://www.getscenestudios.com/instudio",
    ):
        try:
            all_workshops.extend(scrape_workshops_from_squarespace(page_url) or [])
        except Exception as e:
            print(f"Error scraping {page_url}: {e}")

    # Data Integrity: Validate and score workshops
    valid_workshops = []
    total_score = 0
    for w in all_workshops:
        conf = calculate_workshop_confidence(w)
        if conf >= 0.8:
            valid_workshops.append(w)
            total_score += conf
        else:
            print(f"β οΈ Rejecting weak record (Confidence: {conf}): {w.get('title', 'Unknown')}", flush=True)
    avg_conf = total_score / len(valid_workshops) if valid_workshops else 0
    print(f"π DATA INTEGRITY: Found {len(all_workshops)} total, {len(valid_workshops)} valid (Confidence >= 0.8)", flush=True)
    print(f"π Retrieval Confidence: {avg_conf:.2f} (Average)", flush=True)

    if not valid_workshops:
        if cached_data:
            print("Scraping failed, using cached data")
            return cached_data, workshop_cache['embeddings']
        print("No workshop data available")
        return [], []

    # Generate embeddings for workshops; None marks a failed embedding.
    workshop_embeddings = []
    for workshop in valid_workshops:
        try:
            workshop_embeddings.append(get_embedding(workshop['full_text']))
        except Exception as e:
            print(f"Error generating embedding for workshop: {e}")
            workshop_embeddings.append(None)

    # Failed embeddings are replaced by a zero vector so the lists stay
    # parallel. Its length follows the vectors that did succeed, because the
    # embedding model is configurable; 1536 is only the last-resort default.
    dimension = next(
        (len(emb) for emb in workshop_embeddings if emb is not None),
        1536,
    )
    workshop_embeddings = [
        emb if emb is not None else [0] * dimension
        for emb in workshop_embeddings
    ]

    # Update cache
    workshop_cache['data'] = valid_workshops
    workshop_cache['embeddings'] = workshop_embeddings
    workshop_cache['last_updated'] = now
    print(f"Cached {len(valid_workshops)} workshops")
    return valid_workshops, workshop_embeddings
def find_top_workshops(user_embedding, k=3):
    """Rank the current workshops by similarity to the user's question.

    Args:
        user_embedding: Embedding of the user's question.
        k: Maximum number of results to return.

    Returns:
        Up to ``k`` tuples ``(score, index, full_text, workshop)`` ordered from
        highest to lowest score; an empty list when no workshops are available.
        Workshops whose similarity cannot be computed are logged and skipped.
    """
    workshops, workshop_embeddings = get_current_workshops()
    if not workshops:
        return []

    ranked = []
    for position, (record, vector) in enumerate(zip(workshops, workshop_embeddings)):
        try:
            similarity = cosine_similarity(user_embedding, vector)
            ranked.append((similarity, position, record['full_text'], record))
        except Exception as e:
            print(f"Error calculating similarity: {e}")

    # Tuples compare by score first, then by position, so the dicts are never compared.
    return sorted(ranked, reverse=True)[:k]
# ============================================================================
# PROMPT BUILDING FUNCTIONS
# ============================================================================
def generate_enriched_links(row):
    """Build the Markdown link line for one podcast episode.

    Args:
        row: Dict-like episode record. Reads ``youtube_url``, ``guest_name``
            and ``highlight_json`` (a JSON array whose first element may carry
            a ``summary`` string).

    Returns:
        A one-element list with the Markdown line, or an empty list when the
        row is missing or has no URL (so callers never emit a dead link).
        The blurb is the first sentence of the first highlight's summary,
        capped at 120 characters; a generic blurb is used when no summary can
        be read.
    """
    if not row:
        return []
    base_url = row.get("youtube_url")
    if not base_url:
        return []
    guest_name = row.get("guest_name") or ""

    # highlight_json may be NULL in the database or hold malformed JSON;
    # neither should break the reply, so fall back to "no highlights".
    try:
        highlights = json.loads(row.get("highlight_json") or "[]")
    except (TypeError, ValueError):
        highlights = []

    summary = ""
    if isinstance(highlights, list) and highlights and isinstance(highlights[0], dict):
        candidate = highlights[0].get("summary")
        if isinstance(candidate, str):
            summary = candidate

    # Truncate summary to first sentence only
    if summary:
        first_sentence = summary.split('.')[0] + '.'
        if len(first_sentence) > 120:
            short_summary = first_sentence[:117] + "..."
        else:
            short_summary = first_sentence
    else:
        short_summary = "Industry insights for actors"

    # Avoid the malformed "Watch 's episode here" when the guest is unknown.
    link_text = f"Watch {guest_name}'s episode here" if guest_name else "Watch this episode here"
    markdown = f"π§ [{link_text}]({base_url}) - {short_summary}"
    return [markdown]
def build_enhanced_prompt(user_question, context_results, top_workshops, user_preference=None, enriched_podcast_links=None, wants_details=False, current_topic=None):
    """Build the system prompt with strict formatting rules.

    Args:
        user_question: The user's message, embedded verbatim in the prompt.
        context_results: Unused; kept so existing callers keep working.
        top_workshops: Ranked ``(score, index, full_text, workshop)`` tuples.
            The first 10 are filtered and at most 3 are listed.
        user_preference: ``'online'`` / ``'instudio'`` (hyphen, space and case
            variants are accepted) or None when unknown.
        enriched_podcast_links: Markdown link lines; only the first is used.
        wants_details: When True, the brevity instruction is replaced by a
            request for a thorough explanation.
        current_topic: Session topic, used only when the question itself
            contains no topic keyword.

    Returns:
        The prompt string. When ``detect_response_type`` classifies the
        question as "support", a shorter support-mode prompt without paid
        workshop recommendations is returned instead of the standard one.
    """
    # Free classes are ONLY available online (never in-studio)
    free_class_url = "https://www.getscenestudios.com/online"

    def normalize_format(value):
        """Reduce 'In-Studio', 'in studio' and 'instudio' to the same key."""
        return ''.join(ch for ch in str(value).lower() if ch.isalnum())

    # The stored preference is spelled 'instudio' while the workshop label is
    # 'In-Studio'; comparing normalized keys keeps the filter from rejecting
    # every in-studio workshop.
    preference_key = normalize_format(user_preference) if user_preference else None

    # helper for clean links
    def format_workshop(w):
        if not w.get('title') or not w.get('instructor_name') or not w.get('date'):
            return None
        source_url = w.get('source_url') or ''
        link = "https://www.getscenestudios.com/instudio" if "/instudio" in source_url else "https://www.getscenestudios.com/online"
        # User Preference Filtering
        w_type = "Online" if "online" in source_url else "In-Studio"
        if preference_key and preference_key != normalize_format(w_type):
            return None
        if calculate_workshop_confidence(w) < 0.70:
            return None
        # R2: Force format inclusion into the title link for robustness
        display_title = f"{w['title']} ({w_type})"
        line = f"- [{display_title}]({link}) with {w['instructor_name']} on {w['date']}"
        # Only mention the time when there is one (avoids a dangling "at ").
        if w.get('time'):
            line += f" at {w['time']}"
        return line

    # Prepare workshop list (check top 10, keep the first 3 that survive filtering)
    workshop_lines = []
    for _, _, _, w_data in (top_workshops or [])[:10]:
        formatted = format_workshop(w_data)
        if formatted:
            workshop_lines.append(formatted)

    if workshop_lines:
        workshop_text = "\n".join(workshop_lines[:3])
    else:
        # Improved fallback to avoid generic/placeholder-like feeling
        format_labels = {'online': 'Online', 'instudio': 'In-Studio'}
        label = ""
        if user_preference:
            label = format_labels.get(preference_key, str(user_preference).capitalize()) + " "
        link = "https://www.getscenestudios.com/instudio" if preference_key == 'instudio' else "https://www.getscenestudios.com/online"
        workshop_text = f"We are constantly updating our schedule! Check our current {label}availability and latest workshops at {link}"

    # Handle missing podcast data strictly
    if not enriched_podcast_links:
        single_podcast = "Our latest industry insights are available on YouTube: https://www.youtube.com/@GetSceneStudios"
    else:
        single_podcast = enriched_podcast_links[0]

    # --- EMOTIONAL / SUPPORT MODE CHECK ---
    is_emotional = detect_response_type(user_question) == "support"
    if is_emotional:
        prompt = f"""{PERSONA_INSTRUCTION}
You are acting in SUPPORT MODE.
CRITICAL INSTRUCTIONS:
1. ACKNOWLEDGE their feelings first (e.g., "I hear how frustrating it is to feel stuck...").
2. Provide SUPPORTIVE language (2-3 sentences max).
3. Offer EXACTLY ONE gentle follow-up resource: either the podcast OR the free class.
4. DO NOT suggest paid workshops or upsell in this response.
5. KEEP IT BRIEF (β€150 words).
USER'S QUESTION: {user_question}
REQUIRED RESPONSE FORMAT:
[Your empathetic, supportive acknowledgment]
Here's a free resource that might help you move forward:
[Pick ONE: {single_podcast} OR Free Class at {free_class_url}]
Questions? Contact info@getscenestudios.com"""
        return prompt

    # --- STANDARD LOGIC FOR CONTEXT SNIPPET ---
    question_lower = user_question.lower()

    # Priority 1: Direct keywords in the current question (first match wins,
    # so the order of this table matters).
    topic_keywords = (
        ('agent', ['agent', 'representation', 'rep', 'manager']),
        ('beginner', ['beginner', 'new', 'start', 'beginning']),
        ('audition', ['callback', 'audition', 'tape', 'self-tape', 'booking']),
        ('mentorship', ['mentorship', 'coaching']),
        ('pricing', ['price', 'cost', 'how much']),
        ('classes', ['class', 'workshop', 'training', 'learn']),
        ('membership', ['membership', 'gsp', 'plus']),
    )
    detected_topic = next(
        (topic for topic, words in topic_keywords
         if any(word in question_lower for word in words)),
        None,
    )

    # Priority 2: Fallback to session context if current question is ambiguous
    if not detected_topic and current_topic:
        topic_map = {
            'agent_seeking': 'agent',
            'beginner': 'beginner',
            'audition_help': 'audition',
            'mentorship': 'mentorship',
            'pricing': 'pricing',
            'classes': 'classes',
            'membership': 'membership'
        }
        detected_topic = topic_map.get(current_topic)

    # Assign snippet based on topic
    classes_link = "https://www.getscenestudios.com/online" if preference_key == 'online' else "https://www.getscenestudios.com/instudio"
    context_snippets = {
        'agent': "Get Scene Studios has helped 1000+ actors land representation. Total Agent Prep offers live practice with working agents (age 16+, limited to 12 actors).",
        'beginner': "Get Scene Studios specializes in getting actors audition-ready fast with camera technique and professional self-tape skills.",
        'audition': "Get Scene offers Crush the Callback (Zoom simulation) and Perfect Submission (self-tape mastery) for actors refining their technique.",
        'mentorship': "Working Actor Mentorship is a 6-month program ($3,000) with structured feedback and industry access.",
        'pricing': "Get Scene Studios pricing varies by program. Most workshops cap at 12-14 actors for personalized feedback.",
        'classes': f"Get Scene Studios offers world-class {user_preference or ''} acting workshops. Our sessions focus on camera technique and industry readiness. Full details at {classes_link}.",
        'membership': "Get Scene Plus (GSP) is our membership program that provides ongoing access to industry pros and audition insights.",
    }
    context_snippet = context_snippets.get(
        detected_topic,
        "Get Scene Studios (founded by Jesse Malinowski) offers training for TV/film actors at all levels.",
    )

    if not user_preference:
        preference_instruction = """
IMPORTANT: We need to know if the user prefers "Online" or "In-Studio" workshops.
If their question implies a location or they haven't specified, ask: "Are you looking for Online or In-Studio training?" as part of your response.
"""
    else:
        preference_instruction = f"""
USER PREFERENCE KNOWN: {user_preference.upper()}
1. DO NOT ask "Online or In-Studio" again.
2. Ensure your recommendations align with {user_preference.upper()} where possible.
"""

    # Brevity & Cognitive Load: Direct instructions based on user intent
    detail_instruction = "Answer the user's question briefly (2-3 sentences max, β€150 words total)."
    if wants_details:
        target = f" regarding {detected_topic or 'the current recommendations'}"
        detail_instruction = f"Provide a detailed and thorough explanation for the user's request{target}. Focus on being helpful and providing deep value as a mentor."

    prompt = f"""{PERSONA_INSTRUCTION}
{context_snippet}
CRITICAL INSTRUCTIONS:
- {detail_instruction}
- Use natural, human transitions between your answer and the recommendations.
- For each recommendation, add a tiny bit of "mentor advice" on why it helps.
- Then ALWAYS provide exactly these three numbered recommendations (1. 2. 3.):
- Use ONLY the provided links - do not invent recommendations
- Every workshop Title MUST be followed by its format in parentheses, e.g., "Workshop Name (Online)" or "Workshop Name (In-Studio)".
- Focus on clean, readable formatting.{preference_instruction}
USER'S QUESTION: {user_question}
REQUIRED RESPONSE FORMAT:
[Your brief answer to their question, β€150 words total]
Here's your path forward:
1. Free class (start here, no credit card required): {free_class_url}
2. Recommended podcast episode:
{single_podcast}
3. Relevant paid workshop:
{workshop_text}
Questions? Contact info@getscenestudios.com"""
    return prompt
# ============================================================================
# DETECTION FUNCTIONS
# ============================================================================
def detect_question_category(question):
    """Return every topic category whose keywords occur in the question.

    Matching is a case-insensitive substring test, so one question can fall
    into several categories. Categories are returned in the order they are
    declared in the table below.

    Args:
        question: The user's message.

    Returns:
        List of category names (possibly empty).
    """
    text = question.lower()
    keyword_table = {
        'agent_seeking': ['agent', 'representation', 'rep', 'manager', 'get an agent'],
        'beginner': ['beginner', 'new', 'start', 'beginning', 'first time', 'never acted'],
        'audition_help': ['audition', 'callback', 'tape', 'self-tape', 'submission'],
        'mentorship': ['mentorship', 'coaching', 'intensive', 'mentor', 'one-on-one'],
        'pricing': ['price', 'cost', 'pricing', '$', 'money', 'payment', 'fee'],
        'classes': ['class', 'workshop', 'training', 'course', 'learn'],
        'membership': ['membership', 'join', 'member', 'gsp', 'plus'],
        'technical': ['self-tape', 'equipment', 'lighting', 'editing', 'camera'],
    }
    return [
        name
        for name, words in keyword_table.items()
        if any(term in text for term in words)
    ]
def detect_response_type(question):
    """Classify a question as "support" (emotional) or "standard".

    Counts how many EMOTIONAL_KEYWORDS and how many ACTION_KEYWORDS occur as
    substrings of the lower-cased question. The result is "support" only when
    at least one emotional keyword is present and emotional keywords are at
    least as numerous as action keywords.
    """
    text = question.lower()
    emotional_hits = len([kw for kw in EMOTIONAL_KEYWORDS if kw in text])
    action_hits = len([kw for kw in ACTION_KEYWORDS if kw in text])
    if not emotional_hits or emotional_hits < action_hits:
        return "standard"
    return "support"
def detect_policy_issue(question):
    """Return True when the question mentions a hard-policy topic.

    Each entry of POLICY_KEYWORDS is matched against the lower-cased question
    as a whole word or phrase, so that e.g. 'submission' does not trigger on
    'miss'.
    """
    import re
    text = question.lower()
    return any(
        re.search(rf'\b{re.escape(term)}\b', text)
        for term in POLICY_KEYWORDS
    )
def detect_preference(question):
    """Detect whether the user states an Online or In-Studio preference.

    Matching is done on whole words so that the company name "Get Scene
    Studios" is not read as an in-studio preference and "personal" is not
    read as "in person".

    Args:
        question: The user's message (None or empty yields None).

    Returns:
        'online' when only online is mentioned, 'instudio' when only an
        in-studio cue is mentioned ("studio", "in-studio", "instudio",
        "in person", "in-person", "Atlanta"), otherwise None - including when
        both formats are mentioned.
    """
    import re

    if not question:
        return None
    q_lower = question.lower()

    mentions_online = re.search(r'\bonline\b', q_lower) is not None
    mentions_studio = re.search(
        r'\b(?:in)?studio\b|\bin[-\s]?person\b|\batlanta\b', q_lower
    ) is not None

    # A message naming both formats expresses no single preference.
    if mentions_online and not mentions_studio:
        return 'online'
    if mentions_studio and not mentions_online:
        return 'instudio'
    return None
def get_contextual_business_info(categories):
    """Look up program information for the given question categories.

    Only 'agent_seeking', 'beginner', 'audition_help' and 'mentorship' have
    entries; any other category is ignored.

    Args:
        categories: Iterable of category names.

    Returns:
        Dict mapping each recognised category to a dict with the keys
        'programs', 'key_info' and 'journey', in the order the categories
        were given.
    """
    context_map = {
        'agent_seeking': {
            'programs': ['Total Agent Prep', 'Working Actor Mentorship'],
            'key_info': 'Live pitch practice with real agents, Actors Access optimization',
            'journey': 'Total Agent Prep β GSP β Mentorship for sustained progress'
        },
        'beginner': {
            'programs': ['Free Classes', 'Get Scene 360', 'Get Scene Plus'],
            'key_info': 'Start with holistic foundation, build consistency',
            'journey': 'Free class β Get Scene 360 β GSP membership'
        },
        'audition_help': {
            'programs': ['Perfect Submission', 'Crush the Callback', 'Audition Insight'],
            'key_info': 'Self-tape mastery, callback simulation, pro feedback',
            'journey': 'Perfect Submission β GSP for ongoing Audition Insight'
        },
        'mentorship': {
            'programs': ['Working Actor Mentorship'],
            'key_info': '6-month intensive with structured feedback and accountability',
            'journey': 'Ready for commitment β WAM β Advanced workshops'
        }
    }
    return {
        name: context_map[name]
        for name in categories
        if name in context_map
    }
# ============================================================================
# MAIN CHATBOT LOGIC
# ============================================================================
def update_knowledge_from_question(session_id: str, question: str):
    """Extract format and topic from a question and persist them to the session.

    Args:
        session_id: Session whose knowledge is updated.
        question: The user's message.

    Returns:
        The dict of extracted attributes (keys 'format' and/or 'topic'), or an
        empty dict when nothing was detected. The session is only written to
        when something was detected, and the question counter is not
        incremented by this call.
    """
    changes = {}

    # Extract Format
    detected_format = detect_preference(question)
    if detected_format:
        changes['format'] = detected_format

    # Extract Topic: specific topics win over generic ones; otherwise take
    # the first detected category.
    found = detect_question_category(question)
    if found:
        preferred_order = ('agent_seeking', 'beginner', 'audition_help', 'mentorship', 'pricing')
        changes['topic'] = next(
            (topic for topic in preferred_order if topic in found),
            found[0],
        )

    if not changes:
        return {}
    update_session_state(session_id, knowledge_update=changes, increment_count=False)
    return changes
def process_question(question: str, current_session_id: str):
    """Answer one user message and return the reply text.

    Routing, in order:
      0. Empty question -> "Question is required".
      1. Policy keywords (refunds, attendance, ...) -> fixed "email us" reply.
      2. Best FAQ match by embedding similarity at or above the FAQ
         threshold -> stored FAQ answer with mentor framing and next steps.
      3. Question that does not look acting-related -> fixed fallback reply.
      4. Otherwise -> chat-model answer built from the top workshops and
         podcast episodes.

    Args:
        question: The user's message.
        current_session_id: Session used for stored preferences, counters and
            question logging.

    Returns:
        The reply string.
    """
    if not question:
        return "Question is required"

    # 0. HARD POLICY CHECK
    if detect_policy_issue(question):
        log_question(question, current_session_id)
        return "Please email info@getscenestudios.com."

    # 1. Handle Session & Knowledge State
    update_knowledge_from_question(current_session_id, question)
    session_state = get_session_state(current_session_id) or {}
    try:
        knowledge = json.loads(session_state.get('knowledge_context') or '{}')
    except (TypeError, ValueError):
        # Missing, non-string or malformed JSON: start from empty knowledge.
        knowledge = {}
    if not isinstance(knowledge, dict):
        knowledge = {}
    user_preference = knowledge.get('format')
    current_topic = knowledge.get('topic')
    if not user_preference:
        user_preference = session_state.get('preference')
    update_session_state(current_session_id, increment_count=True)

    question_lower = question.lower()

    # Create embedding of user question
    user_embedding = get_embedding(question)

    # Check FAQ embeddings first
    faq_data = fetch_all_faq_embeddings()
    top_faqs = []
    for entry_id, question_text, answer_text, emb in faq_data:
        score = cosine_similarity(user_embedding, emb)
        top_faqs.append((score, entry_id, question_text, answer_text))
    # Rank by score only, so tied scores never fall through to comparing text.
    top_faqs.sort(key=lambda item: item[0], reverse=True)

    faq_threshold = 0.50  # Lowered from 0.85 to capture direct matches better
    ambiguous_threshold = 0.60  # Lowered from 0.70
    # NOTE(review): with faq_threshold (0.50) below ambiguous_threshold (0.60)
    # every score that could reach the "ambiguous zone" branch is already
    # handled by the first branch, so that branch is currently unreachable.
    # Left in place unchanged; confirm the intended thresholds.

    # If high-confidence FAQ match found
    if top_faqs and top_faqs[0][0] >= faq_threshold:
        update_session_state(current_session_id, reset_clarification=True, increment_count=False)
        best_score, faq_id, question_text, answer_text = top_faqs[0]
        mentor_framing_start = "That's a great question! Here's the information on that:"
        mentor_framing_end = "I hope that clears things up! Remember, every bit of knowledge helps you steer your career in the right direction."
        enhanced_answer = f"{mentor_framing_start}\n\n{answer_text}"
        categories = detect_question_category(question)
        contextual_info = get_contextual_business_info(categories)
        if contextual_info:
            next_steps = [
                f"A great next step for you: {info['journey']}"
                for info in contextual_info.values()
            ]
            if next_steps:
                enhanced_answer += f"\n\n{chr(10).join(next_steps)}"
        enhanced_answer += f"\n\n{mentor_framing_end}\n\nQuestions? Contact info@getscenestudios.com"
        # Log question
        log_question(question, current_session_id, answer=enhanced_answer)
        return enhanced_answer

    elif top_faqs and top_faqs[0][0] >= ambiguous_threshold:
        # AMBIGUOUS ZONE
        needs_clarification = False
        best_match_q = top_faqs[0][2]
        # 1. Never clarify if the best match question is identical to the user question
        if question_lower.strip('?') != best_match_q.lower().strip('?'):
            # 2. Check Format logic (only if locational)
            is_locational = any(w in question_lower for w in ['online', 'studio', 'person', 'atlanta', 'location', 'where'])
            if is_locational and not user_preference:
                needs_clarification = True
            # 3. Check Topic logic (only if generic)
            is_generic_query = any(w in question_lower for w in ['price', 'cost', 'how much', 'schedule', 'when'])
            if is_generic_query and not current_topic:
                needs_clarification = True
            # 4. Force resolve if already asked once
            clarification_count = session_state.get('clarification_count', 0)
            if clarification_count > 0:
                needs_clarification = False
        if needs_clarification:
            update_session_state(current_session_id, increment_clarification=True, increment_count=False)
            return f"Did you mean: {best_match_q}?"
        # Auto-Resolve
        update_session_state(current_session_id, reset_clarification=True, increment_count=False)
        best_score, faq_id, question_text, answer_text = top_faqs[0]
        categories = detect_question_category(question)
        contextual_info = get_contextual_business_info(categories)
        enhanced_answer = answer_text
        if contextual_info:
            next_steps = [
                f"Next step: Consider {info['journey']}"
                for info in contextual_info.values()
            ]
            if next_steps:
                enhanced_answer += f"\n\n{chr(10).join(next_steps)}"
        enhanced_answer += f"\n\nQuestions? Contact info@getscenestudios.com"
        log_question(question, current_session_id, answer=enhanced_answer)
        return enhanced_answer

    else:
        # 3. HALLUCINATION GUARD
        categories = detect_question_category(question)
        has_session_context = (current_topic is not None) or (user_preference is not None)
        FOLLOWUP_KEYWORDS = ['yes', 'no', 'sure', 'okay', 'thanks', 'thank you', 'please', 'go ahead', 'continue', 'more']
        is_acting_related = (
            len(categories) > 0 or
            detect_response_type(question) == "support" or
            any(k in question_lower for k in ACTION_KEYWORDS) or
            any(k in question_lower for k in DETAIL_SYNONYMS) or
            any(k in question_lower for k in ['class', 'workshop', 'coaching', 'studio', 'acting', 'online', 'person', 'atlanta', 'training', 'prefer', 'preference', 'format', 'recommendation', 'online class', 'online workshop','instudio class','instudio workshop', 'actor', 'scene', 'audition', 'theatre', 'film', 'tv', 'commercial', 'agent', 'rep', 'manager']) or
            (has_session_context and any(k == question_lower.strip('.!') for k in FOLLOWUP_KEYWORDS))
        )
        if not is_acting_related:
            return "I'm not exactly sure about that. Please email info@getscenestudios.com so a member of our team can get you the most accurate answer!"

        # 4. LLM PATH
        update_session_state(current_session_id, reset_clarification=True, increment_count=False)
        podcast_data = fetch_all_embeddings("podcast_episodes")
        top_workshops = find_top_workshops(user_embedding, k=10)
        top_podcasts = find_top_k_matches(user_embedding, podcast_data, k=3)
        enriched_podcast_links = []
        for _, podcast_id, _ in top_podcasts:
            row = fetch_row_by_id("podcast_episodes", podcast_id)
            if row:  # the row may no longer exist
                enriched_podcast_links.extend(generate_enriched_links(row))
        # Fall back to the first stored episode, but only if there is one;
        # with no podcasts at all the prompt builder uses its YouTube fallback.
        if not enriched_podcast_links and podcast_data:
            fallback = fetch_row_by_id("podcast_episodes", podcast_data[0][0])
            if fallback:
                enriched_podcast_links = generate_enriched_links(fallback)

        # 5. Brevity & Detail Detection
        wants_details = any(syn in question_lower for syn in DETAIL_SYNONYMS)
        final_prompt = build_enhanced_prompt(
            question,
            None,
            top_workshops,
            user_preference=user_preference,
            enriched_podcast_links=enriched_podcast_links,
            wants_details=wants_details,
            current_topic=current_topic
        )
        response = openai.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": final_prompt},
                {"role": "user", "content": question}
            ]
        )
        # The message content can be None; treat that as an empty reply.
        answer = (response.choices[0].message.content or "").strip()
        # Log question together with the answer, as the FAQ paths do.
        log_question(question, current_session_id, answer=answer)
        return answer
# ============================================================================
# GRADIO INTERFACE
# ============================================================================
def chat_with_bot(message, history):
    """Handle one chat turn for the Gradio UI.

    Args:
        message: User's current message (None or blank is ignored).
        history: Chat history as a list of ``{"role", "content"}`` dicts, or
            None when the chat is still empty.

    Returns:
        The history with the user message and the bot reply appended. Blank
        input returns the history unchanged. Any exception raised while
        answering is shown to the user as the reply instead of propagating.
    """
    # A fresh Chatbot component may hand over None instead of an empty list.
    if history is None:
        history = []
    if not message or not message.strip():
        return history
    try:
        # Process question directly; session_id is the module-level session
        # (only read here, so no ``global`` declaration is needed).
        bot_reply = process_question(message, session_id)
    except Exception as e:
        bot_reply = f"β Error: {str(e)}"
    # Append to history in Gradio 6.0 format
    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": bot_reply})
    return history
def reset_session():
    """Start a new conversation.

    Replaces the module-level ``session_id`` with a freshly generated UUID
    string and returns an empty list, which the UI uses to clear the chat.
    """
    global session_id
    fresh_id = uuid.uuid4()
    session_id = str(fresh_id)
    cleared_history = []
    return cleared_history
# Create Gradio interface
# Layout: header, conversation view, message input with send button, and
# clear / new-session buttons. Event wiring follows the layout.
with gr.Blocks(title="Get Scene Studios Chatbot") as demo:
    gr.Markdown(
        """
# π¬ Get Scene Studios AI Chatbot
Ask questions about acting classes, workshops and more!
"""
    )

    # # Session info display
    # session_info = gr.Textbox(
    #     label="Current Session ID",
    #     value=f"Session: {session_id[:8]}...",
    #     interactive=False,
    #     scale=1
    # )

    # Chatbot interface
    chatbot = gr.Chatbot(label="Conversation", height=500)

    # Input area
    with gr.Row():
        msg = gr.Textbox(label="Your Message", lines=2, scale=4)
        submit_btn = gr.Button("Send π€", scale=1, variant="primary")

    # Action buttons
    with gr.Row():
        clear_btn = gr.Button("Clear Chat ποΈ", scale=1)
        reset_btn = gr.Button("New Session π", scale=1)

    # Example questions
    # gr.Examples(
    #     examples=[
    #         "How much does it cost?",
    #         "I want to get an agent",
    #         "I'm a beginner, where should I start?",
    #         "Tell me about your workshops",
    #         "Do you have online classes?",
    #         "What's the difference between Perfect Submission and Crush the Callback?",
    #         "I prefer in-studio training",
    #         "Tell me about mentorship programs"
    #     ],
    #     inputs=msg,
    #     label="π‘ Try these example questions:"
    # )

    # Event handlers
    # Sending (button click or Enter in the textbox) answers the message and
    # then empties the textbox.
    submit_btn.click(chat_with_bot, [msg, chatbot], [chatbot]).then(lambda: "", None, [msg])
    msg.submit(chat_with_bot, [msg, chatbot], [chatbot]).then(lambda: "", None, [msg])

    # Clearing only empties the visible chat; the session id is kept.
    clear_btn.click(lambda: [], None, [chatbot])

    # A new session also replaces the session id (see reset_session).
    reset_btn.click(reset_session, None, [chatbot])  # , session_info]
# Launch the app
# Script entry point: print a short banner, then start the Gradio server.
if __name__ == "__main__":
    print("\n" + "="*60)
    print("π¬ Get Scene Studios Chatbot")
    print("="*60)
    # This literal was split over two physical lines by a stray line-break
    # character from a mis-decoded check-mark emoji, which is a syntax error;
    # it is restored to a single line here.
    print("\n✅ No Flask API needed - all processing is done directly!")
    print("π Gradio interface will open in your browser")
    print("="*60 + "\n")
    demo.launch()
|