"""
Chat-based Demo Builder Interface

Clean, conversational interface for creating ThoughtSpot demos
"""
import warnings
warnings.filterwarnings('ignore', message='.*tuples.*format.*chatbot.*deprecated.*')

import gradio as gr
import os
import sys
import json
import time
import glob
from datetime import datetime
from dotenv import load_dotenv
from demo_builder_class import DemoBuilder
from supabase_client import load_gradio_settings, get_admin_setting, inject_admin_settings_to_env
from main_research import MultiLLMResearcher, Website
from demo_personas import (
    build_company_analysis_prompt,
    build_industry_research_prompt,
    VERTICALS,
    FUNCTIONS,
    get_use_case_config,
    parse_use_case
)
from llm_config import (
    DEFAULT_LLM_MODEL,
    UI_MODEL_CHOICES,
    get_openai_api_key,
    map_llm_display_to_provider,
)

load_dotenv(override=True)


# ==========================================================================
# TS ENVIRONMENT HELPERS
# ==========================================================================

def _ts_env_labels():
    """Yield (index, label) for each configured TS_ENV_{i}_LABEL entry.

    Environments are numbered from 1; iteration stops at the first index
    whose label is missing or blank (the list is expected to be contiguous).
    This is the single scan loop shared by all TS environment helpers below.
    """
    i = 1
    while True:
        label = os.getenv(f'TS_ENV_{i}_LABEL', '').strip()
        if not label:
            return
        yield i, label
        i += 1


def get_ts_environments() -> list:
    """Return list of environment labels from TS_ENV_N_LABEL/URL/.env entries."""
    envs = []
    for i, label in _ts_env_labels():
        # An entry without a URL terminates the list, matching the original
        # "both label and url required" contract for the dropdown.
        url = os.getenv(f'TS_ENV_{i}_URL', '').strip()
        if not url:
            break
        envs.append(label)
    # Fall back to a visible placeholder so the dropdown is never empty.
    return envs or ['(no environments configured)']


def get_ts_env_url(label: str) -> str:
    """Return the URL for a given environment label ('' if label not found)."""
    for i, env_label in _ts_env_labels():
        if env_label == label:
            # Strip any trailing slash so callers can safely append paths.
            return os.getenv(f'TS_ENV_{i}_URL', '').strip().rstrip('/')
    return ''


def get_ts_env_auth_key(label: str) -> str:
    """Return the actual auth key value for a given environment label.

    TS_ENV_N_KEY_VAR holds the *name* of the env var containing the token,
    so the token itself is read indirectly ('' if label or key var missing).
    """
    for i, env_label in _ts_env_labels():
        if env_label == label:
            key_var = os.getenv(f'TS_ENV_{i}_KEY_VAR', '').strip()
            return os.getenv(key_var, '').strip() if key_var else ''
    return ''
# ==========================================================================
# SETTINGS SCHEMA - Single source of truth for all settings
# To add a new setting: add ONE entry here, then create the UI component
# Format: (component_key, storage_key, default_value, converter_fn)
# ==========================================================================
SETTINGS_SCHEMA = [
    # App Settings - Left Column
    ('default_ai_model', 'default_llm', DEFAULT_LLM_MODEL, str),
    # default_company_url removed — company is chat-driven now
    ('default_use_case', 'default_use_case', 'Sales Analytics', str),
    ('liveboard_name', 'liveboard_name', '', str),
    ('tag_name', 'tag_name', '', str),

    # App Settings - Right Column
    ('fact_table_size', 'fact_table_size', '1000', str),
    ('dim_table_size', 'dim_table_size', '100', str),
    ('object_naming_prefix', 'object_naming_prefix', '', str),
    ('column_naming_style', 'column_naming_style', 'Regular Case', str),
    ('use_existing_model', 'use_existing_model', False, bool),
    ('existing_model_guid', 'existing_model_guid', '', str),
    ('geo_scope', 'geo_scope', 'USA Only', str),
    ('validation_mode', 'validation_mode', 'Off', str),  # Off = auto-run, On = pause at checkpoints

    # Advanced AI Settings (collapsed accordion)
    ('temperature_slider', 'temperature', 0.3, float),
    ('max_tokens', 'max_tokens', 4000, int),
    ('batch_size', 'batch_size', 5000, int),
    ('thread_count', 'thread_count', 4, int),

    # Database Connection Settings (collapsed accordion)
    ('sf_account', 'snowflake_account', '', str),
    ('sf_user', 'snowflake_user', '', str),
    ('sf_role', 'snowflake_role', 'ACCOUNTADMIN', str),
    ('default_warehouse', 'default_warehouse', 'COMPUTE_WH', str),
    ('default_database', 'default_database', 'DEMO_DB', str),
    ('default_schema', 'default_schema', 'PUBLIC', str),
    # ts_instance_url removed — replaced by TS Environment dropdown on front page
    ('ts_username', 'thoughtspot_username', '', str),
    ('data_adjuster_url', 'data_adjuster_url', '', str),
    ('share_with', 'share_with', '', str),

    # Status (special - not saved, just displayed)
    ('settings_status', None, '', str),
]


def get_settings_defaults():
    """Return list of default values in schema order."""
    return [default for _, _, default, _ in SETTINGS_SCHEMA]


def load_settings_values(settings_dict: dict, user_email: str = "") -> list:
    """Load settings from dict, returning values in schema order.

    Stored values are text (Supabase stores everything as strings), so each
    raw value is run through the schema's converter. Missing or empty values
    fall back to the schema default; conversion failures also fall back.
    """
    values = []
    for component_key, storage_key, default, converter in SETTINGS_SCHEMA:
        if storage_key is None:
            # Special case: settings_status — display-only, never persisted.
            values.append(f"✅ Settings loaded for {user_email}" if user_email else "")
            continue
        raw_value = settings_dict.get(storage_key, default)
        try:
            if converter == bool:
                # Special handling: bool("False") returns True (non-empty string)
                # We need to check the actual string value
                if isinstance(raw_value, bool):
                    values.append(raw_value)
                elif isinstance(raw_value, str):
                    values.append(raw_value.lower() in ('true', '1', 'yes'))
                else:
                    values.append(bool(raw_value) if raw_value is not None else default)
            # BUG FIX: only treat None/'' as "missing". The previous
            # truthiness check (`if raw_value`) silently replaced legitimate
            # falsy stored values such as 0 or 0.0 with the default.
            elif raw_value is None or raw_value == '':
                values.append(default)
            else:
                values.append(converter(raw_value))
        except (ValueError, TypeError):
            values.append(default)
    return values


def build_settings_save_dict(values: list) -> dict:
    """Build dict for saving from values in schema order.

    ``zip`` naturally tolerates a ``values`` list shorter than the schema
    (extra schema entries are simply not saved), matching the old index guard.
    """
    save_dict = {}
    for (component_key, storage_key, default, converter), value in zip(SETTINGS_SCHEMA, values):
        if storage_key is None:
            # Skip settings_status — display-only.
            continue
        # Convert to string for storage (Supabase stores as text)
        save_dict[storage_key] = str(value) if value is not None else str(default)
    return save_dict


def require_authenticated_email(request: gr.Request = None, user_email: str = None) -> str:
    """Require an authenticated user identity for settings operations.

    Resolution order: the Gradio request's ``username``, then the explicit
    ``user_email`` argument, then (local dev only) DEMOPREP_DEV_USER_EMAIL
    when DEMOPREP_NO_AUTH is enabled.

    Returns:
        The resolved identity, stripped and lower-cased (used as storage key).

    Raises:
        ValueError: if no concrete user identity can be resolved.
    """
    request_user = getattr(request, 'username', None) if request else None
    resolved_user = request_user or user_email
    if resolved_user and str(resolved_user).strip():
        return str(resolved_user).strip().lower()

    # Local-dev no-auth mode is explicit and still requires a concrete user key.
    no_auth = os.getenv('DEMOPREP_NO_AUTH', 'false').lower() in ('true', '1', 'yes')
    if no_auth:
        dev_user_email = os.getenv('DEMOPREP_DEV_USER_EMAIL', '').strip().lower()
        if dev_user_email:
            return dev_user_email
        raise ValueError(
            "DEMOPREP_NO_AUTH=true requires DEMOPREP_DEV_USER_EMAIL to be set. "
            "Set both values and retry."
        )
    raise ValueError(
        "Authenticated username is required for settings operations. "
        "Please sign in and retry."
    )
def build_initial_chat_message(company: str, use_case: str) -> str:
    """Build the pre-filled chat message from current settings.

    Returns '' unless both a company and a use case are available.
    """
    return f"{company}, {use_case}" if company and use_case else ""


def safe_print(*args, **kwargs):
    """Print that silently handles broken pipes (background processes)"""
    try:
        print(*args, **kwargs)
    except BrokenPipeError:
        # Ignore - happens when running in background
        pass


class ChatDemoInterface:
    """
    New chat-based interface for demo creation
    """

    def __init__(self, user_email: str = None):
        # Identity must be provided by the login system before settings access.
        self.user_email = user_email
        self.demo_builder = None
        self.conversation_history = []
        self.settings = self.load_default_settings()

        # Feedback log and generated artifacts from the pipeline.
        self.ai_feedback_log = []
        self.ddl_code = ""
        self.population_code = ""
        self.research_results = None

        # Vertical × Function use case system
        self.vertical = None
        self.function = None
        self.use_case_config = None

        # Generic use case handling
        self.is_generic_use_case = False
        self.generic_use_case_context = ""
        self.pending_generic_company = None
        self.pending_generic_use_case = None

        # New tab content
        self.live_progress_log = []    # Real-time deployment progress
        self.demo_pack_content = ""    # Generated demo pack markdown
        self.spotter_viz_story = ""    # Spotter Viz story (NL prompts for Spotter Viz agent)

        # Per-session loggers (NOT module-level singletons — avoids cross-session contamination)
        self._session_logger = None
        self._prompt_logger = None
# Per-session loggers (NOT module-level singletons — avoids cross-session contamination) self._session_logger = None self._prompt_logger = None def _get_effective_user_email(self) -> str: """Resolve and cache effective user identity for settings access.""" self.user_email = require_authenticated_email(user_email=self.user_email) return self.user_email def load_default_settings(self): """Load settings from Supabase or defaults""" # Fallback defaults (ONLY used if settings not found) defaults = { 'company': 'Amazon.com', 'use_case': 'Sales Analytics', 'model': DEFAULT_LLM_MODEL, 'fact_table_size': '1000', 'dim_table_size': '100', 'stage': 'initialization', 'tag_name': None, 'validation_mode': 'Off', # Off = auto-run, On = pause at checkpoints 'geo_scope': 'USA Only' # USA Only or International } try: # Try to load from Supabase user_email = self._get_effective_user_email() if user_email: settings = load_gradio_settings(user_email) # Only override if values are meaningful (not generic placeholders) company = settings.get('default_company_url', '').strip() use_case = settings.get('default_use_case', '').strip() if company and company not in ['your company', 'yourcompany', '']: defaults['company'] = company if use_case and use_case not in ['analytics', '']: defaults['use_case'] = use_case if settings.get('default_llm'): defaults['model'] = settings.get('default_llm') if settings.get('fact_table_size'): defaults['fact_table_size'] = settings.get('fact_table_size') if settings.get('dim_table_size'): defaults['dim_table_size'] = settings.get('dim_table_size') if settings.get('tag_name'): defaults['tag_name'] = settings.get('tag_name') if settings.get('geo_scope'): defaults['geo_scope'] = settings.get('geo_scope') if settings.get('validation_mode'): defaults['validation_mode'] = settings.get('validation_mode') except Exception as e: print(f"Could not load settings from Supabase: {e}") print(f"DEBUG: Final settings - company: {defaults['company']}, use_case: 
{defaults['use_case']}") return defaults def format_welcome_message(self, company, use_case): """Create the initial welcome message""" # Always show the setup message - user must explicitly provide company and use case needs_setup = True if needs_setup: # Build use case examples from VERTICALS × FUNCTIONS uc_examples = [] for v_name in list(VERTICALS.keys())[:3]: # Show first 3 verticals for f_name in list(FUNCTIONS.keys())[:2]: # Show first 2 functions each uc_examples.append(f" - {v_name} {f_name}") uc_list = "\n".join(uc_examples) return f"""👋 **Welcome to ThoughtSpot Demo Builder!** Tell me who you're demoing to and what they care about — I'll build the whole thing. **Just say something like:** - *"Nike.com, Retail Sales"* - *"Salesforce.com, Software Sales"* - *"Target.com — supply chain analytics for their VP of Operations"* - *"Pfizer.com — analyzing clinical trial pipeline for a CMO persona"* **Pre-configured use cases** (KPIs, outliers, and Spotter questions ready to go): {uc_list} - ...and more! **Or describe any custom use case** — AI will research and build it from scratch. > 💡 **Tip:** Select your ThoughtSpot environment and AI model in the panel to the right before starting. *What company and use case are you working on?*""" else: return f"""👋 **Welcome to ThoughtSpot Demo Builder!** I'm ready to create a perfect ThoughtSpot demo! **Company:** {company} **Use Case:** {use_case} **Ready to start?** - Type "start" to begin research - Type `/over` to change company or use case - Or just tell me what you'd like to do! What's your first step?""" def validate_required_settings(self) -> list: """ Check that required admin and user settings are configured. Returns list of missing settings. Empty list = all good. 
""" missing = [] # Check admin settings (from environment, injected from Supabase) admin_checks = { 'THOUGHTSPOT_URL': get_admin_setting('THOUGHTSPOT_URL', required=False), 'THOUGHTSPOT_TRUSTED_AUTH_KEY': get_admin_setting('THOUGHTSPOT_TRUSTED_AUTH_KEY', required=False), 'THOUGHTSPOT_ADMIN_USER': get_admin_setting('THOUGHTSPOT_ADMIN_USER', required=False), 'SNOWFLAKE_ACCOUNT': get_admin_setting('SNOWFLAKE_ACCOUNT', required=False), 'SNOWFLAKE_KP_USER': get_admin_setting('SNOWFLAKE_KP_USER', required=False), 'SNOWFLAKE_KP_PK': get_admin_setting('SNOWFLAKE_KP_PK', required=False), 'SNOWFLAKE_ROLE': get_admin_setting('SNOWFLAKE_ROLE', required=False), 'SNOWFLAKE_WAREHOUSE': get_admin_setting('SNOWFLAKE_WAREHOUSE', required=False), 'SNOWFLAKE_DATABASE': get_admin_setting('SNOWFLAKE_DATABASE', required=False), } # LLM key now comes from environment only. has_llm = bool(get_openai_api_key(required=False)) for key, val in admin_checks.items(): if not val: missing.append(key) if not has_llm: missing.append('OPENAI_API_KEY') return missing def process_chat_message(self, message, chat_history, current_stage, current_model, company, use_case): """ Process user message and return updated chat history and state (with streaming) Returns: (chat_history, current_stage, current_model, company, use_case, next_textbox_value) """ from session_logger import init_session_logger from prompt_logger import PromptLogger # Init per-session loggers on first message (stored on self, not module singletons) if self._session_logger is None: session_id = datetime.now().strftime("%Y%m%d_%H%M%S") self._session_logger = init_session_logger(session_id, user_email=getattr(self, 'user_email', None)) self._prompt_logger = PromptLogger(session_id=session_id) _slog = self._session_logger _slog.log(current_stage or 'init', f"user message received: {message[:120]}") # Add user message to history chat_history.append((message, None)) # If data_adjuster_url is saved in settings and we're at init, inject it as 
the message # so the user lands directly in Data Adjuster without having to paste the URL manually da_url = self.settings.get('data_adjuster_url', '').strip() if da_url and current_stage == 'initialization' and 'pinboard/' in da_url: message = da_url chat_history[-1] = (da_url, None) # Validate required settings before proceeding missing = self.validate_required_settings() if missing and current_stage == 'initialization': missing_str = ", ".join(missing) error_msg = ( f"**⚠️ Missing required settings:**\n\n" f"`{missing_str}`\n\n" f"Please configure these values. " f"LLM key must be set in `.env` as `OPENAI_API_KEY`." ) chat_history[-1] = (message, error_msg) yield chat_history, current_stage, current_model, company, use_case, "" return # Check for special commands if message.strip().lower().startswith('/over'): # Override command - extract new values response = self.handle_override(message) chat_history[-1] = (message, response) yield chat_history, current_stage, current_model, company, use_case, "" return message_lower = message.lower() # Pre-clean any URLs in the message (fix typos like double dots) import re cleaned_message = re.sub(r'\.{2,}', '.', message) # Stage-based processing if current_stage == 'initialization': # Check if user pasted a ThoughtSpot liveboard URL → jump straight to data adjuster lb_guid_match = re.search( r'pinboard/([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})', message, re.I ) if lb_guid_match: liveboard_guid = lb_guid_match.group(1) chat_history[-1] = (message, "🔍 **Resolving liveboard context...**") yield chat_history, current_stage, current_model, company, use_case, "" try: from smart_data_adjuster import SmartDataAdjuster, load_context_from_liveboard from thoughtspot_deployer import ThoughtSpotDeployer from supabase_client import get_admin_setting ts_url = (self.settings.get('thoughtspot_url') or '').strip() or get_admin_setting('THOUGHTSPOT_URL') ts_user = get_admin_setting('THOUGHTSPOT_ADMIN_USER') ts_secret = 
(self.settings.get('thoughtspot_trusted_auth_key') or '').strip() or get_admin_setting('THOUGHTSPOT_TRUSTED_AUTH_KEY') ts_client = ThoughtSpotDeployer(base_url=ts_url, username=ts_user, secret_key=ts_secret) ts_client.authenticate() ctx = load_context_from_liveboard(liveboard_guid, ts_client) llm_model = self.settings.get('model', DEFAULT_LLM_MODEL) adjuster = SmartDataAdjuster( database=ctx['database'], schema=ctx['schema'], liveboard_guid=liveboard_guid, llm_model=llm_model, ts_url=ts_url, ts_secret=ts_secret, prompt_logger=self._prompt_logger, ) adjuster.connect() if not adjuster.load_liveboard_context(): raise ValueError("Liveboard has no answer-based visualizations to adjust.") self._adjuster = adjuster current_stage = 'outlier_adjustment' viz_list = "\n".join( f" [{i+1}] {v['name']}" for i, v in enumerate(adjuster.visualizations) ) response = f"""✅ **Liveboard context loaded — ready for data adjustments** **Liveboard:** {ctx['liveboard_name']} **Model:** {ctx['model_name']} **Snowflake:** `{ctx['database']}`.`{ctx['schema']}` **Visualizations:** {viz_list} Tell me what to change — e.g. *"increase webcam revenue by 20%"* or *"make Acme Corp 50B"*. Type **done** when finished.""" except Exception as e: response = f"❌ **Could not load liveboard context**\n\n`{e}`" chat_history[-1] = (message, response) yield chat_history, current_stage, current_model, company, use_case, "" return # Check if user just provided a standalone URL (e.g., "Comscore.com") standalone_url = re.search(r'^([a-zA-Z0-9-]+\.[a-zA-Z]{2,})$', cleaned_message.strip()) if standalone_url: # User provided just a URL - ask them to include the use case too detected_company = standalone_url.group(1) # Build dynamic use case list from VERTICALS × FUNCTIONS uc_opts = [f"- {v} {f}" for v in list(VERTICALS.keys())[:3] for f in FUNCTIONS.keys()] uc_opts_str = "\n".join(uc_opts) response = f"""I see you want to use **{detected_company}** - great choice! Now I need to know what kind of demo you want. 
Please tell me both together: ``` I'm creating a demo for company: {detected_company} use case: Retail Sales ``` **Configured use cases** (with KPIs, outliers, Spotter questions): {uc_opts_str} - Or any custom use case you want! What use case would you like for {detected_company}?""" chat_history[-1] = (message, response) yield chat_history, current_stage, current_model, company, use_case, "" return # Check if user is providing company and use case if "creating a demo for" in message_lower or "create a demo for" in message_lower: # Extract company and use case (use cleaned message for better matching) extracted_company = self.extract_company_from_message(cleaned_message) extracted_use_case = self.extract_use_case_from_message(cleaned_message) if extracted_company: company = extracted_company if extracted_use_case: # Parse use case into vertical × function using new system self.vertical, self.function = parse_use_case(extracted_use_case) self.use_case_config = get_use_case_config( self.vertical or "Generic", self.function or "Generic" ) # Determine if this is a known or generic use case is_known = self.vertical and self.function and not self.use_case_config.get('is_generic') use_case_display = self.use_case_config.get('use_case_name', extracted_use_case) # Store company/use case for context prompt if is_known: # Matched a configured vertical × function combination self.is_generic_use_case = False self.pending_generic_company = company self.pending_generic_use_case = use_case_display use_case_type_note = f"\n\n*Matched: **{self.vertical}** × **{self.function}** — using configured KPIs, outlier patterns, and Spotter questions.*" elif self.vertical or self.function: # Partial match - have vertical OR function but not both self.is_generic_use_case = True self.pending_generic_company = company self.pending_generic_use_case = extracted_use_case matched = self.vertical or self.function use_case_type_note = f"\n\n*Partial match: **{matched}** recognized — AI will fill in 
the gaps based on research.*" else: # Fully generic/custom use case self.is_generic_use_case = True self.pending_generic_company = company self.pending_generic_use_case = extracted_use_case use_case_type_note = "\n\n*Custom use case — AI will research the industry to build a relevant schema and KPIs.*" # ALWAYS ask for additional context (for both generic and standard use cases) context_prompt = f"""✅ **Demo Configuration** I am creating a demo for **{company}** with use case: **{self.pending_generic_use_case}**{use_case_type_note} **Default Schema:** - 1 fact table (transactions/events) - 3-4 dimension tables (customers, products, dates, etc.) **Want to customize?** You can add requirements like: - "I need 2 fact tables: SALES and INVENTORY" - "Include a RETURNS table" - "Focus on employee retention metrics" **Type your requirements, or say "proceed" to use defaults.**""" chat_history[-1] = (message, context_prompt) current_stage = 'awaiting_context' yield chat_history, current_stage, current_model, company, use_case, "proceed" return # Validate both are provided before proceeding if not extracted_company or not extracted_use_case: # Friendly message based on what's missing if extracted_company and not extracted_use_case: # Got company, need use case error_msg = f"""Great! I see you want to create a demo for **{extracted_company}**. Do you have a use case in mind? For example: - Sales Analytics - Supply Chain - Customer Analytics - Or any custom use case you'd like! **Just tell me like this:** ``` I'm creating a demo for company: {extracted_company} use case: Supply Chain ``` What would you like to analyze?""" elif extracted_use_case and not extracted_company: # Got use case, need company error_msg = f"""Perfect! I see you want to analyze **{extracted_use_case}**. Which company should I research? 
(Must be a real website) **Examples:** Nike.com, Target.com, Walmart.com **Just tell me like this:** ``` I'm creating a demo for company: Nike.com use case: {extracted_use_case} ``` What company URL should I use?""" else: # Got neither - shouldn't happen with our pattern matching, but just in case error_msg = """I need both a company and a use case to get started! **Example:** ``` I'm creating a demo for company: Nike.com use case: Supply Chain ``` What company and use case would you like?""" chat_history[-1] = (message, error_msg) yield chat_history, current_stage, current_model, company, use_case, "" return # Update stage to research current_stage = 'research' # Show confirmation and starting message confirmation_msg = f"""✅ **Got it!** **Company:** {company} **Use Case:** {use_case} 🔍 **Starting Research...** I'm now researching {company}'s business model and {use_case} requirements. This may take 1-2 minutes. Watch the AI Feedback tab for progress!""" chat_history[-1] = (message, confirmation_msg) yield chat_history, current_stage, current_model, company, use_case, "" # Start research with streaming last_response = "" for response in self.run_research_streaming(company, use_case): chat_history[-1] = (message, response) last_response = response yield chat_history, current_stage, current_model, company, use_case, "" # Stay in research stage - will move to create_ddl when user approves # After research completes, pre-fill "yes" if it ended with the approval question with open('/tmp/demoprep_debug.log', 'a') as f: f.write(f"RESEARCH COMPLETE, last_response contains 'Ready to create'? {'Ready to create the DDL now?' in last_response}\n") if "Ready to create the DDL now?" in last_response or "Would you like to use the cached results?" 
in last_response: with open('/tmp/demoprep_debug.log', 'a') as f: f.write(f"PRE-FILLING yes for DDL\n") yield chat_history, current_stage, current_model, company, use_case, "yes" else: yield chat_history, current_stage, current_model, company, use_case, "" return # --- Catch-all: try to extract company + use case from any free-form message --- # Handles: "Nike.com, Retail Sales" / "Salesforce - Software Sales" / etc. extracted_company = self.extract_company_from_message(cleaned_message) extracted_use_case = self.extract_use_case_from_message(cleaned_message) if extracted_company and extracted_use_case: company = extracted_company self.vertical, self.function = parse_use_case(extracted_use_case) self.use_case_config = get_use_case_config( self.vertical or "Generic", self.function or "Generic" ) is_known = self.vertical and self.function and not self.use_case_config.get('is_generic') use_case_display = self.use_case_config.get('use_case_name', extracted_use_case) self.is_generic_use_case = not is_known self.pending_generic_company = company self.pending_generic_use_case = use_case_display if is_known: note = f"\n\n*Matched: **{self.vertical}** × **{self.function}** — KPIs, outliers, and Spotter questions ready.*" elif self.vertical or self.function: note = f"\n\n*Partial match: **{self.vertical or self.function}** recognized — AI will fill in the gaps.*" else: note = "\n\n*Custom use case — AI will research and build from scratch.*" context_prompt = f"""✅ **Demo Configuration** **Company:** {company} **Use Case:** {use_case_display}{note} **Want to add any requirements?** (or just say "proceed") - "Include a RETURNS table" - "Focus on enterprise accounts only" - "I need 2 fact tables: Sales and Inventory" *Type your requirements, or say **"proceed"** to use defaults.*""" chat_history[-1] = (message, context_prompt) current_stage = 'awaiting_context' yield chat_history, current_stage, current_model, company, use_case_display, "proceed" return elif extracted_company 
and not extracted_use_case: uc_opts = "\n".join([f"- {v} {f}" for v in list(VERTICALS.keys())[:3] for f in FUNCTIONS.keys()]) response = f"""Got it — **{extracted_company}**! What use case are we building? A few options: {uc_opts} - Or describe any custom scenario!""" chat_history[-1] = (message, response) yield chat_history, current_stage, current_model, company, use_case, "" return else: # Nothing useful extracted — show a brief prompt response = """I need a **company** and **use case** to get started. Try something like: - *"Nike.com, Retail Sales"* - *"Salesforce.com — Software pipeline analytics"* - *"Walmart.com, Supply Chain"*""" chat_history[-1] = (message, response) yield chat_history, current_stage, current_model, company, use_case, "" return elif current_stage == 'awaiting_context': # User is providing context for ANY use case (both generic and established) if message_lower.strip() in ['proceed', 'continue', 'no', 'skip']: # User wants to proceed without additional context self.generic_use_case_context = "" context_note = "Proceeding with standard configuration." 
else: # User provided additional context - store it self.generic_use_case_context = message.strip() context_note = f"Using additional context:\n> {message.strip()}" # Get stored company and use case company = self.pending_generic_company use_case = self.pending_generic_use_case use_case_type = "generic" if self.is_generic_use_case else "established" # Check validation_mode setting from supabase_client import load_gradio_settings, get_admin_setting settings = load_gradio_settings(self._get_effective_user_email()) validation_mode = settings.get('validation_mode', 'Off') if validation_mode == 'Off': # AUTO-RUN MODE: Run entire pipeline without any more prompts current_stage = 'deploy' auto_run_msg = f"""✅ **Starting Auto-Run Mode** **Company:** {company} **Use Case:** {use_case} ({use_case_type}) {context_note} 🚀 **Running complete pipeline...** - Research → DDL → Snowflake → LegitData → ThoughtSpot Watch the AI Feedback tab for real-time progress!""" chat_history[-1] = (message, auto_run_msg) yield chat_history, current_stage, current_model, company, use_case, "" # Run research - yield every response to keep Gradio alive self.log_feedback("DEBUG AUTO-RUN: Starting research loop...") last_research_response = "" research_yield_count = 0 for response in self.run_research_streaming(company, use_case, self.generic_use_case_context): last_research_response = response research_yield_count += 1 chat_history[-1] = (message, response) yield chat_history, current_stage, current_model, company, use_case, "" self.log_feedback(f"DEBUG AUTO-RUN: Research loop EXITED after {research_yield_count} yields") # Show research complete chat_history[-1] = (message, "✅ **Research Complete!**\n\n📝 **Creating DDL...**") yield chat_history, current_stage, current_model, company, use_case, "" self.log_feedback("DEBUG AUTO-RUN: Moving to DDL creation...") # Auto-create DDL ddl_response, ddl_code = self.run_ddl_creation() # Check if DDL creation failed if not ddl_code or ddl_code.strip() == "": 
chat_history[-1] = (message, f"{ddl_response}\n\n❌ **Cannot proceed without valid DDL.** Please fix the error and try again.") yield chat_history, current_stage, current_model, company, use_case, "" return chat_history[-1] = (message, f"✅ DDL Created\n\n🚀 **Deploying to Snowflake...**") yield chat_history, current_stage, current_model, company, use_case, "" # Auto-deploy with open('/tmp/demoprep_debug.log', 'a') as f: f.write(f"AUTO-RUN: Starting deployment streaming\n") try: final_result = None deploy_update_count = 0 for progress_update in self.run_deployment_streaming(): if isinstance(progress_update, tuple): final_result = progress_update elif isinstance(progress_update, dict): current_stage = progress_update.get('stage', current_stage) chat_history[-1] = (message, progress_update['response']) yield chat_history, current_stage, current_model, company, use_case, "" else: deploy_update_count += 1 # Only yield every 5th deployment update if deploy_update_count % 5 == 0 or "Complete" in str(progress_update) or "Error" in str(progress_update): chat_history[-1] = (message, progress_update) yield chat_history, current_stage, current_model, company, use_case, "" with open('/tmp/demoprep_debug.log', 'a') as f: f.write(f"AUTO-RUN: Deployment loop EXITED, final_result={type(final_result)}, len={len(final_result) if final_result else 0}\n") if final_result: with open('/tmp/demoprep_debug.log', 'a') as f: f.write(f"AUTO-RUN: final_result[1]={final_result[1] if len(final_result) > 1 else 'N/A'}\n") # Check for auto_ts to continue to ThoughtSpot deployment if len(final_result) == 3 and final_result[1] == "auto_ts": deploy_response, _, auto_schema = final_result with open('/tmp/demoprep_debug.log', 'a') as f: f.write(f"AUTO-RUN: auto_ts detected, schema={auto_schema}\n") chat_history[-1] = (message, deploy_response) yield chat_history, current_stage, current_model, company, use_case, "" # Run ThoughtSpot deployment with detailed logging with open('/tmp/demoprep_debug.log', 
'a') as f: f.write(f"AUTO-RUN: Creating TS generator...\n") ts_generator = self._run_thoughtspot_deployment(auto_schema, company, use_case) with open('/tmp/demoprep_debug.log', 'a') as f: f.write(f"AUTO-RUN: Generator created, starting iteration...\n") ts_update_count = 0 for ts_update in ts_generator: ts_update_count += 1 with open('/tmp/demoprep_debug.log', 'a') as f: update_preview = str(ts_update)[:100] if ts_update else "None" f.write(f"AUTO-RUN: TS update #{ts_update_count}: {update_preview}\n") if isinstance(ts_update, dict): current_stage = ts_update.get('stage', current_stage) chat_history[-1] = (message, ts_update['response']) else: chat_history[-1] = (message, ts_update) yield chat_history, current_stage, current_model, company, use_case, "" with open('/tmp/demoprep_debug.log', 'a') as f: f.write(f"AUTO-RUN: TS deployment loop complete, {ts_update_count} updates\n") else: # Not auto_ts, just show the result if len(final_result) >= 2: final_response = final_result[0] next_msg = final_result[1] if len(final_result) > 1 else "" else: final_response = str(final_result) next_msg = "" chat_history[-1] = (message, final_response) yield chat_history, current_stage, current_model, company, use_case, next_msg except Exception as e: import traceback with open('/tmp/demoprep_debug.log', 'a') as f: f.write(f"AUTO-RUN: EXCEPTION: {str(e)}\n{traceback.format_exc()}\n") error_msg = f"**Deployment Error:** {str(e)}\n\n```\n{traceback.format_exc()}\n```" chat_history[-1] = (message, error_msg) yield chat_history, current_stage, current_model, company, use_case, "" return else: # VALIDATION MODE ON: Follow normal flow with pauses current_stage = 'research' confirmation_msg = f"""✅ **Got it!** **Company:** {company} **Use Case:** {use_case} ({use_case_type}) {context_note} 🔍 **Starting Research...** I'm now researching {company}'s business model and {use_case} requirements. This may take 1-2 minutes. 
Watch the AI Feedback tab for progress!""" chat_history[-1] = (message, confirmation_msg) yield chat_history, current_stage, current_model, company, use_case, "" # Start research with streaming last_response = "" for response in self.run_research_streaming(company, use_case, self.generic_use_case_context): chat_history[-1] = (message, response) last_response = response yield chat_history, current_stage, current_model, company, use_case, "" # Pre-fill "yes" if ready for next step if "Ready to create the DDL now?" in last_response or "Would you like to use the cached results?" in last_response: yield chat_history, current_stage, current_model, company, use_case, "yes" else: yield chat_history, current_stage, current_model, company, use_case, "" return elif current_stage == 'research': # Check if we're waiting for cache response if hasattr(self, '_cache_available') and self._cache_available: if 'yes' in message_lower: # User wants to use cache chat_history[-1] = (message, "Loading cached research...") yield chat_history, current_stage, current_model, company, use_case, "" success = self._load_cached_research(self._cached_research_path, company, use_case) if success: company_name = self.demo_builder.extract_company_name() response = f"""✅ **Research Loaded from Cache!** **Company:** {company_name} **Use Case:** {use_case} **Cached results successfully loaded!** - Company analysis retrieved - Industry research retrieved - Ready to proceed **Ready to create the DDL now?** (Type 'yes' to proceed)""" chat_history[-1] = (message, response) current_stage = 'research' # Stay in research, will move to create_ddl when approved self._cache_available = False yield chat_history, current_stage, current_model, company, use_case, "yes" # Pre-fill "yes" return else: response = "❌ Failed to load cache, running fresh research..." 
chat_history[-1] = (message, response) yield chat_history, current_stage, current_model, company, use_case, "" self._cache_available = False # Fall through to run fresh research elif 'no' in message_lower: # User wants fresh research chat_history[-1] = (message, "Running fresh research...") yield chat_history, current_stage, current_model, company, use_case, "" self._cache_available = False # Fall through to run fresh research else: # Invalid response, ask again response = "Please type 'yes' to use cached results or 'no' to run fresh research." chat_history[-1] = (message, response) yield chat_history, current_stage, current_model, company, use_case, "" return # If we get here and cache was declined, run fresh research if not hasattr(self, '_cache_available') or not self._cache_available: last_response = "" for response in self.run_research_streaming(company, use_case): chat_history[-1] = (message, response) last_response = response yield chat_history, current_stage, current_model, company, use_case, "" # Stay in research stage - will move to create_ddl when user approves # Pre-fill "yes" after research if "Ready to create the DDL now?" 
in last_response: yield chat_history, current_stage, current_model, company, use_case, "yes" else: yield chat_history, current_stage, current_model, company, use_case, "" return # Normal research stage handling (no cache prompt active) # This is only reached when validation_mode = On (otherwise auto-run handles it) if 'yes' in message_lower or 'proceed' in message_lower or 'continue' in message_lower: current_stage = 'create_ddl' response, ddl_code = self.run_ddl_creation() chat_history[-1] = (message, response) # With validation_mode On, we show DDL and wait for approval yield chat_history, current_stage, current_model, company, use_case, "yes" return elif 'no' in message_lower or 'redo' in message_lower: chat_history[-1] = (message, "Restarting research...") yield chat_history, current_stage, current_model, company, use_case, "" self._cache_available = False # Clear cache flag last_response = "" for response in self.run_research_streaming(company, use_case): chat_history[-1] = (message, response) last_response = response yield chat_history, current_stage, current_model, company, use_case, "" # Stay in research stage, pre-fill "yes" if "Ready to create the DDL now?" 
in last_response: yield chat_history, current_stage, current_model, company, use_case, "yes" else: yield chat_history, current_stage, current_model, company, use_case, "" return elif current_stage == 'create_ddl': # Waiting for DDL approval with open('/tmp/demoprep_debug.log', 'a') as f: f.write(f"ENTERED create_ddl stage, message={message_lower[:50]}\n") if 'yes' in message_lower or 'approve' in message_lower or 'proceed' in message_lower: with open('/tmp/demoprep_debug.log', 'a') as f: f.write(f"DDL APPROVED, moving to deploy\n") current_stage = 'deploy' # Stream deployment progress directly to chat try: final_result = None for progress_update in self.run_deployment_streaming(): if isinstance(progress_update, tuple): final_result = progress_update elif isinstance(progress_update, dict): current_stage = progress_update.get('stage', current_stage) chat_history[-1] = (message, f"**DDL Approved - Deploying...**\n\n{progress_update['response']}") yield chat_history, current_stage, current_model, company, use_case, "" else: chat_history[-1] = (message, f"**DDL Approved - Deploying...**\n\n{progress_update}") yield chat_history, current_stage, current_model, company, use_case, "" # Handle final result def debug_log(msg): with open('/tmp/demoprep_debug.log', 'a') as f: import datetime f.write(f"[{datetime.datetime.now()}] {msg}\n") f.flush() self.log_feedback(msg) if final_result: debug_log(f"DEBUG create_ddl: final_result len={len(final_result)}, [1]='{final_result[1] if len(final_result) > 1 else 'N/A'}'") if len(final_result) == 3 and final_result[1] == "auto_ts": # Auto-continue to ThoughtSpot deployment deploy_response, _, auto_schema = final_result debug_log(f"DEBUG create_ddl: auto_ts, schema={auto_schema}") chat_history[-1] = (message, deploy_response) yield chat_history, current_stage, current_model, company, use_case, "" debug_log("DEBUG create_ddl: About to call _run_thoughtspot_deployment") # Run ThoughtSpot deployment (mirrors 'thoughtspot' handler) 
ts_update_count = 0 for ts_update in self._run_thoughtspot_deployment(auto_schema, company, use_case): ts_update_count += 1 debug_log(f"DEBUG create_ddl: ts_update #{ts_update_count} type={type(ts_update)}") if isinstance(ts_update, dict): # Final result current_stage = ts_update.get('stage', current_stage) chat_history[-1] = (message, ts_update['response']) else: # Progress update chat_history[-1] = (message, ts_update) yield chat_history, current_stage, current_model, company, use_case, "" debug_log(f"DEBUG create_ddl: TS deployment loop complete, {ts_update_count} updates") else: deploy_response, next_msg = final_result chat_history[-1] = (message, deploy_response) yield chat_history, current_stage, current_model, company, use_case, next_msg except Exception as e: import traceback error_msg = f"**Deployment Error:** {str(e)}\n\n```\n{traceback.format_exc()}\n```" with open('/tmp/demoprep_debug.log', 'a') as f: f.write(f"DEBUG create_ddl: Exception: {str(e)}\n{traceback.format_exc()}\n") chat_history[-1] = (message, error_msg) yield chat_history, current_stage, current_model, company, use_case, "" return elif 'no' in message_lower or 'redo' in message_lower: response, ddl_code = self.run_ddl_creation() chat_history[-1] = (message, response) yield chat_history, current_stage, current_model, company, use_case, "" return elif current_stage == 'populate': # Handle "deploy" command when LegitData is ready if 'deploy' in message_lower: current_stage = 'deploy' try: # Stream deployment progress directly to chat final_result = None for progress_update in self.run_deployment_streaming(): if isinstance(progress_update, tuple): final_result = progress_update elif isinstance(progress_update, dict): current_stage = progress_update.get('stage', current_stage) chat_history[-1] = (message, progress_update['response']) yield chat_history, current_stage, current_model, company, use_case, "" else: chat_history[-1] = (message, progress_update) yield chat_history, current_stage, 
current_model, company, use_case, "" # Handle final result if final_result: if len(final_result) == 3 and final_result[1] == "auto_ts": # Auto-continue to ThoughtSpot deployment deploy_response, _, auto_schema = final_result chat_history[-1] = (message, deploy_response) yield chat_history, current_stage, current_model, company, use_case, "" # Run ThoughtSpot deployment (mirrors 'thoughtspot' handler) for ts_update in self._run_thoughtspot_deployment(auto_schema, company, use_case): if isinstance(ts_update, dict): # Final result current_stage = ts_update.get('stage', current_stage) chat_history[-1] = (message, ts_update['response']) else: # Progress update chat_history[-1] = (message, ts_update) yield chat_history, current_stage, current_model, company, use_case, "" else: deploy_response, next_msg = final_result chat_history[-1] = (message, deploy_response) yield chat_history, current_stage, current_model, company, use_case, next_msg except Exception as e: # Deployment failed - show error message error_msg = f"**Deployment Error:** {str(e)}" chat_history[-1] = (message, error_msg) yield chat_history, current_stage, current_model, company, use_case, "" return # Handle population retry if needed elif 'yes' in message_lower or 'retry' in message_lower: response, pop_code = self.run_population() chat_history[-1] = (message, response) yield chat_history, current_stage, current_model, company, use_case, "" # Auto-proceed to deployment if successful if "Complete" in response: current_stage = 'deploy' try: # Stream deployment progress final_result = None for progress_update in self.run_deployment_streaming(): if isinstance(progress_update, tuple): final_result = progress_update elif isinstance(progress_update, dict): current_stage = progress_update.get('stage', current_stage) chat_history.append((None, progress_update['response'])) yield chat_history, current_stage, current_model, company, use_case, "" else: chat_history.append((None, progress_update)) yield chat_history, 
current_stage, current_model, company, use_case, "" if final_result: if len(final_result) == 3 and final_result[1] == "auto_ts": # Auto-continue to ThoughtSpot deployment deploy_response, _, auto_schema = final_result chat_history.append((None, deploy_response)) yield chat_history, current_stage, current_model, company, use_case, "" # Run ThoughtSpot deployment (mirrors 'thoughtspot' handler) for ts_update in self._run_thoughtspot_deployment(auto_schema, company, use_case): if isinstance(ts_update, dict): # Final result current_stage = ts_update.get('stage', current_stage) chat_history.append((None, ts_update['response'])) else: # Progress update chat_history.append((None, ts_update)) yield chat_history, current_stage, current_model, company, use_case, "" else: deploy_response, next_msg = final_result chat_history.append((None, deploy_response)) yield chat_history, current_stage, current_model, company, use_case, next_msg except Exception as e: error_msg = str(e) chat_history.append((None, error_msg)) yield chat_history, current_stage, current_model, company, use_case, "" return elif current_stage == 'deploy': # Handle post-deployment commands (thoughtspot, truncate) if 'thoughtspot' in message_lower: # User wants to create ThoughtSpot objects - use helper method schema_name = getattr(self, '_deployed_schema_name', getattr(self, '_last_schema_name', 'UNKNOWN')) for ts_update in self._run_thoughtspot_deployment(schema_name, company, use_case): if isinstance(ts_update, dict): # Final result current_stage = ts_update.get('stage', current_stage) chat_history[-1] = (message, ts_update['response']) else: # Progress update chat_history[-1] = (message, ts_update) yield chat_history, current_stage, current_model, company, use_case, "" return # Handle deployment errors (usually population failures) if hasattr(self, '_last_population_error'): # Handle '1' or 'retry' - retry with same code if 'retry' in message_lower or message_lower.strip() == '1': # Retry with same code 
chat_history[-1] = (message, "Retrying population...") yield chat_history, current_stage, current_model, company, use_case, "" try: # Check required attributes exist if not hasattr(self.demo_builder, 'data_population_results') or not self.demo_builder.data_population_results: response = "❌ **Error:** Population code not found. Please run population again first." chat_history[-1] = (message, response) yield chat_history, current_stage, current_model, company, use_case, "" return if not hasattr(self, '_last_schema_name') or not self._last_schema_name: response = "❌ **Error:** Schema name not found. Please run deployment again first." chat_history[-1] = (message, response) yield chat_history, current_stage, current_model, company, use_case, "" return from demo_prep import execute_population_script is_template = getattr(self.demo_builder, 'population_code_source', 'llm') == 'template' success, msg = execute_population_script( self.demo_builder.data_population_results, self._last_schema_name, skip_modifications=is_template ) if success: response = f"✅ **Population Successful!**\n\n{msg}\n\nDemo deployed to Snowflake! 🎉" del self._last_population_error del self._last_schema_name else: response = f"❌ Still failed: {msg[:200]}...\n\nTry 'truncate' (or '2') or 'fix' (or '3')?" except Exception as e: import traceback error_details = traceback.format_exc() self.log_feedback(f"❌ Retry error: {error_details}") response = f"❌ **Retry failed with error:**\n\n```\n{str(e)}\n```\n\nPlease try 'truncate' (or '2') to clear tables first." 
chat_history[-1] = (message, response) yield chat_history, current_stage, current_model, company, use_case, "" return elif 'truncate' in message_lower or message_lower.strip() == '2': # Truncate tables and retry chat_history[-1] = (message, "Truncating tables and retrying...") yield chat_history, current_stage, current_model, company, use_case, "" try: # Check required attributes exist if not hasattr(self, '_last_schema_name') or not self._last_schema_name: response = "❌ **Error:** Schema name not found. Please run deployment again first." chat_history[-1] = (message, response) yield chat_history, current_stage, current_model, company, use_case, "" return if not hasattr(self.demo_builder, 'data_population_results') or not self.demo_builder.data_population_results: response = "❌ **Error:** Population code not found. Please run population again first." chat_history[-1] = (message, response) yield chat_history, current_stage, current_model, company, use_case, "" return from cdw_connector import SnowflakeDeployer from demo_prep import execute_population_script deployer = SnowflakeDeployer() deployer.connect() # Truncate all tables in schema try: cursor = deployer.connection.cursor() cursor.execute(f"USE SCHEMA {self._last_schema_name}") cursor.execute("SHOW TABLES") tables = cursor.fetchall() for table in tables: table_name = table[1] self.log_feedback(f"Truncating {table_name}...") cursor.execute(f"TRUNCATE TABLE {table_name}") cursor.close() deployer.disconnect() self.log_feedback("✅ Tables truncated") except Exception as e: self.log_feedback(f"⚠️ Truncate warning: {e}") if deployer.connection: deployer.disconnect() # Retry population is_template = getattr(self.demo_builder, 'population_code_source', 'llm') == 'template' success, msg = execute_population_script( self.demo_builder.data_population_results, self._last_schema_name, skip_modifications=is_template ) if success: response = f"✅ **Population Successful!**\n\n{msg}\n\nDemo deployed to Snowflake! 
🎉" del self._last_population_error del self._last_schema_name else: response = f"❌ Still failed: {msg[:200]}...\n\nTry 'fix' (or '3') to let AI correct the code?" except Exception as e: import traceback error_details = traceback.format_exc() self.log_feedback(f"❌ Truncate/retry error: {error_details}") response = f"❌ **Truncate/retry failed with error:**\n\n```\n{str(e)}\n```\n\nPlease check the error details above." chat_history[-1] = (message, response) yield chat_history, current_stage, current_model, company, use_case, "" return elif 'fix' in message_lower or message_lower.strip() == '3': # Regenerate the code using the fixed template chat_history[-1] = (message, "🔧 Regenerating population code with fixed template...") yield chat_history, current_stage, current_model, company, use_case, "" try: # Check required attributes exist if not hasattr(self.demo_builder, 'schema_generation_results') or not self.demo_builder.schema_generation_results: response = "❌ **Error:** DDL schema not found. Please run DDL creation again first." chat_history[-1] = (message, response) yield chat_history, current_stage, current_model, company, use_case, "" return if not hasattr(self, '_last_schema_name') or not self._last_schema_name: response = "❌ **Error:** Schema name not found. Please run deployment again first." chat_history[-1] = (message, response) yield chat_history, current_stage, current_model, company, use_case, "" return self.log_feedback("🔧 Regenerating population code from scratch...") # Regenerate using the reliable template from schema_utils import parse_ddl_schema schema_info = parse_ddl_schema(self.demo_builder.schema_generation_results) if not schema_info: response = "❌ Failed to parse DDL schema. Cannot regenerate." 
chat_history[-1] = (message, response) yield chat_history, current_stage, current_model, company, use_case, "" return # Generate new code using the template (which includes all fixes) fixed_code = self.get_fallback_population_code(schema_info) # Validate it compiles try: compile(fixed_code, '', 'exec') self.log_feedback("✅ Regenerated code validated") except SyntaxError as e: response = f"❌ Template generation bug: {e}\n\nPlease contact support." chat_history[-1] = (message, response) yield chat_history, current_stage, current_model, company, use_case, "" return # Update the code and mark as template-generated self.demo_builder.data_population_results = fixed_code self.population_code = fixed_code self.demo_builder.population_code_source = "template" # Mark as template self.log_feedback("🔧 Code regenerated, retrying deployment...") # Truncate and retry from cdw_connector import SnowflakeDeployer from demo_prep import execute_population_script deployer = SnowflakeDeployer() deployer.connect() try: cursor = deployer.connection.cursor() cursor.execute(f"USE SCHEMA {self._last_schema_name}") cursor.execute("SHOW TABLES") tables = cursor.fetchall() for table in tables: cursor.execute(f"TRUNCATE TABLE {table[1]}") cursor.close() deployer.disconnect() except Exception as e: self.log_feedback(f"⚠️ Truncate warning: {e}") if deployer.connection: deployer.disconnect() success, msg = execute_population_script( fixed_code, self._last_schema_name, skip_modifications=True # Template code, don't modify ) if success: response = f"✅ **Fixed and Successful!**\n\n{msg}\n\nDemo deployed to Snowflake! 🎉" del self._last_population_error del self._last_schema_name else: response = f"❌ AI fix didn't work: {msg[:200]}...\n\nTry 'fix' again or 'retry'?" 
except Exception as e: import traceback error_details = traceback.format_exc() self.log_feedback(f"❌ Fix/regenerate error: {error_details}") response = f"❌ **Fix/regenerate failed with error:**\n\n```\n{str(e)}\n```\n\nPlease check the error details above." chat_history[-1] = (message, response) yield chat_history, current_stage, current_model, company, use_case, "" return return elif current_stage == 'outlier_adjustment': # Handle outlier adjustment stage if 'done' in message_lower or 'finish' in message_lower or 'complete' in message_lower: # Close adjuster connection if hasattr(self, '_adjuster'): self._adjuster.close() response = """✅ **Demo Creation Complete!** Your demo is fully deployed with custom outliers! **Summary:** - ✅ Research completed - ✅ DDL schema created - ✅ Data populated - ✅ Deployed to Snowflake - ✅ ThoughtSpot objects created - ✅ Custom outliers added **Access your demo:** - ThoughtSpot Liveboard: Check your ThoughtSpot instance - Snowflake Data: Query your schema 🎉 **Ready to present!**""" chat_history[-1] = (message, response) yield chat_history, current_stage, current_model, company, use_case, "" return # Check if waiting for confirmation if hasattr(self, '_pending_adjustment'): if 'yes' in message_lower or 'execute' in message_lower or 'confirm' in message_lower: # Execute the pending adjustment try: adjuster = self._adjuster strategy = self._pending_adjustment['strategy'] chat_history[-1] = (message, "**Executing SQL...**") yield chat_history, current_stage, current_model, company, use_case, "" result = adjuster.execute_sql(strategy['sql']) if result['success']: response = f"""✅ **SUCCESS!** Updated {result['rows_affected']} rows. 
**Next steps:** - 🔄 Refresh your ThoughtSpot liveboard to see changes - Or make another adjustment - Type **'done'** when finished **What else would you like to adjust?**""" else: response = f"""❌ **FAILED** {result['error']} **Common issues:** - Number too large for column (try smaller value) - Database connection issue Try a different adjustment or type **'done'** to finish.""" del self._pending_adjustment chat_history[-1] = (message, response) yield chat_history, current_stage, current_model, company, use_case, "" return except Exception as e: del self._pending_adjustment response = f"""❌ **Execution Error** {str(e)} Try a different adjustment or type **'done'** to finish.""" chat_history[-1] = (message, response) yield chat_history, current_stage, current_model, company, use_case, "" return elif 'no' in message_lower or 'cancel' in message_lower: del self._pending_adjustment response = """❌ **Cancelled** No changes made. Try another adjustment or type **'done'** to finish.""" chat_history[-1] = (message, response) yield chat_history, current_stage, current_model, company, use_case, "" return # Check if adjuster is available if not hasattr(self, '_adjuster'): response = """❌ **Adjuster Not Available** Smart data adjuster was not initialized properly. Type **'done'** to finish the workflow.""" chat_history[-1] = (message, response) yield chat_history, current_stage, current_model, company, use_case, "" return # Process adjustment request try: adjuster = self._adjuster # Show processing message chat_history[-1] = (message, "🤔 **Analyzing request...**") yield chat_history, current_stage, current_model, company, use_case, "" # Match request to visualization match = adjuster.match_request_to_viz(message) if not match: response = """❌ **Could not understand request** I couldn't match your request to a visualization. 
**Examples:** - "make 1080p webcam 40B" - "increase smart watch by 20%" - "viz 3, increase laptop to 50B" Try again or type **'done'** to finish.""" chat_history[-1] = (message, response) yield chat_history, current_stage, current_model, company, use_case, "" return # Pick metric column (use hint from match if available) metric_hint = match.get('metric_hint') metric_column = adjuster._pick_metric_column(metric_hint) if not metric_column: response = "❌ Could not identify a metric column in your schema. Try specifying the column name." chat_history[-1] = (message, response) yield chat_history, current_stage, current_model, company, use_case, "" return # Get current value (new 4-tuple return: value, matched_name, dim_table, fact_table) entity_type = match.get('entity_type') current_value, matched_entity, dim_table, fact_table = adjuster.get_current_value( match['entity_value'], metric_column, entity_type ) if current_value == 0 or matched_entity is None: response = ( f"❌ **No data found** for `{match['entity_value']}`.\n\n" f"Check the spelling or try a different entity. Type **'done'** to finish." 
) chat_history[-1] = (message, response) yield chat_history, current_stage, current_model, company, use_case, "" return # Calculate target target_value = match.get('target_value') percentage = match.get('percentage') if match.get('is_percentage') else None if percentage is not None: target_value = current_value * (1 + percentage / 100) match['target_value'] = target_value # Generate strategy strategy = adjuster.generate_strategy( match['entity_value'], metric_column, current_value, target_value=target_value, percentage=percentage, entity_type=entity_type, ) # Present smart confirmation confirmation = adjuster.present_smart_confirmation(match, current_value, strategy, metric_column) # Store for execution if user confirms self._pending_adjustment = { 'match': match, 'strategy': strategy, 'current_value': current_value } response = f"""{confirmation} **Type 'yes' to execute, 'no' to cancel, or make another adjustment request.**""" chat_history[-1] = (message, response) yield chat_history, current_stage, current_model, company, use_case, "" return except Exception as e: import traceback error_details = traceback.format_exc() self.log_feedback(f"❌ Adjustment error: {error_details}") response = f"""❌ **Adjustment Error** {str(e)} Try a different request or type **'done'** to finish.""" chat_history[-1] = (message, response) yield chat_history, current_stage, current_model, company, use_case, "" return # Fallback to regular message processing response = self.process_regular_message(message, current_stage, company, use_case) chat_history[-1] = (message, response) yield chat_history, current_stage, current_model, company, use_case, "" def extract_company_from_message(self, message): """Extract company name from message""" import re # Pre-clean the message: fix common typos # Replace multiple dots with single dot (e.g., "Comscore..com" -> "Comscore.com") cleaned_message = re.sub(r'\.{2,}', '.', message) # Remove spaces around dots in URLs (e.g., "Nike . 
com" -> "Nike.com") cleaned_message = re.sub(r'\s*\.\s*', '.', cleaned_message) # Simpler, more robust patterns - capture domain.tld format # Don't include trailing punctuation in the URL pattern patterns = [ # With colon: "company: tinder.com" r'company:\s*([a-zA-Z0-9-]+\.[a-zA-Z]{2,})', # Without colon: "for the company tinder.com" or "company tinder.com" r'for\s+(?:the\s+)?company\s+([a-zA-Z0-9-]+\.[a-zA-Z]{2,})', r'company\s+([a-zA-Z0-9-]+\.[a-zA-Z]{2,})', # "demo for tinder.com" r'demo\s+for\s+([a-zA-Z0-9-]+\.[a-zA-Z]{2,})', ] for pattern in patterns: match = re.search(pattern, cleaned_message, re.IGNORECASE) if match: company = match.group(1).strip() # Clean up trailing dots just in case company = company.rstrip('.') return company # Fallback: any bare domain.tld in the message (e.g. "Nike.com, Retail Sales") bare_url = re.search(r'\b([a-zA-Z0-9-]+\.[a-zA-Z]{2,})\b', cleaned_message, re.IGNORECASE) if bare_url: return bare_url.group(1).rstrip('.') return None def extract_use_case_from_message(self, message): """Extract use case from message""" import re # Patterns for extracting use case - ordered by specificity patterns = [ r'use\s+case\s+of\s+([^,\n]+?)(?:\s*$)', # "use case of XYZ" r'use\s+case:\s*([^,\n]+?)(?:\s*$|\s+and)', # "use case: XYZ" r'(?:the\s+)?use\s+case\s+is\s+([^,\n]+?)(?:\s*$)', # "the use case is XYZ" r'for\s+(?:the\s+)?use\s+case:\s*([^,\n]+?)(?:\s*$|\s+and)', # "for the use case: XYZ" r'(?:and|\.)\s+use\s+case\s+(?:of\s+)?([^,\n]+?)(?:\s*$)', # ". 
            # (continued) ". use case of XYZ" or "and use case XYZ"
            r'with\s+(?:the\s+)?use\s+(?:case\s+)?([^,\n]+?)(?:\s*$)',   # "with the use XYZ" or "with the use case XYZ"
            r'(?:for|with)\s+(?:the\s+)?use:\s*([^,\n]+?)(?:\s*$)',      # "for the use: XYZ"
            r'focused on\s+([^,\n]+?)(?:\s*$|\s+and)',                   # "focused on XYZ"
            # "with a X use case" format - capture everything between "with a" and "use case"
            r'with\s+(?:a|an)\s+([^,\n]+?)\s+use\s+case',                # "with a Subscription use case"
            # "with a X" at end of message (no trailing "use case")
            r'with\s+(?:a|an)\s+([^,\n]+?)(?:\s*$)',                     # "with a Subscription Conversion & Pricing Impact"
        ]

        for pattern in patterns:
            match = re.search(pattern, message, re.IGNORECASE)
            if match:
                use_case = match.group(1).strip()
                # Clean up trailing punctuation
                use_case = use_case.rstrip('.')
                # Don't return if it looks like a company/url (contains .com, .org, etc)
                if re.search(r'\.(com|org|net|io|co|ai)\b', use_case, re.IGNORECASE):
                    continue
                return use_case

        # Fallback: text after a comma or dash following a domain.tld
        # Handles: "Nike.com, Retail Sales" / "Nike.com - Supply Chain"
        after_domain = re.search(
            r'[a-zA-Z0-9-]+\.[a-zA-Z]{2,}[\s]*[,\-–—]\s*(.+)',
            message, re.IGNORECASE
        )
        if after_domain:
            use_case = after_domain.group(1).strip().rstrip('.')
            if use_case and not re.search(r'\.(com|org|net|io|co|ai)\b', use_case, re.IGNORECASE):
                return use_case

        return None

    def handle_override(self, message):
        """Handle /over command to change company or use case.

        Returns a markdown response string; it does not itself mutate state —
        the caller applies the parsed values.
        """
        parts = message.strip().split(maxsplit=1)
        if len(parts) < 2:
            # No arguments: show usage help.
            return """🔧 **Override Command**

To change settings, use:
- `/over company: [new company]` - Change company
- `/over usecase: [new use case]` - Change use case
- `/over company: [company] usecase: [use case]` - Change both

**Example:** `/over company: Amazon.com usecase: supply chain`
"""

        override_text = parts[1]
        new_company = None
        new_usecase = None

        # Parse override text
        # NOTE(review): splitting the lower()-cased string means the extracted
        # values are returned lowercased ("Amazon.com" -> "amazon.com") — confirm intended.
        if 'company:' in override_text.lower():
            company_part = override_text.lower().split('company:')[1]
            if 'usecase:' in company_part:
                new_company = company_part.split('usecase:')[0].strip()
            else:
                new_company = company_part.strip()

        if 'usecase:' in override_text.lower():
            usecase_part = override_text.lower().split('usecase:')[1].strip()
            new_usecase = usecase_part

        if new_company or new_usecase:
            response = "✅ **Settings Updated!**\n\n"
            if new_company:
                response += f"📍 Company: **{new_company}**\n"
            if new_usecase:
                response += f"🎯 Use Case: **{new_usecase}**\n"
            response += "\nWhat would you like to do next?"
            return response

        return "❌ Could not parse override. Use format: `/over company: [name] usecase: [case]`"

    def log_feedback(self, message):
        """Add message to AI feedback log and return the full log as one string."""
        import datetime
        timestamp = datetime.datetime.now().strftime("%H:%M:%S")
        self.ai_feedback_log.append(f"[{timestamp}] {message}")
        # Limit log size to prevent memory issues (keep last 500 entries)
        if len(self.ai_feedback_log) > 500:
            self.ai_feedback_log = self.ai_feedback_log[-500:]
        print(f"[AI Feedback] {message}")  # Also print to console
        return "\n".join(self.ai_feedback_log)

    def run_research_streaming(self, company, use_case, generic_context=""):
        """Run the research phase with streaming updates.

        Generator: yields progressively longer markdown progress strings.

        Args:
            company: Company URL/name
            use_case: Use case name
            generic_context: Additional context provided by user for generic use cases
        """
        # Optional session logger for timing the research stage.
        _slog = self._session_logger
        _t = _slog.log_start("research") if _slog else None
        print(f"\n\n[CACHE DEBUG] === run_research_streaming called ===")
        print(f"[CACHE DEBUG] company: {company}")
        print(f"[CACHE DEBUG] use_case: {use_case}\n\n")
        import time
        import os
        from main_research import ResultsManager

        # Validate that we have actual values
        if not company:
            yield "❌ **Error:** No company provided. Please specify a company URL."
            return
        if not use_case:
            yield f"❌ **Error:** No use case provided. Please specify what you want to analyze.\n\n**Company provided:** {company}\n**Use case needed:** Tell me what analytics you want!"
            return

        progress_message = "🔍 **Starting Research**\n\n"
        yield progress_message

        try:
            # Initialize demo builder if needed OR if company/use_case changed
            # CRITICAL: Always create fresh DemoBuilder when company/use_case changes
            # to avoid persisting prompts/data from previous runs
            needs_new_builder = (
                not self.demo_builder
                or self.demo_builder.company_url != company
                or self.demo_builder.use_case != use_case
            )
            if needs_new_builder:
                if self.demo_builder:
                    self.log_feedback(f"🔄 Company/use case changed - creating fresh DemoBuilder (was: {self.demo_builder.company_url}/{self.demo_builder.use_case})")
                else:
                    self.log_feedback("Initializing DemoBuilder...")
                progress_message += "✓ Initializing DemoBuilder...\n"
                yield progress_message
                self.demo_builder = DemoBuilder(
                    use_case=use_case,
                    company_url=company
                )

            # Prepare URL - clean up any extra text that might have been captured
            # Remove "use case:" and anything after it, and clean whitespace
            import re
            clean_company = re.sub(r'\s+use\s+case:.*$', '', company, flags=re.IGNORECASE).strip()
            clean_company = re.sub(r'\s+and\s+.*$', '', clean_company, flags=re.IGNORECASE).strip()
            url = clean_company if clean_company.startswith('http') else f"https://{clean_company}"

            # Check for cached research results.
            # Cache key is derived from the bare domain plus the use case.
            domain = url.replace('https://', '').replace('http://', '').replace('www.', '').split('/')[0]
            safe_domain = domain.replace('.', '_').replace(':', '_')
            use_case_safe = use_case.lower().replace(' ', '_').replace('/', '_')

            # Try new format first (with use case)
            # Use absolute path to ensure we find cache regardless of CWD
            script_dir = os.path.dirname(os.path.abspath(__file__))
            results_dir = os.path.join(script_dir, "results")
            cache_filename = f"{safe_domain}_{use_case_safe}.json"
            cache_filepath = os.path.join(results_dir, cache_filename)

            # If exact match doesn't exist, try fuzzy matching for similar use cases
            if not os.path.exists(cache_filepath):
                import glob
                print(f"[CACHE DEBUG] Current working directory: {os.getcwd()}")
                print(f"[CACHE DEBUG] Script directory: {script_dir}")
                print(f"[CACHE DEBUG] Results directory: {results_dir}")
                similar_files = glob.glob(os.path.join(results_dir, f"{safe_domain}_*.json"))
                print(f"[CACHE DEBUG] Exact file {cache_filepath} not found")
                print(f"[CACHE DEBUG] Glob pattern: {results_dir}/{safe_domain}_*.json")
                print(f"[CACHE DEBUG] Similar files found: {similar_files}")
                if similar_files:
                    # Found similar cache files for this company
                    # NOTE(review): picks the first glob hit — any cached use case
                    # for this domain is accepted as "similar"; confirm intended.
                    cache_filepath = similar_files[0]  # Use the first one found
                    cache_filename = os.path.basename(cache_filepath)
                    print(f"[CACHE DEBUG] Using similar file: {cache_filename}")
                    self.log_feedback(f"📋 Found similar cache file: {cache_filename}")
                elif not os.path.exists(cache_filepath):
                    # Try old format (without use case)
                    old_cache_filename = f"research_{safe_domain}.json"
                    old_cache_filepath = os.path.join(results_dir, old_cache_filename)
                    if os.path.exists(old_cache_filepath):
                        cache_filename = old_cache_filename
                        cache_filepath = old_cache_filepath

            cached_results = None
            cache_age_hours = None

            # Check for cached research and use automatically if valid
            print(f"[CACHE DEBUG] Final cache_filepath: {cache_filepath}, exists: {os.path.exists(cache_filepath)}")
            if os.path.exists(cache_filepath):
                try:
                    # Check cache age (5 day expiry)
                    cache_mtime = os.path.getmtime(cache_filepath)
                    cache_age = time.time() - cache_mtime
                    cache_age_hours = cache_age / 3600  # Convert to hours
                    if cache_age_hours <= 120:  # Cache valid for 5 days (120 hours)
                        self.log_feedback(f"📋 Using cached research (age: {cache_age_hours:.1f} hours)")
                        progress_message += f"📋 **Using Cached Research** ({cache_age_hours:.1f} hours old)\n\n"
                        # Load cached results automatically
                        with open(cache_filepath, 'r') as f:
                            cached_data = json.load(f)
                        self.demo_builder.company_analysis_results = cached_data.get('company_summary', '')
                        self.demo_builder.industry_research_results = cached_data.get('research_paper', '')
                        self.demo_builder.combined_research_results = self.demo_builder.get_research_context()
                        self.demo_builder.company_url = cached_data.get('url', url)
                        self.demo_builder.advance_stage()
                        progress_message += "✅ **Research loaded from cache!**\n\n"
                        progress_message += "Proceeding to DDL generation...\n"
                        self.log_feedback("✅ Research loaded from cache, generating DDL")
                        yield progress_message
                        # Automatically trigger DDL generation
                        try:
                            response, ddl_code = self.run_ddl_creation()
                            yield response
                        except Exception as e:
                            import traceback
                            error_msg = f"❌ DDL generation failed: {str(e)}\n{traceback.format_exc()}"
                            self.log_feedback(error_msg)
                            yield error_msg
                        if _slog and _t:
                            _slog.log_end("research", _t)
                        return
                    else:
                        self.log_feedback(f"📋 Cache too old ({cache_age_hours:.1f} hours), running fresh research")
                        progress_message += f"📋 Cache expired ({cache_age_hours:.1f} hours old), running fresh research...\n"
                        yield progress_message
                except Exception as e:
                    # Best-effort: a corrupt/unreadable cache falls through to fresh research.
                    self.log_feedback(f"⚠️ Could not load cache: {str(e)}")
                    progress_message += f"⚠️ Could not load cache, running fresh research...\n"
                    yield progress_message
            else:
                self.log_feedback("📋 No cached research found")
                progress_message += "📋 No cached research found, running fresh research...\n"
                yield progress_message

            # No valid cache, proceed with fresh research
            yield from self._run_fresh_research(
                company,
                use_case,
                url,
                progress_message,
                cache_filename,
                results_dir,
                generic_context,
            )
            if _slog and _t:
                _slog.log_end("research", _t)
        except Exception as e:
            import traceback
            error_msg = f"❌ Research failed: {str(e)}\n{traceback.format_exc()}"
            self.log_feedback(error_msg)
            yield f"❌ Research failed: {str(e)}"
            if _slog and _t:
                _slog.log_end("research", _t, error=str(e))

    def _run_fresh_research(
        self,
        company,
        use_case,
        url,
        progress_message,
        cache_filename,
        results_dir,
        generic_context="",
    ):
        """Run fresh research (no cache)

        Args:
            results_dir: Absolute path to the research cache directory.
            generic_context: Additional context for generic use cases
        """
        import os
        from datetime import datetime
        from main_research import ResultsManager

        # Extract website content
        self.log_feedback(f"Extracting website content from {company}...")
        progress_message += f"✓ Extracting website content from {company}...\n"
        yield progress_message
        website = Website(url)
        self.demo_builder.website_data = website

        # Check if website extraction failed - but continue anyway with internet search
        if not website.text or len(website.text) == 0:
            warning_msg = f"⚠️ **Couldn't access website directly** - continuing with internet research\n\n"
            if website.error_message:
                warning_msg += f"(Error: {website.error_message})\n\n"
            self.log_feedback(warning_msg)
            progress_message += warning_msg
            yield progress_message
            # Set fallback values for prompts so downstream prompt builders still work.
            website.text = f"Website content unavailable. Company URL: {url}. Please research this company using general knowledge and the use case context."
            website.title = company  # Use company name as title fallback
            website.css_links = []
            website.logo_candidates = []
        else:
            self.log_feedback(f"Extracted {len(website.text)} characters from {website.title}")
            progress_message += f"✓ Extracted {len(website.text)} characters from {website.title}\n\n"
            yield progress_message

        # Get LLM provider
        model = self.settings.get('model', DEFAULT_LLM_MODEL)
        provider_name, model_name = map_llm_display_to_provider(model)
        self.log_feedback(f"Using {provider_name}/{model_name} for research...")
        progress_message += f"✓ Using {provider_name}/{model_name}\n\n"
        yield progress_message

        # Initialize researcher
        researcher = MultiLLMResearcher(provider=provider_name, model=model_name)

        # Company Analysis
        progress_message += "🔍 **Phase 1: Company Analysis**\n"
        yield progress_message
        self.log_feedback("Analyzing company...")
        system_prompt, user_prompt = build_company_analysis_prompt(
            use_case, website.title, url, website.text,
            len(website.css_links), website.logo_candidates
        )
        # Inject generic use case context if provided
        if generic_context:
            user_prompt += f"\n\n**Additional Context from User:**\n{generic_context}"
            self.log_feedback(f"📝 Added generic use case context: {generic_context[:100]}...")
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]

        # Stream the response
        self.log_feedback("DEBUG: Starting company analysis streaming...")
        response = researcher.make_request(messages, temperature=0.3, max_tokens=4000, stream=True)
        company_analysis = ""
        chunk_count = 0
        for chunk in response:
            chunk_text = researcher.extract_chunk_content(chunk)
            if chunk_text:
                company_analysis += chunk_text
                chunk_count += 1
                if chunk_count % 5 == 0:  # Update every 5 chunks
                    progress_message_temp = progress_message + f"Analyzing... ({len(company_analysis)} chars)\n"
                    yield progress_message_temp
                    self.log_feedback(f"Company: {chunk_text[:50]}...")
        self.log_feedback(f"DEBUG: Company analysis streaming COMPLETE. Total chunks: {chunk_count}, chars: {len(company_analysis)}")

        # Log the prompt/response
        from prompt_logger import log_researcher_call
        log_researcher_call("research_company", researcher, messages, company_analysis, logger=self._prompt_logger)

        self.demo_builder.company_analysis_results = company_analysis
        self.log_feedback("✅ Company analysis complete!")
        progress_message += f"✅ Company analysis complete ({len(company_analysis)} chars)\n\n"
        yield progress_message

        # Industry Research
        progress_message += "🔍 **Phase 2: Industry Research**\n"
        yield progress_message
        self.log_feedback("Researching industry best practices...")
        system_prompt, user_prompt = build_industry_research_prompt(use_case, company_analysis)
        # Inject generic use case context if provided
        if generic_context:
            user_prompt += f"\n\n**Additional Context from User:**\n{generic_context}"
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]

        # Stream the response
        self.log_feedback("DEBUG: Starting industry research streaming...")
        response = researcher.make_request(messages, temperature=0.4, max_tokens=4000, stream=True)
        industry_research = ""
        chunk_count = 0
        self.log_feedback(f"DEBUG: Response object type: {type(response)}")
        for chunk in response:
            chunk_text = researcher.extract_chunk_content(chunk)
            if chunk_text:
                industry_research += chunk_text
                chunk_count += 1
                if chunk_count % 5 == 0:  # Update every 5 chunks
                    progress_message_temp = progress_message + f"Researching... ({len(industry_research)} chars)\n"
                    yield progress_message_temp
                    self.log_feedback(f"Research: {chunk_text[:50]}...")
        self.log_feedback(f"DEBUG: Industry research streaming COMPLETE. Total chunks: {chunk_count}, chars: {len(industry_research)}")

        # Log the prompt/response
        log_researcher_call("research_industry", researcher, messages, industry_research, logger=self._prompt_logger)

        self.demo_builder.industry_research_results = industry_research
        self.log_feedback("✅ Industry research complete!")
        progress_message += f"✅ Industry research complete ({len(industry_research)} chars)\n\n"
        yield progress_message

        # Save to cache (best-effort; research still succeeds if caching fails)
        try:
            self.log_feedback("💾 Saving research to cache...")
            research_results = {
                'company_summary': company_analysis,
                'research_paper': industry_research,
                'url': url,
                'use_case': use_case,
                'generated_at': datetime.now().isoformat(),
            }
            os.makedirs(results_dir, exist_ok=True)
            ResultsManager.save_results(research_results, cache_filename, results_dir)
            progress_message += "💾 Cached research results for future use!\n\n"
            yield progress_message
        except Exception as e:
            self.log_feedback(f"⚠️ Could not cache results: {str(e)}")

        # Update stage
        self.demo_builder.advance_stage()
        self.demo_builder.set_ready()
        self.research_results = {
            'company': company,
            'use_case': use_case,
            'completed': True
        }
        self.log_feedback("✅ Research completed successfully!")

        # Generate synopsis
        company_name = self.demo_builder.extract_company_name()
        synopsis = f"""✅ **Research Complete!**

**Company:** {company_name}
**Use Case:** {use_case}

**What I learned:**
- Analyzed company website and business model
- Researched industry best practices for {use_case}
- Generated context for building realistic demo data

**Ready to create the DDL now?** (Type 'yes' to proceed or 'no' to redo research)"""
        yield synopsis

    def _load_cached_research(self, cache_filepath, company, use_case):
        """Load research from cache.

        Returns True on success, None on any failure (missing/invalid cache).
        """
        from main_research import ResultsManager
        try:
            cached_results = ResultsManager.load_results(cache_filepath)
            if not isinstance(cached_results, dict):
                return None
            # Store in demo_builder
            self.demo_builder.company_analysis_results = cached_results.get('company_summary', '')
            self.demo_builder.industry_research_results = cached_results.get('research_paper', '')
            # Also need to get website data for company name extraction
            url = company if company.startswith('http') else f"https://{company}"
            self.demo_builder.website_data = Website(url)
            # Update stage
            self.demo_builder.advance_stage()
            self.demo_builder.set_ready()
            self.research_results = {
                'company': company,
                'use_case': use_case,
                'completed': True
            }
            return True
        except Exception as e:
            self.log_feedback(f"❌ Error loading cache: {str(e)}")
            return None

    def _generate_spotter_questions(self, use_case: str, ddl_code: str) -> list:
        """Generate use-case specific Spotter questions based on the schema.

        Priority order:
        1. liveboard_questions.spotter_qs from the vertical×function config
        2. FUNCTIONS[fn].spotter_templates from the config
        3. Hardcoded fallbacks per use case
        4.
        Generic questions
        """
        import re

        # Priority 1: Use spotter_qs from liveboard_questions in use case config
        try:
            uc_config = self.use_case_config or get_use_case_config(
                self.vertical or "Generic", self.function or "Generic"
            )
            lq = uc_config.get("liveboard_questions", [])
            configured_questions = []
            for q in lq:
                for sq in q.get('spotter_qs', []):
                    configured_questions.append({
                        'question': sq,
                        'purpose': f'Reveals {q["title"]} pattern'
                    })
            if configured_questions:
                v = self.vertical or "Generic"
                f = self.function or "Generic"
                self.log_feedback(f"📋 Using {len(configured_questions)} Spotter questions from {v}×{f} config")
                return configured_questions[:8]
        except Exception as e:
            self.log_feedback(f"⚠️ Spotter questions not available: {e}")

        # Priority 2: Try FUNCTIONS config for spotter_templates
        try:
            uc_config = self.use_case_config or get_use_case_config(
                self.vertical or "Generic", self.function or "Generic"
            )
            spotter_templates = uc_config.get('spotter_templates', [])
            if spotter_templates:
                template_questions = [{'question': t, 'purpose': 'From use case config'} for t in spotter_templates]
                self.log_feedback(f"📋 Using {len(template_questions)} Spotter templates from config")
                return template_questions[:8]
        except:
            # NOTE(review): bare except silently swallows everything (incl. KeyboardInterrupt);
            # consider narrowing to Exception.
            pass

        # Priority 3+4: Fallback to DDL-based and hardcoded questions
        # Extract column names from DDL
        columns = []
        if ddl_code:
            # Find column definitions (word followed by data type)
            col_pattern = r'^\s+(\w+)\s+(VARCHAR|NUMBER|DATE|BOOLEAN|INT|DECIMAL|FLOAT|TIMESTAMP)'
            for match in re.finditer(col_pattern, ddl_code, re.MULTILINE | re.IGNORECASE):
                col_name = match.group(1).lower().replace('_', ' ')
                columns.append(col_name)

        # Find likely metrics (columns with revenue, sales, cost, amount, count, total, etc.)
        metrics = [c for c in columns if any(m in c for m in ['revenue', 'sales', 'cost', 'amount', 'count', 'total', 'spend', 'clicks', 'impressions', 'conversions', 'rate', 'roi', 'ctr', 'cpm', 'cpc'])]
        # Find likely dimensions (columns with name, type, category, region, channel, segment, etc.)
        dimensions = [c for c in columns if any(d in c for d in ['name', 'type', 'category', 'region', 'channel', 'segment', 'campaign', 'audience', 'product', 'brand', 'status'])]

        # Use-case specific question templates (legacy fallback)
        use_case_questions = {
            'Marketing Analytics': [
                {'question': 'What is total spend by channel this quarter?', 'purpose': 'Shows channel performance analysis'},
                {'question': 'Which campaigns have the highest ROI?', 'purpose': 'Shows ranking and efficiency metrics'},
                {'question': 'How have conversions changed month over month?', 'purpose': 'Shows trend analysis and change detection'},
                {'question': 'What is the conversion rate by audience segment?', 'purpose': 'Shows segmentation analysis'},
            ],
            'Sales Analytics': [
                {'question': 'What is total revenue by region this quarter?', 'purpose': 'Shows geographic performance'},
                {'question': 'Who are the top 10 sales reps by revenue?', 'purpose': 'Shows ranking capabilities'},
                {'question': 'How has pipeline changed compared to last month?', 'purpose': 'Shows change detection'},
                {'question': 'What is win rate by product category?', 'purpose': 'Shows conversion analysis'},
            ],
            'Demand/Inventory Planning': [
                {'question': 'What is current inventory by product category?', 'purpose': 'Shows inventory overview'},
                {'question': 'Which products are at risk of stockout?', 'purpose': 'Shows proactive alerting'},
                {'question': 'How has demand changed compared to forecast?', 'purpose': 'Shows variance analysis'},
                {'question': 'What is days of supply by warehouse?', 'purpose': 'Shows operational metrics'},
            ],
            'Merchandising': [
                {'question': 'What is sales performance by product category?', 'purpose': 'Shows category analysis'},
                {'question': 'Which products have the highest margin?', 'purpose': 'Shows profitability ranking'},
                {'question': 'How has sell-through rate changed this month?', 'purpose': 'Shows trend analysis'},
                {'question': 'What is inventory turnover by store?', 'purpose': 'Shows store-level metrics'},
            ],
            'Loss Prevention Analytics': [
                {'question': 'What is total shrinkage by store?', 'purpose': 'Shows loss by location'},
                {'question': 'Which stores have the highest shrinkage rate?', 'purpose': 'Shows risk ranking'},
                {'question': 'How has shrinkage trended over the past 6 months?', 'purpose': 'Shows trend analysis'},
                {'question': 'What are the top causes of loss?', 'purpose': 'Shows root cause analysis'},
            ],
        }

        # Get questions for this use case, or fall back to generic
        questions = use_case_questions.get(use_case, [
            {'question': 'What are the key metrics this month?', 'purpose': 'Shows summary view'},
            {'question': 'What changed compared to last period?', 'purpose': 'Shows change detection'},
            {'question': 'What are the top performers?', 'purpose': 'Shows ranking capabilities'},
            {'question': 'How have trends changed over time?', 'purpose': 'Shows time-series analysis'},
        ])

        # If we found actual metrics/dimensions in DDL, try to make questions more specific
        if metrics and dimensions:
            metric = metrics[0].title()
            dimension = dimensions[0].title()
            questions[0] = {'question': f'What is total {metric} by {dimension}?', 'purpose': 'Shows core metric breakdown'}

        return questions

    def _get_demo_tips(self, use_case: str) -> str:
        """Get use-case specific demo tips (markdown bullet list)."""
        tips = {
            'Marketing Analytics': """- **Lead with ROI**: Marketing leaders care about efficiency, show spend vs. results
- **Highlight attribution**: Show how ThoughtSpot can break down performance by channel/campaign
- **Show real-time**: Marketing moves fast - emphasize live data and quick answers
- **Monitor setup**: Demo alerting for campaign performance thresholds""",
            'Sales Analytics': """- **Focus on pipeline**: Sales leaders want to see deal flow and forecasting
- **Show rep performance**: Ranking and leaderboards resonate with sales teams
- **Highlight forecasting**: Show how AI can predict outcomes
- **Territory analysis**: Geographic breakdowns are always compelling""",
            'Demand/Inventory Planning': """- **Lead with stockouts**: Show how to prevent lost sales
- **Forecast vs. actual**: Variance analysis is key for planners
- **Supplier performance**: Show lead time and reliability metrics
- **Seasonal patterns**: Highlight time-series capabilities""",
            'Merchandising': """- **Category performance**: Start with what's selling
- **Margin analysis**: Profitability is always top of mind
- **Assortment optimization**: Show breadth vs. depth analysis
- **Store comparisons**: Regional and store-level drill-down""",
            'Loss Prevention Analytics': """- **Risk scoring**: Show high-risk locations or categories
- **Trend detection**: Highlight anomaly detection capabilities
- **Root cause**: Drill into why losses are occurring
- **ROI of prevention**: Connect to business impact""",
        }
        return tips.get(use_case, """- **Start broad**: Begin with executive summary metrics
- **Then drill down**: Show the ability to explore details
- **Ask questions**: Let the AI demonstrate natural language
- **End with action**: Show how insights lead to decisions""")

    def _generate_spotter_viz_story(self, company_name: str, use_case: str, model_name: str = None, liveboard_name: str = None) -> str:
        """Generate a Spotter Viz story — a conversational sequence of NL prompts
        that can be entered into ThoughtSpot's Spotter Viz agent to build a liveboard.

        Returns two sections:
        1.
        Persona-driven story built from liveboard_questions config (always present)
        2. AI-generated story from LLM (appended after a divider)
        """
        from demo_personas import parse_use_case, get_use_case_config
        v, f = parse_use_case(use_case or '')
        uc_cfg = get_use_case_config(v or "Generic", f or "Generic")
        lq = uc_cfg.get("liveboard_questions", [])
        data_source = model_name or f"{company_name} model"

        # --- Section 1: Persona-driven story from liveboard_questions ---
        header = f"""# Spotter Viz Story: {company_name}
## {use_case}

*Copy these prompts into ThoughtSpot Spotter Viz to build this liveboard interactively.*

---

## Part 1: Structured Demo Flow

"""
        steps = []
        step_num = 1
        for q in lq:
            title = q['title']
            viz_q = q['viz_question']
            insight = q.get('insight', '')
            spotter_qs = q.get('spotter_qs', [])
            step = f"### Step {step_num}: {title}\n"
            step += f'> "{viz_q}"\n\n'
            if insight:
                step += f"**What to look for:** {insight}\n\n"
            if spotter_qs:
                step += "**Follow-up Spotter questions:**\n"
                for sq in spotter_qs[:2]:
                    step += f'> "{sq}"\n\n'
            steps.append(step)
            step_num += 1

        if steps:
            persona_section = header + "\n".join(steps)
        else:
            # No configured questions: emit a single catch-all prompt.
            persona_section = header + f'> "Build a {use_case} dashboard for {company_name} using {data_source}"\n\n'
        persona_section += "\n---\n\n"

        # --- Section 2: AI-generated story (best-effort; falls back to a stub on error) ---
        try:
            from prompts import build_prompt
            vertical = v or "Generic"
            function = f or "Generic"
            company_context = f"Company: {company_name}\nUse Case: {use_case}"
            if model_name:
                company_context += f"\nData Source/Model: {model_name}"
            if liveboard_name:
                company_context += f"\nLiveboard Name: {liveboard_name}"
            if hasattr(self, 'demo_builder') and self.demo_builder:
                # NOTE(review): reads demo_builder.company_summary here, while the
                # research code stores company_analysis_results — confirm attribute name.
                research = getattr(self.demo_builder, 'company_summary', '') or ''
                if research:
                    company_context += f"\n\nCompany Research:\n{research[:1500]}"
            prompt = build_prompt(
                stage="spotter_viz_story",
                vertical=vertical,
                function=function,
                company_context=company_context,
            )
            llm_model = self.settings.get('model', DEFAULT_LLM_MODEL)
            self.log_feedback(f"🎬 Generating AI Spotter Viz story ({llm_model})...")
            provider_name, model_name_str = map_llm_display_to_provider(llm_model)
            researcher = MultiLLMResearcher(provider=provider_name, model=model_name_str)
            ai_story = researcher.make_request(prompt, max_tokens=2000, temperature=0.7)
            ai_section = "## Part 2: AI-Generated Story\n\n" + ai_story
        except Exception as e:
            self.log_feedback(f"⚠️ AI Spotter Viz story generation failed: {e}")
            ai_section = "## Part 2: AI-Generated Story\n\n*(Generation failed — use the structured flow above.)*"

        return persona_section + ai_section

    def _build_fallback_spotter_story(self, company_name: str, use_case: str, model_name: str = None) -> str:
        """Build a basic Spotter Viz story without LLM, using available context."""
        data_source = model_name or f"{company_name} model"

        # Get spotter questions from use case config (best-effort)
        spotter_qs = []
        try:
            from demo_personas import parse_use_case, get_use_case_config
            v, f = parse_use_case(use_case or '')
            uc_cfg = get_use_case_config(v or "Generic", f or "Generic")
            for q in uc_cfg.get("liveboard_questions", []):
                if q.get("required") and q.get("spotter_qs"):
                    spotter_qs.append(q["spotter_qs"][0])
        except:
            pass

        story = f"""# Spotter Viz Story: {company_name}
## {use_case}

*Copy these prompts into ThoughtSpot Spotter Viz to build this liveboard interactively.*

---

### Step 1: Set Context
> "Create a new liveboard for {company_name} {use_case} using the {data_source} data source."

**Expected result:** Empty liveboard created with the correct data source connected.

### Step 2: Add Key KPIs
> "Add KPI cards showing the main metrics with weekly sparklines."

**Expected result:** KPI tiles with sparkline trends at the top of the liveboard.

### Step 3: Add Trend Analysis
> "Add a line chart showing how the primary metric has trended over the last 12 months."

**Expected result:** Time-series visualization showing monthly trends.

### Step 4: Add Category Breakdown
> "Show a bar chart breaking down performance by the main dimension."

**Expected result:** Categorical breakdown chart.

### Step 5: Add Comparison
> "Add a comparison showing this period vs. last period."

**Expected result:** Period-over-period comparison visualization.
"""
        if spotter_qs:
            story += "\n### Step 6: Explore with Spotter Questions\n"
            for i, q in enumerate(spotter_qs[:3]):
                story += f'> "{q}"\n\n'
        story += """
---
*Refine the liveboard further by asking Spotter Viz to adjust colors, reorganize tiles, or add filters.*
"""
        return story

    def run_research(self, company, use_case):
        """Run the research phase (non-streaming variant of run_research_streaming).

        Returns a markdown synopsis string on success, an error string on LLM
        failure, or None when the website could not be accessed.
        """
        import time
        self.log_feedback(f"🔍 Starting research for {company} - {use_case}")
        try:
            # Initialize demo builder if needed OR if company/use_case changed
            # CRITICAL: Always create fresh DemoBuilder when company/use_case changes
            # to avoid persisting prompts/data from previous runs
            needs_new_builder = (
                not self.demo_builder
                or self.demo_builder.company_url != company
                or self.demo_builder.use_case != use_case
            )
            if needs_new_builder:
                if self.demo_builder:
                    self.log_feedback(f"🔄 Company/use case changed - creating fresh DemoBuilder (was: {self.demo_builder.company_url}/{self.demo_builder.use_case})")
                else:
                    self.log_feedback("Initializing DemoBuilder...")
                self.demo_builder = DemoBuilder(
                    use_case=use_case,
                    company_url=company
                )

            # Extract website content
            self.log_feedback(f"Extracting website content from {company}...")
            url = company if company.startswith('http') else f"https://{company}"
            website = Website(url)
            self.demo_builder.website_data = website

            # Check if website extraction failed
            # (unlike the streaming variant, this path aborts instead of falling back)
            if not website.text or len(website.text) == 0:
                error_msg = f"❌ **Couldn't access the website**\n\n"
                if website.error_message:
                    error_msg += f"**Error:** {website.error_message}\n\n"
                error_msg += f"**URL:** {url}\n\n"
                error_msg += "**Troubleshooting:**\n"
                error_msg += "- Verify the URL is correct and accessible in your browser\n"
                error_msg += "- Check if the site requires authentication\n"
                error_msg += "- The site may be blocking automated requests\n"
                error_msg += "- Try accessing the site manually to confirm it's working\n\n"
                error_msg += "Please check the URL and try again."
                self.log_feedback(error_msg)
                return None

            self.log_feedback(f"Extracted {len(website.text)} characters from {website.title}")

            # Get LLM provider
            model = self.settings.get('model', DEFAULT_LLM_MODEL)
            provider_name, model_name = map_llm_display_to_provider(model)
            self.log_feedback(f"Using {provider_name}/{model_name} for research...")

            # Initialize researcher
            researcher = MultiLLMResearcher(provider=provider_name, model=model_name)

            # Company Analysis
            self.log_feedback("Analyzing company...")
            system_prompt, user_prompt = build_company_analysis_prompt(
                use_case, website.title, url, website.text,
                len(website.css_links), website.logo_candidates
            )
            messages = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ]

            # Stream the response
            response = researcher.make_request(messages, temperature=0.3, max_tokens=4000, stream=True)
            company_analysis = ""
            for chunk in response:
                chunk_text = researcher.extract_chunk_content(chunk)
                if chunk_text:
                    company_analysis += chunk_text
                    self.log_feedback(f"Company: {chunk_text[:50]}...")

            from prompt_logger import log_researcher_call
            log_researcher_call("research_company", researcher, messages, company_analysis, logger=self._prompt_logger)

            self.demo_builder.company_analysis_results = company_analysis
            self.log_feedback("✅ Company analysis complete!")

            # Industry Research
            self.log_feedback("Researching industry best practices...")
            system_prompt, user_prompt = build_industry_research_prompt(use_case, company_analysis)
            messages = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ]

            # Stream the response
            response = researcher.make_request(messages, temperature=0.4, max_tokens=4000, stream=True)
            industry_research = ""
            for chunk in response:
                chunk_text = researcher.extract_chunk_content(chunk)
                if chunk_text:
                    industry_research += chunk_text
                    self.log_feedback(f"Research: {chunk_text[:50]}...")
            log_researcher_call("research_industry", researcher, messages, industry_research, logger=self._prompt_logger)
            self.demo_builder.industry_research_results = industry_research
            self.log_feedback("✅ Industry research complete!")
            # Update stage
            self.demo_builder.advance_stage()
            self.demo_builder.set_ready()
            self.research_results = {
                'company': company,
                'use_case': use_case,
                'completed': True
            }
            self.log_feedback("✅ Research completed successfully!")
            # Generate synopsis shown to the user in chat
            company_name = self.demo_builder.extract_company_name()
            synopsis = f"""✅ **Research Complete!**

**Company:** {company_name}
**Use Case:** {use_case}

**What I learned:**
- Analyzed company website and business model
- Researched industry best practices for {use_case}
- Generated context for building realistic demo data

**Ready to create the DDL now?** (Type 'yes' to proceed or 'no' to redo research)"""
            return synopsis
        except Exception as e:
            import traceback
            error_msg = f"❌ Research failed: {str(e)}\n{traceback.format_exc()}"
            self.log_feedback(error_msg)
            return f"❌ Research failed: {str(e)}"

    def run_ddl_creation(self):
        """Run DDL creation.

        Builds a Snowflake-specific DDL prompt from research context, geo
        scope, the use-case config, and any user overrides, sends it to the
        configured LLM, and stores the result on
        ``self.demo_builder.schema_generation_results``.

        Returns:
            Tuple of (chat response markdown, DDL string); on failure the
            DDL string is empty.
        """
        self.log_feedback("📝 Starting DDL creation...")
        try:
            # Get timestamp for schema naming
            from datetime import datetime
            now = datetime.now()
            yymmdd = now.strftime("%y%m%d")
            hhmmss = now.strftime("%H%M%S")
            # Clean company and use case names (5 chars company, 3 chars use case)
            company_clean = self.demo_builder.extract_company_name().replace(" ", "").replace(".", "")[:5]
            usecase_clean = self.demo_builder.use_case.replace(" ", "").replace("-", "")[:3]
            schema_name = f"DM{yymmdd}_{hhmmss}_{company_clean}_{usecase_clean}"
            # Build DDL generation prompt with geo context
            geo_scope = self.settings.get('geo_scope', 'USA Only')
            if geo_scope == 'USA Only':
                geo_guidance = """GEOGRAPHY REQUIREMENTS:
- All geographic data should be USA-based (US states, US cities)
- Use USD as the only currency (no CURRENCY_CODE column needed, or set to 'USD')
- State columns should contain US state names or abbreviations
- Do not include international addresses or locations"""
            else:
                geo_guidance = """GEOGRAPHY REQUIREMENTS:
- Include international geographic data (multiple countries, global cities)
- Include CURRENCY_CODE column with valid ISO currency codes (USD, EUR, GBP, MXN, CAD, JPY, etc.)
- State/Province columns can contain international regions"""
            # Check for user context that should override defaults
            user_context = getattr(self, 'generic_use_case_context', '') or ''
            user_override_section = ""
            if user_context.strip():
                user_override_section = f"""
**USER REQUIREMENTS (OVERRIDE DEFAULTS ABOVE):**
{user_context}

If user requirements conflict with defaults above, FOLLOW THE USER REQUIREMENTS.
For example, if user says "2 fact tables", create 2 fact tables instead of 1.
"""
            # Get use case config for structured guidance
            v = self.vertical or "Generic"
            f = self.function or "Generic"
            uc_config = self.use_case_config or get_use_case_config(v, f)
            # Build use case context section (only sections present in the config)
            uc_context_parts = []
            if uc_config.get('entities'):
                uc_context_parts.append(f"- **Core Entities:** {', '.join(uc_config['entities'])}")
            if uc_config.get('kpis'):
                uc_context_parts.append(f"- **Required KPIs:** {', '.join(uc_config['kpis'])}")
            if uc_config.get('industry_terms'):
                uc_context_parts.append(f"- **Industry Terms:** {', '.join(uc_config['industry_terms'])}")
            if uc_config.get('data_patterns'):
                uc_context_parts.append(f"- **Data Patterns:** {', '.join(uc_config['data_patterns'])}")
            uc_context = "\n".join(uc_context_parts)
            use_case_guidance = ""
            if uc_context:
                use_case_guidance = f"""
USE CASE FRAMEWORK ({v} × {f}):
{uc_context}

Design tables and columns that support these KPIs and entities.
"""
            # Full LLM prompt: research context + geo rules + use-case framework
            # + hard Snowflake syntax constraints for the generated DDL.
            schema_prompt = f"""Create ONLY complete SQL DDL statements for a {self.demo_builder.use_case} demo database.

{self.demo_builder.get_research_context()}

{geo_guidance}

{use_case_guidance}

SCHEMA DESIGN REQUIREMENTS (defaults):
- **USE STAR SCHEMA PATTERN**: Create 1 fact table and 3-4 dimension tables (unless user specifies otherwise)
- Fact table contains measurable business events with foreign keys to dimensions
- Dimension tables contain descriptive attributes (customers, products, dates, locations)
- Use simple one-to-many relationships (dimensions → fact table)
- NO circular joins, NO complex many-to-many relationships, NO bridge tables
- Keep it simple and easy to demonstrate
{user_override_section}
TECHNICAL REQUIREMENTS:
- Return ONLY executable SQL DDL statements
- **SNOWFLAKE SYNTAX ONLY** - NOT MySQL or other databases
- DO NOT USE AUTO_INCREMENT - use IDENTITY(1,1) for Snowflake auto-increment
- DO NOT USE JSON DATA TYPE - use VARCHAR instead for any complex data
- DO NOT USE CHECK CONSTRAINTS (e.g., `CHECK (...)` or `CONSTRAINT ... CHECK (...)`)
- Use only basic SQL data types: INT, VARCHAR, DATE, DECIMAL, TIMESTAMP
- Create dimension tables first, then the fact table
- No explanatory text, no markdown, no comments
- Include realistic column names that match the business context
- Add proper constraints and relationships

DATE COLUMN REQUIREMENTS (IMPORTANT):
- Use a single date column named 'FULL_DATE' (type DATE)
- DO NOT create separate columns for: DAY_OF_MONTH, DAY_OF_WEEK, WEEK_OF_YEAR, MONTH_NUMBER, MONTH_NAME, QUARTER, YEAR, FISCAL_YEAR, FISCAL_QUARTER
- ThoughtSpot automatically extracts day/week/month/quarter/year from DATE columns
- DO include these useful custom fields: IS_WEEKEND (BOOLEAN), IS_HOLIDAY (BOOLEAN), HOLIDAY_NAME (VARCHAR)
- These custom fields support queries about weekends and holidays (Black Friday, Prime Day, etc.)

TABLE NAMING REQUIREMENTS:
- **DO NOT use DIM_ or FACT_ prefixes** (e.g., NOT DIM_PRODUCT or FACT_SALES)
- Use simple, descriptive table names (e.g., PRODUCTS, CUSTOMERS, SALES, ORDERS)
- Dimension tables: Use plural nouns (CUSTOMERS, PRODUCTS, WAREHOUSES)
- Fact tables: Use descriptive names (SALES, TRANSACTIONS, ORDERS, INVENTORY_MOVEMENTS)
- Keep names concise and business-friendly

SNOWFLAKE SYNTAX EXAMPLES:
- Auto-increment: ColumnID INT IDENTITY(1,1) PRIMARY KEY
- NOT: ColumnID INT PRIMARY KEY AUTO_INCREMENT
- String: CustomerName VARCHAR(100) NOT NULL
- Decimal: Amount DECIMAL(10,2)
- Date: CreatedDate DATE DEFAULT CURRENT_DATE()

Generate complete CREATE TABLE statements with proper Snowflake syntax and dependencies."""
            self.demo_builder.ddl_generation_prompt = schema_prompt
            # Get LLM provider
            model = self.settings.get('model', DEFAULT_LLM_MODEL)
            provider_name, model_name = map_llm_display_to_provider(model)
            self.log_feedback(f"Using {provider_name}/{model_name} for DDL generation...")
            # Initialize researcher
            researcher = MultiLLMResearcher(provider=provider_name, model=model_name)
            messages = [
                {"role": "system", "content": "You are a database architect creating Snowflake schemas for analytics demos."},
                {"role": "user", "content": schema_prompt}
            ]
            # Make request (non-streaming for DDL); low temperature for determinism
            self.log_feedback("Generating DDL...")
            import time as _time
            _ddl_start = _time.time()
            ddl_result = researcher.make_request(messages, temperature=0.2, max_tokens=4000, stream=False)
            _ddl_ms = int((_time.time() - _ddl_start) * 1000)
            # Log the prompt/response with call duration
            from prompt_logger import log_researcher_call
            log_researcher_call("ddl", researcher, messages, ddl_result or "", duration_ms=_ddl_ms, logger=self._prompt_logger)
            # Validate DDL result: must be a string containing at least one CREATE TABLE
            if not ddl_result or not isinstance(ddl_result, str) or 'CREATE TABLE' not in ddl_result.upper():
                raise Exception(f"DDL generation failed or produced invalid output.
THIRD: Check for numeric/measure columns (amount, quantity, price, etc.)
            measure_count = 0
            for col in columns:
                col_name = col['name'].lower()
                col_type = col.get('type', '').upper()
                if any(word in col_name for word in ['amount', 'quantity', 'price', 'total', 'cost', 'revenue']):
                    measure_count += 1
                if 'DECIMAL' in col_type or 'FLOAT' in col_type or 'DOUBLE' in col_type:
                    measure_count += 1
            # If has 3+ measure-like columns, likely a fact table (raised threshold)
            return measure_count >= 3

        # Emit the generated script line-by-line into code_parts; joined at the end.
        code_parts = []
        # Header
        code_parts.append("from dotenv import load_dotenv")
        code_parts.append("import os")
        code_parts.append("import snowflake.connector")
        code_parts.append("from faker import Faker")
        code_parts.append("import random")
        code_parts.append("from datetime import datetime, timedelta")
        code_parts.append("")
        code_parts.append("load_dotenv()")
        code_parts.append("")
        code_parts.append("from snowflake_auth import get_snowflake_connection_params")
        code_parts.append("")
        # Build populate functions
        table_names = []
        for table_name, table_data in schema_info.items():
            columns = table_data['columns']
            # Build column lists (skip auto-increment IDs and malformed names)
            col_names = []
            col_types = []  # Track types for each valid column
            for col in columns:
                col_name = col['name']
                col_type = col.get('type', 'VARCHAR').upper()
                # Skip IDs (let database auto-generate)
                if col_name.lower() in ['id', table_name.lower() + '_id']:
                    continue
                if 'IDENTITY' in col_type or 'AUTOINCREMENT' in col_type:
                    continue
                # Skip malformed column names (numbers, special chars, etc)
                if not col_name.replace('_', '').replace(' ', '').isalnum():
                    continue
                if col_name.isdigit():
                    continue
                if any(char in col_name for char in ['(', ')', ',', ';']):
                    continue
                col_names.append(col_name)
                col_types.append(col_type)
            if not col_names:
                continue  # Skip tables with no insertable columns
            # Only add to table_names if we're actually creating a function for it
            # Determine row count based on table type
            is_fact = is_fact_table(table_name, columns)
            row_count = fact_rows if is_fact else dim_rows
            table_names.append((table_name, row_count))
            placeholders = ', '.join(['%s'] * len(col_names))
            col_list = ', '.join(col_names)
            # Build fake data generation (ONLY for valid columns in col_names)
            # Each entry of fake_values is a Python *expression string* that is
            # spliced into the generated script, not evaluated here.
            fake_values = []
            for i, col_name in enumerate(col_names):
                col_type = col_types[i]  # Use the col_type we saved earlier
                # Check for special business columns FIRST (by name pattern)
                col_name_upper = col_name.upper()
                # FLAG columns
                if 'FLAG' in col_name_upper or col_name_upper.startswith('IS_'):
                    if 'INT' in col_type or 'NUMBER' in col_type:
                        fake_values.append("random.choice([0, 1])")
                    else:
                        fake_values.append("random.choice(['Y', 'N'])")
                # QUARTER columns - Q1, Q2, Q3, Q4
                elif 'QUARTER' in col_name_upper or col_name_upper == 'QTR':
                    if 'INT' in col_type or 'NUMBER' in col_type:
                        fake_values.append("random.randint(1, 4)")
                    else:
                        fake_values.append("random.choice(['Q1', 'Q2', 'Q3', 'Q4'])")
                # MONTH columns
                elif 'MONTH' in col_name_upper:
                    if 'INT' in col_type or 'NUMBER' in col_type:
                        fake_values.append("random.randint(1, 12)")
                    else:
                        fake_values.append("random.choice(['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])")
                # YEAR columns
                elif 'YEAR' in col_name_upper:
                    fake_values.append("random.randint(2020, 2024)")
                # STATUS columns (short codes)
                elif 'STATUS' in col_name_upper and 'CHAR' in col_type:
                    fake_values.append("random.choice(['ACTIVE', 'PENDING', 'CLOSED'])")
                # TYPE columns (short codes) - but NOT CATEGORY (handled below with full names)
                elif 'TYPE' in col_name_upper and 'CHAR' in col_type and 'CATEGORY' not in col_name_upper:
                    fake_values.append("random.choice(['A', 'B', 'C'])")
                elif 'VARCHAR' in col_type or 'TEXT' in col_type or 'STRING' in col_type or 'CHAR' in col_type:
                    # Extract VARCHAR length - always truncate generated values to fit
                    import re
                    length_match = re.search(r'\((\d+)\)', col_type)
                    varchar_length = int(length_match.group(1)) if length_match else 255
                    # Generate domain-specific realistic data based on column name, then truncate to fit
                    base_value = None
                    if 'NAME' in col_name_upper and 'COMPANY' not in col_name_upper:
                        # Check domain-specific name columns BEFORE falling back to fake.name()
                        # NOTE(review): this branch also catches CAMPAIGN_NAME / CENTER_NAME,
                        # shadowing the dedicated CAMPAIGN/CENTER branches below — confirm intent.
                        if 'DRUG' in col_name_upper or 'MEDICATION' in col_name_upper or 'THERAPEUTIC' in col_name_upper:
                            base_value = "random.choice(['Lipitor', 'Humira', 'Eliquis', 'Keytruda', 'Revlimid', 'Opdivo', 'Ozempic', 'Dupixent', 'Trulicity', 'Entresto', 'Metformin', 'Atorvastatin', 'Lisinopril', 'Amlodipine', 'Metoprolol', 'Omeprazole', 'Simvastatin', 'Losartan', 'Albuterol', 'Gabapentin'])"
                        elif 'PRODUCT' in col_name_upper:
                            base_value = "random.choice(['Laptop Pro 15', 'Wireless Mouse 2.4GHz', 'USB-C Cable 6ft', 'Monitor Stand Adjustable', 'Mechanical Keyboard RGB', 'Noise Canceling Headphones', '1080p Webcam', 'Portable SSD 1TB', 'Power Bank 20000mAh', 'Tablet 10 inch', 'Smart Watch', 'Bluetooth Speaker', 'Gaming Mouse Pad', 'Phone Case', 'Screen Protector', 'Charging Cable', 'Desk Lamp LED', 'Laptop Bag', 'Wireless Earbuds', 'USB Hub'])"
                        elif 'CUSTOMER' in col_name_upper or 'USER' in col_name_upper:
                            base_value = "fake.name()"
                        elif 'SELLER' in col_name_upper or 'VENDOR' in col_name_upper:
                            base_value = "random.choice(['Amazon', 'Best Buy', 'Walmart', 'Target', 'Costco', 'Home Depot', 'Lowes', 'Macys', 'Nordstrom', 'Kohls'])"
                        else:
                            base_value = "fake.name()"
                    elif 'CATEGORY' in col_name_upper:
                        base_value = "random.choice(['Electronics', 'Home & Kitchen', 'Books', 'Clothing', 'Sports', 'Toys', 'Beauty', 'Automotive'])"
                    elif 'BRAND' in col_name_upper:
                        base_value = "random.choice(['Samsung', 'Apple', 'Sony', 'LG', 'Dell', 'HP', 'Lenovo', 'Amazon Basics', 'Anker', 'Logitech'])"
                    elif 'CHANNEL' in col_name_upper or 'SOURCE' in col_name_upper:
                        # Marketing channels for lead generation / call tracking
                        base_value = "random.choice(['Google Ads Search', 'Bing Ads', 'Facebook Ads', 'LinkedIn Ads', 'Instagram Ads', 'Twitter Ads', 'Display Network', 'Programmatic Display', 'Retargeting', 'TV Commercial', 'Radio Ads', 'Billboard', 'Print Ads', 'Direct Mail', 'Email Newsletter', 'Organic Search', 'Social Media Organic', 'Google My Business', 'Referral', 'Affiliate Marketing', 'Content Marketing', 'Webinar', 'Podcast Sponsorship'])"
                    elif 'CAMPAIGN' in col_name_upper and ('NAME' in col_name_upper or col_name_upper == 'CAMPAIGN_NAME'):
                        # Marketing campaign names (usually reference the channel)
                        base_value = "random.choice(['Google Ads Q4 Lead Gen', 'Facebook Black Friday Promo', 'LinkedIn Spring Campaign', 'Instagram New Product Launch', 'Email Brand Awareness', 'Display Holiday Special', 'Google Ads Summer Sale', 'Facebook Back to School', 'LinkedIn Valentine Promo', 'Google Shopping Cyber Monday', 'Email Free Trial Offer', 'Webinar Registration Q3', 'Email Nurture Series', 'Display Retargeting Q3', 'Google Ads Demo Request', 'Referral Rewards Program', 'Google Ads Year End Sale', 'Facebook New Year Campaign', 'Instagram Flash Sale', 'Email Limited Time Offer', 'Google Ads Early Bird', 'LinkedIn VIP Member Drive', 'Facebook Product Teaser', 'Display Conference Promo', 'Email Partner Campaign', 'Google Ads Seasonal', 'Facebook Customer Appreciation', 'Email Win Back Campaign', 'LinkedIn Upsell Drive', 'Display Cross-Sell Q4'])"
                    elif ('CENTER' in col_name_upper and 'NAME' in col_name_upper) or ('CALL' in col_name_upper and 'CENTER' in col_name_upper):
                        # Call center names
                        base_value = "random.choice(['New York Contact Center', 'Los Angeles Support Hub', 'Chicago Call Center', 'Dallas Operations Center', 'Phoenix Customer Care', 'Philadelphia Service Center', 'San Diego Support Center', 'Miami Contact Hub', 'Atlanta Operations', 'Denver Call Center', 'Seattle Support Center', 'Boston Customer Service', 'Portland Contact Center', 'Austin Operations Hub', 'Las Vegas Call Center', 'Toronto Support Center', 'Offshore Manila Center', 'Offshore Bangalore Hub', 'Remote East Coast Team', 'Remote West Coast Team', 'Central Support Center', 'National Call Center', 'Regional North Hub', 'Regional South Hub', 'Enterprise Support Center'])"
                    elif 'DESCRIPTION' in col_name_upper or 'DESC' in col_name_upper:
                        base_value = "random.choice(['High quality product', 'Best seller', 'Customer favorite', 'New arrival', 'Limited edition', 'Premium quality'])"
                    elif 'EMAIL' in col_name_upper:
                        base_value = "fake.email()"
                    elif 'PHONE' in col_name_upper:
                        base_value = "f'{random.randint(200, 999)}-{random.randint(200, 999)}-{random.randint(1000, 9999)}'"
                    elif 'ADDRESS' in col_name_upper or 'STREET' in col_name_upper:
                        base_value = "f'{random.randint(1, 9999)} {random.choice([\"Main\", \"Oak\", \"Park\", \"Maple\", \"Cedar\", \"Elm\", \"Washington\", \"Lake\", \"Hill\", \"Broadway\"])} {random.choice([\"St\", \"Ave\", \"Blvd\", \"Dr\", \"Ln\"])}'"
                    elif 'CITY' in col_name_upper:
                        base_value = "random.choice(['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix', 'Philadelphia', 'San Antonio', 'San Diego', 'Dallas', 'San Jose', 'Austin', 'Seattle', 'Denver', 'Boston', 'Portland', 'Miami', 'Atlanta', 'Detroit', 'Las Vegas', 'Toronto'])"
                    elif 'STATE' in col_name_upper or 'PROVINCE' in col_name_upper:
                        base_value = "random.choice(['California', 'Texas', 'New York', 'Florida', 'Illinois', 'Ohio', 'Georgia', 'Washington', 'Virginia', 'Arizona', 'Colorado', 'Oregon', 'Nevada', 'Utah', 'Iowa'])"
                    elif 'COUNTRY' in col_name_upper:
                        base_value = "random.choice(['USA', 'Canada', 'UK', 'Germany', 'France', 'Japan', 'Australia', 'India', 'China', 'Brazil', 'Mexico', 'Spain', 'Italy', 'Netherlands', 'Sweden'])"
                    elif 'ZIP' in col_name_upper or 'POSTAL' in col_name_upper:
                        base_value = "random.choice(['10001', '90210', '60601', '77001', '85001', '19101', '78201', '92101', '75201', '95101', '78701', '98101', '80201', '02101', '97201'])"
                    elif 'COMPANY' in col_name_upper:
                        base_value = "random.choice(['Amazon', 'Microsoft', 'Apple Inc', 'Google LLC', 'Meta', 'Tesla Inc', 'Netflix', 'Adobe Inc', 'Oracle Corp', 'Salesforce', 'IBM Corp', 'Intel Corp', 'Cisco Systems', 'Dell Technologies', 'HP Inc'])"
                    # --- People names (columns without NAME in them) ---
                    elif any(kw in col_name_upper for kw in ['SALES_REP', 'SALESREP', 'ACCOUNT_REP', 'REP_ID']):
                        base_value = "fake.name()"
                    elif col_name_upper in ('REP', 'AGENT', 'REPRESENTATIVE'):
                        base_value = "fake.name()"
                    elif any(kw in col_name_upper for kw in ['MANAGER', 'SUPERVISOR', 'DIRECTOR', 'OWNER']):
                        base_value = "fake.name()"
                    elif any(kw in col_name_upper for kw in ['EMPLOYEE', 'EMP_', 'STAFF', 'ASSOCIATE']):
                        base_value = "fake.name()"
                    elif any(kw in col_name_upper for kw in ['CONTACT', 'ASSIGNED_TO', 'CREATED_BY', 'UPDATED_BY', 'APPROVED_BY']):
                        base_value = "fake.name()"
                    elif any(kw in col_name_upper for kw in ['PHYSICIAN', 'DOCTOR', 'PROVIDER', 'PRESCRIBER', 'HCP']):
                        base_value = "fake.name()"
                    elif any(kw in col_name_upper for kw in ['PATIENT', 'MEMBER', 'SUBSCRIBER']):
                        base_value = "fake.name()"
                    # --- Geographic / organizational ---
                    elif 'REGION' in col_name_upper:
                        base_value = "random.choice(['Northeast', 'Southeast', 'Midwest', 'Southwest', 'West', 'Pacific Northwest', 'Mid-Atlantic', 'New England', 'South Central', 'Mountain West', 'Great Lakes', 'Gulf Coast'])"
                    elif 'TERRITORY' in col_name_upper:
                        base_value = "random.choice(['Northeast Territory', 'Southeast Territory', 'Central Territory', 'Western Territory', 'Pacific Territory', 'Mountain Territory', 'Great Lakes Territory', 'Southern Territory', 'Mid-Atlantic Territory', 'Texas Territory'])"
                    elif 'DEPARTMENT' in col_name_upper or 'DEPT' in col_name_upper:
                        base_value = "random.choice(['Sales', 'Marketing', 'Finance', 'Operations', 'Engineering', 'Human Resources', 'Customer Success', 'Legal', 'Product', 'IT', 'Supply Chain', 'Research'])"
                    elif 'SEGMENT' in col_name_upper:
                        base_value = "random.choice(['Enterprise', 'Mid-Market', 'SMB', 'Consumer', 'Government', 'Education', 'Healthcare', 'Premium', 'Standard', 'Basic'])"
                    elif 'TIER' in col_name_upper:
                        base_value = "random.choice(['Platinum', 'Gold', 'Silver', 'Bronze', 'Premium', 'Standard', 'Basic'])"
                    elif 'PRIORITY' in col_name_upper:
                        base_value = "random.choice(['Critical', 'High', 'Medium', 'Low', 'Urgent'])"
                    # --- Business domain ---
                    elif 'PAYMENT' in col_name_upper and ('METHOD' in col_name_upper or 'TYPE' in col_name_upper):
                        base_value = "random.choice(['Credit Card', 'Debit Card', 'ACH Transfer', 'Wire Transfer', 'Check', 'PayPal', 'Apple Pay', 'Google Pay'])"
                    elif 'SHIPPING' in col_name_upper or 'SHIP_METHOD' in col_name_upper:
                        base_value = "random.choice(['Standard Ground', 'Express 2-Day', 'Next Day Air', 'Economy', 'Freight', 'Same Day', 'International Standard', 'Priority Mail'])"
                    elif 'WAREHOUSE' in col_name_upper or 'FULFILLMENT' in col_name_upper:
                        base_value = "random.choice(['East Coast DC', 'West Coast DC', 'Central Hub', 'Southeast Warehouse', 'Pacific Distribution', 'Northeast Fulfillment', 'Texas DC', 'Midwest Hub', 'Mountain West DC', 'Southern Distribution'])"
                    elif 'STORE' in col_name_upper or 'LOCATION' in col_name_upper:
                        base_value = "random.choice(['Downtown Flagship', 'Mall of America', 'Northgate Plaza', 'Southside Center', 'Airport Terminal', 'University District', 'Waterfront Promenade', 'Tech Park', 'Suburban Commons', 'Metro Center', 'Eastside Galleria', 'Westfield Mall'])"
                    elif 'TITLE' in col_name_upper or 'JOB' in col_name_upper or 'ROLE' in col_name_upper or 'POSITION' in col_name_upper:
                        base_value = "random.choice(['VP Sales', 'Account Executive', 'Sales Manager', 'Director of Marketing', 'Product Manager', 'Data Analyst', 'Regional Manager', 'CFO', 'Operations Lead', 'Supply Chain Manager', 'Customer Success Manager', 'Business Analyst'])"
                    elif 'SUBCATEGORY' in col_name_upper or 'SUB_CATEGORY' in col_name_upper:
                        base_value = "random.choice(['Laptops', 'Smartphones', 'Headphones', 'Monitors', 'Tablets', 'Accessories', 'Networking', 'Storage', 'Printers', 'Cameras', 'Wearables', 'Audio'])"
                    elif 'CURRENCY' in col_name_upper:
                        base_value = "random.choice(['USD', 'EUR', 'GBP', 'CAD', 'JPY', 'AUD', 'MXN'])"
                    elif 'REASON' in col_name_upper:
                        base_value = "random.choice(['Price', 'Quality', 'Delivery Delay', 'Wrong Item', 'Defective', 'Changed Mind', 'Better Alternative', 'Budget Cut', 'Not as Described'])"
                    elif 'OUTCOME' in col_name_upper or 'RESULT' in col_name_upper or 'DISPOSITION' in col_name_upper:
                        base_value = "random.choice(['Won', 'Lost', 'Pending', 'Qualified', 'Disqualified', 'No Decision', 'Deferred'])"
                    elif 'STAGE' in col_name_upper or 'PHASE' in col_name_upper:
                        base_value = "random.choice(['Prospecting', 'Qualification', 'Proposal', 'Negotiation', 'Closed Won', 'Closed Lost', 'Discovery', 'Demo', 'Contract Review'])"
                    elif 'RATING' in col_name_upper or 'GRADE' in col_name_upper or 'SCORE' in col_name_upper:
                        base_value = "random.choice(['A', 'B', 'C', 'D', 'Excellent', 'Good', 'Average', 'Below Average'])"
                    elif 'COLOR' in col_name_upper:
                        base_value = "random.choice(['Black', 'White', 'Navy', 'Red', 'Blue', 'Gray', 'Green', 'Brown', 'Beige', 'Olive'])"
                    elif 'SIZE' in col_name_upper:
                        base_value = "random.choice(['XS', 'S', 'M', 'L', 'XL', 'XXL', 'One Size'])"
                    elif 'GENDER' in col_name_upper or 'SEX' in col_name_upper:
                        base_value = "random.choice(['Male', 'Female', 'Non-Binary', 'Prefer Not to Say'])"
                    elif 'INDUSTRY' in col_name_upper or 'VERTICAL' in col_name_upper:
                        base_value = "random.choice(['Technology', 'Healthcare', 'Financial Services', 'Retail', 'Manufacturing', 'Education', 'Government', 'Media', 'Energy', 'Real Estate'])"
                    elif 'SPECIALTY' in col_name_upper or 'SPECIALIZATION' in col_name_upper:
                        base_value = "random.choice(['Cardiology', 'Oncology', 'Orthopedics', 'Neurology', 'Pediatrics', 'Dermatology', 'Internal Medicine', 'Family Medicine', 'Psychiatry', 'Radiology'])"
                    elif 'DRUG' in col_name_upper or 'MEDICATION' in col_name_upper or 'THERAPEUTIC' in col_name_upper:
                        base_value = "random.choice(['Lipitor', 'Humira', 'Eliquis', 'Keytruda', 'Revlimid', 'Opdivo', 'Ozempic', 'Dupixent', 'Trulicity', 'Entresto'])"
                    elif 'SKU' in col_name_upper or 'ITEM_CODE' in col_name_upper or 'PRODUCT_CODE' in col_name_upper:
                        base_value = "f'SKU-{random.randint(10000, 99999)}'"
                    elif 'ORDER_NUMBER' in col_name_upper or 'ORDER_NUM' in col_name_upper or 'INVOICE_NUM' in col_name_upper:
                        base_value = "f'ORD-{random.randint(100000, 999999)}'"
                    elif 'URL' in col_name_upper or 'WEBSITE' in col_name_upper:
                        base_value = "fake.url()"
                    elif 'NOTES' in col_name_upper or 'COMMENT' in col_name_upper:
                        base_value = "random.choice(['Follow up next week', 'Priority customer', 'Pending review', 'No issues', 'Escalated', 'Resolved', 'Awaiting approval', 'On track'])"
                    else:
                        # Default: use faker word as last resort
                        base_value = "fake.word()"
                    # Always truncate to VARCHAR length - simple and works for all cases
                    fake_values.append(f"({base_value})[:{varchar_length}]")
                elif 'INT' in col_type or 'NUMBER' in col_type or 'BIGINT' in col_type:
                    fake_values.append("random.randint(1, 1000)")
                elif 'DECIMAL' in col_type or 'FLOAT' in col_type or 'DOUBLE' in col_type or 'NUMERIC' in col_type:
                    # Extract precision and scale from DECIMAL(p,s)
                    import re
                    decimal_match = re.search(r'\((\d+),\s*(\d+)\)', col_type)
                    if decimal_match:
                        precision = int(decimal_match.group(1))
                        scale = int(decimal_match.group(2))
                        # Max value is 10^(precision-scale) - 1, with 'scale' decimal places
                        # E.g., DECIMAL(3,2) -> max is 9.99, DECIMAL(5,2) -> max is 999.99
                        max_val = (10 ** (precision - scale)) - 1
                        fake_values.append(f"round(random.uniform(0, {max_val}), {scale})")
                    else:
                        # No precision specified, use safe defaults
                        fake_values.append("round(random.uniform(10, 1000), 2)")
                elif 'DATE' in col_type and 'TIME' not in col_type:
                    # DATE but not DATETIME/TIMESTAMP
                    fake_values.append("fake.date_between(start_date='-2y', end_date='today')")
                elif 'TIMESTAMP' in col_type or 'DATETIME' in col_type:
                    fake_values.append("fake.date_time_between(start_date='-2y', end_date='now')")
                elif 'BOOL' in col_type or 'BOOLEAN' in col_type:
                    # For Snowflake BOOLEAN, use True/False which will be converted to SQL TRUE/FALSE
                    fake_values.append("random.choice([True, False])")
                else:
                    # Unknown type - default to string
                    fake_values.append("str(fake.word())")
            fake_values_str = ', '.join(fake_values) if fake_values else "fake.word()"
            # Build function
            code_parts.append(f"def populate_{table_name.lower()}(cursor, fake):")
            code_parts.append(f'    """Populate {table_name} table ({"fact" if is_fact else "dimension"})"""')
            code_parts.append("    data = []")
            code_parts.append(f"    for _ in range({row_count}):")
            code_parts.append(f"        data.append(({fake_values_str},))")
            code_parts.append("")
            code_parts.append("    cursor.executemany(")
            code_parts.append(f'        "INSERT INTO {table_name} ({col_list}) VALUES ({placeholders})",')
            code_parts.append("        data")
            code_parts.append("    )")
            code_parts.append(f'    print(f"✅ Inserted {{len(data)}} rows into {table_name}")')
            code_parts.append("")
        # Safety check - ensure we have at least one table
        if not table_names:
            raise Exception("No valid tables found in schema. All tables were skipped due to having no insertable columns.")
        # Main function
        code_parts.append("def main():")
        code_parts.append("    conn_params = get_snowflake_connection_params()")
        code_parts.append("    conn_params.pop('schema', None) # Remove to avoid duplicate")
        code_parts.append("    conn = snowflake.connector.connect(**conn_params, schema=os.getenv('SNOWFLAKE_SCHEMA'), autocommit=False)")
        code_parts.append("")
        code_parts.append("    try:")
        code_parts.append("        cursor = conn.cursor()")
        code_parts.append("        fake = Faker()")
        code_parts.append("")
        # Add function calls
        for table_name, row_count in table_names:
            code_parts.append(f"        populate_{table_name.lower()}(cursor, fake)")
        code_parts.append("")
        code_parts.append("        conn.commit()")
        code_parts.append('        print("✅ All data committed successfully")')
        code_parts.append("    except Exception as e:")
        code_parts.append("        conn.rollback()")
        code_parts.append('        print(f"❌ Error: {str(e)}")')
        code_parts.append("        raise")
        code_parts.append("    finally:")
        code_parts.append("
        cursor.close()")
        code_parts.append("        conn.close()")
        code_parts.append("")
        code_parts.append('if __name__ == "__main__":')
        code_parts.append("    main()")
        code = '\n'.join(code_parts)
        # Validate the generated code compiles before handing it back
        try:
            compile(code, '', 'exec')
            self.log_feedback("✅ Template validated successfully before return")
        except SyntaxError as e:
            self.log_feedback(f"❌ TEMPLATE GENERATION BUG: {e}")
            self.log_feedback(f" Error at line {e.lineno}: {e.msg}")
            # Show the problematic lines
            lines = code.split('\n')
            if e.lineno:
                start = max(0, e.lineno - 3)
                end = min(len(lines), e.lineno + 2)
                self.log_feedback(f"\n Context:")
                for i in range(start, end):
                    marker = ">>> " if i == e.lineno - 1 else "    "
                    self.log_feedback(f"{marker}{i+1:3}: {lines[i]}")
            raise Exception(f"Template generation has a bug: {e}")
        # Debug: Log first 1000 chars
        self.log_feedback(f"Generated template preview:\n{code[:1000]}")
        return code

    def run_population(self):
        """Run data population code generation.

        Validates the previously generated DDL, parses it into a table map,
        and marks the demo for LegitData-driven population (no local faker
        script is generated on this path).

        Returns:
            Tuple of (chat response markdown, population code string); on
            failure the first element is an error message.
        """
        self.log_feedback("🔢 Starting data population...")
        try:
            from schema_utils import parse_ddl_schema, generate_schema_constrained_prompt
            import re  # NOTE(review): re and generate_schema_constrained_prompt are unused in the visible body — confirm before removing
            # Parse use case into vertical and function
            vertical, function = parse_use_case(self.demo_builder.use_case)
            config = get_use_case_config(vertical or "Generic", function or "Generic")
            # Build business context for population
            # Handle both new config structure and backward compatibility
            target_persona = config.get('target_persona', 'Business Leader')
            business_problem = config.get('business_problem', 'Need for faster, data-driven decisions')
            demo_objectives = config.get('demo_objectives', 'Show self-service analytics and business insights')
            # For generic cases, use the use_case_name
            use_case_display = config.get('use_case_name', self.demo_builder.use_case)
            # NOTE(review): business_context is built but not consumed in the visible
            # LegitData path — presumably kept for an LLM-generation fallback; confirm.
            business_context = f"""
BUSINESS CONTEXT:
- Use Case: {use_case_display}
- Target Persona: {target_persona}
- Business Problem: {business_problem}
- Demo Objectives: {demo_objectives}

MANDATORY CONNECTION CODE (MUST BE COMPLETE):
```python
from dotenv import load_dotenv
import os
import snowflake.connector
from faker import Faker
import random
from datetime import datetime, timedelta

load_dotenv()

from snowflake_auth import get_snowflake_connection_params

def main():
    conn_params = get_snowflake_connection_params()
    conn = snowflake.connector.connect(**conn_params, schema=os.getenv('SNOWFLAKE_SCHEMA'), autocommit=False)

    try:
        cursor = conn.cursor()
        fake = Faker()

        # [YOUR POPULATION CODE HERE - populate tables]

        conn.commit()
        print("✅ All data committed successfully")
    except Exception as e:
        conn.rollback()
        print(f"❌ Error: {{str(e)}}")
        raise
    finally:
        cursor.close()
        conn.close()

if __name__ == "__main__":
    main()
```

CRITICAL REQUIREMENTS:
1. **COMPLETE try/except/finally blocks** - NO incomplete blocks
2. Use cursor.executemany() for batch inserts with %s placeholders (NOT ?)
3. Create baseline normal data (1000+ rows per table)
4. Include strategic outliers with structured comments
5. NO explanatory text, just executable Python code
6. **DO NOT leave try blocks incomplete** - always include except and finally
7. Use Faker library for realistic data generation
8. **PROPER INDENTATION** - Use 4 spaces per indent level, NO TABS
9. **SYNTAX CHECK** - Ensure all code is valid Python with correct indentation

RETURN FORMAT:
- Return ONLY the complete Python code
- Start with imports, end with if __name__ == "__main__"
- NO markdown, NO explanations, NO comments outside the code
- All code must be properly indented and executable
"""
            # Parse schema and generate prompt
            self.log_feedback("Parsing DDL schema...")
            # Validate DDL exists
            if not self.demo_builder.schema_generation_results:
                raise Exception("❌ DDL is missing. Please create DDL first.")
            self.log_feedback(f"DDL length: {len(self.demo_builder.schema_generation_results)} characters")
            schema_info = parse_ddl_schema(self.demo_builder.schema_generation_results)
            if not schema_info:
                raise Exception("❌ Failed to parse DDL schema. 
DDL may be malformed.") self.log_feedback(f"Parsed {len(schema_info)} tables from DDL") self.log_feedback("Using LegitData for data generation...") self.demo_builder.data_population_results = "LEGITDATA" self.demo_builder.population_code_source = "legitdata" self.population_code = "# Data generated by LegitData" self.demo_builder.advance_stage() self.log_feedback("✅ Ready for deployment with LegitData!") response = f"""✅ **Data Population Ready!** LegitData will generate realistic, AI-powered data. **When you're ready, type 'deploy'** to: - Create Snowflake schema & tables - Populate with generated data - Create ThoughtSpot model & liveboard ⏱️ *This takes 2-5 minutes - watch the terminal for progress.*""" return response, self.population_code except Exception as e: import traceback error_msg = f"❌ Population failed: {str(e)}\n\n{traceback.format_exc()}\n\n**Would you like to retry?** (Type 'yes' to retry)" self.log_feedback(error_msg) return error_msg, self.population_code if hasattr(self, 'population_code') else "" def run_deployment(self): """Run deployment to Snowflake using LegitData (non-streaming version)""" # Consume the streaming version and return final result result = None for update in self.run_deployment_streaming(): result = update return result def run_deployment_streaming(self): """Run deployment to Snowflake using LegitData - yields progress updates""" _slog = self._session_logger _t_deploy = _slog.log_start("deploy") if _slog else None _deploy_error = None progress = "" # Clear and initialize live progress for Snowflake deployment self.live_progress_log = ["=" * 60, "SNOWFLAKE DEPLOYMENT STARTING", "=" * 60, ""] def log_progress(msg): """Log to both AI feedback and live progress""" self.log_feedback(msg) self.live_progress_log.append(msg) try: # Ensure deploy-time modules that still use os.getenv() see Supabase admin settings. 
            inject_admin_settings_to_env()

            from cdw_connector import SnowflakeDeployer

            # Step 1: Connect
            progress = "**Step 1/3: Connecting to Snowflake...**"
            yield progress
            log_progress("Connecting to Snowflake...")

            deployer = SnowflakeDeployer()
            success, message = deployer.connect()
            if not success:
                raise Exception(f"Snowflake connection failed: {message}")

            progress += f"\n[OK] {message}"
            log_progress(f"[OK] {message}")
            yield progress

            # Step 2: Create schema and tables
            company_name = self.demo_builder.extract_company_name()
            progress += f"\n\n**Step 2/3: Creating schema and tables...**"
            progress += f"\n Company: {company_name}"
            progress += f"\n Use Case: {self.demo_builder.use_case}"
            yield progress
            log_progress("Creating schema and deploying DDL...")
            log_progress(f" Company: {company_name}")
            log_progress(f" Use Case: {self.demo_builder.use_case}")

            # Generate base name for schema
            from demo_prep import generate_demo_base_name
            naming_prefix = self.settings.get('object_naming_prefix', '')
            base_name = generate_demo_base_name(naming_prefix, company_name)
            log_progress(f" Base Name: {base_name}")

            success, schema_name, deploy_message = deployer.create_demo_schema_and_deploy(
                base_name,
                self.demo_builder.schema_generation_results
            )

            # DEBUG: Log what was passed
            ddl_passed = self.demo_builder.schema_generation_results
            log_progress(f"[DEBUG] DDL type passed to deployer: {type(ddl_passed)}")
            log_progress(f"[DEBUG] DDL is None: {ddl_passed is None}")
            if ddl_passed:
                log_progress(f"[DEBUG] DDL length: {len(ddl_passed)}")
                log_progress(f"[DEBUG] DDL first 100 chars: {ddl_passed[:100]}")

            if not success:
                log_progress(f"[ERROR] DDL Deployment failed!")
                raise Exception(f"Schema deployment failed: {deploy_message}")

            progress += f"\n[OK] Schema created: {schema_name}"
            progress += f"\n[OK] Tables created"
            log_progress(f"[OK] Schema created: {schema_name}")
            log_progress(f"[OK] Tables created successfully")
            yield progress

            # Step 3: Populate with LegitData
            from legitdata_bridge import populate_demo_data
            import threading
            import time as time_module

            # Determine size preset from the configured fact-table row count
            fact_rows = int(self.settings.get('fact_table_size', 1000))
            if fact_rows <= 100:
                size = "small"
            elif fact_rows <= 1000:
                size = "medium"
            elif fact_rows <= 10000:
                size = "large"
            else:
                size = "xl"

            # Show size details
            size_details = {
                "small": "~500 rows total",
                "medium": "~1,500 rows total",
                "large": "~15,000 rows total",
                "xl": "~50,000+ rows total"
            }

            progress += f"\n\n**Step 3/3: Populating tables with data...**"
            progress += f"\n Size: {size} ({size_details.get(size, '')})"
            progress += f"\n Running LegitData... (typically 2-3 minutes)"
            progress += f"\n *(detailed progress in Live Progress tab)*"
            yield progress
            log_progress("")
            log_progress("Running LegitData for data generation...")
            log_progress(f" Size preset: {size}")
            log_progress(f" Company URL: {self.demo_builder.company_url}")

            # Track population progress messages pushed by the bridge callback
            pop_messages = []

            def pop_callback(msg):
                log_progress(msg)
                pop_messages.append(msg)

            # Run populate_demo_data in a background thread so we can yield progress
            # to the UI while the (blocking) generation call runs.
            pop_result = {"success": None, "message": None, "results": None, "done": False}

            def run_population():
                try:
                    # Validate DDL before passing to legitdata
                    ddl = self.demo_builder.schema_generation_results
                    if not ddl or not isinstance(ddl, str):
                        raise Exception(f"DDL is invalid (type: {type(ddl)}). Cannot populate data. Please regenerate DDL.")

                    # Check if DDL contains the word "None" which would indicate AI generated bad SQL
                    if ddl == "None" or ddl.strip() == "None":
                        raise Exception("DDL generation returned 'None'. Please regenerate DDL with a different prompt or model.")

                    success, message, results = populate_demo_data(
                        ddl_content=ddl,
                        company_url=self.demo_builder.company_url,
                        use_case=self.demo_builder.use_case,
                        schema_name=schema_name,
                        llm_model=self.settings.get('model', ''),
                        size=size,
                        progress_callback=pop_callback
                    )
                    pop_result["success"] = success
                    pop_result["message"] = message
                    pop_result["results"] = results
                except Exception as e:
                    pop_result["success"] = False
                    pop_result["message"] = str(e)
                    pop_result["results"] = None
                finally:
                    # Always flip the done flag so the polling loop below exits
                    pop_result["done"] = True

            pop_thread = threading.Thread(target=run_population)
            pop_thread.start()

            # Yield progress updates while LegitData runs
            spinner = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏']
            spinner_idx = 0
            start_time = time_module.time()
            last_msg_count = 0

            while not pop_result["done"]:
                time_module.sleep(2)  # Check every 2 seconds
                elapsed = int(time_module.time() - start_time)
                mins = elapsed // 60
                secs = elapsed % 60

                # Build progress message with spinner and elapsed time
                spinner_char = spinner[spinner_idx % len(spinner)]
                spinner_idx += 1
                progress_update = progress + f"\n\n{spinner_char} **LegitData running...** ({mins}:{secs:02d} elapsed)"

                # Show recent progress messages from callback
                if len(pop_messages) > last_msg_count:
                    recent_msgs = pop_messages[last_msg_count:last_msg_count + 3]  # Show up to 3 new messages
                    for msg in recent_msgs:
                        if msg.strip():
                            progress_update += f"\n • {msg[:60]}..."
                    last_msg_count = len(pop_messages)

                yield progress_update

            # Get results from thread
            pop_success = pop_result["success"]
            pop_message = pop_result["message"]
            results = pop_result["results"]

            if not pop_success:
                # Remember context so a retry flow can reference it
                self._last_population_error = pop_message
                self._last_schema_name = schema_name
                log_progress(f"[ERROR] Population error: {pop_message}")
                raise Exception(f"Population failed: {pop_message[:200]}")

            progress += f"\n[OK] Data populated"
            log_progress(f"[OK] {pop_message}")
            self._deployed_schema_name = schema_name

            log_progress("")
            log_progress("=" * 60)
            log_progress("SNOWFLAKE DEPLOYMENT COMPLETE")
            log_progress("=" * 60)

            # Check validation_mode setting to decide whether to auto-continue
            from supabase_client import load_gradio_settings, get_admin_setting
            settings = load_gradio_settings(self._get_effective_user_email())
            validation_mode = settings.get('validation_mode', 'Off')

            if validation_mode == 'On':
                # Validation mode - show message asking to type 'thoughtspot'
                final_response = f"""{progress}

**Deployment Complete**

Schema: **{schema_name}**
Tables: Created and populated
Status: Ready for ThoughtSpot

**Next:** Type **'thoughtspot'** to create ThoughtSpot objects"""
                yield (final_response, "thoughtspot")
            else:
                # Auto-continue to ThoughtSpot deployment (default behavior)
                final_response = f"""{progress}

**Deployment Complete**

Schema: **{schema_name}**
Tables: Created and populated

**Auto-continuing to ThoughtSpot deployment...**"""
                log_progress("Auto-continuing to ThoughtSpot deployment...")
                with open('/tmp/demoprep_debug.log', 'a') as f:
                    f.write(f"ABOUT TO YIELD auto_ts tuple, schema={schema_name}\n")
                yield (final_response, "auto_ts", schema_name)
                with open('/tmp/demoprep_debug.log', 'a') as f:
                    f.write(f"YIELD COMPLETED for auto_ts\n")

        except Exception as e:
            import traceback
            error_msg = str(e)
            _deploy_error = error_msg
            log_progress(f"[ERROR] {error_msg}")
            raise Exception(f"Deployment failed: {error_msg}")
        finally:
            if _slog and _t_deploy:
                _slog.log_end("deploy",
_t_deploy, error=_deploy_error) def _run_thoughtspot_deployment(self, schema_name, company, use_case): """ Generator that runs ThoughtSpot deployment and yields progress updates. Yields: str: Progress messages during deployment dict: Final result with 'response' and 'stage' keys """ # Debug log at entry with open('/tmp/demoprep_debug.log', 'a') as f: f.write(f"_run_thoughtspot_deployment: ENTERED, schema={schema_name}\n") import os from thoughtspot_deployer import ThoughtSpotDeployer from cdw_connector import SnowflakeDeployer from supabase_client import load_gradio_settings, get_admin_setting from demo_prep import generate_demo_base_name _slog = self._session_logger _t_ts = _slog.log_start("thoughtspot") if _slog else None _ts_error = None with open('/tmp/demoprep_debug.log', 'a') as f: f.write(f"_run_thoughtspot_deployment: imports done, about to yield first message\n") yield "**Starting ThoughtSpot Deployment...**\n\n" with open('/tmp/demoprep_debug.log', 'a') as f: f.write(f"_run_thoughtspot_deployment: first yield complete\n") self.log_feedback("Deploying to ThoughtSpot...") # Check if using existing model mode use_existing_model = self.settings.get('use_existing_model', False) existing_model_guid = self.settings.get('existing_model_guid', '') if use_existing_model and existing_model_guid: # Skip all table creation - go directly to liveboard creation yield f"**Using Existing Model Mode**\n\nModel GUID: `{existing_model_guid}`\n\nSkipping table/model creation...\n\n" self.log_feedback(f"Using existing model: {existing_model_guid}") try: from liveboard_creator import create_liveboard_from_model # Get ThoughtSpot settings ts_url = get_admin_setting('THOUGHTSPOT_URL') ts_user = get_admin_setting('THOUGHTSPOT_ADMIN_USER') ts_secret = get_admin_setting('THOUGHTSPOT_TRUSTED_AUTH_KEY') liveboard_method = 'HYBRID' # Only HYBRID method is supported # Clean company name for display (strip .com, .org, etc) clean_company = company.split('.')[0].title() if '.' 
in company else company liveboard_name = self.settings.get('liveboard_name', '') or f"{clean_company} - {use_case}" # Get company data for liveboard company_data = { 'name': clean_company, 'url': getattr(self.demo_builder, 'company_url', company), 'logo_url': getattr(self.demo_builder, 'logo_url', None), 'primary_color': getattr(self.demo_builder, 'primary_color', '#3498db'), 'secondary_color': getattr(self.demo_builder, 'secondary_color', '#2c3e50') } yield f"**Creating Liveboard from Existing Model**\n\nMethod: {liveboard_method}\nModel: `{existing_model_guid}`\n\n" # Create liveboard liveboard_result = create_liveboard_from_model( model_id=existing_model_guid, model_name="Existing Model", # We don't have the name use_case=use_case, company_data=company_data, liveboard_name=liveboard_name, method=liveboard_method, num_visualizations=8 ) if liveboard_result.get('success'): liveboard_url = liveboard_result.get('liveboard_url', '') yield { 'response': f"""✅ **Liveboard Created from Existing Model** **Model GUID:** `{existing_model_guid}` **Liveboard:** [{liveboard_name}]({liveboard_url}) The liveboard was created using the existing model. No new tables or models were created.""", 'stage': 'complete' } return else: error = liveboard_result.get('error', 'Unknown error') yield { 'response': f"""❌ **Liveboard Creation Failed** **Model GUID:** `{existing_model_guid}` **Error:** {error} Please verify the model GUID is correct and you have access to it.""", 'stage': 'thoughtspot' } return except Exception as e: import traceback yield { 'response': f"""❌ **Error Using Existing Model** **Error:** {str(e)} **Details:** ``` {traceback.format_exc()} ```""", 'stage': 'thoughtspot' } return try: # FIRST: Verify schema exists in Snowflake yield "**Starting ThoughtSpot Deployment...**\n\nVerifying Snowflake schema..." 
sf_deployer = SnowflakeDeployer() sf_deployer.connect() cursor = sf_deployer.connection.cursor() # Get DDL and database info ddl = self.demo_builder.schema_generation_results database = get_admin_setting('SNOWFLAKE_DATABASE') # Check if schema exists cursor.execute(f"USE DATABASE {database}") cursor.execute(f"SHOW SCHEMAS LIKE '{schema_name}'") schemas = cursor.fetchall() if not schemas: yield { 'response': f"""❌ **Schema Not Found in Snowflake** The schema `{database}.{schema_name}` doesn't exist in Snowflake. **Did the deployment complete successfully?** If not, try these options: 1. Type **'deploy'** - Deploy to Snowflake first 2. Check if tables were actually created in Snowflake Cannot deploy to ThoughtSpot without valid Snowflake schema.""", 'stage': 'deploy' } return # Check if tables exist cursor.execute(f'USE SCHEMA "{schema_name}"') cursor.execute(f"SHOW TABLES") tables = cursor.fetchall() if not tables: yield { 'response': f"""❌ **No Tables Found** The schema `{database}.{schema_name}` exists but has no tables! **What happened:** - Schema was created but table creation may have failed - Or tables were dropped/truncated **Next steps:** 1. Type **'deploy'** - Re-run the full Snowflake deployment 2. 
Or check Snowflake manually to see what's there Cannot deploy to ThoughtSpot without tables.""", 'stage': 'deploy' } return sf_deployer.connection.close() yield f"**Starting ThoughtSpot Deployment...**\n\nSchema verified: {database}.{schema_name}\nFound {len(tables)} tables\n\n" # Create deployer — prefer session-selected env (from TS env dropdown), # fall back to admin settings ts_url = (self.settings.get('thoughtspot_url') or '').strip() or get_admin_setting('THOUGHTSPOT_URL') ts_user = get_admin_setting('THOUGHTSPOT_ADMIN_USER') ts_secret = (self.settings.get('thoughtspot_trusted_auth_key') or '').strip() or get_admin_setting('THOUGHTSPOT_TRUSTED_AUTH_KEY') deployer = ThoughtSpotDeployer( base_url=ts_url, username=ts_user, secret_key=ts_secret ) # Apply column naming style from settings deployer.column_naming_style = self.settings.get('column_naming_style', 'Regular Case') deployer.prompt_logger = self._prompt_logger # Clear and prepare progress capture self.live_progress_log = ["=" * 60, "THOUGHTSPOT DEPLOYMENT STARTING", "=" * 60, ""] progress_messages = [] def progress_callback(msg): progress_messages.append(msg) self.live_progress_log.append(msg) self.log_feedback(msg) safe_print(msg, flush=True) # Show initial message yield { 'stage': 'thoughtspot', 'response': """**Starting ThoughtSpot Deployment...** Authenticating with ThoughtSpot... **This takes 2-5 minutes.** **Switch to the "Live Progress" tab** to watch real-time progress. Steps: 1. Schema creation 2. Data generation 3. Model creation 4. 
Liveboard creation This chat will update when complete.""" } safe_print("\n" + "="*60, flush=True) safe_print("THOUGHTSPOT DEPLOYMENT STARTING", flush=True) safe_print("="*60, flush=True) safe_print("Watch Live Progress tab for real-time updates...\n", flush=True) # Load settings settings = load_gradio_settings(self._get_effective_user_email()) liveboard_name = self.settings.get('liveboard_name', '') or settings.get('liveboard_name', '') # Default liveboard name to company name (without .com) if blank if not liveboard_name: liveboard_name = company.replace('.com', '').replace('.', ' ').strip().title() llm_model = settings.get('default_llm', self.settings.get('model', DEFAULT_LLM_MODEL)) tag_name_value = settings.get('tag_name') naming_prefix = settings.get('object_naming_prefix', '') # Extract base_name from schema_name (e.g., BLA_01311648_EAX_sch -> BLA_01311648_EAX) # DO NOT regenerate - must match what Snowflake deployment used base_name = schema_name.replace('_sch', '') if schema_name.endswith('_sch') else schema_name liveboard_method = 'HYBRID' # Only HYBRID method is supported print(f"🔍 DEBUG: tag_name='{tag_name_value}', liveboard_method='{liveboard_method}'") # Run deployment in a thread with progress spinner (like LegitData) import threading import time as time_module ts_result = {"done": False, "results": None, "error": None} def run_ts_deployment(): try: ts_result["results"] = deployer.deploy_all( ddl=ddl, database=database, schema=schema_name, base_name=base_name, company_name=company, use_case=use_case, liveboard_name=liveboard_name, llm_model=llm_model, tag_name=tag_name_value, liveboard_method=liveboard_method, share_with=self.settings.get('share_with', '').strip() or None, progress_callback=progress_callback ) except Exception as e: ts_result["error"] = str(e) finally: ts_result["done"] = True ts_thread = threading.Thread(target=run_ts_deployment) ts_thread.start() # Yield progress updates with spinner while ThoughtSpot deployment runs spinner = 
['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'] spinner_idx = 0 start_time = time_module.time() last_msg_count = 0 base_progress = """**ThoughtSpot Deployment in Progress...** Steps: 1. ✅ Authenticating 2. 🔄 Creating connection & tables 3. ⏳ Creating model 4. ⏳ Creating liveboard **Watch "Live Progress" tab for real-time details.**""" while not ts_result["done"]: time_module.sleep(2) # Check every 2 seconds elapsed = int(time_module.time() - start_time) mins = elapsed // 60 secs = elapsed % 60 # Build progress message with spinner and elapsed time spinner_char = spinner[spinner_idx % len(spinner)] spinner_idx += 1 progress_update = base_progress + f"\n\n{spinner_char} **ThoughtSpot deploying...** ({mins}:{secs:02d} elapsed)" # Show recent progress messages from callback if len(progress_messages) > last_msg_count: recent_msgs = progress_messages[-3:] # Show last 3 messages progress_update += "\n\n**Recent:**" for msg in recent_msgs: if msg.strip(): # Clean up the message for display clean_msg = msg.replace('[ThoughtSpot]', '').replace('[AI Feedback]', '').strip() if clean_msg: progress_update += f"\n • {clean_msg[:70]}" last_msg_count = len(progress_messages) yield progress_update # Get results from thread if ts_result["error"]: raise Exception(ts_result["error"]) results = ts_result["results"] safe_print("\n" + "="*60, flush=True) if results.get('success'): safe_print("DEPLOYMENT COMPLETE", flush=True) self.live_progress_log.extend(["", "=" * 60, "DEPLOYMENT COMPLETE", "=" * 60]) else: safe_print("DEPLOYMENT FAILED", flush=True) safe_print(f"Errors: {results.get('errors', [])}", flush=True) self.live_progress_log.extend(["", "DEPLOYMENT FAILED", f"Errors: {results.get('errors', [])}"]) safe_print("="*60 + "\n", flush=True) progress_log = '\n'.join(progress_messages) if progress_messages else 'No progress messages captured' # Generate Demo Pack on success if results.get('success'): try: company_name = company.replace('.com', '').replace('.', ' ').title() # 
Generate use-case specific Spotter questions spotter_questions = self._generate_spotter_questions(use_case, self.ddl_code) spotter_section = "\n".join([f'{i+1}. **"{q["question"]}"** - {q["purpose"]}' for i, q in enumerate(spotter_questions)]) # Use-case specific demo tips demo_tips = self._get_demo_tips(use_case) # Build URLs for demo pack ts_base = get_admin_setting('THOUGHTSPOT_URL', required=False).rstrip('/') pack_model_url = f"{ts_base}/#/data/tables/{results.get('model_guid', '')}" if results.get('model_guid') and ts_base else results.get('model', 'N/A') pack_lb_url = results.get('liveboard_url') or (f"{ts_base}/#/pinboard/{liveboard_guid}" if liveboard_guid and ts_base else results.get('liveboard', 'N/A')) self.demo_pack_content = f"""# {company_name} Demo Pack ## {use_case} *Generated: {__import__('datetime').datetime.now().strftime('%Y-%m-%d %H:%M')}* --- ## Deployment Summary - **Liveboard:** {pack_lb_url} - **Model:** {pack_model_url} - **Tables:** {len(results.get('tables', []))} imported --- ## Suggested Spotter Questions Ask these questions to showcase ThoughtSpot's AI capabilities: {spotter_section} --- ## Demo Flow 1. Start with the overview liveboard to set context 2. Ask a Spotter question to show natural language search 3. Drill into an interesting metric to show interactivity 4. 
Show Monitor for proactive alerting --- ## Tips for {use_case} {demo_tips} --- *Pro tip: Use "what changed" questions to show ThoughtSpot's change detection!* """ safe_print("Demo Pack generated - check the Demo Pack tab.", flush=True) self.live_progress_log.append("Demo Pack generated") except Exception as e: safe_print(f"Could not generate demo pack: {e}", flush=True) self.demo_pack_content = f"*Demo pack generation failed: {e}*" # Generate Spotter Viz Story try: model_name_for_story = results.get('model', None) liveboard_name_for_story = results.get('liveboard', None) self.spotter_viz_story = self._generate_spotter_viz_story( company_name=company_name, use_case=use_case, model_name=model_name_for_story, liveboard_name=liveboard_name_for_story ) safe_print("Spotter Viz Story generated - check the Spotter Viz Story tab.", flush=True) self.live_progress_log.append("Spotter Viz Story generated") except Exception as e: safe_print(f"Could not generate Spotter Viz story: {e}", flush=True) self.spotter_viz_story = f"*Spotter Viz story generation failed: {e}*" # Build final response if results.get('success'): # Safely extract GUIDs - handle None, 'None', 'N/A', empty string liveboard_guid = results.get('liveboard_guid') or results.get('liveboard_id') if not liveboard_guid or liveboard_guid in ('None', 'N/A', ''): liveboard_guid = None liveboard_name_result = results.get('liveboard', 'N/A') self._liveboard_guid = liveboard_guid self._liveboard_name = liveboard_name_result final_stage = 'deploy' # Try to load adjuster for outliers stage if liveboard_guid: try: from smart_data_adjuster import SmartDataAdjuster # Pass selected LLM model + session-selected TS env to adjuster llm_model = self.settings.get('model', DEFAULT_LLM_MODEL) adjuster = SmartDataAdjuster( database, schema_name, liveboard_guid, llm_model=llm_model, ts_url=self.settings.get('thoughtspot_url') or None, ts_secret=self.settings.get('thoughtspot_trusted_auth_key') or None, prompt_logger=self._prompt_logger, ) 
adjuster.connect() if adjuster.load_liveboard_context(): self._adjuster = adjuster viz_list = "\n".join([ f" [{i+1}] {v['name']}" for i, v in enumerate(adjuster.visualizations) ]) # Build clickable links - validate GUIDs before creating URLs ts_url = get_admin_setting('THOUGHTSPOT_URL', required=False).rstrip('/') model_guid = results.get('model_guid') or '' # Only create liveboard URL if we have a valid GUID lb_url = results.get('liveboard_url', '') if not lb_url and liveboard_guid and ts_url: lb_url = f"{ts_url}/#/pinboard/{liveboard_guid}" # Format table names table_names = results.get('tables', []) tables_list = ', '.join(table_names) if table_names else 'N/A' # Build URLs for easy Slack pasting model_url = f"{ts_url}/#/data/tables/{model_guid}" if model_guid and ts_url else None response = f"""**ThoughtSpot Deployment Complete** **Created:** - Connection: {results.get('connection', 'N/A')} - Tables: {tables_list} - Model: {model_url if model_url else results.get('model', 'N/A')} - Liveboard: {lb_url if lb_url else liveboard_name_result} **Your demo is ready!** 🎉 --- **🎯 Ready for Outlier Adjustments!** I've loaded your liveboard context. 
Here are the visualizations: {viz_list} **What you can do:** - Naturally request changes: "make 1080p webcam 40B" - Adjust by percentage: "increase smart watch by 20%" - Reference by viz number: "viz 3, increase laptop to 50B" **Try an adjustment now, or type 'done' to finish!**""" final_stage = 'outlier_adjustment' else: ts_url = get_admin_setting('THOUGHTSPOT_URL', required=False).rstrip('/') model_guid = results.get('model_guid') or '' lb_url = results.get('liveboard_url', '') if not lb_url and liveboard_guid and ts_url: lb_url = f"{ts_url}/#/pinboard/{liveboard_guid}" table_names = results.get('tables', []) tables_list = ', '.join(table_names) if table_names else 'N/A' # Build URLs for easy Slack pasting model_url = f"{ts_url}/#/data/tables/{model_guid}" if model_guid and ts_url else None response = f"""**ThoughtSpot Deployment Complete** **Created:** - Connection: {results.get('connection', 'N/A')} - Tables: {tables_list} - Model: {model_url if model_url else results.get('model', 'N/A')} - Liveboard: {lb_url if lb_url else liveboard_name_result} Your demo is ready! Note: Could not load liveboard context for adjustments. 
Type **'done'** to finish.""" except Exception as e: self.log_feedback(f"Failed to load liveboard context: {e}") ts_url = get_admin_setting('THOUGHTSPOT_URL', required=False).rstrip('/') model_guid = results.get('model_guid') or '' lb_url = results.get('liveboard_url', '') if not lb_url and liveboard_guid and ts_url: lb_url = f"{ts_url}/#/pinboard/{liveboard_guid}" table_names = results.get('tables', []) tables_list = ', '.join(table_names) if table_names else 'N/A' # Build URLs for easy Slack pasting model_url = f"{ts_url}/#/data/tables/{model_guid}" if model_guid and ts_url else None response = f"""**ThoughtSpot Deployment Complete** **Created:** - Connection: {results.get('connection', 'N/A')} - Tables: {tables_list} - Model: {model_url if model_url else results.get('model', 'N/A')} - Liveboard: {lb_url if lb_url else liveboard_name_result} Your demo is ready! Note: Could not load liveboard context for adjustments: {str(e)} Type **'done'** to finish.""" else: # Deployed OK but no liveboard GUID returned — treat as partial success ts_url = get_admin_setting('THOUGHTSPOT_URL', required=False).rstrip('/') model_guid = results.get('model_guid') or '' table_names = results.get('tables', []) tables_list = ', '.join(table_names) if table_names else 'N/A' model_url = f"{ts_url}/#/data/tables/{model_guid}" if model_guid and ts_url else results.get('model', 'N/A') response = f"""⚠️ **Partial Success — Dataset & Model Created** Your Snowflake data and ThoughtSpot model were deployed successfully. The liveboard GUID couldn't be retrieved — it may still have been created in ThoughtSpot. 
**Created:** - Connection: {results.get('connection', 'N/A')} - Tables: {tables_list} - Model: {model_url} **What you can do:** - Check the **Spotter Viz Story** tab to recreate the liveboard manually - Log into ThoughtSpot to verify whether the liveboard was created - Type **'retry liveboard'** to try building it again Type **'done'** to finish.""" yield {'response': response, 'stage': final_stage} else: errors = results.get('errors', ['Unknown error']) error_details = '\n'.join(errors) # Check for partial success: Snowflake + model deployed OK but liveboard failed model_ok = bool(results.get('model_guid')) liveboard_errors = [e for e in errors if 'liveboard' in e.lower()] non_liveboard_errors = [e for e in errors if 'liveboard' not in e.lower()] if model_ok and liveboard_errors and not non_liveboard_errors: # Partial success — dataset and model are live, only liveboard failed ts_url = get_admin_setting('THOUGHTSPOT_URL', required=False).rstrip('/') model_guid = results.get('model_guid') or '' table_names = results.get('tables', []) tables_list = ', '.join(table_names) if table_names else 'N/A' model_url = f"{ts_url}/#/data/tables/{model_guid}" if model_guid and ts_url else results.get('model', 'N/A') lb_error = liveboard_errors[0] yield { 'response': f"""⚠️ **Partial Success — Dataset & Model Created** Your Snowflake data and ThoughtSpot model were deployed successfully. The liveboard couldn't be built automatically. 
**Created:** - Connection: {results.get('connection', 'N/A')} - Tables: {tables_list} - Model: {model_url} **Liveboard error:** ``` {lb_error} ``` **What you can do:** - Check the **Spotter Viz Story** tab — use that sequence to recreate the liveboard manually in Spotter Viz - Type **'retry liveboard'** to try building it again - Or continue in ThoughtSpot using your model directly""", 'stage': 'deploy' } else: if 'schema validation' in error_details.lower() or 'schema' in error_details.lower(): guidance = "**Root Cause:** The model TML has validation errors." elif 'connection' in error_details.lower(): guidance = "**Root Cause:** Connection issue with ThoughtSpot or Snowflake" elif 'authenticate' in error_details.lower() or 'auth' in error_details.lower(): guidance = "**Root Cause:** Authentication failed" else: guidance = "**Check the progress log above for details.**" yield { 'response': f"""❌ **ThoughtSpot Deployment Failed** **Error Details:** ``` {error_details} ``` **Progress Log:** ``` {progress_log} ``` {guidance} **Next Steps:** - Type **'retry'** to try again - Or fix the issues above first""", 'stage': 'deploy' } except Exception as e: import traceback error_details = traceback.format_exc() _ts_error = str(e) self.log_feedback(f"❌ ThoughtSpot deployment error: {error_details}") yield { 'response': f"""❌ **ThoughtSpot Deployment Error** **Error:** {str(e)} **Full Details:** ``` {error_details} ``` **Common Causes:** - Missing ThoughtSpot credentials in .env - Snowflake connection issues - Invalid schema or table names **Next Steps:** - Verify credentials are correct - Type **'retry'** to try again""", 'stage': 'deploy' } finally: if _slog and _t_ts: _slog.log_end("thoughtspot", _t_ts, error=_ts_error) def process_regular_message(self, message, current_stage, company, use_case): """Process regular chat messages""" message_lower = message.lower() # Check if message contains company and use case info if "creating a demo for" in message_lower or "create a 
demo for" in message_lower: # Extract company and use case from message extracted_company = self.extract_company_from_message(message) extracted_use_case = self.extract_use_case_from_message(message) if extracted_company: company = extracted_company if extracted_use_case: # Use vertical × function system to resolve use case v, f = parse_use_case(extracted_use_case) config = get_use_case_config(v or "Generic", f or "Generic") use_case = config.get('use_case_name', extracted_use_case) # Automatically trigger research return f"""✅ **Got it!** **Company:** {company} **Use Case:** {use_case} 🔍 **Starting Research...** I'll analyze {company} and research {use_case} best practices. This will take about 2-3 minutes. (Type 'stop' if you want to change anything)""" # Simple intent detection (Phase 1) if any(word in message_lower for word in ['research', 'start', 'begin', 'analyze']): # Just confirm and tell them to use the proper format return f"""To start research, please use this format: ``` I'm creating a demo for company: {company} use case: {use_case} ``` This will automatically begin the research process!""" elif any(word in message_lower for word in ['configure', 'settings', 'change']): return """⚙️ **Configuration Options** You can change: - **Company**: `/over company: [new company]` - **Use Case**: `/over usecase: [new use case]` - **AI Model**: Use the dropdown on the right → What would you like to adjust?""" elif 'help' in message_lower: return """💡 **How to Use This Interface** **Commands:** - `/over company: [name]` - Change company - `/over usecase: [case]` - Change use case **What You Can Say:** - "Start research" - Begin demo creation - "Configure settings" - Adjust parameters - "What stage are we at?" - Check progress - Ask any question naturally! 
**Current Stage:** You can see it on the right side →

**AI Model:** Editable in the dropdown on the right →

What would you like to do?"""

        elif any(word in message_lower for word in ['stage', 'progress', 'status', 'where']):
            # Status query — report the session's current pipeline position.
            return f"""📊 **Current Status**

**Stage:** {current_stage.replace('_', ' ').title()}
**Company:** {company}
**Use Case:** {use_case}

**What's Next:** Tell me what you'd like to do, and I'll guide you through the process!"""

        else:
            # Generic helpful response - shouldn't normally reach here
            return f"""I'm ready to work on **{use_case}** for **{company}**.

To get started, just say:
```
I'm creating a demo for company: {company} use case: {use_case}
```

Or if you want to change something, use `/over` to adjust."""


def create_chat_interface():
    """
    Create the new chat-based demo builder interface.

    IMPORTANT: Each browser session gets its own ChatDemoInterface instance
    via gr.State() to enable multi-user concurrent access.

    Returns:
        gr.Blocks: the fully wired Gradio application (chat tab, progress,
        demo pack, settings, admin-only tabs, and startup load handlers).
    """
    # Bootstrap defaults before authenticated user-specific settings are loaded
    default_settings = {
        "company": "Amazon.com",
        "use_case": "Sales Analytics",
        "model": DEFAULT_LLM_MODEL,
        "stage": "initialization",
    }

    with gr.Blocks(
        title="ThoughtSpot Demo Builder - Chat",
        theme=gr.themes.Soft(primary_hue="blue", secondary_hue="cyan")
    ) as interface:
        # SESSION STATE: Each user gets their own ChatDemoInterface instance
        # This is the key fix for multi-user support!
        chat_controller_state = gr.State(None)  # Initialized on first interaction

        # State variables (per-session values mirrored across event handlers)
        current_stage = gr.State(default_settings['stage'])
        current_model = gr.State(default_settings['model'])
        current_company = gr.State(default_settings['company'])
        current_usecase = gr.State(default_settings['use_case'])
        current_liveboard_name = gr.State(default_settings.get('liveboard_name', ''))

        # Header with logout link
        with gr.Row(equal_height=True):
            gr.Markdown("""
            # 💬 ThoughtSpot Demo Builder
            ### AI-Powered Conversational Demo Creation
            """)
            # NOTE(review): HTML markup appears stripped in this copy of the file —
            # presumably an anchor/link to the sign-out endpoint. Restore the tags.
            gr.HTML("""
            Sign Out →
            """)

        # Additional state for new tabs
        ai_feedback_state = gr.State("")
        ddl_code_state = gr.State("")
        population_code_state = gr.State("")
        live_progress_state = gr.State("")
        demo_pack_state = gr.State("")

        with gr.Tabs():
            with gr.Tab("💬 Chat"):
                chat_components = create_chat_tab(
                    chat_controller_state, default_settings,
                    current_stage, current_model, current_company, current_usecase,
                    ai_feedback_state, ddl_code_state, population_code_state
                )

            with gr.Tab("Live Progress"):
                gr.Markdown("### Deployment Log")
                gr.Markdown("*Detailed technical log - main progress shows in Chat*")
                live_progress_display = gr.TextArea(
                    label="Live Progress",
                    value="Waiting for deployment to start...\n\nGo to Chat tab and type 'thoughtspot' or 'deploy' to begin.",
                    lines=35,
                    max_lines=35,
                    interactive=False,
                    show_label=False,
                    elem_classes=["live-progress-area"]
                )

                # Timer-based refresh for live progress (every 2 seconds)
                # INACTIVE by default - activated when deployment starts, deactivated when done
                # Otherwise idle tabs consume concurrency slots and block new sessions
                live_progress_timer = gr.Timer(value=2, active=False)

                def refresh_live_progress(controller):
                    """Poll and return current progress log"""
                    if controller is None:
                        return "Waiting for deployment to start...\n\nGo to Chat tab and type 'thoughtspot' or 'deploy' to begin."
                    # live_progress_log is appended to by the deployment pipeline
                    live_progress = getattr(controller, 'live_progress_log', [])
                    if live_progress:
                        return "\n".join(live_progress)
                    return "Waiting for deployment to start...\n\nGo to Chat tab and type 'thoughtspot' or 'deploy' to begin."

                live_progress_timer.tick(
                    fn=refresh_live_progress,
                    inputs=[chat_controller_state],
                    outputs=[live_progress_display]
                )

            with gr.Tab("📋 Demo Pack"):
                gr.Markdown("### Demo Pack - Talking Points & Spotter Questions")
                gr.Markdown("*Generated automatically after deployment completes*")
                demo_pack_display = gr.Markdown(
                    value="Demo pack will be generated after deployment completes.\n\nThis will include:\n- Key insights/outliers\n- Spotter questions to ask\n- Talking points for the demo",
                    elem_classes=["demo-pack-content"]
                )

            with gr.Tab("🎬 Spotter Viz Story"):
                gr.Markdown("### Spotter Viz Story — Natural Language Liveboard Builder")
                gr.Markdown("*Conversational prompts you can enter into ThoughtSpot Spotter Viz to recreate this liveboard.*")
                spotter_viz_story_display = gr.Markdown(
                    value="Spotter Viz story will be generated after liveboard creation.\n\n**What is Spotter Viz?**\nSpotter Viz is an AI agent in ThoughtSpot that creates, structures, and styles Liveboards through natural language prompts. The agent reviews the data, proposes layouts, generates KPIs and visualizations, and allows conversational refinement.",
                    elem_classes=["spotter-viz-story-content"]
                )

            with gr.Tab("⚙️ Settings"):
                settings_components = create_settings_tab()

            with gr.Tab("🤖 AI Feedback"):
                ai_feedback_display = gr.TextArea(
                    label="AI Processing Log",
                    value="",
                    lines=30,
                    max_lines=30,
                    interactive=False,
                    show_label=False
                )

            with gr.Tab("📄 DDL Code"):
                ddl_display = gr.Code(
                    label="Generated DDL",
                    language="sql",
                    value="-- DDL will appear here after generation",
                    lines=30,
                    interactive=False
                )

            # Hidden by default; made visible for admins by check_admin_visibility below.
            with gr.Tab("📝 Prompt Log", visible=False) as prompt_log_tab:
                gr.Markdown("### Prompt Log — What We Send to LLMs")
                gr.Markdown("*Every prompt and response is logged here for review. Updates every 5 seconds.*")
                prompt_log_display = gr.Markdown(
                    value="No prompts logged yet. Start a demo build to see LLM calls here.",
                    elem_classes=["prompt-log-content"]
                )

                # Timer-based refresh for prompt log (every 5 seconds)
                prompt_log_timer = gr.Timer(value=5, active=True)

                def refresh_prompt_log(controller):
                    """Poll and return current prompt log summary for this session."""
                    try:
                        if controller is not None and controller._prompt_logger is not None:
                            return controller._prompt_logger.get_summary()
                    except Exception:
                        # Best-effort poll — never let a logging failure break the timer.
                        pass
                    return "No prompts logged yet."

                prompt_log_timer.tick(
                    fn=refresh_prompt_log,
                    inputs=[chat_controller_state],
                    outputs=[prompt_log_display]
                )

            # Admin-only tab: CRUD for DemoPrep users stored in Supabase.
            with gr.Tab("👤 User Management", visible=False) as user_mgmt_tab:
                gr.Markdown("### User Management (Admin Only)")
                gr.Markdown("*Add, deactivate, or reset passwords for DemoPrep users.*")

                with gr.Row():
                    with gr.Column(scale=2):
                        gr.Markdown("#### Current Users")
                        user_list_display = gr.Dataframe(
                            headers=["Email", "Display Name", "Admin", "Active", "Last Login"],
                            datatype=["str", "str", "bool", "bool", "str"],
                            interactive=False,
                            label="Users"
                        )
                        refresh_users_btn = gr.Button("🔄 Refresh User List", size="sm")
                    with gr.Column(scale=1):
                        gr.Markdown("#### Add New User")
                        new_user_email = gr.Textbox(label="Email", placeholder="user@company.com")
                        new_user_password = gr.Textbox(label="Password", type="password")
                        new_user_display = gr.Textbox(label="Display Name", placeholder="Jane Doe")
                        new_user_admin = gr.Checkbox(label="Admin?", value=False)
                        add_user_btn = gr.Button("➕ Add User", variant="primary")
                        gr.Markdown("---")
                        gr.Markdown("#### User Actions")
                        action_email = gr.Textbox(label="User Email (for actions below)")
                        with gr.Row():
                            deactivate_btn = gr.Button("🚫 Deactivate", size="sm")
                            activate_btn = gr.Button("✅ Activate", size="sm")
                        new_password = gr.Textbox(label="New Password", type="password")
                        reset_pw_btn = gr.Button("🔑 Reset Password", size="sm")

                user_mgmt_status = gr.Textbox(label="Status", interactive=False)

                def load_user_list():
                    """Load user list from Supabase."""
                    try:
                        from supabase_client import UserManager
                        um = UserManager()
                        users = um.list_users()
                        rows = []
                        for u in users:
                            rows.append([
                                u.get('email', ''),
                                u.get('display_name', ''),
                                u.get('is_admin', False),
                                u.get('is_active', True),
                                # Truncate ISO timestamp to seconds for display
                                str(u.get('last_login', 'Never'))[:19] if u.get('last_login') else 'Never'
                            ])
                        return rows
                    except Exception as e:
                        # Surface the error in the dataframe rather than crashing the tab
                        return [[f"Error: {e}", "", False, False, ""]]

                def add_user_handler(email, password, display_name, is_admin):
                    """Add a new user."""
                    if not email or not password:
                        return load_user_list(), "Email and password are required."
                    try:
                        from supabase_client import UserManager
                        um = UserManager()
                        success = um.add_user(email, password, display_name, is_admin)
                        if success:
                            return load_user_list(), f"User {email} added successfully."
                        else:
                            return load_user_list(), f"Failed to add user {email}."
                    except Exception as e:
                        return load_user_list(), f"Error: {e}"

                def deactivate_handler(email):
                    if not email:
                        return load_user_list(), "Enter an email first."
                    try:
                        from supabase_client import UserManager
                        um = UserManager()
                        um.deactivate_user(email)
                        return load_user_list(), f"User {email} deactivated."
                    except Exception as e:
                        return load_user_list(), f"Error: {e}"

                def activate_handler(email):
                    if not email:
                        return load_user_list(), "Enter an email first."
                    try:
                        from supabase_client import UserManager
                        um = UserManager()
                        um.activate_user(email)
                        return load_user_list(), f"User {email} activated."
                    except Exception as e:
                        return load_user_list(), f"Error: {e}"

                def reset_password_handler(email, new_pw):
                    if not email or not new_pw:
                        return "Enter email and new password."
                    try:
                        from supabase_client import UserManager
                        um = UserManager()
                        um.reset_password(email, new_pw)
                        return f"Password reset for {email}."
                    except Exception as e:
                        return f"Error: {e}"

                # Wire up buttons
                refresh_users_btn.click(
                    fn=load_user_list,
                    inputs=[],
                    outputs=[user_list_display]
                )
                add_user_btn.click(
                    fn=add_user_handler,
                    inputs=[new_user_email, new_user_password, new_user_display, new_user_admin],
                    outputs=[user_list_display, user_mgmt_status]
                )
                deactivate_btn.click(
                    fn=deactivate_handler,
                    inputs=[action_email],
                    outputs=[user_list_display, user_mgmt_status]
                )
                activate_btn.click(
                    fn=activate_handler,
                    inputs=[action_email],
                    outputs=[user_list_display, user_mgmt_status]
                )
                reset_pw_btn.click(
                    fn=reset_password_handler,
                    inputs=[action_email, new_password],
                    outputs=[user_mgmt_status]
                )

                # Auto-load users on tab open
                interface.load(
                    fn=load_user_list,
                    inputs=[],
                    outputs=[user_list_display]
                )

            with gr.Tab("⚙️ Admin Settings", visible=True) as admin_settings_tab:
                gr.Markdown("### System-Wide Settings")
                gr.Markdown("These settings apply to all users. Only admins can view and edit.")

                # Hidden fields — values still saved/loaded but not shown in UI
                admin_ts_url = gr.Textbox(visible=False)
                admin_ts_auth_key = gr.Textbox(visible=False)
                admin_openai_key = gr.Textbox(visible=False)
                admin_google_key = gr.Textbox(visible=False)

                with gr.Row():
                    with gr.Column():
                        gr.Markdown("#### ThoughtSpot Connection")
                        admin_ts_user = gr.Textbox(label="ThoughtSpot Admin Username", placeholder="admin@company.com")
                        admin_share_with = gr.Textbox(
                            label="Default Share With (User or Group)",
                            placeholder="user@company.com or group-name",
                            info="System-wide default: model + liveboard shared here after every build"
                        )
                    with gr.Column():
                        gr.Markdown("#### Snowflake Connection")
                        admin_sf_account = gr.Textbox(label="Snowflake Account")
                        admin_sf_kp_user = gr.Textbox(label="Key Pair User")
                        admin_sf_kp_pk = gr.Textbox(label="Private Key (PEM)", lines=3, type="password")
                        admin_sf_kp_pass = gr.Textbox(label="Private Key Passphrase", type="password")
                        admin_sf_role = gr.Textbox(label="Role")
                        admin_sf_warehouse = gr.Textbox(label="Warehouse")
                        admin_sf_database = gr.Textbox(label="Database")
                        admin_sf_sso_user = gr.Textbox(label="SSO User (for Snowflake browser auth)")

                with gr.Row():
                    load_admin_btn = gr.Button("🔄 Load Current Settings", size="sm")
                    save_admin_btn = gr.Button("💾 Save Admin Settings", variant="primary", size="sm")
                admin_settings_status = gr.Textbox(label="Status", interactive=False)

                # Admin settings field list (order matches ADMIN_SETTINGS_KEYS)
                admin_fields = [
                    admin_ts_url, admin_ts_auth_key, admin_ts_user,
                    admin_openai_key, admin_google_key,
                    admin_sf_account, admin_sf_kp_user, admin_sf_kp_pk, admin_sf_kp_pass,
                    admin_sf_role, admin_sf_warehouse, admin_sf_database, admin_sf_sso_user,
                    admin_share_with,
                ]
                # Storage keys, positionally aligned with admin_fields above.
                admin_keys_order = [
                    "THOUGHTSPOT_URL", "THOUGHTSPOT_TRUSTED_AUTH_KEY", "THOUGHTSPOT_ADMIN_USER",
                    "OPENAI_API_KEY", "GOOGLE_API_KEY",
                    "SNOWFLAKE_ACCOUNT", "SNOWFLAKE_KP_USER", "SNOWFLAKE_KP_PK", "SNOWFLAKE_KP_PASSPHRASE",
                    "SNOWFLAKE_ROLE", "SNOWFLAKE_WAREHOUSE", "SNOWFLAKE_DATABASE", "SNOWFLAKE_SSO_USER",
                    "SHARE_WITH",
                ]

                def load_admin_settings_handler():
                    """Load admin settings from Supabase and populate fields."""
                    try:
                        from supabase_client import load_admin_settings
                        settings = load_admin_settings(force_refresh=True)
                        values = [settings.get(k, "") for k in admin_keys_order]
                        # Last element feeds admin_settings_status
                        return values + ["Settings loaded from Supabase."]
                    except Exception as e:
                        return [""] * len(admin_keys_order) + [f"Error loading: {e}"]

                def save_admin_settings_handler(*field_values):
                    """Save admin settings to Supabase."""
                    try:
                        from supabase_client import save_admin_settings
                        settings_dict = {}
                        for key, val in zip(admin_keys_order, field_values):
                            settings_dict[key] = val or ""
                        success = save_admin_settings(settings_dict)
                        if success:
                            return "✅ Admin settings saved to Supabase and applied."
                        else:
                            return "❌ Failed to save some admin settings."
                    except Exception as e:
                        return f"❌ Error saving: {e}"

                load_admin_btn.click(
                    fn=load_admin_settings_handler,
                    inputs=[],
                    outputs=admin_fields + [admin_settings_status]
                )
                save_admin_btn.click(
                    fn=save_admin_settings_handler,
                    inputs=admin_fields,
                    outputs=[admin_settings_status]
                )

                # Auto-load admin settings on tab open
                interface.load(
                    fn=load_admin_settings_handler,
                    inputs=[],
                    outputs=admin_fields + [admin_settings_status]
                )

                # --- Session Log Viewer ---
                gr.Markdown("---")
                gr.Markdown("### 📋 Session Logs")
                with gr.Row():
                    log_user_filter = gr.Textbox(label="Filter by user (email, blank=all)", scale=2)
                    log_limit = gr.Dropdown(label="Show", choices=["25", "50", "100"], value="50", scale=1)
                    log_refresh_btn = gr.Button("🔄 Refresh", scale=1)
                session_log_display = gr.Dataframe(
                    headers=["Time", "User", "Stage", "Event", "Duration (ms)", "Error"],
                    label="Recent Sessions",
                    interactive=False,
                    wrap=True,
                )

                def load_session_logs(user_filter, limit):
                    """Load session logs from Supabase session_logs table."""
                    try:
                        from supabase_client import SupabaseSettings
                        ss = SupabaseSettings()
                        if not ss.is_enabled():
                            return [["Supabase not configured", "", "", "", "", ""]]
                        query = ss.client.table("session_logs").select(
                            "ts,user_email,stage,event,duration_ms,error"
                        ).order("ts", desc=True).limit(int(limit))
                        if user_filter and user_filter.strip():
                            # Case-insensitive substring match on email
                            query = query.ilike("user_email", f"%{user_filter.strip()}%")
                        result = query.execute()
                        rows = []
                        for r in result.data:
                            ts = r.get("ts", "")[:19].replace("T", " ")  # trim to seconds
                            rows.append([
                                ts,
                                r.get("user_email", ""),
                                r.get("stage", ""),
                                r.get("event", ""),
                                str(r.get("duration_ms", "") or ""),
                                r.get("error", "") or "",
                            ])
                        return rows if rows else [["No logs found", "", "", "", "", ""]]
                    except Exception as e:
                        return [[f"Error: {e}", "", "", "", "", ""]]

                log_refresh_btn.click(
                    fn=load_session_logs,
                    inputs=[log_user_filter, log_limit],
                    outputs=[session_log_display]
                )

        # Check admin status and toggle admin-only settings visibility
        def check_admin_visibility(request: gr.Request):
            """Check if logged-in user is admin and toggle settings visibility."""
            username = getattr(request, 'username', None) or ''
            is_admin = False
            try:
                from supabase_client import UserManager
                um = UserManager()
                if um.enabled and username:
                    is_admin = um.is_admin(username)
                else:
                    is_admin = True  # Local dev mode - show everything
            except Exception:
                is_admin = True  # If check fails, show everything
            # Order must match admin_outputs below
            return (
                gr.update(visible=is_admin),  # admin_ai_accordion
                gr.update(visible=is_admin),  # admin_db_accordion
                gr.update(visible=is_admin),  # admin_settings_tab
                gr.update(visible=is_admin),  # prompt_log_tab
                gr.update(visible=is_admin),  # user_mgmt_tab
            )

        admin_outputs = [
            settings_components['_admin_ai_accordion'],
            settings_components['_admin_db_accordion'],
            admin_settings_tab,
            prompt_log_tab,
            user_mgmt_tab,
        ]
        interface.load(
            fn=check_admin_visibility,
            inputs=[],
            outputs=admin_outputs
        )

        # Create update function for tabs
        spotter_viz_default = "Spotter Viz story will be generated after liveboard creation.\n\n**What is Spotter Viz?**\nSpotter Viz is an AI agent in ThoughtSpot that creates, structures, and styles Liveboards through natural language prompts. The agent reviews the data, proposes layouts, generates KPIs and visualizations, and allows conversational refinement."

        def update_all_tabs(controller):
            # Refresh the side tabs from the session controller's accumulated state.
            if controller is None:
                return (
                    "",
                    "-- DDL will appear here after generation",
                    "Progress will appear here during deployment...",
                    "Demo pack will be generated after deployment completes.\n\nThis will include:\n- Key insights/outliers\n- Spotter questions to ask\n- Talking points for the demo",
                    spotter_viz_default
                )
            # Get live progress from controller (captures deployment output)
            live_progress = getattr(controller, 'live_progress_log', [])
            live_progress_text = "\n".join(live_progress) if live_progress else "Progress will appear here during deployment..."
            # Get demo pack from controller
            demo_pack = getattr(controller, 'demo_pack_content', '')
            demo_pack_text = demo_pack if demo_pack else "Demo pack will be generated after deployment completes.\n\nThis will include:\n- Key insights/outliers\n- Spotter questions to ask\n- Talking points for the demo"
            # Get Spotter Viz story from controller
            spotter_story = getattr(controller, 'spotter_viz_story', '')
            spotter_story_text = spotter_story if spotter_story else spotter_viz_default
            return (
                "\n".join(controller.ai_feedback_log),
                controller.ddl_code if controller.ddl_code else "-- DDL will appear here after generation",
                live_progress_text,
                demo_pack_text,
                spotter_story_text
            )

        # Wire up tab updates on chat interactions
        # These will update after each message
        chat_components['chatbot'].change(
            fn=update_all_tabs,
            inputs=[chat_controller_state],
            outputs=[ai_feedback_display, ddl_display, live_progress_display, demo_pack_display, spotter_viz_story_display]
        )

        # Load settings from Supabase on startup (uses SETTINGS_SCHEMA)
        def load_settings_on_startup(request: gr.Request = None):
            """Load saved settings from Supabase - uses schema-driven helper"""
            try:
                user_email = require_authenticated_email(request)
                print(f"[LOAD] load_settings_on_startup for {user_email}")
                settings = load_gradio_settings(user_email)
                result = load_settings_values(settings, user_email)
                print(f"[LOAD] load_settings_on_startup OK — {len(result)} values")
                return result
            except Exception as e:
                import traceback
                print(f"[LOAD ERROR] load_settings_on_startup failed: {e}\n{traceback.format_exc()}")
                # Return schema-length list of defaults so Gradio doesn't crash
                return [default for _, _, default, _ in SETTINGS_SCHEMA]

        def load_session_state_on_startup(request: gr.Request = None):
            """Initialize chat/session state from the same authenticated user settings."""
            try:
                user_email = require_authenticated_email(request)
                print(f"[LOAD] load_session_state_on_startup for {user_email}")
            except Exception as e:
                print(f"[LOAD ERROR] load_session_state_on_startup auth failed: {e}")
                user_email = ""
            try:
                settings = load_gradio_settings(user_email)
                company = (str(settings.get("default_company_url", "")).strip() or "Amazon.com")
                use_case = (str(settings.get("default_use_case", "")).strip() or "Sales Analytics")
                model = (str(settings.get("default_llm", "")).strip() or DEFAULT_LLM_MODEL)
                liveboard_name = str(settings.get("liveboard_name", "")).strip()
                initial_message = build_initial_chat_message(company, use_case)
                print(f"[LOAD] load_session_state_on_startup OK — model={model}, company={company}")
                return (
                    "initialization",
                    model,
                    company,
                    use_case,
                    liveboard_name,
                    gr.update(value=model),
                    gr.update(value=liveboard_name),
                    initial_message,
                )
            except Exception as e:
                import traceback
                print(f"[LOAD ERROR] load_session_state_on_startup failed: {e}\n{traceback.format_exc()}")
                # Fall back to hard-coded bootstrap defaults on any failure
                return (
                    "initialization",
                    DEFAULT_LLM_MODEL,
                    "Amazon.com",
                    "Sales Analytics",
                    "",
                    gr.update(value=DEFAULT_LLM_MODEL),
                    gr.update(value=""),
                    "",
                )

        # Wire up load handler - outputs follow SETTINGS_SCHEMA order
        interface.load(
            fn=load_settings_on_startup,
            inputs=[],
            outputs=[settings_components[key] for key, _, _, _ in SETTINGS_SCHEMA]
        )
        interface.load(
            fn=load_session_state_on_startup,
            inputs=[],
            outputs=[
                current_stage,
                current_model,
                current_company,
                current_usecase,
                current_liveboard_name,
                chat_components["model_dropdown"],
                chat_components["liveboard_name_input"],
                chat_components["msg"],
            ]
        )

    return interface


def create_chat_tab(chat_controller_state, settings, current_stage, current_model, current_company, current_usecase, ai_feedback_state=None, ddl_code_state=None, population_code_state=None):
    """Create the main chat interface tab

    IMPORTANT: chat_controller_state is a gr.State that holds each session's
    ChatDemoInterface instance. This enables multi-user concurrent access.
    """
    # Create initial welcome message (before any session exists)
    initial_controller = ChatDemoInterface()
    initial_welcome = initial_controller.format_welcome_message(
        settings['company'], settings['use_case']
    )

    with gr.Row():
        # Left column - Chat
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(
                value=[(None, initial_welcome)],
                height=650,
                label="Demo Builder Assistant",
                show_label=False,
                avatar_images=None,  # No avatars - keeps it clean
                type='tuples'
            )

            with gr.Row():
                msg = gr.Textbox(
                    label="Your message",
                    value="",
                    placeholder="Type your message here or use /over to change settings...",
                    lines=1,
                    max_lines=1,
                    scale=5,
                    show_label=False,
                    interactive=True
                )
                send_btn = gr.Button("Send", variant="primary", scale=1)

            # Quick action buttons
            with gr.Row():
                start_btn = gr.Button("🔍 Start Research", size="sm")
                configure_btn = gr.Button("⚙️ Configure", size="sm")
                help_btn = gr.Button("💡 Help", size="sm")

        # Right column - Status & Settings
        with gr.Column(scale=1):
            # TS Environment selector
            ts_env_choices = get_ts_environments()
            ts_env_dropdown = gr.Dropdown(
                label="TS Environment",
                choices=ts_env_choices,
                value=ts_env_choices[0] if ts_env_choices else None,
                interactive=True,
            )

            # AI Model selector
            model_dropdown = gr.Dropdown(
                label="AI Model",
                choices=list(UI_MODEL_CHOICES),
                value=settings['model'],
                interactive=True,
                allow_custom_value=True
            )

            # Liveboard name (quick access — same setting as in Settings tab)
            liveboard_name_input = gr.Textbox(
                label="Liveboard Name",
                placeholder="Auto-generated if blank",
                value=settings.get('liveboard_name', ''),
                lines=1,
                interactive=True,
            )

            gr.Markdown("### 📈 Progress")

            # Stage order used to determine done/current/upcoming
            _STAGE_ORDER = [
                'initialization',
                'awaiting_context',
                'research',
                'create_ddl',
                'deploy',
                'populate',
                'thoughtspot',
                'outlier_adjustment',
                'complete',
            ]
            # Each display step: (label, [stage keys that map to it])
            _PROGRESS_STEPS = [
                ('Init',          ['initialization']),
                ('Research',      ['awaiting_context', 'research']),
                ('DDL',           ['create_ddl']),
                ('Data',          ['deploy', 'populate']),
                ('ThoughtSpot',   ['thoughtspot']),
                ('Data Adjuster', ['outlier_adjustment']),
                ('Complete',      ['complete']),
            ]

            def get_progress_html(stage):
                """Generate progress HTML showing done/current/upcoming states."""
                try:
                    current_idx = _STAGE_ORDER.index(stage)
                except ValueError:
                    # Unknown stage — treat as the very beginning
                    current_idx = 0
                # Find which display step is current
                current_step = None
                for step_label, step_keys in _PROGRESS_STEPS:
                    for k in step_keys:
                        if stage == k:
                            current_step = step_label
                            break
                # Build ordered list of display steps that are reached
                reached = set()
                for step_label, step_keys in _PROGRESS_STEPS:
                    for k in step_keys:
                        try:
                            if _STAGE_ORDER.index(k) <= current_idx:
                                reached.add(step_label)
                        except ValueError:
                            pass
                # NOTE(review): the HTML markup inside these string literals appears
                # stripped in this copy of the file (presumably styled <div> wrappers);
                # restore the original tags — only the visible text survives here.
                html = ""
                for step_label, _ in _PROGRESS_STEPS:
                    # Skip Data Adjuster unless it's active
                    if step_label == 'Data Adjuster' and step_label not in reached:
                        continue
                    if step_label == current_step:
                        html += (f""
                                 f"▶ {step_label}")
                    elif step_label in reached:
                        html += (f""
                                 f"✓ {step_label}")
                    else:
                        html += (f""
                                 f"○ {step_label}")
                html += ""
                return html

            progress_html = gr.HTML(get_progress_html('initialization'))

    # Event handlers - each creates/uses session-specific controller
    def send_message(controller, message, history, stage, model, company, usecase, env_label=None, liveboard_name_ui=None, request: gr.Request = None):
        """Handle sending a message - creates controller if needed"""
        import traceback
        username = getattr(request, 'username', None) if request else None
        if controller is None:
            controller = ChatDemoInterface(user_email=username)
            print(f"[SESSION] Created new ChatDemoInterface for {username or 'anonymous'}")
            # Apply selected TS environment settings to the new controller
            if env_label:
                _url = get_ts_env_url(env_label)
                _key_value = get_ts_env_auth_key(env_label)
                if _url:
                    controller.settings['thoughtspot_url'] = _url
                if _key_value:
                    controller.settings['thoughtspot_trusted_auth_key'] = _key_value
        # Always use the current UI value — takes priority over DB-loaded default
        if liveboard_name_ui is not None:
            controller.settings['liveboard_name'] = liveboard_name_ui
        try:
            for result in controller.process_chat_message(
                message, history, stage, model, company, usecase
            ):
                # result[1] is the (possibly advanced) stage when present
                new_stage = result[1] if len(result) > 1 else stage
                progress = get_progress_html(new_stage)
                yield (controller,) + result + (progress,)
        except Exception as e:
            err_tb = traceback.format_exc()
            print(f"[ERROR] send_message unhandled exception:\n{err_tb}")
            err_msg = (
                f"❌ **An unexpected error occurred**\n\n"
                f"`{type(e).__name__}: {e}`\n\n"
                f"The pipeline has been interrupted. You can try again or start a new session."
            )
            history = history or []
            history.append((message, err_msg))
            # Yield a tuple matching _send_outputs arity so the UI stays consistent
            yield (controller, history, stage, model, company, usecase, "", get_progress_html(stage))

    def quick_action(controller, action_text, history, stage, model, company, usecase, env_label=None, liveboard_name_ui=None, request: gr.Request = None):
        """Handle quick action button clicks"""
        username = getattr(request, 'username', None) if request else None
        if controller is None:
            controller = ChatDemoInterface(user_email=username)
            print(f"[SESSION] Created new ChatDemoInterface for {username or 'anonymous'}")
            # Apply selected TS environment settings to the new controller
            if env_label:
                _url = get_ts_env_url(env_label)
                _key_value = get_ts_env_auth_key(env_label)
                if _url:
                    controller.settings['thoughtspot_url'] = _url
                if _key_value:
                    controller.settings['thoughtspot_trusted_auth_key'] = _key_value
        # Always use the current UI value — takes priority over DB-loaded default
        if liveboard_name_ui is not None:
            controller.settings['liveboard_name'] = liveboard_name_ui
        for result in controller.process_chat_message(
            action_text, history, stage, model, company, usecase
        ):
            new_stage = result[1] if len(result) > 1 else stage
            progress = get_progress_html(new_stage)
            yield (controller,) + result + (progress,)

    # Wire up send button and enter key
    _send_inputs = [chat_controller_state, msg, chatbot, current_stage, current_model, current_company, current_usecase, ts_env_dropdown, liveboard_name_input]
    _send_outputs = [chat_controller_state, chatbot, current_stage, current_model, current_company, current_usecase, msg, progress_html]
    msg.submit(fn=send_message, inputs=_send_inputs, outputs=_send_outputs)
    send_btn.click(fn=send_message, inputs=_send_inputs, outputs=_send_outputs)

    # Quick action wrapper functions
    def start_action(controller, history, stage, model, company, usecase, env_label, liveboard_name_ui):
        yield from quick_action(controller, "Start research", history, stage, model, company, usecase, env_label, liveboard_name_ui)

    def configure_action(controller, history, stage, model, company, usecase, env_label, liveboard_name_ui):
        yield from quick_action(controller, "Configure settings", history, stage, model, company, usecase, env_label, liveboard_name_ui)

    def help_action(controller, history, stage, model, company, usecase, env_label, liveboard_name_ui):
        yield from quick_action(controller, "Help", history, stage, model, company, usecase, env_label, liveboard_name_ui)

    _action_inputs = [chat_controller_state, chatbot, current_stage, current_model, current_company, current_usecase, ts_env_dropdown, liveboard_name_input]
    _action_outputs = [chat_controller_state, chatbot, current_stage, current_model, current_company, current_usecase, msg, progress_html]

    # Quick action buttons
    start_btn.click(fn=start_action, inputs=_action_inputs, outputs=_action_outputs)
    configure_btn.click(fn=configure_action, inputs=_action_inputs, outputs=_action_outputs)
    help_btn.click(fn=help_action, inputs=_action_inputs, outputs=_action_outputs)

    # Model dropdown change
    def update_model(new_model, controller, history):
        if controller is not None:
            controller.settings['model'] = new_model
        return new_model, history

    model_dropdown.change(
        fn=update_model,
        inputs=[model_dropdown, chat_controller_state, chatbot],
        outputs=[current_model, chatbot]
    )

    # Liveboard name blur — update controller settings when user leaves the field.
    # Using blur (not change) avoids per-keystroke queue events that cause the
    # Gradio progress spinner/timer to appear on this component while a deploy runs.
    # No output needed — the value is already in the textbox; we only update the dict.
    def update_liveboard_name(name, controller):
        if controller is not None:
            controller.settings['liveboard_name'] = name

    liveboard_name_input.blur(
        fn=update_liveboard_name,
        inputs=[liveboard_name_input, chat_controller_state],
        outputs=[]
    )

    # TS environment change — update controller settings in real-time
    def update_ts_env(label, controller):
        url = get_ts_env_url(label)
        auth_key_value = get_ts_env_auth_key(label)
        if controller is not None:
            if url:
                controller.settings['thoughtspot_url'] = url
            if auth_key_value:
                controller.settings['thoughtspot_trusted_auth_key'] = auth_key_value
        return label

    ts_env_dropdown.change(
        fn=update_ts_env,
        inputs=[ts_env_dropdown, chat_controller_state],
        outputs=[]
    )

    # Return components for external access
    return {
        'chatbot': chatbot,
        'msg': msg,
        'model_dropdown': model_dropdown,
        'send_btn': send_btn,
        'send_btn_ref': send_btn,
        'ts_env_dropdown': ts_env_dropdown,
        'liveboard_name_input': liveboard_name_input,
        'progress_html': progress_html
    }


def create_settings_tab():
    """Create the settings configuration tab - returns components for loading

    Uses module-level SETTINGS_SCHEMA for consistency with load/save functions.
""" gr.Markdown("## ⚙️ Configuration Settings") gr.Markdown("Configure your demo builder preferences") # Default Settings — the three fields that appear on the chat page too gr.Markdown("### ⭐ Default Settings") gr.Markdown("*These also appear on the chat page — set them once here as defaults.*") # default_company_url removed — company is set via chat conversation default_company_url = gr.Textbox(visible=False) # Build use case choices from VERTICALS × FUNCTIONS matrix use_case_choices = [] for v_name in VERTICALS: for f_name in FUNCTIONS: use_case_choices.append(f"{v_name} {f_name}") use_case_choices.append("Custom (type in chat)") with gr.Row(): default_ai_model = gr.Dropdown( label="Default AI Model", choices=list(UI_MODEL_CHOICES), value=DEFAULT_LLM_MODEL, info="Primary model for demo generation", allow_custom_value=True ) default_use_case = gr.Dropdown( label="Default Use Case", choices=use_case_choices, value="Retail Sales", info="Vertical × Function combination, or type any custom use case" ) liveboard_name = gr.Textbox( label="Default Liveboard Name", placeholder="My Demo Liveboard", value="", info="Default name for generated liveboards (overridable on chat page)" ) gr.Markdown("---") gr.Markdown("### 🔧 App Settings") with gr.Row(): with gr.Column(): tag_name = gr.Textbox( label="Tag Name", placeholder="e.g., 'Sales_Demo' or 'Q4_2024'", value="", info="Tag to apply to all ThoughtSpot objects (connection, tables, model, liveboard)" ) fact_table_size = gr.Dropdown( label="Fact Table Size", choices=["1000", "10000", "100000"], value="1000", info="Number of rows in fact table" ) dim_table_size = gr.Dropdown( label="Dim Table Size", choices=["50", "100", "1000"], value="100", info="Number of rows in dimension tables" ) with gr.Column(): object_naming_prefix = gr.Textbox( label="Object Naming Prefix", placeholder="e.g., 'ACME_' or 'DEMO_'", value="", info="Prefix for ThoughtSpot objects (for future use)" ) column_naming_style = gr.Dropdown( label="Column Naming 
Style", choices=["Regular Case", "snake_case", "camelCase", "PascalCase", "UPPER_CASE", "original"],
    value="Regular Case",
    info="Naming convention for ThoughtSpot model columns (Regular Case = State Id, Total Revenue)"
)
# Hidden — use_existing_model kept in schema for backward compat but not shown
use_existing_model = gr.Checkbox(visible=False, value=False)
existing_model_guid = gr.Textbox(visible=False, value="")

gr.Markdown("---")
gr.Markdown("### 🌍 Other Settings")
with gr.Row():
    with gr.Column():
        # Controls locale of generated demo data (locations + currency).
        geo_scope = gr.Dropdown(
            label="Geographic Scope",
            choices=["USA Only", "International"],
            value="USA Only",
            info="USA Only: US states/cities, USD currency. International: Global locations, multiple currencies."
        )
        # "Off" = pipeline auto-runs end to end; "On" = pauses at DDL & TS checkpoints.
        validation_mode = gr.Radio(
            label="Validation Mode",
            choices=["On", "Off"],
            value="Off",
            info="On: Pause at DDL & TS checkpoints. Off: Auto-run entire pipeline after context question."
        )

# Advanced AI Settings (Admin Only)
# Created visible=True; presumably toggled per-user via the '_admin_ai_accordion'
# entry in all_components below — TODO confirm against the loading code.
admin_ai_accordion = gr.Accordion("🤖 Advanced AI Settings (Admin)", open=False, visible=True)
with admin_ai_accordion:
    with gr.Row():
        temperature_slider = gr.Slider(
            minimum=0.0, maximum=1.0, value=0.3, step=0.1,
            label="Temperature",
            info="Controls randomness (lower = more focused)"
        )
        max_tokens = gr.Number(
            value=4000,
            label="Max Tokens",
            info="Maximum tokens for AI responses"
        )
    with gr.Row():
        batch_size = gr.Slider(
            minimum=1000, maximum=50000, value=5000, step=1000,
            label="Batch Size",
            info="Rows per batch for bulk operations"
        )
        thread_count = gr.Slider(
            minimum=1, maximum=16, value=4, step=1,
            label="Thread Count",
            info="Parallel threads for data generation"
        )

# Database Connections (Admin Only)
admin_db_accordion = gr.Accordion("💾 Database Connections (Admin)", open=False, visible=True)
with admin_db_accordion:
    gr.Markdown("""
**⚠️ Note:** These fields are **legacy placeholders** and are **not used by deploy runtime**.
Active deployment credentials come from **Admin Settings** (system-wide `__admin__` values in Supabase).
This section is for reference/future use only.
""")
    with gr.Row():
        with gr.Column():
            gr.Markdown("### ❄️ Snowflake Connection")
            sf_account = gr.Textbox(
                label="Snowflake Account",
                placeholder="xy12345.us-east-1",
                info="Your Snowflake account identifier"
            )
            sf_user = gr.Textbox(
                label="Snowflake User",
                placeholder="your_username",
                info="Snowflake username (password in .env)"
            )
            sf_role = gr.Textbox(
                label="Snowflake Role",
                value="ACCOUNTADMIN",
                info="Default role for connections"
            )
            default_warehouse = gr.Textbox(
                label="Default Warehouse",
                value="COMPUTE_WH",
                info="Snowflake warehouse for demos"
            )
            default_database = gr.Textbox(
                label="Default Database",
                value="DEMO_DB",
                info="Snowflake database for demos"
            )
            default_schema = gr.Textbox(
                label="Default Schema",
                value="PUBLIC",
                info="Default schema for demos"
            )
        with gr.Column():
            gr.Markdown("### 📊 ThoughtSpot Settings")
            # ts_instance_url removed — replaced by TS Environment dropdown on front page
            # (kept as a hidden component so SETTINGS_SCHEMA positions stay stable).
            ts_instance_url = gr.Textbox(visible=False)
            ts_username = gr.Textbox(
                label="ThoughtSpot Username",
                placeholder="your.email@company.com",
                info="Your ThoughtSpot login"
            )
            ts_password = gr.Textbox(
                label="ThoughtSpot Password",
                placeholder="••••••••",
                type="password",
                info="Your ThoughtSpot password (stored securely)"
            )

gr.Markdown("---")
gr.Markdown("### 🔧 Data Adjuster")
gr.Markdown("*Jump straight to data adjustment on an existing liveboard — skips the build pipeline entirely.*")
with gr.Row():
    with gr.Column():
        data_adjuster_url = gr.Textbox(
            label="Liveboard URL",
            placeholder="https://your-instance.thoughtspot.cloud/#/pinboard/guid",
            value="",
            info="Paste a ThoughtSpot liveboard URL to open it directly in Data Adjuster"
        )

gr.Markdown("---")
gr.Markdown("### 🔗 Sharing")
gr.Markdown("*After building, model and liveboard are shared with this user or group (can edit).*")
with gr.Row():
    with gr.Column():
        share_with = gr.Textbox(
            label="Share With (User or Group)",
            placeholder="your.email@company.com or group-name",
            value="",
            info="User email (contains @) or group name. Leave blank to skip sharing."
        )

gr.Markdown("---")
with gr.Row():
    save_settings_btn = gr.Button("💾 Save Settings", variant="primary", size="lg")
    reset_settings_btn = gr.Button("🔄 Reset to Defaults", size="lg")
settings_status = gr.Markdown("")

def save_settings_handler(request: gr.Request, *args):
    """Save all settings to Supabase - uses schema-driven helper

    `request` is injected by Gradio (not listed in the click inputs); *args
    arrive in SETTINGS_SCHEMA order because `save_inputs` is built from it.
    Returns a markdown status string for the `settings_status` component.
    """
    try:
        from supabase_client import SupabaseSettings
        user_email = require_authenticated_email(request)
        settings_client = SupabaseSettings()
        if not settings_client.is_enabled():
            return "⚠️ Supabase not configured. Settings saved locally only."
        # Build save dict from args using schema
        settings_to_save = build_settings_save_dict(list(args))
        success = settings_client.save_all_settings(user_email, settings_to_save)
        if success:
            return f"✅ **Settings saved successfully!**\n\nSaved for user: `{user_email}`"
        else:
            return "❌ Error saving some settings. Check console for details."
    except Exception as e:
        # Surface the error in the UI rather than failing silently.
        return f"❌ Error saving settings: {str(e)}"

def reset_settings_handler():
    """Placeholder reset — only reports status; a page refresh restores defaults."""
    return "🔄 Settings reset to defaults! (Refresh page to see defaults)"

# Build components dict matching SETTINGS_SCHEMA order
# This is the single mapping from schema keys to component variables
all_components = {
    # App Settings - Left Column
    'default_ai_model': default_ai_model,
    # NOTE(review): `default_company_url` was removed from SETTINGS_SCHEMA
    # ("company is chat-driven now") and no component definition is visible
    # here — verify it is still defined earlier in this function, otherwise
    # this line raises NameError when the tab is built.
    'default_company_url': default_company_url,
    'default_use_case': default_use_case,
    'liveboard_name': liveboard_name,
    'tag_name': tag_name,
    # App Settings - Right Column
    'fact_table_size': fact_table_size,
    'dim_table_size': dim_table_size,
    'object_naming_prefix': object_naming_prefix,
    'column_naming_style': column_naming_style,
    'use_existing_model': use_existing_model,
    'existing_model_guid': existing_model_guid,
    'geo_scope': geo_scope,
    'validation_mode': validation_mode,
    # Advanced AI Settings
    'temperature_slider': temperature_slider,
    'max_tokens': max_tokens,
    'batch_size': batch_size,
    'thread_count': thread_count,
    # Database Connection Settings
    'sf_account': sf_account,
    'sf_user': sf_user,
    'sf_role': sf_role,
    'default_warehouse': default_warehouse,
    'default_database': default_database,
    'default_schema': default_schema,
    'ts_instance_url': ts_instance_url,
    'ts_username': ts_username,
    'data_adjuster_url': data_adjuster_url,
    'share_with': share_with,
    # Status
    'settings_status': settings_status,
    # Admin-only visibility toggles
    '_admin_ai_accordion': admin_ai_accordion,
    '_admin_db_accordion': admin_db_accordion,
}

# Get inputs in schema order (exclude settings_status which is output only)
# NOTE(review): positional alignment with SETTINGS_SCHEMA is the contract —
# build_settings_save_dict presumably consumes *args in the same order.
save_inputs = [all_components[key] for key, storage_key, _, _ in SETTINGS_SCHEMA if storage_key is not None]

save_settings_btn.click(
    fn=save_settings_handler,
    inputs=save_inputs,
    outputs=[settings_status]
)
reset_settings_btn.click(
    fn=reset_settings_handler,
    inputs=[],
    outputs=[settings_status]
)

# -----------------------------------------------------------------------
# Change Password
# -----------------------------------------------------------------------
gr.Markdown("---")
with gr.Accordion("🔒 Change Password", open=False):
    with gr.Row():
        with gr.Column(scale=1):
            cp_current = gr.Textbox(label="Current Password", type="password", placeholder="Your current password")
            cp_new = gr.Textbox(label="New Password", type="password", placeholder="At least 8 characters")
            cp_confirm = gr.Textbox(label="Confirm New Password", type="password", placeholder="Repeat new password")
            cp_btn = gr.Button("Change Password", variant="primary")
            cp_status = gr.Markdown("")
        with gr.Column(scale=1):
            gr.Markdown("""
**Password requirements:**
- At least 8 characters
- Current password required to confirm identity

*Forgot your password? Ask an admin to reset it via the Admin tab, then change it here after signing in.*
""")

def change_password_handler(current, new_pw, confirm, request: gr.Request = None):
    """Validate inputs, re-authenticate, then change the signed-in user's password.

    Returns a markdown status string for the `cp_status` component.
    """
    # Cheap local validation before touching the backend.
    if not current or not new_pw or not confirm:
        return "❌ All fields are required."
    if new_pw != confirm:
        return "❌ New passwords don't match."
    if len(new_pw) < 8:
        return "❌ New password must be at least 8 characters."
    try:
        user_email = require_authenticated_email(request)
        from supabase_client import UserManager
        um = UserManager()
        if not um.enabled:
            return "⚠️ Supabase not configured — password change unavailable."
        # Require the current password to confirm identity before resetting.
        if not um.authenticate(user_email, current):
            return "❌ Current password is incorrect."
        um.reset_password(user_email, new_pw)
        return "✅ Password changed successfully. Use your new password next time you sign in."
    except Exception as e:
        return f"❌ Error: {e}"

cp_btn.click(
    fn=change_password_handler,
    inputs=[cp_current, cp_new, cp_confirm],
    outputs=[cp_status]
)

# Return components dict for loading (follows schema order)
return all_components


def authenticate_user(username: str, password: str) -> bool:
    """
    Gradio auth callback — validates against Supabase demoprep_users table.
    Falls back to no-auth if Supabase is not configured (local dev).
    """
    try:
        from supabase_client import UserManager
        um = UserManager()
        if not um.enabled:
            # Supabase not configured — allow anyone (local dev mode)
            print(f"[Auth] Supabase not configured, allowing login for: {username}")
            return True
        user = um.authenticate(username, password)
        if user:
            # Keep env-backed modules aligned with Supabase admin settings after login.
            inject_admin_settings_to_env()
            print(f"[Auth] Login successful: {username} (admin={user.get('is_admin', False)})")
            return True
        else:
            print(f"[Auth] Login failed: {username}")
            return False
    except Exception as e:
        print(f"[Auth] Error during authentication: {e}")
        # If auth system is broken, don't lock everyone out
        return False


if __name__ == "__main__":
    """Launch the chat interface standalone"""
    print("Starting Chat-Based Demo Builder...")
    app = create_chat_interface()
    # Enable queue with concurrency to handle multiple requests
    app.queue(
        default_concurrency_limit=10,  # Allow up to 10 concurrent requests
        api_open=False
    )
    # Bypass Gradio localhost accessibility check (httpx 0.28 compatibility)
    import gradio.networking as _gn
    _gn.url_ok = lambda url: True
    # Determine auth mode
    # If DEMOPREP_NO_AUTH=true, skip login (local dev override)
    no_auth = os.getenv('DEMOPREP_NO_AUTH', 'false').lower() in ('true', '1', 'yes')
    auth_fn = None if no_auth else authenticate_user
    app.launch(
        server_name="0.0.0.0",
        server_port=7863,  # Different port from main app (7860) and old chat (7861)
        share=False,
        inbrowser=False,
        debug=True,
        auth=auth_fn,
        max_threads=20  # Allow multiple threads for concurrent requests
    )