mikeboone Claude committed on
Commit
b2a3e22
·
1 Parent(s): ae8e50d

fix: Implement consistent naming convention across all objects

Browse files

- Replace verbose DM{YYMMDD}_{HHMMSS}_{COMPANY}_{USECASE} format
- New format: {PREFIX}_{DDMICROS}_{objtype} (prefix optional)
- Extract base timestamp from schema to ensure consistency across objects
- Add naming_prefix field to UI settings (saved in Supabase)
- Fix DDL transaction commit in cdw_connector.py:153
- Add USE SCHEMA command to population scripts for proper context
- Remove confusing "per table" timing messages, show batch call times only
- Enhance Liveboard error handling with detailed exception catching

Key fixes:
- Schema/connection/model now use same base timestamp
- Tables persist in Snowflake (commit added)
- Population scripts set proper schema context
- Clean performance metrics display

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (6) hide show
  1. cdw_connector.py +36 -10
  2. demo_logger.py +87 -29
  3. demo_prep.py +142 -63
  4. liveboard_creator.py +87 -7
  5. supabase_client.py +1 -0
  6. thoughtspot_deployer.py +513 -190
cdw_connector.py CHANGED
@@ -72,19 +72,40 @@ class SnowflakeDeployer:
72
  except Exception as e:
73
  print(f"Error closing connection: {str(e)}")
74
 
75
- def create_demo_schema_and_deploy(self, company_name: str, use_case: str, ddl_statements: str) -> Tuple[bool, Optional[str], str]:
76
- """Create timestamped schema and deploy DDL statements"""
 
 
 
 
 
77
  if not self.connection:
78
  success, message = self.connect()
79
  if not success:
80
  return False, None, f"Connection failed: {message}"
81
-
82
  try:
83
- # Generate schema name with timestamp
84
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
85
- company_short = company_name.replace(" ", "")[:5] # First 5 chars
86
- use_case_short = use_case.replace(" ", "")[:3] # First 3 chars
87
- schema_name = f"{timestamp}_{company_short}_{use_case_short}".upper()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
  cursor = self.connection.cursor()
90
 
@@ -127,15 +148,20 @@ class SnowflakeDeployer:
127
  # Check autocommit setting
128
  print(f"πŸ” DEBUG: Connection autocommit: {self.connection.autocommit}")
129
 
 
 
 
 
 
130
  # VERIFY the schema actually exists
131
  print(f"πŸ” DEBUG: Verifying schema exists...")
132
  cursor.execute(f'SELECT CURRENT_SCHEMA()')
133
  current_schema = cursor.fetchone()[0]
134
  print(f"πŸ” DEBUG: Current schema: {current_schema}")
135
  print(f"πŸ” DEBUG: Schema verification completed - using current schema context")
136
-
137
  cursor.close()
138
-
139
  success_message = f"Schema '{schema_name}' created successfully with {executed_count} tables"
140
  return True, schema_name, success_message
141
 
 
72
  except Exception as e:
73
  print(f"Error closing connection: {str(e)}")
74
 
75
+ def create_demo_schema_and_deploy(self, company_name: str, use_case: str, ddl_statements: str, naming_prefix: str = "") -> Tuple[bool, Optional[str], str]:
76
+ """Create timestamped schema and deploy DDL statements
77
+
78
+ Args:
79
+ naming_prefix: Optional custom prefix for schema name
80
+ Format: {prefix}_{day+ms}_scm or just {day+ms}_scm if blank
81
+ """
82
  if not self.connection:
83
  success, message = self.connect()
84
  if not success:
85
  return False, None, f"Connection failed: {message}"
86
+
87
  try:
88
+ # Generate schema name using same convention as ThoughtSpot objects
89
+ import re
90
+ from datetime import datetime as dt
91
+ now = dt.now()
92
+
93
+ # Clean prefix (uppercase, alphanumeric only) - can be empty string
94
+ if naming_prefix:
95
+ prefix_clean = re.sub(r'[^a-zA-Z0-9]', '', naming_prefix.upper())
96
+ else:
97
+ prefix_clean = ""
98
+
99
+ # Short unique timestamp: day (2 digits) + microseconds (6 digits)
100
+ day = now.strftime('%d')
101
+ microsec = now.strftime('%f') # 6 digits
102
+ short_timestamp = f"{day}{microsec}"
103
+
104
+ # Base format: PREFIX_DDMICROS_scm (prefix can be empty)
105
+ if prefix_clean:
106
+ schema_name = f"{prefix_clean}_{short_timestamp}_scm"
107
+ else:
108
+ schema_name = f"{short_timestamp}_scm"
109
 
110
  cursor = self.connection.cursor()
111
 
 
148
  # Check autocommit setting
149
  print(f"πŸ” DEBUG: Connection autocommit: {self.connection.autocommit}")
150
 
151
+ # COMMIT the transaction
152
+ print(f"πŸ” DEBUG: Committing transaction...")
153
+ self.connection.commit()
154
+ print(f"βœ… Transaction committed - {executed_count} tables created")
155
+
156
  # VERIFY the schema actually exists
157
  print(f"πŸ” DEBUG: Verifying schema exists...")
158
  cursor.execute(f'SELECT CURRENT_SCHEMA()')
159
  current_schema = cursor.fetchone()[0]
160
  print(f"πŸ” DEBUG: Current schema: {current_schema}")
161
  print(f"πŸ” DEBUG: Schema verification completed - using current schema context")
162
+
163
  cursor.close()
164
+
165
  success_message = f"Schema '{schema_name}' created successfully with {executed_count} tables"
166
  return True, schema_name, success_message
167
 
demo_logger.py CHANGED
@@ -25,17 +25,24 @@ load_dotenv()
25
  class DemoLogger:
26
  """Comprehensive logger for demo creation process"""
27
 
28
- def __init__(self, session_id: str = None, user_email: str = None):
29
  """
30
  Initialize logger with session ID and optional user email
31
 
32
  Args:
33
  session_id: Unique ID for this demo creation session
34
  user_email: User's email for tracking
 
35
  """
36
  self.session_id = session_id or datetime.now().strftime("%Y%m%d_%H%M%S")
37
  self.user_email = user_email or "anonymous"
38
 
 
 
 
 
 
 
39
  # Create logs directory
40
  self.log_dir = Path("logs") / "demo_sessions" / self.session_id
41
  self.log_dir.mkdir(parents=True, exist_ok=True)
@@ -64,7 +71,14 @@ class DemoLogger:
64
  "status": "started"
65
  }
66
 
67
- self.logger.info(f"Demo session started: {self.session_id}")
 
 
 
 
 
 
 
68
 
69
  def setup_logger(self):
70
  """Set up Python logger"""
@@ -100,47 +114,79 @@ class DemoLogger:
100
  self.session_data["llm_model"] = llm_model
101
  self.session_data["settings"].update(kwargs)
102
 
103
- self.logger.info(f"Demo Configuration:")
104
- self.logger.info(f" Use Case: {use_case}")
105
- self.logger.info(f" Company: {company}")
106
- self.logger.info(f" URL: {company_url}")
107
- self.logger.info(f" Industry: {industry}")
108
- self.logger.info(f" LLM Model: {llm_model}")
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
  self.save_session_data()
111
 
112
  def log_ddl(self, ddl_content: str, schema_name: str = None):
113
- """Log the generated DDL"""
114
- self.session_data["ddl"] = ddl_content
 
 
 
 
 
115
  self.session_data["schema_name"] = schema_name
116
 
117
- # Save DDL to separate file
118
  ddl_file = self.log_dir / "generated_ddl.sql"
119
  with open(ddl_file, 'w') as f:
120
  f.write(ddl_content)
121
 
122
  # Log summary
123
  tables = ddl_content.count("CREATE TABLE")
124
- self.logger.info(f"DDL Generated: {tables} tables, {len(ddl_content)} characters")
125
- self.logger.info(f"DDL saved to: {ddl_file}")
126
-
127
- # Check for SALESTRANSACTIONS table (known issue)
128
- if "SALESTRANSACTIONS" not in ddl_content.upper():
129
- self.log_warning("SALESTRANSACTIONS table not found in DDL - this will cause deployment issues!")
 
 
 
 
 
 
 
 
 
130
 
131
  self.save_session_data()
132
 
133
  def log_population_code(self, code: str):
134
- """Log the generated population code"""
135
- self.session_data["population_code"] = code[:5000] # Truncate for storage
 
136
 
137
- # Save full code to file
138
  code_file = self.log_dir / "population_code.py"
139
  with open(code_file, 'w') as f:
140
  f.write(code)
141
 
142
- self.logger.info(f"Population code generated: {len(code)} characters")
143
- self.logger.info(f"Code saved to: {code_file}")
 
 
 
 
144
 
145
  self.save_session_data()
146
 
@@ -148,9 +194,16 @@ class DemoLogger:
148
  """Log a discovered outlier"""
149
  self.session_data["outliers"].append(outlier)
150
 
151
- self.logger.info(f"Outlier discovered: {outlier.get('title', 'Unknown')}")
152
- self.logger.debug(f" Insight: {outlier.get('insight', 'N/A')}")
153
- self.logger.debug(f" Impact: {outlier.get('impact', 'N/A')}")
 
 
 
 
 
 
 
154
 
155
  self.save_session_data()
156
 
@@ -190,13 +243,14 @@ class DemoLogger:
190
  self.save_session_data()
191
 
192
  def log_stage_completion(self, stage: str, success: bool,
193
- duration_seconds: float = None):
194
  """Log completion of a demo creation stage"""
195
  stage_data = {
196
  "stage": stage,
197
  "success": success,
198
  "duration": duration_seconds,
199
- "timestamp": datetime.now().isoformat()
 
200
  }
201
 
202
  if "stages" not in self.session_data:
@@ -205,9 +259,13 @@ class DemoLogger:
205
  self.session_data["stages"].append(stage_data)
206
 
207
  status = "βœ… SUCCESS" if success else "❌ FAILED"
208
- self.logger.info(f"Stage '{stage}' {status}")
 
209
  if duration_seconds:
210
- self.logger.info(f" Duration: {duration_seconds:.2f} seconds")
 
 
 
211
 
212
  self.save_session_data()
213
 
 
25
  class DemoLogger:
26
  """Comprehensive logger for demo creation process"""
27
 
28
+ def __init__(self, session_id: str = None, user_email: str = None, debug_mode: bool = None):
29
  """
30
  Initialize logger with session ID and optional user email
31
 
32
  Args:
33
  session_id: Unique ID for this demo creation session
34
  user_email: User's email for tracking
35
+ debug_mode: Enable debug mode (saves DDL, population code, outliers)
36
  """
37
  self.session_id = session_id or datetime.now().strftime("%Y%m%d_%H%M%S")
38
  self.user_email = user_email or "anonymous"
39
 
40
+ # Check for DEBUG environment variable or parameter
41
+ if debug_mode is None:
42
+ self.debug_mode = os.getenv('DEBUG', 'false').lower() in ['true', '1', 'yes']
43
+ else:
44
+ self.debug_mode = debug_mode
45
+
46
  # Create logs directory
47
  self.log_dir = Path("logs") / "demo_sessions" / self.session_id
48
  self.log_dir.mkdir(parents=True, exist_ok=True)
 
71
  "status": "started"
72
  }
73
 
74
+ # Log session start with timestamp
75
+ self.logger.info("="*80)
76
+ self.logger.info(f"πŸš€ Demo Session Started")
77
+ self.logger.info(f" Session ID: {self.session_id}")
78
+ self.logger.info(f" User: {self.user_email}")
79
+ self.logger.info(f" Timestamp: {self.session_data['start_time']}")
80
+ self.logger.info(f" Debug Mode: {'ON' if self.debug_mode else 'OFF'}")
81
+ self.logger.info("="*80)
82
 
83
  def setup_logger(self):
84
  """Set up Python logger"""
 
114
  self.session_data["llm_model"] = llm_model
115
  self.session_data["settings"].update(kwargs)
116
 
117
+ self.logger.info("="*80)
118
+ self.logger.info("πŸ“‹ Demo Configuration")
119
+ self.logger.info("="*80)
120
+ self.logger.info(f"🎯 MAIN SETTINGS:")
121
+ self.logger.info(f" Company URL: {company_url}")
122
+ self.logger.info(f" Use Case: {use_case}")
123
+ self.logger.info(f" LLM Model: {llm_model}")
124
+ self.logger.info(f"")
125
+ self.logger.info(f"πŸ“Š Additional Info:")
126
+ self.logger.info(f" Company: {company}")
127
+ self.logger.info(f" Industry: {industry}")
128
+
129
+ if kwargs:
130
+ self.logger.info(f"")
131
+ self.logger.info(f"βš™οΈ Other Settings:")
132
+ for key, value in kwargs.items():
133
+ self.logger.info(f" {key}: {value}")
134
+
135
+ self.logger.info("="*80)
136
 
137
  self.save_session_data()
138
 
139
  def log_ddl(self, ddl_content: str, schema_name: str = None):
140
+ """Log the generated DDL - ALWAYS saves to file"""
141
+ # Always save full DDL if debug mode
142
+ if self.debug_mode:
143
+ self.session_data["ddl"] = ddl_content
144
+ else:
145
+ self.session_data["ddl"] = ddl_content[:1000] + "..." if len(ddl_content) > 1000 else ddl_content
146
+
147
  self.session_data["schema_name"] = schema_name
148
 
149
+ # ALWAYS save DDL to separate file
150
  ddl_file = self.log_dir / "generated_ddl.sql"
151
  with open(ddl_file, 'w') as f:
152
  f.write(ddl_content)
153
 
154
  # Log summary
155
  tables = ddl_content.count("CREATE TABLE")
156
+ self.logger.info("="*80)
157
+ self.logger.info(f"πŸ“ DDL Generated")
158
+ self.logger.info("="*80)
159
+ self.logger.info(f" Tables: {tables}")
160
+ self.logger.info(f" Size: {len(ddl_content)} characters")
161
+ self.logger.info(f" Schema: {schema_name or 'N/A'}")
162
+ self.logger.info(f" Saved to: {ddl_file}")
163
+
164
+ # Check for SALESTRANSACTIONS or SALES_TRANSACTIONS table (known issue)
165
+ ddl_upper = ddl_content.upper()
166
+ if "SALESTRANSACTIONS" not in ddl_upper and "SALES_TRANSACTIONS" not in ddl_upper:
167
+ self.logger.warning("⚠️ SALESTRANSACTIONS table not found in DDL - this will cause deployment issues!")
168
+ self.log_warning("SALESTRANSACTIONS table not found in DDL")
169
+
170
+ self.logger.info("="*80)
171
 
172
  self.save_session_data()
173
 
174
  def log_population_code(self, code: str):
175
+ """Log the generated population code - saves to file if DEBUG mode"""
176
+ # Always save summary
177
+ self.session_data["population_code"] = code[:5000] if not self.debug_mode else code
178
 
179
+ # ALWAYS save full code to file
180
  code_file = self.log_dir / "population_code.py"
181
  with open(code_file, 'w') as f:
182
  f.write(code)
183
 
184
+ self.logger.info("="*80)
185
+ self.logger.info(f"🐍 Population Code Generated")
186
+ self.logger.info("="*80)
187
+ self.logger.info(f" Size: {len(code)} characters")
188
+ self.logger.info(f" Saved to: {code_file}")
189
+ self.logger.info("="*80)
190
 
191
  self.save_session_data()
192
 
 
194
  """Log a discovered outlier"""
195
  self.session_data["outliers"].append(outlier)
196
 
197
+ self.logger.info(f"πŸ’‘ Outlier discovered: {outlier.get('title', 'Unknown')}")
198
+ if self.debug_mode:
199
+ self.logger.info(f" Insight: {outlier.get('insight', 'N/A')}")
200
+ self.logger.info(f" Impact: {outlier.get('impact', 'N/A')}")
201
+
202
+ # Save all outliers to file in DEBUG mode
203
+ if self.debug_mode:
204
+ outliers_file = self.log_dir / "outliers.json"
205
+ with open(outliers_file, 'w') as f:
206
+ json.dump(self.session_data["outliers"], f, indent=2)
207
 
208
  self.save_session_data()
209
 
 
243
  self.save_session_data()
244
 
245
  def log_stage_completion(self, stage: str, success: bool,
246
+ duration_seconds: float = None, details: str = None):
247
  """Log completion of a demo creation stage"""
248
  stage_data = {
249
  "stage": stage,
250
  "success": success,
251
  "duration": duration_seconds,
252
+ "timestamp": datetime.now().isoformat(),
253
+ "details": details
254
  }
255
 
256
  if "stages" not in self.session_data:
 
259
  self.session_data["stages"].append(stage_data)
260
 
261
  status = "βœ… SUCCESS" if success else "❌ FAILED"
262
+ self.logger.info("="*80)
263
+ self.logger.info(f"πŸ“ Stage: {stage.upper()} - {status}")
264
  if duration_seconds:
265
+ self.logger.info(f" Duration: {duration_seconds:.2f} seconds")
266
+ if details:
267
+ self.logger.info(f" Details: {details}")
268
+ self.logger.info("="*80)
269
 
270
  self.save_session_data()
271
 
demo_prep.py CHANGED
@@ -313,26 +313,34 @@ def execute_population_script(python_code, schema_name):
313
  "conn_params = get_snowflake_connection_params()",
314
  "conn_params = get_snowflake_connection_params()\nconn_params.pop('schema', None) # Remove schema to avoid duplicate"
315
  )
316
-
 
 
 
 
 
 
317
  # Simple and safe schema replacement - just replace the placeholder
318
  cleaned_code = cleaned_code.replace("os.getenv('SNOWFLAKE_SCHEMA')", f"'{schema_name}'")
319
  cleaned_code = cleaned_code.replace('os.getenv("SNOWFLAKE_SCHEMA")', f'"{schema_name}"')
320
 
 
 
 
321
  # FIX: Remove fake.unique() calls that cause "duplicated values after 1,000 iterations" error
322
  cleaned_code = cleaned_code.replace("fake.unique.word()", "fake.word()")
323
  cleaned_code = cleaned_code.replace("fake.unique.email()", "fake.email()")
324
  cleaned_code = cleaned_code.replace("fake.unique.company()", "fake.company()")
325
 
326
  # FIX: Truncate phone numbers to avoid extension overflow (e.g., '790-923-3730x07350')
327
- # Handle multiple variations of phone number generation
328
- cleaned_code = cleaned_code.replace("fake.phone_number()", "fake.phone_number()[:20]")
329
- cleaned_code = re.sub(r"faker\.phone_number\(\)", "faker.phone_number()[:20]", cleaned_code)
330
- cleaned_code = re.sub(r"'Phone':\s*fake\.phone_number\(\)", "'Phone': fake.phone_number()[:20]", cleaned_code)
331
  # Also handle when it's in a larger expression or assignment
332
- cleaned_code = re.sub(r"(\w+\.phone_number\(\))(?!\[)", r"\1[:20]", cleaned_code)
333
 
334
  # FIX: Convert SQLite-style ? placeholders to Snowflake-style %s placeholders
335
- import re
336
  cleaned_code = re.sub(r'\bVALUES\s*\(\?', 'VALUES (%s', cleaned_code)
337
  cleaned_code = re.sub(r',\s*\?', ', %s', cleaned_code)
338
 
@@ -343,9 +351,8 @@ def execute_population_script(python_code, schema_name):
343
  print("πŸš€ STARTING DATA POPULATION EXECUTION")
344
  print("=" * 50)"""
345
  )
346
-
347
  # Add logging to populate functions dynamically
348
- import re
349
 
350
  # Find all populate function definitions and add logging
351
  def add_function_logging(match):
@@ -397,6 +404,7 @@ def execute_population_script(python_code, schema_name):
397
  "os": os,
398
  "random": random,
399
  "datetime": datetime,
 
400
  "get_snowflake_connection_params": get_snowflake_connection_params,
401
  }
402
 
@@ -408,11 +416,24 @@ def execute_population_script(python_code, schema_name):
408
  print(f"πŸ” DEBUG: Cleaned code preview (first 500 chars):")
409
  print(cleaned_code[:500])
410
  print("...")
411
-
412
- # Save the full generated code to a file for debugging
413
- with open(f"/tmp/generated_population_script_{schema_name}.py", "w") as f:
 
 
 
 
 
 
 
 
 
 
 
 
 
414
  f.write(cleaned_code)
415
- print(f"πŸ” DEBUG: Full generated code saved to /tmp/generated_population_script_{schema_name}.py")
416
 
417
  # First, validate the syntax before executing
418
  try:
@@ -439,6 +460,24 @@ def execute_population_script(python_code, schema_name):
439
  print("=" * 50)
440
  print(f"❌ DATA POPULATION FAILED: {str(e)}")
441
  print("=" * 50)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
442
  return False, f"Population execution failed: {str(e)}"
443
 
444
  def generate_deployment_summary(demo_builder_instance):
@@ -891,6 +930,12 @@ def create_demo_prep_interface():
891
  label="Schema Prefix",
892
  info="Prefix for schema names (e.g., demo_20250930)"
893
  )
 
 
 
 
 
 
894
 
895
  with gr.TabItem("Admin"):
896
  ts_instance_url = gr.Textbox(
@@ -1094,7 +1139,7 @@ def create_demo_prep_interface():
1094
  # Progressive workflow with Multi-LLM support
1095
  def progressive_workflow_handler(
1096
  url, use_case, llm_provider, demo_builder_instance, mode="expert", stop_before_deploy=False,
1097
- temperature=0.3, max_tokens=4000
1098
  ):
1099
  # Initialize DemoBuilder if none exists
1100
  if demo_builder_instance is None:
@@ -1102,6 +1147,17 @@ def create_demo_prep_interface():
1102
  return "Please enter a company URL", None, "Start Research", False
1103
  demo_builder_instance = DemoBuilder(use_case, url.strip())
1104
 
 
 
 
 
 
 
 
 
 
 
 
1105
  # Initialize or continue with existing content
1106
  if not hasattr(demo_builder_instance, 'accumulated_content'):
1107
  accumulated_content = ""
@@ -1327,6 +1383,10 @@ def create_demo_prep_interface():
1327
  workflow_updates += f"πŸ’‘ Next: Click 'Create DDL' to generate database schema\n"
1328
  demo_builder_instance.workflow_updates = workflow_updates
1329
 
 
 
 
 
1330
  yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
1331
 
1332
  elif demo_builder_instance.current_stage == "create":
@@ -1338,24 +1398,26 @@ def create_demo_prep_interface():
1338
  )
1339
  yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
1340
 
1341
- # Extract company name for schema naming
1342
- company_name = (
1343
- demo_builder_instance.extract_company_name()
1344
- .replace(" ", "")
1345
- .replace("-", "")
1346
- )
1347
- # Use new DM naming convention for schema
1348
- now = time.localtime()
1349
- yymmdd = time.strftime('%y%m%d', now)
1350
- hhmmss = time.strftime('%H%M%S', now)
1351
-
1352
- # Clean and truncate company name (5 chars)
1353
- company_clean = re.sub(r'[^a-zA-Z0-9]', '', company_name.upper())[:5]
1354
-
1355
- # Clean and truncate use case (3 chars)
1356
- usecase_clean = re.sub(r'[^a-zA-Z0-9]', '', use_case.upper())[:3]
1357
-
1358
- schema_name = f"DM{yymmdd}_{hhmmss}_{company_clean}_{usecase_clean}"
 
 
1359
 
1360
  schema_prompt = f"""Create ONLY complete SQL DDL statements for a {use_case} demo database.
1361
 
@@ -1380,6 +1442,11 @@ TECHNICAL REQUIREMENTS:
1380
  - Include realistic column names that match the business context
1381
  - Add proper constraints and relationships
1382
 
 
 
 
 
 
1383
  SNOWFLAKE SYNTAX EXAMPLES:
1384
  - Auto-increment: ColumnID INT IDENTITY(1,1) PRIMARY KEY
1385
  - NOT: ColumnID INT PRIMARY KEY AUTO_INCREMENT
@@ -1426,6 +1493,11 @@ Generate complete CREATE TABLE statements with proper Snowflake syntax and depen
1426
  demo_builder_instance.accumulated_content = accumulated_content
1427
  demo_builder_instance.workflow_updates = workflow_updates
1428
 
 
 
 
 
 
1429
  yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
1430
 
1431
  elif demo_builder_instance.current_stage == "populate":
@@ -1649,13 +1721,18 @@ Schema Validation: Will be checked next...
1649
  yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
1650
 
1651
  print(f"πŸ” DEBUG: About to advance from populate to deploy stage")
1652
-
1653
  demo_builder_instance.advance_stage()
1654
  demo_builder_instance.accumulated_content = accumulated_content
1655
  demo_builder_instance.workflow_updates = workflow_updates
1656
  print(f"πŸ” DEBUG: Advanced to stage: {demo_builder_instance.current_stage}")
1657
  print(f"πŸ” DEBUG: Stage status: {demo_builder_instance.stage_status}")
1658
-
 
 
 
 
 
1659
  yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
1660
  return # Exit populate stage immediately after advancing
1661
 
@@ -1695,6 +1772,7 @@ Schema Validation: Will be checked next...
1695
  company_name,
1696
  use_case,
1697
  demo_builder_instance.schema_generation_results,
 
1698
  )
1699
  )
1700
 
@@ -1803,6 +1881,7 @@ Schema Validation: Will be checked next...
1803
  schema=schema_name,
1804
  company_name=company_name,
1805
  use_case=use_case,
 
1806
  progress_callback=ts_detailed_callback
1807
  )
1808
  deployment_complete = True
@@ -1837,25 +1916,9 @@ Schema Validation: Will be checked next...
1837
  demo_builder_instance.deployment_status += ts_progress_msg
1838
  workflow_updates += ts_progress_msg
1839
  yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
1840
-
1841
- # Add detailed progress messages to main window after deployment
1842
- workflow_updates += "\nBuilding relationships...\n"
1843
- workflow_updates += "Creating tables...\n"
1844
-
1845
- # Show detailed table creation info if available
1846
- if 'tables' in ts_results and ts_results['tables']:
1847
- workflow_updates += f"Generated joins between tables\n"
1848
- for table_name in ts_results['tables']:
1849
- workflow_updates += f" β€’ {table_name} created successfully\n"
1850
- workflow_updates += f"Tables created successfully ({len(ts_results['tables'])} total)\n"
1851
- else:
1852
- workflow_updates += "Generated joins between tables\n"
1853
- workflow_updates += "Tables created successfully\n"
1854
 
1855
- workflow_updates += "Added joins to semantic model\n"
1856
- workflow_updates += "Model created successfully!\n"
1857
- yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
1858
-
1859
  if ts_results['success']:
1860
  ts_msg = "\nThoughtSpot metadata deployment completed!\n\n"
1861
  ts_msg += "Created ThoughtSpot objects:\n"
@@ -1959,9 +2022,19 @@ Schema Validation: Will be checked next...
1959
 
1960
  demo_builder_instance.advance_stage()
1961
  demo_builder_instance.accumulated_content = accumulated_content
 
 
 
 
 
 
 
1962
  yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
1963
 
1964
  except Exception as e:
 
 
 
1965
  error_msg = f"❌ Deployment error: {str(e)}\n"
1966
  demo_builder_instance.deployment_status += error_msg
1967
  workflow_updates += error_msg
@@ -1977,7 +2050,7 @@ Schema Validation: Will be checked next...
1977
  yield accumulated_content, workflow_updates, None, "Research", False
1978
 
1979
  # Redo functionality
1980
- def redo_latest_stage(demo_builder_instance, llm_provider, url, use_case, mode="expert", stop_deploy=False, temperature=0.3, max_tokens=4000):
1981
  if (
1982
  demo_builder_instance
1983
  and demo_builder_instance.current_stage != "research"
@@ -1992,7 +2065,7 @@ Schema Validation: Will be checked next...
1992
 
1993
  # Re-run the workflow with same format as button_click_handler
1994
  for result in progressive_workflow_handler(
1995
- url, use_case, llm_provider, demo_builder_instance, mode, stop_deploy, temperature, max_tokens
1996
  ):
1997
  if len(result) == 5:
1998
  # New format with workflow_updates
@@ -2659,9 +2732,9 @@ SCRIPT REQUIREMENTS:
2659
  # Legacy CDW deployment function removed - deployment now handled by main workflow
2660
 
2661
  # Main click handler with multi-LLM support - UPDATED to route to AI Output tab
2662
- def button_click_handler(url, use_case, llm_provider, demo_builder_instance, mode, stop_deploy, temperature, max_tokens):
2663
  for result in progressive_workflow_handler(
2664
- url, use_case, llm_provider, demo_builder_instance, mode, stop_deploy, temperature, max_tokens
2665
  ):
2666
  if len(result) == 5:
2667
  # New format with workflow_updates
@@ -2692,7 +2765,7 @@ SCRIPT REQUIREMENTS:
2692
  # Event handlers - UPDATED for new AI Output tab and Results box
2693
  research_btn.click(
2694
  fn=button_click_handler,
2695
- inputs=[url_input, industry_dropdown, llm_dropdown, demo_builder, workflow_mode, stop_before_deploy, temperature_slider, max_tokens_input],
2696
  outputs=[
2697
  ai_output_box, # streaming content goes to AI Output tab
2698
  results_box, # dynamic content based on stage/mode
@@ -2712,10 +2785,10 @@ SCRIPT REQUIREMENTS:
2712
  )
2713
 
2714
  redo_btn.click(
2715
- fn=lambda db, llm, url, use_case, mode, stop_deploy, temp, max_tok: [
2716
- result for result in redo_latest_stage(db, llm, url, use_case, mode, stop_deploy, temp, max_tok)
2717
  ],
2718
- inputs=[demo_builder, llm_dropdown, url_input, industry_dropdown, workflow_mode, stop_before_deploy, temperature_slider, max_tokens_input],
2719
  outputs=[
2720
  ai_output_box, # streaming content goes to AI Output tab
2721
  results_box, # dynamic content based on stage/mode
@@ -3191,7 +3264,7 @@ SCRIPT REQUIREMENTS:
3191
 
3192
  def save_settings_handler(
3193
  email, ai_model, temperature, max_tokens, company_size, company_url, use_case, data_volume,
3194
- warehouse, database, ts_url, ts_username, batch_size, thread_count
3195
  ):
3196
  """Save settings to Supabase database"""
3197
  if not email or '@' not in email:
@@ -3215,7 +3288,8 @@ SCRIPT REQUIREMENTS:
3215
  "thoughtspot_url": ts_url,
3216
  "thoughtspot_username": ts_username,
3217
  "batch_size": batch_size,
3218
- "thread_count": thread_count
 
3219
  }
3220
 
3221
  success = settings_client.save_all_settings(email, settings)
@@ -3243,6 +3317,7 @@ SCRIPT REQUIREMENTS:
3243
  "", # ts_username
3244
  5000, # batch_size
3245
  4, # thread_count
 
3246
  "Settings not loaded - enter valid email", # status
3247
  "gpt-4o", # llm_dropdown (main form AI Model)
3248
  "" # url_input (main form Company URL)
@@ -3266,6 +3341,7 @@ SCRIPT REQUIREMENTS:
3266
  settings.get("thoughtspot_username", ""),
3267
  int(settings.get("batch_size", 5000)),
3268
  int(settings.get("thread_count", 4)),
 
3269
  f"βœ… Settings loaded for {email}",
3270
  ai_model, # llm_dropdown (same as default_ai_model)
3271
  company_url # url_input (same as default_company_url)
@@ -3281,6 +3357,7 @@ SCRIPT REQUIREMENTS:
3281
  "",
3282
  5000,
3283
  4,
 
3284
  f"❌ Error loading settings: {str(e)}"
3285
  ]
3286
 
@@ -3307,7 +3384,8 @@ SCRIPT REQUIREMENTS:
3307
  ts_instance_url,
3308
  ts_username,
3309
  batch_size_slider,
3310
- thread_count_slider
 
3311
  ],
3312
  outputs=[settings_status]
3313
  )
@@ -3552,6 +3630,7 @@ SCRIPT REQUIREMENTS:
3552
  ts_username,
3553
  batch_size_slider,
3554
  thread_count_slider,
 
3555
  settings_status,
3556
  llm_dropdown, # Also update main form AI Model dropdown
3557
  url_input # Also update main form Company URL
 
313
  "conn_params = get_snowflake_connection_params()",
314
  "conn_params = get_snowflake_connection_params()\nconn_params.pop('schema', None) # Remove schema to avoid duplicate"
315
  )
316
+
317
+ # CRITICAL FIX: Add USE SCHEMA command after cursor creation to set schema context
318
+ cleaned_code = cleaned_code.replace(
319
+ "cursor = conn.cursor()",
320
+ f"cursor = conn.cursor()\n cursor.execute('USE SCHEMA \"{schema_name}\"') # Set schema context for INSERTs"
321
+ )
322
+
323
  # Simple and safe schema replacement - just replace the placeholder
324
  cleaned_code = cleaned_code.replace("os.getenv('SNOWFLAKE_SCHEMA')", f"'{schema_name}'")
325
  cleaned_code = cleaned_code.replace('os.getenv("SNOWFLAKE_SCHEMA")', f'"{schema_name}"')
326
 
327
+ # Import re module at the top of this section
328
+ import re
329
+
330
  # FIX: Remove fake.unique() calls that cause "duplicated values after 1,000 iterations" error
331
  cleaned_code = cleaned_code.replace("fake.unique.word()", "fake.word()")
332
  cleaned_code = cleaned_code.replace("fake.unique.email()", "fake.email()")
333
  cleaned_code = cleaned_code.replace("fake.unique.company()", "fake.company()")
334
 
335
  # FIX: Truncate phone numbers to avoid extension overflow (e.g., '790-923-3730x07350')
336
+ # Handle multiple variations of phone number generation - truncate to 15 chars (max field size)
337
+ cleaned_code = cleaned_code.replace("fake.phone_number()", "fake.phone_number()[:15]")
338
+ cleaned_code = re.sub(r"faker\.phone_number\(\)", "faker.phone_number()[:15]", cleaned_code)
339
+ cleaned_code = re.sub(r"'Phone':\s*fake\.phone_number\(\)", "'Phone': fake.phone_number()[:15]", cleaned_code)
340
  # Also handle when it's in a larger expression or assignment
341
+ cleaned_code = re.sub(r"(\w+\.phone_number\(\))(?!\[)", r"\1[:15]", cleaned_code)
342
 
343
  # FIX: Convert SQLite-style ? placeholders to Snowflake-style %s placeholders
 
344
  cleaned_code = re.sub(r'\bVALUES\s*\(\?', 'VALUES (%s', cleaned_code)
345
  cleaned_code = re.sub(r',\s*\?', ', %s', cleaned_code)
346
 
 
351
  print("πŸš€ STARTING DATA POPULATION EXECUTION")
352
  print("=" * 50)"""
353
  )
354
+
355
  # Add logging to populate functions dynamically
 
356
 
357
  # Find all populate function definitions and add logging
358
  def add_function_logging(match):
 
404
  "os": os,
405
  "random": random,
406
  "datetime": datetime,
407
+ "re": re,
408
  "get_snowflake_connection_params": get_snowflake_connection_params,
409
  }
410
 
 
416
  print(f"πŸ” DEBUG: Cleaned code preview (first 500 chars):")
417
  print(cleaned_code[:500])
418
  print("...")
419
+
420
+ # ALWAYS save the full generated code for debugging (not just in DEBUG mode)
421
+ # Save to /tmp for immediate access
422
+ tmp_file = f"/tmp/generated_population_script_{schema_name}.py"
423
+ with open(tmp_file, "w") as f:
424
+ f.write(cleaned_code)
425
+ print(f"πŸ“ Population script saved to: {tmp_file}")
426
+
427
+ # Also save to logs directory for persistence
428
+ from pathlib import Path
429
+ import datetime
430
+ log_dir = Path("logs") / "population_scripts"
431
+ log_dir.mkdir(parents=True, exist_ok=True)
432
+ timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
433
+ log_file = log_dir / f"population_{schema_name}_{timestamp}.py"
434
+ with open(log_file, "w") as f:
435
  f.write(cleaned_code)
436
+ print(f"πŸ“ Also saved to: {log_file}")
437
 
438
  # First, validate the syntax before executing
439
  try:
 
460
  print("=" * 50)
461
  print(f"❌ DATA POPULATION FAILED: {str(e)}")
462
  print("=" * 50)
463
+
464
+ # Print detailed traceback
465
+ import traceback
466
+ print("πŸ“‹ Full traceback:")
467
+ traceback.print_exc()
468
+
469
+ # Show which line of generated code caused the error
470
+ tb = traceback.extract_tb(e.__traceback__)
471
+ for frame in tb:
472
+ if '<population_script>' in frame.filename:
473
+ print(f"\n❌ Error in generated code at line {frame.lineno}:")
474
+ lines = cleaned_code.split('\n')
475
+ start = max(0, frame.lineno - 3)
476
+ end = min(len(lines), frame.lineno + 2)
477
+ for i in range(start, end):
478
+ marker = ">>> " if i == frame.lineno - 1 else " "
479
+ print(f"{marker}{i+1:3d}: {lines[i]}")
480
+
481
  return False, f"Population execution failed: {str(e)}"
482
 
483
  def generate_deployment_summary(demo_builder_instance):
 
930
  label="Schema Prefix",
931
  info="Prefix for schema names (e.g., demo_20250930)"
932
  )
933
+ naming_prefix_input = gr.Textbox(
934
+ value="",
935
+ label="Object Naming Prefix (Optional)",
936
+ placeholder="e.g., BOONE, SALES, DEMO",
937
+ info="Custom prefix for all objects. Format: {prefix}_{day+ms}_{objtype}. Leave blank for default naming."
938
+ )
939
 
940
  with gr.TabItem("Admin"):
941
  ts_instance_url = gr.Textbox(
 
1139
  # Progressive workflow with Multi-LLM support
1140
  def progressive_workflow_handler(
1141
  url, use_case, llm_provider, demo_builder_instance, mode="expert", stop_before_deploy=False,
1142
+ temperature=0.3, max_tokens=4000, naming_prefix=""
1143
  ):
1144
  # Initialize DemoBuilder if none exists
1145
  if demo_builder_instance is None:
 
1147
  return "Please enter a company URL", None, "Start Research", False
1148
  demo_builder_instance = DemoBuilder(use_case, url.strip())
1149
 
1150
+ # Initialize demo logger for this session
1151
+ session_logger = get_demo_logger(user_email="demo_user")
1152
+ demo_builder_instance.logger = session_logger
1153
+ session_logger.log_demo_config(
1154
+ use_case=use_case,
1155
+ company=url,
1156
+ company_url=url,
1157
+ industry="TBD",
1158
+ llm_model=llm_provider
1159
+ )
1160
+
1161
  # Initialize or continue with existing content
1162
  if not hasattr(demo_builder_instance, 'accumulated_content'):
1163
  accumulated_content = ""
 
1383
  workflow_updates += f"πŸ’‘ Next: Click 'Create DDL' to generate database schema\n"
1384
  demo_builder_instance.workflow_updates = workflow_updates
1385
 
1386
+ # Log research stage completion
1387
+ if hasattr(demo_builder_instance, 'logger'):
1388
+ demo_builder_instance.logger.log_stage_completion("research", True)
1389
+
1390
  yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
1391
 
1392
  elif demo_builder_instance.current_stage == "create":
 
1398
  )
1399
  yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
1400
 
1401
+ # Generate schema name using same convention as ThoughtSpot objects
1402
+ from datetime import datetime as dt
1403
+ now = dt.now()
1404
+
1405
+ # Clean prefix (uppercase, alphanumeric only) - can be empty string
1406
+ if naming_prefix:
1407
+ prefix_clean = re.sub(r'[^a-zA-Z0-9]', '', naming_prefix.upper())
1408
+ else:
1409
+ prefix_clean = ""
1410
+
1411
+ # Short unique timestamp: day (2 digits) + microseconds (6 digits)
1412
+ day = now.strftime('%d')
1413
+ microsec = now.strftime('%f') # 6 digits
1414
+ short_timestamp = f"{day}{microsec}"
1415
+
1416
+ # Base format: PREFIX_DDMICROS (prefix can be empty)
1417
+ if prefix_clean:
1418
+ schema_name = f"{prefix_clean}_{short_timestamp}_scm"
1419
+ else:
1420
+ schema_name = f"{short_timestamp}_scm"
1421
 
1422
  schema_prompt = f"""Create ONLY complete SQL DDL statements for a {use_case} demo database.
1423
 
 
1442
  - Include realistic column names that match the business context
1443
  - Add proper constraints and relationships
1444
 
1445
+ **CRITICAL - COLUMN NAME CONSISTENCY:**
1446
+ - When referencing columns in FOREIGN KEY constraints, use the EXACT column name (including case) as defined in the CREATE TABLE statement
1447
+ - Example: If you define "LocationID INT" then reference it as "LocationID" NOT "LOCATIONID"
1448
+ - Column names in FOREIGN KEY REFERENCES must match the PRIMARY KEY column name exactly
1449
+
1450
  SNOWFLAKE SYNTAX EXAMPLES:
1451
  - Auto-increment: ColumnID INT IDENTITY(1,1) PRIMARY KEY
1452
  - NOT: ColumnID INT PRIMARY KEY AUTO_INCREMENT
 
1493
  demo_builder_instance.accumulated_content = accumulated_content
1494
  demo_builder_instance.workflow_updates = workflow_updates
1495
 
1496
+ # Log DDL stage completion and save DDL
1497
+ if hasattr(demo_builder_instance, 'logger'):
1498
+ demo_builder_instance.logger.log_ddl(schema_results, demo_builder_instance.schema_name if hasattr(demo_builder_instance, 'schema_name') else None)
1499
+ demo_builder_instance.logger.log_stage_completion("ddl", True)
1500
+
1501
  yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
1502
 
1503
  elif demo_builder_instance.current_stage == "populate":
 
1721
  yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
1722
 
1723
  print(f"πŸ” DEBUG: About to advance from populate to deploy stage")
1724
+
1725
  demo_builder_instance.advance_stage()
1726
  demo_builder_instance.accumulated_content = accumulated_content
1727
  demo_builder_instance.workflow_updates = workflow_updates
1728
  print(f"πŸ” DEBUG: Advanced to stage: {demo_builder_instance.current_stage}")
1729
  print(f"πŸ” DEBUG: Stage status: {demo_builder_instance.stage_status}")
1730
+
1731
+ # Log population code stage completion
1732
+ if hasattr(demo_builder_instance, 'logger') and hasattr(demo_builder_instance, 'population_code'):
1733
+ demo_builder_instance.logger.log_population_code(demo_builder_instance.population_code)
1734
+ demo_builder_instance.logger.log_stage_completion("population_code", True)
1735
+
1736
  yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
1737
  return # Exit populate stage immediately after advancing
1738
 
 
1772
  company_name,
1773
  use_case,
1774
  demo_builder_instance.schema_generation_results,
1775
+ naming_prefix=naming_prefix
1776
  )
1777
  )
1778
 
 
1881
  schema=schema_name,
1882
  company_name=company_name,
1883
  use_case=use_case,
1884
+ naming_prefix=naming_prefix or None,
1885
  progress_callback=ts_detailed_callback
1886
  )
1887
  deployment_complete = True
 
1916
  demo_builder_instance.deployment_status += ts_progress_msg
1917
  workflow_updates += ts_progress_msg
1918
  yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1919
 
1920
+ # Deployment progress is shown live during execution - no duplicate summary needed
1921
+
 
 
1922
  if ts_results['success']:
1923
  ts_msg = "\nThoughtSpot metadata deployment completed!\n\n"
1924
  ts_msg += "Created ThoughtSpot objects:\n"
 
2022
 
2023
  demo_builder_instance.advance_stage()
2024
  demo_builder_instance.accumulated_content = accumulated_content
2025
+
2026
+ # Log deployment stage completion
2027
+ if hasattr(demo_builder_instance, 'logger'):
2028
+ deploy_success = deploy_success if 'deploy_success' in locals() else False
2029
+ demo_builder_instance.logger.log_stage_completion("deploy", deploy_success,
2030
+ details=f"Schema: {schema_name if 'schema_name' in locals() else 'N/A'}")
2031
+
2032
  yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
2033
 
2034
  except Exception as e:
2035
+ # Log deployment error
2036
+ if hasattr(demo_builder_instance, 'logger'):
2037
+ demo_builder_instance.logger.log_error(str(e), "deployment", e)
2038
  error_msg = f"❌ Deployment error: {str(e)}\n"
2039
  demo_builder_instance.deployment_status += error_msg
2040
  workflow_updates += error_msg
 
2050
  yield accumulated_content, workflow_updates, None, "Research", False
2051
 
2052
  # Redo functionality
2053
+ def redo_latest_stage(demo_builder_instance, llm_provider, url, use_case, mode="expert", stop_deploy=False, temperature=0.3, max_tokens=4000, naming_prefix=""):
2054
  if (
2055
  demo_builder_instance
2056
  and demo_builder_instance.current_stage != "research"
 
2065
 
2066
  # Re-run the workflow with same format as button_click_handler
2067
  for result in progressive_workflow_handler(
2068
+ url, use_case, llm_provider, demo_builder_instance, mode, stop_deploy, temperature, max_tokens, naming_prefix
2069
  ):
2070
  if len(result) == 5:
2071
  # New format with workflow_updates
 
2732
  # Legacy CDW deployment function removed - deployment now handled by main workflow
2733
 
2734
  # Main click handler with multi-LLM support - UPDATED to route to AI Output tab
2735
+ def button_click_handler(url, use_case, llm_provider, demo_builder_instance, mode, stop_deploy, temperature, max_tokens, naming_prefix=""):
2736
  for result in progressive_workflow_handler(
2737
+ url, use_case, llm_provider, demo_builder_instance, mode, stop_deploy, temperature, max_tokens, naming_prefix
2738
  ):
2739
  if len(result) == 5:
2740
  # New format with workflow_updates
 
2765
  # Event handlers - UPDATED for new AI Output tab and Results box
2766
  research_btn.click(
2767
  fn=button_click_handler,
2768
+ inputs=[url_input, industry_dropdown, llm_dropdown, demo_builder, workflow_mode, stop_before_deploy, temperature_slider, max_tokens_input, naming_prefix_input],
2769
  outputs=[
2770
  ai_output_box, # streaming content goes to AI Output tab
2771
  results_box, # dynamic content based on stage/mode
 
2785
  )
2786
 
2787
  redo_btn.click(
2788
+ fn=lambda db, llm, url, use_case, mode, stop_deploy, temp, max_tok, naming_prefix: [
2789
+ result for result in redo_latest_stage(db, llm, url, use_case, mode, stop_deploy, temp, max_tok, naming_prefix)
2790
  ],
2791
+ inputs=[demo_builder, llm_dropdown, url_input, industry_dropdown, workflow_mode, stop_before_deploy, temperature_slider, max_tokens_input, naming_prefix_input],
2792
  outputs=[
2793
  ai_output_box, # streaming content goes to AI Output tab
2794
  results_box, # dynamic content based on stage/mode
 
3264
 
3265
  def save_settings_handler(
3266
  email, ai_model, temperature, max_tokens, company_size, company_url, use_case, data_volume,
3267
+ warehouse, database, ts_url, ts_username, batch_size, thread_count, naming_prefix
3268
  ):
3269
  """Save settings to Supabase database"""
3270
  if not email or '@' not in email:
 
3288
  "thoughtspot_url": ts_url,
3289
  "thoughtspot_username": ts_username,
3290
  "batch_size": batch_size,
3291
+ "thread_count": thread_count,
3292
+ "naming_prefix": naming_prefix or ""
3293
  }
3294
 
3295
  success = settings_client.save_all_settings(email, settings)
 
3317
  "", # ts_username
3318
  5000, # batch_size
3319
  4, # thread_count
3320
+ "", # naming_prefix
3321
  "Settings not loaded - enter valid email", # status
3322
  "gpt-4o", # llm_dropdown (main form AI Model)
3323
  "" # url_input (main form Company URL)
 
3341
  settings.get("thoughtspot_username", ""),
3342
  int(settings.get("batch_size", 5000)),
3343
  int(settings.get("thread_count", 4)),
3344
+ settings.get("naming_prefix", ""), # naming_prefix
3345
  f"βœ… Settings loaded for {email}",
3346
  ai_model, # llm_dropdown (same as default_ai_model)
3347
  company_url # url_input (same as default_company_url)
 
3357
  "",
3358
  5000,
3359
  4,
3360
+ "", # naming_prefix
3361
  f"❌ Error loading settings: {str(e)}"
3362
  ]
3363
 
 
3384
  ts_instance_url,
3385
  ts_username,
3386
  batch_size_slider,
3387
+ thread_count_slider,
3388
+ naming_prefix_input
3389
  ],
3390
  outputs=[settings_status]
3391
  )
 
3630
  ts_username,
3631
  batch_size_slider,
3632
  thread_count_slider,
3633
+ naming_prefix_input,
3634
  settings_status,
3635
  llm_dropdown, # Also update main form AI Model dropdown
3636
  url_input # Also update main form Company URL
liveboard_creator.py CHANGED
@@ -1019,12 +1019,16 @@ Examples:
1019
  text_content = viz_config.get('text_content', viz_config.get('name', ''))
1020
  bg_color = viz_config.get('background_color', '#2E3D4D') # Default dark background
1021
 
1022
- # TEXT tiles in ThoughtSpot are simple structures
1023
  text_tml = {
1024
  'id': viz_config['id'],
1025
  'answer': {
1026
  'name': viz_config.get('name', 'Text'),
1027
  'description': viz_config.get('description', ''),
 
 
 
 
1028
  'text_tile': {
1029
  'text': text_content,
1030
  'background_color': bg_color
@@ -1510,6 +1514,7 @@ Return ONLY a valid JSON object with structure:
1510
 
1511
  # Debug: print response structure
1512
  print(f" API Response type: {type(result)}")
 
1513
  if isinstance(result, list) and len(result) > 0:
1514
  print(f" First item keys: {list(result[0].keys())}")
1515
  response_obj = result[0].get('response', {})
@@ -1517,12 +1522,20 @@ Return ONLY a valid JSON object with structure:
1517
  status_obj = response_obj.get('status', {})
1518
  print(f" Status: {status_obj}")
1519
 
1520
- # If error, check if there's more info in the full result
1521
  if status_obj.get('status_code') == 'ERROR':
1522
- print(f" Full error details:")
 
1523
  import json
 
1524
  print(json.dumps(result[0], indent=2)[:2000])
1525
 
 
 
 
 
 
 
1526
  # Navigate response structure
1527
  liveboard_id = result[0].get('response', {}).get('header', {}).get('id_guid')
1528
  if not liveboard_id:
@@ -1574,10 +1587,26 @@ Return ONLY a valid JSON object with structure:
1574
  'success': False,
1575
  'error': f"API returned status {response.status_code}: {response.text}"
1576
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1577
  except Exception as e:
 
1578
  return {
1579
  'success': False,
1580
- 'error': str(e)
 
1581
  }
1582
 
1583
 
@@ -1603,6 +1632,57 @@ def create_liveboard_from_model(
1603
  Returns:
1604
  Deployment result dictionary
1605
  """
1606
- creator = LiveboardCreator(ts_client, model_id, model_name)
1607
- liveboard_tml = creator.create_liveboard_tml(company_data, use_case, num_visualizations)
1608
- return creator.deploy_liveboard(liveboard_tml)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1019
  text_content = viz_config.get('text_content', viz_config.get('name', ''))
1020
  bg_color = viz_config.get('background_color', '#2E3D4D') # Default dark background
1021
 
1022
+ # TEXT tiles in ThoughtSpot need tables field even though they don't query data
1023
  text_tml = {
1024
  'id': viz_config['id'],
1025
  'answer': {
1026
  'name': viz_config.get('name', 'Text'),
1027
  'description': viz_config.get('description', ''),
1028
+ 'tables': [{
1029
+ 'id': self.model_name,
1030
+ 'name': self.model_name
1031
+ }],
1032
  'text_tile': {
1033
  'text': text_content,
1034
  'background_color': bg_color
 
1514
 
1515
  # Debug: print response structure
1516
  print(f" API Response type: {type(result)}")
1517
+ status_obj = {}
1518
  if isinstance(result, list) and len(result) > 0:
1519
  print(f" First item keys: {list(result[0].keys())}")
1520
  response_obj = result[0].get('response', {})
 
1522
  status_obj = response_obj.get('status', {})
1523
  print(f" Status: {status_obj}")
1524
 
1525
+ # Check for API errors first
1526
  if status_obj.get('status_code') == 'ERROR':
1527
+ error_msg = status_obj.get('error_message', 'Unknown error')
1528
+ print(f" ❌ Liveboard creation failed: {error_msg}")
1529
  import json
1530
+ print(f" Full error details:")
1531
  print(json.dumps(result[0], indent=2)[:2000])
1532
 
1533
+ return {
1534
+ 'success': False,
1535
+ 'error': error_msg,
1536
+ 'response': result
1537
+ }
1538
+
1539
  # Navigate response structure
1540
  liveboard_id = result[0].get('response', {}).get('header', {}).get('id_guid')
1541
  if not liveboard_id:
 
1587
  'success': False,
1588
  'error': f"API returned status {response.status_code}: {response.text}"
1589
  }
1590
+ except AttributeError as e:
1591
+ import traceback
1592
+ return {
1593
+ 'success': False,
1594
+ 'error': f'AttributeError in Liveboard deployment: {str(e)}',
1595
+ 'details': traceback.format_exc()
1596
+ }
1597
+ except KeyError as e:
1598
+ import traceback
1599
+ return {
1600
+ 'success': False,
1601
+ 'error': f'KeyError in Liveboard deployment: {str(e)}',
1602
+ 'details': traceback.format_exc()
1603
+ }
1604
  except Exception as e:
1605
+ import traceback
1606
  return {
1607
  'success': False,
1608
+ 'error': f'{type(e).__name__}: {str(e)}',
1609
+ 'details': traceback.format_exc()
1610
  }
1611
 
1612
 
 
1632
  Returns:
1633
  Deployment result dictionary
1634
  """
1635
+ try:
1636
+ # Validate inputs
1637
+ if not model_id or not model_id.strip():
1638
+ return {
1639
+ 'success': False,
1640
+ 'error': 'Invalid model_id: empty or None'
1641
+ }
1642
+
1643
+ if not model_name or not model_name.strip():
1644
+ return {
1645
+ 'success': False,
1646
+ 'error': 'Invalid model_name: empty or None'
1647
+ }
1648
+
1649
+ # Create Liveboard creator
1650
+ creator = LiveboardCreator(ts_client, model_id, model_name)
1651
+
1652
+ # Generate Liveboard TML
1653
+ liveboard_tml = creator.create_liveboard_tml(company_data, use_case, num_visualizations)
1654
+
1655
+ if not liveboard_tml:
1656
+ return {
1657
+ 'success': False,
1658
+ 'error': 'Failed to generate Liveboard TML: returned None or empty'
1659
+ }
1660
+
1661
+ # Deploy Liveboard
1662
+ result = creator.deploy_liveboard(liveboard_tml)
1663
+
1664
+ if not result:
1665
+ return {
1666
+ 'success': False,
1667
+ 'error': 'deploy_liveboard returned None'
1668
+ }
1669
+
1670
+ return result
1671
+
1672
+ except AttributeError as e:
1673
+ return {
1674
+ 'success': False,
1675
+ 'error': f'AttributeError (possible missing field): {str(e)}'
1676
+ }
1677
+ except KeyError as e:
1678
+ return {
1679
+ 'success': False,
1680
+ 'error': f'KeyError (missing required key): {str(e)}'
1681
+ }
1682
+ except Exception as e:
1683
+ import traceback
1684
+ return {
1685
+ 'success': False,
1686
+ 'error': f'Unexpected error: {type(e).__name__}: {str(e)}',
1687
+ 'traceback': traceback.format_exc()
1688
+ }
supabase_client.py CHANGED
@@ -355,6 +355,7 @@ def load_gradio_settings(email: str) -> Dict[str, Any]:
355
  "default_data_volume": "Medium (10K rows)",
356
  "default_warehouse": "COMPUTE_WH",
357
  "default_database": "DEMO_DB",
 
358
 
359
  # ThoughtSpot Connection
360
  "thoughtspot_url": "",
 
355
  "default_data_volume": "Medium (10K rows)",
356
  "default_warehouse": "COMPUTE_WH",
357
  "default_database": "DEMO_DB",
358
+ "naming_prefix": "", # Object naming prefix
359
 
360
  # ThoughtSpot Connection
361
  "thoughtspot_url": "",
thoughtspot_deployer.py CHANGED
@@ -84,13 +84,18 @@ class ThoughtSpotDeployer:
84
 
85
  return private_key_raw
86
 
87
- def authenticate(self) -> bool:
88
- """Authenticate with ThoughtSpot"""
 
 
 
 
89
  try:
90
  auth_url = f"{self.base_url}/api/rest/2.0/auth/session/login"
91
- print(f" πŸ” Attempting authentication to: {auth_url}")
92
- print(f" πŸ‘€ Username: {self.username}")
93
-
 
94
  response = self.session.post(
95
  auth_url,
96
  json={
@@ -98,31 +103,38 @@ class ThoughtSpotDeployer:
98
  "password": self.password
99
  }
100
  )
101
-
102
- print(f" πŸ“‘ HTTP Status: {response.status_code}")
103
-
 
104
  if response.status_code in [200, 204]: # Accept both 200 and 204 as success
105
  if response.status_code == 200:
106
  # Extract token from JSON response
107
  result = response.json()
108
- print(f" βœ… Response: {json.dumps(result, indent=2)}")
 
109
  if 'token' in result:
110
  self.headers['Authorization'] = f'Bearer {result["token"]}'
111
- print(" 🎟️ Token extracted successfully")
 
112
  return True
113
  else:
114
- print(" ❌ No token in response")
 
115
  elif response.status_code == 204:
116
  # HTTP 204 No Content - authentication successful, use session cookies
117
- print(" βœ… Authentication successful (HTTP 204 - using session cookies)")
 
118
  # For 204, we use session cookies instead of bearer token
119
  return True
120
  else:
121
- print(f" ❌ HTTP Error {response.status_code}: {response.text}")
 
122
  return False
123
-
124
  except Exception as e:
125
- print(f" πŸ’₯ Authentication exception: {e}")
 
126
  return False
127
 
128
  def parse_ddl(self, ddl: str) -> Tuple[Dict, List]:
@@ -318,33 +330,63 @@ class ThoughtSpotDeployer:
318
  joins = []
319
  table_name_upper = table_name.upper()
320
  table_cols = [col['name'].upper() for col in columns]
321
-
 
322
  # Find foreign key relationships
323
  for col_name in table_cols:
324
- if col_name.endswith('ID') and col_name != f"{table_name_upper}ID":
325
- # This looks like a foreign key - find the target table
326
- potential_target = col_name[:-2] + 'S' # CUSTOMERID -> CUSTOMERS
327
-
328
- # Check if target table exists in THIS deployment AND it's not the same table
329
- # IMPORTANT: Only create joins to tables in the same schema/connection
330
- available_tables_upper = [t.upper() for t in all_tables.keys()]
331
- if (potential_target in available_tables_upper and
332
- potential_target != table_name_upper):
333
- constraint_id = f"SYS_CONSTRAINT_{self._generate_constraint_id()}"
334
- join_def = {
335
- 'name': constraint_id,
336
- 'destination': {
337
- 'name': potential_target
338
- },
339
- 'on': f"[{table_name_upper}::{col_name}] = [{potential_target}::{col_name}]",
340
- 'type': 'INNER'
341
- }
342
- joins.append(join_def)
343
- print(f" πŸ”— Generated join: {table_name_upper} -> {potential_target} on {col_name}")
344
- else:
345
- if potential_target not in available_tables_upper and potential_target != table_name_upper:
346
- print(f" ⏭️ Skipping join: {table_name_upper}.{col_name} -> {potential_target} (table not in this deployment)")
347
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
348
  return joins
349
 
350
  def create_connection_tml(self, connection_name: str) -> str:
@@ -503,21 +545,62 @@ class ThoughtSpotDeployer:
503
  return True
504
  return False
505
 
506
- def _is_surrogate_primary_key(self, col: Dict, col_name: str) -> bool:
507
- """Check if column is a meaningless surrogate key (numeric ID)"""
508
- # Common patterns: ID, _ID, ID_, ends with 'id'
509
- if col_name.upper().endswith('ID'):
510
- # Check if it's numeric (INT, BIGINT, NUMBER)
511
- col_type = col.get('type', '').upper()
512
- if any(t in col_type for t in ['INT', 'NUMBER', 'NUMERIC', 'BIGINT']):
513
- return True
514
- return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
515
 
516
  def _create_model_with_constraints(self, tables: Dict, foreign_keys: List, table_guids: Dict,
517
  table_constraints: Dict, model_name: str, connection_name: str) -> str:
518
  """Generate model TML with constraint references like our successful test"""
519
  print(" πŸ“‹ Creating model with constraint references")
520
-
521
  # Build column name conflict tracking
522
  column_name_counts = {}
523
  for table_name, columns in tables.items():
@@ -526,7 +609,83 @@ class ThoughtSpotDeployer:
526
  if col_name not in column_name_counts:
527
  column_name_counts[col_name] = []
528
  column_name_counts[col_name].append(table_name.upper())
529
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
530
  model = {
531
  'guid': None,
532
  'model': {
@@ -543,28 +702,28 @@ class ThoughtSpotDeployer:
543
  }
544
  }
545
  }
546
-
547
- # Add model_tables with FQNs and constraint-based joins
548
  for table_name in tables.keys():
549
  table_name_upper = table_name.upper()
550
  table_guid = table_guids.get(table_name_upper)
551
-
552
  table_entry = {
553
  'name': table_name_upper,
554
  'fqn': table_guid
555
  }
556
-
557
- # Add joins if this table has constraints
558
- if table_name_upper in table_constraints and table_constraints[table_name_upper]:
559
  table_entry['joins'] = []
560
- for constraint in table_constraints[table_name_upper]:
561
  join_entry = {
562
- 'with': constraint['destination'],
563
- 'referencing_join': constraint['constraint_id']
564
  }
565
  table_entry['joins'].append(join_entry)
566
- print(f" πŸ”— Added join: {table_name_upper} -> {constraint['destination']}")
567
-
568
  model['model']['model_tables'].append(table_entry)
569
 
570
  # Add columns with proper global conflict resolution (same as working version)
@@ -581,7 +740,8 @@ class ThoughtSpotDeployer:
581
  continue
582
 
583
  # SKIP surrogate primary keys (numeric IDs) - nobody searches "customer 23455"
584
- if self._is_surrogate_primary_key(col, col_name):
 
585
  print(f" ⏭️ Skipping surrogate PK: {table_name_upper}.{col_name}")
586
  continue
587
 
@@ -671,18 +831,65 @@ class ThoughtSpotDeployer:
671
  for col in columns:
672
  col_name = col['name'].upper()
673
 
674
- # Check if this looks like a foreign key (ends with ID but isn't the table's own ID)
675
- if col_name.endswith('ID') and col_name != f"{table_name_upper}ID":
676
- # Infer the target table name (CUSTOMERID -> CUSTOMERS, LOCATIONID -> LOCATIONS)
677
- potential_target = col_name[:-2] + 'S'
 
 
 
 
 
 
 
 
 
 
 
678
 
679
- # Check if the target table exists in this schema
680
- if potential_target not in table_names_upper and potential_target != table_name_upper:
681
- warnings.append(
682
- f"⚠️ {table_name}.{col_name} appears to reference {potential_target}, "
683
- f"but {potential_target} is not in this schema. "
684
- f"The join will be skipped during deployment."
685
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
686
 
687
  return warnings
688
 
@@ -1092,41 +1299,50 @@ class ThoughtSpotDeployer:
1092
  print(f" ⚠️ Could not create schema: {e}")
1093
  print(f" πŸ“ Will proceed assuming schema exists or will be created by table operations")
1094
 
1095
- def _generate_demo_names(self, company_name: str = None, use_case: str = None):
1096
- """Generate standardized demo names using DM convention"""
 
 
 
 
 
 
 
 
 
1097
  from datetime import datetime
1098
  import re
1099
-
1100
- # Get timestamp components
1101
  now = datetime.now()
1102
- yymmdd = now.strftime('%y%m%d')
1103
- hhmmss = now.strftime('%H%M%S')
1104
-
1105
- # Clean and truncate company name (5 chars)
1106
- if company_name:
1107
- company_clean = re.sub(r'[^a-zA-Z0-9]', '', company_name.upper())[:5]
1108
  else:
1109
- company_clean = 'DEMO'[:5]
1110
-
1111
- # Clean and truncate use case (3 chars)
1112
- if use_case:
1113
- usecase_clean = re.sub(r'[^a-zA-Z0-9]', '', use_case.upper())[:3]
 
 
 
 
 
1114
  else:
1115
- usecase_clean = 'GEN'[:3]
1116
-
1117
- # Generate names
1118
- base_name = f"DM{yymmdd}_{hhmmss}_{company_clean}_{usecase_clean}"
1119
-
1120
  return {
1121
- 'schema': base_name,
1122
- 'connection': f"{base_name}_conn",
1123
- 'model': f"{base_name}_model",
1124
- 'base': base_name
 
1125
  }
1126
 
1127
- def deploy_all(self, ddl: str, database: str, schema: str,
1128
- connection_name: str = None, company_name: str = None,
1129
- use_case: str = None, progress_callback=None) -> Dict:
1130
  """
1131
  Deploy complete data model to ThoughtSpot
1132
 
@@ -1148,12 +1364,29 @@ class ThoughtSpotDeployer:
1148
  }
1149
 
1150
  table_guids = {} # Store table GUIDs for model creation
1151
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1152
  def log_progress(message):
1153
- """Helper to log progress both to console and callback"""
1154
  print(message, flush=True)
1155
  if progress_callback:
1156
  progress_callback(message)
 
 
 
1157
 
1158
  try:
1159
  # STEP 0: Authenticate first!
@@ -1179,10 +1412,20 @@ class ThoughtSpotDeployer:
1179
  else:
1180
  log_progress("βœ… All foreign key references are valid\n")
1181
 
1182
- # Step 1: Create connection using new naming convention
1183
- demo_names = self._generate_demo_names(company_name, use_case)
 
 
 
 
 
 
 
 
 
 
1184
  if not connection_name:
1185
- connection_name = demo_names['connection']
1186
 
1187
  log_progress("1️⃣ Checking/Creating connection...")
1188
  log_progress(f" Connection name: {connection_name}")
@@ -1276,111 +1519,130 @@ class ThoughtSpotDeployer:
1276
  log_progress("\n2️⃣ Creating tables...")
1277
 
1278
  # PHASE 1: Create all tables WITHOUT joins (to ensure all tables exist first)
1279
- log_progress(" πŸ“‹ Phase 1: Creating tables without joins...")
1280
- for table_name, columns in tables.items():
1281
- import time
1282
- start_time = time.time()
1283
- log_progress(f" πŸ”„ Creating table: {table_name.upper()} (no joins)...")
 
 
1284
 
1285
- # Create table TML WITHOUT joins_with section (pass None for all_tables)
1286
  tml_start = time.time()
1287
  table_tml = self.create_table_tml(table_name, columns, connection_name, database, schema, all_tables=None)
1288
  tml_time = time.time() - tml_start
1289
- log_progress(f" πŸ“ TML generation took: {tml_time:.2f} seconds")
 
 
1290
 
1291
- # Log the size of the TML
1292
- log_progress(f" πŸ“ TML size: {len(table_tml)} characters, {len(columns)} columns")
1293
-
1294
- # Make the API call
1295
- api_start = time.time()
1296
- log_progress(f" 🌐 Sending to ThoughtSpot API...")
1297
- response = self.session.post(
1298
- f"{self.base_url}/api/rest/2.0/metadata/tml/import",
1299
- json={
1300
- "metadata_tmls": [table_tml],
1301
- "import_policy": "PARTIAL",
1302
- "create_new": True
1303
- }
1304
- )
1305
- api_time = time.time() - api_start
1306
- log_progress(f" ⏱️ API call took: {api_time:.2f} seconds")
1307
 
1308
- if response.status_code == 200:
1309
- result = response.json()
 
1310
 
1311
- # Handle both response formats (list or dict with 'object' key)
1312
- if isinstance(result, list):
1313
- objects = result
1314
- elif isinstance(result, dict) and 'object' in result:
1315
- objects = result['object']
1316
- else:
1317
- error = f"Table {table_name} failed: Unexpected response format: {type(result)}"
1318
- log_progress(f" ❌ {error}")
1319
- results['errors'].append(error)
1320
- continue
1321
 
1322
- if objects and len(objects) > 0:
1323
- obj = objects[0]
 
 
1324
  if obj.get('response', {}).get('status', {}).get('status_code') == 'OK':
1325
  table_guid = obj.get('response', {}).get('header', {}).get('id_guid')
1326
- total_time = time.time() - start_time
1327
- log_progress(f" βœ… Table created: {table_name.upper()} (Total time: {total_time:.2f} seconds)")
1328
- log_progress(f" GUID: {table_guid}")
1329
- results['tables'].append(table_name.upper())
1330
- table_guids[table_name.upper()] = table_guid
1331
  else:
1332
- error = f"Table {table_name} failed: {obj.get('response', {}).get('status', {}).get('error_message')}"
 
1333
  log_progress(f" ❌ {error}")
1334
  results['errors'].append(error)
1335
- # DON'T return - continue creating other tables
1336
- else:
1337
- error = f"Table {table_name} failed: No object in response"
1338
- log_progress(f" ❌ {error}")
1339
- results['errors'].append(error)
1340
  else:
1341
- error = f"Table {table_name} HTTP error: {response.status_code} - {response.text}"
1342
  log_progress(f" ❌ {error}")
1343
  results['errors'].append(error)
 
 
 
 
 
 
 
1344
 
1345
  # Check if we created any tables successfully
1346
  if not table_guids:
1347
  log_progress(" ❌ No tables were created successfully in Phase 1")
1348
  return results
1349
 
1350
- log_progress(f" βœ… Phase 1 complete: {len(table_guids)} tables created")
 
 
 
 
 
 
 
 
1351
 
1352
- # PHASE 2: Update tables WITH joins (now that all tables exist)
1353
- log_progress("\n πŸ“‹ Phase 2: Adding joins to tables...")
1354
  for table_name, columns in tables.items():
1355
- # Only add joins if the table was created successfully in Phase 1
1356
  table_name_upper = table_name.upper()
1357
  if table_name_upper not in table_guids:
1358
  log_progress(f" ⏭️ Skipping joins for {table_name_upper} (table creation failed)")
1359
  continue
1360
 
1361
- # Get the GUID for this table
1362
  table_guid = table_guids[table_name_upper]
1363
 
1364
- log_progress(f" πŸ”— Adding joins to: {table_name_upper}...")
1365
  # Create table TML WITH joins_with section AND the table GUID
1366
  table_tml = self.create_table_tml(
1367
  table_name, columns, connection_name, database, schema,
1368
  all_tables=tables, table_guid=table_guid
1369
  )
 
 
 
 
 
 
 
1370
 
1371
  response = self.session.post(
1372
  f"{self.base_url}/api/rest/2.0/metadata/tml/import",
1373
  json={
1374
- "metadata_tmls": [table_tml],
1375
  "import_policy": "PARTIAL",
1376
- "create_new": False # Update existing table
1377
  }
1378
  )
1379
 
 
 
 
1380
  if response.status_code == 200:
1381
  result = response.json()
1382
 
1383
- # Handle both response formats (list or dict with 'object' key)
1384
  if isinstance(result, list):
1385
  objects = result
1386
  elif isinstance(result, dict) and 'object' in result:
@@ -1389,21 +1651,23 @@ class ThoughtSpotDeployer:
1389
  log_progress(f" ⚠️ Unexpected response format for joins: {type(result)}")
1390
  objects = []
1391
 
 
1392
  if objects and len(objects) > 0:
1393
- obj = objects[0]
1394
- if obj.get('response', {}).get('status', {}).get('status_code') == 'OK':
1395
- log_progress(f" βœ… Joins added: {table_name.upper()}")
1396
- else:
1397
- error = f"Adding joins to {table_name} failed: {obj.get('response', {}).get('status', {}).get('error_message')}"
1398
- log_progress(f" ⚠️ {error}")
1399
- results['errors'].append(error)
1400
- # Don't fail - table still exists without joins
1401
  else:
1402
- log_progress(f" ⚠️ Could not add joins to {table_name.upper()}")
1403
  else:
1404
- log_progress(f" ⚠️ HTTP error adding joins to {table_name.upper()}: {response.status_code}")
 
1405
 
1406
- log_progress(f" βœ… Phase 2 complete: Joins processed for all tables")
1407
  actual_constraint_ids = {} # We'll generate these for the model
1408
 
1409
  # Skip separate relationship creation for now
@@ -1412,8 +1676,9 @@ class ThoughtSpotDeployer:
1412
 
1413
  # Step 3: Extract constraint IDs from created tables
1414
  log_progress("\n2️⃣.5 Extracting constraint IDs from created tables...")
 
1415
  table_constraints = {}
1416
-
1417
  for table_name, table_guid in table_guids.items():
1418
  log_progress(f" πŸ” Getting constraint IDs for {table_name}...")
1419
 
@@ -1446,18 +1711,32 @@ class ThoughtSpotDeployer:
1446
  'destination': destination
1447
  })
1448
  log_progress(f" πŸ”— Found join: {constraint_id} -> {destination}")
1449
-
 
 
1450
  log_progress(f" βœ… Extracted constraints from {len(table_constraints)} tables")
1451
-
1452
  # Step 4: Create model (semantic layer) with constraint references
1453
  log_progress("\n3️⃣ Creating model (semantic layer) with joins...")
1454
- # Use the demo_names that were generated earlier
1455
- model_name = demo_names['model']
 
1456
  log_progress(f" Model name: {model_name}")
1457
 
1458
  # Use the enhanced model creation that includes constraint references
1459
  model_tml = self._create_model_with_constraints(tables, foreign_keys, table_guids, table_constraints, model_name, connection_name)
1460
  print(f"\nπŸ“„ Model TML being sent:\n{model_tml}")
 
 
 
 
 
 
 
 
 
 
 
1461
 
1462
  response = self.session.post(
1463
  f"{self.base_url}/api/rest/2.0/metadata/tml/import",
@@ -1467,7 +1746,9 @@ class ThoughtSpotDeployer:
1467
  "create_new": True
1468
  }
1469
  )
1470
-
 
 
1471
  if response.status_code == 200:
1472
  result = response.json()
1473
 
@@ -1485,6 +1766,8 @@ class ThoughtSpotDeployer:
1485
  if objects and len(objects) > 0:
1486
  if objects[0].get('response', {}).get('status', {}).get('status_code') == 'OK':
1487
  model_guid = objects[0].get('response', {}).get('header', {}).get('id_guid')
 
 
1488
  log_progress(f" βœ… Model created successfully!")
1489
  log_progress(f" Model: {model_name}")
1490
  log_progress(f" GUID: {model_guid}")
@@ -1546,13 +1829,39 @@ class ThoughtSpotDeployer:
1546
  traceback.print_exc()
1547
  else:
1548
  print(f"πŸ“‹ Full model response: {objects}") # DEBUG: Show full response
1549
- error = f"Model failed: {objects[0].get('response', {}).get('status', {}).get('error_message')}"
1550
- print(f" ❌ {error}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1551
  results['errors'].append(error)
1552
  else:
1553
  error = "Model failed: No objects in response"
1554
  log_progress(f" ❌ {error}")
1555
  results['errors'].append(error)
 
 
 
 
 
 
 
 
 
 
 
1556
 
1557
  # Mark as successful if we got this far
1558
  results['success'] = len(results['errors']) == 0
@@ -1560,29 +1869,43 @@ class ThoughtSpotDeployer:
1560
  except Exception as e:
1561
  error_msg = str(e)
1562
  print(f"❌ Deployment failed: {error_msg}")
 
1563
  results['errors'].append(error_msg)
1564
-
 
 
 
 
 
 
 
 
 
 
1565
  return results
1566
 
1567
 
1568
- def deploy_to_thoughtspot(ddl: str, database: str, schema: str,
1569
- connection_name: str = None, company_name: str = None,
1570
- use_case: str = None, progress_callback=None) -> Dict:
1571
  """
1572
  Convenience function for deploying to ThoughtSpot
1573
-
1574
  Args:
1575
  ddl: Data Definition Language statements
1576
  database: Target database name
1577
  schema: Target schema name
1578
  connection_name: Optional connection name
 
 
 
1579
  progress_callback: Optional callback for progress updates
1580
-
1581
  Returns:
1582
  Dict with deployment results
1583
  """
1584
  deployer = ThoughtSpotDeployer()
1585
- return deployer.deploy_all(ddl, database, schema, connection_name, company_name, use_case, progress_callback)
1586
 
1587
 
1588
  if __name__ == "__main__":
 
84
 
85
  return private_key_raw
86
 
87
+ def authenticate(self, verbose: bool = False) -> bool:
88
+ """Authenticate with ThoughtSpot
89
+
90
+ Args:
91
+ verbose: If True, print detailed authentication info. Default False for clean output.
92
+ """
93
  try:
94
  auth_url = f"{self.base_url}/api/rest/2.0/auth/session/login"
95
+ if verbose:
96
+ print(f" πŸ” Attempting authentication to: {auth_url}")
97
+ print(f" πŸ‘€ Username: {self.username}")
98
+
99
  response = self.session.post(
100
  auth_url,
101
  json={
 
103
  "password": self.password
104
  }
105
  )
106
+
107
+ if verbose:
108
+ print(f" πŸ“‘ HTTP Status: {response.status_code}")
109
+
110
  if response.status_code in [200, 204]: # Accept both 200 and 204 as success
111
  if response.status_code == 200:
112
  # Extract token from JSON response
113
  result = response.json()
114
+ if verbose:
115
+ print(f" βœ… Response: {json.dumps(result, indent=2)}")
116
  if 'token' in result:
117
  self.headers['Authorization'] = f'Bearer {result["token"]}'
118
+ if verbose:
119
+ print(" 🎟️ Token extracted successfully")
120
  return True
121
  else:
122
+ if verbose:
123
+ print(" ❌ No token in response")
124
  elif response.status_code == 204:
125
  # HTTP 204 No Content - authentication successful, use session cookies
126
+ if verbose:
127
+ print(" βœ… Authentication successful (HTTP 204 - using session cookies)")
128
  # For 204, we use session cookies instead of bearer token
129
  return True
130
  else:
131
+ if verbose:
132
+ print(f" ❌ HTTP Error {response.status_code}: {response.text}")
133
  return False
134
+
135
  except Exception as e:
136
+ if verbose:
137
+ print(f" πŸ’₯ Authentication exception: {e}")
138
  return False
139
 
140
  def parse_ddl(self, ddl: str) -> Tuple[Dict, List]:
 
330
  joins = []
331
  table_name_upper = table_name.upper()
332
  table_cols = [col['name'].upper() for col in columns]
333
+ available_tables_upper = [t.upper() for t in all_tables.keys()]
334
+
335
  # Find foreign key relationships
336
  for col_name in table_cols:
337
+ if not col_name.endswith('ID'):
338
+ continue
339
+
340
+ # Extract the base name from the column (CUSTOMER_ID -> CUSTOMER, CUSTOMERID -> CUSTOMER)
341
+ if col_name.endswith('_ID'):
342
+ col_base = col_name[:-3] # Remove _ID
343
+ else:
344
+ col_base = col_name[:-2] # Remove ID
345
+
346
+ # Check if this is the table's own primary key
347
+ if table_name_upper.endswith('S') and len(table_name_upper) > 1:
348
+ table_base = table_name_upper[:-1] # CUSTOMERS -> CUSTOMER, SALES_TRANSACTIONS -> SALES_TRANSACTION
349
+ else:
350
+ table_base = table_name_upper
351
+
352
+ table_base_alt = table_name_upper.rstrip('S')
353
+
354
+ # Skip if this is the table's own PK
355
+ is_own_pk = (col_base == table_name_upper or
356
+ col_base == table_base or
357
+ col_base == table_base_alt)
358
+
359
+ if is_own_pk:
360
+ continue
361
+
362
+ # Try to find the target table (CUSTOMER -> CUSTOMERS)
363
+ possible_targets = [
364
+ col_base + 'S', # CUSTOMER -> CUSTOMERS
365
+ col_base + 'ES', # Less common
366
+ col_base # Already plural
367
+ ]
368
+
369
+ found_target = None
370
+ for potential_target in possible_targets:
371
+ if potential_target in available_tables_upper and potential_target != table_name_upper:
372
+ found_target = potential_target
373
+ break
374
+
375
+ if found_target:
376
+ constraint_id = f"SYS_CONSTRAINT_{self._generate_constraint_id()}"
377
+ join_def = {
378
+ 'name': constraint_id,
379
+ 'destination': {
380
+ 'name': found_target
381
+ },
382
+ 'on': f"[{table_name_upper}::{col_name}] = [{found_target}::{col_name}]",
383
+ 'type': 'INNER'
384
+ }
385
+ joins.append(join_def)
386
+ print(f" πŸ”— Generated join: {table_name_upper} -> {found_target} on {col_name}")
387
+ else:
388
+ print(f" ⏭️ Skipping join: {table_name_upper}.{col_name} -> {possible_targets[0]} (table not in this deployment)")
389
+
390
  return joins
391
 
392
  def create_connection_tml(self, connection_name: str) -> str:
 
545
  return True
546
  return False
547
 
548
+ def _is_surrogate_primary_key(self, col: Dict, col_name: str, table_name: str = None) -> bool:
549
+ """Check if column is a meaningless surrogate key (numeric ID) for THIS table.
550
+
551
+ Foreign key columns (like CUSTOMER_ID in SALES_TRANSACTIONS) should NOT be
552
+ treated as surrogate keys - they're important for joins.
553
+ """
554
+ col_name_upper = col_name.upper()
555
+
556
+ # Must end with ID and be numeric
557
+ if not col_name_upper.endswith('ID'):
558
+ return False
559
+
560
+ col_type = col.get('type', '').upper()
561
+ if not any(t in col_type for t in ['INT', 'NUMBER', 'NUMERIC', 'BIGINT']):
562
+ return False
563
+
564
+ # If we don't have table context, be conservative
565
+ if not table_name:
566
+ return False
567
+
568
+ table_name_upper = table_name.upper()
569
+
570
+ # Extract the base name from the column (CUSTOMER_ID -> CUSTOMER, CUSTOMERID -> CUSTOMER)
571
+ if col_name_upper.endswith('_ID'):
572
+ col_base = col_name_upper[:-3] # Remove _ID
573
+ else:
574
+ col_base = col_name_upper[:-2] # Remove ID
575
+
576
+ # Extract the base name from the table (CUSTOMERS -> CUSTOMER, SALES_TRANSACTIONS -> SALES_TRANSACTION)
577
+ # Handle plural table names
578
+ if table_name_upper.endswith('S') and len(table_name_upper) > 1:
579
+ table_base = table_name_upper[:-1] # CUSTOMERS -> CUSTOMER
580
+ else:
581
+ table_base = table_name_upper
582
+
583
+ # Also try removing all trailing 'S' characters for cases like SALES -> SALE
584
+ table_base_alt = table_name_upper.rstrip('S')
585
+
586
+ # It's a surrogate PK only if the column name matches the table name
587
+ # Examples:
588
+ # CUSTOMER_ID in CUSTOMERS table -> surrogate PK (skip)
589
+ # CUSTOMER_ID in SALES_TRANSACTIONS table -> foreign key (DO NOT skip)
590
+ is_own_pk = (col_base == table_name_upper or
591
+ col_base == table_base or
592
+ col_base == table_base_alt or
593
+ col_name_upper == f"{table_name_upper}_ID" or
594
+ col_name_upper == f"{table_base}_ID" or
595
+ col_name_upper == f"{table_base_alt}_ID")
596
+
597
+ return is_own_pk
598
 
599
  def _create_model_with_constraints(self, tables: Dict, foreign_keys: List, table_guids: Dict,
600
  table_constraints: Dict, model_name: str, connection_name: str) -> str:
601
  """Generate model TML with constraint references like our successful test"""
602
  print(" πŸ“‹ Creating model with constraint references")
603
+
604
  # Build column name conflict tracking
605
  column_name_counts = {}
606
  for table_name, columns in tables.items():
 
609
  if col_name not in column_name_counts:
610
  column_name_counts[col_name] = []
611
  column_name_counts[col_name].append(table_name.upper())
612
+
613
+ # INFER JOINS: Build joins from foreign key column names
614
+ # Structure: {source_table: [{target: target_table, constraint_id: ID}, ...]}
615
+ inferred_joins = {}
616
+ table_names_upper = [t.upper() for t in tables.keys()]
617
+
618
+ for table_name, columns in tables.items():
619
+ table_name_upper = table_name.upper()
620
+
621
+ for col in columns:
622
+ col_name = col['name'].upper()
623
+
624
+ # Check if this looks like a foreign key (ends with ID or _ID but isn't the table's own ID)
625
+ if col_name.endswith('ID'):
626
+ # Infer the target table name (CUSTOMERID -> CUSTOMERS, CUSTOMER_ID -> CUSTOMERS)
627
+ if col_name.endswith('_ID'):
628
+ col_base = col_name[:-3] # CUSTOMER_ID -> CUSTOMER
629
+ else:
630
+ col_base = col_name[:-2] # CUSTOMERID -> CUSTOMER
631
+
632
+ # Check if this is the table's own primary key
633
+ # Handle plural table names: CUSTOMERS has CUSTOMER_ID, not CUSTOMERS_ID
634
+ if table_name_upper.endswith('S') and len(table_name_upper) > 1:
635
+ table_base = table_name_upper[:-1] # CUSTOMERS -> CUSTOMER
636
+ else:
637
+ table_base = table_name_upper
638
+
639
+ table_base_alt = table_name_upper.rstrip('S') # Alternative: strip all trailing S
640
+
641
+ is_self_reference = (col_base == table_name_upper or
642
+ col_base == table_base or
643
+ col_base == table_base_alt)
644
+
645
+ # Skip if this is the table's own PK
646
+ if is_self_reference:
647
+ continue
648
+
649
+ # Try multiple potential target table names
650
+ potential_targets = [
651
+ col_base + 'S', # CUSTOMER -> CUSTOMERS
652
+ col_base + 'ES', # Less common but possible
653
+ col_base # Already plural
654
+ ]
655
+
656
+ # Find the first matching target table
657
+ found_target = None
658
+ for potential_target in potential_targets:
659
+ if potential_target in table_names_upper and potential_target != table_name_upper:
660
+ found_target = potential_target
661
+ break
662
+
663
+ # Create join if we found a valid target
664
+ if found_target:
665
+ if table_name_upper not in inferred_joins:
666
+ inferred_joins[table_name_upper] = []
667
+
668
+ # Look up the REAL constraint ID from table_constraints first
669
+ constraint_id = None
670
+ if table_name_upper in table_constraints:
671
+ for constraint in table_constraints[table_name_upper]:
672
+ if constraint['destination'] == found_target:
673
+ constraint_id = constraint['constraint_id']
674
+ print(f" βœ… Using real constraint ID from ThoughtSpot: {constraint_id}")
675
+ break
676
+
677
+ # Fallback: generate synthetic ID if not found (shouldn't happen if Phase 2 worked)
678
+ if not constraint_id:
679
+ constraint_id = f"SYS_CONSTRAINT_{self._generate_constraint_id()}"
680
+ print(f" ⚠️ Generated synthetic constraint ID (real ID not found): {constraint_id[:40]}...")
681
+
682
+ inferred_joins[table_name_upper].append({
683
+ 'target': found_target,
684
+ 'constraint_id': constraint_id,
685
+ 'source_column': col_name
686
+ })
687
+ print(f" πŸ”— Inferred join: {table_name_upper}.{col_name} -> {found_target}")
688
+
689
  model = {
690
  'guid': None,
691
  'model': {
 
702
  }
703
  }
704
  }
705
+
706
+ # Add model_tables with FQNs and inferred joins
707
  for table_name in tables.keys():
708
  table_name_upper = table_name.upper()
709
  table_guid = table_guids.get(table_name_upper)
710
+
711
  table_entry = {
712
  'name': table_name_upper,
713
  'fqn': table_guid
714
  }
715
+
716
+ # Add joins from inferred relationships
717
+ if table_name_upper in inferred_joins and inferred_joins[table_name_upper]:
718
  table_entry['joins'] = []
719
+ for join_info in inferred_joins[table_name_upper]:
720
  join_entry = {
721
+ 'with': join_info['target'],
722
+ 'referencing_join': join_info['constraint_id']
723
  }
724
  table_entry['joins'].append(join_entry)
725
+ print(f" βœ… Added join to model: {table_name_upper} -> {join_info['target']}")
726
+
727
  model['model']['model_tables'].append(table_entry)
728
 
729
  # Add columns with proper global conflict resolution (same as working version)
 
740
  continue
741
 
742
  # SKIP surrogate primary keys (numeric IDs) - nobody searches "customer 23455"
743
+ # Pass table_name to distinguish PKs from FKs
744
+ if self._is_surrogate_primary_key(col, col_name, table_name_upper):
745
  print(f" ⏭️ Skipping surrogate PK: {table_name_upper}.{col_name}")
746
  continue
747
 
 
831
  for col in columns:
832
  col_name = col['name'].upper()
833
 
834
+ # Check if this looks like a foreign key (ends with ID or _ID but isn't the table's own ID)
835
+ if not col_name.endswith('ID'):
836
+ continue
837
+
838
+ # Extract the base name from the column
839
+ if col_name.endswith('_ID'):
840
+ col_base = col_name[:-3] # CUSTOMER_ID -> CUSTOMER
841
+ else:
842
+ col_base = col_name[:-2] # CUSTOMERID -> CUSTOMER
843
+
844
+ # Extract the base name from the table to check if this is the table's own PK
845
+ if table_name_upper.endswith('S') and len(table_name_upper) > 1:
846
+ table_base = table_name_upper[:-1] # CUSTOMERS -> CUSTOMER
847
+ else:
848
+ table_base = table_name_upper
849
 
850
+ table_base_alt = table_name_upper.rstrip('S')
851
+
852
+ # For compound names like SALES_TRANSACTIONS, also check the last word
853
+ # SALES_TRANSACTIONS -> TRANSACTION
854
+ if '_' in table_name_upper:
855
+ last_part = table_name_upper.split('_')[-1]
856
+ if last_part.endswith('S'):
857
+ table_last_part = last_part[:-1]
858
+ else:
859
+ table_last_part = last_part
860
+ else:
861
+ table_last_part = None
862
+
863
+ # Skip if this is the table's own primary key
864
+ is_own_pk = (col_base == table_name_upper or
865
+ col_base == table_base or
866
+ col_base == table_base_alt or
867
+ (table_last_part and col_base == table_last_part))
868
+
869
+ if is_own_pk:
870
+ continue
871
+
872
+ # Try to find the target table (CUSTOMER -> CUSTOMERS)
873
+ # Try multiple plural forms
874
+ possible_targets = [
875
+ col_base + 'S', # CUSTOMER -> CUSTOMERS
876
+ col_base + 'ES', # LOCATION -> LOCATIONES (unlikely but possible)
877
+ col_base # Already plural or no 'S' form
878
+ ]
879
+
880
+ found_target = None
881
+ for target in possible_targets:
882
+ if target in table_names_upper and target != table_name_upper:
883
+ found_target = target
884
+ break
885
+
886
+ # Only warn if we can't find ANY matching table
887
+ if not found_target:
888
+ warnings.append(
889
+ f"⚠️ {table_name}.{col_name} appears to reference {possible_targets[0]}, "
890
+ f"but {possible_targets[0]} is not in this schema. "
891
+ f"The join will be skipped during deployment."
892
+ )
893
 
894
  return warnings
895
 
 
1299
  print(f" ⚠️ Could not create schema: {e}")
1300
  print(f" πŸ“ Will proceed assuming schema exists or will be created by table operations")
1301
 
1302
+ def _generate_demo_names(self, company_name: str = None, use_case: str = None, naming_prefix: str = None):
1303
+ """Generate standardized demo names using consistent format
1304
+
1305
+ Args:
1306
+ naming_prefix: Optional custom prefix
1307
+ Format: {prefix}_{day+ms}_{objtype}
1308
+ Example with prefix: BOONE_14123456_scm
1309
+ Example blank prefix: _14123456_scm
1310
+
1311
+ Note: company_name and use_case parameters deprecated but kept for compatibility
1312
+ """
1313
  from datetime import datetime
1314
  import re
1315
+
 
1316
  now = datetime.now()
1317
+
1318
+ # Clean prefix (uppercase, alphanumeric only) - can be empty string
1319
+ if naming_prefix:
1320
+ prefix_clean = re.sub(r'[^a-zA-Z0-9]', '', naming_prefix.upper())
 
 
1321
  else:
1322
+ prefix_clean = ""
1323
+
1324
+ # Short unique timestamp: day (2 digits) + microseconds (6 digits)
1325
+ day = now.strftime('%d')
1326
+ microsec = now.strftime('%f') # 6 digits
1327
+ short_timestamp = f"{day}{microsec}"
1328
+
1329
+ # Base format: PREFIX_DDMICROS (prefix can be empty)
1330
+ if prefix_clean:
1331
+ base = f"{prefix_clean}_{short_timestamp}"
1332
  else:
1333
+ base = short_timestamp # Just timestamp if no prefix
1334
+
 
 
 
1335
  return {
1336
+ 'schema': f"{base}_scm", # Schema
1337
+ 'connection': f"{base}_conn", # Connection
1338
+ 'model': f"{base}_mdl", # Model
1339
+ 'table': f"{base}_tbl", # Table (if needed)
1340
+ 'base': base
1341
  }
1342
 
1343
+ def deploy_all(self, ddl: str, database: str, schema: str,
1344
+ connection_name: str = None, company_name: str = None,
1345
+ use_case: str = None, naming_prefix: str = None, progress_callback=None) -> Dict:
1346
  """
1347
  Deploy complete data model to ThoughtSpot
1348
 
 
1364
  }
1365
 
1366
  table_guids = {} # Store table GUIDs for model creation
1367
+
1368
+ # Setup deployment log file
1369
+ from pathlib import Path
1370
+ import datetime as dt
1371
+ log_dir = Path("logs") / "deployments"
1372
+ log_dir.mkdir(parents=True, exist_ok=True)
1373
+ timestamp_str = dt.datetime.now().strftime("%Y%m%d_%H%M%S")
1374
+ deployment_log_file = log_dir / f"deployment_{schema}_{timestamp_str}.log"
1375
+ deployment_log = open(deployment_log_file, "w", buffering=1) # Line buffered
1376
+ deployment_log.write(f"=== ThoughtSpot Deployment Log ===\n")
1377
+ deployment_log.write(f"Schema: {schema}\n")
1378
+ deployment_log.write(f"Database: {database}\n")
1379
+ deployment_log.write(f"Timestamp: {dt.datetime.now().isoformat()}\n")
1380
+ deployment_log.write(f"=" * 50 + "\n\n")
1381
+
1382
  def log_progress(message):
1383
+ """Helper to log progress to console, callback, AND file"""
1384
  print(message, flush=True)
1385
  if progress_callback:
1386
  progress_callback(message)
1387
+ # ALWAYS write to log file
1388
+ deployment_log.write(message + "\n")
1389
+ deployment_log.flush()
1390
 
1391
  try:
1392
  # STEP 0: Authenticate first!
 
1412
  else:
1413
  log_progress("βœ… All foreign key references are valid\n")
1414
 
1415
+ # Step 1: Extract base from schema name to ensure consistency
1416
+ # Schema format: PREFIX_DDMICROS_scm (e.g., AMAZ_14283952_scm)
1417
+ # We need to use the SAME base for all ThoughtSpot objects
1418
+ import re
1419
+ if schema.endswith('_scm'):
1420
+ base_from_schema = schema[:-4] # Remove "_scm" suffix
1421
+ else:
1422
+ # Fallback: generate new names if schema doesn't follow convention
1423
+ demo_names = self._generate_demo_names(company_name, use_case, naming_prefix)
1424
+ base_from_schema = demo_names['base']
1425
+
1426
+ # Use the schema's base for all ThoughtSpot objects
1427
  if not connection_name:
1428
+ connection_name = f"{base_from_schema}_conn"
1429
 
1430
  log_progress("1️⃣ Checking/Creating connection...")
1431
  log_progress(f" Connection name: {connection_name}")
 
1519
  log_progress("\n2️⃣ Creating tables...")
1520
 
1521
  # PHASE 1: Create all tables WITHOUT joins (to ensure all tables exist first)
1522
+ log_progress(" πŸ“‹ Phase 1: Creating ALL tables in one batch (no joins)...")
1523
+ import time
1524
+
1525
+ # Step 1.1: Generate TMLs for all tables
1526
+ batch_start = time.time()
1527
+ table_tmls = []
1528
+ table_names_list = []
1529
 
1530
+ for table_name, columns in tables.items():
1531
  tml_start = time.time()
1532
  table_tml = self.create_table_tml(table_name, columns, connection_name, database, schema, all_tables=None)
1533
  tml_time = time.time() - tml_start
1534
+ table_tmls.append(table_tml)
1535
+ table_names_list.append(table_name.upper())
1536
+ log_progress(f" πŸ“ Generated TML for {table_name.upper()} ({tml_time:.3f}s, {len(table_tml)} chars, {len(columns)} columns)")
1537
 
1538
+ # Step 1.2: Send ALL tables in ONE API call
1539
+ log_progress(f"\n πŸš€ Batch creating {len(table_tmls)} tables...")
1540
+ api_start = time.time()
1541
+ response = self.session.post(
1542
+ f"{self.base_url}/api/rest/2.0/metadata/tml/import",
1543
+ json={
1544
+ "metadata_tmls": table_tmls, # Send ALL tables at once!
1545
+ "import_policy": "PARTIAL",
1546
+ "create_new": True
1547
+ }
1548
+ )
1549
+ api_time = time.time() - api_start
1550
+ total_time = time.time() - batch_start
1551
+ log_progress(f" ⏱️ Phase 1 batch call: {api_time:.2f} seconds")
 
 
1552
 
1553
+ # Step 1.3: Process batch response
1554
+ if response.status_code == 200:
1555
+ result = response.json()
1556
 
1557
+ # Handle both response formats
1558
+ if isinstance(result, list):
1559
+ objects = result
1560
+ elif isinstance(result, dict) and 'object' in result:
1561
+ objects = result['object']
1562
+ else:
1563
+ error = f"Batch table creation failed: Unexpected response format: {type(result)}"
1564
+ log_progress(f" ❌ {error}")
1565
+ results['errors'].append(error)
1566
+ return results
1567
 
1568
+ # Process each table result
1569
+ if objects and len(objects) > 0:
1570
+ for idx, obj in enumerate(objects):
1571
+ table_name = table_names_list[idx] if idx < len(table_names_list) else f"Table_{idx}"
1572
  if obj.get('response', {}).get('status', {}).get('status_code') == 'OK':
1573
  table_guid = obj.get('response', {}).get('header', {}).get('id_guid')
1574
+ log_progress(f" βœ… {table_name} created (GUID: {table_guid})")
1575
+ results['tables'].append(table_name)
1576
+ table_guids[table_name] = table_guid
 
 
1577
  else:
1578
+ error_msg = obj.get('response', {}).get('status', {}).get('error_message', 'Unknown error')
1579
+ error = f"{table_name} failed: {error_msg}"
1580
  log_progress(f" ❌ {error}")
1581
  results['errors'].append(error)
 
 
 
 
 
1582
  else:
1583
+ error = "Batch table creation failed: No objects in response"
1584
  log_progress(f" ❌ {error}")
1585
  results['errors'].append(error)
1586
+ return results
1587
+ else:
1588
+ error = f"Batch table creation HTTP error: {response.status_code}"
1589
+ log_progress(f" ❌ {error}")
1590
+ log_progress(f" πŸ“„ Response: {response.text[:500]}")
1591
+ results['errors'].append(error)
1592
+ return results
1593
 
1594
  # Check if we created any tables successfully
1595
  if not table_guids:
1596
  log_progress(" ❌ No tables were created successfully in Phase 1")
1597
  return results
1598
 
1599
+ log_progress(f"\n βœ… Phase 1 complete: {len(table_guids)} tables created in {total_time:.2f} seconds")
1600
+
1601
+ # PHASE 2: Update tables WITH joins (now that all tables exist) - BATCH MODE
1602
+ log_progress("\n πŸ“‹ Phase 2: Adding joins to ALL tables in one batch...")
1603
+ phase2_start = time.time()
1604
+
1605
+ # Generate TML for all tables with joins
1606
+ table_tmls_with_joins = []
1607
+ table_names_for_phase2 = []
1608
 
 
 
1609
  for table_name, columns in tables.items():
 
1610
  table_name_upper = table_name.upper()
1611
  if table_name_upper not in table_guids:
1612
  log_progress(f" ⏭️ Skipping joins for {table_name_upper} (table creation failed)")
1613
  continue
1614
 
 
1615
  table_guid = table_guids[table_name_upper]
1616
 
 
1617
  # Create table TML WITH joins_with section AND the table GUID
1618
  table_tml = self.create_table_tml(
1619
  table_name, columns, connection_name, database, schema,
1620
  all_tables=tables, table_guid=table_guid
1621
  )
1622
+ table_tmls_with_joins.append(table_tml)
1623
+ table_names_for_phase2.append(table_name_upper)
1624
+
1625
+ # Single batch API call to update all tables with joins
1626
+ if table_tmls_with_joins:
1627
+ log_progress(f" πŸš€ Batch updating {len(table_tmls_with_joins)} tables with joins...")
1628
+ api_start = time.time()
1629
 
1630
  response = self.session.post(
1631
  f"{self.base_url}/api/rest/2.0/metadata/tml/import",
1632
  json={
1633
+ "metadata_tmls": table_tmls_with_joins,
1634
  "import_policy": "PARTIAL",
1635
+ "create_new": False # Update existing tables
1636
  }
1637
  )
1638
 
1639
+ phase2_api_time = time.time() - api_start
1640
+ log_progress(f" ⏱️ Phase 2 batch call: {phase2_api_time:.2f} seconds")
1641
+
1642
  if response.status_code == 200:
1643
  result = response.json()
1644
 
1645
+ # Handle both response formats
1646
  if isinstance(result, list):
1647
  objects = result
1648
  elif isinstance(result, dict) and 'object' in result:
 
1651
  log_progress(f" ⚠️ Unexpected response format for joins: {type(result)}")
1652
  objects = []
1653
 
1654
+ # Process each table result
1655
  if objects and len(objects) > 0:
1656
+ for idx, obj in enumerate(objects):
1657
+ table_name = table_names_for_phase2[idx] if idx < len(table_names_for_phase2) else f"Table_{idx}"
1658
+ if obj.get('response', {}).get('status', {}).get('status_code') == 'OK':
1659
+ log_progress(f" βœ… Joins added: {table_name}")
1660
+ else:
1661
+ error_msg = obj.get('response', {}).get('status', {}).get('error_message', 'Unknown error')
1662
+ log_progress(f" ⚠️ Joins failed for {table_name}: {error_msg}")
1663
+ results['errors'].append(f"Adding joins to {table_name} failed: {error_msg}")
1664
  else:
1665
+ log_progress(f" ⚠️ No results returned for join updates")
1666
  else:
1667
+ log_progress(f" ⚠️ HTTP error adding joins: {response.status_code}")
1668
+ log_progress(f" πŸ“„ Response: {response.text[:500]}")
1669
 
1670
+ log_progress(f" βœ… Phase 2 complete: Joins added to all tables in one batch")
1671
  actual_constraint_ids = {} # We'll generate these for the model
1672
 
1673
  # Skip separate relationship creation for now
 
1676
 
1677
  # Step 3: Extract constraint IDs from created tables
1678
  log_progress("\n2️⃣.5 Extracting constraint IDs from created tables...")
1679
+ extract_start = time.time()
1680
  table_constraints = {}
1681
+
1682
  for table_name, table_guid in table_guids.items():
1683
  log_progress(f" πŸ” Getting constraint IDs for {table_name}...")
1684
 
 
1711
  'destination': destination
1712
  })
1713
  log_progress(f" πŸ”— Found join: {constraint_id} -> {destination}")
1714
+
1715
+ extract_time = time.time() - extract_start
1716
+ log_progress(f" ⏱️ Constraint extraction time: {extract_time:.2f} seconds")
1717
  log_progress(f" βœ… Extracted constraints from {len(table_constraints)} tables")
1718
+
1719
  # Step 4: Create model (semantic layer) with constraint references
1720
  log_progress("\n3️⃣ Creating model (semantic layer) with joins...")
1721
+ model_start = time.time()
1722
+ # Use the same base from schema for model name
1723
+ model_name = f"{base_from_schema}_mdl"
1724
  log_progress(f" Model name: {model_name}")
1725
 
1726
  # Use the enhanced model creation that includes constraint references
1727
  model_tml = self._create_model_with_constraints(tables, foreign_keys, table_guids, table_constraints, model_name, connection_name)
1728
  print(f"\nπŸ“„ Model TML being sent:\n{model_tml}")
1729
+
1730
+ # ALWAYS save model TML to logs for debugging
1731
+ from pathlib import Path
1732
+ import datetime as dt
1733
+ log_dir = Path("logs") / "model_tmls"
1734
+ log_dir.mkdir(parents=True, exist_ok=True)
1735
+ timestamp = dt.datetime.now().strftime("%Y%m%d_%H%M%S")
1736
+ tml_file = log_dir / f"model_{model_name}_{timestamp}.tml"
1737
+ with open(tml_file, "w") as f:
1738
+ f.write(model_tml)
1739
+ log_progress(f" πŸ“ Model TML saved to: {tml_file}")
1740
 
1741
  response = self.session.post(
1742
  f"{self.base_url}/api/rest/2.0/metadata/tml/import",
 
1746
  "create_new": True
1747
  }
1748
  )
1749
+
1750
+ log_progress(f" πŸ“‘ Model API response status: {response.status_code}")
1751
+
1752
  if response.status_code == 200:
1753
  result = response.json()
1754
 
 
1766
  if objects and len(objects) > 0:
1767
  if objects[0].get('response', {}).get('status', {}).get('status_code') == 'OK':
1768
  model_guid = objects[0].get('response', {}).get('header', {}).get('id_guid')
1769
+ model_time = time.time() - model_start
1770
+ log_progress(f" ⏱️ Model creation time: {model_time:.2f} seconds")
1771
  log_progress(f" βœ… Model created successfully!")
1772
  log_progress(f" Model: {model_name}")
1773
  log_progress(f" GUID: {model_guid}")
 
1829
  traceback.print_exc()
1830
  else:
1831
  print(f"πŸ“‹ Full model response: {objects}") # DEBUG: Show full response
1832
+ status_obj = objects[0].get('response', {}).get('status', {})
1833
+ error_msg = status_obj.get('error_message', 'Unknown error')
1834
+ status_code = status_obj.get('status_code', 'Unknown')
1835
+ error = f"Model failed: {error_msg} (status: {status_code})"
1836
+ log_progress(f" ❌ {error}")
1837
+ if 'error' in status_obj:
1838
+ log_progress(f" πŸ“‹ Additional error details: {status_obj['error']}")
1839
+
1840
+ # Save full error response to log file
1841
+ error_log_dir = Path("logs") / "model_errors"
1842
+ error_log_dir.mkdir(parents=True, exist_ok=True)
1843
+ error_file = error_log_dir / f"model_error_{model_name}_{timestamp}.json"
1844
+ import json
1845
+ with open(error_file, "w") as f:
1846
+ json.dump(objects, f, indent=2)
1847
+ log_progress(f" πŸ“ Full error saved to: {error_file}")
1848
+
1849
  results['errors'].append(error)
1850
  else:
1851
  error = "Model failed: No objects in response"
1852
  log_progress(f" ❌ {error}")
1853
  results['errors'].append(error)
1854
+ else:
1855
+ # Handle non-200 responses
1856
+ error = f"Model API call failed with status {response.status_code}"
1857
+ log_progress(f" ❌ {error}")
1858
+ log_progress(f" πŸ“„ Response: {response.text[:500]}") # First 500 chars
1859
+ try:
1860
+ error_detail = response.json()
1861
+ log_progress(f" πŸ“‹ Error details: {error_detail}")
1862
+ except:
1863
+ pass
1864
+ results['errors'].append(error)
1865
 
1866
  # Mark as successful if we got this far
1867
  results['success'] = len(results['errors']) == 0
 
1869
  except Exception as e:
1870
  error_msg = str(e)
1871
  print(f"❌ Deployment failed: {error_msg}")
1872
+ deployment_log.write(f"❌ Deployment failed: {error_msg}\n")
1873
  results['errors'].append(error_msg)
1874
+ finally:
1875
+ # Close deployment log file
1876
+ deployment_log.write(f"\n{'='*50}\n")
1877
+ deployment_log.write(f"Deployment completed at: {dt.datetime.now().isoformat()}\n")
1878
+ deployment_log.write(f"Log saved to: {deployment_log_file}\n")
1879
+ deployment_log.close()
1880
+ # Print directly since deployment_log is now closed
1881
+ print(f"\nπŸ“ Full deployment log saved to: {deployment_log_file}")
1882
+ if progress_callback:
1883
+ progress_callback(f"\nπŸ“ Full deployment log saved to: {deployment_log_file}")
1884
+
1885
  return results
1886
 
1887
 
1888
def deploy_to_thoughtspot(ddl: str, database: str, schema: str,
                          connection_name: str = None, company_name: str = None,
                          use_case: str = None, naming_prefix: str = None, progress_callback=None) -> Dict:
    """
    Module-level convenience wrapper for deploying to ThoughtSpot.

    Args:
        ddl: Data Definition Language statements
        database: Target database name
        schema: Target schema name
        connection_name: Optional connection name
        company_name: Optional company name (used in default naming)
        use_case: Optional use case (used in default naming)
        naming_prefix: Optional custom naming prefix (format: {prefix}_{day+ms}_{objtype})
        progress_callback: Optional callback for progress updates

    Returns:
        Dict with deployment results
    """
    # Delegate straight to a fresh deployer instance; all of the heavy
    # lifting (connection, tables, joins, model) happens in deploy_all.
    return ThoughtSpotDeployer().deploy_all(
        ddl, database, schema, connection_name,
        company_name, use_case, naming_prefix, progress_callback,
    )
1909
 
1910
 
1911
  if __name__ == "__main__":