mikeboone Claude committed on
Commit
b2a3e22
·
1 Parent(s): ae8e50d

fix: Implement consistent naming convention across all objects

Browse files

- Replace verbose DM{YYMMDD}_{HHMMSS}_{COMPANY}_{USECASE} format
- New format: {PREFIX}_{DDMICROS}_{objtype} (prefix optional)
- Extract base timestamp from schema to ensure consistency across objects
- Add naming_prefix field to UI settings (saved in Supabase)
- Fix DDL transaction commit in cdw_connector.py:153
- Add USE SCHEMA command to population scripts for proper context
- Remove confusing "per table" timing messages, show batch call times only
- Enhance Liveboard error handling with detailed exception catching

Key fixes:
- Schema/connection/model now use same base timestamp
- Tables persist in Snowflake (commit added)
- Population scripts set proper schema context
- Clean performance metrics display

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (6) hide show
  1. cdw_connector.py +36 -10
  2. demo_logger.py +87 -29
  3. demo_prep.py +142 -63
  4. liveboard_creator.py +87 -7
  5. supabase_client.py +1 -0
  6. thoughtspot_deployer.py +513 -190
cdw_connector.py CHANGED
@@ -72,19 +72,40 @@ class SnowflakeDeployer:
72
  except Exception as e:
73
  print(f"Error closing connection: {str(e)}")
74
 
75
- def create_demo_schema_and_deploy(self, company_name: str, use_case: str, ddl_statements: str) -> Tuple[bool, Optional[str], str]:
76
- """Create timestamped schema and deploy DDL statements"""
 
 
 
 
 
77
  if not self.connection:
78
  success, message = self.connect()
79
  if not success:
80
  return False, None, f"Connection failed: {message}"
81
-
82
  try:
83
- # Generate schema name with timestamp
84
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
85
- company_short = company_name.replace(" ", "")[:5] # First 5 chars
86
- use_case_short = use_case.replace(" ", "")[:3] # First 3 chars
87
- schema_name = f"{timestamp}_{company_short}_{use_case_short}".upper()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
  cursor = self.connection.cursor()
90
 
@@ -127,15 +148,20 @@ class SnowflakeDeployer:
127
  # Check autocommit setting
128
  print(f"πŸ” DEBUG: Connection autocommit: {self.connection.autocommit}")
129
 
 
 
 
 
 
130
  # VERIFY the schema actually exists
131
  print(f"πŸ” DEBUG: Verifying schema exists...")
132
  cursor.execute(f'SELECT CURRENT_SCHEMA()')
133
  current_schema = cursor.fetchone()[0]
134
  print(f"πŸ” DEBUG: Current schema: {current_schema}")
135
  print(f"πŸ” DEBUG: Schema verification completed - using current schema context")
136
-
137
  cursor.close()
138
-
139
  success_message = f"Schema '{schema_name}' created successfully with {executed_count} tables"
140
  return True, schema_name, success_message
141
 
 
72
  except Exception as e:
73
  print(f"Error closing connection: {str(e)}")
74
 
75
+ def create_demo_schema_and_deploy(self, company_name: str, use_case: str, ddl_statements: str, naming_prefix: str = "") -> Tuple[bool, Optional[str], str]:
76
+ """Create timestamped schema and deploy DDL statements
77
+
78
+ Args:
79
+ naming_prefix: Optional custom prefix for schema name
80
+ Format: {prefix}_{day+ms}_scm or just {day+ms}_scm if blank
81
+ """
82
  if not self.connection:
83
  success, message = self.connect()
84
  if not success:
85
  return False, None, f"Connection failed: {message}"
86
+
87
  try:
88
+ # Generate schema name using same convention as ThoughtSpot objects
89
+ import re
90
+ from datetime import datetime as dt
91
+ now = dt.now()
92
+
93
+ # Clean prefix (uppercase, alphanumeric only) - can be empty string
94
+ if naming_prefix:
95
+ prefix_clean = re.sub(r'[^a-zA-Z0-9]', '', naming_prefix.upper())
96
+ else:
97
+ prefix_clean = ""
98
+
99
+ # Short unique timestamp: day (2 digits) + microseconds (6 digits)
100
+ day = now.strftime('%d')
101
+ microsec = now.strftime('%f') # 6 digits
102
+ short_timestamp = f"{day}{microsec}"
103
+
104
+ # Base format: PREFIX_DDMICROS_scm (prefix can be empty)
105
+ if prefix_clean:
106
+ schema_name = f"{prefix_clean}_{short_timestamp}_scm"
107
+ else:
108
+ schema_name = f"{short_timestamp}_scm"
109
 
110
  cursor = self.connection.cursor()
111
 
 
148
  # Check autocommit setting
149
  print(f"πŸ” DEBUG: Connection autocommit: {self.connection.autocommit}")
150
 
151
+ # COMMIT the transaction
152
+ print(f"πŸ” DEBUG: Committing transaction...")
153
+ self.connection.commit()
154
+ print(f"βœ… Transaction committed - {executed_count} tables created")
155
+
156
  # VERIFY the schema actually exists
157
  print(f"πŸ” DEBUG: Verifying schema exists...")
158
  cursor.execute(f'SELECT CURRENT_SCHEMA()')
159
  current_schema = cursor.fetchone()[0]
160
  print(f"πŸ” DEBUG: Current schema: {current_schema}")
161
  print(f"πŸ” DEBUG: Schema verification completed - using current schema context")
162
+
163
  cursor.close()
164
+
165
  success_message = f"Schema '{schema_name}' created successfully with {executed_count} tables"
166
  return True, schema_name, success_message
167
 
demo_logger.py CHANGED
@@ -25,17 +25,24 @@ load_dotenv()
25
  class DemoLogger:
26
  """Comprehensive logger for demo creation process"""
27
 
28
- def __init__(self, session_id: str = None, user_email: str = None):
29
  """
30
  Initialize logger with session ID and optional user email
31
 
32
  Args:
33
  session_id: Unique ID for this demo creation session
34
  user_email: User's email for tracking
 
35
  """
36
  self.session_id = session_id or datetime.now().strftime("%Y%m%d_%H%M%S")
37
  self.user_email = user_email or "anonymous"
38
 
 
 
 
 
 
 
39
  # Create logs directory
40
  self.log_dir = Path("logs") / "demo_sessions" / self.session_id
41
  self.log_dir.mkdir(parents=True, exist_ok=True)
@@ -64,7 +71,14 @@ class DemoLogger:
64
  "status": "started"
65
  }
66
 
67
- self.logger.info(f"Demo session started: {self.session_id}")
 
 
 
 
 
 
 
68
 
69
  def setup_logger(self):
70
  """Set up Python logger"""
@@ -100,47 +114,79 @@ class DemoLogger:
100
  self.session_data["llm_model"] = llm_model
101
  self.session_data["settings"].update(kwargs)
102
 
103
- self.logger.info(f"Demo Configuration:")
104
- self.logger.info(f" Use Case: {use_case}")
105
- self.logger.info(f" Company: {company}")
106
- self.logger.info(f" URL: {company_url}")
107
- self.logger.info(f" Industry: {industry}")
108
- self.logger.info(f" LLM Model: {llm_model}")
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
  self.save_session_data()
111
 
112
  def log_ddl(self, ddl_content: str, schema_name: str = None):
113
- """Log the generated DDL"""
114
- self.session_data["ddl"] = ddl_content
 
 
 
 
 
115
  self.session_data["schema_name"] = schema_name
116
 
117
- # Save DDL to separate file
118
  ddl_file = self.log_dir / "generated_ddl.sql"
119
  with open(ddl_file, 'w') as f:
120
  f.write(ddl_content)
121
 
122
  # Log summary
123
  tables = ddl_content.count("CREATE TABLE")
124
- self.logger.info(f"DDL Generated: {tables} tables, {len(ddl_content)} characters")
125
- self.logger.info(f"DDL saved to: {ddl_file}")
126
-
127
- # Check for SALESTRANSACTIONS table (known issue)
128
- if "SALESTRANSACTIONS" not in ddl_content.upper():
129
- self.log_warning("SALESTRANSACTIONS table not found in DDL - this will cause deployment issues!")
 
 
 
 
 
 
 
 
 
130
 
131
  self.save_session_data()
132
 
133
  def log_population_code(self, code: str):
134
- """Log the generated population code"""
135
- self.session_data["population_code"] = code[:5000] # Truncate for storage
 
136
 
137
- # Save full code to file
138
  code_file = self.log_dir / "population_code.py"
139
  with open(code_file, 'w') as f:
140
  f.write(code)
141
 
142
- self.logger.info(f"Population code generated: {len(code)} characters")
143
- self.logger.info(f"Code saved to: {code_file}")
 
 
 
 
144
 
145
  self.save_session_data()
146
 
@@ -148,9 +194,16 @@ class DemoLogger:
148
  """Log a discovered outlier"""
149
  self.session_data["outliers"].append(outlier)
150
 
151
- self.logger.info(f"Outlier discovered: {outlier.get('title', 'Unknown')}")
152
- self.logger.debug(f" Insight: {outlier.get('insight', 'N/A')}")
153
- self.logger.debug(f" Impact: {outlier.get('impact', 'N/A')}")
 
 
 
 
 
 
 
154
 
155
  self.save_session_data()
156
 
@@ -190,13 +243,14 @@ class DemoLogger:
190
  self.save_session_data()
191
 
192
  def log_stage_completion(self, stage: str, success: bool,
193
- duration_seconds: float = None):
194
  """Log completion of a demo creation stage"""
195
  stage_data = {
196
  "stage": stage,
197
  "success": success,
198
  "duration": duration_seconds,
199
- "timestamp": datetime.now().isoformat()
 
200
  }
201
 
202
  if "stages" not in self.session_data:
@@ -205,9 +259,13 @@ class DemoLogger:
205
  self.session_data["stages"].append(stage_data)
206
 
207
  status = "βœ… SUCCESS" if success else "❌ FAILED"
208
- self.logger.info(f"Stage '{stage}' {status}")
 
209
  if duration_seconds:
210
- self.logger.info(f" Duration: {duration_seconds:.2f} seconds")
 
 
 
211
 
212
  self.save_session_data()
213
 
 
25
  class DemoLogger:
26
  """Comprehensive logger for demo creation process"""
27
 
28
+ def __init__(self, session_id: str = None, user_email: str = None, debug_mode: bool = None):
29
  """
30
  Initialize logger with session ID and optional user email
31
 
32
  Args:
33
  session_id: Unique ID for this demo creation session
34
  user_email: User's email for tracking
35
+ debug_mode: Enable debug mode (saves DDL, population code, outliers)
36
  """
37
  self.session_id = session_id or datetime.now().strftime("%Y%m%d_%H%M%S")
38
  self.user_email = user_email or "anonymous"
39
 
40
+ # Check for DEBUG environment variable or parameter
41
+ if debug_mode is None:
42
+ self.debug_mode = os.getenv('DEBUG', 'false').lower() in ['true', '1', 'yes']
43
+ else:
44
+ self.debug_mode = debug_mode
45
+
46
  # Create logs directory
47
  self.log_dir = Path("logs") / "demo_sessions" / self.session_id
48
  self.log_dir.mkdir(parents=True, exist_ok=True)
 
71
  "status": "started"
72
  }
73
 
74
+ # Log session start with timestamp
75
+ self.logger.info("="*80)
76
+ self.logger.info(f"πŸš€ Demo Session Started")
77
+ self.logger.info(f" Session ID: {self.session_id}")
78
+ self.logger.info(f" User: {self.user_email}")
79
+ self.logger.info(f" Timestamp: {self.session_data['start_time']}")
80
+ self.logger.info(f" Debug Mode: {'ON' if self.debug_mode else 'OFF'}")
81
+ self.logger.info("="*80)
82
 
83
  def setup_logger(self):
84
  """Set up Python logger"""
 
114
  self.session_data["llm_model"] = llm_model
115
  self.session_data["settings"].update(kwargs)
116
 
117
+ self.logger.info("="*80)
118
+ self.logger.info("πŸ“‹ Demo Configuration")
119
+ self.logger.info("="*80)
120
+ self.logger.info(f"🎯 MAIN SETTINGS:")
121
+ self.logger.info(f" Company URL: {company_url}")
122
+ self.logger.info(f" Use Case: {use_case}")
123
+ self.logger.info(f" LLM Model: {llm_model}")
124
+ self.logger.info(f"")
125
+ self.logger.info(f"πŸ“Š Additional Info:")
126
+ self.logger.info(f" Company: {company}")
127
+ self.logger.info(f" Industry: {industry}")
128
+
129
+ if kwargs:
130
+ self.logger.info(f"")
131
+ self.logger.info(f"βš™οΈ Other Settings:")
132
+ for key, value in kwargs.items():
133
+ self.logger.info(f" {key}: {value}")
134
+
135
+ self.logger.info("="*80)
136
 
137
  self.save_session_data()
138
 
139
  def log_ddl(self, ddl_content: str, schema_name: str = None):
140
+ """Log the generated DDL - ALWAYS saves to file"""
141
+ # Always save full DDL if debug mode
142
+ if self.debug_mode:
143
+ self.session_data["ddl"] = ddl_content
144
+ else:
145
+ self.session_data["ddl"] = ddl_content[:1000] + "..." if len(ddl_content) > 1000 else ddl_content
146
+
147
  self.session_data["schema_name"] = schema_name
148
 
149
+ # ALWAYS save DDL to separate file
150
  ddl_file = self.log_dir / "generated_ddl.sql"
151
  with open(ddl_file, 'w') as f:
152
  f.write(ddl_content)
153
 
154
  # Log summary
155
  tables = ddl_content.count("CREATE TABLE")
156
+ self.logger.info("="*80)
157
+ self.logger.info(f"πŸ“ DDL Generated")
158
+ self.logger.info("="*80)
159
+ self.logger.info(f" Tables: {tables}")
160
+ self.logger.info(f" Size: {len(ddl_content)} characters")
161
+ self.logger.info(f" Schema: {schema_name or 'N/A'}")
162
+ self.logger.info(f" Saved to: {ddl_file}")
163
+
164
+ # Check for SALESTRANSACTIONS or SALES_TRANSACTIONS table (known issue)
165
+ ddl_upper = ddl_content.upper()
166
+ if "SALESTRANSACTIONS" not in ddl_upper and "SALES_TRANSACTIONS" not in ddl_upper:
167
+ self.logger.warning("⚠️ SALESTRANSACTIONS table not found in DDL - this will cause deployment issues!")
168
+ self.log_warning("SALESTRANSACTIONS table not found in DDL")
169
+
170
+ self.logger.info("="*80)
171
 
172
  self.save_session_data()
173
 
174
  def log_population_code(self, code: str):
175
+ """Log the generated population code - saves to file if DEBUG mode"""
176
+ # Always save summary
177
+ self.session_data["population_code"] = code[:5000] if not self.debug_mode else code
178
 
179
+ # ALWAYS save full code to file
180
  code_file = self.log_dir / "population_code.py"
181
  with open(code_file, 'w') as f:
182
  f.write(code)
183
 
184
+ self.logger.info("="*80)
185
+ self.logger.info(f"🐍 Population Code Generated")
186
+ self.logger.info("="*80)
187
+ self.logger.info(f" Size: {len(code)} characters")
188
+ self.logger.info(f" Saved to: {code_file}")
189
+ self.logger.info("="*80)
190
 
191
  self.save_session_data()
192
 
 
194
  """Log a discovered outlier"""
195
  self.session_data["outliers"].append(outlier)
196
 
197
+ self.logger.info(f"πŸ’‘ Outlier discovered: {outlier.get('title', 'Unknown')}")
198
+ if self.debug_mode:
199
+ self.logger.info(f" Insight: {outlier.get('insight', 'N/A')}")
200
+ self.logger.info(f" Impact: {outlier.get('impact', 'N/A')}")
201
+
202
+ # Save all outliers to file in DEBUG mode
203
+ if self.debug_mode:
204
+ outliers_file = self.log_dir / "outliers.json"
205
+ with open(outliers_file, 'w') as f:
206
+ json.dump(self.session_data["outliers"], f, indent=2)
207
 
208
  self.save_session_data()
209
 
 
243
  self.save_session_data()
244
 
245
  def log_stage_completion(self, stage: str, success: bool,
246
+ duration_seconds: float = None, details: str = None):
247
  """Log completion of a demo creation stage"""
248
  stage_data = {
249
  "stage": stage,
250
  "success": success,
251
  "duration": duration_seconds,
252
+ "timestamp": datetime.now().isoformat(),
253
+ "details": details
254
  }
255
 
256
  if "stages" not in self.session_data:
 
259
  self.session_data["stages"].append(stage_data)
260
 
261
  status = "βœ… SUCCESS" if success else "❌ FAILED"
262
+ self.logger.info("="*80)
263
+ self.logger.info(f"πŸ“ Stage: {stage.upper()} - {status}")
264
  if duration_seconds:
265
+ self.logger.info(f" Duration: {duration_seconds:.2f} seconds")
266
+ if details:
267
+ self.logger.info(f" Details: {details}")
268
+ self.logger.info("="*80)
269
 
270
  self.save_session_data()
271
 
demo_prep.py CHANGED
@@ -313,26 +313,34 @@ def execute_population_script(python_code, schema_name):
313
  "conn_params = get_snowflake_connection_params()",
314
  "conn_params = get_snowflake_connection_params()\nconn_params.pop('schema', None) # Remove schema to avoid duplicate"
315
  )
316
-
 
 
 
 
 
 
317
  # Simple and safe schema replacement - just replace the placeholder
318
  cleaned_code = cleaned_code.replace("os.getenv('SNOWFLAKE_SCHEMA')", f"'{schema_name}'")
319
  cleaned_code = cleaned_code.replace('os.getenv("SNOWFLAKE_SCHEMA")', f'"{schema_name}"')
320
 
 
 
 
321
  # FIX: Remove fake.unique() calls that cause "duplicated values after 1,000 iterations" error
322
  cleaned_code = cleaned_code.replace("fake.unique.word()", "fake.word()")
323
  cleaned_code = cleaned_code.replace("fake.unique.email()", "fake.email()")
324
  cleaned_code = cleaned_code.replace("fake.unique.company()", "fake.company()")
325
 
326
  # FIX: Truncate phone numbers to avoid extension overflow (e.g., '790-923-3730x07350')
327
- # Handle multiple variations of phone number generation
328
- cleaned_code = cleaned_code.replace("fake.phone_number()", "fake.phone_number()[:20]")
329
- cleaned_code = re.sub(r"faker\.phone_number\(\)", "faker.phone_number()[:20]", cleaned_code)
330
- cleaned_code = re.sub(r"'Phone':\s*fake\.phone_number\(\)", "'Phone': fake.phone_number()[:20]", cleaned_code)
331
  # Also handle when it's in a larger expression or assignment
332
- cleaned_code = re.sub(r"(\w+\.phone_number\(\))(?!\[)", r"\1[:20]", cleaned_code)
333
 
334
  # FIX: Convert SQLite-style ? placeholders to Snowflake-style %s placeholders
335
- import re
336
  cleaned_code = re.sub(r'\bVALUES\s*\(\?', 'VALUES (%s', cleaned_code)
337
  cleaned_code = re.sub(r',\s*\?', ', %s', cleaned_code)
338
 
@@ -343,9 +351,8 @@ def execute_population_script(python_code, schema_name):
343
  print("πŸš€ STARTING DATA POPULATION EXECUTION")
344
  print("=" * 50)"""
345
  )
346
-
347
  # Add logging to populate functions dynamically
348
- import re
349
 
350
  # Find all populate function definitions and add logging
351
  def add_function_logging(match):
@@ -397,6 +404,7 @@ def execute_population_script(python_code, schema_name):
397
  "os": os,
398
  "random": random,
399
  "datetime": datetime,
 
400
  "get_snowflake_connection_params": get_snowflake_connection_params,
401
  }
402
 
@@ -408,11 +416,24 @@ def execute_population_script(python_code, schema_name):
408
  print(f"πŸ” DEBUG: Cleaned code preview (first 500 chars):")
409
  print(cleaned_code[:500])
410
  print("...")
411
-
412
- # Save the full generated code to a file for debugging
413
- with open(f"/tmp/generated_population_script_{schema_name}.py", "w") as f:
 
 
 
 
 
 
 
 
 
 
 
 
 
414
  f.write(cleaned_code)
415
- print(f"πŸ” DEBUG: Full generated code saved to /tmp/generated_population_script_{schema_name}.py")
416
 
417
  # First, validate the syntax before executing
418
  try:
@@ -439,6 +460,24 @@ def execute_population_script(python_code, schema_name):
439
  print("=" * 50)
440
  print(f"❌ DATA POPULATION FAILED: {str(e)}")
441
  print("=" * 50)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
442
  return False, f"Population execution failed: {str(e)}"
443
 
444
  def generate_deployment_summary(demo_builder_instance):
@@ -891,6 +930,12 @@ def create_demo_prep_interface():
891
  label="Schema Prefix",
892
  info="Prefix for schema names (e.g., demo_20250930)"
893
  )
 
 
 
 
 
 
894
 
895
  with gr.TabItem("Admin"):
896
  ts_instance_url = gr.Textbox(
@@ -1094,7 +1139,7 @@ def create_demo_prep_interface():
1094
  # Progressive workflow with Multi-LLM support
1095
  def progressive_workflow_handler(
1096
  url, use_case, llm_provider, demo_builder_instance, mode="expert", stop_before_deploy=False,
1097
- temperature=0.3, max_tokens=4000
1098
  ):
1099
  # Initialize DemoBuilder if none exists
1100
  if demo_builder_instance is None:
@@ -1102,6 +1147,17 @@ def create_demo_prep_interface():
1102
  return "Please enter a company URL", None, "Start Research", False
1103
  demo_builder_instance = DemoBuilder(use_case, url.strip())
1104
 
 
 
 
 
 
 
 
 
 
 
 
1105
  # Initialize or continue with existing content
1106
  if not hasattr(demo_builder_instance, 'accumulated_content'):
1107
  accumulated_content = ""
@@ -1327,6 +1383,10 @@ def create_demo_prep_interface():
1327
  workflow_updates += f"πŸ’‘ Next: Click 'Create DDL' to generate database schema\n"
1328
  demo_builder_instance.workflow_updates = workflow_updates
1329
 
 
 
 
 
1330
  yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
1331
 
1332
  elif demo_builder_instance.current_stage == "create":
@@ -1338,24 +1398,26 @@ def create_demo_prep_interface():
1338
  )
1339
  yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
1340
 
1341
- # Extract company name for schema naming
1342
- company_name = (
1343
- demo_builder_instance.extract_company_name()
1344
- .replace(" ", "")
1345
- .replace("-", "")
1346
- )
1347
- # Use new DM naming convention for schema
1348
- now = time.localtime()
1349
- yymmdd = time.strftime('%y%m%d', now)
1350
- hhmmss = time.strftime('%H%M%S', now)
1351
-
1352
- # Clean and truncate company name (5 chars)
1353
- company_clean = re.sub(r'[^a-zA-Z0-9]', '', company_name.upper())[:5]
1354
-
1355
- # Clean and truncate use case (3 chars)
1356
- usecase_clean = re.sub(r'[^a-zA-Z0-9]', '', use_case.upper())[:3]
1357
-
1358
- schema_name = f"DM{yymmdd}_{hhmmss}_{company_clean}_{usecase_clean}"
 
 
1359
 
1360
  schema_prompt = f"""Create ONLY complete SQL DDL statements for a {use_case} demo database.
1361
 
@@ -1380,6 +1442,11 @@ TECHNICAL REQUIREMENTS:
1380
  - Include realistic column names that match the business context
1381
  - Add proper constraints and relationships
1382
 
 
 
 
 
 
1383
  SNOWFLAKE SYNTAX EXAMPLES:
1384
  - Auto-increment: ColumnID INT IDENTITY(1,1) PRIMARY KEY
1385
  - NOT: ColumnID INT PRIMARY KEY AUTO_INCREMENT
@@ -1426,6 +1493,11 @@ Generate complete CREATE TABLE statements with proper Snowflake syntax and depen
1426
  demo_builder_instance.accumulated_content = accumulated_content
1427
  demo_builder_instance.workflow_updates = workflow_updates
1428
 
 
 
 
 
 
1429
  yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
1430
 
1431
  elif demo_builder_instance.current_stage == "populate":
@@ -1649,13 +1721,18 @@ Schema Validation: Will be checked next...
1649
  yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
1650
 
1651
  print(f"πŸ” DEBUG: About to advance from populate to deploy stage")
1652
-
1653
  demo_builder_instance.advance_stage()
1654
  demo_builder_instance.accumulated_content = accumulated_content
1655
  demo_builder_instance.workflow_updates = workflow_updates
1656
  print(f"πŸ” DEBUG: Advanced to stage: {demo_builder_instance.current_stage}")
1657
  print(f"πŸ” DEBUG: Stage status: {demo_builder_instance.stage_status}")
1658
-
 
 
 
 
 
1659
  yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
1660
  return # Exit populate stage immediately after advancing
1661
 
@@ -1695,6 +1772,7 @@ Schema Validation: Will be checked next...
1695
  company_name,
1696
  use_case,
1697
  demo_builder_instance.schema_generation_results,
 
1698
  )
1699
  )
1700
 
@@ -1803,6 +1881,7 @@ Schema Validation: Will be checked next...
1803
  schema=schema_name,
1804
  company_name=company_name,
1805
  use_case=use_case,
 
1806
  progress_callback=ts_detailed_callback
1807
  )
1808
  deployment_complete = True
@@ -1837,25 +1916,9 @@ Schema Validation: Will be checked next...
1837
  demo_builder_instance.deployment_status += ts_progress_msg
1838
  workflow_updates += ts_progress_msg
1839
  yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
1840
-
1841
- # Add detailed progress messages to main window after deployment
1842
- workflow_updates += "\nBuilding relationships...\n"
1843
- workflow_updates += "Creating tables...\n"
1844
-
1845
- # Show detailed table creation info if available
1846
- if 'tables' in ts_results and ts_results['tables']:
1847
- workflow_updates += f"Generated joins between tables\n"
1848
- for table_name in ts_results['tables']:
1849
- workflow_updates += f" β€’ {table_name} created successfully\n"
1850
- workflow_updates += f"Tables created successfully ({len(ts_results['tables'])} total)\n"
1851
- else:
1852
- workflow_updates += "Generated joins between tables\n"
1853
- workflow_updates += "Tables created successfully\n"
1854
 
1855
- workflow_updates += "Added joins to semantic model\n"
1856
- workflow_updates += "Model created successfully!\n"
1857
- yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
1858
-
1859
  if ts_results['success']:
1860
  ts_msg = "\nThoughtSpot metadata deployment completed!\n\n"
1861
  ts_msg += "Created ThoughtSpot objects:\n"
@@ -1959,9 +2022,19 @@ Schema Validation: Will be checked next...
1959
 
1960
  demo_builder_instance.advance_stage()
1961
  demo_builder_instance.accumulated_content = accumulated_content
 
 
 
 
 
 
 
1962
  yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
1963
 
1964
  except Exception as e:
 
 
 
1965
  error_msg = f"❌ Deployment error: {str(e)}\n"
1966
  demo_builder_instance.deployment_status += error_msg
1967
  workflow_updates += error_msg
@@ -1977,7 +2050,7 @@ Schema Validation: Will be checked next...
1977
  yield accumulated_content, workflow_updates, None, "Research", False
1978
 
1979
  # Redo functionality
1980
- def redo_latest_stage(demo_builder_instance, llm_provider, url, use_case, mode="expert", stop_deploy=False, temperature=0.3, max_tokens=4000):
1981
  if (
1982
  demo_builder_instance
1983
  and demo_builder_instance.current_stage != "research"
@@ -1992,7 +2065,7 @@ Schema Validation: Will be checked next...
1992
 
1993
  # Re-run the workflow with same format as button_click_handler
1994
  for result in progressive_workflow_handler(
1995
- url, use_case, llm_provider, demo_builder_instance, mode, stop_deploy, temperature, max_tokens
1996
  ):
1997
  if len(result) == 5:
1998
  # New format with workflow_updates
@@ -2659,9 +2732,9 @@ SCRIPT REQUIREMENTS:
2659
  # Legacy CDW deployment function removed - deployment now handled by main workflow
2660
 
2661
  # Main click handler with multi-LLM support - UPDATED to route to AI Output tab
2662
- def button_click_handler(url, use_case, llm_provider, demo_builder_instance, mode, stop_deploy, temperature, max_tokens):
2663
  for result in progressive_workflow_handler(
2664
- url, use_case, llm_provider, demo_builder_instance, mode, stop_deploy, temperature, max_tokens
2665
  ):
2666
  if len(result) == 5:
2667
  # New format with workflow_updates
@@ -2692,7 +2765,7 @@ SCRIPT REQUIREMENTS:
2692
  # Event handlers - UPDATED for new AI Output tab and Results box
2693
  research_btn.click(
2694
  fn=button_click_handler,
2695
- inputs=[url_input, industry_dropdown, llm_dropdown, demo_builder, workflow_mode, stop_before_deploy, temperature_slider, max_tokens_input],
2696
  outputs=[
2697
  ai_output_box, # streaming content goes to AI Output tab
2698
  results_box, # dynamic content based on stage/mode
@@ -2712,10 +2785,10 @@ SCRIPT REQUIREMENTS:
2712
  )
2713
 
2714
  redo_btn.click(
2715
- fn=lambda db, llm, url, use_case, mode, stop_deploy, temp, max_tok: [
2716
- result for result in redo_latest_stage(db, llm, url, use_case, mode, stop_deploy, temp, max_tok)
2717
  ],
2718
- inputs=[demo_builder, llm_dropdown, url_input, industry_dropdown, workflow_mode, stop_before_deploy, temperature_slider, max_tokens_input],
2719
  outputs=[
2720
  ai_output_box, # streaming content goes to AI Output tab
2721
  results_box, # dynamic content based on stage/mode
@@ -3191,7 +3264,7 @@ SCRIPT REQUIREMENTS:
3191
 
3192
  def save_settings_handler(
3193
  email, ai_model, temperature, max_tokens, company_size, company_url, use_case, data_volume,
3194
- warehouse, database, ts_url, ts_username, batch_size, thread_count
3195
  ):
3196
  """Save settings to Supabase database"""
3197
  if not email or '@' not in email:
@@ -3215,7 +3288,8 @@ SCRIPT REQUIREMENTS:
3215
  "thoughtspot_url": ts_url,
3216
  "thoughtspot_username": ts_username,
3217
  "batch_size": batch_size,
3218
- "thread_count": thread_count
 
3219
  }
3220
 
3221
  success = settings_client.save_all_settings(email, settings)
@@ -3243,6 +3317,7 @@ SCRIPT REQUIREMENTS:
3243
  "", # ts_username
3244
  5000, # batch_size
3245
  4, # thread_count
 
3246
  "Settings not loaded - enter valid email", # status
3247
  "gpt-4o", # llm_dropdown (main form AI Model)
3248
  "" # url_input (main form Company URL)
@@ -3266,6 +3341,7 @@ SCRIPT REQUIREMENTS:
3266
  settings.get("thoughtspot_username", ""),
3267
  int(settings.get("batch_size", 5000)),
3268
  int(settings.get("thread_count", 4)),
 
3269
  f"βœ… Settings loaded for {email}",
3270
  ai_model, # llm_dropdown (same as default_ai_model)
3271
  company_url # url_input (same as default_company_url)
@@ -3281,6 +3357,7 @@ SCRIPT REQUIREMENTS:
3281
  "",
3282
  5000,
3283
  4,
 
3284
  f"❌ Error loading settings: {str(e)}"
3285
  ]
3286
 
@@ -3307,7 +3384,8 @@ SCRIPT REQUIREMENTS:
3307
  ts_instance_url,
3308
  ts_username,
3309
  batch_size_slider,
3310
- thread_count_slider
 
3311
  ],
3312
  outputs=[settings_status]
3313
  )
@@ -3552,6 +3630,7 @@ SCRIPT REQUIREMENTS:
3552
  ts_username,
3553
  batch_size_slider,
3554
  thread_count_slider,
 
3555
  settings_status,
3556
  llm_dropdown, # Also update main form AI Model dropdown
3557
  url_input # Also update main form Company URL
 
313
  "conn_params = get_snowflake_connection_params()",
314
  "conn_params = get_snowflake_connection_params()\nconn_params.pop('schema', None) # Remove schema to avoid duplicate"
315
  )
316
+
317
+ # CRITICAL FIX: Add USE SCHEMA command after cursor creation to set schema context
318
+ cleaned_code = cleaned_code.replace(
319
+ "cursor = conn.cursor()",
320
+ f"cursor = conn.cursor()\n cursor.execute('USE SCHEMA \"{schema_name}\"') # Set schema context for INSERTs"
321
+ )
322
+
323
  # Simple and safe schema replacement - just replace the placeholder
324
  cleaned_code = cleaned_code.replace("os.getenv('SNOWFLAKE_SCHEMA')", f"'{schema_name}'")
325
  cleaned_code = cleaned_code.replace('os.getenv("SNOWFLAKE_SCHEMA")', f'"{schema_name}"')
326
 
327
+ # Import re module at the top of this section
328
+ import re
329
+
330
  # FIX: Remove fake.unique() calls that cause "duplicated values after 1,000 iterations" error
331
  cleaned_code = cleaned_code.replace("fake.unique.word()", "fake.word()")
332
  cleaned_code = cleaned_code.replace("fake.unique.email()", "fake.email()")
333
  cleaned_code = cleaned_code.replace("fake.unique.company()", "fake.company()")
334
 
335
  # FIX: Truncate phone numbers to avoid extension overflow (e.g., '790-923-3730x07350')
336
+ # Handle multiple variations of phone number generation - truncate to 15 chars (max field size)
337
+ cleaned_code = cleaned_code.replace("fake.phone_number()", "fake.phone_number()[:15]")
338
+ cleaned_code = re.sub(r"faker\.phone_number\(\)", "faker.phone_number()[:15]", cleaned_code)
339
+ cleaned_code = re.sub(r"'Phone':\s*fake\.phone_number\(\)", "'Phone': fake.phone_number()[:15]", cleaned_code)
340
  # Also handle when it's in a larger expression or assignment
341
+ cleaned_code = re.sub(r"(\w+\.phone_number\(\))(?!\[)", r"\1[:15]", cleaned_code)
342
 
343
  # FIX: Convert SQLite-style ? placeholders to Snowflake-style %s placeholders
 
344
  cleaned_code = re.sub(r'\bVALUES\s*\(\?', 'VALUES (%s', cleaned_code)
345
  cleaned_code = re.sub(r',\s*\?', ', %s', cleaned_code)
346
 
 
351
  print("πŸš€ STARTING DATA POPULATION EXECUTION")
352
  print("=" * 50)"""
353
  )
354
+
355
  # Add logging to populate functions dynamically
 
356
 
357
  # Find all populate function definitions and add logging
358
  def add_function_logging(match):
 
404
  "os": os,
405
  "random": random,
406
  "datetime": datetime,
407
+ "re": re,
408
  "get_snowflake_connection_params": get_snowflake_connection_params,
409
  }
410
 
 
416
  print(f"πŸ” DEBUG: Cleaned code preview (first 500 chars):")
417
  print(cleaned_code[:500])
418
  print("...")
419
+
420
+ # ALWAYS save the full generated code for debugging (not just in DEBUG mode)
421
+ # Save to /tmp for immediate access
422
+ tmp_file = f"/tmp/generated_population_script_{schema_name}.py"
423
+ with open(tmp_file, "w") as f:
424
+ f.write(cleaned_code)
425
+ print(f"πŸ“ Population script saved to: {tmp_file}")
426
+
427
+ # Also save to logs directory for persistence
428
+ from pathlib import Path
429
+ import datetime
430
+ log_dir = Path("logs") / "population_scripts"
431
+ log_dir.mkdir(parents=True, exist_ok=True)
432
+ timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
433
+ log_file = log_dir / f"population_{schema_name}_{timestamp}.py"
434
+ with open(log_file, "w") as f:
435
  f.write(cleaned_code)
436
+ print(f"πŸ“ Also saved to: {log_file}")
437
 
438
  # First, validate the syntax before executing
439
  try:
 
460
  print("=" * 50)
461
  print(f"❌ DATA POPULATION FAILED: {str(e)}")
462
  print("=" * 50)
463
+
464
+ # Print detailed traceback
465
+ import traceback
466
+ print("πŸ“‹ Full traceback:")
467
+ traceback.print_exc()
468
+
469
+ # Show which line of generated code caused the error
470
+ tb = traceback.extract_tb(e.__traceback__)
471
+ for frame in tb:
472
+ if '<population_script>' in frame.filename:
473
+ print(f"\n❌ Error in generated code at line {frame.lineno}:")
474
+ lines = cleaned_code.split('\n')
475
+ start = max(0, frame.lineno - 3)
476
+ end = min(len(lines), frame.lineno + 2)
477
+ for i in range(start, end):
478
+ marker = ">>> " if i == frame.lineno - 1 else " "
479
+ print(f"{marker}{i+1:3d}: {lines[i]}")
480
+
481
  return False, f"Population execution failed: {str(e)}"
482
 
483
  def generate_deployment_summary(demo_builder_instance):
 
930
  label="Schema Prefix",
931
  info="Prefix for schema names (e.g., demo_20250930)"
932
  )
933
+ naming_prefix_input = gr.Textbox(
934
+ value="",
935
+ label="Object Naming Prefix (Optional)",
936
+ placeholder="e.g., BOONE, SALES, DEMO",
937
+ info="Custom prefix for all objects. Format: {prefix}_{day+ms}_{objtype}. Leave blank for default naming."
938
+ )
939
 
940
  with gr.TabItem("Admin"):
941
  ts_instance_url = gr.Textbox(
 
1139
  # Progressive workflow with Multi-LLM support
1140
  def progressive_workflow_handler(
1141
  url, use_case, llm_provider, demo_builder_instance, mode="expert", stop_before_deploy=False,
1142
+ temperature=0.3, max_tokens=4000, naming_prefix=""
1143
  ):
1144
  # Initialize DemoBuilder if none exists
1145
  if demo_builder_instance is None:
 
1147
  return "Please enter a company URL", None, "Start Research", False
1148
  demo_builder_instance = DemoBuilder(use_case, url.strip())
1149
 
1150
+ # Initialize demo logger for this session
1151
+ session_logger = get_demo_logger(user_email="demo_user")
1152
+ demo_builder_instance.logger = session_logger
1153
+ session_logger.log_demo_config(
1154
+ use_case=use_case,
1155
+ company=url,
1156
+ company_url=url,
1157
+ industry="TBD",
1158
+ llm_model=llm_provider
1159
+ )
1160
+
1161
  # Initialize or continue with existing content
1162
  if not hasattr(demo_builder_instance, 'accumulated_content'):
1163
  accumulated_content = ""
 
1383
  workflow_updates += f"πŸ’‘ Next: Click 'Create DDL' to generate database schema\n"
1384
  demo_builder_instance.workflow_updates = workflow_updates
1385
 
1386
+ # Log research stage completion
1387
+ if hasattr(demo_builder_instance, 'logger'):
1388
+ demo_builder_instance.logger.log_stage_completion("research", True)
1389
+
1390
  yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
1391
 
1392
  elif demo_builder_instance.current_stage == "create":
 
1398
  )
1399
  yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
1400
 
1401
+ # Generate schema name using same convention as ThoughtSpot objects
1402
+ from datetime import datetime as dt
1403
+ now = dt.now()
1404
+
1405
+ # Clean prefix (uppercase, alphanumeric only) - can be empty string
1406
+ if naming_prefix:
1407
+ prefix_clean = re.sub(r'[^a-zA-Z0-9]', '', naming_prefix.upper())
1408
+ else:
1409
+ prefix_clean = ""
1410
+
1411
+ # Short unique timestamp: day (2 digits) + microseconds (6 digits)
1412
+ day = now.strftime('%d')
1413
+ microsec = now.strftime('%f') # 6 digits
1414
+ short_timestamp = f"{day}{microsec}"
1415
+
1416
+ # Base format: PREFIX_DDMICROS (prefix can be empty)
1417
+ if prefix_clean:
1418
+ schema_name = f"{prefix_clean}_{short_timestamp}_scm"
1419
+ else:
1420
+ schema_name = f"{short_timestamp}_scm"
1421
 
1422
  schema_prompt = f"""Create ONLY complete SQL DDL statements for a {use_case} demo database.
1423
 
 
1442
  - Include realistic column names that match the business context
1443
  - Add proper constraints and relationships
1444
 
1445
+ **CRITICAL - COLUMN NAME CONSISTENCY:**
1446
+ - When referencing columns in FOREIGN KEY constraints, use the EXACT column name (including case) as defined in the CREATE TABLE statement
1447
+ - Example: If you define "LocationID INT" then reference it as "LocationID" NOT "LOCATIONID"
1448
+ - Column names in FOREIGN KEY REFERENCES must match the PRIMARY KEY column name exactly
1449
+
1450
  SNOWFLAKE SYNTAX EXAMPLES:
1451
  - Auto-increment: ColumnID INT IDENTITY(1,1) PRIMARY KEY
1452
  - NOT: ColumnID INT PRIMARY KEY AUTO_INCREMENT
 
1493
  demo_builder_instance.accumulated_content = accumulated_content
1494
  demo_builder_instance.workflow_updates = workflow_updates
1495
 
1496
+ # Log DDL stage completion and save DDL
1497
+ if hasattr(demo_builder_instance, 'logger'):
1498
+ demo_builder_instance.logger.log_ddl(schema_results, demo_builder_instance.schema_name if hasattr(demo_builder_instance, 'schema_name') else None)
1499
+ demo_builder_instance.logger.log_stage_completion("ddl", True)
1500
+
1501
  yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
1502
 
1503
  elif demo_builder_instance.current_stage == "populate":
 
1721
  yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
1722
 
1723
  print(f"πŸ” DEBUG: About to advance from populate to deploy stage")
1724
+
1725
  demo_builder_instance.advance_stage()
1726
  demo_builder_instance.accumulated_content = accumulated_content
1727
  demo_builder_instance.workflow_updates = workflow_updates
1728
  print(f"πŸ” DEBUG: Advanced to stage: {demo_builder_instance.current_stage}")
1729
  print(f"πŸ” DEBUG: Stage status: {demo_builder_instance.stage_status}")
1730
+
1731
+ # Log population code stage completion
1732
+ if hasattr(demo_builder_instance, 'logger') and hasattr(demo_builder_instance, 'population_code'):
1733
+ demo_builder_instance.logger.log_population_code(demo_builder_instance.population_code)
1734
+ demo_builder_instance.logger.log_stage_completion("population_code", True)
1735
+
1736
  yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
1737
  return # Exit populate stage immediately after advancing
1738
 
 
1772
  company_name,
1773
  use_case,
1774
  demo_builder_instance.schema_generation_results,
1775
+ naming_prefix=naming_prefix
1776
  )
1777
  )
1778
 
 
1881
  schema=schema_name,
1882
  company_name=company_name,
1883
  use_case=use_case,
1884
+ naming_prefix=naming_prefix or None,
1885
  progress_callback=ts_detailed_callback
1886
  )
1887
  deployment_complete = True
 
1916
  demo_builder_instance.deployment_status += ts_progress_msg
1917
  workflow_updates += ts_progress_msg
1918
  yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1919
 
1920
+ # Deployment progress is shown live during execution - no duplicate summary needed
1921
+
 
 
1922
  if ts_results['success']:
1923
  ts_msg = "\nThoughtSpot metadata deployment completed!\n\n"
1924
  ts_msg += "Created ThoughtSpot objects:\n"
 
2022
 
2023
  demo_builder_instance.advance_stage()
2024
  demo_builder_instance.accumulated_content = accumulated_content
2025
+
2026
+ # Log deployment stage completion
2027
+ if hasattr(demo_builder_instance, 'logger'):
2028
+ deploy_success = deploy_success if 'deploy_success' in locals() else False
2029
+ demo_builder_instance.logger.log_stage_completion("deploy", deploy_success,
2030
+ details=f"Schema: {schema_name if 'schema_name' in locals() else 'N/A'}")
2031
+
2032
  yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
2033
 
2034
  except Exception as e:
2035
+ # Log deployment error
2036
+ if hasattr(demo_builder_instance, 'logger'):
2037
+ demo_builder_instance.logger.log_error(str(e), "deployment", e)
2038
  error_msg = f"❌ Deployment error: {str(e)}\n"
2039
  demo_builder_instance.deployment_status += error_msg
2040
  workflow_updates += error_msg
 
2050
  yield accumulated_content, workflow_updates, None, "Research", False
2051
 
2052
  # Redo functionality
2053
+ def redo_latest_stage(demo_builder_instance, llm_provider, url, use_case, mode="expert", stop_deploy=False, temperature=0.3, max_tokens=4000, naming_prefix=""):
2054
  if (
2055
  demo_builder_instance
2056
  and demo_builder_instance.current_stage != "research"
 
2065
 
2066
  # Re-run the workflow with same format as button_click_handler
2067
  for result in progressive_workflow_handler(
2068
+ url, use_case, llm_provider, demo_builder_instance, mode, stop_deploy, temperature, max_tokens, naming_prefix
2069
  ):
2070
  if len(result) == 5:
2071
  # New format with workflow_updates
 
2732
  # Legacy CDW deployment function removed - deployment now handled by main workflow
2733
 
2734
  # Main click handler with multi-LLM support - UPDATED to route to AI Output tab
2735
+ def button_click_handler(url, use_case, llm_provider, demo_builder_instance, mode, stop_deploy, temperature, max_tokens, naming_prefix=""):
2736
  for result in progressive_workflow_handler(
2737
+ url, use_case, llm_provider, demo_builder_instance, mode, stop_deploy, temperature, max_tokens, naming_prefix
2738
  ):
2739
  if len(result) == 5:
2740
  # New format with workflow_updates
 
2765
  # Event handlers - UPDATED for new AI Output tab and Results box
2766
  research_btn.click(
2767
  fn=button_click_handler,
2768
+ inputs=[url_input, industry_dropdown, llm_dropdown, demo_builder, workflow_mode, stop_before_deploy, temperature_slider, max_tokens_input, naming_prefix_input],
2769
  outputs=[
2770
  ai_output_box, # streaming content goes to AI Output tab
2771
  results_box, # dynamic content based on stage/mode
 
2785
  )
2786
 
2787
  redo_btn.click(
2788
+ fn=lambda db, llm, url, use_case, mode, stop_deploy, temp, max_tok, naming_prefix: [
2789
+ result for result in redo_latest_stage(db, llm, url, use_case, mode, stop_deploy, temp, max_tok, naming_prefix)
2790
  ],
2791
+ inputs=[demo_builder, llm_dropdown, url_input, industry_dropdown, workflow_mode, stop_before_deploy, temperature_slider, max_tokens_input, naming_prefix_input],
2792
  outputs=[
2793
  ai_output_box, # streaming content goes to AI Output tab
2794
  results_box, # dynamic content based on stage/mode
 
3264
 
3265
  def save_settings_handler(
3266
  email, ai_model, temperature, max_tokens, company_size, company_url, use_case, data_volume,
3267
+ warehouse, database, ts_url, ts_username, batch_size, thread_count, naming_prefix
3268
  ):
3269
  """Save settings to Supabase database"""
3270
  if not email or '@' not in email:
 
3288
  "thoughtspot_url": ts_url,
3289
  "thoughtspot_username": ts_username,
3290
  "batch_size": batch_size,
3291
+ "thread_count": thread_count,
3292
+ "naming_prefix": naming_prefix or ""
3293
  }
3294
 
3295
  success = settings_client.save_all_settings(email, settings)
 
3317
  "", # ts_username
3318
  5000, # batch_size
3319
  4, # thread_count
3320
+ "", # naming_prefix
3321
  "Settings not loaded - enter valid email", # status
3322
  "gpt-4o", # llm_dropdown (main form AI Model)
3323
  "" # url_input (main form Company URL)
 
3341
  settings.get("thoughtspot_username", ""),
3342
  int(settings.get("batch_size", 5000)),
3343
  int(settings.get("thread_count", 4)),
3344
+ settings.get("naming_prefix", ""), # naming_prefix
3345
  f"βœ… Settings loaded for {email}",
3346
  ai_model, # llm_dropdown (same as default_ai_model)
3347
  company_url # url_input (same as default_company_url)
 
3357
  "",
3358
  5000,
3359
  4,
3360
+ "", # naming_prefix
3361
  f"❌ Error loading settings: {str(e)}"
3362
  ]
3363
 
 
3384
  ts_instance_url,
3385
  ts_username,
3386
  batch_size_slider,
3387
+ thread_count_slider,
3388
+ naming_prefix_input
3389
  ],
3390
  outputs=[settings_status]
3391
  )
 
3630
  ts_username,
3631
  batch_size_slider,
3632
  thread_count_slider,
3633
+ naming_prefix_input,
3634
  settings_status,
3635
  llm_dropdown, # Also update main form AI Model dropdown
3636
  url_input # Also update main form Company URL
liveboard_creator.py CHANGED
@@ -1019,12 +1019,16 @@ Examples:
1019
  text_content = viz_config.get('text_content', viz_config.get('name', ''))
1020
  bg_color = viz_config.get('background_color', '#2E3D4D') # Default dark background
1021
 
1022
- # TEXT tiles in ThoughtSpot are simple structures
1023
  text_tml = {
1024
  'id': viz_config['id'],
1025
  'answer': {
1026
  'name': viz_config.get('name', 'Text'),
1027
  'description': viz_config.get('description', ''),
 
 
 
 
1028
  'text_tile': {
1029
  'text': text_content,
1030
  'background_color': bg_color
@@ -1510,6 +1514,7 @@ Return ONLY a valid JSON object with structure:
1510
 
1511
  # Debug: print response structure
1512
  print(f" API Response type: {type(result)}")
 
1513
  if isinstance(result, list) and len(result) > 0:
1514
  print(f" First item keys: {list(result[0].keys())}")
1515
  response_obj = result[0].get('response', {})
@@ -1517,12 +1522,20 @@ Return ONLY a valid JSON object with structure:
1517
  status_obj = response_obj.get('status', {})
1518
  print(f" Status: {status_obj}")
1519
 
1520
- # If error, check if there's more info in the full result
1521
  if status_obj.get('status_code') == 'ERROR':
1522
- print(f" Full error details:")
 
1523
  import json
 
1524
  print(json.dumps(result[0], indent=2)[:2000])
1525
 
 
 
 
 
 
 
1526
  # Navigate response structure
1527
  liveboard_id = result[0].get('response', {}).get('header', {}).get('id_guid')
1528
  if not liveboard_id:
@@ -1574,10 +1587,26 @@ Return ONLY a valid JSON object with structure:
1574
  'success': False,
1575
  'error': f"API returned status {response.status_code}: {response.text}"
1576
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1577
  except Exception as e:
 
1578
  return {
1579
  'success': False,
1580
- 'error': str(e)
 
1581
  }
1582
 
1583
 
@@ -1603,6 +1632,57 @@ def create_liveboard_from_model(
1603
  Returns:
1604
  Deployment result dictionary
1605
  """
1606
- creator = LiveboardCreator(ts_client, model_id, model_name)
1607
- liveboard_tml = creator.create_liveboard_tml(company_data, use_case, num_visualizations)
1608
- return creator.deploy_liveboard(liveboard_tml)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1019
  text_content = viz_config.get('text_content', viz_config.get('name', ''))
1020
  bg_color = viz_config.get('background_color', '#2E3D4D') # Default dark background
1021
 
1022
+ # TEXT tiles in ThoughtSpot need tables field even though they don't query data
1023
  text_tml = {
1024
  'id': viz_config['id'],
1025
  'answer': {
1026
  'name': viz_config.get('name', 'Text'),
1027
  'description': viz_config.get('description', ''),
1028
+ 'tables': [{
1029
+ 'id': self.model_name,
1030
+ 'name': self.model_name
1031
+ }],
1032
  'text_tile': {
1033
  'text': text_content,
1034
  'background_color': bg_color
 
1514
 
1515
  # Debug: print response structure
1516
  print(f" API Response type: {type(result)}")
1517
+ status_obj = {}
1518
  if isinstance(result, list) and len(result) > 0:
1519
  print(f" First item keys: {list(result[0].keys())}")
1520
  response_obj = result[0].get('response', {})
 
1522
  status_obj = response_obj.get('status', {})
1523
  print(f" Status: {status_obj}")
1524
 
1525
+ # Check for API errors first
1526
  if status_obj.get('status_code') == 'ERROR':
1527
+ error_msg = status_obj.get('error_message', 'Unknown error')
1528
+ print(f" ❌ Liveboard creation failed: {error_msg}")
1529
  import json
1530
+ print(f" Full error details:")
1531
  print(json.dumps(result[0], indent=2)[:2000])
1532
 
1533
+ return {
1534
+ 'success': False,
1535
+ 'error': error_msg,
1536
+ 'response': result
1537
+ }
1538
+
1539
  # Navigate response structure
1540
  liveboard_id = result[0].get('response', {}).get('header', {}).get('id_guid')
1541
  if not liveboard_id:
 
1587
  'success': False,
1588
  'error': f"API returned status {response.status_code}: {response.text}"
1589
  }
1590
+ except AttributeError as e:
1591
+ import traceback
1592
+ return {
1593
+ 'success': False,
1594
+ 'error': f'AttributeError in Liveboard deployment: {str(e)}',
1595
+ 'details': traceback.format_exc()
1596
+ }
1597
+ except KeyError as e:
1598
+ import traceback
1599
+ return {
1600
+ 'success': False,
1601
+ 'error': f'KeyError in Liveboard deployment: {str(e)}',
1602
+ 'details': traceback.format_exc()
1603
+ }
1604
  except Exception as e:
1605
+ import traceback
1606
  return {
1607
  'success': False,
1608
+ 'error': f'{type(e).__name__}: {str(e)}',
1609
+ 'details': traceback.format_exc()
1610
  }
1611
 
1612
 
 
1632
  Returns:
1633
  Deployment result dictionary
1634
  """
1635
+ try:
1636
+ # Validate inputs
1637
+ if not model_id or not model_id.strip():
1638
+ return {
1639
+ 'success': False,
1640
+ 'error': 'Invalid model_id: empty or None'
1641
+ }
1642
+
1643
+ if not model_name or not model_name.strip():
1644
+ return {
1645
+ 'success': False,
1646
+ 'error': 'Invalid model_name: empty or None'
1647
+ }
1648
+
1649
+ # Create Liveboard creator
1650
+ creator = LiveboardCreator(ts_client, model_id, model_name)
1651
+
1652
+ # Generate Liveboard TML
1653
+ liveboard_tml = creator.create_liveboard_tml(company_data, use_case, num_visualizations)
1654
+
1655
+ if not liveboard_tml:
1656
+ return {
1657
+ 'success': False,
1658
+ 'error': 'Failed to generate Liveboard TML: returned None or empty'
1659
+ }
1660
+
1661
+ # Deploy Liveboard
1662
+ result = creator.deploy_liveboard(liveboard_tml)
1663
+
1664
+ if not result:
1665
+ return {
1666
+ 'success': False,
1667
+ 'error': 'deploy_liveboard returned None'
1668
+ }
1669
+
1670
+ return result
1671
+
1672
+ except AttributeError as e:
1673
+ return {
1674
+ 'success': False,
1675
+ 'error': f'AttributeError (possible missing field): {str(e)}'
1676
+ }
1677
+ except KeyError as e:
1678
+ return {
1679
+ 'success': False,
1680
+ 'error': f'KeyError (missing required key): {str(e)}'
1681
+ }
1682
+ except Exception as e:
1683
+ import traceback
1684
+ return {
1685
+ 'success': False,
1686
+ 'error': f'Unexpected error: {type(e).__name__}: {str(e)}',
1687
+ 'traceback': traceback.format_exc()
1688
+ }
supabase_client.py CHANGED
@@ -355,6 +355,7 @@ def load_gradio_settings(email: str) -> Dict[str, Any]:
355
  "default_data_volume": "Medium (10K rows)",
356
  "default_warehouse": "COMPUTE_WH",
357
  "default_database": "DEMO_DB",
 
358
 
359
  # ThoughtSpot Connection
360
  "thoughtspot_url": "",
 
355
  "default_data_volume": "Medium (10K rows)",
356
  "default_warehouse": "COMPUTE_WH",
357
  "default_database": "DEMO_DB",
358
+ "naming_prefix": "", # Object naming prefix
359
 
360
  # ThoughtSpot Connection
361
  "thoughtspot_url": "",
thoughtspot_deployer.py CHANGED
@@ -84,13 +84,18 @@ class ThoughtSpotDeployer:
84
 
85
  return private_key_raw
86
 
87
- def authenticate(self) -> bool:
88
- """Authenticate with ThoughtSpot"""
 
 
 
 
89
  try:
90
  auth_url = f"{self.base_url}/api/rest/2.0/auth/session/login"
91
- print(f" πŸ” Attempting authentication to: {auth_url}")
92
- print(f" πŸ‘€ Username: {self.username}")
93
-
 
94
  response = self.session.post(
95
  auth_url,
96
  json={
@@ -98,31 +103,38 @@ class ThoughtSpotDeployer:
98
  "password": self.password
99
  }
100
  )
101
-
102
- print(f" πŸ“‘ HTTP Status: {response.status_code}")
103
-
 
104
  if response.status_code in [200, 204]: # Accept both 200 and 204 as success
105
  if response.status_code == 200:
106
  # Extract token from JSON response
107
  result = response.json()
108
- print(f" βœ… Response: {json.dumps(result, indent=2)}")
 
109
  if 'token' in result:
110
  self.headers['Authorization'] = f'Bearer {result["token"]}'
111
- print(" 🎟️ Token extracted successfully")
 
112
  return True
113
  else:
114
- print(" ❌ No token in response")
 
115
  elif response.status_code == 204:
116
  # HTTP 204 No Content - authentication successful, use session cookies
117
- print(" βœ… Authentication successful (HTTP 204 - using session cookies)")
 
118
  # For 204, we use session cookies instead of bearer token
119
  return True
120
  else:
121
- print(f" ❌ HTTP Error {response.status_code}: {response.text}")
 
122
  return False
123
-
124
  except Exception as e:
125
- print(f" πŸ’₯ Authentication exception: {e}")
 
126
  return False
127
 
128
  def parse_ddl(self, ddl: str) -> Tuple[Dict, List]:
@@ -318,33 +330,63 @@ class ThoughtSpotDeployer:
318
  joins = []
319
  table_name_upper = table_name.upper()
320
  table_cols = [col['name'].upper() for col in columns]
321
-
 
322
  # Find foreign key relationships
323
  for col_name in table_cols:
324
- if col_name.endswith('ID') and col_name != f"{table_name_upper}ID":
325
- # This looks like a foreign key - find the target table
326
- potential_target = col_name[:-2] + 'S' # CUSTOMERID -> CUSTOMERS
327
-
328
- # Check if target table exists in THIS deployment AND it's not the same table
329
- # IMPORTANT: Only create joins to tables in the same schema/connection
330
- available_tables_upper = [t.upper() for t in all_tables.keys()]
331
- if (potential_target in available_tables_upper and
332
- potential_target != table_name_upper):
333
- constraint_id = f"SYS_CONSTRAINT_{self._generate_constraint_id()}"
334
- join_def = {
335
- 'name': constraint_id,
336
- 'destination': {
337
- 'name': potential_target
338
- },
339
- 'on': f"[{table_name_upper}::{col_name}] = [{potential_target}::{col_name}]",
340
- 'type': 'INNER'
341
- }
342
- joins.append(join_def)
343
- print(f" πŸ”— Generated join: {table_name_upper} -> {potential_target} on {col_name}")
344
- else:
345
- if potential_target not in available_tables_upper and potential_target != table_name_upper:
346
- print(f" ⏭️ Skipping join: {table_name_upper}.{col_name} -> {potential_target} (table not in this deployment)")
347
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
348
  return joins
349
 
350
  def create_connection_tml(self, connection_name: str) -> str:
@@ -503,21 +545,62 @@ class ThoughtSpotDeployer:
503
  return True
504
  return False
505
 
506
- def _is_surrogate_primary_key(self, col: Dict, col_name: str) -> bool:
507
- """Check if column is a meaningless surrogate key (numeric ID)"""
508
- # Common patterns: ID, _ID, ID_, ends with 'id'
509
- if col_name.upper().endswith('ID'):
510
- # Check if it's numeric (INT, BIGINT, NUMBER)
511
- col_type = col.get('type', '').upper()
512
- if any(t in col_type for t in ['INT', 'NUMBER', 'NUMERIC', 'BIGINT']):
513
- return True
514
- return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
515
 
516
  def _create_model_with_constraints(self, tables: Dict, foreign_keys: List, table_guids: Dict,
517
  table_constraints: Dict, model_name: str, connection_name: str) -> str:
518
  """Generate model TML with constraint references like our successful test"""
519
  print(" πŸ“‹ Creating model with constraint references")
520
-
521
  # Build column name conflict tracking
522
  column_name_counts = {}
523
  for table_name, columns in tables.items():
@@ -526,7 +609,83 @@ class ThoughtSpotDeployer:
526
  if col_name not in column_name_counts:
527
  column_name_counts[col_name] = []
528
  column_name_counts[col_name].append(table_name.upper())
529
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
530
  model = {
531
  'guid': None,
532
  'model': {
@@ -543,28 +702,28 @@ class ThoughtSpotDeployer:
543
  }
544
  }
545
  }
546
-
547
- # Add model_tables with FQNs and constraint-based joins
548
  for table_name in tables.keys():
549
  table_name_upper = table_name.upper()
550
  table_guid = table_guids.get(table_name_upper)
551
-
552
  table_entry = {
553
  'name': table_name_upper,
554
  'fqn': table_guid
555
  }
556
-
557
- # Add joins if this table has constraints
558
- if table_name_upper in table_constraints and table_constraints[table_name_upper]:
559
  table_entry['joins'] = []
560
- for constraint in table_constraints[table_name_upper]:
561
  join_entry = {
562
- 'with': constraint['destination'],
563
- 'referencing_join': constraint['constraint_id']
564
  }
565
  table_entry['joins'].append(join_entry)
566
- print(f" πŸ”— Added join: {table_name_upper} -> {constraint['destination']}")
567
-
568
  model['model']['model_tables'].append(table_entry)
569
 
570
  # Add columns with proper global conflict resolution (same as working version)
@@ -581,7 +740,8 @@ class ThoughtSpotDeployer:
581
  continue
582
 
583
  # SKIP surrogate primary keys (numeric IDs) - nobody searches "customer 23455"
584
- if self._is_surrogate_primary_key(col, col_name):
 
585
  print(f" ⏭️ Skipping surrogate PK: {table_name_upper}.{col_name}")
586
  continue
587
 
@@ -671,18 +831,65 @@ class ThoughtSpotDeployer:
671
  for col in columns:
672
  col_name = col['name'].upper()
673
 
674
- # Check if this looks like a foreign key (ends with ID but isn't the table's own ID)
675
- if col_name.endswith('ID') and col_name != f"{table_name_upper}ID":
676
- # Infer the target table name (CUSTOMERID -> CUSTOMERS, LOCATIONID -> LOCATIONS)
677
- potential_target = col_name[:-2] + 'S'
 
 
 
 
 
 
 
 
 
 
 
678
 
679
- # Check if the target table exists in this schema
680
- if potential_target not in table_names_upper and potential_target != table_name_upper:
681
- warnings.append(
682
- f"⚠️ {table_name}.{col_name} appears to reference {potential_target}, "
683
- f"but {potential_target} is not in this schema. "
684
- f"The join will be skipped during deployment."
685
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
686
 
687
  return warnings
688
 
@@ -1092,41 +1299,50 @@ class ThoughtSpotDeployer:
1092
  print(f" ⚠️ Could not create schema: {e}")
1093
  print(f" πŸ“ Will proceed assuming schema exists or will be created by table operations")
1094
 
1095
- def _generate_demo_names(self, company_name: str = None, use_case: str = None):
1096
- """Generate standardized demo names using DM convention"""
 
 
 
 
 
 
 
 
 
1097
  from datetime import datetime
1098
  import re
1099
-
1100
- # Get timestamp components
1101
  now = datetime.now()
1102
- yymmdd = now.strftime('%y%m%d')
1103
- hhmmss = now.strftime('%H%M%S')
1104
-
1105
- # Clean and truncate company name (5 chars)
1106
- if company_name:
1107
- company_clean = re.sub(r'[^a-zA-Z0-9]', '', company_name.upper())[:5]
1108
  else:
1109
- company_clean = 'DEMO'[:5]
1110
-
1111
- # Clean and truncate use case (3 chars)
1112
- if use_case:
1113
- usecase_clean = re.sub(r'[^a-zA-Z0-9]', '', use_case.upper())[:3]
 
 
 
 
 
1114
  else:
1115
- usecase_clean = 'GEN'[:3]
1116
-
1117
- # Generate names
1118
- base_name = f"DM{yymmdd}_{hhmmss}_{company_clean}_{usecase_clean}"
1119
-
1120
  return {
1121
- 'schema': base_name,
1122
- 'connection': f"{base_name}_conn",
1123
- 'model': f"{base_name}_model",
1124
- 'base': base_name
 
1125
  }
1126
 
1127
- def deploy_all(self, ddl: str, database: str, schema: str,
1128
- connection_name: str = None, company_name: str = None,
1129
- use_case: str = None, progress_callback=None) -> Dict:
1130
  """
1131
  Deploy complete data model to ThoughtSpot
1132
 
@@ -1148,12 +1364,29 @@ class ThoughtSpotDeployer:
1148
  }
1149
 
1150
  table_guids = {} # Store table GUIDs for model creation
1151
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1152
  def log_progress(message):
1153
- """Helper to log progress both to console and callback"""
1154
  print(message, flush=True)
1155
  if progress_callback:
1156
  progress_callback(message)
 
 
 
1157
 
1158
  try:
1159
  # STEP 0: Authenticate first!
@@ -1179,10 +1412,20 @@ class ThoughtSpotDeployer:
1179
  else:
1180
  log_progress("βœ… All foreign key references are valid\n")
1181
 
1182
- # Step 1: Create connection using new naming convention
1183
- demo_names = self._generate_demo_names(company_name, use_case)
 
 
 
 
 
 
 
 
 
 
1184
  if not connection_name:
1185
- connection_name = demo_names['connection']
1186
 
1187
  log_progress("1️⃣ Checking/Creating connection...")
1188
  log_progress(f" Connection name: {connection_name}")
@@ -1276,111 +1519,130 @@ class ThoughtSpotDeployer:
1276
  log_progress("\n2️⃣ Creating tables...")
1277
 
1278
  # PHASE 1: Create all tables WITHOUT joins (to ensure all tables exist first)
1279
- log_progress(" πŸ“‹ Phase 1: Creating tables without joins...")
1280
- for table_name, columns in tables.items():
1281
- import time
1282
- start_time = time.time()
1283
- log_progress(f" πŸ”„ Creating table: {table_name.upper()} (no joins)...")
 
 
1284
 
1285
- # Create table TML WITHOUT joins_with section (pass None for all_tables)
1286
  tml_start = time.time()
1287
  table_tml = self.create_table_tml(table_name, columns, connection_name, database, schema, all_tables=None)
1288
  tml_time = time.time() - tml_start
1289
- log_progress(f" πŸ“ TML generation took: {tml_time:.2f} seconds")
 
 
1290
 
1291
- # Log the size of the TML
1292
- log_progress(f" πŸ“ TML size: {len(table_tml)} characters, {len(columns)} columns")
1293
-
1294
- # Make the API call
1295
- api_start = time.time()
1296
- log_progress(f" 🌐 Sending to ThoughtSpot API...")
1297
- response = self.session.post(
1298
- f"{self.base_url}/api/rest/2.0/metadata/tml/import",
1299
- json={
1300
- "metadata_tmls": [table_tml],
1301
- "import_policy": "PARTIAL",
1302
- "create_new": True
1303
- }
1304
- )
1305
- api_time = time.time() - api_start
1306
- log_progress(f" ⏱️ API call took: {api_time:.2f} seconds")
1307
 
1308
- if response.status_code == 200:
1309
- result = response.json()
 
1310
 
1311
- # Handle both response formats (list or dict with 'object' key)
1312
- if isinstance(result, list):
1313
- objects = result
1314
- elif isinstance(result, dict) and 'object' in result:
1315
- objects = result['object']
1316
- else:
1317
- error = f"Table {table_name} failed: Unexpected response format: {type(result)}"
1318
- log_progress(f" ❌ {error}")
1319
- results['errors'].append(error)
1320
- continue
1321
 
1322
- if objects and len(objects) > 0:
1323
- obj = objects[0]
 
 
1324
  if obj.get('response', {}).get('status', {}).get('status_code') == 'OK':
1325
  table_guid = obj.get('response', {}).get('header', {}).get('id_guid')
1326
- total_time = time.time() - start_time
1327
- log_progress(f" βœ… Table created: {table_name.upper()} (Total time: {total_time:.2f} seconds)")
1328
- log_progress(f" GUID: {table_guid}")
1329
- results['tables'].append(table_name.upper())
1330
- table_guids[table_name.upper()] = table_guid
1331
  else:
1332
- error = f"Table {table_name} failed: {obj.get('response', {}).get('status', {}).get('error_message')}"
 
1333
  log_progress(f" ❌ {error}")
1334
  results['errors'].append(error)
1335
- # DON'T return - continue creating other tables
1336
- else:
1337
- error = f"Table {table_name} failed: No object in response"
1338
- log_progress(f" ❌ {error}")
1339
- results['errors'].append(error)
1340
  else:
1341
- error = f"Table {table_name} HTTP error: {response.status_code} - {response.text}"
1342
  log_progress(f" ❌ {error}")
1343
  results['errors'].append(error)
 
 
 
 
 
 
 
1344
 
1345
  # Check if we created any tables successfully
1346
  if not table_guids:
1347
  log_progress(" ❌ No tables were created successfully in Phase 1")
1348
  return results
1349
 
1350
- log_progress(f" βœ… Phase 1 complete: {len(table_guids)} tables created")
 
 
 
 
 
 
 
 
1351
 
1352
- # PHASE 2: Update tables WITH joins (now that all tables exist)
1353
- log_progress("\n πŸ“‹ Phase 2: Adding joins to tables...")
1354
  for table_name, columns in tables.items():
1355
- # Only add joins if the table was created successfully in Phase 1
1356
  table_name_upper = table_name.upper()
1357
  if table_name_upper not in table_guids:
1358
  log_progress(f" ⏭️ Skipping joins for {table_name_upper} (table creation failed)")
1359
  continue
1360
 
1361
- # Get the GUID for this table
1362
  table_guid = table_guids[table_name_upper]
1363
 
1364
- log_progress(f" πŸ”— Adding joins to: {table_name_upper}...")
1365
  # Create table TML WITH joins_with section AND the table GUID
1366
  table_tml = self.create_table_tml(
1367
  table_name, columns, connection_name, database, schema,
1368
  all_tables=tables, table_guid=table_guid
1369
  )
 
 
 
 
 
 
 
1370
 
1371
  response = self.session.post(
1372
  f"{self.base_url}/api/rest/2.0/metadata/tml/import",
1373
  json={
1374
- "metadata_tmls": [table_tml],
1375
  "import_policy": "PARTIAL",
1376
- "create_new": False # Update existing table
1377
  }
1378
  )
1379
 
 
 
 
1380
  if response.status_code == 200:
1381
  result = response.json()
1382
 
1383
- # Handle both response formats (list or dict with 'object' key)
1384
  if isinstance(result, list):
1385
  objects = result
1386
  elif isinstance(result, dict) and 'object' in result:
@@ -1389,21 +1651,23 @@ class ThoughtSpotDeployer:
1389
  log_progress(f" ⚠️ Unexpected response format for joins: {type(result)}")
1390
  objects = []
1391
 
 
1392
  if objects and len(objects) > 0:
1393
- obj = objects[0]
1394
- if obj.get('response', {}).get('status', {}).get('status_code') == 'OK':
1395
- log_progress(f" βœ… Joins added: {table_name.upper()}")
1396
- else:
1397
- error = f"Adding joins to {table_name} failed: {obj.get('response', {}).get('status', {}).get('error_message')}"
1398
- log_progress(f" ⚠️ {error}")
1399
- results['errors'].append(error)
1400
- # Don't fail - table still exists without joins
1401
  else:
1402
- log_progress(f" ⚠️ Could not add joins to {table_name.upper()}")
1403
  else:
1404
- log_progress(f" ⚠️ HTTP error adding joins to {table_name.upper()}: {response.status_code}")
 
1405
 
1406
- log_progress(f" βœ… Phase 2 complete: Joins processed for all tables")
1407
  actual_constraint_ids = {} # We'll generate these for the model
1408
 
1409
  # Skip separate relationship creation for now
@@ -1412,8 +1676,9 @@ class ThoughtSpotDeployer:
1412
 
1413
  # Step 3: Extract constraint IDs from created tables
1414
  log_progress("\n2️⃣.5 Extracting constraint IDs from created tables...")
 
1415
  table_constraints = {}
1416
-
1417
  for table_name, table_guid in table_guids.items():
1418
  log_progress(f" πŸ” Getting constraint IDs for {table_name}...")
1419
 
@@ -1446,18 +1711,32 @@ class ThoughtSpotDeployer:
1446
  'destination': destination
1447
  })
1448
  log_progress(f" πŸ”— Found join: {constraint_id} -> {destination}")
1449
-
 
 
1450
  log_progress(f" βœ… Extracted constraints from {len(table_constraints)} tables")
1451
-
1452
  # Step 4: Create model (semantic layer) with constraint references
1453
  log_progress("\n3️⃣ Creating model (semantic layer) with joins...")
1454
- # Use the demo_names that were generated earlier
1455
- model_name = demo_names['model']
 
1456
  log_progress(f" Model name: {model_name}")
1457
 
1458
  # Use the enhanced model creation that includes constraint references
1459
  model_tml = self._create_model_with_constraints(tables, foreign_keys, table_guids, table_constraints, model_name, connection_name)
1460
  print(f"\nπŸ“„ Model TML being sent:\n{model_tml}")
 
 
 
 
 
 
 
 
 
 
 
1461
 
1462
  response = self.session.post(
1463
  f"{self.base_url}/api/rest/2.0/metadata/tml/import",
@@ -1467,7 +1746,9 @@ class ThoughtSpotDeployer:
1467
  "create_new": True
1468
  }
1469
  )
1470
-
 
 
1471
  if response.status_code == 200:
1472
  result = response.json()
1473
 
@@ -1485,6 +1766,8 @@ class ThoughtSpotDeployer:
1485
  if objects and len(objects) > 0:
1486
  if objects[0].get('response', {}).get('status', {}).get('status_code') == 'OK':
1487
  model_guid = objects[0].get('response', {}).get('header', {}).get('id_guid')
 
 
1488
  log_progress(f" βœ… Model created successfully!")
1489
  log_progress(f" Model: {model_name}")
1490
  log_progress(f" GUID: {model_guid}")
@@ -1546,13 +1829,39 @@ class ThoughtSpotDeployer:
1546
  traceback.print_exc()
1547
  else:
1548
  print(f"πŸ“‹ Full model response: {objects}") # DEBUG: Show full response
1549
- error = f"Model failed: {objects[0].get('response', {}).get('status', {}).get('error_message')}"
1550
- print(f" ❌ {error}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1551
  results['errors'].append(error)
1552
  else:
1553
  error = "Model failed: No objects in response"
1554
  log_progress(f" ❌ {error}")
1555
  results['errors'].append(error)
 
 
 
 
 
 
 
 
 
 
 
1556
 
1557
  # Mark as successful if we got this far
1558
  results['success'] = len(results['errors']) == 0
@@ -1560,29 +1869,43 @@ class ThoughtSpotDeployer:
1560
  except Exception as e:
1561
  error_msg = str(e)
1562
  print(f"❌ Deployment failed: {error_msg}")
 
1563
  results['errors'].append(error_msg)
1564
-
 
 
 
 
 
 
 
 
 
 
1565
  return results
1566
 
1567
 
1568
- def deploy_to_thoughtspot(ddl: str, database: str, schema: str,
1569
- connection_name: str = None, company_name: str = None,
1570
- use_case: str = None, progress_callback=None) -> Dict:
1571
  """
1572
  Convenience function for deploying to ThoughtSpot
1573
-
1574
  Args:
1575
  ddl: Data Definition Language statements
1576
  database: Target database name
1577
  schema: Target schema name
1578
  connection_name: Optional connection name
 
 
 
1579
  progress_callback: Optional callback for progress updates
1580
-
1581
  Returns:
1582
  Dict with deployment results
1583
  """
1584
  deployer = ThoughtSpotDeployer()
1585
- return deployer.deploy_all(ddl, database, schema, connection_name, company_name, use_case, progress_callback)
1586
 
1587
 
1588
  if __name__ == "__main__":
 
84
 
85
  return private_key_raw
86
 
87
+ def authenticate(self, verbose: bool = False) -> bool:
88
+ """Authenticate with ThoughtSpot
89
+
90
+ Args:
91
+ verbose: If True, print detailed authentication info. Default False for clean output.
92
+ """
93
  try:
94
  auth_url = f"{self.base_url}/api/rest/2.0/auth/session/login"
95
+ if verbose:
96
+ print(f" πŸ” Attempting authentication to: {auth_url}")
97
+ print(f" πŸ‘€ Username: {self.username}")
98
+
99
  response = self.session.post(
100
  auth_url,
101
  json={
 
103
  "password": self.password
104
  }
105
  )
106
+
107
+ if verbose:
108
+ print(f" πŸ“‘ HTTP Status: {response.status_code}")
109
+
110
  if response.status_code in [200, 204]: # Accept both 200 and 204 as success
111
  if response.status_code == 200:
112
  # Extract token from JSON response
113
  result = response.json()
114
+ if verbose:
115
+ print(f" βœ… Response: {json.dumps(result, indent=2)}")
116
  if 'token' in result:
117
  self.headers['Authorization'] = f'Bearer {result["token"]}'
118
+ if verbose:
119
+ print(" 🎟️ Token extracted successfully")
120
  return True
121
  else:
122
+ if verbose:
123
+ print(" ❌ No token in response")
124
  elif response.status_code == 204:
125
  # HTTP 204 No Content - authentication successful, use session cookies
126
+ if verbose:
127
+ print(" βœ… Authentication successful (HTTP 204 - using session cookies)")
128
  # For 204, we use session cookies instead of bearer token
129
  return True
130
  else:
131
+ if verbose:
132
+ print(f" ❌ HTTP Error {response.status_code}: {response.text}")
133
  return False
134
+
135
  except Exception as e:
136
+ if verbose:
137
+ print(f" πŸ’₯ Authentication exception: {e}")
138
  return False
139
 
140
  def parse_ddl(self, ddl: str) -> Tuple[Dict, List]:
 
330
  joins = []
331
  table_name_upper = table_name.upper()
332
  table_cols = [col['name'].upper() for col in columns]
333
+ available_tables_upper = [t.upper() for t in all_tables.keys()]
334
+
335
  # Find foreign key relationships
336
  for col_name in table_cols:
337
+ if not col_name.endswith('ID'):
338
+ continue
339
+
340
+ # Extract the base name from the column (CUSTOMER_ID -> CUSTOMER, CUSTOMERID -> CUSTOMER)
341
+ if col_name.endswith('_ID'):
342
+ col_base = col_name[:-3] # Remove _ID
343
+ else:
344
+ col_base = col_name[:-2] # Remove ID
345
+
346
+ # Check if this is the table's own primary key
347
+ if table_name_upper.endswith('S') and len(table_name_upper) > 1:
348
+ table_base = table_name_upper[:-1] # CUSTOMERS -> CUSTOMER, SALES_TRANSACTIONS -> SALES_TRANSACTION
349
+ else:
350
+ table_base = table_name_upper
351
+
352
+ table_base_alt = table_name_upper.rstrip('S')
353
+
354
+ # Skip if this is the table's own PK
355
+ is_own_pk = (col_base == table_name_upper or
356
+ col_base == table_base or
357
+ col_base == table_base_alt)
358
+
359
+ if is_own_pk:
360
+ continue
361
+
362
+ # Try to find the target table (CUSTOMER -> CUSTOMERS)
363
+ possible_targets = [
364
+ col_base + 'S', # CUSTOMER -> CUSTOMERS
365
+ col_base + 'ES', # Less common
366
+ col_base # Already plural
367
+ ]
368
+
369
+ found_target = None
370
+ for potential_target in possible_targets:
371
+ if potential_target in available_tables_upper and potential_target != table_name_upper:
372
+ found_target = potential_target
373
+ break
374
+
375
+ if found_target:
376
+ constraint_id = f"SYS_CONSTRAINT_{self._generate_constraint_id()}"
377
+ join_def = {
378
+ 'name': constraint_id,
379
+ 'destination': {
380
+ 'name': found_target
381
+ },
382
+ 'on': f"[{table_name_upper}::{col_name}] = [{found_target}::{col_name}]",
383
+ 'type': 'INNER'
384
+ }
385
+ joins.append(join_def)
386
+ print(f" πŸ”— Generated join: {table_name_upper} -> {found_target} on {col_name}")
387
+ else:
388
+ print(f" ⏭️ Skipping join: {table_name_upper}.{col_name} -> {possible_targets[0]} (table not in this deployment)")
389
+
390
  return joins
391
 
392
  def create_connection_tml(self, connection_name: str) -> str:
 
545
  return True
546
  return False
547
 
548
+ def _is_surrogate_primary_key(self, col: Dict, col_name: str, table_name: str = None) -> bool:
549
+ """Check if column is a meaningless surrogate key (numeric ID) for THIS table.
550
+
551
+ Foreign key columns (like CUSTOMER_ID in SALES_TRANSACTIONS) should NOT be
552
+ treated as surrogate keys - they're important for joins.
553
+ """
554
+ col_name_upper = col_name.upper()
555
+
556
+ # Must end with ID and be numeric
557
+ if not col_name_upper.endswith('ID'):
558
+ return False
559
+
560
+ col_type = col.get('type', '').upper()
561
+ if not any(t in col_type for t in ['INT', 'NUMBER', 'NUMERIC', 'BIGINT']):
562
+ return False
563
+
564
+ # If we don't have table context, be conservative
565
+ if not table_name:
566
+ return False
567
+
568
+ table_name_upper = table_name.upper()
569
+
570
+ # Extract the base name from the column (CUSTOMER_ID -> CUSTOMER, CUSTOMERID -> CUSTOMER)
571
+ if col_name_upper.endswith('_ID'):
572
+ col_base = col_name_upper[:-3] # Remove _ID
573
+ else:
574
+ col_base = col_name_upper[:-2] # Remove ID
575
+
576
+ # Extract the base name from the table (CUSTOMERS -> CUSTOMER, SALES_TRANSACTIONS -> SALES_TRANSACTION)
577
+ # Handle plural table names
578
+ if table_name_upper.endswith('S') and len(table_name_upper) > 1:
579
+ table_base = table_name_upper[:-1] # CUSTOMERS -> CUSTOMER
580
+ else:
581
+ table_base = table_name_upper
582
+
583
+ # Also try removing all trailing 'S' characters for cases like SALES -> SALE
584
+ table_base_alt = table_name_upper.rstrip('S')
585
+
586
+ # It's a surrogate PK only if the column name matches the table name
587
+ # Examples:
588
+ # CUSTOMER_ID in CUSTOMERS table -> surrogate PK (skip)
589
+ # CUSTOMER_ID in SALES_TRANSACTIONS table -> foreign key (DO NOT skip)
590
+ is_own_pk = (col_base == table_name_upper or
591
+ col_base == table_base or
592
+ col_base == table_base_alt or
593
+ col_name_upper == f"{table_name_upper}_ID" or
594
+ col_name_upper == f"{table_base}_ID" or
595
+ col_name_upper == f"{table_base_alt}_ID")
596
+
597
+ return is_own_pk
598
 
599
  def _create_model_with_constraints(self, tables: Dict, foreign_keys: List, table_guids: Dict,
600
  table_constraints: Dict, model_name: str, connection_name: str) -> str:
601
  """Generate model TML with constraint references like our successful test"""
602
  print(" πŸ“‹ Creating model with constraint references")
603
+
604
  # Build column name conflict tracking
605
  column_name_counts = {}
606
  for table_name, columns in tables.items():
 
609
  if col_name not in column_name_counts:
610
  column_name_counts[col_name] = []
611
  column_name_counts[col_name].append(table_name.upper())
612
+
613
+ # INFER JOINS: Build joins from foreign key column names
614
+ # Structure: {source_table: [{target: target_table, constraint_id: ID}, ...]}
615
+ inferred_joins = {}
616
+ table_names_upper = [t.upper() for t in tables.keys()]
617
+
618
+ for table_name, columns in tables.items():
619
+ table_name_upper = table_name.upper()
620
+
621
+ for col in columns:
622
+ col_name = col['name'].upper()
623
+
624
+ # Check if this looks like a foreign key (ends with ID or _ID but isn't the table's own ID)
625
+ if col_name.endswith('ID'):
626
+ # Infer the target table name (CUSTOMERID -> CUSTOMERS, CUSTOMER_ID -> CUSTOMERS)
627
+ if col_name.endswith('_ID'):
628
+ col_base = col_name[:-3] # CUSTOMER_ID -> CUSTOMER
629
+ else:
630
+ col_base = col_name[:-2] # CUSTOMERID -> CUSTOMER
631
+
632
+ # Check if this is the table's own primary key
633
+ # Handle plural table names: CUSTOMERS has CUSTOMER_ID, not CUSTOMERS_ID
634
+ if table_name_upper.endswith('S') and len(table_name_upper) > 1:
635
+ table_base = table_name_upper[:-1] # CUSTOMERS -> CUSTOMER
636
+ else:
637
+ table_base = table_name_upper
638
+
639
+ table_base_alt = table_name_upper.rstrip('S') # Alternative: strip all trailing S
640
+
641
+ is_self_reference = (col_base == table_name_upper or
642
+ col_base == table_base or
643
+ col_base == table_base_alt)
644
+
645
+ # Skip if this is the table's own PK
646
+ if is_self_reference:
647
+ continue
648
+
649
+ # Try multiple potential target table names
650
+ potential_targets = [
651
+ col_base + 'S', # CUSTOMER -> CUSTOMERS
652
+ col_base + 'ES', # Less common but possible
653
+ col_base # Already plural
654
+ ]
655
+
656
+ # Find the first matching target table
657
+ found_target = None
658
+ for potential_target in potential_targets:
659
+ if potential_target in table_names_upper and potential_target != table_name_upper:
660
+ found_target = potential_target
661
+ break
662
+
663
+ # Create join if we found a valid target
664
+ if found_target:
665
+ if table_name_upper not in inferred_joins:
666
+ inferred_joins[table_name_upper] = []
667
+
668
+ # Look up the REAL constraint ID from table_constraints first
669
+ constraint_id = None
670
+ if table_name_upper in table_constraints:
671
+ for constraint in table_constraints[table_name_upper]:
672
+ if constraint['destination'] == found_target:
673
+ constraint_id = constraint['constraint_id']
674
+ print(f" βœ… Using real constraint ID from ThoughtSpot: {constraint_id}")
675
+ break
676
+
677
+ # Fallback: generate synthetic ID if not found (shouldn't happen if Phase 2 worked)
678
+ if not constraint_id:
679
+ constraint_id = f"SYS_CONSTRAINT_{self._generate_constraint_id()}"
680
+ print(f" ⚠️ Generated synthetic constraint ID (real ID not found): {constraint_id[:40]}...")
681
+
682
+ inferred_joins[table_name_upper].append({
683
+ 'target': found_target,
684
+ 'constraint_id': constraint_id,
685
+ 'source_column': col_name
686
+ })
687
+ print(f" πŸ”— Inferred join: {table_name_upper}.{col_name} -> {found_target}")
688
+
689
  model = {
690
  'guid': None,
691
  'model': {
 
702
  }
703
  }
704
  }
705
+
706
+ # Add model_tables with FQNs and inferred joins
707
  for table_name in tables.keys():
708
  table_name_upper = table_name.upper()
709
  table_guid = table_guids.get(table_name_upper)
710
+
711
  table_entry = {
712
  'name': table_name_upper,
713
  'fqn': table_guid
714
  }
715
+
716
+ # Add joins from inferred relationships
717
+ if table_name_upper in inferred_joins and inferred_joins[table_name_upper]:
718
  table_entry['joins'] = []
719
+ for join_info in inferred_joins[table_name_upper]:
720
  join_entry = {
721
+ 'with': join_info['target'],
722
+ 'referencing_join': join_info['constraint_id']
723
  }
724
  table_entry['joins'].append(join_entry)
725
+ print(f" βœ… Added join to model: {table_name_upper} -> {join_info['target']}")
726
+
727
  model['model']['model_tables'].append(table_entry)
728
 
729
  # Add columns with proper global conflict resolution (same as working version)
 
740
  continue
741
 
742
  # SKIP surrogate primary keys (numeric IDs) - nobody searches "customer 23455"
743
+ # Pass table_name to distinguish PKs from FKs
744
+ if self._is_surrogate_primary_key(col, col_name, table_name_upper):
745
  print(f" ⏭️ Skipping surrogate PK: {table_name_upper}.{col_name}")
746
  continue
747
 
 
831
  for col in columns:
832
  col_name = col['name'].upper()
833
 
834
+ # Check if this looks like a foreign key (ends with ID or _ID but isn't the table's own ID)
835
+ if not col_name.endswith('ID'):
836
+ continue
837
+
838
+ # Extract the base name from the column
839
+ if col_name.endswith('_ID'):
840
+ col_base = col_name[:-3] # CUSTOMER_ID -> CUSTOMER
841
+ else:
842
+ col_base = col_name[:-2] # CUSTOMERID -> CUSTOMER
843
+
844
+ # Extract the base name from the table to check if this is the table's own PK
845
+ if table_name_upper.endswith('S') and len(table_name_upper) > 1:
846
+ table_base = table_name_upper[:-1] # CUSTOMERS -> CUSTOMER
847
+ else:
848
+ table_base = table_name_upper
849
 
850
+ table_base_alt = table_name_upper.rstrip('S')
851
+
852
+ # For compound names like SALES_TRANSACTIONS, also check the last word
853
+ # SALES_TRANSACTIONS -> TRANSACTION
854
+ if '_' in table_name_upper:
855
+ last_part = table_name_upper.split('_')[-1]
856
+ if last_part.endswith('S'):
857
+ table_last_part = last_part[:-1]
858
+ else:
859
+ table_last_part = last_part
860
+ else:
861
+ table_last_part = None
862
+
863
+ # Skip if this is the table's own primary key
864
+ is_own_pk = (col_base == table_name_upper or
865
+ col_base == table_base or
866
+ col_base == table_base_alt or
867
+ (table_last_part and col_base == table_last_part))
868
+
869
+ if is_own_pk:
870
+ continue
871
+
872
+ # Try to find the target table (CUSTOMER -> CUSTOMERS)
873
+ # Try multiple plural forms
874
+ possible_targets = [
875
+ col_base + 'S', # CUSTOMER -> CUSTOMERS
876
+ col_base + 'ES', # LOCATION -> LOCATIONES (unlikely but possible)
877
+ col_base # Already plural or no 'S' form
878
+ ]
879
+
880
+ found_target = None
881
+ for target in possible_targets:
882
+ if target in table_names_upper and target != table_name_upper:
883
+ found_target = target
884
+ break
885
+
886
+ # Only warn if we can't find ANY matching table
887
+ if not found_target:
888
+ warnings.append(
889
+ f"⚠️ {table_name}.{col_name} appears to reference {possible_targets[0]}, "
890
+ f"but {possible_targets[0]} is not in this schema. "
891
+ f"The join will be skipped during deployment."
892
+ )
893
 
894
  return warnings
895
 
 
1299
  print(f" ⚠️ Could not create schema: {e}")
1300
  print(f" πŸ“ Will proceed assuming schema exists or will be created by table operations")
1301
 
1302
+ def _generate_demo_names(self, company_name: str = None, use_case: str = None, naming_prefix: str = None):
1303
+ """Generate standardized demo names using consistent format
1304
+
1305
+ Args:
1306
+ naming_prefix: Optional custom prefix
1307
+ Format: {prefix}_{day+ms}_{objtype}
1308
+ Example with prefix: BOONE_14123456_scm
1309
+ Example blank prefix: _14123456_scm
1310
+
1311
+ Note: company_name and use_case parameters deprecated but kept for compatibility
1312
+ """
1313
  from datetime import datetime
1314
  import re
1315
+
 
1316
  now = datetime.now()
1317
+
1318
+ # Clean prefix (uppercase, alphanumeric only) - can be empty string
1319
+ if naming_prefix:
1320
+ prefix_clean = re.sub(r'[^a-zA-Z0-9]', '', naming_prefix.upper())
 
 
1321
  else:
1322
+ prefix_clean = ""
1323
+
1324
+ # Short unique timestamp: day (2 digits) + microseconds (6 digits)
1325
+ day = now.strftime('%d')
1326
+ microsec = now.strftime('%f') # 6 digits
1327
+ short_timestamp = f"{day}{microsec}"
1328
+
1329
+ # Base format: PREFIX_DDMICROS (prefix can be empty)
1330
+ if prefix_clean:
1331
+ base = f"{prefix_clean}_{short_timestamp}"
1332
  else:
1333
+ base = short_timestamp # Just timestamp if no prefix
1334
+
 
 
 
1335
  return {
1336
+ 'schema': f"{base}_scm", # Schema
1337
+ 'connection': f"{base}_conn", # Connection
1338
+ 'model': f"{base}_mdl", # Model
1339
+ 'table': f"{base}_tbl", # Table (if needed)
1340
+ 'base': base
1341
  }
1342
 
1343
+ def deploy_all(self, ddl: str, database: str, schema: str,
1344
+ connection_name: str = None, company_name: str = None,
1345
+ use_case: str = None, naming_prefix: str = None, progress_callback=None) -> Dict:
1346
  """
1347
  Deploy complete data model to ThoughtSpot
1348
 
 
1364
  }
1365
 
1366
  table_guids = {} # Store table GUIDs for model creation
1367
+
1368
+ # Setup deployment log file
1369
+ from pathlib import Path
1370
+ import datetime as dt
1371
+ log_dir = Path("logs") / "deployments"
1372
+ log_dir.mkdir(parents=True, exist_ok=True)
1373
+ timestamp_str = dt.datetime.now().strftime("%Y%m%d_%H%M%S")
1374
+ deployment_log_file = log_dir / f"deployment_{schema}_{timestamp_str}.log"
1375
+ deployment_log = open(deployment_log_file, "w", buffering=1) # Line buffered
1376
+ deployment_log.write(f"=== ThoughtSpot Deployment Log ===\n")
1377
+ deployment_log.write(f"Schema: {schema}\n")
1378
+ deployment_log.write(f"Database: {database}\n")
1379
+ deployment_log.write(f"Timestamp: {dt.datetime.now().isoformat()}\n")
1380
+ deployment_log.write(f"=" * 50 + "\n\n")
1381
+
1382
  def log_progress(message):
1383
+ """Helper to log progress to console, callback, AND file"""
1384
  print(message, flush=True)
1385
  if progress_callback:
1386
  progress_callback(message)
1387
+ # ALWAYS write to log file
1388
+ deployment_log.write(message + "\n")
1389
+ deployment_log.flush()
1390
 
1391
  try:
1392
  # STEP 0: Authenticate first!
 
1412
  else:
1413
  log_progress("βœ… All foreign key references are valid\n")
1414
 
1415
+ # Step 1: Extract base from schema name to ensure consistency
1416
+ # Schema format: PREFIX_DDMICROS_scm (e.g., AMAZ_14283952_scm)
1417
+ # We need to use the SAME base for all ThoughtSpot objects
1418
+ import re
1419
+ if schema.endswith('_scm'):
1420
+ base_from_schema = schema[:-4] # Remove "_scm" suffix
1421
+ else:
1422
+ # Fallback: generate new names if schema doesn't follow convention
1423
+ demo_names = self._generate_demo_names(company_name, use_case, naming_prefix)
1424
+ base_from_schema = demo_names['base']
1425
+
1426
+ # Use the schema's base for all ThoughtSpot objects
1427
  if not connection_name:
1428
+ connection_name = f"{base_from_schema}_conn"
1429
 
1430
  log_progress("1️⃣ Checking/Creating connection...")
1431
  log_progress(f" Connection name: {connection_name}")
 
1519
  log_progress("\n2️⃣ Creating tables...")
1520
 
1521
  # PHASE 1: Create all tables WITHOUT joins (to ensure all tables exist first)
1522
+ log_progress(" πŸ“‹ Phase 1: Creating ALL tables in one batch (no joins)...")
1523
+ import time
1524
+
1525
+ # Step 1.1: Generate TMLs for all tables
1526
+ batch_start = time.time()
1527
+ table_tmls = []
1528
+ table_names_list = []
1529
 
1530
+ for table_name, columns in tables.items():
1531
  tml_start = time.time()
1532
  table_tml = self.create_table_tml(table_name, columns, connection_name, database, schema, all_tables=None)
1533
  tml_time = time.time() - tml_start
1534
+ table_tmls.append(table_tml)
1535
+ table_names_list.append(table_name.upper())
1536
+ log_progress(f" πŸ“ Generated TML for {table_name.upper()} ({tml_time:.3f}s, {len(table_tml)} chars, {len(columns)} columns)")
1537
 
1538
+ # Step 1.2: Send ALL tables in ONE API call
1539
+ log_progress(f"\n πŸš€ Batch creating {len(table_tmls)} tables...")
1540
+ api_start = time.time()
1541
+ response = self.session.post(
1542
+ f"{self.base_url}/api/rest/2.0/metadata/tml/import",
1543
+ json={
1544
+ "metadata_tmls": table_tmls, # Send ALL tables at once!
1545
+ "import_policy": "PARTIAL",
1546
+ "create_new": True
1547
+ }
1548
+ )
1549
+ api_time = time.time() - api_start
1550
+ total_time = time.time() - batch_start
1551
+ log_progress(f" ⏱️ Phase 1 batch call: {api_time:.2f} seconds")
 
 
1552
 
1553
+ # Step 1.3: Process batch response
1554
+ if response.status_code == 200:
1555
+ result = response.json()
1556
 
1557
+ # Handle both response formats
1558
+ if isinstance(result, list):
1559
+ objects = result
1560
+ elif isinstance(result, dict) and 'object' in result:
1561
+ objects = result['object']
1562
+ else:
1563
+ error = f"Batch table creation failed: Unexpected response format: {type(result)}"
1564
+ log_progress(f" ❌ {error}")
1565
+ results['errors'].append(error)
1566
+ return results
1567
 
1568
+ # Process each table result
1569
+ if objects and len(objects) > 0:
1570
+ for idx, obj in enumerate(objects):
1571
+ table_name = table_names_list[idx] if idx < len(table_names_list) else f"Table_{idx}"
1572
  if obj.get('response', {}).get('status', {}).get('status_code') == 'OK':
1573
  table_guid = obj.get('response', {}).get('header', {}).get('id_guid')
1574
+ log_progress(f" βœ… {table_name} created (GUID: {table_guid})")
1575
+ results['tables'].append(table_name)
1576
+ table_guids[table_name] = table_guid
 
 
1577
  else:
1578
+ error_msg = obj.get('response', {}).get('status', {}).get('error_message', 'Unknown error')
1579
+ error = f"{table_name} failed: {error_msg}"
1580
  log_progress(f" ❌ {error}")
1581
  results['errors'].append(error)
 
 
 
 
 
1582
  else:
1583
+ error = "Batch table creation failed: No objects in response"
1584
  log_progress(f" ❌ {error}")
1585
  results['errors'].append(error)
1586
+ return results
1587
+ else:
1588
+ error = f"Batch table creation HTTP error: {response.status_code}"
1589
+ log_progress(f" ❌ {error}")
1590
+ log_progress(f" πŸ“„ Response: {response.text[:500]}")
1591
+ results['errors'].append(error)
1592
+ return results
1593
 
1594
  # Check if we created any tables successfully
1595
  if not table_guids:
1596
  log_progress(" ❌ No tables were created successfully in Phase 1")
1597
  return results
1598
 
1599
+ log_progress(f"\n βœ… Phase 1 complete: {len(table_guids)} tables created in {total_time:.2f} seconds")
1600
+
1601
+ # PHASE 2: Update tables WITH joins (now that all tables exist) - BATCH MODE
1602
+ log_progress("\n πŸ“‹ Phase 2: Adding joins to ALL tables in one batch...")
1603
+ phase2_start = time.time()
1604
+
1605
+ # Generate TML for all tables with joins
1606
+ table_tmls_with_joins = []
1607
+ table_names_for_phase2 = []
1608
 
 
 
1609
  for table_name, columns in tables.items():
 
1610
  table_name_upper = table_name.upper()
1611
  if table_name_upper not in table_guids:
1612
  log_progress(f" ⏭️ Skipping joins for {table_name_upper} (table creation failed)")
1613
  continue
1614
 
 
1615
  table_guid = table_guids[table_name_upper]
1616
 
 
1617
  # Create table TML WITH joins_with section AND the table GUID
1618
  table_tml = self.create_table_tml(
1619
  table_name, columns, connection_name, database, schema,
1620
  all_tables=tables, table_guid=table_guid
1621
  )
1622
+ table_tmls_with_joins.append(table_tml)
1623
+ table_names_for_phase2.append(table_name_upper)
1624
+
1625
+ # Single batch API call to update all tables with joins
1626
+ if table_tmls_with_joins:
1627
+ log_progress(f" πŸš€ Batch updating {len(table_tmls_with_joins)} tables with joins...")
1628
+ api_start = time.time()
1629
 
1630
  response = self.session.post(
1631
  f"{self.base_url}/api/rest/2.0/metadata/tml/import",
1632
  json={
1633
+ "metadata_tmls": table_tmls_with_joins,
1634
  "import_policy": "PARTIAL",
1635
+ "create_new": False # Update existing tables
1636
  }
1637
  )
1638
 
1639
+ phase2_api_time = time.time() - api_start
1640
+ log_progress(f" ⏱️ Phase 2 batch call: {phase2_api_time:.2f} seconds")
1641
+
1642
  if response.status_code == 200:
1643
  result = response.json()
1644
 
1645
+ # Handle both response formats
1646
  if isinstance(result, list):
1647
  objects = result
1648
  elif isinstance(result, dict) and 'object' in result:
 
1651
  log_progress(f" ⚠️ Unexpected response format for joins: {type(result)}")
1652
  objects = []
1653
 
1654
+ # Process each table result
1655
  if objects and len(objects) > 0:
1656
+ for idx, obj in enumerate(objects):
1657
+ table_name = table_names_for_phase2[idx] if idx < len(table_names_for_phase2) else f"Table_{idx}"
1658
+ if obj.get('response', {}).get('status', {}).get('status_code') == 'OK':
1659
+ log_progress(f" βœ… Joins added: {table_name}")
1660
+ else:
1661
+ error_msg = obj.get('response', {}).get('status', {}).get('error_message', 'Unknown error')
1662
+ log_progress(f" ⚠️ Joins failed for {table_name}: {error_msg}")
1663
+ results['errors'].append(f"Adding joins to {table_name} failed: {error_msg}")
1664
  else:
1665
+ log_progress(f" ⚠️ No results returned for join updates")
1666
  else:
1667
+ log_progress(f" ⚠️ HTTP error adding joins: {response.status_code}")
1668
+ log_progress(f" πŸ“„ Response: {response.text[:500]}")
1669
 
1670
+ log_progress(f" βœ… Phase 2 complete: Joins added to all tables in one batch")
1671
  actual_constraint_ids = {} # We'll generate these for the model
1672
 
1673
  # Skip separate relationship creation for now
 
1676
 
1677
  # Step 3: Extract constraint IDs from created tables
1678
  log_progress("\n2️⃣.5 Extracting constraint IDs from created tables...")
1679
+ extract_start = time.time()
1680
  table_constraints = {}
1681
+
1682
  for table_name, table_guid in table_guids.items():
1683
  log_progress(f" πŸ” Getting constraint IDs for {table_name}...")
1684
 
 
1711
  'destination': destination
1712
  })
1713
  log_progress(f" πŸ”— Found join: {constraint_id} -> {destination}")
1714
+
1715
+ extract_time = time.time() - extract_start
1716
+ log_progress(f" ⏱️ Constraint extraction time: {extract_time:.2f} seconds")
1717
  log_progress(f" βœ… Extracted constraints from {len(table_constraints)} tables")
1718
+
1719
  # Step 4: Create model (semantic layer) with constraint references
1720
  log_progress("\n3️⃣ Creating model (semantic layer) with joins...")
1721
+ model_start = time.time()
1722
+ # Use the same base from schema for model name
1723
+ model_name = f"{base_from_schema}_mdl"
1724
  log_progress(f" Model name: {model_name}")
1725
 
1726
  # Use the enhanced model creation that includes constraint references
1727
  model_tml = self._create_model_with_constraints(tables, foreign_keys, table_guids, table_constraints, model_name, connection_name)
1728
  print(f"\nπŸ“„ Model TML being sent:\n{model_tml}")
1729
+
1730
+ # ALWAYS save model TML to logs for debugging
1731
+ from pathlib import Path
1732
+ import datetime as dt
1733
+ log_dir = Path("logs") / "model_tmls"
1734
+ log_dir.mkdir(parents=True, exist_ok=True)
1735
+ timestamp = dt.datetime.now().strftime("%Y%m%d_%H%M%S")
1736
+ tml_file = log_dir / f"model_{model_name}_{timestamp}.tml"
1737
+ with open(tml_file, "w") as f:
1738
+ f.write(model_tml)
1739
+ log_progress(f" πŸ“ Model TML saved to: {tml_file}")
1740
 
1741
  response = self.session.post(
1742
  f"{self.base_url}/api/rest/2.0/metadata/tml/import",
 
1746
  "create_new": True
1747
  }
1748
  )
1749
+
1750
+ log_progress(f" πŸ“‘ Model API response status: {response.status_code}")
1751
+
1752
  if response.status_code == 200:
1753
  result = response.json()
1754
 
 
1766
  if objects and len(objects) > 0:
1767
  if objects[0].get('response', {}).get('status', {}).get('status_code') == 'OK':
1768
  model_guid = objects[0].get('response', {}).get('header', {}).get('id_guid')
1769
+ model_time = time.time() - model_start
1770
+ log_progress(f" ⏱️ Model creation time: {model_time:.2f} seconds")
1771
  log_progress(f" βœ… Model created successfully!")
1772
  log_progress(f" Model: {model_name}")
1773
  log_progress(f" GUID: {model_guid}")
 
1829
  traceback.print_exc()
1830
  else:
1831
  print(f"πŸ“‹ Full model response: {objects}") # DEBUG: Show full response
1832
+ status_obj = objects[0].get('response', {}).get('status', {})
1833
+ error_msg = status_obj.get('error_message', 'Unknown error')
1834
+ status_code = status_obj.get('status_code', 'Unknown')
1835
+ error = f"Model failed: {error_msg} (status: {status_code})"
1836
+ log_progress(f" ❌ {error}")
1837
+ if 'error' in status_obj:
1838
+ log_progress(f" πŸ“‹ Additional error details: {status_obj['error']}")
1839
+
1840
+ # Save full error response to log file
1841
+ error_log_dir = Path("logs") / "model_errors"
1842
+ error_log_dir.mkdir(parents=True, exist_ok=True)
1843
+ error_file = error_log_dir / f"model_error_{model_name}_{timestamp}.json"
1844
+ import json
1845
+ with open(error_file, "w") as f:
1846
+ json.dump(objects, f, indent=2)
1847
+ log_progress(f" πŸ“ Full error saved to: {error_file}")
1848
+
1849
  results['errors'].append(error)
1850
  else:
1851
  error = "Model failed: No objects in response"
1852
  log_progress(f" ❌ {error}")
1853
  results['errors'].append(error)
1854
+ else:
1855
+ # Handle non-200 responses
1856
+ error = f"Model API call failed with status {response.status_code}"
1857
+ log_progress(f" ❌ {error}")
1858
+ log_progress(f" πŸ“„ Response: {response.text[:500]}") # First 500 chars
1859
+ try:
1860
+ error_detail = response.json()
1861
+ log_progress(f" πŸ“‹ Error details: {error_detail}")
1862
+ except:
1863
+ pass
1864
+ results['errors'].append(error)
1865
 
1866
  # Mark as successful if we got this far
1867
  results['success'] = len(results['errors']) == 0
 
1869
  except Exception as e:
1870
  error_msg = str(e)
1871
  print(f"❌ Deployment failed: {error_msg}")
1872
+ deployment_log.write(f"❌ Deployment failed: {error_msg}\n")
1873
  results['errors'].append(error_msg)
1874
+ finally:
1875
+ # Close deployment log file
1876
+ deployment_log.write(f"\n{'='*50}\n")
1877
+ deployment_log.write(f"Deployment completed at: {dt.datetime.now().isoformat()}\n")
1878
+ deployment_log.write(f"Log saved to: {deployment_log_file}\n")
1879
+ deployment_log.close()
1880
+ # Print directly since deployment_log is now closed
1881
+ print(f"\nπŸ“ Full deployment log saved to: {deployment_log_file}")
1882
+ if progress_callback:
1883
+ progress_callback(f"\nπŸ“ Full deployment log saved to: {deployment_log_file}")
1884
+
1885
  return results
1886
 
1887
 
1888
def deploy_to_thoughtspot(ddl: str, database: str, schema: str,
                          connection_name: str = None, company_name: str = None,
                          use_case: str = None, naming_prefix: str = None, progress_callback=None) -> Dict:
    """
    Module-level convenience wrapper for deploying to ThoughtSpot.

    Args:
        ddl: Data Definition Language statements
        database: Target database name
        schema: Target schema name
        connection_name: Optional connection name
        company_name: Optional company name (used in default naming)
        use_case: Optional use case (used in default naming)
        naming_prefix: Optional custom naming prefix (format: {prefix}_{day+ms}_{objtype})
        progress_callback: Optional callback for progress updates

    Returns:
        Dict with deployment results
    """
    # Delegate straight to a fresh deployer instance; all of the heavy
    # lifting (connection, tables, joins, model) happens in deploy_all.
    return ThoughtSpotDeployer().deploy_all(
        ddl, database, schema, connection_name,
        company_name, use_case, naming_prefix, progress_callback,
    )
1909
 
1910
 
1911
  if __name__ == "__main__":