Spaces:
Sleeping
fix: Implement consistent naming convention across all objects
Browse files- Replace verbose DM{YYMMDD}_{HHMMSS}_{COMPANY}_{USECASE} format
- New format: {PREFIX}_{DDMICROS}_{objtype} (prefix optional)
- Extract base timestamp from schema to ensure consistency across objects
- Add naming_prefix field to UI settings (saved in Supabase)
- Fix DDL transaction commit in cdw_connector.py:153
- Add USE SCHEMA command to population scripts for proper context
- Remove confusing "per table" timing messages, show batch call times only
- Enhance Liveboard error handling with detailed exception catching
Key fixes:
- Schema/connection/model now use same base timestamp
- Tables persist in Snowflake (commit added)
- Population scripts set proper schema context
- Clean performance metrics display
π€ Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
- cdw_connector.py +36 -10
- demo_logger.py +87 -29
- demo_prep.py +142 -63
- liveboard_creator.py +87 -7
- supabase_client.py +1 -0
- thoughtspot_deployer.py +513 -190
|
@@ -72,19 +72,40 @@ class SnowflakeDeployer:
|
|
| 72 |
except Exception as e:
|
| 73 |
print(f"Error closing connection: {str(e)}")
|
| 74 |
|
| 75 |
-
def create_demo_schema_and_deploy(self, company_name: str, use_case: str, ddl_statements: str) -> Tuple[bool, Optional[str], str]:
|
| 76 |
-
"""Create timestamped schema and deploy DDL statements
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
if not self.connection:
|
| 78 |
success, message = self.connect()
|
| 79 |
if not success:
|
| 80 |
return False, None, f"Connection failed: {message}"
|
| 81 |
-
|
| 82 |
try:
|
| 83 |
-
# Generate schema name
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
|
| 89 |
cursor = self.connection.cursor()
|
| 90 |
|
|
@@ -127,15 +148,20 @@ class SnowflakeDeployer:
|
|
| 127 |
# Check autocommit setting
|
| 128 |
print(f"π DEBUG: Connection autocommit: {self.connection.autocommit}")
|
| 129 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
# VERIFY the schema actually exists
|
| 131 |
print(f"π DEBUG: Verifying schema exists...")
|
| 132 |
cursor.execute(f'SELECT CURRENT_SCHEMA()')
|
| 133 |
current_schema = cursor.fetchone()[0]
|
| 134 |
print(f"π DEBUG: Current schema: {current_schema}")
|
| 135 |
print(f"π DEBUG: Schema verification completed - using current schema context")
|
| 136 |
-
|
| 137 |
cursor.close()
|
| 138 |
-
|
| 139 |
success_message = f"Schema '{schema_name}' created successfully with {executed_count} tables"
|
| 140 |
return True, schema_name, success_message
|
| 141 |
|
|
|
|
| 72 |
except Exception as e:
|
| 73 |
print(f"Error closing connection: {str(e)}")
|
| 74 |
|
| 75 |
+
def create_demo_schema_and_deploy(self, company_name: str, use_case: str, ddl_statements: str, naming_prefix: str = "") -> Tuple[bool, Optional[str], str]:
|
| 76 |
+
"""Create timestamped schema and deploy DDL statements
|
| 77 |
+
|
| 78 |
+
Args:
|
| 79 |
+
naming_prefix: Optional custom prefix for schema name
|
| 80 |
+
Format: {prefix}_{day+ms}_scm or just {day+ms}_scm if blank
|
| 81 |
+
"""
|
| 82 |
if not self.connection:
|
| 83 |
success, message = self.connect()
|
| 84 |
if not success:
|
| 85 |
return False, None, f"Connection failed: {message}"
|
| 86 |
+
|
| 87 |
try:
|
| 88 |
+
# Generate schema name using same convention as ThoughtSpot objects
|
| 89 |
+
import re
|
| 90 |
+
from datetime import datetime as dt
|
| 91 |
+
now = dt.now()
|
| 92 |
+
|
| 93 |
+
# Clean prefix (uppercase, alphanumeric only) - can be empty string
|
| 94 |
+
if naming_prefix:
|
| 95 |
+
prefix_clean = re.sub(r'[^a-zA-Z0-9]', '', naming_prefix.upper())
|
| 96 |
+
else:
|
| 97 |
+
prefix_clean = ""
|
| 98 |
+
|
| 99 |
+
# Short unique timestamp: day (2 digits) + microseconds (6 digits)
|
| 100 |
+
day = now.strftime('%d')
|
| 101 |
+
microsec = now.strftime('%f') # 6 digits
|
| 102 |
+
short_timestamp = f"{day}{microsec}"
|
| 103 |
+
|
| 104 |
+
# Base format: PREFIX_DDMICROS_scm (prefix can be empty)
|
| 105 |
+
if prefix_clean:
|
| 106 |
+
schema_name = f"{prefix_clean}_{short_timestamp}_scm"
|
| 107 |
+
else:
|
| 108 |
+
schema_name = f"{short_timestamp}_scm"
|
| 109 |
|
| 110 |
cursor = self.connection.cursor()
|
| 111 |
|
|
|
|
| 148 |
# Check autocommit setting
|
| 149 |
print(f"π DEBUG: Connection autocommit: {self.connection.autocommit}")
|
| 150 |
|
| 151 |
+
# COMMIT the transaction
|
| 152 |
+
print(f"π DEBUG: Committing transaction...")
|
| 153 |
+
self.connection.commit()
|
| 154 |
+
print(f"β
Transaction committed - {executed_count} tables created")
|
| 155 |
+
|
| 156 |
# VERIFY the schema actually exists
|
| 157 |
print(f"π DEBUG: Verifying schema exists...")
|
| 158 |
cursor.execute(f'SELECT CURRENT_SCHEMA()')
|
| 159 |
current_schema = cursor.fetchone()[0]
|
| 160 |
print(f"π DEBUG: Current schema: {current_schema}")
|
| 161 |
print(f"π DEBUG: Schema verification completed - using current schema context")
|
| 162 |
+
|
| 163 |
cursor.close()
|
| 164 |
+
|
| 165 |
success_message = f"Schema '{schema_name}' created successfully with {executed_count} tables"
|
| 166 |
return True, schema_name, success_message
|
| 167 |
|
|
@@ -25,17 +25,24 @@ load_dotenv()
|
|
| 25 |
class DemoLogger:
|
| 26 |
"""Comprehensive logger for demo creation process"""
|
| 27 |
|
| 28 |
-
def __init__(self, session_id: str = None, user_email: str = None):
|
| 29 |
"""
|
| 30 |
Initialize logger with session ID and optional user email
|
| 31 |
|
| 32 |
Args:
|
| 33 |
session_id: Unique ID for this demo creation session
|
| 34 |
user_email: User's email for tracking
|
|
|
|
| 35 |
"""
|
| 36 |
self.session_id = session_id or datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 37 |
self.user_email = user_email or "anonymous"
|
| 38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
# Create logs directory
|
| 40 |
self.log_dir = Path("logs") / "demo_sessions" / self.session_id
|
| 41 |
self.log_dir.mkdir(parents=True, exist_ok=True)
|
|
@@ -64,7 +71,14 @@ class DemoLogger:
|
|
| 64 |
"status": "started"
|
| 65 |
}
|
| 66 |
|
| 67 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
|
| 69 |
def setup_logger(self):
|
| 70 |
"""Set up Python logger"""
|
|
@@ -100,47 +114,79 @@ class DemoLogger:
|
|
| 100 |
self.session_data["llm_model"] = llm_model
|
| 101 |
self.session_data["settings"].update(kwargs)
|
| 102 |
|
| 103 |
-
self.logger.info(
|
| 104 |
-
self.logger.info(
|
| 105 |
-
self.logger.info(
|
| 106 |
-
self.logger.info(f"
|
| 107 |
-
self.logger.info(f"
|
| 108 |
-
self.logger.info(f"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
|
| 110 |
self.save_session_data()
|
| 111 |
|
| 112 |
def log_ddl(self, ddl_content: str, schema_name: str = None):
|
| 113 |
-
"""Log the generated DDL"""
|
| 114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
self.session_data["schema_name"] = schema_name
|
| 116 |
|
| 117 |
-
#
|
| 118 |
ddl_file = self.log_dir / "generated_ddl.sql"
|
| 119 |
with open(ddl_file, 'w') as f:
|
| 120 |
f.write(ddl_content)
|
| 121 |
|
| 122 |
# Log summary
|
| 123 |
tables = ddl_content.count("CREATE TABLE")
|
| 124 |
-
self.logger.info(
|
| 125 |
-
self.logger.info(f"DDL
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
|
| 131 |
self.save_session_data()
|
| 132 |
|
| 133 |
def log_population_code(self, code: str):
|
| 134 |
-
"""Log the generated population code"""
|
| 135 |
-
|
|
|
|
| 136 |
|
| 137 |
-
#
|
| 138 |
code_file = self.log_dir / "population_code.py"
|
| 139 |
with open(code_file, 'w') as f:
|
| 140 |
f.write(code)
|
| 141 |
|
| 142 |
-
self.logger.info(
|
| 143 |
-
self.logger.info(f"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
|
| 145 |
self.save_session_data()
|
| 146 |
|
|
@@ -148,9 +194,16 @@ class DemoLogger:
|
|
| 148 |
"""Log a discovered outlier"""
|
| 149 |
self.session_data["outliers"].append(outlier)
|
| 150 |
|
| 151 |
-
self.logger.info(f"Outlier discovered: {outlier.get('title', 'Unknown')}")
|
| 152 |
-
self.
|
| 153 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
|
| 155 |
self.save_session_data()
|
| 156 |
|
|
@@ -190,13 +243,14 @@ class DemoLogger:
|
|
| 190 |
self.save_session_data()
|
| 191 |
|
| 192 |
def log_stage_completion(self, stage: str, success: bool,
|
| 193 |
-
duration_seconds: float = None):
|
| 194 |
"""Log completion of a demo creation stage"""
|
| 195 |
stage_data = {
|
| 196 |
"stage": stage,
|
| 197 |
"success": success,
|
| 198 |
"duration": duration_seconds,
|
| 199 |
-
"timestamp": datetime.now().isoformat()
|
|
|
|
| 200 |
}
|
| 201 |
|
| 202 |
if "stages" not in self.session_data:
|
|
@@ -205,9 +259,13 @@ class DemoLogger:
|
|
| 205 |
self.session_data["stages"].append(stage_data)
|
| 206 |
|
| 207 |
status = "β
SUCCESS" if success else "β FAILED"
|
| 208 |
-
self.logger.info(
|
|
|
|
| 209 |
if duration_seconds:
|
| 210 |
-
self.logger.info(f"
|
|
|
|
|
|
|
|
|
|
| 211 |
|
| 212 |
self.save_session_data()
|
| 213 |
|
|
|
|
| 25 |
class DemoLogger:
|
| 26 |
"""Comprehensive logger for demo creation process"""
|
| 27 |
|
| 28 |
+
def __init__(self, session_id: str = None, user_email: str = None, debug_mode: bool = None):
|
| 29 |
"""
|
| 30 |
Initialize logger with session ID and optional user email
|
| 31 |
|
| 32 |
Args:
|
| 33 |
session_id: Unique ID for this demo creation session
|
| 34 |
user_email: User's email for tracking
|
| 35 |
+
debug_mode: Enable debug mode (saves DDL, population code, outliers)
|
| 36 |
"""
|
| 37 |
self.session_id = session_id or datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 38 |
self.user_email = user_email or "anonymous"
|
| 39 |
|
| 40 |
+
# Check for DEBUG environment variable or parameter
|
| 41 |
+
if debug_mode is None:
|
| 42 |
+
self.debug_mode = os.getenv('DEBUG', 'false').lower() in ['true', '1', 'yes']
|
| 43 |
+
else:
|
| 44 |
+
self.debug_mode = debug_mode
|
| 45 |
+
|
| 46 |
# Create logs directory
|
| 47 |
self.log_dir = Path("logs") / "demo_sessions" / self.session_id
|
| 48 |
self.log_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
| 71 |
"status": "started"
|
| 72 |
}
|
| 73 |
|
| 74 |
+
# Log session start with timestamp
|
| 75 |
+
self.logger.info("="*80)
|
| 76 |
+
self.logger.info(f"π Demo Session Started")
|
| 77 |
+
self.logger.info(f" Session ID: {self.session_id}")
|
| 78 |
+
self.logger.info(f" User: {self.user_email}")
|
| 79 |
+
self.logger.info(f" Timestamp: {self.session_data['start_time']}")
|
| 80 |
+
self.logger.info(f" Debug Mode: {'ON' if self.debug_mode else 'OFF'}")
|
| 81 |
+
self.logger.info("="*80)
|
| 82 |
|
| 83 |
def setup_logger(self):
|
| 84 |
"""Set up Python logger"""
|
|
|
|
| 114 |
self.session_data["llm_model"] = llm_model
|
| 115 |
self.session_data["settings"].update(kwargs)
|
| 116 |
|
| 117 |
+
self.logger.info("="*80)
|
| 118 |
+
self.logger.info("π Demo Configuration")
|
| 119 |
+
self.logger.info("="*80)
|
| 120 |
+
self.logger.info(f"π― MAIN SETTINGS:")
|
| 121 |
+
self.logger.info(f" Company URL: {company_url}")
|
| 122 |
+
self.logger.info(f" Use Case: {use_case}")
|
| 123 |
+
self.logger.info(f" LLM Model: {llm_model}")
|
| 124 |
+
self.logger.info(f"")
|
| 125 |
+
self.logger.info(f"π Additional Info:")
|
| 126 |
+
self.logger.info(f" Company: {company}")
|
| 127 |
+
self.logger.info(f" Industry: {industry}")
|
| 128 |
+
|
| 129 |
+
if kwargs:
|
| 130 |
+
self.logger.info(f"")
|
| 131 |
+
self.logger.info(f"βοΈ Other Settings:")
|
| 132 |
+
for key, value in kwargs.items():
|
| 133 |
+
self.logger.info(f" {key}: {value}")
|
| 134 |
+
|
| 135 |
+
self.logger.info("="*80)
|
| 136 |
|
| 137 |
self.save_session_data()
|
| 138 |
|
| 139 |
def log_ddl(self, ddl_content: str, schema_name: str = None):
|
| 140 |
+
"""Log the generated DDL - ALWAYS saves to file"""
|
| 141 |
+
# Always save full DDL if debug mode
|
| 142 |
+
if self.debug_mode:
|
| 143 |
+
self.session_data["ddl"] = ddl_content
|
| 144 |
+
else:
|
| 145 |
+
self.session_data["ddl"] = ddl_content[:1000] + "..." if len(ddl_content) > 1000 else ddl_content
|
| 146 |
+
|
| 147 |
self.session_data["schema_name"] = schema_name
|
| 148 |
|
| 149 |
+
# ALWAYS save DDL to separate file
|
| 150 |
ddl_file = self.log_dir / "generated_ddl.sql"
|
| 151 |
with open(ddl_file, 'w') as f:
|
| 152 |
f.write(ddl_content)
|
| 153 |
|
| 154 |
# Log summary
|
| 155 |
tables = ddl_content.count("CREATE TABLE")
|
| 156 |
+
self.logger.info("="*80)
|
| 157 |
+
self.logger.info(f"π DDL Generated")
|
| 158 |
+
self.logger.info("="*80)
|
| 159 |
+
self.logger.info(f" Tables: {tables}")
|
| 160 |
+
self.logger.info(f" Size: {len(ddl_content)} characters")
|
| 161 |
+
self.logger.info(f" Schema: {schema_name or 'N/A'}")
|
| 162 |
+
self.logger.info(f" Saved to: {ddl_file}")
|
| 163 |
+
|
| 164 |
+
# Check for SALESTRANSACTIONS or SALES_TRANSACTIONS table (known issue)
|
| 165 |
+
ddl_upper = ddl_content.upper()
|
| 166 |
+
if "SALESTRANSACTIONS" not in ddl_upper and "SALES_TRANSACTIONS" not in ddl_upper:
|
| 167 |
+
self.logger.warning("β οΈ SALESTRANSACTIONS table not found in DDL - this will cause deployment issues!")
|
| 168 |
+
self.log_warning("SALESTRANSACTIONS table not found in DDL")
|
| 169 |
+
|
| 170 |
+
self.logger.info("="*80)
|
| 171 |
|
| 172 |
self.save_session_data()
|
| 173 |
|
| 174 |
def log_population_code(self, code: str):
|
| 175 |
+
"""Log the generated population code - saves to file if DEBUG mode"""
|
| 176 |
+
# Always save summary
|
| 177 |
+
self.session_data["population_code"] = code[:5000] if not self.debug_mode else code
|
| 178 |
|
| 179 |
+
# ALWAYS save full code to file
|
| 180 |
code_file = self.log_dir / "population_code.py"
|
| 181 |
with open(code_file, 'w') as f:
|
| 182 |
f.write(code)
|
| 183 |
|
| 184 |
+
self.logger.info("="*80)
|
| 185 |
+
self.logger.info(f"π Population Code Generated")
|
| 186 |
+
self.logger.info("="*80)
|
| 187 |
+
self.logger.info(f" Size: {len(code)} characters")
|
| 188 |
+
self.logger.info(f" Saved to: {code_file}")
|
| 189 |
+
self.logger.info("="*80)
|
| 190 |
|
| 191 |
self.save_session_data()
|
| 192 |
|
|
|
|
| 194 |
"""Log a discovered outlier"""
|
| 195 |
self.session_data["outliers"].append(outlier)
|
| 196 |
|
| 197 |
+
self.logger.info(f"π‘ Outlier discovered: {outlier.get('title', 'Unknown')}")
|
| 198 |
+
if self.debug_mode:
|
| 199 |
+
self.logger.info(f" Insight: {outlier.get('insight', 'N/A')}")
|
| 200 |
+
self.logger.info(f" Impact: {outlier.get('impact', 'N/A')}")
|
| 201 |
+
|
| 202 |
+
# Save all outliers to file in DEBUG mode
|
| 203 |
+
if self.debug_mode:
|
| 204 |
+
outliers_file = self.log_dir / "outliers.json"
|
| 205 |
+
with open(outliers_file, 'w') as f:
|
| 206 |
+
json.dump(self.session_data["outliers"], f, indent=2)
|
| 207 |
|
| 208 |
self.save_session_data()
|
| 209 |
|
|
|
|
| 243 |
self.save_session_data()
|
| 244 |
|
| 245 |
def log_stage_completion(self, stage: str, success: bool,
|
| 246 |
+
duration_seconds: float = None, details: str = None):
|
| 247 |
"""Log completion of a demo creation stage"""
|
| 248 |
stage_data = {
|
| 249 |
"stage": stage,
|
| 250 |
"success": success,
|
| 251 |
"duration": duration_seconds,
|
| 252 |
+
"timestamp": datetime.now().isoformat(),
|
| 253 |
+
"details": details
|
| 254 |
}
|
| 255 |
|
| 256 |
if "stages" not in self.session_data:
|
|
|
|
| 259 |
self.session_data["stages"].append(stage_data)
|
| 260 |
|
| 261 |
status = "β
SUCCESS" if success else "β FAILED"
|
| 262 |
+
self.logger.info("="*80)
|
| 263 |
+
self.logger.info(f"π Stage: {stage.upper()} - {status}")
|
| 264 |
if duration_seconds:
|
| 265 |
+
self.logger.info(f" Duration: {duration_seconds:.2f} seconds")
|
| 266 |
+
if details:
|
| 267 |
+
self.logger.info(f" Details: {details}")
|
| 268 |
+
self.logger.info("="*80)
|
| 269 |
|
| 270 |
self.save_session_data()
|
| 271 |
|
|
@@ -313,26 +313,34 @@ def execute_population_script(python_code, schema_name):
|
|
| 313 |
"conn_params = get_snowflake_connection_params()",
|
| 314 |
"conn_params = get_snowflake_connection_params()\nconn_params.pop('schema', None) # Remove schema to avoid duplicate"
|
| 315 |
)
|
| 316 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 317 |
# Simple and safe schema replacement - just replace the placeholder
|
| 318 |
cleaned_code = cleaned_code.replace("os.getenv('SNOWFLAKE_SCHEMA')", f"'{schema_name}'")
|
| 319 |
cleaned_code = cleaned_code.replace('os.getenv("SNOWFLAKE_SCHEMA")', f'"{schema_name}"')
|
| 320 |
|
|
|
|
|
|
|
|
|
|
| 321 |
# FIX: Remove fake.unique() calls that cause "duplicated values after 1,000 iterations" error
|
| 322 |
cleaned_code = cleaned_code.replace("fake.unique.word()", "fake.word()")
|
| 323 |
cleaned_code = cleaned_code.replace("fake.unique.email()", "fake.email()")
|
| 324 |
cleaned_code = cleaned_code.replace("fake.unique.company()", "fake.company()")
|
| 325 |
|
| 326 |
# FIX: Truncate phone numbers to avoid extension overflow (e.g., '790-923-3730x07350')
|
| 327 |
-
# Handle multiple variations of phone number generation
|
| 328 |
-
cleaned_code = cleaned_code.replace("fake.phone_number()", "fake.phone_number()[:
|
| 329 |
-
cleaned_code = re.sub(r"faker\.phone_number\(\)", "faker.phone_number()[:
|
| 330 |
-
cleaned_code = re.sub(r"'Phone':\s*fake\.phone_number\(\)", "'Phone': fake.phone_number()[:
|
| 331 |
# Also handle when it's in a larger expression or assignment
|
| 332 |
-
cleaned_code = re.sub(r"(\w+\.phone_number\(\))(?!\[)", r"\1[:
|
| 333 |
|
| 334 |
# FIX: Convert SQLite-style ? placeholders to Snowflake-style %s placeholders
|
| 335 |
-
import re
|
| 336 |
cleaned_code = re.sub(r'\bVALUES\s*\(\?', 'VALUES (%s', cleaned_code)
|
| 337 |
cleaned_code = re.sub(r',\s*\?', ', %s', cleaned_code)
|
| 338 |
|
|
@@ -343,9 +351,8 @@ def execute_population_script(python_code, schema_name):
|
|
| 343 |
print("π STARTING DATA POPULATION EXECUTION")
|
| 344 |
print("=" * 50)"""
|
| 345 |
)
|
| 346 |
-
|
| 347 |
# Add logging to populate functions dynamically
|
| 348 |
-
import re
|
| 349 |
|
| 350 |
# Find all populate function definitions and add logging
|
| 351 |
def add_function_logging(match):
|
|
@@ -397,6 +404,7 @@ def execute_population_script(python_code, schema_name):
|
|
| 397 |
"os": os,
|
| 398 |
"random": random,
|
| 399 |
"datetime": datetime,
|
|
|
|
| 400 |
"get_snowflake_connection_params": get_snowflake_connection_params,
|
| 401 |
}
|
| 402 |
|
|
@@ -408,11 +416,24 @@ def execute_population_script(python_code, schema_name):
|
|
| 408 |
print(f"π DEBUG: Cleaned code preview (first 500 chars):")
|
| 409 |
print(cleaned_code[:500])
|
| 410 |
print("...")
|
| 411 |
-
|
| 412 |
-
#
|
| 413 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 414 |
f.write(cleaned_code)
|
| 415 |
-
print(f"
|
| 416 |
|
| 417 |
# First, validate the syntax before executing
|
| 418 |
try:
|
|
@@ -439,6 +460,24 @@ def execute_population_script(python_code, schema_name):
|
|
| 439 |
print("=" * 50)
|
| 440 |
print(f"β DATA POPULATION FAILED: {str(e)}")
|
| 441 |
print("=" * 50)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 442 |
return False, f"Population execution failed: {str(e)}"
|
| 443 |
|
| 444 |
def generate_deployment_summary(demo_builder_instance):
|
|
@@ -891,6 +930,12 @@ def create_demo_prep_interface():
|
|
| 891 |
label="Schema Prefix",
|
| 892 |
info="Prefix for schema names (e.g., demo_20250930)"
|
| 893 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 894 |
|
| 895 |
with gr.TabItem("Admin"):
|
| 896 |
ts_instance_url = gr.Textbox(
|
|
@@ -1094,7 +1139,7 @@ def create_demo_prep_interface():
|
|
| 1094 |
# Progressive workflow with Multi-LLM support
|
| 1095 |
def progressive_workflow_handler(
|
| 1096 |
url, use_case, llm_provider, demo_builder_instance, mode="expert", stop_before_deploy=False,
|
| 1097 |
-
temperature=0.3, max_tokens=4000
|
| 1098 |
):
|
| 1099 |
# Initialize DemoBuilder if none exists
|
| 1100 |
if demo_builder_instance is None:
|
|
@@ -1102,6 +1147,17 @@ def create_demo_prep_interface():
|
|
| 1102 |
return "Please enter a company URL", None, "Start Research", False
|
| 1103 |
demo_builder_instance = DemoBuilder(use_case, url.strip())
|
| 1104 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1105 |
# Initialize or continue with existing content
|
| 1106 |
if not hasattr(demo_builder_instance, 'accumulated_content'):
|
| 1107 |
accumulated_content = ""
|
|
@@ -1327,6 +1383,10 @@ def create_demo_prep_interface():
|
|
| 1327 |
workflow_updates += f"π‘ Next: Click 'Create DDL' to generate database schema\n"
|
| 1328 |
demo_builder_instance.workflow_updates = workflow_updates
|
| 1329 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1330 |
yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
|
| 1331 |
|
| 1332 |
elif demo_builder_instance.current_stage == "create":
|
|
@@ -1338,24 +1398,26 @@ def create_demo_prep_interface():
|
|
| 1338 |
)
|
| 1339 |
yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
|
| 1340 |
|
| 1341 |
-
#
|
| 1342 |
-
|
| 1343 |
-
|
| 1344 |
-
|
| 1345 |
-
|
| 1346 |
-
|
| 1347 |
-
|
| 1348 |
-
|
| 1349 |
-
|
| 1350 |
-
|
| 1351 |
-
|
| 1352 |
-
|
| 1353 |
-
|
| 1354 |
-
|
| 1355 |
-
|
| 1356 |
-
|
| 1357 |
-
|
| 1358 |
-
|
|
|
|
|
|
|
| 1359 |
|
| 1360 |
schema_prompt = f"""Create ONLY complete SQL DDL statements for a {use_case} demo database.
|
| 1361 |
|
|
@@ -1380,6 +1442,11 @@ TECHNICAL REQUIREMENTS:
|
|
| 1380 |
- Include realistic column names that match the business context
|
| 1381 |
- Add proper constraints and relationships
|
| 1382 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1383 |
SNOWFLAKE SYNTAX EXAMPLES:
|
| 1384 |
- Auto-increment: ColumnID INT IDENTITY(1,1) PRIMARY KEY
|
| 1385 |
- NOT: ColumnID INT PRIMARY KEY AUTO_INCREMENT
|
|
@@ -1426,6 +1493,11 @@ Generate complete CREATE TABLE statements with proper Snowflake syntax and depen
|
|
| 1426 |
demo_builder_instance.accumulated_content = accumulated_content
|
| 1427 |
demo_builder_instance.workflow_updates = workflow_updates
|
| 1428 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1429 |
yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
|
| 1430 |
|
| 1431 |
elif demo_builder_instance.current_stage == "populate":
|
|
@@ -1649,13 +1721,18 @@ Schema Validation: Will be checked next...
|
|
| 1649 |
yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
|
| 1650 |
|
| 1651 |
print(f"π DEBUG: About to advance from populate to deploy stage")
|
| 1652 |
-
|
| 1653 |
demo_builder_instance.advance_stage()
|
| 1654 |
demo_builder_instance.accumulated_content = accumulated_content
|
| 1655 |
demo_builder_instance.workflow_updates = workflow_updates
|
| 1656 |
print(f"π DEBUG: Advanced to stage: {demo_builder_instance.current_stage}")
|
| 1657 |
print(f"π DEBUG: Stage status: {demo_builder_instance.stage_status}")
|
| 1658 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1659 |
yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
|
| 1660 |
return # Exit populate stage immediately after advancing
|
| 1661 |
|
|
@@ -1695,6 +1772,7 @@ Schema Validation: Will be checked next...
|
|
| 1695 |
company_name,
|
| 1696 |
use_case,
|
| 1697 |
demo_builder_instance.schema_generation_results,
|
|
|
|
| 1698 |
)
|
| 1699 |
)
|
| 1700 |
|
|
@@ -1803,6 +1881,7 @@ Schema Validation: Will be checked next...
|
|
| 1803 |
schema=schema_name,
|
| 1804 |
company_name=company_name,
|
| 1805 |
use_case=use_case,
|
|
|
|
| 1806 |
progress_callback=ts_detailed_callback
|
| 1807 |
)
|
| 1808 |
deployment_complete = True
|
|
@@ -1837,25 +1916,9 @@ Schema Validation: Will be checked next...
|
|
| 1837 |
demo_builder_instance.deployment_status += ts_progress_msg
|
| 1838 |
workflow_updates += ts_progress_msg
|
| 1839 |
yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
|
| 1840 |
-
|
| 1841 |
-
# Add detailed progress messages to main window after deployment
|
| 1842 |
-
workflow_updates += "\nBuilding relationships...\n"
|
| 1843 |
-
workflow_updates += "Creating tables...\n"
|
| 1844 |
-
|
| 1845 |
-
# Show detailed table creation info if available
|
| 1846 |
-
if 'tables' in ts_results and ts_results['tables']:
|
| 1847 |
-
workflow_updates += f"Generated joins between tables\n"
|
| 1848 |
-
for table_name in ts_results['tables']:
|
| 1849 |
-
workflow_updates += f" β’ {table_name} created successfully\n"
|
| 1850 |
-
workflow_updates += f"Tables created successfully ({len(ts_results['tables'])} total)\n"
|
| 1851 |
-
else:
|
| 1852 |
-
workflow_updates += "Generated joins between tables\n"
|
| 1853 |
-
workflow_updates += "Tables created successfully\n"
|
| 1854 |
|
| 1855 |
-
|
| 1856 |
-
|
| 1857 |
-
yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
|
| 1858 |
-
|
| 1859 |
if ts_results['success']:
|
| 1860 |
ts_msg = "\nThoughtSpot metadata deployment completed!\n\n"
|
| 1861 |
ts_msg += "Created ThoughtSpot objects:\n"
|
|
@@ -1959,9 +2022,19 @@ Schema Validation: Will be checked next...
|
|
| 1959 |
|
| 1960 |
demo_builder_instance.advance_stage()
|
| 1961 |
demo_builder_instance.accumulated_content = accumulated_content
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1962 |
yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
|
| 1963 |
|
| 1964 |
except Exception as e:
|
|
|
|
|
|
|
|
|
|
| 1965 |
error_msg = f"β Deployment error: {str(e)}\n"
|
| 1966 |
demo_builder_instance.deployment_status += error_msg
|
| 1967 |
workflow_updates += error_msg
|
|
@@ -1977,7 +2050,7 @@ Schema Validation: Will be checked next...
|
|
| 1977 |
yield accumulated_content, workflow_updates, None, "Research", False
|
| 1978 |
|
| 1979 |
# Redo functionality
|
| 1980 |
-
def redo_latest_stage(demo_builder_instance, llm_provider, url, use_case, mode="expert", stop_deploy=False, temperature=0.3, max_tokens=4000):
|
| 1981 |
if (
|
| 1982 |
demo_builder_instance
|
| 1983 |
and demo_builder_instance.current_stage != "research"
|
|
@@ -1992,7 +2065,7 @@ Schema Validation: Will be checked next...
|
|
| 1992 |
|
| 1993 |
# Re-run the workflow with same format as button_click_handler
|
| 1994 |
for result in progressive_workflow_handler(
|
| 1995 |
-
url, use_case, llm_provider, demo_builder_instance, mode, stop_deploy, temperature, max_tokens
|
| 1996 |
):
|
| 1997 |
if len(result) == 5:
|
| 1998 |
# New format with workflow_updates
|
|
@@ -2659,9 +2732,9 @@ SCRIPT REQUIREMENTS:
|
|
| 2659 |
# Legacy CDW deployment function removed - deployment now handled by main workflow
|
| 2660 |
|
| 2661 |
# Main click handler with multi-LLM support - UPDATED to route to AI Output tab
|
| 2662 |
-
def button_click_handler(url, use_case, llm_provider, demo_builder_instance, mode, stop_deploy, temperature, max_tokens):
|
| 2663 |
for result in progressive_workflow_handler(
|
| 2664 |
-
url, use_case, llm_provider, demo_builder_instance, mode, stop_deploy, temperature, max_tokens
|
| 2665 |
):
|
| 2666 |
if len(result) == 5:
|
| 2667 |
# New format with workflow_updates
|
|
@@ -2692,7 +2765,7 @@ SCRIPT REQUIREMENTS:
|
|
| 2692 |
# Event handlers - UPDATED for new AI Output tab and Results box
|
| 2693 |
research_btn.click(
|
| 2694 |
fn=button_click_handler,
|
| 2695 |
-
inputs=[url_input, industry_dropdown, llm_dropdown, demo_builder, workflow_mode, stop_before_deploy, temperature_slider, max_tokens_input],
|
| 2696 |
outputs=[
|
| 2697 |
ai_output_box, # streaming content goes to AI Output tab
|
| 2698 |
results_box, # dynamic content based on stage/mode
|
|
@@ -2712,10 +2785,10 @@ SCRIPT REQUIREMENTS:
|
|
| 2712 |
)
|
| 2713 |
|
| 2714 |
redo_btn.click(
|
| 2715 |
-
fn=lambda db, llm, url, use_case, mode, stop_deploy, temp, max_tok: [
|
| 2716 |
-
result for result in redo_latest_stage(db, llm, url, use_case, mode, stop_deploy, temp, max_tok)
|
| 2717 |
],
|
| 2718 |
-
inputs=[demo_builder, llm_dropdown, url_input, industry_dropdown, workflow_mode, stop_before_deploy, temperature_slider, max_tokens_input],
|
| 2719 |
outputs=[
|
| 2720 |
ai_output_box, # streaming content goes to AI Output tab
|
| 2721 |
results_box, # dynamic content based on stage/mode
|
|
@@ -3191,7 +3264,7 @@ SCRIPT REQUIREMENTS:
|
|
| 3191 |
|
| 3192 |
def save_settings_handler(
|
| 3193 |
email, ai_model, temperature, max_tokens, company_size, company_url, use_case, data_volume,
|
| 3194 |
-
warehouse, database, ts_url, ts_username, batch_size, thread_count
|
| 3195 |
):
|
| 3196 |
"""Save settings to Supabase database"""
|
| 3197 |
if not email or '@' not in email:
|
|
@@ -3215,7 +3288,8 @@ SCRIPT REQUIREMENTS:
|
|
| 3215 |
"thoughtspot_url": ts_url,
|
| 3216 |
"thoughtspot_username": ts_username,
|
| 3217 |
"batch_size": batch_size,
|
| 3218 |
-
"thread_count": thread_count
|
|
|
|
| 3219 |
}
|
| 3220 |
|
| 3221 |
success = settings_client.save_all_settings(email, settings)
|
|
@@ -3243,6 +3317,7 @@ SCRIPT REQUIREMENTS:
|
|
| 3243 |
"", # ts_username
|
| 3244 |
5000, # batch_size
|
| 3245 |
4, # thread_count
|
|
|
|
| 3246 |
"Settings not loaded - enter valid email", # status
|
| 3247 |
"gpt-4o", # llm_dropdown (main form AI Model)
|
| 3248 |
"" # url_input (main form Company URL)
|
|
@@ -3266,6 +3341,7 @@ SCRIPT REQUIREMENTS:
|
|
| 3266 |
settings.get("thoughtspot_username", ""),
|
| 3267 |
int(settings.get("batch_size", 5000)),
|
| 3268 |
int(settings.get("thread_count", 4)),
|
|
|
|
| 3269 |
f"β
Settings loaded for {email}",
|
| 3270 |
ai_model, # llm_dropdown (same as default_ai_model)
|
| 3271 |
company_url # url_input (same as default_company_url)
|
|
@@ -3281,6 +3357,7 @@ SCRIPT REQUIREMENTS:
|
|
| 3281 |
"",
|
| 3282 |
5000,
|
| 3283 |
4,
|
|
|
|
| 3284 |
f"β Error loading settings: {str(e)}"
|
| 3285 |
]
|
| 3286 |
|
|
@@ -3307,7 +3384,8 @@ SCRIPT REQUIREMENTS:
|
|
| 3307 |
ts_instance_url,
|
| 3308 |
ts_username,
|
| 3309 |
batch_size_slider,
|
| 3310 |
-
thread_count_slider
|
|
|
|
| 3311 |
],
|
| 3312 |
outputs=[settings_status]
|
| 3313 |
)
|
|
@@ -3552,6 +3630,7 @@ SCRIPT REQUIREMENTS:
|
|
| 3552 |
ts_username,
|
| 3553 |
batch_size_slider,
|
| 3554 |
thread_count_slider,
|
|
|
|
| 3555 |
settings_status,
|
| 3556 |
llm_dropdown, # Also update main form AI Model dropdown
|
| 3557 |
url_input # Also update main form Company URL
|
|
|
|
| 313 |
"conn_params = get_snowflake_connection_params()",
|
| 314 |
"conn_params = get_snowflake_connection_params()\nconn_params.pop('schema', None) # Remove schema to avoid duplicate"
|
| 315 |
)
|
| 316 |
+
|
| 317 |
+
# CRITICAL FIX: Add USE SCHEMA command after cursor creation to set schema context
|
| 318 |
+
cleaned_code = cleaned_code.replace(
|
| 319 |
+
"cursor = conn.cursor()",
|
| 320 |
+
f"cursor = conn.cursor()\n cursor.execute('USE SCHEMA \"{schema_name}\"') # Set schema context for INSERTs"
|
| 321 |
+
)
|
| 322 |
+
|
| 323 |
# Simple and safe schema replacement - just replace the placeholder
|
| 324 |
cleaned_code = cleaned_code.replace("os.getenv('SNOWFLAKE_SCHEMA')", f"'{schema_name}'")
|
| 325 |
cleaned_code = cleaned_code.replace('os.getenv("SNOWFLAKE_SCHEMA")', f'"{schema_name}"')
|
| 326 |
|
| 327 |
+
# Import re module at the top of this section
|
| 328 |
+
import re
|
| 329 |
+
|
| 330 |
# FIX: Remove fake.unique() calls that cause "duplicated values after 1,000 iterations" error
|
| 331 |
cleaned_code = cleaned_code.replace("fake.unique.word()", "fake.word()")
|
| 332 |
cleaned_code = cleaned_code.replace("fake.unique.email()", "fake.email()")
|
| 333 |
cleaned_code = cleaned_code.replace("fake.unique.company()", "fake.company()")
|
| 334 |
|
| 335 |
# FIX: Truncate phone numbers to avoid extension overflow (e.g., '790-923-3730x07350')
|
| 336 |
+
# Handle multiple variations of phone number generation - truncate to 15 chars (max field size)
|
| 337 |
+
cleaned_code = cleaned_code.replace("fake.phone_number()", "fake.phone_number()[:15]")
|
| 338 |
+
cleaned_code = re.sub(r"faker\.phone_number\(\)", "faker.phone_number()[:15]", cleaned_code)
|
| 339 |
+
cleaned_code = re.sub(r"'Phone':\s*fake\.phone_number\(\)", "'Phone': fake.phone_number()[:15]", cleaned_code)
|
| 340 |
# Also handle when it's in a larger expression or assignment
|
| 341 |
+
cleaned_code = re.sub(r"(\w+\.phone_number\(\))(?!\[)", r"\1[:15]", cleaned_code)
|
| 342 |
|
| 343 |
# FIX: Convert SQLite-style ? placeholders to Snowflake-style %s placeholders
|
|
|
|
| 344 |
cleaned_code = re.sub(r'\bVALUES\s*\(\?', 'VALUES (%s', cleaned_code)
|
| 345 |
cleaned_code = re.sub(r',\s*\?', ', %s', cleaned_code)
|
| 346 |
|
|
|
|
| 351 |
print("π STARTING DATA POPULATION EXECUTION")
|
| 352 |
print("=" * 50)"""
|
| 353 |
)
|
| 354 |
+
|
| 355 |
# Add logging to populate functions dynamically
|
|
|
|
| 356 |
|
| 357 |
# Find all populate function definitions and add logging
|
| 358 |
def add_function_logging(match):
|
|
|
|
| 404 |
"os": os,
|
| 405 |
"random": random,
|
| 406 |
"datetime": datetime,
|
| 407 |
+
"re": re,
|
| 408 |
"get_snowflake_connection_params": get_snowflake_connection_params,
|
| 409 |
}
|
| 410 |
|
|
|
|
| 416 |
print(f"π DEBUG: Cleaned code preview (first 500 chars):")
|
| 417 |
print(cleaned_code[:500])
|
| 418 |
print("...")
|
| 419 |
+
|
| 420 |
+
# ALWAYS save the full generated code for debugging (not just in DEBUG mode)
|
| 421 |
+
# Save to /tmp for immediate access
|
| 422 |
+
tmp_file = f"/tmp/generated_population_script_{schema_name}.py"
|
| 423 |
+
with open(tmp_file, "w") as f:
|
| 424 |
+
f.write(cleaned_code)
|
| 425 |
+
print(f"π Population script saved to: {tmp_file}")
|
| 426 |
+
|
| 427 |
+
# Also save to logs directory for persistence
|
| 428 |
+
from pathlib import Path
|
| 429 |
+
import datetime
|
| 430 |
+
log_dir = Path("logs") / "population_scripts"
|
| 431 |
+
log_dir.mkdir(parents=True, exist_ok=True)
|
| 432 |
+
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 433 |
+
log_file = log_dir / f"population_{schema_name}_{timestamp}.py"
|
| 434 |
+
with open(log_file, "w") as f:
|
| 435 |
f.write(cleaned_code)
|
| 436 |
+
print(f"π Also saved to: {log_file}")
|
| 437 |
|
| 438 |
# First, validate the syntax before executing
|
| 439 |
try:
|
|
|
|
| 460 |
print("=" * 50)
|
| 461 |
print(f"β DATA POPULATION FAILED: {str(e)}")
|
| 462 |
print("=" * 50)
|
| 463 |
+
|
| 464 |
+
# Print detailed traceback
|
| 465 |
+
import traceback
|
| 466 |
+
print("π Full traceback:")
|
| 467 |
+
traceback.print_exc()
|
| 468 |
+
|
| 469 |
+
# Show which line of generated code caused the error
|
| 470 |
+
tb = traceback.extract_tb(e.__traceback__)
|
| 471 |
+
for frame in tb:
|
| 472 |
+
if '<population_script>' in frame.filename:
|
| 473 |
+
print(f"\nβ Error in generated code at line {frame.lineno}:")
|
| 474 |
+
lines = cleaned_code.split('\n')
|
| 475 |
+
start = max(0, frame.lineno - 3)
|
| 476 |
+
end = min(len(lines), frame.lineno + 2)
|
| 477 |
+
for i in range(start, end):
|
| 478 |
+
marker = ">>> " if i == frame.lineno - 1 else " "
|
| 479 |
+
print(f"{marker}{i+1:3d}: {lines[i]}")
|
| 480 |
+
|
| 481 |
return False, f"Population execution failed: {str(e)}"
|
| 482 |
|
| 483 |
def generate_deployment_summary(demo_builder_instance):
|
|
|
|
| 930 |
label="Schema Prefix",
|
| 931 |
info="Prefix for schema names (e.g., demo_20250930)"
|
| 932 |
)
|
| 933 |
+
naming_prefix_input = gr.Textbox(
|
| 934 |
+
value="",
|
| 935 |
+
label="Object Naming Prefix (Optional)",
|
| 936 |
+
placeholder="e.g., BOONE, SALES, DEMO",
|
| 937 |
+
info="Custom prefix for all objects. Format: {prefix}_{day+ms}_{objtype}. Leave blank for default naming."
|
| 938 |
+
)
|
| 939 |
|
| 940 |
with gr.TabItem("Admin"):
|
| 941 |
ts_instance_url = gr.Textbox(
|
|
|
|
| 1139 |
# Progressive workflow with Multi-LLM support
|
| 1140 |
def progressive_workflow_handler(
|
| 1141 |
url, use_case, llm_provider, demo_builder_instance, mode="expert", stop_before_deploy=False,
|
| 1142 |
+
temperature=0.3, max_tokens=4000, naming_prefix=""
|
| 1143 |
):
|
| 1144 |
# Initialize DemoBuilder if none exists
|
| 1145 |
if demo_builder_instance is None:
|
|
|
|
| 1147 |
return "Please enter a company URL", None, "Start Research", False
|
| 1148 |
demo_builder_instance = DemoBuilder(use_case, url.strip())
|
| 1149 |
|
| 1150 |
+
# Initialize demo logger for this session
|
| 1151 |
+
session_logger = get_demo_logger(user_email="demo_user")
|
| 1152 |
+
demo_builder_instance.logger = session_logger
|
| 1153 |
+
session_logger.log_demo_config(
|
| 1154 |
+
use_case=use_case,
|
| 1155 |
+
company=url,
|
| 1156 |
+
company_url=url,
|
| 1157 |
+
industry="TBD",
|
| 1158 |
+
llm_model=llm_provider
|
| 1159 |
+
)
|
| 1160 |
+
|
| 1161 |
# Initialize or continue with existing content
|
| 1162 |
if not hasattr(demo_builder_instance, 'accumulated_content'):
|
| 1163 |
accumulated_content = ""
|
|
|
|
| 1383 |
workflow_updates += f"π‘ Next: Click 'Create DDL' to generate database schema\n"
|
| 1384 |
demo_builder_instance.workflow_updates = workflow_updates
|
| 1385 |
|
| 1386 |
+
# Log research stage completion
|
| 1387 |
+
if hasattr(demo_builder_instance, 'logger'):
|
| 1388 |
+
demo_builder_instance.logger.log_stage_completion("research", True)
|
| 1389 |
+
|
| 1390 |
yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
|
| 1391 |
|
| 1392 |
elif demo_builder_instance.current_stage == "create":
|
|
|
|
| 1398 |
)
|
| 1399 |
yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
|
| 1400 |
|
| 1401 |
+
# Generate schema name using same convention as ThoughtSpot objects
|
| 1402 |
+
from datetime import datetime as dt
|
| 1403 |
+
now = dt.now()
|
| 1404 |
+
|
| 1405 |
+
# Clean prefix (uppercase, alphanumeric only) - can be empty string
|
| 1406 |
+
if naming_prefix:
|
| 1407 |
+
prefix_clean = re.sub(r'[^a-zA-Z0-9]', '', naming_prefix.upper())
|
| 1408 |
+
else:
|
| 1409 |
+
prefix_clean = ""
|
| 1410 |
+
|
| 1411 |
+
# Short unique timestamp: day (2 digits) + microseconds (6 digits)
|
| 1412 |
+
day = now.strftime('%d')
|
| 1413 |
+
microsec = now.strftime('%f') # 6 digits
|
| 1414 |
+
short_timestamp = f"{day}{microsec}"
|
| 1415 |
+
|
| 1416 |
+
# Base format: PREFIX_DDMICROS (prefix can be empty)
|
| 1417 |
+
if prefix_clean:
|
| 1418 |
+
schema_name = f"{prefix_clean}_{short_timestamp}_scm"
|
| 1419 |
+
else:
|
| 1420 |
+
schema_name = f"{short_timestamp}_scm"
|
| 1421 |
|
| 1422 |
schema_prompt = f"""Create ONLY complete SQL DDL statements for a {use_case} demo database.
|
| 1423 |
|
|
|
|
| 1442 |
- Include realistic column names that match the business context
|
| 1443 |
- Add proper constraints and relationships
|
| 1444 |
|
| 1445 |
+
**CRITICAL - COLUMN NAME CONSISTENCY:**
|
| 1446 |
+
- When referencing columns in FOREIGN KEY constraints, use the EXACT column name (including case) as defined in the CREATE TABLE statement
|
| 1447 |
+
- Example: If you define "LocationID INT" then reference it as "LocationID" NOT "LOCATIONID"
|
| 1448 |
+
- Column names in FOREIGN KEY REFERENCES must match the PRIMARY KEY column name exactly
|
| 1449 |
+
|
| 1450 |
SNOWFLAKE SYNTAX EXAMPLES:
|
| 1451 |
- Auto-increment: ColumnID INT IDENTITY(1,1) PRIMARY KEY
|
| 1452 |
- NOT: ColumnID INT PRIMARY KEY AUTO_INCREMENT
|
|
|
|
| 1493 |
demo_builder_instance.accumulated_content = accumulated_content
|
| 1494 |
demo_builder_instance.workflow_updates = workflow_updates
|
| 1495 |
|
| 1496 |
+
# Log DDL stage completion and save DDL
|
| 1497 |
+
if hasattr(demo_builder_instance, 'logger'):
|
| 1498 |
+
demo_builder_instance.logger.log_ddl(schema_results, demo_builder_instance.schema_name if hasattr(demo_builder_instance, 'schema_name') else None)
|
| 1499 |
+
demo_builder_instance.logger.log_stage_completion("ddl", True)
|
| 1500 |
+
|
| 1501 |
yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
|
| 1502 |
|
| 1503 |
elif demo_builder_instance.current_stage == "populate":
|
|
|
|
| 1721 |
yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
|
| 1722 |
|
| 1723 |
print(f"π DEBUG: About to advance from populate to deploy stage")
|
| 1724 |
+
|
| 1725 |
demo_builder_instance.advance_stage()
|
| 1726 |
demo_builder_instance.accumulated_content = accumulated_content
|
| 1727 |
demo_builder_instance.workflow_updates = workflow_updates
|
| 1728 |
print(f"π DEBUG: Advanced to stage: {demo_builder_instance.current_stage}")
|
| 1729 |
print(f"π DEBUG: Stage status: {demo_builder_instance.stage_status}")
|
| 1730 |
+
|
| 1731 |
+
# Log population code stage completion
|
| 1732 |
+
if hasattr(demo_builder_instance, 'logger') and hasattr(demo_builder_instance, 'population_code'):
|
| 1733 |
+
demo_builder_instance.logger.log_population_code(demo_builder_instance.population_code)
|
| 1734 |
+
demo_builder_instance.logger.log_stage_completion("population_code", True)
|
| 1735 |
+
|
| 1736 |
yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
|
| 1737 |
return # Exit populate stage immediately after advancing
|
| 1738 |
|
|
|
|
| 1772 |
company_name,
|
| 1773 |
use_case,
|
| 1774 |
demo_builder_instance.schema_generation_results,
|
| 1775 |
+
naming_prefix=naming_prefix
|
| 1776 |
)
|
| 1777 |
)
|
| 1778 |
|
|
|
|
| 1881 |
schema=schema_name,
|
| 1882 |
company_name=company_name,
|
| 1883 |
use_case=use_case,
|
| 1884 |
+
naming_prefix=naming_prefix or None,
|
| 1885 |
progress_callback=ts_detailed_callback
|
| 1886 |
)
|
| 1887 |
deployment_complete = True
|
|
|
|
| 1916 |
demo_builder_instance.deployment_status += ts_progress_msg
|
| 1917 |
workflow_updates += ts_progress_msg
|
| 1918 |
yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1919 |
|
| 1920 |
+
# Deployment progress is shown live during execution - no duplicate summary needed
|
| 1921 |
+
|
|
|
|
|
|
|
| 1922 |
if ts_results['success']:
|
| 1923 |
ts_msg = "\nThoughtSpot metadata deployment completed!\n\n"
|
| 1924 |
ts_msg += "Created ThoughtSpot objects:\n"
|
|
|
|
| 2022 |
|
| 2023 |
demo_builder_instance.advance_stage()
|
| 2024 |
demo_builder_instance.accumulated_content = accumulated_content
|
| 2025 |
+
|
| 2026 |
+
# Log deployment stage completion
|
| 2027 |
+
if hasattr(demo_builder_instance, 'logger'):
|
| 2028 |
+
deploy_success = deploy_success if 'deploy_success' in locals() else False
|
| 2029 |
+
demo_builder_instance.logger.log_stage_completion("deploy", deploy_success,
|
| 2030 |
+
details=f"Schema: {schema_name if 'schema_name' in locals() else 'N/A'}")
|
| 2031 |
+
|
| 2032 |
yield accumulated_content, workflow_updates, demo_builder_instance, demo_builder_instance.get_current_button_text(), demo_builder_instance.is_button_disabled()
|
| 2033 |
|
| 2034 |
except Exception as e:
|
| 2035 |
+
# Log deployment error
|
| 2036 |
+
if hasattr(demo_builder_instance, 'logger'):
|
| 2037 |
+
demo_builder_instance.logger.log_error(str(e), "deployment", e)
|
| 2038 |
error_msg = f"β Deployment error: {str(e)}\n"
|
| 2039 |
demo_builder_instance.deployment_status += error_msg
|
| 2040 |
workflow_updates += error_msg
|
|
|
|
| 2050 |
yield accumulated_content, workflow_updates, None, "Research", False
|
| 2051 |
|
| 2052 |
# Redo functionality
|
| 2053 |
+
def redo_latest_stage(demo_builder_instance, llm_provider, url, use_case, mode="expert", stop_deploy=False, temperature=0.3, max_tokens=4000, naming_prefix=""):
|
| 2054 |
if (
|
| 2055 |
demo_builder_instance
|
| 2056 |
and demo_builder_instance.current_stage != "research"
|
|
|
|
| 2065 |
|
| 2066 |
# Re-run the workflow with same format as button_click_handler
|
| 2067 |
for result in progressive_workflow_handler(
|
| 2068 |
+
url, use_case, llm_provider, demo_builder_instance, mode, stop_deploy, temperature, max_tokens, naming_prefix
|
| 2069 |
):
|
| 2070 |
if len(result) == 5:
|
| 2071 |
# New format with workflow_updates
|
|
|
|
| 2732 |
# Legacy CDW deployment function removed - deployment now handled by main workflow
|
| 2733 |
|
| 2734 |
# Main click handler with multi-LLM support - UPDATED to route to AI Output tab
|
| 2735 |
+
def button_click_handler(url, use_case, llm_provider, demo_builder_instance, mode, stop_deploy, temperature, max_tokens, naming_prefix=""):
|
| 2736 |
for result in progressive_workflow_handler(
|
| 2737 |
+
url, use_case, llm_provider, demo_builder_instance, mode, stop_deploy, temperature, max_tokens, naming_prefix
|
| 2738 |
):
|
| 2739 |
if len(result) == 5:
|
| 2740 |
# New format with workflow_updates
|
|
|
|
| 2765 |
# Event handlers - UPDATED for new AI Output tab and Results box
|
| 2766 |
research_btn.click(
|
| 2767 |
fn=button_click_handler,
|
| 2768 |
+
inputs=[url_input, industry_dropdown, llm_dropdown, demo_builder, workflow_mode, stop_before_deploy, temperature_slider, max_tokens_input, naming_prefix_input],
|
| 2769 |
outputs=[
|
| 2770 |
ai_output_box, # streaming content goes to AI Output tab
|
| 2771 |
results_box, # dynamic content based on stage/mode
|
|
|
|
| 2785 |
)
|
| 2786 |
|
| 2787 |
redo_btn.click(
|
| 2788 |
+
fn=lambda db, llm, url, use_case, mode, stop_deploy, temp, max_tok, naming_prefix: [
|
| 2789 |
+
result for result in redo_latest_stage(db, llm, url, use_case, mode, stop_deploy, temp, max_tok, naming_prefix)
|
| 2790 |
],
|
| 2791 |
+
inputs=[demo_builder, llm_dropdown, url_input, industry_dropdown, workflow_mode, stop_before_deploy, temperature_slider, max_tokens_input, naming_prefix_input],
|
| 2792 |
outputs=[
|
| 2793 |
ai_output_box, # streaming content goes to AI Output tab
|
| 2794 |
results_box, # dynamic content based on stage/mode
|
|
|
|
| 3264 |
|
| 3265 |
def save_settings_handler(
|
| 3266 |
email, ai_model, temperature, max_tokens, company_size, company_url, use_case, data_volume,
|
| 3267 |
+
warehouse, database, ts_url, ts_username, batch_size, thread_count, naming_prefix
|
| 3268 |
):
|
| 3269 |
"""Save settings to Supabase database"""
|
| 3270 |
if not email or '@' not in email:
|
|
|
|
| 3288 |
"thoughtspot_url": ts_url,
|
| 3289 |
"thoughtspot_username": ts_username,
|
| 3290 |
"batch_size": batch_size,
|
| 3291 |
+
"thread_count": thread_count,
|
| 3292 |
+
"naming_prefix": naming_prefix or ""
|
| 3293 |
}
|
| 3294 |
|
| 3295 |
success = settings_client.save_all_settings(email, settings)
|
|
|
|
| 3317 |
"", # ts_username
|
| 3318 |
5000, # batch_size
|
| 3319 |
4, # thread_count
|
| 3320 |
+
"", # naming_prefix
|
| 3321 |
"Settings not loaded - enter valid email", # status
|
| 3322 |
"gpt-4o", # llm_dropdown (main form AI Model)
|
| 3323 |
"" # url_input (main form Company URL)
|
|
|
|
| 3341 |
settings.get("thoughtspot_username", ""),
|
| 3342 |
int(settings.get("batch_size", 5000)),
|
| 3343 |
int(settings.get("thread_count", 4)),
|
| 3344 |
+
settings.get("naming_prefix", ""), # naming_prefix
|
| 3345 |
f"β
Settings loaded for {email}",
|
| 3346 |
ai_model, # llm_dropdown (same as default_ai_model)
|
| 3347 |
company_url # url_input (same as default_company_url)
|
|
|
|
| 3357 |
"",
|
| 3358 |
5000,
|
| 3359 |
4,
|
| 3360 |
+
"", # naming_prefix
|
| 3361 |
f"β Error loading settings: {str(e)}"
|
| 3362 |
]
|
| 3363 |
|
|
|
|
| 3384 |
ts_instance_url,
|
| 3385 |
ts_username,
|
| 3386 |
batch_size_slider,
|
| 3387 |
+
thread_count_slider,
|
| 3388 |
+
naming_prefix_input
|
| 3389 |
],
|
| 3390 |
outputs=[settings_status]
|
| 3391 |
)
|
|
|
|
| 3630 |
ts_username,
|
| 3631 |
batch_size_slider,
|
| 3632 |
thread_count_slider,
|
| 3633 |
+
naming_prefix_input,
|
| 3634 |
settings_status,
|
| 3635 |
llm_dropdown, # Also update main form AI Model dropdown
|
| 3636 |
url_input # Also update main form Company URL
|
|
@@ -1019,12 +1019,16 @@ Examples:
|
|
| 1019 |
text_content = viz_config.get('text_content', viz_config.get('name', ''))
|
| 1020 |
bg_color = viz_config.get('background_color', '#2E3D4D') # Default dark background
|
| 1021 |
|
| 1022 |
-
# TEXT tiles in ThoughtSpot
|
| 1023 |
text_tml = {
|
| 1024 |
'id': viz_config['id'],
|
| 1025 |
'answer': {
|
| 1026 |
'name': viz_config.get('name', 'Text'),
|
| 1027 |
'description': viz_config.get('description', ''),
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1028 |
'text_tile': {
|
| 1029 |
'text': text_content,
|
| 1030 |
'background_color': bg_color
|
|
@@ -1510,6 +1514,7 @@ Return ONLY a valid JSON object with structure:
|
|
| 1510 |
|
| 1511 |
# Debug: print response structure
|
| 1512 |
print(f" API Response type: {type(result)}")
|
|
|
|
| 1513 |
if isinstance(result, list) and len(result) > 0:
|
| 1514 |
print(f" First item keys: {list(result[0].keys())}")
|
| 1515 |
response_obj = result[0].get('response', {})
|
|
@@ -1517,12 +1522,20 @@ Return ONLY a valid JSON object with structure:
|
|
| 1517 |
status_obj = response_obj.get('status', {})
|
| 1518 |
print(f" Status: {status_obj}")
|
| 1519 |
|
| 1520 |
-
#
|
| 1521 |
if status_obj.get('status_code') == 'ERROR':
|
| 1522 |
-
|
|
|
|
| 1523 |
import json
|
|
|
|
| 1524 |
print(json.dumps(result[0], indent=2)[:2000])
|
| 1525 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1526 |
# Navigate response structure
|
| 1527 |
liveboard_id = result[0].get('response', {}).get('header', {}).get('id_guid')
|
| 1528 |
if not liveboard_id:
|
|
@@ -1574,10 +1587,26 @@ Return ONLY a valid JSON object with structure:
|
|
| 1574 |
'success': False,
|
| 1575 |
'error': f"API returned status {response.status_code}: {response.text}"
|
| 1576 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1577 |
except Exception as e:
|
|
|
|
| 1578 |
return {
|
| 1579 |
'success': False,
|
| 1580 |
-
'error': str(e)
|
|
|
|
| 1581 |
}
|
| 1582 |
|
| 1583 |
|
|
@@ -1603,6 +1632,57 @@ def create_liveboard_from_model(
|
|
| 1603 |
Returns:
|
| 1604 |
Deployment result dictionary
|
| 1605 |
"""
|
| 1606 |
-
|
| 1607 |
-
|
| 1608 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1019 |
text_content = viz_config.get('text_content', viz_config.get('name', ''))
|
| 1020 |
bg_color = viz_config.get('background_color', '#2E3D4D') # Default dark background
|
| 1021 |
|
| 1022 |
+
# TEXT tiles in ThoughtSpot need tables field even though they don't query data
|
| 1023 |
text_tml = {
|
| 1024 |
'id': viz_config['id'],
|
| 1025 |
'answer': {
|
| 1026 |
'name': viz_config.get('name', 'Text'),
|
| 1027 |
'description': viz_config.get('description', ''),
|
| 1028 |
+
'tables': [{
|
| 1029 |
+
'id': self.model_name,
|
| 1030 |
+
'name': self.model_name
|
| 1031 |
+
}],
|
| 1032 |
'text_tile': {
|
| 1033 |
'text': text_content,
|
| 1034 |
'background_color': bg_color
|
|
|
|
| 1514 |
|
| 1515 |
# Debug: print response structure
|
| 1516 |
print(f" API Response type: {type(result)}")
|
| 1517 |
+
status_obj = {}
|
| 1518 |
if isinstance(result, list) and len(result) > 0:
|
| 1519 |
print(f" First item keys: {list(result[0].keys())}")
|
| 1520 |
response_obj = result[0].get('response', {})
|
|
|
|
| 1522 |
status_obj = response_obj.get('status', {})
|
| 1523 |
print(f" Status: {status_obj}")
|
| 1524 |
|
| 1525 |
+
# Check for API errors first
|
| 1526 |
if status_obj.get('status_code') == 'ERROR':
|
| 1527 |
+
error_msg = status_obj.get('error_message', 'Unknown error')
|
| 1528 |
+
print(f" β Liveboard creation failed: {error_msg}")
|
| 1529 |
import json
|
| 1530 |
+
print(f" Full error details:")
|
| 1531 |
print(json.dumps(result[0], indent=2)[:2000])
|
| 1532 |
|
| 1533 |
+
return {
|
| 1534 |
+
'success': False,
|
| 1535 |
+
'error': error_msg,
|
| 1536 |
+
'response': result
|
| 1537 |
+
}
|
| 1538 |
+
|
| 1539 |
# Navigate response structure
|
| 1540 |
liveboard_id = result[0].get('response', {}).get('header', {}).get('id_guid')
|
| 1541 |
if not liveboard_id:
|
|
|
|
| 1587 |
'success': False,
|
| 1588 |
'error': f"API returned status {response.status_code}: {response.text}"
|
| 1589 |
}
|
| 1590 |
+
except AttributeError as e:
|
| 1591 |
+
import traceback
|
| 1592 |
+
return {
|
| 1593 |
+
'success': False,
|
| 1594 |
+
'error': f'AttributeError in Liveboard deployment: {str(e)}',
|
| 1595 |
+
'details': traceback.format_exc()
|
| 1596 |
+
}
|
| 1597 |
+
except KeyError as e:
|
| 1598 |
+
import traceback
|
| 1599 |
+
return {
|
| 1600 |
+
'success': False,
|
| 1601 |
+
'error': f'KeyError in Liveboard deployment: {str(e)}',
|
| 1602 |
+
'details': traceback.format_exc()
|
| 1603 |
+
}
|
| 1604 |
except Exception as e:
|
| 1605 |
+
import traceback
|
| 1606 |
return {
|
| 1607 |
'success': False,
|
| 1608 |
+
'error': f'{type(e).__name__}: {str(e)}',
|
| 1609 |
+
'details': traceback.format_exc()
|
| 1610 |
}
|
| 1611 |
|
| 1612 |
|
|
|
|
| 1632 |
Returns:
|
| 1633 |
Deployment result dictionary
|
| 1634 |
"""
|
| 1635 |
+
try:
|
| 1636 |
+
# Validate inputs
|
| 1637 |
+
if not model_id or not model_id.strip():
|
| 1638 |
+
return {
|
| 1639 |
+
'success': False,
|
| 1640 |
+
'error': 'Invalid model_id: empty or None'
|
| 1641 |
+
}
|
| 1642 |
+
|
| 1643 |
+
if not model_name or not model_name.strip():
|
| 1644 |
+
return {
|
| 1645 |
+
'success': False,
|
| 1646 |
+
'error': 'Invalid model_name: empty or None'
|
| 1647 |
+
}
|
| 1648 |
+
|
| 1649 |
+
# Create Liveboard creator
|
| 1650 |
+
creator = LiveboardCreator(ts_client, model_id, model_name)
|
| 1651 |
+
|
| 1652 |
+
# Generate Liveboard TML
|
| 1653 |
+
liveboard_tml = creator.create_liveboard_tml(company_data, use_case, num_visualizations)
|
| 1654 |
+
|
| 1655 |
+
if not liveboard_tml:
|
| 1656 |
+
return {
|
| 1657 |
+
'success': False,
|
| 1658 |
+
'error': 'Failed to generate Liveboard TML: returned None or empty'
|
| 1659 |
+
}
|
| 1660 |
+
|
| 1661 |
+
# Deploy Liveboard
|
| 1662 |
+
result = creator.deploy_liveboard(liveboard_tml)
|
| 1663 |
+
|
| 1664 |
+
if not result:
|
| 1665 |
+
return {
|
| 1666 |
+
'success': False,
|
| 1667 |
+
'error': 'deploy_liveboard returned None'
|
| 1668 |
+
}
|
| 1669 |
+
|
| 1670 |
+
return result
|
| 1671 |
+
|
| 1672 |
+
except AttributeError as e:
|
| 1673 |
+
return {
|
| 1674 |
+
'success': False,
|
| 1675 |
+
'error': f'AttributeError (possible missing field): {str(e)}'
|
| 1676 |
+
}
|
| 1677 |
+
except KeyError as e:
|
| 1678 |
+
return {
|
| 1679 |
+
'success': False,
|
| 1680 |
+
'error': f'KeyError (missing required key): {str(e)}'
|
| 1681 |
+
}
|
| 1682 |
+
except Exception as e:
|
| 1683 |
+
import traceback
|
| 1684 |
+
return {
|
| 1685 |
+
'success': False,
|
| 1686 |
+
'error': f'Unexpected error: {type(e).__name__}: {str(e)}',
|
| 1687 |
+
'traceback': traceback.format_exc()
|
| 1688 |
+
}
|
|
@@ -355,6 +355,7 @@ def load_gradio_settings(email: str) -> Dict[str, Any]:
|
|
| 355 |
"default_data_volume": "Medium (10K rows)",
|
| 356 |
"default_warehouse": "COMPUTE_WH",
|
| 357 |
"default_database": "DEMO_DB",
|
|
|
|
| 358 |
|
| 359 |
# ThoughtSpot Connection
|
| 360 |
"thoughtspot_url": "",
|
|
|
|
| 355 |
"default_data_volume": "Medium (10K rows)",
|
| 356 |
"default_warehouse": "COMPUTE_WH",
|
| 357 |
"default_database": "DEMO_DB",
|
| 358 |
+
"naming_prefix": "", # Object naming prefix
|
| 359 |
|
| 360 |
# ThoughtSpot Connection
|
| 361 |
"thoughtspot_url": "",
|
|
@@ -84,13 +84,18 @@ class ThoughtSpotDeployer:
|
|
| 84 |
|
| 85 |
return private_key_raw
|
| 86 |
|
| 87 |
-
def authenticate(self) -> bool:
|
| 88 |
-
"""Authenticate with ThoughtSpot
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
try:
|
| 90 |
auth_url = f"{self.base_url}/api/rest/2.0/auth/session/login"
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
|
|
|
| 94 |
response = self.session.post(
|
| 95 |
auth_url,
|
| 96 |
json={
|
|
@@ -98,31 +103,38 @@ class ThoughtSpotDeployer:
|
|
| 98 |
"password": self.password
|
| 99 |
}
|
| 100 |
)
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
|
|
|
| 104 |
if response.status_code in [200, 204]: # Accept both 200 and 204 as success
|
| 105 |
if response.status_code == 200:
|
| 106 |
# Extract token from JSON response
|
| 107 |
result = response.json()
|
| 108 |
-
|
|
|
|
| 109 |
if 'token' in result:
|
| 110 |
self.headers['Authorization'] = f'Bearer {result["token"]}'
|
| 111 |
-
|
|
|
|
| 112 |
return True
|
| 113 |
else:
|
| 114 |
-
|
|
|
|
| 115 |
elif response.status_code == 204:
|
| 116 |
# HTTP 204 No Content - authentication successful, use session cookies
|
| 117 |
-
|
|
|
|
| 118 |
# For 204, we use session cookies instead of bearer token
|
| 119 |
return True
|
| 120 |
else:
|
| 121 |
-
|
|
|
|
| 122 |
return False
|
| 123 |
-
|
| 124 |
except Exception as e:
|
| 125 |
-
|
|
|
|
| 126 |
return False
|
| 127 |
|
| 128 |
def parse_ddl(self, ddl: str) -> Tuple[Dict, List]:
|
|
@@ -318,33 +330,63 @@ class ThoughtSpotDeployer:
|
|
| 318 |
joins = []
|
| 319 |
table_name_upper = table_name.upper()
|
| 320 |
table_cols = [col['name'].upper() for col in columns]
|
| 321 |
-
|
|
|
|
| 322 |
# Find foreign key relationships
|
| 323 |
for col_name in table_cols:
|
| 324 |
-
if col_name.endswith('ID')
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 348 |
return joins
|
| 349 |
|
| 350 |
def create_connection_tml(self, connection_name: str) -> str:
|
|
@@ -503,21 +545,62 @@ class ThoughtSpotDeployer:
|
|
| 503 |
return True
|
| 504 |
return False
|
| 505 |
|
| 506 |
-
def _is_surrogate_primary_key(self, col: Dict, col_name: str) -> bool:
|
| 507 |
-
"""Check if column is a meaningless surrogate key (numeric ID)
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
| 514 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 515 |
|
| 516 |
def _create_model_with_constraints(self, tables: Dict, foreign_keys: List, table_guids: Dict,
|
| 517 |
table_constraints: Dict, model_name: str, connection_name: str) -> str:
|
| 518 |
"""Generate model TML with constraint references like our successful test"""
|
| 519 |
print(" π Creating model with constraint references")
|
| 520 |
-
|
| 521 |
# Build column name conflict tracking
|
| 522 |
column_name_counts = {}
|
| 523 |
for table_name, columns in tables.items():
|
|
@@ -526,7 +609,83 @@ class ThoughtSpotDeployer:
|
|
| 526 |
if col_name not in column_name_counts:
|
| 527 |
column_name_counts[col_name] = []
|
| 528 |
column_name_counts[col_name].append(table_name.upper())
|
| 529 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 530 |
model = {
|
| 531 |
'guid': None,
|
| 532 |
'model': {
|
|
@@ -543,28 +702,28 @@ class ThoughtSpotDeployer:
|
|
| 543 |
}
|
| 544 |
}
|
| 545 |
}
|
| 546 |
-
|
| 547 |
-
# Add model_tables with FQNs and
|
| 548 |
for table_name in tables.keys():
|
| 549 |
table_name_upper = table_name.upper()
|
| 550 |
table_guid = table_guids.get(table_name_upper)
|
| 551 |
-
|
| 552 |
table_entry = {
|
| 553 |
'name': table_name_upper,
|
| 554 |
'fqn': table_guid
|
| 555 |
}
|
| 556 |
-
|
| 557 |
-
# Add joins
|
| 558 |
-
if table_name_upper in
|
| 559 |
table_entry['joins'] = []
|
| 560 |
-
for
|
| 561 |
join_entry = {
|
| 562 |
-
'with':
|
| 563 |
-
'referencing_join':
|
| 564 |
}
|
| 565 |
table_entry['joins'].append(join_entry)
|
| 566 |
-
print(f"
|
| 567 |
-
|
| 568 |
model['model']['model_tables'].append(table_entry)
|
| 569 |
|
| 570 |
# Add columns with proper global conflict resolution (same as working version)
|
|
@@ -581,7 +740,8 @@ class ThoughtSpotDeployer:
|
|
| 581 |
continue
|
| 582 |
|
| 583 |
# SKIP surrogate primary keys (numeric IDs) - nobody searches "customer 23455"
|
| 584 |
-
|
|
|
|
| 585 |
print(f" βοΈ Skipping surrogate PK: {table_name_upper}.{col_name}")
|
| 586 |
continue
|
| 587 |
|
|
@@ -671,18 +831,65 @@ class ThoughtSpotDeployer:
|
|
| 671 |
for col in columns:
|
| 672 |
col_name = col['name'].upper()
|
| 673 |
|
| 674 |
-
# Check if this looks like a foreign key (ends with ID but isn't the table's own ID)
|
| 675 |
-
if col_name.endswith('ID')
|
| 676 |
-
|
| 677 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 678 |
|
| 679 |
-
|
| 680 |
-
|
| 681 |
-
|
| 682 |
-
|
| 683 |
-
|
| 684 |
-
|
| 685 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 686 |
|
| 687 |
return warnings
|
| 688 |
|
|
@@ -1092,41 +1299,50 @@ class ThoughtSpotDeployer:
|
|
| 1092 |
print(f" β οΈ Could not create schema: {e}")
|
| 1093 |
print(f" π Will proceed assuming schema exists or will be created by table operations")
|
| 1094 |
|
| 1095 |
-
def _generate_demo_names(self, company_name: str = None, use_case: str = None):
|
| 1096 |
-
"""Generate standardized demo names using
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1097 |
from datetime import datetime
|
| 1098 |
import re
|
| 1099 |
-
|
| 1100 |
-
# Get timestamp components
|
| 1101 |
now = datetime.now()
|
| 1102 |
-
|
| 1103 |
-
|
| 1104 |
-
|
| 1105 |
-
|
| 1106 |
-
if company_name:
|
| 1107 |
-
company_clean = re.sub(r'[^a-zA-Z0-9]', '', company_name.upper())[:5]
|
| 1108 |
else:
|
| 1109 |
-
|
| 1110 |
-
|
| 1111 |
-
#
|
| 1112 |
-
|
| 1113 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1114 |
else:
|
| 1115 |
-
|
| 1116 |
-
|
| 1117 |
-
# Generate names
|
| 1118 |
-
base_name = f"DM{yymmdd}_{hhmmss}_{company_clean}_{usecase_clean}"
|
| 1119 |
-
|
| 1120 |
return {
|
| 1121 |
-
'schema':
|
| 1122 |
-
'connection': f"{
|
| 1123 |
-
'model': f"{
|
| 1124 |
-
'
|
|
|
|
| 1125 |
}
|
| 1126 |
|
| 1127 |
-
def deploy_all(self, ddl: str, database: str, schema: str,
|
| 1128 |
-
connection_name: str = None, company_name: str = None,
|
| 1129 |
-
use_case: str = None, progress_callback=None) -> Dict:
|
| 1130 |
"""
|
| 1131 |
Deploy complete data model to ThoughtSpot
|
| 1132 |
|
|
@@ -1148,12 +1364,29 @@ class ThoughtSpotDeployer:
|
|
| 1148 |
}
|
| 1149 |
|
| 1150 |
table_guids = {} # Store table GUIDs for model creation
|
| 1151 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1152 |
def log_progress(message):
|
| 1153 |
-
"""Helper to log progress
|
| 1154 |
print(message, flush=True)
|
| 1155 |
if progress_callback:
|
| 1156 |
progress_callback(message)
|
|
|
|
|
|
|
|
|
|
| 1157 |
|
| 1158 |
try:
|
| 1159 |
# STEP 0: Authenticate first!
|
|
@@ -1179,10 +1412,20 @@ class ThoughtSpotDeployer:
|
|
| 1179 |
else:
|
| 1180 |
log_progress("β
All foreign key references are valid\n")
|
| 1181 |
|
| 1182 |
-
# Step 1:
|
| 1183 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1184 |
if not connection_name:
|
| 1185 |
-
connection_name =
|
| 1186 |
|
| 1187 |
log_progress("1οΈβ£ Checking/Creating connection...")
|
| 1188 |
log_progress(f" Connection name: {connection_name}")
|
|
@@ -1276,111 +1519,130 @@ class ThoughtSpotDeployer:
|
|
| 1276 |
log_progress("\n2οΈβ£ Creating tables...")
|
| 1277 |
|
| 1278 |
# PHASE 1: Create all tables WITHOUT joins (to ensure all tables exist first)
|
| 1279 |
-
log_progress(" π Phase 1: Creating tables
|
| 1280 |
-
|
| 1281 |
-
|
| 1282 |
-
|
| 1283 |
-
|
|
|
|
|
|
|
| 1284 |
|
| 1285 |
-
|
| 1286 |
tml_start = time.time()
|
| 1287 |
table_tml = self.create_table_tml(table_name, columns, connection_name, database, schema, all_tables=None)
|
| 1288 |
tml_time = time.time() - tml_start
|
| 1289 |
-
|
|
|
|
|
|
|
| 1290 |
|
| 1291 |
-
|
| 1292 |
-
|
| 1293 |
-
|
| 1294 |
-
|
| 1295 |
-
|
| 1296 |
-
|
| 1297 |
-
|
| 1298 |
-
|
| 1299 |
-
|
| 1300 |
-
|
| 1301 |
-
|
| 1302 |
-
|
| 1303 |
-
|
| 1304 |
-
|
| 1305 |
-
api_time = time.time() - api_start
|
| 1306 |
-
log_progress(f" β±οΈ API call took: {api_time:.2f} seconds")
|
| 1307 |
|
| 1308 |
-
|
| 1309 |
-
|
|
|
|
| 1310 |
|
| 1311 |
-
|
| 1312 |
-
|
| 1313 |
-
|
| 1314 |
-
|
| 1315 |
-
|
| 1316 |
-
|
| 1317 |
-
|
| 1318 |
-
|
| 1319 |
-
|
| 1320 |
-
|
| 1321 |
|
| 1322 |
-
|
| 1323 |
-
|
|
|
|
|
|
|
| 1324 |
if obj.get('response', {}).get('status', {}).get('status_code') == 'OK':
|
| 1325 |
table_guid = obj.get('response', {}).get('header', {}).get('id_guid')
|
| 1326 |
-
|
| 1327 |
-
|
| 1328 |
-
|
| 1329 |
-
results['tables'].append(table_name.upper())
|
| 1330 |
-
table_guids[table_name.upper()] = table_guid
|
| 1331 |
else:
|
| 1332 |
-
|
|
|
|
| 1333 |
log_progress(f" β {error}")
|
| 1334 |
results['errors'].append(error)
|
| 1335 |
-
# DON'T return - continue creating other tables
|
| 1336 |
-
else:
|
| 1337 |
-
error = f"Table {table_name} failed: No object in response"
|
| 1338 |
-
log_progress(f" β {error}")
|
| 1339 |
-
results['errors'].append(error)
|
| 1340 |
else:
|
| 1341 |
-
error =
|
| 1342 |
log_progress(f" β {error}")
|
| 1343 |
results['errors'].append(error)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1344 |
|
| 1345 |
# Check if we created any tables successfully
|
| 1346 |
if not table_guids:
|
| 1347 |
log_progress(" β No tables were created successfully in Phase 1")
|
| 1348 |
return results
|
| 1349 |
|
| 1350 |
-
log_progress(f" β
Phase 1 complete: {len(table_guids)} tables created")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1351 |
|
| 1352 |
-
# PHASE 2: Update tables WITH joins (now that all tables exist)
|
| 1353 |
-
log_progress("\n π Phase 2: Adding joins to tables...")
|
| 1354 |
for table_name, columns in tables.items():
|
| 1355 |
-
# Only add joins if the table was created successfully in Phase 1
|
| 1356 |
table_name_upper = table_name.upper()
|
| 1357 |
if table_name_upper not in table_guids:
|
| 1358 |
log_progress(f" βοΈ Skipping joins for {table_name_upper} (table creation failed)")
|
| 1359 |
continue
|
| 1360 |
|
| 1361 |
-
# Get the GUID for this table
|
| 1362 |
table_guid = table_guids[table_name_upper]
|
| 1363 |
|
| 1364 |
-
log_progress(f" π Adding joins to: {table_name_upper}...")
|
| 1365 |
# Create table TML WITH joins_with section AND the table GUID
|
| 1366 |
table_tml = self.create_table_tml(
|
| 1367 |
table_name, columns, connection_name, database, schema,
|
| 1368 |
all_tables=tables, table_guid=table_guid
|
| 1369 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1370 |
|
| 1371 |
response = self.session.post(
|
| 1372 |
f"{self.base_url}/api/rest/2.0/metadata/tml/import",
|
| 1373 |
json={
|
| 1374 |
-
"metadata_tmls":
|
| 1375 |
"import_policy": "PARTIAL",
|
| 1376 |
-
"create_new": False # Update existing
|
| 1377 |
}
|
| 1378 |
)
|
| 1379 |
|
|
|
|
|
|
|
|
|
|
| 1380 |
if response.status_code == 200:
|
| 1381 |
result = response.json()
|
| 1382 |
|
| 1383 |
-
# Handle both response formats
|
| 1384 |
if isinstance(result, list):
|
| 1385 |
objects = result
|
| 1386 |
elif isinstance(result, dict) and 'object' in result:
|
|
@@ -1389,21 +1651,23 @@ class ThoughtSpotDeployer:
|
|
| 1389 |
log_progress(f" β οΈ Unexpected response format for joins: {type(result)}")
|
| 1390 |
objects = []
|
| 1391 |
|
|
|
|
| 1392 |
if objects and len(objects) > 0:
|
| 1393 |
-
obj
|
| 1394 |
-
|
| 1395 |
-
|
| 1396 |
-
|
| 1397 |
-
|
| 1398 |
-
|
| 1399 |
-
|
| 1400 |
-
|
| 1401 |
else:
|
| 1402 |
-
log_progress(f" β οΈ
|
| 1403 |
else:
|
| 1404 |
-
log_progress(f" β οΈ HTTP error adding joins
|
|
|
|
| 1405 |
|
| 1406 |
-
log_progress(f" β
Phase 2 complete: Joins
|
| 1407 |
actual_constraint_ids = {} # We'll generate these for the model
|
| 1408 |
|
| 1409 |
# Skip separate relationship creation for now
|
|
@@ -1412,8 +1676,9 @@ class ThoughtSpotDeployer:
|
|
| 1412 |
|
| 1413 |
# Step 3: Extract constraint IDs from created tables
|
| 1414 |
log_progress("\n2οΈβ£.5 Extracting constraint IDs from created tables...")
|
|
|
|
| 1415 |
table_constraints = {}
|
| 1416 |
-
|
| 1417 |
for table_name, table_guid in table_guids.items():
|
| 1418 |
log_progress(f" π Getting constraint IDs for {table_name}...")
|
| 1419 |
|
|
@@ -1446,18 +1711,32 @@ class ThoughtSpotDeployer:
|
|
| 1446 |
'destination': destination
|
| 1447 |
})
|
| 1448 |
log_progress(f" π Found join: {constraint_id} -> {destination}")
|
| 1449 |
-
|
|
|
|
|
|
|
| 1450 |
log_progress(f" β
Extracted constraints from {len(table_constraints)} tables")
|
| 1451 |
-
|
| 1452 |
# Step 4: Create model (semantic layer) with constraint references
|
| 1453 |
log_progress("\n3οΈβ£ Creating model (semantic layer) with joins...")
|
| 1454 |
-
|
| 1455 |
-
|
|
|
|
| 1456 |
log_progress(f" Model name: {model_name}")
|
| 1457 |
|
| 1458 |
# Use the enhanced model creation that includes constraint references
|
| 1459 |
model_tml = self._create_model_with_constraints(tables, foreign_keys, table_guids, table_constraints, model_name, connection_name)
|
| 1460 |
print(f"\nπ Model TML being sent:\n{model_tml}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1461 |
|
| 1462 |
response = self.session.post(
|
| 1463 |
f"{self.base_url}/api/rest/2.0/metadata/tml/import",
|
|
@@ -1467,7 +1746,9 @@ class ThoughtSpotDeployer:
|
|
| 1467 |
"create_new": True
|
| 1468 |
}
|
| 1469 |
)
|
| 1470 |
-
|
|
|
|
|
|
|
| 1471 |
if response.status_code == 200:
|
| 1472 |
result = response.json()
|
| 1473 |
|
|
@@ -1485,6 +1766,8 @@ class ThoughtSpotDeployer:
|
|
| 1485 |
if objects and len(objects) > 0:
|
| 1486 |
if objects[0].get('response', {}).get('status', {}).get('status_code') == 'OK':
|
| 1487 |
model_guid = objects[0].get('response', {}).get('header', {}).get('id_guid')
|
|
|
|
|
|
|
| 1488 |
log_progress(f" β
Model created successfully!")
|
| 1489 |
log_progress(f" Model: {model_name}")
|
| 1490 |
log_progress(f" GUID: {model_guid}")
|
|
@@ -1546,13 +1829,39 @@ class ThoughtSpotDeployer:
|
|
| 1546 |
traceback.print_exc()
|
| 1547 |
else:
|
| 1548 |
print(f"π Full model response: {objects}") # DEBUG: Show full response
|
| 1549 |
-
|
| 1550 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1551 |
results['errors'].append(error)
|
| 1552 |
else:
|
| 1553 |
error = "Model failed: No objects in response"
|
| 1554 |
log_progress(f" β {error}")
|
| 1555 |
results['errors'].append(error)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1556 |
|
| 1557 |
# Mark as successful if we got this far
|
| 1558 |
results['success'] = len(results['errors']) == 0
|
|
@@ -1560,29 +1869,43 @@ class ThoughtSpotDeployer:
|
|
| 1560 |
except Exception as e:
|
| 1561 |
error_msg = str(e)
|
| 1562 |
print(f"β Deployment failed: {error_msg}")
|
|
|
|
| 1563 |
results['errors'].append(error_msg)
|
| 1564 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1565 |
return results
|
| 1566 |
|
| 1567 |
|
| 1568 |
-
def deploy_to_thoughtspot(ddl: str, database: str, schema: str,
|
| 1569 |
-
connection_name: str = None, company_name: str = None,
|
| 1570 |
-
use_case: str = None, progress_callback=None) -> Dict:
|
| 1571 |
"""
|
| 1572 |
Convenience function for deploying to ThoughtSpot
|
| 1573 |
-
|
| 1574 |
Args:
|
| 1575 |
ddl: Data Definition Language statements
|
| 1576 |
database: Target database name
|
| 1577 |
schema: Target schema name
|
| 1578 |
connection_name: Optional connection name
|
|
|
|
|
|
|
|
|
|
| 1579 |
progress_callback: Optional callback for progress updates
|
| 1580 |
-
|
| 1581 |
Returns:
|
| 1582 |
Dict with deployment results
|
| 1583 |
"""
|
| 1584 |
deployer = ThoughtSpotDeployer()
|
| 1585 |
-
return deployer.deploy_all(ddl, database, schema, connection_name, company_name, use_case, progress_callback)
|
| 1586 |
|
| 1587 |
|
| 1588 |
if __name__ == "__main__":
|
|
|
|
| 84 |
|
| 85 |
return private_key_raw
|
| 86 |
|
| 87 |
+
def authenticate(self, verbose: bool = False) -> bool:
    """Authenticate with ThoughtSpot.

    Tries bearer-token auth first (HTTP 200 with a JSON body containing a
    token); falls back to session-cookie auth when the server answers
    HTTP 204 No Content.

    Args:
        verbose: If True, print detailed authentication info. Default False for clean output.

    Returns:
        True on successful authentication, False otherwise.
    """
    try:
        auth_url = f"{self.base_url}/api/rest/2.0/auth/session/login"
        if verbose:
            print(f" π Attempting authentication to: {auth_url}")
            print(f" π€ Username: {self.username}")

        response = self.session.post(
            auth_url,
            json={
                "username": self.username,
                "password": self.password
            }
        )

        if verbose:
            print(f" π‘ HTTP Status: {response.status_code}")

        if response.status_code in [200, 204]:  # Accept both 200 and 204 as success
            if response.status_code == 200:
                # Extract token from JSON response
                result = response.json()
                if verbose:
                    print(f" β… Response: {json.dumps(result, indent=2)}")
                if 'token' in result:
                    self.headers['Authorization'] = f'Bearer {result["token"]}'
                    if verbose:
                        print(" ποΈ Token extracted successfully")
                    return True
                else:
                    if verbose:
                        print(" β No token in response")
                    # BUG FIX: this path previously fell through the if/elif and
                    # the function implicitly returned None. A 200 response with
                    # no token is an authentication failure, so return False
                    # explicitly (keeps the declared -> bool contract).
                    return False
            elif response.status_code == 204:
                # HTTP 204 No Content - authentication successful, use session cookies
                # For 204, we use session cookies instead of bearer token
                if verbose:
                    print(" β… Authentication successful (HTTP 204 - using session cookies)")
                return True
        else:
            if verbose:
                print(f" β HTTP Error {response.status_code}: {response.text}")
            return False

    except Exception as e:
        # Broad catch is deliberate at this auth boundary: any transport or
        # parsing failure is reported as a failed authentication, not a crash.
        if verbose:
            print(f" π₯ Authentication exception: {e}")
        return False
|
| 139 |
|
| 140 |
def parse_ddl(self, ddl: str) -> Tuple[Dict, List]:
|
|
|
|
| 330 |
joins = []
|
| 331 |
table_name_upper = table_name.upper()
|
| 332 |
table_cols = [col['name'].upper() for col in columns]
|
| 333 |
+
available_tables_upper = [t.upper() for t in all_tables.keys()]
|
| 334 |
+
|
| 335 |
# Find foreign key relationships
|
| 336 |
for col_name in table_cols:
|
| 337 |
+
if not col_name.endswith('ID'):
|
| 338 |
+
continue
|
| 339 |
+
|
| 340 |
+
# Extract the base name from the column (CUSTOMER_ID -> CUSTOMER, CUSTOMERID -> CUSTOMER)
|
| 341 |
+
if col_name.endswith('_ID'):
|
| 342 |
+
col_base = col_name[:-3] # Remove _ID
|
| 343 |
+
else:
|
| 344 |
+
col_base = col_name[:-2] # Remove ID
|
| 345 |
+
|
| 346 |
+
# Check if this is the table's own primary key
|
| 347 |
+
if table_name_upper.endswith('S') and len(table_name_upper) > 1:
|
| 348 |
+
table_base = table_name_upper[:-1] # CUSTOMERS -> CUSTOMER, SALES_TRANSACTIONS -> SALES_TRANSACTION
|
| 349 |
+
else:
|
| 350 |
+
table_base = table_name_upper
|
| 351 |
+
|
| 352 |
+
table_base_alt = table_name_upper.rstrip('S')
|
| 353 |
+
|
| 354 |
+
# Skip if this is the table's own PK
|
| 355 |
+
is_own_pk = (col_base == table_name_upper or
|
| 356 |
+
col_base == table_base or
|
| 357 |
+
col_base == table_base_alt)
|
| 358 |
+
|
| 359 |
+
if is_own_pk:
|
| 360 |
+
continue
|
| 361 |
+
|
| 362 |
+
# Try to find the target table (CUSTOMER -> CUSTOMERS)
|
| 363 |
+
possible_targets = [
|
| 364 |
+
col_base + 'S', # CUSTOMER -> CUSTOMERS
|
| 365 |
+
col_base + 'ES', # Less common
|
| 366 |
+
col_base # Already plural
|
| 367 |
+
]
|
| 368 |
+
|
| 369 |
+
found_target = None
|
| 370 |
+
for potential_target in possible_targets:
|
| 371 |
+
if potential_target in available_tables_upper and potential_target != table_name_upper:
|
| 372 |
+
found_target = potential_target
|
| 373 |
+
break
|
| 374 |
+
|
| 375 |
+
if found_target:
|
| 376 |
+
constraint_id = f"SYS_CONSTRAINT_{self._generate_constraint_id()}"
|
| 377 |
+
join_def = {
|
| 378 |
+
'name': constraint_id,
|
| 379 |
+
'destination': {
|
| 380 |
+
'name': found_target
|
| 381 |
+
},
|
| 382 |
+
'on': f"[{table_name_upper}::{col_name}] = [{found_target}::{col_name}]",
|
| 383 |
+
'type': 'INNER'
|
| 384 |
+
}
|
| 385 |
+
joins.append(join_def)
|
| 386 |
+
print(f" π Generated join: {table_name_upper} -> {found_target} on {col_name}")
|
| 387 |
+
else:
|
| 388 |
+
print(f" βοΈ Skipping join: {table_name_upper}.{col_name} -> {possible_targets[0]} (table not in this deployment)")
|
| 389 |
+
|
| 390 |
return joins
|
| 391 |
|
| 392 |
def create_connection_tml(self, connection_name: str) -> str:
|
|
|
|
| 545 |
return True
|
| 546 |
return False
|
| 547 |
|
| 548 |
+
def _is_surrogate_primary_key(self, col: Dict, col_name: str, table_name: str = None) -> bool:
|
| 549 |
+
"""Check if column is a meaningless surrogate key (numeric ID) for THIS table.
|
| 550 |
+
|
| 551 |
+
Foreign key columns (like CUSTOMER_ID in SALES_TRANSACTIONS) should NOT be
|
| 552 |
+
treated as surrogate keys - they're important for joins.
|
| 553 |
+
"""
|
| 554 |
+
col_name_upper = col_name.upper()
|
| 555 |
+
|
| 556 |
+
# Must end with ID and be numeric
|
| 557 |
+
if not col_name_upper.endswith('ID'):
|
| 558 |
+
return False
|
| 559 |
+
|
| 560 |
+
col_type = col.get('type', '').upper()
|
| 561 |
+
if not any(t in col_type for t in ['INT', 'NUMBER', 'NUMERIC', 'BIGINT']):
|
| 562 |
+
return False
|
| 563 |
+
|
| 564 |
+
# If we don't have table context, be conservative
|
| 565 |
+
if not table_name:
|
| 566 |
+
return False
|
| 567 |
+
|
| 568 |
+
table_name_upper = table_name.upper()
|
| 569 |
+
|
| 570 |
+
# Extract the base name from the column (CUSTOMER_ID -> CUSTOMER, CUSTOMERID -> CUSTOMER)
|
| 571 |
+
if col_name_upper.endswith('_ID'):
|
| 572 |
+
col_base = col_name_upper[:-3] # Remove _ID
|
| 573 |
+
else:
|
| 574 |
+
col_base = col_name_upper[:-2] # Remove ID
|
| 575 |
+
|
| 576 |
+
# Extract the base name from the table (CUSTOMERS -> CUSTOMER, SALES_TRANSACTIONS -> SALES_TRANSACTION)
|
| 577 |
+
# Handle plural table names
|
| 578 |
+
if table_name_upper.endswith('S') and len(table_name_upper) > 1:
|
| 579 |
+
table_base = table_name_upper[:-1] # CUSTOMERS -> CUSTOMER
|
| 580 |
+
else:
|
| 581 |
+
table_base = table_name_upper
|
| 582 |
+
|
| 583 |
+
# Also try removing all trailing 'S' characters for cases like SALES -> SALE
|
| 584 |
+
table_base_alt = table_name_upper.rstrip('S')
|
| 585 |
+
|
| 586 |
+
# It's a surrogate PK only if the column name matches the table name
|
| 587 |
+
# Examples:
|
| 588 |
+
# CUSTOMER_ID in CUSTOMERS table -> surrogate PK (skip)
|
| 589 |
+
# CUSTOMER_ID in SALES_TRANSACTIONS table -> foreign key (DO NOT skip)
|
| 590 |
+
is_own_pk = (col_base == table_name_upper or
|
| 591 |
+
col_base == table_base or
|
| 592 |
+
col_base == table_base_alt or
|
| 593 |
+
col_name_upper == f"{table_name_upper}_ID" or
|
| 594 |
+
col_name_upper == f"{table_base}_ID" or
|
| 595 |
+
col_name_upper == f"{table_base_alt}_ID")
|
| 596 |
+
|
| 597 |
+
return is_own_pk
|
| 598 |
|
| 599 |
def _create_model_with_constraints(self, tables: Dict, foreign_keys: List, table_guids: Dict,
|
| 600 |
table_constraints: Dict, model_name: str, connection_name: str) -> str:
|
| 601 |
"""Generate model TML with constraint references like our successful test"""
|
| 602 |
print(" π Creating model with constraint references")
|
| 603 |
+
|
| 604 |
# Build column name conflict tracking
|
| 605 |
column_name_counts = {}
|
| 606 |
for table_name, columns in tables.items():
|
|
|
|
| 609 |
if col_name not in column_name_counts:
|
| 610 |
column_name_counts[col_name] = []
|
| 611 |
column_name_counts[col_name].append(table_name.upper())
|
| 612 |
+
|
| 613 |
+
# INFER JOINS: Build joins from foreign key column names
|
| 614 |
+
# Structure: {source_table: [{target: target_table, constraint_id: ID}, ...]}
|
| 615 |
+
inferred_joins = {}
|
| 616 |
+
table_names_upper = [t.upper() for t in tables.keys()]
|
| 617 |
+
|
| 618 |
+
for table_name, columns in tables.items():
|
| 619 |
+
table_name_upper = table_name.upper()
|
| 620 |
+
|
| 621 |
+
for col in columns:
|
| 622 |
+
col_name = col['name'].upper()
|
| 623 |
+
|
| 624 |
+
# Check if this looks like a foreign key (ends with ID or _ID but isn't the table's own ID)
|
| 625 |
+
if col_name.endswith('ID'):
|
| 626 |
+
# Infer the target table name (CUSTOMERID -> CUSTOMERS, CUSTOMER_ID -> CUSTOMERS)
|
| 627 |
+
if col_name.endswith('_ID'):
|
| 628 |
+
col_base = col_name[:-3] # CUSTOMER_ID -> CUSTOMER
|
| 629 |
+
else:
|
| 630 |
+
col_base = col_name[:-2] # CUSTOMERID -> CUSTOMER
|
| 631 |
+
|
| 632 |
+
# Check if this is the table's own primary key
|
| 633 |
+
# Handle plural table names: CUSTOMERS has CUSTOMER_ID, not CUSTOMERS_ID
|
| 634 |
+
if table_name_upper.endswith('S') and len(table_name_upper) > 1:
|
| 635 |
+
table_base = table_name_upper[:-1] # CUSTOMERS -> CUSTOMER
|
| 636 |
+
else:
|
| 637 |
+
table_base = table_name_upper
|
| 638 |
+
|
| 639 |
+
table_base_alt = table_name_upper.rstrip('S') # Alternative: strip all trailing S
|
| 640 |
+
|
| 641 |
+
is_self_reference = (col_base == table_name_upper or
|
| 642 |
+
col_base == table_base or
|
| 643 |
+
col_base == table_base_alt)
|
| 644 |
+
|
| 645 |
+
# Skip if this is the table's own PK
|
| 646 |
+
if is_self_reference:
|
| 647 |
+
continue
|
| 648 |
+
|
| 649 |
+
# Try multiple potential target table names
|
| 650 |
+
potential_targets = [
|
| 651 |
+
col_base + 'S', # CUSTOMER -> CUSTOMERS
|
| 652 |
+
col_base + 'ES', # Less common but possible
|
| 653 |
+
col_base # Already plural
|
| 654 |
+
]
|
| 655 |
+
|
| 656 |
+
# Find the first matching target table
|
| 657 |
+
found_target = None
|
| 658 |
+
for potential_target in potential_targets:
|
| 659 |
+
if potential_target in table_names_upper and potential_target != table_name_upper:
|
| 660 |
+
found_target = potential_target
|
| 661 |
+
break
|
| 662 |
+
|
| 663 |
+
# Create join if we found a valid target
|
| 664 |
+
if found_target:
|
| 665 |
+
if table_name_upper not in inferred_joins:
|
| 666 |
+
inferred_joins[table_name_upper] = []
|
| 667 |
+
|
| 668 |
+
# Look up the REAL constraint ID from table_constraints first
|
| 669 |
+
constraint_id = None
|
| 670 |
+
if table_name_upper in table_constraints:
|
| 671 |
+
for constraint in table_constraints[table_name_upper]:
|
| 672 |
+
if constraint['destination'] == found_target:
|
| 673 |
+
constraint_id = constraint['constraint_id']
|
| 674 |
+
print(f" β
Using real constraint ID from ThoughtSpot: {constraint_id}")
|
| 675 |
+
break
|
| 676 |
+
|
| 677 |
+
# Fallback: generate synthetic ID if not found (shouldn't happen if Phase 2 worked)
|
| 678 |
+
if not constraint_id:
|
| 679 |
+
constraint_id = f"SYS_CONSTRAINT_{self._generate_constraint_id()}"
|
| 680 |
+
print(f" β οΈ Generated synthetic constraint ID (real ID not found): {constraint_id[:40]}...")
|
| 681 |
+
|
| 682 |
+
inferred_joins[table_name_upper].append({
|
| 683 |
+
'target': found_target,
|
| 684 |
+
'constraint_id': constraint_id,
|
| 685 |
+
'source_column': col_name
|
| 686 |
+
})
|
| 687 |
+
print(f" π Inferred join: {table_name_upper}.{col_name} -> {found_target}")
|
| 688 |
+
|
| 689 |
model = {
|
| 690 |
'guid': None,
|
| 691 |
'model': {
|
|
|
|
| 702 |
}
|
| 703 |
}
|
| 704 |
}
|
| 705 |
+
|
| 706 |
+
# Add model_tables with FQNs and inferred joins
|
| 707 |
for table_name in tables.keys():
|
| 708 |
table_name_upper = table_name.upper()
|
| 709 |
table_guid = table_guids.get(table_name_upper)
|
| 710 |
+
|
| 711 |
table_entry = {
|
| 712 |
'name': table_name_upper,
|
| 713 |
'fqn': table_guid
|
| 714 |
}
|
| 715 |
+
|
| 716 |
+
# Add joins from inferred relationships
|
| 717 |
+
if table_name_upper in inferred_joins and inferred_joins[table_name_upper]:
|
| 718 |
table_entry['joins'] = []
|
| 719 |
+
for join_info in inferred_joins[table_name_upper]:
|
| 720 |
join_entry = {
|
| 721 |
+
'with': join_info['target'],
|
| 722 |
+
'referencing_join': join_info['constraint_id']
|
| 723 |
}
|
| 724 |
table_entry['joins'].append(join_entry)
|
| 725 |
+
print(f" β
Added join to model: {table_name_upper} -> {join_info['target']}")
|
| 726 |
+
|
| 727 |
model['model']['model_tables'].append(table_entry)
|
| 728 |
|
| 729 |
# Add columns with proper global conflict resolution (same as working version)
|
|
|
|
| 740 |
continue
|
| 741 |
|
| 742 |
# SKIP surrogate primary keys (numeric IDs) - nobody searches "customer 23455"
|
| 743 |
+
# Pass table_name to distinguish PKs from FKs
|
| 744 |
+
if self._is_surrogate_primary_key(col, col_name, table_name_upper):
|
| 745 |
print(f" βοΈ Skipping surrogate PK: {table_name_upper}.{col_name}")
|
| 746 |
continue
|
| 747 |
|
|
|
|
| 831 |
for col in columns:
|
| 832 |
col_name = col['name'].upper()
|
| 833 |
|
| 834 |
+
# Check if this looks like a foreign key (ends with ID or _ID but isn't the table's own ID)
|
| 835 |
+
if not col_name.endswith('ID'):
|
| 836 |
+
continue
|
| 837 |
+
|
| 838 |
+
# Extract the base name from the column
|
| 839 |
+
if col_name.endswith('_ID'):
|
| 840 |
+
col_base = col_name[:-3] # CUSTOMER_ID -> CUSTOMER
|
| 841 |
+
else:
|
| 842 |
+
col_base = col_name[:-2] # CUSTOMERID -> CUSTOMER
|
| 843 |
+
|
| 844 |
+
# Extract the base name from the table to check if this is the table's own PK
|
| 845 |
+
if table_name_upper.endswith('S') and len(table_name_upper) > 1:
|
| 846 |
+
table_base = table_name_upper[:-1] # CUSTOMERS -> CUSTOMER
|
| 847 |
+
else:
|
| 848 |
+
table_base = table_name_upper
|
| 849 |
|
| 850 |
+
table_base_alt = table_name_upper.rstrip('S')
|
| 851 |
+
|
| 852 |
+
# For compound names like SALES_TRANSACTIONS, also check the last word
|
| 853 |
+
# SALES_TRANSACTIONS -> TRANSACTION
|
| 854 |
+
if '_' in table_name_upper:
|
| 855 |
+
last_part = table_name_upper.split('_')[-1]
|
| 856 |
+
if last_part.endswith('S'):
|
| 857 |
+
table_last_part = last_part[:-1]
|
| 858 |
+
else:
|
| 859 |
+
table_last_part = last_part
|
| 860 |
+
else:
|
| 861 |
+
table_last_part = None
|
| 862 |
+
|
| 863 |
+
# Skip if this is the table's own primary key
|
| 864 |
+
is_own_pk = (col_base == table_name_upper or
|
| 865 |
+
col_base == table_base or
|
| 866 |
+
col_base == table_base_alt or
|
| 867 |
+
(table_last_part and col_base == table_last_part))
|
| 868 |
+
|
| 869 |
+
if is_own_pk:
|
| 870 |
+
continue
|
| 871 |
+
|
| 872 |
+
# Try to find the target table (CUSTOMER -> CUSTOMERS)
|
| 873 |
+
# Try multiple plural forms
|
| 874 |
+
possible_targets = [
|
| 875 |
+
col_base + 'S', # CUSTOMER -> CUSTOMERS
|
| 876 |
+
col_base + 'ES', # LOCATION -> LOCATIONES (unlikely but possible)
|
| 877 |
+
col_base # Already plural or no 'S' form
|
| 878 |
+
]
|
| 879 |
+
|
| 880 |
+
found_target = None
|
| 881 |
+
for target in possible_targets:
|
| 882 |
+
if target in table_names_upper and target != table_name_upper:
|
| 883 |
+
found_target = target
|
| 884 |
+
break
|
| 885 |
+
|
| 886 |
+
# Only warn if we can't find ANY matching table
|
| 887 |
+
if not found_target:
|
| 888 |
+
warnings.append(
|
| 889 |
+
f"β οΈ {table_name}.{col_name} appears to reference {possible_targets[0]}, "
|
| 890 |
+
f"but {possible_targets[0]} is not in this schema. "
|
| 891 |
+
f"The join will be skipped during deployment."
|
| 892 |
+
)
|
| 893 |
|
| 894 |
return warnings
|
| 895 |
|
|
|
|
| 1299 |
print(f" β οΈ Could not create schema: {e}")
|
| 1300 |
print(f" π Will proceed assuming schema exists or will be created by table operations")
|
| 1301 |
|
| 1302 |
+
def _generate_demo_names(self, company_name: str = None, use_case: str = None, naming_prefix: str = None):
|
| 1303 |
+
"""Generate standardized demo names using consistent format
|
| 1304 |
+
|
| 1305 |
+
Args:
|
| 1306 |
+
naming_prefix: Optional custom prefix
|
| 1307 |
+
Format: {prefix}_{day+ms}_{objtype}
|
| 1308 |
+
Example with prefix: BOONE_14123456_scm
|
| 1309 |
+
Example blank prefix: _14123456_scm
|
| 1310 |
+
|
| 1311 |
+
Note: company_name and use_case parameters deprecated but kept for compatibility
|
| 1312 |
+
"""
|
| 1313 |
from datetime import datetime
|
| 1314 |
import re
|
| 1315 |
+
|
|
|
|
| 1316 |
now = datetime.now()
|
| 1317 |
+
|
| 1318 |
+
# Clean prefix (uppercase, alphanumeric only) - can be empty string
|
| 1319 |
+
if naming_prefix:
|
| 1320 |
+
prefix_clean = re.sub(r'[^a-zA-Z0-9]', '', naming_prefix.upper())
|
|
|
|
|
|
|
| 1321 |
else:
|
| 1322 |
+
prefix_clean = ""
|
| 1323 |
+
|
| 1324 |
+
# Short unique timestamp: day (2 digits) + microseconds (6 digits)
|
| 1325 |
+
day = now.strftime('%d')
|
| 1326 |
+
microsec = now.strftime('%f') # 6 digits
|
| 1327 |
+
short_timestamp = f"{day}{microsec}"
|
| 1328 |
+
|
| 1329 |
+
# Base format: PREFIX_DDMICROS (prefix can be empty)
|
| 1330 |
+
if prefix_clean:
|
| 1331 |
+
base = f"{prefix_clean}_{short_timestamp}"
|
| 1332 |
else:
|
| 1333 |
+
base = short_timestamp # Just timestamp if no prefix
|
| 1334 |
+
|
|
|
|
|
|
|
|
|
|
| 1335 |
return {
|
| 1336 |
+
'schema': f"{base}_scm", # Schema
|
| 1337 |
+
'connection': f"{base}_conn", # Connection
|
| 1338 |
+
'model': f"{base}_mdl", # Model
|
| 1339 |
+
'table': f"{base}_tbl", # Table (if needed)
|
| 1340 |
+
'base': base
|
| 1341 |
}
|
| 1342 |
|
| 1343 |
+
def deploy_all(self, ddl: str, database: str, schema: str,
|
| 1344 |
+
connection_name: str = None, company_name: str = None,
|
| 1345 |
+
use_case: str = None, naming_prefix: str = None, progress_callback=None) -> Dict:
|
| 1346 |
"""
|
| 1347 |
Deploy complete data model to ThoughtSpot
|
| 1348 |
|
|
|
|
| 1364 |
}
|
| 1365 |
|
| 1366 |
table_guids = {} # Store table GUIDs for model creation
|
| 1367 |
+
|
| 1368 |
+
# Setup deployment log file
|
| 1369 |
+
from pathlib import Path
|
| 1370 |
+
import datetime as dt
|
| 1371 |
+
log_dir = Path("logs") / "deployments"
|
| 1372 |
+
log_dir.mkdir(parents=True, exist_ok=True)
|
| 1373 |
+
timestamp_str = dt.datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 1374 |
+
deployment_log_file = log_dir / f"deployment_{schema}_{timestamp_str}.log"
|
| 1375 |
+
deployment_log = open(deployment_log_file, "w", buffering=1) # Line buffered
|
| 1376 |
+
deployment_log.write(f"=== ThoughtSpot Deployment Log ===\n")
|
| 1377 |
+
deployment_log.write(f"Schema: {schema}\n")
|
| 1378 |
+
deployment_log.write(f"Database: {database}\n")
|
| 1379 |
+
deployment_log.write(f"Timestamp: {dt.datetime.now().isoformat()}\n")
|
| 1380 |
+
deployment_log.write(f"=" * 50 + "\n\n")
|
| 1381 |
+
|
| 1382 |
def log_progress(message):
|
| 1383 |
+
"""Helper to log progress to console, callback, AND file"""
|
| 1384 |
print(message, flush=True)
|
| 1385 |
if progress_callback:
|
| 1386 |
progress_callback(message)
|
| 1387 |
+
# ALWAYS write to log file
|
| 1388 |
+
deployment_log.write(message + "\n")
|
| 1389 |
+
deployment_log.flush()
|
| 1390 |
|
| 1391 |
try:
|
| 1392 |
# STEP 0: Authenticate first!
|
|
|
|
| 1412 |
else:
|
| 1413 |
log_progress("β
All foreign key references are valid\n")
|
| 1414 |
|
| 1415 |
+
# Step 1: Extract base from schema name to ensure consistency
|
| 1416 |
+
# Schema format: PREFIX_DDMICROS_scm (e.g., AMAZ_14283952_scm)
|
| 1417 |
+
# We need to use the SAME base for all ThoughtSpot objects
|
| 1418 |
+
import re
|
| 1419 |
+
if schema.endswith('_scm'):
|
| 1420 |
+
base_from_schema = schema[:-4] # Remove "_scm" suffix
|
| 1421 |
+
else:
|
| 1422 |
+
# Fallback: generate new names if schema doesn't follow convention
|
| 1423 |
+
demo_names = self._generate_demo_names(company_name, use_case, naming_prefix)
|
| 1424 |
+
base_from_schema = demo_names['base']
|
| 1425 |
+
|
| 1426 |
+
# Use the schema's base for all ThoughtSpot objects
|
| 1427 |
if not connection_name:
|
| 1428 |
+
connection_name = f"{base_from_schema}_conn"
|
| 1429 |
|
| 1430 |
log_progress("1οΈβ£ Checking/Creating connection...")
|
| 1431 |
log_progress(f" Connection name: {connection_name}")
|
|
|
|
| 1519 |
log_progress("\n2οΈβ£ Creating tables...")
|
| 1520 |
|
| 1521 |
# PHASE 1: Create all tables WITHOUT joins (to ensure all tables exist first)
|
| 1522 |
+
log_progress(" π Phase 1: Creating ALL tables in one batch (no joins)...")
|
| 1523 |
+
import time
|
| 1524 |
+
|
| 1525 |
+
# Step 1.1: Generate TMLs for all tables
|
| 1526 |
+
batch_start = time.time()
|
| 1527 |
+
table_tmls = []
|
| 1528 |
+
table_names_list = []
|
| 1529 |
|
| 1530 |
+
for table_name, columns in tables.items():
|
| 1531 |
tml_start = time.time()
|
| 1532 |
table_tml = self.create_table_tml(table_name, columns, connection_name, database, schema, all_tables=None)
|
| 1533 |
tml_time = time.time() - tml_start
|
| 1534 |
+
table_tmls.append(table_tml)
|
| 1535 |
+
table_names_list.append(table_name.upper())
|
| 1536 |
+
log_progress(f" π Generated TML for {table_name.upper()} ({tml_time:.3f}s, {len(table_tml)} chars, {len(columns)} columns)")
|
| 1537 |
|
| 1538 |
+
# Step 1.2: Send ALL tables in ONE API call
|
| 1539 |
+
log_progress(f"\n π Batch creating {len(table_tmls)} tables...")
|
| 1540 |
+
api_start = time.time()
|
| 1541 |
+
response = self.session.post(
|
| 1542 |
+
f"{self.base_url}/api/rest/2.0/metadata/tml/import",
|
| 1543 |
+
json={
|
| 1544 |
+
"metadata_tmls": table_tmls, # Send ALL tables at once!
|
| 1545 |
+
"import_policy": "PARTIAL",
|
| 1546 |
+
"create_new": True
|
| 1547 |
+
}
|
| 1548 |
+
)
|
| 1549 |
+
api_time = time.time() - api_start
|
| 1550 |
+
total_time = time.time() - batch_start
|
| 1551 |
+
log_progress(f" β±οΈ Phase 1 batch call: {api_time:.2f} seconds")
|
|
|
|
|
|
|
| 1552 |
|
| 1553 |
+
# Step 1.3: Process batch response
|
| 1554 |
+
if response.status_code == 200:
|
| 1555 |
+
result = response.json()
|
| 1556 |
|
| 1557 |
+
# Handle both response formats
|
| 1558 |
+
if isinstance(result, list):
|
| 1559 |
+
objects = result
|
| 1560 |
+
elif isinstance(result, dict) and 'object' in result:
|
| 1561 |
+
objects = result['object']
|
| 1562 |
+
else:
|
| 1563 |
+
error = f"Batch table creation failed: Unexpected response format: {type(result)}"
|
| 1564 |
+
log_progress(f" β {error}")
|
| 1565 |
+
results['errors'].append(error)
|
| 1566 |
+
return results
|
| 1567 |
|
| 1568 |
+
# Process each table result
|
| 1569 |
+
if objects and len(objects) > 0:
|
| 1570 |
+
for idx, obj in enumerate(objects):
|
| 1571 |
+
table_name = table_names_list[idx] if idx < len(table_names_list) else f"Table_{idx}"
|
| 1572 |
if obj.get('response', {}).get('status', {}).get('status_code') == 'OK':
|
| 1573 |
table_guid = obj.get('response', {}).get('header', {}).get('id_guid')
|
| 1574 |
+
log_progress(f" β
{table_name} created (GUID: {table_guid})")
|
| 1575 |
+
results['tables'].append(table_name)
|
| 1576 |
+
table_guids[table_name] = table_guid
|
|
|
|
|
|
|
| 1577 |
else:
|
| 1578 |
+
error_msg = obj.get('response', {}).get('status', {}).get('error_message', 'Unknown error')
|
| 1579 |
+
error = f"{table_name} failed: {error_msg}"
|
| 1580 |
log_progress(f" β {error}")
|
| 1581 |
results['errors'].append(error)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1582 |
else:
|
| 1583 |
+
error = "Batch table creation failed: No objects in response"
|
| 1584 |
log_progress(f" β {error}")
|
| 1585 |
results['errors'].append(error)
|
| 1586 |
+
return results
|
| 1587 |
+
else:
|
| 1588 |
+
error = f"Batch table creation HTTP error: {response.status_code}"
|
| 1589 |
+
log_progress(f" β {error}")
|
| 1590 |
+
log_progress(f" π Response: {response.text[:500]}")
|
| 1591 |
+
results['errors'].append(error)
|
| 1592 |
+
return results
|
| 1593 |
|
| 1594 |
# Check if we created any tables successfully
|
| 1595 |
if not table_guids:
|
| 1596 |
log_progress(" β No tables were created successfully in Phase 1")
|
| 1597 |
return results
|
| 1598 |
|
| 1599 |
+
log_progress(f"\n β
Phase 1 complete: {len(table_guids)} tables created in {total_time:.2f} seconds")
|
| 1600 |
+
|
| 1601 |
+
# PHASE 2: Update tables WITH joins (now that all tables exist) - BATCH MODE
|
| 1602 |
+
log_progress("\n π Phase 2: Adding joins to ALL tables in one batch...")
|
| 1603 |
+
phase2_start = time.time()
|
| 1604 |
+
|
| 1605 |
+
# Generate TML for all tables with joins
|
| 1606 |
+
table_tmls_with_joins = []
|
| 1607 |
+
table_names_for_phase2 = []
|
| 1608 |
|
|
|
|
|
|
|
| 1609 |
for table_name, columns in tables.items():
|
|
|
|
| 1610 |
table_name_upper = table_name.upper()
|
| 1611 |
if table_name_upper not in table_guids:
|
| 1612 |
log_progress(f" βοΈ Skipping joins for {table_name_upper} (table creation failed)")
|
| 1613 |
continue
|
| 1614 |
|
|
|
|
| 1615 |
table_guid = table_guids[table_name_upper]
|
| 1616 |
|
|
|
|
| 1617 |
# Create table TML WITH joins_with section AND the table GUID
|
| 1618 |
table_tml = self.create_table_tml(
|
| 1619 |
table_name, columns, connection_name, database, schema,
|
| 1620 |
all_tables=tables, table_guid=table_guid
|
| 1621 |
)
|
| 1622 |
+
table_tmls_with_joins.append(table_tml)
|
| 1623 |
+
table_names_for_phase2.append(table_name_upper)
|
| 1624 |
+
|
| 1625 |
+
# Single batch API call to update all tables with joins
|
| 1626 |
+
if table_tmls_with_joins:
|
| 1627 |
+
log_progress(f" π Batch updating {len(table_tmls_with_joins)} tables with joins...")
|
| 1628 |
+
api_start = time.time()
|
| 1629 |
|
| 1630 |
response = self.session.post(
|
| 1631 |
f"{self.base_url}/api/rest/2.0/metadata/tml/import",
|
| 1632 |
json={
|
| 1633 |
+
"metadata_tmls": table_tmls_with_joins,
|
| 1634 |
"import_policy": "PARTIAL",
|
| 1635 |
+
"create_new": False # Update existing tables
|
| 1636 |
}
|
| 1637 |
)
|
| 1638 |
|
| 1639 |
+
phase2_api_time = time.time() - api_start
|
| 1640 |
+
log_progress(f" β±οΈ Phase 2 batch call: {phase2_api_time:.2f} seconds")
|
| 1641 |
+
|
| 1642 |
if response.status_code == 200:
|
| 1643 |
result = response.json()
|
| 1644 |
|
| 1645 |
+
# Handle both response formats
|
| 1646 |
if isinstance(result, list):
|
| 1647 |
objects = result
|
| 1648 |
elif isinstance(result, dict) and 'object' in result:
|
|
|
|
| 1651 |
log_progress(f" β οΈ Unexpected response format for joins: {type(result)}")
|
| 1652 |
objects = []
|
| 1653 |
|
| 1654 |
+
# Process each table result
|
| 1655 |
if objects and len(objects) > 0:
|
| 1656 |
+
for idx, obj in enumerate(objects):
|
| 1657 |
+
table_name = table_names_for_phase2[idx] if idx < len(table_names_for_phase2) else f"Table_{idx}"
|
| 1658 |
+
if obj.get('response', {}).get('status', {}).get('status_code') == 'OK':
|
| 1659 |
+
log_progress(f" β
Joins added: {table_name}")
|
| 1660 |
+
else:
|
| 1661 |
+
error_msg = obj.get('response', {}).get('status', {}).get('error_message', 'Unknown error')
|
| 1662 |
+
log_progress(f" β οΈ Joins failed for {table_name}: {error_msg}")
|
| 1663 |
+
results['errors'].append(f"Adding joins to {table_name} failed: {error_msg}")
|
| 1664 |
else:
|
| 1665 |
+
log_progress(f" β οΈ No results returned for join updates")
|
| 1666 |
else:
|
| 1667 |
+
log_progress(f" β οΈ HTTP error adding joins: {response.status_code}")
|
| 1668 |
+
log_progress(f" π Response: {response.text[:500]}")
|
| 1669 |
|
| 1670 |
+
log_progress(f" β
Phase 2 complete: Joins added to all tables in one batch")
|
| 1671 |
actual_constraint_ids = {} # We'll generate these for the model
|
| 1672 |
|
| 1673 |
# Skip separate relationship creation for now
|
|
|
|
| 1676 |
|
| 1677 |
# Step 3: Extract constraint IDs from created tables
|
| 1678 |
log_progress("\n2οΈβ£.5 Extracting constraint IDs from created tables...")
|
| 1679 |
+
extract_start = time.time()
|
| 1680 |
table_constraints = {}
|
| 1681 |
+
|
| 1682 |
for table_name, table_guid in table_guids.items():
|
| 1683 |
log_progress(f" π Getting constraint IDs for {table_name}...")
|
| 1684 |
|
|
|
|
| 1711 |
'destination': destination
|
| 1712 |
})
|
| 1713 |
log_progress(f" π Found join: {constraint_id} -> {destination}")
|
| 1714 |
+
|
| 1715 |
+
extract_time = time.time() - extract_start
|
| 1716 |
+
log_progress(f" β±οΈ Constraint extraction time: {extract_time:.2f} seconds")
|
| 1717 |
log_progress(f" β
Extracted constraints from {len(table_constraints)} tables")
|
| 1718 |
+
|
| 1719 |
# Step 4: Create model (semantic layer) with constraint references
|
| 1720 |
log_progress("\n3οΈβ£ Creating model (semantic layer) with joins...")
|
| 1721 |
+
model_start = time.time()
|
| 1722 |
+
# Use the same base from schema for model name
|
| 1723 |
+
model_name = f"{base_from_schema}_mdl"
|
| 1724 |
log_progress(f" Model name: {model_name}")
|
| 1725 |
|
| 1726 |
# Use the enhanced model creation that includes constraint references
|
| 1727 |
model_tml = self._create_model_with_constraints(tables, foreign_keys, table_guids, table_constraints, model_name, connection_name)
|
| 1728 |
print(f"\nπ Model TML being sent:\n{model_tml}")
|
| 1729 |
+
|
| 1730 |
+
# ALWAYS save model TML to logs for debugging
|
| 1731 |
+
from pathlib import Path
|
| 1732 |
+
import datetime as dt
|
| 1733 |
+
log_dir = Path("logs") / "model_tmls"
|
| 1734 |
+
log_dir.mkdir(parents=True, exist_ok=True)
|
| 1735 |
+
timestamp = dt.datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 1736 |
+
tml_file = log_dir / f"model_{model_name}_{timestamp}.tml"
|
| 1737 |
+
with open(tml_file, "w") as f:
|
| 1738 |
+
f.write(model_tml)
|
| 1739 |
+
log_progress(f" π Model TML saved to: {tml_file}")
|
| 1740 |
|
| 1741 |
response = self.session.post(
|
| 1742 |
f"{self.base_url}/api/rest/2.0/metadata/tml/import",
|
|
|
|
| 1746 |
"create_new": True
|
| 1747 |
}
|
| 1748 |
)
|
| 1749 |
+
|
| 1750 |
+
log_progress(f" π‘ Model API response status: {response.status_code}")
|
| 1751 |
+
|
| 1752 |
if response.status_code == 200:
|
| 1753 |
result = response.json()
|
| 1754 |
|
|
|
|
| 1766 |
if objects and len(objects) > 0:
|
| 1767 |
if objects[0].get('response', {}).get('status', {}).get('status_code') == 'OK':
|
| 1768 |
model_guid = objects[0].get('response', {}).get('header', {}).get('id_guid')
|
| 1769 |
+
model_time = time.time() - model_start
|
| 1770 |
+
log_progress(f" β±οΈ Model creation time: {model_time:.2f} seconds")
|
| 1771 |
log_progress(f" β
Model created successfully!")
|
| 1772 |
log_progress(f" Model: {model_name}")
|
| 1773 |
log_progress(f" GUID: {model_guid}")
|
|
|
|
| 1829 |
traceback.print_exc()
|
| 1830 |
else:
|
| 1831 |
print(f"π Full model response: {objects}") # DEBUG: Show full response
|
| 1832 |
+
status_obj = objects[0].get('response', {}).get('status', {})
|
| 1833 |
+
error_msg = status_obj.get('error_message', 'Unknown error')
|
| 1834 |
+
status_code = status_obj.get('status_code', 'Unknown')
|
| 1835 |
+
error = f"Model failed: {error_msg} (status: {status_code})"
|
| 1836 |
+
log_progress(f" β {error}")
|
| 1837 |
+
if 'error' in status_obj:
|
| 1838 |
+
log_progress(f" π Additional error details: {status_obj['error']}")
|
| 1839 |
+
|
| 1840 |
+
# Save full error response to log file
|
| 1841 |
+
error_log_dir = Path("logs") / "model_errors"
|
| 1842 |
+
error_log_dir.mkdir(parents=True, exist_ok=True)
|
| 1843 |
+
error_file = error_log_dir / f"model_error_{model_name}_{timestamp}.json"
|
| 1844 |
+
import json
|
| 1845 |
+
with open(error_file, "w") as f:
|
| 1846 |
+
json.dump(objects, f, indent=2)
|
| 1847 |
+
log_progress(f" π Full error saved to: {error_file}")
|
| 1848 |
+
|
| 1849 |
results['errors'].append(error)
|
| 1850 |
else:
|
| 1851 |
error = "Model failed: No objects in response"
|
| 1852 |
log_progress(f" β {error}")
|
| 1853 |
results['errors'].append(error)
|
| 1854 |
+
else:
|
| 1855 |
+
# Handle non-200 responses
|
| 1856 |
+
error = f"Model API call failed with status {response.status_code}"
|
| 1857 |
+
log_progress(f" β {error}")
|
| 1858 |
+
log_progress(f" π Response: {response.text[:500]}") # First 500 chars
|
| 1859 |
+
try:
|
| 1860 |
+
error_detail = response.json()
|
| 1861 |
+
log_progress(f" π Error details: {error_detail}")
|
| 1862 |
+
except:
|
| 1863 |
+
pass
|
| 1864 |
+
results['errors'].append(error)
|
| 1865 |
|
| 1866 |
# Mark as successful if we got this far
|
| 1867 |
results['success'] = len(results['errors']) == 0
|
|
|
|
| 1869 |
except Exception as e:
|
| 1870 |
error_msg = str(e)
|
| 1871 |
print(f"β Deployment failed: {error_msg}")
|
| 1872 |
+
deployment_log.write(f"β Deployment failed: {error_msg}\n")
|
| 1873 |
results['errors'].append(error_msg)
|
| 1874 |
+
finally:
|
| 1875 |
+
# Close deployment log file
|
| 1876 |
+
deployment_log.write(f"\n{'='*50}\n")
|
| 1877 |
+
deployment_log.write(f"Deployment completed at: {dt.datetime.now().isoformat()}\n")
|
| 1878 |
+
deployment_log.write(f"Log saved to: {deployment_log_file}\n")
|
| 1879 |
+
deployment_log.close()
|
| 1880 |
+
# Print directly since deployment_log is now closed
|
| 1881 |
+
print(f"\nπ Full deployment log saved to: {deployment_log_file}")
|
| 1882 |
+
if progress_callback:
|
| 1883 |
+
progress_callback(f"\nπ Full deployment log saved to: {deployment_log_file}")
|
| 1884 |
+
|
| 1885 |
return results
|
| 1886 |
|
| 1887 |
|
| 1888 |
+
def deploy_to_thoughtspot(ddl: str, database: str, schema: str,
|
| 1889 |
+
connection_name: str = None, company_name: str = None,
|
| 1890 |
+
use_case: str = None, naming_prefix: str = None, progress_callback=None) -> Dict:
|
| 1891 |
"""
|
| 1892 |
Convenience function for deploying to ThoughtSpot
|
| 1893 |
+
|
| 1894 |
Args:
|
| 1895 |
ddl: Data Definition Language statements
|
| 1896 |
database: Target database name
|
| 1897 |
schema: Target schema name
|
| 1898 |
connection_name: Optional connection name
|
| 1899 |
+
company_name: Optional company name (used in default naming)
|
| 1900 |
+
use_case: Optional use case (used in default naming)
|
| 1901 |
+
naming_prefix: Optional custom naming prefix (format: {prefix}_{day+ms}_{objtype})
|
| 1902 |
progress_callback: Optional callback for progress updates
|
| 1903 |
+
|
| 1904 |
Returns:
|
| 1905 |
Dict with deployment results
|
| 1906 |
"""
|
| 1907 |
deployer = ThoughtSpotDeployer()
|
| 1908 |
+
return deployer.deploy_all(ddl, database, schema, connection_name, company_name, use_case, naming_prefix, progress_callback)
|
| 1909 |
|
| 1910 |
|
| 1911 |
if __name__ == "__main__":
|