Spaces:
Running
Running
Fix chat interface: Enter key, DDL display, and pre-filled responses
Browse files- MCP_liveboard_creation.md +530 -0
- POPULATION_FIX_SUMMARY.md +160 -0
- chat_interface.py +0 -0
- demo_prep.py +234 -61
- liveboard_creator.py +5 -1
- requirements.txt +1 -0
- schema_utils.py +32 -4
- supabase_client.py +11 -0
- thoughtspot_deployer.py +201 -124
MCP_liveboard_creation.md
ADDED
|
@@ -0,0 +1,530 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ThoughtSpot MCP Implementation Guide
|
| 2 |
+
|
| 3 |
+
## Overview
|
| 4 |
+
This document provides a comprehensive guide for implementing ThoughtSpot's Model Context Protocol (MCP) to create automated, AI-driven analytics liveboards.
|
| 5 |
+
|
| 6 |
+
---
|
| 7 |
+
|
| 8 |
+
## Table of Contents
|
| 9 |
+
1. [What is MCP](#what-is-mcp)
|
| 10 |
+
2. [Architecture](#architecture)
|
| 11 |
+
3. [Prerequisites](#prerequisites)
|
| 12 |
+
4. [Available MCP Tools](#available-mcp-tools)
|
| 13 |
+
5. [Implementation Workflow](#implementation-workflow)
|
| 14 |
+
6. [Code Examples](#code-examples)
|
| 15 |
+
7. [Best Practices](#best-practices)
|
| 16 |
+
8. [Troubleshooting](#troubleshooting)
|
| 17 |
+
|
| 18 |
+
---
|
| 19 |
+
|
| 20 |
+
## What is MCP
|
| 21 |
+
|
| 22 |
+
**Model Context Protocol (MCP)** is a standardized protocol that enables AI agents and applications to interact with ThoughtSpot's analytics capabilities programmatically.
|
| 23 |
+
|
| 24 |
+
### Key Benefits
|
| 25 |
+
- π€ **AI-Native**: Designed for AI agents like Claude, ChatGPT, etc.
|
| 26 |
+
- π **Standardized**: Uses JSON-RPC over stdio (stdin/stdout)
|
| 27 |
+
- π― **Intent-Based**: Converts natural language queries into precise data questions
|
| 28 |
+
- π **End-to-End**: From question generation to liveboard creation
|
| 29 |
+
|
| 30 |
+
### Communication Method
|
| 31 |
+
- **NOT HTTP/REST** - MCP uses stdio (subprocess communication)
|
| 32 |
+
- Uses `mcp-remote` proxy for OAuth authentication
|
| 33 |
+
- Spawns MCP server as subprocess, communicates via stdin/stdout
|
| 34 |
+
|
| 35 |
+
---
|
| 36 |
+
|
| 37 |
+
## Architecture
|
| 38 |
+
|
| 39 |
+
```
|
| 40 |
+
βββββββββββββββββββ
|
| 41 |
+
β Your Python β
|
| 42 |
+
β Application β
|
| 43 |
+
ββββββββββ¬βββββββββ
|
| 44 |
+
β
|
| 45 |
+
βΌ
|
| 46 |
+
βββββββββββββββββββ
|
| 47 |
+
β MCP Python β
|
| 48 |
+
β SDK β
|
| 49 |
+
ββββββββββ¬βββββββββ
|
| 50 |
+
β stdio
|
| 51 |
+
βΌ
|
| 52 |
+
βββββββββββββββββββ
|
| 53 |
+
β mcp-remote β
|
| 54 |
+
β (OAuth Proxy) β
|
| 55 |
+
ββββββββββ¬βββββββββ
|
| 56 |
+
β HTTPS
|
| 57 |
+
βΌ
|
| 58 |
+
βββββββββββββββββββ
|
| 59 |
+
β ThoughtSpot β
|
| 60 |
+
β MCP Server β
|
| 61 |
+
βββββββββββββββββββ
|
| 62 |
+
```
|
| 63 |
+
|
| 64 |
+
### Components
|
| 65 |
+
1. **Your Application**: Python code using MCP SDK
|
| 66 |
+
2. **MCP Python SDK**: Handles stdio client communication
|
| 67 |
+
3. **mcp-remote**: npx package that handles OAuth and proxies requests
|
| 68 |
+
4. **ThoughtSpot MCP Server**: `https://agent.thoughtspot.app/mcp`
|
| 69 |
+
|
| 70 |
+
---
|
| 71 |
+
|
| 72 |
+
## Prerequisites
|
| 73 |
+
|
| 74 |
+
### Required Software
|
| 75 |
+
- **Python**: 3.8 or higher
|
| 76 |
+
- **Node.js/NPX**: For running `mcp-remote`
|
| 77 |
+
- **MCP Python SDK**: `pip install mcp`
|
| 78 |
+
|
| 79 |
+
### Required Credentials
|
| 80 |
+
- ThoughtSpot instance URL (e.g., `se-thoughtspot-cloud.thoughtspot.cloud`)
|
| 81 |
+
- ThoughtSpot username and password (for OAuth)
|
| 82 |
+
- Datasource/Model GUIDs from your ThoughtSpot instance
|
| 83 |
+
|
| 84 |
+
### Environment Setup
|
| 85 |
+
```bash
|
| 86 |
+
# Install MCP SDK
|
| 87 |
+
pip install mcp
|
| 88 |
+
|
| 89 |
+
# Verify npx is available
|
| 90 |
+
npx --version
|
| 91 |
+
```
|
| 92 |
+
|
| 93 |
+
---
|
| 94 |
+
|
| 95 |
+
## Available MCP Tools
|
| 96 |
+
|
| 97 |
+
ThoughtSpot MCP provides 4 core tools:
|
| 98 |
+
|
| 99 |
+
### 1. ping
|
| 100 |
+
**Purpose**: Health check to verify connection
|
| 101 |
+
|
| 102 |
+
**Parameters**: None
|
| 103 |
+
|
| 104 |
+
**Returns**: "Pong"
|
| 105 |
+
|
| 106 |
+
**Example**:
|
| 107 |
+
```python
|
| 108 |
+
result = await session.call_tool("ping", {})
|
| 109 |
+
# Returns: "Pong"
|
| 110 |
+
```
|
| 111 |
+
|
| 112 |
+
---
|
| 113 |
+
|
| 114 |
+
### 2. getRelevantQuestions
|
| 115 |
+
**Purpose**: Convert vague queries into precise, answerable questions based on datasource schema
|
| 116 |
+
|
| 117 |
+
**Parameters**:
|
| 118 |
+
- `query` (string, **required**): High-level question or task (e.g., "sales performance", "top products")
|
| 119 |
+
- `datasourceIds` (array, **required**): Array of datasource/model GUIDs
|
| 120 |
+
- `additionalContext` (string, optional): Extra context to improve question generation
|
| 121 |
+
|
| 122 |
+
**Returns**: JSON array of suggested questions
|
| 123 |
+
```json
|
| 124 |
+
{
|
| 125 |
+
"questions": [
|
| 126 |
+
{
|
| 127 |
+
"question": "What is the product with the highest total sales amount?",
|
| 128 |
+
"datasourceId": "eb600ad2-ad91-4640-819a-f953602bd4c1"
|
| 129 |
+
}
|
| 130 |
+
]
|
| 131 |
+
}
|
| 132 |
+
```
|
| 133 |
+
|
| 134 |
+
**Use Case**: Turn user's natural language into specific data queries
|
| 135 |
+
|
| 136 |
+
---
|
| 137 |
+
|
| 138 |
+
### 3. getAnswer
|
| 139 |
+
**Purpose**: Execute a question against ThoughtSpot and retrieve data/visualization
|
| 140 |
+
|
| 141 |
+
**Parameters**:
|
| 142 |
+
- `question` (string, **required**): The specific question to answer (typically from `getRelevantQuestions`)
|
| 143 |
+
- `datasourceId` (string, **required**): Single datasource/model GUID
|
| 144 |
+
|
| 145 |
+
**Returns**: JSON with data, metadata, and viewing URL
|
| 146 |
+
```json
|
| 147 |
+
{
|
| 148 |
+
"data": "CSV formatted data...",
|
| 149 |
+
"question": "What is the product with the highest total sales amount?",
|
| 150 |
+
"session_identifier": "uuid",
|
| 151 |
+
"generation_number": 2,
|
| 152 |
+
"frame_url": "https://instance.thoughtspot.cloud/#/embed/..."
|
| 153 |
+
}
|
| 154 |
+
```
|
| 155 |
+
|
| 156 |
+
**Use Case**: Get actual data and visualizations for specific questions
|
| 157 |
+
|
| 158 |
+
---
|
| 159 |
+
|
| 160 |
+
### 4. createLiveboard
|
| 161 |
+
**Purpose**: Create a ThoughtSpot liveboard (dashboard) with multiple visualizations
|
| 162 |
+
|
| 163 |
+
**Parameters**:
|
| 164 |
+
- `name` (string, **required**): Liveboard title
|
| 165 |
+
- `answers` (array, **required**): Array of answer objects from `getAnswer` calls
|
| 166 |
+
- `noteTile` (string, **required**): HTML content for summary/note tile
|
| 167 |
+
|
| 168 |
+
**Returns**: Success message with liveboard URL
|
| 169 |
+
```json
|
| 170 |
+
{
|
| 171 |
+
"message": "Liveboard created successfully",
|
| 172 |
+
"url": "https://instance.thoughtspot.cloud/#/pinboard/[GUID]"
|
| 173 |
+
}
|
| 174 |
+
```
|
| 175 |
+
|
| 176 |
+
**Use Case**: Build comprehensive dashboards from multiple analyses
|
| 177 |
+
|
| 178 |
+
---
|
| 179 |
+
|
| 180 |
+
## Implementation Workflow
|
| 181 |
+
|
| 182 |
+
### Standard 4-Step Process
|
| 183 |
+
|
| 184 |
+
```
|
| 185 |
+
1. ping → Verify connection
|
| 186 |
+
2. getRelevantQuestions → Generate data questions
|
| 187 |
+
3. getAnswer (multiple) → Get data for each question
|
| 188 |
+
4. createLiveboard → Build dashboard
|
| 189 |
+
```
|
| 190 |
+
|
| 191 |
+
### Detailed Flow
|
| 192 |
+
|
| 193 |
+
```python
|
| 194 |
+
# Step 1: Connect and verify
|
| 195 |
+
session = ClientSession(...)
|
| 196 |
+
await session.call_tool("ping", {})
|
| 197 |
+
|
| 198 |
+
# Step 2: Generate questions
|
| 199 |
+
questions = await session.call_tool("getRelevantQuestions", {
|
| 200 |
+
"query": "sales performance",
|
| 201 |
+
"datasourceIds": ["datasource-guid"]
|
| 202 |
+
})
|
| 203 |
+
|
| 204 |
+
# Step 3: Get answers for each question
|
| 205 |
+
answers = []
|
| 206 |
+
for q in questions:
|
| 207 |
+
answer = await session.call_tool("getAnswer", {
|
| 208 |
+
"question": q['question'],
|
| 209 |
+
"datasourceId": q['datasourceId']
|
| 210 |
+
})
|
| 211 |
+
answers.append(answer)
|
| 212 |
+
|
| 213 |
+
# Step 4: Create liveboard
|
| 214 |
+
liveboard = await session.call_tool("createLiveboard", {
|
| 215 |
+
"name": "Sales Performance Dashboard",
|
| 216 |
+
"answers": answers,
|
| 217 |
+
"noteTile": "<html>...</html>"
|
| 218 |
+
})
|
| 219 |
+
```
|
| 220 |
+
|
| 221 |
+
---
|
| 222 |
+
|
| 223 |
+
## Code Examples
|
| 224 |
+
|
| 225 |
+
### Minimal Working Example
|
| 226 |
+
|
| 227 |
+
```python
|
| 228 |
+
import asyncio
|
| 229 |
+
from mcp import ClientSession, StdioServerParameters
|
| 230 |
+
from mcp.client.stdio import stdio_client
|
| 231 |
+
|
| 232 |
+
async def create_liveboard():
|
| 233 |
+
# Configure MCP connection
|
| 234 |
+
server_params = StdioServerParameters(
|
| 235 |
+
command="npx",
|
| 236 |
+
args=["mcp-remote@latest", "https://agent.thoughtspot.app/mcp"]
|
| 237 |
+
)
|
| 238 |
+
|
| 239 |
+
async with stdio_client(server_params) as (read, write):
|
| 240 |
+
async with ClientSession(read, write) as session:
|
| 241 |
+
await session.initialize()
|
| 242 |
+
|
| 243 |
+
# Your datasource GUID
|
| 244 |
+
datasource_id = "your-datasource-guid-here"
|
| 245 |
+
|
| 246 |
+
# Get relevant questions
|
| 247 |
+
result = await session.call_tool("getRelevantQuestions", {
|
| 248 |
+
"query": "top products",
|
| 249 |
+
"datasourceIds": [datasource_id]
|
| 250 |
+
})
|
| 251 |
+
|
| 252 |
+
# Parse questions
|
| 253 |
+
import json
|
| 254 |
+
data = json.loads(result.content[0].text)
|
| 255 |
+
questions = data['questions']
|
| 256 |
+
|
| 257 |
+
# Get answer for first question
|
| 258 |
+
answer_result = await session.call_tool("getAnswer", {
|
| 259 |
+
"question": questions[0]['question'],
|
| 260 |
+
"datasourceId": datasource_id
|
| 261 |
+
})
|
| 262 |
+
|
| 263 |
+
answer_data = json.loads(answer_result.content[0].text)
|
| 264 |
+
|
| 265 |
+
# Create liveboard
|
| 266 |
+
liveboard_result = await session.call_tool("createLiveboard", {
|
| 267 |
+
"name": "Product Analysis",
|
| 268 |
+
"answers": [answer_data],
|
| 269 |
+
"noteTile": "<h2>Product Analysis</h2><p>Top products by sales</p>"
|
| 270 |
+
})
|
| 271 |
+
|
| 272 |
+
print(liveboard_result.content[0].text)
|
| 273 |
+
|
| 274 |
+
asyncio.run(create_liveboard())
|
| 275 |
+
```
|
| 276 |
+
|
| 277 |
+
### Comprehensive Multi-Visualization Example
|
| 278 |
+
|
| 279 |
+
```python
|
| 280 |
+
async def create_comprehensive_analysis():
|
| 281 |
+
server_params = StdioServerParameters(
|
| 282 |
+
command="npx",
|
| 283 |
+
args=["mcp-remote@latest", "https://agent.thoughtspot.app/mcp"]
|
| 284 |
+
)
|
| 285 |
+
|
| 286 |
+
async with stdio_client(server_params) as (read, write):
|
| 287 |
+
async with ClientSession(read, write) as session:
|
| 288 |
+
await session.initialize()
|
| 289 |
+
|
| 290 |
+
datasource_id = "your-datasource-guid"
|
| 291 |
+
|
| 292 |
+
# Multiple query perspectives
|
| 293 |
+
queries = [
|
| 294 |
+
"top selling products",
|
| 295 |
+
"sales trends over time",
|
| 296 |
+
"product performance comparison"
|
| 297 |
+
]
|
| 298 |
+
|
| 299 |
+
all_questions = []
|
| 300 |
+
all_answers = []
|
| 301 |
+
|
| 302 |
+
# Generate questions from multiple angles
|
| 303 |
+
for query in queries:
|
| 304 |
+
result = await session.call_tool("getRelevantQuestions", {
|
| 305 |
+
"query": query,
|
| 306 |
+
"datasourceIds": [datasource_id]
|
| 307 |
+
})
|
| 308 |
+
|
| 309 |
+
data = json.loads(result.content[0].text)
|
| 310 |
+
all_questions.extend(data['questions'][:3]) # Top 3 from each
|
| 311 |
+
|
| 312 |
+
# Get answers for all questions
|
| 313 |
+
for q in all_questions[:10]: # Limit to 10 visualizations
|
| 314 |
+
try:
|
| 315 |
+
answer = await session.call_tool("getAnswer", {
|
| 316 |
+
"question": q['question'],
|
| 317 |
+
"datasourceId": datasource_id
|
| 318 |
+
})
|
| 319 |
+
answer_data = json.loads(answer.content[0].text)
|
| 320 |
+
all_answers.append(answer_data)
|
| 321 |
+
except Exception as e:
|
| 322 |
+
print(f"Failed to get answer: {e}")
|
| 323 |
+
|
| 324 |
+
# Create rich liveboard
|
| 325 |
+
note_tile = """
|
| 326 |
+
<div style="background: linear-gradient(135deg, #1e3a8a 0%, #3b82f6 100%);
|
| 327 |
+
padding: 40px; border-radius: 20px; color: white;">
|
| 328 |
+
<h1>π Comprehensive Sales Analysis</h1>
|
| 329 |
+
<div style="background: rgba(255,255,255,0.15); padding: 25px;
|
| 330 |
+
border-radius: 15px; margin: 20px 0;">
|
| 331 |
+
<h2>π― Executive Summary</h2>
|
| 332 |
+
<p>Analysis of product performance across multiple dimensions</p>
|
| 333 |
+
</div>
|
| 334 |
+
<div style="margin-top: 20px;">
|
| 335 |
+
<h3>π Key Findings</h3>
|
| 336 |
+
<ul>
|
| 337 |
+
<li>Top product performance metrics</li>
|
| 338 |
+
<li>Sales trends and patterns</li>
|
| 339 |
+
<li>Comparative analysis across products</li>
|
| 340 |
+
</ul>
|
| 341 |
+
</div>
|
| 342 |
+
</div>
|
| 343 |
+
"""
|
| 344 |
+
|
| 345 |
+
liveboard = await session.call_tool("createLiveboard", {
|
| 346 |
+
"name": "π Comprehensive Product Analysis",
|
| 347 |
+
"answers": all_answers,
|
| 348 |
+
"noteTile": note_tile
|
| 349 |
+
})
|
| 350 |
+
|
| 351 |
+
return liveboard.content[0].text
|
| 352 |
+
|
| 353 |
+
asyncio.run(create_comprehensive_analysis())
|
| 354 |
+
```
|
| 355 |
+
|
| 356 |
+
---
|
| 357 |
+
|
| 358 |
+
## Best Practices
|
| 359 |
+
|
| 360 |
+
### 1. Query Design
|
| 361 |
+
- ✅ Use broad, natural language queries: "sales performance", "customer trends"
|
| 362 |
+
- ❌ Avoid overly specific SQL-like queries
|
| 363 |
+
- ✅ Let ThoughtSpot's AI interpret the schema
|
| 364 |
+
- ✅ Use multiple query angles for comprehensive analysis
|
| 365 |
+
|
| 366 |
+
### 2. Error Handling
|
| 367 |
+
```python
|
| 368 |
+
try:
|
| 369 |
+
answer = await session.call_tool("getAnswer", {...})
|
| 370 |
+
except Exception as e:
|
| 371 |
+
print(f"Question failed: {str(e)}")
|
| 372 |
+
# Continue with other questions
|
| 373 |
+
```
|
| 374 |
+
|
| 375 |
+
### 3. Datasource Selection
|
| 376 |
+
- Use models (joined tables) instead of single tables when possible
|
| 377 |
+
- Models provide richer context for question generation
|
| 378 |
+
- Verify datasource has data before using
|
| 379 |
+
|
| 380 |
+
### 4. Liveboard Design
|
| 381 |
+
- Include rich HTML note tiles with:
|
| 382 |
+
- Executive summary
|
| 383 |
+
- Key findings
|
| 384 |
+
- Visual styling (gradients, colors, emojis)
|
| 385 |
+
- Methodology explanation
|
| 386 |
+
- Aim for 7-10 visualizations for comprehensive analysis
|
| 387 |
+
- Group related visualizations together
|
| 388 |
+
|
| 389 |
+
### 5. Authentication
|
| 390 |
+
- OAuth is handled automatically by `mcp-remote`
|
| 391 |
+
- Browser will open for first-time authentication
|
| 392 |
+
- Subsequent calls reuse the session
|
| 393 |
+
- OAuth server runs on `localhost:9414`
|
| 394 |
+
|
| 395 |
+
---
|
| 396 |
+
|
| 397 |
+
## Troubleshooting
|
| 398 |
+
|
| 399 |
+
### Common Issues
|
| 400 |
+
|
| 401 |
+
#### 1. "No answer found for your query"
|
| 402 |
+
**Cause**: Datasource is empty or question doesn't match schema
|
| 403 |
+
|
| 404 |
+
**Solution**:
|
| 405 |
+
- Verify datasource has data
|
| 406 |
+
- Use system tables (TS: Search, TS: Database) for testing
|
| 407 |
+
- Try simpler questions first
|
| 408 |
+
|
| 409 |
+
#### 2. "Expected object, received string" (createLiveboard)
|
| 410 |
+
**Cause**: Passing string instead of parsed JSON object
|
| 411 |
+
|
| 412 |
+
**Solution**:
|
| 413 |
+
```python
|
| 414 |
+
# ❌ Wrong
|
| 415 |
+
answers = [result.content[0].text]
|
| 416 |
+
|
| 417 |
+
# ✅ Correct
|
| 418 |
+
import json
|
| 419 |
+
answer_data = json.loads(result.content[0].text)
|
| 420 |
+
answers = [answer_data]
|
| 421 |
+
```
|
| 422 |
+
|
| 423 |
+
#### 3. Connection timeouts
|
| 424 |
+
**Cause**: Network issues or MCP server unavailable
|
| 425 |
+
|
| 426 |
+
**Solution**:
|
| 427 |
+
- Test with `ping` first
|
| 428 |
+
- Verify npx is installed: `npx --version`
|
| 429 |
+
- Check ThoughtSpot instance is accessible
|
| 430 |
+
|
| 431 |
+
#### 4. Authentication loop
|
| 432 |
+
**Cause**: OAuth token expired or not saved
|
| 433 |
+
|
| 434 |
+
**Solution**:
|
| 435 |
+
- Close browser and restart
|
| 436 |
+
- Clear OAuth cache at `~/.mcp-remote/`
|
| 437 |
+
- Ensure OAuth callback server on 9414 is not blocked
|
| 438 |
+
|
| 439 |
+
---
|
| 440 |
+
|
| 441 |
+
## Getting Datasource GUIDs
|
| 442 |
+
|
| 443 |
+
### Method 1: ThoughtSpot UI
|
| 444 |
+
1. Log into ThoughtSpot instance
|
| 445 |
+
2. Navigate to **Data** → **Connections** or **Models**
|
| 446 |
+
3. Click on datasource/model
|
| 447 |
+
4. Copy GUID from URL or details page
|
| 448 |
+
|
| 449 |
+
### Method 2: REST API
|
| 450 |
+
```python
|
| 451 |
+
import requests
|
| 452 |
+
|
| 453 |
+
# Authenticate
|
| 454 |
+
auth_url = f"https://{ts_instance}/api/rest/2.0/auth/token/full"
|
| 455 |
+
response = requests.post(auth_url, json={
|
| 456 |
+
"username": "your_username",
|
| 457 |
+
"password": "your_password"
|
| 458 |
+
})
|
| 459 |
+
token = response.json()['token']
|
| 460 |
+
|
| 461 |
+
# List datasources
|
| 462 |
+
search_url = f"https://{ts_instance}/api/rest/2.0/metadata/search"
|
| 463 |
+
response = requests.post(search_url,
|
| 464 |
+
headers={"Authorization": f"Bearer {token}"},
|
| 465 |
+
json={"metadata": [{"type": "LOGICAL_TABLE"}]}
|
| 466 |
+
)
|
| 467 |
+
|
| 468 |
+
for item in response.json():
|
| 469 |
+
print(f"{item['metadata_name']}: {item['metadata_id']}")
|
| 470 |
+
```
|
| 471 |
+
|
| 472 |
+
---
|
| 473 |
+
|
| 474 |
+
## File Structure
|
| 475 |
+
|
| 476 |
+
Recommended project structure:
|
| 477 |
+
|
| 478 |
+
```
|
| 479 |
+
project/
|
| 480 |
+
βββ mcp/
|
| 481 |
+
β βββ mcp_working_example.py # Basic example
|
| 482 |
+
β βββ test_get_questions.py # Comprehensive example
|
| 483 |
+
β βββ list_mcp_tools.py # Tool documentation
|
| 484 |
+
β βββ get_datasources.py # Helper to get GUIDs
|
| 485 |
+
βββ .env # ThoughtSpot credentials
|
| 486 |
+
βββ requirements.txt # mcp, python-dotenv
|
| 487 |
+
```
|
| 488 |
+
|
| 489 |
+
---
|
| 490 |
+
|
| 491 |
+
## Environment Variables
|
| 492 |
+
|
| 493 |
+
```properties
|
| 494 |
+
# .env file
|
| 495 |
+
THOUGHTSPOT_URL=your-instance.thoughtspot.cloud
|
| 496 |
+
THOUGHTSPOT_USERNAME=your_username
|
| 497 |
+
THOUGHTSPOT_PASSWORD=your_password
|
| 498 |
+
```
|
| 499 |
+
|
| 500 |
+
---
|
| 501 |
+
|
| 502 |
+
## Complete Reference Implementation
|
| 503 |
+
|
| 504 |
+
See `test_get_questions.py` in this repository for a complete, production-ready implementation with:
|
| 505 |
+
- Multiple query generation
|
| 506 |
+
- Error handling
|
| 507 |
+
- Rich HTML formatting
|
| 508 |
+
- 7+ visualizations
|
| 509 |
+
- Professional liveboard styling
|
| 510 |
+
|
| 511 |
+
---
|
| 512 |
+
|
| 513 |
+
## Support & Resources
|
| 514 |
+
|
| 515 |
+
- **ThoughtSpot MCP Server**: https://agent.thoughtspot.app/mcp
|
| 516 |
+
- **MCP Python SDK**: https://github.com/modelcontextprotocol/python-sdk
|
| 517 |
+
- **ThoughtSpot REST API Docs**: https://developers.thoughtspot.com
|
| 518 |
+
|
| 519 |
+
---
|
| 520 |
+
|
| 521 |
+
## Version History
|
| 522 |
+
|
| 523 |
+
- **v1.0** (November 2025): Initial implementation guide
|
| 524 |
+
- MCP SDK version: 1.21.1
|
| 525 |
+
- mcp-remote version: 0.1.30
|
| 526 |
+
|
| 527 |
+
---
|
| 528 |
+
|
| 529 |
+
*Document created: November 14, 2025*
|
| 530 |
+
*Last updated: November 14, 2025*
|
POPULATION_FIX_SUMMARY.md
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Population Code Generation Fix - Summary
|
| 2 |
+
|
| 3 |
+
## Problem
|
| 4 |
+
The population code was failing with "unexpected indent" errors on line 75, despite template generating clean code.
|
| 5 |
+
|
| 6 |
+
## Root Causes Identified
|
| 7 |
+
|
| 8 |
+
### 1. **Code Modification After Generation**
|
| 9 |
+
- `execute_population_script()` was applying dangerous string replacements to clean template code
|
| 10 |
+
- These replacements (lines 352-381 in demo_prep.py) were breaking indentation
|
| 11 |
+
|
| 12 |
+
### 2. **Template Logic Bug**
|
| 13 |
+
- Table names were being added to the list BEFORE validating columns
|
| 14 |
+
- This caused function calls to non-existent functions
|
| 15 |
+
- Result: incomplete try/except/finally blocks
|
| 16 |
+
|
| 17 |
+
### 3. **No Distinction Between Template vs LLM Code**
|
| 18 |
+
- All code was treated the same way
|
| 19 |
+
- Template code doesn't need the safety fixes that LLM code needs
|
| 20 |
+
|
| 21 |
+
## Solutions Implemented
|
| 22 |
+
|
| 23 |
+
### Solution 1: Flag System for Code Source ✅
|
| 24 |
+
**Files:** `demo_prep.py`, `chat_interface.py`
|
| 25 |
+
|
| 26 |
+
- Added `skip_modifications` parameter to `execute_population_script()`
|
| 27 |
+
- Template code now bypasses all dangerous string replacements
|
| 28 |
+
- Only does safe schema name replacement
|
| 29 |
+
- LLM code still gets safety fixes
|
| 30 |
+
|
| 31 |
+
**Usage:**
|
| 32 |
+
```python
|
| 33 |
+
execute_population_script(code, schema_name, skip_modifications=True) # For template code
|
| 34 |
+
execute_population_script(code, schema_name, skip_modifications=False) # For LLM code
|
| 35 |
+
```
|
| 36 |
+
|
| 37 |
+
### Solution 2: Comprehensive Diagnostics ✅
|
| 38 |
+
**Files:** `demo_prep.py`
|
| 39 |
+
|
| 40 |
+
Saves code at each step to `/tmp/demowire_debug/`:
|
| 41 |
+
- `1_original_code.py` - Code before any modifications
|
| 42 |
+
- `2_after_modifications.py` - After string replacements
|
| 43 |
+
- `3_validated_code.py` - Final validated code
|
| 44 |
+
|
| 45 |
+
**Benefits:**
|
| 46 |
+
- Easy to see exactly what code is being executed
|
| 47 |
+
- Can debug indentation issues visually
|
| 48 |
+
- Compare before/after modifications
|
| 49 |
+
|
| 50 |
+
### Solution 3: Bulletproof Template Generator ✅
|
| 51 |
+
**Files:** `chat_interface.py`
|
| 52 |
+
|
| 53 |
+
Improvements:
|
| 54 |
+
1. **Column Validation Before Table Addition**
|
| 55 |
+
- Only adds table names after validating it has insertable columns
|
| 56 |
+
- Prevents orphaned function calls
|
| 57 |
+
|
| 58 |
+
2. **Better Type Handling**
|
| 59 |
+
- Handles VARCHAR(n) length specifications
|
| 60 |
+
- Supports BIGINT, DOUBLE, NUMERIC, BOOLEAN
|
| 61 |
+
- Auto-detects IDENTITY/AUTOINCREMENT columns
|
| 62 |
+
- More robust column name filtering
|
| 63 |
+
|
| 64 |
+
3. **Safety Check**
|
| 65 |
+
- Raises clear error if no valid tables found
|
| 66 |
+
- Prevents generation of empty main() functions
|
| 67 |
+
|
| 68 |
+
### Solution 4: Source Tracking ✅
|
| 69 |
+
**Files:** `chat_interface.py`
|
| 70 |
+
|
| 71 |
+
- Added `demo_builder.population_code_source` attribute
|
| 72 |
+
- Tracks whether code came from "template" or "llm"
|
| 73 |
+
- All execution paths now check this flag
|
| 74 |
+
|
| 75 |
+
## Testing
|
| 76 |
+
|
| 77 |
+
### Debug Scripts Created:
|
| 78 |
+
1. `debug_template_generation.py` - Test template with sample DDL
|
| 79 |
+
2. `debug_execution_modifications.py` - Trace code modifications
|
| 80 |
+
|
| 81 |
+
### Test Results:
|
| 82 |
+
- Template generates clean, valid Python (59-72 lines)
|
| 83 |
+
- Code compiles successfully before modifications
|
| 84 |
+
- Modified code only fails when replacements break indentation
|
| 85 |
+
|
| 86 |
+
## Next Steps
|
| 87 |
+
|
| 88 |
+
### Completed ✅:
|
| 89 |
+
1. ✅ Fix template approach - make bulletproof
|
| 90 |
+
2. ✅ Stop execute_population_script from modifying template code
|
| 91 |
+
3. ✅ Add comprehensive diagnostics
|
| 92 |
+
|
| 93 |
+
### Remaining:
|
| 94 |
+
1. Add hybrid LLM approach as fallback (if template fails)
|
| 95 |
+
2. Test with actual user DDL
|
| 96 |
+
|
| 97 |
+
## How to Use
|
| 98 |
+
|
| 99 |
+
### For Template Code:
|
| 100 |
+
```python
|
| 101 |
+
# Generation
|
| 102 |
+
code = interface.get_fallback_population_code(schema_info)
|
| 103 |
+
interface.demo_builder.population_code_source = "template"
|
| 104 |
+
|
| 105 |
+
# Execution
|
| 106 |
+
success, msg = execute_population_script(
|
| 107 |
+
code,
|
| 108 |
+
schema_name,
|
| 109 |
+
skip_modifications=True
|
| 110 |
+
)
|
| 111 |
+
```
|
| 112 |
+
|
| 113 |
+
### For LLM Code:
|
| 114 |
+
```python
|
| 115 |
+
# Generation (via LLM)
|
| 116 |
+
code = generate_from_llm(...)
|
| 117 |
+
interface.demo_builder.population_code_source = "llm"
|
| 118 |
+
|
| 119 |
+
# Execution (with safety fixes)
|
| 120 |
+
success, msg = execute_population_script(
|
| 121 |
+
code,
|
| 122 |
+
schema_name,
|
| 123 |
+
skip_modifications=False
|
| 124 |
+
)
|
| 125 |
+
```
|
| 126 |
+
|
| 127 |
+
## Debugging
|
| 128 |
+
|
| 129 |
+
If errors still occur:
|
| 130 |
+
1. Check `/tmp/demowire_debug/` for saved code files
|
| 131 |
+
2. Compare the 3 versions to see what changed
|
| 132 |
+
3. Look for console output showing which path was taken:
|
| 133 |
+
- "π― Template-generated code detected"
|
| 134 |
+
- "⚠️ LLM-generated code - applying safety fixes"
|
| 135 |
+
|
| 136 |
+
## Key Files Modified
|
| 137 |
+
|
| 138 |
+
1. **demo_prep.py**
|
| 139 |
+
- Lines 302-309: Added `skip_modifications` parameter
|
| 140 |
+
- Lines 346-355: Added debug file saving
|
| 141 |
+
- Lines 356-382: Added conditional modification logic
|
| 142 |
+
- Lines 473-476: Added validated code saving
|
| 143 |
+
|
| 144 |
+
2. **chat_interface.py**
|
| 145 |
+
- Line 1251: Added `population_code_source` tracking
|
| 146 |
+
- Lines 1040-1106: Improved template column/type handling
|
| 147 |
+
- Lines 1315-1359: Added source checking before execution
|
| 148 |
+
- Multiple locations: Updated all execute_population_script calls
|
| 149 |
+
|
| 150 |
+
## Summary
|
| 151 |
+
|
| 152 |
+
The fix ensures that:
|
| 153 |
+
- ✅ Template code stays clean (no modifications)
|
| 154 |
+
- ✅ LLM code gets safety fixes
|
| 155 |
+
- ✅ All code is saved for debugging
|
| 156 |
+
- ✅ Template handles edge cases better
|
| 157 |
+
- ✅ Clear distinction between code sources
|
| 158 |
+
|
| 159 |
+
The template approach is now production-ready!
|
| 160 |
+
|
chat_interface.py
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
demo_prep.py
CHANGED
|
@@ -299,78 +299,249 @@ def extract_python_code(mixed_content):
|
|
| 299 |
return "\n".join(python_lines)
|
| 300 |
|
| 301 |
|
| 302 |
-
def execute_population_script(python_code, schema_name):
|
| 303 |
-
"""Execute population script with simple, reliable approach
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 304 |
try:
|
| 305 |
-
#
|
| 306 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 307 |
|
| 308 |
if not clean_code.strip():
|
| 309 |
return False, "No Python code found in population results"
|
| 310 |
|
| 311 |
-
#
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
)
|
| 316 |
-
|
| 317 |
-
# Simple and safe schema replacement - just replace the placeholder
|
| 318 |
-
cleaned_code = cleaned_code.replace("os.getenv('SNOWFLAKE_SCHEMA')", f"'{schema_name}'")
|
| 319 |
-
cleaned_code = cleaned_code.replace('os.getenv("SNOWFLAKE_SCHEMA")', f'"{schema_name}"')
|
| 320 |
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 325 |
|
| 326 |
-
|
| 327 |
-
|
| 328 |
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
cleaned_code = re.sub(r',\s*\?', ', %s', cleaned_code)
|
| 333 |
|
| 334 |
-
#
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
print("π STARTING DATA POPULATION EXECUTION")
|
| 339 |
-
print("=" * 50)"""
|
| 340 |
-
)
|
| 341 |
|
| 342 |
-
#
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
# Find all populate function definitions and add logging
|
| 346 |
-
def add_function_logging(match):
|
| 347 |
-
func_name = match.group(1)
|
| 348 |
-
table_name = func_name.replace('populate_', '').upper()
|
| 349 |
-
return f"""def {func_name}():
|
| 350 |
-
print("π Populating {table_name} with sample records...")"""
|
| 351 |
-
|
| 352 |
-
# Use regex to find and replace all populate function definitions
|
| 353 |
-
cleaned_code = re.sub(
|
| 354 |
-
r'def (populate_\w+)\(\):',
|
| 355 |
-
add_function_logging,
|
| 356 |
-
cleaned_code
|
| 357 |
-
)
|
| 358 |
|
| 359 |
-
#
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 374 |
|
| 375 |
# Import all necessary modules for execution environment
|
| 376 |
import random
|
|
@@ -427,6 +598,8 @@ def execute_population_script(python_code, schema_name):
|
|
| 427 |
raise e
|
| 428 |
|
| 429 |
# Execute the code directly - the logging is now built into the generated code
|
|
|
|
|
|
|
| 430 |
exec(cleaned_code, exec_globals)
|
| 431 |
|
| 432 |
return True, "Population script executed successfully"
|
|
|
|
| 299 |
return "\n".join(python_lines)
|
| 300 |
|
| 301 |
|
| 302 |
+
def execute_population_script(python_code, schema_name, skip_modifications=False):
|
| 303 |
+
"""Execute population script with simple, reliable approach
|
| 304 |
+
|
| 305 |
+
Args:
|
| 306 |
+
python_code: The Python code to execute
|
| 307 |
+
schema_name: The Snowflake schema name
|
| 308 |
+
skip_modifications: If True, skip all string replacements (for template-generated code)
|
| 309 |
+
"""
|
| 310 |
+
import re
|
| 311 |
+
|
| 312 |
+
def replace_with_indentation(code, pattern, replacement_lines):
|
| 313 |
+
"""Replace pattern with multiple lines, preserving indentation"""
|
| 314 |
+
lines = code.split('\n')
|
| 315 |
+
new_lines = []
|
| 316 |
+
for line in lines:
|
| 317 |
+
if pattern in line:
|
| 318 |
+
# Get the indentation of the current line
|
| 319 |
+
indent = len(line) - len(line.lstrip())
|
| 320 |
+
indent_str = ' ' * indent
|
| 321 |
+
# Add the first line (keep original)
|
| 322 |
+
new_lines.append(line)
|
| 323 |
+
# Add replacement lines with same indentation
|
| 324 |
+
for repl_line in replacement_lines:
|
| 325 |
+
new_lines.append(indent_str + repl_line)
|
| 326 |
+
else:
|
| 327 |
+
new_lines.append(line)
|
| 328 |
+
return '\n'.join(new_lines)
|
| 329 |
+
|
| 330 |
try:
|
| 331 |
+
# NEW: Check if code is already clean Python (no markdown wrapping)
|
| 332 |
+
# If it compiles as-is, don't extract/modify it!
|
| 333 |
+
try:
|
| 334 |
+
compile(python_code, '<initial_check>', 'exec')
|
| 335 |
+
# It's already valid Python! Use as-is!
|
| 336 |
+
clean_code = python_code
|
| 337 |
+
print("β
Code is already clean Python - using as-is without extraction")
|
| 338 |
+
except:
|
| 339 |
+
# Has markdown wrapping or other issues - extract it
|
| 340 |
+
clean_code = extract_python_code(python_code)
|
| 341 |
+
print("β οΈ Code needed extraction from markdown")
|
| 342 |
|
| 343 |
if not clean_code.strip():
|
| 344 |
return False, "No Python code found in population results"
|
| 345 |
|
| 346 |
+
# DEBUG: Save original code
|
| 347 |
+
import tempfile
|
| 348 |
+
import os as os_module
|
| 349 |
+
debug_dir = os_module.path.join(tempfile.gettempdir(), 'demowire_debug')
|
| 350 |
+
os_module.makedirs(debug_dir, exist_ok=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 351 |
|
| 352 |
+
with open(os_module.path.join(debug_dir, '1_original_code.py'), 'w') as f:
|
| 353 |
+
f.write(clean_code)
|
| 354 |
+
print(f"π Saved original code to {debug_dir}/1_original_code.py")
|
| 355 |
+
|
| 356 |
+
# Skip all modifications if this is template-generated code
|
| 357 |
+
if skip_modifications:
|
| 358 |
+
print("π― Template-generated code detected - skipping all modifications")
|
| 359 |
+
cleaned_code = clean_code
|
| 360 |
+
# Only do schema replacement - this is always safe
|
| 361 |
+
cleaned_code = cleaned_code.replace("os.getenv('SNOWFLAKE_SCHEMA')", f"'{schema_name}'")
|
| 362 |
+
cleaned_code = cleaned_code.replace('os.getenv("SNOWFLAKE_SCHEMA")', f'"{schema_name}"')
|
| 363 |
+
else:
|
| 364 |
+
print("β οΈ LLM-generated code - applying safety fixes")
|
| 365 |
+
# CRITICAL FIX: Remove schema from conn_params to avoid duplicate schema parameter
|
| 366 |
+
# Only add if not already present (new templates include it by default)
|
| 367 |
+
if "conn_params.pop('schema'" not in clean_code:
|
| 368 |
+
cleaned_code = replace_with_indentation(
|
| 369 |
+
clean_code,
|
| 370 |
+
"conn_params = get_snowflake_connection_params()",
|
| 371 |
+
["conn_params.pop('schema', None) # Remove schema to avoid duplicate"]
|
| 372 |
+
)
|
| 373 |
+
else:
|
| 374 |
+
cleaned_code = clean_code
|
| 375 |
+
print("β
Schema pop already in code, skipping injection")
|
| 376 |
+
|
| 377 |
+
# Simple and safe schema replacement - just replace the placeholder
|
| 378 |
+
cleaned_code = cleaned_code.replace("os.getenv('SNOWFLAKE_SCHEMA')", f"'{schema_name}'")
|
| 379 |
+
cleaned_code = cleaned_code.replace('os.getenv("SNOWFLAKE_SCHEMA")', f'"{schema_name}"')
|
| 380 |
+
|
| 381 |
+
# FIX: Remove fake.unique() calls that cause "duplicated values after 1,000 iterations" error
|
| 382 |
+
cleaned_code = cleaned_code.replace("fake.unique.word()", "fake.word()")
|
| 383 |
+
cleaned_code = cleaned_code.replace("fake.unique.email()", "fake.email()")
|
| 384 |
+
cleaned_code = cleaned_code.replace("fake.unique.company()", "fake.company()")
|
| 385 |
|
| 386 |
+
# FIX: Truncate phone numbers to avoid extension overflow (e.g., '790-923-3730x07350')
|
| 387 |
+
cleaned_code = cleaned_code.replace("fake.phone_number()", "fake.phone_number()[:20]")
|
| 388 |
|
| 389 |
+
# FIX: Convert SQLite-style ? placeholders to Snowflake-style %s placeholders
|
| 390 |
+
cleaned_code = re.sub(r'\bVALUES\s*\(\?', 'VALUES (%s', cleaned_code)
|
| 391 |
+
cleaned_code = re.sub(r',\s*\?', ', %s', cleaned_code)
|
|
|
|
| 392 |
|
| 393 |
+
# DEBUG: Save modified code
|
| 394 |
+
with open(os_module.path.join(debug_dir, '2_after_modifications.py'), 'w') as f:
|
| 395 |
+
f.write(cleaned_code)
|
| 396 |
+
print(f"π Saved modified code to {debug_dir}/2_after_modifications.py")
|
|
|
|
|
|
|
|
|
|
| 397 |
|
| 398 |
+
# DISABLED ALL CODE-MODIFYING REGEXES!
|
| 399 |
+
# The new template generator creates clean, complete code
|
| 400 |
+
# These regexes were breaking the indentation
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 401 |
|
| 402 |
+
# NO LONGER NEEDED: Template already has logging
|
| 403 |
+
# NO LONGER NEEDED: Template already has proper function signatures
|
| 404 |
+
# NO LONGER NEEDED: Template already has print statements
|
| 405 |
+
|
| 406 |
+
# FIX INDENTATION: Try to fix common indentation issues before execution
|
| 407 |
+
def fix_python_indentation(code):
|
| 408 |
+
"""Fix common Python indentation issues aggressively"""
|
| 409 |
+
import textwrap
|
| 410 |
+
|
| 411 |
+
# Replace tabs with 4 spaces
|
| 412 |
+
code = code.replace('\t', ' ')
|
| 413 |
+
|
| 414 |
+
# Remove any leading/trailing whitespace from the entire code
|
| 415 |
+
code = code.strip()
|
| 416 |
+
|
| 417 |
+
# AGGRESSIVE FIX: Detect if top-level code has indentation and remove it
|
| 418 |
+
lines = code.split('\n')
|
| 419 |
+
|
| 420 |
+
# Check if the first non-empty, non-comment line is indented
|
| 421 |
+
# (this would be a syntax error)
|
| 422 |
+
first_code_line_idx = None
|
| 423 |
+
for i, line in enumerate(lines):
|
| 424 |
+
stripped = line.strip()
|
| 425 |
+
if stripped and not stripped.startswith('#'):
|
| 426 |
+
first_code_line_idx = i
|
| 427 |
+
break
|
| 428 |
+
|
| 429 |
+
if first_code_line_idx is not None:
|
| 430 |
+
first_line = lines[first_code_line_idx]
|
| 431 |
+
if first_line[0] in (' ', '\t'):
|
| 432 |
+
# Top-level code is indented! Fix it
|
| 433 |
+
print(f"β οΈ Detected indented top-level code, removing excess indentation...")
|
| 434 |
+
# Use textwrap.dedent to remove common leading whitespace
|
| 435 |
+
code = textwrap.dedent(code)
|
| 436 |
+
|
| 437 |
+
# Now normalize all indentation to 4 spaces
|
| 438 |
+
lines = code.split('\n')
|
| 439 |
+
fixed_lines = []
|
| 440 |
+
in_string = False
|
| 441 |
+
string_delimiter = None
|
| 442 |
+
|
| 443 |
+
for line_num, line in enumerate(lines):
|
| 444 |
+
# Don't mess with empty lines
|
| 445 |
+
if not line.strip():
|
| 446 |
+
fixed_lines.append('')
|
| 447 |
+
continue
|
| 448 |
+
|
| 449 |
+
# Check for top-level keywords that should NEVER be indented
|
| 450 |
+
stripped = line.strip()
|
| 451 |
+
if re.match(r'^(import |from |def |class |if __name__|@)', stripped):
|
| 452 |
+
# Check if this is at top level (should have no indent)
|
| 453 |
+
# Look backwards to see if we're inside a def/class
|
| 454 |
+
in_def_or_class = False
|
| 455 |
+
for prev_line in reversed(fixed_lines):
|
| 456 |
+
if prev_line.strip().startswith('def ') or prev_line.strip().startswith('class '):
|
| 457 |
+
in_def_or_class = True
|
| 458 |
+
break
|
| 459 |
+
if prev_line.strip() and not prev_line[0].isspace() and prev_line.strip() not in ['', 'import ', 'from ']:
|
| 460 |
+
break
|
| 461 |
+
|
| 462 |
+
if not in_def_or_class and stripped.startswith(('import ', 'from ', 'if __name__')):
|
| 463 |
+
# Top-level import or main block - no indentation!
|
| 464 |
+
fixed_lines.append(stripped)
|
| 465 |
+
continue
|
| 466 |
+
|
| 467 |
+
# For other lines, normalize indentation
|
| 468 |
+
leading_spaces = len(line) - len(stripped)
|
| 469 |
+
# Round to nearest multiple of 4
|
| 470 |
+
indent_level = round(leading_spaces / 4)
|
| 471 |
+
fixed_line = (' ' * indent_level) + stripped
|
| 472 |
+
fixed_lines.append(fixed_line)
|
| 473 |
+
|
| 474 |
+
return '\n'.join(fixed_lines)
|
| 475 |
+
|
| 476 |
+
# Try to compile FIRST - only fix if broken
|
| 477 |
+
code_is_valid = False
|
| 478 |
+
try:
|
| 479 |
+
compile(cleaned_code, '<test>', 'exec')
|
| 480 |
+
print("β
Code syntax validated before execution - using as-is")
|
| 481 |
+
code_is_valid = True
|
| 482 |
+
|
| 483 |
+
# DEBUG: Save validated code
|
| 484 |
+
with open(os_module.path.join(debug_dir, '3_validated_code.py'), 'w') as f:
|
| 485 |
+
f.write(cleaned_code)
|
| 486 |
+
print(f"π Saved validated code to {debug_dir}/3_validated_code.py")
|
| 487 |
+
|
| 488 |
+
except SyntaxError as e:
|
| 489 |
+
print(f"β οΈ Syntax error at line {e.lineno}: {e.msg}")
|
| 490 |
+
if e.lineno:
|
| 491 |
+
lines = cleaned_code.split('\n')
|
| 492 |
+
start = max(0, e.lineno - 5)
|
| 493 |
+
end = min(len(lines), e.lineno + 5)
|
| 494 |
+
print(f"\nCode context around line {e.lineno}:")
|
| 495 |
+
for i in range(start, end):
|
| 496 |
+
marker = ">>> " if i == e.lineno - 1 else " "
|
| 497 |
+
# Show repr to see whitespace
|
| 498 |
+
print(f"{marker}{i+1:3}: {repr(lines[i])}")
|
| 499 |
+
|
| 500 |
+
print("\nπ§ Attempting auto-fix...")
|
| 501 |
+
|
| 502 |
+
# Try to fix indentation
|
| 503 |
+
cleaned_code = fix_python_indentation(cleaned_code)
|
| 504 |
+
|
| 505 |
+
# Try compiling again
|
| 506 |
+
try:
|
| 507 |
+
compile(cleaned_code, '<test>', 'exec')
|
| 508 |
+
print("β
Fixed indentation issues automatically!")
|
| 509 |
+
code_is_valid = True
|
| 510 |
+
except SyntaxError as e2:
|
| 511 |
+
print(f"\nβ Auto-fix failed. Error at line {e2.lineno}: {e2.msg}")
|
| 512 |
+
if e2.lineno:
|
| 513 |
+
lines = cleaned_code.split('\n')
|
| 514 |
+
start = max(0, e2.lineno - 3)
|
| 515 |
+
end = min(len(lines), e2.lineno + 2)
|
| 516 |
+
print(f"\nCode context after fix attempt:")
|
| 517 |
+
for i in range(start, end):
|
| 518 |
+
marker = ">>> " if i == e2.lineno - 1 else " "
|
| 519 |
+
print(f"{marker}{i+1:3}: {lines[i]}")
|
| 520 |
+
|
| 521 |
+
# Save failed code for debugging
|
| 522 |
+
import tempfile
|
| 523 |
+
with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f:
|
| 524 |
+
f.write(cleaned_code)
|
| 525 |
+
print(f"\nπ Saved failed code to: {f.name}")
|
| 526 |
+
|
| 527 |
+
# Raise the error so it gets reported properly
|
| 528 |
+
return False, f"Population execution failed: {e2.msg} (<population_script>, line {e2.lineno})"
|
| 529 |
+
|
| 530 |
+
# Only proceed if code is valid
|
| 531 |
+
if not code_is_valid:
|
| 532 |
+
return False, "Population code failed validation"
|
| 533 |
+
|
| 534 |
+
# DEBUG: Save the code that's about to be executed
|
| 535 |
+
import tempfile
|
| 536 |
+
with tempfile.NamedTemporaryFile(mode='w', suffix='_EXECUTING.py', delete=False, dir='/tmp') as f:
|
| 537 |
+
f.write(cleaned_code)
|
| 538 |
+
debug_file = f.name
|
| 539 |
+
print(f"\nπ EXECUTING CODE SAVED TO: {debug_file}")
|
| 540 |
+
print(f" You can inspect it with: cat {debug_file}")
|
| 541 |
+
print(f"\n Lines 75-80:")
|
| 542 |
+
lines = cleaned_code.split('\n')
|
| 543 |
+
for i in range(74, min(80, len(lines))):
|
| 544 |
+
print(f" {i+1:3}: {lines[i]}")
|
| 545 |
|
| 546 |
# Import all necessary modules for execution environment
|
| 547 |
import random
|
|
|
|
| 598 |
raise e
|
| 599 |
|
| 600 |
# Execute the code directly - the logging is now built into the generated code
|
| 601 |
+
# CRITICAL: Set __name__ so the if __name__ == "__main__" block runs
|
| 602 |
+
exec_globals['__name__'] = '__main__'
|
| 603 |
exec(cleaned_code, exec_globals)
|
| 604 |
|
| 605 |
return True, "Population script executed successfully"
|
liveboard_creator.py
CHANGED
|
@@ -1019,12 +1019,16 @@ Examples:
|
|
| 1019 |
text_content = viz_config.get('text_content', viz_config.get('name', ''))
|
| 1020 |
bg_color = viz_config.get('background_color', '#2E3D4D') # Default dark background
|
| 1021 |
|
| 1022 |
-
# TEXT tiles in ThoughtSpot
|
| 1023 |
text_tml = {
|
| 1024 |
'id': viz_config['id'],
|
| 1025 |
'answer': {
|
| 1026 |
'name': viz_config.get('name', 'Text'),
|
| 1027 |
'description': viz_config.get('description', ''),
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1028 |
'text_tile': {
|
| 1029 |
'text': text_content,
|
| 1030 |
'background_color': bg_color
|
|
|
|
| 1019 |
text_content = viz_config.get('text_content', viz_config.get('name', ''))
|
| 1020 |
bg_color = viz_config.get('background_color', '#2E3D4D') # Default dark background
|
| 1021 |
|
| 1022 |
+
# TEXT tiles in ThoughtSpot need tables field even though they don't query data
|
| 1023 |
text_tml = {
|
| 1024 |
'id': viz_config['id'],
|
| 1025 |
'answer': {
|
| 1026 |
'name': viz_config.get('name', 'Text'),
|
| 1027 |
'description': viz_config.get('description', ''),
|
| 1028 |
+
'tables': [{
|
| 1029 |
+
'id': self.model_name,
|
| 1030 |
+
'name': self.model_name
|
| 1031 |
+
}],
|
| 1032 |
'text_tile': {
|
| 1033 |
'text': text_content,
|
| 1034 |
'background_color': bg_color
|
requirements.txt
CHANGED
|
@@ -15,6 +15,7 @@ sqlparse>=0.4.4
|
|
| 15 |
snowflake-connector-python>=3.6.0
|
| 16 |
cryptography>=41.0.0 # Required for key pair authentication
|
| 17 |
PyYAML>=6.0.0
|
|
|
|
| 18 |
|
| 19 |
# Data Processing
|
| 20 |
faker>=20.1.0
|
|
|
|
| 15 |
snowflake-connector-python>=3.6.0
|
| 16 |
cryptography>=41.0.0 # Required for key pair authentication
|
| 17 |
PyYAML>=6.0.0
|
| 18 |
+
supabase>=2.0.0 # PostgreSQL-based settings persistence
|
| 19 |
|
| 20 |
# Data Processing
|
| 21 |
faker>=20.1.0
|
schema_utils.py
CHANGED
|
@@ -62,17 +62,45 @@ def parse_ddl_schema(ddl_content: str) -> Dict[str, Any]:
|
|
| 62 |
|
| 63 |
for table_name, columns_def in matches:
|
| 64 |
columns = []
|
| 65 |
-
|
| 66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
|
| 68 |
for line in column_lines:
|
| 69 |
line = line.strip()
|
| 70 |
if line and not line.startswith('PRIMARY KEY') and not line.startswith('FOREIGN KEY'):
|
| 71 |
-
# Extract column name and type
|
| 72 |
parts = line.split()
|
| 73 |
if parts:
|
| 74 |
col_name = parts[0]
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
columns.append({
|
| 77 |
'name': col_name,
|
| 78 |
'type': col_type
|
|
|
|
| 62 |
|
| 63 |
for table_name, columns_def in matches:
|
| 64 |
columns = []
|
| 65 |
+
|
| 66 |
+
# Smart column parsing - split by comma but NOT inside parentheses
|
| 67 |
+
column_lines = []
|
| 68 |
+
current_col = ""
|
| 69 |
+
paren_depth = 0
|
| 70 |
+
|
| 71 |
+
for char in columns_def:
|
| 72 |
+
if char == '(':
|
| 73 |
+
paren_depth += 1
|
| 74 |
+
current_col += char
|
| 75 |
+
elif char == ')':
|
| 76 |
+
paren_depth -= 1
|
| 77 |
+
current_col += char
|
| 78 |
+
elif char == ',' and paren_depth == 0:
|
| 79 |
+
# This is a column separator, not inside type definition
|
| 80 |
+
if current_col.strip():
|
| 81 |
+
column_lines.append(current_col.strip())
|
| 82 |
+
current_col = ""
|
| 83 |
+
else:
|
| 84 |
+
current_col += char
|
| 85 |
+
|
| 86 |
+
# Don't forget the last column
|
| 87 |
+
if current_col.strip():
|
| 88 |
+
column_lines.append(current_col.strip())
|
| 89 |
|
| 90 |
for line in column_lines:
|
| 91 |
line = line.strip()
|
| 92 |
if line and not line.startswith('PRIMARY KEY') and not line.startswith('FOREIGN KEY'):
|
| 93 |
+
# Extract column name and type (including parameters like DECIMAL(10,2))
|
| 94 |
parts = line.split()
|
| 95 |
if parts:
|
| 96 |
col_name = parts[0]
|
| 97 |
+
# Get the FULL type including parameters (e.g., DECIMAL(3,2), VARCHAR(100))
|
| 98 |
+
# Use regex to capture type with optional parameters
|
| 99 |
+
type_match = re.search(r'(\w+(?:\([^)]+\))?)', line)
|
| 100 |
+
if type_match and type_match.start() > 0: # Make sure we're past the column name
|
| 101 |
+
col_type = type_match.group(1)
|
| 102 |
+
else:
|
| 103 |
+
col_type = parts[1] if len(parts) > 1 else 'VARCHAR'
|
| 104 |
columns.append({
|
| 105 |
'name': col_name,
|
| 106 |
'type': col_type
|
supabase_client.py
CHANGED
|
@@ -355,10 +355,21 @@ def load_gradio_settings(email: str) -> Dict[str, Any]:
|
|
| 355 |
"default_data_volume": "Medium (10K rows)",
|
| 356 |
"default_warehouse": "COMPUTE_WH",
|
| 357 |
"default_database": "DEMO_DB",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 358 |
|
| 359 |
# ThoughtSpot Connection
|
| 360 |
"thoughtspot_url": "",
|
| 361 |
"thoughtspot_username": "",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 362 |
|
| 363 |
# Advanced Options
|
| 364 |
"batch_size": 5000,
|
|
|
|
| 355 |
"default_data_volume": "Medium (10K rows)",
|
| 356 |
"default_warehouse": "COMPUTE_WH",
|
| 357 |
"default_database": "DEMO_DB",
|
| 358 |
+
|
| 359 |
+
# Data Generation Settings
|
| 360 |
+
"fact_table_size": "10000",
|
| 361 |
+
"dim_table_size": "100",
|
| 362 |
|
| 363 |
# ThoughtSpot Connection
|
| 364 |
"thoughtspot_url": "",
|
| 365 |
"thoughtspot_username": "",
|
| 366 |
+
"liveboard_name": "",
|
| 367 |
+
|
| 368 |
+
# Snowflake Connection
|
| 369 |
+
"snowflake_account": "",
|
| 370 |
+
"snowflake_user": "",
|
| 371 |
+
"snowflake_role": "ACCOUNTADMIN",
|
| 372 |
+
"default_schema": "PUBLIC",
|
| 373 |
|
| 374 |
# Advanced Options
|
| 375 |
"batch_size": 5000,
|
thoughtspot_deployer.py
CHANGED
|
@@ -323,7 +323,13 @@ class ThoughtSpotDeployer:
|
|
| 323 |
for col_name in table_cols:
|
| 324 |
if col_name.endswith('ID') and col_name != f"{table_name_upper}ID":
|
| 325 |
# This looks like a foreign key - find the target table
|
| 326 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 327 |
|
| 328 |
# Check if target table exists in THIS deployment AND it's not the same table
|
| 329 |
# IMPORTANT: Only create joins to tables in the same schema/connection
|
|
@@ -673,8 +679,14 @@ class ThoughtSpotDeployer:
|
|
| 673 |
|
| 674 |
# Check if this looks like a foreign key (ends with ID but isn't the table's own ID)
|
| 675 |
if col_name.endswith('ID') and col_name != f"{table_name_upper}ID":
|
| 676 |
-
# Infer the target table name (
|
| 677 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 678 |
|
| 679 |
# Check if the target table exists in this schema
|
| 680 |
if potential_target not in table_names_upper and potential_target != table_name_upper:
|
|
@@ -741,18 +753,31 @@ class ThoughtSpotDeployer:
|
|
| 741 |
def _determine_column_type(self, data_type: str, col_name: str) -> tuple:
|
| 742 |
"""Determine if column should be ATTRIBUTE or MEASURE"""
|
| 743 |
base_type = data_type.upper().split('(')[0]
|
|
|
|
| 744 |
|
| 745 |
# SALEID is special - it's treated as a measure in the working example
|
| 746 |
-
if
|
| 747 |
return 'MEASURE', 'SUM'
|
| 748 |
|
| 749 |
-
# Numeric types
|
| 750 |
-
if base_type in ['NUMBER', 'DECIMAL', 'FLOAT', 'DOUBLE'
|
| 751 |
-
#
|
| 752 |
-
if
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 753 |
return 'MEASURE', 'SUM'
|
| 754 |
|
| 755 |
-
# Everything else is an attribute
|
| 756 |
return 'ATTRIBUTE', None
|
| 757 |
|
| 758 |
def _build_table_relationships(self, tables: Dict, foreign_keys: List) -> Dict:
|
|
@@ -1151,16 +1176,28 @@ class ThoughtSpotDeployer:
|
|
| 1151 |
|
| 1152 |
def log_progress(message):
|
| 1153 |
"""Helper to log progress both to console and callback"""
|
| 1154 |
-
print
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1155 |
if progress_callback:
|
| 1156 |
-
|
|
|
|
|
|
|
|
|
|
| 1157 |
|
| 1158 |
try:
|
|
|
|
|
|
|
|
|
|
| 1159 |
# STEP 0: Authenticate first!
|
| 1160 |
-
log_progress("
|
| 1161 |
if not self.authenticate():
|
| 1162 |
raise Exception("ThoughtSpot authentication failed")
|
| 1163 |
-
|
|
|
|
| 1164 |
|
| 1165 |
# Parse DDL
|
| 1166 |
tables, foreign_keys = self.parse_ddl(ddl)
|
|
@@ -1168,32 +1205,34 @@ class ThoughtSpotDeployer:
|
|
| 1168 |
raise Exception("No tables found in DDL")
|
| 1169 |
|
| 1170 |
# Validate foreign key references before deployment
|
| 1171 |
-
log_progress("π Validating foreign key references...")
|
| 1172 |
fk_warnings = self.validate_foreign_key_references(tables)
|
| 1173 |
if fk_warnings:
|
| 1174 |
-
log_progress("
|
| 1175 |
-
for warning in fk_warnings:
|
| 1176 |
-
log_progress(f" {warning}")
|
| 1177 |
-
log_progress("\n βΉοΈ These warnings indicate potential schema inconsistencies.")
|
| 1178 |
-
log_progress(" βΉοΈ Deployment will continue, but joins to missing tables will be skipped.\n")
|
| 1179 |
-
else:
|
| 1180 |
-
log_progress("β
All foreign key references are valid\n")
|
| 1181 |
|
| 1182 |
-
# Step 1: Create connection using
|
| 1183 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1184 |
if not connection_name:
|
| 1185 |
connection_name = demo_names['connection']
|
| 1186 |
|
| 1187 |
-
log_progress("
|
| 1188 |
-
log_progress(f" Connection name: {connection_name}")
|
| 1189 |
|
| 1190 |
# Check if connection already exists first
|
| 1191 |
existing_connection = self.get_connection_by_name(connection_name)
|
| 1192 |
if existing_connection:
|
| 1193 |
-
log_progress(f"β»οΈ Reusing existing connection: {connection_name}")
|
| 1194 |
connection_guid = existing_connection['header']['id_guid']
|
| 1195 |
connection_fqn = connection_guid
|
| 1196 |
results['connection'] = connection_name
|
|
|
|
| 1197 |
else:
|
| 1198 |
log_progress(f"π Creating new connection: {connection_name}")
|
| 1199 |
# Make connection name unique to avoid duplicates only if creating new
|
|
@@ -1273,122 +1312,140 @@ class ThoughtSpotDeployer:
|
|
| 1273 |
table_relationships = self._build_table_relationships(tables, foreign_keys)
|
| 1274 |
|
| 1275 |
# Step 2: TWO-PHASE TABLE CREATION (to avoid dependency order issues)
|
| 1276 |
-
|
| 1277 |
-
|
| 1278 |
-
|
| 1279 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1280 |
for table_name, columns in tables.items():
|
| 1281 |
-
|
| 1282 |
-
# Create table TML WITHOUT joins_with section (pass None for all_tables)
|
| 1283 |
table_tml = self.create_table_tml(table_name, columns, connection_name, database, schema, all_tables=None)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1284 |
|
| 1285 |
-
|
| 1286 |
-
|
| 1287 |
-
json={
|
| 1288 |
-
"metadata_tmls": [table_tml],
|
| 1289 |
-
"import_policy": "PARTIAL",
|
| 1290 |
-
"create_new": True
|
| 1291 |
-
}
|
| 1292 |
-
)
|
| 1293 |
-
|
| 1294 |
-
if response.status_code == 200:
|
| 1295 |
-
result = response.json()
|
| 1296 |
-
|
| 1297 |
-
# Handle both response formats (list or dict with 'object' key)
|
| 1298 |
-
if isinstance(result, list):
|
| 1299 |
-
objects = result
|
| 1300 |
-
elif isinstance(result, dict) and 'object' in result:
|
| 1301 |
-
objects = result['object']
|
| 1302 |
-
else:
|
| 1303 |
-
error = f"Table {table_name} failed: Unexpected response format: {type(result)}"
|
| 1304 |
-
log_progress(f" β {error}")
|
| 1305 |
-
results['errors'].append(error)
|
| 1306 |
-
continue
|
| 1307 |
|
| 1308 |
-
|
| 1309 |
-
|
| 1310 |
-
|
| 1311 |
-
|
| 1312 |
-
|
| 1313 |
-
log_progress(f" GUID: {table_guid}")
|
| 1314 |
-
results['tables'].append(table_name.upper())
|
| 1315 |
-
table_guids[table_name.upper()] = table_guid
|
| 1316 |
-
else:
|
| 1317 |
-
error = f"Table {table_name} failed: {obj.get('response', {}).get('status', {}).get('error_message')}"
|
| 1318 |
-
log_progress(f" β {error}")
|
| 1319 |
-
results['errors'].append(error)
|
| 1320 |
-
# DON'T return - continue creating other tables
|
| 1321 |
-
else:
|
| 1322 |
-
error = f"Table {table_name} failed: No object in response"
|
| 1323 |
-
log_progress(f" β {error}")
|
| 1324 |
-
results['errors'].append(error)
|
| 1325 |
else:
|
| 1326 |
-
error = f"
|
| 1327 |
log_progress(f" β {error}")
|
| 1328 |
results['errors'].append(error)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1329 |
|
| 1330 |
# Check if we created any tables successfully
|
| 1331 |
if not table_guids:
|
| 1332 |
-
log_progress(" β No tables were created successfully in
|
| 1333 |
return results
|
| 1334 |
|
| 1335 |
-
|
|
|
|
| 1336 |
|
| 1337 |
-
# PHASE 2: Update tables WITH joins
|
| 1338 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1339 |
for table_name, columns in tables.items():
|
| 1340 |
-
# Only add joins if the table was created successfully in Phase 1
|
| 1341 |
table_name_upper = table_name.upper()
|
|
|
|
|
|
|
| 1342 |
if table_name_upper not in table_guids:
|
| 1343 |
-
|
| 1344 |
continue
|
| 1345 |
|
| 1346 |
# Get the GUID for this table
|
| 1347 |
table_guid = table_guids[table_name_upper]
|
| 1348 |
-
|
| 1349 |
-
|
| 1350 |
# Create table TML WITH joins_with section AND the table GUID
|
| 1351 |
table_tml = self.create_table_tml(
|
| 1352 |
table_name, columns, connection_name, database, schema,
|
| 1353 |
all_tables=tables, table_guid=table_guid
|
| 1354 |
)
|
| 1355 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1356 |
response = self.session.post(
|
| 1357 |
f"{self.base_url}/api/rest/2.0/metadata/tml/import",
|
| 1358 |
json={
|
| 1359 |
-
"metadata_tmls":
|
| 1360 |
"import_policy": "PARTIAL",
|
| 1361 |
-
"create_new": False # Update existing
|
| 1362 |
}
|
| 1363 |
)
|
| 1364 |
|
| 1365 |
if response.status_code == 200:
|
| 1366 |
result = response.json()
|
| 1367 |
|
| 1368 |
-
# Handle both response formats
|
| 1369 |
if isinstance(result, list):
|
| 1370 |
objects = result
|
| 1371 |
elif isinstance(result, dict) and 'object' in result:
|
| 1372 |
objects = result['object']
|
| 1373 |
else:
|
| 1374 |
-
log_progress(f" β οΈ Unexpected response format for joins: {type(result)}")
|
| 1375 |
objects = []
|
| 1376 |
|
| 1377 |
-
|
| 1378 |
-
|
|
|
|
|
|
|
| 1379 |
if obj.get('response', {}).get('status', {}).get('status_code') == 'OK':
|
| 1380 |
-
|
| 1381 |
else:
|
| 1382 |
-
|
| 1383 |
-
|
| 1384 |
-
results['errors'].append(
|
| 1385 |
-
# Don't fail - table still exists without joins
|
| 1386 |
-
else:
|
| 1387 |
-
log_progress(f" β οΈ Could not add joins to {table_name.upper()}")
|
| 1388 |
else:
|
| 1389 |
-
log_progress(f" β οΈ
|
| 1390 |
|
| 1391 |
-
|
|
|
|
| 1392 |
actual_constraint_ids = {} # We'll generate these for the model
|
| 1393 |
|
| 1394 |
# Skip separate relationship creation for now
|
|
@@ -1396,11 +1453,10 @@ class ThoughtSpotDeployer:
|
|
| 1396 |
# self.create_relationships_separately(table_relationships, table_guids)
|
| 1397 |
|
| 1398 |
# Step 3: Extract constraint IDs from created tables
|
| 1399 |
-
log_progress("\n2οΈβ£.5 Extracting constraint IDs from created tables...")
|
| 1400 |
table_constraints = {}
|
| 1401 |
|
| 1402 |
for table_name, table_guid in table_guids.items():
|
| 1403 |
-
|
| 1404 |
|
| 1405 |
# Export table TML to get constraint IDs
|
| 1406 |
export_response = self.session.post(
|
|
@@ -1430,15 +1486,11 @@ class ThoughtSpotDeployer:
|
|
| 1430 |
'constraint_id': constraint_id,
|
| 1431 |
'destination': destination
|
| 1432 |
})
|
| 1433 |
-
log_progress(f" π Found join: {constraint_id} -> {destination}")
|
| 1434 |
-
|
| 1435 |
-
log_progress(f" β
Extracted constraints from {len(table_constraints)} tables")
|
| 1436 |
|
| 1437 |
# Step 4: Create model (semantic layer) with constraint references
|
| 1438 |
-
|
| 1439 |
-
# Use the demo_names that were generated earlier
|
| 1440 |
model_name = demo_names['model']
|
| 1441 |
-
log_progress(f"
|
| 1442 |
|
| 1443 |
# Use the enhanced model creation that includes constraint references
|
| 1444 |
model_tml = self._create_model_with_constraints(tables, foreign_keys, table_guids, table_constraints, model_name, connection_name)
|
|
@@ -1470,14 +1522,12 @@ class ThoughtSpotDeployer:
|
|
| 1470 |
if objects and len(objects) > 0:
|
| 1471 |
if objects[0].get('response', {}).get('status', {}).get('status_code') == 'OK':
|
| 1472 |
model_guid = objects[0].get('response', {}).get('header', {}).get('id_guid')
|
| 1473 |
-
|
| 1474 |
-
log_progress(f"
|
| 1475 |
-
log_progress(f" GUID: {model_guid}")
|
| 1476 |
results['model'] = model_name
|
| 1477 |
results['model_guid'] = model_guid
|
| 1478 |
|
| 1479 |
# Step 3.5: Enable Spotter on the model via API
|
| 1480 |
-
log_progress("\n3οΈβ£.5 Enabling Spotter on model...")
|
| 1481 |
try:
|
| 1482 |
enable_response = self.session.post(
|
| 1483 |
f"{self.base_url}/api/rest/2.0/metadata/sage/enable",
|
|
@@ -1486,15 +1536,13 @@ class ThoughtSpotDeployer:
|
|
| 1486 |
}
|
| 1487 |
)
|
| 1488 |
if enable_response.status_code == 200:
|
| 1489 |
-
log_progress(f"
|
| 1490 |
-
else:
|
| 1491 |
-
log_progress(f" β οΈ Could not enable Spotter: {enable_response.status_code}")
|
| 1492 |
-
log_progress(f" Response: {enable_response.text}")
|
| 1493 |
except Exception as spotter_error:
|
| 1494 |
-
|
| 1495 |
|
| 1496 |
# Step 4: Auto-create Liveboard from model
|
| 1497 |
-
|
|
|
|
| 1498 |
try:
|
| 1499 |
from liveboard_creator import create_liveboard_from_model
|
| 1500 |
|
|
@@ -1514,26 +1562,39 @@ class ThoughtSpotDeployer:
|
|
| 1514 |
)
|
| 1515 |
|
| 1516 |
if liveboard_result.get('success'):
|
| 1517 |
-
|
| 1518 |
-
log_progress(f"
|
| 1519 |
-
log_progress(f" GUID: {liveboard_result.get('liveboard_guid')}")
|
| 1520 |
results['liveboard'] = liveboard_result.get('liveboard_name')
|
| 1521 |
results['liveboard_guid'] = liveboard_result.get('liveboard_guid')
|
| 1522 |
else:
|
| 1523 |
error = f"Liveboard creation failed: {liveboard_result.get('error', 'Unknown error')}"
|
| 1524 |
-
log_progress(f" β οΈ {error}")
|
| 1525 |
results['errors'].append(error)
|
| 1526 |
except Exception as lb_error:
|
| 1527 |
error = f"Liveboard creation exception: {str(lb_error)}"
|
| 1528 |
-
log_progress(f" β οΈ {error}")
|
| 1529 |
results['errors'].append(error)
|
| 1530 |
-
import traceback
|
| 1531 |
-
traceback.print_exc()
|
| 1532 |
else:
|
| 1533 |
-
|
| 1534 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1535 |
print(f" β {error}")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1536 |
results['errors'].append(error)
|
|
|
|
| 1537 |
else:
|
| 1538 |
error = "Model failed: No objects in response"
|
| 1539 |
log_progress(f" β {error}")
|
|
@@ -1543,9 +1604,25 @@ class ThoughtSpotDeployer:
|
|
| 1543 |
results['success'] = len(results['errors']) == 0
|
| 1544 |
|
| 1545 |
except Exception as e:
|
|
|
|
| 1546 |
error_msg = str(e)
|
| 1547 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1548 |
results['errors'].append(error_msg)
|
|
|
|
| 1549 |
|
| 1550 |
return results
|
| 1551 |
|
|
|
|
| 323 |
for col_name in table_cols:
|
| 324 |
if col_name.endswith('ID') and col_name != f"{table_name_upper}ID":
|
| 325 |
# This looks like a foreign key - find the target table
|
| 326 |
+
# Handle both CUSTOMER_ID and CUSTOMERID formats
|
| 327 |
+
if col_name.endswith('_ID'):
|
| 328 |
+
# CUSTOMER_ID -> CUSTOMERS
|
| 329 |
+
potential_target = col_name[:-3] + 'S'
|
| 330 |
+
else:
|
| 331 |
+
# CUSTOMERID -> CUSTOMERS
|
| 332 |
+
potential_target = col_name[:-2] + 'S'
|
| 333 |
|
| 334 |
# Check if target table exists in THIS deployment AND it's not the same table
|
| 335 |
# IMPORTANT: Only create joins to tables in the same schema/connection
|
|
|
|
| 679 |
|
| 680 |
# Check if this looks like a foreign key (ends with ID but isn't the table's own ID)
|
| 681 |
if col_name.endswith('ID') and col_name != f"{table_name_upper}ID":
|
| 682 |
+
# Infer the target table name (CUSTOMER_ID -> CUSTOMERS, CUSTOMERID -> CUSTOMERS)
|
| 683 |
+
# Handle both CUSTOMER_ID and CUSTOMERID formats
|
| 684 |
+
if col_name.endswith('_ID'):
|
| 685 |
+
# CUSTOMER_ID -> CUSTOMERS
|
| 686 |
+
potential_target = col_name[:-3] + 'S'
|
| 687 |
+
else:
|
| 688 |
+
# CUSTOMERID -> CUSTOMERS
|
| 689 |
+
potential_target = col_name[:-2] + 'S'
|
| 690 |
|
| 691 |
# Check if the target table exists in this schema
|
| 692 |
if potential_target not in table_names_upper and potential_target != table_name_upper:
|
|
|
|
| 753 |
def _determine_column_type(self, data_type: str, col_name: str) -> tuple:
|
| 754 |
"""Determine if column should be ATTRIBUTE or MEASURE"""
|
| 755 |
base_type = data_type.upper().split('(')[0]
|
| 756 |
+
col_upper = col_name.upper()
|
| 757 |
|
| 758 |
# SALEID is special - it's treated as a measure in the working example
|
| 759 |
+
if col_upper == 'SALEID':
|
| 760 |
return 'MEASURE', 'SUM'
|
| 761 |
|
| 762 |
+
# Numeric types should be measures (unless they're IDs)
|
| 763 |
+
if base_type in ['NUMBER', 'DECIMAL', 'FLOAT', 'DOUBLE', 'INT', 'INTEGER', 'BIGINT']:
|
| 764 |
+
# Skip ID columns - they're join keys
|
| 765 |
+
if col_upper.endswith('ID'):
|
| 766 |
+
return 'ATTRIBUTE', None
|
| 767 |
+
|
| 768 |
+
# All other numeric columns are measures
|
| 769 |
+
# Determine aggregation based on column name patterns
|
| 770 |
+
if any(word in col_upper for word in ['QUANTITY', 'QTY', 'COUNT', 'SOLD']):
|
| 771 |
+
return 'MEASURE', 'SUM'
|
| 772 |
+
elif any(word in col_upper for word in ['PRICE', 'COST', 'REVENUE', 'AMOUNT', 'TOTAL', 'PROFIT', 'DISCOUNT', 'SHIPPING', 'TAX']):
|
| 773 |
+
return 'MEASURE', 'SUM'
|
| 774 |
+
elif any(word in col_upper for word in ['RATING', 'SCORE', 'MARGIN', 'PERCENT', 'RATE']):
|
| 775 |
+
return 'MEASURE', 'AVERAGE'
|
| 776 |
+
else:
|
| 777 |
+
# Default: numeric = measure with SUM
|
| 778 |
return 'MEASURE', 'SUM'
|
| 779 |
|
| 780 |
+
# Everything else is an attribute (strings, dates, booleans, etc.)
|
| 781 |
return 'ATTRIBUTE', None
|
| 782 |
|
| 783 |
def _build_table_relationships(self, tables: Dict, foreign_keys: List) -> Dict:
|
|
|
|
| 1176 |
|
| 1177 |
def log_progress(message):
|
| 1178 |
"""Helper to log progress both to console and callback"""
|
| 1179 |
+
# ALWAYS print to console FIRST
|
| 1180 |
+
import sys
|
| 1181 |
+
print(f"[ThoughtSpot] {message}", flush=True)
|
| 1182 |
+
sys.stdout.flush() # Force flush
|
| 1183 |
+
|
| 1184 |
+
# Then call callback if provided
|
| 1185 |
if progress_callback:
|
| 1186 |
+
try:
|
| 1187 |
+
progress_callback(message)
|
| 1188 |
+
except Exception as e:
|
| 1189 |
+
print(f"[Warning] Callback error: {e}", flush=True)
|
| 1190 |
|
| 1191 |
try:
|
| 1192 |
+
import time
|
| 1193 |
+
start_time = time.time()
|
| 1194 |
+
|
| 1195 |
# STEP 0: Authenticate first!
|
| 1196 |
+
log_progress("π Auth started...")
|
| 1197 |
if not self.authenticate():
|
| 1198 |
raise Exception("ThoughtSpot authentication failed")
|
| 1199 |
+
auth_time = time.time() - start_time
|
| 1200 |
+
log_progress(f"β
Auth complete ({auth_time:.1f}s)")
|
| 1201 |
|
| 1202 |
# Parse DDL
|
| 1203 |
tables, foreign_keys = self.parse_ddl(ddl)
|
|
|
|
| 1205 |
raise Exception("No tables found in DDL")
|
| 1206 |
|
| 1207 |
# Validate foreign key references before deployment
|
|
|
|
| 1208 |
fk_warnings = self.validate_foreign_key_references(tables)
|
| 1209 |
if fk_warnings:
|
| 1210 |
+
log_progress(f"β οΈ {len(fk_warnings)} FK warning(s) - joins to missing tables will be skipped")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1211 |
|
| 1212 |
+
# Step 1: Create connection using EXISTING schema name from Snowflake
|
| 1213 |
+
# Extract base name from schema (e.g., "20251114_173139_AMAZO_SAL" -> use as base)
|
| 1214 |
+
# This ensures ThoughtSpot objects point to the actual Snowflake schema
|
| 1215 |
+
schema_base = schema # Use the actual schema name from Snowflake
|
| 1216 |
+
|
| 1217 |
+
demo_names = {
|
| 1218 |
+
'schema': schema_base,
|
| 1219 |
+
'connection': f"DM{schema_base}_conn",
|
| 1220 |
+
'model': f"DM{schema_base}_model",
|
| 1221 |
+
'base': schema_base
|
| 1222 |
+
}
|
| 1223 |
+
|
| 1224 |
if not connection_name:
|
| 1225 |
connection_name = demo_names['connection']
|
| 1226 |
|
| 1227 |
+
log_progress(f"π Creating connection: {connection_name}...")
|
|
|
|
| 1228 |
|
| 1229 |
# Check if connection already exists first
|
| 1230 |
existing_connection = self.get_connection_by_name(connection_name)
|
| 1231 |
if existing_connection:
|
|
|
|
| 1232 |
connection_guid = existing_connection['header']['id_guid']
|
| 1233 |
connection_fqn = connection_guid
|
| 1234 |
results['connection'] = connection_name
|
| 1235 |
+
log_progress(f"β
Connection ready")
|
| 1236 |
else:
|
| 1237 |
log_progress(f"π Creating new connection: {connection_name}")
|
| 1238 |
# Make connection name unique to avoid duplicates only if creating new
|
|
|
|
| 1312 |
table_relationships = self._build_table_relationships(tables, foreign_keys)
|
| 1313 |
|
| 1314 |
# Step 2: TWO-PHASE TABLE CREATION (to avoid dependency order issues)
|
| 1315 |
+
table_count = len(tables)
|
| 1316 |
+
batch1_start = time.time()
|
| 1317 |
+
log_progress(f"π Batch 1 of 2: Creating {table_count} tables...")
|
| 1318 |
+
|
| 1319 |
+
# PHASE 1: Create all tables WITHOUT joins in ONE batch API call
|
| 1320 |
+
# Build array of all table TMLs
|
| 1321 |
+
table_tmls_batch1 = []
|
| 1322 |
+
table_names_order = [] # Track order for matching response
|
| 1323 |
+
|
| 1324 |
for table_name, columns in tables.items():
|
| 1325 |
+
print(f"[ThoughtSpot] Preparing {table_name.upper()}...", flush=True)
|
|
|
|
| 1326 |
table_tml = self.create_table_tml(table_name, columns, connection_name, database, schema, all_tables=None)
|
| 1327 |
+
table_tmls_batch1.append(table_tml)
|
| 1328 |
+
table_names_order.append(table_name.upper())
|
| 1329 |
+
|
| 1330 |
+
# Send all tables in ONE API call
|
| 1331 |
+
log_progress(f" Sending batch request for {len(table_tmls_batch1)} tables...")
|
| 1332 |
+
response = self.session.post(
|
| 1333 |
+
f"{self.base_url}/api/rest/2.0/metadata/tml/import",
|
| 1334 |
+
json={
|
| 1335 |
+
"metadata_tmls": table_tmls_batch1,
|
| 1336 |
+
"import_policy": "PARTIAL",
|
| 1337 |
+
"create_new": True
|
| 1338 |
+
}
|
| 1339 |
+
)
|
| 1340 |
|
| 1341 |
+
if response.status_code == 200:
|
| 1342 |
+
result = response.json()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1343 |
|
| 1344 |
+
# Handle both response formats (list or dict with 'object' key)
|
| 1345 |
+
if isinstance(result, list):
|
| 1346 |
+
objects = result
|
| 1347 |
+
elif isinstance(result, dict) and 'object' in result:
|
| 1348 |
+
objects = result['object']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1349 |
else:
|
| 1350 |
+
error = f"Batch 1 failed: Unexpected response format: {type(result)}"
|
| 1351 |
log_progress(f" β {error}")
|
| 1352 |
results['errors'].append(error)
|
| 1353 |
+
return results
|
| 1354 |
+
|
| 1355 |
+
# Process each table result
|
| 1356 |
+
for idx, obj in enumerate(objects):
|
| 1357 |
+
table_name = table_names_order[idx] if idx < len(table_names_order) else f"TABLE_{idx}"
|
| 1358 |
+
|
| 1359 |
+
if obj.get('response', {}).get('status', {}).get('status_code') == 'OK':
|
| 1360 |
+
table_guid = obj.get('response', {}).get('header', {}).get('id_guid')
|
| 1361 |
+
print(f"[ThoughtSpot] β
{table_name} created", flush=True)
|
| 1362 |
+
results['tables'].append(table_name)
|
| 1363 |
+
table_guids[table_name] = table_guid
|
| 1364 |
+
else:
|
| 1365 |
+
error_msg = obj.get('response', {}).get('status', {}).get('error_message', 'Unknown error')
|
| 1366 |
+
error = f"Table {table_name} failed: {error_msg}"
|
| 1367 |
+
print(f"[ThoughtSpot] β {table_name} failed: {error_msg}", flush=True)
|
| 1368 |
+
results['errors'].append(error)
|
| 1369 |
+
else:
|
| 1370 |
+
error = f"Batch 1 HTTP error: {response.status_code} - {response.text}"
|
| 1371 |
+
log_progress(f" β {error}")
|
| 1372 |
+
results['errors'].append(error)
|
| 1373 |
+
return results
|
| 1374 |
|
| 1375 |
# Check if we created any tables successfully
|
| 1376 |
if not table_guids:
|
| 1377 |
+
log_progress(" β No tables were created successfully in Batch 1")
|
| 1378 |
return results
|
| 1379 |
|
| 1380 |
+
batch1_time = time.time() - batch1_start
|
| 1381 |
+
log_progress(f"β
Batch 1 complete: {len(table_guids)} tables created ({batch1_time:.1f}s)")
|
| 1382 |
|
| 1383 |
+
# PHASE 2: Update tables WITH joins in ONE batch API call
|
| 1384 |
+
batch2_start = time.time()
|
| 1385 |
+
log_progress(f"π Batch 2 of 2: Adding joins to {len(table_guids)} tables...")
|
| 1386 |
+
|
| 1387 |
+
# Build array of all table update TMLs (with joins)
|
| 1388 |
+
table_tmls_batch2 = []
|
| 1389 |
+
table_names_order_batch2 = []
|
| 1390 |
+
|
| 1391 |
for table_name, columns in tables.items():
|
|
|
|
| 1392 |
table_name_upper = table_name.upper()
|
| 1393 |
+
|
| 1394 |
+
# Only add joins if the table was created successfully in Phase 1
|
| 1395 |
if table_name_upper not in table_guids:
|
| 1396 |
+
print(f"[ThoughtSpot] Skipping {table_name_upper} (not created)", flush=True)
|
| 1397 |
continue
|
| 1398 |
|
| 1399 |
# Get the GUID for this table
|
| 1400 |
table_guid = table_guids[table_name_upper]
|
| 1401 |
+
|
| 1402 |
+
print(f"[ThoughtSpot] Preparing joins for {table_name_upper}...", flush=True)
|
| 1403 |
# Create table TML WITH joins_with section AND the table GUID
|
| 1404 |
table_tml = self.create_table_tml(
|
| 1405 |
table_name, columns, connection_name, database, schema,
|
| 1406 |
all_tables=tables, table_guid=table_guid
|
| 1407 |
)
|
| 1408 |
+
table_tmls_batch2.append(table_tml)
|
| 1409 |
+
table_names_order_batch2.append(table_name_upper)
|
| 1410 |
+
|
| 1411 |
+
# Send all table updates in ONE API call
|
| 1412 |
+
if table_tmls_batch2:
|
| 1413 |
+
log_progress(f" Sending batch request to add joins to {len(table_tmls_batch2)} tables...")
|
| 1414 |
response = self.session.post(
|
| 1415 |
f"{self.base_url}/api/rest/2.0/metadata/tml/import",
|
| 1416 |
json={
|
| 1417 |
+
"metadata_tmls": table_tmls_batch2,
|
| 1418 |
"import_policy": "PARTIAL",
|
| 1419 |
+
"create_new": False # Update existing tables
|
| 1420 |
}
|
| 1421 |
)
|
| 1422 |
|
| 1423 |
if response.status_code == 200:
|
| 1424 |
result = response.json()
|
| 1425 |
|
| 1426 |
+
# Handle both response formats
|
| 1427 |
if isinstance(result, list):
|
| 1428 |
objects = result
|
| 1429 |
elif isinstance(result, dict) and 'object' in result:
|
| 1430 |
objects = result['object']
|
| 1431 |
else:
|
|
|
|
| 1432 |
objects = []
|
| 1433 |
|
| 1434 |
+
# Process each result
|
| 1435 |
+
for idx, obj in enumerate(objects):
|
| 1436 |
+
table_name = table_names_order_batch2[idx] if idx < len(table_names_order_batch2) else f"TABLE_{idx}"
|
| 1437 |
+
|
| 1438 |
if obj.get('response', {}).get('status', {}).get('status_code') == 'OK':
|
| 1439 |
+
print(f"[ThoughtSpot] β
{table_name} joins added", flush=True)
|
| 1440 |
else:
|
| 1441 |
+
error_msg = obj.get('response', {}).get('status', {}).get('error_message', 'Unknown error')
|
| 1442 |
+
print(f"[ThoughtSpot] β οΈ {table_name} joins failed: {error_msg}", flush=True)
|
| 1443 |
+
results['errors'].append(f"Joins for {table_name} failed: {error_msg}")
|
|
|
|
|
|
|
|
|
|
| 1444 |
else:
|
| 1445 |
+
log_progress(f" β οΈ Batch 2 HTTP error: {response.status_code}")
|
| 1446 |
|
| 1447 |
+
batch2_time = time.time() - batch2_start
|
| 1448 |
+
log_progress(f"β
Batch 2 complete: Joins added ({batch2_time:.1f}s)")
|
| 1449 |
actual_constraint_ids = {} # We'll generate these for the model
|
| 1450 |
|
| 1451 |
# Skip separate relationship creation for now
|
|
|
|
| 1453 |
# self.create_relationships_separately(table_relationships, table_guids)
|
| 1454 |
|
| 1455 |
# Step 3: Extract constraint IDs from created tables
|
|
|
|
| 1456 |
table_constraints = {}
|
| 1457 |
|
| 1458 |
for table_name, table_guid in table_guids.items():
|
| 1459 |
+
print(f"[ThoughtSpot] Extracting joins from {table_name}...", flush=True)
|
| 1460 |
|
| 1461 |
# Export table TML to get constraint IDs
|
| 1462 |
export_response = self.session.post(
|
|
|
|
| 1486 |
'constraint_id': constraint_id,
|
| 1487 |
'destination': destination
|
| 1488 |
})
|
|
|
|
|
|
|
|
|
|
| 1489 |
|
| 1490 |
# Step 4: Create model (semantic layer) with constraint references
|
| 1491 |
+
model_start = time.time()
|
|
|
|
| 1492 |
model_name = demo_names['model']
|
| 1493 |
+
log_progress(f"π Creating model: {model_name}...")
|
| 1494 |
|
| 1495 |
# Use the enhanced model creation that includes constraint references
|
| 1496 |
model_tml = self._create_model_with_constraints(tables, foreign_keys, table_guids, table_constraints, model_name, connection_name)
|
|
|
|
| 1522 |
if objects and len(objects) > 0:
|
| 1523 |
if objects[0].get('response', {}).get('status', {}).get('status_code') == 'OK':
|
| 1524 |
model_guid = objects[0].get('response', {}).get('header', {}).get('id_guid')
|
| 1525 |
+
model_time = time.time() - model_start
|
| 1526 |
+
log_progress(f"β
Model created ({model_time:.1f}s)")
|
|
|
|
| 1527 |
results['model'] = model_name
|
| 1528 |
results['model_guid'] = model_guid
|
| 1529 |
|
| 1530 |
# Step 3.5: Enable Spotter on the model via API
|
|
|
|
| 1531 |
try:
|
| 1532 |
enable_response = self.session.post(
|
| 1533 |
f"{self.base_url}/api/rest/2.0/metadata/sage/enable",
|
|
|
|
| 1536 |
}
|
| 1537 |
)
|
| 1538 |
if enable_response.status_code == 200:
|
| 1539 |
+
log_progress(f"π€ Spotter enabled")
|
|
|
|
|
|
|
|
|
|
| 1540 |
except Exception as spotter_error:
|
| 1541 |
+
pass # Not critical
|
| 1542 |
|
| 1543 |
# Step 4: Auto-create Liveboard from model
|
| 1544 |
+
lb_start = time.time()
|
| 1545 |
+
log_progress(f"π Creating liveboard...")
|
| 1546 |
try:
|
| 1547 |
from liveboard_creator import create_liveboard_from_model
|
| 1548 |
|
|
|
|
| 1562 |
)
|
| 1563 |
|
| 1564 |
if liveboard_result.get('success'):
|
| 1565 |
+
lb_time = time.time() - lb_start
|
| 1566 |
+
log_progress(f"β
Liveboard created ({lb_time:.1f}s)")
|
|
|
|
| 1567 |
results['liveboard'] = liveboard_result.get('liveboard_name')
|
| 1568 |
results['liveboard_guid'] = liveboard_result.get('liveboard_guid')
|
| 1569 |
else:
|
| 1570 |
error = f"Liveboard creation failed: {liveboard_result.get('error', 'Unknown error')}"
|
|
|
|
| 1571 |
results['errors'].append(error)
|
| 1572 |
except Exception as lb_error:
|
| 1573 |
error = f"Liveboard creation exception: {str(lb_error)}"
|
|
|
|
| 1574 |
results['errors'].append(error)
|
|
|
|
|
|
|
| 1575 |
else:
|
| 1576 |
+
# Extract detailed error information
|
| 1577 |
+
obj_response = objects[0].get('response', {})
|
| 1578 |
+
status = obj_response.get('status', {})
|
| 1579 |
+
error_message = status.get('error_message', 'Unknown error')
|
| 1580 |
+
error_code = status.get('error_code', 'N/A')
|
| 1581 |
+
|
| 1582 |
+
# Get any additional error details
|
| 1583 |
+
full_response = json.dumps(objects[0], indent=2)
|
| 1584 |
+
|
| 1585 |
+
# Build comprehensive error message
|
| 1586 |
+
error = f"Model validation failed: {error_message}"
|
| 1587 |
+
if error_code != 'N/A':
|
| 1588 |
+
error += f" (Error code: {error_code})"
|
| 1589 |
+
|
| 1590 |
+
print(f"π Full model response: {full_response}") # DEBUG: Show full response
|
| 1591 |
print(f" β {error}")
|
| 1592 |
+
log_progress(f" β {error}")
|
| 1593 |
+
log_progress(f" π Full response details:")
|
| 1594 |
+
log_progress(f"{full_response}")
|
| 1595 |
+
|
| 1596 |
results['errors'].append(error)
|
| 1597 |
+
results['errors'].append(f"Full API response: {full_response}")
|
| 1598 |
else:
|
| 1599 |
error = "Model failed: No objects in response"
|
| 1600 |
log_progress(f" β {error}")
|
|
|
|
| 1604 |
results['success'] = len(results['errors']) == 0
|
| 1605 |
|
| 1606 |
except Exception as e:
|
| 1607 |
+
import traceback
|
| 1608 |
error_msg = str(e)
|
| 1609 |
+
full_trace = traceback.format_exc()
|
| 1610 |
+
|
| 1611 |
+
# Log to console with full details
|
| 1612 |
+
print(f"\n{'='*60}")
|
| 1613 |
+
print(f"β DEPLOYMENT EXCEPTION")
|
| 1614 |
+
print(f"{'='*60}")
|
| 1615 |
+
print(f"Error: {error_msg}")
|
| 1616 |
+
print(f"\nFull traceback:")
|
| 1617 |
+
print(full_trace)
|
| 1618 |
+
print(f"{'='*60}\n")
|
| 1619 |
+
|
| 1620 |
+
# Log through callback too
|
| 1621 |
+
log_progress(f"β Deployment failed: {error_msg}")
|
| 1622 |
+
log_progress(f"Traceback: {full_trace}")
|
| 1623 |
+
|
| 1624 |
results['errors'].append(error_msg)
|
| 1625 |
+
results['errors'].append(f"Traceback: {full_trace}")
|
| 1626 |
|
| 1627 |
return results
|
| 1628 |
|