sqlbot commited on
Commit
33b870b
Β·
2 Parent(s): dab8ee9 efbc9d0

Merge master into main for Hugging Face

Browse files
.dockerignore ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.py[cod]
3
+ *.log
4
+ *.xlsx
5
+
6
+ .git
7
+ .gitignore
8
+ .gitattributes
9
+
10
+ .env
11
+ .env.*
12
+
13
+ venv/
14
+ .venv/
15
+ .mypy_cache/
16
+ .pytest_cache/
17
+ .ruff_cache/
.gitignore ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.py[cod]
3
+ *.log
4
+
5
+ # Virtual envs
6
+ venv/
7
+ .venv/
8
+
9
+ # Local environment / secrets
10
+ .env
11
+ .env.*
12
+
13
+ # Data files
14
+ *.xlsx
15
+
16
+ # OS / editor junk
17
+ .DS_Store
18
+ Thumbs.db
19
+ .idea/
20
+ .vscode/
Dockerfile ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11-slim
2
+
3
+ WORKDIR /app
4
+
5
+ # Install system dependencies (if needed, extend this)
6
+ RUN apt-get update && apt-get install -y --no-install-recommends \
7
+ build-essential \
8
+ && rm -rf /var/lib/apt/lists/*
9
+
10
+ # Install Python deps
11
+ COPY requirements.txt .
12
+ RUN pip install --no-cache-dir -r requirements.txt
13
+
14
+ # Copy application code
15
+ COPY . .
16
+
17
+ ENV PYTHONUNBUFFERED=1
18
+
19
+ # Hugging Face Spaces pass the port via the PORT env var
20
+ ENV PORT=7860
21
+ EXPOSE 7860
22
+
23
+ # Start FastAPI app with uvicorn
24
+ CMD ["sh", "-c", "uvicorn app:app --host 0.0.0.0 --port ${PORT}"]
README.md CHANGED
@@ -1,10 +1,21 @@
1
- ---
2
- title: Sqlbot
3
- emoji: πŸ”₯
4
- colorFrom: red
5
- colorTo: gray
6
  sdk: docker
7
- pinned: false
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
+ title: sqlbot
2
+ emoji: 🧠
3
+ colorFrom: blue
4
+ colorTo: green
 
5
  sdk: docker
6
+ app_port: 7860
7
  ---
8
 
9
+ ## sqlbot β€” AI SQL Analyst
10
+
11
+ This Space runs a FastAPI app that lets you ask natural-language questions about your PostgreSQL database and get:
12
+
13
+ - Generated SQL
14
+ - Executed query results
15
+ - Explanations and insights
16
+
17
+ ### Deployment notes
18
+
19
+ - The backend FastAPI app is defined in `app.py`.
20
+ - The Docker image is built from `Dockerfile` and exposes port `7860`.
21
+ - The app connects to PostgreSQL via the `DATABASE_URL` environment variable (configured in the Space settings, e.g., a Neon connection string).
ai/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # ai package
ai/groq_setup.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """DSPy language model setup for Groq and OpenAI.
2
+
3
+ Provides a factory function to create the right LM based on the
4
+ user-selected provider.
5
+ """
6
+
7
+ import dspy
8
+ import config
9
+
10
+
11
+ def get_lm(provider: str = "groq") -> dspy.LM:
12
+ """Return a configured DSPy language-model instance.
13
+
14
+ Parameters
15
+ ----------
16
+ provider : "groq" | "openai"
17
+ """
18
+ if provider == "openai":
19
+ lm = dspy.LM(
20
+ model=f"openai/{config.OPENAI_MODEL}",
21
+ api_key=config.OPENAI_API_KEY,
22
+ max_tokens=4096,
23
+ temperature=0.2,
24
+ )
25
+ else: # default: groq
26
+ lm = dspy.LM(
27
+ model=f"groq/{config.GROQ_MODEL}",
28
+ api_key=config.GROQ_API_KEY,
29
+ max_tokens=4096,
30
+ temperature=0.2,
31
+ )
32
+
33
+ dspy.configure(lm=lm)
34
+ return lm
ai/pipeline.py ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Main DSPy reasoning pipeline β€” optimized for speed.
2
+
3
+ Reduced from 9 stages to 4 LLM calls in the happy path:
4
+ 1. AnalyzeAndPlan (question understanding + schema analysis + query planning)
5
+ 2. SQLGeneration
6
+ 3. SQLCritiqueAndFix (one pass; only retries on failure)
7
+ 4. InterpretAndInsight (interpretation + insights in one call)
8
+ """
9
+
10
+ import json
11
+ import logging
12
+ import re
13
+ from typing import Any
14
+
15
+ import dspy
16
+
17
+ from ai.groq_setup import get_lm
18
+ from ai.signatures import (
19
+ AnalyzeAndPlan,
20
+ SQLGeneration,
21
+ SQLRepair,
22
+ InterpretAndInsight,
23
+ )
24
+ from ai.validator import validate_sql, check_sql_against_schema
25
+ from db.schema import format_schema
26
+ from db.relationships import format_relationships
27
+ from db.profiler import get_data_profile
28
+ from db.executor import execute_sql
29
+
30
+ logger = logging.getLogger(__name__)
31
+
32
+ MAX_REPAIR_RETRIES = 2
33
+
34
+
35
class SQLAnalystPipeline:
    """End-to-end reasoning pipeline: question → SQL → results → insights.

    LLM calls on the happy path (4 total):
      1. Analyze & Plan       — question understanding + schema analysis + planning
      2. SQL Generation       — regenerated once if the code-based schema check fails
      3. SQL Repair           — only on database errors (max MAX_REPAIR_RETRIES)
      4. Interpret & Insight  — plain-English answer + insights in one call
    """

    def __init__(self, provider: str = "groq"):
        """Configure the LM for *provider* and build the DSPy predict modules."""
        self.provider = provider
        self._lm = get_lm(provider)

        # DSPy predict modules — one per LLM stage.
        self.analyze = dspy.Predict(AnalyzeAndPlan)
        self.generate_sql = dspy.Predict(SQLGeneration)
        self.interpret = dspy.Predict(InterpretAndInsight)
        self.repair = dspy.Predict(SQLRepair)

    # ── public API ──────────────────────────────────────────────────────

    def run(self, question: str) -> dict[str, Any]:
        """Run the full pipeline and return {sql, data, answer, insights}."""
        schema_str = format_schema()

        logger.info("Stage 1 — Analyze & Plan")
        plan_text = self._build_plan_text(question, schema_str)

        logger.info("Stage 2 — SQL Generation")
        sql = self._generate_schema_checked_sql(question, schema_str, plan_text)

        # Safety validation (pure code — no LLM call).
        is_safe, reason = validate_sql(sql)
        if not is_safe:
            return self._failure(sql, f"Query rejected: {reason}")

        logger.info("Stage 4 — Executing SQL")
        exec_result = execute_sql(sql)

        # Repair loop: only invoke the LLM when the database raised an error.
        for attempt in range(MAX_REPAIR_RETRIES):
            if exec_result["success"]:
                break
            logger.warning(f"SQL error (attempt {attempt + 1}): {exec_result['error']}")
            repaired = self.repair(
                sql_query=sql,
                error_message=exec_result["error"],
                schema_info=schema_str,
                question=question,
            )
            sql = self._clean_sql(repaired.corrected_sql)
            # Re-validate: the repair model could emit something unsafe.
            is_safe, reason = validate_sql(sql)
            if not is_safe:
                return self._failure(sql, f"Repaired query rejected: {reason}")
            exec_result = execute_sql(sql)

        if not exec_result["success"]:
            return self._failure(
                sql,
                f"Failed after {MAX_REPAIR_RETRIES} repairs. Error: {exec_result['error']}",
            )

        data = exec_result["data"]
        # Cap what the LLM sees to keep the prompt small; the API response
        # still carries the full result set.
        results_json = json.dumps(data[:50], default=str)

        logger.info("Stage 5 — Interpret & Insight")
        result = self.interpret(
            question=question,
            sql_query=sql,
            query_results=results_json,
        )

        return {
            "sql": sql,
            "data": data,
            "answer": result.answer,
            "insights": result.insights,
        }

    def generate_sql_only(self, question: str) -> str:
        """Run the pipeline up to SQL generation and return just the SQL."""
        schema_str = format_schema()
        plan_text = self._build_plan_text(question, schema_str)
        return self._generate_schema_checked_sql(question, schema_str, plan_text)

    # ── internal stages (shared by run / generate_sql_only) ─────────────

    def _build_plan_text(self, question: str, schema_str: str) -> str:
        """Stage 1: analyze the question and flatten the plan to prompt text."""
        plan = self.analyze(
            question=question,
            schema_info=schema_str,
            relationships=format_relationships(),
            data_profile=get_data_profile(),
        )
        return (
            f"Intent: {plan.intent}\n"
            f"Tables: {plan.relevant_tables}\n"
            f"Columns: {plan.relevant_columns}\n"
            f"Joins: {plan.join_conditions}\n"
            f"Where: {plan.where_conditions}\n"
            f"Aggregations: {plan.aggregations}\n"
            f"Group By: {plan.group_by}\n"
            f"Order By: {plan.order_by}\n"
            f"Limit: {plan.limit_val}"
        )

    def _generate_schema_checked_sql(self, question: str, schema_str: str, plan_text: str) -> str:
        """Stages 2-3: generate SQL, then verify tables against the live schema.

        The schema check is pure code (instant, no LLM). If issues are found
        the SQL is regenerated exactly once with the issues as feedback.
        """
        from db.schema import get_schema  # local import, as in the original module

        sql_result = self.generate_sql(
            question=question,
            schema_info=schema_str,
            query_plan=plan_text,
        )
        sql = self._clean_sql(sql_result.sql_query)

        schema_valid, schema_issues = check_sql_against_schema(sql, get_schema())
        if not schema_valid:
            logger.warning(f"Schema issues detected: {schema_issues}")
            sql_result = self.generate_sql(
                question=question,
                schema_info=schema_str,
                query_plan=plan_text + f"\n\nPREVIOUS SQL HAD ISSUES: {schema_issues}. Fix them.",
            )
            sql = self._clean_sql(sql_result.sql_query)
        return sql

    @staticmethod
    def _failure(sql: str, answer: str) -> dict[str, Any]:
        """Uniform error payload matching the success schema (empty data/insights)."""
        return {"sql": sql, "data": [], "answer": answer, "insights": ""}

    # ── helpers ─────────────────────────────────────────────────────────

    @staticmethod
    def _clean_sql(raw: str) -> str:
        """Strip markdown fences, trailing prose, and whitespace from LLM SQL."""
        sql = raw.strip()

        # 1. Remove ```sql ... ``` wrappers
        if sql.startswith("```"):
            lines = sql.split("\n")
            lines = [l for l in lines if not l.strip().startswith("```")]
            sql = "\n".join(lines).strip()

        # 2. Extract only the first valid SQL statement (terminated by ';',
        #    a blank line followed by prose, or end-of-string).
        match = re.search(
            r"((?:SELECT|WITH)\b[\s\S]*?)(;|\n\n(?=[A-Z][a-z])|$)",
            sql,
            re.IGNORECASE,
        )
        if match:
            sql = match.group(1).strip()

        # 3. Remove trailing lines that look like natural language
        cleaned_lines: list[str] = []
        for line in sql.split("\n"):
            stripped = line.strip()
            if not stripped:
                cleaned_lines.append(line)
                continue
            if re.match(
                r"^(However|Note|This|The|Please|But|Also|In |It |I |Here|Since|Because|Although|Unfortunately)",
                stripped,
            ):
                break
            cleaned_lines.append(line)

        sql = "\n".join(cleaned_lines).strip()

        # 4. Remove trailing semicolons
        sql = sql.rstrip(";")

        return sql
ai/signatures.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """DSPy Signature definitions β€” optimized for speed.
2
+
3
+ Consolidated from 8 signatures down to 4 to minimize LLM round-trips:
4
+ 1. AnalyzeAndPlan (combines question understanding + schema analysis + query planning)
5
+ 2. SQLGeneration
6
+ 3. SQLSelfCritique + Repair (combined)
7
+ 4. InterpretAndInsight (combines result interpretation + insight generation)
8
+ """
9
+
10
+ import dspy
11
+
12
+
13
+ # ── 1. Analyze & Plan (combines 3 former stages) ───────────────────────────
14
+
15
# NOTE: in DSPy, this class docstring is sent to the LLM verbatim as the
# task instructions — do not edit it for style; edits change model behavior.
class AnalyzeAndPlan(dspy.Signature):
    """You are an expert SQL analyst with strong business intelligence skills.
    Given a user question, a database schema, and a DATA PROFILE showing actual
    values in the database, analyze the question and produce a detailed query plan.

    CRITICAL BUSINESS RULES — you MUST follow these:
    1. When calculating revenue, sales, or monetary metrics, ONLY include
       records with a completed/closed/successful status. Filter out cancelled,
       pending, open, returned, or failed records.
    2. Look at the data profile to see which status/categorical values exist
       and decide which ones represent VALID/COMPLETED transactions.
    3. For AOV (Average Order Value), divide total revenue of CLOSED orders
       by the COUNT of CLOSED orders only.
    4. When a column like 'status' exists, ALWAYS consider whether filtering
       by status is needed for accurate business metrics.
    5. For inventory/stock metrics, consider item states appropriately.
    6. When computing counts, totals, or averages, think about which records
       should logically be included vs excluded.

    Steps:
    1. Understand the user's question (intent, metrics, entities, filters)
    2. Review the DATA PROFILE to understand actual values in the database
    3. Identify which tables and columns are relevant
    4. Determine appropriate filters (especially status-based) for accurate results
    5. Produce a complete logical query plan"""

    # Inputs: the question plus DB context gathered by code (schema, FK/heuristic
    # relationships, and sampled value profile from db.profiler).
    question = dspy.InputField(desc="The user's natural-language question")
    schema_info = dspy.InputField(desc="Full database schema with table names, columns, and types")
    relationships = dspy.InputField(desc="Known relationships between tables")
    data_profile = dspy.InputField(desc="Data profile showing actual values: distinct categorical values, numeric ranges, date ranges")

    # Outputs: a decomposed logical query plan; the pipeline flattens these
    # fields into one text block fed to SQLGeneration.
    intent = dspy.OutputField(desc="What the user wants to know (1 sentence)")
    relevant_tables = dspy.OutputField(desc="Comma-separated list of tables needed")
    relevant_columns = dspy.OutputField(desc="Comma-separated list of table.column pairs needed")
    join_conditions = dspy.OutputField(desc="JOIN conditions to use, or 'none'")
    where_conditions = dspy.OutputField(desc="WHERE conditions including status/state filters for accurate business metrics, or 'none'")
    aggregations = dspy.OutputField(desc="Aggregation functions to apply, or 'none'")
    group_by = dspy.OutputField(desc="GROUP BY columns, or 'none'")
    order_by = dspy.OutputField(desc="ORDER BY clause, or 'none'")
    limit_val = dspy.OutputField(desc="LIMIT value, or 'none'")
+
56
+
57
+ # ── 2. SQL Generation ──────────────────────────────────────────────────────
58
+
59
# NOTE: the docstring below is the LLM prompt (DSPy Signature) — behavior, not docs.
class SQLGeneration(dspy.Signature):
    """Generate a valid PostgreSQL SELECT query based on the query plan.
    The query must be syntactically correct and only reference existing
    tables and columns from the schema.

    BUSINESS RULES:
    - Include status/state filters from the query plan for accurate metrics
    - Use appropriate aggregation functions
    - Ensure the query respects business logic (e.g., only closed orders for revenue)

    CRITICAL: Output ONLY the raw SQL. No markdown, no explanation, no comments."""

    question = dspy.InputField(desc="The user's question")
    schema_info = dspy.InputField(desc="Database schema")
    query_plan = dspy.InputField(desc="Detailed logical query plan")

    # The pipeline still runs _clean_sql on this output — models do not
    # reliably obey the "raw SQL only" instruction.
    sql_query = dspy.OutputField(
        desc="A valid PostgreSQL SELECT query. Output ONLY the raw SQL code. "
        "Do NOT include any explanation, comments, markdown, or text before or after the SQL. "
        "Do NOT wrap in code fences. Just the pure SQL statement."
    )
+
81
+
82
+ # ── 3. SQL Self-Critique & Repair (combined) ───────────────────────────────
83
+
84
# NOTE(review): this signature is NOT imported by ai/pipeline.py — the
# pipeline replaced the LLM critique with the code-based
# check_sql_against_schema. Kept for API compatibility; consider removing
# once confirmed no other caller uses it.
class SQLCritiqueAndFix(dspy.Signature):
    """Evaluate a generated SQL query for correctness against the schema.
    Check that all tables exist, all columns exist, JOINs are valid,
    GROUP BY matches aggregations, and filters reference real columns.
    If any issues are found, output the corrected SQL. If valid, repeat the SQL exactly."""

    sql_query = dspy.InputField(desc="The generated SQL query")
    schema_info = dspy.InputField(desc="Database schema")
    question = dspy.InputField(desc="The original question")

    is_valid = dspy.OutputField(desc="yes or no")
    issues = dspy.OutputField(desc="List of issues found, or 'none'")
    corrected_sql = dspy.OutputField(
        desc="Corrected SQL query if issues found, otherwise repeat the original SQL exactly. "
        "Output ONLY raw SQL code with no explanation or text."
    )
+
101
+
102
+ # ── 4. Interpret & Insight (combined) ──────────────────────────────────────
103
+
104
# NOTE: docstring is the LLM prompt (DSPy Signature). Called once per run
# with at most 50 result rows (capped by the pipeline).
class InterpretAndInsight(dspy.Signature):
    """Interpret SQL query results for a non-technical user and generate insights.

    1. Summarize the main findings in plain English (2-3 sentences)
    2. Identify patterns, dominant contributors, outliers, and business implications"""

    question = dspy.InputField(desc="The original question")
    sql_query = dspy.InputField(desc="The SQL query that was executed")
    query_results = dspy.InputField(desc="The query results as JSON")

    answer = dspy.OutputField(
        desc="A clear, non-technical explanation of the results (2-3 sentences)"
    )
    insights = dspy.OutputField(
        desc="3-5 bullet-point analytical insights about the data"
    )
+
121
+
122
+ # ── 5. SQL Repair (for execution errors) ──────────────────────────────────
123
+
124
# NOTE: docstring is the LLM prompt (DSPy Signature). Invoked by the
# pipeline's repair loop, at most MAX_REPAIR_RETRIES times per question.
class SQLRepair(dspy.Signature):
    """Given a SQL query that produced a database error, generate a
    corrected query that avoids the error."""

    sql_query = dspy.InputField(desc="The SQL query that failed")
    error_message = dspy.InputField(desc="The database error message")
    schema_info = dspy.InputField(desc="Database schema")
    question = dspy.InputField(desc="The original user question")

    # Output is still run through _clean_sql and validate_sql by the caller.
    corrected_sql = dspy.OutputField(
        desc="A corrected PostgreSQL SELECT query. Output ONLY the raw SQL code. "
        "Do NOT include any explanation, comments, or text before or after the SQL."
    )
ai/validator.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SQL safety validation.
2
+
3
+ Rejects any query that is not a pure SELECT statement.
4
+ """
5
+
6
+ import re
7
+
8
# Mutating / DDL / execution keywords — any match rejects the query.
_FORBIDDEN_KEYWORDS = [
    r"\bDROP\b",
    r"\bDELETE\b",
    r"\bUPDATE\b",
    r"\bALTER\b",
    r"\bTRUNCATE\b",
    r"\bINSERT\b",
    r"\bCREATE\b",
    r"\bGRANT\b",
    r"\bREVOKE\b",
    r"\bEXEC\b",
    r"\bEXECUTE\b",
]

# Compiled once at import time: case-insensitive alternation of all keywords.
_FORBIDDEN_PATTERN = re.compile("|".join(_FORBIDDEN_KEYWORDS), re.IGNORECASE)


def validate_sql(sql: str) -> tuple[bool, str]:
    """Check whether *sql* is a read-only query that is safe to execute.

    Returns
    -------
    (is_safe, reason) — ``reason`` is the empty string when the query passes.
    """
    query = sql.strip().rstrip(";").strip()

    # Blank input is rejected outright.
    if not query:
        return False, "Empty query."

    # Only plain SELECTs or CTEs (WITH ... SELECT) are permitted.
    if re.match(r"^\s*(SELECT|WITH)\b", query, re.IGNORECASE) is None:
        return False, "Only SELECT queries are allowed."

    # Any mutating keyword anywhere in the statement fails the check.
    hit = _FORBIDDEN_PATTERN.search(query)
    if hit is not None:
        return False, f"Forbidden keyword detected: {hit.group().upper()}"

    return True, ""
47
+
48
+
def check_sql_against_schema(sql: str, schema: dict[str, list[dict]]) -> tuple[bool, list[str]]:
    """Programmatically check that tables referenced in *sql* exist in *schema*.

    Parameters
    ----------
    sql : the SELECT statement to inspect
    schema : mapping of table name -> list of column dicts, as returned by
        ``db.schema.get_schema``

    Returns
    -------
    (is_valid, list_of_issues) — much faster and more deterministic than an
    LLM-based critique, at the cost of being a heuristic (no full SQL parse).
    """
    issues: list[str] = []

    # Known table names, case-insensitive.
    # (The original also built per-table/all-column lookup sets that were
    # never consulted — dead work removed; column-level checking can be
    # reintroduced when an actual column check is implemented.)
    all_tables = {t.lower() for t in schema}

    # Extract table references (FROM / JOIN); tolerates optional double quotes.
    for tref in re.findall(r'(?:FROM|JOIN)\s+"?(\w+)"?', sql, re.IGNORECASE):
        if tref.lower() not in all_tables:
            issues.append(f"Table '{tref}' not found in schema")

    # Lightweight sanity check: a GROUP BY normally accompanies at least one
    # aggregate in the SELECT list (not full SQL parsing).
    sql_upper = sql.upper()
    if "GROUP BY" in sql_upper and "SELECT" in sql_upper:
        if not any(fn in sql_upper for fn in ("SUM(", "COUNT(", "AVG(", "MIN(", "MAX(")):
            issues.append("GROUP BY present but no aggregation function found")

    return (len(issues) == 0, issues)
+
app.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """FastAPI application β€” AI SQL Analyst API and frontend server."""
2
+
3
+ import logging
4
+ from pathlib import Path
5
+
6
+ from fastapi import FastAPI
7
+ from fastapi.middleware.cors import CORSMiddleware
8
+ from fastapi.responses import FileResponse
9
+ from fastapi.staticfiles import StaticFiles
10
+ from pydantic import BaseModel
11
+
12
# Process-wide root logging configuration.
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(name)s %(message)s")

app = FastAPI(title="AI SQL Analyst", version="1.0.0")

# ── CORS ────────────────────────────────────────────────────────────────────
# NOTE(review): fully open CORS ("*") is convenient for a public demo Space,
# but should be narrowed to known origins for any private deployment.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
23
+
24
+
25
+ # ── Request / Response schemas ──────────────────────────────────────────────
26
+
27
class QuestionRequest(BaseModel):
    """Request body for /generate-sql and /chat."""

    question: str
    provider: str = "groq"  # "groq" | "openai"
30
+
31
+
32
class GenerateSQLResponse(BaseModel):
    """Response body for /generate-sql: the generated SQL, not executed."""

    sql: str
34
+
35
+
36
class ChatResponse(BaseModel):
    """Response body for /chat — mirrors SQLAnalystPipeline.run()'s dict."""

    sql: str
    data: list  # full query result rows as dicts
    answer: str
    insights: str
41
+
42
+
43
+ # ── Endpoints ───────────────────────────────────────────────────────────────
44
+
45
@app.post("/generate-sql", response_model=GenerateSQLResponse)
def generate_sql_endpoint(req: QuestionRequest):
    """Generate SQL for a natural-language question WITHOUT executing it."""
    # Function-level import — presumably to defer loading the AI stack until
    # first use; confirm if app startup time is the motivation.
    from ai.pipeline import SQLAnalystPipeline

    pipeline = SQLAnalystPipeline(provider=req.provider)
    sql = pipeline.generate_sql_only(req.question)
    return GenerateSQLResponse(sql=sql)
52
+
53
+
54
@app.post("/chat", response_model=ChatResponse)
def chat_endpoint(req: QuestionRequest):
    """Full pipeline: question → SQL → execution → answer + insights."""
    # Deferred import, same pattern as /generate-sql.
    from ai.pipeline import SQLAnalystPipeline

    pipeline = SQLAnalystPipeline(provider=req.provider)
    result = pipeline.run(req.question)
    return ChatResponse(**result)
61
+
62
+
63
+ # ── Schema info endpoint (for debugging / transparency) ─────────────────────
64
+
65
@app.get("/schema")
def schema_endpoint():
    """Expose the discovered database schema (debugging / transparency)."""
    from db.schema import get_schema
    return get_schema()
69
+
70
+
71
@app.get("/relationships")
def relationships_endpoint():
    """Expose discovered inter-table relationships as plain JSON objects."""
    from db.relationships import discover_relationships
    rels = discover_relationships()
    # Flatten relationship objects into JSON-serializable dicts.
    return [
        {
            "table_a": r.table_a, "column_a": r.column_a,
            "table_b": r.table_b, "column_b": r.column_b,
            "confidence": r.confidence, "source": r.source,
        }
        for r in rels
    ]
83
+
84
+
85
+ # ── Frontend static files ──────────────────────────────────────────────────
86
+
87
# Static frontend assets live next to this file in ./frontend.
FRONTEND_DIR = Path(__file__).parent / "frontend"

app.mount("/static", StaticFiles(directory=str(FRONTEND_DIR)), name="static")
90
+
91
+
92
@app.get("/")
def serve_frontend():
    """Serve the single-page frontend at the root path."""
    return FileResponse(str(FRONTEND_DIR / "index.html"))
95
+
96
+
97
+ # ── Run ─────────────────────────────────────────────────────────────────────
98
+
99
if __name__ == "__main__":
    # Local development entry point only; in the HF Space the Dockerfile CMD
    # starts uvicorn on port 7860 instead.
    import uvicorn
    uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True)
config.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Central configuration — reads .env and exposes all settings."""

import os
from dotenv import load_dotenv

# Populate os.environ from a local .env file (silently a no-op if absent).
load_dotenv()

# ── Database ────────────────────────────────────────────────────────────────
# NOTE(review): the fallback DSN embeds local dev credentials; confirm this
# default can never reach production (the HF Space is expected to set
# DATABASE_URL explicitly per the README).
DATABASE_URL: str = os.getenv("DATABASE_URL", "postgresql://postgres:universe@localhost:5432/postgres")

# ── Groq ────────────────────────────────────────────────────────────────────
# Empty-string defaults mean a missing key fails at the API call, not at import.
GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
GROQ_MODEL: str = os.getenv("GROQ_MODEL", "llama-3.3-70b-versatile")

# ── OpenAI ──────────────────────────────────────────────────────────────────
OPENAI_API_KEY: str = os.getenv("OPENAI_API_KEY", "")
OPENAI_MODEL: str = os.getenv("OPENAI_MODEL", "gpt-4o")
data_sync.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Excel β†’ PostgreSQL data synchronization script.
2
+
3
+ Usage:
4
+ python data_sync.py path/to/file.xlsx
5
+ python data_sync.py path/to/folder/ (syncs all .xlsx files)
6
+
7
+ Normalizes column names to lowercase with underscores,
8
+ then upserts each sheet/file into a PostgreSQL table.
9
+ """
10
+
11
+ import os
12
+ import re
13
+ import sys
14
+
15
+ import pandas as pd
16
+ from sqlalchemy import text
17
+
18
+ from db.connection import get_engine
19
+
20
+
21
def normalize_column(name: str) -> str:
    """Normalize *name* into a safe SQL identifier.

    Lowercases and trims the input, collapses every run of non-alphanumeric
    characters into a single underscore, and strips edge underscores.
    Falls back to "unnamed_col" when nothing survives.
    """
    cleaned = re.sub(r"[^a-z0-9]+", "_", str(name).strip().lower()).strip("_")
    return cleaned if cleaned else "unnamed_col"
27
+
28
+
29
def sync_dataframe(df: pd.DataFrame, table_name: str) -> None:
    """Write *df* to PostgreSQL under *table_name*, replacing any existing table."""
    engine = get_engine()

    # Normalize every header, then make duplicates unique with _1, _2, ... suffixes.
    normalized = [normalize_column(c) for c in df.columns]
    occurrences: dict[str, int] = {}
    unique_cols: list[str] = []
    for name in normalized:
        if name not in occurrences:
            occurrences[name] = 0
            unique_cols.append(name)
        else:
            occurrences[name] += 1
            unique_cols.append(f"{name}_{occurrences[name]}")
    df.columns = unique_cols

    df.to_sql(table_name, engine, if_exists="replace", index=False)
    print(f" ✓ Table '{table_name}' synced — {len(df)} rows, {len(df.columns)} columns")
50
+
51
+
52
def sync_excel(filepath: str) -> None:
    """Sync every sheet of an Excel workbook into its own PostgreSQL table."""
    base_table = normalize_column(os.path.splitext(os.path.basename(filepath))[0])

    sheet_names = pd.ExcelFile(filepath).sheet_names

    if len(sheet_names) == 1:
        # Single-sheet workbook: the table is named after the file itself.
        sync_dataframe(pd.read_excel(filepath, sheet_name=sheet_names[0]), base_table)
        return

    # Multi-sheet workbook: one table per sheet, suffixed with the sheet name.
    for sheet_name in sheet_names:
        frame = pd.read_excel(filepath, sheet_name=sheet_name)
        sync_dataframe(frame, f"{base_table}_{normalize_column(sheet_name)}")
68
+
69
+
70
def main() -> None:
    """CLI entry point: sync one Excel file, or every Excel file in a folder."""
    if len(sys.argv) < 2:
        print("Usage: python data_sync.py <path_to_xlsx_or_folder>")
        sys.exit(1)

    target = sys.argv[1]

    if os.path.isdir(target):
        excel_paths = sorted(
            os.path.join(target, entry)
            for entry in os.listdir(target)
            if entry.endswith((".xlsx", ".xls"))
        )
        if not excel_paths:
            print(f"No Excel files found in {target}")
            sys.exit(1)
        for path in excel_paths:
            print(f"Syncing: {path}")
            sync_excel(path)
    elif os.path.isfile(target):
        print(f"Syncing: {target}")
        sync_excel(target)
    else:
        print(f"Path not found: {target}")
        sys.exit(1)

    print("\n✓ Data sync complete.")
97
+
98
+
99
+ if __name__ == "__main__":
100
+ main()
db/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # db package
db/connection.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SQLAlchemy engine and connection helpers."""
2
+
3
+ from sqlalchemy import create_engine
4
+ from sqlalchemy.engine import Engine
5
+
6
+ import config
7
+
8
# Lazily-created singleton engine shared by the whole process.
_engine: Engine | None = None


def get_engine() -> Engine:
    """Return a singleton SQLAlchemy engine.

    Created on first use from ``config.DATABASE_URL``. ``pool_pre_ping``
    revalidates pooled connections so stale ones (e.g. after a DB restart)
    are replaced transparently.
    """
    global _engine
    if _engine is None:
        _engine = create_engine(config.DATABASE_URL, pool_pre_ping=True)
    return _engine
17
+
18
+
19
def get_connection():
    """Return a new database connection (usable as a context manager)."""
    return get_engine().connect()
db/executor.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Safe SQL execution against PostgreSQL.
2
+
3
+ Only SELECT queries are allowed. Results are returned as list[dict].
4
+ Database errors are captured and returned for the AI repair loop.
5
+ """
6
+
7
+ from typing import Any
8
+
9
+ from sqlalchemy import text
10
+
11
+ from db.connection import get_engine
12
+ from ai.validator import validate_sql
13
+
14
+
15
def execute_sql(sql: str) -> dict[str, Any]:
    """Execute a SQL query and return results or error.

    Returns
    -------
    dict with keys:
        success : bool
        data : list[dict] (on success)
        columns : list[str] (on success)
        error : str (on failure)
    """
    # Safety gate — refuse anything that is not a plain SELECT/CTE.
    is_safe, reason = validate_sql(sql)
    if not is_safe:
        return {"success": False, "data": [], "columns": [], "error": reason}

    try:
        with get_engine().connect() as conn:
            result = conn.execute(text(sql))
            columns = list(result.keys())
            # Materialize rows as plain dicts so they JSON-serialize easily.
            rows = [dict(zip(columns, row)) for row in result.fetchall()]
            return {"success": True, "data": rows, "columns": columns, "error": ""}
    except Exception as exc:
        # Surface the DB error text so the AI repair loop can attempt a fix.
        return {"success": False, "data": [], "columns": [], "error": str(exc)}
db/profiler.py ADDED
@@ -0,0 +1,267 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Data profiler β€” samples the actual database to give the AI business context.
2
+
3
+ Profiles each table to discover:
4
+ - Categorical columns and their distinct values (status, type, category, etc.)
5
+ - Numeric column ranges (min, max, avg)
6
+ - Date column ranges
7
+ - Sample rows
8
+
9
+ This info is injected into the AI prompts so it can make smart
10
+ business decisions (e.g., filter by status='closed' for revenue).
11
+ """
12
+
13
+ import time
14
+ from typing import Any
15
+
16
+ from sqlalchemy import text
17
+
18
+ from db.connection import get_engine
19
+ from db.schema import get_schema
20
+
21
+ # ── Cache ───────────────────────────────────────────────────────────────────
22
+ _profile_cache: str | None = None
23
+ _profile_ts: float = 0.0
24
+ _PROFILE_TTL: float = 600.0 # 10 minutes
25
+
26
+
27
def get_data_profile(force_refresh: bool = False) -> str:
    """Return a formatted data profile string for prompt injection.

    The profile is rebuilt at most every ``_PROFILE_TTL`` seconds unless
    *force_refresh* is True.

    Fix: the previous cache check tested the truthiness of ``_profile_cache``,
    so an empty profile (e.g. a database with no tables) was never cached and
    every call re-profiled the entire database. We now test ``is not None``.
    """
    global _profile_cache, _profile_ts

    cache_is_fresh = (time.time() - _profile_ts) < _PROFILE_TTL
    if not force_refresh and _profile_cache is not None and cache_is_fresh:
        return _profile_cache

    schema = get_schema()
    profile_parts: list[str] = []

    engine = get_engine()
    with engine.connect() as conn:
        for table, columns in schema.items():
            table_profile = _profile_table(conn, table, columns)
            if table_profile:
                profile_parts.append(table_profile)

    # Append auto-generated business rules so the AI gets them in one string.
    rules = _generate_business_rules(schema)
    if rules:
        profile_parts.append(rules)

    _profile_cache = "\n".join(profile_parts)
    _profile_ts = time.time()
    return _profile_cache
52
+
53
+
54
def _profile_table(conn, table: str, columns: list[dict]) -> str:
    """Profile a single table.

    Returns a multi-line block ("TABLE PROFILE: ..." plus one line per
    profiled column), or "" when the row count query fails (e.g. missing
    permission, or table dropped between schema read and profiling).
    """
    lines: list[str] = [f"TABLE PROFILE: {table}"]

    # Row count
    try:
        # NOTE(review): table names come from information_schema via
        # get_schema(), not user input, so double-quoting is assumed safe here.
        count = conn.execute(text(f'SELECT count(*) FROM "{table}"')).scalar()
        lines.append(f"  Total rows: {count}")
    except Exception:
        # Any failure to count means this table is skipped entirely.
        return ""

    if count == 0:
        lines.append("  (empty table)")
        return "\n".join(lines)

    # Profile each column by its broad type class; a failure on one column
    # must not abort profiling of the remaining columns.
    for col in columns:
        cname = col["column_name"]
        dtype = col["data_type"]

        try:
            if _is_categorical(dtype, cname):
                profile = _profile_categorical(conn, table, cname, count)
                if profile:
                    lines.append(profile)
            elif _is_numeric(dtype):
                profile = _profile_numeric(conn, table, cname)
                if profile:
                    lines.append(profile)
            elif _is_date(dtype):
                profile = _profile_date(conn, table, cname)
                if profile:
                    lines.append(profile)
        except Exception:
            continue

    lines.append("")  # trailing blank line separates table profiles
    return "\n".join(lines)
92
+
93
+
94
+ def _is_categorical(dtype: str, cname: str) -> bool:
95
+ """Check if a column is likely categorical (status, type, category, etc.)."""
96
+ categorical_types = {"character varying", "text", "varchar", "char", "character"}
97
+ categorical_keywords = {
98
+ "status", "state", "type", "category", "kind", "class",
99
+ "group", "level", "tier", "grade", "priority", "stage",
100
+ "flag", "mode", "role", "region", "country", "city",
101
+ "gender", "channel", "source", "segment", "department",
102
+ }
103
+ if dtype.lower() in categorical_types:
104
+ # Check if the column name suggests it's categorical
105
+ lower_name = cname.lower()
106
+ if any(kw in lower_name for kw in categorical_keywords):
107
+ return True
108
+ # Also profile short text columns
109
+ return True
110
+ return False
111
+
112
+
113
+ def _is_numeric(dtype: str) -> bool:
114
+ numeric_types = {
115
+ "integer", "bigint", "smallint", "numeric", "real",
116
+ "double precision", "decimal", "float", "int",
117
+ }
118
+ return dtype.lower() in numeric_types
119
+
120
+
121
+ def _is_date(dtype: str) -> bool:
122
+ date_types = {
123
+ "date", "timestamp", "timestamp without time zone",
124
+ "timestamp with time zone", "timestamptz",
125
+ }
126
+ return dtype.lower() in date_types
127
+
128
+
129
def _profile_categorical(conn, table: str, col: str, total_rows: int) -> str | None:
    """Get distinct values for categorical columns (up to 25 values).

    Returns a one-line summary, a "high cardinality" note, or None when the
    column has no non-NULL values. NOTE(review): *total_rows* is currently
    unused here β€” kept for interface stability; confirm before removing.
    """
    # Top distinct values by frequency; LIMIT 25 caps the scan of wide domains.
    result = conn.execute(text(
        f'SELECT "{col}", count(*) as cnt FROM "{table}" '
        f'WHERE "{col}" IS NOT NULL '
        f'GROUP BY "{col}" ORDER BY cnt DESC LIMIT 25'
    )).fetchall()

    if not result:
        return None

    distinct_count = len(result)

    # Only profile if it's truly categorical (not too many unique values)
    if distinct_count > 20:
        # Check total distinct count (the first query was capped at 25 rows).
        total_distinct = conn.execute(text(
            f'SELECT count(DISTINCT "{col}") FROM "{table}" WHERE "{col}" IS NOT NULL'
        )).scalar()
        if total_distinct > 50:
            return f"  {col}: {total_distinct} distinct values (high cardinality - not categorical)"

    # Show at most 15 values even though up to 25 were fetched.
    values_str = ", ".join(
        f"'{r[0]}' ({r[1]} rows)" for r in result[:15]
    )
    return f"  {col}: DISTINCT VALUES = [{values_str}]"
155
+
156
+
157
def _profile_numeric(conn, table: str, col: str) -> str | None:
    """Summarize a numeric column as min/max/avg, or None when all values are NULL."""
    stats = conn.execute(text(
        f'SELECT min("{col}"), max("{col}"), round(avg("{col}")::numeric, 2) '
        f'FROM "{table}" WHERE "{col}" IS NOT NULL'
    )).fetchone()

    if not stats or stats[0] is None:
        return None

    lo, hi, mean = stats[0], stats[1], stats[2]
    return f"  {col}: min={lo}, max={hi}, avg={mean}"
168
+
169
+
170
def _profile_date(conn, table: str, col: str) -> str | None:
    """Summarize a date/timestamp column as its min..max range, or None if all-NULL."""
    bounds = conn.execute(text(
        f'SELECT min("{col}"), max("{col}") '
        f'FROM "{table}" WHERE "{col}" IS NOT NULL'
    )).fetchone()

    if not bounds or bounds[0] is None:
        return None

    earliest, latest = bounds[0], bounds[1]
    return f"  {col}: from {earliest} to {latest}"
181
+
182
+
183
+ def _generate_business_rules(schema: dict[str, list[dict]]) -> str:
184
+ """Auto-infer business rules from column patterns across all tables."""
185
+ rules: list[str] = [
186
+ "=" * 60,
187
+ "BUSINESS INTELLIGENCE RULES β€” YOU MUST FOLLOW THESE",
188
+ "=" * 60,
189
+ ]
190
+
191
+ # ── Rule 0: Query type awareness
192
+ rules.append("")
193
+ rules.append("RULE 0 β€” KNOW YOUR QUERY TYPE:")
194
+ rules.append(" PRODUCT ATTRIBUTE queries (category, name, weight, details):")
195
+ rules.append(" β†’ Use product/variant catalog tables directly.")
196
+ rules.append(" β†’ No status filter needed.")
197
+ rules.append(" PRODUCT PRICE queries (most expensive, cheapest, price lookup):")
198
+ rules.append(" β†’ Use sales_order_line_pricing.selling_price_per_unit as source of truth.")
199
+ rules.append(" β†’ JOIN to product_master for product_name. GROUP BY to avoid duplicates.")
200
+ rules.append(" TRANSACTIONAL queries (revenue, AOV, order counts, sales trends):")
201
+ rules.append(" β†’ Use sales tables. MUST filter by sales_order.status = 'closed'.")
202
+ rules.append(" β†’ Examples: 'total revenue', 'AOV', 'top customers by spending'")
203
+
204
+ # ── Rule 1: Avoiding duplicates
205
+ rules.append("")
206
+ rules.append("RULE 1 β€” AVOID DUPLICATE ROWS (CRITICAL):")
207
+ rules.append(" When JOINing tables, products may have MULTIPLE variants (different karat, quality, etc.).")
208
+ rules.append(" This causes duplicate product names in results.")
209
+ rules.append(" ALWAYS use one of these to prevent duplicates:")
210
+ rules.append(" - GROUP BY product_id (or product_name) with MAX/MIN/AVG on value columns")
211
+ rules.append(" - SELECT DISTINCT when you only need unique values")
212
+ rules.append(" - Use subqueries with aggregation before joining")
213
+ rules.append(" NEVER return raw joins that produce repeated product names.")
214
+
215
+ # ── Rule 2: Product price lookup
216
+ rules.append("")
217
+ rules.append("RULE 2 β€” PRODUCT PRICE LOOKUP (SOURCE OF TRUTH):")
218
+ rules.append(" The SOURCE OF TRUTH for product prices is the sales_order_line_pricing table.")
219
+ rules.append(" It has 'selling_price_per_unit' which is the actual price per 1 unit of a product.")
220
+ rules.append(" For 'most expensive products', 'cheapest products', 'product price':")
221
+ rules.append(" β†’ Query sales_order_line_pricing and JOIN to product tables for product_name")
222
+ rules.append(" β†’ Use selling_price_per_unit (NOT line_total_price, NOT selling_price from catalog)")
223
+ rules.append(" β†’ GROUP BY product_id, product_name and use MAX(selling_price_per_unit)")
224
+ rules.append(" β†’ Join path: sales_order_line_pricing.product_id = product_master.product_id")
225
+ rules.append(" Do NOT use product_variant_summary.selling_price or variant_sku_table.selling_price")
226
+ rules.append(" β€” those are catalog/list prices, not actual transaction prices.")
227
+ rules.append(" For 'highest revenue products' or 'best selling products':")
228
+ rules.append(" β†’ Use SUM(line_total_price) grouped by product, filtered by status='closed'")
229
+
230
+ # ── Rule 3: Status filtering (only for transactional queries)
231
+ rules.append("")
232
+ rules.append("RULE 3 β€” STATUS FILTERING (TRANSACTIONAL ONLY):")
233
+ rules.append(" The 'status' column on the sales_order table has values: closed, open, cancelled, processing.")
234
+ rules.append(" For revenue, AOV, sales counts: WHERE status = 'closed'")
235
+ rules.append(" For product catalog queries: NO status filter needed")
236
+ rules.append(" IMPORTANT: The 'status' column is ONLY on the sales_order table.")
237
+ rules.append(" Do NOT look for payment_status or status on pricing/line tables β€” it does not exist there.")
238
+
239
+ # ── Rule 4: Unit price vs total price
240
+ rules.append("")
241
+ rules.append("RULE 4 β€” UNIT PRICE vs TOTAL PRICE:")
242
+ rules.append(" line_total_price = selling_price_per_unit Γ— quantity (total for order line)")
243
+ rules.append(" selling_price_per_unit = the actual price of 1 unit of the product")
244
+ rules.append(" base_price_per_unit = cost price of 1 unit before margin")
245
+ rules.append(" NEVER use line_total_price as a product's price β€” it includes quantity.")
246
+ rules.append(" To get a product's price: use selling_price_per_unit or selling_price column")
247
+
248
+ # ── Rule 5: Common metrics formulas
249
+ rules.append("")
250
+ rules.append("RULE 5 β€” METRIC FORMULAS:")
251
+ rules.append(" AOV = SUM(so.total_amount) / COUNT(DISTINCT so.so_id) WHERE so.status='closed'")
252
+ rules.append(" Revenue = SUM(so.total_amount) WHERE so.status='closed'")
253
+ rules.append(" Most Expensive Product = MAX(pvs.selling_price) GROUP BY product_id, product_name")
254
+ rules.append(" Margin % = (selling_price - base_price) / selling_price Γ— 100")
255
+ rules.append(" Order Count = COUNT(DISTINCT so.so_id) WHERE so.status='closed'")
256
+
257
+ # ── Rule 6: Table relationships
258
+ rules.append("")
259
+ rules.append("RULE 6 β€” TABLE JOIN PATHS:")
260
+ rules.append(" Sales chain: sales_order(so_id) β†’ sales_order_line(so_id, sol_id) β†’ sales_order_line_pricing(sol_id)")
261
+ rules.append(" Product chain: product_master(product_id) β†’ product_variant_summary(product_id) β†’ variant_sku_table(variant_sku)")
262
+ rules.append(" Sales ↔ Product: sales_order_line.variant_sku = variant_sku_table.variant_sku")
263
+ rules.append(" Sales ↔ Customer: sales_order.customer_id = customer_master.customer_id")
264
+ rules.append(" Sales ↔ Payment: sales_order.so_id = sales_order_payments.so_id")
265
+
266
+ return "\n".join(rules)
267
+
db/relationships.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Relationship discovery between database tables.
2
+
3
+ Detects relationships via:
4
+ 1. Explicit foreign-key constraints
5
+ 2. Matching column names across tables
6
+ 3. ID-like suffix patterns (*_id, *_key)
7
+ 4. Fuzzy name matching (cust_id β‰ˆ customer_id)
8
+ """
9
+
10
+ from dataclasses import dataclass
11
+ from difflib import SequenceMatcher
12
+
13
+ from sqlalchemy import text
14
+
15
+ from db.connection import get_engine
16
+ from db.schema import get_schema
17
+
18
+
19
@dataclass
class Relationship:
    """A (possibly inferred) join link between two table columns."""
    table_a: str   # first endpoint: table name
    column_a: str  # first endpoint: column name
    table_b: str   # second endpoint: table name
    column_b: str  # second endpoint: column name
    confidence: float  # 0.0 - 1.0; 1.0 only for explicit FK constraints
    source: str  # "fk", "exact_match", "id_pattern", "fuzzy"
27
+
28
+
29
def discover_relationships() -> list[Relationship]:
    """Return all discovered relationships across public tables, deduplicated."""
    candidates: list[Relationship] = [
        *_fk_relationships(),
        *_implicit_relationships(),
    ]
    return _deduplicate(candidates)
35
+
36
+
37
+ # ── Explicit FK relationships ───────────────────────────────────────────────
38
+
39
def _fk_relationships() -> list[Relationship]:
    """Read explicit FOREIGN KEY constraints from information_schema."""
    query = text("""
        SELECT
            tc.table_name  AS source_table,
            kcu.column_name AS source_column,
            ccu.table_name  AS target_table,
            ccu.column_name AS target_column
        FROM information_schema.table_constraints tc
        JOIN information_schema.key_column_usage kcu
            ON tc.constraint_name = kcu.constraint_name
            AND tc.table_schema = kcu.table_schema
        JOIN information_schema.constraint_column_usage ccu
            ON ccu.constraint_name = tc.constraint_name
            AND ccu.table_schema = tc.table_schema
        WHERE tc.constraint_type = 'FOREIGN KEY'
            AND tc.table_schema = 'public'
    """)

    with get_engine().connect() as conn:
        rows = conn.execute(query).fetchall()

    # Declared FKs are ground truth, hence confidence 1.0.
    return [
        Relationship(
            table_a=src_table, column_a=src_col,
            table_b=dst_table, column_b=dst_col,
            confidence=1.0, source="fk",
        )
        for src_table, src_col, dst_table, dst_col in rows
    ]
66
+
67
+
68
+ # ── Implicit relationships ──────────────────────────────────────────────────
69
+
70
def _implicit_relationships() -> list[Relationship]:
    """Infer relationships from column names across every pair of tables.

    Three detectors run per table pair, in decreasing confidence order:
    exact column-name match (0.85), shared id-like base name (0.75), and
    fuzzy string similarity (ratio * 0.8). Overlapping findings are left in
    the list; _deduplicate() keeps the best one per column pair.
    """
    schema = get_schema()
    tables = list(schema.keys())
    rels: list[Relationship] = []

    # Each unordered table pair is visited exactly once (t2 ranges after t1).
    for i, t1 in enumerate(tables):
        cols1 = {c["column_name"] for c in schema[t1]}
        for t2 in tables[i + 1:]:
            cols2 = {c["column_name"] for c in schema[t2]}

            # 1. Exact column-name matches
            common = cols1 & cols2
            for col in common:
                rels.append(Relationship(
                    table_a=t1, column_a=col,
                    table_b=t2, column_b=col,
                    confidence=0.85, source="exact_match",
                ))

            # 2. ID-pattern matching (e.g. "cust_id" in t1 vs "cust_key" in t2:
            #    both id-like and sharing the base "cust")
            for c1 in cols1:
                if not c1.endswith(("_id", "_key", "id")):
                    continue
                for c2 in cols2:
                    if not c2.endswith(("_id", "_key", "id")):
                        continue
                    if c1 == c2:
                        continue  # already caught above
                    # Base name = everything before the last underscore,
                    # or the whole name when there is no underscore.
                    base1 = c1.rsplit("_", 1)[0] if "_" in c1 else c1
                    base2 = c2.rsplit("_", 1)[0] if "_" in c2 else c2
                    if base1 == base2:
                        rels.append(Relationship(
                            table_a=t1, column_a=c1,
                            table_b=t2, column_b=c2,
                            confidence=0.75, source="id_pattern",
                        ))

            # 3. Fuzzy matching for remaining column pairs.
            # NOTE(review): O(|cols1| * |cols2|) SequenceMatcher calls per
            # table pair β€” fine for small schemas, may be slow on wide ones.
            for c1 in cols1:
                for c2 in cols2:
                    if c1 == c2:
                        continue
                    ratio = SequenceMatcher(None, c1, c2).ratio()
                    if ratio >= 0.75:
                        rels.append(Relationship(
                            table_a=t1, column_a=c1,
                            table_b=t2, column_b=c2,
                            confidence=round(ratio * 0.8, 2),
                            source="fuzzy",
                        ))

    return rels
122
+
123
+
124
def _deduplicate(rels: list[Relationship]) -> list[Relationship]:
    """Collapse duplicates, keeping the most confident link per column pair."""
    winners: dict[tuple, Relationship] = {}
    for rel in rels:
        # Order-independent key so (a, b) and (b, a) collapse together.
        pair = tuple(sorted([(rel.table_a, rel.column_a), (rel.table_b, rel.column_b)]))
        current = winners.get(pair)
        if current is None or rel.confidence > current.confidence:
            winners[pair] = rel
    return list(winners.values())
132
+
133
+
134
def format_relationships(rels: list[Relationship] | None = None) -> str:
    """Render relationships as prompt-ready text, most confident first."""
    if rels is None:
        rels = discover_relationships()

    if not rels:
        return "No explicit or inferred relationships found between tables."

    ordered = sorted(rels, key=lambda r: r.confidence, reverse=True)
    return "\n".join(
        f"{r.table_a}.{r.column_a} <-> {r.table_b}.{r.column_b} "
        f"(confidence: {r.confidence:.0%}, source: {r.source})"
        for r in ordered
    )
db/schema.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Database schema introspection via information_schema.
2
+
3
+ Provides cached access to table/column metadata so the AI pipeline
4
+ always works with the real database structure.
5
+ """
6
+
7
+ import time
8
+ from typing import Any
9
+
10
+ from sqlalchemy import text
11
+
12
+ from db.connection import get_engine
13
+
14
+ # ── Cache ───────────────────────────────────────────────────────────────────
15
+ _schema_cache: dict[str, Any] | None = None
16
+ _cache_ts: float = 0.0
17
+ _CACHE_TTL: float = 300.0 # 5 minutes
18
+
19
+
20
def get_schema(force_refresh: bool = False) -> dict[str, list[dict]]:
    """Return {table_name: [{column_name, data_type, is_nullable}, ...]}.

    Results are cached for ``_CACHE_TTL`` seconds.

    Fix: the previous cache check used the truthiness of ``_schema_cache``,
    so an empty schema (database with no public tables) was never cached and
    every call re-queried information_schema. We now test ``is not None``.
    """
    global _schema_cache, _cache_ts

    cache_is_fresh = (time.time() - _cache_ts) < _CACHE_TTL
    if not force_refresh and _schema_cache is not None and cache_is_fresh:
        return _schema_cache

    query = text("""
        SELECT table_name, column_name, data_type, is_nullable
        FROM information_schema.columns
        WHERE table_schema = 'public'
        ORDER BY table_name, ordinal_position
    """)

    schema: dict[str, list[dict]] = {}
    with get_engine().connect() as conn:
        rows = conn.execute(query).fetchall()

    # ordinal_position ordering keeps each column list in declaration order.
    for row in rows:
        table = row[0]
        col_info = {
            "column_name": row[1],
            "data_type": row[2],
            "is_nullable": row[3],
        }
        schema.setdefault(table, []).append(col_info)

    _schema_cache = schema
    _cache_ts = time.time()
    return schema
53
+
54
+
55
+ def format_schema(schema: dict[str, list[dict]] | None = None) -> str:
56
+ """Format schema as a readable string for prompt injection."""
57
+ if schema is None:
58
+ schema = get_schema()
59
+
60
+ lines: list[str] = []
61
+ for table, columns in schema.items():
62
+ col_strs = []
63
+ for c in columns:
64
+ nullable = "NULL" if c["is_nullable"] == "YES" else "NOT NULL"
65
+ col_strs.append(f" {c['column_name']} {c['data_type']} {nullable}")
66
+ lines.append(f"TABLE: {table}")
67
+ lines.extend(col_strs)
68
+ lines.append("")
69
+
70
+ return "\n".join(lines)
71
+
72
+
73
def get_table_names() -> list[str]:
    """Return all public table names."""
    # Iterating the schema dict yields its keys (the table names).
    return [table for table in get_schema()]
frontend/index.html ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
+ <title>AI SQL Analyst β€” Intelligent Data Explorer</title>
7
+ <meta name="description" content="Ask natural-language questions about your database and get instant SQL, results, and insights." />
8
+ <link rel="preconnect" href="https://fonts.googleapis.com" />
9
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
10
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet" />
11
+ <link rel="stylesheet" href="/static/style.css" />
12
+ </head>
13
+ <body>
14
+ <!-- ── Background particles ─────────────────────────────────────── -->
15
+ <div class="bg-effects">
16
+ <div class="orb orb-1"></div>
17
+ <div class="orb orb-2"></div>
18
+ <div class="orb orb-3"></div>
19
+ </div>
20
+
21
+ <div class="container">
22
+
23
+ <!-- ── Header ───────────────────────────────────────────────── -->
24
+ <header class="header">
25
+ <div class="logo">
26
+ <div class="logo-icon">
27
+ <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
28
+ <path d="M12 2L2 7l10 5 10-5-10-5z"/>
29
+ <path d="M2 17l10 5 10-5"/>
30
+ <path d="M2 12l10 5 10-5"/>
31
+ </svg>
32
+ </div>
33
+ <div>
34
+ <h1>AI SQL Analyst</h1>
35
+ <p class="tagline">Intelligent Data Explorer</p>
36
+ </div>
37
+ </div>
38
+
39
+ <!-- ── Model Switcher ──────────────────────────────────── -->
40
+ <div class="model-switcher">
41
+ <span class="switcher-label">Model</span>
42
+ <div class="switcher-track" id="modelSwitcher">
43
+ <button class="switcher-btn active" data-provider="groq">
44
+ <span class="btn-dot"></span>
45
+ Groq
46
+ </button>
47
+ <button class="switcher-btn" data-provider="openai">
48
+ <span class="btn-dot"></span>
49
+ OpenAI
50
+ </button>
51
+ </div>
52
+ </div>
53
+ </header>
54
+
55
+ <!-- ── Input Section ────────────────────────────────────────── -->
56
+ <section class="input-section">
57
+ <div class="input-card glass">
58
+ <label for="questionInput" class="input-label">
59
+ Ask a question about your data
60
+ </label>
61
+ <div class="input-wrapper">
62
+ <textarea
63
+ id="questionInput"
64
+ rows="3"
65
+ placeholder="e.g. What are the top 10 customers by total revenue?"
66
+ spellcheck="false"
67
+ ></textarea>
68
+ <button id="submitBtn" class="submit-btn" title="Send question">
69
+ <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round">
70
+ <line x1="22" y1="2" x2="11" y2="13"/>
71
+ <polygon points="22 2 15 22 11 13 2 9 22 2"/>
72
+ </svg>
73
+ </button>
74
+ </div>
75
+ </div>
76
+ </section>
77
+
78
+ <!-- ── Loading ──────────────────────────────────────────────── -->
79
+ <div id="loadingIndicator" class="loading hidden">
80
+ <div class="loading-content">
81
+ <div class="spinner"></div>
82
+ <p class="loading-text">Reasoning about your question…</p>
83
+ <div class="loading-steps">
84
+ <span class="step active">Understanding</span>
85
+ <span class="step-arrow">β†’</span>
86
+ <span class="step">Analyzing Schema</span>
87
+ <span class="step-arrow">β†’</span>
88
+ <span class="step">Planning Query</span>
89
+ <span class="step-arrow">β†’</span>
90
+ <span class="step">Generating SQL</span>
91
+ <span class="step-arrow">β†’</span>
92
+ <span class="step">Executing</span>
93
+ <span class="step-arrow">β†’</span>
94
+ <span class="step">Interpreting</span>
95
+ </div>
96
+ </div>
97
+ </div>
98
+
99
+      <!-- ── Results Section ─────────────────────────────────────────── -->
100
+ <div id="resultsSection" class="results-section hidden">
101
+
102
+ <!-- SQL Card -->
103
+ <div class="result-card glass" id="sqlCard">
104
+ <div class="card-header">
105
+ <div class="card-icon sql-icon">
106
+ <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
107
+ <polyline points="16 18 22 12 16 6"/>
108
+ <polyline points="8 6 2 12 8 18"/>
109
+ </svg>
110
+ </div>
111
+ <h2>Generated SQL</h2>
112
+ <button class="copy-btn" id="copySqlBtn" title="Copy SQL">
113
+ <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
114
+ <rect x="9" y="9" width="13" height="13" rx="2"/>
115
+ <path d="M5 15H4a2 2 0 01-2-2V4a2 2 0 012-2h9a2 2 0 012 2v1"/>
116
+ </svg>
117
+ </button>
118
+ </div>
119
+ <pre class="sql-code"><code id="sqlOutput"></code></pre>
120
+ </div>
121
+
122
+ <!-- Data Card -->
123
+ <div class="result-card glass" id="dataCard">
124
+ <div class="card-header">
125
+ <div class="card-icon data-icon">
126
+ <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
127
+ <ellipse cx="12" cy="5" rx="9" ry="3"/>
128
+ <path d="M21 12c0 1.66-4 3-9 3s-9-1.34-9-3"/>
129
+ <path d="M3 5v14c0 1.66 4 3 9 3s9-1.34 9-3V5"/>
130
+ </svg>
131
+ </div>
132
+ <h2>Query Results</h2>
133
+ <span class="row-count" id="rowCount"></span>
134
+ </div>
135
+ <div class="table-wrapper" id="tableWrapper">
136
+ <!-- Table injected by JS -->
137
+ </div>
138
+ </div>
139
+
140
+ <!-- Answer Card -->
141
+ <div class="result-card glass" id="answerCard">
142
+ <div class="card-header">
143
+ <div class="card-icon answer-icon">
144
+ <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
145
+ <path d="M21 15a2 2 0 01-2 2H7l-4 4V5a2 2 0 012-2h14a2 2 0 012 2z"/>
146
+ </svg>
147
+ </div>
148
+ <h2>Explanation</h2>
149
+ </div>
150
+ <p class="answer-text" id="answerOutput"></p>
151
+ </div>
152
+
153
+ <!-- Insights Card -->
154
+ <div class="result-card glass" id="insightsCard">
155
+ <div class="card-header">
156
+ <div class="card-icon insights-icon">
157
+ <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
158
+ <path d="M12 2a7 7 0 017 7c0 2.38-1.19 4.47-3 5.74V17a1 1 0 01-1 1H9a1 1 0 01-1-1v-2.26C6.19 13.47 5 11.38 5 9a7 7 0 017-7z"/>
159
+ <line x1="9" y1="21" x2="15" y2="21"/>
160
+ </svg>
161
+ </div>
162
+ <h2>Insights</h2>
163
+ </div>
164
+ <div class="insights-text" id="insightsOutput"></div>
165
+ </div>
166
+
167
+ </div>
168
+
169
+ <!-- ── Error ────────────────────────────────────────────────── -->
170
+ <div id="errorSection" class="error-section hidden">
171
+ <div class="result-card glass error-card">
172
+ <div class="card-header">
173
+ <div class="card-icon error-icon">
174
+ <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
175
+ <circle cx="12" cy="12" r="10"/>
176
+ <line x1="15" y1="9" x2="9" y2="15"/>
177
+ <line x1="9" y1="9" x2="15" y2="15"/>
178
+ </svg>
179
+ </div>
180
+ <h2>Error</h2>
181
+ </div>
182
+ <p class="error-text" id="errorOutput"></p>
183
+ </div>
184
+ </div>
185
+
186
+ </div>
187
+
188
+ <script src="/static/script.js"></script>
189
+ </body>
190
+ </html>
frontend/script.js ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/* ═══════════════════════════════════════════════════════════════════════════
   AI SQL Analyst β€” Frontend Logic
   Wires the question box to POST /chat and renders SQL / table / answer /
   insights cards from the JSON response. No framework; plain DOM.
   ═══════════════════════════════════════════════════════════════════════════ */

(function () {
  "use strict";

  // ── DOM refs ──────────────────────────────────────────────────────────
  const questionInput   = document.getElementById("questionInput");
  const submitBtn       = document.getElementById("submitBtn");
  const loadingIndicator= document.getElementById("loadingIndicator");
  const resultsSection  = document.getElementById("resultsSection");
  const errorSection    = document.getElementById("errorSection");

  const sqlOutput       = document.getElementById("sqlOutput");
  const tableWrapper    = document.getElementById("tableWrapper");
  const rowCount        = document.getElementById("rowCount");
  const answerOutput    = document.getElementById("answerOutput");
  const insightsOutput  = document.getElementById("insightsOutput");
  const errorOutput     = document.getElementById("errorOutput");
  const copySqlBtn      = document.getElementById("copySqlBtn");

  const modelSwitcher   = document.getElementById("modelSwitcher");

  // Mirrors the active .switcher-btn; sent with every /chat request.
  let selectedProvider = "groq";
  let loadingStepTimer = null;

  // ── Model Switcher ───────────────────────────────────────────────────
  // Event delegation on the track: one listener handles all provider buttons.
  modelSwitcher.addEventListener("click", (e) => {
    const btn = e.target.closest(".switcher-btn");
    if (!btn) return;
    modelSwitcher.querySelectorAll(".switcher-btn").forEach(b => b.classList.remove("active"));
    btn.classList.add("active");
    selectedProvider = btn.dataset.provider;
  });

  // ── Submit ────────────────────────────────────────────────────────────
  submitBtn.addEventListener("click", handleSubmit);
  questionInput.addEventListener("keydown", (e) => {
    // Enter submits; Shift+Enter keeps the default newline behavior.
    if (e.key === "Enter" && !e.shiftKey) {
      e.preventDefault();
      handleSubmit();
    }
  });

  // POST the question + provider to /chat and render the response.
  async function handleSubmit() {
    const question = questionInput.value.trim();
    if (!question) return;

    showLoading();
    hideResults();
    hideError();

    try {
      const res = await fetch("/chat", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ question, provider: selectedProvider }),
      });

      if (!res.ok) {
        // Non-JSON error bodies fall back to the HTTP status text.
        const err = await res.json().catch(() => ({ detail: res.statusText }));
        throw new Error(err.detail || `HTTP ${res.status}`);
      }

      const data = await res.json();
      renderResults(data);
    } catch (err) {
      showError(err.message || "Something went wrong. Please try again.");
    } finally {
      hideLoading();
    }
  }

  // ── Render Results ───────────────────────────────────────────────────
  // Expects {sql, data: [...], answer, insights} from the backend.
  function renderResults(data) {
    // SQL
    sqlOutput.textContent = data.sql || "(no SQL generated)";

    // Data table
    if (data.data && data.data.length > 0) {
      rowCount.textContent = `${data.data.length} row${data.data.length !== 1 ? "s" : ""}`;
      tableWrapper.innerHTML = buildTable(data.data);
    } else {
      rowCount.textContent = "0 rows";
      tableWrapper.innerHTML = '<p style="padding:1rem;color:var(--text-muted);">No data returned.</p>';
    }

    // Answer
    answerOutput.textContent = data.answer || "";

    // Insights
    insightsOutput.textContent = data.insights || "";

    resultsSection.classList.remove("hidden");
  }

  // Build an HTML table string from an array of row objects.
  // Column order comes from the first row's keys; all cell values are escaped.
  function buildTable(rows) {
    if (!rows.length) return "";
    const cols = Object.keys(rows[0]);
    // Limit display to 200 rows (keeps the DOM responsive on large results)
    const displayRows = rows.slice(0, 200);
    let html = "<table><thead><tr>";
    cols.forEach(c => { html += `<th>${escapeHtml(c)}</th>`; });
    html += "</tr></thead><tbody>";
    displayRows.forEach(row => {
      html += "<tr>";
      cols.forEach(c => {
        const val = row[c];
        html += `<td>${escapeHtml(val === null ? "NULL" : String(val))}</td>`;
      });
      html += "</tr>";
    });
    html += "</tbody></table>";
    if (rows.length > 200) {
      html += `<p style="padding:0.75rem 1rem;color:var(--text-muted);font-size:0.8rem;">Showing 200 of ${rows.length} rows</p>`;
    }
    return html;
  }

  // ── Copy SQL ─────────────────────────────────────────────────────────
  // Flash the button green briefly as copy feedback.
  copySqlBtn.addEventListener("click", () => {
    const sql = sqlOutput.textContent;
    navigator.clipboard.writeText(sql).then(() => {
      copySqlBtn.style.color = "var(--accent-emerald)";
      setTimeout(() => { copySqlBtn.style.color = ""; }, 1200);
    });
  });

  // ── Loading animation ────────────────────────────────────────────────
  function showLoading() {
    loadingIndicator.classList.remove("hidden");
    submitBtn.disabled = true;
    animateLoadingSteps();
  }

  function hideLoading() {
    loadingIndicator.classList.add("hidden");
    submitBtn.disabled = false;
    if (loadingStepTimer) clearInterval(loadingStepTimer);
  }

  // Cycle the "active" class through the pipeline-step labels every 2s.
  function animateLoadingSteps() {
    const steps = loadingIndicator.querySelectorAll(".step");
    let idx = 0;
    steps.forEach(s => s.classList.remove("active"));
    if (steps.length) steps[0].classList.add("active");

    loadingStepTimer = setInterval(() => {
      steps.forEach(s => s.classList.remove("active"));
      idx = (idx + 1) % steps.length;
      steps[idx].classList.add("active");
    }, 2000);
  }

  // ── Visibility helpers ───────────────────────────────────────────────
  function hideResults() { resultsSection.classList.add("hidden"); }
  function hideError() { errorSection.classList.add("hidden"); }

  function showError(msg) {
    errorOutput.textContent = msg;
    errorSection.classList.remove("hidden");
  }

  // ── Escape HTML ──────────────────────────────────────────────────────
  // Uses the browser's own serializer: text node in, entity-escaped HTML out.
  function escapeHtml(str) {
    const div = document.createElement("div");
    div.appendChild(document.createTextNode(str));
    return div.innerHTML;
  }
})();
frontend/style.css ADDED
@@ -0,0 +1,639 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* ═══════════════════════════════════════════════════════════════════════════
2
+ AI SQL Analyst β€” Premium Light-Mode Design System (Green Accent)
3
+ ═══════════════════════════════════════════════════════════════════════════ */
4
+
5
+ /* ── Reset & Variables ──────────────────────────────────────────────────── */
6
+ *,
7
+ *::before,
8
+ *::after {
9
+ margin: 0;
10
+ padding: 0;
11
+ box-sizing: border-box;
12
+ }
13
+
14
+ :root {
15
+ /* Palette β€” clean white + green */
16
+ --bg-primary: #ffffff;
17
+ --bg-secondary: #f8faf9;
18
+ --bg-card: rgba(255, 255, 255, 0.85);
19
+ --bg-card-hover: rgba(255, 255, 255, 0.95);
20
+ --border-subtle: rgba(16, 185, 129, 0.18);
21
+ --border-glow: rgba(16, 185, 129, 0.35);
22
+
23
+ --text-primary: #1e293b;
24
+ --text-secondary: #475569;
25
+ --text-muted: #94a3b8;
26
+
27
+ --accent-green: #10b981;
28
+ --accent-green-dark: #059669;
29
+ --accent-green-light: #34d399;
30
+ --accent-emerald: #047857;
31
+ --accent-teal: #0d9488;
32
+ --accent-cyan: #06b6d4;
33
+ --accent-amber: #f59e0b;
34
+ --accent-rose: #f43f5e;
35
+
36
+ --gradient-primary: linear-gradient(135deg, #10b981 0%, #059669 50%, #047857 100%);
37
+ --gradient-subtle: linear-gradient(135deg, rgba(16, 185, 129, 0.06) 0%, rgba(5, 150, 105, 0.04) 100%);
38
+
39
+ /* Typography */
40
+ --font-sans: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif;
41
+ --font-mono: 'JetBrains Mono', 'Fira Code', monospace;
42
+
43
+ /* Sizes */
44
+ --radius-sm: 8px;
45
+ --radius-md: 12px;
46
+ --radius-lg: 16px;
47
+ --radius-xl: 24px;
48
+
49
+ /* Transitions */
50
+ --transition-fast: 150ms cubic-bezier(0.4, 0, 0.2, 1);
51
+ --transition-smooth: 300ms cubic-bezier(0.4, 0, 0.2, 1);
52
+ --transition-spring: 500ms cubic-bezier(0.34, 1.56, 0.64, 1);
53
+ }
54
+
55
+ html {
56
+ font-size: 16px;
57
+ -webkit-font-smoothing: antialiased;
58
+ -moz-osx-font-smoothing: grayscale;
59
+ }
60
+
61
+ body {
62
+ font-family: var(--font-sans);
63
+ background: var(--bg-primary);
64
+ color: var(--text-primary);
65
+ min-height: 100vh;
66
+ overflow-x: hidden;
67
+ position: relative;
68
+ }
69
+
70
+ /* ── Background Effects ─────────────────────────────────────────────────── */
71
+
72
+ .bg-effects {
73
+ position: fixed;
74
+ inset: 0;
75
+ pointer-events: none;
76
+ z-index: 0;
77
+ overflow: hidden;
78
+ }
79
+
80
+ .orb {
81
+ position: absolute;
82
+ border-radius: 50%;
83
+ filter: blur(120px);
84
+ opacity: 0.08;
85
+ animation: orbFloat 20s ease-in-out infinite;
86
+ }
87
+
88
+ .orb-1 {
89
+ width: 600px;
90
+ height: 600px;
91
+ background: var(--accent-green);
92
+ top: -200px;
93
+ left: -100px;
94
+ animation-duration: 25s;
95
+ }
96
+
97
+ .orb-2 {
98
+ width: 500px;
99
+ height: 500px;
100
+ background: var(--accent-teal);
101
+ top: 50%;
102
+ right: -150px;
103
+ animation-duration: 30s;
104
+ animation-delay: -5s;
105
+ }
106
+
107
+ .orb-3 {
108
+ width: 400px;
109
+ height: 400px;
110
+ background: var(--accent-green-light);
111
+ bottom: -100px;
112
+ left: 30%;
113
+ animation-duration: 22s;
114
+ animation-delay: -10s;
115
+ }
116
+
117
+ @keyframes orbFloat {
118
+ 0%, 100% { transform: translate(0, 0) scale(1); }
119
+ 25% { transform: translate(40px, -30px) scale(1.05); }
120
+ 50% { transform: translate(-20px, 40px) scale(0.95); }
121
+ 75% { transform: translate(30px, 20px) scale(1.02); }
122
+ }
123
+
124
+ /* ── Container ──────────────────────────────────────────────────────────── */
125
+
126
+ .container {
127
+ position: relative;
128
+ z-index: 1;
129
+ max-width: 1100px;
130
+ margin: 0 auto;
131
+ padding: 2rem 1.5rem 4rem;
132
+ }
133
+
134
+ /* ── Glass Card Base ────────────────────────────────────────────────────── */
135
+
136
+ .glass {
137
+ background: var(--bg-card);
138
+ backdrop-filter: blur(20px);
139
+ -webkit-backdrop-filter: blur(20px);
140
+ border: 1px solid var(--border-subtle);
141
+ border-radius: var(--radius-lg);
142
+ box-shadow: 0 1px 3px rgba(0, 0, 0, 0.04), 0 4px 12px rgba(16, 185, 129, 0.04);
143
+ transition: border-color var(--transition-smooth), box-shadow var(--transition-smooth);
144
+ }
145
+
146
+ .glass:hover {
147
+ border-color: var(--border-glow);
148
+ box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05), 0 8px 24px rgba(16, 185, 129, 0.08);
149
+ }
150
+
151
+ /* ── Header ─────────────────────────────────────────────────────────────── */
152
+
153
+ .header {
154
+ display: flex;
155
+ justify-content: space-between;
156
+ align-items: center;
157
+ margin-bottom: 2.5rem;
158
+ flex-wrap: wrap;
159
+ gap: 1rem;
160
+ }
161
+
162
+ .logo {
163
+ display: flex;
164
+ align-items: center;
165
+ gap: 1rem;
166
+ }
167
+
168
+ .logo-icon {
169
+ width: 48px;
170
+ height: 48px;
171
+ background: var(--gradient-primary);
172
+ border-radius: var(--radius-md);
173
+ display: flex;
174
+ align-items: center;
175
+ justify-content: center;
176
+ color: white;
177
+ box-shadow: 0 4px 16px rgba(16, 185, 129, 0.3);
178
+ }
179
+
180
+ .logo-icon svg {
181
+ width: 26px;
182
+ height: 26px;
183
+ }
184
+
185
+ .header h1 {
186
+ font-size: 1.6rem;
187
+ font-weight: 800;
188
+ background: var(--gradient-primary);
189
+ -webkit-background-clip: text;
190
+ -webkit-text-fill-color: transparent;
191
+ background-clip: text;
192
+ letter-spacing: -0.02em;
193
+ }
194
+
195
+ .tagline {
196
+ font-size: 0.8rem;
197
+ color: var(--text-muted);
198
+ font-weight: 400;
199
+ letter-spacing: 0.04em;
200
+ text-transform: uppercase;
201
+ }
202
+
203
+ /* ── Model Switcher ─────────────────────────────────────────────────────── */
204
+
205
+ .model-switcher {
206
+ display: flex;
207
+ align-items: center;
208
+ gap: 0.75rem;
209
+ }
210
+
211
+ .switcher-label {
212
+ font-size: 0.75rem;
213
+ color: var(--text-muted);
214
+ text-transform: uppercase;
215
+ letter-spacing: 0.08em;
216
+ font-weight: 600;
217
+ }
218
+
219
+ .switcher-track {
220
+ display: flex;
221
+ background: var(--bg-secondary);
222
+ border: 1px solid var(--border-subtle);
223
+ border-radius: var(--radius-xl);
224
+ padding: 3px;
225
+ gap: 2px;
226
+ }
227
+
228
+ .switcher-btn {
229
+ display: flex;
230
+ align-items: center;
231
+ gap: 0.4rem;
232
+ padding: 0.45rem 1rem;
233
+ border: none;
234
+ border-radius: var(--radius-xl);
235
+ background: transparent;
236
+ color: var(--text-muted);
237
+ font-family: var(--font-sans);
238
+ font-size: 0.8rem;
239
+ font-weight: 500;
240
+ cursor: pointer;
241
+ transition: all var(--transition-fast);
242
+ }
243
+
244
+ .switcher-btn:hover {
245
+ color: var(--text-secondary);
246
+ }
247
+
248
+ .switcher-btn.active {
249
+ background: var(--gradient-primary);
250
+ color: white;
251
+ box-shadow: 0 2px 10px rgba(16, 185, 129, 0.3);
252
+ }
253
+
254
+ .btn-dot {
255
+ width: 6px;
256
+ height: 6px;
257
+ border-radius: 50%;
258
+ background: currentColor;
259
+ opacity: 0.6;
260
+ }
261
+
262
+ .switcher-btn.active .btn-dot {
263
+ background: #a7f3d0;
264
+ opacity: 1;
265
+ box-shadow: 0 0 6px #a7f3d0;
266
+ animation: dotPulse 2s ease-in-out infinite;
267
+ }
268
+
269
+ @keyframes dotPulse {
270
+ 0%, 100% { box-shadow: 0 0 4px #a7f3d0; }
271
+ 50% { box-shadow: 0 0 12px #a7f3d0; }
272
+ }
273
+
274
+ /* ── Input Section ──────────────────────────────────────────────────────── */
275
+
276
+ .input-section {
277
+ margin-bottom: 2rem;
278
+ }
279
+
280
+ .input-card {
281
+ padding: 1.5rem;
282
+ }
283
+
284
+ .input-label {
285
+ display: block;
286
+ font-size: 0.85rem;
287
+ font-weight: 600;
288
+ color: var(--text-secondary);
289
+ margin-bottom: 0.75rem;
290
+ letter-spacing: 0.02em;
291
+ }
292
+
293
+ .input-wrapper {
294
+ display: flex;
295
+ gap: 0.75rem;
296
+ align-items: flex-end;
297
+ }
298
+
299
+ textarea {
300
+ flex: 1;
301
+ resize: vertical;
302
+ min-height: 70px;
303
+ max-height: 200px;
304
+ padding: 1rem 1.25rem;
305
+ background: var(--bg-secondary);
306
+ border: 1px solid var(--border-subtle);
307
+ border-radius: var(--radius-md);
308
+ color: var(--text-primary);
309
+ font-family: var(--font-sans);
310
+ font-size: 0.95rem;
311
+ line-height: 1.6;
312
+ transition: border-color var(--transition-fast), box-shadow var(--transition-fast);
313
+ }
314
+
315
+ textarea::placeholder {
316
+ color: var(--text-muted);
317
+ }
318
+
319
+ textarea:focus {
320
+ outline: none;
321
+ border-color: var(--accent-green);
322
+ box-shadow: 0 0 0 3px rgba(16, 185, 129, 0.15);
323
+ }
324
+
325
+ .submit-btn {
326
+ width: 52px;
327
+ height: 52px;
328
+ flex-shrink: 0;
329
+ border: none;
330
+ border-radius: var(--radius-md);
331
+ background: var(--gradient-primary);
332
+ color: white;
333
+ cursor: pointer;
334
+ display: flex;
335
+ align-items: center;
336
+ justify-content: center;
337
+ transition: transform var(--transition-fast), box-shadow var(--transition-fast);
338
+ box-shadow: 0 4px 15px rgba(16, 185, 129, 0.3);
339
+ }
340
+
341
+ .submit-btn:hover {
342
+ transform: translateY(-2px);
343
+ box-shadow: 0 6px 25px rgba(16, 185, 129, 0.4);
344
+ }
345
+
346
+ .submit-btn:active {
347
+ transform: translateY(0);
348
+ }
349
+
350
+ .submit-btn svg {
351
+ width: 22px;
352
+ height: 22px;
353
+ }
354
+
355
+ .submit-btn:disabled {
356
+ opacity: 0.5;
357
+ cursor: not-allowed;
358
+ transform: none;
359
+ }
360
+
361
+ /* ── Loading ────────────────────────────────────────────────────────────── */
362
+
363
+ .loading {
364
+ text-align: center;
365
+ padding: 3rem 1rem;
366
+ }
367
+
368
+ .loading-content {
369
+ display: inline-flex;
370
+ flex-direction: column;
371
+ align-items: center;
372
+ gap: 1.25rem;
373
+ }
374
+
375
+ .spinner {
376
+ width: 48px;
377
+ height: 48px;
378
+ border: 3px solid rgba(16, 185, 129, 0.15);
379
+ border-top-color: var(--accent-green);
380
+ border-right-color: var(--accent-green-dark);
381
+ border-radius: 50%;
382
+ animation: spin 0.8s linear infinite;
383
+ }
384
+
385
+ @keyframes spin {
386
+ to { transform: rotate(360deg); }
387
+ }
388
+
389
+ .loading-text {
390
+ font-size: 1rem;
391
+ font-weight: 500;
392
+ color: var(--text-secondary);
393
+ }
394
+
395
+ .loading-steps {
396
+ display: flex;
397
+ align-items: center;
398
+ gap: 0.35rem;
399
+ flex-wrap: wrap;
400
+ justify-content: center;
401
+ }
402
+
403
+ .loading-steps .step {
404
+ font-size: 0.7rem;
405
+ padding: 0.25rem 0.5rem;
406
+ border-radius: var(--radius-sm);
407
+ background: rgba(16, 185, 129, 0.06);
408
+ color: var(--text-muted);
409
+ font-weight: 500;
410
+ transition: all var(--transition-smooth);
411
+ }
412
+
413
+ .loading-steps .step.active {
414
+ background: rgba(16, 185, 129, 0.15);
415
+ color: var(--accent-green-dark);
416
+ }
417
+
418
+ .loading-steps .step-arrow {
419
+ color: var(--text-muted);
420
+ font-size: 0.65rem;
421
+ opacity: 0.4;
422
+ }
423
+
424
+ /* ── Results Section ────────────────────────────────────────────────────── */
425
+
426
+ .results-section {
427
+ display: flex;
428
+ flex-direction: column;
429
+ gap: 1.25rem;
430
+ animation: fadeInUp 0.5s ease;
431
+ }
432
+
433
+ @keyframes fadeInUp {
434
+ from { opacity: 0; transform: translateY(20px); }
435
+ to { opacity: 1; transform: translateY(0); }
436
+ }
437
+
438
+ .result-card {
439
+ padding: 1.5rem;
440
+ }
441
+
442
+ .card-header {
443
+ display: flex;
444
+ align-items: center;
445
+ gap: 0.75rem;
446
+ margin-bottom: 1rem;
447
+ }
448
+
449
+ .card-icon {
450
+ width: 36px;
451
+ height: 36px;
452
+ border-radius: var(--radius-sm);
453
+ display: flex;
454
+ align-items: center;
455
+ justify-content: center;
456
+ flex-shrink: 0;
457
+ }
458
+
459
+ .card-icon svg {
460
+ width: 18px;
461
+ height: 18px;
462
+ }
463
+
464
+ .sql-icon { background: rgba(16, 185, 129, 0.1); color: var(--accent-green-dark); }
465
+ .data-icon { background: rgba(6, 182, 212, 0.1); color: var(--accent-teal); }
466
+ .answer-icon { background: rgba(16, 185, 129, 0.1); color: var(--accent-green); }
467
+ .insights-icon{ background: rgba(245, 158, 11, 0.1); color: var(--accent-amber); }
468
+ .error-icon { background: rgba(244, 63, 94, 0.1); color: var(--accent-rose); }
469
+
470
+ .card-header h2 {
471
+ font-size: 0.95rem;
472
+ font-weight: 700;
473
+ color: var(--text-primary);
474
+ flex: 1;
475
+ letter-spacing: -0.01em;
476
+ }
477
+
478
+ .copy-btn {
479
+ width: 34px;
480
+ height: 34px;
481
+ border: 1px solid var(--border-subtle);
482
+ background: transparent;
483
+ border-radius: var(--radius-sm);
484
+ color: var(--text-muted);
485
+ cursor: pointer;
486
+ display: flex;
487
+ align-items: center;
488
+ justify-content: center;
489
+ transition: all var(--transition-fast);
490
+ }
491
+
492
+ .copy-btn svg {
493
+ width: 16px;
494
+ height: 16px;
495
+ }
496
+
497
+ .copy-btn:hover {
498
+ border-color: var(--accent-green);
499
+ color: var(--accent-green);
500
+ background: rgba(16, 185, 129, 0.06);
501
+ }
502
+
503
+ .row-count {
504
+ font-size: 0.75rem;
505
+ color: var(--accent-green-dark);
506
+ padding: 0.2rem 0.6rem;
507
+ background: rgba(16, 185, 129, 0.08);
508
+ border-radius: var(--radius-xl);
509
+ font-weight: 500;
510
+ }
511
+
512
+ /* ── SQL Code Block ─────────────────────────────────────────────────────── */
513
+
514
+ .sql-code {
515
+ background: #f0fdf4;
516
+ border: 1px solid rgba(16, 185, 129, 0.15);
517
+ border-radius: var(--radius-md);
518
+ padding: 1.25rem;
519
+ overflow-x: auto;
520
+ font-family: var(--font-mono);
521
+ font-size: 0.85rem;
522
+ line-height: 1.7;
523
+ color: var(--accent-emerald);
524
+ tab-size: 2;
525
+ }
526
+
527
+ /* ── Data Table ─────────────────────────────────────────────────────────── */
528
+
529
+ .table-wrapper {
530
+ overflow-x: auto;
531
+ border: 1px solid var(--border-subtle);
532
+ border-radius: var(--radius-md);
533
+ max-height: 500px;
534
+ overflow-y: auto;
535
+ }
536
+
537
+ table {
538
+ width: 100%;
539
+ border-collapse: collapse;
540
+ font-size: 0.85rem;
541
+ }
542
+
543
+ thead {
544
+ position: sticky;
545
+ top: 0;
546
+ z-index: 2;
547
+ }
548
+
549
+ th {
550
+ background: rgba(16, 185, 129, 0.08);
551
+ color: var(--accent-green-dark);
552
+ font-weight: 600;
553
+ text-transform: uppercase;
554
+ font-size: 0.7rem;
555
+ letter-spacing: 0.06em;
556
+ padding: 0.75rem 1rem;
557
+ text-align: left;
558
+ white-space: nowrap;
559
+ border-bottom: 1px solid var(--border-subtle);
560
+ }
561
+
562
+ td {
563
+ padding: 0.65rem 1rem;
564
+ border-bottom: 1px solid rgba(0, 0, 0, 0.04);
565
+ color: var(--text-secondary);
566
+ white-space: nowrap;
567
+ }
568
+
569
+ tr:hover td {
570
+ background: rgba(16, 185, 129, 0.03);
571
+ color: var(--text-primary);
572
+ }
573
+
574
+ /* ── Answer & Insights ──────────────────────────────────────────────────── */
575
+
576
+ .answer-text {
577
+ font-size: 0.95rem;
578
+ line-height: 1.75;
579
+ color: var(--text-secondary);
580
+ }
581
+
582
+ .insights-text {
583
+ font-size: 0.9rem;
584
+ line-height: 1.75;
585
+ color: var(--text-secondary);
586
+ white-space: pre-wrap;
587
+ }
588
+
589
+ /* ── Error ──────────────────────────────────────────────────────────────── */
590
+
591
+ .error-card {
592
+ border-color: rgba(244, 63, 94, 0.25) !important;
593
+ }
594
+
595
+ .error-text {
596
+ font-size: 0.9rem;
597
+ line-height: 1.7;
598
+ color: var(--accent-rose);
599
+ }
600
+
601
+ /* ── Utilities ──────────────────────────────────────────────────────────── */
602
+
603
+ .hidden {
604
+ display: none !important;
605
+ }
606
+
607
+ /* ── Responsive ─────────────────────────────────────────────────────────── */
608
+
609
+ @media (max-width: 640px) {
610
+ .container {
611
+ padding: 1rem;
612
+ }
613
+
614
+ .header {
615
+ flex-direction: column;
616
+ align-items: flex-start;
617
+ }
618
+
619
+ .header h1 {
620
+ font-size: 1.3rem;
621
+ }
622
+
623
+ .input-wrapper {
624
+ flex-direction: column;
625
+ }
626
+
627
+ .submit-btn {
628
+ width: 100%;
629
+ height: 48px;
630
+ }
631
+
632
+ .loading-steps {
633
+ gap: 0.2rem;
634
+ }
635
+
636
+ .loading-steps .step-arrow {
637
+ display: none;
638
+ }
639
+ }
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ sqlalchemy
4
+ psycopg2-binary
5
+ dspy
6
+ litellm
7
+ groq
8
+ openai
9
+ pandas
10
+ openpyxl
11
+ pydantic
12
+ python-dotenv
space.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ sdk: docker
2
+ app_port: 7860
3
+ license: mit
4
+ title: AI SQL Analyst (sqlbot)