github-actions[bot] committed on
Commit
f0b4004
·
1 Parent(s): 3a2092d

Sync from GitHub main @ 843379ca43f29dcb9d317fdd2bf6d608bcbc8a7e

Browse files
README.md CHANGED
@@ -25,6 +25,35 @@ A live interactive demo is available on Hugging Face Spaces: 👉 [**Try the Dem
25
  <img src="docs/assets/screenshots/demo_list_albums_total_sales.png" width="700">
26
  </p>
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  ---
29
 
30
  ## Why this exists
 
25
  <img src="docs/assets/screenshots/demo_list_albums_total_sales.png" width="700">
26
  </p>
27
 
28
+
29
+
30
+ ## Quickstart (Local)
31
+
32
+ ### 1) Install
33
+ ```bash
34
+ make install
35
+ ```
36
+
37
+ ### 2) Run API (Terminal 1)
38
+ ```bash
39
+ make demo-up
40
+ ```
41
+
42
+ ### 3) Smoke (Terminal 2)
43
+ ```bash
44
+ make demo-smoke
45
+ ```
46
+
47
+ ### 4) Observability stack (optional)
48
+ ```bash
49
+ make infra-up
50
+ ```
51
+
52
+ Then (optional Prometheus snapshot):
53
+ ```bash
54
+ make demo-metrics
55
+ ```
56
+
57
  ---
58
 
59
  ## Why this exists
adapters/db/base.py CHANGED
@@ -8,7 +8,10 @@ class DBAdapter(Protocol):
8
  dialect: str
9
 
10
  def preview_schema(self, limit_per_table: int = 0) -> str:
11
- """Generate a readable summary of the database schema with optional sample rows per table."""
 
 
 
12
 
13
  def execute(self, sql: str) -> Tuple[List[Tuple[Any, ...]], List[str]]:
14
  """Execute a SELECT query and return (rows, columns)."""
 
8
  dialect: str
9
 
10
    def preview_schema(self, limit_per_table: int = 0) -> str:
        """Return a human-friendly schema preview.

        Intended for display: may include column types, bullet formatting,
        and up to ``limit_per_table`` sample rows per table (0 = no samples
        — presumably; confirm against implementations).
        """
12
+
13
    def derive_schema_preview(self) -> str:
        """Return a compact schema preview for LLM prompts and evaluation.

        Format: one line per table, ``table(col1, col2, ...)``.
        """
15
 
16
    def execute(self, sql: str) -> Tuple[List[Tuple[Any, ...]], List[str]]:
        """Execute a SELECT query and return ``(rows, columns)``.

        ``rows`` is a list of value tuples; ``columns`` lists the result
        column names in order.
        """
adapters/db/postgres_adapter.py CHANGED
@@ -83,3 +83,27 @@ class PostgresAdapter(DBAdapter):
83
  # psycopg returns rows like ("Seq Scan on ...",)
84
  plan_lines: List[str] = [str(r[0]) for r in rows if r and len(r) >= 1]
85
  return plan_lines
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  # psycopg returns rows like ("Seq Scan on ...",)
84
  plan_lines: List[str] = [str(r[0]) for r in rows if r and len(r) >= 1]
85
  return plan_lines
86
+
87
+ def derive_schema_preview(self) -> str:
88
+ """
89
+ LLM/eval schema preview. One line per table: table(col1, col2, ...)
90
+ """
91
+ with psycopg.connect(self.dsn) as conn:
92
+ with conn.cursor() as cur:
93
+ cur.execute(
94
+ """
95
+ SELECT table_name, column_name
96
+ FROM information_schema.columns
97
+ WHERE table_schema = 'public'
98
+ ORDER BY table_name, ordinal_position;
99
+ """
100
+ )
101
+ rows = cur.fetchall() or []
102
+
103
+ tables: dict[str, list[str]] = {}
104
+ for table, col in rows:
105
+ if table and col:
106
+ tables.setdefault(table, []).append(col)
107
+
108
+ lines = [f"{t}({', '.join(cols)})" for t, cols in tables.items() if cols]
109
+ return "\n".join(lines)
adapters/db/sqlite_adapter.py CHANGED
@@ -21,15 +21,44 @@ class SQLiteAdapter(DBAdapter):
21
  raise FileNotFoundError(f"SQLite DB does not exist: {self.path}")
22
  with sqlite3.connect(f"file:{self.path}?mode=ro", uri=True) as conn:
23
  cur = conn.cursor()
24
- cur.execute("SELECT name FROM sqlite_master WHERE type='table';")
 
 
 
25
  tables = [t[0] for t in cur.fetchall()]
26
  lines = []
27
  for t in tables:
28
- cur.execute(f"PRAGMA table_info({t});")
 
29
  cols = [f"{c[1]}:{c[2]}" for c in cur.fetchall()]
30
  lines.append(f"- {t} ({', '.join(cols)})")
31
  return "\n".join(lines)
32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  def execute(self, sql: str) -> Tuple[List[Tuple[Any, ...]], List[str]]:
34
  if not self.path.exists():
35
  raise FileNotFoundError(f"SQLite DB does not exist: {self.path}")
 
21
  raise FileNotFoundError(f"SQLite DB does not exist: {self.path}")
22
  with sqlite3.connect(f"file:{self.path}?mode=ro", uri=True) as conn:
23
  cur = conn.cursor()
24
+ cur.execute(
25
+ "SELECT name FROM sqlite_master WHERE type='table' "
26
+ "AND name NOT LIKE 'sqlite_%' ORDER BY name;"
27
+ )
28
  tables = [t[0] for t in cur.fetchall()]
29
  lines = []
30
  for t in tables:
31
+ safe_t = t.replace("'", "''")
32
+ cur.execute(f"PRAGMA table_info('{safe_t}');")
33
  cols = [f"{c[1]}:{c[2]}" for c in cur.fetchall()]
34
  lines.append(f"- {t} ({', '.join(cols)})")
35
  return "\n".join(lines)
36
 
37
+ def derive_schema_preview(self) -> str:
38
+ """Return a stable schema preview string for prompts/evaluation."""
39
+ # Keep this aligned with eval runners: one line per table: Table(col1, col2, ...)
40
+ if not self.path.exists():
41
+ raise FileNotFoundError(f"SQLite DB does not exist: {self.path}")
42
+
43
+ with sqlite3.connect(f"file:{self.path}?mode=ro", uri=True) as conn:
44
+ cur = conn.cursor()
45
+ cur.execute(
46
+ "SELECT name FROM sqlite_master "
47
+ "WHERE type='table' AND name NOT LIKE 'sqlite_%' "
48
+ "ORDER BY name;"
49
+ )
50
+ tables = [t[0] for t in cur.fetchall()]
51
+ lines: List[str] = []
52
+ for t in tables:
53
+ safe_t = t.replace("'", "''")
54
+ cur.execute(f"PRAGMA table_info('{safe_t}');")
55
+ cols = [row[1] for row in cur.fetchall() if row and row[1]]
56
+ if cols:
57
+ lines.append(f"{t}({', '.join(cols)})")
58
+ else:
59
+ lines.append(f"{t}()")
60
+ return "\n".join(lines)
61
+
62
  def execute(self, sql: str) -> Tuple[List[Tuple[Any, ...]], List[str]]:
63
  if not self.path.exists():
64
  raise FileNotFoundError(f"SQLite DB does not exist: {self.path}")
app/services/nl2sql_service.py CHANGED
@@ -1,26 +1,25 @@
1
  from __future__ import annotations
2
 
3
- import sqlite3
4
  from dataclasses import dataclass
5
- from typing import Any, Optional
6
  from pathlib import Path
 
7
 
8
- from nl2sql.pipeline import FinalResult
9
- from nl2sql.pipeline_factory import pipeline_from_config_with_adapter
10
- from adapters.db.sqlite_adapter import SQLiteAdapter
11
  from adapters.db.postgres_adapter import PostgresAdapter
 
12
  from adapters.metrics.prometheus import PrometheusMetrics
13
-
14
  from app import state
15
- from app.settings import Settings
16
  from app.errors import (
17
  AppError,
18
  DbNotFound,
19
- SchemaRequired,
20
- SchemaDeriveError,
21
  PipelineConfigError,
22
  PipelineRunError,
 
 
23
  )
 
 
 
24
 
25
  Adapter = Any # You can replace this with a Protocol later
26
 
@@ -54,48 +53,16 @@ class NL2SQLService:
54
  raise DbNotFound(f"Could not resolve DB for db_id={db_id!r}")
55
  return SQLiteAdapter(path=path)
56
 
57
- default_path = self.settings.default_sqlite_path
 
 
 
 
58
  if not Path(default_path).exists():
59
  raise DbNotFound(f"SQLite database path does not exist: {default_path!r}")
60
 
61
  return SQLiteAdapter(path=default_path)
62
 
63
- def _introspect_sqlite_schema(self, adapter: Adapter) -> str:
64
- """
65
- Build a lightweight textual schema preview for a SQLite database.
66
-
67
- This is a straight port of the previous sqlite3 logic, but contained
68
- inside the service instead of the router.
69
- """
70
- db_path = getattr(adapter, "db_path", None) or getattr(adapter, "path", None)
71
- if not db_path:
72
- raise RuntimeError(
73
- "SQLite adapter must expose a .db_path or .path attribute"
74
- )
75
-
76
- if not Path(db_path).exists():
77
- raise FileNotFoundError(f"SQLite database path does not exist: {db_path}")
78
-
79
- lines: list[str] = []
80
- conn = sqlite3.connect(db_path)
81
- try:
82
- cur = conn.cursor()
83
- cur.execute(
84
- "SELECT name FROM sqlite_master WHERE type='table' "
85
- "AND name NOT LIKE 'sqlite_%' ORDER BY name"
86
- )
87
- tables = [row[0] for row in cur.fetchall()]
88
-
89
- for table in tables:
90
- cur.execute(f"PRAGMA table_info({table})")
91
- cols = [row[1] for row in cur.fetchall()]
92
- if cols:
93
- lines.append(f"{table}({', '.join(cols)})")
94
- finally:
95
- conn.close()
96
-
97
- return "\n".join(lines)
98
-
99
  def get_schema_preview(
100
  self,
101
  db_id: Optional[str],
@@ -117,7 +84,7 @@ class NL2SQLService:
117
 
118
  try:
119
  adapter = self._select_adapter(db_id)
120
- return self._introspect_sqlite_schema(adapter)
121
  except DbNotFound:
122
  raise
123
  except Exception as exc:
 
1
  from __future__ import annotations
2
 
3
+ import os
4
  from dataclasses import dataclass
 
5
  from pathlib import Path
6
+ from typing import Any, Optional
7
 
 
 
 
8
  from adapters.db.postgres_adapter import PostgresAdapter
9
+ from adapters.db.sqlite_adapter import SQLiteAdapter
10
  from adapters.metrics.prometheus import PrometheusMetrics
 
11
  from app import state
 
12
  from app.errors import (
13
  AppError,
14
  DbNotFound,
 
 
15
  PipelineConfigError,
16
  PipelineRunError,
17
+ SchemaDeriveError,
18
+ SchemaRequired,
19
  )
20
+ from app.settings import Settings
21
+ from nl2sql.pipeline import FinalResult
22
+ from nl2sql.pipeline_factory import pipeline_from_config_with_adapter
23
 
24
  Adapter = Any # You can replace this with a Protocol later
25
 
 
53
  raise DbNotFound(f"Could not resolve DB for db_id={db_id!r}")
54
  return SQLiteAdapter(path=path)
55
 
56
+ # Allow tests (and deployments) to override the default DB path at runtime
57
+ # even if Settings was instantiated before env vars were patched.
58
+ default_path = (
59
+ os.getenv("DEFAULT_SQLITE_PATH") or self.settings.default_sqlite_path
60
+ )
61
  if not Path(default_path).exists():
62
  raise DbNotFound(f"SQLite database path does not exist: {default_path!r}")
63
 
64
  return SQLiteAdapter(path=default_path)
65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  def get_schema_preview(
67
  self,
68
  db_id: Optional[str],
 
84
 
85
  try:
86
  adapter = self._select_adapter(db_id)
87
+ return adapter.derive_schema_preview()
88
  except DbNotFound:
89
  raise
90
  except Exception as exc:
scripts/smoke_api.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Portable smoke request for NL2SQL Copilot.
2
+
3
+ - Ensures a demo SQLite DB exists under /tmp/nl2sql_dbs/smoke_demo.sqlite
4
+ - Uploads it to the API
5
+ - Runs a few representative queries
6
+ - Exits non-zero on failure (so Make/CI can trust it)
7
+
8
+ Env:
9
+ API_BASE: base URL of API (default: http://127.0.0.1:8000)
10
+ API_KEY: API key header value (default: dev-key)
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import json
16
+ import os
17
+ import time
18
+ from pathlib import Path
19
+
20
+ import requests
21
+
22
+
23
+ API_BASE = os.getenv("API_BASE", "http://127.0.0.1:8000").rstrip("/")
24
+ API_KEY = os.getenv("API_KEY", "dev-key")
25
+
26
+ DB_DIR = Path("/tmp/nl2sql_dbs")
27
+ DB_PATH = DB_DIR / "smoke_demo.sqlite"
28
+
29
+
30
+ def _ensure_demo_db(path: Path) -> None:
31
+ """Delegate to scripts/smoke_run.py if available; otherwise fail."""
32
+ # Your repo already has scripts/smoke_run.py which creates the DB deterministically.
33
+ from smoke_run import ensure_demo_db # type: ignore
34
+
35
+ ensure_demo_db(path)
36
+
37
+
38
def _upload_db_and_get_id(path: Path) -> str:
    """Upload the SQLite file at *path* and return the server-assigned db_id.

    Raises RuntimeError on a non-200 response or a malformed payload.
    """
    endpoint = f"{API_BASE}/api/v1/nl2sql/upload_db"
    with path.open("rb") as fh:
        response = requests.post(
            endpoint,
            headers={"X-API-Key": API_KEY},
            files={"file": fh},
            timeout=30,
        )
    if response.status_code != 200:
        raise RuntimeError(f"Upload failed: {response.status_code} {response.text[:400]}")
    payload = response.json()
    identifier = payload.get("db_id")
    if not identifier:
        raise RuntimeError(f"Invalid upload response: {payload}")
    return str(identifier)
50
+
51
+
52
def _run_query(db_id: str, query: str) -> dict:
    """POST one NL2SQL query and return ``{status, latency_ms, body}``.

    Never raises on a non-2xx response; the caller inspects ``status``.
    ``body`` is the decoded JSON payload, or ``{"raw": <text>}`` when the
    response is not valid JSON.
    """
    url = f"{API_BASE}/api/v1/nl2sql"
    headers = {"X-API-Key": API_KEY, "Content-Type": "application/json"}
    payload = {"db_id": db_id, "query": query}

    # perf_counter is monotonic, so the reported latency cannot go negative
    # or jump if the wall clock is adjusted mid-request.
    t0 = time.perf_counter()
    resp = requests.post(url, headers=headers, json=payload, timeout=60)
    dt_ms = int(round((time.perf_counter() - t0) * 1000))

    try:
        out = resp.json()
    except ValueError:
        # requests raises a ValueError subclass (JSONDecodeError) on bad JSON.
        out = {"raw": resp.text}

    return {"status": resp.status_code, "latency_ms": dt_ms, "body": out}
68
+
69
+
70
def main() -> int:
    """Build, upload, and query the demo DB; return a process exit code.

    Exit codes: 0 ok, 2 DB creation failed, 3 upload failed, 4 query
    checks failed.
    """
    DB_DIR.mkdir(parents=True, exist_ok=True)

    try:
        _ensure_demo_db(DB_PATH)
    except Exception as e:
        print(f"❌ Failed to create demo DB: {e}")
        return 2

    try:
        db_id = _upload_db_and_get_id(DB_PATH)
    except Exception as e:
        print(f"❌ Failed to upload demo DB: {e}")
        return 3

    # (question, should_succeed) pairs; the DELETE must be rejected.
    checks = [
        ("How many artists are there?", True),
        ("Which customer spent the most based on total invoice amount?", True),
        ("DELETE FROM users;", False),  # must be blocked
    ]

    ok_all = True
    for question, should_succeed in checks:
        result = _run_query(db_id=db_id, query=question)
        http_status = result["status"]
        print(f"\nQuery: {question}")
        print(f"HTTP {http_status} | {result['latency_ms']} ms")
        print(json.dumps(result["body"], indent=2)[:800])

        # A pass is: succeeded exactly when it was supposed to succeed
        # (a safety violation must NOT come back as HTTP 200).
        if (http_status == 200) != should_succeed:
            ok_all = False

    if ok_all:
        print("\n✅ demo-smoke passed")
        return 0

    print("\n❌ demo-smoke failed (see output above)")
    return 4
114
+
115
+
116
+ if __name__ == "__main__":
117
+ raise SystemExit(main())
scripts/smoke_metrics.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Portable Prometheus metrics validation for NL2SQL Copilot.
2
+
3
+ This script does NOT require jq.
4
+ It queries Prometheus HTTP API and prints a small snapshot.
5
+
6
+ Pre-req:
7
+ - API is running and you've already exercised it (e.g. via smoke_api.py)
8
+ - Prometheus is reachable
9
+
10
+ Env:
11
+ PROMETHEUS_URL (default: http://127.0.0.1:9090)
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import os
17
+ import json
18
+ from typing import Any, Dict
19
+
20
+ import requests
21
+
22
+
23
+ PROM = os.getenv("PROMETHEUS_URL", "http://127.0.0.1:9090").rstrip("/")
24
+
25
+
26
def prom_query(expr: str) -> Dict[str, Any]:
    """Run an instant query against the Prometheus HTTP API.

    Returns the decoded JSON response; raises for HTTP errors.
    """
    response = requests.get(
        f"{PROM}/api/v1/query", params={"query": expr}, timeout=15
    )
    response.raise_for_status()
    return response.json()
31
+
32
+
33
def main() -> int:
    """Print a snapshot of the NL2SQL recording rules.

    Returns 0 when every query succeeds, 2 otherwise.
    """
    recording_rules = (
        "nl2sql:pipeline_success_ratio",
        "nl2sql:stage_p95_ms",
    )

    print("📊 Prometheus snapshot")
    print(f"PROMETHEUS_URL={PROM}")

    ok = True
    for rule in recording_rules:
        try:
            snapshot = prom_query(rule)
            print(f"\nQuery: {rule}")
            print(json.dumps(snapshot, indent=2)[:1200])
        except Exception as e:
            ok = False
            print(f"❌ Prometheus query failed for {rule}: {e}")

    return 0 if ok else 2
53
+
54
+
55
+ if __name__ == "__main__":
56
+ raise SystemExit(main())
scripts/smoke_metrics.sh CHANGED
@@ -1,52 +1,9 @@
 
1
  set -euo pipefail
2
 
3
- API_BASE=${API_BASE:-"http://127.0.0.1:8000"}
4
- API_KEY=${API_KEY:-"dev-key"}
5
- PROM=${PROMETHEUS_URL:-"http://127.0.0.1:9090"}
6
- TMP_DB="/tmp/nl2sql_dbs/smoke_demo.sqlite"
7
 
8
- echo "🧪 Running NL2SQL smoke metrics validation..."
9
- echo "API_BASE=$API_BASE"
10
- echo "PROMETHEUS_URL=$PROM"
11
- echo "TMP_DB=$TMP_DB"
12
 
13
- # --- 1. Make sure the DB exists ---
14
- if [ ! -f "$TMP_DB" ]; then
15
- echo "⚙️ Creating demo database via smoke_run.py..."
16
- python scripts/smoke_run.py || {
17
- echo "❌ smoke_run.py failed to create demo DB."
18
- exit 1
19
- }
20
- else
21
- echo "✅ Found existing DB at $TMP_DB"
22
- fi
23
-
24
- # --- 2. Upload DB and capture db_id ---
25
- echo "⬆️ Uploading demo DB..."
26
- DB_ID=$(curl -s -X POST "$API_BASE/api/v1/nl2sql/upload_db" \
27
- -H "X-API-Key: $API_KEY" \
28
- -F "file=@${TMP_DB}" | jq -r '.db_id')
29
-
30
- if [ "$DB_ID" = "null" ] || [ -z "$DB_ID" ]; then
31
- echo "❌ Failed to upload DB or get db_id."
32
- exit 1
33
- fi
34
- echo "✅ Uploaded DB_ID: $DB_ID"
35
-
36
- # --- 3. Run a few API smoke queries ---
37
- echo "🚀 Sending test queries..."
38
- curl -s -X POST "$API_BASE/api/v1/nl2sql" \
39
- -H "Content-Type: application/json" -H "X-API-Key: $API_KEY" \
40
- -d "{\"db_id\":\"$DB_ID\",\"query\":\"How many artists are there?\"}" | jq .
41
-
42
- curl -s -X POST "$API_BASE/api/v1/nl2sql" \
43
- -H "Content-Type: application/json" -H "X-API-Key: $API_KEY" \
44
- -d "{\"db_id\":\"$DB_ID\",\"query\":\"Which customer spent the most?\"}" | jq .
45
-
46
- # --- 4. Collect metrics snapshot from Prometheus ---
47
- echo "📊 Checking Prometheus metrics..."
48
- curl -s "$PROM/api/v1/query?query=nl2sql:pipeline_success_ratio" | jq .
49
-
50
- curl -s "$PROM/api/v1/query?query=nl2sql:stage_p95_ms" | jq .
51
-
52
- echo "✅ Smoke metrics check completed."
 
1
#!/usr/bin/env bash
set -euo pipefail

# Deprecated: prefer `python scripts/smoke_metrics.py` (portable, no jq required).

# Default the Prometheus endpoint, then hand off to the Python implementation.
: "${PROMETHEUS_URL:=http://127.0.0.1:9090}"
export PROMETHEUS_URL

python scripts/smoke_metrics.py