Spaces:

melikakheirieh
/

nl2sql-copilot

Sleeping

App Files Files Community

github-actions[bot] committed on Dec 20, 2025

Commit

0ecc315

1 Parent(s): d2d07a3

Sync from GitHub main @ 793782272bfcd6bdae9a711aabd0ec2b0aef2312

Browse files

Files changed (6) hide show

adapters/db/base.py +2 -2
adapters/db/postgres_adapter.py +5 -3
adapters/db/sqlite_adapter.py +6 -2
nl2sql/errors/codes.py +1 -0
nl2sql/errors/mapper.py +1 -0
nl2sql/executor.py +79 -0

adapters/db/base.py CHANGED Viewed

@@ -13,5 +13,5 @@ class DBAdapter(Protocol):
     def execute(self, sql: str) -> Tuple[List[Tuple[Any, ...]], List[str]]:
         """Execute a SELECT query and return (rows, columns)."""
-    def explain_query_plan(self, sql: str) -> None:
-        """Validate SQL by asking the DB to plan it (must be read-only). Raise on failure."""

     def execute(self, sql: str) -> Tuple[List[Tuple[Any, ...]], List[str]]:
         """Execute a SELECT query and return (rows, columns)."""
+    def explain_query_plan(self, sql: str) -> List[str]:
+        """Return a query plan preview (must be read-only). Raise on failure."""

adapters/db/postgres_adapter.py CHANGED Viewed

@@ -69,7 +69,7 @@ class PostgresAdapter(DBAdapter):
                 cols: List[str] = [d[0] for d in desc if d]
                 return rows, cols
-    def explain_query_plan(self, sql: str) -> None:
         sql_stripped = (sql or "").strip().rstrip(";")
         if not sql_stripped.lower().startswith("select"):
             raise ValueError("Only SELECT statements are allowed.")
@@ -79,5 +79,7 @@ class PostgresAdapter(DBAdapter):
             with conn.cursor() as cur:
                 cur.execute("SET TRANSACTION READ ONLY;")
                 cur.execute(f"EXPLAIN {sql_stripped}")
-                # We don't need the output; if planning fails, it raises.
-                _ = cur.fetchall()

                 cols: List[str] = [d[0] for d in desc if d]
                 return rows, cols
+    def explain_query_plan(self, sql: str) -> List[str]:
         sql_stripped = (sql or "").strip().rstrip(";")
         if not sql_stripped.lower().startswith("select"):
             raise ValueError("Only SELECT statements are allowed.")
             with conn.cursor() as cur:
                 cur.execute("SET TRANSACTION READ ONLY;")
                 cur.execute(f"EXPLAIN {sql_stripped}")
+                rows = cur.fetchall() or []
+                # psycopg returns rows like ("Seq Scan on ...",)
+                plan_lines: List[str] = [str(r[0]) for r in rows if r and len(r) >= 1]
+                return plan_lines

adapters/db/sqlite_adapter.py CHANGED Viewed

@@ -45,7 +45,7 @@ class SQLiteAdapter(DBAdapter):
             log.info("Query executed successfully. Returned %d rows.", len(rows))
             return rows, cols
-    def explain_query_plan(self, sql: str) -> None:
         if not self.path.exists():
             raise FileNotFoundError(f"SQLite DB does not exist: {self.path}")
@@ -60,4 +60,8 @@ class SQLiteAdapter(DBAdapter):
                 conn.execute("PRAGMA query_only = ON;")
             except Exception:
                 pass
-            conn.execute(f"EXPLAIN QUERY PLAN {sql_stripped}")

             log.info("Query executed successfully. Returned %d rows.", len(rows))
             return rows, cols
+    def explain_query_plan(self, sql: str) -> List[str]:
         if not self.path.exists():
             raise FileNotFoundError(f"SQLite DB does not exist: {self.path}")
                 conn.execute("PRAGMA query_only = ON;")
             except Exception:
                 pass
+            cur = conn.execute(f"EXPLAIN QUERY PLAN {sql_stripped}")
+            rows = cur.fetchall() or []
+            # Rows are typically (id, parent, notused, detail)
+            plan_lines: List[str] = [str(r[-1]) for r in rows if r]
+            return plan_lines

nl2sql/errors/codes.py CHANGED Viewed

@@ -14,6 +14,7 @@ class ErrorCode(str, Enum):
     # --- Executor / DB ---
     DB_LOCKED = "DB_LOCKED"
     DB_TIMEOUT = "DB_TIMEOUT"
     LLM_FAILURE = "LLM_FAILURE"
     # --- LLM ---

     # --- Executor / DB ---
     DB_LOCKED = "DB_LOCKED"
     DB_TIMEOUT = "DB_TIMEOUT"
+    EXECUTOR_COST_GUARDRAIL_BLOCKED = "EXECUTOR_COST_GUARDRAIL_BLOCKED"
     LLM_FAILURE = "LLM_FAILURE"
     # --- LLM ---

nl2sql/errors/mapper.py CHANGED Viewed

@@ -8,6 +8,7 @@ ERROR_MAP = {
     ErrorCode.PLAN_SYNTAX_ERROR: (422, False),
     ErrorCode.DB_LOCKED: (503, True),
     ErrorCode.DB_TIMEOUT: (503, True),
     ErrorCode.LLM_TIMEOUT: (503, True),
     ErrorCode.PIPELINE_CRASH: (500, False),
 }

     ErrorCode.PLAN_SYNTAX_ERROR: (422, False),
     ErrorCode.DB_LOCKED: (503, True),
     ErrorCode.DB_TIMEOUT: (503, True),
+    ErrorCode.EXECUTOR_COST_GUARDRAIL_BLOCKED: (422, False),
     ErrorCode.LLM_TIMEOUT: (503, True),
     ErrorCode.PIPELINE_CRASH: (500, False),
 }

nl2sql/executor.py CHANGED Viewed

@@ -1,5 +1,9 @@
 import time
 from nl2sql.types import StageResult, StageTrace
 from adapters.db.base import DBAdapter
@@ -9,8 +13,79 @@ class Executor:
     def __init__(self, db: DBAdapter):
         self.db = db
     def run(self, sql: str) -> StageResult:
         t0 = time.perf_counter()
         try:
             rows, cols = self.db.execute(sql)
             trace = StageTrace(
@@ -20,6 +95,8 @@ class Executor:
                     "row_count": len(rows),
                     "col_count": len(cols),
                     "sql_length": len(sql or ""),
                 },
             )
             return StageResult(
@@ -33,6 +110,8 @@ class Executor:
                     "error": str(e),
                     "error_type": type(e).__name__,
                     "sql_length": len(sql or ""),
                 },
             )
             return StageResult(ok=False, data=None, trace=trace, error=[str(e)])

+import sqlglot
+from sqlglot import exp
 import time
 from nl2sql.types import StageResult, StageTrace
+from nl2sql.errors.codes import ErrorCode
 from adapters.db.base import DBAdapter
     def __init__(self, db: DBAdapter):
         self.db = db
+    def _preflight_cost_check(self, sql: str) -> tuple[bool, str, dict]:
+        """Return (ok, reason, notes). Reason is machine-readable."""
+        sql_stripped = (sql or "").strip().rstrip(";")
+        notes: dict = {"sql_length": len(sql_stripped)}
+        if not sql_stripped:
+            return False, "empty_sql", notes
+        # Parse for cheap structural signals (LIMIT/JOIN/ORDER)
+        try:
+            tree = sqlglot.parse_one(
+                sql_stripped, read=getattr(self.db, "dialect", None) or "sqlite"
+            )
+        except Exception:
+            # Safety should usually catch parse errors; executor treats as reject.
+            return False, "parse_error", notes
+        has_limit = tree.find(exp.Limit) is not None
+        join_count = sum(1 for _ in tree.find_all(exp.Join))
+        has_order = tree.find(exp.Order) is not None
+        has_star = tree.find(exp.Star) is not None
+        notes.update(
+            {"has_limit": has_limit, "join_count": join_count, "has_order": has_order}
+        )
+        # Ask DB for a plan preview
+        try:
+            plan_lines = self.db.explain_query_plan(sql_stripped)
+        except Exception as e:
+            # Planning failures are treated as non-OK but not as cost guardrail.
+            notes.update({"plan_error": str(e), "plan_error_type": type(e).__name__})
+            return True, "plan_unavailable", notes
+        plan_preview = plan_lines[:6] if isinstance(plan_lines, list) else []
+        notes.update({"plan_preview": plan_preview})
+        plan_text = "".join(plan_lines).lower() if isinstance(plan_lines, list) else ""
+        full_scan = ("scan" in plan_text) and ("index" not in plan_text)
+        notes.update({"full_scan": full_scan})
+        # MVP heuristics
+        # Block only the highest-risk pattern for v1: full scan + no LIMIT + SELECT *
+        if full_scan and (not has_limit) and has_star:
+            return False, "full_scan_without_limit", notes
+        # Very high join count is a strong proxy for expensive queries
+        if join_count >= 6:
+            return False, "too_many_joins", notes
+        return True, "ok", notes
     def run(self, sql: str) -> StageResult:
         t0 = time.perf_counter()
+        preflight_ok, preflight_reason, preflight_notes = self._preflight_cost_check(
+            sql
+        )
+        if not preflight_ok:
+            trace = StageTrace(
+                stage=self.name,
+                duration_ms=(time.perf_counter() - t0) * 1000,
+                summary="blocked",
+                notes={
+                    **preflight_notes,
+                    "blocked_reason": preflight_reason,
+                },
+            )
+            return StageResult(
+                ok=False,
+                data=None,
+                trace=trace,
+                error=[preflight_reason],
+                error_code=ErrorCode.EXECUTOR_COST_GUARDRAIL_BLOCKED,
+                retryable=False,
+            )
         try:
             rows, cols = self.db.execute(sql)
             trace = StageTrace(
                     "row_count": len(rows),
                     "col_count": len(cols),
                     "sql_length": len(sql or ""),
+                    "preflight": preflight_reason,
+                    **preflight_notes,
                 },
             )
             return StageResult(
                     "error": str(e),
                     "error_type": type(e).__name__,
                     "sql_length": len(sql or ""),
+                    "preflight": preflight_reason,
+                    **preflight_notes,
                 },
             )
             return StageResult(ok=False, data=None, trace=trace, error=[str(e)])