Spaces:
Sleeping
Sleeping
github-actions[bot]
commited on
Commit
·
4e73462
1
Parent(s):
8e8639a
Sync from GitHub main @ 517739c210f47d8dcf880b0b6b7501a464d6ef4f
Browse files- adapters/llm/base.py +10 -4
- adapters/llm/openai_provider.py +159 -160
- nl2sql/errors/codes.py +1 -0
- nl2sql/generator.py +16 -7
- nl2sql/pipeline.py +71 -36
- nl2sql/planner.py +90 -49
- nl2sql/prompts/__init__.py +15 -0
- nl2sql/prompts/contracts.py +38 -0
adapters/llm/base.py
CHANGED
|
@@ -1,14 +1,19 @@
|
|
| 1 |
from __future__ import annotations
|
| 2 |
-
|
|
|
|
| 3 |
|
| 4 |
|
| 5 |
class LLMProvider(Protocol):
|
| 6 |
PROVIDER_ID: str
|
| 7 |
|
| 8 |
def plan(
|
| 9 |
-
self,
|
| 10 |
-
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
def generate_sql(
|
| 14 |
self,
|
|
@@ -16,6 +21,7 @@ class LLMProvider(Protocol):
|
|
| 16 |
user_query: str,
|
| 17 |
schema_preview: str,
|
| 18 |
plan_text: str,
|
|
|
|
| 19 |
clarify_answers: Dict[str, Any] | None = None,
|
| 20 |
) -> Tuple[str, str, int, int, float]:
|
| 21 |
"""Return (sql, rationale, token_in, token_out, cost_usd)."""
|
|
|
|
| 1 |
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from typing import Any, Dict, List, Protocol, Tuple
|
| 4 |
|
| 5 |
|
| 6 |
class LLMProvider(Protocol):
|
| 7 |
PROVIDER_ID: str
|
| 8 |
|
| 9 |
def plan(
|
| 10 |
+
self,
|
| 11 |
+
*,
|
| 12 |
+
user_query: str,
|
| 13 |
+
schema_preview: str,
|
| 14 |
+
constraints: List[str] | None = None,
|
| 15 |
+
) -> Tuple[str, List[str], int, int, float]:
|
| 16 |
+
"""Return (plan_text, used_tables, token_in, token_out, cost_usd)."""
|
| 17 |
|
| 18 |
def generate_sql(
|
| 19 |
self,
|
|
|
|
| 21 |
user_query: str,
|
| 22 |
schema_preview: str,
|
| 23 |
plan_text: str,
|
| 24 |
+
constraints: List[str] | None = None,
|
| 25 |
clarify_answers: Dict[str, Any] | None = None,
|
| 26 |
) -> Tuple[str, str, int, int, float]:
|
| 27 |
"""Return (sql, rationale, token_in, token_out, cost_usd)."""
|
adapters/llm/openai_provider.py
CHANGED
|
@@ -3,7 +3,7 @@ from __future__ import annotations
|
|
| 3 |
import json
|
| 4 |
import os
|
| 5 |
import re
|
| 6 |
-
from typing import Any, List, Tuple
|
| 7 |
|
| 8 |
from adapters.llm.base import LLMProvider
|
| 9 |
from openai import OpenAI
|
|
@@ -35,17 +35,15 @@ def _resolve_api_config() -> tuple[str, str, str]:
|
|
| 35 |
|
| 36 |
|
| 37 |
class OpenAIProvider(LLMProvider):
|
| 38 |
-
"""OpenAI LLM provider implementation.
|
| 39 |
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
|
| 46 |
-
|
| 47 |
-
"""OpenAI SDK seam for stable unit testing."""
|
| 48 |
-
return self.client.chat.completions.create(**kwargs)
|
| 49 |
|
| 50 |
def __init__(self) -> None:
|
| 51 |
"""Initialize OpenAI client with config from environment."""
|
|
@@ -54,21 +52,114 @@ class OpenAIProvider(LLMProvider):
|
|
| 54 |
os.environ["OPENAI_BASE_URL"] = base_url
|
| 55 |
self.client = OpenAI(timeout=120.0)
|
| 56 |
self.model = model
|
| 57 |
-
# last call usage/metadata for tracing
|
| 58 |
self._last_usage: dict[str, Any] = {}
|
| 59 |
|
| 60 |
-
def
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
|
|
|
| 68 |
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
system_prompt = """You are a SQL query planning expert. Analyze the user's question and database schema to create a clear execution plan.
|
| 73 |
|
| 74 |
Your plan should:
|
|
@@ -86,6 +177,9 @@ Be concise but thorough."""
|
|
| 86 |
Database Schema:
|
| 87 |
{schema_preview}
|
| 88 |
|
|
|
|
|
|
|
|
|
|
| 89 |
Create a step-by-step plan to answer this question with SQL."""
|
| 90 |
|
| 91 |
completion = self._create_chat_completion(
|
|
@@ -100,6 +194,9 @@ Create a step-by-step plan to answer this question with SQL."""
|
|
| 100 |
msg = completion.choices[0].message.content or ""
|
| 101 |
usage = completion.usage
|
| 102 |
|
|
|
|
|
|
|
|
|
|
| 103 |
if usage:
|
| 104 |
prompt_tokens = usage.prompt_tokens
|
| 105 |
completion_tokens = usage.completion_tokens
|
|
@@ -110,15 +207,15 @@ Create a step-by-step plan to answer this question with SQL."""
|
|
| 110 |
"completion_tokens": completion_tokens,
|
| 111 |
"cost_usd": cost,
|
| 112 |
}
|
| 113 |
-
return (
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
|
| 123 |
def generate_sql(
|
| 124 |
self,
|
|
@@ -126,21 +223,11 @@ Create a step-by-step plan to answer this question with SQL."""
|
|
| 126 |
user_query: str,
|
| 127 |
schema_preview: str,
|
| 128 |
plan_text: str,
|
| 129 |
-
|
|
|
|
| 130 |
) -> Tuple[str, str, int, int, float]:
|
| 131 |
-
"""
|
| 132 |
-
|
| 133 |
-
Args:
|
| 134 |
-
user_query: The user's natural language question
|
| 135 |
-
schema_preview: Database schema information
|
| 136 |
-
plan_text: Query execution plan
|
| 137 |
-
clarify_answers: Optional additional context_engineering
|
| 138 |
-
|
| 139 |
-
Returns:
|
| 140 |
-
Tuple of (sql, rationale, prompt_tokens, completion_tokens, cost)
|
| 141 |
-
"""
|
| 142 |
-
system_prompt = """You are an expert SQL query generator for SQLite databases.
|
| 143 |
-
You must follow these STRICT rules to generate clean, simple SQL:
|
| 144 |
|
| 145 |
CRITICAL RULES:
|
| 146 |
1. Write the SIMPLEST possible SQL that answers the question
|
|
@@ -173,6 +260,9 @@ Database Schema:
|
|
| 173 |
Query Plan:
|
| 174 |
{plan_text}
|
| 175 |
|
|
|
|
|
|
|
|
|
|
| 176 |
Remember: Generate the SIMPLEST possible SQL. Avoid table prefixes, aliases, and unnecessary clauses.
|
| 177 |
|
| 178 |
Example of what we want:
|
|
@@ -199,7 +289,6 @@ Now generate the SQL for the given question:"""
|
|
| 199 |
content = text.strip() if text else ""
|
| 200 |
usage = completion.usage
|
| 201 |
|
| 202 |
-
# Parse JSON response
|
| 203 |
try:
|
| 204 |
parsed = json.loads(content)
|
| 205 |
except json.JSONDecodeError:
|
|
@@ -208,21 +297,21 @@ Now generate the SQL for the given question:"""
|
|
| 208 |
if start != -1 and end != -1:
|
| 209 |
try:
|
| 210 |
parsed = json.loads(content[start : end + 1])
|
| 211 |
-
except Exception:
|
| 212 |
-
raise ValueError(f"Invalid LLM JSON output: {content[:200]}")
|
| 213 |
else:
|
| 214 |
raise ValueError(f"Invalid LLM JSON output: {content[:200]}")
|
| 215 |
|
| 216 |
-
sql = (parsed.get("sql") or "").strip()
|
| 217 |
-
rationale = parsed.get("rationale") or ""
|
| 218 |
|
| 219 |
-
# Post-process SQL to ensure simplicity
|
| 220 |
sql = self._simplify_sql(sql)
|
| 221 |
-
|
| 222 |
if not sql:
|
| 223 |
raise ValueError("LLM returned empty 'sql'")
|
| 224 |
|
|
|
|
| 225 |
sql_length = len(sql)
|
|
|
|
| 226 |
if usage:
|
| 227 |
prompt_tokens = usage.prompt_tokens
|
| 228 |
completion_tokens = usage.completion_tokens
|
|
@@ -233,35 +322,33 @@ Now generate the SQL for the given question:"""
|
|
| 233 |
"completion_tokens": completion_tokens,
|
| 234 |
"cost_usd": cost,
|
| 235 |
"sql_length": sql_length,
|
|
|
|
| 236 |
}
|
| 237 |
return (sql, rationale, prompt_tokens, completion_tokens, cost)
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
|
|
|
| 247 |
|
| 248 |
def _simplify_sql(self, sql: str) -> str:
|
| 249 |
"""Post-process SQL to remove common unnecessary additions."""
|
| 250 |
if not sql:
|
| 251 |
return sql
|
| 252 |
|
| 253 |
-
# Remove trailing semicolon
|
| 254 |
sql = sql.rstrip(";")
|
| 255 |
|
| 256 |
-
# Remove unnecessary table prefixes in simple queries
|
| 257 |
-
# e.g., "singer.name" -> "name" when there's only one table
|
| 258 |
if sql.lower().count(" from ") == 1 and " join " not in sql.lower():
|
| 259 |
match = re.search(r"\bfrom\s+(\w+)", sql, re.IGNORECASE)
|
| 260 |
if match:
|
| 261 |
table = match.group(1)
|
| 262 |
sql = re.sub(rf"\b{table}\.(\w+)\b", r"\1", sql)
|
| 263 |
|
| 264 |
-
# Remove unnecessary DISTINCT in COUNT(*)
|
| 265 |
sql = re.sub(
|
| 266 |
r"count\s*\(\s*distinct\s+\*\s*\)",
|
| 267 |
"count(*)",
|
|
@@ -269,7 +356,6 @@ Now generate the SQL for the given question:"""
|
|
| 269 |
flags=re.IGNORECASE,
|
| 270 |
)
|
| 271 |
|
| 272 |
-
# Remove big default LIMITs that weren't requested
|
| 273 |
sql = re.sub(
|
| 274 |
r"\s+limit\s+(100|1000|10000)\b",
|
| 275 |
"",
|
|
@@ -286,16 +372,7 @@ Now generate the SQL for the given question:"""
|
|
| 286 |
error_msg: str,
|
| 287 |
schema_preview: str,
|
| 288 |
) -> Tuple[str, int, int, float]:
|
| 289 |
-
"""
|
| 290 |
-
|
| 291 |
-
Args:
|
| 292 |
-
sql: Broken SQL query
|
| 293 |
-
error_msg: Error message from execution
|
| 294 |
-
schema_preview: Database schema information
|
| 295 |
-
|
| 296 |
-
Returns:
|
| 297 |
-
Tuple of (fixed_sql, prompt_tokens, completion_tokens, cost)
|
| 298 |
-
"""
|
| 299 |
system_prompt = """You are a SQL repair expert. Fix the given SQL query to resolve the error.
|
| 300 |
|
| 301 |
IMPORTANT RULES:
|
|
@@ -332,7 +409,6 @@ Return the corrected SQL (keep it simple):"""
|
|
| 332 |
text = completion.choices[0].message.content
|
| 333 |
fixed_sql = text.strip() if text else ""
|
| 334 |
|
| 335 |
-
# Clean up accidental code fences
|
| 336 |
if fixed_sql.startswith("```sql"):
|
| 337 |
fixed_sql = fixed_sql[6:]
|
| 338 |
if fixed_sql.startswith("```"):
|
|
@@ -344,7 +420,6 @@ Return the corrected SQL (keep it simple):"""
|
|
| 344 |
fixed_sql = self._simplify_sql(fixed_sql)
|
| 345 |
|
| 346 |
usage = completion.usage
|
| 347 |
-
|
| 348 |
if usage:
|
| 349 |
prompt_tokens = usage.prompt_tokens
|
| 350 |
completion_tokens = usage.completion_tokens
|
|
@@ -357,88 +432,12 @@ Return the corrected SQL (keep it simple):"""
|
|
| 357 |
"sql_length": len(fixed_sql),
|
| 358 |
}
|
| 359 |
return (fixed_sql, prompt_tokens, completion_tokens, cost)
|
| 360 |
-
else:
|
| 361 |
-
self._last_usage = {
|
| 362 |
-
"kind": "repair",
|
| 363 |
-
"prompt_tokens": 0,
|
| 364 |
-
"completion_tokens": 0,
|
| 365 |
-
"cost_usd": 0.0,
|
| 366 |
-
"sql_length": len(fixed_sql),
|
| 367 |
-
}
|
| 368 |
-
return (fixed_sql, 0, 0, 0.0)
|
| 369 |
-
|
| 370 |
-
def _estimate_cost(self, usage: Any) -> float:
|
| 371 |
-
"""Estimate cost based on token usage.
|
| 372 |
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
if not usage:
|
| 380 |
-
return 0.0
|
| 381 |
-
|
| 382 |
-
# Pricing per 1K tokens (adjust based on model)
|
| 383 |
-
pricing = {
|
| 384 |
-
"gpt-4": {"input": 0.03, "output": 0.06},
|
| 385 |
-
"gpt-4-turbo": {"input": 0.01, "output": 0.03},
|
| 386 |
-
"gpt-4o": {"input": 0.005, "output": 0.015},
|
| 387 |
-
"gpt-4o-mini": {"input": 0.00015, "output": 0.0006},
|
| 388 |
-
"gpt-3.5-turbo": {"input": 0.0005, "output": 0.0015},
|
| 389 |
}
|
| 390 |
-
|
| 391 |
-
model_pricing = pricing.get(self.model, pricing["gpt-4o-mini"])
|
| 392 |
-
|
| 393 |
-
input_cost = (usage.prompt_tokens / 1000) * model_pricing["input"]
|
| 394 |
-
output_cost = (usage.completion_tokens / 1000) * model_pricing["output"]
|
| 395 |
-
|
| 396 |
-
return input_cost + output_cost
|
| 397 |
-
|
| 398 |
-
def clarify(
|
| 399 |
-
self,
|
| 400 |
-
*,
|
| 401 |
-
user_query: str,
|
| 402 |
-
schema_preview: str,
|
| 403 |
-
questions: List[str],
|
| 404 |
-
) -> Tuple[str, int, int, float]:
|
| 405 |
-
"""Clarify ambiguities in the user query.
|
| 406 |
-
|
| 407 |
-
Args:
|
| 408 |
-
user_query: The user's natural language question
|
| 409 |
-
schema_preview: Database schema information
|
| 410 |
-
questions: List of clarification questions
|
| 411 |
-
|
| 412 |
-
Returns:
|
| 413 |
-
Tuple of (answers, prompt_tokens, completion_tokens, cost)
|
| 414 |
-
"""
|
| 415 |
-
system_prompt = """You are a helpful assistant that clarifies SQL query requirements.
|
| 416 |
-
Answer the questions clearly and concisely based on the user's query and database schema."""
|
| 417 |
-
|
| 418 |
-
user_prompt = f"""User Query: {user_query}
|
| 419 |
-
|
| 420 |
-
Database Schema:
|
| 421 |
-
{schema_preview}
|
| 422 |
-
|
| 423 |
-
Please answer these clarification questions:
|
| 424 |
-
{chr(10).join(f"{i + 1}. {q}" for i, q in enumerate(questions))}"""
|
| 425 |
-
|
| 426 |
-
completion = self._create_chat_completion(
|
| 427 |
-
model=self.model,
|
| 428 |
-
messages=[
|
| 429 |
-
{"role": "system", "content": system_prompt},
|
| 430 |
-
{"role": "user", "content": user_prompt},
|
| 431 |
-
],
|
| 432 |
-
temperature=0.3,
|
| 433 |
-
)
|
| 434 |
-
|
| 435 |
-
answers = completion.choices[0].message.content or ""
|
| 436 |
-
usage = completion.usage
|
| 437 |
-
|
| 438 |
-
if usage:
|
| 439 |
-
prompt_tokens = usage.prompt_tokens
|
| 440 |
-
completion_tokens = usage.completion_tokens
|
| 441 |
-
cost = self._estimate_cost(usage)
|
| 442 |
-
return (answers, prompt_tokens, completion_tokens, cost)
|
| 443 |
-
else:
|
| 444 |
-
return (answers, 0, 0, 0.0)
|
|
|
|
| 3 |
import json
|
| 4 |
import os
|
| 5 |
import re
|
| 6 |
+
from typing import Any, Dict, List, Tuple
|
| 7 |
|
| 8 |
from adapters.llm.base import LLMProvider
|
| 9 |
from openai import OpenAI
|
|
|
|
| 35 |
|
| 36 |
|
| 37 |
class OpenAIProvider(LLMProvider):
|
| 38 |
+
"""OpenAI LLM provider implementation.
|
| 39 |
|
| 40 |
+
Goals for this implementation:
|
| 41 |
+
- Keep prompts and behavior as close as possible to the current repo version.
|
| 42 |
+
- Align method signatures + return shapes with the updated LLMProvider Protocol.
|
| 43 |
+
- Provide a lightweight `used_tables` signal for observability/drift checks.
|
| 44 |
+
"""
|
| 45 |
|
| 46 |
+
PROVIDER_ID = "openai"
|
|
|
|
|
|
|
| 47 |
|
| 48 |
def __init__(self) -> None:
|
| 49 |
"""Initialize OpenAI client with config from environment."""
|
|
|
|
| 52 |
os.environ["OPENAI_BASE_URL"] = base_url
|
| 53 |
self.client = OpenAI(timeout=120.0)
|
| 54 |
self.model = model
|
|
|
|
| 55 |
self._last_usage: dict[str, Any] = {}
|
| 56 |
|
| 57 |
+
def get_last_usage(self) -> dict[str, Any]:
|
| 58 |
+
"""Return metadata of the last LLM call (tokens, cost, sql_length, kind)."""
|
| 59 |
+
return dict(self._last_usage)
|
| 60 |
+
|
| 61 |
+
def _create_chat_completion(self, **kwargs):
|
| 62 |
+
"""OpenAI SDK seam for stable unit testing."""
|
| 63 |
+
return self.client.chat.completions.create(**kwargs)
|
| 64 |
+
|
| 65 |
+
# ---------------------------------------------------------------------
|
| 66 |
+
# Table extraction helpers (best-effort; no heavy parsing).
|
| 67 |
+
# ---------------------------------------------------------------------
|
| 68 |
+
def _extract_schema_tables(self, schema_preview: str) -> List[str]:
|
| 69 |
+
"""Extract likely table names from the schema preview string."""
|
| 70 |
+
if not schema_preview:
|
| 71 |
+
return []
|
| 72 |
+
|
| 73 |
+
tables: List[str] = []
|
| 74 |
+
|
| 75 |
+
for m in re.finditer(
|
| 76 |
+
r"(?im)^\s*(?:-\s*)?table\s*[: ]\s*([A-Za-z_][A-Za-z0-9_]*)\b",
|
| 77 |
+
schema_preview,
|
| 78 |
+
):
|
| 79 |
+
tables.append(m.group(1))
|
| 80 |
+
|
| 81 |
+
for m in re.finditer(
|
| 82 |
+
r"(?im)^\s*create\s+table\s+`?([A-Za-z_][A-Za-z0-9_]*)`?\b", schema_preview
|
| 83 |
+
):
|
| 84 |
+
tables.append(m.group(1))
|
| 85 |
+
|
| 86 |
+
seen = set()
|
| 87 |
+
uniq: List[str] = []
|
| 88 |
+
for t in tables:
|
| 89 |
+
if t not in seen:
|
| 90 |
+
uniq.append(t)
|
| 91 |
+
seen.add(t)
|
| 92 |
+
return uniq
|
| 93 |
+
|
| 94 |
+
def _extract_tables_from_sql(self, sql: str) -> List[str]:
|
| 95 |
+
"""Very lightweight table extraction from FROM/JOIN clauses."""
|
| 96 |
+
if not sql:
|
| 97 |
+
return []
|
| 98 |
+
pairs = re.findall(
|
| 99 |
+
r"\bfrom\s+([A-Za-z_][A-Za-z0-9_]*)|\bjoin\s+([A-Za-z_][A-Za-z0-9_]*)",
|
| 100 |
+
sql,
|
| 101 |
+
flags=re.IGNORECASE,
|
| 102 |
+
)
|
| 103 |
+
out: List[str] = []
|
| 104 |
+
for t1, t2 in pairs:
|
| 105 |
+
if t1:
|
| 106 |
+
out.append(t1)
|
| 107 |
+
if t2:
|
| 108 |
+
out.append(t2)
|
| 109 |
+
|
| 110 |
+
seen = set()
|
| 111 |
+
uniq: List[str] = []
|
| 112 |
+
for t in out:
|
| 113 |
+
if t not in seen:
|
| 114 |
+
uniq.append(t)
|
| 115 |
+
seen.add(t)
|
| 116 |
+
return uniq
|
| 117 |
+
|
| 118 |
+
def _extract_used_tables_from_plan(
|
| 119 |
+
self, plan_text: str, schema_preview: str
|
| 120 |
+
) -> List[str]:
|
| 121 |
+
"""Best-effort used table list from plan text by intersecting with schema table names."""
|
| 122 |
+
candidates = self._extract_schema_tables(schema_preview)
|
| 123 |
+
if not candidates or not plan_text:
|
| 124 |
+
return []
|
| 125 |
+
used: List[str] = []
|
| 126 |
+
for t in candidates:
|
| 127 |
+
if re.search(rf"\b{re.escape(t)}\b", plan_text, flags=re.IGNORECASE):
|
| 128 |
+
used.append(t)
|
| 129 |
+
return used
|
| 130 |
+
|
| 131 |
+
# ---------------------------------------------------------------------
|
| 132 |
+
# Cost estimation
|
| 133 |
+
# ---------------------------------------------------------------------
|
| 134 |
+
def _estimate_cost(self, usage: Any) -> float:
|
| 135 |
+
"""Estimate cost based on token usage."""
|
| 136 |
+
if not usage:
|
| 137 |
+
return 0.0
|
| 138 |
+
|
| 139 |
+
pricing = {
|
| 140 |
+
"gpt-4": {"input": 0.03, "output": 0.06},
|
| 141 |
+
"gpt-4-turbo": {"input": 0.01, "output": 0.03},
|
| 142 |
+
"gpt-4o": {"input": 0.005, "output": 0.015},
|
| 143 |
+
"gpt-4o-mini": {"input": 0.00015, "output": 0.0006},
|
| 144 |
+
"gpt-3.5-turbo": {"input": 0.0005, "output": 0.0015},
|
| 145 |
+
}
|
| 146 |
|
| 147 |
+
model_pricing = pricing.get(self.model, pricing["gpt-4o-mini"])
|
| 148 |
+
input_cost = (usage.prompt_tokens / 1000) * model_pricing["input"]
|
| 149 |
+
output_cost = (usage.completion_tokens / 1000) * model_pricing["output"]
|
| 150 |
+
return input_cost + output_cost
|
| 151 |
|
| 152 |
+
# ---------------------------------------------------------------------
|
| 153 |
+
# LLMProvider API
|
| 154 |
+
# ---------------------------------------------------------------------
|
| 155 |
+
def plan(
|
| 156 |
+
self,
|
| 157 |
+
*,
|
| 158 |
+
user_query: str,
|
| 159 |
+
schema_preview: str,
|
| 160 |
+
constraints: List[str] | None = None,
|
| 161 |
+
) -> Tuple[str, List[str], int, int, float]:
|
| 162 |
+
"""Return (plan_text, used_tables, token_in, token_out, cost_usd)."""
|
| 163 |
system_prompt = """You are a SQL query planning expert. Analyze the user's question and database schema to create a clear execution plan.
|
| 164 |
|
| 165 |
Your plan should:
|
|
|
|
| 177 |
Database Schema:
|
| 178 |
{schema_preview}
|
| 179 |
|
| 180 |
+
Constraints:
|
| 181 |
+
{constraints or []}
|
| 182 |
+
|
| 183 |
Create a step-by-step plan to answer this question with SQL."""
|
| 184 |
|
| 185 |
completion = self._create_chat_completion(
|
|
|
|
| 194 |
msg = completion.choices[0].message.content or ""
|
| 195 |
usage = completion.usage
|
| 196 |
|
| 197 |
+
plan_text = msg.strip()
|
| 198 |
+
used_tables = self._extract_used_tables_from_plan(plan_text, schema_preview)
|
| 199 |
+
|
| 200 |
if usage:
|
| 201 |
prompt_tokens = usage.prompt_tokens
|
| 202 |
completion_tokens = usage.completion_tokens
|
|
|
|
| 207 |
"completion_tokens": completion_tokens,
|
| 208 |
"cost_usd": cost,
|
| 209 |
}
|
| 210 |
+
return (plan_text, used_tables, prompt_tokens, completion_tokens, cost)
|
| 211 |
+
|
| 212 |
+
self._last_usage = {
|
| 213 |
+
"kind": "plan",
|
| 214 |
+
"prompt_tokens": 0,
|
| 215 |
+
"completion_tokens": 0,
|
| 216 |
+
"cost_usd": 0.0,
|
| 217 |
+
}
|
| 218 |
+
return (plan_text, used_tables, 0, 0, 0.0)
|
| 219 |
|
| 220 |
def generate_sql(
|
| 221 |
self,
|
|
|
|
| 223 |
user_query: str,
|
| 224 |
schema_preview: str,
|
| 225 |
plan_text: str,
|
| 226 |
+
constraints: List[str] | None = None,
|
| 227 |
+
clarify_answers: Dict[str, Any] | None = None,
|
| 228 |
) -> Tuple[str, str, int, int, float]:
|
| 229 |
+
"""Return (sql, rationale, token_in, token_out, cost_usd)."""
|
| 230 |
+
system_prompt = """You are an expert SQL generator.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
|
| 232 |
CRITICAL RULES:
|
| 233 |
1. Write the SIMPLEST possible SQL that answers the question
|
|
|
|
| 260 |
Query Plan:
|
| 261 |
{plan_text}
|
| 262 |
|
| 263 |
+
Constraints:
|
| 264 |
+
{constraints or []}
|
| 265 |
+
|
| 266 |
Remember: Generate the SIMPLEST possible SQL. Avoid table prefixes, aliases, and unnecessary clauses.
|
| 267 |
|
| 268 |
Example of what we want:
|
|
|
|
| 289 |
content = text.strip() if text else ""
|
| 290 |
usage = completion.usage
|
| 291 |
|
|
|
|
| 292 |
try:
|
| 293 |
parsed = json.loads(content)
|
| 294 |
except json.JSONDecodeError:
|
|
|
|
| 297 |
if start != -1 and end != -1:
|
| 298 |
try:
|
| 299 |
parsed = json.loads(content[start : end + 1])
|
| 300 |
+
except Exception as e:
|
| 301 |
+
raise ValueError(f"Invalid LLM JSON output: {content[:200]}") from e
|
| 302 |
else:
|
| 303 |
raise ValueError(f"Invalid LLM JSON output: {content[:200]}")
|
| 304 |
|
| 305 |
+
sql = str(parsed.get("sql") or "").strip()
|
| 306 |
+
rationale = str(parsed.get("rationale") or "")
|
| 307 |
|
|
|
|
| 308 |
sql = self._simplify_sql(sql)
|
|
|
|
| 309 |
if not sql:
|
| 310 |
raise ValueError("LLM returned empty 'sql'")
|
| 311 |
|
| 312 |
+
used_tables = self._extract_tables_from_sql(sql)
|
| 313 |
sql_length = len(sql)
|
| 314 |
+
|
| 315 |
if usage:
|
| 316 |
prompt_tokens = usage.prompt_tokens
|
| 317 |
completion_tokens = usage.completion_tokens
|
|
|
|
| 322 |
"completion_tokens": completion_tokens,
|
| 323 |
"cost_usd": cost,
|
| 324 |
"sql_length": sql_length,
|
| 325 |
+
"used_tables": used_tables,
|
| 326 |
}
|
| 327 |
return (sql, rationale, prompt_tokens, completion_tokens, cost)
|
| 328 |
+
|
| 329 |
+
self._last_usage = {
|
| 330 |
+
"kind": "generate",
|
| 331 |
+
"prompt_tokens": 0,
|
| 332 |
+
"completion_tokens": 0,
|
| 333 |
+
"cost_usd": 0.0,
|
| 334 |
+
"sql_length": sql_length,
|
| 335 |
+
"used_tables": used_tables,
|
| 336 |
+
}
|
| 337 |
+
return (sql, rationale, 0, 0, 0.0)
|
| 338 |
|
| 339 |
def _simplify_sql(self, sql: str) -> str:
|
| 340 |
"""Post-process SQL to remove common unnecessary additions."""
|
| 341 |
if not sql:
|
| 342 |
return sql
|
| 343 |
|
|
|
|
| 344 |
sql = sql.rstrip(";")
|
| 345 |
|
|
|
|
|
|
|
| 346 |
if sql.lower().count(" from ") == 1 and " join " not in sql.lower():
|
| 347 |
match = re.search(r"\bfrom\s+(\w+)", sql, re.IGNORECASE)
|
| 348 |
if match:
|
| 349 |
table = match.group(1)
|
| 350 |
sql = re.sub(rf"\b{table}\.(\w+)\b", r"\1", sql)
|
| 351 |
|
|
|
|
| 352 |
sql = re.sub(
|
| 353 |
r"count\s*\(\s*distinct\s+\*\s*\)",
|
| 354 |
"count(*)",
|
|
|
|
| 356 |
flags=re.IGNORECASE,
|
| 357 |
)
|
| 358 |
|
|
|
|
| 359 |
sql = re.sub(
|
| 360 |
r"\s+limit\s+(100|1000|10000)\b",
|
| 361 |
"",
|
|
|
|
| 372 |
error_msg: str,
|
| 373 |
schema_preview: str,
|
| 374 |
) -> Tuple[str, int, int, float]:
|
| 375 |
+
"""Return (patched_sql, token_in, token_out, cost_usd)."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 376 |
system_prompt = """You are a SQL repair expert. Fix the given SQL query to resolve the error.
|
| 377 |
|
| 378 |
IMPORTANT RULES:
|
|
|
|
| 409 |
text = completion.choices[0].message.content
|
| 410 |
fixed_sql = text.strip() if text else ""
|
| 411 |
|
|
|
|
| 412 |
if fixed_sql.startswith("```sql"):
|
| 413 |
fixed_sql = fixed_sql[6:]
|
| 414 |
if fixed_sql.startswith("```"):
|
|
|
|
| 420 |
fixed_sql = self._simplify_sql(fixed_sql)
|
| 421 |
|
| 422 |
usage = completion.usage
|
|
|
|
| 423 |
if usage:
|
| 424 |
prompt_tokens = usage.prompt_tokens
|
| 425 |
completion_tokens = usage.completion_tokens
|
|
|
|
| 432 |
"sql_length": len(fixed_sql),
|
| 433 |
}
|
| 434 |
return (fixed_sql, prompt_tokens, completion_tokens, cost)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 435 |
|
| 436 |
+
self._last_usage = {
|
| 437 |
+
"kind": "repair",
|
| 438 |
+
"prompt_tokens": 0,
|
| 439 |
+
"completion_tokens": 0,
|
| 440 |
+
"cost_usd": 0.0,
|
| 441 |
+
"sql_length": len(fixed_sql),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 442 |
}
|
| 443 |
+
return (fixed_sql, 0, 0, 0.0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
nl2sql/errors/codes.py
CHANGED
|
@@ -14,6 +14,7 @@ class ErrorCode(str, Enum):
|
|
| 14 |
# --- Executor / DB ---
|
| 15 |
DB_LOCKED = "DB_LOCKED"
|
| 16 |
DB_TIMEOUT = "DB_TIMEOUT"
|
|
|
|
| 17 |
|
| 18 |
# --- LLM ---
|
| 19 |
LLM_TIMEOUT = "LLM_TIMEOUT"
|
|
|
|
| 14 |
# --- Executor / DB ---
|
| 15 |
DB_LOCKED = "DB_LOCKED"
|
| 16 |
DB_TIMEOUT = "DB_TIMEOUT"
|
| 17 |
+
LLM_FAILURE = "LLM_FAILURE"
|
| 18 |
|
| 19 |
# --- LLM ---
|
| 20 |
LLM_TIMEOUT = "LLM_TIMEOUT"
|
nl2sql/generator.py
CHANGED
|
@@ -20,7 +20,9 @@ class Generator:
|
|
| 20 |
user_query: str,
|
| 21 |
schema_preview: str,
|
| 22 |
plan_text: str,
|
|
|
|
| 23 |
clarify_answers: Optional[Dict[str, Any]] = None,
|
|
|
|
| 24 |
) -> StageResult:
|
| 25 |
t0 = time.perf_counter()
|
| 26 |
|
|
@@ -29,10 +31,11 @@ class Generator:
|
|
| 29 |
user_query=user_query,
|
| 30 |
schema_preview=schema_preview,
|
| 31 |
plan_text=plan_text,
|
|
|
|
| 32 |
clarify_answers=clarify_answers or {},
|
| 33 |
)
|
| 34 |
except Exception as e:
|
| 35 |
-
# Provider/transport errors or unexpected runtime
|
| 36 |
return StageResult(
|
| 37 |
ok=False,
|
| 38 |
error=[f"Generator failed: {e}"],
|
|
@@ -40,18 +43,22 @@ class Generator:
|
|
| 40 |
trace=None,
|
| 41 |
)
|
| 42 |
|
| 43 |
-
|
| 44 |
-
if not isinstance(res, tuple) or len(res) != 5:
|
| 45 |
return StageResult(
|
| 46 |
ok=False,
|
| 47 |
error=[
|
| 48 |
-
"Generator contract violation: expected 5-tuple (sql, rationale, t_in, t_out, cost)"
|
| 49 |
],
|
| 50 |
error_code=ErrorCode.LLM_BAD_OUTPUT,
|
| 51 |
trace=None,
|
| 52 |
)
|
| 53 |
|
| 54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
# Type/shape checks
|
| 57 |
if not isinstance(sql, str) or not sql.strip():
|
|
@@ -73,18 +80,20 @@ class Generator:
|
|
| 73 |
|
| 74 |
# Normalize rationale to a string
|
| 75 |
rationale = rationale or ""
|
|
|
|
| 76 |
trace = StageTrace(
|
| 77 |
stage=self.name,
|
|
|
|
| 78 |
duration_ms=(time.perf_counter() - t0) * 1000.0,
|
| 79 |
token_in=t_in,
|
| 80 |
token_out=t_out,
|
| 81 |
cost_usd=cost,
|
| 82 |
-
notes={"rationale_len": len(rationale)},
|
| 83 |
)
|
| 84 |
|
| 85 |
return StageResult(
|
| 86 |
ok=True,
|
| 87 |
-
data={"sql": sql, "rationale": rationale},
|
| 88 |
trace=trace,
|
| 89 |
error_code=None,
|
| 90 |
retryable=None,
|
|
|
|
| 20 |
user_query: str,
|
| 21 |
schema_preview: str,
|
| 22 |
plan_text: str,
|
| 23 |
+
constraints: Optional[list[str]] = None,
|
| 24 |
clarify_answers: Optional[Dict[str, Any]] = None,
|
| 25 |
+
traces: Optional[list[dict]] = None,
|
| 26 |
) -> StageResult:
|
| 27 |
t0 = time.perf_counter()
|
| 28 |
|
|
|
|
| 31 |
user_query=user_query,
|
| 32 |
schema_preview=schema_preview,
|
| 33 |
plan_text=plan_text,
|
| 34 |
+
constraints=constraints or [],
|
| 35 |
clarify_answers=clarify_answers or {},
|
| 36 |
)
|
| 37 |
except Exception as e:
|
| 38 |
+
# Provider/transport errors or unexpected runtime exceptions.
|
| 39 |
return StageResult(
|
| 40 |
ok=False,
|
| 41 |
error=[f"Generator failed: {e}"],
|
|
|
|
| 43 |
trace=None,
|
| 44 |
)
|
| 45 |
|
| 46 |
+
if not isinstance(res, tuple) or len(res) not in (5, 6):
|
|
|
|
| 47 |
return StageResult(
|
| 48 |
ok=False,
|
| 49 |
error=[
|
| 50 |
+
"Generator contract violation: expected 5/6-tuple (sql, rationale, [used_tables], t_in, t_out, cost)"
|
| 51 |
],
|
| 52 |
error_code=ErrorCode.LLM_BAD_OUTPUT,
|
| 53 |
trace=None,
|
| 54 |
)
|
| 55 |
|
| 56 |
+
used_tables: list[str] = []
|
| 57 |
+
|
| 58 |
+
if len(res) == 6:
|
| 59 |
+
sql, rationale, used_tables, t_in, t_out, cost = res
|
| 60 |
+
else:
|
| 61 |
+
sql, rationale, t_in, t_out, cost = res
|
| 62 |
|
| 63 |
# Type/shape checks
|
| 64 |
if not isinstance(sql, str) or not sql.strip():
|
|
|
|
| 80 |
|
| 81 |
# Normalize rationale to a string
|
| 82 |
rationale = rationale or ""
|
| 83 |
+
|
| 84 |
trace = StageTrace(
|
| 85 |
stage=self.name,
|
| 86 |
+
summary="Generated SQL",
|
| 87 |
duration_ms=(time.perf_counter() - t0) * 1000.0,
|
| 88 |
token_in=t_in,
|
| 89 |
token_out=t_out,
|
| 90 |
cost_usd=cost,
|
| 91 |
+
notes={"rationale_len": len(rationale), "used_tables": used_tables},
|
| 92 |
)
|
| 93 |
|
| 94 |
return StageResult(
|
| 95 |
ok=True,
|
| 96 |
+
data={"sql": sql, "rationale": rationale, "used_tables": used_tables},
|
| 97 |
trace=trace,
|
| 98 |
error_code=None,
|
| 99 |
retryable=None,
|
nl2sql/pipeline.py
CHANGED
|
@@ -276,6 +276,17 @@ class Pipeline:
|
|
| 276 |
details: List[str] = []
|
| 277 |
exec_result: Dict[str, Any] = {}
|
| 278 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 279 |
def _fallback_trace(stage_name: str, dt_ms: float, ok: bool) -> None:
|
| 280 |
traces.append(
|
| 281 |
self._mk_trace(
|
|
@@ -411,6 +422,33 @@ class Pipeline:
|
|
| 411 |
sql = (r_gen.data or {}).get("sql")
|
| 412 |
rationale = (r_gen.data or {}).get("rationale")
|
| 413 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 414 |
# Guard: empty SQL
|
| 415 |
if not sql or not str(sql).strip():
|
| 416 |
pipeline_runs_total.labels(status="error").inc()
|
|
@@ -485,44 +523,39 @@ class Pipeline:
|
|
| 485 |
if r_exec.ok and isinstance(r_exec.data, dict):
|
| 486 |
exec_result = dict(r_exec.data)
|
| 487 |
|
| 488 |
-
# --- 6) verifier (
|
| 489 |
-
|
| 490 |
-
|
| 491 |
-
|
| 492 |
-
self.
|
| 493 |
-
|
| 494 |
-
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
|
| 499 |
-
|
| 500 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 501 |
|
| 502 |
-
|
| 503 |
-
|
| 504 |
-
|
| 505 |
-
|
|
|
|
| 506 |
|
| 507 |
-
|
| 508 |
-
if r_ver.data and isinstance(r_ver.data, dict):
|
| 509 |
-
repaired_sql = r_ver.data.get("sql")
|
| 510 |
-
if repaired_sql:
|
| 511 |
-
sql = repaired_sql
|
| 512 |
|
| 513 |
# Verified flag
|
| 514 |
-
verified = (
|
| 515 |
-
bool(
|
| 516 |
-
r_ver.data
|
| 517 |
-
and isinstance(r_ver.data, dict)
|
| 518 |
-
and r_ver.data.get("verified")
|
| 519 |
-
)
|
| 520 |
-
or r_ver.ok
|
| 521 |
-
)
|
| 522 |
|
| 523 |
# consume repaired SQL from verifier if any
|
| 524 |
-
|
| 525 |
-
|
|
|
|
| 526 |
|
| 527 |
# --- 7) repair loop (if not verified) ---
|
| 528 |
if not verified:
|
|
@@ -534,11 +567,12 @@ class Pipeline:
|
|
| 534 |
self.repair.run,
|
| 535 |
sql=sql,
|
| 536 |
error_msg="; ".join(details or ["unknown"]),
|
| 537 |
-
schema_preview=
|
| 538 |
)
|
| 539 |
dt = (time.perf_counter() - t0) * 1000.0
|
| 540 |
stage_duration_ms.labels("repair").observe(dt)
|
| 541 |
traces.extend(self._trace_list(r_fix))
|
|
|
|
| 542 |
if not getattr(r_fix, "trace", None):
|
| 543 |
_fallback_trace("repair", dt, r_fix.ok)
|
| 544 |
if not r_fix.ok:
|
|
@@ -553,6 +587,7 @@ class Pipeline:
|
|
| 553 |
dt2 = (time.perf_counter() - t0) * 1000.0
|
| 554 |
stage_duration_ms.labels("safety").observe(dt2)
|
| 555 |
traces.extend(self._trace_list(r_safe2))
|
|
|
|
| 556 |
if not getattr(r_safe2, "trace", None):
|
| 557 |
_fallback_trace("safety", dt2, r_safe2.ok)
|
| 558 |
if not r_safe2.ok:
|
|
@@ -567,6 +602,7 @@ class Pipeline:
|
|
| 567 |
dt2 = (time.perf_counter() - t0) * 1000.0
|
| 568 |
stage_duration_ms.labels("executor").observe(dt2)
|
| 569 |
traces.extend(self._trace_list(r_exec2))
|
|
|
|
| 570 |
if not getattr(r_exec2, "trace", None):
|
| 571 |
_fallback_trace("executor", dt2, r_exec2.ok)
|
| 572 |
if not r_exec2.ok:
|
|
@@ -586,11 +622,10 @@ class Pipeline:
|
|
| 586 |
dt2 = (time.perf_counter() - t0) * 1000.0
|
| 587 |
stage_duration_ms.labels("verifier").observe(dt2)
|
| 588 |
traces.extend(self._trace_list(r_ver2))
|
|
|
|
| 589 |
if not getattr(r_ver2, "trace", None):
|
| 590 |
_fallback_trace("verifier", dt2, r_ver2.ok)
|
| 591 |
-
verified = (
|
| 592 |
-
bool(r_ver2.data and r_ver2.data.get("verified")) or r_ver2.ok
|
| 593 |
-
)
|
| 594 |
if r_ver2.data and "sql" in r_ver2.data and r_ver2.data["sql"]:
|
| 595 |
sql = r_ver2.data["sql"]
|
| 596 |
if verified:
|
|
|
|
| 276 |
details: List[str] = []
|
| 277 |
exec_result: Dict[str, Any] = {}
|
| 278 |
|
| 279 |
+
def _tag_last_trace_attempt(stage_name: str, attempt: int) -> None:
|
| 280 |
+
# Attach attempt metadata to the most recent trace entry for this stage.
|
| 281 |
+
for t in reversed(traces):
|
| 282 |
+
if t.get("stage") == stage_name:
|
| 283 |
+
notes = t.get("notes") or {}
|
| 284 |
+
if not isinstance(notes, dict):
|
| 285 |
+
notes = {}
|
| 286 |
+
notes["attempt"] = attempt
|
| 287 |
+
t["notes"] = notes
|
| 288 |
+
return
|
| 289 |
+
|
| 290 |
def _fallback_trace(stage_name: str, dt_ms: float, ok: bool) -> None:
|
| 291 |
traces.append(
|
| 292 |
self._mk_trace(
|
|
|
|
| 422 |
sql = (r_gen.data or {}).get("sql")
|
| 423 |
rationale = (r_gen.data or {}).get("rationale")
|
| 424 |
|
| 425 |
+
# --- schema drift signal (planner vs generator table usage)
|
| 426 |
+
planner_used_tables = (
|
| 427 |
+
(r_plan.data or {}).get("used_tables")
|
| 428 |
+
or (r_plan.data or {}).get("tables")
|
| 429 |
+
or []
|
| 430 |
+
)
|
| 431 |
+
generator_used_tables = (
|
| 432 |
+
(r_gen.data or {}).get("used_tables")
|
| 433 |
+
or (r_gen.data or {}).get("tables")
|
| 434 |
+
or []
|
| 435 |
+
)
|
| 436 |
+
planner_set = set(planner_used_tables)
|
| 437 |
+
generator_set = set(generator_used_tables)
|
| 438 |
+
schema_drift = bool(generator_set - planner_set)
|
| 439 |
+
traces.append(
|
| 440 |
+
self._mk_trace(
|
| 441 |
+
stage="schema_drift_check",
|
| 442 |
+
duration_ms=0.0,
|
| 443 |
+
summary="compare planner vs generator table usage",
|
| 444 |
+
notes={
|
| 445 |
+
"planner_used_tables": sorted(planner_set),
|
| 446 |
+
"generator_used_tables": sorted(generator_set),
|
| 447 |
+
"schema_drift": schema_drift,
|
| 448 |
+
},
|
| 449 |
+
)
|
| 450 |
+
)
|
| 451 |
+
|
| 452 |
# Guard: empty SQL
|
| 453 |
if not sql or not str(sql).strip():
|
| 454 |
pipeline_runs_total.labels(status="error").inc()
|
|
|
|
| 523 |
if r_exec.ok and isinstance(r_exec.data, dict):
|
| 524 |
exec_result = dict(r_exec.data)
|
| 525 |
|
| 526 |
+
# --- 6) verifier (only if execution succeeded) ---
|
| 527 |
+
r_ver = None
|
| 528 |
+
if r_exec.ok:
|
| 529 |
+
t0 = time.perf_counter()
|
| 530 |
+
r_ver = self._run_with_repair(
|
| 531 |
+
"verifier",
|
| 532 |
+
self._call_verifier,
|
| 533 |
+
repair_input_builder=self._sql_repair_input_builder,
|
| 534 |
+
max_attempts=1,
|
| 535 |
+
sql=sql,
|
| 536 |
+
exec_result=(r_exec.data or {}),
|
| 537 |
+
traces=traces,
|
| 538 |
+
)
|
| 539 |
+
dt = (time.perf_counter() - t0) * 1000.0
|
| 540 |
+
stage_duration_ms.labels("verifier").observe(dt)
|
| 541 |
+
|
| 542 |
+
# Traces
|
| 543 |
|
| 544 |
+
# If verifier (or its repair) produced a new SQL, consume it
|
| 545 |
+
if r_ver.data and isinstance(r_ver.data, dict):
|
| 546 |
+
repaired_sql = r_ver.data.get("sql")
|
| 547 |
+
if repaired_sql:
|
| 548 |
+
sql = repaired_sql
|
| 549 |
|
| 550 |
+
data = r_ver.data if (r_ver and isinstance(r_ver.data, dict)) else {}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 551 |
|
| 552 |
# Verified flag
|
| 553 |
+
verified = bool(data.get("verified") is True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 554 |
|
| 555 |
# consume repaired SQL from verifier if any
|
| 556 |
+
repaired_sql = data.get("sql")
|
| 557 |
+
if repaired_sql:
|
| 558 |
+
sql = repaired_sql
|
| 559 |
|
| 560 |
# --- 7) repair loop (if not verified) ---
|
| 561 |
if not verified:
|
|
|
|
| 567 |
self.repair.run,
|
| 568 |
sql=sql,
|
| 569 |
error_msg="; ".join(details or ["unknown"]),
|
| 570 |
+
schema_preview=schema_for_llm,
|
| 571 |
)
|
| 572 |
dt = (time.perf_counter() - t0) * 1000.0
|
| 573 |
stage_duration_ms.labels("repair").observe(dt)
|
| 574 |
traces.extend(self._trace_list(r_fix))
|
| 575 |
+
_tag_last_trace_attempt("repair", _attempt)
|
| 576 |
if not getattr(r_fix, "trace", None):
|
| 577 |
_fallback_trace("repair", dt, r_fix.ok)
|
| 578 |
if not r_fix.ok:
|
|
|
|
| 587 |
dt2 = (time.perf_counter() - t0) * 1000.0
|
| 588 |
stage_duration_ms.labels("safety").observe(dt2)
|
| 589 |
traces.extend(self._trace_list(r_safe2))
|
| 590 |
+
_tag_last_trace_attempt("safety", _attempt)
|
| 591 |
if not getattr(r_safe2, "trace", None):
|
| 592 |
_fallback_trace("safety", dt2, r_safe2.ok)
|
| 593 |
if not r_safe2.ok:
|
|
|
|
| 602 |
dt2 = (time.perf_counter() - t0) * 1000.0
|
| 603 |
stage_duration_ms.labels("executor").observe(dt2)
|
| 604 |
traces.extend(self._trace_list(r_exec2))
|
| 605 |
+
_tag_last_trace_attempt("executor", _attempt)
|
| 606 |
if not getattr(r_exec2, "trace", None):
|
| 607 |
_fallback_trace("executor", dt2, r_exec2.ok)
|
| 608 |
if not r_exec2.ok:
|
|
|
|
| 622 |
dt2 = (time.perf_counter() - t0) * 1000.0
|
| 623 |
stage_duration_ms.labels("verifier").observe(dt2)
|
| 624 |
traces.extend(self._trace_list(r_ver2))
|
| 625 |
+
_tag_last_trace_attempt("verifier", _attempt)
|
| 626 |
if not getattr(r_ver2, "trace", None):
|
| 627 |
_fallback_trace("verifier", dt2, r_ver2.ok)
|
| 628 |
+
verified = bool(r_ver2.data and r_ver2.data.get("verified") is True)
|
|
|
|
|
|
|
| 629 |
if r_ver2.data and "sql" in r_ver2.data and r_ver2.data["sql"]:
|
| 630 |
sql = r_ver2.data["sql"]
|
| 631 |
if verified:
|
nl2sql/planner.py
CHANGED
|
@@ -6,6 +6,23 @@ from typing import Any, Dict, List, Tuple, Optional
|
|
| 6 |
__all__ = ["Planner"]
|
| 7 |
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
# --------- Heuristic schema trimming (safe, mypy-clean) ---------
|
| 10 |
def _tokenize_lower(s: str) -> List[str]:
|
| 11 |
return re.findall(r"[a-z_]+", (s or "").lower())
|
|
@@ -14,41 +31,33 @@ def _tokenize_lower(s: str) -> List[str]:
|
|
| 14 |
def _table_blocks(schema_text: str) -> List[Tuple[str, List[str]]]:
|
| 15 |
"""
|
| 16 |
Parse plain-text schema into [(table_name, lines)] blocks,
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
| 18 |
"""
|
| 19 |
blocks: List[Tuple[str, List[str]]] = []
|
| 20 |
cur_name: Optional[str] = None
|
| 21 |
cur_lines: List[str] = []
|
| 22 |
|
| 23 |
-
def _flush()
|
| 24 |
nonlocal cur_name, cur_lines
|
| 25 |
-
if cur_name is not None
|
| 26 |
-
blocks.append((cur_name, cur_lines
|
| 27 |
cur_name, cur_lines = None, []
|
| 28 |
|
| 29 |
-
for
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
if m is not None:
|
| 36 |
-
name = m.group(1)
|
| 37 |
-
started = True
|
| 38 |
-
elif m2 is not None:
|
| 39 |
-
name = m2.group(1)
|
| 40 |
-
started = True
|
| 41 |
-
|
| 42 |
-
if started and name:
|
| 43 |
_flush()
|
| 44 |
-
cur_name =
|
| 45 |
-
cur_lines
|
| 46 |
else:
|
| 47 |
if cur_name is not None:
|
| 48 |
-
cur_lines.append(
|
| 49 |
-
|
| 50 |
-
if cur_name is not None and line.strip().endswith(");"):
|
| 51 |
-
_flush()
|
| 52 |
|
| 53 |
_flush()
|
| 54 |
return blocks
|
|
@@ -64,29 +73,22 @@ def _pick_relevant_tables(schema_text: str, question: str, k: int = 3) -> str:
|
|
| 64 |
q_toks = set(_tokenize_lower(question))
|
| 65 |
scored: List[Tuple[int, str, List[str]]] = []
|
| 66 |
for name, lines in blocks:
|
| 67 |
-
score = sum(1 for
|
| 68 |
-
cols_line = " ".join(lines)
|
| 69 |
-
cols = re.findall(r"\b([A-Za-z_]\w*)\b", cols_line)
|
| 70 |
-
score += min(2, sum(1 for c in cols if c.lower() in q_toks))
|
| 71 |
scored.append((score, name, lines))
|
| 72 |
|
| 73 |
-
scored.sort(key=lambda
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
keep = scored[: max(1, k)]
|
| 77 |
-
|
| 78 |
out_lines: List[str] = []
|
| 79 |
-
for _, _, lines in
|
| 80 |
out_lines.extend(lines)
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
return trimmed if trimmed else schema_text
|
| 85 |
except Exception:
|
| 86 |
return schema_text
|
| 87 |
|
| 88 |
|
| 89 |
-
# ------------------------------ Planner ------------------------------
|
| 90 |
class Planner:
|
| 91 |
"""Planner wrapper around the LLM provider."""
|
| 92 |
|
|
@@ -95,26 +97,65 @@ class Planner:
|
|
| 95 |
# ensure model_id is always a str (for mypy)
|
| 96 |
self.model_id: str = str(model_id or getattr(llm, "model", "unknown"))
|
| 97 |
# in-memory cache: (model, hash(q), hash(trimmed)) → (plan, pin, pout, cost)
|
| 98 |
-
self._plan_cache: dict[
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
|
| 103 |
key: tuple[str, int, int] = (
|
| 104 |
self.model_id,
|
| 105 |
hash(user_query or ""),
|
| 106 |
-
hash(
|
| 107 |
)
|
|
|
|
| 108 |
if key in self._plan_cache:
|
| 109 |
-
plan_text, pin, pout, cost = self._plan_cache[key]
|
| 110 |
else:
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
|
| 116 |
return {
|
| 117 |
"plan": plan_text,
|
|
|
|
| 118 |
"usage": {
|
| 119 |
"prompt_tokens": pin,
|
| 120 |
"completion_tokens": pout,
|
|
|
|
| 6 |
__all__ = ["Planner"]
|
| 7 |
|
| 8 |
|
| 9 |
+
def _extract_table_names_from_schema(schema_text: str) -> List[str]:
|
| 10 |
+
"""Best-effort table name extraction from schema preview."""
|
| 11 |
+
if not schema_text:
|
| 12 |
+
return []
|
| 13 |
+
names = re.findall(
|
| 14 |
+
r"(?im)^\s*create\s+table\s+`?([A-Za-z_][A-Za-z0-9_]*)`?\b", schema_text
|
| 15 |
+
)
|
| 16 |
+
# de-dup preserving order
|
| 17 |
+
seen: set[str] = set()
|
| 18 |
+
out: List[str] = []
|
| 19 |
+
for n in names:
|
| 20 |
+
if n not in seen:
|
| 21 |
+
out.append(n)
|
| 22 |
+
seen.add(n)
|
| 23 |
+
return out
|
| 24 |
+
|
| 25 |
+
|
| 26 |
# --------- Heuristic schema trimming (safe, mypy-clean) ---------
|
| 27 |
def _tokenize_lower(s: str) -> List[str]:
|
| 28 |
return re.findall(r"[a-z_]+", (s or "").lower())
|
|
|
|
| 31 |
def _table_blocks(schema_text: str) -> List[Tuple[str, List[str]]]:
|
| 32 |
"""
|
| 33 |
Parse plain-text schema into [(table_name, lines)] blocks,
|
| 34 |
+
assuming SQLite preview format like:
|
| 35 |
+
Table: users
|
| 36 |
+
- id
|
| 37 |
+
- name
|
| 38 |
"""
|
| 39 |
blocks: List[Tuple[str, List[str]]] = []
|
| 40 |
cur_name: Optional[str] = None
|
| 41 |
cur_lines: List[str] = []
|
| 42 |
|
| 43 |
+
def _flush():
|
| 44 |
nonlocal cur_name, cur_lines
|
| 45 |
+
if cur_name is not None:
|
| 46 |
+
blocks.append((cur_name, cur_lines))
|
| 47 |
cur_name, cur_lines = None, []
|
| 48 |
|
| 49 |
+
for raw in (schema_text or "").splitlines():
|
| 50 |
+
line = raw.strip()
|
| 51 |
+
if not line:
|
| 52 |
+
continue
|
| 53 |
+
m = re.match(r"^table:\s*([a-zA-Z0-9_]+)\s*$", line, re.IGNORECASE)
|
| 54 |
+
if m:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
_flush()
|
| 56 |
+
cur_name = m.group(1)
|
| 57 |
+
cur_lines = [raw]
|
| 58 |
else:
|
| 59 |
if cur_name is not None:
|
| 60 |
+
cur_lines.append(raw)
|
|
|
|
|
|
|
|
|
|
| 61 |
|
| 62 |
_flush()
|
| 63 |
return blocks
|
|
|
|
| 73 |
q_toks = set(_tokenize_lower(question))
|
| 74 |
scored: List[Tuple[int, str, List[str]]] = []
|
| 75 |
for name, lines in blocks:
|
| 76 |
+
score = sum(1 for tok in _tokenize_lower(" ".join(lines)) if tok in q_toks)
|
|
|
|
|
|
|
|
|
|
| 77 |
scored.append((score, name, lines))
|
| 78 |
|
| 79 |
+
scored.sort(key=lambda x: (-x[0], x[1]))
|
| 80 |
+
top = scored[:k]
|
| 81 |
+
# Deterministic selection: sorted by descending score, then table name as tie-breaker.
|
|
|
|
|
|
|
| 82 |
out_lines: List[str] = []
|
| 83 |
+
for _, _, lines in top:
|
| 84 |
out_lines.extend(lines)
|
| 85 |
+
out_lines.append("") # spacing
|
| 86 |
+
|
| 87 |
+
return "\n".join(out_lines).strip() if out_lines else schema_text
|
|
|
|
| 88 |
except Exception:
|
| 89 |
return schema_text
|
| 90 |
|
| 91 |
|
|
|
|
| 92 |
class Planner:
|
| 93 |
"""Planner wrapper around the LLM provider."""
|
| 94 |
|
|
|
|
| 97 |
# ensure model_id is always a str (for mypy)
|
| 98 |
self.model_id: str = str(model_id or getattr(llm, "model", "unknown"))
|
| 99 |
# in-memory cache: (model, hash(q), hash(trimmed)) → (plan, pin, pout, cost)
|
| 100 |
+
self._plan_cache: dict[
|
| 101 |
+
tuple[str, int, int], tuple[str, List[str], int, int, float]
|
| 102 |
+
] = {}
|
| 103 |
+
|
| 104 |
+
def run(
|
| 105 |
+
self,
|
| 106 |
+
*,
|
| 107 |
+
user_query: str,
|
| 108 |
+
schema_preview: str,
|
| 109 |
+
constraints: Optional[List[str]] = None,
|
| 110 |
+
traces: Optional[List[dict]] = None,
|
| 111 |
+
) -> Dict[str, Any]:
|
| 112 |
+
"""Plan the query. Assumes schema_preview is already budgeted upstream."""
|
| 113 |
+
schema_preview = schema_preview or ""
|
| 114 |
+
constraints = constraints or []
|
| 115 |
|
| 116 |
key: tuple[str, int, int] = (
|
| 117 |
self.model_id,
|
| 118 |
hash(user_query or ""),
|
| 119 |
+
hash(schema_preview),
|
| 120 |
)
|
| 121 |
+
|
| 122 |
if key in self._plan_cache:
|
| 123 |
+
plan_text, used_tables, pin, pout, cost = self._plan_cache[key]
|
| 124 |
else:
|
| 125 |
+
# Call provider with backward-compatible kwargs
|
| 126 |
+
try:
|
| 127 |
+
res = self.llm.plan(
|
| 128 |
+
user_query=user_query,
|
| 129 |
+
schema_preview=schema_preview,
|
| 130 |
+
constraints=constraints,
|
| 131 |
+
)
|
| 132 |
+
except TypeError:
|
| 133 |
+
# Older fakes/providers may not accept `constraints`
|
| 134 |
+
res = self.llm.plan(
|
| 135 |
+
user_query=user_query,
|
| 136 |
+
schema_preview=schema_preview,
|
| 137 |
+
)
|
| 138 |
+
|
| 139 |
+
if not isinstance(res, tuple):
|
| 140 |
+
raise TypeError("LLM plan() must return a tuple")
|
| 141 |
+
|
| 142 |
+
if len(res) == 5:
|
| 143 |
+
plan_text, used_tables, pin, pout, cost = res
|
| 144 |
+
elif len(res) == 4:
|
| 145 |
+
plan_text, pin, pout, cost = res
|
| 146 |
+
used_tables = _extract_table_names_from_schema(schema_preview)
|
| 147 |
+
else:
|
| 148 |
+
raise TypeError("LLM plan() must return 4- or 5-tuple")
|
| 149 |
+
|
| 150 |
+
# Ensure used_tables is always a list[str]
|
| 151 |
+
if not isinstance(used_tables, list):
|
| 152 |
+
used_tables = _extract_table_names_from_schema(schema_preview)
|
| 153 |
+
|
| 154 |
+
self._plan_cache[key] = (plan_text, used_tables, pin, pout, cost)
|
| 155 |
|
| 156 |
return {
|
| 157 |
"plan": plan_text,
|
| 158 |
+
"used_tables": used_tables,
|
| 159 |
"usage": {
|
| 160 |
"prompt_tokens": pin,
|
| 161 |
"completion_tokens": pout,
|
nl2sql/prompts/__init__.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Prompt contracts for LLM-facing stages.

Re-exports the dataclass contracts from :mod:`.contracts` as this
package's public API.
"""

from .contracts import (
    GeneratorPromptInput,
    GeneratorPromptOutput,
    PlannerPromptInput,
    PlannerPromptOutput,
)

__all__ = [
    "PlannerPromptInput",
    "PlannerPromptOutput",
    "GeneratorPromptInput",
    "GeneratorPromptOutput",
]
|
nl2sql/prompts/contracts.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from __future__ import annotations

from dataclasses import dataclass
from typing import Any, Dict, List, Optional


# NOTE:
# These are *prompt contracts* (input/output shapes) for LLM-facing stages.
# They are intentionally lightweight dataclasses — not validation models —
# to keep this layer minimal and low-risk.


@dataclass(frozen=True)
class PlannerPromptInput:
    """Inputs handed to the planner prompt."""

    user_query: str
    schema_preview: str  # already budgeted at the pipeline boundary
    constraints: List[str]


@dataclass(frozen=True)
class PlannerPromptOutput:
    """Shape of the planner's response."""

    plan: str
    used_tables: List[str]


@dataclass(frozen=True)
class GeneratorPromptInput:
    """Inputs handed to the SQL-generator prompt."""

    user_query: str
    schema_preview: str  # already budgeted at the pipeline boundary
    plan: str
    constraints: List[str]
    clarify_answers: Optional[Dict[str, Any]] = None


@dataclass(frozen=True)
class GeneratorPromptOutput:
    """Shape of the generator's response."""

    sql: str
    rationale: str
    used_tables: List[str]
|