Spaces:

melikakheirieh
/

nl2sql-copilot

Running

App Files Community

Melika Kheirieh committed on Nov 5

Commit

79a5f4a

1 Parent(s): 8618ece

feat(trace): standardize StageTrace (add summary) and coerce duration_ms to int at API boundary

Browse files

Files changed (3) hide show

app/routers/nl2sql.py +26 -71
nl2sql/pipeline.py +83 -31
nl2sql/types.py +2 -1

app/routers/nl2sql.py CHANGED Viewed

@@ -57,16 +57,9 @@ def get_runner() -> Runner:
 def _build_pipeline(adapter) -> Any:
     """Thin wrapper for tests to monkeypatch; builds a pipeline bound to adapter."""
     return pipeline_from_config_with_adapter(CONFIG_PATH, adapter=adapter)
-#
-# # Stable public re-exports
-# Pipeline = _Pipeline
-# FinalResult = _FinalResult
-# __all__ = ["Pipeline", "FinalResult"]
 router = APIRouter(prefix="/nl2sql")
 # -------------------------------
@@ -148,7 +141,6 @@ _load_db_map()
 # -------------------------------
 # Adapter selection (lazy)
 # -------------------------------
-# ---------- SELECT ADAPTER ----------
 def _select_adapter(db_id: Optional[str]) -> Union[PostgresAdapter, SQLiteAdapter]:
     """
     Resolve a DB adapter based on module-level DB_MODE and an optional db_id.
@@ -207,66 +199,8 @@ def _get_llm() -> OpenAIProvider:
     return OpenAIProvider()
-# def _build_pipeline(adapter: Union[PostgresAdapter, SQLiteAdapter]) -> Pipeline:
-#     """
-#     Build a fresh Pipeline bound to the given adapter.
-#     All stateful/external pieces (LLM, executor) are instantiated here (lazy).
-#     """
-#     llm = _get_llm()
-#     detector = AmbiguityDetector()
-#     planner = Planner(llm=llm)
-#     generator = Generator(llm=llm)
-#     safety = Safety()
-#     executor = Executor(adapter)
-#     verifier = Verifier()
-#     repair = Repair(llm=llm)
-#     return Pipeline(
-#         detector=detector,
-#         planner=planner,
-#         generator=generator,
-#         safety=safety,
-#         executor=executor,
-#         verifier=verifier,
-#         repair=repair,
-#     )
-# -------------------------------
-# Dependency-injected runner
-# -------------------------------
-# class Runner(Protocol):
-#     def __call__(
-#         self, *, user_query: str, schema_preview: str | None = None
-#     ) -> FinalResult: ...
-#
-#
-# def get_runner(request: Request) -> Runner:
-#     """
-#     Returns a callable runner. Preferred path in production:
-#     - app.state.pipeline_runner (if set) -> used (e.g., tests or special wiring)
-#     - app.state.pipeline -> reuse existing
-#     - else build default pipeline lazily and cache
-#     """
-#     runner: Optional[Runner] = getattr(request.app.state, "pipeline_runner", None)  # type: ignore[attr-defined]
-#     if runner:
-#         return runner
-#
-#     pipeline: Optional[Pipeline] = getattr(request.app.state, "pipeline", None)  # type: ignore[attr-defined]
-#     if pipeline is None:
-#         # Build a default pipeline lazily (no side-effect on import)
-#         adapter = _select_adapter(db_id=None)
-#         try:
-#             pipeline = _build_pipeline(adapter)
-#             request.app.state.pipeline = pipeline  # type: ignore[attr-defined]
-#         except Exception as exc:
-#             raise HTTPException(
-#                 status_code=500, detail=f"Pipeline unavailable: {exc!s}"
-#             )
-#     return pipeline.run  # type: ignore[return-value]
 # -------------------------------
-# Helpers (unchanged)
 # -------------------------------
 def _to_dict(obj: Any) -> Any:
     if is_dataclass(obj) and not isinstance(obj, type):
@@ -275,29 +209,50 @@ def _to_dict(obj: Any) -> Any:
 def _round_trace(t: Any) -> Dict[str, Any]:
-    """Normalize a trace entry to a dict and coerce duration_ms to int."""
     if isinstance(t, dict):
         stage = t.get("stage", "?")
         ms = t.get("duration_ms", 0)
         notes = t.get("notes")
         cost = t.get("cost_usd")
     else:
         stage = getattr(t, "stage", "?")
         ms = getattr(t, "duration_ms", 0)
         notes = getattr(t, "notes", None)
         cost = getattr(t, "cost_usd", None)
     try:
-        ms_int = int(ms) if ms is not None else 0
     except Exception:
         ms_int = 0
-    return {
         "stage": str(stage) if stage is not None else "?",
         "duration_ms": ms_int,
         "notes": notes,
         "cost_usd": cost,
     }
 # -------------------------------
@@ -391,7 +346,7 @@ def nl2sql_handler(
         message = "; ".join(result.details or []) or "Unknown error"
         raise HTTPException(status_code=400, detail=message)
-    # Success path → 200
     traces = [_round_trace(t) for t in (result.traces or [])]
     return NL2SQLResponse(
         ambiguous=False,

 def _build_pipeline(adapter) -> Any:
     """Thin wrapper for tests to monkeypatch; builds a pipeline bound to adapter."""
     return pipeline_from_config_with_adapter(CONFIG_PATH, adapter=adapter)
 router = APIRouter(prefix="/nl2sql")
 # -------------------------------
 # -------------------------------
 # Adapter selection (lazy)
 # -------------------------------
 def _select_adapter(db_id: Optional[str]) -> Union[PostgresAdapter, SQLiteAdapter]:
     """
     Resolve a DB adapter based on module-level DB_MODE and an optional db_id.
     return OpenAIProvider()
 # -------------------------------
+# Helpers
 # -------------------------------
 def _to_dict(obj: Any) -> Any:
     if is_dataclass(obj) and not isinstance(obj, type):
 def _round_trace(t: Any) -> Dict[str, Any]:
+    """
+    Normalize a trace entry (dict or StageTrace-like object) for API/UI:
+    - stage: str (required)
+    - duration_ms: int (rounded)
+    - summary: optional (pass-through if exists)
+    - notes: optional
+    - token_in/out, cost_usd: pass-through if present
+    """
     if isinstance(t, dict):
         stage = t.get("stage", "?")
         ms = t.get("duration_ms", 0)
         notes = t.get("notes")
         cost = t.get("cost_usd")
+        summary = t.get("summary")
+        token_in = t.get("token_in")
+        token_out = t.get("token_out")
     else:
         stage = getattr(t, "stage", "?")
         ms = getattr(t, "duration_ms", 0)
         notes = getattr(t, "notes", None)
         cost = getattr(t, "cost_usd", None)
+        summary = getattr(t, "summary", None)
+        token_in = getattr(t, "token_in", None)
+        token_out = getattr(t, "token_out", None)
+    # coerce duration to int with rounding
     try:
+        ms_int = int(round(float(ms))) if ms is not None else 0
     except Exception:
         ms_int = 0
+    out: Dict[str, Any] = {
         "stage": str(stage) if stage is not None else "?",
         "duration_ms": ms_int,
         "notes": notes,
         "cost_usd": cost,
     }
+    if summary is not None:
+        out["summary"] = summary
+    if token_in is not None:
+        out["token_in"] = token_in
+    if token_out is not None:
+        out["token_out"] = token_out
+    return out
 # -------------------------------
         message = "; ".join(result.details or []) or "Unknown error"
         raise HTTPException(status_code=400, detail=message)
+    # Success path → 200 (coerce/standardize traces for API)
     traces = [_round_trace(t) for t in (result.traces or [])]
     return NL2SQLResponse(
         ambiguous=False,

nl2sql/pipeline.py CHANGED Viewed

@@ -68,6 +68,61 @@ class Pipeline:
                 traces.append(getattr(t, "__dict__", t))
         return traces
     # ------------------------------------------------------------
     @staticmethod
     def _safe_stage(fn, **kwargs) -> StageResult:
@@ -84,18 +139,6 @@ class Pipeline:
             tb = traceback.format_exc()
             return StageResult(ok=False, data=None, trace=None, error=[f"{e}", tb])
-    # ------------------------------------------------------------
-    @staticmethod
-    def _mk_trace(
-        stage: str, duration_ms: float, notes: Optional[Dict[str, Any]] = None
-    ) -> dict:
-        """Create a normalized trace dict."""
-        return {
-            "stage": stage,
-            "duration_ms": float(duration_ms),
-            "notes": notes or {},
-        }
     # ------------------------------------------------------------
     def run(
         self,
@@ -119,12 +162,14 @@ class Pipeline:
             t0 = time.perf_counter()
             questions = self.detector.detect(user_query, schema_preview)
             t1 = time.perf_counter()
             traces.append(
                 self._mk_trace(
-                    "detector",
-                    (t1 - t0) * 1000.0,
-                    {
-                        "ambiguous": bool(questions),
                         "questions_len": len(questions or []),
                     },
                 )
@@ -140,11 +185,18 @@ class Pipeline:
                     sql=None,
                     rationale=None,
                     verified=None,
-                    traces=traces,
                 )
         except Exception as e:
             # detector crash – mark as error but keep trace so far
-            traces.append(self._mk_trace("detector", 0.0, {"error": str(e)}))
             return FinalResult(
                 ok=False,
                 ambiguous=True,
@@ -154,7 +206,7 @@ class Pipeline:
                 sql=None,
                 rationale=None,
                 verified=None,
-                traces=traces,
             )
         # --- 2) planner ---
@@ -172,7 +224,7 @@ class Pipeline:
                 sql=None,
                 rationale=None,
                 verified=None,
-                traces=traces,
             )
         # --- 3) generator ---
@@ -194,7 +246,7 @@ class Pipeline:
                 sql=None,
                 rationale=None,
                 verified=None,
-                traces=traces,
             )
         sql = (r_gen.data or {}).get("sql")
@@ -213,7 +265,7 @@ class Pipeline:
                 sql=sql,
                 rationale=rationale,
                 verified=None,
-                traces=traces,
             )
         # --- 5) executor ---
@@ -283,11 +335,10 @@ class Pipeline:
             if any_exec_ok:
                 traces.append(
                     self._mk_trace(
-                        "pipeline",
-                        0.0,
-                        {
-                            "auto_fix": "verified=True (executor succeeded, verifier silent)"
-                        },
                     )
                 )
                 verified = True
@@ -299,9 +350,10 @@ class Pipeline:
         traces.append(
             self._mk_trace(
-                "pipeline",
-                0.0,
-                {"final_verified": bool(verified), "details_len": len(details)},
             )
         )
@@ -314,5 +366,5 @@ class Pipeline:
             rationale=rationale,
             verified=verified,
             questions=None,
-            traces=traces,
         )

                 traces.append(getattr(t, "__dict__", t))
         return traces
+    # ------------------------------------------------------------
+    @staticmethod
+    def _mk_trace(
+        stage: str,
+        duration_ms: float,
+        summary: str,
+        notes: Optional[Dict[str, Any]] = None,
+    ) -> dict:
+        """Create a normalized trace dict (internal: duration may be float)."""
+        return {
+            "stage": stage,
+            "duration_ms": float(duration_ms),
+            "summary": summary,
+            "notes": notes or {},
+        }
+    @staticmethod
+    def _normalize_traces(traces: List[dict]) -> List[dict]:
+        """
+        Normalize trace list for API/UI:
+        - coerce duration_ms to int
+        - ensure `summary` exists (fallback to a minimal one)
+        """
+        norm: List[dict] = []
+        for t in traces:
+            stage = str(t.get("stage", "unknown"))
+            dur = t.get("duration_ms", 0)
+            try:
+                dur_int = int(round(float(dur)))
+            except Exception:
+                dur_int = 0
+            summary = t.get("summary")
+            if not summary:
+                # fallback summary if not provided by stage
+                notes = t.get("notes") or {}
+                failed = bool(notes.get("error") or notes.get("errors"))
+                summary = "failed" if failed else "ok"
+            notes = t.get("notes") or {}
+            # preserve any accounting fields if present (token_in/out, cost_usd, ...)
+            payload = {
+                "stage": stage,
+                "duration_ms": dur_int,
+                "summary": summary,
+                "notes": notes,
+            }
+            # keep extra accounting if exists
+            if "token_in" in t:
+                payload["token_in"] = t["token_in"]
+            if "token_out" in t:
+                payload["token_out"] = t["token_out"]
+            if "cost_usd" in t:
+                payload["cost_usd"] = t["cost_usd"]
+            norm.append(payload)
+        return norm
     # ------------------------------------------------------------
     @staticmethod
     def _safe_stage(fn, **kwargs) -> StageResult:
             tb = traceback.format_exc()
             return StageResult(ok=False, data=None, trace=None, error=[f"{e}", tb])
     # ------------------------------------------------------------
     def run(
         self,
             t0 = time.perf_counter()
             questions = self.detector.detect(user_query, schema_preview)
             t1 = time.perf_counter()
+            is_amb = bool(questions)
             traces.append(
                 self._mk_trace(
+                    stage="detector",
+                    duration_ms=(t1 - t0) * 1000.0,
+                    summary=("ambiguous" if is_amb else "clear"),
+                    notes={
+                        "ambiguous": is_amb,
                         "questions_len": len(questions or []),
                     },
                 )
                     sql=None,
                     rationale=None,
                     verified=None,
+                    traces=self._normalize_traces(traces),
                 )
         except Exception as e:
             # detector crash – mark as error but keep trace so far
+            traces.append(
+                self._mk_trace(
+                    stage="detector",
+                    duration_ms=0.0,
+                    summary="failed",
+                    notes={"error": str(e)},
+                )
+            )
             return FinalResult(
                 ok=False,
                 ambiguous=True,
                 sql=None,
                 rationale=None,
                 verified=None,
+                traces=self._normalize_traces(traces),
             )
         # --- 2) planner ---
                 sql=None,
                 rationale=None,
                 verified=None,
+                traces=self._normalize_traces(traces),
             )
         # --- 3) generator ---
                 sql=None,
                 rationale=None,
                 verified=None,
+                traces=self._normalize_traces(traces),
             )
         sql = (r_gen.data or {}).get("sql")
                 sql=sql,
                 rationale=rationale,
                 verified=None,
+                traces=self._normalize_traces(traces),
             )
         # --- 5) executor ---
             if any_exec_ok:
                 traces.append(
                     self._mk_trace(
+                        stage="pipeline",
+                        duration_ms=0.0,
+                        summary="auto-verified",
+                        notes={"reason": "executor succeeded, verifier silent"},
                     )
                 )
                 verified = True
         traces.append(
             self._mk_trace(
+                stage="pipeline",
+                duration_ms=0.0,
+                summary="finalize",
+                notes={"final_verified": bool(verified), "details_len": len(details)},
             )
         )
             rationale=rationale,
             verified=verified,
             questions=None,
+            traces=self._normalize_traces(traces),
         )

nl2sql/types.py CHANGED Viewed

@@ -5,7 +5,8 @@ from typing import Any, Dict, Optional, List
 @dataclass(frozen=True)
 class StageTrace:
     stage: str
-    duration_ms: float
     notes: Optional[Dict[str, Any]] = None
     token_in: Optional[int] = None
     token_out: Optional[int] = None

 @dataclass(frozen=True)
 class StageTrace:
     stage: str
+    duration_ms: float  # keep float internally if you like
+    summary: str = ""  # ← default to keep legacy call-sites working
     notes: Optional[Dict[str, Any]] = None
     token_in: Optional[int] = None
     token_out: Optional[int] = None