github-actions[bot] commited on
Commit
11975fd
·
1 Parent(s): 562f213

Sync from GitHub main @ 0ffdf76846d3f126cc49a0fd3341046141b13f7d

Browse files
adapters/metrics/base.py CHANGED
@@ -1,11 +1,26 @@
1
  from __future__ import annotations
2
 
3
- from typing import Protocol
 
4
 
 
5
 
6
- class Metrics(Protocol):
 
 
7
  def observe_stage_duration_ms(self, *, stage: str, dt_ms: float) -> None: ...
8
 
 
9
  def inc_pipeline_run(self, *, status: str) -> None: ...
10
 
11
- def inc_repair_attempt(self, *, outcome: str) -> None: ...
 
 
 
 
 
 
 
 
 
 
 
1
  from __future__ import annotations
2
 
3
+ from abc import ABC, abstractmethod
4
+ from typing import Literal
5
 
6
+ RepairOutcome = Literal["attempt", "success", "failed", "skipped"]
7
 
8
+
9
+ class Metrics(ABC):
10
+ @abstractmethod
11
  def observe_stage_duration_ms(self, *, stage: str, dt_ms: float) -> None: ...
12
 
13
+ @abstractmethod
14
  def inc_pipeline_run(self, *, status: str) -> None: ...
15
 
16
+ @abstractmethod
17
+ def inc_stage_call(self, *, stage: str, ok: bool) -> None: ...
18
+
19
+ @abstractmethod
20
+ def inc_stage_error(self, *, stage: str, error_code: str) -> None: ...
21
+
22
+ @abstractmethod
23
+ def inc_repair_trigger(self, *, stage: str, reason: str) -> None: ...
24
+
25
+ @abstractmethod
26
+ def inc_repair_attempt(self, *, stage: str, outcome: RepairOutcome) -> None: ...
adapters/metrics/noop.py CHANGED
@@ -1,6 +1,6 @@
1
  from __future__ import annotations
2
 
3
- from adapters.metrics.base import Metrics
4
 
5
 
6
  class NoOpMetrics(Metrics):
@@ -10,5 +10,14 @@ class NoOpMetrics(Metrics):
10
  def inc_pipeline_run(self, *, status: str) -> None:
11
  return
12
 
13
- def inc_repair_attempt(self, *, outcome: str) -> None:
 
 
 
 
 
 
 
 
 
14
  return
 
1
  from __future__ import annotations
2
 
3
+ from adapters.metrics.base import Metrics, RepairOutcome
4
 
5
 
6
  class NoOpMetrics(Metrics):
 
10
  def inc_pipeline_run(self, *, status: str) -> None:
11
  return
12
 
13
+ def inc_stage_call(self, *, stage: str, ok: bool) -> None:
14
+ return
15
+
16
+ def inc_stage_error(self, *, stage: str, error_code: str) -> None:
17
+ return
18
+
19
+ def inc_repair_trigger(self, *, stage: str, reason: str) -> None:
20
+ return
21
+
22
+ def inc_repair_attempt(self, *, stage: str, outcome: RepairOutcome) -> None:
23
  return
adapters/metrics/prometheus.py CHANGED
@@ -1,16 +1,50 @@
1
  from __future__ import annotations
2
 
3
- from adapters.metrics.base import Metrics
4
- from nl2sql.metrics import stage_duration_ms, pipeline_runs_total, repair_attempts_total
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
 
7
  class PrometheusMetrics(Metrics):
8
  def observe_stage_duration_ms(self, *, stage: str, dt_ms: float) -> None:
9
- stage_duration_ms.labels(stage).observe(dt_ms)
10
 
11
  def inc_pipeline_run(self, *, status: str) -> None:
12
  pipeline_runs_total.labels(status=status).inc()
13
 
14
- def inc_repair_attempt(self, *, outcome: str) -> None:
15
- # outcome: attempt | success | failed | skipped
16
- repair_attempts_total.labels(outcome=outcome).inc()
 
 
 
 
 
 
 
 
 
1
  from __future__ import annotations
2
 
3
+ from prometheus_client import Counter
4
+ from adapters.metrics.base import Metrics, RepairOutcome
5
+ from nl2sql.metrics import stage_duration_ms, pipeline_runs_total
6
+
7
+
8
+ stage_calls_total = Counter(
9
+ "stage_calls_total",
10
+ "Total number of stage calls by stage and success",
11
+ ["stage", "ok"],
12
+ )
13
+
14
+ stage_errors_total = Counter(
15
+ "stage_errors_total",
16
+ "Total number of stage errors by stage and error code",
17
+ ["stage", "error_code"],
18
+ )
19
+
20
+ repair_attempts_total = Counter(
21
+ "repair_attempts_total",
22
+ "Total repair attempts by stage and outcome",
23
+ ["stage", "outcome"],
24
+ )
25
+
26
+ repair_trigger_total = Counter(
27
+ "repair_trigger_total",
28
+ "Total repair triggers by stage and reason",
29
+ ["stage", "reason"],
30
+ )
31
 
32
 
33
  class PrometheusMetrics(Metrics):
34
  def observe_stage_duration_ms(self, *, stage: str, dt_ms: float) -> None:
35
+ stage_duration_ms.labels(stage=stage).observe(dt_ms)
36
 
37
  def inc_pipeline_run(self, *, status: str) -> None:
38
  pipeline_runs_total.labels(status=status).inc()
39
 
40
+ def inc_stage_call(self, *, stage: str, ok: bool) -> None:
41
+ stage_calls_total.labels(stage=stage, ok=str(ok).lower()).inc()
42
+
43
+ def inc_stage_error(self, *, stage: str, error_code: str) -> None:
44
+ stage_errors_total.labels(stage=stage, error_code=error_code).inc()
45
+
46
+ def inc_repair_trigger(self, *, stage: str, reason: str) -> None:
47
+ repair_trigger_total.labels(stage=stage, reason=reason).inc()
48
+
49
+ def inc_repair_attempt(self, *, stage: str, outcome: RepairOutcome) -> None:
50
+ repair_attempts_total.labels(stage=stage, outcome=outcome).inc()
nl2sql/pipeline.py CHANGED
@@ -208,6 +208,13 @@ class Pipeline:
208
 
209
  self.metrics.observe_stage_duration_ms(stage=stage_name, dt_ms=dt)
210
 
 
 
 
 
 
 
 
211
  # attach stage trace
212
  if getattr(r, "trace", None):
213
  traces.append(r.trace.__dict__)
@@ -227,7 +234,7 @@ class Pipeline:
227
  # stage failed → check repair availability
228
  eligible, reason = self._should_repair(stage_name, r)
229
  if not eligible:
230
- self.metrics.inc_repair_attempt(outcome="skipped")
231
  # annotate latest stage trace entry
232
  if traces and isinstance(traces[-1], dict):
233
  notes = traces[-1].get("notes") or {}
@@ -246,7 +253,8 @@ class Pipeline:
246
  repair_args = repair_input_builder(r, kwargs)
247
 
248
  # --- 3) Run repair (always logged) ---
249
- self.metrics.inc_repair_attempt(outcome="attempt")
 
250
  t1 = time.perf_counter()
251
  r_fix = self._safe_stage(self.repair.run, **repair_args)
252
  dt_fix = (time.perf_counter() - t1) * 1000.0
@@ -266,7 +274,7 @@ class Pipeline:
266
  )
267
 
268
  if not r_fix.ok:
269
- self.metrics.inc_repair_attempt(outcome="failed")
270
  return r # repair itself failed → stop here
271
 
272
  # --- 4) Only inject SQL if the stage is an SQL-producing stage ---
@@ -276,10 +284,10 @@ class Pipeline:
276
 
277
  # important: success metric must reflect if repair was applied meaningfully
278
  if stage_name in self.SQL_REPAIR_STAGES:
279
- self.metrics.inc_repair_attempt(outcome="success")
280
  else:
281
  # log-only mode counts as a success-attempt but not semantic success
282
- self.metrics.inc_repair_attempt(outcome="success")
283
 
284
  # for SQL stages, we re-run the stage again with modified kwargs
285
  # for log-only stages, this simply loops and stage is re-run unchanged
@@ -623,6 +631,7 @@ class Pipeline:
623
  ),
624
  )
625
  if eligible:
 
626
  # Prefer the real verifier message if present (tests expect this).
627
  err_list = (r_ver.error if (r_ver and r_ver.error) else None) or []
628
  error_msg = (
@@ -651,7 +660,7 @@ class Pipeline:
651
  "schema_preview": schema_for_llm,
652
  }
653
 
654
- self.metrics.inc_repair_attempt(outcome="attempt")
655
  r_rep = self.repair.run(**rep_kwargs)
656
 
657
  new_sql = (
@@ -713,11 +722,15 @@ class Pipeline:
713
  verified = bool(data2.get("verified") is True)
714
 
715
  if verified:
716
- self.metrics.inc_repair_attempt(outcome="success")
 
 
717
  else:
718
- self.metrics.inc_repair_attempt(outcome="failed")
 
 
719
  else:
720
- self.metrics.inc_repair_attempt(outcome="skipped")
721
 
722
  # --- 8) optional soft auto-verify (executor success, no details) --- (executor success, no details) ---
723
  if (verified is None or not verified) and not details:
 
208
 
209
  self.metrics.observe_stage_duration_ms(stage=stage_name, dt_ms=dt)
210
 
211
+ self.metrics.inc_stage_call(stage=stage_name, ok=r.ok)
212
+ if not r.ok and getattr(r, "error_code", None) is not None:
213
+ self.metrics.inc_stage_error(
214
+ stage=stage_name,
215
+ error_code=str(r.error_code),
216
+ )
217
+
218
  # attach stage trace
219
  if getattr(r, "trace", None):
220
  traces.append(r.trace.__dict__)
 
234
  # stage failed → check repair availability
235
  eligible, reason = self._should_repair(stage_name, r)
236
  if not eligible:
237
+ self.metrics.inc_repair_attempt(stage="verifier", outcome="skipped")
238
  # annotate latest stage trace entry
239
  if traces and isinstance(traces[-1], dict):
240
  notes = traces[-1].get("notes") or {}
 
253
  repair_args = repair_input_builder(r, kwargs)
254
 
255
  # --- 3) Run repair (always logged) ---
256
+ self.metrics.inc_repair_trigger(stage=stage_name, reason=reason)
257
+ self.metrics.inc_repair_attempt(stage="verifier", outcome="attempt")
258
  t1 = time.perf_counter()
259
  r_fix = self._safe_stage(self.repair.run, **repair_args)
260
  dt_fix = (time.perf_counter() - t1) * 1000.0
 
274
  )
275
 
276
  if not r_fix.ok:
277
+ self.metrics.inc_repair_attempt(stage="verifier", outcome="failed")
278
  return r # repair itself failed → stop here
279
 
280
  # --- 4) Only inject SQL if the stage is an SQL-producing stage ---
 
284
 
285
  # important: success metric must reflect if repair was applied meaningfully
286
  if stage_name in self.SQL_REPAIR_STAGES:
287
+ self.metrics.inc_repair_attempt(stage="verifier", outcome="success")
288
  else:
289
  # log-only mode counts as a success-attempt but not semantic success
290
+ self.metrics.inc_repair_attempt(stage="verifier", outcome="success")
291
 
292
  # for SQL stages, we re-run the stage again with modified kwargs
293
  # for log-only stages, this simply loops and stage is re-run unchanged
 
631
  ),
632
  )
633
  if eligible:
634
+ self.metrics.inc_repair_trigger(stage="verifier", reason=_reason)
635
  # Prefer the real verifier message if present (tests expect this).
636
  err_list = (r_ver.error if (r_ver and r_ver.error) else None) or []
637
  error_msg = (
 
660
  "schema_preview": schema_for_llm,
661
  }
662
 
663
+ self.metrics.inc_repair_attempt(stage="verifier", outcome="attempt")
664
  r_rep = self.repair.run(**rep_kwargs)
665
 
666
  new_sql = (
 
722
  verified = bool(data2.get("verified") is True)
723
 
724
  if verified:
725
+ self.metrics.inc_repair_attempt(
726
+ stage="verifier", outcome="success"
727
+ )
728
  else:
729
+ self.metrics.inc_repair_attempt(
730
+ stage="verifier", outcome="failed"
731
+ )
732
  else:
733
+ self.metrics.inc_repair_attempt(stage="verifier", outcome="skipped")
734
 
735
  # --- 8) optional soft auto-verify (executor success, no details) --- (executor success, no details) ---
736
  if (verified is None or not verified) and not details:
nl2sql/repair.py CHANGED
@@ -1,6 +1,5 @@
1
  import time
2
 
3
-
4
  from nl2sql.types import StageTrace, StageResult
5
  from adapters.llm.base import LLMProvider
6
 
@@ -21,7 +20,7 @@ class Repair:
21
  def __init__(self, llm: LLMProvider):
22
  self.llm = llm
23
 
24
- def run(self, sql: str, error_msg: str, schema_preview: str) -> StageResult:
25
  t0 = time.perf_counter()
26
  fixed_sql, t_in, t_out, cost = self.llm.repair(
27
  sql=sql,
@@ -36,4 +35,9 @@ class Repair:
36
  cost_usd=cost,
37
  notes={"old_sql_len": len(sql), "new_sql_len": len(fixed_sql)},
38
  )
39
- return StageResult(ok=True, data={"sql": fixed_sql}, trace=trace)
 
 
 
 
 
 
1
  import time
2
 
 
3
  from nl2sql.types import StageTrace, StageResult
4
  from adapters.llm.base import LLMProvider
5
 
 
20
  def __init__(self, llm: LLMProvider):
21
  self.llm = llm
22
 
23
+ def run(self, *, sql: str, error_msg: str, schema_preview: str) -> StageResult:
24
  t0 = time.perf_counter()
25
  fixed_sql, t_in, t_out, cost = self.llm.repair(
26
  sql=sql,
 
35
  cost_usd=cost,
36
  notes={"old_sql_len": len(sql), "new_sql_len": len(fixed_sql)},
37
  )
38
+
39
+ return StageResult(
40
+ ok=True,
41
+ data={"sql": fixed_sql},
42
+ trace=trace,
43
+ )