github-actions[bot] commited on
Commit
562f213
·
1 Parent(s): c743376

Sync from GitHub main @ d3788163c1b28737c76fe3930fe9f123a0a2d084

Browse files
adapters/metrics/__init__.py ADDED
File without changes
adapters/metrics/base.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from typing import Protocol
4
+
5
+
6
+ class Metrics(Protocol):
7
+ def observe_stage_duration_ms(self, *, stage: str, dt_ms: float) -> None: ...
8
+
9
+ def inc_pipeline_run(self, *, status: str) -> None: ...
10
+
11
+ def inc_repair_attempt(self, *, outcome: str) -> None: ...
adapters/metrics/noop.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from adapters.metrics.base import Metrics
4
+
5
+
6
+ class NoOpMetrics(Metrics):
7
+ def observe_stage_duration_ms(self, *, stage: str, dt_ms: float) -> None:
8
+ return
9
+
10
+ def inc_pipeline_run(self, *, status: str) -> None:
11
+ return
12
+
13
+ def inc_repair_attempt(self, *, outcome: str) -> None:
14
+ return
adapters/metrics/prometheus.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from adapters.metrics.base import Metrics
4
+ from nl2sql.metrics import stage_duration_ms, pipeline_runs_total, repair_attempts_total
5
+
6
+
7
+ class PrometheusMetrics(Metrics):
8
+ def observe_stage_duration_ms(self, *, stage: str, dt_ms: float) -> None:
9
+ stage_duration_ms.labels(stage).observe(dt_ms)
10
+
11
+ def inc_pipeline_run(self, *, status: str) -> None:
12
+ pipeline_runs_total.labels(status=status).inc()
13
+
14
+ def inc_repair_attempt(self, *, outcome: str) -> None:
15
+ # outcome: attempt | success | failed | skipped
16
+ repair_attempts_total.labels(outcome=outcome).inc()
nl2sql/pipeline.py CHANGED
@@ -15,7 +15,8 @@ from nl2sql.executor import Executor
15
  from nl2sql.verifier import Verifier
16
  from nl2sql.repair import Repair
17
  from nl2sql.stubs import NoOpExecutor, NoOpRepair, NoOpVerifier
18
- from nl2sql.metrics import stage_duration_ms, pipeline_runs_total, repair_attempts_total
 
19
  from nl2sql.errors.codes import ErrorCode
20
  from nl2sql.context_engineering.render import render_schema_pack
21
  from nl2sql.context_engineering.engineer import ContextEngineer
@@ -58,6 +59,7 @@ class Pipeline:
58
  verifier: Optional[Verifier] = None,
59
  repair: Optional[Repair] = None,
60
  context_engineer: ContextEngineer | None = None,
 
61
  ):
62
  self.detector = detector
63
  self.planner = planner
@@ -69,6 +71,7 @@ class Pipeline:
69
  # If the verifier explicitly requires verification, enforce it in finalize.
70
  self.require_verification = bool(getattr(self.verifier, "required", False))
71
  self.context_engineer = context_engineer
 
72
 
73
  # ---------------------------- helpers ----------------------------
74
  @staticmethod
@@ -203,7 +206,7 @@ class Pipeline:
203
  r = self._safe_stage(fn, **kwargs)
204
  dt = (time.perf_counter() - t0) * 1000.0
205
 
206
- stage_duration_ms.labels(stage_name).observe(dt)
207
 
208
  # attach stage trace
209
  if getattr(r, "trace", None):
@@ -224,7 +227,7 @@ class Pipeline:
224
  # stage failed → check repair availability
225
  eligible, reason = self._should_repair(stage_name, r)
226
  if not eligible:
227
- repair_attempts_total.labels(outcome="skipped").inc()
228
  # annotate latest stage trace entry
229
  if traces and isinstance(traces[-1], dict):
230
  notes = traces[-1].get("notes") or {}
@@ -243,12 +246,12 @@ class Pipeline:
243
  repair_args = repair_input_builder(r, kwargs)
244
 
245
  # --- 3) Run repair (always logged) ---
246
- repair_attempts_total.labels(outcome="attempt").inc()
247
  t1 = time.perf_counter()
248
  r_fix = self._safe_stage(self.repair.run, **repair_args)
249
  dt_fix = (time.perf_counter() - t1) * 1000.0
250
 
251
- stage_duration_ms.labels("repair").observe(dt_fix)
252
 
253
  if getattr(r_fix, "trace", None):
254
  traces.append(r_fix.trace.__dict__)
@@ -263,7 +266,7 @@ class Pipeline:
263
  )
264
 
265
  if not r_fix.ok:
266
- repair_attempts_total.labels(outcome="failed").inc()
267
  return r # repair itself failed → stop here
268
 
269
  # --- 4) Only inject SQL if the stage is an SQL-producing stage ---
@@ -273,10 +276,10 @@ class Pipeline:
273
 
274
  # important: success metric must reflect if repair was applied meaningfully
275
  if stage_name in self.SQL_REPAIR_STAGES:
276
- repair_attempts_total.labels(outcome="success").inc()
277
  else:
278
  # log-only mode counts as a success-attempt but not semantic success
279
- repair_attempts_total.labels(outcome="success").inc()
280
 
281
  # for SQL stages, we re-run the stage again with modified kwargs
282
  # for log-only stages, this simply loops and stage is re-run unchanged
@@ -383,7 +386,7 @@ class Pipeline:
383
  questions = self.detector.detect(user_query, schema_preview)
384
  dt = (time.perf_counter() - t0) * 1000.0
385
  is_amb = bool(questions)
386
- stage_duration_ms.labels("detector").observe(dt)
387
  traces.append(
388
  self._mk_trace(
389
  stage="detector",
@@ -393,7 +396,7 @@ class Pipeline:
393
  )
394
  )
395
  if questions:
396
- pipeline_runs_total.labels(status="ambiguous").inc()
397
  return FinalResult(
398
  ok=True,
399
  ambiguous=True,
@@ -428,7 +431,7 @@ class Pipeline:
428
  **planner_kwargs,
429
  )
430
  if not r_plan.ok:
431
- pipeline_runs_total.labels(status="error").inc()
432
  return FinalResult(
433
  ok=False,
434
  ambiguous=False,
@@ -466,7 +469,7 @@ class Pipeline:
466
  **gen_kwargs,
467
  )
468
  if not r_gen.ok:
469
- pipeline_runs_total.labels(status="error").inc()
470
  return FinalResult(
471
  ok=False,
472
  ambiguous=False,
@@ -512,7 +515,7 @@ class Pipeline:
512
 
513
  # Guard: empty SQL
514
  if not sql or not str(sql).strip():
515
- pipeline_runs_total.labels(status="error").inc()
516
  traces.append(
517
  self._mk_trace("generator", 0.0, "failed", {"reason": "empty_sql"})
518
  )
@@ -540,7 +543,7 @@ class Pipeline:
540
  traces=traces,
541
  )
542
  if not r_safe.ok:
543
- pipeline_runs_total.labels(status="error").inc()
544
  return FinalResult(
545
  ok=False,
546
  ambiguous=False,
@@ -588,7 +591,7 @@ class Pipeline:
588
  traces=traces,
589
  )
590
  dt = (time.perf_counter() - t0) * 1000.0
591
- stage_duration_ms.labels("verifier").observe(dt)
592
 
593
  # Attach a trace entry if verifier didn't provide one
594
  if getattr(r_ver, "trace", None):
@@ -648,7 +651,7 @@ class Pipeline:
648
  "schema_preview": schema_for_llm,
649
  }
650
 
651
- repair_attempts_total.labels(outcome="attempt").inc()
652
  r_rep = self.repair.run(**rep_kwargs)
653
 
654
  new_sql = (
@@ -670,7 +673,7 @@ class Pipeline:
670
  traces=traces,
671
  )
672
  if not r_safe2.ok:
673
- pipeline_runs_total.labels(status="error").inc()
674
  return FinalResult(
675
  ok=False,
676
  ambiguous=False,
@@ -710,11 +713,11 @@ class Pipeline:
710
  verified = bool(data2.get("verified") is True)
711
 
712
  if verified:
713
- repair_attempts_total.labels(outcome="success").inc()
714
  else:
715
- repair_attempts_total.labels(outcome="failed").inc()
716
  else:
717
- repair_attempts_total.labels(outcome="skipped").inc()
718
 
719
  # --- 8) optional soft auto-verify (executor success, no details) --- (executor success, no details) ---
720
  if (verified is None or not verified) and not details:
@@ -753,7 +756,7 @@ class Pipeline:
753
  else:
754
  verified_final = bool(verified)
755
 
756
- pipeline_runs_total.labels(status=("ok" if ok else "error")).inc()
757
 
758
  traces.append(
759
  self._mk_trace(
@@ -782,12 +785,12 @@ class Pipeline:
782
  )
783
 
784
  except Exception:
785
- pipeline_runs_total.labels(status="error").inc()
786
  # bubble up to make failures visible in tests and logs
787
  raise
788
 
789
  finally:
790
  # Always record total latency, even on early return/exception
791
- stage_duration_ms.labels("pipeline_total").observe(
792
- (time.perf_counter() - t_all0) * 1000.0
793
  )
 
15
  from nl2sql.verifier import Verifier
16
  from nl2sql.repair import Repair
17
  from nl2sql.stubs import NoOpExecutor, NoOpRepair, NoOpVerifier
18
+ from adapters.metrics.base import Metrics
19
+ from adapters.metrics.noop import NoOpMetrics
20
  from nl2sql.errors.codes import ErrorCode
21
  from nl2sql.context_engineering.render import render_schema_pack
22
  from nl2sql.context_engineering.engineer import ContextEngineer
 
59
  verifier: Optional[Verifier] = None,
60
  repair: Optional[Repair] = None,
61
  context_engineer: ContextEngineer | None = None,
62
+ metrics: Metrics | None = None,
63
  ):
64
  self.detector = detector
65
  self.planner = planner
 
71
  # If the verifier explicitly requires verification, enforce it in finalize.
72
  self.require_verification = bool(getattr(self.verifier, "required", False))
73
  self.context_engineer = context_engineer
74
+ self.metrics: Metrics = metrics or NoOpMetrics()
75
 
76
  # ---------------------------- helpers ----------------------------
77
  @staticmethod
 
206
  r = self._safe_stage(fn, **kwargs)
207
  dt = (time.perf_counter() - t0) * 1000.0
208
 
209
+ self.metrics.observe_stage_duration_ms(stage=stage_name, dt_ms=dt)
210
 
211
  # attach stage trace
212
  if getattr(r, "trace", None):
 
227
  # stage failed → check repair availability
228
  eligible, reason = self._should_repair(stage_name, r)
229
  if not eligible:
230
+ self.metrics.inc_repair_attempt(outcome="skipped")
231
  # annotate latest stage trace entry
232
  if traces and isinstance(traces[-1], dict):
233
  notes = traces[-1].get("notes") or {}
 
246
  repair_args = repair_input_builder(r, kwargs)
247
 
248
  # --- 3) Run repair (always logged) ---
249
+ self.metrics.inc_repair_attempt(outcome="attempt")
250
  t1 = time.perf_counter()
251
  r_fix = self._safe_stage(self.repair.run, **repair_args)
252
  dt_fix = (time.perf_counter() - t1) * 1000.0
253
 
254
+ self.metrics.observe_stage_duration_ms(stage="repair", dt_ms=dt_fix)
255
 
256
  if getattr(r_fix, "trace", None):
257
  traces.append(r_fix.trace.__dict__)
 
266
  )
267
 
268
  if not r_fix.ok:
269
+ self.metrics.inc_repair_attempt(outcome="failed")
270
  return r # repair itself failed → stop here
271
 
272
  # --- 4) Only inject SQL if the stage is an SQL-producing stage ---
 
276
 
277
  # important: success metric must reflect if repair was applied meaningfully
278
  if stage_name in self.SQL_REPAIR_STAGES:
279
+ self.metrics.inc_repair_attempt(outcome="success")
280
  else:
281
  # log-only mode counts as a success-attempt but not semantic success
282
+ self.metrics.inc_repair_attempt(outcome="success")
283
 
284
  # for SQL stages, we re-run the stage again with modified kwargs
285
  # for log-only stages, this simply loops and stage is re-run unchanged
 
386
  questions = self.detector.detect(user_query, schema_preview)
387
  dt = (time.perf_counter() - t0) * 1000.0
388
  is_amb = bool(questions)
389
+ self.metrics.observe_stage_duration_ms(stage="detector", dt_ms=dt)
390
  traces.append(
391
  self._mk_trace(
392
  stage="detector",
 
396
  )
397
  )
398
  if questions:
399
+ self.metrics.inc_pipeline_run(status="ambiguous")
400
  return FinalResult(
401
  ok=True,
402
  ambiguous=True,
 
431
  **planner_kwargs,
432
  )
433
  if not r_plan.ok:
434
+ self.metrics.inc_pipeline_run(status="error")
435
  return FinalResult(
436
  ok=False,
437
  ambiguous=False,
 
469
  **gen_kwargs,
470
  )
471
  if not r_gen.ok:
472
+ self.metrics.inc_pipeline_run(status="error")
473
  return FinalResult(
474
  ok=False,
475
  ambiguous=False,
 
515
 
516
  # Guard: empty SQL
517
  if not sql or not str(sql).strip():
518
+ self.metrics.inc_pipeline_run(status="error")
519
  traces.append(
520
  self._mk_trace("generator", 0.0, "failed", {"reason": "empty_sql"})
521
  )
 
543
  traces=traces,
544
  )
545
  if not r_safe.ok:
546
+ self.metrics.inc_pipeline_run(status="error")
547
  return FinalResult(
548
  ok=False,
549
  ambiguous=False,
 
591
  traces=traces,
592
  )
593
  dt = (time.perf_counter() - t0) * 1000.0
594
+ self.metrics.observe_stage_duration_ms(stage="verifier", dt_ms=dt)
595
 
596
  # Attach a trace entry if verifier didn't provide one
597
  if getattr(r_ver, "trace", None):
 
651
  "schema_preview": schema_for_llm,
652
  }
653
 
654
+ self.metrics.inc_repair_attempt(outcome="attempt")
655
  r_rep = self.repair.run(**rep_kwargs)
656
 
657
  new_sql = (
 
673
  traces=traces,
674
  )
675
  if not r_safe2.ok:
676
+ self.metrics.inc_pipeline_run(status="error")
677
  return FinalResult(
678
  ok=False,
679
  ambiguous=False,
 
713
  verified = bool(data2.get("verified") is True)
714
 
715
  if verified:
716
+ self.metrics.inc_repair_attempt(outcome="success")
717
  else:
718
+ self.metrics.inc_repair_attempt(outcome="failed")
719
  else:
720
+ self.metrics.inc_repair_attempt(outcome="skipped")
721
 
722
  # --- 8) optional soft auto-verify (executor success, no details) --- (executor success, no details) ---
723
  if (verified is None or not verified) and not details:
 
756
  else:
757
  verified_final = bool(verified)
758
 
759
+ self.metrics.inc_pipeline_run(status=("ok" if ok else "error"))
760
 
761
  traces.append(
762
  self._mk_trace(
 
785
  )
786
 
787
  except Exception:
788
+ self.metrics.inc_pipeline_run(status="error")
789
  # bubble up to make failures visible in tests and logs
790
  raise
791
 
792
  finally:
793
  # Always record total latency, even on early return/exception
794
+ self.metrics.observe_stage_duration_ms(
795
+ stage="pipeline_total", dt_ms=(time.perf_counter() - t_all0) * 1000.0
796
  )
nl2sql/pipeline_factory.py CHANGED
@@ -12,6 +12,9 @@ except Exception:
12
  pass
13
 
14
  from nl2sql.pipeline import Pipeline
 
 
 
15
  from nl2sql.registry import (
16
  DETECTORS,
17
  PLANNERS,
@@ -67,6 +70,13 @@ def _is_pytest() -> bool:
67
  return bool(os.getenv("PYTEST_CURRENT_TEST"))
68
 
69
 
 
 
 
 
 
 
 
70
  def _tr(
71
  stage: str,
72
  *,
@@ -214,6 +224,7 @@ def pipeline_from_config(path: str) -> Pipeline:
214
  verifier=verifier,
215
  repair=repair,
216
  context_engineer=context_engineer,
 
217
  )
218
 
219
 
@@ -327,4 +338,5 @@ def pipeline_from_config_with_adapter(path: str, *, adapter: DBAdapter) -> Pipel
327
  executor=executor,
328
  verifier=verifier,
329
  repair=repair,
 
330
  )
 
12
  pass
13
 
14
  from nl2sql.pipeline import Pipeline
15
+ from adapters.metrics.base import Metrics
16
+ from adapters.metrics.noop import NoOpMetrics
17
+ from adapters.metrics.prometheus import PrometheusMetrics
18
  from nl2sql.registry import (
19
  DETECTORS,
20
  PLANNERS,
 
70
  return bool(os.getenv("PYTEST_CURRENT_TEST"))
71
 
72
 
73
+ def _make_metrics() -> Metrics:
74
+ # Under pytest, keep metrics side-effect free.
75
+ if _is_pytest():
76
+ return NoOpMetrics()
77
+ return PrometheusMetrics()
78
+
79
+
80
  def _tr(
81
  stage: str,
82
  *,
 
224
  verifier=verifier,
225
  repair=repair,
226
  context_engineer=context_engineer,
227
+ metrics=_make_metrics(),
228
  )
229
 
230
 
 
338
  executor=executor,
339
  verifier=verifier,
340
  repair=repair,
341
+ metrics=_make_metrics(),
342
  )