Spaces:
Sleeping
Sleeping
File size: 3,734 Bytes
c24bfe8 1c4cb86 666306b 1c4cb86 c24bfe8 f89e294 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
from prometheus_client import Counter, Histogram
from nl2sql.prom import REGISTRY
# -----------------------------------------------------------------------------
# Stage-level metrics
# -----------------------------------------------------------------------------
# Histogram of per-stage durations, documented as milliseconds and split by
# the "stage" label (e.g. detector | planner | generator | safety | verifier).
# Bucket upper bounds are given explicitly and run from 1 to 5000.
stage_duration_ms = Histogram(
    name="stage_duration_ms",
    documentation="Duration (ms) of each pipeline stage",
    labelnames=("stage",),
    registry=REGISTRY,
    buckets=(
        1, 2, 5,
        10, 20, 50,
        100, 200, 500,
        1000, 2000, 5000,
    ),
)
# -----------------------------------------------------------------------------
# Safety stage metrics
# -----------------------------------------------------------------------------
# Counter of SQL queries blocked by the safety checks, split by the "reason"
# label (e.g. forbidden_keyword, multiple_statements, non_readonly,
# explain_not_allowed).
safety_blocks_total = Counter(
    name="safety_blocks_total",
    documentation="Count of blocked SQL queries by safety checks",
    labelnames=("reason",),
    registry=REGISTRY,
)
# Counter of every SQL query examined by the safety stage; the "ok" label
# carries the string "true" or "false".
safety_checks_total = Counter(
    name="safety_checks_total",
    documentation="Total SQL queries checked by safety",
    labelnames=("ok",),
    registry=REGISTRY,
)
# -----------------------------------------------------------------------------
# Verifier stage metrics
# -----------------------------------------------------------------------------
# Counter of verifier checks; the "ok" label carries the string "true" for a
# success and "false" for a failure.
verifier_checks_total = Counter(
    name="verifier_checks_total",
    documentation="Count of verifier checks (success/failure)",
    labelnames=("ok",),
    registry=REGISTRY,
)
# Counter of verifier failures broken down by the "reason" label
# (e.g. parse_error, semantic_check_error, adapter_failure).
verifier_failures_total = Counter(
    name="verifier_failures_total",
    documentation="Count of verifier failures by type",
    labelnames=("reason",),
    registry=REGISTRY,
)
# -----------------------------------------------------------------------------
# Repair stage metrics
# -----------------------------------------------------------------------------
# Counter of repair-loop attempts; the "outcome" label is one of
# attempt | success | failed.
repair_attempts_total = Counter(
    name="repair_attempts_total",
    documentation="Number of repair loop attempts",
    labelnames=("outcome",),
    registry=REGISTRY,
)
# -----------------------------------------------------------------------------
# Pipeline-level metrics
# -----------------------------------------------------------------------------
# Counter of complete pipeline runs; the "status" label is one of
# ok | error | ambiguous.
pipeline_runs_total = Counter(
    name="pipeline_runs_total",
    documentation="Total number of full pipeline runs",
    labelnames=("status",),
    registry=REGISTRY,
)
# -----------------------------------------------------------------------------
# Cache metrics (optional)
# -----------------------------------------------------------------------------
# Counter of cache lookups in the pipeline; the "hit" label carries the
# string "true" for a hit and "false" for a miss.
cache_events_total = Counter(
    name="cache_events_total",
    documentation="Cache hit/miss events in the pipeline",
    labelnames=("hit",),
    registry=REGISTRY,
)
# -----------------------------------------------------------------------------
# Prime all counters with zero to ensure Grafana panels always have data
# -----------------------------------------------------------------------------
def _prime_with_zero(counter, label, values):
    """Create a child of *counter* for each value of *label* and add 0 to it.

    Touching every label combination at import time makes the series exist
    (with value 0) before any real event has been recorded.
    """
    for value in values:
        counter.labels(**{label: value}).inc(0)


# Reason values primed on BOTH "reason"-labelled counters.
# NOTE(review): the same combined list is applied to safety_blocks_total and
# verifier_failures_total, so each counter also exports zero-valued series for
# reasons that look specific to the other stage -- confirm this is intended.
_PRIMED_REASONS = (
    "forbidden_keyword",
    "multiple_statements",
    "non_readonly",
    "explain_not_allowed",
    "parse_error",
    "semantic_check_error",
    "adapter_failure",
    "unsafe-sql",
    "malformed-sql",
    "unknown",
)
_BOOLEAN_LABEL_VALUES = ("true", "false")

_prime_with_zero(safety_blocks_total, "reason", _PRIMED_REASONS)
_prime_with_zero(verifier_failures_total, "reason", _PRIMED_REASONS)

_prime_with_zero(safety_checks_total, "ok", _BOOLEAN_LABEL_VALUES)
_prime_with_zero(verifier_checks_total, "ok", _BOOLEAN_LABEL_VALUES)

_prime_with_zero(repair_attempts_total, "outcome", ("attempt", "success", "failed"))

_prime_with_zero(pipeline_runs_total, "status", ("ok", "error", "ambiguous"))

_prime_with_zero(cache_events_total, "hit", _BOOLEAN_LABEL_VALUES)
|