cowWhySo's picture
Add fine-tuned classifier checkpoint
0602a1d verified
{
"schema_version": "toolcall-verifier-thresholds/v1",
"mode": "shadow",
"default_action": "allow",
"temperature": 1.0037251710891724,
"notes": [
"Deterministic guardrails remain authoritative.",
"Use ML in shadow mode first, then for advisory nudges, and for high-confidence enforcement only after evaluation results justify it.",
"deterministic_invalid is never enforced by ML in this default config; a threshold of 1.01 exceeds any attainable confidence and therefore disables that stage.",
"wrong_tool_semantic stays conservative because current Forge telemetry showed high-confidence false positives on valid terminal/summarize calls."
],
"labels": {
"valid": {
"action": "allow",
"advisory_min_confidence": 0.0,
"enforce_min_confidence": 1.01
},
"wrong_tool_semantic": {
"action": "advisory_then_enforce_after_eval",
"advisory_min_confidence": 1.01,
"enforce_min_confidence": 1.01
},
"wrong_arguments_semantic": {
"action": "advisory_then_enforce_after_eval",
"advisory_min_confidence": 0.9,
"enforce_min_confidence": 0.995
},
"tool_not_needed": {
"action": "advisory_then_enforce_after_eval",
"advisory_min_confidence": 0.8,
"enforce_min_confidence": 0.95
},
"needs_clarification": {
"action": "advisory_then_enforce_after_eval",
"advisory_min_confidence": 1.01,
"enforce_min_confidence": 1.01
},
"deterministic_invalid": {
"action": "deterministic_only",
"advisory_min_confidence": 1.01,
"enforce_min_confidence": 1.01
}
}
}