{ "name": "Model routing", "objective": "Keep agent work on the right model for each task by routing on measured quality, latency, privacy, and cost rather than pinning everything to one model.", "trigger": { "type": "scheduled", "cadence_or_event": "Daily or weekly review; also when a new model ships or a cost or latency threshold is crossed." }, "intake": { "sources": ["per-task telemetry", "routing policy", "eval results", "model pricing and availability"], "selection_rule": "Review task classes whose quality, latency, or cost crosses a threshold; propose the smallest change that preserves quality and privacy." }, "workspace": { "isolation": "Read access to telemetry, eval results, pricing, and the current routing policy; offline replay only.", "allowed_actions": ["cluster tasks by class", "measure per-model quality, latency, and cost", "run offline replays", "open a routing-policy proposal"], "disallowed_actions": ["change production routing", "move a task to a non-approved model", "cross a privacy tier without review"] }, "context": { "required_files": ["routing policy", "privacy and data-residency rules", "eval baselines"], "runtime_sources": ["recent traces", "cost and latency dashboards", "model availability and pricing"] }, "agents": [ { "role": "Analyst", "responsibility": "Cluster tasks by class and measure quality, latency, and cost per model." }, { "role": "Proposer", "responsibility": "Suggest routing changes such as cheaper models for easy classes or fallbacks for hard ones." }, { "role": "Verifier", "responsibility": "Replay a representative sample on the proposed routing to confirm quality holds." }, { "role": "Reporter", "responsibility": "Record the proposed policy, the supporting evidence, and the privacy constraints that were checked." 
} ], "verification": { "gates": ["proposed routes are replayed on a representative sample", "quality stays within tolerance of baseline", "privacy and residency constraints are checked per rerouted class", "cost and latency deltas include sample size and variance"], "receipts": ["task-class metrics", "proposed routes", "replay results", "privacy checks"] }, "state": { "artifacts": ["routing-policy proposal", "routing decision log"], "update_rule": "Persist task-class metrics, proposals, replay evidence, and privacy checks so changes are auditable and reversible." }, "budget": { "max_retries": 2, "max_runtime_minutes": 120 }, "escalation": { "conditions": ["quality-versus-cost tradeoff", "privacy-tier change", "data-residency question", "model deprecation forcing migration", "customer-impacting latency change"], "destination": "Workflow owner via the routing-policy proposal" }, "exit": { "success": "A replay-verified routing proposal preserves quality and privacy, or the current routing is already optimal.", "stop_without_success": "A quality, cost, or privacy tradeoff needs owner approval." } }