File size: 3,066 Bytes
9ec4919
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
{
  "name": "Model routing",
  "objective": "Keep agent work on the right model for each task by routing on measured quality, latency, privacy, and cost rather than pinning everything to one model.",
  "trigger": {
    "type": "scheduled",
    "cadence_or_event": "Daily or weekly review; also when a new model ships or a cost or latency threshold is crossed."
  },
  "intake": {
    "sources": ["per-task telemetry", "routing policy", "eval results", "model pricing and availability"],
    "selection_rule": "Review task classes whose quality, latency, or cost crosses a threshold; propose the smallest change that preserves quality and privacy."
  },
  "workspace": {
    "isolation": "Read-only access to telemetry, eval results, pricing, and the current routing policy; routing changes are exercised by offline replay only.",
    "allowed_actions": ["cluster tasks by class", "measure per-model quality, latency, and cost", "run offline replays", "open a routing-policy proposal"],
    "disallowed_actions": ["change production routing", "move a task to a non-approved model", "cross a privacy tier without review"]
  },
  "context": {
    "required_files": ["routing policy", "privacy and data-residency rules", "eval baselines"],
    "runtime_sources": ["recent traces", "cost and latency dashboards", "model availability and pricing"]
  },
  "agents": [
    {
      "role": "Analyst",
      "responsibility": "Cluster tasks by class and measure quality, latency, and cost per model."
    },
    {
      "role": "Proposer",
      "responsibility": "Suggest routing changes such as cheaper models for easy classes or fallbacks for hard ones."
    },
    {
      "role": "Verifier",
      "responsibility": "Replay a representative sample on the proposed routing to confirm quality holds."
    },
    {
      "role": "Reporter",
      "responsibility": "Record the proposed policy, evidence, and privacy constraints checked."
    }
  ],
  "verification": {
    "gates": ["proposed routes are replayed on a representative sample", "quality stays within tolerance of baseline", "privacy and residency constraints are checked per rerouted class", "cost and latency deltas include sample size and variance"],
    "receipts": ["task-class metrics", "proposed routes", "replay results", "privacy checks"]
  },
  "state": {
    "artifacts": ["routing-policy proposal", "routing decision log"],
    "update_rule": "Persist task-class metrics, proposals, replay evidence, and privacy checks so changes are auditable and reversible."
  },
  "budget": {
    "max_retries": 2,
    "max_runtime_minutes": 120
  },
  "escalation": {
    "conditions": ["quality-versus-cost tradeoff", "privacy-tier change", "data-residency question", "model deprecation forcing migration", "customer-impacting latency change"],
    "destination": "Workflow owner via the routing-policy proposal"
  },
  "exit": {
    "success": "A replay-verified routing proposal preserves quality and privacy, or the review confirms that the current routing is already optimal.",
    "stop_without_success": "A quality, latency, cost, or privacy tradeoff needs owner approval."
  }
}