{
  "route_id": "R-001",
  "internal_benchmark_only": true,
  "evaluation_constraints": {
    "h002_precision_scope": "Any H-002 precision claim must stay inside the internal benchmark built from workspace/02_research/live_commerce_eval_scripts-20260401-2334.csv and later derived sanity controls.",
    "h004_formal_validation_phase": "stakeholder_demo"
  },
  "h003_mos": {
    "threshold": 3.0,
    "scale_max": 5,
    "minimum_raters": 3,
    "rubric": [
      {
        "score": 1,
        "label": "unusable",
        "description": "Speech quality is distracting or confusing enough that even internal rehearsal should not rely on it."
      },
      {
        "score": 2,
        "label": "weak",
        "description": "Meaning is understandable, but naturalness is too low for a credible demo turn."
      },
      {
        "score": 3,
        "label": "acceptable",
        "description": "Speech is understandable and acceptable for a controlled internal demo despite obvious synthetic artifacts."
      },
      {
        "score": 4,
        "label": "good",
        "description": "Speech is natural enough for stakeholder rehearsal with only minor synthetic artifacts."
      },
      {
        "score": 5,
        "label": "strong",
        "description": "Speech is consistently natural, clear, and production-like for the target use case."
      }
    ]
  }
}