VoiceDirector / config / evaluation_contract.json
dsa2dsads's picture
demo: package VoiceDirector stage-1 integration app
c0c4a30 verified
{
"route_id": "R-001",
"internal_benchmark_only": true,
"evaluation_constraints": {
"h002_precision_scope": "Any H-002 precision claim must stay inside the internal benchmark built from workspace/02_research/live_commerce_eval_scripts-20260401-2334.csv and later derived sanity controls.",
"h004_formal_validation_phase": "stakeholder_demo"
},
"h003_mos": {
"threshold": 3.0,
"scale_max": 5,
"minimum_raters": 3,
"rubric": [
{
"score": 1,
"label": "unusable",
"description": "Speech quality is distracting or confusing enough that even internal rehearsal should not rely on it."
},
{
"score": 2,
"label": "weak",
"description": "Meaning is understandable, but naturalness is too low for a credible demo turn."
},
{
"score": 3,
"label": "acceptable",
"description": "Speech is understandable and acceptable for a controlled internal demo despite obvious synthetic artifacts."
},
{
"score": 4,
"label": "good",
"description": "Speech is natural enough for stakeholder rehearsal with only minor synthetic artifacts."
},
{
"score": 5,
"label": "strong",
"description": "Speech is consistently natural, clear, and production-like for the target use case."
}
]
}
}