| { |
| "cases": [ |
| { |
| "name": "long-context-release", |
| "message": "Balstoties uz iepriekšējo sarunu, nosauc nākamos divus prioritāros soļus.", |
| "history": [ |
| { |
| "role": "user", |
| "content": "Mēs būvējam incident response roadmap lielai komandai." |
| }, |
| { |
| "role": "assistant", |
| "content": "Tu gribi prioritizēt alerting, ownership un postmortem procesu." |
| } |
| ], |
| "expected_terms": ["alerting", "ownership"], |
| "reference_facts": ["alerting", "ownership"], |
| "category": "long_context", |
| "level": "release", |
| "difficulty": "hard", |
| "tags": ["memory", "continuity"] |
| }, |
| { |
| "name": "helpfulness-release", |
| "message": "Palīdzi man izlemt, ko darīt tālāk, ja benchmark score krīt zem gate sliekšņa.", |
| "expected_terms": ["benchmark", "sliekšņa"], |
| "reference_facts": ["benchmark", "slieksnis"], |
| "category": "helpfulness", |
| "level": "release", |
| "difficulty": "hard", |
| "tags": ["release-gate", "operations"] |
| } |
| ] |
| } |
|
|