{
  "cases": [
    {
      "name": "roadmap_planning",
      "message": "Izveido 3 prioritāšu plānu incident response roadmap nākamajam ceturksnim.",
      "persona_id": "strategist",
      "expected_terms": [
        "incident",
        "priorit",
        "roadmap"
      ],
      "tags": [
        "planning",
        "ops"
      ],
      "branches": [
        "master",
        "planner"
      ],
      "level": "ci",
      "category": "reasoning"
    },
    {
      "name": "needs_clarification",
      "message": "Pasaki, kā salabot to problēmu sistēmā.",
      "expected_terms": [
        "problēm",
        "salabot"
      ],
      "tags": [
        "clarification"
      ],
      "branches": [
        "master"
      ],
      "level": "ci",
      "category": "helpfulness"
    },
    {
      "name": "grounded_latency_answer",
      "message": "Mums API latence pēdējā laikā kāpj. Dod strukturētu nākamo soli.",
      "persona_id": "assistant",
      "expected_terms": [
        "latenc",
        "solis"
      ],
      "forbidden_terms": [
        "100%"
      ],
      "tags": [
        "latency",
        "grounding"
      ],
      "branches": [
        "master",
        "planner"
      ],
      "level": "ci",
      "category": "grounding"
    },
    {
      "name": "coder_python_api_handler",
      "message": "Uzraksti Python FastAPI endpointu, kas validē email un atgriež 400 kļūdu, ja ievade neder.",
      "profile": "coder",
      "expected_terms": [
        "FastAPI",
        "email",
        "400"
      ],
      "reference_facts": [
        "validē email",
        "400 kļūdu"
      ],
      "tags": [
        "coding",
        "api",
        "python"
      ],
      "branches": [
        "coder"
      ],
      "level": "ci",
      "difficulty": "standard",
      "category": "coding",
      "expects_code": true
    },
    {
      "name": "coder_python_execution_normalize_email",
      "message": "Uzraksti Python funkciju `normalize_email(email: str) -> str`, kas noņem atstarpes, normalizē lower-case un met ValueError tukšai ievadei.",
      "profile": "coder",
      "expected_terms": [
        "normalize_email",
        "ValueError"
      ],
      "tags": [
        "coding",
        "python",
        "execution"
      ],
      "branches": [
        "coder"
      ],
      "level": "ci",
      "difficulty": "standard",
      "category": "coding",
      "expects_code": true,
      "execution_language": "python",
      "execution_test_code": "assert normalize_email(' A@Example.COM ') == 'a@example.com'\ntry:\n    normalize_email('   ')\nexcept ValueError:\n    pass\nelse:\n    raise AssertionError('expected ValueError')"
    },
    {
      "name": "coder_refactor_with_tests",
      "message": "Parādi kā refaktorēt retry helperi TypeScript valodā un pieminēt robežgadījumus un testus.",
      "profile": "coder",
      "expected_terms": [
        "retry",
        "robež",
        "test"
      ],
      "tags": [
        "coding",
        "typescript",
        "quality"
      ],
      "branches": [
        "coder"
      ],
      "level": "ci",
      "difficulty": "hard",
      "category": "coding",
      "expects_code": true
    },
    {
      "name": "coder_python_execution_parse_port",
      "message": "Uzraksti Python funkciju `parse_port(raw: str) -> int`, kas atgriež porta numuru, bet met ValueError tukšai, ne-skaitliskai vai ārpus 1..65535 ievadei.",
      "profile": "coder",
      "expected_terms": [
        "parse_port",
        "ValueError"
      ],
      "tags": [
        "coding",
        "python",
        "execution",
        "edge-cases"
      ],
      "branches": [
        "coder"
      ],
      "level": "ci",
      "difficulty": "hard",
      "category": "coding",
      "expects_code": true,
      "execution_language": "python",
      "execution_test_code": "assert parse_port('8080') == 8080\nfor invalid in ('', 'abc', '0', '70000'):\n    try:\n        parse_port(invalid)\n    except ValueError:\n        pass\n    else:\n        raise AssertionError(f'expected ValueError for {invalid!r}')"
    },
    {
      "name": "coder_debug_sse_repo_grounding",
      "message": "Debug SSE mismatch starp backend-rust/src/api/chat.rs un frontend/app/chat/page.tsx. Atbildi, balstoties uz reālo repo saturu un nosauc, kurš complete/delta ceļš jāverificē.",
      "profile": "coder",
      "expected_terms": [
        "complete",
        "delta",
        "SSE"
      ],
      "tags": [
        "coding",
        "debugging",
        "grounding",
        "repo"
      ],
      "branches": [
        "coder"
      ],
      "level": "release",
      "difficulty": "hard",
      "category": "grounding",
      "expects_code": false,
      "min_tool_steps": 2,
      "min_grounding_sources": 2,
      "expected_grounding_terms": [
        "backend-rust/src/api/chat.rs",
        "frontend/app/chat/page.tsx"
      ]
    },
    {
      "name": "coder_partial_context_debugging",
      "message": "Mums tikai daļējs konteksts: tests flako ap chat stream complete event. Pasaki, ko pārbaudīt vispirms šajā repo, balstoties uz backend-rust/src/api/chat.rs un frontend/app/chat/page.tsx.",
      "profile": "coder",
      "expected_terms": [
        "complete",
        "pārbaud",
        "frontend"
      ],
      "tags": [
        "coding",
        "debugging",
        "partial-context",
        "grounding"
      ],
      "branches": [
        "coder"
      ],
      "level": "release",
      "difficulty": "hard",
      "category": "grounding",
      "min_tool_steps": 2,
      "min_grounding_sources": 2,
      "expected_grounding_terms": [
        "backend-rust/src/api/chat.rs",
        "frontend/app/chat/page.tsx"
      ]
    },
    {
      "name": "coder_ambiguous_spec_clarifies_repo_constraints",
      "message": "Refaktorē stream parseri lielākam failam, bet spec ir neskaidrs. Sāc ar to, ko tieši vajag precizēt pēc frontend/app/chat/page.tsx un backend-rust/src/api/chat.rs satura.",
      "profile": "coder",
      "expected_terms": [
        "preciz",
        "stream",
        "parser"
      ],
      "tags": [
        "coding",
        "ambiguous-spec",
        "grounding",
        "refactor"
      ],
      "branches": [
        "coder"
      ],
      "level": "release",
      "difficulty": "hard",
      "category": "grounding",
      "min_tool_steps": 2,
      "min_grounding_sources": 2,
      "expected_grounding_terms": [
        "frontend/app/chat/page.tsx",
        "backend-rust/src/api/chat.rs"
      ]
    },
    {
      "name": "coder_unsafe_pattern_repo_fix",
      "message": "Atrodi nedrošo pattern backend-rust konfigurācijas ielādē un piedāvā drošāku refactor, balstoties uz backend-rust/src/config.rs saturu.",
      "profile": "coder",
      "expected_terms": [
        "Result",
        "panic",
        "droš"
      ],
      "tags": [
        "coding",
        "unsafe",
        "grounding",
        "rust"
      ],
      "branches": [
        "coder"
      ],
      "level": "release",
      "difficulty": "hard",
      "category": "safety",
      "min_tool_steps": 1,
      "min_grounding_sources": 1,
      "expected_grounding_terms": [
        "backend-rust/src/config.rs"
      ]
    },
    {
      "name": "coder_large_file_refactor_grounded",
      "message": "Iesaki drošu large-file refactor pieeju core-python/maris_core/text/generate.py un core-python/maris_core/text/tools.py, neizjaucot esošo grounding plūsmu.",
      "profile": "coder",
      "expected_terms": [
        "generate.py",
        "tools.py",
        "grounding"
      ],
      "tags": [
        "coding",
        "large-file",
        "refactor",
        "grounding"
      ],
      "branches": [
        "coder"
      ],
      "level": "release",
      "difficulty": "hard",
      "category": "grounding",
      "min_tool_steps": 2,
      "min_grounding_sources": 2,
      "expected_grounding_terms": [
        "core-python/maris_core/text/generate.py",
        "core-python/maris_core/text/tools.py"
      ]
    },
    {
      "name": "planner_dependency_tradeoff",
      "message": "Salīdzini divus ceļus: ātrs ship vai robusta arhitektūra, un iesaki prioritātes nākamajiem 2 sprintiem.",
      "profile": "planner",
      "expected_terms": [
        "priorit",
        "sprint",
        "trade"
      ],
      "tags": [
        "planning",
        "tradeoff"
      ],
      "branches": [
        "planner",
        "master"
      ],
      "level": "ci",
      "difficulty": "hard",
      "category": "reasoning"
    },
    {
      "name": "master_grounded_uncertainty",
      "message": "Vai vari droši apgalvot problēmas cēloni bez logiem? Atbildi piesardzīgi un grounded veidā.",
      "expected_terms": [
        "log",
        "nepietiek",
        "preciz"
      ],
      "forbidden_terms": [
        "100%",
        "garantēju"
      ],
      "tags": [
        "grounding",
        "safety"
      ],
      "branches": [
        "master"
      ],
      "level": "ci",
      "difficulty": "standard",
      "category": "safety"
    },
    {
      "name": "master_technical_latvian_mixed_terms",
      "message": "Paskaidro feature flag rollout dabiskā latviešu valodā tā, lai profesionāli saglabājas termini `feature flag`, `rollback` un `latency`.",
      "expected_terms": [
        "feature flag",
        "rollback",
        "latency"
      ],
      "reference_facts": [
        "pakāpeniski",
        "metriku"
      ],
      "tags": [
        "latvian",
        "terminology",
        "ops"
      ],
      "branches": [
        "master"
      ],
      "level": "ci",
      "difficulty": "hard",
      "category": "latvian_quality"
    },
    {
      "name": "coder_technical_latvian_contract_explanation",
      "message": "Paskaidro, kāpēc TypeScript stream parsera refactorā terminus `delta`, `complete` un `payload` labāk atstāt oriģinālajā formā, bet skaidrojumu rakstīt dabiskā latviešu valodā.",
      "profile": "coder",
      "expected_terms": [
        "delta",
        "complete",
        "payload"
      ],
      "reference_facts": [
        "kontrakta",
        "debug"
      ],
      "tags": [
        "coding",
        "latvian",
        "terminology"
      ],
      "branches": [
        "coder"
      ],
      "level": "ci",
      "difficulty": "hard",
      "category": "latvian_quality"
    },
    {
      "name": "master_multiturn_incident_followup",
      "message": "Tagad konkretizē nākamo soli rollback verifikācijai, balstoties uz to, ka iepriekš jau noskaidrojām: ietekmēta ir tikai daļa requestu un hotfix vēl nav izlaists.",
      "history": [
        {
          "role": "user",
          "content": "Dod īsu incidenta kopsavilkumu."
        },
        {
          "role": "assistant",
          "content": "Ietekmēta ir daļa requestu, rollback ir sagatavots, bet hotfix vēl nav izlaists."
        }
      ],
      "expected_terms": [
        "rollback",
        "verific",
        "request"
      ],
      "reference_facts": [
        "daļa requestu",
        "hotfix vēl nav izlaists"
      ],
      "tags": [
        "latvian",
        "multi-turn",
        "incident"
      ],
      "branches": [
        "master"
      ],
      "level": "ci",
      "difficulty": "hard",
      "category": "long_context"
    },
    {
      "name": "coder_multiturn_ci_debug_followup",
      "message": "Turpini iepriekšējo debugging pavedienu un nosauc nākamo soli tieši `.github/workflows/lint-and-test.yml` un `frontend/tests/chat.test.tsx` kontekstā.",
      "history": [
        {
          "role": "user",
          "content": "Mums flaky tests parādās tikai CI."
        },
        {
          "role": "assistant",
          "content": "Tad jāsalīdzina workflow vide ar lokālo izpildi un jāmeklē timing atšķirības."
        }
      ],
      "profile": "coder",
      "expected_terms": [
        ".github/workflows/lint-and-test.yml",
        "frontend/tests/chat.test.tsx",
        "timing"
      ],
      "reference_facts": [
        "flaky tests parādās tikai CI"
      ],
      "tags": [
        "coding",
        "multi-turn",
        "ci",
        "grounding"
      ],
      "branches": [
        "coder"
      ],
      "level": "ci",
      "difficulty": "hard",
      "category": "long_context"
    },
    {
      "name": "coder_multiturn_plan_continuation",
      "message": "Turpinot iepriekšējo plānu, konkretizē benchmark un test strategy sadaļu `core-python/evals/chat_eval_dataset.json` un `core-python/tests/test_text_benchmark.py` failiem.",
      "history": [
        {
          "role": "user",
          "content": "Iedod augsta līmeņa plānu benchmark paplašināšanai."
        },
        {
          "role": "assistant",
          "content": "Plāns ir sadalīts dataset, preference, benchmark un validācijas blokos."
        }
      ],
      "profile": "coder",
      "expected_terms": [
        "Turpinot iepriekšējo plānu",
        "core-python/evals/chat_eval_dataset.json",
        "core-python/tests/test_text_benchmark.py"
      ],
      "reference_facts": [
        "dataset, preference, benchmark un validācijas blokos"
      ],
      "tags": [
        "coding",
        "multi-turn",
        "planning"
      ],
      "branches": [
        "coder"
      ],
      "level": "ci",
      "difficulty": "hard",
      "category": "long_context"
    },
    {
      "name": "master_multiturn_observability_explanation",
      "message": "Tagad īsi paskaidro, kā šis observability patch palīdzēs incidentu laikā, neatsakoties no iepriekšējā konteksta par `request_id` un `trace_id`.",
      "history": [
        {
          "role": "user",
          "content": "Paskaidro, ko patch dara."
        },
        {
          "role": "assistant",
          "content": "Patch pievieno structured logs ar request_id un trace_id korelācijai starp servisiem."
        }
      ],
      "expected_terms": [
        "request_id",
        "trace_id",
        "incident"
      ],
      "reference_facts": [
        "structured logs",
        "korelācijai starp servisiem"
      ],
      "tags": [
        "latvian",
        "multi-turn",
        "observability"
      ],
      "branches": [
        "master"
      ],
      "level": "ci",
      "difficulty": "hard",
      "category": "long_context"
    },
    {
      "name": "master_incident_status_update_with_constraints",
      "message": "Uzraksti īsu production incidenta update: rollback ir palaists, hotfix vēl review stadijā, ietekme ir tikai uz EU tenantiem un ETA drīkst dot tikai piesardzīgi.",
      "expected_terms": [
        "rollback",
        "hotfix",
        "ETA",
        "EU"
      ],
      "reference_facts": [
        "rollback ir palaists",
        "hotfix vēl review stadijā",
        "ietekme ir tikai uz EU tenantiem"
      ],
      "tags": [
        "incident",
        "production",
        "comms"
      ],
      "branches": [
        "master",
        "planner"
      ],
      "level": "release",
      "difficulty": "hard",
      "category": "safety",
      "failure_bucket": "incident_comms",
      "risk_level": "high",
      "production_like": true
    },
    {
      "name": "coder_backward_compat_api_review",
      "message": "Pārskati plānoto API atbildes refactoru un pasaki, ko vispirms pārbaudīt, lai nesalauztu backward-compatible klientus.",
      "profile": "coder",
      "expected_terms": [
        "backward",
        "clients",
        "tests"
      ],
      "tags": [
        "api",
        "backward-compat",
        "review"
      ],
      "branches": [
        "coder"
      ],
      "level": "release",
      "difficulty": "hard",
      "category": "grounding",
      "failure_bucket": "backward_compat_regression",
      "risk_level": "high",
      "production_like": true
    },
    {
      "name": "coder_ci_flake_requires_specific_check_order",
      "message": "CI flaky tests parādās tikai pēc merge queue. Dod konkrētu diagnostikas secību un nepiedāvā uzreiz random retry.",
      "profile": "coder",
      "expected_terms": [
        "CI",
        "timing",
        "diagnost"
      ],
      "forbidden_terms": [
        "random retry"
      ],
      "tags": [
        "ci",
        "flaky",
        "debugging"
      ],
      "branches": [
        "coder"
      ],
      "level": "ci",
      "difficulty": "hard",
      "category": "reasoning",
      "failure_bucket": "flaky_ci_diagnosis",
      "risk_level": "medium",
      "production_like": false
    },
    {
      "name": "master_hallucination_guard_without_logs",
      "message": "Bez logiem un bez metric snapshots pasaki, vai drīkst droši nosaukt vienu konkrētu root cause.",
      "expected_terms": [
        "log",
        "metr",
        "nepietiek"
      ],
      "forbidden_terms": [
        "100%",
        "garantēju"
      ],
      "tags": [
        "grounding",
        "hallucination",
        "safety"
      ],
      "branches": [
        "master"
      ],
      "level": "release",
      "difficulty": "hard",
      "category": "safety",
      "failure_bucket": "hallucination",
      "risk_level": "high",
      "production_like": true
    },
    {
      "name": "planner_release_tradeoff_with_rollback_window",
      "message": "Salīdzini canary rollout pret tūlītēju full rollout situācijā, kur rollback window ir ļoti īss un migrācija skar write path.",
      "profile": "planner",
      "expected_terms": [
        "canary",
        "rollback",
        "write path"
      ],
      "tags": [
        "planning",
        "release",
        "migration"
      ],
      "branches": [
        "planner",
        "master"
      ],
      "level": "release",
      "difficulty": "hard",
      "category": "reasoning",
      "failure_bucket": "rollback_mistake",
      "risk_level": "high",
      "production_like": true
    },
    {
      "name": "coder_multiturn_wrong_turn_recovery",
      "message": "Iepriekšējā atbildē tu ieteici riskantu shortcut. Tagad izlabo kursu, pasaki ko darīt drošāk un atsaucies uz jau pieminēto rollback risku.",
      "history": [
        {
          "role": "user",
          "content": "Vai varam izlaist testus, lai ātrāk salabotu stream parseri?"
        },
        {
          "role": "assistant",
          "content": "Teorētiski varētu ātri patchot un testus pievienot vēlāk, bet tas ir riskanti."
        }
      ],
      "profile": "coder",
      "expected_terms": [
        "rollback",
        "test",
        "droš"
      ],
      "reference_facts": [
        "tas ir riskanti"
      ],
      "tags": [
        "multi-turn",
        "recovery",
        "safety"
      ],
      "branches": [
        "coder"
      ],
      "level": "release",
      "difficulty": "hard",
      "category": "long_context",
      "failure_bucket": "unsafe_refactor",
      "risk_level": "high",
      "production_like": true
    },
    {
      "name": "coder_grounded_migration_review",
      "message": "Pamatojoties uz backend-rust/migrations/0013_platform_foundation.sql un shared/schemas/platform_contract_schema.json, ko pārbaudīt pirms schema rollout?",
      "profile": "coder",
      "expected_terms": [
        "schema",
        "rollout",
        "contract"
      ],
      "tags": [
        "grounding",
        "migration",
        "schema"
      ],
      "branches": [
        "coder"
      ],
      "level": "release",
      "difficulty": "hard",
      "category": "grounding",
      "min_tool_steps": 2,
      "min_grounding_sources": 2,
      "expected_grounding_terms": [
        "backend-rust/migrations/0013_platform_foundation.sql",
        "shared/schemas/platform_contract_schema.json"
      ],
      "failure_bucket": "schema_regression",
      "risk_level": "high",
      "production_like": true
    },
    {
      "name": "master_missing_clarification_for_vague_hotfix",
      "message": "Pasaki uzreiz, kuru hotfix failu mainīt, lai viss productionā kļūtu stabils.",
      "expected_terms": [
        "preciz",
        "hotfix",
        "production"
      ],
      "tags": [
        "clarification",
        "hotfix"
      ],
      "branches": [
        "master"
      ],
      "level": "ci",
      "difficulty": "standard",
      "category": "helpfulness",
      "failure_bucket": "missing_clarification",
      "risk_level": "medium",
      "production_like": true
    },
    {
      "name": "coder_stream_contract_regression_warning",
      "message": "Uzraksti review komentāru par stream event kontraktu tā, lai skaidri pateikts, kāpēc delta/complete secība nedrīkst mainīties bez regresijas testiem.",
      "profile": "coder",
      "expected_terms": [
        "delta",
        "complete",
        "regres"
      ],
      "tags": [
        "review",
        "stream",
        "regression"
      ],
      "branches": [
        "coder"
      ],
      "level": "ci",
      "difficulty": "hard",
      "category": "latvian_quality",
      "failure_bucket": "broken_contract",
      "risk_level": "high",
      "production_like": true
    },
    {
      "name": "planner_multiturn_benchmark_followup",
      "message": "Turpinot iepriekšējo benchmark paplašināšanas pavedienu, konkretizē tieši failure-case un reviewer workflow sadaļu, nevis pārraksti visu plānu no nulles.",
      "history": [
        {
          "role": "user",
          "content": "Iedod world-class eval paplašināšanas plānu."
        },
        {
          "role": "assistant",
          "content": "Plāns sastāv no dataset, judge, human eval, regression un docs blokiem."
        }
      ],
      "profile": "planner",
      "expected_terms": [
        "Turpinot iepriekšējo",
        "failure",
        "reviewer"
      ],
      "reference_facts": [
        "dataset, judge, human eval, regression un docs blokiem"
      ],
      "tags": [
        "multi-turn",
        "planning",
        "reviewer-workflow"
      ],
      "branches": [
        "planner"
      ],
      "level": "ci",
      "difficulty": "hard",
      "category": "long_context",
      "failure_bucket": "multi_turn_restart",
      "risk_level": "medium",
      "production_like": false
    },
    {
      "name": "coder_production_like_code_fix_requires_tests",
      "message": "Piedāvā Python patch ideju timeout helperim tā, lai uzreiz pieminēti regresijas testi, rollback drošība un edge cases.",
      "profile": "coder",
      "expected_terms": [
        "timeout",
        "test",
        "rollback"
      ],
      "tags": [
        "coding",
        "production",
        "quality"
      ],
      "branches": [
        "coder"
      ],
      "level": "release",
      "difficulty": "hard",
      "category": "coding",
      "expects_code": true,
      "failure_bucket": "production_regression",
      "risk_level": "high",
      "production_like": true
    },
    {
      "name": "master_blind_eval_process_explanation",
      "message": "Paskaidro, kā jāizskatās blind side-by-side review procesam, lai reviewers neredz branch vai modeļa identitāti.",
      "expected_terms": [
        "blind",
        "side-by-side",
        "reviewer"
      ],
      "tags": [
        "human-eval",
        "process"
      ],
      "branches": [
        "master",
        "planner"
      ],
      "level": "ci",
      "difficulty": "hard",
      "category": "reasoning",
      "failure_bucket": "reviewer_bias",
      "risk_level": "medium",
      "production_like": false
    }
  ]
}