| { |
| "cases": [ |
| { |
| "name": "coder_python_execution_normalize_email", |
| "message": "Uzraksti Python funkciju `normalize_email(email: str) -> str`, kas noņem atstarpes, normalizē lower-case un met ValueError tukšai ievadei.", |
| "profile": "coder", |
| "expected_terms": ["normalize_email", "ValueError"], |
| "tags": ["coding", "python", "execution"], |
| "branches": ["coder"], |
| "level": "release", |
| "difficulty": "standard", |
| "category": "coding", |
| "expects_code": true, |
| "execution_language": "python", |
| "execution_test_code": "assert normalize_email(' A@Example.COM ') == 'a@example.com'\ntry:\n normalize_email(' ')\nexcept ValueError:\n pass\nelse:\n raise AssertionError('expected ValueError')" |
| }, |
| { |
| "name": "coder_python_execution_parse_port", |
| "message": "Uzraksti Python funkciju `parse_port(raw: str) -> int`, kas atgriež porta numuru, bet met ValueError tukšai, ne-skaitliskai vai ārpus 1..65535 ievadei.", |
| "profile": "coder", |
| "expected_terms": ["parse_port", "ValueError"], |
| "tags": ["coding", "python", "execution", "edge-cases"], |
| "branches": ["coder"], |
| "level": "release", |
| "difficulty": "hard", |
| "category": "coding", |
| "expects_code": true, |
| "execution_language": "python", |
| "execution_test_code": "assert parse_port('8080') == 8080\nfor invalid in ('', 'abc', '0', '70000'):\n try:\n parse_port(invalid)\n except ValueError:\n pass\n else:\n raise AssertionError(f'expected ValueError for {invalid!r}')" |
| }, |
| { |
| "name": "coder_typescript_execution_next_delay", |
| "message": "Uzraksti TypeScript funkciju `nextDelay(attempt: number, baseMs = 250): number`, kas atbalsta exponential backoff (`baseMs * 2 ** (attempt - 1)`, t.i., attempt=1 atgriež baseMs) un attempt<=0 gadījumā atgriež 0.", |
| "profile": "coder", |
| "expected_terms": ["nextDelay", "attempt"], |
| "tags": ["coding", "typescript", "execution"], |
| "branches": ["coder"], |
| "level": "release", |
| "difficulty": "standard", |
| "category": "coding", |
| "expects_code": true, |
| "execution_language": "typescript", |
| "execution_test_code": "function assert(condition: boolean, message: string): void { if (!condition) throw new Error(message); }\nassert(nextDelay(0) === 0, 'attempt 0');\nassert(nextDelay(1) === 250, 'attempt 1');\nassert(nextDelay(3, 100) === 400, 'attempt 3')" |
| }, |
| { |
| "name": "coder_rust_execution_load_port", |
| "message": "Uzraksti Rust funkciju `load_port(raw: &str) -> Result<u16, String>`, kas atgriež kļūdu tukšai, ne-skaitliskai vai ārpus 1..65535 porta vērtībai (tostarp `0`) un nepieļauj panic.", |
| "profile": "coder", |
| "expected_terms": ["Result", "u16"], |
| "tags": ["coding", "rust", "execution"], |
| "branches": ["coder"], |
| "level": "release", |
| "difficulty": "hard", |
| "category": "coding", |
| "expects_code": true, |
| "execution_language": "rust", |
| "execution_test_code": "fn main() {\n assert_eq!(load_port(\"8080\").unwrap(), 8080);\n assert!(load_port(\"\").is_err());\n assert!(load_port(\"0\").is_err());\n assert!(load_port(\"abc\").is_err());\n assert!(load_port(\"70000\").is_err());\n}" |
| }, |
| { |
| "name": "coder_sql_execution_pass_rate_regression", |
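| "message": "Uzraksti SQL vaicājumu, kas apkopo execution pass rate pa branch un language no tabulām benchmark_results (id, branch) un execution_results (benchmark_run_id, language, passed), atgriež kolonnas `branch`, `language`, `execution_pass_rate` un iezīmē branchus zem 0.8 sliekšņa ar `is_regression` kolonnu.", |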
| "profile": "coder", |
| "expected_terms": ["execution_pass_rate", "is_regression"], |
| "tags": ["coding", "sql", "execution"], |
| "branches": ["coder"], |
| "level": "release", |
| "difficulty": "hard", |
| "category": "coding", |
| "expects_code": true, |
| "execution_language": "sql", |
| "execution_test_code": "CREATE TABLE benchmark_results (id INTEGER PRIMARY KEY, branch TEXT);\nCREATE TABLE execution_results (benchmark_run_id INTEGER, language TEXT, passed INTEGER);\nINSERT INTO benchmark_results (id, branch) VALUES (1, 'coder'), (2, 'planner');\nINSERT INTO execution_results (benchmark_run_id, language, passed) VALUES\n (1, 'typescript', 1),\n (1, 'typescript', 0),\n (1, 'rust', 1),\n (2, 'python', 1);\nCREATE TEMP TABLE actual AS {{CODE}};\nSELECT branch, language, execution_pass_rate, is_regression FROM actual;" |
| }, |
| { |
| "name": "coder_repo_patch_sse_contract", |
| "message": "Balstoties uz backend-rust/src/api/chat.rs un frontend/app/chat/page.tsx, uzraksti repo-level patch plānu SSE delta/complete kontrakta salāgošanai ar drošu rollout secību.", |
| "profile": "coder", |
| "expected_terms": ["delta", "complete", "rollout"], |
| "tags": ["coding", "repo-level", "grounding", "diff"], |
| "branches": ["coder"], |
| "level": "release", |
| "difficulty": "hard", |
| "category": "grounding", |
| "min_tool_steps": 2, |
| "min_grounding_sources": 2, |
| "expected_grounding_terms": ["backend-rust/src/api/chat.rs", "frontend/app/chat/page.tsx"] |
| }, |
| { |
| "name": "coder_repo_patch_python_bridge_failures", |
| "message": "Balstoties uz backend-rust/src/inference/python_bridge.rs, piedāvā repo-level refactor patch plānu vienotam timeout/stderr/invalid JSON error modelim bez copy-paste mappinga.", |
| "profile": "coder", |
| "expected_terms": ["timeout", "invalid JSON", "error"], |
| "tags": ["coding", "repo-level", "rust", "refactor"], |
| "branches": ["coder"], |
| "level": "release", |
| "difficulty": "hard", |
| "category": "grounding", |
| "min_tool_steps": 1, |
| "min_grounding_sources": 1, |
| "expected_grounding_terms": ["backend-rust/src/inference/python_bridge.rs"] |
| }, |
| { |
| "name": "coder_typescript_stream_event_union", |
| "message": "Izveido TypeScript discriminated union tipu `ChatStreamEvent` ar helperi chat stream event payloadiem, lai UI kods compile-time līmenī atšķir `delta`, `complete` un `route` eventus (`route` eventam ir lauks `route: string`).", |
| "profile": "coder", |
| "expected_terms": ["type", "delta", "complete"], |
| "tags": ["coding", "typescript", "quality"], |
| "branches": ["coder"], |
| "level": "release", |
| "difficulty": "standard", |
| "category": "coding", |
| "expects_code": true, |
| "execution_language": "typescript", |
| "execution_test_code": "function assert(condition: boolean, message: string): void { if (!condition) throw new Error(message); }\nconst routeEvent: ChatStreamEvent = { type: 'route', route: 'coder' };\nassert(routeEvent.type === 'route', 'route event');" |
| }, |
| { |
| "name": "coder_unsafe_pattern_repo_fix", |
| "message": "Atrodi nedrošo pattern backend-rust konfigurācijas ielādē un piedāvā drošāku refactor, balstoties uz backend-rust/src/config.rs saturu.", |
| "profile": "coder", |
| "expected_terms": ["Result", "panic", "droš"], |
| "tags": ["coding", "unsafe", "grounding", "rust"], |
| "branches": ["coder"], |
| "level": "release", |
| "difficulty": "hard", |
| "category": "safety", |
| "min_tool_steps": 1, |
| "min_grounding_sources": 1, |
| "expected_grounding_terms": ["backend-rust/src/config.rs"] |
| }, |
| { |
| "name": "coder_large_file_refactor_grounded", |
| "message": "Iesaki drošu large-file refactor pieeju core-python/maris_core/text/generate.py un core-python/maris_core/text/tools.py, neizjaucot esošo grounding plūsmu.", |
| "profile": "coder", |
| "expected_terms": ["generate.py", "tools.py", "grounding"], |
| "tags": ["coding", "large-file", "refactor", "grounding"], |
| "branches": ["coder"], |
| "level": "release", |
| "difficulty": "hard", |
| "category": "grounding", |
| "min_tool_steps": 2, |
| "min_grounding_sources": 2, |
| "expected_grounding_terms": ["core-python/maris_core/text/generate.py", "core-python/maris_core/text/tools.py"] |
| }, |
| { |
| "name": "coder_repo_sql_query_audit", |
| "message": "Balstoties uz analytics/sql/query_audit.sql vai līdzīga SQL query slāņa patterniem, iesaki drošu refactor pieeju, kas aizvieto string concatenation ar parametrizētiem placeholderiem benchmark/event vaicājumiem.", |
| "profile": "coder", |
| "expected_terms": ["parameter", "query", "unsafe"], |
| "tags": ["coding", "sql", "unsafe", "repo-level"], |
| "branches": ["coder"], |
| "level": "release", |
| "difficulty": "hard", |
| "category": "safety" |
| }, |
| { |
| "name": "coder_partial_context_debugging", |
| "message": "Mums tikai daļējs konteksts: tests flako ap chat stream complete event. Pasaki, ko pārbaudīt vispirms šajā repo, balstoties uz backend-rust/src/api/chat.rs un frontend/app/chat/page.tsx.", |
| "profile": "coder", |
| "expected_terms": ["complete", "pārbaud", "frontend"], |
| "tags": ["coding", "debugging", "partial-context", "grounding"], |
| "branches": ["coder"], |
| "level": "release", |
| "difficulty": "hard", |
| "category": "grounding", |
| "min_tool_steps": 2, |
| "min_grounding_sources": 2, |
| "expected_grounding_terms": ["backend-rust/src/api/chat.rs", "frontend/app/chat/page.tsx"] |
| }, |
| { |
| "name": "coder_benchmark_history_regression_patch", |
| "message": "Balstoties uz core-python/maris_core/text/benchmark.py, core-python/maris_core/training/train.py un core-python/scripts/eval_model.py, uzraksti repo-wide patch plānu benchmark history/regression tracking slānim ar artefaktiem, kas salīdzina current run pret baseline pa language un category.", |
| "profile": "coder", |
| "expected_terms": ["history", "regression", "category", "language"], |
| "tags": ["coding", "repo-level", "diff", "grounding", "benchmark"], |
| "branches": ["coder"], |
| "level": "release", |
| "difficulty": "hard", |
| "category": "grounding", |
| "min_tool_steps": 3, |
| "min_grounding_sources": 3, |
| "expected_grounding_terms": [ |
| "core-python/maris_core/text/benchmark.py", |
| "core-python/maris_core/training/train.py", |
| "core-python/scripts/eval_model.py" |
| ] |
| }, |
| { |
| "name": "coder_multi_file_bugfix_complete_event_duplication", |
| "message": "Balstoties uz frontend/app/chat/page.tsx un frontend/tests/chat.test.tsx, piedāvā multi-file bugfix patch, kas novērš dubultotu assistant final ziņu, kad complete event pienāk pēc pēdējā delta chunk.", |
| "profile": "coder", |
| "expected_terms": ["complete", "delta", "tests"], |
| "tags": ["coding", "multi-file", "bugfix", "grounding", "regression-risk"], |
| "branches": ["coder"], |
| "level": "release", |
| "difficulty": "hard", |
| "category": "grounding", |
| "min_tool_steps": 2, |
| "min_grounding_sources": 2, |
| "expected_grounding_terms": ["frontend/app/chat/page.tsx", "frontend/tests/chat.test.tsx"] |
| }, |
| { |
| "name": "coder_risky_refactor_stream_contract", |
| "message": "Balstoties uz backend-rust/src/api/chat.rs, frontend/app/chat/page.tsx un frontend/tests/chat.test.tsx, apraksti refactor ar regresiju riskiem stream event kontraktam, saglabājot backward-compatible rollout un delta/complete testus.", |
| "profile": "coder", |
| "expected_terms": ["backward-compatible", "delta", "complete", "tests"], |
| "tags": ["coding", "repo-level", "refactor", "regression-risk", "grounding"], |
| "branches": ["coder"], |
| "level": "release", |
| "difficulty": "hard", |
| "category": "grounding", |
| "min_tool_steps": 3, |
| "min_grounding_sources": 3, |
| "expected_grounding_terms": [ |
| "backend-rust/src/api/chat.rs", |
| "frontend/app/chat/page.tsx", |
| "frontend/tests/chat.test.tsx" |
| ] |
| }, |
| { |
| "name": "coder_ci_debug_execution_benchmark_incident", |
| "message": "Balstoties uz .github/workflows/core-train.yml, .github/workflows/lint-and-test.yml un core-python/scripts/eval_model.py, izveido incident-debugging patch plānu gadījumam, kad coder execution benchmarki vairs nepublicē history/regression artefaktus pēc workflow runa.", |
| "profile": "coder", |
| "expected_terms": ["workflow", "artifact", "history", "regression"], |
| "tags": ["coding", "ci", "debugging", "incident-recovery", "grounding"], |
| "branches": ["coder"], |
| "level": "release", |
| "difficulty": "hard", |
| "category": "grounding", |
| "min_tool_steps": 3, |
| "min_grounding_sources": 3, |
| "expected_grounding_terms": [ |
| ".github/workflows/core-train.yml", |
| ".github/workflows/lint-and-test.yml", |
| "core-python/scripts/eval_model.py" |
| ] |
| }, |
| { |
| "name": "coder_python_bridge_incident_recovery", |
| "message": "Balstoties uz backend-rust/src/inference/python_bridge.rs un backend-rust/src/api/chat.rs, uzraksti incident-recovery patch plānu timeout/stderr/invalid JSON degradācijas scenārijam, kur vajag ātru rollback, labāku diagnostiku un regresijas testus.", |
| "profile": "coder", |
| "expected_terms": ["timeout", "rollback", "diagnost", "tests"], |
| "tags": ["coding", "incident-recovery", "debugging", "grounding", "repo-level"], |
| "branches": ["coder"], |
| "level": "release", |
| "difficulty": "hard", |
| "category": "grounding", |
| "min_tool_steps": 2, |
| "min_grounding_sources": 2, |
| "expected_grounding_terms": [ |
| "backend-rust/src/inference/python_bridge.rs", |
| "backend-rust/src/api/chat.rs" |
| ], |
| "production_like": true |
| }, |
| { |
| "name": "coder_flaky_ci_grounded_fix_plan", |
| "message": "Balstoties uz .github/workflows/lint-and-test.yml, frontend/tests/chat.test.tsx un backend-rust/tests/api_tests.rs, uzraksti grounded fix plānu flaky CI scenārijam, kur chat stream complete tests izkrīt tikai GitHub Actions vidē.", |
| "profile": "coder", |
| "expected_terms": ["flaky", "GitHub Actions", "complete", "tests"], |
| "tags": ["coding", "ci", "flaky", "grounding", "incident-recovery"], |
| "branches": ["coder"], |
| "level": "release", |
| "difficulty": "hard", |
| "category": "grounding", |
| "min_tool_steps": 3, |
| "min_grounding_sources": 3, |
| "expected_grounding_terms": [ |
| ".github/workflows/lint-and-test.yml", |
| "frontend/tests/chat.test.tsx", |
| "backend-rust/tests/api_tests.rs" |
| ], |
| "production_like": true |
| }, |
| { |
| "name": "coder_config_diff_rollback_regression_review", |
| "message": "Balstoties uz huggingface/training-config.json, core-python/maris_core/training/config.py un .github/workflows/core-train.yml, piedāvā grounded patch plānu config diff + rollback scenārijam, kur benchmark gate pēc release workflow vairs neizmanto branch-specific suite.", |
| "profile": "coder", |
| "expected_terms": ["config", "rollback", "branch-specific", "benchmark"], |
| "tags": ["coding", "config-diff", "rollback", "grounding", "benchmark"], |
| "branches": ["coder"], |
| "level": "release", |
| "difficulty": "hard", |
| "category": "grounding", |
| "min_tool_steps": 3, |
| "min_grounding_sources": 3, |
| "expected_grounding_terms": [ |
| "huggingface/training-config.json", |
| "core-python/maris_core/training/config.py", |
| ".github/workflows/core-train.yml" |
| ], |
| "production_like": true |
| } |
| ] |
| } |
|
|