| { |
| "preferences": [ |
| { |
| "prompt": "Salabo Python funkciju normalize_email(email: str), lai tā noņem atstarpes, normalizē lower-case un met ValueError tukšai ievadei.", |
| "context": "Lietotājs grib production-ready helperi ar skaidru validāciju un īsu paskaidrojumu.", |
| "chosen": "```python\ndef normalize_email(email: str) -> str:\n    normalized = email.strip().lower()\n    if not normalized:\n        raise ValueError(\"email is required\")\n    return normalized\n```\nŠis variants korekti apstrādā tukšu ievadi un saglabā funkciju deterministisku.", |
| "rejected": "Tu vari vienkārši uzrakstīt kādu funkciju, kas varbūt pārvērš email lower-case, ja vajag.", |
| "source": "human_review", |
| "branch": "coder", |
| "task_type": "bugfix", |
| "language": "python", |
| "repo_context": [ |
| "core-python", |
| "backend-rust" |
| ], |
| "execution_required": true, |
| "tags": [ |
| "bugfix", |
| "validation", |
| "python" |
| ], |
| "source_type": "real_reviewer", |
| "reviewer_segment": "ops", |
| "risk_level": "high", |
| "grounding_scope": "repo-grounded", |
| "failure_bucket": "bugfix", |
| "preference_outcome": "chosen", |
| "confidence": 0.62, |
| "pair_id": "pref-pair-0001", |
| "blind": true, |
| "production_like": true, |
| "multi_turn": false |
| }, |
| { |
| "prompt": "Refaktorē TypeScript retry helperi tā, lai tas atbalsta exponential backoff un skaidrus edge cases.", |
| "context": "Esošais helperis dubulto kodu un nestrādā korekti pie attempts=0.", |
| "chosen": "```ts\nexport function nextDelay(attempt: number, baseMs = 250): number {\n if (attempt <= 0) return 0;\n return baseMs * 2 ** (attempt - 1);\n}\n```\nPiemini arī testus priekš attempts=0 un lieliem mēģinājumu skaitiem.", |
| "rejected": "Var izmantot setTimeout un kaut kādu formulu. Testi nav vajadzīgi.", |
| "source": "human_review", |
| "branch": "coder", |
| "task_type": "refactor", |
| "language": "typescript", |
| "repo_context": [ |
| "frontend" |
| ], |
| "execution_required": true, |
| "tags": [ |
| "refactor", |
| "retry", |
| "edge-cases" |
| ], |
| "source_type": "internal_curated", |
| "reviewer_segment": "staff_engineer", |
| "risk_level": "medium", |
| "grounding_scope": "single-file", |
| "failure_bucket": "unsafe_refactor", |
| "preference_outcome": "chosen", |
| "confidence": 0.71, |
| "pair_id": "pref-pair-0002", |
| "blind": true, |
| "production_like": false, |
| "multi_turn": false |
| }, |
| { |
| "prompt": "Uzraksti repo-level plānu un diff stila izmaiņu aprakstu frontend+backend SSE saskaņošanai.", |
| "context": "Jāsaskaņo event nosaukumi starp Rust backend un Next.js frontend.", |
| "chosen": "Nosauc konkrētos failus, event kontraktu, drošos migrācijas soļus un pievieno testu plānu abām pusēm.", |
| "rejected": "Pamaini backend un frontend tā, lai viss strādā.", |
| "source": "human_review", |
| "branch": "planner", |
| "task_type": "repo-level", |
| "language": "markdown", |
| "repo_context": [ |
| "backend-rust", |
| "frontend" |
| ], |
| "execution_required": false, |
| "tags": [ |
| "repo-level", |
| "sse", |
| "planning" |
| ], |
| "source_type": "synthetic", |
| "reviewer_segment": "review_panel", |
| "risk_level": "medium", |
| "grounding_scope": "cross-service", |
| "failure_bucket": "broken_contract", |
| "preference_outcome": "chosen", |
| "confidence": 0.8, |
| "pair_id": "pref-pair-0003", |
| "blind": true, |
| "production_like": true, |
| "multi_turn": false |
| }, |
| { |
| "prompt": "Uzraksti Pytest testu validācijas helperim, kas pārbauda ValueError un success path.", |
| "context": "Mērķis ir īss, konkrēts tests bez lieka boilerplate.", |
| "chosen": "```python\nimport pytest\n\nfrom app.validators import normalize_email\n\n\ndef test_normalize_email_rejects_blank() -> None:\n    with pytest.raises(ValueError):\n        normalize_email(\" \")\n\n\ndef test_normalize_email_normalizes_case() -> None:\n    assert normalize_email(\" A@Example.COM \") == \"a@example.com\"\n```", |
| "rejected": "Vienkārši uzraksti, ka vajag testēt kļūdu un success scenāriju.", |
| "source": "human_review", |
| "branch": "coder", |
| "task_type": "test-writing", |
| "language": "python", |
| "repo_context": [ |
| "core-python" |
| ], |
| "execution_required": true, |
| "tags": [ |
| "tests", |
| "pytest", |
| "python" |
| ], |
| "source_type": "real_reviewer", |
| "reviewer_segment": "ops", |
| "risk_level": "high", |
| "grounding_scope": "repo-grounded", |
| "failure_bucket": "production_regression", |
| "preference_outcome": "chosen", |
| "confidence": 0.89, |
| "pair_id": "pref-pair-0004", |
| "blind": true, |
| "production_like": false, |
| "multi_turn": false |
| }, |
| { |
| "prompt": "Salabo Rust konfigurācijas loaderi, lai tukšs vai nederīgs PORT neatstāj unwrap/panic un atgriež strukturētu Result kļūdu.", |
| "context": "Operators grib skaidru kļūdu, nevis panic trace startup laikā.", |
| "chosen": "```rust\npub fn load_port(raw: &str) -> Result<u16, String> {\n let normalized = raw.trim();\n if normalized.is_empty() {\n return Err(\"PORT is missing\".to_string());\n }\n normalized\n .parse::<u16>()\n .map_err(|_| \"PORT is invalid\".to_string())\n}\n```\nŠeit nav unwrap, kļūdas ir deterministiskas un viegli testējamas.", |
| "rejected": "```rust\npub fn load_port(raw: &str) -> u16 {\n raw.parse().unwrap()\n}\n```\nTas ir īsāk un pietiekami labi.", |
| "source": "human_review", |
| "branch": "coder", |
| "task_type": "bugfix", |
| "language": "rust", |
| "repo_context": [ |
| "backend-rust" |
| ], |
| "execution_required": true, |
| "tags": [ |
| "rust", |
| "bugfix", |
| "unsafe" |
| ], |
| "source_type": "internal_curated", |
| "reviewer_segment": "staff_engineer", |
| "risk_level": "medium", |
| "grounding_scope": "single-file", |
| "failure_bucket": "schema_regression", |
| "preference_outcome": "chosen", |
| "confidence": 0.62, |
| "pair_id": "pref-pair-0005", |
| "blind": true, |
| "production_like": true, |
| "multi_turn": false |
| }, |
| { |
| "prompt": "Uzraksti TypeScript stream event union, kas compile-time līmenī atdala delta un complete payloadus.", |
| "context": "UI parseris bieži piekļūst neeksistējošiem laukiem, tāpēc vajag stingrāku typing.", |
| "chosen": "```ts\nexport type ChatStreamEvent =\n | { type: 'delta'; text: string }\n | { type: 'complete'; done: true; text?: string }\n | { type: 'route'; route: string };\n```\nPēc tam parserī jālieto `switch (event.type)` un testos jāpārbauda compile-safe narrowing.", |
| "rejected": "Var izmantot `any` un pārbaudīt laukus runtime laikā, tas būs ātrāk.", |
| "source": "human_review", |
| "branch": "coder", |
| "task_type": "refactor", |
| "language": "typescript", |
| "repo_context": [ |
| "frontend" |
| ], |
| "execution_required": true, |
| "tags": [ |
| "typescript", |
| "typing", |
| "stream" |
| ], |
| "source_type": "synthetic", |
| "reviewer_segment": "review_panel", |
| "risk_level": "medium", |
| "grounding_scope": "cross-service", |
| "failure_bucket": "incident_comms", |
| "preference_outcome": "chosen", |
| "confidence": 0.71, |
| "pair_id": "pref-pair-0006", |
| "blind": true, |
| "production_like": false, |
| "multi_turn": false |
| }, |
| { |
| "prompt": "Iesaki SQL vaicājumu execution pass rate apkopošanai pa branch un language ar regression flag zem 0.8.", |
| "context": "Analytics pusē vajag deterministisku query bez string concatenation un ar skaidru alias naming.", |
| "chosen": "```sql\nSELECT\n b.branch,\n e.language,\n AVG(CASE WHEN e.passed THEN 1.0 ELSE 0.0 END) AS execution_pass_rate,\n CASE WHEN AVG(CASE WHEN e.passed THEN 1.0 ELSE 0.0 END) < 0.8 THEN 1 ELSE 0 END AS is_regression\nFROM benchmark_results b\nJOIN execution_results e ON e.benchmark_run_id = b.id\nGROUP BY b.branch, e.language;\n```\nTas ir skaidrs, parametrizējams un der benchmark dashboardam.", |
| "rejected": "```sql\nSELECT * FROM benchmark_results, execution_results;\n```\nPēc tam jau var filtrēt aplikācijā.", |
| "source": "human_review", |
| "branch": "coder", |
| "task_type": "repo-level", |
| "language": "sql", |
| "repo_context": [ |
| "operations" |
| ], |
| "execution_required": true, |
| "tags": [ |
| "sql", |
| "analytics", |
| "quality" |
| ], |
| "source_type": "real_reviewer", |
| "reviewer_segment": "ops", |
| "risk_level": "high", |
| "grounding_scope": "repo-grounded", |
| "failure_bucket": "multi_turn_restart", |
| "preference_outcome": "chosen", |
| "confidence": 0.8, |
| "pair_id": "pref-pair-0007", |
| "blind": true, |
| "production_like": true, |
| "multi_turn": false |
| }, |
| { |
| "prompt": "Apraksti repo-level patch plānu python_bridge timeout/stderr/invalid JSON kļūdu vienotam error modelim.", |
| "context": "Svarīgi ir nosaukt konkrētus failus, migrācijas secību un testus, ne tikai vispārīgu refactor ieteikumu.", |
| "chosen": "Labs variants nosauc `backend-rust/src/inference/python_bridge.rs`, atsevišķu error enum/struct, migrācijas soļus un regresijas testus timeout/stderr/invalid JSON scenārijiem.", |
| "rejected": "Vienkārši ieliec kopēju error handleri kaut kur bridge slānī.", |
| "source": "human_review", |
| "branch": "coder", |
| "task_type": "repo-level", |
| "language": "markdown", |
| "repo_context": [ |
| "backend-rust", |
| "core-python" |
| ], |
| "execution_required": false, |
| "tags": [ |
| "repo-level", |
| "bridge", |
| "errors" |
| ], |
| "source_type": "internal_curated", |
| "reviewer_segment": "staff_engineer", |
| "risk_level": "medium", |
| "grounding_scope": "single-file", |
| "failure_bucket": "hallucination", |
| "preference_outcome": "chosen", |
| "confidence": 0.89, |
| "pair_id": "pref-pair-0008", |
| "blind": true, |
| "production_like": false, |
| "multi_turn": false |
| }, |
| { |
| "prompt": "Salabo nedrošu SQL query builderi, kas WHERE klauzulā concatenē lietotāja ievadi, un piedāvā parametrizētu alternatīvu.", |
| "context": "Galvenais ir novērst injection risku un saglabāt lasāmu query API.", |
| "chosen": "Drošais variants aizvieto string concatenation ar placeholderiem (`?`, `$1`) un parāda, kā parametri tiek padoti atsevišķi no query stringa. Papildus piemin testus injection un tukšas ievades gadījumiem.", |
| "rejected": "Var atstāt concatenation, ja inputu iepriekš `trim()` un pārbauda uz tukšu virkni.", |
| "source": "human_review", |
| "branch": "coder", |
| "task_type": "unsafe", |
| "language": "sql", |
| "repo_context": [ |
| "operations", |
| "backend-rust" |
| ], |
| "execution_required": false, |
| "tags": [ |
| "unsafe", |
| "sql", |
| "security" |
| ], |
| "source_type": "synthetic", |
| "reviewer_segment": "review_panel", |
| "risk_level": "medium", |
| "grounding_scope": "cross-service", |
| "failure_bucket": "bugfix", |
| "preference_outcome": "chosen", |
| "confidence": 0.62, |
| "pair_id": "pref-pair-0009", |
| "blind": true, |
| "production_like": true, |
| "multi_turn": false |
| }, |
| { |
| "prompt": "Uzraksti sliktā refactor piemēra noraidījumu, ja lietotājs grib sadalīt stream parseri mazos helperos, bet pazaudē delta/complete kontrakta pārbaudes.", |
| "context": "Mērķis ir uzsvērt, ka refactor nedrīkst salauzt esošo grounding un event secību testus.", |
| "chosen": "Labs variants paskaidro, ka refactor jāveic kopā ar kontrakta testiem, event secības regresijas pārbaudēm un skaidru state ownership. Tas atsakās no helperu sadalīšanas, ja pazūd invarianti vai testu pārklājums.", |
| "rejected": "Sadalīt parseri helperos vienmēr ir labi; testus var pievienot vēlāk, ja būs laiks.", |
| "source": "human_review", |
| "branch": "coder", |
| "task_type": "refactor", |
| "language": "typescript", |
| "repo_context": [ |
| "frontend", |
| "backend-rust" |
| ], |
| "execution_required": false, |
| "tags": [ |
| "refactor", |
| "stream", |
| "quality" |
| ], |
| "source_type": "real_reviewer", |
| "reviewer_segment": "ops", |
| "risk_level": "high", |
| "grounding_scope": "repo-grounded", |
| "failure_bucket": "unsafe_refactor", |
| "preference_outcome": "chosen", |
| "confidence": 0.71, |
| "pair_id": "pref-pair-0010", |
| "blind": true, |
| "production_like": false, |
| "multi_turn": false |
| }, |
| { |
| "prompt": "Iesaki repo-wide patch plānu benchmark history/regression tracking slānim starp core-python/maris_core/text/benchmark.py, core-python/maris_core/training/train.py un core-python/scripts/eval_model.py.", |
| "context": "Svarīgi ir ne tikai saglabāt manifestu, bet arī salīdzināt current run pret baseline pa category un execution language.", |
| "chosen": "Labs variants nosauc visus trīs failus, pieprasa `benchmark-history.json` un `benchmark-regression-report.json`, saglabā baseline salīdzinājumu pa category/execution language un skaidri norāda, kur artefakti jāglabā training/eval plūsmā.", |
| "rejected": "Vienkārši saglabā vēl vienu benchmark JSON failu. Salīdzināšanu var izdarīt vēlāk, ja būs vajadzīgs.", |
| "source": "human_review", |
| "branch": "coder", |
| "task_type": "repo-level", |
| "language": "markdown", |
| "repo_context": [ |
| "core-python", |
| "github-actions" |
| ], |
| "execution_required": false, |
| "tags": [ |
| "benchmark", |
| "history", |
| "regression", |
| "repo-level" |
| ], |
| "source_type": "internal_curated", |
| "reviewer_segment": "staff_engineer", |
| "risk_level": "medium", |
| "grounding_scope": "single-file", |
| "failure_bucket": "broken_contract", |
| "preference_outcome": "chosen", |
| "confidence": 0.8, |
| "pair_id": "pref-pair-0011", |
| "blind": true, |
| "production_like": true, |
| "multi_turn": false |
| }, |
| { |
| "prompt": "Piedāvā multi-file bugfix pieeju complete event dubultotai assistant ziņai frontend/app/chat/page.tsx un frontend/tests/chat.test.tsx.", |
| "context": "Nepietiek tikai ar UI labojumu; jāpielāgo arī tests, kas sedz delta->complete secību.", |
| "chosen": "Drošais variants nosauc abus failus, saglabā inkrementālu delta renderēšanu, novērš final ziņas dubultošanu un pievieno regresijas testu tieši delta->complete secībai.", |
| "rejected": "Pamaini tikai UI failu, lai complete event vienkārši vienmēr pārraksta tekstu. Testi var pagaidīt.", |
| "source": "human_review", |
| "branch": "coder", |
| "task_type": "bugfix", |
| "language": "typescript", |
| "repo_context": [ |
| "frontend" |
| ], |
| "execution_required": true, |
| "tags": [ |
| "multi-file", |
| "bugfix", |
| "stream", |
| "tests" |
| ], |
| "source_type": "synthetic", |
| "reviewer_segment": "review_panel", |
| "risk_level": "medium", |
| "grounding_scope": "cross-service", |
| "failure_bucket": "production_regression", |
| "preference_outcome": "chosen", |
| "confidence": 0.89, |
| "pair_id": "pref-pair-0012", |
| "blind": true, |
| "production_like": false, |
| "multi_turn": false |
| }, |
| { |
| "prompt": "Apraksti risky refactor python_bridge timeout/stderr/invalid JSON error modelim, nesalaužot backend API semantiku incidentu laikā.", |
| "context": "Lietotājam vajag refactor ar skaidriem regresiju riskiem, backward-compatible mappingu un testiem.", |
| "chosen": "Labs variants prasa vienotu typed error modeli, norāda backward-compatible mappingu API robežā un pievieno timeout/stderr/invalid JSON regresijas testus pirms merge.", |
| "rejected": "Iznes copy-paste match blokus helperī un cer, ka viss turpinās strādāt; incident recovery var sakārtot vēlāk.", |
| "source": "human_review", |
| "branch": "coder", |
| "task_type": "refactor", |
| "language": "rust", |
| "repo_context": [ |
| "backend-rust" |
| ], |
| "execution_required": false, |
| "tags": [ |
| "refactor", |
| "regression-risk", |
| "incident-recovery", |
| "errors" |
| ], |
| "source_type": "real_reviewer", |
| "reviewer_segment": "ops", |
| "risk_level": "high", |
| "grounding_scope": "repo-grounded", |
| "failure_bucket": "schema_regression", |
| "preference_outcome": "chosen", |
| "confidence": 0.62, |
| "pair_id": "pref-pair-0013", |
| "blind": true, |
| "production_like": true, |
| "multi_turn": false |
| }, |
| { |
| "prompt": "Iedod CI debugging un incident recovery pieeju, ja core-train workflow vairs nepublicē benchmark history/regression artefaktus pēc coder execution benchmark step.", |
| "context": "Mērķis ir nosaukt workflow failus, eval entrypoint un rollback/repair secību, nevis tikai pateikt 'pārbaudi CI logus'.", |
| "chosen": "Labs variants atsaucas uz `.github/workflows/core-train.yml`, `.github/workflows/lint-and-test.yml` un `core-python/scripts/eval_model.py`, izklāsta diagnostikas secību, artefaktu ceļus un incident recovery/rollback soļus ar smoke testu pēc remonta.", |
| "rejected": "Skaties CI logus un mēģini palaist workflow vēlreiz. Ja nepalīdz, droši vien artefakti nav vajadzīgi.", |
| "source": "human_review", |
| "branch": "coder", |
| "task_type": "ci-orchestration", |
| "language": "yaml", |
| "repo_context": [ |
| "github-actions", |
| "core-python" |
| ], |
| "execution_required": false, |
| "tags": [ |
| "ci", |
| "debugging", |
| "incident-recovery", |
| "artifacts" |
| ], |
| "source_type": "internal_curated", |
| "reviewer_segment": "staff_engineer", |
| "risk_level": "medium", |
| "grounding_scope": "single-file", |
| "failure_bucket": "incident_comms", |
| "preference_outcome": "chosen", |
| "confidence": 0.71, |
| "pair_id": "pref-pair-0014", |
| "blind": true, |
| "production_like": false, |
| "multi_turn": false |
| }, |
| { |
| "prompt": "Apraksti TypeScript stream parsera labojumu latviešu valodā tā, lai saglabājas profesionāla LV+EN terminoloģija.", |
| "context": "Svarīgi ir nedot mehānisku tulkojumu; jāpiemin `delta`, `complete`, `payload`, kontrakts un regresijas tests.", |
| "chosen": "Drošais variants skaidri pasaka, ka `delta` un `complete` ir event kontrakta termini, kurus nevajag mākslīgi pārtulkot. Tas dabiskā latviešu valodā izskaidro payload shape, state ownership un regresijas testu vajadzību delta->complete secībai.", |
| "rejected": "Plūsmas pabeigšanas gabals un kravas saturs jāpārtulko pilnībā latviski, jo angļu termini padara tekstu nepareizu. Testus var pieminēt vēlāk.", |
| "source": "human_review", |
| "branch": "coder", |
| "task_type": "refactor", |
| "language": "typescript", |
| "repo_context": [ |
| "frontend" |
| ], |
| "execution_required": false, |
| "tags": [ |
| "latvian", |
| "terminology", |
| "stream", |
| "quality" |
| ], |
| "source_type": "synthetic", |
| "reviewer_segment": "review_panel", |
| "risk_level": "medium", |
| "grounding_scope": "cross-service", |
| "failure_bucket": "multi_turn_restart", |
| "preference_outcome": "chosen", |
| "confidence": 0.8, |
| "pair_id": "pref-pair-0015", |
| "blind": true, |
| "production_like": true, |
| "multi_turn": false |
| }, |
| { |
| "prompt": "Uzraksti incidenta status update latviešu valodā par rollback un hotfix scenāriju.", |
| "context": "Atbildei jābūt īsai, faktoloģiskai un profesionālai, saglabājot stabilos terminus `rollback`, `hotfix` un `ETA`.", |
| "chosen": "Labs variants īsi nosauc ietekmi, current mitigation, rollback statusu, hotfix progresu un nākamo ETA checkpoint. Teksts ir latvisks pēc struktūras, bet terminus `rollback`, `hotfix` un `ETA` lieto dabiski, bez neveiklas burtiskas tulkošanas.", |
| "rejected": "Mēs veicam atpakaļripošanu un karsto labojumu tuvākajā laika brīdī, viss būs pilnībā kārtībā. Precīzus riskus vai ETA nav jāmin.", |
| "source": "human_review", |
| "branch": "coder", |
| "task_type": "ci-orchestration", |
| "language": "markdown", |
| "repo_context": [ |
| "operations" |
| ], |
| "execution_required": false, |
| "tags": [ |
| "latvian", |
| "incident-recovery", |
| "tone" |
| ], |
| "source_type": "real_reviewer", |
| "reviewer_segment": "ops", |
| "risk_level": "high", |
| "grounding_scope": "repo-grounded", |
| "failure_bucket": "hallucination", |
| "preference_outcome": "chosen", |
| "confidence": 0.89, |
| "pair_id": "pref-pair-0016", |
| "blind": true, |
| "production_like": false, |
| "multi_turn": false |
| }, |
| { |
| "prompt": "Turpini iepriekšējo sarunu par flaky CI testu un iedod nākamo soli, balstoties uz `.github/workflows/lint-and-test.yml` un `frontend/tests/chat.test.tsx`.", |
| "context": "Lietotājs jau iepriekš pateicis, ka problēma parādās tikai CI. Vēlamā atbilde nedrīkst ignorēt šo kontekstu.", |
| "chosen": "Labs variants atsaucas uz iepriekš minēto CI-only flaky uzvedību, nosauc workflow un test failu un dod konkrētu nākamo soli par timing/event secības pārbaudi. Tas parāda multi-turn atmiņu un nerestartē sarunu no nulles.", |
| "rejected": "Vispirms vajadzētu saprast, kas vispār ir CI un kur atrodas jūsu tests. Varbūt vajag vienkārši paskatīties kaut kādus logus.", |
| "source": "human_review", |
| "branch": "coder", |
| "task_type": "debugging", |
| "language": "yaml", |
| "repo_context": [ |
| "github-actions", |
| "frontend" |
| ], |
| "execution_required": false, |
| "tags": [ |
| "latvian", |
| "multi-turn", |
| "ci", |
| "debugging" |
| ], |
| "source_type": "internal_curated", |
| "reviewer_segment": "staff_engineer", |
| "risk_level": "medium", |
| "grounding_scope": "single-file", |
| "failure_bucket": "bugfix", |
| "preference_outcome": "chosen", |
| "confidence": 0.62, |
| "pair_id": "pref-pair-0017", |
| "blind": true, |
| "production_like": true, |
| "multi_turn": true |
| }, |
| { |
| "prompt": "Apraksti code review komentāru par observability patch, saglabājot terminus `structured logs`, `request_id`, `trace_id` un `sampling`.", |
| "context": "Mērķis ir profesionāls latviešu komentārs, nevis neveikls tulkojums kā 'strukturētie baļķi'.", |
| "chosen": "Drošais variants saglabā `structured logs`, `request_id`, `trace_id` un `sampling` oriģinālajā formā, bet latviski izskaidro operatoru ieguvumu un korelācijas vērtību incidentu laikā.", |
| "rejected": "Komentārā jāraksta par strukturētajiem baļķiem, pieprasījuma identifikatoru un parauga ņemšanu, jo angļu termini tehniskā tekstā nav labi.", |
| "source": "human_review", |
| "branch": "coder", |
| "task_type": "repo-level", |
| "language": "markdown", |
| "repo_context": [ |
| "backend-rust", |
| "core-python" |
| ], |
| "execution_required": false, |
| "tags": [ |
| "latvian", |
| "observability", |
| "terminology" |
| ], |
| "source_type": "synthetic", |
| "reviewer_segment": "review_panel", |
| "risk_level": "medium", |
| "grounding_scope": "cross-service", |
| "failure_bucket": "unsafe_refactor", |
| "preference_outcome": "chosen", |
| "confidence": 0.71, |
| "pair_id": "pref-pair-0018", |
| "blind": true, |
| "production_like": false, |
| "multi_turn": false |
| }, |
| { |
| "prompt": "Iedod pairwise labāku atbildi par SQL migration risku ar terminiem `schema drift`, `rollback window` un `data backfill`.", |
| "context": "Svarīgi ir precīzi nosaukt riskus un secību, nevis aizvietot terminus ar miglainiem aprakstiem.", |
| "chosen": "Labs variants skaidri nosauc `schema drift` risku, `rollback window` robežas un `data backfill` secību, vienlaikus saglabājot dabisku latviešu teikumu plūdumu un operatoram noderīgus secinājumus.", |
| "rejected": "Datubāzes pārmaiņu vilkme un datu aizpildīšana varbūt kaut kā ietekmēs sistēmu, bet detaļas nav īpaši svarīgas, ja viss šķiet droši.", |
| "source": "human_review", |
| "branch": "coder", |
| "task_type": "repo-level", |
| "language": "sql", |
| "repo_context": [ |
| "infra", |
| "operations" |
| ], |
| "execution_required": false, |
| "tags": [ |
| "latvian", |
| "sql", |
| "migration", |
| "quality" |
| ], |
| "source_type": "real_reviewer", |
| "reviewer_segment": "ops", |
| "risk_level": "high", |
| "grounding_scope": "repo-grounded", |
| "failure_bucket": "broken_contract", |
| "preference_outcome": "chosen", |
| "confidence": 0.8, |
| "pair_id": "pref-pair-0019", |
| "blind": true, |
| "production_like": true, |
| "multi_turn": false |
| }, |
| { |
| "prompt": "Paskaidro, kā multi-turn atbildē turpināt iepriekšēju plānu par benchmark paplašināšanu, nevis sākt pilnīgi jaunu struktūru.", |
| "context": "Lietotājs jau ir saskaņojis augsta līmeņa plānu; tagad vajag tikai konkretizēt benchmark un test strategy daļu.", |
| "chosen": "Labs variants sāk ar frāzi, kas parāda konteksta turpinājumu, piemēram, 'turpinot iepriekšējo plānu', un pēc tam nosauc konkrētus failus, testus un nākamo soli. Tas ir daudz stiprāks multi-turn signāls nekā pilnīgs restarts.", |
| "rejected": "Šeit ir pilnīgi jauns plāns no sākuma, neņemot vērā neko, ko apspriedām iepriekš.", |
| "source": "human_review", |
| "branch": "coder", |
| "task_type": "planning", |
| "language": "markdown", |
| "repo_context": [ |
| "core-python" |
| ], |
| "execution_required": false, |
| "tags": [ |
| "latvian", |
| "multi-turn", |
| "planning" |
| ], |
| "source_type": "internal_curated", |
| "reviewer_segment": "staff_engineer", |
| "risk_level": "medium", |
| "grounding_scope": "single-file", |
| "failure_bucket": "production_regression", |
| "preference_outcome": "chosen", |
| "confidence": 0.89, |
| "pair_id": "pref-pair-0020", |
| "blind": true, |
| "production_like": false, |
| "multi_turn": true |
| }, |
| { |
| "prompt": "Uzraksti incidenta update par `circuit breaker` un `error budget`, saglabājot profesionālu LV+EN terminoloģiju.", |
| "context": "Atbildei jābūt īsai, faktoloģiskai un jāizvairās no neveikliem burtiskiem tulkojumiem.", |
| "chosen": "Labs variants īsi pasaka, ka `circuit breaker` ir atvēries pēc `trip threshold` sasniegšanas, kā tas ietekmē `error budget`, un kāds ir current mitigation. Teksts ir latvisks pēc struktūras, bet terminoloģija paliek profesionāla.", |
| "rejected": "Strāvas pārtraucējs ir nostrādājis un kļūdu budžets ir izlietots, tādēļ mēs ceram uz labāku sistēmas pašsajūtu tuvākajā laikā.", |
| "source": "human_review", |
| "branch": "coder", |
| "task_type": "ci-orchestration", |
| "language": "rust", |
| "repo_context": [ |
| "backend-rust" |
| ], |
| "execution_required": false, |
| "tags": [ |
| "latvian", |
| "incident-recovery", |
| "circuit-breaker" |
| ], |
| "source_type": "synthetic", |
| "reviewer_segment": "review_panel", |
| "risk_level": "medium", |
| "grounding_scope": "cross-service", |
| "failure_bucket": "schema_regression", |
| "preference_outcome": "chosen", |
| "confidence": 0.62, |
| "pair_id": "pref-pair-0021", |
| "blind": true, |
| "production_like": true, |
| "multi_turn": false |
| }, |
| { |
| "prompt": "Salīdzini divus blind side-by-side atbilžu variantus incidenta update uzdevumam un saglabā tikai reviewer preference rezultātu.", |
| "context": "Reviewer nedrīkst redzēt branch vai modeli; vajag preference outcome, confidence un īsu rationale.", |
| "chosen": "Labākais variants ir īss, faktoloģisks un piemin rollback statusu, hotfix progresu un ETA checkpoint bez nepamatotas pārliecības.", |
| "rejected": "Vari vienkārši izvēlēties atbildi, kas skan pārliecinošāk, pat ja tajā nav rollback vai ETA detaļu.", |
| "source": "human_review", |
| "source_type": "real_reviewer", |
| "annotator": "reviewer-07", |
| "reviewer_segment": "incident-command", |
| "branch": "coder", |
| "task_type": "human-eval", |
| "language": "markdown", |
| "risk_level": "high", |
| "grounding_scope": "ops-grounded", |
| "failure_bucket": "incident_comms", |
| "preference_outcome": "chosen", |
| "confidence": 0.91, |
| "pair_id": "pref-pair-9001", |
| "blind": true, |
| "production_like": true, |
| "multi_turn": false, |
| "repo_context": [ |
| "operations", |
| "core-python" |
| ], |
| "execution_required": false, |
| "tags": [ |
| "human-eval", |
| "blind", |
| "incident" |
| ] |
| }, |
| { |
| "prompt": "Novērtē multi-turn atbildi, kurai jāturpina iepriekšējais benchmark plāns, nevis jārestartē saruna.", |
| "context": "Vajag preference example ar skaidru failure bucket multi-turn restartam.", |
| "chosen": "Spēcīgais variants sāk ar “turpinot iepriekšējo plānu”, saglabā iepriekš definētos blokus un konkretizē tikai benchmark/test strategy daļu.", |
| "rejected": "Sliktais variants pilnībā restartē plānu un ignorē jau apspriesto struktūru.", |
| "source": "human_review", |
| "source_type": "internal_curated", |
| "annotator": "eval-designer", |
| "reviewer_segment": "eval-design", |
| "branch": "planner", |
| "task_type": "planning", |
| "language": "markdown", |
| "risk_level": "medium", |
| "grounding_scope": "conversation-grounded", |
| "failure_bucket": "multi_turn_restart", |
| "preference_outcome": "chosen", |
| "confidence": 0.84, |
| "pair_id": "pref-pair-9002", |
| "blind": true, |
| "production_like": false, |
| "multi_turn": true, |
| "repo_context": [ |
| "core-python" |
| ], |
| "execution_required": false, |
| "tags": [ |
| "multi-turn", |
| "planning", |
| "human-eval" |
| ] |
| }, |
| { |
| "prompt": "Iedod reviewer preference piemēru, kur drošāks variants atsakās no hallucinated root cause bez logiem.", |
| "context": "Šis piemērs vajadzīgs safety/hallucination bucketam.", |
| "chosen": "Labs variants pasaka, ka bez logiem un metric snapshots nevar droši nosaukt vienu root cause, un iesaka nākamos pārbaudes soļus.", |
| "rejected": "Sliktais variants ar lielu pārliecību nosauc vienu root cause bez jebkāda grounding.", |
| "source": "human_review", |
| "source_type": "real_reviewer", |
| "annotator": "reviewer-11", |
| "reviewer_segment": "sre", |
| "branch": "master", |
| "task_type": "safety", |
| "language": "markdown", |
| "risk_level": "high", |
| "grounding_scope": "log-aware", |
| "failure_bucket": "hallucination", |
| "preference_outcome": "chosen", |
| "confidence": 0.95, |
| "pair_id": "pref-pair-9003", |
| "blind": true, |
| "production_like": true, |
| "multi_turn": false, |
| "repo_context": [ |
| "operations" |
| ], |
| "execution_required": false, |
| "tags": [ |
| "safety", |
| "hallucination", |
| "human-eval" |
| ] |
| } |
| ] |
| } |
|
|