| { |
| "platform": "H200", |
| "model": "Qwen/Qwen3-Coder-30B-A3B-Instruct", |
| "pytorch": "2.11.0+cu130", |
| "n_topics": 30, |
| "n_agents": 4, |
| "agent_mix": "3 honest + 1 adversarial", |
| "seeds_completed": [42, 123], |
| "seeds_running": [456], |
| "date": "2026-05-08", |
| "per_seed": { |
| "42": { |
| "equal_1round": {"accuracy": 0.867, "correct": 26, "tokens": 41812}, |
| "equal_3round": {"accuracy": 0.567, "correct": 17, "tokens": 150099}, |
| "random_drop": {"accuracy": 0.833, "correct": 25, "tokens": 34181, "denied": 33}, |
| "occ_240_5": {"accuracy": 0.800, "correct": 24, "tokens": 40780, "denied": 6}, |
| "occ_180_3": {"accuracy": 0.867, "correct": 26, "tokens": 39952, "denied": 0}, |
| "occ_120_3": {"accuracy": 0.833, "correct": 25, "tokens": 42423, "denied": 0} |
| }, |
| "123": { |
| "equal_1round": {"accuracy": 0.900, "correct": 27, "tokens": 41875}, |
| "equal_3round": {"accuracy": 0.567, "correct": 17, "tokens": 149544}, |
| "random_drop": {"accuracy": 0.867, "correct": 26, "tokens": 27200, "denied": 35}, |
| "occ_240_5": {"accuracy": 0.767, "correct": 23, "tokens": 32071, "denied": 15}, |
| "occ_180_3": {"accuracy": 0.800, "correct": 24, "tokens": 42086, "denied": 0}, |
| "occ_120_3": {"accuracy": 0.867, "correct": 26, "tokens": 42902, "denied": 0} |
| } |
| }, |
| "aggregate_seeds_42_123": { |
| "equal_1round": {"mean_accuracy": 0.883, "mean_tokens": 41844}, |
| "equal_3round": {"mean_accuracy": 0.567, "mean_tokens": 149822}, |
| "random_drop": {"mean_accuracy": 0.850, "mean_tokens": 30691}, |
| "occ_240_5": {"mean_accuracy": 0.783, "mean_tokens": 36426}, |
| "occ_180_3": {"mean_accuracy": 0.833, "mean_tokens": 41019}, |
| "occ_120_3": {"mean_accuracy": 0.850, "mean_tokens": 42663} |
| }, |
| "key_findings": [ |
| "Equal 3-round collapses to 56.7% on BOTH seeds — 32pp below 1-round baseline (88.3%)", |
| "Adversarial agent given 3x speaking time floods the vote pool", |
| "Random 25% drop achieves 85.0% with 26.7% token savings — effective but undiscriminating", |
| "OCC 180/3 achieves 83.3% at iso-compute (41k vs 42k baseline tokens)", |
| "OCC 240/5 is too aggressive (78.3%); OCC 120/3 matches random-drop accuracy (85.0%)", |
| "OCC credit allocation prevents catastrophic failure but doesn't beat random gating at moderate budgets" |
| ] |
| } |
|
|