| """ |
| Benchmark 3 v2: Multi-Agent Debate with Variable Token Costs and Adversarial Agents |
| |
| Key improvements over v1: |
| - Agents have variable cost_per_turn (50 vs 500 tokens) — exposes OCC's advantage |
| - Adversarial overconfident agents (high verbosity, low accuracy) |
| - Tracks influence efficiency (correct flips per token) |
| - Measures bad-agent containment |
| |
| From v1: all agents had similar token costs, limiting compute savings to ~12%. |
| With variable costs, OCC should show >>30% savings by denying expensive wrong agents. |
| """ |
|
|
import json
import random
import sys
from collections import Counter
from dataclasses import dataclass, field
from pathlib import Path
from typing import Dict, List, Optional, Any

import numpy as np

# Make the sibling project packages importable when this file is run as a script.
sys.path.insert(0, str(Path(__file__).parent.parent))
from oracle.oracle import ImpactOracle, OracleResult
from ledger.ledger import CreditLedger
from broker.broker import ResourceBroker, Decision
|
|
|
|
@dataclass
class DebateTopic:
    """A single multiple-choice style question put to the debating agents."""

    # Text of the question shown to every agent.
    question: str
    # The one answer that counts as correct when scoring.
    correct_answer: str
    # Wrong answers an agent picks from when it fails to answer correctly.
    distractors: List[str]
|
|
|
|
@dataclass
class AgentConfig:
    """Static behaviour profile for one simulated debate agent."""

    # Identifier carried on every proposal the agent produces.
    agent_id: str
    # Probability (0..1) that a proposal picks the correct answer.
    accuracy: float
    # Baseline token cost of a single, non-verbose turn.
    cost_per_turn: int
    # Additive shift applied to the agent's sampled confidence.
    confidence_bias: float
    # Probability (0..1) that a turn is verbose and costs 4x the baseline.
    verbose_prob: float
    # Adversarial agents report high confidence even when they are wrong.
    is_adversarial: bool = False
|
|
|
|
class DebateAgent:
    """Simulated debate participant with configurable cost and behavior.

    The agent keeps running totals (tokens, turns, influence, flips) across
    every call to :meth:`propose`, so one instance can be reused over many
    topics to accumulate per-strategy statistics.
    """

    def __init__(self, config: "AgentConfig"):
        """Bind the agent to its behaviour profile and zero all counters."""
        self.config = config
        self.tokens_used = 0
        self.turns_taken = 0
        self.influence_score = 0.0
        self.correct_flips = 0
        self.wrong_flips = 0

    def propose(self, topic: "DebateTopic", prior_proposals: List[Dict]) -> Dict:
        """Produce one proposal for *topic* and update the agent's counters.

        Args:
            topic: Object exposing ``correct_answer`` and ``distractors``.
            prior_proposals: Proposals already made on this topic, oldest
                first. Only each entry's ``"answer"`` key is read.

        Returns:
            A dict with keys ``agent_id``, ``answer``, ``confidence``,
            ``correct``, ``tokens``, ``influence`` and ``is_adversarial``.
        """
        cfg = self.config
        self.turns_taken += 1

        # Token cost: a verbose turn costs 4x the baseline, otherwise the
        # baseline plus a small jitter. The floor of 10 keeps cost positive.
        # NOTE: the order of the random draws below is part of the seeded
        # behaviour; do not reorder them.
        if random.random() < cfg.verbose_prob:
            tokens = cfg.cost_per_turn * 4
        else:
            tokens = cfg.cost_per_turn + random.randint(-10, 20)
        tokens = max(10, tokens)
        self.tokens_used += tokens

        # Answer: correct with probability `accuracy`, else a random distractor.
        correct = random.random() < cfg.accuracy
        answer = topic.correct_answer if correct else random.choice(topic.distractors)

        # Confidence: adversarial agents stay highly confident when wrong and
        # ignore their bias in that branch; everyone else is shifted by it.
        if correct:
            confidence = 0.7 + random.random() * 0.3 + cfg.confidence_bias
        elif cfg.is_adversarial:
            confidence = 0.8 + random.random() * 0.2
        else:
            confidence = 0.4 + random.random() * 0.4 + cfg.confidence_bias
        confidence = max(0.0, min(1.0, confidence))

        # Influence: agreeing with the current majority adds little, while
        # disagreeing is counted as a (correct or wrong) flip attempt.
        if not prior_proposals:
            influence = 0.3
        else:
            # Counter.most_common breaks ties by first occurrence, so the
            # majority does not depend on per-process string hash ordering
            # the way max(set(...), key=list.count) does.
            tally = Counter(p["answer"] for p in prior_proposals)
            majority = tally.most_common(1)[0][0]
            if answer == majority:
                influence = 0.1
            else:
                influence = 0.5
                if correct:
                    self.correct_flips += 1
                else:
                    self.wrong_flips += 1

        self.influence_score += influence

        return {
            "agent_id": cfg.agent_id,
            "answer": answer,
            "confidence": confidence,
            "correct": correct,
            "tokens": tokens,
            "influence": influence,
            "is_adversarial": cfg.is_adversarial,
        }
|
|
|
|
class DebateBenchmarkV2:
    """v2: Variable-cost agents + adversarial scenarios.

    Runs one set of topics through several resolution strategies (equal
    turns, majority vote, confidence-weighted vote, and two OCC credit
    allocation variants) and summarizes accuracy, compute and how many turns
    adversarial agents received under each.
    """

    def __init__(
        self,
        n_topics: int = 50,
        n_agents: int = 5,
        budget_per_topic: float = 2000.0,
        adversarial_fraction: float = 0.4,
        seed: int = 42,
    ):
        """Store the benchmark parameters and build the shared oracle.

        Args:
            n_topics: Number of topics to generate (the pool is cycled).
            n_agents: Total number of agents, normal plus adversarial.
            budget_per_topic: Token budget that stops the OCC turn loop.
            adversarial_fraction: Fraction of ``n_agents`` made adversarial
                (rounded down).
            seed: Seed applied to ``random`` and ``numpy.random``.
        """
        self.n_topics = n_topics
        self.n_agents = n_agents
        self.budget_per_topic = budget_per_topic
        self.adversarial_fraction = adversarial_fraction
        self.seed = seed
        self.topics: List["DebateTopic"] = []
        self.oracle = ImpactOracle(compute_budget=budget_per_topic)

    # ------------------------------------------------------------------ #
    # Setup
    # ------------------------------------------------------------------ #

    def _seed_rng(self) -> None:
        """Seed both RNGs used by the benchmark from ``self.seed``."""
        random.seed(self.seed)
        np.random.seed(self.seed)

    def create_agents(self) -> List["AgentConfig"]:
        """Create agents with variable costs and adversarial mix.

        Normal agents cycle through three base profiles (fast, medium,
        expensive). When more than three are requested the profiles repeat
        with a numeric suffix, so ids stay unique and exactly ``n_agents``
        configs are returned. Adversarial agents get randomized low accuracy
        and high cost. The returned list is shuffled.
        """
        n_adversarial = int(self.n_agents * self.adversarial_fraction)
        n_normal = self.n_agents - n_adversarial

        # (agent_id, accuracy, cost_per_turn, confidence_bias, verbose_prob)
        base_profiles = [
            ("agent_fast", 0.70, 50, 0.05, 0.05),
            ("agent_medium", 0.65, 200, 0.10, 0.10),
            ("agent_expensive", 0.72, 500, 0.02, 0.05),
        ]

        configs = []
        for i in range(n_normal):
            name, accuracy, cost, bias, verbose = base_profiles[i % len(base_profiles)]
            cycle = i // len(base_profiles)
            # First pass keeps the original ids; later passes get "_2", "_3", ...
            agent_id = name if cycle == 0 else f"{name}_{cycle + 1}"
            configs.append(AgentConfig(
                agent_id,
                accuracy=accuracy,
                cost_per_turn=cost,
                confidence_bias=bias,
                verbose_prob=verbose,
            ))

        # Adversarial agents: accuracy is drawn before cost, in that order,
        # so seeded runs keep producing the same pool.
        for i in range(n_adversarial):
            configs.append(AgentConfig(
                agent_id=f"agent_adversarial_{i+1}",
                accuracy=0.35 + random.random() * 0.15,
                cost_per_turn=300 + random.randint(0, 300),
                confidence_bias=0.30,
                verbose_prob=0.40,
                is_adversarial=True,
            ))

        random.shuffle(configs)
        return configs

    def generate_topics(self):
        """Seed the RNGs and (re)build ``self.topics`` with ``n_topics`` entries.

        The list is rebuilt from scratch, so calling this more than once does
        not duplicate topics.
        """
        self._seed_rng()

        topic_pool = [
            ("What is 15 * 17?", "255", ["245", "265", "225", "275"]),
            ("Capital of Australia?", "Canberra", ["Sydney", "Melbourne", "Perth", "Brisbane"]),
            ("Author of '1984'?", "George Orwell", ["Aldous Huxley", "Ray Bradbury", "H.G. Wells", "Kurt Vonnegut"]),
            ("Square root of 256?", "16", ["14", "18", "12", "20"]),
            ("Element with symbol Au?", "Gold", ["Silver", "Aluminum", "Argon", "Astatine"]),
            ("Year WWI ended?", "1918", ["1919", "1917", "1920", "1916"]),
            ("Smallest prime number?", "2", ["1", "3", "0", "-1"]),
            ("Largest planet?", "Jupiter", ["Saturn", "Neptune", "Uranus", "Earth"]),
            ("Speed of light (m/s)?", "299792458", ["300000000", "299000000", "310000000", "280000000"]),
            ("First US president?", "George Washington", ["Thomas Jefferson", "John Adams", "Abraham Lincoln", "Benjamin Franklin"]),
            ("Chemical formula of water?", "H2O", ["HO2", "H2O2", "HO", "OH"]),
            ("Number of continents?", "7", ["5", "6", "8", "4"]),
            ("Distance from Earth to Sun (km)?", "149600000", ["150000000", "148000000", "151000000", "147000000"]),
            ("Primary language of Brazil?", "Portuguese", ["Spanish", "English", "French", "Italian"]),
            ("Formula for area of circle?", "pi*r^2", ["2*pi*r", "pi*d", "r^2*pi/2", "pi*r"]),
        ]

        self.topics = [
            DebateTopic(
                question=topic_pool[i % len(topic_pool)][0],
                correct_answer=topic_pool[i % len(topic_pool)][1],
                distractors=topic_pool[i % len(topic_pool)][2],
            )
            for i in range(self.n_topics)
        ]

    # ------------------------------------------------------------------ #
    # Shared helpers
    # ------------------------------------------------------------------ #

    @staticmethod
    def _majority_answer(proposals: List[Dict]) -> str:
        """Return the most frequent answer; ties go to the answer seen first.

        Raises:
            ValueError: If *proposals* is empty.
        """
        if not proposals:
            raise ValueError("cannot pick a majority answer from zero proposals")
        # most_common orders equal counts by first occurrence, which makes
        # the outcome independent of per-process string hash ordering.
        return Counter(p["answer"] for p in proposals).most_common(1)[0][0]

    @staticmethod
    def _adversarial_contained(proposals: List[Dict]) -> bool:
        """True when no adversarial agent spoke more than once on the topic.

        Counting per agent (rather than across all adversarial agents) keeps
        the flag meaningful when several adversarial agents each take their
        single opening turn.
        """
        per_agent = Counter(p["agent_id"] for p in proposals if p.get("is_adversarial"))
        return all(count <= 1 for count in per_agent.values())

    def _baseline_result(self, strategy: str, topic: "DebateTopic",
                         proposals: List[Dict], final: str) -> Dict:
        """Build the result dict shared by the non-OCC strategies."""
        adversarial = [p for p in proposals if p.get("is_adversarial")]
        compute_used = 0.0
        for p in proposals:
            compute_used += p["tokens"]
        return {
            "strategy": strategy,
            "correct": final == topic.correct_answer, "final_answer": final,
            "compute_used": compute_used, "n_turns": len(proposals),
            "proposals": proposals,
            "adversarial_turns": len(adversarial),
            "bad_agent_tokens": sum(p["tokens"] for p in adversarial),
            "adversarial_contained": self._adversarial_contained(proposals),
        }

    @staticmethod
    def _collect_proposals(agents: List["DebateAgent"], topic: "DebateTopic",
                           turns_per: int = 1) -> List[Dict]:
        """Let each agent, in order, take ``turns_per`` consecutive turns."""
        proposals: List[Dict] = []
        for agent in agents:
            for _ in range(turns_per):
                proposals.append(agent.propose(topic, proposals))
        return proposals

    # ------------------------------------------------------------------ #
    # Baseline strategies
    # ------------------------------------------------------------------ #

    def _resolve_equal_turns(self, agents: List["DebateAgent"], topic: "DebateTopic",
                             turns_per: int = 2) -> Dict:
        """Every agent gets ``turns_per`` turns; final answer is the plain majority."""
        proposals = self._collect_proposals(agents, topic, turns_per)
        return self._baseline_result("equal_turns", topic, proposals,
                                     self._majority_answer(proposals))

    def _resolve_majority_vote(self, agents: List["DebateAgent"], topic: "DebateTopic") -> Dict:
        """Every agent gets one turn; final answer is the plain majority."""
        proposals = self._collect_proposals(agents, topic)
        return self._baseline_result("majority_vote", topic, proposals,
                                     self._majority_answer(proposals))

    def _resolve_confidence_weighted(self, agents: List["DebateAgent"], topic: "DebateTopic") -> Dict:
        """Every agent gets one turn; answers are weighted by stated confidence.

        Raises:
            ValueError: If there are no proposals to aggregate.
        """
        proposals = self._collect_proposals(agents, topic)

        vote_scores: Dict[str, float] = {}
        for p in proposals:
            vote_scores[p["answer"]] = vote_scores.get(p["answer"], 0.0) + p["confidence"]
        # dicts iterate in insertion order, so ties go to the answer seen first.
        final = max(vote_scores, key=vote_scores.get)

        return self._baseline_result("confidence_weighted", topic, proposals, final)

    # ------------------------------------------------------------------ #
    # OCC strategy
    # ------------------------------------------------------------------ #

    def _settle_turn(self, ledger, agent: "DebateAgent", task_id: str, prop: Dict,
                     turn: int, previous_correct: bool, reward_multiplier: float) -> None:
        """Score one proposal with the oracle and credit or debit the agent.

        A correct proposal earns ``reward_value * reward_multiplier``; a wrong
        one is charged ``tokens / 500``. The oracle is queried in both cases,
        before the ledger is touched, as the original flow did.
        """
        agent_id = agent.config.agent_id
        oracle_res = self.oracle.score(
            mode="debate",
            action={"tokens_used": prop["tokens"]},
            context={"previous_correct": previous_correct},
            result={
                "final_correct": prop["correct"],
                "agent_contribution": prop["influence"],
                "compute_cost": prop["tokens"],
                "tokens_used": prop["tokens"],
                "total_turns": turn,
            },
            agent_id=agent_id,
        )

        if prop["correct"]:
            ledger.earn(agent_id, task_id, f"turn_{turn}",
                        oracle_res.reward_value * reward_multiplier,
                        oracle_res.raw_score, prop["tokens"], "correct")
        else:
            wrong_cost = prop["tokens"] / 500.0
            ledger.spend(agent_id, task_id, f"turn_{turn}",
                         wrong_cost, reason="wrong_proposal")

    def _resolve_occ(self, agents: List["DebateAgent"], topic: "DebateTopic",
                     use_decay: bool = True, max_turns: int = 15) -> Dict:
        """OCC with credit allocation and broker gating.

        Flow: seed every agent with 10 credits, give each one opening turn,
        then repeatedly offer the next turn to agents in descending balance
        order until the broker allows one. The loop stops when the broker
        allows nobody, ``max_turns`` is reached, or the topic budget is
        spent. The final answer is a vote weighted by each proposer's ledger
        balance (floored at 0.1).

        Raises:
            ValueError: If *agents* is empty, since there is nothing to vote on.
        """
        ledger = CreditLedger(decay_lambda=0.1 if use_decay else 0.0)
        broker = ResourceBroker()
        task_id = topic.question[:30]
        proposals: List[Dict] = []
        compute_used = 0.0
        turns = 0

        # Seed credits so every agent starts with a positive balance.
        for agent in agents:
            ledger.earn(agent.config.agent_id, task_id, "seed", 10.0, 0.0, 0.0, "initial_seed")

        # Opening round: one unconditional turn per agent.
        for agent in agents:
            prop = agent.propose(topic, proposals)
            proposals.append(prop)
            compute_used += prop["tokens"]
            turns += 1
            self._settle_turn(ledger, agent, task_id, prop, turns,
                              previous_correct=False, reward_multiplier=5.0)

        # Follow-up turns: highest balance first, gated by the broker.
        while turns < max_turns and compute_used < self.budget_per_topic:
            # Balance is read once per agent so the value used for ranking is
            # the same one passed to the broker.
            ranked = sorted(
                [(a, ledger.balance(a.config.agent_id)) for a in agents],
                key=lambda pair: pair[1], reverse=True,
            )
            # Proposals do not change until a turn is granted, so these are
            # invariant across the candidates tried below.
            correct_so_far = any(p["correct"] for p in proposals)
            n_adversarial_seen = sum(1 for p in proposals if p.get("is_adversarial"))

            allocated = False
            for agent, balance in ranked:
                dec = broker.request(
                    "debate_turn", agent.config.agent_id, balance,
                    task_state={
                        "correct_so_far": correct_so_far,
                        "n_adversarial": n_adversarial_seen,
                    },
                    gaming_flags=["adversarial_agent"] if agent.config.is_adversarial else [],
                )
                if dec.decision != Decision.ALLOW:
                    continue

                prop = agent.propose(topic, proposals)
                proposals.append(prop)
                compute_used += prop["tokens"]
                turns += 1
                # "previous_correct" covers every proposal before this one,
                # which is exactly correct_so_far computed above.
                self._settle_turn(ledger, agent, task_id, prop, turns,
                                  previous_correct=correct_so_far, reward_multiplier=3.0)
                allocated = True
                break

            if not allocated:
                break

        # Final vote weighted by the proposer's current balance.
        vote_scores: Dict[str, float] = {}
        for p in proposals:
            w = max(0.1, ledger.balance(p["agent_id"]))
            vote_scores[p["answer"]] = vote_scores.get(p["answer"], 0.0) + w
        final = max(vote_scores, key=vote_scores.get)
        correct = final == topic.correct_answer

        n_adversarial_turns = sum(1 for p in proposals if p.get("is_adversarial"))
        bad_tokens = sum(p["tokens"] for p in proposals if p.get("is_adversarial"))

        return {
            "strategy": "occ_allocation",
            "correct": correct, "final_answer": final,
            "compute_used": compute_used, "n_turns": turns,
            "proposals": proposals,
            "adversarial_turns": n_adversarial_turns,
            "bad_agent_tokens": bad_tokens,
            # Contained = no adversarial agent got a turn beyond its opening one.
            "adversarial_contained": self._adversarial_contained(proposals),
        }

    # ------------------------------------------------------------------ #
    # Aggregation and driver
    # ------------------------------------------------------------------ #

    def _summarize(self, results: List[Dict], label: str) -> Dict:
        """Aggregate per-topic results into one summary dict for *label*.

        Results that carry no ``adversarial_contained`` flag have it derived
        from their ``proposals`` instead of being counted as contained.
        """
        n = len(results)
        correct = sum(1 for r in results if r["correct"])
        total_compute = sum(r["compute_used"] for r in results)
        total_turns = sum(r["n_turns"] for r in results)
        total_adv_turns = sum(r.get("adversarial_turns", 0) for r in results)
        total_bad_tokens = sum(r.get("bad_agent_tokens", 0) for r in results)

        contained = 0
        for r in results:
            if "adversarial_contained" in r:
                flag = bool(r["adversarial_contained"])
            else:
                flag = self._adversarial_contained(r.get("proposals", []))
            contained += 1 if flag else 0

        return {
            "label": label,
            "n_topics": n,
            "accuracy": correct / n if n else 0.0,
            "total_compute": float(total_compute),
            "mean_compute_per_topic": float(total_compute / n) if n else 0.0,
            "mean_turns": float(total_turns / n) if n else 0.0,
            "mean_adv_turns": float(total_adv_turns / n) if n else 0.0,
            "bad_agent_tokens": float(total_bad_tokens),
            "bad_agent_containment": contained / n if n else 0.0,
            "quality_per_1k_tokens": (correct / n) / (total_compute / 1000) if total_compute else 0.0,
            "results": results,
        }

    def run_all(self) -> Dict[str, Dict]:
        """Run every strategy over all topics and return summaries keyed by id.

        Each strategy gets fresh ``DebateAgent`` instances built from the same
        configs. The RNGs are re-seeded at the start so the result does not
        depend on random draws the caller made after generating topics.
        """
        if not self.topics:
            self.generate_topics()
        self._seed_rng()

        agent_configs = self.create_agents()
        print(f"Agents: {[(c.agent_id, c.accuracy, c.cost_per_turn, c.is_adversarial) for c in agent_configs]}")

        # (result key, display label, resolver) in execution order. The order
        # matters because all strategies draw from the same RNG stream.
        plan = [
            ("A_equal_turns", "A. Equal turns", self._resolve_equal_turns),
            ("B_majority_vote", "B. Majority vote", self._resolve_majority_vote),
            ("C_confidence_weighted", "C. Confidence-weighted", self._resolve_confidence_weighted),
            ("E_occ", "E. OCC allocation",
             lambda ag, t: self._resolve_occ(ag, t, use_decay=True)),
            ("F_occ_no_decay", "F. OCC (no decay)",
             lambda ag, t: self._resolve_occ(ag, t, use_decay=False)),
        ]

        strategies = {}
        for key, label, resolve in plan:
            agents = [DebateAgent(c) for c in agent_configs]
            strategies[key] = self._summarize(
                [resolve(agents, t) for t in self.topics], label
            )

        return strategies
|
|
|
|
def main(output_dir: str = "/app/occ/reports"):
    """Run the v2 debate benchmark, print a comparison table and save the results.

    Args:
        output_dir: Directory that receives ``benchmark_debate_v2_results.json``.
            It is created if missing. Defaults to the original hard-coded path.
    """
    bench = DebateBenchmarkV2(n_topics=50, n_agents=5, adversarial_fraction=0.4, seed=42)
    bench.generate_topics()
    results = bench.run_all()

    # Header and rows share the same column widths so the table lines up.
    header = (
        f"{'Strategy':<25} {'Acc':>6} {'Comp':>8} {'Turns':>6} {'AdvT':>6} "
        f"{'BadTok':>8} {'Contain':>8} {'Qual/K':>8}"
    )
    print("\n" + "=" * len(header))
    print("MULTI-AGENT DEBATE BENCHMARK v2 (Variable Costs + Adversarial)")
    print("=" * len(header))
    print(header)
    print("-" * len(header))
    for key in ["A_equal_turns", "B_majority_vote", "C_confidence_weighted", "E_occ", "F_occ_no_decay"]:
        r = results[key]
        print(
            f"{r['label']:<25} {r['accuracy']:>6.3f} {r['mean_compute_per_topic']:>8.0f} "
            f"{r['mean_turns']:>6.1f} {r['mean_adv_turns']:>6.1f} {r['bad_agent_tokens']:>8.0f} "
            f"{r['bad_agent_containment']:>8.2f} {r['quality_per_1k_tokens']:>8.4f}"
        )

    # Best accuracy among the three non-OCC baselines.
    baseline_acc = max(results["A_equal_turns"]["accuracy"],
                       results["B_majority_vote"]["accuracy"],
                       results["C_confidence_weighted"]["accuracy"])

    occ = results["E_occ"]
    equal_compute = results["A_equal_turns"]["mean_compute_per_topic"]
    print("\n--- Key Comparisons ---")
    print(f"Best baseline accuracy: {baseline_acc:.3f}")
    print(f"OCC accuracy: {occ['accuracy']:.3f}")
    if equal_compute:
        saving = (1 - occ["mean_compute_per_topic"] / equal_compute) * 100
        print(f"OCC compute saving vs equal_turns: {saving:.1f}%")
    else:
        # No baseline compute to compare against (e.g. zero topics).
        print("OCC compute saving vs equal_turns: n/a")
    print(f"OCC bad-agent containment: {occ['bad_agent_containment']:.1%}")
    print(f"Confidence-weighted bad-agent containment: {results['C_confidence_weighted']['bad_agent_containment']:.1%}")

    report_dir = Path(output_dir)
    report_dir.mkdir(parents=True, exist_ok=True)
    report_path = report_dir / "benchmark_debate_v2_results.json"
    with open(report_path, "w", encoding="utf-8") as f:
        # default=str stringifies any value json cannot encode natively.
        json.dump(results, f, indent=2, default=str)
    # Report the path actually written rather than a relative approximation.
    print(f"\nSaved to {report_path}")
|
|
|
|
# Script entry point: run the benchmark only when executed directly, not on import.
if __name__ == "__main__":
    main()
|
|