Upload benchmarks/benchmark_debate_adversarial.py

Browse files

Files changed (1) hide show

benchmarks/benchmark_debate_adversarial.py +100 -1

benchmarks/benchmark_debate_adversarial.py CHANGED Viewed

	@@ -1 +1,100 @@
1	- ~~See /app/occ/benchmarks/benchmark_debate_adversarial.py~~

+"""
+Overcome Limitation B: Multi-Agent Debate with Adversarial/Bad Agents.
+Shows OCC credit-based filtering when some agents are noisy or adversarial.
+"""
+import json
+import random
+from pathlib import Path
+from typing import Dict, List
+import sys
+sys.path.insert(0, str(Path(__file__).parent.parent))
+from benchmarks.benchmark_debate import DebateBenchmark, DebateTopic, SimulatedDebateAgent
+class AdversarialDebateAgent(SimulatedDebateAgent):
+    """Agent that deliberately gives wrong answers with high confidence."""
+    def __init__(self, agent_id: str, accuracy: float = 0.2, confidence_bias: float = 0.3, verbose_prob: float = 0.5):
+        super().__init__(agent_id, accuracy=accuracy, confidence_bias=confidence_bias, verbose_prob=verbose_prob)
+    def propose(self, topic: DebateTopic, prior_proposals: List[Dict]) -> Dict:
+        prop = super().propose(topic, prior_proposals)
+        prop["confidence"] = min(1.0, prop["confidence"] + 0.2)
+        return prop
+class LazyDebateAgent(SimulatedDebateAgent):
+    """Agent that barely participates."""
+    def __init__(self, agent_id: str, accuracy: float = 0.3, confidence_bias: float = -0.2):
+        super().__init__(agent_id, accuracy=accuracy, confidence_bias=confidence_bias, verbose_prob=0.0)
+    def propose(self, topic: DebateTopic, prior_proposals: List[Dict]) -> Dict:
+        prop = super().propose(topic, prior_proposals)
+        prop["tokens"] = 10
+        prop["confidence"] = 0.3
+        return prop
+class DebateAdversarialBenchmark(DebateBenchmark):
+    def __init__(self, n_topics: int = 50, n_agents: int = 4, bad_agent_ratio: float = 0.25, seed: int = 42):
+        super().__init__(n_topics=n_topics, n_agents=n_agents, seed=seed)
+        self.bad_agent_ratio = bad_agent_ratio
+    def create_mixed_agents(self) -> List[SimulatedDebateAgent]:
+        n_bad = max(1, int(self.n_agents * self.bad_agent_ratio))
+        n_good = self.n_agents - n_bad
+        agents = []
+        for i in range(n_good):
+            agents.append(SimulatedDebateAgent(f"good_{i}", accuracy=0.75, confidence_bias=0.05))
+        for i in range(n_bad):
+            if i % 2 == 0:
+                agents.append(AdversarialDebateAgent(f"bad_adv_{i}", accuracy=0.2, confidence_bias=0.3))
+            else:
+                agents.append(LazyDebateAgent(f"bad_lazy_{i}", accuracy=0.3))
+        return agents
+    def run_with_bad_agents(self, strategy: str = "occ") -> Dict:
+        agents = self.create_mixed_agents()
+        topic_results = []
+        for topic in self.topics:
+            if strategy == "equal_turns":
+                topic_results.append(self._resolve_equal_turns(agents, topic))
+            elif strategy == "occ":
+                topic_results.append(self._resolve_occ_allocation(agents, topic))
+            elif strategy == "confidence_weighted":
+                topic_results.append(self._resolve_confidence_weighted(agents, topic))
+            elif strategy == "majority_vote":
+                topic_results.append(self._resolve_majority_vote(agents, topic))
+        return self._summarize(topic_results, strategy)
+    def run_all_varying_bad_ratios(self) -> Dict:
+        results = {}
+        for ratio in [0.0, 0.25, 0.5]:
+            self.bad_agent_ratio = ratio
+            print(f"\n--- Bad agent ratio: {ratio} ---")
+            for strategy in ["equal_turns", "confidence_weighted", "occ"]:
+                res = self.run_with_bad_agents(strategy)
+                results[f"ratio_{ratio}_{strategy}"] = res
+                print(f"  {strategy}: acc={res['accuracy']:.3f}, compute={res['mean_compute_per_topic']:.1f}, "
+                      f"quality_per_compute={res['quality_per_compute']:.6f}")
+        return results
+def main():
+    bench = DebateAdversarialBenchmark(n_topics=50, n_agents=4, bad_agent_ratio=0.25, seed=42)
+    bench.generate_topics()
+    results = bench.run_all_varying_bad_ratios()
+    print("\n" + "=" * 60)
+    print("ADVERSARIAL DEBATE BENCHMARK")
+    print("=" * 60)
+    for label, res in results.items():
+        print(f"{label:35s}: acc={res['accuracy']:.3f}, compute={res['mean_compute_per_topic']:.1f}, "
+              f"quality/compute={res['quality_per_compute']:.6f}")
+    Path("/app/occ/reports").mkdir(parents=True, exist_ok=True)
+    with open("/app/occ/reports/benchmark_debate_adversarial_results.json", "w") as f:
+        json.dump(results, f, indent=2, default=str)
+    print("\nSaved to reports/benchmark_debate_adversarial_results.json")
+if __name__ == "__main__":
+    main()