| """ |
| Overcome Limitation B: Multi-Agent Debate with Adversarial/Bad Agents. |
| Shows OCC credit-based filtering when some agents are noisy or adversarial. |
| """ |
| import json |
| import random |
| from pathlib import Path |
| from typing import Dict, List |
|
|
| import sys |
| sys.path.insert(0, str(Path(__file__).parent.parent)) |
| from benchmarks.benchmark_debate import DebateBenchmark, DebateTopic, SimulatedDebateAgent |
|
|
|
|
class AdversarialDebateAgent(SimulatedDebateAgent):
    """Low-accuracy debate agent that overstates its confidence.

    The constructor defaults to ``accuracy=0.2`` and ``confidence_bias=0.3``.
    On top of whatever the parent class produces, every proposal has its
    reported confidence raised by a fixed amount and capped at 1.0.
    """

    # Amount added to the parent's confidence on every proposal.
    _CONFIDENCE_BOOST = 0.2

    def __init__(self, agent_id: str, accuracy: float = 0.2, confidence_bias: float = 0.3, verbose_prob: float = 0.5):
        """Forward all settings unchanged to ``SimulatedDebateAgent``."""
        super().__init__(
            agent_id,
            accuracy=accuracy,
            confidence_bias=confidence_bias,
            verbose_prob=verbose_prob,
        )

    def propose(self, topic: DebateTopic, prior_proposals: List[Dict]) -> Dict:
        """Return the parent's proposal with its ``"confidence"`` inflated.

        Args:
            topic: Topic under debate, passed through to the parent class.
            prior_proposals: Earlier proposals, passed through to the parent.

        Returns:
            The proposal dict built by the parent, with ``"confidence"``
            increased by 0.2 and clamped to at most 1.0.
        """
        proposal = super().propose(topic, prior_proposals)
        inflated = proposal["confidence"] + self._CONFIDENCE_BOOST
        proposal["confidence"] = min(1.0, inflated)
        return proposal
|
|
|
|
class LazyDebateAgent(SimulatedDebateAgent):
    """Debate agent that puts in minimal effort.

    The parent is always configured with ``verbose_prob=0.0``, and every
    proposal is overwritten to report 10 tokens and a confidence of 0.3,
    regardless of what the parent class computed for those two fields.
    """

    def __init__(self, agent_id: str, accuracy: float = 0.3, confidence_bias: float = -0.2):
        """Configure the parent agent with verbosity disabled."""
        super().__init__(
            agent_id,
            accuracy=accuracy,
            confidence_bias=confidence_bias,
            verbose_prob=0.0,
        )

    def propose(self, topic: DebateTopic, prior_proposals: List[Dict]) -> Dict:
        """Return the parent's proposal with fixed token count and confidence.

        Args:
            topic: Topic under debate, passed through to the parent class.
            prior_proposals: Earlier proposals, passed through to the parent.

        Returns:
            The parent's proposal dict with ``"tokens"`` set to 10 and
            ``"confidence"`` set to 0.3.
        """
        proposal = super().propose(topic, prior_proposals)
        # Same two assignments as before, in the same key order.
        proposal.update(tokens=10, confidence=0.3)
        return proposal
|
|
|
|
class DebateAdversarialBenchmark(DebateBenchmark):
    """Debate benchmark whose agent pool mixes good, adversarial and lazy agents.

    ``bad_agent_ratio`` controls what fraction of the ``n_agents`` pool is
    replaced by bad agents (alternating ``AdversarialDebateAgent`` and
    ``LazyDebateAgent``). Topic storage, the ``_resolve_*`` strategies and
    ``_summarize`` are inherited from ``DebateBenchmark``.
    """

    # Strategy name -> name of the inherited resolver method. Methods are
    # looked up lazily with getattr so that a resolver the base class does not
    # provide only fails when that particular strategy is requested.
    _STRATEGY_RESOLVERS = {
        "equal_turns": "_resolve_equal_turns",
        "occ": "_resolve_occ_allocation",
        "confidence_weighted": "_resolve_confidence_weighted",
        "majority_vote": "_resolve_majority_vote",
    }

    def __init__(self, n_topics: int = 50, n_agents: int = 4, bad_agent_ratio: float = 0.25, seed: int = 42):
        """Initialise the base benchmark and remember the bad-agent ratio.

        Args:
            n_topics: Forwarded to ``DebateBenchmark``.
            n_agents: Forwarded to ``DebateBenchmark``; total size of the pool.
            bad_agent_ratio: Fraction of the pool that should be bad agents.
            seed: Forwarded to ``DebateBenchmark``.
        """
        super().__init__(n_topics=n_topics, n_agents=n_agents, seed=seed)
        self.bad_agent_ratio = bad_agent_ratio

    def _bad_agent_count(self) -> int:
        """Return how many of the ``n_agents`` slots go to bad agents."""
        # A non-positive ratio means a clean pool. Previously max(1, ...) forced
        # one bad agent even at ratio 0.0, which corrupted the baseline run.
        if self.bad_agent_ratio <= 0:
            return 0
        # Any positive ratio yields at least one bad agent (truncation would
        # otherwise round small pools down to zero), never more than the pool.
        requested = max(1, int(self.n_agents * self.bad_agent_ratio))
        return min(self.n_agents, requested)

    def create_mixed_agents(self) -> List[SimulatedDebateAgent]:
        """Build a fresh agent pool for the current ``bad_agent_ratio``.

        Returns:
            A list of ``n_agents`` agents: good agents first
            (``accuracy=0.75``, ``confidence_bias=0.05``), followed by bad
            agents that alternate adversarial (even index) and lazy (odd
            index).
        """
        n_bad = self._bad_agent_count()
        n_good = self.n_agents - n_bad
        agents: List[SimulatedDebateAgent] = [
            SimulatedDebateAgent(f"good_{i}", accuracy=0.75, confidence_bias=0.05)
            for i in range(n_good)
        ]
        for i in range(n_bad):
            if i % 2 == 0:
                agents.append(AdversarialDebateAgent(f"bad_adv_{i}", accuracy=0.2, confidence_bias=0.3))
            else:
                agents.append(LazyDebateAgent(f"bad_lazy_{i}", accuracy=0.3))
        return agents

    def run_with_bad_agents(self, strategy: str = "occ") -> Dict:
        """Resolve every topic with a mixed agent pool using one strategy.

        Args:
            strategy: One of ``"equal_turns"``, ``"occ"``,
                ``"confidence_weighted"`` or ``"majority_vote"``.

        Returns:
            The summary produced by the inherited ``_summarize`` for the
            per-topic results.

        Raises:
            ValueError: If ``strategy`` is not a known strategy name. The old
                code skipped every topic silently and summarised an empty
                result list.
        """
        resolver_name = self._STRATEGY_RESOLVERS.get(strategy)
        if resolver_name is None:
            known = ", ".join(sorted(self._STRATEGY_RESOLVERS))
            raise ValueError(f"Unknown strategy {strategy!r}; expected one of: {known}")

        agents = self.create_mixed_agents()
        # Resolve the strategy once instead of re-testing it for every topic.
        resolve = getattr(self, resolver_name)
        topic_results = [resolve(agents, topic) for topic in self.topics]
        return self._summarize(topic_results, strategy)

    def run_all_varying_bad_ratios(self) -> Dict:
        """Run three strategies at bad-agent ratios 0.0, 0.25 and 0.5.

        Prints one progress line per (ratio, strategy) pair. The instance's
        ``bad_agent_ratio`` is overridden during the sweep and restored
        afterwards, even if a run raises.

        Returns:
            Mapping from ``"ratio_<ratio>_<strategy>"`` to the summary dict of
            that run.
        """
        results = {}
        configured_ratio = self.bad_agent_ratio
        try:
            for ratio in [0.0, 0.25, 0.5]:
                self.bad_agent_ratio = ratio
                print(f"\n--- Bad agent ratio: {ratio} ---")
                for strategy in ["equal_turns", "confidence_weighted", "occ"]:
                    res = self.run_with_bad_agents(strategy)
                    results[f"ratio_{ratio}_{strategy}"] = res
                    print(f" {strategy}: acc={res['accuracy']:.3f}, compute={res['mean_compute_per_topic']:.1f}, "
                          f"quality_per_compute={res['quality_per_compute']:.6f}")
        finally:
            # Do not leak the last swept ratio into later use of this instance.
            self.bad_agent_ratio = configured_ratio
        return results
|
|
|
|
def main():
    """Run the adversarial debate benchmark, print a table and save JSON.

    Builds a 50-topic, 4-agent benchmark (seed 42), sweeps the bad-agent
    ratios via ``run_all_varying_bad_ratios``, prints one summary line per
    run, and writes all results to
    ``/app/occ/reports/benchmark_debate_adversarial_results.json``.
    """
    benchmark = DebateAdversarialBenchmark(n_topics=50, n_agents=4, bad_agent_ratio=0.25, seed=42)
    benchmark.generate_topics()
    results = benchmark.run_all_varying_bad_ratios()

    banner = "=" * 60
    print("\n" + banner)
    print("ADVERSARIAL DEBATE BENCHMARK")
    print(banner)
    for label, summary in results.items():
        print(f"{label:35s}: acc={summary['accuracy']:.3f}, compute={summary['mean_compute_per_topic']:.1f}, "
              f"quality/compute={summary['quality_per_compute']:.6f}")

    report_dir = Path("/app/occ/reports")
    report_dir.mkdir(parents=True, exist_ok=True)
    report_file = report_dir / "benchmark_debate_adversarial_results.json"
    with report_file.open("w") as handle:
        # default=str stringifies any value json cannot serialise natively.
        json.dump(results, handle, indent=2, default=str)
    print("\nSaved to reports/benchmark_debate_adversarial_results.json")
|
|
|
|
# Script entry point: run the benchmark only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|