occ-stack / benchmarks /benchmark_debate_adversarial.py

Upload benchmarks/benchmark_debate_adversarial.py

e56f288 verified 27 days ago

4.52 kB

	"""
	Overcome Limitation B: multi-agent debate with adversarial or otherwise bad agents.
	Shows OCC credit-based filtering when some agents are adversarial (low accuracy,
	inflated confidence) or lazy (minimal tokens, fixed low confidence).
	"""
	import json
	import random
	from pathlib import Path
	from typing import Dict, List

	import sys
	sys.path.insert(0, str(Path(__file__).parent.parent))
	from benchmarks.benchmark_debate import DebateBenchmark, DebateTopic, SimulatedDebateAgent


	class AdversarialDebateAgent(SimulatedDebateAgent):
	    """Simulated debater that is usually wrong yet reports inflated confidence.

	    The defaults configure a low accuracy (0.2) together with a positive
	    confidence bias (0.3); every proposal additionally has its confidence
	    raised by a fixed 0.2, capped at 1.0.
	    """

	    def __init__(
	        self,
	        agent_id: str,
	        accuracy: float = 0.2,
	        confidence_bias: float = 0.3,
	        verbose_prob: float = 0.5,
	    ):
	        """Forward all settings unchanged to ``SimulatedDebateAgent``."""
	        super().__init__(
	            agent_id,
	            accuracy=accuracy,
	            confidence_bias=confidence_bias,
	            verbose_prob=verbose_prob,
	        )

	    def propose(self, topic: DebateTopic, prior_proposals: List[Dict]) -> Dict:
	        """Return the base agent's proposal with its confidence inflated.

	        The proposal produced by the parent class is modified in place:
	        0.2 is added to ``"confidence"`` and the result is clamped to 1.0.
	        """
	        proposal = super().propose(topic, prior_proposals)
	        inflated = proposal["confidence"] + 0.2
	        proposal["confidence"] = min(1.0, inflated)
	        return proposal


	class LazyDebateAgent(SimulatedDebateAgent):
	    """Simulated debater that barely participates.

	    It is constructed with ``verbose_prob=0.0`` and every proposal it makes
	    is overwritten with a fixed token count (10) and a fixed confidence (0.3).
	    """

	    def __init__(
	        self,
	        agent_id: str,
	        accuracy: float = 0.3,
	        confidence_bias: float = -0.2,
	    ):
	        """Forward the settings to ``SimulatedDebateAgent`` with verbosity disabled."""
	        super().__init__(
	            agent_id,
	            accuracy=accuracy,
	            confidence_bias=confidence_bias,
	            verbose_prob=0.0,
	        )

	    def propose(self, topic: DebateTopic, prior_proposals: List[Dict]) -> Dict:
	        """Return the base agent's proposal with fixed token count and confidence.

	        Whatever the parent class computed for ``"tokens"`` and
	        ``"confidence"`` is replaced by 10 and 0.3 respectively.
	        """
	        proposal = super().propose(topic, prior_proposals)
	        # Same assignment order as before: tokens first, then confidence.
	        fixed_fields = {"tokens": 10, "confidence": 0.3}
	        for field, value in fixed_fields.items():
	            proposal[field] = value
	        return proposal


	class DebateAdversarialBenchmark(DebateBenchmark):
	    """Debate benchmark whose agent pool mixes good agents with bad ones.

	    A fraction ``bad_agent_ratio`` of the ``n_agents`` pool is replaced by
	    alternating ``AdversarialDebateAgent`` and ``LazyDebateAgent`` instances;
	    the remainder are plain ``SimulatedDebateAgent`` instances.

	    NOTE(review): ``self.n_agents``, ``self.topics``, the ``_resolve_*``
	    methods and ``_summarize`` are not defined in this file; they are
	    presumably provided by ``DebateBenchmark`` -- confirm against that class.
	    """

	    # Strategy name -> name of the resolver method. Method names are looked up
	    # lazily so a missing resolver only fails when that strategy is requested.
	    _STRATEGY_RESOLVERS = {
	        "equal_turns": "_resolve_equal_turns",
	        "occ": "_resolve_occ_allocation",
	        "confidence_weighted": "_resolve_confidence_weighted",
	        "majority_vote": "_resolve_majority_vote",
	    }

	    def __init__(self, n_topics: int = 50, n_agents: int = 4, bad_agent_ratio: float = 0.25, seed: int = 42):
	        """Initialise the base benchmark and remember the share of bad agents.

	        Args:
	            n_topics: Forwarded to ``DebateBenchmark``.
	            n_agents: Forwarded to ``DebateBenchmark``; total size of the agent pool.
	            bad_agent_ratio: Fraction of the pool made up of bad agents.
	            seed: Forwarded to ``DebateBenchmark``.
	        """
	        super().__init__(n_topics=n_topics, n_agents=n_agents, seed=seed)
	        self.bad_agent_ratio = bad_agent_ratio

	    def create_mixed_agents(self) -> List[SimulatedDebateAgent]:
	        """Build the agent pool: good agents first, then the bad ones.

	        A ratio of zero (or less) yields no bad agents, so it can serve as a
	        clean baseline. Any positive ratio yields at least one bad agent, and
	        never more than ``n_agents``.

	        Returns:
	            A list of ``n_agents`` agents. Bad agents alternate between
	            adversarial (even index) and lazy (odd index).
	        """
	        if self.bad_agent_ratio <= 0:
	            # Previously max(1, ...) forced one bad agent even at ratio 0.0,
	            # which made the "ratio_0.0" baseline identical to a polluted run.
	            n_bad = 0
	        else:
	            n_bad = max(1, int(self.n_agents * self.bad_agent_ratio))
	            # Never create more bad agents than the pool holds (ratio > 1, n_agents == 0).
	            n_bad = min(n_bad, self.n_agents)
	        n_good = self.n_agents - n_bad

	        agents: List[SimulatedDebateAgent] = [
	            SimulatedDebateAgent(f"good_{i}", accuracy=0.75, confidence_bias=0.05)
	            for i in range(n_good)
	        ]
	        for i in range(n_bad):
	            if i % 2 == 0:
	                agents.append(AdversarialDebateAgent(f"bad_adv_{i}", accuracy=0.2, confidence_bias=0.3))
	            else:
	                agents.append(LazyDebateAgent(f"bad_lazy_{i}", accuracy=0.3))
	        return agents

	    def run_with_bad_agents(self, strategy: str = "occ") -> Dict:
	        """Resolve every topic with a mixed agent pool and summarise the results.

	        Args:
	            strategy: One of ``"equal_turns"``, ``"occ"``,
	                ``"confidence_weighted"`` or ``"majority_vote"``.

	        Returns:
	            Whatever ``self._summarize`` returns for the per-topic results.

	        Raises:
	            ValueError: If ``strategy`` is not a known strategy name. An
	                unknown name used to be ignored silently, producing a
	                summary over zero results.
	        """
	        try:
	            resolver_name = self._STRATEGY_RESOLVERS[strategy]
	        except KeyError:
	            raise ValueError(
	                f"Unknown strategy {strategy!r}; expected one of {sorted(self._STRATEGY_RESOLVERS)}"
	            ) from None
	        resolve = getattr(self, resolver_name)

	        # One fresh pool per run, shared across all topics of that run.
	        agents = self.create_mixed_agents()
	        topic_results = [resolve(agents, topic) for topic in self.topics]
	        return self._summarize(topic_results, strategy)

	    def run_all_varying_bad_ratios(self) -> Dict:
	        """Sweep bad-agent ratios 0.0, 0.25 and 0.5 over three strategies.

	        Prints one progress line per (ratio, strategy) pair.

	        Returns:
	            Dict keyed ``"ratio_<ratio>_<strategy>"`` holding each run's summary.
	        """
	        results = {}
	        configured_ratio = self.bad_agent_ratio
	        try:
	            for ratio in [0.0, 0.25, 0.5]:
	                # create_mixed_agents reads the ratio from the instance.
	                self.bad_agent_ratio = ratio
	                print(f"\n--- Bad agent ratio: {ratio} ---")
	                for strategy in ["equal_turns", "confidence_weighted", "occ"]:
	                    res = self.run_with_bad_agents(strategy)
	                    results[f"ratio_{ratio}_{strategy}"] = res
	                    print(f" {strategy}: acc={res['accuracy']:.3f}, compute={res['mean_compute_per_topic']:.1f}, "
	                          f"quality_per_compute={res['quality_per_compute']:.6f}")
	        finally:
	            # Do not leave the instance stuck at the last swept ratio.
	            self.bad_agent_ratio = configured_ratio
	        return results


	def main(output_dir: str = "/app/occ/reports"):
	    """Run the bad-ratio sweep, print a summary table and save it as JSON.

	    Args:
	        output_dir: Directory that receives
	            ``benchmark_debate_adversarial_results.json``. It is created
	            (including parents) if missing. The default is the location
	            this script wrote to before the parameter existed.
	    """
	    bench = DebateAdversarialBenchmark(n_topics=50, n_agents=4, bad_agent_ratio=0.25, seed=42)
	    bench.generate_topics()
	    results = bench.run_all_varying_bad_ratios()

	    print("\n" + "=" * 60)
	    print("ADVERSARIAL DEBATE BENCHMARK")
	    print("=" * 60)
	    for label, res in results.items():
	        print(f"{label:35s}: acc={res['accuracy']:.3f}, compute={res['mean_compute_per_topic']:.1f}, "
	              f"quality/compute={res['quality_per_compute']:.6f}")

	    report_dir = Path(output_dir)
	    report_dir.mkdir(parents=True, exist_ok=True)
	    report_path = report_dir / "benchmark_debate_adversarial_results.json"
	    with open(report_path, "w", encoding="utf-8") as f:
	        # default=str stringifies any value json cannot serialise natively.
	        json.dump(results, f, indent=2, default=str)
	    # Report the path actually written rather than a relative shorthand.
	    print(f"\nSaved to {report_path}")


	# Run the benchmark only when this file is executed as a script, not when imported.
	if __name__ == "__main__":
	    main()