# Qagents-workflows / tests /full_comparison.py
# Deminiko
# Initial commit: QAgents-workflos multi-agent quantum circuit optimization system
# 1bb4678
# Path: QAgents-workflos/tests/full_comparison.py
# Full comparison test across all modes and difficulties
"""Full mode comparison test."""
import sys
import os
import time
import json
from datetime import datetime
from pathlib import Path
# Make the repository root (the parent of this tests/ directory) importable so
# the project-local imports that follow resolve when the file is run directly.
sys.path.insert(0, str(Path(__file__).parent.parent.absolute()))
# Read the API key from the real process environment. The previous literal
# "$env:GOOGLE_API_KEY" is PowerShell syntax that Python never expands, so it
# replaced any genuine key in os.environ with that placeholder text.
api_key = os.environ.get("GOOGLE_API_KEY", "")
if not api_key:
    # Best-effort: keep running so the failure shows up per-mode in the report.
    print(
        "WARNING: GOOGLE_API_KEY is not set; LLM-backed modes are expected to fail.",
        file=sys.stderr,
    )
from tests.test_problems import ALL_PROBLEMS, ProblemDifficulty
from orchestrators import create_orchestrator
from orchestrators.quasar_orchestrator import QuasarOrchestrator, HybridOrchestrator
from config import set_api_key
import re
# Pass the API key to the project's config module via set_api_key.
set_api_key(api_key)
# Gate mnemonics recognised by the counter, matched as whole words and
# case-insensitively.
_GATE_RE = re.compile(
    r'\b(h|x|y|z|s|t|cx|cz|swap|ccx|rz|rx|ry|cp)\b', re.IGNORECASE
)
# QASM comments: `// ...` up to end of line and `/* ... */` blocks.
_QASM_COMMENT_RE = re.compile(r'//[^\n]*|/\*.*?\*/', re.DOTALL)


def extract_gates(qasm):
    """Count gate mnemonics in a QASM program.

    Comments are removed before counting, so a gate name that is merely
    mentioned in a comment (e.g. ``// apply h then cx``) does not inflate
    the result.

    Args:
        qasm: QASM source text, or a falsy value (``None`` / ``""``).

    Returns:
        The number of whole-word occurrences of the recognised gate names
        outside comments; 0 for falsy input.
    """
    if not qasm:
        return 0
    # Replace each comment with a space so neighbouring tokens stay separated.
    code = _QASM_COMMENT_RE.sub(" ", qasm)
    return len(_GATE_RE.findall(code))
def test_problem(problem, mode):
    """Run one problem through the orchestrator chosen by *mode* and time it.

    "quasar" uses ``QuasarOrchestrator(max_iterations=3)`` and "hybrid" uses
    ``HybridOrchestrator()``; both are called with the problem prompt, the
    expected minimum qubit count and the expected states (``None`` when that
    value is falsy). Every other mode is built with ``create_orchestrator``
    and receives only the prompt.

    Returns:
        A dict with the keys ``success``, ``time_ms``, ``llm``, ``gates``,
        ``iterations``, ``tiers``, ``qasm`` and ``error``. Any exception is
        caught and reported as a failed result whose ``error`` holds the
        first 100 characters of the exception text.
    """
    t0 = time.perf_counter()
    try:
        if mode in ("quasar", "hybrid"):
            if mode == "quasar":
                runner = QuasarOrchestrator(max_iterations=3)
            else:
                runner = HybridOrchestrator()
            outcome = runner.run(
                problem.prompt,
                problem.expected.min_qubits,
                problem.expected.expected_states or None,
            )
            succeeded = outcome.success
            circuit = outcome.final_qasm
            llm_calls = outcome.llm_calls
            n_iterations = outcome.iterations
            tiers_passed = outcome.tiers_passed
        else:
            runner = create_orchestrator(mode)
            outcome = runner.run(problem.prompt)
            succeeded = outcome.success
            circuit = outcome.final_output
            # "naked" counts as a single call; other modes count agent results.
            if mode == "naked":
                llm_calls = 1
            elif outcome.agent_results:
                llm_calls = len(outcome.agent_results)
            else:
                llm_calls = 0
            n_iterations = 1
            tiers_passed = []

        # Stop the clock before gate counting so it measures the run only.
        duration_ms = (time.perf_counter() - t0) * 1000
        return {
            "success": succeeded,
            "time_ms": duration_ms,
            "llm": llm_calls,
            "gates": extract_gates(circuit),
            "iterations": n_iterations,
            "tiers": tiers_passed,
            "qasm": circuit,
            "error": None,
        }
    except Exception as exc:
        return {
            "success": False,
            "time_ms": (time.perf_counter() - t0) * 1000,
            "llm": 0,
            "gates": 0,
            "iterations": 0,
            "tiers": [],
            "qasm": None,
            "error": str(exc)[:100],
        }
def main():
    """Run every problem in every mode, print a report and save the results.

    Problems from ``ALL_PROBLEMS`` are grouped by difficulty and each one is
    run through ``test_problem`` once per mode. The function then prints a
    per-mode summary, writes all result dicts (QASM truncated to 500
    characters) to a timestamped JSON file under ``research/`` next to the
    tests directory, and prints the mode with the most successes for each
    difficulty (the first such mode in ``modes`` wins ties).
    """
    banner = "=" * 100
    print(banner)
    print("FULL MODE COMPARISON TEST")
    print(banner)
    print(f"Date: {datetime.now().isoformat()}")
    print(f"Total problems: {len(ALL_PROBLEMS)}")
    print()

    # Modes to test - focus on the key ones
    modes = ["naked", "quasar", "hybrid", "blackboard"]
    difficulties = [
        ProblemDifficulty.EASY,
        ProblemDifficulty.MEDIUM,
        ProblemDifficulty.HARD,
        ProblemDifficulty.VERY_HARD,
    ]
    # Report labels come from the enum itself so they always equal the value
    # stored in each result's "difficulty" field.
    difficulty_labels = [d.value for d in difficulties]
    all_results = []

    # Group by difficulty
    for difficulty in difficulties:
        problems = [p for p in ALL_PROBLEMS if p.difficulty == difficulty]
        print(f"\n{banner}")
        print(f"DIFFICULTY: {difficulty.value.upper()} ({len(problems)} problems)")
        print(banner)
        for problem in problems:
            print(f"\n {problem.id}: {problem.name}")
            for mode in modes:
                print(f" {mode:12}", end=" ", flush=True)
                result = test_problem(problem, mode)
                result["problem_id"] = problem.id
                result["difficulty"] = difficulty.value
                result["mode"] = mode
                all_results.append(result)

                status = "✅" if result["success"] else "❌"
                time_str = f"{result['time_ms']:6.0f}ms"
                llm_str = f"LLM:{result['llm']}"
                gates_str = f"Gates:{result['gates']:2}"
                extra = ""
                if result["tiers"]:
                    extra = f" Tiers:{result['tiers']}"
                print(f"{status} {time_str} {llm_str:6} {gates_str}{extra}")
                if result["error"]:
                    print(f" ❌ Error: {result['error'][:60]}...")
                # NOTE(review): presumably throttles calls to the LLM API — confirm.
                time.sleep(5)

    # Summary
    print("\n\n" + banner)
    print("SUMMARY BY MODE")
    print(banner)
    for mode in modes:
        mode_results = [r for r in all_results if r["mode"] == mode]
        total = len(mode_results)
        print(f"\n{mode.upper():12}")
        if total == 0:
            # Nothing ran for this mode (e.g. no problems were loaded); the
            # percentages and averages below would divide by zero.
            print(" No results recorded")
            continue

        successes = sum(1 for r in mode_results if r["success"])
        total_time = sum(r["time_ms"] for r in mode_results)
        total_llm = sum(r["llm"] for r in mode_results)
        # Gate average covers successful runs only; max() avoids dividing by 0.
        avg_gates = sum(r["gates"] for r in mode_results if r["success"]) / max(successes, 1)
        print(f" Overall: {successes}/{total} ({100*successes/total:.0f}%)")
        print(f" Time: {total_time/1000:.1f}s total, {total_time/total:.0f}ms avg")
        print(f" LLM: {total_llm} calls ({total_llm/total:.1f} avg)")
        print(f" Gates: {avg_gates:.1f} avg")

        # By difficulty
        for diff in difficulty_labels:
            diff_results = [r for r in mode_results if r["difficulty"] == diff]
            if diff_results:
                diff_success = sum(1 for r in diff_results if r["success"])
                print(f" {diff:10}: {diff_success}/{len(diff_results)}")

    # Save results
    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    output_path = Path(__file__).parent.parent / "research" / f"full_comparison_{stamp}.json"
    output_path.parent.mkdir(parents=True, exist_ok=True)
    # Clean QASM for JSON (can be long)
    for r in all_results:
        if r["qasm"]:
            r["qasm"] = r["qasm"][:500]  # Truncate for storage
    # NOTE(review): assumes every stored value (e.g. "tiers") is
    # JSON-serializable — confirm against the orchestrator result types.
    with open(output_path, 'w', encoding="utf-8") as f:
        json.dump(all_results, f, indent=2)
    print(f"\n\nResults saved to: {output_path}")

    # Winner determination
    print("\n" + banner)
    print("🏆 WINNER BY DIFFICULTY")
    print(banner)
    for diff in difficulty_labels:
        print(f"\n{diff.upper()}:")
        best_mode = None
        best_success = -1
        best_total = 0
        for mode in modes:
            mode_results = [
                r for r in all_results
                if r["mode"] == mode and r["difficulty"] == diff
            ]
            if not mode_results:
                continue
            successes = sum(1 for r in mode_results if r["success"])
            # Strict ">" keeps the earliest mode in `modes` on a tie.
            if successes > best_success:
                best_success = successes
                best_mode = mode
                best_total = len(mode_results)
        if best_mode:
            print(f" 🏆 {best_mode.upper()} ({best_success}/{best_total})")


# Run the full comparison only when this file is executed as a script.
if __name__ == "__main__":
    main()