Spaces:
Sleeping
Sleeping
| # Path: QAgents-workflos/tests/fast_eval.py | |
| # Fast evaluation - one problem per difficulty, all modes | |
| """Fast mode evaluation.""" | |
| import sys | |
| import os | |
| import time | |
| import json | |
| from datetime import datetime | |
| from pathlib import Path | |
| sys.path.insert(0, str(Path(__file__).parent.parent.absolute())) | |
| api_key = "$env:GOOGLE_API_KEY" | |
| os.environ['GOOGLE_API_KEY'] = api_key | |
| from tests.test_problems import ( | |
| PROBLEM_E1_PHASE_FLIP, | |
| PROBLEM_M1_SWAP_DECOMPOSITION, | |
| PROBLEM_H1_DEUTSCH, | |
| PROBLEM_VH4_BERNSTEIN_VAZIRANI | |
| ) | |
| from orchestrators import create_orchestrator | |
| from orchestrators.quasar_orchestrator import QuasarOrchestrator, HybridOrchestrator | |
| from config import set_api_key | |
| import re | |
| set_api_key(api_key) | |
| def extract_gates(qasm): | |
| if not qasm: | |
| return 0 | |
| gate_pattern = r'\b(h|x|y|z|s|t|cx|cz|swap|ccx|rz|rx|ry|cp)\b' | |
| return len(re.findall(gate_pattern, qasm, re.IGNORECASE)) | |
| def test_problem(problem, mode, timeout=60): | |
| start = time.perf_counter() | |
| try: | |
| if mode == "quasar": | |
| orch = QuasarOrchestrator(max_iterations=3) | |
| result = orch.run(problem.prompt, problem.expected.min_qubits) | |
| return {"success": result.success, "time_ms": (time.perf_counter()-start)*1000, | |
| "llm": result.llm_calls, "gates": extract_gates(result.final_qasm), "error": None} | |
| elif mode == "hybrid": | |
| orch = HybridOrchestrator() | |
| result = orch.run(problem.prompt, problem.expected.min_qubits) | |
| return {"success": result.success, "time_ms": (time.perf_counter()-start)*1000, | |
| "llm": result.llm_calls, "gates": extract_gates(result.final_qasm), "error": None} | |
| else: | |
| orch = create_orchestrator(mode) | |
| result = orch.run(problem.prompt) | |
| llm = 1 if mode == "naked" else len(result.agent_results) if result.agent_results else 0 | |
| return {"success": result.success, "time_ms": (time.perf_counter()-start)*1000, | |
| "llm": llm, "gates": extract_gates(result.final_output), "error": "; ".join(result.errors) if result.errors else None} | |
| except Exception as e: | |
| return {"success": False, "time_ms": (time.perf_counter()-start)*1000, | |
| "llm": 0, "gates": 0, "error": str(e)[:60]} | |
| print("=" * 70) | |
| print("FAST MODE EVALUATION") | |
| print("=" * 70) | |
| print(f"Date: {datetime.now().isoformat()}") | |
| problems = [ | |
| ("EASY", PROBLEM_E1_PHASE_FLIP), | |
| ("MEDIUM", PROBLEM_M1_SWAP_DECOMPOSITION), | |
| ("HARD", PROBLEM_H1_DEUTSCH), | |
| ("VERY_HARD", PROBLEM_VH4_BERNSTEIN_VAZIRANI) | |
| ] | |
| modes = ["naked", "quasar", "hybrid", "blackboard"] | |
| all_results = {} | |
| for diff, problem in problems: | |
| print(f"\n{diff}: {problem.name}") | |
| print("-" * 50) | |
| all_results[diff] = {} | |
| for mode in modes: | |
| print(f" {mode:12}", end=" ", flush=True) | |
| result = test_problem(problem, mode) | |
| all_results[diff][mode] = result | |
| status = "✅" if result["success"] else "❌" | |
| print(f"{status} {result['time_ms']:5.0f}ms LLM:{result['llm']} Gates:{result['gates']}") | |
| if result["error"]: | |
| print(f" ⚠️ {result['error'][:40]}...") | |
| time.sleep(5) | |
| # Summary | |
| print("\n" + "=" * 70) | |
| print("SUMMARY") | |
| print("=" * 70) | |
| for mode in modes: | |
| successes = sum(1 for diff in all_results if all_results[diff][mode]["success"]) | |
| total_time = sum(all_results[diff][mode]["time_ms"] for diff in all_results) | |
| total_llm = sum(all_results[diff][mode]["llm"] for diff in all_results) | |
| print(f"\n{mode.upper():12} {successes}/4 ({25*successes}%) | {total_time:.0f}ms | {total_llm} LLM calls") | |
| for diff in all_results: | |
| r = all_results[diff][mode] | |
| status = "✅" if r["success"] else "❌" | |
| print(f" {diff:10} {status}") | |
| print("\n" + "=" * 70) | |
| print("DONE") | |