# NOTE(review): removed stray editor-UI artifact lines ("Spaces:" / "Sleeping" x2)
# that were accidentally captured into this file; they are not Python code.
# Path: QAgents-workflos/tests/mode_evaluation.py
# Evaluate all modes on representative problems from each difficulty
"""Mode Evaluation: Test all modes on key problems from each difficulty level."""
import sys
import os
import time
import json
import re
from datetime import datetime
from pathlib import Path

# Make the project root importable when this script is run directly.
sys.path.insert(0, str(Path(__file__).parent.parent.absolute()))

# BUG FIX: the original assigned the literal PowerShell string
# "$env:GOOGLE_API_KEY" to api_key and then wrote it back into os.environ,
# clobbering any real key already set in the environment. Read the key from
# the environment instead and fail fast with a clear error if it is missing.
api_key = os.environ.get("GOOGLE_API_KEY")
if not api_key:
    raise RuntimeError("GOOGLE_API_KEY environment variable is not set")

from tests.test_problems import (
    PROBLEM_E1_PHASE_FLIP, PROBLEM_E2_CONTROLLED_NOT,
    PROBLEM_M1_SWAP_DECOMPOSITION, PROBLEM_M2_CONTROLLED_Z,
    PROBLEM_H1_DEUTSCH, PROBLEM_H2_GROVER_2QUBIT,
    PROBLEM_VH1_QFT_4QUBIT, PROBLEM_VH2_GROVER_3QUBIT, PROBLEM_VH4_BERNSTEIN_VAZIRANI
)
from orchestrators import create_orchestrator
from orchestrators.quasar_orchestrator import QuasarOrchestrator, HybridOrchestrator
from config import set_api_key

set_api_key(api_key)
def extract_gates(qasm):
    """Return the number of recognized gate mentions in a QASM string.

    Counts whole-word, case-insensitive occurrences of the common gate
    names (h, x, y, z, s, t, cx, cz, swap, ccx, rz, rx, ry, cp).
    A None or empty input yields 0.
    """
    if not qasm:
        return 0
    gate_re = re.compile(
        r'\b(h|x|y|z|s|t|cx|cz|swap|ccx|rz|rx|ry|cp)\b',
        re.IGNORECASE,
    )
    return sum(1 for _ in gate_re.finditer(qasm))
| def test_problem(problem, mode): | |
| """Test a single problem.""" | |
| start = time.perf_counter() | |
| try: | |
| if mode == "quasar": | |
| orch = QuasarOrchestrator(max_iterations=3) | |
| result = orch.run( | |
| problem.prompt, | |
| problem.expected.min_qubits, | |
| problem.expected.expected_states if problem.expected.expected_states else None | |
| ) | |
| success = result.success | |
| qasm = result.final_qasm | |
| llm = result.llm_calls | |
| iterations = result.iterations | |
| elif mode == "hybrid": | |
| orch = HybridOrchestrator() | |
| result = orch.run( | |
| problem.prompt, | |
| problem.expected.min_qubits, | |
| problem.expected.expected_states if problem.expected.expected_states else None | |
| ) | |
| success = result.success | |
| qasm = result.final_qasm | |
| llm = result.llm_calls | |
| iterations = result.iterations | |
| else: | |
| orch = create_orchestrator(mode) | |
| result = orch.run(problem.prompt) | |
| success = result.success | |
| qasm = result.final_output | |
| llm = 1 if mode == "naked" else len(result.agent_results) if result.agent_results else 0 | |
| iterations = 1 | |
| elapsed = (time.perf_counter() - start) * 1000 | |
| gates = extract_gates(qasm) | |
| return { | |
| "success": success, | |
| "time_ms": elapsed, | |
| "llm": llm, | |
| "gates": gates, | |
| "iterations": iterations, | |
| "error": None | |
| } | |
| except Exception as e: | |
| elapsed = (time.perf_counter() - start) * 1000 | |
| return { | |
| "success": False, | |
| "time_ms": elapsed, | |
| "llm": 0, | |
| "gates": 0, | |
| "error": str(e)[:80] | |
| } | |
def main():
    """Run every mode against the key problems and print/save a report.

    Iterates difficulty tiers x problems x modes, collecting per-run
    metrics from test_problem(), then prints a per-mode summary, a
    per-difficulty "winner" table, and dumps the raw result dicts to a
    timestamped JSON file under the sibling "research" directory.
    """
    print("=" * 80)
    print("MODE EVALUATION - KEY PROBLEMS FROM EACH DIFFICULTY")
    print("=" * 80)
    print(f"Date: {datetime.now().isoformat()}")
    print()
    # Key problems to test (2 per difficulty; 3 for very hard)
    test_problems = [
        ("EASY", [PROBLEM_E1_PHASE_FLIP, PROBLEM_E2_CONTROLLED_NOT]),
        ("MEDIUM", [PROBLEM_M1_SWAP_DECOMPOSITION, PROBLEM_M2_CONTROLLED_Z]),
        ("HARD", [PROBLEM_H1_DEUTSCH, PROBLEM_H2_GROVER_2QUBIT]),
        ("VERY_HARD", [PROBLEM_VH1_QFT_4QUBIT, PROBLEM_VH2_GROVER_3QUBIT, PROBLEM_VH4_BERNSTEIN_VAZIRANI])
    ]
    # Modes to test - focus on working ones
    modes = ["naked", "quasar", "hybrid", "blackboard"]
    all_results = []
    for diff_name, problems in test_problems:
        print(f"\n{'='*80}")
        print(f"{diff_name} PROBLEMS")
        print("=" * 80)
        for problem in problems:
            print(f"\n  {problem.id}: {problem.name}")
            for mode in modes:
                print(f"    {mode:12}", end=" ", flush=True)
                result = test_problem(problem, mode)
                # Tag each result so the summaries below can group by
                # problem, difficulty, and mode.
                result["problem_id"] = problem.id
                result["difficulty"] = diff_name.lower()
                result["mode"] = mode
                all_results.append(result)
                status = "✅" if result["success"] else "❌"
                time_str = f"{result['time_ms']:6.0f}ms"
                llm_str = f"LLM:{result['llm']}"
                gates_str = f"Gates:{result['gates']:2}"
                print(f"{status} {time_str} {llm_str:6} {gates_str}")
                if result["error"]:
                    print(f"      ⚠️ {result['error'][:50]}...")
                time.sleep(5)  # Rate limiting
    # Summary
    print("\n\n" + "=" * 80)
    print("SUMMARY BY MODE")
    print("=" * 80)
    for mode in modes:
        mode_results = [r for r in all_results if r["mode"] == mode]
        successes = sum(1 for r in mode_results if r["success"])
        total = len(mode_results)
        total_time = sum(r["time_ms"] for r in mode_results)
        total_llm = sum(r["llm"] for r in mode_results)
        # max(successes, 1) guards the division when a mode never succeeds.
        avg_gates = sum(r["gates"] for r in mode_results if r["success"]) / max(successes, 1)
        print(f"\n{mode.upper():12}")
        print(f"  Success: {successes}/{total} ({100*successes/total:.0f}%)")
        print(f"  Time: {total_time:.0f}ms total, {total_time/total:.0f}ms avg")
        print(f"  LLM: {total_llm} calls")
        print(f"  Gates: {avg_gates:.1f} avg")
        # By difficulty
        for diff in ["easy", "medium", "hard", "very_hard"]:
            diff_results = [r for r in mode_results if r["difficulty"] == diff]
            if diff_results:
                diff_success = sum(1 for r in diff_results if r["success"])
                print(f"    {diff:10}: {diff_success}/{len(diff_results)}")
    # Winner by difficulty
    print("\n" + "=" * 80)
    print("🏆 WINNER BY DIFFICULTY")
    print("=" * 80)
    for diff in ["easy", "medium", "hard", "very_hard"]:
        diff_results = [r for r in all_results if r["difficulty"] == diff]
        print(f"\n{diff.upper()}:")
        for mode in modes:
            mode_diff_results = [r for r in diff_results if r["mode"] == mode]
            if mode_diff_results:
                successes = sum(1 for r in mode_diff_results if r["success"])
                total_time = sum(r["time_ms"] for r in mode_diff_results)
                avg_time = total_time / len(mode_diff_results)
                print(f"  {mode:12} {successes}/{len(mode_diff_results)} ({avg_time:.0f}ms avg)")
    # Save results to a timestamped JSON file next to the tests directory.
    output_path = Path(__file__).parent.parent / "research" / f"mode_evaluation_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, 'w') as f:
        json.dump(all_results, f, indent=2)
    print(f"\n\nResults saved to: {output_path}")


if __name__ == "__main__":
    main()