# File size: 7,208 Bytes
# 1bb4678
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
# Path: QAgents-workflos/tests/mode_evaluation.py
# Evaluate all modes on representative problems from each difficulty
"""Mode Evaluation: Test all modes on key problems from each difficulty level."""

import sys
import os
import time
import json
from datetime import datetime
from pathlib import Path

# Put the directory two levels above this file at the front of sys.path so the
# `tests`, `orchestrators` and `config` imports below resolve when this file is
# run directly as a script.
sys.path.insert(0, str(Path(__file__).parent.parent.absolute()))

# Read the key from the process environment. The previous value was the literal
# text "$env:GOOGLE_API_KEY" (PowerShell syntax, which Python does not expand);
# it was then written back into os.environ, clobbering any real key.
api_key = os.environ.get("GOOGLE_API_KEY", "")
if not api_key:
    # Best-effort: keep going so the script still starts, but make the
    # misconfiguration visible instead of failing later with an opaque error.
    print(
        "WARNING: GOOGLE_API_KEY is not set in the environment; "
        "modes that call the LLM are likely to fail.",
        file=sys.stderr,
    )

from tests.test_problems import (
    PROBLEM_E1_PHASE_FLIP, PROBLEM_E2_CONTROLLED_NOT,
    PROBLEM_M1_SWAP_DECOMPOSITION, PROBLEM_M2_CONTROLLED_Z,
    PROBLEM_H1_DEUTSCH, PROBLEM_H2_GROVER_2QUBIT,
    PROBLEM_VH1_QFT_4QUBIT, PROBLEM_VH2_GROVER_3QUBIT, PROBLEM_VH4_BERNSTEIN_VAZIRANI
)
from orchestrators import create_orchestrator
from orchestrators.quasar_orchestrator import QuasarOrchestrator, HybridOrchestrator
from config import set_api_key
import re

# Pass the same module-level key to the project's `config.set_api_key`.
set_api_key(api_key)


# `//` line comments and `/* ... */` block comments; removed before counting so
# that words inside comments are not mistaken for gate applications.
_QASM_COMMENT_RE = re.compile(r'//[^\n]*|/\*.*?\*/', re.DOTALL)

# Gate mnemonics that are counted, matched case-insensitively as whole words.
_QASM_GATE_RE = re.compile(
    r'\b(h|x|y|z|s|t|cx|cz|swap|ccx|rz|rx|ry|cp)\b', re.IGNORECASE
)


def extract_gates(qasm):
    """Count gate mnemonics in a QASM program.

    Args:
        qasm: QASM source text. ``None`` or an empty string is accepted.

    Returns:
        Number of whole-word occurrences of the recognised gate names
        (h, x, y, z, s, t, cx, cz, swap, ccx, rz, rx, ry, cp) found outside
        comments; 0 when ``qasm`` is falsy.
    """
    if not qasm:
        return 0
    # Replace each comment with a space (not ""), so tokens on either side of
    # a comment can never fuse into a new word.
    code = _QASM_COMMENT_RE.sub(' ', qasm)
    return len(_QASM_GATE_RE.findall(code))


def test_problem(problem, mode):
    """Run one problem through one orchestration mode and collect metrics.

    Args:
        problem: Problem object. ``problem.prompt`` is always read; for the
            "quasar" and "hybrid" modes ``problem.expected.min_qubits`` and
            ``problem.expected.expected_states`` are read as well.
        mode: "quasar" or "hybrid" to use the dedicated orchestrator classes;
            any other value is passed to ``create_orchestrator``.

    Returns:
        A dict with the keys ``success``, ``time_ms``, ``llm``, ``gates``,
        ``iterations`` and ``error``. ``error`` is ``None`` when the run
        completed without raising. If anything raises an ``Exception``, the
        dict reports ``success=False``, zeroed counters and a message of at
        most 80 characters; the exception is not propagated.
    """
    start = time.perf_counter()

    try:
        if mode in ("quasar", "hybrid"):
            if mode == "quasar":
                orch = QuasarOrchestrator(max_iterations=3)
            else:
                orch = HybridOrchestrator()
            result = orch.run(
                problem.prompt,
                problem.expected.min_qubits,
                # Any falsy expected_states value is normalised to None.
                problem.expected.expected_states or None,
            )
            success = result.success
            qasm = result.final_qasm
            llm = result.llm_calls
            iterations = result.iterations
        else:
            orch = create_orchestrator(mode)
            result = orch.run(problem.prompt)
            success = result.success
            qasm = result.final_output
            if mode == "naked":
                # "naked" runs are counted as exactly one LLM call.
                llm = 1
            else:
                llm = len(result.agent_results) if result.agent_results else 0
            iterations = 1

        # Stop the clock before gate counting so only the run itself is timed.
        elapsed = (time.perf_counter() - start) * 1000
        gates = extract_gates(qasm)

        return {
            "success": success,
            "time_ms": elapsed,
            "llm": llm,
            "gates": gates,
            "iterations": iterations,
            "error": None,
        }

    except Exception as e:
        elapsed = (time.perf_counter() - start) * 1000
        return {
            "success": False,
            "time_ms": elapsed,
            "llm": 0,
            "gates": 0,
            # Same key set as the success dict, so saved records are uniform.
            "iterations": 0,
            # Fall back to the exception class name: an empty message would be
            # falsy and the failure reason would never be shown to the user.
            "error": (str(e) or type(e).__name__)[:80],
        }


# The name starts with "test_", so pytest would try to collect this helper as
# a test and fail looking for `problem`/`mode` fixtures. Opt out explicitly.
test_problem.__test__ = False


def _count_successes(results):
    """Return how many result dicts in *results* have a truthy "success"."""
    return sum(1 for r in results if r["success"])


def _print_mode_summary(mode, mode_results, difficulties):
    """Print totals for one mode, then its success count per difficulty."""
    successes = _count_successes(mode_results)
    total = len(mode_results)
    # Divisor that is safe when a mode produced no results at all.
    runs = max(total, 1)
    total_time = sum(r["time_ms"] for r in mode_results)
    total_llm = sum(r["llm"] for r in mode_results)
    # Gate average is taken over successful runs only.
    avg_gates = sum(r["gates"] for r in mode_results if r["success"]) / max(successes, 1)

    print(f"\n{mode.upper():12}")
    print(f"  Success: {successes}/{total} ({100*successes/runs:.0f}%)")
    print(f"  Time: {total_time:.0f}ms total, {total_time/runs:.0f}ms avg")
    print(f"  LLM: {total_llm} calls")
    print(f"  Gates: {avg_gates:.1f} avg")

    for diff in difficulties:
        diff_results = [r for r in mode_results if r["difficulty"] == diff]
        if diff_results:
            print(f"    {diff:10}: {_count_successes(diff_results)}/{len(diff_results)}")


def _print_difficulty_breakdown(all_results, modes, difficulties):
    """Print, per difficulty, each mode's success count and average time."""
    for diff in difficulties:
        diff_results = [r for r in all_results if r["difficulty"] == diff]

        print(f"\n{diff.upper()}:")
        for mode in modes:
            rows = [r for r in diff_results if r["mode"] == mode]
            if rows:
                avg_time = sum(r["time_ms"] for r in rows) / len(rows)
                print(f"  {mode:12} {_count_successes(rows)}/{len(rows)} ({avg_time:.0f}ms avg)")


def main(modes=None, rate_limit_s=5):
    """Run every mode on the selected problems, print a report, save JSON.

    Each problem is run once per mode via ``test_problem``. Per-run lines, a
    per-mode summary and a per-difficulty breakdown are printed to stdout.
    All result dicts are then written to
    ``<two levels above this file>/research/mode_evaluation_<timestamp>.json``.

    Args:
        modes: Iterable of mode names to evaluate. Defaults to
            ``["naked", "quasar", "hybrid", "blackboard"]``.
        rate_limit_s: Seconds to sleep after each run (rate limiting).
            A value of 0 or less disables the pause.
    """
    banner = "=" * 80

    print(banner)
    print("MODE EVALUATION - KEY PROBLEMS FROM EACH DIFFICULTY")
    print(banner)
    print(f"Date: {datetime.now().isoformat()}")
    print()

    # Key problems to test, grouped by difficulty label.
    test_problems = [
        ("EASY", [PROBLEM_E1_PHASE_FLIP, PROBLEM_E2_CONTROLLED_NOT]),
        ("MEDIUM", [PROBLEM_M1_SWAP_DECOMPOSITION, PROBLEM_M2_CONTROLLED_Z]),
        ("HARD", [PROBLEM_H1_DEUTSCH, PROBLEM_H2_GROVER_2QUBIT]),
        ("VERY_HARD", [PROBLEM_VH1_QFT_4QUBIT, PROBLEM_VH2_GROVER_3QUBIT, PROBLEM_VH4_BERNSTEIN_VAZIRANI]),
    ]
    # Results are tagged with the lower-cased label, so derive the report
    # order from the same list instead of repeating it by hand.
    difficulties = [label.lower() for label, _ in test_problems]

    # Materialise once: the mode list is iterated several times below.
    modes = ["naked", "quasar", "hybrid", "blackboard"] if modes is None else list(modes)

    all_results = []

    for diff_name, problems in test_problems:
        print(f"\n{banner}")
        print(f"{diff_name} PROBLEMS")
        print(banner)

        for problem in problems:
            print(f"\n  {problem.id}: {problem.name}")

            for mode in modes:
                # Print the mode label first and flush so progress is visible
                # while the run is still in flight.
                print(f"    {mode:12}", end=" ", flush=True)

                result = test_problem(problem, mode)
                result["problem_id"] = problem.id
                result["difficulty"] = diff_name.lower()
                result["mode"] = mode
                all_results.append(result)

                status = "✅" if result["success"] else "❌"
                time_str = f"{result['time_ms']:6.0f}ms"
                llm_str = f"LLM:{result['llm']}"
                gates_str = f"Gates:{result['gates']:2}"

                print(f"{status} {time_str} {llm_str:6} {gates_str}")

                error = result["error"]
                if error:
                    # Only add the ellipsis when text was actually cut off.
                    shown = error if len(error) <= 50 else error[:50] + "..."
                    print(f"           ⚠️ {shown}")

                if rate_limit_s > 0:
                    time.sleep(rate_limit_s)  # Rate limiting

    # Summary
    print("\n\n" + banner)
    print("SUMMARY BY MODE")
    print(banner)

    for mode in modes:
        mode_results = [r for r in all_results if r["mode"] == mode]
        _print_mode_summary(mode, mode_results, difficulties)

    # Per-difficulty comparison of the modes
    print("\n" + banner)
    print("🏆 WINNER BY DIFFICULTY")
    print(banner)

    _print_difficulty_breakdown(all_results, modes, difficulties)

    # Save results
    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    output_path = Path(__file__).parent.parent / "research" / f"mode_evaluation_{stamp}.json"
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Explicit encoding so the file does not depend on the platform default.
    with open(output_path, 'w', encoding="utf-8") as f:
        json.dump(all_results, f, indent=2)

    print(f"\n\nResults saved to: {output_path}")


# Run the evaluation only when this file is executed as a script; importing
# the module does not call main().
if __name__ == "__main__":
    main()