Deminiko commited on
Commit
1bb4678
·
0 Parent(s):

Initial commit: QAgents-workflos multi-agent quantum circuit optimization system

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .env.example +76 -0
  2. .gitignore +53 -0
  3. AGENTS.md +64 -0
  4. LICENSE +21 -0
  5. README.md +265 -0
  6. __init__.py +6 -0
  7. agents/__init__.py +44 -0
  8. agents/base_agent.py +302 -0
  9. agents/llm_adapter.py +676 -0
  10. agents/specialized_agents.py +223 -0
  11. app.py +120 -0
  12. client/__init__.py +5 -0
  13. client/mcp_client.py +698 -0
  14. config.py +305 -0
  15. database/__init__.py +36 -0
  16. database/circuit_quality_db.py +414 -0
  17. database/storage.py +278 -0
  18. orchestrators/__init__.py +30 -0
  19. orchestrators/orchestrator.py +541 -0
  20. orchestrators/quasar_orchestrator.py +563 -0
  21. orchestrators/router.py +188 -0
  22. prompts/__init__.py +25 -0
  23. prompts/agent_prompts.py +276 -0
  24. prompts/optimized_prompts.py +289 -0
  25. requirements.txt +6 -0
  26. tasks-project-state.json +149 -0
  27. tests/__init__.py +87 -0
  28. tests/circuit_quality_analyzer.py +351 -0
  29. tests/comprehensive_test.py +287 -0
  30. tests/comprehensive_test_v2.py +299 -0
  31. tests/evaluation_harness.py +748 -0
  32. tests/evaluation_report.txt +54 -0
  33. tests/fast_eval.py +115 -0
  34. tests/final_eval.py +137 -0
  35. tests/full_comparison.py +214 -0
  36. tests/mini_test.py +75 -0
  37. tests/mode_evaluation.py +202 -0
  38. tests/quality_evaluation_harness.py +314 -0
  39. tests/quick_mode_test.py +81 -0
  40. tests/quick_test.py +85 -0
  41. tests/run_evaluation.py +197 -0
  42. tests/run_quality_eval.py +217 -0
  43. tests/test_db_storage.py +59 -0
  44. tests/test_mcp_client.py +181 -0
  45. tests/test_problems.py +709 -0
  46. tests/test_quality_analyzer.py +42 -0
  47. tests/test_ratelimited.py +37 -0
  48. tools/__init__.py +54 -0
  49. tools/quantum_tools.py +346 -0
  50. tools/tool_registry.py +118 -0
.env.example ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # QAgents-Workflows Environment Configuration
2
+ # Copy this file to .env and fill in your actual values
3
+ # For Hugging Face Spaces: Add these as Repository Secrets or Space Variables
4
+
5
+ # =============================================================================
6
+ # LLM Configuration (Model-Agnostic)
7
+ # =============================================================================
8
+
9
+ # LLM Provider: gemini (default), openai, anthropic, groq, ollama, etc.
10
+ # Leave empty to use default: "gemini"
11
+ LLM_PROVIDER=gemini
12
+
13
+ # LLM Model identifier
14
+ # For Gemini: gemini-2.5-flash-lite, gemini-2.5-flash, gemini-2.5-pro, gemini-2.0-flash
15
+ # For OpenAI: gpt-4o, gpt-4o-mini, gpt-4-turbo
16
+ # For Anthropic: claude-3-opus, claude-3-sonnet
17
+ # For Groq: llama-3-70b-versatile, mixtral-8x7b-32768
18
+ # For Ollama: mistral, neural-chat, starling-lm (local models)
19
+ # Leave empty to use default: "gemini-2.5-flash-lite"
20
+ LLM_MODEL=gemini-2.5-flash-lite
21
+
22
+ # =============================================================================
23
+ # API Keys (Provider-Specific)
24
+ # =============================================================================
25
+
26
+ # Google Gemini API Key (required for LLM_PROVIDER=gemini)
27
+ # Get from: https://aistudio.google.com/app/apikey
28
+ GOOGLE_API_KEY=your-gemini-api-key-here
29
+
30
+ # Alternative Gemini API Key (fallback if GOOGLE_API_KEY not set)
31
+ GENAI_API_KEY=
32
+
33
+ # OpenAI API Key (required for LLM_PROVIDER=openai)
34
+ OPENAI_API_KEY=sk-...
35
+
36
+ # Anthropic API Key (required for LLM_PROVIDER=anthropic)
37
+ ANTHROPIC_API_KEY=sk-ant-...
38
+
39
+ # Groq API Key (required for LLM_PROVIDER=groq)
40
+ GROQ_API_KEY=gsk_...
41
+
42
+ # Note: Ollama (LLM_PROVIDER=ollama) requires no API key - runs locally
43
+
44
+ # =============================================================================
45
+ # MCP Server Configuration (QuantumArchitect-MCP)
46
+ # =============================================================================
47
+
48
+ # MCP Server Base URL
49
+ # Local: http://127.0.0.1:7861
50
+ # Remote (ngrok example): https://your-ngrok-url.ngrok.io
51
+ # Leave empty to use default: http://127.0.0.1:7861
52
+ MCP_SERVER_URL=http://127.0.0.1:7861
53
+
54
+ # =============================================================================
55
+ # Optional: Cost Tracking and Evaluation
56
+ # =============================================================================
57
+
58
+ # Cost tracking can be enabled/disabled
59
+ # TRACK_COSTS=true
60
+
61
+ # =============================================================================
62
+ # Notes for Hugging Face Spaces
63
+ # =============================================================================
64
+
65
+ # 1. Upload this file as .env to your Space (or use Space Settings UI)
66
+ # 2. Go to Space Settings > Secrets > Add Secret
67
+ # 3. Add each variable:
68
+ # - Name: LLM_PROVIDER, Value: gemini
69
+ # - Name: LLM_MODEL, Value: gemini-2.5-flash-lite
70
+ # - Name: GOOGLE_API_KEY, Value: your-key
71
+ # - Name: MCP_SERVER_URL, Value: https://your-backend-url.ngrok.io
72
+ #
73
+ # 4. Restart the Space for changes to take effect
74
+ #
75
+ # Alternative: Use Space Variables (visible in Space info) instead of Secrets
76
+ # This is useful for non-sensitive settings like LLM_PROVIDER and MCP_SERVER_URL
.gitignore ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ *.egg-info/
20
+ .installed.cfg
21
+ *.egg
22
+
23
+ # Virtual Environment
24
+ .venv
25
+ venv/
26
+ ENV/
27
+ env/
28
+
29
+ # Environment Variables
30
+ # Actual secrets - never commit (gitignore has no inline comments, so the
+ # comment must live on its own line or the .env pattern will not match)
+ .env
31
+ # .env.example IS committed as a template - do not exclude it
32
+
33
+ # Database and Logs
34
+ *.db
35
+ *.sqlite3
36
+ database/data/
37
+ database/logs/
38
+ database/memory/
39
+ *.log
40
+
41
+ # IDEs
42
+ .vscode/
43
+ .idea/
44
+
45
+ # Project specific
46
+ research/
47
+
48
+ # Legacy/Backup files
49
+ *_old.py
50
+ *.bak
51
+
52
+ # Documentation work
53
+ .docs/
AGENTS.md ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ project current structure :"""
2
+ QAgents-workflos\__pycache__
3
+ QAgents-workflos\agents
4
+ QAgents-workflos\agents\__pycache__
5
+ QAgents-workflos\agents\__init__.py
6
+ QAgents-workflos\agents\base_agent.py
7
+ QAgents-workflos\agents\llm_adapter.py
8
+ QAgents-workflos\agents\specialized_agents.py
9
+ QAgents-workflos\client
10
+ QAgents-workflos\client\__pycache__
11
+ QAgents-workflos\client\__init__.py
12
+ QAgents-workflos\client\mcp_client.py
13
+ QAgents-workflos\database
14
+ QAgents-workflos\database\__pycache__
15
+ QAgents-workflos\database\data
16
+ QAgents-workflos\database\logs
17
+ QAgents-workflos\database\memory
18
+ QAgents-workflos\database\__init__.py
19
+ QAgents-workflos\database\storage.py
20
+ QAgents-workflos\orchestrators
21
+ QAgents-workflos\orchestrators\__pycache__
22
+ QAgents-workflos\orchestrators\__init__.py
23
+ QAgents-workflos\orchestrators\orchestrator.py
24
+ QAgents-workflos\prompts
25
+ QAgents-workflos\prompts\__init__.py
26
+ QAgents-workflos\prompts\agent_prompts.py
27
+ QAgents-workflos\tests
28
+ QAgents-workflos\tests\__pycache__
29
+ QAgents-workflos\tests\__init__.py
30
+ QAgents-workflos\tests\evaluation_harness.py
31
+ QAgents-workflos\tests\test_problems.py
32
+ QAgents-workflos\tools
33
+ QAgents-workflos\tools\__pycache__
34
+ QAgents-workflos\tools\__init__.py
35
+ QAgents-workflos\tools\quantum_tools.py
36
+ QAgents-workflos\tools\tool_registry.py
37
+ QAgents-workflos\workflows
38
+ QAgents-workflos\workflows\__pycache__
39
+ QAgents-workflos\workflows\__init__.py
40
+ QAgents-workflos\workflows\workflow_definitions.py
41
+ QAgents-workflos\__init__.py
42
+ QAgents-workflos\AGENTS.md
43
+ QAgents-workflos\config.py
44
+ QAgents-workflos\DEPLOYMENT_CHECKLIST.md
45
+ QAgents-workflos\IMPLEMENTATION_CHECKLIST.md
46
+ QAgents-workflos\LLM_SYSTEM_SUMMARY.md
47
+ QAgents-workflos\QUICKREF.md
48
+ QAgents-workflos\README.md
49
+ QAgents-workflos\requirements.txt
50
+ QAgents-workflos\run_evaluation.py
51
+ QAgents-workflos\SETUP.md
52
+ QAgents-workflos\tasks-project-state.json
53
+ """
54
+
55
+ before work, on same terminal:"""
56
+ 1 activate .venv:
57
+ & D:\teach\quantum-circuits\.venv\Scripts\Activate.ps1
58
+
59
+ 2 activate app:
60
+ python QuantumArchitect-MCP\app.py
61
+ """
62
+
63
+
64
+ if any new data it must be writed on tasks-project-state.json root file or a folder module tasks-project-state.json file that detail the module file
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Nicolas Ivan Larenas Bustamante
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md ADDED
@@ -0,0 +1,265 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: QAgents Quantum Circuit Orchestrator
3
+ emoji: ⚛️
4
+ colorFrom: indigo
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 5.0.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ short_description: Multi-agent quantum circuit generation with Gemini/LLMs
12
+ ---
13
+
14
+ # QAgents-Workflows: Quantum Circuit Optimization Agent System
15
+
16
+ A professional multi-agent system for autonomous quantum circuit optimization, featuring multiple architectural approaches and **model-agnostic LLM support** (Gemini, OpenAI, Anthropic, Groq, Ollama, and any LiteLLM provider).
17
+
18
+ ## 🏗️ Architectures
19
+
20
+ ### 1. Blackboard System (Free/Emergent)
21
+ - Agents communicate through a shared blackboard
22
+ - Decoupled, event-driven activation
23
+ - Emergent workflow based on data availability
24
+ - Maximum flexibility and adaptability
25
+
26
+ ### 2. Guided System (Strict Orchestration)
27
+ - Explicit state machine with defined transitions
28
+ - Central orchestrator controls workflow
29
+ - Predictable, auditable execution path
30
+ - Maximum reliability and control
31
+
32
+ ### 3. Naked System (Baseline)
33
+ - Single agent with direct MCP access
34
+ - No framework overhead
35
+ - Baseline for comparison
36
+
37
+ ## 🤖 Model-Agnostic LLM Support
38
+
39
+ The system works with **any LLM provider**:
40
+
41
+ | Provider | Setup | Models |
42
+ |----------|-------|--------|
43
+ | **Gemini** (Default) | `GOOGLE_API_KEY` | `gemini-2.5-flash-lite` |
44
+ | OpenAI | `OPENAI_API_KEY` | `gpt-4o`, `gpt-4o-mini` |
45
+ | Anthropic | `ANTHROPIC_API_KEY` | `claude-3-opus`, `claude-3-sonnet` |
46
+ | Groq | `GROQ_API_KEY` | `llama-3-70b`, `mixtral-8x7b` |
47
+ | Ollama (Local) | No key needed | Any local model |
48
+
49
+ **See [SETUP.md](SETUP.md) for detailed configuration.**
50
+
51
+ ## 📊 Evaluation Metrics
52
+
53
+ | Metric | Description |
54
+ |--------|-------------|
55
+ | **Time** | Total execution time in seconds |
56
+ | **Quality** | Circuit depth, gate count, hardware fitness score |
57
+ | **Effectiveness** | Did the circuit achieve the goal? |
58
+ | **Reliability** | Success rate across multiple runs |
59
+
60
+ ## 🚀 Quick Start
61
+
62
+ ```bash
63
+ # 1. Ensure QuantumArchitect-MCP is running
64
+ python QuantumArchitect-MCP/app.py
65
+
66
+ # 2. Set your API key (for Gemini by default)
67
+ set GOOGLE_API_KEY=your-key-here
68
+ # OR for OpenAI:
69
+ set OPENAI_API_KEY=your-key-here
70
+
71
+ # 3. Run the evaluation
72
+ python QAgents-workflos/run_evaluation.py
73
+
74
+ # For quick test (no LLM needed):
75
+ python QAgents-workflos/run_evaluation.py --quick
76
+
77
+ # Test specific mode:
78
+ python QAgents-workflos/run_evaluation.py --mode guided
79
+ python QAgents-workflos/run_evaluation.py --mode blackboard
80
+ python QAgents-workflos/run_evaluation.py --mode naked
81
+ ```
82
+
83
+ ## 🔧 Switching LLM Providers
84
+
85
+ ### Using Gemini (Default)
86
+ ```bash
87
+ set GOOGLE_API_KEY=your-gemini-key
88
+ # Models: gemini-2.5-flash-lite, gemini-2.5-pro
89
+ ```
90
+
91
+ ### Using OpenAI
92
+ Edit `config.py`:
93
+ ```python
94
+ provider: str = "openai"
95
+ model: str = "gpt-4o-mini"
96
+ ```
97
+ ```bash
98
+ set OPENAI_API_KEY=sk-...
99
+ ```
100
+
101
+ ### Using Anthropic
102
+ ```python
103
+ provider: str = "anthropic"
104
+ model: str = "claude-3-sonnet-20240229"
105
+ ```
106
+ ```bash
107
+ set ANTHROPIC_API_KEY=your-key
108
+ ```
109
+
110
+ ### Using Groq
111
+ ```python
112
+ provider: str = "groq"
113
+ model: str = "llama-3-70b-versatile"
114
+ ```
115
+ ```bash
116
+ set GROQ_API_KEY=your-key
117
+ ```
118
+
119
+ ### Using Local Ollama
120
+ ```python
121
+ provider: str = "ollama"
122
+ model: str = "mistral"
123
+ ```
124
+ No API key needed - runs locally on `http://localhost:11434`
125
+
126
+ ## 📁 Project Structure
127
+
128
+ ```
129
+ QAgents-workflos/
130
+ ├── agents/ # Agent implementations (Architect, Builder, etc.)
131
+ ├── client/ # MCP client for QuantumArchitect-MCP
132
+ ├── database/ # Storage layer (logs, memory, circuits)
133
+ ├── orchestrators/ # Orchestration modes (Naked, Guided, Blackboard, QUASAR, Hybrid)
134
+ ├── prompts/ # System prompts for agents and optimized LLM prompts
135
+ ├── tools/ # Tool registry and MCP endpoint wrappers
136
+ ├── workflows/ # Workflow definitions
137
+ ├── tests/ # Evaluation harnesses and test problems
138
+ ├── app.py # Gradio UI entry point (Hugging Face Space)
139
+ ├── config.py # Configuration with env var support
140
+ ├── requirements.txt # Python dependencies
141
+ ├── .env.example # Environment variable template
142
+ └── README.md # This file
143
+ ```
144
+
145
+ ## 🚀 Deployment to Hugging Face Spaces
146
+
147
+ ### Prerequisites
148
+ 1. Create a Hugging Face Space: https://huggingface.co/new-space
149
+ 2. Select **Gradio** as the SDK
150
+ 3. Push this repository to your Space
151
+
152
+ ### Environment Variables Configuration
153
+
154
+ The system reads configuration from **environment variables**, making it compatible with Hugging Face Spaces.
155
+
156
+ #### Critical Variables
157
+
158
+ | Variable | Purpose | Default | Example |
159
+ |----------|---------|---------|---------|
160
+ | `LLM_PROVIDER` | LLM provider to use | `gemini` | `gemini`, `openai`, `anthropic` |
161
+ | `LLM_MODEL` | Model identifier | `gemini-2.5-flash-lite` | `gpt-4o-mini`, `claude-3-sonnet` |
162
+ | `GOOGLE_API_KEY` | Gemini API key | (none) | Your API key from aistudio.google.com |
163
+ | `MCP_SERVER_URL` | Backend URL | `http://127.0.0.1:7861` | `https://your-backend.ngrok.io` |
164
+
165
+ #### Setting Variables in Hugging Face Space
166
+
167
+ **Option 1: Via Space Settings (Recommended)**
168
+ 1. Go to your Space settings
169
+ 2. Click **"Secrets and variables"** > **"New secret"**
170
+ 3. Add each variable:
171
+ - **Secret Name**: `GOOGLE_API_KEY` | **Value**: Your API key
172
+ - **Secret Name**: `MCP_SERVER_URL` | **Value**: Backend URL
173
+ 4. Add variables (non-sensitive):
174
+ - **Variable Name**: `LLM_PROVIDER` | **Value**: `gemini`
175
+ - **Variable Name**: `LLM_MODEL` | **Value**: `gemini-2.5-flash-lite`
176
+
177
+ **Option 2: Via .env File**
178
+ ```bash
179
+ # Copy .env.example to .env and fill in values
180
+ cp .env.example .env
181
+
182
+ # Commit and push to your Space
183
+ git add .env
184
+ git commit -m "Add environment configuration"
185
+ git push
186
+ ```
187
+
188
+ **⚠️ Important**: Never commit sensitive API keys directly. Use Space Secrets instead.
189
+
190
+ ### LLM Provider Configuration
191
+
192
+ #### Using Gemini (Default)
193
+ ```
194
+ LLM_PROVIDER=gemini
195
+ LLM_MODEL=gemini-2.5-flash-lite
196
+ GOOGLE_API_KEY=your-key-from-https://aistudio.google.com/app/apikey
197
+ ```
198
+
199
+ #### Using OpenAI
200
+ ```
201
+ LLM_PROVIDER=openai
202
+ LLM_MODEL=gpt-4o-mini
203
+ OPENAI_API_KEY=sk-...
204
+ ```
205
+
206
+ #### Using Anthropic
207
+ ```
208
+ LLM_PROVIDER=anthropic
209
+ LLM_MODEL=claude-3-sonnet-20240229
210
+ ANTHROPIC_API_KEY=sk-ant-...
211
+ ```
212
+
213
+ #### Using Groq
214
+ ```
215
+ LLM_PROVIDER=groq
216
+ LLM_MODEL=llama-3-70b-versatile
217
+ GROQ_API_KEY=gsk_...
218
+ ```
219
+
220
+ #### Using Local Ollama
221
+ ```
222
+ LLM_PROVIDER=ollama
223
+ LLM_MODEL=mistral
224
+ # No API key needed - runs locally on http://localhost:11434
225
+ ```
226
+
227
+ ### Backend Connection (MCP Server)
228
+
229
+ The Space communicates with the QuantumArchitect-MCP backend via `MCP_SERVER_URL`.
230
+
231
+ **Options:**
232
+
233
+ 1. **Local Development** (both running on your machine):
234
+ ```
235
+ MCP_SERVER_URL=http://127.0.0.1:7861
236
+ ```
237
+
238
+ 2. **Public Backend with ngrok** (tunnel remote server):
239
+ ```bash
240
+ # On your backend server:
241
+ ngrok http 7861
242
+ ```
243
+ Then set:
244
+ ```
245
+ MCP_SERVER_URL=https://your-ngrok-url.ngrok.io
246
+ ```
247
+
248
+ 3. **Deployed Backend** (your own server):
249
+ ```
250
+ MCP_SERVER_URL=https://your-quantum-api.example.com
251
+ ```
252
+
253
+ If `MCP_SERVER_URL` is not set or unreachable, the Space will still work but with local-only features.
254
+
255
+ ## 📁 Project Structure (Previous)
+ ```
256
+ ├── agents/ # Agent definitions (types, behaviors)
257
+ ├── prompts/ # System prompts for each agent
258
+ ├── tools/ # MCP tool wrappers
259
+ ├── workflows/ # Workflow definitions
260
+ ├── orchestrators/ # Workflow orchestration logic
261
+ ├── client/ # MCP client connection
262
+ ├── database/ # Memory, logs, results storage
263
+ ├── tests/ # Evaluation framework
264
+ └── config.py # Global configuration
265
+ ```
__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ """QAgents-Workflows: Multi-agent quantum circuit optimization system."""
2
+
3
+ from .config import config, set_mode, get_mode, SystemConfig
4
+
5
+ __version__ = "0.1.0"
6
+ __all__ = ["config", "set_mode", "get_mode", "SystemConfig"]
agents/__init__.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Agents module: Base and specialized agent implementations."""
2
+
3
+ from .base_agent import (
4
+ BaseAgent,
5
+ LLMAgent,
6
+ RuleBasedAgent,
7
+ AgentRole,
8
+ AgentState,
9
+ AgentContext,
10
+ AgentAction,
11
+ AgentResult
12
+ )
13
+
14
+ from .specialized_agents import (
15
+ ArchitectAgent,
16
+ BuilderAgent,
17
+ ValidatorAgent,
18
+ OptimizerAgent,
19
+ AnalyzerAgent,
20
+ ScorerAgent,
21
+ SimulatorAgent,
22
+ create_all_agents
23
+ )
24
+
25
+ __all__ = [
26
+ # Base classes
27
+ "BaseAgent",
28
+ "LLMAgent",
29
+ "RuleBasedAgent",
30
+ "AgentRole",
31
+ "AgentState",
32
+ "AgentContext",
33
+ "AgentAction",
34
+ "AgentResult",
35
+ # Specialized agents
36
+ "ArchitectAgent",
37
+ "BuilderAgent",
38
+ "ValidatorAgent",
39
+ "OptimizerAgent",
40
+ "AnalyzerAgent",
41
+ "ScorerAgent",
42
+ "SimulatorAgent",
43
+ "create_all_agents"
44
+ ]
agents/base_agent.py ADDED
@@ -0,0 +1,302 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Agents Module: Base agent classes and specialized agents.
3
+ Supports both Blackboard (free) and Guided (strict) architectures.
4
+ Model-agnostic: Works with Gemini, OpenAI, Anthropic, Groq, Ollama, etc.
5
+ """
6
+
7
+ from abc import ABC, abstractmethod
8
+ from dataclasses import dataclass, field
9
+ from typing import Any, Dict, List, Optional, Callable
10
+ from enum import Enum
11
+ from datetime import datetime
12
+ import json
13
+ import logging
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class AgentRole(Enum):
    """Enumeration of the roles an agent may assume within the system."""

    ARCHITECT = "architect"      # designs the circuit approach
    BUILDER = "builder"          # constructs the circuit
    VALIDATOR = "validator"      # checks correctness
    OPTIMIZER = "optimizer"      # reduces depth / gate count
    ANALYZER = "analyzer"        # inspects circuit properties
    SCORER = "scorer"            # rates circuit quality
    COORDINATOR = "coordinator"  # orchestrates the other agents
27
+
28
+
29
class AgentState(Enum):
    """Lifecycle states an agent moves through while working."""

    IDLE = "idle"            # not currently engaged
    THINKING = "thinking"    # deciding on the next action
    EXECUTING = "executing"  # running a tool
    WAITING = "waiting"      # blocked on external input
    COMPLETED = "completed"  # finished successfully
    ERROR = "error"          # last operation failed
37
+
38
+
39
@dataclass
class AgentContext:
    """Shared context handed to agents when they make decisions.

    Carries the optimization goal, the circuit built so far, a running
    action history, hard constraints, and a scratch area for data shared
    between agents (used by the blackboard architecture).
    """

    goal: str
    current_circuit: Optional[str] = None
    history: List[Dict] = field(default_factory=list)
    constraints: Dict = field(default_factory=dict)
    shared_data: Dict = field(default_factory=dict)

    def add_to_history(self, action: str, result: Any):
        """Append a timestamped record of an executed action."""
        entry = {
            "action": action,
            "result": result,
            "timestamp": datetime.now().isoformat(),
        }
        self.history.append(entry)
54
+
55
+
56
@dataclass
class AgentAction:
    """A tool invocation an agent proposes to perform."""

    tool_name: str       # registered tool identifier
    arguments: Dict      # keyword arguments for the tool
    reasoning: str       # why the agent chose this action
    priority: float = 1.0  # relative urgency; higher runs first
63
+
64
+
65
@dataclass
class AgentResult:
    """Outcome report produced by an agent after executing an action."""

    success: bool    # whether the action achieved its purpose
    data: Any        # raw payload returned by the tool, if any
    message: str     # human-readable summary or error text
    actions_taken: List[str] = field(default_factory=list)
    execution_time_ms: float = 0.0  # wall-clock duration of the action
73
+
74
+
75
class BaseAgent(ABC):
    """Abstract base class shared by every agent in the system.

    Defines the decide/execute lifecycle used by both the Blackboard and
    Guided architectures, plus state tracking with change-notification
    callbacks.
    """

    def __init__(self,
                 agent_id: str,
                 role: AgentRole,
                 tools: List[str] = None,
                 llm_config: Dict = None):
        self.agent_id = agent_id
        self.role = role
        self.tools = tools or []
        self.llm_config = llm_config or {}
        self.state = AgentState.IDLE
        self.memory: Dict = {}  # per-agent scratch storage, cleared on reset()
        self._callbacks: List[Callable] = []  # state-change subscribers

    @abstractmethod
    def decide(self, context: AgentContext) -> Optional[AgentAction]:
        """Return the next action to take, or None if nothing to do."""

    @abstractmethod
    def execute(self, action: AgentAction, context: AgentContext) -> AgentResult:
        """Carry out *action* and report the outcome."""

    def can_handle(self, context: AgentContext) -> bool:
        """Whether this agent is applicable to *context* (default: always)."""
        return True

    def on_state_change(self, callback: Callable):
        """Subscribe *callback* to (agent_id, old_state, new_state) events."""
        self._callbacks.append(callback)

    def _set_state(self, new_state: AgentState):
        """Transition to *new_state* and notify every subscriber."""
        previous = self.state
        self.state = new_state
        for notify in self._callbacks:
            notify(self.agent_id, previous, new_state)

    def reset(self):
        """Return the agent to a pristine IDLE state and wipe its memory."""
        self.state = AgentState.IDLE
        self.memory.clear()
123
+
124
+
125
class LLMAgent(BaseAgent):
    """Agent whose decisions are delegated to an LLM.

    Model-agnostic: works with Gemini, OpenAI, Anthropic, Groq, Ollama, etc.
    Usable in both Blackboard and Guided modes.
    """

    def __init__(self,
                 agent_id: str,
                 role: AgentRole,
                 system_prompt: str,
                 tools: List[str] = None,
                 llm_config: Dict = None):
        super().__init__(agent_id, role, tools, llm_config)
        self.system_prompt = system_prompt
        self._adapter = None  # created lazily on first use

    def _get_adapter(self):
        """Return the LLM adapter, creating it on first access (lazy init)."""
        if self._adapter is None:
            from config import config
            from agents.llm_adapter import get_llm_adapter

            self._adapter = get_llm_adapter(
                provider=config.llm.provider,
                model=config.llm.model,
                api_key=config.llm.api_key,
            )
        return self._adapter

    def _build_messages(self, context: AgentContext) -> List[Dict]:
        """Assemble the system prompt plus a user message describing *context*."""
        prompt = f"""
Goal: {context.goal}

Current Circuit:
{context.current_circuit or 'None yet'}

Constraints:
{json.dumps(context.constraints, indent=2)}

History (last 5 actions):
{json.dumps(context.history[-5:], indent=2)}
"""
        return [
            {"role": "system", "content": self.system_prompt},
            {"role": "user", "content": prompt},
        ]

    def decide(self, context: AgentContext) -> Optional[AgentAction]:
        """Ask the LLM for the next action; None if it proposes nothing."""
        self._set_state(AgentState.THINKING)

        try:
            from config import config
            from tools import registry

            # Expose schemas only for tools this agent is permitted to use.
            schemas = []
            for name in self.tools:
                tool = registry.get(name)
                if tool:
                    schemas.append(tool.to_llm_schema())

            response = self._get_adapter().generate(
                messages=self._build_messages(context),
                tools=schemas or None,
                temperature=self.llm_config.get("temperature", config.llm.temperature),
                max_tokens=self.llm_config.get("max_tokens", config.llm.max_tokens),
            )

            if not response.tool_calls:
                return None

            # Honour only the first proposed tool call.
            call = response.tool_calls[0]
            return AgentAction(
                tool_name=call.tool_name,
                arguments=call.arguments,
                reasoning=call.reasoning,
            )

        except Exception as e:
            logger.error(f"Agent {self.agent_id} decision failed: {e}")
            self._set_state(AgentState.ERROR)
            return None

    def execute(self, action: AgentAction, context: AgentContext) -> AgentResult:
        """Run the chosen tool and wrap its outcome in an AgentResult."""
        self._set_state(AgentState.EXECUTING)

        import time
        started = time.perf_counter()

        try:
            from tools import invoke_tool

            outcome = invoke_tool(action.tool_name, **action.arguments)
            duration_ms = (time.perf_counter() - started) * 1000

            context.add_to_history(action.tool_name, outcome)
            self._set_state(AgentState.COMPLETED)

            return AgentResult(
                success=outcome.get("success", False),
                data=outcome,
                message=f"Executed {action.tool_name}",
                actions_taken=[action.tool_name],
                execution_time_ms=duration_ms,
            )

        except Exception as e:
            logger.error(f"Agent {self.agent_id} execution failed: {e}")
            self._set_state(AgentState.ERROR)
            return AgentResult(
                success=False,
                data=None,
                message=str(e),
            )
245
+
246
+
247
class RuleBasedAgent(BaseAgent):
    """
    Agent that uses predefined rules for decision making.
    Useful for deterministic behavior in Guided mode.

    Rules are callables that inspect an AgentContext and either return an
    AgentAction (first match wins) or None to defer to the next rule.
    """

    def __init__(self,
                 agent_id: str,
                 role: AgentRole,
                 rules: List[Callable[[AgentContext], Optional[AgentAction]]],
                 tools: List[str] = None):
        super().__init__(agent_id, role, tools)
        self.rules = rules

    def decide(self, context: AgentContext) -> Optional[AgentAction]:
        """Apply rules in order; return the first action produced, else None."""
        self._set_state(AgentState.THINKING)

        for rule in self.rules:
            action = rule(context)
            if action is not None:
                return action

        return None

    def execute(self, action: AgentAction, context: AgentContext) -> AgentResult:
        """Execute the action's tool, timing it and recording it in history."""
        self._set_state(AgentState.EXECUTING)

        import time
        start = time.perf_counter()

        try:
            from tools import invoke_tool

            result = invoke_tool(action.tool_name, **action.arguments)
            elapsed = (time.perf_counter() - start) * 1000

            context.add_to_history(action.tool_name, result)

            self._set_state(AgentState.COMPLETED)
            return AgentResult(
                success=result.get("success", False),
                data=result,
                message=f"Executed {action.tool_name}",
                actions_taken=[action.tool_name],
                execution_time_ms=elapsed
            )

        except Exception as e:
            # Fix: previously failures were returned silently; log them for
            # parity with LLMAgent.execute so errors are visible in the logs.
            logger.error(f"Agent {self.agent_id} execution failed: {e}")
            self._set_state(AgentState.ERROR)
            return AgentResult(
                success=False,
                data=None,
                message=str(e)
            )
agents/llm_adapter.py ADDED
@@ -0,0 +1,676 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ LLM Adapter: Model-agnostic LLM interface with multi-model fallback.
3
+ Supports Gemini (native), OpenAI, Anthropic, Groq, Ollama, and any LiteLLM provider.
4
+
5
+ Path: QAgents-workflos/agents/llm_adapter.py
6
+ Related: config.py (GEMINI_MODELS cascade, CostTrackingConfig)
7
+ orchestrators/orchestrator.py (uses get_llm_adapter)
8
+ specialized_agents.py (agents use LLM adapters)
9
+
10
+ Multi-Model Fallback System with Recovery:
11
+ ==========================================
12
+ When a model hits rate limits (429) or errors, automatically falls back to next model.
13
+ RECOVERY: When preferred model cooldown expires, automatically rotates back.
14
+
15
+ Cascade order (by RPD - highest to lowest):
16
+ 1. gemma-3-27b-it (14,400 RPD) - Highest availability
17
+ 2. gemini-2.5-flash-lite (1,000 RPD) - DEFAULT PREFERRED
18
+ 3. gemini-2.5-flash (250 RPD)
19
+ 4. gemini-2.0-flash (200 RPD)
20
+ 5. gemini-2.0-flash-lite (200 RPD)
21
+ 6. gemini-2.5-pro (50 RPD) - Last resort
22
+
23
+ Model Recovery Timer:
24
+ =====================
25
+ - Tracks when each model was rate-limited
26
+ - Calculates recovery time (RPM cooldown: 60s, RPD cooldown: reset at midnight)
27
+ - Automatically returns to preferred model when recovered
28
+ - Preferred model index configurable (default: 1 = gemini-2.5-flash-lite)
29
+ """
30
+
31
+ import json
32
+ import logging
33
+ import time
34
+ from abc import ABC, abstractmethod
35
+ from typing import Any, Dict, List, Optional
36
+ from dataclasses import dataclass, field
37
+ from collections import deque
38
+ from datetime import datetime, timedelta
39
+
40
+ logger = logging.getLogger(__name__)
41
+
42
+
43
+ # =============================================================================
44
+ # MULTI-MODEL RATE LIMITER
45
+ # =============================================================================
46
+
47
class ModelRateLimiter:
    """
    Rate limiter with per-model tracking, automatic fallback, and recovery.

    Tracks:
    - RPM: requests per minute (sliding window of request timestamps)
    - RPD: requests per day (counter reset at midnight or manually)
    - Recovery: when rate-limited models become available again

    When the current model exceeds its limits, ``fallback_to_next`` advances to
    the next model in the cascade; ``check_preferred_model_recovery`` rotates
    back to the preferred model once its cooldown expires.
    """

    # Class-scoped logger so methods do not rely on a module-level global.
    _log = logging.getLogger(__name__)

    def __init__(self, models: Optional[List[Dict]] = None, preferred_model_idx: int = 1):
        """
        Initialize with a model cascade.

        Args:
            models: List of model configs, each with "name", "rpm" and "rpd"
                keys. When empty/None, falls back to config.GEMINI_MODELS.
            preferred_model_idx: Index of the preferred model to start with and
                return to after recovery. Clamped to 0 if out of range.
        """
        if not models:
            # Imported lazily so the limiter can be constructed with an
            # explicit cascade (e.g. in tests) even when `config` is absent.
            from config import GEMINI_MODELS
            models = GEMINI_MODELS
        if not 0 <= preferred_model_idx < len(models):
            preferred_model_idx = 0  # defensive clamp for short custom cascades
        self.models = models
        self.preferred_model_idx = preferred_model_idx  # model to return to after recovery
        self.current_model_idx = preferred_model_idx    # start with preferred model

        # Per-model usage/recovery bookkeeping, seeded for every cascade entry.
        self.model_usage: Dict[str, Dict] = {}
        for model in self.models:
            self.model_usage[model["name"]] = {
                "rpm_window": deque(maxlen=model["rpm"]),  # recent request timestamps
                "rpd_count": 0,
                "rpd_reset_time": self._next_midnight(),
                "last_request_time": 0,
                "total_tokens": 0,
                "total_time_ms": 0.0,
                # Recovery tracking
                "rate_limited_at": None,    # datetime when rate limited
                "rpm_recovery_time": None,  # when RPM limit recovers (~60s)
                "rpd_recovery_time": None,  # when RPD limit recovers (midnight)
            }

    @staticmethod
    def _next_midnight() -> datetime:
        """Start of the next day (daily RPD counters reset at this instant)."""
        today_start = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
        return today_start + timedelta(days=1)

    @property
    def current_model(self) -> Dict:
        """Config dict of the model currently in use."""
        return self.models[self.current_model_idx]

    @property
    def current_model_name(self) -> str:
        """Name of the model currently in use."""
        return self.current_model["name"]

    @property
    def preferred_model_name(self) -> str:
        """Name of the preferred (home) model."""
        return self.models[self.preferred_model_idx]["name"]

    def get_min_interval(self, model_name: Optional[str] = None) -> float:
        """Minimum seconds between requests for a model (80% safety buffer)."""
        if model_name is None:
            model_name = self.current_model_name
        for model in self.models:
            if model["name"] == model_name:
                # 80% buffer: 60s / (rpm * 0.8)
                return 60.0 / (model["rpm"] * 0.8)
        return 5.0  # conservative default for unknown models

    def check_preferred_model_recovery(self) -> bool:
        """
        Check whether the preferred model has recovered from rate limiting
        and, if so, switch back to it.

        Returns:
            True if we switched back to the preferred model.
        """
        if self.current_model_idx == self.preferred_model_idx:
            return False  # already on preferred model

        preferred_name = self.preferred_model_name
        usage = self.model_usage.get(preferred_name)
        if not usage:
            return False

        current_time = datetime.now()

        # RPD recovery: the daily counter resets at midnight.
        if usage.get("rpd_recovery_time") and current_time >= usage["rpd_recovery_time"]:
            usage["rpd_count"] = 0
            usage["rpd_recovery_time"] = None
            usage["rate_limited_at"] = None
            self._log.info(f"Preferred model {preferred_name} RPD limit reset - switching back")
            self.current_model_idx = self.preferred_model_idx
            return True

        # RPM recovery: the per-minute window clears after ~60 seconds.
        if usage.get("rpm_recovery_time") and current_time >= usage["rpm_recovery_time"]:
            usage["rpm_recovery_time"] = None
            # Only switch back if a request would actually be allowed now.
            can_req, _ = self.can_request(preferred_name)
            if can_req:
                self._log.info(f"Preferred model {preferred_name} RPM recovered - switching back")
                self.current_model_idx = self.preferred_model_idx
                return True

        return False

    def can_request(self, model_name: Optional[str] = None) -> tuple[bool, str]:
        """
        Check whether a request to the current/specified model is allowed.

        Returns:
            (can_request, reason) -- reason explains a denial, or is "OK".
        """
        if model_name is None:
            model_name = self.current_model_name

        if model_name not in self.model_usage:
            return False, f"Unknown model: {model_name}"

        usage = self.model_usage[model_name]
        model_config = next((m for m in self.models if m["name"] == model_name), None)
        if not model_config:
            return False, f"Model config not found: {model_name}"

        # RPD: lazily reset the daily counter once midnight has passed.
        if datetime.now() >= usage["rpd_reset_time"]:
            usage["rpd_count"] = 0
            usage["rpd_reset_time"] = self._next_midnight()

        if usage["rpd_count"] >= model_config["rpd"]:
            return False, f"RPD limit reached ({model_config['rpd']}/day)"

        # RPM: prune entries older than 60s from the sliding window.
        current_time = time.time()
        window = usage["rpm_window"]
        while window and (current_time - window[0]) > 60:
            window.popleft()

        if len(window) >= model_config["rpm"]:
            return False, f"RPM limit reached ({model_config['rpm']}/min)"

        return True, "OK"

    def wait_if_needed(self, model_name: Optional[str] = None) -> float:
        """
        Sleep as needed to keep request spacing under the RPM budget.

        Returns:
            Time waited in seconds (0.0 if no wait was necessary).
        """
        if model_name is None:
            model_name = self.current_model_name
        if model_name not in self.model_usage:
            return 0.0

        usage = self.model_usage[model_name]
        min_interval = self.get_min_interval(model_name)
        time_since_last = time.time() - usage["last_request_time"]

        if time_since_last < min_interval:
            sleep_time = min_interval - time_since_last
            self._log.info(f"Rate limiting [{model_name}]: waiting {sleep_time:.2f}s")
            time.sleep(sleep_time)
            return sleep_time
        return 0.0

    def record_request(self, model_name: Optional[str] = None, tokens: int = 0, time_ms: float = 0):
        """Record a successful request against a model's RPM/RPD budgets."""
        if model_name is None:
            model_name = self.current_model_name
        if model_name not in self.model_usage:
            return

        usage = self.model_usage[model_name]
        current_time = time.time()

        usage["rpm_window"].append(current_time)
        usage["rpd_count"] += 1
        usage["last_request_time"] = current_time
        usage["total_tokens"] += tokens
        usage["total_time_ms"] += time_ms

        self._log.debug(f"Request recorded [{model_name}]: RPD {usage['rpd_count']}, tokens {tokens}")

    def fallback_to_next(self, reason: str = "unknown") -> Optional[str]:
        """
        Switch to the next model in the cascade, recording a recovery time
        for the model being abandoned.

        Args:
            reason: Why fallback is needed ("rpm", "rpd", "quota", "429", ...).

        Returns:
            The new model name, or None if the cascade is exhausted.
        """
        current_model_name = self.current_model_name
        # Direct index (not .get() with a throwaway default): every cascade
        # model is seeded in __init__, and recovery times must land in the
        # real per-model state or recovery tracking silently does nothing.
        usage = self.model_usage[current_model_name]

        now = datetime.now()
        usage["rate_limited_at"] = now

        if "rpm" in reason.lower() or "429" in reason:
            # RPM cooldown: the sliding window clears within 60 seconds.
            usage["rpm_recovery_time"] = now + timedelta(seconds=60)
            self._log.info(f"Model {current_model_name} RPM limited - recovery at {usage['rpm_recovery_time']}")
        elif "rpd" in reason.lower() or "quota" in reason.lower():
            # RPD cooldown: the daily quota resets at midnight.
            usage["rpd_recovery_time"] = self._next_midnight()
            self._log.info(f"Model {current_model_name} RPD limited - recovery at {usage['rpd_recovery_time']}")

        if self.current_model_idx + 1 < len(self.models):
            self.current_model_idx += 1
            new_model = self.current_model_name
            self._log.warning(f"Falling back to model: {new_model}")
            return new_model

        self._log.error("No more models available in fallback cascade!")
        return None

    def reset_to_preferred(self):
        """Force the limiter back onto the preferred model."""
        self.current_model_idx = self.preferred_model_idx
        self._log.info(f"Reset to preferred model: {self.preferred_model_name}")

    def get_usage_summary(self) -> Dict:
        """Per-model usage summary (RPM/RPD consumption, tokens, time)."""
        summary = {}
        for model in self.models:
            name = model["name"]
            usage = self.model_usage[name]
            summary[name] = {
                "rpm_used": len(usage["rpm_window"]),
                "rpm_limit": model["rpm"],
                "rpd_used": usage["rpd_count"],
                "rpd_limit": model["rpd"],
                "total_tokens": usage["total_tokens"],
                "total_time_ms": usage["total_time_ms"],
            }
        return summary
299
+
300
+
301
# Global rate limiter instance (module-level singleton shared by all adapters
# in this process, so every GeminiAdapter draws from the same quota budget).
_global_rate_limiter: Optional[ModelRateLimiter] = None

def get_rate_limiter() -> ModelRateLimiter:
    """Get or create global rate limiter (lazy singleton accessor)."""
    global _global_rate_limiter
    if _global_rate_limiter is None:
        # First access builds the limiter with the default cascade from config.
        _global_rate_limiter = ModelRateLimiter()
    return _global_rate_limiter
310
+
311
+
312
+ # =============================================================================
313
+ # LLM RESPONSE TYPES
314
+ # =============================================================================
315
+
316
@dataclass
class LLMToolCall:
    """Standardized tool call across all providers."""
    tool_name: str             # name of the function/tool the model selected
    arguments: Dict[str, Any]  # parsed keyword arguments for the tool
    reasoning: str             # accompanying model text explaining the choice
322
+
323
+
324
@dataclass
class LLMResponse:
    """Standardized response across all providers."""
    text: str                      # plain-text completion (may be empty when only tool calls were made)
    tool_calls: List[LLMToolCall]  # zero or more tool invocations requested by the model
    finish_reason: str             # provider-reported stop reason
    model_used: str = ""    # Track which model was actually used
    tokens_used: int = 0    # Track token usage if available
    time_ms: float = 0.0    # Track response time
333
+
334
+
335
+ # =============================================================================
336
+ # BASE ADAPTER
337
+ # =============================================================================
338
+
339
class BaseLLMAdapter(ABC):
    """Abstract base for LLM adapters.

    Concrete adapters implement generate() and normalize provider-specific
    output into an LLMResponse.
    """

    def __init__(self, api_key: Optional[str] = None):
        # api_key may be None -- presumably concrete adapters then rely on
        # provider defaults (e.g. environment variables); confirm per provider.
        self.api_key = api_key

    @abstractmethod
    def generate(self,
                 messages: List[Dict[str, str]],
                 tools: Optional[List[Dict[str, Any]]] = None,
                 temperature: float = 0.2,
                 max_tokens: int = 2000) -> LLMResponse:
        """Generate a response from the LLM.

        Args:
            messages: Chat messages as {"role": ..., "content": ...} dicts.
            tools: Optional OpenAI-style tool schemas (each with a "function" key).
            temperature: Sampling temperature.
            max_tokens: Maximum tokens to generate.

        Returns:
            LLMResponse with text and any tool calls.
        """
        pass
353
+
354
+
355
+ # =============================================================================
356
+ # GEMINI ADAPTER WITH FALLBACK
357
+ # =============================================================================
358
+
359
class GeminiAdapter(BaseLLMAdapter):
    """
    Google Gemini API adapter with multi-model fallback.

    Automatically falls back to next model when:
    - Rate limit exceeded (429)
    - API error occurs (if fallback_on_error=True)
    - Model unavailable
    """

    def __init__(self,
                 model: str = "gemini-2.5-flash-lite",
                 api_key: Optional[str] = None,
                 enable_fallback: bool = True):
        super().__init__(api_key)
        self.model = model
        self.enable_fallback = enable_fallback
        self._client = None  # created lazily in _get_client()
        # Shared process-wide limiter: all GeminiAdapter instances draw from
        # the same per-model quota accounting.
        self.rate_limiter = get_rate_limiter()

    def _get_client(self):
        """Lazy load Gemini client (import deferred until first call)."""
        if self._client is None:
            try:
                import google.genai
                self._client = google.genai.Client(api_key=self.api_key)
            except ImportError:
                raise ImportError("google-genai not installed. Install with: pip install google-genai")
        return self._client

    def generate(self,
                 messages: List[Dict[str, str]],
                 tools: Optional[List[Dict[str, Any]]] = None,
                 temperature: float = 0.2,
                 max_tokens: int = 2000) -> LLMResponse:
        """
        Generate content using Gemini with automatic fallback.

        Tries the rate limiter's current model first, then walks down the
        cascade on rate limits/errors, up to one attempt per cascade model.
        """
        start_time = time.time()
        last_error = None
        attempts = 0
        max_attempts = len(self.rate_limiter.models)

        while attempts < max_attempts:
            current_model = self.rate_limiter.current_model_name
            attempts += 1

            try:
                # Check if preferred model has recovered (may rotate us back).
                self.rate_limiter.check_preferred_model_recovery()

                # Check if we can make a request with the current model.
                can_request, reason = self.rate_limiter.can_request(current_model)

                if not can_request:
                    logger.warning(f"Cannot request from {current_model}: {reason}")
                    if self.enable_fallback:
                        next_model = self.rate_limiter.fallback_to_next(reason)
                        if next_model:
                            continue
                    # NOTE(review): this raise occurs inside the surrounding
                    # try, so it is caught by the except below, which may call
                    # fallback_to_next a second time -- verify this double
                    # fallback is intended.
                    raise Exception(f"Rate limit exceeded: {reason}")
                # Wait if needed for RPM
                self.rate_limiter.wait_if_needed(current_model)

                # Make the actual API call
                response = self._call_gemini(current_model, messages, tools, temperature, max_tokens)

                # Record successful request.
                # NOTE(review): elapsed is measured from the start of generate(),
                # so it includes time spent on failed attempts and waits, not
                # just this model's call -- confirm that is the intent.
                elapsed_ms = (time.time() - start_time) * 1000
                tokens = self._estimate_tokens(messages, response.text)
                self.rate_limiter.record_request(current_model, tokens, elapsed_ms)

                # Update response metadata
                response.model_used = current_model
                response.tokens_used = tokens
                response.time_ms = elapsed_ms

                # Record in global cost tracking (best-effort: config optional)
                try:
                    from config import config
                    config.evaluation.cost_tracking.record_request(current_model, tokens, elapsed_ms)
                except Exception:
                    pass  # Config might not be available

                return response

            except Exception as e:
                last_error = e
                error_str = str(e).lower()

                # Heuristic: treat 429/"rate"/"quota" messages as rate limits.
                is_rate_limit = "429" in str(e) or "rate" in error_str or "quota" in error_str

                # NOTE(review): generic fallback only triggers when the word
                # "error" appears in the exception text -- other failures
                # propagate immediately; confirm that is the desired filter.
                if is_rate_limit or (self.enable_fallback and "error" in error_str):
                    logger.warning(f"Error with {current_model}: {e}")
                    next_model = self.rate_limiter.fallback_to_next(error_str)
                    if next_model:
                        logger.info(f"Retrying with fallback model: {next_model}")
                        continue

                # Non-recoverable error or no fallback
                raise

        # Exhausted all models
        raise Exception(f"All models exhausted. Last error: {last_error}")

    def _call_gemini(self,
                     model: str,
                     messages: List[Dict[str, str]],
                     tools: Optional[List[Dict[str, Any]]],
                     temperature: float,
                     max_tokens: int) -> LLMResponse:
        """Make actual Gemini API call and normalize the result."""
        client = self._get_client()

        # Convert messages to Gemini format: "system" is folded into the
        # "user" role; anything else becomes "model".
        contents = []
        for msg in messages:
            role = "user" if msg["role"] in ["user", "system"] else "model"
            contents.append({
                "role": role,
                "parts": [{"text": msg["content"]}]
            })

        # Build tools for Gemini from OpenAI-style {"function": ...} schemas.
        gemini_tools = None
        if tools:
            gemini_tools = [{
                "function_declarations": [t["function"] for t in tools]
            }]

        # Call Gemini - tools go in config (local dict; shadows nothing at call time)
        config = {
            "temperature": temperature,
            "max_output_tokens": max_tokens
        }
        if gemini_tools:
            config["tools"] = gemini_tools

        response = client.models.generate_content(
            model=model,
            contents=contents,
            config=config
        )

        # Extract response text; empty string when the model returned only tool calls.
        text = response.text if hasattr(response, 'text') and response.text else ""
        tool_calls = []

        if hasattr(response, 'function_calls') and response.function_calls:
            for func_call in response.function_calls:
                # args is usually already a dict; fall back to JSON parsing.
                args = func_call.args if isinstance(func_call.args, dict) else json.loads(str(func_call.args))
                tool_calls.append(LLMToolCall(
                    tool_name=func_call.name,
                    arguments=args,
                    reasoning=text or "Tool selected by Gemini"
                ))

        return LLMResponse(
            text=text,
            tool_calls=tool_calls,
            finish_reason=str(response.finish_reason) if hasattr(response, 'finish_reason') else "STOP"
        )

    def _estimate_tokens(self, messages: List[Dict], response_text: str) -> int:
        """Estimate token count (rough heuristic: 4 chars = 1 token)."""
        input_chars = sum(len(m.get("content", "") or "") for m in messages)
        output_chars = len(response_text or "")
        return (input_chars + output_chars) // 4
529
+
530
+
531
+ # =============================================================================
532
+ # LITELLM ADAPTER
533
+ # =============================================================================
534
+
535
class LiteLLMAdapter(BaseLLMAdapter):
    """LiteLLM adapter for OpenAI, Anthropic, Groq, Ollama, and others."""

    def __init__(self, model: str = "gpt-4o-mini", provider: str = "openai", api_key: Optional[str] = None):
        super().__init__(api_key)
        self.provider = provider
        # LiteLLM routes by "provider/model"; a bare model name when no provider.
        self.model_string = f"{provider}/{model}" if provider else model
        self._client = None

    def _get_client(self):
        """Import and cache the litellm module, applying the API key once."""
        if self._client is not None:
            return self._client
        try:
            import litellm
            if self.api_key:
                litellm.api_key = self.api_key
            self._client = litellm
        except ImportError:
            raise ImportError("litellm not installed. Install with: pip install litellm")
        return self._client

    def generate(self,
                 messages: List[Dict[str, str]],
                 tools: Optional[List[Dict[str, Any]]] = None,
                 temperature: float = 0.2,
                 max_tokens: int = 2000) -> LLMResponse:
        """Generate content using LiteLLM."""
        try:
            started = time.time()
            litellm_mod = self._get_client()

            # Unified completion call; LiteLLM translates to the provider API.
            raw = litellm_mod.completion(
                model=self.model_string,
                messages=messages,
                tools=tools,
                temperature=temperature,
                max_tokens=max_tokens
            )

            first_choice = raw.choices[0]
            answer_text = first_choice.message.content or ""

            # Normalize any tool calls into our provider-agnostic type.
            parsed_calls: List[LLMToolCall] = []
            raw_calls = getattr(first_choice.message, 'tool_calls', None)
            if raw_calls:
                for call in raw_calls:
                    parsed_calls.append(LLMToolCall(
                        tool_name=call.function.name,
                        arguments=json.loads(call.function.arguments),
                        reasoning=answer_text or "Tool selected by LLM"
                    ))

            elapsed = (time.time() - started) * 1000
            token_count = raw.usage.total_tokens if hasattr(raw, 'usage') else 0

            return LLMResponse(
                text=answer_text,
                tool_calls=parsed_calls,
                finish_reason=first_choice.finish_reason,
                model_used=self.model_string,
                tokens_used=token_count,
                time_ms=elapsed
            )

        except Exception as e:
            logger.error(f"LiteLLM generation failed: {e}")
            raise
604
+
605
+
606
+ # =============================================================================
607
+ # MOCK ADAPTER FOR TESTING
608
+ # =============================================================================
609
+
610
class MockLLMAdapter(BaseLLMAdapter):
    """Mock LLM for testing without API keys."""

    def generate(self,
                 messages: List[Dict[str, str]],
                 tools: Optional[List[Dict[str, Any]]] = None,
                 temperature: float = 0.2,
                 max_tokens: int = 2000) -> LLMResponse:
        """Return a canned response; all inputs are ignored."""
        canned = LLMResponse(
            text="Mock LLM response",
            tool_calls=[],
            finish_reason="stop",
            model_used="mock",
            tokens_used=10,
            time_ms=1.0
        )
        return canned
627
+
628
+
629
+ # =============================================================================
630
+ # FACTORY FUNCTION
631
+ # =============================================================================
632
+
633
def get_llm_adapter(provider: str = "gemini",
                    model: str = "gemini-2.5-flash-lite",
                    api_key: Optional[str] = None,
                    enable_fallback: bool = True) -> BaseLLMAdapter:
    """
    Factory function to get the appropriate LLM adapter.

    Args:
        provider: LLM provider (gemini, openai, anthropic, etc.)
        model: Model name
        api_key: API key for authentication
        enable_fallback: Enable automatic model fallback on rate limits

    Returns:
        Configured LLM adapter
    """
    if provider == "gemini":
        try:
            return GeminiAdapter(model=model, api_key=api_key, enable_fallback=enable_fallback)
        except ImportError:
            # NOTE(review): GeminiAdapter imports google.genai lazily inside
            # _get_client(), so construction itself should not raise
            # ImportError -- this fallback branch looks unreachable; verify.
            logger.warning("Gemini not available, trying LiteLLM")
            return LiteLLMAdapter(model=model, provider="gemini", api_key=api_key)

    elif provider in ["openai", "anthropic", "groq", "ollama", "cohere", "mistral"]:
        # Known providers routed through LiteLLM's unified completion API.
        return LiteLLMAdapter(model=model, provider=provider, api_key=api_key)

    elif provider == "mock":
        # Deterministic stub for tests without API keys.
        return MockLLMAdapter(api_key=api_key)

    else:
        # Try LiteLLM for unknown providers
        logger.warning(f"Unknown provider {provider}, attempting LiteLLM")
        return LiteLLMAdapter(model=model, provider=provider, api_key=api_key)
666
+
667
+
668
def get_usage_summary() -> Dict:
    """Get usage summary from global rate limiter."""
    limiter = get_rate_limiter()
    return limiter.get_usage_summary()
671
+
672
+
673
def reset_rate_limiter():
    """Reset rate limiter to default state."""
    # Drop the cached singleton; the next get_rate_limiter() call rebuilds it.
    global _global_rate_limiter
    _global_rate_limiter = None
agents/specialized_agents.py ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Path: QAgents-workflos/agents/specialized_agents.py
2
+ # Relations: Uses base_agent.py, prompts/agent_prompts.py
3
+ # Description: Domain-specific agents for quantum circuit optimization
4
+ """
5
+ Specialized Quantum Agents: Domain-specific agents for circuit optimization.
6
+ """
7
+
8
+ from typing import Optional, List, Dict, Any
9
+ from .base_agent import (
10
+ LLMAgent, RuleBasedAgent, AgentRole,
11
+ AgentContext, AgentAction, AgentResult
12
+ )
13
+
14
+
15
+ def _goal_to_string(context: AgentContext) -> str:
16
+ """Safely extract goal as string from context."""
17
+ goal = context.goal
18
+ if isinstance(goal, list):
19
+ goal = goal[0] if goal else ""
20
+ return str(goal).lower() if goal else ""
21
+
22
+
23
class ArchitectAgent(LLMAgent):
    """
    Plans the overall circuit structure.

    Decides what type of circuit to build and the high-level approach.
    """

    def __init__(self, agent_id: str = "architect"):
        from prompts import ARCHITECT_PROMPT  # lazy import

        planning_tools = [
            "create_from_template",
            "generate_from_description",
            "analyze_circuit",
        ]
        super().__init__(
            agent_id=agent_id,
            role=AgentRole.ARCHITECT,
            system_prompt=ARCHITECT_PROMPT,
            tools=planning_tools,
        )

    def can_handle(self, context: "AgentContext") -> bool:
        """Eligible for fresh planning (no circuit yet) or explicit replanning."""
        if context.current_circuit is None:
            return True
        return "replan" in _goal_to_string(context)
47
+
48
+
49
class BuilderAgent(LLMAgent):
    """
    Builds and modifies circuits based on plans.

    Handles the actual circuit construction.
    """

    def __init__(self, agent_id: str = "builder"):
        from prompts import BUILDER_PROMPT  # lazy import

        construction_tools = [
            "create_from_template",
            "generate_random_circuit",
            "generate_from_description",
            "compose_circuits",
            "tensor_circuits",
            "repeat_circuit",
        ]
        super().__init__(
            agent_id=agent_id,
            role=AgentRole.BUILDER,
            system_prompt=BUILDER_PROMPT,
            tools=construction_tools,
        )

    def can_handle(self, context: "AgentContext") -> bool:
        """Eligible when no circuit exists yet, or once a plan shows up in history."""
        if context.current_circuit is None:
            return True
        return any("plan" in str(h.get("action", "")).lower() for h in context.history)
77
+
78
+
79
class ValidatorAgent(LLMAgent):
    """
    Validates circuits for correctness and hardware compatibility.
    """

    def __init__(self, agent_id: str = "validator"):
        from prompts import VALIDATOR_PROMPT  # lazy import

        checking_tools = [
            "validate_syntax",
            "check_connectivity",
            "verify_unitary",
        ]
        super().__init__(
            agent_id=agent_id,
            role=AgentRole.VALIDATOR,
            system_prompt=VALIDATOR_PROMPT,
            tools=checking_tools,
        )

    def can_handle(self, context: "AgentContext") -> bool:
        """Eligible whenever there is a circuit available to validate."""
        return context.current_circuit is not None
101
+
102
+
103
class OptimizerAgent(LLMAgent):
    """
    Optimizes circuits for depth, gate count, and hardware fitness.
    """

    def __init__(self, agent_id: str = "optimizer"):
        from prompts import OPTIMIZER_PROMPT  # lazy import

        optimization_tools = [
            "generate_inverse",
            "compose_circuits",
            "analyze_circuit",
            "calculate_complexity",
            "calculate_hardware_fitness",
        ]
        super().__init__(
            agent_id=agent_id,
            role=AgentRole.OPTIMIZER,
            system_prompt=OPTIMIZER_PROMPT,
            tools=optimization_tools,
        )

    def can_handle(self, context: "AgentContext") -> bool:
        """Eligible when a circuit exists and the goal asks for optimization."""
        if context.current_circuit is None:
            return False
        goal = _goal_to_string(context)
        return any(keyword in goal for keyword in ("optimize", "improve"))
130
+
131
+
132
class AnalyzerAgent(LLMAgent):
    """
    Analyzes circuit properties and provides insights.
    """

    def __init__(self, agent_id: str = "analyzer"):
        from prompts import ANALYZER_PROMPT  # lazy import

        inspection_tools = [
            "parse_qasm",
            "analyze_circuit",
            "get_circuit_depth",
            "get_statevector",
            "get_probabilities",
            "estimate_resources",
            "estimate_noise",
        ]
        super().__init__(
            agent_id=agent_id,
            role=AgentRole.ANALYZER,
            system_prompt=ANALYZER_PROMPT,
            tools=inspection_tools,
        )

    def can_handle(self, context: "AgentContext") -> bool:
        """Eligible whenever a circuit is available to analyze."""
        return context.current_circuit is not None
158
+
159
+
160
class ScorerAgent(LLMAgent):
    """
    Scores circuits on various metrics.
    """

    def __init__(self, agent_id: str = "scorer"):
        from prompts import SCORER_PROMPT  # lazy import

        scoring_tools = [
            "calculate_complexity",
            "calculate_hardware_fitness",
            "calculate_expressibility",
            "simulate_circuit",
        ]
        super().__init__(
            agent_id=agent_id,
            role=AgentRole.SCORER,
            system_prompt=SCORER_PROMPT,
            tools=scoring_tools,
        )

    def can_handle(self, context: "AgentContext") -> bool:
        """Eligible when a circuit exists and the goal asks for scoring/evaluation."""
        if context.current_circuit is None:
            return False
        goal = _goal_to_string(context)
        return any(keyword in goal for keyword in ("score", "evaluate"))
186
+
187
+
188
class SimulatorAgent(RuleBasedAgent):
    """
    Rule-based agent for circuit simulation.

    Deterministic - always simulates when circuit is ready.
    """

    def __init__(self, agent_id: str = "simulator"):
        def simulate_rule(context: "AgentContext") -> Optional[AgentAction]:
            # Decline when there is nothing to simulate yet.
            if not context.current_circuit:
                return None
            return AgentAction(
                tool_name="simulate_circuit",
                arguments={"qasm": context.current_circuit, "shots": 1024},
                reasoning="Circuit ready for simulation",
            )

        super().__init__(
            agent_id=agent_id,
            role=AgentRole.ANALYZER,
            rules=[simulate_rule],
            tools=["simulate_circuit", "get_statevector", "get_probabilities"],
        )
210
+
211
+
212
# Factory function to create all specialized agents
def create_all_agents() -> Dict[str, LLMAgent]:
    """Create instances of all specialized agents, keyed by agent name."""
    registry = (
        ("architect", ArchitectAgent),
        ("builder", BuilderAgent),
        ("validator", ValidatorAgent),
        ("optimizer", OptimizerAgent),
        ("analyzer", AnalyzerAgent),
        ("scorer", ScorerAgent),
        ("simulator", SimulatorAgent),
    )
    return {name: agent_cls() for name, agent_cls in registry}
app.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ QAgents-Workflows: Hugging Face Space Entry Point
3
+ Provides a Gradio interface for the Quantum Circuit Orchestrator.
4
+ Reads all configuration from environment variables for HF Space deployment.
5
+ """
6
+
7
+ import os
8
+ import gradio as gr
9
+ import logging
10
+ from config import LLMConfig
11
+ from orchestrators import create_orchestrator
12
+ from client.mcp_client import get_client
13
+
14
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Log environment configuration at startup (values come from env vars so the
# HF Space deployment can be configured without code changes).
logger.info("=" * 70)
logger.info("QAgents Quantum Circuit Orchestrator - Initialization")
logger.info("=" * 70)
logger.info(f"LLM Provider: {os.getenv('LLM_PROVIDER', 'gemini (default)')}")
logger.info(f"LLM Model: {os.getenv('LLM_MODEL', 'gemini-2.5-flash-lite (default)')}")
logger.info(f"MCP Server URL: {os.getenv('MCP_SERVER_URL', 'http://127.0.0.1:7861 (default)')}")
logger.info(f"Google API Key configured: {bool(os.getenv('GOOGLE_API_KEY') or os.getenv('GENAI_API_KEY'))}")
logger.info("=" * 70)

# Initialize MCP client (will use MCP_SERVER_URL env var if set)
mcp_client = get_client()
30
+
31
def generate_circuit(prompt, mode, difficulty):
    """Generate a quantum circuit based on the prompt and mode.

    Returns a (display_text, metrics_text) pair for the Gradio outputs.
    """
    try:
        logger.info(f"Generating circuit: mode={mode}, difficulty={difficulty}")
        logger.info(f"Prompt: {prompt}")

        # Create orchestrator for the selected mode.
        orch = create_orchestrator(mode.lower())

        # Run generation.
        # Note: In a real deployment, we might want to map difficulty to specific constraints
        # For now, we pass the prompt directly
        result = orch.run(prompt)

        if not result.success:
            # Failure path: surface accumulated orchestrator errors.
            error_msg = "\n".join(result.errors)
            return f"❌ Failed ({result.execution_time_ms:.0f}ms)\n\nErrors:\n{error_msg}", "N/A"

        body = result.final_output if result.final_output else "No QASM generated."
        output = f"✅ Success ({result.execution_time_ms:.0f}ms)\n\n" + body

        # Add metrics if available
        metrics = f"LLM Calls: {result.steps_completed}\n"
        if hasattr(result, 'tokens_used'):
            metrics += f"Tokens: {result.tokens_used}\n"

        return output, metrics

    except Exception as e:
        logger.error(f"Error generating circuit: {e}")
        return f"❌ System Error: {str(e)}", "Error"
65
+
66
def check_mcp_status():
    """Check connection to MCP server and format a status string."""
    try:
        healthy = mcp_client.health_check()
        url = os.environ.get("MCP_SERVER_URL", "http://127.0.0.1:7861")
        if healthy:
            return f"🟢 Connected ({url})"
        return f"🔴 Disconnected ({url})"
    except Exception as e:
        return f"🔴 Error: {str(e)}"
75
+
76
+ # Create Gradio Interface
77
+ with gr.Blocks(title="Quantum Circuit Orchestrator") as demo:
78
+ gr.Markdown("# ⚛️ QAgents: Quantum Circuit Orchestrator")
79
+ gr.Markdown("Multi-agent system for generating optimized quantum circuits.")
80
+
81
+ with gr.Row():
82
+ with gr.Column(scale=2):
83
+ prompt_input = gr.Textbox(
84
+ label="Circuit Description",
85
+ placeholder="e.g., Create a 3-qubit GHZ state",
86
+ lines=3
87
+ )
88
+ with gr.Row():
89
+ mode_select = gr.Dropdown(
90
+ choices=["naked", "quasar", "hybrid", "blackboard"],
91
+ value="naked",
92
+ label="Orchestration Mode"
93
+ )
94
+ difficulty_select = gr.Dropdown(
95
+ choices=["EASY", "MEDIUM", "HARD", "VERY_HARD"],
96
+ value="EASY",
97
+ label="Estimated Difficulty"
98
+ )
99
+
100
+ generate_btn = gr.Button("Generate Circuit", variant="primary")
101
+
102
+ with gr.Column(scale=1):
103
+ mcp_status = gr.Textbox(label="MCP Server Status", value=check_mcp_status, interactive=False)
104
+ metrics_output = gr.Textbox(label="Execution Metrics", lines=4)
105
+
106
+ with gr.Row():
107
+ qasm_output = gr.Code(label="Generated QASM", language="qasm", lines=15)
108
+
109
+ # Event handlers
110
+ generate_btn.click(
111
+ fn=generate_circuit,
112
+ inputs=[prompt_input, mode_select, difficulty_select],
113
+ outputs=[qasm_output, metrics_output]
114
+ )
115
+
116
+ # Refresh status on load
117
+ demo.load(fn=check_mcp_status, outputs=[mcp_status])
118
+
119
+ if __name__ == "__main__":
120
+ demo.launch()
client/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ """MCP Client module."""
2
+
3
+ from .mcp_client import MCPClient, MCPResponse, get_client
4
+
5
+ __all__ = ["MCPClient", "MCPResponse", "get_client"]
client/mcp_client.py ADDED
@@ -0,0 +1,698 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Path: QAgents-workflos/client/mcp_client.py
2
+ # Relations: Uses QuantumArchitect-MCP Gradio server
3
+ # Description: MCP client with fallback local implementations for missing endpoints
4
+ """
5
+ MCP Client: Connection to QuantumArchitect-MCP endpoints.
6
+ Provides both synchronous and async interfaces.
7
+
8
+ Available Gradio endpoints (as of latest scan):
9
+ - ui_create_circuit: Create circuit from template
10
+ - ui_validate_circuit: Validate QASM syntax
11
+ - ui_simulate_circuit: Simulate circuit
12
+ - ui_score_circuit: Score circuit complexity/fitness
13
+
14
+ Missing endpoints use local fallback implementations.
15
+ """
16
+
17
+ import requests
18
+ from typing import Any, Dict, Optional, List
19
+ from dataclasses import dataclass, field
20
+ from datetime import datetime
21
+ import json
22
+ import logging
23
+ import re
24
+ import time
25
+ import random
26
+ import math
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+
31
@dataclass
class MCPResponse:
    """Standardized response from MCP endpoints.

    Returned by every MCPClient method, whether the result came from the
    remote Gradio server or from a local QASMLocalAnalyzer fallback.
    """
    # True when the call produced usable data (remote call or fallback).
    success: bool
    # Endpoint payload; shape varies per endpoint (dict, str, list, ...).
    data: Any
    # Name of the endpoint that produced this response; fallback responses
    # are suffixed with "(fallback)".
    endpoint: str
    # Wall-clock time the response object was created.
    timestamp: datetime = field(default_factory=datetime.now)
    # Error message when success is False; None otherwise.
    error: Optional[str] = None
    # Total call duration, including the remote round-trip if any.
    execution_time_ms: float = 0.0
    is_fallback: bool = False  # True if using local fallback
41
+
42
+
43
class QASMLocalAnalyzer:
    """Local QASM analysis for fallback when MCP endpoints unavailable.

    All methods are static and operate purely on OpenQASM 2.0 source text.
    No simulation is performed, so the reported metrics (depth, fitness,
    complexity) are text-level heuristics, not exact circuit properties.
    """

    # Statement keywords recognized as gates/ops at the start of a line.
    # FIX: 'cswap' added — analyze_circuit classifies it as a multi-qubit
    # gate, but the old pattern never matched it, so cswap lines were
    # silently dropped by parse_qasm.
    GATE_PATTERN = re.compile(
        r'^(h|x|y|z|s|t|sdg|tdg|cx|cz|cy|swap|cswap|ccx|rz|rx|ry|u1|u2|u3|p|measure|barrier)\b',
        re.IGNORECASE
    )

    @staticmethod
    def parse_qasm(qasm_code: str) -> Dict[str, Any]:
        """Parse QASM code and extract structure.

        Returns a dict with version, includes, register declarations,
        a flat gate list, and total qubit/classical-bit counts.
        Comment ('//') and blank lines are ignored.
        """
        lines = [l.strip() for l in qasm_code.strip().split('\n')
                 if l.strip() and not l.strip().startswith('//')]

        result = {
            'openqasm_version': '2.0',
            'includes': [],
            'qregs': [],
            'cregs': [],
            'gates': [],
            'num_qubits': 0,
            'num_classical': 0
        }

        for line in lines:
            if line.startswith('OPENQASM'):
                result['openqasm_version'] = line.split()[1].rstrip(';')
            elif line.startswith('include'):
                result['includes'].append(line.split('"')[1] if '"' in line else line.split()[1])
            elif line.startswith('qreg'):
                match = re.search(r'qreg\s+(\w+)\[(\d+)\]', line)
                if match:
                    result['qregs'].append({'name': match.group(1), 'size': int(match.group(2))})
                    result['num_qubits'] += int(match.group(2))
            elif line.startswith('creg'):
                match = re.search(r'creg\s+(\w+)\[(\d+)\]', line)
                if match:
                    result['cregs'].append({'name': match.group(1), 'size': int(match.group(2))})
                    result['num_classical'] += int(match.group(2))
            elif QASMLocalAnalyzer.GATE_PATTERN.match(line):
                # Strip arguments and any parameter list: "rz(0.5) q[0];" -> "rz"
                gate_name = line.split()[0].split('(')[0]
                result['gates'].append({'gate': gate_name, 'raw': line.rstrip(';')})

        return result

    @staticmethod
    def analyze_circuit(qasm_code: str) -> Dict[str, Any]:
        """Analyze circuit properties (gate counts, qubits, heuristic depth)."""
        parsed = QASMLocalAnalyzer.parse_qasm(qasm_code)
        gates = parsed['gates']

        gate_counts = {}
        single_qubit_gates = 0
        two_qubit_gates = 0
        multi_qubit_gates = 0
        measurement_count = 0

        for g in gates:
            gate = g['gate'].lower()
            gate_counts[gate] = gate_counts.get(gate, 0) + 1

            if gate == 'measure':
                measurement_count += 1
            elif gate in ['cx', 'cz', 'cy', 'swap']:
                two_qubit_gates += 1
            elif gate in ['ccx', 'cswap']:
                multi_qubit_gates += 1
            else:
                # Includes 'barrier' — counted as single-qubit here, which
                # slightly inflates that bucket for circuits using barriers.
                single_qubit_gates += 1

        # Estimate depth (simplified: assume all gates sequential).
        # True depth requires scheduling gates per qubit; this upper bound
        # is kept for consistency with the rest of the scoring pipeline.
        depth = len([g for g in gates if g['gate'].lower() != 'measure'])

        return {
            'num_qubits': parsed['num_qubits'],
            'num_classical_bits': parsed['num_classical'],
            'depth': depth,
            'gate_count': len(gates),
            'gate_breakdown': gate_counts,
            'single_qubit_gates': single_qubit_gates,
            'two_qubit_gates': two_qubit_gates,
            'multi_qubit_gates': multi_qubit_gates,
            'measurements': measurement_count
        }

    @staticmethod
    def get_depth(qasm_code: str) -> int:
        """Get circuit depth (heuristic — see analyze_circuit)."""
        analysis = QASMLocalAnalyzer.analyze_circuit(qasm_code)
        return analysis['depth']

    @staticmethod
    def calculate_complexity(qasm_code: str) -> Dict[str, Any]:
        """Calculate a 0-100 complexity score from capped, weighted metrics."""
        analysis = QASMLocalAnalyzer.analyze_circuit(qasm_code)

        # Scoring formula: each contribution saturates at its weight.
        depth_score = min(analysis['depth'] / 50.0, 1.0) * 30
        gate_score = min(analysis['gate_count'] / 100.0, 1.0) * 30
        two_q_score = min(analysis['two_qubit_gates'] / 20.0, 1.0) * 25
        qubit_score = min(analysis['num_qubits'] / 10.0, 1.0) * 15

        total = depth_score + gate_score + two_q_score + qubit_score

        return {
            'complexity_score': round(total, 2),
            'depth_contribution': round(depth_score, 2),
            'gate_contribution': round(gate_score, 2),
            'entanglement_contribution': round(two_q_score, 2),
            'qubit_contribution': round(qubit_score, 2),
            'raw_metrics': analysis
        }

    @staticmethod
    def validate_syntax(qasm_code: str) -> Dict[str, Any]:
        """Validate QASM syntax.

        Errors: missing OPENQASM declaration or missing qreg.
        Warnings: statement lines not terminated with a semicolon.
        """
        errors = []
        warnings = []

        lines = qasm_code.strip().split('\n')

        has_openqasm = False
        has_qreg = False

        for i, line in enumerate(lines, 1):
            line = line.strip()
            if not line or line.startswith('//'):
                continue

            if line.startswith('OPENQASM'):
                has_openqasm = True
            elif line.startswith('qreg'):
                has_qreg = True
            elif not line.startswith(('include', 'creg', 'barrier', 'measure', 'OPENQASM', 'qreg')):
                # FIX: every remaining statement (gate applications included)
                # must be semicolon-terminated. The old check only looked at
                # lines that did NOT match GATE_PATTERN, so unterminated gate
                # lines were never reported.
                if not line.endswith(';'):
                    warnings.append(f"Line {i}: Missing semicolon")

        if not has_openqasm:
            errors.append("Missing OPENQASM version declaration")
        if not has_qreg:
            errors.append("No quantum register (qreg) defined")

        return {
            'valid': len(errors) == 0,
            'errors': errors,
            'warnings': warnings,
            'line_count': len(lines)
        }

    @staticmethod
    def calculate_hardware_fitness(qasm_code: str, hardware: str = "ibm_brisbane") -> Dict[str, Any]:
        """Calculate hardware fitness score (0-100, higher is better).

        Unknown hardware names fall back to the ibm_brisbane profile.
        """
        analysis = QASMLocalAnalyzer.analyze_circuit(qasm_code)

        # Hardware profiles (simplified; not live calibration data)
        profiles = {
            'ibm_brisbane': {'max_qubits': 127, 'connectivity': 'heavy-hex', 'two_q_error': 0.01},
            'ibm_sherbrooke': {'max_qubits': 127, 'connectivity': 'heavy-hex', 'two_q_error': 0.008},
            'rigetti_aspen': {'max_qubits': 80, 'connectivity': 'octagonal', 'two_q_error': 0.02},
            'ionq_harmony': {'max_qubits': 11, 'connectivity': 'all-to-all', 'two_q_error': 0.005}
        }

        profile = profiles.get(hardware, profiles['ibm_brisbane'])

        # Calculate fitness: start from qubit fit, subtract depth and
        # two-qubit-error penalties, floor at 0.
        qubit_fit = 100 if analysis['num_qubits'] <= profile['max_qubits'] else 50
        depth_penalty = min(analysis['depth'] * 2, 30)
        two_q_penalty = analysis['two_qubit_gates'] * profile['two_q_error'] * 100

        fitness = max(0, qubit_fit - depth_penalty - two_q_penalty)

        return {
            'fitness_score': round(fitness, 2),
            'hardware': hardware,
            'qubit_fit': qubit_fit,
            'depth_penalty': round(depth_penalty, 2),
            'error_penalty': round(two_q_penalty, 2),
            'recommendation': 'suitable' if fitness > 70 else 'marginal' if fitness > 40 else 'poor'
        }
224
+
225
+
226
class MCPClient:
    """
    Client for QuantumArchitect-MCP server.
    Wraps MCP endpoints with fallback to local implementations.

    Primary endpoints (from Gradio):
    - ui_create_circuit
    - ui_validate_circuit
    - ui_simulate_circuit
    - ui_score_circuit

    Missing endpoints use QASMLocalAnalyzer for fallback.
    Every public method returns an MCPResponse.
    """

    def __init__(self, base_url: str = "http://127.0.0.1:7861"):
        self.base_url = base_url.rstrip("/")
        self.session = requests.Session()
        self._connected = False
        self._analyzer = QASMLocalAnalyzer()

    def _call(self, endpoint: str, **kwargs) -> MCPResponse:
        """Internal method to call MCP endpoints.

        Uses the two-step Gradio API: POST to obtain an event_id, then GET
        the SSE-style result stream and parse the first 'data:' line.
        NOTE: kwargs are sent positionally (list(kwargs.values())), so the
        keyword order at each call site must match the endpoint's signature.
        """
        start = time.perf_counter()

        try:
            url = f"{self.base_url}/gradio_api/call/{endpoint}"
            payload = {"data": list(kwargs.values()) if kwargs else []}

            response = self.session.post(url, json=payload, timeout=30)
            response.raise_for_status()

            result = response.json()
            event_id = result.get("event_id")

            if event_id:
                result_url = f"{self.base_url}/gradio_api/call/{endpoint}/{event_id}"
                result_response = self.session.get(result_url, timeout=30)

                lines = result_response.text.strip().split("\n")
                for line in lines:
                    if line.startswith("data:"):
                        data = json.loads(line[5:].strip())
                        elapsed = (time.perf_counter() - start) * 1000
                        return MCPResponse(
                            success=True,
                            # Unwrap single-element Gradio output lists.
                            data=data[0] if isinstance(data, list) and len(data) == 1 else data,
                            endpoint=endpoint,
                            execution_time_ms=elapsed
                        )

            elapsed = (time.perf_counter() - start) * 1000
            return MCPResponse(
                success=True,
                data=result,
                endpoint=endpoint,
                execution_time_ms=elapsed
            )

        except Exception as e:
            elapsed = (time.perf_counter() - start) * 1000
            logger.warning(f"MCP call failed: {endpoint} - {e}")
            return MCPResponse(
                success=False,
                data=None,
                endpoint=endpoint,
                error=str(e),
                execution_time_ms=elapsed
            )

    def _fallback_response(self, endpoint: str, data: Any, start_time: float) -> MCPResponse:
        """Create a fallback response using local implementation."""
        elapsed = (time.perf_counter() - start_time) * 1000
        return MCPResponse(
            success=True,
            data=data,
            endpoint=f"{endpoint}(fallback)",
            execution_time_ms=elapsed,
            is_fallback=True
        )

    def health_check(self) -> bool:
        """Check if MCP server is reachable (GET on the base URL)."""
        try:
            response = self.session.get(f"{self.base_url}/", timeout=5)
            self._connected = response.status_code == 200
            return self._connected
        # FIX: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt. Only network-level failures mean "unreachable".
        except requests.RequestException:
            self._connected = False
            return False

    # ===== Circuit Creation Endpoints =====

    def create_circuit_from_template(self, template_name: str, num_qubits: int = 2) -> MCPResponse:
        """Create a circuit from a predefined template.
        Maps to ui_create_circuit endpoint in Gradio."""
        return self._call("ui_create_circuit", template=template_name, qubits=num_qubits, params="{}")

    def generate_random_circuit(self, num_qubits: int = 3, depth: int = 5,
                                gate_set: str = "h,cx,rz") -> MCPResponse:
        """Generate a random quantum circuit. Uses local fallback.

        gate_set is a comma-separated list; unrecognized names are skipped,
        so the actual gate count may be below `depth`.
        """
        start = time.perf_counter()
        gates = gate_set.split(',')

        qasm_lines = [
            'OPENQASM 2.0;',
            'include "qelib1.inc";',
            f'qreg q[{num_qubits}];',
            f'creg c[{num_qubits}];'
        ]

        for _ in range(depth):
            gate = random.choice(gates)
            if gate in ['h', 'x', 'y', 'z', 's', 't']:
                q = random.randint(0, num_qubits - 1)
                qasm_lines.append(f'{gate} q[{q}];')
            elif gate in ['cx', 'cz']:
                if num_qubits >= 2:
                    q1 = random.randint(0, num_qubits - 1)
                    q2 = random.randint(0, num_qubits - 1)
                    while q2 == q1:
                        q2 = random.randint(0, num_qubits - 1)
                    qasm_lines.append(f'{gate} q[{q1}], q[{q2}];')
            elif gate in ['rz', 'rx', 'ry']:
                q = random.randint(0, num_qubits - 1)
                angle = round(random.uniform(0, 2 * math.pi), 4)
                qasm_lines.append(f'{gate}({angle}) q[{q}];')

        # FIX: was an f-string with no placeholders.
        qasm_lines.append('measure q -> c;')
        qasm_code = '\n'.join(qasm_lines)

        return self._fallback_response("generate_random_circuit", {'qasm': qasm_code}, start)

    def generate_circuit_from_description(self, description: str) -> MCPResponse:
        """Generate circuit from natural language description.
        Uses ui_create_circuit with best-matching template (keyword match,
        defaulting to bell_state)."""
        desc_lower = description.lower()

        if 'entangle' in desc_lower or 'bell' in desc_lower:
            template = 'bell_state'
        elif 'ghz' in desc_lower:
            template = 'ghz_state'
        elif 'superposition' in desc_lower:
            template = 'superposition'
        elif 'qft' in desc_lower or 'fourier' in desc_lower:
            template = 'qft'
        elif 'grover' in desc_lower or 'search' in desc_lower:
            template = 'grover'
        elif 'vqe' in desc_lower or 'variational' in desc_lower:
            template = 'vqe'
        else:
            template = 'bell_state'

        return self._call("ui_create_circuit", template=template, qubits=2, params="{}")

    # ===== Parsing & Analysis Endpoints (Fallback) =====

    def parse_qasm(self, qasm_code: str) -> MCPResponse:
        """Parse OpenQASM code into circuit structure. Uses local fallback."""
        start = time.perf_counter()
        parsed = self._analyzer.parse_qasm(qasm_code)
        return self._fallback_response("parse_qasm", parsed, start)

    def analyze_circuit(self, qasm_code: str) -> MCPResponse:
        """Analyze circuit properties (depth, gates, etc.). Uses local fallback."""
        start = time.perf_counter()
        analysis = self._analyzer.analyze_circuit(qasm_code)
        return self._fallback_response("analyze_circuit", analysis, start)

    def get_circuit_depth(self, qasm_code: str) -> MCPResponse:
        """Get the depth of a circuit. Uses local fallback."""
        start = time.perf_counter()
        depth = self._analyzer.get_depth(qasm_code)
        return self._fallback_response("get_circuit_depth", {'depth': depth}, start)

    # ===== Validation Endpoints =====

    def validate_syntax(self, qasm_code: str) -> MCPResponse:
        """Validate QASM syntax. Maps to ui_validate_circuit."""
        return self._call("ui_validate_circuit", qasm=qasm_code, hardware="")

    def check_connectivity(self, qasm_code: str, hardware: str = "ibm_brisbane") -> MCPResponse:
        """Check if circuit respects hardware connectivity. Uses ui_validate_circuit."""
        return self._call("ui_validate_circuit", qasm=qasm_code, hardware=hardware)

    def verify_unitary(self, qasm_code: str) -> MCPResponse:
        """Verify circuit produces valid unitary. Uses local fallback.

        NOTE: this only proxies syntax validity — a real unitarity check
        would require simulation (see the 'note' field in the result).
        """
        start = time.perf_counter()
        validation = self._analyzer.validate_syntax(qasm_code)
        result = {
            'is_unitary': validation['valid'],
            'errors': validation['errors'],
            'note': 'Local validation - full unitary check requires simulation'
        }
        return self._fallback_response("verify_unitary", result, start)

    # ===== Simulation Endpoints =====

    def simulate_circuit(self, qasm_code: str, shots: int = 1024) -> MCPResponse:
        """Simulate circuit and get measurement results. Maps to ui_simulate_circuit."""
        return self._call("ui_simulate_circuit", qasm=qasm_code, shots=shots)

    def get_statevector(self, qasm_code: str) -> MCPResponse:
        """Get the statevector of a circuit. Uses ui_simulate_circuit.

        NOTE: the remote endpoint returns shot counts, not a statevector;
        the data is replaced with a hint pointing callers at simulation.
        """
        result = self._call("ui_simulate_circuit", qasm=qasm_code, shots=1)
        if result.success and result.data:
            result.data = {'statevector_hint': 'Use simulation results for state info'}
        return result

    def get_probabilities(self, qasm_code: str) -> MCPResponse:
        """Get probability distribution from circuit. Uses ui_simulate_circuit.

        NOTE(review): the raw shot histogram is passed through unchanged —
        normalization to probabilities is left to the caller.
        """
        result = self._call("ui_simulate_circuit", qasm=qasm_code, shots=1024)
        if result.success and result.data:
            # Extract probabilities from histogram
            result.endpoint = "get_probabilities"
        return result

    # ===== Scoring Endpoints =====

    def calculate_complexity_score(self, qasm_code: str) -> MCPResponse:
        """Calculate circuit complexity score. Tries ui_score_circuit then fallback."""
        result = self._call("ui_score_circuit", qasm=qasm_code, hardware="ibm_brisbane")
        if result.success:
            return result

        # Fallback to local
        start = time.perf_counter()
        complexity = self._analyzer.calculate_complexity(qasm_code)
        return self._fallback_response("calculate_complexity_score", complexity, start)

    def calculate_hardware_fitness(self, qasm_code: str, hardware: str = "ibm_brisbane") -> MCPResponse:
        """Calculate hardware fitness score. Tries ui_score_circuit then fallback."""
        result = self._call("ui_score_circuit", qasm=qasm_code, hardware=hardware)
        if result.success:
            return result

        # Fallback to local
        start = time.perf_counter()
        fitness = self._analyzer.calculate_hardware_fitness(qasm_code, hardware)
        return self._fallback_response("calculate_hardware_fitness", fitness, start)

    def calculate_expressibility(self, qasm_code: str) -> MCPResponse:
        """Calculate circuit expressibility. Uses local fallback."""
        start = time.perf_counter()
        analysis = self._analyzer.analyze_circuit(qasm_code)

        # Expressibility heuristic based on gate diversity and depth
        gate_types = len(analysis['gate_breakdown'])
        depth_factor = min(analysis['depth'] / 20.0, 1.0)
        entangle_factor = min(analysis['two_qubit_gates'] / 5.0, 1.0)

        expressibility = (gate_types * 0.3 + depth_factor * 0.35 + entangle_factor * 0.35) * 100

        result = {
            'expressibility_score': round(expressibility, 2),
            'gate_diversity': gate_types,
            'depth_factor': round(depth_factor, 2),
            'entanglement_factor': round(entangle_factor, 2)
        }
        return self._fallback_response("calculate_expressibility", result, start)

    # ===== Resource Estimation Endpoints (Fallback) =====

    def estimate_resources(self, qasm_code: str) -> MCPResponse:
        """Estimate resource requirements. Uses local fallback.

        memory_footprint_bytes models full-statevector simulation cost
        (exponential in qubit count) — it grows very fast past ~20 qubits.
        """
        start = time.perf_counter()
        analysis = self._analyzer.analyze_circuit(qasm_code)

        result = {
            'qubits_required': analysis['num_qubits'],
            'classical_bits': analysis['num_classical_bits'],
            'gate_count': analysis['gate_count'],
            'depth': analysis['depth'],
            'estimated_runtime_ms': analysis['depth'] * 0.1,  # Rough estimate
            'memory_footprint_bytes': analysis['num_qubits'] * 16 * (2 ** analysis['num_qubits'])
        }
        return self._fallback_response("estimate_resources", result, start)

    def estimate_noise(self, qasm_code: str, hardware: str = "ibm_brisbane") -> MCPResponse:
        """Estimate noise impact on circuit. Uses local fallback.

        Combines per-gate-class error rates multiplicatively; unknown
        hardware names fall back to ibm_brisbane rates.
        """
        start = time.perf_counter()
        analysis = self._analyzer.analyze_circuit(qasm_code)

        # Noise profiles (simplified)
        noise_rates = {
            'ibm_brisbane': {'single_q': 0.001, 'two_q': 0.01, 'readout': 0.02},
            'ibm_sherbrooke': {'single_q': 0.0008, 'two_q': 0.008, 'readout': 0.015},
            'rigetti_aspen': {'single_q': 0.002, 'two_q': 0.02, 'readout': 0.03},
            'ionq_harmony': {'single_q': 0.0003, 'two_q': 0.005, 'readout': 0.01}
        }

        rates = noise_rates.get(hardware, noise_rates['ibm_brisbane'])

        single_q_error = analysis['single_qubit_gates'] * rates['single_q']
        two_q_error = analysis['two_qubit_gates'] * rates['two_q']
        readout_error = analysis['measurements'] * rates['readout']
        total_error = 1 - (1 - single_q_error) * (1 - two_q_error) * (1 - readout_error)

        result = {
            'estimated_fidelity': round(1 - total_error, 4),
            'single_qubit_error': round(single_q_error, 4),
            'two_qubit_error': round(two_q_error, 4),
            'readout_error': round(readout_error, 4),
            'total_error_probability': round(total_error, 4),
            'hardware': hardware
        }
        return self._fallback_response("estimate_noise", result, start)

    # ===== Composition Endpoints (Fallback) =====

    def compose_circuits(self, qasm1: str, qasm2: str, qubit_mapping: str = "") -> MCPResponse:
        """Compose two circuits sequentially. Uses local fallback.

        NOTE(review): qubit_mapping is currently ignored — both circuits
        are assumed to share the same register names.
        """
        start = time.perf_counter()

        # Parse both circuits
        parsed1 = self._analyzer.parse_qasm(qasm1)
        parsed2 = self._analyzer.parse_qasm(qasm2)

        # Simple sequential composition
        num_qubits = max(parsed1['num_qubits'], parsed2['num_qubits'])

        lines = [
            'OPENQASM 2.0;',
            'include "qelib1.inc";',
            f'qreg q[{num_qubits}];',
            f'creg c[{num_qubits}];'
        ]

        # Add gates from both circuits (measurements from the first are
        # dropped so the second circuit runs on unmeasured qubits).
        for g in parsed1['gates']:
            if g['gate'].lower() != 'measure':
                lines.append(f"{g['raw']};")
        for g in parsed2['gates']:
            lines.append(f"{g['raw']};")

        result = {'qasm': '\n'.join(lines)}
        return self._fallback_response("compose_circuits", result, start)

    def generate_inverse_circuit(self, qasm_code: str) -> MCPResponse:
        """Generate the inverse of a circuit. Uses local fallback.

        Self-inverse and dagger-pair gates are handled; rotation gates are
        NOT angle-negated (known limitation, see inline note).
        """
        start = time.perf_counter()
        parsed = self._analyzer.parse_qasm(qasm_code)

        # Inverse gate mappings
        inverse_map = {
            'h': 'h', 'x': 'x', 'y': 'y', 'z': 'z',
            's': 'sdg', 'sdg': 's', 't': 'tdg', 'tdg': 't',
            'cx': 'cx', 'cz': 'cz', 'swap': 'swap'
        }

        lines = [
            'OPENQASM 2.0;',
            'include "qelib1.inc";',
            f'qreg q[{parsed["num_qubits"]}];',
            f'creg c[{parsed["num_classical"]}];'
        ]

        # Reverse and invert gates
        for g in reversed(parsed['gates']):
            gate = g['gate'].lower()
            if gate == 'measure':
                continue
            inv_gate = inverse_map.get(gate, gate)
            # Handle parametric gates
            if '(' in g['raw']:
                # Negate angle for rotation gates
                raw = g['raw'].replace(gate, inv_gate)
                if 'rz' in gate or 'rx' in gate or 'ry' in gate:
                    # Simple negation (not perfect)
                    # TODO: rotation angles should be negated here; the
                    # current output is NOT a true inverse for rx/ry/rz.
                    pass
                lines.append(f"{raw};")
            else:
                raw = g['raw'].replace(gate, inv_gate)
                lines.append(f"{raw};")

        result = {'qasm': '\n'.join(lines)}
        return self._fallback_response("generate_inverse_circuit", result, start)

    def tensor_circuits(self, qasm1: str, qasm2: str) -> MCPResponse:
        """Tensor product of two circuits. Uses local fallback.

        The second circuit's qubit indices are shifted up by the first
        circuit's width; indices are rewritten highest-first so a freshly
        shifted index is never rewritten twice.
        """
        start = time.perf_counter()

        parsed1 = self._analyzer.parse_qasm(qasm1)
        parsed2 = self._analyzer.parse_qasm(qasm2)

        total_qubits = parsed1['num_qubits'] + parsed2['num_qubits']
        offset = parsed1['num_qubits']

        lines = [
            'OPENQASM 2.0;',
            'include "qelib1.inc";',
            f'qreg q[{total_qubits}];',
            f'creg c[{total_qubits}];'
        ]

        # Add gates from first circuit
        for g in parsed1['gates']:
            lines.append(f"{g['raw']};")

        # Add gates from second circuit with offset
        for g in parsed2['gates']:
            raw = g['raw']
            # Offset qubit indices
            for i in range(parsed2['num_qubits'] - 1, -1, -1):
                raw = raw.replace(f'q[{i}]', f'q[{i + offset}]')
            lines.append(f"{raw};")

        result = {'qasm': '\n'.join(lines)}
        return self._fallback_response("tensor_circuits", result, start)

    def repeat_circuit(self, qasm_code: str, n: int) -> MCPResponse:
        """Repeat a circuit n times. Uses local fallback.

        NOTE(review): only the FIRST measure statement is re-appended at
        the end — assumes a single whole-register measure (measure q -> c).
        """
        start = time.perf_counter()
        parsed = self._analyzer.parse_qasm(qasm_code)

        lines = [
            'OPENQASM 2.0;',
            'include "qelib1.inc";',
            f'qreg q[{parsed["num_qubits"]}];',
            f'creg c[{parsed["num_classical"]}];'
        ]

        # Repeat non-measure gates n times
        for _ in range(n):
            for g in parsed['gates']:
                if g['gate'].lower() != 'measure':
                    lines.append(f"{g['raw']};")

        # Add measurements at end
        for g in parsed['gates']:
            if g['gate'].lower() == 'measure':
                lines.append(f"{g['raw']};")
                break

        result = {'qasm': '\n'.join(lines)}
        return self._fallback_response("repeat_circuit", result, start)

    # ===== Utility Endpoints =====

    def list_templates(self) -> MCPResponse:
        """List available circuit templates (static local list)."""
        start = time.perf_counter()
        templates = [
            'bell_state', 'ghz_state', 'w_state', 'superposition',
            'qft', 'grover', 'vqe', 'qaoa'
        ]
        return self._fallback_response("list_templates", {'templates': templates}, start)

    def list_hardware_profiles(self) -> MCPResponse:
        """List available hardware profiles (static local list)."""
        start = time.perf_counter()
        profiles = ['ibm_brisbane', 'ibm_sherbrooke', 'rigetti_aspen', 'ionq_harmony']
        return self._fallback_response("list_hardware_profiles", {'profiles': profiles}, start)
678
+
679
+
680
# Singleton client instance
_client: Optional[MCPClient] = None


def get_client(base_url: Optional[str] = None) -> MCPClient:
    """
    Get or create the MCP client singleton.

    Args:
        base_url: Optional URL override. If None, checks MCP_SERVER_URL env var,
                 then defaults to http://127.0.0.1:7861
    """
    global _client
    # Fast path: reuse the existing singleton (base_url is then ignored).
    if _client is not None:
        return _client
    if base_url is None:
        import os
        base_url = os.environ.get("MCP_SERVER_URL", "http://127.0.0.1:7861")
    _client = MCPClient(base_url)
    return _client
config.py ADDED
@@ -0,0 +1,305 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ QAgents-Workflows: Configuration
3
+ Central configuration for the multi-agent quantum circuit optimization system.
4
+
5
+ Path: QAgents-workflos/config.py
6
+ Related: agents/llm_adapter.py (uses GEMINI_MODELS for fallback cascade)
7
+ run_evaluation.py (uses config for evaluation settings)
8
+ workflows/workflow_definitions.py (references rate limits)
9
+ """
10
+
11
+ from pathlib import Path
12
+ from dataclasses import dataclass, field
13
+ from typing import Optional, List, Dict
14
+ import os
15
+
16
+ # Paths
17
+ PROJECT_ROOT = Path(__file__).parent
18
+ QUANTUM_MCP_ROOT = PROJECT_ROOT.parent / "QuantumArchitect-MCP"
19
+
20
+ # =============================================================================
21
+ # GEMINI MODEL CASCADE (sorted by RPD - highest to lowest for optimal fallback)
22
+ # =============================================================================
23
+ # When a model hits rate limits (RPM/RPD), fallback to next model in list.
24
+ # Free tier limits (as of 2025):
25
+ # - Gemma 3: 30 RPM, 15K TPM, 14,400 RPD (HIGHEST availability)
26
+ # - Flash-Lite: 15 RPM, 250K TPM, 1,000 RPD
27
+ # - Flash 2.5: 10 RPM, 250K TPM, 250 RPD
28
+ # - Flash 2.0: 15 RPM, 1M TPM, 200 RPD
29
+ # - Flash 2.0 Lite: 30 RPM, 1M TPM, 200 RPD
30
+ # - Pro 2.5: 2 RPM, 125K TPM, 50 RPD (LOWEST availability)
31
+ #
32
+ # EXPECTED REQUESTS PER EVALUATION (9 problems):
33
+ # - Naked mode: 0 LLM calls (direct MCP only)
34
+ # - Guided mode: ~36 LLM calls (4 per problem)
35
+ # - Blackboard: ~72-108 LLM calls (8-12 per problem)
36
+ # =============================================================================
37
+
38
GEMINI_MODELS: List[Dict] = [
    # Highest RPD - most available (14,400/day = 10/min continuously)
    {
        "name": "gemma-3-27b-it",
        "rpm": 30,
        "tpm": 15_000,
        "rpd": 14_400,
        "priority": 1,
        "notes": "Best for high-volume, may have lower quality than Flash"
    },
    # Good balance - default model (1,000/day)
    {
        "name": "gemini-2.5-flash-lite",
        "rpm": 15,
        "tpm": 250_000,
        "rpd": 1_000,
        "priority": 2,
        "notes": "Good balance of quality and availability - DEFAULT"
    },
    # Higher quality - moderate availability (250/day)
    {
        "name": "gemini-2.5-flash",
        "rpm": 10,
        "tpm": 250_000,
        "rpd": 250,
        "priority": 3,
        "notes": "Better quality, lower availability"
    },
    # High TPM for long contexts (200/day)
    {
        "name": "gemini-2.0-flash",
        "rpm": 15,
        "tpm": 1_000_000,
        "rpd": 200,
        "priority": 4,
        "notes": "Good for long contexts, moderate availability"
    },
    # Fast variant (200/day)
    {
        "name": "gemini-2.0-flash-lite",
        "rpm": 30,
        "tpm": 1_000_000,
        "rpd": 200,
        "priority": 5,
        "notes": "Fast responses, lower availability"
    },
    # Lowest RPD - highest quality, use sparingly (50/day)
    {
        "name": "gemini-2.5-pro",
        "rpm": 2,
        "tpm": 125_000,
        "rpd": 50,
        "priority": 6,
        "notes": "Highest quality, use sparingly - LAST RESORT"
    },
]

def get_model_by_priority(priority: int = 1) -> Optional[Dict]:
    """Get model config by priority (1=highest RPD)."""
    return next((cfg for cfg in GEMINI_MODELS if cfg["priority"] == priority), None)

def get_next_model(current_name: str) -> Optional[Dict]:
    """Get next model in fallback chain."""
    names = [cfg["name"] for cfg in GEMINI_MODELS]
    if current_name not in names:
        return None
    successor = names.index(current_name) + 1
    return GEMINI_MODELS[successor] if successor < len(GEMINI_MODELS) else None

def get_model_config(model_name: str) -> Optional[Dict]:
    """Get model config by name."""
    return next((cfg for cfg in GEMINI_MODELS if cfg["name"] == model_name), None)
116
+
117
+
118
@dataclass
class MCPConfig:
    """MCP Server configuration.

    base_url is derived from host/port after init and is not a
    constructor argument.
    """
    host: str = "127.0.0.1"
    port: int = 7861
    base_url: str = field(init=False)

    def __post_init__(self):
        # Assemble the derived URL once the host/port fields are set.
        self.base_url = "http://{}:{}".format(self.host, self.port)
127
+
128
+
129
@dataclass
class RateLimitConfig:
    """Client-side throttling derived from Gemini API free-tier quotas."""
    # Defaults mirror the gemini-2.5-flash-lite free tier.
    rpm_limit: int = 15        # requests per minute
    tpm_limit: int = 250_000   # tokens per minute
    rpd_limit: int = 1_000     # requests per day

    # Operate at 80% of the published RPM to leave safety headroom
    # (15 RPM * 0.8 = 12 effective RPM).
    rpm_buffer: float = 0.8

    @property
    def min_request_interval(self) -> float:
        """Seconds to wait between requests: 60 / effective RPM (5s at defaults)."""
        effective_rpm = self.rpm_limit * self.rpm_buffer
        return 60.0 / effective_rpm
144
+
145
+
146
@dataclass
class LLMConfig:
    """LLM configuration for agents - model agnostic via Gemini and LiteLLM.

    Environment Variables (HuggingFace Space compatible):
        - LLM_PROVIDER: Provider name (gemini, openai, anthropic, groq, ollama). Default: "gemini"
        - LLM_MODEL: Model identifier. Default: "gemini-2.5-flash-lite"
        - GOOGLE_API_KEY: Gemini API key (Gemini provider)
        - GENAI_API_KEY: Alternative Gemini API key (fallback)
        - OPENAI_API_KEY: OpenAI API key (OpenAI provider)
        - ANTHROPIC_API_KEY: Anthropic API key (Anthropic provider)
        - GROQ_API_KEY: Groq API key (Groq provider)
    """
    # Provider, resolved from the environment at instantiation time.
    provider: str = field(default_factory=lambda: os.getenv("LLM_PROVIDER", "gemini"))
    # Model identifier, also environment-driven.
    model: str = field(default_factory=lambda: os.getenv("LLM_MODEL", "gemini-2.5-flash-lite"))
    # GOOGLE_API_KEY takes precedence; GENAI_API_KEY is the fallback.
    api_key: Optional[str] = field(
        default_factory=lambda: os.getenv("GOOGLE_API_KEY") or os.getenv("GENAI_API_KEY")
    )
    temperature: float = 0.2
    max_tokens: int = 2000

    # Rate limiting (see RateLimitConfig); can be disabled entirely.
    rate_limit: RateLimitConfig = field(default_factory=RateLimitConfig)
    enable_rate_limiting: bool = True

    # Multi-model fallback behaviour.
    enable_fallback: bool = True   # switch models automatically on rate limit
    fallback_on_error: bool = True  # also switch on API errors

    @property
    def model_string(self) -> str:
        """Full model identifier for API calls.

        Gemini models are passed bare; every other provider uses the
        LiteLLM ``provider/model`` convention.
        """
        if self.provider == "gemini":
            return self.model
        return f"{self.provider}/{self.model}"
185
+
186
+
187
@dataclass
class DatabaseConfig:
    """Filesystem locations for the SQLite data, logs, and agent memory."""
    db_path: Path = field(default_factory=lambda: PROJECT_ROOT / "database" / "data")
    log_path: Path = field(default_factory=lambda: PROJECT_ROOT / "database" / "logs")
    memory_path: Path = field(default_factory=lambda: PROJECT_ROOT / "database" / "memory")

    def __post_init__(self):
        # Create every storage directory up front so later writes never
        # fail on a missing parent.
        for directory in (self.db_path, self.log_path, self.memory_path):
            directory.mkdir(parents=True, exist_ok=True)
198
+
199
+
200
@dataclass
class CostTrackingConfig:
    """Accumulates request/token/latency usage, overall and per model."""
    enabled: bool = True
    track_requests: bool = True
    track_tokens: bool = True
    track_time: bool = True

    # Running totals (expected to be reset daily in production).
    total_requests: int = 0
    total_tokens: int = 0
    total_time_ms: float = 0.0

    # model name -> {"requests": int, "tokens": int, "time_ms": float}
    model_usage: Dict[str, Dict] = field(default_factory=dict)

    def record_request(self, model: str, tokens: int, time_ms: float):
        """Add one request's usage to the totals and the per-model bucket.

        No-op when tracking is disabled.
        """
        if not self.enabled:
            return

        self.total_requests += 1
        self.total_tokens += tokens
        self.total_time_ms += time_ms

        bucket = self.model_usage.setdefault(
            model, {"requests": 0, "tokens": 0, "time_ms": 0.0}
        )
        bucket["requests"] += 1
        bucket["tokens"] += tokens
        bucket["time_ms"] += time_ms

    def get_summary(self) -> Dict:
        """Return a snapshot of all counters plus a per-model breakdown."""
        # max(1, ...) guards against division by zero before any requests.
        avg_ms = self.total_time_ms / max(1, self.total_requests)
        return {
            "total_requests": self.total_requests,
            "total_tokens": self.total_tokens,
            "total_time_ms": self.total_time_ms,
            "avg_time_per_request": avg_ms,
            "model_breakdown": self.model_usage.copy(),
        }

    def reset(self):
        """Zero every counter and drop all per-model buckets."""
        self.total_requests = 0
        self.total_tokens = 0
        self.total_time_ms = 0.0
        self.model_usage = {}
248
+
249
+
250
@dataclass
class EvaluationConfig:
    """Settings for evaluation runs."""
    num_runs: int = 5               # repetitions per problem for reliability
    timeout_seconds: float = 120.0  # hard cap per problem
    save_results: bool = True

    # Usage/cost accounting for the whole evaluation.
    cost_tracking: CostTrackingConfig = field(default_factory=CostTrackingConfig)
259
+
260
+
261
@dataclass
class SystemConfig:
    """Top-level configuration aggregating every subsystem's settings."""
    mcp: MCPConfig = field(default_factory=MCPConfig)
    llm: LLMConfig = field(default_factory=LLMConfig)
    database: DatabaseConfig = field(default_factory=DatabaseConfig)
    evaluation: EvaluationConfig = field(default_factory=EvaluationConfig)

    # Orchestration mode: "blackboard", "guided", or "naked".
    active_mode: str = "guided"

    # Debugging/logging knobs.
    verbose: bool = True
    log_level: str = "INFO"
275
+
276
+
277
+ # Global config instance
278
+ config = SystemConfig()
279
+
280
+
281
def set_mode(mode: str):
    """Switch the global orchestration mode.

    Raises:
        ValueError: if ``mode`` is not one of blackboard/guided/naked.
    """
    valid_modes = ("blackboard", "guided", "naked")
    if mode in valid_modes:
        config.active_mode = mode
    else:
        raise ValueError(f"Invalid mode: {mode}. Use 'blackboard', 'guided', or 'naked'")
286
+
287
+
288
def get_mode() -> str:
    """Return the orchestration mode currently active in the global config."""
    return config.active_mode
291
+
292
+
293
def set_api_key(api_key: str):
    """Override the LLM API key in the global config at runtime."""
    config.llm.api_key = api_key
296
+
297
+
298
def get_cost_summary() -> Dict:
    """Return the evaluation cost tracker's current usage summary."""
    return config.evaluation.cost_tracking.get_summary()
301
+
302
+
303
def reset_cost_tracking():
    """Zero the evaluation cost tracker's counters."""
    config.evaluation.cost_tracking.reset()
database/__init__.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Path: QAgents-workflos/database/__init__.py
2
+ # Purpose: Database module exports for storage, logging, memory, and circuit quality
3
+ # Relations: Provides unified access to all database functionality
4
+
5
+ """Database module for storage, logging, memory, and circuit quality tracking."""
6
+
7
+ from .storage import (
8
+ Database,
9
+ MemoryType,
10
+ MemoryEntry,
11
+ LogEntry,
12
+ ResultEntry,
13
+ get_database
14
+ )
15
+
16
+ from .circuit_quality_db import (
17
+ CircuitQualityDB,
18
+ CircuitEvaluation,
19
+ QualityMetrics,
20
+ get_quality_db
21
+ )
22
+
23
+ __all__ = [
24
+ # Original storage
25
+ "Database",
26
+ "MemoryType",
27
+ "MemoryEntry",
28
+ "LogEntry",
29
+ "ResultEntry",
30
+ "get_database",
31
+ # Quality tracking (NEW)
32
+ "CircuitQualityDB",
33
+ "CircuitEvaluation",
34
+ "QualityMetrics",
35
+ "get_quality_db"
36
+ ]
database/circuit_quality_db.py ADDED
@@ -0,0 +1,414 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Path: QAgents-workflos/database/circuit_quality_db.py
2
+ # Relations: Uses database/storage.py pattern, connects to MCP via client/
3
+ # Description: SQLite database for storing QASM circuits and quality metrics
4
+ # Enables circuit comparison across orchestration modes
5
+ # Tracks circuit_qasm text + all quality measurements
6
+
7
+ """
8
+ Circuit Quality Database: Store and compare quantum circuits with quality metrics.
9
+ Stores actual QASM code for later analysis and comparison between modes.
10
+ """
11
+
12
+ import sqlite3
13
+ import json
14
+ from pathlib import Path
15
+ from datetime import datetime
16
+ from typing import Any, Dict, List, Optional, Tuple
17
+ from dataclasses import dataclass, field, asdict
18
+ import logging
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
@dataclass
class QualityMetrics:
    """Measured quality properties of a single quantum circuit."""
    depth: int = 0                  # circuit depth
    gate_count: int = 0             # total gates
    cx_count: int = 0               # two-qubit CX gates
    single_qubit_count: int = 0
    hardware_fitness: float = 0.0   # 0..1; how well it suits the target hardware
    syntax_valid: bool = False
    state_correctness: float = 0.0  # 0..1
    complexity_score: float = 0.0
    noise_estimate: float = 0.0

    def overall_score(self) -> float:
        """Aggregate quality score on a 0-100 scale (higher is better).

        Weighting: syntax 20, hardware fitness 20, state correctness 30,
        depth efficiency 15, low-CX bonus 15. The last two only apply when
        the circuit has at least one gate.
        """
        total = 20.0 if self.syntax_valid else 0.0
        total += 20.0 * min(self.hardware_fitness, 1.0)
        total += 30.0 * self.state_correctness
        if self.gate_count > 0:
            # Depth-to-gate ratio below 10 earns efficiency points.
            depth_ratio = (self.depth / max(self.gate_count, 1)) / 10
            total += 15.0 * max(0, 1 - depth_ratio)
            # Fewer CX gates relative to total gates earns the CX bonus.
            cx_share = min(self.cx_count / max(self.gate_count, 1), 1.0)
            total += 15.0 * (1 - cx_share)
        return round(total, 2)
54
+
55
+
56
@dataclass
class CircuitEvaluation:
    """One complete evaluation record: the QASM source plus its measurements."""
    id: Optional[int] = None            # DB primary key (None until saved)
    run_id: str = ""                    # groups evaluations from one comparison run
    timestamp: str = ""                 # ISO-8601; filled at save time if empty
    problem_id: str = ""
    problem_goal: str = ""
    mode: str = ""                      # one of: naked, guided, blackboard
    qasm_code: str = ""                 # complete QASM source text is stored
    success: bool = False
    execution_time_ms: float = 0.0
    llm_requests: int = 0
    tokens_used: int = 0
    quality_metrics: QualityMetrics = field(default_factory=QualityMetrics)
    errors: List[str] = field(default_factory=list)
72
+
73
+
74
+ class CircuitQualityDB:
75
+ """
76
+ SQLite database for storing circuits and quality metrics.
77
+ Primary purpose: Enable quality comparison across modes.
78
+ """
79
+
80
+ def __init__(self, db_path: Optional[Path] = None):
81
+ if db_path is None:
82
+ db_path = Path(__file__).parent / "data"
83
+ self.db_path = Path(db_path)
84
+ self.db_path.mkdir(parents=True, exist_ok=True)
85
+ self.db_file = self.db_path / "circuit_quality.db"
86
+ self._init_db()
87
+
88
+ def _init_db(self):
89
+ """Initialize database tables."""
90
+ with sqlite3.connect(self.db_file) as conn:
91
+ conn.executescript("""
92
+ -- Main table: stores full QASM and evaluation metadata
93
+ CREATE TABLE IF NOT EXISTS circuit_evaluations (
94
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
95
+ run_id TEXT NOT NULL,
96
+ timestamp TEXT NOT NULL,
97
+ problem_id TEXT NOT NULL,
98
+ problem_goal TEXT,
99
+ mode TEXT NOT NULL,
100
+ qasm_code TEXT,
101
+ success INTEGER NOT NULL,
102
+ execution_time_ms REAL,
103
+ llm_requests INTEGER DEFAULT 0,
104
+ tokens_used INTEGER DEFAULT 0,
105
+ errors TEXT
106
+ );
107
+
108
+ -- Quality metrics table: detailed quality measurements
109
+ CREATE TABLE IF NOT EXISTS quality_metrics (
110
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
111
+ eval_id INTEGER NOT NULL,
112
+ depth INTEGER DEFAULT 0,
113
+ gate_count INTEGER DEFAULT 0,
114
+ cx_count INTEGER DEFAULT 0,
115
+ single_qubit_count INTEGER DEFAULT 0,
116
+ hardware_fitness REAL DEFAULT 0.0,
117
+ syntax_valid INTEGER DEFAULT 0,
118
+ state_correctness REAL DEFAULT 0.0,
119
+ complexity_score REAL DEFAULT 0.0,
120
+ noise_estimate REAL DEFAULT 0.0,
121
+ overall_score REAL DEFAULT 0.0,
122
+ FOREIGN KEY (eval_id) REFERENCES circuit_evaluations(id)
123
+ );
124
+
125
+ -- Comparison runs: group multiple evaluations
126
+ CREATE TABLE IF NOT EXISTS comparison_runs (
127
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
128
+ run_id TEXT UNIQUE NOT NULL,
129
+ timestamp TEXT NOT NULL,
130
+ description TEXT,
131
+ num_problems INTEGER DEFAULT 0,
132
+ modes_tested TEXT,
133
+ summary TEXT
134
+ );
135
+
136
+ -- Create indexes for fast queries
137
+ CREATE INDEX IF NOT EXISTS idx_eval_run_id ON circuit_evaluations(run_id);
138
+ CREATE INDEX IF NOT EXISTS idx_eval_problem ON circuit_evaluations(problem_id);
139
+ CREATE INDEX IF NOT EXISTS idx_eval_mode ON circuit_evaluations(mode);
140
+ """)
141
+ conn.commit()
142
+
143
+ def save_evaluation(self, eval: CircuitEvaluation) -> int:
144
+ """Save a circuit evaluation with quality metrics. Returns eval ID."""
145
+ with sqlite3.connect(self.db_file) as conn:
146
+ cursor = conn.cursor()
147
+
148
+ # Insert main evaluation record
149
+ cursor.execute("""
150
+ INSERT INTO circuit_evaluations
151
+ (run_id, timestamp, problem_id, problem_goal, mode, qasm_code,
152
+ success, execution_time_ms, llm_requests, tokens_used, errors)
153
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
154
+ """, (
155
+ eval.run_id,
156
+ eval.timestamp or datetime.now().isoformat(),
157
+ eval.problem_id,
158
+ eval.problem_goal,
159
+ eval.mode,
160
+ eval.qasm_code, # FULL QASM stored here
161
+ 1 if eval.success else 0,
162
+ eval.execution_time_ms,
163
+ eval.llm_requests,
164
+ eval.tokens_used,
165
+ json.dumps(eval.errors)
166
+ ))
167
+ eval_id = cursor.lastrowid
168
+
169
+ # Insert quality metrics
170
+ metrics = eval.quality_metrics
171
+ cursor.execute("""
172
+ INSERT INTO quality_metrics
173
+ (eval_id, depth, gate_count, cx_count, single_qubit_count,
174
+ hardware_fitness, syntax_valid, state_correctness,
175
+ complexity_score, noise_estimate, overall_score)
176
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
177
+ """, (
178
+ eval_id,
179
+ metrics.depth,
180
+ metrics.gate_count,
181
+ metrics.cx_count,
182
+ metrics.single_qubit_count,
183
+ metrics.hardware_fitness,
184
+ 1 if metrics.syntax_valid else 0,
185
+ metrics.state_correctness,
186
+ metrics.complexity_score,
187
+ metrics.noise_estimate,
188
+ metrics.overall_score()
189
+ ))
190
+
191
+ conn.commit()
192
+ logger.info(f"Saved evaluation {eval_id} for {eval.problem_id}/{eval.mode}")
193
+ return eval_id
194
+
195
+ def save_comparison_run(self, run_id: str, description: str,
196
+ num_problems: int, modes: List[str], summary: Dict) -> None:
197
+ """Save a comparison run record."""
198
+ with sqlite3.connect(self.db_file) as conn:
199
+ conn.execute("""
200
+ INSERT OR REPLACE INTO comparison_runs
201
+ (run_id, timestamp, description, num_problems, modes_tested, summary)
202
+ VALUES (?, ?, ?, ?, ?, ?)
203
+ """, (
204
+ run_id,
205
+ datetime.now().isoformat(),
206
+ description,
207
+ num_problems,
208
+ json.dumps(modes),
209
+ json.dumps(summary)
210
+ ))
211
+ conn.commit()
212
+
213
+ def get_evaluations(self, problem_id: Optional[str] = None,
214
+ mode: Optional[str] = None,
215
+ run_id: Optional[str] = None,
216
+ limit: int = 100) -> List[CircuitEvaluation]:
217
+ """Get evaluations with optional filters."""
218
+ query = """
219
+ SELECT e.*, q.depth, q.gate_count, q.cx_count, q.single_qubit_count,
220
+ q.hardware_fitness, q.syntax_valid, q.state_correctness,
221
+ q.complexity_score, q.noise_estimate, q.overall_score
222
+ FROM circuit_evaluations e
223
+ LEFT JOIN quality_metrics q ON e.id = q.eval_id
224
+ WHERE 1=1
225
+ """
226
+ params = []
227
+
228
+ if problem_id:
229
+ query += " AND e.problem_id = ?"
230
+ params.append(problem_id)
231
+ if mode:
232
+ query += " AND e.mode = ?"
233
+ params.append(mode)
234
+ if run_id:
235
+ query += " AND e.run_id = ?"
236
+ params.append(run_id)
237
+
238
+ query += " ORDER BY e.timestamp DESC LIMIT ?"
239
+ params.append(limit)
240
+
241
+ evaluations = []
242
+ with sqlite3.connect(self.db_file) as conn:
243
+ conn.row_factory = sqlite3.Row
244
+ cursor = conn.execute(query, params)
245
+
246
+ for row in cursor:
247
+ metrics = QualityMetrics(
248
+ depth=row['depth'] or 0,
249
+ gate_count=row['gate_count'] or 0,
250
+ cx_count=row['cx_count'] or 0,
251
+ single_qubit_count=row['single_qubit_count'] or 0,
252
+ hardware_fitness=row['hardware_fitness'] or 0.0,
253
+ syntax_valid=bool(row['syntax_valid']),
254
+ state_correctness=row['state_correctness'] or 0.0,
255
+ complexity_score=row['complexity_score'] or 0.0,
256
+ noise_estimate=row['noise_estimate'] or 0.0
257
+ )
258
+
259
+ eval = CircuitEvaluation(
260
+ id=row['id'],
261
+ run_id=row['run_id'],
262
+ timestamp=row['timestamp'],
263
+ problem_id=row['problem_id'],
264
+ problem_goal=row['problem_goal'] or "",
265
+ mode=row['mode'],
266
+ qasm_code=row['qasm_code'] or "",
267
+ success=bool(row['success']),
268
+ execution_time_ms=row['execution_time_ms'] or 0.0,
269
+ llm_requests=row['llm_requests'] or 0,
270
+ tokens_used=row['tokens_used'] or 0,
271
+ quality_metrics=metrics,
272
+ errors=json.loads(row['errors']) if row['errors'] else []
273
+ )
274
+ evaluations.append(eval)
275
+
276
+ return evaluations
277
+
278
+ def get_circuit_by_id(self, eval_id: int) -> Optional[CircuitEvaluation]:
279
+ """Get a single evaluation by ID."""
280
+ evals = self.get_evaluations(limit=1)
281
+ for e in self.get_evaluations(limit=1000):
282
+ if e.id == eval_id:
283
+ return e
284
+ return None
285
+
286
+ def compare_modes_for_problem(self, problem_id: str, run_id: Optional[str] = None) -> Dict:
287
+ """Compare all modes for a specific problem."""
288
+ modes = ['naked', 'guided', 'blackboard']
289
+ comparison = {
290
+ "problem_id": problem_id,
291
+ "modes": {}
292
+ }
293
+
294
+ for mode in modes:
295
+ evals = self.get_evaluations(problem_id=problem_id, mode=mode, run_id=run_id)
296
+ if evals:
297
+ latest = evals[0]
298
+ comparison["modes"][mode] = {
299
+ "success": latest.success,
300
+ "qasm_code": latest.qasm_code,
301
+ "depth": latest.quality_metrics.depth,
302
+ "gate_count": latest.quality_metrics.gate_count,
303
+ "cx_count": latest.quality_metrics.cx_count,
304
+ "hardware_fitness": latest.quality_metrics.hardware_fitness,
305
+ "overall_score": latest.quality_metrics.overall_score(),
306
+ "execution_time_ms": latest.execution_time_ms,
307
+ "llm_requests": latest.llm_requests
308
+ }
309
+
310
+ return comparison
311
+
312
+ def get_quality_summary(self, run_id: Optional[str] = None) -> Dict:
313
+ """Get quality summary across all modes."""
314
+ query = """
315
+ SELECT e.mode,
316
+ COUNT(*) as count,
317
+ SUM(e.success) as successes,
318
+ AVG(q.overall_score) as avg_score,
319
+ AVG(q.depth) as avg_depth,
320
+ AVG(q.gate_count) as avg_gates,
321
+ AVG(q.cx_count) as avg_cx,
322
+ AVG(q.hardware_fitness) as avg_fitness,
323
+ AVG(e.execution_time_ms) as avg_time,
324
+ SUM(e.llm_requests) as total_llm,
325
+ SUM(e.tokens_used) as total_tokens
326
+ FROM circuit_evaluations e
327
+ LEFT JOIN quality_metrics q ON e.id = q.eval_id
328
+ """
329
+ params = []
330
+ if run_id:
331
+ query += " WHERE e.run_id = ?"
332
+ params.append(run_id)
333
+ query += " GROUP BY e.mode"
334
+
335
+ summary = {"modes": {}}
336
+ with sqlite3.connect(self.db_file) as conn:
337
+ conn.row_factory = sqlite3.Row
338
+ for row in conn.execute(query, params):
339
+ mode = row['mode']
340
+ count = row['count']
341
+ summary["modes"][mode] = {
342
+ "count": count,
343
+ "success_rate": row['successes'] / count if count > 0 else 0,
344
+ "avg_quality_score": round(row['avg_score'] or 0, 2),
345
+ "avg_depth": round(row['avg_depth'] or 0, 1),
346
+ "avg_gates": round(row['avg_gates'] or 0, 1),
347
+ "avg_cx_count": round(row['avg_cx'] or 0, 1),
348
+ "avg_hardware_fitness": round(row['avg_fitness'] or 0, 3),
349
+ "avg_time_ms": round(row['avg_time'] or 0, 1),
350
+ "total_llm_requests": row['total_llm'] or 0,
351
+ "total_tokens": row['total_tokens'] or 0
352
+ }
353
+
354
+ return summary
355
+
356
+ def export_circuits_markdown(self, run_id: Optional[str] = None) -> str:
357
+ """Export all circuits as markdown for comparison."""
358
+ evals = self.get_evaluations(run_id=run_id, limit=1000)
359
+
360
+ # Group by problem
361
+ by_problem: Dict[str, Dict[str, CircuitEvaluation]] = {}
362
+ for e in evals:
363
+ if e.problem_id not in by_problem:
364
+ by_problem[e.problem_id] = {}
365
+ by_problem[e.problem_id][e.mode] = e
366
+
367
+ md = ["# Circuit Quality Comparison Report\n"]
368
+ md.append(f"Generated: {datetime.now().isoformat()}\n")
369
+ if run_id:
370
+ md.append(f"Run ID: {run_id}\n")
371
+ md.append("\n---\n")
372
+
373
+ for problem_id, modes in sorted(by_problem.items()):
374
+ md.append(f"\n## Problem: {problem_id}\n")
375
+
376
+ for mode in ['naked', 'guided', 'blackboard']:
377
+ if mode not in modes:
378
+ md.append(f"\n### {mode.upper()}: NOT RUN\n")
379
+ continue
380
+
381
+ e = modes[mode]
382
+ q = e.quality_metrics
383
+
384
+ md.append(f"\n### {mode.upper()}\n")
385
+ md.append(f"- **Success**: {'✅' if e.success else '❌'}\n")
386
+ md.append(f"- **Quality Score**: {q.overall_score()}/100\n")
387
+ md.append(f"- **Depth**: {q.depth}\n")
388
+ md.append(f"- **Gate Count**: {q.gate_count}\n")
389
+ md.append(f"- **CX Count**: {q.cx_count}\n")
390
+ md.append(f"- **Hardware Fitness**: {q.hardware_fitness:.3f}\n")
391
+ md.append(f"- **Time**: {e.execution_time_ms:.0f}ms\n")
392
+ md.append(f"- **LLM Requests**: {e.llm_requests}\n")
393
+
394
+ if e.qasm_code:
395
+ md.append("\n```qasm\n")
396
+ md.append(e.qasm_code)
397
+ if not e.qasm_code.endswith('\n'):
398
+ md.append('\n')
399
+ md.append("```\n")
400
+ else:
401
+ md.append("\n*No circuit generated*\n")
402
+
403
+ return "".join(md)
404
+
405
+
406
+ # Singleton instance
407
# Lazily-created module singleton.
_quality_db: Optional[CircuitQualityDB] = None


def get_quality_db() -> CircuitQualityDB:
    """Return the shared CircuitQualityDB, creating it on first use."""
    global _quality_db
    if _quality_db is None:
        _quality_db = CircuitQualityDB()
    return _quality_db
database/storage.py ADDED
@@ -0,0 +1,278 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Database Module: Storage for logs, results, memory, and context.
3
+ Provides both shared and per-agent storage with short/long-term memory.
4
+ """
5
+
6
+ import json
7
+ import sqlite3
8
+ from pathlib import Path
9
+ from datetime import datetime
10
+ from typing import Any, Dict, List, Optional
11
+ from dataclasses import dataclass, field, asdict
12
+ from enum import Enum
13
+ import logging
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+ class MemoryType(Enum):
18
+ """Types of memory storage."""
19
+ SHORT_TERM = "short_term" # Session-based, cleared on restart
20
+ LONG_TERM = "long_term" # Persistent across sessions
21
+ SHARED = "shared" # Shared between agents (blackboard)
22
+
23
+ @dataclass
24
+ class MemoryEntry:
25
+ """A single memory entry."""
26
+ key: str
27
+ value: Any
28
+ agent_id: Optional[str]
29
+ memory_type: MemoryType
30
+ timestamp: datetime = field(default_factory=datetime.now)
31
+ metadata: Dict = field(default_factory=dict)
32
+
33
+ @dataclass
34
+ class LogEntry:
35
+ """A log entry for audit trail."""
36
+ level: str
37
+ message: str
38
+ agent_id: Optional[str]
39
+ workflow_id: Optional[str]
40
+ timestamp: datetime = field(default_factory=datetime.now)
41
+ data: Dict = field(default_factory=dict)
42
+
43
+ @dataclass
44
+ class ResultEntry:
45
+ """A result from an evaluation run."""
46
+ run_id: str
47
+ system_mode: str # blackboard, guided, naked
48
+ problem_id: str
49
+ success: bool
50
+ execution_time_ms: float
51
+ circuit_qasm: Optional[str]
52
+ metrics: Dict = field(default_factory=dict)
53
+ timestamp: datetime = field(default_factory=datetime.now)
54
+
55
+
56
+ class Database:
57
+ """
58
+ SQLite-based storage for all system data.
59
+ Manages logs, results, and agent memory.
60
+ """
61
+
62
+ def __init__(self, db_path: Path):
63
+ self.db_path = db_path
64
+ self.db_path.mkdir(parents=True, exist_ok=True)
65
+ self.db_file = self.db_path / "qagents.db"
66
+ self._init_db()
67
+
68
+ def _init_db(self):
69
+ """Initialize database tables."""
70
+ with sqlite3.connect(self.db_file) as conn:
71
+ conn.executescript("""
72
+ CREATE TABLE IF NOT EXISTS memory (
73
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
74
+ key TEXT NOT NULL,
75
+ value TEXT NOT NULL,
76
+ agent_id TEXT,
77
+ memory_type TEXT NOT NULL,
78
+ timestamp TEXT NOT NULL,
79
+ metadata TEXT
80
+ );
81
+
82
+ CREATE TABLE IF NOT EXISTS logs (
83
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
84
+ level TEXT NOT NULL,
85
+ message TEXT NOT NULL,
86
+ agent_id TEXT,
87
+ workflow_id TEXT,
88
+ timestamp TEXT NOT NULL,
89
+ data TEXT
90
+ );
91
+
92
+ CREATE TABLE IF NOT EXISTS results (
93
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
94
+ run_id TEXT NOT NULL,
95
+ system_mode TEXT NOT NULL,
96
+ problem_id TEXT NOT NULL,
97
+ success INTEGER NOT NULL,
98
+ execution_time_ms REAL NOT NULL,
99
+ circuit_qasm TEXT,
100
+ metrics TEXT,
101
+ timestamp TEXT NOT NULL
102
+ );
103
+
104
+ CREATE INDEX IF NOT EXISTS idx_memory_key ON memory(key);
105
+ CREATE INDEX IF NOT EXISTS idx_memory_agent ON memory(agent_id);
106
+ CREATE INDEX IF NOT EXISTS idx_results_mode ON results(system_mode);
107
+ CREATE INDEX IF NOT EXISTS idx_results_problem ON results(problem_id);
108
+ """)
109
+
110
+ # ===== Memory Operations =====
111
+
112
+ def store_memory(self, entry: MemoryEntry):
113
+ """Store a memory entry."""
114
+ with sqlite3.connect(self.db_file) as conn:
115
+ conn.execute(
116
+ """INSERT INTO memory (key, value, agent_id, memory_type, timestamp, metadata)
117
+ VALUES (?, ?, ?, ?, ?, ?)""",
118
+ (entry.key, json.dumps(entry.value), entry.agent_id,
119
+ entry.memory_type.value, entry.timestamp.isoformat(),
120
+ json.dumps(entry.metadata))
121
+ )
122
+
123
+ def get_memory(self, key: str, agent_id: Optional[str] = None,
124
+ memory_type: Optional[MemoryType] = None) -> Optional[Any]:
125
+ """Retrieve a memory value."""
126
+ with sqlite3.connect(self.db_file) as conn:
127
+ query = "SELECT value FROM memory WHERE key = ?"
128
+ params = [key]
129
+
130
+ if agent_id:
131
+ query += " AND agent_id = ?"
132
+ params.append(agent_id)
133
+ if memory_type:
134
+ query += " AND memory_type = ?"
135
+ params.append(memory_type.value)
136
+
137
+ query += " ORDER BY timestamp DESC LIMIT 1"
138
+
139
+ result = conn.execute(query, params).fetchone()
140
+ return json.loads(result[0]) if result else None
141
+
142
+ def get_shared_memory(self, key: str) -> Optional[Any]:
143
+ """Get from shared blackboard memory."""
144
+ return self.get_memory(key, memory_type=MemoryType.SHARED)
145
+
146
+ def set_shared_memory(self, key: str, value: Any, agent_id: Optional[str] = None):
147
+ """Set shared blackboard memory."""
148
+ entry = MemoryEntry(
149
+ key=key,
150
+ value=value,
151
+ agent_id=agent_id,
152
+ memory_type=MemoryType.SHARED
153
+ )
154
+ self.store_memory(entry)
155
+
156
+ def clear_short_term_memory(self, agent_id: Optional[str] = None):
157
+ """Clear short-term memory (session reset)."""
158
+ with sqlite3.connect(self.db_file) as conn:
159
+ if agent_id:
160
+ conn.execute(
161
+ "DELETE FROM memory WHERE memory_type = ? AND agent_id = ?",
162
+ (MemoryType.SHORT_TERM.value, agent_id)
163
+ )
164
+ else:
165
+ conn.execute(
166
+ "DELETE FROM memory WHERE memory_type = ?",
167
+ (MemoryType.SHORT_TERM.value,)
168
+ )
169
+
170
+ # ===== Logging Operations =====
171
+
172
+ def log(self, entry: LogEntry):
173
+ """Store a log entry."""
174
+ with sqlite3.connect(self.db_file) as conn:
175
+ conn.execute(
176
+ """INSERT INTO logs (level, message, agent_id, workflow_id, timestamp, data)
177
+ VALUES (?, ?, ?, ?, ?, ?)""",
178
+ (entry.level, entry.message, entry.agent_id, entry.workflow_id,
179
+ entry.timestamp.isoformat(), json.dumps(entry.data))
180
+ )
181
+
182
+ def get_logs(self, agent_id: Optional[str] = None,
183
+ workflow_id: Optional[str] = None,
184
+ limit: int = 100) -> List[Dict]:
185
+ """Retrieve log entries."""
186
+ with sqlite3.connect(self.db_file) as conn:
187
+ query = "SELECT * FROM logs WHERE 1=1"
188
+ params = []
189
+
190
+ if agent_id:
191
+ query += " AND agent_id = ?"
192
+ params.append(agent_id)
193
+ if workflow_id:
194
+ query += " AND workflow_id = ?"
195
+ params.append(workflow_id)
196
+
197
+ query += f" ORDER BY timestamp DESC LIMIT {limit}"
198
+
199
+ rows = conn.execute(query, params).fetchall()
200
+ return [
201
+ {"level": r[1], "message": r[2], "agent_id": r[3],
202
+ "workflow_id": r[4], "timestamp": r[5], "data": json.loads(r[6] or "{}")}
203
+ for r in rows
204
+ ]
205
+
206
+ # ===== Results Operations =====
207
+
208
+ def store_result(self, entry: ResultEntry):
209
+ """Store an evaluation result."""
210
+ with sqlite3.connect(self.db_file) as conn:
211
+ conn.execute(
212
+ """INSERT INTO results (run_id, system_mode, problem_id, success,
213
+ execution_time_ms, circuit_qasm, metrics, timestamp)
214
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?)""",
215
+ (entry.run_id, entry.system_mode, entry.problem_id,
216
+ 1 if entry.success else 0, entry.execution_time_ms,
217
+ entry.circuit_qasm, json.dumps(entry.metrics),
218
+ entry.timestamp.isoformat())
219
+ )
220
+
221
+ def get_results(self, system_mode: Optional[str] = None,
222
+ problem_id: Optional[str] = None) -> List[ResultEntry]:
223
+ """Retrieve results for analysis."""
224
+ with sqlite3.connect(self.db_file) as conn:
225
+ query = "SELECT * FROM results WHERE 1=1"
226
+ params = []
227
+
228
+ if system_mode:
229
+ query += " AND system_mode = ?"
230
+ params.append(system_mode)
231
+ if problem_id:
232
+ query += " AND problem_id = ?"
233
+ params.append(problem_id)
234
+
235
+ query += " ORDER BY timestamp DESC"
236
+
237
+ rows = conn.execute(query, params).fetchall()
238
+ return [
239
+ ResultEntry(
240
+ run_id=r[1], system_mode=r[2], problem_id=r[3],
241
+ success=bool(r[4]), execution_time_ms=r[5],
242
+ circuit_qasm=r[6], metrics=json.loads(r[7] or "{}"),
243
+ timestamp=datetime.fromisoformat(r[8])
244
+ )
245
+ for r in rows
246
+ ]
247
+
248
+ def get_summary_stats(self) -> Dict:
249
+ """Get summary statistics across all runs."""
250
+ with sqlite3.connect(self.db_file) as conn:
251
+ stats = {}
252
+ for mode in ["blackboard", "guided", "naked"]:
253
+ rows = conn.execute(
254
+ """SELECT COUNT(*), AVG(execution_time_ms),
255
+ SUM(success) * 100.0 / COUNT(*)
256
+ FROM results WHERE system_mode = ?""",
257
+ (mode,)
258
+ ).fetchone()
259
+
260
+ stats[mode] = {
261
+ "total_runs": rows[0] or 0,
262
+ "avg_time_ms": rows[1] or 0,
263
+ "success_rate": rows[2] or 0
264
+ }
265
+ return stats
266
+
267
+
268
+ # Singleton instance
269
+ _db: Optional[Database] = None
270
+
271
+ def get_database(db_path: Optional[Path] = None) -> Database:
272
+ """Get or create the database singleton."""
273
+ global _db
274
+ if _db is None:
275
+ from config import config
276
+ path = db_path or config.database.db_path
277
+ _db = Database(path)
278
+ return _db
orchestrators/__init__.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Orchestrators module: Workflow orchestration for different modes."""
2
+
3
+ from .orchestrator import (
4
+ OrchestratorResult,
5
+ BaseOrchestrator,
6
+ BlackboardOrchestrator,
7
+ GuidedOrchestrator,
8
+ NakedOrchestrator,
9
+ create_orchestrator
10
+ )
11
+
12
+ from .quasar_orchestrator import (
13
+ QuasarOrchestrator,
14
+ HybridOrchestrator,
15
+ QuasarResult,
16
+ ValidationTier
17
+ )
18
+
19
+ __all__ = [
20
+ "OrchestratorResult",
21
+ "BaseOrchestrator",
22
+ "BlackboardOrchestrator",
23
+ "GuidedOrchestrator",
24
+ "NakedOrchestrator",
25
+ "QuasarOrchestrator",
26
+ "HybridOrchestrator",
27
+ "QuasarResult",
28
+ "ValidationTier",
29
+ "create_orchestrator"
30
+ ]
orchestrators/orchestrator.py ADDED
@@ -0,0 +1,541 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Path: QAgents-workflos/orchestrators/orchestrator.py
2
+ # Relations: Uses agents, workflows, database modules
3
+ # Description: Orchestrators for Blackboard, Guided, and Naked execution modes
4
+ """
5
+ Orchestrators Module: Workflow orchestration and execution.
6
+ Contains both Blackboard (free) and Guided (strict) orchestrators.
7
+ """
8
+
9
+ from abc import ABC, abstractmethod
10
+ from dataclasses import dataclass, field
11
+ from typing import Dict, List, Any, Optional
12
+ from datetime import datetime
13
+ import logging
14
+ import time
15
+
16
+ from agents import (
17
+ BaseAgent, AgentContext, AgentResult,
18
+ AgentState, create_all_agents
19
+ )
20
+ from workflows import (
21
+ WorkflowDefinition, WorkflowExecution,
22
+ WorkflowStatus, get_workflow
23
+ )
24
+ from database import get_database, LogEntry
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
@dataclass
class OrchestratorResult:
    """Result from orchestrator execution.

    Returned by every orchestrator's run(); fields are populated
    according to each mode's own completion criterion.
    """
    # Whether the orchestrator reached its goal (mode-specific criterion).
    success: bool
    # Produced artifact; for circuit tasks this is the QASM string (or None).
    final_output: Any
    # Wall-clock duration of the run in milliseconds.
    execution_time_ms: float
    # Number of agent/workflow steps actually executed.
    steps_completed: int
    # Upper bound on steps (workflow length or iteration cap).
    total_steps: int
    # Per-agent (or per-step) results keyed by agent/step identifier.
    agent_results: Dict[str, AgentResult] = field(default_factory=dict)
    # Human-readable error messages accumulated during the run.
    errors: List[str] = field(default_factory=list)
39
+
40
+
41
class BaseOrchestrator(ABC):
    """Abstract base class for orchestrators.

    Provides a shared agent registry and database-backed logging;
    concrete subclasses implement run() with their own coordination
    strategy (blackboard, guided workflow, or single-shot baseline).
    """

    def __init__(self, name: str):
        # Orchestrator name; also recorded as the agent_id on log entries.
        self.name = name
        # Registered agents keyed by their agent_id.
        self.agents: Dict[str, BaseAgent] = {}
        # Shared database handle (singleton) used for structured logging.
        self.db = get_database()

    def register_agent(self, agent: BaseAgent) -> None:
        """Register an agent with the orchestrator, keyed by its agent_id."""
        self.agents[agent.agent_id] = agent

    def log(self, level: str, message: str, workflow_id: Optional[str] = None,
            data: Optional[Dict] = None) -> None:
        """Log orchestrator activity to the database.

        Args:
            level: Severity label (e.g. "INFO", "WARN", "ERROR").
            message: Human-readable log message.
            workflow_id: Optional workflow correlation id.
            data: Optional structured payload stored with the entry.
        """
        entry = LogEntry(
            level=level,
            message=message,
            agent_id=self.name,
            workflow_id=workflow_id,
            data=data or {}
        )
        self.db.log(entry)

    @abstractmethod
    def run(self, goal: str, initial_context: Optional[Dict] = None) -> OrchestratorResult:
        """Run the orchestrator to achieve the goal."""
        pass
68
+
69
+
70
class BlackboardOrchestrator(BaseOrchestrator):
    """
    Blackboard (Free) Orchestrator.

    Uses a shared blackboard for agent communication.
    Agents opportunistically activate when they can contribute.
    Emergent workflow based on data availability.

    The blackboard is a plain dict holding the goal, the current circuit,
    validation status, scores, and an append-only history of agent actions.
    """

    def __init__(self):
        super().__init__("blackboard")
        # Shared mutable state visible to every agent (passed as shared_data).
        self.blackboard: Dict[str, Any] = {}
        # Hard cap on activation rounds so a run always terminates.
        self.max_iterations = 20

    def _reset_blackboard(self, goal: str, initial_context: Optional[Dict] = None):
        """Initialize the blackboard with goal and context."""
        # Ensure goal is a string (callers sometimes pass a list of goals).
        if isinstance(goal, list):
            goal = goal[0] if goal else ""
        goal = str(goal) if goal else ""

        self.blackboard = {
            "goal": goal,
            "current_circuit": None,
            "validation_passed": False,
            "scores": None,
            "completed": False,
            # Caller-supplied context keys may override the defaults above.
            **(initial_context or {})
        }

    def _build_context(self) -> AgentContext:
        """Build agent context from blackboard."""
        return AgentContext(
            goal=self.blackboard.get("goal", ""),
            current_circuit=self.blackboard.get("current_circuit"),
            history=self.blackboard.get("history", []),
            constraints=self.blackboard.get("constraints", {}),
            # The live blackboard dict is shared (not copied), so agents can
            # observe orchestrator state through shared_data.
            shared_data=self.blackboard
        )

    def _find_active_agent(self, context: AgentContext) -> Optional[BaseAgent]:
        """Find an agent that can handle the current state.

        Returns the first IDLE agent, in fixed priority order, whose
        can_handle() accepts the context; None if no agent can act.
        """
        # Priority order for agent selection - simplified for reliability
        # First: generate circuit, then validate
        priority_order = ["builder", "architect", "validator"]

        for agent_id in priority_order:
            agent = self.agents.get(agent_id)
            if agent and agent.can_handle(context):
                if agent.state == AgentState.IDLE:
                    return agent

        return None

    def _update_blackboard(self, agent_id: str, result: AgentResult):
        """Merge a successful agent result into the blackboard.

        Failed results are ignored entirely (no history entry either).
        """
        if not result.success:
            return

        data = result.data
        if isinstance(data, dict):
            # Extract QASM if present
            if "qasm" in data:
                qasm = data["qasm"]
                # Handle list responses (some LLM adapters return lists)
                if isinstance(qasm, list):
                    qasm = qasm[0] if qasm else None
                self.blackboard["current_circuit"] = qasm

            # Update validation status
            if "valid" in data:
                self.blackboard["validation_passed"] = data["valid"]

            # Update scores
            if "score" in data:
                self.blackboard["scores"] = data["score"]

        # Track history
        if "history" not in self.blackboard:
            self.blackboard["history"] = []
        self.blackboard["history"].append({
            "agent": agent_id,
            "action": result.actions_taken,
            "success": result.success,
            "timestamp": datetime.now().isoformat()
        })

    def _check_completion(self) -> bool:
        """Check if the goal has been achieved."""
        # Simple completion: we have a validated circuit
        has_circuit = self.blackboard.get("current_circuit") is not None
        is_validated = self.blackboard.get("validation_passed", False)
        return has_circuit and is_validated

    def run(self, goal: str, initial_context: Optional[Dict] = None) -> OrchestratorResult:
        """Run blackboard orchestration.

        Repeatedly (up to max_iterations) selects an agent that can act,
        lets it decide/execute, merges its output into the blackboard, and
        stops as soon as a validated circuit exists.
        """
        start_time = time.perf_counter()

        self.log("INFO", f"Starting blackboard orchestration for: {goal}")
        self._reset_blackboard(goal, initial_context)

        # Ensure we have agents
        if not self.agents:
            self.agents = create_all_agents()

        agent_results = {}
        steps_completed = 0
        errors = []

        for iteration in range(self.max_iterations):
            context = self._build_context()

            # Find an agent that can work
            agent = self._find_active_agent(context)

            if agent is None:
                self.log("INFO", "No active agent found, checking completion")
                if self._check_completion():
                    break
                # No agent and not complete - might be stuck
                if iteration > 5:  # Give it a few tries
                    errors.append("No agent could make progress")
                    break
                continue

            self.log("INFO", f"Activating agent: {agent.agent_id}")

            # Agent decides and executes - with null safety
            try:
                action = agent.decide(context)
                if action is None:
                    self.log("WARN", f"Agent {agent.agent_id} returned no action, continuing")
                    agent.reset()
                    continue

                result = agent.execute(action, context)
                if result is None:
                    self.log("WARN", f"Agent {agent.agent_id} returned no result, continuing")
                    agent.reset()
                    continue

                agent_results[agent.agent_id] = result
                steps_completed += 1

                # Update blackboard
                self._update_blackboard(agent.agent_id, result)

            except Exception as e:
                # A single failing agent does not abort the whole run; record
                # the error and let another iteration try.
                self.log("ERROR", f"Agent {agent.agent_id} failed: {e}")
                errors.append(f"Agent {agent.agent_id} error: {str(e)}")
                agent.reset()
                continue

            # Reset agent for next potential activation
            agent.reset()

            # Check completion
            if self._check_completion():
                self.log("INFO", "Goal achieved!")
                break

        elapsed = (time.perf_counter() - start_time) * 1000

        return OrchestratorResult(
            success=self._check_completion(),
            final_output=self.blackboard.get("current_circuit"),
            execution_time_ms=elapsed,
            steps_completed=steps_completed,
            total_steps=self.max_iterations,
            agent_results=agent_results,
            errors=errors
        )
242
+
243
+
244
class GuidedOrchestrator(BaseOrchestrator):
    """
    Guided (Strict) Orchestrator.

    Follows a predefined workflow with explicit steps.
    Central control over agent execution order.
    Predictable, auditable execution path.
    """

    def __init__(self, workflow_name: str = "build"):
        super().__init__("guided")
        # Workflow definition dictating step order and which agent runs each.
        self.workflow = get_workflow(workflow_name)
        if self.workflow is None:
            raise ValueError(f"Unknown workflow: {workflow_name}")
        # Mutable execution state; re-created on every run().
        self.execution: Optional[WorkflowExecution] = None

    def set_workflow(self, workflow_name: str):
        """Change the workflow.

        Raises:
            ValueError: If workflow_name is not a registered workflow.
        """
        self.workflow = get_workflow(workflow_name)
        if self.workflow is None:
            raise ValueError(f"Unknown workflow: {workflow_name}")

    def run(self, goal: str, initial_context: Optional[Dict] = None) -> OrchestratorResult:
        """Run guided workflow orchestration.

        Executes the workflow's steps strictly in order; a failing
        required step aborts the run, optional steps may be skipped.
        """
        start_time = time.perf_counter()

        # Ensure goal is a string (callers sometimes pass a list)
        if isinstance(goal, list):
            goal = goal[0] if goal else ""
        goal = str(goal) if goal else ""

        self.log("INFO", f"Starting guided workflow '{self.workflow.name}' for: {goal}")

        # Initialize execution state
        self.execution = WorkflowExecution(
            workflow=self.workflow,
            context={"goal": goal, **(initial_context or {})}
        )
        self.execution.status = WorkflowStatus.IN_PROGRESS

        # Ensure we have agents
        if not self.agents:
            self.agents = create_all_agents()

        agent_results = {}

        # Execute each step in order
        while self.execution.current_step is not None:
            step = self.execution.current_step
            self.log("INFO", f"Executing step: {step.name} ({step.agent_type})")

            # Get the agent for this step
            agent = self.agents.get(step.agent_type)
            if agent is None:
                if step.required:
                    # A required step without an agent is fatal for the run.
                    self.execution.fail(f"Missing agent: {step.agent_type}")
                    break
                else:
                    self.log("WARN", f"Skipping optional step: {step.name}")
                    self.execution.advance()
                    continue

            # Build context for agent
            context = AgentContext(
                goal=self.execution.context.get("goal", ""),
                current_circuit=self.execution.context.get("circuit_qasm"),
                history=[],
                constraints={},
                shared_data=self.execution.context
            )

            # Agent decides and executes
            action = agent.decide(context)
            if action is None:
                # Agent has nothing to do - might be okay for some steps
                self.log("WARN", f"Agent {step.agent_type} returned no action")
                self.execution.advance()
                continue

            result = agent.execute(action, context)
            agent_results[step.name] = result

            # Store outputs in execution context so later steps can read them.
            if result.success and result.data:
                for output_key in step.outputs:
                    if isinstance(result.data, dict):
                        if output_key in result.data:
                            self.execution.context[output_key] = result.data[output_key]
                        elif "qasm" in result.data:
                            qasm = result.data["qasm"]
                            # Handle list responses
                            if isinstance(qasm, list):
                                qasm = qasm[0] if qasm else None
                            self.execution.context["circuit_qasm"] = qasm

            # Handle failure
            if not result.success and step.required:
                self.execution.fail(f"Step {step.name} failed: {result.message}")
                break

            # Reset agent and advance
            agent.reset()
            self.execution.advance()

        elapsed = (time.perf_counter() - start_time) * 1000

        return OrchestratorResult(
            success=self.execution.status == WorkflowStatus.COMPLETED,
            final_output=self.execution.context.get(self.workflow.final_output),
            execution_time_ms=elapsed,
            steps_completed=self.execution.current_step_index,
            total_steps=len(self.workflow.steps),
            agent_results=agent_results,
            errors=self.execution.errors
        )
359
+
360
+
361
class NakedOrchestrator(BaseOrchestrator):
    """
    Naked (Baseline) Orchestrator.

    Direct LLM-to-QASM generation with single call.
    No multi-agent coordination, no structured workflow.
    Uses ONE LLM call per problem for baseline comparison.

    Purpose: Measure raw LLM capability at quantum circuit generation
    without agentic overhead.
    """

    def __init__(self):
        super().__init__("naked")
        # Lazily-created LLM adapter; see _get_llm().
        self._llm = None

    def _get_llm(self):
        """Lazily create and cache the LLM adapter (Gemini, fallback enabled)."""
        if self._llm is None:
            from agents.llm_adapter import get_llm_adapter
            from config import config
            self._llm = get_llm_adapter(
                provider="gemini",
                api_key=config.llm.api_key,
                enable_fallback=True
            )
        return self._llm

    @staticmethod
    def _strip_code_fences(text: str) -> str:
        """Return the contents of the first ``` fenced block in *text*.

        If *text* contains no markdown fence it is returned unchanged.
        Only the first fenced block is extracted; anything after its
        closing fence is discarded.
        """
        if "```" not in text:
            return text
        in_block = False
        qasm_lines = []
        for line in text.split("\n"):
            if line.strip().startswith("```"):
                if in_block:
                    break
                in_block = True
                continue
            if in_block:
                qasm_lines.append(line)
        return "\n".join(qasm_lines)

    def run(self, goal: str, initial_context: Optional[Dict] = None) -> OrchestratorResult:
        """
        Run naked LLM execution - ONE LLM call per problem.

        This is the baseline test: can a single LLM call generate
        valid QASM for a quantum computing problem?

        Args:
            goal: Natural-language problem description (a list is coerced
                to its first element for robustness).
            initial_context: Accepted for interface compatibility; unused.

        Returns:
            OrchestratorResult whose final_output is the generated QASM
            (or None); success requires a circuit and zero errors.
        """
        start_time = time.perf_counter()

        # Ensure goal is a string
        if isinstance(goal, list):
            goal = goal[0] if goal else ""
        goal = str(goal) if goal else ""

        self.log("INFO", f"Starting naked LLM execution for: {goal}")

        from tools import invoke_tool

        errors = []
        circuit_qasm = None
        llm_requests = 0
        tokens_used = 0

        # System prompt for direct QASM generation
        system_prompt = """You are an expert quantum computing engineer.
Your task is to generate valid OpenQASM 2.0 code for the given quantum circuit problem.

RULES:
1. Output ONLY valid OpenQASM 2.0 code
2. Start with: OPENQASM 2.0; include "qelib1.inc";
3. Declare qubits with: qreg q[N];
4. Declare classical bits with: creg c[N];
5. Use standard gates: h, x, y, z, cx, cz, ccx, swap, t, s, rx, ry, rz
6. Add measurements with: measure q[i] -> c[i];
7. NO explanations, NO markdown, ONLY QASM code

EXAMPLE OUTPUT:
OPENQASM 2.0;
include "qelib1.inc";
qreg q[2];
creg c[2];
h q[0];
cx q[0], q[1];
measure q[0] -> c[0];
measure q[1] -> c[1];
"""

        user_prompt = f"""Generate the OpenQASM 2.0 code for this quantum circuit problem:

{goal}

Output ONLY the QASM code, nothing else."""

        try:
            # Single LLM call - the naked baseline test
            llm = self._get_llm()
            response = llm.generate(
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                temperature=0.1,  # Low temperature for deterministic output
                max_tokens=1000
            )
            llm_requests = 1
            tokens_used = response.tokens_used

            # Extract QASM from response, unwrapping any markdown fence first.
            raw_output = self._strip_code_fences(response.text.strip())

            # Ensure it starts with OPENQASM declaration
            if "OPENQASM" in raw_output:
                # Slice from the start of the QASM program, dropping any preamble.
                idx = raw_output.find("OPENQASM")
                circuit_qasm = raw_output[idx:]
            elif "qreg" in raw_output or "include" in raw_output:
                # Looks like a bare QASM body - prepend the standard header.
                circuit_qasm = "OPENQASM 2.0;\ninclude \"qelib1.inc\";\n" + raw_output
            else:
                errors.append(f"LLM did not produce valid QASM: {raw_output[:100]}")

            # Validate the generated QASM
            if circuit_qasm:
                validation = invoke_tool("validate_syntax", qasm=circuit_qasm)
                if not validation.get("success") or not validation.get("valid", False):
                    error_msg = validation.get("error", "Unknown validation error")
                    errors.append(f"QASM validation failed: {error_msg}")
                    # Still keep the circuit for analysis
                    self.log("WARN", f"Generated QASM failed validation: {error_msg}")

        except Exception as e:
            errors.append(str(e))
            self.log("ERROR", f"Naked LLM execution failed: {e}")

        elapsed = (time.perf_counter() - start_time) * 1000

        # Create a simple AgentResult-like dict for compatibility
        from agents import AgentResult
        naked_result = AgentResult(
            success=circuit_qasm is not None and len(errors) == 0,
            data={
                "qasm": circuit_qasm,
                "llm_requests": llm_requests,
                "tokens_used": tokens_used
            },
            message=f"Generated QASM via naked LLM ({llm_requests} request, {tokens_used} tokens)"
        )

        return OrchestratorResult(
            success=circuit_qasm is not None and len(errors) == 0,
            final_output=circuit_qasm,
            execution_time_ms=elapsed,
            steps_completed=1 if llm_requests > 0 else 0,
            total_steps=1,
            agent_results={"naked_llm": naked_result},
            errors=errors
        )
523
+
524
+
525
+ # Factory function
526
def create_orchestrator(mode: str) -> BaseOrchestrator:
    """Create an orchestrator for the given execution mode.

    Supported modes: "blackboard", "guided", "naked", "quasar", "hybrid".
    The quasar/hybrid classes are imported lazily to avoid loading that
    module unless it is actually requested.

    Raises:
        ValueError: If *mode* is not one of the supported modes.
    """
    if mode == "quasar":
        from .quasar_orchestrator import QuasarOrchestrator
        return QuasarOrchestrator()
    if mode == "hybrid":
        from .quasar_orchestrator import HybridOrchestrator
        return HybridOrchestrator()

    builders = {
        "blackboard": BlackboardOrchestrator,
        "guided": GuidedOrchestrator,
        "naked": NakedOrchestrator,
    }
    try:
        return builders[mode]()
    except KeyError:
        raise ValueError(
            f"Unknown mode: {mode}. Use 'blackboard', 'guided', 'naked', 'quasar', or 'hybrid'"
        ) from None
orchestrators/quasar_orchestrator.py ADDED
@@ -0,0 +1,563 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Path: QAgents-workflos/orchestrators/quasar_orchestrator.py
2
+ # Relations: Uses agents/llm_adapter.py, tools/quantum_tools.py, client/mcp_client.py
3
+ # Description: QUASAR-lite orchestrator implementing Tool-Augmented LLM with hierarchical rewards
4
+ """
5
+ QUASAR-Lite Orchestrator: Tool-Augmented LLM with Hierarchical Verification
6
+
7
+ Based on the QUASAR framework (2025) for quantum circuit generation:
8
+ - Tier 1: Syntax validation (compile check)
9
+ - Tier 2: Semantic validation (unitarity, qubit count)
10
+ - Tier 3: Correctness validation (expected states)
11
+ - Tier 4: Optimization (depth/gate count)
12
+
13
+ Key Innovation: LLM generates → Tool validates → Feedback loop until success
14
+ """
15
+
16
+ from dataclasses import dataclass, field
17
+ from typing import Dict, List, Any, Optional
18
+ from datetime import datetime
19
+ import logging
20
+ import time
21
+ import re
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
@dataclass
class ValidationTier:
    """Result from a single tier of the hierarchical validation chain."""
    # Tier number: 1=syntax, 2=semantic, 3=correctness, 4=optimization.
    tier: int
    # Human-readable tier name (e.g. "Syntax", "Semantic").
    name: str
    # Whether this tier's checks passed.
    passed: bool
    # Summary of the outcome; embedded in the feedback prompt on failure.
    message: str
    # Tier-specific payload (e.g. errors, qubit/gate counts, probabilities).
    details: Dict[str, Any] = field(default_factory=dict)
34
+
35
+
36
@dataclass
class QuasarResult:
    """Result from QUASAR orchestration (generate → validate → refine loop)."""
    # Whether a generated circuit ultimately passed validation.
    success: bool
    # The final generated QASM program, or None if generation failed.
    final_qasm: Optional[str]
    # Wall-clock duration of the run in milliseconds.
    execution_time_ms: float
    # Total LLM generation calls made across all iterations.
    llm_calls: int
    # Total tokens consumed by those calls.
    tokens_used: int
    # Tier numbers that passed validation.
    tiers_passed: List[int]
    # Every ValidationTier result recorded during the run.
    validation_history: List[ValidationTier] = field(default_factory=list)
    # Human-readable error messages accumulated during the run.
    errors: List[str] = field(default_factory=list)
    # Number of generate/validate iterations performed.
    iterations: int = 0

    @property
    def final_output(self) -> Optional[str]:
        """Alias for compatibility with OrchestratorResult."""
        return self.final_qasm
53
+ class QuasarOrchestrator:
54
+ """
55
+ QUASAR-Lite: Tool-Augmented LLM for Quantum Circuit Generation
56
+
57
+ Key differences from NAKED mode:
58
+ 1. Validates after each generation attempt
59
+ 2. Provides error feedback to LLM for self-correction
60
+ 3. Uses hierarchical reward tiers
61
+ 4. Supports circuit partitioning for complex problems
62
+
63
+ Key differences from GUIDED mode:
64
+ 1. Single LLM with tool access (not multi-agent)
65
+ 2. External validation (not self-reflection)
66
+ 3. Iterative refinement with ground-truth feedback
67
+ """
68
+
69
def __init__(self, max_iterations: int = 3):
    # Maximum generate → validate → feedback loops before giving up.
    self.max_iterations = max_iterations
    # Lazily-initialized LLM adapter (created in _get_llm).
    self._llm = None
    # Lazily-initialized MCP client used for validation (created in _get_mcp).
    self._mcp_client = None
73
+
74
def _get_llm(self):
    """Lazily create and cache the Gemini LLM adapter (fallback enabled)."""
    if self._llm is None:
        # Imported lazily so the module can load without LLM configuration.
        from agents.llm_adapter import get_llm_adapter
        from config import config
        self._llm = get_llm_adapter(
            provider="gemini",
            api_key=config.llm.api_key,
            enable_fallback=True
        )
    return self._llm
85
+
86
def _get_mcp(self):
    """Lazily create and cache the MCP client used by the validation tiers."""
    if self._mcp_client is None:
        # Imported lazily to avoid a hard dependency at module import time.
        from client.mcp_client import get_client
        self._mcp_client = get_client()
    return self._mcp_client
92
+
93
+ def _extract_qasm(self, text: str) -> Optional[str]:
94
+ """Extract QASM code from LLM response."""
95
+ if not text:
96
+ return None
97
+
98
+ # Clean up common LLM artifacts
99
+ if "```" in text:
100
+ lines = text.split("\n")
101
+ in_block = False
102
+ qasm_lines = []
103
+ for line in lines:
104
+ if line.strip().startswith("```"):
105
+ if in_block:
106
+ break
107
+ in_block = True
108
+ continue
109
+ if in_block:
110
+ qasm_lines.append(line)
111
+ text = "\n".join(qasm_lines)
112
+
113
+ # Find OPENQASM declaration
114
+ if "OPENQASM" in text:
115
+ idx = text.find("OPENQASM")
116
+ return text[idx:].strip()
117
+
118
+ # Try to construct valid QASM
119
+ if "qreg" in text or "include" in text:
120
+ return "OPENQASM 2.0;\ninclude \"qelib1.inc\";\n" + text.strip()
121
+
122
+ return None
123
+
124
def _validate_tier1_syntax(self, qasm: str) -> ValidationTier:
    """Tier 1: Syntax validation - does it compile?

    Uses the MCP validation service when available; if the MCP call
    raises, falls back to a lightweight structural check (header plus
    a qreg declaration).
    """
    try:
        mcp = self._get_mcp()
        result = mcp.validate_syntax(qasm)

        if result.success and result.data:
            is_valid = result.data.get("valid", False)
            errors = result.data.get("errors", [])

            if is_valid:
                return ValidationTier(
                    tier=1, name="Syntax", passed=True,
                    message="QASM syntax is valid",
                    details={"valid": True}
                )
            return ValidationTier(
                tier=1, name="Syntax", passed=False,
                message=f"Syntax errors: {errors}",
                details={"errors": errors}
            )

        # MCP responded but without usable data - treat as a failure.
        return ValidationTier(
            tier=1, name="Syntax", passed=False,
            message="Validation failed",
            details={"error": "MCP validation failed"}
        )

    except Exception as e:
        # Fallback: basic structural validation. (The previous version also
        # computed a creg check that was never used; removed.)
        has_header = "OPENQASM" in qasm and "include" in qasm
        has_qreg = "qreg" in qasm

        if has_header and has_qreg:
            return ValidationTier(
                tier=1, name="Syntax", passed=True,
                message="Basic syntax check passed (fallback)",
                details={"fallback": True}
            )
        return ValidationTier(
            tier=1, name="Syntax", passed=False,
            message=f"Basic syntax check failed: {e}",
            details={"error": str(e)}
        )
170
+
171
def _validate_tier2_semantic(self, qasm: str, expected_qubits: int = None) -> ValidationTier:
    """Tier 2: Semantic validation - qubit count, gate validity.

    Uses MCP circuit analysis when available; otherwise (MCP raises or
    returns no usable data) falls back to a regex-based count of qubits
    and recognized gates.
    """
    try:
        mcp = self._get_mcp()
        result = mcp.analyze_circuit(qasm)

        if result.success and result.data:
            num_qubits = result.data.get("num_qubits", 0)
            gate_count = result.data.get("gate_count", 0)

            issues = []

            # Check qubit count if expected
            if expected_qubits and num_qubits != expected_qubits:
                issues.append(f"Expected {expected_qubits} qubits, got {num_qubits}")

            # Check for at least one gate
            if gate_count == 0:
                issues.append("No gates in circuit")

            if issues:
                return ValidationTier(
                    tier=2, name="Semantic", passed=False,
                    message="; ".join(issues),
                    details={"num_qubits": num_qubits, "gate_count": gate_count}
                )

            return ValidationTier(
                tier=2, name="Semantic", passed=True,
                message=f"Valid circuit: {num_qubits} qubits, {gate_count} gates",
                details={"num_qubits": num_qubits, "gate_count": gate_count}
            )
    except Exception:
        pass  # fall through to the regex fallback below

    # Fallback: regex-based analysis. BUG FIX: this path is now also reached
    # when the MCP call succeeded but produced no usable data - previously
    # that case fell off the end of the try block and returned None.
    qreg_match = re.search(r'qreg\s+\w+\[(\d+)\]', qasm)
    num_qubits = int(qreg_match.group(1)) if qreg_match else 0

    gate_pattern = r'\b(h|x|y|z|s|t|cx|cz|cy|swap|ccx|rz|rx|ry)\b'
    gates = re.findall(gate_pattern, qasm, re.IGNORECASE)

    return ValidationTier(
        tier=2, name="Semantic", passed=len(gates) > 0,
        message=f"Fallback analysis: {num_qubits} qubits, {len(gates)} gates",
        details={"fallback": True, "num_qubits": num_qubits, "gate_count": len(gates)}
    )
217
+
218
def _validate_tier3_correctness(self, qasm: str, expected_states: Dict[str, float] = None) -> ValidationTier:
    """Tier 3: Correctness validation - expected output states.

    Simulates the circuit via MCP (1024 shots) and compares measured
    probabilities against *expected_states* within an absolute
    tolerance of 0.15. Passes trivially when no expectations are given.
    """
    if not expected_states:
        return ValidationTier(
            tier=3, name="Correctness", passed=True,
            message="No expected states specified, skipping",
            details={"skipped": True}
        )

    try:
        mcp = self._get_mcp()
        result = mcp.simulate_circuit(qasm, shots=1024)

        if result.success and result.data:
            probs = result.data.get("probabilities", {})

            # Check if expected states match
            tolerance = 0.15
            matches = []
            mismatches = []

            for state, expected_prob in expected_states.items():
                actual_prob = probs.get(state, 0.0)
                if abs(actual_prob - expected_prob) <= tolerance:
                    matches.append(f"|{state}⟩: {actual_prob:.3f} ≈ {expected_prob}")
                else:
                    mismatches.append(f"|{state}⟩: got {actual_prob:.3f}, expected {expected_prob}")

            if mismatches:
                return ValidationTier(
                    tier=3, name="Correctness", passed=False,
                    message=f"State mismatches: {mismatches}",
                    details={"expected": expected_states, "actual": probs}
                )

            return ValidationTier(
                tier=3, name="Correctness", passed=True,
                message=f"States match: {matches}",
                details={"matches": matches}
            )

        # BUG FIX: previously this path fell off the end of the try block
        # and the method returned None; report an explicit failure instead.
        return ValidationTier(
            tier=3, name="Correctness", passed=False,
            message="Simulation failed: no result data",
            details={"error": "MCP simulation returned no data"}
        )

    except Exception as e:
        return ValidationTier(
            tier=3, name="Correctness", passed=False,
            message=f"Simulation failed: {e}",
            details={"error": str(e)}
        )
265
+
266
def _validate_tier4_optimization(self, qasm: str, max_depth: int = None) -> ValidationTier:
    """Tier 4: Optimization - circuit depth and gate count.

    This tier is deliberately lenient: it only fails when *max_depth* is
    given and exceeded; an unavailable/unusable analysis is reported as
    a passed-but-skipped check.
    """
    try:
        mcp = self._get_mcp()
        result = mcp.analyze_circuit(qasm)

        if result.success and result.data:
            depth = result.data.get("depth", 0)
            gate_count = result.data.get("gate_count", 0)
            cx_count = result.data.get("cx_count", 0)

            details = {"depth": depth, "gate_count": gate_count, "cx_count": cx_count}

            if max_depth and depth > max_depth:
                return ValidationTier(
                    tier=4, name="Optimization", passed=False,
                    message=f"Depth {depth} exceeds max {max_depth}",
                    details=details
                )

            return ValidationTier(
                tier=4, name="Optimization", passed=True,
                message=f"Depth: {depth}, Gates: {gate_count}, CX: {cx_count}",
                details=details
            )

        # BUG FIX: previously this path fell off the end of the try block
        # and the method returned None; skip leniently instead, matching
        # the exception branch below.
        return ValidationTier(
            tier=4, name="Optimization", passed=True,
            message="Optimization check skipped: no analysis data",
            details={"error": "MCP analysis returned no data"}
        )

    except Exception as e:
        return ValidationTier(
            tier=4, name="Optimization", passed=True,
            message=f"Optimization check skipped: {e}",
            details={"error": str(e)}
        )
298
+
299
def _build_feedback_prompt(self, goal: str, previous_qasm: str,
                           failed_tier: ValidationTier, iteration: int) -> str:
    """Build prompt with feedback for LLM self-correction.

    Embeds the original task, the previous (failed) QASM, and the
    failing tier's message/details so the model can repair its output.
    Note: *iteration* is accepted but not currently used in the text.
    """
    return f"""Your previous attempt to generate a quantum circuit had an error.

ORIGINAL TASK:
{goal}

YOUR PREVIOUS OUTPUT:
```qasm
{previous_qasm or "(no valid QASM generated)"}
```

VALIDATION ERROR (Tier {failed_tier.tier} - {failed_tier.name}):
{failed_tier.message}

Details: {failed_tier.details}

INSTRUCTIONS:
1. Analyze the error carefully
2. Fix the issue in your QASM code
3. Output ONLY valid OpenQASM 2.0 code
4. Start with: OPENQASM 2.0; include "qelib1.inc";

Generate the CORRECTED QASM code:"""
324
+
325
def _build_initial_prompt(self, goal: str, expected_qubits: int = None,
                          expected_states: Dict[str, float] = None) -> str:
    """Assemble the first-pass generation prompt, embedding any qubit/state constraints."""
    notes = []
    if expected_qubits:
        notes.append(f"- Use exactly {expected_qubits} qubit(s)")
    if expected_states:
        listing = ", ".join(f"|{state}⟩: {prob}" for state, prob in expected_states.items())
        notes.append(f"- Expected measurement probabilities: {listing}")

    # Empty join produces "" (falsy), so `or` supplies the default line,
    # matching the original conditional-expression behavior.
    constraints_section = "\n".join(notes) or "- No specific constraints"

    return f"""Generate a quantum circuit for the following task:

TASK:
{goal}

CONSTRAINTS:
{constraints_section}

RULES:
1. Output ONLY valid OpenQASM 2.0 code
2. Start with: OPENQASM 2.0; include "qelib1.inc";
3. Declare qubits with: qreg q[N];
4. Declare classical bits with: creg c[N];
5. Use standard gates: h, x, y, z, cx, cz, ccx, swap, t, s, rx, ry, rz
6. Add measurements with: measure q[i] -> c[i];
7. NO explanations, NO markdown, ONLY QASM code

Generate the OpenQASM 2.0 circuit:"""
355
+
356
def run(self, goal: str,
        expected_qubits: int = None,
        expected_states: Dict[str, float] = None,
        max_depth: int = None) -> QuasarResult:
    """
    Run QUASAR-lite orchestration with hierarchical validation.

    Generates QASM with the LLM, then runs up to four validation tiers;
    on any tier failure the error is fed back to the LLM and generation
    is retried (up to ``self.max_iterations`` attempts).

    Args:
        goal: The problem description
        expected_qubits: Expected number of qubits (for Tier 2)
        expected_states: Expected output states (for Tier 3)
        max_depth: Maximum circuit depth (for Tier 4)

    Returns:
        QuasarResult with final QASM and validation history
    """
    start_time = time.perf_counter()

    llm = self._get_llm()
    llm_calls = 0
    tokens_used = 0
    validation_history = []
    errors = []
    current_qasm = None
    tiers_passed = []

    system_prompt = """You are an expert quantum computing engineer.
Your task is to generate valid OpenQASM 2.0 code for quantum circuits.
You will receive feedback if your code has errors and must correct them.
Always output ONLY valid QASM code, no explanations."""

    # Initial prompt; replaced by a feedback prompt on each failed attempt.
    user_prompt = self._build_initial_prompt(goal, expected_qubits, expected_states)

    for iteration in range(self.max_iterations):
        # Generate QASM
        try:
            response = llm.generate(
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                temperature=0.1 + (iteration * 0.1),  # Increase temperature on retries
                max_tokens=1500
            )
            llm_calls += 1
            tokens_used += response.tokens_used

            current_qasm = self._extract_qasm(response.text)

            if not current_qasm:
                # No QASM could be extracted; treat as a tier-0 failure so the
                # feedback prompt shows the raw (non-QASM) model output.
                errors.append(f"Iteration {iteration+1}: Failed to extract QASM")
                user_prompt = self._build_feedback_prompt(
                    goal, response.text,
                    ValidationTier(0, "Extraction", False, "No valid QASM found in response"),
                    iteration
                )
                continue

        except KeyboardInterrupt:
            raise  # Re-raise keyboard interrupt
        except Exception as e:
            errors.append(f"Iteration {iteration+1}: LLM error - {e}")
            logger.error(f"QUASAR LLM error: {e}")
            # Don't continue retrying on LLM errors, they'll likely fail again
            break

        # Run hierarchical validation.
        # NOTE: tiers_passed is reset each attempt, so the final result only
        # reflects the tiers passed by the *last* generated circuit.
        all_passed = True
        tiers_passed = []

        # Tier 1: Syntax
        tier1 = self._validate_tier1_syntax(current_qasm)
        validation_history.append(tier1)
        if not tier1.passed:
            all_passed = False
            user_prompt = self._build_feedback_prompt(goal, current_qasm, tier1, iteration)
            continue
        tiers_passed.append(1)

        # Tier 2: Semantic
        tier2 = self._validate_tier2_semantic(current_qasm, expected_qubits)
        validation_history.append(tier2)
        if not tier2.passed:
            all_passed = False
            user_prompt = self._build_feedback_prompt(goal, current_qasm, tier2, iteration)
            continue
        tiers_passed.append(2)

        # Tier 3: Correctness (if expected states provided)
        if expected_states:
            tier3 = self._validate_tier3_correctness(current_qasm, expected_states)
            validation_history.append(tier3)
            if not tier3.passed:
                all_passed = False
                user_prompt = self._build_feedback_prompt(goal, current_qasm, tier3, iteration)
                continue
            # Tier 3 is only claimed when it was actually checked.
            tiers_passed.append(3)

        # Tier 4: Optimization (informational, doesn't fail)
        tier4 = self._validate_tier4_optimization(current_qasm, max_depth)
        validation_history.append(tier4)
        if tier4.passed:
            tiers_passed.append(4)

        # All validations passed!
        # (all_passed is necessarily True here — any tier failure above
        # executed `continue` — kept as a defensive guard.)
        if all_passed:
            elapsed = (time.perf_counter() - start_time) * 1000
            return QuasarResult(
                success=True,
                final_qasm=current_qasm,
                execution_time_ms=elapsed,
                llm_calls=llm_calls,
                tokens_used=tokens_used,
                tiers_passed=tiers_passed,
                validation_history=validation_history,
                errors=errors,
                iterations=iteration + 1
            )

    # Max iterations reached (or LLM error broke the loop).
    # Partial success: the last circuit exists and passed at least
    # syntax + semantic validation (tiers 1 and 2).
    elapsed = (time.perf_counter() - start_time) * 1000
    return QuasarResult(
        success=current_qasm is not None and len(tiers_passed) >= 2,
        final_qasm=current_qasm,
        execution_time_ms=elapsed,
        llm_calls=llm_calls,
        tokens_used=tokens_used,
        tiers_passed=tiers_passed,
        validation_history=validation_history,
        errors=errors,
        iterations=self.max_iterations
    )
489
+
490
+
491
class HybridOrchestrator:
    """
    Hybrid Orchestrator: NAKED speed + QUASAR reliability

    Strategy:
    1. Try NAKED mode first (fast, cheap)
    2. If NAKED fails validation, fall back to QUASAR (reliable, more expensive)

    This gives best of both worlds:
    - Easy problems: solved in 1 LLM call via NAKED
    - Hard problems: solved via QUASAR with feedback loops
    """

    def __init__(self):
        # Both sub-orchestrators are created lazily: keeps import cost low
        # and avoids a circular import with orchestrators.orchestrator.
        self._naked = None
        self._quasar = None

    def _get_naked(self):
        """Lazy load NAKED orchestrator."""
        if self._naked is None:
            from orchestrators.orchestrator import NakedOrchestrator
            self._naked = NakedOrchestrator()
        return self._naked

    def _get_quasar(self):
        """Lazy load QUASAR orchestrator."""
        if self._quasar is None:
            self._quasar = QuasarOrchestrator(max_iterations=3)
        return self._quasar

    @staticmethod
    def _naked_tokens_used(naked_result) -> int:
        """Safely extract the token count from a NAKED orchestrator result.

        BUG FIX: the previous inline expression
        ``agent_results.get("naked_llm", {}).data.get(...)`` raised
        AttributeError whenever "naked_llm" was missing, because the dict
        default ``{}`` has no ``.data`` attribute. Returns 0 when the
        information is unavailable.
        """
        if not naked_result.agent_results:
            return 0
        entry = naked_result.agent_results.get("naked_llm")
        if entry is None:
            return 0
        try:
            return entry.data.get("tokens_used", 0)
        except AttributeError:
            # entry has an unexpected shape — treat as "unknown".
            return 0

    def run(self, goal: str,
            expected_qubits: int = None,
            expected_states: Dict[str, float] = None,
            max_depth: int = None) -> QuasarResult:
        """
        Run hybrid orchestration: NAKED first, QUASAR on failure.

        Args:
            goal: The problem description
            expected_qubits: Expected number of qubits (passed to validation)
            expected_states: Expected output states (QUASAR Tier 3)
            max_depth: Maximum circuit depth (QUASAR Tier 4)

        Returns:
            QuasarResult for compatibility with comprehensive tests
        """
        start_time = time.perf_counter()

        # Step 1: Try NAKED mode
        naked = self._get_naked()
        naked_result = naked.run(goal)

        if naked_result.success and naked_result.final_output:
            # Validate NAKED output using QUASAR's cheap tiers only
            # (syntax + semantic); the expensive tiers are skipped here.
            quasar = self._get_quasar()
            qasm = naked_result.final_output

            tier1 = quasar._validate_tier1_syntax(qasm)
            tier2 = quasar._validate_tier2_semantic(qasm, expected_qubits)

            if tier1.passed and tier2.passed:
                # NAKED succeeded!
                elapsed = (time.perf_counter() - start_time) * 1000
                return QuasarResult(
                    success=True,
                    final_qasm=qasm,
                    execution_time_ms=elapsed,
                    llm_calls=1,
                    tokens_used=self._naked_tokens_used(naked_result),
                    tiers_passed=[1, 2],
                    validation_history=[tier1, tier2],
                    errors=[],
                    iterations=1
                )

        # Step 2: NAKED failed, use QUASAR
        logger.info(f"NAKED failed, falling back to QUASAR for: {goal[:50]}...")
        quasar = self._get_quasar()
        return quasar.run(goal, expected_qubits, expected_states, max_depth)
orchestrators/router.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Path: QAgents-workflos/orchestrators/router.py
2
+ # Relations: Used by orchestrators/orchestrator.py, run_quality_eval.py
3
+ # Description: Difficulty-aware orchestrator selection based on problem complexity
4
+ # Routes easy problems to NAKED (fastest, best quality)
5
+ # Routes medium to NAKED+optimization, hard to GUIDED
6
+
7
+ """
8
+ Difficulty-Aware Router: Selects optimal orchestration mode based on problem complexity.
9
+
10
+ Based on quality evaluation findings:
11
+ - NAKED mode: Best for easy problems (47.9/100 quality, 3.7s)
12
+ - NAKED+Optimizer: Best for medium (post-generation refinement)
13
+ - GUIDED: For hard problems (agents may add value for complex algorithms)
14
+
15
+ This router balances quality, cost, and execution time.
16
+ """
17
+
18
+ from typing import Optional, Dict, Literal
19
+ from dataclasses import dataclass
20
+ from tests.test_problems import TestProblem, ProblemDifficulty
21
+
22
+
23
@dataclass
class RoutingDecision:
    """Result of routing decision."""
    # Selected orchestration mode. "blackboard" is declared in the Literal
    # for forward compatibility; ROUTING_CONFIG currently only emits
    # "naked" or "guided".
    mode: Literal["naked", "guided", "blackboard"]
    # Human-readable explanation of why this mode was chosen.
    reason: str
    # Estimated quality score (0-100) from prior evaluation runs.
    expected_quality: float
    # Estimated number of LLM calls this mode will make.
    expected_llm_calls: int
    # Estimated wall-clock time in milliseconds.
    expected_time_ms: int
    # Whether a post-generation optimization pass should be applied.
    use_optimizer: bool = False
32
+
33
+
34
class DifficultyAwareRouter:
    """
    Routes problems to optimal orchestrators based on difficulty and characteristics.

    Strategy:
    - EASY: Use NAKED (proven best)
    - MEDIUM: Use NAKED + post-optimization
    - HARD: Use GUIDED if agents help, NAKED+optimizer as fallback

    Can be configured for experimentation.
    """

    # Routing configuration (can be tuned).
    # "expected_*" values are estimates from earlier quality evaluations,
    # not runtime guarantees (see module docstring).
    ROUTING_CONFIG = {
        "easy": {
            "primary_mode": "naked",
            "use_optimizer": False,
            "fallback_mode": "guided",
            "expected_quality": 47.9,
            "expected_llm_calls": 3,
            "expected_time_ms": 3700,
        },
        "medium": {
            "primary_mode": "naked",
            "use_optimizer": True,  # Add post-generation optimization
            "fallback_mode": "guided",
            "expected_quality": 50.0,  # Estimated with optimizer
            "expected_llm_calls": 3,
            "expected_time_ms": 5000,
        },
        "hard": {
            "primary_mode": "guided",  # Agents might help for complex algorithms
            "use_optimizer": True,
            "fallback_mode": "naked",
            "expected_quality": 55.0,  # Estimated
            "expected_llm_calls": 7,
            "expected_time_ms": 25000,
        }
    }

    @classmethod
    def route(cls, problem: TestProblem,
              prefer_naked: bool = False,
              prefer_guided: bool = False) -> RoutingDecision:
        """
        Route a problem to the optimal orchestrator.

        Args:
            problem: The quantum circuit problem to solve
            prefer_naked: Force NAKED mode (for testing)
            prefer_guided: Force GUIDED mode (for testing)

        Returns:
            RoutingDecision with selected mode and metadata
        """

        # Handle overrides.
        # NOTE(review): overrides return _make_decision's generic default
        # expectations (quality 45.0, 3 calls) rather than the
        # per-difficulty config — confirm this is intentional.
        if prefer_naked:
            return cls._make_decision("naked", problem, "User override")
        if prefer_guided:
            return cls._make_decision("guided", problem, "User override")

        # Get difficulty level; tolerates both an Enum member (.value)
        # and a plain string.
        difficulty = problem.difficulty.value if hasattr(problem.difficulty, 'value') else str(problem.difficulty)

        # Get routing config for difficulty
        config = cls.ROUTING_CONFIG.get(difficulty)
        if not config:
            # Default to guided for unknown difficulties
            return cls._make_decision("guided", problem, f"Unknown difficulty: {difficulty}")

        # Route based on difficulty
        return cls._make_decision(
            config["primary_mode"],
            problem,
            f"Routed based on difficulty: {difficulty}",
            use_optimizer=config.get("use_optimizer", False),
            expected_quality=config["expected_quality"],
            expected_llm_calls=config["expected_llm_calls"],
            expected_time_ms=config["expected_time_ms"],
        )

    @classmethod
    def route_batch(cls, problems: list) -> Dict[str, RoutingDecision]:
        """Route multiple problems. Returns a mapping of problem id -> decision."""
        return {p.id: cls.route(p) for p in problems}

    @classmethod
    def _make_decision(cls, mode: str, problem: TestProblem, reason: str,
                       use_optimizer: bool = False,
                       expected_quality: float = 45.0,
                       expected_llm_calls: int = 3,
                       expected_time_ms: int = 5000) -> RoutingDecision:
        """Create a routing decision.

        ``problem`` is currently unused; it is kept in the signature so
        future problem-specific heuristics can be added without changing
        call sites.
        """
        return RoutingDecision(
            mode=mode,
            reason=reason,
            expected_quality=expected_quality,
            expected_llm_calls=expected_llm_calls,
            expected_time_ms=expected_time_ms,
            use_optimizer=use_optimizer,
        )

    @classmethod
    def print_strategy(cls):
        """Print routing strategy (side effect only: writes to stdout)."""
        print("\n" + "="*80)
        print("DIFFICULTY-AWARE ROUTING STRATEGY")
        print("="*80)

        for difficulty in ["easy", "medium", "hard"]:
            config = cls.ROUTING_CONFIG[difficulty]
            print(f"\n{difficulty.upper()}:")
            print(f" Primary Mode: {config['primary_mode']}")
            print(f" Use Optimizer: {config['use_optimizer']}")
            print(f" Fallback: {config['fallback_mode']}")
            print(f" Expected Quality: {config['expected_quality']:.1f}/100")
            print(f" Expected LLM Calls: {config['expected_llm_calls']}")
            print(f" Expected Time: {config['expected_time_ms']}ms")

        print("\n" + "="*80)
+
156
+
157
def select_orchestrator_mode(problem: TestProblem) -> str:
    """
    Convenience wrapper: return the orchestrator mode chosen for *problem*.

    Usage:
        mode = select_orchestrator_mode(problem)
        orchestrator = create_orchestrator(mode)
    """
    return DifficultyAwareRouter.route(problem).mode
167
+
168
+
169
def should_use_optimizer(problem: TestProblem) -> bool:
    """Return True when a post-generation optimization pass is recommended for *problem*."""
    return DifficultyAwareRouter.route(problem).use_optimizer
173
+
174
+
175
# Example usage
if __name__ == "__main__":
    # Demo script: route every benchmark problem and print the chosen mode,
    # then dump the static routing strategy table.
    from tests.test_problems import EASY_PROBLEMS, MEDIUM_PROBLEMS, HARD_PROBLEMS

    print("\nExample: Routing all problems")
    print("-" * 80)

    all_problems = EASY_PROBLEMS + MEDIUM_PROBLEMS + HARD_PROBLEMS

    for problem in all_problems:
        decision = DifficultyAwareRouter.route(problem)
        print(f"{problem.id:15} -> {decision.mode:10} ({decision.reason})")

    DifficultyAwareRouter.print_strategy()
prompts/__init__.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Prompts module: System prompts for all agents."""
2
+
3
+ from .agent_prompts import (
4
+ ARCHITECT_PROMPT,
5
+ BUILDER_PROMPT,
6
+ VALIDATOR_PROMPT,
7
+ OPTIMIZER_PROMPT,
8
+ ANALYZER_PROMPT,
9
+ SCORER_PROMPT,
10
+ COORDINATOR_PROMPT,
11
+ ALL_PROMPTS,
12
+ get_prompt
13
+ )
14
+
15
+ __all__ = [
16
+ "ARCHITECT_PROMPT",
17
+ "BUILDER_PROMPT",
18
+ "VALIDATOR_PROMPT",
19
+ "OPTIMIZER_PROMPT",
20
+ "ANALYZER_PROMPT",
21
+ "SCORER_PROMPT",
22
+ "COORDINATOR_PROMPT",
23
+ "ALL_PROMPTS",
24
+ "get_prompt"
25
+ ]
prompts/agent_prompts.py ADDED
@@ -0,0 +1,276 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Prompts Module: System prompts for all agents.
3
+ Each prompt defines the agent's behavior, constraints, and expertise.
4
+ """
5
+
6
+ # ============================================================
7
+ # ARCHITECT AGENT PROMPT
8
+ # ============================================================
9
+
10
+ ARCHITECT_PROMPT = """You are a Quantum Circuit Architect agent. Your role is to plan and design quantum circuits at a high level.
11
+
12
+ ## Your Responsibilities:
13
+ 1. Understand the user's goal and translate it into a circuit design plan
14
+ 2. Choose appropriate circuit templates or patterns
15
+ 3. Determine the number of qubits and overall structure needed
16
+ 4. Consider hardware constraints when planning
17
+
18
+ ## Your Tools:
19
+ - create_from_template: Use predefined templates (bell_state, ghz, qft, grover)
20
+ - generate_from_description: Create circuits from natural language
21
+ - analyze_circuit: Analyze existing circuits to understand their structure
22
+
23
+ ## Guidelines:
24
+ - Start simple - prefer smaller circuits when possible
25
+ - Consider the target hardware's qubit count and connectivity
26
+ - Break complex goals into simpler sub-circuits that can be composed
27
+ - Document your reasoning for the chosen approach
28
+
29
+ ## Output Format:
30
+ When you select a tool, explain your reasoning briefly. Focus on:
31
+ 1. Why this approach fits the goal
32
+ 2. What the expected circuit structure will be
33
+ 3. Any constraints or considerations for the next steps
34
+
35
+ Be concise and action-oriented. Your job is to get a working circuit started."""
36
+
37
+
38
+ # ============================================================
39
+ # BUILDER AGENT PROMPT
40
+ # ============================================================
41
+
42
+ BUILDER_PROMPT = """You are a Quantum Circuit Builder agent. Your role is to construct and modify quantum circuits.
43
+
44
+ ## Your Responsibilities:
45
+ 1. Build circuits based on architectural plans
46
+ 2. Compose multiple circuits together
47
+ 3. Apply circuit transformations (tensor, repeat)
48
+ 4. Ensure the circuit syntax is correct
49
+
50
+ ## Your Tools:
51
+ - create_from_template: Build from predefined templates
52
+ - generate_random_circuit: Create random circuits for testing
53
+ - generate_from_description: Build from natural language
54
+ - compose_circuits: Combine circuits sequentially
55
+ - tensor_circuits: Combine circuits in parallel
56
+ - repeat_circuit: Repeat a circuit pattern
57
+
58
+ ## Guidelines:
59
+ - Follow the architect's plan closely
60
+ - Use compose_circuits to chain operations
61
+ - Use tensor_circuits when operations should be parallel
62
+ - Start with simple building blocks and combine them
63
+ - Check that qubit counts match when composing
64
+
65
+ ## Output Format:
66
+ Produce valid OpenQASM 2.0 circuits. When using tools:
67
+ 1. Specify exact parameters
68
+ 2. Explain how this builds toward the goal
69
+ 3. Note any assumptions about qubit ordering"""
70
+
71
+
72
+ # ============================================================
73
+ # VALIDATOR AGENT PROMPT
74
+ # ============================================================
75
+
76
+ VALIDATOR_PROMPT = """You are a Quantum Circuit Validator agent. Your role is to ensure circuits are correct and executable.
77
+
78
+ ## Your Responsibilities:
79
+ 1. Validate circuit syntax
80
+ 2. Check hardware connectivity compliance
81
+ 3. Verify unitary correctness
82
+ 4. Report any issues clearly
83
+
84
+ ## Your Tools:
85
+ - validate_syntax: Check QASM syntax for errors
86
+ - check_connectivity: Verify circuit works on target hardware
87
+ - verify_unitary: Confirm circuit produces valid unitary
88
+
89
+ ## Validation Order:
90
+ 1. ALWAYS start with syntax validation
91
+ 2. Then check connectivity for the target hardware
92
+ 3. Finally verify unitary correctness
93
+
94
+ ## Guidelines:
95
+ - Be thorough - check all aspects
96
+ - Report specific line numbers and gates for errors
97
+ - Suggest fixes when possible
98
+ - Hardware profiles available: ibm_eagle, ionq_aria, rigetti_aspen
99
+
100
+ ## Output Format:
101
+ Provide clear validation results:
102
+ - PASS/FAIL for each check
103
+ - Specific error locations if failed
104
+ - Suggestions for fixing issues"""
105
+
106
+
107
+ # ============================================================
108
+ # OPTIMIZER AGENT PROMPT
109
+ # ============================================================
110
+
111
+ OPTIMIZER_PROMPT = """You are a Quantum Circuit Optimizer agent. Your role is to improve circuit efficiency.
112
+
113
+ ## Your Responsibilities:
114
+ 1. Reduce circuit depth
115
+ 2. Minimize gate count
116
+ 3. Improve hardware fitness
117
+ 4. Apply optimization strategies
118
+
119
+ ## Your Tools:
120
+ - generate_inverse: Create inverse for identity elimination
121
+ - compose_circuits: Restructure by recomposing
122
+ - analyze_circuit: Check current metrics
123
+ - calculate_complexity: Get complexity score
124
+ - calculate_hardware_fitness: Check hardware compatibility
125
+
126
+ ## Optimization Strategies:
127
+ 1. Gate cancellation: U * U† = I
128
+ 2. Gate commutation: Reorder for parallel execution
129
+ 3. Decomposition: Break complex gates into native gates
130
+ 4. Depth reduction: Maximize parallelism
131
+
132
+ ## Guidelines:
133
+ - Always measure before and after optimization
134
+ - Target specific metrics (depth, gates, or fitness)
135
+ - Small improvements compound - iterate if needed
136
+ - Don't sacrifice correctness for speed
137
+
138
+ ## Output Format:
139
+ Report optimization results:
140
+ - Before/after metrics
141
+ - Techniques applied
142
+ - Improvement percentage"""
143
+
144
+
145
+ # ============================================================
146
+ # ANALYZER AGENT PROMPT
147
+ # ============================================================
148
+
149
+ ANALYZER_PROMPT = """You are a Quantum Circuit Analyzer agent. Your role is to extract insights from circuits.
150
+
151
+ ## Your Responsibilities:
152
+ 1. Parse and understand circuit structure
153
+ 2. Measure circuit properties (depth, gates, etc.)
154
+ 3. Simulate and get state/probability information
155
+ 4. Estimate resource requirements
156
+
157
+ ## Your Tools:
158
+ - parse_qasm: Extract circuit structure
159
+ - analyze_circuit: Get comprehensive analysis
160
+ - get_circuit_depth: Measure depth
161
+ - get_statevector: Get quantum state
162
+ - get_probabilities: Get measurement probabilities
163
+ - estimate_resources: Resource estimation
164
+ - estimate_noise: Noise impact estimation
165
+
166
+ ## Guidelines:
167
+ - Start with structural analysis (parse, analyze)
168
+ - Then get simulation results if needed
169
+ - Consider noise for realistic assessment
170
+ - Report findings clearly and completely
171
+
172
+ ## Analysis Areas:
173
+ 1. Structure: qubits, gates, depth, connectivity
174
+ 2. State: amplitudes, probabilities, entanglement
175
+ 3. Resources: execution time, error rates
176
+ 4. Comparison: vs ideal, vs other circuits
177
+
178
+ ## Output Format:
179
+ Provide structured analysis:
180
+ - Circuit summary (qubits, gates, depth)
181
+ - Key observations
182
+ - Recommendations if applicable"""
183
+
184
+
185
+ # ============================================================
186
+ # SCORER AGENT PROMPT
187
+ # ============================================================
188
+
189
+ SCORER_PROMPT = """You are a Quantum Circuit Scorer agent. Your role is to evaluate circuit quality.
190
+
191
+ ## Your Responsibilities:
192
+ 1. Calculate complexity scores
193
+ 2. Assess hardware fitness
194
+ 3. Measure expressibility
195
+ 4. Provide overall quality assessment
196
+
197
+ ## Your Tools:
198
+ - calculate_complexity: Lower is better (simpler circuit)
199
+ - calculate_hardware_fitness: Higher is better (easier to run)
200
+ - calculate_expressibility: How much state space coverage
201
+ - simulate_circuit: Verify functionality via simulation
202
+
203
+ ## Scoring Framework:
204
+ 1. Complexity (weight: 30%): Gate count, depth
205
+ 2. Hardware Fitness (weight: 40%): Connectivity, native gates
206
+ 3. Expressibility (weight: 20%): State space coverage
207
+ 4. Correctness (weight: 10%): Simulation accuracy
208
+
209
+ ## Guidelines:
210
+ - Always get all relevant scores
211
+ - Consider the specific use case when weighting
212
+ - Compare against reference circuits when available
213
+ - Provide actionable feedback
214
+
215
+ ## Output Format:
216
+ Provide comprehensive scoring:
217
+ - Individual scores with explanations
218
+ - Weighted overall score
219
+ - Strengths and weaknesses
220
+ - Improvement suggestions"""
221
+
222
+
223
+ # ============================================================
224
+ # COORDINATOR AGENT PROMPT (for Guided mode)
225
+ # ============================================================
226
+
227
+ COORDINATOR_PROMPT = """You are a Workflow Coordinator agent. Your role is to orchestrate other agents in a structured workflow.
228
+
229
+ ## Your Responsibilities:
230
+ 1. Parse the user's goal
231
+ 2. Determine the workflow sequence
232
+ 3. Dispatch tasks to specialized agents
233
+ 4. Collect and synthesize results
234
+
235
+ ## Workflow Templates:
236
+ 1. BUILD: Architect → Builder → Validator → Scorer
237
+ 2. OPTIMIZE: Analyzer → Optimizer → Validator → Scorer
238
+ 3. EVALUATE: Analyzer → Scorer
239
+ 4. FULL: Architect → Builder → Validator → Optimizer → Analyzer → Scorer
240
+
241
+ ## Guidelines:
242
+ - Choose the appropriate workflow for the goal
243
+ - Monitor agent progress and handle failures
244
+ - Aggregate results for final report
245
+ - Ensure each step completes before proceeding
246
+
247
+ ## State Machine:
248
+ - PLANNING: Determine workflow
249
+ - DISPATCHING: Assign task to agent
250
+ - WAITING: Wait for agent completion
251
+ - COLLECTING: Gather results
252
+ - COMPLETED: Final synthesis
253
+
254
+ ## Output Format:
255
+ Report workflow execution:
256
+ - Workflow chosen and why
257
+ - Each step's outcome
258
+ - Final aggregated results
259
+ - Any issues encountered"""
260
+
261
+
262
+ # Dictionary for easy access
263
+ ALL_PROMPTS = {
264
+ "architect": ARCHITECT_PROMPT,
265
+ "builder": BUILDER_PROMPT,
266
+ "validator": VALIDATOR_PROMPT,
267
+ "optimizer": OPTIMIZER_PROMPT,
268
+ "analyzer": ANALYZER_PROMPT,
269
+ "scorer": SCORER_PROMPT,
270
+ "coordinator": COORDINATOR_PROMPT
271
+ }
272
+
273
+
274
def get_prompt(agent_type: str) -> str:
    """Return the system prompt registered for *agent_type*, or "" if unknown."""
    try:
        return ALL_PROMPTS[agent_type]
    except KeyError:
        return ""
prompts/optimized_prompts.py ADDED
@@ -0,0 +1,289 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Path: QAgents-workflos/prompts/optimized_prompts.py
2
+ # Relations: Used by orchestrators/orchestrator.py (NakedOrchestrator)
3
+ # Description: Enhanced prompts for NAKED mode with quantum optimization guidance
4
+ # These prompts achieve 47.9/100 quality and can be further improved
5
+ # by adding explicit optimization constraints
6
+
7
+ """
8
+ Optimized Prompts: Direct LLM prompts for quantum circuit generation
9
+
10
+ Based on quality evaluation findings:
11
+ - NAKED mode outperforms multi-agent approaches
12
+ - Direct prompts with explicit constraints improve quality
13
+ - Avoids hallucinated measurements and unnecessary operations
14
+ """
15
+
16
+ # =============================================================================
17
+ # QUANTUM CIRCUIT GENERATION PROMPT (NAKED MODE - OPTIMIZED)
18
+ # =============================================================================
19
+
20
# NAKED-mode generation prompt template, rendered via str.format().
# FIX: the template previously contained `{if max_depth else ...}` and
# `{forbidden_gates if forbidden_gates else "none"}` — conditional
# expressions are not valid str.format field names, so every .format()
# call raised. All placeholders are now plain field names:
# problem_statement, min_qubits, max_qubits, max_depth,
# required_gates, forbidden_gates.
QUANTUM_CIRCUIT_OPTIMIZED = """You are an expert quantum circuit designer. Generate OpenQASM 2.0 circuits that are:
1. MINIMAL - use fewest possible gates
2. CORRECT - solve the specific problem
3. OPTIMAL - prefer lower depth and fewer two-qubit gates

CRITICAL CONSTRAINTS:
- Do NOT add measurement operations unless explicitly requested
- Do NOT use extra qubits beyond what the problem requires
- Do NOT add arbitrary gates (be precise)
- Prefer single-qubit gates over two-qubit gates
- Minimize circuit depth

PROBLEM: {problem_statement}

EXPECTED OUTPUT:
- Exactly {min_qubits} qubits (may use up to {max_qubits} if needed, but justify)
- Maximum {max_depth} gate depth (if applicable)
- Only gates in: {required_gates}
- Avoid gates: {forbidden_gates}

SOLUTION APPROACH:
1. Understand what quantum state/operation is needed
2. Choose the minimal gate sequence
3. Verify the gates are available
4. Return ONLY the QASM code

Return the complete OpenQASM 2.0 circuit wrapped in code blocks.
Format:
```qasm
OPENQASM 2.0;
include "qelib1.inc";
[Your circuit here]
```

Remember: Simplicity and correctness first, optimization second."""
55
+
56
+ # =============================================================================
57
+ # ENHANCED QUANTUM CIRCUIT GENERATION (WITH OPTIMIZATION HINTS)
58
+ # =============================================================================
59
+
60
# Enhanced NAKED-mode template with explicit optimization guidance,
# rendered via str.format().
# FIX: `{max_depth if max_depth else "minimal"}` and
# `{forbidden_gates if forbidden_gates else "none"}` were invalid
# str.format field names (conditional expressions are not supported),
# so rendering always raised. Placeholders are now plain field names:
# problem_statement, min_qubits, max_depth, required_gates, forbidden_gates.
QUANTUM_CIRCUIT_OPTIMIZED_V2 = """You are an expert quantum circuit designer with deep knowledge of quantum gate theory and optimization.

TASK: Generate an OpenQASM 2.0 quantum circuit that solves the following problem.

PROBLEM: {problem_statement}

DESIGN REQUIREMENTS:
✓ Use exactly {min_qubits} qubit(s)
✓ Keep depth ≤ {max_depth}
✓ Only use these gates: {required_gates}
✓ Do NOT use: {forbidden_gates}

CRITICAL RULES (must follow):
1. NO measurement operations unless explicitly required
2. NO extra qubits - use only what's needed
3. NO unnecessary gates - every gate serves a purpose
4. Prefer H, X, Z, CX over complex multi-qubit gates
5. Gate cancellations (e.g., X·X = I) are encouraged

OPTIMIZATION GUIDANCE:
- Minimize depth: Each qubit layer should have parallel operations where possible
- Minimize two-qubit gates: These are most expensive
- Look for identities: XX=I, ZZ=I, HZH=X, HXH=Z, etc.
- Consider what state you're creating, not just what gates to apply

SOLUTION CHECKLIST:
Before generating the circuit, think through:
1. What is the target quantum state? (e.g., |+⟩, |Φ+⟩, etc.)
2. What's the minimal gate sequence to create it?
3. Can any gates be combined or cancelled?
4. Is the depth truly minimal?

OUTPUT FORMAT:
Return ONLY the OpenQASM 2.0 code in a code block:

```qasm
OPENQASM 2.0;
include "qelib1.inc";
qreg q[{min_qubits}];
[Your gates here]
```

Do NOT include explanations, do NOT include measurements, do NOT use extra qubits."""
103
+
104
+ # =============================================================================
105
+ # SPECIALIZED PROMPTS FOR PROBLEM CATEGORIES
106
+ # =============================================================================
107
+
108
# Prompt template for state-preparation problems.
# Placeholders filled via str.format by get_specialized_prompt:
# problem_statement, expected_states, required_gates, min_qubits.
# (str.format ignores the extra keyword args the caller also passes.)
STATE_PREPARATION_PROMPT = """You are designing a quantum state preparation circuit.

PROBLEM: {problem_statement}

Your goal is to transform the initial state |0...0⟩ into the target quantum state.

TARGET STATE: {expected_states}

GATES AVAILABLE: {required_gates}

KEY INSIGHTS FOR STATE PREP:
- Hadamard (H) creates superposition: H|0⟩ = (|0⟩ + |1⟩)/√2
- Pauli-X flips: X|0⟩ = |1⟩, X|1⟩ = |0⟩
- Pauli-Z adds phase: Z|1⟩ = -|1⟩
- Phase flip: |−⟩ = (|0⟩ - |1⟩)/√2 requires X then H
- Bell states need H on first qubit, then CX

SOLUTION:
Return the minimal OpenQASM circuit:

```qasm
OPENQASM 2.0;
include "qelib1.inc";
qreg q[{min_qubits}];
[Your gates here]
```"""
134
+
135
# Prompt template for entanglement-generation problems.
# Placeholders filled via str.format: problem_statement, expected_states, min_qubits.
# Fix: the |Φ+⟩ ket's closing bracket was a mis-encoded character (``��``).
ENTANGLEMENT_PROMPT = """You are designing an entanglement circuit.

PROBLEM: {problem_statement}

Your goal is to create entanglement between qubits.

TARGET: {expected_states}

ENTANGLEMENT FACTS:
- Bell state |Φ+⟩ = (|00⟩ + |11⟩)/√2 requires: H on qubit 0, CX from 0→1
- Bell state |Φ-⟩ = (|00⟩ - |11⟩)/√2 requires: X on qubit 0, H on qubit 0, CX from 0→1
- GHZ state |GHZ⟩ = (|000⟩ + |111⟩)/√2 needs H on first, two CXs
- Entanglement requires multi-qubit gates (CX/CNOT)

SOLUTION:
Return the minimal OpenQASM circuit:

```qasm
OPENQASM 2.0;
include "qelib1.inc";
qreg q[{min_qubits}];
[Your gates here]
```"""
158
+
159
# Prompt template for algorithm-implementation problems.
# Placeholders filled via str.format: problem_statement, min_qubits.
# NOTE(review): {problem_statement} is interpolated twice (under PROBLEM and
# under ALGORITHM STRUCTURE) — presumably deliberate emphasis; confirm or dedupe.
ALGORITHM_PROMPT = """You are implementing a quantum algorithm.

PROBLEM: {problem_statement}

ALGORITHM STRUCTURE:
{problem_statement}

KEY ALGORITHM COMPONENTS:
- Prepare superposition (usually with Hadamard)
- Apply oracle (function evaluation)
- Apply diffusion/phase flip (algorithm-specific)
- Measure result

SOLUTION:
Return the complete OpenQASM circuit:

```qasm
OPENQASM 2.0;
include "qelib1.inc";
qreg q[{min_qubits}];
[Your gates here]
```

Focus on correctness of the algorithm structure over minimal gate count."""
183
+
184
+ # =============================================================================
185
+ # GATE SYNTHESIS / DECOMPOSITION
186
+ # =============================================================================
187
+
188
# Prompt template for gate-synthesis/decomposition problems.
# Placeholders filled via str.format: problem_statement, goal, required_gates,
# forbidden_gates, min_qubits.
# Fix: the original placeholder ``{forbidden_gates if forbidden_gates else "none"}``
# is not valid str.format field syntax (format() would raise KeyError); the
# callers already substitute the string "none" when the list is empty, so the
# plain ``{forbidden_gates}`` placeholder is correct here.
GATE_SYNTHESIS_PROMPT = """You are decomposing a complex quantum gate into basic gates.

PROBLEM: {problem_statement}

TARGET GATE: {goal}

DECOMPOSITION FACTS:
- SWAP gate = 3 CX gates (CX a→b, CX b→a, CX a→b)
- CZ gate = H on target, CX, H on target
- Y gate = S·X·S†
- T gate = rotation by π/8 around Z-axis
- Rx(θ) = H·Rz(θ)·H (where applicable)

CONSTRAINTS:
- Only use: {required_gates}
- Avoid: {forbidden_gates}
- Minimize gate count and depth

SOLUTION:
Return the decomposed OpenQASM circuit:

```qasm
OPENQASM 2.0;
include "qelib1.inc";
qreg q[{min_qubits}];
[Your decomposition here]
```"""
215
+
216
+ # =============================================================================
217
+ # HELPER FUNCTION: FORMAT PROMPT FOR PROBLEM
218
+ # =============================================================================
219
+
220
def get_optimized_prompt(problem, use_advanced=True):
    """Generate an optimized prompt for a problem.

    Args:
        problem: TestProblem instance (needs .prompt and .expected with
            min_qubits, max_qubits, max_depth, required_gates, forbidden_gates).
        use_advanced: Use the advanced V2 prompt with optimization hints.

    Returns:
        Formatted prompt string.
    """
    template = QUANTUM_CIRCUIT_OPTIMIZED_V2 if use_advanced else QUANTUM_CIRCUIT_OPTIMIZED

    expected = problem.expected

    # Fall back to the standard basis gates when the problem leaves them open.
    required_gates = expected.required_gates or ["h", "x", "z", "cx", "measure"]
    forbidden_gates = expected.forbidden_gates or []

    return template.format(
        problem_statement=problem.prompt,
        min_qubits=expected.min_qubits,
        max_qubits=expected.max_qubits,
        max_depth=expected.max_depth or "minimal",
        required_gates=", ".join(required_gates),
        forbidden_gates=", ".join(forbidden_gates) if forbidden_gates else "none",
        # getattr covers problem sets whose ExpectedOutput lacks expected_states.
        expected_states=getattr(expected, 'expected_states', "N/A"),
    )
250
+
251
+
252
def get_specialized_prompt(problem, use_advanced=True):
    """Generate a specialized prompt based on the problem's category.

    Args:
        problem: TestProblem instance.
        use_advanced: Accepted for signature parity with get_optimized_prompt;
            the category templates already embed their optimization hints.

    Returns:
        Formatted prompt string.
    """
    # Imported lazily to avoid a circular import with tests.test_problems.
    from tests.test_problems import ProblemCategory

    category_prompts = {
        ProblemCategory.STATE_PREPARATION: STATE_PREPARATION_PROMPT,
        ProblemCategory.GATE_SYNTHESIS: GATE_SYNTHESIS_PROMPT,
        ProblemCategory.ALGORITHM: ALGORITHM_PROMPT,
        ProblemCategory.ERROR_CORRECTION: QUANTUM_CIRCUIT_OPTIMIZED_V2,
        ProblemCategory.OPTIMIZATION: QUANTUM_CIRCUIT_OPTIMIZED_V2,
    }
    template = category_prompts.get(problem.category, QUANTUM_CIRCUIT_OPTIMIZED_V2)

    expected = problem.expected
    required_gates = expected.required_gates or ["h", "x", "z", "cx"]
    forbidden_gates = expected.forbidden_gates or []

    # str.format ignores keyword args a given template doesn't reference, so
    # it is safe to pass the union of all placeholders used by the templates.
    return template.format(
        problem_statement=problem.prompt,
        goal=problem.name,
        min_qubits=expected.min_qubits,
        max_qubits=expected.max_qubits,
        max_depth=expected.max_depth or "minimal",
        required_gates=", ".join(required_gates),
        forbidden_gates=", ".join(forbidden_gates) if forbidden_gates else "none",
        expected_states=getattr(expected, 'expected_states', "N/A"),
    )
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ google-genai>=0.6.0
2
+ litellm>=1.42.0
3
+ requests>=2.31.0
4
+ python-dotenv>=1.0.0
5
+ pydantic>=2.0.0
6
+ gradio>=4.0.0
tasks-project-state.json ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "project": "QAgents-Workflows",
3
+ "version": "0.8.0",
4
+ "description": "Multi-agent quantum circuit optimization system with multi-model fallback",
5
+ "last_updated": "2024-11-28",
6
+ "status": "BLACKBOARD_FIXED_QUASAR_ADDED",
7
+ "notes": "Fixed BLACKBOARD NoneType errors. Added QUASAR orchestrator with tiered verification. Added VERY_HARD problems. Mini test shows NAKED 3.3s and BLACKBOARD 15s both pass EASY.",
8
+
9
+ "comprehensive_test_results": {
10
+ "test_date": "2024-11-29",
11
+ "previous_results": {
12
+ "naked": {"success": "9/9 (100%)", "avg_time_ms": 3929},
13
+ "guided": {"success": "7/9 (78%)", "avg_time_ms": 23120},
14
+ "blackboard": {"success": "2/9 (22%)", "avg_time_ms": 13507}
15
+ },
16
+ "latest_test_20241129": {
17
+ "problem": "HARD - Deutsch Algorithm",
18
+ "naked": {"success": true, "time_ms": 3914, "gates": 5},
19
+ "quasar": {"success": true, "time_ms": 7254, "gates": 5},
20
+ "hybrid": {"success": true, "time_ms": 7181, "gates": 5},
21
+ "blackboard": {"success": true, "time_ms": 20915, "gates": 2},
22
+ "result": "ALL 4 MODES PASSED"
23
+ },
24
+ "very_hard_test": {
25
+ "problem": "VERY_HARD - 4-Qubit QFT",
26
+ "naked": {"success": true, "time_ms": 4473, "gates": 12},
27
+ "quasar": {"success": true, "time_ms": 7811, "gates": 12},
28
+ "hybrid": "interrupted - rate limiting",
29
+ "blackboard": "interrupted - rate limiting"
30
+ }
31
+ },
32
+
33
+ "fixes_applied_20241128": {
34
+ "blackboard_null_safety": {
35
+ "file": "orchestrators/orchestrator.py",
36
+ "changes": ["Added try/except in agent execution loop", "Added null-checking for action and result"]
37
+ },
38
+ "llm_adapter_null_safety": {
39
+ "file": "agents/llm_adapter.py",
40
+ "changes": ["Fixed response.text None handling", "Fixed _estimate_tokens with null-safe len()"]
41
+ }
42
+ },
43
+
44
+ "new_orchestrators": {
45
+ "quasar": {
46
+ "file": "orchestrators/quasar_orchestrator.py",
47
+ "description": "Tiered verification orchestrator (QUASAR-lite)",
48
+ "tiers": [
49
+ "Tier 1: Syntax validation via MCP",
50
+ "Tier 2: Circuit analysis (depth, gates)",
51
+ "Tier 3: Simulation verification",
52
+ "Tier 4: Semantic correctness"
53
+ ]
54
+ },
55
+ "hybrid": {
56
+ "description": "NAKED first, QUASAR fallback on failure"
57
+ }
58
+ },
59
+
60
+ "new_problems": {
61
+ "very_hard_difficulty": [
62
+ "4-Qubit QFT",
63
+ "5-Qubit Entanglement Chain",
64
+ "Simon's Algorithm (2-bit)",
65
+ "Quantum Adder (1+1=10)"
66
+ ]
67
+ },
68
+
69
+ "model_cascade": {
70
+ "preferred_model": "gemini-2.5-flash-lite",
71
+ "models": [
72
+ {"name": "gemma-3-27b-it", "rpd": 14400, "priority": 1},
73
+ {"name": "gemini-2.5-flash-lite", "rpd": 1000, "priority": 2, "default": true},
74
+ {"name": "gemini-2.5-flash", "rpd": 250, "priority": 3},
75
+ {"name": "gemini-2.0-flash", "rpd": 200, "priority": 4},
76
+ {"name": "gemini-2.5-pro", "rpd": 50, "priority": 5}
77
+ ]
78
+ },
79
+
80
+ "architectures": {
81
+ "naked": {
82
+ "description": "Direct LLM-to-QASM generation",
83
+ "status": "PRODUCTION_READY",
84
+ "success_rate": "100%",
85
+ "recommended": true
86
+ },
87
+ "guided": {
88
+ "description": "4-agent pipeline (Analyzer, Designer, Generator, Validator)",
89
+ "status": "DEPRECATED",
90
+ "success_rate": "78%",
91
+ "note": "Replaced by QUASAR"
92
+ },
93
+ "blackboard": {
94
+ "description": "Event-driven multi-agent blackboard",
95
+ "status": "FIXED",
96
+ "success_rate": "~100% (needs full retest)",
97
+ "note": "NoneType errors fixed, ~5x slower than NAKED"
98
+ },
99
+ "quasar": {
100
+ "description": "Tiered verification with MCP tools",
101
+ "status": "NEW",
102
+ "file": "orchestrators/quasar_orchestrator.py"
103
+ },
104
+ "hybrid": {
105
+ "description": "NAKED first, QUASAR fallback",
106
+ "status": "NEW"
107
+ }
108
+ },
109
+
110
+ "new_files_created": [
111
+ {"file": "prompts/optimized_prompts.py", "purpose": "Enhanced prompts for NAKED mode"},
112
+ {"file": "orchestrators/router.py", "purpose": "Difficulty-aware orchestrator selection"},
113
+ {"file": "tests/comprehensive_test.py", "purpose": "Full diagnostic test script"},
114
+ {"file": "docs/COMPREHENSIVE_TEST_ANALYSIS.md", "purpose": "Analysis of all test results"},
115
+ {"file": "docs/STRATEGIC_IMPROVEMENTS.md", "purpose": "Improvement roadmap based on findings"},
116
+ {"file": "docs/PROJECT_ANALYSIS_20251128.md", "purpose": "Deep project analysis"}
117
+ ],
118
+
119
+ "recommendations": {
120
+ "immediate": [
121
+ "Adopt NAKED mode for production - 100% success, fastest, most efficient",
122
+ "Fix BLACKBOARD null-checking or deprecate entirely",
123
+ "Integrate optimized_prompts.py into NAKED orchestrator"
124
+ ],
125
+ "short_term": [
126
+ "Add circuit quality scoring beyond gate count",
127
+ "Improve GUIDED generator for hard problems",
128
+ "Implement hybrid: NAKED first, GUIDED on failure"
129
+ ],
130
+ "long_term": [
131
+ "Auto-select mode based on problem difficulty",
132
+ "MCP validation integration for correctness verification",
133
+ "Cost-aware orchestrator selection"
134
+ ]
135
+ },
136
+
137
+ "usage": {
138
+ "prerequisites": [
139
+ "Start MCP server: python QuantumArchitect-MCP/app.py",
140
+ "Set GOOGLE_API_KEY environment variable",
141
+ "Activate venv: & .venv/Scripts/Activate.ps1"
142
+ ],
143
+ "commands": {
144
+ "comprehensive_test": "python tests/comprehensive_test.py",
145
+ "quality_eval": "python tests/run_quality_eval.py --mode all --difficulty all",
146
+ "quick_test": "python tests/run_quality_eval.py --quick"
147
+ }
148
+ }
149
+ }
tests/__init__.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tests module: Test problems and evaluation harness."""
2
+
3
+ from .test_problems import (
4
+ ProblemDifficulty,
5
+ ProblemCategory,
6
+ ExpectedOutput,
7
+ TestProblem,
8
+ # Problems by ID naming
9
+ PROBLEM_E1_PHASE_FLIP,
10
+ PROBLEM_E2_CONTROLLED_NOT,
11
+ PROBLEM_E3_MEASUREMENT_BASIS,
12
+ PROBLEM_M1_SWAP_DECOMPOSITION,
13
+ PROBLEM_M2_CONTROLLED_Z,
14
+ PROBLEM_M3_PHASE_ESTIMATION_PREP,
15
+ PROBLEM_H1_DEUTSCH,
16
+ PROBLEM_H2_GROVER_2QUBIT,
17
+ PROBLEM_H3_TELEPORTATION_PREP,
18
+ # Collections
19
+ EASY_PROBLEMS,
20
+ MEDIUM_PROBLEMS,
21
+ HARD_PROBLEMS,
22
+ ALL_PROBLEMS,
23
+ get_problem,
24
+ get_problems_by_difficulty,
25
+ get_problems_by_category,
26
+ get_problems_by_tag,
27
+ get_research_problem_set
28
+ )
29
+
30
+ from .evaluation_harness import (
31
+ MetricResult,
32
+ CostMetrics,
33
+ EvaluationResult,
34
+ AggregatedResults,
35
+ EvaluationHarness
36
+ )
37
+
38
+ from .circuit_quality_analyzer import (
39
+ CircuitQualityAnalyzer,
40
+ AnalysisResult,
41
+ get_analyzer
42
+ )
43
+
44
+ from .quality_evaluation_harness import (
45
+ QualityEvaluationHarness,
46
+ run_quick_quality_test
47
+ )
48
+
49
+ # Backward compatibility aliases
50
+ BELL_STATE_PROBLEM = PROBLEM_E2_CONTROLLED_NOT # Bell state is easy_002
51
+
52
+ __all__ = [
53
+ "ProblemDifficulty",
54
+ "ProblemCategory",
55
+ "ExpectedOutput",
56
+ "TestProblem",
57
+ "PROBLEM_E1_PHASE_FLIP",
58
+ "PROBLEM_E2_CONTROLLED_NOT",
59
+ "PROBLEM_E3_MEASUREMENT_BASIS",
60
+ "PROBLEM_M1_SWAP_DECOMPOSITION",
61
+ "PROBLEM_M2_CONTROLLED_Z",
62
+ "PROBLEM_M3_PHASE_ESTIMATION_PREP",
63
+ "PROBLEM_H1_DEUTSCH",
64
+ "PROBLEM_H2_GROVER_2QUBIT",
65
+ "PROBLEM_H3_TELEPORTATION_PREP",
66
+ "EASY_PROBLEMS",
67
+ "MEDIUM_PROBLEMS",
68
+ "HARD_PROBLEMS",
69
+ "ALL_PROBLEMS",
70
+ "get_problem",
71
+ "get_problems_by_difficulty",
72
+ "get_problems_by_category",
73
+ "get_problems_by_tag",
74
+ "get_research_problem_set",
75
+ "MetricResult",
76
+ "CostMetrics",
77
+ "EvaluationResult",
78
+ "AggregatedResults",
79
+ "EvaluationHarness",
80
+ "BELL_STATE_PROBLEM",
81
+ # Quality analysis
82
+ "CircuitQualityAnalyzer",
83
+ "AnalysisResult",
84
+ "get_analyzer",
85
+ "QualityEvaluationHarness",
86
+ "run_quick_quality_test"
87
+ ]
tests/circuit_quality_analyzer.py ADDED
@@ -0,0 +1,351 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Path: QAgents-workflos/tests/circuit_quality_analyzer.py
2
+ # Relations: Uses client/mcp_client.py for MCP calls, database/circuit_quality_db.py for storage
3
+ # Description: Analyzes circuit quality using MCP endpoints
4
+ # Extracts: depth, gate_count, cx_count, hardware_fitness, validation, simulation
5
+ # Returns QualityMetrics for storage in database
6
+
7
+ """
8
+ Circuit Quality Analyzer: Use MCP endpoints to measure circuit quality.
9
+ This module connects to the MCP server and extracts quality metrics.
10
+ """
11
+
12
import logging
import re
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Tuple
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
@dataclass
class AnalysisResult:
    """Result from analyzing a circuit.

    Numeric metrics default to 0 / 0.0; the mutable containers use
    ``field(default_factory=...)`` so every result gets its own fresh dict/list
    (replacing the previous ``= None`` + ``__post_init__`` workaround, whose
    annotations were also wrong: the fields are never actually None).
    """
    depth: int = 0                    # estimated circuit depth (layers)
    gate_count: int = 0               # total gates counted
    cx_count: int = 0                 # two-qubit gate count
    single_qubit_count: int = 0       # single-qubit gate count
    hardware_fitness: float = 0.0     # 0..1, higher is better
    syntax_valid: bool = False
    complexity_score: float = 0.0
    state_correctness: float = 0.0    # 0..1 match against expected distribution
    noise_estimate: float = 0.0
    probabilities: Dict[str, float] = field(default_factory=dict)
    errors: List[str] = field(default_factory=list)
+
41
+
42
+ class CircuitQualityAnalyzer:
43
+ """
44
+ Analyzes circuit quality using MCP endpoints.
45
+ Connects to the running MCP server to get quality metrics.
46
+ """
47
+
48
+ def __init__(self, mcp_url: str = "http://127.0.0.1:7861"):
49
+ self.mcp_url = mcp_url
50
+ self._client = None
51
+
52
+ def _get_client(self):
53
+ """Get or create MCP client."""
54
+ if self._client is None:
55
+ try:
56
+ from client import get_client
57
+ self._client = get_client(self.mcp_url)
58
+ except Exception as e:
59
+ logger.error(f"Failed to get MCP client: {e}")
60
+ return None
61
+ return self._client
62
+
63
+ def _extract_value(self, result: Any, keys: List[str], default: Any = 0) -> Any:
64
+ """Safely extract value from nested result."""
65
+ if result is None:
66
+ return default
67
+
68
+ if isinstance(result, (int, float, bool)):
69
+ return result
70
+
71
+ if isinstance(result, list):
72
+ return result[0] if result else default
73
+
74
+ if isinstance(result, dict):
75
+ for key in keys:
76
+ if key in result:
77
+ val = result[key]
78
+ if isinstance(val, (int, float)):
79
+ return val
80
+ elif isinstance(val, dict):
81
+ # Try common nested keys
82
+ for subkey in ['value', 'score', 'depth', 'count', 'result']:
83
+ if subkey in val:
84
+ return val[subkey]
85
+ elif isinstance(val, list):
86
+ return val[0] if val else default
87
+ return val
88
+ # Try first value in dict
89
+ for v in result.values():
90
+ if isinstance(v, (int, float)):
91
+ return v
92
+
93
+ return default
94
+
95
+ def analyze_circuit(self, qasm_code: str, expected_states: Dict[str, float] = None) -> AnalysisResult:
96
+ """
97
+ Analyze a circuit using MCP endpoints.
98
+
99
+ Args:
100
+ qasm_code: The QASM code to analyze
101
+ expected_states: Expected probability distribution for correctness check
102
+
103
+ Returns:
104
+ AnalysisResult with all quality metrics
105
+ """
106
+ result = AnalysisResult()
107
+
108
+ if not qasm_code or not qasm_code.strip():
109
+ result.errors.append("Empty QASM code")
110
+ return result
111
+
112
+ client = self._get_client()
113
+ if client is None:
114
+ # Fallback to local analysis
115
+ return self._analyze_locally(qasm_code, expected_states)
116
+
117
+ # 1. Validate syntax
118
+ try:
119
+ resp = client.validate_syntax(qasm_code)
120
+ if resp.success:
121
+ valid = resp.data
122
+ if isinstance(valid, dict):
123
+ result.syntax_valid = valid.get('valid', False) or valid.get('is_valid', False)
124
+ elif isinstance(valid, bool):
125
+ result.syntax_valid = valid
126
+ elif isinstance(valid, list):
127
+ result.syntax_valid = "valid" in str(valid).lower()
128
+ else:
129
+ result.syntax_valid = bool(valid)
130
+ else:
131
+ result.errors.append(f"Validation error: {resp.error}")
132
+ except Exception as e:
133
+ result.errors.append(f"Validation failed: {e}")
134
+ # Still try to parse locally
135
+ result.syntax_valid = "OPENQASM" in qasm_code and "qreg" in qasm_code
136
+
137
+ # 2. Analyze circuit structure
138
+ try:
139
+ resp = client.analyze_circuit(qasm_code)
140
+ if resp.success and resp.data:
141
+ data = resp.data
142
+ if isinstance(data, dict):
143
+ result.depth = self._extract_value(data, ['depth', 'circuit_depth'], 0)
144
+ result.gate_count = self._extract_value(data, ['gate_count', 'gates', 'num_gates', 'total_gates'], 0)
145
+ result.cx_count = self._extract_value(data, ['cx_count', 'cnot_count', 'two_qubit_gates'], 0)
146
+ result.single_qubit_count = self._extract_value(data, ['single_qubit_count', 'single_qubit_gates', 'one_qubit_gates'], 0)
147
+ except Exception as e:
148
+ result.errors.append(f"Analysis failed: {e}")
149
+ # Fallback to local parsing
150
+ local = self._parse_qasm_locally(qasm_code)
151
+ result.depth = local.get('depth', 0)
152
+ result.gate_count = local.get('gate_count', 0)
153
+ result.cx_count = local.get('cx_count', 0)
154
+ result.single_qubit_count = local.get('single_qubit_count', 0)
155
+
156
+ # 3. Get circuit depth if not already set
157
+ if result.depth == 0:
158
+ try:
159
+ resp = client.get_circuit_depth(qasm_code)
160
+ if resp.success:
161
+ result.depth = self._extract_value(resp.data, ['depth', 'value'], 0)
162
+ except Exception as e:
163
+ result.errors.append(f"Depth check failed: {e}")
164
+
165
+ # 4. Calculate hardware fitness
166
+ try:
167
+ resp = client.calculate_hardware_fitness(qasm_code, "ibm_brisbane")
168
+ if resp.success:
169
+ result.hardware_fitness = self._extract_value(resp.data,
170
+ ['fitness', 'fitness_score', 'hardware_fitness', 'score'], 0.0)
171
+ if result.hardware_fitness > 1.0:
172
+ result.hardware_fitness = result.hardware_fitness / 100.0
173
+ except Exception as e:
174
+ result.errors.append(f"Hardware fitness failed: {e}")
175
+
176
+ # 5. Calculate complexity
177
+ try:
178
+ resp = client.calculate_complexity_score(qasm_code)
179
+ if resp.success:
180
+ result.complexity_score = self._extract_value(resp.data,
181
+ ['complexity', 'complexity_score', 'score', 'total'], 0.0)
182
+ except Exception as e:
183
+ result.errors.append(f"Complexity check failed: {e}")
184
+
185
+ # 6. Get probabilities and check correctness
186
+ try:
187
+ resp = client.get_probabilities(qasm_code)
188
+ if resp.success and resp.data:
189
+ probs = resp.data
190
+ if isinstance(probs, dict):
191
+ result.probabilities = probs
192
+ if expected_states:
193
+ result.state_correctness = self._check_correctness(probs, expected_states)
194
+ else:
195
+ # No expected states - assume 100% if circuit runs
196
+ result.state_correctness = 1.0
197
+ except Exception as e:
198
+ result.errors.append(f"Probability check failed: {e}")
199
+ if expected_states is None:
200
+ result.state_correctness = 0.8 # Partial credit if other metrics pass
201
+
202
+ # 7. Estimate noise
203
+ try:
204
+ resp = client.estimate_noise(qasm_code, "ibm_brisbane")
205
+ if resp.success:
206
+ result.noise_estimate = self._extract_value(resp.data,
207
+ ['noise', 'noise_estimate', 'error_rate', 'fidelity'], 0.0)
208
+ except Exception as e:
209
+ result.errors.append(f"Noise estimation failed: {e}")
210
+
211
+ return result
212
+
213
+ def _analyze_locally(self, qasm_code: str, expected_states: Dict[str, float] = None) -> AnalysisResult:
214
+ """Fallback local analysis when MCP is unavailable."""
215
+ result = AnalysisResult()
216
+
217
+ # Basic syntax check
218
+ result.syntax_valid = "OPENQASM" in qasm_code and "qreg" in qasm_code
219
+
220
+ # Parse gates
221
+ local = self._parse_qasm_locally(qasm_code)
222
+ result.depth = local.get('depth', 0)
223
+ result.gate_count = local.get('gate_count', 0)
224
+ result.cx_count = local.get('cx_count', 0)
225
+ result.single_qubit_count = local.get('single_qubit_count', 0)
226
+
227
+ # Estimate hardware fitness based on structure
228
+ if result.gate_count > 0:
229
+ # Penalize high CX ratio
230
+ cx_ratio = result.cx_count / result.gate_count
231
+ result.hardware_fitness = max(0.0, 1.0 - cx_ratio * 0.5)
232
+
233
+ # Complexity estimate
234
+ result.complexity_score = result.depth + result.cx_count * 2
235
+
236
+ # State correctness if syntax valid
237
+ if result.syntax_valid:
238
+ result.state_correctness = 0.7 # Partial credit
239
+
240
+ result.errors.append("Used local fallback analysis")
241
+ return result
242
+
243
+ def _parse_qasm_locally(self, qasm_code: str) -> Dict[str, int]:
244
+ """Parse QASM locally to extract gate counts."""
245
+ result = {
246
+ 'depth': 0,
247
+ 'gate_count': 0,
248
+ 'cx_count': 0,
249
+ 'single_qubit_count': 0
250
+ }
251
+
252
+ lines = qasm_code.strip().split('\n')
253
+ gate_depth_map = {} # qubit -> current depth
254
+
255
+ single_qubit_gates = ['h', 'x', 'y', 'z', 's', 't', 'sdg', 'tdg', 'rx', 'ry', 'rz', 'u1', 'u2', 'u3']
256
+ two_qubit_gates = ['cx', 'cz', 'swap', 'cp', 'crz', 'cnot']
257
+
258
+ for line in lines:
259
+ line = line.strip().lower()
260
+ if not line or line.startswith('//') or line.startswith('openqasm') or line.startswith('include'):
261
+ continue
262
+ if line.startswith('qreg') or line.startswith('creg') or line.startswith('measure') or line.startswith('barrier'):
263
+ continue
264
+
265
+ # Check for gates
266
+ for gate in single_qubit_gates:
267
+ if line.startswith(gate + ' ') or line.startswith(gate + '('):
268
+ result['single_qubit_count'] += 1
269
+ result['gate_count'] += 1
270
+ # Extract qubit
271
+ match = re.search(r'q\[(\d+)\]', line)
272
+ if match:
273
+ q = int(match.group(1))
274
+ gate_depth_map[q] = gate_depth_map.get(q, 0) + 1
275
+ break
276
+
277
+ for gate in two_qubit_gates:
278
+ if line.startswith(gate + ' '):
279
+ result['cx_count'] += 1
280
+ result['gate_count'] += 1
281
+ # Extract qubits
282
+ matches = re.findall(r'q\[(\d+)\]', line)
283
+ if matches:
284
+ for q in matches:
285
+ q = int(q)
286
+ gate_depth_map[q] = gate_depth_map.get(q, 0) + 1
287
+ break
288
+
289
+ if gate_depth_map:
290
+ result['depth'] = max(gate_depth_map.values())
291
+
292
+ return result
293
+
294
+ def _check_correctness(self, actual: Dict[str, float], expected: Dict[str, float]) -> float:
295
+ """Check how close actual probabilities are to expected."""
296
+ if not expected:
297
+ return 1.0
298
+
299
+ total_error = 0.0
300
+ for state, exp_prob in expected.items():
301
+ act_prob = actual.get(state, 0.0)
302
+ total_error += abs(exp_prob - act_prob)
303
+
304
+ # Also check for unexpected states
305
+ for state, act_prob in actual.items():
306
+ if state not in expected and act_prob > 0.01:
307
+ total_error += act_prob
308
+
309
+ # Normalize (max error = 2.0)
310
+ correctness = max(0.0, 1.0 - total_error / 2.0)
311
+ return correctness
312
+
313
+ def compare_circuits(self, qasm1: str, qasm2: str) -> Dict[str, Any]:
314
+ """Compare two circuits and return quality differences."""
315
+ result1 = self.analyze_circuit(qasm1)
316
+ result2 = self.analyze_circuit(qasm2)
317
+
318
+ return {
319
+ "circuit1": {
320
+ "depth": result1.depth,
321
+ "gate_count": result1.gate_count,
322
+ "cx_count": result1.cx_count,
323
+ "hardware_fitness": result1.hardware_fitness,
324
+ "syntax_valid": result1.syntax_valid
325
+ },
326
+ "circuit2": {
327
+ "depth": result2.depth,
328
+ "gate_count": result2.gate_count,
329
+ "cx_count": result2.cx_count,
330
+ "hardware_fitness": result2.hardware_fitness,
331
+ "syntax_valid": result2.syntax_valid
332
+ },
333
+ "comparison": {
334
+ "depth_diff": result2.depth - result1.depth,
335
+ "gate_diff": result2.gate_count - result1.gate_count,
336
+ "cx_diff": result2.cx_count - result1.cx_count,
337
+ "fitness_diff": result2.hardware_fitness - result1.hardware_fitness,
338
+ "circuit1_better": result1.depth < result2.depth or result1.hardware_fitness > result2.hardware_fitness
339
+ }
340
+ }
341
+
342
+
343
# Module-level singleton
_analyzer: Optional[CircuitQualityAnalyzer] = None

def get_analyzer(mcp_url: str = "http://127.0.0.1:7861") -> CircuitQualityAnalyzer:
    """Get or create the quality analyzer.

    Note: the instance is cached after the first call, so ``mcp_url`` is only
    honored on the very first invocation; later calls return the cached
    analyzer regardless of the URL passed.
    """
    global _analyzer
    if _analyzer is None:
        _analyzer = CircuitQualityAnalyzer(mcp_url)
    return _analyzer
tests/comprehensive_test.py ADDED
@@ -0,0 +1,287 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Path: QAgents-workflos/tests/comprehensive_test.py
2
+ # Relations: Uses orchestrators/, tests/test_problems.py, config.py
3
+ # Description: Comprehensive test across all difficulties with detailed diagnostics
4
+ # Run with: python tests/comprehensive_test.py
5
+
6
+ """
7
+ Comprehensive Circuit Generation Test
8
+
9
+ Tests all 9 problems (easy, medium, hard) with all 3 modes (naked, guided, blackboard).
10
+ Provides detailed diagnostics on where each mode succeeds/fails.
11
+ """
12
+
13
+ import sys
14
+ import time
15
+ import os
16
+ from datetime import datetime
17
+ from pathlib import Path
18
+
19
+ # Setup paths
20
+ sys.path.insert(0, str(Path(__file__).parent.parent))
21
+
22
+ from tests.test_problems import ALL_PROBLEMS, ProblemDifficulty
23
+ from orchestrators import create_orchestrator
24
+ from config import reset_cost_tracking, get_cost_summary, set_api_key
25
+
26
+
27
def extract_qasm(result):
    """Extract QASM text from an orchestrator result, or None if absent."""
    if not result:
        return None

    payload = result.final_output
    if isinstance(payload, list):
        payload = payload[0] if payload else None

    if not payload:
        return None
    return str(payload)
37
+
38
+
39
def validate_qasm(qasm):
    """Validate QASM structure and count gates.

    Returns a dict with:
      valid      -- True if the text contains an OPENQASM header
      has_qreg   -- True if a quantum register is declared
      gate_count -- number of gate-application statements
      depth      -- rough serial depth estimate (= gate_count; assumes no
                    parallelism, so it is an upper bound)

    Gates are identified from the leading token of each statement instead of
    substring counting: the old approach double-counted (e.g. "cx q[0],q[1];"
    matched both 'cx ' and the embedded 'x ') and also counted gate names
    appearing inside comments.
    """
    if not qasm:
        return {"valid": False, "has_qreg": False, "gate_count": 0, "depth": 0}

    valid = "OPENQASM" in qasm
    has_qreg = "qreg" in qasm

    known_gates = {
        'h', 'x', 'y', 'z', 's', 't', 'sdg', 'tdg',
        'cx', 'cz', 'cp', 'ccx', 'swap',
        'rx', 'ry', 'rz', 'crz', 'u1', 'u2', 'u3',
    }

    gate_count = 0
    for raw_line in qasm.split('\n'):
        line = raw_line.strip().lower()
        if not line or line.startswith('//'):
            continue
        # Leading identifier up to whitespace or '(' is the gate/keyword name
        # (handles both "h q[0];" and parameterized "rx(0.5) q[0];").
        head = line.split('(')[0].split()
        if head and head[0] in known_gates:
            gate_count += 1

    # Serial upper bound on depth; no per-qubit parallelism analysis here.
    depth = gate_count

    return {"valid": valid, "has_qreg": has_qreg, "gate_count": gate_count, "depth": depth}
58
+
59
+
60
+ def run_comprehensive_test():
61
+ """Run comprehensive test across all problems and modes."""
62
+
63
+ # Set API key
64
+ api_key = os.getenv('GOOGLE_API_KEY') or os.getenv('GENAI_API_KEY')
65
+ if api_key:
66
+ set_api_key(api_key)
67
+ else:
68
+ print("ERROR: No API key found. Set GOOGLE_API_KEY environment variable.")
69
+ return
70
+
71
+ print("=" * 100)
72
+ print("COMPREHENSIVE CIRCUIT GENERATION TEST - ALL DIFFICULTIES")
73
+ print("=" * 100)
74
+ print(f"Date: {datetime.now().isoformat()}")
75
+ print(f"Problems: {len(ALL_PROBLEMS)} total (3 easy, 3 medium, 3 hard)")
76
+ print(f"Modes: naked, guided, blackboard")
77
+ print("=" * 100)
78
+
79
+ # Store all results
80
+ all_results = []
81
+
82
+ # Test each problem with each mode
83
+ for problem in ALL_PROBLEMS:
84
+ print(f"\n\n{'=' * 100}")
85
+ print(f"PROBLEM: {problem.id} - {problem.name}")
86
+ print(f"Difficulty: {problem.difficulty.value.upper()}")
87
+ print(f"Category: {problem.category.value}")
88
+ print(f"Expected qubits: {problem.expected.min_qubits}-{problem.expected.max_qubits}")
89
+ print(f"Required gates: {problem.expected.required_gates}")
90
+ print(f"Expected states: {problem.expected.expected_states}")
91
+ print("=" * 100)
92
+
93
+ for mode in ['naked', 'guided', 'blackboard']:
94
+ print(f"\n--- {mode.upper()} MODE ---")
95
+ reset_cost_tracking()
96
+
97
+ start = time.perf_counter()
98
+ result = None
99
+ qasm = None
100
+
101
+ try:
102
+ orchestrator = create_orchestrator(mode)
103
+ result = orchestrator.run(problem.goal)
104
+
105
+ elapsed = (time.perf_counter() - start) * 1000
106
+ cost = get_cost_summary()
107
+
108
+ # Extract and validate QASM
109
+ qasm = extract_qasm(result)
110
+ validation = validate_qasm(qasm)
111
+
112
+ success = result.success if result else False
113
+ errors = result.errors if result else []
114
+
115
+ # Print detailed results
116
+ status = '✅' if success and validation['valid'] else '❌'
117
+ print(f"{status} Success: {success}")
118
+ print(f" Time: {elapsed:.0f}ms")
119
+ print(f" LLM Calls: {cost.get('total_requests', 0)}")
120
+ print(f" Tokens: {cost.get('total_tokens', 0)}")
121
+ print(f" QASM Valid: {validation['valid']}")
122
+ print(f" Has qreg: {validation['has_qreg']}")
123
+ print(f" Gate Count: {validation['gate_count']}")
124
+ print(f" Est. Depth: {validation['depth']}")
125
+
126
+ if errors:
127
+ print(f" ⚠️ Errors: {errors[:2]}")
128
+
129
+ if qasm:
130
+ # Show first few lines of QASM
131
+ lines = qasm.split('\n')[:8]
132
+ print(" QASM:")
133
+ for line in lines:
134
+ print(f" {line}")
135
+ if len(qasm.split('\n')) > 8:
136
+ print(" ...")
137
+ else:
138
+ print(" QASM: None generated")
139
+
140
+ all_results.append({
141
+ 'problem_id': problem.id,
142
+ 'problem_name': problem.name,
143
+ 'difficulty': problem.difficulty.value,
144
+ 'category': problem.category.value,
145
+ 'mode': mode,
146
+ 'success': success and validation['valid'],
147
+ 'qasm_valid': validation['valid'],
148
+ 'time_ms': elapsed,
149
+ 'llm_calls': cost.get('total_requests', 0),
150
+ 'tokens': cost.get('total_tokens', 0),
151
+ 'gate_count': validation['gate_count'],
152
+ 'depth': validation['depth'],
153
+ 'qasm': qasm[:500] if qasm else None,
154
+ 'error': str(errors[0])[:100] if errors else None
155
+ })
156
+
157
+ except Exception as e:
158
+ elapsed = (time.perf_counter() - start) * 1000
159
+ error_msg = f"{type(e).__name__}: {str(e)[:200]}"
160
+ print(f"❌ EXCEPTION: {error_msg}")
161
+
162
+ import traceback
163
+ traceback.print_exc()
164
+
165
+ all_results.append({
166
+ 'problem_id': problem.id,
167
+ 'problem_name': problem.name,
168
+ 'difficulty': problem.difficulty.value,
169
+ 'category': problem.category.value,
170
+ 'mode': mode,
171
+ 'success': False,
172
+ 'qasm_valid': False,
173
+ 'time_ms': elapsed,
174
+ 'llm_calls': 0,
175
+ 'tokens': 0,
176
+ 'gate_count': 0,
177
+ 'depth': 0,
178
+ 'qasm': None,
179
+ 'error': error_msg[:100]
180
+ })
181
+
182
+ # Print final summary
183
+ print_summary(all_results)
184
+
185
+ # Save results to JSON
186
+ output_path = Path(__file__).parent.parent / f"research/comprehensive_test_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
187
+ output_path.parent.mkdir(exist_ok=True)
188
+
189
+ import json
190
+ with open(output_path, 'w') as f:
191
+ json.dump(all_results, f, indent=2)
192
+ print(f"\n\nResults saved to: {output_path}")
193
+
194
+ return all_results
195
+
196
+
def print_summary(all_results):
    """Print summary by difficulty and mode.

    Args:
        all_results: List of per-run dicts built by the test loop; each has
            'difficulty', 'mode', 'success', 'time_ms', 'llm_calls',
            'gate_count', 'problem_id' and 'error' keys.

    Emits three console sections: a per-difficulty breakdown, a scored
    "winner" per difficulty, and overall per-mode recommendations.
    """

    print("\n\n" + "=" * 100)
    print("FINAL SUMMARY BY DIFFICULTY AND MODE")
    print("=" * 100)

    # Section 1: success/time/LLM/gate averages per (difficulty, mode) pair.
    for diff in ['easy', 'medium', 'hard']:
        print(f"\n{diff.upper()} PROBLEMS:")
        print("-" * 80)

        for mode in ['naked', 'guided', 'blackboard']:
            mode_results = [r for r in all_results if r['difficulty'] == diff and r['mode'] == mode]
            if mode_results:
                successes = sum(1 for r in mode_results if r['success'])
                total = len(mode_results)
                avg_time = sum(r['time_ms'] for r in mode_results) / total
                total_llm = sum(r['llm_calls'] for r in mode_results)
                avg_gates = sum(r['gate_count'] for r in mode_results) / total

                # Marker: full pass / partial pass / total failure.
                status = '✅' if successes == total else '⚠️ ' if successes > 0 else '❌'
                print(f"{status} {mode:12} | Success: {successes}/{total} | Time: {avg_time:>6.0f}ms | LLM: {total_llm:>2} | Avg Gates: {avg_gates:.1f}")

                # Show failures
                failures = [r for r in mode_results if not r['success']]
                for f in failures:
                    error_msg = f['error'][:60] if f['error'] else 'No QASM generated'
                    print(f" ❌ {f['problem_id']}: {error_msg}")

    # Calculate winners
    # Section 2: composite score rewards success, penalizes latency and LLM calls.
    print("\n\n" + "=" * 100)
    print("🏆 WINNER BY DIFFICULTY (Score = Success*100 - Time/1000 - LLM*0.5)")
    print("=" * 100)

    for diff in ['easy', 'medium', 'hard']:
        print(f"\n{diff.upper()}:")
        best_mode = None
        best_score = -999  # sentinel below any achievable score

        for mode in ['naked', 'guided', 'blackboard']:
            mode_results = [r for r in all_results if r['difficulty'] == diff and r['mode'] == mode]
            if mode_results:
                successes = sum(1 for r in mode_results if r['success'])
                total = len(mode_results)
                avg_time = sum(r['time_ms'] for r in mode_results) / total
                total_llm = sum(r['llm_calls'] for r in mode_results)

                success_rate = successes / total
                time_penalty = avg_time / 1000
                llm_penalty = total_llm * 0.5
                score = success_rate * 100 - time_penalty - llm_penalty

                print(f" {mode:12}: Score={score:>6.1f} (Success={success_rate*100:.0f}%, Time={avg_time:.0f}ms, LLM={total_llm})")

                if score > best_score:
                    best_score = score
                    best_mode = mode

        print(f" 🏆 WINNER: {best_mode.upper() if best_mode else 'NONE'}")

    # Overall recommendation
    # Section 3: aggregate stats per mode across all difficulties.
    print("\n\n" + "=" * 100)
    print("OVERALL RECOMMENDATIONS")
    print("=" * 100)

    # Calculate overall stats per mode
    for mode in ['naked', 'guided', 'blackboard']:
        mode_results = [r for r in all_results if r['mode'] == mode]
        if mode_results:
            successes = sum(1 for r in mode_results if r['success'])
            total = len(mode_results)
            avg_time = sum(r['time_ms'] for r in mode_results) / total
            total_llm = sum(r['llm_calls'] for r in mode_results)
            avg_gates = sum(r['gate_count'] for r in mode_results) / total

            print(f"\n{mode.upper()}:")
            print(f" Overall Success: {successes}/{total} ({100*successes/total:.0f}%)")
            print(f" Average Time: {avg_time:.0f}ms")
            print(f" Total LLM Calls: {total_llm}")
            print(f" Average Gates: {avg_gates:.1f}")

            # List failures
            failures = [r for r in mode_results if not r['success']]
            if failures:
                print(f" Failures ({len(failures)}):")
                for f in failures:
                    print(f" - {f['problem_id']} ({f['difficulty']}): {f['error'][:50] if f['error'] else 'Unknown'}")
284
+
285
+
if __name__ == "__main__":
    # Entry point: run the full easy/medium/hard sweep across the three
    # orchestration modes when executed directly as a script.
    run_comprehensive_test()
tests/comprehensive_test_v2.py ADDED
@@ -0,0 +1,299 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Path: QAgents-workflos/tests/comprehensive_test_v2.py
2
+ # Relations: Uses orchestrators, test_problems, client/mcp_client
3
+ # Description: Full diagnostic test comparing all 5 modes including QUASAR and HYBRID
4
+ """
5
+ Comprehensive Test V2: Compare all orchestration modes
6
+
7
+ Modes tested:
8
+ 1. NAKED - Direct LLM (baseline)
9
+ 2. GUIDED - Multi-agent pipeline
10
+ 3. BLACKBOARD - Event-driven agents
11
+ 4. QUASAR - Tool-augmented LLM with hierarchical validation
12
+ 5. HYBRID - NAKED first, QUASAR fallback
13
+
14
+ Problems:
15
+ - 3 EASY
16
+ - 3 MEDIUM
17
+ - 3 HARD
18
+ - 4 VERY_HARD (new - to find NAKED limits)
19
+ """
20
+
21
+ import sys
22
+ import os
23
+ import json
24
+ import time
25
+ from datetime import datetime
26
+ from pathlib import Path
27
+
28
+ # Setup paths
29
+ sys.path.insert(0, str(Path(__file__).parent.parent.absolute()))
30
+
# Set API key BEFORE any project imports (config reads it during import).
api_key = os.getenv('GOOGLE_API_KEY')
if not api_key:
    # BUG FIX: the original fell back to the literal string
    # "$env:GOOGLE_API_KEY" (a PowerShell expression pasted as code), which
    # injected a garbage key and produced confusing auth failures later.
    # Fail fast with a clear message instead.
    raise SystemExit("ERROR: GOOGLE_API_KEY environment variable is not set.")
os.environ['GOOGLE_API_KEY'] = api_key
36
+
37
+ from tests.test_problems import (
38
+ ALL_PROBLEMS, EASY_PROBLEMS, MEDIUM_PROBLEMS,
39
+ HARD_PROBLEMS, VERY_HARD_PROBLEMS,
40
+ ProblemDifficulty
41
+ )
42
+ from orchestrators import create_orchestrator
43
+ from orchestrators.quasar_orchestrator import QuasarOrchestrator, HybridOrchestrator
44
+ from config import reset_cost_tracking, get_cost_summary, set_api_key
45
+ from client.mcp_client import get_client
46
+
47
+ # Set API key in config
48
+ set_api_key(api_key)
49
+
50
+
def extract_qasm_metrics(qasm: str) -> dict:
    """Derive rough structural metrics from an OpenQASM 2.0 string.

    Returns a dict with 'gate_count' (whole-word gate mnemonic hits),
    'depth' (crude one-layer-per-gate-line estimate) and 'qubits'
    (width of the first qreg declaration). All zeros for empty input.
    """
    if not qasm:
        return {"gate_count": 0, "depth": 0, "qubits": 0}

    import re

    # Qubit count comes from the first qreg declaration, e.g. "qreg q[3];".
    qreg = re.search(r'qreg\s+\w+\[(\d+)\]', qasm)
    num_qubits = int(qreg.group(1)) if qreg else 0

    # Gate count: every whole-word occurrence of a known gate mnemonic.
    mnemonics = r'\b(h|x|y|z|s|t|sdg|tdg|cx|cz|cy|swap|ccx|rz|rx|ry|u1|u2|u3|p|cp)\b'
    gate_hits = re.findall(mnemonics, qasm, re.IGNORECASE)

    # Depth estimate (simplified): count non-declaration lines that look
    # like a gate application — assumes one layer per gate line.
    header_prefixes = ('OPENQASM', 'include', 'qreg', 'creg', '//')
    body = [ln.strip() for ln in qasm.split('\n')
            if ln.strip() and not ln.strip().startswith(header_prefixes)]
    layer_tokens = ['h ', 'x ', 'y ', 'z ', 'cx', 'cz', 'swap', 'rx', 'ry', 'rz', 'ccx']
    est_depth = sum(1 for ln in body if any(tok in ln.lower() for tok in layer_tokens))

    return {"gate_count": len(gate_hits), "depth": est_depth, "qubits": num_qubits}
71
+
72
+
def run_test(problem, mode: str) -> dict:
    """Execute one (problem, mode) combination and collect metrics.

    Returns a flat, JSON-serializable dict. Never raises: any exception
    from the orchestrator is captured in the 'error' field.
    """
    record = {
        "problem_id": problem.id,
        "problem_name": problem.name,
        "difficulty": problem.difficulty.value,
        "category": problem.category.value,
        "mode": mode,
        "success": False,
        "qasm_valid": False,
        "time_ms": 0,
        "llm_calls": 0,
        "tokens": 0,
        "gate_count": 0,
        "depth": 0,
        "qasm": None,
        "error": None,
        "tiers_passed": [],
        "iterations": 0,
    }

    started = time.perf_counter()
    reset_cost_tracking()

    try:
        if mode in ("quasar", "hybrid"):
            # Tool-augmented orchestrators accept expected values so they
            # can validate intermediate circuits themselves.
            if mode == "quasar":
                orchestrator = QuasarOrchestrator(max_iterations=3)
            else:
                orchestrator = HybridOrchestrator()

            outcome = orchestrator.run(
                goal=problem.prompt,
                expected_qubits=problem.expected.min_qubits,
                expected_states=problem.expected.expected_states if problem.expected.expected_states else None,
                max_depth=problem.expected.max_depth
            )

            record["success"] = outcome.success
            record["qasm"] = outcome.final_qasm
            record["llm_calls"] = outcome.llm_calls
            record["tokens"] = outcome.tokens_used
            record["tiers_passed"] = outcome.tiers_passed
            record["iterations"] = outcome.iterations

            if outcome.final_qasm:
                record["qasm_valid"] = True
                stats = extract_qasm_metrics(outcome.final_qasm)
                record["gate_count"] = stats["gate_count"]
                record["depth"] = stats["depth"]

            if outcome.errors:
                record["error"] = "; ".join(outcome.errors)

        else:
            # Standard orchestrators (naked / guided / blackboard).
            orchestrator = create_orchestrator(mode)
            outcome = orchestrator.run(problem.prompt)

            record["success"] = outcome.success
            record["qasm"] = outcome.final_output

            # LLM usage comes from the global cost tracker, not the result.
            usage = get_cost_summary()
            record["llm_calls"] = usage.get("llm_requests", 0)
            record["tokens"] = usage.get("total_tokens", 0)

            if outcome.final_output:
                record["qasm_valid"] = True
                stats = extract_qasm_metrics(outcome.final_output)
                record["gate_count"] = stats["gate_count"]
                record["depth"] = stats["depth"]

            if outcome.errors:
                record["error"] = "; ".join(outcome.errors)

    except Exception as exc:
        record["error"] = str(exc)

    record["time_ms"] = (time.perf_counter() - started) * 1000
    return record
155
+
156
+
def main():
    """Run every test problem through all five orchestration modes and
    print comparative statistics (success, time, LLM calls, gate counts).

    Results are also dumped as JSON under research/ for later analysis.
    """
    print("=" * 100)
    print("COMPREHENSIVE TEST V2 - ALL MODES INCLUDING QUASAR & HYBRID")
    print("=" * 100)
    print(f"Date: {datetime.now().isoformat()}")
    print(f"Problems: {len(ALL_PROBLEMS)} total")
    print(f" - Easy: {len(EASY_PROBLEMS)}")
    print(f" - Medium: {len(MEDIUM_PROBLEMS)}")
    print(f" - Hard: {len(HARD_PROBLEMS)}")
    print(f" - Very Hard: {len(VERY_HARD_PROBLEMS)}")
    print(f"Modes: naked, guided, blackboard, quasar, hybrid")
    print("=" * 100)

    # Check MCP server up front so validation fallbacks are visible.
    try:
        client = get_client()
        if client.health_check():
            print("✅ MCP Server connected")
        else:
            print("⚠️ MCP Server not responding - some validations may use fallback")
    except Exception:
        # FIX: was a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit during the health check.
        print("⚠️ MCP Server not available")

    all_results = []
    modes = ["naked", "quasar", "hybrid", "guided", "blackboard"]  # Order: fastest to slowest

    # Group problems by difficulty
    problem_groups = [
        ("EASY", EASY_PROBLEMS),
        ("MEDIUM", MEDIUM_PROBLEMS),
        ("HARD", HARD_PROBLEMS),
        ("VERY_HARD", VERY_HARD_PROBLEMS)
    ]

    for diff_name, problems in problem_groups:
        print(f"\n{'='*100}")
        print(f"DIFFICULTY: {diff_name}")
        print("=" * 100)

        for problem in problems:
            print(f"\n--- Problem: {problem.id} - {problem.name} ---")

            for mode in modes:
                print(f" Testing {mode}...", end=" ", flush=True)

                result = run_test(problem, mode)
                all_results.append(result)

                status = "✅" if result["success"] else "❌"
                time_str = f"{result['time_ms']:.0f}ms"
                llm_str = f"LLM:{result['llm_calls']}"
                gates_str = f"Gates:{result['gate_count']}"

                # QUASAR/HYBRID additionally report which validation tiers passed.
                extra = ""
                if mode in ["quasar", "hybrid"]:
                    tiers = result.get("tiers_passed", [])
                    extra = f" Tiers:{tiers}"

                print(f"{status} {time_str} {llm_str} {gates_str}{extra}")

                if result["error"] and not result["success"]:
                    print(f" Error: {result['error'][:80]}...")

                # Rate limiting between LLM-heavy runs
                time.sleep(5)

    # Summary
    print("\n\n" + "=" * 100)
    print("FINAL SUMMARY BY MODE")
    print("=" * 100)

    for mode in modes:
        mode_results = [r for r in all_results if r["mode"] == mode]
        successes = sum(1 for r in mode_results if r["success"])
        total = len(mode_results)
        total_time = sum(r["time_ms"] for r in mode_results)
        total_llm = sum(r["llm_calls"] for r in mode_results)
        # Gate average only over successful runs; max() guards divide-by-zero.
        avg_gates = sum(r["gate_count"] for r in mode_results if r["success"]) / max(successes, 1)

        print(f"\n{mode.upper()}:")
        print(f" Success: {successes}/{total} ({100*successes/total:.1f}%)")
        print(f" Total Time: {total_time:.0f}ms ({total_time/total:.0f}ms avg)")
        print(f" LLM Calls: {total_llm} ({total_llm/total:.1f} avg)")
        print(f" Avg Gates (success): {avg_gates:.1f}")

        # Per difficulty
        for diff in ["easy", "medium", "hard", "very_hard"]:
            diff_results = [r for r in mode_results if r["difficulty"] == diff]
            if diff_results:
                diff_success = sum(1 for r in diff_results if r["success"])
                print(f" {diff}: {diff_success}/{len(diff_results)}")

    # Efficiency comparison
    print("\n" + "=" * 100)
    print("EFFICIENCY COMPARISON (Success per LLM call)")
    print("=" * 100)

    for mode in modes:
        mode_results = [r for r in all_results if r["mode"] == mode]
        successes = sum(1 for r in mode_results if r["success"])
        total_llm = sum(r["llm_calls"] for r in mode_results)
        efficiency = successes / max(total_llm, 1)
        print(f" {mode}: {efficiency:.3f} successes per LLM call")

    # Winner determination
    print("\n" + "=" * 100)
    print("WINNER BY DIFFICULTY")
    print("=" * 100)

    for diff in ["easy", "medium", "hard", "very_hard"]:
        print(f"\n{diff.upper()}:")
        best_mode = None
        best_success = -1
        best_efficiency = -1

        for mode in modes:
            mode_results = [r for r in all_results if r["mode"] == mode and r["difficulty"] == diff]
            if mode_results:
                successes = sum(1 for r in mode_results if r["success"])
                total_llm = sum(r["llm_calls"] for r in mode_results)
                efficiency = successes / max(total_llm, 1)

                # Primary key: success count; tie-break on efficiency.
                if successes > best_success or (successes == best_success and efficiency > best_efficiency):
                    best_success = successes
                    best_efficiency = efficiency
                    best_mode = mode

        if best_mode:
            print(f" 🏆 Winner: {best_mode.upper()} ({best_success} successes)")

    # Save results
    output_path = Path(__file__).parent.parent / "research" / f"comprehensive_test_v2_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
    output_path.parent.mkdir(parents=True, exist_ok=True)

    with open(output_path, 'w') as f:
        json.dump(all_results, f, indent=2)

    print(f"\n\nResults saved to: {output_path}")
    print("=" * 100)
296
+
297
+
if __name__ == "__main__":
    # Entry point: run the full five-mode comparison as a script.
    main()
tests/evaluation_harness.py ADDED
@@ -0,0 +1,748 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Path: QAgents-workflos/tests/evaluation_harness.py
2
+ # Relations: Uses orchestrators, tools, database, config modules
3
+ # Uses agents/llm_adapter.py for LLM usage tracking
4
+ # Description: Evaluation harness for comparative testing of Blackboard, Guided, and Naked modes
5
+ # Includes cost tracking (requests, tokens, time) for each mode
6
+ # Exports results to CSV for research analysis
7
+ """
8
+ Evaluation Harness: Measure time, quality, effectiveness, reliability.
9
+ Runs comparative tests across Blackboard, Guided, and Naked modes.
10
+
11
+ COST TRACKING METRICS:
12
+ ======================
13
+ For each mode, tracks:
14
+ - LLM requests: Number of calls to LLM API
15
+ - Tokens used: Total tokens consumed (input + output)
16
+ - Time: Total execution time
17
+ - Quality: Circuit correctness and complexity scores
18
+
19
+ MODES:
20
+ ======
21
+ - Naked: Direct LLM (1 call/problem) - baseline test
22
+ - Guided: Structured workflow (4 LLM calls/problem)
23
+ - Blackboard: Free-form collaboration (8-12 LLM calls/problem)
24
+
25
+ OUTPUT FORMATS:
26
+ ===============
27
+ - TXT: Human-readable report
28
+ - CSV: Research data for longitudinal analysis
29
+ """
30
+
31
+ import time
32
+ import json
33
+ import csv
34
+ import statistics
35
+ from dataclasses import dataclass, field, asdict
36
+ from typing import Dict, List, Any, Optional
37
+ from datetime import datetime
38
+ from pathlib import Path
39
+ import logging
40
+
41
+ from .test_problems import TestProblem, ALL_PROBLEMS, get_problem
42
+ from database import get_database, ResultEntry
43
+
44
+ logger = logging.getLogger(__name__)
45
+
46
+
47
+ @dataclass
48
+ class MetricResult:
49
+ """Result for a single metric."""
50
+ name: str
51
+ value: float
52
+ unit: str
53
+ passed: bool = True
54
+ details: str = ""
55
+
56
+
57
+ @dataclass
58
+ class CostMetrics:
59
+ """Cost metrics for a single run."""
60
+ llm_requests: int = 0
61
+ mcp_requests: int = 0
62
+ tokens_used: int = 0
63
+ time_ms: float = 0.0
64
+ models_used: List[str] = field(default_factory=list)
65
+
66
+ def cost_per_quality(self, quality_score: float) -> float:
67
+ """Calculate cost-per-quality ratio (lower is better)."""
68
+ if quality_score <= 0:
69
+ return float('inf')
70
+ # Cost = (requests * 1) + (tokens / 1000) + (time_ms / 1000)
71
+ cost = self.llm_requests + (self.tokens_used / 1000) + (self.time_ms / 1000)
72
+ return cost / quality_score
73
+
74
+
75
+ @dataclass
76
+ class EvaluationResult:
77
+ """Result of evaluating a single run."""
78
+ problem_id: str
79
+ system_mode: str
80
+ run_number: int
81
+ success: bool
82
+ execution_time_ms: float
83
+ circuit_qasm: Optional[str]
84
+ metrics: Dict[str, MetricResult] = field(default_factory=dict)
85
+ cost_metrics: CostMetrics = field(default_factory=CostMetrics)
86
+ errors: List[str] = field(default_factory=list)
87
+ timestamp: datetime = field(default_factory=datetime.now)
88
+
89
+
90
+ @dataclass
91
+ class AggregatedResults:
92
+ """Aggregated results for a problem across all runs."""
93
+ problem_id: str
94
+ system_mode: str
95
+ num_runs: int
96
+ success_rate: float
97
+ avg_time_ms: float
98
+ std_time_ms: float
99
+ avg_quality_score: float
100
+ effectiveness: float
101
+ reliability: float
102
+ # Cost aggregates
103
+ total_llm_requests: int = 0
104
+ total_mcp_requests: int = 0
105
+ total_tokens: int = 0
106
+ avg_cost_per_quality: float = 0.0
107
+ all_results: List[EvaluationResult] = field(default_factory=list)
108
+
109
+
110
+ class EvaluationHarness:
111
+ """
112
+ Runs comparative evaluations across different orchestration modes.
113
+ Measures: Time, Quality, Effectiveness, Reliability, Cost
114
+ """
115
+
116
+ def __init__(self, num_runs: int = 5, timeout_seconds: float = 120.0):
117
+ self.num_runs = num_runs
118
+ self.timeout_seconds = timeout_seconds
119
+ self.db = get_database()
120
+ self.results: Dict[str, Dict[str, AggregatedResults]] = {}
121
+
122
+ # Track MCP requests per run
123
+ self._mcp_request_count = 0
124
+
125
+ def _reset_cost_tracking(self):
126
+ """Reset cost tracking before a run."""
127
+ try:
128
+ from config import reset_cost_tracking
129
+ reset_cost_tracking()
130
+ except Exception:
131
+ pass
132
+ self._mcp_request_count = 0
133
+
134
+ def _get_cost_summary(self) -> Dict:
135
+ """Get cost tracking summary after a run."""
136
+ try:
137
+ from config import get_cost_summary
138
+ return get_cost_summary()
139
+ except Exception:
140
+ return {"total_requests": 0, "total_tokens": 0, "total_time_ms": 0.0}
141
+
142
+ def _get_llm_usage_summary(self) -> Dict:
143
+ """Get LLM usage from rate limiter."""
144
+ try:
145
+ from agents.llm_adapter import get_usage_summary
146
+ return get_usage_summary()
147
+ except Exception:
148
+ return {}
149
+
150
+ def evaluate_single_run(self, problem: TestProblem, mode: str,
151
+ run_number: int) -> EvaluationResult:
152
+ """Run a single evaluation with cost tracking."""
153
+ from orchestrators import create_orchestrator
154
+ from tools import invoke_tool
155
+
156
+ logger.info(f"Running {mode} on {problem.id}, run {run_number}")
157
+
158
+ # Reset cost tracking
159
+ self._reset_cost_tracking()
160
+
161
+ errors = []
162
+ circuit_qasm = None
163
+ metrics = {}
164
+ success = False
165
+ cost_metrics = CostMetrics()
166
+
167
+ start_time = time.perf_counter()
168
+
169
+ try:
170
+ # Create and run orchestrator
171
+ orchestrator = create_orchestrator(mode)
172
+ result = orchestrator.run(problem.goal)
173
+
174
+ circuit_qasm = result.final_output
175
+
176
+ # Handle list responses from MCP
177
+ if isinstance(circuit_qasm, list):
178
+ circuit_qasm = circuit_qasm[0] if circuit_qasm else None
179
+
180
+ # Ensure it's a string or None
181
+ if circuit_qasm is not None:
182
+ circuit_qasm = str(circuit_qasm) if not isinstance(circuit_qasm, str) else circuit_qasm
183
+
184
+ success = result.success and circuit_qasm is not None
185
+
186
+ if not success:
187
+ errors.extend(result.errors)
188
+
189
+ except Exception as e:
190
+ success = False
191
+ errors.append(str(e))
192
+ logger.error(f"Evaluation failed: {e}")
193
+
194
+ elapsed_ms = (time.perf_counter() - start_time) * 1000
195
+
196
+ # Collect cost metrics
197
+ cost_summary = self._get_cost_summary()
198
+ llm_usage = self._get_llm_usage_summary()
199
+
200
+ cost_metrics = CostMetrics(
201
+ llm_requests=cost_summary.get("total_requests", 0),
202
+ mcp_requests=self._mcp_request_count,
203
+ tokens_used=cost_summary.get("total_tokens", 0),
204
+ time_ms=elapsed_ms,
205
+ models_used=list(cost_summary.get("model_breakdown", {}).keys())
206
+ )
207
+
208
+ # Calculate metrics if we have a circuit
209
+ if circuit_qasm:
210
+ metrics = self._calculate_metrics(circuit_qasm, problem)
211
+
212
+ return EvaluationResult(
213
+ problem_id=problem.id,
214
+ system_mode=mode,
215
+ run_number=run_number,
216
+ success=success,
217
+ execution_time_ms=elapsed_ms,
218
+ circuit_qasm=circuit_qasm,
219
+ metrics=metrics,
220
+ cost_metrics=cost_metrics,
221
+ errors=errors
222
+ )
223
+
224
+ def _calculate_metrics(self, qasm: str, problem: TestProblem) -> Dict[str, MetricResult]:
225
+ """Calculate quality metrics for a circuit."""
226
+ from tools import invoke_tool
227
+
228
+ metrics = {}
229
+
230
+ try:
231
+ # Helper to extract value from potentially nested result
232
+ def extract_value(result, key, default=0):
233
+ val = result.get(key, default)
234
+ if isinstance(val, dict):
235
+ return val.get('depth', val.get('value', val.get('score', default)))
236
+ elif isinstance(val, list):
237
+ return val[0] if val else default
238
+ return val
239
+
240
+ # 1. Depth metric
241
+ self._mcp_request_count += 1
242
+ depth_result = invoke_tool("get_circuit_depth", qasm=qasm)
243
+ if depth_result.get("success"):
244
+ depth = extract_value(depth_result, "depth", 0)
245
+ if isinstance(depth, dict):
246
+ depth = depth.get('depth', 0)
247
+ max_depth = problem.expected.max_depth or 100
248
+ passed = depth <= max_depth if max_depth else True
249
+ metrics["depth"] = MetricResult(
250
+ name="Circuit Depth",
251
+ value=float(depth) if depth else 0,
252
+ unit="layers",
253
+ passed=passed,
254
+ details=f"Expected max: {max_depth}"
255
+ )
256
+
257
+ # 2. Complexity score
258
+ self._mcp_request_count += 1
259
+ complexity_result = invoke_tool("calculate_complexity", qasm=qasm)
260
+ if complexity_result.get("success"):
261
+ score = complexity_result.get("score", {})
262
+ if isinstance(score, dict):
263
+ complexity_value = score.get("complexity_score", score.get("total", 0))
264
+ elif isinstance(score, list):
265
+ complexity_value = 0
266
+ else:
267
+ complexity_value = float(score) if score else 0
268
+ metrics["complexity"] = MetricResult(
269
+ name="Complexity Score",
270
+ value=float(complexity_value) if complexity_value else 0,
271
+ unit="score",
272
+ passed=True
273
+ )
274
+
275
+ # 3. Hardware fitness
276
+ self._mcp_request_count += 1
277
+ fitness_result = invoke_tool("calculate_hardware_fitness", qasm=qasm)
278
+ if fitness_result.get("success"):
279
+ score = fitness_result.get("score", {})
280
+ if isinstance(score, dict):
281
+ fitness_value = score.get("fitness_score", score.get("fitness", 0))
282
+ elif isinstance(score, list):
283
+ fitness_value = 0
284
+ else:
285
+ fitness_value = float(score) if score else 0
286
+ metrics["hardware_fitness"] = MetricResult(
287
+ name="Hardware Fitness",
288
+ value=float(fitness_value) if fitness_value else 0,
289
+ unit="score",
290
+ passed=fitness_value > 0.5 if fitness_value else False
291
+ )
292
+
293
+ # 4. Validation
294
+ self._mcp_request_count += 1
295
+ validation_result = invoke_tool("validate_syntax", qasm=qasm)
296
+ valid_data = validation_result.get("valid", False)
297
+ # Handle list or complex response
298
+ if isinstance(valid_data, list):
299
+ valid = "valid" in str(valid_data).lower() or "✅" in str(valid_data)
300
+ elif isinstance(valid_data, dict):
301
+ valid = valid_data.get("valid", False)
302
+ else:
303
+ valid = bool(valid_data) and validation_result.get("success", False)
304
+ metrics["syntax_valid"] = MetricResult(
305
+ name="Syntax Validation",
306
+ value=1.0 if valid else 0.0,
307
+ unit="boolean",
308
+ passed=valid
309
+ )
310
+
311
+ # 5. Simulation correctness (if expected states defined)
312
+ if problem.expected.expected_states:
313
+ self._mcp_request_count += 1
314
+ prob_result = invoke_tool("get_probabilities", qasm=qasm)
315
+ if prob_result.get("success"):
316
+ probs = prob_result.get("probabilities", {})
317
+ if isinstance(probs, dict):
318
+ correctness = self._check_state_correctness(probs, problem.expected.expected_states)
319
+ else:
320
+ correctness = 0.5 # Default if can't parse
321
+ metrics["state_correctness"] = MetricResult(
322
+ name="State Correctness",
323
+ value=correctness,
324
+ unit="ratio",
325
+ passed=correctness > 0.9
326
+ )
327
+
328
+ except Exception as e:
329
+ logger.error(f"Metric calculation failed: {e}")
330
+
331
+ return metrics
332
+
333
+ def _check_state_correctness(self, actual: Dict[str, float],
334
+ expected: Dict[str, float]) -> float:
335
+ """Check how close actual probabilities are to expected."""
336
+ if not expected:
337
+ return 1.0
338
+
339
+ total_error = 0.0
340
+ for state, expected_prob in expected.items():
341
+ actual_prob = actual.get(state, 0.0)
342
+ total_error += abs(expected_prob - actual_prob)
343
+
344
+ # Normalize to 0-1 range (0 = perfect, 1 = worst)
345
+ max_error = 2.0 # Maximum possible error
346
+ correctness = 1.0 - (total_error / max_error)
347
+ return max(0.0, correctness)
348
+
349
+ def aggregate_results(self, results: List[EvaluationResult]) -> AggregatedResults:
350
+ """Aggregate multiple run results with cost metrics."""
351
+ if not results:
352
+ return AggregatedResults(
353
+ problem_id="",
354
+ system_mode="",
355
+ num_runs=0,
356
+ success_rate=0.0,
357
+ avg_time_ms=0.0,
358
+ std_time_ms=0.0,
359
+ avg_quality_score=0.0,
360
+ effectiveness=0.0,
361
+ reliability=0.0
362
+ )
363
+
364
+ problem_id = results[0].problem_id
365
+ system_mode = results[0].system_mode
366
+ num_runs = len(results)
367
+
368
+ # Success rate
369
+ successes = sum(1 for r in results if r.success)
370
+ success_rate = successes / num_runs
371
+
372
+ # Time statistics
373
+ times = [r.execution_time_ms for r in results]
374
+ avg_time = statistics.mean(times)
375
+ std_time = statistics.stdev(times) if len(times) > 1 else 0.0
376
+
377
+ # Cost aggregates
378
+ total_llm = sum(r.cost_metrics.llm_requests for r in results)
379
+ total_mcp = sum(r.cost_metrics.mcp_requests for r in results)
380
+ total_tokens = sum(r.cost_metrics.tokens_used for r in results)
381
+
382
+ # Quality score (average of metric scores for successful runs)
383
+ quality_scores = []
384
+ cost_per_quality_scores = []
385
+ for r in results:
386
+ if r.success and r.metrics:
387
+ # Combine relevant metrics
388
+ scores = []
389
+ if "complexity" in r.metrics:
390
+ # Invert complexity (lower is better)
391
+ scores.append(1.0 - min(r.metrics["complexity"].value / 100, 1.0))
392
+ if "hardware_fitness" in r.metrics:
393
+ scores.append(r.metrics["hardware_fitness"].value)
394
+ if "state_correctness" in r.metrics:
395
+ scores.append(r.metrics["state_correctness"].value)
396
+ if scores:
397
+ q_score = statistics.mean(scores)
398
+ quality_scores.append(q_score)
399
+ cost_per_quality_scores.append(r.cost_metrics.cost_per_quality(q_score))
400
+
401
+ avg_quality = statistics.mean(quality_scores) if quality_scores else 0.0
402
+ avg_cpq = statistics.mean(cost_per_quality_scores) if cost_per_quality_scores else float('inf')
403
+
404
+ # Effectiveness: Did we achieve the goal?
405
+ effective_runs = sum(
406
+ 1 for r in results
407
+ if r.success and r.metrics.get("state_correctness", MetricResult("", 0, "")).value > 0.8
408
+ )
409
+ effectiveness = effective_runs / num_runs if num_runs > 0 else 0.0
410
+
411
+ # Reliability: Consistency of results (based on variance of success and quality)
412
+ reliability = success_rate * (1.0 - std_time / max(avg_time, 1.0))
413
+ reliability = max(0.0, min(1.0, reliability))
414
+
415
+ return AggregatedResults(
416
+ problem_id=problem_id,
417
+ system_mode=system_mode,
418
+ num_runs=num_runs,
419
+ success_rate=success_rate,
420
+ avg_time_ms=avg_time,
421
+ std_time_ms=std_time,
422
+ avg_quality_score=avg_quality,
423
+ effectiveness=effectiveness,
424
+ reliability=reliability,
425
+ total_llm_requests=total_llm,
426
+ total_mcp_requests=total_mcp,
427
+ total_tokens=total_tokens,
428
+ avg_cost_per_quality=avg_cpq,
429
+ all_results=results
430
+ )
431
+
432
+ def evaluate_problem(self, problem: TestProblem,
433
+ modes: List[str] = None) -> Dict[str, AggregatedResults]:
434
+ """Evaluate a problem across all modes."""
435
+ if modes is None:
436
+ modes = ["blackboard", "guided", "naked"]
437
+
438
+ results_by_mode = {}
439
+
440
+ for mode in modes:
441
+ run_results = []
442
+
443
+ for run_num in range(1, self.num_runs + 1):
444
+ result = self.evaluate_single_run(problem, mode, run_num)
445
+ run_results.append(result)
446
+
447
+ # Store in database
448
+ self.db.store_result(ResultEntry(
449
+ run_id=f"{problem.id}_{mode}_{run_num}",
450
+ system_mode=mode,
451
+ problem_id=problem.id,
452
+ success=result.success,
453
+ execution_time_ms=result.execution_time_ms,
454
+ circuit_qasm=result.circuit_qasm,
455
+ metrics={k: asdict(v) for k, v in result.metrics.items()}
456
+ ))
457
+
458
+ aggregated = self.aggregate_results(run_results)
459
+ results_by_mode[mode] = aggregated
460
+
461
+ return results_by_mode
462
+
463
+ def evaluate_all(self, problems: List[TestProblem] = None,
464
+ modes: List[str] = None) -> Dict[str, Dict[str, AggregatedResults]]:
465
+ """Evaluate all problems across all modes."""
466
+ if problems is None:
467
+ problems = ALL_PROBLEMS
468
+ if modes is None:
469
+ modes = ["blackboard", "guided", "naked"]
470
+
471
+ all_results = {}
472
+
473
+ for problem in problems:
474
+ logger.info(f"Evaluating problem: {problem.name}")
475
+ all_results[problem.id] = self.evaluate_problem(problem, modes)
476
+
477
+ self.results = all_results
478
+ return all_results
479
+
480
+ def generate_report(self, output_path: Optional[Path] = None) -> str:
481
+ """Generate a comparison report with cost analysis."""
482
+ if not self.results:
483
+ return "No results to report. Run evaluate_all() first."
484
+
485
+ lines = [
486
+ "=" * 100,
487
+ "QUANTUM AGENT SYSTEM COMPARATIVE EVALUATION REPORT",
488
+ f"Generated: {datetime.now().isoformat()}",
489
+ f"Number of runs per problem: {self.num_runs}",
490
+ "=" * 100,
491
+ ""
492
+ ]
493
+
494
+ # Summary table with cost metrics
495
+ lines.append("SUMMARY BY MODE (with Cost Analysis)")
496
+ lines.append("-" * 100)
497
+ lines.append(f"{'Mode':<12} {'Success%':>9} {'Time(ms)':>10} {'Quality':>8} {'LLM Req':>8} {'Tokens':>10} {'Cost/Qual':>10}")
498
+ lines.append("-" * 100)
499
+
500
+ mode_totals = {
501
+ mode: {
502
+ "success": 0, "total": 0, "times": [], "quality": [],
503
+ "llm_req": 0, "mcp_req": 0, "tokens": 0, "cpq": []
504
+ }
505
+ for mode in ["blackboard", "guided", "naked"]
506
+ }
507
+
508
+ for problem_id, mode_results in self.results.items():
509
+ for mode, agg in mode_results.items():
510
+ mode_totals[mode]["success"] += agg.success_rate * agg.num_runs
511
+ mode_totals[mode]["total"] += agg.num_runs
512
+ mode_totals[mode]["times"].append(agg.avg_time_ms)
513
+ mode_totals[mode]["quality"].append(agg.avg_quality_score)
514
+ mode_totals[mode]["llm_req"] += agg.total_llm_requests
515
+ mode_totals[mode]["mcp_req"] += agg.total_mcp_requests
516
+ mode_totals[mode]["tokens"] += agg.total_tokens
517
+ if agg.avg_cost_per_quality != float('inf'):
518
+ mode_totals[mode]["cpq"].append(agg.avg_cost_per_quality)
519
+
520
+ for mode, totals in mode_totals.items():
521
+ if totals["total"] > 0:
522
+ success_pct = (totals["success"] / totals["total"]) * 100
523
+ avg_time = statistics.mean(totals["times"]) if totals["times"] else 0
524
+ avg_quality = statistics.mean(totals["quality"]) if totals["quality"] else 0
525
+ avg_cpq = statistics.mean(totals["cpq"]) if totals["cpq"] else float('inf')
526
+ cpq_str = f"{avg_cpq:.2f}" if avg_cpq != float('inf') else "N/A"
527
+
528
+ lines.append(
529
+ f"{mode:<12} {success_pct:>8.1f}% {avg_time:>9.0f} {avg_quality:>8.2f} "
530
+ f"{totals['llm_req']:>8} {totals['tokens']:>10} {cpq_str:>10}"
531
+ )
532
+
533
+ lines.append("")
534
+ lines.append("")
535
+
536
+ # Cost efficiency analysis
537
+ lines.append("COST EFFICIENCY ANALYSIS")
538
+ lines.append("-" * 60)
539
+ lines.append("")
540
+ lines.append("Expected LLM Requests per problem:")
541
+ lines.append(" - Naked: 1 (single direct LLM call)")
542
+ lines.append(" - Guided: 4 (one per agent: Architect, Builder, Validator, Scorer)")
543
+ lines.append(" - Blackboard: 8-12 (multiple collaborative rounds)")
544
+ lines.append("")
545
+ lines.append("Cost-per-Quality interpretation:")
546
+ lines.append(" - Lower is better (less resources for same quality)")
547
+ lines.append(" - Naked has lowest cost but tests raw LLM capability")
548
+ lines.append(" - Blackboard has highest cost but best quality potential")
549
+ lines.append("")
550
+
551
+ # Detailed results per problem
552
+ lines.append("DETAILED RESULTS BY PROBLEM")
553
+ lines.append("-" * 100)
554
+
555
+ for problem_id, mode_results in self.results.items():
556
+ problem = get_problem(problem_id)
557
+ problem_name = problem.name if problem else problem_id
558
+
559
+ lines.append(f"\n{problem_name} ({problem_id})")
560
+ lines.append("-" * 50)
561
+ lines.append(f"{'Mode':<12} {'Success':>8} {'Time(ms)':>10} {'Quality':>8} {'LLM':>6} {'Tokens':>8}")
562
+
563
+ for mode, agg in mode_results.items():
564
+ lines.append(
565
+ f"{mode:<12} "
566
+ f"{agg.success_rate*100:>7.0f}% "
567
+ f"{agg.avg_time_ms:>9.0f} "
568
+ f"{agg.avg_quality_score:>8.2f} "
569
+ f"{agg.total_llm_requests:>6} "
570
+ f"{agg.total_tokens:>8}"
571
+ )
572
+
573
+ lines.append("")
574
+ lines.append("=" * 100)
575
+ lines.append("END OF REPORT")
576
+
577
+ report = "\n".join(lines)
578
+
579
+ if output_path:
580
+ output_path.write_text(report)
581
+ logger.info(f"Report saved to: {output_path}")
582
+
583
+ return report
584
+
585
+ def export_csv(self, output_path: Optional[Path] = None) -> str:
586
+ """
587
+ Export results to CSV for research analysis.
588
+
589
+ CSV Columns:
590
+ - timestamp: When the evaluation was run
591
+ - problem_id: Unique problem identifier
592
+ - problem_name: Human-readable problem name
593
+ - difficulty: Problem difficulty (easy, medium, hard)
594
+ - mode: Execution mode (naked, guided, blackboard)
595
+ - run_number: Run iteration (1 to num_runs)
596
+ - success: Whether the run succeeded (True/False)
597
+ - time_ms: Execution time in milliseconds
598
+ - llm_requests: Number of LLM API calls
599
+ - tokens_used: Total tokens consumed
600
+ - mcp_requests: Number of MCP tool calls
601
+ - quality_score: Combined quality score (0-1)
602
+ - depth: Circuit depth
603
+ - complexity: Circuit complexity score
604
+ - hardware_fitness: Hardware compatibility score
605
+ - syntax_valid: Whether QASM syntax is valid
606
+ - state_correctness: Probability distribution correctness
607
+ - cost_per_quality: Cost efficiency ratio
608
+ - model_used: Primary LLM model used
609
+ - qasm_length: Length of generated QASM code
610
+ """
611
+ if not self.results:
612
+ return "No results to export. Run evaluate_all() first."
613
+
614
+ timestamp = datetime.now().isoformat()
615
+
616
+ # Default output path
617
+ if output_path is None:
618
+ output_dir = Path(__file__).parent.parent / "research"
619
+ output_dir.mkdir(exist_ok=True)
620
+ output_path = output_dir / f"evaluation_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
621
+
622
+ # CSV header
623
+ fieldnames = [
624
+ 'timestamp', 'problem_id', 'problem_name', 'difficulty',
625
+ 'mode', 'run_number', 'success', 'time_ms',
626
+ 'llm_requests', 'tokens_used', 'mcp_requests',
627
+ 'quality_score', 'depth', 'complexity', 'hardware_fitness',
628
+ 'syntax_valid', 'state_correctness', 'cost_per_quality',
629
+ 'model_used', 'qasm_length', 'errors'
630
+ ]
631
+
632
+ rows = []
633
+
634
+ for problem_id, mode_results in self.results.items():
635
+ problem = get_problem(problem_id)
636
+ problem_name = problem.name if problem else problem_id
637
+ difficulty = problem.difficulty if problem else "unknown"
638
+
639
+ for mode, agg in mode_results.items():
640
+ for result in agg.all_results:
641
+ # Extract metric values safely
642
+ def get_metric(name, default=0.0):
643
+ if name in result.metrics:
644
+ return result.metrics[name].value
645
+ return default
646
+
647
+ # Calculate quality score
648
+ quality_components = []
649
+ if "complexity" in result.metrics:
650
+ quality_components.append(1.0 - min(get_metric("complexity") / 100, 1.0))
651
+ if "hardware_fitness" in result.metrics:
652
+ quality_components.append(get_metric("hardware_fitness"))
653
+ if "state_correctness" in result.metrics:
654
+ quality_components.append(get_metric("state_correctness"))
655
+ quality_score = statistics.mean(quality_components) if quality_components else 0.0
656
+
657
+ # Cost per quality
658
+ cpq = result.cost_metrics.cost_per_quality(quality_score) if quality_score > 0 else float('inf')
659
+ cpq_str = f"{cpq:.4f}" if cpq != float('inf') else "inf"
660
+
661
+ # Model used
662
+ models = result.cost_metrics.models_used
663
+ model_used = models[0] if models else "unknown"
664
+
665
+ # QASM length
666
+ qasm_len = len(result.circuit_qasm) if result.circuit_qasm else 0
667
+
668
+ row = {
669
+ 'timestamp': timestamp,
670
+ 'problem_id': problem_id,
671
+ 'problem_name': problem_name,
672
+ 'difficulty': difficulty,
673
+ 'mode': mode,
674
+ 'run_number': result.run_number,
675
+ 'success': result.success,
676
+ 'time_ms': f"{result.execution_time_ms:.2f}",
677
+ 'llm_requests': result.cost_metrics.llm_requests,
678
+ 'tokens_used': result.cost_metrics.tokens_used,
679
+ 'mcp_requests': result.cost_metrics.mcp_requests,
680
+ 'quality_score': f"{quality_score:.4f}",
681
+ 'depth': get_metric("depth"),
682
+ 'complexity': f"{get_metric('complexity'):.2f}",
683
+ 'hardware_fitness': f"{get_metric('hardware_fitness'):.4f}",
684
+ 'syntax_valid': get_metric("syntax_valid") == 1.0,
685
+ 'state_correctness': f"{get_metric('state_correctness'):.4f}",
686
+ 'cost_per_quality': cpq_str,
687
+ 'model_used': model_used,
688
+ 'qasm_length': qasm_len,
689
+ 'errors': "; ".join(result.errors) if result.errors else ""
690
+ }
691
+ rows.append(row)
692
+
693
+ # Write CSV
694
+ with open(output_path, 'w', newline='', encoding='utf-8') as f:
695
+ writer = csv.DictWriter(f, fieldnames=fieldnames)
696
+ writer.writeheader()
697
+ writer.writerows(rows)
698
+
699
+ logger.info(f"CSV exported to: {output_path}")
700
+ return str(output_path)
701
+
702
+ def get_summary_stats(self) -> Dict[str, Any]:
703
+ """
704
+ Get summary statistics for the evaluation run.
705
+ Useful for programmatic access to results.
706
+ """
707
+ if not self.results:
708
+ return {}
709
+
710
+ stats = {
711
+ 'timestamp': datetime.now().isoformat(),
712
+ 'num_problems': len(self.results),
713
+ 'runs_per_problem': self.num_runs,
714
+ 'modes': {}
715
+ }
716
+
717
+ for mode in ['naked', 'guided', 'blackboard']:
718
+ mode_stats = {
719
+ 'success_rate': 0.0,
720
+ 'avg_time_ms': 0.0,
721
+ 'total_llm_requests': 0,
722
+ 'total_tokens': 0,
723
+ 'avg_quality': 0.0
724
+ }
725
+
726
+ times = []
727
+ qualities = []
728
+ total_runs = 0
729
+ successes = 0
730
+
731
+ for problem_id, mode_results in self.results.items():
732
+ if mode in mode_results:
733
+ agg = mode_results[mode]
734
+ total_runs += agg.num_runs
735
+ successes += agg.success_rate * agg.num_runs
736
+ times.append(agg.avg_time_ms)
737
+ qualities.append(agg.avg_quality_score)
738
+ mode_stats['total_llm_requests'] += agg.total_llm_requests
739
+ mode_stats['total_tokens'] += agg.total_tokens
740
+
741
+ if total_runs > 0:
742
+ mode_stats['success_rate'] = successes / total_runs
743
+ mode_stats['avg_time_ms'] = statistics.mean(times) if times else 0
744
+ mode_stats['avg_quality'] = statistics.mean(qualities) if qualities else 0
745
+
746
+ stats['modes'][mode] = mode_stats
747
+
748
+ return stats
tests/evaluation_report.txt ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ====================================================================================================
2
+ QUANTUM AGENT SYSTEM COMPARATIVE EVALUATION REPORT
3
+ Generated: 2025-11-28T18:38:30.068424
4
+ Number of runs per problem: 1
5
+ ====================================================================================================
6
+
7
+ SUMMARY BY MODE (with Cost Analysis)
8
+ ----------------------------------------------------------------------------------------------------
9
+ Mode Success% Time(ms) Quality LLM Req Tokens Cost/Qual
10
+ ----------------------------------------------------------------------------------------------------
11
+ blackboard 66.7% 14612 0.00 5 2709 N/A
12
+ guided 100.0% 23975 0.00 8 4481 N/A
13
+ naked 100.0% 5251 0.00 3 901 N/A
14
+
15
+
16
+ COST EFFICIENCY ANALYSIS
17
+ ------------------------------------------------------------
18
+
19
+ Expected LLM Requests per problem:
20
+ - Naked: 1 (single direct LLM call)
21
+ - Guided: 4 (one per agent: Architect, Builder, Validator, Scorer)
22
+ - Blackboard: 8-12 (multiple collaborative rounds)
23
+
24
+ Cost-per-Quality interpretation:
25
+ - Lower is better (less resources for same quality)
26
+ - Naked has lowest cost but tests raw LLM capability
27
+ - Blackboard has highest cost but best quality potential
28
+
29
+ DETAILED RESULTS BY PROBLEM
30
+ ----------------------------------------------------------------------------------------------------
31
+
32
+ Phase Flip State (easy_001)
33
+ --------------------------------------------------
34
+ Mode Success Time(ms) Quality LLM Tokens
35
+ blackboard 100% 11292 0.00 2 955
36
+ guided 100% 31284 0.00 4 2177
37
+ naked 100% 6894 0.00 1 293
38
+
39
+ Entanglement Generation (easy_002)
40
+ --------------------------------------------------
41
+ Mode Success Time(ms) Quality LLM Tokens
42
+ blackboard 0% 16832 0.00 1 529
43
+ guided 100% 20431 0.00 2 1046
44
+ naked 100% 1929 0.00 1 305
45
+
46
+ X-Basis Measurement Prep (easy_003)
47
+ --------------------------------------------------
48
+ Mode Success Time(ms) Quality LLM Tokens
49
+ blackboard 100% 15713 0.00 2 1225
50
+ guided 100% 20209 0.00 2 1258
51
+ naked 100% 6930 0.00 1 303
52
+
53
+ ====================================================================================================
54
+ END OF REPORT
tests/fast_eval.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Path: QAgents-workflos/tests/fast_eval.py
# Fast evaluation - one problem per difficulty, all modes
"""Fast mode evaluation."""

import sys
import os
import time
import json
from datetime import datetime
from pathlib import Path

# Make the project root importable when run directly from tests/.
sys.path.insert(0, str(Path(__file__).parent.parent.absolute()))

# BUG FIX: the key was previously hardcoded as the literal string
# "$env:GOOGLE_API_KEY" (a PowerShell placeholder), which was never a valid
# key. Read it from the environment instead and fail fast when missing.
api_key = os.environ.get('GOOGLE_API_KEY', '')
if not api_key:
    sys.exit("GOOGLE_API_KEY environment variable is not set")

from tests.test_problems import (
    PROBLEM_E1_PHASE_FLIP,
    PROBLEM_M1_SWAP_DECOMPOSITION,
    PROBLEM_H1_DEUTSCH,
    PROBLEM_VH4_BERNSTEIN_VAZIRANI
)
from orchestrators import create_orchestrator
from orchestrators.quasar_orchestrator import QuasarOrchestrator, HybridOrchestrator
from config import set_api_key
import re

set_api_key(api_key)
+
30
+
31
+ def extract_gates(qasm):
32
+ if not qasm:
33
+ return 0
34
+ gate_pattern = r'\b(h|x|y|z|s|t|cx|cz|swap|ccx|rz|rx|ry|cp)\b'
35
+ return len(re.findall(gate_pattern, qasm, re.IGNORECASE))
36
+
37
+
38
+ def test_problem(problem, mode, timeout=60):
39
+ start = time.perf_counter()
40
+
41
+ try:
42
+ if mode == "quasar":
43
+ orch = QuasarOrchestrator(max_iterations=3)
44
+ result = orch.run(problem.prompt, problem.expected.min_qubits)
45
+ return {"success": result.success, "time_ms": (time.perf_counter()-start)*1000,
46
+ "llm": result.llm_calls, "gates": extract_gates(result.final_qasm), "error": None}
47
+
48
+ elif mode == "hybrid":
49
+ orch = HybridOrchestrator()
50
+ result = orch.run(problem.prompt, problem.expected.min_qubits)
51
+ return {"success": result.success, "time_ms": (time.perf_counter()-start)*1000,
52
+ "llm": result.llm_calls, "gates": extract_gates(result.final_qasm), "error": None}
53
+
54
+ else:
55
+ orch = create_orchestrator(mode)
56
+ result = orch.run(problem.prompt)
57
+ llm = 1 if mode == "naked" else len(result.agent_results) if result.agent_results else 0
58
+ return {"success": result.success, "time_ms": (time.perf_counter()-start)*1000,
59
+ "llm": llm, "gates": extract_gates(result.final_output), "error": "; ".join(result.errors) if result.errors else None}
60
+
61
+ except Exception as e:
62
+ return {"success": False, "time_ms": (time.perf_counter()-start)*1000,
63
+ "llm": 0, "gates": 0, "error": str(e)[:60]}
64
+
65
+
66
+ print("=" * 70)
67
+ print("FAST MODE EVALUATION")
68
+ print("=" * 70)
69
+ print(f"Date: {datetime.now().isoformat()}")
70
+
71
+ problems = [
72
+ ("EASY", PROBLEM_E1_PHASE_FLIP),
73
+ ("MEDIUM", PROBLEM_M1_SWAP_DECOMPOSITION),
74
+ ("HARD", PROBLEM_H1_DEUTSCH),
75
+ ("VERY_HARD", PROBLEM_VH4_BERNSTEIN_VAZIRANI)
76
+ ]
77
+
78
+ modes = ["naked", "quasar", "hybrid", "blackboard"]
79
+ all_results = {}
80
+
81
+ for diff, problem in problems:
82
+ print(f"\n{diff}: {problem.name}")
83
+ print("-" * 50)
84
+ all_results[diff] = {}
85
+
86
+ for mode in modes:
87
+ print(f" {mode:12}", end=" ", flush=True)
88
+ result = test_problem(problem, mode)
89
+ all_results[diff][mode] = result
90
+
91
+ status = "✅" if result["success"] else "❌"
92
+ print(f"{status} {result['time_ms']:5.0f}ms LLM:{result['llm']} Gates:{result['gates']}")
93
+
94
+ if result["error"]:
95
+ print(f" ⚠️ {result['error'][:40]}...")
96
+
97
+ time.sleep(5)
98
+
99
+ # Summary
100
+ print("\n" + "=" * 70)
101
+ print("SUMMARY")
102
+ print("=" * 70)
103
+
104
+ for mode in modes:
105
+ successes = sum(1 for diff in all_results if all_results[diff][mode]["success"])
106
+ total_time = sum(all_results[diff][mode]["time_ms"] for diff in all_results)
107
+ total_llm = sum(all_results[diff][mode]["llm"] for diff in all_results)
108
+ print(f"\n{mode.upper():12} {successes}/4 ({25*successes}%) | {total_time:.0f}ms | {total_llm} LLM calls")
109
+ for diff in all_results:
110
+ r = all_results[diff][mode]
111
+ status = "✅" if r["success"] else "❌"
112
+ print(f" {diff:10} {status}")
113
+
114
+ print("\n" + "=" * 70)
115
+ print("DONE")
tests/final_eval.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Path: QAgents-workflos/tests/final_eval.py
# Final evaluation - NAKED vs BLACKBOARD on all difficulties
"""Final mode evaluation: NAKED vs fixed BLACKBOARD."""

import sys
import os
import time
from datetime import datetime
from pathlib import Path

# Make the project root importable when run directly from tests/.
sys.path.insert(0, str(Path(__file__).parent.parent.absolute()))

# BUG FIX: the key was previously hardcoded as the literal string
# "$env:GOOGLE_API_KEY" (a PowerShell placeholder), which was never a valid
# key. Read it from the environment instead and fail fast when missing.
api_key = os.environ.get('GOOGLE_API_KEY', '')
if not api_key:
    sys.exit("GOOGLE_API_KEY environment variable is not set")

from tests.test_problems import ALL_PROBLEMS
from orchestrators import create_orchestrator
from config import set_api_key
import re

set_api_key(api_key)
22
+
23
+
24
+ def extract_gates(qasm):
25
+ if not qasm:
26
+ return 0
27
+ gate_pattern = r'\b(h|x|y|z|s|t|cx|cz|swap|ccx|rz|rx|ry|cp)\b'
28
+ return len(re.findall(gate_pattern, qasm, re.IGNORECASE))
29
+
30
+
31
+ def test_problem(problem, mode):
32
+ start = time.perf_counter()
33
+
34
+ try:
35
+ orch = create_orchestrator(mode)
36
+ result = orch.run(problem.prompt)
37
+
38
+ llm = 1 if mode == "naked" else len(result.agent_results) if result.agent_results else 0
39
+
40
+ return {
41
+ "success": result.success,
42
+ "time_ms": (time.perf_counter()-start)*1000,
43
+ "llm": llm,
44
+ "gates": extract_gates(result.final_output),
45
+ "error": "; ".join(result.errors[:2]) if result.errors else None
46
+ }
47
+
48
+ except Exception as e:
49
+ return {
50
+ "success": False,
51
+ "time_ms": (time.perf_counter()-start)*1000,
52
+ "llm": 0,
53
+ "gates": 0,
54
+ "error": str(e)[:60]
55
+ }
56
+
57
+
58
+ print("=" * 80)
59
+ print("FINAL MODE EVALUATION: NAKED vs BLACKBOARD")
60
+ print("=" * 80)
61
+ print(f"Date: {datetime.now().isoformat()}")
62
+ print(f"Problems: {len(ALL_PROBLEMS)}")
63
+ print()
64
+
65
+ modes = ["naked", "blackboard"]
66
+ results_by_difficulty = {"easy": {}, "medium": {}, "hard": {}, "very_hard": {}}
67
+
68
+ for problem in ALL_PROBLEMS:
69
+ diff = problem.difficulty.value
70
+ print(f"\n{diff.upper()}: {problem.name}")
71
+
72
+ if diff not in results_by_difficulty:
73
+ results_by_difficulty[diff] = {}
74
+
75
+ for mode in modes:
76
+ print(f" {mode:12}", end=" ", flush=True)
77
+ result = test_problem(problem, mode)
78
+
79
+ if mode not in results_by_difficulty[diff]:
80
+ results_by_difficulty[diff][mode] = []
81
+ results_by_difficulty[diff][mode].append(result)
82
+
83
+ status = "✅" if result["success"] else "❌"
84
+ print(f"{status} {result['time_ms']:5.0f}ms LLM:{result['llm']} Gates:{result['gates']}")
85
+
86
+ if result["error"] and not result["success"]:
87
+ print(f" ⚠️ {result['error'][:50]}...")
88
+
89
+ time.sleep(4)
90
+
91
+ # Summary
92
+ print("\n\n" + "=" * 80)
93
+ print("FINAL SUMMARY")
94
+ print("=" * 80)
95
+
96
+ for mode in modes:
97
+ print(f"\n{mode.upper()}")
98
+ print("-" * 40)
99
+
100
+ total_success = 0
101
+ total_problems = 0
102
+ total_time = 0
103
+ total_llm = 0
104
+
105
+ for diff in ["easy", "medium", "hard", "very_hard"]:
106
+ if diff in results_by_difficulty and mode in results_by_difficulty[diff]:
107
+ results = results_by_difficulty[diff][mode]
108
+ successes = sum(1 for r in results if r["success"])
109
+ total_success += successes
110
+ total_problems += len(results)
111
+ total_time += sum(r["time_ms"] for r in results)
112
+ total_llm += sum(r["llm"] for r in results)
113
+
114
+ print(f" {diff:10}: {successes}/{len(results)}")
115
+
116
+ print(f"\n TOTAL: {total_success}/{total_problems} ({100*total_success/total_problems:.0f}%)")
117
+ print(f" Time: {total_time:.0f}ms total ({total_time/total_problems:.0f}ms avg)")
118
+ print(f" LLM calls: {total_llm}")
119
+
120
+ print("\n" + "=" * 80)
121
+ print("WINNER DETERMINATION")
122
+ print("=" * 80)
123
+
124
+ for diff in ["easy", "medium", "hard", "very_hard"]:
125
+ if diff not in results_by_difficulty:
126
+ continue
127
+
128
+ print(f"\n{diff.upper()}:")
129
+ for mode in modes:
130
+ if mode in results_by_difficulty[diff]:
131
+ results = results_by_difficulty[diff][mode]
132
+ successes = sum(1 for r in results if r["success"])
133
+ avg_time = sum(r["time_ms"] for r in results) / len(results)
134
+ print(f" {mode}: {successes}/{len(results)} ({avg_time:.0f}ms avg)")
135
+
136
+ print("\n" + "=" * 80)
137
+ print("DONE")
tests/full_comparison.py ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Path: QAgents-workflos/tests/full_comparison.py
# Full comparison test across all modes and difficulties
"""Full mode comparison test."""

import sys
import os
import time
import json
from datetime import datetime
from pathlib import Path

# Make the project root importable when run directly from tests/.
sys.path.insert(0, str(Path(__file__).parent.parent.absolute()))

# BUG FIX: the key was previously hardcoded as the literal string
# "$env:GOOGLE_API_KEY" (a PowerShell placeholder), which was never a valid
# key. Read it from the environment instead and fail fast when missing.
api_key = os.environ.get('GOOGLE_API_KEY', '')
if not api_key:
    sys.exit("GOOGLE_API_KEY environment variable is not set")

from tests.test_problems import ALL_PROBLEMS, ProblemDifficulty
from orchestrators import create_orchestrator
from orchestrators.quasar_orchestrator import QuasarOrchestrator, HybridOrchestrator
from config import set_api_key
import re

set_api_key(api_key)
24
+
25
+
26
+ def extract_gates(qasm):
27
+ """Count gates in QASM."""
28
+ if not qasm:
29
+ return 0
30
+ gate_pattern = r'\b(h|x|y|z|s|t|cx|cz|swap|ccx|rz|rx|ry|cp)\b'
31
+ return len(re.findall(gate_pattern, qasm, re.IGNORECASE))
32
+
33
+
34
+ def test_problem(problem, mode):
35
+ """Test a single problem."""
36
+ start = time.perf_counter()
37
+
38
+ try:
39
+ if mode == "quasar":
40
+ orch = QuasarOrchestrator(max_iterations=3)
41
+ result = orch.run(
42
+ problem.prompt,
43
+ problem.expected.min_qubits,
44
+ problem.expected.expected_states if problem.expected.expected_states else None
45
+ )
46
+ success = result.success
47
+ qasm = result.final_qasm
48
+ llm = result.llm_calls
49
+ iterations = result.iterations
50
+ tiers = result.tiers_passed
51
+
52
+ elif mode == "hybrid":
53
+ orch = HybridOrchestrator()
54
+ result = orch.run(
55
+ problem.prompt,
56
+ problem.expected.min_qubits,
57
+ problem.expected.expected_states if problem.expected.expected_states else None
58
+ )
59
+ success = result.success
60
+ qasm = result.final_qasm
61
+ llm = result.llm_calls
62
+ iterations = result.iterations
63
+ tiers = result.tiers_passed
64
+
65
+ else:
66
+ orch = create_orchestrator(mode)
67
+ result = orch.run(problem.prompt)
68
+ success = result.success
69
+ qasm = result.final_output
70
+ llm = 1 if mode == "naked" else len(result.agent_results) if result.agent_results else 0
71
+ iterations = 1
72
+ tiers = []
73
+
74
+ elapsed = (time.perf_counter() - start) * 1000
75
+ gates = extract_gates(qasm)
76
+
77
+ return {
78
+ "success": success,
79
+ "time_ms": elapsed,
80
+ "llm": llm,
81
+ "gates": gates,
82
+ "iterations": iterations,
83
+ "tiers": tiers,
84
+ "qasm": qasm,
85
+ "error": None
86
+ }
87
+
88
+ except Exception as e:
89
+ elapsed = (time.perf_counter() - start) * 1000
90
+ return {
91
+ "success": False,
92
+ "time_ms": elapsed,
93
+ "llm": 0,
94
+ "gates": 0,
95
+ "iterations": 0,
96
+ "tiers": [],
97
+ "qasm": None,
98
+ "error": str(e)[:100]
99
+ }
100
+
101
+
102
+ def main():
103
+ print("=" * 100)
104
+ print("FULL MODE COMPARISON TEST")
105
+ print("=" * 100)
106
+ print(f"Date: {datetime.now().isoformat()}")
107
+ print(f"Total problems: {len(ALL_PROBLEMS)}")
108
+ print()
109
+
110
+ # Modes to test - focus on the key ones
111
+ modes = ["naked", "quasar", "hybrid", "blackboard"]
112
+
113
+ all_results = []
114
+
115
+ # Group by difficulty
116
+ for difficulty in [ProblemDifficulty.EASY, ProblemDifficulty.MEDIUM, ProblemDifficulty.HARD, ProblemDifficulty.VERY_HARD]:
117
+ problems = [p for p in ALL_PROBLEMS if p.difficulty == difficulty]
118
+
119
+ print(f"\n{'='*100}")
120
+ print(f"DIFFICULTY: {difficulty.value.upper()} ({len(problems)} problems)")
121
+ print("=" * 100)
122
+
123
+ for problem in problems:
124
+ print(f"\n {problem.id}: {problem.name}")
125
+
126
+ for mode in modes:
127
+ print(f" {mode:12}", end=" ", flush=True)
128
+
129
+ result = test_problem(problem, mode)
130
+ result["problem_id"] = problem.id
131
+ result["difficulty"] = difficulty.value
132
+ result["mode"] = mode
133
+ all_results.append(result)
134
+
135
+ status = "✅" if result["success"] else "❌"
136
+ time_str = f"{result['time_ms']:6.0f}ms"
137
+ llm_str = f"LLM:{result['llm']}"
138
+ gates_str = f"Gates:{result['gates']:2}"
139
+
140
+ extra = ""
141
+ if result["tiers"]:
142
+ extra = f" Tiers:{result['tiers']}"
143
+
144
+ print(f"{status} {time_str} {llm_str:6} {gates_str}{extra}")
145
+
146
+ if result["error"]:
147
+ print(f" ❌ Error: {result['error'][:60]}...")
148
+
149
+ time.sleep(5)
150
+
151
+ # Summary
152
+ print("\n\n" + "=" * 100)
153
+ print("SUMMARY BY MODE")
154
+ print("=" * 100)
155
+
156
+ for mode in modes:
157
+ mode_results = [r for r in all_results if r["mode"] == mode]
158
+ successes = sum(1 for r in mode_results if r["success"])
159
+ total = len(mode_results)
160
+ total_time = sum(r["time_ms"] for r in mode_results)
161
+ total_llm = sum(r["llm"] for r in mode_results)
162
+ avg_gates = sum(r["gates"] for r in mode_results if r["success"]) / max(successes, 1)
163
+
164
+ print(f"\n{mode.upper():12}")
165
+ print(f" Overall: {successes}/{total} ({100*successes/total:.0f}%)")
166
+ print(f" Time: {total_time/1000:.1f}s total, {total_time/total:.0f}ms avg")
167
+ print(f" LLM: {total_llm} calls ({total_llm/total:.1f} avg)")
168
+ print(f" Gates: {avg_gates:.1f} avg")
169
+
170
+ # By difficulty
171
+ for diff in ["easy", "medium", "hard", "very_hard"]:
172
+ diff_results = [r for r in mode_results if r["difficulty"] == diff]
173
+ if diff_results:
174
+ diff_success = sum(1 for r in diff_results if r["success"])
175
+ print(f" {diff:10}: {diff_success}/{len(diff_results)}")
176
+
177
+ # Save results
178
+ output_path = Path(__file__).parent.parent / "research" / f"full_comparison_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
179
+ output_path.parent.mkdir(parents=True, exist_ok=True)
180
+
181
+ # Clean QASM for JSON (can be long)
182
+ for r in all_results:
183
+ if r["qasm"]:
184
+ r["qasm"] = r["qasm"][:500] # Truncate for storage
185
+
186
+ with open(output_path, 'w') as f:
187
+ json.dump(all_results, f, indent=2)
188
+
189
+ print(f"\n\nResults saved to: {output_path}")
190
+
191
+ # Winner determination
192
+ print("\n" + "=" * 100)
193
+ print("🏆 WINNER BY DIFFICULTY")
194
+ print("=" * 100)
195
+
196
+ for diff in ["easy", "medium", "hard", "very_hard"]:
197
+ print(f"\n{diff.upper()}:")
198
+ best_mode = None
199
+ best_success = -1
200
+
201
+ for mode in modes:
202
+ mode_results = [r for r in all_results if r["mode"] == mode and r["difficulty"] == diff]
203
+ if mode_results:
204
+ successes = sum(1 for r in mode_results if r["success"])
205
+ if successes > best_success:
206
+ best_success = successes
207
+ best_mode = mode
208
+
209
+ if best_mode:
210
+ print(f" 🏆 {best_mode.upper()} ({best_success}/{len([r for r in all_results if r['difficulty']==diff and r['mode']==best_mode])})")
211
+
212
+
213
+ if __name__ == "__main__":
214
+ main()
tests/mini_test.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Path: QAgents-workflos/tests/mini_test.py
2
+ # Description: Test all 4 modes on problems of each difficulty
3
+ """
4
+ Mini Test: Comparison of NAKED, BLACKBOARD, GUIDED, HYBRID on 4 problems.
5
+ """
6
+
7
import sys
import os
import warnings
# Make the repository root importable when this file is run directly as a script.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# Suppress Gemini function_call warning (it's informational, not an error)
warnings.filterwarnings("ignore", message=".*non-text parts.*")

from orchestrators import create_orchestrator
from tests.test_problems import get_problems_by_difficulty, ProblemDifficulty as Difficulty
17
+
18
def test_one(problem, mode):
    """Run a single problem through one orchestration mode.

    Args:
        problem: Test problem object exposing a ``prompt`` attribute.
        mode: Orchestrator mode name (e.g. "naked", "blackboard").

    Returns:
        Tuple ``(success, elapsed_ms, gates)`` where ``gates`` is a rough
        count of gate lines in the generated QASM output.
    """
    import time  # kept function-local, as in the original module

    orch = create_orchestrator(mode)
    start = time.perf_counter()
    result = orch.run(problem.prompt)
    elapsed = (time.perf_counter() - start) * 1000

    # Heuristic gate count: every non-empty line that is not a header,
    # register declaration, measurement or comment counts as one gate.
    # Bug fix: the prefix check now runs on the *stripped* line, so indented
    # declarations (e.g. "  qreg q[2];") are no longer miscounted as gates.
    gates = 0
    if result.final_output:
        non_gate = ('OPENQASM', 'include', 'qreg', 'creg', 'measure', '//')
        for raw in result.final_output.split('\n'):
            line = raw.strip()
            if line and not line.startswith(non_gate):
                gates += 1

    return result.success, elapsed, gates
33
+
34
def main():
    """Compare every orchestration mode on one problem per difficulty tier."""
    banner = "=" * 70
    print(banner)
    print("COMPREHENSIVE TEST: NAKED vs BLACKBOARD vs GUIDED vs HYBRID")
    print(banner)

    # Test HARD problems to see where modes fail
    modes = ["naked", "blackboard", "guided", "hybrid"]

    # One problem per difficulty
    test_problems = [
        ("EASY", get_problems_by_difficulty(Difficulty.EASY)[0]),
        ("HARD", get_problems_by_difficulty(Difficulty.HARD)[0]),
        ("VERY_HARD", get_problems_by_difficulty(Difficulty.VERY_HARD)[0]),
    ]

    # One pass/fail list per mode, filled in problem order.
    results = {}
    for mode in modes:
        results[mode] = []

    for diff_name, problem in test_problems:
        print(f"\n{diff_name}: {problem.name}")
        print("-" * 50)

        for mode in modes:
            try:
                ok, ms, gates = test_one(problem, mode)
                status = "✅" if ok else "❌"
                print(f" {mode:12} {status} {ms:6.0f}ms {gates:2} gates")
                results[mode].append(ok)
            except Exception as e:
                print(f" {mode:12} ❌ Error: {str(e)[:50]}")
                results[mode].append(False)

    print("\n" + banner)
    print("SUMMARY")
    print(banner)
    for mode in modes:
        outcomes = results[mode]
        passed = sum(outcomes)
        total = len(outcomes)
        pct = 100 * passed / total if total > 0 else 0
        print(f" {mode:12}: {passed}/{total} passed ({pct:.0f}%)")


if __name__ == "__main__":
    main()
tests/mode_evaluation.py ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Path: QAgents-workflos/tests/mode_evaluation.py
2
+ # Evaluate all modes on representative problems from each difficulty
3
+ """Mode Evaluation: Test all modes on key problems from each difficulty level."""
4
+
5
+ import sys
6
+ import os
7
+ import time
8
+ import json
9
+ from datetime import datetime
10
+ from pathlib import Path
11
+
12
+ sys.path.insert(0, str(Path(__file__).parent.parent.absolute()))
13
+
14
# Bug fix: this previously assigned the literal PowerShell placeholder
# string "$env:GOOGLE_API_KEY" (Python never expands such strings), so
# every run was configured with an invalid key. Read the real key from
# the environment instead.
api_key = os.environ.get("GOOGLE_API_KEY", "")
if api_key:
    os.environ['GOOGLE_API_KEY'] = api_key

from tests.test_problems import (
    PROBLEM_E1_PHASE_FLIP, PROBLEM_E2_CONTROLLED_NOT,
    PROBLEM_M1_SWAP_DECOMPOSITION, PROBLEM_M2_CONTROLLED_Z,
    PROBLEM_H1_DEUTSCH, PROBLEM_H2_GROVER_2QUBIT,
    PROBLEM_VH1_QFT_4QUBIT, PROBLEM_VH2_GROVER_3QUBIT, PROBLEM_VH4_BERNSTEIN_VAZIRANI
)
from orchestrators import create_orchestrator
from orchestrators.quasar_orchestrator import QuasarOrchestrator, HybridOrchestrator
from config import set_api_key
import re

# Propagate the key to the shared LLM configuration used by all orchestrators.
set_api_key(api_key)
29
+
30
+
31
def extract_gates(qasm):
    """Return a rough count of gate mnemonics appearing in a QASM string.

    Matches whole-word gate names case-insensitively; returns 0 for an
    empty or missing circuit.
    """
    if not qasm:
        return 0
    pattern = re.compile(
        r'\b(h|x|y|z|s|t|cx|cz|swap|ccx|rz|rx|ry|cp)\b',
        re.IGNORECASE,
    )
    return sum(1 for _ in pattern.finditer(qasm))
37
+
38
+
39
def test_problem(problem, mode):
    """Run one problem through one orchestration mode.

    Args:
        problem: Test problem (with ``prompt`` and ``expected`` metadata).
        mode: "quasar", "hybrid", or any mode accepted by create_orchestrator.

    Returns:
        Dict with keys success/time_ms/llm/gates/iterations/error. The same
        schema is returned on both paths so consumers can index any key
        without guarding (previously "iterations" was missing from the
        error result, risking a KeyError downstream).
    """
    start = time.perf_counter()

    try:
        if mode in ("quasar", "hybrid"):
            # Both feedback-loop orchestrators expose the same run() signature,
            # so the previously duplicated branches are merged.
            orch = QuasarOrchestrator(max_iterations=3) if mode == "quasar" else HybridOrchestrator()
            result = orch.run(
                problem.prompt,
                problem.expected.min_qubits,
                problem.expected.expected_states if problem.expected.expected_states else None
            )
            success = result.success
            qasm = result.final_qasm
            llm = result.llm_calls
            iterations = result.iterations
        else:
            orch = create_orchestrator(mode)
            result = orch.run(problem.prompt)
            success = result.success
            qasm = result.final_output
            # Naked mode is a single LLM call; multi-agent modes report one
            # call per participating agent.
            llm = 1 if mode == "naked" else len(result.agent_results) if result.agent_results else 0
            iterations = 1

        elapsed = (time.perf_counter() - start) * 1000

        return {
            "success": success,
            "time_ms": elapsed,
            "llm": llm,
            "gates": extract_gates(qasm),
            "iterations": iterations,
            "error": None
        }

    except Exception as e:
        elapsed = (time.perf_counter() - start) * 1000
        return {
            "success": False,
            "time_ms": elapsed,
            "llm": 0,
            "gates": 0,
            "iterations": 0,  # keep the schema identical to the success path
            "error": str(e)[:80]
        }
97
+
98
+
99
def main():
    """Run the mode-evaluation matrix, print per-mode summaries, and save JSON results."""
    print("=" * 80)
    print("MODE EVALUATION - KEY PROBLEMS FROM EACH DIFFICULTY")
    print("=" * 80)
    print(f"Date: {datetime.now().isoformat()}")
    print()

    # Key problems to test (2 per difficulty)
    test_problems = [
        ("EASY", [PROBLEM_E1_PHASE_FLIP, PROBLEM_E2_CONTROLLED_NOT]),
        ("MEDIUM", [PROBLEM_M1_SWAP_DECOMPOSITION, PROBLEM_M2_CONTROLLED_Z]),
        ("HARD", [PROBLEM_H1_DEUTSCH, PROBLEM_H2_GROVER_2QUBIT]),
        ("VERY_HARD", [PROBLEM_VH1_QFT_4QUBIT, PROBLEM_VH2_GROVER_3QUBIT, PROBLEM_VH4_BERNSTEIN_VAZIRANI])
    ]

    # Modes to test - focus on working ones
    modes = ["naked", "quasar", "hybrid", "blackboard"]

    # Flat list of per-(problem, mode) result dicts from test_problem().
    all_results = []

    for diff_name, problems in test_problems:
        print(f"\n{'='*80}")
        print(f"{diff_name} PROBLEMS")
        print("=" * 80)

        for problem in problems:
            print(f"\n {problem.id}: {problem.name}")

            for mode in modes:
                print(f" {mode:12}", end=" ", flush=True)

                result = test_problem(problem, mode)
                # Tag each result so the summary sections below can group
                # by mode and by difficulty.
                result["problem_id"] = problem.id
                result["difficulty"] = diff_name.lower()
                result["mode"] = mode
                all_results.append(result)

                status = "✅" if result["success"] else "❌"
                time_str = f"{result['time_ms']:6.0f}ms"
                llm_str = f"LLM:{result['llm']}"
                gates_str = f"Gates:{result['gates']:2}"

                print(f"{status} {time_str} {llm_str:6} {gates_str}")

                if result["error"]:
                    print(f" ⚠️ {result['error'][:50]}...")

                time.sleep(5)  # Rate limiting

    # Summary
    print("\n\n" + "=" * 80)
    print("SUMMARY BY MODE")
    print("=" * 80)

    for mode in modes:
        mode_results = [r for r in all_results if r["mode"] == mode]
        successes = sum(1 for r in mode_results if r["success"])
        total = len(mode_results)
        total_time = sum(r["time_ms"] for r in mode_results)
        total_llm = sum(r["llm"] for r in mode_results)
        # Average gates over successful runs only; max(successes, 1) guards
        # against division by zero when a mode never succeeded.
        avg_gates = sum(r["gates"] for r in mode_results if r["success"]) / max(successes, 1)

        print(f"\n{mode.upper():12}")
        print(f" Success: {successes}/{total} ({100*successes/total:.0f}%)")
        print(f" Time: {total_time:.0f}ms total, {total_time/total:.0f}ms avg")
        print(f" LLM: {total_llm} calls")
        print(f" Gates: {avg_gates:.1f} avg")

        # By difficulty
        for diff in ["easy", "medium", "hard", "very_hard"]:
            diff_results = [r for r in mode_results if r["difficulty"] == diff]
            if diff_results:
                diff_success = sum(1 for r in diff_results if r["success"])
                print(f" {diff:10}: {diff_success}/{len(diff_results)}")

    # Winner by difficulty
    print("\n" + "=" * 80)
    print("🏆 WINNER BY DIFFICULTY")
    print("=" * 80)

    for diff in ["easy", "medium", "hard", "very_hard"]:
        diff_results = [r for r in all_results if r["difficulty"] == diff]

        print(f"\n{diff.upper()}:")
        for mode in modes:
            mode_diff_results = [r for r in diff_results if r["mode"] == mode]
            if mode_diff_results:
                successes = sum(1 for r in mode_diff_results if r["success"])
                total_time = sum(r["time_ms"] for r in mode_diff_results)
                avg_time = total_time / len(mode_diff_results)
                print(f" {mode:12} {successes}/{len(mode_diff_results)} ({avg_time:.0f}ms avg)")

    # Save results
    output_path = Path(__file__).parent.parent / "research" / f"mode_evaluation_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
    output_path.parent.mkdir(parents=True, exist_ok=True)

    with open(output_path, 'w') as f:
        json.dump(all_results, f, indent=2)

    print(f"\n\nResults saved to: {output_path}")


if __name__ == "__main__":
    main()
tests/quality_evaluation_harness.py ADDED
@@ -0,0 +1,314 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Path: QAgents-workflos/tests/quality_evaluation_harness.py
2
+ # Relations: Uses orchestrators/, tests/circuit_quality_analyzer.py, database/circuit_quality_db.py
3
+ # Description: Quality-focused evaluation harness that stores QASM circuits
4
+ # Runs all 3 modes, measures quality via MCP, stores in database
5
+ # Generates comparison reports with actual circuit outputs
6
+
7
+ """
8
+ Quality Evaluation Harness: Run evaluations focused on CIRCUIT QUALITY.
9
+ Key difference from regular harness: stores actual QASM and measures quality.
10
+ """
11
+
12
+ import time
13
+ import json
14
+ import logging
15
+ from datetime import datetime
16
+ from typing import Dict, List, Optional, Any
17
+ from pathlib import Path
18
+ import uuid
19
+
20
+ from .test_problems import TestProblem, ALL_PROBLEMS, get_problem, get_problems_by_difficulty, ProblemDifficulty
21
+ from .circuit_quality_analyzer import CircuitQualityAnalyzer, AnalysisResult
22
+ from database.circuit_quality_db import (
23
+ CircuitQualityDB, CircuitEvaluation, QualityMetrics, get_quality_db
24
+ )
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
class QualityEvaluationHarness:
    """
    Runs quality-focused evaluations across all orchestration modes.
    PRIMARY FOCUS: Circuit quality, not just success rate.
    STORES: Full QASM code in database for later analysis.
    """

    def __init__(self, mcp_url: str = "http://127.0.0.1:7861"):
        # MCP endpoint used by the circuit analyzer for quality measurements.
        self.mcp_url = mcp_url
        self.analyzer = CircuitQualityAnalyzer(mcp_url)
        self.db = get_quality_db()
        # Timestamped run id groups all evaluations of one harness instance.
        self.run_id = f"quality_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

    def evaluate_single(self, problem: TestProblem, mode: str) -> CircuitEvaluation:
        """
        Run a single evaluation and return full CircuitEvaluation with QASM.

        Args:
            problem: The test problem to solve
            mode: 'naked', 'guided', or 'blackboard'

        Returns:
            CircuitEvaluation with full QASM and quality metrics
        """
        from orchestrators import create_orchestrator

        logger.info(f"Evaluating {problem.id} with {mode} mode")

        # Reset cost tracking so llm_requests/tokens_used reflect this run only.
        try:
            from config import reset_cost_tracking, get_cost_summary
            reset_cost_tracking()
        except ImportError:
            # Fallback: report empty cost data when config lacks cost tracking.
            get_cost_summary = lambda: {}

        # Initialize result
        eval_result = CircuitEvaluation(
            run_id=self.run_id,
            timestamp=datetime.now().isoformat(),
            problem_id=problem.id,
            problem_goal=problem.goal,
            mode=mode
        )

        start_time = time.perf_counter()

        try:
            # Create and run orchestrator
            orchestrator = create_orchestrator(mode)
            result = orchestrator.run(problem.goal)

            elapsed_ms = (time.perf_counter() - start_time) * 1000
            eval_result.execution_time_ms = elapsed_ms

            # Extract QASM; orchestrators may return a list or a non-string,
            # so normalize to a plain string (or None).
            qasm = result.final_output
            if isinstance(qasm, list):
                qasm = qasm[0] if qasm else None
            if qasm is not None:
                qasm = str(qasm) if not isinstance(qasm, str) else qasm

            eval_result.qasm_code = qasm or ""
            # A run only counts as successful if it actually produced QASM.
            eval_result.success = result.success and bool(qasm)

            if not eval_result.success:
                eval_result.errors = result.errors

        except Exception as e:
            elapsed_ms = (time.perf_counter() - start_time) * 1000
            eval_result.execution_time_ms = elapsed_ms
            eval_result.success = False
            eval_result.errors = [str(e)]
            logger.error(f"Evaluation failed for {problem.id}/{mode}: {e}")

        # Get cost metrics (best-effort; missing cost data is not fatal).
        try:
            cost = get_cost_summary()
            eval_result.llm_requests = cost.get('total_requests', 0)
            eval_result.tokens_used = cost.get('total_tokens', 0)
        except Exception:
            pass

        # Analyze quality if we have QASM
        if eval_result.qasm_code:
            expected = problem.expected.expected_states if problem.expected else None
            analysis = self.analyzer.analyze_circuit(eval_result.qasm_code, expected)

            eval_result.quality_metrics = QualityMetrics(
                depth=analysis.depth,
                gate_count=analysis.gate_count,
                cx_count=analysis.cx_count,
                single_qubit_count=analysis.single_qubit_count,
                hardware_fitness=analysis.hardware_fitness,
                syntax_valid=analysis.syntax_valid,
                state_correctness=analysis.state_correctness,
                complexity_score=analysis.complexity_score,
                noise_estimate=analysis.noise_estimate
            )

            if analysis.errors:
                eval_result.errors.extend(analysis.errors)

        # Store in database
        eval_id = self.db.save_evaluation(eval_result)
        eval_result.id = eval_id

        # NOTE(review): when qasm_code is empty, quality_metrics keeps its
        # CircuitEvaluation default (defined in database.circuit_quality_db);
        # confirm overall_score() is safe on that default before relying on
        # this log line for failed runs.
        logger.info(f"Stored evaluation {eval_id}: {problem.id}/{mode} - "
                    f"success={eval_result.success}, score={eval_result.quality_metrics.overall_score()}")

        return eval_result

    def evaluate_problem_all_modes(self, problem: TestProblem,
                                   modes: Optional[List[str]] = None) -> Dict[str, CircuitEvaluation]:
        """Evaluate a single problem with all modes; returns mode -> evaluation."""
        if modes is None:
            modes = ['naked', 'guided', 'blackboard']

        results = {}
        for mode in modes:
            results[mode] = self.evaluate_single(problem, mode)

        return results

    def run_full_evaluation(self,
                            difficulties: Optional[List[str]] = None,
                            modes: Optional[List[str]] = None,
                            max_problems: Optional[int] = None) -> str:
        """
        Run full evaluation across problems and modes.

        Args:
            difficulties: List of difficulties to test ('easy', 'medium', 'hard')
            modes: List of modes to test ('naked', 'guided', 'blackboard')
            max_problems: Maximum number of problems to test (for quick runs)

        Returns:
            run_id for this evaluation run
        """
        if difficulties is None:
            difficulties = ['easy', 'medium', 'hard']
        if modes is None:
            modes = ['naked', 'guided', 'blackboard']

        # Gather problems
        all_probs = []
        for diff in difficulties:
            # Convert string to enum if needed
            if isinstance(diff, str):
                try:
                    diff_enum = ProblemDifficulty(diff)
                except ValueError:
                    # Unknown difficulty names are skipped, not fatal.
                    logger.warning(f"Invalid difficulty: {diff}")
                    continue
            else:
                diff_enum = diff

            probs = get_problems_by_difficulty(diff_enum)
            all_probs.extend(probs)

        if max_problems:
            all_probs = all_probs[:max_problems]

        logger.info(f"Starting quality evaluation run {self.run_id}")
        logger.info(f"Problems: {len(all_probs)}, Modes: {modes}")

        # Run evaluations; each evaluate_single() persists its own result,
        # so a failure here only loses that one data point.
        total = len(all_probs) * len(modes)
        completed = 0

        for problem in all_probs:
            for mode in modes:
                try:
                    self.evaluate_single(problem, mode)
                    completed += 1
                    logger.info(f"Progress: {completed}/{total}")
                except Exception as e:
                    logger.error(f"Failed {problem.id}/{mode}: {e}")
                    completed += 1

        # Save run summary
        summary = self.db.get_quality_summary(self.run_id)
        self.db.save_comparison_run(
            run_id=self.run_id,
            description=f"Quality evaluation: {len(all_probs)} problems, {modes}",
            num_problems=len(all_probs),
            modes=modes,
            summary=summary
        )

        return self.run_id

    def generate_report(self, run_id: Optional[str] = None) -> str:
        """Generate a comprehensive quality comparison report (Markdown string)."""
        if run_id is None:
            run_id = self.run_id

        # Get summary
        summary = self.db.get_quality_summary(run_id)

        # Get full circuit export
        circuits_md = self.db.export_circuits_markdown(run_id)

        # Build report
        report = []
        report.append("# CIRCUIT QUALITY EVALUATION REPORT\n")
        report.append(f"Run ID: {run_id}\n")
        report.append(f"Generated: {datetime.now().isoformat()}\n\n")

        report.append("## EXECUTIVE SUMMARY\n\n")

        # Summary table
        report.append("| Mode | Success Rate | Quality Score | Avg Depth | Avg Gates | Avg CX | HW Fitness | LLM Calls |\n")
        report.append("|------|-------------|---------------|-----------|-----------|--------|------------|----------|\n")

        for mode in ['naked', 'guided', 'blackboard']:
            if mode in summary.get('modes', {}):
                m = summary['modes'][mode]
                report.append(
                    f"| {mode.upper()} | {m['success_rate']*100:.0f}% | "
                    f"{m['avg_quality_score']:.1f}/100 | {m['avg_depth']:.1f} | "
                    f"{m['avg_gates']:.1f} | {m['avg_cx_count']:.1f} | "
                    f"{m['avg_hardware_fitness']:.3f} | {m['total_llm_requests']} |\n"
                )

        report.append("\n## KEY FINDINGS\n\n")

        # Determine winner
        modes_data = summary.get('modes', {})
        if modes_data:
            best_quality = max(modes_data.items(), key=lambda x: x[1].get('avg_quality_score', 0))
            best_success = max(modes_data.items(), key=lambda x: x[1].get('success_rate', 0))
            lowest_cost = min(modes_data.items(), key=lambda x: x[1].get('total_llm_requests', float('inf')))

            report.append(f"- **Best Quality**: {best_quality[0].upper()} ({best_quality[1]['avg_quality_score']:.1f}/100)\n")
            report.append(f"- **Best Success Rate**: {best_success[0].upper()} ({best_success[1]['success_rate']*100:.0f}%)\n")
            report.append(f"- **Lowest Cost**: {lowest_cost[0].upper()} ({lowest_cost[1]['total_llm_requests']} LLM calls)\n")

            # Quality per LLM call
            report.append("\n### Quality Efficiency (Quality Score per LLM Call)\n\n")
            for mode, data in modes_data.items():
                # "or 1" guards against a stored zero call count.
                llm_calls = data.get('total_llm_requests', 1) or 1
                quality = data.get('avg_quality_score', 0)
                efficiency = quality / llm_calls
                report.append(f"- {mode.upper()}: {efficiency:.2f} quality points per LLM call\n")

        report.append("\n---\n")
        report.append("\n## DETAILED CIRCUIT COMPARISONS\n")
        report.append(circuits_md)

        return "".join(report)

    def print_summary(self, run_id: Optional[str] = None):
        """Print a quick summary to console."""
        if run_id is None:
            run_id = self.run_id

        summary = self.db.get_quality_summary(run_id)

        print("\n" + "="*70)
        print("QUALITY EVALUATION SUMMARY")
        print("="*70)

        modes = summary.get('modes', {})
        for mode in ['naked', 'guided', 'blackboard']:
            if mode in modes:
                m = modes[mode]
                print(f"\n{mode.upper()}:")
                print(f" Success Rate: {m['success_rate']*100:.0f}%")
                print(f" Quality Score: {m['avg_quality_score']:.1f}/100")
                print(f" Avg Depth: {m['avg_depth']:.1f}")
                print(f" Avg Gates: {m['avg_gates']:.1f}")
                print(f" Avg CX Count: {m['avg_cx_count']:.1f}")
                print(f" HW Fitness: {m['avg_hardware_fitness']:.3f}")
                print(f" LLM Requests: {m['total_llm_requests']}")

        print("\n" + "="*70)
305
+
306
+
307
def run_quick_quality_test(mode: str = 'naked', problem_id: str = 'bell_state') -> CircuitEvaluation:
    """Smoke-test helper: evaluate one named problem with one mode.

    Raises:
        ValueError: If ``problem_id`` does not name a known test problem.
    """
    problem = get_problem(problem_id)
    if not problem:
        raise ValueError(f"Problem not found: {problem_id}")
    return QualityEvaluationHarness().evaluate_single(problem, mode)
tests/quick_mode_test.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Path: QAgents-workflos/tests/quick_mode_test.py
2
+ # Description: Quick test of all modes on one HARD problem
3
+ """
4
+ Quick Mode Test: Test all 4 modes on 1 problem each difficulty
5
+ Designed to be fast by testing only essential combinations.
6
+ """
7
+
8
import sys
import os
import warnings
# Make the repository root importable when this file is run directly as a script.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# Suppress warnings
warnings.filterwarnings("ignore", message=".*non-text parts.*")
warnings.filterwarnings("ignore", message=".*GOOGLE_API_KEY.*")

import time
from orchestrators import create_orchestrator
from tests.test_problems import get_problems_by_difficulty, ProblemDifficulty
20
+
21
def test_mode(mode, problem):
    """Run one mode on one problem.

    Args:
        mode: Orchestrator mode name.
        problem: Test problem exposing a ``prompt`` attribute.

    Returns:
        Tuple ``(success, elapsed_ms, gates, error)`` where ``error`` is a
        truncated message on exception and None otherwise.
    """
    try:
        orch = create_orchestrator(mode)
        start = time.perf_counter()
        result = orch.run(problem.prompt)
        elapsed = (time.perf_counter() - start) * 1000

        # Heuristic gate count. Bug fix: run the prefix check on the
        # *stripped* line, so indented declarations (e.g. "  qreg q[2];")
        # are no longer miscounted as gates.
        gates = 0
        if result.final_output:
            non_gate = ('OPENQASM', 'include', 'qreg', 'creg', 'measure', '//')
            for raw in result.final_output.split('\n'):
                line = raw.strip()
                if line and not line.startswith(non_gate):
                    gates += 1

        return result.success, elapsed, gates, None
    except Exception as e:
        return False, 0, 0, str(e)[:50]
37
+
38
def main():
    """Exercise every orchestration mode against one VERY_HARD problem."""
    rule = "=" * 60
    print(rule)
    print("QUICK MODE TEST: All 4 modes on HARD problem")
    print(rule)

    # Get one VERY_HARD problem - this will show where modes struggle
    very_hard_problems = get_problems_by_difficulty(ProblemDifficulty.VERY_HARD)
    problem = very_hard_problems[0]  # 4-Qubit QFT

    print(f"\nProblem: {problem.name}")
    print(f"Difficulty: VERY_HARD")
    print(f"Description: {problem.prompt[:80]}...")
    print("-" * 60)

    results = []
    for mode in ["naked", "quasar", "hybrid", "blackboard"]:
        print(f"\nTesting {mode}...", end=" ", flush=True)
        ok, ms, gates, error = test_mode(mode, problem)

        if ok:
            print(f"✅ {ms:.0f}ms, {gates} gates")
            results.append((mode, True, ms, gates))
        elif error:
            print(f"❌ Error: {error}")
            results.append((mode, False, 0, 0))
        else:
            print(f"❌ Failed ({ms:.0f}ms)")
            results.append((mode, False, ms, gates))

    print("\n" + rule)
    print("RESULTS SUMMARY")
    print(rule)

    for mode, ok, ms, gates in results:
        status = "✅ PASS" if ok else "❌ FAIL"
        print(f" {mode:12}: {status:10} {ms:6.0f}ms {gates:2} gates")

    passed = sum(1 for entry in results if entry[1])
    print(f"\nTotal: {passed}/{len(results)} modes passed")


if __name__ == "__main__":
    main()
tests/quick_test.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Path: QAgents-workflos/tests/quick_test.py
2
+ # Quick test to compare modes on easy problems only
3
+ """Quick test for mode comparison."""
4
+
5
+ import sys
6
+ import os
7
+ import time
8
+ from pathlib import Path
9
+
10
+ sys.path.insert(0, str(Path(__file__).parent.parent.absolute()))
11
+
12
+ api_key = "$env:GOOGLE_API_KEY"
13
+ os.environ['GOOGLE_API_KEY'] = api_key
14
+
15
+ from tests.test_problems import EASY_PROBLEMS, VERY_HARD_PROBLEMS
16
+ from orchestrators import create_orchestrator
17
+ from orchestrators.quasar_orchestrator import QuasarOrchestrator, HybridOrchestrator
18
+ from config import set_api_key
19
+
20
+ set_api_key(api_key)
21
+
22
def test_problem(problem, mode):
    """Run one problem through one mode and return timing/result metadata.

    Returns:
        Dict with at least success/time_ms/llm; successful runs also carry
        "qasm" (a truncated preview), failed runs carry "error".
    """
    start = time.perf_counter()

    try:
        if mode in ("quasar", "hybrid"):
            # Both feedback-loop orchestrators expose the same run() signature,
            # so the previously duplicated branches are merged.
            orch = QuasarOrchestrator(max_iterations=3) if mode == "quasar" else HybridOrchestrator()
            result = orch.run(problem.prompt, problem.expected.min_qubits)
            success = result.success
            qasm = result.final_qasm
            llm = result.llm_calls
        else:
            orch = create_orchestrator(mode)
            result = orch.run(problem.prompt)
            success = result.success
            qasm = result.final_output
            # Idiom fix: len(dict) instead of materialising the key list
            # via len([k for k in d.keys()]).
            llm = len(result.agent_results) if result.agent_results else 1

        elapsed = (time.perf_counter() - start) * 1000
        return {"success": success, "time_ms": elapsed, "llm": llm, "qasm": qasm[:100] if qasm else None}

    except Exception as e:
        elapsed = (time.perf_counter() - start) * 1000
        return {"success": False, "time_ms": elapsed, "llm": 0, "error": str(e)[:50]}
52
+
53
# Script body: compare a fast subset of modes on one easy and one very hard
# problem, pausing between runs to respect API rate limits.
rule = "=" * 80
print(rule)
print("QUICK MODE COMPARISON TEST")
print(rule)

# Test only first easy and first very_hard problem with all modes
test_cases = [
    ("EASY", EASY_PROBLEMS[0]),
    ("VERY_HARD", VERY_HARD_PROBLEMS[0])
]

modes = ["naked", "quasar", "hybrid"]  # Skip slow modes

for diff, problem in test_cases:
    print(f"\n{diff}: {problem.name}")
    print("-" * 60)

    for mode in modes:
        print(f" {mode}...", end=" ", flush=True)
        outcome = test_problem(problem, mode)

        status = "✅" if outcome["success"] else "❌"
        time_str = f"{outcome['time_ms']:.0f}ms"
        llm_str = f"LLM:{outcome.get('llm', '?')}"
        print(f"{status} {time_str} {llm_str}")

        if not outcome["success"] and "error" in outcome:
            print(f" Error: {outcome['error']}")

        time.sleep(5)  # Rate limiting

print("\n" + rule)
print("DONE")
tests/run_evaluation.py ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ """
3
+ QAgents-Workflows: Main Evaluation Runner
4
+ Runs comparative tests between Blackboard, Guided, and Naked modes.
5
+
6
+ Usage:
7
+ python run_evaluation.py # Run all tests
8
+ python run_evaluation.py --mode naked # Test specific mode
9
+ python run_evaluation.py --problem easy_001 # Test specific problem
10
+ python run_evaluation.py --quick # Quick test (1 run per problem)
11
+ """
12
+
13
+ import argparse
14
+ import logging
15
+ import sys
16
+ from pathlib import Path
17
+
18
+ # Add parent to path for imports
19
+ sys.path.insert(0, str(Path(__file__).parent))
20
+
21
+ from config import config, set_mode
22
+ from client import get_client
23
+ from tests import (
24
+ EvaluationHarness,
25
+ ALL_PROBLEMS,
26
+ EASY_PROBLEMS,
27
+ get_problem
28
+ )
29
+
30
+
31
def setup_logging(verbose: bool = True):
    """Initialise root logging: DEBUG when verbose, INFO otherwise."""
    logging.basicConfig(
        level=logging.DEBUG if verbose else logging.INFO,
        format="%(asctime)s | %(levelname)-8s | %(name)s | %(message)s",
        datefmt="%H:%M:%S",
    )
39
+
40
+
41
def check_mcp_server():
    """Return True when the QuantumArchitect-MCP server answers a health check.

    Prints startup instructions to the console when the server is down.
    """
    if get_client().health_check():
        print("✅ MCP server is running")
        return True

    print("\n❌ ERROR: QuantumArchitect-MCP server is not running!")
    print("\nPlease start it with:")
    print(" cd D:\\teach\\quantum-circuits")
    print(" & .venv\\Scripts\\Activate.ps1")
    print(" python QuantumArchitect-MCP\\app.py")
    print()
    return False
54
+
55
+
56
def run_quick_test():
    """Sanity-check the pipeline: naked mode on the Bell-state problem.

    Returns:
        True if the orchestrator reported success.
    """
    print("\n Running Quick Test (Naked mode, Bell State)")
    print("-" * 50)

    from orchestrators import create_orchestrator
    from tests import BELL_STATE_PROBLEM

    result = create_orchestrator("naked").run(BELL_STATE_PROBLEM.goal)

    print(f"Success: {result.success}")
    print(f"Time: {result.execution_time_ms:.1f}ms")
    print(f"Steps: {result.steps_completed}")

    if result.final_output:
        print(f"\nGenerated Circuit:")
        # Slicing caps at 500 chars and is a no-op for shorter outputs.
        print(result.final_output[:500])

    if result.errors:
        print(f"\nErrors: {result.errors}")

    return result.success
79
+
80
+
81
def run_full_evaluation(problems=None, modes=None, num_runs=3):
    """Run the full comparative evaluation and write report/CSV artifacts.

    Args:
        problems: Problems to evaluate (defaults to EASY_PROBLEMS).
        modes: Orchestration modes (defaults to blackboard/guided/naked).
        num_runs: Repetitions per problem/mode pair.

    Returns:
        True on success, False when the harness raised an exception.
    """
    print("\n Starting Full Evaluation")
    print("=" * 60)

    if problems is None:
        problems = EASY_PROBLEMS  # Start with easy problems
    if modes is None:
        modes = ["blackboard", "guided", "naked"]

    print(f"Problems: {len(problems)}")
    print(f"Modes: {modes}")
    print(f"Runs per problem: {num_runs}")
    print()

    harness = EvaluationHarness(num_runs=num_runs)

    try:
        # The harness accumulates results internally; the return value was
        # previously bound to an unused local, so it is now discarded.
        harness.evaluate_all(problems=problems, modes=modes)

        # Generate and print report
        report = harness.generate_report()
        print("\n" + report)

        # Save report to file. Explicit UTF-8: the report contains emoji
        # and would crash write_text() under a non-UTF-8 locale (cp1252).
        report_path = Path(__file__).parent / "evaluation_report.txt"
        report_path.write_text(report, encoding="utf-8")
        print(f"\n Report saved to: {report_path}")

        # Export CSV for research
        csv_path = harness.export_csv()
        print(f" CSV exported to: {csv_path}")

        # Print summary stats
        stats = harness.get_summary_stats()
        print("\n Summary Statistics:")
        for mode, mode_stats in stats.get('modes', {}).items():
            print(f" {mode}: {mode_stats['success_rate']*100:.1f}% success, "
                  f"{mode_stats['total_llm_requests']} LLM calls, "
                  f"{mode_stats['total_tokens']} tokens")

        return True

    except Exception as e:
        logging.exception(f"Evaluation failed: {e}")
        return False
127
def main():
    """CLI entry point: parse arguments, verify the MCP server, run the evaluation."""
    parser = argparse.ArgumentParser(
        description="QAgents Comparative Evaluation Runner",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python run_evaluation.py                # Full evaluation
  python run_evaluation.py --quick        # Quick sanity test
  python run_evaluation.py --mode naked   # Test naked mode only
  python run_evaluation.py --easy         # Only easy problems
  python run_evaluation.py --runs 10      # 10 runs per problem
        """
    )

    parser.add_argument("--quick", action="store_true",
                        help="Run quick sanity test only")
    parser.add_argument("--mode", choices=["blackboard", "guided", "naked"],
                        help="Test specific mode only")
    parser.add_argument("--problem", type=str,
                        help="Test specific problem by ID")
    parser.add_argument("--easy", action="store_true",
                        help="Only easy problems")
    parser.add_argument("--runs", type=int, default=3,
                        help="Number of runs per problem (default: 3)")
    parser.add_argument("--verbose", "-v", action="store_true",
                        help="Verbose output")

    args = parser.parse_args()

    setup_logging(args.verbose)

    print("=" * 60)
    print("[EVALUATION] QAgents-Workflows Comparative Evaluation")
    print("=" * 60)

    # Check MCP server; all evaluations depend on it, so bail out early.
    if not check_mcp_server():
        sys.exit(1)

    # Quick test mode
    if args.quick:
        success = run_quick_test()
        sys.exit(0 if success else 1)

    # Determine problems to run
    if args.problem:
        problem = get_problem(args.problem)
        if not problem:
            print(f"❌ Unknown problem: {args.problem}")
            sys.exit(1)
        problems = [problem]
    elif args.easy:
        problems = EASY_PROBLEMS
    else:
        problems = ALL_PROBLEMS

    # Determine modes to test (None lets run_full_evaluation use its default set).
    modes = [args.mode] if args.mode else None

    # Run evaluation
    success = run_full_evaluation(
        problems=problems,
        modes=modes,
        num_runs=args.runs
    )

    sys.exit(0 if success else 1)


if __name__ == "__main__":
    main()
tests/run_quality_eval.py ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Path: QAgents-workflos/run_quality_eval.py
# Relations: Uses tests/quality_evaluation_harness.py, database/circuit_quality_db.py
# Description: CLI entry point for quality-focused evaluation
# Run with: python run_quality_eval.py --mode all --difficulty easy
# Generates quality comparison report with actual QASM circuits

"""
Quality Evaluation Runner: CLI entry point for circuit quality comparison.

Usage:
    python run_quality_eval.py --mode all --difficulty easy
    python run_quality_eval.py --mode naked --problem easy_001
    python run_quality_eval.py --report RUN_ID
"""

import argparse
import logging
import sys
import os
from pathlib import Path
from datetime import datetime

# Add the PROJECT ROOT to sys.path. This file lives in tests/, but the
# imports below are package-qualified (tests.*, database.*, config), so the
# parent of this file's directory must be importable. Using only
# Path(__file__).parent would add tests/ itself and break those imports
# unless the script happened to be launched from the repo root.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))

# Ensure API key is set BEFORE importing config (config may read it at import time)
api_key = os.getenv("GOOGLE_API_KEY") or os.getenv("GENAI_API_KEY")
if api_key:
    os.environ["GOOGLE_API_KEY"] = api_key

from tests.quality_evaluation_harness import QualityEvaluationHarness, run_quick_quality_test
from tests.test_problems import get_problem, get_problems_by_difficulty
from database.circuit_quality_db import get_quality_db
from config import set_api_key

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Explicitly set API key in config after logging is ready
if api_key:
    set_api_key(api_key)
    logger.info(f"API Key configured: {api_key[:10]}...")
else:
    logger.warning("No GOOGLE_API_KEY or GENAI_API_KEY found in environment")
49
+
50
+
51
def run_evaluation(args):
    """Dispatch a quality evaluation run (single problem or full sweep)."""
    harness = QualityEvaluationHarness()

    # Which orchestration modes to exercise.
    modes = ['naked', 'guided', 'blackboard'] if args.mode == 'all' else [args.mode]

    # Which difficulty buckets to draw problems from.
    difficulties = ['easy', 'medium', 'hard'] if args.difficulty == 'all' else [args.difficulty]

    if args.problem:
        # Single-problem path: compare every requested mode on one problem.
        problem = get_problem(args.problem)
        if not problem:
            print(f"ERROR: Problem not found: {args.problem}")
            return

        print(f"\n{'='*60}")
        print(f"Running quality evaluation for: {args.problem}")
        print(f"Modes: {modes}")
        print(f"{'='*60}\n")

        results = harness.evaluate_problem_all_modes(problem, modes)

        for mode, outcome in results.items():
            print(f"\n{mode.upper()}:")
            print(f"  Success: {'✅' if outcome.success else '❌'}")
            print(f"  Quality Score: {outcome.quality_metrics.overall_score()}/100")
            print(f"  Depth: {outcome.quality_metrics.depth}")
            print(f"  Gates: {outcome.quality_metrics.gate_count}")
            print(f"  CX: {outcome.quality_metrics.cx_count}")
            print(f"  Time: {outcome.execution_time_ms:.0f}ms")
            print(f"  LLM Calls: {outcome.llm_requests}")
            if outcome.qasm_code:
                print(f"  QASM ({len(outcome.qasm_code)} chars):")
                qasm_lines = outcome.qasm_code.split('\n')
                for qasm_line in qasm_lines[:10]:
                    print(f"    {qasm_line}")
                if len(qasm_lines) > 10:
                    print("    ...")
        return

    # Full-sweep path: run every selected problem/mode combination.
    print(f"\n{'='*60}")
    print(f"Running full quality evaluation")
    print(f"Difficulties: {difficulties}")
    print(f"Modes: {modes}")
    print(f"Max problems: {args.max_problems or 'all'}")
    print(f"{'='*60}\n")

    run_id = harness.run_full_evaluation(
        difficulties=difficulties,
        modes=modes,
        max_problems=args.max_problems
    )

    harness.print_summary(run_id)

    # Persist a markdown report next to this script for later inspection.
    report = harness.generate_report(run_id)
    report_path = Path(__file__).parent / f"QUALITY_REPORT_{run_id}.md"
    report_path.write_text(report, encoding='utf-8')
    print(f"\nFull report saved to: {report_path}")

    print(f"\nRun ID: {run_id}")
    print("Use --report <run_id> to regenerate report later")
124
+
125
+
126
def show_report(run_id: str):
    """Render and print the report for a previously recorded run."""
    harness = QualityEvaluationHarness()
    # Point the harness at the existing run before generating the report.
    harness.run_id = run_id
    print(harness.generate_report(run_id))
133
+
134
+
135
def list_runs():
    """Print the 20 most recent evaluation runs from the quality database."""
    import sqlite3

    db = get_quality_db()
    sql = (
        "SELECT run_id, timestamp, description, num_problems "
        "FROM comparison_runs ORDER BY timestamp DESC LIMIT 20"
    )
    with sqlite3.connect(db.db_file) as conn:
        conn.row_factory = sqlite3.Row
        rows = conn.execute(sql).fetchall()

    if not rows:
        print("No evaluation runs found.")
        return

    print("\nRecent Evaluation Runs:")
    print("-" * 80)
    for row in rows:
        print(f"{row['run_id']} | {row['timestamp']} | {row['num_problems']} problems | {row['description'] or 'N/A'}")
    print("-" * 80)
154
+
155
+
156
def quick_test(args):
    """Run one fast problem/mode evaluation and print the outcome."""
    # --mode all makes no sense for a single quick run; default to naked.
    chosen_mode = 'naked' if args.mode == 'all' else args.mode
    problem_id = args.problem or 'easy_001'

    print(f"\nQuick test: {problem_id} with {chosen_mode} mode")
    print("-" * 40)

    try:
        result = run_quick_quality_test(chosen_mode, problem_id)
        print(f"Success: {'✅' if result.success else '❌'}")
        print(f"Quality Score: {result.quality_metrics.overall_score()}/100")
        print(f"Depth: {result.quality_metrics.depth}")
        print(f"Gates: {result.quality_metrics.gate_count}")
        if result.qasm_code:
            print(f"\nQASM:\n{result.qasm_code[:500]}")
        if result.errors:
            print(f"\nErrors: {result.errors}")
    except Exception as e:
        print(f"ERROR: {e}")
        import traceback
        traceback.print_exc()
178
+
179
+
180
def main():
    """Parse CLI arguments and dispatch to the requested action."""
    parser = argparse.ArgumentParser(
        description="Quality-focused quantum circuit evaluation",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python run_quality_eval.py --quick                    # Quick test
  python run_quality_eval.py --mode all --difficulty easy
  python run_quality_eval.py --problem easy_001 --mode all
  python run_quality_eval.py --list                     # List previous runs
  python run_quality_eval.py --report quality_20241128_120000
"""
    )

    parser.add_argument('--mode', choices=['naked', 'guided', 'blackboard', 'all'],
                        default='all', help='Orchestration mode(s) to test')
    parser.add_argument('--difficulty', choices=['easy', 'medium', 'hard', 'all'],
                        default='easy', help='Problem difficulty level(s)')
    parser.add_argument('--problem', type=str, help='Specific problem ID to test')
    parser.add_argument('--max-problems', type=int, help='Maximum problems to test')
    parser.add_argument('--quick', action='store_true', help='Run quick single test')
    parser.add_argument('--report', type=str, help='Generate report for run ID')
    parser.add_argument('--list', action='store_true', help='List previous runs')

    args = parser.parse_args()

    # Dispatch precedence: list > report > quick > full evaluation.
    if args.list:
        list_runs()
    elif args.report:
        show_report(args.report)
    elif args.quick:
        quick_test(args)
    else:
        run_evaluation(args)


if __name__ == "__main__":
    main()
tests/test_db_storage.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Path: QAgents-workflos/test_db_storage.py
2
+ # Description: Quick test to verify database storage works
3
+ """Test that database can store and retrieve circuits."""
4
+
5
+ from database.circuit_quality_db import CircuitQualityDB, CircuitEvaluation, QualityMetrics, get_quality_db
6
+ from datetime import datetime
7
+
8
def test_db():
    """Round-trip a hand-written Bell-state circuit through the quality DB."""
    db = get_quality_db()
    print(f'Database file: {db.db_file}')

    # Minimal Bell-state circuit used as the stored payload.
    test_qasm = """OPENQASM 2.0;
include "qelib1.inc";
qreg q[2];
creg c[2];
h q[0];
cx q[0], q[1];
measure q -> c;
"""

    # Build a synthetic evaluation record with known metric values.
    record = CircuitEvaluation(
        run_id='test_manual_001',
        timestamp=datetime.now().isoformat(),
        problem_id='test_bell_state',
        problem_goal='Create Bell state',
        mode='manual_test',
        qasm_code=test_qasm,
        success=True,
        execution_time_ms=0,
        llm_requests=0,
        tokens_used=0,
        quality_metrics=QualityMetrics(
            depth=2,
            gate_count=3,
            cx_count=1,
            single_qubit_count=1,
            hardware_fitness=0.95,
            syntax_valid=True,
            state_correctness=1.0
        )
    )

    # Persist, then read back and verify the QASM survived storage.
    eval_id = db.save_evaluation(record)
    print(f'Saved evaluation ID: {eval_id}')

    stored = db.get_evaluations(problem_id='test_bell_state')
    print(f'Retrieved {len(stored)} evaluations')
    if stored:
        first = stored[0]
        print(f'QASM stored ({len(first.qasm_code)} chars):')
        print(first.qasm_code)
        print(f'Quality score: {first.quality_metrics.overall_score()}/100')


if __name__ == "__main__":
    test_db()
tests/test_mcp_client.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Path: QAgents-workflos/tests/test_mcp_client.py
2
+ # Relations: Tests client/mcp_client.py
3
+ # Description: Comprehensive tests for MCP client with Gradio and fallback implementations
4
+
5
+ """
6
+ Test suite for MCP client functionality.
7
+ Tests both Gradio-based endpoints and local fallback implementations.
8
+ """
9
+
10
+ import sys
11
+ import os
12
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
13
+
14
+ from client.mcp_client import get_client, MCPClient, QASMLocalAnalyzer
15
+
16
# Sample QASM for testing: a 2-qubit Bell-state circuit
# (H on q[0], CNOT q[0]->q[1], then measure both qubits).
BELL_STATE_QASM = '''OPENQASM 2.0;
include "qelib1.inc";
qreg q[2];
creg c[2];
h q[0];
cx q[0], q[1];
measure q -> c;'''
24
+
25
+
26
def test_health_check():
    """Probe the MCP server's health endpoint; return True when reachable."""
    ok = get_client().health_check()
    print(f"Health Check: {'OK' if ok else 'FAILED'}")
    return ok
32
+
33
+
34
def test_create_circuit():
    """Create a Bell-state circuit from a server template (Gradio path)."""
    outcome = get_client().create_circuit_from_template('bell_state', 2)

    print(f"Create Circuit:")
    print(f"  Success: {outcome.success}")
    print(f"  Endpoint: {outcome.endpoint}")
    print(f"  Time: {outcome.execution_time_ms:.2f}ms")
    if outcome.success and outcome.data:
        print(f"  Data preview: {str(outcome.data)[:80]}...")
    return outcome.success
46
+
47
+
48
def test_analyze_circuit():
    """Analyze the sample Bell circuit (exercises the local fallback)."""
    outcome = get_client().analyze_circuit(BELL_STATE_QASM)

    print(f"Analyze Circuit:")
    print(f"  Success: {outcome.success}")
    print(f"  Is Fallback: {outcome.is_fallback}")
    if outcome.success:
        print(f"  Depth: {outcome.data.get('depth')}")
        print(f"  Gate Count: {outcome.data.get('gate_count')}")
        print(f"  Two-qubit Gates: {outcome.data.get('two_qubit_gates')}")
    return outcome.success
61
+
62
+
63
def test_validate_syntax():
    """Validate QASM syntax through the server (Gradio path)."""
    outcome = get_client().validate_syntax(BELL_STATE_QASM)

    print(f"Validate Syntax:")
    print(f"  Success: {outcome.success}")
    print(f"  Endpoint: {outcome.endpoint}")
    print(f"  Time: {outcome.execution_time_ms:.2f}ms")
    return outcome.success
73
+
74
+
75
def test_simulate_circuit():
    """Simulate the Bell circuit for 100 shots (Gradio path)."""
    outcome = get_client().simulate_circuit(BELL_STATE_QASM, shots=100)

    print(f"Simulate Circuit:")
    print(f"  Success: {outcome.success}")
    print(f"  Endpoint: {outcome.endpoint}")
    print(f"  Time: {outcome.execution_time_ms:.2f}ms")
    if outcome.success and outcome.data:
        print(f"  Data preview: {str(outcome.data)[:80]}...")
    return outcome.success
87
+
88
+
89
def test_complexity_score():
    """Score circuit complexity (Gradio endpoint or local fallback)."""
    outcome = get_client().calculate_complexity_score(BELL_STATE_QASM)

    print(f"Complexity Score:")
    print(f"  Success: {outcome.success}")
    print(f"  Is Fallback: {outcome.is_fallback}")
    if outcome.success and outcome.data:
        # The payload shape varies by endpoint; only dicts carry the score.
        if isinstance(outcome.data, dict):
            print(f"  Score: {outcome.data.get('complexity_score', 'N/A')}")
    return outcome.success
101
+
102
+
103
def test_estimate_noise():
    """Estimate hardware noise for the Bell circuit (fallback path)."""
    outcome = get_client().estimate_noise(BELL_STATE_QASM, hardware='ibm_brisbane')

    print(f"Estimate Noise:")
    print(f"  Success: {outcome.success}")
    print(f"  Is Fallback: {outcome.is_fallback}")
    if outcome.success:
        print(f"  Fidelity: {outcome.data.get('estimated_fidelity')}")
        print(f"  Total Error: {outcome.data.get('total_error_probability')}")
    return outcome.success
115
+
116
+
117
def test_local_analyzer():
    """Exercise QASMLocalAnalyzer parse/analyze/complexity directly."""
    local = QASMLocalAnalyzer()

    # Structural parse: register sizes and gate list.
    parse_result = local.parse_qasm(BELL_STATE_QASM)
    print(f"Local Parser:")
    print(f"  Qubits: {parse_result['num_qubits']}")
    print(f"  Gates: {len(parse_result['gates'])}")

    # Depth and per-gate breakdown.
    analysis_result = local.analyze_circuit(BELL_STATE_QASM)
    print(f"Local Analyzer:")
    print(f"  Depth: {analysis_result['depth']}")
    print(f"  Gate breakdown: {analysis_result['gate_breakdown']}")

    # Scalar complexity score.
    complexity_result = local.calculate_complexity(BELL_STATE_QASM)
    print(f"Local Complexity:")
    print(f"  Score: {complexity_result['complexity_score']}")

    return True
139
+
140
+
141
def run_all_tests():
    """Run every MCP client test and print a pass/fail summary."""
    banner = "=" * 50
    print(banner)
    print("MCP Client Test Suite")
    print(banner)

    suite = [
        ("Health Check", test_health_check),
        ("Create Circuit", test_create_circuit),
        ("Analyze Circuit", test_analyze_circuit),
        ("Validate Syntax", test_validate_syntax),
        ("Simulate Circuit", test_simulate_circuit),
        ("Complexity Score", test_complexity_score),
        ("Estimate Noise", test_estimate_noise),
        ("Local Analyzer", test_local_analyzer),
    ]

    outcomes = []
    for label, test_fn in suite:
        print(f"\n--- {label} ---")
        try:
            outcomes.append((label, test_fn()))
        except Exception as e:
            # A crashing test counts as a failure but does not stop the suite.
            print(f"ERROR: {e}")
            outcomes.append((label, False))

    print("\n" + banner)
    print("Summary")
    print(banner)
    passed_count = sum(1 for _, ok in outcomes if ok)
    print(f"Passed: {passed_count}/{len(outcomes)}")
    for label, ok in outcomes:
        status = "✓" if ok else "✗"
        print(f"  {status} {label}")

    return all(ok for _, ok in outcomes)


if __name__ == "__main__":
    run_all_tests()
tests/test_problems.py ADDED
@@ -0,0 +1,709 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Path: QAgents-workflos/tests/test_problems.py
2
+ # Relations: Used by evaluation_harness.py, run_evaluation.py
3
+ # Description: Real quantum computing problems requiring LLM reasoning
4
+ # Each problem has increasing complexity and real-world relevance
5
+ """
6
+ Test Problems Module: Real Quantum Computing Challenges
7
+
8
+ TESTING FRAMEWORK DESIGN:
9
+ =========================
10
+
11
+ Each problem requires actual LLM reasoning to solve - no hardcoded templates.
12
+ The LLM must understand the quantum mechanics and generate appropriate QASM.
13
+
14
+ EVALUATION MODES:
15
+ -----------------
16
+ 1. NAKED: 1 LLM call per problem (direct reasoning, no agents)
17
+ 2. GUIDED: 1 + 4 LLM calls (initial + architect/builder/validator/scorer agents)
18
+ 3. BLACKBOARD: 1 + 8-12 LLM calls (initial + collaborative agent rounds)
19
+
20
+ PROBLEM CATEGORIES:
21
+ -------------------
22
+ EASY (1-2 qubits, 1-3 gates):
23
+ - Fundamental single/two-qubit operations
24
+ - Direct QASM generation possible
25
+
26
+ MEDIUM (2-3 qubits, 4-8 gates):
27
+ - Require understanding of gate decomposition
28
+ - Multiple valid solutions possible
29
+
30
+ HARD (3+ qubits, 8+ gates):
31
+ - Algorithm implementation
32
+ - Optimization considerations
33
+ - Real-world applications
34
+ """
35
+
36
+ from dataclasses import dataclass, field
37
+ from typing import Dict, List, Optional, Any
38
+ from enum import Enum
39
+
40
+
41
class ProblemDifficulty(Enum):
    """Problem difficulty levels (buckets described in the module docstring)."""
    EASY = "easy"            # 1-2 qubits, 1-3 gates
    MEDIUM = "medium"        # 2-3 qubits, 4-8 gates
    HARD = "hard"            # 3+ qubits, 8+ gates / algorithm implementation
    VERY_HARD = "very_hard"  # New: Push NAKED to its limits
47
+
48
+
49
class ProblemCategory(Enum):
    """Problem categories for research tracking."""
    STATE_PREPARATION = "state_prep"
    GATE_SYNTHESIS = "gate_synthesis"
    ALGORITHM = "algorithm"
    ERROR_CORRECTION = "error_correction"
    OPTIMIZATION = "optimization"
56
+
57
+
58
@dataclass
class ExpectedOutput:
    """Validation criteria a generated circuit must satisfy."""
    min_qubits: int                  # minimum qubit register size
    max_qubits: int = 10             # maximum qubit register size
    max_depth: Optional[int] = None  # circuit depth cap; None = unconstrained
    required_gates: List[str] = field(default_factory=list)   # gate names that must appear
    forbidden_gates: List[str] = field(default_factory=list)  # gate names that must NOT appear
    expected_states: Dict[str, float] = field(default_factory=dict)  # bitstring -> probability
    tolerance: float = 0.1  # Probability tolerance for state matching
    must_be_unitary: bool = True        # circuit must be unitary (pre-measurement)
    hardware_compatible: bool = True    # circuit should be runnable on real hardware
70
+
71
+
72
@dataclass
class TestProblem:
    """A quantum circuit test problem for LLM evaluation."""
    id: str           # unique problem identifier, e.g. "easy_001"
    name: str         # short human-readable name
    description: str  # one-line summary of the task

    # The prompt sent to the LLM - must require reasoning
    prompt: str

    # Category and difficulty for analysis
    difficulty: ProblemDifficulty
    category: ProblemCategory

    # Validation criteria
    expected: ExpectedOutput

    # Metadata for research tracking
    tags: List[str] = field(default_factory=list)
    reference_solution: Optional[str] = None  # Known optimal QASM
    optimal_depth: Optional[int] = None       # best-known circuit depth
    optimal_gate_count: Optional[int] = None  # best-known total gate count

    # Research tracking
    requires_understanding: List[str] = field(default_factory=list)  # concepts the solver needs
    common_mistakes: List[str] = field(default_factory=list)         # typical failure modes

    @property
    def goal(self) -> str:
        """Alias for prompt - used by orchestrators."""
        return self.prompt
103
+ # =============================================================================
104
+ # EASY PROBLEMS: Fundamental Quantum Operations
105
+ # =============================================================================
106
+
107
+ PROBLEM_E1_PHASE_FLIP = TestProblem(
108
+ id="easy_001",
109
+ name="Phase Flip State",
110
+ description="Create the |−⟩ state (phase-flipped superposition)",
111
+ prompt="""Create a quantum circuit that prepares the |−⟩ state.
112
+
113
+ The |−⟩ state is defined as: (|0⟩ - |1⟩)/√2
114
+
115
+ This is different from the |+⟩ state which is (|0⟩ + |1⟩)/√2.
116
+
117
+ Requirements:
118
+ - Use a single qubit
119
+ - The final state should have equal probability of 0 and 1
120
+ - But the relative phase between them should be π (negative)
121
+
122
+ Provide the OpenQASM 2.0 circuit.""",
123
+ difficulty=ProblemDifficulty.EASY,
124
+ category=ProblemCategory.STATE_PREPARATION,
125
+ expected=ExpectedOutput(
126
+ min_qubits=1,
127
+ max_qubits=1,
128
+ max_depth=2,
129
+ required_gates=["h", "z"], # or x then h
130
+ expected_states={"0": 0.5, "1": 0.5}
131
+ ),
132
+ tags=["superposition", "phase", "single-qubit"],
133
+ requires_understanding=["Hadamard gate", "Z gate", "quantum phases"],
134
+ common_mistakes=["Using only H (creates |+⟩ not |−⟩)", "Wrong gate order"],
135
+ optimal_depth=2,
136
+ optimal_gate_count=2
137
+ )
138
+
139
+ PROBLEM_E2_CONTROLLED_NOT = TestProblem(
140
+ id="easy_002",
141
+ name="Entanglement Generation",
142
+ description="Create maximal entanglement between two qubits",
143
+ prompt="""Create a quantum circuit that maximally entangles two qubits.
144
+
145
+ Starting from |00⟩, create the Bell state |Φ+⟩ = (|00⟩ + |11⟩)/√2
146
+
147
+ Requirements:
148
+ - Use exactly 2 qubits
149
+ - Measuring both qubits should give 00 or 11 with equal probability
150
+ - The qubits must be entangled (not just in superposition)
151
+
152
+ Think about what gates create entanglement.
153
+ Provide the OpenQASM 2.0 circuit.""",
154
+ difficulty=ProblemDifficulty.EASY,
155
+ category=ProblemCategory.STATE_PREPARATION,
156
+ expected=ExpectedOutput(
157
+ min_qubits=2,
158
+ max_qubits=2,
159
+ max_depth=3,
160
+ required_gates=["h", "cx"],
161
+ expected_states={"00": 0.5, "11": 0.5}
162
+ ),
163
+ tags=["entanglement", "bell", "cnot"],
164
+ requires_understanding=["Hadamard gate", "CNOT gate", "entanglement"],
165
+ common_mistakes=["Applying H to both qubits (no entanglement)", "Wrong CNOT direction"],
166
+ optimal_depth=2,
167
+ optimal_gate_count=2
168
+ )
169
+
170
+ PROBLEM_E3_MEASUREMENT_BASIS = TestProblem(
171
+ id="easy_003",
172
+ name="X-Basis Measurement Prep",
173
+ description="Prepare a state for X-basis measurement",
174
+ prompt="""Create a circuit that transforms a Z-basis state into X-basis.
175
+
176
+ Starting with |0⟩, prepare the state so that if we were to measure in the
177
+ X-basis (instead of Z-basis), we would get |+⟩ deterministically.
178
+
179
+ In other words: Transform |0⟩ → |+⟩ where |+⟩ = (|0⟩ + |1⟩)/√2
180
+
181
+ Requirements:
182
+ - Single qubit circuit
183
+ - The state should be the +1 eigenstate of the X operator
184
+
185
+ Provide the OpenQASM 2.0 circuit.""",
186
+ difficulty=ProblemDifficulty.EASY,
187
+ category=ProblemCategory.STATE_PREPARATION,
188
+ expected=ExpectedOutput(
189
+ min_qubits=1,
190
+ max_qubits=1,
191
+ max_depth=1,
192
+ required_gates=["h"],
193
+ expected_states={"0": 0.5, "1": 0.5}
194
+ ),
195
+ tags=["basis-change", "hadamard", "measurement"],
196
+ requires_understanding=["Measurement bases", "Hadamard as basis change"],
197
+ common_mistakes=["Not understanding basis transformation"],
198
+ optimal_depth=1,
199
+ optimal_gate_count=1
200
+ )
201
+
202
+
203
+ # =============================================================================
204
+ # MEDIUM PROBLEMS: Gate Decomposition and Multi-Qubit Operations
205
+ # =============================================================================
206
+
207
+ PROBLEM_M1_SWAP_DECOMPOSITION = TestProblem(
208
+ id="medium_001",
209
+ name="SWAP from CNOTs",
210
+ description="Implement SWAP gate using only CNOT gates",
211
+ prompt="""Decompose the SWAP gate into basic gates.
212
+
213
+ The SWAP gate exchanges the states of two qubits:
214
+ SWAP|ab⟩ = |ba⟩
215
+
216
+ You must implement SWAP using only CNOT gates (no native SWAP allowed).
217
+
218
+ Requirements:
219
+ - Use exactly 2 qubits
220
+ - Only use CNOT (cx) gates - no other two-qubit gates
221
+ - The circuit should swap the state of qubit 0 and qubit 1
222
+ - Test: if input is |01⟩, output should be |10⟩
223
+
224
+ Hint: CNOT can be thought of as conditional bit flip.
225
+
226
+ Provide the OpenQASM 2.0 circuit.""",
227
+ difficulty=ProblemDifficulty.MEDIUM,
228
+ category=ProblemCategory.GATE_SYNTHESIS,
229
+ expected=ExpectedOutput(
230
+ min_qubits=2,
231
+ max_qubits=2,
232
+ max_depth=6,
233
+ required_gates=["cx"],
234
+ forbidden_gates=["swap"]
235
+ ),
236
+ tags=["decomposition", "swap", "cnot-only"],
237
+ requires_understanding=["CNOT behavior", "Gate decomposition"],
238
+ common_mistakes=["Wrong number of CNOTs", "Wrong CNOT directions"],
239
+ reference_solution="OPENQASM 2.0;\ninclude \"qelib1.inc\";\nqreg q[2];\ncx q[0],q[1];\ncx q[1],q[0];\ncx q[0],q[1];",
240
+ optimal_depth=3,
241
+ optimal_gate_count=3
242
+ )
243
+
244
+ PROBLEM_M2_CONTROLLED_Z = TestProblem(
245
+ id="medium_002",
246
+ name="CZ from Basic Gates",
247
+ description="Build Controlled-Z using H and CNOT",
248
+ prompt="""Implement the Controlled-Z (CZ) gate using only Hadamard and CNOT gates.
249
+
250
+ The CZ gate applies a Z gate to the target qubit when the control is |1⟩:
251
+ CZ|00⟩ = |00⟩
252
+ CZ|01⟩ = |01⟩
253
+ CZ|10⟩ = |10⟩
254
+ CZ|11⟩ = -|11⟩ (note the phase flip!)
255
+
256
+ Requirements:
257
+ - Use only H and CNOT gates
258
+ - No native CZ gate allowed
259
+ - 2 qubits
260
+
261
+ Hint: Think about how H transforms Z operations.
262
+
263
+ Provide the OpenQASM 2.0 circuit.""",
264
+ difficulty=ProblemDifficulty.MEDIUM,
265
+ category=ProblemCategory.GATE_SYNTHESIS,
266
+ expected=ExpectedOutput(
267
+ min_qubits=2,
268
+ max_qubits=2,
269
+ max_depth=5,
270
+ required_gates=["h", "cx"],
271
+ forbidden_gates=["cz"]
272
+ ),
273
+ tags=["decomposition", "controlled-z", "phase"],
274
+ requires_understanding=["CZ gate definition", "H-Z-H = X identity"],
275
+ common_mistakes=["Forgetting H gates", "Wrong qubit as target"],
276
+ reference_solution="OPENQASM 2.0;\ninclude \"qelib1.inc\";\nqreg q[2];\nh q[1];\ncx q[0],q[1];\nh q[1];",
277
+ optimal_depth=3,
278
+ optimal_gate_count=3
279
+ )
280
+
281
+ PROBLEM_M3_PHASE_ESTIMATION_PREP = TestProblem(
282
+ id="medium_003",
283
+ name="Phase Kickback Setup",
284
+ description="Create the phase kickback configuration",
285
+ prompt="""Create a circuit demonstrating quantum phase kickback.
286
+
287
+ Phase kickback is a key concept where applying a controlled-U gate
288
+ causes the control qubit to acquire the eigenvalue phase.
289
+
290
+ Setup:
291
+ 1. Prepare control qubit in |+⟩ superposition
292
+ 2. Prepare target qubit in |1⟩ (eigenstate of Z with eigenvalue -1)
293
+ 3. Apply CZ gate
294
+ 4. The control qubit should now be in |−⟩ state
295
+
296
+ The final state of the control qubit (q[0]) should show the phase kickback.
297
+
298
+ Requirements:
299
+ - 2 qubits
300
+ - Control in superposition, target in |1⟩
301
+ - Apply controlled operation
302
+ - Use only basic gates (H, X, CX, CZ allowed)
303
+
304
+ Provide the OpenQASM 2.0 circuit.""",
305
+ difficulty=ProblemDifficulty.MEDIUM,
306
+ category=ProblemCategory.ALGORITHM,
307
+ expected=ExpectedOutput(
308
+ min_qubits=2,
309
+ max_qubits=2,
310
+ max_depth=5,
311
+ required_gates=["h", "x"],
312
+ expected_states={"01": 0.5, "11": 0.5} # After kickback
313
+ ),
314
+ tags=["phase-kickback", "algorithm-primitive", "phase-estimation"],
315
+ requires_understanding=["Phase kickback", "Eigenstates", "Controlled operations"],
316
+ common_mistakes=["Target not in eigenstate", "Missing superposition"],
317
+ optimal_depth=4,
318
+ optimal_gate_count=4
319
+ )
320
+
321
+
322
+ # =============================================================================
323
+ # HARD PROBLEMS: Algorithm Implementation
324
+ # =============================================================================
325
+
326
+ PROBLEM_H1_DEUTSCH = TestProblem(
327
+ id="hard_001",
328
+ name="Deutsch Algorithm",
329
+ description="Implement Deutsch's algorithm for function type detection",
330
+ prompt="""Implement Deutsch's algorithm to determine if a function is constant or balanced.
331
+
332
+ Deutsch's algorithm determines whether a black-box function f:{0,1}→{0,1} is:
333
+ - Constant: f(0)=f(1) (always 0 or always 1)
334
+ - Balanced: f(0)≠f(1) (different outputs)
335
+
336
+ For this problem, implement the oracle for the BALANCED function f(x) = x.
337
+
338
+ Algorithm structure:
339
+ 1. Initialize |01⟩ (input qubit |0⟩, ancilla qubit |1⟩)
340
+ 2. Apply H to both qubits
341
+ 3. Apply the oracle Uf: |x,y⟩ → |x, y⊕f(x)⟩
342
+ 4. Apply H to the input qubit
343
+ 5. Measure input qubit: |1⟩ means balanced
344
+
345
+ For f(x)=x, the oracle is just a CNOT.
346
+
347
+ Requirements:
348
+ - 2 qubits
349
+ - Implement full Deutsch circuit with f(x)=x oracle
350
+ - After measurement, input qubit should be in |1⟩
351
+
352
+ Provide the OpenQASM 2.0 circuit.""",
353
+ difficulty=ProblemDifficulty.HARD,
354
+ category=ProblemCategory.ALGORITHM,
355
+ expected=ExpectedOutput(
356
+ min_qubits=2,
357
+ max_qubits=2,
358
+ max_depth=8,
359
+ required_gates=["h", "x", "cx"],
360
+ expected_states={"11": 1.0} # Input qubit is 1 (balanced), ancilla is 1
361
+ ),
362
+ tags=["algorithm", "deutsch", "oracle"],
363
+ requires_understanding=["Deutsch algorithm", "Oracle construction", "Interference"],
364
+ common_mistakes=["Wrong initial state", "Missing ancilla preparation", "Oracle errors"],
365
+ optimal_depth=5,
366
+ optimal_gate_count=6
367
+ )
368
+
369
# Hard: single-iteration Grover search on 2 qubits. With N=4 and one marked
# item, exactly one oracle+diffusion round drives P(|11>) to 1 analytically,
# which is why expected_states can demand 1.0 (tolerance absorbs sampling noise).
PROBLEM_H2_GROVER_2QUBIT = TestProblem(
    id="hard_002",
    name="Grover Search (2-qubit)",
    description="Find marked state |11⟩ using Grover's algorithm",
    prompt="""Implement 2-qubit Grover's search algorithm to find the state |11⟩.

Grover's algorithm amplifies the probability of the marked state.

For 2 qubits with 1 marked state, we need exactly 1 iteration:

1. Initialize: H⊗H on |00⟩ → equal superposition
2. Oracle: Mark |11⟩ with a phase flip (multiply by -1)
3. Diffusion: Reflect about the average amplitude

Oracle for |11⟩: Apply CZ (or equivalent)
Diffusion operator: H⊗H · (2|00⟩⟨00| - I) · H⊗H

Requirements:
- 2 qubits
- After 1 Grover iteration, |11⟩ should have probability ≈ 1
- Use only basic gates

Provide the OpenQASM 2.0 circuit.""",
    difficulty=ProblemDifficulty.HARD,
    category=ProblemCategory.ALGORITHM,
    expected=ExpectedOutput(
        min_qubits=2,
        max_qubits=2,
        max_depth=12,
        # NOTE(review): the prompt suggests a CZ oracle, but required_gates
        # demands h/x/cx — presumably the grader expects the h-cx-h
        # decomposition of CZ; confirm a cz-based solution isn't rejected.
        required_gates=["h", "x", "cx"],
        expected_states={"11": 1.0},
        tolerance=0.1
    ),
    tags=["algorithm", "grover", "search", "amplitude-amplification"],
    requires_understanding=["Grover's algorithm", "Oracle design", "Diffusion operator"],
    common_mistakes=["Wrong oracle phase", "Missing diffusion", "Too many/few iterations"],
    optimal_depth=8,
    optimal_gate_count=10
)
408
+
409
# Hard: teleportation *setup only* — |+⟩ on q0 plus a Bell pair on q1/q2.
# The minimal solution is h q0; h q1; cx q1,q2 (depth 2-3), hence the tight
# max_depth=4 and no expected_states (the state is entangled, not classical).
PROBLEM_H3_TELEPORTATION_PREP = TestProblem(
    id="hard_003",
    name="Quantum Teleportation Setup",
    description="Prepare the entangled resource state for teleportation",
    prompt="""Create the initial setup for quantum teleportation.

Quantum teleportation requires:
1. The state to teleport |ψ⟩ on qubit 0
2. A shared Bell pair between qubits 1 and 2

For this problem:
- Prepare qubit 0 in state |+⟩ (the state we'll "teleport")
- Prepare qubits 1 and 2 in the Bell state (|00⟩ + |11⟩)/√2
- Qubit 1 goes to Alice (sender), qubit 2 to Bob (receiver)

Requirements:
- 3 qubits
- q[0]: |+⟩ state (to be teleported)
- q[1], q[2]: Bell pair (shared entanglement)

After this setup, Alice has q[0] and q[1], Bob has q[2].

Provide the OpenQASM 2.0 circuit.""",
    difficulty=ProblemDifficulty.HARD,
    category=ProblemCategory.ALGORITHM,
    expected=ExpectedOutput(
        min_qubits=3,
        max_qubits=3,
        max_depth=4,
        required_gates=["h", "cx"]
    ),
    tags=["algorithm", "teleportation", "entanglement", "bell-state"],
    requires_understanding=["Quantum teleportation", "Bell states", "Entanglement as resource"],
    common_mistakes=["Wrong qubits entangled", "State to teleport not prepared"],
    optimal_depth=3,
    optimal_gate_count=4
)
446
+
447
+
448
# =============================================================================
# PROBLEM SETS
# =============================================================================

# Ordered difficulty tiers; these lists are reused by the evaluation
# harnesses (see tests/test_ratelimited.py and the *_eval scripts).
EASY_PROBLEMS = [
    PROBLEM_E1_PHASE_FLIP,
    PROBLEM_E2_CONTROLLED_NOT,
    PROBLEM_E3_MEASUREMENT_BASIS
]

MEDIUM_PROBLEMS = [
    PROBLEM_M1_SWAP_DECOMPOSITION,
    PROBLEM_M2_CONTROLLED_Z,
    PROBLEM_M3_PHASE_ESTIMATION_PREP
]

HARD_PROBLEMS = [
    PROBLEM_H1_DEUTSCH,
    PROBLEM_H2_GROVER_2QUBIT,
    PROBLEM_H3_TELEPORTATION_PREP
]
469
+
470
+
471
# ============================================================================
# VERY_HARD PROBLEMS: Push NAKED to its limits
# ============================================================================

# Very hard: full textbook QFT decomposition on 4 qubits
# (4 H gates + 6 controlled-phase rotations + optional swaps).
PROBLEM_VH1_QFT_4QUBIT = TestProblem(
    id="very_hard_001",
    name="4-Qubit QFT",
    description="Implement full Quantum Fourier Transform on 4 qubits",
    prompt="""Implement the complete Quantum Fourier Transform (QFT) on 4 qubits.

The QFT transforms computational basis states into Fourier basis:
QFT|x⟩ = (1/√N) Σ_{k=0}^{N-1} e^{2πixk/N} |k⟩

For 4 qubits (N=16), the circuit requires:
1. Apply Hadamard to each qubit in sequence
2. Apply controlled phase rotations (CR_k) between qubits
3. SWAP qubits to correct bit ordering (optional for some conventions)

Phase rotation angles: R_k = rotation by π/2^(k-1)
- R_2 = π/2 (S gate or cp(π/2))
- R_3 = π/4 (T gate or cp(π/4))
- R_4 = π/8 (cp(π/8))

Requirements:
- Use exactly 4 qubits
- Must use H, controlled-phase (cp or crz), and optionally SWAP gates
- Do NOT use QFT as a black box - implement the full decomposition
- Include proper phase rotations between all qubit pairs

The output should show interference patterns in the Fourier basis.

Provide the OpenQASM 2.0 circuit.""",
    difficulty=ProblemDifficulty.VERY_HARD,
    category=ProblemCategory.ALGORITHM,
    expected=ExpectedOutput(
        min_qubits=4,
        max_qubits=4,
        max_depth=20,
        # NOTE(review): only "h" is required even though the prompt mandates
        # cp/crz — presumably relaxed because controlled-phase gate names vary
        # between toolchains (cp, cu1, crz); confirm this is intentional.
        required_gates=["h"]
    ),
    tags=["qft", "fourier", "phase-rotation", "multi-qubit"],
    requires_understanding=["QFT algorithm", "Controlled phase gates", "Bit reversal"],
    common_mistakes=["Wrong phase angles", "Missing controlled rotations", "Forgetting bit reversal"],
    optimal_depth=12,
    optimal_gate_count=16
)
517
+
518
# Very hard: 3-qubit Grover with two full oracle+diffusion rounds.
# For N=8 with one marked item, 2 iterations give P(|101⟩) ≈ 0.945, which is
# why the expectation is 0.9 with a 0.15 tolerance rather than 1.0.
PROBLEM_VH2_GROVER_3QUBIT = TestProblem(
    id="very_hard_002",
    name="Grover 3-Qubit Search",
    description="Implement Grover's search on 3 qubits with 2 iterations",
    prompt="""Implement 3-qubit Grover's search algorithm to find the marked state |101⟩.

For 3 qubits (N=8 states), the optimal number of iterations is approximately π√N/4 ≈ 2.

Algorithm structure (repeat 2 times):
1. Initial superposition: H⊗H⊗H on |000⟩

For EACH Grover iteration:
2. Oracle: Mark |101⟩ with phase flip (multiply amplitude by -1)
- Oracle for |101⟩: X on q[1], then CCZ (or Toffoli+phase), then X on q[1]
- Alternative: use multi-controlled Z gate

3. Diffusion operator (Grover diffuser):
- Apply H to all qubits
- Apply X to all qubits
- Apply multi-controlled Z (CCZ or decomposition)
- Apply X to all qubits
- Apply H to all qubits

Requirements:
- Use exactly 3 qubits
- Implement BOTH oracle and diffusion operator
- Perform exactly 2 Grover iterations
- After 2 iterations, |101⟩ should have probability > 0.9
- Use basic gates: H, X, CX, CCX (Toffoli), CZ, or their equivalents

IMPORTANT: You must implement CCZ using either:
- ccx followed by cz and ccx (Toffoli-based)
- h on target, ccx, h on target (standard decomposition)

Provide the OpenQASM 2.0 circuit.""",
    difficulty=ProblemDifficulty.VERY_HARD,
    category=ProblemCategory.ALGORITHM,
    expected=ExpectedOutput(
        min_qubits=3,
        max_qubits=3,
        max_depth=30,
        required_gates=["h", "x", "cx"],
        expected_states={"101": 0.9},
        tolerance=0.15
    ),
    tags=["grover", "search", "oracle", "diffusion", "multi-iteration"],
    requires_understanding=["Grover's algorithm", "Multi-controlled gates", "Oracle design", "Diffusion operator"],
    common_mistakes=["Wrong oracle", "Single iteration only", "Incorrect diffusion", "Missing CCZ decomposition"],
    optimal_depth=24,
    optimal_gate_count=40
)
569
+
570
# Very hard: 2-layer hardware-efficient ansatz (Ry+Rz walls + linear CNOT
# ladder). Angles are fixed so the circuit is concrete; no expected_states
# because the prepared state is an arbitrary trial wavefunction.
PROBLEM_VH3_VQE_ANSATZ = TestProblem(
    id="very_hard_003",
    name="VQE Hardware-Efficient Ansatz",
    description="Construct a 4-qubit hardware-efficient ansatz for VQE",
    prompt="""Construct a 4-qubit hardware-efficient variational ansatz for VQE.

A hardware-efficient ansatz is a parameterized quantum circuit used in VQE
(Variational Quantum Eigensolver) to prepare trial wavefunctions.

Structure (2 layers):

LAYER 1:
1. Apply Ry(θ) rotations to all 4 qubits (use ry gate with parameter, e.g., ry(pi/4))
2. Apply Rz(φ) rotations to all 4 qubits (use rz gate with parameter, e.g., rz(pi/4))
3. Apply entangling CNOT ladder: cx q[0],q[1]; cx q[1],q[2]; cx q[2],q[3];

LAYER 2:
4. Apply Ry(θ') rotations to all 4 qubits
5. Apply Rz(φ') rotations to all 4 qubits
6. Apply entangling CNOT ladder again

For this implementation, use fixed angles:
- Layer 1: ry(0.5) and rz(0.3) on all qubits
- Layer 2: ry(0.7) and rz(0.2) on all qubits

Requirements:
- Use exactly 4 qubits
- Implement 2 full layers (rotation + entanglement each)
- Use ry, rz, and cx gates
- Linear entanglement pattern (nearest-neighbor CNOTs)

This circuit structure is used on real quantum hardware (IBM, Google) for
quantum chemistry and optimization problems.

Provide the OpenQASM 2.0 circuit.""",
    difficulty=ProblemDifficulty.VERY_HARD,
    category=ProblemCategory.ALGORITHM,
    expected=ExpectedOutput(
        min_qubits=4,
        max_qubits=4,
        max_depth=16,
        required_gates=["ry", "rz", "cx"]
    ),
    tags=["vqe", "ansatz", "variational", "quantum-chemistry", "hardware-efficient"],
    requires_understanding=["VQE algorithm", "Parameterized circuits", "Hardware constraints", "Entanglement layers"],
    common_mistakes=["Missing rotation layers", "Wrong entanglement pattern", "Incorrect parameter format"],
    optimal_depth=12,
    optimal_gate_count=22
)
619
+
620
# Very hard: Bernstein-Vazirani with hidden string s=1011 on 4 input qubits
# plus one ancilla (phase-kickback oracle built from three CNOTs).
PROBLEM_VH4_BERNSTEIN_VAZIRANI = TestProblem(
    id="very_hard_004",
    name="Bernstein-Vazirani 4-bit",
    description="Implement Bernstein-Vazirani algorithm to find hidden string s=1011",
    prompt="""Implement the Bernstein-Vazirani algorithm to find the hidden string s=1011.

The Bernstein-Vazirani algorithm finds a hidden n-bit string s in ONE query.
Given a function f(x) = s·x mod 2 (bitwise dot product), find s.

For s=1011 (4 bits), we need 5 qubits (4 input + 1 ancilla):

Algorithm:
1. Initialize all input qubits to |0⟩, ancilla to |1⟩
2. Apply H to all 5 qubits (creates superposition + phase kickback setup)
3. Apply Oracle U_f: For each bit s_i=1, apply CNOT from q[i] to ancilla
- s=1011 means: CNOT from q[0] to q[4], q[2] to q[4], q[3] to q[4]
- (s[0]=1, s[1]=0, s[2]=1, s[3]=1 → control qubits 0, 2, 3)
4. Apply H to all input qubits (NOT the ancilla)
5. Measure input qubits → reveals s directly

Requirements:
- Use 5 qubits (q[0-3] for input, q[4] for ancilla)
- Prepare ancilla in |1⟩ state before Hadamards
- Oracle: CNOT from q[0], q[2], q[3] to q[4] (positions where s has 1)
- Apply final Hadamards only to input qubits
- Measure input qubits → should give |1011⟩

After measurement, the input register should read 1011 with probability 1.0.

Provide the OpenQASM 2.0 circuit.""",
    difficulty=ProblemDifficulty.VERY_HARD,
    category=ProblemCategory.ALGORITHM,
    expected=ExpectedOutput(
        min_qubits=5,
        max_qubits=5,
        max_depth=10,
        required_gates=["h", "x", "cx"],
        # NOTE(review): after the oracle the ancilla sits in |−⟩, so its
        # measured value is 0/1 at random — expecting the 5th bit to be "1"
        # with probability 1.0 looks wrong unless the harness only compares
        # the input-register bits. Bitstring endianness (q0-first vs q4-first)
        # is also unverified here. Confirm against the evaluation harness.
        expected_states={"10111": 1.0},  # 1011 in input register, 1 in ancilla
        tolerance=0.05
    ),
    tags=["bernstein-vazirani", "oracle", "hidden-string", "query-complexity"],
    requires_understanding=["Bernstein-Vazirani algorithm", "Oracle construction", "Phase kickback"],
    common_mistakes=["Wrong oracle CNOTs", "Missing ancilla preparation", "Hadamards on ancilla"],
    optimal_depth=6,
    optimal_gate_count=15
)
666
+
667
VERY_HARD_PROBLEMS = [
    PROBLEM_VH1_QFT_4QUBIT,
    PROBLEM_VH2_GROVER_3QUBIT,
    PROBLEM_VH3_VQE_ANSATZ,
    PROBLEM_VH4_BERNSTEIN_VAZIRANI
]

# Full ordered suite, easiest first; lookup helpers below iterate this list.
ALL_PROBLEMS = EASY_PROBLEMS + MEDIUM_PROBLEMS + HARD_PROBLEMS + VERY_HARD_PROBLEMS

# Problem registry by ID (ids like "hard_002" are assumed unique per problem)
PROBLEMS_BY_ID = {p.id: p for p in ALL_PROBLEMS}
678
+
679
+
680
def get_problem(problem_id: str) -> Optional[TestProblem]:
    """Return the problem registered under *problem_id*, or None if unknown."""
    try:
        return PROBLEMS_BY_ID[problem_id]
    except KeyError:
        return None
683
+
684
+
685
def get_problems_by_difficulty(difficulty: ProblemDifficulty) -> List[TestProblem]:
    """Return every problem at the given difficulty.

    Accepts either a ProblemDifficulty member or its case-insensitive
    string value (e.g. "easy").
    """
    target = ProblemDifficulty(difficulty.lower()) if isinstance(difficulty, str) else difficulty
    return [problem for problem in ALL_PROBLEMS if problem.difficulty == target]
691
+
692
+
693
def get_problems_by_category(category: ProblemCategory) -> List[TestProblem]:
    """Return every problem belonging to *category*."""
    return list(filter(lambda problem: problem.category == category, ALL_PROBLEMS))
696
+
697
+
698
def get_problems_by_tag(tag: str) -> List[TestProblem]:
    """Return every problem whose tag list contains *tag* (exact match)."""
    matching = [problem for problem in ALL_PROBLEMS if tag in problem.tags]
    return matching
701
+
702
+
703
def get_research_problem_set() -> List[TestProblem]:
    """Get the standard research evaluation set (3 problems, one per difficulty).

    Note: deliberately excludes the VERY_HARD tier; callers wanting the full
    suite should use ALL_PROBLEMS instead.
    """
    return [
        PROBLEM_E1_PHASE_FLIP,  # Easy: Phase flip state
        PROBLEM_M1_SWAP_DECOMPOSITION,  # Medium: SWAP decomposition
        PROBLEM_H1_DEUTSCH  # Hard: Deutsch algorithm
    ]
tests/test_quality_analyzer.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Path: QAgents-workflos/test_quality_analyzer.py
2
+ # Description: Test the circuit quality analyzer
3
+ """Test that quality analyzer works with MCP endpoints."""
4
+
5
+ from tests.circuit_quality_analyzer import CircuitQualityAnalyzer, get_analyzer
6
+
7
def test_analyzer() -> None:
    """Smoke-test the quality analyzer end-to-end on a Bell-state circuit.

    Prints every metric the analyzer produces; does not assert — this is a
    manual diagnostic script, not an automated test.
    """
    analyzer = get_analyzer()

    # Test with a Bell state circuit
    test_qasm = """OPENQASM 2.0;
include "qelib1.inc";
qreg q[2];
creg c[2];
h q[0];
cx q[0], q[1];
measure q -> c;
"""

    print("Analyzing Bell state circuit...")
    print("-" * 40)

    # analyze_circuit is expected to return an object exposing the metric
    # attributes printed below (see tests/circuit_quality_analyzer.py).
    result = analyzer.analyze_circuit(test_qasm)

    print(f"Syntax Valid: {result.syntax_valid}")
    print(f"Depth: {result.depth}")
    print(f"Gate Count: {result.gate_count}")
    print(f"CX Count: {result.cx_count}")
    print(f"Single Qubit Count: {result.single_qubit_count}")
    print(f"Hardware Fitness: {result.hardware_fitness}")
    print(f"Complexity Score: {result.complexity_score}")
    print(f"State Correctness: {result.state_correctness}")
    print(f"Noise Estimate: {result.noise_estimate}")
    print(f"Probabilities: {result.probabilities}")

    # Non-fatal issues are accumulated rather than raised.
    if result.errors:
        print(f"\nErrors/Warnings:")
        for err in result.errors:
            print(f"  - {err}")

if __name__ == "__main__":
    test_analyzer()
tests/test_ratelimited.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
Quick test of rate-limited evaluation on easy problems.

Runs the first three easy problems through the Guided orchestration mode
(4 LLM calls each) and prints a success summary.
"""
import os

from tests.evaluation_harness import EvaluationHarness
from tests.test_problems import EASY_PROBLEMS, MEDIUM_PROBLEMS, HARD_PROBLEMS

# Combine all problems
TEST_PROBLEMS = EASY_PROBLEMS + MEDIUM_PROBLEMS + HARD_PROBLEMS

# Ensure API key is set.
# BUG FIX: the original did `os.environ["GOOGLE_API_KEY"] = "$env:GOOGLE_API_KEY"`,
# assigning a leaked PowerShell placeholder string and clobbering any real key
# already exported in the environment. Fail fast with a clear message instead.
if not os.environ.get("GOOGLE_API_KEY"):
    raise SystemExit("GOOGLE_API_KEY is not set; export a real key before running this test.")

print("=== RATE-LIMITED EVALUATION TEST ===")
print("Testing Guided mode (4 LLM calls per problem)")
print("Rate limit: 5 seconds between requests")
print("")

# Run only 3 easy problems with guided mode
harness = EvaluationHarness()
easy_problems = [p for p in TEST_PROBLEMS if p.id.startswith('easy')][:3]

print(f"Testing {len(easy_problems)} problems with Guided orchestration\n")
results = []

for problem in easy_problems:
    print(f"Problem: {problem.name}")
    result = harness.evaluate_single_run(problem, mode='guided', run_number=1)
    results.append(result)
    print(f"  Success: {result.success}, Time: {result.execution_time_ms:.1f}ms\n")

# Summary
successes = sum(1 for r in results if r.success)
print("=== SUMMARY ===")
print(f"Success rate: {successes}/{len(results)} ({100*successes/len(results):.0f}%)")
print(f"Total API calls: ~{len(results) * 4} LLM requests")
print(f"Expected time with rate limiting: ~{len(results) * 4 * 5 / 60:.1f} minutes")
tools/__init__.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tools module: MCP endpoint wrappers as callable tools."""
2
+
3
+ from .tool_registry import (
4
+ ToolDefinition,
5
+ ToolCategory,
6
+ ToolRegistry,
7
+ registry,
8
+ register_tool
9
+ )
10
+
11
+ from .quantum_tools import (
12
+ get_all_tools,
13
+ get_tools_by_category,
14
+ invoke_tool,
15
+ # Creation tools
16
+ create_from_template,
17
+ generate_random_circuit,
18
+ generate_from_description,
19
+ # Analysis tools
20
+ parse_qasm,
21
+ analyze_circuit,
22
+ get_circuit_depth,
23
+ # Validation tools
24
+ validate_syntax,
25
+ check_connectivity,
26
+ verify_unitary,
27
+ # Simulation tools
28
+ simulate_circuit,
29
+ get_statevector,
30
+ get_probabilities,
31
+ # Scoring tools
32
+ calculate_complexity,
33
+ calculate_hardware_fitness,
34
+ calculate_expressibility,
35
+ # Resource tools
36
+ estimate_resources,
37
+ estimate_noise,
38
+ # Composition tools
39
+ compose_circuits,
40
+ generate_inverse,
41
+ tensor_circuits,
42
+ repeat_circuit
43
+ )
44
+
45
+ __all__ = [
46
+ "ToolDefinition",
47
+ "ToolCategory",
48
+ "ToolRegistry",
49
+ "registry",
50
+ "register_tool",
51
+ "get_all_tools",
52
+ "get_tools_by_category",
53
+ "invoke_tool"
54
+ ]
tools/quantum_tools.py ADDED
@@ -0,0 +1,346 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Quantum Tools: MCP endpoint wrappers registered as tools.
3
+ All 23 MCP endpoints wrapped as callable tools for agents.
4
+ """
5
+
6
+ from typing import Any, Dict, Optional
7
+ from .tool_registry import register_tool, ToolCategory, registry
8
+
9
# Import client lazily to avoid circular imports
def _get_client():
    """Return the shared MCP client (imported at call time, not module load)."""
    from client import get_client
    return get_client()
13
+
14
+
15
# ===== CREATION TOOLS =====

@register_tool(
    name="create_from_template",
    description="Create a quantum circuit from a predefined template (bell_state, ghz, qft, grover, etc.)",
    category=ToolCategory.CREATION,
    parameters={
        "template": {"type": "string", "description": "Template name", "required": True},
        "num_qubits": {"type": "integer", "description": "Number of qubits", "required": False}
    },
    returns="QASM code of the created circuit"
)
def create_from_template(template: str, num_qubits: int = 2) -> Dict:
    """Build a circuit from a named template via the MCP client."""
    resp = _get_client().create_circuit_from_template(template, num_qubits)
    return {"success": resp.success, "qasm": resp.data, "error": resp.error}


@register_tool(
    name="generate_random_circuit",
    description="Generate a random quantum circuit with specified parameters",
    category=ToolCategory.CREATION,
    parameters={
        "num_qubits": {"type": "integer", "description": "Number of qubits", "required": True},
        "depth": {"type": "integer", "description": "Circuit depth", "required": True},
        "gate_set": {"type": "string", "description": "Comma-separated gates (h,cx,rz)", "required": False}
    },
    returns="QASM code of the random circuit"
)
def generate_random_circuit(num_qubits: int, depth: int, gate_set: str = "h,cx,rz") -> Dict:
    """Ask the MCP service for a random circuit drawn from *gate_set*."""
    resp = _get_client().generate_random_circuit(num_qubits, depth, gate_set)
    return {"success": resp.success, "qasm": resp.data, "error": resp.error}


@register_tool(
    name="generate_from_description",
    description="Generate a circuit from natural language description",
    category=ToolCategory.CREATION,
    parameters={
        "description": {"type": "string", "description": "Natural language description of the circuit", "required": True}
    },
    returns="QASM code of the generated circuit"
)
def generate_from_description(description: str) -> Dict:
    """Translate a natural-language description into QASM via the MCP client."""
    resp = _get_client().generate_circuit_from_description(description)
    return {"success": resp.success, "qasm": resp.data, "error": resp.error}
60
+
61
+
62
# ===== ANALYSIS TOOLS =====

@register_tool(
    name="parse_qasm",
    description="Parse OpenQASM code and extract circuit structure",
    category=ToolCategory.ANALYSIS,
    parameters={
        "qasm": {"type": "string", "description": "OpenQASM code", "required": True}
    },
    returns="Parsed circuit structure with gates, qubits, etc."
)
def parse_qasm(qasm: str) -> Dict:
    """Parse QASM into a structural description via the MCP client."""
    resp = _get_client().parse_qasm(qasm)
    return {"success": resp.success, "structure": resp.data, "error": resp.error}


@register_tool(
    name="analyze_circuit",
    description="Analyze circuit properties: depth, gate count, qubit usage",
    category=ToolCategory.ANALYSIS,
    parameters={
        "qasm": {"type": "string", "description": "OpenQASM code", "required": True}
    },
    returns="Circuit analysis with depth, gate counts, etc."
)
def analyze_circuit(qasm: str) -> Dict:
    """Run the MCP circuit-analysis endpoint on *qasm*."""
    resp = _get_client().analyze_circuit(qasm)
    return {"success": resp.success, "analysis": resp.data, "error": resp.error}


@register_tool(
    name="get_circuit_depth",
    description="Get the depth of a quantum circuit",
    category=ToolCategory.ANALYSIS,
    parameters={
        "qasm": {"type": "string", "description": "OpenQASM code", "required": True}
    },
    returns="Integer depth value"
)
def get_circuit_depth(qasm: str) -> Dict:
    """Fetch the circuit depth for *qasm* from the MCP service."""
    resp = _get_client().get_circuit_depth(qasm)
    return {"success": resp.success, "depth": resp.data, "error": resp.error}
104
+
105
+
106
# ===== VALIDATION TOOLS =====

@register_tool(
    name="validate_syntax",
    description="Validate QASM syntax for correctness",
    category=ToolCategory.VALIDATION,
    parameters={
        "qasm": {"type": "string", "description": "OpenQASM code", "required": True}
    },
    returns="Validation result with any syntax errors"
)
def validate_syntax(qasm: str) -> Dict:
    """Check *qasm* for syntax errors via the MCP client."""
    resp = _get_client().validate_syntax(qasm)
    return {"success": resp.success, "valid": resp.data, "error": resp.error}


@register_tool(
    name="check_connectivity",
    description="Check if circuit respects hardware qubit connectivity",
    category=ToolCategory.VALIDATION,
    parameters={
        "qasm": {"type": "string", "description": "OpenQASM code", "required": True},
        "hardware": {"type": "string", "description": "Hardware profile (ibm_eagle, ionq_aria, rigetti_aspen)", "required": False}
    },
    returns="Connectivity check result"
)
def check_connectivity(qasm: str, hardware: str = "ibm_eagle") -> Dict:
    """Verify the circuit against the coupling map of *hardware*."""
    resp = _get_client().check_connectivity(qasm, hardware)
    return {"success": resp.success, "result": resp.data, "error": resp.error}


@register_tool(
    name="verify_unitary",
    description="Verify that circuit produces a valid unitary matrix",
    category=ToolCategory.VALIDATION,
    parameters={
        "qasm": {"type": "string", "description": "OpenQASM code", "required": True}
    },
    returns="Unitary verification result"
)
def verify_unitary(qasm: str) -> Dict:
    """Ask the MCP service to confirm the circuit implements a unitary."""
    resp = _get_client().verify_unitary(qasm)
    return {"success": resp.success, "result": resp.data, "error": resp.error}
149
+
150
+
151
# ===== SIMULATION TOOLS =====

@register_tool(
    name="simulate_circuit",
    description="Simulate circuit execution and get measurement results",
    category=ToolCategory.SIMULATION,
    parameters={
        "qasm": {"type": "string", "description": "OpenQASM code", "required": True},
        "shots": {"type": "integer", "description": "Number of measurement shots", "required": False}
    },
    returns="Measurement results with counts"
)
def simulate_circuit(qasm: str, shots: int = 1024) -> Dict:
    """Run a shot-based simulation of *qasm* via the MCP client."""
    resp = _get_client().simulate_circuit(qasm, shots)
    return {"success": resp.success, "results": resp.data, "error": resp.error}


@register_tool(
    name="get_statevector",
    description="Get the statevector of a circuit (no measurement)",
    category=ToolCategory.SIMULATION,
    parameters={
        "qasm": {"type": "string", "description": "OpenQASM code", "required": True}
    },
    returns="Statevector as complex amplitudes"
)
def get_statevector(qasm: str) -> Dict:
    """Fetch the ideal statevector for *qasm* (measurement-free)."""
    resp = _get_client().get_statevector(qasm)
    return {"success": resp.success, "statevector": resp.data, "error": resp.error}


@register_tool(
    name="get_probabilities",
    description="Get probability distribution from circuit",
    category=ToolCategory.SIMULATION,
    parameters={
        "qasm": {"type": "string", "description": "OpenQASM code", "required": True}
    },
    returns="Probability distribution over computational basis states"
)
def get_probabilities(qasm: str) -> Dict:
    """Fetch the basis-state probability distribution for *qasm*."""
    resp = _get_client().get_probabilities(qasm)
    return {"success": resp.success, "probabilities": resp.data, "error": resp.error}
194
+
195
+
196
# ===== SCORING TOOLS =====

@register_tool(
    name="calculate_complexity",
    description="Calculate circuit complexity score (lower is better)",
    category=ToolCategory.SCORING,
    parameters={
        "qasm": {"type": "string", "description": "OpenQASM code", "required": True}
    },
    returns="Complexity score and breakdown"
)
def calculate_complexity(qasm: str) -> Dict:
    """Score circuit complexity via the MCP client (lower is better)."""
    resp = _get_client().calculate_complexity_score(qasm)
    return {"success": resp.success, "score": resp.data, "error": resp.error}


@register_tool(
    name="calculate_hardware_fitness",
    description="Calculate how well circuit fits target hardware",
    category=ToolCategory.SCORING,
    parameters={
        "qasm": {"type": "string", "description": "OpenQASM code", "required": True},
        "hardware": {"type": "string", "description": "Hardware profile", "required": False}
    },
    returns="Hardware fitness score (higher is better)"
)
def calculate_hardware_fitness(qasm: str, hardware: str = "ibm_eagle") -> Dict:
    """Score how well the circuit maps onto *hardware* (higher is better)."""
    resp = _get_client().calculate_hardware_fitness(qasm, hardware)
    return {"success": resp.success, "score": resp.data, "error": resp.error}


@register_tool(
    name="calculate_expressibility",
    description="Calculate circuit expressibility (ability to explore state space)",
    category=ToolCategory.SCORING,
    parameters={
        "qasm": {"type": "string", "description": "OpenQASM code", "required": True}
    },
    returns="Expressibility score"
)
def calculate_expressibility(qasm: str) -> Dict:
    """Score the circuit's expressibility via the MCP client."""
    resp = _get_client().calculate_expressibility(qasm)
    return {"success": resp.success, "score": resp.data, "error": resp.error}
239
+
240
+
241
# ===== RESOURCE TOOLS =====

@register_tool(
    name="estimate_resources",
    description="Estimate resource requirements (qubits, gates, depth)",
    category=ToolCategory.RESOURCE,
    parameters={
        "qasm": {"type": "string", "description": "OpenQASM code", "required": True}
    },
    returns="Resource estimation breakdown"
)
def estimate_resources(qasm: str) -> Dict:
    """Fetch a qubit/gate/depth resource estimate for *qasm*."""
    resp = _get_client().estimate_resources(qasm)
    return {"success": resp.success, "resources": resp.data, "error": resp.error}


@register_tool(
    name="estimate_noise",
    description="Estimate noise impact on circuit execution",
    category=ToolCategory.RESOURCE,
    parameters={
        "qasm": {"type": "string", "description": "OpenQASM code", "required": True},
        "hardware": {"type": "string", "description": "Hardware profile", "required": False}
    },
    returns="Noise estimation"
)
def estimate_noise(qasm: str, hardware: str = "ibm_eagle") -> Dict:
    """Estimate the noise impact of running *qasm* on *hardware*."""
    resp = _get_client().estimate_noise(qasm, hardware)
    return {"success": resp.success, "noise": resp.data, "error": resp.error}
270
+
271
+
272
# ===== COMPOSITION TOOLS =====

@register_tool(
    name="compose_circuits",
    description="Compose two circuits sequentially",
    category=ToolCategory.COMPOSITION,
    parameters={
        "qasm1": {"type": "string", "description": "First circuit QASM", "required": True},
        "qasm2": {"type": "string", "description": "Second circuit QASM", "required": True},
        "qubit_mapping": {"type": "string", "description": "Qubit mapping (e.g., '0:1,1:0')", "required": False}
    },
    returns="Composed circuit QASM"
)
def compose_circuits(qasm1: str, qasm2: str, qubit_mapping: str = "") -> Dict:
    """Append *qasm2* after *qasm1*, optionally remapping qubits."""
    resp = _get_client().compose_circuits(qasm1, qasm2, qubit_mapping)
    return {"success": resp.success, "qasm": resp.data, "error": resp.error}


@register_tool(
    name="generate_inverse",
    description="Generate the inverse (adjoint) of a circuit",
    category=ToolCategory.COMPOSITION,
    parameters={
        "qasm": {"type": "string", "description": "OpenQASM code", "required": True}
    },
    returns="Inverse circuit QASM"
)
def generate_inverse(qasm: str) -> Dict:
    """Request the adjoint of *qasm* from the MCP client."""
    resp = _get_client().generate_inverse_circuit(qasm)
    return {"success": resp.success, "qasm": resp.data, "error": resp.error}


@register_tool(
    name="tensor_circuits",
    description="Create tensor product of two circuits (parallel composition)",
    category=ToolCategory.COMPOSITION,
    parameters={
        "qasm1": {"type": "string", "description": "First circuit QASM", "required": True},
        "qasm2": {"type": "string", "description": "Second circuit QASM", "required": True}
    },
    returns="Tensored circuit QASM"
)
def tensor_circuits(qasm1: str, qasm2: str) -> Dict:
    """Combine two circuits side by side (tensor product)."""
    resp = _get_client().tensor_circuits(qasm1, qasm2)
    return {"success": resp.success, "qasm": resp.data, "error": resp.error}


@register_tool(
    name="repeat_circuit",
    description="Repeat a circuit n times",
    category=ToolCategory.COMPOSITION,
    parameters={
        "qasm": {"type": "string", "description": "OpenQASM code", "required": True},
        "n": {"type": "integer", "description": "Number of repetitions", "required": True}
    },
    returns="Repeated circuit QASM"
)
def repeat_circuit(qasm: str, n: int) -> Dict:
    """Concatenate *n* sequential copies of *qasm*."""
    resp = _get_client().repeat_circuit(qasm, n)
    return {"success": resp.success, "qasm": resp.data, "error": resp.error}
332
+
333
+
334
# ===== UTILITY FUNCTIONS =====

def get_all_tools():
    """Get all registered tools."""
    # Thin pass-through to the module-level registry populated by the
    # @register_tool decorators above.
    return registry.get_all()

def get_tools_by_category(category: ToolCategory):
    """Get tools by category."""
    return registry.get_by_category(category)

def invoke_tool(name: str, **kwargs):
    """Invoke a tool by name."""
    # kwargs are forwarded unchanged; unknown names are handled by the registry.
    return registry.invoke(name, **kwargs)
tools/tool_registry.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Tools Module: Wrapped MCP endpoints as callable tools for agents.
3
+ Each tool is a self-contained function that can be invoked by agents.
4
+ """
5
+
6
+ from typing import Any, Callable, Dict, List, Optional
7
+ from dataclasses import dataclass, field
8
+ from enum import Enum
9
+ import json
10
+
11
class ToolCategory(Enum):
    """Functional grouping of tools, used to scope what each agent may call."""

    CREATION = "creation"        # build new circuits
    ANALYSIS = "analysis"        # inspect circuit structure/metrics
    VALIDATION = "validation"    # check circuit correctness
    SIMULATION = "simulation"    # execute/simulate circuits
    SCORING = "scoring"          # rate circuit quality
    COMPOSITION = "composition"  # combine/transform circuits
    RESOURCE = "resource"        # estimate resource requirements
20
+
21
@dataclass
class ToolDefinition:
    """Schema plus callable for a single agent-invocable tool."""

    name: str
    description: str
    category: ToolCategory
    # param name -> {"type": ..., "description": ..., "required": ...}
    parameters: Dict[str, Dict]
    function: Callable
    returns: str

    def to_llm_schema(self) -> Dict:
        """Render this tool in the OpenAI function-calling JSON format.

        Missing "type" defaults to "string"; missing "description" to "";
        a parameter is listed as required only when its "required" flag
        is truthy.
        """
        props = {
            pname: {
                "type": meta.get("type", "string"),
                "description": meta.get("description", ""),
            }
            for pname, meta in self.parameters.items()
        }
        mandatory = [
            pname for pname, meta in self.parameters.items()
            if meta.get("required", False)
        ]
        return {
            "type": "function",
            "function": {
                "name": self.name,
                "description": self.description,
                "parameters": {
                    "type": "object",
                    "properties": props,
                    "required": mandatory,
                },
            },
        }
56
+
57
+
58
class ToolRegistry:
    """Registry of all available tools, indexed by name and by category."""

    def __init__(self):
        # name -> ToolDefinition
        self._tools: Dict[str, ToolDefinition] = {}
        # category -> tool names, in registration order
        self._by_category: Dict[ToolCategory, List[str]] = {cat: [] for cat in ToolCategory}

    def register(self, tool: ToolDefinition):
        """Register a tool, replacing any earlier tool with the same name.

        Fix: previously, re-registering a name overwrote the name index but
        appended to the category index again, so get_by_category() returned
        duplicates (or kept a stale entry when the category changed). The old
        index entry is now removed before the new one is added.
        """
        previous = self._tools.get(tool.name)
        if previous is not None:
            self._by_category[previous.category].remove(tool.name)
        self._tools[tool.name] = tool
        self._by_category[tool.category].append(tool.name)

    def get(self, name: str) -> Optional[ToolDefinition]:
        """Get a tool by name, or None if not registered."""
        return self._tools.get(name)

    def get_by_category(self, category: ToolCategory) -> List[ToolDefinition]:
        """Get all tools in a category, in registration order."""
        return [self._tools[name] for name in self._by_category[category]]

    def get_all(self) -> List[ToolDefinition]:
        """Get all registered tools."""
        return list(self._tools.values())

    def get_llm_schemas(self, categories: Optional[List[ToolCategory]] = None) -> List[Dict]:
        """Get OpenAI function schemas for the given categories.

        With categories=None, schemas for every registered tool are returned.
        """
        if categories is None:
            tools = self.get_all()
        else:
            tools = []
            for cat in categories:
                tools.extend(self.get_by_category(cat))
        return [t.to_llm_schema() for t in tools]

    def invoke(self, name: str, **kwargs) -> Any:
        """Invoke a tool by name with keyword arguments.

        Raises:
            ValueError: if no tool with that name is registered.
        """
        tool = self.get(name)
        if tool is None:
            raise ValueError(f"Unknown tool: {name}")
        return tool.function(**kwargs)
98
+
99
+
100
# Global registry: module-level singleton populated by the @register_tool decorator.
registry = ToolRegistry()
102
+
103
+
104
def register_tool(name: str, description: str, category: ToolCategory,
                  parameters: Dict, returns: str):
    """Decorator factory: register the decorated function as a tool.

    The function is registered in the module-level ``registry`` and
    returned unwrapped, so it stays directly callable.
    """
    def decorator(func: Callable):
        # Positional order matches ToolDefinition's field order.
        registry.register(
            ToolDefinition(name, description, category, parameters, func, returns)
        )
        return func
    return decorator