Spaces:
Sleeping
Sleeping
Deminiko
committed on
Commit
·
1bb4678
0
Parent(s):
Initial commit: QAgents-workflos multi-agent quantum circuit optimization system
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .env.example +76 -0
- .gitignore +53 -0
- AGENTS.md +64 -0
- LICENSE +21 -0
- README.md +265 -0
- __init__.py +6 -0
- agents/__init__.py +44 -0
- agents/base_agent.py +302 -0
- agents/llm_adapter.py +676 -0
- agents/specialized_agents.py +223 -0
- app.py +120 -0
- client/__init__.py +5 -0
- client/mcp_client.py +698 -0
- config.py +305 -0
- database/__init__.py +36 -0
- database/circuit_quality_db.py +414 -0
- database/storage.py +278 -0
- orchestrators/__init__.py +30 -0
- orchestrators/orchestrator.py +541 -0
- orchestrators/quasar_orchestrator.py +563 -0
- orchestrators/router.py +188 -0
- prompts/__init__.py +25 -0
- prompts/agent_prompts.py +276 -0
- prompts/optimized_prompts.py +289 -0
- requirements.txt +6 -0
- tasks-project-state.json +149 -0
- tests/__init__.py +87 -0
- tests/circuit_quality_analyzer.py +351 -0
- tests/comprehensive_test.py +287 -0
- tests/comprehensive_test_v2.py +299 -0
- tests/evaluation_harness.py +748 -0
- tests/evaluation_report.txt +54 -0
- tests/fast_eval.py +115 -0
- tests/final_eval.py +137 -0
- tests/full_comparison.py +214 -0
- tests/mini_test.py +75 -0
- tests/mode_evaluation.py +202 -0
- tests/quality_evaluation_harness.py +314 -0
- tests/quick_mode_test.py +81 -0
- tests/quick_test.py +85 -0
- tests/run_evaluation.py +197 -0
- tests/run_quality_eval.py +217 -0
- tests/test_db_storage.py +59 -0
- tests/test_mcp_client.py +181 -0
- tests/test_problems.py +709 -0
- tests/test_quality_analyzer.py +42 -0
- tests/test_ratelimited.py +37 -0
- tools/__init__.py +54 -0
- tools/quantum_tools.py +346 -0
- tools/tool_registry.py +118 -0
.env.example
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# QAgents-Workflows Environment Configuration
|
| 2 |
+
# Copy this file to .env and fill in your actual values
|
| 3 |
+
# For Hugging Face Spaces: Add these as Repository Secrets or Space Variables
|
| 4 |
+
|
| 5 |
+
# =============================================================================
|
| 6 |
+
# LLM Configuration (Model-Agnostic)
|
| 7 |
+
# =============================================================================
|
| 8 |
+
|
| 9 |
+
# LLM Provider: gemini (default), openai, anthropic, groq, ollama, etc.
|
| 10 |
+
# Leave empty to use default: "gemini"
|
| 11 |
+
LLM_PROVIDER=gemini
|
| 12 |
+
|
| 13 |
+
# LLM Model identifier
|
| 14 |
+
# For Gemini: gemini-2.5-flash-lite, gemini-2.5-flash, gemini-2.5-pro, gemini-2.0-flash
|
| 15 |
+
# For OpenAI: gpt-4o, gpt-4o-mini, gpt-4-turbo
|
| 16 |
+
# For Anthropic: claude-3-opus, claude-3-sonnet
|
| 17 |
+
# For Groq: llama-3-70b-versatile, mixtral-8x7b-32768
|
| 18 |
+
# For Ollama: mistral, neural-chat, starling-lm (local models)
|
| 19 |
+
# Leave empty to use default: "gemini-2.5-flash-lite"
|
| 20 |
+
LLM_MODEL=gemini-2.5-flash-lite
|
| 21 |
+
|
| 22 |
+
# =============================================================================
|
| 23 |
+
# API Keys (Provider-Specific)
|
| 24 |
+
# =============================================================================
|
| 25 |
+
|
| 26 |
+
# Google Gemini API Key (required for LLM_PROVIDER=gemini)
|
| 27 |
+
# Get from: https://aistudio.google.com/app/apikey
|
| 28 |
+
GOOGLE_API_KEY=your-gemini-api-key-here
|
| 29 |
+
|
| 30 |
+
# Alternative Gemini API Key (fallback if GOOGLE_API_KEY not set)
|
| 31 |
+
GENAI_API_KEY=
|
| 32 |
+
|
| 33 |
+
# OpenAI API Key (required for LLM_PROVIDER=openai)
|
| 34 |
+
OPENAI_API_KEY=sk-...
|
| 35 |
+
|
| 36 |
+
# Anthropic API Key (required for LLM_PROVIDER=anthropic)
|
| 37 |
+
ANTHROPIC_API_KEY=sk-ant-...
|
| 38 |
+
|
| 39 |
+
# Groq API Key (required for LLM_PROVIDER=groq)
|
| 40 |
+
GROQ_API_KEY=gsk_...
|
| 41 |
+
|
| 42 |
+
# Note: Ollama (LLM_PROVIDER=ollama) requires no API key - runs locally
|
| 43 |
+
|
| 44 |
+
# =============================================================================
|
| 45 |
+
# MCP Server Configuration (QuantumArchitect-MCP)
|
| 46 |
+
# =============================================================================
|
| 47 |
+
|
| 48 |
+
# MCP Server Base URL
|
| 49 |
+
# Local: http://127.0.0.1:7861
|
| 50 |
+
# Remote (ngrok example): https://your-ngrok-url.ngrok.io
|
| 51 |
+
# Leave empty to use default: http://127.0.0.1:7861
|
| 52 |
+
MCP_SERVER_URL=http://127.0.0.1:7861
|
| 53 |
+
|
| 54 |
+
# =============================================================================
|
| 55 |
+
# Optional: Cost Tracking and Evaluation
|
| 56 |
+
# =============================================================================
|
| 57 |
+
|
| 58 |
+
# Cost tracking can be enabled/disabled
|
| 59 |
+
# TRACK_COSTS=true
|
| 60 |
+
|
| 61 |
+
# =============================================================================
|
| 62 |
+
# Notes for Hugging Face Spaces
|
| 63 |
+
# =============================================================================
|
| 64 |
+
|
| 65 |
+
# 1. Upload this file as .env to your Space (or use Space Settings UI)
|
| 66 |
+
# 2. Go to Space Settings > Secrets > Add Secret
|
| 67 |
+
# 3. Add each variable:
|
| 68 |
+
# - Name: LLM_PROVIDER, Value: gemini
|
| 69 |
+
# - Name: LLM_MODEL, Value: gemini-2.5-flash-lite
|
| 70 |
+
# - Name: GOOGLE_API_KEY, Value: your-key
|
| 71 |
+
# - Name: MCP_SERVER_URL, Value: https://your-backend-url.ngrok.io
|
| 72 |
+
#
|
| 73 |
+
# 4. Restart the Space for changes to take effect
|
| 74 |
+
#
|
| 75 |
+
# Alternative: Use Space Variables (visible in Space info) instead of Secrets
|
| 76 |
+
# This is useful for non-sensitive settings like LLM_PROVIDER and MCP_SERVER_URL
|
.gitignore
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
*.so
|
| 6 |
+
.Python
|
| 7 |
+
build/
|
| 8 |
+
develop-eggs/
|
| 9 |
+
dist/
|
| 10 |
+
downloads/
|
| 11 |
+
eggs/
|
| 12 |
+
.eggs/
|
| 13 |
+
lib/
|
| 14 |
+
lib64/
|
| 15 |
+
parts/
|
| 16 |
+
sdist/
|
| 17 |
+
var/
|
| 18 |
+
wheels/
|
| 19 |
+
*.egg-info/
|
| 20 |
+
.installed.cfg
|
| 21 |
+
*.egg
|
| 22 |
+
|
| 23 |
+
# Virtual Environment
|
| 24 |
+
.venv
|
| 25 |
+
venv/
|
| 26 |
+
ENV/
|
| 27 |
+
env/
|
| 28 |
+
|
| 29 |
+
# Environment Variables
|
| 30 |
+
.env # Actual secrets - never commit
|
| 31 |
+
# .env.example IS committed as a template - do not exclude it
|
| 32 |
+
|
| 33 |
+
# Database and Logs
|
| 34 |
+
*.db
|
| 35 |
+
*.sqlite3
|
| 36 |
+
database/data/
|
| 37 |
+
database/logs/
|
| 38 |
+
database/memory/
|
| 39 |
+
*.log
|
| 40 |
+
|
| 41 |
+
# IDEs
|
| 42 |
+
.vscode/
|
| 43 |
+
.idea/
|
| 44 |
+
|
| 45 |
+
# Project specific
|
| 46 |
+
research/
|
| 47 |
+
|
| 48 |
+
# Legacy/Backup files
|
| 49 |
+
*_old.py
|
| 50 |
+
*.bak
|
| 51 |
+
|
| 52 |
+
# Documentation work
|
| 53 |
+
.docs/
|
AGENTS.md
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
project current structure :"""
|
| 2 |
+
QAgents-workflos\__pycache__
|
| 3 |
+
QAgents-workflos\agents
|
| 4 |
+
QAgents-workflos\agents\__pycache__
|
| 5 |
+
QAgents-workflos\agents\__init__.py
|
| 6 |
+
QAgents-workflos\agents\base_agent.py
|
| 7 |
+
QAgents-workflos\agents\llm_adapter.py
|
| 8 |
+
QAgents-workflos\agents\specialized_agents.py
|
| 9 |
+
QAgents-workflos\client
|
| 10 |
+
QAgents-workflos\client\__pycache__
|
| 11 |
+
QAgents-workflos\client\__init__.py
|
| 12 |
+
QAgents-workflos\client\mcp_client.py
|
| 13 |
+
QAgents-workflos\database
|
| 14 |
+
QAgents-workflos\database\__pycache__
|
| 15 |
+
QAgents-workflos\database\data
|
| 16 |
+
QAgents-workflos\database\logs
|
| 17 |
+
QAgents-workflos\database\memory
|
| 18 |
+
QAgents-workflos\database\__init__.py
|
| 19 |
+
QAgents-workflos\database\storage.py
|
| 20 |
+
QAgents-workflos\orchestrators
|
| 21 |
+
QAgents-workflos\orchestrators\__pycache__
|
| 22 |
+
QAgents-workflos\orchestrators\__init__.py
|
| 23 |
+
QAgents-workflos\orchestrators\orchestrator.py
|
| 24 |
+
QAgents-workflos\prompts
|
| 25 |
+
QAgents-workflos\prompts\__init__.py
|
| 26 |
+
QAgents-workflos\prompts\agent_prompts.py
|
| 27 |
+
QAgents-workflos\tests
|
| 28 |
+
QAgents-workflos\tests\__pycache__
|
| 29 |
+
QAgents-workflos\tests\__init__.py
|
| 30 |
+
QAgents-workflos\tests\evaluation_harness.py
|
| 31 |
+
QAgents-workflos\tests\test_problems.py
|
| 32 |
+
QAgents-workflos\tools
|
| 33 |
+
QAgents-workflos\tools\__pycache__
|
| 34 |
+
QAgents-workflos\tools\__init__.py
|
| 35 |
+
QAgents-workflos\tools\quantum_tools.py
|
| 36 |
+
QAgents-workflos\tools\tool_registry.py
|
| 37 |
+
QAgents-workflos\workflows
|
| 38 |
+
QAgents-workflos\workflows\__pycache__
|
| 39 |
+
QAgents-workflos\workflows\__init__.py
|
| 40 |
+
QAgents-workflos\workflows\workflow_definitions.py
|
| 41 |
+
QAgents-workflos\__init__.py
|
| 42 |
+
QAgents-workflos\AGENTS.md
|
| 43 |
+
QAgents-workflos\config.py
|
| 44 |
+
QAgents-workflos\DEPLOYMENT_CHECKLIST.md
|
| 45 |
+
QAgents-workflos\IMPLEMENTATION_CHECKLIST.md
|
| 46 |
+
QAgents-workflos\LLM_SYSTEM_SUMMARY.md
|
| 47 |
+
QAgents-workflos\QUICKREF.md
|
| 48 |
+
QAgents-workflos\README.md
|
| 49 |
+
QAgents-workflos\requirements.txt
|
| 50 |
+
QAgents-workflos\run_evaluation.py
|
| 51 |
+
QAgents-workflos\SETUP.md
|
| 52 |
+
QAgents-workflos\tasks-project-state.json
|
| 53 |
+
"""
|
| 54 |
+
|
| 55 |
+
before starting work, in the same terminal:"""
|
| 56 |
+
1 activate .venv:
|
| 57 |
+
& D:\teach\quantum-circuits\.venv\Scripts\Activate.ps1
|
| 58 |
+
|
| 59 |
+
2 run the app:
|
| 60 |
+
python QuantumArchitect-MCP\app.py
|
| 61 |
+
"""
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
Any new data must be written to the root tasks-project-state.json file, or to a per-module tasks-project-state.json file that documents that module's files.
|
LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MIT License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2025 Nicolas Ivan Larenas Bustamante
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
SOFTWARE.
|
README.md
ADDED
|
@@ -0,0 +1,265 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: QAgents Quantum Circuit Orchestrator
|
| 3 |
+
emoji: ⚛️
|
| 4 |
+
colorFrom: indigo
|
| 5 |
+
colorTo: purple
|
| 6 |
+
sdk: gradio
|
| 7 |
+
sdk_version: 5.0.0
|
| 8 |
+
app_file: app.py
|
| 9 |
+
pinned: false
|
| 10 |
+
license: mit
|
| 11 |
+
short_description: Multi-agent quantum circuit generation with Gemini/LLMs
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
# QAgents-Workflows: Quantum Circuit Optimization Agent System
|
| 15 |
+
|
| 16 |
+
A professional multi-agent system for autonomous quantum circuit optimization, featuring multiple architectural approaches and **model-agnostic LLM support** (Gemini, OpenAI, Anthropic, Groq, Ollama, and any LiteLLM provider).
|
| 17 |
+
|
| 18 |
+
## 🏗️ Architectures
|
| 19 |
+
|
| 20 |
+
### 1. Blackboard System (Free/Emergent)
|
| 21 |
+
- Agents communicate through a shared blackboard
|
| 22 |
+
- Decoupled, event-driven activation
|
| 23 |
+
- Emergent workflow based on data availability
|
| 24 |
+
- Maximum flexibility and adaptability
|
| 25 |
+
|
| 26 |
+
### 2. Guided System (Strict Orchestration)
|
| 27 |
+
- Explicit state machine with defined transitions
|
| 28 |
+
- Central orchestrator controls workflow
|
| 29 |
+
- Predictable, auditable execution path
|
| 30 |
+
- Maximum reliability and control
|
| 31 |
+
|
| 32 |
+
### 3. Naked System (Baseline)
|
| 33 |
+
- Single agent with direct MCP access
|
| 34 |
+
- No framework overhead
|
| 35 |
+
- Baseline for comparison
|
| 36 |
+
|
| 37 |
+
## 🤖 Model-Agnostic LLM Support
|
| 38 |
+
|
| 39 |
+
The system works with **any LLM provider**:
|
| 40 |
+
|
| 41 |
+
| Provider | Setup | Models |
|
| 42 |
+
|----------|-------|--------|
|
| 43 |
+
| **Gemini** (Default) | `GOOGLE_API_KEY` | `gemini-2.5-flash-lite` |
|
| 44 |
+
| OpenAI | `OPENAI_API_KEY` | `gpt-4o`, `gpt-4o-mini` |
|
| 45 |
+
| Anthropic | `ANTHROPIC_API_KEY` | `claude-3-opus`, `claude-3-sonnet` |
|
| 46 |
+
| Groq | `GROQ_API_KEY` | `llama-3-70b`, `mixtral-8x7b` |
|
| 47 |
+
| Ollama (Local) | No key needed | Any local model |
|
| 48 |
+
|
| 49 |
+
**See [SETUP.md](SETUP.md) for detailed configuration.**
|
| 50 |
+
|
| 51 |
+
## 📊 Evaluation Metrics
|
| 52 |
+
|
| 53 |
+
| Metric | Description |
|
| 54 |
+
|--------|-------------|
|
| 55 |
+
| **Time** | Total execution time in seconds |
|
| 56 |
+
| **Quality** | Circuit depth, gate count, hardware fitness score |
|
| 57 |
+
| **Effectiveness** | Did the circuit achieve the goal? |
|
| 58 |
+
| **Reliability** | Success rate across multiple runs |
|
| 59 |
+
|
| 60 |
+
## 🚀 Quick Start
|
| 61 |
+
|
| 62 |
+
```bash
|
| 63 |
+
# 1. Ensure QuantumArchitect-MCP is running
|
| 64 |
+
python QuantumArchitect-MCP/app.py
|
| 65 |
+
|
| 66 |
+
# 2. Set your API key (for Gemini by default)
|
| 67 |
+
set GOOGLE_API_KEY=your-key-here
|
| 68 |
+
# OR for OpenAI:
|
| 69 |
+
set OPENAI_API_KEY=your-key-here
|
| 70 |
+
|
| 71 |
+
# 3. Run the evaluation
|
| 72 |
+
python QAgents-workflos/run_evaluation.py
|
| 73 |
+
|
| 74 |
+
# For quick test (no LLM needed):
|
| 75 |
+
python QAgents-workflos/run_evaluation.py --quick
|
| 76 |
+
|
| 77 |
+
# Test specific mode:
|
| 78 |
+
python QAgents-workflos/run_evaluation.py --mode guided
|
| 79 |
+
python QAgents-workflos/run_evaluation.py --mode blackboard
|
| 80 |
+
python QAgents-workflos/run_evaluation.py --mode naked
|
| 81 |
+
```
|
| 82 |
+
|
| 83 |
+
## 🔧 Switching LLM Providers
|
| 84 |
+
|
| 85 |
+
### Using Gemini (Default)
|
| 86 |
+
```bash
|
| 87 |
+
set GOOGLE_API_KEY=your-gemini-key
|
| 88 |
+
# Models: gemini-2.5-flash-lite, gemini-2.5-pro
|
| 89 |
+
```
|
| 90 |
+
|
| 91 |
+
### Using OpenAI
|
| 92 |
+
Edit `config.py`:
|
| 93 |
+
```python
|
| 94 |
+
provider: str = "openai"
|
| 95 |
+
model: str = "gpt-4o-mini"
|
| 96 |
+
```
|
| 97 |
+
```bash
|
| 98 |
+
set OPENAI_API_KEY=sk-...
|
| 99 |
+
```
|
| 100 |
+
|
| 101 |
+
### Using Anthropic
|
| 102 |
+
```python
|
| 103 |
+
provider: str = "anthropic"
|
| 104 |
+
model: str = "claude-3-sonnet-20240229"
|
| 105 |
+
```
|
| 106 |
+
```bash
|
| 107 |
+
set ANTHROPIC_API_KEY=your-key
|
| 108 |
+
```
|
| 109 |
+
|
| 110 |
+
### Using Groq
|
| 111 |
+
```python
|
| 112 |
+
provider: str = "groq"
|
| 113 |
+
model: str = "llama-3-70b-versatile"
|
| 114 |
+
```
|
| 115 |
+
```bash
|
| 116 |
+
set GROQ_API_KEY=your-key
|
| 117 |
+
```
|
| 118 |
+
|
| 119 |
+
### Using Local Ollama
|
| 120 |
+
```python
|
| 121 |
+
provider: str = "ollama"
|
| 122 |
+
model: str = "mistral"
|
| 123 |
+
```
|
| 124 |
+
No API key needed - runs locally on `http://localhost:11434`
|
| 125 |
+
|
| 126 |
+
## 📁 Project Structure
|
| 127 |
+
|
| 128 |
+
```
|
| 129 |
+
QAgents-workflos/
|
| 130 |
+
├── agents/ # Agent implementations (Architect, Builder, etc.)
|
| 131 |
+
├── client/ # MCP client for QuantumArchitect-MCP
|
| 132 |
+
├── database/ # Storage layer (logs, memory, circuits)
|
| 133 |
+
├── orchestrators/ # Orchestration modes (Naked, Guided, Blackboard, QUASAR, Hybrid)
|
| 134 |
+
├── prompts/ # System prompts for agents and optimized LLM prompts
|
| 135 |
+
├── tools/ # Tool registry and MCP endpoint wrappers
|
| 136 |
+
├── workflows/ # Workflow definitions
|
| 137 |
+
├── tests/ # Evaluation harnesses and test problems
|
| 138 |
+
├── app.py # Gradio UI entry point (Hugging Face Space)
|
| 139 |
+
├── config.py # Configuration with env var support
|
| 140 |
+
├── requirements.txt # Python dependencies
|
| 141 |
+
├── .env.example # Environment variable template
|
| 142 |
+
└── README.md # This file
|
| 143 |
+
```
|
| 144 |
+
|
| 145 |
+
## 🚀 Deployment to Hugging Face Spaces
|
| 146 |
+
|
| 147 |
+
### Prerequisites
|
| 148 |
+
1. Create a Hugging Face Space: https://huggingface.co/new-space
|
| 149 |
+
2. Select **Gradio** as the SDK
|
| 150 |
+
3. Push this repository to your Space
|
| 151 |
+
|
| 152 |
+
### Environment Variables Configuration
|
| 153 |
+
|
| 154 |
+
The system reads configuration from **environment variables**, making it compatible with Hugging Face Spaces.
|
| 155 |
+
|
| 156 |
+
#### Critical Variables
|
| 157 |
+
|
| 158 |
+
| Variable | Purpose | Default | Example |
|
| 159 |
+
|----------|---------|---------|---------|
|
| 160 |
+
| `LLM_PROVIDER` | LLM provider to use | `gemini` | `gemini`, `openai`, `anthropic` |
|
| 161 |
+
| `LLM_MODEL` | Model identifier | `gemini-2.5-flash-lite` | `gpt-4o-mini`, `claude-3-sonnet` |
|
| 162 |
+
| `GOOGLE_API_KEY` | Gemini API key | (none) | Your API key from aistudio.google.com |
|
| 163 |
+
| `MCP_SERVER_URL` | Backend URL | `http://127.0.0.1:7861` | `https://your-backend.ngrok.io` |
|
| 164 |
+
|
| 165 |
+
#### Setting Variables in Hugging Face Space
|
| 166 |
+
|
| 167 |
+
**Option 1: Via Space Settings (Recommended)**
|
| 168 |
+
1. Go to your Space settings
|
| 169 |
+
2. Click **"Secrets and variables"** > **"New secret"**
|
| 170 |
+
3. Add each variable:
|
| 171 |
+
- **Secret Name**: `GOOGLE_API_KEY` | **Value**: Your API key
|
| 172 |
+
- **Secret Name**: `MCP_SERVER_URL` | **Value**: Backend URL
|
| 173 |
+
4. Add variables (non-sensitive):
|
| 174 |
+
- **Variable Name**: `LLM_PROVIDER` | **Value**: `gemini`
|
| 175 |
+
- **Variable Name**: `LLM_MODEL` | **Value**: `gemini-2.5-flash-lite`
|
| 176 |
+
|
| 177 |
+
**Option 2: Via .env File**
|
| 178 |
+
```bash
|
| 179 |
+
# Copy .env.example to .env and fill in values
|
| 180 |
+
cp .env.example .env
|
| 181 |
+
|
| 182 |
+
# Commit and push to your Space
|
| 183 |
+
git add .env
|
| 184 |
+
git commit -m "Add environment configuration"
|
| 185 |
+
git push
|
| 186 |
+
```
|
| 187 |
+
|
| 188 |
+
**⚠️ Important**: Never commit sensitive API keys directly. Use Space Secrets instead.
|
| 189 |
+
|
| 190 |
+
### LLM Provider Configuration
|
| 191 |
+
|
| 192 |
+
#### Using Gemini (Default)
|
| 193 |
+
```
|
| 194 |
+
LLM_PROVIDER=gemini
|
| 195 |
+
LLM_MODEL=gemini-2.5-flash-lite
|
| 196 |
+
GOOGLE_API_KEY=your-key-from-https://aistudio.google.com/app/apikey
|
| 197 |
+
```
|
| 198 |
+
|
| 199 |
+
#### Using OpenAI
|
| 200 |
+
```
|
| 201 |
+
LLM_PROVIDER=openai
|
| 202 |
+
LLM_MODEL=gpt-4o-mini
|
| 203 |
+
OPENAI_API_KEY=sk-...
|
| 204 |
+
```
|
| 205 |
+
|
| 206 |
+
#### Using Anthropic
|
| 207 |
+
```
|
| 208 |
+
LLM_PROVIDER=anthropic
|
| 209 |
+
LLM_MODEL=claude-3-sonnet-20240229
|
| 210 |
+
ANTHROPIC_API_KEY=sk-ant-...
|
| 211 |
+
```
|
| 212 |
+
|
| 213 |
+
#### Using Groq
|
| 214 |
+
```
|
| 215 |
+
LLM_PROVIDER=groq
|
| 216 |
+
LLM_MODEL=llama-3-70b-versatile
|
| 217 |
+
GROQ_API_KEY=gsk_...
|
| 218 |
+
```
|
| 219 |
+
|
| 220 |
+
#### Using Local Ollama
|
| 221 |
+
```
|
| 222 |
+
LLM_PROVIDER=ollama
|
| 223 |
+
LLM_MODEL=mistral
|
| 224 |
+
# No API key needed - runs locally on http://localhost:11434
|
| 225 |
+
```
|
| 226 |
+
|
| 227 |
+
### Backend Connection (MCP Server)
|
| 228 |
+
|
| 229 |
+
The Space communicates with the QuantumArchitect-MCP backend via `MCP_SERVER_URL`.
|
| 230 |
+
|
| 231 |
+
**Options:**
|
| 232 |
+
|
| 233 |
+
1. **Local Development** (both running on your machine):
|
| 234 |
+
```
|
| 235 |
+
MCP_SERVER_URL=http://127.0.0.1:7861
|
| 236 |
+
```
|
| 237 |
+
|
| 238 |
+
2. **Public Backend with ngrok** (tunnel remote server):
|
| 239 |
+
```bash
|
| 240 |
+
# On your backend server:
|
| 241 |
+
ngrok http 7861
|
| 242 |
+
```
|
| 243 |
+
Then set:
|
| 244 |
+
```
|
| 245 |
+
MCP_SERVER_URL=https://your-ngrok-url.ngrok.io
|
| 246 |
+
```
|
| 247 |
+
|
| 248 |
+
3. **Deployed Backend** (your own server):
|
| 249 |
+
```
|
| 250 |
+
MCP_SERVER_URL=https://your-quantum-api.example.com
|
| 251 |
+
```
|
| 252 |
+
|
| 253 |
+
If `MCP_SERVER_URL` is not set or unreachable, the Space will still work but with local-only features.
|
| 254 |
+
|
| 255 |
+
## 📁 Project Structure (Previous)
|
| 256 |
+
├── agents/ # Agent definitions (types, behaviors)
|
| 257 |
+
├── prompts/ # System prompts for each agent
|
| 258 |
+
├── tools/ # MCP tool wrappers
|
| 259 |
+
├── workflows/ # Workflow definitions
|
| 260 |
+
├── orchestrators/ # Workflow orchestration logic
|
| 261 |
+
├── client/ # MCP client connection
|
| 262 |
+
├── database/ # Memory, logs, results storage
|
| 263 |
+
├── tests/ # Evaluation framework
|
| 264 |
+
└── config.py # Global configuration
|
| 265 |
+
```
|
__init__.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""QAgents-Workflows: Multi-agent quantum circuit optimization system."""
|
| 2 |
+
|
| 3 |
+
from .config import config, set_mode, get_mode, SystemConfig
|
| 4 |
+
|
| 5 |
+
__version__ = "0.1.0"
|
| 6 |
+
__all__ = ["config", "set_mode", "get_mode", "SystemConfig"]
|
agents/__init__.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Agents module: Base and specialized agent implementations."""
|
| 2 |
+
|
| 3 |
+
from .base_agent import (
|
| 4 |
+
BaseAgent,
|
| 5 |
+
LLMAgent,
|
| 6 |
+
RuleBasedAgent,
|
| 7 |
+
AgentRole,
|
| 8 |
+
AgentState,
|
| 9 |
+
AgentContext,
|
| 10 |
+
AgentAction,
|
| 11 |
+
AgentResult
|
| 12 |
+
)
|
| 13 |
+
|
| 14 |
+
from .specialized_agents import (
|
| 15 |
+
ArchitectAgent,
|
| 16 |
+
BuilderAgent,
|
| 17 |
+
ValidatorAgent,
|
| 18 |
+
OptimizerAgent,
|
| 19 |
+
AnalyzerAgent,
|
| 20 |
+
ScorerAgent,
|
| 21 |
+
SimulatorAgent,
|
| 22 |
+
create_all_agents
|
| 23 |
+
)
|
| 24 |
+
|
| 25 |
+
__all__ = [
|
| 26 |
+
# Base classes
|
| 27 |
+
"BaseAgent",
|
| 28 |
+
"LLMAgent",
|
| 29 |
+
"RuleBasedAgent",
|
| 30 |
+
"AgentRole",
|
| 31 |
+
"AgentState",
|
| 32 |
+
"AgentContext",
|
| 33 |
+
"AgentAction",
|
| 34 |
+
"AgentResult",
|
| 35 |
+
# Specialized agents
|
| 36 |
+
"ArchitectAgent",
|
| 37 |
+
"BuilderAgent",
|
| 38 |
+
"ValidatorAgent",
|
| 39 |
+
"OptimizerAgent",
|
| 40 |
+
"AnalyzerAgent",
|
| 41 |
+
"ScorerAgent",
|
| 42 |
+
"SimulatorAgent",
|
| 43 |
+
"create_all_agents"
|
| 44 |
+
]
|
agents/base_agent.py
ADDED
|
@@ -0,0 +1,302 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Agents Module: Base agent classes and specialized agents.
|
| 3 |
+
Supports both Blackboard (free) and Guided (strict) architectures.
|
| 4 |
+
Model-agnostic: Works with Gemini, OpenAI, Anthropic, Groq, Ollama, etc.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from abc import ABC, abstractmethod
|
| 8 |
+
from dataclasses import dataclass, field
|
| 9 |
+
from typing import Any, Dict, List, Optional, Callable
|
| 10 |
+
from enum import Enum
|
| 11 |
+
from datetime import datetime
|
| 12 |
+
import json
|
| 13 |
+
import logging
|
| 14 |
+
|
| 15 |
+
logger = logging.getLogger(__name__)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class AgentRole(Enum):
|
| 19 |
+
"""Roles agents can take in the system."""
|
| 20 |
+
ARCHITECT = "architect"
|
| 21 |
+
BUILDER = "builder"
|
| 22 |
+
VALIDATOR = "validator"
|
| 23 |
+
OPTIMIZER = "optimizer"
|
| 24 |
+
ANALYZER = "analyzer"
|
| 25 |
+
SCORER = "scorer"
|
| 26 |
+
COORDINATOR = "coordinator"
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
class AgentState(Enum):
|
| 30 |
+
"""Agent execution states."""
|
| 31 |
+
IDLE = "idle"
|
| 32 |
+
THINKING = "thinking"
|
| 33 |
+
EXECUTING = "executing"
|
| 34 |
+
WAITING = "waiting"
|
| 35 |
+
COMPLETED = "completed"
|
| 36 |
+
ERROR = "error"
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
@dataclass
|
| 40 |
+
class AgentContext:
|
| 41 |
+
"""Context passed to agents for decision making."""
|
| 42 |
+
goal: str
|
| 43 |
+
current_circuit: Optional[str] = None
|
| 44 |
+
history: List[Dict] = field(default_factory=list)
|
| 45 |
+
constraints: Dict = field(default_factory=dict)
|
| 46 |
+
shared_data: Dict = field(default_factory=dict)
|
| 47 |
+
|
| 48 |
+
def add_to_history(self, action: str, result: Any):
|
| 49 |
+
self.history.append({
|
| 50 |
+
"action": action,
|
| 51 |
+
"result": result,
|
| 52 |
+
"timestamp": datetime.now().isoformat()
|
| 53 |
+
})
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
@dataclass
|
| 57 |
+
class AgentAction:
|
| 58 |
+
"""An action an agent wants to take."""
|
| 59 |
+
tool_name: str
|
| 60 |
+
arguments: Dict
|
| 61 |
+
reasoning: str
|
| 62 |
+
priority: float = 1.0
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
@dataclass
|
| 66 |
+
class AgentResult:
|
| 67 |
+
"""Result of an agent's execution."""
|
| 68 |
+
success: bool
|
| 69 |
+
data: Any
|
| 70 |
+
message: str
|
| 71 |
+
actions_taken: List[str] = field(default_factory=list)
|
| 72 |
+
execution_time_ms: float = 0.0
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
class BaseAgent(ABC):
    """
    Abstract base class for all agents.
    Provides common interface for both Blackboard and Guided architectures.

    Subclasses implement ``decide`` (choose an action for a context) and
    ``execute`` (run the chosen action). State transitions are observable
    via callbacks registered with ``on_state_change``.
    """

    def __init__(self,
                 agent_id: str,
                 role: AgentRole,
                 tools: Optional[List[str]] = None,
                 llm_config: Optional[Dict] = None) -> None:
        # Identity and configuration
        self.agent_id = agent_id
        self.role = role
        self.tools = tools or []  # tool names this agent may invoke
        self.llm_config = llm_config or {}  # per-agent LLM overrides (temperature, ...)
        # Mutable runtime state
        self.state = AgentState.IDLE
        self.memory: Dict = {}  # scratch space, cleared by reset()
        self._callbacks: List[Callable] = []  # state-change observers

    @abstractmethod
    def decide(self, context: AgentContext) -> Optional[AgentAction]:
        """Decide what action to take given the context."""
        pass

    @abstractmethod
    def execute(self, action: AgentAction, context: AgentContext) -> AgentResult:
        """Execute the decided action."""
        pass

    def can_handle(self, context: AgentContext) -> bool:
        """Check if this agent can handle the current context.

        Default implementation accepts everything; subclasses may filter.
        """
        return True

    def on_state_change(self, callback: Callable) -> None:
        """Register callback for state changes.

        The callback is invoked as ``callback(agent_id, old_state, new_state)``.
        """
        self._callbacks.append(callback)

    def _set_state(self, new_state: AgentState) -> None:
        """Update state and notify callbacks."""
        old_state = self.state
        self.state = new_state
        for cb in self._callbacks:
            cb(self.agent_id, old_state, new_state)

    def reset(self) -> None:
        """Reset agent to initial state (IDLE, empty memory)."""
        self.state = AgentState.IDLE
        self.memory.clear()
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
class LLMAgent(BaseAgent):
    """
    Agent that uses an LLM for decision making.
    Model-agnostic: Supports Gemini, OpenAI, Anthropic, Groq, Ollama, etc.
    Can be used in both Blackboard and Guided modes.
    """

    def __init__(self,
                 agent_id: str,
                 role: AgentRole,
                 system_prompt: str,
                 tools: List[str] = None,
                 llm_config: Dict = None):
        super().__init__(agent_id, role, tools, llm_config)
        self.system_prompt = system_prompt
        self._adapter = None  # lazily constructed LLM adapter

    def _get_adapter(self):
        """Return the LLM adapter, creating it on first access."""
        if self._adapter is not None:
            return self._adapter

        from config import config
        from agents.llm_adapter import get_llm_adapter

        self._adapter = get_llm_adapter(
            provider=config.llm.provider,
            model=config.llm.model,
            api_key=config.llm.api_key
        )
        return self._adapter

    def _build_messages(self, context: AgentContext) -> List[Dict]:
        """Assemble the system + user message list sent to the LLM."""
        context_msg = f"""
Goal: {context.goal}

Current Circuit:
{context.current_circuit or 'None yet'}

Constraints:
{json.dumps(context.constraints, indent=2)}

History (last 5 actions):
{json.dumps(context.history[-5:], indent=2)}
"""
        return [
            {"role": "system", "content": self.system_prompt},
            {"role": "user", "content": context_msg},
        ]

    def decide(self, context: AgentContext) -> Optional[AgentAction]:
        """Ask the LLM to pick a tool call; returns None when no tool was chosen."""
        self._set_state(AgentState.THINKING)

        try:
            from config import config
            from tools import registry

            # Only expose schemas for tools that are actually registered.
            schemas = []
            for name in self.tools:
                tool = registry.get(name)
                if tool:
                    schemas.append(tool.to_llm_schema())

            reply = self._get_adapter().generate(
                messages=self._build_messages(context),
                tools=schemas if schemas else None,
                temperature=self.llm_config.get("temperature", config.llm.temperature),
                max_tokens=self.llm_config.get("max_tokens", config.llm.max_tokens)
            )

            if not reply.tool_calls:
                return None

            # Only the first proposed call is acted on.
            first = reply.tool_calls[0]
            return AgentAction(
                tool_name=first.tool_name,
                arguments=first.arguments,
                reasoning=first.reasoning
            )

        except Exception as e:
            logger.error(f"Agent {self.agent_id} decision failed: {e}")
            self._set_state(AgentState.ERROR)
            return None

    def execute(self, action: AgentAction, context: AgentContext) -> AgentResult:
        """Run the chosen tool and wrap its output in an AgentResult."""
        self._set_state(AgentState.EXECUTING)

        import time
        start = time.perf_counter()

        try:
            from tools import invoke_tool

            payload = invoke_tool(action.tool_name, **action.arguments)
            duration_ms = (time.perf_counter() - start) * 1000

            context.add_to_history(action.tool_name, payload)

            self._set_state(AgentState.COMPLETED)
            return AgentResult(
                success=payload.get("success", False),
                data=payload,
                message=f"Executed {action.tool_name}",
                actions_taken=[action.tool_name],
                execution_time_ms=duration_ms
            )

        except Exception as e:
            logger.error(f"Agent {self.agent_id} execution failed: {e}")
            self._set_state(AgentState.ERROR)
            return AgentResult(
                success=False,
                data=None,
                message=str(e)
            )
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
class RuleBasedAgent(BaseAgent):
    """
    Agent that uses predefined rules for decision making.
    Useful for deterministic behavior in Guided mode.
    """

    def __init__(self,
                 agent_id: str,
                 role: AgentRole,
                 rules: List[Callable[[AgentContext], Optional[AgentAction]]],
                 tools: List[str] = None):
        super().__init__(agent_id, role, tools)
        self.rules = rules

    def decide(self, context: AgentContext) -> Optional[AgentAction]:
        """Return the first rule-produced action, or None if no rule fires."""
        self._set_state(AgentState.THINKING)

        # Rules are evaluated lazily in order; the first non-None wins.
        return next(
            (candidate
             for candidate in (rule(context) for rule in self.rules)
             if candidate is not None),
            None
        )

    def execute(self, action: AgentAction, context: AgentContext) -> AgentResult:
        """Execute the action via the tool registry and time the call."""
        self._set_state(AgentState.EXECUTING)

        import time
        started = time.perf_counter()

        try:
            from tools import invoke_tool

            outcome = invoke_tool(action.tool_name, **action.arguments)
            spent_ms = (time.perf_counter() - started) * 1000

            context.add_to_history(action.tool_name, outcome)

            self._set_state(AgentState.COMPLETED)
            return AgentResult(
                success=outcome.get("success", False),
                data=outcome,
                message=f"Executed {action.tool_name}",
                actions_taken=[action.tool_name],
                execution_time_ms=spent_ms
            )

        except Exception as e:
            self._set_state(AgentState.ERROR)
            return AgentResult(
                success=False,
                data=None,
                message=str(e)
            )
|
agents/llm_adapter.py
ADDED
|
@@ -0,0 +1,676 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
LLM Adapter: Model-agnostic LLM interface with multi-model fallback.
|
| 3 |
+
Supports Gemini (native), OpenAI, Anthropic, Groq, Ollama, and any LiteLLM provider.
|
| 4 |
+
|
| 5 |
+
Path: QAgents-workflos/agents/llm_adapter.py
|
| 6 |
+
Related: config.py (GEMINI_MODELS cascade, CostTrackingConfig)
|
| 7 |
+
orchestrators/orchestrator.py (uses get_llm_adapter)
|
| 8 |
+
specialized_agents.py (agents use LLM adapters)
|
| 9 |
+
|
| 10 |
+
Multi-Model Fallback System with Recovery:
|
| 11 |
+
==========================================
|
| 12 |
+
When a model hits rate limits (429) or errors, automatically falls back to next model.
|
| 13 |
+
RECOVERY: When preferred model cooldown expires, automatically rotates back.
|
| 14 |
+
|
| 15 |
+
Cascade order (by RPD - highest to lowest):
|
| 16 |
+
1. gemma-3-27b-it (14,400 RPD) - Highest availability
|
| 17 |
+
2. gemini-2.5-flash-lite (1,000 RPD) - DEFAULT PREFERRED
|
| 18 |
+
3. gemini-2.5-flash (250 RPD)
|
| 19 |
+
4. gemini-2.0-flash (200 RPD)
|
| 20 |
+
5. gemini-2.0-flash-lite (200 RPD)
|
| 21 |
+
6. gemini-2.5-pro (50 RPD) - Last resort
|
| 22 |
+
|
| 23 |
+
Model Recovery Timer:
|
| 24 |
+
=====================
|
| 25 |
+
- Tracks when each model was rate-limited
|
| 26 |
+
- Calculates recovery time (RPM cooldown: 60s, RPD cooldown: reset at midnight)
|
| 27 |
+
- Automatically returns to preferred model when recovered
|
| 28 |
+
- Preferred model index configurable (default: 1 = gemini-2.5-flash-lite)
|
| 29 |
+
"""
|
| 30 |
+
|
| 31 |
+
import json
|
| 32 |
+
import logging
|
| 33 |
+
import time
|
| 34 |
+
from abc import ABC, abstractmethod
|
| 35 |
+
from typing import Any, Dict, List, Optional
|
| 36 |
+
from dataclasses import dataclass, field
|
| 37 |
+
from collections import deque
|
| 38 |
+
from datetime import datetime, timedelta
|
| 39 |
+
|
| 40 |
+
logger = logging.getLogger(__name__)
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
# =============================================================================
|
| 44 |
+
# MULTI-MODEL RATE LIMITER
|
| 45 |
+
# =============================================================================
|
| 46 |
+
|
| 47 |
+
class ModelRateLimiter:
    """
    Rate limiter with per-model tracking, automatic fallback, and recovery.

    Tracks:
    - RPM: Requests per minute (sliding window)
    - RPD: Requests per day (counter reset at midnight or manually)
    - Recovery: When rate-limited models become available again

    When current model exceeds limits, suggests next model in cascade.
    When preferred model recovers, automatically rotates back.
    """

    def __init__(self, models: List[Dict] = None, preferred_model_idx: int = 1):
        """
        Initialize with model cascade from config.

        Args:
            models: List of model configs with rpm, rpd limits
            preferred_model_idx: Index of preferred model (default: 1 = gemini-2.5-flash-lite)
        """
        from config import GEMINI_MODELS
        self.models = models or GEMINI_MODELS
        self.preferred_model_idx = preferred_model_idx  # Model to return to after recovery
        self.current_model_idx = preferred_model_idx  # Start with preferred model

        # Per-model tracking, keyed by model name.
        self.model_usage: Dict[str, Dict] = {}
        for model in self.models:
            self.model_usage[model["name"]] = {
                "rpm_window": deque(maxlen=model["rpm"]),  # Sliding window of request timestamps
                "rpd_count": 0,
                # Next daily reset: upcoming midnight (microseconds not zeroed; harmless
                # since the comparison is against "now" a full day later).
                "rpd_reset_time": datetime.now().replace(hour=0, minute=0, second=0) + timedelta(days=1),
                "last_request_time": 0,
                "total_tokens": 0,
                "total_time_ms": 0.0,
                # Recovery tracking
                "rate_limited_at": None,  # Timestamp when rate limited
                "rpm_recovery_time": None,  # When RPM limit recovers
                "rpd_recovery_time": None,  # When RPD limit recovers (midnight)
            }

    @property
    def current_model(self) -> Dict:
        """Get current model config."""
        return self.models[self.current_model_idx]

    @property
    def current_model_name(self) -> str:
        """Get current model name."""
        return self.current_model["name"]

    @property
    def preferred_model_name(self) -> str:
        """Get preferred model name."""
        return self.models[self.preferred_model_idx]["name"]

    def get_min_interval(self, model_name: str = None) -> float:
        """Get minimum interval between requests for model (with 80% buffer)."""
        if model_name is None:
            model_name = self.current_model_name

        for model in self.models:
            if model["name"] == model_name:
                # 80% buffer: 60s / (rpm * 0.8) — stays safely under the RPM cap.
                return 60.0 / (model["rpm"] * 0.8)
        return 5.0  # Default 5 seconds

    def check_preferred_model_recovery(self) -> bool:
        """
        Check if preferred model has recovered from rate limiting.
        If recovered, automatically switch back to it.

        Returns:
            True if switched back to preferred model
        """
        if self.current_model_idx == self.preferred_model_idx:
            return False  # Already on preferred model

        preferred_name = self.preferred_model_name
        usage = self.model_usage.get(preferred_name)
        if not usage:
            return False

        current_time = datetime.now()

        # Check RPD recovery (resets at midnight)
        if usage.get("rpd_recovery_time") and current_time >= usage["rpd_recovery_time"]:
            usage["rpd_count"] = 0
            usage["rpd_recovery_time"] = None
            usage["rate_limited_at"] = None
            logger.info(f"Preferred model {preferred_name} RPD limit reset - switching back")
            self.current_model_idx = self.preferred_model_idx
            return True

        # Check RPM recovery (60 seconds)
        if usage.get("rpm_recovery_time") and current_time >= usage["rpm_recovery_time"]:
            usage["rpm_recovery_time"] = None
            # Check if we can make a request now (RPM window may still be full)
            can_req, _ = self.can_request(preferred_name)
            if can_req:
                logger.info(f"Preferred model {preferred_name} RPM recovered - switching back")
                self.current_model_idx = self.preferred_model_idx
                return True

        return False

    def can_request(self, model_name: str = None) -> tuple[bool, str]:
        """
        Check if we can make a request with current/specified model.

        Returns:
            (can_request: bool, reason: str)
        """
        if model_name is None:
            model_name = self.current_model_name

        if model_name not in self.model_usage:
            return False, f"Unknown model: {model_name}"

        usage = self.model_usage[model_name]
        # Linear scan of the (small) cascade for this model's config.
        model_config = None
        for m in self.models:
            if m["name"] == model_name:
                model_config = m
                break

        if not model_config:
            return False, f"Model config not found: {model_name}"

        # Check RPD (reset if new day)
        if datetime.now() >= usage["rpd_reset_time"]:
            usage["rpd_count"] = 0
            usage["rpd_reset_time"] = datetime.now().replace(hour=0, minute=0, second=0) + timedelta(days=1)

        if usage["rpd_count"] >= model_config["rpd"]:
            return False, f"RPD limit reached ({model_config['rpd']}/day)"

        # Check RPM (sliding window)
        current_time = time.time()
        window = usage["rpm_window"]

        # Remove old entries (>60s ago)
        while window and (current_time - window[0]) > 60:
            window.popleft()

        if len(window) >= model_config["rpm"]:
            return False, f"RPM limit reached ({model_config['rpm']}/min)"

        return True, "OK"

    def wait_if_needed(self, model_name: str = None) -> float:
        """
        Wait if necessary to respect rate limits.

        Blocks (time.sleep) until the minimum inter-request interval has passed.

        Returns:
            Time waited in seconds
        """
        if model_name is None:
            model_name = self.current_model_name

        if model_name not in self.model_usage:
            return 0.0

        usage = self.model_usage[model_name]
        current_time = time.time()
        min_interval = self.get_min_interval(model_name)

        time_since_last = current_time - usage["last_request_time"]

        if time_since_last < min_interval:
            sleep_time = min_interval - time_since_last
            logger.info(f"Rate limiting [{model_name}]: waiting {sleep_time:.2f}s")
            time.sleep(sleep_time)
            return sleep_time

        return 0.0

    def record_request(self, model_name: str = None, tokens: int = 0, time_ms: float = 0):
        """Record a successful request (updates RPM window, RPD count, totals)."""
        if model_name is None:
            model_name = self.current_model_name

        if model_name not in self.model_usage:
            return

        usage = self.model_usage[model_name]
        current_time = time.time()

        usage["rpm_window"].append(current_time)
        usage["rpd_count"] += 1
        usage["last_request_time"] = current_time
        usage["total_tokens"] += tokens
        usage["total_time_ms"] += time_ms

        logger.debug(f"Request recorded [{model_name}]: RPD {usage['rpd_count']}, tokens {tokens}")

    def fallback_to_next(self, reason: str = "unknown") -> Optional[str]:
        """
        Switch to next model in cascade and record recovery time.

        Args:
            reason: Why fallback is needed ("rpm", "rpd", or "error")

        Returns:
            New model name or None if no more models available
        """
        current_model_name = self.current_model_name
        # NOTE(review): .get(..., {}) returns a throwaway dict if the name is
        # missing; recovery timestamps would then be silently discarded.
        usage = self.model_usage.get(current_model_name, {})

        # Record when this model was rate limited and set recovery time
        now = datetime.now()
        usage["rate_limited_at"] = now

        if "rpm" in reason.lower() or "429" in reason:
            # RPM recovery: 60 seconds from now
            usage["rpm_recovery_time"] = now + timedelta(seconds=60)
            logger.info(f"Model {current_model_name} RPM limited - recovery at {usage['rpm_recovery_time']}")
        elif "rpd" in reason.lower() or "quota" in reason.lower():
            # RPD recovery: midnight tonight
            usage["rpd_recovery_time"] = now.replace(hour=0, minute=0, second=0) + timedelta(days=1)
            logger.info(f"Model {current_model_name} RPD limited - recovery at {usage['rpd_recovery_time']}")

        if self.current_model_idx + 1 < len(self.models):
            self.current_model_idx += 1
            new_model = self.current_model_name
            logger.warning(f"Falling back to model: {new_model}")
            return new_model
        else:
            logger.error("No more models available in fallback cascade!")
            return None

    def reset_to_preferred(self):
        """Reset to preferred model (default: gemini-2.5-flash-lite)."""
        self.current_model_idx = self.preferred_model_idx
        logger.info(f"Reset to preferred model: {self.preferred_model_name}")

    def get_usage_summary(self) -> Dict:
        """Get usage summary for all models (RPM/RPD used vs limits, totals)."""
        summary = {}
        for model in self.models:
            name = model["name"]
            usage = self.model_usage[name]
            summary[name] = {
                "rpm_used": len(usage["rpm_window"]),
                "rpm_limit": model["rpm"],
                "rpd_used": usage["rpd_count"],
                "rpd_limit": model["rpd"],
                "total_tokens": usage["total_tokens"],
                "total_time_ms": usage["total_time_ms"]
            }
        return summary
|
| 299 |
+
|
| 300 |
+
|
| 301 |
+
# Process-wide singleton shared by every adapter instance.
_global_rate_limiter: Optional[ModelRateLimiter] = None


def get_rate_limiter() -> ModelRateLimiter:
    """Return the shared ModelRateLimiter, constructing it on first call."""
    global _global_rate_limiter
    if _global_rate_limiter is not None:
        return _global_rate_limiter
    _global_rate_limiter = ModelRateLimiter()
    return _global_rate_limiter
|
| 310 |
+
|
| 311 |
+
|
| 312 |
+
# =============================================================================
|
| 313 |
+
# LLM RESPONSE TYPES
|
| 314 |
+
# =============================================================================
|
| 315 |
+
|
| 316 |
+
@dataclass
class LLMToolCall:
    """Standardized tool call across all providers."""
    tool_name: str  # name of the tool/function the model chose
    arguments: Dict[str, Any]  # parsed keyword arguments for the call
    reasoning: str  # model-provided rationale (or placeholder text)
|
| 322 |
+
|
| 323 |
+
|
| 324 |
+
@dataclass
class LLMResponse:
    """Standardized response across all providers."""
    text: str  # plain-text completion (may be empty when only tool calls returned)
    tool_calls: List[LLMToolCall]  # zero or more structured tool invocations
    finish_reason: str  # provider finish reason (e.g. "STOP")
    model_used: str = ""  # Track which model was actually used
    tokens_used: int = 0  # Track token usage if available
    time_ms: float = 0.0  # Track response time
|
| 333 |
+
|
| 334 |
+
|
| 335 |
+
# =============================================================================
|
| 336 |
+
# BASE ADAPTER
|
| 337 |
+
# =============================================================================
|
| 338 |
+
|
| 339 |
+
class BaseLLMAdapter(ABC):
    """Abstract base for LLM adapters.

    Concrete subclasses (Gemini, LiteLLM, ...) implement ``generate`` and
    normalize provider-specific responses into ``LLMResponse``.
    """

    def __init__(self, api_key: Optional[str] = None):
        self.api_key = api_key  # provider API key; may be None when supplied elsewhere

    @abstractmethod
    def generate(self,
                 messages: List[Dict[str, str]],
                 tools: Optional[List[Dict[str, Any]]] = None,
                 temperature: float = 0.2,
                 max_tokens: int = 2000) -> LLMResponse:
        """Generate a response from the LLM.

        Args:
            messages: Chat messages as ``{"role": ..., "content": ...}`` dicts.
            tools: Optional tool schemas exposed to the model.
            temperature: Sampling temperature.
            max_tokens: Maximum output tokens.
        """
        pass
|
| 353 |
+
|
| 354 |
+
|
| 355 |
+
# =============================================================================
|
| 356 |
+
# GEMINI ADAPTER WITH FALLBACK
|
| 357 |
+
# =============================================================================
|
| 358 |
+
|
| 359 |
+
class GeminiAdapter(BaseLLMAdapter):
    """
    Google Gemini API adapter with multi-model fallback.

    Automatically falls back to next model when:
    - Rate limit exceeded (429)
    - API error occurs (if fallback enabled)
    - Model unavailable
    """

    def __init__(self,
                 model: str = "gemini-2.5-flash-lite",
                 api_key: Optional[str] = None,
                 enable_fallback: bool = True):
        """
        Args:
            model: Initial Gemini model name (the shared cascade may override it).
            api_key: Google AI Studio API key.
            enable_fallback: When True, rotate through the model cascade on
                rate limits / errors instead of failing immediately.
        """
        super().__init__(api_key)
        self.model = model
        self.enable_fallback = enable_fallback
        self._client = None  # lazily created google.genai client
        self.rate_limiter = get_rate_limiter()  # shared, process-wide limiter

    def _get_client(self):
        """Lazy load Gemini client."""
        if self._client is None:
            try:
                import google.genai
                self._client = google.genai.Client(api_key=self.api_key)
            except ImportError:
                raise ImportError("google-genai not installed. Install with: pip install google-genai")
        return self._client

    def generate(self,
                 messages: List[Dict[str, str]],
                 tools: Optional[List[Dict[str, Any]]] = None,
                 temperature: float = 0.2,
                 max_tokens: int = 2000) -> LLMResponse:
        """
        Generate content using Gemini with automatic fallback.

        Will try current model first, then fall back through cascade on errors.

        Raises:
            Exception: when every model in the cascade is exhausted, or a
                non-recoverable error occurs with fallback unavailable.
        """
        start_time = time.time()
        last_error = None
        attempts = 0
        max_attempts = len(self.rate_limiter.models)

        while attempts < max_attempts:
            attempts += 1
            current_model = self.rate_limiter.current_model_name

            try:
                # Check if preferred model has recovered.
                # FIX: refresh the captured model name when recovery switches
                # models, so THIS attempt uses the recovered model. Previously
                # the stale name captured above was still called/recorded.
                if self.rate_limiter.check_preferred_model_recovery():
                    current_model = self.rate_limiter.current_model_name

                # Check if we can make a request
                can_request, reason = self.rate_limiter.can_request(current_model)

                if not can_request:
                    logger.warning(f"Cannot request from {current_model}: {reason}")
                    if self.enable_fallback:
                        next_model = self.rate_limiter.fallback_to_next(reason)
                        if next_model:
                            continue
                    raise Exception(f"Rate limit exceeded: {reason}")

                # Wait if needed for RPM
                self.rate_limiter.wait_if_needed(current_model)

                # Make the actual API call
                response = self._call_gemini(current_model, messages, tools, temperature, max_tokens)

                # Record successful request
                elapsed_ms = (time.time() - start_time) * 1000
                tokens = self._estimate_tokens(messages, response.text)
                self.rate_limiter.record_request(current_model, tokens, elapsed_ms)

                # Update response metadata
                response.model_used = current_model
                response.tokens_used = tokens
                response.time_ms = elapsed_ms

                # Record in global cost tracking (best effort)
                try:
                    from config import config
                    config.evaluation.cost_tracking.record_request(current_model, tokens, elapsed_ms)
                except Exception:
                    pass  # Config might not be available

                return response

            except Exception as e:
                last_error = e
                error_str = str(e).lower()

                # Check if it's a rate limit error
                is_rate_limit = "429" in str(e) or "rate" in error_str or "quota" in error_str

                if is_rate_limit or (self.enable_fallback and "error" in error_str):
                    logger.warning(f"Error with {current_model}: {e}")
                    next_model = self.rate_limiter.fallback_to_next(error_str)
                    if next_model:
                        logger.info(f"Retrying with fallback model: {next_model}")
                        continue

                # Non-recoverable error or no fallback
                raise

        # Exhausted all models
        raise Exception(f"All models exhausted. Last error: {last_error}")

    def _call_gemini(self,
                     model: str,
                     messages: List[Dict[str, str]],
                     tools: Optional[List[Dict[str, Any]]],
                     temperature: float,
                     max_tokens: int) -> LLMResponse:
        """Make actual Gemini API call and normalize the result."""
        client = self._get_client()

        # Convert messages to Gemini format.
        # Gemini has no separate "system" role here: system messages are
        # folded into the "user" role.
        contents = []
        for msg in messages:
            role = "user" if msg["role"] in ["user", "system"] else "model"
            contents.append({
                "role": role,
                "parts": [{"text": msg["content"]}]
            })

        # Build tools for Gemini (OpenAI-style schemas -> function_declarations)
        gemini_tools = None
        if tools:
            gemini_tools = [{
                "function_declarations": [t["function"] for t in tools]
            }]

        # Call Gemini - tools go in config (local dict; shadows nothing here)
        config = {
            "temperature": temperature,
            "max_output_tokens": max_tokens
        }
        if gemini_tools:
            config["tools"] = gemini_tools

        response = client.models.generate_content(
            model=model,
            contents=contents,
            config=config
        )

        # Extract response text and any function calls
        text = response.text if hasattr(response, 'text') and response.text else ""
        tool_calls = []

        if hasattr(response, 'function_calls') and response.function_calls:
            for func_call in response.function_calls:
                args = func_call.args if isinstance(func_call.args, dict) else json.loads(str(func_call.args))
                tool_calls.append(LLMToolCall(
                    tool_name=func_call.name,
                    arguments=args,
                    reasoning=text or "Tool selected by Gemini"
                ))

        return LLMResponse(
            text=text,
            tool_calls=tool_calls,
            finish_reason=str(response.finish_reason) if hasattr(response, 'finish_reason') else "STOP"
        )

    def _estimate_tokens(self, messages: List[Dict], response_text: str) -> int:
        """Estimate token count (rough heuristic: 4 chars = 1 token)."""
        input_chars = sum(len(m.get("content", "") or "") for m in messages)
        output_chars = len(response_text or "")
        return (input_chars + output_chars) // 4
|
| 529 |
+
|
| 530 |
+
|
| 531 |
+
# =============================================================================
|
| 532 |
+
# LITELLM ADAPTER
|
| 533 |
+
# =============================================================================
|
| 534 |
+
|
| 535 |
+
class LiteLLMAdapter(BaseLLMAdapter):
    """Adapter that routes requests through LiteLLM (OpenAI, Anthropic, Groq, Ollama, ...)."""

    def __init__(self, model: str = "gpt-4o-mini", provider: str = "openai", api_key: Optional[str] = None):
        super().__init__(api_key)
        self.provider = provider
        # LiteLLM expects "provider/model"; fall back to the bare model name
        # when no provider prefix is given.
        self.model_string = f"{provider}/{model}" if provider else model
        self._client = None

    def _get_client(self):
        """Import litellm on first use and cache the module handle."""
        if self._client is None:
            try:
                import litellm
                if self.api_key:
                    litellm.api_key = self.api_key
                self._client = litellm
            except ImportError:
                raise ImportError("litellm not installed. Install with: pip install litellm")
        return self._client

    def generate(self,
                 messages: List[Dict[str, str]],
                 tools: Optional[List[Dict[str, Any]]] = None,
                 temperature: float = 0.2,
                 max_tokens: int = 2000) -> LLMResponse:
        """Generate content through LiteLLM.

        Args:
            messages: Chat messages in OpenAI format.
            tools: Optional OpenAI-style tool/function specifications.
            temperature: Sampling temperature.
            max_tokens: Cap on generated tokens.

        Returns:
            LLMResponse carrying text, parsed tool calls and usage metrics.

        Raises:
            Exception: re-raises any LiteLLM failure after logging it.
        """
        try:
            started = time.time()
            litellm = self._get_client()

            response = litellm.completion(
                model=self.model_string,
                messages=messages,
                tools=tools,
                temperature=temperature,
                max_tokens=max_tokens
            )

            choice = response.choices[0]
            text = choice.message.content or ""

            parsed_calls = []
            raw_calls = getattr(choice.message, 'tool_calls', None)
            if raw_calls:
                for raw in raw_calls:
                    parsed_calls.append(LLMToolCall(
                        tool_name=raw.function.name,
                        arguments=json.loads(raw.function.arguments),
                        reasoning=text or "Tool selected by LLM"
                    ))

            elapsed_ms = (time.time() - started) * 1000
            tokens = response.usage.total_tokens if hasattr(response, 'usage') else 0

            return LLMResponse(
                text=text,
                tool_calls=parsed_calls,
                finish_reason=choice.finish_reason,
                model_used=self.model_string,
                tokens_used=tokens,
                time_ms=elapsed_ms
            )

        except Exception as e:
            logger.error(f"LiteLLM generation failed: {e}")
            raise
|
| 604 |
+
|
| 605 |
+
|
| 606 |
+
# =============================================================================
|
| 607 |
+
# MOCK ADAPTER FOR TESTING
|
| 608 |
+
# =============================================================================
|
| 609 |
+
|
| 610 |
+
class MockLLMAdapter(BaseLLMAdapter):
    """Stub adapter that returns a canned response; lets tests run without API keys."""

    def generate(self,
                 messages: List[Dict[str, str]],
                 tools: Optional[List[Dict[str, Any]]] = None,
                 temperature: float = 0.2,
                 max_tokens: int = 2000) -> LLMResponse:
        """Return a fixed LLMResponse regardless of the input arguments."""
        return LLMResponse(
            text="Mock LLM response",
            tool_calls=[],
            model_used="mock",
            finish_reason="stop",
            time_ms=1.0,
            tokens_used=10,
        )
|
| 627 |
+
|
| 628 |
+
|
| 629 |
+
# =============================================================================
|
| 630 |
+
# FACTORY FUNCTION
|
| 631 |
+
# =============================================================================
|
| 632 |
+
|
| 633 |
+
def get_llm_adapter(provider: str = "gemini",
                    model: str = "gemini-2.5-flash-lite",
                    api_key: Optional[str] = None,
                    enable_fallback: bool = True) -> BaseLLMAdapter:
    """
    Factory that returns the LLM adapter matching the requested provider.

    Args:
        provider: LLM provider name (gemini, openai, anthropic, mock, ...).
        model: Model identifier for the provider.
        api_key: Optional API key for authentication.
        enable_fallback: Enable automatic model fallback on rate limits
            (Gemini only).

    Returns:
        A configured BaseLLMAdapter subclass instance.
    """
    litellm_providers = {"openai", "anthropic", "groq", "ollama", "cohere", "mistral"}

    if provider == "mock":
        return MockLLMAdapter(api_key=api_key)

    if provider == "gemini":
        try:
            return GeminiAdapter(model=model, api_key=api_key, enable_fallback=enable_fallback)
        except ImportError:
            # google-genai not installed; LiteLLM can still reach Gemini.
            logger.warning("Gemini not available, trying LiteLLM")
            return LiteLLMAdapter(model=model, provider="gemini", api_key=api_key)

    if provider not in litellm_providers:
        # Unknown names are still worth a try — LiteLLM supports many backends.
        logger.warning(f"Unknown provider {provider}, attempting LiteLLM")
    return LiteLLMAdapter(model=model, provider=provider, api_key=api_key)
|
| 666 |
+
|
| 667 |
+
|
| 668 |
+
def get_usage_summary() -> Dict:
    """Return aggregate usage statistics from the shared rate limiter."""
    limiter = get_rate_limiter()
    return limiter.get_usage_summary()
|
| 671 |
+
|
| 672 |
+
|
| 673 |
+
def reset_rate_limiter():
    """Drop the cached global rate limiter so the next access recreates it fresh."""
    global _global_rate_limiter
    # Setting the singleton back to None forces lazy re-initialization.
    _global_rate_limiter = None
|
agents/specialized_agents.py
ADDED
|
@@ -0,0 +1,223 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Path: QAgents-workflos/agents/specialized_agents.py
|
| 2 |
+
# Relations: Uses base_agent.py, prompts/agent_prompts.py
|
| 3 |
+
# Description: Domain-specific agents for quantum circuit optimization
|
| 4 |
+
"""
|
| 5 |
+
Specialized Quantum Agents: Domain-specific agents for circuit optimization.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from typing import Optional, List, Dict, Any
|
| 9 |
+
from .base_agent import (
|
| 10 |
+
LLMAgent, RuleBasedAgent, AgentRole,
|
| 11 |
+
AgentContext, AgentAction, AgentResult
|
| 12 |
+
)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def _goal_to_string(context: AgentContext) -> str:
    """Normalize context.goal (which may be a str, a list, or None) to lowercase text."""
    goal = context.goal
    if isinstance(goal, list):
        # Only the first goal entry is considered; empty lists become "".
        goal = goal[0] if goal else ""
    if not goal:
        return ""
    return str(goal).lower()
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
class ArchitectAgent(LLMAgent):
    """
    High-level planner: decides what kind of circuit to build and the
    overall construction approach.
    """

    def __init__(self, agent_id: str = "architect"):
        # Deferred import avoids a circular dependency with the prompts package.
        from prompts import ARCHITECT_PROMPT

        planning_tools = [
            "create_from_template",
            "generate_from_description",
            "analyze_circuit",
        ]
        super().__init__(
            agent_id=agent_id,
            role=AgentRole.ARCHITECT,
            system_prompt=ARCHITECT_PROMPT,
            tools=planning_tools,
        )

    def can_handle(self, context: AgentContext) -> bool:
        """Applicable when no circuit exists yet, or the goal requests a replan."""
        if context.current_circuit is None:
            return True
        return "replan" in _goal_to_string(context)
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
class BuilderAgent(LLMAgent):
    """
    Constructs and modifies circuits according to the architect's plan.
    Owns the actual circuit-building tools.
    """

    def __init__(self, agent_id: str = "builder"):
        # Deferred import avoids a circular dependency with the prompts package.
        from prompts import BUILDER_PROMPT

        construction_tools = [
            "create_from_template",
            "generate_random_circuit",
            "generate_from_description",
            "compose_circuits",
            "tensor_circuits",
            "repeat_circuit",
        ]
        super().__init__(
            agent_id=agent_id,
            role=AgentRole.BUILDER,
            system_prompt=BUILDER_PROMPT,
            tools=construction_tools,
        )

    def can_handle(self, context: AgentContext) -> bool:
        """Applicable when no circuit exists, or a plan appears in the history."""
        if context.current_circuit is None:
            return True
        for entry in context.history:
            if "plan" in str(entry.get("action", "")).lower():
                return True
        return False
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
class ValidatorAgent(LLMAgent):
    """Checks circuits for syntactic correctness and hardware compatibility."""

    def __init__(self, agent_id: str = "validator"):
        # Deferred import avoids a circular dependency with the prompts package.
        from prompts import VALIDATOR_PROMPT

        validation_tools = [
            "validate_syntax",
            "check_connectivity",
            "verify_unitary",
        ]
        super().__init__(
            agent_id=agent_id,
            role=AgentRole.VALIDATOR,
            system_prompt=VALIDATOR_PROMPT,
            tools=validation_tools,
        )

    def can_handle(self, context: AgentContext) -> bool:
        """Validation is applicable whenever a circuit exists."""
        return context.current_circuit is not None
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
class OptimizerAgent(LLMAgent):
    """Improves circuits along depth, gate count, and hardware-fitness axes."""

    def __init__(self, agent_id: str = "optimizer"):
        # Deferred import avoids a circular dependency with the prompts package.
        from prompts import OPTIMIZER_PROMPT

        optimization_tools = [
            "generate_inverse",
            "compose_circuits",
            "analyze_circuit",
            "calculate_complexity",
            "calculate_hardware_fitness",
        ]
        super().__init__(
            agent_id=agent_id,
            role=AgentRole.OPTIMIZER,
            system_prompt=OPTIMIZER_PROMPT,
            tools=optimization_tools,
        )

    def can_handle(self, context: AgentContext) -> bool:
        """Applicable when a circuit exists and the goal mentions optimization."""
        if context.current_circuit is None:
            return False
        goal = _goal_to_string(context)
        return any(keyword in goal for keyword in ("optimize", "improve"))
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
class AnalyzerAgent(LLMAgent):
    """Inspects circuit properties and reports insights (depth, state, resources)."""

    def __init__(self, agent_id: str = "analyzer"):
        # Deferred import avoids a circular dependency with the prompts package.
        from prompts import ANALYZER_PROMPT

        analysis_tools = [
            "parse_qasm",
            "analyze_circuit",
            "get_circuit_depth",
            "get_statevector",
            "get_probabilities",
            "estimate_resources",
            "estimate_noise",
        ]
        super().__init__(
            agent_id=agent_id,
            role=AgentRole.ANALYZER,
            system_prompt=ANALYZER_PROMPT,
            tools=analysis_tools,
        )

    def can_handle(self, context: AgentContext) -> bool:
        """Analysis is applicable whenever a circuit exists."""
        return context.current_circuit is not None
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
class ScorerAgent(LLMAgent):
    """Grades circuits on complexity, hardware fitness and expressibility metrics."""

    def __init__(self, agent_id: str = "scorer"):
        # Deferred import avoids a circular dependency with the prompts package.
        from prompts import SCORER_PROMPT

        scoring_tools = [
            "calculate_complexity",
            "calculate_hardware_fitness",
            "calculate_expressibility",
            "simulate_circuit",
        ]
        super().__init__(
            agent_id=agent_id,
            role=AgentRole.SCORER,
            system_prompt=SCORER_PROMPT,
            tools=scoring_tools,
        )

    def can_handle(self, context: AgentContext) -> bool:
        """Applicable when a circuit exists and the goal asks for scoring."""
        if context.current_circuit is None:
            return False
        goal = _goal_to_string(context)
        return any(keyword in goal for keyword in ("score", "evaluate"))
|
| 186 |
+
|
| 187 |
+
|
| 188 |
+
class SimulatorAgent(RuleBasedAgent):
    """
    Deterministic, rule-based simulation agent: fires whenever a circuit
    is present, with no LLM involvement.
    """

    def __init__(self, agent_id: str = "simulator"):
        def _maybe_simulate(context: AgentContext) -> Optional[AgentAction]:
            # Only act when there is a circuit available to run.
            if not context.current_circuit:
                return None
            return AgentAction(
                tool_name="simulate_circuit",
                arguments={"qasm": context.current_circuit, "shots": 1024},
                reasoning="Circuit ready for simulation",
            )

        super().__init__(
            agent_id=agent_id,
            role=AgentRole.ANALYZER,
            rules=[_maybe_simulate],
            tools=["simulate_circuit", "get_statevector", "get_probabilities"],
        )
|
| 210 |
+
|
| 211 |
+
|
| 212 |
+
# Factory function to create all specialized agents
|
| 213 |
+
def create_all_agents() -> Dict[str, LLMAgent]:
    """Instantiate one of each specialized agent, keyed by its role name."""
    registry = {
        "architect": ArchitectAgent,
        "builder": BuilderAgent,
        "validator": ValidatorAgent,
        "optimizer": OptimizerAgent,
        "analyzer": AnalyzerAgent,
        "scorer": ScorerAgent,
        "simulator": SimulatorAgent,
    }
    return {name: agent_cls() for name, agent_cls in registry.items()}
|
app.py
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
QAgents-Workflows: Hugging Face Space Entry Point
|
| 3 |
+
Provides a Gradio interface for the Quantum Circuit Orchestrator.
|
| 4 |
+
Reads all configuration from environment variables for HF Space deployment.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
import gradio as gr
|
| 9 |
+
import logging
|
| 10 |
+
from config import LLMConfig
|
| 11 |
+
from orchestrators import create_orchestrator
|
| 12 |
+
from client.mcp_client import get_client
|
| 13 |
+
|
| 14 |
+
# Configure logging
|
| 15 |
+
logging.basicConfig(level=logging.INFO)
|
| 16 |
+
logger = logging.getLogger(__name__)
|
| 17 |
+
|
| 18 |
+
# Log environment configuration at startup
|
| 19 |
+
logger.info("=" * 70)
|
| 20 |
+
logger.info("QAgents Quantum Circuit Orchestrator - Initialization")
|
| 21 |
+
logger.info("=" * 70)
|
| 22 |
+
logger.info(f"LLM Provider: {os.getenv('LLM_PROVIDER', 'gemini (default)')}")
|
| 23 |
+
logger.info(f"LLM Model: {os.getenv('LLM_MODEL', 'gemini-2.5-flash-lite (default)')}")
|
| 24 |
+
logger.info(f"MCP Server URL: {os.getenv('MCP_SERVER_URL', 'http://127.0.0.1:7861 (default)')}")
|
| 25 |
+
logger.info(f"Google API Key configured: {bool(os.getenv('GOOGLE_API_KEY') or os.getenv('GENAI_API_KEY'))}")
|
| 26 |
+
logger.info("=" * 70)
|
| 27 |
+
|
| 28 |
+
# Initialize MCP client (will use MCP_SERVER_URL env var if set)
|
| 29 |
+
mcp_client = get_client()
|
| 30 |
+
|
| 31 |
+
def generate_circuit(prompt, mode, difficulty):
    """Run the selected orchestrator on the prompt.

    Returns a (circuit_output, metrics_text) tuple for the Gradio outputs.
    """
    try:
        logger.info(f"Generating circuit: mode={mode}, difficulty={difficulty}")
        logger.info(f"Prompt: {prompt}")

        orchestrator = create_orchestrator(mode.lower())

        # Difficulty is currently informational only; the prompt is passed
        # through unchanged. A real deployment could map it to constraints.
        result = orchestrator.run(prompt)

        if not result.success:
            error_msg = "\n".join(result.errors)
            return (
                f"❌ Failed ({result.execution_time_ms:.0f}ms)\n\nErrors:\n{error_msg}",
                "N/A",
            )

        body = result.final_output if result.final_output else "No QASM generated."
        output = f"✅ Success ({result.execution_time_ms:.0f}ms)\n\n" + body

        metrics = f"LLM Calls: {result.steps_completed}\n"
        if hasattr(result, 'tokens_used'):
            metrics += f"Tokens: {result.tokens_used}\n"
        return output, metrics

    except Exception as e:
        logger.error(f"Error generating circuit: {e}")
        return f"❌ System Error: {str(e)}", "Error"
|
| 65 |
+
|
| 66 |
+
def check_mcp_status():
    """Report MCP server reachability as a short status string for the UI."""
    try:
        healthy = mcp_client.health_check()
        url = os.environ.get("MCP_SERVER_URL", "http://127.0.0.1:7861")
        label = "🟢 Connected" if healthy else "🔴 Disconnected"
        return f"{label} ({url})"
    except Exception as e:
        return f"🔴 Error: {str(e)}"
|
| 75 |
+
|
| 76 |
+
# Create Gradio Interface
|
| 77 |
+
with gr.Blocks(title="Quantum Circuit Orchestrator") as demo:
    gr.Markdown("# ⚛️ QAgents: Quantum Circuit Orchestrator")
    gr.Markdown("Multi-agent system for generating optimized quantum circuits.")

    with gr.Row():
        # Left column: user inputs and the run button.
        with gr.Column(scale=2):
            prompt_box = gr.Textbox(
                label="Circuit Description",
                placeholder="e.g., Create a 3-qubit GHZ state",
                lines=3,
            )
            with gr.Row():
                mode_dropdown = gr.Dropdown(
                    choices=["naked", "quasar", "hybrid", "blackboard"],
                    value="naked",
                    label="Orchestration Mode",
                )
                difficulty_dropdown = gr.Dropdown(
                    choices=["EASY", "MEDIUM", "HARD", "VERY_HARD"],
                    value="EASY",
                    label="Estimated Difficulty",
                )
            run_button = gr.Button("Generate Circuit", variant="primary")

        # Right column: live status and execution metrics.
        with gr.Column(scale=1):
            server_status = gr.Textbox(label="MCP Server Status", value=check_mcp_status, interactive=False)
            metrics_box = gr.Textbox(label="Execution Metrics", lines=4)

    with gr.Row():
        qasm_box = gr.Code(label="Generated QASM", language="qasm", lines=15)

    # Wire the generate button to the orchestrator entry point.
    run_button.click(
        fn=generate_circuit,
        inputs=[prompt_box, mode_dropdown, difficulty_dropdown],
        outputs=[qasm_box, metrics_box],
    )

    # Refresh the server status once when the page loads.
    demo.load(fn=check_mcp_status, outputs=[server_status])

if __name__ == "__main__":
    demo.launch()
|
client/__init__.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""MCP Client module."""
|
| 2 |
+
|
| 3 |
+
from .mcp_client import MCPClient, MCPResponse, get_client
|
| 4 |
+
|
| 5 |
+
__all__ = ["MCPClient", "MCPResponse", "get_client"]
|
client/mcp_client.py
ADDED
|
@@ -0,0 +1,698 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Path: QAgents-workflos/client/mcp_client.py
|
| 2 |
+
# Relations: Uses QuantumArchitect-MCP Gradio server
|
| 3 |
+
# Description: MCP client with fallback local implementations for missing endpoints
|
| 4 |
+
"""
|
| 5 |
+
MCP Client: Connection to QuantumArchitect-MCP endpoints.
|
| 6 |
+
Provides both synchronous and async interfaces.
|
| 7 |
+
|
| 8 |
+
Available Gradio endpoints (as of latest scan):
|
| 9 |
+
- ui_create_circuit: Create circuit from template
|
| 10 |
+
- ui_validate_circuit: Validate QASM syntax
|
| 11 |
+
- ui_simulate_circuit: Simulate circuit
|
| 12 |
+
- ui_score_circuit: Score circuit complexity/fitness
|
| 13 |
+
|
| 14 |
+
Missing endpoints use local fallback implementations.
|
| 15 |
+
"""
|
| 16 |
+
|
| 17 |
+
import requests
|
| 18 |
+
from typing import Any, Dict, Optional, List
|
| 19 |
+
from dataclasses import dataclass, field
|
| 20 |
+
from datetime import datetime
|
| 21 |
+
import json
|
| 22 |
+
import logging
|
| 23 |
+
import re
|
| 24 |
+
import time
|
| 25 |
+
import random
|
| 26 |
+
import math
|
| 27 |
+
|
| 28 |
+
logger = logging.getLogger(__name__)
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
@dataclass
class MCPResponse:
    """Uniform envelope for results from MCP endpoints (or local fallbacks)."""
    success: bool                                            # whether the call succeeded
    data: Any                                                # endpoint payload; shape varies per endpoint
    endpoint: str                                            # name of the endpoint that produced this
    timestamp: datetime = field(default_factory=datetime.now)  # when the response was created
    error: Optional[str] = None                              # error message when success is False
    execution_time_ms: float = 0.0                           # wall-clock duration of the call
    is_fallback: bool = False                                # True when served by a local fallback
|
| 42 |
+
|
| 43 |
+
class QASMLocalAnalyzer:
|
| 44 |
+
"""Local QASM analysis for fallback when MCP endpoints unavailable."""
|
| 45 |
+
|
| 46 |
+
GATE_PATTERN = re.compile(
|
| 47 |
+
r'^(h|x|y|z|s|t|sdg|tdg|cx|cz|cy|swap|ccx|rz|rx|ry|u1|u2|u3|p|measure|barrier)\b',
|
| 48 |
+
re.IGNORECASE
|
| 49 |
+
)
|
| 50 |
+
|
| 51 |
+
@staticmethod
def parse_qasm(qasm_code: str) -> Dict[str, Any]:
    """Extract version, includes, registers and gate list from a QASM 2.0 string."""
    result = {
        'openqasm_version': '2.0',
        'includes': [],
        'qregs': [],
        'cregs': [],
        'gates': [],
        'num_qubits': 0,
        'num_classical': 0
    }

    for raw_line in qasm_code.strip().split('\n'):
        line = raw_line.strip()
        # Skip blanks and line comments.
        if not line or line.startswith('//'):
            continue

        if line.startswith('OPENQASM'):
            result['openqasm_version'] = line.split()[1].rstrip(';')
        elif line.startswith('include'):
            result['includes'].append(line.split('"')[1] if '"' in line else line.split()[1])
        elif line.startswith('qreg'):
            match = re.search(r'qreg\s+(\w+)\[(\d+)\]', line)
            if match:
                size = int(match.group(2))
                result['qregs'].append({'name': match.group(1), 'size': size})
                result['num_qubits'] += size
        elif line.startswith('creg'):
            match = re.search(r'creg\s+(\w+)\[(\d+)\]', line)
            if match:
                size = int(match.group(2))
                result['cregs'].append({'name': match.group(1), 'size': size})
                result['num_classical'] += size
        elif QASMLocalAnalyzer.GATE_PATTERN.match(line):
            # "rz(0.5) q[0];" -> "rz"; strip any parameter list from the name.
            gate_name = line.split()[0].split('(')[0]
            result['gates'].append({'gate': gate_name, 'raw': line.rstrip(';')})

    return result
|
| 87 |
+
|
| 88 |
+
@staticmethod
def analyze_circuit(qasm_code: str) -> Dict[str, Any]:
    """Analyze circuit properties: register sizes, depth estimate, gate breakdown.

    Depth is a simplified upper bound that assumes every gate executes
    sequentially. Measurements and barriers are excluded from depth and
    from the single/two/multi-qubit classification.

    Bug fix: `barrier` lines (which GATE_PATTERN matches) were previously
    miscounted as single-qubit gates and inflated the depth estimate;
    barriers are scheduling directives, not computational gates. They are
    now reported separately under the new 'barriers' key.
    """
    parsed = QASMLocalAnalyzer.parse_qasm(qasm_code)
    gates = parsed['gates']

    gate_counts = {}
    single_qubit_gates = 0
    two_qubit_gates = 0
    multi_qubit_gates = 0
    measurement_count = 0
    barrier_count = 0

    for g in gates:
        gate = g['gate'].lower()
        gate_counts[gate] = gate_counts.get(gate, 0) + 1

        if gate == 'measure':
            measurement_count += 1
        elif gate == 'barrier':
            barrier_count += 1
        elif gate in ['cx', 'cz', 'cy', 'swap']:
            two_qubit_gates += 1
        elif gate in ['ccx', 'cswap']:
            multi_qubit_gates += 1
        else:
            single_qubit_gates += 1

    # Estimate depth (simplified: computational gates only, assumed sequential).
    depth = len([g for g in gates
                 if g['gate'].lower() not in ('measure', 'barrier')])

    return {
        'num_qubits': parsed['num_qubits'],
        'num_classical_bits': parsed['num_classical'],
        'depth': depth,
        'gate_count': len(gates),
        'gate_breakdown': gate_counts,
        'single_qubit_gates': single_qubit_gates,
        'two_qubit_gates': two_qubit_gates,
        'multi_qubit_gates': multi_qubit_gates,
        'measurements': measurement_count,
        'barriers': barrier_count
    }
|
| 127 |
+
|
| 128 |
+
@staticmethod
def get_depth(qasm_code: str) -> int:
    """Return the estimated circuit depth from the full analysis."""
    return QASMLocalAnalyzer.analyze_circuit(qasm_code)['depth']
|
| 133 |
+
|
| 134 |
+
@staticmethod
def calculate_complexity(qasm_code: str) -> Dict[str, Any]:
    """Score circuit complexity (0-100) from depth, gate count, entanglement and qubits."""
    metrics = QASMLocalAnalyzer.analyze_circuit(qasm_code)

    # Each component saturates at its cap, then is weighted:
    # depth 30%, gates 30%, two-qubit gates 25%, qubit count 15%.
    components = {
        'depth_contribution': min(metrics['depth'] / 50.0, 1.0) * 30,
        'gate_contribution': min(metrics['gate_count'] / 100.0, 1.0) * 30,
        'entanglement_contribution': min(metrics['two_qubit_gates'] / 20.0, 1.0) * 25,
        'qubit_contribution': min(metrics['num_qubits'] / 10.0, 1.0) * 15,
    }

    result = {'complexity_score': round(sum(components.values()), 2)}
    result.update({name: round(value, 2) for name, value in components.items()})
    result['raw_metrics'] = metrics
    return result
|
| 155 |
+
|
| 156 |
+
@staticmethod
def validate_syntax(qasm_code: str) -> Dict[str, Any]:
    """Lightweight QASM syntax check: returns validity flag, errors and warnings."""
    errors = []
    warnings = []
    lines = qasm_code.strip().split('\n')

    has_openqasm = False
    has_qreg = False
    structural_keywords = ('include', 'creg', 'barrier', 'measure', 'OPENQASM', 'qreg')

    for lineno, raw_line in enumerate(lines, 1):
        stripped = raw_line.strip()
        if not stripped or stripped.startswith('//'):
            continue

        if stripped.startswith('OPENQASM'):
            has_openqasm = True
        elif stripped.startswith('qreg'):
            has_qreg = True
        elif not stripped.startswith(structural_keywords):
            # Lines that are neither structure nor a known gate are only
            # checked for a terminating semicolon.
            if not QASMLocalAnalyzer.GATE_PATTERN.match(stripped):
                if stripped and not stripped.endswith(';'):
                    warnings.append(f"Line {lineno}: Missing semicolon")

    if not has_openqasm:
        errors.append("Missing OPENQASM version declaration")
    if not has_qreg:
        errors.append("No quantum register (qreg) defined")

    return {
        'valid': not errors,
        'errors': errors,
        'warnings': warnings,
        'line_count': len(lines)
    }
|
| 193 |
+
|
| 194 |
+
@staticmethod
|
| 195 |
+
def calculate_hardware_fitness(qasm_code: str, hardware: str = "ibm_brisbane") -> Dict[str, Any]:
|
| 196 |
+
"""Calculate hardware fitness score."""
|
| 197 |
+
analysis = QASMLocalAnalyzer.analyze_circuit(qasm_code)
|
| 198 |
+
|
| 199 |
+
# Hardware profiles (simplified)
|
| 200 |
+
profiles = {
|
| 201 |
+
'ibm_brisbane': {'max_qubits': 127, 'connectivity': 'heavy-hex', 'two_q_error': 0.01},
|
| 202 |
+
'ibm_sherbrooke': {'max_qubits': 127, 'connectivity': 'heavy-hex', 'two_q_error': 0.008},
|
| 203 |
+
'rigetti_aspen': {'max_qubits': 80, 'connectivity': 'octagonal', 'two_q_error': 0.02},
|
| 204 |
+
'ionq_harmony': {'max_qubits': 11, 'connectivity': 'all-to-all', 'two_q_error': 0.005}
|
| 205 |
+
}
|
| 206 |
+
|
| 207 |
+
profile = profiles.get(hardware, profiles['ibm_brisbane'])
|
| 208 |
+
|
| 209 |
+
# Calculate fitness
|
| 210 |
+
qubit_fit = 100 if analysis['num_qubits'] <= profile['max_qubits'] else 50
|
| 211 |
+
depth_penalty = min(analysis['depth'] * 2, 30)
|
| 212 |
+
two_q_penalty = analysis['two_qubit_gates'] * profile['two_q_error'] * 100
|
| 213 |
+
|
| 214 |
+
fitness = max(0, qubit_fit - depth_penalty - two_q_penalty)
|
| 215 |
+
|
| 216 |
+
return {
|
| 217 |
+
'fitness_score': round(fitness, 2),
|
| 218 |
+
'hardware': hardware,
|
| 219 |
+
'qubit_fit': qubit_fit,
|
| 220 |
+
'depth_penalty': round(depth_penalty, 2),
|
| 221 |
+
'error_penalty': round(two_q_penalty, 2),
|
| 222 |
+
'recommendation': 'suitable' if fitness > 70 else 'marginal' if fitness > 40 else 'poor'
|
| 223 |
+
}
|
| 224 |
+
|
| 225 |
+
|
| 226 |
+
class MCPClient:
    """
    Client for QuantumArchitect-MCP server.
    Wraps MCP endpoints with fallback to local implementations.

    Primary endpoints (from Gradio):
    - ui_create_circuit
    - ui_validate_circuit
    - ui_simulate_circuit
    - ui_score_circuit

    Missing endpoints use QASMLocalAnalyzer for fallback.
    """

    def __init__(self, base_url: str = "http://127.0.0.1:7861"):
        """Create a client bound to the MCP server at *base_url*."""
        self.base_url = base_url.rstrip("/")
        self.session = requests.Session()
        self._connected = False
        self._analyzer = QASMLocalAnalyzer()

    def _call(self, endpoint: str, **kwargs) -> MCPResponse:
        """Internal method to call MCP endpoints.

        Uses Gradio's two-step call protocol: POST the positional args to
        /gradio_api/call/<endpoint>, then GET the event stream and decode the
        first "data:" line as the result. Any exception is converted into a
        failed MCPResponse (success=False) rather than raised.
        """
        start = time.perf_counter()

        try:
            url = f"{self.base_url}/gradio_api/call/{endpoint}"
            # Gradio expects positional args; kwargs insertion order supplies them.
            payload = {"data": list(kwargs.values()) if kwargs else []}

            response = self.session.post(url, json=payload, timeout=30)
            response.raise_for_status()

            result = response.json()
            event_id = result.get("event_id")

            if event_id:
                result_url = f"{self.base_url}/gradio_api/call/{endpoint}/{event_id}"
                result_response = self.session.get(result_url, timeout=30)

                lines = result_response.text.strip().split("\n")
                for line in lines:
                    if line.startswith("data:"):
                        data = json.loads(line[5:].strip())
                        elapsed = (time.perf_counter() - start) * 1000
                        return MCPResponse(
                            success=True,
                            # Unwrap single-element lists returned by Gradio.
                            data=data[0] if isinstance(data, list) and len(data) == 1 else data,
                            endpoint=endpoint,
                            execution_time_ms=elapsed
                        )

            # No event_id (or no data line): return the raw POST result.
            elapsed = (time.perf_counter() - start) * 1000
            return MCPResponse(
                success=True,
                data=result,
                endpoint=endpoint,
                execution_time_ms=elapsed
            )

        except Exception as e:
            elapsed = (time.perf_counter() - start) * 1000
            logger.warning(f"MCP call failed: {endpoint} - {e}")
            return MCPResponse(
                success=False,
                data=None,
                endpoint=endpoint,
                error=str(e),
                execution_time_ms=elapsed
            )

    def _fallback_response(self, endpoint: str, data: Any, start_time: float) -> MCPResponse:
        """Create a fallback response using local implementation."""
        elapsed = (time.perf_counter() - start_time) * 1000
        return MCPResponse(
            success=True,
            data=data,
            endpoint=f"{endpoint}(fallback)",
            execution_time_ms=elapsed,
            is_fallback=True
        )

    def health_check(self) -> bool:
        """Check if MCP server is reachable (also updates self._connected)."""
        try:
            response = self.session.get(f"{self.base_url}/", timeout=5)
            self._connected = response.status_code == 200
            return self._connected
        except Exception:
            # Fixed: was a bare `except:`, which would also swallow
            # KeyboardInterrupt / SystemExit.
            self._connected = False
            return False

    # ===== Circuit Creation Endpoints =====

    def create_circuit_from_template(self, template_name: str, num_qubits: int = 2) -> MCPResponse:
        """Create a circuit from a predefined template.
        Maps to ui_create_circuit endpoint in Gradio."""
        return self._call("ui_create_circuit", template=template_name, qubits=num_qubits, params="{}")

    def generate_random_circuit(self, num_qubits: int = 3, depth: int = 5,
                                gate_set: str = "h,cx,rz") -> MCPResponse:
        """Generate a random quantum circuit. Uses local fallback.

        *gate_set* is a comma-separated list of gate names; names outside the
        supported sets are silently skipped for that iteration.
        """
        start = time.perf_counter()
        gates = gate_set.split(',')

        qasm_lines = [
            'OPENQASM 2.0;',
            'include "qelib1.inc";',
            f'qreg q[{num_qubits}];',
            f'creg c[{num_qubits}];'
        ]

        for _ in range(depth):
            gate = random.choice(gates)
            if gate in ['h', 'x', 'y', 'z', 's', 't']:
                q = random.randint(0, num_qubits - 1)
                qasm_lines.append(f'{gate} q[{q}];')
            elif gate in ['cx', 'cz']:
                # Two-qubit gates need two distinct qubits; skip on 1-qubit circuits.
                if num_qubits >= 2:
                    q1 = random.randint(0, num_qubits - 1)
                    q2 = random.randint(0, num_qubits - 1)
                    while q2 == q1:
                        q2 = random.randint(0, num_qubits - 1)
                    qasm_lines.append(f'{gate} q[{q1}], q[{q2}];')
            elif gate in ['rz', 'rx', 'ry']:
                q = random.randint(0, num_qubits - 1)
                angle = round(random.uniform(0, 2 * math.pi), 4)
                qasm_lines.append(f'{gate}({angle}) q[{q}];')

        qasm_lines.append('measure q -> c;')  # fixed: was a no-op f-string
        qasm_code = '\n'.join(qasm_lines)

        return self._fallback_response("generate_random_circuit", {'qasm': qasm_code}, start)

    def generate_circuit_from_description(self, description: str) -> MCPResponse:
        """Generate circuit from natural language description.
        Uses ui_create_circuit with best-matching template."""
        desc_lower = description.lower()

        # Keyword heuristics; first match wins, bell_state is the default.
        if 'entangle' in desc_lower or 'bell' in desc_lower:
            template = 'bell_state'
        elif 'ghz' in desc_lower:
            template = 'ghz_state'
        elif 'superposition' in desc_lower:
            template = 'superposition'
        elif 'qft' in desc_lower or 'fourier' in desc_lower:
            template = 'qft'
        elif 'grover' in desc_lower or 'search' in desc_lower:
            template = 'grover'
        elif 'vqe' in desc_lower or 'variational' in desc_lower:
            template = 'vqe'
        else:
            template = 'bell_state'

        return self._call("ui_create_circuit", template=template, qubits=2, params="{}")

    # ===== Parsing & Analysis Endpoints (Fallback) =====

    def parse_qasm(self, qasm_code: str) -> MCPResponse:
        """Parse OpenQASM code into circuit structure. Uses local fallback."""
        start = time.perf_counter()
        parsed = self._analyzer.parse_qasm(qasm_code)
        return self._fallback_response("parse_qasm", parsed, start)

    def analyze_circuit(self, qasm_code: str) -> MCPResponse:
        """Analyze circuit properties (depth, gates, etc.). Uses local fallback."""
        start = time.perf_counter()
        analysis = self._analyzer.analyze_circuit(qasm_code)
        return self._fallback_response("analyze_circuit", analysis, start)

    def get_circuit_depth(self, qasm_code: str) -> MCPResponse:
        """Get the depth of a circuit. Uses local fallback."""
        start = time.perf_counter()
        depth = self._analyzer.get_depth(qasm_code)
        return self._fallback_response("get_circuit_depth", {'depth': depth}, start)

    # ===== Validation Endpoints =====

    def validate_syntax(self, qasm_code: str) -> MCPResponse:
        """Validate QASM syntax. Maps to ui_validate_circuit."""
        return self._call("ui_validate_circuit", qasm=qasm_code, hardware="")

    def check_connectivity(self, qasm_code: str, hardware: str = "ibm_brisbane") -> MCPResponse:
        """Check if circuit respects hardware connectivity. Uses ui_validate_circuit."""
        return self._call("ui_validate_circuit", qasm=qasm_code, hardware=hardware)

    def verify_unitary(self, qasm_code: str) -> MCPResponse:
        """Verify circuit produces valid unitary. Uses local fallback.

        NOTE: this only proxies the syntax check; a true unitarity check
        would require simulation.
        """
        start = time.perf_counter()
        validation = self._analyzer.validate_syntax(qasm_code)
        result = {
            'is_unitary': validation['valid'],
            'errors': validation['errors'],
            'note': 'Local validation - full unitary check requires simulation'
        }
        return self._fallback_response("verify_unitary", result, start)

    # ===== Simulation Endpoints =====

    def simulate_circuit(self, qasm_code: str, shots: int = 1024) -> MCPResponse:
        """Simulate circuit and get measurement results. Maps to ui_simulate_circuit."""
        return self._call("ui_simulate_circuit", qasm=qasm_code, shots=shots)

    def get_statevector(self, qasm_code: str) -> MCPResponse:
        """Get the statevector of a circuit. Uses ui_simulate_circuit.

        NOTE(review): the server response is replaced by a hint string; no
        actual statevector is returned.
        """
        result = self._call("ui_simulate_circuit", qasm=qasm_code, shots=1)
        if result.success and result.data:
            result.data = {'statevector_hint': 'Use simulation results for state info'}
        return result

    def get_probabilities(self, qasm_code: str) -> MCPResponse:
        """Get probability distribution from circuit. Uses ui_simulate_circuit.

        NOTE(review): the histogram is passed through unchanged; only the
        endpoint label is rewritten — probabilities are not normalized here.
        """
        result = self._call("ui_simulate_circuit", qasm=qasm_code, shots=1024)
        if result.success and result.data:
            result.endpoint = "get_probabilities"
        return result

    # ===== Scoring Endpoints =====

    def calculate_complexity_score(self, qasm_code: str) -> MCPResponse:
        """Calculate circuit complexity score. Tries ui_score_circuit then fallback."""
        result = self._call("ui_score_circuit", qasm=qasm_code, hardware="ibm_brisbane")
        if result.success:
            return result

        # Fallback to local
        start = time.perf_counter()
        complexity = self._analyzer.calculate_complexity(qasm_code)
        return self._fallback_response("calculate_complexity_score", complexity, start)

    def calculate_hardware_fitness(self, qasm_code: str, hardware: str = "ibm_brisbane") -> MCPResponse:
        """Calculate hardware fitness score. Tries ui_score_circuit then fallback."""
        result = self._call("ui_score_circuit", qasm=qasm_code, hardware=hardware)
        if result.success:
            return result

        # Fallback to local
        start = time.perf_counter()
        fitness = self._analyzer.calculate_hardware_fitness(qasm_code, hardware)
        return self._fallback_response("calculate_hardware_fitness", fitness, start)

    def calculate_expressibility(self, qasm_code: str) -> MCPResponse:
        """Calculate circuit expressibility. Uses local fallback."""
        start = time.perf_counter()
        analysis = self._analyzer.analyze_circuit(qasm_code)

        # Expressibility heuristic based on gate diversity, depth and entanglement.
        gate_types = len(analysis['gate_breakdown'])
        depth_factor = min(analysis['depth'] / 20.0, 1.0)
        entangle_factor = min(analysis['two_qubit_gates'] / 5.0, 1.0)

        expressibility = (gate_types * 0.3 + depth_factor * 0.35 + entangle_factor * 0.35) * 100

        result = {
            'expressibility_score': round(expressibility, 2),
            'gate_diversity': gate_types,
            'depth_factor': round(depth_factor, 2),
            'entanglement_factor': round(entangle_factor, 2)
        }
        return self._fallback_response("calculate_expressibility", result, start)

    # ===== Resource Estimation Endpoints (Fallback) =====

    def estimate_resources(self, qasm_code: str) -> MCPResponse:
        """Estimate resource requirements. Uses local fallback."""
        start = time.perf_counter()
        analysis = self._analyzer.analyze_circuit(qasm_code)

        result = {
            'qubits_required': analysis['num_qubits'],
            'classical_bits': analysis['num_classical_bits'],
            'gate_count': analysis['gate_count'],
            'depth': analysis['depth'],
            'estimated_runtime_ms': analysis['depth'] * 0.1,  # Rough estimate
            # Statevector of n qubits: 2**n complex128 amplitudes, 16 bytes each.
            # Fixed: previous formula multiplied by num_qubits a second time.
            'memory_footprint_bytes': 16 * (2 ** analysis['num_qubits'])
        }
        return self._fallback_response("estimate_resources", result, start)

    def estimate_noise(self, qasm_code: str, hardware: str = "ibm_brisbane") -> MCPResponse:
        """Estimate noise impact on circuit. Uses local fallback."""
        start = time.perf_counter()
        analysis = self._analyzer.analyze_circuit(qasm_code)

        # Noise profiles (simplified per-gate error rates)
        noise_rates = {
            'ibm_brisbane': {'single_q': 0.001, 'two_q': 0.01, 'readout': 0.02},
            'ibm_sherbrooke': {'single_q': 0.0008, 'two_q': 0.008, 'readout': 0.015},
            'rigetti_aspen': {'single_q': 0.002, 'two_q': 0.02, 'readout': 0.03},
            'ionq_harmony': {'single_q': 0.0003, 'two_q': 0.005, 'readout': 0.01}
        }

        rates = noise_rates.get(hardware, noise_rates['ibm_brisbane'])

        # Linear accumulation per category, combined multiplicatively.
        single_q_error = analysis['single_qubit_gates'] * rates['single_q']
        two_q_error = analysis['two_qubit_gates'] * rates['two_q']
        readout_error = analysis['measurements'] * rates['readout']
        total_error = 1 - (1 - single_q_error) * (1 - two_q_error) * (1 - readout_error)

        result = {
            'estimated_fidelity': round(1 - total_error, 4),
            'single_qubit_error': round(single_q_error, 4),
            'two_qubit_error': round(two_q_error, 4),
            'readout_error': round(readout_error, 4),
            'total_error_probability': round(total_error, 4),
            'hardware': hardware
        }
        return self._fallback_response("estimate_noise", result, start)

    # ===== Composition Endpoints (Fallback) =====

    def compose_circuits(self, qasm1: str, qasm2: str, qubit_mapping: str = "") -> MCPResponse:
        """Compose two circuits sequentially. Uses local fallback.

        NOTE(review): *qubit_mapping* is accepted for interface compatibility
        but currently unused. Measurements from circuit 1 are dropped (it is
        no longer terminal); circuit 2's gates, including any measurements,
        are appended verbatim.
        """
        start = time.perf_counter()

        # Parse both circuits
        parsed1 = self._analyzer.parse_qasm(qasm1)
        parsed2 = self._analyzer.parse_qasm(qasm2)

        # Simple sequential composition over the wider register.
        num_qubits = max(parsed1['num_qubits'], parsed2['num_qubits'])

        lines = [
            'OPENQASM 2.0;',
            'include "qelib1.inc";',
            f'qreg q[{num_qubits}];',
            f'creg c[{num_qubits}];'
        ]

        for g in parsed1['gates']:
            if g['gate'].lower() != 'measure':
                lines.append(f"{g['raw']};")
        for g in parsed2['gates']:
            lines.append(f"{g['raw']};")

        result = {'qasm': '\n'.join(lines)}
        return self._fallback_response("compose_circuits", result, start)

    def generate_inverse_circuit(self, qasm_code: str) -> MCPResponse:
        """Generate the inverse of a circuit. Uses local fallback.

        Self-inverse and dagger-pair gates are mapped via a lookup table;
        rotation gates (rx/ry/rz) are inverted by negating their angle.
        Measurements are dropped from the inverse.
        """
        start = time.perf_counter()
        parsed = self._analyzer.parse_qasm(qasm_code)

        # Inverse gate mappings (self-inverse gates map to themselves).
        inverse_map = {
            'h': 'h', 'x': 'x', 'y': 'y', 'z': 'z',
            's': 'sdg', 'sdg': 's', 't': 'tdg', 'tdg': 't',
            'cx': 'cx', 'cz': 'cz', 'swap': 'swap'
        }

        lines = [
            'OPENQASM 2.0;',
            'include "qelib1.inc";',
            f'qreg q[{parsed["num_qubits"]}];',
            f'creg c[{parsed["num_classical"]}];'
        ]

        # Reverse gate order and substitute each gate with its inverse.
        for g in reversed(parsed['gates']):
            gate = g['gate'].lower()
            if gate == 'measure':
                continue
            inv_gate = inverse_map.get(gate, gate)
            raw = g['raw'].replace(gate, inv_gate)
            if '(' in g['raw'] and gate in ('rz', 'rx', 'ry'):
                # Fixed: rotation inverses need a negated angle; previously the
                # angle was left unchanged, producing a wrong "inverse".
                m = re.search(r'\(([^)]+)\)', raw)
                if m:
                    try:
                        raw = f"{raw[:m.start()]}({-float(m.group(1))}){raw[m.end():]}"
                    except ValueError:
                        pass  # symbolic/non-numeric parameter: leave unchanged
            lines.append(f"{raw};")

        result = {'qasm': '\n'.join(lines)}
        return self._fallback_response("generate_inverse_circuit", result, start)

    def tensor_circuits(self, qasm1: str, qasm2: str) -> MCPResponse:
        """Tensor product of two circuits. Uses local fallback.

        Circuit 2's qubit indices are shifted up by circuit 1's width.
        """
        start = time.perf_counter()

        parsed1 = self._analyzer.parse_qasm(qasm1)
        parsed2 = self._analyzer.parse_qasm(qasm2)

        total_qubits = parsed1['num_qubits'] + parsed2['num_qubits']
        offset = parsed1['num_qubits']

        lines = [
            'OPENQASM 2.0;',
            'include "qelib1.inc";',
            f'qreg q[{total_qubits}];',
            f'creg c[{total_qubits}];'
        ]

        # Add gates from first circuit unchanged.
        for g in parsed1['gates']:
            lines.append(f"{g['raw']};")

        # Add gates from second circuit with offset indices. Iterating from the
        # highest index down prevents a replaced target (i + offset) from being
        # matched again by a later, smaller source index.
        for g in parsed2['gates']:
            raw = g['raw']
            for i in range(parsed2['num_qubits'] - 1, -1, -1):
                raw = raw.replace(f'q[{i}]', f'q[{i + offset}]')
            lines.append(f"{raw};")

        result = {'qasm': '\n'.join(lines)}
        return self._fallback_response("tensor_circuits", result, start)

    def repeat_circuit(self, qasm_code: str, n: int) -> MCPResponse:
        """Repeat a circuit n times. Uses local fallback.

        Non-measurement gates are repeated n times; all measurement
        statements are emitted once at the end.
        """
        start = time.perf_counter()
        parsed = self._analyzer.parse_qasm(qasm_code)

        lines = [
            'OPENQASM 2.0;',
            'include "qelib1.inc";',
            f'qreg q[{parsed["num_qubits"]}];',
            f'creg c[{parsed["num_classical"]}];'
        ]

        # Repeat non-measure gates n times
        for _ in range(n):
            for g in parsed['gates']:
                if g['gate'].lower() != 'measure':
                    lines.append(f"{g['raw']};")

        # Add measurements at end.
        # Fixed: a `break` after the first measure dropped the remaining
        # per-qubit measurement statements.
        for g in parsed['gates']:
            if g['gate'].lower() == 'measure':
                lines.append(f"{g['raw']};")

        result = {'qasm': '\n'.join(lines)}
        return self._fallback_response("repeat_circuit", result, start)

    # ===== Utility Endpoints =====

    def list_templates(self) -> MCPResponse:
        """List available circuit templates."""
        start = time.perf_counter()
        templates = [
            'bell_state', 'ghz_state', 'w_state', 'superposition',
            'qft', 'grover', 'vqe', 'qaoa'
        ]
        return self._fallback_response("list_templates", {'templates': templates}, start)

    def list_hardware_profiles(self) -> MCPResponse:
        """List available hardware profiles."""
        start = time.perf_counter()
        profiles = ['ibm_brisbane', 'ibm_sherbrooke', 'rigetti_aspen', 'ionq_harmony']
        return self._fallback_response("list_hardware_profiles", {'profiles': profiles}, start)
|
| 678 |
+
|
| 679 |
+
|
| 680 |
+
# Singleton client instance
_client: Optional[MCPClient] = None


def get_client(base_url: Optional[str] = None) -> MCPClient:
    """
    Get or create the MCP client singleton.

    Args:
        base_url: Optional URL override. If None, checks MCP_SERVER_URL env var,
                  then defaults to http://127.0.0.1:7861

    Note: once the singleton exists, *base_url* is ignored on later calls.
    """
    global _client
    if _client is not None:
        return _client

    if base_url is None:
        import os
        base_url = os.environ.get("MCP_SERVER_URL", "http://127.0.0.1:7861")

    _client = MCPClient(base_url)
    return _client
|
config.py
ADDED
|
@@ -0,0 +1,305 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
QAgents-Workflows: Configuration
|
| 3 |
+
Central configuration for the multi-agent quantum circuit optimization system.
|
| 4 |
+
|
| 5 |
+
Path: QAgents-workflos/config.py
|
| 6 |
+
Related: agents/llm_adapter.py (uses GEMINI_MODELS for fallback cascade)
|
| 7 |
+
run_evaluation.py (uses config for evaluation settings)
|
| 8 |
+
workflows/workflow_definitions.py (references rate limits)
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
from pathlib import Path
|
| 12 |
+
from dataclasses import dataclass, field
|
| 13 |
+
from typing import Optional, List, Dict
|
| 14 |
+
import os
|
| 15 |
+
|
| 16 |
+
# Paths
|
| 17 |
+
PROJECT_ROOT = Path(__file__).parent
|
| 18 |
+
QUANTUM_MCP_ROOT = PROJECT_ROOT.parent / "QuantumArchitect-MCP"
|
| 19 |
+
|
| 20 |
+
# =============================================================================
|
| 21 |
+
# GEMINI MODEL CASCADE (sorted by RPD - highest to lowest for optimal fallback)
|
| 22 |
+
# =============================================================================
|
| 23 |
+
# When a model hits rate limits (RPM/RPD), fallback to next model in list.
|
| 24 |
+
# Free tier limits (as of 2025):
|
| 25 |
+
# - Gemma 3: 30 RPM, 15K TPM, 14,400 RPD (HIGHEST availability)
|
| 26 |
+
# - Flash-Lite: 15 RPM, 250K TPM, 1,000 RPD
|
| 27 |
+
# - Flash 2.5: 10 RPM, 250K TPM, 250 RPD
|
| 28 |
+
# - Flash 2.0: 15 RPM, 1M TPM, 200 RPD
|
| 29 |
+
# - Flash 2.0 Lite: 30 RPM, 1M TPM, 200 RPD
|
| 30 |
+
# - Pro 2.5: 2 RPM, 125K TPM, 50 RPD (LOWEST availability)
|
| 31 |
+
#
|
| 32 |
+
# EXPECTED REQUESTS PER EVALUATION (9 problems):
|
| 33 |
+
# - Naked mode: 0 LLM calls (direct MCP only)
|
| 34 |
+
# - Guided mode: ~36 LLM calls (4 per problem)
|
| 35 |
+
# - Blackboard: ~72-108 LLM calls (8-12 per problem)
|
| 36 |
+
# =============================================================================
|
| 37 |
+
|
| 38 |
+
# List order defines the fallback cascade: get_next_model() walks this list
# top-to-bottom, and "priority" mirrors the index (1 = first tried).
GEMINI_MODELS: List[Dict] = [
    # Highest RPD - most available (14,400/day = 10/min continuously)
    {
        "name": "gemma-3-27b-it",
        "rpm": 30,
        "tpm": 15_000,
        "rpd": 14_400,
        "priority": 1,
        "notes": "Best for high-volume, may have lower quality than Flash"
    },
    # Good balance - default model (1,000/day)
    {
        "name": "gemini-2.5-flash-lite",
        "rpm": 15,
        "tpm": 250_000,
        "rpd": 1_000,
        "priority": 2,
        "notes": "Good balance of quality and availability - DEFAULT"
    },
    # Higher quality - moderate availability (250/day)
    {
        "name": "gemini-2.5-flash",
        "rpm": 10,
        "tpm": 250_000,
        "rpd": 250,
        "priority": 3,
        "notes": "Better quality, lower availability"
    },
    # High TPM for long contexts (200/day)
    {
        "name": "gemini-2.0-flash",
        "rpm": 15,
        "tpm": 1_000_000,
        "rpd": 200,
        "priority": 4,
        "notes": "Good for long contexts, moderate availability"
    },
    # Fast variant (200/day)
    {
        "name": "gemini-2.0-flash-lite",
        "rpm": 30,
        "tpm": 1_000_000,
        "rpd": 200,
        "priority": 5,
        "notes": "Fast responses, lower availability"
    },
    # Lowest RPD - highest quality, use sparingly (50/day)
    {
        "name": "gemini-2.5-pro",
        "rpm": 2,
        "tpm": 125_000,
        "rpd": 50,
        "priority": 6,
        "notes": "Highest quality, use sparingly - LAST RESORT"
    },
]
|
| 94 |
+
|
| 95 |
+
def get_model_by_priority(priority: int = 1) -> Optional[Dict]:
    """Return the model config with the given priority (1 = highest RPD), or None."""
    return next((m for m in GEMINI_MODELS if m["priority"] == priority), None)
|
| 101 |
+
|
| 102 |
+
def get_next_model(current_name: str) -> Optional[Dict]:
    """Return the model after *current_name* in the fallback cascade.

    Returns None when *current_name* is the last model or is unknown.
    """
    names = [m["name"] for m in GEMINI_MODELS]
    try:
        idx = names.index(current_name)
    except ValueError:
        return None
    if idx + 1 < len(GEMINI_MODELS):
        return GEMINI_MODELS[idx + 1]
    return None
|
| 109 |
+
|
| 110 |
+
def get_model_config(model_name: str) -> Optional[Dict]:
    """Look up a model config by its exact name; None when unknown."""
    return next((m for m in GEMINI_MODELS if m["name"] == model_name), None)
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
@dataclass
class MCPConfig:
    """Connection settings for the MCP server."""
    # Network location of the Gradio/MCP endpoint.
    host: str = "127.0.0.1"
    port: int = 7861
    # Derived from host/port in __post_init__; not a constructor argument.
    base_url: str = field(init=False)

    def __post_init__(self):
        # Recompute the full URL for every new instance.
        self.base_url = "http://{}:{}".format(self.host, self.port)
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
@dataclass
class RateLimitConfig:
    """Client-side throttling derived from Gemini API free-tier quotas."""
    # Defaults mirror gemini-2.5-flash-lite limits.
    rpm_limit: int = 15        # requests per minute
    tpm_limit: int = 250_000   # tokens per minute
    rpd_limit: int = 1_000     # requests per day

    # Safety margin: only use 80% of the allowed RPM (15 * 0.8 = 12 effective).
    rpm_buffer: float = 0.8

    @property
    def min_request_interval(self) -> float:
        """Seconds to wait between requests: 60 / effective RPM (defaults to 5s)."""
        effective_rpm = self.rpm_limit * self.rpm_buffer
        return 60.0 / effective_rpm
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
@dataclass
class LLMConfig:
    """Agent LLM settings — provider-agnostic via native Gemini or LiteLLM.

    Resolved from the environment (HuggingFace Space compatible):
      LLM_PROVIDER       provider name: gemini, openai, anthropic, groq,
                         ollama (default "gemini")
      LLM_MODEL          model identifier (default "gemini-2.5-flash-lite")
      GOOGLE_API_KEY     Gemini API key (checked first)
      GENAI_API_KEY      Gemini API key fallback
      OPENAI_API_KEY     OpenAI API key (OpenAI provider)
      ANTHROPIC_API_KEY  Anthropic API key (Anthropic provider)
      GROQ_API_KEY       Groq API key (Groq provider)
    """
    provider: str = field(default_factory=lambda: os.getenv("LLM_PROVIDER", "gemini"))
    model: str = field(default_factory=lambda: os.getenv("LLM_MODEL", "gemini-2.5-flash-lite"))
    # GOOGLE_API_KEY takes precedence over GENAI_API_KEY when both are set.
    api_key: Optional[str] = field(
        default_factory=lambda: os.getenv("GOOGLE_API_KEY") or os.getenv("GENAI_API_KEY")
    )
    temperature: float = 0.2
    max_tokens: int = 2000

    # Throttling (see RateLimitConfig); set enable_rate_limiting=False to disable.
    rate_limit: RateLimitConfig = field(default_factory=RateLimitConfig)
    enable_rate_limiting: bool = True

    # Multi-model fallback behaviour.
    enable_fallback: bool = True    # switch models automatically on rate limit
    fallback_on_error: bool = True  # also switch on generic API errors

    @property
    def model_string(self) -> str:
        """Model identifier formatted for the active API.

        Gemini is called with the bare model name; every other provider is
        routed through LiteLLM, which expects "provider/model".
        """
        if self.provider == "gemini":
            return self.model
        return f"{self.provider}/{self.model}"
|
| 185 |
+
|
| 186 |
+
|
| 187 |
+
@dataclass
class DatabaseConfig:
    """Filesystem layout for persistent storage (directories made on init)."""
    db_path: Path = field(default_factory=lambda: PROJECT_ROOT / "database" / "data")
    log_path: Path = field(default_factory=lambda: PROJECT_ROOT / "database" / "logs")
    memory_path: Path = field(default_factory=lambda: PROJECT_ROOT / "database" / "memory")

    def __post_init__(self):
        # Guarantee every storage directory exists before first use.
        for directory in (self.db_path, self.log_path, self.memory_path):
            directory.mkdir(parents=True, exist_ok=True)
|
| 198 |
+
|
| 199 |
+
|
| 200 |
+
@dataclass
class CostTrackingConfig:
    """Accumulates request/token/latency usage, overall and per model."""
    enabled: bool = True
    track_requests: bool = True
    track_tokens: bool = True
    track_time: bool = True

    # Aggregate counters (intended to be reset daily in production).
    total_requests: int = 0
    total_tokens: int = 0
    total_time_ms: float = 0.0

    # Per-model counters, keyed by model name.
    model_usage: Dict[str, Dict] = field(default_factory=dict)

    def record_request(self, model: str, tokens: int, time_ms: float):
        """Fold one request's usage into the aggregate and per-model counters."""
        if not self.enabled:
            return

        self.total_requests += 1
        self.total_tokens += tokens
        self.total_time_ms += time_ms

        bucket = self.model_usage.setdefault(
            model, {"requests": 0, "tokens": 0, "time_ms": 0.0}
        )
        bucket["requests"] += 1
        bucket["tokens"] += tokens
        bucket["time_ms"] += time_ms

    def get_summary(self) -> Dict:
        """Snapshot of all counters; the model breakdown is a shallow copy."""
        denominator = max(1, self.total_requests)  # avoid div-by-zero when empty
        return {
            "total_requests": self.total_requests,
            "total_tokens": self.total_tokens,
            "total_time_ms": self.total_time_ms,
            "avg_time_per_request": self.total_time_ms / denominator,
            "model_breakdown": self.model_usage.copy(),
        }

    def reset(self):
        """Zero every counter and discard per-model history."""
        self.total_requests = 0
        self.total_tokens = 0
        self.total_time_ms = 0.0
        self.model_usage = {}
|
| 248 |
+
|
| 249 |
+
|
| 250 |
+
@dataclass
class EvaluationConfig:
    """Settings controlling how evaluation runs are executed and recorded."""
    num_runs: int = 5  # Number of runs per problem for reliability
    timeout_seconds: float = 120.0  # Max time per problem
    save_results: bool = True  # persist run results when True

    # Cost tracking for evaluation (requests/tokens/latency accumulator)
    cost_tracking: CostTrackingConfig = field(default_factory=CostTrackingConfig)
|
| 259 |
+
|
| 260 |
+
|
| 261 |
+
@dataclass
class SystemConfig:
    """Master configuration aggregating every subsystem's config."""
    mcp: MCPConfig = field(default_factory=MCPConfig)                   # MCP server endpoint
    llm: LLMConfig = field(default_factory=LLMConfig)                   # provider/model/key
    database: DatabaseConfig = field(default_factory=DatabaseConfig)    # storage paths
    evaluation: EvaluationConfig = field(default_factory=EvaluationConfig)

    # System mode: "blackboard", "guided", or "naked" (see set_mode/get_mode)
    active_mode: str = "guided"

    # Debug settings
    verbose: bool = True
    log_level: str = "INFO"
|
| 275 |
+
|
| 276 |
+
|
| 277 |
+
# Global config instance
|
| 278 |
+
config = SystemConfig()
|
| 279 |
+
|
| 280 |
+
|
| 281 |
+
def set_mode(mode: str):
    """Switch the active orchestration mode on the global config.

    Raises:
        ValueError: if *mode* is not one of blackboard / guided / naked.
    """
    valid_modes = ("blackboard", "guided", "naked")
    if mode not in valid_modes:
        raise ValueError(f"Invalid mode: {mode}. Use 'blackboard', 'guided', or 'naked'")
    config.active_mode = mode
|
| 286 |
+
|
| 287 |
+
|
| 288 |
+
def get_mode() -> str:
    """Return the orchestration mode currently set on the global config."""
    return config.active_mode
|
| 291 |
+
|
| 292 |
+
|
| 293 |
+
def set_api_key(api_key: str):
    """Override the API key used for subsequent LLM calls."""
    config.llm.api_key = api_key
|
| 296 |
+
|
| 297 |
+
|
| 298 |
+
def get_cost_summary() -> Dict:
    """Return the evaluation cost tracker's current usage summary."""
    return config.evaluation.cost_tracking.get_summary()
|
| 301 |
+
|
| 302 |
+
|
| 303 |
+
def reset_cost_tracking():
    """Zero the evaluation cost tracker's counters and per-model history."""
    config.evaluation.cost_tracking.reset()
|
database/__init__.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Path: QAgents-workflos/database/__init__.py
|
| 2 |
+
# Purpose: Database module exports for storage, logging, memory, and circuit quality
|
| 3 |
+
# Relations: Provides unified access to all database functionality
|
| 4 |
+
|
| 5 |
+
"""Database module for storage, logging, memory, and circuit quality tracking."""
|
| 6 |
+
|
| 7 |
+
from .storage import (
|
| 8 |
+
Database,
|
| 9 |
+
MemoryType,
|
| 10 |
+
MemoryEntry,
|
| 11 |
+
LogEntry,
|
| 12 |
+
ResultEntry,
|
| 13 |
+
get_database
|
| 14 |
+
)
|
| 15 |
+
|
| 16 |
+
from .circuit_quality_db import (
|
| 17 |
+
CircuitQualityDB,
|
| 18 |
+
CircuitEvaluation,
|
| 19 |
+
QualityMetrics,
|
| 20 |
+
get_quality_db
|
| 21 |
+
)
|
| 22 |
+
|
| 23 |
+
__all__ = [
|
| 24 |
+
# Original storage
|
| 25 |
+
"Database",
|
| 26 |
+
"MemoryType",
|
| 27 |
+
"MemoryEntry",
|
| 28 |
+
"LogEntry",
|
| 29 |
+
"ResultEntry",
|
| 30 |
+
"get_database",
|
| 31 |
+
# Quality tracking (NEW)
|
| 32 |
+
"CircuitQualityDB",
|
| 33 |
+
"CircuitEvaluation",
|
| 34 |
+
"QualityMetrics",
|
| 35 |
+
"get_quality_db"
|
| 36 |
+
]
|
database/circuit_quality_db.py
ADDED
|
@@ -0,0 +1,414 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Path: QAgents-workflos/database/circuit_quality_db.py
|
| 2 |
+
# Relations: Uses database/storage.py pattern, connects to MCP via client/
|
| 3 |
+
# Description: SQLite database for storing QASM circuits and quality metrics
|
| 4 |
+
# Enables circuit comparison across orchestration modes
|
| 5 |
+
# Tracks circuit_qasm text + all quality measurements
|
| 6 |
+
|
| 7 |
+
"""
|
| 8 |
+
Circuit Quality Database: Store and compare quantum circuits with quality metrics.
|
| 9 |
+
Stores actual QASM code for later analysis and comparison between modes.
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
import sqlite3
|
| 13 |
+
import json
|
| 14 |
+
from pathlib import Path
|
| 15 |
+
from datetime import datetime
|
| 16 |
+
from typing import Any, Dict, List, Optional, Tuple
|
| 17 |
+
from dataclasses import dataclass, field, asdict
|
| 18 |
+
import logging
|
| 19 |
+
|
| 20 |
+
logger = logging.getLogger(__name__)
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
@dataclass
class QualityMetrics:
    """Quality measurements for a single generated circuit."""
    depth: int = 0
    gate_count: int = 0
    cx_count: int = 0
    single_qubit_count: int = 0
    hardware_fitness: float = 0.0
    syntax_valid: bool = False
    state_correctness: float = 0.0
    complexity_score: float = 0.0
    noise_estimate: float = 0.0

    def overall_score(self) -> float:
        """Aggregate quality on a 0-100 scale (higher is better).

        Weighting: syntax validity 20, hardware fitness 20, state
        correctness 30, depth efficiency 15, low-CX bonus 15. The two
        efficiency terms only apply when the circuit has gates at all.
        """
        total = 0.0
        if self.syntax_valid:
            total += 20.0
        total += 20.0 * min(self.hardware_fitness, 1.0)
        total += 30.0 * self.state_correctness
        if self.gate_count > 0:
            # Shallower circuits (relative to gate count) score higher.
            depth_ratio = (self.depth / max(self.gate_count, 1)) / 10
            total += 15.0 * max(0, 1 - depth_ratio)
            # Fewer two-qubit gates earn the low-CX bonus.
            cx_fraction = self.cx_count / max(self.gate_count, 1)
            total += 15.0 * (1 - min(cx_fraction, 1.0))
        return round(total, 2)
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
@dataclass
class CircuitEvaluation:
    """Complete evaluation record for one circuit: QASM text plus quality data."""
    id: Optional[int] = None  # database row id; None until saved
    run_id: str = ""          # groups evaluations belonging to one comparison run
    timestamp: str = ""       # ISO-8601; filled in at save time when left empty
    problem_id: str = ""
    problem_goal: str = ""
    mode: str = ""  # naked, guided, blackboard
    qasm_code: str = ""  # FULL QASM text stored
    success: bool = False
    execution_time_ms: float = 0.0
    llm_requests: int = 0
    tokens_used: int = 0
    quality_metrics: QualityMetrics = field(default_factory=QualityMetrics)
    errors: List[str] = field(default_factory=list)  # error messages gathered during the run
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
class CircuitQualityDB:
    """
    SQLite store for generated quantum circuits and their quality metrics.

    Primary purpose: enable quality comparison of the same problem solved
    under different orchestration modes (naked / guided / blackboard).
    The full QASM text of every circuit is stored so results can be
    re-inspected or diffed later.
    """

    def __init__(self, db_path: Optional[Path] = None):
        """Open the database, creating directories and tables as needed.

        Args:
            db_path: Directory that holds the SQLite file; defaults to the
                "data" directory next to this module.
        """
        if db_path is None:
            db_path = Path(__file__).parent / "data"
        self.db_path = Path(db_path)
        self.db_path.mkdir(parents=True, exist_ok=True)
        self.db_file = self.db_path / "circuit_quality.db"
        self._init_db()

    def _init_db(self):
        """Create tables and indexes if they do not exist (idempotent)."""
        with sqlite3.connect(self.db_file) as conn:
            conn.executescript("""
                -- Main table: stores full QASM and evaluation metadata
                CREATE TABLE IF NOT EXISTS circuit_evaluations (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    run_id TEXT NOT NULL,
                    timestamp TEXT NOT NULL,
                    problem_id TEXT NOT NULL,
                    problem_goal TEXT,
                    mode TEXT NOT NULL,
                    qasm_code TEXT,
                    success INTEGER NOT NULL,
                    execution_time_ms REAL,
                    llm_requests INTEGER DEFAULT 0,
                    tokens_used INTEGER DEFAULT 0,
                    errors TEXT
                );

                -- Quality metrics table: detailed quality measurements
                CREATE TABLE IF NOT EXISTS quality_metrics (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    eval_id INTEGER NOT NULL,
                    depth INTEGER DEFAULT 0,
                    gate_count INTEGER DEFAULT 0,
                    cx_count INTEGER DEFAULT 0,
                    single_qubit_count INTEGER DEFAULT 0,
                    hardware_fitness REAL DEFAULT 0.0,
                    syntax_valid INTEGER DEFAULT 0,
                    state_correctness REAL DEFAULT 0.0,
                    complexity_score REAL DEFAULT 0.0,
                    noise_estimate REAL DEFAULT 0.0,
                    overall_score REAL DEFAULT 0.0,
                    FOREIGN KEY (eval_id) REFERENCES circuit_evaluations(id)
                );

                -- Comparison runs: group multiple evaluations
                CREATE TABLE IF NOT EXISTS comparison_runs (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    run_id TEXT UNIQUE NOT NULL,
                    timestamp TEXT NOT NULL,
                    description TEXT,
                    num_problems INTEGER DEFAULT 0,
                    modes_tested TEXT,
                    summary TEXT
                );

                -- Indexes for the common filter columns
                CREATE INDEX IF NOT EXISTS idx_eval_run_id ON circuit_evaluations(run_id);
                CREATE INDEX IF NOT EXISTS idx_eval_problem ON circuit_evaluations(problem_id);
                CREATE INDEX IF NOT EXISTS idx_eval_mode ON circuit_evaluations(mode);
            """)
            conn.commit()

    def save_evaluation(self, eval: "CircuitEvaluation") -> int:
        """Persist one evaluation plus its quality metrics; return the row id.

        The parameter is named ``eval`` (shadowing the builtin) to remain
        keyword-compatible with existing callers.
        """
        with sqlite3.connect(self.db_file) as conn:
            cursor = conn.cursor()

            # Insert main evaluation record (full QASM text included).
            cursor.execute("""
                INSERT INTO circuit_evaluations
                (run_id, timestamp, problem_id, problem_goal, mode, qasm_code,
                 success, execution_time_ms, llm_requests, tokens_used, errors)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """, (
                eval.run_id,
                eval.timestamp or datetime.now().isoformat(),  # stamp if caller left it blank
                eval.problem_id,
                eval.problem_goal,
                eval.mode,
                eval.qasm_code,  # FULL QASM stored here
                1 if eval.success else 0,
                eval.execution_time_ms,
                eval.llm_requests,
                eval.tokens_used,
                json.dumps(eval.errors)
            ))
            eval_id = cursor.lastrowid

            # Insert the associated quality-metrics row; the overall score is
            # denormalized into its own column for fast aggregate queries.
            metrics = eval.quality_metrics
            cursor.execute("""
                INSERT INTO quality_metrics
                (eval_id, depth, gate_count, cx_count, single_qubit_count,
                 hardware_fitness, syntax_valid, state_correctness,
                 complexity_score, noise_estimate, overall_score)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """, (
                eval_id,
                metrics.depth,
                metrics.gate_count,
                metrics.cx_count,
                metrics.single_qubit_count,
                metrics.hardware_fitness,
                1 if metrics.syntax_valid else 0,
                metrics.state_correctness,
                metrics.complexity_score,
                metrics.noise_estimate,
                metrics.overall_score()
            ))

            conn.commit()
            logger.info(f"Saved evaluation {eval_id} for {eval.problem_id}/{eval.mode}")
            return eval_id

    def save_comparison_run(self, run_id: str, description: str,
                            num_problems: int, modes: List[str], summary: Dict) -> None:
        """Record (or replace) a comparison-run grouping record."""
        with sqlite3.connect(self.db_file) as conn:
            conn.execute("""
                INSERT OR REPLACE INTO comparison_runs
                (run_id, timestamp, description, num_problems, modes_tested, summary)
                VALUES (?, ?, ?, ?, ?, ?)
            """, (
                run_id,
                datetime.now().isoformat(),
                description,
                num_problems,
                json.dumps(modes),
                json.dumps(summary)
            ))
            conn.commit()

    def get_evaluations(self, problem_id: Optional[str] = None,
                        mode: Optional[str] = None,
                        run_id: Optional[str] = None,
                        limit: int = 100) -> List["CircuitEvaluation"]:
        """Fetch evaluations (newest first), optionally filtered.

        Args:
            problem_id: Restrict to one problem.
            mode: Restrict to one orchestration mode.
            run_id: Restrict to one comparison run.
            limit: Maximum number of rows returned.

        Returns:
            CircuitEvaluation objects with their QualityMetrics attached
            (zeroed metrics when no quality row exists for an evaluation).
        """
        query = """
            SELECT e.*, q.depth, q.gate_count, q.cx_count, q.single_qubit_count,
                   q.hardware_fitness, q.syntax_valid, q.state_correctness,
                   q.complexity_score, q.noise_estimate, q.overall_score
            FROM circuit_evaluations e
            LEFT JOIN quality_metrics q ON e.id = q.eval_id
            WHERE 1=1
        """
        params = []

        # Filters are appended as parameterized clauses (no string interpolation).
        if problem_id:
            query += " AND e.problem_id = ?"
            params.append(problem_id)
        if mode:
            query += " AND e.mode = ?"
            params.append(mode)
        if run_id:
            query += " AND e.run_id = ?"
            params.append(run_id)

        query += " ORDER BY e.timestamp DESC LIMIT ?"
        params.append(limit)

        evaluations = []
        with sqlite3.connect(self.db_file) as conn:
            conn.row_factory = sqlite3.Row
            cursor = conn.execute(query, params)

            for row in cursor:
                metrics = QualityMetrics(
                    depth=row['depth'] or 0,
                    gate_count=row['gate_count'] or 0,
                    cx_count=row['cx_count'] or 0,
                    single_qubit_count=row['single_qubit_count'] or 0,
                    hardware_fitness=row['hardware_fitness'] or 0.0,
                    syntax_valid=bool(row['syntax_valid']),
                    state_correctness=row['state_correctness'] or 0.0,
                    complexity_score=row['complexity_score'] or 0.0,
                    noise_estimate=row['noise_estimate'] or 0.0
                )

                # Renamed from `eval` to avoid shadowing the builtin.
                record = CircuitEvaluation(
                    id=row['id'],
                    run_id=row['run_id'],
                    timestamp=row['timestamp'],
                    problem_id=row['problem_id'],
                    problem_goal=row['problem_goal'] or "",
                    mode=row['mode'],
                    qasm_code=row['qasm_code'] or "",
                    success=bool(row['success']),
                    execution_time_ms=row['execution_time_ms'] or 0.0,
                    llm_requests=row['llm_requests'] or 0,
                    tokens_used=row['tokens_used'] or 0,
                    quality_metrics=metrics,
                    errors=json.loads(row['errors']) if row['errors'] else []
                )
                evaluations.append(record)

        return evaluations

    def get_circuit_by_id(self, eval_id: int) -> Optional["CircuitEvaluation"]:
        """Return the evaluation with database id *eval_id*, or None.

        Fix: the previous version issued an extra, discarded query
        (``get_evaluations(limit=1)``) before the real scan.
        NOTE: only the 1000 most recent records are scanned.
        """
        for record in self.get_evaluations(limit=1000):
            if record.id == eval_id:
                return record
        return None

    def compare_modes_for_problem(self, problem_id: str, run_id: Optional[str] = None) -> Dict:
        """Compare the latest evaluation of each mode for one problem."""
        modes = ['naked', 'guided', 'blackboard']
        comparison = {
            "problem_id": problem_id,
            "modes": {}
        }

        for mode in modes:
            evals = self.get_evaluations(problem_id=problem_id, mode=mode, run_id=run_id)
            if evals:
                # get_evaluations orders newest-first, so [0] is the latest.
                latest = evals[0]
                comparison["modes"][mode] = {
                    "success": latest.success,
                    "qasm_code": latest.qasm_code,
                    "depth": latest.quality_metrics.depth,
                    "gate_count": latest.quality_metrics.gate_count,
                    "cx_count": latest.quality_metrics.cx_count,
                    "hardware_fitness": latest.quality_metrics.hardware_fitness,
                    "overall_score": latest.quality_metrics.overall_score(),
                    "execution_time_ms": latest.execution_time_ms,
                    "llm_requests": latest.llm_requests
                }

        return comparison

    def get_quality_summary(self, run_id: Optional[str] = None) -> Dict:
        """Aggregate per-mode quality statistics (optionally for one run)."""
        query = """
            SELECT e.mode,
                   COUNT(*) as count,
                   SUM(e.success) as successes,
                   AVG(q.overall_score) as avg_score,
                   AVG(q.depth) as avg_depth,
                   AVG(q.gate_count) as avg_gates,
                   AVG(q.cx_count) as avg_cx,
                   AVG(q.hardware_fitness) as avg_fitness,
                   AVG(e.execution_time_ms) as avg_time,
                   SUM(e.llm_requests) as total_llm,
                   SUM(e.tokens_used) as total_tokens
            FROM circuit_evaluations e
            LEFT JOIN quality_metrics q ON e.id = q.eval_id
        """
        params = []
        if run_id:
            query += " WHERE e.run_id = ?"
            params.append(run_id)
        query += " GROUP BY e.mode"

        summary = {"modes": {}}
        with sqlite3.connect(self.db_file) as conn:
            conn.row_factory = sqlite3.Row
            for row in conn.execute(query, params):
                mode = row['mode']
                count = row['count']
                summary["modes"][mode] = {
                    "count": count,
                    "success_rate": row['successes'] / count if count > 0 else 0,
                    "avg_quality_score": round(row['avg_score'] or 0, 2),
                    "avg_depth": round(row['avg_depth'] or 0, 1),
                    "avg_gates": round(row['avg_gates'] or 0, 1),
                    "avg_cx_count": round(row['avg_cx'] or 0, 1),
                    "avg_hardware_fitness": round(row['avg_fitness'] or 0, 3),
                    "avg_time_ms": round(row['avg_time'] or 0, 1),
                    "total_llm_requests": row['total_llm'] or 0,
                    "total_tokens": row['total_tokens'] or 0
                }

        return summary

    def export_circuits_markdown(self, run_id: Optional[str] = None) -> str:
        """Render all stored circuits as a markdown comparison report."""
        evals = self.get_evaluations(run_id=run_id, limit=1000)

        # Group latest-per-mode evaluations by problem.
        by_problem: Dict[str, Dict[str, "CircuitEvaluation"]] = {}
        for e in evals:
            if e.problem_id not in by_problem:
                by_problem[e.problem_id] = {}
            by_problem[e.problem_id][e.mode] = e

        md = ["# Circuit Quality Comparison Report\n"]
        md.append(f"Generated: {datetime.now().isoformat()}\n")
        if run_id:
            md.append(f"Run ID: {run_id}\n")
        md.append("\n---\n")

        for problem_id, modes in sorted(by_problem.items()):
            md.append(f"\n## Problem: {problem_id}\n")

            for mode in ['naked', 'guided', 'blackboard']:
                if mode not in modes:
                    md.append(f"\n### {mode.upper()}: NOT RUN\n")
                    continue

                e = modes[mode]
                q = e.quality_metrics

                md.append(f"\n### {mode.upper()}\n")
                md.append(f"- **Success**: {'✅' if e.success else '❌'}\n")
                md.append(f"- **Quality Score**: {q.overall_score()}/100\n")
                md.append(f"- **Depth**: {q.depth}\n")
                md.append(f"- **Gate Count**: {q.gate_count}\n")
                md.append(f"- **CX Count**: {q.cx_count}\n")
                md.append(f"- **Hardware Fitness**: {q.hardware_fitness:.3f}\n")
                md.append(f"- **Time**: {e.execution_time_ms:.0f}ms\n")
                md.append(f"- **LLM Requests**: {e.llm_requests}\n")

                if e.qasm_code:
                    md.append("\n```qasm\n")
                    md.append(e.qasm_code)
                    if not e.qasm_code.endswith('\n'):
                        md.append('\n')
                    md.append("```\n")
                else:
                    md.append("\n*No circuit generated*\n")

        return "".join(md)
| 404 |
+
|
| 405 |
+
|
| 406 |
+
# Singleton instance
|
| 407 |
+
_quality_db: Optional[CircuitQualityDB] = None
|
| 408 |
+
|
| 409 |
+
def get_quality_db() -> CircuitQualityDB:
    """Return the module-level singleton CircuitQualityDB, creating it lazily.

    The first call constructs the database (and its on-disk file); later
    calls return the same instance.
    """
    global _quality_db
    if _quality_db is None:
        _quality_db = CircuitQualityDB()
    return _quality_db
|
database/storage.py
ADDED
|
@@ -0,0 +1,278 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Database Module: Storage for logs, results, memory, and context.
|
| 3 |
+
Provides both shared and per-agent storage with short/long-term memory.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import json
|
| 7 |
+
import sqlite3
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
from datetime import datetime
|
| 10 |
+
from typing import Any, Dict, List, Optional
|
| 11 |
+
from dataclasses import dataclass, field, asdict
|
| 12 |
+
from enum import Enum
|
| 13 |
+
import logging
|
| 14 |
+
|
| 15 |
+
logger = logging.getLogger(__name__)
|
| 16 |
+
|
| 17 |
+
class MemoryType(Enum):
    """Categories of agent memory storage."""
    SHORT_TERM = "short_term"  # Session-based, cleared on restart
    LONG_TERM = "long_term"    # Persistent across sessions
    SHARED = "shared"          # Shared between agents (blackboard)
|
| 22 |
+
|
| 23 |
+
@dataclass
class MemoryEntry:
    """A single key/value memory record attributed to an agent."""
    key: str                 # lookup key
    value: Any               # JSON-serialized when persisted (see Database.store_memory)
    agent_id: Optional[str]  # owning agent; None for agent-independent entries
    memory_type: MemoryType  # SHORT_TERM / LONG_TERM / SHARED
    timestamp: datetime = field(default_factory=datetime.now)
    metadata: Dict = field(default_factory=dict)  # free-form extra attributes
|
| 32 |
+
|
| 33 |
+
@dataclass
class LogEntry:
    """A log entry for the audit trail."""
    level: str                  # log severity label
    message: str                # human-readable event description
    agent_id: Optional[str]     # None for system-level (non-agent) events
    workflow_id: Optional[str]  # groups entries belonging to one workflow
    timestamp: datetime = field(default_factory=datetime.now)
    data: Dict = field(default_factory=dict)  # structured payload attached to the event
|
| 42 |
+
|
| 43 |
+
@dataclass
class ResultEntry:
    """A result from a single evaluation run of one problem."""
    run_id: str
    system_mode: str  # blackboard, guided, naked
    problem_id: str
    success: bool
    execution_time_ms: float
    circuit_qasm: Optional[str]  # generated circuit text, when one was produced
    metrics: Dict = field(default_factory=dict)  # additional measurements for the run
    timestamp: datetime = field(default_factory=datetime.now)
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
class Database:
    """
    SQLite-based storage for all system data.
    Manages logs, results, and agent memory.

    Each operation opens a short-lived connection; ``with sqlite3.connect``
    commits (or rolls back) the implicit transaction on block exit.
    """

    def __init__(self, db_path: Path):
        """Initialize storage.

        Args:
            db_path: Directory (created if missing) that holds ``qagents.db``.
        """
        self.db_path = db_path
        self.db_path.mkdir(parents=True, exist_ok=True)
        # Single database file containing the memory/logs/results tables.
        self.db_file = self.db_path / "qagents.db"
        self._init_db()

    def _init_db(self):
        """Initialize database tables and indexes (idempotent)."""
        with sqlite3.connect(self.db_file) as conn:
            conn.executescript("""
                CREATE TABLE IF NOT EXISTS memory (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    key TEXT NOT NULL,
                    value TEXT NOT NULL,
                    agent_id TEXT,
                    memory_type TEXT NOT NULL,
                    timestamp TEXT NOT NULL,
                    metadata TEXT
                );

                CREATE TABLE IF NOT EXISTS logs (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    level TEXT NOT NULL,
                    message TEXT NOT NULL,
                    agent_id TEXT,
                    workflow_id TEXT,
                    timestamp TEXT NOT NULL,
                    data TEXT
                );

                CREATE TABLE IF NOT EXISTS results (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    run_id TEXT NOT NULL,
                    system_mode TEXT NOT NULL,
                    problem_id TEXT NOT NULL,
                    success INTEGER NOT NULL,
                    execution_time_ms REAL NOT NULL,
                    circuit_qasm TEXT,
                    metrics TEXT,
                    timestamp TEXT NOT NULL
                );

                CREATE INDEX IF NOT EXISTS idx_memory_key ON memory(key);
                CREATE INDEX IF NOT EXISTS idx_memory_agent ON memory(agent_id);
                CREATE INDEX IF NOT EXISTS idx_results_mode ON results(system_mode);
                CREATE INDEX IF NOT EXISTS idx_results_problem ON results(problem_id);
            """)

    # ===== Memory Operations =====

    def store_memory(self, entry: MemoryEntry):
        """Store a memory entry (value/metadata are JSON-encoded)."""
        with sqlite3.connect(self.db_file) as conn:
            conn.execute(
                """INSERT INTO memory (key, value, agent_id, memory_type, timestamp, metadata)
                   VALUES (?, ?, ?, ?, ?, ?)""",
                (entry.key, json.dumps(entry.value), entry.agent_id,
                 entry.memory_type.value, entry.timestamp.isoformat(),
                 json.dumps(entry.metadata))
            )

    def get_memory(self, key: str, agent_id: Optional[str] = None,
                   memory_type: Optional[MemoryType] = None) -> Optional[Any]:
        """Retrieve the most recent memory value for *key*.

        Args:
            key: Memory key to look up.
            agent_id: If given, restrict to entries owned by this agent.
            memory_type: If given, restrict to this memory class.

        Returns:
            The JSON-decoded value of the newest matching entry, or None.
        """
        with sqlite3.connect(self.db_file) as conn:
            query = "SELECT value FROM memory WHERE key = ?"
            params = [key]

            if agent_id:
                query += " AND agent_id = ?"
                params.append(agent_id)
            if memory_type:
                query += " AND memory_type = ?"
                params.append(memory_type.value)

            # Newest entry wins: keys may be written multiple times.
            query += " ORDER BY timestamp DESC LIMIT 1"

            result = conn.execute(query, params).fetchone()
            return json.loads(result[0]) if result else None

    def get_shared_memory(self, key: str) -> Optional[Any]:
        """Get from shared blackboard memory."""
        return self.get_memory(key, memory_type=MemoryType.SHARED)

    def set_shared_memory(self, key: str, value: Any, agent_id: Optional[str] = None):
        """Set shared blackboard memory (appends a new SHARED entry)."""
        entry = MemoryEntry(
            key=key,
            value=value,
            agent_id=agent_id,
            memory_type=MemoryType.SHARED
        )
        self.store_memory(entry)

    def clear_short_term_memory(self, agent_id: Optional[str] = None):
        """Clear short-term memory (session reset).

        Args:
            agent_id: If given, clear only this agent's short-term entries;
                otherwise clear all short-term entries.
        """
        with sqlite3.connect(self.db_file) as conn:
            if agent_id:
                conn.execute(
                    "DELETE FROM memory WHERE memory_type = ? AND agent_id = ?",
                    (MemoryType.SHORT_TERM.value, agent_id)
                )
            else:
                conn.execute(
                    "DELETE FROM memory WHERE memory_type = ?",
                    (MemoryType.SHORT_TERM.value,)
                )

    # ===== Logging Operations =====

    def log(self, entry: LogEntry):
        """Store a log entry in the audit trail."""
        with sqlite3.connect(self.db_file) as conn:
            conn.execute(
                """INSERT INTO logs (level, message, agent_id, workflow_id, timestamp, data)
                   VALUES (?, ?, ?, ?, ?, ?)""",
                (entry.level, entry.message, entry.agent_id, entry.workflow_id,
                 entry.timestamp.isoformat(), json.dumps(entry.data))
            )

    def get_logs(self, agent_id: Optional[str] = None,
                 workflow_id: Optional[str] = None,
                 limit: int = 100) -> List[Dict]:
        """Retrieve log entries, newest first.

        Args:
            agent_id: Optional filter on the emitting agent.
            workflow_id: Optional filter on the workflow.
            limit: Maximum number of rows returned.
        """
        with sqlite3.connect(self.db_file) as conn:
            query = "SELECT * FROM logs WHERE 1=1"
            params = []

            if agent_id:
                query += " AND agent_id = ?"
                params.append(agent_id)
            if workflow_id:
                query += " AND workflow_id = ?"
                params.append(workflow_id)

            # Bind LIMIT as a parameter rather than interpolating it into
            # the SQL string (the previous f-string form bypassed binding).
            query += " ORDER BY timestamp DESC LIMIT ?"
            params.append(limit)

            rows = conn.execute(query, params).fetchall()
            # Column order follows the CREATE TABLE statement (id first).
            return [
                {"level": r[1], "message": r[2], "agent_id": r[3],
                 "workflow_id": r[4], "timestamp": r[5], "data": json.loads(r[6] or "{}")}
                for r in rows
            ]

    # ===== Results Operations =====

    def store_result(self, entry: ResultEntry):
        """Store an evaluation result (success stored as 0/1, metrics as JSON)."""
        with sqlite3.connect(self.db_file) as conn:
            conn.execute(
                """INSERT INTO results (run_id, system_mode, problem_id, success,
                   execution_time_ms, circuit_qasm, metrics, timestamp)
                   VALUES (?, ?, ?, ?, ?, ?, ?, ?)""",
                (entry.run_id, entry.system_mode, entry.problem_id,
                 1 if entry.success else 0, entry.execution_time_ms,
                 entry.circuit_qasm, json.dumps(entry.metrics),
                 entry.timestamp.isoformat())
            )

    def get_results(self, system_mode: Optional[str] = None,
                    problem_id: Optional[str] = None) -> List[ResultEntry]:
        """Retrieve results for analysis, newest first.

        Args:
            system_mode: Optional filter (blackboard/guided/naked).
            problem_id: Optional filter on the benchmark problem.
        """
        with sqlite3.connect(self.db_file) as conn:
            query = "SELECT * FROM results WHERE 1=1"
            params = []

            if system_mode:
                query += " AND system_mode = ?"
                params.append(system_mode)
            if problem_id:
                query += " AND problem_id = ?"
                params.append(problem_id)

            query += " ORDER BY timestamp DESC"

            rows = conn.execute(query, params).fetchall()
            return [
                ResultEntry(
                    run_id=r[1], system_mode=r[2], problem_id=r[3],
                    success=bool(r[4]), execution_time_ms=r[5],
                    circuit_qasm=r[6], metrics=json.loads(r[7] or "{}"),
                    timestamp=datetime.fromisoformat(r[8])
                )
                for r in rows
            ]

    def get_summary_stats(self) -> Dict:
        """Get summary statistics (run count, mean time, success %) per mode."""
        with sqlite3.connect(self.db_file) as conn:
            stats = {}
            for mode in ["blackboard", "guided", "naked"]:
                row = conn.execute(
                    """SELECT COUNT(*), AVG(execution_time_ms),
                       SUM(success) * 100.0 / COUNT(*)
                       FROM results WHERE system_mode = ?""",
                    (mode,)
                ).fetchone()

                # AVG/SUM yield NULL when the mode has no rows; coerce to 0.
                stats[mode] = {
                    "total_runs": row[0] or 0,
                    "avg_time_ms": row[1] or 0,
                    "success_rate": row[2] or 0
                }
            return stats
|
| 266 |
+
|
| 267 |
+
|
| 268 |
+
# Singleton instance
|
| 269 |
+
# Module-level singleton holding the process-wide Database handle.
_db: Optional[Database] = None

def get_database(db_path: Optional[Path] = None) -> Database:
    """Get or create the database singleton.

    Args:
        db_path: Optional override for the storage directory; when omitted,
            the configured ``config.database.db_path`` is used.

    Returns:
        The shared Database instance (created on first call).
    """
    global _db
    if _db is None:
        # Deferred import: config is only needed on first construction.
        from config import config
        _db = Database(db_path or config.database.db_path)
    return _db
|
orchestrators/__init__.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Orchestrators module: Workflow orchestration for different modes."""
|
| 2 |
+
|
| 3 |
+
from .orchestrator import (
|
| 4 |
+
OrchestratorResult,
|
| 5 |
+
BaseOrchestrator,
|
| 6 |
+
BlackboardOrchestrator,
|
| 7 |
+
GuidedOrchestrator,
|
| 8 |
+
NakedOrchestrator,
|
| 9 |
+
create_orchestrator
|
| 10 |
+
)
|
| 11 |
+
|
| 12 |
+
from .quasar_orchestrator import (
|
| 13 |
+
QuasarOrchestrator,
|
| 14 |
+
HybridOrchestrator,
|
| 15 |
+
QuasarResult,
|
| 16 |
+
ValidationTier
|
| 17 |
+
)
|
| 18 |
+
|
| 19 |
+
__all__ = [
|
| 20 |
+
"OrchestratorResult",
|
| 21 |
+
"BaseOrchestrator",
|
| 22 |
+
"BlackboardOrchestrator",
|
| 23 |
+
"GuidedOrchestrator",
|
| 24 |
+
"NakedOrchestrator",
|
| 25 |
+
"QuasarOrchestrator",
|
| 26 |
+
"HybridOrchestrator",
|
| 27 |
+
"QuasarResult",
|
| 28 |
+
"ValidationTier",
|
| 29 |
+
"create_orchestrator"
|
| 30 |
+
]
|
orchestrators/orchestrator.py
ADDED
|
@@ -0,0 +1,541 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Path: QAgents-workflos/orchestrators/orchestrator.py
|
| 2 |
+
# Relations: Uses agents, workflows, database modules
|
| 3 |
+
# Description: Orchestrators for Blackboard, Guided, and Naked execution modes
|
| 4 |
+
"""
|
| 5 |
+
Orchestrators Module: Workflow orchestration and execution.
|
| 6 |
+
Contains both Blackboard (free) and Guided (strict) orchestrators.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
from abc import ABC, abstractmethod
|
| 10 |
+
from dataclasses import dataclass, field
|
| 11 |
+
from typing import Dict, List, Any, Optional
|
| 12 |
+
from datetime import datetime
|
| 13 |
+
import logging
|
| 14 |
+
import time
|
| 15 |
+
|
| 16 |
+
from agents import (
|
| 17 |
+
BaseAgent, AgentContext, AgentResult,
|
| 18 |
+
AgentState, create_all_agents
|
| 19 |
+
)
|
| 20 |
+
from workflows import (
|
| 21 |
+
WorkflowDefinition, WorkflowExecution,
|
| 22 |
+
WorkflowStatus, get_workflow
|
| 23 |
+
)
|
| 24 |
+
from database import get_database, LogEntry
|
| 25 |
+
|
| 26 |
+
logger = logging.getLogger(__name__)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
@dataclass
class OrchestratorResult:
    """Result from orchestrator execution."""
    success: bool             # True when the orchestrator achieved its goal
    final_output: Any         # Final artifact of the run (typically QASM text, may be None)
    execution_time_ms: float  # Wall-clock duration of the run
    steps_completed: int      # Steps/iterations actually executed
    total_steps: int          # Total steps planned (or iteration budget)
    agent_results: Dict[str, AgentResult] = field(default_factory=dict)  # Per-agent/step outcomes
    errors: List[str] = field(default_factory=list)                      # Accumulated error messages
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
class BaseOrchestrator(ABC):
    """Abstract base class for orchestrators.

    Holds the registered agents and a database handle used for audit
    logging. Subclasses implement :meth:`run`.
    """

    def __init__(self, name: str):
        # Mode name (e.g. "blackboard"); recorded as agent_id in log entries.
        self.name = name
        self.agents: Dict[str, BaseAgent] = {}
        self.db = get_database()

    def register_agent(self, agent: BaseAgent):
        """Register an agent with the orchestrator, keyed by its agent_id."""
        self.agents[agent.agent_id] = agent

    def log(self, level: str, message: str, workflow_id: Optional[str] = None,
            data: Optional[Dict] = None):
        """Log orchestrator activity to the persistent audit trail."""
        entry = LogEntry(
            level=level,
            message=message,
            agent_id=self.name,
            workflow_id=workflow_id,
            data=data or {}
        )
        self.db.log(entry)

    @abstractmethod
    def run(self, goal: str, initial_context: Dict = None) -> OrchestratorResult:
        """Run the orchestrator to achieve the goal."""
        pass
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
class BlackboardOrchestrator(BaseOrchestrator):
    """
    Blackboard (Free) Orchestrator.

    Uses a shared blackboard for agent communication.
    Agents opportunistically activate when they can contribute.
    Emergent workflow based on data availability.
    """

    def __init__(self):
        super().__init__("blackboard")
        # Shared key/value state visible to every agent via AgentContext.
        self.blackboard: Dict[str, Any] = {}
        # Upper bound on activation cycles per run (loop-termination guard).
        self.max_iterations = 20

    def _reset_blackboard(self, goal: str, initial_context: Dict = None):
        """Initialize the blackboard with goal and context."""
        # Ensure goal is a string (callers sometimes pass a one-element list).
        if isinstance(goal, list):
            goal = goal[0] if goal else ""
        goal = str(goal) if goal else ""

        self.blackboard = {
            "goal": goal,
            "current_circuit": None,       # Latest QASM produced by any agent
            "validation_passed": False,    # Set True by a validator result
            "scores": None,                # Optional quality score
            "completed": False,
            **(initial_context or {})      # Caller-supplied keys may override defaults
        }

    def _build_context(self) -> AgentContext:
        """Build agent context from blackboard."""
        return AgentContext(
            goal=self.blackboard.get("goal", ""),
            current_circuit=self.blackboard.get("current_circuit"),
            history=self.blackboard.get("history", []),
            constraints=self.blackboard.get("constraints", {}),
            # Agents get the live blackboard dict, so writes are shared.
            shared_data=self.blackboard
        )

    def _find_active_agent(self, context: AgentContext) -> Optional[BaseAgent]:
        """Find an agent that can handle the current state.

        Returns the first IDLE agent (in fixed priority order) whose
        ``can_handle`` accepts the context, or None if nobody can act.
        """
        # Priority order for agent selection - simplified for reliability
        # First: generate circuit, then validate
        priority_order = ["builder", "architect", "validator"]

        for agent_id in priority_order:
            agent = self.agents.get(agent_id)
            if agent and agent.can_handle(context):
                if agent.state == AgentState.IDLE:
                    return agent

        return None

    def _update_blackboard(self, agent_id: str, result: AgentResult):
        """Update blackboard with agent results.

        Failed results are ignored entirely (no history entry either).
        """
        if not result.success:
            return

        data = result.data
        if isinstance(data, dict):
            # Extract QASM if present
            if "qasm" in data:
                qasm = data["qasm"]
                # Handle list responses (some agents return a list of circuits)
                if isinstance(qasm, list):
                    qasm = qasm[0] if qasm else None
                self.blackboard["current_circuit"] = qasm

            # Update validation status
            if "valid" in data:
                self.blackboard["validation_passed"] = data["valid"]

            # Update scores
            if "score" in data:
                self.blackboard["scores"] = data["score"]

        # Track history so later agents can see what already happened.
        if "history" not in self.blackboard:
            self.blackboard["history"] = []
        self.blackboard["history"].append({
            "agent": agent_id,
            "action": result.actions_taken,
            "success": result.success,
            "timestamp": datetime.now().isoformat()
        })

    def _check_completion(self) -> bool:
        """Check if the goal has been achieved."""
        # Simple completion: we have a validated circuit
        has_circuit = self.blackboard.get("current_circuit") is not None
        is_validated = self.blackboard.get("validation_passed", False)
        return has_circuit and is_validated

    def run(self, goal: str, initial_context: Dict = None) -> OrchestratorResult:
        """Run blackboard orchestration.

        Repeatedly activates whichever agent can contribute until the goal
        is reached, no agent can act, or ``max_iterations`` is exhausted.
        """
        start_time = time.perf_counter()

        self.log("INFO", f"Starting blackboard orchestration for: {goal}")
        self._reset_blackboard(goal, initial_context)

        # Ensure we have agents (default set if none were registered)
        if not self.agents:
            self.agents = create_all_agents()

        agent_results = {}
        steps_completed = 0
        errors = []

        for iteration in range(self.max_iterations):
            context = self._build_context()

            # Find an agent that can work
            agent = self._find_active_agent(context)

            if agent is None:
                self.log("INFO", "No active agent found, checking completion")
                if self._check_completion():
                    break
                # No agent and not complete - might be stuck
                if iteration > 5:  # Give it a few tries before declaring a stall
                    errors.append("No agent could make progress")
                    break
                continue

            self.log("INFO", f"Activating agent: {agent.agent_id}")

            # Agent decides and executes - with null safety
            try:
                action = agent.decide(context)
                if action is None:
                    self.log("WARN", f"Agent {agent.agent_id} returned no action, continuing")
                    agent.reset()
                    continue

                result = agent.execute(action, context)
                if result is None:
                    self.log("WARN", f"Agent {agent.agent_id} returned no result, continuing")
                    agent.reset()
                    continue

                # NOTE: a repeat activation of the same agent overwrites its
                # earlier entry; only the latest result per agent is kept.
                agent_results[agent.agent_id] = result
                steps_completed += 1

                # Update blackboard
                self._update_blackboard(agent.agent_id, result)

            except Exception as e:
                # A failing agent does not abort the run; log and move on.
                self.log("ERROR", f"Agent {agent.agent_id} failed: {e}")
                errors.append(f"Agent {agent.agent_id} error: {str(e)}")
                agent.reset()
                continue

            # Reset agent for next potential activation
            agent.reset()

            # Check completion
            if self._check_completion():
                self.log("INFO", "Goal achieved!")
                break

        elapsed = (time.perf_counter() - start_time) * 1000

        return OrchestratorResult(
            success=self._check_completion(),
            final_output=self.blackboard.get("current_circuit"),
            execution_time_ms=elapsed,
            steps_completed=steps_completed,
            total_steps=self.max_iterations,
            agent_results=agent_results,
            errors=errors
        )
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
class GuidedOrchestrator(BaseOrchestrator):
    """
    Guided (Strict) Orchestrator.

    Follows a predefined workflow with explicit steps.
    Central control over agent execution order.
    Predictable, auditable execution path.
    """

    def __init__(self, workflow_name: str = "build"):
        """Create the orchestrator bound to a named workflow.

        Raises:
            ValueError: If *workflow_name* is not a known workflow.
        """
        super().__init__("guided")
        self.workflow = get_workflow(workflow_name)
        if self.workflow is None:
            raise ValueError(f"Unknown workflow: {workflow_name}")
        # Populated per-run; holds step pointer, context and errors.
        self.execution: Optional[WorkflowExecution] = None

    def set_workflow(self, workflow_name: str):
        """Change the workflow.

        Raises:
            ValueError: If *workflow_name* is not a known workflow.
        """
        self.workflow = get_workflow(workflow_name)
        if self.workflow is None:
            raise ValueError(f"Unknown workflow: {workflow_name}")

    def run(self, goal: str, initial_context: Dict = None) -> OrchestratorResult:
        """Run guided workflow orchestration.

        Executes the workflow's steps strictly in order; a failing
        required step aborts the run.
        """
        start_time = time.perf_counter()

        # Ensure goal is a string (callers sometimes pass a one-element list)
        if isinstance(goal, list):
            goal = goal[0] if goal else ""
        goal = str(goal) if goal else ""

        self.log("INFO", f"Starting guided workflow '{self.workflow.name}' for: {goal}")

        # Initialize execution state
        self.execution = WorkflowExecution(
            workflow=self.workflow,
            context={"goal": goal, **(initial_context or {})}
        )
        self.execution.status = WorkflowStatus.IN_PROGRESS

        # Ensure we have agents (default set if none were registered)
        if not self.agents:
            self.agents = create_all_agents()

        agent_results = {}

        # Execute each step in order until the workflow is exhausted or fails
        while self.execution.current_step is not None:
            step = self.execution.current_step
            self.log("INFO", f"Executing step: {step.name} ({step.agent_type})")

            # Get the agent for this step
            agent = self.agents.get(step.agent_type)
            if agent is None:
                # Missing agent aborts only if the step is required.
                if step.required:
                    self.execution.fail(f"Missing agent: {step.agent_type}")
                    break
                else:
                    self.log("WARN", f"Skipping optional step: {step.name}")
                    self.execution.advance()
                    continue

            # Build context for agent from the shared execution context
            context = AgentContext(
                goal=self.execution.context.get("goal", ""),
                current_circuit=self.execution.context.get("circuit_qasm"),
                history=[],
                constraints={},
                shared_data=self.execution.context
            )

            # Agent decides and executes
            action = agent.decide(context)
            if action is None:
                # Agent has nothing to do - might be okay for some steps
                self.log("WARN", f"Agent {step.agent_type} returned no action")
                self.execution.advance()
                continue

            result = agent.execute(action, context)
            agent_results[step.name] = result

            # Store outputs in execution context so later steps can use them
            if result.success and result.data:
                for output_key in step.outputs:
                    if isinstance(result.data, dict):
                        if output_key in result.data:
                            self.execution.context[output_key] = result.data[output_key]
                        elif "qasm" in result.data:
                            # Fallback: map a generic "qasm" payload onto circuit_qasm
                            qasm = result.data["qasm"]
                            # Handle list responses
                            if isinstance(qasm, list):
                                qasm = qasm[0] if qasm else None
                            self.execution.context["circuit_qasm"] = qasm

            # Handle failure: a required step failing aborts the workflow
            if not result.success and step.required:
                self.execution.fail(f"Step {step.name} failed: {result.message}")
                break

            # Reset agent and advance
            agent.reset()
            self.execution.advance()

        elapsed = (time.perf_counter() - start_time) * 1000

        return OrchestratorResult(
            success=self.execution.status == WorkflowStatus.COMPLETED,
            final_output=self.execution.context.get(self.workflow.final_output),
            execution_time_ms=elapsed,
            steps_completed=self.execution.current_step_index,
            total_steps=len(self.workflow.steps),
            agent_results=agent_results,
            errors=self.execution.errors
        )
|
| 359 |
+
|
| 360 |
+
|
| 361 |
+
class NakedOrchestrator(BaseOrchestrator):
    """
    Naked (Baseline) Orchestrator.

    Direct LLM-to-QASM generation with single call.
    No multi-agent coordination, no structured workflow.
    Uses ONE LLM call per problem for baseline comparison.

    Purpose: Measure raw LLM capability at quantum circuit generation
    without agentic overhead.
    """

    def __init__(self):
        super().__init__("naked")
        # Lazily created LLM adapter; see _get_llm().
        self._llm = None

    def _get_llm(self):
        """Lazy load LLM adapter (created once, then reused)."""
        if self._llm is None:
            from agents.llm_adapter import get_llm_adapter
            from config import config
            self._llm = get_llm_adapter(
                provider="gemini",
                api_key=config.llm.api_key,
                enable_fallback=True
            )
        return self._llm

    def run(self, goal: str, initial_context: Dict = None) -> OrchestratorResult:
        """
        Run naked LLM execution - ONE LLM call per problem.

        This is the baseline test: can a single LLM call generate
        valid QASM for a quantum computing problem?

        Note: ``initial_context`` is accepted for interface compatibility
        but is not used by this orchestrator.
        """
        start_time = time.perf_counter()

        # Ensure goal is a string (callers sometimes pass a one-element list)
        if isinstance(goal, list):
            goal = goal[0] if goal else ""
        goal = str(goal) if goal else ""

        self.log("INFO", f"Starting naked LLM execution for: {goal}")

        from tools import invoke_tool

        errors = []
        circuit_qasm = None
        llm_requests = 0
        tokens_used = 0

        # System prompt for direct QASM generation
        system_prompt = """You are an expert quantum computing engineer.
Your task is to generate valid OpenQASM 2.0 code for the given quantum circuit problem.

RULES:
1. Output ONLY valid OpenQASM 2.0 code
2. Start with: OPENQASM 2.0; include "qelib1.inc";
3. Declare qubits with: qreg q[N];
4. Declare classical bits with: creg c[N];
5. Use standard gates: h, x, y, z, cx, cz, ccx, swap, t, s, rx, ry, rz
6. Add measurements with: measure q[i] -> c[i];
7. NO explanations, NO markdown, ONLY QASM code

EXAMPLE OUTPUT:
OPENQASM 2.0;
include "qelib1.inc";
qreg q[2];
creg c[2];
h q[0];
cx q[0], q[1];
measure q[0] -> c[0];
measure q[1] -> c[1];
"""

        user_prompt = f"""Generate the OpenQASM 2.0 code for this quantum circuit problem:

{goal}

Output ONLY the QASM code, nothing else."""

        try:
            # Single LLM call - the naked baseline test
            llm = self._get_llm()
            response = llm.generate(
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                temperature=0.1,  # Low temperature for deterministic output
                max_tokens=1000
            )
            llm_requests = 1
            tokens_used = response.tokens_used

            # Extract QASM from response
            raw_output = response.text.strip()

            # Clean up common LLM artifacts: strip the first fenced code block
            if "```" in raw_output:
                # Extract from code block (keep only lines between the fences)
                lines = raw_output.split("\n")
                in_block = False
                qasm_lines = []
                for line in lines:
                    if line.strip().startswith("```"):
                        if in_block:
                            break
                        in_block = True
                        continue
                    if in_block:
                        qasm_lines.append(line)
                raw_output = "\n".join(qasm_lines)

            # Ensure it starts with OPENQASM declaration
            if "OPENQASM" in raw_output:
                # Find the start of QASM (drop any leading chatter)
                idx = raw_output.find("OPENQASM")
                circuit_qasm = raw_output[idx:]
            else:
                # Try to use as-is if it looks like QASM; prepend the header
                if "qreg" in raw_output or "include" in raw_output:
                    circuit_qasm = "OPENQASM 2.0;\ninclude \"qelib1.inc\";\n" + raw_output
                else:
                    errors.append(f"LLM did not produce valid QASM: {raw_output[:100]}")

            # Validate the generated QASM; failure marks the run unsuccessful
            # but the circuit is still returned for analysis.
            if circuit_qasm:
                validation = invoke_tool("validate_syntax", qasm=circuit_qasm)
                if not validation.get("success") or not validation.get("valid", False):
                    error_msg = validation.get("error", "Unknown validation error")
                    errors.append(f"QASM validation failed: {error_msg}")
                    # Still keep the circuit for analysis
                    self.log("WARN", f"Generated QASM failed validation: {error_msg}")

        except Exception as e:
            errors.append(str(e))
            self.log("ERROR", f"Naked LLM execution failed: {e}")

        elapsed = (time.perf_counter() - start_time) * 1000

        # Create a simple AgentResult-like dict for compatibility
        from agents import AgentResult
        naked_result = AgentResult(
            success=circuit_qasm is not None and len(errors) == 0,
            data={
                "qasm": circuit_qasm,
                "llm_requests": llm_requests,
                "tokens_used": tokens_used
            },
            message=f"Generated QASM via naked LLM ({llm_requests} request, {tokens_used} tokens)"
        )

        return OrchestratorResult(
            success=circuit_qasm is not None and len(errors) == 0,
            final_output=circuit_qasm,
            execution_time_ms=elapsed,
            steps_completed=1 if llm_requests > 0 else 0,
            total_steps=1,
            agent_results={"naked_llm": naked_result},
            errors=errors
        )
|
| 523 |
+
|
| 524 |
+
|
| 525 |
+
# Factory function
def create_orchestrator(mode: str) -> BaseOrchestrator:
    """Build and return the orchestrator implementation for *mode*.

    Supported modes: 'blackboard', 'guided', 'naked', 'quasar', 'hybrid'.

    Raises:
        ValueError: if *mode* is not one of the supported names.
    """
    if mode == "blackboard":
        return BlackboardOrchestrator()
    if mode == "guided":
        return GuidedOrchestrator()
    if mode == "naked":
        return NakedOrchestrator()
    if mode == "quasar":
        # Imported lazily so the quasar module is only loaded when requested.
        from .quasar_orchestrator import QuasarOrchestrator
        return QuasarOrchestrator()
    if mode == "hybrid":
        from .quasar_orchestrator import HybridOrchestrator
        return HybridOrchestrator()
    raise ValueError(f"Unknown mode: {mode}. Use 'blackboard', 'guided', 'naked', 'quasar', or 'hybrid'")
|
orchestrators/quasar_orchestrator.py
ADDED
|
@@ -0,0 +1,563 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Path: QAgents-workflos/orchestrators/quasar_orchestrator.py
|
| 2 |
+
# Relations: Uses agents/llm_adapter.py, tools/quantum_tools.py, client/mcp_client.py
|
| 3 |
+
# Description: QUASAR-lite orchestrator implementing Tool-Augmented LLM with hierarchical rewards
|
| 4 |
+
"""
|
| 5 |
+
QUASAR-Lite Orchestrator: Tool-Augmented LLM with Hierarchical Verification
|
| 6 |
+
|
| 7 |
+
Based on the QUASAR framework (2025) for quantum circuit generation:
|
| 8 |
+
- Tier 1: Syntax validation (compile check)
|
| 9 |
+
- Tier 2: Semantic validation (unitarity, qubit count)
|
| 10 |
+
- Tier 3: Correctness validation (expected states)
|
| 11 |
+
- Tier 4: Optimization (depth/gate count)
|
| 12 |
+
|
| 13 |
+
Key Innovation: LLM generates → Tool validates → Feedback loop until success
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
from dataclasses import dataclass, field
|
| 17 |
+
from typing import Dict, List, Any, Optional
|
| 18 |
+
from datetime import datetime
|
| 19 |
+
import logging
|
| 20 |
+
import time
|
| 21 |
+
import re
|
| 22 |
+
|
| 23 |
+
logger = logging.getLogger(__name__)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
@dataclass
class ValidationTier:
    """Result from a validation tier (1=syntax, 2=semantic, 3=correctness, 4=optimization)."""
    tier: int      # Tier number 1-4; 0 is used by run() for QASM-extraction failures
    name: str      # Human-readable tier name, e.g. "Syntax"
    passed: bool   # Whether the circuit cleared this tier
    message: str   # Summary shown to callers and fed back to the LLM on failure
    details: Dict[str, Any] = field(default_factory=dict)  # Tier-specific metrics (e.g. depth, gate_count)
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
@dataclass
class QuasarResult:
    """Result from QUASAR orchestration."""
    success: bool                    # True when the required validation tiers passed
    final_qasm: Optional[str]        # Best QASM produced, or None if no attempt yielded QASM
    execution_time_ms: float         # Wall-clock duration of the whole run
    llm_calls: int                   # Number of LLM generate() calls made
    tokens_used: int                 # Total tokens consumed across all LLM calls
    tiers_passed: List[int]          # Tier numbers (1-4) passed on the final attempt
    validation_history: List[ValidationTier] = field(default_factory=list)  # Every tier result across all iterations
    errors: List[str] = field(default_factory=list)  # Accumulated error strings
    iterations: int = 0              # Generation attempts performed

    @property
    def final_output(self) -> Optional[str]:
        """Alias for compatibility with OrchestratorResult."""
        return self.final_qasm
|
| 53 |
+
class QuasarOrchestrator:
    """
    QUASAR-Lite: Tool-Augmented LLM for Quantum Circuit Generation

    Key differences from NAKED mode:
    1. Validates after each generation attempt
    2. Provides error feedback to LLM for self-correction
    3. Uses hierarchical reward tiers
    4. Supports circuit partitioning for complex problems

    Key differences from GUIDED mode:
    1. Single LLM with tool access (not multi-agent)
    2. External validation (not self-reflection)
    3. Iterative refinement with ground-truth feedback
    """

    def __init__(self, max_iterations: int = 3):
        """
        Args:
            max_iterations: Maximum generate -> validate -> feedback cycles.
        """
        self.max_iterations = max_iterations
        self._llm = None          # Lazily created LLM adapter
        self._mcp_client = None   # Lazily created MCP client for validation tools

    def _get_llm(self):
        """Lazy load LLM adapter."""
        if self._llm is None:
            from agents.llm_adapter import get_llm_adapter
            from config import config
            self._llm = get_llm_adapter(
                provider="gemini",
                api_key=config.llm.api_key,
                enable_fallback=True
            )
        return self._llm

    def _get_mcp(self):
        """Lazy load MCP client for validation."""
        if self._mcp_client is None:
            from client.mcp_client import get_client
            self._mcp_client = get_client()
        return self._mcp_client

    def _extract_qasm(self, text: str) -> Optional[str]:
        """Extract QASM code from an LLM response.

        Handles markdown code fences, prose preceding the OPENQASM header,
        and bare gate listings missing the header. Returns None when the
        text contains nothing QASM-like.
        """
        if not text:
            return None

        # Strip a markdown code fence if present (only the first block is kept)
        if "```" in text:
            lines = text.split("\n")
            in_block = False
            qasm_lines = []
            for line in lines:
                if line.strip().startswith("```"):
                    if in_block:
                        break
                    in_block = True
                    continue
                if in_block:
                    qasm_lines.append(line)
            text = "\n".join(qasm_lines)

        # Prefer text starting at the OPENQASM declaration
        if "OPENQASM" in text:
            idx = text.find("OPENQASM")
            return text[idx:].strip()

        # Looks like QASM without a header: prepend the standard preamble
        if "qreg" in text or "include" in text:
            return "OPENQASM 2.0;\ninclude \"qelib1.inc\";\n" + text.strip()

        return None

    def _validate_tier1_syntax(self, qasm: str) -> "ValidationTier":
        """Tier 1: Syntax validation - does it compile?"""
        try:
            mcp = self._get_mcp()
            result = mcp.validate_syntax(qasm)

            if result.success and result.data:
                is_valid = result.data.get("valid", False)
                errors = result.data.get("errors", [])

                if is_valid:
                    return ValidationTier(
                        tier=1, name="Syntax", passed=True,
                        message="QASM syntax is valid",
                        details={"valid": True}
                    )
                else:
                    return ValidationTier(
                        tier=1, name="Syntax", passed=False,
                        message=f"Syntax errors: {errors}",
                        details={"errors": errors}
                    )

            return ValidationTier(
                tier=1, name="Syntax", passed=False,
                message="Validation failed",
                details={"error": "MCP validation failed"}
            )

        except Exception as e:
            # Fallback: basic structural check when the MCP tool is unavailable.
            # (Removed unused `has_creg` — a creg is not required for validity here.)
            has_header = "OPENQASM" in qasm and "include" in qasm
            has_qreg = "qreg" in qasm

            if has_header and has_qreg:
                return ValidationTier(
                    tier=1, name="Syntax", passed=True,
                    message="Basic syntax check passed (fallback)",
                    details={"fallback": True}
                )
            return ValidationTier(
                tier=1, name="Syntax", passed=False,
                message=f"Basic syntax check failed: {e}",
                details={"error": str(e)}
            )

    def _validate_tier2_semantic(self, qasm: str, expected_qubits: Optional[int] = None) -> "ValidationTier":
        """Tier 2: Semantic validation - qubit count, gate validity."""
        try:
            mcp = self._get_mcp()
            result = mcp.analyze_circuit(qasm)

            if result.success and result.data:
                num_qubits = result.data.get("num_qubits", 0)
                gate_count = result.data.get("gate_count", 0)

                issues = []

                # Check qubit count if expected
                if expected_qubits and num_qubits != expected_qubits:
                    issues.append(f"Expected {expected_qubits} qubits, got {num_qubits}")

                # Check for at least one gate
                if gate_count == 0:
                    issues.append("No gates in circuit")

                if issues:
                    return ValidationTier(
                        tier=2, name="Semantic", passed=False,
                        message="; ".join(issues),
                        details={"num_qubits": num_qubits, "gate_count": gate_count}
                    )

            return ValidationTier(
                tier=2, name="Semantic", passed=True,
                message=f"Valid circuit: {num_qubits} qubits, {gate_count} gates",
                details={"num_qubits": num_qubits, "gate_count": gate_count}
            )

        except Exception as e:
            # Fallback: regex-based analysis when the MCP tool is unavailable
            qreg_match = re.search(r'qreg\s+\w+\[(\d+)\]', qasm)
            num_qubits = int(qreg_match.group(1)) if qreg_match else 0

            gate_pattern = r'\b(h|x|y|z|s|t|cx|cz|cy|swap|ccx|rz|rx|ry)\b'
            gates = re.findall(gate_pattern, qasm, re.IGNORECASE)

            return ValidationTier(
                tier=2, name="Semantic", passed=len(gates) > 0,
                message=f"Fallback analysis: {num_qubits} qubits, {len(gates)} gates",
                details={"fallback": True, "num_qubits": num_qubits, "gate_count": len(gates)}
            )

    def _validate_tier3_correctness(self, qasm: str, expected_states: Optional[Dict[str, float]] = None) -> "ValidationTier":
        """Tier 3: Correctness validation - expected output states via simulation."""
        if not expected_states:
            return ValidationTier(
                tier=3, name="Correctness", passed=True,
                message="No expected states specified, skipping",
                details={"skipped": True}
            )

        try:
            mcp = self._get_mcp()
            result = mcp.simulate_circuit(qasm, shots=1024)

            if result.success and result.data:
                probs = result.data.get("probabilities", {})

                # Check if expected states match within a sampling tolerance
                tolerance = 0.15
                matches = []
                mismatches = []

                for state, expected_prob in expected_states.items():
                    actual_prob = probs.get(state, 0.0)
                    if abs(actual_prob - expected_prob) <= tolerance:
                        matches.append(f"|{state}⟩: {actual_prob:.3f} ≈ {expected_prob}")
                    else:
                        mismatches.append(f"|{state}⟩: got {actual_prob:.3f}, expected {expected_prob}")

                if mismatches:
                    return ValidationTier(
                        tier=3, name="Correctness", passed=False,
                        message=f"State mismatches: {mismatches}",
                        details={"expected": expected_states, "actual": probs}
                    )

            return ValidationTier(
                tier=3, name="Correctness", passed=True,
                message=f"States match: {matches}",
                details={"matches": matches}
            )

        except Exception as e:
            return ValidationTier(
                tier=3, name="Correctness", passed=False,
                message=f"Simulation failed: {e}",
                details={"error": str(e)}
            )

    def _validate_tier4_optimization(self, qasm: str, max_depth: Optional[int] = None) -> "ValidationTier":
        """Tier 4: Optimization - circuit depth and gate count (informational)."""
        try:
            mcp = self._get_mcp()
            result = mcp.analyze_circuit(qasm)

            if result.success and result.data:
                depth = result.data.get("depth", 0)
                gate_count = result.data.get("gate_count", 0)
                cx_count = result.data.get("cx_count", 0)

                details = {"depth": depth, "gate_count": gate_count, "cx_count": cx_count}

                if max_depth and depth > max_depth:
                    return ValidationTier(
                        tier=4, name="Optimization", passed=False,
                        message=f"Depth {depth} exceeds max {max_depth}",
                        details=details
                    )

            return ValidationTier(
                tier=4, name="Optimization", passed=True,
                message=f"Depth: {depth}, Gates: {gate_count}, CX: {cx_count}",
                details=details
            )

        except Exception as e:
            # Optimization info is best-effort; a failure here never blocks the run
            return ValidationTier(
                tier=4, name="Optimization", passed=True,
                message=f"Optimization check skipped: {e}",
                details={"error": str(e)}
            )

    def _build_feedback_prompt(self, goal: str, previous_qasm: str,
                               failed_tier: "ValidationTier", iteration: int) -> str:
        """Build prompt with validation feedback for LLM self-correction."""
        return f"""Your previous attempt to generate a quantum circuit had an error.

ORIGINAL TASK:
{goal}

YOUR PREVIOUS OUTPUT:
```qasm
{previous_qasm or "(no valid QASM generated)"}
```

VALIDATION ERROR (Tier {failed_tier.tier} - {failed_tier.name}):
{failed_tier.message}

Details: {failed_tier.details}

INSTRUCTIONS:
1. Analyze the error carefully
2. Fix the issue in your QASM code
3. Output ONLY valid OpenQASM 2.0 code
4. Start with: OPENQASM 2.0; include "qelib1.inc";

Generate the CORRECTED QASM code:"""

    def _build_initial_prompt(self, goal: str, expected_qubits: Optional[int] = None,
                              expected_states: Optional[Dict[str, float]] = None) -> str:
        """Build the initial generation prompt."""
        constraints = []
        if expected_qubits:
            constraints.append(f"- Use exactly {expected_qubits} qubit(s)")
        if expected_states:
            states_str = ", ".join([f"|{s}⟩: {p}" for s, p in expected_states.items()])
            constraints.append(f"- Expected measurement probabilities: {states_str}")

        constraints_section = "\n".join(constraints) if constraints else "- No specific constraints"

        return f"""Generate a quantum circuit for the following task:

TASK:
{goal}

CONSTRAINTS:
{constraints_section}

RULES:
1. Output ONLY valid OpenQASM 2.0 code
2. Start with: OPENQASM 2.0; include "qelib1.inc";
3. Declare qubits with: qreg q[N];
4. Declare classical bits with: creg c[N];
5. Use standard gates: h, x, y, z, cx, cz, ccx, swap, t, s, rx, ry, rz
6. Add measurements with: measure q[i] -> c[i];
7. NO explanations, NO markdown, ONLY QASM code

Generate the OpenQASM 2.0 circuit:"""

    def run(self, goal: str,
            expected_qubits: Optional[int] = None,
            expected_states: Optional[Dict[str, float]] = None,
            max_depth: Optional[int] = None) -> "QuasarResult":
        """
        Run QUASAR-lite orchestration with hierarchical validation.

        Args:
            goal: The problem description
            expected_qubits: Expected number of qubits (for Tier 2)
            expected_states: Expected output states (for Tier 3)
            max_depth: Maximum circuit depth (for Tier 4)

        Returns:
            QuasarResult with final QASM and validation history
        """
        start_time = time.perf_counter()

        llm = self._get_llm()
        llm_calls = 0
        tokens_used = 0
        validation_history = []
        errors = []
        current_qasm = None
        tiers_passed = []
        # BUG FIX: previously the fall-through return reported
        # iterations=self.max_iterations even when the loop broke early on an
        # LLM error; track the actual number of attempts instead.
        attempts = 0

        system_prompt = """You are an expert quantum computing engineer.
Your task is to generate valid OpenQASM 2.0 code for quantum circuits.
You will receive feedback if your code has errors and must correct them.
Always output ONLY valid QASM code, no explanations."""

        # Initial prompt
        user_prompt = self._build_initial_prompt(goal, expected_qubits, expected_states)

        for iteration in range(self.max_iterations):
            attempts = iteration + 1

            # Generate QASM
            try:
                response = llm.generate(
                    messages=[
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": user_prompt}
                    ],
                    temperature=0.1 + (iteration * 0.1),  # Increase temperature on retries
                    max_tokens=1500
                )
                llm_calls += 1
                tokens_used += response.tokens_used

                current_qasm = self._extract_qasm(response.text)

                if not current_qasm:
                    errors.append(f"Iteration {iteration+1}: Failed to extract QASM")
                    user_prompt = self._build_feedback_prompt(
                        goal, response.text,
                        ValidationTier(0, "Extraction", False, "No valid QASM found in response"),
                        iteration
                    )
                    continue

            except KeyboardInterrupt:
                raise  # Re-raise keyboard interrupt
            except Exception as e:
                errors.append(f"Iteration {iteration+1}: LLM error - {e}")
                logger.error(f"QUASAR LLM error: {e}")
                # Don't continue retrying on LLM errors, they'll likely fail again
                break

            # Run hierarchical validation (tiers_passed reflects the latest attempt)
            all_passed = True
            tiers_passed = []

            # Tier 1: Syntax
            tier1 = self._validate_tier1_syntax(current_qasm)
            validation_history.append(tier1)
            if not tier1.passed:
                all_passed = False
                user_prompt = self._build_feedback_prompt(goal, current_qasm, tier1, iteration)
                continue
            tiers_passed.append(1)

            # Tier 2: Semantic
            tier2 = self._validate_tier2_semantic(current_qasm, expected_qubits)
            validation_history.append(tier2)
            if not tier2.passed:
                all_passed = False
                user_prompt = self._build_feedback_prompt(goal, current_qasm, tier2, iteration)
                continue
            tiers_passed.append(2)

            # Tier 3: Correctness (only if expected states provided)
            if expected_states:
                tier3 = self._validate_tier3_correctness(current_qasm, expected_states)
                validation_history.append(tier3)
                if not tier3.passed:
                    all_passed = False
                    user_prompt = self._build_feedback_prompt(goal, current_qasm, tier3, iteration)
                    continue
                tiers_passed.append(3)

            # Tier 4: Optimization (informational, doesn't fail the run)
            tier4 = self._validate_tier4_optimization(current_qasm, max_depth)
            validation_history.append(tier4)
            if tier4.passed:
                tiers_passed.append(4)

            # All validations passed!
            if all_passed:
                elapsed = (time.perf_counter() - start_time) * 1000
                return QuasarResult(
                    success=True,
                    final_qasm=current_qasm,
                    execution_time_ms=elapsed,
                    llm_calls=llm_calls,
                    tokens_used=tokens_used,
                    tiers_passed=tiers_passed,
                    validation_history=validation_history,
                    errors=errors,
                    iterations=iteration + 1
                )

        # Max iterations reached (or early break on LLM error).
        # Partial success: keep the circuit if it at least passed syntax + semantic.
        elapsed = (time.perf_counter() - start_time) * 1000
        return QuasarResult(
            success=current_qasm is not None and len(tiers_passed) >= 2,
            final_qasm=current_qasm,
            execution_time_ms=elapsed,
            llm_calls=llm_calls,
            tokens_used=tokens_used,
            tiers_passed=tiers_passed,
            validation_history=validation_history,
            errors=errors,
            iterations=attempts
        )
|
| 489 |
+
|
| 490 |
+
|
| 491 |
+
class HybridOrchestrator:
    """
    Hybrid Orchestrator: NAKED speed + QUASAR reliability

    Strategy:
    1. Try NAKED mode first (fast, cheap)
    2. If NAKED fails validation, fall back to QUASAR (reliable, more expensive)

    This gives best of both worlds:
    - Easy problems: solved in 1 LLM call via NAKED
    - Hard problems: solved via QUASAR with feedback loops
    """

    def __init__(self):
        self._naked = None   # Lazily created NakedOrchestrator
        self._quasar = None  # Lazily created QuasarOrchestrator

    def _get_naked(self):
        """Lazy load NAKED orchestrator."""
        if self._naked is None:
            from orchestrators.orchestrator import NakedOrchestrator
            self._naked = NakedOrchestrator()
        return self._naked

    def _get_quasar(self):
        """Lazy load QUASAR orchestrator."""
        if self._quasar is None:
            self._quasar = QuasarOrchestrator(max_iterations=3)
        return self._quasar

    def _naked_tokens_used(self, naked_result) -> int:
        """Safely read the token count from a NAKED run result.

        BUG FIX: the previous code used
        ``agent_results.get("naked_llm", {}).data`` which raised
        AttributeError when the "naked_llm" key was missing (a plain dict
        has no ``.data`` attribute).
        """
        if not naked_result.agent_results:
            return 0
        naked_agent = naked_result.agent_results.get("naked_llm")
        if naked_agent is None or not naked_agent.data:
            return 0
        return naked_agent.data.get("tokens_used", 0)

    def run(self, goal: str,
            expected_qubits: Optional[int] = None,
            expected_states: Optional[Dict[str, float]] = None,
            max_depth: Optional[int] = None) -> "QuasarResult":
        """
        Run hybrid orchestration: NAKED first, QUASAR on failure.

        Args:
            goal: The problem description
            expected_qubits: Expected number of qubits (for Tier 2)
            expected_states: Expected output states (for Tier 3)
            max_depth: Maximum circuit depth (for Tier 4)

        Returns:
            QuasarResult for compatibility with comprehensive tests
        """
        start_time = time.perf_counter()

        # Step 1: Try NAKED mode (single cheap LLM call)
        naked = self._get_naked()
        naked_result = naked.run(goal)

        if naked_result.success and naked_result.final_output:
            # Validate NAKED output with QUASAR's first two tiers
            quasar = self._get_quasar()
            qasm = naked_result.final_output

            tier1 = quasar._validate_tier1_syntax(qasm)
            tier2 = quasar._validate_tier2_semantic(qasm, expected_qubits)

            if tier1.passed and tier2.passed:
                # NAKED succeeded!
                elapsed = (time.perf_counter() - start_time) * 1000
                return QuasarResult(
                    success=True,
                    final_qasm=qasm,
                    execution_time_ms=elapsed,
                    llm_calls=1,
                    tokens_used=self._naked_tokens_used(naked_result),
                    tiers_passed=[1, 2],
                    validation_history=[tier1, tier2],
                    errors=[],
                    iterations=1
                )

        # Step 2: NAKED failed (or its output failed validation), use QUASAR
        logger.info(f"NAKED failed, falling back to QUASAR for: {goal[:50]}...")
        quasar = self._get_quasar()
        return quasar.run(goal, expected_qubits, expected_states, max_depth)
|
orchestrators/router.py
ADDED
|
@@ -0,0 +1,188 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Path: QAgents-workflos/orchestrators/router.py
|
| 2 |
+
# Relations: Used by orchestrators/orchestrator.py, run_quality_eval.py
|
| 3 |
+
# Description: Difficulty-aware orchestrator selection based on problem complexity
|
| 4 |
+
# Routes easy problems to NAKED (fastest, best quality)
|
| 5 |
+
# Routes medium to NAKED+optimization, hard to GUIDED
|
| 6 |
+
|
| 7 |
+
"""
|
| 8 |
+
Difficulty-Aware Router: Selects optimal orchestration mode based on problem complexity.
|
| 9 |
+
|
| 10 |
+
Based on quality evaluation findings:
|
| 11 |
+
- NAKED mode: Best for easy problems (47.9/100 quality, 3.7s)
|
| 12 |
+
- NAKED+Optimizer: Best for medium (post-generation refinement)
|
| 13 |
+
- GUIDED: For hard problems (agents may add value for complex algorithms)
|
| 14 |
+
|
| 15 |
+
This router balances quality, cost, and execution time.
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
from typing import Optional, Dict, Literal
|
| 19 |
+
from dataclasses import dataclass
|
| 20 |
+
from tests.test_problems import TestProblem, ProblemDifficulty
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
@dataclass
class RoutingDecision:
    """Result of routing decision."""
    mode: Literal["naked", "guided", "blackboard"]  # Orchestrator mode to use
    reason: str                  # Human-readable explanation of the choice
    expected_quality: float      # Predicted quality score (out of 100, per ROUTING_CONFIG)
    expected_llm_calls: int      # Predicted number of LLM calls for this mode
    expected_time_ms: int        # Predicted wall-clock time in milliseconds
    use_optimizer: bool = False  # Whether to run post-generation optimization
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
class DifficultyAwareRouter:
    """
    Routes problems to optimal orchestrators based on difficulty and characteristics.

    Strategy:
    - EASY: Use NAKED (proven best)
    - MEDIUM: Use NAKED + post-optimization
    - HARD: Use GUIDED if agents help, NAKED+optimizer as fallback

    Can be configured for experimentation.
    """

    # Per-difficulty routing table; numbers come from quality-evaluation runs
    # and can be tuned as new data arrives.
    ROUTING_CONFIG = {
        "easy": {
            "primary_mode": "naked",
            "use_optimizer": False,
            "fallback_mode": "guided",
            "expected_quality": 47.9,
            "expected_llm_calls": 3,
            "expected_time_ms": 3700,
        },
        "medium": {
            "primary_mode": "naked",
            "use_optimizer": True,  # Add post-generation optimization
            "fallback_mode": "guided",
            "expected_quality": 50.0,  # Estimated with optimizer
            "expected_llm_calls": 3,
            "expected_time_ms": 5000,
        },
        "hard": {
            "primary_mode": "guided",  # Agents might help for complex algorithms
            "use_optimizer": True,
            "fallback_mode": "naked",
            "expected_quality": 55.0,  # Estimated
            "expected_llm_calls": 7,
            "expected_time_ms": 25000,
        }
    }

    @classmethod
    def route(cls, problem: TestProblem,
              prefer_naked: bool = False,
              prefer_guided: bool = False) -> RoutingDecision:
        """
        Route a problem to the optimal orchestrator.

        Args:
            problem: The quantum circuit problem to solve
            prefer_naked: Force NAKED mode (for testing)
            prefer_guided: Force GUIDED mode (for testing)

        Returns:
            RoutingDecision with selected mode and metadata
        """
        # Explicit overrides always win
        if prefer_naked:
            return cls._make_decision("naked", problem, "User override")
        if prefer_guided:
            return cls._make_decision("guided", problem, "User override")

        # Normalize the difficulty to its string form (enum or plain value)
        raw = problem.difficulty
        difficulty = raw.value if hasattr(raw, 'value') else str(raw)

        entry = cls.ROUTING_CONFIG.get(difficulty)
        if not entry:
            # Unknown difficulty: be conservative and use the agent pipeline
            return cls._make_decision("guided", problem, f"Unknown difficulty: {difficulty}")

        # Route according to the configured table for this difficulty
        return cls._make_decision(
            entry["primary_mode"],
            problem,
            f"Routed based on difficulty: {difficulty}",
            use_optimizer=entry.get("use_optimizer", False),
            expected_quality=entry["expected_quality"],
            expected_llm_calls=entry["expected_llm_calls"],
            expected_time_ms=entry["expected_time_ms"],
        )

    @classmethod
    def route_batch(cls, problems: list) -> Dict[str, RoutingDecision]:
        """Route multiple problems, keyed by problem id."""
        decisions: Dict[str, RoutingDecision] = {}
        for problem in problems:
            decisions[problem.id] = cls.route(problem)
        return decisions

    @classmethod
    def _make_decision(cls, mode: str, problem: TestProblem, reason: str,
                       use_optimizer: bool = False,
                       expected_quality: float = 45.0,
                       expected_llm_calls: int = 3,
                       expected_time_ms: int = 5000) -> RoutingDecision:
        """Package routing parameters into a RoutingDecision."""
        return RoutingDecision(
            mode=mode,
            reason=reason,
            expected_quality=expected_quality,
            expected_llm_calls=expected_llm_calls,
            expected_time_ms=expected_time_ms,
            use_optimizer=use_optimizer,
        )

    @classmethod
    def print_strategy(cls):
        """Print routing strategy to stdout for quick inspection."""
        banner = "=" * 80
        print("\n" + banner)
        print("DIFFICULTY-AWARE ROUTING STRATEGY")
        print(banner)

        for level in ("easy", "medium", "hard"):
            cfg = cls.ROUTING_CONFIG[level]
            print(f"\n{level.upper()}:")
            print(f"  Primary Mode: {cfg['primary_mode']}")
            print(f"  Use Optimizer: {cfg['use_optimizer']}")
            print(f"  Fallback: {cfg['fallback_mode']}")
            print(f"  Expected Quality: {cfg['expected_quality']:.1f}/100")
            print(f"  Expected LLM Calls: {cfg['expected_llm_calls']}")
            print(f"  Expected Time: {cfg['expected_time_ms']}ms")

        print("\n" + banner)
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
def select_orchestrator_mode(problem: TestProblem) -> str:
    """
    Convenience function: Get orchestrator mode for a problem.

    Usage:
        mode = select_orchestrator_mode(problem)
        orchestrator = create_orchestrator(mode)
    """
    # Delegate to the router and expose only the chosen mode string.
    return DifficultyAwareRouter.route(problem).mode
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
def should_use_optimizer(problem: TestProblem) -> bool:
    """Check if optimization should be applied after generation."""
    # Route once and read the optimizer flag off the decision.
    return DifficultyAwareRouter.route(problem).use_optimizer
|
| 173 |
+
|
| 174 |
+
|
| 175 |
+
# Example usage
if __name__ == "__main__":
    from tests.test_problems import EASY_PROBLEMS, MEDIUM_PROBLEMS, HARD_PROBLEMS

    print("\nExample: Routing all problems")
    print("-" * 80)

    # Demo: route every known problem and show the chosen mode + reason.
    for prob in EASY_PROBLEMS + MEDIUM_PROBLEMS + HARD_PROBLEMS:
        d = DifficultyAwareRouter.route(prob)
        print(f"{prob.id:15} -> {d.mode:10} ({d.reason})")

    DifficultyAwareRouter.print_strategy()
|
prompts/__init__.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Prompts module: System prompts for all agents."""
|
| 2 |
+
|
| 3 |
+
from .agent_prompts import (
|
| 4 |
+
ARCHITECT_PROMPT,
|
| 5 |
+
BUILDER_PROMPT,
|
| 6 |
+
VALIDATOR_PROMPT,
|
| 7 |
+
OPTIMIZER_PROMPT,
|
| 8 |
+
ANALYZER_PROMPT,
|
| 9 |
+
SCORER_PROMPT,
|
| 10 |
+
COORDINATOR_PROMPT,
|
| 11 |
+
ALL_PROMPTS,
|
| 12 |
+
get_prompt
|
| 13 |
+
)
|
| 14 |
+
|
| 15 |
+
__all__ = [
|
| 16 |
+
"ARCHITECT_PROMPT",
|
| 17 |
+
"BUILDER_PROMPT",
|
| 18 |
+
"VALIDATOR_PROMPT",
|
| 19 |
+
"OPTIMIZER_PROMPT",
|
| 20 |
+
"ANALYZER_PROMPT",
|
| 21 |
+
"SCORER_PROMPT",
|
| 22 |
+
"COORDINATOR_PROMPT",
|
| 23 |
+
"ALL_PROMPTS",
|
| 24 |
+
"get_prompt"
|
| 25 |
+
]
|
prompts/agent_prompts.py
ADDED
|
@@ -0,0 +1,276 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Prompts Module: System prompts for all agents.
|
| 3 |
+
Each prompt defines the agent's behavior, constraints, and expertise.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
# ============================================================
|
| 7 |
+
# ARCHITECT AGENT PROMPT
|
| 8 |
+
# ============================================================
|
| 9 |
+
|
| 10 |
+
ARCHITECT_PROMPT = """You are a Quantum Circuit Architect agent. Your role is to plan and design quantum circuits at a high level.
|
| 11 |
+
|
| 12 |
+
## Your Responsibilities:
|
| 13 |
+
1. Understand the user's goal and translate it into a circuit design plan
|
| 14 |
+
2. Choose appropriate circuit templates or patterns
|
| 15 |
+
3. Determine the number of qubits and overall structure needed
|
| 16 |
+
4. Consider hardware constraints when planning
|
| 17 |
+
|
| 18 |
+
## Your Tools:
|
| 19 |
+
- create_from_template: Use predefined templates (bell_state, ghz, qft, grover)
|
| 20 |
+
- generate_from_description: Create circuits from natural language
|
| 21 |
+
- analyze_circuit: Analyze existing circuits to understand their structure
|
| 22 |
+
|
| 23 |
+
## Guidelines:
|
| 24 |
+
- Start simple - prefer smaller circuits when possible
|
| 25 |
+
- Consider the target hardware's qubit count and connectivity
|
| 26 |
+
- Break complex goals into simpler sub-circuits that can be composed
|
| 27 |
+
- Document your reasoning for the chosen approach
|
| 28 |
+
|
| 29 |
+
## Output Format:
|
| 30 |
+
When you select a tool, explain your reasoning briefly. Focus on:
|
| 31 |
+
1. Why this approach fits the goal
|
| 32 |
+
2. What the expected circuit structure will be
|
| 33 |
+
3. Any constraints or considerations for the next steps
|
| 34 |
+
|
| 35 |
+
Be concise and action-oriented. Your job is to get a working circuit started."""
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
# ============================================================
|
| 39 |
+
# BUILDER AGENT PROMPT
|
| 40 |
+
# ============================================================
|
| 41 |
+
|
| 42 |
+
BUILDER_PROMPT = """You are a Quantum Circuit Builder agent. Your role is to construct and modify quantum circuits.
|
| 43 |
+
|
| 44 |
+
## Your Responsibilities:
|
| 45 |
+
1. Build circuits based on architectural plans
|
| 46 |
+
2. Compose multiple circuits together
|
| 47 |
+
3. Apply circuit transformations (tensor, repeat)
|
| 48 |
+
4. Ensure the circuit syntax is correct
|
| 49 |
+
|
| 50 |
+
## Your Tools:
|
| 51 |
+
- create_from_template: Build from predefined templates
|
| 52 |
+
- generate_random_circuit: Create random circuits for testing
|
| 53 |
+
- generate_from_description: Build from natural language
|
| 54 |
+
- compose_circuits: Combine circuits sequentially
|
| 55 |
+
- tensor_circuits: Combine circuits in parallel
|
| 56 |
+
- repeat_circuit: Repeat a circuit pattern
|
| 57 |
+
|
| 58 |
+
## Guidelines:
|
| 59 |
+
- Follow the architect's plan closely
|
| 60 |
+
- Use compose_circuits to chain operations
|
| 61 |
+
- Use tensor_circuits when operations should be parallel
|
| 62 |
+
- Start with simple building blocks and combine them
|
| 63 |
+
- Check that qubit counts match when composing
|
| 64 |
+
|
| 65 |
+
## Output Format:
|
| 66 |
+
Produce valid OpenQASM 2.0 circuits. When using tools:
|
| 67 |
+
1. Specify exact parameters
|
| 68 |
+
2. Explain how this builds toward the goal
|
| 69 |
+
3. Note any assumptions about qubit ordering"""
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
# ============================================================
|
| 73 |
+
# VALIDATOR AGENT PROMPT
|
| 74 |
+
# ============================================================
|
| 75 |
+
|
| 76 |
+
VALIDATOR_PROMPT = """You are a Quantum Circuit Validator agent. Your role is to ensure circuits are correct and executable.
|
| 77 |
+
|
| 78 |
+
## Your Responsibilities:
|
| 79 |
+
1. Validate circuit syntax
|
| 80 |
+
2. Check hardware connectivity compliance
|
| 81 |
+
3. Verify unitary correctness
|
| 82 |
+
4. Report any issues clearly
|
| 83 |
+
|
| 84 |
+
## Your Tools:
|
| 85 |
+
- validate_syntax: Check QASM syntax for errors
|
| 86 |
+
- check_connectivity: Verify circuit works on target hardware
|
| 87 |
+
- verify_unitary: Confirm circuit produces valid unitary
|
| 88 |
+
|
| 89 |
+
## Validation Order:
|
| 90 |
+
1. ALWAYS start with syntax validation
|
| 91 |
+
2. Then check connectivity for the target hardware
|
| 92 |
+
3. Finally verify unitary correctness
|
| 93 |
+
|
| 94 |
+
## Guidelines:
|
| 95 |
+
- Be thorough - check all aspects
|
| 96 |
+
- Report specific line numbers and gates for errors
|
| 97 |
+
- Suggest fixes when possible
|
| 98 |
+
- Hardware profiles available: ibm_eagle, ionq_aria, rigetti_aspen
|
| 99 |
+
|
| 100 |
+
## Output Format:
|
| 101 |
+
Provide clear validation results:
|
| 102 |
+
- PASS/FAIL for each check
|
| 103 |
+
- Specific error locations if failed
|
| 104 |
+
- Suggestions for fixing issues"""
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
# ============================================================
|
| 108 |
+
# OPTIMIZER AGENT PROMPT
|
| 109 |
+
# ============================================================
|
| 110 |
+
|
| 111 |
+
OPTIMIZER_PROMPT = """You are a Quantum Circuit Optimizer agent. Your role is to improve circuit efficiency.
|
| 112 |
+
|
| 113 |
+
## Your Responsibilities:
|
| 114 |
+
1. Reduce circuit depth
|
| 115 |
+
2. Minimize gate count
|
| 116 |
+
3. Improve hardware fitness
|
| 117 |
+
4. Apply optimization strategies
|
| 118 |
+
|
| 119 |
+
## Your Tools:
|
| 120 |
+
- generate_inverse: Create inverse for identity elimination
|
| 121 |
+
- compose_circuits: Restructure by recomposing
|
| 122 |
+
- analyze_circuit: Check current metrics
|
| 123 |
+
- calculate_complexity: Get complexity score
|
| 124 |
+
- calculate_hardware_fitness: Check hardware compatibility
|
| 125 |
+
|
| 126 |
+
## Optimization Strategies:
|
| 127 |
+
1. Gate cancellation: U * U† = I
|
| 128 |
+
2. Gate commutation: Reorder for parallel execution
|
| 129 |
+
3. Decomposition: Break complex gates into native gates
|
| 130 |
+
4. Depth reduction: Maximize parallelism
|
| 131 |
+
|
| 132 |
+
## Guidelines:
|
| 133 |
+
- Always measure before and after optimization
|
| 134 |
+
- Target specific metrics (depth, gates, or fitness)
|
| 135 |
+
- Small improvements compound - iterate if needed
|
| 136 |
+
- Don't sacrifice correctness for speed
|
| 137 |
+
|
| 138 |
+
## Output Format:
|
| 139 |
+
Report optimization results:
|
| 140 |
+
- Before/after metrics
|
| 141 |
+
- Techniques applied
|
| 142 |
+
- Improvement percentage"""
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
# ============================================================
|
| 146 |
+
# ANALYZER AGENT PROMPT
|
| 147 |
+
# ============================================================
|
| 148 |
+
|
| 149 |
+
ANALYZER_PROMPT = """You are a Quantum Circuit Analyzer agent. Your role is to extract insights from circuits.
|
| 150 |
+
|
| 151 |
+
## Your Responsibilities:
|
| 152 |
+
1. Parse and understand circuit structure
|
| 153 |
+
2. Measure circuit properties (depth, gates, etc.)
|
| 154 |
+
3. Simulate and get state/probability information
|
| 155 |
+
4. Estimate resource requirements
|
| 156 |
+
|
| 157 |
+
## Your Tools:
|
| 158 |
+
- parse_qasm: Extract circuit structure
|
| 159 |
+
- analyze_circuit: Get comprehensive analysis
|
| 160 |
+
- get_circuit_depth: Measure depth
|
| 161 |
+
- get_statevector: Get quantum state
|
| 162 |
+
- get_probabilities: Get measurement probabilities
|
| 163 |
+
- estimate_resources: Resource estimation
|
| 164 |
+
- estimate_noise: Noise impact estimation
|
| 165 |
+
|
| 166 |
+
## Guidelines:
|
| 167 |
+
- Start with structural analysis (parse, analyze)
|
| 168 |
+
- Then get simulation results if needed
|
| 169 |
+
- Consider noise for realistic assessment
|
| 170 |
+
- Report findings clearly and completely
|
| 171 |
+
|
| 172 |
+
## Analysis Areas:
|
| 173 |
+
1. Structure: qubits, gates, depth, connectivity
|
| 174 |
+
2. State: amplitudes, probabilities, entanglement
|
| 175 |
+
3. Resources: execution time, error rates
|
| 176 |
+
4. Comparison: vs ideal, vs other circuits
|
| 177 |
+
|
| 178 |
+
## Output Format:
|
| 179 |
+
Provide structured analysis:
|
| 180 |
+
- Circuit summary (qubits, gates, depth)
|
| 181 |
+
- Key observations
|
| 182 |
+
- Recommendations if applicable"""
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
# ============================================================
|
| 186 |
+
# SCORER AGENT PROMPT
|
| 187 |
+
# ============================================================
|
| 188 |
+
|
| 189 |
+
SCORER_PROMPT = """You are a Quantum Circuit Scorer agent. Your role is to evaluate circuit quality.
|
| 190 |
+
|
| 191 |
+
## Your Responsibilities:
|
| 192 |
+
1. Calculate complexity scores
|
| 193 |
+
2. Assess hardware fitness
|
| 194 |
+
3. Measure expressibility
|
| 195 |
+
4. Provide overall quality assessment
|
| 196 |
+
|
| 197 |
+
## Your Tools:
|
| 198 |
+
- calculate_complexity: Lower is better (simpler circuit)
|
| 199 |
+
- calculate_hardware_fitness: Higher is better (easier to run)
|
| 200 |
+
- calculate_expressibility: How much state space coverage
|
| 201 |
+
- simulate_circuit: Verify functionality via simulation
|
| 202 |
+
|
| 203 |
+
## Scoring Framework:
|
| 204 |
+
1. Complexity (weight: 30%): Gate count, depth
|
| 205 |
+
2. Hardware Fitness (weight: 40%): Connectivity, native gates
|
| 206 |
+
3. Expressibility (weight: 20%): State space coverage
|
| 207 |
+
4. Correctness (weight: 10%): Simulation accuracy
|
| 208 |
+
|
| 209 |
+
## Guidelines:
|
| 210 |
+
- Always get all relevant scores
|
| 211 |
+
- Consider the specific use case when weighting
|
| 212 |
+
- Compare against reference circuits when available
|
| 213 |
+
- Provide actionable feedback
|
| 214 |
+
|
| 215 |
+
## Output Format:
|
| 216 |
+
Provide comprehensive scoring:
|
| 217 |
+
- Individual scores with explanations
|
| 218 |
+
- Weighted overall score
|
| 219 |
+
- Strengths and weaknesses
|
| 220 |
+
- Improvement suggestions"""
|
| 221 |
+
|
| 222 |
+
|
| 223 |
+
# ============================================================
|
| 224 |
+
# COORDINATOR AGENT PROMPT (for Guided mode)
|
| 225 |
+
# ============================================================
|
| 226 |
+
|
| 227 |
+
COORDINATOR_PROMPT = """You are a Workflow Coordinator agent. Your role is to orchestrate other agents in a structured workflow.
|
| 228 |
+
|
| 229 |
+
## Your Responsibilities:
|
| 230 |
+
1. Parse the user's goal
|
| 231 |
+
2. Determine the workflow sequence
|
| 232 |
+
3. Dispatch tasks to specialized agents
|
| 233 |
+
4. Collect and synthesize results
|
| 234 |
+
|
| 235 |
+
## Workflow Templates:
|
| 236 |
+
1. BUILD: Architect → Builder → Validator → Scorer
|
| 237 |
+
2. OPTIMIZE: Analyzer → Optimizer → Validator → Scorer
|
| 238 |
+
3. EVALUATE: Analyzer → Scorer
|
| 239 |
+
4. FULL: Architect → Builder → Validator → Optimizer → Analyzer → Scorer
|
| 240 |
+
|
| 241 |
+
## Guidelines:
|
| 242 |
+
- Choose the appropriate workflow for the goal
|
| 243 |
+
- Monitor agent progress and handle failures
|
| 244 |
+
- Aggregate results for final report
|
| 245 |
+
- Ensure each step completes before proceeding
|
| 246 |
+
|
| 247 |
+
## State Machine:
|
| 248 |
+
- PLANNING: Determine workflow
|
| 249 |
+
- DISPATCHING: Assign task to agent
|
| 250 |
+
- WAITING: Wait for agent completion
|
| 251 |
+
- COLLECTING: Gather results
|
| 252 |
+
- COMPLETED: Final synthesis
|
| 253 |
+
|
| 254 |
+
## Output Format:
|
| 255 |
+
Report workflow execution:
|
| 256 |
+
- Workflow chosen and why
|
| 257 |
+
- Each step's outcome
|
| 258 |
+
- Final aggregated results
|
| 259 |
+
- Any issues encountered"""
|
| 260 |
+
|
| 261 |
+
|
| 262 |
+
# Dictionary for easy access.
# Maps the agent-type keys accepted by get_prompt() to the module's
# system-prompt constants defined above.
ALL_PROMPTS = {
    "architect": ARCHITECT_PROMPT,
    "builder": BUILDER_PROMPT,
    "validator": VALIDATOR_PROMPT,
    "optimizer": OPTIMIZER_PROMPT,
    "analyzer": ANALYZER_PROMPT,
    "scorer": SCORER_PROMPT,
    "coordinator": COORDINATOR_PROMPT
}
|
| 272 |
+
|
| 273 |
+
|
| 274 |
+
def get_prompt(agent_type: str) -> str:
    """Return the system prompt for *agent_type*; "" when the type is unknown."""
    try:
        return ALL_PROMPTS[agent_type]
    except KeyError:
        return ""
|
prompts/optimized_prompts.py
ADDED
|
@@ -0,0 +1,289 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Path: QAgents-workflos/prompts/optimized_prompts.py
|
| 2 |
+
# Relations: Used by orchestrators/orchestrator.py (NakedOrchestrator)
|
| 3 |
+
# Description: Enhanced prompts for NAKED mode with quantum optimization guidance
|
| 4 |
+
# These prompts achieve 47.9/100 quality and can be further improved
|
| 5 |
+
# by adding explicit optimization constraints
|
| 6 |
+
|
| 7 |
+
"""
|
| 8 |
+
Optimized Prompts: Direct LLM prompts for quantum circuit generation
|
| 9 |
+
|
| 10 |
+
Based on quality evaluation findings:
|
| 11 |
+
- NAKED mode outperforms multi-agent approaches
|
| 12 |
+
- Direct prompts with explicit constraints improve quality
|
| 13 |
+
- Avoids hallucinated measurements and unnecessary operations
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
# =============================================================================
|
| 17 |
+
# QUANTUM CIRCUIT GENERATION PROMPT (NAKED MODE - OPTIMIZED)
|
| 18 |
+
# =============================================================================
|
| 19 |
+
|
| 20 |
+
# Base NAKED-mode template.
# FIX: the original embedded Python-expression syntax inside str.format
# replacement fields ('{if max_depth else ...}', '{forbidden_gates if ...}'),
# which is not valid format-string grammar and made .format() raise at
# runtime. Callers (get_optimized_prompt) already resolve the fallback
# values ("minimal"/"none") before formatting, so plain fields are correct.
QUANTUM_CIRCUIT_OPTIMIZED = """You are an expert quantum circuit designer. Generate OpenQASM 2.0 circuits that are:
1. MINIMAL - use fewest possible gates
2. CORRECT - solve the specific problem
3. OPTIMAL - prefer lower depth and fewer two-qubit gates

CRITICAL CONSTRAINTS:
- Do NOT add measurement operations unless explicitly requested
- Do NOT use extra qubits beyond what the problem requires
- Do NOT add arbitrary gates (be precise)
- Prefer single-qubit gates over two-qubit gates
- Minimize circuit depth

PROBLEM: {problem_statement}

EXPECTED OUTPUT:
- Exactly {min_qubits} qubits (may use up to {max_qubits} if needed, but justify)
- Maximum {max_depth} gate depth
- Only gates in: {required_gates}
- Avoid gates: {forbidden_gates}

SOLUTION APPROACH:
1. Understand what quantum state/operation is needed
2. Choose the minimal gate sequence
3. Verify the gates are available
4. Return ONLY the QASM code

Return the complete OpenQASM 2.0 circuit wrapped in code blocks.
Format:
```qasm
OPENQASM 2.0;
include "qelib1.inc";
[Your circuit here]
```

Remember: Simplicity and correctness first, optimization second."""
|
| 55 |
+
|
| 56 |
+
# =============================================================================
|
| 57 |
+
# ENHANCED QUANTUM CIRCUIT GENERATION (WITH OPTIMIZATION HINTS)
|
| 58 |
+
# =============================================================================
|
| 59 |
+
|
| 60 |
+
# Advanced NAKED-mode template (default for get_optimized_prompt).
# FIX: '{max_depth if max_depth else "minimal"}' and
# '{forbidden_gates if forbidden_gates else "none"}' are not valid
# str.format replacement fields — .format() raises KeyError on them.
# The caller already passes the resolved values, so plain '{max_depth}'
# and '{forbidden_gates}' are correct.
QUANTUM_CIRCUIT_OPTIMIZED_V2 = """You are an expert quantum circuit designer with deep knowledge of quantum gate theory and optimization.

TASK: Generate an OpenQASM 2.0 quantum circuit that solves the following problem.

PROBLEM: {problem_statement}

DESIGN REQUIREMENTS:
✓ Use exactly {min_qubits} qubit(s)
✓ Keep depth ≤ {max_depth}
✓ Only use these gates: {required_gates}
✓ Do NOT use: {forbidden_gates}

CRITICAL RULES (must follow):
1. NO measurement operations unless explicitly required
2. NO extra qubits - use only what's needed
3. NO unnecessary gates - every gate serves a purpose
4. Prefer H, X, Z, CX over complex multi-qubit gates
5. Gate cancellations (e.g., X·X = I) are encouraged

OPTIMIZATION GUIDANCE:
- Minimize depth: Each qubit layer should have parallel operations where possible
- Minimize two-qubit gates: These are most expensive
- Look for identities: XX=I, ZZ=I, HZH=X, HXH=Z, etc.
- Consider what state you're creating, not just what gates to apply

SOLUTION CHECKLIST:
Before generating the circuit, think through:
1. What is the target quantum state? (e.g., |+⟩, |Φ+⟩, etc.)
2. What's the minimal gate sequence to create it?
3. Can any gates be combined or cancelled?
4. Is the depth truly minimal?

OUTPUT FORMAT:
Return ONLY the OpenQASM 2.0 code in a code block:

```qasm
OPENQASM 2.0;
include "qelib1.inc";
qreg q[{min_qubits}];
[Your gates here]
```

Do NOT include explanations, do NOT include measurements, do NOT use extra qubits."""
|
| 103 |
+
|
| 104 |
+
# =============================================================================
|
| 105 |
+
# SPECIALIZED PROMPTS FOR PROBLEM CATEGORIES
|
| 106 |
+
# =============================================================================
|
| 107 |
+
|
| 108 |
+
STATE_PREPARATION_PROMPT = """You are designing a quantum state preparation circuit.
|
| 109 |
+
|
| 110 |
+
PROBLEM: {problem_statement}
|
| 111 |
+
|
| 112 |
+
Your goal is to transform the initial state |0...0⟩ into the target quantum state.
|
| 113 |
+
|
| 114 |
+
TARGET STATE: {expected_states}
|
| 115 |
+
|
| 116 |
+
GATES AVAILABLE: {required_gates}
|
| 117 |
+
|
| 118 |
+
KEY INSIGHTS FOR STATE PREP:
|
| 119 |
+
- Hadamard (H) creates superposition: H|0⟩ = (|0⟩ + |1⟩)/√2
|
| 120 |
+
- Pauli-X flips: X|0⟩ = |1⟩, X|1⟩ = |0⟩
|
| 121 |
+
- Pauli-Z adds phase: Z|1⟩ = -|1⟩
|
| 122 |
+
- Phase flip: |−⟩ = (|0⟩ - |1⟩)/√2 requires X then H
|
| 123 |
+
- Bell states need H on first qubit, then CX
|
| 124 |
+
|
| 125 |
+
SOLUTION:
|
| 126 |
+
Return the minimal OpenQASM circuit:
|
| 127 |
+
|
| 128 |
+
```qasm
|
| 129 |
+
OPENQASM 2.0;
|
| 130 |
+
include "qelib1.inc";
|
| 131 |
+
qreg q[{min_qubits}];
|
| 132 |
+
[Your gates here]
|
| 133 |
+
```"""
|
| 134 |
+
|
| 135 |
+
# Specialized template for entanglement problems.
# FIX: the first Bell-state line contained a mis-encoded glyph
# ("|Φ+" followed by replacement characters instead of the closing "⟩"),
# restored here to "|Φ+⟩" consistent with the other ket notation in
# this module.
# NOTE(review): this constant is not referenced by get_specialized_prompt's
# category table — confirm whether it should be wired to a category.
ENTANGLEMENT_PROMPT = """You are designing an entanglement circuit.

PROBLEM: {problem_statement}

Your goal is to create entanglement between qubits.

TARGET: {expected_states}

ENTANGLEMENT FACTS:
- Bell state |Φ+⟩ = (|00⟩ + |11⟩)/√2 requires: H on qubit 0, CX from 0→1
- Bell state |Φ-⟩ = (|00⟩ - |11⟩)/√2 requires: X on qubit 0, H on qubit 0, CX from 0→1
- GHZ state |GHZ⟩ = (|000⟩ + |111⟩)/√2 needs H on first, two CXs
- Entanglement requires multi-qubit gates (CX/CNOT)

SOLUTION:
Return the minimal OpenQASM circuit:

```qasm
OPENQASM 2.0;
include "qelib1.inc";
qreg q[{min_qubits}];
[Your gates here]
```"""
|
| 158 |
+
|
| 159 |
+
ALGORITHM_PROMPT = """You are implementing a quantum algorithm.
|
| 160 |
+
|
| 161 |
+
PROBLEM: {problem_statement}
|
| 162 |
+
|
| 163 |
+
ALGORITHM STRUCTURE:
|
| 164 |
+
{problem_statement}
|
| 165 |
+
|
| 166 |
+
KEY ALGORITHM COMPONENTS:
|
| 167 |
+
- Prepare superposition (usually with Hadamard)
|
| 168 |
+
- Apply oracle (function evaluation)
|
| 169 |
+
- Apply diffusion/phase flip (algorithm-specific)
|
| 170 |
+
- Measure result
|
| 171 |
+
|
| 172 |
+
SOLUTION:
|
| 173 |
+
Return the complete OpenQASM circuit:
|
| 174 |
+
|
| 175 |
+
```qasm
|
| 176 |
+
OPENQASM 2.0;
|
| 177 |
+
include "qelib1.inc";
|
| 178 |
+
qreg q[{min_qubits}];
|
| 179 |
+
[Your gates here]
|
| 180 |
+
```
|
| 181 |
+
|
| 182 |
+
Focus on correctness of the algorithm structure over minimal gate count."""
|
| 183 |
+
|
| 184 |
+
# =============================================================================
|
| 185 |
+
# GATE SYNTHESIS / DECOMPOSITION
|
| 186 |
+
# =============================================================================
|
| 187 |
+
|
| 188 |
+
# Specialized template for GATE_SYNTHESIS problems.
# FIX: '{forbidden_gates if forbidden_gates else "none"}' is not a valid
# str.format replacement field and made .format() raise. The caller
# (get_specialized_prompt) already substitutes "none" for an empty list,
# so a plain '{forbidden_gates}' field is correct.
GATE_SYNTHESIS_PROMPT = """You are decomposing a complex quantum gate into basic gates.

PROBLEM: {problem_statement}

TARGET GATE: {goal}

DECOMPOSITION FACTS:
- SWAP gate = 3 CX gates (CX a→b, CX b→a, CX a→b)
- CZ gate = H on target, CX, H on target
- Y gate = S·X·S†
- T gate = rotation by π/8 around Z-axis
- Rx(θ) = H·Rz(θ)·H (where applicable)

CONSTRAINTS:
- Only use: {required_gates}
- Avoid: {forbidden_gates}
- Minimize gate count and depth

SOLUTION:
Return the decomposed OpenQASM circuit:

```qasm
OPENQASM 2.0;
include "qelib1.inc";
qreg q[{min_qubits}];
[Your decomposition here]
```"""
|
| 215 |
+
|
| 216 |
+
# =============================================================================
|
| 217 |
+
# HELPER FUNCTION: FORMAT PROMPT FOR PROBLEM
|
| 218 |
+
# =============================================================================
|
| 219 |
+
|
| 220 |
+
def get_optimized_prompt(problem, use_advanced=True):
    """Generate optimized prompt for a problem.

    Args:
        problem: TestProblem instance
        use_advanced: Use advanced V2 prompt with optimization hints

    Returns:
        Formatted prompt string
    """
    exp = problem.expected

    # Fallbacks when the problem doesn't constrain the gate set.
    gates = exp.required_gates or ["h", "x", "z", "cx", "measure"]
    banned = exp.forbidden_gates or []

    if use_advanced:
        template = QUANTUM_CIRCUIT_OPTIMIZED_V2
    else:
        template = QUANTUM_CIRCUIT_OPTIMIZED

    return template.format(
        problem_statement=problem.prompt,
        min_qubits=exp.min_qubits,
        max_qubits=exp.max_qubits,
        max_depth=exp.max_depth or "minimal",
        required_gates=", ".join(gates),
        forbidden_gates=", ".join(banned) if banned else "none",
        # Not every problem spec declares expected_states.
        expected_states=getattr(exp, 'expected_states', "N/A"),
    )
|
| 250 |
+
|
| 251 |
+
|
| 252 |
+
def get_specialized_prompt(problem, use_advanced=True):
    """Generate specialized prompt based on problem category.

    Args:
        problem: TestProblem instance
        use_advanced: Use advanced optimization hints
            (currently unused; kept for interface compatibility)

    Returns:
        Formatted prompt string
    """
    from tests.test_problems import ProblemCategory

    # Category-specific templates; anything unlisted gets the general V2 prompt.
    by_category = {
        ProblemCategory.STATE_PREPARATION: STATE_PREPARATION_PROMPT,
        ProblemCategory.GATE_SYNTHESIS: GATE_SYNTHESIS_PROMPT,
        ProblemCategory.ALGORITHM: ALGORITHM_PROMPT,
        ProblemCategory.ERROR_CORRECTION: QUANTUM_CIRCUIT_OPTIMIZED_V2,
        ProblemCategory.OPTIMIZATION: QUANTUM_CIRCUIT_OPTIMIZED_V2,
    }
    template = by_category.get(problem.category, QUANTUM_CIRCUIT_OPTIMIZED_V2)

    exp = problem.expected
    gates = exp.required_gates or ["h", "x", "z", "cx"]
    banned = exp.forbidden_gates or []

    return template.format(
        problem_statement=problem.prompt,
        goal=problem.name,
        min_qubits=exp.min_qubits,
        max_qubits=exp.max_qubits,
        max_depth=exp.max_depth or "minimal",
        required_gates=", ".join(gates),
        forbidden_gates=", ".join(banned) if banned else "none",
        expected_states=getattr(exp, 'expected_states', "N/A"),
    )
|
requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
google-genai>=0.6.0
|
| 2 |
+
litellm>=1.42.0
|
| 3 |
+
requests>=2.31.0
|
| 4 |
+
python-dotenv>=1.0.0
|
| 5 |
+
pydantic>=2.0.0
|
| 6 |
+
gradio>=4.0.0
|
tasks-project-state.json
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"project": "QAgents-Workflows",
|
| 3 |
+
"version": "0.8.0",
|
| 4 |
+
"description": "Multi-agent quantum circuit optimization system with multi-model fallback",
|
| 5 |
+
"last_updated": "2024-11-28",
|
| 6 |
+
"status": "BLACKBOARD_FIXED_QUASAR_ADDED",
|
| 7 |
+
"notes": "Fixed BLACKBOARD NoneType errors. Added QUASAR orchestrator with tiered verification. Added VERY_HARD problems. Mini test shows NAKED 3.3s and BLACKBOARD 15s both pass EASY.",
|
| 8 |
+
|
| 9 |
+
"comprehensive_test_results": {
|
| 10 |
+
"test_date": "2024-11-29",
|
| 11 |
+
"previous_results": {
|
| 12 |
+
"naked": {"success": "9/9 (100%)", "avg_time_ms": 3929},
|
| 13 |
+
"guided": {"success": "7/9 (78%)", "avg_time_ms": 23120},
|
| 14 |
+
"blackboard": {"success": "2/9 (22%)", "avg_time_ms": 13507}
|
| 15 |
+
},
|
| 16 |
+
"latest_test_20241129": {
|
| 17 |
+
"problem": "HARD - Deutsch Algorithm",
|
| 18 |
+
"naked": {"success": true, "time_ms": 3914, "gates": 5},
|
| 19 |
+
"quasar": {"success": true, "time_ms": 7254, "gates": 5},
|
| 20 |
+
"hybrid": {"success": true, "time_ms": 7181, "gates": 5},
|
| 21 |
+
"blackboard": {"success": true, "time_ms": 20915, "gates": 2},
|
| 22 |
+
"result": "ALL 4 MODES PASSED"
|
| 23 |
+
},
|
| 24 |
+
"very_hard_test": {
|
| 25 |
+
"problem": "VERY_HARD - 4-Qubit QFT",
|
| 26 |
+
"naked": {"success": true, "time_ms": 4473, "gates": 12},
|
| 27 |
+
"quasar": {"success": true, "time_ms": 7811, "gates": 12},
|
| 28 |
+
"hybrid": "interrupted - rate limiting",
|
| 29 |
+
"blackboard": "interrupted - rate limiting"
|
| 30 |
+
}
|
| 31 |
+
},
|
| 32 |
+
|
| 33 |
+
"fixes_applied_20241128": {
|
| 34 |
+
"blackboard_null_safety": {
|
| 35 |
+
"file": "orchestrators/orchestrator.py",
|
| 36 |
+
"changes": ["Added try/except in agent execution loop", "Added null-checking for action and result"]
|
| 37 |
+
},
|
| 38 |
+
"llm_adapter_null_safety": {
|
| 39 |
+
"file": "agents/llm_adapter.py",
|
| 40 |
+
"changes": ["Fixed response.text None handling", "Fixed _estimate_tokens with null-safe len()"]
|
| 41 |
+
}
|
| 42 |
+
},
|
| 43 |
+
|
| 44 |
+
"new_orchestrators": {
|
| 45 |
+
"quasar": {
|
| 46 |
+
"file": "orchestrators/quasar_orchestrator.py",
|
| 47 |
+
"description": "Tiered verification orchestrator (QUASAR-lite)",
|
| 48 |
+
"tiers": [
|
| 49 |
+
"Tier 1: Syntax validation via MCP",
|
| 50 |
+
"Tier 2: Circuit analysis (depth, gates)",
|
| 51 |
+
"Tier 3: Simulation verification",
|
| 52 |
+
"Tier 4: Semantic correctness"
|
| 53 |
+
]
|
| 54 |
+
},
|
| 55 |
+
"hybrid": {
|
| 56 |
+
"description": "NAKED first, QUASAR fallback on failure"
|
| 57 |
+
}
|
| 58 |
+
},
|
| 59 |
+
|
| 60 |
+
"new_problems": {
|
| 61 |
+
"very_hard_difficulty": [
|
| 62 |
+
"4-Qubit QFT",
|
| 63 |
+
"5-Qubit Entanglement Chain",
|
| 64 |
+
"Simon's Algorithm (2-bit)",
|
| 65 |
+
"Quantum Adder (1+1=10)"
|
| 66 |
+
]
|
| 67 |
+
},
|
| 68 |
+
|
| 69 |
+
"model_cascade": {
|
| 70 |
+
"preferred_model": "gemini-2.5-flash-lite",
|
| 71 |
+
"models": [
|
| 72 |
+
{"name": "gemma-3-27b-it", "rpd": 14400, "priority": 1},
|
| 73 |
+
{"name": "gemini-2.5-flash-lite", "rpd": 1000, "priority": 2, "default": true},
|
| 74 |
+
{"name": "gemini-2.5-flash", "rpd": 250, "priority": 3},
|
| 75 |
+
{"name": "gemini-2.0-flash", "rpd": 200, "priority": 4},
|
| 76 |
+
{"name": "gemini-2.5-pro", "rpd": 50, "priority": 5}
|
| 77 |
+
]
|
| 78 |
+
},
|
| 79 |
+
|
| 80 |
+
"architectures": {
|
| 81 |
+
"naked": {
|
| 82 |
+
"description": "Direct LLM-to-QASM generation",
|
| 83 |
+
"status": "PRODUCTION_READY",
|
| 84 |
+
"success_rate": "100%",
|
| 85 |
+
"recommended": true
|
| 86 |
+
},
|
| 87 |
+
"guided": {
|
| 88 |
+
"description": "4-agent pipeline (Analyzer, Designer, Generator, Validator)",
|
| 89 |
+
"status": "DEPRECATED",
|
| 90 |
+
"success_rate": "78%",
|
| 91 |
+
"note": "Replaced by QUASAR"
|
| 92 |
+
},
|
| 93 |
+
"blackboard": {
|
| 94 |
+
"description": "Event-driven multi-agent blackboard",
|
| 95 |
+
"status": "FIXED",
|
| 96 |
+
"success_rate": "~100% (needs full retest)",
|
| 97 |
+
"note": "NoneType errors fixed, ~5x slower than NAKED"
|
| 98 |
+
},
|
| 99 |
+
"quasar": {
|
| 100 |
+
"description": "Tiered verification with MCP tools",
|
| 101 |
+
"status": "NEW",
|
| 102 |
+
"file": "orchestrators/quasar_orchestrator.py"
|
| 103 |
+
},
|
| 104 |
+
"hybrid": {
|
| 105 |
+
"description": "NAKED first, QUASAR fallback",
|
| 106 |
+
"status": "NEW"
|
| 107 |
+
}
|
| 108 |
+
},
|
| 109 |
+
|
| 110 |
+
"new_files_created": [
|
| 111 |
+
{"file": "prompts/optimized_prompts.py", "purpose": "Enhanced prompts for NAKED mode"},
|
| 112 |
+
{"file": "orchestrators/router.py", "purpose": "Difficulty-aware orchestrator selection"},
|
| 113 |
+
{"file": "tests/comprehensive_test.py", "purpose": "Full diagnostic test script"},
|
| 114 |
+
{"file": "docs/COMPREHENSIVE_TEST_ANALYSIS.md", "purpose": "Analysis of all test results"},
|
| 115 |
+
{"file": "docs/STRATEGIC_IMPROVEMENTS.md", "purpose": "Improvement roadmap based on findings"},
|
| 116 |
+
{"file": "docs/PROJECT_ANALYSIS_20251128.md", "purpose": "Deep project analysis"}
|
| 117 |
+
],
|
| 118 |
+
|
| 119 |
+
"recommendations": {
|
| 120 |
+
"immediate": [
|
| 121 |
+
"Adopt NAKED mode for production - 100% success, fastest, most efficient",
|
| 122 |
+
"Fix BLACKBOARD null-checking or deprecate entirely",
|
| 123 |
+
"Integrate optimized_prompts.py into NAKED orchestrator"
|
| 124 |
+
],
|
| 125 |
+
"short_term": [
|
| 126 |
+
"Add circuit quality scoring beyond gate count",
|
| 127 |
+
"Improve GUIDED generator for hard problems",
|
| 128 |
+
"Implement hybrid: NAKED first, GUIDED on failure"
|
| 129 |
+
],
|
| 130 |
+
"long_term": [
|
| 131 |
+
"Auto-select mode based on problem difficulty",
|
| 132 |
+
"MCP validation integration for correctness verification",
|
| 133 |
+
"Cost-aware orchestrator selection"
|
| 134 |
+
]
|
| 135 |
+
},
|
| 136 |
+
|
| 137 |
+
"usage": {
|
| 138 |
+
"prerequisites": [
|
| 139 |
+
"Start MCP server: python QuantumArchitect-MCP/app.py",
|
| 140 |
+
"Set GOOGLE_API_KEY environment variable",
|
| 141 |
+
"Activate venv: & .venv/Scripts/Activate.ps1"
|
| 142 |
+
],
|
| 143 |
+
"commands": {
|
| 144 |
+
"comprehensive_test": "python tests/comprehensive_test.py",
|
| 145 |
+
"quality_eval": "python tests/run_quality_eval.py --mode all --difficulty all",
|
| 146 |
+
"quick_test": "python tests/run_quality_eval.py --quick"
|
| 147 |
+
}
|
| 148 |
+
}
|
| 149 |
+
}
|
tests/__init__.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tests module: Test problems and evaluation harness."""
|
| 2 |
+
|
| 3 |
+
from .test_problems import (
|
| 4 |
+
ProblemDifficulty,
|
| 5 |
+
ProblemCategory,
|
| 6 |
+
ExpectedOutput,
|
| 7 |
+
TestProblem,
|
| 8 |
+
# Problems by ID naming
|
| 9 |
+
PROBLEM_E1_PHASE_FLIP,
|
| 10 |
+
PROBLEM_E2_CONTROLLED_NOT,
|
| 11 |
+
PROBLEM_E3_MEASUREMENT_BASIS,
|
| 12 |
+
PROBLEM_M1_SWAP_DECOMPOSITION,
|
| 13 |
+
PROBLEM_M2_CONTROLLED_Z,
|
| 14 |
+
PROBLEM_M3_PHASE_ESTIMATION_PREP,
|
| 15 |
+
PROBLEM_H1_DEUTSCH,
|
| 16 |
+
PROBLEM_H2_GROVER_2QUBIT,
|
| 17 |
+
PROBLEM_H3_TELEPORTATION_PREP,
|
| 18 |
+
# Collections
|
| 19 |
+
EASY_PROBLEMS,
|
| 20 |
+
MEDIUM_PROBLEMS,
|
| 21 |
+
HARD_PROBLEMS,
|
| 22 |
+
ALL_PROBLEMS,
|
| 23 |
+
get_problem,
|
| 24 |
+
get_problems_by_difficulty,
|
| 25 |
+
get_problems_by_category,
|
| 26 |
+
get_problems_by_tag,
|
| 27 |
+
get_research_problem_set
|
| 28 |
+
)
|
| 29 |
+
|
| 30 |
+
from .evaluation_harness import (
|
| 31 |
+
MetricResult,
|
| 32 |
+
CostMetrics,
|
| 33 |
+
EvaluationResult,
|
| 34 |
+
AggregatedResults,
|
| 35 |
+
EvaluationHarness
|
| 36 |
+
)
|
| 37 |
+
|
| 38 |
+
from .circuit_quality_analyzer import (
|
| 39 |
+
CircuitQualityAnalyzer,
|
| 40 |
+
AnalysisResult,
|
| 41 |
+
get_analyzer
|
| 42 |
+
)
|
| 43 |
+
|
| 44 |
+
from .quality_evaluation_harness import (
|
| 45 |
+
QualityEvaluationHarness,
|
| 46 |
+
run_quick_quality_test
|
| 47 |
+
)
|
| 48 |
+
|
| 49 |
+
# Backward compatibility aliases for callers that import the old name.
BELL_STATE_PROBLEM = PROBLEM_E2_CONTROLLED_NOT  # Bell state is easy_002

# Public API of the tests package: problem definitions, evaluation
# harnesses, and circuit quality analysis helpers.
__all__ = [
    "ProblemDifficulty",
    "ProblemCategory",
    "ExpectedOutput",
    "TestProblem",
    "PROBLEM_E1_PHASE_FLIP",
    "PROBLEM_E2_CONTROLLED_NOT",
    "PROBLEM_E3_MEASUREMENT_BASIS",
    "PROBLEM_M1_SWAP_DECOMPOSITION",
    "PROBLEM_M2_CONTROLLED_Z",
    "PROBLEM_M3_PHASE_ESTIMATION_PREP",
    "PROBLEM_H1_DEUTSCH",
    "PROBLEM_H2_GROVER_2QUBIT",
    "PROBLEM_H3_TELEPORTATION_PREP",
    "EASY_PROBLEMS",
    "MEDIUM_PROBLEMS",
    "HARD_PROBLEMS",
    "ALL_PROBLEMS",
    "get_problem",
    "get_problems_by_difficulty",
    "get_problems_by_category",
    "get_problems_by_tag",
    "get_research_problem_set",
    "MetricResult",
    "CostMetrics",
    "EvaluationResult",
    "AggregatedResults",
    "EvaluationHarness",
    "BELL_STATE_PROBLEM",
    # Quality analysis
    "CircuitQualityAnalyzer",
    "AnalysisResult",
    "get_analyzer",
    "QualityEvaluationHarness",
    "run_quick_quality_test"
]
|
tests/circuit_quality_analyzer.py
ADDED
|
@@ -0,0 +1,351 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Path: QAgents-workflos/tests/circuit_quality_analyzer.py
|
| 2 |
+
# Relations: Uses client/mcp_client.py for MCP calls, database/circuit_quality_db.py for storage
|
| 3 |
+
# Description: Analyzes circuit quality using MCP endpoints
|
| 4 |
+
# Extracts: depth, gate_count, cx_count, hardware_fitness, validation, simulation
|
| 5 |
+
# Returns QualityMetrics for storage in database
|
| 6 |
+
|
| 7 |
+
"""
|
| 8 |
+
Circuit Quality Analyzer: Use MCP endpoints to measure circuit quality.
|
| 9 |
+
This module connects to the MCP server and extracts quality metrics.
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
import re
|
| 13 |
+
import logging
|
| 14 |
+
from typing import Any, Dict, List, Optional, Tuple
|
| 15 |
+
from dataclasses import dataclass
|
| 16 |
+
|
| 17 |
+
logger = logging.getLogger(__name__)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
@dataclass
|
| 21 |
+
class AnalysisResult:
|
| 22 |
+
"""Result from analyzing a circuit."""
|
| 23 |
+
depth: int = 0
|
| 24 |
+
gate_count: int = 0
|
| 25 |
+
cx_count: int = 0
|
| 26 |
+
single_qubit_count: int = 0
|
| 27 |
+
hardware_fitness: float = 0.0
|
| 28 |
+
syntax_valid: bool = False
|
| 29 |
+
complexity_score: float = 0.0
|
| 30 |
+
state_correctness: float = 0.0
|
| 31 |
+
noise_estimate: float = 0.0
|
| 32 |
+
probabilities: Dict[str, float] = None
|
| 33 |
+
errors: List[str] = None
|
| 34 |
+
|
| 35 |
+
def __post_init__(self):
|
| 36 |
+
if self.probabilities is None:
|
| 37 |
+
self.probabilities = {}
|
| 38 |
+
if self.errors is None:
|
| 39 |
+
self.errors = []
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
class CircuitQualityAnalyzer:
    """
    Analyzes circuit quality using MCP endpoints.
    Connects to the running MCP server to get quality metrics.

    Every MCP call is wrapped in try/except: a failing endpoint appends to
    AnalysisResult.errors and the analysis continues with local fallbacks,
    so one broken endpoint never aborts the whole analysis.
    """

    def __init__(self, mcp_url: str = "http://127.0.0.1:7861"):
        # Base URL of the MCP server; the client is created lazily.
        self.mcp_url = mcp_url
        self._client = None

    def _get_client(self):
        """Get or create the MCP client; returns None if unavailable."""
        if self._client is None:
            try:
                from client import get_client
                self._client = get_client(self.mcp_url)
            except Exception as e:
                logger.error(f"Failed to get MCP client: {e}")
                return None
        return self._client

    def _extract_value(self, result: Any, keys: List[str], default: Any = 0) -> Any:
        """Safely extract a scalar value from a nested MCP result.

        Handles scalars, lists (first element), and dicts (first matching
        key in *keys*, drilling one level into nested dicts/lists). Falls
        back to the first numeric value in a dict, then to *default*.
        """
        if result is None:
            return default

        if isinstance(result, (int, float, bool)):
            return result

        if isinstance(result, list):
            return result[0] if result else default

        if isinstance(result, dict):
            for key in keys:
                if key in result:
                    val = result[key]
                    if isinstance(val, (int, float)):
                        return val
                    elif isinstance(val, dict):
                        # Try common nested keys
                        for subkey in ['value', 'score', 'depth', 'count', 'result']:
                            if subkey in val:
                                return val[subkey]
                    elif isinstance(val, list):
                        return val[0] if val else default
                    # Dict without a known subkey (or other type): return as-is.
                    return val
            # Try first value in dict
            for v in result.values():
                if isinstance(v, (int, float)):
                    return v

        return default

    def analyze_circuit(self, qasm_code: str, expected_states: Optional[Dict[str, float]] = None) -> AnalysisResult:
        """
        Analyze a circuit using MCP endpoints.

        Args:
            qasm_code: The QASM code to analyze
            expected_states: Expected probability distribution for correctness check

        Returns:
            AnalysisResult with all quality metrics; partial results plus
            error messages when individual endpoints fail.
        """
        # NOTE(review): assumes MCP responses expose .success/.data/.error —
        # matches the client module's response wrapper; verify if it changes.
        result = AnalysisResult()

        if not qasm_code or not qasm_code.strip():
            result.errors.append("Empty QASM code")
            return result

        client = self._get_client()
        if client is None:
            # Fallback to local analysis
            return self._analyze_locally(qasm_code, expected_states)

        # 1. Validate syntax (endpoint payload shape varies; normalize to bool)
        try:
            resp = client.validate_syntax(qasm_code)
            if resp.success:
                valid = resp.data
                if isinstance(valid, dict):
                    result.syntax_valid = valid.get('valid', False) or valid.get('is_valid', False)
                elif isinstance(valid, bool):
                    result.syntax_valid = valid
                elif isinstance(valid, list):
                    result.syntax_valid = "valid" in str(valid).lower()
                else:
                    result.syntax_valid = bool(valid)
            else:
                result.errors.append(f"Validation error: {resp.error}")
        except Exception as e:
            result.errors.append(f"Validation failed: {e}")
            # Still try to parse locally (crude structural check)
            result.syntax_valid = "OPENQASM" in qasm_code and "qreg" in qasm_code

        # 2. Analyze circuit structure (depth / gate counts)
        try:
            resp = client.analyze_circuit(qasm_code)
            if resp.success and resp.data:
                data = resp.data
                if isinstance(data, dict):
                    result.depth = self._extract_value(data, ['depth', 'circuit_depth'], 0)
                    result.gate_count = self._extract_value(data, ['gate_count', 'gates', 'num_gates', 'total_gates'], 0)
                    result.cx_count = self._extract_value(data, ['cx_count', 'cnot_count', 'two_qubit_gates'], 0)
                    result.single_qubit_count = self._extract_value(data, ['single_qubit_count', 'single_qubit_gates', 'one_qubit_gates'], 0)
        except Exception as e:
            result.errors.append(f"Analysis failed: {e}")
            # Fallback to local parsing
            local = self._parse_qasm_locally(qasm_code)
            result.depth = local.get('depth', 0)
            result.gate_count = local.get('gate_count', 0)
            result.cx_count = local.get('cx_count', 0)
            result.single_qubit_count = local.get('single_qubit_count', 0)

        # 3. Get circuit depth via the dedicated endpoint if not already set
        if result.depth == 0:
            try:
                resp = client.get_circuit_depth(qasm_code)
                if resp.success:
                    result.depth = self._extract_value(resp.data, ['depth', 'value'], 0)
            except Exception as e:
                result.errors.append(f"Depth check failed: {e}")

        # 4. Calculate hardware fitness against a fixed reference backend
        try:
            resp = client.calculate_hardware_fitness(qasm_code, "ibm_brisbane")
            if resp.success:
                result.hardware_fitness = self._extract_value(resp.data,
                    ['fitness', 'fitness_score', 'hardware_fitness', 'score'], 0.0)
                # Normalize percentage-style scores (e.g. 85) down to 0..1.
                if result.hardware_fitness > 1.0:
                    result.hardware_fitness = result.hardware_fitness / 100.0
        except Exception as e:
            result.errors.append(f"Hardware fitness failed: {e}")

        # 5. Calculate complexity
        try:
            resp = client.calculate_complexity_score(qasm_code)
            if resp.success:
                result.complexity_score = self._extract_value(resp.data,
                    ['complexity', 'complexity_score', 'score', 'total'], 0.0)
        except Exception as e:
            result.errors.append(f"Complexity check failed: {e}")

        # 6. Get probabilities and check correctness
        try:
            resp = client.get_probabilities(qasm_code)
            if resp.success and resp.data:
                probs = resp.data
                if isinstance(probs, dict):
                    result.probabilities = probs
                    if expected_states:
                        result.state_correctness = self._check_correctness(probs, expected_states)
                    else:
                        # No expected states - assume 100% if circuit runs
                        result.state_correctness = 1.0
        except Exception as e:
            result.errors.append(f"Probability check failed: {e}")
            if expected_states is None:
                result.state_correctness = 0.8  # Partial credit if other metrics pass

        # 7. Estimate noise
        try:
            resp = client.estimate_noise(qasm_code, "ibm_brisbane")
            if resp.success:
                result.noise_estimate = self._extract_value(resp.data,
                    ['noise', 'noise_estimate', 'error_rate', 'fidelity'], 0.0)
        except Exception as e:
            result.errors.append(f"Noise estimation failed: {e}")

        return result

    def _analyze_locally(self, qasm_code: str, expected_states: Optional[Dict[str, float]] = None) -> AnalysisResult:
        """Fallback local analysis when MCP is unavailable.

        Produces heuristic metrics only; results carry a marker error so
        callers can distinguish them from real MCP measurements.
        """
        result = AnalysisResult()

        # Basic syntax check (presence of required QASM keywords)
        result.syntax_valid = "OPENQASM" in qasm_code and "qreg" in qasm_code

        # Parse gates
        local = self._parse_qasm_locally(qasm_code)
        result.depth = local.get('depth', 0)
        result.gate_count = local.get('gate_count', 0)
        result.cx_count = local.get('cx_count', 0)
        result.single_qubit_count = local.get('single_qubit_count', 0)

        # Estimate hardware fitness based on structure
        if result.gate_count > 0:
            # Penalize high CX ratio (two-qubit gates dominate error rates)
            cx_ratio = result.cx_count / result.gate_count
            result.hardware_fitness = max(0.0, 1.0 - cx_ratio * 0.5)

        # Complexity estimate: depth plus double-weighted two-qubit gates
        result.complexity_score = result.depth + result.cx_count * 2

        # State correctness if syntax valid (no simulation possible locally)
        if result.syntax_valid:
            result.state_correctness = 0.7  # Partial credit

        result.errors.append("Used local fallback analysis")
        return result

    def _parse_qasm_locally(self, qasm_code: str) -> Dict[str, int]:
        """Parse QASM locally to extract gate counts.

        Depth is approximated as the maximum per-qubit gate count, which
        ignores parallelism barriers — an upper-bound estimate only.
        """
        result = {
            'depth': 0,
            'gate_count': 0,
            'cx_count': 0,
            'single_qubit_count': 0
        }

        lines = qasm_code.strip().split('\n')
        gate_depth_map = {}  # qubit -> current depth

        single_qubit_gates = ['h', 'x', 'y', 'z', 's', 't', 'sdg', 'tdg', 'rx', 'ry', 'rz', 'u1', 'u2', 'u3']
        two_qubit_gates = ['cx', 'cz', 'swap', 'cp', 'crz', 'cnot']

        for line in lines:
            line = line.strip().lower()
            if not line or line.startswith('//') or line.startswith('openqasm') or line.startswith('include'):
                continue
            if line.startswith('qreg') or line.startswith('creg') or line.startswith('measure') or line.startswith('barrier'):
                continue

            # Check for gates (prefix match with ' ' or '(' avoids e.g. 'cx' matching 'x')
            for gate in single_qubit_gates:
                if line.startswith(gate + ' ') or line.startswith(gate + '('):
                    result['single_qubit_count'] += 1
                    result['gate_count'] += 1
                    # Extract qubit index to track per-qubit depth
                    match = re.search(r'q\[(\d+)\]', line)
                    if match:
                        q = int(match.group(1))
                        gate_depth_map[q] = gate_depth_map.get(q, 0) + 1
                    break

            for gate in two_qubit_gates:
                if line.startswith(gate + ' '):
                    result['cx_count'] += 1
                    result['gate_count'] += 1
                    # Extract qubits; a two-qubit gate advances both qubits' depth
                    matches = re.findall(r'q\[(\d+)\]', line)
                    if matches:
                        for q in matches:
                            q = int(q)
                            gate_depth_map[q] = gate_depth_map.get(q, 0) + 1
                    break

        if gate_depth_map:
            result['depth'] = max(gate_depth_map.values())

        return result

    def _check_correctness(self, actual: Dict[str, float], expected: Dict[str, float]) -> float:
        """Check how close actual probabilities are to expected.

        Returns a 0..1 score: 1.0 for a perfect match, decreasing with the
        total variation between the two distributions.
        """
        if not expected:
            return 1.0

        total_error = 0.0
        for state, exp_prob in expected.items():
            act_prob = actual.get(state, 0.0)
            total_error += abs(exp_prob - act_prob)

        # Also check for unexpected states (ignore <1% numerical noise)
        for state, act_prob in actual.items():
            if state not in expected and act_prob > 0.01:
                total_error += act_prob

        # Normalize (max error = 2.0 for two disjoint distributions)
        correctness = max(0.0, 1.0 - total_error / 2.0)
        return correctness

    def compare_circuits(self, qasm1: str, qasm2: str) -> Dict[str, Any]:
        """Compare two circuits and return quality differences.

        Diff values are circuit2 minus circuit1, so negative depth/gate
        diffs mean circuit2 is smaller.
        """
        result1 = self.analyze_circuit(qasm1)
        result2 = self.analyze_circuit(qasm2)

        return {
            "circuit1": {
                "depth": result1.depth,
                "gate_count": result1.gate_count,
                "cx_count": result1.cx_count,
                "hardware_fitness": result1.hardware_fitness,
                "syntax_valid": result1.syntax_valid
            },
            "circuit2": {
                "depth": result2.depth,
                "gate_count": result2.gate_count,
                "cx_count": result2.cx_count,
                "hardware_fitness": result2.hardware_fitness,
                "syntax_valid": result2.syntax_valid
            },
            "comparison": {
                "depth_diff": result2.depth - result1.depth,
                "gate_diff": result2.gate_count - result1.gate_count,
                "cx_diff": result2.cx_count - result1.cx_count,
                "fitness_diff": result2.hardware_fitness - result1.hardware_fitness,
                "circuit1_better": result1.depth < result2.depth or result1.hardware_fitness > result2.hardware_fitness
            }
        }
|
| 341 |
+
|
| 342 |
+
|
| 343 |
+
# Module-level singleton
|
| 344 |
+
_analyzer: Optional[CircuitQualityAnalyzer] = None
|
| 345 |
+
|
| 346 |
+
def get_analyzer(mcp_url: str = "http://127.0.0.1:7861") -> CircuitQualityAnalyzer:
    """Return the shared CircuitQualityAnalyzer, creating it on first use.

    Note: mcp_url only takes effect on the very first call; subsequent
    calls reuse the cached instance regardless of the argument.
    """
    global _analyzer
    if _analyzer is not None:
        return _analyzer
    _analyzer = CircuitQualityAnalyzer(mcp_url)
    return _analyzer
|
tests/comprehensive_test.py
ADDED
|
@@ -0,0 +1,287 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Path: QAgents-workflos/tests/comprehensive_test.py
|
| 2 |
+
# Relations: Uses orchestrators/, tests/test_problems.py, config.py
|
| 3 |
+
# Description: Comprehensive test across all difficulties with detailed diagnostics
|
| 4 |
+
# Run with: python tests/comprehensive_test.py
|
| 5 |
+
|
| 6 |
+
"""
|
| 7 |
+
Comprehensive Circuit Generation Test
|
| 8 |
+
|
| 9 |
+
Tests all 9 problems (easy, medium, hard) with all 3 modes (naked, guided, blackboard).
|
| 10 |
+
Provides detailed diagnostics on where each mode succeeds/fails.
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
import sys
|
| 14 |
+
import time
|
| 15 |
+
import os
|
| 16 |
+
from datetime import datetime
|
| 17 |
+
from pathlib import Path
|
| 18 |
+
|
| 19 |
+
# Setup paths
|
| 20 |
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 21 |
+
|
| 22 |
+
from tests.test_problems import ALL_PROBLEMS, ProblemDifficulty
|
| 23 |
+
from orchestrators import create_orchestrator
|
| 24 |
+
from config import reset_cost_tracking, get_cost_summary, set_api_key
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def extract_qasm(result):
    """Pull the QASM payload out of an orchestrator result.

    Accepts plain values or lists (first element wins). Returns the
    payload as a string, or None when nothing usable is present.
    """
    if not result or not result.final_output:
        return None

    payload = result.final_output
    if isinstance(payload, list):
        if not payload:
            return None
        payload = payload[0]

    if not payload:
        return None
    return str(payload)
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def validate_qasm(qasm):
    """Validate QASM structure and count gates.

    Args:
        qasm: QASM source text (or None/empty).

    Returns:
        Dict with keys: valid (has OPENQASM header), has_qreg,
        gate_count (number of gate statements), depth (simplified
        estimate: one unit per gate statement, ignoring parallelism).

    Fix over the previous version: gates are identified by the line's
    leading instruction token instead of raw substring counting, which
    double-counted (e.g. 'x ' matched inside 'cx ').
    """
    if not qasm:
        return {"valid": False, "has_qreg": False, "gate_count": 0, "depth": 0}

    valid = "OPENQASM" in qasm
    has_qreg = "qreg" in qasm

    gate_names = {
        'h', 'x', 'y', 'z', 's', 't', 'sdg', 'tdg',
        'cx', 'cz', 'cnot', 'swap', 'cp', 'crz', 'ccx',
        'rx', 'ry', 'rz', 'u1', 'u2', 'u3',
    }

    gate_count = 0
    for raw_line in qasm.split('\n'):
        line = raw_line.strip().lower()
        if not line or line.startswith('//'):
            continue
        # The instruction name is the token before the first space or '('
        # (parameterized gates look like 'rz(0.5) q[0];').
        name = line.replace('(', ' ').split()[0]
        if name in gate_names:
            gate_count += 1

    # Simplified depth estimate: one layer per gate statement.
    depth = gate_count

    return {"valid": valid, "has_qreg": has_qreg, "gate_count": gate_count, "depth": depth}
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def run_comprehensive_test():
    """Run every problem in ALL_PROBLEMS through the naked, guided, and
    blackboard orchestrators, printing per-run diagnostics.

    For each (problem, mode) pair the function records success, timing,
    LLM cost counters, and simple QASM metrics, then prints a summary via
    print_summary() and saves all records as JSON under research/.

    Returns:
        list[dict]: one result record per (problem, mode) pair, or None
        when no API key is configured.
    """

    # Set API key from the environment; bail out early if none is available.
    api_key = os.getenv('GOOGLE_API_KEY') or os.getenv('GENAI_API_KEY')
    if api_key:
        set_api_key(api_key)
    else:
        print("ERROR: No API key found. Set GOOGLE_API_KEY environment variable.")
        return

    print("=" * 100)
    print("COMPREHENSIVE CIRCUIT GENERATION TEST - ALL DIFFICULTIES")
    print("=" * 100)
    print(f"Date: {datetime.now().isoformat()}")
    print(f"Problems: {len(ALL_PROBLEMS)} total (3 easy, 3 medium, 3 hard)")
    print(f"Modes: naked, guided, blackboard")
    print("=" * 100)

    # Store all results (one dict per problem/mode run).
    all_results = []

    # Test each problem with each mode
    for problem in ALL_PROBLEMS:
        print(f"\n\n{'=' * 100}")
        print(f"PROBLEM: {problem.id} - {problem.name}")
        print(f"Difficulty: {problem.difficulty.value.upper()}")
        print(f"Category: {problem.category.value}")
        print(f"Expected qubits: {problem.expected.min_qubits}-{problem.expected.max_qubits}")
        print(f"Required gates: {problem.expected.required_gates}")
        print(f"Expected states: {problem.expected.expected_states}")
        print("=" * 100)

        for mode in ['naked', 'guided', 'blackboard']:
            print(f"\n--- {mode.upper()} MODE ---")
            # Reset counters so get_cost_summary() reflects only this run.
            reset_cost_tracking()

            start = time.perf_counter()
            result = None
            qasm = None

            try:
                orchestrator = create_orchestrator(mode)
                result = orchestrator.run(problem.goal)

                elapsed = (time.perf_counter() - start) * 1000
                cost = get_cost_summary()

                # Extract and validate QASM
                qasm = extract_qasm(result)
                validation = validate_qasm(qasm)

                success = result.success if result else False
                errors = result.errors if result else []

                # Print detailed results
                status = '✅' if success and validation['valid'] else '❌'
                print(f"{status} Success: {success}")
                print(f" Time: {elapsed:.0f}ms")
                print(f" LLM Calls: {cost.get('total_requests', 0)}")
                print(f" Tokens: {cost.get('total_tokens', 0)}")
                print(f" QASM Valid: {validation['valid']}")
                print(f" Has qreg: {validation['has_qreg']}")
                print(f" Gate Count: {validation['gate_count']}")
                print(f" Est. Depth: {validation['depth']}")

                if errors:
                    print(f" ⚠️ Errors: {errors[:2]}")

                if qasm:
                    # Show first few lines of QASM
                    lines = qasm.split('\n')[:8]
                    print(" QASM:")
                    for line in lines:
                        print(f" {line}")
                    if len(qasm.split('\n')) > 8:
                        print(" ...")
                else:
                    print(" QASM: None generated")

                # Record the run; 'success' requires both orchestrator success
                # and a structurally valid QASM string.
                all_results.append({
                    'problem_id': problem.id,
                    'problem_name': problem.name,
                    'difficulty': problem.difficulty.value,
                    'category': problem.category.value,
                    'mode': mode,
                    'success': success and validation['valid'],
                    'qasm_valid': validation['valid'],
                    'time_ms': elapsed,
                    'llm_calls': cost.get('total_requests', 0),
                    'tokens': cost.get('total_tokens', 0),
                    'gate_count': validation['gate_count'],
                    'depth': validation['depth'],
                    'qasm': qasm[:500] if qasm else None,  # truncated to keep the JSON small
                    'error': str(errors[0])[:100] if errors else None
                })

            except Exception as e:
                # A crash in one mode must not abort the whole sweep;
                # record the failure and move on.
                elapsed = (time.perf_counter() - start) * 1000
                error_msg = f"{type(e).__name__}: {str(e)[:200]}"
                print(f"❌ EXCEPTION: {error_msg}")

                import traceback
                traceback.print_exc()

                all_results.append({
                    'problem_id': problem.id,
                    'problem_name': problem.name,
                    'difficulty': problem.difficulty.value,
                    'category': problem.category.value,
                    'mode': mode,
                    'success': False,
                    'qasm_valid': False,
                    'time_ms': elapsed,
                    'llm_calls': 0,
                    'tokens': 0,
                    'gate_count': 0,
                    'depth': 0,
                    'qasm': None,
                    'error': error_msg[:100]
                })

    # Print final summary
    print_summary(all_results)

    # Save results to JSON under research/ with a timestamped filename.
    output_path = Path(__file__).parent.parent / f"research/comprehensive_test_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
    output_path.parent.mkdir(exist_ok=True)

    import json
    with open(output_path, 'w') as f:
        json.dump(all_results, f, indent=2)
    print(f"\n\nResults saved to: {output_path}")

    return all_results
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
def print_summary(all_results):
    """Print summary by difficulty and mode.

    Args:
        all_results: list of result dicts produced by run_comprehensive_test,
            each carrying 'difficulty', 'mode', 'success', 'time_ms',
            'llm_calls', 'gate_count', 'problem_id', and 'error' keys.
    """

    print("\n\n" + "=" * 100)
    print("FINAL SUMMARY BY DIFFICULTY AND MODE")
    print("=" * 100)

    # Per-difficulty, per-mode breakdown with failure details.
    for diff in ['easy', 'medium', 'hard']:
        print(f"\n{diff.upper()} PROBLEMS:")
        print("-" * 80)

        for mode in ['naked', 'guided', 'blackboard']:
            mode_results = [r for r in all_results if r['difficulty'] == diff and r['mode'] == mode]
            if mode_results:
                successes = sum(1 for r in mode_results if r['success'])
                total = len(mode_results)
                avg_time = sum(r['time_ms'] for r in mode_results) / total
                total_llm = sum(r['llm_calls'] for r in mode_results)
                avg_gates = sum(r['gate_count'] for r in mode_results) / total

                # Green when everything passed, warning when partial, red when all failed.
                status = '✅' if successes == total else '⚠️ ' if successes > 0 else '❌'
                print(f"{status} {mode:12} | Success: {successes}/{total} | Time: {avg_time:>6.0f}ms | LLM: {total_llm:>2} | Avg Gates: {avg_gates:.1f}")

                # Show failures
                failures = [r for r in mode_results if not r['success']]
                for f in failures:
                    error_msg = f['error'][:60] if f['error'] else 'No QASM generated'
                    print(f" ❌ {f['problem_id']}: {error_msg}")

    # Calculate winners using a composite score that rewards success and
    # penalizes wall-clock time and LLM-call cost.
    print("\n\n" + "=" * 100)
    print("🏆 WINNER BY DIFFICULTY (Score = Success*100 - Time/1000 - LLM*0.5)")
    print("=" * 100)

    for diff in ['easy', 'medium', 'hard']:
        print(f"\n{diff.upper()}:")
        best_mode = None
        best_score = -999  # sentinel lower than any achievable score

        for mode in ['naked', 'guided', 'blackboard']:
            mode_results = [r for r in all_results if r['difficulty'] == diff and r['mode'] == mode]
            if mode_results:
                successes = sum(1 for r in mode_results if r['success'])
                total = len(mode_results)
                avg_time = sum(r['time_ms'] for r in mode_results) / total
                total_llm = sum(r['llm_calls'] for r in mode_results)

                success_rate = successes / total
                time_penalty = avg_time / 1000
                llm_penalty = total_llm * 0.5
                score = success_rate * 100 - time_penalty - llm_penalty

                print(f" {mode:12}: Score={score:>6.1f} (Success={success_rate*100:.0f}%, Time={avg_time:.0f}ms, LLM={total_llm})")

                if score > best_score:
                    best_score = score
                    best_mode = mode

        print(f" 🏆 WINNER: {best_mode.upper() if best_mode else 'NONE'}")

    # Overall recommendation
    print("\n\n" + "=" * 100)
    print("OVERALL RECOMMENDATIONS")
    print("=" * 100)

    # Calculate overall stats per mode (across all difficulties).
    for mode in ['naked', 'guided', 'blackboard']:
        mode_results = [r for r in all_results if r['mode'] == mode]
        if mode_results:
            successes = sum(1 for r in mode_results if r['success'])
            total = len(mode_results)
            avg_time = sum(r['time_ms'] for r in mode_results) / total
            total_llm = sum(r['llm_calls'] for r in mode_results)
            avg_gates = sum(r['gate_count'] for r in mode_results) / total

            print(f"\n{mode.upper()}:")
            print(f" Overall Success: {successes}/{total} ({100*successes/total:.0f}%)")
            print(f" Average Time: {avg_time:.0f}ms")
            print(f" Total LLM Calls: {total_llm}")
            print(f" Average Gates: {avg_gates:.1f}")

            # List failures
            failures = [r for r in mode_results if not r['success']]
            if failures:
                print(f" Failures ({len(failures)}):")
                for f in failures:
                    print(f" - {f['problem_id']} ({f['difficulty']}): {f['error'][:50] if f['error'] else 'Unknown'}")
|
| 284 |
+
|
| 285 |
+
|
| 286 |
+
if __name__ == "__main__":
    # Allow running directly: python tests/comprehensive_test.py
    run_comprehensive_test()
|
tests/comprehensive_test_v2.py
ADDED
|
@@ -0,0 +1,299 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Path: QAgents-workflos/tests/comprehensive_test_v2.py
|
| 2 |
+
# Relations: Uses orchestrators, test_problems, client/mcp_client
|
| 3 |
+
# Description: Full diagnostic test comparing all 5 modes including QUASAR and HYBRID
|
| 4 |
+
"""
|
| 5 |
+
Comprehensive Test V2: Compare all orchestration modes
|
| 6 |
+
|
| 7 |
+
Modes tested:
|
| 8 |
+
1. NAKED - Direct LLM (baseline)
|
| 9 |
+
2. GUIDED - Multi-agent pipeline
|
| 10 |
+
3. BLACKBOARD - Event-driven agents
|
| 11 |
+
4. QUASAR - Tool-augmented LLM with hierarchical validation
|
| 12 |
+
5. HYBRID - NAKED first, QUASAR fallback
|
| 13 |
+
|
| 14 |
+
Problems:
|
| 15 |
+
- 3 EASY
|
| 16 |
+
- 3 MEDIUM
|
| 17 |
+
- 3 HARD
|
| 18 |
+
- 4 VERY_HARD (new - to find NAKED limits)
|
| 19 |
+
"""
|
| 20 |
+
|
| 21 |
+
import sys
|
| 22 |
+
import os
|
| 23 |
+
import json
|
| 24 |
+
import time
|
| 25 |
+
from datetime import datetime
|
| 26 |
+
from pathlib import Path
|
| 27 |
+
|
| 28 |
+
# Setup paths
|
| 29 |
+
sys.path.insert(0, str(Path(__file__).parent.parent.absolute()))
|
| 30 |
+
|
| 31 |
+
# Set API key BEFORE any imports
api_key = os.getenv('GOOGLE_API_KEY')
if not api_key:
    # BUG FIX: this previously fell back to the literal string
    # "$env:GOOGLE_API_KEY" (a PowerShell expression committed by accident),
    # which silently sent a bogus key to the API. Fail fast with a clear
    # message instead of producing confusing auth errors later.
    raise RuntimeError(
        "GOOGLE_API_KEY environment variable is not set; "
        "export it before running comprehensive_test_v2."
    )
os.environ['GOOGLE_API_KEY'] = api_key
|
| 36 |
+
|
| 37 |
+
from tests.test_problems import (
|
| 38 |
+
ALL_PROBLEMS, EASY_PROBLEMS, MEDIUM_PROBLEMS,
|
| 39 |
+
HARD_PROBLEMS, VERY_HARD_PROBLEMS,
|
| 40 |
+
ProblemDifficulty
|
| 41 |
+
)
|
| 42 |
+
from orchestrators import create_orchestrator
|
| 43 |
+
from orchestrators.quasar_orchestrator import QuasarOrchestrator, HybridOrchestrator
|
| 44 |
+
from config import reset_cost_tracking, get_cost_summary, set_api_key
|
| 45 |
+
from client.mcp_client import get_client
|
| 46 |
+
|
| 47 |
+
# Set API key in config
|
| 48 |
+
set_api_key(api_key)
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def extract_qasm_metrics(qasm: str) -> dict:
    """Compute rough size metrics (gate count, depth, qubits) for QASM text.

    Args:
        qasm: QASM source code (may be None or empty).

    Returns:
        dict with 'gate_count', 'depth', and 'qubits' keys.
    """
    if not qasm:
        return {"gate_count": 0, "depth": 0, "qubits": 0}

    import re

    # Qubit count comes from the first qreg declaration, e.g. "qreg q[3];".
    m = re.search(r'qreg\s+\w+\[(\d+)\]', qasm)
    num_qubits = int(m.group(1)) if m else 0

    # Total gate tokens anywhere in the program text (word-bounded).
    gate_tokens = re.findall(
        r'\b(h|x|y|z|s|t|sdg|tdg|cx|cz|cy|swap|ccx|rz|rx|ry|u1|u2|u3|p|cp)\b',
        qasm,
        re.IGNORECASE,
    )

    # Depth estimate (simplified): one unit per non-declaration line that
    # mentions a gate marker.
    depth_markers = ('h ', 'x ', 'y ', 'z ', 'cx', 'cz', 'swap', 'rx', 'ry', 'rz', 'ccx')
    skip_prefixes = ('OPENQASM', 'include', 'qreg', 'creg', '//')
    est_depth = 0
    for raw in qasm.split('\n'):
        stripped = raw.strip()
        if not stripped or stripped.startswith(skip_prefixes):
            continue
        lowered = stripped.lower()
        if any(marker in lowered for marker in depth_markers):
            est_depth += 1

    return {"gate_count": len(gate_tokens), "depth": est_depth, "qubits": num_qubits}
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def run_test(problem, mode: str) -> dict:
    """Run a single test and return results.

    Args:
        problem: a TestProblem with id/name/difficulty/category/prompt and
            an 'expected' spec (min_qubits, expected_states, max_depth).
        mode: one of 'naked', 'guided', 'blackboard', 'quasar', 'hybrid'.

    Returns:
        dict of flat metrics for this run; 'error' holds joined error text
        (or the exception message) and is None on a clean run.
    """
    # Pre-populated record so every key exists even when the run crashes.
    result = {
        "problem_id": problem.id,
        "problem_name": problem.name,
        "difficulty": problem.difficulty.value,
        "category": problem.category.value,
        "mode": mode,
        "success": False,
        "qasm_valid": False,
        "time_ms": 0,
        "llm_calls": 0,
        "tokens": 0,
        "gate_count": 0,
        "depth": 0,
        "qasm": None,
        "error": None,
        "tiers_passed": [],   # only populated by quasar/hybrid runs
        "iterations": 0       # only populated by quasar/hybrid runs
    }

    start = time.perf_counter()
    # Reset counters so cost tracking reflects only this run.
    reset_cost_tracking()

    try:
        if mode in ["quasar", "hybrid"]:
            # Use new orchestrators with expected values
            if mode == "quasar":
                orchestrator = QuasarOrchestrator(max_iterations=3)
            else:
                orchestrator = HybridOrchestrator()

            quasar_result = orchestrator.run(
                goal=problem.prompt,
                expected_qubits=problem.expected.min_qubits,
                expected_states=problem.expected.expected_states if problem.expected.expected_states else None,
                max_depth=problem.expected.max_depth
            )

            # Quasar results track their own LLM usage and validation tiers.
            result["success"] = quasar_result.success
            result["qasm"] = quasar_result.final_qasm
            result["llm_calls"] = quasar_result.llm_calls
            result["tokens"] = quasar_result.tokens_used
            result["tiers_passed"] = quasar_result.tiers_passed
            result["iterations"] = quasar_result.iterations

            if quasar_result.final_qasm:
                result["qasm_valid"] = True
                metrics = extract_qasm_metrics(quasar_result.final_qasm)
                result["gate_count"] = metrics["gate_count"]
                result["depth"] = metrics["depth"]

            if quasar_result.errors:
                result["error"] = "; ".join(quasar_result.errors)

        else:
            # Use standard orchestrators
            orchestrator = create_orchestrator(mode)
            orch_result = orchestrator.run(problem.prompt)

            result["success"] = orch_result.success
            result["qasm"] = orch_result.final_output

            # Get LLM stats
            # NOTE(review): this reads 'llm_requests' while comprehensive_test.py
            # reads 'total_requests' from the same summary — confirm which key
            # get_cost_summary() actually returns.
            cost = get_cost_summary()
            result["llm_calls"] = cost.get("llm_requests", 0)
            result["tokens"] = cost.get("total_tokens", 0)

            if orch_result.final_output:
                result["qasm_valid"] = True
                metrics = extract_qasm_metrics(orch_result.final_output)
                result["gate_count"] = metrics["gate_count"]
                result["depth"] = metrics["depth"]

            if orch_result.errors:
                result["error"] = "; ".join(orch_result.errors)

    except Exception as e:
        # Record the failure instead of aborting the whole sweep.
        result["error"] = str(e)

    result["time_ms"] = (time.perf_counter() - start) * 1000
    return result
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
def main():
    """Run every test problem through all five orchestration modes, print
    per-run and aggregate diagnostics, and save the results as JSON.

    Fixes over the previous version:
    - the MCP health check used a bare ``except:`` (now ``except Exception``,
      so KeyboardInterrupt/SystemExit are no longer swallowed);
    - the per-mode summary guarded against ZeroDivisionError when a mode
      produced no results.
    """
    print("=" * 100)
    print("COMPREHENSIVE TEST V2 - ALL MODES INCLUDING QUASAR & HYBRID")
    print("=" * 100)
    print(f"Date: {datetime.now().isoformat()}")
    print(f"Problems: {len(ALL_PROBLEMS)} total")
    print(f" - Easy: {len(EASY_PROBLEMS)}")
    print(f" - Medium: {len(MEDIUM_PROBLEMS)}")
    print(f" - Hard: {len(HARD_PROBLEMS)}")
    print(f" - Very Hard: {len(VERY_HARD_PROBLEMS)}")
    print(f"Modes: naked, guided, blackboard, quasar, hybrid")
    print("=" * 100)

    # Check MCP server availability; validation falls back gracefully if absent.
    try:
        client = get_client()
        if client.health_check():
            print("✅ MCP Server connected")
        else:
            print("⚠️ MCP Server not responding - some validations may use fallback")
    except Exception:
        print("⚠️ MCP Server not available")

    all_results = []
    modes = ["naked", "quasar", "hybrid", "guided", "blackboard"]  # Order: fastest to slowest

    # Group problems by difficulty
    problem_groups = [
        ("EASY", EASY_PROBLEMS),
        ("MEDIUM", MEDIUM_PROBLEMS),
        ("HARD", HARD_PROBLEMS),
        ("VERY_HARD", VERY_HARD_PROBLEMS)
    ]

    for diff_name, problems in problem_groups:
        print(f"\n{'='*100}")
        print(f"DIFFICULTY: {diff_name}")
        print("=" * 100)

        for problem in problems:
            print(f"\n--- Problem: {problem.id} - {problem.name} ---")

            for mode in modes:
                print(f" Testing {mode}...", end=" ", flush=True)

                result = run_test(problem, mode)
                all_results.append(result)

                status = "✅" if result["success"] else "❌"
                time_str = f"{result['time_ms']:.0f}ms"
                llm_str = f"LLM:{result['llm_calls']}"
                gates_str = f"Gates:{result['gate_count']}"

                extra = ""
                if mode in ["quasar", "hybrid"]:
                    tiers = result.get("tiers_passed", [])
                    extra = f" Tiers:{tiers}"

                print(f"{status} {time_str} {llm_str} {gates_str}{extra}")

                if result["error"] and not result["success"]:
                    print(f" Error: {result['error'][:80]}...")

                # Rate limiting between LLM-heavy runs.
                time.sleep(5)

    # Summary
    print("\n\n" + "=" * 100)
    print("FINAL SUMMARY BY MODE")
    print("=" * 100)

    for mode in modes:
        mode_results = [r for r in all_results if r["mode"] == mode]
        if not mode_results:
            # Guard against ZeroDivisionError when a mode produced no results.
            continue
        successes = sum(1 for r in mode_results if r["success"])
        total = len(mode_results)
        total_time = sum(r["time_ms"] for r in mode_results)
        total_llm = sum(r["llm_calls"] for r in mode_results)
        avg_gates = sum(r["gate_count"] for r in mode_results if r["success"]) / max(successes, 1)

        print(f"\n{mode.upper()}:")
        print(f" Success: {successes}/{total} ({100*successes/total:.1f}%)")
        print(f" Total Time: {total_time:.0f}ms ({total_time/total:.0f}ms avg)")
        print(f" LLM Calls: {total_llm} ({total_llm/total:.1f} avg)")
        print(f" Avg Gates (success): {avg_gates:.1f}")

        # Per difficulty
        for diff in ["easy", "medium", "hard", "very_hard"]:
            diff_results = [r for r in mode_results if r["difficulty"] == diff]
            if diff_results:
                diff_success = sum(1 for r in diff_results if r["success"])
                print(f" {diff}: {diff_success}/{len(diff_results)}")

    # Efficiency comparison
    print("\n" + "=" * 100)
    print("EFFICIENCY COMPARISON (Success per LLM call)")
    print("=" * 100)

    for mode in modes:
        mode_results = [r for r in all_results if r["mode"] == mode]
        successes = sum(1 for r in mode_results if r["success"])
        total_llm = sum(r["llm_calls"] for r in mode_results)
        efficiency = successes / max(total_llm, 1)
        print(f" {mode}: {efficiency:.3f} successes per LLM call")

    # Winner determination: most successes, ties broken by efficiency.
    print("\n" + "=" * 100)
    print("WINNER BY DIFFICULTY")
    print("=" * 100)

    for diff in ["easy", "medium", "hard", "very_hard"]:
        print(f"\n{diff.upper()}:")
        best_mode = None
        best_success = -1
        best_efficiency = -1

        for mode in modes:
            mode_results = [r for r in all_results if r["mode"] == mode and r["difficulty"] == diff]
            if mode_results:
                successes = sum(1 for r in mode_results if r["success"])
                total_llm = sum(r["llm_calls"] for r in mode_results)
                efficiency = successes / max(total_llm, 1)

                if successes > best_success or (successes == best_success and efficiency > best_efficiency):
                    best_success = successes
                    best_efficiency = efficiency
                    best_mode = mode

        if best_mode:
            print(f" 🏆 Winner: {best_mode.upper()} ({best_success} successes)")

    # Save results with a timestamped filename under research/.
    output_path = Path(__file__).parent.parent / "research" / f"comprehensive_test_v2_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
    output_path.parent.mkdir(parents=True, exist_ok=True)

    with open(output_path, 'w') as f:
        json.dump(all_results, f, indent=2)

    print(f"\n\nResults saved to: {output_path}")
    print("=" * 100)
| 296 |
+
|
| 297 |
+
|
| 298 |
+
if __name__ == "__main__":
    # Allow running directly: python tests/comprehensive_test_v2.py
    main()
|
tests/evaluation_harness.py
ADDED
|
@@ -0,0 +1,748 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Path: QAgents-workflos/tests/evaluation_harness.py
|
| 2 |
+
# Relations: Uses orchestrators, tools, database, config modules
|
| 3 |
+
# Uses agents/llm_adapter.py for LLM usage tracking
|
| 4 |
+
# Description: Evaluation harness for comparative testing of Blackboard, Guided, and Naked modes
|
| 5 |
+
# Includes cost tracking (requests, tokens, time) for each mode
|
| 6 |
+
# Exports results to CSV for research analysis
|
| 7 |
+
"""
|
| 8 |
+
Evaluation Harness: Measure time, quality, effectiveness, reliability.
|
| 9 |
+
Runs comparative tests across Blackboard, Guided, and Naked modes.
|
| 10 |
+
|
| 11 |
+
COST TRACKING METRICS:
|
| 12 |
+
======================
|
| 13 |
+
For each mode, tracks:
|
| 14 |
+
- LLM requests: Number of calls to LLM API
|
| 15 |
+
- Tokens used: Total tokens consumed (input + output)
|
| 16 |
+
- Time: Total execution time
|
| 17 |
+
- Quality: Circuit correctness and complexity scores
|
| 18 |
+
|
| 19 |
+
MODES:
|
| 20 |
+
======
|
| 21 |
+
- Naked: Direct LLM (1 call/problem) - baseline test
|
| 22 |
+
- Guided: Structured workflow (4 LLM calls/problem)
|
| 23 |
+
- Blackboard: Free-form collaboration (8-12 LLM calls/problem)
|
| 24 |
+
|
| 25 |
+
OUTPUT FORMATS:
|
| 26 |
+
===============
|
| 27 |
+
- TXT: Human-readable report
|
| 28 |
+
- CSV: Research data for longitudinal analysis
|
| 29 |
+
"""
|
| 30 |
+
|
| 31 |
+
import time
|
| 32 |
+
import json
|
| 33 |
+
import csv
|
| 34 |
+
import statistics
|
| 35 |
+
from dataclasses import dataclass, field, asdict
|
| 36 |
+
from typing import Dict, List, Any, Optional
|
| 37 |
+
from datetime import datetime
|
| 38 |
+
from pathlib import Path
|
| 39 |
+
import logging
|
| 40 |
+
|
| 41 |
+
from .test_problems import TestProblem, ALL_PROBLEMS, get_problem
|
| 42 |
+
from database import get_database, ResultEntry
|
| 43 |
+
|
| 44 |
+
logger = logging.getLogger(__name__)
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
@dataclass
class MetricResult:
    """Result for a single metric measured during an evaluation run."""
    name: str            # metric identifier, e.g. "quality" or "time"
    value: float         # measured value
    unit: str            # unit label for the value, e.g. "ms" or "score"
    passed: bool = True  # whether the metric met its threshold
    details: str = ""    # optional free-form explanation
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
@dataclass
class CostMetrics:
    """Cost metrics for a single run."""
    llm_requests: int = 0                               # number of LLM API calls
    mcp_requests: int = 0                               # number of MCP tool calls
    tokens_used: int = 0                                # total tokens (input + output)
    time_ms: float = 0.0                                # wall-clock time in milliseconds
    models_used: List[str] = field(default_factory=list)  # model names invoked

    def cost_per_quality(self, quality_score: float) -> float:
        """Calculate cost-per-quality ratio (lower is better).

        Cost is a composite: one unit per LLM request, plus one unit per
        1000 tokens, plus one unit per second of wall-clock time.
        Returns +inf for non-positive quality so failed runs sort last.
        """
        if quality_score <= 0:
            return float('inf')
        composite_cost = (
            self.llm_requests
            + self.tokens_used / 1000
            + self.time_ms / 1000
        )
        return composite_cost / quality_score
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
@dataclass
class EvaluationResult:
    """Result of evaluating a single run of one problem in one mode."""
    problem_id: str           # id of the TestProblem evaluated
    system_mode: str          # orchestration mode used (e.g. 'naked', 'guided')
    run_number: int           # 1-based index of this run within the batch
    success: bool             # whether the run completed successfully
    execution_time_ms: float  # wall-clock execution time
    circuit_qasm: Optional[str]  # generated QASM, or None when nothing produced
    metrics: Dict[str, MetricResult] = field(default_factory=dict)   # per-metric results keyed by metric name
    cost_metrics: CostMetrics = field(default_factory=CostMetrics)   # cost counters for this run
    errors: List[str] = field(default_factory=list)                  # error messages collected during the run
    timestamp: datetime = field(default_factory=datetime.now)        # when the result was created
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
@dataclass
class AggregatedResults:
    """Aggregated results for a problem across all runs of one mode."""
    problem_id: str          # id of the TestProblem aggregated
    system_mode: str         # orchestration mode these runs used
    num_runs: int            # number of runs aggregated
    success_rate: float      # fraction of runs that succeeded
    avg_time_ms: float       # mean execution time across runs
    std_time_ms: float       # standard deviation of execution time
    avg_quality_score: float # mean quality score across runs
    effectiveness: float     # effectiveness metric across runs
    reliability: float       # reliability metric across runs
    # Cost aggregates
    total_llm_requests: int = 0     # LLM calls summed over all runs
    total_mcp_requests: int = 0     # MCP calls summed over all runs
    total_tokens: int = 0           # tokens summed over all runs
    avg_cost_per_quality: float = 0.0  # mean cost-per-quality ratio
    all_results: List[EvaluationResult] = field(default_factory=list)  # underlying per-run results
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
class EvaluationHarness:
    """
    Runs comparative evaluations across different orchestration modes.
    Measures: Time, Quality, Effectiveness, Reliability, Cost
    """

    def __init__(self, num_runs: int = 5, timeout_seconds: float = 120.0):
        """Initialize the harness.

        Args:
            num_runs: Repetitions per problem/mode pair used for statistics.
            timeout_seconds: Stored timeout; not enforced in this class —
                TODO confirm where (if anywhere) it is applied.
        """
        self.num_runs = num_runs
        self.timeout_seconds = timeout_seconds
        self.db = get_database()
        # problem_id -> {mode -> AggregatedResults}; populated by evaluate_all().
        self.results: Dict[str, Dict[str, AggregatedResults]] = {}

        # Track MCP requests per run
        self._mcp_request_count = 0
|
| 124 |
+
|
| 125 |
+
def _reset_cost_tracking(self):
|
| 126 |
+
"""Reset cost tracking before a run."""
|
| 127 |
+
try:
|
| 128 |
+
from config import reset_cost_tracking
|
| 129 |
+
reset_cost_tracking()
|
| 130 |
+
except Exception:
|
| 131 |
+
pass
|
| 132 |
+
self._mcp_request_count = 0
|
| 133 |
+
|
| 134 |
+
def _get_cost_summary(self) -> Dict:
|
| 135 |
+
"""Get cost tracking summary after a run."""
|
| 136 |
+
try:
|
| 137 |
+
from config import get_cost_summary
|
| 138 |
+
return get_cost_summary()
|
| 139 |
+
except Exception:
|
| 140 |
+
return {"total_requests": 0, "total_tokens": 0, "total_time_ms": 0.0}
|
| 141 |
+
|
| 142 |
+
def _get_llm_usage_summary(self) -> Dict:
|
| 143 |
+
"""Get LLM usage from rate limiter."""
|
| 144 |
+
try:
|
| 145 |
+
from agents.llm_adapter import get_usage_summary
|
| 146 |
+
return get_usage_summary()
|
| 147 |
+
except Exception:
|
| 148 |
+
return {}
|
| 149 |
+
|
| 150 |
+
def evaluate_single_run(self, problem: TestProblem, mode: str,
|
| 151 |
+
run_number: int) -> EvaluationResult:
|
| 152 |
+
"""Run a single evaluation with cost tracking."""
|
| 153 |
+
from orchestrators import create_orchestrator
|
| 154 |
+
from tools import invoke_tool
|
| 155 |
+
|
| 156 |
+
logger.info(f"Running {mode} on {problem.id}, run {run_number}")
|
| 157 |
+
|
| 158 |
+
# Reset cost tracking
|
| 159 |
+
self._reset_cost_tracking()
|
| 160 |
+
|
| 161 |
+
errors = []
|
| 162 |
+
circuit_qasm = None
|
| 163 |
+
metrics = {}
|
| 164 |
+
success = False
|
| 165 |
+
cost_metrics = CostMetrics()
|
| 166 |
+
|
| 167 |
+
start_time = time.perf_counter()
|
| 168 |
+
|
| 169 |
+
try:
|
| 170 |
+
# Create and run orchestrator
|
| 171 |
+
orchestrator = create_orchestrator(mode)
|
| 172 |
+
result = orchestrator.run(problem.goal)
|
| 173 |
+
|
| 174 |
+
circuit_qasm = result.final_output
|
| 175 |
+
|
| 176 |
+
# Handle list responses from MCP
|
| 177 |
+
if isinstance(circuit_qasm, list):
|
| 178 |
+
circuit_qasm = circuit_qasm[0] if circuit_qasm else None
|
| 179 |
+
|
| 180 |
+
# Ensure it's a string or None
|
| 181 |
+
if circuit_qasm is not None:
|
| 182 |
+
circuit_qasm = str(circuit_qasm) if not isinstance(circuit_qasm, str) else circuit_qasm
|
| 183 |
+
|
| 184 |
+
success = result.success and circuit_qasm is not None
|
| 185 |
+
|
| 186 |
+
if not success:
|
| 187 |
+
errors.extend(result.errors)
|
| 188 |
+
|
| 189 |
+
except Exception as e:
|
| 190 |
+
success = False
|
| 191 |
+
errors.append(str(e))
|
| 192 |
+
logger.error(f"Evaluation failed: {e}")
|
| 193 |
+
|
| 194 |
+
elapsed_ms = (time.perf_counter() - start_time) * 1000
|
| 195 |
+
|
| 196 |
+
# Collect cost metrics
|
| 197 |
+
cost_summary = self._get_cost_summary()
|
| 198 |
+
llm_usage = self._get_llm_usage_summary()
|
| 199 |
+
|
| 200 |
+
cost_metrics = CostMetrics(
|
| 201 |
+
llm_requests=cost_summary.get("total_requests", 0),
|
| 202 |
+
mcp_requests=self._mcp_request_count,
|
| 203 |
+
tokens_used=cost_summary.get("total_tokens", 0),
|
| 204 |
+
time_ms=elapsed_ms,
|
| 205 |
+
models_used=list(cost_summary.get("model_breakdown", {}).keys())
|
| 206 |
+
)
|
| 207 |
+
|
| 208 |
+
# Calculate metrics if we have a circuit
|
| 209 |
+
if circuit_qasm:
|
| 210 |
+
metrics = self._calculate_metrics(circuit_qasm, problem)
|
| 211 |
+
|
| 212 |
+
return EvaluationResult(
|
| 213 |
+
problem_id=problem.id,
|
| 214 |
+
system_mode=mode,
|
| 215 |
+
run_number=run_number,
|
| 216 |
+
success=success,
|
| 217 |
+
execution_time_ms=elapsed_ms,
|
| 218 |
+
circuit_qasm=circuit_qasm,
|
| 219 |
+
metrics=metrics,
|
| 220 |
+
cost_metrics=cost_metrics,
|
| 221 |
+
errors=errors
|
| 222 |
+
)
|
| 223 |
+
|
| 224 |
+
    def _calculate_metrics(self, qasm: str, problem: TestProblem) -> Dict[str, MetricResult]:
        """Calculate quality metrics for a circuit.

        Invokes the MCP tools for depth, complexity, hardware fitness,
        syntax validation and (when expected states are defined) state
        probabilities, incrementing ``self._mcp_request_count`` once per
        tool call. Each tool's response may be a scalar, dict or list, so
        every branch defensively normalizes the payload. Any exception
        aborts further metric collection; whatever was gathered so far is
        still returned.
        """
        from tools import invoke_tool

        metrics = {}

        try:
            # Helper to extract value from potentially nested result
            def extract_value(result, key, default=0):
                val = result.get(key, default)
                if isinstance(val, dict):
                    # Dict payloads may nest the number under several keys.
                    return val.get('depth', val.get('value', val.get('score', default)))
                elif isinstance(val, list):
                    return val[0] if val else default
                return val

            # 1. Depth metric
            self._mcp_request_count += 1
            depth_result = invoke_tool("get_circuit_depth", qasm=qasm)
            if depth_result.get("success"):
                depth = extract_value(depth_result, "depth", 0)
                # extract_value can still return a dict when the inner dict
                # lacked the expected keys; unwrap one more level.
                if isinstance(depth, dict):
                    depth = depth.get('depth', 0)
                max_depth = problem.expected.max_depth or 100
                passed = depth <= max_depth if max_depth else True
                metrics["depth"] = MetricResult(
                    name="Circuit Depth",
                    value=float(depth) if depth else 0,
                    unit="layers",
                    passed=passed,
                    details=f"Expected max: {max_depth}"
                )

            # 2. Complexity score
            self._mcp_request_count += 1
            complexity_result = invoke_tool("calculate_complexity", qasm=qasm)
            if complexity_result.get("success"):
                score = complexity_result.get("score", {})
                if isinstance(score, dict):
                    complexity_value = score.get("complexity_score", score.get("total", 0))
                elif isinstance(score, list):
                    # List payloads are treated as unparseable -> 0.
                    complexity_value = 0
                else:
                    complexity_value = float(score) if score else 0
                metrics["complexity"] = MetricResult(
                    name="Complexity Score",
                    value=float(complexity_value) if complexity_value else 0,
                    unit="score",
                    passed=True
                )

            # 3. Hardware fitness
            self._mcp_request_count += 1
            fitness_result = invoke_tool("calculate_hardware_fitness", qasm=qasm)
            if fitness_result.get("success"):
                score = fitness_result.get("score", {})
                if isinstance(score, dict):
                    fitness_value = score.get("fitness_score", score.get("fitness", 0))
                elif isinstance(score, list):
                    fitness_value = 0
                else:
                    fitness_value = float(score) if score else 0
                metrics["hardware_fitness"] = MetricResult(
                    name="Hardware Fitness",
                    value=float(fitness_value) if fitness_value else 0,
                    unit="score",
                    # 0.5 is the pass threshold; a falsy value always fails.
                    passed=fitness_value > 0.5 if fitness_value else False
                )

            # 4. Validation
            self._mcp_request_count += 1
            validation_result = invoke_tool("validate_syntax", qasm=qasm)
            valid_data = validation_result.get("valid", False)
            # Handle list or complex response
            if isinstance(valid_data, list):
                # Some tools reply with human-readable list content; look for
                # a "valid" token or a checkmark emoji in the text.
                valid = "valid" in str(valid_data).lower() or "✅" in str(valid_data)
            elif isinstance(valid_data, dict):
                valid = valid_data.get("valid", False)
            else:
                valid = bool(valid_data) and validation_result.get("success", False)
            metrics["syntax_valid"] = MetricResult(
                name="Syntax Validation",
                value=1.0 if valid else 0.0,
                unit="boolean",
                passed=valid
            )

            # 5. Simulation correctness (if expected states defined)
            if problem.expected.expected_states:
                self._mcp_request_count += 1
                prob_result = invoke_tool("get_probabilities", qasm=qasm)
                if prob_result.get("success"):
                    probs = prob_result.get("probabilities", {})
                    if isinstance(probs, dict):
                        correctness = self._check_state_correctness(probs, problem.expected.expected_states)
                    else:
                        correctness = 0.5  # Default if can't parse
                    metrics["state_correctness"] = MetricResult(
                        name="State Correctness",
                        value=correctness,
                        unit="ratio",
                        passed=correctness > 0.9
                    )

        except Exception as e:
            # Best-effort: log and return whatever metrics were collected.
            logger.error(f"Metric calculation failed: {e}")

        return metrics
|
| 332 |
+
|
| 333 |
+
def _check_state_correctness(self, actual: Dict[str, float],
|
| 334 |
+
expected: Dict[str, float]) -> float:
|
| 335 |
+
"""Check how close actual probabilities are to expected."""
|
| 336 |
+
if not expected:
|
| 337 |
+
return 1.0
|
| 338 |
+
|
| 339 |
+
total_error = 0.0
|
| 340 |
+
for state, expected_prob in expected.items():
|
| 341 |
+
actual_prob = actual.get(state, 0.0)
|
| 342 |
+
total_error += abs(expected_prob - actual_prob)
|
| 343 |
+
|
| 344 |
+
# Normalize to 0-1 range (0 = perfect, 1 = worst)
|
| 345 |
+
max_error = 2.0 # Maximum possible error
|
| 346 |
+
correctness = 1.0 - (total_error / max_error)
|
| 347 |
+
return max(0.0, correctness)
|
| 348 |
+
|
| 349 |
+
def aggregate_results(self, results: List[EvaluationResult]) -> AggregatedResults:
|
| 350 |
+
"""Aggregate multiple run results with cost metrics."""
|
| 351 |
+
if not results:
|
| 352 |
+
return AggregatedResults(
|
| 353 |
+
problem_id="",
|
| 354 |
+
system_mode="",
|
| 355 |
+
num_runs=0,
|
| 356 |
+
success_rate=0.0,
|
| 357 |
+
avg_time_ms=0.0,
|
| 358 |
+
std_time_ms=0.0,
|
| 359 |
+
avg_quality_score=0.0,
|
| 360 |
+
effectiveness=0.0,
|
| 361 |
+
reliability=0.0
|
| 362 |
+
)
|
| 363 |
+
|
| 364 |
+
problem_id = results[0].problem_id
|
| 365 |
+
system_mode = results[0].system_mode
|
| 366 |
+
num_runs = len(results)
|
| 367 |
+
|
| 368 |
+
# Success rate
|
| 369 |
+
successes = sum(1 for r in results if r.success)
|
| 370 |
+
success_rate = successes / num_runs
|
| 371 |
+
|
| 372 |
+
# Time statistics
|
| 373 |
+
times = [r.execution_time_ms for r in results]
|
| 374 |
+
avg_time = statistics.mean(times)
|
| 375 |
+
std_time = statistics.stdev(times) if len(times) > 1 else 0.0
|
| 376 |
+
|
| 377 |
+
# Cost aggregates
|
| 378 |
+
total_llm = sum(r.cost_metrics.llm_requests for r in results)
|
| 379 |
+
total_mcp = sum(r.cost_metrics.mcp_requests for r in results)
|
| 380 |
+
total_tokens = sum(r.cost_metrics.tokens_used for r in results)
|
| 381 |
+
|
| 382 |
+
# Quality score (average of metric scores for successful runs)
|
| 383 |
+
quality_scores = []
|
| 384 |
+
cost_per_quality_scores = []
|
| 385 |
+
for r in results:
|
| 386 |
+
if r.success and r.metrics:
|
| 387 |
+
# Combine relevant metrics
|
| 388 |
+
scores = []
|
| 389 |
+
if "complexity" in r.metrics:
|
| 390 |
+
# Invert complexity (lower is better)
|
| 391 |
+
scores.append(1.0 - min(r.metrics["complexity"].value / 100, 1.0))
|
| 392 |
+
if "hardware_fitness" in r.metrics:
|
| 393 |
+
scores.append(r.metrics["hardware_fitness"].value)
|
| 394 |
+
if "state_correctness" in r.metrics:
|
| 395 |
+
scores.append(r.metrics["state_correctness"].value)
|
| 396 |
+
if scores:
|
| 397 |
+
q_score = statistics.mean(scores)
|
| 398 |
+
quality_scores.append(q_score)
|
| 399 |
+
cost_per_quality_scores.append(r.cost_metrics.cost_per_quality(q_score))
|
| 400 |
+
|
| 401 |
+
avg_quality = statistics.mean(quality_scores) if quality_scores else 0.0
|
| 402 |
+
avg_cpq = statistics.mean(cost_per_quality_scores) if cost_per_quality_scores else float('inf')
|
| 403 |
+
|
| 404 |
+
# Effectiveness: Did we achieve the goal?
|
| 405 |
+
effective_runs = sum(
|
| 406 |
+
1 for r in results
|
| 407 |
+
if r.success and r.metrics.get("state_correctness", MetricResult("", 0, "")).value > 0.8
|
| 408 |
+
)
|
| 409 |
+
effectiveness = effective_runs / num_runs if num_runs > 0 else 0.0
|
| 410 |
+
|
| 411 |
+
# Reliability: Consistency of results (based on variance of success and quality)
|
| 412 |
+
reliability = success_rate * (1.0 - std_time / max(avg_time, 1.0))
|
| 413 |
+
reliability = max(0.0, min(1.0, reliability))
|
| 414 |
+
|
| 415 |
+
return AggregatedResults(
|
| 416 |
+
problem_id=problem_id,
|
| 417 |
+
system_mode=system_mode,
|
| 418 |
+
num_runs=num_runs,
|
| 419 |
+
success_rate=success_rate,
|
| 420 |
+
avg_time_ms=avg_time,
|
| 421 |
+
std_time_ms=std_time,
|
| 422 |
+
avg_quality_score=avg_quality,
|
| 423 |
+
effectiveness=effectiveness,
|
| 424 |
+
reliability=reliability,
|
| 425 |
+
total_llm_requests=total_llm,
|
| 426 |
+
total_mcp_requests=total_mcp,
|
| 427 |
+
total_tokens=total_tokens,
|
| 428 |
+
avg_cost_per_quality=avg_cpq,
|
| 429 |
+
all_results=results
|
| 430 |
+
)
|
| 431 |
+
|
| 432 |
+
def evaluate_problem(self, problem: TestProblem,
|
| 433 |
+
modes: List[str] = None) -> Dict[str, AggregatedResults]:
|
| 434 |
+
"""Evaluate a problem across all modes."""
|
| 435 |
+
if modes is None:
|
| 436 |
+
modes = ["blackboard", "guided", "naked"]
|
| 437 |
+
|
| 438 |
+
results_by_mode = {}
|
| 439 |
+
|
| 440 |
+
for mode in modes:
|
| 441 |
+
run_results = []
|
| 442 |
+
|
| 443 |
+
for run_num in range(1, self.num_runs + 1):
|
| 444 |
+
result = self.evaluate_single_run(problem, mode, run_num)
|
| 445 |
+
run_results.append(result)
|
| 446 |
+
|
| 447 |
+
# Store in database
|
| 448 |
+
self.db.store_result(ResultEntry(
|
| 449 |
+
run_id=f"{problem.id}_{mode}_{run_num}",
|
| 450 |
+
system_mode=mode,
|
| 451 |
+
problem_id=problem.id,
|
| 452 |
+
success=result.success,
|
| 453 |
+
execution_time_ms=result.execution_time_ms,
|
| 454 |
+
circuit_qasm=result.circuit_qasm,
|
| 455 |
+
metrics={k: asdict(v) for k, v in result.metrics.items()}
|
| 456 |
+
))
|
| 457 |
+
|
| 458 |
+
aggregated = self.aggregate_results(run_results)
|
| 459 |
+
results_by_mode[mode] = aggregated
|
| 460 |
+
|
| 461 |
+
return results_by_mode
|
| 462 |
+
|
| 463 |
+
def evaluate_all(self, problems: List[TestProblem] = None,
|
| 464 |
+
modes: List[str] = None) -> Dict[str, Dict[str, AggregatedResults]]:
|
| 465 |
+
"""Evaluate all problems across all modes."""
|
| 466 |
+
if problems is None:
|
| 467 |
+
problems = ALL_PROBLEMS
|
| 468 |
+
if modes is None:
|
| 469 |
+
modes = ["blackboard", "guided", "naked"]
|
| 470 |
+
|
| 471 |
+
all_results = {}
|
| 472 |
+
|
| 473 |
+
for problem in problems:
|
| 474 |
+
logger.info(f"Evaluating problem: {problem.name}")
|
| 475 |
+
all_results[problem.id] = self.evaluate_problem(problem, modes)
|
| 476 |
+
|
| 477 |
+
self.results = all_results
|
| 478 |
+
return all_results
|
| 479 |
+
|
| 480 |
+
def generate_report(self, output_path: Optional[Path] = None) -> str:
|
| 481 |
+
"""Generate a comparison report with cost analysis."""
|
| 482 |
+
if not self.results:
|
| 483 |
+
return "No results to report. Run evaluate_all() first."
|
| 484 |
+
|
| 485 |
+
lines = [
|
| 486 |
+
"=" * 100,
|
| 487 |
+
"QUANTUM AGENT SYSTEM COMPARATIVE EVALUATION REPORT",
|
| 488 |
+
f"Generated: {datetime.now().isoformat()}",
|
| 489 |
+
f"Number of runs per problem: {self.num_runs}",
|
| 490 |
+
"=" * 100,
|
| 491 |
+
""
|
| 492 |
+
]
|
| 493 |
+
|
| 494 |
+
# Summary table with cost metrics
|
| 495 |
+
lines.append("SUMMARY BY MODE (with Cost Analysis)")
|
| 496 |
+
lines.append("-" * 100)
|
| 497 |
+
lines.append(f"{'Mode':<12} {'Success%':>9} {'Time(ms)':>10} {'Quality':>8} {'LLM Req':>8} {'Tokens':>10} {'Cost/Qual':>10}")
|
| 498 |
+
lines.append("-" * 100)
|
| 499 |
+
|
| 500 |
+
mode_totals = {
|
| 501 |
+
mode: {
|
| 502 |
+
"success": 0, "total": 0, "times": [], "quality": [],
|
| 503 |
+
"llm_req": 0, "mcp_req": 0, "tokens": 0, "cpq": []
|
| 504 |
+
}
|
| 505 |
+
for mode in ["blackboard", "guided", "naked"]
|
| 506 |
+
}
|
| 507 |
+
|
| 508 |
+
for problem_id, mode_results in self.results.items():
|
| 509 |
+
for mode, agg in mode_results.items():
|
| 510 |
+
mode_totals[mode]["success"] += agg.success_rate * agg.num_runs
|
| 511 |
+
mode_totals[mode]["total"] += agg.num_runs
|
| 512 |
+
mode_totals[mode]["times"].append(agg.avg_time_ms)
|
| 513 |
+
mode_totals[mode]["quality"].append(agg.avg_quality_score)
|
| 514 |
+
mode_totals[mode]["llm_req"] += agg.total_llm_requests
|
| 515 |
+
mode_totals[mode]["mcp_req"] += agg.total_mcp_requests
|
| 516 |
+
mode_totals[mode]["tokens"] += agg.total_tokens
|
| 517 |
+
if agg.avg_cost_per_quality != float('inf'):
|
| 518 |
+
mode_totals[mode]["cpq"].append(agg.avg_cost_per_quality)
|
| 519 |
+
|
| 520 |
+
for mode, totals in mode_totals.items():
|
| 521 |
+
if totals["total"] > 0:
|
| 522 |
+
success_pct = (totals["success"] / totals["total"]) * 100
|
| 523 |
+
avg_time = statistics.mean(totals["times"]) if totals["times"] else 0
|
| 524 |
+
avg_quality = statistics.mean(totals["quality"]) if totals["quality"] else 0
|
| 525 |
+
avg_cpq = statistics.mean(totals["cpq"]) if totals["cpq"] else float('inf')
|
| 526 |
+
cpq_str = f"{avg_cpq:.2f}" if avg_cpq != float('inf') else "N/A"
|
| 527 |
+
|
| 528 |
+
lines.append(
|
| 529 |
+
f"{mode:<12} {success_pct:>8.1f}% {avg_time:>9.0f} {avg_quality:>8.2f} "
|
| 530 |
+
f"{totals['llm_req']:>8} {totals['tokens']:>10} {cpq_str:>10}"
|
| 531 |
+
)
|
| 532 |
+
|
| 533 |
+
lines.append("")
|
| 534 |
+
lines.append("")
|
| 535 |
+
|
| 536 |
+
# Cost efficiency analysis
|
| 537 |
+
lines.append("COST EFFICIENCY ANALYSIS")
|
| 538 |
+
lines.append("-" * 60)
|
| 539 |
+
lines.append("")
|
| 540 |
+
lines.append("Expected LLM Requests per problem:")
|
| 541 |
+
lines.append(" - Naked: 1 (single direct LLM call)")
|
| 542 |
+
lines.append(" - Guided: 4 (one per agent: Architect, Builder, Validator, Scorer)")
|
| 543 |
+
lines.append(" - Blackboard: 8-12 (multiple collaborative rounds)")
|
| 544 |
+
lines.append("")
|
| 545 |
+
lines.append("Cost-per-Quality interpretation:")
|
| 546 |
+
lines.append(" - Lower is better (less resources for same quality)")
|
| 547 |
+
lines.append(" - Naked has lowest cost but tests raw LLM capability")
|
| 548 |
+
lines.append(" - Blackboard has highest cost but best quality potential")
|
| 549 |
+
lines.append("")
|
| 550 |
+
|
| 551 |
+
# Detailed results per problem
|
| 552 |
+
lines.append("DETAILED RESULTS BY PROBLEM")
|
| 553 |
+
lines.append("-" * 100)
|
| 554 |
+
|
| 555 |
+
for problem_id, mode_results in self.results.items():
|
| 556 |
+
problem = get_problem(problem_id)
|
| 557 |
+
problem_name = problem.name if problem else problem_id
|
| 558 |
+
|
| 559 |
+
lines.append(f"\n{problem_name} ({problem_id})")
|
| 560 |
+
lines.append("-" * 50)
|
| 561 |
+
lines.append(f"{'Mode':<12} {'Success':>8} {'Time(ms)':>10} {'Quality':>8} {'LLM':>6} {'Tokens':>8}")
|
| 562 |
+
|
| 563 |
+
for mode, agg in mode_results.items():
|
| 564 |
+
lines.append(
|
| 565 |
+
f"{mode:<12} "
|
| 566 |
+
f"{agg.success_rate*100:>7.0f}% "
|
| 567 |
+
f"{agg.avg_time_ms:>9.0f} "
|
| 568 |
+
f"{agg.avg_quality_score:>8.2f} "
|
| 569 |
+
f"{agg.total_llm_requests:>6} "
|
| 570 |
+
f"{agg.total_tokens:>8}"
|
| 571 |
+
)
|
| 572 |
+
|
| 573 |
+
lines.append("")
|
| 574 |
+
lines.append("=" * 100)
|
| 575 |
+
lines.append("END OF REPORT")
|
| 576 |
+
|
| 577 |
+
report = "\n".join(lines)
|
| 578 |
+
|
| 579 |
+
if output_path:
|
| 580 |
+
output_path.write_text(report)
|
| 581 |
+
logger.info(f"Report saved to: {output_path}")
|
| 582 |
+
|
| 583 |
+
return report
|
| 584 |
+
|
| 585 |
+
    def export_csv(self, output_path: Optional[Path] = None) -> str:
        """
        Export results to CSV for research analysis.

        CSV Columns:
        - timestamp: When the evaluation was run
        - problem_id: Unique problem identifier
        - problem_name: Human-readable problem name
        - difficulty: Problem difficulty (easy, medium, hard)
        - mode: Execution mode (naked, guided, blackboard)
        - run_number: Run iteration (1 to num_runs)
        - success: Whether the run succeeded (True/False)
        - time_ms: Execution time in milliseconds
        - llm_requests: Number of LLM API calls
        - tokens_used: Total tokens consumed
        - mcp_requests: Number of MCP tool calls
        - quality_score: Combined quality score (0-1)
        - depth: Circuit depth
        - complexity: Circuit complexity score
        - hardware_fitness: Hardware compatibility score
        - syntax_valid: Whether QASM syntax is valid
        - state_correctness: Probability distribution correctness
        - cost_per_quality: Cost efficiency ratio
        - model_used: Primary LLM model used
        - qasm_length: Length of generated QASM code

        Returns the path of the written CSV file as a string, or a hint
        message when no results have been collected yet.
        """
        if not self.results:
            return "No results to export. Run evaluate_all() first."

        # Single timestamp shared by every row of this export.
        timestamp = datetime.now().isoformat()

        # Default output path
        if output_path is None:
            output_dir = Path(__file__).parent.parent / "research"
            output_dir.mkdir(exist_ok=True)
            output_path = output_dir / f"evaluation_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"

        # CSV header
        fieldnames = [
            'timestamp', 'problem_id', 'problem_name', 'difficulty',
            'mode', 'run_number', 'success', 'time_ms',
            'llm_requests', 'tokens_used', 'mcp_requests',
            'quality_score', 'depth', 'complexity', 'hardware_fitness',
            'syntax_valid', 'state_correctness', 'cost_per_quality',
            'model_used', 'qasm_length', 'errors'
        ]

        rows = []

        for problem_id, mode_results in self.results.items():
            problem = get_problem(problem_id)
            problem_name = problem.name if problem else problem_id
            difficulty = problem.difficulty if problem else "unknown"

            for mode, agg in mode_results.items():
                for result in agg.all_results:
                    # Extract metric values safely
                    def get_metric(name, default=0.0):
                        if name in result.metrics:
                            return result.metrics[name].value
                        return default

                    # Calculate quality score (same weighting as aggregation)
                    quality_components = []
                    if "complexity" in result.metrics:
                        quality_components.append(1.0 - min(get_metric("complexity") / 100, 1.0))
                    if "hardware_fitness" in result.metrics:
                        quality_components.append(get_metric("hardware_fitness"))
                    if "state_correctness" in result.metrics:
                        quality_components.append(get_metric("state_correctness"))
                    quality_score = statistics.mean(quality_components) if quality_components else 0.0

                    # Cost per quality ("inf" in the CSV when undefined)
                    cpq = result.cost_metrics.cost_per_quality(quality_score) if quality_score > 0 else float('inf')
                    cpq_str = f"{cpq:.4f}" if cpq != float('inf') else "inf"

                    # Model used (first recorded model, if any)
                    models = result.cost_metrics.models_used
                    model_used = models[0] if models else "unknown"

                    # QASM length
                    qasm_len = len(result.circuit_qasm) if result.circuit_qasm else 0

                    row = {
                        'timestamp': timestamp,
                        'problem_id': problem_id,
                        'problem_name': problem_name,
                        'difficulty': difficulty,
                        'mode': mode,
                        'run_number': result.run_number,
                        'success': result.success,
                        'time_ms': f"{result.execution_time_ms:.2f}",
                        'llm_requests': result.cost_metrics.llm_requests,
                        'tokens_used': result.cost_metrics.tokens_used,
                        'mcp_requests': result.cost_metrics.mcp_requests,
                        'quality_score': f"{quality_score:.4f}",
                        'depth': get_metric("depth"),
                        'complexity': f"{get_metric('complexity'):.2f}",
                        'hardware_fitness': f"{get_metric('hardware_fitness'):.4f}",
                        'syntax_valid': get_metric("syntax_valid") == 1.0,
                        'state_correctness': f"{get_metric('state_correctness'):.4f}",
                        'cost_per_quality': cpq_str,
                        'model_used': model_used,
                        'qasm_length': qasm_len,
                        'errors': "; ".join(result.errors) if result.errors else ""
                    }
                    rows.append(row)

        # Write CSV
        with open(output_path, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(rows)

        logger.info(f"CSV exported to: {output_path}")
        return str(output_path)
|
| 701 |
+
|
| 702 |
+
def get_summary_stats(self) -> Dict[str, Any]:
|
| 703 |
+
"""
|
| 704 |
+
Get summary statistics for the evaluation run.
|
| 705 |
+
Useful for programmatic access to results.
|
| 706 |
+
"""
|
| 707 |
+
if not self.results:
|
| 708 |
+
return {}
|
| 709 |
+
|
| 710 |
+
stats = {
|
| 711 |
+
'timestamp': datetime.now().isoformat(),
|
| 712 |
+
'num_problems': len(self.results),
|
| 713 |
+
'runs_per_problem': self.num_runs,
|
| 714 |
+
'modes': {}
|
| 715 |
+
}
|
| 716 |
+
|
| 717 |
+
for mode in ['naked', 'guided', 'blackboard']:
|
| 718 |
+
mode_stats = {
|
| 719 |
+
'success_rate': 0.0,
|
| 720 |
+
'avg_time_ms': 0.0,
|
| 721 |
+
'total_llm_requests': 0,
|
| 722 |
+
'total_tokens': 0,
|
| 723 |
+
'avg_quality': 0.0
|
| 724 |
+
}
|
| 725 |
+
|
| 726 |
+
times = []
|
| 727 |
+
qualities = []
|
| 728 |
+
total_runs = 0
|
| 729 |
+
successes = 0
|
| 730 |
+
|
| 731 |
+
for problem_id, mode_results in self.results.items():
|
| 732 |
+
if mode in mode_results:
|
| 733 |
+
agg = mode_results[mode]
|
| 734 |
+
total_runs += agg.num_runs
|
| 735 |
+
successes += agg.success_rate * agg.num_runs
|
| 736 |
+
times.append(agg.avg_time_ms)
|
| 737 |
+
qualities.append(agg.avg_quality_score)
|
| 738 |
+
mode_stats['total_llm_requests'] += agg.total_llm_requests
|
| 739 |
+
mode_stats['total_tokens'] += agg.total_tokens
|
| 740 |
+
|
| 741 |
+
if total_runs > 0:
|
| 742 |
+
mode_stats['success_rate'] = successes / total_runs
|
| 743 |
+
mode_stats['avg_time_ms'] = statistics.mean(times) if times else 0
|
| 744 |
+
mode_stats['avg_quality'] = statistics.mean(qualities) if qualities else 0
|
| 745 |
+
|
| 746 |
+
stats['modes'][mode] = mode_stats
|
| 747 |
+
|
| 748 |
+
return stats
|
tests/evaluation_report.txt
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
====================================================================================================
|
| 2 |
+
QUANTUM AGENT SYSTEM COMPARATIVE EVALUATION REPORT
|
| 3 |
+
Generated: 2025-11-28T18:38:30.068424
|
| 4 |
+
Number of runs per problem: 1
|
| 5 |
+
====================================================================================================
|
| 6 |
+
|
| 7 |
+
SUMMARY BY MODE (with Cost Analysis)
|
| 8 |
+
----------------------------------------------------------------------------------------------------
|
| 9 |
+
Mode Success% Time(ms) Quality LLM Req Tokens Cost/Qual
|
| 10 |
+
----------------------------------------------------------------------------------------------------
|
| 11 |
+
blackboard 66.7% 14612 0.00 5 2709 N/A
|
| 12 |
+
guided 100.0% 23975 0.00 8 4481 N/A
|
| 13 |
+
naked 100.0% 5251 0.00 3 901 N/A
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
COST EFFICIENCY ANALYSIS
|
| 17 |
+
------------------------------------------------------------
|
| 18 |
+
|
| 19 |
+
Expected LLM Requests per problem:
|
| 20 |
+
- Naked: 1 (single direct LLM call)
|
| 21 |
+
- Guided: 4 (one per agent: Architect, Builder, Validator, Scorer)
|
| 22 |
+
- Blackboard: 8-12 (multiple collaborative rounds)
|
| 23 |
+
|
| 24 |
+
Cost-per-Quality interpretation:
|
| 25 |
+
- Lower is better (less resources for same quality)
|
| 26 |
+
- Naked has lowest cost but tests raw LLM capability
|
| 27 |
+
- Blackboard has highest cost but best quality potential
|
| 28 |
+
|
| 29 |
+
DETAILED RESULTS BY PROBLEM
|
| 30 |
+
----------------------------------------------------------------------------------------------------
|
| 31 |
+
|
| 32 |
+
Phase Flip State (easy_001)
|
| 33 |
+
--------------------------------------------------
|
| 34 |
+
Mode Success Time(ms) Quality LLM Tokens
|
| 35 |
+
blackboard 100% 11292 0.00 2 955
|
| 36 |
+
guided 100% 31284 0.00 4 2177
|
| 37 |
+
naked 100% 6894 0.00 1 293
|
| 38 |
+
|
| 39 |
+
Entanglement Generation (easy_002)
|
| 40 |
+
--------------------------------------------------
|
| 41 |
+
Mode Success Time(ms) Quality LLM Tokens
|
| 42 |
+
blackboard 0% 16832 0.00 1 529
|
| 43 |
+
guided 100% 20431 0.00 2 1046
|
| 44 |
+
naked 100% 1929 0.00 1 305
|
| 45 |
+
|
| 46 |
+
X-Basis Measurement Prep (easy_003)
|
| 47 |
+
--------------------------------------------------
|
| 48 |
+
Mode Success Time(ms) Quality LLM Tokens
|
| 49 |
+
blackboard 100% 15713 0.00 2 1225
|
| 50 |
+
guided 100% 20209 0.00 2 1258
|
| 51 |
+
naked 100% 6930 0.00 1 303
|
| 52 |
+
|
| 53 |
+
====================================================================================================
|
| 54 |
+
END OF REPORT
|
tests/fast_eval.py
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Path: QAgents-workflos/tests/fast_eval.py
|
| 2 |
+
# Fast evaluation - one problem per difficulty, all modes
|
| 3 |
+
"""Fast mode evaluation."""
|
| 4 |
+
|
| 5 |
+
import sys
|
| 6 |
+
import os
|
| 7 |
+
import time
|
| 8 |
+
import json
|
| 9 |
+
from datetime import datetime
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
|
| 12 |
+
sys.path.insert(0, str(Path(__file__).parent.parent.absolute()))
|
| 13 |
+
|
| 14 |
+
# BUGFIX: the key was hard-coded to the literal string "$env:GOOGLE_API_KEY"
# (a PowerShell expansion that never happens inside Python), so every LLM
# call would have been made with an invalid key. Read it from the real
# environment instead; an empty string preserves the "no key" behavior.
api_key = os.environ.get('GOOGLE_API_KEY', '')
os.environ['GOOGLE_API_KEY'] = api_key
|
| 16 |
+
|
| 17 |
+
from tests.test_problems import (
|
| 18 |
+
PROBLEM_E1_PHASE_FLIP,
|
| 19 |
+
PROBLEM_M1_SWAP_DECOMPOSITION,
|
| 20 |
+
PROBLEM_H1_DEUTSCH,
|
| 21 |
+
PROBLEM_VH4_BERNSTEIN_VAZIRANI
|
| 22 |
+
)
|
| 23 |
+
from orchestrators import create_orchestrator
|
| 24 |
+
from orchestrators.quasar_orchestrator import QuasarOrchestrator, HybridOrchestrator
|
| 25 |
+
from config import set_api_key
|
| 26 |
+
import re
|
| 27 |
+
|
| 28 |
+
set_api_key(api_key)
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def extract_gates(qasm):
    """Count recognised gate mnemonics in a QASM string (0 for empty/None)."""
    if not qasm:
        return 0
    # Word boundaries keep e.g. the "x" in "cx" from double-counting.
    pattern = re.compile(r'\b(h|x|y|z|s|t|cx|cz|swap|ccx|rz|rx|ry|cp)\b',
                         re.IGNORECASE)
    return len(pattern.findall(qasm))
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def test_problem(problem, mode, timeout=60):
    """Run one problem through the given orchestrator mode and summarise it.

    Returns a dict with keys: ``success`` (bool), ``time_ms`` (wall-clock
    duration), ``llm`` (LLM call count), ``gates`` (gate count of the
    produced circuit), and ``error`` (string or None).

    NOTE(review): the ``timeout`` parameter is accepted but never enforced.
    """
    start = time.perf_counter()

    try:
        if mode == "quasar":
            # QUASAR exposes final_qasm / llm_calls directly on its result.
            orch = QuasarOrchestrator(max_iterations=3)
            result = orch.run(problem.prompt, problem.expected.min_qubits)
            return {"success": result.success, "time_ms": (time.perf_counter()-start)*1000,
                    "llm": result.llm_calls, "gates": extract_gates(result.final_qasm), "error": None}

        elif mode == "hybrid":
            orch = HybridOrchestrator()
            result = orch.run(problem.prompt, problem.expected.min_qubits)
            return {"success": result.success, "time_ms": (time.perf_counter()-start)*1000,
                    "llm": result.llm_calls, "gates": extract_gates(result.final_qasm), "error": None}

        else:
            # Generic orchestrators expose final_output / agent_results instead;
            # "naked" mode is assumed to make exactly one LLM call.
            orch = create_orchestrator(mode)
            result = orch.run(problem.prompt)
            llm = 1 if mode == "naked" else len(result.agent_results) if result.agent_results else 0
            return {"success": result.success, "time_ms": (time.perf_counter()-start)*1000,
                    "llm": llm, "gates": extract_gates(result.final_output), "error": "; ".join(result.errors) if result.errors else None}

    except Exception as e:
        # Orchestrator failures are reported as unsuccessful runs with a
        # truncated error message rather than aborting the whole evaluation.
        return {"success": False, "time_ms": (time.perf_counter()-start)*1000,
                "llm": 0, "gates": 0, "error": str(e)[:60]}
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
# --- Script body: run every mode on one problem per difficulty tier -------
print("=" * 70)
print("FAST MODE EVALUATION")
print("=" * 70)
print(f"Date: {datetime.now().isoformat()}")

# One representative problem per difficulty tier.
problems = [
    ("EASY", PROBLEM_E1_PHASE_FLIP),
    ("MEDIUM", PROBLEM_M1_SWAP_DECOMPOSITION),
    ("HARD", PROBLEM_H1_DEUTSCH),
    ("VERY_HARD", PROBLEM_VH4_BERNSTEIN_VAZIRANI)
]

modes = ["naked", "quasar", "hybrid", "blackboard"]
all_results = {}  # difficulty -> mode -> result dict from test_problem()

for diff, problem in problems:
    print(f"\n{diff}: {problem.name}")
    print("-" * 50)
    all_results[diff] = {}

    for mode in modes:
        print(f" {mode:12}", end=" ", flush=True)
        result = test_problem(problem, mode)
        all_results[diff][mode] = result

        status = "✅" if result["success"] else "❌"
        print(f"{status} {result['time_ms']:5.0f}ms LLM:{result['llm']} Gates:{result['gates']}")

        if result["error"]:
            print(f" ⚠️ {result['error'][:40]}...")

        time.sleep(5)  # pause between API-backed runs (rate limiting)

# Summary
print("\n" + "=" * 70)
print("SUMMARY")
print("=" * 70)

for mode in modes:
    successes = sum(1 for diff in all_results if all_results[diff][mode]["success"])
    total_time = sum(all_results[diff][mode]["time_ms"] for diff in all_results)
    total_llm = sum(all_results[diff][mode]["llm"] for diff in all_results)
    # NOTE: "25*successes" assumes exactly 4 problems in the suite above.
    print(f"\n{mode.upper():12} {successes}/4 ({25*successes}%) | {total_time:.0f}ms | {total_llm} LLM calls")
    for diff in all_results:
        r = all_results[diff][mode]
        status = "✅" if r["success"] else "❌"
        print(f" {diff:10} {status}")

print("\n" + "=" * 70)
print("DONE")
|
tests/final_eval.py
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Path: QAgents-workflos/tests/final_eval.py
|
| 2 |
+
# Final evaluation - NAKED vs BLACKBOARD on all difficulties
|
| 3 |
+
"""Final mode evaluation: NAKED vs fixed BLACKBOARD."""
|
| 4 |
+
|
| 5 |
+
import sys
|
| 6 |
+
import os
|
| 7 |
+
import time
|
| 8 |
+
from datetime import datetime
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
|
| 11 |
+
sys.path.insert(0, str(Path(__file__).parent.parent.absolute()))
|
| 12 |
+
|
| 13 |
+
# Read the Gemini API key from the environment.
# BUG FIX: this was previously hard-coded to the literal PowerShell string
# "$env:GOOGLE_API_KEY" (never a valid key), which also clobbered any real
# key already present in os.environ.
api_key = os.environ.get("GOOGLE_API_KEY", "")
if not api_key:
    raise SystemExit("GOOGLE_API_KEY environment variable is not set")
os.environ['GOOGLE_API_KEY'] = api_key

from tests.test_problems import ALL_PROBLEMS
from orchestrators import create_orchestrator
from config import set_api_key
import re

set_api_key(api_key)
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def extract_gates(qasm):
    """Return how many known gate mnemonics appear in *qasm* (0 if empty/None)."""
    if qasm:
        # Whole-word, case-insensitive match against the supported gate set.
        pattern = r'\b(h|x|y|z|s|t|cx|cz|swap|ccx|rz|rx|ry|cp)\b'
        return len(re.findall(pattern, qasm, re.IGNORECASE))
    return 0
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def test_problem(problem, mode):
    """Execute *problem* in orchestrator *mode* and return a result record.

    The record has keys: success, time_ms, llm, gates, error.
    Exceptions never propagate; failures become unsuccessful records.
    """
    t0 = time.perf_counter()

    try:
        runner = create_orchestrator(mode)
        outcome = runner.run(problem.prompt)

        # "naked" mode is assumed to make exactly one LLM call; otherwise
        # count one call per agent result.
        if mode == "naked":
            calls = 1
        elif outcome.agent_results:
            calls = len(outcome.agent_results)
        else:
            calls = 0

        if outcome.errors:
            err = "; ".join(outcome.errors[:2])
        else:
            err = None

        return {
            "success": outcome.success,
            "time_ms": (time.perf_counter() - t0) * 1000,
            "llm": calls,
            "gates": extract_gates(outcome.final_output),
            "error": err,
        }

    except Exception as exc:
        return {
            "success": False,
            "time_ms": (time.perf_counter() - t0) * 1000,
            "llm": 0,
            "gates": 0,
            "error": str(exc)[:60],
        }
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
# --- Script body: NAKED vs BLACKBOARD over the full problem suite ---------
print("=" * 80)
print("FINAL MODE EVALUATION: NAKED vs BLACKBOARD")
print("=" * 80)
print(f"Date: {datetime.now().isoformat()}")
print(f"Problems: {len(ALL_PROBLEMS)}")
print()

modes = ["naked", "blackboard"]
# difficulty -> mode -> list of result dicts
results_by_difficulty = {"easy": {}, "medium": {}, "hard": {}, "very_hard": {}}

for problem in ALL_PROBLEMS:
    diff = problem.difficulty.value
    print(f"\n{diff.upper()}: {problem.name}")

    # Defensive: accept difficulties beyond the four pre-seeded buckets.
    if diff not in results_by_difficulty:
        results_by_difficulty[diff] = {}

    for mode in modes:
        print(f" {mode:12}", end=" ", flush=True)
        result = test_problem(problem, mode)

        if mode not in results_by_difficulty[diff]:
            results_by_difficulty[diff][mode] = []
        results_by_difficulty[diff][mode].append(result)

        status = "✅" if result["success"] else "❌"
        print(f"{status} {result['time_ms']:5.0f}ms LLM:{result['llm']} Gates:{result['gates']}")

        if result["error"] and not result["success"]:
            print(f" ⚠️ {result['error'][:50]}...")

        time.sleep(4)  # pause between API-backed runs (rate limiting)

# Summary
print("\n\n" + "=" * 80)
print("FINAL SUMMARY")
print("=" * 80)

for mode in modes:
    print(f"\n{mode.upper()}")
    print("-" * 40)

    total_success = 0
    total_problems = 0
    total_time = 0
    total_llm = 0

    for diff in ["easy", "medium", "hard", "very_hard"]:
        if diff in results_by_difficulty and mode in results_by_difficulty[diff]:
            results = results_by_difficulty[diff][mode]
            successes = sum(1 for r in results if r["success"])
            total_success += successes
            total_problems += len(results)
            total_time += sum(r["time_ms"] for r in results)
            total_llm += sum(r["llm"] for r in results)

            print(f" {diff:10}: {successes}/{len(results)}")

    # NOTE(review): divides by total_problems — raises ZeroDivisionError if
    # ALL_PROBLEMS is empty; confirm the suite is always non-empty.
    print(f"\n TOTAL: {total_success}/{total_problems} ({100*total_success/total_problems:.0f}%)")
    print(f" Time: {total_time:.0f}ms total ({total_time/total_problems:.0f}ms avg)")
    print(f" LLM calls: {total_llm}")

print("\n" + "=" * 80)
print("WINNER DETERMINATION")
print("=" * 80)

for diff in ["easy", "medium", "hard", "very_hard"]:
    if diff not in results_by_difficulty:
        continue

    print(f"\n{diff.upper()}:")
    for mode in modes:
        if mode in results_by_difficulty[diff]:
            results = results_by_difficulty[diff][mode]
            successes = sum(1 for r in results if r["success"])
            avg_time = sum(r["time_ms"] for r in results) / len(results)
            print(f" {mode}: {successes}/{len(results)} ({avg_time:.0f}ms avg)")

print("\n" + "=" * 80)
print("DONE")
|
tests/full_comparison.py
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Path: QAgents-workflos/tests/full_comparison.py
|
| 2 |
+
# Full comparison test across all modes and difficulties
|
| 3 |
+
"""Full mode comparison test."""
|
| 4 |
+
|
| 5 |
+
import sys
|
| 6 |
+
import os
|
| 7 |
+
import time
|
| 8 |
+
import json
|
| 9 |
+
from datetime import datetime
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
|
| 12 |
+
sys.path.insert(0, str(Path(__file__).parent.parent.absolute()))
|
| 13 |
+
|
| 14 |
+
# Read the Gemini API key from the environment.
# BUG FIX: this was previously hard-coded to the literal PowerShell string
# "$env:GOOGLE_API_KEY" (never a valid key), which also clobbered any real
# key already present in os.environ.
api_key = os.environ.get("GOOGLE_API_KEY", "")
if not api_key:
    raise SystemExit("GOOGLE_API_KEY environment variable is not set")
os.environ['GOOGLE_API_KEY'] = api_key

from tests.test_problems import ALL_PROBLEMS, ProblemDifficulty
from orchestrators import create_orchestrator
from orchestrators.quasar_orchestrator import QuasarOrchestrator, HybridOrchestrator
from config import set_api_key
import re

set_api_key(api_key)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def extract_gates(qasm):
    """Count gates in QASM.

    Counts whole-word, case-insensitive occurrences of the supported gate
    mnemonics; returns 0 for None or an empty string.
    """
    if not qasm:
        return 0
    gate_pattern = r'\b(h|x|y|z|s|t|cx|cz|swap|ccx|rz|rx|ry|cp)\b'
    return sum(1 for _ in re.finditer(gate_pattern, qasm, re.IGNORECASE))
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def test_problem(problem, mode):
    """Test a single problem.

    Runs *problem* through the orchestrator selected by *mode* and returns a
    uniform result dict: success, time_ms, llm (call count), gates,
    iterations, tiers (validation tiers passed, QUASAR/hybrid only),
    qasm (raw output), error.
    """
    start = time.perf_counter()

    try:
        if mode == "quasar":
            # QUASAR and hybrid runs also receive the expected qubit count and
            # (optionally) the expected measurement states for validation.
            orch = QuasarOrchestrator(max_iterations=3)
            result = orch.run(
                problem.prompt,
                problem.expected.min_qubits,
                problem.expected.expected_states if problem.expected.expected_states else None
            )
            success = result.success
            qasm = result.final_qasm
            llm = result.llm_calls
            iterations = result.iterations
            tiers = result.tiers_passed

        elif mode == "hybrid":
            orch = HybridOrchestrator()
            result = orch.run(
                problem.prompt,
                problem.expected.min_qubits,
                problem.expected.expected_states if problem.expected.expected_states else None
            )
            success = result.success
            qasm = result.final_qasm
            llm = result.llm_calls
            iterations = result.iterations
            tiers = result.tiers_passed

        else:
            # Generic orchestrators: single pass, no tier information;
            # "naked" mode is assumed to make exactly one LLM call.
            orch = create_orchestrator(mode)
            result = orch.run(problem.prompt)
            success = result.success
            qasm = result.final_output
            llm = 1 if mode == "naked" else len(result.agent_results) if result.agent_results else 0
            iterations = 1
            tiers = []

        elapsed = (time.perf_counter() - start) * 1000
        gates = extract_gates(qasm)

        return {
            "success": success,
            "time_ms": elapsed,
            "llm": llm,
            "gates": gates,
            "iterations": iterations,
            "tiers": tiers,
            "qasm": qasm,
            "error": None
        }

    except Exception as e:
        # Failures are folded into the same record schema so downstream
        # aggregation never needs special-casing.
        elapsed = (time.perf_counter() - start) * 1000
        return {
            "success": False,
            "time_ms": elapsed,
            "llm": 0,
            "gates": 0,
            "iterations": 0,
            "tiers": [],
            "qasm": None,
            "error": str(e)[:100]
        }
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
def main():
    """Run every mode over the full problem suite and report per-mode and
    per-difficulty statistics, persisting raw results to research/*.json."""
    print("=" * 100)
    print("FULL MODE COMPARISON TEST")
    print("=" * 100)
    print(f"Date: {datetime.now().isoformat()}")
    print(f"Total problems: {len(ALL_PROBLEMS)}")
    print()

    # Modes to test - focus on the key ones
    modes = ["naked", "quasar", "hybrid", "blackboard"]

    all_results = []

    # Group by difficulty
    for difficulty in [ProblemDifficulty.EASY, ProblemDifficulty.MEDIUM, ProblemDifficulty.HARD, ProblemDifficulty.VERY_HARD]:
        problems = [p for p in ALL_PROBLEMS if p.difficulty == difficulty]

        print(f"\n{'='*100}")
        print(f"DIFFICULTY: {difficulty.value.upper()} ({len(problems)} problems)")
        print("=" * 100)

        for problem in problems:
            print(f"\n {problem.id}: {problem.name}")

            for mode in modes:
                print(f" {mode:12}", end=" ", flush=True)

                result = test_problem(problem, mode)
                # Tag the record so it can be filtered after the fact.
                result["problem_id"] = problem.id
                result["difficulty"] = difficulty.value
                result["mode"] = mode
                all_results.append(result)

                status = "✅" if result["success"] else "❌"
                time_str = f"{result['time_ms']:6.0f}ms"
                llm_str = f"LLM:{result['llm']}"
                gates_str = f"Gates:{result['gates']:2}"

                extra = ""
                if result["tiers"]:
                    extra = f" Tiers:{result['tiers']}"

                print(f"{status} {time_str} {llm_str:6} {gates_str}{extra}")

                if result["error"]:
                    print(f" ❌ Error: {result['error'][:60]}...")

                time.sleep(5)  # pause between API-backed runs (rate limiting)

    # Summary
    print("\n\n" + "=" * 100)
    print("SUMMARY BY MODE")
    print("=" * 100)

    for mode in modes:
        mode_results = [r for r in all_results if r["mode"] == mode]
        successes = sum(1 for r in mode_results if r["success"])
        total = len(mode_results)
        total_time = sum(r["time_ms"] for r in mode_results)
        total_llm = sum(r["llm"] for r in mode_results)
        # max(successes, 1) guards against division by zero when nothing passed.
        avg_gates = sum(r["gates"] for r in mode_results if r["success"]) / max(successes, 1)

        print(f"\n{mode.upper():12}")
        print(f" Overall: {successes}/{total} ({100*successes/total:.0f}%)")
        print(f" Time: {total_time/1000:.1f}s total, {total_time/total:.0f}ms avg")
        print(f" LLM: {total_llm} calls ({total_llm/total:.1f} avg)")
        print(f" Gates: {avg_gates:.1f} avg")

        # By difficulty
        for diff in ["easy", "medium", "hard", "very_hard"]:
            diff_results = [r for r in mode_results if r["difficulty"] == diff]
            if diff_results:
                diff_success = sum(1 for r in diff_results if r["success"])
                print(f" {diff:10}: {diff_success}/{len(diff_results)}")

    # Save results
    output_path = Path(__file__).parent.parent / "research" / f"full_comparison_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Clean QASM for JSON (can be long)
    for r in all_results:
        if r["qasm"]:
            r["qasm"] = r["qasm"][:500]  # Truncate for storage

    with open(output_path, 'w') as f:
        json.dump(all_results, f, indent=2)

    print(f"\n\nResults saved to: {output_path}")

    # Winner determination
    print("\n" + "=" * 100)
    print("🏆 WINNER BY DIFFICULTY")
    print("=" * 100)

    for diff in ["easy", "medium", "hard", "very_hard"]:
        print(f"\n{diff.upper()}:")
        best_mode = None
        best_success = -1

        for mode in modes:
            mode_results = [r for r in all_results if r["mode"] == mode and r["difficulty"] == diff]
            if mode_results:
                successes = sum(1 for r in mode_results if r["success"])
                if successes > best_success:
                    best_success = successes
                    best_mode = mode

        if best_mode:
            print(f" 🏆 {best_mode.upper()} ({best_success}/{len([r for r in all_results if r['difficulty']==diff and r['mode']==best_mode])})")


if __name__ == "__main__":
    main()
|
tests/mini_test.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Path: QAgents-workflos/tests/mini_test.py
|
| 2 |
+
# Description: Test all 4 modes on problems of each difficulty
|
| 3 |
+
"""
|
| 4 |
+
Mini Test: Comparison of NAKED, BLACKBOARD, GUIDED, HYBRID on 4 problems.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import sys
|
| 8 |
+
import os
|
| 9 |
+
import warnings
|
| 10 |
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
| 11 |
+
|
| 12 |
+
# Suppress Gemini function_call warning (it's informational, not an error)
|
| 13 |
+
warnings.filterwarnings("ignore", message=".*non-text parts.*")
|
| 14 |
+
|
| 15 |
+
from orchestrators import create_orchestrator
|
| 16 |
+
from tests.test_problems import get_problems_by_difficulty, ProblemDifficulty as Difficulty
|
| 17 |
+
|
| 18 |
+
def test_one(problem, mode):
    """Run one problem through the orchestrator for *mode*.

    Returns a (success, elapsed_ms, gate_line_count) tuple.
    """
    orchestrator = create_orchestrator(mode)

    import time
    t0 = time.perf_counter()
    outcome = orchestrator.run(problem.prompt)
    elapsed_ms = (time.perf_counter() - t0) * 1000

    # Approximate gate count: non-empty output lines that are not headers,
    # register declarations, measurements, or comments.
    skip_prefixes = ('OPENQASM', 'include', 'qreg', 'creg', 'measure', '//')
    gate_count = 0
    if outcome.final_output:
        for line in outcome.final_output.split('\n'):
            if line.strip() and not line.startswith(skip_prefixes):
                gate_count += 1

    return outcome.success, elapsed_ms, gate_count
|
| 33 |
+
|
| 34 |
+
def main():
    """Compare all four orchestration modes on one problem per difficulty
    and print a pass/fail summary per mode."""
    print("=" * 70)
    print("COMPREHENSIVE TEST: NAKED vs BLACKBOARD vs GUIDED vs HYBRID")
    print("=" * 70)

    # Test HARD problems to see where modes fail
    modes = ["naked", "blackboard", "guided", "hybrid"]

    # One problem per difficulty
    test_problems = [
        ("EASY", get_problems_by_difficulty(Difficulty.EASY)[0]),
        ("HARD", get_problems_by_difficulty(Difficulty.HARD)[0]),
        ("VERY_HARD", get_problems_by_difficulty(Difficulty.VERY_HARD)[0]),
    ]

    results = {mode: [] for mode in modes}  # mode -> list of pass/fail bools

    for diff_name, problem in test_problems:
        print(f"\n{diff_name}: {problem.name}")
        print("-" * 50)

        for mode in modes:
            try:
                ok, ms, gates = test_one(problem, mode)
                status = "✅" if ok else "❌"
                print(f" {mode:12} {status} {ms:6.0f}ms {gates:2} gates")
                results[mode].append(ok)
            except Exception as e:
                # A crashing mode counts as a failure rather than aborting
                # the remaining modes/problems.
                print(f" {mode:12} ❌ Error: {str(e)[:50]}")
                results[mode].append(False)

    print("\n" + "=" * 70)
    print("SUMMARY")
    print("=" * 70)
    for mode in modes:
        passed = sum(results[mode])
        total = len(results[mode])
        pct = 100*passed/total if total > 0 else 0
        print(f" {mode:12}: {passed}/{total} passed ({pct:.0f}%)")

if __name__ == "__main__":
    main()
|
tests/mode_evaluation.py
ADDED
|
@@ -0,0 +1,202 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Path: QAgents-workflos/tests/mode_evaluation.py
|
| 2 |
+
# Evaluate all modes on representative problems from each difficulty
|
| 3 |
+
"""Mode Evaluation: Test all modes on key problems from each difficulty level."""
|
| 4 |
+
|
| 5 |
+
import sys
|
| 6 |
+
import os
|
| 7 |
+
import time
|
| 8 |
+
import json
|
| 9 |
+
from datetime import datetime
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
|
| 12 |
+
sys.path.insert(0, str(Path(__file__).parent.parent.absolute()))
|
| 13 |
+
|
| 14 |
+
# Read the Gemini API key from the environment.
# BUG FIX: this was previously hard-coded to the literal PowerShell string
# "$env:GOOGLE_API_KEY" (never a valid key), which also clobbered any real
# key already present in os.environ.
api_key = os.environ.get("GOOGLE_API_KEY", "")
if not api_key:
    raise SystemExit("GOOGLE_API_KEY environment variable is not set")
os.environ['GOOGLE_API_KEY'] = api_key

from tests.test_problems import (
    PROBLEM_E1_PHASE_FLIP, PROBLEM_E2_CONTROLLED_NOT,
    PROBLEM_M1_SWAP_DECOMPOSITION, PROBLEM_M2_CONTROLLED_Z,
    PROBLEM_H1_DEUTSCH, PROBLEM_H2_GROVER_2QUBIT,
    PROBLEM_VH1_QFT_4QUBIT, PROBLEM_VH2_GROVER_3QUBIT, PROBLEM_VH4_BERNSTEIN_VAZIRANI
)
from orchestrators import create_orchestrator
from orchestrators.quasar_orchestrator import QuasarOrchestrator, HybridOrchestrator
from config import set_api_key
import re

set_api_key(api_key)
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def extract_gates(qasm):
    """Count gates in QASM (case-insensitive whole-word match; 0 if empty)."""
    if not qasm:
        return 0
    hits = re.findall(r'\b(h|x|y|z|s|t|cx|cz|swap|ccx|rz|rx|ry|cp)\b', qasm, re.IGNORECASE)
    return len(hits)
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def test_problem(problem, mode):
    """Test a single problem with one orchestration mode.

    Returns a uniform result dict with keys: success, time_ms, llm, gates,
    iterations, error — the same schema on both the success and failure
    paths, so consumers can index any key without a KeyError.
    """
    start = time.perf_counter()

    try:
        if mode == "quasar":
            # QUASAR/hybrid runs also receive the expected qubit count and
            # (optionally) expected measurement states for validation.
            orch = QuasarOrchestrator(max_iterations=3)
            result = orch.run(
                problem.prompt,
                problem.expected.min_qubits,
                problem.expected.expected_states if problem.expected.expected_states else None
            )
            success = result.success
            qasm = result.final_qasm
            llm = result.llm_calls
            iterations = result.iterations

        elif mode == "hybrid":
            orch = HybridOrchestrator()
            result = orch.run(
                problem.prompt,
                problem.expected.min_qubits,
                problem.expected.expected_states if problem.expected.expected_states else None
            )
            success = result.success
            qasm = result.final_qasm
            llm = result.llm_calls
            iterations = result.iterations

        else:
            # Generic orchestrators: single pass; "naked" mode is assumed to
            # make exactly one LLM call.
            orch = create_orchestrator(mode)
            result = orch.run(problem.prompt)
            success = result.success
            qasm = result.final_output
            llm = 1 if mode == "naked" else len(result.agent_results) if result.agent_results else 0
            iterations = 1

        elapsed = (time.perf_counter() - start) * 1000
        gates = extract_gates(qasm)

        return {
            "success": success,
            "time_ms": elapsed,
            "llm": llm,
            "gates": gates,
            "iterations": iterations,
            "error": None
        }

    except Exception as e:
        elapsed = (time.perf_counter() - start) * 1000
        # BUG FIX: include "iterations" here too so both branches share the
        # same schema (the success branch already returns it).
        return {
            "success": False,
            "time_ms": elapsed,
            "llm": 0,
            "gates": 0,
            "iterations": 0,
            "error": str(e)[:80]
        }
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
def main():
    """Evaluate all modes on 2–3 key problems per difficulty tier, print
    per-mode and per-difficulty summaries, and persist raw results to
    research/*.json."""
    print("=" * 80)
    print("MODE EVALUATION - KEY PROBLEMS FROM EACH DIFFICULTY")
    print("=" * 80)
    print(f"Date: {datetime.now().isoformat()}")
    print()

    # Key problems to test (2 per difficulty)
    test_problems = [
        ("EASY", [PROBLEM_E1_PHASE_FLIP, PROBLEM_E2_CONTROLLED_NOT]),
        ("MEDIUM", [PROBLEM_M1_SWAP_DECOMPOSITION, PROBLEM_M2_CONTROLLED_Z]),
        ("HARD", [PROBLEM_H1_DEUTSCH, PROBLEM_H2_GROVER_2QUBIT]),
        ("VERY_HARD", [PROBLEM_VH1_QFT_4QUBIT, PROBLEM_VH2_GROVER_3QUBIT, PROBLEM_VH4_BERNSTEIN_VAZIRANI])
    ]

    # Modes to test - focus on working ones
    modes = ["naked", "quasar", "hybrid", "blackboard"]

    all_results = []

    for diff_name, problems in test_problems:
        print(f"\n{'='*80}")
        print(f"{diff_name} PROBLEMS")
        print("=" * 80)

        for problem in problems:
            print(f"\n {problem.id}: {problem.name}")

            for mode in modes:
                print(f" {mode:12}", end=" ", flush=True)

                result = test_problem(problem, mode)
                # Tag the record so it can be filtered after the fact.
                result["problem_id"] = problem.id
                result["difficulty"] = diff_name.lower()
                result["mode"] = mode
                all_results.append(result)

                status = "✅" if result["success"] else "❌"
                time_str = f"{result['time_ms']:6.0f}ms"
                llm_str = f"LLM:{result['llm']}"
                gates_str = f"Gates:{result['gates']:2}"

                print(f"{status} {time_str} {llm_str:6} {gates_str}")

                if result["error"]:
                    print(f" ⚠️ {result['error'][:50]}...")

                time.sleep(5)  # Rate limiting

    # Summary
    print("\n\n" + "=" * 80)
    print("SUMMARY BY MODE")
    print("=" * 80)

    for mode in modes:
        mode_results = [r for r in all_results if r["mode"] == mode]
        successes = sum(1 for r in mode_results if r["success"])
        total = len(mode_results)
        total_time = sum(r["time_ms"] for r in mode_results)
        total_llm = sum(r["llm"] for r in mode_results)
        # max(successes, 1) guards against division by zero when nothing passed.
        avg_gates = sum(r["gates"] for r in mode_results if r["success"]) / max(successes, 1)

        print(f"\n{mode.upper():12}")
        print(f" Success: {successes}/{total} ({100*successes/total:.0f}%)")
        print(f" Time: {total_time:.0f}ms total, {total_time/total:.0f}ms avg")
        print(f" LLM: {total_llm} calls")
        print(f" Gates: {avg_gates:.1f} avg")

        # By difficulty
        for diff in ["easy", "medium", "hard", "very_hard"]:
            diff_results = [r for r in mode_results if r["difficulty"] == diff]
            if diff_results:
                diff_success = sum(1 for r in diff_results if r["success"])
                print(f" {diff:10}: {diff_success}/{len(diff_results)}")

    # Winner by difficulty
    print("\n" + "=" * 80)
    print("🏆 WINNER BY DIFFICULTY")
    print("=" * 80)

    for diff in ["easy", "medium", "hard", "very_hard"]:
        diff_results = [r for r in all_results if r["difficulty"] == diff]

        print(f"\n{diff.upper()}:")
        for mode in modes:
            mode_diff_results = [r for r in diff_results if r["mode"] == mode]
            if mode_diff_results:
                successes = sum(1 for r in mode_diff_results if r["success"])
                total_time = sum(r["time_ms"] for r in mode_diff_results)
                avg_time = total_time / len(mode_diff_results)
                print(f" {mode:12} {successes}/{len(mode_diff_results)} ({avg_time:.0f}ms avg)")

    # Save results
    output_path = Path(__file__).parent.parent / "research" / f"mode_evaluation_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
    output_path.parent.mkdir(parents=True, exist_ok=True)

    with open(output_path, 'w') as f:
        json.dump(all_results, f, indent=2)

    print(f"\n\nResults saved to: {output_path}")


if __name__ == "__main__":
    main()
|
tests/quality_evaluation_harness.py
ADDED
|
@@ -0,0 +1,314 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Path: QAgents-workflos/tests/quality_evaluation_harness.py
|
| 2 |
+
# Relations: Uses orchestrators/, tests/circuit_quality_analyzer.py, database/circuit_quality_db.py
|
| 3 |
+
# Description: Quality-focused evaluation harness that stores QASM circuits
|
| 4 |
+
# Runs all 3 modes, measures quality via MCP, stores in database
|
| 5 |
+
# Generates comparison reports with actual circuit outputs
|
| 6 |
+
|
| 7 |
+
"""
|
| 8 |
+
Quality Evaluation Harness: Run evaluations focused on CIRCUIT QUALITY.
|
| 9 |
+
Key difference from regular harness: stores actual QASM and measures quality.
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
import time
|
| 13 |
+
import json
|
| 14 |
+
import logging
|
| 15 |
+
from datetime import datetime
|
| 16 |
+
from typing import Dict, List, Optional, Any
|
| 17 |
+
from pathlib import Path
|
| 18 |
+
import uuid
|
| 19 |
+
|
| 20 |
+
from .test_problems import TestProblem, ALL_PROBLEMS, get_problem, get_problems_by_difficulty, ProblemDifficulty
|
| 21 |
+
from .circuit_quality_analyzer import CircuitQualityAnalyzer, AnalysisResult
|
| 22 |
+
from database.circuit_quality_db import (
|
| 23 |
+
CircuitQualityDB, CircuitEvaluation, QualityMetrics, get_quality_db
|
| 24 |
+
)
|
| 25 |
+
|
| 26 |
+
logger = logging.getLogger(__name__)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
class QualityEvaluationHarness:
    """
    Runs quality-focused evaluations across all orchestration modes.

    PRIMARY FOCUS: Circuit quality, not just success rate.
    STORES: Full QASM code in database for later analysis.
    """

    def __init__(self, mcp_url: str = "http://127.0.0.1:7861"):
        """
        Args:
            mcp_url: Base URL of the QuantumArchitect-MCP server used by the
                circuit quality analyzer.
        """
        self.mcp_url = mcp_url
        self.analyzer = CircuitQualityAnalyzer(mcp_url)
        self.db = get_quality_db()
        # One run_id groups every evaluation produced by this harness instance.
        self.run_id = f"quality_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

    def evaluate_single(self, problem: TestProblem, mode: str) -> CircuitEvaluation:
        """
        Run a single evaluation and return full CircuitEvaluation with QASM.

        Args:
            problem: The test problem to solve
            mode: 'naked', 'guided', or 'blackboard'

        Returns:
            CircuitEvaluation with full QASM and quality metrics
        """
        from orchestrators import create_orchestrator

        logger.info(f"Evaluating {problem.id} with {mode} mode")

        # Reset cost tracking so the LLM/token counts below are per-evaluation.
        try:
            from config import reset_cost_tracking, get_cost_summary
            reset_cost_tracking()
        except ImportError:
            # Cost tracking is optional; fall back to an empty summary.
            get_cost_summary = lambda: {}

        # Initialize result record up front so failures still get persisted.
        eval_result = CircuitEvaluation(
            run_id=self.run_id,
            timestamp=datetime.now().isoformat(),
            problem_id=problem.id,
            problem_goal=problem.goal,
            mode=mode
        )

        start_time = time.perf_counter()

        try:
            # Create and run orchestrator
            orchestrator = create_orchestrator(mode)
            result = orchestrator.run(problem.goal)

            elapsed_ms = (time.perf_counter() - start_time) * 1000
            eval_result.execution_time_ms = elapsed_ms

            # Extract QASM. Orchestrators may return a list of candidates;
            # take the first one and coerce to str.
            qasm = result.final_output
            if isinstance(qasm, list):
                qasm = qasm[0] if qasm else None
            if qasm is not None:
                qasm = str(qasm) if not isinstance(qasm, str) else qasm

            eval_result.qasm_code = qasm or ""
            eval_result.success = result.success and bool(qasm)

            if not eval_result.success:
                eval_result.errors = result.errors

        except Exception as e:
            elapsed_ms = (time.perf_counter() - start_time) * 1000
            eval_result.execution_time_ms = elapsed_ms
            eval_result.success = False
            eval_result.errors = [str(e)]
            logger.error(f"Evaluation failed for {problem.id}/{mode}: {e}")

        # Get cost metrics (best-effort; must not fail the evaluation).
        try:
            cost = get_cost_summary()
            eval_result.llm_requests = cost.get('total_requests', 0)
            eval_result.tokens_used = cost.get('total_tokens', 0)
        except Exception:
            pass

        # Analyze quality if we have QASM
        if eval_result.qasm_code:
            expected = problem.expected.expected_states if problem.expected else None
            analysis = self.analyzer.analyze_circuit(eval_result.qasm_code, expected)

            eval_result.quality_metrics = QualityMetrics(
                depth=analysis.depth,
                gate_count=analysis.gate_count,
                cx_count=analysis.cx_count,
                single_qubit_count=analysis.single_qubit_count,
                hardware_fitness=analysis.hardware_fitness,
                syntax_valid=analysis.syntax_valid,
                state_correctness=analysis.state_correctness,
                complexity_score=analysis.complexity_score,
                noise_estimate=analysis.noise_estimate
            )

            if analysis.errors:
                eval_result.errors.extend(analysis.errors)

        # Store in database
        eval_id = self.db.save_evaluation(eval_result)
        eval_result.id = eval_id

        # FIX: quality_metrics is only populated when QASM was produced, so a
        # failed generation previously raised AttributeError on this log line.
        score = (eval_result.quality_metrics.overall_score()
                 if eval_result.quality_metrics else None)
        logger.info(f"Stored evaluation {eval_id}: {problem.id}/{mode} - "
                    f"success={eval_result.success}, score={score}")

        return eval_result

    def evaluate_problem_all_modes(self, problem: TestProblem,
                                   modes: Optional[List[str]] = None) -> Dict[str, CircuitEvaluation]:
        """Evaluate a single problem with all modes (default: naked/guided/blackboard)."""
        if modes is None:
            modes = ['naked', 'guided', 'blackboard']

        results = {}
        for mode in modes:
            results[mode] = self.evaluate_single(problem, mode)

        return results

    def run_full_evaluation(self,
                            difficulties: Optional[List[str]] = None,
                            modes: Optional[List[str]] = None,
                            max_problems: Optional[int] = None) -> str:
        """
        Run full evaluation across problems and modes.

        Args:
            difficulties: List of difficulties to test ('easy', 'medium', 'hard')
            modes: List of modes to test ('naked', 'guided', 'blackboard')
            max_problems: Maximum number of problems to test (for quick runs)

        Returns:
            run_id for this evaluation run
        """
        if difficulties is None:
            difficulties = ['easy', 'medium', 'hard']
        if modes is None:
            modes = ['naked', 'guided', 'blackboard']

        # Gather problems
        all_probs = []
        for diff in difficulties:
            # Convert string to enum if needed
            if isinstance(diff, str):
                try:
                    diff_enum = ProblemDifficulty(diff)
                except ValueError:
                    logger.warning(f"Invalid difficulty: {diff}")
                    continue
            else:
                diff_enum = diff

            probs = get_problems_by_difficulty(diff_enum)
            all_probs.extend(probs)

        if max_problems:
            all_probs = all_probs[:max_problems]

        logger.info(f"Starting quality evaluation run {self.run_id}")
        logger.info(f"Problems: {len(all_probs)}, Modes: {modes}")

        # Run evaluations; individual failures are logged but do not abort the run.
        total = len(all_probs) * len(modes)
        completed = 0

        for problem in all_probs:
            for mode in modes:
                try:
                    self.evaluate_single(problem, mode)
                    completed += 1
                    logger.info(f"Progress: {completed}/{total}")
                except Exception as e:
                    logger.error(f"Failed {problem.id}/{mode}: {e}")
                    completed += 1

        # Save run summary
        summary = self.db.get_quality_summary(self.run_id)
        self.db.save_comparison_run(
            run_id=self.run_id,
            description=f"Quality evaluation: {len(all_probs)} problems, {modes}",
            num_problems=len(all_probs),
            modes=modes,
            summary=summary
        )

        return self.run_id

    def generate_report(self, run_id: Optional[str] = None) -> str:
        """Generate a comprehensive quality comparison report (markdown string)."""
        if run_id is None:
            run_id = self.run_id

        # Get summary
        summary = self.db.get_quality_summary(run_id)

        # Get full circuit export
        circuits_md = self.db.export_circuits_markdown(run_id)

        # Build report
        report = []
        report.append("# CIRCUIT QUALITY EVALUATION REPORT\n")
        report.append(f"Run ID: {run_id}\n")
        report.append(f"Generated: {datetime.now().isoformat()}\n\n")

        report.append("## EXECUTIVE SUMMARY\n\n")

        # Summary table
        report.append("| Mode | Success Rate | Quality Score | Avg Depth | Avg Gates | Avg CX | HW Fitness | LLM Calls |\n")
        report.append("|------|-------------|---------------|-----------|-----------|--------|------------|----------|\n")

        for mode in ['naked', 'guided', 'blackboard']:
            if mode in summary.get('modes', {}):
                m = summary['modes'][mode]
                report.append(
                    f"| {mode.upper()} | {m['success_rate']*100:.0f}% | "
                    f"{m['avg_quality_score']:.1f}/100 | {m['avg_depth']:.1f} | "
                    f"{m['avg_gates']:.1f} | {m['avg_cx_count']:.1f} | "
                    f"{m['avg_hardware_fitness']:.3f} | {m['total_llm_requests']} |\n"
                )

        report.append("\n## KEY FINDINGS\n\n")

        # Determine winner on each axis
        modes_data = summary.get('modes', {})
        if modes_data:
            best_quality = max(modes_data.items(), key=lambda x: x[1].get('avg_quality_score', 0))
            best_success = max(modes_data.items(), key=lambda x: x[1].get('success_rate', 0))
            lowest_cost = min(modes_data.items(), key=lambda x: x[1].get('total_llm_requests', float('inf')))

            report.append(f"- **Best Quality**: {best_quality[0].upper()} ({best_quality[1]['avg_quality_score']:.1f}/100)\n")
            report.append(f"- **Best Success Rate**: {best_success[0].upper()} ({best_success[1]['success_rate']*100:.0f}%)\n")
            report.append(f"- **Lowest Cost**: {lowest_cost[0].upper()} ({lowest_cost[1]['total_llm_requests']} LLM calls)\n")

            # Quality per LLM call
            report.append("\n### Quality Efficiency (Quality Score per LLM Call)\n\n")
            for mode, data in modes_data.items():
                llm_calls = data.get('total_llm_requests', 1) or 1
                quality = data.get('avg_quality_score', 0)
                efficiency = quality / llm_calls
                report.append(f"- {mode.upper()}: {efficiency:.2f} quality points per LLM call\n")

        report.append("\n---\n")
        report.append("\n## DETAILED CIRCUIT COMPARISONS\n")
        report.append(circuits_md)

        return "".join(report)

    def print_summary(self, run_id: Optional[str] = None):
        """Print a quick summary to console."""
        if run_id is None:
            run_id = self.run_id

        summary = self.db.get_quality_summary(run_id)

        print("\n" + "="*70)
        print("QUALITY EVALUATION SUMMARY")
        print("="*70)

        modes = summary.get('modes', {})
        for mode in ['naked', 'guided', 'blackboard']:
            if mode in modes:
                m = modes[mode]
                print(f"\n{mode.upper()}:")
                print(f"  Success Rate: {m['success_rate']*100:.0f}%")
                print(f"  Quality Score: {m['avg_quality_score']:.1f}/100")
                print(f"  Avg Depth: {m['avg_depth']:.1f}")
                print(f"  Avg Gates: {m['avg_gates']:.1f}")
                print(f"  Avg CX Count: {m['avg_cx_count']:.1f}")
                print(f"  HW Fitness: {m['avg_hardware_fitness']:.3f}")
                print(f"  LLM Requests: {m['total_llm_requests']}")

        print("\n" + "="*70)
|
| 305 |
+
|
| 306 |
+
|
| 307 |
+
def run_quick_quality_test(mode: str = 'naked', problem_id: str = 'bell_state') -> CircuitEvaluation:
    """Smoke-test the quality pipeline: one problem, one mode."""
    problem = get_problem(problem_id)
    if not problem:
        raise ValueError(f"Problem not found: {problem_id}")

    return QualityEvaluationHarness().evaluate_single(problem, mode)
|
tests/quick_mode_test.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Path: QAgents-workflos/tests/quick_mode_test.py
|
| 2 |
+
# Description: Quick test of all modes on one HARD problem
|
| 3 |
+
"""
|
| 4 |
+
Quick Mode Test: Test all 4 modes on 1 problem each difficulty
|
| 5 |
+
Designed to be fast by testing only essential combinations.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import sys
|
| 9 |
+
import os
|
| 10 |
+
import warnings
|
| 11 |
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
| 12 |
+
|
| 13 |
+
# Suppress warnings
|
| 14 |
+
warnings.filterwarnings("ignore", message=".*non-text parts.*")
|
| 15 |
+
warnings.filterwarnings("ignore", message=".*GOOGLE_API_KEY.*")
|
| 16 |
+
|
| 17 |
+
import time
|
| 18 |
+
from orchestrators import create_orchestrator
|
| 19 |
+
from tests.test_problems import get_problems_by_difficulty, ProblemDifficulty
|
| 20 |
+
|
| 21 |
+
def test_mode(mode, problem):
|
| 22 |
+
"""Test a single mode on a problem."""
|
| 23 |
+
try:
|
| 24 |
+
orch = create_orchestrator(mode)
|
| 25 |
+
start = time.perf_counter()
|
| 26 |
+
result = orch.run(problem.prompt)
|
| 27 |
+
elapsed = (time.perf_counter() - start) * 1000
|
| 28 |
+
|
| 29 |
+
gates = 0
|
| 30 |
+
if result.final_output:
|
| 31 |
+
gates = len([l for l in result.final_output.split('\n')
|
| 32 |
+
if l.strip() and not l.startswith(('OPENQASM', 'include', 'qreg', 'creg', 'measure', '//'))])
|
| 33 |
+
|
| 34 |
+
return result.success, elapsed, gates, None
|
| 35 |
+
except Exception as e:
|
| 36 |
+
return False, 0, 0, str(e)[:50]
|
| 37 |
+
|
| 38 |
+
def main():
    """Exercise all four orchestration modes on one VERY_HARD problem."""
    bar = "=" * 60
    print(bar)
    print("QUICK MODE TEST: All 4 modes on HARD problem")
    print(bar)

    # The first VERY_HARD entry (4-qubit QFT) is the stress case.
    problem = get_problems_by_difficulty(ProblemDifficulty.VERY_HARD)[0]

    print(f"\nProblem: {problem.name}")
    print(f"Difficulty: VERY_HARD")
    print(f"Description: {problem.prompt[:80]}...")
    print("-" * 60)

    outcomes = []
    for mode in ["naked", "quasar", "hybrid", "blackboard"]:
        print(f"\nTesting {mode}...", end=" ", flush=True)
        ok, ms, gates, error = test_mode(mode, problem)

        if ok:
            print(f"✅ {ms:.0f}ms, {gates} gates")
            outcomes.append((mode, True, ms, gates))
        elif error:
            print(f"❌ Error: {error}")
            outcomes.append((mode, False, 0, 0))
        else:
            print(f"❌ Failed ({ms:.0f}ms)")
            outcomes.append((mode, False, ms, gates))

    print("\n" + bar)
    print("RESULTS SUMMARY")
    print(bar)

    for mode, ok, ms, gates in outcomes:
        status = "✅ PASS" if ok else "❌ FAIL"
        print(f"  {mode:12}: {status:10} {ms:6.0f}ms {gates:2} gates")

    passed = sum(1 for entry in outcomes if entry[1])
    print(f"\nTotal: {passed}/{len(outcomes)} modes passed")

if __name__ == "__main__":
    main()
|
tests/quick_test.py
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Path: QAgents-workflos/tests/quick_test.py
|
| 2 |
+
# Quick test to compare modes on easy problems only
|
| 3 |
+
"""Quick test for mode comparison."""
|
| 4 |
+
|
| 5 |
+
import sys
|
| 6 |
+
import os
|
| 7 |
+
import time
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
+
sys.path.insert(0, str(Path(__file__).parent.parent.absolute()))
|
| 11 |
+
|
| 12 |
+
# FIX: the previous value was the literal string "$env:GOOGLE_API_KEY" —
# PowerShell environment-variable syntax pasted into Python — which would
# have been sent verbatim to the API as the key. Resolve it from the real
# process environment instead.
api_key = os.environ.get("GOOGLE_API_KEY") or os.environ.get("GENAI_API_KEY") or ""
if api_key:
    os.environ['GOOGLE_API_KEY'] = api_key
|
| 14 |
+
|
| 15 |
+
from tests.test_problems import EASY_PROBLEMS, VERY_HARD_PROBLEMS
|
| 16 |
+
from orchestrators import create_orchestrator
|
| 17 |
+
from orchestrators.quasar_orchestrator import QuasarOrchestrator, HybridOrchestrator
|
| 18 |
+
from config import set_api_key
|
| 19 |
+
|
| 20 |
+
set_api_key(api_key)
|
| 21 |
+
|
| 22 |
+
def test_problem(problem, mode):
|
| 23 |
+
"""Test a single problem."""
|
| 24 |
+
start = time.perf_counter()
|
| 25 |
+
|
| 26 |
+
try:
|
| 27 |
+
if mode == "quasar":
|
| 28 |
+
orch = QuasarOrchestrator(max_iterations=3)
|
| 29 |
+
result = orch.run(problem.prompt, problem.expected.min_qubits)
|
| 30 |
+
success = result.success
|
| 31 |
+
qasm = result.final_qasm
|
| 32 |
+
llm = result.llm_calls
|
| 33 |
+
elif mode == "hybrid":
|
| 34 |
+
orch = HybridOrchestrator()
|
| 35 |
+
result = orch.run(problem.prompt, problem.expected.min_qubits)
|
| 36 |
+
success = result.success
|
| 37 |
+
qasm = result.final_qasm
|
| 38 |
+
llm = result.llm_calls
|
| 39 |
+
else:
|
| 40 |
+
orch = create_orchestrator(mode)
|
| 41 |
+
result = orch.run(problem.prompt)
|
| 42 |
+
success = result.success
|
| 43 |
+
qasm = result.final_output
|
| 44 |
+
llm = len([k for k in result.agent_results.keys()]) if result.agent_results else 1
|
| 45 |
+
|
| 46 |
+
elapsed = (time.perf_counter() - start) * 1000
|
| 47 |
+
return {"success": success, "time_ms": elapsed, "llm": llm, "qasm": qasm[:100] if qasm else None}
|
| 48 |
+
|
| 49 |
+
except Exception as e:
|
| 50 |
+
elapsed = (time.perf_counter() - start) * 1000
|
| 51 |
+
return {"success": False, "time_ms": elapsed, "llm": 0, "error": str(e)[:50]}
|
| 52 |
+
|
| 53 |
+
print("=" * 80)
|
| 54 |
+
print("QUICK MODE COMPARISON TEST")
|
| 55 |
+
print("=" * 80)
|
| 56 |
+
|
| 57 |
+
# Test only first easy and first very_hard problem with all modes
|
| 58 |
+
test_cases = [
|
| 59 |
+
("EASY", EASY_PROBLEMS[0]),
|
| 60 |
+
("VERY_HARD", VERY_HARD_PROBLEMS[0])
|
| 61 |
+
]
|
| 62 |
+
|
| 63 |
+
modes = ["naked", "quasar", "hybrid"] # Skip slow modes
|
| 64 |
+
|
| 65 |
+
for diff, problem in test_cases:
|
| 66 |
+
print(f"\n{diff}: {problem.name}")
|
| 67 |
+
print("-" * 60)
|
| 68 |
+
|
| 69 |
+
for mode in modes:
|
| 70 |
+
print(f" {mode}...", end=" ", flush=True)
|
| 71 |
+
result = test_problem(problem, mode)
|
| 72 |
+
|
| 73 |
+
status = "✅" if result["success"] else "❌"
|
| 74 |
+
time_str = f"{result['time_ms']:.0f}ms"
|
| 75 |
+
llm_str = f"LLM:{result.get('llm', '?')}"
|
| 76 |
+
|
| 77 |
+
print(f"{status} {time_str} {llm_str}")
|
| 78 |
+
|
| 79 |
+
if not result["success"] and "error" in result:
|
| 80 |
+
print(f" Error: {result['error']}")
|
| 81 |
+
|
| 82 |
+
time.sleep(5) # Rate limiting
|
| 83 |
+
|
| 84 |
+
print("\n" + "=" * 80)
|
| 85 |
+
print("DONE")
|
tests/run_evaluation.py
ADDED
|
@@ -0,0 +1,197 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
"""
|
| 3 |
+
QAgents-Workflows: Main Evaluation Runner
|
| 4 |
+
Runs comparative tests between Blackboard, Guided, and Naked modes.
|
| 5 |
+
|
| 6 |
+
Usage:
|
| 7 |
+
python run_evaluation.py # Run all tests
|
| 8 |
+
python run_evaluation.py --mode naked # Test specific mode
|
| 9 |
+
python run_evaluation.py --problem easy_001 # Test specific problem
|
| 10 |
+
python run_evaluation.py --quick # Quick test (1 run per problem)
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
import argparse
|
| 14 |
+
import logging
|
| 15 |
+
import sys
|
| 16 |
+
from pathlib import Path
|
| 17 |
+
|
| 18 |
+
# Add parent to path for imports
|
| 19 |
+
sys.path.insert(0, str(Path(__file__).parent))
|
| 20 |
+
|
| 21 |
+
from config import config, set_mode
|
| 22 |
+
from client import get_client
|
| 23 |
+
from tests import (
|
| 24 |
+
EvaluationHarness,
|
| 25 |
+
ALL_PROBLEMS,
|
| 26 |
+
EASY_PROBLEMS,
|
| 27 |
+
get_problem
|
| 28 |
+
)
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def setup_logging(verbose: bool = True):
    """Initialise root logging for the evaluation run.

    Args:
        verbose: When True, log at DEBUG level; otherwise INFO.
    """
    logging.basicConfig(
        level=logging.DEBUG if verbose else logging.INFO,
        format="%(asctime)s | %(levelname)-8s | %(name)s | %(message)s",
        datefmt="%H:%M:%S",
    )
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def check_mcp_server():
    """Return True if the QuantumArchitect-MCP server responds; otherwise
    print start-up instructions and return False."""
    if get_client().health_check():
        print("✅ MCP server is running")
        return True

    print("\n❌ ERROR: QuantumArchitect-MCP server is not running!")
    print("\nPlease start it with:")
    print("  cd D:\\teach\\quantum-circuits")
    print("  & .venv\\Scripts\\Activate.ps1")
    print("  python QuantumArchitect-MCP\\app.py")
    print()
    return False
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def run_quick_test():
    """Sanity-check the pipeline: naked mode on the Bell state problem."""
    print("\n Running Quick Test (Naked mode, Bell State)")
    print("-" * 50)

    # Imported lazily so the CLI can print server hints before heavy imports.
    from orchestrators import create_orchestrator
    from tests import BELL_STATE_PROBLEM

    outcome = create_orchestrator("naked").run(BELL_STATE_PROBLEM.goal)

    print(f"Success: {outcome.success}")
    print(f"Time: {outcome.execution_time_ms:.1f}ms")
    print(f"Steps: {outcome.steps_completed}")

    circuit = outcome.final_output
    if circuit:
        print(f"\nGenerated Circuit:")
        # Truncate very long circuits to keep console output readable.
        print(circuit[:500] if len(circuit) > 500 else circuit)

    if outcome.errors:
        print(f"\nErrors: {outcome.errors}")

    return outcome.success
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def run_full_evaluation(problems=None, modes=None, num_runs=3):
    """Run the comparative evaluation and write report/CSV artifacts.

    Args:
        problems: Problems to evaluate (defaults to the easy set).
        modes: Orchestration modes to compare (defaults to all three).
        num_runs: Repetitions per problem/mode pair.

    Returns:
        True when the evaluation completed, False on any exception.
    """
    print("\n Starting Full Evaluation")
    print("=" * 60)

    problems = EASY_PROBLEMS if problems is None else problems  # start easy
    modes = ["blackboard", "guided", "naked"] if modes is None else modes

    print(f"Problems: {len(problems)}")
    print(f"Modes: {modes}")
    print(f"Runs per problem: {num_runs}")
    print()

    harness = EvaluationHarness(num_runs=num_runs)

    try:
        harness.evaluate_all(problems=problems, modes=modes)

        # Human-readable report: console plus a text file next to this script.
        report = harness.generate_report()
        print("\n" + report)

        report_path = Path(__file__).parent / "evaluation_report.txt"
        report_path.write_text(report)
        print(f"\n Report saved to: {report_path}")

        # Machine-readable CSV for downstream research analysis.
        csv_path = harness.export_csv()
        print(f" CSV exported to: {csv_path}")

        stats = harness.get_summary_stats()
        print("\n Summary Statistics:")
        for mode, mode_stats in stats.get('modes', {}).items():
            print(f"  {mode}: {mode_stats['success_rate']*100:.1f}% success, "
                  f"{mode_stats['total_llm_requests']} LLM calls, "
                  f"{mode_stats['total_tokens']} tokens")

        return True

    except Exception as e:
        logging.exception(f"Evaluation failed: {e}")
        return False
|
| 127 |
+
def main():
    """CLI entry point: parse arguments, verify the MCP server, dispatch
    either the quick sanity test or the full comparative evaluation, and
    exit with a status code reflecting success."""
    parser = argparse.ArgumentParser(
        description="QAgents Comparative Evaluation Runner",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python run_evaluation.py              # Full evaluation
  python run_evaluation.py --quick      # Quick sanity test
  python run_evaluation.py --mode naked # Test naked mode only
  python run_evaluation.py --easy       # Only easy problems
  python run_evaluation.py --runs 10    # 10 runs per problem
"""
    )

    parser.add_argument("--quick", action="store_true",
                        help="Run quick sanity test only")
    parser.add_argument("--mode", choices=["blackboard", "guided", "naked"],
                        help="Test specific mode only")
    parser.add_argument("--problem", type=str,
                        help="Test specific problem by ID")
    parser.add_argument("--easy", action="store_true",
                        help="Only easy problems")
    parser.add_argument("--runs", type=int, default=3,
                        help="Number of runs per problem (default: 3)")
    parser.add_argument("--verbose", "-v", action="store_true",
                        help="Verbose output")

    args = parser.parse_args()

    setup_logging(args.verbose)

    print("=" * 60)
    print("[EVALUATION] QAgents-Workflows Comparative Evaluation")
    print("=" * 60)

    # Check MCP server; the evaluation cannot proceed without it.
    if not check_mcp_server():
        sys.exit(1)

    # Quick test mode
    if args.quick:
        success = run_quick_test()
        sys.exit(0 if success else 1)

    # Determine problems to run: explicit ID beats --easy beats all.
    if args.problem:
        problem = get_problem(args.problem)
        if not problem:
            print(f"❌ Unknown problem: {args.problem}")
            sys.exit(1)
        problems = [problem]
    elif args.easy:
        problems = EASY_PROBLEMS
    else:
        problems = ALL_PROBLEMS

    # Determine modes to test (None means the harness default set).
    modes = [args.mode] if args.mode else None

    # Run evaluation
    success = run_full_evaluation(
        problems=problems,
        modes=modes,
        num_runs=args.runs
    )

    sys.exit(0 if success else 1)


if __name__ == "__main__":
    main()
|
tests/run_quality_eval.py
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Path: QAgents-workflos/run_quality_eval.py
|
| 2 |
+
# Relations: Uses tests/quality_evaluation_harness.py, database/circuit_quality_db.py
|
| 3 |
+
# Description: CLI entry point for quality-focused evaluation
|
| 4 |
+
# Run with: python run_quality_eval.py --mode all --difficulty easy
|
| 5 |
+
# Generates quality comparison report with actual QASM circuits
|
| 6 |
+
|
| 7 |
+
"""
|
| 8 |
+
Quality Evaluation Runner: CLI entry point for circuit quality comparison.
|
| 9 |
+
|
| 10 |
+
Usage:
|
| 11 |
+
python run_quality_eval.py --mode all --difficulty easy
|
| 12 |
+
python run_quality_eval.py --mode naked --problem easy_001
|
| 13 |
+
python run_quality_eval.py --report RUN_ID
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
import argparse
|
| 17 |
+
import logging
|
| 18 |
+
import sys
|
| 19 |
+
import os
|
| 20 |
+
from pathlib import Path
|
| 21 |
+
from datetime import datetime
|
| 22 |
+
|
| 23 |
+
# Add project root to path
|
| 24 |
+
sys.path.insert(0, str(Path(__file__).parent))
|
| 25 |
+
|
| 26 |
+
# Ensure API key is set BEFORE importing config
|
| 27 |
+
api_key = os.getenv("GOOGLE_API_KEY") or os.getenv("GENAI_API_KEY")
|
| 28 |
+
if api_key:
|
| 29 |
+
os.environ["GOOGLE_API_KEY"] = api_key
|
| 30 |
+
|
| 31 |
+
from tests.quality_evaluation_harness import QualityEvaluationHarness, run_quick_quality_test
|
| 32 |
+
from tests.test_problems import get_problem, get_problems_by_difficulty
|
| 33 |
+
from database.circuit_quality_db import get_quality_db
|
| 34 |
+
from config import set_api_key
|
| 35 |
+
|
| 36 |
+
# Configure logging
|
| 37 |
+
logging.basicConfig(
|
| 38 |
+
level=logging.INFO,
|
| 39 |
+
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
| 40 |
+
)
|
| 41 |
+
logger = logging.getLogger(__name__)
|
| 42 |
+
|
| 43 |
+
# Explicitly set API key in config after logging is ready
|
| 44 |
+
if api_key:
|
| 45 |
+
set_api_key(api_key)
|
| 46 |
+
logger.info(f"API Key configured: {api_key[:10]}...")
|
| 47 |
+
else:
|
| 48 |
+
logger.warning("No GOOGLE_API_KEY or GENAI_API_KEY found in environment")
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def run_evaluation(args):
    """Run a quality evaluation based on parsed CLI arguments.

    If ``args.problem`` is set, evaluates that single problem across the
    selected modes and prints per-mode results. Otherwise runs a full
    evaluation over the selected difficulties and writes a markdown report.
    """
    harness = QualityEvaluationHarness()

    # Expand 'all' selections into explicit lists.
    modes = ['naked', 'guided', 'blackboard'] if args.mode == 'all' else [args.mode]
    difficulties = ['easy', 'medium', 'hard'] if args.difficulty == 'all' else [args.difficulty]

    if args.problem:
        # Single-problem evaluation across the requested modes.
        problem = get_problem(args.problem)
        if not problem:
            print(f"ERROR: Problem not found: {args.problem}")
            return

        print(f"\n{'='*60}")
        print(f"Running quality evaluation for: {args.problem}")
        print(f"Modes: {modes}")
        print(f"{'='*60}\n")

        results = harness.evaluate_problem_all_modes(problem, modes)

        for mode, result in results.items():
            print(f"\n{mode.upper()}:")
            print(f" Success: {'✅' if result.success else '❌'}")
            print(f" Quality Score: {result.quality_metrics.overall_score()}/100")
            print(f" Depth: {result.quality_metrics.depth}")
            print(f" Gates: {result.quality_metrics.gate_count}")
            print(f" CX: {result.quality_metrics.cx_count}")
            print(f" Time: {result.execution_time_ms:.0f}ms")
            print(f" LLM Calls: {result.llm_requests}")
            if result.qasm_code:
                print(f" QASM ({len(result.qasm_code)} chars):")
                # Split once and reuse (original split the string twice).
                qasm_lines = result.qasm_code.split('\n')
                for line in qasm_lines[:10]:
                    print(f" {line}")
                if len(qasm_lines) > 10:
                    print(" ...")
    else:
        # Full evaluation over all selected difficulties.
        print(f"\n{'='*60}")
        print(f"Running full quality evaluation")
        print(f"Difficulties: {difficulties}")
        print(f"Modes: {modes}")
        print(f"Max problems: {args.max_problems or 'all'}")
        print(f"{'='*60}\n")

        run_id = harness.run_full_evaluation(
            difficulties=difficulties,
            modes=modes,
            max_problems=args.max_problems
        )

        # Print summary to stdout, then persist the markdown report.
        harness.print_summary(run_id)

        report = harness.generate_report(run_id)
        report_path = Path(__file__).parent / f"QUALITY_REPORT_{run_id}.md"
        report_path.write_text(report, encoding='utf-8')
        print(f"\nFull report saved to: {report_path}")

        print(f"\nRun ID: {run_id}")
        print("Use --report <run_id> to regenerate report later")
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
def show_report(run_id: str):
    """Regenerate and print the report for a previously completed run."""
    harness = QualityEvaluationHarness()
    # Point the harness at the existing run instead of starting a new one.
    harness.run_id = run_id
    print(harness.generate_report(run_id))
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
def list_runs():
    """Print the 20 most recent evaluation runs stored in the quality DB."""
    import sqlite3

    db = get_quality_db()
    query = (
        "SELECT run_id, timestamp, description, num_problems "
        "FROM comparison_runs ORDER BY timestamp DESC LIMIT 20"
    )
    with sqlite3.connect(db.db_file) as conn:
        conn.row_factory = sqlite3.Row
        rows = conn.execute(query).fetchall()

    if not rows:
        print("No evaluation runs found.")
        return

    print("\nRecent Evaluation Runs:")
    print("-" * 80)
    for row in rows:
        print(f"{row['run_id']} | {row['timestamp']} | {row['num_problems']} problems | {row['description'] or 'N/A'}")
    print("-" * 80)
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
def quick_test(args):
    """Run one problem through one mode and print a short result summary."""
    # 'all' makes no sense for a single quick test; default to 'naked'.
    mode = 'naked' if args.mode == 'all' else args.mode
    problem_id = args.problem or 'easy_001'

    print(f"\nQuick test: {problem_id} with {mode} mode")
    print("-" * 40)

    try:
        outcome = run_quick_quality_test(mode, problem_id)
        print(f"Success: {'✅' if outcome.success else '❌'}")
        print(f"Quality Score: {outcome.quality_metrics.overall_score()}/100")
        print(f"Depth: {outcome.quality_metrics.depth}")
        print(f"Gates: {outcome.quality_metrics.gate_count}")
        if outcome.qasm_code:
            print(f"\nQASM:\n{outcome.qasm_code[:500]}")
        if outcome.errors:
            print(f"\nErrors: {outcome.errors}")
    except Exception as exc:
        print(f"ERROR: {exc}")
        import traceback
        traceback.print_exc()
|
| 178 |
+
|
| 179 |
+
|
| 180 |
+
def main():
    """Parse command-line arguments and dispatch to the requested action."""
    parser = argparse.ArgumentParser(
        description="Quality-focused quantum circuit evaluation",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
 python run_quality_eval.py --quick # Quick test
 python run_quality_eval.py --mode all --difficulty easy
 python run_quality_eval.py --problem easy_001 --mode all
 python run_quality_eval.py --list # List previous runs
 python run_quality_eval.py --report quality_20241128_120000
"""
    )

    parser.add_argument('--mode', choices=['naked', 'guided', 'blackboard', 'all'],
                        default='all', help='Orchestration mode(s) to test')
    parser.add_argument('--difficulty', choices=['easy', 'medium', 'hard', 'all'],
                        default='easy', help='Problem difficulty level(s)')
    parser.add_argument('--problem', type=str, help='Specific problem ID to test')
    parser.add_argument('--max-problems', type=int, help='Maximum problems to test')
    parser.add_argument('--quick', action='store_true', help='Run quick single test')
    parser.add_argument('--report', type=str, help='Generate report for run ID')
    parser.add_argument('--list', action='store_true', help='List previous runs')

    args = parser.parse_args()

    # Mutually exclusive actions, checked in priority order.
    if args.list:
        list_runs()
    elif args.report:
        show_report(args.report)
    elif args.quick:
        quick_test(args)
    else:
        run_evaluation(args)
|
| 214 |
+
|
| 215 |
+
|
| 216 |
+
if __name__ == "__main__":
|
| 217 |
+
main()
|
tests/test_db_storage.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Path: QAgents-workflos/test_db_storage.py
|
| 2 |
+
# Description: Quick test to verify database storage works
|
| 3 |
+
"""Test that database can store and retrieve circuits."""
|
| 4 |
+
|
| 5 |
+
from database.circuit_quality_db import CircuitQualityDB, CircuitEvaluation, QualityMetrics, get_quality_db
|
| 6 |
+
from datetime import datetime
|
| 7 |
+
|
| 8 |
+
def test_db():
    """Round-trip a sample Bell-state evaluation through the quality database.

    Saves a synthetic CircuitEvaluation, reads it back by problem_id, and
    asserts the QASM payload survives unchanged. The original version only
    printed results and could never fail; assertions make it a real test.
    """
    db = get_quality_db()
    print(f'Database file: {db.db_file}')

    # Minimal Bell-state circuit used as the stored payload.
    test_qasm = """OPENQASM 2.0;
include "qelib1.inc";
qreg q[2];
creg c[2];
h q[0];
cx q[0], q[1];
measure q -> c;
"""

    test_eval = CircuitEvaluation(
        run_id='test_manual_001',
        timestamp=datetime.now().isoformat(),
        problem_id='test_bell_state',
        problem_goal='Create Bell state',
        mode='manual_test',
        qasm_code=test_qasm,
        success=True,
        execution_time_ms=0,
        llm_requests=0,
        tokens_used=0,
        quality_metrics=QualityMetrics(
            depth=2,
            gate_count=3,
            cx_count=1,
            single_qubit_count=1,
            hardware_fitness=0.95,
            syntax_valid=True,
            state_correctness=1.0
        )
    )

    # Save to database and verify an ID came back.
    eval_id = db.save_evaluation(test_eval)
    print(f'Saved evaluation ID: {eval_id}')
    assert eval_id is not None, "save_evaluation should return an ID"

    # Retrieve and verify the payload round-trips.
    evals = db.get_evaluations(problem_id='test_bell_state')
    print(f'Retrieved {len(evals)} evaluations')
    assert evals, "expected at least one stored evaluation"
    e = evals[0]
    assert e.qasm_code == test_qasm, "stored QASM should round-trip unchanged"
    print(f'QASM stored ({len(e.qasm_code)} chars):')
    print(e.qasm_code)
    print(f'Quality score: {e.quality_metrics.overall_score()}/100')
|
| 57 |
+
|
| 58 |
+
if __name__ == "__main__":
|
| 59 |
+
test_db()
|
tests/test_mcp_client.py
ADDED
|
@@ -0,0 +1,181 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Path: QAgents-workflos/tests/test_mcp_client.py
|
| 2 |
+
# Relations: Tests client/mcp_client.py
|
| 3 |
+
# Description: Comprehensive tests for MCP client with Gradio and fallback implementations
|
| 4 |
+
|
| 5 |
+
"""
|
| 6 |
+
Test suite for MCP client functionality.
|
| 7 |
+
Tests both Gradio-based endpoints and local fallback implementations.
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import sys
|
| 11 |
+
import os
|
| 12 |
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
| 13 |
+
|
| 14 |
+
from client.mcp_client import get_client, MCPClient, QASMLocalAnalyzer
|
| 15 |
+
|
| 16 |
+
# Sample QASM for testing
|
| 17 |
+
BELL_STATE_QASM = '''OPENQASM 2.0;
|
| 18 |
+
include "qelib1.inc";
|
| 19 |
+
qreg q[2];
|
| 20 |
+
creg c[2];
|
| 21 |
+
h q[0];
|
| 22 |
+
cx q[0], q[1];
|
| 23 |
+
measure q -> c;'''
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def test_health_check():
    """Probe the MCP server and report whether it responds."""
    healthy = get_client().health_check()
    print(f"Health Check: {'OK' if healthy else 'FAILED'}")
    return healthy
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def test_create_circuit():
    """Exercise template-based circuit creation (Gradio endpoint)."""
    resp = get_client().create_circuit_from_template('bell_state', 2)

    print(f"Create Circuit:")
    print(f" Success: {resp.success}")
    print(f" Endpoint: {resp.endpoint}")
    print(f" Time: {resp.execution_time_ms:.2f}ms")
    if resp.success and resp.data:
        print(f" Data preview: {str(resp.data)[:80]}...")
    return resp.success
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def test_analyze_circuit():
    """Analyze the Bell-state circuit (local fallback path)."""
    resp = get_client().analyze_circuit(BELL_STATE_QASM)

    print(f"Analyze Circuit:")
    print(f" Success: {resp.success}")
    print(f" Is Fallback: {resp.is_fallback}")
    if resp.success:
        print(f" Depth: {resp.data.get('depth')}")
        print(f" Gate Count: {resp.data.get('gate_count')}")
        print(f" Two-qubit Gates: {resp.data.get('two_qubit_gates')}")
    return resp.success
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def test_validate_syntax():
    """Validate QASM syntax for the Bell-state circuit (Gradio endpoint)."""
    resp = get_client().validate_syntax(BELL_STATE_QASM)

    print(f"Validate Syntax:")
    print(f" Success: {resp.success}")
    print(f" Endpoint: {resp.endpoint}")
    print(f" Time: {resp.execution_time_ms:.2f}ms")
    return resp.success
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def test_simulate_circuit():
    """Simulate the Bell-state circuit with 100 shots (Gradio endpoint)."""
    resp = get_client().simulate_circuit(BELL_STATE_QASM, shots=100)

    print(f"Simulate Circuit:")
    print(f" Success: {resp.success}")
    print(f" Endpoint: {resp.endpoint}")
    print(f" Time: {resp.execution_time_ms:.2f}ms")
    if resp.success and resp.data:
        print(f" Data preview: {str(resp.data)[:80]}...")
    return resp.success
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def test_complexity_score():
    """Score circuit complexity (Gradio endpoint or local fallback)."""
    resp = get_client().calculate_complexity_score(BELL_STATE_QASM)

    print(f"Complexity Score:")
    print(f" Success: {resp.success}")
    print(f" Is Fallback: {resp.is_fallback}")
    if resp.success and resp.data:
        # The endpoint may return a dict or a raw value; only dig into dicts.
        if isinstance(resp.data, dict):
            print(f" Score: {resp.data.get('complexity_score', 'N/A')}")
    return resp.success
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
def test_estimate_noise():
    """Estimate hardware noise for the Bell-state circuit (fallback path)."""
    resp = get_client().estimate_noise(BELL_STATE_QASM, hardware='ibm_brisbane')

    print(f"Estimate Noise:")
    print(f" Success: {resp.success}")
    print(f" Is Fallback: {resp.is_fallback}")
    if resp.success:
        print(f" Fidelity: {resp.data.get('estimated_fidelity')}")
        print(f" Total Error: {resp.data.get('total_error_probability')}")
    return resp.success
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
def test_local_analyzer():
    """Drive QASMLocalAnalyzer directly: parse, analyze, and score."""
    analyzer = QASMLocalAnalyzer()

    # Parse: register sizes and gate list.
    parsed = analyzer.parse_qasm(BELL_STATE_QASM)
    print(f"Local Parser:")
    print(f" Qubits: {parsed['num_qubits']}")
    print(f" Gates: {len(parsed['gates'])}")

    # Analyze: depth and per-gate breakdown.
    report = analyzer.analyze_circuit(BELL_STATE_QASM)
    print(f"Local Analyzer:")
    print(f" Depth: {report['depth']}")
    print(f" Gate breakdown: {report['gate_breakdown']}")

    # Complexity score.
    score_info = analyzer.calculate_complexity(BELL_STATE_QASM)
    print(f"Local Complexity:")
    print(f" Score: {score_info['complexity_score']}")

    return True
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
def run_all_tests():
    """Run every MCP client test and print a pass/fail summary."""
    banner = "=" * 50
    print(banner)
    print("MCP Client Test Suite")
    print(banner)

    suite = [
        ("Health Check", test_health_check),
        ("Create Circuit", test_create_circuit),
        ("Analyze Circuit", test_analyze_circuit),
        ("Validate Syntax", test_validate_syntax),
        ("Simulate Circuit", test_simulate_circuit),
        ("Complexity Score", test_complexity_score),
        ("Estimate Noise", test_estimate_noise),
        ("Local Analyzer", test_local_analyzer),
    ]

    outcomes = []
    for name, fn in suite:
        print(f"\n--- {name} ---")
        try:
            outcomes.append((name, fn()))
        except Exception as e:
            # A crashing test counts as a failure, not an abort.
            print(f"ERROR: {e}")
            outcomes.append((name, False))

    print("\n" + banner)
    print("Summary")
    print(banner)
    passed_count = sum(1 for _, ok in outcomes if ok)
    print(f"Passed: {passed_count}/{len(outcomes)}")
    for name, ok in outcomes:
        status = "✓" if ok else "✗"
        print(f" {status} {name}")

    return all(ok for _, ok in outcomes)
|
| 178 |
+
|
| 179 |
+
|
| 180 |
+
if __name__ == "__main__":
|
| 181 |
+
run_all_tests()
|
tests/test_problems.py
ADDED
|
@@ -0,0 +1,709 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Path: QAgents-workflos/tests/test_problems.py
|
| 2 |
+
# Relations: Used by evaluation_harness.py, run_evaluation.py
|
| 3 |
+
# Description: Real quantum computing problems requiring LLM reasoning
|
| 4 |
+
# Each problem has increasing complexity and real-world relevance
|
| 5 |
+
"""
|
| 6 |
+
Test Problems Module: Real Quantum Computing Challenges
|
| 7 |
+
|
| 8 |
+
TESTING FRAMEWORK DESIGN:
|
| 9 |
+
=========================
|
| 10 |
+
|
| 11 |
+
Each problem requires actual LLM reasoning to solve - no hardcoded templates.
|
| 12 |
+
The LLM must understand the quantum mechanics and generate appropriate QASM.
|
| 13 |
+
|
| 14 |
+
EVALUATION MODES:
|
| 15 |
+
-----------------
|
| 16 |
+
1. NAKED: 1 LLM call per problem (direct reasoning, no agents)
|
| 17 |
+
2. GUIDED: 1 + 4 LLM calls (initial + architect/builder/validator/scorer agents)
|
| 18 |
+
3. BLACKBOARD: 1 + 8-12 LLM calls (initial + collaborative agent rounds)
|
| 19 |
+
|
| 20 |
+
PROBLEM CATEGORIES:
|
| 21 |
+
-------------------
|
| 22 |
+
EASY (1-2 qubits, 1-3 gates):
|
| 23 |
+
- Fundamental single/two-qubit operations
|
| 24 |
+
- Direct QASM generation possible
|
| 25 |
+
|
| 26 |
+
MEDIUM (2-3 qubits, 4-8 gates):
|
| 27 |
+
- Require understanding of gate decomposition
|
| 28 |
+
- Multiple valid solutions possible
|
| 29 |
+
|
| 30 |
+
HARD (3+ qubits, 8+ gates):
|
| 31 |
+
- Algorithm implementation
|
| 32 |
+
- Optimization considerations
|
| 33 |
+
- Real-world applications
|
| 34 |
+
"""
|
| 35 |
+
|
| 36 |
+
from dataclasses import dataclass, field
|
| 37 |
+
from typing import Dict, List, Optional, Any
|
| 38 |
+
from enum import Enum
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
class ProblemDifficulty(Enum):
    """Difficulty tiers used to bucket evaluation problems."""
    EASY = "easy"
    MEDIUM = "medium"
    HARD = "hard"
    VERY_HARD = "very_hard"  # New: Push NAKED to its limits
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
class ProblemCategory(Enum):
    """Problem categories tracked for research analysis."""
    STATE_PREPARATION = "state_prep"
    GATE_SYNTHESIS = "gate_synthesis"
    ALGORITHM = "algorithm"
    ERROR_CORRECTION = "error_correction"
    OPTIMIZATION = "optimization"
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
@dataclass
class ExpectedOutput:
    """Validation criteria a generated circuit must satisfy."""
    min_qubits: int  # smallest acceptable register size
    max_qubits: int = 10
    max_depth: Optional[int] = None  # None means no depth limit
    required_gates: List[str] = field(default_factory=list)
    forbidden_gates: List[str] = field(default_factory=list)
    # Map of basis-state bitstring -> expected measurement probability.
    expected_states: Dict[str, float] = field(default_factory=dict)
    tolerance: float = 0.1  # Probability tolerance for state matching
    must_be_unitary: bool = True
    hardware_compatible: bool = True
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
@dataclass
class TestProblem:
    """A quantum circuit test problem for LLM evaluation."""
    id: str
    name: str
    description: str

    # The prompt sent to the LLM - must require reasoning.
    prompt: str

    # Category and difficulty for analysis.
    difficulty: ProblemDifficulty
    category: ProblemCategory

    # Validation criteria.
    expected: ExpectedOutput

    # Metadata for research tracking.
    tags: List[str] = field(default_factory=list)
    reference_solution: Optional[str] = None  # known optimal QASM, if any
    optimal_depth: Optional[int] = None
    optimal_gate_count: Optional[int] = None

    # Research tracking: concepts the solver must grasp, and typical errors.
    requires_understanding: List[str] = field(default_factory=list)
    common_mistakes: List[str] = field(default_factory=list)

    @property
    def goal(self) -> str:
        """Alias for prompt - used by orchestrators."""
        return self.prompt
|
| 103 |
+
# =============================================================================
|
| 104 |
+
# EASY PROBLEMS: Fundamental Quantum Operations
|
| 105 |
+
# =============================================================================
|
| 106 |
+
|
| 107 |
+
# Easy 1: tests that the model distinguishes |−⟩ from |+⟩ (relative phase).
PROBLEM_E1_PHASE_FLIP = TestProblem(
    id="easy_001",
    name="Phase Flip State",
    description="Create the |−⟩ state (phase-flipped superposition)",
    prompt="""Create a quantum circuit that prepares the |−⟩ state.

The |−⟩ state is defined as: (|0⟩ - |1⟩)/√2

This is different from the |+⟩ state which is (|0⟩ + |1⟩)/√2.

Requirements:
- Use a single qubit
- The final state should have equal probability of 0 and 1
- But the relative phase between them should be π (negative)

Provide the OpenQASM 2.0 circuit.""",
    difficulty=ProblemDifficulty.EASY,
    category=ProblemCategory.STATE_PREPARATION,
    expected=ExpectedOutput(
        min_qubits=1,
        max_qubits=1,
        max_depth=2,
        required_gates=["h", "z"],  # or x then h
        # Z-basis probabilities alone cannot distinguish |−⟩ from |+⟩;
        # the phase check must come from required_gates / statevector analysis.
        expected_states={"0": 0.5, "1": 0.5}
    ),
    tags=["superposition", "phase", "single-qubit"],
    requires_understanding=["Hadamard gate", "Z gate", "quantum phases"],
    common_mistakes=["Using only H (creates |+⟩ not |−⟩)", "Wrong gate order"],
    optimal_depth=2,
    optimal_gate_count=2
)
|
| 138 |
+
|
| 139 |
+
# Easy 2: canonical Bell-state preparation (H then CNOT).
PROBLEM_E2_CONTROLLED_NOT = TestProblem(
    id="easy_002",
    name="Entanglement Generation",
    description="Create maximal entanglement between two qubits",
    prompt="""Create a quantum circuit that maximally entangles two qubits.

Starting from |00⟩, create the Bell state |Φ+⟩ = (|00⟩ + |11⟩)/√2

Requirements:
- Use exactly 2 qubits
- Measuring both qubits should give 00 or 11 with equal probability
- The qubits must be entangled (not just in superposition)

Think about what gates create entanglement.
Provide the OpenQASM 2.0 circuit.""",
    difficulty=ProblemDifficulty.EASY,
    category=ProblemCategory.STATE_PREPARATION,
    expected=ExpectedOutput(
        min_qubits=2,
        max_qubits=2,
        max_depth=3,
        required_gates=["h", "cx"],
        expected_states={"00": 0.5, "11": 0.5}
    ),
    tags=["entanglement", "bell", "cnot"],
    requires_understanding=["Hadamard gate", "CNOT gate", "entanglement"],
    common_mistakes=["Applying H to both qubits (no entanglement)", "Wrong CNOT direction"],
    optimal_depth=2,
    optimal_gate_count=2
)
|
| 169 |
+
|
| 170 |
+
# Easy 3: Hadamard as a Z->X basis change (|0⟩ -> |+⟩).
PROBLEM_E3_MEASUREMENT_BASIS = TestProblem(
    id="easy_003",
    name="X-Basis Measurement Prep",
    description="Prepare a state for X-basis measurement",
    prompt="""Create a circuit that transforms a Z-basis state into X-basis.

Starting with |0⟩, prepare the state so that if we were to measure in the
X-basis (instead of Z-basis), we would get |+⟩ deterministically.

In other words: Transform |0⟩ → |+⟩ where |+⟩ = (|0⟩ + |1⟩)/√2

Requirements:
- Single qubit circuit
- The state should be the +1 eigenstate of the X operator

Provide the OpenQASM 2.0 circuit.""",
    difficulty=ProblemDifficulty.EASY,
    category=ProblemCategory.STATE_PREPARATION,
    expected=ExpectedOutput(
        min_qubits=1,
        max_qubits=1,
        max_depth=1,
        required_gates=["h"],
        expected_states={"0": 0.5, "1": 0.5}
    ),
    tags=["basis-change", "hadamard", "measurement"],
    requires_understanding=["Measurement bases", "Hadamard as basis change"],
    common_mistakes=["Not understanding basis transformation"],
    optimal_depth=1,
    optimal_gate_count=1
)
|
| 201 |
+
|
| 202 |
+
|
| 203 |
+
# =============================================================================
|
| 204 |
+
# MEDIUM PROBLEMS: Gate Decomposition and Multi-Qubit Operations
|
| 205 |
+
# =============================================================================
|
| 206 |
+
|
| 207 |
+
# Medium 1: SWAP = three alternating CNOTs; native swap gate forbidden.
PROBLEM_M1_SWAP_DECOMPOSITION = TestProblem(
    id="medium_001",
    name="SWAP from CNOTs",
    description="Implement SWAP gate using only CNOT gates",
    prompt="""Decompose the SWAP gate into basic gates.

The SWAP gate exchanges the states of two qubits:
SWAP|ab⟩ = |ba⟩

You must implement SWAP using only CNOT gates (no native SWAP allowed).

Requirements:
- Use exactly 2 qubits
- Only use CNOT (cx) gates - no other two-qubit gates
- The circuit should swap the state of qubit 0 and qubit 1
- Test: if input is |01⟩, output should be |10⟩

Hint: CNOT can be thought of as conditional bit flip.

Provide the OpenQASM 2.0 circuit.""",
    difficulty=ProblemDifficulty.MEDIUM,
    category=ProblemCategory.GATE_SYNTHESIS,
    expected=ExpectedOutput(
        min_qubits=2,
        max_qubits=2,
        max_depth=6,
        required_gates=["cx"],
        forbidden_gates=["swap"]
    ),
    tags=["decomposition", "swap", "cnot-only"],
    requires_understanding=["CNOT behavior", "Gate decomposition"],
    common_mistakes=["Wrong number of CNOTs", "Wrong CNOT directions"],
    # Known-optimal answer: cx(0,1); cx(1,0); cx(0,1)
    reference_solution="OPENQASM 2.0;\ninclude \"qelib1.inc\";\nqreg q[2];\ncx q[0],q[1];\ncx q[1],q[0];\ncx q[0],q[1];",
    optimal_depth=3,
    optimal_gate_count=3
)
|
| 243 |
+
|
| 244 |
+
# Medium 2: CZ = H(target) · CX · H(target); native cz gate forbidden.
PROBLEM_M2_CONTROLLED_Z = TestProblem(
    id="medium_002",
    name="CZ from Basic Gates",
    description="Build Controlled-Z using H and CNOT",
    prompt="""Implement the Controlled-Z (CZ) gate using only Hadamard and CNOT gates.

The CZ gate applies a Z gate to the target qubit when the control is |1⟩:
CZ|00⟩ = |00⟩
CZ|01⟩ = |01⟩
CZ|10⟩ = |10⟩
CZ|11⟩ = -|11⟩ (note the phase flip!)

Requirements:
- Use only H and CNOT gates
- No native CZ gate allowed
- 2 qubits

Hint: Think about how H transforms Z operations.

Provide the OpenQASM 2.0 circuit.""",
    difficulty=ProblemDifficulty.MEDIUM,
    category=ProblemCategory.GATE_SYNTHESIS,
    expected=ExpectedOutput(
        min_qubits=2,
        max_qubits=2,
        max_depth=5,
        required_gates=["h", "cx"],
        forbidden_gates=["cz"]
    ),
    tags=["decomposition", "controlled-z", "phase"],
    requires_understanding=["CZ gate definition", "H-Z-H = X identity"],
    common_mistakes=["Forgetting H gates", "Wrong qubit as target"],
    # Known-optimal answer: h(1); cx(0,1); h(1)
    reference_solution="OPENQASM 2.0;\ninclude \"qelib1.inc\";\nqreg q[2];\nh q[1];\ncx q[0],q[1];\nh q[1];",
    optimal_depth=3,
    optimal_gate_count=3
)
|
| 280 |
+
|
| 281 |
+
# Medium 3: phase kickback demo (control |+⟩, target |1⟩, then CZ).
PROBLEM_M3_PHASE_ESTIMATION_PREP = TestProblem(
    id="medium_003",
    name="Phase Kickback Setup",
    description="Create the phase kickback configuration",
    prompt="""Create a circuit demonstrating quantum phase kickback.

Phase kickback is a key concept where applying a controlled-U gate
causes the control qubit to acquire the eigenvalue phase.

Setup:
1. Prepare control qubit in |+⟩ superposition
2. Prepare target qubit in |1⟩ (eigenstate of Z with eigenvalue -1)
3. Apply CZ gate
4. The control qubit should now be in |−⟩ state

The final state of the control qubit (q[0]) should show the phase kickback.

Requirements:
- 2 qubits
- Control in superposition, target in |1⟩
- Apply controlled operation
- Use only basic gates (H, X, CX, CZ allowed)

Provide the OpenQASM 2.0 circuit.""",
    difficulty=ProblemDifficulty.MEDIUM,
    category=ProblemCategory.ALGORITHM,
    expected=ExpectedOutput(
        min_qubits=2,
        max_qubits=2,
        max_depth=5,
        required_gates=["h", "x"],
        # NOTE(review): "01"/"11" assumes a q[0]-first bitstring convention
        # (control is the left bit) — verify against the simulator's endianness.
        expected_states={"01": 0.5, "11": 0.5} # After kickback
    ),
    tags=["phase-kickback", "algorithm-primitive", "phase-estimation"],
    requires_understanding=["Phase kickback", "Eigenstates", "Controlled operations"],
    common_mistakes=["Target not in eigenstate", "Missing superposition"],
    optimal_depth=4,
    optimal_gate_count=4
)
|
| 320 |
+
|
| 321 |
+
|
| 322 |
+
# =============================================================================
|
| 323 |
+
# HARD PROBLEMS: Algorithm Implementation
|
| 324 |
+
# =============================================================================
|
| 325 |
+
|
| 326 |
+
# Hard 1: full Deutsch algorithm with the balanced oracle f(x) = x.
PROBLEM_H1_DEUTSCH = TestProblem(
    id="hard_001",
    name="Deutsch Algorithm",
    description="Implement Deutsch's algorithm for function type detection",
    prompt="""Implement Deutsch's algorithm to determine if a function is constant or balanced.

Deutsch's algorithm determines whether a black-box function f:{0,1}→{0,1} is:
- Constant: f(0)=f(1) (always 0 or always 1)
- Balanced: f(0)≠f(1) (different outputs)

For this problem, implement the oracle for the BALANCED function f(x) = x.

Algorithm structure:
1. Initialize |01⟩ (input qubit |0⟩, ancilla qubit |1⟩)
2. Apply H to both qubits
3. Apply the oracle Uf: |x,y⟩ → |x, y⊕f(x)⟩
4. Apply H to the input qubit
5. Measure input qubit: |1⟩ means balanced

For f(x)=x, the oracle is just a CNOT.

Requirements:
- 2 qubits
- Implement full Deutsch circuit with f(x)=x oracle
- After measurement, input qubit should be in |1⟩

Provide the OpenQASM 2.0 circuit.""",
    difficulty=ProblemDifficulty.HARD,
    category=ProblemCategory.ALGORITHM,
    expected=ExpectedOutput(
        min_qubits=2,
        max_qubits=2,
        max_depth=8,
        required_gates=["h", "x", "cx"],
        # NOTE(review): the ancilla is left in |−⟩ (50/50 in the Z basis) unless
        # a final H is applied to it, so prob 1.0 for "11" depends on how the
        # evaluator marginalizes/orders qubits — confirm before trusting this.
        expected_states={"11": 1.0} # Input qubit is 1 (balanced), ancilla is 1
    ),
    tags=["algorithm", "deutsch", "oracle"],
    requires_understanding=["Deutsch algorithm", "Oracle construction", "Interference"],
    common_mistakes=["Wrong initial state", "Missing ancilla preparation", "Oracle errors"],
    optimal_depth=5,
    optimal_gate_count=6
)
|
| 368 |
+
|
| 369 |
+
# Hard 2: one-iteration Grover search on 2 qubits (marked state |11⟩).
PROBLEM_H2_GROVER_2QUBIT = TestProblem(
    id="hard_002",
    name="Grover Search (2-qubit)",
    description="Find marked state |11⟩ using Grover's algorithm",
    prompt="""Implement 2-qubit Grover's search algorithm to find the state |11⟩.

Grover's algorithm amplifies the probability of the marked state.

For 2 qubits with 1 marked state, we need exactly 1 iteration:

1. Initialize: H⊗H on |00⟩ → equal superposition
2. Oracle: Mark |11⟩ with a phase flip (multiply by -1)
3. Diffusion: Reflect about the average amplitude

Oracle for |11⟩: Apply CZ (or equivalent)
Diffusion operator: H⊗H · (2|00⟩⟨00| - I) · H⊗H

Requirements:
- 2 qubits
- After 1 Grover iteration, |11⟩ should have probability ≈ 1
- Use only basic gates

Provide the OpenQASM 2.0 circuit.""",
    difficulty=ProblemDifficulty.HARD,
    category=ProblemCategory.ALGORITHM,
    expected=ExpectedOutput(
        min_qubits=2,
        max_qubits=2,
        max_depth=12,
        required_gates=["h", "x", "cx"],
        expected_states={"11": 1.0},
        tolerance=0.1
    ),
    tags=["algorithm", "grover", "search", "amplitude-amplification"],
    requires_understanding=["Grover's algorithm", "Oracle design", "Diffusion operator"],
    common_mistakes=["Wrong oracle phase", "Missing diffusion", "Too many/few iterations"],
    optimal_depth=8,
    optimal_gate_count=10
)
|
| 408 |
+
|
| 409 |
+
# Hard 3: teleportation resource preparation (|+⟩ on q0, Bell pair on q1/q2).
PROBLEM_H3_TELEPORTATION_PREP = TestProblem(
    id="hard_003",
    name="Quantum Teleportation Setup",
    description="Prepare the entangled resource state for teleportation",
    prompt="""Create the initial setup for quantum teleportation.

Quantum teleportation requires:
1. The state to teleport |ψ⟩ on qubit 0
2. A shared Bell pair between qubits 1 and 2

For this problem:
- Prepare qubit 0 in state |+⟩ (the state we'll "teleport")
- Prepare qubits 1 and 2 in the Bell state (|00⟩ + |11⟩)/√2
- Qubit 1 goes to Alice (sender), qubit 2 to Bob (receiver)

Requirements:
- 3 qubits
- q[0]: |+⟩ state (to be teleported)
- q[1], q[2]: Bell pair (shared entanglement)

After this setup, Alice has q[0] and q[1], Bob has q[2].

Provide the OpenQASM 2.0 circuit.""",
    difficulty=ProblemDifficulty.HARD,
    category=ProblemCategory.ALGORITHM,
    expected=ExpectedOutput(
        min_qubits=3,
        max_qubits=3,
        max_depth=4,
        required_gates=["h", "cx"]
    ),
    tags=["algorithm", "teleportation", "entanglement", "bell-state"],
    requires_understanding=["Quantum teleportation", "Bell states", "Entanglement as resource"],
    common_mistakes=["Wrong qubits entangled", "State to teleport not prepared"],
    optimal_depth=3,
    optimal_gate_count=4
)
|
| 446 |
+
|
| 447 |
+
|
| 448 |
+
# =============================================================================
|
| 449 |
+
# PROBLEM SETS
|
| 450 |
+
# =============================================================================
|
| 451 |
+
|
| 452 |
+
# Problem sets grouped by difficulty. Order is significant: evaluation runs
# iterate these lists in order, so keep it stable for reproducible reports.
EASY_PROBLEMS = [
    PROBLEM_E1_PHASE_FLIP,
    PROBLEM_E2_CONTROLLED_NOT,
    PROBLEM_E3_MEASUREMENT_BASIS
]

MEDIUM_PROBLEMS = [
    PROBLEM_M1_SWAP_DECOMPOSITION,
    PROBLEM_M2_CONTROLLED_Z,
    PROBLEM_M3_PHASE_ESTIMATION_PREP
]

HARD_PROBLEMS = [
    PROBLEM_H1_DEUTSCH,
    PROBLEM_H2_GROVER_2QUBIT,
    PROBLEM_H3_TELEPORTATION_PREP
]
|
| 469 |
+
|
| 470 |
+
|
| 471 |
+
# ============================================================================
|
| 472 |
+
# VERY_HARD PROBLEMS: Push NAKED to its limits
|
| 473 |
+
# ============================================================================
|
| 474 |
+
|
| 475 |
+
# Very hard 1: full 4-qubit QFT decomposition (no black-box QFT allowed).
PROBLEM_VH1_QFT_4QUBIT = TestProblem(
    id="very_hard_001",
    name="4-Qubit QFT",
    description="Implement full Quantum Fourier Transform on 4 qubits",
    prompt="""Implement the complete Quantum Fourier Transform (QFT) on 4 qubits.

The QFT transforms computational basis states into Fourier basis:
QFT|x⟩ = (1/√N) Σ_{k=0}^{N-1} e^{2πixk/N} |k⟩

For 4 qubits (N=16), the circuit requires:
1. Apply Hadamard to each qubit in sequence
2. Apply controlled phase rotations (CR_k) between qubits
3. SWAP qubits to correct bit ordering (optional for some conventions)

Phase rotation angles: R_k = rotation by π/2^(k-1)
- R_2 = π/2 (S gate or cp(π/2))
- R_3 = π/4 (T gate or cp(π/4))
- R_4 = π/8 (cp(π/8))

Requirements:
- Use exactly 4 qubits
- Must use H, controlled-phase (cp or crz), and optionally SWAP gates
- Do NOT use QFT as a black box - implement the full decomposition
- Include proper phase rotations between all qubit pairs

The output should show interference patterns in the Fourier basis.

Provide the OpenQASM 2.0 circuit.""",
    difficulty=ProblemDifficulty.VERY_HARD,
    category=ProblemCategory.ALGORITHM,
    expected=ExpectedOutput(
        min_qubits=4,
        max_qubits=4,
        max_depth=20,
        # Only H is enforced; cp/crz usage is checked via the prompt, not here.
        required_gates=["h"]
    ),
    tags=["qft", "fourier", "phase-rotation", "multi-qubit"],
    requires_understanding=["QFT algorithm", "Controlled phase gates", "Bit reversal"],
    common_mistakes=["Wrong phase angles", "Missing controlled rotations", "Forgetting bit reversal"],
    optimal_depth=12,
    optimal_gate_count=16
)
|
| 517 |
+
|
| 518 |
+
# Very hard 2: two-iteration Grover search on 3 qubits (marked state |101⟩),
# including explicit CCZ decomposition in both oracle and diffuser.
PROBLEM_VH2_GROVER_3QUBIT = TestProblem(
    id="very_hard_002",
    name="Grover 3-Qubit Search",
    description="Implement Grover's search on 3 qubits with 2 iterations",
    prompt="""Implement 3-qubit Grover's search algorithm to find the marked state |101⟩.

For 3 qubits (N=8 states), the optimal number of iterations is approximately π√N/4 ≈ 2.

Algorithm structure (repeat 2 times):
1. Initial superposition: H⊗H⊗H on |000⟩

For EACH Grover iteration:
2. Oracle: Mark |101⟩ with phase flip (multiply amplitude by -1)
   - Oracle for |101⟩: X on q[1], then CCZ (or Toffoli+phase), then X on q[1]
   - Alternative: use multi-controlled Z gate

3. Diffusion operator (Grover diffuser):
   - Apply H to all qubits
   - Apply X to all qubits
   - Apply multi-controlled Z (CCZ or decomposition)
   - Apply X to all qubits
   - Apply H to all qubits

Requirements:
- Use exactly 3 qubits
- Implement BOTH oracle and diffusion operator
- Perform exactly 2 Grover iterations
- After 2 iterations, |101⟩ should have probability > 0.9
- Use basic gates: H, X, CX, CCX (Toffoli), CZ, or their equivalents

IMPORTANT: You must implement CCZ using either:
- ccx followed by cz and ccx (Toffoli-based)
- h on target, ccx, h on target (standard decomposition)

Provide the OpenQASM 2.0 circuit.""",
    difficulty=ProblemDifficulty.VERY_HARD,
    category=ProblemCategory.ALGORITHM,
    expected=ExpectedOutput(
        min_qubits=3,
        max_qubits=3,
        max_depth=30,
        required_gates=["h", "x", "cx"],
        # Theoretical success probability after 2 iterations is ~0.945.
        expected_states={"101": 0.9},
        tolerance=0.15
    ),
    tags=["grover", "search", "oracle", "diffusion", "multi-iteration"],
    requires_understanding=["Grover's algorithm", "Multi-controlled gates", "Oracle design", "Diffusion operator"],
    common_mistakes=["Wrong oracle", "Single iteration only", "Incorrect diffusion", "Missing CCZ decomposition"],
    optimal_depth=24,
    optimal_gate_count=40
)
|
| 569 |
+
|
| 570 |
+
# Very hard 3: two-layer hardware-efficient VQE ansatz with fixed angles.
PROBLEM_VH3_VQE_ANSATZ = TestProblem(
    id="very_hard_003",
    name="VQE Hardware-Efficient Ansatz",
    description="Construct a 4-qubit hardware-efficient ansatz for VQE",
    prompt="""Construct a 4-qubit hardware-efficient variational ansatz for VQE.

A hardware-efficient ansatz is a parameterized quantum circuit used in VQE
(Variational Quantum Eigensolver) to prepare trial wavefunctions.

Structure (2 layers):

LAYER 1:
1. Apply Ry(θ) rotations to all 4 qubits (use ry gate with parameter, e.g., ry(pi/4))
2. Apply Rz(φ) rotations to all 4 qubits (use rz gate with parameter, e.g., rz(pi/4))
3. Apply entangling CNOT ladder: cx q[0],q[1]; cx q[1],q[2]; cx q[2],q[3];

LAYER 2:
4. Apply Ry(θ') rotations to all 4 qubits
5. Apply Rz(φ') rotations to all 4 qubits
6. Apply entangling CNOT ladder again

For this implementation, use fixed angles:
- Layer 1: ry(0.5) and rz(0.3) on all qubits
- Layer 2: ry(0.7) and rz(0.2) on all qubits

Requirements:
- Use exactly 4 qubits
- Implement 2 full layers (rotation + entanglement each)
- Use ry, rz, and cx gates
- Linear entanglement pattern (nearest-neighbor CNOTs)

This circuit structure is used on real quantum hardware (IBM, Google) for
quantum chemistry and optimization problems.

Provide the OpenQASM 2.0 circuit.""",
    difficulty=ProblemDifficulty.VERY_HARD,
    category=ProblemCategory.ALGORITHM,
    expected=ExpectedOutput(
        min_qubits=4,
        max_qubits=4,
        max_depth=16,
        required_gates=["ry", "rz", "cx"]
    ),
    tags=["vqe", "ansatz", "variational", "quantum-chemistry", "hardware-efficient"],
    requires_understanding=["VQE algorithm", "Parameterized circuits", "Hardware constraints", "Entanglement layers"],
    common_mistakes=["Missing rotation layers", "Wrong entanglement pattern", "Incorrect parameter format"],
    optimal_depth=12,
    optimal_gate_count=22
)
|
| 619 |
+
|
| 620 |
+
# Very hard 4: Bernstein-Vazirani with hidden string s=1011 (5 qubits).
PROBLEM_VH4_BERNSTEIN_VAZIRANI = TestProblem(
    id="very_hard_004",
    name="Bernstein-Vazirani 4-bit",
    description="Implement Bernstein-Vazirani algorithm to find hidden string s=1011",
    prompt="""Implement the Bernstein-Vazirani algorithm to find the hidden string s=1011.

The Bernstein-Vazirani algorithm finds a hidden n-bit string s in ONE query.
Given a function f(x) = s·x mod 2 (bitwise dot product), find s.

For s=1011 (4 bits), we need 5 qubits (4 input + 1 ancilla):

Algorithm:
1. Initialize all input qubits to |0⟩, ancilla to |1⟩
2. Apply H to all 5 qubits (creates superposition + phase kickback setup)
3. Apply Oracle U_f: For each bit s_i=1, apply CNOT from q[i] to ancilla
   - s=1011 means: CNOT from q[0] to q[4], q[2] to q[4], q[3] to q[4]
   - (s[0]=1, s[1]=0, s[2]=1, s[3]=1 → control qubits 0, 2, 3)
4. Apply H to all input qubits (NOT the ancilla)
5. Measure input qubits → reveals s directly

Requirements:
- Use 5 qubits (q[0-3] for input, q[4] for ancilla)
- Prepare ancilla in |1⟩ state before Hadamards
- Oracle: CNOT from q[0], q[2], q[3] to q[4] (positions where s has 1)
- Apply final Hadamards only to input qubits
- Measure input qubits → should give |1011⟩

After measurement, the input register should read 1011 with probability 1.0.

Provide the OpenQASM 2.0 circuit.""",
    difficulty=ProblemDifficulty.VERY_HARD,
    category=ProblemCategory.ALGORITHM,
    expected=ExpectedOutput(
        min_qubits=5,
        max_qubits=5,
        max_depth=10,
        required_gates=["h", "x", "cx"],
        # NOTE(review): without a final H, the ancilla is in |−⟩ (50/50 in the
        # Z basis), so a deterministic "1" ancilla bit — and the bitstring
        # ordering — depend on the evaluator's conventions. Confirm.
        expected_states={"10111": 1.0}, # 1011 in input register, 1 in ancilla
        tolerance=0.05
    ),
    tags=["bernstein-vazirani", "oracle", "hidden-string", "query-complexity"],
    requires_understanding=["Bernstein-Vazirani algorithm", "Oracle construction", "Phase kickback"],
    common_mistakes=["Wrong oracle CNOTs", "Missing ancilla preparation", "Hadamards on ancilla"],
    optimal_depth=6,
    optimal_gate_count=15
)
|
| 666 |
+
|
| 667 |
+
VERY_HARD_PROBLEMS = [
    PROBLEM_VH1_QFT_4QUBIT,
    PROBLEM_VH2_GROVER_3QUBIT,
    PROBLEM_VH3_VQE_ANSATZ,
    PROBLEM_VH4_BERNSTEIN_VAZIRANI
]

# Flat list of every problem, ordered easy -> very hard.
ALL_PROBLEMS = EASY_PROBLEMS + MEDIUM_PROBLEMS + HARD_PROBLEMS + VERY_HARD_PROBLEMS

# Problem registry by ID (assumes every problem id is unique; duplicates
# would silently overwrite earlier entries here).
PROBLEMS_BY_ID = {p.id: p for p in ALL_PROBLEMS}
|
| 678 |
+
|
| 679 |
+
|
| 680 |
+
def get_problem(problem_id: str) -> Optional[TestProblem]:
    """Look up a registered problem by its identifier.

    Returns None when no problem with that ID is registered.
    """
    try:
        return PROBLEMS_BY_ID[problem_id]
    except KeyError:
        return None
|
| 683 |
+
|
| 684 |
+
|
| 685 |
+
def get_problems_by_difficulty(difficulty: ProblemDifficulty) -> List[TestProblem]:
    """Return all problems at the given difficulty level.

    As a convenience, a string value (e.g. "easy", "VERY_HARD") is accepted
    and coerced case-insensitively to the matching ProblemDifficulty member;
    an unknown string raises ValueError.
    """
    target = (
        ProblemDifficulty(difficulty.lower())
        if isinstance(difficulty, str)
        else difficulty
    )
    return [problem for problem in ALL_PROBLEMS if problem.difficulty == target]
|
| 691 |
+
|
| 692 |
+
|
| 693 |
+
def get_problems_by_category(category: ProblemCategory) -> List[TestProblem]:
    """Get all problems of a specific category.

    For consistency with get_problems_by_difficulty, a string value
    (e.g. "algorithm", "STATE_PREP") is also accepted and coerced
    case-insensitively to the matching ProblemCategory member; an
    unknown string raises ValueError.
    """
    # Backward-compatible generalization: the sibling difficulty helper
    # already accepts strings, so this helper should too.
    if isinstance(category, str):
        category = ProblemCategory(category.lower())
    return [p for p in ALL_PROBLEMS if p.category == category]
|
| 696 |
+
|
| 697 |
+
|
| 698 |
+
def get_problems_by_tag(tag: str) -> List[TestProblem]:
    """Return every problem whose tag list contains *tag* (exact match)."""
    return list(filter(lambda problem: tag in problem.tags, ALL_PROBLEMS))
|
| 701 |
+
|
| 702 |
+
|
| 703 |
+
def get_research_problem_set() -> List[TestProblem]:
    """Get the standard research evaluation set (3 problems, one per difficulty).

    Returns a fresh list each call, so callers may mutate it freely.
    Deliberately excludes the VERY_HARD tier to keep evaluation runs cheap.
    """
    return [
        PROBLEM_E1_PHASE_FLIP, # Easy: Phase flip state
        PROBLEM_M1_SWAP_DECOMPOSITION, # Medium: SWAP decomposition
        PROBLEM_H1_DEUTSCH # Hard: Deutsch algorithm
    ]
|
tests/test_quality_analyzer.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Path: QAgents-workflos/test_quality_analyzer.py
|
| 2 |
+
# Description: Test the circuit quality analyzer
|
| 3 |
+
"""Test that quality analyzer works with MCP endpoints."""
|
| 4 |
+
|
| 5 |
+
from tests.circuit_quality_analyzer import CircuitQualityAnalyzer, get_analyzer
|
| 6 |
+
|
| 7 |
+
def test_analyzer():
    """Smoke-test CircuitQualityAnalyzer against a canonical Bell-state circuit.

    Prints every metric the analyzer reports; there are no assertions, so
    this is a manual/visual check rather than an automated pass/fail test.
    """
    analyzer = get_analyzer()

    # Test with a Bell state circuit
    test_qasm = """OPENQASM 2.0;
include "qelib1.inc";
qreg q[2];
creg c[2];
h q[0];
cx q[0], q[1];
measure q -> c;
"""

    print("Analyzing Bell state circuit...")
    print("-" * 40)

    result = analyzer.analyze_circuit(test_qasm)

    # Dump each reported metric; field meanings come from CircuitQualityAnalyzer.
    print(f"Syntax Valid: {result.syntax_valid}")
    print(f"Depth: {result.depth}")
    print(f"Gate Count: {result.gate_count}")
    print(f"CX Count: {result.cx_count}")
    print(f"Single Qubit Count: {result.single_qubit_count}")
    print(f"Hardware Fitness: {result.hardware_fitness}")
    print(f"Complexity Score: {result.complexity_score}")
    print(f"State Correctness: {result.state_correctness}")
    print(f"Noise Estimate: {result.noise_estimate}")
    print(f"Probabilities: {result.probabilities}")

    # Errors are advisory here; they do not fail the run.
    if result.errors:
        print(f"\nErrors/Warnings:")
        for err in result.errors:
            print(f"  - {err}")

if __name__ == "__main__":
    test_analyzer()
|
tests/test_ratelimited.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Quick test of rate-limited evaluation on easy problems.
|
| 3 |
+
"""
|
| 4 |
+
import os
|
| 5 |
+
from tests.evaluation_harness import EvaluationHarness
|
| 6 |
+
from tests.test_problems import EASY_PROBLEMS, MEDIUM_PROBLEMS, HARD_PROBLEMS
|
| 7 |
+
|
| 8 |
+
# Combine all problems
|
| 9 |
+
TEST_PROBLEMS = EASY_PROBLEMS + MEDIUM_PROBLEMS + HARD_PROBLEMS
|
| 10 |
+
|
| 11 |
+
# Ensure API key is set
|
| 12 |
+
os.environ["GOOGLE_API_KEY"] = "$env:GOOGLE_API_KEY"
|
| 13 |
+
|
| 14 |
+
print("=== RATE-LIMITED EVALUATION TEST ===")
|
| 15 |
+
print("Testing Guided mode (4 LLM calls per problem)")
|
| 16 |
+
print("Rate limit: 5 seconds between requests")
|
| 17 |
+
print("")
|
| 18 |
+
|
| 19 |
+
# Run only 3 easy problems with guided mode
|
| 20 |
+
harness = EvaluationHarness()
|
| 21 |
+
easy_problems = [p for p in TEST_PROBLEMS if p.id.startswith('easy')][:3]
|
| 22 |
+
|
| 23 |
+
print(f"Testing {len(easy_problems)} problems with Guided orchestration\n")
|
| 24 |
+
results = []
|
| 25 |
+
|
| 26 |
+
for problem in easy_problems:
|
| 27 |
+
print(f"Problem: {problem.name}")
|
| 28 |
+
result = harness.evaluate_single_run(problem, mode='guided', run_number=1)
|
| 29 |
+
results.append(result)
|
| 30 |
+
print(f" Success: {result.success}, Time: {result.execution_time_ms:.1f}ms\n")
|
| 31 |
+
|
| 32 |
+
# Summary
|
| 33 |
+
successes = sum(1 for r in results if r.success)
|
| 34 |
+
print("=== SUMMARY ===")
|
| 35 |
+
print(f"Success rate: {successes}/{len(results)} ({100*successes/len(results):.0f}%)")
|
| 36 |
+
print(f"Total API calls: ~{len(results) * 4} LLM requests")
|
| 37 |
+
print(f"Expected time with rate limiting: ~{len(results) * 4 * 5 / 60:.1f} minutes")
|
tools/__init__.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tools module: MCP endpoint wrappers as callable tools."""
|
| 2 |
+
|
| 3 |
+
from .tool_registry import (
|
| 4 |
+
ToolDefinition,
|
| 5 |
+
ToolCategory,
|
| 6 |
+
ToolRegistry,
|
| 7 |
+
registry,
|
| 8 |
+
register_tool
|
| 9 |
+
)
|
| 10 |
+
|
| 11 |
+
from .quantum_tools import (
|
| 12 |
+
get_all_tools,
|
| 13 |
+
get_tools_by_category,
|
| 14 |
+
invoke_tool,
|
| 15 |
+
# Creation tools
|
| 16 |
+
create_from_template,
|
| 17 |
+
generate_random_circuit,
|
| 18 |
+
generate_from_description,
|
| 19 |
+
# Analysis tools
|
| 20 |
+
parse_qasm,
|
| 21 |
+
analyze_circuit,
|
| 22 |
+
get_circuit_depth,
|
| 23 |
+
# Validation tools
|
| 24 |
+
validate_syntax,
|
| 25 |
+
check_connectivity,
|
| 26 |
+
verify_unitary,
|
| 27 |
+
# Simulation tools
|
| 28 |
+
simulate_circuit,
|
| 29 |
+
get_statevector,
|
| 30 |
+
get_probabilities,
|
| 31 |
+
# Scoring tools
|
| 32 |
+
calculate_complexity,
|
| 33 |
+
calculate_hardware_fitness,
|
| 34 |
+
calculate_expressibility,
|
| 35 |
+
# Resource tools
|
| 36 |
+
estimate_resources,
|
| 37 |
+
estimate_noise,
|
| 38 |
+
# Composition tools
|
| 39 |
+
compose_circuits,
|
| 40 |
+
generate_inverse,
|
| 41 |
+
tensor_circuits,
|
| 42 |
+
repeat_circuit
|
| 43 |
+
)
|
| 44 |
+
|
| 45 |
+
__all__ = [
|
| 46 |
+
"ToolDefinition",
|
| 47 |
+
"ToolCategory",
|
| 48 |
+
"ToolRegistry",
|
| 49 |
+
"registry",
|
| 50 |
+
"register_tool",
|
| 51 |
+
"get_all_tools",
|
| 52 |
+
"get_tools_by_category",
|
| 53 |
+
"invoke_tool"
|
| 54 |
+
]
|
tools/quantum_tools.py
ADDED
|
@@ -0,0 +1,346 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Quantum Tools: MCP endpoint wrappers registered as tools.
|
| 3 |
+
All 23 MCP endpoints wrapped as callable tools for agents.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from typing import Any, Dict, Optional
|
| 7 |
+
from .tool_registry import register_tool, ToolCategory, registry
|
| 8 |
+
|
| 9 |
+
# Import client lazily to avoid circular imports
|
| 10 |
+
def _get_client():
    """Return the shared MCP client (imported lazily to avoid circular imports)."""
    from client import get_client
    return get_client()
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
# ===== CREATION TOOLS =====

@register_tool(
    name="create_from_template",
    description="Create a quantum circuit from a predefined template (bell_state, ghz, qft, grover, etc.)",
    category=ToolCategory.CREATION,
    parameters={
        "template": {"type": "string", "description": "Template name", "required": True},
        "num_qubits": {"type": "integer", "description": "Number of qubits", "required": False}
    },
    returns="QASM code of the created circuit"
)
def create_from_template(template: str, num_qubits: int = 2) -> Dict:
    """Build a circuit from a named template via the MCP client."""
    resp = _get_client().create_circuit_from_template(template, num_qubits)
    return dict(success=resp.success, qasm=resp.data, error=resp.error)


@register_tool(
    name="generate_random_circuit",
    description="Generate a random quantum circuit with specified parameters",
    category=ToolCategory.CREATION,
    parameters={
        "num_qubits": {"type": "integer", "description": "Number of qubits", "required": True},
        "depth": {"type": "integer", "description": "Circuit depth", "required": True},
        "gate_set": {"type": "string", "description": "Comma-separated gates (h,cx,rz)", "required": False}
    },
    returns="QASM code of the random circuit"
)
def generate_random_circuit(num_qubits: int, depth: int, gate_set: str = "h,cx,rz") -> Dict:
    """Ask the MCP server for a random circuit of the given size and depth."""
    resp = _get_client().generate_random_circuit(num_qubits, depth, gate_set)
    return dict(success=resp.success, qasm=resp.data, error=resp.error)


@register_tool(
    name="generate_from_description",
    description="Generate a circuit from natural language description",
    category=ToolCategory.CREATION,
    parameters={
        "description": {"type": "string", "description": "Natural language description of the circuit", "required": True}
    },
    returns="QASM code of the generated circuit"
)
def generate_from_description(description: str) -> Dict:
    """Turn a natural-language description into QASM via the MCP client."""
    resp = _get_client().generate_circuit_from_description(description)
    return dict(success=resp.success, qasm=resp.data, error=resp.error)
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
# ===== ANALYSIS TOOLS =====

@register_tool(
    name="parse_qasm",
    description="Parse OpenQASM code and extract circuit structure",
    category=ToolCategory.ANALYSIS,
    parameters={
        "qasm": {"type": "string", "description": "OpenQASM code", "required": True}
    },
    returns="Parsed circuit structure with gates, qubits, etc."
)
def parse_qasm(qasm: str) -> Dict:
    """Parse QASM into a structural representation via the MCP client."""
    resp = _get_client().parse_qasm(qasm)
    return dict(success=resp.success, structure=resp.data, error=resp.error)


@register_tool(
    name="analyze_circuit",
    description="Analyze circuit properties: depth, gate count, qubit usage",
    category=ToolCategory.ANALYSIS,
    parameters={
        "qasm": {"type": "string", "description": "OpenQASM code", "required": True}
    },
    returns="Circuit analysis with depth, gate counts, etc."
)
def analyze_circuit(qasm: str) -> Dict:
    """Collect circuit-level metrics (depth, gate counts) via the MCP client."""
    resp = _get_client().analyze_circuit(qasm)
    return dict(success=resp.success, analysis=resp.data, error=resp.error)


@register_tool(
    name="get_circuit_depth",
    description="Get the depth of a quantum circuit",
    category=ToolCategory.ANALYSIS,
    parameters={
        "qasm": {"type": "string", "description": "OpenQASM code", "required": True}
    },
    returns="Integer depth value"
)
def get_circuit_depth(qasm: str) -> Dict:
    """Fetch the circuit depth via the MCP client."""
    resp = _get_client().get_circuit_depth(qasm)
    return dict(success=resp.success, depth=resp.data, error=resp.error)
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
# ===== VALIDATION TOOLS =====

@register_tool(
    name="validate_syntax",
    description="Validate QASM syntax for correctness",
    category=ToolCategory.VALIDATION,
    parameters={
        "qasm": {"type": "string", "description": "OpenQASM code", "required": True}
    },
    returns="Validation result with any syntax errors"
)
def validate_syntax(qasm: str) -> Dict:
    """Check QASM syntax via the MCP client."""
    resp = _get_client().validate_syntax(qasm)
    return dict(success=resp.success, valid=resp.data, error=resp.error)


@register_tool(
    name="check_connectivity",
    description="Check if circuit respects hardware qubit connectivity",
    category=ToolCategory.VALIDATION,
    parameters={
        "qasm": {"type": "string", "description": "OpenQASM code", "required": True},
        "hardware": {"type": "string", "description": "Hardware profile (ibm_eagle, ionq_aria, rigetti_aspen)", "required": False}
    },
    returns="Connectivity check result"
)
def check_connectivity(qasm: str, hardware: str = "ibm_eagle") -> Dict:
    """Verify the circuit against the target hardware's qubit connectivity."""
    resp = _get_client().check_connectivity(qasm, hardware)
    return dict(success=resp.success, result=resp.data, error=resp.error)


@register_tool(
    name="verify_unitary",
    description="Verify that circuit produces a valid unitary matrix",
    category=ToolCategory.VALIDATION,
    parameters={
        "qasm": {"type": "string", "description": "OpenQASM code", "required": True}
    },
    returns="Unitary verification result"
)
def verify_unitary(qasm: str) -> Dict:
    """Check that the circuit's operator is a valid unitary via the MCP client."""
    resp = _get_client().verify_unitary(qasm)
    return dict(success=resp.success, result=resp.data, error=resp.error)
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
# ===== SIMULATION TOOLS =====

@register_tool(
    name="simulate_circuit",
    description="Simulate circuit execution and get measurement results",
    category=ToolCategory.SIMULATION,
    parameters={
        "qasm": {"type": "string", "description": "OpenQASM code", "required": True},
        "shots": {"type": "integer", "description": "Number of measurement shots", "required": False}
    },
    returns="Measurement results with counts"
)
def simulate_circuit(qasm: str, shots: int = 1024) -> Dict:
    """Run a shot-based simulation via the MCP client."""
    resp = _get_client().simulate_circuit(qasm, shots)
    return dict(success=resp.success, results=resp.data, error=resp.error)


@register_tool(
    name="get_statevector",
    description="Get the statevector of a circuit (no measurement)",
    category=ToolCategory.SIMULATION,
    parameters={
        "qasm": {"type": "string", "description": "OpenQASM code", "required": True}
    },
    returns="Statevector as complex amplitudes"
)
def get_statevector(qasm: str) -> Dict:
    """Fetch the ideal (measurement-free) statevector via the MCP client."""
    resp = _get_client().get_statevector(qasm)
    return dict(success=resp.success, statevector=resp.data, error=resp.error)


@register_tool(
    name="get_probabilities",
    description="Get probability distribution from circuit",
    category=ToolCategory.SIMULATION,
    parameters={
        "qasm": {"type": "string", "description": "OpenQASM code", "required": True}
    },
    returns="Probability distribution over computational basis states"
)
def get_probabilities(qasm: str) -> Dict:
    """Fetch basis-state probabilities via the MCP client."""
    resp = _get_client().get_probabilities(qasm)
    return dict(success=resp.success, probabilities=resp.data, error=resp.error)
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
# ===== SCORING TOOLS =====

@register_tool(
    name="calculate_complexity",
    description="Calculate circuit complexity score (lower is better)",
    category=ToolCategory.SCORING,
    parameters={
        "qasm": {"type": "string", "description": "OpenQASM code", "required": True}
    },
    returns="Complexity score and breakdown"
)
def calculate_complexity(qasm: str) -> Dict:
    """Compute the complexity score via the MCP client."""
    resp = _get_client().calculate_complexity_score(qasm)
    return dict(success=resp.success, score=resp.data, error=resp.error)


@register_tool(
    name="calculate_hardware_fitness",
    description="Calculate how well circuit fits target hardware",
    category=ToolCategory.SCORING,
    parameters={
        "qasm": {"type": "string", "description": "OpenQASM code", "required": True},
        "hardware": {"type": "string", "description": "Hardware profile", "required": False}
    },
    returns="Hardware fitness score (higher is better)"
)
def calculate_hardware_fitness(qasm: str, hardware: str = "ibm_eagle") -> Dict:
    """Score the circuit's fit for the target hardware via the MCP client."""
    resp = _get_client().calculate_hardware_fitness(qasm, hardware)
    return dict(success=resp.success, score=resp.data, error=resp.error)


@register_tool(
    name="calculate_expressibility",
    description="Calculate circuit expressibility (ability to explore state space)",
    category=ToolCategory.SCORING,
    parameters={
        "qasm": {"type": "string", "description": "OpenQASM code", "required": True}
    },
    returns="Expressibility score"
)
def calculate_expressibility(qasm: str) -> Dict:
    """Compute the expressibility score via the MCP client."""
    resp = _get_client().calculate_expressibility(qasm)
    return dict(success=resp.success, score=resp.data, error=resp.error)
|
| 239 |
+
|
| 240 |
+
|
| 241 |
+
# ===== RESOURCE TOOLS =====

@register_tool(
    name="estimate_resources",
    description="Estimate resource requirements (qubits, gates, depth)",
    category=ToolCategory.RESOURCE,
    parameters={
        "qasm": {"type": "string", "description": "OpenQASM code", "required": True}
    },
    returns="Resource estimation breakdown"
)
def estimate_resources(qasm: str) -> Dict:
    """Estimate qubit/gate/depth requirements via the MCP client."""
    resp = _get_client().estimate_resources(qasm)
    return dict(success=resp.success, resources=resp.data, error=resp.error)


@register_tool(
    name="estimate_noise",
    description="Estimate noise impact on circuit execution",
    category=ToolCategory.RESOURCE,
    parameters={
        "qasm": {"type": "string", "description": "OpenQASM code", "required": True},
        "hardware": {"type": "string", "description": "Hardware profile", "required": False}
    },
    returns="Noise estimation"
)
def estimate_noise(qasm: str, hardware: str = "ibm_eagle") -> Dict:
    """Estimate hardware-dependent noise impact via the MCP client."""
    resp = _get_client().estimate_noise(qasm, hardware)
    return dict(success=resp.success, noise=resp.data, error=resp.error)
|
| 270 |
+
|
| 271 |
+
|
| 272 |
+
# ===== COMPOSITION TOOLS =====

@register_tool(
    name="compose_circuits",
    description="Compose two circuits sequentially",
    category=ToolCategory.COMPOSITION,
    parameters={
        "qasm1": {"type": "string", "description": "First circuit QASM", "required": True},
        "qasm2": {"type": "string", "description": "Second circuit QASM", "required": True},
        "qubit_mapping": {"type": "string", "description": "Qubit mapping (e.g., '0:1,1:0')", "required": False}
    },
    returns="Composed circuit QASM"
)
def compose_circuits(qasm1: str, qasm2: str, qubit_mapping: str = "") -> Dict:
    """Sequentially compose two circuits via the MCP client."""
    resp = _get_client().compose_circuits(qasm1, qasm2, qubit_mapping)
    return dict(success=resp.success, qasm=resp.data, error=resp.error)


@register_tool(
    name="generate_inverse",
    description="Generate the inverse (adjoint) of a circuit",
    category=ToolCategory.COMPOSITION,
    parameters={
        "qasm": {"type": "string", "description": "OpenQASM code", "required": True}
    },
    returns="Inverse circuit QASM"
)
def generate_inverse(qasm: str) -> Dict:
    """Produce the adjoint circuit via the MCP client."""
    resp = _get_client().generate_inverse_circuit(qasm)
    return dict(success=resp.success, qasm=resp.data, error=resp.error)


@register_tool(
    name="tensor_circuits",
    description="Create tensor product of two circuits (parallel composition)",
    category=ToolCategory.COMPOSITION,
    parameters={
        "qasm1": {"type": "string", "description": "First circuit QASM", "required": True},
        "qasm2": {"type": "string", "description": "Second circuit QASM", "required": True}
    },
    returns="Tensored circuit QASM"
)
def tensor_circuits(qasm1: str, qasm2: str) -> Dict:
    """Combine two circuits in parallel (tensor product) via the MCP client."""
    resp = _get_client().tensor_circuits(qasm1, qasm2)
    return dict(success=resp.success, qasm=resp.data, error=resp.error)


@register_tool(
    name="repeat_circuit",
    description="Repeat a circuit n times",
    category=ToolCategory.COMPOSITION,
    parameters={
        "qasm": {"type": "string", "description": "OpenQASM code", "required": True},
        "n": {"type": "integer", "description": "Number of repetitions", "required": True}
    },
    returns="Repeated circuit QASM"
)
def repeat_circuit(qasm: str, n: int) -> Dict:
    """Concatenate n repetitions of a circuit via the MCP client."""
    resp = _get_client().repeat_circuit(qasm, n)
    return dict(success=resp.success, qasm=resp.data, error=resp.error)
|
| 332 |
+
|
| 333 |
+
|
| 334 |
+
# ===== UTILITY FUNCTIONS =====

def get_all_tools():
    """Return every ToolDefinition currently in the global registry."""
    return registry.get_all()

def get_tools_by_category(category: ToolCategory):
    """Return the ToolDefinitions registered under the given category."""
    return registry.get_by_category(category)

def invoke_tool(name: str, **kwargs):
    """Look up a tool by name in the global registry and call it with kwargs."""
    return registry.invoke(name, **kwargs)
|
tools/tool_registry.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Tools Module: Wrapped MCP endpoints as callable tools for agents.
|
| 3 |
+
Each tool is a self-contained function that can be invoked by agents.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from typing import Any, Callable, Dict, List, Optional
|
| 7 |
+
from dataclasses import dataclass, field
|
| 8 |
+
from enum import Enum
|
| 9 |
+
import json
|
| 10 |
+
|
| 11 |
+
class ToolCategory(Enum):
    """Categories of tools for agent specialization."""
    CREATION = "creation"        # circuit generation (templates, random, NL description)
    ANALYSIS = "analysis"        # structural inspection (parse, depth, gate counts)
    VALIDATION = "validation"    # syntax/connectivity/unitary checks
    SIMULATION = "simulation"    # execution: shots, statevector, probabilities
    SCORING = "scoring"          # quality metrics: complexity, fitness, expressibility
    COMPOSITION = "composition"  # combining circuits: compose, inverse, tensor, repeat
    RESOURCE = "resource"        # resource and noise estimation
|
| 20 |
+
|
| 21 |
+
@dataclass
class ToolDefinition:
    """Definition of a tool that agents can use."""
    name: str
    description: str
    category: ToolCategory
    parameters: Dict[str, Dict]  # parameter name -> {type, description, required}
    function: Callable
    returns: str

    def to_llm_schema(self) -> Dict:
        """Convert to OpenAI function calling format."""
        # Build the JSON-schema properties and the required-name list in one
        # pass each; dict insertion order matches the declaration order.
        properties = {
            pname: {
                "type": spec.get("type", "string"),
                "description": spec.get("description", ""),
            }
            for pname, spec in self.parameters.items()
        }
        required = [
            pname
            for pname, spec in self.parameters.items()
            if spec.get("required", False)
        ]
        return {
            "type": "function",
            "function": {
                "name": self.name,
                "description": self.description,
                "parameters": {
                    "type": "object",
                    "properties": properties,
                    "required": required,
                },
            },
        }
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
class ToolRegistry:
    """Registry of all available tools, indexed by name and by category."""

    def __init__(self):
        # name -> definition
        self._tools: Dict[str, ToolDefinition] = {}
        # category -> list of tool names, in registration order
        self._by_category: Dict[ToolCategory, List[str]] = {cat: [] for cat in ToolCategory}

    def register(self, tool: ToolDefinition):
        """Register a tool, replacing any previously registered tool of the same name.

        Fix: re-registering a name used to append a duplicate entry to the
        category index, making get_by_category()/get_llm_schemas() report the
        tool twice. Remove the stale index entry before inserting the new one.
        """
        previous = self._tools.get(tool.name)
        if previous is not None and tool.name in self._by_category[previous.category]:
            self._by_category[previous.category].remove(tool.name)
        self._tools[tool.name] = tool
        self._by_category[tool.category].append(tool.name)

    def get(self, name: str) -> Optional[ToolDefinition]:
        """Get a tool by name, or None if it is not registered."""
        return self._tools.get(name)

    def get_by_category(self, category: ToolCategory) -> List[ToolDefinition]:
        """Get all tools in a category, in registration order."""
        return [self._tools[name] for name in self._by_category[category]]

    def get_all(self) -> List[ToolDefinition]:
        """Get all registered tools."""
        return list(self._tools.values())

    def get_llm_schemas(self, categories: Optional[List[ToolCategory]] = None) -> List[Dict]:
        """Get OpenAI function schemas for the given categories (all if None)."""
        if categories is None:
            tools = self.get_all()
        else:
            tools = []
            for cat in categories:
                tools.extend(self.get_by_category(cat))
        return [t.to_llm_schema() for t in tools]

    def invoke(self, name: str, **kwargs) -> Any:
        """Invoke a tool by name with keyword arguments.

        Raises:
            ValueError: if no tool with that name is registered.
        """
        tool = self.get(name)
        if tool is None:
            raise ValueError(f"Unknown tool: {name}")
        return tool.function(**kwargs)
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
# Global registry shared by all tool modules.
registry = ToolRegistry()


def register_tool(name: str, description: str, category: ToolCategory,
                  parameters: Dict, returns: str):
    """Decorator to register a function as a tool in the global registry."""
    def decorator(func: Callable):
        registry.register(
            ToolDefinition(
                name=name,
                description=description,
                category=category,
                parameters=parameters,
                function=func,
                returns=returns,
            )
        )
        return func
    return decorator
|