# Path: QAgents-workflos/run_quality_eval.py
# Relations: Uses tests/quality_evaluation_harness.py, database/circuit_quality_db.py
# Description: CLI entry point for quality-focused evaluation
# Run with: python run_quality_eval.py --mode all --difficulty easy
# Generates quality comparison report with actual QASM circuits
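# Requires: GOOGLE_API_KEY or GENAI_API_KEY set in the environment (checked at import time below)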
"""
Quality Evaluation Runner: CLI entry point for circuit quality comparison.

Usage:
    python run_quality_eval.py --mode all --difficulty easy
    python run_quality_eval.py --mode naked --problem easy_001
    python run_quality_eval.py --report RUN_ID
"""
import argparse
import logging
import sys
import os
from pathlib import Path
from datetime import datetime
# Add project root to path
sys.path.insert(0, str(Path(__file__).parent))
# Ensure API key is set BEFORE importing config
api_key = os.getenv("GOOGLE_API_KEY") or os.getenv("GENAI_API_KEY")
if api_key:
    os.environ["GOOGLE_API_KEY"] = api_key
from tests.quality_evaluation_harness import QualityEvaluationHarness, run_quick_quality_test
from tests.test_problems import get_problem, get_problems_by_difficulty
from database.circuit_quality_db import get_quality_db
from config import set_api_key
# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
# Explicitly set API key in config after logging is ready
if api_key:
    set_api_key(api_key)
    logger.info(f"API Key configured: {api_key[:10]}...")
else:
    logger.warning("No GOOGLE_API_KEY or GENAI_API_KEY found in environment")

def run_evaluation(args):
"""Run quality evaluation based on arguments."""
harness = QualityEvaluationHarness()
# Parse modes
if args.mode == 'all':
modes = ['naked', 'guided', 'blackboard']
else:
modes = [args.mode]
# Parse difficulties
if args.difficulty == 'all':
difficulties = ['easy', 'medium', 'hard']
else:
difficulties = [args.difficulty]
# Check if specific problem
if args.problem:
problem = get_problem(args.problem)
if not problem:
print(f"ERROR: Problem not found: {args.problem}")
return
print(f"\n{'='*60}")
print(f"Running quality evaluation for: {args.problem}")
print(f"Modes: {modes}")
print(f"{'='*60}\n")
results = harness.evaluate_problem_all_modes(problem, modes)
# Print results
for mode, result in results.items():
print(f"\n{mode.upper()}:")
print(f" Success: {'✅' if result.success else '❌'}")
print(f" Quality Score: {result.quality_metrics.overall_score()}/100")
print(f" Depth: {result.quality_metrics.depth}")
print(f" Gates: {result.quality_metrics.gate_count}")
print(f" CX: {result.quality_metrics.cx_count}")
print(f" Time: {result.execution_time_ms:.0f}ms")
print(f" LLM Calls: {result.llm_requests}")
if result.qasm_code:
print(f" QASM ({len(result.qasm_code)} chars):")
lines = result.qasm_code.split('\n')[:10]
for line in lines:
print(f" {line}")
if len(result.qasm_code.split('\n')) > 10:
print(" ...")
else:
# Full evaluation
print(f"\n{'='*60}")
print(f"Running full quality evaluation")
print(f"Difficulties: {difficulties}")
print(f"Modes: {modes}")
print(f"Max problems: {args.max_problems or 'all'}")
print(f"{'='*60}\n")
run_id = harness.run_full_evaluation(
difficulties=difficulties,
modes=modes,
max_problems=args.max_problems
)
# Print summary
harness.print_summary(run_id)
# Generate report file
report = harness.generate_report(run_id)
report_path = Path(__file__).parent / f"QUALITY_REPORT_{run_id}.md"
report_path.write_text(report, encoding='utf-8')
print(f"\nFull report saved to: {report_path}")
print(f"\nRun ID: {run_id}")
print("Use --report <run_id> to regenerate report later")
def show_report(run_id: str):
"""Show report for a specific run."""
harness = QualityEvaluationHarness()
harness.run_id = run_id # Set to existing run
report = harness.generate_report(run_id)
print(report)
def list_runs():
"""List all evaluation runs."""
db = get_quality_db()
query = "SELECT run_id, timestamp, description, num_problems FROM comparison_runs ORDER BY timestamp DESC LIMIT 20"
    import sqlite3
    with sqlite3.connect(db.db_file) as conn:
        conn.row_factory = sqlite3.Row
        rows = conn.execute(query).fetchall()
    if not rows:
        print("No evaluation runs found.")
        return
    print("\nRecent Evaluation Runs:")
    print("-" * 80)
    for row in rows:
        print(f"{row['run_id']} | {row['timestamp']} | {row['num_problems']} problems | {row['description'] or 'N/A'}")
    print("-" * 80)

def quick_test(args):
"""Run a quick single test."""
mode = args.mode if args.mode != 'all' else 'naked'
problem_id = args.problem or 'easy_001'
print(f"\nQuick test: {problem_id} with {mode} mode")
print("-" * 40)
try:
result = run_quick_quality_test(mode, problem_id)
print(f"Success: {'✅' if result.success else '❌'}")
print(f"Quality Score: {result.quality_metrics.overall_score()}/100")
print(f"Depth: {result.quality_metrics.depth}")
print(f"Gates: {result.quality_metrics.gate_count}")
if result.qasm_code:
print(f"\nQASM:\n{result.qasm_code[:500]}")
if result.errors:
print(f"\nErrors: {result.errors}")
except Exception as e:
print(f"ERROR: {e}")
import traceback
traceback.print_exc()
def main():
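    """Parse CLI arguments and dispatch to list, report, quick test, or full evaluation."""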
    parser = argparse.ArgumentParser(
        description="Quality-focused quantum circuit evaluation",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python run_quality_eval.py --quick  # Quick test
  python run_quality_eval.py --mode all --difficulty easy
  python run_quality_eval.py --problem easy_001 --mode all
  python run_quality_eval.py --list  # List previous runs
  python run_quality_eval.py --report quality_20241128_120000
"""
    )
    parser.add_argument('--mode', choices=['naked', 'guided', 'blackboard', 'all'],
                        default='all', help='Orchestration mode(s) to test')
    parser.add_argument('--difficulty', choices=['easy', 'medium', 'hard', 'all'],
                        default='easy', help='Problem difficulty level(s)')
    parser.add_argument('--problem', type=str, help='Specific problem ID to test')
    parser.add_argument('--max-problems', type=int, help='Maximum problems to test')
    parser.add_argument('--quick', action='store_true', help='Run quick single test')
    parser.add_argument('--report', type=str, help='Generate report for run ID')
    parser.add_argument('--list', action='store_true', help='List previous runs')
    args = parser.parse_args()
    if args.list:
        list_runs()
    elif args.report:
        show_report(args.report)
    elif args.quick:
        quick_test(args)
    else:
        run_evaluation(args)

if __name__ == "__main__":
    main()