"""
QAgents-Workflows: Configuration
Central configuration for the multi-agent quantum circuit optimization system.
Path: QAgents-workflows/config.py
Related: agents/llm_adapter.py (uses GEMINI_MODELS for fallback cascade)
run_evaluation.py (uses config for evaluation settings)
workflows/workflow_definitions.py (references rate limits)
"""
from pathlib import Path
from dataclasses import dataclass, field
from typing import Optional, List, Dict
import os
# Load environment variables from .env file
try:
from dotenv import load_dotenv
load_dotenv()
except ImportError:
# If python-dotenv is not installed, continue without loading .env
# (it will use system environment variables only)
pass
# Paths
PROJECT_ROOT = Path(__file__).parent
QUANTUM_MCP_ROOT = PROJECT_ROOT.parent / "QuantumArchitect-MCP"
# =============================================================================
# GEMINI MODEL CASCADE (sorted by RPD - highest to lowest for optimal fallback)
# =============================================================================
# When a model hits its rate limits (RPM/RPD), fall back to the next model in
# the list (see the usage sketch after the helper functions below).
# Free tier limits (as of 2025):
# - Gemma 3: 30 RPM, 15K TPM, 14,400 RPD (HIGHEST availability)
# - Flash-Lite: 15 RPM, 250K TPM, 1,000 RPD
# - Flash 2.5: 10 RPM, 250K TPM, 250 RPD
# - Flash 2.0: 15 RPM, 1M TPM, 200 RPD
# - Flash 2.0 Lite: 30 RPM, 1M TPM, 200 RPD
# - Pro 2.5: 2 RPM, 125K TPM, 50 RPD (LOWEST availability)
#
# EXPECTED REQUESTS PER EVALUATION (9 problems):
# - Naked mode: 0 LLM calls (direct MCP only)
# - Guided mode: ~36 LLM calls (4 per problem)
# - Blackboard: ~72-108 LLM calls (8-12 per problem)
# =============================================================================
GEMINI_MODELS: List[Dict] = [
# Highest RPD - most available (14,400/day = 10/min continuously)
{
"name": "gemma-3-27b-it",
"rpm": 30,
"tpm": 15_000,
"rpd": 14_400,
"priority": 1,
"notes": "Best for high-volume, may have lower quality than Flash"
},
# Good balance - default model (1,000/day)
{
"name": "gemini-2.5-flash-lite",
"rpm": 15,
"tpm": 250_000,
"rpd": 1_000,
"priority": 2,
"notes": "Good balance of quality and availability - DEFAULT"
},
# Higher quality - moderate availability (250/day)
{
"name": "gemini-2.5-flash",
"rpm": 10,
"tpm": 250_000,
"rpd": 250,
"priority": 3,
"notes": "Better quality, lower availability"
},
# High TPM for long contexts (200/day)
{
"name": "gemini-2.0-flash",
"rpm": 15,
"tpm": 1_000_000,
"rpd": 200,
"priority": 4,
"notes": "Good for long contexts, moderate availability"
},
# Fast variant (200/day)
{
"name": "gemini-2.0-flash-lite",
"rpm": 30,
"tpm": 1_000_000,
"rpd": 200,
"priority": 5,
"notes": "Fast responses, lower availability"
},
# Lowest RPD - highest quality, use sparingly (50/day)
{
"name": "gemini-2.5-pro",
"rpm": 2,
"tpm": 125_000,
"rpd": 50,
"priority": 6,
"notes": "Highest quality, use sparingly - LAST RESORT"
},
]
def get_model_by_priority(priority: int = 1) -> Optional[Dict]:
"""Get model config by priority (1=highest RPD)."""
for model in GEMINI_MODELS:
if model["priority"] == priority:
return model
return None
def get_next_model(current_name: str) -> Optional[Dict]:
"""Get next model in fallback chain."""
for i, model in enumerate(GEMINI_MODELS):
if model["name"] == current_name:
if i + 1 < len(GEMINI_MODELS):
return GEMINI_MODELS[i + 1]
return None
def get_model_config(model_name: str) -> Optional[Dict]:
"""Get model config by name."""
for model in GEMINI_MODELS:
if model["name"] == model_name:
return model
return None
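# Example usage (a sketch of a hypothetical caller, not part of this module):
# walk the fallback cascade until a request succeeds or the chain runs out.
# `try_request` is an assumed placeholder for the caller's own request logic.
#
#     current = get_model_by_priority(1)
#     while current is not None:
#         if try_request(current["name"]):  # assumed caller-side helper
#             break
#         current = get_next_model(current["name"])
#     if current is None:
#         raise RuntimeError("all models in GEMINI_MODELS exhausted")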
@dataclass
class MCPConfig:
"""MCP Server configuration."""
host: str = "127.0.0.1"
port: int = 7861
base_url: str = field(init=False)
def __post_init__(self):
self.base_url = f"http://{self.host}:{self.port}"
@dataclass
class RateLimitConfig:
"""Rate limiting based on Gemini API free tier limits."""
# Default to gemini-2.5-flash-lite limits
rpm_limit: int = 15 # Requests per minute
tpm_limit: int = 250_000 # Tokens per minute
rpd_limit: int = 1_000 # Requests per day
# Conservative buffer (80% of limit = 12 RPM effective)
rpm_buffer: float = 0.8
@property
def min_request_interval(self) -> float:
"""Minimum seconds between requests: 60 / (15 * 0.8) = 5 seconds."""
return 60.0 / (self.rpm_limit * self.rpm_buffer)
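# Example (a minimal throttling sketch, assuming a caller that loops over
# prompts; `prompts` and `send` are hypothetical caller-side names):
#
#     import time
#     limits = RateLimitConfig()  # defaults: 60 / (15 * 0.8) = 5.0 s interval
#     for prompt in prompts:
#         send(prompt)                             # assumed request call
#         time.sleep(limits.min_request_interval)  # stay under ~12 RPM effective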
@dataclass
class LLMConfig:
"""LLM configuration for agents - model agnostic via Gemini and LiteLLM.
Environment Variables (HuggingFace Space compatible):
- LLM_PROVIDER: Provider name (gemini, openai, anthropic, groq, ollama). Default: "gemini"
- LLM_MODEL: Model identifier. Default: "gemini-2.5-flash-lite"
- GOOGLE_API_KEY: Gemini API key (Gemini provider)
- GENAI_API_KEY: Alternative Gemini API key (fallback)
- OPENAI_API_KEY: OpenAI API key (OpenAI provider)
- ANTHROPIC_API_KEY: Anthropic API key (Anthropic provider)
- GROQ_API_KEY: Groq API key (Groq provider)
"""
# Provider options: gemini, openai, anthropic, groq, ollama, etc.
# Reads from LLM_PROVIDER env var, falls back to "gemini"
provider: str = field(default_factory=lambda: os.getenv("LLM_PROVIDER", "gemini"))
# Model identifier - reads from LLM_MODEL env var, falls back to "gemini-2.5-flash-lite"
model: str = field(default_factory=lambda: os.getenv("LLM_MODEL", "gemini-2.5-flash-lite"))
# API key - tries GOOGLE_API_KEY first (Gemini), then GENAI_API_KEY as fallback
# Use None as default and fetch dynamically to support HuggingFace Spaces
api_key: Optional[str] = None
temperature: float = 0.2
max_tokens: int = 2000
# Rate limiting
rate_limit: RateLimitConfig = field(default_factory=RateLimitConfig)
enable_rate_limiting: bool = True # Set to False to disable
# Multi-model fallback
enable_fallback: bool = True # Enable automatic model switching on rate limit
fallback_on_error: bool = True # Also fallback on API errors
def __post_init__(self):
"""Initialize API key from environment if not set."""
if self.api_key is None:
# Try GOOGLE_API_KEY first, then GENAI_API_KEY
self.api_key = os.getenv("GOOGLE_API_KEY") or os.getenv("GENAI_API_KEY")
def get_api_key(self) -> Optional[str]:
"""Get current API key, checking environment on each call for HF Spaces."""
# Always check environment first to support dynamic Secrets in HF Spaces
return os.getenv("GOOGLE_API_KEY") or os.getenv("GENAI_API_KEY") or self.api_key
@property
def model_string(self) -> str:
"""Get full model string for API calls."""
        if self.provider == "gemini":
            return self.model
        else:
            # LiteLLM format: provider/model
            return f"{self.provider}/{self.model}"
@dataclass
class DatabaseConfig:
"""Database/storage configuration."""
db_path: Path = field(default_factory=lambda: PROJECT_ROOT / "database" / "data")
log_path: Path = field(default_factory=lambda: PROJECT_ROOT / "database" / "logs")
memory_path: Path = field(default_factory=lambda: PROJECT_ROOT / "database" / "memory")
def __post_init__(self):
# Ensure directories exist
for path in [self.db_path, self.log_path, self.memory_path]:
path.mkdir(parents=True, exist_ok=True)
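# Example (sketch): instantiating DatabaseConfig creates the default layout
# under PROJECT_ROOT if it does not already exist:
#
#     db = DatabaseConfig()
#     # <PROJECT_ROOT>/database/data
#     # <PROJECT_ROOT>/database/logs
#     # <PROJECT_ROOT>/database/memory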
@dataclass
class CostTrackingConfig:
"""Cost and usage tracking configuration."""
enabled: bool = True
track_requests: bool = True
track_tokens: bool = True
track_time: bool = True
# Usage counters (reset daily in production)
total_requests: int = 0
total_tokens: int = 0
total_time_ms: float = 0.0
# Per-model tracking
model_usage: Dict[str, Dict] = field(default_factory=dict)
def record_request(self, model: str, tokens: int, time_ms: float):
"""Record a request for cost tracking."""
if not self.enabled:
return
self.total_requests += 1
self.total_tokens += tokens
self.total_time_ms += time_ms
if model not in self.model_usage:
self.model_usage[model] = {"requests": 0, "tokens": 0, "time_ms": 0.0}
self.model_usage[model]["requests"] += 1
self.model_usage[model]["tokens"] += tokens
self.model_usage[model]["time_ms"] += time_ms
def get_summary(self) -> Dict:
"""Get cost tracking summary."""
return {
"total_requests": self.total_requests,
"total_tokens": self.total_tokens,
"total_time_ms": self.total_time_ms,
"avg_time_per_request": self.total_time_ms / max(1, self.total_requests),
"model_breakdown": self.model_usage.copy()
}
def reset(self):
"""Reset all counters."""
self.total_requests = 0
self.total_tokens = 0
self.total_time_ms = 0.0
self.model_usage = {}
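# Example (sketch): record two calls and inspect the summary. Token and timing
# figures are illustrative, not measured values.
#
#     tracker = CostTrackingConfig()
#     tracker.record_request("gemini-2.5-flash-lite", tokens=850, time_ms=420.0)
#     tracker.record_request("gemma-3-27b-it", tokens=1200, time_ms=610.0)
#     tracker.get_summary()
#     # -> {"total_requests": 2, "total_tokens": 2050, "total_time_ms": 1030.0,
#     #     "avg_time_per_request": 515.0, "model_breakdown": {...}}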
@dataclass
class EvaluationConfig:
"""Evaluation settings."""
num_runs: int = 5 # Number of runs per problem for reliability
timeout_seconds: float = 120.0 # Max time per problem
save_results: bool = True
# Cost tracking for evaluation
cost_tracking: CostTrackingConfig = field(default_factory=CostTrackingConfig)
@dataclass
class SystemConfig:
"""Master configuration."""
mcp: MCPConfig = field(default_factory=MCPConfig)
llm: LLMConfig = field(default_factory=LLMConfig)
database: DatabaseConfig = field(default_factory=DatabaseConfig)
evaluation: EvaluationConfig = field(default_factory=EvaluationConfig)
# System mode: "blackboard", "guided", or "naked"
active_mode: str = "guided"
# Debug settings
verbose: bool = True
log_level: str = "INFO"
# Global config instance
config = SystemConfig()
def set_mode(mode: str):
"""Switch between blackboard, guided, and naked modes."""
if mode not in ("blackboard", "guided", "naked"):
raise ValueError(f"Invalid mode: {mode}. Use 'blackboard', 'guided', or 'naked'")
config.active_mode = mode
def get_mode() -> str:
"""Get current system mode."""
return config.active_mode
def set_api_key(api_key: str):
"""Set the API key for LLM calls."""
config.llm.api_key = api_key
def get_cost_summary() -> Dict:
"""Get the current cost tracking summary."""
return config.evaluation.cost_tracking.get_summary()
def reset_cost_tracking():
"""Reset cost tracking counters."""
config.evaluation.cost_tracking.reset()
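# Optional smoke test (an illustrative addition, not required by any caller):
# `python config.py` prints the resolved defaults.
if __name__ == "__main__":
    print(f"MCP base URL: {config.mcp.base_url}")
    print(f"Default model: {config.llm.model_string}")
    print(f"Min request interval: {config.llm.rate_limit.min_request_interval:.1f}s")
    set_mode("blackboard")
    print(f"Active mode: {get_mode()}")
    print(f"Cost summary: {get_cost_summary()}")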