"""Configuration loading from YAML files via Pydantic models.""" from __future__ import annotations from pathlib import Path from typing import Any, Literal import yaml from pydantic import BaseModel, model_validator # --- Nested config models --- class AgentConfig(BaseModel): max_iterations: int = 3 temperature: float = 0.0 class ModelPricing(BaseModel): input_cost_per_mtok: float output_cost_per_mtok: float class SelfHostedConfig(BaseModel): base_url: str = "" model_name: str = "mistralai/Mistral-7B-Instruct-v0.3" api_key: str = "" timeout_seconds: float = 120.0 class ProviderConfig(BaseModel): default: str = "openai" models: dict[str, ModelPricing] = {} selfhosted: SelfHostedConfig = SelfHostedConfig() class ChunkingConfig(BaseModel): strategy: str = "recursive" chunk_size: int = 512 chunk_overlap: int = 64 class RetrievalConfig(BaseModel): strategy: str = "hybrid" rrf_k: int = 60 candidates_per_system: int = 10 top_k: int = 5 class RerankerConfig(BaseModel): enabled: bool = True model_name: str = "cross-encoder/ms-marco-MiniLM-L-6-v2" top_k: int = 5 # independent of retrieval.top_k class RAGConfig(BaseModel): chunking: ChunkingConfig = ChunkingConfig() retrieval: RetrievalConfig = RetrievalConfig() reranker: RerankerConfig = RerankerConfig() store_path: str = ".cache/store" refusal_threshold: float = 0.0 # 0.0 = disabled (V1 behavior) class RetryConfig(BaseModel): max_retries: int = 3 base_delay: float = 1.0 # seconds max_delay: float = 8.0 # cap for exponential backoff class EmbeddingConfig(BaseModel): model: str = "all-MiniLM-L6-v2" cache_dir: str = ".cache/embeddings" class ServingConfig(BaseModel): host: str = "0.0.0.0" port: int = 8000 request_timeout_seconds: int = 30 rate_limit_rpm: int = 10 # requests per minute per IP class MemoryConfig(BaseModel): enabled: bool = True db_path: str = "data/conversations.db" max_turns: int = 10 class EvaluationConfig(BaseModel): judge_provider: str = "openai" golden_dataset: str = "agent_bench/evaluation/datasets/tech_docs_golden.json" # New in judge-layer v1: which dimensions to score with L2 LLM judges. # citation_faithfulness is opt-in v1 (default-on v1.1). judge_dimensions: list[str] = ["groundedness", "relevance", "completeness"] _VALID_TIERS = {"heuristic", "classifier"} class InjectionConfig(BaseModel): enabled: bool = True action: Literal["block", "warn", "flag"] = "block" tiers: list[str] = ["heuristic", "classifier"] classifier_url: str = "" @model_validator(mode="after") def _validate_tiers(self) -> "InjectionConfig": invalid = set(self.tiers) - _VALID_TIERS if invalid: raise ValueError( f"Invalid injection tier(s): {invalid}. Allowed: {_VALID_TIERS}" ) if "classifier" in self.tiers and not self.classifier_url: import structlog structlog.get_logger().warning( "injection_classifier_no_url", msg="Tier 'classifier' configured but classifier_url is empty; " "classifier tier will be skipped at runtime.", ) return self class PIIConfig(BaseModel): enabled: bool = True mode: Literal["redact", "detect_only", "passthrough"] = "redact" redact_patterns: list[str] = [ "EMAIL", "PHONE", "SSN", "CREDIT_CARD", "IP_ADDRESS", ] use_ner: bool = False ner_entities: list[str] = ["PERSON"] class OutputConfig(BaseModel): enabled: bool = True pii_check: bool = True url_check: bool = True secret_check: bool = True blocklist: list[str] = [] class AuditConfig(BaseModel): enabled: bool = True path: str = "logs/audit.jsonl" max_size_mb: int = 100 rotate: bool = True class SecurityConfig(BaseModel): injection: InjectionConfig = InjectionConfig() pii: PIIConfig = PIIConfig() output: OutputConfig = OutputConfig() audit: AuditConfig = AuditConfig() class CorpusConfig(BaseModel): """Per-corpus configuration: store path, thresholds, iteration limits.""" label: str store_path: str data_path: str refusal_threshold: float = 0.0 top_k: int = 5 max_iterations: int = 3 # Optional: path to the golden dataset JSON for this corpus. None is # a valid state (corpus has no golden set yet during bring-up). The # evaluation CLI errors clearly if --corpus targets a corpus with # golden_dataset=None rather than requiring the field upfront. golden_dataset: str | None = None # When False, the corpus is kept in YAML for schema visibility but is # not wired into corpus_map at startup. Dashboard can render the # toggle as disabled; /ask requests for the corpus return 400. # Use this for corpora whose docs/store are not yet curated. available: bool = True class AppConfig(BaseModel): agent: AgentConfig = AgentConfig() provider: ProviderConfig = ProviderConfig() rag: RAGConfig = RAGConfig() retry: RetryConfig = RetryConfig() memory: MemoryConfig = MemoryConfig() embedding: EmbeddingConfig = EmbeddingConfig() serving: ServingConfig = ServingConfig() evaluation: EvaluationConfig = EvaluationConfig() security: SecurityConfig = SecurityConfig() # Multi-corpus support corpora: dict[str, CorpusConfig] = {} default_corpus: str = "fastapi" @model_validator(mode="after") def _validate_default_corpus(self) -> "AppConfig": if not self.corpora: return self if self.default_corpus not in self.corpora: raise ValueError( f"default_corpus={self.default_corpus!r} is not in corpora " f"{sorted(self.corpora.keys())!r}. Configured corpora must " "include the default.", ) # The default corpus must also be available — otherwise the app # would boot with no reachable default orchestrator. if not self.corpora[self.default_corpus].available: raise ValueError( f"default_corpus={self.default_corpus!r} has available=False. " "The default corpus must be ready to serve; set available=true " "or point default_corpus at a ready corpus.", ) return self # --- Task config --- class TaskConfig(BaseModel): name: str description: str system_prompt: str document_dir: str = "data/tech_docs/" class TaskFileConfig(BaseModel): task: TaskConfig # --- Loaders --- def _resolve_config_dir() -> Path: """Resolve configs directory: cwd first, then package-relative fallback.""" cwd_configs = Path.cwd() / "configs" if cwd_configs.is_dir(): return cwd_configs # Fallback: relative to package location (works for installed packages) pkg_configs = Path(__file__).resolve().parent.parent.parent / "configs" if pkg_configs.is_dir(): return pkg_configs return cwd_configs # Let the caller get a clear FileNotFoundError def load_config(path: Path | None = None) -> AppConfig: """Load application config from YAML. If AGENT_BENCH_ENV is set (e.g. 'production'), loads configs/{env}.yaml if it exists, otherwise falls back to default.yaml. """ if path is None: import os env = os.environ.get("AGENT_BENCH_ENV", "") config_dir = _resolve_config_dir() env_path = config_dir / f"{env}.yaml" path = env_path if env and env_path.exists() else config_dir / "default.yaml" with open(path) as f: data: dict[str, Any] = yaml.safe_load(f) return AppConfig.model_validate(data) def load_task_config(task_name: str, path: Path | None = None) -> TaskConfig: """Load a task-specific config from YAML.""" if path is None: path = _resolve_config_dir() / "tasks" / f"{task_name}.yaml" with open(path) as f: data: dict[str, Any] = yaml.safe_load(f) return TaskFileConfig.model_validate(data).task