# Environment-driven configuration for the service.
from __future__ import annotations

import os
from dataclasses import dataclass
from pathlib import Path

# Spellings accepted by _env_bool (compared after strip() + lower()).
_TRUE_WORDS = frozenset({"1", "true", "yes", "y", "on"})
_FALSE_WORDS = frozenset({"0", "false", "no", "n", "off"})


@dataclass(frozen=True)
class ServiceConfig:
    """Immutable bundle of settings assembled by :func:`load_config`."""

    # SERVICE_CONCURRENCY (default 4).
    concurrency: int
    # SERVICE_PROXY, or None when unset/empty.
    proxy: str | None
    # CALLBACK_URL, falling back to SERVICE_CALLBACK_URL; None when neither is set.
    callback_url: str | None
    # STORAGE_ROOT / SERVICE_STORAGE_ROOT / SERVICE_STORAGE_DIR as an absolute
    # path (default: "<base_dir>/storage").
    storage_dir: Path
    # ORCHESTRATOR_DB_PATH as an absolute path
    # (default: "<base_dir>/orchestrator/data/mvp.db").
    orchestrator_db_path: Path
    # ENGINE_STRATEGY (default "auto").
    engine_strategy: str
    # ENGINE_FALLBACK_THRESHOLD (default 3).
    engine_fallback_threshold: int
    # RAW_DATA_RETENTION_DAYS (default 7).
    raw_data_retention_days: int
    # Absolute paths collected from the *_STORAGE_STATE_PATH(S) variables.
    mediacrawler_storage_state_paths: tuple[Path, ...]
    # ENABLE_LEGACY_ROUTES (default False).
    enable_legacy_routes: bool
    # ERROR_SUMMARY_SCAN_LIMIT (default 1000).
    error_summary_scan_limit: int


def _env_int(name: str, default: int) -> int:
    """Read environment variable *name* as an integer.

    Returns *default* when the variable is unset or empty.

    Raises:
        ValueError: if the variable is set to something ``int()`` rejects.
            The message names the variable so the bad setting is easy to find.
    """
    value = os.getenv(name)
    if not value:
        return default
    try:
        return int(value)
    except ValueError as err:
        raise ValueError(
            f"Environment variable {name} must be an integer, got {value!r}"
        ) from err


def _env_str(name: str) -> str | None:
    """Return environment variable *name*, or ``None`` when unset or empty."""
    # An empty string is treated the same as "not set".
    return os.getenv(name) or None


def _env_bool(name: str, default: bool = False) -> bool:
    """Read environment variable *name* as a boolean flag.

    Matching is case-insensitive and ignores surrounding whitespace.
    ``1/true/yes/y/on`` give ``True`` and ``0/false/no/n/off`` give ``False``.
    Unset, empty, or unrecognised values give ``bool(default)``.
    """
    value = os.getenv(name)
    if not value:
        return bool(default)
    word = value.strip().lower()
    if word in _TRUE_WORDS:
        return True
    if word in _FALSE_WORDS:
        return False
    # Unrecognised spelling: deliberately fall back instead of failing.
    return bool(default)


def _env_path_list(*names: str) -> tuple[Path, ...]:
    """Collect paths from one or more comma-separated environment variables.

    Variables are read in the order given and their entries concatenated.
    Each entry is stripped of whitespace, blank entries are dropped, and
    ``~`` is expanded. Paths are NOT made absolute here.
    """
    paths: list[Path] = []
    for name in names:
        value = os.getenv(name)
        if not value:
            continue
        # split(",") also covers the single-path case (no comma present).
        entries = (piece.strip() for piece in value.split(","))
        paths.extend(Path(entry).expanduser() for entry in entries if entry)
    return tuple(paths)


def _absolutize(base_dir: Path, path: Path) -> Path:
    """Resolve *path*, anchoring it at *base_dir* when it is relative."""
    if path.is_absolute():
        return path.resolve()
    return (base_dir / path).resolve()


def _resolve_path(*, base_dir: Path, value: str | None, default_rel: str) -> Path:
    """Turn an optional path setting into a resolved absolute path.

    *value* is stripped; when it is ``None`` or blank, *default_rel* is used
    instead. ``~`` is expanded and relative paths are anchored at *base_dir*.
    """
    raw = str(value or "").strip() or default_rel
    return _absolutize(base_dir, Path(raw).expanduser())


def load_config() -> ServiceConfig:
    """Build a :class:`ServiceConfig` from the process environment.

    Relative paths are anchored at ``base_dir``, the directory one level
    above the directory containing this file. Empty environment variables
    are treated as unset for every setting.

    Raises:
        ValueError: if an integer setting holds a non-integer value.
    """
    base_dir = Path(__file__).resolve().parents[1]

    # First non-empty variable wins.
    storage_root = (
        os.getenv("STORAGE_ROOT")
        or os.getenv("SERVICE_STORAGE_ROOT")
        or os.getenv("SERVICE_STORAGE_DIR")
    )
    storage_dir = _resolve_path(
        base_dir=base_dir,
        value=storage_root,
        default_rel="storage",
    )

    # Entries from all four variables are combined, then resolved the same
    # way as every other path setting (absolute ones included).
    storage_state_paths = tuple(
        _absolutize(base_dir, path)
        for path in _env_path_list(
            "MEDIACRAWLER_STORAGE_STATE_PATHS",
            "MEDIACRAWLER_STORAGE_STATE_PATH",
            "SERVICE_STORAGE_STATE_PATHS",
            "SERVICE_STORAGE_STATE_PATH",
        )
    )

    return ServiceConfig(
        concurrency=_env_int("SERVICE_CONCURRENCY", 4),
        proxy=_env_str("SERVICE_PROXY"),
        callback_url=_env_str("CALLBACK_URL") or _env_str("SERVICE_CALLBACK_URL"),
        storage_dir=storage_dir,
        orchestrator_db_path=_resolve_path(
            base_dir=base_dir,
            value=os.getenv("ORCHESTRATOR_DB_PATH"),
            default_rel="orchestrator/data/mvp.db",
        ),
        # `or` rather than a getenv default so that ENGINE_STRATEGY="" still
        # yields "auto".
        engine_strategy=_env_str("ENGINE_STRATEGY") or "auto",
        engine_fallback_threshold=_env_int("ENGINE_FALLBACK_THRESHOLD", 3),
        raw_data_retention_days=_env_int("RAW_DATA_RETENTION_DAYS", 7),
        mediacrawler_storage_state_paths=storage_state_paths,
        enable_legacy_routes=_env_bool("ENABLE_LEGACY_ROUTES", False),
        error_summary_scan_limit=_env_int("ERROR_SUMMARY_SCAN_LIMIT", 1000),
    )