| from __future__ import annotations |
|
|
| import os |
| from dataclasses import dataclass |
| from pathlib import Path |
|
|
|
|
@dataclass(frozen=True)
class ServiceConfig:
    """Immutable bundle of service settings.

    ``load_config`` in this module builds an instance from environment
    variables; the comment on each field names the variable(s) it is read
    from there. The dataclass is frozen, so fields cannot be reassigned
    after construction.
    """

    # SERVICE_CONCURRENCY (default 4).
    concurrency: int
    # SERVICE_PROXY; None when unset or empty.
    proxy: str | None
    # CALLBACK_URL, falling back to SERVICE_CALLBACK_URL; None when neither is set.
    callback_url: str | None
    # STORAGE_ROOT / SERVICE_STORAGE_ROOT / SERVICE_STORAGE_DIR (first non-empty wins).
    storage_dir: Path
    # ORCHESTRATOR_DB_PATH (default "orchestrator/data/mvp.db" under the base dir).
    orchestrator_db_path: Path
    # ENGINE_STRATEGY (default "auto").
    engine_strategy: str
    # ENGINE_FALLBACK_THRESHOLD (default 3).
    engine_fallback_threshold: int
    # RAW_DATA_RETENTION_DAYS (default 7).
    raw_data_retention_days: int
    # MEDIACRAWLER_STORAGE_STATE_PATH(S) and SERVICE_STORAGE_STATE_PATH(S), comma-separated.
    mediacrawler_storage_state_paths: tuple[Path, ...]
    # ENABLE_LEGACY_ROUTES (default False).
    enable_legacy_routes: bool
    # ERROR_SUMMARY_SCAN_LIMIT (default 1000).
    error_summary_scan_limit: int
|
|
|
|
def _env_int(name: str, default: int) -> int:
    """Read environment variable *name* as an integer.

    Args:
        name: Name of the environment variable.
        default: Value returned when the variable is unset, empty, or
            contains only whitespace.

    Returns:
        The parsed integer, or *default* when no usable value is present.

    Raises:
        ValueError: If the variable holds text that is not a valid integer.
            The message names the variable so a bad deployment setting is
            easy to trace.
    """
    raw = os.getenv(name)
    # Treat blank values like unset ones, matching how _env_bool behaves.
    if raw is None or not raw.strip():
        return default
    try:
        # int() already tolerates surrounding whitespace.
        return int(raw)
    except ValueError as exc:
        raise ValueError(
            f"Environment variable {name} must be an integer, got {raw!r}"
        ) from exc
|
|
|
|
def _env_str(name: str) -> str | None:
    """Return environment variable *name*, or ``None`` if unset or empty.

    The value is returned exactly as stored; no whitespace is stripped.
    """
    # An empty string is falsy, so `or` folds both "unset" and "" into None.
    return os.getenv(name) or None
|
|
|
|
def _env_bool(name: str, default: bool = False) -> bool:
    """Interpret environment variable *name* as a boolean flag.

    Recognised spellings (case-insensitive, surrounding whitespace ignored):
    ``1/true/yes/y/on`` for true and ``0/false/no/n/off`` for false.

    Args:
        name: Name of the environment variable.
        default: Result when the variable is unset, empty, or holds an
            unrecognised spelling.

    Returns:
        The parsed flag, or ``bool(default)`` as the fallback.
    """
    fallback = bool(default)
    raw = os.getenv(name)
    if not raw:
        # Unset and empty are handled the same way.
        return fallback

    meanings = {
        "1": True,
        "true": True,
        "yes": True,
        "y": True,
        "on": True,
        "0": False,
        "false": False,
        "no": False,
        "n": False,
        "off": False,
    }
    token = str(raw).strip().lower()
    # Anything outside the table silently falls back to the default.
    return meanings.get(token, fallback)
|
|
|
|
def _env_path_list(*names: str) -> tuple[Path, ...]:
    """Collect paths from one or more comma-separated environment variables.

    Variables are read in the order given. Each value is split on commas,
    every entry is stripped of surrounding whitespace, blank entries are
    dropped, and ``~`` is expanded. Duplicates are kept.

    Args:
        *names: Environment variable names to read.

    Returns:
        All collected paths, in variable order and then entry order.
    """
    collected: list[Path] = []
    for env_name in names:
        raw = os.getenv(env_name) or ""
        # A value without commas splits into a single entry, so one code
        # path serves both the list form and the single-path form.
        entries = (piece.strip() for piece in raw.split(","))
        collected.extend(Path(entry).expanduser() for entry in entries if entry)
    return tuple(collected)
|
|
|
|
def _resolve_path(*, base_dir: Path, value: str | None, default_rel: str) -> Path:
    """Turn an optional path setting into a resolved path.

    Args:
        base_dir: Directory that relative paths are anchored to.
        value: Configured path; ``None``, empty, or whitespace-only means
            "not configured".
        default_rel: Path used when *value* is not configured.

    Returns:
        The path with ``~`` expanded and ``resolve()`` applied. Absolute
        paths are used directly; relative ones are joined onto *base_dir*.
    """
    text = str(value or "").strip() or default_rel
    expanded = Path(text).expanduser()
    anchored = expanded if expanded.is_absolute() else base_dir / expanded
    return anchored.resolve()
|
|
|
|
def load_config() -> ServiceConfig:
    """Build a ``ServiceConfig`` from environment variables.

    Relative paths are anchored to the base directory, which is the parent
    of the directory containing this file.

    Variables read (an empty value counts as unset):
        * ``STORAGE_ROOT`` / ``SERVICE_STORAGE_ROOT`` / ``SERVICE_STORAGE_DIR``:
          storage directory, first non-empty wins; default ``storage``.
        * ``MEDIACRAWLER_STORAGE_STATE_PATHS`` / ``MEDIACRAWLER_STORAGE_STATE_PATH``
          / ``SERVICE_STORAGE_STATE_PATHS`` / ``SERVICE_STORAGE_STATE_PATH``:
          comma-separated storage-state paths, all of them combined.
        * ``SERVICE_CONCURRENCY`` (4), ``ENGINE_FALLBACK_THRESHOLD`` (3),
          ``RAW_DATA_RETENTION_DAYS`` (7), ``ERROR_SUMMARY_SCAN_LIMIT`` (1000).
        * ``SERVICE_PROXY``; ``CALLBACK_URL`` then ``SERVICE_CALLBACK_URL``.
        * ``ORCHESTRATOR_DB_PATH``; default ``orchestrator/data/mvp.db``.
        * ``ENGINE_STRATEGY``; default ``auto``.
        * ``ENABLE_LEGACY_ROUTES``; default false.

    Returns:
        The populated, immutable configuration object.

    Raises:
        ValueError: If one of the integer variables holds a non-integer value.
    """
    base_dir = Path(__file__).resolve().parents[1]

    storage_root = (
        os.getenv("STORAGE_ROOT")
        or os.getenv("SERVICE_STORAGE_ROOT")
        or os.getenv("SERVICE_STORAGE_DIR")
    )
    # Same anchoring rules as every other path setting, so share the helper
    # instead of repeating the absolute/relative branching here.
    storage_dir = _resolve_path(
        base_dir=base_dir,
        value=storage_root,
        default_rel="storage",
    )

    storage_state_paths = _env_path_list(
        "MEDIACRAWLER_STORAGE_STATE_PATHS",
        "MEDIACRAWLER_STORAGE_STATE_PATH",
        "SERVICE_STORAGE_STATE_PATHS",
        "SERVICE_STORAGE_STATE_PATH",
    )
    # Absolute entries are kept exactly as given; only relative entries are
    # anchored to base_dir and resolved.
    resolved_storage_state_paths = tuple(
        path if path.is_absolute() else (base_dir / path).resolve()
        for path in storage_state_paths
    )

    return ServiceConfig(
        concurrency=_env_int("SERVICE_CONCURRENCY", 4),
        proxy=_env_str("SERVICE_PROXY"),
        callback_url=_env_str("CALLBACK_URL") or _env_str("SERVICE_CALLBACK_URL"),
        storage_dir=storage_dir,
        orchestrator_db_path=_resolve_path(
            base_dir=base_dir,
            value=os.getenv("ORCHESTRATOR_DB_PATH"),
            default_rel="orchestrator/data/mvp.db",
        ),
        # os.getenv's default only applies when the variable is absent, so a
        # variable set to "" would otherwise yield an empty strategy.
        engine_strategy=_env_str("ENGINE_STRATEGY") or "auto",
        engine_fallback_threshold=_env_int("ENGINE_FALLBACK_THRESHOLD", 3),
        raw_data_retention_days=_env_int("RAW_DATA_RETENTION_DAYS", 7),
        mediacrawler_storage_state_paths=resolved_storage_state_paths,
        enable_legacy_routes=_env_bool("ENABLE_LEGACY_ROUTES", False),
        error_summary_scan_limit=_env_int("ERROR_SUMMARY_SCAN_LIMIT", 1000),
    )
|
|