"""Load the project YAML config into a simple attribute-style object.""" from __future__ import annotations import os from pathlib import Path from types import SimpleNamespace import yaml PROJECT_ROOT = Path(__file__).resolve().parents[1] def _to_namespace(obj): if isinstance(obj, dict): return SimpleNamespace(**{k: _to_namespace(v) for k, v in obj.items()}) if isinstance(obj, list): return [_to_namespace(v) for v in obj] return obj def load_config(path: str | os.PathLike | None = None) -> SimpleNamespace: """Read config.yaml and return a nested namespace (cfg.data.languages, ...).""" path = Path(path) if path else PROJECT_ROOT / "config.yaml" with open(path, "r", encoding="utf-8") as f: raw = yaml.safe_load(f) cfg = _to_namespace(raw) # Resolve paths relative to project root and ensure they exist. for attr in ("data_dir", "raw_dir", "processed_dir", "eda_dir", "index_dir"): abspath = PROJECT_ROOT / getattr(cfg.paths, attr) setattr(cfg.paths, attr, str(abspath)) abspath.mkdir(parents=True, exist_ok=True) return cfg if __name__ == "__main__": c = load_config() print("Languages:", c.data.languages) print("Use sample:", c.data.use_sample) print("Processed dir:", c.paths.processed_dir)