File size: 1,315 Bytes
b89e6d6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
"""Load the project YAML config into a simple attribute-style object."""
from __future__ import annotations

import os
from pathlib import Path
from types import SimpleNamespace

import yaml

# Resolved path of the directory one level above the directory containing this
# file; load_config() uses it as the base for the default config.yaml and for
# every directory listed under cfg.paths.
PROJECT_ROOT = Path(__file__).resolve().parents[1]


def _to_namespace(obj):
    """Recursively turn parsed data into attribute-style objects.

    Dicts become ``SimpleNamespace`` instances whose attributes are the dict
    keys, lists are rebuilt with every element converted, and any other value
    is handed back unchanged.
    """
    if isinstance(obj, list):
        return list(map(_to_namespace, obj))
    if not isinstance(obj, dict):
        # Scalars (and any other non-container value) pass through as-is.
        return obj
    converted = {}
    for key, value in obj.items():
        converted[key] = _to_namespace(value)
    return SimpleNamespace(**converted)


def load_config(path: str | os.PathLike | None = None) -> SimpleNamespace:
    """Parse the YAML config file into a nested namespace.

    Args:
        path: Location of the config file. Any falsy value (``None``, ``""``)
            selects ``config.yaml`` directly under ``PROJECT_ROOT``.

    Returns:
        The parsed config with mappings exposed as attributes
        (``cfg.data.languages``, ...). The directory entries under
        ``cfg.paths`` are replaced by strings of those values joined onto
        ``PROJECT_ROOT``.

    Side effects:
        Each of those directories is created on disk (parents included) if it
        does not exist yet.
    """
    config_file = PROJECT_ROOT / "config.yaml" if not path else Path(path)
    with config_file.open("r", encoding="utf-8") as handle:
        parsed = yaml.safe_load(handle)
    cfg = _to_namespace(parsed)

    # Anchor every configured directory at the project root, store the result
    # back as a string, and make sure the directory exists.
    paths = cfg.paths
    directory_keys = ("data_dir", "raw_dir", "processed_dir", "eda_dir", "index_dir")
    for key in directory_keys:
        directory = PROJECT_ROOT / getattr(paths, key)
        setattr(paths, key, str(directory))
        directory.mkdir(parents=True, exist_ok=True)
    return cfg


if __name__ == "__main__":
    # Manual smoke check: load the default config and echo a few of its values.
    cfg = load_config()
    data_cfg = cfg.data
    print("Languages:", data_cfg.languages)
    print("Use sample:", data_cfg.use_sample)
    print("Processed dir:", cfg.paths.processed_dir)