Spaces:
Running
Running
| """Shared configuration and helpers for EDA pipeline.""" | |
| import json | |
| import shutil | |
| from datetime import datetime | |
| from pathlib import Path | |
# ===================================================================
# Filesystem layout and run versioning
# ===================================================================
# Input CSV files, resolved relative to the working directory.
DATA_DIR = Path("Data")
CASES_FILE = DATA_DIR.joinpath("ISDMHack_Cases_WPfinal.csv")
HEAR_FILE = DATA_DIR.joinpath("ISDMHack_Hear.csv")

# Report output tree; the figures folder is created at import time.
REPORTS_DIR = Path("reports")
FIGURES_DIR = REPORTS_DIR.joinpath("figures")
FIGURES_DIR.mkdir(parents=True, exist_ok=True)

# Every import of this module gets its own "<version>_<timestamp>" folder
# under the figures directory, stamped with the local wall-clock time.
VERSION = "v0.4.0"
RUN_TS = datetime.now().strftime("%Y%m%d_%H%M%S")
RUN_DIR = FIGURES_DIR.joinpath(f"{VERSION}_{RUN_TS}")
RUN_DIR.mkdir(parents=True, exist_ok=True)

# Per-run sub-folder for parameter files.
PARAMS_DIR = RUN_DIR.joinpath("params")
PARAMS_DIR.mkdir(parents=True, exist_ok=True)

# Parquet destinations for the cleaned tables, kept inside the run folder.
CASES_CLEAN_PARQUET = RUN_DIR.joinpath("cases_clean.parquet")
HEARINGS_CLEAN_PARQUET = RUN_DIR.joinpath("hearings_clean.parquet")

# ===================================================================
# Null tokens and canonicalisation
# ===================================================================
# String spellings that stand for "no value".
# NOTE(review): presumably used as missing-value markers when the CSVs are
# read -- confirm against the call sites.
NULL_TOKENS = [
    "",
    "NULL",
    "Null",
    "null",
    "NA",
    "N/A",
    "na",
    "NaN",
    "nan",
    "-",
    "--",
]
def copy_to_versioned(filename: str) -> None:
    """Snapshot ``FIGURES_DIR/filename`` into the versioned ``RUN_DIR``.

    Nothing happens when the source file does not exist.  Any error raised
    while checking or copying is reported as a ``[WARN]`` line on stdout and
    is not propagated to the caller.

    Args:
        filename: Name of the file, relative to ``FIGURES_DIR``; the copy is
            written under the same relative name in ``RUN_DIR``.
    """
    source_path = FIGURES_DIR / filename
    target_path = RUN_DIR / filename
    try:
        # Guard clause: a missing source is not an error, just a no-op.
        if not source_path.exists():
            return
        shutil.copyfile(source_path, target_path)
    except Exception as err:
        print(f"[WARN] Versioned copy failed for {filename}: {err}")
def write_metadata(meta: dict) -> None:
    """Write run metadata into RUN_DIR/metadata.json.

    The metadata is serialised to text *before* the file is opened, so a
    serialisation failure cannot leave a truncated ``metadata.json`` behind
    or wipe a copy written earlier.  The export is best-effort: any failure
    is reported as a ``[WARN]`` line on stdout and is not raised.

    Args:
        meta: Mapping of run metadata.  Values the ``json`` module cannot
            encode natively are converted with ``str``.
    """
    meta_path = RUN_DIR / "metadata.json"
    try:
        # Encode first: opening with "w" truncates the target immediately,
        # so encoding inside the ``with`` block would corrupt it on failure.
        payload = json.dumps(meta, indent=2, default=str)
        with open(meta_path, "w", encoding="utf-8") as f:
            f.write(payload)
    except Exception as e:  # deliberate catch-all: failures are warned, not raised
        print(f"[WARN] Metadata export error: {e}")