# Source: hackathon_code4change/src/eda_config.py
# Last commit shown by the file viewer: 4ffade4 ("WIP"), RoyAalekh; size 1.87 kB
"""Shared configuration and helpers for EDA pipeline."""
import json
import shutil
from datetime import datetime
from pathlib import Path
# -------------------------------------------------------------------
# Input data locations (relative to the current working directory)
# -------------------------------------------------------------------
DATA_DIR = Path("Data")
CASES_FILE = DATA_DIR.joinpath("ISDMHack_Cases_WPfinal.csv")
HEAR_FILE = DATA_DIR.joinpath("ISDMHack_Hear.csv")

# -------------------------------------------------------------------
# Report outputs and per-run versioning
# -------------------------------------------------------------------
REPORTS_DIR = Path("reports")
FIGURES_DIR = REPORTS_DIR.joinpath("figures")
# NOTE: directories are created as a side effect of importing this module.
FIGURES_DIR.mkdir(parents=True, exist_ok=True)

VERSION = "v0.4.0"
# Timestamp is captured once, at import time; the run directory name is
# "<VERSION>_<YYYYmmdd_HHMMSS>" underneath FIGURES_DIR.
RUN_TS = datetime.now().strftime("%Y%m%d_%H%M%S")
RUN_DIR = FIGURES_DIR.joinpath(f"{VERSION}_{RUN_TS}")
RUN_DIR.mkdir(parents=True, exist_ok=True)

PARAMS_DIR = RUN_DIR.joinpath("params")
PARAMS_DIR.mkdir(parents=True, exist_ok=True)

# Cleaned data outputs, written inside the run directory.
CASES_CLEAN_PARQUET = RUN_DIR.joinpath("cases_clean.parquet")
HEARINGS_CLEAN_PARQUET = RUN_DIR.joinpath("hearings_clean.parquet")

# -------------------------------------------------------------------
# Null tokens and canonicalisation
# -------------------------------------------------------------------
# String spellings listed as null markers (empty string, NULL/NA/NaN
# variants, and dash placeholders).
NULL_TOKENS = [
    "",
    "NULL",
    "Null",
    "null",
    "NA",
    "N/A",
    "na",
    "NaN",
    "nan",
    "-",
    "--",
]
def copy_to_versioned(filename: str) -> None:
    """Snapshot ``FIGURES_DIR/filename`` into ``RUN_DIR/filename``.

    This is a best-effort helper: when the source file does not exist the
    call is a silent no-op, and any exception raised while checking or
    copying is reported as a ``[WARN]`` line on stdout instead of being
    propagated.

    Args:
        filename: Name of the file, relative to ``FIGURES_DIR``; the same
            relative name is used under ``RUN_DIR``.
    """
    source_path = FIGURES_DIR.joinpath(filename)
    target_path = RUN_DIR.joinpath(filename)
    try:
        if not source_path.exists():
            # Nothing to snapshot; deliberately not treated as an error.
            return
        shutil.copyfile(source_path, target_path)
    except Exception as err:
        print(f"[WARN] Versioned copy failed for {filename}: {err}")
def write_metadata(meta: dict) -> None:
    """Write run metadata into RUN_DIR/metadata.json.

    The payload is serialised to a string *before* the target file is
    opened, so a serialisation failure (for example a dict with tuple keys,
    which ``default=str`` does not cover) leaves an existing metadata.json
    untouched instead of truncated or half-written.

    Args:
        meta: Metadata mapping to export. Values that JSON cannot encode
            natively are converted with ``str()``.

    Returns:
        None. This is a best-effort export: any failure is reported as a
        ``[WARN]`` line on stdout and is not raised to the caller.
    """
    meta_path = RUN_DIR / "metadata.json"
    try:
        # Serialise first: opening the file in "w" mode truncates it, so it
        # must only happen once we know there is a complete payload to write.
        payload = json.dumps(meta, indent=2, default=str)
        with open(meta_path, "w", encoding="utf-8") as f:
            f.write(payload)
    except Exception as e:
        print(f"[WARN] Metadata export error: {e}")