"""Phase 1 EDA entrypoint: analyse the processed training split. Usage: python scripts/02_run_eda.py Reads data/processed/train.parquet, writes plots + eda_stats.json to data/eda/. """ from __future__ import annotations import json import sys from pathlib import Path import pandas as pd sys.path.append(str(Path(__file__).resolve().parents[1])) from src.config import load_config from src.eda.analyze import run_eda def main(): cfg = load_config() print("=" * 60) print("PHASE 1: EXPLORATORY DATA ANALYSIS") print("=" * 60) train_path = Path(cfg.paths.processed_dir) / "train.parquet" funnel_path = Path(cfg.paths.processed_dir) / "cleaning_funnel.csv" if not train_path.exists(): sys.exit("train.parquet not found. Run scripts/01_prepare_data.py first.") df = pd.read_parquet(train_path) funnel = pd.read_csv(funnel_path) if funnel_path.exists() else None stats = run_eda(df, cfg, funnel) print(json.dumps({k: v for k, v in stats.items() if k != "plots"}, indent=2)) print(f"\nPlots saved to {cfg.paths.eda_dir}") if __name__ == "__main__": main()