Spaces:
Running
Running
| """Evaluation metrics and harness. | |
| Headline results are reported on REAL held-out projects (Batselier) - never on the | |
| synthetic generator that trains the fine-tune. See PLAN.md sections 4 and 6. | |
| """ | |
| from __future__ import annotations | |
| import numpy as np | |
| from . import evm | |
| # --------------------------------------------------------------------------- # | |
| # Metrics | |
| # --------------------------------------------------------------------------- # | |
def mae(y_true, y_pred) -> float:
    """Mean absolute error between `y_true` and `y_pred`.

    Both inputs are converted to float arrays before subtracting, matching
    `pinball` and `interval_coverage`. Without the conversion, unsigned-integer
    inputs wrap around on subtraction (e.g. uint8 ``1 - 2 == 255``) and inflate
    the error silently.

    Args:
        y_true: observed values (array-like).
        y_pred: predicted values (array-like, broadcastable against `y_true`).

    Returns:
        The mean of ``|y_true - y_pred|`` as a Python float.
    """
    actual = np.asarray(y_true, float)
    predicted = np.asarray(y_pred, float)
    return float(np.mean(np.abs(actual - predicted)))
def rmse(y_true, y_pred) -> float:
    """Root mean squared error between `y_true` and `y_pred`.

    Both inputs are converted to float arrays first, so integer inputs cannot
    wrap on subtraction or overflow when squared (e.g. uint8 ``20 ** 2``
    overflows to 144).

    Args:
        y_true: observed values (array-like).
        y_pred: predicted values (array-like, broadcastable against `y_true`).

    Returns:
        ``sqrt(mean((y_true - y_pred) ** 2))`` as a Python float.
    """
    residual = np.asarray(y_true, float) - np.asarray(y_pred, float)
    return float(np.sqrt(np.mean(residual ** 2)))
def mase(y_true, y_pred, y_train, m: int = 1) -> float:
    """Mean Absolute Scaled Error (scale-free; primary point metric).

    The forecast MAE is divided by the in-sample MAE of the lag-`m` naive
    forecast on `y_train`, i.e. ``mean(|y_train[t] - y_train[t - m]|)``.

    Args:
        y_true: observed values over the forecast horizon.
        y_pred: point forecasts for the same horizon.
        y_train: the series the forecaster was conditioned on; it sets the scale.
        m: lag (seasonal period) of the naive benchmark; 1 compares each value
            with the previous one.

    Returns:
        The scaled error, or NaN when `y_train` has no more than `m` points or
        the naive benchmark error is exactly zero (the scale is undefined).

    Raises:
        ValueError: if `m` is smaller than 1.
    """
    if m < 1:
        raise ValueError("m must be a positive integer")
    y_train = np.asarray(y_train, float)
    if len(y_train) <= m:
        return np.nan
    # Lag-m difference. np.diff(..., n=m) would instead apply the first
    # difference m times (m-th order), which is only the same thing for m == 1.
    naive_errors = np.abs(y_train[m:] - y_train[:-m])
    denom = float(np.mean(naive_errors))
    if denom == 0:
        return np.nan
    return float(mae(y_true, y_pred) / denom)
def pinball(y_true, q_pred, level: float) -> float:
    """Average pinball (quantile) loss of `q_pred` at quantile `level`.

    For each point the loss is the larger of ``level * e`` and
    ``(level - 1) * e`` with ``e = y_true - q_pred``. For a level in [0, 1]
    this weights observations above the predicted quantile by `level` and
    observations below it by ``1 - level``.

    Args:
        y_true: observed values (array-like, converted to float).
        q_pred: predicted quantile values (array-like, converted to float).
        level: the quantile level being scored, e.g. 0.1 or 0.9.

    Returns:
        The mean loss as a Python float.
    """
    residual = np.asarray(y_true, float) - np.asarray(q_pred, float)
    above_term = level * residual
    below_term = (level - 1.0) * residual
    per_point = np.maximum(above_term, below_term)
    return float(np.mean(per_point))
def interval_coverage(y_true, lo, hi) -> float:
    """Fraction of observations falling inside the closed interval [lo, hi].

    Both bounds are inclusive. For a P10-P90 interval the target value is
    roughly 0.8.

    Args:
        y_true: observed values (array-like, converted to float).
        lo: lower interval bounds (array-like).
        hi: upper interval bounds (array-like).

    Returns:
        The share of points with ``lo <= y_true <= hi`` as a Python float.
    """
    observed = np.asarray(y_true, float)
    at_or_above_lower = observed >= np.asarray(lo)
    at_or_below_upper = observed <= np.asarray(hi)
    inside = at_or_above_lower & at_or_below_upper
    return float(np.mean(inside))
| # --------------------------------------------------------------------------- # | |
| # Harness | |
| # --------------------------------------------------------------------------- # | |
def evaluate_on_project(forecast_fn, project, context_frac: float = 0.5) -> dict | None:
    """Score one forecaster on one project.

    The project's EV increments are split into a context window (the first
    `context_frac` of the series, but never fewer than 4 increments) and a
    hold-out remainder. `forecast_fn` is called on the context and its q10 /
    q50 / q90 outputs are scored against the remainder.

    Args:
        forecast_fn: callable invoked as
            ``forecast_fn(obs, horizon, bac=..., planned_periods=...)``; must
            return a mapping with "q10", "q50" and "q90" entries.
        project: object exposing `ev`, `bac` and `planned_finish`.
        context_frac: share of the increments used as context.

    Returns:
        A dict with keys "mae", "mase", "pinball10", "pinball90", "coverage80"
        and "eac_ape", or None when nothing is left to forecast after the
        context window.
    """
    increments = evm.to_increments(project.ev)
    total = len(increments)
    cut = max(4, int(total * context_frac))
    steps_ahead = total - cut
    if steps_ahead < 1:
        return None

    context = increments[:cut]
    actual = increments[cut:]
    forecast = forecast_fn(
        context,
        steps_ahead,
        bac=project.bac,
        planned_periods=project.planned_finish,
    )

    # Derived end-of-project figure: add the summed median increments to the
    # EV value at the end of the context window. No CPI-based adjustment is
    # applied here yet.
    # NOTE(review): assumes project.ev is cumulative and index-aligned with the
    # increments returned by evm.to_increments - confirm.
    ev_at_cut = project.ev[cut - 1]
    eac_pred = ev_at_cut + float(np.sum(forecast["q50"]))
    eac_true = project.ev[-1]

    scores = {
        "mae": mae(actual, forecast["q50"]),
        "mase": mase(actual, forecast["q50"], context),
        "pinball10": pinball(actual, forecast["q10"], 0.1),
        "pinball90": pinball(actual, forecast["q90"], 0.9),
        "coverage80": interval_coverage(actual, forecast["q10"], forecast["q90"]),
    }
    # A falsy (zero) final value gives NaN rather than a division by zero.
    if eac_true:
        scores["eac_ape"] = abs(eac_pred - eac_true) / eac_true
    else:
        scores["eac_ape"] = np.nan
    return scores
def evaluate_forecaster(forecast_fn, projects, context_frac: float = 0.5):
    """Average the per-project metrics of one forecaster over `projects`.

    Projects for which `evaluate_on_project` returns None are left out of the
    average.

    Args:
        forecast_fn: forecaster callable, passed through to `evaluate_on_project`.
        projects: iterable of project records.
        context_frac: share of each project's increments used as context.

    Returns:
        The column-wise mean (numeric columns only) of the per-project metric
        rows, as returned by ``pandas.DataFrame.mean``.
    """
    import pandas as pd

    scored = []
    for project in projects:
        result = evaluate_on_project(forecast_fn, project, context_frac)
        if result is not None:
            scored.append(result)
    table = pd.DataFrame(scored)
    return table.mean(numeric_only=True)
def benchmark(forecasters: dict, projects, context_frac: float = 0.5):
    """Run several named forecasters and return a comparison table.

    Args:
        forecasters: mapping of display name -> forecaster callable.
        projects: iterable of project records; every forecaster is scored on
            the same set.
        context_frac: share of each project's increments used as context.

    Returns:
        A DataFrame with one row per forecaster name and one column per metric.
    """
    import pandas as pd

    # Materialise once: `projects` is walked once per forecaster, so a
    # single-pass iterable (e.g. a generator) would be exhausted after the
    # first forecaster and the others would be scored on nothing.
    projects = list(projects)
    summaries = {}
    for label, forecast_fn in forecasters.items():
        summaries[label] = evaluate_forecaster(forecast_fn, projects, context_frac)
    return pd.DataFrame(summaries).T
def load_dslib(path: str):
    """Placeholder loader for the DSLIB (OR&S Ghent) real-project data set.

    Intended to return a list of `synthetic.Project`-shaped records. The
    simplest input is the per-project Excel output sheets (PV/EV/AC columns);
    the alternative is converting the `.p2x` files via PMConverter. Only
    'green' (authentic, complete) projects should be kept. See data/README.md.

    Raises:
        NotImplementedError: always; the parser has not been written yet.
    """
    todo_message = (
        "Parse the DSLIB Excel output sheets (or .p2x via PMConverter) into the tidy "
        "schema (project_id, period, pv, ev, ac, bac, planned_finish). See data/README.md."
    )
    raise NotImplementedError(todo_message)