Spaces:
Running
Running
| """Load the DSLIB (OR&S Ghent) real-project EVM tracking data. | |
| Reads each per-project 'Tracking Overview' sheet (cumulative PV/EV/AC per tracking | |
| period) from data/DSLIB/Excel/*.xlsx into `synthetic.Project` records, so real and | |
| synthetic projects share one interface for the baselines / forecaster / eval harness. | |
| Only ~117 of the 231 projects carry periodic tracking; series are short (median ~8 | |
| periods). Filter with `min_periods`. The `.p2x` files (a subset) hold the same data as | |
| XML but the Excel sheets are easier; the other libraries (ASLIB/RCPLIB/MSLIB/MPLIB/ | |
| SSLIB/MMLIB) are artificial RCPSP scheduling sets and are NOT used here. | |
| """ | |
| from __future__ import annotations | |
| import glob | |
| import os | |
| import warnings | |
| import numpy as np | |
| from .synthetic import Project | |
# Default location of the per-project DSLIB workbooks (a relative path, so it is
# resolved against the current working directory).
DEFAULT_DIR = os.path.join("data", "DSLIB", "Excel")

# Column headers looked up in each workbook's 'Tracking Overview' sheet.
_PV = "Planned Value (PV)"
_EV = "Earned Value (EV)"
_AC = "Actual Cost (AC)"
def load_project(path: str) -> Project | None:
    """Parse one DSLIB Excel workbook's 'Tracking Overview' sheet.

    Args:
        path: Path to a DSLIB ``.xlsx`` workbook.

    Returns:
        A `Project` built from the rows in which PV, EV and AC are all numeric,
        with periods renumbered 1..n over those rows. Returns None when the
        workbook or its 'Tracking Overview' sheet cannot be read, when any of the
        PV/EV/AC columns is missing, when fewer than two usable rows remain, or
        when the largest PV is not a finite positive number.

    Raises:
        ImportError: If pandas, or the Excel engine pandas needs to open the
            workbook, is not installed.
    """
    import pandas as pd

    try:
        # header=1: the column names sit on the second row of the sheet.
        df = pd.read_excel(path, sheet_name="Tracking Overview", header=1)
    except ImportError:
        # A missing Excel engine is an environment problem, not a property of this
        # workbook. Swallowing it would make every project look untracked and the
        # whole library load as empty without any hint why.
        raise
    except Exception:
        # Best effort: unreadable/corrupt workbook or no 'Tracking Overview' sheet.
        return None

    if not {_PV, _EV, _AC}.issubset(df.columns):
        return None

    # Coerce non-numeric cells to NaN, then keep only rows where all three series
    # have a value so the arrays stay aligned.
    series = [pd.to_numeric(df[col], errors="coerce") for col in (_PV, _EV, _AC)]
    complete = series[0].notna() & series[1].notna() & series[2].notna()
    pv, ev, ac = (s[complete].to_numpy(float) for s in series)

    n = len(pv)
    if n < 2:
        return None

    # Budget at completion is taken as the largest planned value in the series.
    bac = float(np.nanmax(pv))
    if not np.isfinite(bac) or bac <= 0:
        return None

    # Baseline finish (in tracking periods) = first period where planned work is
    # complete, with a 0.1% tolerance; falls back to the last period otherwise.
    hit = pv >= 0.999 * bac
    planned_finish = int(np.argmax(hit)) + 1 if hit.any() else n

    return Project(
        name=os.path.splitext(os.path.basename(path))[0],
        period=np.arange(1, n + 1),
        pv=pv,
        ev=ev,
        ac=ac,
        bac=bac,
        planned_finish=planned_finish,
        meta={"source": "DSLIB", "n_track": n},
    )
def load_dslib(excel_dir: str = DEFAULT_DIR, min_periods: int = 8) -> list[Project]:
    """Load all DSLIB projects with at least `min_periods` tracking periods.

    Args:
        excel_dir: Directory containing the per-project ``*.xlsx`` workbooks.
        min_periods: Minimum number of usable tracking periods (the project's
            ``meta["n_track"]``) required for a project to be kept.

    Returns:
        The retained projects, ordered by sorted workbook path. Workbooks for
        which `load_project` returns None are skipped.
    """
    pattern = os.path.join(excel_dir, "*.xlsx")
    # Silence warnings only while the workbooks are parsed. catch_warnings()
    # restores the caller's filters on exit, so unrelated warnings elsewhere in
    # the process are not suppressed for good.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        loaded = (load_project(f) for f in sorted(glob.glob(pattern)))
        return [
            p for p in loaded
            if p is not None and p.meta["n_track"] >= min_periods
        ]