"""Tabulate DASH-JSP results against published metaheuristic and RL baselines.

Published numbers are quoted, not re-run.

References
----------
- Goncalves, J. F., Mendes, J. J., Resende, M. G. C. (2005). A hybrid genetic
  algorithm for the job shop scheduling problem. EJOR 167(1), 77-95.
- Nowicki, E., Smutnicki, C. (2005). An advanced tabu search algorithm for the
  job shop problem. Journal of Scheduling 8(2), 145-159.
- Zhang, C. et al. (2020). Learning to dispatch for job shop scheduling via
  deep reinforcement learning. NeurIPS.
- Park, J. et al. (2021). Learning to schedule job-shop problems: representation
  and policy learning using graph neural network and reinforcement learning.
  IJPR 59(11), 3360-3377.

Outputs
-------
results/comparisons.md and results/comparisons.csv
"""

from __future__ import annotations

import argparse
from pathlib import Path

import pandas as pd


# Published average optimality gap (%) on Taillard, by method.
# Numbers compiled from the references above. Where multiple variants exist
# in the same paper, we quote the best-reported aggregate. These values are
# QUOTED, not re-computed.
PUBLISHED_TAILLARD_GAPS = {
    "Goncalves 2005 (Hybrid GA)":         5.30,
    "Nowicki & Smutnicki 2005 (TS)":      5.46,
    "Zhang et al. 2020 (Deep RL, L2D)":  17.24,  # average across all sizes
    "Park et al. 2021 (GNN + RL)":       15.38,
    "FIFO":                               43.0,
    "EDD":                                40.0,
    "WSPT":                               18.5,
    "ATC":                                14.0,
    "MWKR (most work remaining)":         13.0,
}


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--results", default="results/benchmark_full.csv",
        help="DASH-JSP benchmark CSV produced by scripts/run_evaluation.py",
    )
    parser.add_argument("--out-dir", default="results")
    args = parser.parse_args()

    out_dir = Path(args.out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    rows = []
    for k, v in PUBLISHED_TAILLARD_GAPS.items():
        rows.append({"method": k, "source": "published", "mean_gap_pct": v})

    # Add DASH-JSP rows from local results, if available.
    p = Path(args.results)
    if p.exists():
        df = pd.read_csv(p)
        df = df[df["family"] == "taillard"]
        agg = df.groupby("method")["optimality_gap_pct"].mean()
        for m, v in agg.items():
            rows.append({"method": f"DASH-JSP ({m})", "source": "this work", "mean_gap_pct": float(v)})
    else:
        rows.append({
            "method": "DASH-JSP (LinUCB)",
            "source": "this work",
            "mean_gap_pct": "TBD (run scripts/run_evaluation.py)",
        })

    df_out = pd.DataFrame(rows)
    df_out.to_csv(out_dir / "comparisons.csv", index=False)

    md = ["# DASH-JSP vs. published methods on Taillard (mean optimality gap, %)", ""]
    md.append("| Method | Source | Mean gap (%) |")
    md.append("|---|---|---:|")
    for r in rows:
        v = r["mean_gap_pct"]
        v_s = f"{v:.2f}" if isinstance(v, (int, float)) else str(v)
        md.append(f"| {r['method']} | {r['source']} | {v_s} |")

    md.append("")
    md.append(
        "*Smaller is better. Published numbers are quoted from the cited papers; "
        "DASH-JSP numbers are computed in this work.*",
    )
    (out_dir / "comparisons.md").write_text("\n".join(md), encoding="utf-8")
    print((out_dir / "comparisons.md").read_text(encoding="utf-8"))


if __name__ == "__main__":
    main()