"""Pre-parse local benchmark instances to Parquet and push to HF dataset hub. Output layout in `Vittal-M/jsp-benchmarks-cached`: taillard.parquet lawrence.parquet brandimarte.parquet dmu.parquet README.md """ from __future__ import annotations import argparse import json import os from pathlib import Path import pandas as pd from dash_jsp.benchmarks import taillard, lawrence, brandimarte, dmu def _jsp_to_df(instances) -> pd.DataFrame: rows = [] for inst in instances: rows.append({ "name": inst.name, "family": inst.family, "n_jobs": inst.n_jobs, "n_machines": inst.n_machines, "optimum": inst.optimum, "ops_json": json.dumps(inst.ops), "due_dates_json": json.dumps(inst.due_dates) if inst.due_dates else None, "weights_json": json.dumps(inst.weights) if inst.weights else None, "source_url": inst.source_url, }) return pd.DataFrame(rows) def _fjsp_to_df(instances) -> pd.DataFrame: rows = [] for inst in instances: rows.append({ "name": inst.name, "family": inst.family, "n_jobs": inst.n_jobs, "n_machines": inst.n_machines, "optimum": inst.optimum, "ops_json": json.dumps(inst.ops), "source_url": inst.source_url, }) return pd.DataFrame(rows) def main() -> None: parser = argparse.ArgumentParser() parser.add_argument("--data-dir", default="data") parser.add_argument("--out-dir", default="data/cached") parser.add_argument("--push", action="store_true", help="Also push to HF dataset hub") parser.add_argument( "--repo-id", default=os.environ.get("HF_DATASET_REPO_ID", "Vittal-M/jsp-benchmarks-cached"), ) args = parser.parse_args() data_dir = Path(args.data_dir) out_dir = Path(args.out_dir) out_dir.mkdir(parents=True, exist_ok=True) families = {} try: families["taillard"] = _jsp_to_df(taillard.load_all(data_dir / "taillard")) except FileNotFoundError as e: print(f"[skip] taillard: {e}") try: families["lawrence"] = _jsp_to_df(lawrence.load_all(data_dir / "lawrence")) except FileNotFoundError as e: print(f"[skip] lawrence: {e}") try: families["dmu"] = _jsp_to_df(dmu.load_all(data_dir / "dmu")) except FileNotFoundError as e: print(f"[skip] dmu: {e}") try: families["brandimarte"] = _fjsp_to_df(brandimarte.load_all(data_dir / "brandimarte")) except FileNotFoundError as e: print(f"[skip] brandimarte: {e}") for fam, df in families.items(): path = out_dir / f"{fam}.parquet" df.to_parquet(path, index=False) print(f" wrote {path} ({len(df)} instances)") # Dataset card card = ( "# JSP / FJSP Benchmark Cache\n\n" "Pre-parsed canonical JSP and FJSP benchmark instances:\n\n" "- **Taillard** (1993) — 80 JSP instances\n" "- **Lawrence** (1984) — 40 JSP instances\n" "- **Brandimarte** (1993) — 10 FJSP instances\n" "- **DMU** (Demirkol-Mehta-Uzsoy 1998) — up to 80 JSP instances\n\n" "Schema: `name, family, n_jobs, n_machines, optimum, ops_json, " "due_dates_json, weights_json, source_url`. The `ops_json` field " "contains a JSON-encoded list of `[machine_id, processing_time]` pairs " "per operation per job.\n\n" "Used by [DASH-JSP](https://huggingface.co/spaces/Vittal-M/dash-jsp-demo).\n" ) (out_dir / "README.md").write_text(card) if args.push: try: from huggingface_hub import HfApi, create_repo except ImportError: raise SystemExit("pip install huggingface_hub") token = os.environ.get("HF_TOKEN") if not token: raise SystemExit("Set HF_TOKEN") api = HfApi(token=token) create_repo(args.repo_id, token=token, exist_ok=True, repo_type="dataset") api.upload_folder( folder_path=str(out_dir), repo_id=args.repo_id, repo_type="dataset", commit_message="Update benchmark cache", ) print(f"Pushed {out_dir} → https://huggingface.co/datasets/{args.repo_id}") if __name__ == "__main__": main()