Spaces:
Runtime error
Runtime error
"""Pre-parse local benchmark instances to Parquet and optionally push them to the HF dataset hub.

Output layout in `Vittal-M/jsp-benchmarks-cached` (the default `--repo-id`):

    taillard.parquet
    lawrence.parquet
    brandimarte.parquet
    dmu.parquet
    README.md

A family whose data directory is missing is skipped, so its Parquet file is
not written.
"""
| from __future__ import annotations | |
| import argparse | |
| import json | |
| import os | |
| from pathlib import Path | |
| import pandas as pd | |
| from dash_jsp.benchmarks import taillard, lawrence, brandimarte, dmu | |
def _jsp_to_df(instances) -> pd.DataFrame:
    """Flatten JSP benchmark instances into one DataFrame row per instance.

    Args:
        instances: Iterable of objects exposing ``name``, ``family``,
            ``n_jobs``, ``n_machines``, ``optimum``, ``ops``, ``due_dates``,
            ``weights`` and ``source_url`` attributes.

    Returns:
        A DataFrame with the columns ``name, family, n_jobs, n_machines,
        optimum, ops_json, due_dates_json, weights_json, source_url``.
        ``ops`` is stored JSON-encoded; ``due_dates`` and ``weights`` are
        JSON-encoded when truthy and stored as ``None`` otherwise.  The
        columns are present even when ``instances`` is empty, so an empty
        family still yields a frame (and Parquet file) with the full schema.
    """
    columns = [
        "name",
        "family",
        "n_jobs",
        "n_machines",
        "optimum",
        "ops_json",
        "due_dates_json",
        "weights_json",
        "source_url",
    ]
    rows = [
        {
            "name": inst.name,
            "family": inst.family,
            "n_jobs": inst.n_jobs,
            "n_machines": inst.n_machines,
            "optimum": inst.optimum,
            "ops_json": json.dumps(inst.ops),
            # Falsy values (None or an empty container) are stored as null.
            "due_dates_json": json.dumps(inst.due_dates) if inst.due_dates else None,
            "weights_json": json.dumps(inst.weights) if inst.weights else None,
            "source_url": inst.source_url,
        }
        for inst in instances
    ]
    # Explicit columns keep the schema stable when there are no rows at all.
    return pd.DataFrame(rows, columns=columns)
def _fjsp_to_df(instances) -> pd.DataFrame:
    """Flatten FJSP benchmark instances into one DataFrame row per instance.

    Args:
        instances: Iterable of objects exposing ``name``, ``family``,
            ``n_jobs``, ``n_machines``, ``optimum``, ``ops`` and
            ``source_url`` attributes.

    Returns:
        A DataFrame with the columns ``name, family, n_jobs, n_machines,
        optimum, ops_json, source_url``, where ``ops_json`` is the
        JSON-encoded ``ops`` attribute.  The columns are present even when
        ``instances`` is empty, so an empty family keeps its schema.
    """
    columns = [
        "name",
        "family",
        "n_jobs",
        "n_machines",
        "optimum",
        "ops_json",
        "source_url",
    ]
    rows = [
        {
            "name": inst.name,
            "family": inst.family,
            "n_jobs": inst.n_jobs,
            "n_machines": inst.n_machines,
            "optimum": inst.optimum,
            "ops_json": json.dumps(inst.ops),
            "source_url": inst.source_url,
        }
        for inst in instances
    ]
    # Explicit columns keep the schema stable when there are no rows at all.
    return pd.DataFrame(rows, columns=columns)
# Dataset card written as README.md next to the Parquet files.
_DATASET_CARD = (
    "# JSP / FJSP Benchmark Cache\n\n"
    "Pre-parsed canonical JSP and FJSP benchmark instances:\n\n"
    "- **Taillard** (1993) — 80 JSP instances\n"
    "- **Lawrence** (1984) — 40 JSP instances\n"
    "- **Brandimarte** (1993) — 10 FJSP instances\n"
    "- **DMU** (Demirkol-Mehta-Uzsoy 1998) — up to 80 JSP instances\n\n"
    "Schema: `name, family, n_jobs, n_machines, optimum, ops_json, "
    "due_dates_json, weights_json, source_url`. The `ops_json` field "
    "contains a JSON-encoded list of `[machine_id, processing_time]` pairs "
    "per operation per job.\n\n"
    "Used by [DASH-JSP](https://huggingface.co/spaces/Vittal-M/dash-jsp-demo).\n"
)


def _load_families(data_dir: Path) -> dict[str, pd.DataFrame]:
    """Load every benchmark family found under *data_dir* into a DataFrame.

    Each family is read from the sub-directory named after it.  A family
    whose loader raises ``FileNotFoundError`` is reported and skipped.
    """
    loaders = (
        ("taillard", taillard, _jsp_to_df),
        ("lawrence", lawrence, _jsp_to_df),
        ("dmu", dmu, _jsp_to_df),
        ("brandimarte", brandimarte, _fjsp_to_df),
    )
    families = {}
    for fam, module, to_df in loaders:
        try:
            # Conversion stays inside the try: if load_all is lazy, the
            # FileNotFoundError would only surface while iterating.
            families[fam] = to_df(module.load_all(data_dir / fam))
        except FileNotFoundError as e:
            print(f"[skip] {fam}: {e}")
    return families


def _push_to_hub(out_dir: Path, repo_id: str) -> None:
    """Upload the contents of *out_dir* to the HF dataset repo *repo_id*.

    Raises:
        SystemExit: If ``huggingface_hub`` is not installed or the
            ``HF_TOKEN`` environment variable is unset or empty.
    """
    try:
        from huggingface_hub import HfApi, create_repo
    except ImportError as exc:
        raise SystemExit("pip install huggingface_hub") from exc
    token = os.environ.get("HF_TOKEN")
    if not token:
        raise SystemExit("Set HF_TOKEN")
    api = HfApi(token=token)
    create_repo(repo_id, token=token, exist_ok=True, repo_type="dataset")
    api.upload_folder(
        folder_path=str(out_dir),
        repo_id=repo_id,
        repo_type="dataset",
        commit_message="Update benchmark cache",
    )
    print(f"Pushed {out_dir} → https://huggingface.co/datasets/{repo_id}")


def main() -> None:
    """Export the local benchmark families to Parquet and optionally push them.

    Command-line options:
        --data-dir: Root directory holding one sub-directory per family
            (default ``data``).
        --out-dir: Directory receiving the Parquet files and README.md
            (default ``data/cached``); created if missing.
        --push: Also upload the output directory to the HF dataset hub.
        --repo-id: Target dataset repo; defaults to the
            ``HF_DATASET_REPO_ID`` environment variable, falling back to
            ``Vittal-M/jsp-benchmarks-cached``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--data-dir", default="data")
    parser.add_argument("--out-dir", default="data/cached")
    parser.add_argument("--push", action="store_true",
                        help="Also push to HF dataset hub")
    parser.add_argument(
        "--repo-id",
        default=os.environ.get("HF_DATASET_REPO_ID", "Vittal-M/jsp-benchmarks-cached"),
    )
    args = parser.parse_args()

    data_dir = Path(args.data_dir)
    out_dir = Path(args.out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    for fam, df in _load_families(data_dir).items():
        path = out_dir / f"{fam}.parquet"
        df.to_parquet(path, index=False)
        print(f"  wrote {path} ({len(df)} instances)")

    # The card contains non-ASCII characters, so pin the encoding instead of
    # relying on the platform's locale default.
    (out_dir / "README.md").write_text(_DATASET_CARD, encoding="utf-8")

    if args.push:
        _push_to_hub(out_dir, args.repo_id)


# Run the export only when executed as a script, not on import.
if __name__ == "__main__":
    main()