dash-jsp-trainer / scripts /download_benchmarks.py
Vittal-M's picture
Trainer Space: download -> train -> push -> sleep
52c82e4
"""Download public JSP / FJSP benchmark instances.
Sources
-------
- Taillard: canonical files on Eric Taillard's homepage and the JSPLIB GitHub
- Lawrence: OR-Library (J. E. Beasley)
- Brandimarte: IDSIA mirror
- DMU: JSPLIB GitHub mirror
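The JSP families (Taillard, Lawrence, DMU) are downloaded from the JSPLIB
GitHub mirror (https://github.com/tamy0612/JSPLIB) because it provides a
single canonical text representation, public and stable, under permissive
licensing. The Brandimarte FJSP instances are downloaded from the
SchedulingLab/fjsp-instances GitHub repository instead.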
"""
from __future__ import annotations
import argparse
import hashlib
import sys
from pathlib import Path
from typing import Iterable, List
from urllib.error import HTTPError, URLError
from urllib.request import urlopen
# Base URL for raw files in the JSPLIB GitHub repository. Instance names
# (e.g. "ta01", "la01", "dmu01") are appended directly, without an extension.
JSPLIB_RAW = (
    "https://raw.githubusercontent.com"
    "/tamy0612/JSPLIB/master/instances/"
)
def _download(url: str) -> bytes:
    """Fetch *url* and return the raw response body.

    Args:
        url: Address to retrieve; passed unchanged to ``urlopen``.

    Returns:
        The complete response body as bytes.

    Raises:
        SystemExit: If the request or the body read fails. The message has
            the form ``"Failed to download <url>: <error>"`` and the
            original exception is attached as ``__cause__``.
    """
    # Imported locally so this function does not depend on a new file-level
    # import.
    from http.client import HTTPException

    try:
        with urlopen(url, timeout=30) as resp:
            return resp.read()
    except (OSError, HTTPException) as e:
        # HTTPError and URLError are OSError subclasses, so they are still
        # handled here. Catching OSError/HTTPException additionally covers
        # socket timeouts, connection resets and truncated responses raised
        # while waiting for or reading the response, which urllib does not
        # wrap in URLError. Without this, one stalled transfer would escape
        # the callers' ``except SystemExit`` and abort the whole run.
        raise SystemExit(f"Failed to download {url}: {e}") from e
def _save(text: bytes, path: Path) -> str:
    """Write *text* to *path* and return a short content fingerprint.

    Missing parent directories of *path* are created first. Despite its
    name, *text* is raw bytes and is written unmodified.

    Returns:
        The first 16 hexadecimal characters of the SHA-256 digest of *text*.
    """
    parent_dir = path.parent
    parent_dir.mkdir(parents=True, exist_ok=True)
    path.write_bytes(text)

    full_digest = hashlib.sha256(text).hexdigest()
    return full_digest[:16]
def download_taillard(data_dir: Path, names: Iterable[str]) -> dict:
    """Fetch Taillard instances (e.g. ``'ta01'``) from the JSPLIB mirror.

    Each instance is stored as ``<name>.txt`` inside *data_dir*. A failed
    download is reported on stderr and recorded in the result; it does not
    stop the remaining downloads.

    Returns:
        A dict keyed by instance name. Successful entries hold ``url``,
        ``sha256_16`` and ``size``; failed entries hold ``url`` and ``error``.
    """
    results = {}
    for instance in names:
        source = f"{JSPLIB_RAW}{instance}"
        try:
            payload = _download(source)
            digest = _save(payload, data_dir / f"{instance}.txt")
            size = len(payload)
            results[instance] = {"url": source, "sha256_16": digest, "size": size}
            print(f" taillard/{instance} ok ({size} bytes, sha {digest})")
        except SystemExit as failure:
            # _download signals a failed fetch by raising SystemExit.
            print(f" taillard/{instance} FAILED: {failure}", file=sys.stderr)
            results[instance] = {"url": source, "error": str(failure)}
    return results
def download_lawrence(data_dir: Path, names: Iterable[str]) -> dict:
    """Fetch Lawrence instances (e.g. ``'la01'``) from the JSPLIB mirror.

    Each instance is stored as ``<name>.txt`` inside *data_dir*. A failed
    download is reported on stderr and recorded in the result; it does not
    stop the remaining downloads.

    Returns:
        A dict keyed by instance name. Successful entries hold ``url``,
        ``sha256_16`` and ``size``; failed entries hold ``url`` and ``error``.
    """
    summary = {}
    for instance in names:
        link = f"{JSPLIB_RAW}{instance}"
        try:
            content = _download(link)
            checksum = _save(content, data_dir / f"{instance}.txt")
            n_bytes = len(content)
            summary[instance] = {
                "url": link,
                "sha256_16": checksum,
                "size": n_bytes,
            }
            print(f" lawrence/{instance} ok ({n_bytes} bytes, sha {checksum})")
        except SystemExit as err:
            # _download signals a failed fetch by raising SystemExit.
            print(f" lawrence/{instance} FAILED: {err}", file=sys.stderr)
            summary[instance] = {"url": link, "error": str(err)}
    return summary
def download_dmu(data_dir: Path, names: Iterable[str]) -> dict:
    """Fetch DMU instances (e.g. ``'dmu01'``) from the JSPLIB mirror.

    Each instance is stored as ``<name>.txt`` inside *data_dir*. A failed
    download is reported on stderr and recorded in the result; it does not
    stop the remaining downloads.

    Returns:
        A dict keyed by instance name. Successful entries hold ``url``,
        ``sha256_16`` and ``size``; failed entries hold ``url`` and ``error``.
    """
    report = {}
    for instance in names:
        address = f"{JSPLIB_RAW}{instance}"
        try:
            raw = _download(address)
            fingerprint = _save(raw, data_dir / f"{instance}.txt")
            length = len(raw)
            report[instance] = {
                "url": address,
                "sha256_16": fingerprint,
                "size": length,
            }
            print(f" dmu/{instance} ok ({length} bytes, sha {fingerprint})")
        except SystemExit as problem:
            # _download signals a failed fetch by raising SystemExit.
            print(f" dmu/{instance} FAILED: {problem}", file=sys.stderr)
            report[instance] = {"url": address, "error": str(problem)}
    return report
# Base URL for raw Brandimarte FJSP instance files in the
# SchedulingLab/fjsp-instances GitHub repository. The downloader appends
# "<name>.fjs" to this prefix.
BRANDIMARTE_BASE = (
    "https://raw.githubusercontent.com"
    "/SchedulingLab/fjsp-instances/master/Brandimarte/"
)
def download_brandimarte(data_dir: Path, names: Iterable[str]) -> dict:
    """Fetch Brandimarte FJSP instances (e.g. ``'mk01'``).

    The remote file is ``<name>.fjs`` under ``BRANDIMARTE_BASE``; it is
    stored locally as ``<name>.txt`` inside *data_dir*. A failed download is
    reported on stderr and recorded in the result; it does not stop the
    remaining downloads.

    Returns:
        A dict keyed by instance name. Successful entries hold ``url``,
        ``sha256_16`` and ``size``; failed entries hold ``url`` and ``error``.
    """
    collected = {}
    for instance in names:
        remote = f"{BRANDIMARTE_BASE}{instance}.fjs"
        try:
            body = _download(remote)
            short_sha = _save(body, data_dir / f"{instance}.txt")
            body_len = len(body)
            collected[instance] = {
                "url": remote,
                "sha256_16": short_sha,
                "size": body_len,
            }
            print(f" brandimarte/{instance} ok ({body_len} bytes, sha {short_sha})")
        except SystemExit as exc:
            # _download signals a failed fetch by raising SystemExit.
            print(f" brandimarte/{instance} FAILED: {exc}", file=sys.stderr)
            collected[instance] = {"url": remote, "error": str(exc)}
    return collected
def main() -> None:
    """Command-line entry point: download the selected families and write a manifest.

    Options:
        --family    One or more of ``taillard``, ``lawrence``, ``brandimarte``,
                    ``dmu`` or ``all`` (default: all four families).
        --data-dir  Root output directory (default: ``data``). Each family is
                    stored in its own sub-directory.

    After the downloads, the per-family results are written as JSON to
    ``<data-dir>/MANIFEST.json``. Individual download failures are recorded
    in the manifest and do not abort the run.
    """
    import json

    all_families = ["taillard", "lawrence", "brandimarte", "dmu"]

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--family",
        nargs="+",
        default=list(all_families),
        choices=[*all_families, "all"],
    )
    parser.add_argument("--data-dir", default="data")
    args = parser.parse_args()

    selected = list(all_families) if "all" in args.family else args.family

    root = Path(args.data_dir)
    # The root is otherwise only created as a side effect of a successful
    # save. Create it up front so the manifest can still be written when
    # every download fails (e.g. no network access).
    root.mkdir(parents=True, exist_ok=True)

    # (family, downloader, instance-name prefix, number of instances),
    # listed in the order the families are processed.
    plan = (
        ("taillard", download_taillard, "ta", 80),
        ("lawrence", download_lawrence, "la", 40),
        ("brandimarte", download_brandimarte, "mk", 10),
        ("dmu", download_dmu, "dmu", 80),
    )

    manifest = {}
    for family, fetch, prefix, count in plan:
        if family not in selected:
            continue
        names = [f"{prefix}{i:02d}" for i in range(1, count + 1)]
        manifest[family] = fetch(root / family, names)

    manifest_path = root / "MANIFEST.json"
    manifest_path.write_text(json.dumps(manifest, indent=2), encoding="utf-8")
    print(f"\nManifest saved to {manifest_path}")
# Run the downloader only when executed as a script, not when imported.
if __name__ == "__main__":
    main()