"""Download public JSP / FJSP benchmark instances.

Sources
-------
- Taillard: canonical files on Eric Taillard's homepage and the JSPLIB GitHub
- Lawrence: OR-Library (J. E. Beasley)
- Brandimarte: IDSIA mirror
- DMU: JSPLIB GitHub mirror

The Taillard, Lawrence and DMU families are downloaded from the JSPLIB GitHub
mirror (https://github.com/tamy0612/JSPLIB) because it provides a single
canonical text representation, public and stable, under permissive licensing.
Brandimarte (FJSP) instances are downloaded from the
SchedulingLab/fjsp-instances GitHub repository (see ``BRANDIMARTE_BASE``).

Every instance is stored as ``<data-dir>/<family>/<name>.txt`` and a
``MANIFEST.json`` summarising each download (URL, truncated SHA-256, size, or
the error message) is written to ``<data-dir>``.
"""

from __future__ import annotations

import argparse
import hashlib
import json
import sys
from http.client import HTTPException
from pathlib import Path
from typing import Iterable, List
from urllib.error import HTTPError, URLError
from urllib.request import urlopen

JSPLIB_RAW = (
    "https://raw.githubusercontent.com/tamy0612/JSPLIB/master/instances/"
)

# Brandimarte canonical mirror
BRANDIMARTE_BASE = (
    "https://raw.githubusercontent.com/SchedulingLab/fjsp-instances/master/Brandimarte/"
)

# HTTPError/URLError cover failures while opening the URL.  OSError additionally
# covers socket timeouts and connection resets raised while *reading* the body
# (urllib does not wrap those in URLError), and HTTPException covers truncated
# responses (http.client.IncompleteRead).
_DOWNLOAD_ERRORS = (HTTPError, URLError, OSError, HTTPException)


def _download(url: str) -> bytes:
    """Return the raw bytes served at *url*.

    Raises:
        SystemExit: if the URL cannot be opened or its body cannot be read
            (30 second timeout).  The message names the URL and the cause.
    """
    try:
        with urlopen(url, timeout=30) as resp:
            return resp.read()
    except _DOWNLOAD_ERRORS as e:
        raise SystemExit(f"Failed to download {url}: {e}") from e


def _save(text: bytes, path: Path) -> str:
    """Write *text* to *path* and return the first 16 hex digits of its SHA-256.

    Missing parent directories of *path* are created.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_bytes(text)
    return hashlib.sha256(text).hexdigest()[:16]


def _download_family(
    family: str,
    data_dir: Path,
    names: Iterable[str],
    base_url: str,
    suffix: str = "",
) -> dict:
    """Download every instance in *names* and return a per-instance manifest.

    Each instance is fetched from ``{base_url}{name}{suffix}`` and stored as
    ``data_dir / "{name}.txt"``.  A failed download is reported on stderr and
    recorded in the manifest instead of aborting the remaining instances.

    Returns:
        A dict mapping each name to ``{"url", "sha256_16", "size"}`` on
        success or ``{"url", "error"}`` on failure.
    """
    out = {}
    for name in names:
        url = f"{base_url}{name}{suffix}"
        try:
            blob = _download(url)
        except SystemExit as e:
            # _download signals failure with SystemExit; keep going so one bad
            # instance does not lose the rest of the batch.
            print(f" {family}/{name} FAILED: {e}", file=sys.stderr)
            out[name] = {"url": url, "error": str(e)}
            continue
        sha = _save(blob, data_dir / f"{name}.txt")
        out[name] = {"url": url, "sha256_16": sha, "size": len(blob)}
        print(f" {family}/{name} ok ({len(blob)} bytes, sha {sha})")
    return out


def download_taillard(
    data_dir: Path, names: Iterable[str], *, base_url: str = JSPLIB_RAW
) -> dict:
    """Download Taillard instances by name (e.g., 'ta01').

    Files are fetched from ``{base_url}{name}`` and saved under *data_dir* as
    ``{name}.txt``.  Returns the per-instance manifest dict.
    """
    return _download_family("taillard", data_dir, names, base_url)


def download_lawrence(
    data_dir: Path, names: Iterable[str], *, base_url: str = JSPLIB_RAW
) -> dict:
    """Download Lawrence instances by name (e.g., 'la01').

    Files are fetched from ``{base_url}{name}`` and saved under *data_dir* as
    ``{name}.txt``.  Returns the per-instance manifest dict.
    """
    return _download_family("lawrence", data_dir, names, base_url)


def download_dmu(
    data_dir: Path, names: Iterable[str], *, base_url: str = JSPLIB_RAW
) -> dict:
    """Download DMU instances by name (e.g., 'dmu01').

    Files are fetched from ``{base_url}{name}`` and saved under *data_dir* as
    ``{name}.txt``.  Returns the per-instance manifest dict.
    """
    return _download_family("dmu", data_dir, names, base_url)


def download_brandimarte(
    data_dir: Path, names: Iterable[str], *, base_url: str = BRANDIMARTE_BASE
) -> dict:
    """Download Brandimarte instances by name (e.g., 'mk01').

    Files are fetched from ``{base_url}{name}.fjs`` and saved under *data_dir*
    as ``{name}.txt``.  Returns the per-instance manifest dict.
    """
    return _download_family(
        "brandimarte", data_dir, names, base_url, suffix=".fjs"
    )


# family -> (download function, instance-name prefix, number of instances).
# Insertion order is the order in which families are downloaded and listed in
# the manifest.
_FAMILIES = {
    "taillard": (download_taillard, "ta", 80),
    "lawrence": (download_lawrence, "la", 40),
    "brandimarte": (download_brandimarte, "mk", 10),
    "dmu": (download_dmu, "dmu", 80),
}


def main() -> None:
    """Parse the command line, download the selected families, write the manifest.

    ``--family`` takes one or more of the family names (or ``all``); by default
    every family is downloaded.  ``--data-dir`` is the output root (default
    ``data``).
    """
    families = list(_FAMILIES)
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--family",
        nargs="+",
        default=families,
        choices=[*families, "all"],
    )
    parser.add_argument("--data-dir", default="data")
    args = parser.parse_args()

    selected = set(families) if "all" in args.family else set(args.family)
    root = Path(args.data_dir)

    manifest = {}
    for family, (fetch, prefix, count) in _FAMILIES.items():
        if family not in selected:
            continue
        names = [f"{prefix}{i:02d}" for i in range(1, count + 1)]
        manifest[family] = fetch(root / family, names)

    # The data directory only exists if at least one file was saved; create it
    # so the manifest (including recorded failures) can always be written.
    root.mkdir(parents=True, exist_ok=True)
    manifest_path = root / "MANIFEST.json"
    manifest_path.write_text(json.dumps(manifest, indent=2))
    print(f"\nManifest saved to {manifest_path}")


if __name__ == "__main__":
    main()