"""Check that the code checkout and its sibling dataset bundle are complete.

Run as a script. Every problem found is collected and reported together:
the process exits via ``SystemExit`` with one line per problem, or prints
``artifact integrity ok`` when nothing is wrong.
"""

from __future__ import annotations

import json
from pathlib import Path

# Directory one level above the directory that contains this file.
ROOT = Path(__file__).resolve().parents[1]
# The dataset bundle is looked up as a sibling directory of ROOT.
DATA = ROOT.parent / 'SimplexTasks-12-data'

# Paths (relative to DATA) that must exist inside the dataset bundle.
REQUIRED_DATA_FILES = [
    'README.md',
    'CITATION.cff',
    'LICENSE',
    'MANIFEST.json',
    'BENCHMARK_PROTOCOL.md',
    'EVALUATION_PROTOCOL.md',
    'ASSET_PROVENANCE_AND_TERMS.md',
    'REPRODUCIBILITY.md',
    'data/viewer/tasks.jsonl',
]
# Entries that must exist under DATA/data/derived.
REQUIRED_REAL_DIRS = [
    'cifar10_softmax',
    'topics_20newsgroups',
    'samson_unmixing',
    'pbmc_pseudobulk',
    'utkface_ldl',
    'affectivetext',
]
# Entries that must exist under DATA/data/synthetic.
REQUIRED_SYNTH_DIRS = ['D1', 'D2', 'D3', 'D4', 'D5', 'D6']
# Paths (relative to ROOT) that must exist in the code checkout.
REQUIRED_CODE_FILES = [
    'README.md',
    'CITATION.cff',
    'LICENSE',
    'environment.yml',
    'requirements.txt',
    'scripts/reproduce_tables.py',
    'scripts/reproduce_figures.py',
    'scripts/run_benchmark.py',
    'docs/reviewer_quickstart.md',
]
# Row count expected in the viewer JSONL and ``task_count`` expected in the
# manifest (numerically equal to the 6 derived + 6 synthetic entries above).
EXPECTED_TASK_COUNT = 12


def require(path: Path, label: str, issues: list[str]) -> None:
    """Record a ``missing <label>: <path>`` issue when *path* does not exist.

    Args:
        path: File or directory to look for (either kind satisfies the check).
        label: Human-readable description used in the issue message.
        issues: List that collects problems; mutated in place.
    """
    if not path.exists():
        issues.append(f'missing {label}: {path}')


def _read_text(path: Path, shown_as: str, issues: list[str]) -> str | None:
    """Return the UTF-8 text of *path*, or record an issue and return ``None``."""
    try:
        return path.read_text(encoding='utf-8')
    except (OSError, UnicodeDecodeError) as exc:
        # Covers e.g. a directory in place of the file, permission errors
        # and non-UTF-8 content; report it instead of aborting the whole run.
        issues.append(f'unreadable {shown_as}: {exc}')
        return None


def _check_viewer(path: Path, issues: list[str]) -> None:
    """Validate row count and schema consistency of the viewer JSONL file."""
    text = _read_text(path, 'data/viewer/tasks.jsonl', issues)
    if text is None:
        return

    row_count = 0
    schemas: set[tuple[str, ...]] = set()
    for lineno, line in enumerate(text.splitlines(), start=1):
        if not line.strip():
            continue  # blank lines are not rows
        row_count += 1
        try:
            row = json.loads(line)
        except json.JSONDecodeError as exc:
            issues.append(
                f'invalid JSON in data/viewer/tasks.jsonl line {lineno}: {exc}'
            )
            continue
        if not isinstance(row, dict):
            issues.append(
                f'non-object row in data/viewer/tasks.jsonl line {lineno}'
            )
            continue
        # Key order is part of the schema: rows must list the same keys in
        # the same order to count as consistent.
        schemas.add(tuple(row.keys()))

    if row_count != EXPECTED_TASK_COUNT:
        issues.append(
            f'unexpected row count in data/viewer/tasks.jsonl: {row_count}'
        )
    if len(schemas) != 1:
        issues.append('inconsistent JSONL schema in data/viewer/tasks.jsonl')


def _check_manifest(path: Path, issues: list[str]) -> None:
    """Validate that MANIFEST.json is an object with the expected task_count."""
    text = _read_text(path, 'MANIFEST.json', issues)
    if text is None:
        return
    try:
        manifest = json.loads(text)
    except json.JSONDecodeError as exc:
        issues.append(f'invalid JSON in MANIFEST.json: {exc}')
        return
    if not isinstance(manifest, dict):
        issues.append('MANIFEST.json is not a JSON object')
        return

    task_count = manifest.get('task_count')
    if task_count != EXPECTED_TASK_COUNT:
        issues.append(f"unexpected task_count in MANIFEST.json: {task_count}")


def main() -> None:
    """Run every integrity check and report all problems at once.

    Raises:
        SystemExit: With a newline-joined list of problems when any check
            fails. Malformed or unreadable JSON files are reported the same
            way rather than surfacing as an unhandled exception.
    """
    issues: list[str] = []

    require(DATA, 'dataset bundle', issues)
    for rel in REQUIRED_CODE_FILES:
        require(ROOT / rel, f'code file {rel}', issues)

    # Only inspect the bundle's contents when the bundle itself is present;
    # otherwise the single "missing dataset bundle" issue is enough.
    if DATA.exists():
        for rel in REQUIRED_DATA_FILES:
            require(DATA / rel, f'data file {rel}', issues)
        for name in REQUIRED_REAL_DIRS:
            require(DATA / 'data' / 'derived' / name, f'derived task {name}', issues)
        for name in REQUIRED_SYNTH_DIRS:
            require(DATA / 'data' / 'synthetic' / name, f'synthetic task {name}', issues)

        summaries = DATA / 'data' / 'summaries'
        require(summaries / 'figure_inputs', 'figure input cache', issues)
        require(summaries / 'table_inputs', 'table input cache', issues)

        # Absence of these two files is already reported by the
        # REQUIRED_DATA_FILES loop, so only validate them when present.
        viewer_path = DATA / 'data' / 'viewer' / 'tasks.jsonl'
        if viewer_path.exists():
            _check_viewer(viewer_path, issues)

        manifest_path = DATA / 'MANIFEST.json'
        if manifest_path.exists():
            _check_manifest(manifest_path, issues)

    if issues:
        raise SystemExit('\n'.join(issues))
    print('artifact integrity ok')


if __name__ == '__main__':
    main()