| from __future__ import annotations |
|
|
| import json |
| from pathlib import Path |
|
|
# Parent of the directory that contains this file.
ROOT = Path(__file__).resolve().parents[1]
# The dataset bundle is looked up as a sibling directory of ROOT.
DATA = ROOT.parent.joinpath('SimplexTasks-12-data')


# Paths (relative to DATA) that must exist inside the dataset bundle.
REQUIRED_DATA_FILES = [
    'README.md',
    'CITATION.cff',
    'LICENSE',
    'MANIFEST.json',
    'BENCHMARK_PROTOCOL.md',
    'EVALUATION_PROTOCOL.md',
    'ASSET_PROVENANCE_AND_TERMS.md',
    'REPRODUCIBILITY.md',
    'data/viewer/tasks.jsonl',
]

# Entries expected under DATA/data/derived.
REQUIRED_REAL_DIRS = [
    'cifar10_softmax',
    'topics_20newsgroups',
    'samson_unmixing',
    'pbmc_pseudobulk',
    'utkface_ldl',
    'affectivetext',
]

# Entries expected under DATA/data/synthetic: 'D1' through 'D6'.
REQUIRED_SYNTH_DIRS = [f'D{index}' for index in range(1, 7)]

# Paths (relative to ROOT) that must exist in the code repository.
REQUIRED_CODE_FILES = [
    'README.md',
    'CITATION.cff',
    'LICENSE',
    'environment.yml',
    'requirements.txt',
    'scripts/reproduce_tables.py',
    'scripts/reproduce_figures.py',
    'scripts/run_benchmark.py',
    'docs/reviewer_quickstart.md',
]
|
|
|
|
def require(path: Path, label: str, issues: list[str]) -> None:
    """Append a 'missing ...' message to *issues* when *path* does not exist.

    Args:
        path: Filesystem location to probe.
        label: Human-readable description used in the message.
        issues: Accumulator list; mutated in place.
    """
    if path.exists():
        return
    issues.append(f'missing {label}: {path}')
|
|
|
|
def _check_viewer_rows(viewer_path: Path, issues: list[str]) -> None:
    """Validate the viewer JSONL file: parseable object rows, 12 rows, one schema.

    Unreadable files, malformed lines and rows that are not JSON objects are
    appended to *issues* rather than raised, so every other problem already
    collected is still reported.
    """
    try:
        text = viewer_path.read_text(encoding='utf-8')
    except (OSError, UnicodeDecodeError) as exc:
        issues.append(f'unreadable data/viewer/tasks.jsonl: {exc}')
        return

    rows = []
    parse_failed = False
    # Split on '\n' only: str.splitlines() also breaks on characters such as
    # U+2028 and U+0085, which may legally appear unescaped inside JSON strings.
    for lineno, line in enumerate(text.split('\n'), start=1):
        if not line.strip():
            continue
        try:
            row = json.loads(line)
        except json.JSONDecodeError as exc:
            issues.append(f'invalid JSON in data/viewer/tasks.jsonl line {lineno}: {exc}')
            parse_failed = True
            continue
        if not isinstance(row, dict):
            issues.append(f'non-object row in data/viewer/tasks.jsonl line {lineno}')
            parse_failed = True
            continue
        rows.append(row)

    # Count/schema results would be misleading when some rows failed to parse.
    if parse_failed:
        return

    # A schema is the ordered tuple of keys, so key order matters here.
    schemas = {tuple(row) for row in rows}
    if len(rows) != 12:
        issues.append(f'unexpected row count in data/viewer/tasks.jsonl: {len(rows)}')
    if len(schemas) != 1:
        issues.append('inconsistent JSONL schema in data/viewer/tasks.jsonl')


def _check_manifest(manifest_path: Path, issues: list[str]) -> None:
    """Validate that MANIFEST.json is a JSON object whose task_count is 12."""
    try:
        manifest = json.loads(manifest_path.read_text(encoding='utf-8'))
    except (OSError, ValueError) as exc:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        issues.append(f'unreadable MANIFEST.json: {exc}')
        return
    if not isinstance(manifest, dict):
        issues.append('MANIFEST.json is not a JSON object')
        return
    task_count = manifest.get('task_count')
    if task_count != 12:
        issues.append(f'unexpected task_count in MANIFEST.json: {task_count}')


def main() -> None:
    """Check that the code repository and the dataset bundle are complete.

    Verifies that every required code file exists under ROOT and, when the
    DATA directory exists, that its required files and directories are
    present, that data/viewer/tasks.jsonl has 12 rows with a single key
    layout, and that MANIFEST.json reports task_count == 12.

    Raises:
        SystemExit: with all collected issues joined by newlines, if any
            check failed. Corrupt JSON inputs are reported this way too
            instead of aborting with a traceback.
    """
    issues: list[str] = []
    require(DATA, 'dataset bundle', issues)
    for rel in REQUIRED_CODE_FILES:
        require(ROOT / rel, f'code file {rel}', issues)

    # Bundle contents are only inspected when the bundle itself is present;
    # otherwise the single 'missing dataset bundle' message is enough.
    if DATA.exists():
        for rel in REQUIRED_DATA_FILES:
            require(DATA / rel, f'data file {rel}', issues)
        for name in REQUIRED_REAL_DIRS:
            require(DATA / 'data' / 'derived' / name, f'derived task {name}', issues)
        for name in REQUIRED_SYNTH_DIRS:
            require(DATA / 'data' / 'synthetic' / name, f'synthetic task {name}', issues)
        require(DATA / 'data' / 'summaries' / 'figure_inputs', 'figure input cache', issues)
        require(DATA / 'data' / 'summaries' / 'table_inputs', 'table input cache', issues)

        # Absence of these two files is already reported via REQUIRED_DATA_FILES.
        viewer_path = DATA / 'data' / 'viewer' / 'tasks.jsonl'
        if viewer_path.exists():
            _check_viewer_rows(viewer_path, issues)
        manifest_path = DATA / 'MANIFEST.json'
        if manifest_path.exists():
            _check_manifest(manifest_path, issues)

    if issues:
        raise SystemExit('\n'.join(issues))
    print('artifact integrity ok')
|
|
|
|
# Run the integrity check only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|
|