"""Test that mapping JSON files in dataLoader.py point to existing data files.

Randomly samples up to 30 entries per JSON file and checks that the file
paths exist on disk.
"""

import json
import os
import random
import sys


# Absolute, normalized path of the ``Dataloader`` directory, which is a sibling
# of the directory containing this module.
ROOT_DIR = os.path.normpath(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'Dataloader')
)

# Dataset name -> mapping JSON location, relative to ROOT_DIR.
_RELATIVE_MAPPING_FILES = {
    'MSD': 'nifty_mappings/MSD_mappings.json',
    'TotalSegmentor': 'nifty_mappings/TotalSegmentorCT_MRI_mappings.json',
    'Kaggle_osic': 'nifty_mappings/Kaggle_osic_mappings.json',
    'CancerImageArchive': 'nifty_mappings/CIA_mappings.json',
    'MnMs': 'nifty_mappings/MnMs_mappings.json',
    'Brats2019': 'nifty_mappings/Brats2019_mappings.json',
    'Brats2020': 'nifty_mappings/Brats2020_mappings.json',
    'Brats2021': 'nifty_mappings/Brats2021_mappings.json',
    'OASIS_1': 'nifty_mappings/OASIS_1_mappings.json',
    'OASIS_2': 'nifty_mappings/OASIS_2_mappings.json',
    'PSMA-FDG-PET-CT-LESION': 'nifty_mappings/PSMA-FDG-PET-CT-LESION_mappings.json',
    'PSMA-CT': 'nifty_mappings/PSMA-CT-Longitud_mappings.json',
    'AbdomenAtlas': 'nifty_mappings/AbdomenAtlas_mappings.json',
    'AbdomenCT1k': 'nifty_mappings/AbdomenCT1k_mappings.json',
}

# Dataset name -> mapping JSON path joined onto ROOT_DIR. Built with a
# comprehension so the dict is not rewritten while it is being iterated and no
# loop variables are left behind in the module namespace.
mapping_files = {
    name: os.path.join(ROOT_DIR, rel_path)
    for name, rel_path in _RELATIVE_MAPPING_FILES.items()
}

# Maximum number of entries checked per mapping file.
SAMPLE_SIZE = 30


def _check_mapping(name, json_path, sample_size=None):
    """Sample the keys of one mapping JSON and report which are not files.

    The JSON document is loaded, a random subset of its top-level keys is
    drawn with the module-level ``random`` generator, and each drawn key is
    tested with ``os.path.isfile``.

    Args:
        name: Dataset label of the mapping. Not used by the check itself;
            kept so existing callers can keep passing it.
        json_path: Path of the mapping JSON file to load.
        sample_size: Maximum number of keys to check. ``None`` (the default)
            means the module-level ``SAMPLE_SIZE``.

    Returns:
        A ``(total, sampled, missing_paths)`` tuple: the number of keys in
        the mapping, the number of keys actually checked, and the list of
        checked keys for which ``os.path.isfile`` returned ``False``.

    Raises:
        OSError: If ``json_path`` cannot be opened.
        ValueError: If the file is not valid JSON (``json.JSONDecodeError``)
            or is not valid UTF-8.
    """
    if sample_size is None:
        sample_size = SAMPLE_SIZE

    # Decode explicitly as UTF-8 instead of relying on the platform default.
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    all_keys = list(data.keys())
    total = len(all_keys)
    # Never ask for more keys than exist (or fewer than zero).
    sampled_keys = random.sample(all_keys, max(0, min(sample_size, total)))

    # NOTE(review): a relative key is resolved against the current working
    # directory, not ROOT_DIR -- confirm the mapping keys are absolute paths.
    missing = [key for key in sampled_keys if not os.path.isfile(key)]

    return total, len(sampled_keys), missing


def test_all_json_files_exist():
    """Every JSON mapping file listed in ``mapping_files`` must exist on disk.

    Fails with a message listing each dataset name together with the mapping
    path for which ``os.path.isfile`` returned ``False``.
    """
    missing_jsons = [
        (name, path)
        for name, path in mapping_files.items()
        if not os.path.isfile(path)
    ]
    assert not missing_jsons, (
        "Missing JSON mapping files:\n"
        + "\n".join(f" {name}: {path}" for name, path in missing_jsons)
    )


def test_mapping_paths_exist():
    """Check a seeded random sample of data file paths per mapping JSON.

    Each mapping in ``mapping_files`` is visited in sorted-name order and
    checked with ``_check_mapping``. A mapping counts as failed when its JSON
    file does not exist, cannot be loaded, or has sampled paths that are not
    files. All results are gathered first, so one assertion message reports
    every failing dataset; the report is printed when everything passes.
    """
    # Seed for a reproducible sample, but put the global RNG back afterwards
    # so this test does not silently make later tests deterministic.
    rng_state = random.getstate()
    random.seed(42)
    all_results = {}
    any_failure = False

    try:
        for name, json_path in sorted(mapping_files.items()):
            if not os.path.isfile(json_path):
                all_results[name] = f"JSON file not found: {json_path}"
                any_failure = True
                continue

            try:
                total, sampled, missing = _check_mapping(name, json_path)
            except (OSError, ValueError) as exc:
                # json.JSONDecodeError is a ValueError subclass. Record the
                # problem rather than aborting, so the remaining mappings
                # still get checked and reported.
                all_results[name] = f"could not load {json_path}: {exc}"
                any_failure = True
                continue

            all_results[name] = {
                'total_entries': total,
                'sampled': sampled,
                'missing_count': len(missing),
                'missing_paths': missing,
            }
            if missing:
                any_failure = True
    finally:
        random.setstate(rng_state)

    lines = []
    for name, result in sorted(all_results.items()):
        # A plain string marks a mapping that could not be checked at all.
        if isinstance(result, str):
            lines.append(f"\n[FAIL] {name}: {result}")
            continue
        status = "PASS" if result['missing_count'] == 0 else "FAIL"
        lines.append(
            f"\n[{status}] {name}: "
            f"{result['sampled']}/{result['total_entries']} sampled, "
            f"{result['missing_count']} missing"
        )
        lines.extend(f" MISSING: {p}" for p in result['missing_paths'])

    report = "\n".join(lines)
    assert not any_failure, f"Some data paths are missing or invalid:\n{report}"

    print(report)


def _main():
    """Run the mapping check as a script and print one line per dataset.

    Returns:
        ``True`` when every mapping JSON exists, loads, and all of its
        sampled paths are files; ``False`` otherwise.
    """
    print(f"Checking mapping files under: {ROOT_DIR}")
    print(f"Sampling {SAMPLE_SIZE} entries per JSON file\n")
    random.seed(42)

    overall_pass = True
    for name, json_path in sorted(mapping_files.items()):
        if not os.path.isfile(json_path):
            print(f"[SKIP] {name}: JSON not found at {json_path}")
            overall_pass = False
            continue

        try:
            total, sampled, missing = _check_mapping(name, json_path)
        except (OSError, ValueError) as exc:
            # json.JSONDecodeError is a ValueError subclass. Report the bad
            # mapping and keep going so the other datasets are still checked.
            print(f"[FAIL] {name}: could not load {json_path}: {exc}")
            overall_pass = False
            continue

        status = "PASS" if not missing else "FAIL"
        if missing:
            overall_pass = False
        print(f"[{status}] {name}: {sampled}/{total} sampled, {len(missing)} missing")
        for p in missing:
            print(f" MISSING: {p}")

    print()
    if overall_pass:
        print("All checked paths exist.")
    else:
        print("Some paths are MISSING — mappings may need updating.")
    return overall_pass


if __name__ == '__main__':
    # A non-zero exit status signals failure to the calling shell.
    if not _main():
        sys.exit(1)