File size: 4,893 Bytes
"""Test that mapping JSON files in dataLoader.py point to existing data files.

Randomly samples 30 entries per JSON file and checks that the file paths exist on disk.
"""
import os
import sys
import json
import random
# Resolve paths relative to the Dataloader directory, matching dataLoader.py logic.
# The directory is looked up as a sibling of the folder that contains this file.
ROOT_DIR = os.path.normpath(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'Dataloader')
)

# Replicate the mapping_files dict from dataLoader.py: dataset name -> mapping
# JSON location, written relative to ROOT_DIR.
_RELATIVE_MAPPING_FILES = {
    'MSD': 'nifty_mappings/MSD_mappings.json',
    'TotalSegmentor': 'nifty_mappings/TotalSegmentorCT_MRI_mappings.json',
    'Kaggle_osic': 'nifty_mappings/Kaggle_osic_mappings.json',
    'CancerImageArchive': 'nifty_mappings/CIA_mappings.json',
    'MnMs': 'nifty_mappings/MnMs_mappings.json',
    'Brats2019': 'nifty_mappings/Brats2019_mappings.json',
    'Brats2020': 'nifty_mappings/Brats2020_mappings.json',
    'Brats2021': 'nifty_mappings/Brats2021_mappings.json',
    'OASIS_1': 'nifty_mappings/OASIS_1_mappings.json',
    'OASIS_2': 'nifty_mappings/OASIS_2_mappings.json',
    'PSMA-FDG-PET-CT-LESION': 'nifty_mappings/PSMA-FDG-PET-CT-LESION_mappings.json',
    'PSMA-CT': 'nifty_mappings/PSMA-CT-Longitud_mappings.json',
    'AbdomenAtlas': 'nifty_mappings/AbdomenAtlas_mappings.json',
    'AbdomenCT1k': 'nifty_mappings/AbdomenCT1k_mappings.json',
}

# Dataset name -> path of its mapping JSON under ROOT_DIR. Built as a fresh
# dict rather than by overwriting the values of a dict while iterating over it.
mapping_files = {
    dataset: os.path.join(ROOT_DIR, relative_path)
    for dataset, relative_path in _RELATIVE_MAPPING_FILES.items()
}
# Maximum number of entries drawn from each mapping JSON.
SAMPLE_SIZE = 30


def _check_mapping(name, json_path):
    """Sample entries of one mapping JSON and report which are not files on disk.

    The top-level keys of the JSON object are treated as file paths. At most
    ``SAMPLE_SIZE`` of them are drawn with ``random.sample`` (so the draw
    follows the state of the global ``random`` generator) and each drawn key
    is passed unchanged to ``os.path.isfile``.

    Args:
        name: Label of the mapping being checked. It is not used by the check
            itself; the parameter is kept so existing callers keep working.
        json_path: Path of the mapping JSON file to load.

    Returns:
        A ``(total, sampled, missing_paths)`` tuple: the number of keys in the
        JSON, the number of keys that were sampled, and the list of sampled
        keys for which ``os.path.isfile`` returned False.
    """
    # JSON text is UTF-8; decode it explicitly instead of relying on the
    # platform's default encoding, which can mangle non-ASCII paths.
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    all_keys = list(data.keys())
    total = len(all_keys)
    # min() keeps random.sample from raising when the mapping is small.
    sampled_keys = random.sample(all_keys, min(SAMPLE_SIZE, total))
    missing = [key for key in sampled_keys if not os.path.isfile(key)]
    return total, len(sampled_keys), missing
def test_all_json_files_exist():
    """Fail if any path registered in ``mapping_files`` is not a file on disk.

    The assertion message lists every offending dataset name with its path.
    """
    absent = [
        (dataset, json_file)
        for dataset, json_file in mapping_files.items()
        if not os.path.isfile(json_file)
    ]
    assert not absent, (
        "Missing JSON mapping files:\n"
        + "\n".join(f" {dataset}: {json_file}" for dataset, json_file in absent)
    )
def test_mapping_paths_exist():
    """Spot-check that sampled data paths of every mapping JSON exist on disk.

    Mappings are visited in name order. For each one, ``_check_mapping`` draws
    a random sample of its entries; a missing mapping JSON or any missing
    sampled path makes the test fail. The assertion message carries a
    per-mapping report, and the same report is printed when the test passes.
    """
    all_results = {}
    any_failure = False

    # Seed for a reproducible sample, then hand the global generator back in
    # its previous state so this test does not alter the random numbers seen
    # by code that runs after it.
    saved_rng_state = random.getstate()
    random.seed(42)
    try:
        for name, json_path in sorted(mapping_files.items()):
            if not os.path.isfile(json_path):
                # A plain string (instead of a dict) marks "JSON not found".
                all_results[name] = f"JSON file not found: {json_path}"
                any_failure = True
                continue

            total, sampled, missing = _check_mapping(name, json_path)
            all_results[name] = {
                'total_entries': total,
                'sampled': sampled,
                'missing_count': len(missing),
                'missing_paths': missing,
            }
            if missing:
                any_failure = True
    finally:
        random.setstate(saved_rng_state)

    # Build a readable report
    lines = []
    for name in sorted(all_results):
        result = all_results[name]
        if isinstance(result, str):
            lines.append(f"\n[FAIL] {name}: {result}")
            continue
        status = "PASS" if result['missing_count'] == 0 else "FAIL"
        lines.append(
            f"\n[{status}] {name}: "
            f"{result['sampled']}/{result['total_entries']} sampled, "
            f"{result['missing_count']} missing"
        )
        lines.extend(f" MISSING: {p}" for p in result['missing_paths'])
    report = "\n".join(lines)

    assert not any_failure, f"Some data paths are missing or invalid:\n{report}"
    # Print report on success too (pytest captures it; run with -s to see it)
    print(report)
def _run_cli_check():
    """Print one PASS/FAIL/SKIP line per mapping JSON plus a closing summary.

    Returns True when every mapping JSON was found and none of the sampled
    paths was missing, False otherwise.
    """
    print(f"Checking mapping files under: {ROOT_DIR}")
    print(f"Sampling {SAMPLE_SIZE} entries per JSON file\n")
    random.seed(42)

    everything_ok = True
    for dataset, mapping_json in sorted(mapping_files.items()):
        if not os.path.isfile(mapping_json):
            # Reported as SKIP, but still counted as a failure of the run.
            print(f"[SKIP] {dataset}: JSON not found at {mapping_json}")
            everything_ok = False
            continue

        n_total, n_sampled, not_found = _check_mapping(dataset, mapping_json)
        if not_found:
            tag = "FAIL"
            everything_ok = False
        else:
            tag = "PASS"
        print(f"[{tag}] {dataset}: {n_sampled}/{n_total} sampled, {len(not_found)} missing")
        for path in not_found:
            print(f" MISSING: {path}")

    # NOTE(review): the original indentation was lost in this copy; this blank
    # line is assumed to sit after the loop, before the summary - confirm.
    print()
    if everything_ok:
        print("All checked paths exist.")
    else:
        print("Some paths are MISSING — mappings may need updating.")
    return everything_ok


if __name__ == '__main__':
    if not _run_cli_check():
        sys.exit(1)
|