Omini3D / tests /test_mapping_paths.py
maxmo2009's picture
Sync from local: code + epoch-110 checkpoint, clean README
2af0e94 verified
"""Test that mapping JSON files in dataLoader.py point to existing data files.
Randomly samples 30 entries per JSON file and checks that the file paths exist on disk.
"""
import os
import sys
import json
import random
# Mapping JSONs are located relative to the sibling ``Dataloader`` directory,
# matching the path logic in dataLoader.py.
ROOT_DIR = os.path.normpath(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'Dataloader')
)

# Copy of the mapping_files dict from dataLoader.py: dataset name -> mapping
# JSON path. Entries start out relative and are anchored at ROOT_DIR below.
mapping_files = {
    'MSD': 'nifty_mappings/MSD_mappings.json',
    'TotalSegmentor': 'nifty_mappings/TotalSegmentorCT_MRI_mappings.json',
    'Kaggle_osic': 'nifty_mappings/Kaggle_osic_mappings.json',
    'CancerImageArchive': 'nifty_mappings/CIA_mappings.json',
    'MnMs': 'nifty_mappings/MnMs_mappings.json',
    'Brats2019': 'nifty_mappings/Brats2019_mappings.json',
    'Brats2020': 'nifty_mappings/Brats2020_mappings.json',
    'Brats2021': 'nifty_mappings/Brats2021_mappings.json',
    'OASIS_1': 'nifty_mappings/OASIS_1_mappings.json',
    'OASIS_2': 'nifty_mappings/OASIS_2_mappings.json',
    'PSMA-FDG-PET-CT-LESION': 'nifty_mappings/PSMA-FDG-PET-CT-LESION_mappings.json',
    'PSMA-CT': 'nifty_mappings/PSMA-CT-Longitud_mappings.json',
    'AbdomenAtlas': 'nifty_mappings/AbdomenAtlas_mappings.json',
    'AbdomenCT1k': 'nifty_mappings/AbdomenCT1k_mappings.json',
}
mapping_files = {
    dataset: os.path.join(ROOT_DIR, relative_json)
    for dataset, relative_json in mapping_files.items()
}

# Upper bound on how many entries are checked per mapping JSON.
SAMPLE_SIZE = 30
def _check_mapping(name, json_path):
"""Load a mapping JSON, sample up to 30 keys, and check if files exist.
Returns (total, sampled, missing_paths).
"""
with open(json_path, 'r') as f:
data = json.load(f)
all_keys = list(data.keys())
total = len(all_keys)
sampled_keys = random.sample(all_keys, min(SAMPLE_SIZE, total))
missing = []
for key in sampled_keys:
if not os.path.isfile(key):
missing.append(key)
return total, len(sampled_keys), missing
def test_all_json_files_exist():
    """Assert that each mapping JSON registered in ``mapping_files`` is a file on disk.

    On failure, the assertion message lists every missing mapping as
    ``name: path``, one per line.
    """
    absent = [
        (dataset, json_path)
        for dataset, json_path in mapping_files.items()
        if not os.path.isfile(json_path)
    ]
    assert not absent, "Missing JSON mapping files:\n" + "\n".join(
        f" {dataset}: {json_path}" for dataset, json_path in absent
    )
def test_mapping_paths_exist():
    """Spot-check sampled data paths from every mapping JSON.

    Seeds the global ``random`` module with 42 so the sample is the same on
    every run, checks each mapping in sorted name order via ``_check_mapping``,
    and fails with a per-dataset report if any mapping JSON is absent or any
    sampled path is not a file. The report is printed when the test passes.
    """
    random.seed(42)
    outcomes = {}
    failed = False

    for dataset, mapping_json in sorted(mapping_files.items()):
        if os.path.isfile(mapping_json):
            total, sampled, missing = _check_mapping(dataset, mapping_json)
            outcomes[dataset] = {
                'total_entries': total,
                'sampled': sampled,
                'missing_count': len(missing),
                'missing_paths': missing,
            }
            if missing:
                failed = True
        else:
            # A plain string marks "mapping JSON itself is absent".
            outcomes[dataset] = f"JSON file not found: {mapping_json}"
            failed = True

    # Assemble the human-readable report.
    report_lines = []
    for dataset, outcome in sorted(outcomes.items()):
        if isinstance(outcome, str):
            report_lines.append(f"\n[FAIL] {dataset}: {outcome}")
            continue
        verdict = "FAIL" if outcome['missing_count'] != 0 else "PASS"
        report_lines.append(
            f"\n[{verdict}] {dataset}: "
            f"{outcome['sampled']}/{outcome['total_entries']} sampled, "
            f"{outcome['missing_count']} missing"
        )
        report_lines.extend(f" MISSING: {path}" for path in outcome['missing_paths'])
    report = "\n".join(report_lines)

    assert not failed, f"Some data paths are missing or invalid:\n{report}"
    # Also show the report on success (visible with pytest -v or -s).
    print(report)
if __name__ == '__main__':
    # Script mode: run the same sampled check without pytest, print one line
    # per mapping, and exit with status 1 if anything is missing.
    print(f"Checking mapping files under: {ROOT_DIR}")
    print(f"Sampling {SAMPLE_SIZE} entries per JSON file\n")
    random.seed(42)

    problems = 0
    for dataset, mapping_json in sorted(mapping_files.items()):
        if not os.path.isfile(mapping_json):
            print(f"[SKIP] {dataset}: JSON not found at {mapping_json}")
            problems += 1
            continue

        total, sampled, missing = _check_mapping(dataset, mapping_json)
        if missing:
            problems += 1
            label = "FAIL"
        else:
            label = "PASS"
        print(f"[{label}] {dataset}: {sampled}/{total} sampled, {len(missing)} missing")
        for path in missing:
            print(f" MISSING: {path}")

    print()
    if problems:
        print("Some paths are MISSING — mappings may need updating.")
        sys.exit(1)
    else:
        print("All checked paths exist.")