| import os
|
| import re
|
| from collections import defaultdict
|
|
|
def parse_test_list_line(line):
    """Extract the image identifiers from one line of the test list.

    The line is stripped and then searched for a path fragment of the form
    ``LIDC-IDRI-<patient>/nodule-<nodule>/images/slice-<slice>.png``. Text
    before or after that fragment is ignored.

    Args:
        line: One raw line of text from the test list file.

    Returns:
        A ``(patient_id, nodule_id, slice_id)`` tuple of digit strings
        (kept exactly as written, including leading zeros), or ``None``
        when the line contains no such path.
    """
    match = re.search(
        r'LIDC-IDRI-(\d+)/nodule-(\d+)/images/slice-(\d+)\.png',
        line.strip(),
    )
    if match is None:
        return None
    patient_id, nodule_id, slice_id = match.groups()
    return patient_id, nodule_id, slice_id
|
|
|
def parse_sample_filename(filename):
    """Split a generated sample filename into its identifying parts.

    Recognised names look like
    ``LIDC-IDRI-<patient>_nodule-<nodule>_slice-<slice>_<file_type>.png``.
    The name must end in ``.png``: files that merely contain that text
    (for example a leftover ``....png.tmp``) are rejected so they are not
    mistaken for finished samples.

    Args:
        filename: Bare file name, as returned by ``os.listdir``.

    Returns:
        A ``(patient_id, nodule_id, slice_id, file_type)`` tuple of strings,
        or ``None`` when the name does not follow the pattern.
    """
    match = re.search(
        # ``$`` anchors the extension at the end of the name.
        r'LIDC-IDRI-(\d+)_nodule-(\d+)_slice-(\d+)_(.+)\.png$',
        filename,
    )
    if match is None:
        return None
    patient_id, nodule_id, slice_id, file_type = match.groups()
    return patient_id, nodule_id, slice_id, file_type
|
|
|
def main(test_list_path='test_list.txt',
         samples_dir='results/samples/samples',
         num_samples=16):
    """Print a progress report comparing generated samples to the test list.

    Reads the expected images from the test list, groups the files found in
    the samples directory by image, and prints how many expected images
    already have every required sample plus some summary statistics.

    Args:
        test_list_path: Text file with one image path per line; lines that
            ``parse_test_list_line`` cannot parse are skipped.
        samples_dir: Directory whose file names are parsed with
            ``parse_sample_filename``; unparseable names are skipped.
        num_samples: Number of samples an image needs to count as
            completed. The required file types are ``sample_00`` up to
            ``sample_<num_samples - 1>`` (two-digit, zero-padded).

    Raises:
        OSError: If the test list cannot be opened or the samples directory
            cannot be listed.
    """
    # Each expected image is identified by (patient_id, nodule_id, slice_id).
    expected = set()
    with open(test_list_path, 'r') as f:
        for line in f:
            parsed = parse_test_list_line(line)
            if parsed:
                expected.add(parsed)

    print(f"Total expected images: {len(expected)}")

    # Map each image key to the set of file types present on disk.
    sample_groups = defaultdict(set)
    for filename in os.listdir(samples_dir):
        parsed = parse_sample_filename(filename)
        if parsed:
            patient_id, nodule_id, slice_id, file_type = parsed
            sample_groups[(patient_id, nodule_id, slice_id)].add(file_type)

    # An image is complete once every required sample type is present.
    # ``.get`` is used so the lookup does not insert empty entries into the
    # defaultdict and distort the statistics below.
    required = {f'sample_{i:02d}' for i in range(num_samples)}
    completed = sum(
        1 for key in expected
        if required <= sample_groups.get(key, set())
    )

    # The label is derived from num_samples so it cannot drift from the
    # check above.
    print(f"Completed images (with all {num_samples} samples): {completed}")

    # Guard against an empty test list, which would otherwise divide by zero.
    percent = completed / len(expected) * 100 if expected else 0.0
    print(f"Progress: {completed}/{len(expected)} ({percent:.2f}%)")

    total_samples = sum(len(types) for types in sample_groups.values())
    unique_patients = len({key[0] for key in sample_groups})
    print(f"Total sample files: {total_samples}")
    print(f"Unique patients in samples: {unique_patients}")

    # Count, per file type, how many images have a file of that type.
    type_counts = defaultdict(int)
    for types in sample_groups.values():
        for t in types:
            type_counts[t] += 1

    print("File type counts:")
    for t, count in sorted(type_counts.items()):
        print(f" {t}: {count}")
|
|
|
if __name__ == "__main__":
    # Run the report only when executed as a script, not when imported.
    main()
|
|