"""Report sampling progress for LIDC-IDRI test slices.

Reads a list of expected slice images, scans a directory of generated sample
files, and prints how many expected slices already have a full set of samples,
plus a few summary statistics about the sample directory.
"""

import os
import re
from collections import Counter, defaultdict
from typing import Optional, Tuple

# Compiled once at import time instead of on every call. Both patterns are
# applied with ``search``, so they may match anywhere inside the input string.
_TEST_LIST_PATTERN = re.compile(
    r'LIDC-IDRI-(\d+)/nodule-(\d+)/images/slice-(\d+)\.png'
)
_SAMPLE_FILE_PATTERN = re.compile(
    r'LIDC-IDRI-(\d+)_nodule-(\d+)_slice-(\d+)_(.+)\.png'
)


def parse_test_list_line(line: str) -> Optional[Tuple[str, str, str]]:
    """Extract the slice identity from one line of the test list.

    Example input:
        LIDC-IDRI-slices/LIDC-IDRI-0004/nodule-0/images/slice-0.png

    Args:
        line: A raw line from the test list; surrounding whitespace is ignored.

    Returns:
        ``(patient_id, nodule_id, slice_id)`` as digit strings exactly as they
        appear in the path (leading zeros are kept), or ``None`` when the line
        does not contain a matching path.
    """
    match = _TEST_LIST_PATTERN.search(line.strip())
    if match is None:
        return None
    patient_id, nodule_id, slice_id = match.groups()
    return patient_id, nodule_id, slice_id


def parse_sample_filename(filename: str) -> Optional[Tuple[str, str, str, str]]:
    """Extract the slice identity and file type from a sample file name.

    Example input:
        LIDC-IDRI-0004_nodule-0_slice-0_sample_00.png

    Args:
        filename: A file name from the samples directory.

    Returns:
        ``(patient_id, nodule_id, slice_id, file_type)`` where ``file_type`` is
        the text between the slice id and the ``.png`` suffix (``sample_00`` in
        the example), or ``None`` when the name does not match.
    """
    match = _SAMPLE_FILE_PATTERN.search(filename)
    if match is None:
        return None
    patient_id, nodule_id, slice_id, file_type = match.groups()
    return patient_id, nodule_id, slice_id, file_type


def main(
    test_list_path: str = 'test_list.txt',
    samples_dir: str = 'results/samples/samples',
    num_samples: int = 16,
) -> None:
    """Print a progress report comparing expected slices to generated samples.

    A slice counts as completed when the samples directory contains a file of
    type ``sample_00`` through ``sample_{num_samples - 1:02d}`` for it.

    Args:
        test_list_path: Text file with one expected image path per line.
        samples_dir: Directory whose file names are scanned for samples.
        num_samples: Number of samples required for a slice to be complete.

    Raises:
        OSError: If the test list cannot be opened or the samples directory
            cannot be listed (for example, ``FileNotFoundError``).
    """
    # Expected (patient_id, nodule_id, slice_id) keys; unparseable lines are
    # skipped and duplicates collapse because this is a set.
    expected = set()
    with open(test_list_path, 'r', encoding='utf-8') as f:
        for line in f:
            parsed = parse_test_list_line(line)
            if parsed:
                expected.add(parsed)

    print(f"Total expected images: {len(expected)}")

    # Group the file types found on disk by (patient_id, nodule_id, slice_id).
    sample_groups = defaultdict(set)
    for filename in os.listdir(samples_dir):
        parsed = parse_sample_filename(filename)
        if parsed:
            patient_id, nodule_id, slice_id, file_type = parsed
            sample_groups[(patient_id, nodule_id, slice_id)].add(file_type)

    # A slice is complete when every required sample type is present.
    # ``.get`` is used so that probing a missing key does not insert an empty
    # group into the defaultdict and distort the statistics below.
    required = {f'sample_{i:02d}' for i in range(num_samples)}
    completed = sum(
        1 for key in expected if required <= sample_groups.get(key, set())
    )

    # Guard the percentage: an empty or fully unparseable test list would
    # otherwise raise ZeroDivisionError.
    percent = completed / len(expected) * 100 if expected else 0.0

    print(f"Completed images (with all {num_samples} samples): {completed}")
    print(f"Progress: {completed}/{len(expected)} ({percent:.2f}%)")

    # Additional analysis over everything found on disk, including slices that
    # are not in the expected list.
    total_samples = sum(len(types) for types in sample_groups.values())
    unique_patients = len({key[0] for key in sample_groups})
    print(f"Total sample files: {total_samples}")
    print(f"Unique patients in samples: {unique_patients}")

    # Number of slices that have each file type.
    type_counts = Counter(
        file_type for types in sample_groups.values() for file_type in types
    )
    print("File type counts:")
    for file_type, count in sorted(type_counts.items()):
        print(f" {file_type}: {count}")


if __name__ == "__main__":
    main()