File size: 4,738 Bytes
3e09c97
 
 
 
 
 
 
 
c4c873f
3e09c97
c4c873f
 
3e09c97
c4c873f
 
 
 
 
 
 
 
3e09c97
 
 
c4c873f
 
3e09c97
c4c873f
3e09c97
c4c873f
3e09c97
c4c873f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3e09c97
 
 
 
 
 
 
 
 
c4c873f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3e09c97
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
"""
Download sample CT scans for the Hugging Face Space demo.
"""

import os
import urllib.request
import zipfile
import shutil
import time

# Sample volumes served straight from the MONAI Model Zoo repository.
# Every entry has the same shape, so the list is generated from the sample ids.
_MONAI_SAMPLE_URL_ROOT = (
    "https://raw.githubusercontent.com/Project-MONAI/model-zoo/dev/models/"
    "wholeBody_ct_segmentation/sampledata/imagesTr"
)
_MONAI_SAMPLE_IDS = ("s0037", "s0038")

MONAI_SAMPLES = [
    {
        "url": f"{_MONAI_SAMPLE_URL_ROOT}/{sample_id}.nii.gz",
        "filename": f"sample_ct_{sample_id}.nii.gz",
        "description": f"MONAI Model Zoo Sample {sample_id}",
    }
    for sample_id in _MONAI_SAMPLE_IDS
]

# Fallback source: the small TotalSegmentator subset hosted on Zenodo (one zip).
ZENODO_URL = (
    "https://zenodo.org/records/10047263/files/"
    "Totalsegmentator_dataset_small_v201.zip?download=1"
)

def download_file(url, output_path, description, timeout=60):
    """Download ``url`` to ``output_path`` and report progress on stdout.

    The payload is streamed into ``<output_path>.part`` and only moved to
    ``output_path`` once it has been fully received and checked, so a failed
    or interrupted transfer never leaves a truncated file at ``output_path``.

    Args:
        url: Address to fetch.
        output_path: Destination path of the downloaded file.
        description: Human-readable label used in the progress messages.
        timeout: Socket timeout in seconds for the connection and each
            blocking read; ``None`` waits indefinitely.

    Returns:
        True if the file was downloaded and saved, False on any failure
        (the error is printed, not raised).
    """
    print(f"Downloading {description}...")
    print(f"  Url: {url}")
    partial_path = f"{output_path}.part"
    try:
        # User-Agent needed for some servers. It is set on this request only,
        # so no process-wide urllib opener is installed as a side effect.
        request = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"})
        with urllib.request.urlopen(request, timeout=timeout) as response, \
                open(partial_path, "wb") as target:
            shutil.copyfileobj(response, target)

        # Verify file size (sometimes GitHub returns 404 text file)
        size = os.path.getsize(partial_path)
        if size < 1000:  # < 1KB likely error text
            # Inspect raw bytes: the payload may be binary, and decoding it as
            # text could raise and wrongly turn a good download into a failure.
            with open(partial_path, "rb") as f:
                head = f.read(100)
            if b"Not Found" in head:
                print(f"  ✗ Downloaded file appears to be a 404 page.")
                return False

        # Promote the finished download to its final name.
        os.replace(partial_path, output_path)
        print(f"  ✓ Success! Saved to {output_path} ({size/1024/1024:.2f} MB)")
        return True
    except Exception as e:
        # Deliberate best-effort boundary: callers only need success/failure.
        print(f"  ✗ Failed: {e}")
        return False
    finally:
        # Still present only when the download failed or was rejected.
        if os.path.exists(partial_path):
            os.remove(partial_path)

def _extract_ct_samples(zip_path, examples_dir, limit=3):
    """Copy the first ``limit`` ``ct.nii.gz`` members of a zip into ``examples_dir``.

    Extraction is best-effort: an error is printed rather than raised, and a
    file that was only partially written when the error occurred is removed.

    Returns:
        The number of files fully extracted.
    """
    extracted = 0
    pending_path = None  # output currently being written, if any
    try:
        print("Extracting random samples from zip...")
        with zipfile.ZipFile(zip_path, 'r') as zf:
            # Find ct.nii.gz files inside the structure
            # Structure is usually: Totalsegmentator_dataset_small_v201/subject_id/ct.nii.gz
            ct_files = [name for name in zf.namelist() if name.endswith('ct.nii.gz')]

            for index, ct_file in enumerate(ct_files[:limit], start=1):
                out_name = f"sample_ct_zenodo_{index}.nii.gz"
                pending_path = os.path.join(examples_dir, out_name)

                with zf.open(ct_file) as source, open(pending_path, 'wb') as target:
                    shutil.copyfileobj(source, target)

                pending_path = None
                print(f"  ✓ Extracted {out_name}")
                extracted += 1
    except Exception as e:
        print(f"  ✗ Extraction failed: {e}")
        # Do not leave a truncated volume that would be listed as an example.
        if pending_path is not None and os.path.exists(pending_path):
            os.remove(pending_path)
    return extracted


def setup_examples():
    """Ensure the ``examples`` directory next to this file holds sample CT scans.

    Each entry of ``MONAI_SAMPLES`` is downloaded unless its file already
    exists. Only when none of them is available is the Zenodo archive
    downloaded and up to three ``ct.nii.gz`` volumes extracted from it; the
    temporary archive is always deleted afterwards.

    Returns:
        Sorted list of the ``.nii.gz`` file names present in the examples
        directory.
    """
    examples_dir = os.path.join(os.path.dirname(__file__), "examples")
    os.makedirs(examples_dir, exist_ok=True)

    success_count = 0

    # 1. Try Direct MONAI Samples
    print("\n--- Attempting to download direct samples from MONAI Model Zoo ---")
    for sample in MONAI_SAMPLES:
        dest = os.path.join(examples_dir, sample["filename"])
        if os.path.exists(dest):
            print(f"  ✓ {sample['filename']} already exists")
            success_count += 1
        elif download_file(sample["url"], dest, sample["description"]):
            success_count += 1

    # 2. If NO samples found/downloaded, try Zenodo Zip
    # We only do this if we really need data, as it's 3GB
    if success_count == 0:
        print("\n--- Direct downloads failed. Downloading Zenodo subset (WARNING: ~3.2GB) ---")
        zip_path = os.path.join(examples_dir, "temp_zenodo.zip")

        print(f"Downloading Zenodo zip to {zip_path}...")
        try:
            if download_file(ZENODO_URL, zip_path, "Zenodo TotalSegmentator Small Subset"):
                _extract_ct_samples(zip_path, examples_dir)
        finally:
            # Cleanup zip, including a partial archive left by a failed download.
            if os.path.exists(zip_path):
                print("Cleaning up zip file...")
                os.remove(zip_path)

    # 3. Check what we have (sorted so the result does not depend on listing order)
    final_files = sorted(f for f in os.listdir(examples_dir) if f.endswith('.nii.gz'))
    print(f"\nTotal example files in {examples_dir}: {len(final_files)}")
    print(final_files)

    return final_files

# Script entry point: populate the examples directory only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    setup_examples()