Spaces:
Paused
Paused
File size: 4,738 Bytes
"""
Download sample CT scans for the Hugging Face Space demo.
"""
import os
import urllib.request
import zipfile
import shutil
import time
# Primary source: individual CT volumes served straight from the MONAI Model
# Zoo repository. Each entry gives the remote URL, the local file name to save
# it under, and a label used in progress messages.
MONAI_SAMPLES = [
    {
        "url": "https://raw.githubusercontent.com/Project-MONAI/model-zoo/dev/models/wholeBody_ct_segmentation/sampledata/imagesTr/s0037.nii.gz",
        "filename": "sample_ct_s0037.nii.gz",
        "description": "MONAI Model Zoo Sample s0037",
    },
    {
        "url": "https://raw.githubusercontent.com/Project-MONAI/model-zoo/dev/models/wholeBody_ct_segmentation/sampledata/imagesTr/s0038.nii.gz",
        "filename": "sample_ct_s0038.nii.gz",
        "description": "MONAI Model Zoo Sample s0038",
    },
]

# Secondary source: the zipped TotalSegmentator "small" subset on Zenodo.
ZENODO_URL = "https://zenodo.org/records/10047263/files/Totalsegmentator_dataset_small_v201.zip?download=1"
def download_file(url, output_path, description, timeout=60):
    """Download ``url`` to ``output_path`` and report whether it succeeded.

    The payload is streamed into ``<output_path>.part`` and only renamed to
    ``output_path`` once it has passed validation, so a failed or interrupted
    transfer never leaves a truncated file at ``output_path``.

    Args:
        url: Address to fetch.
        output_path: Destination path for the downloaded file.
        description: Human-readable label used in the progress messages.
        timeout: Seconds to wait on a blocked socket operation before the
            attempt is treated as failed.

    Returns:
        True if the file was saved to ``output_path``; False on any failure
        (the reason is printed rather than raised).
    """
    print(f"Downloading {description}...")
    print(f" Url: {url}")

    part_path = f"{output_path}.part"
    try:
        # User-Agent needed for some servers. It is attached to this request
        # only, instead of installing a process-wide opener on every call.
        request = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"})
        with urllib.request.urlopen(request, timeout=timeout) as response:
            expected = response.headers.get("Content-Length")
            with open(part_path, "wb") as target:
                shutil.copyfileobj(response, target)

        size = os.path.getsize(part_path)

        # A connection dropped mid-transfer can look like a normal end of
        # stream, so compare against the advertised length when there is one.
        if expected is not None and expected.strip().isdigit() and size < int(expected):
            raise OSError(f"incomplete download: got {size} of {expected} bytes")

        # Verify file size (sometimes GitHub returns 404 text file).
        if size < 1000:  # < 1KB likely error text
            # Read as bytes: a small but valid binary payload must not fail
            # with a decoding error.
            with open(part_path, "rb") as f:
                head = f.read(100)
            if b"Not Found" in head:
                print(" ✗ Downloaded file appears to be a 404 page.")
                return False

        os.replace(part_path, output_path)
    except Exception as e:
        # Best-effort boundary: callers decide what to do based on the flag.
        print(f" ✗ Failed: {e}")
        return False
    finally:
        # Only present if the download failed or was rejected above.
        if os.path.exists(part_path):
            os.remove(part_path)

    print(f" ✓ Success! Saved to {output_path} ({size/1024/1024:.2f} MB)")
    return True
def _extract_zenodo_samples(zip_path, examples_dir, limit=3):
    """Copy up to ``limit`` CT volumes out of the Zenodo archive.

    Members whose name ends with ``ct.nii.gz`` are taken in archive order and
    written to ``examples_dir`` as ``sample_ct_zenodo_<n>.nii.gz``.

    Returns:
        The number of samples written. Errors propagate to the caller; samples
        completed before the error are kept.
    """
    extracted = 0
    with zipfile.ZipFile(zip_path, 'r') as zf:
        # Structure is usually: Totalsegmentator_dataset_small_v201/subject_id/ct.nii.gz
        ct_files = [name for name in zf.namelist() if name.endswith('ct.nii.gz')]

        for index, ct_file in enumerate(ct_files[:limit], start=1):
            out_name = f"sample_ct_zenodo_{index}.nii.gz"
            out_path = os.path.join(examples_dir, out_name)
            # Write under a temporary name so a failed extraction cannot leave
            # a truncated ".nii.gz" that would later be listed as an example.
            part_path = out_path + ".part"
            try:
                with zf.open(ct_file) as source, open(part_path, 'wb') as target:
                    shutil.copyfileobj(source, target)
                os.replace(part_path, out_path)
            finally:
                if os.path.exists(part_path):
                    os.remove(part_path)
            print(f" ✓ Extracted {out_name}")
            extracted += 1
    return extracted


def setup_examples():
    """Populate the ``examples`` directory next to this file with CT scans.

    Each entry of ``MONAI_SAMPLES`` is downloaded unless its file is already
    present. Only when none of them is available is the Zenodo archive
    downloaded, up to three scans extracted from it, and the archive removed.

    Returns:
        Sorted list of the ``.nii.gz`` file names present in the examples
        directory when the function finishes.
    """
    examples_dir = os.path.join(os.path.dirname(__file__), "examples")
    os.makedirs(examples_dir, exist_ok=True)

    success_count = 0

    # 1. Try Direct MONAI Samples
    print("\n--- Attempting to download direct samples from MONAI Model Zoo ---")
    for sample in MONAI_SAMPLES:
        dest = os.path.join(examples_dir, sample["filename"])
        if os.path.exists(dest):
            print(f" ✓ {sample['filename']} already exists")
            success_count += 1
        elif download_file(sample["url"], dest, sample["description"]):
            success_count += 1

    # 2. If NO samples found/downloaded, try Zenodo Zip
    # We only do this if we really need data, as it's 3GB
    if success_count == 0:
        print("\n--- Direct downloads failed. Downloading Zenodo subset (WARNING: ~3.2GB) ---")
        zip_path = os.path.join(examples_dir, "temp_zenodo.zip")

        print(f"Downloading Zenodo zip to {zip_path}...")
        try:
            if download_file(ZENODO_URL, zip_path, "Zenodo TotalSegmentator Small Subset"):
                print("Extracting random samples from zip...")
                try:
                    success_count += _extract_zenodo_samples(zip_path, examples_dir)
                except Exception as e:
                    # Best effort: report and fall through to the listing.
                    print(f" ✗ Extraction failed: {e}")
        finally:
            # Cleanup zip. Runs on every path (failed download, failed
            # extraction, interruption) so the archive is never left behind.
            if os.path.exists(zip_path):
                print("Cleaning up zip file...")
                os.remove(zip_path)

    # 3. Check what we have. Sorted so the result does not depend on the
    # arbitrary order in which the directory is listed.
    final_files = sorted(f for f in os.listdir(examples_dir) if f.endswith('.nii.gz'))
    print(f"\nTotal example files in {examples_dir}: {len(final_files)}")
    print(final_files)
    return final_files
# Fetch the example scans only when this file is executed directly, not when
# it is imported.
if __name__ == "__main__":
    setup_examples()
|