Spaces:

IFMedTech
/

CT_Segmentation

Paused

File size: 4,738 Bytes

"""
Download sample CT scans for the Hugging Face Space demo.
"""

import os
import urllib.request
import zipfile
import shutil
import time

# Direct samples from MONAI Model Zoo.
# Each entry describes one file to fetch:
#   "url"         - address the file is downloaded from
#   "filename"    - name the file is saved under inside the examples directory
#   "description" - label shown in the progress output while downloading
MONAI_SAMPLES = [
    {
        "url": "https://raw.githubusercontent.com/Project-MONAI/model-zoo/dev/models/wholeBody_ct_segmentation/sampledata/imagesTr/s0037.nii.gz",
        "filename": "sample_ct_s0037.nii.gz",
        "description": "MONAI Model Zoo Sample s0037"
    },
    {
        "url": "https://raw.githubusercontent.com/Project-MONAI/model-zoo/dev/models/wholeBody_ct_segmentation/sampledata/imagesTr/s0038.nii.gz",
        "filename": "sample_ct_s0038.nii.gz",
        "description": "MONAI Model Zoo Sample s0038"
    }
]

# Fallback: Zenodo Small Subset.
# Only fetched by setup_examples() when none of the direct samples above are
# available; the script's own warning puts this archive at roughly 3.2GB.
ZENODO_URL = "https://zenodo.org/records/10047263/files/Totalsegmentator_dataset_small_v201.zip?download=1"

def download_file(url, output_path, description, timeout=60):
    """Download ``url`` to ``output_path`` and report whether it succeeded.

    The payload is streamed into a temporary ``<output_path>.part`` file and
    only moved to ``output_path`` after it has been validated, so a failed or
    interrupted transfer never leaves a truncated file at the destination
    (which callers would otherwise mistake for a finished download).

    Args:
        url: Address to fetch.
        output_path: Path the downloaded file is saved to.
        description: Human-readable label used in the progress messages.
        timeout: Seconds a single blocking network operation may take before
            the download is abandoned.

    Returns:
        True if the file was saved to ``output_path``; False if the download
        failed or the payload looked like an error page. Errors are printed,
        not raised.
    """
    print(f"Downloading {description}...")
    print(f"  Url: {url}")

    partial_path = f"{output_path}.part"
    # User-Agent needed for some servers. It is attached to this request only,
    # rather than installed as a process-wide urllib opener.
    request = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"})

    try:
        with urllib.request.urlopen(request, timeout=timeout) as response, \
                open(partial_path, "wb") as target:
            expected = response.headers.get("Content-Length")
            shutil.copyfileobj(response, target)

        size = os.path.getsize(partial_path)

        # A connection dropped mid-transfer can end the stream early without
        # an error; compare against the advertised length when there is one.
        if expected is not None and size < int(expected):
            raise OSError(
                f"incomplete download: got {size} of {expected} bytes"
            )

        # Verify file size (sometimes GitHub returns 404 text file).
        # Read as bytes: the payload may be binary, and decoding it as text
        # could fail on a perfectly valid small file.
        if size < 1000:  # < 1KB likely error text
            with open(partial_path, "rb") as f:
                head = f.read(100)
            if b"Not Found" in head:
                print("  ✗ Downloaded file appears to be a 404 page.")
                return False

        os.replace(partial_path, output_path)
        print(f"  ✓ Success! Saved to {output_path} ({size/1024/1024:.2f} MB)")
        return True
    except Exception as e:
        # Best-effort boundary: callers rely on a boolean, not on exceptions.
        print(f"  ✗ Failed: {e}")
        return False
    finally:
        # Reached on every path; the temp file only still exists on failure.
        if os.path.exists(partial_path):
            os.remove(partial_path)

def _extract_zenodo_samples(zip_path, examples_dir, limit=3):
    """Extract up to ``limit`` CT volumes from the Zenodo archive.

    Archive members whose name ends in ``ct.nii.gz`` are copied into
    ``examples_dir`` as ``sample_ct_zenodo_<n>.nii.gz``.

    Returns:
        The number of volumes extracted.

    Raises:
        Any error raised while reading the archive or writing a volume; the
        volume being written at that moment is removed first so no truncated
        file is left behind.
    """
    extracted = 0
    with zipfile.ZipFile(zip_path, 'r') as zf:
        # Find ct.nii.gz files inside the structure
        # Structure is usually: Totalsegmentator_dataset_small_v201/subject_id/ct.nii.gz
        ct_files = [name for name in zf.namelist() if name.endswith('ct.nii.gz')]

        for index, ct_file in enumerate(ct_files[:limit], start=1):
            out_name = f"sample_ct_zenodo_{index}.nii.gz"
            out_path = os.path.join(examples_dir, out_name)
            try:
                with zf.open(ct_file) as source, open(out_path, 'wb') as target:
                    shutil.copyfileobj(source, target)
            except Exception:
                # A half-written volume would later be listed as a valid example.
                if os.path.exists(out_path):
                    os.remove(out_path)
                raise

            print(f"  ✓ Extracted {out_name}")
            extracted += 1
    return extracted


def setup_examples():
    """Make sure sample CT scans exist in the ``examples`` directory.

    The directory is created next to this file if needed. Each entry of
    ``MONAI_SAMPLES`` is downloaded unless its file already exists. If none
    of them is available, and no previously extracted Zenodo sample is
    present, the Zenodo archive is downloaded and up to three CT volumes are
    extracted from it. The temporary archive is always removed afterwards.

    Returns:
        Sorted list of the ``.nii.gz`` file names found in the examples
        directory.
    """
    examples_dir = os.path.join(os.path.dirname(__file__), "examples")
    os.makedirs(examples_dir, exist_ok=True)

    success_count = 0

    # 1. Try Direct MONAI Samples
    print("\n--- Attempting to download direct samples from MONAI Model Zoo ---")
    for sample in MONAI_SAMPLES:
        dest = os.path.join(examples_dir, sample["filename"])
        if os.path.exists(dest):
            print(f"  ✓ {sample['filename']} already exists")
            success_count += 1
        elif download_file(sample["url"], dest, sample["description"]):
            success_count += 1

    # 2. If NO samples found/downloaded, try Zenodo Zip
    # We only do this if we really need data, as it's 3GB
    if success_count == 0:
        have_zenodo_samples = any(
            name.startswith("sample_ct_zenodo_") and name.endswith(".nii.gz")
            for name in os.listdir(examples_dir)
        )
        if have_zenodo_samples:
            # A previous run already paid for the large download.
            print("\n--- Direct downloads failed, but Zenodo samples already exist. Skipping download ---")
        else:
            print("\n--- Direct downloads failed. Downloading Zenodo subset (WARNING: ~3.2GB) ---")
            zip_path = os.path.join(examples_dir, "temp_zenodo.zip")

            print(f"Downloading Zenodo zip to {zip_path}...")
            try:
                # Note: This might timeout on some systems, simpler logic here
                if download_file(ZENODO_URL, zip_path, "Zenodo TotalSegmentator Small Subset"):
                    try:
                        print("Extracting random samples from zip...")
                        _extract_zenodo_samples(zip_path, examples_dir)
                    except Exception as e:
                        print(f"  ✗ Extraction failed: {e}")
            finally:
                # Cleanup zip, including a partial one left by a failed download.
                if os.path.exists(zip_path):
                    print("Cleaning up zip file...")
                    os.remove(zip_path)

    # 3. Check what we have
    # Sorted so the result does not depend on the directory listing order.
    final_files = sorted(f for f in os.listdir(examples_dir) if f.endswith('.nii.gz'))
    print(f"\nTotal example files in {examples_dir}: {len(final_files)}")
    print(final_files)

    return final_files

# Only fetch the examples when this file is executed directly as a script.
if __name__ == "__main__":
    setup_examples()