# Source: Hugging Face Space issoufzousko07/elephmind-api (status: Sleeping)
# File size: 4,930 bytes - commit a29fdb5

import pydicom
import logging
import hashlib
from typing import Tuple, Dict, Any, Optional
from pathlib import Path
import os
import io

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Mandatory DICOM Tags for Medical Validity
# validate_dicom() rejects any file in which one of these keywords is absent.
# Only presence is checked there, not whether the element has a value.
REQUIRED_TAGS = [
    'PatientID',
    'StudyInstanceUID',
    'SeriesInstanceUID',
    'Modality',
    'PixelSpacing', # Crucial for measurements
    # 'ImageOrientationPatient' # Often missing in simple CR/DX, but critical for CT/MRI
]

# Tags to Anonymize (PHI)
# anonymize_dicom() overwrites each of these when present. PatientID is not
# listed because it is handled separately there (replaced by a hashed ID).
# NOTE(review): this list is short; other identifying attributes (e.g.
# AccessionNumber, OtherPatientIDs, dates other than birth date) are not
# covered -- confirm against the de-identification requirements.
PHI_TAGS = [
    'PatientName',
    'PatientBirthDate',
    'PatientAddress',
    'InstitutionName',
    'ReferringPhysicianName'
]

def validate_dicom(file_bytes: bytes) -> pydicom.dataset.FileDataset:
    """
    Strictly validate an uploaded DICOM file and return the parsed dataset.

    Args:
        file_bytes: Raw content of the uploaded file.

    Returns:
        The dataset parsed by pydicom, including its pixel data element.

    Raises:
        ValueError: If the bytes cannot be parsed as DICOM, if any keyword in
            REQUIRED_TAGS is absent, or if the file has no PixelData element.
    """
    try:
        # 1. Parse the whole file. Pixel data is deliberately read as well
        #    (stop_before_pixels=False): step 3 needs to see the PixelData
        #    element, which would be missing from a header-only read.
        ds = pydicom.dcmread(io.BytesIO(file_bytes), stop_before_pixels=False)
    except Exception as e:
        # Any parser failure on untrusted input is reported uniformly as a
        # ValueError; the original error is kept as the explicit cause.
        raise ValueError(f"Invalid DICOM format: {str(e)}") from e

    # 2. Check Mandatory Tags (presence only; empty values are not rejected)
    missing_tags = [tag for tag in REQUIRED_TAGS if tag not in ds]
    if missing_tags:
        # Modality specific relaxation could go here, but strict for now
        raise ValueError(f"Missing critical DICOM tags: {missing_tags}")

    # 3. Check Pixel Data presence
    if 'PixelData' not in ds:
        raise ValueError("DICOM file has no image data (PixelData missing).")

    return ds

# Value representations that hold free text, for which the literal marker
# "ANONYMIZED" is a well-formed value. Elements with any other VR (for example
# PatientBirthDate, VR DA) are blanked instead, because the marker would be a
# malformed value for them.
_TEXT_VRS = frozenset({"PN", "LO", "SH", "ST", "LT", "UT", "UC"})


def anonymize_dicom(ds: pydicom.dataset.FileDataset) -> pydicom.dataset.FileDataset:
    """
    Remove PHI from a dataset, in place.

    PatientID is replaced by "ANON-" plus the first 16 hex digits (upper-case)
    of its SHA-256 digest, so records of the same patient stay linkable.
    Every keyword in PHI_TAGS that is present in the dataset is overwritten:
    free-text elements get the marker "ANONYMIZED", all others are set to an
    empty value.

    Only top-level elements of the dataset are touched.

    Args:
        ds: Parsed DICOM dataset. It is modified in place.

    Returns:
        The same dataset object, after modification.
    """
    # Hash PatientID to keep linkable anonymous ID
    # NOTE(review): the hash is unsalted and truncated; patient IDs are
    # usually short, so the mapping could be recovered by brute force.
    # Consider a keyed hash (HMAC with a server-side secret).
    original_id = str(ds.get('PatientID', 'Unknown'))
    hashed_id = hashlib.sha256(original_id.encode()).hexdigest()[:16].upper()

    ds.PatientID = f"ANON-{hashed_id}"

    # Wipe other fields
    for tag in PHI_TAGS:
        if tag not in ds:
            continue
        element = ds.data_element(tag)
        # Keep the visible marker where the VR allows arbitrary text;
        # otherwise clear the value so the saved file stays well-formed.
        element.value = "ANONYMIZED" if element.VR in _TEXT_VRS else ""

    return ds

def _pixel_spacing_as_floats(raw_spacing: Any) -> list:
    """Return PixelSpacing as a plain list of floats, or [1.0, 1.0] if unusable."""
    try:
        spacing = [float(value) for value in raw_spacing]
    except (TypeError, ValueError):
        # Missing/empty value (None), a non-iterable scalar, or non-numeric text.
        return [1.0, 1.0]
    return spacing or [1.0, 1.0]


def process_dicom_upload(file_bytes: bytes, username: str) -> Tuple[bytes, Dict[str, Any]]:
    """
    Main Gateway Function: Validate -> Anonymize -> Return Bytes & Metadata

    Args:
        file_bytes: Raw content of the uploaded DICOM file.
        username: Name of the uploading user. Accepted for the caller's
            convenience; it is not used inside this function.

    Returns:
        A tuple ``(safe_bytes, metadata)``: the anonymized dataset serialized
        back to bytes, and a dict of non-identifying fields for indexing.
        ``metadata["pixel_spacing"]`` is a plain list of floats so the dict
        can be serialized (e.g. to JSON) without pydicom-specific types.

    Raises:
        ValueError: If validation fails; the message is prefixed with
            "DICOM Rejected: ".
    """
    # 1. Validate
    try:
        ds = validate_dicom(file_bytes)
    except Exception as e:
        logger.error("DICOM Validation Failed: %s", e)
        raise ValueError(f"DICOM Rejected: {e}") from e

    # 2. Anonymize
    ds = anonymize_dicom(ds)

    # 3. Extract safe metadata for Indexing
    metadata = {
        "modality": ds.get("Modality", "Unknown"),
        "body_part": ds.get("BodyPartExamined", "Unknown"),
        "study_uid": str(ds.get("StudyInstanceUID", "")),
        "series_uid": str(ds.get("SeriesInstanceUID", "")),
        # pydicom returns a multi-value object here; convert to builtin types.
        "pixel_spacing": _pixel_spacing_as_floats(ds.get("PixelSpacing", [1.0, 1.0])),
        "original_filename_hint": "dicom_file.dcm"  # We generally lose original filename in API
    }

    # 4. Convert back to bytes for storage
    # We save the ANONYMIZED version
    with io.BytesIO() as buffer:
        ds.save_as(buffer)
        safe_bytes = buffer.getvalue()

    return safe_bytes, metadata

def convert_dicom_to_image(ds: pydicom.dataset.FileDataset) -> Any:
    """
    Convert a DICOM dataset into an 8-bit PIL image for inference.

    Pipeline:
      1. Decode the pixel array and convert it to float.
      2. Apply RescaleSlope / RescaleIntercept when present (this yields
         Hounsfield Units for CT); defaults are slope 1 and intercept 0.
      3. Min-max normalize the whole array to 0-255. No WindowCenter /
         WindowWidth windowing is applied.
      4. Invert the result when PhotometricInterpretation is MONOCHROME1,
         where the lowest stored value is meant to be displayed as white.
      5. Return 2-D arrays as an RGB image; other shapes are handed to PIL
         unchanged.

    Args:
        ds: Parsed DICOM dataset containing pixel data.

    Returns:
        A ``PIL.Image.Image``.

    Raises:
        ValueError: If any step fails (undecodable pixel data, unsupported
            array shape, malformed rescale values, ...).
    """
    import numpy as np
    from PIL import Image

    try:
        # Start with raw pixel array
        pixel_array = ds.pixel_array.astype(float)

        # Rescale Slope/Intercept (Hounsfield Units).
        # A tag that is present but empty is treated like a missing tag.
        slope = getattr(ds, 'RescaleSlope', None)
        intercept = getattr(ds, 'RescaleIntercept', None)
        slope = 1.0 if slope in (None, '') else float(slope)
        intercept = 0.0 if intercept in (None, '') else float(intercept)
        pixel_array = (pixel_array * slope) + intercept

        photometric = str(getattr(ds, 'PhotometricInterpretation', '')).strip().upper()
        invert = photometric == 'MONOCHROME1'

        # Normalize to 0-255 for standard Vision Models (unless model expects HU)
        # For CLIP/Vision models trained on PNGs, 0-255 is safe
        pixel_min = np.min(pixel_array)
        pixel_max = np.max(pixel_array)
        span = pixel_max - pixel_min

        if span != 0:
            pixel_array = (pixel_array - pixel_min) / span
            if invert:
                # MONOCHROME1: flip so that dense structures appear bright,
                # matching the usual MONOCHROME2 presentation.
                pixel_array = 1.0 - pixel_array
            pixel_array = pixel_array * 255.0
        else:
            # Constant image: nothing to normalize, emit all black.
            pixel_array = np.zeros_like(pixel_array)

        pixel_array = pixel_array.astype(np.uint8)

        # Handle Color Space (Monochrome usually)
        if pixel_array.ndim == 2:
            image = Image.fromarray(pixel_array).convert("RGB")
        else:
            # NOTE(review): assumes a (rows, cols, 3) colour array; multi-frame
            # data is not handled here and will most likely be rejected by PIL.
            image = Image.fromarray(pixel_array)

        return image

    except Exception as e:
        logger.error("DICOM Conversion Error: %s", e)
        raise ValueError(f"Could not convert DICOM to image: {e}") from e