File size: 17,220 Bytes
a874986
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cbd23a5
a874986
cbd23a5
a874986
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cbd23a5
 
 
a874986
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
"""
Preprocessing module for FetalCLIP.

Supports two pipelines:
1. DICOM (Full): US region extraction, fan isolation, text removal, denoising
2. Image (Basic): RGB conversion, square padding (resizing is left to the model)
"""

import cv2
import copy
import numpy as np
from PIL import Image
from typing import Tuple, Dict, List, Optional
from io import BytesIO

# Try importing DICOM-specific libraries
try:
    from pydicom import dcmread
    from pydicom.pixel_data_handlers import convert_color_space
    DICOM_AVAILABLE = True
except ImportError:
    DICOM_AVAILABLE = False

try:
    from skimage.restoration import denoise_nl_means, estimate_sigma
    SKIMAGE_AVAILABLE = True
except ImportError:
    SKIMAGE_AVAILABLE = False

try:
    import albumentations as A
    ALBUMENTATIONS_AVAILABLE = True
except ImportError:
    ALBUMENTATIONS_AVAILABLE = False


# ============================================================================
# CONSTANTS
# ============================================================================

# Output resolution and resampling filter.
# NOTE(review): neither name is referenced by the functions visible in this
# file (padding is done here, resizing is not) — confirm an external user exists.
TARGET_SIZE = (512, 512)
INTERPOLATION = cv2.INTER_LANCZOS4

# Grey-level threshold that preprocess_dicom() passes to get_fan_region().
INTENSITY_THRESHOLD = 0
# Passed as ``sv_mc_y`` to get_us_region_from_dcm(): number of extra rows
# blanked above the small (picture-in-picture) view.
SMALL_VIEW_MARGIN_CROP_Y = 1

# Exact pixel value remove_text_box() treats as text-box background, and the
# minimum bounding-box area (w * h) a matching region needs before it is erased.
YELLOW_BOX_BACKGROUND_PIXEL = np.array([57, 57, 57])
MIN_YELLOW_BOX_RECT_AREA = 2_000

# Kernel used to dilate the annotation mask before inpainting.
MASK_INPAINTING_DILATE_KERNEL = np.ones((9, 9), np.uint8)
# Keyword arguments forwarded to skimage's denoise_nl_means().
DENOISE_NL_MEANS_PATCH_KW = dict(
    patch_size=7,
    patch_distance=6,
    channel_axis=-1,
)
# Radius argument given to cv2.inpaint().
INPAINT_RADIUS = 5


# ============================================================================
# TEXT DETECTION UTILITIES (from utils_husain.py)
# ============================================================================

def rgb2gray(rgb: np.ndarray) -> np.ndarray:
    """Return a copy of ``rgb`` whose first three channels all hold its luma.

    The luma is ``0.299 * ch0 + 0.587 * ch1 + 0.114 * ch2``.  The result keeps
    the shape and dtype of the input, so for integer images the luma is cast
    back to that integer type when stored.  The input is not modified.
    """
    red = rgb[:, :, 0]
    green = rgb[:, :, 1]
    blue = rgb[:, :, 2]
    luma = 0.299 * red + 0.5870 * green + 0.1140 * blue

    out = rgb.copy()
    # One broadcast assignment writes the luma plane into channels 0..2.
    out[:, :, :3] = luma[:, :, np.newaxis]
    return out


def mask_filter(image: np.ndarray, grey_threshold: int) -> np.ndarray:
    """Return a 3-channel uint8 mask of pixels brighter than ``grey_threshold``.

    The image is converted with ``rgb2gray``; a pixel is set to
    ``[255, 255, 255]`` when its grey value is strictly greater than the
    threshold in all three channels, and left at ``[0, 0, 0]`` otherwise.
    The input image is not modified.
    """
    grey = rgb2gray(image.copy())
    bright = np.all(grey[:, :, :3] > grey_threshold, axis=2)

    mask = np.zeros((image.shape[0], image.shape[1], 3), dtype=np.uint8)
    mask[bright] = 255
    return mask


def maximize_contrast(img_grayscale: np.ndarray) -> np.ndarray:
    """Boost local contrast as ``image + top-hat - black-hat``.

    Both morphological transforms use a 3x3 rectangular structuring element,
    and the sum/difference are computed with ``cv2.add`` / ``cv2.subtract``.
    """
    # The unpack doubles as a guard: a non-2-D input raises ValueError here.
    _rows, _cols = img_grayscale.shape

    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    top_hat = cv2.morphologyEx(img_grayscale, cv2.MORPH_TOPHAT, kernel)
    black_hat = cv2.morphologyEx(img_grayscale, cv2.MORPH_BLACKHAT, kernel)

    brightened = cv2.add(img_grayscale, top_hat)
    return cv2.subtract(brightened, black_hat)


def detect_white_annotation(img: np.ndarray) -> np.ndarray:
    """Return a single-channel mask of white text/annotation pixels.

    The image is reduced to grey, contrast-boosted with ``maximize_contrast``,
    thresholded with ``mask_filter`` at 254, dilated once with a 3x3 kernel and
    converted back to a single channel.
    """
    grey = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    boosted = cv2.cvtColor(maximize_contrast(grey), cv2.COLOR_GRAY2BGR)

    # Only pixels strictly above 254 after the contrast boost are kept.
    white = mask_filter(boosted, 254)
    grown = cv2.dilate(white, np.ones((3, 3), np.uint8), iterations=1)
    return cv2.cvtColor(grown, cv2.COLOR_BGR2GRAY)


def detect_cyan(img: np.ndarray) -> np.ndarray:
    """Return a 0/255 mask of pixels inside the cyan HSV band.

    A pixel is selected when its H, S and V all lie within
    [85, 150, 20] .. [95, 255, 255] (bounds inclusive).

    NOTE(review): this converts with ``COLOR_BGR2HSV`` while the purple,
    orange and green detectors in this file use ``COLOR_RGB2HSV`` — confirm
    which channel order callers actually supply.
    """
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    band_low = np.array([85, 150, 20], dtype=np.uint8)
    band_high = np.array([95, 255, 255], dtype=np.uint8)
    return np.array(cv2.inRange(hsv, band_low, band_high))


def detect_purple_text(img: np.ndarray) -> np.ndarray:
    """Return a 0/255 mask of pixels inside the purple HSV band.

    ``img`` is converted with ``COLOR_RGB2HSV``; a pixel is selected when its
    H, S and V all lie within [110, 100, 50] .. [130, 255, 255] (inclusive).
    """
    hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
    band_low = np.array([110, 100, 50], dtype=np.uint8)
    band_high = np.array([130, 255, 255], dtype=np.uint8)
    return np.array(cv2.inRange(hsv, band_low, band_high))


def detect_orange_text(img: np.ndarray) -> np.ndarray:
    """Return a 0/255 mask of pixels inside the orange HSV band.

    ``img`` is converted with ``COLOR_RGB2HSV``; a pixel is selected when its
    H, S and V all lie within [12, 150, 100] .. [27, 255, 255] (inclusive).
    """
    hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
    band_low = np.array([12, 150, 100], dtype=np.uint8)
    band_high = np.array([27, 255, 255], dtype=np.uint8)
    return np.array(cv2.inRange(hsv, band_low, band_high))


def detect_green_text(img: np.ndarray) -> np.ndarray:
    """Return a 0/255 mask of pixels inside the green HSV band.

    ``img`` is converted with ``COLOR_RGB2HSV``; a pixel is selected when its
    H, S and V all lie within [50, 100, 50] .. [70, 255, 255] (inclusive).
    """
    hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
    band_low = np.array([50, 100, 50], dtype=np.uint8)
    band_high = np.array([70, 255, 255], dtype=np.uint8)
    return np.array(cv2.inRange(hsv, band_low, band_high))


def detect_annotation(img: np.ndarray) -> np.ndarray:
    """Return a uint8 mask covering every detected annotation colour.

    The white, cyan, orange, purple and green detectors are run on ``img`` and
    their masks are OR-ed together.  The union is contrast-boosted, blurred
    (5x5 Gaussian) and Otsu-thresholded; the final mask is the OR of the
    thresholded image and the boosted union.
    """
    detectors = (
        detect_white_annotation,
        detect_cyan,
        detect_orange_text,
        detect_purple_text,
        detect_green_text,
    )
    # A pixel counts as "hit" when any detector reports a value of 127 or more.
    hit = np.logical_or.reduce([detect(img) >= 127 for detect in detectors])
    union = hit.astype(np.uint8) * 255

    boosted = maximize_contrast(union)
    blurred = cv2.GaussianBlur(boosted, (5, 5), 0)
    _, otsu = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return cv2.bitwise_or(otsu, boosted)


# ============================================================================
# DICOM UTILITIES (from utils_adam.py)
# ============================================================================

def remove_text_box(im: np.ndarray, box_background_pixel: np.ndarray, min_rect_area: int = 2000) -> np.ndarray:
    """Black out the largest region whose pixels equal ``box_background_pixel``.

    Pixels matching the background value in every channel form a binary map;
    the external contour with the largest area is located and, if its
    bounding rectangle covers at least ``min_rect_area`` pixels, that
    rectangle is set to 0.

    Note: ``im`` is modified in place and also returned.
    """
    matches = np.all(im == box_background_pixel, axis=-1).astype(np.uint8) * 255
    contours, _ = cv2.findContours(matches, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    if not contours:
        return im

    largest = max(contours, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(largest)
    if w * h < min_rect_area:
        return im

    im[y:y + h, x:x + w] = 0
    return im


def pad_to_square(im: np.ndarray) -> np.ndarray:
    """Centre ``im`` on a black square whose side is its longer dimension.

    Uses albumentations' ``PadIfNeeded`` when that package was importable,
    otherwise an equivalent NumPy fallback that places the image on a zero
    canvas of the same dtype.
    """
    side = max(im.shape[:2])

    if ALBUMENTATIONS_AVAILABLE:
        padder = A.PadIfNeeded(min_height=side, min_width=side,
                               border_mode=0, value=(0, 0, 0))
        return padder(image=im)["image"]

    # Fallback without albumentations
    rows, cols = im.shape[:2]
    canvas_shape = (side, side, im.shape[2]) if len(im.shape) == 3 else (side, side)
    canvas = np.zeros(canvas_shape, dtype=im.dtype)

    top = (side - rows) // 2
    left = (side - cols) // 2
    canvas[top:top + rows, left:left + cols] = im
    return canvas


def get_fan_region(im: np.ndarray, threshold: int = 1) -> np.ndarray:
    """Crop ``im`` to its largest bright region and black out everything else.

    The image is thresholded on its grey level (pixels above ``threshold``),
    the external contour with the largest area is filled to form a mask, and
    the image is cropped to that contour's bounding box with pixels outside
    the contour set to zero.  If no contour is found, ``im`` is returned
    unchanged.
    """
    grey = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(grey, threshold, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    if not contours:
        return im

    largest = max(contours, key=cv2.contourArea)

    # Filled silhouette of the largest contour, same shape as the input.
    silhouette = np.zeros_like(im)
    cv2.drawContours(silhouette, [largest], -1, (255, 255, 255), thickness=cv2.FILLED)

    # Restrict both image and silhouette to the contour's bounding box.
    x, y, w, h = cv2.boundingRect(largest)
    window = (slice(y, y + h), slice(x, x + w))
    return cv2.bitwise_and(im[window], silhouette[window])


def get_us_region_from_dcm(us, sv_mc_y: int = 1) -> np.ndarray:
    """Extract the ultrasound image area from a parsed DICOM dataset.

    Args:
        us: Dataset-like object exposing ``pixel_array`` and, optionally,
            ``NumberOfFrames``, ``PhotometricInterpretation`` and
            ``SequenceOfUltrasoundRegions``.
        sv_mc_y: Number of extra rows blanked above the small
            (picture-in-picture) view when two regions are present.

    Returns:
        A ``(rows, cols, channels)`` array cropped to the ultrasound region.
        It is derived from a copy, so ``us.pixel_array`` is never modified.

    Behaviour:
        * Multi-frame data is reduced to its first frame, matching what
          ``get_dicom_preview`` does.
        * 2-D (single-channel) data is replicated to three channels so the
          channel-indexed operations below do not fail.
        * ``YBR_FULL*`` data is converted to RGB.
        * With exactly two regions of ``RegionDataType`` 1, the small view is
          zeroed and the crop is the union of both regions; otherwise, if the
          first region has ``RegionDataType`` 1, the crop is that region.
          Without usable region metadata the whole frame is returned.
    """
    pixels = us.pixel_array

    # Keep only the first frame of a cine loop: (frames, rows, cols[, ch]).
    n_frames = int(getattr(us, 'NumberOfFrames', 1) or 1)
    if pixels.ndim == 4 or (n_frames > 1 and pixels.ndim == 3):
        pixels = pixels[0]

    ds = copy.deepcopy(pixels)

    # Handle color space conversion
    if hasattr(us, 'PhotometricInterpretation'):
        if 'ybr_full' in us.PhotometricInterpretation.lower():
            ds = convert_color_space(ds, "YBR_FULL", "RGB", per_frame=True)

    # Grayscale frames have no channel axis; add one so `[..., :]` works.
    if ds.ndim == 2:
        ds = np.stack([ds, ds, ds], axis=-1)

    # Default crop: the whole frame.
    x0_f, x1_f, y0_f, y1_f = None, None, None, None
    x0, x1, y0, y1 = 0, ds.shape[1], 0, ds.shape[0]

    # Check for ultrasound regions metadata
    if hasattr(us, 'SequenceOfUltrasoundRegions') and len(us.SequenceOfUltrasoundRegions) > 0:
        regions = us.SequenceOfUltrasoundRegions

        if len(regions) == 2 and int(regions[0].RegionDataType) == 1 and int(regions[1].RegionDataType) == 1:
            # Image with small view (picture-in-picture).
            # NOTE(review): the blanked strip spans the two regions' *left*
            # edges (MinX0 for both ends) and starts at the larger MinY0 —
            # kept exactly as found; confirm this heuristic is intended.
            x0_f = np.min([regions[0].RegionLocationMinX0, regions[1].RegionLocationMinX0])
            x1_f = np.max([regions[0].RegionLocationMinX0, regions[1].RegionLocationMinX0])
            y0_f = np.max([regions[0].RegionLocationMinY0, regions[1].RegionLocationMinY0])
            y1_f = np.max([regions[0].RegionLocationMaxY1, regions[1].RegionLocationMaxY1])

            x0 = min(regions[0].RegionLocationMinX0, regions[1].RegionLocationMinX0)
            x1 = max(regions[0].RegionLocationMaxX1, regions[1].RegionLocationMaxX1)
            y0 = min(regions[0].RegionLocationMinY0, regions[1].RegionLocationMinY0)
            y1 = max(regions[0].RegionLocationMaxY1, regions[1].RegionLocationMaxY1)

        elif len(regions) >= 1 and int(regions[0].RegionDataType) == 1:
            x0 = regions[0].RegionLocationMinX0
            x1 = regions[0].RegionLocationMaxX1
            y0 = regions[0].RegionLocationMinY0
            y1 = regions[0].RegionLocationMaxY1

    # Remove small view if present
    if x0_f is not None:
        # Clamp at 0: a negative start would wrap around and select the
        # wrong rows (usually none), leaving the small view in place.
        top = max(0, int(y0_f) - sv_mc_y)
        ds[top:y1_f, x0_f:x1_f, :] = 0

    # Crop to ultrasound region
    return ds[y0:y1, x0:x1, :]


# ============================================================================
# MAIN PREPROCESSING FUNCTIONS
# ============================================================================

def preprocess_dicom(file_bytes: bytes) -> Tuple[Image.Image, Dict]:
    """
    Run the full DICOM preprocessing pipeline on raw file bytes.

    Steps:
    1. Parse the DICOM file
    2. Extract the ultrasound region using DICOM metadata
    3. Remove the text box
    4. Extract the fan/cone region
    5. Detect text annotations and dilate the mask
    6. Inpaint the masked pixels
    7. Scale to [0, 1] and denoise with non-local means (when scikit-image
       is available; otherwise only the scaling is applied)
    8. Pad to square (no resizing is done here)

    Returns:
        Tuple of (PIL Image, info dict with keys "type", "pipeline",
        "steps_applied" and "metadata")

    Raises:
        RuntimeError: if pydicom could not be imported.
    """
    if not DICOM_AVAILABLE:
        raise RuntimeError("pydicom not installed. Install with: pip install pydicom")

    dataset = dcmread(BytesIO(file_bytes))

    # Region crop -> text-box removal (on a copy) -> fan isolation.
    us_region = get_us_region_from_dcm(dataset, sv_mc_y=SMALL_VIEW_MARGIN_CROP_Y)
    without_box = remove_text_box(
        us_region.copy(),
        box_background_pixel=YELLOW_BOX_BACKGROUND_PIXEL,
        min_rect_area=MIN_YELLOW_BOX_RECT_AREA,
    )
    fan = get_fan_region(without_box, threshold=INTENSITY_THRESHOLD)

    # Annotation mask, grown so the inpainting also covers glyph edges.
    annotation_mask = cv2.dilate(detect_annotation(fan), MASK_INPAINTING_DILATE_KERNEL)
    inpainted = cv2.inpaint(fan.copy(), annotation_mask, INPAINT_RADIUS, cv2.INPAINT_TELEA)

    # Scale to [0, 1]; an all-zero image is left untouched to avoid 0/0.
    peak = np.max(inpainted)
    if peak > 0:
        inpainted = inpainted / peak

    if SKIMAGE_AVAILABLE:
        noise_sigma = estimate_sigma(inpainted, channel_axis=-1, average_sigmas=True)
        cleaned = denoise_nl_means(inpainted, h=0.8 * noise_sigma, fast_mode=True,
                                   **DENOISE_NL_MEANS_PATCH_KW)
    else:
        cleaned = inpainted
    as_uint8 = np.clip(cleaned * 255, 0, 255).astype(np.uint8)

    # Pad to square (model will resize to 224×224)
    squared = pad_to_square(as_uint8)
    padded_size = max(squared.shape[:2])

    # Best-effort metadata: fall back to the fan size and unit spacing.
    fan_rows, fan_cols = fan.shape[0], fan.shape[1]
    try:
        rows = getattr(dataset, 'Rows', fan_rows)
        columns = getattr(dataset, 'Columns', fan_cols)

        raw_spacing = getattr(dataset, 'PixelSpacing', None)
        if raw_spacing is None:
            orig_pixel_spacing = [1.0, 1.0]
        else:
            orig_pixel_spacing = [float(value) for value in raw_spacing]
    except Exception:
        rows = fan_rows
        columns = fan_cols
        orig_pixel_spacing = [1.0, 1.0]

    metadata = {
        'original_size': (rows, columns),
        'original_pixel_spacing': orig_pixel_spacing,
        'fan_size': (fan_rows, fan_cols),
        'padded_size': padded_size,
        'pixel_spacing': orig_pixel_spacing[0],  # Original spacing, model.py adjusts for resize
        'processed_size': (padded_size, padded_size),
    }

    # NOTE(review): the 3-channel branch swaps channels 0 and 2 (BGR2RGB);
    # confirm the array really is BGR at this point.
    colour_code = cv2.COLOR_GRAY2RGB if len(squared.shape) == 2 else cv2.COLOR_BGR2RGB
    img_pil = Image.fromarray(cv2.cvtColor(squared, colour_code))

    info = {
        "type": "dicom",
        "pipeline": "full",
        "steps_applied": [
            "dicom_parsing",
            "us_region_extraction",
            "text_box_removal",
            "fan_extraction",
            "annotation_detection",
            "inpainting",
            "denoising" if SKIMAGE_AVAILABLE else "normalization",
            "square_padding",
        ],
        "metadata": metadata,
    }
    return img_pil, info


def preprocess_image(image: Image.Image) -> Tuple[Image.Image, Dict]:
    """
    Apply the basic pipeline to a regular (non-DICOM) image.

    Steps:
    1. Convert to RGB unless the image is already RGB or greyscale ("L")
    2. Centre the image on a black square canvas
    3. Convert a greyscale result to RGB
    (No resizing happens here; the model resizes to 224.)

    Returns:
        Tuple of (RGB PIL Image, preprocessing info dict)
    """
    if image.mode not in ('RGB', 'L'):
        image = image.convert('RGB')

    width, height = image.size
    side = max(width, height)

    # Black fill value must match the canvas mode (tuple for RGB, int for L).
    fill = 0 if image.mode != "RGB" else (0, 0, 0)
    canvas = Image.new(image.mode, (side, side), fill)

    offset = ((side - width) // 2, (side - height) // 2)
    canvas.paste(image, offset)

    if canvas.mode == 'L':
        canvas = canvas.convert('RGB')

    info = {
        "type": "image",
        "pipeline": "basic",
        "steps_applied": ["rgb_conversion", "square_padding"],
        "metadata": {
            "original_size": (height, width),
            "processed_size": (side, side),
        },
    }
    return canvas, info


def is_dicom_file(file_bytes: bytes, filename: str) -> bool:
    """Return True when the upload looks like a DICOM file.

    A file qualifies if its name ends in ``.dcm`` or ``.dicom`` (case
    insensitive), or if it is longer than 132 bytes and carries the ``DICM``
    marker at byte offset 128.
    """
    if filename.lower().endswith(('.dcm', '.dicom')):
        return True

    # "DICM" sits right after the 128-byte preamble.
    return len(file_bytes) > 132 and file_bytes[128:132] == b'DICM'


def image_to_base64(image: Image.Image) -> str:
    """Convert PIL Image to base64 string."""
    buffered = BytesIO()
    image.save(buffered, format="PNG")
    import base64
    return base64.b64encode(buffered.getvalue()).decode('utf-8')


def get_dicom_preview(file_bytes: bytes) -> str:
    """Return the raw DICOM image as a base64-encoded PNG (no preprocessing).

    Args:
        file_bytes: Contents of the DICOM file.

    Returns:
        Base64 text of a PNG rendering of the first frame, as an RGB image.

    Raises:
        RuntimeError: if pydicom could not be imported.

    Notes:
        * Multi-frame data (colour or greyscale) is reduced to its first frame.
        * ``YBR_FULL*`` data is converted to RGB.
        * Non-uint8 data is always brought to uint8: values outside 0..255
          are min-max scaled, a constant image maps to zeros instead of
          dividing by zero, and in-range data is simply cast.
        * pydicom delivers colour data in RGB order, so 3-channel arrays are
          passed to Pillow without any channel swap.
    """
    if not DICOM_AVAILABLE:
        raise RuntimeError("pydicom not installed")

    us = dcmread(BytesIO(file_bytes))
    ds = us.pixel_array

    # Handle video (take first frame): (frames, rows, cols[, channels]).
    n_frames = int(getattr(us, 'NumberOfFrames', 1) or 1)
    if ds.ndim == 4 or (n_frames > 1 and ds.ndim == 3):
        ds = ds[0]

    # Handle color space
    if hasattr(us, 'PhotometricInterpretation'):
        if 'ybr_full' in us.PhotometricInterpretation.lower():
            ds = convert_color_space(ds, "YBR_FULL", "RGB", per_frame=True)

    # Normalize to uint8 so Pillow can always build the image.
    if ds.dtype != np.uint8:
        lowest, highest = float(ds.min()), float(ds.max())
        if highest > 255 or lowest < 0:
            span = highest - lowest
            if span > 0:
                ds = (ds.astype(np.float64) - lowest) / span * 255
            else:
                # Constant image: avoid 0/0.
                ds = np.zeros(ds.shape, dtype=np.float64)
        ds = ds.astype(np.uint8)

    # Convert to RGB if grayscale; colour data is already RGB.
    if len(ds.shape) == 2:
        ds = cv2.cvtColor(ds, cv2.COLOR_GRAY2RGB)

    img_pil = Image.fromarray(np.ascontiguousarray(ds))
    return image_to_base64(img_pil)


def preprocess_file(file_bytes: bytes, filename: str) -> Tuple[Image.Image, Dict]:
    """
    Detect the file type and run the matching preprocessing pipeline.

    DICOM uploads (as judged by ``is_dicom_file``) go through
    ``preprocess_dicom``; everything else is opened with Pillow and passed to
    ``preprocess_image``.  In both cases the processed image is also attached
    to the info dict as base64 PNG under ``"processed_image_base64"`` for
    frontend display.

    Returns:
        Tuple of (PIL Image, preprocessing info dict with base64 image)
    """
    if is_dicom_file(file_bytes, filename):
        processed_image, info = preprocess_dicom(file_bytes)
    else:
        # Regular image
        source_image = Image.open(BytesIO(file_bytes))
        processed_image, info = preprocess_image(source_image)

    info["processed_image_base64"] = image_to_base64(processed_image)
    return processed_image, info