File size: 3,553 Bytes
283b0cb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5081d4a
283b0cb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5081d4a
283b0cb
 
 
 
 
 
 
 
5081d4a
 
 
 
 
 
 
 
 
 
 
 
 
 
283b0cb
5081d4a
283b0cb
 
 
 
 
 
5081d4a
 
283b0cb
5081d4a
283b0cb
5081d4a
 
283b0cb
5081d4a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import cv2
import numpy as np
from pathlib import Path
from typing import Dict, Any, Tuple, Union
import logging

# Module-level logger named after this module (via __name__).
logger = logging.getLogger(__name__)

class ImageEnhancer:
    """Enhance images of mathematical content for OCR extraction.

    Applies the neuro-symbolic preprocessing stage of the MVM² architecture:
    grayscale conversion, Gaussian blur, CLAHE and, unless skipped, adaptive
    binarization.

    Attributes:
        sigma: Standard deviation of the Gaussian blur (both axes).
        clahe_clip_limit: Clip limit passed to ``cv2.createCLAHE``.
        clahe_tile_grid: Tile grid size passed to ``cv2.createCLAHE``.
        binarize_block_size: Neighbourhood size for ``cv2.adaptiveThreshold``;
            must be odd and greater than 1.
        binarize_c: Constant subtracted from the local weighted mean by
            ``cv2.adaptiveThreshold``.
        high_contrast_threshold: RMS contrast above which binarization is
            skipped automatically.
    """

    # Class-level defaults mirror the constructor defaults so the tunables
    # still resolve on instances whose __init__ was bypassed or overridden.
    sigma: float = 1.2
    clahe_clip_limit: float = 2.0
    clahe_tile_grid: Tuple[int, int] = (8, 8)
    binarize_block_size: int = 11
    binarize_c: float = 2
    high_contrast_threshold: float = 60

    def __init__(
        self,
        sigma: float = 1.2,
        clahe_clip_limit: float = 2.0,
        clahe_tile_grid: Tuple[int, int] = (8, 8),
        binarize_block_size: int = 11,
        binarize_c: float = 2,
        high_contrast_threshold: float = 60,
    ):
        """Configure the enhancement pipeline.

        Every parameter after ``sigma`` defaults to the value that was
        previously hard-coded, so ``ImageEnhancer()`` and
        ``ImageEnhancer(sigma=...)`` behave as before.

        Raises:
            ValueError: If ``sigma`` is not positive, or if
                ``binarize_block_size`` is not an odd integer greater than 1.
        """
        # The blur is requested with ksize=(0, 0), so OpenCV derives the
        # kernel from sigma; a non-positive (or NaN) sigma cannot work.
        if not sigma > 0:
            raise ValueError(f"sigma must be positive, got {sigma!r}")
        if binarize_block_size <= 1 or binarize_block_size % 2 == 0:
            raise ValueError(
                "binarize_block_size must be an odd integer greater than 1, "
                f"got {binarize_block_size!r}"
            )
        self.sigma = sigma
        self.clahe_clip_limit = clahe_clip_limit
        self.clahe_tile_grid = tuple(clahe_tile_grid)
        self.binarize_block_size = binarize_block_size
        self.binarize_c = binarize_c
        self.high_contrast_threshold = high_contrast_threshold

    def calculate_contrast(self, gray_img: np.ndarray) -> float:
        """Return the RMS contrast (standard deviation) of ``gray_img``.

        Returns 0.0 when ``gray_img`` is ``None`` or has no elements.
        """
        if gray_img is None or gray_img.size == 0:
            return 0.0
        return float(gray_img.std())

    @staticmethod
    def _load_image(
        image_source: Union[str, Path, bytes, bytearray, np.ndarray],
    ) -> np.ndarray:
        """Resolve a path, encoded byte buffer or array into an image array."""
        if isinstance(image_source, (str, Path)):
            img = cv2.imread(str(image_source))
            if img is None:
                raise ValueError(f"Could not load image at {image_source}")
            return img

        if isinstance(image_source, (bytes, bytearray)):
            # Reject an empty buffer up front: handing it to imdecode may
            # surface as a low-level OpenCV error rather than a None result.
            if not image_source:
                raise ValueError("Could not decode image from bytes")
            buffer = np.frombuffer(image_source, np.uint8)
            img = cv2.imdecode(buffer, cv2.IMREAD_COLOR)
            if img is None:
                raise ValueError("Could not decode image from bytes")
            return img

        if isinstance(image_source, np.ndarray):
            # Only (H, W) and (H, W, C) layouts with at least one pixel can
            # go through the pipeline below.
            if image_source.size == 0 or image_source.ndim not in (2, 3):
                raise ValueError(
                    f"Unsupported image array shape: {image_source.shape}"
                )
            return image_source

        raise ValueError(f"Unsupported image source type: {type(image_source)}")

    @staticmethod
    def _to_gray(img: np.ndarray) -> np.ndarray:
        """Return a single-channel view or conversion of ``img``."""
        if img.ndim == 2:
            # Already single-channel. No copy is needed because every later
            # step allocates its own output array.
            return img
        if img.shape[2] == 1:
            # cvtColor(BGR2GRAY) does not accept one-channel input, so drop
            # the trailing axis instead.
            return np.ascontiguousarray(img[:, :, 0])
        return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    def enhance(
        self,
        image_source: Union[str, Path, bytes, bytearray, np.ndarray],
        skip_binarization: bool = False,
    ) -> Tuple[np.ndarray, Dict[str, Any]]:
        """Enhance an image with Gaussian blur, CLAHE and adaptive binarization.

        Args:
            image_source: A file path (``str`` or ``Path``), an encoded image
                as ``bytes``/``bytearray``, or an already-decoded array.
                Multi-channel arrays are converted with ``COLOR_BGR2GRAY``.
                NOTE(review): array input is presumably expected to be 8-bit;
                the dtype is passed to OpenCV unchanged.
            skip_binarization: When true, stop after CLAHE.

        Returns:
            A tuple ``(image, metadata)``. ``image`` is the processed
            single-channel array. ``metadata`` holds the input resolution,
            the contrast before and after processing, the blur sigma, whether
            binarization ran, and the ordered list of processing steps.

        Raises:
            ValueError: If the source type is unsupported, the image cannot
                be loaded or decoded, or an array input is empty or is not
                2-D or 3-D.
        """
        img = self._load_image(image_source)
        height, width = img.shape[:2]

        # 1. Grayscale
        gray = self._to_gray(img)
        initial_contrast = self.calculate_contrast(gray)

        # 2. Gaussian blur; ksize=(0, 0) lets OpenCV size the kernel from sigma.
        blurred = cv2.GaussianBlur(
            gray, (0, 0), sigmaX=self.sigma, sigmaY=self.sigma
        )

        # 3. CLAHE (Contrast Limited Adaptive Histogram Equalization)
        clahe = cv2.createCLAHE(
            clipLimit=self.clahe_clip_limit,
            tileGridSize=self.clahe_tile_grid,
        )
        clahe_img = clahe.apply(blurred)

        # 4. Adaptive binarization (lighting normalization). Skipped when the
        # caller asks for it, or when the input already has very high
        # contrast (likely a digital image).
        bin_applied = not (
            skip_binarization or initial_contrast > self.high_contrast_threshold
        )
        if bin_applied:
            final_img = cv2.adaptiveThreshold(
                clahe_img,
                255,
                cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                cv2.THRESH_BINARY,
                self.binarize_block_size,
                self.binarize_c,
            )
        else:
            final_img = clahe_img

        final_contrast = self.calculate_contrast(final_img)

        processing_steps = [
            "grayscale",
            f"gaussian_blur_sigma_{self.sigma}",
            "clahe",
        ]
        if bin_applied:
            processing_steps.append("adaptive_binarization")

        metadata = {
            "resolution": {"width": width, "height": height},
            "metrics": {
                "initial_contrast": round(initial_contrast, 2),
                "final_contrast": round(final_contrast, 2),
                "blur_sigma_used": self.sigma,
                "binarization_applied": bin_applied,
            },
            "processing_steps": processing_steps,
        }
        return final_img, metadata