# Varshith dharmaj
# Upload folder using huggingface_hub
# 5081d4a verified
import cv2
import numpy as np
from pathlib import Path
from typing import Dict, Any, Tuple, Union
import logging
logger = logging.getLogger(__name__)
class ImageEnhancer:
    """
    Applies neuro-symbolic preprocessing directly from the MVM² architecture.
    Enhances mathematical images for optimal OCR extraction.

    Pipeline: grayscale -> Gaussian blur -> CLAHE -> (optional) adaptive
    binarization. The tunable constants below are class attributes so they
    can be overridden on a subclass or on a single instance without changing
    the constructor signature.
    """

    # CLAHE (Contrast Limited Adaptive Histogram Equalization) settings.
    CLAHE_CLIP_LIMIT = 2.0
    CLAHE_TILE_GRID_SIZE = (8, 8)

    # Adaptive threshold settings: neighbourhood size (pixels) and the
    # constant subtracted from the weighted neighbourhood mean.
    ADAPTIVE_BLOCK_SIZE = 11
    ADAPTIVE_C = 2

    # Images whose initial contrast exceeds this value are treated as
    # already high-contrast (likely digital) and are not binarized.
    HIGH_CONTRAST_THRESHOLD = 60

    def __init__(self, sigma: float = 1.2):
        """
        Args:
            sigma: Standard deviation (in both X and Y) of the Gaussian blur
                applied before CLAHE.
        """
        self.sigma = sigma

    def calculate_contrast(self, gray_img: Union[np.ndarray, None]) -> float:
        """
        Calculate RMS contrast (standard deviation of pixel intensities).

        Returns 0.0 when the image is None or has no elements.
        """
        if gray_img is None or gray_img.size == 0:
            return 0.0
        return float(gray_img.std())

    def _load_image(self, image_source: Union[str, Path, bytes, np.ndarray]) -> np.ndarray:
        """Resolve a path, encoded bytes, or array into an image array, or raise ValueError."""
        if isinstance(image_source, (str, Path)):
            img = cv2.imread(str(image_source))
            if img is None:
                raise ValueError(f"Could not load image at {image_source}")
            return img

        if isinstance(image_source, bytes):
            # Reject an empty buffer up front so the caller gets the same
            # ValueError as for any other undecodable payload instead of a
            # low-level OpenCV error.
            if not image_source:
                raise ValueError("Could not decode image from bytes")
            nparr = np.frombuffer(image_source, np.uint8)
            img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
            if img is None:
                raise ValueError("Could not decode image from bytes")
            return img

        if isinstance(image_source, np.ndarray):
            # Only (H, W) and (H, W, C) layouts with at least one pixel can
            # go through the pipeline; fail early with a clear message.
            if image_source.ndim not in (2, 3) or image_source.size == 0:
                raise ValueError(
                    "Image array must be a non-empty 2-D or 3-D array, "
                    f"got shape {image_source.shape}"
                )
            return image_source

        raise ValueError(f"Unsupported image source type: {type(image_source)}")

    @staticmethod
    def _to_grayscale(img: np.ndarray) -> np.ndarray:
        """Return a single-channel 2-D copy of ``img`` (colour input is treated as BGR)."""
        if img.ndim == 3:
            if img.shape[2] == 1:
                # (H, W, 1) is already grayscale; BGR2GRAY would reject it.
                return img[:, :, 0].copy()
            return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        return img.copy()

    def enhance(self, image_source: Union[str, Path, bytes, np.ndarray], skip_binarization: bool = False) -> Tuple[np.ndarray, Dict[str, Any]]:
        """
        Enhance image using Gaussian Blur, CLAHE, and Adaptive Binarization.

        Args:
            image_source: A filesystem path (``str`` or ``Path``), encoded
                image bytes, or an image array. Colour arrays are converted
                with ``COLOR_BGR2GRAY``, i.e. they are treated as BGR.
            skip_binarization: When True, the adaptive threshold step is
                never applied. It is also skipped automatically when the
                initial contrast exceeds ``HIGH_CONTRAST_THRESHOLD``.

        Returns:
            A tuple ``(image, metadata)``: the enhanced image as a numpy
            array, and a dict holding the input resolution, contrast
            metrics before/after, the blur sigma, whether binarization was
            applied, and the ordered list of processing steps.

        Raises:
            ValueError: If the source cannot be loaded or decoded, is an
                empty or wrongly-shaped array, or is of an unsupported type.

        Note:
            Array dtype is not converted here. OpenCV documents the adaptive
            threshold source as an 8-bit single-channel image, so non-uint8
            arrays may be rejected by OpenCV when binarization runs.
        """
        img = self._load_image(image_source)
        height, width = img.shape[:2]

        # 1. Convert to grayscale
        gray = self._to_grayscale(img)
        initial_contrast = self.calculate_contrast(gray)

        # 2. Gaussian Blur; kernel size (0, 0) lets OpenCV derive it from sigma
        blurred = cv2.GaussianBlur(gray, (0, 0), sigmaX=self.sigma, sigmaY=self.sigma)

        # 3. CLAHE (Contrast Limited Adaptive Histogram Equalization)
        clahe = cv2.createCLAHE(
            clipLimit=self.CLAHE_CLIP_LIMIT,
            tileGridSize=self.CLAHE_TILE_GRID_SIZE,
        )
        clahe_img = clahe.apply(blurred)

        # 4. Adaptive Binarization (Lighting normalization)
        # Skipped if explicitly requested OR if the image already has very
        # high contrast (likely digital).
        bin_applied = not (
            skip_binarization or initial_contrast > self.HIGH_CONTRAST_THRESHOLD
        )
        if bin_applied:
            final_img = cv2.adaptiveThreshold(
                clahe_img,
                255,
                cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                cv2.THRESH_BINARY,
                self.ADAPTIVE_BLOCK_SIZE,
                self.ADAPTIVE_C,
            )
        else:
            final_img = clahe_img

        final_contrast = self.calculate_contrast(final_img)

        processing_steps = ["grayscale", f"gaussian_blur_sigma_{self.sigma}", "clahe"]
        if bin_applied:
            processing_steps.append("adaptive_binarization")

        metadata = {
            "resolution": {"width": width, "height": height},
            "metrics": {
                "initial_contrast": round(initial_contrast, 2),
                "final_contrast": round(final_contrast, 2),
                "blur_sigma_used": self.sigma,
                "binarization_applied": bin_applied,
            },
            "processing_steps": processing_steps,
        }
        return final_img, metadata