"""Document scanning helpers: perspective crop, denoise, contrast, sharpen.

``DocumentScanner`` needs OpenCV and NumPy. ``FallbackDocumentScanner`` only
needs Pillow. Use ``get_document_scanner()`` to obtain whichever one the
current environment can support.
"""

# OpenCV and NumPy are optional: they are only touched inside
# DocumentScanner methods.  Guarding the imports keeps this module importable
# without them, so that get_document_scanner() can actually reach its
# Pillow-only fallback instead of the whole module failing at import time.
try:
    import cv2
except ImportError:
    cv2 = None

try:
    import numpy as np
except ImportError:
    np = None

from PIL import Image, ImageEnhance, ImageFilter


class DocumentScanner:
    """OpenCV-based scanner: detect the page, deskew it and enhance it."""

    def order_points(self, pts):
        """Order four (x, y) points as top-left, top-right, bottom-right,
        bottom-left.

        Args:
            pts: array of shape (4, 2).

        Returns:
            A float32 array of shape (4, 2) in the order described above.
        """
        rect = np.zeros((4, 2), dtype="float32")

        # Smallest x + y is the top-left corner, largest is the bottom-right.
        coord_sum = pts.sum(axis=1)
        rect[0] = pts[np.argmin(coord_sum)]
        rect[2] = pts[np.argmax(coord_sum)]

        # np.diff along axis 1 yields y - x: smallest is the top-right
        # corner, largest is the bottom-left.
        coord_diff = np.diff(pts, axis=1)
        rect[1] = pts[np.argmin(coord_diff)]
        rect[3] = pts[np.argmax(coord_diff)]
        return rect

    def four_point_transform(self, image, pts):
        """Warp the quadrilateral ``pts`` of ``image`` into an upright
        rectangle.

        Args:
            image: source image as an ndarray.
            pts: array of shape (4, 2) with the quadrilateral's corners, in
                any order.

        Returns:
            The warped image; its size is the longer of each pair of opposite
            edges of the quadrilateral.
        """

        def edge_length(p, q):
            return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

        rect = self.order_points(pts)
        tl, tr, br, bl = rect

        max_width = max(int(edge_length(br, bl)), int(edge_length(tr, tl)))
        max_height = max(int(edge_length(tr, br)), int(edge_length(tl, bl)))

        dst = np.array(
            [
                [0, 0],
                [max_width - 1, 0],
                [max_width - 1, max_height - 1],
                [0, max_height - 1],
            ],
            dtype="float32",
        )
        matrix = cv2.getPerspectiveTransform(rect, dst)
        return cv2.warpPerspective(image, matrix, (max_width, max_height))

    def detect_document(self, image):
        """Find the document outline in a BGR image.

        Edge detection runs on a copy resized to a height of 500 px. The
        first of the ten largest contours that simplifies to four points is
        used; if none does, a rectangle inset by 2% from the image border is
        returned instead.

        Args:
            image: BGR image as an ndarray.

        Returns:
            A float array of shape (4, 2) with the corner coordinates in the
            coordinate system of the original ``image``.
        """
        height, width = image.shape[:2]
        target_height = 500
        ratio = height / float(target_height)
        # Clamp to 1 so that very tall, narrow inputs do not ask cv2.resize
        # for a zero-width image.
        new_width = max(1, int(width / ratio))
        resized = cv2.resize(image, (new_width, target_height))

        gray = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)
        edged = cv2.Canny(blurred, 50, 200)
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
        edged = cv2.dilate(edged, kernel, iterations=1)

        contours, _ = cv2.findContours(
            edged, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
        )
        contours = sorted(contours, key=cv2.contourArea, reverse=True)[:10]

        screen_cnt = None
        for contour in contours:
            perimeter = cv2.arcLength(contour, True)
            approx = cv2.approxPolyDP(contour, 0.02 * perimeter, True)
            if len(approx) == 4:
                screen_cnt = approx
                break

        if screen_cnt is None:
            # No quadrilateral found: fall back to (almost) the whole frame.
            edge_margin = 0.02
            h, w = resized.shape[:2]
            margin_x = int(w * edge_margin)
            margin_y = int(h * edge_margin)
            screen_cnt = np.array(
                [
                    [[margin_x, margin_y]],
                    [[w - margin_x, margin_y]],
                    [[w - margin_x, h - margin_y]],
                    [[margin_x, h - margin_y]],
                ]
            )

        # Scale each axis by its own factor: new_width is rounded (and
        # possibly clamped), so the horizontal factor is not exactly `ratio`.
        scale_back = np.array([width / float(new_width), ratio])
        return screen_cnt.reshape(4, 2) * scale_back

    def auto_crop_and_align(self, image):
        """Detect the document in ``image`` and return it cropped and
        deskewed.

        Args:
            image: a PIL image (converted from RGB to BGR) or a BGR ndarray.

        Returns:
            The perspective-corrected document as a BGR ndarray.
        """
        if isinstance(image, Image.Image):
            image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
        doc_contour = self.detect_document(image)
        return self.four_point_transform(image, doc_contour)

    def enhance_sharpness(self, image, amount=1.5):
        """Sharpen ``image`` with an unsharp mask (Gaussian radius 1).

        Args:
            image: a PIL image, or an ndarray which is treated as BGR.
            amount: multiplier applied to the (original - blurred) detail
                layer before it is added back.

        Returns:
            The sharpened picture as a PIL image.
        """
        if isinstance(image, np.ndarray):
            pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        else:
            pil_image = image

        blurred = pil_image.filter(ImageFilter.GaussianBlur(radius=1))
        blurred_np = np.array(blurred).astype(np.float32)
        original_np = np.array(pil_image).astype(np.float32)

        sharpened = original_np + (original_np - blurred_np) * amount
        sharpened = np.clip(sharpened, 0, 255).astype(np.uint8)
        return Image.fromarray(sharpened)

    def adaptive_contrast(self, image):
        """Apply CLAHE to the lightness channel of ``image``.

        Args:
            image: a PIL image (converted from RGB to BGR) or a BGR ndarray.

        Returns:
            The contrast-adjusted image as a BGR ndarray.
        """
        if isinstance(image, Image.Image):
            image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

        lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
        lightness, a_chan, b_chan = cv2.split(lab)
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        lightness = clahe.apply(lightness)
        lab = cv2.merge([lightness, a_chan, b_chan])
        return cv2.cvtColor(lab, cv2.COLOR_LAB2BGR)

    def denoise_preserve_details(self, image, strength=3):
        """Denoise ``image`` with a bilateral filter (diameter 9).

        Args:
            image: a PIL image (converted from RGB to BGR) or a BGR ndarray.
            strength: both sigma values of the filter are ``strength * 10``.

        Returns:
            The filtered image as an ndarray.
        """
        if isinstance(image, Image.Image):
            image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
        sigma = strength * 10
        return cv2.bilateralFilter(image, 9, sigma, sigma)

    def process_document(self, pil_image, enhance_hd=True, scale=2):
        """Run the full pipeline: crop, denoise, contrast, sharpen, brighten
        and optionally upscale.

        Args:
            pil_image: the input picture. PIL images in a mode other than
                ``RGB`` or ``L`` are converted to ``RGB`` first.
            enhance_hd: when true, upscale the result by ``scale``.
            scale: upscaling factor used when ``enhance_hd`` is true.

        Returns:
            With ``enhance_hd`` false, the processed picture as a PIL image.
            Otherwise whatever ``ImageEnhancer.enhance`` returns or, if that
            path fails, a Lanczos-upscaled and re-sharpened PIL image.
        """
        # Normalise exotic modes (palette, bilevel, alpha, CMYK, ...) so the
        # array handed to OpenCV is always 2-D gray or 3-channel RGB, as the
        # fallback scanner already does.
        if isinstance(pil_image, Image.Image) and pil_image.mode not in (
            "RGB",
            "L",
        ):
            pil_image = pil_image.convert("RGB")

        img_array = np.array(pil_image)
        if len(img_array.shape) == 2:
            img_array = cv2.cvtColor(img_array, cv2.COLOR_GRAY2BGR)
        else:
            img_array = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)

        cropped = self.auto_crop_and_align(img_array)
        denoised = self.denoise_preserve_details(cropped, strength=2)
        contrasted = self.adaptive_contrast(denoised)

        result_pil = Image.fromarray(
            cv2.cvtColor(contrasted, cv2.COLOR_BGR2RGB)
        )
        sharpened = self.enhance_sharpness(result_pil, amount=0.8)
        brightened = ImageEnhance.Brightness(sharpened).enhance(1.05)

        if not enhance_hd:
            return brightened

        try:
            from enhancer import ImageEnhancer

            ai_enhancer = ImageEnhancer()
            return ai_enhancer.enhance(brightened, scale=scale)
        except Exception:
            # Deliberate best effort: any failure to import or run the
            # optional enhancer falls through to plain resampling.
            print(
                "[DocScan] Using fallback upscaling "
                "(AI models load on Hugging Face deployment)"
            )
            # Round so that a non-integer scale still gives integer sizes.
            new_size = (
                int(round(brightened.width * scale)),
                int(round(brightened.height * scale)),
            )
            hd_image = brightened.resize(new_size, Image.LANCZOS)
            return self.enhance_sharpness(hd_image, amount=0.5)


class FallbackDocumentScanner:
    """Pillow-only scanner used when OpenCV is unavailable (no cropping)."""

    def process_document(self, pil_image, enhance_hd=True, scale=2):
        """Enhance contrast, sharpness and brightness; optionally upscale.

        Args:
            pil_image: input PIL image; converted to ``RGB`` if needed.
            enhance_hd: when true, upscale the result by ``scale`` with
                Lanczos resampling and sharpen it again.
            scale: upscaling factor used when ``enhance_hd`` is true.

        Returns:
            The processed picture as a PIL image.
        """
        if pil_image.mode != "RGB":
            pil_image = pil_image.convert("RGB")

        contrasted = ImageEnhance.Contrast(pil_image).enhance(1.15)
        sharpened = ImageEnhance.Sharpness(contrasted).enhance(1.3)
        brightened = ImageEnhance.Brightness(sharpened).enhance(1.05)

        if not enhance_hd:
            return brightened

        # Round so that a non-integer scale still gives integer sizes.
        new_size = (
            int(round(brightened.width * scale)),
            int(round(brightened.height * scale)),
        )
        hd_image = brightened.resize(new_size, Image.LANCZOS)
        return ImageEnhance.Sharpness(hd_image).enhance(1.2)


def get_document_scanner():
    """Return a ``DocumentScanner`` if OpenCV and NumPy were importable,
    otherwise a ``FallbackDocumentScanner``.
    """
    if cv2 is None or np is None:
        print("OpenCV not available, using fallback scanner")
        return FallbackDocumentScanner()
    return DocumentScanner()