Spaces:
Sleeping
Sleeping
| import cv2 | |
| import numpy as np | |
| from PIL import Image | |
| import easyocr | |
| # Skew Correction | |
def deskew(image):
    """Rotate an image so that its foreground content is axis-aligned.

    The skew is estimated from the minimum-area rectangle enclosing every
    foreground pixel of an inverted, Otsu-thresholded grayscale copy.

    Args:
        image: 3-channel image array; it is converted with
            ``cv2.COLOR_BGR2GRAY``, so BGR channel order is expected.

    Returns:
        The rotated image, same width and height as the input. The input
        is returned unchanged when no foreground pixels are found.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Invert so that dark content on a light background becomes foreground.
    inverted = cv2.bitwise_not(gray)
    thresh = cv2.threshold(
        inverted, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU
    )[1]

    coords = np.column_stack(np.where(thresh > 0))
    if coords.size == 0:
        # Nothing to measure (e.g. a blank page): avoid handing an empty
        # point set to minAreaRect and leave the image as it is.
        return image

    raw_angle = cv2.minAreaRect(coords)[-1]

    # Older OpenCV releases report the rectangle angle in [-90, 0), newer
    # ones (4.5.1+) in (0, 90]. A rectangle's orientation is only defined
    # modulo 90 degrees, so fold the value into [0, 90) first and then pick
    # the smaller correction. On the legacy range this yields exactly the
    # same result as the classic ``angle < -45`` branch.
    folded = raw_angle % 90
    angle = -folded if folded < 45 else 90 - folded

    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
    return cv2.warpAffine(
        image,
        matrix,
        (w, h),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE,
    )
| # Lighting Correction | |
def correct_lighting(image):
    """Even out illumination by applying CLAHE to the LAB lightness channel.

    Args:
        image: 3-channel image array; it is converted with
            ``cv2.COLOR_BGR2LAB``, so BGR channel order is expected.

    Returns:
        The image converted back with ``cv2.COLOR_LAB2BGR`` after the
        lightness channel has been equalized; the two colour channels are
        merged back untouched.
    """
    lightness, chan_a, chan_b = cv2.split(
        cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
    )
    equalizer = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    equalized_lab = cv2.merge((equalizer.apply(lightness), chan_a, chan_b))
    return cv2.cvtColor(equalized_lab, cv2.COLOR_LAB2BGR)
| # OCR Extraction using EasyOCR | |
# Readers are expensive to build (each construction loads the OCR models),
# so keep one per language combination for the lifetime of the process.
_OCR_READERS = {}


def _get_reader(langs):
    """Return a cached CPU-only EasyOCR reader for the given language codes."""
    key = tuple(langs)
    reader = _OCR_READERS.get(key)
    if reader is None:
        reader = easyocr.Reader(list(key), gpu=False)
        _OCR_READERS[key] = reader
    return reader


def extract_text(image, langs=None):
    """Run EasyOCR on an image and return the recognized text.

    Args:
        image: Image in any form accepted by ``easyocr.Reader.readtext``.
        langs: Iterable of EasyOCR language codes. Defaults to ``['en']``
            when omitted or ``None``.

    Returns:
        The recognized paragraphs joined with newline characters.
    """
    if langs is None:
        langs = ['en']
    results = _get_reader(langs).readtext(image, detail=0, paragraph=True)
    return "\n".join(results)
def enhance_for_ocr(image):
    """Binarize an image with Gaussian adaptive thresholding.

    Args:
        image: 3-channel image array; it is converted with
            ``cv2.COLOR_BGR2GRAY``, so BGR channel order is expected.

    Returns:
        Single-channel binary image produced by ``cv2.adaptiveThreshold``
        with a maximum value of 255.
    """
    max_value = 255
    block_size = 15  # side length of the pixel neighbourhood
    offset = 10  # constant subtracted from the weighted neighbourhood mean

    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return cv2.adaptiveThreshold(
        grayscale,
        max_value,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        block_size,
        offset,
    )
| # Full pipeline | |
def process_image(file, langs=None):
    """Run the full OCR pipeline on an image file and return its text.

    The image is loaded, deskewed, lighting-corrected, binarized and then
    passed to the OCR step.

    Args:
        file: Anything ``PIL.Image.open`` accepts (path or file object).
        langs: Iterable of EasyOCR language codes. Defaults to ``['en']``
            when omitted or ``None``.

    Returns:
        The text returned by ``extract_text`` for the processed image.
    """
    if langs is None:
        langs = ['en']

    # Close the underlying file as soon as the pixels have been copied out.
    with Image.open(file) as img:
        rgb = np.array(img.convert('RGB'))

    # PIL delivers RGB, while every later step uses OpenCV's BGR conversion
    # codes; swap the channel order once here so those codes are correct.
    img_cv = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)

    # 1. Skew correction
    img_cv = deskew(img_cv)
    # 2. Lighting correction
    img_cv = correct_lighting(img_cv)
    # 3. Enhancement for text
    img_cv = enhance_for_ocr(img_cv)
    # 4. Text extraction
    return extract_text(img_cv, langs)