Spaces:
Sleeping
Sleeping
Update ocr_processing.py
Browse files- ocr_processing.py +22 -2
ocr_processing.py
CHANGED
|
@@ -32,16 +32,36 @@ def correct_lighting(image):
|
|
| 32 |
|
| 33 |
# OCR Extraction using EasyOCR
|
| 34 |
# Cache of EasyOCR readers keyed by language tuple. Constructing a Reader
# loads the OCR models, so one instance is reused across calls.
_OCR_READERS = {}


def _get_reader(langs):
    """Return a CPU EasyOCR reader for ``langs``, creating it on first use."""
    key = tuple(langs)
    if key not in _OCR_READERS:
        _OCR_READERS[key] = easyocr.Reader(list(key), gpu=False)
    return _OCR_READERS[key]


def extract_text(image, langs=None):
    """Run EasyOCR on ``image`` and return the recognized text.

    Args:
        image: Image passed unchanged to ``Reader.readtext``.
        langs: Language codes for the reader; defaults to ``['en']``.

    Returns:
        The recognized text fragments joined with newlines (an empty
        string when nothing is recognized).
    """
    if langs is None:
        langs = ['en']
    reader = _get_reader(langs)
    # detail=0 asks EasyOCR for plain strings rather than box/score tuples.
    results = reader.readtext(image, detail=0)
    return "\n".join(results)
|
| 39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
# Full pipeline
|
| 41 |
def process_image(file, langs=None):
    """Run the OCR pipeline on an image file and return the extracted text.

    Args:
        file: Anything accepted by ``PIL.Image.open``.
        langs: Language codes forwarded to ``extract_text``; defaults to
            ``['en']``.

    Returns:
        The text returned by ``extract_text`` for the preprocessed image.
    """
    if langs is None:
        langs = ['en']

    # Decode inside a context manager so the image's file handle is
    # released as soon as the pixel data has been copied into the array.
    with Image.open(file) as img:
        img_cv = np.array(img.convert('RGB'))

    # 1. Deskew
    img_cv = deskew(img_cv)

    # 2. Lighting correction
    img_cv = correct_lighting(img_cv)

    # 3. Text extraction
    return extract_text(img_cv, langs)
|
|
|
|
| 32 |
|
| 33 |
# OCR Extraction using EasyOCR
|
| 34 |
# Cache of EasyOCR readers keyed by language tuple. Constructing a Reader
# loads the OCR models, so one instance is reused across calls.
_OCR_READERS = {}


def _get_reader(langs):
    """Return a CPU EasyOCR reader for ``langs``, creating it on first use."""
    key = tuple(langs)
    if key not in _OCR_READERS:
        _OCR_READERS[key] = easyocr.Reader(list(key), gpu=False)
    return _OCR_READERS[key]


def extract_text(image, langs=None):
    """Run EasyOCR on ``image`` and return the recognized text.

    Args:
        image: Image passed unchanged to ``Reader.readtext``.
        langs: Language codes for the reader; defaults to ``['en']``.

    Returns:
        The recognized paragraphs joined with newlines (an empty string
        when nothing is recognized).
    """
    if langs is None:
        langs = ['en']
    reader = _get_reader(langs)
    # detail=0 asks EasyOCR for plain strings rather than box/score tuples;
    # paragraph=True has it combine results into paragraphs.
    results = reader.readtext(image, detail=0, paragraph=True)
    return "\n".join(results)
|
| 39 |
|
| 40 |
+
|
| 41 |
+
def enhance_for_ocr(image, block_size=15, c=10):
    """Binarize ``image`` with a Gaussian adaptive threshold for OCR.

    Args:
        image: NumPy image array; either multi-channel colour or an
            already-grayscale 2-D array.
        block_size: Size of the pixel neighbourhood used to compute each
            local threshold (passed to ``cv2.adaptiveThreshold``).
        c: Constant subtracted from the weighted neighbourhood mean.

    Returns:
        The thresholded single-channel image produced by
        ``cv2.adaptiveThreshold`` with ``THRESH_BINARY`` and max value 255.
    """
    if image.ndim == 2:
        # Already single-channel: cvtColor would reject it, so use as is.
        gray = image
    else:
        # NOTE(review): process_image builds its array from a PIL RGB
        # image, so COLOR_RGB2GRAY may be the intended code here — confirm
        # what deskew/correct_lighting return before changing it.
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Adaptive threshold
    return cv2.adaptiveThreshold(
        gray,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        block_size,
        c,
    )
|
| 49 |
+
|
| 50 |
+
|
| 51 |
# Full pipeline
|
| 52 |
def process_image(file, langs=None):
    """Run the OCR pipeline on an image file and return the extracted text.

    Args:
        file: Anything accepted by ``PIL.Image.open``.
        langs: Language codes forwarded to ``extract_text``; defaults to
            ``['en']``.

    Returns:
        The text returned by ``extract_text`` for the preprocessed image.
    """
    if langs is None:
        langs = ['en']

    # Decode inside a context manager so the image's file handle is
    # released as soon as the pixel data has been copied into the array.
    with Image.open(file) as img:
        img_cv = np.array(img.convert('RGB'))

    # 1. Deskew
    img_cv = deskew(img_cv)

    # 2. Lighting correction
    img_cv = correct_lighting(img_cv)

    # 3. Enhance for text
    img_cv = enhance_for_ocr(img_cv)

    # 4. Text extraction
    return extract_text(img_cv, langs)
|