import os
import tempfile
import traceback

import cv2
import numpy as np
from paddleocr import PaddleOCR


# Process-wide PaddleOCR instance. Stays None until get_ocr_engine() builds it
# on first use, so the engine is constructed at most once per process.
ocr_engine = None


def get_ocr_engine():
    """Return the shared PaddleOCR engine, creating it on first use.

    The engine is cached in the module-level ``ocr_engine`` global. The first
    attempt passes explicit detection thresholds (0.3) and enables space
    characters; if that constructor call raises, the error is printed and
    construction is retried with only the basic options.

    Returns:
        The cached ``PaddleOCR`` instance.

    Raises:
        Exception: Whatever the basic-options ``PaddleOCR`` constructor raises
            when the fallback attempt fails as well.
    """
    global ocr_engine
    if ocr_engine is not None:
        return ocr_engine

    try:
        ocr_engine = PaddleOCR(
            use_angle_cls=True,
            lang='en',
            show_log=False,
            det_db_thresh=0.3,
            det_db_box_thresh=0.3,
            use_space_char=True,
        )
        print("✅ تم تهيئة محرك PaddleOCR بنجاح")
    except Exception as e:
        print(f"❌ فشل في تهيئة PaddleOCR: {e}")
        # Retry without the tuning options. A failure here is deliberately
        # left to propagate: there is nothing further to fall back to.
        ocr_engine = PaddleOCR(use_angle_cls=True, lang='en', show_log=False)
        print("✅ تم تهيئة محرك PaddleOCR بالإعدادات الأساسية")
    return ocr_engine


def preprocess_image(image_path):
    """Write a grayscale, contrast-adjusted copy of an image for OCR.

    Args:
        image_path: Path of the image file to read.

    Returns:
        Path of a new temporary ``.png`` file holding the adjusted image; the
        caller is responsible for deleting it. If the image cannot be read,
        the adjusted copy cannot be written, or any step raises, a warning is
        printed (for the error cases) and ``image_path`` is returned unchanged.
    """
    temp_path = None
    try:
        img = cv2.imread(image_path)
        if img is None:
            # imread signals an unreadable file with None rather than raising.
            return image_path

        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # Scale pixel values by 1.3 and add 40 to boost contrast/brightness.
        enhanced = cv2.convertScaleAbs(gray, alpha=1.3, beta=40)

        fd, temp_path = tempfile.mkstemp(suffix='.png')
        # mkstemp hands back an open descriptor; close it so it does not leak.
        # cv2.imwrite opens the file again by path.
        os.close(fd)

        if not cv2.imwrite(temp_path, enhanced):
            # imwrite reports failure through its return value, not an exception.
            raise OSError(f"cv2.imwrite failed for {temp_path}")
        return temp_path
    except Exception as e:
        print(f"⚠️ خطأ في معالجة الصورة: {e}")
        if temp_path is not None:
            # Best-effort removal of the unused temp file.
            try:
                os.remove(temp_path)
            except OSError:
                pass
        return image_path


def extract_texts(image_path: str, preprocess: bool = True):
    """Extract text lines and their boxes from an image, optionally preprocessing it.

    Args:
        image_path: Path of the image to run OCR on.
        preprocess: When true, OCR runs on the path returned by
            ``preprocess_image(image_path)`` instead of the original file.

    Returns:
        A ``(texts, boxes)`` pair of parallel lists. An entry of ``result[0]``
        is kept when its confidence is above 0.3 and its text is not blank;
        ``texts`` receives ``line[1][0]`` and ``boxes`` receives ``line[0]``.
        If anything raises, the error and traceback are printed and
        ``([], [])`` is returned.
    """
    processed_path = image_path
    try:
        ocr = get_ocr_engine()
        if preprocess:
            processed_path = preprocess_image(image_path)

        result = ocr.ocr(processed_path, cls=True)
        texts = []
        boxes = []

        if result and result[0]:
            for line in result[0]:
                if not line or len(line) < 2:
                    continue
                text = line[1][0]
                # Entries without a score are given a neutral 0.5.
                confidence = line[1][1] if len(line[1]) > 1 else 0.5
                if confidence > 0.3 and text.strip():
                    texts.append(text)
                    boxes.append(line[0])
                    print(f"📝 تم استخراج: '{text}' (ثقة: {confidence:.2f})")

        print(f"✅ تم استخراج {len(texts)} نصاً من الصورة")
        return texts, boxes
    except Exception as e:
        print(f"❌ خطأ في استخراج النصوص: {e}")
        traceback.print_exc()
        return [], []
    finally:
        # A differing path means preprocessing produced a temp file. Remove it
        # on every exit path so a failed OCR call does not leave it behind.
        if processed_path != image_path:
            try:
                os.remove(processed_path)
            except OSError:
                pass