from paddleocr import PaddleOCR
import cv2
import numpy as np
import os
import tempfile

# Module-level cache for the OCR engine so it is initialized only once;
# it stays None until get_ocr_engine() creates the instance.
ocr_engine = None

def get_ocr_engine():
    """Return the shared PaddleOCR engine, creating it on the first call.

    The instance is stored in the module-level ``ocr_engine`` variable and
    reused by later calls. Construction is first attempted with explicit
    detection thresholds and ``use_space_char``; if that raises, a second
    attempt is made with only the basic arguments.

    Returns:
        The PaddleOCR instance held in ``ocr_engine``.

    Raises:
        Exception: Whatever the fallback constructor raises when both
            attempts fail (the exception propagates unchanged).
    """
    global ocr_engine
    if ocr_engine is not None:
        return ocr_engine

    try:
        ocr_engine = PaddleOCR(
            use_angle_cls=True,
            lang='en',
            show_log=False,
            # Explicit detection thresholds and space-character handling.
            det_db_thresh=0.3,
            det_db_box_thresh=0.3,
            use_space_char=True,
        )
        print("✅ تم تهيئة محرك PaddleOCR بنجاح")
    except Exception as e:
        print(f"❌ فشل في تهيئة PaddleOCR: {e}")
        # Fall back to the basic settings. A failure here is deliberately
        # not caught, so the caller sees the real initialization error.
        ocr_engine = PaddleOCR(use_angle_cls=True, lang='en', show_log=False)
        print("✅ تم تهيئة محرك PaddleOCR بالإعدادات الأساسية")
    return ocr_engine

def preprocess_image(image_path):
    """Write a contrast-enhanced grayscale copy of an image to a temp file.

    The image is read with OpenCV, converted to grayscale, passed through
    ``cv2.convertScaleAbs`` (alpha=1.3, beta=40) and saved as a PNG in a
    temporary file.

    Args:
        image_path: Path of the image to preprocess.

    Returns:
        The path of the temporary PNG on success. The caller is responsible
        for deleting it. If the image cannot be read, the write fails, or
        any other error occurs, ``image_path`` is returned unchanged.
    """
    temp_path = None
    try:
        img = cv2.imread(image_path)
        if img is None:
            # Unreadable or missing image: let the caller use the original.
            return image_path

        # Convert to grayscale, then apply a simple contrast/brightness boost.
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        enhanced = cv2.convertScaleAbs(gray, alpha=1.3, beta=40)

        # mkstemp returns an already-open OS-level descriptor; close it right
        # away (cv2.imwrite opens the path itself) so it is not leaked.
        fd, temp_path = tempfile.mkstemp(suffix='.png')
        os.close(fd)

        # imwrite reports failure through its return value, not an exception.
        if not cv2.imwrite(temp_path, enhanced):
            raise OSError(f"cv2.imwrite failed for {temp_path}")

        return temp_path
    except Exception as e:
        print(f"⚠️ خطأ في معالجة الصورة: {e}")
        # Do not leave a partially written temp file behind.
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                pass  # best-effort cleanup
        return image_path

def extract_texts(image_path: str, preprocess: bool = True):
    """Run OCR on an image and return the recognized texts and their boxes.

    Args:
        image_path: Path of the image to read.
        preprocess: When true, the image is first passed through
            ``preprocess_image`` and OCR runs on the returned path.

    Returns:
        A ``(texts, boxes)`` tuple of two parallel lists. ``texts`` holds the
        recognized strings whose confidence is above 0.3 and that are not
        blank; ``boxes`` holds the first element of the matching OCR result
        line (presumably the bounding-box coordinates — confirm against the
        PaddleOCR result format). On any error the traceback is printed and
        ``([], [])`` is returned.
    """
    processed_path = image_path
    try:
        ocr = get_ocr_engine()

        # Optional preprocessing; may return image_path itself on failure.
        if preprocess:
            processed_path = preprocess_image(image_path)

        result = ocr.ocr(processed_path, cls=True)
        texts = []
        boxes = []

        if result and result[0]:
            for line in result[0]:
                if not line or len(line) < 2:
                    continue

                text = line[1][0]
                # Default to 0.5 when the entry carries no confidence score.
                confidence = line[1][1] if len(line[1]) > 1 else 0.5

                # Manually filter out low-confidence and blank results.
                if confidence > 0.3 and text.strip():
                    texts.append(text)
                    boxes.append(line[0])
                    print(f"📝 تم استخراج: '{text}' (ثقة: {confidence:.2f})")

        print(f"✅ تم استخراج {len(texts)} نصاً من الصورة")
        return texts, boxes

    except Exception as e:
        print(f"❌ خطأ في استخراج النصوص: {e}")
        import traceback
        traceback.print_exc()
        return [], []

    finally:
        # Remove the temporary preprocessed image even when OCR failed, so
        # errors do not leak temp files. Never delete the caller's own file.
        if processed_path != image_path and os.path.exists(processed_path):
            try:
                os.remove(processed_path)
            except OSError:
                pass  # best-effort cleanup