Spaces:

YOUKKASS
/

manga01

Sleeping

App Files Files Community

manga01 / modules /ocr.py

YOUKKASS

Update modules/ocr.py

f947852 verified 4 months ago

raw

history blame contribute delete

3.7 kB

	from paddleocr import PaddleOCR
	import cv2
	import numpy as np
	import os
	import tempfile

	# Module-level cache for the PaddleOCR engine. It starts out empty and is
	# filled in by get_ocr_engine() on first use, so the engine is only
	# initialized once.
	ocr_engine = None

	def get_ocr_engine():
	    """Return the shared PaddleOCR engine, creating it on the first call.

	    The engine is stored in the module-level ``ocr_engine`` variable and
	    reused by every later call. Construction is attempted first with tuned
	    detection settings; if that raises, a second attempt is made with only
	    the basic options.

	    Returns:
	        The cached ``PaddleOCR`` instance.

	    Raises:
	        Exception: Whatever the fallback ``PaddleOCR(...)`` call raises when
	            both construction attempts fail.
	    """
	    global ocr_engine

	    # Fast path: the engine was already built by an earlier call.
	    if ocr_engine is not None:
	        return ocr_engine

	    # Preferred configuration, with lowered detection thresholds.
	    tuned_options = dict(
	        use_angle_cls=True,
	        lang='en',
	        show_log=False,
	        det_db_thresh=0.3,
	        det_db_box_thresh=0.3,
	        use_space_char=True,
	    )

	    try:
	        ocr_engine = PaddleOCR(**tuned_options)
	        print("✅ تم تهيئة محرك PaddleOCR بنجاح")
	    except Exception as e:
	        print(f"❌ فشل في تهيئة PaddleOCR: {e}")
	        # Fall back to the basic settings. A failure here is not handled
	        # and propagates to the caller.
	        ocr_engine = PaddleOCR(use_angle_cls=True, lang='en', show_log=False)
	        print("✅ تم تهيئة محرك PaddleOCR بالإعدادات الأساسية")

	    return ocr_engine

	def preprocess_image(image_path):
	    """Write a contrast-boosted grayscale copy of an image to a temporary PNG.

	    Args:
	        image_path: Path of the image file to read.

	    Returns:
	        The path of a newly created temporary ``.png`` file containing the
	        enhanced image; the caller is responsible for deleting it. If the
	        image cannot be read, or any step fails, ``image_path`` is returned
	        unchanged and no temporary file is left behind.
	    """
	    temp_path = None
	    try:
	        img = cv2.imread(image_path)
	        if img is None:
	            # cv2.imread reports an unreadable file by returning None.
	            return image_path

	        # Convert to grayscale.
	        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

	        # Simple linear contrast/brightness boost (scale 1.3, offset 40).
	        enhanced = cv2.convertScaleAbs(gray, alpha=1.3, beta=40)

	        # mkstemp returns an already-open OS-level descriptor along with the
	        # path. Close it immediately so it is not leaked; cv2.imwrite opens
	        # the file by path on its own.
	        fd, temp_path = tempfile.mkstemp(suffix='.png')
	        os.close(fd)

	        # imwrite signals failure through its boolean result; treat that
	        # like any other error so an empty file is never handed to OCR.
	        if not cv2.imwrite(temp_path, enhanced):
	            raise OSError(f"cv2.imwrite could not write {temp_path}")

	        return temp_path
	    except Exception as e:
	        print(f"⚠️ خطأ في معالجة الصورة: {e}")
	        # Best-effort removal of a temporary file created before the failure.
	        if temp_path is not None:
	            try:
	                os.remove(temp_path)
	            except OSError:
	                pass
	        return image_path

	def extract_texts(image_path: str, preprocess: bool = True):
	    """Run OCR on an image and return the recognized strings with their boxes.

	    Args:
	        image_path: Path of the image to read.
	        preprocess: When true, OCR runs on the path returned by
	            ``preprocess_image(image_path)`` instead of on the original file.

	    Returns:
	        A ``(texts, boxes)`` pair of parallel lists. ``texts[i]`` is a
	        recognized string and ``boxes[i]`` is the first element of the same
	        OCR result entry. Only non-blank entries with a confidence above 0.3
	        are kept. On any failure the error and traceback are printed and
	        ``([], [])`` is returned; no exception escapes this function.
	    """
	    processed_path = image_path
	    try:
	        ocr = get_ocr_engine()

	        # Optional preprocessing; yields a temporary file path on success.
	        if preprocess:
	            processed_path = preprocess_image(image_path)

	        result = ocr.ocr(processed_path, cls=True)
	        texts = []
	        boxes = []

	        # Only result[0] is read; it is skipped when missing or empty.
	        detections = result[0] if result and result[0] else []
	        for line in detections:
	            if not line or len(line) < 2:
	                continue

	            text = line[1][0]
	            # Fall back to a neutral confidence when none is provided.
	            confidence = line[1][1] if len(line[1]) > 1 else 0.5

	            # Manually drop low-confidence and blank results.
	            if confidence > 0.3 and text.strip():
	                texts.append(text)
	                boxes.append(line[0])
	                print(f"📝 تم استخراج: '{text}' (ثقة: {confidence:.2f})")

	        print(f"✅ تم استخراج {len(texts)} نصاً من الصورة")
	        return texts, boxes

	    except Exception as e:
	        print(f"❌ خطأ في استخراج النصوص: {e}")
	        import traceback
	        traceback.print_exc()
	        return [], []

	    finally:
	        # Remove the temporary preprocessed image on every exit path, so a
	        # failing OCR call does not leave the file behind. The paths differ
	        # only when preprocessing produced a new file.
	        if processed_path != image_path:
	            try:
	                os.remove(processed_path)
	            except OSError:
	                pass