Spaces:

ayb-bh1146
/

Photo-texte

Runtime error

App Files Files Community

Photo-texte / app.py

ayb-bh1146

Create app.py

3a32292 verified 28 days ago

raw

history blame contribute delete

5.59 kB

	import gradio as gr
	import pytesseract
	from PIL import Image
	import numpy as np
	from openai import OpenAI
	import os
	import json

	# ── NVIDIA API key ───────────────────────────────────────────────
	# Read from the environment; an unset variable yields an empty string,
	# which analyze_with_ai() treats as "no key configured".
	NVIDIA_API_KEY = os.environ.get("NVIDIA_API_KEY", "")

	# OpenAI-compatible client pointed at the NVIDIA endpoint.
	client = OpenAI(
	    api_key=NVIDIA_API_KEY,
	    base_url="https://integrate.api.nvidia.com/v1",
	)

	# ── Read the text from the image ─────────────────────────────────
	def read_image(image: Image.Image) -> str:
	    """Extract text from an image with Tesseract.

	    OCR is attempted with the combined Arabic+English language setting
	    first and with English alone if that attempt raises.

	    Args:
	        image: The image to run OCR on.

	    Returns:
	        The recognised text with surrounding whitespace stripped. If both
	        attempts raise, an Arabic error message that embeds the exception
	        from the English-only attempt is returned instead.
	    """
	    failure = None
	    # Language settings in order of preference.
	    for language in ("ara+eng", "eng"):
	        try:
	            recognised = pytesseract.image_to_string(image, lang=language)
	        except Exception as exc:
	            # Keep the most recent failure; only the last one is reported.
	            failure = exc
	        else:
	            return recognised.strip()

	    message = f"خطأ في قراءة الصورة: {failure}"
	    return message.strip()

	# ── AI analysis of the extracted text ────────────────────────────
	def _parse_model_json(reply: str):
	    """Parse a model reply as JSON, tolerating prose around the object.

	    The reply is parsed as-is first. If that fails, the span from the first
	    "{" to the last "}" is parsed instead, which recovers replies such as
	    "Here is the JSON: {...}".

	    Raises:
	        json.JSONDecodeError: If neither attempt yields valid JSON.
	    """
	    try:
	        return json.loads(reply)
	    except json.JSONDecodeError:
	        start = reply.find("{")
	        end = reply.rfind("}")
	        if start == -1 or end <= start:
	            # No brace-delimited span to retry with; report the original error.
	            raise
	        return json.loads(reply[start:end + 1])


	def analyze_with_ai(text: str) -> dict:
	    """Send OCR text to the NVIDIA chat endpoint and return structured data.

	    Args:
	        text: The text extracted from the invoice image.

	    Returns:
	        The JSON value parsed from the model reply. On failure a dict with a
	        single key is returned instead:
	        "خطأ" when the API key is missing or the request fails, or
	        "النتيجة_الخام" holding the cleaned reply when it is not valid JSON.
	    """
	    if not NVIDIA_API_KEY:
	        return {"خطأ": "لم يتم إضافة مفتاح NVIDIA API في الإعدادات"}

	    # Doubled braces below are f-string escapes for literal "{" and "}".
	    prompt = f"""
	أنت مساعد متخصص في قراءة الفواتير والوثائق المالية.
	لديك النص التالي المستخرج من صورة فاتورة:

	---
	{text}
	---

	استخرج المعلومات التالية بدقة:
	- اسم البائع أو المتجر
	- التاريخ
	- رقم الفاتورة (إن وجد)
	- قائمة المنتجات أو الخدمات مع أسعارها
	- المبلغ الإجمالي
	- الضريبة (إن وجدت)
	- طريقة الدفع (إن وجدت)

	أجب بصيغة JSON فقط بدون أي نص إضافي.
	مثال:
	{{
	"البائع": "اسم المتجر",
	"التاريخ": "2024-01-01",
	"رقم_الفاتورة": "001",
	"المنتجات": [
	{{"الاسم": "منتج 1", "الكمية": 2, "السعر": 10.0}},
	{{"الاسم": "منتج 2", "الكمية": 1, "السعر": 25.0}}
	],
	"الإجمالي": 45.0,
	"الضريبة": 0.0,
	"طريقة_الدفع": "نقدي"
	}}
	"""

	    # Request and parsing are handled separately so the parse-error handler
	    # can never run before result_text has been assigned.
	    try:
	        response = client.chat.completions.create(
	            model="meta/llama-3.1-8b-instruct",
	            messages=[{"role": "user", "content": prompt}],
	            temperature=0.1,
	            max_tokens=1000,
	        )
	        result_text = response.choices[0].message.content.strip()
	    except Exception as e:
	        # Best-effort boundary: surface any request failure to the UI.
	        return {"خطأ": str(e)}

	    # Drop Markdown code fences the model may wrap around the JSON.
	    result_text = result_text.replace("```json", "").replace("```", "").strip()

	    try:
	        return _parse_model_json(result_text)
	    except (json.JSONDecodeError, RecursionError):
	        # Not parseable even after trimming surrounding prose: hand back the
	        # cleaned reply so the user can still read it.
	        return {"النتيجة_الخام": result_text}

	# ── Main entry point ─────────────────────────────────────────────
	def process_invoice(image):
	    """Run OCR on an uploaded invoice image and analyse the result.

	    Args:
	        image: The uploaded image, a NumPy array, or None when nothing
	            was uploaded.

	    Returns:
	        A 2-tuple of (text, data). The text is the OCR output, or an Arabic
	        notice when no image was given or no text was found; data is the
	        result of analyze_with_ai(), or an empty dict in those two cases.
	    """
	    if image is None:
	        return "يرجى رفع صورة أولاً", {}

	    # NumPy arrays are converted to a PIL image; anything else is used as-is.
	    if isinstance(image, np.ndarray):
	        picture = Image.fromarray(image)
	    else:
	        picture = image

	    # Step 1: OCR.
	    ocr_text = read_image(picture)

	    # Step 2: AI analysis, only when OCR produced some text.
	    if ocr_text:
	        return ocr_text, analyze_with_ai(ocr_text)

	    return "لم يتم استخراج أي نص من الصورة — تأكد من وضوح الصورة", {}

	# ── User interface ───────────────────────────────────────────────
	# Layout: a centred title and subtitle, then one row with two equal columns
	# (image upload + submit button on one side, OCR text + structured JSON on
	# the other), followed by a small footer.
	with gr.Blocks(
	    css="""
	.title { text-align: center; font-size: 2rem; font-weight: bold; margin-bottom: 8px; }
	.subtitle { text-align: center; color: #666; margin-bottom: 20px; }
	""",
	    theme=gr.themes.Soft(),
	    title="قارئ الفواتير الذكي",
	) as demo:

	    gr.HTML('<div class="title">🧾 قارئ الفواتير الذكي</div>')
	    gr.HTML('<div class="subtitle">ارفع صورة أي فاتورة وسيستخرج المعلومات تلقائياً</div>')

	    with gr.Row():
	        # Input column: the invoice image and the trigger button.
	        with gr.Column(scale=1):
	            image_input = gr.Image(
	                type="pil",
	                sources=["upload", "clipboard"],
	                label="📸 صورة الفاتورة",
	            )
	            submit_btn = gr.Button("🔍 استخرج المعلومات", size="lg", variant="primary")

	        # Output column: raw OCR text above the structured result.
	        with gr.Column(scale=1):
	            text_output = gr.Textbox(
	                rtl=True,
	                lines=8,
	                label="📝 النص المستخرج من الصورة",
	            )
	            json_output = gr.JSON(label="📊 المعلومات المنظمة")

	    # process_invoice returns (text, data), matching the two outputs in order.
	    submit_btn.click(
	        process_invoice,
	        inputs=[image_input],
	        outputs=[text_output, json_output],
	    )

	    gr.HTML("""
	<div style="text-align:center;margin-top:20px;color:#888;font-size:13px">
	مدعوم بـ Tesseract OCR + NVIDIA Llama 3.1
	</div>
	""")

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()