Spaces:
Running on Zero
Running on Zero
| # -*- coding: utf-8 -*- | |
| """ | |
| 🤖 Arabic OCR - Hugging Face Spaces Version | |
| Model: Qwen3.5-0.8B-VL with LoRA | |
| No Quantization - Full Precision | |
| """ | |
| import os | |
| import time | |
| import torch | |
| from PIL import Image | |
| import gradio as gr | |
| from transformers import AutoProcessor, Qwen3_5ForConditionalGeneration | |
| from qwen_vl_utils import process_vision_info | |
# ==================== ⚙️ Device configuration ====================
# Pick the best available accelerator; fp16 on GPU/MPS, fp32 on CPU.
if torch.cuda.is_available():
    device, dtype = "cuda", torch.float16
    print(f"✅ Using GPU: {torch.cuda.get_device_name(0)}")
elif torch.backends.mps.is_available():
    device, dtype = "mps", torch.float16
    print("✅ Using Apple Silicon (MPS)")
else:
    device, dtype = "cpu", torch.float32
    print("⚠️ Using CPU (slower inference)")

print(f"[INFO] Device: {device} | Dtype: {dtype}")
# ==================== 🔄 Model loading ====================
def load_model():
    """Load the OCR model and processor in full precision (no quantization).

    Reads the checkpoint path from the MODEL_PATH env var, falling back to the
    published Hugging Face repo.

    Returns:
        tuple: (model, processor) with the model in eval mode on the global
        `device`.
    """
    model_path = os.getenv("MODEL_PATH", "sherif1313/Arabic-Qwen3.5-OCR-v4")
    print(f"[INFO] Loading model from: {model_path}")
    processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
    model = Qwen3_5ForConditionalGeneration.from_pretrained(
        model_path,
        torch_dtype=dtype,
        # device_map="auto" only makes sense for CUDA; otherwise load normally.
        device_map="auto" if device == "cuda" else None,
        trust_remote_code=True,
        low_cpu_mem_usage=True,
    )
    # BUGFIX: with device_map=None the weights stay on CPU, but extract_text()
    # moves its inputs to `device`. On MPS that caused a device-mismatch error;
    # move the model explicitly (no-op when device == "cpu").
    if device != "cuda":
        model = model.to(device)
    model.eval()
    print("[INFO] Model loaded successfully!")
    return model, processor
# Load once at module import so every Gradio callback shares the same model.
# On failure the app still starts; extract_text() reports the missing model.
try:
    model, processor = load_model()
except Exception as e:
    print(f"[ERROR] Failed to load model: {e}")
    model, processor = None, None
# ==================== 🧹 Helper functions ====================
def prepare_image(image: Image.Image, max_size: int = 768) -> Image.Image:
    """Shrink the image to fit `max_size` and snap dimensions up to multiples of 64."""
    if max(image.size) > max_size:
        image.thumbnail((max_size, max_size), Image.Resampling.LANCZOS)
    width, height = image.size
    snapped_w = -(-width // 64) * 64   # ceil to the next multiple of 64
    snapped_h = -(-height // 64) * 64
    if (snapped_w, snapped_h) != image.size:
        image = image.resize((snapped_w, snapped_h), Image.Resampling.LANCZOS)
    return image
def clean_output(text: str, max_repetitions: int = 2) -> str:
    """Tame degenerate model output: collapse long character runs and cap line repeats.

    Any character repeated 5+ times in a row is squashed to exactly three, and
    each distinct (stripped) line is kept at most `max_repetitions` times.
    Blank lines are dropped.
    """
    if not text:
        return text
    import re
    text = re.sub(r'(.)\1{4,}', r'\1\1\1', text)
    kept = []
    occurrences = {}
    for raw_line in text.strip().split('\n'):
        key = raw_line.strip()
        if not key:
            continue
        occurrences[key] = occurrences.get(key, 0) + 1
        if occurrences[key] <= max_repetitions:
            kept.append(raw_line)
    return '\n'.join(kept).strip()
# ==================== 🔍 Inference ====================
def extract_text(image, prompt: str = None) -> tuple[str, str]:
    """Run OCR on `image` and return (extracted_text, elapsed_time_string).

    Args:
        image: file path (str), PIL.Image, or numpy array from the Gradio widget.
        prompt: optional instruction; a default Arabic OCR prompt is used when
            empty or None.

    Returns:
        tuple[str, str]: the cleaned model output and the inference time
        ("X.XX seconds"). On failure, the first element is an error message
        and the second is "0.00".
    """
    if model is None or processor is None:
        return "❌ Error: Model not loaded", "0.00"
    if image is None:
        return "⚠️ Please upload an image", "0.00"
    start_time = time.time()
    try:
        # Normalize every accepted input type to an RGB PIL image.
        if isinstance(image, str):
            image_pil = Image.open(image).convert("RGB")
        elif isinstance(image, Image.Image):
            image_pil = image.convert("RGB")
        else:
            image_pil = Image.fromarray(image).convert("RGB")
        image_pil = prepare_image(image_pil)
        if prompt is None or not prompt.strip():
            prompt = "اقرأ النص في هذه الصورة كاملاً من البداية إلى النهاية."
        messages = [{
            "role": "user",
            "content": [
                {"type": "image", "image": image_pil},
                {"type": "text", "text": prompt}
            ]
        }]
        text_input = processor.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        image_inputs, _ = process_vision_info(messages)
        inputs = processor(
            text=[text_input],
            images=image_inputs,
            padding=True,
            return_tensors="pt"
        ).to(device)
        with torch.inference_mode():
            # BUGFIX: `temperature` removed — it is ignored when do_sample=False
            # and makes transformers emit a generation-config warning.
            generated_ids = model.generate(
                **inputs,
                max_new_tokens=512,
                do_sample=False,  # greedy decoding for reproducible OCR
                repetition_penalty=1.2,
                no_repeat_ngram_size=3,
                pad_token_id=processor.tokenizer.pad_token_id,
                eos_token_id=processor.tokenizer.eos_token_id,
            )
        # Decode only the newly generated tokens (skip the prompt prefix).
        input_len = inputs.input_ids.shape[1]
        output_text = processor.batch_decode(
            generated_ids[:, input_len:],
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False
        )[0]
        output_text = clean_output(output_text.strip())
        elapsed = time.time() - start_time
        return output_text, f"{elapsed:.2f} seconds"
    except torch.cuda.OutOfMemoryError:
        # Free cached blocks so a retry with a smaller image can succeed.
        torch.cuda.empty_cache()
        return "❌ Out of Memory. Try a smaller image.", "0.00"
    except Exception as e:
        print(f"[ERROR] {e}")
        import traceback
        traceback.print_exc()
        return f"❌ Error: {str(e)}", "0.00"
# ==================== 🎨 Gradio UI ====================
def create_interface():
    """Build and return the Gradio Blocks UI (not launched here).

    BUGFIX: `theme` and `css` are gr.Blocks() constructor arguments, not
    launch() arguments — they are set here so the styling actually applies.
    BUGFIX: the Clear button now also clears the extracted-text box and
    restores the default prompt instead of blanking it.
    """
    default_prompt = "اقرأ النص في هذه الصورة كاملاً من البداية إلى النهاية."
    with gr.Blocks(
        title="Arabic OCR - Qwen3.5-0.8B",
        theme=gr.themes.Soft(),
        css="""
        .header { text-align: center; margin-bottom: 20px; }
        .output-box { min-height: 200px; }
        """
    ) as demo:
        gr.Markdown("""
        # 📝 Arabic Handwritten & Printed OCR V4
        ### Powered by Qwen3.5-0.8B
        Upload an image containing Arabic text, and the model will extract it.
        ✨ **Features:**
        - 🌍 Arabic support
        - ✍️ Handwritten & printed text
        - 🔤 Preserves diacritics (تشكيل)
        - ⚡ Full precision (no quantization)
        """, elem_classes="header")
        with gr.Row():
            with gr.Column(scale=1):
                image_input = gr.Image(
                    label="📷 Upload Image",
                    type="pil",
                    height=300,
                    sources=["upload", "clipboard"]
                )
                prompt_input = gr.Textbox(
                    label="📝 Custom Prompt (Optional)",
                    placeholder="اقرأ النص في هذه الصورة...",
                    value=default_prompt,
                    lines=2
                )
                submit_btn = gr.Button(
                    "🔍 Extract Text",
                    variant="primary",
                    size="lg"
                )
                # Remote example URLs may be unreachable in some environments;
                # for production, ship local files (e.g. ["examples/sample1.jpg"]).
                gr.Examples(
                    label="📋 Examples (Optional)",
                    examples=[
                        [f"https://huggingface.co/sherif1313/Arabic-handwritten-OCR-4bit-Qwen2.5-VL-3B-v2/resolve/main/assets/{name}.png"]
                        for name in [
                            "00002", "00106", "00107", "00113", "00126",
                            "00135", "00141", "00197", "00198", "00199",
                            "00216", "00240",
                        ]
                    ],
                    inputs=[image_input],
                    cache_examples=False
                )
            with gr.Column(scale=1):
                # show_copy_button intentionally omitted (not in older Gradio).
                output_text = gr.Textbox(
                    label="📄 Extracted Text",
                    lines=12,
                    elem_classes="output-box"
                )
                time_output = gr.Textbox(
                    label="⏱️ Inference Time",
                    interactive=False,
                    value="-"
                )
                clear_btn = gr.Button("🗑️ Clear", variant="secondary")
        # Event wiring
        submit_btn.click(
            fn=extract_text,
            inputs=[image_input, prompt_input],
            outputs=[output_text, time_output]
        )
        clear_btn.click(
            fn=lambda: (None, default_prompt, "", "-"),
            inputs=[],
            outputs=[image_input, prompt_input, output_text, time_output]
        )
        gr.Markdown("""
        ### 💡 Tips for Best Results:
        1. Use clear, well-lit images
        2. Crop to the text region if possible
        3. For handwritten text, ensure good contrast
        4. Custom prompts can improve accuracy for specific formats
        """)
    return demo
# ==================== 🚀 Entry point ====================
if __name__ == "__main__":
    print("[INFO] Creating Gradio interface...")
    demo = create_interface()
    # BUGFIX: gr.Blocks.launch() accepts neither `theme` nor `css` — those are
    # gr.Blocks() constructor arguments; passing them to launch() raises
    # TypeError at startup. They have been removed from this call.
    demo.launch(
        server_name="0.0.0.0",  # listen on all interfaces (required for Spaces)
        server_port=int(os.getenv("PORT", 7860)),
        share=False,
        debug=os.getenv("DEBUG", "false").lower() == "true",
        show_error=True,
    )