Spaces:
Running on Zero
Running on Zero
| # -*- coding: utf-8 -*- | |
| """ | |
| 🤖 Arabic OCR - Hugging Face Spaces Version | |
| Model: Qwen3.5-0.8B-VL with LoRA | |
| No Quantization - Full Precision | |
| """ | |
| import os | |
| import time | |
| import torch | |
| from PIL import Image | |
| import gradio as gr | |
| from transformers import AutoProcessor, Qwen3_5ForConditionalGeneration | |
| from qwen_vl_utils import process_vision_info | |
# ==================== ⚙️ Device configuration ====================
# Pick the best available accelerator; fp16 on GPU/MPS, fp32 on CPU.
if torch.cuda.is_available():
    device, dtype = "cuda", torch.float16
    print(f"✅ Using GPU: {torch.cuda.get_device_name(0)}")
elif torch.backends.mps.is_available():
    device, dtype = "mps", torch.float16
    print("✅ Using Apple Silicon (MPS)")
else:
    device, dtype = "cpu", torch.float32
    print("⚠️ Using CPU (slower inference)")

print(f"[INFO] Device: {device} | Dtype: {dtype}")
# ==================== 🔄 Model loading ====================
def load_model():
    """Load the OCR model and processor in full precision (no quantization).

    Reads the checkpoint path from the MODEL_PATH env var, falling back to the
    published Hugging Face repo.

    Returns:
        tuple: (model, processor) with the model in eval mode on the global
        `device`.
    """
    model_path = os.getenv("MODEL_PATH", "sherif1313/Arabic-Qwen3.5-OCR-v4")
    print(f"[INFO] Loading model from: {model_path}")
    processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
    model = Qwen3_5ForConditionalGeneration.from_pretrained(
        model_path,
        torch_dtype=dtype,
        # device_map="auto" only makes sense for CUDA; otherwise load normally.
        device_map="auto" if device == "cuda" else None,
        trust_remote_code=True,
        low_cpu_mem_usage=True,
    )
    # BUGFIX: with device_map=None the weights stay on CPU, but extract_text()
    # moves its inputs to `device`. On MPS that caused a device-mismatch error;
    # move the model explicitly (no-op when device == "cpu").
    if device != "cuda":
        model = model.to(device)
    model.eval()
    print("[INFO] Model loaded successfully!")
    return model, processor
# Load once at module import so every Gradio callback shares the same model.
# On failure the app still starts; extract_text() reports the missing model.
try:
    model, processor = load_model()
except Exception as e:
    print(f"[ERROR] Failed to load model: {e}")
    model, processor = None, None
# ==================== 🧹 Helper functions ====================
def prepare_image(image: Image.Image, max_size: int = 768) -> Image.Image:
    """Shrink the image to fit `max_size` and snap dimensions up to multiples of 64."""
    if max(image.size) > max_size:
        image.thumbnail((max_size, max_size), Image.Resampling.LANCZOS)
    width, height = image.size
    snapped_w = -(-width // 64) * 64   # ceil to the next multiple of 64
    snapped_h = -(-height // 64) * 64
    if (snapped_w, snapped_h) != image.size:
        image = image.resize((snapped_w, snapped_h), Image.Resampling.LANCZOS)
    return image
def clean_output(text: str, max_repetitions: int = 2) -> str:
    """Tame degenerate model output: collapse long character runs and cap line repeats.

    Any character repeated 5+ times in a row is squashed to exactly three, and
    each distinct (stripped) line is kept at most `max_repetitions` times.
    Blank lines are dropped.
    """
    if not text:
        return text
    import re
    text = re.sub(r'(.)\1{4,}', r'\1\1\1', text)
    kept = []
    occurrences = {}
    for raw_line in text.strip().split('\n'):
        key = raw_line.strip()
        if not key:
            continue
        occurrences[key] = occurrences.get(key, 0) + 1
        if occurrences[key] <= max_repetitions:
            kept.append(raw_line)
    return '\n'.join(kept).strip()
# ==================== 🔍 Inference ====================
def extract_text(image, prompt: str = None) -> tuple[str, str]:
    """Run OCR on `image` and return (extracted_text, elapsed_time_string).

    Args:
        image: file path (str), PIL.Image, or numpy array from the Gradio widget.
        prompt: optional instruction; a default Arabic OCR prompt is used when
            empty or None.

    Returns:
        tuple[str, str]: the cleaned model output and the inference time
        ("X.XX seconds"). On failure, the first element is an error message
        and the second is "0.00".
    """
    if model is None or processor is None:
        return "❌ Error: Model not loaded", "0.00"
    if image is None:
        return "⚠️ Please upload an image", "0.00"
    start_time = time.time()
    try:
        # Normalize every accepted input type to an RGB PIL image.
        if isinstance(image, str):
            image_pil = Image.open(image).convert("RGB")
        elif isinstance(image, Image.Image):
            image_pil = image.convert("RGB")
        else:
            image_pil = Image.fromarray(image).convert("RGB")
        image_pil = prepare_image(image_pil)
        if prompt is None or not prompt.strip():
            prompt = "اقرأ النص في هذه الصورة كاملاً من البداية إلى النهاية."
        messages = [{
            "role": "user",
            "content": [
                {"type": "image", "image": image_pil},
                {"type": "text", "text": prompt}
            ]
        }]
        text_input = processor.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        image_inputs, _ = process_vision_info(messages)
        inputs = processor(
            text=[text_input],
            images=image_inputs,
            padding=True,
            return_tensors="pt"
        ).to(device)
        with torch.inference_mode():
            # BUGFIX: `temperature` removed — it is ignored when do_sample=False
            # and makes transformers emit a generation-config warning.
            generated_ids = model.generate(
                **inputs,
                max_new_tokens=512,
                do_sample=False,  # greedy decoding for reproducible OCR
                repetition_penalty=1.2,
                no_repeat_ngram_size=3,
                pad_token_id=processor.tokenizer.pad_token_id,
                eos_token_id=processor.tokenizer.eos_token_id,
            )
        # Decode only the newly generated tokens (skip the prompt prefix).
        input_len = inputs.input_ids.shape[1]
        output_text = processor.batch_decode(
            generated_ids[:, input_len:],
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False
        )[0]
        output_text = clean_output(output_text.strip())
        elapsed = time.time() - start_time
        return output_text, f"{elapsed:.2f} seconds"
    except torch.cuda.OutOfMemoryError:
        # Free cached blocks so a retry with a smaller image can succeed.
        torch.cuda.empty_cache()
        return "❌ Out of Memory. Try a smaller image.", "0.00"
    except Exception as e:
        print(f"[ERROR] {e}")
        import traceback
        traceback.print_exc()
        return f"❌ Error: {str(e)}", "0.00"
# ==================== 🎨 Gradio UI ====================
def create_interface():
    """Build and return the Gradio Blocks UI (not launched here).

    BUGFIX: `theme` and `css` are gr.Blocks() constructor arguments, not
    launch() arguments — they are set here so the styling actually applies.
    BUGFIX: the Clear button now also clears the extracted-text box and
    restores the default prompt instead of blanking it.
    """
    default_prompt = "اقرأ النص في هذه الصورة كاملاً من البداية إلى النهاية."
    with gr.Blocks(
        title="Arabic OCR - Qwen3.5-0.8B",
        theme=gr.themes.Soft(),
        css="""
        .header { text-align: center; margin-bottom: 20px; }
        .output-box { min-height: 200px; }
        """
    ) as demo:
        gr.Markdown("""
        # 📝 Arabic Handwritten & Printed OCR V4
        ### Powered by Qwen3.5-0.8B
        Upload an image containing Arabic text, and the model will extract it.
        ✨ **Features:**
        - 🌍 Arabic support
        - ✍️ Handwritten & printed text
        - 🔤 Preserves diacritics (تشكيل)
        - ⚡ Full precision (no quantization)
        """, elem_classes="header")
        with gr.Row():
            with gr.Column(scale=1):
                image_input = gr.Image(
                    label="📷 Upload Image",
                    type="pil",
                    height=300,
                    sources=["upload", "clipboard"]
                )
                prompt_input = gr.Textbox(
                    label="📝 Custom Prompt (Optional)",
                    placeholder="اقرأ النص في هذه الصورة...",
                    value=default_prompt,
                    lines=2
                )
                submit_btn = gr.Button(
                    "🔍 Extract Text",
                    variant="primary",
                    size="lg"
                )
                # Remote example URLs may be unreachable in some environments;
                # for production, ship local files (e.g. ["examples/sample1.jpg"]).
                gr.Examples(
                    label="📋 Examples (Optional)",
                    examples=[
                        [f"https://huggingface.co/sherif1313/Arabic-handwritten-OCR-4bit-Qwen2.5-VL-3B-v2/resolve/main/assets/{name}.png"]
                        for name in [
                            "00002", "00106", "00107", "00113", "00126",
                            "00135", "00141", "00197", "00198", "00199",
                            "00216", "00240",
                        ]
                    ],
                    inputs=[image_input],
                    cache_examples=False
                )
            with gr.Column(scale=1):
                # show_copy_button intentionally omitted (not in older Gradio).
                output_text = gr.Textbox(
                    label="📄 Extracted Text",
                    lines=12,
                    elem_classes="output-box"
                )
                time_output = gr.Textbox(
                    label="⏱️ Inference Time",
                    interactive=False,
                    value="-"
                )
                clear_btn = gr.Button("🗑️ Clear", variant="secondary")
        # Event wiring
        submit_btn.click(
            fn=extract_text,
            inputs=[image_input, prompt_input],
            outputs=[output_text, time_output]
        )
        clear_btn.click(
            fn=lambda: (None, default_prompt, "", "-"),
            inputs=[],
            outputs=[image_input, prompt_input, output_text, time_output]
        )
        gr.Markdown("""
        ### 💡 Tips for Best Results:
        1. Use clear, well-lit images
        2. Crop to the text region if possible
        3. For handwritten text, ensure good contrast
        4. Custom prompts can improve accuracy for specific formats
        """)
    return demo
# ==================== 🚀 Entry point ====================
if __name__ == "__main__":
    print("[INFO] Creating Gradio interface...")
    demo = create_interface()
    # BUGFIX: gr.Blocks.launch() accepts neither `theme` nor `css` — those are
    # gr.Blocks() constructor arguments; passing them to launch() raises
    # TypeError at startup. They have been removed from this call.
    demo.launch(
        server_name="0.0.0.0",  # listen on all interfaces (required for Spaces)
        server_port=int(os.getenv("PORT", 7860)),
        share=False,
        debug=os.getenv("DEBUG", "false").lower() == "true",
        show_error=True,
    )