sherif1313's picture
Update app.py
ee3ccbb verified
raw
history blame
11.4 kB
# -*- coding: utf-8 -*-
"""
🤖 Arabic OCR - Hugging Face Spaces Version
Model: Qwen3.5-0.8B-VL with LoRA
No Quantization - Full Precision
"""
import os
import time
import torch
from PIL import Image
import gradio as gr
from transformers import AutoProcessor, Qwen3_5ForConditionalGeneration
from qwen_vl_utils import process_vision_info
# ==================== ⚙️ Device configuration ====================
# Select the best available accelerator and a matching compute dtype:
# half precision on GPU/MPS, full float32 on CPU.
if torch.cuda.is_available():
    device, dtype = "cuda", torch.float16
    print(f"✅ Using GPU: {torch.cuda.get_device_name(0)}")
elif torch.backends.mps.is_available():
    device, dtype = "mps", torch.float16
    print("✅ Using Apple Silicon (MPS)")
else:
    device, dtype = "cpu", torch.float32
    print("⚠️ Using CPU (slower inference)")

print(f"[INFO] Device: {device} | Dtype: {dtype}")
# ==================== 🔄 Model loading ====================
def load_model():
    """Load the OCR model and its processor.

    The checkpoint path is read from the MODEL_PATH environment variable,
    falling back to the default Hugging Face repo. The model is loaded in
    full precision (no quantization) and switched to eval mode.

    Returns:
        A ``(model, processor)`` pair ready for inference.
    """
    model_path = os.getenv("MODEL_PATH", "sherif1313/Arabic-Qwen3.5-OCR-v4")
    print(f"[INFO] Loading model from: {model_path}")

    processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
    model = Qwen3_5ForConditionalGeneration.from_pretrained(
        model_path,
        torch_dtype=dtype,
        # Automatic weight placement only makes sense on CUDA.
        device_map="auto" if device == "cuda" else None,
        trust_remote_code=True,
        low_cpu_mem_usage=True,
    )
    model.eval()

    print("[INFO] Model loaded successfully!")
    return model, processor
# Load once at import time so every request reuses the same instances.
# On failure, fall back to None sentinels; extract_text() reports the error.
try:
    model, processor = load_model()
except Exception as e:
    print(f"[ERROR] Failed to load model: {e}")
    model, processor = None, None
# ==================== 🧹 Helper functions ====================
def prepare_image(image: Image.Image, max_size: int = 768) -> Image.Image:
    """Shrink *image* to fit *max_size* and snap dims up to multiples of 64.

    First the image is downscaled in place (aspect ratio preserved) when its
    longest side exceeds *max_size*; then both dimensions are rounded up to
    the next multiple of 64 — presumably what the vision encoder expects
    (TODO confirm against the model's preprocessing).

    NOTE(review): ``thumbnail()`` mutates the PIL object passed in.
    """
    if max(image.size) > max_size:
        image.thumbnail((max_size, max_size), Image.Resampling.LANCZOS)

    snapped = tuple(((dim + 63) // 64) * 64 for dim in image.size)
    if snapped != image.size:
        image = image.resize(snapped, Image.Resampling.LANCZOS)
    return image
def clean_output(text: str, max_repetitions: int = 2) -> str:
    """Collapse runaway repetition in generated OCR text.

    Two passes:
      1. Any single character repeated five or more times in a row is
         squeezed down to three occurrences.
      2. Blank lines are dropped, and each distinct (stripped) line is kept
         at most *max_repetitions* times.

    Empty/falsy input is returned unchanged.
    """
    if not text:
        return text

    import re
    text = re.sub(r'(.)\1{4,}', r'\1\1\1', text)

    kept = []
    counts = {}
    for raw_line in text.strip().split('\n'):
        key = raw_line.strip()
        if not key:
            continue
        counts[key] = counts.get(key, 0) + 1
        if counts[key] <= max_repetitions:
            kept.append(raw_line)
    return '\n'.join(kept).strip()
# ==================== 🔍 Inference ====================
def extract_text(image, prompt: str | None = None) -> tuple[str, str]:
    """Run OCR on *image* and return ``(extracted_text, elapsed_time_str)``.

    Args:
        image: A file path, PIL image, or numpy array (as supplied by Gradio).
        prompt: Optional instruction for the model; when empty, a default
            Arabic "read the whole text" prompt is used.

    Returns:
        On success: ``(cleaned text, "<seconds> seconds")``.
        On failure: an error message in the text slot and ``"0.00"`` as time.
    """
    if model is None or processor is None:
        return "❌ Error: Model not loaded", "0.00"
    if image is None:
        return "⚠️ Please upload an image", "0.00"

    start_time = time.time()
    try:
        # Normalize every accepted input type to an RGB PIL image.
        if isinstance(image, str):
            image_pil = Image.open(image).convert("RGB")
        elif isinstance(image, Image.Image):
            image_pil = image.convert("RGB")
        else:
            image_pil = Image.fromarray(image).convert("RGB")
        image_pil = prepare_image(image_pil)

        if prompt is None or not prompt.strip():
            prompt = "اقرأ النص في هذه الصورة كاملاً من البداية إلى النهاية."

        messages = [{
            "role": "user",
            "content": [
                {"type": "image", "image": image_pil},
                {"type": "text", "text": prompt}
            ]
        }]
        text_input = processor.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        image_inputs, _ = process_vision_info(messages)
        inputs = processor(
            text=[text_input],
            images=image_inputs,
            padding=True,
            return_tensors="pt"
        ).to(device)

        with torch.inference_mode():
            # Greedy decoding. `temperature` is deliberately omitted: it is
            # ignored (and triggers a transformers warning) when
            # do_sample=False, so dropping it does not change the output.
            generated_ids = model.generate(
                **inputs,
                max_new_tokens=512,
                do_sample=False,
                repetition_penalty=1.2,
                no_repeat_ngram_size=3,
                pad_token_id=processor.tokenizer.pad_token_id,
                eos_token_id=processor.tokenizer.eos_token_id,
            )

        # Decode only the newly generated tokens (skip the prompt prefix).
        input_len = inputs.input_ids.shape[1]
        output_text = processor.batch_decode(
            generated_ids[:, input_len:],
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False
        )[0]
        output_text = clean_output(output_text.strip())

        elapsed = time.time() - start_time
        return output_text, f"{elapsed:.2f} seconds"

    except torch.cuda.OutOfMemoryError:
        torch.cuda.empty_cache()
        return "❌ Out of Memory. Try a smaller image.", "0.00"
    except Exception as e:
        print(f"[ERROR] {e}")
        import traceback
        traceback.print_exc()
        return f"❌ Error: {str(e)}", "0.00"
# ==================== 🎨 Gradio interface ====================
def create_interface():
    """Build and return the Gradio Blocks UI for the OCR demo."""
    with gr.Blocks(
        title="Arabic OCR - Qwen3.5-0.8B"
        # theme and css removed from here – moved to launch()
        # NOTE(review): historically gr.Blocks() accepted theme/css; confirm
        # the installed Gradio version really takes them in launch() instead.
    ) as demo:
        # Header / feature summary shown above the widgets.
        gr.Markdown("""
# 📝 Arabic Handwritten & Printed OCR V4
### Powered by Qwen3.5-0.8B
Upload an image containing Arabic text, and the model will extract it.
✨ **Features:**
- 🌍 Arabic support
- ✍️ Handwritten & printed text
- 🔤 Preserves diacritics (تشكيل)
- ⚡ Full precision (no quantization)
""", elem_classes="header")
        with gr.Row():
            # Left column: inputs (image, optional prompt, submit, examples).
            with gr.Column(scale=1):
                image_input = gr.Image(
                    label="📷 Upload Image",
                    type="pil",
                    height=300,
                    sources=["upload", "clipboard"]
                )
                prompt_input = gr.Textbox(
                    label="📝 Custom Prompt (Optional)",
                    placeholder="اقرأ النص في هذه الصورة...",
                    value="اقرأ النص في هذه الصورة كاملاً من البداية إلى النهاية.",
                    lines=2
                )
                submit_btn = gr.Button(
                    "🔍 Extract Text",
                    variant="primary",
                    size="lg"
                )
                # Examples – use local files or remote URLs (remote may fail in some environments)
                # For production, copy images to an 'examples' folder and use local paths.
                gr.Examples(
                    label="📋 Examples (Optional)",
                    examples=[
                        # You can replace these with local files like ["examples/sample1.jpg"]
                        ["https://huggingface.co/sherif1313/Arabic-handwritten-OCR-4bit-Qwen2.5-VL-3B-v2/resolve/main/assets/00002.png"],
                        ["https://huggingface.co/sherif1313/Arabic-handwritten-OCR-4bit-Qwen2.5-VL-3B-v2/resolve/main/assets/00106.png"],
                        ["https://huggingface.co/sherif1313/Arabic-handwritten-OCR-4bit-Qwen2.5-VL-3B-v2/resolve/main/assets/00107.png"],
                        ["https://huggingface.co/sherif1313/Arabic-handwritten-OCR-4bit-Qwen2.5-VL-3B-v2/resolve/main/assets/00113.png"],
                        ["https://huggingface.co/sherif1313/Arabic-handwritten-OCR-4bit-Qwen2.5-VL-3B-v2/resolve/main/assets/00126.png"],
                        ["https://huggingface.co/sherif1313/Arabic-handwritten-OCR-4bit-Qwen2.5-VL-3B-v2/resolve/main/assets/00135.png"],
                        ["https://huggingface.co/sherif1313/Arabic-handwritten-OCR-4bit-Qwen2.5-VL-3B-v2/resolve/main/assets/00141.png"],
                        ["https://huggingface.co/sherif1313/Arabic-handwritten-OCR-4bit-Qwen2.5-VL-3B-v2/resolve/main/assets/00197.png"],
                        ["https://huggingface.co/sherif1313/Arabic-handwritten-OCR-4bit-Qwen2.5-VL-3B-v2/resolve/main/assets/00198.png"],
                        ["https://huggingface.co/sherif1313/Arabic-handwritten-OCR-4bit-Qwen2.5-VL-3B-v2/resolve/main/assets/00199.png"],
                        ["https://huggingface.co/sherif1313/Arabic-handwritten-OCR-4bit-Qwen2.5-VL-3B-v2/resolve/main/assets/00216.png"],
                        ["https://huggingface.co/sherif1313/Arabic-handwritten-OCR-4bit-Qwen2.5-VL-3B-v2/resolve/main/assets/00240.png"],
                    ],
                    inputs=[image_input],
                    cache_examples=False
                )
            # Right column: outputs (extracted text, timing, clear button).
            with gr.Column(scale=1):
                # Removed show_copy_button parameter (not available in older Gradio)
                output_text = gr.Textbox(
                    label="📄 Extracted Text",
                    lines=12,
                    elem_classes="output-box"
                )
                time_output = gr.Textbox(
                    label="⏱️ Inference Time",
                    interactive=False,
                    value="-"
                )
                clear_btn = gr.Button("🗑️ Clear", variant="secondary")
        # Event wiring
        submit_btn.click(
            fn=extract_text,
            inputs=[image_input, prompt_input],
            outputs=[output_text, time_output]
        )
        # NOTE(review): clear resets image/prompt/time but not output_text —
        # possibly intentional, verify with the author.
        clear_btn.click(
            fn=lambda: (None, "", "-"),
            inputs=[],
            outputs=[image_input, prompt_input, time_output]
        )
        gr.Markdown("""
### 💡 Tips for Best Results:
1. Use clear, well-lit images
2. Crop to the text region if possible
3. For handwritten text, ensure good contrast
4. Custom prompts can improve accuracy for specific formats
""")
    return demo
# ==================== 🚀 Entry point ====================
if __name__ == "__main__":
    print("[INFO] Creating Gradio interface...")
    demo = create_interface()

    # Server options come from the environment so the Space stays configurable.
    port = int(os.getenv("PORT", 7860))
    debug_mode = os.getenv("DEBUG", "false").lower() == "true"

    # NOTE(review): theme/css are passed to launch() per the original
    # comment claiming Gradio 6+ moved them there — confirm the installed
    # Gradio version actually accepts these kwargs on launch().
    demo.launch(
        server_name="0.0.0.0",
        server_port=port,
        share=False,
        debug=debug_mode,
        show_error=True,
        theme=gr.themes.Soft(),  # moved from Blocks
        css="""
.header { text-align: center; margin-bottom: 20px; }
.output-box { min-height: 200px; }
"""
    )