Spaces:

khodour
/

Arabic-OCR

Runtime error

App Files Files Community

khodour committed on May 3, 2025

Commit

90c31bc

verified ·

1 Parent(s): b1d157c

Update app.py

Browse files

Files changed (1) hide show

app.py +106 -17

app.py CHANGED Viewed

@@ -1,23 +1,112 @@
 import gradio as gr
-from ollama import chat  # make sure you have `ollama` (not ollama-client) in requirements.txt
-def ocr_fn(image):
-    if image is None:
-        return "⚠️ Please upload an image first."
-    # call your Ollama model
-    resp = chat(
-        model="Qwen/Qwen2.5-VL-7B-Instruct",
-        images=[image],
-        messages=[{"role":"user","content":"Extract all Arabic text, preserving layout."}]
     )
-    return resp
-with gr.Blocks() as demo:
-    gr.Markdown("## Arabic OCR Demo")
     with gr.Row():
-        inp = gr.Image(type="pil", label="📤 Upload an Arabic text image")
-        out = gr.Textbox(lines=10, label="📋 Extracted Text")
-    btn = gr.Button("Submit")
-    btn.click(fn=ocr_fn, inputs=inp, outputs=out)
-demo.launch()

 import gradio as gr
+import time
+import spaces
+from PIL import Image
+from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
+from qwen_vl_utils import process_vision_info
+import torch
+import uuid
+import os
+import numpy as np
+# Load model and processor
+# Both objects are created once at import time and reused by perform_ocr.
+# NOTE(review): the checkpoint id below names a Florence-2 fine-tune, yet it is
+# loaded with the Qwen2-VL model class and the UI's "Model Information" panel
+# describes the model as "Qari-OCR-0.1-VL-2B-Instruct" - confirm that this is
+# the intended checkpoint.
+model_name = "gagan3012/Florence-2-FT-ArabicOCR"
+model = Qwen2VLForConditionalGeneration.from_pretrained(
+                model_name,
+                torch_dtype="auto",
+                device_map="cuda"
+            )
+processor = AutoProcessor.from_pretrained(model_name)
+# Passed as max_new_tokens to model.generate() in perform_ocr.
+max_tokens = 2000
+@spaces.GPU(duration=120)
+def perform_ocr(image):
+    inputArray = np.any(image)
+    if inputArray == False:
+        return "Error Processing"
+    """Process image and extract text using OCR model"""
+    image = Image.fromarray(image)
+    src = str(uuid.uuid4()) + ".png"
+    prompt = "Below is the image of one page of a document, as well as some raw textual content that was previously extracted for it. Just return the plain text representation of this document as if you were reading it naturally. Do not hallucinate."
+    image.save(src)
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "image", "image": f"file://{src}"},
+                {"type": "text", "text": prompt},
+            ],
+        }
+    ]
+    # Process inputs
+    text = processor.apply_chat_template(
+        messages, tokenize=False, add_generation_prompt=True
+    )
+    image_inputs, video_inputs = process_vision_info(messages)
+    inputs = processor(
+        text=[text],
+        images=image_inputs,
+        videos=video_inputs,
+        padding=True,
+        return_tensors="pt",
     )
+    inputs = inputs.to("cuda")
+    # Generate text
+    generated_ids = model.generate(**inputs, max_new_tokens=max_tokens, use_cache=True)
+    generated_ids_trimmed = [
+        out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+    ]
+    output_text = processor.batch_decode(
+        generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+    )[0]
+    # Cleanup
+    os.remove(src)
+    return output_text
+# Create Gradio interface
+# Two-column layout: image upload, example images and the submit button on the
+# left; the extracted text and a collapsible model-information panel on the right.
+with gr.Blocks(title="Qari Arabic OCR") as demo:
+    gr.Markdown("# Qari Arabic OCR")
+    gr.Markdown("Upload an image to extract Arabic text in real-time. This model is specialized for Arabic document OCR.")
     with gr.Row():
+        with gr.Column(scale=1):
+            # Input image (delivered to perform_ocr as a NumPy array)
+            image_input = gr.Image(type="numpy", label="Upload Image")
+            # Example gallery
+            # NOTE(review): "2.jpg" and "3.jpg" are relative paths - presumably
+            # files stored next to app.py; verify they exist in the repository.
+            gr.Examples(
+                examples=[
+                    ["2.jpg"],
+                    ["3.jpg"]
+                ],
+                inputs=image_input,
+                label="Example Images",
+                examples_per_page=4
+            )
+            # Submit button
+            submit_btn = gr.Button("Extract Text")
+        with gr.Column(scale=1):
+            # Output text
+            output = gr.Textbox(label="Extracted Text", lines=20, show_copy_button=True)
+            # Model details
+            # NOTE(review): the model named here differs from the checkpoint id
+            # assigned to model_name at the top of the file - confirm which is right.
+            with gr.Accordion("Model Information", open=False):
+                gr.Markdown("""
+                **Model:** Qari-OCR-0.1-VL-2B-Instruct
+                **Description:** Arabic OCR model based on Qwen2-VL architecture
+                **Size:** 2B parameters
+                **Context window:** Supports up to 2000 output tokens
+                """)
+    # Set up processing flow
+    # perform_ocr is bound both to the button click and to the image's change
+    # event, so it runs on an explicit submit and whenever the image value changes.
+    submit_btn.click(fn=perform_ocr, inputs=image_input, outputs=output)
+    image_input.change(fn=perform_ocr, inputs=image_input, outputs=output)
+# Start the app; debug=True is passed through to Gradio's launch().
+demo.launch(debug=True)