GiantAnalytics committed on
Commit
16ca38c
·
verified ·
1 Parent(s): ef9ba78

Second Version

Browse files
Files changed (1) hide show
  1. app.py +41 -18
app.py CHANGED
@@ -9,9 +9,25 @@ import numpy as np
9
  ocr = PaddleOCR(use_angle_cls=True, lang='ar')
10
 
11
  def ocr_extract_text(image):
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  # Perform OCR on the uploaded image
13
  result = ocr.ocr(image, cls=True)
14
 
 
 
 
15
  # Initialize a list to store detected text
16
  detected_text = []
17
 
@@ -25,29 +41,36 @@ def ocr_extract_text(image):
25
  texts = [line[1][0] for line in result[0]] # Detected text
26
  scores = [line[1][1] for line in result[0]] # Confidence scores
27
 
28
- annotated_image = draw_ocr(image, boxes, texts, scores, font_path="path/to/arabic/font.ttf")
 
 
 
 
29
 
30
- # Convert the annotated image to a format that Gradio can display
31
- annotated_image = Image.fromarray(np.uint8(annotated_image))
32
 
33
- # Return the text and annotated image
34
- return "\n".join(detected_text), annotated_image
35
 
36
- # Define Gradio interface with the button to trigger OCR
37
- def start_ocr(image):
38
- # Trigger OCR extraction when button is clicked
39
- return ocr_extract_text(image)
40
 
41
  iface = gr.Interface(
42
- fn=start_ocr, # Function to trigger OCR extraction on button click
43
- inputs=gr.Image(type="pil"), # Image input only
44
- outputs=[gr.Textbox(label="Extracted Text"), gr.Image(label="Annotated Image")], # Outputs
45
- live=False, # Set live=False as we want to trigger the process with a button click
46
- title="Arabic OCR Extractor", # Title of the interface
47
- description="Upload an Arabic document or image, and click 'Start Extracting Text' to extract the text using OCR.", # Description
48
- allow_flagging="never" # Prevent flagging if it's not required
 
 
 
 
49
  )
50
 
 
51
  if __name__ == "__main__":
52
- # This ensures Gradio runs in debug mode and with queue for better debugging
53
- iface.queue().launch(debug=True)
 
9
# Initialize PaddleOCR once at module load (model download/load is slow):
# angle classification enabled, Arabic ('ar') recognition model.
ocr = PaddleOCR(use_angle_cls=True, lang='ar')
10
 
11
  def ocr_extract_text(image):
12
+ if image is None:
13
+ return "No image provided", None
14
+
15
+ # Convert PIL Image to numpy array if needed
16
+ if isinstance(image, Image.Image):
17
+ image = np.array(image)
18
+
19
+ # Ensure image is in BGR format for PaddleOCR
20
+ if len(image.shape) == 3 and image.shape[2] == 4: # RGBA
21
+ image = cv2.cvtColor(image, cv2.COLOR_RGBA2BGR)
22
+ elif len(image.shape) == 3 and image.shape[2] == 3: # RGB
23
+ image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
24
+
25
  # Perform OCR on the uploaded image
26
  result = ocr.ocr(image, cls=True)
27
 
28
+ if result[0] is None:
29
+ return "No text detected in the image", image
30
+
31
  # Initialize a list to store detected text
32
  detected_text = []
33
 
 
41
  texts = [line[1][0] for line in result[0]] # Detected text
42
  scores = [line[1][1] for line in result[0]] # Confidence scores
43
 
44
+ try:
45
+ annotated_image = draw_ocr(image, boxes, texts, scores)
46
+ except Exception as e:
47
+ print(f"Error annotating image: {e}")
48
+ annotated_image = image
49
 
50
+ # Join the detected text with Arabic-compatible line breaks
51
+ text_output = "\n".join(detected_text)
52
 
53
+ return text_output, annotated_image
 
54
 
55
# Define Gradio interface with Arabic-friendly styling.
# Arabic is a right-to-left script; without this rule the extracted text
# would render left-aligned / LTR in the output textbox.
css = """
.output-text { direction: rtl; text-align: right; }
"""
59
 
60
# Gradio UI: upload an image, get the extracted Arabic text (RTL-styled via
# the module-level `css`) plus the annotated image back.
iface = gr.Interface(
    fn=ocr_extract_text,
    inputs=gr.Image(type="numpy", label="Upload Image"),
    outputs=[
        gr.Textbox(label="Extracted Arabic Text", elem_classes=["output-text"]),
        gr.Image(label="Annotated Image"),
    ],
    title="Arabic OCR Extractor",
    description="Upload an Arabic document or image to extract the text using OCR.",
    css=css,
    examples=[],  # You can add example images here
    # BUG FIX: cache_examples=True with an empty examples list makes Gradio
    # attempt to pre-compute a cache over zero examples, which errors out.
    # Flip this back to True only once `examples` is populated.
    cache_examples=False,
)
73
 
74
# For Colab and Hugging Face Spaces compatibility: only launch when run as a
# script, so importing this module (e.g. by a Space runner) has no side effects.
if __name__ == "__main__":
    # share=True enables a public tunnel URL (needed on Colab);
    # debug=True surfaces tracebacks in the console for easier debugging.
    iface.launch(debug=True, share=True)