Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import paddle | |
| from paddleocr import PaddleOCR, draw_ocr | |
| import cv2 | |
| from PIL import Image | |
| import numpy as np | |
# Initialize PaddleOCR for Arabic.
# use_angle_cls=True enables the text-angle classifier so rotated lines are
# corrected before recognition; lang='ar' loads the Arabic recognition model.
# NOTE(review): model weights are downloaded on first run — this line does
# network I/O at import time.
ocr = PaddleOCR(use_angle_cls=True, lang='ar')
def ocr_extract_text(image):
    """Run Arabic OCR on an uploaded image.

    Parameters
    ----------
    image : PIL.Image.Image | numpy.ndarray | None
        Image supplied by the Gradio input component.

    Returns
    -------
    tuple[str, numpy.ndarray | None]
        The detected text (newline-joined) and an RGB image annotated with
        the detected boxes, or an explanatory message when nothing could be
        processed.
    """
    if image is None:
        return "No image provided", None

    # Gradio may deliver a PIL image depending on component configuration.
    if isinstance(image, Image.Image):
        image = np.array(image)

    # PaddleOCR expects OpenCV-style BGR channel order.
    if len(image.shape) == 3 and image.shape[2] == 4:  # RGBA
        image = cv2.cvtColor(image, cv2.COLOR_RGBA2BGR)
    elif len(image.shape) == 3 and image.shape[2] == 3:  # RGB
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    result = ocr.ocr(image, cls=True)

    # Fix: also guard against an empty top-level result — `result[0]` alone
    # raises IndexError when PaddleOCR returns an empty list.
    if not result or result[0] is None:
        # Fix: convert back to RGB so gr.Image does not render swapped
        # red/blue channels (the working copy is BGR at this point).
        return "No text detected in the image", cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Each entry of result[0] is [box, (text, confidence)].
    lines = result[0]
    detected_text = [line[1][0] for line in lines]
    boxes = [line[0] for line in lines]      # bounding box coordinates
    texts = [line[1][0] for line in lines]   # recognized strings
    scores = [line[1][1] for line in lines]  # confidence scores

    # Fix: annotate and return in RGB — previously the BGR working image was
    # returned directly, so the Gradio preview showed inverted colors.
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    try:
        annotated_image = draw_ocr(rgb_image, boxes, texts, scores)
    except Exception as e:
        # draw_ocr needs a font file capable of rendering Arabic glyphs;
        # fall back to the plain image rather than failing the request.
        print(f"Error annotating image: {e}")
        annotated_image = rgb_image

    text_output = "\n".join(detected_text)
    return text_output, annotated_image
# Define Gradio interface with Arabic-friendly styling:
# right-to-left direction and right alignment for the extracted-text box.
css = """
.output-text { direction: rtl; text-align: right; }
"""
# Gradio app wiring: one image in, extracted text + annotated image out.
iface = gr.Interface(
    fn=ocr_extract_text,
    inputs=gr.Image(type="numpy", label="Upload Image"),
    outputs=[
        gr.Textbox(label="Extracted Arabic Text", elem_classes=["output-text"]),
        gr.Image(label="Annotated Image"),
    ],
    title="Arabic OCR Extractor",
    description="Upload an Arabic document or image to extract the text using OCR.",
    css=css,
    examples=[],  # add example image paths here
    # Fix: cache_examples=True with an empty examples list makes Gradio
    # raise at startup (caching requires examples to cache). False is the
    # behavioral no-op here; flip it back once examples are provided.
    cache_examples=False,
)
def _main():
    """Launch the demo; share=True exposes a public URL (Colab/Spaces)."""
    iface.launch(debug=True, share=True)


# For Colab and Hugging Face Spaces compatibility
if __name__ == "__main__":
    _main()