# HuggingFace Space: Arabic & English OCR extractor (Gradio + EasyOCR).
# (Scraped page header — status lines, file size, commit hash — removed.)
import gradio as gr
import easyocr
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import os
import requests
from pathlib import Path
# Download and cache the font file
def get_font():
    """Return the path to a local Roboto TTF, downloading it on first use.

    The font is cached next to the script, so repeated calls (and app
    restarts) skip the network round trip.

    Returns:
        str: filesystem path to ``Roboto-Regular.ttf``.

    Raises:
        requests.HTTPError: if the download fails with an HTTP error status.
    """
    font_path = Path("Roboto-Regular.ttf")
    if not font_path.exists():
        font_url = "https://github.com/google/fonts/raw/main/apache/roboto/Roboto-Regular.ttf"
        # timeout so a hung connection can't stall startup forever;
        # raise_for_status so an error page is never written as a "font".
        response = requests.get(font_url, timeout=30)
        response.raise_for_status()
        font_path.write_bytes(response.content)
    return str(font_path)
# Initialize EasyOCR Reader for Arabic and English.
# Built once at module load so the (heavy) model initialization is not
# repeated per request. NOTE(review): first construction downloads model
# weights if not cached — presumably acceptable for a Space; verify on deploy.
reader = easyocr.Reader(['ar', 'en'], gpu=True) # Set gpu=False if no GPU available
def ocr_extract_text(image):
    """Run Arabic/English OCR on an image and return text plus an annotated copy.

    Args:
        image: numpy array from Gradio (H, W, 3 RGB or H, W, 4 RGBA), or None.

    Returns:
        tuple[str, numpy.ndarray | None]: newline-joined detections (each with
        its confidence score) and the image with red boxes / blue labels drawn;
        ``(message, None)`` when no image was provided.
    """
    if image is None:
        return "No image provided", None

    # Gradio may deliver RGBA; EasyOCR and PIL drawing expect 3 channels.
    if len(image.shape) == 3 and image.shape[2] == 4:  # RGBA
        image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)

    # Perform OCR
    results = reader.readtext(image)
    if not results:
        return "No text detected in the image", image

    detected_text = [
        f"{text} (Confidence: {confidence:.2f})"
        for (_, text, confidence) in results
    ]

    # Create annotated image
    pil_image = Image.fromarray(image)
    draw = ImageDraw.Draw(pil_image)
    try:
        font = ImageFont.truetype(get_font(), size=20)
    except Exception as e:
        print(f"Error loading font: {e}")
        font = ImageFont.load_default()

    for (bbox, text, confidence) in results:
        # EasyOCR returns 4 corner points. For rotated text the 1st and 3rd
        # corners are not guaranteed to be (min, min)/(max, max), and Pillow's
        # draw.rectangle requires x0 <= x1 and y0 <= y1 — so take the
        # axis-aligned bounding box over all four corners.
        xs = [int(point[0]) for point in bbox]
        ys = [int(point[1]) for point in bbox]
        top_left = (min(xs), min(ys))
        bottom_right = (max(xs), max(ys))
        draw.rectangle([top_left, bottom_right], outline="red", width=3)
        # Label each box with the recognized text and its confidence.
        text_with_conf = f"{text} ({confidence:.2f})"
        draw.text(top_left, text_with_conf, fill="blue", font=font)

    annotated_image = np.array(pil_image)
    text_output = "\n".join(detected_text)
    return text_output, annotated_image
# Custom CSS for RTL support and better text display.
# Applied to the output textbox via elem_classes=["output-text"] below:
# right-to-left direction/alignment for Arabic, pre-wrap so the
# newline-separated detections keep their line breaks.
css = """
.output-text {
direction: rtl;
text-align: right;
font-family: Arial, sans-serif;
white-space: pre-wrap;
}
"""
# Create Gradio interface
iface = gr.Interface(
    fn=ocr_extract_text,
    inputs=gr.Image(type="numpy", label="Upload Image"),
    outputs=[
        gr.Textbox(
            label="Extracted Text (Arabic & English)",
            elem_classes=["output-text"]
        ),
        gr.Image(label="Annotated Image")
    ],
    title="Arabic & English OCR Extractor",
    description="Upload an image containing Arabic and/or English text for OCR processing. The system will detect and extract text in both languages.",
    css=css,
    examples=[],  # You can add example images here
    # cache_examples=True with an empty examples list is inconsistent and
    # makes Gradio attempt to cache nothing; enable it only once examples
    # are actually provided.
    cache_examples=False
)
# Launch the interface (guarded so importing this module doesn't start a server).
if __name__ == "__main__":
    # debug=True surfaces tracebacks in the UI; share=True opens a public
    # tunnel link (redundant on HF Spaces, useful when running locally).
    # Stray trailing "|" residue from the scraped page removed — it was a
    # syntax error.
    iface.launch(debug=True, share=True)