File size: 3,122 Bytes
4cb0d47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import gradio as gr
import easyocr
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import os
import requests
from pathlib import Path

# Download and cache the font file
def get_font():
    """Return the path to a locally cached Roboto TTF, downloading it once.

    The font is stored in the working directory; later calls reuse the
    cached copy. Raises ``requests.HTTPError`` (or a connection error) when
    the download fails — the caller falls back to PIL's default font.
    """
    font_path = Path("Roboto-Regular.ttf")
    if not font_path.exists():
        # NOTE(review): google/fonts reorganizes its directory layout from
        # time to time — verify this raw URL still resolves.
        font_url = "https://github.com/google/fonts/raw/main/apache/roboto/Roboto-Regular.ttf"
        # Timeout so a stalled connection can't hang the app at startup;
        # raise_for_status so a 404 HTML page is never cached as a "font".
        response = requests.get(font_url, timeout=30)
        response.raise_for_status()
        font_path.write_bytes(response.content)
    return str(font_path)

# Initialize EasyOCR Reader for Arabic and English.
# Loaded once at module import because model initialization is expensive;
# NOTE(review): presumably EasyOCR falls back to CPU with a warning when no
# CUDA device is present — confirm for the installed easyocr version.
reader = easyocr.Reader(['ar', 'en'], gpu=True)  # Set gpu=False if no GPU available

def ocr_extract_text(image):
    """Run Arabic/English OCR on an image and return (text, annotated image).

    Parameters
    ----------
    image : numpy.ndarray | None
        Image as delivered by Gradio (``type="numpy"``), RGB or RGBA.

    Returns
    -------
    tuple[str, numpy.ndarray | None]
        Newline-joined detections with confidence scores, and a copy of the
        image with bounding boxes drawn (``None`` when no image was given).
    """
    if image is None:
        return "No image provided", None

    # Drop the alpha channel if present so EasyOCR gets 3-channel input.
    if len(image.shape) == 3 and image.shape[2] == 4:  # RGBA
        image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)

    # Perform OCR; each result is (bbox, text, confidence).
    results = reader.readtext(image)

    if not results:
        return "No text detected in the image", image

    detected_text = [
        f"{text} (Confidence: {confidence:.2f})"
        for (_, text, confidence) in results
    ]

    # Annotate a PIL copy of the image.
    pil_image = Image.fromarray(image)
    draw = ImageDraw.Draw(pil_image)

    # Get font for annotation; degrade to PIL's built-in bitmap font on
    # any download/parse failure instead of crashing the request.
    try:
        font = ImageFont.truetype(get_font(), size=20)
    except Exception as e:
        print(f"Error loading font: {e}")
        font = ImageFont.load_default()

    # Draw boxes and text
    for (bbox, text, confidence) in results:
        # Bug fix: EasyOCR boxes are quadrilaterals that may be rotated, so
        # corners 0 and 2 are not guaranteed to be (min, min) / (max, max);
        # passing them straight to draw.rectangle raises "x1 must be >= x0".
        # Take the axis-aligned envelope over all four corners instead.
        xs = [int(p[0]) for p in bbox]
        ys = [int(p[1]) for p in bbox]
        left, top = min(xs), min(ys)
        draw.rectangle([left, top, max(xs), max(ys)], outline="red", width=3)

        # Draw text with confidence at the box's top-left corner.
        text_with_conf = f"{text} ({confidence:.2f})"
        draw.text((left, top), text_with_conf, fill="blue", font=font)

    # Convert back to numpy array and join detected text for display.
    annotated_image = np.array(pil_image)
    text_output = "\n".join(detected_text)

    return text_output, annotated_image

# Custom CSS for RTL support and better text display.
# Applied via gr.Interface(css=...); ".output-text" targets the results
# Textbox (attached through elem_classes), and pre-wrap preserves the
# newline-separated detections.
css = """
.output-text { 
    direction: rtl; 
    text-align: right; 
    font-family: Arial, sans-serif;
    white-space: pre-wrap;
}
"""

# Create Gradio interface: one image in, extracted text + annotated image out.
iface = gr.Interface(
    fn=ocr_extract_text,
    inputs=gr.Image(type="numpy", label="Upload Image"),
    outputs=[
        gr.Textbox(
            label="Extracted Text (Arabic & English)",
            elem_classes=["output-text"]
        ),
        gr.Image(label="Annotated Image")
    ],
    title="Arabic & English OCR Extractor",
    description="Upload an image containing Arabic and/or English text for OCR processing. The system will detect and extract text in both languages.",
    css=css,
    examples=[],  # You can add example images here
    # Bug fix: cache_examples=True with an empty examples list is
    # inconsistent — example caching needs examples to pre-compute and
    # errors on recent Gradio versions. Re-enable once examples are added.
    cache_examples=False,
)

# Launch the interface when run as a script.
if __name__ == "__main__":
    # share=True publishes a temporary public Gradio link; debug=True keeps
    # the process attached and prints tracebacks to the console.
    iface.launch(debug=True, share=True)