# Source: GiantAnalytics Hugging Face Space, commit a7b5b52 ("Rename app.py to main.py")
import gradio as gr
import easyocr
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import os
import requests
from pathlib import Path
# Download and cache the font file
def get_font():
    """Return the local path to Roboto-Regular.ttf, downloading it on first use.

    The font is cached in the working directory, so the network fetch
    happens at most once per deployment.

    Returns:
        str: filesystem path to the TrueType font file.

    Raises:
        requests.RequestException: if the download fails or times out.
    """
    font_path = Path("Roboto-Regular.ttf")
    if not font_path.exists():
        font_url = "https://github.com/google/fonts/raw/main/apache/roboto/Roboto-Regular.ttf"
        # Bounded timeout so startup can't hang forever on a stalled
        # connection; raise_for_status() prevents caching an HTML error
        # page (or empty body) as the font, which would poison the cache
        # permanently since the exists() check would then succeed.
        response = requests.get(font_url, timeout=30)
        response.raise_for_status()
        font_path.write_bytes(response.content)
    return str(font_path)
# Initialize EasyOCR Reader for Arabic and English.
# Created once at module import so the model load/download cost is paid at
# startup, not on every request; ocr_extract_text reads this global.
reader = easyocr.Reader(['ar', 'en'], gpu=True)  # Set gpu=False if no GPU available
def ocr_extract_text(image):
    """Run Arabic/English OCR on an image and return text plus an annotated copy.

    Args:
        image: numpy array from the Gradio image input — ``H x W`` grayscale,
            ``H x W x 3`` RGB, or ``H x W x 4`` RGBA. May be None when the
            user submits without uploading.

    Returns:
        tuple: ``(text_output, annotated_image)`` where ``text_output`` is one
        detection per line formatted as ``"text (Confidence: 0.97)"``, and
        ``annotated_image`` is an RGB numpy array with red bounding boxes and
        blue labels drawn on it (None when no image was provided).
    """
    if image is None:
        return "No image provided", None

    # Normalize to 3-channel RGB: the PIL drawing below assumes an RGB
    # canvas so the "red"/"blue" annotation colors render correctly.
    if len(image.shape) == 2:  # grayscale
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    elif len(image.shape) == 3 and image.shape[2] == 4:  # RGBA
        image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)

    # Perform OCR; each result is (bbox, text, confidence).
    results = reader.readtext(image)
    if not results:
        return "No text detected in the image", image

    # One output line per detection, confidence rounded to 2 decimals.
    detected_text = [
        f"{text} (Confidence: {confidence:.2f})"
        for _, text, confidence in results
    ]

    # Draw annotations on a PIL copy of the image.
    pil_image = Image.fromarray(image)
    draw = ImageDraw.Draw(pil_image)

    # Fall back to PIL's built-in bitmap font if the download fails, so a
    # network hiccup degrades the labels instead of breaking the request.
    try:
        font = ImageFont.truetype(get_font(), size=20)
    except Exception as e:
        print(f"Error loading font: {e}")
        font = ImageFont.load_default()

    for bbox, text, confidence in results:
        # EasyOCR returns four corner points; use the top-left and
        # bottom-right corners for an axis-aligned box (rotated text is
        # approximated by its bounding rectangle).
        top_left = tuple(map(int, bbox[0]))
        bottom_right = tuple(map(int, bbox[2]))
        draw.rectangle([top_left, bottom_right], outline="red", width=3)
        # Label the box with the text and its confidence.
        text_with_conf = f"{text} ({confidence:.2f})"
        draw.text(top_left, text_with_conf, fill="blue", font=font)

    # Back to numpy for the Gradio image output.
    annotated_image = np.array(pil_image)
    text_output = "\n".join(detected_text)
    return text_output, annotated_image
# Custom CSS for RTL support and better text display
css = """
.output-text {
direction: rtl;
text-align: right;
font-family: Arial, sans-serif;
white-space: pre-wrap;
}
"""
# Create Gradio interface: one image in, extracted text + annotated image out.
iface = gr.Interface(
    fn=ocr_extract_text,
    inputs=gr.Image(type="numpy", label="Upload Image"),
    outputs=[
        gr.Textbox(
            label="Extracted Text (Arabic & English)",
            elem_classes=["output-text"]  # picks up the RTL CSS rules
        ),
        gr.Image(label="Annotated Image")
    ],
    title="Arabic & English OCR Extractor",
    description="Upload an image containing Arabic and/or English text for OCR processing. The system will detect and extract text in both languages.",
    css=css,
    examples=[],  # You can add example images here
    # cache_examples=True with an empty examples list makes Gradio try to
    # pre-compute nothing and complain at startup; re-enable once real
    # example images are added above.
    cache_examples=False
)
# Launch the interface only when run as a script (not when imported).
if __name__ == "__main__":
    # debug=True surfaces errors in the console/UI.
    # NOTE(review): share=True requests a public tunnel link — unnecessary if
    # this is deployed on Hugging Face Spaces (already public); confirm the
    # intended deployment target.
    iface.launch(debug=True, share=True)