Spaces:

GiantAnalytics
/

ArabicOCRExtractor

Sleeping

App Files Files Community

ArabicOCRExtractor / app3.py

GiantAnalytics

Rename app.py to app3.py

25d0164 verified 11 months ago

raw

history blame contribute delete

4.72 kB

	import gradio as gr
	import easyocr
	import cv2
	import numpy as np
	from PIL import Image, ImageDraw, ImageFont
	import os
	import requests
	from pathlib import Path
	import pandas as pd
	import pytesseract
	from pytesseract import Output
	import traceback
	import logging
	import sys
	from img2table.document import Image as Img2TableImage
	from img2table.ocr import TesseractOCR
	import pytesseract
	import os

	# Point pytesseract at the system-wide Tesseract executable.
	pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"

	# Optional override for the traineddata directory (left disabled):
	# os.environ["TESSDATA_PREFIX"] = "/usr/share/tesseract-ocr/4.00/tessdata/"

	# Module-level Tesseract engine configured for French.
	ocr = TesseractOCR(lang="fra")

	# Send INFO-and-above log records to stdout with a timestamped format.
	logging.basicConfig(
	    level=logging.INFO,
	    format="%(asctime)s - %(levelname)s - %(message)s",
	    handlers=[logging.StreamHandler(sys.stdout)],
	)
	logger = logging.getLogger(__name__)

	# Download and cache the font file
	def get_font():
	    """Return the path to the annotation font, downloading it on first use.

	    The font is cached as ``Roboto-Regular.ttf`` in the current working
	    directory; when that file already exists it is reused and no network
	    request is made.

	    Returns:
	        The font path as a string, or ``None`` when the font is not cached
	        and could not be downloaded or written (the failure is logged).
	    """
	    font_path = Path("Roboto-Regular.ttf")
	    font_url = "https://github.com/google/fonts/raw/main/apache/roboto/Roboto-Regular.ttf"
	    # NOTE(review): confirm this URL still resolves; a 404 now surfaces as a
	    # logged failure instead of being cached as a bogus font file.
	    try:
	        if not font_path.exists():
	            # Bound the request so a stalled connection cannot hang forever.
	            response = requests.get(font_url, timeout=30)
	            # Refuse to cache an HTTP error page as if it were the font.
	            response.raise_for_status()
	            # Write to a side file and rename it into place, so an
	            # interrupted write never leaves a truncated font cached.
	            partial_path = font_path.with_name(font_path.name + ".part")
	            partial_path.write_bytes(response.content)
	            partial_path.replace(font_path)
	        return str(font_path)
	    except (requests.RequestException, OSError) as e:
	        logger.exception("Error in get_font: %s", e)
	        return None

	# Initialize EasyOCR Reader for French & English
	try:
	    reader = easyocr.Reader(['fr', 'en'], gpu=False)
	except Exception as e:
	    # Keep the name bound even when initialisation fails, so later uses
	    # do not die with an unrelated-looking NameError.
	    reader = None
	    logger.exception("Error initializing EasyOCR: %s", e)

	def _to_rgb_array(image):
	    """Return *image* as a 3-channel RGB numpy array."""
	    if isinstance(image, Image.Image):
	        # Let PIL normalise every mode (RGBA, L, LA, P, CMYK, ...) in one step.
	        return np.array(image.convert("RGB"), dtype=np.uint8)

	    # Array inputs are assumed to already use RGB(A) channel order.
	    array = np.asarray(image)
	    if array.ndim == 2:
	        return cv2.cvtColor(array, cv2.COLOR_GRAY2RGB)
	    if array.ndim == 3 and array.shape[2] == 4:
	        return cv2.cvtColor(array, cv2.COLOR_RGBA2RGB)
	    return array


	def _bbox_bounds(bbox):
	    """Return the axis-aligned (x0, y0, x1, y1) bounds of a list of points."""
	    xs = [point[0] for point in bbox]
	    ys = [point[1] for point in bbox]
	    return int(min(xs)), int(min(ys)), int(max(xs)), int(max(ys))


	def ocr_extract_text_and_tables(image):
	    """Extract text and tables from an image.

	    Args:
	        image: A PIL image or a numpy array, or ``None``.

	    Returns:
	        A 3-tuple ``(text_output, tables_output, annotated_image)``:
	        the EasyOCR lines (one per detection, with confidence), the
	        detected tables rendered as text (or ``"No tables detected."``),
	        and an RGB numpy array with a red box around each detection.
	        When *image* is ``None`` or processing fails, the first two items
	        carry a message and the third is ``None``.

	    Side effects:
	        Each detected table is also written to ``extracted_table_<n>.csv``
	        in the current working directory.
	    """
	    if image is None:
	        return "No image provided", "No image provided", None

	    try:
	        rgb_image = _to_rgb_array(image)

	        # Grayscale + adaptive threshold as preprocessing for EasyOCR.
	        # The data is RGB (PIL order), so use the RGB conversion code.
	        gray = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2GRAY)
	        processed = cv2.adaptiveThreshold(
	            gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
	        )

	        # 1. General text via EasyOCR.
	        results = reader.readtext(processed)
	        detected_text = [
	            f"{text} (Confidence: {confidence:.2f})"
	            for _, text, confidence in results
	        ]

	        # 2. Hand img2table a losslessly encoded in-memory PNG instead of a
	        # fixed-name JPEG on disk, so simultaneous calls cannot overwrite
	        # each other's file and nothing is left behind. OpenCV encoders
	        # expect BGR channel order, hence the conversion.
	        encoded_ok, png_buffer = cv2.imencode(
	            ".png", cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR)
	        )
	        if not encoded_ok:
	            raise ValueError("Could not encode image for table extraction")

	        # 3. Structured table extraction with Tesseract (French).
	        table_ocr = TesseractOCR(lang="fra")
	        tables = Img2TableImage(png_buffer.tobytes()).extract_tables(ocr=table_ocr)
	        table_data = [table.df for table in tables] if tables else []

	        # Save extracted tables as CSV (optional).
	        for table_number, df in enumerate(table_data, start=1):
	            df.to_csv(f"extracted_table_{table_number}.csv", index=False)

	        # 4. Annotate the image with a box around every detection. Using the
	        # min/max over all corner points keeps the rectangle valid even
	        # when a detection is a rotated quadrilateral.
	        pil_image = Image.fromarray(rgb_image)
	        draw = ImageDraw.Draw(pil_image)
	        for bbox, _text, _confidence in results:
	            draw.rectangle(_bbox_bounds(bbox), outline="red", width=3)
	        annotated_image = np.array(pil_image)

	        text_output = "\n".join(detected_text)
	        if table_data:
	            tables_output = "\n\n".join(
	                df.to_string(index=False, header=False) for df in table_data
	            )
	        else:
	            tables_output = "No tables detected."

	        return text_output, tables_output, annotated_image

	    except Exception as e:
	        # Boundary handler for the UI: report the failure to the user, but
	        # also log the traceback so it is not lost.
	        logger.exception("OCR processing failed")
	        return f"Error: {str(e)}", "Processing failed", None

	# Gradio UI: one image upload in; extracted text, extracted tables and the
	# annotated image out.
	iface = gr.Interface(
	    title="French OCR & Table Extractor",
	    description="Upload an image containing French text and tables for OCR processing.",
	    fn=ocr_extract_text_and_tables,
	    # type="pil" makes Gradio pass the upload to `fn` as a PIL image.
	    inputs=gr.Image(type="pil", label="Upload Image"),
	    outputs=[
	        gr.Textbox(label="Extracted Text (French)"),
	        gr.Textbox(label="Extracted Tables"),
	        gr.Image(label="Annotated Image"),
	    ],
	)

	# Start the web app only when this file is run as a script.
	if __name__ == "__main__":
	    iface.launch()