Spaces:
Sleeping
Sleeping
File size: 4,724 Bytes
15e13e7 938099c 625ff0d 938099c 15e13e7 625ff0d 8a21966 625ff0d 15e13e7 ea8ed28 15e13e7 ea8ed28 15e13e7 ea8ed28 15e13e7 ea8ed28 15e13e7 ea8ed28 15e13e7 ea8ed28 15e13e7 ea8ed28 15e13e7 ea8ed28 15e13e7 ea8ed28 15e13e7 ea8ed28 15e13e7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
import gradio as gr
import easyocr
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import os
import requests
from pathlib import Path
import pandas as pd
import pytesseract
from pytesseract import Output
import traceback
import logging
import sys
from img2table.document import Image as Img2TableImage
from img2table.ocr import TesseractOCR
import pytesseract
import os
# Point pytesseract at the system-wide Tesseract binary.
pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"

# The traineddata location can be pinned explicitly if needed:
# os.environ["TESSDATA_PREFIX"] = "/usr/share/tesseract-ocr/4.00/tessdata/"

# img2table OCR backend configured for French ("fra").
ocr = TesseractOCR(lang="fra")

# Route INFO-and-above log records to stdout with a timestamped format.
logging.basicConfig(
    handlers=[logging.StreamHandler(sys.stdout)],
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
# Download and cache the font file
def get_font():
    """Return the path to the Roboto font, downloading it on first use.

    The font is cached as ``Roboto-Regular.ttf`` in the current working
    directory; when that file already exists no network request is made.

    Returns:
        The font path as a string, or ``None`` when the download or the
        write fails (the error is logged with its traceback).
    """
    font_path = Path("Roboto-Regular.ttf")
    try:
        if not font_path.exists():
            font_url = "https://github.com/google/fonts/raw/main/apache/roboto/Roboto-Regular.ttf"
            # Bound the wait so a stalled connection cannot hang the caller.
            response = requests.get(font_url, timeout=30)
            # Reject 4xx/5xx replies: otherwise an HTML error page would be
            # written to disk and treated as the cached font from then on.
            response.raise_for_status()
            font_path.write_bytes(response.content)
        return str(font_path)
    except Exception as e:
        # Best-effort helper: report the failure and let the caller cope
        # with a missing font instead of crashing.
        logger.exception("Error in get_font: %s", e)
        return None
# Initialize EasyOCR Reader for French & English (CPU only)
try:
    reader = easyocr.Reader(['fr', 'en'], gpu=False)
except Exception as e:
    # Keep the name bound so later code sees ``reader is None`` instead of
    # failing with a NameError that hides the real initialisation problem.
    reader = None
    logger.exception("Error initializing EasyOCR: %s", e)
def _to_rgb_array(image):
    """Return *image* as a 3-channel RGB numpy array."""
    if isinstance(image, Image.Image):
        # Normalising through PIL copes with every mode (RGBA, L, LA, P, 1, ...),
        # not only the ones a raw array conversion happens to handle.
        return np.array(image.convert("RGB"), dtype=np.uint8)
    if image.ndim == 2:  # single-channel grayscale array
        return cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    if image.ndim == 3 and image.shape[2] == 4:  # drop the alpha channel
        return cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
    return image


def _enclosing_rect(bbox):
    """Return the axis-aligned ``[(x0, y0), (x1, y1)]`` box enclosing *bbox*."""
    xs = [int(point[0]) for point in bbox]
    ys = [int(point[1]) for point in bbox]
    return [(min(xs), min(ys)), (max(xs), max(ys))]


def ocr_extract_text_and_tables(image):
    """Extract text and tables from an image.

    Text is read with the module-level EasyOCR ``reader`` on an adaptively
    thresholded grayscale copy; tables are extracted with img2table using the
    module-level Tesseract ``ocr`` backend. Each extracted table is also
    written to ``extracted_table_<n>.csv`` in the current working directory.

    Args:
        image: A PIL image or a numpy array (treated as RGB / RGBA /
            grayscale), or ``None``.

    Returns:
        A ``(text, tables, annotated_image)`` tuple: the detected text lines
        with confidences, the tables rendered as plain text (or
        ``"No tables detected."``), and an RGB numpy array with a red box
        around every detected text region. When no image is given or
        processing fails, the two strings carry the message and the image
        is ``None``.
    """
    import tempfile  # local import: only this function needs it

    if image is None:
        return "No image provided", "No image provided", None

    try:
        rgb = _to_rgb_array(image)

        # The working array is RGB, so the RGB conversion code is required;
        # the BGR code would apply the red/blue luminance weights swapped.
        gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
        processed = cv2.adaptiveThreshold(
            gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
        )

        # 1. General text via EasyOCR; each result is (bbox, text, confidence).
        results = reader.readtext(processed)
        detected_text = [
            f"{text} (Confidence: {confidence:.2f})" for _, text, confidence in results
        ]

        # 2. Table extraction via img2table, which is given a file path.
        #    A private temporary directory keeps concurrent requests from
        #    overwriting each other's image and removes the file afterwards.
        with tempfile.TemporaryDirectory() as tmp_dir:
            temp_image_path = os.path.join(tmp_dir, "temp_table_image.jpg")
            # cv2.imwrite expects BGR channel order.
            cv2.imwrite(temp_image_path, cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR))
            # Reuse the module-level Tesseract backend instead of rebuilding it per call.
            tables = Img2TableImage(temp_image_path).extract_tables(ocr=ocr)
            table_data = [table.df for table in tables] if tables else []

        # Save extracted tables as CSV (optional)
        for i, df in enumerate(table_data):
            df.to_csv(f"extracted_table_{i+1}.csv", index=False)

        # 3. Outline every detected text region on a copy of the image.
        pil_image = Image.fromarray(rgb)
        draw = ImageDraw.Draw(pil_image)
        for bbox, _, _ in results:
            # Use all four corners: for a rotated box, corners 0 and 2 alone
            # can yield inverted coordinates.
            draw.rectangle(_enclosing_rect(bbox), outline="red", width=3)
        annotated_image = np.array(pil_image)

        # 4. Format the textual outputs.
        text_output = "\n".join(detected_text)
        if table_data:
            tables_output = "\n\n".join(
                df.to_string(index=False, header=False) for df in table_data
            )
        else:
            tables_output = "No tables detected."

        return text_output, tables_output, annotated_image
    except Exception as e:
        # UI boundary: report the failure to the user, but keep the traceback
        # in the logs so the root cause is not lost.
        logger.exception("OCR processing failed")
        return f"Error: {str(e)}", "Processing failed", None
# Build the Gradio UI: one image in; text, tables and an annotated image out.
iface = gr.Interface(
    title="French OCR & Table Extractor",
    description="Upload an image containing French text and tables for OCR processing.",
    fn=ocr_extract_text_and_tables,
    # type="pil" asks Gradio to pass the upload to the callback as a PIL image.
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=[
        gr.Textbox(label="Extracted Text (French)"),
        gr.Textbox(label="Extracted Tables"),
        gr.Image(label="Annotated Image"),
    ],
)

# Launch the app only when this file is run as a script, not on import.
if __name__ == "__main__":
    iface.launch()
|