GiantAnalytics's picture
Update app.py
af88408 verified
import gradio as gr
import easyocr
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import os
import requests
from pathlib import Path
import pandas as pd
import pytesseract
from pytesseract import Output
import traceback
import logging
import sys
# Configure root logging: INFO and above, timestamped, written to stdout.
_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(
    handlers=[logging.StreamHandler(sys.stdout)],
    format=_LOG_FORMAT,
    level=logging.INFO,
)
# Module-level logger shared by every function in this file.
logger = logging.getLogger(__name__)
def get_font():
    """Return the path of the Roboto font file, downloading it on first use.

    The font is cached as ``Roboto-Regular.ttf`` relative to the current
    working directory. When the file is missing it is fetched over HTTP.

    Returns:
        The font path as a string, or ``None`` when the font could not be
        obtained (the error is logged, never raised).
    """
    font_path = Path("Roboto-Regular.ttf")
    font_url = "https://github.com/google/fonts/raw/main/apache/roboto/Roboto-Regular.ttf"
    try:
        logger.info("Attempting to get font...")
        if font_path.exists():
            logger.info("Font already exists")
            return str(font_path)

        logger.info("Font not found, downloading...")
        # A timeout keeps a stalled connection from hanging the caller forever.
        response = requests.get(font_url, timeout=30)
        # Without this check an HTTP error page (e.g. a 404 body) would be
        # saved as the "font" and then reused on every later call.
        response.raise_for_status()

        # Write to a temporary name and rename, so a partially written file
        # is never visible under the final font path.
        partial_path = font_path.with_name(font_path.name + ".part")
        partial_path.write_bytes(response.content)
        partial_path.replace(font_path)
        logger.info("Font downloaded successfully")
        return str(font_path)
    except Exception as e:
        # Best effort: callers fall back to a default font when None is returned.
        logger.exception("Error in get_font: %s", e)
        return None
# Initialize the EasyOCR reader (French and English) once at import time.
# Initialization is best effort: a failure is logged and `reader` is set to
# None, so the name is always bound and later code can test `reader is None`
# instead of hitting a NameError.
try:
    logger.info("Initializing EasyOCR Reader for French...")
    # gpu=False: run on CPU.
    reader = easyocr.Reader(['fr', 'en'], gpu=False)
    logger.info("EasyOCR Reader initialized successfully")
except Exception as e:
    logger.exception("Error initializing EasyOCR: %s", e)
    reader = None
def _run_tesseract(image):
    """Run Tesseract word-level OCR on *image*; return an empty DataFrame on failure."""
    pytesseract_config = r'--oem 3 --psm 6 -l fra'  # French language
    logger.info(f"Pytesseract config: {pytesseract_config}")
    try:
        df = pytesseract.image_to_data(
            image, output_type=Output.DATAFRAME, config=pytesseract_config
        )
    except Exception as e:
        logger.exception("Pytesseract error: %s", e)
        return pd.DataFrame()  # Empty dataframe lets the caller continue
    logger.info(f"Pytesseract returned dataframe with shape: {df.shape}")
    return df


def _filter_confident_words(df, min_confidence=50):
    """Drop rows without usable text or with confidence <= *min_confidence*."""
    if df.empty:
        return df
    try:
        logger.info("Filtering low-confidence text...")
        df = df.dropna(subset=['text'])
        # Whitespace-only tokens are not NaN, so dropna keeps them; they would
        # otherwise show up as blank table cells.
        df = df[df['text'].astype(str).str.strip() != '']
        logger.info(f"After dropna, dataframe shape: {df.shape}")
        if 'conf' in df.columns:
            # Coerce so a non-numeric confidence value cannot break the comparison.
            confidence = pd.to_numeric(df['conf'], errors='coerce')
            df = df[confidence > min_confidence]
            logger.info(f"After confidence filtering, dataframe shape: {df.shape}")
        else:
            logger.warning("No 'conf' column found in pytesseract output")
    except Exception as e:
        # Best effort: keep whatever filtering succeeded so far.
        logger.exception("Error filtering dataframe: %s", e)
    return df


def _rows_from_block(block_df, row_gap=10):
    """Group the words of one block into rows (top to bottom, cells left to right).

    A new row starts when a word's ``top`` differs from the ``top`` of the
    current row's first word by more than *row_gap* pixels.
    """
    ordered = block_df.sort_values(['top', 'left'])
    rows = []
    current_row = []  # (left, text) pairs of the row being built
    row_top = None
    for top, left, text in zip(ordered['top'], ordered['left'], ordered['text']):
        if row_top is None or abs(top - row_top) > row_gap:
            if current_row:
                rows.append(current_row)
            current_row = []
            row_top = top
        current_row.append((left, text))
    if current_row:
        rows.append(current_row)
    # The sort above orders by exact `top` first, so two words on the same
    # visual line whose tops differ by a pixel can arrive right-to-left.
    # Re-sort each row by `left` to restore the horizontal cell order.
    return [
        [text for _, text in sorted(row, key=lambda cell: cell[0])]
        for row in rows
    ]


def _detect_tables(df, min_block_words=4):
    """Build one DataFrame per text block that looks like a table.

    A block qualifies when it has more than *min_block_words* words and they
    fall into at least two rows. Short rows are padded with empty strings.
    """
    tables = []
    if df.empty or 'block_num' not in df.columns:
        return tables
    try:
        logger.info("Attempting to identify tables...")
        # block_num is used as the table separator.
        blocks = df['block_num'].unique()
        logger.info(f"Found {len(blocks)} text blocks")
        for block in blocks:
            block_df = df[df['block_num'] == block]
            if len(block_df) <= min_block_words:
                continue
            logger.info(f"Block {block} has {len(block_df)} cells, might be a table")
            table_rows = _rows_from_block(block_df)
            logger.info(f"Extracted {len(table_rows)} rows from potential table")
            if len(table_rows) < 2:
                continue
            try:
                max_cols = max(len(row) for row in table_rows)
                logger.info(f"Table has {max_cols} columns")
                padded_rows = [row + [''] * (max_cols - len(row)) for row in table_rows]
                table_df = pd.DataFrame(padded_rows)
                tables.append(table_df)
                logger.info(f"Successfully created table with shape {table_df.shape}")
            except Exception as e:
                logger.exception("Error creating table DataFrame: %s", e)
    except Exception as e:
        logger.exception("Error in table detection: %s", e)
    return tables


def _load_annotation_font(size=20):
    """Return a PIL font for annotations, falling back to PIL's default font."""
    logger.info("Loading font...")
    try:
        font_path = get_font()
        if font_path:
            font = ImageFont.truetype(font_path, size=size)
            logger.info("Font loaded successfully")
            return font
        logger.warning("Font path is None, using default font")
    except Exception as e:
        logger.exception("Error loading font: %s", e)
        logger.info("Using default font instead")
    return ImageFont.load_default()


def _annotate_image(image, results):
    """Draw a red box and a blue "text (confidence)" label for every OCR result.

    Returns the annotated image as a numpy array, or a copy of the original
    image when annotation fails as a whole.
    """
    try:
        logger.info("Creating annotated image...")
        pil_image = Image.fromarray(image)
        draw = ImageDraw.Draw(pil_image)
        font = _load_annotation_font()

        logger.info("Drawing annotation boxes...")
        for i, (bbox, text, confidence) in enumerate(results, start=1):
            try:
                # bbox is assumed to be a sequence of (x, y) corner points.
                # Taking min/max over all of them gives a well-formed
                # rectangle even when the detected quadrilateral is rotated,
                # where corners 0 and 2 alone could yield inverted
                # coordinates that ImageDraw refuses to draw.
                xs = [int(point[0]) for point in bbox]
                ys = [int(point[1]) for point in bbox]
                top_left = (min(xs), min(ys))
                bottom_right = (max(xs), max(ys))
                draw.rectangle([top_left, bottom_right], outline="red", width=3)
                draw.text(top_left, f"{text} ({confidence:.2f})", fill="blue", font=font)
            except Exception as e:
                # One bad region must not prevent the others from being drawn.
                logger.error(f"Error drawing annotation for region {i}: {str(e)}")
        annotated = np.array(pil_image)
        logger.info("Annotated image created successfully")
        return annotated
    except Exception as e:
        logger.exception("Error creating annotated image: %s", e)
        return image.copy()  # Return original image if annotation fails


def ocr_extract_text_and_tables(image):
    """Extract text and table-like structures from an image.

    EasyOCR provides the general text with confidence scores; Tesseract's
    word-level output is grouped into rows and columns to approximate tables;
    the EasyOCR detections are drawn onto a copy of the image.

    Args:
        image: The uploaded image as a numpy array, or ``None`` when nothing
            was uploaded. A 3-D array with four channels is treated as RGBA
            and converted to RGB.

    Returns:
        A tuple ``(text_output, tables_output, annotated_image)``:
        one "text (Confidence: x.xx)" line per detection, a printable dump of
        the detected tables, and the annotated image. On failure the first
        element is an error message and the image is ``None``.
    """
    try:
        logger.info("Starting OCR extraction...")
        if image is None:
            logger.warning("No image provided")
            return "No image provided", None, None

        # The reader is created at import time and that step may have failed;
        # look it up defensively so the user gets a clear message whether the
        # name is unbound or None.
        ocr_reader = globals().get("reader")
        if ocr_reader is None:
            error_msg = "EasyOCR reader is not available (initialization failed)"
            logger.error(error_msg)
            return f"Error: {error_msg}", "Processing failed", None

        logger.info(f"Image shape: {image.shape}, dtype: {image.dtype}")
        if len(image.shape) == 3 and image.shape[2] == 4:  # RGBA
            logger.info("Converting RGBA to RGB")
            image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)

        # 1. General text with EasyOCR
        logger.info("Running EasyOCR text detection...")
        results = ocr_reader.readtext(image)
        logger.info(f"EasyOCR detected {len(results)} text regions")
        detected_text = []
        for i, (_, text, confidence) in enumerate(results, start=1):
            logger.info("Text region %d: '%s' with confidence %.2f", i, text, confidence)
            detected_text.append(f"{text} (Confidence: {confidence:.2f})")

        # 2. Table detection from Tesseract's word-level output
        logger.info("Running Pytesseract for table detection...")
        words = _filter_confident_words(_run_tesseract(image))
        tables = _detect_tables(words)
        logger.info(f"Detected {len(tables)} tables")

        # 3. Annotated image
        annotated_image = _annotate_image(image, results)

        text_output = "\n".join(detected_text)
        tables_output = "".join(
            f"Table {i}:\n{table.to_string(index=False, header=False)}\n\n"
            for i, table in enumerate(tables, start=1)
        )

        logger.info("OCR extraction completed successfully")
        return text_output, tables_output, annotated_image
    except Exception as e:
        # Top-level boundary for the UI callback: report instead of raising.
        error_msg = f"Unexpected error in OCR extraction: {str(e)}"
        logger.exception("%s", error_msg)
        return f"Error: {error_msg}", "Processing failed", None
# Build the Gradio UI: one image input, two text outputs and one image output.
# Construction errors are logged rather than raised.
try:
    logger.info("Creating Gradio interface...")
    _image_input = gr.Image(type="numpy", label="Upload Image")
    _output_components = [
        gr.Textbox(label="Extracted Text (French)", elem_classes=["output-text"]),
        gr.Textbox(label="Extracted Tables", elem_classes=["output-text"]),
        gr.Image(label="Annotated Image"),
    ]
    iface = gr.Interface(
        fn=ocr_extract_text_and_tables,
        inputs=_image_input,
        outputs=_output_components,
        title="French OCR & Table Extractor",
        description="Upload an image containing French text and tables for OCR processing. The system will detect and extract both regular text and tabular data.",
        examples=[],  # Placeholder: example images can be listed here
        cache_examples=True,
    )
    logger.info("Gradio interface created successfully")
except Exception as err:
    logger.error(f"Error creating Gradio interface: {str(err)}")
    logger.error(traceback.format_exc())
# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    try:
        logger.info("Launching Gradio interface...")
        iface.launch()
        logger.info("Gradio interface launched successfully")
    except Exception as launch_error:
        # Log the failure with its traceback instead of letting it propagate.
        logger.error(f"Error launching Gradio interface: {str(launch_error)}")
        logger.error(traceback.format_exc())