"""Gradio app: OCR French documents with EasyOCR and extract tables with Tesseract.

EasyOCR supplies free-text detections (with bounding boxes used for the
annotated preview); Tesseract's word-level output is grouped into simple
row/column tables.
"""

import logging
import os
import sys
import traceback
from pathlib import Path

import cv2
import easyocr
import gradio as gr
import numpy as np
import pandas as pd
import pytesseract
import requests
from PIL import Image, ImageDraw, ImageFont
from pytesseract import Output

# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[logging.StreamHandler(sys.stdout)],
)
logger = logging.getLogger(__name__)

# Font used to label detections on the annotated image.
FONT_PATH = Path("Roboto-Regular.ttf")
FONT_URL = "https://github.com/google/fonts/raw/main/apache/roboto/Roboto-Regular.ttf"
FONT_DOWNLOAD_TIMEOUT_S = 30
FONT_SIZE = 20

# Tesseract settings: default OCR engine, "single uniform block" page
# segmentation, French language data.
PYTESSERACT_CONFIG = r'--oem 3 --psm 6 -l fra'
# Words at or below this Tesseract confidence are discarded.
MIN_TESSERACT_CONFIDENCE = 50
# A text block needs more than this many words to be considered a table.
MIN_TABLE_WORDS = 4
# Words whose `top` differs from the row's first word by more than this many
# pixels start a new table row.
ROW_GAP_PX = 10


def get_font():
    """Return the path of the annotation font, downloading it on first use.

    Returns:
        The font path as a string, or ``None`` when the font is not cached
        locally and could not be downloaded or written.
    """
    logger.info("Attempting to get font...")
    try:
        if FONT_PATH.exists():
            logger.info("Font already exists")
            return str(FONT_PATH)

        logger.info("Font not found, downloading...")
        # A timeout keeps a stalled connection from hanging the request, and
        # raise_for_status() prevents an HTTP error page from being cached on
        # disk as if it were the font file.
        response = requests.get(FONT_URL, timeout=FONT_DOWNLOAD_TIMEOUT_S)
        response.raise_for_status()
        FONT_PATH.write_bytes(response.content)
    except (requests.RequestException, OSError):
        logger.exception("Error in get_font")
        return None

    logger.info("Font downloaded successfully")
    return str(FONT_PATH)


# Initialize EasyOCR Reader for French. `reader` stays None when
# initialization fails so request handling can report that explicitly.
reader = None
try:
    logger.info("Initializing EasyOCR Reader for French...")
    reader = easyocr.Reader(['fr', 'en'], gpu=False)  # CPU-only
    logger.info("EasyOCR Reader initialized successfully")
except Exception:
    logger.exception("Error initializing EasyOCR")


def _run_easyocr(image):
    """Run EasyOCR on *image*; return (raw results, formatted text lines)."""
    logger.info("Running EasyOCR text detection...")
    results = reader.readtext(image)
    logger.info("EasyOCR detected %d text regions", len(results))

    detected_text = []
    for i, (_bbox, text, confidence) in enumerate(results, start=1):
        logger.info("Text region %d: '%s' with confidence %.2f", i, text, confidence)
        detected_text.append(f"{text} (Confidence: {confidence:.2f})")
    return results, detected_text


def _run_tesseract(image):
    """Return Tesseract's word-level DataFrame, or an empty one on failure."""
    logger.info("Running Pytesseract for table detection...")
    logger.info("Pytesseract config: %s", PYTESSERACT_CONFIG)
    try:
        df = pytesseract.image_to_data(
            image, output_type=Output.DATAFRAME, config=PYTESSERACT_CONFIG
        )
    except Exception:
        # Best effort: table extraction is optional, the text output is not.
        logger.exception("Pytesseract error")
        return pd.DataFrame()
    logger.info("Pytesseract returned dataframe with shape: %s", df.shape)
    return df


def _filter_confident_words(df):
    """Drop rows with missing/blank text or confidence <= the threshold."""
    if df.empty:
        return df
    if 'text' not in df.columns:
        logger.warning("No 'text' column found in pytesseract output")
        return df.iloc[0:0]

    logger.info("Filtering low-confidence text...")
    df = df.dropna(subset=['text'])
    # Whitespace-only tokens would otherwise become empty table cells.
    df = df[df['text'].astype(str).str.strip().astype(bool)]
    logger.info("After dropna, dataframe shape: %s", df.shape)

    if 'conf' in df.columns:
        # Coerce so a non-numeric column cannot break the comparison;
        # unparseable values become NaN and are filtered out.
        conf = pd.to_numeric(df['conf'], errors='coerce')
        df = df[conf > MIN_TESSERACT_CONFIDENCE]
        logger.info("After confidence filtering, dataframe shape: %s", df.shape)
    else:
        logger.warning("No 'conf' column found in pytesseract output")
    return df


def _group_words_into_rows(block_df):
    """Group one block's words into rows of text, each ordered left to right.

    Words are visited top to bottom; a word starts a new row when its `top`
    is more than ROW_GAP_PX away from the `top` of the current row's first
    word.
    """
    ordered = block_df.sort_values(['top', 'left'])
    rows = []
    current = []
    row_top = None
    for top, left, text in zip(ordered['top'], ordered['left'], ordered['text']):
        if row_top is None or abs(top - row_top) > ROW_GAP_PX:
            if current:
                rows.append(current)
            current = []
            row_top = top
        current.append((left, text))
    if current:
        rows.append(current)

    # Within a row the words were ordered by `top` first, so a word a pixel
    # lower but further left would come out after its right-hand neighbours.
    # Re-sort each row by horizontal position to restore reading order.
    return [
        [text for _left, text in sorted(row, key=lambda cell: cell[0])]
        for row in rows
    ]


def _extract_tables(df):
    """Build one DataFrame per Tesseract block that looks like a table.

    A block qualifies when it has more than MIN_TABLE_WORDS words that fall
    into at least two rows. Each word is treated as one cell; short rows are
    padded with empty strings.
    """
    tables = []
    if df.empty or not {'block_num', 'top', 'left', 'text'}.issubset(df.columns):
        return tables

    logger.info("Attempting to identify tables...")
    logger.info("Found %d text blocks", df['block_num'].nunique())
    for block, block_df in df.groupby('block_num', sort=False):
        logger.info("Processing block %s", block)
        if len(block_df) <= MIN_TABLE_WORDS:
            continue
        logger.info("Block %s has %d cells, might be a table", block, len(block_df))

        table_rows = _group_words_into_rows(block_df)
        logger.info("Extracted %d rows from potential table", len(table_rows))
        if len(table_rows) < 2:
            continue

        max_cols = max(len(row) for row in table_rows)
        logger.info("Table has %d columns", max_cols)
        padded_rows = [row + [''] * (max_cols - len(row)) for row in table_rows]
        table_df = pd.DataFrame(padded_rows)
        tables.append(table_df)
        logger.info("Successfully created table with shape %s", table_df.shape)
    return tables


def _load_annotation_font():
    """Return the TrueType annotation font, or PIL's default as a fallback."""
    logger.info("Loading font...")
    font_path = get_font()
    if not font_path:
        logger.warning("Font path is None, using default font")
        return ImageFont.load_default()
    try:
        font = ImageFont.truetype(font_path, size=FONT_SIZE)
    except OSError:
        # Raised when the cached file is missing or not a valid font.
        logger.exception("Error loading font")
        logger.info("Using default font instead")
        return ImageFont.load_default()
    logger.info("Font loaded successfully")
    return font


def _annotate_image(image, results):
    """Return a copy of *image* with EasyOCR boxes and labels drawn on it."""
    logger.info("Creating annotated image...")
    pil_image = Image.fromarray(image)
    draw = ImageDraw.Draw(pil_image)
    font = _load_annotation_font()

    logger.info("Drawing annotation boxes...")
    for i, (bbox, text, confidence) in enumerate(results, start=1):
        try:
            # Use the axis-aligned extent of all corner points: for rotated
            # text the first and third corners are not guaranteed to be
            # ordered, and Pillow rejects unordered rectangle coordinates.
            xs = [int(point[0]) for point in bbox]
            ys = [int(point[1]) for point in bbox]
            top_left = (min(xs), min(ys))
            bottom_right = (max(xs), max(ys))

            draw.rectangle([top_left, bottom_right], outline="red", width=3)
            draw.text(top_left, f"{text} ({confidence:.2f})", fill="blue", font=font)
            logger.info("Drew annotation for text region %d", i)
        except Exception as e:
            # Best effort: one bad region must not lose the other annotations.
            logger.error("Error drawing annotation for region %d: %s", i, e)

    annotated_image = np.array(pil_image)
    logger.info("Annotated image created successfully")
    return annotated_image


def ocr_extract_text_and_tables(image):
    """Extract text and tables from *image* and draw the detections on it.

    Args:
        image: The uploaded image as a numpy array (Gradio ``type="numpy"``),
            or ``None`` when nothing was uploaded. RGBA input is converted
            to RGB.

    Returns:
        A 3-tuple ``(text_output, tables_output, annotated_image)``:
        one line per EasyOCR detection with its confidence, the detected
        tables rendered as text, and the annotated image array. On failure
        the first element is an error message and the image is ``None``.
    """
    try:
        logger.info("Starting OCR extraction...")

        if image is None:
            logger.warning("No image provided")
            return "No image provided", None, None

        if reader is None:
            logger.error("EasyOCR reader is not available")
            return (
                "Error: OCR engine failed to initialize; check the server logs",
                "Processing failed",
                None,
            )

        logger.info("Image shape: %s, dtype: %s", image.shape, image.dtype)

        # Convert to RGB if needed
        if image.ndim == 3 and image.shape[2] == 4:  # RGBA
            logger.info("Converting RGBA to RGB")
            image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)

        # 1. General text with EasyOCR
        results, detected_text = _run_easyocr(image)

        # 2. Tables with Tesseract (best effort: failures yield no tables)
        tables = []
        try:
            words = _filter_confident_words(_run_tesseract(image.copy()))
            tables = _extract_tables(words)
        except Exception:
            logger.exception("Error in table detection")
        logger.info("Detected %d tables", len(tables))

        # 3. Annotated preview; fall back to the plain image if drawing fails
        try:
            annotated_image = _annotate_image(image, results)
        except Exception:
            logger.exception("Error creating annotated image")
            annotated_image = image.copy()

        text_output = "\n".join(detected_text)
        tables_output = "".join(
            f"Table {i}:\n{table.to_string(index=False, header=False)}\n\n"
            for i, table in enumerate(tables, start=1)
        )

        logger.info("OCR extraction completed successfully")
        return text_output, tables_output, annotated_image

    except Exception as e:
        # Top-level boundary: report the failure in the UI instead of raising.
        error_msg = f"Unexpected error in OCR extraction: {str(e)}"
        logger.exception(error_msg)
        return f"Error: {error_msg}", "Processing failed", None


# Create Gradio interface. `iface` stays None when construction fails so the
# launcher can report that instead of hitting an undefined name.
iface = None
try:
    logger.info("Creating Gradio interface...")
    iface = gr.Interface(
        fn=ocr_extract_text_and_tables,
        inputs=gr.Image(type="numpy", label="Upload Image"),
        outputs=[
            gr.Textbox(label="Extracted Text (French)", elem_classes=["output-text"]),
            gr.Textbox(label="Extracted Tables", elem_classes=["output-text"]),
            gr.Image(label="Annotated Image"),
        ],
        title="French OCR & Table Extractor",
        description="Upload an image containing French text and tables for OCR processing. The system will detect and extract both regular text and tabular data.",
        examples=[],  # You can add example images here
        cache_examples=True,
    )
    logger.info("Gradio interface created successfully")
except Exception:
    logger.exception("Error creating Gradio interface")


def main():
    """Launch the Gradio interface; return a process exit status."""
    if iface is None:
        logger.error("Gradio interface was not created; nothing to launch")
        return 1
    try:
        logger.info("Launching Gradio interface...")
        iface.launch()
        logger.info("Gradio interface launched successfully")
    except Exception:
        logger.exception("Error launching Gradio interface")
        return 1
    return 0


# Launch the interface
if __name__ == "__main__":
    sys.exit(main())