Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import easyocr | |
| import cv2 | |
| import numpy as np | |
| from PIL import Image, ImageDraw, ImageFont | |
| import os | |
| import requests | |
| from pathlib import Path | |
| import pandas as pd | |
| import pytesseract | |
| from pytesseract import Output | |
| import traceback | |
| import logging | |
| import sys | |
# Configure root logging once at import time: INFO and above go to stdout
# with a timestamp and level prefix, then expose this module's logger.
logging.basicConfig(
    handlers=[logging.StreamHandler(sys.stdout)],
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
# Download and cache the font file
def get_font():
    """Return the local path of the annotation font, downloading it if absent.

    The font is cached as ``Roboto-Regular.ttf`` in the current working
    directory. When the file is missing it is fetched over HTTP.

    Returns:
        The font path as a string, or ``None`` if the font could not be
        obtained (the error is logged, never raised).
    """
    font_path = Path("Roboto-Regular.ttf")
    font_url = "https://github.com/google/fonts/raw/main/apache/roboto/Roboto-Regular.ttf"
    try:
        logger.info("Attempting to get font...")
        if font_path.exists():
            logger.info("Font already exists")
            return str(font_path)

        logger.info("Font not found, downloading...")
        # A timeout keeps a stalled connection from hanging the request that
        # triggered the download.
        response = requests.get(font_url, timeout=30)
        # Without this check an HTTP error page would be saved and then
        # reused as the "font" on every later call.
        response.raise_for_status()

        # Write to a temporary name first so an interrupted write never
        # leaves a truncated file at the cached path.
        partial_path = font_path.with_name(font_path.name + ".part")
        partial_path.write_bytes(response.content)
        partial_path.replace(font_path)
        logger.info("Font downloaded successfully")
        return str(font_path)
    except Exception as e:
        logger.error(f"Error in get_font: {str(e)}")
        logger.error(traceback.format_exc())
        return None
# Initialize EasyOCR Reader for French (with English as a secondary language).
# `reader` is bound to None first so that a failed initialization leaves a
# defined sentinel rather than an unbound module-level name.
reader = None
try:
    logger.info("Initializing EasyOCR Reader for French...")
    reader = easyocr.Reader(['fr', 'en'], gpu=False)  # CPU-only inference
    logger.info("EasyOCR Reader initialized successfully")
except Exception as e:
    logger.error(f"Error initializing EasyOCR: {str(e)}")
    logger.error(traceback.format_exc())
def _group_words_into_rows(words, row_gap=10):
    """Group (top, left, text) word triples into rows of text, left to right.

    Words are visited top-to-bottom. A new row starts when a word's ``top``
    is more than ``row_gap`` pixels away from the ``top`` of the word that
    opened the current row.
    """
    rows = []
    current = []
    row_top = None
    for top, left, text in sorted(words, key=lambda word: (word[0], word[1])):
        if row_top is None or abs(top - row_top) > row_gap:
            if current:
                rows.append(current)
            current = []
            row_top = top
        current.append((left, text))
    if current:
        rows.append(current)
    # Words on one visual line rarely share the exact same `top`, so the
    # (top, left) visiting order can scramble cells within a row. Re-sort
    # each row by `left` to restore reading order.
    return [
        [text for _, text in sorted(row, key=lambda cell: cell[0])]
        for row in rows
    ]


def _read_table_words(image):
    """Run pytesseract on ``image`` and return its word table, confidence-filtered.

    Returns an empty DataFrame when pytesseract fails; filtering errors are
    logged and the unfiltered/partially filtered frame is returned.
    """
    logger.info("Running Pytesseract for table detection...")
    try:
        pytesseract_config = r'--oem 3 --psm 6 -l fra'  # French language
        logger.info(f"Pytesseract config: {pytesseract_config}")
        df = pytesseract.image_to_data(image, output_type=Output.DATAFRAME, config=pytesseract_config)
        logger.info(f"Pytesseract returned dataframe with shape: {df.shape}")
    except Exception as e:
        logger.error(f"Pytesseract error: {str(e)}")
        logger.error(traceback.format_exc())
        return pd.DataFrame()  # Empty dataframe so the caller can continue

    try:
        if not df.empty:
            logger.info("Filtering low-confidence text...")
            df = df.dropna(subset=['text'])
            logger.info(f"After dropna, dataframe shape: {df.shape}")
            if 'conf' in df.columns:
                df = df.query('conf > 50')
                logger.info(f"After confidence filtering, dataframe shape: {df.shape}")
            else:
                logger.warning("No 'conf' column found in pytesseract output")
    except Exception as e:
        logger.error(f"Error filtering dataframe: {str(e)}")
        logger.error(traceback.format_exc())
    return df


def _extract_tables(words_df):
    """Build one DataFrame per text block that looks like a table.

    A block is treated as a table candidate when it has more than four words
    and those words form at least two rows.
    """
    tables = []
    if words_df.empty or 'block_num' not in words_df.columns:
        return tables

    logger.info("Attempting to identify tables...")
    # sort=False keeps blocks in order of first appearance.
    grouped = words_df.groupby('block_num', sort=False)
    logger.info(f"Found {grouped.ngroups} text blocks")
    for block, block_df in grouped:
        logger.info(f"Processing block {block}")
        try:
            if len(block_df) <= 4:  # Assume a table has at least a few cells
                continue
            logger.info(f"Block {block} has {len(block_df)} cells, might be a table")
            table_rows = _group_words_into_rows(
                zip(block_df['top'], block_df['left'], block_df['text'])
            )
            logger.info(f"Extracted {len(table_rows)} rows from potential table")
            if len(table_rows) < 2:
                continue
            # Pad rows so every row has the same number of columns.
            max_cols = max(len(row) for row in table_rows)
            logger.info(f"Table has {max_cols} columns")
            padded_rows = [row + [''] * (max_cols - len(row)) for row in table_rows]
            table_df = pd.DataFrame(padded_rows)
            tables.append(table_df)
            logger.info(f"Successfully created table with shape {table_df.shape}")
        except Exception as e:
            # One malformed block must not discard tables found in others.
            logger.error(f"Error in table detection: {str(e)}")
            logger.error(traceback.format_exc())
    return tables


def _load_annotation_font(size=20):
    """Return the annotation font, falling back to PIL's default font."""
    logger.info("Loading font...")
    try:
        font_path = get_font()
        if font_path:
            font = ImageFont.truetype(font_path, size=size)
            logger.info("Font loaded successfully")
            return font
        logger.warning("Font path is None, using default font")
    except Exception as e:
        logger.error(f"Error loading font: {str(e)}")
        logger.error(traceback.format_exc())
        logger.info("Using default font instead")
    return ImageFont.load_default()


def _draw_annotations(image, results):
    """Return a copy of ``image`` with a box and label drawn for each OCR result."""
    logger.info("Creating annotated image...")
    pil_image = Image.fromarray(image)
    draw = ImageDraw.Draw(pil_image)
    font = _load_annotation_font()

    logger.info("Drawing annotation boxes...")
    for i, (bbox, text, confidence) in enumerate(results, start=1):
        try:
            # Use the extremes of all box points: taking only bbox[0] and
            # bbox[2] can yield inverted coordinates for rotated text.
            xs = [int(point[0]) for point in bbox]
            ys = [int(point[1]) for point in bbox]
            top_left = (min(xs), min(ys))
            bottom_right = (max(xs), max(ys))
            draw.rectangle([top_left, bottom_right], outline="red", width=3)
            draw.text(top_left, f"{text} ({confidence:.2f})", fill="blue", font=font)
            logger.info(f"Drew annotation for text region {i}")
        except Exception as e:
            logger.error(f"Error drawing annotation for region {i}: {str(e)}")
    return np.array(pil_image)


def ocr_extract_text_and_tables(image):
    """Extract text and table-like blocks from an image and annotate it.

    Text regions come from the module-level EasyOCR ``reader``; table
    candidates come from pytesseract word data grouped by block and row.

    Args:
        image: Image as a numpy array (the Gradio input is declared with
            ``type="numpy"``), or ``None`` when nothing was uploaded.

    Returns:
        A 3-tuple ``(text_output, tables_output, annotated_image)``:
        one line per detected text region with its confidence, a printable
        rendering of every detected table, and the image with boxes/labels
        drawn. On missing input returns ``("No image provided", None, None)``;
        on unexpected failure returns an error message, ``"Processing failed"``
        and ``None``. Exceptions are logged, never raised.
    """
    try:
        logger.info("Starting OCR extraction...")
        if image is None:
            logger.warning("No image provided")
            return "No image provided", None, None

        # Look the reader up defensively: module-level initialization may
        # have failed, leaving it unset or None.
        ocr_reader = globals().get("reader")
        if ocr_reader is None:
            raise RuntimeError("EasyOCR reader is not initialized")

        logger.info(f"Image shape: {image.shape}, dtype: {image.dtype}")
        if image.ndim == 3 and image.shape[2] == 4:  # RGBA
            logger.info("Converting RGBA to RGB")
            image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)

        # 1. General text with EasyOCR
        logger.info("Running EasyOCR text detection...")
        results = ocr_reader.readtext(image)
        logger.info(f"EasyOCR detected {len(results)} text regions")
        detected_text = []
        for i, (_, text, confidence) in enumerate(results, start=1):
            logger.info(f"Text region {i}: '{text}' with confidence {confidence:.2f}")
            detected_text.append(f"{text} (Confidence: {confidence:.2f})")

        # 2. Table candidates from pytesseract word data
        tables = _extract_tables(_read_table_words(image))
        logger.info(f"Detected {len(tables)} tables")

        # 3. Annotated image; fall back to the plain image if drawing fails
        try:
            annotated_image = _draw_annotations(image, results)
            logger.info("Annotated image created successfully")
        except Exception as e:
            logger.error(f"Error creating annotated image: {str(e)}")
            logger.error(traceback.format_exc())
            annotated_image = image.copy()

        text_output = "\n".join(detected_text)
        tables_output = "".join(
            f"Table {i}:\n{table.to_string(index=False, header=False)}\n\n"
            for i, table in enumerate(tables, start=1)
        )
        logger.info("OCR extraction completed successfully")
        return text_output, tables_output, annotated_image
    except Exception as e:
        error_msg = f"Unexpected error in OCR extraction: {str(e)}"
        logger.error(error_msg)
        logger.error(traceback.format_exc())
        return f"Error: {error_msg}", "Processing failed", None
# Assemble the Gradio UI: one image upload feeding the OCR function, with two
# text panes and an annotated image as outputs. Failures are logged, not raised.
try:
    logger.info("Creating Gradio interface...")
    _image_input = gr.Image(type="numpy", label="Upload Image")
    _result_outputs = [
        gr.Textbox(label="Extracted Text (French)", elem_classes=["output-text"]),
        gr.Textbox(label="Extracted Tables", elem_classes=["output-text"]),
        gr.Image(label="Annotated Image"),
    ]
    iface = gr.Interface(
        fn=ocr_extract_text_and_tables,
        inputs=_image_input,
        outputs=_result_outputs,
        title="French OCR & Table Extractor",
        description="Upload an image containing French text and tables for OCR processing. The system will detect and extract both regular text and tabular data.",
        examples=[],  # Example images can be listed here
        cache_examples=True,
    )
    logger.info("Gradio interface created successfully")
except Exception as build_error:
    logger.error(f"Error creating Gradio interface: {str(build_error)}")
    logger.error(traceback.format_exc())
# Script entry point: start the Gradio server only when run directly.
# Any launch failure is logged with its traceback instead of propagating.
if __name__ == "__main__":
    try:
        logger.info("Launching Gradio interface...")
        iface.launch()
        logger.info("Gradio interface launched successfully")
    except Exception as launch_error:
        logger.error(f"Error launching Gradio interface: {str(launch_error)}")
        logger.error(traceback.format_exc())