Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import easyocr | |
| import cv2 | |
| import numpy as np | |
| from PIL import Image, ImageDraw, ImageFont | |
| import os | |
| import requests | |
| from pathlib import Path | |
| import pandas as pd | |
| import pytesseract | |
| from pytesseract import Output | |
| import traceback | |
| import logging | |
| import sys | |
| from img2table.document import Image as Img2TableImage | |
| from img2table.ocr import TesseractOCR | |
| import pytesseract | |
| import os | |
| # Set the correct Tesseract path | |
# Location of the Tesseract executable used by pytesseract. The default is
# "/usr/bin/tesseract"; set the TESSERACT_CMD environment variable to point at
# a different install without editing this file.
pytesseract.pytesseract.tesseract_cmd = os.environ.get(
    "TESSERACT_CMD", "/usr/bin/tesseract"
)

# NOTE: the trained-data directory can be set explicitly if required, e.g.
#   os.environ["TESSDATA_PREFIX"] = "/usr/share/tesseract-ocr/4.00/tessdata/"

# img2table Tesseract OCR engine configured for French ("fra").
ocr = TesseractOCR(lang="fra")
| # Set up logging | |
# Root logging configuration: INFO and above, written to stdout as
# "<timestamp> - <level> - <message>".
_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
_stdout_handler = logging.StreamHandler(sys.stdout)
logging.basicConfig(
    handlers=[_stdout_handler],
    format=_LOG_FORMAT,
    level=logging.INFO,
)

# Logger named after this module.
logger = logging.getLogger(__name__)
| # Download and cache the font file | |
def get_font():
    """Return the path to the Roboto annotation font, downloading it if needed.

    The font is cached as ``Roboto-Regular.ttf`` in the current working
    directory. When that file already exists, no network request is made.

    Returns:
        The font path as a string, or ``None`` when the download or the write
        fails (the error is logged rather than raised).
    """
    font_path = Path("Roboto-Regular.ttf")
    try:
        if not font_path.exists():
            font_url = "https://github.com/google/fonts/raw/main/apache/roboto/Roboto-Regular.ttf"
            # A timeout keeps a stalled connection from blocking forever.
            response = requests.get(font_url, timeout=30)
            # Reject HTTP errors instead of caching an error page as the font.
            response.raise_for_status()
            # Write to a sibling file and rename it afterwards, so an
            # interrupted write never leaves a truncated file that would
            # later be mistaken for a valid cached font.
            partial_path = font_path.with_name(font_path.name + ".part")
            partial_path.write_bytes(response.content)
            partial_path.replace(font_path)
        return str(font_path)
    except Exception as e:
        # Best effort: callers get None and can fall back to a default font.
        logger.error(f"Error in get_font: {str(e)}")
        return None
| # Initialize EasyOCR Reader for French & English | |
# EasyOCR reader for French and English, running on CPU (gpu=False).
try:
    reader = easyocr.Reader(['fr', 'en'], gpu=False)
except Exception as e:
    logger.error(f"Error initializing EasyOCR: {str(e)}")
    # Keep the name bound so later code can test for None instead of
    # failing with NameError on first use.
    reader = None
def _to_rgb_array(image):
    """Return *image* (PIL image or numpy array) as a 3-channel RGB array."""
    if isinstance(image, Image.Image):
        # Let Pillow normalise every mode (RGBA, L, P, CMYK, ...) to RGB.
        return np.array(image.convert("RGB"), dtype=np.uint8)
    image = np.asarray(image)
    if image.ndim == 2:
        # Grayscale: replicate the single channel.
        return cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    if image.ndim == 3 and image.shape[2] == 4:
        # Drop the alpha channel.
        return cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
    return image


def _extract_table_frames(rgb_image):
    """Run img2table on *rgb_image* and return one DataFrame per table found."""
    import tempfile

    # cv2.imwrite expects BGR channel order, while the pipeline works in RGB.
    bgr_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR)
    # A private temporary directory avoids clashes between concurrent
    # requests and is removed automatically; PNG keeps the pixels lossless.
    with tempfile.TemporaryDirectory() as tmp_dir:
        image_path = os.path.join(tmp_dir, "table_image.png")
        if not cv2.imwrite(image_path, bgr_image):
            raise OSError(f"Could not write temporary image: {image_path}")
        # img2table is given a file path; the module-level French Tesseract
        # engine `ocr` is reused instead of building a new one per call.
        tables = Img2TableImage(image_path).extract_tables(ocr=ocr)
    return [table.df for table in tables] if tables else []


def _annotate_detections(rgb_image, detections):
    """Return a copy of *rgb_image* with a red box around each detection."""
    canvas = Image.fromarray(rgb_image)
    pen = ImageDraw.Draw(canvas)
    for corners, _text, _confidence in detections:
        # Each detection carries corner points; use their axis-aligned bounds
        # so rotated text never produces inverted coordinates, which
        # ImageDraw.rectangle rejects.
        xs = [int(point[0]) for point in corners]
        ys = [int(point[1]) for point in corners]
        pen.rectangle(
            [(min(xs), min(ys)), (max(xs), max(ys))], outline="red", width=3
        )
    return np.array(canvas)


def ocr_extract_text_and_tables(image):
    """Extract text and tables from an image.

    Args:
        image: A PIL image or numpy array, or ``None`` when nothing was
            uploaded. The Gradio input in this file supplies PIL images.

    Returns:
        A 3-tuple ``(text_output, tables_output, annotated_image)``:
        the detected text lines with their confidence, the extracted tables
        rendered as plain text (or ``"No tables detected."``), and an RGB
        numpy array of the input with detected text boxed in red. On failure
        the tuple is ``("Error: ...", "Processing failed", None)``.

    Side effects:
        Writes each extracted table to ``extracted_table_<n>.csv`` in the
        current working directory.
    """
    if image is None:
        return "No image provided", "No image provided", None

    try:
        # Module-level EasyOCR initialisation may have failed, leaving the
        # name unbound or set to None; report that instead of crashing.
        text_reader = globals().get("reader")
        if text_reader is None:
            return "Error: EasyOCR reader is not initialized", "Processing failed", None

        rgb_image = _to_rgb_array(image)

        # Grayscale + adaptive thresholding before text recognition.
        gray = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2GRAY)
        binarized = cv2.adaptiveThreshold(
            gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
        )

        # 1. General text via EasyOCR.
        detections = text_reader.readtext(binarized)
        detected_text = [
            f"{text} (Confidence: {confidence:.2f})"
            for _, text, confidence in detections
        ]

        # 2. Structured tables via img2table.
        table_data = _extract_table_frames(rgb_image)

        # Save extracted tables as CSV (optional).
        for i, df in enumerate(table_data):
            df.to_csv(f"extracted_table_{i+1}.csv", index=False)

        # 3. Annotated copy of the input image.
        annotated_image = _annotate_detections(rgb_image, detections)

        # Format output.
        text_output = "\n".join(detected_text)
        if table_data:
            tables_output = "\n\n".join(
                df.to_string(index=False, header=False) for df in table_data
            )
        else:
            tables_output = "No tables detected."
        return text_output, tables_output, annotated_image
    except Exception as e:
        # Top-level boundary for the UI callback: log the traceback and
        # return the error as text instead of raising.
        logger.exception("OCR processing failed")
        return f"Error: {str(e)}", "Processing failed", None
| # Create Gradio Interface | |
# Input component: one uploaded image, passed to the callback as a PIL image.
_upload_input = gr.Image(type="pil", label="Upload Image")

# Output components, in the order the callback returns its values.
_result_outputs = [
    gr.Textbox(label="Extracted Text (French)"),
    gr.Textbox(label="Extracted Tables"),
    gr.Image(label="Annotated Image"),
]

# Gradio UI wiring the OCR callback to the components above.
iface = gr.Interface(
    fn=ocr_extract_text_and_tables,
    inputs=_upload_input,
    outputs=_result_outputs,
    title="French OCR & Table Extractor",
    description="Upload an image containing French text and tables for OCR processing.",
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()