Update app.py
Browse files
app.py
CHANGED
|
@@ -23,6 +23,7 @@ logger = logging.getLogger(__name__)
|
|
| 23 |
|
| 24 |
# Download and cache the font file
|
| 25 |
def get_font():
|
|
|
|
| 26 |
try:
|
| 27 |
font_path = Path("Roboto-Regular.ttf")
|
| 28 |
if not font_path.exists():
|
|
@@ -34,33 +35,44 @@ def get_font():
|
|
| 34 |
logger.error(f"Error in get_font: {str(e)}")
|
| 35 |
return None
|
| 36 |
|
| 37 |
-
# Initialize EasyOCR Reader for French
|
| 38 |
try:
|
| 39 |
reader = easyocr.Reader(['fr', 'en'], gpu=False)
|
| 40 |
except Exception as e:
|
| 41 |
logger.error(f"Error initializing EasyOCR: {str(e)}")
|
| 42 |
|
| 43 |
def ocr_extract_text_and_tables(image):
|
|
|
|
| 44 |
try:
|
| 45 |
if image is None:
|
| 46 |
return "No image provided", "No image provided", None
|
| 47 |
|
| 48 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
if len(image.shape) == 3 and image.shape[2] == 4:
|
| 50 |
image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
|
| 51 |
-
|
|
|
|
|
|
|
| 52 |
# Convert to grayscale for better OCR
|
| 53 |
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
|
| 54 |
|
| 55 |
# Apply adaptive thresholding
|
| 56 |
processed = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
|
| 57 |
|
| 58 |
-
# 1
|
| 59 |
results = reader.readtext(processed)
|
| 60 |
detected_text = [f"{text} (Confidence: {confidence:.2f})" for _, text, confidence in results]
|
| 61 |
|
| 62 |
-
# 2
|
| 63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
ocr = TesseractOCR(lang="fra")
|
| 65 |
tables = img.extract_tables(ocr=ocr)
|
| 66 |
|
|
@@ -71,7 +83,7 @@ def ocr_extract_text_and_tables(image):
|
|
| 71 |
for i, df in enumerate(table_data):
|
| 72 |
df.to_csv(f"extracted_table_{i+1}.csv", index=False)
|
| 73 |
|
| 74 |
-
# Annotate
|
| 75 |
pil_image = Image.fromarray(image)
|
| 76 |
draw = ImageDraw.Draw(pil_image)
|
| 77 |
|
|
@@ -91,10 +103,10 @@ def ocr_extract_text_and_tables(image):
|
|
| 91 |
except Exception as e:
|
| 92 |
return f"Error: {str(e)}", "Processing failed", None
|
| 93 |
|
| 94 |
-
# Create Gradio
|
| 95 |
iface = gr.Interface(
|
| 96 |
fn=ocr_extract_text_and_tables,
|
| 97 |
-
inputs=gr.Image(type="
|
| 98 |
outputs=[
|
| 99 |
gr.Textbox(label="Extracted Text (French)"),
|
| 100 |
gr.Textbox(label="Extracted Tables"),
|
|
|
|
| 23 |
|
| 24 |
# Download and cache the font file
|
| 25 |
def get_font():
|
| 26 |
+
"""Download font for annotation if not available."""
|
| 27 |
try:
|
| 28 |
font_path = Path("Roboto-Regular.ttf")
|
| 29 |
if not font_path.exists():
|
|
|
|
| 35 |
logger.error(f"Error in get_font: {str(e)}")
|
| 36 |
return None
|
| 37 |
|
| 38 |
+
# Initialize EasyOCR Reader for French & English
|
| 39 |
try:
|
| 40 |
reader = easyocr.Reader(['fr', 'en'], gpu=False)
|
| 41 |
except Exception as e:
|
| 42 |
logger.error(f"Error initializing EasyOCR: {str(e)}")
|
| 43 |
|
| 44 |
def ocr_extract_text_and_tables(image):
|
| 45 |
+
"""Extract text and tables from an image."""
|
| 46 |
try:
|
| 47 |
if image is None:
|
| 48 |
return "No image provided", "No image provided", None
|
| 49 |
|
| 50 |
+
# Ensure image is in the correct format
|
| 51 |
+
if isinstance(image, Image.Image):
|
| 52 |
+
image = np.array(image, dtype=np.uint8) # Convert PIL to numpy
|
| 53 |
+
|
| 54 |
+
# If image has an alpha channel (RGBA), convert to RGB
|
| 55 |
if len(image.shape) == 3 and image.shape[2] == 4:
|
| 56 |
image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
|
| 57 |
+
elif len(image.shape) == 2: # If grayscale, convert to BGR
|
| 58 |
+
image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
|
| 59 |
+
|
| 60 |
# Convert to grayscale for better OCR
|
| 61 |
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
|
| 62 |
|
| 63 |
# Apply adaptive thresholding
|
| 64 |
processed = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
|
| 65 |
|
| 66 |
+
# 1️⃣ Extract General Text using EasyOCR
|
| 67 |
results = reader.readtext(processed)
|
| 68 |
detected_text = [f"{text} (Confidence: {confidence:.2f})" for _, text, confidence in results]
|
| 69 |
|
| 70 |
+
# 2️⃣ Save image to a temporary file for `img2table`
|
| 71 |
+
temp_image_path = "temp_table_image.jpg"
|
| 72 |
+
cv2.imwrite(temp_image_path, image)
|
| 73 |
+
|
| 74 |
+
# 3️⃣ Use img2table for structured table extraction
|
| 75 |
+
img = Img2TableImage(temp_image_path) # Use file path instead of np.ndarray
|
| 76 |
ocr = TesseractOCR(lang="fra")
|
| 77 |
tables = img.extract_tables(ocr=ocr)
|
| 78 |
|
|
|
|
| 83 |
for i, df in enumerate(table_data):
|
| 84 |
df.to_csv(f"extracted_table_{i+1}.csv", index=False)
|
| 85 |
|
| 86 |
+
# 4️⃣ Annotate Image with Bounding Boxes for Detected Text
|
| 87 |
pil_image = Image.fromarray(image)
|
| 88 |
draw = ImageDraw.Draw(pil_image)
|
| 89 |
|
|
|
|
| 103 |
except Exception as e:
|
| 104 |
return f"Error: {str(e)}", "Processing failed", None
|
| 105 |
|
| 106 |
+
# Create Gradio Interface
|
| 107 |
iface = gr.Interface(
|
| 108 |
fn=ocr_extract_text_and_tables,
|
| 109 |
+
inputs=gr.Image(type="pil", label="Upload Image"), # Ensures PIL image input
|
| 110 |
outputs=[
|
| 111 |
gr.Textbox(label="Extracted Text (French)"),
|
| 112 |
gr.Textbox(label="Extracted Tables"),
|