# pddle / app.py
# heerjtdev's picture
# Update app.py
# 4662790 verified
import gradio as gr
from pix2text import Pix2Text
import logging
from PIL import Image
# Set up logging to WARNING level to suppress excessive output from model libraries
logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)

# Initialize Pix2Text model globally (expensive operation, do it once).
# p2t stays None when initialization fails, so the module still imports and
# the failure can be reported to the user at request time.
p2t = None
try:
    p2t = Pix2Text()
except Exception as e:
    # Broad catch is deliberate: any startup failure must leave the app
    # importable. logger.exception records the traceback as well, so the
    # logs actually contain the cause of the failure.
    logger.exception(
        "Error initializing Pix2Text model: %s. Recognition will use a fallback function.",
        e,
    )
def _part_to_text(item) -> str:
    """Return the text carried by one element of a list-shaped recognition result."""
    if hasattr(item, 'text'):
        # Object exposing a ``text`` attribute (presumably a structured
        # Pix2Text output item -- TODO confirm against the library version in use)
        value = item.text
    elif isinstance(item, dict):
        # Dictionary item: prefer 'text', then 'content', else the whole dict
        if 'text' in item:
            value = item['text']
        elif 'content' in item:
            value = item['content']
        else:
            value = item
    else:
        # Plain strings pass through unchanged; anything else is stringified
        value = item
    # str.join() rejects non-string parts, so coerce here. None becomes an
    # empty part instead of the literal text "None".
    return "" if value is None else str(value)


# Define the main recognition function
def recognize_text(image_path: str) -> str:
    """
    Perform OCR on the uploaded image and return the extracted text.

    The shape of the value returned by the model is not assumed: strings,
    dictionaries and lists are each handled, and anything else is
    stringified. Debug output describing the result structure is printed
    to stdout.

    Args:
        image_path: Path of the image file to recognize. ``None`` or a blank
            string means no image was supplied.

    Returns:
        The recognized text (list results are joined with blank lines), or a
        human-readable message when no image was supplied, the model failed
        to initialize, nothing was recognized, or recognition raised.
    """
    no_text_message = "No recognizable text or formulas found in the image."

    # Validate the argument first: without an image there is nothing to
    # recognize, and passing None to the model would only produce a traceback.
    if image_path is None or (isinstance(image_path, str) and not image_path.strip()):
        return "No image provided. Please upload an image and try again."

    if p2t is None:
        return (
            "Model initialization failed at startup. Please check the logs "
            "to ensure all dependencies (like ONNX runtime) loaded correctly."
        )

    try:
        # Recognize text and formulas
        result = p2t.recognize(image_path, save_formula_images=False, use_analyzer=True)

        # DEBUG: Print the actual result structure
        print(f"DEBUG - Result type: {type(result)}")
        print(f"DEBUG - Result content: {result}")

        if isinstance(result, str):
            # Result is directly a string
            return result if result.strip() else no_text_message

        if isinstance(result, dict):
            # Result is a dictionary: take the first common text-bearing key
            print(f"DEBUG - Result keys: {result.keys()}")
            for key in ('text', 'content', 'result', 'output'):
                if key in result:
                    return str(result[key])
            return f"Result is a dict but couldn't find text. Keys: {list(result.keys())}"

        if isinstance(result, list):
            # Result is a list: extract text from each item, then join
            extracted_parts = []
            for i, item in enumerate(result):
                print(f"DEBUG - Item {i} type: {type(item)}")
                print(f"DEBUG - Item {i} content: {item}")
                extracted_parts.append(_part_to_text(item))
            extracted_text = "\n\n".join(extracted_parts)
            return extracted_text if extracted_text.strip() else no_text_message

        # If none of the above, try to convert to string
        return str(result) if result else no_text_message
    except Exception as e:
        # Top-level boundary for the UI: report any unexpected failure as
        # text rather than letting the request crash.
        import traceback
        return f"An unexpected error occurred during recognition: {e}\n\nTraceback:\n{traceback.format_exc()}"
# --- Gradio Interface Setup ---
# Input widget: type="filepath" passes the uploaded file's local path to the callback
_image_input = gr.Image(type="filepath", label="Upload Image (Text/Formula/Math)")

# Output widget: a plain ten-line text box
_text_output = gr.Textbox(label="Extracted Text (LaTeX/Plain Text)", lines=10)

# Blurb shown under the title
_description = (
    "Upload an image containing text, mathematical formulas, or scientific notation. "
    "The app converts the image content into editable text, using LaTeX for formulas."
)

iface = gr.Interface(
    fn=recognize_text,
    inputs=_image_input,
    outputs=_text_output,
    title="🔬 Pix2Text OCR Formula and Text Recognition",
    description=_description,
    theme=gr.themes.Soft(),
    allow_flagging="never",
)

# Start the app only when this file is run as a script
if __name__ == "__main__":
    iface.launch(show_api=False)