File size: 4,454 Bytes
490d1f0 f547ea0 c9e9a90 f547ea0 37be9a5 c9e9a90 37be9a5 58317ea 37be9a5 c9e9a90 58317ea 37be9a5 f547ea0 37be9a5 490d1f0 37be9a5 490d1f0 37be9a5 490d1f0 37be9a5 490d1f0 37be9a5 490d1f0 37be9a5 490d1f0 c9e9a90 1d6b971 905c2c1 1d6b971 58317ea c9e9a90 37be9a5 490d1f0 381620a c9e9a90 37be9a5 905c2c1 c9e9a90 1d6b971 c9e9a90 58317ea 37be9a5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 |
import gradio as gr
from pix2text import Pix2Text
import logging
from PIL import Image
# Only WARNING and above is emitted, which keeps the model libraries' chatter
# out of the console.
logging.basicConfig(level=logging.WARNING)

# Build the Pix2Text model exactly once, at import time, because construction
# is expensive. If it cannot be built, p2t is bound to None so that
# recognize_text can detect the failure and report it.
try:
    p2t = Pix2Text()
except Exception as e:
    p2t = None
    print(f"Error initializing Pix2Text model: {e}. Recognition will use a fallback function.")
# Define the main recognition function
# Shown whenever recognition succeeds but yields nothing usable.
_NO_TEXT_MESSAGE = "No recognizable text or formulas found in the image."

_logger = logging.getLogger(__name__)


def _item_payload(item):
    """Pick the text-bearing value out of one element of a list-shaped result.

    Objects exposing a ``text`` attribute win, then dicts with a ``text`` or
    ``content`` key; anything else is returned unchanged so the caller can
    stringify it.
    """
    if hasattr(item, 'text'):
        return item.text
    if isinstance(item, dict):
        for key in ('text', 'content'):
            if key in item:
                return item[key]
    return item


def _extract_text(result) -> str:
    """Flatten a recognition result (str, dict, list or other) into display text."""
    if isinstance(result, str):
        return result if result.strip() else _NO_TEXT_MESSAGE

    if isinstance(result, dict):
        for key in ('text', 'content', 'result', 'output'):
            if key in result:
                return str(result[key])
        return f"Result is a dict but couldn't find text. Keys: {list(result.keys())}"

    if isinstance(result, list):
        payloads = (_item_payload(item) for item in result)
        # Coerce every payload to str so a non-string value cannot break
        # join(); None payloads carry no text and are skipped.
        joined = "\n\n".join(str(p) for p in payloads if p is not None)
        return joined if joined.strip() else _NO_TEXT_MESSAGE

    # Unknown shape: fall back to its string form when it is truthy.
    return str(result) if result else _NO_TEXT_MESSAGE


def recognize_text(image_path: str) -> str:
    """Run OCR on the image at *image_path* and return the recognised text.

    Args:
        image_path: Path of the uploaded image. ``None`` or an empty string
            (nothing uploaded) is answered with a prompt instead of an error.

    Returns:
        The extracted text, or a human-readable message when no image was
        given, the model failed to initialise, nothing was recognised, or
        recognition raised. This function never raises to its caller.
    """
    # Cheap input validation first: no model is needed to reject a missing file.
    if not image_path:
        return "No image provided. Please upload an image first."

    if p2t is None:
        return (
            "Model initialization failed at startup. Please check the logs "
            "to ensure all dependencies (like ONNX runtime) loaded correctly."
        )

    try:
        # Recognize text and formulas
        result = p2t.recognize(image_path, save_formula_images=False, use_analyzer=True)
        # Diagnostics go through logging so they can be switched on by level
        # instead of being printed on every request.
        _logger.debug("Recognition result type: %s", type(result))
        _logger.debug("Recognition result content: %s", result)
        return _extract_text(result)
    except Exception as e:
        # Top-level boundary for the UI callback: keep the traceback in the
        # server log and show the user only a short message.
        _logger.exception("Recognition failed for %s", image_path)
        return f"An unexpected error occurred during recognition: {e}"
# --- Gradio Interface Setup ---
# Single-function UI: one image input wired to recognize_text, one text output.
iface = gr.Interface(
    fn=recognize_text,
    # type="filepath" hands recognize_text a local file path rather than
    # image data.
    inputs=gr.Image(type="filepath", label="Upload Image (Text/Formula/Math)"),
    # The output is a standard textbox
    outputs=gr.Textbox(label="Extracted Text (LaTeX/Plain Text)", lines=10),
    title="🔬 Pix2Text OCR Formula and Text Recognition",
    description=(
        "Upload an image containing text, mathematical formulas, or scientific notation. "
        "The app converts the image content into editable text, using LaTeX for formulas."
    ),
    theme=gr.themes.Soft(),
    allow_flagging="never",
)

# Launch the app only when this file is run as a script; importing the module
# just builds `iface`.
if __name__ == "__main__":
    iface.launch(show_api=False)