|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import gradio as gr |
|
|
from pix2text import Pix2Text |
|
|
import logging |
|
|
from PIL import Image |
|
|
|
|
|
|
|
|
# Root logger configuration: only WARNING and above are emitted.
logging.basicConfig(level=logging.WARNING)

# Shared recognizer instance. It stays None when construction fails, so the
# module still imports and recognize_text can report the failure to the user.
p2t = None
try:
    p2t = Pix2Text()
except Exception as e:
    # Broad on purpose: any failure while building the model must not prevent
    # the app from starting. Log through `logging` (with the traceback) rather
    # than print, because the user-facing fallback message points at the logs.
    logging.getLogger(__name__).exception(
        "Error initializing Pix2Text model: %s. "
        "Recognition will use a fallback function.",
        e,
    )
|
|
|
|
|
|
|
|
_LOGGER = logging.getLogger(__name__)

# Message returned whenever recognition succeeds but yields no usable text.
_NO_TEXT_MESSAGE = "No recognizable text or formulas found in the image."

# Keys probed, in order, when the whole result is a dict.
_RESULT_TEXT_KEYS = ("text", "content", "result", "output")

# Keys probed, in order, when an element of a list result is a dict.
_ITEM_TEXT_KEYS = ("text", "content")


def _item_to_text(item):
    """Return the text of one list element, or ``None`` if it carries none.

    Objects exposing a ``text`` attribute, plain strings, and dicts with a
    ``text``/``content`` key are unpacked; anything else is stringified.
    Values are always coerced with ``str`` so the caller can join them safely.
    """
    if hasattr(item, "text"):
        value = item.text
    elif isinstance(item, dict):
        for key in _ITEM_TEXT_KEYS:
            if key in item:
                value = item[key]
                break
        else:
            # No known key: fall back to the dict's own representation.
            value = item
    else:
        value = item
    return None if value is None else str(value)


def _result_to_text(result) -> str:
    """Flatten a recognition result (str, dict, list, or other) to text."""
    if isinstance(result, str):
        text = result
    elif isinstance(result, dict):
        _LOGGER.debug("Recognition result keys: %s", list(result.keys()))
        for key in _RESULT_TEXT_KEYS:
            if key in result:
                value = result[key]
                text = "" if value is None else str(value)
                break
        else:
            return f"Result is a dict but couldn't find text. Keys: {list(result.keys())}"
    elif isinstance(result, list):
        # Drop elements without text (None) instead of letting join() fail.
        parts = [part for part in map(_item_to_text, result) if part is not None]
        text = "\n\n".join(parts)
    else:
        text = str(result) if result else ""

    # Every branch reports blank output the same way.
    return text if text.strip() else _NO_TEXT_MESSAGE


def recognize_text(image_path: str) -> str:
    """Run OCR on an image file and return the recognized text.

    Args:
        image_path: Path of the image to recognize. ``None`` or an empty
            string (no image supplied) is answered with a prompt to upload
            one instead of being passed to the recognizer.

    Returns:
        The extracted text. List results are joined with blank lines between
        parts. A human-readable message is returned instead when the model
        failed to initialize, no image was supplied, nothing was recognized,
        or recognition raised an error. This function never raises; errors
        are logged with their traceback and summarized in the return value.

    Note:
        The shape of the value returned by ``p2t.recognize`` is not assumed:
        str, dict, and list results are all handled, and anything else is
        stringified.
    """
    if p2t is None:
        return (
            "Model initialization failed at startup. Please check the logs "
            "to ensure all dependencies (like ONNX runtime) loaded correctly."
        )

    if not image_path:
        return "Please upload an image before running recognition."

    try:
        result = p2t.recognize(image_path, save_formula_images=False, use_analyzer=True)
        _LOGGER.debug("Recognition result type: %s", type(result))
        _LOGGER.debug("Recognition result content: %s", result)
        return _result_to_text(result)
    except Exception as e:
        # UI boundary: keep the traceback in the server logs and show the
        # user only a short summary rather than internal stack details.
        _LOGGER.exception("Recognition failed for %r", image_path)
        return (
            f"An unexpected error occurred during recognition: {e}. "
            "Check the server logs for details."
        )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Web front end: a single image upload feeding recognize_text, with the
# recognized text shown in a multi-line text box.

# The upload component is configured with type="filepath".
_image_input = gr.Image(type="filepath", label="Upload Image (Text/Formula/Math)")

# Ten visible lines for the extracted text.
_text_output = gr.Textbox(label="Extracted Text (LaTeX/Plain Text)", lines=10)

_app_description = (
    "Upload an image containing text, mathematical formulas, or scientific notation. "
    "The app converts the image content into editable text, using LaTeX for formulas."
)

iface = gr.Interface(
    fn=recognize_text,
    inputs=_image_input,
    outputs=_text_output,
    title="🔬 Pix2Text OCR Formula and Text Recognition",
    description=_app_description,
    theme=gr.themes.Soft(),
    allow_flagging="never",  # flagging is turned off for this interface
)
|
|
|
|
|
|
|
|
def main() -> None:
    """Launch the Gradio interface with ``show_api=False``."""
    iface.launch(show_api=False)


# Start the server only when executed as a script, not when imported.
if __name__ == "__main__":
    main()