Spaces:

heerjtdev
/

pddle

Sleeping

App Files Files Community

pddle / app.py

heerjtdev

Update app.py

4662790 verified about 1 month ago

raw

history blame contribute delete

4.45 kB






	import gradio as gr
	from pix2text import Pix2Text
	import logging
	from PIL import Image

	# Set up logging to WARNING level to suppress excessive output from model libraries
	logging.basicConfig(level=logging.WARNING)

	# Initialize the Pix2Text model once at import time (construction is expensive).
	# p2t stays None when construction fails; recognize_text checks for that and
	# returns an explanatory message instead of attempting recognition.
	p2t = None
	try:
		p2t = Pix2Text()
	except Exception:
		# Broad catch is deliberate: a failed model load must not stop the app from
		# starting. logging.exception records the full traceback, so the
		# "check the logs" hint shown to users actually leads somewhere.
		logging.exception(
			"Error initializing Pix2Text model. "
			"Recognition requests will return an error message until this is fixed."
		)

	# Define the main recognition function
	# Message returned whenever recognition succeeds but yields no usable text.
	_NO_TEXT_MESSAGE = "No recognizable text or formulas found in the image."


	def _item_to_text(item):
		"""Return the text carried by one element of a list result.

		Lookup order: a ``text`` attribute, a plain string, a dict's ``'text'`` or
		``'content'`` entry, and finally ``str(item)``.

		Returns:
			The text as a ``str``, or ``None`` when the selected value is ``None``
			(so the caller can skip it instead of joining the literal "None").
		"""
		if hasattr(item, 'text'):
			value = item.text
		elif isinstance(item, dict):
			if 'text' in item:
				value = item['text']
			elif 'content' in item:
				value = item['content']
			else:
				value = item
		else:
			# Plain strings and any other object fall through unchanged.
			value = item

		if value is None:
			return None
		# str.join raises TypeError on non-string parts, so coerce here.
		return value if isinstance(value, str) else str(value)


	def recognize_text(image_path: str) -> str:
		"""Run OCR on an image file and return the recognized content as text.

		The result of ``p2t.recognize`` is handled by type: a string is returned
		as is, a dict is searched for the keys ``text``, ``content``, ``result``
		and ``output`` (first match wins), and a list has the text of each element
		joined with blank lines. Anything else is converted with ``str``.

		Args:
			image_path: Path of the image to process. A falsy value (presumably
				``None`` when the form is submitted without an image - confirm
				against the Gradio version in use) yields a prompt to upload one.

		Returns:
			The extracted text, or a human-readable message when the model is
			unavailable, no image was given, nothing was recognized, or
			recognition raised an exception. This function never raises.
		"""
		if p2t is None:
			return (
				"Model initialization failed at startup. Please check the logs "
				"to ensure all dependencies (like ONNX runtime) loaded correctly."
			)

		if not image_path:
			return "No image provided. Please upload an image before submitting."

		try:
			# Recognize text and formulas
			result = p2t.recognize(image_path, save_formula_images=False, use_analyzer=True)

			# Diagnostics go through logging so the level configured for the app
			# decides whether they are emitted, instead of printing on every request.
			logging.debug("Result type: %s", type(result))
			logging.debug("Result content: %s", result)

			if isinstance(result, str):
				return result if result.strip() else _NO_TEXT_MESSAGE

			if isinstance(result, dict):
				logging.debug("Result keys: %s", list(result.keys()))
				for key in ('text', 'content', 'result', 'output'):
					if key in result:
						return str(result[key])
				return f"Result is a dict but couldn't find text. Keys: {list(result.keys())}"

			if isinstance(result, list):
				parts = [text for text in map(_item_to_text, result) if text is not None]
				extracted_text = "\n\n".join(parts)
				return extracted_text if extracted_text.strip() else _NO_TEXT_MESSAGE

			# If none of the above, try to convert to string
			return str(result) if result else _NO_TEXT_MESSAGE

		except Exception as e:
			# Top-level boundary for the UI callback: report the failure as text
			# rather than letting it propagate, and keep a copy in the logs.
			import traceback
			logging.exception("Recognition failed for %r", image_path)
			# NOTE(review): the traceback is shown to the end user; kept unchanged
			# for backward compatibility, but consider hiding it in production.
			return f"An unexpected error occurred during recognition: {e}\n\nTraceback:\n{traceback.format_exc()}"


	# --- Gradio Interface Setup ---

	# Image upload widget; type="filepath" hands the callback a local file path.
	_image_input = gr.Image(type="filepath", label="Upload Image (Text/Formula/Math)")

	# Plain multi-line textbox that shows whatever the callback returns.
	_text_output = gr.Textbox(label="Extracted Text (LaTeX/Plain Text)", lines=10)

	# Blurb displayed under the title.
	_description = (
		"Upload an image containing text, mathematical formulas, or scientific notation. "
		"The app converts the image content into editable text, using LaTeX for formulas."
	)

	# Wire the recognition callback to the widgets defined above.
	iface = gr.Interface(
		fn=recognize_text,
		inputs=_image_input,
		outputs=_text_output,
		title="🔬 Pix2Text OCR Formula and Text Recognition",
		description=_description,
		theme=gr.themes.Soft(),
		allow_flagging="never",
	)

	# Start the app only when this file is run as a script, not when imported.
	if __name__ == "__main__":
		iface.launch(show_api=False)