Spaces:

shayansjm
/

ocr2

Sleeping

App Files Files Community

ocr2 / app.py

shayansjm

Update app.py

d3be5ae verified about 2 months ago

raw

history blame contribute delete

2.39 kB

	import gradio as gr
	from paddleocr import PaddleOCR
	from PIL import Image, ImageOps
	import numpy as np

	# Shared PaddleOCR engine; built on the first request and reused afterwards.
	ocr_instance = None


	def _collect_text_lines(page):
	    """Return the stripped, non-empty strings recognised on one OCR page.

	    Two page layouts are understood:

	    * a mapping with a ``rec_texts`` list of strings (the layout documented
	      for PaddleOCR 3.x results), and
	    * the legacy sequence ``[[box, (text, confidence)], ...]``.

	    Anything else, and any entry that is not a string, is ignored.
	    """
	    if isinstance(page, dict):
	        raw_texts = page.get("rec_texts")
	        if raw_texts is None:
	            raw_texts = []
	    elif isinstance(page, (list, tuple)):
	        # Legacy rows: keep only well-formed [box, (text, confidence)] entries.
	        raw_texts = [
	            entry[1][0]
	            for entry in page
	            if isinstance(entry, (list, tuple))
	            and len(entry) >= 2
	            and isinstance(entry[1], (list, tuple))
	            and len(entry[1]) > 0
	        ]
	    else:
	        raw_texts = []

	    cleaned = (text.strip() for text in raw_texts if isinstance(text, str))
	    # Drop strings that were only whitespace so the output has no blank lines.
	    return [text for text in cleaned if text]


	def process_bank_form(image):
	    """Run OCR on an uploaded form image and return the recognised text.

	    Args:
	        image: The uploaded picture as a PIL image, or ``None`` when nothing
	            was uploaded.

	    Returns:
	        The recognised lines joined with newlines, or a human-readable
	        message when there is no image, no text, or an error occurred.
	        Exceptions are never propagated; they are reported in the returned
	        string so the UI always has something to display.
	    """
	    global ocr_instance

	    if image is None:
	        return "Please upload an image."

	    try:
	        # Build the OCR engine lazily so start-up does not pay for model loading.
	        if ocr_instance is None:
	            ocr_instance = PaddleOCR(
	                lang='en',
	                ocr_version='PP-OCRv5',
	                use_angle_cls=True,
	            )

	        # Normalise the picture: 3-channel RGB, EXIF rotation applied.
	        img = image.convert("RGB")
	        img = ImageOps.exif_transpose(img)
	        img_array = np.array(img)

	        result = ocr_instance.ocr(img_array)

	        if not isinstance(result, list) or not result or result[0] is None:
	            return "No text detected. Try a closer, clearer photo."

	        # A single image was submitted, so only the first page is relevant.
	        page = result[0]

	        # Mapping-style pages report "nothing found" as an empty rec_texts list.
	        if isinstance(page, dict):
	            recognised = page.get("rec_texts")
	            if recognised is None or len(recognised) == 0:
	                return "No text detected. Try a closer, clearer photo."

	        extracted_text = _collect_text_lines(page)

	        if not extracted_text:
	            return "AI found text boxes but couldn't read characters. Try cropping the image."

	        return "\n".join(extracted_text)

	    except Exception as e:
	        # UI boundary: surface the failure as text instead of crashing the app.
	        return f"System Error: {str(e)}\nTip: If it persists, use 'Factory Reboot' in Space Settings."

	# Gradio front end: one uploaded image in, the recognised text out.
	demo = gr.Interface(
	    title="🏦 Global Bank OCR",
	    fn=process_bank_form,
	    inputs=gr.Image(label="Upload Bank Form", type="pil"),
	    outputs=gr.Textbox(lines=20, label="Extracted Text"),
	)

	if __name__ == "__main__":
	    # max_threads=1 caps Gradio's thread pool at a single thread
	    # (presumably so the shared OCR engine is never used concurrently — confirm).
	    demo.launch(max_threads=1)