Spaces:

Fanoni
/

fanoni-document-ai

Running on Zero

dafe smith

Upload folder using huggingface_hub

21bc131 verified about 2 months ago

2.93 kB

	"""Fanoni Document AI - HuggingFace Space with GOT-OCR2.0 Model."""

	import os
	import tempfile

	# Third-party import order is kept as in the original; in particular
	# `spaces` stays ahead of transformers/torch.
	import gradio as gr
	import spaces
	from transformers import AutoModel, AutoTokenizer
	from PIL import Image
	import torch

	# Hub identifier of the GOT-OCR2.0 checkpoint; the tokenizer and the model
	# are both loaded from it once, at import time.
	MODEL_NAME = "ucaslcl/GOT-OCR2_0"

	print("Loading tokenizer...")
	tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)

	print("Loading model...")
	# Keyword options forwarded to AutoModel.from_pretrained: half-precision
	# weights, reduced CPU memory use while loading, and permission to run the
	# checkpoint's own modelling code.
	_model_load_options = {
	    "trust_remote_code": True,
	    "low_cpu_mem_usage": True,
	    "torch_dtype": torch.float16,
	}
	model = AutoModel.from_pretrained(MODEL_NAME, **_model_load_options)
	print("Model loaded!")


	@spaces.GPU
	def extract_text(image, output_format):
	    """Extract text from an uploaded image using GOT-OCR2.0.

	    Args:
	        image: The uploaded document, either as an array accepted by
	            ``PIL.Image.fromarray`` (what the UI's ``type="numpy"`` image
	            input supplies) or as a string path to an existing image file.
	            ``None`` means nothing was uploaded.
	        output_format: The selected output format label.
	            ``"Formatted (Tables/Structure)"`` selects the model's
	            ``'format'`` OCR mode; any other value selects plain ``'ocr'``.

	    Returns:
	        The value returned by the model's ``chat`` call, or a user-facing
	        message string: ``"Please upload an image."`` when ``image`` is
	        ``None``, or ``"Error: <details>"`` when anything fails.
	    """
	    if image is None:
	        return "Please upload an image."

	    # Only the structured option maps to 'format'; everything else
	    # (including unknown labels) falls back to plain OCR.
	    if output_format == "Formatted (Tables/Structure)":
	        ocr_type = "format"
	    else:
	        ocr_type = "ocr"

	    try:
	        # Move model to GPU for this call (CPU when CUDA is unavailable).
	        device = "cuda" if torch.cuda.is_available() else "cpu"
	        model_gpu = model.to(device).eval()

	        # A path can be handed to the model directly.
	        if isinstance(image, str):
	            return model_gpu.chat(tokenizer, image, ocr_type=ocr_type)

	        # The model reads the image from disk. Use a per-call scratch
	        # directory rather than one fixed /tmp filename, so concurrent
	        # requests cannot overwrite each other's upload; the directory is
	        # removed automatically when the block exits.
	        with tempfile.TemporaryDirectory() as scratch_dir:
	            image_path = os.path.join(scratch_dir, "uploaded_image.png")
	            Image.fromarray(image).save(image_path)
	            return model_gpu.chat(tokenizer, image_path, ocr_type=ocr_type)

	    except Exception as e:
	        # UI boundary: report the failure in the output box instead of
	        # raising into the Gradio event handler.
	        return f"Error: {e}"


	# Gradio Interface
	# The Markdown literals are defined at this indentation level so that their
	# contents stay exactly as they were when written inline.
	_HEADER_MARKDOWN = """
	# 📄 Fanoni Document AI
	### Extract text from documents using GOT-OCR2.0

	Upload an image of a document (invoice, receipt, form, etc.) to extract text.
	"""

	_FOOTER_MARKDOWN = """
	---
	Supported formats: JPG, PNG, WEBP, BMP
	Model: [GOT-OCR2.0](https://huggingface.co/ucaslcl/GOT-OCR2_0) - General OCR Theory
	Powered by: Fanoni AI
	"""

	with gr.Blocks(title="Fanoni Document AI", theme=gr.themes.Soft()) as demo:
	    gr.Markdown(_HEADER_MARKDOWN)

	    with gr.Row():
	        # Left column: upload widget, format selector and the trigger button.
	        with gr.Column(scale=1):
	            image_input = gr.Image(label="Upload Document", type="numpy", height=400)
	            format_dropdown = gr.Dropdown(
	                choices=["Plain Text", "Formatted (Tables/Structure)"],
	                value="Plain Text",
	                label="Output Format",
	            )
	            extract_btn = gr.Button("Extract Text", variant="primary", size="lg")

	        # Right column: the text box that receives the OCR result.
	        with gr.Column(scale=1):
	            output_text = gr.Textbox(
	                label="Extracted Text", lines=20, max_lines=50, show_copy_button=True
	            )

	    # Clicking the button runs extract_text on the image and chosen format
	    # and writes its return value into the output box.
	    extract_btn.click(extract_text, [image_input, format_dropdown], output_text)

	    gr.Markdown(_FOOTER_MARKDOWN)

	# Launch the Gradio app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()