# dafe smith
# Upload folder using huggingface_hub
# 21bc131 verified
"""Fanoni Document AI - HuggingFace Space with GOT-OCR2.0 Model."""
import os
import tempfile

import gradio as gr
import spaces
import torch
from PIL import Image
from transformers import AutoModel, AutoTokenizer
# Hugging Face Hub identifier of the GOT-OCR2.0 checkpoint loaded below.
MODEL_NAME = "ucaslcl/GOT-OCR2_0"

# The tokenizer and model are loaded once at import time and kept as
# module-level globals so every request reuses them.
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)

print("Loading model...")
# NOTE(review): trust_remote_code=True runs Python code shipped in the model
# repository; consider pinning a revision if that is a concern.
model = AutoModel.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    trust_remote_code=True,
)
print("Model loaded!")
@spaces.GPU
def extract_text(image, output_format):
    """Extract text from an uploaded image using GOT-OCR2.0.

    Args:
        image: Either a filesystem path (``str``) to an image file, or an
            array accepted by ``PIL.Image.fromarray``. ``None`` means that
            nothing was uploaded.
        output_format: The output-format choice. The value
            ``"Formatted (Tables/Structure)"`` selects the model's
            ``'format'`` OCR mode; any other value uses ``'ocr'``.

    Returns:
        Whatever ``model.chat`` returns on success, or a message string when
        no image was supplied or when processing raised an exception.
    """
    if image is None:
        return "Please upload an image."

    # "Plain Text" and any unrecognised value both map to plain OCR.
    ocr_type = "format" if output_format == "Formatted (Tables/Structure)" else "ocr"

    # Path of a temp file created by this call (and therefore ours to delete).
    owned_path = None
    try:
        # Move model to the GPU (when available) for this call.
        device = "cuda" if torch.cuda.is_available() else "cpu"
        model_gpu = model.to(device).eval()

        if isinstance(image, str):
            # Already a file on disk: use it directly and never delete it.
            image_path = image
        else:
            # A unique file per call, so concurrent requests cannot overwrite
            # each other's image as they would with one fixed /tmp path.
            fd, owned_path = tempfile.mkstemp(suffix=".png")
            os.close(fd)
            Image.fromarray(image).save(owned_path)
            image_path = owned_path

        return model_gpu.chat(tokenizer, image_path, ocr_type=ocr_type)
    except Exception as e:
        # UI boundary: report the failure as text instead of raising.
        return f"Error: {e}"
    finally:
        if owned_path is not None:
            try:
                os.remove(owned_path)
            except OSError:
                # Best-effort cleanup; a leftover temp file is not fatal.
                pass
# Gradio Interface
# Markdown shown above and below the widgets. Kept as module-level constants
# so the text stays flush-left inside the string literals.
_HEADER_MARKDOWN = """
# 📄 Fanoni Document AI
### Extract text from documents using GOT-OCR2.0
Upload an image of a document (invoice, receipt, form, etc.) to extract text.
"""
_FOOTER_MARKDOWN = """
---
**Supported formats:** JPG, PNG, WEBP, BMP
**Model:** [GOT-OCR2.0](https://huggingface.co/ucaslcl/GOT-OCR2_0) - General OCR Theory
**Powered by:** Fanoni AI
"""

with gr.Blocks(title="Fanoni Document AI", theme=gr.themes.Soft()) as demo:
    gr.Markdown(_HEADER_MARKDOWN)

    with gr.Row():
        # Left column: inputs and the action button.
        with gr.Column(scale=1):
            image_input = gr.Image(
                label="Upload Document",
                type="numpy",
                height=400,
            )
            format_dropdown = gr.Dropdown(
                choices=["Plain Text", "Formatted (Tables/Structure)"],
                value="Plain Text",
                label="Output Format",
            )
            extract_btn = gr.Button("Extract Text", variant="primary", size="lg")

        # Right column: OCR result.
        with gr.Column(scale=1):
            output_text = gr.Textbox(
                label="Extracted Text",
                lines=20,
                max_lines=50,
                show_copy_button=True,
            )

    # Run OCR on click; the handler's return value fills the textbox.
    extract_btn.click(
        fn=extract_text,
        inputs=[image_input, format_dropdown],
        outputs=output_text,
    )

    gr.Markdown(_FOOTER_MARKDOWN)

if __name__ == "__main__":
    demo.launch()