Spaces:
Running
on
Zero
Running
on
Zero
| """Fanoni Document AI - HuggingFace Space with GOT-OCR2.0 Model.""" | |
| import gradio as gr | |
| import spaces | |
| from transformers import AutoModel, AutoTokenizer | |
| from PIL import Image | |
| import torch | |
# Hugging Face Hub id of the GOT-OCR2.0 checkpoint. Both the tokenizer and
# the model are loaded once at import time and shared by every request.
MODEL_NAME = "ucaslcl/GOT-OCR2_0"

print("Loading tokenizer...")
# trust_remote_code=True lets transformers run the Python code that ships
# inside the model repository.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)

print("Loading model...")
model = AutoModel.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,  # load the weights in half precision
    low_cpu_mem_usage=True,
    trust_remote_code=True,
)
print("Model loaded!")
@spaces.GPU
def extract_text(image, output_format):
    """Extract text from an uploaded document image using GOT-OCR2.0.

    The ``spaces.GPU`` decorator asks ZeroGPU for a GPU for the duration of
    each call; without it no CUDA device is attached on a ZeroGPU Space.

    Args:
        image: The uploaded document. Either a path to an image file
            (``str``) or a pixel array accepted by ``PIL.Image.fromarray``.
            ``None`` means nothing was uploaded.
        output_format: Label selected in the UI. The value
            "Formatted (Tables/Structure)" selects the model's ``format``
            mode; any other value (including "Plain Text") selects ``ocr``.

    Returns:
        The text produced by the model, or a human-readable message when no
        image was supplied or the extraction failed. Failures are reported
        as an "Error: ..." string rather than raised, so they show up in
        the output box.
    """
    import contextlib
    import os
    import tempfile

    if image is None:
        return "Please upload an image."

    scratch_path = None  # set only when this call creates a temporary file
    try:
        # Move the shared model to the GPU for this call (CPU fallback).
        device = "cuda" if torch.cuda.is_available() else "cpu"
        model_gpu = model.to(device).eval()

        if isinstance(image, str):
            # Already a file on disk: hand the path straight to the model.
            image_path = image
        else:
            # The model's chat() call is given a file path, so write the
            # array to disk. A unique file per request prevents concurrent
            # requests from overwriting each other's image.
            fd, scratch_path = tempfile.mkstemp(suffix=".png")
            os.close(fd)
            Image.fromarray(image).save(scratch_path)
            image_path = scratch_path

        if output_format == "Formatted (Tables/Structure)":
            ocr_type = "format"
        else:
            ocr_type = "ocr"
        return model_gpu.chat(tokenizer, image_path, ocr_type=ocr_type)
    except Exception as e:
        return f"Error: {str(e)}"
    finally:
        # Remove the temporary file whether or not the extraction succeeded.
        if scratch_path is not None:
            with contextlib.suppress(OSError):
                os.remove(scratch_path)
# Gradio interface: an upload/options column on the left and the extracted
# text on the right, wired to extract_text by the "Extract Text" button.
with gr.Blocks(title="Fanoni Document AI", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 📄 Fanoni Document AI
    ### Extract text from documents using GOT-OCR2.0
    Upload an image of a document (invoice, receipt, form, etc.) to extract text.
    """)

    with gr.Row():
        # Input side: the document image, the output mode and the trigger.
        with gr.Column(scale=1):
            image_input = gr.Image(
                label="Upload Document",
                type="numpy",
                height=400
            )
            format_dropdown = gr.Dropdown(
                choices=["Plain Text", "Formatted (Tables/Structure)"],
                value="Plain Text",
                label="Output Format"
            )
            extract_btn = gr.Button("Extract Text", variant="primary", size="lg")

        # Output side: the text returned by extract_text.
        with gr.Column(scale=1):
            output_text = gr.Textbox(
                label="Extracted Text",
                lines=20,
                max_lines=50,
                show_copy_button=True
            )

    # Run the OCR on click; the inputs are passed positionally as
    # (image, output_format).
    extract_btn.click(
        fn=extract_text,
        inputs=[image_input, format_dropdown],
        outputs=output_text
    )

    gr.Markdown("""
    ---
    **Supported formats:** JPG, PNG, WEBP, BMP
    **Model:** [GOT-OCR2.0](https://huggingface.co/ucaslcl/GOT-OCR2_0) - General OCR Theory
    **Powered by:** Fanoni AI
    """)


# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()