import streamlit as st from transformers import AutoModel, AutoTokenizer from PIL import Image import os import base64 import uuid import time import shutil from pathlib import Path # Load tokenizer and model on CPU tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True) model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True) model = model.eval() # Use CPU # Define folders for uploads and results UPLOAD_FOLDER = "./uploads" RESULTS_FOLDER = "./results" for folder in [UPLOAD_FOLDER, RESULTS_FOLDER]: if not os.path.exists(folder): os.makedirs(folder) # Function to run the GOT model def run_GOT(image, got_mode, fine_grained_mode="", ocr_color="", ocr_box=""): unique_id = str(uuid.uuid4()) image_path = os.path.join(UPLOAD_FOLDER, f"{unique_id}.png") result_path = os.path.join(RESULTS_FOLDER, f"{unique_id}.html") image.save(image_path) try: if got_mode == "plain texts OCR": res = model.chat(tokenizer, image_path, ocr_type='ocr') return res, None elif got_mode == "format texts OCR": res = model.chat(tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path) elif got_mode == "plain multi-crop OCR": res = model.chat_crop(tokenizer, image_path, ocr_type='ocr') return res, None elif got_mode == "format multi-crop OCR": res = model.chat_crop(tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path) elif got_mode == "plain fine-grained OCR": res = model.chat(tokenizer, image_path, ocr_type='ocr', ocr_box=ocr_box, ocr_color=ocr_color) return res, None elif got_mode == "format fine-grained OCR": res = model.chat(tokenizer, image_path, ocr_type='format', ocr_box=ocr_box, ocr_color=ocr_color, render=True, save_render_file=result_path) res_markdown = res if "format" in got_mode and os.path.exists(result_path): with open(result_path, 'r') as f: html_content = f.read() encoded_html = base64.b64encode(html_content.encode('utf-8')).decode('utf-8') iframe_src = f"data:text/html;base64,{encoded_html}" iframe = f'' return res_markdown, iframe else: return res_markdown, None except Exception as e: return f"Error: {str(e)}", None finally: if os.path.exists(image_path): os.remove(image_path) # Function to clean up old files def cleanup_old_files(): current_time = time.time() for folder in [UPLOAD_FOLDER, RESULTS_FOLDER]: for file_path in Path(folder).glob('*'): if current_time - file_path.stat().st_mtime > 3600: # 1 hour file_path.unlink() # Streamlit App st.set_page_config(page_title="GOT-OCR-2.0 Demo", layout="wide") st.markdown("""