# Earlier EasyOCR-based version of this demo, left commented out (not executed):
# import gradio as gr
# import easyocr

# reader = easyocr.Reader(["en"])

# def ocr_image(image):
#     results = reader.readtext(image)
#     return "\n".join([text for _, text, _ in results])

# demo = gr.Interface(fn=ocr_image, inputs="image", outputs="text")

# if __name__ == "__main__":
#     demo.launch()

import os
from PIL import Image
import torch
import gradio as gr

# Local model folder (e.g., "DotsOCR") that must contain all files from the model repo.
# The tokenizer, config, model and image processor are all loaded from this path.
# It is a relative path, so it is resolved against the current working directory.
LOCAL_MODEL_PATH = "./DotsOCR"

# Add the model folder to the module search path so the model and config
# modules imported just below can be found there. This has to run before those imports.
import sys
sys.path.append(LOCAL_MODEL_PATH)

from modeling_dots_ocr import DotsOCRForVision2Text
from configuration_dots import DotsOCRConfig
from transformers import PreTrainedTokenizerFast

# Tokenizer: read from the local model folder
tokenizer = PreTrainedTokenizerFast.from_pretrained(LOCAL_MODEL_PATH)

# Model configuration: read from the same folder
config = DotsOCRConfig.from_pretrained(LOCAL_MODEL_PATH)

# Compute device: CUDA when torch reports it as available, otherwise the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Build the model from the local files using the explicit config, move it to
# the selected device, and switch it to evaluation mode
model = DotsOCRForVision2Text.from_pretrained(
    LOCAL_MODEL_PATH,
    config=config,
)
model.to(device)
model.eval()

# Load only the image processor.
# AutoImageProcessor is the transformers loader intended for image
# preprocessing; loading vision preprocessors through AutoFeatureExtractor is
# the legacy path. NOTE(review): assumes the preprocessor config in
# LOCAL_MODEL_PATH resolves through AutoImageProcessor - confirm against the
# model folder.
# AutoFeatureExtractor stays imported so the module-level name remains available.
from transformers import AutoFeatureExtractor, AutoImageProcessor  # noqa: F401
image_processor = AutoImageProcessor.from_pretrained(LOCAL_MODEL_PATH)

def parse_document(image: Image.Image) -> str:
    """Run the OCR model on a single image and return the decoded text.

    Args:
        image: PIL image to parse. ``None`` is also accepted, because the
            Gradio image input delivers ``None`` when the form is submitted
            without an uploaded image.

    Returns:
        The first sequence decoded from ``model.generate`` (at most 1024 new
        tokens) with special tokens skipped, or an empty string when
        ``image`` is ``None``.
    """
    # Nothing was uploaded: return empty text instead of letting the
    # preprocessor fail on a missing image.
    if image is None:
        return ""

    # Preprocess the image into tensors and move them to the model's device
    inputs = image_processor(images=image, return_tensors="pt").to(device)

    # Generation only; gradients are not needed
    with torch.no_grad():
        output_ids = model.generate(**inputs, max_new_tokens=1024)

    # A single image was submitted, so only the first decoded sequence is used
    return tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0]

# Gradio UI: one PIL image input wired to parse_document, plain-text output
demo = gr.Interface(
    title="dots.ocr Document Parser",
    description="Parse text from images using dots.ocr",
    inputs=gr.Image(type="pil"),
    outputs="text",
    fn=parse_document,
)

# Start the web app only when this file is executed as a script
if __name__ == "__main__":
    demo.launch()