import os

# Select the PyTorch backend for docTR. docTR versions that support several
# backends read USE_TORCH while the `doctr` package is being imported, so the
# variable must be set BEFORE the first `doctr` import; setting it afterwards
# has no effect.
os.environ['USE_TORCH'] = '1'

from doctr.io import DocumentFile  # noqa: E402  (must follow the env setup)
from doctr.models import ocr_predictor  # noqa: E402
import gradio as gr  # noqa: E402

# Build the OCR predictor with pretrained weights once, at import time, so
# that every request handled by `ocr` reuses the same loaded model.
predictor = ocr_predictor(pretrained=True)

# Heading and short help text displayed by the Gradio interface.
title = "English OCR"
description = "Upload an image to extract English text from it!"

# ฟังก์ชัน OCR
def ocr(img):
    """Run OCR on an uploaded image and return the text plus a text file.

    Args:
        img: PIL image handed over by the Gradio image input, or ``None``
            when the user submits without providing an image.

    Returns:
        A ``(text, path)`` tuple. ``text`` is the recognized text: every word
        is followed by a space, each block ends with a newline and each page
        ends with an extra newline. ``path`` is the name of the UTF-8 text
        file the same content was written to. When ``img`` is ``None`` the
        result is ``("", None)`` and nothing is written.
    """
    import io  # function-scope import: the file header does not import it

    # Nothing was uploaded: return empty outputs instead of crashing on None.
    if img is None:
        return "", None

    # PNG is lossless, so the model sees the pixels exactly as uploaded
    # (the former JPEG round trip added compression artifacts and left an
    # "out.jpg" behind in the working directory).
    # NOTE(review): Gradio is expected to deliver RGB images already; the
    # conversion is a safeguard for callers passing other PIL modes.
    if img.mode != "RGB":
        img = img.convert("RGB")
    buffer = io.BytesIO()
    img.save(buffer, format="PNG")

    # DocumentFile.from_images accepts raw encoded image bytes as well as paths.
    doc = DocumentFile.from_images(buffer.getvalue())

    # Run detection + recognition.
    output = predictor(doc)

    # Collect the pieces and join once instead of repeated string `+=`.
    pieces = []
    for page in output.pages:
        for block in page.blocks:
            pieces.extend(
                word.value + " " for line in block.lines for word in line.words
            )
            pieces.append("\n")  # end of block
        pieces.append("\n")  # blank line after each page
    res = "".join(pieces)

    # Persist the text so the Gradio File output can offer it for download.
    output_name = "RESULT_OCR.txt"
    with open(output_name, "w", encoding="utf-8", errors="ignore") as f:
        f.write(res)

    return res, output_name

# Components of the Gradio interface: one image input (delivered to `ocr` as
# a PIL image) and two outputs (recognized text and a downloadable file).
_image_input = gr.Image(type="pil")
_text_output = gr.Textbox(lines=20, label="Full Text")
_file_output = gr.File(label="Download OCR Results")

# Wire the components to the OCR function.
demo = gr.Interface(
    fn=ocr,
    inputs=[_image_input],
    outputs=[_text_output, _file_output],
    title=title,
    description=description,
)

# Start the web app with debug mode enabled.
demo.launch(debug=True)