import gradio as gr
import torch
from PIL import Image
from unsloth import FastVisionModel
from transformers import AutoModel
import os

# Hugging Face Hub repository id of the fine-tuned OCR model. Loaders expect
# the "namespace/name" id here, not the https://huggingface.co/... page URL.
MODEL_ID = "nomypython/urdu-ocr-deepseek"

print("Loading model...")
model, tokenizer = FastVisionModel.from_pretrained(
    MODEL_ID,
    load_in_4bit=True,
    auto_model=AutoModel,
    # Lets the repository's own Python modeling code be loaded.
    # NOTE(review): presumably this is where `model.infer` comes from — confirm.
    trust_remote_code=True,
    use_gradient_checkpointing="unsloth",
)
# Put the loaded model into Unsloth's inference mode before serving requests.
FastVisionModel.for_inference(model)
print("✓ Model loaded!")

def extract_urdu_text(image):
    """Run OCR on an uploaded image and return the text to display.

    Args:
        image: The uploaded picture as an object exposing ``save(path)``
            (the UI is configured to pass a PIL image), or ``None`` when
            nothing has been uploaded.

    Returns:
        The value returned by ``model.infer`` when it is truthy, otherwise a
        warning string. Any exception raised while saving the image or
        running inference is converted into an error string, so the UI
        always receives a message instead of a traceback.
    """
    if image is None:
        return "⚠️ Please upload an image!"

    # Local import: the only name this function needs beyond the file's
    # existing imports.
    import tempfile

    try:
        # One private directory per call. A shared fixed file name would let
        # overlapping requests overwrite each other's input, and the context
        # manager removes the file even when saving or inference raises.
        with tempfile.TemporaryDirectory() as work_dir:
            temp_path = os.path.join(work_dir, "temp.png")
            image.save(temp_path)

            # NOTE(review): some DeepSeek-OCR `infer` implementations only
            # return the decoded text when an evaluation-mode flag is set and
            # otherwise just stream it to stdout — confirm this call actually
            # returns the recognized text.
            result = model.infer(
                tokenizer,
                prompt="<image>\nExtract Urdu text from this image:",
                image_file=temp_path,
                output_path="./temp_results",
                image_size=640,
                base_size=640,
                crop_mode=False,
                save_results=False,
            )

        return result if result else "⚠️ No text detected"
    except Exception as e:
        # UI boundary: report the failure to the user rather than crashing
        # the request handler.
        return f"❌ Error: {str(e)}"

# Page layout: one row with two columns — upload widget and trigger button in
# the first, the recognized text in the second.
with gr.Blocks(title="Urdu OCR") as demo:
    gr.Markdown("# 🔤 Urdu OCR - اردو او سی آر")

    with gr.Row():
        with gr.Column():
            # type="pil" makes the handler receive a PIL image object.
            image_input = gr.Image(label="Upload Image", type="pil")
            extract_btn = gr.Button("🚀 Extract Text", variant="primary")
        with gr.Column():
            # rtl=True renders the output right-to-left.
            text_output = gr.Textbox(rtl=True, lines=10, label="Extracted Text")

    # OCR runs on an explicit button press...
    extract_btn.click(
        fn=extract_urdu_text,
        inputs=image_input,
        outputs=text_output,
    )
    # ...and also whenever the image component's value changes.
    image_input.change(
        fn=extract_urdu_text,
        inputs=image_input,
        outputs=text_output,
    )

# Start the Gradio server with default settings.
demo.launch()