"""Gradio Space: CPU OCR for captcha images via an image-to-text pipeline."""

from transformers import pipeline
from PIL import Image  # noqa: F401  (Gradio hands run_ocr a PIL.Image because type="pil")
import gradio as gr
import io  # noqa: F401

# Load the pipeline once at startup; device=-1 forces CPU usage.
pipe = pipeline(
    "image-to-text",
    model="dragonstar/image-text-captcha-v2",
    device=-1,
)


def _extract_text(outputs) -> str:
    """Pull the generated text out of a pipeline result.

    Image-to-text pipelines commonly return [{'generated_text': '...'}],
    but key names and shapes vary across models, so fall back gracefully
    instead of raising on an unexpected structure.
    """
    if isinstance(outputs, list) and outputs:
        first = outputs[0]
        if isinstance(first, dict):
            # Prefer the conventional key, then a common alternative,
            # then the raw dict as a last resort.
            return str(first.get("generated_text") or first.get("text") or first)
        # Some pipelines may yield bare strings; don't call .get on those.
        return str(first)
    return str(outputs)


def run_ocr(image):
    """Run OCR on a captcha image.

    Args:
        image: PIL Image (Gradio supplies one because type="pil"),
            or None when the user submits without uploading.

    Returns:
        The detected text, or a human-readable status/error message.
    """
    if image is None:
        return "No image provided"
    try:
        # Only the model call can realistically fail; keep the try minimal.
        outputs = pipe(image)
    except Exception as e:  # surface model failures in the UI instead of crashing
        return f"Error: {e}"
    text = _extract_text(outputs).strip()
    return text if text else "No text extracted"


title = "Image → Text (captcha) — dragonstar/image-text-captcha-v2"
desc = (
    "Upload a captcha image. Model runs on CPU in this Space. "
    "Results may be slower than cloud inference."
)

demo = gr.Interface(
    fn=run_ocr,
    inputs=gr.Image(type="pil", label="Captcha image"),
    outputs=gr.Textbox(label="Detected text"),
    title=title,
    description=desc,
    allow_flagging="never",
)

if __name__ == "__main__":
    demo.launch()