# img2 / app.py
# Author: Unique00225 — "Create app.py" (commit d2315a1, verified)
from transformers import pipeline
from PIL import Image
import gradio as gr
import io
# Load the image-to-text (captcha OCR) pipeline once at module import so the
# model weights are fetched a single time and reused across requests.
# device=-1 forces CPU usage (this Space has no GPU).
pipe = pipeline("image-to-text", model="dragonstar/image-text-captcha-v2", device=-1)
def run_ocr(image):
    """Extract text from a captcha image using the module-level OCR pipeline.

    Parameters
    ----------
    image : PIL.Image.Image or None
        Image supplied by Gradio (the input component uses type="pil").

    Returns
    -------
    str
        The detected text, or a human-readable status/error message.
    """
    # Gradio passes None when the user submits without uploading anything.
    if image is None:
        return "No image provided"
    try:
        raw = pipe(image)
        # Output schema varies between models; the common shape is
        # [{'generated_text': '...'}], sometimes with a 'text' key instead.
        if isinstance(raw, list) and raw:
            head = raw[0]
            extracted = head.get("generated_text") or head.get("text") or str(head)
        else:
            extracted = str(raw)
        extracted = extracted.strip()
        return extracted if extracted else "No text extracted"
    except Exception as err:
        # Surface failures to the UI as text rather than crashing the handler.
        return f"Error: {err}"
# UI copy shown above the interface.
title = "Image β†’ Text (captcha) β€” dragonstar/image-text-captcha-v2"
desc = "Upload a captcha image. Model runs on CPU in this Space. Results may be slower than cloud inference."
# Wire the OCR handler to a simple image-in / text-out interface.
# type="pil" makes Gradio hand run_ocr a PIL Image (or None).
# NOTE(review): allow_flagging="never" is deprecated in Gradio 4.x in favor of
# flagging_mode="never" — confirm against the Space's pinned gradio version.
demo = gr.Interface(
    fn=run_ocr,
    inputs=gr.Image(type="pil", label="Captcha image"),
    outputs=gr.Textbox(label="Detected text"),
    title=title,
    description=desc,
    allow_flagging="never"
)
# Launch only when executed directly (HF Spaces run this as the entry script).
if __name__ == "__main__":
    demo.launch()