"""Gradio Space: CPU OCR for captcha images via an image-to-text pipeline."""

from transformers import pipeline
from PIL import Image  # noqa: F401  (Gradio hands run_ocr a PIL.Image because type="pil")
import gradio as gr
import io  # noqa: F401

# Load the pipeline once at startup; device=-1 forces CPU usage.
pipe = pipeline(
    "image-to-text",
    model="dragonstar/image-text-captcha-v2",
    device=-1,
)


def _extract_text(outputs) -> str:
    """Pull the generated text out of a pipeline result.

    Image-to-text pipelines commonly return [{'generated_text': '...'}],
    but key names and shapes vary across models, so fall back gracefully
    instead of raising on an unexpected structure.
    """
    if isinstance(outputs, list) and outputs:
        first = outputs[0]
        if isinstance(first, dict):
            # Prefer the conventional key, then a common alternative,
            # then the raw dict as a last resort.
            return str(first.get("generated_text") or first.get("text") or first)
        # Some pipelines may yield bare strings; don't call .get on those.
        return str(first)
    return str(outputs)


def run_ocr(image):
    """Run OCR on a captcha image.

    Args:
        image: PIL Image (Gradio supplies one because type="pil"),
            or None when the user submits without uploading.

    Returns:
        The detected text, or a human-readable status/error message.
    """
    if image is None:
        return "No image provided"
    try:
        # Only the model call can realistically fail; keep the try minimal.
        outputs = pipe(image)
    except Exception as e:  # surface model failures in the UI instead of crashing
        return f"Error: {e}"
    text = _extract_text(outputs).strip()
    return text if text else "No text extracted"


title = "Image → Text (captcha) — dragonstar/image-text-captcha-v2"
desc = (
    "Upload a captcha image. Model runs on CPU in this Space. "
    "Results may be slower than cloud inference."
)

demo = gr.Interface(
    fn=run_ocr,
    inputs=gr.Image(type="pil", label="Captcha image"),
    outputs=gr.Textbox(label="Detected text"),
    title=title,
    description=desc,
    allow_flagging="never",
)

if __name__ == "__main__":
    demo.launch()