"""Gradio demo that runs Microsoft TrOCR on an uploaded image of printed text."""

import time

import gradio as gr
import torch
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

MODEL = "microsoft/trocr-small-printed"
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load once at startup so every request reuses the same processor and weights.
processor = TrOCRProcessor.from_pretrained(MODEL)
model = VisionEncoderDecoderModel.from_pretrained(MODEL).to(device)


def extract_text(image) -> str:
    """Run OCR on *image* and return the recognized text plus the timing.

    Args:
        image: A ``PIL.Image.Image``, anything ``Image.fromarray`` accepts
            (e.g. a NumPy array), or ``None`` when nothing was uploaded.

    Returns:
        A formatted message holding the decoded text and the processing time
        in seconds, or a warning message when ``image`` is ``None``.
    """
    if image is None:
        return "⚠️ Please upload an image."

    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments the way time.time() can.
    start_time = time.perf_counter()

    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)

    # Normalize grayscale / palette / RGBA inputs to three channels before
    # they reach the processor.
    if image.mode != "RGB":
        image = image.convert("RGB")

    pixel_values = processor(images=image, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values.to(device))
    # A single image was submitted, so the batch has exactly one decoded entry.
    text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

    runtime = round(time.perf_counter() - start_time, 2)

    return f"""📝 Extracted Text:
{text}
⏱ Processed in {runtime} seconds
"""


demo = gr.Interface(
    fn=extract_text,
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=gr.Textbox(label="OCR Result"),
    title="🖼 Image → Text Demo",
    description=(
        "Upload an image with printed text. "
        "Powered by Microsoft TrOCR running locally on Hugging Face Spaces."
    ),
    examples=[
        ["https://huggingface.co/datasets/nielsr/image_dummy/raw/main/receipt.png"],
    ],
)

if __name__ == "__main__":
    demo.launch()