| import gradio as gr |
| import torch |
| from transformers import TrOCRProcessor, VisionEncoderDecoderModel |
| from PIL import Image |
| import time |
|
|
# Checkpoint identifier handed to both the processor and the model loader.
MODEL = "microsoft/trocr-small-printed"

# Use the GPU when torch reports one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Loaded once at import time so every request reuses the same objects.
processor = TrOCRProcessor.from_pretrained(MODEL)
model = VisionEncoderDecoderModel.from_pretrained(MODEL)
model = model.to(device)
|
|
def extract_text(image) -> str:
    """Run OCR on an image and return the recognized text as a display string.

    Args:
        image: A ``PIL.Image.Image``, any array accepted by
            ``Image.fromarray``, or ``None`` when nothing was uploaded.

    Returns:
        A formatted string holding the decoded text and the elapsed
        processing time in seconds, or a prompt asking for an upload when
        ``image`` is ``None``.
    """
    if image is None:
        return "โ ๏ธ Please upload an image."

    # perf_counter is monotonic, so the reported duration cannot be skewed by
    # system clock adjustments the way a time.time() difference can.
    start_time = time.perf_counter()

    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)

    # Arrays and some uploads arrive as grayscale, palette, or RGBA images;
    # normalize to three channels before preprocessing, as the TrOCR usage
    # examples do.
    if image.mode != "RGB":
        image = image.convert("RGB")

    pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(device)
    generated_ids = model.generate(pixel_values)
    # A single image was submitted, so only the first decoded sequence is used.
    text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

    runtime = round(time.perf_counter() - start_time, 2)

    # NOTE(review): the leading symbols in the user-facing strings look like
    # mis-decoded emoji; they are kept exactly as found — confirm the intended
    # characters against the original file encoding.
    return f"""๐ Extracted Text:

{text}

โฑ Processed in {runtime} seconds
"""
|
|
# Sample inputs offered in the UI; each inner list is one example row.
_EXAMPLE_IMAGES = [
    ["https://huggingface.co/datasets/nielsr/image_dummy/raw/main/receipt.png"],
]

# Single-function interface: an uploaded image goes to extract_text and the
# returned string is shown in a textbox.
demo = gr.Interface(
    title="๐ผ Image โ Text Demo",
    description=(
        "Upload an image with printed text. "
        "Powered by Microsoft TrOCR running locally on Hugging Face Spaces."
    ),
    fn=extract_text,
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=gr.Textbox(label="OCR Result"),
    examples=_EXAMPLE_IMAGES,
)


# Start the app only when this file is executed directly.
if __name__ == "__main__":
    demo.launch()