Spaces:
Runtime error
Runtime error
File size: 1,971 Bytes
a35bb55 77ec64c a35bb55 151d09d c916c34 a35bb55 c916c34 a35bb55 c916c34 a35bb55 c916c34 e77921d c916c34 a35bb55 86c3831 a35bb55 c916c34 a35bb55 0f0e37d a35bb55 f939fbe 08a83ed |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
# Importing necessary packages
import torch # PyTorch used for executing deep learning functions
from PIL import Image, ImageTk # to display the image from the encoded pixels
import gradio as gr
from transformers import TrOCRProcessor, VisionEncoderDecoderModel # TrOCRProcessor bundles the visual feature extractor and the tokenizer of the TrOCR model; VisionEncoderDecoderModel is the TrOCR model itself
import os
# Pre-/post-processing (image preparation + token decoding) comes from the
# stock handwritten TrOCR checkpoint.
processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")

# Prefer an access token supplied through the environment; when the variable
# is unset or empty, fall back to True (per the transformers docs this asks
# the library to use a locally stored login token -- confirm for the pinned
# version).
auth_token = os.environ.get("TOKEN_FROM_SECRET")
if not auth_token:
    auth_token = True

# Fine-tuned recognition weights, fetched with the credentials chosen above.
model = VisionEncoderDecoderModel.from_pretrained(
    "sk2003/hist-trocr",
    use_auth_token=auth_token,
)
# def process_image(image):
# # prepare image
# pixel_values = processor(image, return_tensors="pt").pixel_values
# # generate
# generated_ids = model.generate(pixel_values)
# # decode
# generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
# return generated_text
def inference_on_image(image):
    """Transcribe the text shown in a single image.

    Args:
        image: The image to read. The Gradio interface in this file is
            configured to pass a PIL image; ``None`` is accepted and means
            "no image supplied".

    Returns:
        The decoded transcription as a string, or an empty string when
        ``image`` is ``None``.
    """
    # Submitting the form without an image hands the callback None; return an
    # empty transcription instead of letting the processor raise.
    if image is None:
        return ""

    # The reference TrOCR examples always feed RGB input, so normalise
    # grayscale / palette / RGBA PIL images before preprocessing. Non-PIL
    # inputs are passed through untouched.
    if isinstance(image, Image.Image) and image.mode != "RGB":
        image = image.convert("RGB")

    pixel_values = processor(image, return_tensors="pt").pixel_values
    # Cap the output length so a degenerate generation cannot run unbounded.
    generated_ids = model.generate(pixel_values, max_new_tokens=100)
    # A single image goes in, so the batch holds exactly one decoded string.
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
# Text shown at the top of the demo page.
title = "Hist-TrOCR"
description = "Interactive demo of Hist-TrOCR, a fine-tuned version of Microsoft's TrOCR which is an end-to-end transformer model used for recognition of text from single-line or word images. It has been fine-tuned on historical text images. Upload an image (or select from the given samples) and click 'Submit' to get the transcription. Results may take a few seconds to show up."

# Build the web UI: one image input, one text output.
# The legacy `gr.inputs.*` / `gr.outputs.*` namespaces are deprecated in
# Gradio 3.x and absent from 4.x, so the top-level component classes are
# used instead.
iface = gr.Interface(
    fn=inference_on_image,
    inputs=gr.Image(type="pil"),  # deliver uploads to the callback as PIL images
    outputs=gr.Textbox(),
    title=title,
    description=description,
    # Sample images offered to the user; the files are expected to sit next
    # to this script (not verifiable from this file alone).
    examples=[["309-35.png"], ["270-01-03.png"], ["v211285.b750.s69.jpg"]],
)

# Start the server; debug=True keeps the process attached and prints errors.
iface.launch(debug=True)
|