| from transformers import ( | |
| TrOCRConfig, | |
| TrOCRProcessor, | |
| TrOCRForCausalLM, | |
| ViTConfig, | |
| ViTModel, | |
| VisionEncoderDecoderModel, | |
| ) | |
| import gradio as gr | |
# Checkpoint identifier shared by the preprocessor and the model, so the two
# are always loaded from the same source.
_CHECKPOINT = "microsoft/trocr-base-handwritten"

# Both objects are created once at import time and reused by every call.
processor = TrOCRProcessor.from_pretrained(_CHECKPOINT)
model = VisionEncoderDecoderModel.from_pretrained(_CHECKPOINT)
def ocr(image):
    """Recognize the text in ``image`` and return it as a string.

    The image is preprocessed with the module-level ``processor``, passed to
    ``model.generate``, and the generated token ids are decoded back to text
    with special tokens skipped.

    Args:
        image: The image supplied by the Gradio ``"image"`` input, or
            ``None`` when the form is submitted without an image.

    Returns:
        The decoded text for the image, or an empty string when no image
        was provided.
    """
    # Gradio hands the callback None when nothing was uploaded; return early
    # instead of letting the processor raise on a missing image.
    if image is None:
        return ""

    pixel_values = processor(image, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)
    # batch_decode yields one string per generated sequence; only a single
    # image was passed in, so the first entry is the result.
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
# Web UI: a single image input wired to `ocr`, with one text output.
demo = gr.Interface(
    fn=ocr,
    inputs="image",
    outputs=["text"],
)

# Start serving the interface.
demo.launch()