| from transformers import TrOCRProcessor, VisionEncoderDecoderModel | |
| from PIL import Image | |
| import cv2 | |
class OCREngine:
    """Handwriting recognizer backed by a pretrained TrOCR checkpoint.

    The processor and the encoder-decoder model are loaded once at
    construction time and reused for every ``read_text`` call.
    """

    # Checkpoint used when the caller does not ask for a specific one.
    DEFAULT_MODEL = "microsoft/trocr-base-handwritten"

    def __init__(self, model_name: str = DEFAULT_MODEL) -> None:
        """Load the TrOCR processor and model.

        Args:
            model_name: Identifier or path passed to ``from_pretrained`` for
                both the processor and the model, so the two always match.
        """
        self.processor = TrOCRProcessor.from_pretrained(model_name)
        self.model = VisionEncoderDecoderModel.from_pretrained(model_name)

    def read_text(self, canvas) -> str:
        """Recognize the text drawn on ``canvas``.

        Args:
            canvas: Image array that is treated as BGR (it is converted with
                ``cv2.COLOR_BGR2GRAY``).

        Returns:
            The decoded text of the first (only) generated sequence, with
            special tokens stripped.
        """
        # Colour is dropped first, as before, so the recognizer sees only
        # stroke intensity.
        gray = cv2.cvtColor(canvas, cv2.COLOR_BGR2GRAY)

        # The TrOCR processor is documented to take RGB images; a
        # single-channel ("L") image has no channel axis for its
        # preprocessing to work with. Replicating the gray plane into three
        # channels keeps the same visual content in the expected layout.
        img = Image.fromarray(gray).convert("RGB")

        pixel_values = self.processor(images=img, return_tensors="pt").pixel_values
        generated_ids = self.model.generate(pixel_values)

        # batch_decode returns one string per input image; exactly one image
        # was submitted, so take the first entry.
        text = self.processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
        return text