Spaces:
Build error
Build error
| from paddleocr import PaddleOCR | |
| import cv2 | |
| import json | |
| import numpy as np | |
| from PIL import Image | |
| from torchvision import transforms | |
# Initialize PaddleOCR once at module import so every caller shares one
# engine instance (detection only — text recognition is disabled).
ocr_engine = PaddleOCR(
    use_angle_cls=False,  # no text-angle classification pass
    lang='en',
    rec=False,  # detection only; recognition stage is skipped
    use_gpu=True,
    gpu_mem=500,  # cap GPU memory usage (MB)
    det_limit_side_len=1280  # max image side length fed to the detector
)
def detect_text_boxes(image: np.ndarray):
    """
    Detect text bounding boxes in a numpy image array (OpenCV format).

    Parameters
    ----------
    image : np.ndarray
        Image as loaded by OpenCV (H x W x C).

    Returns
    -------
    list[dict]
        One dict per detected text line with keys:
        - "id":   1-based index of the box
        - "text": always "" (recognition is disabled on the engine)
        - "box":  polygon points as returned by the detector

    Raises
    ------
    ValueError
        If ``image`` is not a numpy array.
    """
    if not isinstance(image, np.ndarray):
        raise ValueError("Input must be numpy array (OpenCV format)")
    result = ocr_engine.ocr(image, cls=False)
    # PaddleOCR yields [None] (or an empty list) when nothing is detected;
    # without this guard the loop below crashes on blank pages.
    if not result or result[0] is None:
        return []
    boxes = []
    for idx, item in enumerate(result[0]):
        # NOTE(review): with rec=False, some PaddleOCR versions return the
        # polygon itself as `item` rather than nested as `item[0]` — verify
        # against the installed PaddleOCR version.
        points = item[0]
        boxes.append({
            "id": idx + 1,
            "text": "",  # empty because recognition is disabled
            "box": points
        })
    return boxes
# Built once at import time — constructing the Compose pipeline on every
# call is wasted work. Mean/std are the standard ImageNet statistics.
_PREPROCESS_TRANSFORM = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])


def preprocess_image(image: Image.Image):
    """
    Prepare an input image for the model.

    Parameters
    ----------
    image : PIL.Image.Image
        Input image.

    Returns
    -------
    torch.Tensor
        Normalized CHW float tensor.
    """
    return _PREPROCESS_TRANSFORM(image)
def decode_predictions(predictions, tokenizer):
    """
    Convert model output token-id sequences into decoded text strings.

    Parameters
    ----------
    predictions : iterable
        Sequences of token-id tensors (each element supports ``.item()``).
    tokenizer
        Object exposing ``special_tokens['eos']`` and ``decode(ids)``.

    Returns
    -------
    list[str]
        One decoded string per prediction; each sequence is truncated at
        the first end-of-sequence token.
    """
    eos_id = tokenizer.special_tokens['eos']
    decoded = []
    for sequence in predictions:
        ids = []
        for token_id in sequence:
            if token_id == eos_id:
                break
            ids.append(token_id.item())
        decoded.append(tokenizer.decode(ids))
    return decoded
def crop_and_resize_line(image_rgb, box, target_height=48):
    """
    Crop the axis-aligned bounding rectangle of ``box`` from the image and
    resize it to a fixed height, preserving aspect ratio.

    Parameters
    ----------
    image_rgb : np.ndarray
        Source image (H x W x C).
    box : sequence of (x, y) points
        Text-line polygon from the detector.
    target_height : int
        Output height in pixels (default 48).

    Returns
    -------
    np.ndarray
        The resized crop of shape (target_height, new_w, C).

    Raises
    ------
    ValueError
        If the box has no overlap with the image. (The original code
        instead failed with ZeroDivisionError on zero-height boxes and a
        cv2 error on empty ROIs.)
    """
    pts = np.array(box, dtype=np.float32)
    x, y, w, h = cv2.boundingRect(pts)

    # Clamp the rectangle to the image bounds: detector polygons can touch
    # or slightly exceed the border, which would yield an empty ROI.
    img_h, img_w = image_rgb.shape[:2]
    x0, y0 = max(x, 0), max(y, 0)
    x1, y1 = min(x + w, img_w), min(y + h, img_h)
    if x1 <= x0 or y1 <= y0:
        raise ValueError("Text box has no overlap with the image")

    roi = image_rgb[y0:y1, x0:x1]
    ratio = target_height / float(y1 - y0)
    new_w = max(1, int((x1 - x0) * ratio))
    return cv2.resize(roi, (new_w, target_height))
def sort_annotations_by_top(annotations):
    """
    Order annotations top-to-bottom by the highest (smallest-y) point of
    each annotation's 'box' polygon. Returns a new sorted list.
    """
    def topmost_y(annotation):
        # The y-coordinate of the box's uppermost vertex.
        return min(point[1] for point in annotation['box'])

    return sorted(annotations, key=topmost_y)