| import os | |
| import cv2 | |
| import torch | |
| from gdcount.model import GDCount, GDCountConfig | |
def load_and_preprocess(img_path: str, size: int = 800):
    """Load an image from disk and turn it into a normalized model input.

    The image is read with OpenCV, converted from BGR to RGB, resized so
    that its shorter side equals ``size`` (aspect ratio preserved), scaled
    to ``[0, 1]``, reordered to channel-first layout and normalized per
    channel with a fixed mean/std.

    Args:
        img_path: Path of the image file to read.
        size: Target length in pixels of the shorter image side.

    Returns:
        A 3-tuple ``(img_tensor, (new_h, new_w), img_bgr)``:

        * ``img_tensor`` -- float tensor of shape ``(1, 3, new_h, new_w)``.
        * ``(new_h, new_w)`` -- height and width after resizing; equal to
          the original size when no resize was needed.
        * ``img_bgr`` -- the original, un-resized BGR image as returned by
          ``cv2.imread``.

    Raises:
        OSError: If the image cannot be read or decoded.
    """
    img_bgr = cv2.imread(img_path)
    # cv2.imread signals failure by returning None instead of raising.
    if img_bgr is None:
        raise OSError(f"Could not read or decode image: {img_path!r}")

    h, w = img_bgr.shape[:2]
    img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

    # Default to the original size so the returned shape is always defined,
    # including when the short side already equals ``size``.
    new_h, new_w = h, w

    # Resize so that the short side equals ``size`` (as in the paper).
    short_side = min(h, w)
    if short_side != size:
        scale = size / short_side
        new_w = int(round(w * scale))
        new_h = int(round(h * scale))
        # cv2.resize takes the target size as (width, height).
        img_rgb = cv2.resize(
            img_rgb, (new_w, new_h), interpolation=cv2.INTER_LINEAR
        )

    img_tensor = torch.from_numpy(img_rgb).float() / 255.0
    img_tensor = img_tensor.permute(2, 0, 1)  # (H, W, 3) -> (3, H, W)

    # Normalize per channel, similar to GroundingDINO's preprocessing.
    mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
    std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
    img_tensor = (img_tensor - mean) / std

    return img_tensor.unsqueeze(0), (new_h, new_w), img_bgr