Spaces:
Sleeping
Sleeping
| from PIL import Image | |
| import torch | |
| from torch.utils.data import DataLoader | |
| from torch.nn.functional import softmax | |
| from utils import * | |
| class Recognizer: | |
| def __init__( | |
| self, | |
| model_file, | |
| device = "cpu", | |
| ): | |
| weights = torch.load(model_file) | |
| self.model = weights["model"] | |
| self.converter = weights["converter"] | |
| self.opt = self.model.opt | |
| self.imgH = self.opt.imgH | |
| self.imgW = self.opt.imgW | |
| self.input_channel = self.opt.input_channel | |
| self.device = device | |
| _ = self.model.to(device) | |
| self.model.eval() | |
| def __call__( | |
| self, | |
| imgs | |
| ): | |
| results = [] | |
| transform = NormalizePAD((self.input_channel, self.imgH, self.imgW)) | |
| with torch.no_grad(): | |
| for img in imgs: | |
| img = Image.fromarray(img).convert("L") | |
| w, h = img.size | |
| ratio = w / float(h) | |
| if math.ceil(self.imgH * ratio) > self.imgW: | |
| resized_w = self.imgW | |
| else: | |
| resized_w = math.ceil(self.imgH * ratio) | |
| img = img.resize((resized_w, self.imgH), Image.BICUBIC) | |
| img = transform(img) | |
| img = img.unsqueeze(0) | |
| img = img.to(self.device) | |
| text_for_pred = torch.LongTensor(1, w // 10 + 1).fill_(0).to(self.device) | |
| preds = self.model(img, text_for_pred) | |
| preds_size = [preds.size(1)] | |
| preds_prob = softmax(preds, dim=-1).squeeze().cpu().detach().numpy() | |
| values = preds_prob.max(axis=-1) | |
| indices = preds_prob.argmax(axis=-1) | |
| preds_str = self.converter.decode_greedy(indices.ravel(), preds_size)[0] | |
| confidence_score = custom_mean(values) | |
| results.append([preds_str, confidence_score]) | |
| return results | |