""" Korean License Plate OCR - KLPR v2 (Model v5) Hugging Face Gradio App """ from __future__ import annotations import gradio as gr import gradio_client.utils as client_utils import torch import torch.nn as nn from PIL import Image import torchvision.transforms as transforms import numpy as np # Work around gradio_client not handling boolean JSON schema nodes. if not getattr(client_utils, "_patched_bool_schema", False): _orig_json_schema_to_python_type = client_utils._json_schema_to_python_type def _safe_json_schema_to_python_type(schema, defs=None): if isinstance(schema, bool): return "Any" return _orig_json_schema_to_python_type(schema, defs) client_utils._json_schema_to_python_type = _safe_json_schema_to_python_type client_utils._patched_bool_schema = True class CRNN(nn.Module): def __init__(self, img_height, num_chars, rnn_hidden=256): super().__init__() self.cnn = nn.Sequential( nn.Conv2d(1, 64, kernel_size=3, padding=1), nn.ReLU(inplace=True), nn.MaxPool2d((2, 2)), nn.Conv2d(64, 128, kernel_size=3, padding=1), nn.ReLU(inplace=True), nn.MaxPool2d((2, 2)), nn.Conv2d(128, 256, kernel_size=3, padding=1), nn.BatchNorm2d(256), nn.ReLU(inplace=True), nn.Conv2d(256, 256, kernel_size=3, padding=1), nn.BatchNorm2d(256), nn.ReLU(inplace=True), nn.MaxPool2d((2, 1)), nn.Conv2d(256, 512, kernel_size=3, padding=1), nn.BatchNorm2d(512), nn.ReLU(inplace=True), nn.Conv2d(512, 512, kernel_size=3, padding=1), nn.BatchNorm2d(512), nn.ReLU(inplace=True), nn.MaxPool2d((2, 1)), nn.Conv2d(512, 512, kernel_size=3, padding=1), nn.BatchNorm2d(512), nn.ReLU(inplace=True), nn.MaxPool2d((2, 1)), ) self.rnn = nn.LSTM(512, rnn_hidden, bidirectional=True, num_layers=2, batch_first=True) self.fc = nn.Linear(rnn_hidden * 2, num_chars) def forward(self, x): conv = self.cnn(x) conv = conv.squeeze(2).permute(0, 2, 1) rnn_out, _ = self.rnn(conv) return self.fc(rnn_out) def decode_predictions(outputs, itos, blank_idx=0): preds = outputs.argmax(2).detach().cpu().numpy() decoded = [] for pred in preds: char_list = [] prev_idx = blank_idx for idx in pred: if idx != blank_idx and idx != prev_idx: char_list.append(itos[int(idx)]) prev_idx = idx decoded.append("".join(char_list)) return decoded def preprocess_image(image, img_height=32, max_width=200): if not isinstance(image, Image.Image): if isinstance(image, np.ndarray): image = Image.fromarray(image.astype("uint8")) else: image = Image.open(image) image = image.convert("L") w, h = image.size new_w = min(int(img_height * w / h), max_width) image = image.resize((new_w, img_height), Image.LANCZOS) new_img = Image.new("L", (max_width, img_height), 255) new_img.paste(image, (0, 0)) transform = transforms.Compose( [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))] ) return transform(new_img).unsqueeze(0) print("모델 로딩 중...") checkpoint_path = "best_ocr_one_line.pth" checkpoint = torch.load(checkpoint_path, map_location="cpu") img_h = checkpoint.get("img_h", 32) max_w = checkpoint.get("max_w", 200) itos = checkpoint["itos"] num_chars = len(itos) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") model = CRNN(img_h, num_chars, rnn_hidden=256).to(device) model.load_state_dict(checkpoint["model_state"]) model.eval() print(f"✓ 모델 로드 완료 (Device: {device})") print(f" - Epoch: {checkpoint.get('epoch', '?')}") print(f" - Val Acc: {checkpoint.get('val_acc', '?'):.2%}") def predict_license_plate(image): if image is None: return "이미지를 업로드해 주세요." try: image_tensor = preprocess_image(image, img_h, max_w).to(device) with torch.no_grad(): outputs = model(image_tensor).log_softmax(2) predictions = decode_predictions(outputs, itos) result = predictions[0] return result if result else "(인식 결과 없음)" except Exception as exc: return f"오류 발생: {exc}" demo = gr.Interface( fn=predict_license_plate, inputs=gr.Image(type="pil", label="번호판 이미지"), outputs=gr.Textbox(label="인식 결과"), title="🚘 한국 번호판 OCR - KLPR v2", description=( "번호판 이미지에서 문자를 인식합니다.\n\n" "**모델 정보:** CRNN (CNN + BiLSTM + CTC)\n" "**입력:** 번호판 이미지 1장" ), api_name="predict", cache_examples=False, ) if __name__ == "__main__": demo.launch()