Spaces:

nice22090
/

KLPR_v1

Sleeping

App Files Community

nice22090 committed on Jan 4

Commit

cb24c40

1 Parent(s): 0c56ad2

Rebuild app for HF Spaces

Browse files

Files changed (1) hide show

app.py +44 -87

app.py CHANGED Viewed

@@ -1,8 +1,10 @@
 """
-한국 번호판 OCR - KLPR_v1 (Model v4)
 Hugging Face Gradio App
 """
 import gradio as gr
 import gradio_client.utils as client_utils
 import torch
@@ -11,77 +13,60 @@ from PIL import Image
 import torchvision.transforms as transforms
 import numpy as np
-# Work around gradio_client schema parsing when additionalProperties is boolean.
 if not getattr(client_utils, "_patched_bool_schema", False):
-    _orig_json_schema_to_python_type = client_utils.json_schema_to_python_type
-    def _safe_json_schema_to_python_type(schema):
         if isinstance(schema, bool):
             return "Any"
-        return _orig_json_schema_to_python_type(schema)
-    client_utils.json_schema_to_python_type = _safe_json_schema_to_python_type
     client_utils._patched_bool_schema = True
-# ============================================================================
-# 모델 정의
-# ============================================================================
 class CRNN(nn.Module):
     def __init__(self, img_height, num_chars, rnn_hidden=256):
-        super(CRNN, self).__init__()
-        # CNN - 32x200 -> 1x50
         self.cnn = nn.Sequential(
             nn.Conv2d(1, 64, kernel_size=3, padding=1),
             nn.ReLU(inplace=True),
             nn.MaxPool2d((2, 2)),
             nn.Conv2d(64, 128, kernel_size=3, padding=1),
             nn.ReLU(inplace=True),
             nn.MaxPool2d((2, 2)),
             nn.Conv2d(128, 256, kernel_size=3, padding=1),
             nn.BatchNorm2d(256),
             nn.ReLU(inplace=True),
             nn.Conv2d(256, 256, kernel_size=3, padding=1),
             nn.BatchNorm2d(256),
             nn.ReLU(inplace=True),
             nn.MaxPool2d((2, 1)),
             nn.Conv2d(256, 512, kernel_size=3, padding=1),
             nn.BatchNorm2d(512),
             nn.ReLU(inplace=True),
             nn.Conv2d(512, 512, kernel_size=3, padding=1),
             nn.BatchNorm2d(512),
             nn.ReLU(inplace=True),
             nn.MaxPool2d((2, 1)),
             nn.Conv2d(512, 512, kernel_size=3, padding=1),
             nn.BatchNorm2d(512),
             nn.ReLU(inplace=True),
-            nn.MaxPool2d((2, 1))
         )
         self.rnn = nn.LSTM(512, rnn_hidden, bidirectional=True, num_layers=2, batch_first=True)
         self.fc = nn.Linear(rnn_hidden * 2, num_chars)
     def forward(self, x):
         conv = self.cnn(x)
-        b, c, h, w = conv.size()
         conv = conv.squeeze(2).permute(0, 2, 1)
         rnn_out, _ = self.rnn(conv)
-        output = self.fc(rnn_out)
-        return output
-# ============================================================================
-# CTC 디코딩
-# ============================================================================
-def decode_predictions(outputs, itos, blank_idx=0):
-    """CTC 디코딩"""
-    preds = outputs.argmax(2).detach().cpu().numpy()  # (B, T)
     decoded = []
     for pred in preds:
         char_list = []
@@ -90,104 +75,76 @@ def decode_predictions(outputs, itos, blank_idx=0):
             if idx != blank_idx and idx != prev_idx:
                 char_list.append(itos[int(idx)])
             prev_idx = idx
-        decoded.append(''.join(char_list))
     return decoded
-# ============================================================================
-# 이미지 전처리
-# ============================================================================
 def preprocess_image(image, img_height=32, max_width=200):
-    """번호판 이미지 전처리"""
-    # PIL Image로 변환 (Gradio 4.x에서 type="pil"로 이미 PIL Image)
     if not isinstance(image, Image.Image):
         if isinstance(image, np.ndarray):
-            image = Image.fromarray(image.astype('uint8'))
-    image = image.convert('L')
-    # 리사이즈 (aspect ratio 유지)
     w, h = image.size
     new_w = min(int(img_height * w / h), max_width)
     image = image.resize((new_w, img_height), Image.LANCZOS)
-    # 패딩
-    new_img = Image.new('L', (max_width, img_height), 255)
     new_img.paste(image, (0, 0))
-    # Transform
-    transform = transforms.Compose([
-        transforms.ToTensor(),
-        transforms.Normalize((0.5,), (0.5,))
-    ])
-    return transform(new_img).unsqueeze(0)  # (1, 1, H, W)
-# ============================================================================
-# 모델 로드
-# ============================================================================
 print("모델 로딩 중...")
-checkpoint_path = 'best_ocr_one_line.pth'
-checkpoint = torch.load(checkpoint_path, map_location='cpu')
-img_h = checkpoint.get('img_h', 32)
-max_w = checkpoint.get('max_w', 200)
-itos = checkpoint['itos']
 num_chars = len(itos)
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 model = CRNN(img_h, num_chars, rnn_hidden=256).to(device)
-model.load_state_dict(checkpoint['model_state'])
 model.eval()
 print(f"✓ 모델 로드 완료 (Device: {device})")
 print(f"  - Epoch: {checkpoint.get('epoch', '?')}")
 print(f"  - Val Acc: {checkpoint.get('val_acc', '?'):.2%}")
-# ============================================================================
-# 추론 함수
-# ============================================================================
 def predict_license_plate(image):
-    """번호판 이미지에서 텍스트 예측"""
     if image is None:
-        return "이미지를 업로드해주세요."
     try:
-        # 전처리
         image_tensor = preprocess_image(image, img_h, max_w).to(device)
-        # 추론
         with torch.no_grad():
             outputs = model(image_tensor).log_softmax(2)
             predictions = decode_predictions(outputs, itos)
         result = predictions[0]
         return result if result else "(인식 결과 없음)"
-    except Exception as e:
-        return f"오류 발생: {str(e)}"
-# ============================================================================
-# Gradio 인터페이스
-# ============================================================================
 demo = gr.Interface(
     fn=predict_license_plate,
     inputs=gr.Image(type="pil", label="번호판 이미지"),
     outputs=gr.Textbox(label="인식 결과"),
-    title="🚗 한국 번호판 OCR - KLPR v1",
-    description="""
-    한국 자동차 번호판을 인식하는 OCR 모델입니다.
-    **모델 정보:**
-    - Model: CRNN (CNN + Bidirectional LSTM + CTC)
-    - Validation Accuracy: 92.38%
-    - Epoch: 2
-    - 지원 문자: 72개 (한글 + 숫자)
-    **사용 방법:**
-    1. 번호판 이미지를 업로드하세요
-    2. 자동으로 번호판 번호가 인식됩니다
-    """,
-    api_name="predict"
 )
 if __name__ == "__main__":

 """
+Korean License Plate OCR - KLPR v1 (Model v4)
 Hugging Face Gradio App
 """
+from __future__ import annotations
 import gradio as gr
 import gradio_client.utils as client_utils
 import torch
 import torchvision.transforms as transforms
 import numpy as np
+# Work around gradio_client not handling boolean JSON schema nodes.
 if not getattr(client_utils, "_patched_bool_schema", False):
+    _orig_json_schema_to_python_type = client_utils._json_schema_to_python_type
+    def _safe_json_schema_to_python_type(schema, defs=None):
         if isinstance(schema, bool):
             return "Any"
+        return _orig_json_schema_to_python_type(schema, defs)
+    client_utils._json_schema_to_python_type = _safe_json_schema_to_python_type
     client_utils._patched_bool_schema = True
 class CRNN(nn.Module):
     def __init__(self, img_height, num_chars, rnn_hidden=256):
+        super().__init__()
         self.cnn = nn.Sequential(
             nn.Conv2d(1, 64, kernel_size=3, padding=1),
             nn.ReLU(inplace=True),
             nn.MaxPool2d((2, 2)),
             nn.Conv2d(64, 128, kernel_size=3, padding=1),
             nn.ReLU(inplace=True),
             nn.MaxPool2d((2, 2)),
             nn.Conv2d(128, 256, kernel_size=3, padding=1),
             nn.BatchNorm2d(256),
             nn.ReLU(inplace=True),
             nn.Conv2d(256, 256, kernel_size=3, padding=1),
             nn.BatchNorm2d(256),
             nn.ReLU(inplace=True),
             nn.MaxPool2d((2, 1)),
             nn.Conv2d(256, 512, kernel_size=3, padding=1),
             nn.BatchNorm2d(512),
             nn.ReLU(inplace=True),
             nn.Conv2d(512, 512, kernel_size=3, padding=1),
             nn.BatchNorm2d(512),
             nn.ReLU(inplace=True),
             nn.MaxPool2d((2, 1)),
             nn.Conv2d(512, 512, kernel_size=3, padding=1),
             nn.BatchNorm2d(512),
             nn.ReLU(inplace=True),
+            nn.MaxPool2d((2, 1)),
         )
         self.rnn = nn.LSTM(512, rnn_hidden, bidirectional=True, num_layers=2, batch_first=True)
         self.fc = nn.Linear(rnn_hidden * 2, num_chars)
     def forward(self, x):
         conv = self.cnn(x)
         conv = conv.squeeze(2).permute(0, 2, 1)
         rnn_out, _ = self.rnn(conv)
+        return self.fc(rnn_out)
+def decode_predictions(outputs, itos, blank_idx=0):
+    preds = outputs.argmax(2).detach().cpu().numpy()
     decoded = []
     for pred in preds:
         char_list = []
             if idx != blank_idx and idx != prev_idx:
                 char_list.append(itos[int(idx)])
             prev_idx = idx
+        decoded.append("".join(char_list))
     return decoded
 def preprocess_image(image, img_height=32, max_width=200):
     if not isinstance(image, Image.Image):
         if isinstance(image, np.ndarray):
+            image = Image.fromarray(image.astype("uint8"))
+        else:
+            image = Image.open(image)
+    image = image.convert("L")
     w, h = image.size
     new_w = min(int(img_height * w / h), max_width)
     image = image.resize((new_w, img_height), Image.LANCZOS)
+    new_img = Image.new("L", (max_width, img_height), 255)
     new_img.paste(image, (0, 0))
+    transform = transforms.Compose(
+        [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
+    )
+    return transform(new_img).unsqueeze(0)
 print("모델 로딩 중...")
+checkpoint_path = "best_ocr_one_line.pth"
+checkpoint = torch.load(checkpoint_path, map_location="cpu")
+img_h = checkpoint.get("img_h", 32)
+max_w = checkpoint.get("max_w", 200)
+itos = checkpoint["itos"]
 num_chars = len(itos)
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model = CRNN(img_h, num_chars, rnn_hidden=256).to(device)
+model.load_state_dict(checkpoint["model_state"])
 model.eval()
 print(f"✓ 모델 로드 완료 (Device: {device})")
 print(f"  - Epoch: {checkpoint.get('epoch', '?')}")
 print(f"  - Val Acc: {checkpoint.get('val_acc', '?'):.2%}")
 def predict_license_plate(image):
     if image is None:
+        return "이미지를 업로드해 주세요."
     try:
         image_tensor = preprocess_image(image, img_h, max_w).to(device)
         with torch.no_grad():
             outputs = model(image_tensor).log_softmax(2)
             predictions = decode_predictions(outputs, itos)
         result = predictions[0]
         return result if result else "(인식 결과 없음)"
+    except Exception as exc:
+        return f"오류 발생: {exc}"
 demo = gr.Interface(
     fn=predict_license_plate,
     inputs=gr.Image(type="pil", label="번호판 이미지"),
     outputs=gr.Textbox(label="인식 결과"),
+    title="🚘 한국 번호판 OCR - KLPR v1",
+    description=(
+        "번호판 이미지에서 문자를 인식합니다.\n\n"
+        "**모델 정보:** CRNN (CNN + BiLSTM + CTC)\n"
+        "**입력:** 번호판 이미지 1장"
+    ),
+    api_name="predict",
+    cache_examples=False,
 )
 if __name__ == "__main__":