Spaces:

nice22090
/

KLPR_v2

Sleeping

App Files Files Community

nice22090 committed on Jan 4

Commit

9335bef

1 Parent(s): 1e343ee

Add KLPR v2 application with model

Browse files

Files changed (4) hide show

README.md +27 -7
app.py +184 -0
best_ocr_one_line.pth +3 -0
requirements.txt +4 -0

README.md CHANGED Viewed

@@ -1,13 +1,33 @@
 ---
-title: KLPR V2
-emoji: 👁
-colorFrom: gray
-colorTo: gray
 sdk: gradio
-sdk_version: 6.2.0
 app_file: app.py
 pinned: false
-license: apache-2.0
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: KLPR v2
+emoji: 🚗
+colorFrom: blue
+colorTo: green
 sdk: gradio
+sdk_version: "5.9.1"
 app_file: app.py
 pinned: false
 ---
+# 🚗 KLPR v2 - 한국 번호판 OCR
+한국 자동차 번호판을 인식하는 딥러닝 기반 OCR 시스템 (v2 - 확장된 문자 세트)
+## 📊 모델 정보
+- **모델 구조**: CRNN (CNN + Bidirectional LSTM + CTC Loss)
+- **입력 크기**: 32x200 (grayscale)
+- **Validation Accuracy**: 91.23%
+- **Validation Loss**: 0.1095
+- **훈련 Epoch**: 18
+- **지원 문자**: 77개 (한글 + 숫자 + 추가 지역명)
+## 🆕 v2의 특징
+1. **확장된 문자 세트**: 추가 지역명 지원 (산, 제, 세, 종, 파)
+2. **안정적인 훈련**: 18 epoch 훈련으로 더 수렴된 모델
+3. **낮은 Loss**: 0.1095로 v1(0.1188)보다 낮음
+## 🚀 사용 방법
+번호판 이미지를 업로드하면 자동으로 번호를 인식합니다!

app.py ADDED Viewed

	@@ -0,0 +1,184 @@

+"""
+한국 번호판 OCR - KLPR_v2 (Model v5)
+Hugging Face Gradio App
+"""
+import gradio as gr
+import torch
+import torch.nn as nn
+from PIL import Image
+import torchvision.transforms as transforms
+# ============================================================================
+# 모델 정의
+# ============================================================================
+class CRNN(nn.Module):
+    """CNN + bidirectional LSTM recogniser for single-line plate images.
+
+    The convolutional stack halves the height five times and the width
+    twice, so a 1x32x200 input becomes a 512x1x50 feature map. Each
+    remaining column is fed to the LSTM as one time step, and a linear
+    layer maps every step to ``num_chars`` class scores.
+
+    Args:
+        img_height: Input image height. Accepted for interface
+            compatibility only; no layer size depends on it.
+        num_chars: Number of output classes per time step.
+        rnn_hidden: Hidden size of each LSTM direction.
+    """
+
+    def __init__(self, img_height, num_chars, rnn_hidden=256):
+        super().__init__()
+        # NOTE: a layer's position inside nn.Sequential is part of its
+        # parameter name (cnn.0.weight, cnn.3.weight, ...), so the layers
+        # must keep this exact order for saved state_dicts to load.
+        self.cnn = nn.Sequential(
+            # 1 x 32 x 200 -> 64 x 16 x 100
+            nn.Conv2d(1, 64, kernel_size=3, padding=1),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d((2, 2)),
+            # -> 128 x 8 x 50
+            nn.Conv2d(64, 128, kernel_size=3, padding=1),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d((2, 2)),
+            # -> 256 x 4 x 50 (from here on only the height is pooled)
+            nn.Conv2d(128, 256, kernel_size=3, padding=1),
+            nn.BatchNorm2d(256),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(256, 256, kernel_size=3, padding=1),
+            nn.BatchNorm2d(256),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d((2, 1)),
+            # -> 512 x 2 x 50
+            nn.Conv2d(256, 512, kernel_size=3, padding=1),
+            nn.BatchNorm2d(512),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(512, 512, kernel_size=3, padding=1),
+            nn.BatchNorm2d(512),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d((2, 1)),
+            # -> 512 x 1 x 50
+            nn.Conv2d(512, 512, kernel_size=3, padding=1),
+            nn.BatchNorm2d(512),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d((2, 1))
+        )
+        self.rnn = nn.LSTM(512, rnn_hidden, bidirectional=True, num_layers=2, batch_first=True)
+        self.fc = nn.Linear(rnn_hidden * 2, num_chars)
+
+    def forward(self, x):
+        """Compute per-time-step class scores.
+
+        Args:
+            x: Image batch of shape (B, 1, H, W).
+
+        Returns:
+            Tensor of shape (B, W // 4, num_chars) with unnormalised
+            scores; no softmax is applied here.
+
+        Raises:
+            ValueError: If the CNN does not reduce the height to 1, in
+                which case the feature map cannot be flattened into a
+                sequence of columns.
+        """
+        conv = self.cnn(x)
+        feature_height = conv.size(2)
+        if feature_height != 1:
+            # squeeze(2) would silently do nothing here and the permute
+            # below would fail with an unrelated-looking error.
+            raise ValueError(
+                f"CNN output height must be 1, got {feature_height}; "
+                "resize the input image to a height of 32 pixels"
+            )
+        # (B, C, 1, W') -> (B, W', C): one feature vector per image column.
+        conv = conv.squeeze(2).permute(0, 2, 1)
+        rnn_out, _ = self.rnn(conv)
+        return self.fc(rnn_out)
+# ============================================================================
+# CTC 디코딩
+# ============================================================================
+def decode_predictions(outputs, itos, blank_idx=0):
+    """Greedy CTC decoding of a batch of network outputs.
+
+    For every sample the best class per time step is taken, consecutive
+    repeats are collapsed and blank symbols are dropped.
+
+    Args:
+        outputs: Score tensor of shape (B, T, num_classes).
+        itos: Index-to-character lookup, indexed with plain ``int`` values.
+        blank_idx: Class index of the CTC blank symbol.
+
+    Returns:
+        List with one decoded string per sample.
+    """
+    best_paths = outputs.argmax(2).detach().cpu().numpy()  # (B, T)
+    texts = []
+    for path in best_paths:
+        # Pair each step with its predecessor; the first step is compared
+        # against the blank so that a leading character is always kept.
+        predecessors = [blank_idx, *path[:-1]]
+        texts.append(''.join(
+            itos[int(current)]
+            for before, current in zip(predecessors, path)
+            if current != blank_idx and current != before
+        ))
+    return texts
+# ============================================================================
+# 이미지 전처리
+# ============================================================================
+def preprocess_image(image, img_height=32, max_width=200):
+    """Convert a plate image into a normalised model input tensor.
+
+    The image is converted to grayscale, scaled to ``img_height`` while
+    keeping its aspect ratio (the width is capped at ``max_width``), pasted
+    at the left edge of a white ``max_width`` x ``img_height`` canvas and
+    normalised to the range [-1, 1].
+
+    Args:
+        image: A PIL image, or an array accepted by ``Image.fromarray``
+            (Gradio passes a numpy array).
+        img_height: Target height in pixels.
+        max_width: Canvas width in pixels.
+
+    Returns:
+        Tensor of shape (1, 1, img_height, max_width).
+
+    Raises:
+        ValueError: If the image has zero width or height.
+    """
+    # Gradio hands over a numpy array; everything below works on PIL images.
+    if not isinstance(image, Image.Image):
+        image = Image.fromarray(image)
+    image = image.convert('L')
+    # Resize while keeping the aspect ratio.
+    w, h = image.size
+    if w == 0 or h == 0:
+        raise ValueError(f"image is empty (size {w}x{h})")
+    # Clamp to at least 1 px: for very tall, narrow inputs the scaled width
+    # truncates to 0, and PIL cannot resize to an empty image.
+    new_w = max(1, min(int(img_height * w / h), max_width))
+    image = image.resize((new_w, img_height), Image.LANCZOS)
+    # Pad on the right with white up to the fixed canvas width.
+    new_img = Image.new('L', (max_width, img_height), 255)
+    new_img.paste(image, (0, 0))
+    # ToTensor scales to [0, 1]; Normalize(0.5, 0.5) maps that to [-1, 1].
+    transform = transforms.Compose([
+        transforms.ToTensor(),
+        transforms.Normalize((0.5,), (0.5,))
+    ])
+    return transform(new_img).unsqueeze(0)  # (1, 1, H, W)
+# ============================================================================
+# 모델 로드
+# ============================================================================
+print("모델 로딩 중...")
+checkpoint_path = 'best_ocr_one_line.pth'
+# NOTE(review): torch.load unpickles the file, so it must only be used on
+# trusted checkpoints. Consider weights_only=True if the checkpoint holds
+# only tensors and plain containers -- confirm against the training script.
+checkpoint = torch.load(checkpoint_path, map_location='cpu')
+# Input geometry and vocabulary are stored with the weights; the geometry
+# falls back to 32x200 when the checkpoint does not record it.
+img_h = checkpoint.get('img_h', 32)
+max_w = checkpoint.get('max_w', 200)
+itos = checkpoint['itos']
+num_chars = len(itos)
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+model = CRNN(img_h, num_chars, rnn_hidden=256).to(device)
+model.load_state_dict(checkpoint['model_state'])
+# Inference only: switch BatchNorm to its stored running statistics.
+model.eval()
+print(f"✓ 모델 로드 완료 (Device: {device})")
+print(f"  - Epoch: {checkpoint.get('epoch', '?')}")
+# Apply the percent format only when the value exists; formatting the '?'
+# placeholder string with '.2%' raised ValueError at startup.
+val_acc = checkpoint.get('val_acc')
+val_acc_text = '?' if val_acc is None else f"{val_acc:.2%}"
+print(f"  - Val Acc: {val_acc_text}")
+# ============================================================================
+# 추론 함수
+# ============================================================================
+def predict_license_plate(image):
+    """Recognise the text on an uploaded plate image.
+
+    Args:
+        image: Image supplied by the Gradio input component, or ``None``
+            when nothing was uploaded.
+
+    Returns:
+        The decoded plate text, or a user-facing message when no image was
+        given, nothing was recognised, or an error occurred.
+    """
+    if image is None:
+        return "이미지를 업로드해주세요."
+    try:
+        # Preprocess, run the network without gradients, then CTC-decode.
+        batch = preprocess_image(image, img_h, max_w).to(device)
+        with torch.no_grad():
+            log_probs = model(batch).log_softmax(2)
+        text = decode_predictions(log_probs, itos)[0]
+    except Exception as e:
+        # UI boundary: report the failure in the textbox instead of raising.
+        return f"오류 발생: {str(e)}"
+    return text or "(인식 결과 없음)"
+# ============================================================================
+# Gradio 인터페이스
+# ============================================================================
+# The interface pieces are built first so the gr.Interface call stays short.
+plate_input = gr.Image(type="numpy", label="번호판 이미지")
+result_output = gr.Textbox(label="인식 결과")
+app_description = """
+    한국 자동차 번호판을 인식하는 OCR 모델입니다.
+    **모델 정보:**
+    - Model: CRNN (CNN + Bidirectional LSTM + CTC)
+    - Validation Accuracy: 91.23%
+    - Epoch: 18
+    - 지원 문자: 77개 (한글 + 숫자 + 추가 특수 지역명)
+    **사용 방법:**
+    1. 번호판 이미지를 업로드하세요
+    2. 자동으로 번호판 번호가 인식됩니다
+    **v2의 특징:**
+    - 더 많은 지역명 지원 (산, 제, 세, 종, 파)
+    - 18 epoch 훈련으로 더 안정적인 학습
+    """
+demo = gr.Interface(
+    fn=predict_license_plate,
+    inputs=plate_input,
+    outputs=result_output,
+    title="🚗 한국 번호판 OCR - KLPR v2",
+    description=app_description,
+    examples=None,
+    cache_examples=False,
+)
+# Start the web server only when the file is run as a script.
+if __name__ == "__main__":
+    demo.launch()

best_ocr_one_line.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:da6b839d9550ea5c0ff089f2e669ceb0911a83ed17983787e6de1b0c38bb7f3f
+size 120762491

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+torch
+torchvision
+gradio
+Pillow