nice22090 commited on
Commit
9335bef
ยท
1 Parent(s): 1e343ee

Add KLPR v2 application with model

Browse files
Files changed (4) hide show
  1. README.md +27 -7
  2. app.py +184 -0
  3. best_ocr_one_line.pth +3 -0
  4. requirements.txt +4 -0
README.md CHANGED
@@ -1,13 +1,33 @@
1
  ---
2
- title: KLPR V2
3
- emoji: 👍
4
- colorFrom: gray
5
- colorTo: gray
6
  sdk: gradio
7
- sdk_version: 6.2.0
8
  app_file: app.py
9
  pinned: false
10
- license: apache-2.0
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: KLPR v2
3
+ emoji: 🚗
4
+ colorFrom: blue
5
+ colorTo: green
6
  sdk: gradio
7
+ sdk_version: "5.9.1"
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
 
12
+ # 🚗 KLPR v2 - 한국 번호판 OCR
13
+
14
+ ํ•œ๊ตญ ์ž๋™์ฐจ ๋ฒˆํ˜ธํŒ์„ ์ธ์‹ํ•˜๋Š” ๋”ฅ๋Ÿฌ๋‹ ๊ธฐ๋ฐ˜ OCR ์‹œ์Šคํ…œ (v2 - ํ™•์žฅ๋œ ๋ฌธ์ž ์„ธํŠธ)
15
+
16
+ ## ๐Ÿ“Š ๋ชจ๋ธ ์ •๋ณด
17
+
18
+ - **๋ชจ๋ธ ๊ตฌ์กฐ**: CRNN (CNN + Bidirectional LSTM + CTC Loss)
19
+ - **์ž…๋ ฅ ํฌ๊ธฐ**: 32x200 (grayscale)
20
+ - **Validation Accuracy**: 91.23%
21
+ - **Validation Loss**: 0.1095
22
+ - **ํ›ˆ๋ จ Epoch**: 18
23
+ - **์ง€์› ๋ฌธ์ž**: 77๊ฐœ (ํ•œ๊ธ€ ์ž์Œ + ์ˆซ์ž + ์ถ”๊ฐ€ ์ง€์—ญ๋ช…)
24
+
25
+ ## ๐Ÿ†• v2์˜ ํŠน์ง•
26
+
27
+ 1. **ํ™•์žฅ๋œ ๋ฌธ์ž ์„ธํŠธ**: ์ถ”๊ฐ€ ์ง€์—ญ๋ช… ์ง€์› (์‚ฐ, ์ œ, ์„ธ, ์ข…, ํŒŒ)
28
+ 2. **์•ˆ์ •์ ์ธ ํ›ˆ๋ จ**: 18 epoch ํ›ˆ๋ จ์œผ๋กœ ๋” ์ˆ˜๋ ด๋œ ๋ชจ๋ธ
29
+ 3. **๋‚ฎ์€ Loss**: 0.1095๋กœ v1(0.1188)๋ณด๋‹ค ๋‚ฎ์Œ
30
+
31
+ ## ๐Ÿš€ ์‚ฌ์šฉ ๋ฐฉ๋ฒ•
32
+
33
+ ๋ฒˆํ˜ธํŒ ์ด๋ฏธ์ง€๋ฅผ ์—…๋กœ๋“œํ•˜๋ฉด ์ž๋™์œผ๋กœ ๋ฒˆํ˜ธ๋ฅผ ์ธ์‹ํ•ฉ๋‹ˆ๋‹ค!
app.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ํ•œ๊ตญ ๋ฒˆํ˜ธํŒ OCR - KLPR_v2 (Model v5)
3
+ Hugging Face Gradio App
4
+ """
5
+
6
+ import gradio as gr
7
+ import torch
8
+ import torch.nn as nn
9
+ from PIL import Image
10
+ import torchvision.transforms as transforms
11
+
12
# ============================================================================
# Model definition
# ============================================================================
class CRNN(nn.Module):
    """CRNN for one-line plate OCR: CNN features -> BiLSTM -> per-step logits.

    NOTE: the layer order inside ``self.cnn`` must stay exactly as listed so
    that ``state_dict`` keys keep matching the shipped checkpoint.
    """

    def __init__(self, img_height, num_chars, rnn_hidden=256):
        super().__init__()

        # Feature extractor. For a 32x200 grayscale input the pooling stages
        # reduce height 32 -> 1 and width 200 -> 50, i.e. 50 time steps.
        feature_layers = [
            nn.Conv2d(1, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2, 2)),

            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2, 2)),

            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),

            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2, 1)),

            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),

            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2, 1)),

            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2, 1)),
        ]
        self.cnn = nn.Sequential(*feature_layers)

        # Sequence model over the width axis; bidirectionality doubles the
        # feature size seen by the classifier head.
        self.rnn = nn.LSTM(512, rnn_hidden, bidirectional=True, num_layers=2, batch_first=True)
        self.fc = nn.Linear(rnn_hidden * 2, num_chars)

    def forward(self, x):
        """Map (B, 1, H, W) images to (B, T, num_chars) logits for CTC."""
        features = self.cnn(x)
        batch, channels, height, width = features.size()
        # Height is pooled down to 1; drop it and treat width as time.
        sequence = features.squeeze(2).permute(0, 2, 1)  # (B, W, C)
        recurrent, _ = self.rnn(sequence)
        return self.fc(recurrent)
63
+
64
# ============================================================================
# CTC decoding
# ============================================================================
def decode_predictions(outputs, itos, blank_idx=0):
    """Greedy CTC decode: argmax per step, collapse repeats, drop blanks.

    ``outputs`` is a (B, T, C) score tensor; ``itos`` maps class index to
    character. Returns one decoded string per batch item.
    """
    def _collapse(sequence):
        # Standard CTC rule: a symbol is emitted only when it differs from
        # the previous step and is not the blank class.
        chars = []
        previous = blank_idx
        for current in sequence:
            if current != previous and current != blank_idx:
                chars.append(itos[int(current)])
            previous = current
        return ''.join(chars)

    best_path = outputs.argmax(2).detach().cpu().numpy()  # (B, T)
    return [_collapse(sequence) for sequence in best_path]
81
+
82
# ============================================================================
# Image preprocessing
# ============================================================================
def preprocess_image(image, img_height=32, max_width=200):
    """Convert an uploaded image to the (1, 1, H, W) tensor the model expects.

    Grayscale, resize to ``img_height`` keeping aspect ratio (width capped at
    ``max_width``), right-pad with white, then normalize to [-1, 1].
    """
    # Gradio passes a numpy array; the PIL pipeline below needs an Image.
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)

    gray = image.convert('L')

    # Scale so that height == img_height, never exceeding max_width.
    src_w, src_h = gray.size
    target_w = min(int(img_height * src_w / src_h), max_width)
    gray = gray.resize((target_w, img_height), Image.LANCZOS)

    # Paste onto a white canvas so every sample is exactly max_width wide.
    canvas = Image.new('L', (max_width, img_height), 255)
    canvas.paste(gray, (0, 0))

    # ToTensor maps pixels to [0, 1]; Normalize((0.5,), (0.5,)) to [-1, 1].
    to_tensor = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ])
    return to_tensor(canvas).unsqueeze(0)  # (1, 1, H, W)
109
+
110
# ============================================================================
# Model loading (module-level side effects: defines img_h, max_w, itos,
# num_chars, device, model for the inference function below)
# ============================================================================
print("๋ชจ๋ธ ๋กœ๋”ฉ ์ค‘...")
checkpoint_path = 'best_ocr_one_line.pth'
# NOTE(review): loads a full checkpoint dict; newer torch versions default to
# weights_only=True and may reject non-tensor entries — confirm against the
# deployed torch version.
checkpoint = torch.load(checkpoint_path, map_location='cpu')

# Geometry and vocabulary come from the checkpoint, with training defaults.
img_h = checkpoint.get('img_h', 32)
max_w = checkpoint.get('max_w', 200)
itos = checkpoint['itos']  # index -> character table used by CTC decoding
num_chars = len(itos)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = CRNN(img_h, num_chars, rnn_hidden=256).to(device)
model.load_state_dict(checkpoint['model_state'])
model.eval()

print(f"โœ“ ๋ชจ๋ธ ๋กœ๋“œ ์™„๋ฃŒ (Device: {device})")
print(f" - Epoch: {checkpoint.get('epoch', '?')}")
# Bug fix: the original formatted checkpoint.get('val_acc', '?') with ':.2%',
# which raises ValueError when 'val_acc' is missing (a str cannot take a '%'
# format spec), defeating the '?' fallback. Format only when numeric.
val_acc = checkpoint.get('val_acc')
print(f" - Val Acc: {val_acc:.2%}" if isinstance(val_acc, (int, float)) else " - Val Acc: ?")
130
+
131
# ============================================================================
# Inference
# ============================================================================
def predict_license_plate(image):
    """Run OCR on an uploaded plate image and return the decoded text."""
    # Gradio passes None when no image has been provided.
    if image is None:
        return "์ด๋ฏธ์ง€๋ฅผ ์—…๋กœ๋“œํ•ด์ฃผ์„ธ์š”."

    try:
        # Preprocess on CPU, then move to the model's device.
        batch = preprocess_image(image, img_h, max_w).to(device)

        # Forward pass; decoding needs no gradients either way.
        with torch.no_grad():
            log_probs = model(batch).log_softmax(2)
        texts = decode_predictions(log_probs, itos)

        # An all-blank decode yields '' -> show an explicit placeholder.
        return texts[0] or "(์ธ์‹ ๊ฒฐ๊ณผ ์—†์Œ)"

    except Exception as e:
        # Surface the failure in the UI instead of crashing the app.
        return f"์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}"
153
+
154
# ============================================================================
# Gradio interface
# ============================================================================
# Single-image in / text out UI; the markdown description is rendered under
# the title on the Space's landing page.
demo = gr.Interface(
    fn=predict_license_plate,
    inputs=gr.Image(type="numpy", label="๋ฒˆํ˜ธํŒ ์ด๋ฏธ์ง€"),
    outputs=gr.Textbox(label="์ธ์‹ ๊ฒฐ๊ณผ"),
    title="๐Ÿš— ํ•œ๊ตญ ๋ฒˆํ˜ธํŒ OCR - KLPR v2",
    description="""
    ํ•œ๊ตญ ์ž๋™์ฐจ ๋ฒˆํ˜ธํŒ์„ ์ธ์‹ํ•˜๋Š” OCR ๋ชจ๋ธ์ž…๋‹ˆ๋‹ค.

    **๋ชจ๋ธ ์ •๋ณด:**
    - Model: CRNN (CNN + Bidirectional LSTM + CTC)
    - Validation Accuracy: 91.23%
    - Epoch: 18
    - ์ง€์› ๋ฌธ์ž: 77๊ฐœ (ํ•œ๊ธ€ + ์ˆซ์ž + ์ถ”๊ฐ€ ํŠน์ˆ˜ ์ง€์—ญ๋ช…)

    **์‚ฌ์šฉ ๋ฐฉ๋ฒ•:**
    1. ๋ฒˆํ˜ธํŒ ์ด๋ฏธ์ง€๋ฅผ ์—…๋กœ๋“œํ•˜์„ธ์š”
    2. ์ž๋™์œผ๋กœ ๋ฒˆํ˜ธํŒ ๋ฒˆํ˜ธ๊ฐ€ ์ธ์‹๋ฉ๋‹ˆ๋‹ค

    **v2์˜ ํŠน์ง•:**
    - ๋” ๋งŽ์€ ์ง€์—ญ๋ช… ์ง€์› (์‚ฐ, ์ œ, ์„ธ, ์ข…, ํŒŒ)
    - 18 epoch ํ›ˆ๋ จ์œผ๋กœ ๋” ์•ˆ์ •์ ์ธ ํ•™์Šต
    """,
    # No bundled example images in this Space.
    examples=None,
    cache_examples=False,
)

# Launch only when run as a script (Hugging Face Spaces executes app.py).
if __name__ == "__main__":
    demo.launch()
best_ocr_one_line.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da6b839d9550ea5c0ff089f2e669ceb0911a83ed17983787e6de1b0c38bb7f3f
3
+ size 120762491
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ torch
2
+ torchvision
3
+ gradio
4
+ Pillow