|
|
""" |
|
|
Korean License Plate OCR - KLPR v2 (Model v5) |
|
|
Hugging Face Gradio App |
|
|
""" |
|
|
|
|
|
from __future__ import annotations |
|
|
|
|
|
import gradio as gr |
|
|
import gradio_client.utils as client_utils |
|
|
import torch |
|
|
import torch.nn as nn |
|
|
from PIL import Image |
|
|
import torchvision.transforms as transforms |
|
|
import numpy as np |
|
|
|
|
|
|
|
|
# gradio_client crashes on JSON schemas that are a bare boolean (`true`/`false`
# are valid schemas meaning "anything"/"nothing").  Patch its converter once,
# guarding with a sentinel attribute so re-imports don't double-wrap.
if not getattr(client_utils, "_patched_bool_schema", False):
    _orig_json_schema_to_python_type = client_utils._json_schema_to_python_type

    def _safe_json_schema_to_python_type(schema, defs=None):
        """Map boolean schemas to "Any"; delegate everything else unchanged."""
        return "Any" if isinstance(schema, bool) else _orig_json_schema_to_python_type(schema, defs)

    client_utils._json_schema_to_python_type = _safe_json_schema_to_python_type
    client_utils._patched_bool_schema = True
|
|
|
|
|
|
|
|
class CRNN(nn.Module):
    """CRNN for CTC-style OCR: CNN feature extractor -> 2-layer BiLSTM -> linear head.

    The conv/pool stack is laid out so that a 32-px-high input collapses to
    height 1 after the five pooling stages, while width is only halved twice
    (the (2, 1) pools keep the time axis long for CTC).

    NOTE: `img_height` is accepted for interface compatibility but not used
    directly; the architecture implicitly assumes a 32-px input height.
    """

    def __init__(self, img_height, num_chars, rnn_hidden=256):
        super().__init__()

        def _conv_unit(in_ch, out_ch, with_bn):
            # One conv "unit": 3x3 conv (+ optional BatchNorm) + ReLU.
            unit = [nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1)]
            if with_bn:
                unit.append(nn.BatchNorm2d(out_ch))
            unit.append(nn.ReLU(inplace=True))
            return unit

        # Layer order (and therefore nn.Sequential indices / state-dict keys)
        # must stay exactly as in the released checkpoint.
        stack = []
        stack += _conv_unit(1, 64, with_bn=False)
        stack.append(nn.MaxPool2d((2, 2)))
        stack += _conv_unit(64, 128, with_bn=False)
        stack.append(nn.MaxPool2d((2, 2)))
        stack += _conv_unit(128, 256, with_bn=True)
        stack += _conv_unit(256, 256, with_bn=True)
        stack.append(nn.MaxPool2d((2, 1)))
        stack += _conv_unit(256, 512, with_bn=True)
        stack += _conv_unit(512, 512, with_bn=True)
        stack.append(nn.MaxPool2d((2, 1)))
        stack += _conv_unit(512, 512, with_bn=True)
        stack.append(nn.MaxPool2d((2, 1)))
        self.cnn = nn.Sequential(*stack)

        self.rnn = nn.LSTM(512, rnn_hidden, bidirectional=True, num_layers=2, batch_first=True)
        self.fc = nn.Linear(rnn_hidden * 2, num_chars)

    def forward(self, x):
        """Map a (B, 1, H, W) grayscale batch to (B, W', num_chars) logits."""
        features = self.cnn(x)                 # (B, 512, 1, W')
        sequence = features.squeeze(2)         # drop collapsed height axis
        sequence = sequence.permute(0, 2, 1)   # (B, W', 512): time-major for the LSTM
        rnn_out, _ = self.rnn(sequence)
        return self.fc(rnn_out)
|
|
|
|
|
|
|
|
def decode_predictions(outputs, itos, blank_idx=0):
    """Greedy (best-path) CTC decoding.

    Takes the argmax class at each timestep, collapses consecutive
    duplicates, and drops blank tokens, producing one string per batch item.

    outputs: (B, T, C) logits/log-probs tensor; itos: index -> character table.
    """
    best_paths = outputs.argmax(2).detach().cpu().numpy()
    texts = []
    for raw_path in best_paths:
        path = [int(i) for i in raw_path]
        # Pair each timestep with its predecessor (blank before the first step)
        # so repeats and blanks can be filtered in one pass.
        kept = [
            itos[cur]
            for prev, cur in zip([blank_idx] + path[:-1], path)
            if cur != blank_idx and cur != prev
        ]
        texts.append("".join(kept))
    return texts
|
|
|
|
|
|
|
|
def preprocess_image(image, img_height=32, max_width=200):
    """Convert an input image to a normalized (1, 1, img_height, max_width) tensor.

    Accepts a PIL image, a numpy array, or a path / file-like object.
    The image is grayscaled, resized to `img_height` with aspect ratio kept
    (width capped at `max_width`), left-aligned on a white canvas of fixed
    width, and normalized to [-1, 1].
    """
    # Coerce non-PIL inputs: arrays are wrapped directly, anything else is
    # handed to Image.open as a path / file object.
    if not isinstance(image, Image.Image):
        if isinstance(image, np.ndarray):
            image = Image.fromarray(image.astype("uint8"))
        else:
            image = Image.open(image)

    image = image.convert("L")
    w, h = image.size
    # Keep aspect ratio, but clamp to >= 1 so Image.resize never receives a
    # zero width for extremely tall/narrow inputs (int(...) can round to 0).
    new_w = max(1, min(int(img_height * w / h), max_width))
    image = image.resize((new_w, img_height), Image.LANCZOS)

    # Left-paste onto a fixed-size white canvas so every tensor has the
    # same width regardless of the source aspect ratio.
    new_img = Image.new("L", (max_width, img_height), 255)
    new_img.paste(image, (0, 0))

    transform = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
    )
    return transform(new_img).unsqueeze(0)
|
|
|
|
|
|
|
|
# ---- Model bootstrap (runs at import time, as Gradio/HF Spaces expect) ----
print("모델 로딩 중...")
checkpoint_path = "best_ocr_one_line.pth"
# weights_only=False: the checkpoint stores plain Python objects (itos, metadata)
# besides tensors, which the torch>=2.6 weights_only default would reject.
# NOTE(review): torch.load unpickles arbitrary objects — only load trusted files.
checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=False)

# Training-time preprocessing parameters, with fallbacks for older checkpoints.
img_h = checkpoint.get("img_h", 32)
max_w = checkpoint.get("max_w", 200)
itos = checkpoint["itos"]  # index -> character table used by the CTC decoder
num_chars = len(itos)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CRNN(img_h, num_chars, rnn_hidden=256).to(device)
model.load_state_dict(checkpoint["model_state"])
model.eval()

print(f"✅ 모델 로드 완료 (Device: {device})")
print(f"   - Epoch: {checkpoint.get('epoch', '?')}")
# Bug fix: the original applied the :.2% format spec to the '?' fallback
# string, raising ValueError whenever 'val_acc' was absent from the checkpoint.
val_acc = checkpoint.get("val_acc")
print(f"   - Val Acc: {val_acc:.2%}" if isinstance(val_acc, (int, float)) else "   - Val Acc: ?")
|
|
|
|
|
|
|
|
def predict_license_plate(image):
    """Run OCR on one plate image and return the decoded text or a user message.

    `image` comes from the Gradio Image component (PIL image, or None when the
    user submits without an upload).  Fixes the corrupted/mis-encoded Korean
    string literals of the original (one was split across source lines).
    """
    if image is None:
        return "이미지를 업로드해 주세요."
    try:
        image_tensor = preprocess_image(image, img_h, max_w).to(device)
        with torch.no_grad():
            # log_softmax keeps the outputs on the same scale as CTC training.
            outputs = model(image_tensor).log_softmax(2)
        result = decode_predictions(outputs, itos)[0]
        return result if result else "(인식 결과 없음)"
    except Exception as exc:  # UI boundary: surface the error text instead of crashing the app
        return f"오류 발생: {exc}"
|
|
|
|
|
|
|
|
# Gradio UI definition: single image in -> recognized plate text out.
# Restores the mis-encoded Korean labels/description (the description literal
# was split across source lines by the encoding corruption).
demo = gr.Interface(
    fn=predict_license_plate,
    inputs=gr.Image(type="pil", label="번호판 이미지"),
    outputs=gr.Textbox(label="인식 결과"),
    title="🚗 한국 번호판 OCR - KLPR v2",
    description=(
        "번호판 이미지에서 문자를 인식합니다.\n\n"
        "**모델 정보:** CRNN (CNN + BiLSTM + CTC)\n"
        "**입력:** 번호판 이미지 1장"
    ),
    api_name="predict",
    cache_examples=False,
)
|
|
|
|
|
# Script entry point: start the Gradio server (per the header, this file is a
# Hugging Face Spaces app, so import-time setup above must already have run).
if __name__ == "__main__":
    demo.launch()
|
|
|