"""Gradio demo that detects license plates on a photo and reads their text.

Pipeline: YOLO detector -> FSRCNN x3 super-resolution -> PARSeq text
recognition -> text clean-up -> annotated preview image.
"""
import io
import os
import re

# Third-party imports are kept in their original relative order.
from PIL import Image
from ultralytics import YOLO
import torch
import cv2
import gradio as gr
import torchvision.transforms as T
import albumentations as A
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Folders
input_folder = "./target"
output_folder = "./target_output"
os.makedirs(output_folder, exist_ok=True)

# Detector model
license_plate_detector = YOLO("./models/yolo11x.pt")

# SuperResolution model
sr = cv2.dnn_superres.DnnSuperResImpl_create()
sr.readModel("./models/FSRCNN_x3.pb")
sr.setModel("fsrcnn", 3)


class App:
    """Callable wrapper around the recognizers published in ``baudm/parseq``.

    Models are fetched through ``torch.hub`` on first use and cached per name.
    """

    models = ['parseq', 'parseq_tiny', 'abinet', 'crnn', 'trba', 'vitstr']

    def __init__(self):
        self._model_cache = {}
        self._preprocess = T.Compose([
            T.Resize((32, 128), T.InterpolationMode.BICUBIC),
            T.ToTensor(),
            T.Normalize(0.5, 0.5),
        ])

    def _get_model(self, name):
        """Return the model called *name*, loading and caching it if needed."""
        if name in self._model_cache:
            return self._model_cache[name]
        model = torch.hub.load('baudm/parseq', name, pretrained=True).eval().to(device)
        self._model_cache[name] = model
        return model

    @torch.inference_mode()
    def __call__(self, model_name, image):
        """Recognize the text on a PIL image.

        Returns ``(label, [raw_label, confidences])``; ``('', [])`` when
        *image* is ``None``. Confidences are strings formatted with one
        decimal place.
        """
        if image is None:
            return '', []
        model = self._get_model(model_name)
        image = self._preprocess(image.convert('RGB')).unsqueeze(0).to(device)
        pred = model(image).softmax(-1)
        label, _ = model.tokenizer.decode(pred)
        raw_label, raw_confidence = model.tokenizer.decode(pred, raw=True)
        max_len = 25 if model_name == 'crnn' else len(label[0]) + 1
        conf = list(map('{:0.1f}'.format, raw_confidence[0][:max_len].tolist()))
        return label[0], [raw_label[0][:max_len], conf]


p = App()

# Recognized texts that must never be reported.
black_list = ["Y985BE152"]

# A digit in the first position is replaced with its look-alike Latin letter
# (presumably because a plate is expected to start with a letter).
_LEADING_DIGIT_FIXES = {"8": "B", "7": "T", "0": "O"}


def pil_to_np(image):
    """Convert a PIL image to a NumPy array."""
    return np.array(image)


def np_to_pil(image_np):
    """Convert a NumPy array to a PIL image."""
    return Image.fromarray(image_np)


def detect_license_plates(model, image):
    """Run the detector on *image* and crop every detected box.

    The image is converted to grayscale and contrast-equalized (CLAHE) before
    detection; the crops are taken from that enhanced image.

    Returns a list of ``(crop, (x1, y1, x2, y2))`` tuples.
    """
    plate_image_np = pil_to_np(image)
    transform = A.Compose([
        A.ToGray(p=1.0),
        A.CLAHE(clip_limit=2.0, tile_grid_size=(8, 8), p=1.0),
    ])
    transformed = transform(image=plate_image_np)['image']
    if len(transformed.shape) == 2:
        transformed = cv2.cvtColor(transformed, cv2.COLOR_GRAY2RGB)
    image = np_to_pil(transformed)

    results = model(image)
    plates = []
    for result in results:
        for box in result.boxes.xyxy.cpu().numpy():
            x1, y1, x2, y2 = map(int, box)
            # A zero-area box yields an empty crop that cannot be upsampled.
            if x2 <= x1 or y2 <= y1:
                continue
            plate = image.crop((x1, y1, x2, y2))
            plates.append((plate, (x1, y1, x2, y2)))
    return plates


def preprocess_license_plate(plate_image: Image.Image):
    """Prepare a plate crop for recognition.

    Steps: convert to single-channel grayscale (unless already so), upsample
    with the FSRCNN x3 model, then apply CLAHE and a morphological erosion.
    Returns the result as a PIL image.
    """
    plate_image_np = pil_to_np(plate_image)
    if not (plate_image_np.ndim == 2 or plate_image_np.shape[-1] == 1):
        plate_image_np = A.ToGray(p=1.0, num_output_channels=1)(image=plate_image_np)['image']

    super_resolved = sr.upsample(plate_image_np)

    augmented = A.Compose([
        A.CLAHE(clip_limit=2, tile_grid_size=(1, 1), p=1.0),
        A.Morphological(p=1.0, scale=(4, 4), operation="erosion"),
    ])(image=super_resolved)['image']

    super_resolved_pil = np_to_pil(augmented)
    return super_resolved_pil


def _normalize_plate_text(raw_text):
    """Clean up a raw recognizer output; may return an empty string.

    Keeps only Latin letters and digits, upper-cases, maps ``V`` to ``Y``,
    drops ``I``, fixes a leading look-alike digit and truncates to 9 characters.
    """
    text = re.sub(r"[^A-Za-z0-9]", "", raw_text).upper()
    text = text.replace('V', 'Y').replace('I', '')
    # Guard on emptiness: the clean-up above can remove every character.
    if text and text[0] in _LEADING_DIGIT_FIXES:
        text = _LEADING_DIGIT_FIXES[text[0]] + text[1:]
    return text[:9]


def process_image(image_path: Image.Image):
    """Detect and read every plate on the image, and draw the results.

    Args:
        image_path: the input picture as a PIL image (despite the name, not a
            path), or ``None`` when nothing was uploaded.

    Returns:
        ``(annotated_image, recognized_texts)``: a PIL image with a red box
        and label for each accepted plate, and the list of accepted texts.
        ``(None, [])`` when *image_path* is ``None``.
    """
    if image_path is None:
        return None, []

    image_np = np.array(image_path)
    recognized_texts = []
    output_buffer = io.BytesIO()

    fig, ax = plt.subplots(1, figsize=(10, 6))
    try:
        ax.imshow(image_np)
        ax.axis('off')

        plates = detect_license_plates(license_plate_detector, image_path)
        for plate, bbox in plates:
            preprocessed_plate = preprocess_license_plate(plate)
            recognized_text, _ = p("parseq", preprocessed_plate)

            # Only raw outputs longer than 5 characters are considered.
            if not recognized_text or len(recognized_text) <= 5:
                continue
            recognized_text = _normalize_plate_text(recognized_text)
            if not recognized_text or recognized_text in black_list:
                continue

            recognized_texts.append(recognized_text)
            x1, y1, x2, y2 = bbox
            rect = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
                                     edgecolor='r', facecolor='none')
            ax.add_patch(rect)
            ax.text(x1, y1 - 10, recognized_text, color='red', fontsize=12,
                    bbox=dict(facecolor='white', alpha=0.5))

        # Render into memory: a fixed file name on disk would be overwritten
        # by concurrent requests.
        fig.savefig(output_buffer, format="png", bbox_inches='tight')
    finally:
        # Close this specific figure even when a step above fails.
        plt.close(fig)

    output_buffer.seek(0)
    annotated = Image.open(output_buffer)
    annotated.load()  # decode now; Image.open alone is lazy
    return annotated, recognized_texts


# Gradio UI
target_folder = "./target"
example_images = [
    os.path.join(target_folder, file_name)
    for file_name in sorted(os.listdir(target_folder))
    if file_name.lower().endswith((".jpg", ".jpeg", ".png", ".bmp"))
] if os.path.isdir(target_folder) else []

interface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil", label="Загрузите фото машины с номером 📤"),
    outputs=[
        gr.Image(type="pil", label="📸 Выход 0 - Обработанное изображение"),
        gr.JSON(label="🔍 Выход 1 - Распознанный номер"),
    ],
    title="Распознавание российских номеров",
    description="🔎 **Загрузите изображение с автомобильным номером** и модель автоматически **определит госномер!** 🔥\n\n📸 **Форматы:** JPG, PNG, BMP",
    # No example files -> pass None so no examples section is built.
    examples=example_images or None,
    flagging_mode="never",
    theme="compact",
)

if __name__ == "__main__":
    interface.launch(share=True)