"""Gradio app that rasterizes an SVG captcha and decodes it with an ONNX model."""

import base64
import string
import uuid
from io import BytesIO

from cairosvg import svg2png
import cv2
import numpy as np
import gradio as gr
import onnxruntime as ort
from PIL import Image

# Size the rasterized captcha is resized to before it is fed to the model.
IMG_HEIGHT = 300
IMG_WIDTH = 500
# Decoded answers are truncated to this many characters.
MAX_LENGTH = 4
# Class index ``i + 1`` of the model output maps to ``CHARACTERS[i]``:
# digits first, then upper-case, then lower-case ASCII letters.
CHARACTERS = list(string.digits + string.ascii_uppercase + string.ascii_lowercase)
MODEL_PATH = 'model.onnx'

# Prefix that marks the input as a base64 data URI instead of raw SVG markup.
_SVG_DATA_URI_PREFIX = 'data:image/svg+xml;base64,'

# Loaded once at import time and shared by every request.
session = ort.InferenceSession(
    MODEL_PATH,
    providers=['CUDAExecutionProvider', 'CPUExecutionProvider'],
)


def preprocess_captcha(image: Image.Image) -> Image.Image:
    """Binarize a captcha image with Otsu thresholding.

    Args:
        image: Any PIL image; it is converted to RGB first.

    Returns:
        A new RGB PIL image holding the thresholded (0/255) picture.
    """
    pil = image.convert("RGB")
    cv_img = cv2.cvtColor(np.array(pil), cv2.COLOR_RGB2BGR)
    gray = cv2.cvtColor(cv_img, cv2.COLOR_BGR2GRAY)
    # With THRESH_OTSU the threshold is chosen automatically; the 0 is ignored.
    _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return Image.fromarray(thresh).convert("RGB")


def get_result(pred: np.ndarray) -> str:
    """Greedily decode the model output into a captcha string.

    Only the first batch element (``pred[0]``) is decoded. For each step the
    highest-scoring class is taken; consecutive repeats are collapsed and the
    two special classes ``0`` and ``len(CHARACTERS) + 1`` are skipped
    (presumably padding/unknown and the CTC blank -- confirm against the
    model's training vocabulary).

    Args:
        pred: Model output, indexable as ``pred[0][step]`` where each step is
            an array of per-class scores.

    Returns:
        The decoded text, truncated to ``MAX_LENGTH`` characters.
    """
    blank_index = len(CHARACTERS) + 1
    decoded = []
    previous = None
    for step_scores in pred[0]:
        char_ind = step_scores.argmax()
        if char_ind != previous and char_ind != 0 and char_ind != blank_index:
            decoded.append(CHARACTERS[char_ind - 1])
        # Updated on every step so that a special class between two identical
        # characters lets the second one be emitted.
        previous = char_ind
    return "".join(decoded)[:MAX_LENGTH]


def _svg_payload_bytes(text: str) -> bytes:
    """Return the SVG bytes for raw markup or a base64 ``data:`` URI."""
    if text.startswith(_SVG_DATA_URI_PREFIX):
        return base64.b64decode(text.rsplit(',', 1)[-1])
    return text.encode('utf-8')


def _to_model_input(processed: Image.Image) -> np.ndarray:
    """Turn a preprocessed captcha into the float32 tensor the model takes."""
    img = processed.convert('L')
    img = img.resize((IMG_WIDTH, IMG_HEIGHT))
    img = np.array(img)                      # (IMG_HEIGHT, IMG_WIDTH)
    img = np.expand_dims(img, axis=1)        # (IMG_HEIGHT, 1, IMG_WIDTH)
    img = np.expand_dims(img, axis=-1)       # (IMG_HEIGHT, 1, IMG_WIDTH, 1)
    img = img.transpose([1, 2, 0, 3])        # (1, IMG_WIDTH, IMG_HEIGHT, 1)
    # Scale 0..255 pixel values into 0..1.
    return img.astype(np.float32) / 255.


def predict(svg_text: str) -> str:
    """Solve a captcha supplied as SVG markup or a base64 SVG data URI.

    Args:
        svg_text: Raw SVG text, or a string starting with
            ``data:image/svg+xml;base64,``. ``None`` and blank strings are
            treated as empty input.

    Returns:
        The decoded captcha text, or ``"Empty input"`` when nothing was given.

    Raises:
        Whatever ``base64.b64decode``, ``svg2png``, ``Image.open`` or the
        ONNX session raise for malformed input; errors are not swallowed.
    """
    request_id = str(uuid.uuid4())
    print(f"Yeni istek geldi. ID: {request_id}")

    # ``svg_text`` may be None (e.g. a cleared field or an API call without a
    # value); treat that like an empty string instead of crashing on .strip().
    text = (svg_text or "").strip()
    if not text:
        print(f"OCR cevabı döndürüldü. ID: {request_id}, Cevap: Empty input")
        return "Empty input"

    png_bytes = svg2png(bytestring=_svg_payload_bytes(text))
    # preprocess_captcha() returns a fresh image, so the decoded PNG handle
    # can be released as soon as preprocessing is done.
    with Image.open(BytesIO(png_bytes)) as image:
        processed = preprocess_captcha(image)
    img = _to_model_input(processed)

    # The exported graph also declares a 'label' input, so a placeholder must
    # be fed. NOTE(review): presumably only used by a training-time loss and
    # irrelevant to the prediction -- confirm, then consider a constant tensor.
    dummy_label = np.random.default_rng().random((28, 28), dtype=np.float32)
    result_tensor = session.run(None, {'image': img, 'label': dummy_label})[0]

    result = get_result(result_tensor)
    print(f"OCR cevabı döndürüldü. ID: {request_id}, Cevap: {result}")
    return result


demo = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(label="SVG", lines=6, placeholder="SVG to PNG..."),
    outputs=gr.Textbox(label="Solution"),
    title="Captcha Solver",
)
demo.launch()