Spaces:
Running
Running
| import base64 | |
| from io import BytesIO | |
| import uuid | |
| from cairosvg import svg2png | |
| import cv2 | |
| import numpy as np | |
| import gradio as gr | |
| import onnxruntime as ort | |
| from PIL import Image | |
# Size (in pixels) that every rendered captcha is resized to before inference.
IMG_HEIGHT = 300
IMG_WIDTH = 500

# Decoded answers are truncated to at most this many characters.
MAX_LENGTH = 4

# Output alphabet: digits, then upper-case, then lower-case letters.
# The decoder maps model class index ``i`` to ``CHARACTERS[i - 1]``.
CHARACTERS = list(
    "0123456789"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
)

# ONNX model file, resolved relative to the working directory.
MODEL_PATH = 'model.onnx'
# Shared inference session, created once at import time.
# Providers are listed in order of preference: CUDA first, then CPU.
session = ort.InferenceSession(
    MODEL_PATH,
    providers=[
        'CUDAExecutionProvider',
        'CPUExecutionProvider',
    ],
)
def preprocess_captcha(image):
    """Binarize a captcha image with Otsu thresholding.

    Args:
        image: A PIL image in any mode; it is converted to RGB first.

    Returns:
        A new RGB PIL image whose pixels are the black/white result of
        the Otsu threshold applied to the grayscale version of *image*.
    """
    rgb_pixels = np.array(image.convert("RGB"))

    # OpenCV works in BGR order, so swap channels before going to grayscale.
    bgr_pixels = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)
    grayscale = cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2GRAY)

    # With THRESH_OTSU the threshold argument (0) is ignored and the cut-off
    # is computed from the image; only the binarized image is needed here.
    binary = cv2.threshold(
        grayscale, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )[1]

    return Image.fromarray(binary).convert("RGB")
def get_result(pred):
    """Greedy-decode the model output into a captcha string.

    Args:
        pred: Batched model output; only ``pred[0]`` is decoded. Each item
            of ``pred[0]`` is one time step of per-class scores (anything
            with an ``argmax()`` method, e.g. a NumPy row).

    Returns:
        The decoded text, truncated to at most ``MAX_LENGTH`` characters.
    """
    # Class 0 and class len(CHARACTERS) + 1 never produce a character;
    # classes 1..len(CHARACTERS) map to CHARACTERS[index - 1].
    trailing_blank = len(CHARACTERS) + 1

    last = None
    ans = []
    for item in pred[0]:
        char_ind = item.argmax()
        is_blank = char_ind == 0 or char_ind == trailing_blank
        if char_ind != last and not is_blank:
            ans.append(CHARACTERS[char_ind - 1])
        # Track the previous step unconditionally so that consecutive
        # repeats collapse, while a blank between two identical characters
        # still lets both be emitted.
        last = char_ind

    return "".join(ans)[:MAX_LENGTH]
def _svg_source_to_bytes(text):
    """Return SVG bytes from a base64 ``data:`` URI or from literal SVG markup."""
    if text.startswith('data:image/svg+xml;base64,'):
        return base64.b64decode(text.split(',')[-1])
    return text.encode('utf-8')


def _image_to_model_input(image):
    """Turn a PIL image into a float32 array of shape (1, IMG_WIDTH, IMG_HEIGHT, 1)."""
    gray = image.convert('L').resize((IMG_WIDTH, IMG_HEIGHT))
    # PIL yields (height, width); scale to [0, 1] as float32.
    pixels = np.asarray(gray, dtype=np.float32) / 255.
    # Transpose to (width, height), then add batch and channel axes.
    return np.ascontiguousarray(pixels.T[np.newaxis, :, :, np.newaxis])


def predict(svg_text):
    """Solve a captcha supplied as SVG.

    Args:
        svg_text: Either raw SVG markup or a
            ``data:image/svg+xml;base64,...`` URI. ``None`` and
            whitespace-only strings are treated as empty input.

    Returns:
        The decoded captcha text, or the string ``"Empty input"`` when
        nothing was supplied.

    Raises:
        Whatever ``base64.b64decode``, ``svg2png``, ``Image.open`` or the
        ONNX session raise on malformed input; errors are not swallowed.
    """
    request_id = str(uuid.uuid4())
    print(f"Yeni istek geldi. ID: {request_id}")

    # Guard against None as well as blank strings before calling .strip().
    text = (svg_text or "").strip()
    if not text:
        print(f"OCR cevabı döndürüldü. ID: {request_id}, Cevap: Empty input")
        return "Empty input"

    png_bytes = svg2png(bytestring=_svg_source_to_bytes(text))

    # Close the decoded PNG once the binarized copy has been produced.
    with Image.open(BytesIO(png_bytes)) as image:
        processed = preprocess_captcha(image)

    img = _image_to_model_input(processed)

    # The session is fed a second input named 'label'; random values are
    # passed as a placeholder.
    # NOTE(review): presumably a training-time input that does not affect
    # the prediction -- confirm against the exported graph.
    dummy_label = np.random.default_rng().random((28, 28), dtype=np.float32)

    result_tensor = session.run(None, {'image': img, 'label': dummy_label})[0]
    result = get_result(result_tensor)

    print(f"OCR cevabı döndürüldü. ID: {request_id}, Cevap: {result}")
    return result
# Gradio UI: one multi-line textbox for the SVG source, one textbox for
# the solved captcha text returned by ``predict``.
demo = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(
        label="SVG",
        lines=6,
        placeholder="SVG to PNG...",
    ),
    outputs=gr.Textbox(label="Solution"),
    title="Captcha Solver",
)

# Start the web server as soon as the module is executed.
demo.launch()