# Pixelplanet_old / app.py
# m2zm's picture
# Upload 6 files
# f40ddb8 verified
# Raw
# History Blame
# 2.67 kB
import base64
from io import BytesIO
import uuid
from cairosvg import svg2png
import cv2
import numpy as np
import gradio as gr
import onnxruntime as ort
from PIL import Image
# Size every captcha is resized to before it is handed to the model.
IMG_WIDTH = 500
IMG_HEIGHT = 300

# Decoded answers are truncated to this many characters.
MAX_LENGTH = 4

# Output alphabet: digits, then upper-case, then lower-case ASCII letters.
CHARACTERS = list(
    "0123456789"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
)

# The ONNX model is loaded once at import time and shared by every request.
# Providers are listed in order of preference (CUDA first, then CPU).
MODEL_PATH = 'model.onnx'
session = ort.InferenceSession(
    MODEL_PATH,
    providers=['CUDAExecutionProvider', 'CPUExecutionProvider'],
)
def preprocess_captcha(image):
    """Binarize a captcha image using Otsu thresholding.

    Args:
        image: A PIL image in any mode; it is converted to RGB first.

    Returns:
        A new RGB PIL image holding the black/white thresholded result.
    """
    rgb_pixels = np.array(image.convert("RGB"))
    bgr_pixels = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)
    grayscale = cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2GRAY)

    # With THRESH_OTSU set, OpenCV picks the threshold itself, so the 0 passed
    # here is only a placeholder. Element [1] of the result is the image.
    binary = cv2.threshold(
        grayscale, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU
    )[1]

    return Image.fromarray(binary).convert("RGB")
def get_result(pred, characters=None, max_length=None):
    """Greedy-decode a per-timestep score tensor into a string.

    For each timestep of the first batch entry, the highest-scoring class
    index is taken. An index produces a character only when it differs from
    the previous timestep's index and is neither ``0`` nor
    ``len(characters) + 1``; both of those are skipped as non-character
    classes. Index ``k`` maps to ``characters[k - 1]``.

    Args:
        pred: Indexable whose first element iterates over per-timestep score
            vectors that provide ``argmax()`` (for example a NumPy array;
            presumably shaped (batch, timesteps, classes) - confirm against
            the model).
        characters: Alphabet to decode with. Defaults to the module-level
            ``CHARACTERS``.
        max_length: Maximum number of characters to return. Defaults to the
            module-level ``MAX_LENGTH``.

    Returns:
        The decoded text, truncated to ``max_length`` characters.
    """
    if characters is None:
        characters = CHARACTERS
    if max_length is None:
        max_length = MAX_LENGTH

    skipped = (0, len(characters) + 1)
    decoded = []
    previous = None
    for scores in pred[0]:
        best = scores.argmax()
        if best != previous and best not in skipped:
            decoded.append(characters[best - 1])
        # Updated on every timestep (including skipped ones) so a repeated
        # character separated by a skipped class is emitted twice.
        previous = best

    return "".join(decoded)[:max_length]
def predict(svg_text):
    """Solve a captcha supplied as SVG and return the recognized text.

    The input may be raw SVG markup or a ``data:image/svg+xml;base64,`` URI.
    It is rendered to PNG, binarized, resized to ``IMG_WIDTH`` x
    ``IMG_HEIGHT``, run through the ONNX ``session`` and decoded with
    ``get_result``. Each request is logged to stdout with a fresh UUID.

    Args:
        svg_text: SVG markup or base64 data URI. ``None``, empty and
            whitespace-only values are all treated as empty input.

    Returns:
        The decoded captcha text, or the string ``"Empty input"`` when no
        SVG was supplied.
    """
    request_id = str(uuid.uuid4())
    print(f"Yeni istek geldi. ID: {request_id}")

    # A null payload arrives as None; route it through the empty-input path
    # instead of failing on None.strip().
    text = (svg_text or "").strip()
    if not text:
        print(f"OCR cevabı döndürüldü. ID: {request_id}, Cevap: Empty input")
        return "Empty input"

    data_uri_prefix = 'data:image/svg+xml;base64,'
    if text.startswith(data_uri_prefix):
        # Everything after the prefix is the base64 payload.
        svg_bytes = base64.b64decode(text[len(data_uri_prefix):])
    else:
        svg_bytes = text.encode('utf-8')

    png_bytes = svg2png(bytestring=svg_bytes)
    image = Image.open(BytesIO(png_bytes))
    processed = preprocess_captcha(image)

    img = processed.convert('L')
    img = img.resize((IMG_WIDTH, IMG_HEIGHT))
    img = np.array(img)                    # (height, width)
    img = np.expand_dims(img, axis=1)      # (height, 1, width)
    img = np.expand_dims(img, axis=-1)     # (height, 1, width, 1)
    img = img.transpose([1, 2, 0, 3])      # (1, width, height, 1)
    img = img.astype(np.float32) / 255.    # scale pixel values to [0, 1]

    # The session is also fed a 'label' input; a random placeholder is used.
    # NOTE(review): presumably the label does not affect the prediction
    # output - confirm against the exported model.
    dummy_label = np.random.default_rng().random((28, 28), dtype=np.float32)
    result_tensor = session.run(None, {'image': img, 'label': dummy_label})[0]

    result = get_result(result_tensor)
    print(f"OCR cevabı döndürüldü. ID: {request_id}, Cevap: {result}")
    return result
# Gradio front end: one multi-line textbox for the SVG, one textbox for the
# decoded answer, both wired to predict().
svg_input = gr.Textbox(label="SVG", lines=6, placeholder="SVG to PNG...")
solution_output = gr.Textbox(label="Solution")

demo = gr.Interface(
    fn=predict,
    inputs=svg_input,
    outputs=solution_output,
    title="Captcha Solver",
)

# Start the web server as soon as the module is executed.
demo.launch()