import base64
from io import BytesIO
import uuid
from cairosvg import svg2png
import cv2
import numpy as np
import gradio as gr
import onnxruntime as ort
from PIL import Image

# Size (in pixels) the rendered captcha is resized to before inference.
IMG_HEIGHT = 300
IMG_WIDTH = 500

# Decoded answers are truncated to this many characters.
MAX_LENGTH = 4

# Output alphabet: digits, then upper-case, then lower-case ASCII letters.
# The decoder maps model class index ``i + 1`` to ``CHARACTERS[i]``.
CHARACTERS = list(
    "0123456789"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
)

# Location of the exported ONNX model, relative to the working directory.
MODEL_PATH = 'model.onnx'
# Create the inference session once at import time so every request reuses it.
# Providers are listed in order of preference: CUDA first, then CPU.
session = ort.InferenceSession(
    MODEL_PATH,
    providers=['CUDAExecutionProvider', 'CPUExecutionProvider'],
)

def preprocess_captcha(image):
    """Binarise a captcha image using Otsu's automatic threshold.

    Args:
        image: A PIL image; it is converted to RGB before processing.

    Returns:
        A new RGB PIL image built from the thresholded grayscale data, so
        every pixel is either black or white.
    """
    rgb_pixels = np.array(image.convert("RGB"))
    # OpenCV works in BGR channel order, so swap before the grayscale step.
    bgr_pixels = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)
    gray_pixels = cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2GRAY)
    # With THRESH_OTSU the explicit threshold (0) is ignored and the level
    # is chosen from the image histogram; [1] is the thresholded image.
    binary_pixels = cv2.threshold(
        gray_pixels, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU
    )[1]
    return Image.fromarray(binary_pixels).convert("RGB")

def get_result(pred):
    """Greedily decode the model output into a captcha string.

    For each step of the first batch entry the highest-scoring class is
    taken. A class is emitted only when it differs from the previous step's
    class (collapsing repeats) and maps to a real character. Class ``i + 1``
    maps to ``CHARACTERS[i]``; class ``0``, class ``len(CHARACTERS) + 1`` and
    any other out-of-range class are skipped instead of raising.

    Args:
        pred: Model output; ``pred[0]`` must be iterable and each item must
            provide ``argmax()`` (e.g. a NumPy array of per-class scores).

    Returns:
        The decoded text, truncated to ``MAX_LENGTH`` characters.
    """
    decoded = []
    previous = None
    for scores in pred[0]:
        class_index = scores.argmax()
        # Valid character classes are 1..len(CHARACTERS); everything else is
        # treated as a non-character (blank/unknown) class.
        if class_index != previous and 1 <= class_index <= len(CHARACTERS):
            decoded.append(CHARACTERS[class_index - 1])
        # Track the raw class (including skipped ones) so that a skipped
        # class between two identical characters lets both be emitted.
        previous = class_index
    return "".join(decoded)[:MAX_LENGTH]

def _svg_bytes_from_text(text):
    """Return the raw SVG bytes carried by *text*.

    *text* is either inline SVG markup or a ``data:`` URI. For a data URI the
    payload after the first comma is base64-decoded when the header ends with
    ``;base64`` and percent-decoded otherwise.
    """
    if text[:5].lower() != 'data:':
        return text.encode('utf-8')
    header, _, payload = text.partition(',')
    if header.lower().endswith(';base64'):
        return base64.b64decode(payload)
    # Local import: the file does not import urllib at module level.
    from urllib.parse import unquote_to_bytes
    return unquote_to_bytes(payload)

def predict(svg_text):
    """Solve a captcha supplied as SVG markup or as an SVG data URI.

    The SVG is rendered to PNG, binarised with ``preprocess_captcha``,
    resized to ``IMG_WIDTH`` x ``IMG_HEIGHT``, fed to the ONNX ``session``
    and decoded with ``get_result``. Each request is logged to stdout with a
    freshly generated UUID.

    Args:
        svg_text: Inline SVG text or a ``data:`` URI; ``None`` and
            whitespace-only values are treated as empty input.

    Returns:
        The decoded captcha text, or the literal ``"Empty input"`` when no
        input was supplied.

    Raises:
        Errors from base64 decoding, SVG rendering, image loading or model
        inference are not caught here and propagate to the caller.
    """
    request_id = str(uuid.uuid4())
    print(f"Yeni istek geldi. ID: {request_id}")

    # A missing value (None) is handled like an empty string instead of
    # failing on ``.strip()``.
    text = (svg_text or "").strip()
    if not text:
        print(f"OCR cevabı döndürüldü. ID: {request_id}, Cevap: Empty input")
        return "Empty input"

    png_bytes = svg2png(bytestring=_svg_bytes_from_text(text))
    image = Image.open(BytesIO(png_bytes))

    processed = preprocess_captcha(image)
    resized = processed.convert('L').resize((IMG_WIDTH, IMG_HEIGHT))

    # (H, W) uint8 -> float32 in [0, 1].
    pixels = np.array(resized).astype(np.float32) / 255.
    # Transpose to (W, H) and add batch and channel axes: (1, W, H, 1).
    batch = np.ascontiguousarray(pixels.T[np.newaxis, :, :, np.newaxis])

    # The session is fed a 'label' input alongside the image.
    # NOTE(review): presumably a leftover training-time input whose value
    # does not affect the prediction — confirm against the exported model.
    dummy_label = np.random.default_rng().random((28, 28), dtype=np.float32)
    result_tensor = session.run(None, {'image': batch, 'label': dummy_label})[0]

    result = get_result(result_tensor)

    print(f"OCR cevabı döndürüldü. ID: {request_id}, Cevap: {result}")
    return result

# Web UI: one multi-line textbox for the SVG source, one textbox for the
# decoded answer. The input component is created before the output one.
_svg_input = gr.Textbox(label="SVG", lines=6, placeholder="SVG to PNG...")
_solution_output = gr.Textbox(label="Solution")

demo = gr.Interface(
    title="Captcha Solver",
    fn=predict,
    inputs=_svg_input,
    outputs=_solution_output,
)

# Start the Gradio server as soon as this module is executed.
demo.launch()