import base64
from io import BytesIO
import uuid
from cairosvg import svg2png
import cv2
import numpy as np
import gradio as gr
import torch
from PIL import Image
from transformers import VisionEncoderDecoderModel, TrOCRProcessor

# Run inference on the GPU when CUDA is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the processor and the encoder-decoder checkpoint once at import time
# so every request reuses the same objects.
processor = TrOCRProcessor.from_pretrained("anuashok/ocr-captcha-v3")
model = VisionEncoderDecoderModel.from_pretrained("anuashok/ocr-captcha-v3")
model = model.to(device)
# Evaluation mode: the model is only used for inference in this file.
model.eval()

def run_ocr(pil_image):
    """Transcribe the text in an image with the module-level TrOCR model.

    Args:
        pil_image: Image handed to the processor; callers in this file pass
            an RGB PIL image.

    Returns:
        The decoded string for the single input image, with special tokens
        removed.
    """
    encoded = processor(images=pil_image, return_tensors="pt")
    inputs = encoded.pixel_values.to(device)
    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        token_ids = model.generate(inputs)
    decoded = processor.batch_decode(token_ids, skip_special_tokens=True)
    # One image in, one string out: take the only batch entry.
    return decoded[0]

def preprocess_captcha(image):
    """Binarize a captcha image with Otsu thresholding before OCR.

    Any transparency is first flattened onto a white background. A plain
    ``convert("RGB")`` simply discards the alpha channel, so transparent
    regions take whatever RGB value they happen to store (commonly black),
    which can merge a dark foreground into the background.

    Args:
        image: PIL image in any mode (with or without an alpha channel).

    Returns:
        A new RGB PIL image of the same size in which every pixel is either
        black or white.
    """
    # Composite over opaque white; fully opaque inputs pass through unchanged.
    rgba = image.convert("RGBA")
    canvas = Image.new("RGBA", rgba.size, (255, 255, 255, 255))
    flattened = Image.alpha_composite(canvas, rgba).convert("RGB")

    # Go straight from RGB to grayscale; an intermediate BGR copy adds nothing.
    gray = cv2.cvtColor(np.array(flattened), cv2.COLOR_RGB2GRAY)

    # With THRESH_OTSU the threshold argument (0) is ignored and the cut-off
    # is derived from the image histogram.
    _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Return three channels, matching what the function returned before.
    return Image.fromarray(thresh).convert("RGB")

def _svg_bytes_from_text(text):
    """Return the SVG bytes carried by *text* (raw markup or an SVG data URI)."""
    header, sep, payload = text.partition(",")
    header = header.lower()
    if sep and header.startswith("data:image/svg+xml"):
        # Accept extra parameters such as ";charset=utf-8" before ";base64".
        if header.endswith(";base64"):
            return base64.b64decode(payload)
        # Non-base64 data URIs carry percent-encoded data (RFC 2397).
        from urllib.parse import unquote_to_bytes

        return unquote_to_bytes(payload)
    # Anything else is treated as literal SVG markup.
    return text.encode("utf-8")


def predict(svg_text):
    """Solve a captcha supplied as SVG markup or as an SVG data URI.

    The SVG is rendered to PNG, binarized by ``preprocess_captcha`` and then
    transcribed by ``run_ocr``. Each call logs a random request id on entry
    and again together with the answer.

    Args:
        svg_text: Raw SVG markup or a ``data:image/svg+xml`` URI (base64 or
            percent-encoded). ``None`` and whitespace-only strings count as
            empty input.

    Returns:
        The recognized text, or the string ``"Empty input"`` when there is
        nothing to process.

    Raises:
        Exception: Errors from base64 decoding, SVG rendering or image
            loading are not caught here and propagate to the caller.
    """
    request_id = str(uuid.uuid4())
    print(f"Yeni istek geldi. ID: {request_id}")

    # Treat None (e.g. a null API payload) like an empty string instead of
    # failing with AttributeError on .strip().
    text = (svg_text or "").strip()
    if not text:
        print(f"OCR cevabı döndürüldü. ID: {request_id}, Cevap: Empty input")
        return "Empty input"

    png_bytes = svg2png(bytestring=_svg_bytes_from_text(text))
    # The preprocessing step returns a new image, so the decoded PNG can be
    # closed as soon as it has been consumed.
    with Image.open(BytesIO(png_bytes)) as image:
        processed = preprocess_captcha(image)

    result = run_ocr(processed)
    print(f"OCR cevabı döndürüldü. ID: {request_id}, Cevap: {result}")
    return result

# Single-textbox Gradio UI: the text entered in the "SVG" box is passed to
# `predict`, and the string it returns is shown in the "Solution" box.
demo = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(label="SVG", lines=6, placeholder="SVG to PNG..."),
    outputs=gr.Textbox(label="Solution"),
    title="Captcha Solver",
)

# Launch the app. NOTE(review): there is no `if __name__ == "__main__":`
# guard, so this call also runs whenever the module is imported.
demo.launch()