# Source: Hugging Face Space "m2zm/pixelplanet" (status: Running).
# File size: 3,897 bytes.

import base64
import uuid
import cairosvg
import cv2
import numpy as np
import re
import torch
from PIL import Image, ImageEnhance, ImageFilter
from transformers import VisionEncoderDecoderModel, TrOCRProcessor
import gradio as gr

# Run on the GPU when torch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Half precision is only requested on CUDA. On the CPU the weights are loaded
# in float32, because fp16 CPU kernels are slow and not available for every
# operator on every torch build.
model_dtype = torch.float16 if device == "cuda" else torch.float32

# Image pre-processor and tokenizer that belong to the checkpoint loaded below.
processor = TrOCRProcessor.from_pretrained("anuashok/ocr-captcha-v3")

model = VisionEncoderDecoderModel.from_pretrained(
    "anuashok/ocr-captcha-v3",
    torch_dtype=model_dtype,
).to(device)

# The model is only used for inference, so put it in evaluation mode.
model.eval()

def advanced_preprocess(cv_image):
    """Turn a BGR OpenCV image into a high-contrast RGB PIL image for OCR.

    The image is converted to grayscale, smoothed with a bilateral filter,
    binarised with an inverted Otsu threshold, and finally sharpened and
    contrast-boosted with Pillow.

    Args:
        cv_image: Colour image array in OpenCV's BGR channel order.

    Returns:
        The processed picture as an RGB ``PIL.Image.Image``.
    """
    grayscale = cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY)
    smoothed = cv2.bilateralFilter(grayscale, 5, 75, 75)

    # cv2.threshold returns (threshold_used, image); only the image is needed.
    binarised = cv2.threshold(
        smoothed, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    )[1]

    rgb_image = Image.fromarray(binarised).convert("RGB")
    sharpened = rgb_image.filter(ImageFilter.SHARPEN)
    return ImageEnhance.Contrast(sharpened).enhance(2.5)

def run_ocr(pil_image):
    """Run the captcha OCR model on one image and return the cleaned text.

    Uses the module-level ``processor``, ``model`` and ``device``.

    Args:
        pil_image: Image to recognise, in a form the processor accepts.

    Returns:
        The decoded text, upper-cased, with everything except ASCII letters
        and digits removed. The result can be shorter than four characters.
    """
    encoded = processor(images=pil_image, return_tensors="pt")
    pixel_values = encoded.pixel_values.to(device)

    # NOTE(review): max_length/min_length are measured in tokens, not
    # characters, and the repetition penalties discourage repeated symbols;
    # confirm both suit the captcha alphabet.
    generation_options = dict(
        max_length=4,
        min_length=4,
        num_beams=3,
        no_repeat_ngram_size=2,
        early_stopping=True,
        length_penalty=1.0,
        repetition_penalty=1.5,
    )

    # Gradients are never needed for inference.
    with torch.no_grad():
        generated_ids = model.generate(pixel_values, **generation_options)

    decoded = processor.batch_decode(generated_ids, skip_special_tokens=True)
    raw_text = decoded[0].strip()
    return re.sub(r'[^A-Za-z0-9]', '', raw_text.upper())

def genRotations(svg):
    """Solve a two-part rotating SVG captcha by trying a fixed set of angles.

    The ``<animateTransform>`` elements are stripped, then for each of eight
    angles the SVG is rendered twice: once with the ``rotate(±1, x, 150)``
    transforms of the first pivot set to that angle, and once with those of
    the last pivot. A vertical band around the first pivot's x coordinate is
    copied from the first render into the second, and the composite image is
    pre-processed and passed to OCR.

    Args:
        svg: SVG document text.

    Returns:
        The first OCR result that is exactly four alphanumeric characters, or
        ``""`` when fewer than two ``rotate(±1, x, y)`` transforms are present
        or no angle yields such a result.
    """
    # Remove the animation so the renderer sees a static picture.
    static_svg = re.sub(
        r'<animateTransform type="rotate" repeatCount="indefinite" attributeName="transform" from="\d+ \d+,\d+" to="\d+ \d+ \d+" begin="\d+" dur="\d+s"/>',
        '',
        svg,
    )
    pivots = re.findall(r"rotate\((1|-1), (\d+), (\d+)\)", static_svg)
    if len(pivots) < 2:
        return ""

    # Only the x coordinate of each pivot is used from here on.
    firstcoords = pivots[0][1]
    secondcoords = pivots[-1][1]
    band_centre = int(firstcoords)

    def render_with_angle(svg_code, angle_pos, coords):
        """Render *svg_code* with the ±1 rotations at *coords* set to ±angle_pos."""
        # NOTE(review): the pivot y is hard-coded to 150 although the scan
        # above accepts any y; confirm the captcha always uses 150.
        positive_set = re.sub(rf'rotate\(1, {coords}, 150\)', f'rotate({angle_pos}, {coords}, 150)', svg_code)
        both_set = re.sub(rf'rotate\(-1, {coords}, 150\)', f'rotate(-{angle_pos}, {coords}, 150)', positive_set)
        png_bytes = cairosvg.svg2png(bytestring=both_set.encode('utf-8'))
        return cv2.imdecode(np.frombuffer(png_bytes, np.uint8), cv2.IMREAD_COLOR)

    def paste_band(base_img, part_img, width=68):
        """Return a copy of *base_img* with the band around the first pivot taken from *part_img*."""
        # Clamp at 0: a negative start would wrap around to the right edge of
        # the image and silently select an empty slice instead of the band.
        left = max(band_centre - width, 0)
        right = band_centre + width
        combined = base_img.copy()
        combined[:, left:right] = part_img[:, left:right]
        return combined

    for angle in (0, 45, 90, 135, 180, 225, 270, 315):
        first_render = render_with_angle(static_svg, angle, firstcoords)
        second_render = render_with_angle(static_svg, angle, secondcoords)

        # cv2.imdecode returns None when the bytes cannot be decoded; skip
        # such an angle (or mismatched renders) rather than crash on slicing.
        if first_render is None or second_render is None:
            continue
        if first_render.shape != second_render.shape:
            continue

        composite = paste_band(second_render, first_render)
        candidate = run_ocr(advanced_preprocess(composite))

        if len(candidate) == 4 and candidate.isalnum():
            return candidate

    return ""

def predict(svg_text):
    """Gradio handler: solve the captcha contained in *svg_text*.

    Args:
        svg_text: Raw SVG markup, or a ``data:image/svg+xml;base64,`` data URI
            wrapping it. ``None`` is treated like an empty string.

    Returns:
        The recognised captcha text; ``"XXXX"`` when no solution was found or
        the solver failed; or one of the messages ``"Empty input"``,
        ``"Invalid input"`` (undecodable data URI) and ``"Too large"``
        (SVG longer than 30000 characters).
    """
    request_id = str(uuid.uuid4())
    print(f"Yeni istek geldi. ID: {request_id}")

    def reply(answer):
        # Single exit point so the log always shows exactly what is returned.
        print(f"OCR cevabı döndürüldü. ID: {request_id}, Cevap: {answer}")
        return answer

    text = (svg_text or "").strip()
    if not text:
        return reply("Empty input")

    if text.startswith('data:image/svg+xml;base64,'):
        try:
            svg = base64.b64decode(text.split(',')[-1]).decode('utf-8')
        except ValueError:
            # binascii.Error (bad base64) and UnicodeDecodeError (payload is
            # not UTF-8) are both ValueError subclasses.
            return reply("Invalid input")
    else:
        svg = text

    if len(svg) > 30000:
        return reply("Too large")

    try:
        result = genRotations(svg)
    except Exception as exc:
        # Request boundary: the SVG is untrusted and rendering/OCR can fail in
        # many ways. Log the error and fall back to the "unsolved" answer.
        print(f"OCR hatası. ID: {request_id}, Hata: {exc!r}")
        result = ""

    return reply(result if result else "XXXX")

# Single-textbox UI: SVG text (or base64 data URI) in, solved captcha text out.
demo = gr.Interface(
    title="Captcha Solver",
    fn=predict,
    inputs=gr.Textbox(lines=6, label="SVG", placeholder="SVG to PNG..."),
    outputs=gr.Textbox(label="Solution"),
)
# Start the Gradio app as soon as this module is executed.
demo.launch()