File size: 3,897 Bytes
24c3b65
 
 
 
 
 
 
4348971
29ea798
24c3b65
 
 
 
 
 
 
 
 
 
 
 
16fef22
c2b394f
d6fea40
f64a374
c2b394f
 
f64a374
4348971
24c3b65
 
 
 
 
 
1a4de0d
24c3b65
f64a374
24c3b65
 
 
d6fea40
24c3b65
1a4de0d
f64a374
24c3b65
 
 
 
022227b
d6fea40
24c3b65
 
 
 
 
 
 
 
 
 
022227b
24c3b65
 
 
 
 
 
f64a374
 
fc3087a
 
 
 
 
 
 
 
 
 
16fef22
29ea798
24c3b65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29ea798
24c3b65
 
 
 
 
 
 
022227b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import base64
import uuid
import cairosvg
import cv2
import numpy as np
import re
import torch
from PIL import Image, ImageEnhance, ImageFilter
from transformers import VisionEncoderDecoderModel, TrOCRProcessor
import gradio as gr

# Run on the GPU when one is available; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
processor = TrOCRProcessor.from_pretrained("anuashok/ocr-captcha-v3")

# Half precision is only requested on CUDA. On CPU, float16 inference is
# generally slower and, depending on the PyTorch version, some operators have
# no half-precision CPU kernel, so the weights are loaded as float32 there.
model = VisionEncoderDecoderModel.from_pretrained(
    "anuashok/ocr-captcha-v3",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
).to(device)

# Inference only: put the model in evaluation mode.
model.eval()

def advanced_preprocess(cv_image):
    """Binarize and sharpen a BGR OpenCV image, returning an RGB PIL image.

    Steps: grayscale conversion, bilateral filtering, inverted Otsu
    thresholding, then PIL sharpening and a 2.5x contrast enhancement.

    Args:
        cv_image: Image array in OpenCV BGR channel order.

    Returns:
        The processed image as a ``PIL.Image`` in RGB mode.
    """
    denoised = cv2.bilateralFilter(
        cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY), 5, 75, 75
    )
    # cv2.threshold returns (threshold_used, image); only the image is needed.
    binary = cv2.threshold(
        denoised, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    )[1]
    sharpened = Image.fromarray(binary).convert("RGB").filter(ImageFilter.SHARPEN)
    return ImageEnhance.Contrast(sharpened).enhance(2.5)

def run_ocr(pil_image):
    """Run the module-level TrOCR model on an image and return cleaned text.

    Args:
        pil_image: Image accepted by the module-level ``processor``.

    Returns:
        The decoded prediction, upper-cased, with every character other than
        ASCII letters and digits removed.
    """
    encoded = processor(images=pil_image, return_tensors="pt")
    pixel_values = encoded.pixel_values.to(device)

    # Generation settings, kept together so they are easy to review.
    generation_options = dict(
        max_length=4,
        min_length=4,
        num_beams=3,
        no_repeat_ngram_size=2,
        early_stopping=True,
        length_penalty=1.0,
        repetition_penalty=1.5,
    )
    with torch.no_grad():
        generated_ids = model.generate(pixel_values, **generation_options)

    decoded = processor.batch_decode(generated_ids, skip_special_tokens=True)
    raw_text = decoded[0].strip()
    return re.sub(r'[^A-Za-z0-9]', '', raw_text.upper())

def genRotations(svg):
    """Try a fixed set of rotation angles on a captcha SVG and OCR each render.

    The ``<animateTransform>`` elements are stripped, then the first and last
    static ``rotate(1|-1, x, y)`` transforms found in the markup are used as
    pivots. For each candidate angle two PNG renders are produced (one with
    the first pivot's transforms rewritten, one with the last pivot's), a
    vertical strip around the first pivot is pasted from the first render
    into the second, and the composite is preprocessed and passed to
    ``run_ocr``.

    Args:
        svg: SVG markup as a string.

    Returns:
        The first OCR result that is exactly four alphanumeric characters, or
        ``""`` when fewer than two rotate transforms are present or no angle
        yields such a result.
    """
    disable_anim = re.sub(r'<animateTransform type="rotate" repeatCount="indefinite" attributeName="transform" from="\d+ \d+,\d+" to="\d+ \d+ \d+" begin="\d+" dur="\d+s"/>', '', svg)
    matches = re.findall(r"rotate\((1|-1), (\d+), (\d+)\)", disable_anim)
    if len(matches) < 2:
        return ""

    # Each match is (sign, x, y). Keep both coordinates of the first and last
    # pivot so the rewrite below targets the y value actually present in the
    # markup instead of assuming it is always 150.
    first_pivot = matches[0][1:]
    second_pivot = matches[-1][1:]
    strip_center = int(first_pivot[0])

    def create_rotated_image(svg_code, angle_pos, pivot):
        """Render ``svg_code`` with the pivot's +/-1 degree rotations set to +/-angle_pos."""
        x, y = pivot
        # The targets are literal strings, so plain replacement is sufficient.
        rotated = svg_code.replace(
            f'rotate(1, {x}, {y})', f'rotate({angle_pos}, {x}, {y})'
        )
        rotated = rotated.replace(
            f'rotate(-1, {x}, {y})', f'rotate(-{angle_pos}, {x}, {y})'
        )
        image_data = cairosvg.svg2png(bytestring=rotated.encode('utf-8'))
        nparr = np.frombuffer(image_data, np.uint8)
        return cv2.imdecode(nparr, cv2.IMREAD_COLOR)

    def combine_images(base_img, part_img, width=68):
        """Copy the strip of ``part_img`` around the first pivot onto ``base_img``."""
        combined_img = base_img.copy()
        # Clamp the left edge: a negative slice start would be interpreted as
        # counting from the right edge and select the wrong (or no) columns.
        left = max(strip_center - width, 0)
        right = strip_center + width
        combined_img[:, left:right] = part_img[:, left:right]
        return combined_img

    for angle in (0, 45, 90, 135, 180, 225, 270, 315):
        img1 = create_rotated_image(disable_anim, angle, first_pivot)
        img2 = create_rotated_image(disable_anim, angle, second_pivot)
        combo = combine_images(img2, img1)

        res = run_ocr(advanced_preprocess(combo))

        # Accept the first angle that produces a plausible 4-character answer.
        if len(res) == 4 and res.isalnum():
            return res

    return ""

def predict(svg_text):
    """Solve a captcha given as raw SVG markup or a base64 SVG data URI.

    Args:
        svg_text: SVG markup, or a ``data:image/svg+xml;base64,...`` URI.
            ``None`` is treated the same as an empty string.

    Returns:
        The text found by ``genRotations``, or ``"XXXX"`` when it finds none.
        Returns ``"Empty input"`` for blank input, ``"Invalid input"`` when a
        data URI payload is not valid base64 / UTF-8, and ``"Too large"`` when
        the SVG markup exceeds 30000 characters.
    """
    request_id = str(uuid.uuid4())
    print(f"Yeni istek geldi. ID: {request_id}")

    def respond(answer):
        # Log exactly what is returned so the log and the response agree.
        print(f"OCR cevabı döndürüldü. ID: {request_id}, Cevap: {answer}")
        return answer

    # Guard against None so a missing value is reported instead of crashing.
    text = (svg_text or "").strip()
    if not text:
        return respond("Empty input")

    if text.startswith('data:image/svg+xml;base64,'):
        try:
            svg = base64.b64decode(text.split(',')[-1]).decode('utf-8')
        except ValueError:
            # binascii.Error (bad base64) and UnicodeDecodeError (bad UTF-8)
            # are both ValueError subclasses.
            return respond("Invalid input")
    else:
        svg = text

    if len(svg) > 30000:
        return respond("Too large")

    return respond(genRotations(svg) or "XXXX")

# Build the input and output widgets first, then wire them to `predict`.
_svg_box = gr.Textbox(label="SVG", lines=6, placeholder="SVG to PNG...")
_solution_box = gr.Textbox(label="Solution")

demo = gr.Interface(
    fn=predict,
    inputs=_svg_box,
    outputs=_solution_box,
    title="Captcha Solver",
)

# Start the Gradio app when this module is executed.
demo.launch()