File size: 2,015 Bytes
7b645e3
ab24f8b
7b645e3
 
 
 
 
 
 
 
 
 
 
ab24f8b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7b645e3
ab24f8b
 
 
 
 
7b645e3
 
ab24f8b
 
 
 
 
7b645e3
142cc58
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import io
import time
import os
import re
from PIL import Image
from cairosvg import svg2png
from transformers import VisionEncoderDecoderModel, TrOCRProcessor
import gradio as gr

# Hugging Face checkpoint id shared by the processor and the model.
_CHECKPOINT = "anuashok/ocr-captcha-v3"

# Loaded once at import time and reused by every call to solve_svg_captcha().
processor = TrOCRProcessor.from_pretrained(_CHECKPOINT)
model = VisionEncoderDecoderModel.from_pretrained(_CHECKPOINT)

# Make sure the "outputs" directory exists; no error if it is already there.
# NOTE(review): nothing in the visible code writes to it - confirm it is still needed.
os.makedirs("outputs", exist_ok=True)

def solve_svg_captcha(svg_data):
    """Render an SVG captcha to a bitmap and return the OCR model's guess.

    The markup is cleaned (attribute-less ``<style>...</style>`` blocks and
    ``file:///`` / ``/app/`` path fragments are removed), rasterised with
    ``svg2png``, resized to 500x300, composited onto a white background and
    passed to the module-level ``processor`` / ``model`` pair.

    Args:
        svg_data: SVG document as a string.

    Returns:
        At most four upper-cased ASCII letters/digits taken from the start of
        the decoded text; an empty string if the decoded text has none.
    """
    # Clean-up passes run in sequence: each one sees the previous one's
    # output, so their order is part of the behaviour.
    cleaned = re.sub(r'<style>.*?</style>', '', svg_data, flags=re.DOTALL)
    for fragment in ('file:///', '/app/'):
        cleaned = cleaned.replace(fragment, '')
    cleaned = re.sub(r'url\(["\']?\/?app\/[^)"\']*["\']?\)', 'url()', cleaned)

    # Rasterise, then scale to a fixed 500x300 canvas.
    raster = svg2png(bytestring=cleaned.encode('utf-8'))
    glyphs = Image.open(io.BytesIO(raster)).convert("RGBA").resize((500, 300))

    # Flatten transparency onto white before handing the image to the model.
    canvas = Image.new("RGBA", glyphs.size, (255, 255, 255))
    flattened = Image.alpha_composite(canvas, glyphs).convert("RGB")

    encoded = processor(flattened, return_tensors="pt")
    token_ids = model.generate(encoded.pixel_values)
    decoded = processor.batch_decode(token_ids, skip_special_tokens=True)

    # Keep only ASCII letters and digits, upper-case them, cap at 4 characters.
    alnum_only = re.sub(r'[^A-Za-z0-9]', '', decoded[0])
    return alnum_only.upper()[:4]

def predict(svgdata):
    """Validate the submitted SVG text and return the captcha guess or a status message.

    Args:
        svgdata: SVG markup entered by the user.

    Returns:
        ``"No SVG provided"`` for empty input, ``"SVG too large"`` when the
        input is longer than 50000 characters, the solver's answer when it
        is non-empty, and ``"Model could not predict"`` when the solver
        raises or returns an empty result.
    """
    fallback = "Model could not predict"

    if not svgdata:
        return "No SVG provided"
    if len(svgdata) > 50000:
        return "SVG too large"

    try:
        answer = solve_svg_captcha(svgdata)
    except Exception as err:
        # UI boundary: report the failure on stdout and show a generic message.
        print(f"Error in predict: {err}")
        return fallback

    return answer if answer else fallback

# Gradio UI: a multi-line text box for the SVG markup, a button, and an
# output text box that displays whatever predict() returns.
with gr.Blocks() as demo:
    gr.Markdown("Enter SVG data and receive model answer")
    svg_input = gr.Textbox(label="SVG Data", lines=10)
    predict_btn = gr.Button("Get Model Answer")
    # interactive=False: this field is for displaying the result only.
    model_answer = gr.Textbox(label="Model Answer", interactive=False)
    # On click, call predict() with the svg_input value and put the return
    # value into model_answer.
    predict_btn.click(predict, inputs=[svg_input], outputs=[model_answer])

# Start the Gradio app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()