Spaces:
Running
Running
File size: 3,897 Bytes
24c3b65 4348971 29ea798 24c3b65 16fef22 c2b394f d6fea40 f64a374 c2b394f f64a374 4348971 24c3b65 1a4de0d 24c3b65 f64a374 24c3b65 d6fea40 24c3b65 1a4de0d f64a374 24c3b65 022227b d6fea40 24c3b65 022227b 24c3b65 f64a374 fc3087a 16fef22 29ea798 24c3b65 29ea798 24c3b65 022227b | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 | import base64
import uuid
import cairosvg
import cv2
import numpy as np
import re
import torch
from PIL import Image, ImageEnhance, ImageFilter
from transformers import VisionEncoderDecoderModel, TrOCRProcessor
import gradio as gr
# Run on the GPU when torch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Checkpoint shared by the processor and the model, so the two cannot drift apart.
_MODEL_ID = "anuashok/ocr-captcha-v3"

# Half precision is only requested on CUDA. On CPU, float16 inference is slow
# and, depending on the torch build, some ops have no Half kernel at all, so
# the weights are kept in float32 there.
_MODEL_DTYPE = torch.float16 if device == "cuda" else torch.float32

processor = TrOCRProcessor.from_pretrained(_MODEL_ID)
model = VisionEncoderDecoderModel.from_pretrained(
    _MODEL_ID,
    torch_dtype=_MODEL_DTYPE,
).to(device)
# Inference only: switch off dropout and other training-time behaviour.
model.eval()
def advanced_preprocess(cv_image):
    """Turn a BGR OpenCV image into a binarized, sharpened RGB PIL image.

    Steps, in order: grayscale conversion, bilateral filtering, inverted
    Otsu thresholding, conversion to an RGB PIL image, a SHARPEN filter and
    a 2.5x contrast boost.

    Args:
        cv_image: Image array in the BGR channel order expected by
            ``cv2.COLOR_BGR2GRAY``.

    Returns:
        The processed ``PIL.Image`` in RGB mode.
    """
    grayscale = cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY)
    smoothed = cv2.bilateralFilter(grayscale, 5, 75, 75)
    # THRESH_OTSU picks the threshold itself, so the 0 passed here is ignored;
    # only the binarized image (second return value) is needed.
    binarized = cv2.threshold(
        smoothed, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    )[1]
    rgb_image = Image.fromarray(binarized).convert("RGB")
    sharpened = rgb_image.filter(ImageFilter.SHARPEN)
    return ImageEnhance.Contrast(sharpened).enhance(2.5)
def run_ocr(pil_image):
    """Run the OCR model on one image and return the cleaned-up text.

    Args:
        pil_image: Image accepted by the module-level ``processor``.

    Returns:
        The decoded text, upper-cased, with every character outside
        ``A-Z`` / ``0-9`` removed. May be shorter than four characters.
    """
    encoded = processor(images=pil_image, return_tensors="pt")
    # The processor emits float32 tensors while the model may hold float16
    # weights. Cast explicitly to the model's dtype instead of relying on the
    # encoder to do it internally.
    pixel_values = encoded.pixel_values.to(device=device, dtype=model.dtype)
    with torch.no_grad():
        # min_length == max_length pins the generated sequence length to 4.
        generated_ids = model.generate(
            pixel_values,
            max_length=4,
            min_length=4,
            num_beams=3,
            no_repeat_ngram_size=2,
            early_stopping=True,
            length_penalty=1.0,
            repetition_penalty=1.5,
        )
    text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
    return re.sub(r'[^A-Za-z0-9]', '', text.upper())
def _render_rotated(svg_code, angle, center_x, center_y):
    """Rasterize ``svg_code`` with the +/-1 rotations about one centre set to +/-``angle``."""
    forward = re.sub(
        rf'rotate\(1, {center_x}, {center_y}\)',
        f'rotate({angle}, {center_x}, {center_y})',
        svg_code,
    )
    both = re.sub(
        rf'rotate\(-1, {center_x}, {center_y}\)',
        f'rotate(-{angle}, {center_x}, {center_y})',
        forward,
    )
    png_bytes = cairosvg.svg2png(bytestring=both.encode('utf-8'))
    return cv2.imdecode(np.frombuffer(png_bytes, np.uint8), cv2.IMREAD_COLOR)


def _paste_strip(base_img, part_img, center_x, width=68):
    """Return a copy of ``base_img`` with the columns around ``center_x`` taken from ``part_img``."""
    combined = base_img.copy()
    # Clamp the strip to the image: a negative start index would be read by
    # numpy as "count from the right edge" and silently select nothing.
    left = max(center_x - width, 0)
    right = min(center_x + width, combined.shape[1])
    combined[:, left:right] = part_img[:, left:right]
    return combined


def genRotations(svg):
    """Try a fixed set of rotation angles on an SVG and OCR each rendering.

    ``<animateTransform>`` rotate elements are stripped first. The first and
    last ``rotate(1|-1, x, y)`` transforms found in the remaining markup give
    two rotation centres. For every candidate angle the SVG is rendered twice,
    once per centre rotated to that angle. The vertical strip around the first
    centre is pasted from the first rendering onto the second, and the result
    is preprocessed and passed to OCR.

    Args:
        svg: SVG markup as a string.

    Returns:
        The first OCR result that is exactly four alphanumeric characters,
        or ``""`` when fewer than two rotate transforms are present or no
        angle yields such a result.
    """
    disable_anim = re.sub(r'<animateTransform type="rotate" repeatCount="indefinite" attributeName="transform" from="\d+ \d+,\d+" to="\d+ \d+ \d+" begin="\d+" dur="\d+s"/>', '', svg)
    matches = re.findall(r"rotate\((1|-1), (\d+), (\d+)\)", disable_anim)
    if len(matches) < 2:
        return ""

    # Each match is (sign, x, y). Use the captured y instead of assuming 150,
    # so the substitution still hits when the centre sits at another height.
    _, first_x, first_y = matches[0]
    _, second_x, second_y = matches[-1]
    strip_center = int(first_x)

    for angle in (0, 45, 90, 135, 180, 225, 270, 315):
        first_img = _render_rotated(disable_anim, angle, first_x, first_y)
        second_img = _render_rotated(disable_anim, angle, second_x, second_y)
        # cv2.imdecode returns None for an undecodable buffer; skip that
        # angle rather than crash on it.
        if first_img is None or second_img is None:
            continue
        combo = _paste_strip(second_img, first_img, strip_center)
        res = run_ocr(advanced_preprocess(combo))
        if len(res) == 4 and res.isalnum():
            return res
    return ""
def predict(svg_text):
    """Solve one captcha given as raw SVG markup or a base64 SVG data URI.

    Every request gets a fresh UUID that is printed when the request arrives
    and again together with the answer that is returned.

    Args:
        svg_text: SVG markup, or a ``data:image/svg+xml;base64,`` URI.
            ``None`` is treated like an empty string.

    Returns:
        ``"Empty input"`` for blank input, ``"Invalid input"`` when the
        base64 payload cannot be decoded to UTF-8 text, ``"Too large"`` when
        the SVG exceeds 30000 characters, the value from ``genRotations``
        when it is non-empty, and ``"XXXX"`` otherwise.
    """
    request_id = str(uuid.uuid4())
    print(f"Yeni istek geldi. ID: {request_id}")

    def respond(answer):
        # Log exactly what the caller receives, so log and response agree.
        print(f"OCR cevabı döndürüldü. ID: {request_id}, Cevap: {answer}")
        return answer

    text = (svg_text or "").strip()
    if not text:
        return respond("Empty input")

    if text.startswith('data:image/svg+xml;base64,'):
        try:
            svg = base64.b64decode(text.split(',')[-1]).decode('utf-8')
        except ValueError:
            # binascii.Error (bad base64) and UnicodeDecodeError (bad UTF-8)
            # are both ValueError subclasses.
            return respond("Invalid input")
    else:
        svg = text

    if len(svg) > 30000:
        return respond("Too large")

    return respond(genRotations(svg) or "XXXX")
# Gradio UI: one multi-line text box for the SVG input, one text box for the
# answer produced by `predict`.
demo = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(label="SVG", lines=6, placeholder="SVG to PNG..."),
    outputs=gr.Textbox(label="Solution"),
    title="Captcha Solver",
)
# Start the web server (the stray trailing "|" after this call was a syntax error).
demo.launch()