Spaces:
Running
Running
| import base64 | |
| import uuid | |
| import cairosvg | |
| import cv2 | |
| import numpy as np | |
| import re | |
| import torch | |
| from PIL import Image, ImageEnhance, ImageFilter | |
| from transformers import VisionEncoderDecoderModel, TrOCRProcessor | |
| import gradio as gr | |
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Checkpoint shared by the image processor and the encoder-decoder weights.
_MODEL_ID = "anuashok/ocr-captcha-v3"

processor = TrOCRProcessor.from_pretrained(_MODEL_ID)

# Half precision is requested only on CUDA. On the CPU the weights stay in
# float32, because fp16 CPU kernels are slow and are not implemented for every
# operator in every PyTorch build.
model = VisionEncoderDecoderModel.from_pretrained(
    _MODEL_ID,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
).to(device)
model.eval()  # switch the model to evaluation mode; it is only used for inference
def advanced_preprocess(cv_image):
    """Turn a BGR OpenCV image into a high-contrast RGB PIL image for OCR.

    Steps: grayscale conversion, bilateral filtering, inverted Otsu
    binarisation, then a PIL sharpen filter and a 2.5x contrast boost.

    Args:
        cv_image: Image array in OpenCV's BGR channel order.

    Returns:
        The processed picture as an RGB ``PIL.Image.Image``.
    """
    denoised = cv2.bilateralFilter(
        cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY), 5, 75, 75
    )
    # cv2.threshold returns (threshold_used, image); only the image is needed.
    binary = cv2.threshold(
        denoised, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    )[1]
    sharpened = Image.fromarray(binary).convert("RGB").filter(ImageFilter.SHARPEN)
    return ImageEnhance.Contrast(sharpened).enhance(2.5)
def run_ocr(pil_image):
    """Run the captcha OCR model on one image and return the cleaned text.

    The image is encoded with the module-level ``processor``, decoded by
    ``model.generate`` using beam search, and the resulting string is
    upper-cased with every character other than ``A-Z`` / ``0-9`` removed.

    Args:
        pil_image: Image accepted by the processor (a PIL image in this file).

    Returns:
        The filtered, upper-case recognition result (possibly empty).
    """
    generation_options = dict(
        max_length=4,
        min_length=4,
        num_beams=3,
        no_repeat_ngram_size=2,
        early_stopping=True,
        length_penalty=1.0,
        repetition_penalty=1.5,
    )

    encoded = processor(images=pil_image, return_tensors="pt")
    inputs = encoded.pixel_values.to(device)

    # No gradients are needed for inference.
    with torch.no_grad():
        token_ids = model.generate(inputs, **generation_options)

    decoded = processor.batch_decode(token_ids, skip_special_tokens=True)
    raw = decoded[0].strip()
    return re.sub(r'[^A-Za-z0-9]', '', raw.upper())
def genRotations(svg):
    """Try to read an SVG captcha by rendering it at several rotation angles.

    The ``<animateTransform>`` rotation animations are stripped, then the
    ``rotate(±1, x, y)`` transforms are located. The x value of the first and
    of the last match identify two rotating parts. For each candidate angle
    both parts are rendered, the strip around the first part is pasted onto
    the render of the second, and the composite is pre-processed and OCR'd.

    Args:
        svg: SVG document source as text.

    Returns:
        The first OCR result that is exactly four alphanumeric characters,
        or ``""`` when fewer than two rotate transforms are found or no
        angle produces such a result.
    """
    # Remove the endless rotation animations so the render is static.
    disable_anim = re.sub(
        r'<animateTransform type="rotate" repeatCount="indefinite" attributeName="transform" from="\d+ \d+,\d+" to="\d+ \d+ \d+" begin="\d+" dur="\d+s"/>',
        '',
        svg,
    )
    matches = re.findall(r"rotate\((1|-1), (\d+), (\d+)\)", disable_anim)
    if len(matches) < 2:
        return ""

    # Group 2 of each match is the x coordinate of the rotation centre.
    firstcoords = matches[0][1]
    secondcoords = matches[-1][1]

    def create_rotated_image(svg_code, angle_pos, coords):
        """Render *svg_code* with the part centred at x=*coords* rotated.

        Only transforms whose y coordinate is the literal ``150`` are
        rewritten; others are left untouched.
        """
        rotated = svg_code.replace(
            f'rotate(1, {coords}, 150)', f'rotate({angle_pos}, {coords}, 150)'
        ).replace(
            f'rotate(-1, {coords}, 150)', f'rotate(-{angle_pos}, {coords}, 150)'
        )
        image_data = cairosvg.svg2png(bytestring=rotated.encode('utf-8'))
        nparr = np.frombuffer(image_data, np.uint8)
        return cv2.imdecode(nparr, cv2.IMREAD_COLOR)

    def combine_images(base_img, part_img, width=68):
        """Copy the columns around the first part from *part_img* onto a copy of *base_img*."""
        fc = int(firstcoords)
        # Clamp the start: a negative index would wrap around to the right
        # edge and select an empty range, silently skipping the paste.
        left = max(0, fc - width)
        right = fc + width  # slicing already stops at the image edge
        combined_img = base_img.copy()
        combined_img[:, left:right] = part_img[:, left:right]
        return combined_img

    for a in (0, 45, 90, 135, 180, 225, 270, 315):
        img1 = create_rotated_image(disable_anim, a, firstcoords)
        img2 = create_rotated_image(disable_anim, a, secondcoords)
        combo = combine_images(img2, img1)
        res = run_ocr(advanced_preprocess(combo))
        if len(res) == 4 and res.isalnum():
            return res
    return ""
def predict(svg_text):
    """Gradio handler: solve the captcha contained in *svg_text*.

    Args:
        svg_text: Either raw SVG markup or a
            ``data:image/svg+xml;base64,...`` data URI. ``None`` is treated
            like an empty string.

    Returns:
        The recognised text, ``"XXXX"`` when no solution was found, or one
        of the messages ``"Empty input"``, ``"Invalid input"`` (data URI
        that is not valid base64 / UTF-8) or ``"Too large"`` (decoded SVG
        longer than 30000 characters).
    """
    request_id = str(uuid.uuid4())
    print(f"Yeni istek geldi. ID: {request_id}")

    # A missing value is handled the same way as an empty textbox.
    text = (svg_text or "").strip()
    if not text:
        print(f"OCR cevabı döndürüldü. ID: {request_id}, Cevap: Empty input")
        return "Empty input"

    if text.startswith('data:image/svg+xml;base64,'):
        try:
            svg = base64.b64decode(text.split(',')[-1]).decode('utf-8')
        except ValueError:
            # binascii.Error (malformed base64) and UnicodeDecodeError
            # (payload is not UTF-8 text) are both ValueError subclasses.
            print(f"OCR cevabı döndürüldü. ID: {request_id}, Cevap: Invalid input")
            return "Invalid input"
    else:
        svg = text

    if len(svg) > 30000:
        print(f"OCR cevabı döndürüldü. ID: {request_id}, Cevap: Too large")
        return "Too large"

    result = genRotations(svg)
    print(f"OCR cevabı döndürüldü. ID: {request_id}, Cevap: {result}")
    return result if result else "XXXX"
# Web UI: one multi-line textbox for the SVG source, one textbox for the
# answer returned by predict().
demo = gr.Interface(
    title="Captcha Solver",
    fn=predict,
    inputs=gr.Textbox(placeholder="SVG to PNG...", lines=6, label="SVG"),
    outputs=gr.Textbox(label="Solution"),
)

# Start the Gradio server.
demo.launch()