# gplace / app.py
# m2zm's picture
# Upload Solver Files
# 2462b71 verified
# raw
# history blame
# 1.98 kB
import base64
import uuid
from io import BytesIO
from urllib.parse import unquote

import cv2
import gradio as gr
import numpy as np
import torch
from cairosvg import svg2png
from PIL import Image
from transformers import VisionEncoderDecoderModel, TrOCRProcessor
# One-time model setup, executed when this module is loaded.
# Both the processor and the model are loaded from the same checkpoint id.
MODEL_ID = "anuashok/ocr-captcha-v3"

# Use the GPU when torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

processor = TrOCRProcessor.from_pretrained(MODEL_ID)
model = VisionEncoderDecoderModel.from_pretrained(MODEL_ID)
model = model.to(device)
# The model is only used for generation in this file, never trained.
model.eval()
def run_ocr(pil_image):
    """Recognise the text in one image with the module-level OCR model.

    Args:
        pil_image: Image handed unchanged to the module-level ``processor``.
            The only caller in this file passes the PIL image returned by
            ``preprocess_captcha``.

    Returns:
        The first decoded sequence, with special tokens removed.
    """
    encoded = processor(images=pil_image, return_tensors="pt")
    model_input = encoded.pixel_values.to(device)

    # Generation only; no gradients are needed.
    with torch.no_grad():
        token_ids = model.generate(model_input)

    decoded = processor.batch_decode(token_ids, skip_special_tokens=True)
    return decoded[0]
def preprocess_captcha(image):
    """Binarise a captcha image using Otsu's threshold.

    Args:
        image: Object exposing ``convert("RGB")``; the caller in this file
            passes a PIL image opened from rendered PNG bytes.

    Returns:
        A new RGB PIL image built from the thresholded grayscale array.
    """
    rgb_pixels = np.array(image.convert("RGB"))

    # Go through OpenCV's BGR channel order before reducing to grayscale.
    bgr_pixels = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)
    grayscale = cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2GRAY)

    # With THRESH_OTSU the cut-off is chosen by OpenCV, so the 0 passed as
    # the threshold argument is only a placeholder.
    otsu_mode = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    binary = cv2.threshold(grayscale, 0, 255, otsu_mode)[1]

    return Image.fromarray(binary).convert("RGB")
def _decode_svg_input(text):
    """Convert pasted SVG markup or an SVG ``data:`` URI into raw SVG bytes.

    Args:
        text: Non-empty, already-stripped user input.

    Returns:
        The SVG document as bytes.

    Raises:
        ValueError: If ``text`` is an SVG data URI without a ``,`` separator.
        binascii.Error: If a base64 payload cannot be decoded.
    """
    prefix = "data:image/svg+xml"
    if text[:len(prefix)].lower() != prefix:
        # Not a data URI: treat the input as literal SVG markup.
        return text.encode("utf-8")

    header, separator, payload = text.partition(",")
    if not separator:
        raise ValueError("Malformed SVG data URI: missing ',' before the payload")

    # The header has the form "data:image/svg+xml[;param...][;base64]", so
    # "base64" may follow other parameters such as "charset=utf-8".
    parameters = [part.strip().lower() for part in header.split(";")[1:]]
    if "base64" in parameters:
        return base64.b64decode(payload)

    # Non-base64 data URIs carry percent-encoded text.
    return unquote(payload).encode("utf-8")


def predict(svg_text):
    """Solve a captcha supplied as SVG markup or as an SVG data URI.

    The SVG is rendered to PNG with ``svg2png``, binarised by
    ``preprocess_captcha`` and read by ``run_ocr``. The start and end of
    every request are printed together with a random request id.

    Args:
        svg_text: Raw SVG markup, or a ``data:image/svg+xml`` URI (base64 or
            percent-encoded). ``None`` and whitespace-only strings are
            treated as empty input.

    Returns:
        The recognised text, or the literal string ``"Empty input"`` when
        nothing was supplied.

    Raises:
        ValueError: If an SVG data URI has no ``,`` separator.
        Exception: Decoding, rendering and OCR errors are not caught here.
    """
    request_id = str(uuid.uuid4())
    print(f"Yeni istek geldi. ID: {request_id}")

    # Guard against None so a missing value is reported like an empty one
    # instead of raising AttributeError on .strip().
    text = (svg_text or "").strip()
    if not text:
        print(f"OCR cevabı döndürüldü. ID: {request_id}, Cevap: Empty input")
        return "Empty input"

    # NOTE(review): no background colour is passed to svg2png, and
    # preprocess_captcha drops the alpha channel via convert("RGB").
    # Presumably the target captchas have an opaque background; confirm.
    png_bytes = svg2png(bytestring=_decode_svg_input(text))

    # preprocess_captcha returns a new image, so the decoded PNG can be
    # closed as soon as preprocessing finishes.
    with Image.open(BytesIO(png_bytes)) as image:
        processed = preprocess_captcha(image)

    result = run_ocr(processed)
    print(f"OCR cevabı döndürüldü. ID: {request_id}, Cevap: {result}")
    return result
# Web UI: one multi-line textbox for the SVG input and one textbox for the
# text returned by predict().
svg_input = gr.Textbox(label="SVG", lines=6, placeholder="SVG to PNG...")
solution_output = gr.Textbox(label="Solution")

demo = gr.Interface(
    fn=predict,
    inputs=svg_input,
    outputs=solution_output,
    title="Captcha Solver",
)

# Launched unconditionally when this module is executed or imported.
demo.launch()