Spaces:

m2zm
/

gplace

Sleeping

App Files Files

gplace / app.py

m2zm

Upload Solver Files

2462b71 verified 2 months ago

raw

history blame

1.98 kB

	import base64
	from io import BytesIO
	import uuid
	from cairosvg import svg2png
	import cv2
	import numpy as np
	import gradio as gr
	import torch
	from PIL import Image
	from transformers import VisionEncoderDecoderModel, TrOCRProcessor

	# Hugging Face checkpoint shared by the processor and the model.
	MODEL_ID = "anuashok/ocr-captcha-v3"

	# Run on the GPU when torch reports one, otherwise fall back to the CPU.
	device = "cuda" if torch.cuda.is_available() else "cpu"

	# The processor prepares image inputs and decodes generated token ids.
	processor = TrOCRProcessor.from_pretrained(MODEL_ID)

	# Load the weights, move them to the chosen device, switch to eval mode.
	model = VisionEncoderDecoderModel.from_pretrained(MODEL_ID)
	model = model.to(device)
	model.eval()

	def run_ocr(pil_image):
	    """Recognise the text in ``pil_image`` with the module-level model.

	    The image is passed through ``processor`` to obtain pixel values on
	    ``device``; ``model.generate`` then produces token ids with gradient
	    tracking disabled, and the first decoded sequence (special tokens
	    skipped) is returned.
	    """
	    encoded = processor(images=pil_image, return_tensors="pt")
	    inputs = encoded.pixel_values.to(device)
	    with torch.no_grad():
	        token_ids = model.generate(inputs)
	    decoded = processor.batch_decode(token_ids, skip_special_tokens=True)
	    return decoded[0]

	def preprocess_captcha(image):
	    """Binarise a captcha image with Otsu's threshold.

	    Args:
	        image: PIL image in any mode; it is converted to RGB first.

	    Returns:
	        A new RGB PIL image built from the thresholded (0/255) grayscale
	        data.
	    """
	    # NOTE(review): convert("RGB") drops any alpha channel without
	    # compositing it onto a background - confirm that suits SVGs that
	    # render with a transparent background.
	    rgb = np.array(image.convert("RGB"))
	    # Convert RGB directly to gray. The previous RGB -> BGR -> GRAY detour
	    # applied the same channel weights and only added a full-image copy.
	    gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
	    # With THRESH_OTSU the threshold is chosen automatically; the 0 passed
	    # here is only a placeholder.
	    _, binary = cv2.threshold(
	        gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
	    )
	    return Image.fromarray(binary).convert("RGB")

	def _svg_source_bytes(text):
	    """Return the SVG document bytes carried by ``text``.

	    ``text`` is either raw SVG markup or a ``data:`` URI. For a URI the
	    part after the first comma is base64-decoded when the header ends in
	    ``;base64`` (matched case-insensitively, so extra parameters such as
	    ``;charset=utf-8`` are tolerated) and percent-decoded otherwise.
	    Anything that is not a ``data:`` URI is encoded as UTF-8 unchanged.

	    Raises:
	        binascii.Error: if a base64 payload is malformed.
	    """
	    if text[:5].lower() != "data:":
	        return text.encode("utf-8")

	    header, _, payload = text.partition(",")
	    if header.lower().endswith(";base64"):
	        return base64.b64decode(payload)

	    # Imported locally so the module's top-level import list is untouched.
	    from urllib.parse import unquote_to_bytes

	    return unquote_to_bytes(payload)


	def predict(svg_text):
	    """Solve the captcha described by ``svg_text`` and return its text.

	    Args:
	        svg_text: SVG markup or a ``data:`` URI wrapping it. ``None`` and
	            whitespace-only strings are treated as empty input.

	    Returns:
	        The OCR result, or the literal string ``"Empty input"`` when no
	        SVG was supplied.

	    Errors from decoding, rasterising or OCR are not caught here and
	    propagate to the caller. Each call logs a fresh request id on entry
	    and again when a result is returned.
	    """
	    request_id = str(uuid.uuid4())
	    print(f"Yeni istek geldi. ID: {request_id}")

	    # A missing value (None) is handled like an empty string instead of
	    # failing on .strip().
	    text = (svg_text or "").strip()
	    if not text:
	        print(f"OCR cevabı döndürüldü. ID: {request_id}, Cevap: Empty input")
	        return "Empty input"

	    png_bytes = svg2png(bytestring=_svg_source_bytes(text))
	    image = Image.open(BytesIO(png_bytes))
	    result = run_ocr(preprocess_captcha(image))
	    print(f"OCR cevabı döndürüldü. ID: {request_id}, Cevap: {result}")
	    return result

	# Single-textbox UI: SVG text goes in, the recognised captcha text comes out.
	svg_input = gr.Textbox(label="SVG", lines=6, placeholder="SVG to PNG...")
	solution_output = gr.Textbox(label="Solution")

	demo = gr.Interface(
	    title="Captcha Solver",
	    fn=predict,
	    inputs=svg_input,
	    outputs=solution_output,
	)

	# Start the Gradio app with its default launch settings.
	demo.launch()