Spaces:

m2zm
/

pixelplanet

Running

App Files Files Community

pixelplanet / app.py

m2zm

Update app.py

f64a374 verified about 1 month ago

raw

history blame contribute delete

3.9 kB

	import base64
	import uuid
	import cairosvg
	import cv2
	import numpy as np
	import re
	import torch
	from PIL import Image, ImageEnhance, ImageFilter
	from transformers import VisionEncoderDecoderModel, TrOCRProcessor
	import gradio as gr

	# Prefer the GPU when torch can see one; otherwise fall back to the CPU.
	device = "cpu"
	if torch.cuda.is_available():
	    device = "cuda"

	# Processor and model are loaded from the same checkpoint.
	_CHECKPOINT = "anuashok/ocr-captcha-v3"
	processor = TrOCRProcessor.from_pretrained(_CHECKPOINT)

	# Weights are loaded in half precision and then moved to the chosen device.
	model = VisionEncoderDecoderModel.from_pretrained(
	    _CHECKPOINT,
	    torch_dtype=torch.float16,
	)
	model = model.to(device)

	# Inference only: switch the module to evaluation mode.
	model.eval()

	def advanced_preprocess(cv_image):
	    """Turn a BGR OpenCV image into a high-contrast RGB PIL image for OCR.

	    Steps, in order: grayscale conversion, bilateral filtering
	    (diameter 5, both sigmas 75), inverted binary thresholding with an
	    Otsu-selected level, conversion to an RGB PIL image, a sharpen
	    filter, and a contrast enhancement with factor 2.5.

	    Args:
	        cv_image: image array accepted by ``cv2.cvtColor`` with
	            ``COLOR_BGR2GRAY``.

	    Returns:
	        The processed ``PIL.Image`` in RGB mode.
	    """
	    smoothed = cv2.bilateralFilter(
	        cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY), 5, 75, 75
	    )
	    # cv2.threshold returns (level, image); only the image is needed.
	    binary = cv2.threshold(
	        smoothed, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
	    )[1]
	    sharpened = Image.fromarray(binary).convert("RGB").filter(ImageFilter.SHARPEN)
	    return ImageEnhance.Contrast(sharpened).enhance(2.5)

	def run_ocr(pil_image):
	    """Run the OCR model on ``pil_image`` and return the cleaned-up text.

	    The image is encoded with the module-level ``processor``, decoded by
	    the module-level ``model`` using beam search, and the first decoded
	    string is stripped, upper-cased and reduced to ASCII letters and
	    digits.

	    Args:
	        pil_image: image passed to the processor as ``images=``.

	    Returns:
	        The recognised text containing only ``A-Z``/``0-9`` characters
	        (possibly empty).
	    """
	    encoded = processor(images=pil_image, return_tensors="pt")
	    pixel_values = encoded.pixel_values.to(device)

	    generation_options = dict(
	        max_length=4,
	        min_length=4,
	        num_beams=3,
	        no_repeat_ngram_size=2,
	        early_stopping=True,
	        length_penalty=1.0,
	        repetition_penalty=1.5,
	    )
	    # No gradients are needed for inference.
	    with torch.no_grad():
	        token_ids = model.generate(pixel_values, **generation_options)

	    decoded = processor.batch_decode(token_ids, skip_special_tokens=True)
	    candidate = decoded[0].strip().upper()
	    # Drop everything that is not an ASCII letter or digit.
	    return re.sub(r'[^A-Za-z0-9]', '', candidate)

	def genRotations(svg):
	    """Try a set of fixed rotations on an SVG captcha until OCR reads it.

	    The spinning ``<animateTransform>`` tags are removed, then the static
	    ``rotate(1, x, y)`` / ``rotate(-1, x, y)`` transforms are located. The
	    x value of the first and of the last match identify the two rotating
	    parts. For every angle in 0..315 (step 45) the SVG is rendered twice,
	    once with each part turned to that angle, the strip around the first
	    part is pasted from the first render into the second, and the result
	    is preprocessed and passed to OCR.

	    Args:
	        svg: SVG markup as a string.

	    Returns:
	        The first OCR result that is exactly four alphanumeric
	        characters, or ``""`` when fewer than two rotate transforms are
	        found or no angle yields such a result.
	    """
	    # Strip the endless rotation animation so only static transforms remain.
	    disable_anim = re.sub(r'<animateTransform type="rotate" repeatCount="indefinite" attributeName="transform" from="\d+ \d+,\d+" to="\d+ \d+ \d+" begin="\d+" dur="\d+s"/>', '', svg)

	    # "(1|-1)" must be a real alternation. With the pipe escaped the pattern
	    # only matched the literal text "1|-1", so no transform was ever found
	    # and the function always returned "".
	    matches = re.findall(r"rotate\((1|-1), (\d+), (\d+)\)", disable_anim)
	    if len(matches) < 2:
	        return ""
	    firstcoords = matches[0][1]
	    secondcoords = matches[-1][1]

	    def create_rotated_image(svg_code, angle_pos, coords):
	        """Render ``svg_code`` with the part at x=``coords`` turned to ``angle_pos``."""
	        # NOTE: the y centre is fixed at 150 here, as in the patterns below.
	        temp1 = re.sub(rf'rotate\(1, {coords}, 150\)', f'rotate({angle_pos}, {coords}, 150)', svg_code)
	        temp2 = re.sub(rf'rotate\(-1, {coords}, 150\)', f'rotate(-{angle_pos}, {coords}, 150)', temp1)
	        image_data = cairosvg.svg2png(bytestring=temp2.encode('utf-8'))
	        nparr = np.frombuffer(image_data, np.uint8)
	        return cv2.imdecode(nparr, cv2.IMREAD_COLOR)

	    def combine_images(base_img, part_img, width=68):
	        """Copy the vertical strip around the first part from ``part_img`` onto a copy of ``base_img``."""
	        combined_img = base_img.copy()
	        fc = int(firstcoords)
	        # Clamp at 0: a negative start index would wrap around to the right
	        # edge of the image and select the wrong (usually empty) strip.
	        left = max(fc - width, 0)
	        right = fc + width
	        combined_img[:, left:right] = part_img[:, left:right]
	        return combined_img

	    angles = [0, 45, 90, 135, 180, 225, 270, 315]

	    for a in angles:
	        img1 = create_rotated_image(disable_anim, a, firstcoords)
	        img2 = create_rotated_image(disable_anim, a, secondcoords)
	        combo = combine_images(img2, img1)

	        processed = advanced_preprocess(combo)
	        res = run_ocr(processed)

	        # Accept the first reading that looks like a complete answer.
	        if len(res) == 4 and res.isalnum():
	            return res

	    return ""

	def predict(svg_text):
	    """Solve the captcha in ``svg_text`` and return the answer string.

	    Args:
	        svg_text: raw SVG markup, or a ``data:image/svg+xml;base64,``
	            data URI wrapping UTF-8 encoded SVG markup. ``None`` is
	            treated like an empty string.

	    Returns:
	        The text returned by ``genRotations``, or one of the status
	        strings ``"Empty input"``, ``"Invalid input"`` (payload is not
	        valid base64 / UTF-8), ``"Too large"`` (more than 30000
	        characters of SVG) or ``"XXXX"`` (no answer found).
	    """
	    request_id = str(uuid.uuid4())
	    print(f"Yeni istek geldi. ID: {request_id}")

	    def respond(answer):
	        # Log exactly what is sent back, so the log and the response agree
	        # (including the "XXXX" fallback).
	        print(f"OCR cevabı döndürüldü. ID: {request_id}, Cevap: {answer}")
	        return answer

	    text = (svg_text or "").strip()
	    if not text:
	        return respond("Empty input")

	    if text.startswith('data:image/svg+xml;base64,'):
	        try:
	            svg = base64.b64decode(text.split(',')[-1]).decode('utf-8')
	        except ValueError:
	            # binascii.Error (bad base64) and UnicodeDecodeError (payload is
	            # not UTF-8) are both ValueError subclasses.
	            return respond("Invalid input")
	    else:
	        svg = text

	    if len(svg) > 30000:
	        return respond("Too large")

	    return respond(genRotations(svg) or "XXXX")

	# Single-textbox web UI: SVG markup in, solved captcha text out.
	_svg_input = gr.Textbox(label="SVG", lines=6, placeholder="SVG to PNG...")
	_solution_output = gr.Textbox(label="Solution")

	demo = gr.Interface(
	    fn=predict,
	    inputs=_svg_input,
	    outputs=_solution_output,
	    title="Captcha Solver",
	)

	# Start the Gradio server as soon as the module is executed.
	demo.launch()