Pixelplanet_old

Running

App Files Files

Pixelplanet_old / app.py

m2zm

Upload 6 files

f40ddb8 verified 6 days ago

Raw

History Blame

2.67 kB

	import base64
	from io import BytesIO
	import uuid
	from cairosvg import svg2png
	import cv2
	import numpy as np
	import gradio as gr
	import onnxruntime as ort
	from PIL import Image

	# Size (in pixels) the captcha image is resized to before inference.
	IMG_HEIGHT = 300
	IMG_WIDTH = 500
	# Decoded answers are truncated to this many characters.
	MAX_LENGTH = 4
	# Alphabet used when decoding predictions: digits, upper case, lower case.
	CHARACTERS = list(
	    "0123456789"
	    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	    "abcdefghijklmnopqrstuvwxyz"
	)
	# Path of the ONNX model file loaded into the inference session.
	MODEL_PATH = 'model.onnx'
	# One module-level inference session, created at import time and reused by
	# every request. CUDA is listed ahead of CPU in the providers list.
	session = ort.InferenceSession(
	    MODEL_PATH,
	    providers=['CUDAExecutionProvider', 'CPUExecutionProvider'],
	)

	def preprocess_captcha(image):
	    """Binarize a captcha image with Otsu thresholding.

	    The PIL image is converted to RGB, handed to OpenCV as a BGR array,
	    reduced to grayscale and thresholded with an automatically chosen
	    (Otsu) cut-off. The black-and-white result is returned as an RGB
	    PIL image.
	    """
	    rgb_pixels = np.array(image.convert("RGB"))
	    bgr_pixels = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)
	    grayscale = cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2GRAY)
	    # threshold() returns (chosen_threshold, image); only the image is needed.
	    binary = cv2.threshold(
	        grayscale, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
	    )[1]
	    return Image.fromarray(binary).convert("RGB")

	def get_result(pred):
	    """Greedy-decode a prediction tensor into the captcha text.

	    Only the first element of ``pred`` is decoded. For every row in it the
	    highest-scoring class index is taken; an index is turned into a
	    character unless it repeats the previous row's index, is ``0``, or is
	    ``len(CHARACTERS) + 1``.
	    NOTE(review): indices 0 and len(CHARACTERS) + 1 are presumably the
	    unknown/padding class and the CTC blank -- confirm against the model.

	    Args:
	        pred: Indexable whose first element iterates over per-step score
	            arrays (each must support ``argmax()``).

	    Returns:
	        The decoded string, truncated to ``MAX_LENGTH`` characters.
	    """
	    skipped_high_index = len(CHARACTERS) + 1  # loop-invariant, computed once
	    decoded = []
	    previous = None
	    for step_scores in pred[0]:
	        index = step_scores.argmax()
	        if index != previous and index != 0 and index != skipped_high_index:
	            # Class indices are offset by one relative to CHARACTERS.
	            decoded.append(CHARACTERS[index - 1])
	        # Remember every step (including skipped ones) so that only
	        # directly consecutive repeats are collapsed.
	        previous = index
	    return "".join(decoded)[:MAX_LENGTH]

	def predict(svg_text):
	    """Solve a captcha supplied as SVG markup or a base64 SVG data URI.

	    Args:
	        svg_text: Raw SVG text, or a ``data:image/svg+xml;base64,...`` URI.
	            ``None`` and whitespace-only values are treated as empty input.

	    Returns:
	        The decoded captcha string, or ``"Empty input"`` when nothing
	        usable was provided.
	    """
	    request_id = str(uuid.uuid4())
	    print(f"Yeni istek geldi. ID: {request_id}")

	    # Guard against None (e.g. a null payload) so it takes the same path as
	    # an empty string instead of raising AttributeError on .strip().
	    text = (svg_text or "").strip()
	    if not text:
	        print(f"OCR cevabı döndürüldü. ID: {request_id}, Cevap: Empty input")
	        return "Empty input"

	    if text.startswith('data:image/svg+xml;base64,'):
	        # Everything after the last comma is the base64 payload.
	        svg_bytes = base64.b64decode(text.rsplit(',', 1)[-1])
	    else:
	        svg_bytes = text.encode('utf-8')

	    # Rasterize the SVG to PNG, then binarize it.
	    image = Image.open(BytesIO(svg2png(bytestring=svg_bytes)))
	    processed = preprocess_captcha(image)

	    # Grayscale pixels of shape (IMG_HEIGHT, IMG_WIDTH) after the resize.
	    pixels = np.array(processed.convert('L').resize((IMG_WIDTH, IMG_HEIGHT)))
	    # Swap to width-major and add leading/trailing unit axes:
	    # (H, W) -> (1, W, H, 1), scaled to [0, 1] as float32.
	    model_input = pixels.T[np.newaxis, :, :, np.newaxis].astype(np.float32) / 255.

	    # The session is fed a 'label' input next to 'image'; random values are
	    # supplied for it.
	    # NOTE(review): presumably the label does not influence the returned
	    # prediction -- confirm against the exported model graph.
	    dummy_label = np.random.default_rng().random((28, 28), dtype=np.float32)
	    result_tensor = session.run(
	        None, {'image': model_input, 'label': dummy_label}
	    )[0]

	    result = get_result(result_tensor)

	    print(f"OCR cevabı döndürüldü. ID: {request_id}, Cevap: {result}")
	    return result

	# Gradio UI: a multi-line textbox for the SVG goes in, the solved captcha
	# text comes out; every submission is handled by predict().
	demo = gr.Interface(
	    title="Captcha Solver",
	    fn=predict,
	    inputs=gr.Textbox(placeholder="SVG to PNG...", lines=6, label="SVG"),
	    outputs=gr.Textbox(label="Solution"),
	)

	# Start the web server as soon as the module is executed.
	demo.launch()