Spaces:

mohakapoor
/

CaptchaOCR

Running

CaptchaOCR / app.py

mohakkapoor4

Refactor .gitignore to specify checkpoint file types and exclude all but the best model. Update inference.py to use enhanced CAPTCHA generation and adjust dimensions. Increase training epochs in train.py for better model performance. Update training metrics and data generation logic in data.py for improved dataset handling and augmentation. Update config.py for dataset path consistency.

322be7d 4 months ago

raw

history blame

2.51 kB

	import os
	import random
	import gradio as gr
	from PIL import Image
	import torch

	# Import your inference module
	import inference as inf
	from src.generateCaptcha import generate_captcha
	from src.config import cfg # sizes, charset, dirs

	# Pick the compute device once at import time: CUDA when available, otherwise CPU.
	if torch.cuda.is_available():
	    DEVICE = torch.device("cuda")
	else:
	    DEVICE = torch.device("cpu")

	# Load the checkpoint a single time, then move it to DEVICE and call .eval() on it
	# so every request reuses the same ready-to-use model object.
	MODEL = inf.load_model("checkpoints/best_model.pth")
	MODEL = MODEL.to(DEVICE)
	MODEL = MODEL.eval()

	# Generated and uploaded images are written here; create the directory if missing.
	os.makedirs(cfg.RESULT_DIR, exist_ok=True)

	def random_text():
	    """Return a random CAPTCHA string built from ``cfg.chars``.

	    The length is drawn with ``random.randint`` between
	    ``cfg.CAPTCHA_LEN_LOWER_LIMIT`` and ``cfg.CAPTCHA_LEN_UPPER_LIMIT``
	    (both inclusive); characters are sampled with replacement.
	    """
	    length = random.randint(cfg.CAPTCHA_LEN_LOWER_LIMIT, cfg.CAPTCHA_LEN_UPPER_LIMIT)
	    picked = random.choices(cfg.chars, k=length)
	    return "".join(picked)

	def ui_generate():
	    """Create a random CAPTCHA, save it under ``cfg.RESULT_DIR`` and return it.

	    Returns:
	        A ``(image, text, filepath)`` tuple: the object returned by
	        ``generate_captcha``, the ground-truth text, and the path of the
	        saved ``.png`` file.
	    """
	    text = random_text()

	    # A random 4-digit suffix makes a filename clash between equal texts unlikely.
	    suffix = random.randint(1000, 9999)
	    filepath = os.path.join(cfg.RESULT_DIR, f"{text}_{suffix}.png")

	    # Render with generateCaptcha.py at the configured width and height.
	    img = generate_captcha(text, width=cfg.W_max, height=cfg.H)
	    img.save(filepath)

	    return img, text, filepath

	def _predict_from_path(image_path):
	    """Preprocess the image file at ``image_path`` and return the model's prediction."""
	    tensor = inf.preprocess_image(image_path, (cfg.W_max, cfg.H))
	    return inf.predict_captcha(MODEL, tensor, DEVICE)


	def ui_solve(img: Image.Image, path_hint: str):
	    """Predict the text of a CAPTCHA image.

	    An uploaded image takes priority; otherwise the image at ``path_hint``
	    (the last generated CAPTCHA) is solved.

	    Args:
	        img: Uploaded image, or ``None`` when nothing was uploaded.
	        path_hint: Path of a previously generated image; may be empty.

	    Returns:
	        The value returned by ``inf.predict_captcha``, or a help message
	        when neither an upload nor an existing ``path_hint`` is available.
	    """
	    import tempfile  # local import: only the upload branch needs it

	    # Prefer uploaded image
	    if img is not None:
	        # mkstemp guarantees a unique name, so concurrent or repeated uploads can
	        # never overwrite each other (the old 4-digit random suffix could collide).
	        fd, upload_path = tempfile.mkstemp(
	            prefix="upload_", suffix=".png", dir=cfg.RESULT_DIR
	        )
	        os.close(fd)  # the image is written via its path below, not via this handle
	        # The upload stays in the results directory afterwards, as before.
	        img.save(upload_path)
	        return _predict_from_path(upload_path)

	    # Otherwise, solve the last generated image
	    if path_hint and os.path.exists(path_hint):
	        return _predict_from_path(path_hint)

	    return "No image provided. Generate or upload first."

	# Build the Gradio UI: a "generate" section and a "solve" section, wired to
	# ui_generate and ui_solve respectively.
	with gr.Blocks(title="CAPTCHA OCR (checkpoint)") as demo:
	    gr.Markdown("## CAPTCHA OCR demo")

	    # Generate button next to a read-only box showing the ground-truth text.
	    with gr.Row():
	        gen_btn = gr.Button("Generate CAPTCHA", variant="primary")
	        gt_out = gr.Textbox(label="Ground Truth", interactive=False)

	    # Generated image plus a hidden textbox that carries the saved file path
	    # from ui_generate to ui_solve.
	    with gr.Row():
	        img_out = gr.Image(label="Generated CAPTCHA", type="pil")
	        path_box = gr.Textbox(label="Internal Path", interactive=False, visible=False)

	    # ui_generate returns (image, text, filepath), matching these outputs in order.
	    gen_btn.click(fn=ui_generate, outputs=[img_out, gt_out, path_box])

	    gr.Markdown("### Solve")
	    # Optional upload, solve button, and read-only prediction box.
	    with gr.Row():
	        img_in = gr.Image(label="Upload CAPTCHA (optional)", type="pil")
	        solve_btn = gr.Button("Solve")
	        pred_out = gr.Textbox(label="Prediction", interactive=False)

	    # ui_solve receives the upload (if any) and the hidden path of the last
	    # generated image; its return value is shown in the prediction box.
	    solve_btn.click(fn=ui_solve, inputs=[img_in, path_box], outputs=[pred_out])

	# Start the app only when this file is run as a script, not when it is imported.
	if __name__ == "__main__":
	    demo.launch()