Spaces:
Running
Running
mohakkapoor4
committed on
Commit
·
322be7d
1
Parent(s):
a1eb0d1
Refactor .gitignore to specify checkpoint file types and exclude all but the best model. Update inference.py to use enhanced CAPTCHA generation and adjust dimensions. Increase training epochs in train.py for better model performance. Update training metrics and data generation logic in data.py for improved dataset handling and augmentation. Update config.py for dataset path consistency.
Browse files
- .gitignore +7 -8
- Metrics/loss_comparison.png +2 -2
- Metrics/training_losses.png +2 -2
- Metrics/training_metrics.txt +2 -2
- app.py +72 -0
- checkpoints/best_model.pth +3 -0
- inference.py +15 -10
- src/config.py +1 -1
- src/data.py +184 -35
- src/generateCaptcha.py +181 -0
- train.py +1 -1
.gitignore
CHANGED
|
@@ -80,14 +80,13 @@ desktop.ini
|
|
| 80 |
!Metrics/*.jpg
|
| 81 |
|
| 82 |
# Models and checkpoints
|
| 83 |
-
checkpoints
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
*.safetensors
|
| 91 |
runs/
|
| 92 |
outputs/
|
| 93 |
artifacts/
|
|
|
|
| 80 |
!Metrics/*.jpg
|
| 81 |
|
| 82 |
# Models and checkpoints
|
| 83 |
+
checkpoints/*.pth
|
| 84 |
+
checkpoints/*.pt
|
| 85 |
+
checkpoints/*.ckpt
|
| 86 |
+
checkpoints/*.onnx
|
| 87 |
+
checkpoints/*.bin
|
| 88 |
+
checkpoints/*.safetensors
|
| 89 |
+
!checkpoints/best_model.pth
|
|
|
|
| 90 |
runs/
|
| 91 |
outputs/
|
| 92 |
artifacts/
|
Metrics/loss_comparison.png
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
Metrics/training_losses.png
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
Metrics/training_metrics.txt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9ed95b7a50649f8393e171a702b5e096adaa4d66712b3faf26c45931854dffb7
|
| 3 |
+
size 842
|
app.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import random
|
| 3 |
+
import gradio as gr
|
| 4 |
+
from PIL import Image
|
| 5 |
+
import torch
|
| 6 |
+
|
| 7 |
+
# Import your inference module
|
| 8 |
+
import inference as inf
|
| 9 |
+
from src.generateCaptcha import generate_captcha
|
| 10 |
+
from src.config import cfg # sizes, charset, dirs
|
| 11 |
+
|
| 12 |
+
# Choose the compute device once, then load the checkpoint a single time at
# import so every UI callback reuses the same model object in eval mode.
_cuda_ready = torch.cuda.is_available()
DEVICE = torch.device("cuda" if _cuda_ready else "cpu")
MODEL = inf.load_model("checkpoints/best_model.pth").to(DEVICE).eval()

# Generated and uploaded images are written under cfg.RESULT_DIR.
os.makedirs(cfg.RESULT_DIR, exist_ok=True)
|
| 18 |
+
|
| 19 |
+
def random_text():
    """Return a random CAPTCHA string built from ``cfg.chars``.

    The length is drawn with ``random.randint`` between
    ``cfg.CAPTCHA_LEN_LOWER_LIMIT`` and ``cfg.CAPTCHA_LEN_UPPER_LIMIT``
    (both ends inclusive). Characters are sampled with replacement.
    """
    length = random.randint(cfg.CAPTCHA_LEN_LOWER_LIMIT, cfg.CAPTCHA_LEN_UPPER_LIMIT)
    picked = random.choices(cfg.chars, k=length)
    return "".join(picked)
|
| 22 |
+
|
| 23 |
+
def ui_generate():
    """Create a new CAPTCHA for the demo and save it under ``cfg.RESULT_DIR``.

    Returns:
        A ``(image, text, filepath)`` tuple: the generated image, the text it
        was rendered from, and the path the PNG was written to.
    """
    text = random_text()
    # Random four-digit suffix in the file name, as in "<text>_<suffix>.png".
    suffix = random.randint(1000, 9999)

    # Rendering is delegated to src/generateCaptcha.py.
    img = generate_captcha(text, width=cfg.W_max, height=cfg.H)

    filepath = os.path.join(cfg.RESULT_DIR, f"{text}_{suffix}.png")
    img.save(filepath)
    return img, text, filepath
|
| 34 |
+
|
| 35 |
+
def ui_solve(img: Image.Image, path_hint: str):
    """Run the OCR model on an uploaded image or on the last generated one.

    Args:
        img: Uploaded image, or ``None`` when nothing was uploaded. When
            present it takes priority and is first saved under
            ``cfg.RESULT_DIR`` so it can be preprocessed from a path.
        path_hint: Path of the most recently generated CAPTCHA; used only when
            ``img`` is ``None`` and the path exists on disk.

    Returns:
        The predicted text, or an explanatory message when neither source is
        available.
    """
    if img is not None:
        # Uploaded image wins: persist it, then read it back by path.
        source_path = os.path.join(cfg.RESULT_DIR, f"upload_{random.randint(1000,9999)}.png")
        img.save(source_path)
    elif path_hint and os.path.exists(path_hint):
        # Fall back to the last generated image.
        source_path = path_hint
    else:
        return "No image provided. Generate or upload first."

    tensor = inf.preprocess_image(source_path, (cfg.W_max, cfg.H))
    return inf.predict_captcha(MODEL, tensor, DEVICE)
|
| 49 |
+
|
| 50 |
+
# NOTE(review): nesting below is reconstructed from syntax; confirm that the
# three "Solve" widgets are meant to share one row.
with gr.Blocks(title="CAPTCHA OCR (checkpoint)") as demo:
    gr.Markdown("## CAPTCHA OCR demo")

    # Generation controls and the ground-truth text of the generated image.
    with gr.Row():
        gen_btn = gr.Button("Generate CAPTCHA", variant="primary")
        gt_out = gr.Textbox(label="Ground Truth", interactive=False)

    # Generated image plus a hidden textbox that carries its file path to ui_solve.
    with gr.Row():
        img_out = gr.Image(label="Generated CAPTCHA", type="pil")
        path_box = gr.Textbox(label="Internal Path", interactive=False, visible=False)

    gen_btn.click(fn=ui_generate, outputs=[img_out, gt_out, path_box])

    gr.Markdown("### Solve")
    with gr.Row():
        img_in = gr.Image(label="Upload CAPTCHA (optional)", type="pil")
        solve_btn = gr.Button("Solve")
        pred_out = gr.Textbox(label="Prediction", interactive=False)

    solve_btn.click(fn=ui_solve, inputs=[img_in, path_box], outputs=[pred_out])

if __name__ == "__main__":
    demo.launch()
|
checkpoints/best_model.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:88e646907eb2ca7a43d87a5cd251be9c7d2b79f98e30d5ca9e53b8ae93e6045d
|
| 3 |
+
size 48371934
|
inference.py
CHANGED
|
@@ -7,7 +7,7 @@ from src.config import cfg
|
|
| 7 |
from src.model_crnn import CRNN
|
| 8 |
from src.vocab import ctc_greedy_decode, vocab_size
|
| 9 |
from src.plotting import TrainingMetrics
|
| 10 |
-
from
|
| 11 |
|
| 12 |
def load_model(checkpoint_path="checkpoints/best_model.pth"):
|
| 13 |
"""Load the trained model from checkpoint."""
|
|
@@ -69,12 +69,17 @@ def predict_captcha(model, image_tensor, device):
|
|
| 69 |
|
| 70 |
return prediction[0] if prediction else ""
|
| 71 |
|
| 72 |
-
def generate_test_captcha(text, filename, width=
|
| 73 |
-
"""Generate a test CAPTCHA image."""
|
| 74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
filepath = os.path.join(cfg.RESULT_DIR, filename)
|
| 76 |
-
|
| 77 |
-
print(f"Generated test CAPTCHA: {filename}")
|
| 78 |
return filepath
|
| 79 |
|
| 80 |
def main():
|
|
@@ -92,7 +97,7 @@ def main():
|
|
| 92 |
print("Model loaded successfully!")
|
| 93 |
|
| 94 |
# Generate test CAPTCHAs
|
| 95 |
-
print("\nGenerating test CAPTCHAs...")
|
| 96 |
test_cases = []
|
| 97 |
|
| 98 |
for i in range(4):
|
|
@@ -100,8 +105,8 @@ def main():
|
|
| 100 |
text = ''.join(random.choices(cfg.chars, k=random.randint(cfg.CAPTCHA_LEN_LOWER_LIMIT, cfg.CAPTCHA_LEN_UPPER_LIMIT)))
|
| 101 |
filename = f"{text}_{i}.png"
|
| 102 |
|
| 103 |
-
# Generate image
|
| 104 |
-
image_path = generate_test_captcha(text, filename)
|
| 105 |
test_cases.append((text, image_path, "")) # Add empty prediction slot
|
| 106 |
|
| 107 |
# Run inference
|
|
@@ -151,7 +156,7 @@ def main():
|
|
| 151 |
correct_chars += 1
|
| 152 |
|
| 153 |
char_accuracy = (correct_chars / total_chars) * 100 if total_chars > 0 else 0
|
| 154 |
-
print(f"
|
| 155 |
|
| 156 |
if accuracy >= 80:
|
| 157 |
print("Excellent performance!")
|
|
|
|
| 7 |
from src.model_crnn import CRNN
|
| 8 |
from src.vocab import ctc_greedy_decode, vocab_size
|
| 9 |
from src.plotting import TrainingMetrics
|
| 10 |
+
from src.generateCaptcha import generate_captcha
|
| 11 |
|
| 12 |
def load_model(checkpoint_path="checkpoints/best_model.pth"):
|
| 13 |
"""Load the trained model from checkpoint."""
|
|
|
|
| 69 |
|
| 70 |
return prediction[0] if prediction else ""
|
| 71 |
|
| 72 |
+
def generate_test_captcha(text, filename, width=256, height=60):
    """Render ``text`` as a CAPTCHA and save it under ``cfg.RESULT_DIR``.

    Args:
        text: String to render.
        filename: File name (not a full path) for the saved image.
        width: Image width passed to ``generate_captcha``.
        height: Image height passed to ``generate_captcha``.

    Returns:
        The path of the written image file.
    """
    # Rendering is delegated to src/generateCaptcha.py.
    captcha_img = generate_captcha(text, width=width, height=height)

    # Make sure the results directory exists before writing into it.
    os.makedirs(cfg.RESULT_DIR, exist_ok=True)
    filepath = os.path.join(cfg.RESULT_DIR, filename)
    captcha_img.save(filepath)

    print(f"Generated enhanced test CAPTCHA: {filename}")
    return filepath
|
| 84 |
|
| 85 |
def main():
|
|
|
|
| 97 |
print("Model loaded successfully!")
|
| 98 |
|
| 99 |
# Generate test CAPTCHAs
|
| 100 |
+
print("\nGenerating enhanced test CAPTCHAs...")
|
| 101 |
test_cases = []
|
| 102 |
|
| 103 |
for i in range(4):
|
|
|
|
| 105 |
text = ''.join(random.choices(cfg.chars, k=random.randint(cfg.CAPTCHA_LEN_LOWER_LIMIT, cfg.CAPTCHA_LEN_UPPER_LIMIT)))
|
| 106 |
filename = f"{text}_{i}.png"
|
| 107 |
|
| 108 |
+
# Generate enhanced image
|
| 109 |
+
image_path = generate_test_captcha(text, filename, width=cfg.W_max, height=cfg.H)
|
| 110 |
test_cases.append((text, image_path, "")) # Add empty prediction slot
|
| 111 |
|
| 112 |
# Run inference
|
|
|
|
| 156 |
correct_chars += 1
|
| 157 |
|
| 158 |
char_accuracy = (correct_chars / total_chars) * 100 if total_chars > 0 else 0
|
| 159 |
+
print(f"Character Accuracy: {correct_chars}/{total_chars} ({char_accuracy:.1f}%)")
|
| 160 |
|
| 161 |
if accuracy >= 80:
|
| 162 |
print("Excellent performance!")
|
src/config.py
CHANGED
|
@@ -4,7 +4,7 @@ from dataclasses import dataclass
|
|
| 4 |
|
| 5 |
@dataclass
|
| 6 |
class Config:
|
| 7 |
-
data_root: str = os.getenv("DATA_ROOT","
|
| 8 |
|
| 9 |
chars: str = string.ascii_letters + string.digits
|
| 10 |
CAPTCHA_LEN_LOWER_LIMIT: int = 5
|
|
|
|
| 4 |
|
| 5 |
@dataclass
|
| 6 |
class Config:
|
| 7 |
+
# Default dataset directory, overridable through the DATA_ROOT environment
# variable. A forward slash is used because a backslash is a literal file-name
# character on POSIX and "\c" is an invalid escape in a normal string literal.
data_root: str = os.getenv("DATA_ROOT", "Dataset/captchas")
|
| 8 |
|
| 9 |
chars: str = string.ascii_letters + string.digits
|
| 10 |
CAPTCHA_LEN_LOWER_LIMIT: int = 5
|
src/data.py
CHANGED
|
@@ -1,63 +1,212 @@
|
|
| 1 |
from captcha.image import ImageCaptcha
|
| 2 |
-
import random
|
| 3 |
-
import string
|
| 4 |
-
import os
|
| 5 |
-
import csv
|
| 6 |
import pandas as pd
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
-
# config
|
| 9 |
-
DATASET_DIR = "
|
| 10 |
-
LABELS = "
|
| 11 |
-
NUM_IMAGES =
|
| 12 |
CHARS = string.ascii_letters + string.digits
|
| 13 |
CAPTCHA_LEN_LOWER_LIMIT = 5
|
| 14 |
CAPTCHA_LEN_UPPER_LIMIT = 7
|
| 15 |
directories = [["train",0.8],["val",0.1],["test",0.1]]
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
os.makedirs(DATASET_DIR, exist_ok=True)
|
| 18 |
-
|
|
|
|
|
|
|
| 19 |
|
|
|
|
| 20 |
|
| 21 |
-
with open(LABELS,mode="w",newline="") as f:
|
| 22 |
writer = csv.writer(f)
|
| 23 |
writer.writerow(["filename","label"])
|
| 24 |
-
|
| 25 |
-
os.makedirs(OUTPUT_DIR,exist_ok=True)
|
| 26 |
for i in range(NUM_IMAGES):
|
| 27 |
-
if i%(NUM_IMAGES
|
| 28 |
print(f"{i} images made")
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
OUTPUT_DIR =
|
| 35 |
-
|
| 36 |
-
|
|
|
|
|
|
|
| 37 |
filename = f"{text}_{i}.png"
|
| 38 |
filepath = os.path.join(OUTPUT_DIR, filename)
|
| 39 |
-
image.write(text, filepath)
|
| 40 |
-
writer.writerow([filename,text])
|
| 41 |
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
| 43 |
|
|
|
|
| 44 |
|
| 45 |
-
|
| 46 |
|
|
|
|
|
|
|
| 47 |
n = len(df)
|
| 48 |
-
train_end = int(n *
|
| 49 |
-
val_end = train_end + int(n *
|
| 50 |
|
| 51 |
-
# Split datasets
|
| 52 |
df_train = df.iloc[:train_end]
|
| 53 |
df_val = df.iloc[train_end:val_end]
|
| 54 |
df_test = df.iloc[val_end:]
|
| 55 |
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
df_test.to_csv(os.path.join(DATASET_DIR,"test/labels.csv"), index=False)
|
| 60 |
-
|
| 61 |
print("Labels Generated")
|
| 62 |
-
|
| 63 |
-
|
|
|
|
| 1 |
from captcha.image import ImageCaptcha
|
| 2 |
+
import random, string, os, csv, io
|
|
|
|
|
|
|
|
|
|
| 3 |
import pandas as pd
|
| 4 |
+
from PIL import Image, ImageDraw, ImageFilter
|
| 5 |
+
import numpy as np
|
| 6 |
+
import cv2
|
| 7 |
|
| 8 |
+
# ===== your original config =====
|
| 9 |
+
DATASET_DIR = "Dataset/captchas"
|
| 10 |
+
LABELS = "Dataset/labels.csv"
|
| 11 |
+
NUM_IMAGES = 100000
|
| 12 |
CHARS = string.ascii_letters + string.digits
|
| 13 |
CAPTCHA_LEN_LOWER_LIMIT = 5
|
| 14 |
CAPTCHA_LEN_UPPER_LIMIT = 7
|
| 15 |
directories = [["train",0.8],["val",0.1],["test",0.1]]
|
| 16 |
|
| 17 |
+
# Match config.py dimensions
|
| 18 |
+
IMG_WIDTH = 256 # W_max from config
|
| 19 |
+
IMG_HEIGHT = 60 # H from config
|
| 20 |
+
GRAYSCALE = True # grayscale from config
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
# ----- minimal augment helpers -----
|
| 24 |
+
def rand_color(lo=0, hi=255):
    """Return a random ``(r, g, b)`` tuple with each channel in ``[lo, hi]``."""
    red = random.randint(lo, hi)
    green = random.randint(lo, hi)
    blue = random.randint(lo, hi)
    return (red, green, blue)
|
| 26 |
+
|
| 27 |
+
def gradient_bg(w, h):
    """Build a ``w`` x ``h`` RGB image fading vertically between two light colours.

    Both end colours are drawn with ``rand_color(200, 255)``. Row 0 takes the
    first colour and, when ``h > 1``, the last row takes the second; rows in
    between are linearly interpolated and truncated to ``uint8``.
    """
    top = np.array(rand_color(200, 255))
    bot = np.array(rand_color(200, 255))
    # Per-row blend factor in [0, 1]; max() avoids dividing by zero when h == 1.
    t = (np.arange(h) / max(1, h - 1))[:, None]
    # One colour per row, computed for all rows at once instead of in a loop.
    rows = (top * (1 - t) + bot * t).astype(np.uint8)
    # Every pixel in a row shares that row's colour.
    arr = np.repeat(rows[:, None, :], w, axis=1)
    return Image.fromarray(arr)
|
| 35 |
+
|
| 36 |
+
def add_interference(img, line_range=(0, 3), dot_range=(10, 80)):
    """Draw random lines and dots directly onto ``img`` and return it.

    Args:
        img: PIL image to draw on; it is modified in place.
        line_range: Inclusive ``(min, max)`` bounds for the number of lines.
        dot_range: Inclusive ``(min, max)`` bounds for the number of dots.

    Returns:
        The same ``img`` object that was passed in.
    """
    canvas = ImageDraw.Draw(img)
    width, height = img.size
    max_x, max_y = width - 1, height - 1

    line_count = random.randint(*line_range)
    for _ in range(line_count):
        start = (random.randint(0, max_x), random.randint(0, max_y))
        end = (random.randint(0, max_x), random.randint(0, max_y))
        stroke = rand_color(50, 180)
        thickness = random.randint(1, 2)
        canvas.line(start + end, fill=stroke, width=thickness)

    dot_count = random.randint(*dot_range)
    for _ in range(dot_count):
        cx = random.randint(0, max_x)
        cy = random.randint(0, max_y)
        radius = random.choice([0, 1])
        box = (cx - radius, cy - radius, cx + radius, cy + radius)
        canvas.ellipse(box, fill=rand_color(0, 200))
    return img
|
| 48 |
+
|
| 49 |
+
def perspective_warp(img, max_ratio=0.03):
    """Apply a small random perspective distortion to ``img``.

    Each corner is moved inwards by a random amount of at most
    ``max_ratio * width`` horizontally and ``0.7 * max_ratio * height``
    vertically. Pixels outside the source are filled by edge replication.

    Args:
        img: PIL image to distort.
        max_ratio: Upper bound of the corner displacement as a fraction of the
            image size. Values ``<= 0`` return ``img`` unchanged.

    Returns:
        A new RGB PIL image of the same size, or ``img`` itself when
        ``max_ratio <= 0``.
    """
    if max_ratio <= 0:
        return img
    w, h = img.size
    dx = int(w * max_ratio)
    dy = int(h * max_ratio * 0.7)
    src = np.float32([[0, 0], [w, 0], [w, h], [0, h]])
    dst = np.float32([
        [random.randint(0, dx), random.randint(0, dy)],          # top-left
        [w - random.randint(0, dx), random.randint(0, dy)],      # top-right
        [w - random.randint(0, dx), h - random.randint(0, dy)],  # bottom-right
        [random.randint(0, dx), h - random.randint(0, dy)],      # bottom-left
    ])
    matrix = cv2.getPerspectiveTransform(src, dst)
    # The warp treats every channel the same way, so no RGB<->BGR swap is
    # needed around the OpenCV call.
    pixels = np.array(img.convert("RGB"))
    warped = cv2.warpPerspective(pixels, matrix, (w, h), borderMode=cv2.BORDER_REPLICATE)
    return Image.fromarray(warped)
|
| 64 |
+
|
| 65 |
+
def jpeg_recompress(img, qmin=70, qmax=95):
    """Round-trip ``img`` through an in-memory JPEG to add compression artifacts.

    Args:
        img: PIL image to recompress.
        qmin: Lowest JPEG quality that may be chosen (inclusive).
        qmax: Highest JPEG quality that may be chosen (inclusive).

    Returns:
        A new RGB PIL image decoded from the JPEG bytes.
    """
    quality = random.randint(qmin, qmax)
    # JPEG cannot store alpha or palette data; convert those modes up front
    # instead of letting save() raise.
    if img.mode in ("RGBA", "LA", "P", "PA"):
        img = img.convert("RGB")
    with io.BytesIO() as buf:
        img.save(buf, format="JPEG", quality=quality)
        buf.seek(0)
        # convert() forces the lazy decoder to read everything before the
        # buffer is closed.
        return Image.open(buf).convert("RGB")
|
| 71 |
+
|
| 72 |
+
def add_noise_and_blur(img, noise_sigma=(0.0, 6.0), blur_sigma=(0.0, 0.8), motion_prob=0.1):
    """Add Gaussian noise, then either a directional or a Gaussian blur.

    Args:
        img: PIL image to degrade.
        noise_sigma: ``(low, high)`` range for the noise standard deviation;
            noise is skipped when the drawn value is ``<= 0.05``.
        blur_sigma: ``(low, high)`` range for the Gaussian blur radius; the
            blur is skipped when the drawn value is ``<= 0.05``.
        motion_prob: Probability of applying the directional blur instead of
            the Gaussian blur.

    Returns:
        The degraded PIL image (``img`` itself if nothing was applied).
    """
    # Gaussian noise, added in float and clipped back to the 8-bit range.
    sigma_noise = random.uniform(*noise_sigma)
    if sigma_noise > 0.05:
        pixels = np.array(img).astype(np.float32)
        pixels += np.random.normal(0, sigma_noise, pixels.shape).astype(np.float32)
        img = Image.fromarray(np.clip(pixels, 0, 255).astype(np.uint8))

    if random.random() < motion_prob:
        # Directional blur: draw a one-pixel line through the kernel centre,
        # rotate it to a random angle and normalise it to sum to one.
        ksize = random.choice([3, 5])
        stroke = Image.new("L", (ksize, ksize), 0)
        ImageDraw.Draw(stroke).line((0, ksize // 2, ksize - 1, ksize // 2), fill=255, width=1)
        stroke = stroke.rotate(random.uniform(0, 180), resample=Image.BILINEAR)
        kernel = np.array(stroke, dtype=np.float32)
        kernel /= max(1, kernel.sum())
        # cv2 comes from the module-level import; no local import is needed.
        img = Image.fromarray(cv2.filter2D(np.array(img), -1, kernel))
    else:
        sigma_blur = random.uniform(*blur_sigma)
        if sigma_blur > 0.05:
            img = img.filter(ImageFilter.GaussianBlur(radius=sigma_blur))
    return img
|
| 99 |
+
|
| 100 |
+
def render_with_variation(text, width=IMG_WIDTH, height=IMG_HEIGHT):
    """Render ``text`` with ``ImageCaptcha`` and apply light random augmentation.

    The pipeline is: base render, small rotation, optional perspective warp,
    interference lines/dots, noise and blur, JPEG recompression, optional
    contrast reduction, and conversion to grayscale when ``GRAYSCALE`` is set.

    Args:
        text: String to render.
        width: Output image width in pixels.
        height: Output image height in pixels.

    Returns:
        The augmented PIL image (mode ``L`` when ``GRAYSCALE`` is true).
    """
    # Colour used for the corners exposed by the rotation. Only a single light
    # colour is needed, so no full background image is built for it.
    if random.choice(["solid", "gradient"]) == "solid":
        fill = rand_color(210, 255)
    else:
        fill = rand_color(200, 255)

    # Font size is a random fraction of the image height.
    font_size = random.choice([int(height * ratio) for ratio in (0.7, 0.75, 0.8, 0.85)])
    renderer = ImageCaptcha(width=width, height=height, fonts=None, font_sizes=[font_size])
    base = renderer.generate_image(text).convert("RGB")

    # Mild rotation.
    angle = random.uniform(-6, 6)
    base = base.rotate(angle, resample=Image.BILINEAR, expand=False, fillcolor=fill)

    # Very light perspective warp, applied to roughly 60% of images.
    if random.random() < 0.6:
        base = perspective_warp(base, max_ratio=0.025)

    # Interference, then noise/blur, then JPEG artifacts.
    base = add_interference(base, line_range=(0, 3), dot_range=(10, 60))
    base = add_noise_and_blur(base, noise_sigma=(0.0, 5.0), blur_sigma=(0.0, 0.7), motion_prob=0.12)
    base = jpeg_recompress(base, qmin=72, qmax=92)

    # Optional low contrast: 20% chance to compress the value range slightly.
    if random.random() < 0.2:
        base = base.point(lambda p: int(p*0.95 + 6))

    if GRAYSCALE:
        base = base.convert('L')

    return base
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
# Split names and ratios come from the `directories` table (train, val, test).
(train_name, train_ratio), (val_name, val_ratio), (test_name, test_ratio) = directories

# Index thresholds: images [0, train_end) go to train, [train_end, val_end)
# to val, and the rest to test.
n = NUM_IMAGES
train_end = int(n * train_ratio)
val_end = train_end + int(n * val_ratio)

# Create the output directories once, before any image is written.
train_dir = os.path.join(DATASET_DIR, train_name)
val_dir = os.path.join(DATASET_DIR, val_name)
test_dir = os.path.join(DATASET_DIR, test_name)

os.makedirs(DATASET_DIR, exist_ok=True)
for split_dir in (train_dir, val_dir, test_dir):
    os.makedirs(split_dir, exist_ok=True)

image = ImageCaptcha(width=160, height=60)  # kept for compatibility if needed

with open(LABELS, mode="w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["filename","label"])

    progress_step = max(1, NUM_IMAGES // 100)
    for i in range(NUM_IMAGES):
        if i % progress_step == 0:
            print(f"{i} images made")

        # Pick the output directory from the index thresholds.
        if i < train_end:
            OUTPUT_DIR = train_dir
        elif i < val_end:
            OUTPUT_DIR = val_dir
        else:
            OUTPUT_DIR = test_dir

        text = ''.join(random.choices(CHARS, k=random.randint(CAPTCHA_LEN_LOWER_LIMIT, CAPTCHA_LEN_UPPER_LIMIT)))
        filename = f"{text}_{i}.png"
        filepath = os.path.join(OUTPUT_DIR, filename)

        img = render_with_variation(text, width=IMG_WIDTH, height=IMG_HEIGHT)
        img.save(filepath)

        writer.writerow([filename, text])

print("Data Generated!")

# Split the combined label file with the same thresholds used for the images.
# Reading everything as plain strings stops pandas from turning all-digit
# labels into numbers or NA-like tokens into missing values.
df = pd.read_csv(LABELS, dtype=str, keep_default_na=False)

df_train = df.iloc[:train_end]
df_val = df.iloc[train_end:val_end]
df_test = df.iloc[val_end:]

df_train.to_csv(os.path.join(DATASET_DIR, f"{train_name}/labels.csv"), index=False)
df_val.to_csv(os.path.join(DATASET_DIR, f"{val_name}/labels.csv"), index=False)
df_test.to_csv(os.path.join(DATASET_DIR, f"{test_name}/labels.csv"), index=False)
print("Labels Generated")
|
|
|
|
|
|
src/generateCaptcha.py
ADDED
|
@@ -0,0 +1,181 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Simple CAPTCHA Generation Utility
|
| 3 |
+
Generates individual CAPTCHA images using enhanced rendering
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import random
|
| 7 |
+
import string
|
| 8 |
+
from PIL import Image, ImageDraw, ImageFilter
|
| 9 |
+
import numpy as np
|
| 10 |
+
import cv2
|
| 11 |
+
import io
|
| 12 |
+
|
| 13 |
+
# Configuration - match your training setup
|
| 14 |
+
IMG_WIDTH = 256
|
| 15 |
+
IMG_HEIGHT = 60
|
| 16 |
+
GRAYSCALE = True
|
| 17 |
+
CHARS = string.ascii_letters + string.digits
|
| 18 |
+
CAPTCHA_LEN_LOWER_LIMIT = 5
|
| 19 |
+
CAPTCHA_LEN_UPPER_LIMIT = 7
|
| 20 |
+
|
| 21 |
+
def rand_color(lo=0, hi=255):
    """Generate a random RGB colour with every channel in ``[lo, hi]``."""
    channels = []
    for _ in range(3):
        channels.append(random.randint(lo, hi))
    return tuple(channels)
|
| 24 |
+
|
| 25 |
+
def gradient_bg(w, h):
    """Create a ``w`` x ``h`` RGB image with a vertical two-colour gradient.

    Both end colours come from ``rand_color(200, 255)``. Row 0 is the first
    colour and, when ``h > 1``, the last row is the second; rows in between
    are linearly interpolated and truncated to ``uint8``.
    """
    top = np.array(rand_color(200, 255))
    bot = np.array(rand_color(200, 255))
    # Blend factor per row; max() guards the single-row case against 0/0.
    t = (np.arange(h) / max(1, h - 1))[:, None]
    # All row colours in one vectorised expression instead of a Python loop.
    rows = (top * (1 - t) + bot * t).astype(np.uint8)
    # Repeat each row colour across the full width.
    arr = np.repeat(rows[:, None, :], w, axis=1)
    return Image.fromarray(arr)
|
| 34 |
+
|
| 35 |
+
def add_interference(img, line_range=(0, 3), dot_range=(10, 80)):
    """Add interference patterns (random lines and dots) to ``img`` in place.

    Args:
        img: PIL image to draw on.
        line_range: Inclusive ``(min, max)`` bounds for the number of lines.
        dot_range: Inclusive ``(min, max)`` bounds for the number of dots.

    Returns:
        The same ``img`` object, after drawing.
    """
    pen = ImageDraw.Draw(img)
    w, h = img.size
    last_col, last_row = w - 1, h - 1

    n_lines = random.randint(*line_range)
    for _ in range(n_lines):
        x1 = random.randint(0, last_col)
        y1 = random.randint(0, last_row)
        x2 = random.randint(0, last_col)
        y2 = random.randint(0, last_row)
        colour = rand_color(50, 180)
        pen.line((x1, y1, x2, y2), fill=colour, width=random.randint(1, 2))

    n_dots = random.randint(*dot_range)
    for _ in range(n_dots):
        x = random.randint(0, last_col)
        y = random.randint(0, last_row)
        r = random.choice([0, 1])
        pen.ellipse((x - r, y - r, x + r, y + r), fill=rand_color(0, 200))
    return img
|
| 48 |
+
|
| 49 |
+
def perspective_warp(img, max_ratio=0.03):
    """Apply a small random perspective warp.

    Each corner is pulled inwards by a random offset of at most
    ``max_ratio * width`` horizontally and ``0.7 * max_ratio * height``
    vertically. Areas outside the source are filled by edge replication.

    Args:
        img: PIL image to warp.
        max_ratio: Maximum corner displacement as a fraction of the image
            size. Values ``<= 0`` return ``img`` unchanged.

    Returns:
        A new RGB PIL image of the same size, or ``img`` itself when
        ``max_ratio <= 0``.
    """
    if max_ratio <= 0:
        return img
    w, h = img.size
    dx = int(w * max_ratio)
    dy = int(h * max_ratio * 0.7)
    src = np.float32([[0, 0], [w, 0], [w, h], [0, h]])
    dst = np.float32([
        [random.randint(0, dx), random.randint(0, dy)],          # top-left
        [w - random.randint(0, dx), random.randint(0, dy)],      # top-right
        [w - random.randint(0, dx), h - random.randint(0, dy)],  # bottom-right
        [random.randint(0, dx), h - random.randint(0, dy)],      # bottom-left
    ])
    M = cv2.getPerspectiveTransform(src, dst)
    # Channel order does not affect a geometric warp, so the array is passed
    # to OpenCV as RGB without any BGR round trip.
    arr = np.array(img.convert("RGB"))
    out = cv2.warpPerspective(arr, M, (w, h), borderMode=cv2.BORDER_REPLICATE)
    return Image.fromarray(out)
|
| 65 |
+
|
| 66 |
+
def jpeg_recompress(img, qmin=70, qmax=95):
    """Recompress the image through an in-memory JPEG to simulate artifacts.

    Args:
        img: PIL image to recompress.
        qmin: Lowest JPEG quality that may be chosen (inclusive).
        qmax: Highest JPEG quality that may be chosen (inclusive).

    Returns:
        A new RGB PIL image decoded from the JPEG bytes.
    """
    q = random.randint(qmin, qmax)
    # JPEG has no alpha channel or palette; convert such inputs first so
    # save() does not raise on them.
    if img.mode in ("RGBA", "LA", "P", "PA"):
        img = img.convert("RGB")
    with io.BytesIO() as buf:
        img.save(buf, format="JPEG", quality=q)
        buf.seek(0)
        # convert() fully decodes the image before the buffer is closed.
        return Image.open(buf).convert("RGB")
|
| 73 |
+
|
| 74 |
+
def add_noise_and_blur(img, noise_sigma=(0.0, 6.0), blur_sigma=(0.0, 0.8), motion_prob=0.1):
    """Add Gaussian noise followed by either a directional or a Gaussian blur.

    Args:
        img: PIL image to degrade.
        noise_sigma: ``(low, high)`` range for the noise standard deviation;
            noise is skipped when the drawn value is ``<= 0.05``.
        blur_sigma: ``(low, high)`` range for the Gaussian blur radius; the
            blur is skipped when the drawn value is ``<= 0.05``.
        motion_prob: Probability of using the directional blur instead of the
            Gaussian blur.

    Returns:
        The degraded PIL image (``img`` itself if nothing was applied).
    """
    # Gaussian noise, added in float32 and clipped back to 0..255.
    noise_level = random.uniform(*noise_sigma)
    if noise_level > 0.05:
        noisy = np.array(img).astype(np.float32)
        noisy += np.random.normal(0, noise_level, noisy.shape).astype(np.float32)
        noisy = np.clip(noisy, 0, 255).astype(np.uint8)
        img = Image.fromarray(noisy)

    use_motion_blur = random.random() < motion_prob
    if not use_motion_blur:
        radius = random.uniform(*blur_sigma)
        if radius > 0.05:
            img = img.filter(ImageFilter.GaussianBlur(radius=radius))
        return img

    # Directional blur: a horizontal one-pixel line through the kernel centre,
    # rotated to a random angle and normalised.
    ksize = random.choice([3,5])
    mid = ksize // 2
    line_img = Image.new("L", (ksize, ksize), 0)
    ImageDraw.Draw(line_img).line((0, mid, ksize - 1, mid), fill=255, width=1)
    line_img = line_img.rotate(random.uniform(0, 180), resample=Image.BILINEAR)
    kernel = np.array(line_img, dtype=np.float32)
    kernel /= max(1, kernel.sum())
    blurred = cv2.filter2D(np.array(img), -1, kernel)
    return Image.fromarray(blurred)
|
| 103 |
+
|
| 104 |
+
def generate_captcha(text=None, width=IMG_WIDTH, height=IMG_HEIGHT, save_path=None):
    """
    Generate a single enhanced CAPTCHA image.

    The pipeline is: base render with ``ImageCaptcha``, small rotation,
    optional perspective warp, interference lines/dots, noise and blur, JPEG
    recompression, optional contrast reduction, and conversion to grayscale
    when ``GRAYSCALE`` is set.

    Args:
        text (str, optional): Text to render. If None, generates random text.
        width (int): Image width
        height (int): Image height
        save_path (str, optional): Path to save the image. If None, returns PIL Image.

    Returns:
        PIL Image if save_path is None, otherwise saves and returns the path
    """
    # Imported here because this module does not import it at the top level.
    from captcha.image import ImageCaptcha

    if text is None:
        text = ''.join(random.choices(CHARS, k=random.randint(CAPTCHA_LEN_LOWER_LIMIT, CAPTCHA_LEN_UPPER_LIMIT)))

    # Colour used for the corners exposed by the rotation. Only a single light
    # colour is needed, so no full background image is built for it.
    if random.choice(["solid", "gradient"]) == "solid":
        fill = rand_color(210, 255)
    else:
        fill = rand_color(200, 255)

    # Font size is a random fraction of the image height.
    font_size = random.choice([int(height * ratio) for ratio in (0.7, 0.75, 0.8, 0.85)])
    renderer = ImageCaptcha(width=width, height=height, fonts=None, font_sizes=[font_size])
    base = renderer.generate_image(text).convert("RGB")

    # Mild rotation.
    angle = random.uniform(-6, 6)
    base = base.rotate(angle, resample=Image.BILINEAR, expand=False, fillcolor=fill)

    # Very light perspective warp, applied to roughly 60% of images.
    if random.random() < 0.6:
        base = perspective_warp(base, max_ratio=0.025)

    # Interference, then noise/blur, then JPEG artifacts.
    base = add_interference(base, line_range=(0, 3), dot_range=(10, 60))
    base = add_noise_and_blur(base, noise_sigma=(0.0, 5.0), blur_sigma=(0.0, 0.7), motion_prob=0.12)
    base = jpeg_recompress(base, qmin=72, qmax=92)

    # Optional low contrast: 20% chance to compress the value range slightly.
    if random.random() < 0.2:
        base = base.point(lambda p: int(p*0.95 + 6))

    if GRAYSCALE:
        base = base.convert('L')

    # Save or return.
    if save_path:
        base.save(save_path)
        return save_path
    return base
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
if __name__ == "__main__":
    # Example usage
    print("Generating sample CAPTCHAs...")

    # With save_path set, generate_captcha writes the file and returns its path.
    sample_path = generate_captcha("HELLO", save_path="sample_HELLO.png")
    print(f"Generated: {sample_path}")

    print("Done! Check the generated images.")
|
train.py
CHANGED
|
@@ -50,7 +50,7 @@ def main():
|
|
| 50 |
scaler = torch.amp.GradScaler('cuda', enabled=False) # Disable AMP for stability
|
| 51 |
|
| 52 |
# Epoch-based training with scheduler
|
| 53 |
-
epochs =
|
| 54 |
scheduler = torch.optim.lr_scheduler.OneCycleLR(
|
| 55 |
optimizer, max_lr=3e-4, steps_per_epoch=len(train_dl), epochs=epochs
|
| 56 |
)
|
|
|
|
| 50 |
scaler = torch.amp.GradScaler('cuda', enabled=False) # Disable AMP for stability
|
| 51 |
|
| 52 |
# Epoch-based training with scheduler
|
| 53 |
+
epochs = 40 # Increased for OneCycleLR
|
| 54 |
scheduler = torch.optim.lr_scheduler.OneCycleLR(
|
| 55 |
optimizer, max_lr=3e-4, steps_per_epoch=len(train_dl), epochs=epochs
|
| 56 |
)
|