"""Generate a synthetic CAPTCHA dataset with light augmentation.

Renders NUM_IMAGES random alphanumeric strings with ``ImageCaptcha``, applies
rotation, perspective warp, interference, noise/blur and JPEG artifacts, then
writes the images into train/val/test sub-directories of DATASET_DIR together
with a global label CSV (LABELS) and one ``labels.csv`` per split.
"""

import csv
import io
import os
import random
import string

import cv2
import numpy as np
import pandas as pd
from captcha.image import ImageCaptcha
from PIL import Image, ImageDraw, ImageFilter

# ===== your original config =====
DATASET_DIR = "Dataset/captchas"
LABELS = "Dataset/labels.csv"
NUM_IMAGES = 100000
CHARS = string.ascii_letters + string.digits
CAPTCHA_LEN_LOWER_LIMIT = 5
CAPTCHA_LEN_UPPER_LIMIT = 7
# [split name, fraction of NUM_IMAGES]; order is train, val, test.
directories = [["train", 0.8], ["val", 0.1], ["test", 0.1]]

# Match config.py dimensions
IMG_WIDTH = 256   # W_max from config
IMG_HEIGHT = 60   # H from config
GRAYSCALE = True  # grayscale from config


# ----- minimal augment helpers -----
def rand_color(lo=0, hi=255):
    """Return an RGB tuple with each channel drawn uniformly from [lo, hi]."""
    return tuple(random.randint(lo, hi) for _ in range(3))


def gradient_bg(w, h):
    """Return a w x h RGB image fading vertically between two light colours.

    Both end colours are drawn with channels in [200, 255]; each row is the
    linear blend of the top and bottom colour at that row's relative height.
    """
    top = np.array(rand_color(200, 255))
    bot = np.array(rand_color(200, 255))
    arr = np.zeros((h, w, 3), dtype=np.uint8)
    denom = max(1, h - 1)  # avoids division by zero for a 1-pixel-high image
    for y in range(h):
        t = y / denom
        arr[y, :, :] = (top * (1 - t) + bot * t).astype(np.uint8)
    return Image.fromarray(arr)


def add_interference(img, line_range=(0, 3), dot_range=(10, 80)):
    """Draw random lines and dots onto ``img`` in place and return it.

    Args:
        img: PIL image to draw on (modified in place).
        line_range: inclusive (min, max) number of lines to draw.
        dot_range: inclusive (min, max) number of dots to draw.
    """
    draw = ImageDraw.Draw(img)
    w, h = img.size
    for _ in range(random.randint(*line_range)):
        x1, y1 = random.randint(0, w - 1), random.randint(0, h - 1)
        x2, y2 = random.randint(0, w - 1), random.randint(0, h - 1)
        draw.line(
            (x1, y1, x2, y2),
            fill=rand_color(50, 180),
            width=random.randint(1, 2),
        )
    for _ in range(random.randint(*dot_range)):
        x, y = random.randint(0, w - 1), random.randint(0, h - 1)
        r = random.choice([0, 1])
        draw.ellipse((x - r, y - r, x + r, y + r), fill=rand_color(0, 200))
    return img


def perspective_warp(img, max_ratio=0.03):
    """Apply a small random perspective distortion and return an RGB image.

    Each corner is moved inward by up to ``w * max_ratio`` pixels horizontally
    and ``h * max_ratio * 0.7`` pixels vertically. Areas uncovered by the warp
    are filled by replicating border pixels. ``img`` is returned unchanged
    when ``max_ratio`` is not positive.
    """
    if max_ratio <= 0:
        return img
    w, h = img.size
    dx = int(w * max_ratio)
    dy = int(h * max_ratio * 0.7)
    src = np.float32([[0, 0], [w, 0], [w, h], [0, h]])
    dst = np.float32([
        [random.randint(0, dx), random.randint(0, dy)],
        [w - random.randint(0, dx), random.randint(0, dy)],
        [w - random.randint(0, dx), h - random.randint(0, dy)],
        [random.randint(0, dx), h - random.randint(0, dy)],
    ])
    M = cv2.getPerspectiveTransform(src, dst)
    arr = np.array(img.convert("RGB"))[:, :, ::-1]  # to BGR
    out = cv2.warpPerspective(arr, M, (w, h), borderMode=cv2.BORDER_REPLICATE)
    return Image.fromarray(out[:, :, ::-1])  # back to RGB


def jpeg_recompress(img, qmin=70, qmax=95):
    """Round-trip ``img`` through an in-memory JPEG to add compression artifacts.

    The JPEG quality is drawn uniformly from the inclusive range [qmin, qmax].
    Returns a new RGB image.
    """
    q = random.randint(qmin, qmax)
    buf = io.BytesIO()
    img.save(buf, format="JPEG", quality=q)
    buf.seek(0)
    return Image.open(buf).convert("RGB")


def add_noise_and_blur(img, noise_sigma=(0.0, 6.0), blur_sigma=(0.0, 0.8),
                       motion_prob=0.1):
    """Add Gaussian noise, then either a directional or a Gaussian blur.

    Args:
        img: input PIL image.
        noise_sigma: (min, max) range for the noise standard deviation;
            noise is skipped when the drawn value is <= 0.05.
        blur_sigma: (min, max) range for the Gaussian blur radius; the blur
            is skipped when the drawn value is <= 0.05.
        motion_prob: probability of using the directional blur instead of the
            Gaussian blur.
    """
    # gaussian noise
    s = random.uniform(*noise_sigma)
    if s > 0.05:
        arr = np.array(img).astype(np.float32)
        arr += np.random.normal(0, s, arr.shape).astype(np.float32)
        arr = np.clip(arr, 0, 255).astype(np.uint8)
        img = Image.fromarray(arr)

    # blur
    if random.random() < motion_prob:
        # simple directional blur: a horizontal line kernel rotated by a
        # random angle, normalised so the filter preserves brightness
        ksize = random.choice([3, 5])
        kernel = Image.new("L", (ksize, ksize), 0)
        draw = ImageDraw.Draw(kernel)
        draw.line((0, ksize // 2, ksize - 1, ksize // 2), fill=255, width=1)
        kernel = kernel.rotate(random.uniform(0, 180), resample=Image.BILINEAR)
        kernel = np.array(kernel, dtype=np.float32)
        kernel /= max(1, kernel.sum())  # guard against an all-zero kernel
        arr = np.array(img)
        arr = cv2.filter2D(arr, -1, kernel)
        img = Image.fromarray(arr)
    else:
        sigma = random.uniform(*blur_sigma)
        if sigma > 0.05:
            img = img.filter(ImageFilter.GaussianBlur(radius=sigma))
    return img


def render_with_variation(text, width=IMG_WIDTH, height=IMG_HEIGHT):
    """Render ``text`` as a CAPTCHA image with randomised light augmentation.

    Pipeline: ImageCaptcha render -> small rotation -> optional perspective
    warp -> interference lines/dots -> noise and blur -> JPEG recompression
    -> optional contrast reduction -> optional grayscale conversion.

    Args:
        text: the string to render.
        width: output image width in pixels.
        height: output image height in pixels.

    Returns:
        A PIL image; mode "L" when GRAYSCALE is true, otherwise "RGB".
    """
    # randomize basic style knobs
    # NOTE: this background is only sampled at its top-left pixel (below) to
    # choose the fill colour for corners exposed by the rotation; the visible
    # background comes from the ImageCaptcha render itself.
    bg_choice = random.choice(["solid", "gradient"])
    if bg_choice == "solid":
        bg_color = rand_color(210, 255)
        bg = Image.new("RGB", (width, height), color=bg_color)
    else:
        bg = gradient_bg(width, height)

    # Adjust font sizes for larger dimensions
    font_sizes = [int(height * 0.7), int(height * 0.75),
                  int(height * 0.8), int(height * 0.85)]
    font_size = random.choice(font_sizes)

    # ImageCaptcha accepts fonts via fonts arg; here we keep the default fonts
    generator = ImageCaptcha(width=width, height=height, fonts=None,
                             font_sizes=[font_size])

    # draw base image; convert() yields an independent RGB copy regardless of
    # the mode the generator returns
    base = generator.generate_image(text).convert("RGB")

    # mild rotation
    angle = random.uniform(-6, 6)
    base = base.rotate(angle, resample=Image.BILINEAR, expand=False,
                       fillcolor=bg.getpixel((0, 0)))

    # perspective warp (very light)
    if random.random() < 0.6:
        base = perspective_warp(base, max_ratio=0.025)

    # draw interference over the image
    base = add_interference(base, line_range=(0, 3), dot_range=(10, 60))

    # light noise + blur + jpeg recompress to add artifacts
    base = add_noise_and_blur(base, noise_sigma=(0.0, 5.0),
                              blur_sigma=(0.0, 0.7), motion_prob=0.12)
    base = jpeg_recompress(base, qmin=72, qmax=92)

    # optional low contrast: 20% chance to compress the value range slightly
    # (0 -> 6, 255 -> 248)
    if random.random() < 0.2:
        base = base.point(lambda p: int(p * 0.95 + 6))

    # Convert to grayscale if specified
    if GRAYSCALE:
        base = base.convert('L')

    return base


# Extract split names and ratios upfront
train_name, val_name, test_name = (entry[0] for entry in directories)
train_ratio, val_ratio, test_ratio = (entry[1] for entry in directories)

# Calculate split thresholds: indices [0, train_end) are train,
# [train_end, val_end) are val, and the remainder is test.
n = NUM_IMAGES
train_end = int(n * train_ratio)
val_end = train_end + int(n * val_ratio)

# Create directories once
train_dir = os.path.join(DATASET_DIR, train_name)
val_dir = os.path.join(DATASET_DIR, val_name)
test_dir = os.path.join(DATASET_DIR, test_name)
for path in (DATASET_DIR, train_dir, val_dir, test_dir):
    os.makedirs(path, exist_ok=True)
# The label file may live outside DATASET_DIR; make sure its folder exists.
os.makedirs(os.path.dirname(LABELS) or ".", exist_ok=True)

image = ImageCaptcha(width=160, height=60)  # kept for compatibility if needed

progress_step = max(1, NUM_IMAGES // 100)  # report roughly every 1%

with open(LABELS, mode="w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["filename", "label"])

    for i in range(NUM_IMAGES):
        if i % progress_step == 0:
            print(f"{i} images made")

        # Pick output directory based on thresholds
        if i < train_end:
            OUTPUT_DIR = train_dir
        elif i < val_end:
            OUTPUT_DIR = val_dir
        else:
            OUTPUT_DIR = test_dir

        text = ''.join(random.choices(
            CHARS,
            k=random.randint(CAPTCHA_LEN_LOWER_LIMIT, CAPTCHA_LEN_UPPER_LIMIT),
        ))
        # The index suffix keeps filenames unique even if a text repeats.
        filename = f"{text}_{i}.png"
        filepath = os.path.join(OUTPUT_DIR, filename)

        img = render_with_variation(text, width=IMG_WIDTH, height=IMG_HEIGHT)
        img.save(filepath)

        writer.writerow([filename, text])

print("Data Generated!")

# Split the global label file into one labels.csv per split directory.
# Read every column as plain text: without dtype=str / keep_default_na=False
# pandas would infer types, so a label made only of digits (or one matching
# an NA marker such as "null") could be rewritten or dropped.
df = pd.read_csv(LABELS, dtype=str, keep_default_na=False)
n = len(df)
train_end = int(n * train_ratio)
val_end = train_end + int(n * val_ratio)

df_train = df.iloc[:train_end]
df_val = df.iloc[train_end:val_end]
df_test = df.iloc[val_end:]

df_train.to_csv(os.path.join(train_dir, "labels.csv"), index=False)
df_val.to_csv(os.path.join(val_dir, "labels.csv"), index=False)
df_test.to_csv(os.path.join(test_dir, "labels.csv"), index=False)
print("Labels Generated")