Spaces:
Running
Running
mohakkapoor4
Refactor .gitignore to specify checkpoint file types and exclude all but the best model. Update inference.py to use enhanced CAPTCHA generation and adjust dimensions. Increase training epochs in train.py for better model performance. Update training metrics and data generation logic in data.py for improved dataset handling and augmentation. Update config.py for dataset path consistency.
322be7d
| from captcha.image import ImageCaptcha | |
| import random, string, os, csv, io | |
| import pandas as pd | |
| from PIL import Image, ImageDraw, ImageFilter | |
| import numpy as np | |
| import cv2 | |
# ===== your original config =====
# Output locations: image directory tree and the master label CSV.
DATASET_DIR = "Dataset/captchas"
LABELS = "Dataset/labels.csv"
# Total number of captcha images generated across all splits.
NUM_IMAGES = 100000
# Label alphabet: a-z, A-Z, 0-9 (62 symbols).
CHARS = string.ascii_letters + string.digits
# Label length is drawn uniformly from [5, 7] inclusive.
CAPTCHA_LEN_LOWER_LIMIT = 5
CAPTCHA_LEN_UPPER_LIMIT = 7
# (split subdirectory name, fraction of NUM_IMAGES); fractions sum to 1.0.
directories = [["train",0.8],["val",0.1],["test",0.1]]
# Match config.py dimensions
IMG_WIDTH = 256 # W_max from config
IMG_HEIGHT = 60 # H from config
GRAYSCALE = True # grayscale from config
| # ----- minimal augment helpers ----- | |
def rand_color(lo=0, hi=255):
    """Return a random (r, g, b) tuple with each channel drawn from [lo, hi]."""
    r = random.randint(lo, hi)
    g = random.randint(lo, hi)
    b = random.randint(lo, hi)
    return (r, g, b)
def gradient_bg(w, h):
    """Build a w x h RGB PIL image with a vertical linear gradient.

    Top and bottom colors are random light colors (channels in [200, 255]);
    each row linearly interpolates between them. Vectorized with NumPy
    instead of a per-row Python loop; the uint8 truncation matches the
    original per-row formula `top * (1 - t) + bot * t` with
    t = y / max(1, h - 1).
    """
    top = np.array(rand_color(200, 255), dtype=np.float64)
    bot = np.array(rand_color(200, 255), dtype=np.float64)
    # t runs 0 -> 1 down the image; for h == 1, linspace yields [0.0],
    # i.e. the top color, matching y / max(1, h - 1).
    t = np.linspace(0.0, 1.0, num=max(h, 1)).reshape(-1, 1)
    rows = (top * (1 - t) + bot * t).astype(np.uint8)  # (h, 3) row colors
    # Broadcast each row color across the full width, then copy so the
    # result is a writable contiguous array for Image.fromarray.
    arr = np.broadcast_to(rows[:, None, :], (h, w, 3)).copy()
    return Image.fromarray(arr)
def add_interference(img, line_range=(0, 3), dot_range=(10, 80)):
    """Draw random occluding lines and small dots onto img in place.

    line_range / dot_range: inclusive (lo, hi) counts for lines and dots.
    Returns the same image object for chaining.
    """
    width, height = img.size
    pen = ImageDraw.Draw(img)
    n_lines = random.randint(line_range[0], line_range[1])
    for _ in range(n_lines):
        start = (random.randint(0, width - 1), random.randint(0, height - 1))
        end = (random.randint(0, width - 1), random.randint(0, height - 1))
        pen.line(start + end, fill=rand_color(50, 180), width=random.randint(1, 2))
    n_dots = random.randint(dot_range[0], dot_range[1])
    for _ in range(n_dots):
        cx = random.randint(0, width - 1)
        cy = random.randint(0, height - 1)
        radius = random.choice([0, 1])
        pen.ellipse((cx - radius, cy - radius, cx + radius, cy + radius),
                    fill=rand_color(0, 200))
    return img
def perspective_warp(img, max_ratio=0.03):
    """Apply a slight random four-corner perspective distortion.

    Each corner is jittered inward by up to max_ratio of the width
    (and ~70% of that vertically). Revealed border pixels are filled by
    replication. Returns a new RGB PIL image; no-op when max_ratio <= 0.
    """
    if max_ratio <= 0:
        return img
    w, h = img.size
    jx = int(w * max_ratio)
    jy = int(h * max_ratio * 0.7)
    corners_src = np.float32([[0, 0], [w, 0], [w, h], [0, h]])
    corners_dst = np.float32([
        [random.randint(0, jx), random.randint(0, jy)],
        [w - random.randint(0, jx), random.randint(0, jy)],
        [w - random.randint(0, jx), h - random.randint(0, jy)],
        [random.randint(0, jx), h - random.randint(0, jy)],
    ])
    transform = cv2.getPerspectiveTransform(corners_src, corners_dst)
    bgr = np.array(img.convert("RGB"))[:, :, ::-1]  # PIL RGB -> OpenCV BGR
    warped = cv2.warpPerspective(bgr, transform, (w, h),
                                 borderMode=cv2.BORDER_REPLICATE)
    return Image.fromarray(warped[:, :, ::-1])  # BGR -> RGB
def jpeg_recompress(img, qmin=70, qmax=95):
    """Round-trip img through an in-memory JPEG to add compression artifacts.

    Quality is drawn uniformly from [qmin, qmax]. Returns a new RGB image.
    """
    quality = random.randint(qmin, qmax)
    buffer = io.BytesIO()
    img.save(buffer, format="JPEG", quality=quality)
    buffer.seek(0)
    reloaded = Image.open(buffer)
    return reloaded.convert("RGB")
def add_noise_and_blur(img, noise_sigma=(0.0, 6.0), blur_sigma=(0.0, 0.8), motion_prob=0.1):
    """Add Gaussian pixel noise, then either a directional (motion) blur
    or a Gaussian blur.

    noise_sigma: (lo, hi) range to sample the noise standard deviation from.
    blur_sigma:  (lo, hi) range to sample the Gaussian blur radius from.
    motion_prob: probability of applying a directional blur instead of
                 a Gaussian one.
    Returns the (possibly replaced) PIL image.

    Fix: removed the redundant in-function `import cv2` — cv2 is already
    imported at module scope.
    """
    # Gaussian noise — skipped when the sampled sigma is effectively zero.
    s = random.uniform(*noise_sigma)
    if s > 0.05:
        arr = np.array(img).astype(np.float32)
        arr += np.random.normal(0, s, arr.shape).astype(np.float32)
        arr = np.clip(arr, 0, 255).astype(np.uint8)
        img = Image.fromarray(arr)
    if random.random() < motion_prob:
        # Directional blur: a 1-pixel horizontal line kernel rotated to a
        # random angle, then convolved over the image.
        ksize = random.choice([3, 5])
        kernel = Image.new("L", (ksize, ksize), 0)
        draw = ImageDraw.Draw(kernel)
        draw.line((0, ksize // 2, ksize - 1, ksize // 2), fill=255, width=1)
        kernel = kernel.rotate(random.uniform(0, 180), resample=Image.BILINEAR)
        kernel = np.array(kernel, dtype=np.float32)
        # Normalize so overall brightness is preserved; the max() guards
        # against division by zero if rotation left the kernel all-black.
        kernel /= max(1, kernel.sum())
        arr = cv2.filter2D(np.array(img), -1, kernel)
        img = Image.fromarray(arr)
    else:
        sigma = random.uniform(*blur_sigma)
        if sigma > 0.05:
            img = img.filter(ImageFilter.GaussianBlur(radius=sigma))
    return img
def render_with_variation(text, width=IMG_WIDTH, height=IMG_HEIGHT):
    """Render `text` as an augmented CAPTCHA image.

    Pipeline: ImageCaptcha base render -> mild rotation -> optional light
    perspective warp -> interference lines/dots -> noise/blur -> JPEG
    recompression -> optional contrast tweak -> optional grayscale.

    Fixes: removed the unused `fg_color` variable, and dropped the
    `Image.frombytes(... .tobytes())` round-trip — `generate_image` already
    returns a PIL image of the requested size.
    """
    # The background is only used to pick the fill color for the rotation
    # step; ImageCaptcha draws its own background for the text itself.
    bg_choice = random.choice(["solid", "gradient"])
    if bg_choice == "solid":
        bg = Image.new("RGB", (width, height), color=rand_color(210, 255))
    else:
        bg = gradient_bg(width, height)
    # Scale the font with the image height, with a little variety.
    font_sizes = [int(height * 0.7), int(height * 0.75), int(height * 0.8), int(height * 0.85)]
    font_size = random.choice(font_sizes)
    # fonts=None keeps ImageCaptcha's bundled default fonts.
    generator = ImageCaptcha(width=width, height=height, fonts=None, font_sizes=[font_size])
    base = generator.generate_image(text)
    # Mild rotation; fill revealed corners with the background's top-left pixel.
    angle = random.uniform(-6, 6)
    base = base.rotate(angle, resample=Image.BILINEAR, expand=False,
                       fillcolor=bg.getpixel((0, 0)))
    # Very light perspective warp, applied 60% of the time.
    if random.random() < 0.6:
        base = perspective_warp(base, max_ratio=0.025)
    # Occluding lines and dots over the rendered text.
    base = add_interference(base, line_range=(0, 3), dot_range=(10, 60))
    # Light noise + blur + JPEG recompression for realistic capture artifacts.
    base = add_noise_and_blur(base, noise_sigma=(0.0, 5.0), blur_sigma=(0.0, 0.7), motion_prob=0.12)
    base = jpeg_recompress(base, qmin=72, qmax=92)
    # 20% chance of a slight contrast reduction (darker highs, lifted lows).
    if random.random() < 0.2:
        base = base.point(lambda p: int(p * 0.95 + 6))
    # Convert to grayscale if specified by the module config.
    if GRAYSCALE:
        base = base.convert('L')
    return base
# Unpack split names and their ratios directly from the config list.
(train_name, train_ratio), (val_name, val_ratio), (test_name, test_ratio) = directories
# Cumulative index boundaries: [0, train_end) -> train,
# [train_end, val_end) -> val, the remainder -> test.
n = NUM_IMAGES
train_end = int(n * train_ratio)
val_end = train_end + int(n * val_ratio)
# Build the split directory paths and create them all up front.
train_dir = os.path.join(DATASET_DIR, train_name)
val_dir = os.path.join(DATASET_DIR, val_name)
test_dir = os.path.join(DATASET_DIR, test_name)
for d in (DATASET_DIR, train_dir, val_dir, test_dir):
    os.makedirs(d, exist_ok=True)
# Generate images, routing each index into its split directory, and record
# (filename, label) rows in the master CSV as we go.
# Fix: removed the unused module-level `ImageCaptcha(width=160, height=60)`
# instance — rendering goes through render_with_variation exclusively.
with open(LABELS, mode="w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["filename", "label"])
    progress_step = max(1, NUM_IMAGES // 100)  # hoisted loop invariant
    for i in range(NUM_IMAGES):
        if i % progress_step == 0:
            print(f"{i} images made")
        # Pick the output directory from the precomputed split boundaries.
        if i < train_end:
            OUTPUT_DIR = train_dir
        elif i < val_end:
            OUTPUT_DIR = val_dir
        else:
            OUTPUT_DIR = test_dir
        text = ''.join(random.choices(
            CHARS, k=random.randint(CAPTCHA_LEN_LOWER_LIMIT, CAPTCHA_LEN_UPPER_LIMIT)))
        filename = f"{text}_{i}.png"  # the index suffix keeps names unique
        filepath = os.path.join(OUTPUT_DIR, filename)
        # Render with the augmentation pipeline and save alongside its label.
        img = render_with_variation(text, width=IMG_WIDTH, height=IMG_HEIGHT)
        img.save(filepath)
        writer.writerow([filename, text])
print("Data Generated!")
# Split the master label CSV into per-split label files. Row order matches
# generation order, so positional slicing reproduces the same train/val/test
# assignment used when the images were saved.
df = pd.read_csv(LABELS)
n = len(df)
train_end = int(n * train_ratio)
val_end = train_end + int(n * val_ratio)
df_train = df.iloc[:train_end]
df_val = df.iloc[train_end:val_end]
df_test = df.iloc[val_end:]
# Fix: pass path components to os.path.join instead of embedding "/" in an
# f-string, so the separator is correct on every platform.
df_train.to_csv(os.path.join(DATASET_DIR, train_name, "labels.csv"), index=False)
df_val.to_csv(os.path.join(DATASET_DIR, val_name, "labels.csv"), index=False)
df_test.to_csv(os.path.join(DATASET_DIR, test_name, "labels.csv"), index=False)
print("Labels Generated")