mohakkapoor4 committed on
Commit
322be7d
·
1 Parent(s): a1eb0d1

Refactor .gitignore to specify checkpoint file types and exclude all but the best model. Update inference.py to use enhanced CAPTCHA generation and adjust dimensions. Increase training epochs in train.py for better model performance. Update training metrics and data generation logic in data.py for improved dataset handling and augmentation. Update config.py for dataset path consistency.

Browse files
.gitignore CHANGED
@@ -80,14 +80,13 @@ desktop.ini
80
  !Metrics/*.jpg
81
 
82
  # Models and checkpoints
83
- checkpoints/
84
- *.ckpt
85
- *.onnx
86
- *.tflite
87
- *.pth
88
- *.pt
89
- *.bin
90
- *.safetensors
91
  runs/
92
  outputs/
93
  artifacts/
 
80
  !Metrics/*.jpg
81
 
82
  # Models and checkpoints
83
+ checkpoints/*.pth
84
+ checkpoints/*.pt
85
+ checkpoints/*.ckpt
86
+ checkpoints/*.onnx
87
+ checkpoints/*.bin
88
+ checkpoints/*.safetensors
89
+ !checkpoints/best_model.pth
 
90
  runs/
91
  outputs/
92
  artifacts/
Metrics/loss_comparison.png CHANGED

Git LFS Details

  • SHA256: 4e3a5a131f815aeff76e358f45fd9af95bef77d001ef8ba538451d0b3779e005
  • Pointer size: 131 Bytes
  • Size of remote file: 322 kB

Git LFS Details

  • SHA256: 222aef648ad7d0110be78f32efa160916039593db240d2a7020406846ae13412
  • Pointer size: 131 Bytes
  • Size of remote file: 333 kB
Metrics/training_losses.png CHANGED

Git LFS Details

  • SHA256: c74acd1702091eee23712df3b801b9d4c310959a389d2d567b11567c19280db9
  • Pointer size: 131 Bytes
  • Size of remote file: 112 kB

Git LFS Details

  • SHA256: bd9dc01e6f9fb72c032edf1cc52d43e2a77a5267f0448af0487de1948e12c261
  • Pointer size: 131 Bytes
  • Size of remote file: 112 kB
Metrics/training_metrics.txt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7fb9b2125cd77da83e51022b8de31388541a080c2f63d11e8b85cb6b34efe534
3
- size 822
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ed95b7a50649f8393e171a702b5e096adaa4d66712b3faf26c45931854dffb7
3
+ size 842
app.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import random
3
+ import gradio as gr
4
+ from PIL import Image
5
+ import torch
6
+
7
+ # Import your inference module
8
+ import inference as inf
9
+ from src.generateCaptcha import generate_captcha
10
+ from src.config import cfg # sizes, charset, dirs
11
+
12
+ # Device and one-time model load
13
+ DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
14
+ MODEL = inf.load_model("checkpoints/best_model.pth").to(DEVICE).eval()
15
+
16
+ # Ensure results dir exists
17
+ os.makedirs(cfg.RESULT_DIR, exist_ok=True)
18
+
19
+ def random_text():
20
+ L = random.randint(cfg.CAPTCHA_LEN_LOWER_LIMIT, cfg.CAPTCHA_LEN_UPPER_LIMIT)
21
+ return "".join(random.choices(cfg.chars, k=L))
22
+
23
+ def ui_generate():
24
+ text = random_text()
25
+ filename = f"{text}_{random.randint(1000,9999)}.png"
26
+ # Use generateCaptcha.py directly
27
+ img = generate_captcha(text, width=cfg.W_max, height=cfg.H)
28
+
29
+ # Save to results directory
30
+ filepath = os.path.join(cfg.RESULT_DIR, filename)
31
+ img.save(filepath)
32
+
33
+ return img, text, filepath
34
+
35
+ def ui_solve(img: Image.Image, path_hint: str):
36
+ # Prefer uploaded image
37
+ if img is not None:
38
+ tmp_path = os.path.join(cfg.RESULT_DIR, f"upload_{random.randint(1000,9999)}.png")
39
+ img.save(tmp_path)
40
+ tensor = inf.preprocess_image(tmp_path, (cfg.W_max, cfg.H))
41
+ pred = inf.predict_captcha(MODEL, tensor, DEVICE)
42
+ return pred
43
+ # Otherwise, solve the last generated image
44
+ if path_hint and os.path.exists(path_hint):
45
+ tensor = inf.preprocess_image(path_hint, (cfg.W_max, cfg.H))
46
+ pred = inf.predict_captcha(MODEL, tensor, DEVICE)
47
+ return pred
48
+ return "No image provided. Generate or upload first."
49
+
50
+ with gr.Blocks(title="CAPTCHA OCR (checkpoint)") as demo:
51
+ gr.Markdown("## CAPTCHA OCR demo")
52
+
53
+ with gr.Row():
54
+ gen_btn = gr.Button("Generate CAPTCHA", variant="primary")
55
+ gt_out = gr.Textbox(label="Ground Truth", interactive=False)
56
+
57
+ with gr.Row():
58
+ img_out = gr.Image(label="Generated CAPTCHA", type="pil")
59
+ path_box = gr.Textbox(label="Internal Path", interactive=False, visible=False)
60
+
61
+ gen_btn.click(fn=ui_generate, outputs=[img_out, gt_out, path_box])
62
+
63
+ gr.Markdown("### Solve")
64
+ with gr.Row():
65
+ img_in = gr.Image(label="Upload CAPTCHA (optional)", type="pil")
66
+ solve_btn = gr.Button("Solve")
67
+ pred_out = gr.Textbox(label="Prediction", interactive=False)
68
+
69
+ solve_btn.click(fn=ui_solve, inputs=[img_in, path_box], outputs=[pred_out])
70
+
71
+ if __name__ == "__main__":
72
+ demo.launch()
checkpoints/best_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88e646907eb2ca7a43d87a5cd251be9c7d2b79f98e30d5ca9e53b8ae93e6045d
3
+ size 48371934
inference.py CHANGED
@@ -7,7 +7,7 @@ from src.config import cfg
7
  from src.model_crnn import CRNN
8
  from src.vocab import ctc_greedy_decode, vocab_size
9
  from src.plotting import TrainingMetrics
10
- from captcha.image import ImageCaptcha
11
 
12
  def load_model(checkpoint_path="checkpoints/best_model.pth"):
13
  """Load the trained model from checkpoint."""
@@ -69,12 +69,17 @@ def predict_captcha(model, image_tensor, device):
69
 
70
  return prediction[0] if prediction else ""
71
 
72
- def generate_test_captcha(text, filename, width=160, height=60):
73
- """Generate a test CAPTCHA image."""
74
- image = ImageCaptcha(width=width, height=height)
 
 
 
 
 
75
  filepath = os.path.join(cfg.RESULT_DIR, filename)
76
- image.write(text, filepath)
77
- print(f"Generated test CAPTCHA: {filename}")
78
  return filepath
79
 
80
  def main():
@@ -92,7 +97,7 @@ def main():
92
  print("Model loaded successfully!")
93
 
94
  # Generate test CAPTCHAs
95
- print("\nGenerating test CAPTCHAs...")
96
  test_cases = []
97
 
98
  for i in range(4):
@@ -100,8 +105,8 @@ def main():
100
  text = ''.join(random.choices(cfg.chars, k=random.randint(cfg.CAPTCHA_LEN_LOWER_LIMIT, cfg.CAPTCHA_LEN_UPPER_LIMIT)))
101
  filename = f"{text}_{i}.png"
102
 
103
- # Generate image
104
- image_path = generate_test_captcha(text, filename)
105
  test_cases.append((text, image_path, "")) # Add empty prediction slot
106
 
107
  # Run inference
@@ -151,7 +156,7 @@ def main():
151
  correct_chars += 1
152
 
153
  char_accuracy = (correct_chars / total_chars) * 100 if total_chars > 0 else 0
154
- print(f"🔤 Character Accuracy: {correct_chars}/{total_chars} ({char_accuracy:.1f}%)")
155
 
156
  if accuracy >= 80:
157
  print("Excellent performance!")
 
7
  from src.model_crnn import CRNN
8
  from src.vocab import ctc_greedy_decode, vocab_size
9
  from src.plotting import TrainingMetrics
10
+ from src.generateCaptcha import generate_captcha
11
 
12
  def load_model(checkpoint_path="checkpoints/best_model.pth"):
13
  """Load the trained model from checkpoint."""
 
69
 
70
  return prediction[0] if prediction else ""
71
 
72
+ def generate_test_captcha(text, filename, width=256, height=60):
73
+ """Generate a test CAPTCHA image using enhanced generation."""
74
+ # Use the enhanced CAPTCHA generation from generateCaptcha.py
75
+ img = generate_captcha(text, width=width, height=height)
76
+
77
+ # Ensure results directory exists
78
+ os.makedirs(cfg.RESULT_DIR, exist_ok=True)
79
+
80
  filepath = os.path.join(cfg.RESULT_DIR, filename)
81
+ img.save(filepath)
82
+ print(f"Generated enhanced test CAPTCHA: {filename}")
83
  return filepath
84
 
85
  def main():
 
97
  print("Model loaded successfully!")
98
 
99
  # Generate test CAPTCHAs
100
+ print("\nGenerating enhanced test CAPTCHAs...")
101
  test_cases = []
102
 
103
  for i in range(4):
 
105
  text = ''.join(random.choices(cfg.chars, k=random.randint(cfg.CAPTCHA_LEN_LOWER_LIMIT, cfg.CAPTCHA_LEN_UPPER_LIMIT)))
106
  filename = f"{text}_{i}.png"
107
 
108
+ # Generate enhanced image
109
+ image_path = generate_test_captcha(text, filename, width=cfg.W_max, height=cfg.H)
110
  test_cases.append((text, image_path, "")) # Add empty prediction slot
111
 
112
  # Run inference
 
156
  correct_chars += 1
157
 
158
  char_accuracy = (correct_chars / total_chars) * 100 if total_chars > 0 else 0
159
+ print(f"Character Accuracy: {correct_chars}/{total_chars} ({char_accuracy:.1f}%)")
160
 
161
  if accuracy >= 80:
162
  print("Excellent performance!")
src/config.py CHANGED
@@ -4,7 +4,7 @@ from dataclasses import dataclass
4
 
5
  @dataclass
6
  class Config:
7
- data_root: str = os.getenv("DATA_ROOT","Dataset_test\captchas")
8
 
9
  chars: str = string.ascii_letters + string.digits
10
  CAPTCHA_LEN_LOWER_LIMIT: int = 5
 
4
 
5
  @dataclass
6
  class Config:
7
+ data_root: str = os.getenv("DATA_ROOT","Dataset\captchas")
8
 
9
  chars: str = string.ascii_letters + string.digits
10
  CAPTCHA_LEN_LOWER_LIMIT: int = 5
src/data.py CHANGED
@@ -1,63 +1,212 @@
1
  from captcha.image import ImageCaptcha
2
- import random
3
- import string
4
- import os
5
- import csv
6
  import pandas as pd
 
 
 
7
 
8
- # config
9
- DATASET_DIR = "Dataset_test/captchas"
10
- LABELS = "Dataset_test/labels.csv"
11
- NUM_IMAGES = 10000
12
  CHARS = string.ascii_letters + string.digits
13
  CAPTCHA_LEN_LOWER_LIMIT = 5
14
  CAPTCHA_LEN_UPPER_LIMIT = 7
15
  directories = [["train",0.8],["val",0.1],["test",0.1]]
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  os.makedirs(DATASET_DIR, exist_ok=True)
18
- image = ImageCaptcha(width=160, height=60)
 
 
19
 
 
20
 
21
- with open(LABELS,mode="w",newline="") as f:
22
  writer = csv.writer(f)
23
  writer.writerow(["filename","label"])
24
- OUTPUT_DIR = os.path.join(DATASET_DIR,directories[0][0])
25
- os.makedirs(OUTPUT_DIR,exist_ok=True)
26
  for i in range(NUM_IMAGES):
27
- if i%(NUM_IMAGES/100) ==0:
28
  print(f"{i} images made")
29
- if i>(0.8*NUM_IMAGES-1) and i<(0.9*NUM_IMAGES):
30
- OUTPUT_DIR = os.path.join(DATASET_DIR,directories[1][0])
31
- os.makedirs(OUTPUT_DIR,exist_ok=True)
32
- elif i>(0.9*NUM_IMAGES-1):
33
-
34
- OUTPUT_DIR = os.path.join(DATASET_DIR,directories[2][0])
35
- os.makedirs(OUTPUT_DIR,exist_ok=True)
36
- text = ''.join(random.choices(CHARS, k=random.randint(CAPTCHA_LEN_LOWER_LIMIT,CAPTCHA_LEN_UPPER_LIMIT)))
 
 
37
  filename = f"{text}_{i}.png"
38
  filepath = os.path.join(OUTPUT_DIR, filename)
39
- image.write(text, filepath)
40
- writer.writerow([filename,text])
41
 
42
- print("Data Generated!")
 
 
 
43
 
 
44
 
45
- df = pd.read_csv(LABELS)
46
 
 
 
47
  n = len(df)
48
- train_end = int(n * directories[0][1])
49
- val_end = train_end + int(n * directories[2][1])
50
 
51
- # Split datasets
52
  df_train = df.iloc[:train_end]
53
  df_val = df.iloc[train_end:val_end]
54
  df_test = df.iloc[val_end:]
55
 
56
- # Save
57
- df_train.to_csv(os.path.join(DATASET_DIR,"train/labels.csv"), index=False)
58
- df_val.to_csv(os.path.join(DATASET_DIR,"val/labels.csv"), index=False)
59
- df_test.to_csv(os.path.join(DATASET_DIR,"test/labels.csv"), index=False)
60
-
61
  print("Labels Generated")
62
-
63
-
 
1
  from captcha.image import ImageCaptcha
2
+ import random, string, os, csv, io
 
 
 
3
  import pandas as pd
4
+ from PIL import Image, ImageDraw, ImageFilter
5
+ import numpy as np
6
+ import cv2
7
 
8
+ # ===== your original config =====
9
+ DATASET_DIR = "Dataset/captchas"
10
+ LABELS = "Dataset/labels.csv"
11
+ NUM_IMAGES = 100000
12
  CHARS = string.ascii_letters + string.digits
13
  CAPTCHA_LEN_LOWER_LIMIT = 5
14
  CAPTCHA_LEN_UPPER_LIMIT = 7
15
  directories = [["train",0.8],["val",0.1],["test",0.1]]
16
 
17
+ # Match config.py dimensions
18
+ IMG_WIDTH = 256 # W_max from config
19
+ IMG_HEIGHT = 60 # H from config
20
+ GRAYSCALE = True # grayscale from config
21
+
22
+
23
+ # ----- minimal augment helpers -----
24
+ def rand_color(lo=0, hi=255):
25
+ return tuple(random.randint(lo, hi) for _ in range(3))
26
+
27
+ def gradient_bg(w, h):
28
+ top = rand_color(200, 255)
29
+ bot = rand_color(200, 255)
30
+ arr = np.zeros((h, w, 3), dtype=np.uint8)
31
+ for y in range(h):
32
+ t = y / max(1, h - 1)
33
+ arr[y, :, :] = (np.array(top) * (1 - t) + np.array(bot) * t).astype(np.uint8)
34
+ return Image.fromarray(arr)
35
+
36
+ def add_interference(img, line_range=(0, 3), dot_range=(10, 80)):
37
+ draw = ImageDraw.Draw(img)
38
+ w, h = img.size
39
+ for _ in range(random.randint(*line_range)):
40
+ x1, y1 = random.randint(0, w-1), random.randint(0, h-1)
41
+ x2, y2 = random.randint(0, w-1), random.randint(0, h-1)
42
+ draw.line((x1, y1, x2, y2), fill=rand_color(50, 180), width=random.randint(1, 2))
43
+ for _ in range(random.randint(*dot_range)):
44
+ x, y = random.randint(0, w-1), random.randint(0, h-1)
45
+ r = random.choice([0, 1])
46
+ draw.ellipse((x-r, y-r, x+r, y+r), fill=rand_color(0, 200))
47
+ return img
48
+
49
+ def perspective_warp(img, max_ratio=0.03):
50
+ if max_ratio <= 0:
51
+ return img
52
+ w, h = img.size
53
+ dx = int(w * max_ratio)
54
+ dy = int(h * max_ratio * 0.7)
55
+ src = np.float32([[0,0],[w,0],[w,h],[0,h]])
56
+ dst = np.float32([[random.randint(0,dx), random.randint(0,dy)],
57
+ [w-random.randint(0,dx), random.randint(0,dy)],
58
+ [w-random.randint(0,dx), h-random.randint(0,dy)],
59
+ [random.randint(0,dx), h-random.randint(0,dy)]])
60
+ M = cv2.getPerspectiveTransform(src, dst)
61
+ arr = np.array(img.convert("RGB"))[:, :, ::-1] # to BGR
62
+ out = cv2.warpPerspective(arr, M, (w, h), borderMode=cv2.BORDER_REPLICATE)
63
+ return Image.fromarray(out[:, :, ::-1]) # back to RGB
64
+
65
+ def jpeg_recompress(img, qmin=70, qmax=95):
66
+ q = random.randint(qmin, qmax)
67
+ buf = io.BytesIO()
68
+ img.save(buf, format="JPEG", quality=q)
69
+ buf.seek(0)
70
+ return Image.open(buf).convert("RGB")
71
+
72
+ def add_noise_and_blur(img, noise_sigma=(0.0, 6.0), blur_sigma=(0.0, 0.8), motion_prob=0.1):
73
+ # gaussian noise
74
+ s = random.uniform(*noise_sigma)
75
+ if s > 0.05:
76
+ arr = np.array(img).astype(np.float32)
77
+ arr += np.random.normal(0, s, arr.shape).astype(np.float32)
78
+ arr = np.clip(arr, 0, 255).astype(np.uint8)
79
+ img = Image.fromarray(arr)
80
+ # blur
81
+ if random.random() < motion_prob:
82
+ # simple directional blur
83
+ ksize = random.choice([3,5])
84
+ kernel = Image.new("L", (ksize, ksize), 0)
85
+ draw = ImageDraw.Draw(kernel)
86
+ draw.line((0, ksize//2, ksize-1, ksize//2), fill=255, width=1)
87
+ kernel = kernel.rotate(random.uniform(0, 180), resample=Image.BILINEAR)
88
+ kernel = np.array(kernel, dtype=np.float32)
89
+ kernel /= max(1, kernel.sum())
90
+ import cv2
91
+ arr = np.array(img)
92
+ arr = cv2.filter2D(arr, -1, kernel)
93
+ img = Image.fromarray(arr)
94
+ else:
95
+ sigma = random.uniform(*blur_sigma)
96
+ if sigma > 0.05:
97
+ img = img.filter(ImageFilter.GaussianBlur(radius=sigma))
98
+ return img
99
+
100
+ def render_with_variation(text, width=IMG_WIDTH, height=IMG_HEIGHT):
101
+ # randomize basic style knobs
102
+ bg_choice = random.choice(["solid", "gradient"])
103
+ fg_color = rand_color(0, 80)
104
+ if bg_choice == "solid":
105
+ bg_color = rand_color(210, 255)
106
+ bg = Image.new("RGB", (width, height), color=bg_color)
107
+ else:
108
+ bg = gradient_bg(width, height)
109
+
110
+ # Adjust font sizes for larger dimensions
111
+ font_sizes = [int(height * 0.7), int(height * 0.75), int(height * 0.8), int(height * 0.85)]
112
+ font_size = random.choice(font_sizes)
113
+
114
+ # ImageCaptcha accepts fonts via fonts arg; here we keep default but jitter spacing
115
+ image = ImageCaptcha(width=width, height=height, fonts=None, font_sizes=[font_size])
116
+
117
+ # draw base image
118
+ base = Image.frombytes('RGB', (width, height), image.generate_image(text).tobytes())
119
+
120
+ # quick contrast tweak: recolor foreground by compositing text mask if needed
121
+ # For minimal change, we stick with base and apply light warps/noise
122
+ # mild rotation/shear
123
+ angle = random.uniform(-6, 6)
124
+ base = base.rotate(angle, resample=Image.BILINEAR, expand=False, fillcolor=bg.getpixel((0,0)))
125
+
126
+ # perspective warp (very light)
127
+ if random.random() < 0.6:
128
+ base = perspective_warp(base, max_ratio=0.025)
129
+
130
+ # draw interference over the image
131
+ base = add_interference(base, line_range=(0, 3), dot_range=(10, 60))
132
+
133
+ # light noise + blur + jpeg recompress to add artifacts
134
+ base = add_noise_and_blur(base, noise_sigma=(0.0, 5.0), blur_sigma=(0.0, 0.7), motion_prob=0.12)
135
+ base = jpeg_recompress(base, qmin=72, qmax=92)
136
+
137
+ # optional low contrast: 20% chance to darken bg and lighten fg a bit
138
+ if random.random() < 0.2:
139
+ base = base.point(lambda p: int(p*0.95 + 6))
140
+
141
+ # Convert to grayscale if specified
142
+ if GRAYSCALE:
143
+ base = base.convert('L')
144
+
145
+ return base
146
+
147
+
148
+
149
+ # Fix: Extract names and thresholds upfront
150
+ train_name, val_name, test_name = directories[0][0], directories[1][0], directories[2][0]
151
+ train_ratio, val_ratio, test_ratio = directories[0][1], directories[1][1], directories[2][1]
152
+
153
+ # Calculate split thresholds
154
+ n = NUM_IMAGES
155
+ train_end = int(n * train_ratio)
156
+ val_end = train_end + int(n * val_ratio)
157
+
158
+ # Create directories once
159
+ train_dir = os.path.join(DATASET_DIR, train_name)
160
+ val_dir = os.path.join(DATASET_DIR, val_name)
161
+ test_dir = os.path.join(DATASET_DIR, test_name)
162
+
163
  os.makedirs(DATASET_DIR, exist_ok=True)
164
+ os.makedirs(train_dir, exist_ok=True)
165
+ os.makedirs(val_dir, exist_ok=True)
166
+ os.makedirs(test_dir, exist_ok=True)
167
 
168
+ image = ImageCaptcha(width=160, height=60) # kept for compatibility if needed
169
 
170
+ with open(LABELS, mode="w", newline="") as f:
171
  writer = csv.writer(f)
172
  writer.writerow(["filename","label"])
173
+
 
174
  for i in range(NUM_IMAGES):
175
+ if i % max(1, (NUM_IMAGES//100)) == 0:
176
  print(f"{i} images made")
177
+
178
+ # Pick output directory based on thresholds
179
+ if i < train_end:
180
+ OUTPUT_DIR = train_dir
181
+ elif i < val_end:
182
+ OUTPUT_DIR = val_dir
183
+ else:
184
+ OUTPUT_DIR = test_dir
185
+
186
+ text = ''.join(random.choices(CHARS, k=random.randint(CAPTCHA_LEN_LOWER_LIMIT, CAPTCHA_LEN_UPPER_LIMIT)))
187
  filename = f"{text}_{i}.png"
188
  filepath = os.path.join(OUTPUT_DIR, filename)
 
 
189
 
190
+ # --- minimal change: replace image.write with our small variation renderer ---
191
+ img = render_with_variation(text, width=IMG_WIDTH, height=IMG_HEIGHT)
192
+ img.save(filepath)
193
+ # -----------------------------------------
194
 
195
+ writer.writerow([filename, text])
196
 
197
+ print("Data Generated!")
198
 
199
+ # Fixed split logic
200
+ df = pd.read_csv(LABELS)
201
  n = len(df)
202
+ train_end = int(n * train_ratio)
203
+ val_end = train_end + int(n * val_ratio)
204
 
 
205
  df_train = df.iloc[:train_end]
206
  df_val = df.iloc[train_end:val_end]
207
  df_test = df.iloc[val_end:]
208
 
209
+ df_train.to_csv(os.path.join(DATASET_DIR, f"{train_name}/labels.csv"), index=False)
210
+ df_val.to_csv(os.path.join(DATASET_DIR, f"{val_name}/labels.csv"), index=False)
211
+ df_test.to_csv(os.path.join(DATASET_DIR, f"{test_name}/labels.csv"), index=False)
 
 
212
  print("Labels Generated")
 
 
src/generateCaptcha.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Simple CAPTCHA Generation Utility
3
+ Generates individual CAPTCHA images using enhanced rendering
4
+ """
5
+
6
+ import random
7
+ import string
8
+ from PIL import Image, ImageDraw, ImageFilter
9
+ import numpy as np
10
+ import cv2
11
+ import io
12
+
13
+ # Configuration - match your training setup
14
+ IMG_WIDTH = 256
15
+ IMG_HEIGHT = 60
16
+ GRAYSCALE = True
17
+ CHARS = string.ascii_letters + string.digits
18
+ CAPTCHA_LEN_LOWER_LIMIT = 5
19
+ CAPTCHA_LEN_UPPER_LIMIT = 7
20
+
21
+ def rand_color(lo=0, hi=255):
22
+ """Generate random RGB color."""
23
+ return tuple(random.randint(lo, hi) for _ in range(3))
24
+
25
+ def gradient_bg(w, h):
26
+ """Create gradient background."""
27
+ top = rand_color(200, 255)
28
+ bot = rand_color(200, 255)
29
+ arr = np.zeros((h, w, 3), dtype=np.uint8)
30
+ for y in range(h):
31
+ t = y / max(1, h - 1)
32
+ arr[y, :, :] = (np.array(top) * (1 - t) + np.array(bot) * t).astype(np.uint8)
33
+ return Image.fromarray(arr)
34
+
35
+ def add_interference(img, line_range=(0, 3), dot_range=(10, 80)):
36
+ """Add interference patterns (lines and dots)."""
37
+ draw = ImageDraw.Draw(img)
38
+ w, h = img.size
39
+ for _ in range(random.randint(*line_range)):
40
+ x1, y1 = random.randint(0, w-1), random.randint(0, h-1)
41
+ x2, y2 = random.randint(0, w-1), random.randint(0, h-1)
42
+ draw.line((x1, y1, x2, y2), fill=rand_color(50, 180), width=random.randint(1, 2))
43
+ for _ in range(random.randint(*dot_range)):
44
+ x, y = random.randint(0, w-1), random.randint(0, h-1)
45
+ r = random.choice([0, 1])
46
+ draw.ellipse((x-r, y-r, x+r, y+r), fill=rand_color(0, 200))
47
+ return img
48
+
49
+ def perspective_warp(img, max_ratio=0.03):
50
+ """Apply perspective warping."""
51
+ if max_ratio <= 0:
52
+ return img
53
+ w, h = img.size
54
+ dx = int(w * max_ratio)
55
+ dy = int(h * max_ratio * 0.7)
56
+ src = np.float32([[0,0],[w,0],[w,h],[0,h]])
57
+ dst = np.float32([[random.randint(0,dx), random.randint(0,dy)],
58
+ [w-random.randint(0,dx), random.randint(0,dy)],
59
+ [w-random.randint(0,dx), h-random.randint(0,dy)],
60
+ [random.randint(0,dx), h-random.randint(0,dy)]])
61
+ M = cv2.getPerspectiveTransform(src, dst)
62
+ arr = np.array(img.convert("RGB"))[:, :, ::-1] # to BGR
63
+ out = cv2.warpPerspective(arr, M, (w, h), borderMode=cv2.BORDER_REPLICATE)
64
+ return Image.fromarray(out[:, :, ::-1]) # back to RGB
65
+
66
+ def jpeg_recompress(img, qmin=70, qmax=95):
67
+ """Recompress image to simulate JPEG artifacts."""
68
+ q = random.randint(qmin, qmax)
69
+ buf = io.BytesIO()
70
+ img.save(buf, format="JPEG", quality=q)
71
+ buf.seek(0)
72
+ return Image.open(buf).convert("RGB")
73
+
74
+ def add_noise_and_blur(img, noise_sigma=(0.0, 6.0), blur_sigma=(0.0, 0.8), motion_prob=0.1):
75
+ """Add noise and blur effects."""
76
+ # Gaussian noise
77
+ s = random.uniform(*noise_sigma)
78
+ if s > 0.05:
79
+ arr = np.array(img).astype(np.float32)
80
+ arr += np.random.normal(0, s, arr.shape).astype(np.float32)
81
+ arr = np.clip(arr, 0, 255).astype(np.uint8)
82
+ img = Image.fromarray(arr)
83
+
84
+ # Blur
85
+ if random.random() < motion_prob:
86
+ # Simple directional blur
87
+ ksize = random.choice([3,5])
88
+ kernel = Image.new("L", (ksize, ksize), 0)
89
+ draw = ImageDraw.Draw(kernel)
90
+ draw.line((0, ksize//2, ksize-1, ksize//2), fill=255, width=1)
91
+ kernel = kernel.rotate(random.uniform(0, 180), resample=Image.BILINEAR)
92
+ kernel = np.array(kernel, dtype=np.float32)
93
+ kernel /= max(1, kernel.sum())
94
+ arr = np.array(img)
95
+ arr = cv2.filter2D(arr, -1, kernel)
96
+ img = Image.fromarray(arr)
97
+ else:
98
+ sigma = random.uniform(*blur_sigma)
99
+ if sigma > 0.05:
100
+ img = img.filter(ImageFilter.GaussianBlur(radius=sigma))
101
+
102
+ return img
103
+
104
+ def generate_captcha(text=None, width=IMG_WIDTH, height=IMG_HEIGHT, save_path=None):
105
+ """
106
+ Generate a single enhanced CAPTCHA image.
107
+
108
+ Args:
109
+ text (str, optional): Text to render. If None, generates random text.
110
+ width (int): Image width
111
+ height (int): Image height
112
+ save_path (str, optional): Path to save the image. If None, returns PIL Image.
113
+
114
+ Returns:
115
+ PIL Image if save_path is None, otherwise saves and returns the path
116
+ """
117
+ # Generate random text if none provided
118
+ if text is None:
119
+ text = ''.join(random.choices(CHARS, k=random.randint(CAPTCHA_LEN_LOWER_LIMIT, CAPTCHA_LEN_UPPER_LIMIT)))
120
+
121
+ # Randomize basic style
122
+ bg_choice = random.choice(["solid", "gradient"])
123
+ fg_color = rand_color(0, 80)
124
+
125
+ if bg_choice == "solid":
126
+ bg_color = rand_color(210, 255)
127
+ bg = Image.new("RGB", (width, height), color=bg_color)
128
+ else:
129
+ bg = gradient_bg(width, height)
130
+
131
+ # Adjust font sizes for larger dimensions
132
+ font_sizes = [int(height * 0.7), int(height * 0.75), int(height * 0.8), int(height * 0.85)]
133
+ font_size = random.choice(font_sizes)
134
+
135
+ # Use ImageCaptcha for base text rendering
136
+ from captcha.image import ImageCaptcha
137
+ image = ImageCaptcha(width=width, height=height, fonts=None, font_sizes=[font_size])
138
+
139
+ # Draw base image
140
+ base = Image.frombytes('RGB', (width, height), image.generate_image(text).tobytes())
141
+
142
+ # Apply enhancements
143
+ angle = random.uniform(-6, 6)
144
+ base = base.rotate(angle, resample=Image.BILINEAR, expand=False, fillcolor=bg.getpixel((0,0)))
145
+
146
+ # Perspective warp (very light)
147
+ if random.random() < 0.6:
148
+ base = perspective_warp(base, max_ratio=0.025)
149
+
150
+ # Add interference
151
+ base = add_interference(base, line_range=(0, 3), dot_range=(10, 60))
152
+
153
+ # Noise + blur + JPEG recompression
154
+ base = add_noise_and_blur(base, noise_sigma=(0.0, 5.0), blur_sigma=(0.0, 0.7), motion_prob=0.12)
155
+ base = jpeg_recompress(base, qmin=72, qmax=92)
156
+
157
+ # Optional low contrast
158
+ if random.random() < 0.2:
159
+ base = base.point(lambda p: int(p*0.95 + 6))
160
+
161
+ # Convert to grayscale if specified
162
+ if GRAYSCALE:
163
+ base = base.convert('L')
164
+
165
+ # Save or return
166
+ if save_path:
167
+ base.save(save_path)
168
+ return save_path
169
+ else:
170
+ return base
171
+
172
+
173
+ if __name__ == "__main__":
174
+ # Example usage
175
+ print("Generating sample CAPTCHAs...")
176
+
177
+ # Generate with specific text
178
+ img1 = generate_captcha("HELLO", save_path="sample_HELLO.png")
179
+ print(f"Generated: sample_HELLO.png")
180
+
181
+ print("Done! Check the generated images.")
train.py CHANGED
@@ -50,7 +50,7 @@ def main():
50
  scaler = torch.amp.GradScaler('cuda', enabled=False) # Disable AMP for stability
51
 
52
  # Epoch-based training with scheduler
53
- epochs = 20 # Increased for OneCycleLR
54
  scheduler = torch.optim.lr_scheduler.OneCycleLR(
55
  optimizer, max_lr=3e-4, steps_per_epoch=len(train_dl), epochs=epochs
56
  )
 
50
  scaler = torch.amp.GradScaler('cuda', enabled=False) # Disable AMP for stability
51
 
52
  # Epoch-based training with scheduler
53
+ epochs = 40 # Increased for OneCycleLR
54
  scheduler = torch.optim.lr_scheduler.OneCycleLR(
55
  optimizer, max_lr=3e-4, steps_per_epoch=len(train_dl), epochs=epochs
56
  )