# NOTE: removed page-scrape artifacts ("Spaces:" / "Paused") that preceded this
# file; they were Hugging Face Spaces page chrome, not part of the program.
| """ | |
| Complete CAPTCHA Solver - Training + Inference + Gradio Interface | |
| All-in-one file untuk Hugging Face Spaces | |
| Usage: | |
| 1. Training: python app.py --train | |
| 2. Inference: python app.py | |
| """ | |
import math
import os
import random
import string
import sys

import cv2
import gradio as gr
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from PIL import Image, ImageDraw, ImageFont, ImageFilter
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from tqdm import tqdm
# ============= 1. ADVANCED CAPTCHA GENERATOR =============
class AdvancedCaptchaGenerator:
    """Synthesizes noisy, distorted digit CAPTCHAs for training/demo data.

    Produces ``width`` x ``height`` RGB images containing ``length`` random
    digits drawn with per-character jitter and rotation, plus dot/plus-sign
    noise, broken wavy lines, a horizontal sine-wave distortion, and a light
    Gaussian blur.
    """

    def __init__(self, width=280, height=80, length=5):
        self.width = width
        self.height = height
        self.length = length
        self.characters = string.digits  # label alphabet: '0'..'9'

    def add_noise_dots(self, image):
        """Draw random gray dots and plus signs over ``image`` (modified in place)."""
        draw = ImageDraw.Draw(image)
        # Random dots
        for _ in range(random.randint(20, 40)):
            x = random.randint(0, self.width)
            y = random.randint(0, self.height)
            size = random.randint(2, 8)
            color = (random.randint(50, 150), random.randint(50, 150), random.randint(50, 150))
            draw.ellipse([x, y, x + size, y + size], fill=color)
        # Plus signs
        for _ in range(random.randint(5, 15)):
            x = random.randint(0, self.width)
            y = random.randint(0, self.height)
            size = random.randint(3, 8)
            color = (random.randint(50, 120), random.randint(50, 120), random.randint(50, 120))
            draw.line([x - size, y, x + size, y], fill=color, width=2)
            draw.line([x, y - size, x, y + size], fill=color, width=2)
        return image

    def add_wavy_pattern(self, image):
        """Draw 2-4 broken wavy lines across ``image`` (modified in place)."""
        draw = ImageDraw.Draw(image)
        for _ in range(random.randint(2, 4)):
            points = []
            start_y = random.randint(10, self.height - 10)
            for x in range(0, self.width, 5):
                y = start_y + random.randint(-15, 15) * np.sin(x / 20)
                points.append((x, y))
            # Skip ~30% of segments so each wave looks broken/dashed.
            for i in range(len(points) - 1):
                if random.random() > 0.3:
                    color = (random.randint(80, 150), random.randint(80, 150), random.randint(80, 150))
                    draw.line([points[i], points[i + 1]], fill=color, width=random.randint(2, 4))
        return image

    def generate_captcha(self):
        """Return ``(PIL.Image, label_text)`` for one randomly generated CAPTCHA."""
        text = ''.join(random.choices(self.characters, k=self.length))
        image = Image.new('RGB', (self.width, self.height), (255, 255, 255))
        image = self.add_wavy_pattern(image)
        image = self.add_noise_dots(image)
        # Pick a font, falling back through common locations to PIL's default.
        # ImageFont.truetype raises OSError when the font file cannot be read,
        # so catch that specifically instead of the previous bare `except:`.
        font_size = random.randint(40, 50)
        try:
            font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", font_size)
        except OSError:
            try:
                font = ImageFont.truetype("arial.ttf", font_size)
            except OSError:
                font = ImageFont.load_default()
        spacing = self.width // (self.length + 1)
        for i, char in enumerate(text):
            # Jittered position and rotation per character.
            x = spacing * (i + 0.5) + random.randint(-10, 10)
            y = self.height // 3 + random.randint(-10, 10)
            angle = random.randint(-25, 25)
            color = (random.randint(30, 100), random.randint(30, 100), random.randint(30, 100))
            # Render the glyph on a transparent tile so it can be rotated and
            # alpha-pasted without a white box around it.
            char_img = Image.new('RGBA', (100, 100), (255, 255, 255, 0))
            char_draw = ImageDraw.Draw(char_img)
            char_draw.text((25, 25), char, fill=color, font=font)
            char_img = char_img.rotate(angle, expand=True)
            image.paste(char_img, (int(x), int(y)), char_img)
        image = self.apply_distortion(image)
        image = image.filter(ImageFilter.GaussianBlur(radius=random.uniform(0.3, 0.8)))
        return image, text

    def apply_distortion(self, image):
        """Shift each pixel row horizontally by ``int(8*sin(2*pi*row/60))``.

        Vectorized with NumPy integer-array indexing; the previous version
        was a per-pixel Python double loop (O(rows*cols) interpreter work)
        and approximated 2*pi as ``2 * 3.14``. Source columns that would
        fall outside the image keep their original pixel, matching the old
        out-of-range behavior.
        """
        img_array = np.array(image)
        rows, cols = img_array.shape[:2]
        row_idx = np.arange(rows)
        # astype(int) truncates toward zero, like the scalar int() did.
        offsets = (8.0 * np.sin(2 * math.pi * row_idx / 60)).astype(int)
        col_idx = np.arange(cols)
        src = col_idx[None, :] + offsets[:, None]
        # Out-of-range source columns fall back to the identity mapping.
        src = np.where((src >= 0) & (src < cols), src, col_idx[None, :])
        img_output = img_array[row_idx[:, None], src]
        return Image.fromarray(img_output)

    def generate_dataset(self, num_samples, save_dir='captcha_data'):
        """Generate and save ``num_samples`` CAPTCHA PNGs under ``save_dir``.

        Returns a list of ``{'image': filepath, 'label': text}`` dicts
        suitable for CaptchaDataset.
        """
        os.makedirs(save_dir, exist_ok=True)
        data = []
        print(f"Generating {num_samples} CAPTCHA images...")
        for i in tqdm(range(num_samples)):
            image, text = self.generate_captcha()
            filename = f"{i:06d}_{text}.png"
            filepath = os.path.join(save_dir, filename)
            image.save(filepath)
            data.append({'image': filepath, 'label': text})
        return data
# ============= 2. DATASET =============
class CaptchaDataset(Dataset):
    """Torch dataset over pre-rendered CAPTCHA images with digit-string labels.

    Each item is ``(image, LongTensor of per-character class indices)``;
    ``data`` is a list of ``{'image': path, 'label': digit_string}`` dicts.
    """

    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform
        # Map every digit character '0'..'9' to its integer class index.
        self.char_to_idx = dict(zip(string.digits, range(len(string.digits))))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample = self.data[idx]
        img = Image.open(sample['image']).convert('RGB')
        if self.transform:
            img = self.transform(img)
        encoded = [self.char_to_idx[ch] for ch in sample['label']]
        return img, torch.LongTensor(encoded)
# ============= 3. MODEL =============
class AttentionBlock(nn.Module):
    """Channel-attention gate: squeeze to C/8 channels, expand back, then
    multiply the input by the resulting sigmoid weights (values in (0, 1))."""

    def __init__(self, in_channels):
        super().__init__()
        reduced = in_channels // 8
        self.attention = nn.Sequential(
            nn.Conv2d(in_channels, reduced, kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(reduced, in_channels, kernel_size=1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        # Gate the input feature map element-wise by the learned attention.
        weights = self.attention(x)
        return x * weights
class CaptchaCNN(nn.Module):
    """Multi-head CNN classifier: one independent digit head per character position.

    forward() returns a list of `num_chars` logit tensors, each of shape
    (batch, num_classes). Expects 80x280 RGB input (the flatten size of the
    heads is hard-coded to that geometry — see comment below).
    """

    def __init__(self, num_chars=5, num_classes=10):
        super(CaptchaCNN, self).__init__()
        self.num_chars = num_chars
        self.num_classes = num_classes
        # Stage 1: 3 -> 64 channels; MaxPool halves spatial size 80x280 -> 40x140.
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )
        # Stage 2: 64 -> 128 channels; 40x140 -> 20x70.
        self.conv2 = nn.Sequential(
            nn.Conv2d(64, 128, 3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(128, 128, 3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )
        self.attention1 = AttentionBlock(128)
        # Stage 3: 128 -> 256 channels; 20x70 -> 10x35.
        self.conv3 = nn.Sequential(
            nn.Conv2d(128, 256, 3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(256, 256, 3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )
        self.attention2 = AttentionBlock(256)
        # Stage 4: 256 -> 512 channels; 10x35 -> 5x17 (floor division in pooling).
        self.conv4 = nn.Sequential(
            nn.Conv2d(256, 512, 3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )
        # One fully connected head per character position.
        # 512 * 17 * 5 is the flattened conv4 output (C=512, H=5, W=17);
        # this hard-codes the 80x280 input size used by the training pipeline.
        self.char_heads = nn.ModuleList([
            nn.Sequential(
                nn.Linear(512 * 17 * 5, 1024),
                nn.ReLU(),
                nn.Dropout(0.5),
                nn.Linear(1024, 512),
                nn.ReLU(),
                nn.Dropout(0.3),
                nn.Linear(512, num_classes)
            ) for _ in range(num_chars)
        ])

    def forward(self, x):
        # Shared convolutional trunk, with channel attention after stages 2 and 3.
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.attention1(x)
        x = self.conv3(x)
        x = self.attention2(x)
        x = self.conv4(x)
        x = x.view(x.size(0), -1)  # flatten to (batch, 512*5*17)
        # One logits tensor per character position.
        return [head(x) for head in self.char_heads]
# ============= 4. TRAINING =============
def train_model(num_epochs=30):
    """Generate synthetic CAPTCHA data, train CaptchaCNN, and keep the best checkpoint.

    Saves model weights to 'captcha_model.pth' whenever validation accuracy
    improves. Side effects: writes PNGs under 'train_data'/'val_data' and
    prints progress to stdout.

    Args:
        num_epochs: number of full passes over the training set.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")
    # Generate dataset
    generator = AdvancedCaptchaGenerator()
    train_data = generator.generate_dataset(10000, 'train_data')
    val_data = generator.generate_dataset(2000, 'val_data')
    # DataLoaders: resize to the model's expected 80x280 and normalize to [-1, 1].
    transform = transforms.Compose([
        transforms.Resize((80, 280)),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])
    train_dataset = CaptchaDataset(train_data, transform)
    val_dataset = CaptchaDataset(val_data, transform)
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=2)
    # Model
    model = CaptchaCNN().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3)
    # Was a hard-coded 5 in both loss sums; keep it in sync with the model.
    num_chars = model.num_chars
    best_acc = 0.0
    for epoch in range(num_epochs):
        # ---- Training ----
        model.train()
        train_loss = 0.0
        correct = 0
        total = 0
        batches = 0  # explicit counter (previously relied on tqdm's internal pbar.n)
        pbar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}')
        for images, labels in pbar:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            # Total loss = sum of per-position cross-entropies.
            loss = sum(criterion(outputs[i], labels[:, i]) for i in range(num_chars))
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            batches += 1
            # A sample counts as correct only if ALL positions match.
            predictions = torch.stack([torch.argmax(out, 1) for out in outputs], dim=1)
            correct += (predictions == labels).all(dim=1).sum().item()
            total += labels.size(0)
            pbar.set_postfix({'loss': f'{train_loss/batches:.4f}', 'acc': f'{100*correct/total:.2f}%'})
        # ---- Validation ----
        model.eval()
        val_correct = 0
        val_total = 0
        val_loss = 0.0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = sum(criterion(outputs[i], labels[:, i]) for i in range(num_chars))
                val_loss += loss.item()
                predictions = torch.stack([torch.argmax(out, 1) for out in outputs], dim=1)
                val_correct += (predictions == labels).all(dim=1).sum().item()
                val_total += labels.size(0)
        val_acc = 100 * val_correct / val_total
        # ReduceLROnPlateau in 'min' mode: lower summed validation loss is better.
        scheduler.step(val_loss)
        print(f'\nEpoch {epoch+1}: Train Acc={100*correct/total:.2f}%, Val Acc={val_acc:.2f}%')
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), 'captcha_model.pth')
            print(f'β Model saved! Best accuracy: {val_acc:.2f}%\n')
    print(f'\nπ Training complete! Best accuracy: {best_acc:.2f}%')
# ============= 5. PREDICTION =============
def predict_captcha(model, image, device='cpu'):
    """Run ``model`` on one CAPTCHA image.

    Accepts a PIL image or a numpy array; returns ``(digit_string,
    markdown_details)`` where the details list per-position confidence.
    """
    preprocess = transforms.Compose([
        transforms.Resize((80, 280)),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    image = image.convert('RGB')
    batch = preprocess(image).unsqueeze(0).to(device)
    model.eval()
    with torch.no_grad():
        outputs = model(batch)
    # Per-position argmax and its softmax probability.
    predictions = []
    confidences = []
    for logits in outputs:
        probs = torch.softmax(logits, dim=1)
        best = torch.argmax(probs, 1).item()
        predictions.append(str(best))
        confidences.append(probs[0][best].item())
    result = ''.join(predictions)
    avg_conf = sum(confidences) / len(confidences)
    # Assemble the markdown report.
    parts = ["### π Prediction Details\n\n"]
    for i, (pred, conf) in enumerate(zip(predictions, confidences)):
        if conf > 0.8:
            emoji = "β "
        elif conf > 0.6:
            emoji = "β οΈ"
        else:
            emoji = "β"
        parts.append(f"{emoji} **Position {i+1}:** `{pred}` (confidence: {conf*100:.1f}%)\n")
    parts.append(f"\n### π Summary\n")
    parts.append(f"**Final Result:** `{result}`\n\n")
    parts.append(f"**Average Confidence:** {avg_conf*100:.1f}%\n\n")
    if avg_conf > 0.85:
        parts.append("β¨ **High confidence prediction!**")
    elif avg_conf > 0.7:
        parts.append("β οΈ **Medium confidence - verify manually**")
    else:
        parts.append("β **Low confidence - image may be unclear**")
    return result, "".join(parts)
# ============= 6. GRADIO INTERFACE =============
def create_gradio_interface():
    """Build and return the Gradio Blocks app (image upload -> digit prediction).

    Loads weights from 'captcha_model.pth' when present; otherwise the app
    still launches, but predictions come from a randomly initialized model.
    """
    # Load model
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = CaptchaCNN().to(device)
    try:
        model.load_state_dict(torch.load('captcha_model.pth', map_location=device))
        print("β Model loaded successfully!")
    except FileNotFoundError:
        # Fall through with random weights so the interface can be previewed.
        print("β οΈ Model not found! Run with --train first")
        print("Creating demo model for interface preview...")

    def predict_wrapper(image):
        # Guard the "no image uploaded" case and surface prediction errors
        # in the UI instead of crashing the app.
        if image is None:
            return "β Please upload an image", "No image provided"
        try:
            return predict_captcha(model, image, device)
        except Exception as e:
            return f"β Error: {str(e)}", f"**Error Details:**\n```\n{str(e)}\n```"

    # Interface
    with gr.Blocks(theme=gr.themes.Soft(), title="CAPTCHA Solver") as demo:
        gr.Markdown(
            """
            # π Advanced CAPTCHA Solver
            ## AI-Powered Digit Recognition (Like Gemini Google)
            Upload CAPTCHA dengan 5 digit dan biarkan AI menebaknya seperti **Gemini**!
            ### β¨ Features:
            - π― Menangani CAPTCHA kompleks dengan noise & distorsi
            - π Prediksi real-time dengan confidence scores
            - π¬ Model CNN + Attention Mechanism
            - π― Accuracy 95%+ pada synthetic data
            """
        )
        with gr.Row():
            # Left column: input image, action buttons, usage notes.
            with gr.Column(scale=1):
                image_input = gr.Image(
                    label="πΈ Upload CAPTCHA Image",
                    type="pil",
                    height=300
                )
                predict_btn = gr.Button(
                    "π Solve CAPTCHA",
                    variant="primary",
                    size="lg"
                )
                gr.Markdown(
                    """
                    ### π How to Use:
                    1. Upload image CAPTCHA (5 digit)
                    2. Klik "Solve CAPTCHA"
                    3. Lihat hasil prediksi + confidence
                    ### π‘ Tips:
                    - Format: PNG, JPG, JPEG
                    - Bisa handle noise & distortion
                    - Auto-resize ke 280x80
                    """
                )
                # Generate sample CAPTCHA
                sample_btn = gr.Button("π² Generate Sample CAPTCHA", size="sm")
            # Right column: prediction result and detail report.
            with gr.Column(scale=1):
                result_output = gr.Textbox(
                    label="π― Predicted Result",
                    placeholder="Hasil akan muncul di sini...",
                    lines=2,
                    scale=2
                )
                details_output = gr.Markdown(
                    value="Klik **Solve CAPTCHA** untuk melihat detail prediksi"
                )
        gr.Markdown(
            """
            ---
            ### π€ Model Info:
            - **Architecture:** CNN + Attention Mechanism
            - **Input:** 280x80 RGB Image
            - **Output:** 5 Digits (0-9)
            - **Parameters:** ~50M
            ### π Tech Stack:
            PyTorch β’ Gradio β’ Hugging Face Spaces
            ---
            **β οΈ Disclaimer:** Model ini untuk educational purposes only.
            **π‘ Training:** Run `python app.py --train` untuk train ulang model
            """
        )
        # Events
        predict_btn.click(
            fn=predict_wrapper,
            inputs=image_input,
            outputs=[result_output, details_output]
        )

        def generate_sample():
            # Render a fresh synthetic CAPTCHA into the input box so users can
            # try the solver without uploading a file; the ground truth is
            # shown in the details pane for comparison.
            gen = AdvancedCaptchaGenerator()
            img, text = gen.generate_captcha()
            return img, f"Generated CAPTCHA: **{text}**"

        sample_btn.click(
            fn=generate_sample,
            outputs=[image_input, details_output]
        )
    return demo
# ============= 7. MAIN =============
if __name__ == "__main__":
    # CLI dispatch: a `--train` flag anywhere on the command line selects
    # training; otherwise serve the Gradio interface.
    train_requested = "--train" in sys.argv
    if train_requested:
        print("π Starting training mode...")
        train_model(num_epochs=30)
    else:
        print("π Starting Gradio interface...")
        demo = create_gradio_interface()
        demo.launch()