7eu7d7 committed
Commit 1f584ff
1 Parent(s): da04e0d
Files changed (6)
  1. app.py +77 -0
  2. cap.py +100 -0
  3. models/__init__.py +1 -0
  4. models/enc_dec.py +28 -0
  5. requirements.txt +7 -0
  6. utils.py +27 -0
app.py ADDED
@@ -0,0 +1,77 @@
+ from functools import lru_cache
+
+ import gradio as gr
+ import numpy as np
+ from PIL import Image
+ from huggingface_hub import hf_hub_download
+
+ from cap import Predictor
+
+
+ @lru_cache()
+ def load_predictor(model):
+     predictor = Predictor(hf_hub_download(
+         '7eu7d7/CAPTCHA_recognize',
+         model,
+     ))
+     return predictor
+
+
+ def process_image(image):
+     """
+     Process the uploaded image and return the recognized CAPTCHA text.
+     """
+     if image is None:
+         return "Please upload an image first"
+
+     # Normalize the input to an RGB PIL image
+     if isinstance(image, np.ndarray):
+         img = Image.fromarray(image.astype('uint8')).convert('RGB')
+     else:
+         img = image.convert('RGB')
+
+     predictor = load_predictor('captcha-2000.safetensors')
+     text = predictor.pred_img(img, show=False)
+     return text
+
+
+ # Create the Gradio interface
+ with gr.Blocks(title="CAPTCHA Recognize") as demo:
+     with gr.Row():
+         # Left column - input area
+         with gr.Column(scale=1):
+             image_input = gr.Image(
+                 label="Upload CAPTCHA Image",
+                 type="pil",
+                 height=300
+             )
+
+             # Run button
+             process_btn = gr.Button(
+                 "Run",
+                 variant="primary",
+                 size="lg"
+             )
+
+         # Right column - output area
+         with gr.Column(scale=1):
+             text_output = gr.Textbox(
+                 label="Result",
+                 lines=4,
+                 interactive=False
+             )
+
+     # Bind events
+     process_btn.click(
+         fn=process_image,
+         inputs=image_input,
+         outputs=[text_output]
+     )
+
+
+ # Launch the application
+ if __name__ == "__main__":
+     demo.launch()
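
A quick way to exercise `process_image` outside the UI (a sketch, not part of the commit; `example_captcha.png` is a hypothetical local file, and the checkpoint name is the one hard-coded above):

from PIL import Image
from app import process_image

# Importing app builds the Blocks UI but does not launch it (guarded by __main__).
# The first call downloads the checkpoint from the Hub; lru_cache then reuses
# the loaded Predictor across calls.
img = Image.open('example_captcha.png')
print(process_image(img))
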
cap.py ADDED
@@ -0,0 +1,100 @@
+ # -*- coding: utf-8 -*-
+ import torch
+ import argparse
+ from models import ResnetEncoderDecoder
+ from utils import remove_rptch
+ from safetensors import safe_open
+ from torchvision import transforms as T
+ from PIL import Image
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+ char_dict = '_0123456789abcdefghijklmnopqrstuvwxyz'
+ id_chr_map = {i: c for i, c in enumerate(char_dict)}
+
+
+ class Predictor:
+     def __init__(self, model_path, char_dict=char_dict):
+         self.model = ResnetEncoderDecoder(char_dict).to(device)
+         self.model.eval()
+         if str(device) == 'cpu':
+             # safe_open expects a device string, not a torch.device
+             check_point = self.load_safetensor(model_path, map_location='cpu')
+         else:
+             check_point = self.load_safetensor(model_path)
+         self.model.load_state_dict(check_point)
+         self.char_dict = char_dict
+
+         self.trans = T.Compose([
+             T.ToTensor(),
+             T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+         ])
+
+     # >>>>> from RainbowNeko Engine >>>>>
+     @staticmethod
+     def fold_dict(safe_f, split_key=':'):
+         dict_fold = {}
+
+         for k in safe_f.keys():
+             k_list = k.split(split_key)
+             dict_last = dict_fold
+             for item in k_list[:-1]:
+                 if item not in dict_last:
+                     dict_last[item] = {}
+                 dict_last = dict_last[item]
+             dict_last[k_list[-1]] = safe_f.get_tensor(k)
+
+         return dict_fold
+
+     def load_safetensor(self, ckpt_f, map_location='cpu'):
+         with safe_open(ckpt_f, framework="pt", device=map_location) as f:
+             sd_fold = self.fold_dict(f)
+         return sd_fold
+     # <<<<< from RainbowNeko Engine <<<<<
+
+     def pred(self, input):
+         pred = self.model(input.to(device))
+
+         B, H, W, C = pred.size()
+         T_ = H * W
+         pred = pred.view(B, T_, -1)
+         pred = pred + 1e-10
+
+         pred_cls = torch.max(pred, 2)[1].data.cpu().numpy()[0]
+
+         # Read the H x W grid column by column, then drop blanks (class 0)
+         pred_cls = pred_cls.reshape((H, W)).T.reshape((H * W,))
+         final_str = remove_rptch(''.join(self.char_dict[x] for x in pred_cls if x))
+
+         return pred_cls, final_str, (H, W)
+
+     def pred_img(self, image, show=True):
+         if isinstance(image, str):
+             image = Image.open(image).convert('RGB')
+         image = self.trans(image)
+         pred_cls, final_str, (H, W) = self.pred(image.unsqueeze(0))
+
+         if show:
+             pred_string = ''.join(['%2s' % self.char_dict[pn] for pn in pred_cls])
+             pred_string_set = [pred_string[i:i + W * 2] for i in range(0, len(pred_string), W * 2)]
+             print('Prediction: ')
+             for pre_str in pred_string_set:
+                 print(pre_str)
+
+             print('Result:', final_str)
+
+         return final_str
+
+
+ if __name__ == "__main__":
+     parser = argparse.ArgumentParser(description='CAPTCHA Recognizer')
+     parser.add_argument('--model_path', type=str, default='exps/captcha/ckpts/model-2000.safetensors', help='Path to the model file')
+     parser.add_argument('--image_path', type=str, default=[
+         '/data1/dzy/CAPTCHA_recognize/data3/test/2.jpg',
+         '/data1/dzy/Verification_Code_CV_v1.1/imgs/00097.png',
+         '/data1/dzy/Verification_Code_CV_v1.1/imgs/00098.png',
+         '/data1/dzy/Verification_Code_CV_v1.1/imgs/00099.png',
+     ], nargs='+', help='Path(s) to the image file(s)')
+     args = parser.parse_args()
+
+     predictor = Predictor(args.model_path)
+     for path in args.image_path:
+         result = predictor.pred_img(path)
+         print(f'Recognized CAPTCHA: {result}')
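
The decode in `Predictor.pred` is a greedy, CTC-style pass: each output cell gets an argmax class, the H x W grid is read column by column (hence the `.T`), blanks (class 0, `_`) are dropped, and `remove_rptch` trims repeats toward the 4-character target. A toy illustration with hypothetical class ids:

import numpy as np
from utils import remove_rptch

char_dict = '_0123456789abcdefghijklmnopqrstuvwxyz'
grid = np.array([[1, 1, 2],
                 [1, 2, 3]])  # hypothetical 2x3 argmax grid (H=2, W=3)
flat = grid.T.reshape(-1)     # column-major read order, as in pred()
raw = ''.join(char_dict[x] for x in flat if x)  # drop blank class 0
print(raw)                # '000112'
print(remove_rptch(raw))  # '0112' -- repeats trimmed toward length 4
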
models/__init__.py ADDED
@@ -0,0 +1 @@
+ from .enc_dec import ResnetEncoderDecoder
models/enc_dec.py ADDED
@@ -0,0 +1,28 @@
+ # -*- coding: utf-8 -*-
+
+ import timm
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+
+
+ class ResnetEncoderDecoder(nn.Module):
+     def __init__(self, char_dict):
+         super(ResnetEncoderDecoder, self).__init__()
+         self.bn = nn.BatchNorm2d(64)
+         resnet = timm.create_model('resnet18', pretrained=True, drop_rate=0.2, drop_path_rate=0.3)
+         self.conv = nn.Conv2d(3, 64, kernel_size=3, padding=1, stride=1)
+         self.cnn = nn.Sequential(*list(resnet.children())[4:-2])  # resnet18 layer1-layer4 (drops stem and head)
+         self.out = nn.Linear(512, len(char_dict))
+
+         self.char_dict = char_dict
+
+     def forward(self, input):
+         input = F.silu(self.bn(self.conv(input)), True)
+         input = F.max_pool2d(input, kernel_size=(2, 2), stride=(2, 2))
+         input = self.cnn(input)
+
+         # (B, C, H, W) -> (B, H, W, num_classes): per-cell class probabilities
+         input = input.permute(0, 2, 3, 1)
+         input = F.softmax(self.out(input), dim=-1)
+
+         return input
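
For orientation: the custom stem (stride-1 conv plus a 2x2 max-pool) followed by resnet18's layer1-layer4 gives 16x total downsampling, so each surviving spatial cell becomes a distribution over the character set. A minimal shape check (the 64x160 input size is an assumed example, not fixed by this commit):

import torch
from models import ResnetEncoderDecoder

model = ResnetEncoderDecoder('_0123456789abcdefghijklmnopqrstuvwxyz').eval()
with torch.no_grad():
    out = model(torch.randn(1, 3, 64, 160))  # assumed input size
print(out.shape)  # expected: torch.Size([1, 4, 10, 37]) -- (B, H/16, W/16, classes)
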
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ torch
+ torchvision
+ pillow
+ timm
+ safetensors
+ numpy
+ huggingface_hub
utils.py ADDED
@@ -0,0 +1,27 @@
+
+ def rmchr(text, index):
+     # Remove the character at the given index
+     return text[:index] + text[index + 1:]
+
+
+ def count_rptch(text):
+     # Return (run length, start index) of the longest run of repeated characters
+     maxch = (1, 0)
+     nowch = (0, 0)
+     lastch = None
+     for index, i in enumerate(text):
+         if lastch == i:
+             nowch = (nowch[0] + 1, nowch[1])
+             if nowch[0] > maxch[0]:
+                 maxch = nowch
+         else:
+             nowch = (1, index)
+
+         lastch = i
+
+     return maxch
+
+
+ def remove_rptch(text, tar_len=4):
+     # Drop one character from the longest repeated run at a time, until the
+     # text is tar_len characters long or no repeats remain
+     while len(text) > tar_len:
+         maxch = count_rptch(text)
+         if maxch[0] <= 1:
+             break
+         text = rmchr(text, maxch[1])
+     return text
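
For reference, a sketch of the helpers' expected behavior: `count_rptch` reports the longest repeated run, and `remove_rptch` trims one character from that run at a time, stopping at `tar_len` even if repeats remain:

>>> count_rptch('aabbbc')
(3, 2)
>>> remove_rptch('aabbccdd')  # trimmed down to the 4-char target
'abcd'
>>> remove_rptch('aaaab')     # stops once the length reaches 4
'aaab'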