Text_Captcha_breaker

Sleeping

App Files Files Community

Futi613 committed on Oct 6, 2025

Commit

b8669eb

verified ·

1 Parent(s): b410914

Update app.py

Browse files

Files changed (1) hide show

app.py +63 -47

app.py CHANGED Viewed

@@ -6,31 +6,36 @@ from PIL import Image
 from tokenizer_base import Tokenizer
 import pathlib
 import os
-import gradio as gr
 from huggingface_hub import Repository
-# repo = Repository(
-#     local_dir="secret_models",
-#     repo_type="model",
-#     clone_from="docparser/captcha",
-#     token=True
-# )
-# repo.git_pull()
 cwd = pathlib.Path(__file__).parent.resolve()
-model_file = os.path.join(cwd,"secret_models","captcha.onnx")
-img_size = (32,128)
 charset = r"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
 tokenizer_base = Tokenizer(charset)
 def get_transform(img_size):
-        transforms = []
-        transforms.extend([
-            T.Resize(img_size, T.InterpolationMode.BICUBIC),
-            T.ToTensor(),
-            T.Normalize(0.5, 0.5)
-        ])
-        return T.Compose(transforms)
 def to_numpy(tensor):
     return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()
@@ -41,40 +46,51 @@ def initialize_model(model_file):
     onnx_model = onnx.load(model_file)
     onnx.checker.check_model(onnx_model)
     ort_session = rt.InferenceSession(model_file)
-    return transform,ort_session
 def get_text(img_org):
     try:
-        # img_org = Image.open(image_path)
-        # Preprocess. Model expects a batch of images with shape: (B, C, H, W)
-        x = transform(img_org.convert('RGB')).unsqueeze(0)
-        # compute ONNX Runtime output prediction
-        ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(x)}
-        logits = ort_session.run(None, ort_inputs)[0]
-        probs = torch.tensor(logits).softmax(-1)
-        preds, probs = tokenizer_base.decode(probs)
-        preds = preds[0]
-        print(preds)
-        return preds
     except Exception as e:
-        print(str(e))
-transform,ort_session = initialize_model(model_file=model_file)
-gr.Interface(
-    get_text,
-    inputs=gr.Image(type="pil"),
-    outputs=gr.Textbox(),
-    title="Text Captcha Reader",
-    examples=["8000.png","11JW29.png","2a8486.jpg","2nbcx.png",
-             "000679.png","000HU.png","00Uga.png.jpg","00bAQwhAZU.jpg",
-             "00h57kYf.jpg","0EoHdtVb.png","0JS21.png","0p98z.png","10010.png"]
-).launch()
-# if __name__ == "__main__":
-#     image_path = "8000.png"
-#     preds,probs = get_text(image_path)
-#     print(preds[0])

 from tokenizer_base import Tokenizer
 import pathlib
 import os
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+import base64
+from io import BytesIO
 from huggingface_hub import Repository
+repo = Repository(
+    local_dir="secret_models",
+    repo_type="model",
+    clone_from="docparser/captcha",
+    token=True
+)
+repo.git_pull()
 cwd = pathlib.Path(__file__).parent.resolve()
+model_file = os.path.join(cwd, "secret_models", "captcha.onnx")
+img_size = (32, 128)
 charset = r"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
 tokenizer_base = Tokenizer(charset)
+app = FastAPI(title="Text Captcha Reader API")
 def get_transform(img_size):
+    transforms = []
+    transforms.extend([
+        T.Resize(img_size, T.InterpolationMode.BICUBIC),
+        T.ToTensor(),
+        T.Normalize(0.5, 0.5)
+    ])
+    return T.Compose(transforms)
 def to_numpy(tensor):
     return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()
     onnx_model = onnx.load(model_file)
     onnx.checker.check_model(onnx_model)
     ort_session = rt.InferenceSession(model_file)
+    return transform, ort_session
 def get_text(img_org):
+    # Preprocess. Model expects a batch of images with shape: (B, C, H, W)
+    x = transform(img_org.convert('RGB')).unsqueeze(0)
+    # compute ONNX Runtime output prediction
+    ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(x)}
+    logits = ort_session.run(None, ort_inputs)[0]
+    probs = torch.tensor(logits).softmax(-1)
+    preds, probs = tokenizer_base.decode(probs)
+    preds = preds[0]
+    print(preds)
+    return preds
+# Initialize model at startup
+transform, ort_session = initialize_model(model_file=model_file)
+# Pydantic model for request
+class ImageRequest(BaseModel):
+    image: str  # base64 encoded image
+# Pydantic model for response
+class TextResponse(BaseModel):
+    text: str
+@app.post("/predict", response_model=TextResponse)
+async def predict_captcha(request: ImageRequest):
     try:
+        # Decode base64 image
+        image_data = base64.b64decode(request.image)
+        img = Image.open(BytesIO(image_data))
+        # Get prediction
+        text = get_text(img)
+        return TextResponse(text=text)
     except Exception as e:
+        raise HTTPException(status_code=400, detail=f"Error processing image: {str(e)}")
+@app.get("/health")
+async def health_check():
+    return {"status": "ok"}
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)