Spaces:

AxleToe
/

captcha-solving

Paused

App Files Files Community

AxleToe committed on Jul 15, 2025

Commit

066a23d

verified ·

1 Parent(s): 1a04e0c

Create app.py

Browse files

Files changed (1) hide show

app.py +64 -0

app.py ADDED Viewed

	@@ -0,0 +1,64 @@

+# app.py
+import gradio as gr
+from PIL import Image
+from transformers import VisionEncoderDecoderModel, TrOCRProcessor
+import torch
+print("--- Initializing Solver Service ---")
+# Use a GPU if available (Hugging Face may provide one)
+device = "cuda" if torch.cuda.is_available() else "cpu"
+# --- LOAD MODELS ONLY ONCE AT STARTUP ---
+print("1. Loading TrOCR processor...")
+processor = TrOCRProcessor.from_pretrained("anuashok/ocr-captcha-v3", use_fast=True)
+print("   - Processor loaded.")
+print("2. Loading VisionEncoderDecoder model...")
+model = VisionEncoderDecoderModel.from_pretrained("anuashok/ocr-captcha-v3").to(device)
+print("   - Model loaded.")
+print(f"--- Model is running on: {device.upper()} ---")
+# --- END OF HEAVY LOADING ---
+def solve_captcha(input_image: Image.Image) -> str:
+    """
+    Solves a CAPTCHA using the pre-loaded model.
+    This function uses the exact image processing logic from your original script.
+    """
+    print("--- Received image for solving ---")
+    # 1. Convert input image to RGBA (as in your original code)
+    image = input_image.convert("RGBA")
+    # 2. Prepare a white background
+    background = Image.new("RGBA", image.size, (255, 255, 255))
+    # 3. Composite the image onto the white background and convert to RGB
+    combined = Image.alpha_composite(background, image).convert("RGB")
+    print("   - Image pre-processing complete.")
+    # 4. Prepare image for the model
+    pixel_values = processor(images=combined, return_tensors="pt").pixel_values.to(device)
+    print("   - Image prepared for model.")
+    # 5. Run model inference
+    generated_ids = model.generate(pixel_values)
+    print("   - Model inference complete.")
+    # 6. Decode the result
+    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    print(f"   - Decoding complete. Result: {generated_text}")
+    return generated_text
+# --- Create the Gradio Interface and API Endpoint ---
+gr.Interface(
+    fn=solve_captcha,
+    inputs=gr.Image(type="pil", label="Upload CAPTCHA Image"),
+    outputs=gr.Textbox(label="Result"),
+    title="TrOCR CAPTCHA Solver (Custom Logic)",
+    description="An API for the anuashok/ocr-captcha-v3 model using specific pre-processing."
+).launch()