Initial Commit

Files changed:
- .gitattributes +1 -0
- Dockerfile +20 -20
- Nunito.ttf +3 -0
- app.py +325 -0
- images/3eplzv.jpg +0 -0
- images/46CN5W.jpg +0 -0
- images/5820.jpg +0 -0
- images/6521.jpg +0 -0
- images/67qas.jpg +0 -0
- images/75ke.jpg +0 -0
- images/8JKM.jpg +0 -0
- images/8jpwt0.jpg +0 -0
- images/B1QAZ6.jpg +0 -0
- images/CCX8.jpg +0 -0
- images/EPOD.jpg +0 -0
- images/ER6Y.jpg +0 -0
- images/EWSP.jpg +0 -0
- images/GIOGp.jpg +0 -0
- images/HCDS.jpg +0 -0
- images/JBWkEs.jpg +0 -0
- images/KKh8Q.jpg +0 -0
- images/MFMH.jpg +0 -0
- images/NJSEX.jpg +0 -0
- images/R6AB.jpg +0 -0
- images/TVHF.jpg +0 -0
- images/Vb4cG.jpg +0 -0
- images/XaNqQx.jpg +0 -0
- images/YULM.jpg +0 -0
- images/abfsh.jpg +0 -0
- images/b6yc.jpg +0 -0
- images/bCWaLR.jpg +0 -0
- images/d3no.jpg +0 -0
- images/iq1sZo.jpg +0 -0
- images/kJtOfk.jpg +0 -0
- requirements.txt +210 -3

.gitattributes
CHANGED

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+Nunito.ttf filter=lfs diff=lfs merge=lfs -text
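
The added line keeps the binary font out of regular Git storage: Git LFS replaces it with the small pointer file shown in the Nunito.ttf section below. For reference, such a line is usually appended by the git-lfs CLI rather than written by hand; a minimal sketch of that workflow:

    git lfs track "Nunito.ttf"          # appends the filter line to .gitattributes
    git add .gitattributes Nunito.ttf   # with the filter active, Git stores a pointer, not the raw bytes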

Dockerfile
CHANGED

@@ -1,20 +1,20 @@
-FROM python:3.13.5-slim
-
-WORKDIR /app
-
-RUN apt-get update && apt-get install -y \
-    build-essential \
-    curl \
-    git \
-    && rm -rf /var/lib/apt/lists/*
-
-COPY requirements.txt ./
-COPY
-
-RUN pip3 install -r requirements.txt
-
-EXPOSE 8501
-
-HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
-
-ENTRYPOINT ["streamlit", "run", "
+FROM python:3.13.5-slim
+
+WORKDIR /app
+
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    curl \
+    git \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY requirements.txt ./
+COPY app.py ./
+
+RUN pip3 install -r requirements.txt
+
+EXPOSE 8501
+
+HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
+
+ENTRYPOINT ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
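
For local testing outside the Space, the image can be built and run with the standard Docker CLI; a minimal sketch (the tag captcha-showcase is an arbitrary, hypothetical name):

    docker build -t captcha-showcase .
    docker run -p 8501:8501 captcha-showcase

Note that this Dockerfile copies only requirements.txt and app.py, while app.py also reads Nunito.ttf and the images/ directory at runtime; a local container may need those copied or mounted in as well.
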
Nunito.ttf
ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f2a6ab02dcefcf4c7481e92ffb49ad0c7bc7a19ccd18eb5d7d9f4e21211998c6
+size 275644
app.py
ADDED

@@ -0,0 +1,325 @@
+import streamlit as st
+import time
+import torch
+import torch.nn.functional as F
+import torchvision.transforms as transforms
+import torchvision.transforms.functional as F_vision
+from PIL import Image
+from transformers import AutoModel, AutoProcessor, pipeline
+from captcha.image import ImageCaptcha
+import io
+import matplotlib.pyplot as plt
+import numpy as np
+from typing import Optional
+from st_keyup import st_keyup
+
+st.set_page_config(page_title="CAPTCHA Model Showcase", layout="wide")
+
+st.title("CAPTCHA Models Showcase")
+
+st.markdown("""
+Explore generation with various text and augmentations, or test the models with your own images!
+""")
+
+# --- Models configuration ---
+@st.cache_resource
+def load_finetuned_models():
+    # Cache all models
+    return {
+        "Graf-J/captcha-conv-transformer-finetuned": {
+            "Architecture": "Convolutional Transformer",
+            "Training Data": "hammer888/captcha-data",
+            "Python Captcha Library": "Included",
+            "Parameters": "12,279,551",
+            "Model Size": "51.7 MB",
+            "Sequence Accuracy (Python Captcha)": "88.42%",
+            "CER (Python Captcha)": "2.08%",
+            "Link": "https://huggingface.co/Graf-J/captcha-conv-transformer-finetuned"
+        },
+        "Graf-J/captcha-crnn-finetuned": {
+            "Architecture": "CRNN",
+            "Training Data": "hammer888/captcha-data",
+            "Python Captcha Library": "Included",
+            "Parameters": "3,570,943",
+            "Model Size": "14.3 MB",
+            "Sequence Accuracy (Python Captcha)": "86.20%",
+            "CER (Python Captcha)": "2.53%",
+            "Link": "https://huggingface.co/Graf-J/captcha-crnn-finetuned"
+        }
+    }
+
+MODELS_FINETUNED = load_finetuned_models()
+
+@st.cache_resource
+def load_all_models_hammer_stats():
+    # Base and Finetuned models with hammer888 metrics for Section 2
+    return {
+        "Graf-J/captcha-conv-transformer-base": {
+            "Architecture": "Convolutional Transformer",
+            "Training Data": "hammer888/captcha-data",
+            "Parameters": "12,279,551",
+            "Model Size": "51.7 MB",
+            "Sequence Accuracy (hammer888)": "97.38%",
+            "CER (hammer888)": "0.57%",
+            "Link": "https://huggingface.co/Graf-J/captcha-conv-transformer-base"
+        },
+        "Graf-J/captcha-crnn-base": {
+            "Architecture": "CRNN",
+            "Training Data": "hammer888/captcha-data",
+            "Parameters": "3,570,943",
+            "Model Size": "14.3 MB",
+            "Sequence Accuracy (hammer888)": "96.81%",
+            "CER (hammer888)": "0.70%",
+            "Link": "https://huggingface.co/Graf-J/captcha-crnn-base"
+        },
+        "Graf-J/captcha-conv-transformer-finetuned": {
+            "Architecture": "Convolutional Transformer",
+            "Training Data": "hammer888/captcha-data + Python Captcha",
+            "Parameters": "12,279,551",
+            "Model Size": "51.7 MB",
+            "Sequence Accuracy (hammer888)": "95.36%",
+            "CER (hammer888)": "1.03%",
+            "Link": "https://huggingface.co/Graf-J/captcha-conv-transformer-finetuned"
+        },
+        "Graf-J/captcha-crnn-finetuned": {
+            "Architecture": "CRNN",
+            "Training Data": "hammer888/captcha-data + Python Captcha",
+            "Parameters": "3,570,943",
+            "Model Size": "14.3 MB",
+            "Sequence Accuracy (hammer888)": "92.98%",
+            "CER (hammer888)": "1.59%",
+            "Link": "https://huggingface.co/Graf-J/captcha-crnn-finetuned"
+        },
+    }
+
+ALL_MODELS = load_all_models_hammer_stats()
+
+@st.cache_resource
+def get_model_pipeline(model_id):
+    return pipeline(task="captcha-recognition", model=model_id, trust_remote_code=True)
+
+@st.cache_resource
+def get_custom_model(model_id):
+    processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
+    model = AutoModel.from_pretrained(model_id, trust_remote_code=True)
+    model.eval()
+    return processor, model
+
+
+def predict(image, model_id):
+    model_info = ALL_MODELS.get(model_id)
+    if not model_info:
+        raise ValueError("Model not found")
+
+    processor, model = get_custom_model(model_id)
+    inputs = processor(images=image)
+    with torch.no_grad():
+        outputs = model(inputs["pixel_values"])
+        logits = outputs.logits
+
+    # CTC Decode
+    prediction = processor.batch_decode(logits)[0]
+
+    # Calculate confidences (simplified for display)
+    probs = F.softmax(logits, dim=-1)
+    max_probs, _ = torch.max(probs, dim=-1)
+    # We take the mean confidence across the sequence as an example,
+    # or we can just return the raw string if character level is too complex without alignment.
+    confidence = max_probs[0].mean().item()
+    return prediction, confidence
+
+def apply_transformations(img: Image.Image, rotation: float, alpha: float, seed: Optional[int] = None) -> Image.Image:
+    """Applies rotation and elastic distortion to a PIL image using torchvision."""
+    distorted_raw = img
+
+    # Needs to be a tensor for transforms if it expects tensor
+    if alpha > 0:
+        if seed is not None:
+            torch.manual_seed(seed)
+
+        # Use standard ElasticTransform, it expects a tensor.
+        elasticter = transforms.ElasticTransform(alpha=float(alpha), sigma=9.0, fill=255)
+        tensor_img = transforms.ToTensor()(distorted_raw)
+        distorted_tensor = elasticter(tensor_img)
+        # Convert back to PIL Image, but ToPILImage expects C x H x W in [0, 1] range
+        # Some older elastic transforms might not fill correctly with 255 if tensor is 0-1.
+        distorted_raw = transforms.ToPILImage()(distorted_tensor)
+
+    if rotation != 0:
+        distorted_raw = F_vision.rotate(distorted_raw, float(rotation), fill=255)
+
+    if alpha > 0:
+        crop_amount = int(alpha / 35)
+        width, height = distorted_raw.size
+        # Crop: (left, upper, right, lower)
+        distorted_raw = distorted_raw.crop((crop_amount, 0, width - crop_amount, height))
+
+    return distorted_raw
+
+
+st.header("1. CAPTCHA Generation & Inference")
+
+col1, col2 = st.columns([1, 2])
+
+with col1:
+    selected_model_1 = st.selectbox("Select Model", list(MODELS_FINETUNED.keys()), key="model_sec1")
+
+    st.markdown("**Model Statistics:**")
+    stats = {k: v for k, v in MODELS_FINETUNED[selected_model_1].items() if k != "type"}
+    st.table({
+        "Metric": list(stats.keys()),
+        "Value": list(stats.values())
+    })
+
+with col2:
+    st.subheader("Generate CAPTCHA")
+
+    # Session state for caching the base image
+    if "base_captcha_image" not in st.session_state:
+        st.session_state.base_captcha_image = None
+    if "last_captcha_text" not in st.session_state:
+        st.session_state.last_captcha_text = ""
+    if "distortion_seed" not in st.session_state:
+        st.session_state.distortion_seed = torch.randint(0, 1000000, (1,)).item()
+
+    input_col, btn_col = st.columns([3, 1])
+    with input_col:
+        captcha_val = st_keyup("Enter text (1-8 alphanumeric chars)", value="aZ93eiL", debounce=300)
+        captcha_text: str = str(captcha_val) if captcha_val is not None else ""
+    with btn_col:
+        st.write("")
+        st.write("")
+        regen_btn = st.button("🔄 Regenerate Image")
+
+    slider_col1, slider_col2 = st.columns(2)
+    with slider_col1:
+        rotation = st.slider("Rotation (-15 to 15)", -15, 15, 0)
+    with slider_col2:
+        distortion = st.slider("Distortion Alpha (0 to 100)", 0, 100, 0)
+
+    if not captcha_text.isalnum():
+        st.error("Text must be alphanumeric!")
+    elif not (1 <= len(captcha_text) <= 8):
+        st.error("Text must be between 1 and 8 characters!")
+    else:
+        try:
+            # Check if we need to generate a new base image
+            if st.session_state.base_captcha_image is None or st.session_state.last_captcha_text != captcha_text or regen_btn:
+                generator = ImageCaptcha(fonts=["Nunito.ttf"])
+                st.session_state.base_captcha_image = generator.generate_image(captcha_text)
+                st.session_state.last_captcha_text = captcha_text
+                st.session_state.distortion_seed = torch.randint(0, 1000000, (1,)).item()
+
+            img = st.session_state.base_captcha_image
+
+            # Apply User's Transformation Logic
+            transformed_img = apply_transformations(img, rotation, distortion, st.session_state.distortion_seed)
+
+            # Predict
+            pred_text, conf = predict(transformed_img, selected_model_1)
+
+            # Display Side by Side (Input vs Prediction)
+            res_col1, res_col2 = st.columns(2)
+            with res_col1:
+                st.image(transformed_img, caption=f"Original: '{captcha_text}'", use_container_width=True)
+
+            with res_col2:
+                # Character-level coloring
+                html_chars = []
+                def char_at(s: str, idx: int) -> str:
+                    return s[idx] if idx < len(s) else ""
+
+                for i, p_char in enumerate(pred_text):
+                    color = "green" if p_char == char_at(str(captcha_text), i) else "red"
+                    html_chars.append(f"<span style='color: {color};'>{p_char}</span>")
+                colored_pred = "".join(html_chars)
+
+                st.markdown(f"<h3 style='text-align: center;'>Model Prediction:</h3>", unsafe_allow_html=True)
+                st.markdown(f"<h1 style='text-align: center;'><b>{colored_pred}</b></h1>", unsafe_allow_html=True)
+                if conf is not None:
+                    st.markdown(f"<p style='text-align: center; color: gray;'>Avg Confidence: {conf:.2%}</p>", unsafe_allow_html=True)
+
+        except Exception as e:
+            st.error(f"Error during prediction: {e}")
+
+st.divider()
+
+st.header("2. Upload & Test")
+
+import os
+# Load all available images from the images directory
+import glob
+
+image_files = glob.glob("images/*.jpg") + glob.glob("images/*.png")
+
+col_sec2_1, col_sec2_2 = st.columns([1, 2])
+
+with col_sec2_1:
+    selected_model_2 = st.selectbox("Select Model", list(ALL_MODELS.keys()), key="model_sec2")
+
+    st.markdown("**Model Statistics:**")
+    stats_2 = {k: v for k, v in ALL_MODELS[selected_model_2].items() if k != "type"}
+    st.table({
+        "Metric": list(stats_2.keys()),
+        "Value": list(stats_2.values())
+    })
+
+    with st.expander("Show Example Images"):
+        st.markdown("Drag and Drop one of these images into the uploader above!")
+
+        # Display in a grid of 3 columns
+        with st.container(height=400):
+            cols = st.columns(3)
+            for i, img_path in enumerate(image_files):
+                with cols[i % 3]:
+                    st.image(img_path, use_container_width=True)
+
+with col_sec2_2:
+    st.subheader("Upload an image")
+    uploaded_file = st.file_uploader("Choose an image file", type=["png", "jpg", "jpeg"], key="test_uploader")
+
+    image_to_predict = None
+
+    if uploaded_file is not None:
+        image_to_predict = Image.open(uploaded_file).convert("RGB")
+        try:
+            pred_text, conf = predict(image_to_predict, selected_model_2)
+
+            # Check for ground truth in filename depending on source
+            ground_truth = None
+            if uploaded_file is not None:
+                # Strip extension and check if it acts as a GT
+                base_name = os.path.splitext(uploaded_file.name)[0]
+                # We assume it's GT if it's alphanumeric and matches acceptable length (1-10)
+                if isinstance(base_name, str):
+                    if base_name.isalnum() and 1 <= len(base_name) <= 10:
+                        ground_truth = base_name
+
+            # Display Side by Side (Input vs Prediction)
+            res2_col1, res2_col2 = st.columns(2)
+            with res2_col1:
+                st.image(image_to_predict, caption="Uploaded Image", use_container_width=True)
+
+            with res2_col2:
+                # Render logic
+                if ground_truth:
+                    html_chars = []
+                    def char_at(s: str, idx: int) -> str:
+                        return s[idx] if idx < len(s) else ""
+
+                    for i, p_char in enumerate(pred_text):
+                        color = "green" if p_char == char_at(ground_truth, i) else "red"
+                        html_chars.append(f"<span style='color: {color};'>{p_char}</span>")
+                    colored_pred = "".join(html_chars)
+
+                    st.markdown(f"<h3 style='text-align: center;'>Model Prediction:</h3>", unsafe_allow_html=True)
+                    st.markdown(f"<h1 style='text-align: center;'><b>{colored_pred}</b></h1>", unsafe_allow_html=True)
+                else:
+                    st.markdown(f"<h3 style='text-align: center;'>Model Prediction:</h3>", unsafe_allow_html=True)
+                    st.markdown(f"<h1 style='text-align: center;'><b>{pred_text}</b></h1>", unsafe_allow_html=True)
+
+                if conf is not None:
+                    st.markdown(f"<p style='text-align: center; color: gray;'>Avg Confidence: {conf:.2%}</p>", unsafe_allow_html=True)
+
+        except Exception as e:
+            st.error(f"Error during prediction: {e}")
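
One detail of predict() worth highlighting is the confidence score: it softmaxes the logits, takes the argmax probability at each CTC timestep, and averages over the sequence. A minimal, self-contained sketch of that computation, with random logits standing in for real model output (the tensor shape here is an illustrative assumption, not the checkpoints' actual dimensions):

    import torch
    import torch.nn.functional as F

    # Stand-in for outputs.logits: (batch=1, timesteps=20, vocab=63).
    logits = torch.randn(1, 20, 63)

    probs = F.softmax(logits, dim=-1)        # per-timestep distribution over characters
    max_probs, _ = torch.max(probs, dim=-1)  # probability of the argmax character per timestep
    confidence = max_probs[0].mean().item()  # sequence mean, as displayed in the app
    print(f"Avg Confidence: {confidence:.2%}")

Because the mean runs over every timestep (blanks included), this is a rough display heuristic rather than a true per-character confidence, which the in-code comment acknowledges.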

images/3eplzv.jpg
ADDED
images/46CN5W.jpg
ADDED
images/5820.jpg
ADDED
images/6521.jpg
ADDED
images/67qas.jpg
ADDED
images/75ke.jpg
ADDED
images/8JKM.jpg
ADDED
images/8jpwt0.jpg
ADDED
images/B1QAZ6.jpg
ADDED
images/CCX8.jpg
ADDED
images/EPOD.jpg
ADDED
images/ER6Y.jpg
ADDED
images/EWSP.jpg
ADDED
images/GIOGp.jpg
ADDED
images/HCDS.jpg
ADDED
images/JBWkEs.jpg
ADDED
images/KKh8Q.jpg
ADDED
images/MFMH.jpg
ADDED
images/NJSEX.jpg
ADDED
images/R6AB.jpg
ADDED
images/TVHF.jpg
ADDED
images/Vb4cG.jpg
ADDED
images/XaNqQx.jpg
ADDED
images/YULM.jpg
ADDED
images/abfsh.jpg
ADDED
images/b6yc.jpg
ADDED
images/bCWaLR.jpg
ADDED
images/d3no.jpg
ADDED
images/iq1sZo.jpg
ADDED
images/kJtOfk.jpg
ADDED

requirements.txt
CHANGED

@@ -1,3 +1,210 @@
-
-
-
+# This file was autogenerated by uv via the following command:
+#    uv pip compile pyproject.toml --output-file requirements.txt
+altair==6.0.0
+    # via streamlit
+annotated-doc==0.0.4
+    # via typer
+anyio==4.12.1
+    # via httpx
+attrs==25.4.0
+    # via
+    #   jsonschema
+    #   referencing
+blinker==1.9.0
+    # via streamlit
+cachetools==6.2.6
+    # via streamlit
+captcha==0.7.1
+    # via captcha-website (pyproject.toml)
+certifi==2026.2.25
+    # via
+    #   httpcore
+    #   httpx
+    #   requests
+charset-normalizer==3.4.4
+    # via requests
+click==8.3.1
+    # via
+    #   streamlit
+    #   typer
+colorama==0.4.6
+    # via
+    #   click
+    #   tqdm
+contourpy==1.3.3
+    # via matplotlib
+cycler==0.12.1
+    # via matplotlib
+filelock==3.24.3
+    # via
+    #   huggingface-hub
+    #   torch
+fonttools==4.61.1
+    # via matplotlib
+fsspec==2026.2.0
+    # via
+    #   huggingface-hub
+    #   torch
+gitdb==4.0.12
+    # via gitpython
+gitpython==3.1.46
+    # via streamlit
+h11==0.16.0
+    # via httpcore
+hf-xet==1.3.2
+    # via huggingface-hub
+httpcore==1.0.9
+    # via httpx
+httpx==0.28.1
+    # via huggingface-hub
+huggingface-hub==1.5.0
+    # via
+    #   tokenizers
+    #   transformers
+idna==3.11
+    # via
+    #   anyio
+    #   httpx
+    #   requests
+jinja2==3.1.6
+    # via
+    #   altair
+    #   pydeck
+    #   streamlit-keyup
+    #   torch
+jsonschema==4.26.0
+    # via altair
+jsonschema-specifications==2025.9.1
+    # via jsonschema
+kiwisolver==1.4.9
+    # via matplotlib
+markdown-it-py==4.0.0
+    # via rich
+markupsafe==3.0.3
+    # via jinja2
+matplotlib==3.10.8
+    # via captcha-website (pyproject.toml)
+mdurl==0.1.2
+    # via markdown-it-py
+mpmath==1.3.0
+    # via sympy
+narwhals==2.17.0
+    # via altair
+networkx==3.6.1
+    # via torch
+numpy==2.4.2
+    # via
+    #   captcha-website (pyproject.toml)
+    #   contourpy
+    #   matplotlib
+    #   pandas
+    #   pydeck
+    #   streamlit
+    #   torchvision
+    #   transformers
+packaging==26.0
+    # via
+    #   altair
+    #   huggingface-hub
+    #   matplotlib
+    #   streamlit
+    #   transformers
+pandas==2.3.3
+    # via streamlit
+pillow==12.1.1
+    # via
+    #   captcha-website (pyproject.toml)
+    #   captcha
+    #   matplotlib
+    #   streamlit
+    #   torchvision
+protobuf==6.33.5
+    # via streamlit
+pyarrow==23.0.1
+    # via streamlit
+pydeck==0.9.1
+    # via streamlit
+pygments==2.19.2
+    # via rich
+pyparsing==3.3.2
+    # via matplotlib
+python-dateutil==2.9.0.post0
+    # via
+    #   matplotlib
+    #   pandas
+pytz==2025.2
+    # via pandas
+pyyaml==6.0.3
+    # via
+    #   huggingface-hub
+    #   transformers
+referencing==0.37.0
+    # via
+    #   jsonschema
+    #   jsonschema-specifications
+regex==2026.2.28
+    # via transformers
+requests==2.32.5
+    # via streamlit
+rich==14.3.3
+    # via typer
+rpds-py==0.30.0
+    # via
+    #   jsonschema
+    #   referencing
+safetensors==0.7.0
+    # via transformers
+setuptools==82.0.0
+    # via torch
+shellingham==1.5.4
+    # via typer
+six==1.17.0
+    # via python-dateutil
+smmap==5.0.2
+    # via gitdb
+streamlit==1.54.0
+    # via
+    #   captcha-website (pyproject.toml)
+    #   streamlit-keyup
+streamlit-keyup==0.3.0
+    # via captcha-website (pyproject.toml)
+sympy==1.14.0
+    # via torch
+tenacity==9.1.4
+    # via streamlit
+tokenizers==0.22.2
+    # via transformers
+toml==0.10.2
+    # via streamlit
+torch==2.10.0
+    # via
+    #   captcha-website (pyproject.toml)
+    #   torchvision
+torchvision==0.25.0
+    # via captcha-website (pyproject.toml)
+tornado==6.5.4
+    # via streamlit
+tqdm==4.67.3
+    # via
+    #   huggingface-hub
+    #   transformers
+transformers==5.2.0
+    # via captcha-website (pyproject.toml)
+typer==0.24.1
+    # via
+    #   huggingface-hub
+    #   typer-slim
+typer-slim==0.24.0
+    # via transformers
+typing-extensions==4.15.0
+    # via
+    #   altair
+    #   huggingface-hub
+    #   streamlit
+    #   torch
+tzdata==2025.3
+    # via pandas
+urllib3==2.6.3
+    # via requests
+watchdog==6.0.0
+    # via streamlit