Spaces:

TechRaj
/

cs4243-miniproject-captcha-recognition

Sleeping

App Files Files Community

Karthikraj Sivakumar committed on Nov 10, 2025

Commit

3072360

1 Parent(s): e3ce74a

add confidence scoring

Browse files

Files changed (1) hide show

app.py +70 -19

app.py CHANGED Viewed

@@ -253,11 +253,51 @@ model.eval()
 print(f"Model loaded successfully! Using device: {device}")
 # ==========================================
-# 4. Prediction Function
 # ==========================================
 def predict_captcha(image):
-    """Predict CAPTCHA text from image"""
     # Preprocess
     img_tensor = preprocess_image(image).to(device)
@@ -266,23 +306,29 @@ def predict_captcha(image):
     with torch.no_grad():
         log_probs = model(img_tensor)
-    # Greedy decoding
-    _, max_indices = torch.max(log_probs, dim=2)
-    max_indices = max_indices.squeeze(1).cpu().numpy()
-    # CTC collapse (remove blanks and repeated tokens)
-    collapsed = []
-    prev = None
-    for token in max_indices:
-        if token != 0 and token != prev:
-            collapsed.append(token)
-        prev = token
-    # Decode to text
-    prediction = ''.join([idx_to_char.get(t, '') for t in collapsed])
-    # Return with confidence info
-    return prediction
 # ==========================================
 # 5. Gradio Interface
@@ -291,12 +337,12 @@ def predict_captcha(image):
 demo = gr.Interface(
     fn=predict_captcha,
     inputs=gr.Image(type="pil", label="Upload CAPTCHA Image"),
-    outputs=gr.Textbox(label="Predicted CAPTCHA Text", scale=2),
     title="CAPTCHA Recognition System",
     description="""
     **CS4243 Mini Project - CAPTCHA Recognition using CRNN + CTC Loss**
-    Upload a CAPTCHA image to see the model's prediction.
     **Model Architecture:**
     - ResNet-based CNN feature extraction (4 layers, 2 blocks each)
@@ -308,8 +354,13 @@ demo = gr.Interface(
     - Character Accuracy: 85.82%
     - Trained on 7,777 samples with heavy augmentation
     **Training Details:**
-    - 14 iterations of experimentation
     - Data augmentation: rotation, shear, black lines, noise
     - Regularization: dropout, weight decay, early stopping
     """,

 print(f"Model loaded successfully! Using device: {device}")
 # ==========================================
+# 4. Prediction Functions
 # ==========================================
def ctc_decode_with_confidence(log_probs, idx_to_char, blank_idx=0):
    """Greedily decode one CTC output sequence and score its confidence.

    Args:
        log_probs: Tensor of per-timestep log-probabilities for a single
            sequence, shaped ``(T, 1, C)`` (batch of one) or ``(T, C)``.
        idx_to_char: Mapping from class index to character. Indices that
            are missing from the mapping contribute no text.
        blank_idx: Class index of the CTC blank symbol. Defaults to ``0``.

    Returns:
        A ``(prediction, confidence)`` tuple. ``prediction`` is the decoded
        string. ``confidence`` is the mean winning probability over the
        timesteps that emitted a character (a float in ``[0, 1]``), or
        ``0.0`` when no character was emitted.

    Raises:
        ValueError: If ``log_probs`` does not describe exactly one sequence.
    """
    # Reject batches explicitly: reducing over the batch axis by accident
    # would otherwise surface later as an unrelated-looking error.
    if log_probs.dim() == 3:
        if log_probs.size(1) != 1:
            raise ValueError(
                "ctc_decode_with_confidence expects a batch of one, got "
                f"batch size {log_probs.size(1)}"
            )
        log_probs = log_probs[:, 0, :]
    elif log_probs.dim() != 2:
        raise ValueError(
            "log_probs must have shape (T, 1, C) or (T, C), got "
            f"{tuple(log_probs.shape)}"
        )

    # Convert log-probabilities to probabilities, then take the greedy path:
    # the most likely class and its probability at every timestep.
    probs = torch.exp(log_probs.detach())
    step_probs, step_indices = torch.max(probs, dim=1)
    step_probs = step_probs.cpu().numpy()
    step_indices = step_indices.cpu().numpy()

    # CTC collapse: a timestep emits a character only when its label is not
    # the blank and differs from the label of the previous timestep. The
    # first timestep has no predecessor, so it always counts as "new".
    is_new_label = np.ones(step_indices.shape, dtype=bool)
    is_new_label[1:] = step_indices[1:] != step_indices[:-1]
    emitted = is_new_label & (step_indices != blank_idx)

    prediction = ''.join(
        idx_to_char.get(int(index), '') for index in step_indices[emitted]
    )

    # Average the probability of the emitting timesteps only; blanks and
    # repeated frames do not influence the score.
    emitted_probs = step_probs[emitted]
    confidence = float(emitted_probs.mean()) if emitted_probs.size else 0.0
    return prediction, confidence
 def predict_captcha(image):
+    """Predict CAPTCHA text from image with confidence score"""
     # Preprocess
     img_tensor = preprocess_image(image).to(device)
     with torch.no_grad():
         log_probs = model(img_tensor)
+    # Decode with confidence
+    prediction, confidence = ctc_decode_with_confidence(log_probs, idx_to_char)
+    # Format output with confidence indicator
+    confidence_pct = confidence * 100
+    if confidence < 0.6:
+        status = "⚠️ Low Confidence"
+        note = "Result may be uncertain due to visual ambiguity (e.g., 0/o, i/1/l confusion)"
+    elif confidence < 0.75:
+        status = "⚡ Medium Confidence"
+        note = "Result is reasonably reliable"
+    else:
+        status = "✓ High Confidence"
+        note = "Result is highly reliable"
+    # Return formatted string
+    output = f"Prediction: {prediction}\n\n"
+    output += f"{status}\n"
+    output += f"Confidence: {confidence_pct:.1f}%\n\n"
+    output += f"{note}"
+    return output
 # ==========================================
 # 5. Gradio Interface
 demo = gr.Interface(
     fn=predict_captcha,
     inputs=gr.Image(type="pil", label="Upload CAPTCHA Image"),
+    outputs=gr.Textbox(label="Prediction Results", lines=6, scale=2),
     title="CAPTCHA Recognition System",
     description="""
     **CS4243 Mini Project - CAPTCHA Recognition using CRNN + CTC Loss**
+    Upload a CAPTCHA image to see the model's prediction with confidence score.
     **Model Architecture:**
     - ResNet-based CNN feature extraction (4 layers, 2 blocks each)
     - Character Accuracy: 85.82%
     - Trained on 7,777 samples with heavy augmentation
+    **Features:**
+    - **Confidence scoring**: Shows prediction reliability
+    - **Low confidence warnings**: Alerts when visual ambiguity exists (0/o, i/1/l confusion)
+    - **Real-time inference**: Results in <1 second
     **Training Details:**
+    - 14 iterations of systematic experimentation
     - Data augmentation: rotation, shear, black lines, noise
     - Regularization: dropout, weight decay, early stopping
     """,