Spaces:

TechRaj
/

cs4243-miniproject-captcha-recognition

Sleeping

App Files Files Community

Karthikraj Sivakumar committed on Nov 10, 2025

Commit

c9c30b5

1 Parent(s): 3072360

try showing multiple predictions

Browse files

Files changed (1) hide show

app.py +97 -13

app.py CHANGED Viewed

@@ -296,8 +296,77 @@ def ctc_decode_with_confidence(log_probs, idx_to_char):
     return prediction, confidence
 def predict_captcha(image):
-    """Predict CAPTCHA text from image with confidence score"""
     # Preprocess
     img_tensor = preprocess_image(image).to(device)
@@ -306,27 +375,41 @@ def predict_captcha(image):
     with torch.no_grad():
         log_probs = model(img_tensor)
-    # Decode with confidence
     prediction, confidence = ctc_decode_with_confidence(log_probs, idx_to_char)
-    # Format output with confidence indicator
     confidence_pct = confidence * 100
     if confidence < 0.6:
         status = "⚠️ Low Confidence"
-        note = "Result may be uncertain due to visual ambiguity (e.g., 0/o, i/1/l confusion)"
     elif confidence < 0.75:
         status = "⚡ Medium Confidence"
         note = "Result is reasonably reliable"
     else:
         status = "✓ High Confidence"
         note = "Result is highly reliable"
-    # Return formatted string
-    output = f"Prediction: {prediction}\n\n"
-    output += f"{status}\n"
-    output += f"Confidence: {confidence_pct:.1f}%\n\n"
-    output += f"{note}"
     return output
@@ -337,7 +420,7 @@ def predict_captcha(image):
 demo = gr.Interface(
     fn=predict_captcha,
     inputs=gr.Image(type="pil", label="Upload CAPTCHA Image"),
-    outputs=gr.Textbox(label="Prediction Results", lines=6, scale=2),
     title="CAPTCHA Recognition System",
     description="""
     **CS4243 Mini Project - CAPTCHA Recognition using CRNN + CTC Loss**
@@ -356,7 +439,8 @@ demo = gr.Interface(
     **Features:**
     - **Confidence scoring**: Shows prediction reliability
-    - **Low confidence warnings**: Alerts when visual ambiguity exists (0/o, i/1/l confusion)
     - **Real-time inference**: Results in <1 second
     **Training Details:**

     return prediction, confidence
def ctc_decode_top_k(log_probs, idx_to_char, k=3):
    """
    Decode CTC output into the top-k distinct predictions using beam search.

    Beam states that collapse to the same text are merged and their
    probabilities summed, so the k beam slots hold k *different* candidate
    texts instead of several alignments of the same text.

    Args:
        log_probs: Log probabilities from model (T, 1, C)
        idx_to_char: Character mapping dictionary (index 0 is the CTC blank)
        k: Number of top predictions to return
    Returns:
        List of (prediction, confidence) tuples sorted by confidence,
        containing at most k unique predictions. Empty when k <= 0.
    """
    if k <= 0:
        return []

    probs = torch.exp(log_probs).squeeze(1).cpu()  # (T, C)
    T, C = probs.shape
    # Only the most likely tokens per timestep are expanded.
    n_candidates = min(k * 2, C)

    # Beam state: (decoded text, last emitted index or None after a blank)
    # mapped to the summed probability of every alignment reaching it.
    beams = {('', None): 1.0}

    for t in range(T):
        # The per-timestep candidates do not depend on the beam: compute once.
        topk_probs, topk_indices = torch.topk(probs[t], k=n_candidates)
        candidates = list(zip(topk_indices.tolist(), topk_probs.tolist()))

        expanded = {}
        for (text, last), beam_prob in beams.items():
            for idx, prob in candidates:
                # CTC rules
                if idx == 0:  # Blank token: text unchanged, repeat allowed next
                    state = (text, None)
                elif idx != last:  # New character (not repeat)
                    state = (text + idx_to_char.get(idx, ''), idx)
                else:  # Repeat - continue same character
                    state = (text, last)
                expanded[state] = expanded.get(state, 0.0) + beam_prob * prob

        # Keep the k most probable texts, together with all of their states.
        text_totals = {}
        for (text, _), state_prob in expanded.items():
            text_totals[text] = text_totals.get(text, 0.0) + state_prob
        kept = set(sorted(text_totals, key=text_totals.get, reverse=True)[:k])
        beams = {
            state: state_prob
            for state, state_prob in expanded.items()
            if state[0] in kept
        }

    # Combine the remaining states per text and rank them.
    totals = {}
    for (text, _), state_prob in beams.items():
        totals[text] = totals.get(text, 0.0) + state_prob
    ranked = sorted(totals.items(), key=lambda item: item[1], reverse=True)[:k]

    # Normalize probability by sequence length
    return [
        (text, float(total ** (1.0 / max(len(text), 1))))
        for text, total in ranked
    ]
 def predict_captcha(image):
+    """Predict CAPTCHA text from image with confidence score and alternatives"""
     # Preprocess
     img_tensor = preprocess_image(image).to(device)
     with torch.no_grad():
         log_probs = model(img_tensor)
+    # Get primary prediction with confidence
     prediction, confidence = ctc_decode_with_confidence(log_probs, idx_to_char)
     confidence_pct = confidence * 100
+    # Format output
+    output = f"**Primary Prediction:** {prediction}\n\n"
+    # Add status indicator
     if confidence < 0.6:
         status = "⚠️ Low Confidence"
+        note = "Visual ambiguity detected (e.g., 0/o, i/1/l confusion)"
+        # Get alternative predictions when confidence is low
+        top_predictions = ctc_decode_top_k(log_probs, idx_to_char, k=3)
+        output += f"{status} — {confidence_pct:.1f}%\n"
+        output += f"{note}\n\n"
+        output += "**Alternative Predictions:**\n"
+        for i, (text, conf) in enumerate(top_predictions, 1):
+            conf_pct = conf * 100
+            output += f"{i}. `{text}` — {conf_pct:.1f}%\n"
+        output += "\n💡 *Tip: Check which makes sense in context*"
     elif confidence < 0.75:
         status = "⚡ Medium Confidence"
         note = "Result is reasonably reliable"
+        output += f"{status} — {confidence_pct:.1f}%\n"
+        output += f"{note}"
     else:
         status = "✓ High Confidence"
         note = "Result is highly reliable"
+        output += f"{status} — {confidence_pct:.1f}%\n"
+        output += f"{note}"
     return output
 demo = gr.Interface(
     fn=predict_captcha,
     inputs=gr.Image(type="pil", label="Upload CAPTCHA Image"),
+    outputs=gr.Textbox(label="Prediction Results", lines=10, scale=2),
     title="CAPTCHA Recognition System",
     description="""
     **CS4243 Mini Project - CAPTCHA Recognition using CRNN + CTC Loss**
     **Features:**
     - **Confidence scoring**: Shows prediction reliability
+    - **Multiple predictions**: Shows top 3 alternatives when confidence < 60%
+    - **Smart warnings**: Alerts when visual ambiguity exists (0/o, i/1/l confusion)
     - **Real-time inference**: Results in <1 second
     **Training Details:**