Karthikraj Sivakumar committed on
Commit
c9c30b5
·
1 Parent(s): 3072360

try showing multiple predictions

Browse files
Files changed (1) hide show
  1. app.py +97 -13
app.py CHANGED
@@ -296,8 +296,77 @@ def ctc_decode_with_confidence(log_probs, idx_to_char):
296
  return prediction, confidence
297
 
298
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
299
  def predict_captcha(image):
300
- """Predict CAPTCHA text from image with confidence score"""
301
 
302
  # Preprocess
303
  img_tensor = preprocess_image(image).to(device)
@@ -306,27 +375,41 @@ def predict_captcha(image):
306
  with torch.no_grad():
307
  log_probs = model(img_tensor)
308
 
309
- # Decode with confidence
310
  prediction, confidence = ctc_decode_with_confidence(log_probs, idx_to_char)
311
-
312
- # Format output with confidence indicator
313
  confidence_pct = confidence * 100
314
 
 
 
 
 
315
  if confidence < 0.6:
316
  status = "⚠️ Low Confidence"
317
- note = "Result may be uncertain due to visual ambiguity (e.g., 0/o, i/1/l confusion)"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
318
  elif confidence < 0.75:
319
  status = "⚡ Medium Confidence"
320
  note = "Result is reasonably reliable"
 
 
321
  else:
322
  status = "✓ High Confidence"
323
  note = "Result is highly reliable"
324
-
325
- # Return formatted string
326
- output = f"Prediction: {prediction}\n\n"
327
- output += f"{status}\n"
328
- output += f"Confidence: {confidence_pct:.1f}%\n\n"
329
- output += f"{note}"
330
 
331
  return output
332
 
@@ -337,7 +420,7 @@ def predict_captcha(image):
337
  demo = gr.Interface(
338
  fn=predict_captcha,
339
  inputs=gr.Image(type="pil", label="Upload CAPTCHA Image"),
340
- outputs=gr.Textbox(label="Prediction Results", lines=6, scale=2),
341
  title="CAPTCHA Recognition System",
342
  description="""
343
  **CS4243 Mini Project - CAPTCHA Recognition using CRNN + CTC Loss**
@@ -356,7 +439,8 @@ demo = gr.Interface(
356
 
357
  **Features:**
358
  - **Confidence scoring**: Shows prediction reliability
359
- - **Low confidence warnings**: Alerts when visual ambiguity exists (0/o, i/1/l confusion)
 
360
  - **Real-time inference**: Results in <1 second
361
 
362
  **Training Details:**
 
296
  return prediction, confidence
297
 
298
 
299
def ctc_decode_top_k(log_probs, idx_to_char, k=3, blank_idx=0):
    """
    Decode CTC output into the top-k distinct predictions using beam search.

    Each hypothesis is scored by its single best alignment path (the product
    of the per-timestep probabilities along that path). Alignments that
    collapse to the same decoder state are merged, so the beam holds up to
    ``k`` *different* strings instead of several copies of the same one.

    Args:
        log_probs: Log probabilities from the model, shape (T, 1, C).
        idx_to_char: Mapping from class index to character. Indices missing
            from the mapping contribute an empty string.
        k: Number of distinct predictions to return.
        blank_idx: Class index of the CTC blank token.

    Returns:
        List of at most ``k`` (prediction, confidence) tuples with unique
        predictions, ordered by best-path probability (highest first).
        ``confidence`` is that path probability normalized by the length of
        the decoded text: ``prob ** (1 / max(len(text), 1))``.
        An empty list is returned when ``k <= 0``.
    """
    import math

    if k <= 0:
        return []

    # Work directly on log-probabilities: summing logs avoids the underflow
    # that multiplying many small probabilities can cause on long sequences.
    step_log_probs = log_probs.squeeze(1).detach().cpu()  # (T, C)
    num_steps, num_classes = step_log_probs.shape
    branch = min(k * 2, num_classes)

    # Decoder state -> best log-probability of any alignment reaching it.
    # The state is (decoded text, index emitted at the previous timestep);
    # the previous index is needed to apply the CTC repeat-collapsing rule.
    hypotheses = {("", None): 0.0}

    for t in range(num_steps):
        # The candidate tokens depend only on the timestep, not on the
        # hypothesis being extended, so compute them once per step.
        top_scores, top_indices = torch.topk(step_log_probs[t], k=branch)
        candidates = list(zip(top_indices.tolist(), top_scores.tolist()))

        expanded = {}
        for (text, last), score in hypotheses.items():
            for idx, step_score in candidates:
                if idx == blank_idx:
                    # Blank: text unchanged, and a following repeat of the
                    # same character will count as a new character.
                    state = (text, None)
                elif idx != last:
                    # New character (not a repeat of the previous step).
                    state = (text + idx_to_char.get(idx, ''), idx)
                else:
                    # Repeat of the previous step: same character continues.
                    state = (text, last)

                new_score = score + step_score
                if state not in expanded or new_score > expanded[state]:
                    expanded[state] = new_score

        # Prune to the k best *distinct texts*, keeping every state of each
        # surviving text so later repeat/blank handling stays correct.
        ranked_states = sorted(
            expanded.items(), key=lambda item: item[1], reverse=True
        )
        kept_texts = set()
        for (text, _), _ in ranked_states:
            if text not in kept_texts:
                kept_texts.add(text)
                if len(kept_texts) == k:
                    break
        hypotheses = {
            state: score
            for state, score in ranked_states
            if state[0] in kept_texts
        }

    # Collapse the remaining states to one score per text (its best path).
    best_by_text = {}
    for (text, _), score in hypotheses.items():
        if text not in best_by_text or score > best_by_text[text]:
            best_by_text[text] = score

    ranked_texts = sorted(
        best_by_text.items(), key=lambda item: item[1], reverse=True
    )[:k]

    # Normalize probability by decoded sequence length.
    return [
        (text, float(math.exp(score / max(len(text), 1))))
        for text, score in ranked_texts
    ]
366
+
367
+
368
  def predict_captcha(image):
369
+ """Predict CAPTCHA text from image with confidence score and alternatives"""
370
 
371
  # Preprocess
372
  img_tensor = preprocess_image(image).to(device)
 
375
  with torch.no_grad():
376
  log_probs = model(img_tensor)
377
 
378
+ # Get primary prediction with confidence
379
  prediction, confidence = ctc_decode_with_confidence(log_probs, idx_to_char)
 
 
380
  confidence_pct = confidence * 100
381
 
382
+ # Format output
383
+ output = f"**Primary Prediction:** {prediction}\n\n"
384
+
385
+ # Add status indicator
386
  if confidence < 0.6:
387
  status = "⚠️ Low Confidence"
388
+ note = "Visual ambiguity detected (e.g., 0/o, i/1/l confusion)"
389
+
390
+ # Get alternative predictions when confidence is low
391
+ top_predictions = ctc_decode_top_k(log_probs, idx_to_char, k=3)
392
+
393
+ output += f"{status} — {confidence_pct:.1f}%\n"
394
+ output += f"{note}\n\n"
395
+ output += "**Alternative Predictions:**\n"
396
+
397
+ for i, (text, conf) in enumerate(top_predictions, 1):
398
+ conf_pct = conf * 100
399
+ output += f"{i}. `{text}` — {conf_pct:.1f}%\n"
400
+
401
+ output += "\n💡 *Tip: Check which makes sense in context*"
402
+
403
  elif confidence < 0.75:
404
  status = "⚡ Medium Confidence"
405
  note = "Result is reasonably reliable"
406
+ output += f"{status} — {confidence_pct:.1f}%\n"
407
+ output += f"{note}"
408
  else:
409
  status = "✓ High Confidence"
410
  note = "Result is highly reliable"
411
+ output += f"{status} — {confidence_pct:.1f}%\n"
412
+ output += f"{note}"
 
 
 
 
413
 
414
  return output
415
 
 
420
  demo = gr.Interface(
421
  fn=predict_captcha,
422
  inputs=gr.Image(type="pil", label="Upload CAPTCHA Image"),
423
+ outputs=gr.Textbox(label="Prediction Results", lines=10, scale=2),
424
  title="CAPTCHA Recognition System",
425
  description="""
426
  **CS4243 Mini Project - CAPTCHA Recognition using CRNN + CTC Loss**
 
439
 
440
  **Features:**
441
  - **Confidence scoring**: Shows prediction reliability
442
+ - **Multiple predictions**: Shows top 3 alternatives when confidence < 60%
443
+ - **Smart warnings**: Alerts when visual ambiguity exists (0/o, i/1/l confusion)
444
  - **Real-time inference**: Results in <1 second
445
 
446
  **Training Details:**