Spaces:
Sleeping
Sleeping
File size: 1,607 Bytes
dfc2fe0 8507a36 065e49f 97c59d8 065e49f dfc2fe0 065e49f dfc2fe0 8507a36 97c59d8 8507a36 dfc2fe0 8507a36 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
import string
import torch
# ----------------------------
# Vocabulary
# ----------------------------
# app/utils.py
import string
# Character classes the decoder can emit, listed in class-index order.
DIGITS = string.digits
LOWER = string.ascii_lowercase
UPPER = string.ascii_uppercase

# Symbol used for the CTC blank; it is appended last, so it gets the
# highest class index.
BLANK_CHAR = "-"

CHARS = "".join((DIGITS, LOWER, UPPER, BLANK_CHAR))

# Two-way lookup between a class index and its character.
idx2char = dict(enumerate(CHARS))
char2idx = {char: index for index, char in idx2char.items()}
# ----------------------------
# CTC Beam Search Decoder
# ----------------------------
def ctc_beam_search(logits, beam_width=5):
    """
    Decode the first sequence of a batch with CTC prefix beam search.

    CTC decoding rules are applied: blank frames are dropped, and a character
    repeated in consecutive frames collapses into a single character unless a
    blank separates the repeats. All frame alignments that collapse to the
    same text have their probabilities summed, so the returned score is the
    probability mass of the text rather than of a single alignment.

    Args:
        logits: (B, T, C) tensor of unnormalised class scores. Only batch
            element 0 is decoded. Class indices are mapped to characters
            through the module-level ``idx2char`` table.
        beam_width: number of prefixes kept after every frame.

    Returns:
        A list of ``(text, score)`` tuples sorted by descending score, with
        at most ``beam_width`` entries. For T == 0 the result is
        ``[("", 1.0)]``.

    Raises:
        KeyError: if a class index with non-negligible probability has no
            entry in ``idx2char``.
    """
    # Convert to plain Python floats once, instead of one .item() call per
    # (beam, class, frame) combination.
    frames = logits.softmax(2)[0].tolist()  # T rows of C probabilities

    # prefix -> [mass of alignments ending in blank,
    #            mass of alignments ending in the prefix's last character]
    # The split is needed to tell "aa" (a, blank, a) apart from "a" (a, a).
    beams = {"": [1.0, 0.0]}

    for frame in frames:
        candidates = {}
        for prefix, (p_blank, p_char) in beams.items():
            p_total = p_blank + p_char
            last_char = prefix[-1:]  # "" for the empty prefix
            for c, p in enumerate(frame):
                # Prune negligible classes to keep the search cheap.
                if p < 1e-4:
                    continue
                char = idx2char[c]
                if char == BLANK_CHAR:
                    # A blank never changes the text.
                    candidates.setdefault(prefix, [0.0, 0.0])[0] += p_total * p
                elif char == last_char:
                    # Same character again with no blank in between:
                    # it collapses into the existing last character.
                    if p_char > 0.0:
                        candidates.setdefault(prefix, [0.0, 0.0])[1] += p_char * p
                    # Same character after a blank: a genuine repeat.
                    if p_blank > 0.0:
                        extended = prefix + char
                        candidates.setdefault(extended, [0.0, 0.0])[1] += p_blank * p
                else:
                    extended = prefix + char
                    candidates.setdefault(extended, [0.0, 0.0])[1] += p_total * p

        ranked = sorted(
            candidates.items(),
            key=lambda item: item[1][0] + item[1][1],
            reverse=True,
        )
        beams = dict(ranked[:beam_width])

    # `beams` is already ordered best-first by the last ranking step.
    return [
        (prefix, p_blank + p_char)
        for prefix, (p_blank, p_char) in beams.items()
    ]
# ----------------------------
# Decode + Confidence
# ----------------------------
def decode_with_confidence(logits):
    """
    Decode ``logits`` and attach a heuristic confidence value.

    Runs ``ctc_beam_search`` with a beam width of 5 and keeps the
    top-ranked hypothesis.

    Returns:
        ``(text, confidence)`` where ``confidence`` is the top beam score
        multiplied by 10, capped at 1.0 and rounded to three decimals.
    """
    top_text, top_score = ctc_beam_search(logits, beam_width=5)[0]

    # Simple, stable rescaling of the raw beam score; anything that would
    # exceed 1.0 is clamped.
    scaled = top_score * 10
    confidence = scaled if scaled < 1.0 else 1.0

    return top_text, round(confidence, 3)
|