File size: 1,607 Bytes
dfc2fe0
 
 
8507a36
 
 
065e49f
 
 
97c59d8
065e49f
 
dfc2fe0
 
065e49f
dfc2fe0
 
 
 
 
8507a36
 
 
 
 
 
 
 
 
97c59d8
8507a36
dfc2fe0
8507a36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import string
import torch

# ----------------------------
# Vocabulary
# ----------------------------
# app/utils.py
import string

# Building blocks of the recognisable alphabet.
DIGITS = string.digits
LOWER = string.ascii_lowercase
UPPER = string.ascii_uppercase
# Symbol used for the CTC blank; it is appended last, so it receives the
# highest class index.
BLANK_CHAR = "-"

# Full alphabet: digits, then lowercase, then uppercase, then the blank.
CHARS = "".join((DIGITS, LOWER, UPPER, BLANK_CHAR))

# Lookup tables between class indices and characters (position in CHARS).
idx2char = dict(enumerate(CHARS))
char2idx = {char: index for index, char in idx2char.items()}


# ----------------------------
# CTC Beam Search Decoder
# ----------------------------
def ctc_beam_search(logits, beam_width=5):
    """
    Decode the first sequence of ``logits`` with CTC prefix beam search.

    logits: (B, T, C) tensor of unnormalised class scores. Only batch
        item 0 is decoded. Class index ``c`` is turned into a character
        via ``idx2char``; ``BLANK_CHAR`` is treated as the CTC blank.
    beam_width: number of prefixes kept after each time step.

    Returns a list of ``(text, probability)`` tuples ordered from most to
    least probable, holding at most ``beam_width`` entries. When T == 0
    the result is ``[("", 1.0)]``.

    The CTC collapse rule is applied while searching: a symbol repeated on
    consecutive frames counts once, and a doubled character is only
    produced when a blank separates the two occurrences. To make that
    possible every prefix carries two probabilities, one for alignments
    that currently end in a blank and one for alignments that end in the
    prefix's last character. Alignments that lead to the same prefix are
    summed, so each score is the (pruned) total probability of the text.
    """
    # One bulk conversion instead of a ``.item()`` call per (frame, class).
    frames = logits.softmax(2)[0].tolist()  # T rows of C floats

    # prefix -> [P(alignment ends in blank), P(alignment ends in non-blank)]
    beams = {"": [1.0, 0.0]}

    for frame in frames:
        # Symbols below the pruning threshold are ignored for this frame.
        symbols = []
        for c, p in enumerate(frame):
            if p < 1e-4:
                continue
            symbols.append((idx2char[c], p))

        candidates = {}
        for prefix, (p_blank, p_non_blank) in beams.items():
            p_total = p_blank + p_non_blank

            for char, p in symbols:
                if char == BLANK_CHAR:
                    # A blank never changes the text, whatever came before.
                    candidates.setdefault(prefix, [0.0, 0.0])[0] += p_total * p
                elif prefix.endswith(char):
                    # Same symbol as the prefix's last character:
                    # directly after that character it is a repeat and
                    # collapses into the unchanged prefix ...
                    if p_non_blank:
                        candidates.setdefault(prefix, [0.0, 0.0])[1] += (
                            p_non_blank * p
                        )
                    # ... while after a blank it starts a new occurrence.
                    if p_blank:
                        candidates.setdefault(prefix + char, [0.0, 0.0])[1] += (
                            p_blank * p
                        )
                else:
                    # A different symbol always extends the prefix.
                    candidates.setdefault(prefix + char, [0.0, 0.0])[1] += (
                        p_total * p
                    )

        ranked = sorted(
            candidates.items(),
            key=lambda item: item[1][0] + item[1][1],
            reverse=True,
        )
        # dict keeps insertion order, so ``beams`` stays sorted best-first.
        beams = dict(ranked[:beam_width])

    return [
        (prefix, p_blank + p_non_blank)
        for prefix, (p_blank, p_non_blank) in beams.items()
    ]


# ----------------------------
# Decode + Confidence
# ----------------------------
def decode_with_confidence(logits):
    """
    Decode ``logits`` and report a confidence value for the result.

    The logits are passed to ``ctc_beam_search`` with a beam width of 5 and
    only the top-ranked hypothesis is kept.

    Returns ``(text, confidence)`` where ``confidence`` is the hypothesis
    score multiplied by 10, capped at 1.0 and rounded to three decimals.
    """
    top_text, top_score = ctc_beam_search(logits, beam_width=5)[0]

    # Fixed x10 rescaling of the raw beam score, clipped so it never
    # exceeds 1.0.
    scaled_score = top_score * 10
    capped = min(1.0, scaled_score)

    return top_text, round(capped, 3)