hetchyy's picture
Upload folder using huggingface_hub
35fe812 verified
# cython: boundscheck=False, wraparound=False, cdivision=True
"""
Cython-accelerated word-boundary-constrained substring Levenshtein DP
with wraparound support for repetition detection.
When max_wraps=0, uses rolling rows (identical to the old standard DP).
When max_wraps>0, uses full 3D matrix with parent pointers for traceback.
"""
from libc.stdlib cimport malloc, free
from libc.math cimport INFINITY, fabs
# ---------------------------------------------------------------------------
# Phoneme → integer encoding (built lazily on first call)
# ---------------------------------------------------------------------------
# Maps each phoneme string to its integer id; entries are added by
# _encode_phoneme the first time a phoneme is seen.
cdef dict _phoneme_to_id = {}
# Number of ids handed out so far (also the next id to be assigned).
cdef int _num_phonemes = 0
# Dense substitution-cost table; NULL until a matrix has been allocated.
cdef double *_sub_matrix = NULL # flat _num_phonemes × _num_phonemes
# Cost used for phoneme pairs without an explicit entry; overwritten by
# init_substitution_matrix.
cdef double _default_sub = 1.0
cdef int _encode_phoneme(str p):
    """Look up the integer id of phoneme *p*, registering it when unseen.

    An unseen phoneme receives the current value of ``_num_phonemes`` as its
    id, is stored in ``_phoneme_to_id``, and the counter is then incremented.
    """
    global _num_phonemes
    cdef int assigned
    if p in _phoneme_to_id:
        assigned = _phoneme_to_id[p]
    else:
        # First sighting: hand out the next free id.
        assigned = _num_phonemes
        _phoneme_to_id[p] = assigned
        _num_phonemes += 1
    return assigned
def init_substitution_matrix(dict sub_costs, double default_sub):
    """Register all phonemes in *sub_costs* and build the dense cost matrix.

    Must be called once before the first DP call (phoneme_matcher.py does
    this at import time).

    Parameters
    ----------
    sub_costs : dict[(str, str), float]
        Phoneme-pair substitution costs (both orderings already present).
    default_sub : float
        Cost used for pairs not in *sub_costs*.
    """
    global _default_sub
    _default_sub = default_sub

    # Every phoneme named in a key needs an id before the matrix is sized.
    for first, second in sub_costs:
        _encode_phoneme(first)
        _encode_phoneme(second)

    # Allocate and fill the matrix for the ids registered so far.
    _rebuild_matrix(sub_costs)
# Side length of the matrix currently held in _sub_matrix (0 when nothing is
# allocated).  Tracked separately from _num_phonemes because new phonemes can
# be registered before (or without) the matrix being grown to cover them.
cdef int _sub_matrix_dim = 0


cdef void _rebuild_matrix(dict sub_costs) except *:
    """(Re)allocate the dense cost matrix and fill it from *sub_costs*.

    The matrix is sized to the current ``_num_phonemes``.  Every cell starts
    at ``_default_sub``, the diagonal is set to 0.0, and then each
    ``(a, b) -> cost`` entry whose phonemes both have ids overrides its cell.

    The new matrix is fully built before it replaces the old one, so a
    failure leaves the previously installed matrix untouched.

    Raises
    ------
    MemoryError
        If the matrix cannot be allocated.
    """
    global _sub_matrix, _sub_matrix_dim
    cdef int size = _num_phonemes
    cdef Py_ssize_t cells = <Py_ssize_t>size * size
    cdef Py_ssize_t i
    cdef int aid, bid
    cdef double cost
    cdef double *mat

    # Ask for at least one cell: malloc(0) may legally return NULL, which
    # would otherwise be misreported as an out-of-memory condition.
    mat = <double *>malloc((cells if cells > 0 else 1) * sizeof(double))
    if mat == NULL:
        raise MemoryError("Failed to allocate substitution matrix")

    # Fill with default
    for i in range(cells):
        mat[i] = _default_sub
    # Diagonal = 0 (match)
    for i in range(size):
        mat[i * size + i] = 0.0

    # Overrides from dict
    try:
        for (a, b), cost in sub_costs.items():
            aid = _phoneme_to_id.get(a, -1)
            bid = _phoneme_to_id.get(b, -1)
            if aid >= 0 and bid >= 0:
                mat[<Py_ssize_t>aid * size + bid] = cost
    except BaseException:
        # Malformed keys/values: do not leak the half-built matrix.
        free(mat)
        raise

    free(_sub_matrix)  # free(NULL) is a no-op
    _sub_matrix = mat
    _sub_matrix_dim = size


cdef inline double _get_sub_cost(int pid, int rid, int size) nogil:
    """Look up the substitution cost for phoneme ids *pid* and *rid*.

    Identical ids always cost 0.0.  Otherwise the dense matrix is consulted
    when both ids are below *size* and inside the matrix that is actually
    allocated; any other pair gets ``_default_sub``.  The row stride is the
    allocated dimension rather than *size*, so a caller whose *size* is newer
    than the allocation (e.g. after a failed grow) still reads correct cells.
    """
    cdef int limit = size if size < _sub_matrix_dim else _sub_matrix_dim
    if pid == rid:
        return 0.0
    if _sub_matrix != NULL and 0 <= pid < limit and 0 <= rid < limit:
        return _sub_matrix[<Py_ssize_t>pid * _sub_matrix_dim + rid]
    return _default_sub


# ---------------------------------------------------------------------------
# Helper: grow matrix when new phonemes are encountered at runtime
# ---------------------------------------------------------------------------
cdef void _grow_matrix():
    """Expand the substitution matrix to cover newly added phonemes.

    New rows/columns are filled with the default substitution cost,
    diagonal with 0.0.  Existing entries are copied into the new matrix.

    This is best-effort: if the larger matrix cannot be allocated the old
    one is kept, and lookups for ids it does not cover fall back to the
    default cost in ``_get_sub_cost``.
    """
    global _sub_matrix, _sub_matrix_dim
    cdef int new_size = _num_phonemes
    cdef int old_size = _sub_matrix_dim if _sub_matrix != NULL else 0
    cdef Py_ssize_t cells = <Py_ssize_t>new_size * new_size
    cdef Py_ssize_t i, j
    cdef double *new_mat

    if new_size <= old_size:
        # Current allocation already covers every registered phoneme.
        return

    new_mat = <double *>malloc(cells * sizeof(double))
    if new_mat == NULL:
        return

    for i in range(cells):
        new_mat[i] = _default_sub
    for i in range(new_size):
        new_mat[i * new_size + i] = 0.0

    # Carry over the costs already configured; the row stride changes from
    # old_size to new_size, so the copy has to go cell by cell.
    for i in range(old_size):
        for j in range(old_size):
            new_mat[i * new_size + j] = _sub_matrix[i * old_size + j]

    free(_sub_matrix)  # free(NULL) is a no-op
    _sub_matrix = new_mat
    _sub_matrix_dim = new_size
# ---------------------------------------------------------------------------
# Shared: encode inputs and precompute boundaries
# ---------------------------------------------------------------------------
# C-level view of one alignment problem, filled in by _encode_inputs and
# released by _free_encoded.  All pointer members are malloc'd arrays.
cdef struct EncodedInput:
    int *P_ids      # phoneme ids of P_list (m entries)
    int *R_ids      # phoneme ids of R_list (n entries)
    int *R_w        # word index of each R phoneme (n entries)
    char *start_bd  # n + 1 flags: 1 where a match may start
    char *end_bd    # n + 1 flags: 1 where a match may end
    int *ws_pos # word-start positions (sorted)
    int *we_pos # word-end positions (sorted)
    int n_ws # count of word-start positions
    int n_we # count of word-end positions
    int mat_size # phoneme matrix size at time of encoding
    int m # len(P)
    int n # len(R)
cdef EncodedInput _encode_inputs(list P_list, list R_list, list R_phone_to_word_list) except *:
    """Encode string lists to C arrays, precompute word boundaries.

    Parameters
    ----------
    P_list : list of phoneme strings to align (length m).
    R_list : list of reference phoneme strings (length n).
    R_phone_to_word_list : word index for each entry of *R_list*; must have
        at least n entries.

    Returns
    -------
    EncodedInput
        Struct whose arrays are owned by the caller and must be released
        with ``_free_encoded``.

    Raises
    ------
    ValueError
        If *R_phone_to_word_list* is shorter than *R_list*.
    MemoryError
        If any buffer cannot be allocated.

    Phonemes not seen before are registered on the fly; when that happens
    and a substitution matrix exists, ``_grow_matrix`` is invoked.  On any
    error every buffer allocated so far is freed before the exception
    propagates.
    """
    cdef EncodedInput enc
    cdef int m = len(P_list)
    cdef int n = len(R_list)
    cdef int i, j
    cdef int ws_i = 0, we_i = 0
    cdef bint need_rebuild = False

    enc.m = m
    enc.n = n
    enc.n_ws = 0
    enc.n_we = 0
    enc.mat_size = 0
    enc.P_ids = NULL
    enc.R_ids = NULL
    enc.R_w = NULL
    enc.start_bd = NULL
    enc.end_bd = NULL
    enc.ws_pos = NULL
    enc.we_pos = NULL

    # The module is compiled with boundscheck=False, so indexing a too-short
    # list below would read past its end instead of raising IndexError.
    if len(R_phone_to_word_list) < n:
        raise ValueError(
            "R_phone_to_word_list must have one entry per element of R_list"
        )

    try:
        enc.P_ids = <int *>malloc(m * sizeof(int))
        enc.R_ids = <int *>malloc(n * sizeof(int))
        enc.R_w = <int *>malloc(n * sizeof(int))
        if enc.P_ids == NULL or enc.R_ids == NULL or enc.R_w == NULL:
            raise MemoryError()

        for i in range(m):
            p = P_list[i]
            if p not in _phoneme_to_id:
                _encode_phoneme(p)
                need_rebuild = True
            enc.P_ids[i] = _phoneme_to_id[p]

        for j in range(n):
            r = R_list[j]
            if r not in _phoneme_to_id:
                _encode_phoneme(r)
                need_rebuild = True
            enc.R_ids[j] = _phoneme_to_id[r]
            # Object -> C int conversion; raises for non-integer entries.
            enc.R_w[j] = <int>R_phone_to_word_list[j]

        if need_rebuild and _sub_matrix != NULL:
            _grow_matrix()
        enc.mat_size = _num_phonemes

        # Precompute boundary flags
        enc.start_bd = <char *>malloc((n + 1) * sizeof(char))
        enc.end_bd = <char *>malloc((n + 1) * sizeof(char))
        if enc.start_bd == NULL or enc.end_bd == NULL:
            raise MemoryError()

        # A match may start at position 0 and wherever the word index changes.
        enc.start_bd[0] = 1
        for j in range(1, n):
            enc.start_bd[j] = 1 if enc.R_w[j] != enc.R_w[j - 1] else 0
        enc.start_bd[n] = 0

        # A match may end wherever the word index changes and at position n.
        enc.end_bd[0] = 0
        for j in range(1, n):
            enc.end_bd[j] = 1 if enc.R_w[j] != enc.R_w[j - 1] else 0
        enc.end_bd[n] = 1

        # Build sorted arrays of boundary positions
        for j in range(n + 1):
            if enc.start_bd[j]:
                enc.n_ws += 1
            if enc.end_bd[j]:
                enc.n_we += 1

        enc.ws_pos = <int *>malloc(enc.n_ws * sizeof(int))
        enc.we_pos = <int *>malloc(enc.n_we * sizeof(int))
        if enc.ws_pos == NULL or enc.we_pos == NULL:
            raise MemoryError()

        for j in range(n + 1):
            if enc.start_bd[j]:
                enc.ws_pos[ws_i] = j
                ws_i += 1
            if enc.end_bd[j]:
                enc.we_pos[we_i] = j
                we_i += 1
    except BaseException:
        # Single cleanup point: nothing allocated above may outlive a failure.
        _free_encoded(&enc)
        raise

    return enc
cdef void _free_encoded(EncodedInput *enc):
    """Release every array owned by *enc* and reset its pointers to NULL.

    Safe to call on a partially populated struct: free(NULL) is a no-op.
    """
    free(enc.P_ids)
    enc.P_ids = NULL

    free(enc.R_ids)
    enc.R_ids = NULL

    free(enc.R_w)
    enc.R_w = NULL

    free(enc.start_bd)
    enc.start_bd = NULL

    free(enc.end_bd)
    enc.end_bd = NULL

    free(enc.ws_pos)
    enc.ws_pos = NULL

    free(enc.we_pos)
    enc.we_pos = NULL
# ---------------------------------------------------------------------------
# Rolling-row DP for max_wraps=0 (fast path, 89% of segments)
# ---------------------------------------------------------------------------
cdef tuple _align_rolling(
    EncodedInput *enc,
    int expected_word,
    double prior_weight,
    double cost_sub,
    double cost_del,
    double cost_ins,
):
    """Word-boundary substring alignment using two rolling DP rows (no wraps).

    Each cell keeps the cheapest cost plus the reference position where the
    corresponding path started.  After the last row, every end-boundary
    column is scored as ``cost / max(m, ref_len, 1)`` plus
    ``prior_weight * |start_word - expected_word|``; the lowest score wins.

    Returns the 7-tuple
    ``(best_j, best_j_start, best_cost, best_norm, 0, best_j, [])`` or
    ``(None, None, inf, inf, 0, 0, [])`` when no end boundary is reachable.

    *cost_sub* is accepted but not read here; substitution costs come from
    ``_get_sub_cost``.

    Raises MemoryError if the row buffers cannot be allocated.
    """
    cdef int m = enc.m, n = enc.n
    cdef int mat_size = enc.mat_size
    cdef double INF_VAL = INFINITY
    cdef int row, col
    cdef double via_del, via_ins, via_sub
    cdef double *swap_cost
    cdef int *swap_origin
    cdef bint col0_start
    cdef double best_score = INF_VAL
    cdef int best_j = -1, best_j_start = -1
    cdef double best_cost_val = INF_VAL, best_norm = INF_VAL
    cdef double dist, norm_dist, score
    cdef int origin, ref_len, denom, start_word

    # Two cost rows and two start-position rows, each n + 1 wide.
    cdef double *above_cost = <double *>malloc((n + 1) * sizeof(double))
    cdef double *here_cost = <double *>malloc((n + 1) * sizeof(double))
    cdef int *above_origin = <int *>malloc((n + 1) * sizeof(int))
    cdef int *here_origin = <int *>malloc((n + 1) * sizeof(int))
    if (above_cost == NULL or here_cost == NULL
            or above_origin == NULL or here_origin == NULL):
        # free(NULL) is a no-op, so no per-pointer guard is needed.
        free(above_cost)
        free(here_cost)
        free(above_origin)
        free(here_origin)
        raise MemoryError()

    col0_start = enc.start_bd[0]

    # Row 0: starting is free at start boundaries, impossible elsewhere.
    for col in range(n + 1):
        if enc.start_bd[col]:
            above_cost[col] = 0.0
            above_origin[col] = col
        else:
            above_cost[col] = INF_VAL
            above_origin[col] = -1

    # Main recurrence, one row of P at a time.
    for row in range(1, m + 1):
        # Column 0 can only be reached by deleting everything so far.
        if col0_start:
            here_cost[0] = row * cost_del
            here_origin[0] = 0
        else:
            here_cost[0] = INF_VAL
            here_origin[0] = -1

        for col in range(1, n + 1):
            via_del = above_cost[col] + cost_del
            via_ins = here_cost[col - 1] + cost_ins
            via_sub = above_cost[col - 1] + _get_sub_cost(
                enc.P_ids[row - 1], enc.R_ids[col - 1], mat_size
            )

            # Ties prefer substitution, then deletion, then insertion.
            if via_sub <= via_del and via_sub <= via_ins:
                here_cost[col] = via_sub
                here_origin[col] = above_origin[col - 1]
            elif via_del <= via_ins:
                here_cost[col] = via_del
                here_origin[col] = above_origin[col]
            else:
                here_cost[col] = via_ins
                here_origin[col] = here_origin[col - 1]

        # The row just filled becomes the "above" row for the next pass.
        swap_cost = above_cost
        above_cost = here_cost
        here_cost = swap_cost
        swap_origin = above_origin
        above_origin = here_origin
        here_origin = swap_origin

    # Pick the best reachable end boundary in the final row.
    for col in range(1, n + 1):
        if not enc.end_bd[col]:
            continue
        if above_cost[col] >= INF_VAL:
            continue

        dist = above_cost[col]
        origin = above_origin[col]
        ref_len = col - origin

        denom = m if m > ref_len else ref_len
        if denom < 1:
            denom = 1
        norm_dist = dist / denom

        if origin < n:
            start_word = enc.R_w[origin]
        else:
            start_word = enc.R_w[col - 1]

        score = norm_dist + prior_weight * fabs(<double>(start_word - expected_word))
        if score < best_score:
            best_score = score
            best_j = col
            best_j_start = origin
            best_cost_val = dist
            best_norm = norm_dist

    free(above_cost)
    free(here_cost)
    free(above_origin)
    free(here_origin)

    if best_j < 0:
        return (None, None, float('inf'), float('inf'), 0, 0, [])
    return (best_j, best_j_start, best_cost_val, best_norm, 0, best_j, [])
# ---------------------------------------------------------------------------
# Full 3D DP for max_wraps>0 (with traceback)
# ---------------------------------------------------------------------------
cdef tuple _align_full_3d(
    EncodedInput *enc,
    int expected_word,
    double prior_weight,
    double cost_sub,
    double cost_del,
    double cost_ins,
    double wrap_penalty,
    int max_wraps,
    int sc_mode,  # 0=subtract, 1=no_subtract, 2=additive
    double wrap_score_cost,
    double wrap_span_weight,
):
    """Wraparound DP with full 3D matrix and parent pointers for traceback.

    The state is ``(i, k, j)``: *i* phonemes of P consumed, *k* wraps used,
    position *j* in R.  Within a row, a wrap moves from an end boundary in
    layer *k* back to an earlier start boundary in layer *k + 1* at cost
    ``wrap_penalty + wrap_span_weight * word_span``.

    Returns
    -------
    tuple
        ``(best_j, best_j_start, best_cost, best_norm, best_k, best_max_j,
        wrap_points)`` where *wrap_points* is a chronological list of
        ``(i, j_end, j_start)``; or ``(None, None, inf, inf, 0, 0, [])``
        when no end boundary is reachable.

    Raises
    ------
    MemoryError
        If the DP arrays cannot be allocated.

    *cost_sub* is accepted but not read here; substitution costs come from
    ``_get_sub_cost``.  All working arrays are released in a single
    ``finally`` clause, on every exit path.
    """
    cdef int m = enc.m, n = enc.n
    # A negative wrap budget would make layer_stride non-positive and the
    # row-0 initialisation write outside the allocation; treat it as 0.
    cdef int K = max_wraps if max_wraps > 0 else 0
    cdef int mat_size = enc.mat_size
    cdef double INF_VAL = INFINITY
    cdef int BIG_W = 999999  # sentinel: "no word touched yet"

    # 3D indexing: [i * layer_stride + k * col_stride + j]
    cdef int col_stride = n + 1
    cdef int layer_stride = (K + 1) * col_stride
    cdef long total_3d = <long>(m + 1) * layer_stride

    cdef long idx
    cdef int i, j, k
    cdef int koff, koff_src, koff_dst
    cdef long base_i, base_prev
    cdef int w_j, mw_val

    # Variables for DP transitions
    cdef double del_opt, ins_opt, sub_opt, sc, new_cost, cost_at_end
    cdef int j_end, j_sw, mj_val, word_span
    cdef int we_i, ws_i

    # Best-match selection state
    cdef double best_score = INF_VAL
    cdef int best_j = -1, best_j_start = -1
    cdef double best_cost_val = INF_VAL, best_norm = INF_VAL
    cdef int best_k_val = 0, best_max_j = -1
    cdef double dist, norm_dist, prior, score, phoneme_cost
    cdef int j_start_val, ref_len, denom, start_word, max_j_val, min_word_val, eff_start

    # Traceback cursor and parent fields
    cdef int ci, ck, cj
    cdef int pi, pk, pj
    cdef char pt

    # Allocate 3D arrays
    cdef double *cost_3d = <double *>malloc(total_3d * sizeof(double))
    cdef int *start_3d = <int *>malloc(total_3d * sizeof(int))
    cdef int *max_j_3d = <int *>malloc(total_3d * sizeof(int))
    cdef int *min_w_3d = <int *>malloc(total_3d * sizeof(int))  # minimum word index reached along path
    cdef int *par_i = <int *>malloc(total_3d * sizeof(int))
    cdef int *par_k = <int *>malloc(total_3d * sizeof(int))
    cdef int *par_j = <int *>malloc(total_3d * sizeof(int))
    cdef char *par_t = <char *>malloc(total_3d * sizeof(char))  # 0=sub, 1=del, 2=ins, 3=wrap

    try:
        if (cost_3d == NULL or start_3d == NULL or max_j_3d == NULL or min_w_3d == NULL or
                par_i == NULL or par_k == NULL or par_j == NULL or par_t == NULL):
            raise MemoryError()

        # Initialize all to INF / -1
        for idx in range(total_3d):
            cost_3d[idx] = INF_VAL
            start_3d[idx] = -1
            max_j_3d[idx] = -1
            min_w_3d[idx] = BIG_W
            par_i[idx] = -1
            par_k[idx] = -1
            par_j[idx] = -1
            par_t[idx] = -1

        # Row 0, k=0: free starts at word boundaries
        for j in range(n + 1):
            if enc.start_bd[j]:
                cost_3d[j] = 0.0  # i=0, k=0, j
                start_3d[j] = j
                max_j_3d[j] = j
                min_w_3d[j] = enc.R_w[j] if j < n else BIG_W

        # Fill DP
        for i in range(1, m + 1):
            base_i = <long>i * layer_stride
            base_prev = <long>(i - 1) * layer_stride

            # Standard transitions for each k
            for k in range(K + 1):
                koff = k * col_stride

                # Column 0: deletion base case, k=0 only
                if k == 0 and enc.start_bd[0]:
                    idx = base_i + koff
                    cost_3d[idx] = i * cost_del
                    start_3d[idx] = 0
                    max_j_3d[idx] = 0
                    min_w_3d[idx] = min_w_3d[base_prev + koff]
                    par_i[idx] = i - 1
                    par_k[idx] = 0
                    par_j[idx] = 0
                    par_t[idx] = 1  # del

                for j in range(1, n + 1):
                    idx = base_i + koff + j

                    # Deletion: prev row, same j, same k
                    del_opt = INF_VAL
                    if cost_3d[base_prev + koff + j] < INF_VAL:
                        del_opt = cost_3d[base_prev + koff + j] + cost_del

                    # Insertion: same row, j-1, same k
                    ins_opt = INF_VAL
                    if cost_3d[base_i + koff + j - 1] < INF_VAL:
                        ins_opt = cost_3d[base_i + koff + j - 1] + cost_ins

                    # Substitution: prev row, j-1, same k
                    sub_opt = INF_VAL
                    if cost_3d[base_prev + koff + j - 1] < INF_VAL:
                        sc = _get_sub_cost(enc.P_ids[i - 1], enc.R_ids[j - 1], mat_size)
                        sub_opt = cost_3d[base_prev + koff + j - 1] + sc

                    # Ties prefer substitution, then deletion, then insertion.
                    if sub_opt <= del_opt and sub_opt <= ins_opt:
                        cost_3d[idx] = sub_opt
                        start_3d[idx] = start_3d[base_prev + koff + j - 1]
                        mj_val = max_j_3d[base_prev + koff + j - 1]
                        max_j_3d[idx] = j if j > mj_val else mj_val
                        w_j = enc.R_w[j - 1]
                        mw_val = min_w_3d[base_prev + koff + j - 1]
                        min_w_3d[idx] = w_j if w_j < mw_val else mw_val
                        par_i[idx] = i - 1
                        par_k[idx] = k
                        par_j[idx] = j - 1
                        par_t[idx] = 0
                    elif del_opt <= ins_opt:
                        cost_3d[idx] = del_opt
                        start_3d[idx] = start_3d[base_prev + koff + j]
                        max_j_3d[idx] = max_j_3d[base_prev + koff + j]
                        min_w_3d[idx] = min_w_3d[base_prev + koff + j]
                        par_i[idx] = i - 1
                        par_k[idx] = k
                        par_j[idx] = j
                        par_t[idx] = 1
                    elif ins_opt < INF_VAL:
                        cost_3d[idx] = ins_opt
                        start_3d[idx] = start_3d[base_i + koff + j - 1]
                        mj_val = max_j_3d[base_i + koff + j - 1]
                        max_j_3d[idx] = j if j > mj_val else mj_val
                        w_j = enc.R_w[j - 1]
                        mw_val = min_w_3d[base_i + koff + j - 1]
                        min_w_3d[idx] = w_j if w_j < mw_val else mw_val
                        par_i[idx] = i
                        par_k[idx] = k
                        par_j[idx] = j - 1
                        par_t[idx] = 2

            # Wrap transitions (within same row i)
            for k in range(K):
                koff_src = k * col_stride
                koff_dst = (k + 1) * col_stride

                for we_i in range(enc.n_we):
                    j_end = enc.we_pos[we_i]
                    if cost_3d[base_i + koff_src + j_end] >= INF_VAL:
                        continue
                    cost_at_end = cost_3d[base_i + koff_src + j_end]

                    for ws_i in range(enc.n_ws):
                        j_sw = enc.ws_pos[ws_i]
                        # Only jump backwards to an earlier word start.
                        if j_sw >= j_end:
                            continue
                        word_span = enc.R_w[j_end - 1] - enc.R_w[j_sw]
                        if word_span < 0:
                            word_span = -word_span
                        new_cost = cost_at_end + wrap_penalty + wrap_span_weight * word_span
                        idx = base_i + koff_dst + j_sw
                        if new_cost < cost_3d[idx]:
                            cost_3d[idx] = new_cost
                            start_3d[idx] = start_3d[base_i + koff_src + j_end]
                            mj_val = max_j_3d[base_i + koff_src + j_end]
                            max_j_3d[idx] = j_end if j_end > mj_val else mj_val
                            mw_val = min_w_3d[base_i + koff_src + j_end]
                            w_j = enc.R_w[j_sw]
                            min_w_3d[idx] = w_j if w_j < mw_val else mw_val
                            par_i[idx] = i
                            par_k[idx] = k
                            par_j[idx] = j_end
                            par_t[idx] = 3

                # Insertion re-sweep from wrap positions
                for j in range(1, n + 1):
                    idx = base_i + koff_dst + j
                    ins_opt = INF_VAL
                    if cost_3d[base_i + koff_dst + j - 1] < INF_VAL:
                        ins_opt = cost_3d[base_i + koff_dst + j - 1] + cost_ins
                    if ins_opt < cost_3d[idx]:
                        cost_3d[idx] = ins_opt
                        start_3d[idx] = start_3d[base_i + koff_dst + j - 1]
                        mj_val = max_j_3d[base_i + koff_dst + j - 1]
                        max_j_3d[idx] = j if j > mj_val else mj_val
                        w_j = enc.R_w[j - 1]
                        mw_val = min_w_3d[base_i + koff_dst + j - 1]
                        min_w_3d[idx] = w_j if w_j < mw_val else mw_val
                        par_i[idx] = i
                        par_k[idx] = k + 1
                        par_j[idx] = j - 1
                        par_t[idx] = 2

        # ------------------------------------------------------------------
        # Best-match selection (end boundaries only, across all k)
        # ------------------------------------------------------------------
        base_i = <long>m * layer_stride
        for k in range(K + 1):
            koff = k * col_stride
            for j in range(1, n + 1):
                if not enc.end_bd[j]:
                    continue
                idx = base_i + koff + j
                if cost_3d[idx] >= INF_VAL:
                    continue
                dist = cost_3d[idx]
                j_start_val = start_3d[idx]
                if j_start_val < 0:
                    continue
                max_j_val = max_j_3d[idx]
                ref_len = (max_j_val if max_j_val > j else j) - j_start_val
                if ref_len <= 0:
                    continue
                denom = m if m > ref_len else ref_len
                if denom < 1:
                    denom = 1

                if sc_mode == 1:  # no_subtract
                    phoneme_cost = dist
                else:  # subtract or additive
                    phoneme_cost = dist - k * wrap_penalty
                norm_dist = phoneme_cost / denom

                if j_start_val < n:
                    start_word = enc.R_w[j_start_val]
                else:
                    start_word = enc.R_w[j - 1]

                # Use earliest word the path actually touches for fair prior
                min_word_val = min_w_3d[idx]
                eff_start = min_word_val if min_word_val < start_word and min_word_val < BIG_W else start_word
                prior = prior_weight * fabs(<double>(eff_start - expected_word))
                score = norm_dist + prior
                if sc_mode == 2:  # additive
                    score = score + k * wrap_score_cost

                if score < best_score:
                    best_score = score
                    best_j = j
                    best_j_start = j_start_val
                    best_cost_val = dist
                    best_norm = norm_dist
                    best_k_val = k
                    best_max_j = max_j_val

        if best_j < 0:
            return (None, None, float('inf'), float('inf'), 0, 0, [])

        # ------------------------------------------------------------------
        # Traceback: walk parent pointers, collect wrap points
        # ------------------------------------------------------------------
        wrap_points = []
        ci = m
        ck = best_k_val
        cj = best_j
        while True:
            idx = <long>ci * layer_stride + ck * col_stride + cj
            if par_i[idx] < 0:
                break
            pi = par_i[idx]
            pk = par_k[idx]
            pj = par_j[idx]
            pt = par_t[idx]
            if pt == 3:  # wrap
                # Wrap: at P position ci, R jumped from pj (j_end) back to cj (j_start)
                wrap_points.append((ci, pj, cj))
            ci = pi
            ck = pk
            cj = pj
        wrap_points.reverse()  # chronological order

        return (best_j, best_j_start, best_cost_val, best_norm, best_k_val, best_max_j, wrap_points)
    finally:
        # One cleanup point for every exit path (free(NULL) is a no-op).
        free(cost_3d)
        free(start_3d)
        free(max_j_3d)
        free(min_w_3d)
        free(par_i)
        free(par_k)
        free(par_j)
        free(par_t)
# ---------------------------------------------------------------------------
# Public API: unified wraparound DP
# ---------------------------------------------------------------------------
def cy_align_wraparound(
    list P_list,
    list R_list,
    list R_phone_to_word_list,
    int expected_word,
    double prior_weight,
    double cost_sub,
    double cost_del,
    double cost_ins,
    double wrap_penalty = 2.0,
    int max_wraps = 0,
    str scoring_mode = "subtract",
    double wrap_score_cost = 0.01,
    double wrap_span_weight = 0.1,
):
    """Wraparound word-boundary-constrained substring alignment (Cython).

    When max_wraps<=0, uses rolling rows (fast path, no traceback needed).
    When max_wraps>0, uses full 3D matrix with parent pointers for traceback.

    Returns (best_j, best_j_start, best_cost, best_norm_dist, best_k, best_max_j, wrap_points).
    If either input list is empty, or no alignment ends on a word boundary,
    returns (None, None, inf, inf, 0, 0, []).

    wrap_points: list of (i, j_end, j_start) — P position and R positions of each wrap.
    Empty list when no wraps detected.

    scoring_mode:
      "subtract"    — phoneme_cost = dist - k*wrap_penalty  (wrap is free in score)
      "no_subtract" — phoneme_cost = dist  (wrap penalty stays in score)
      "additive"    — phoneme_cost = dist - k*wrap_penalty, score += k*wrap_score_cost
    Any other value is treated as "subtract".
    """
    cdef int m = len(P_list)
    cdef int n = len(R_list)
    cdef int sc_mode
    cdef tuple result
    cdef EncodedInput enc

    if m == 0 or n == 0:
        return (None, None, float('inf'), float('inf'), 0, 0, [])

    # Encode inputs
    enc = _encode_inputs(P_list, R_list, R_phone_to_word_list)

    # Map the scoring mode name to the integer code the 3D routine expects.
    if scoring_mode == "no_subtract":
        sc_mode = 1
    elif scoring_mode == "additive":
        sc_mode = 2
    else:
        sc_mode = 0

    try:
        if max_wraps <= 0:
            # Fast path: rolling rows, no wraparound.  Negative budgets are
            # routed here too; the 3D routine sizes its arrays from
            # max_wraps + 1 and must never see a negative value.
            result = _align_rolling(
                &enc, expected_word, prior_weight,
                cost_sub, cost_del, cost_ins,
            )
        else:
            result = _align_full_3d(
                &enc, expected_word, prior_weight,
                cost_sub, cost_del, cost_ins,
                wrap_penalty, max_wraps,
                sc_mode, wrap_score_cost,
                wrap_span_weight,
            )
    finally:
        # The encoded arrays are owned here regardless of how alignment ends.
        _free_encoded(&enc)

    return result