TTS-Demo / src /alignment /__init__.py
valtecAI-team's picture
Upload folder using huggingface_hub
f8b4a6c verified
"""
Monotonic alignment package
"""
import numba
from numpy import zeros, int32, float32
from torch import from_numpy
@numba.jit(
numba.void(
numba.int32[:, :, ::1],
numba.float32[:, :, ::1],
numba.int32[::1],
numba.int32[::1],
),
nopython=True,
nogil=True,
)
def maximum_path_jit(paths, values, t_ys, t_xs):
b = paths.shape[0]
max_neg_val = -1e9
for i in range(int(b)):
path = paths[i]
value = values[i]
t_y = t_ys[i]
t_x = t_xs[i]
v_prev = v_cur = 0.0
index = t_x - 1
for y in range(t_y):
for x in range(max(0, t_x + y - t_y), min(t_x, y + 1)):
if x == y:
v_cur = max_neg_val
else:
v_cur = value[y - 1, x]
if x == 0:
if y == 0:
v_prev = 0.0
else:
v_prev = max_neg_val
else:
v_prev = value[y - 1, x - 1]
value[y, x] += max(v_prev, v_cur)
for y in range(t_y - 1, -1, -1):
path[y, index] = 1
if index != 0 and (
index == y or value[y - 1, index] < value[y - 1, index - 1]
):
index = index - 1
def maximum_path(neg_cent, mask):
device = neg_cent.device
dtype = neg_cent.dtype
neg_cent = neg_cent.data.cpu().numpy().astype(float32)
path = zeros(neg_cent.shape, dtype=int32)
t_t_max = mask.sum(1)[:, 0].data.cpu().numpy().astype(int32)
t_s_max = mask.sum(2)[:, 0].data.cpu().numpy().astype(int32)
maximum_path_jit(path, neg_cent, t_t_max, t_s_max)
return from_numpy(path).to(device=device, dtype=dtype)