Plana-Archive committed on
Commit
43f623a
·
verified ·
1 Parent(s): 8fc0fba

Direct migration: commons.py

Browse files
Files changed (1) hide show
  1. commons.py +172 -0
commons.py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+ import torch
3
+ from torch.nn import functional as F
4
+ import torch.jit
5
+
6
+
7
def script_method(fn, _rcb=None):
    """Pass-through stand-in for ``torch.jit.script_method``.

    Args:
        fn: The callable that would have been scripted.
        _rcb: Accepted so the call signature stays compatible; ignored.

    Returns:
        ``fn`` itself, unmodified.
    """
    # No compilation happens here: the very same object is handed back.
    return fn
9
+
10
+
11
def script(obj, optimize=True, _frames_up=0, _rcb=None):
    """Pass-through stand-in for ``torch.jit.script``.

    Args:
        obj: The function or module that would have been scripted.
        optimize: Accepted for signature compatibility; ignored.
        _frames_up: Accepted for signature compatibility; ignored.
        _rcb: Accepted for signature compatibility; ignored.

    Returns:
        ``obj`` itself, unmodified.
    """
    # Nothing is compiled; callers receive exactly what they passed in.
    return obj
13
+
14
+
15
# Replace TorchScript's entry points with the pass-through helpers defined
# above, so any later ``@torch.jit.script`` / ``@torch.jit.script_method``
# use in this process returns the decorated object unchanged.
torch.jit.script_method, torch.jit.script = script_method, script
17
+
18
+
19
def init_weights(m, mean=0.0, std=0.01):
    """Re-initialise the weights of convolution-like modules in place.

    A module is selected when the name of its class contains ``"Conv"``;
    any other module is left untouched.

    Args:
        m: Module to inspect (must expose ``weight`` when it is selected).
        mean: Mean of the normal distribution used for the new weights.
        std: Standard deviation of that distribution.
    """
    is_conv = "Conv" in m.__class__.__name__
    if is_conv:
        m.weight.data.normal_(mean, std)
23
+
24
+
25
def get_padding(kernel_size, dilation=1):
    """Return ``int((kernel_size * dilation - dilation) / 2)``.

    Presumably the per-side padding for a stride-1 dilated convolution
    (confirm against the callers).

    Args:
        kernel_size: Kernel width.
        dilation: Dilation factor.

    Returns:
        The padding amount as an ``int`` (fractional part truncated).
    """
    dilated_extent = kernel_size * dilation - dilation
    half_extent = dilated_extent / 2
    return int(half_extent)
27
+
28
+
29
def convert_pad_shape(pad_shape):
    """Flatten a list of per-dimension pad pairs, last dimension first.

    Args:
        pad_shape: Sequence of sequences, e.g. ``[[0, 0], [1, 0]]``.

    Returns:
        A flat list holding the entries of ``pad_shape`` with the outer
        order reversed, e.g. ``[1, 0, 0, 0]`` for the example above.
    """
    flat = []
    # Walk the dimensions back-to-front and append each pair in order.
    for pair in pad_shape[::-1]:
        flat.extend(pair)
    return flat
33
+
34
+
35
def intersperse(lst, item):
    """Return a new list with ``item`` placed around every element of ``lst``.

    Args:
        lst: Sized sequence of elements.
        item: Filler placed before, between and after the elements.

    Returns:
        A list of length ``2 * len(lst) + 1``: ``item`` at even indices and
        the elements of ``lst`` at odd indices.
    """
    slot_count = 2 * len(lst) + 1
    filled = [item for _ in range(slot_count)]
    # Odd positions take the original elements; even ones keep the filler.
    filled[1::2] = lst
    return filled
39
+
40
+
41
def kl_divergence(m_p, logs_p, m_q, logs_q):
    """KL(P||Q)

    Element-wise value of
    ``(logs_q - logs_p) - 0.5
    + 0.5 * (exp(2 * logs_p) + (m_p - m_q) ** 2) * exp(-2 * logs_q)``.

    Args:
        m_p: Mean tensor of P.
        logs_p: Log-scale tensor of P (presumably log standard deviation).
        m_q: Mean tensor of Q.
        logs_q: Log-scale tensor of Q (presumably log standard deviation).

    Returns:
        A new tensor with the element-wise divergence.
    """
    var_p = torch.exp(2. * logs_p)
    squared_mean_gap = (m_p - m_q) ** 2
    inv_var_q = torch.exp(-2. * logs_q)

    kl = (logs_q - logs_p) - 0.5
    # In-place accumulation into the freshly created tensor above.
    kl += 0.5 * (var_p + squared_mean_gap) * inv_var_q
    return kl
46
+
47
+
48
def rand_gumbel(shape):
    """Sample from the Gumbel distribution, protect from overflows.

    Args:
        shape: Shape of the tensor to draw.

    Returns:
        A tensor of the requested shape holding ``-log(-log(u))``.
    """
    # Squeeze u into [0.00001, 0.99999) so neither logarithm sees 0 or 1.
    u = torch.rand(shape) * 0.99998 + 0.00001
    inner_log = torch.log(u)
    return -torch.log(-inner_log)
52
+
53
+
54
def rand_gumbel_like(x):
    """Draw Gumbel noise with the size, dtype and device of ``x``.

    Args:
        x: Reference tensor.

    Returns:
        The output of ``rand_gumbel(x.size())`` converted to ``x``'s dtype
        and moved to ``x``'s device.
    """
    noise = rand_gumbel(x.size())
    return noise.to(dtype=x.dtype, device=x.device)
57
+
58
+
59
def slice_segments(x, ids_str, segment_size=4):
    """Cut one fixed-width window along the last axis for every batch item.

    Args:
        x: Tensor indexed as ``[batch, channels, time]``.
        ids_str: Per-item start index along the last axis.
        segment_size: Width of each window.

    Returns:
        A tensor shaped like ``x[:, :, :segment_size]`` where row ``i`` is
        ``x[i, :, ids_str[i]:ids_str[i] + segment_size]``.
    """
    segments = torch.zeros_like(x[:, :, :segment_size])
    batch = x.size(0)
    for row in range(batch):
        start = ids_str[row]
        stop = start + segment_size
        segments[row] = x[row, :, start:stop]
    return segments
66
+
67
+
68
def rand_slice_segments(x, x_lengths=None, segment_size=4):
    """Cut a randomly positioned window from every item of a batch.

    Args:
        x: Tensor of size ``[b, d, t]``.
        x_lengths: Valid length per item (tensor or number). Defaults to the
            full time size ``t`` when ``None``.
        segment_size: Width of the window to cut.

    Returns:
        ``(segments, ids_str)``: the output of ``slice_segments`` and the
        ``torch.long`` start index chosen for each item.
    """
    b, _, t = x.size()
    if x_lengths is None:
        x_lengths = t
    ids_str_max = x_lengths - segment_size + 1
    # Items shorter than the window would give a negative bound here, which
    # in turn yields negative start indices that slice from the wrong end.
    # Clamp the bound at 0 so such items always start at index 0.
    if isinstance(ids_str_max, torch.Tensor):
        ids_str_max = torch.clamp(ids_str_max, min=0)
    elif isinstance(ids_str_max, (int, float)):
        ids_str_max = max(ids_str_max, 0)
    # Uniform draw in [0, ids_str_max), truncated to an integer index.
    ids_str = (torch.rand([b]).to(device=x.device) * ids_str_max).to(dtype=torch.long)
    ret = slice_segments(x, ids_str, segment_size)
    return ret, ids_str
76
+
77
+
78
def get_timing_signal_1d(
        length, channels, min_timescale=1.0, max_timescale=1.0e4):
    """Build a sinusoidal timing signal of size ``[1, channels, length]``.

    The first ``channels // 2`` channels hold sines and the next
    ``channels // 2`` hold cosines of the position scaled by geometrically
    spaced timescales; one zero channel is appended when ``channels`` is odd.

    Args:
        length: Number of positions.
        channels: Number of output channels.
        min_timescale: Smallest timescale.
        max_timescale: Largest timescale.

    Returns:
        A float tensor of size ``[1, channels, length]``.
    """
    position = torch.arange(length, dtype=torch.float)
    num_timescales = channels // 2
    # With a single timescale (channels 2 or 3) ``num_timescales - 1`` is 0;
    # guard the denominator so the call no longer raises ZeroDivisionError.
    # The increment is multiplied by index 0 in that case, so its value is
    # irrelevant there.
    log_timescale_increment = (
        math.log(float(max_timescale) / float(min_timescale)) /
        max(num_timescales - 1, 1))
    inv_timescales = min_timescale * torch.exp(
        torch.arange(num_timescales, dtype=torch.float) * -log_timescale_increment)
    scaled_time = position.unsqueeze(0) * inv_timescales.unsqueeze(1)
    signal = torch.cat([torch.sin(scaled_time), torch.cos(scaled_time)], 0)
    # Pad one zero row at the end of the channel axis when channels is odd.
    signal = F.pad(signal, [0, 0, 0, channels % 2])
    signal = signal.view(1, channels, length)
    return signal
92
+
93
+
94
def add_timing_signal_1d(x, min_timescale=1.0, max_timescale=1.0e4):
    """Add the timing signal from ``get_timing_signal_1d`` to ``x``.

    Args:
        x: Tensor of size ``[b, channels, length]``.
        min_timescale: Forwarded to ``get_timing_signal_1d``.
        max_timescale: Forwarded to ``get_timing_signal_1d``.

    Returns:
        ``x`` plus the signal, after the signal is converted to ``x``'s
        dtype and device.
    """
    _, channels, length = x.size()
    timing = get_timing_signal_1d(length, channels, min_timescale, max_timescale)
    timing = timing.to(dtype=x.dtype, device=x.device)
    return x + timing
98
+
99
+
100
def cat_timing_signal_1d(x, min_timescale=1.0, max_timescale=1.0e4, axis=1):
    """Concatenate the timing signal from ``get_timing_signal_1d`` onto ``x``.

    Args:
        x: Tensor of size ``[b, channels, length]``.
        min_timescale: Forwarded to ``get_timing_signal_1d``.
        max_timescale: Forwarded to ``get_timing_signal_1d``.
        axis: Axis along which the signal is concatenated.

    Returns:
        ``torch.cat([x, signal], axis)`` with the signal converted to ``x``'s
        dtype and device.
    """
    b, channels, length = x.size()
    signal = get_timing_signal_1d(length, channels, min_timescale, max_timescale)
    signal = signal.to(dtype=x.dtype, device=x.device)
    # The signal has batch size 1, and torch.cat requires every non-cat
    # dimension to match, so a batch larger than 1 used to fail for any
    # non-batch axis. Broadcast over the batch in that case; when the batch
    # axis itself is the cat axis, keep the original single-row behaviour.
    if axis % 3 != 0:
        signal = signal.expand(b, -1, -1)
    return torch.cat([x, signal], axis)
104
+
105
+
106
def subsequent_mask(length):
    """Return a lower-triangular mask of ones.

    Args:
        length: Side length of the square mask.

    Returns:
        A tensor of size ``[1, 1, length, length]`` with ones on and below
        the diagonal and zeros above it.
    """
    lower_triangle = torch.ones(length, length).tril()
    # Two leading singleton axes are added in front of the square mask.
    return lower_triangle[None, None]
109
+
110
+
111
@torch.jit.script
def fused_add_tanh_sigmoid_multiply(input_a, input_b, n_channels):
    """Sum two inputs and gate them: tanh(first part) * sigmoid(rest).

    Args:
        input_a: First tensor, indexed as 3-D.
        input_b: Second tensor, added to ``input_a``.
        n_channels: Indexable whose first entry is the channel split point.

    Returns:
        ``tanh(s[:, :k, :]) * sigmoid(s[:, k:, :])`` with
        ``s = input_a + input_b`` and ``k = n_channels[0]``.
    """
    n_tanh = n_channels[0]
    summed = input_a + input_b
    tanh_part = torch.tanh(summed[:, :n_tanh, :])
    sigmoid_part = torch.sigmoid(summed[:, n_tanh:, :])
    return tanh_part * sigmoid_part
119
+
120
+
121
def convert_pad_shape(pad_shape):
    """Reverse the per-dimension pad pairs and flatten them into one list.

    NOTE: this module defines ``convert_pad_shape`` twice with the same
    logic; this later definition is the one bound after import.

    Args:
        pad_shape: Sequence of sequences, e.g. ``[[0, 0], [1, 0]]``.

    Returns:
        A flat list, e.g. ``[1, 0, 0, 0]`` for the example above.
    """
    reversed_dims = pad_shape[::-1]
    return [amount for dim_pads in reversed_dims for amount in dim_pads]
125
+
126
+
127
def shift_1d(x):
    """Shift ``x`` one step along its last axis, filling with zero padding.

    Args:
        x: Tensor indexed as 3-D.

    Returns:
        ``x`` padded by one element at the front of the last axis, with the
        final element of that axis dropped, so the size is unchanged.
    """
    pad_spec = convert_pad_shape([[0, 0], [0, 0], [1, 0]])
    padded = F.pad(x, pad_spec)
    return padded[:, :, :-1]
130
+
131
+
132
def sequence_mask(length, max_length=None):
    """Build a boolean mask marking positions below each given length.

    Args:
        length: 1-D tensor of lengths.
        max_length: Number of positions per row; defaults to
            ``length.max()`` when ``None``.

    Returns:
        A boolean tensor of size ``[len(length), max_length]`` whose entry
        ``[i, j]`` is ``j < length[i]``.
    """
    limit = length.max() if max_length is None else max_length
    positions = torch.arange(limit, dtype=length.dtype, device=length.device)
    # Broadcast a row of positions against a column of lengths.
    return positions.unsqueeze(0) < length.unsqueeze(1)
137
+
138
+
139
def generate_path(duration, mask):
    """
    duration: [b, 1, t_x]
    mask: [b, 1, t_y, t_x]

    Expands per-step durations into a path tensor of size
    [b, 1, t_y, t_x], multiplied by ``mask``.
    """
    b, _, t_y, t_x = mask.shape

    # Running end position of every step, flattened to one entry per step.
    cum_duration_flat = torch.cumsum(duration, -1).view(b * t_x)

    # One row per step: positions below that step's cumulative end are set.
    path = sequence_mask(cum_duration_flat, t_y).to(mask.dtype)
    path = path.view(b, t_x, t_y)

    # Subtract the previous step's row so each step keeps only its own span.
    previous_rows = F.pad(path, convert_pad_shape([[0, 0], [1, 0], [0, 0]]))[:, :-1]
    path = path - previous_rows

    return path.unsqueeze(1).transpose(2, 3) * mask
155
+
156
+
157
def clip_grad_value_(parameters, clip_value, norm_type=2):
    """Clamp gradients element-wise in place and report their total norm.

    Args:
        parameters: A tensor or an iterable of tensors; entries whose
            ``grad`` is ``None`` are skipped.
        clip_value: Gradients are clamped to ``[-clip_value, clip_value]``.
            ``None`` disables clamping, so only the norm is computed.
        norm_type: Order of the norm. ``inf`` gives the largest
            per-parameter infinity norm.

    Returns:
        The total norm of the gradients as a Python float, measured before
        each gradient is clamped.
    """
    if isinstance(parameters, torch.Tensor):
        parameters = [parameters]
    grads = [p.grad for p in parameters if p.grad is not None]
    norm_type = float(norm_type)
    if clip_value is not None:
        clip_value = float(clip_value)

    # The p-norm accumulation sum(n_i ** p) ** (1 / p) collapses to 1.0 for
    # p = inf (anything ** 0), so the infinity norm is combined with max().
    use_max = norm_type == math.inf

    total_norm = 0.0
    for grad in grads:
        # Norm is taken before clamping this gradient.
        param_norm = grad.data.norm(norm_type).item()
        if use_max:
            total_norm = max(total_norm, param_norm)
        else:
            total_norm += param_norm ** norm_type
        if clip_value is not None:
            grad.data.clamp_(min=-clip_value, max=clip_value)
    if not use_max:
        total_norm = total_norm ** (1. / norm_type)
    return total_norm