ak36 committed on
Commit
9c5b5e6
·
1 Parent(s): bf65828
Files changed (40) hide show
  1. .ipynb_checkpoints/models-checkpoint.py +2 -1
  2. .ipynb_checkpoints/optimizers-checkpoint.py +73 -0
  3. Configs/.ipynb_checkpoints/config_ft_single-checkpoint.yml +5 -5
  4. Configs/config_ft_single.yml +4 -4
  5. Demo/.ipynb_checkpoints/Inference_pod_90h_30k-checkpoint.ipynb +423 -31
  6. __pycache__/models.cpython-310.pyc +0 -0
  7. logs/pod_90h_30k_second_v2/.ipynb_checkpoints/train-checkpoint.log +0 -0
  8. logs/pod_90h_30k_second_v2/config_ft_single.yml +22 -0
  9. logs/pod_90h_30k_second_v2/epoch_2nd_00000.pth +3 -0
  10. logs/pod_90h_30k_second_v2/epoch_2nd_00001.pth +3 -0
  11. logs/pod_90h_30k_second_v2/epoch_2nd_00002.pth +3 -0
  12. logs/pod_90h_30k_second_v2/epoch_2nd_00003.pth +3 -0
  13. logs/pod_90h_30k_second_v2/epoch_2nd_00004.pth +3 -0
  14. logs/pod_90h_30k_second_v2/epoch_2nd_00005.pth +3 -0
  15. logs/pod_90h_30k_second_v2/epoch_2nd_00006.pth +3 -0
  16. logs/pod_90h_30k_second_v2/epoch_2nd_00007.pth +3 -0
  17. logs/pod_90h_30k_second_v2/epoch_2nd_00008.pth +3 -0
  18. logs/pod_90h_30k_second_v2/epoch_2nd_00009.pth +3 -0
  19. logs/pod_90h_30k_second_v2/epoch_2nd_00010.pth +3 -0
  20. logs/pod_90h_30k_second_v2/epoch_2nd_00011.pth +3 -0
  21. logs/pod_90h_30k_second_v2/epoch_2nd_00012.pth +3 -0
  22. logs/pod_90h_30k_second_v2/epoch_2nd_00013.pth +3 -0
  23. logs/pod_90h_30k_second_v2/epoch_2nd_00014.pth +3 -0
  24. logs/pod_90h_30k_second_v2/epoch_2nd_00015.pth +3 -0
  25. logs/pod_90h_30k_second_v2/epoch_2nd_00016.pth +3 -0
  26. logs/pod_90h_30k_second_v2/epoch_2nd_00017.pth +3 -0
  27. logs/pod_90h_30k_second_v2/epoch_2nd_00018.pth +3 -0
  28. logs/pod_90h_30k_second_v2/tensorboard/events.out.tfevents.1749758267.7f09b0e2c0b0.17026.0 +3 -0
  29. logs/pod_90h_30k_second_v2/tensorboard/events.out.tfevents.1749758489.7f09b0e2c0b0.18353.0 +3 -0
  30. logs/pod_90h_30k_second_v2/tensorboard/events.out.tfevents.1749758524.7f09b0e2c0b0.18773.0 +3 -0
  31. logs/pod_90h_30k_second_v2/tensorboard/events.out.tfevents.1749758552.7f09b0e2c0b0.19160.0 +3 -0
  32. logs/pod_90h_30k_second_v2/tensorboard/events.out.tfevents.1749758602.7f09b0e2c0b0.19654.0 +3 -0
  33. logs/pod_90h_30k_second_v2/tensorboard/events.out.tfevents.1749763142.7f09b0e2c0b0.41611.0 +3 -0
  34. logs/pod_90h_30k_second_v2/tensorboard/events.out.tfevents.1749763329.7f09b0e2c0b0.42740.0 +3 -0
  35. logs/pod_90h_30k_second_v2/tensorboard/events.out.tfevents.1749763548.7f09b0e2c0b0.44123.0 +3 -0
  36. logs/pod_90h_30k_second_v2/tensorboard/events.out.tfevents.1749789808.7f09b0e2c0b0.1500.0 +3 -0
  37. logs/pod_90h_30k_second_v2/tensorboard/events.out.tfevents.1749790964.7f09b0e2c0b0.2345.0 +3 -0
  38. logs/pod_90h_30k_second_v2/tensorboard/events.out.tfevents.1749791414.7f09b0e2c0b0.1465.0 +3 -0
  39. logs/pod_90h_30k_second_v2/train.log +0 -0
  40. models.py +2 -1
.ipynb_checkpoints/models-checkpoint.py CHANGED
@@ -703,8 +703,9 @@ def load_checkpoint(model, optimizer, path, load_only_params=True, ignore_module
703
  _ = [model[key].eval() for key in model]
704
 
705
  if not load_only_params:
706
- epoch = state["epoch"]
707
  iters = state["iters"]
 
708
  optimizer.load_state_dict(state["optimizer"])
709
  else:
710
  epoch = 0
 
703
  _ = [model[key].eval() for key in model]
704
 
705
  if not load_only_params:
706
+ epoch = state["epoch"] + 1
707
  iters = state["iters"]
708
+ print('Load checkpoint from %s, epoch %d, iters %d' % (path, epoch, iters))
709
  optimizer.load_state_dict(state["optimizer"])
710
  else:
711
  epoch = 0
.ipynb_checkpoints/optimizers-checkpoint.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #coding:utf-8
2
+ import os, sys
3
+ import os.path as osp
4
+ import numpy as np
5
+ import torch
6
+ from torch import nn
7
+ from torch.optim import Optimizer
8
+ from functools import reduce
9
+ from torch.optim import AdamW
10
+
11
class MultiOptimizer:
    """Drive several named optimizers (and their schedulers) as one object.

    Each optimizer/scheduler is addressed by a string key.  Most methods take
    an optional ``key``: when given, only that entry is touched; when omitted,
    every optimizer listed in ``self.keys`` is touched in insertion order.

    Attributes:
        optimizers: mapping ``key -> optimizer``.
        schedulers: mapping ``key -> scheduler``.
        keys: optimizer keys captured at construction time.
        param_groups: flat list of every optimizer's ``param_groups`` entries
            (the group objects themselves are shared, not copied).
    """

    def __init__(self, optimizers=None, schedulers=None):
        """Store the mappings and build the flattened ``param_groups`` list.

        Args:
            optimizers: mapping ``key -> optimizer``; ``None`` means empty.
            schedulers: mapping ``key -> scheduler``; ``None`` means empty.
        """
        # ``None`` sentinels instead of ``{}`` defaults so that instances
        # built without arguments never share one module-level dict.
        self.optimizers = {} if optimizers is None else optimizers
        self.schedulers = {} if schedulers is None else schedulers
        self.keys = list(self.optimizers.keys())
        # Flatten explicitly: the previous ``reduce`` without an initial value
        # raised TypeError when there were no optimizers at all.
        self.param_groups = [
            group
            for opt in self.optimizers.values()
            for group in opt.param_groups
        ]

    def state_dict(self):
        """Return ``[(key, optimizer.state_dict()), ...]`` in ``self.keys`` order."""
        return [(key, self.optimizers[key].state_dict()) for key in self.keys]

    def load_state_dict(self, state_dict):
        """Best-effort restore from the list produced by :meth:`state_dict`.

        Entries that cannot be loaded (unknown key, incompatible state) are
        reported on stdout and skipped; the remaining entries still load.

        Args:
            state_dict: iterable of ``(key, optimizer_state)`` pairs.
        """
        for key, val in state_dict:
            try:
                self.optimizers[key].load_state_dict(val)
            except Exception:
                # Deliberately tolerant, but no longer a bare ``except`` so
                # KeyboardInterrupt / SystemExit are not swallowed.
                print("Unloaded %s" % key)

    def step(self, key=None, scaler=None):
        """Step one optimizer (``key``) or all of them.

        Args:
            key: optimizer to step; ``None`` steps every optimizer.
            scaler: optional object exposing ``step(optimizer)`` and
                ``update()`` (presumably a torch AMP ``GradScaler`` —
                confirm against the caller).
        """
        keys = [key] if key is not None else self.keys
        if scaler is None:
            for name in keys:
                self.optimizers[name].step()
            return
        for name in keys:
            scaler.step(self.optimizers[name])
        # Update the scale once, after every optimizer of this call has been
        # stepped; updating between steps would change the scale used to
        # unscale the gradients of the remaining optimizers.
        if keys:
            scaler.update()

    def _step(self, key, scaler=None):
        """Step the single optimizer ``key``, followed by ``scaler.update()``
        when a scaler is supplied."""
        if scaler is not None:
            scaler.step(self.optimizers[key])
            scaler.update()
        else:
            self.optimizers[key].step()

    def zero_grad(self, key=None):
        """Call ``zero_grad()`` on one optimizer (``key``) or on all of them."""
        if key is not None:
            self.optimizers[key].zero_grad()
        else:
            for name in self.keys:
                self.optimizers[name].zero_grad()

    def scheduler(self, *args, key=None):
        """Call ``step(*args)`` on one scheduler (``key``) or on the scheduler
        of every optimizer key."""
        if key is not None:
            self.schedulers[key].step(*args)
        else:
            for name in self.keys:
                self.schedulers[name].step(*args)
52
+
53
def define_scheduler(optimizer, params):
    """Build a ``OneCycleLR`` scheduler for ``optimizer``.

    ``div_factor`` and ``final_div_factor`` are both fixed to 1, so the
    initial and final learning rates computed by ``OneCycleLR`` equal
    ``max_lr``.

    Args:
        optimizer: the torch optimizer to schedule.
        params: mapping that may provide ``max_lr`` (default 2e-4),
            ``epochs`` (default 200), ``steps_per_epoch`` (default 1000) and
            ``pct_start`` (default 0.0).  ``None`` is treated as an empty
            mapping, i.e. all defaults.

    Returns:
        The configured ``torch.optim.lr_scheduler.OneCycleLR`` instance.
    """
    if params is None:
        params = {}

    scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer,
        max_lr=params.get('max_lr', 2e-4),
        epochs=params.get('epochs', 200),
        steps_per_epoch=params.get('steps_per_epoch', 1000),
        pct_start=params.get('pct_start', 0.0),
        div_factor=1,
        final_div_factor=1,
    )

    return scheduler
64
+
65
def build_optimizer(parameters_dict, scheduler_params_dict, lr):
    """Create one AdamW optimizer and one scheduler per entry, wrapped together.

    Args:
        parameters_dict: mapping ``key -> parameters`` passed to ``AdamW``.
        scheduler_params_dict: mapping ``key -> scheduler params``; it must
            contain every key of ``parameters_dict`` (a missing key raises
            ``KeyError``).
        lr: learning rate given to every ``AdamW`` instance.

    Returns:
        A ``MultiOptimizer`` holding the per-key optimizers and schedulers.
    """
    # Same hyper-parameters for every sub-optimizer; only the parameter set
    # differs from key to key.
    optim = {
        key: AdamW(params, lr=lr, weight_decay=1e-4, betas=(0.0, 0.99), eps=1e-9)
        for key, params in parameters_dict.items()
    }

    schedulers = {
        key: define_scheduler(opt, scheduler_params_dict[key])
        for key, opt in optim.items()
    }

    multi_optim = MultiOptimizer(optim, schedulers)
    return multi_optim
Configs/.ipynb_checkpoints/config_ft_single-checkpoint.yml CHANGED
@@ -1,5 +1,5 @@
1
  # ─── GLOBAL ──────────────────────────────────────────────────────────
2
- log_dir: logs/pod_90h_30k_second_lr1
3
  device: "cuda"
4
 
5
  batch_size: 12 # 40 GB A100, fp16
@@ -11,7 +11,7 @@ save_freq: 1
11
  log_interval: 50
12
 
13
  # leave blank on first run
14
- pretrained_model: "" #"/workspace/styletts2/logs/pod_90h_30k/epoch_2nd_00003.pth"
15
  second_stage_load_pretrained: true
16
  load_only_params: false
17
 
@@ -48,14 +48,14 @@ loss_params:
48
  lambda_sty: 1. # style reconstruction loss (2nd stage)
49
  lambda_diff: 1. # score matching loss (2nd stage)
50
 
51
- diff_epoch: 0 # style diffusion starting epoch (2nd stage)
52
- joint_epoch: 0 # joint training starting epoch (2nd stage)
53
 
54
  # ─── OPTIMISER ──────────────────────────────────────────────────────
55
  optimizer_params:
56
  lr: 0.0001
57
  bert_lr: 0.00001
58
- ft_lr: 0.0001
59
  grad_accum_steps: 2
60
 
61
  # ─── MODEL (core network & sub-modules) ─────────────────────────────
 
1
  # ─── GLOBAL ──────────────────────────────────────────────────────────
2
+ log_dir: logs/pod_90h_30k_second_v2
3
  device: "cuda"
4
 
5
  batch_size: 12 # 40 GB A100, fp16
 
11
  log_interval: 50
12
 
13
  # leave blank on first run
14
+ pretrained_model: "/workspace/styletts2/logs/pod_90h_30k_second_v2/epoch_2nd_00005.pth"
15
  second_stage_load_pretrained: true
16
  load_only_params: false
17
 
 
48
  lambda_sty: 1. # style reconstruction loss (2nd stage)
49
  lambda_diff: 1. # score matching loss (2nd stage)
50
 
51
+ diff_epoch: 1 # style diffusion starting epoch (2nd stage)
52
+ joint_epoch: 5 # joint training starting epoch (2nd stage)
53
 
54
  # ─── OPTIMISER ──────────────────────────────────────────────────────
55
  optimizer_params:
56
  lr: 0.0001
57
  bert_lr: 0.00001
58
+ ft_lr: 0.00001
59
  grad_accum_steps: 2
60
 
61
  # ─── MODEL (core network & sub-modules) ─────────────────────────────
Configs/config_ft_single.yml CHANGED
@@ -1,5 +1,5 @@
1
  # ─── GLOBAL ──────────────────────────────────────────────────────────
2
- log_dir: logs/pod_90h_30k_second_lr1
3
  device: "cuda"
4
 
5
  batch_size: 12 # 40 GB A100, fp16
@@ -11,7 +11,7 @@ save_freq: 1
11
  log_interval: 50
12
 
13
  # leave blank on first run
14
- pretrained_model: "" #"/workspace/styletts2/logs/pod_90h_30k/epoch_2nd_00003.pth"
15
  second_stage_load_pretrained: true
16
  load_only_params: false
17
 
@@ -48,8 +48,8 @@ loss_params:
48
  lambda_sty: 1. # style reconstruction loss (2nd stage)
49
  lambda_diff: 1. # score matching loss (2nd stage)
50
 
51
- diff_epoch: 0 # style diffusion starting epoch (2nd stage)
52
- joint_epoch: 0 # joint training starting epoch (2nd stage)
53
 
54
  # ─── OPTIMISER ──────────────────────────────────────────────────────
55
  optimizer_params:
 
1
  # ─── GLOBAL ──────────────────────────────────────────────────────────
2
+ log_dir: logs/pod_90h_30k_second_v2
3
  device: "cuda"
4
 
5
  batch_size: 12 # 40 GB A100, fp16
 
11
  log_interval: 50
12
 
13
  # leave blank on first run
14
+ pretrained_model: "/workspace/styletts2/logs/pod_90h_30k_second_v2/epoch_2nd_00005.pth"
15
  second_stage_load_pretrained: true
16
  load_only_params: false
17
 
 
48
  lambda_sty: 1. # style reconstruction loss (2nd stage)
49
  lambda_diff: 1. # score matching loss (2nd stage)
50
 
51
+ diff_epoch: 1 # style diffusion starting epoch (2nd stage)
52
+ joint_epoch: 5 # joint training starting epoch (2nd stage)
53
 
54
  # ─── OPTIMISER ──────────────────────────────────────────────────────
55
  optimizer_params:
Demo/.ipynb_checkpoints/Inference_pod_90h_30k-checkpoint.ipynb CHANGED
@@ -20,7 +20,7 @@
20
  },
21
  {
22
  "cell_type": "code",
23
- "execution_count": null,
24
  "id": "96e173bf",
25
  "metadata": {},
26
  "outputs": [],
@@ -39,20 +39,227 @@
39
  },
40
  {
41
  "cell_type": "code",
42
- "execution_count": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  "id": "da84c60f",
44
  "metadata": {},
45
- "outputs": [],
 
 
 
 
 
 
 
 
46
  "source": [
47
  "%cd .."
48
  ]
49
  },
50
  {
51
  "cell_type": "code",
52
- "execution_count": null,
53
  "id": "5a3ddcc8",
54
  "metadata": {},
55
- "outputs": [],
 
 
 
 
 
 
 
 
56
  "source": [
57
  "# load packages\n",
58
  "import time\n",
@@ -77,7 +284,7 @@
77
  },
78
  {
79
  "cell_type": "code",
80
- "execution_count": null,
81
  "id": "00ee05e1",
82
  "metadata": {},
83
  "outputs": [],
@@ -113,7 +320,7 @@
113
  },
114
  {
115
  "cell_type": "code",
116
- "execution_count": null,
117
  "id": "bbdc04c0",
118
  "metadata": {},
119
  "outputs": [],
@@ -121,6 +328,111 @@
121
  "device = 'cuda' if torch.cuda.is_available() else 'cpu'"
122
  ]
123
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  {
125
  "cell_type": "markdown",
126
  "id": "7b9cecbe",
@@ -131,7 +443,7 @@
131
  },
132
  {
133
  "cell_type": "code",
134
- "execution_count": null,
135
  "id": "64fc4c0f",
136
  "metadata": {},
137
  "outputs": [],
@@ -143,12 +455,12 @@
143
  },
144
  {
145
  "cell_type": "code",
146
- "execution_count": null,
147
  "id": "48e7b644",
148
  "metadata": {},
149
  "outputs": [],
150
  "source": [
151
- "config = yaml.safe_load(open(\"Models/LibriTTS/config.yml\"))\n",
152
  "\n",
153
  "# load pretrained ASR model\n",
154
  "ASR_config = config.get('ASR_config', False)\n",
@@ -167,10 +479,21 @@
167
  },
168
  {
169
  "cell_type": "code",
170
- "execution_count": null,
171
  "id": "ffc18cf7",
172
  "metadata": {},
173
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
174
  "source": [
175
  "model_params = recursive_munch(config['model_params'])\n",
176
  "model = build_model(model_params, text_aligner, pitch_extractor, plbert)\n",
@@ -180,21 +503,41 @@
180
  },
181
  {
182
  "cell_type": "code",
183
- "execution_count": null,
184
  "id": "64529d5c",
185
  "metadata": {},
186
  "outputs": [],
187
  "source": [
188
- "params_whole = torch.load(\"Models/LibriTTS/epochs_2nd_00020.pth\", map_location='cpu')\n",
189
  "params = params_whole['net']"
190
  ]
191
  },
192
  {
193
  "cell_type": "code",
194
- "execution_count": null,
195
  "id": "895d9706",
196
  "metadata": {},
197
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
  "source": [
199
  "for key in model:\n",
200
  " if key in params:\n",
@@ -217,7 +560,7 @@
217
  },
218
  {
219
  "cell_type": "code",
220
- "execution_count": null,
221
  "id": "c1a59db2",
222
  "metadata": {},
223
  "outputs": [],
@@ -227,7 +570,7 @@
227
  },
228
  {
229
  "cell_type": "code",
230
- "execution_count": null,
231
  "id": "e30985ab",
232
  "metadata": {},
233
  "outputs": [],
@@ -250,7 +593,7 @@
250
  },
251
  {
252
  "cell_type": "code",
253
- "execution_count": null,
254
  "id": "ca57469c",
255
  "metadata": {},
256
  "outputs": [],
@@ -335,7 +678,7 @@
335
  },
336
  {
337
  "cell_type": "code",
338
- "execution_count": null,
339
  "id": "cace9787",
340
  "metadata": {},
341
  "outputs": [],
@@ -454,6 +797,22 @@
454
  " display(ipd.Audio(path, rate=24000, normalize=False))"
455
  ]
456
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
457
  {
458
  "cell_type": "markdown",
459
  "id": "141e91b3",
@@ -470,7 +829,7 @@
470
  },
471
  {
472
  "cell_type": "code",
473
- "execution_count": null,
474
  "id": "81addda4",
475
  "metadata": {},
476
  "outputs": [],
@@ -481,9 +840,46 @@
481
  {
482
  "cell_type": "code",
483
  "execution_count": null,
484
- "id": "be1b2a11",
485
  "metadata": {},
486
  "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
487
  "source": [
488
  "texts = {}\n",
489
  "texts['Happy'] = \"We are happy to invite you to join us on a journey to the past, where we will visit the most amazing monuments ever built by human hands.\"\n",
@@ -913,9 +1309,7 @@
913
  "cell_type": "code",
914
  "execution_count": null,
915
  "id": "6d0a3825",
916
- "metadata": {
917
- "scrolled": false
918
- },
919
  "outputs": [],
920
  "source": [
921
  "path = \"Demo/reference_audio/1221-135767-0014.wav\"\n",
@@ -1110,9 +1504,7 @@
1110
  "cell_type": "code",
1111
  "execution_count": null,
1112
  "id": "44a4cea1",
1113
- "metadata": {
1114
- "scrolled": false
1115
- },
1116
  "outputs": [],
1117
  "source": [
1118
  "start = time.time()\n",
@@ -1133,9 +1525,9 @@
1133
  ],
1134
  "metadata": {
1135
  "kernelspec": {
1136
- "display_name": "NLP",
1137
  "language": "python",
1138
- "name": "nlp"
1139
  },
1140
  "language_info": {
1141
  "codemirror_mode": {
@@ -1147,7 +1539,7 @@
1147
  "name": "python",
1148
  "nbconvert_exporter": "python",
1149
  "pygments_lexer": "ipython3",
1150
- "version": "3.9.7"
1151
  }
1152
  },
1153
  "nbformat": 4,
 
20
  },
21
  {
22
  "cell_type": "code",
23
+ "execution_count": 1,
24
  "id": "96e173bf",
25
  "metadata": {},
26
  "outputs": [],
 
39
  },
40
  {
41
  "cell_type": "code",
42
+ "execution_count": 4,
43
+ "id": "2458c639-10a0-4b57-8602-22bc893c5176",
44
+ "metadata": {
45
+ "scrolled": true
46
+ },
47
+ "outputs": [
48
+ {
49
+ "name": "stdout",
50
+ "output_type": "stream",
51
+ "text": [
52
+ "Collecting git+https://github.com/resemble-ai/monotonic_align.git (from -r requirements.txt (line 17))\n",
53
+ " Cloning https://github.com/resemble-ai/monotonic_align.git to /tmp/pip-req-build-ps9pa2ga\n",
54
+ " Running command git clone --filter=blob:none --quiet https://github.com/resemble-ai/monotonic_align.git /tmp/pip-req-build-ps9pa2ga\n",
55
+ " Resolved https://github.com/resemble-ai/monotonic_align.git to commit c6e5e6cb19882164027eb6e35118e841eed9298e\n",
56
+ " Installing build dependencies ... \u001b[?25ldone\n",
57
+ "\u001b[?25h Getting requirements to build wheel ... \u001b[?25ldone\n",
58
+ "\u001b[?25h Preparing metadata (pyproject.toml) ... \u001b[?25ldone\n",
59
+ "\u001b[?25hCollecting SoundFile (from -r requirements.txt (line 1))\n",
60
+ " Using cached soundfile-0.13.1-py2.py3-none-manylinux_2_28_x86_64.whl.metadata (16 kB)\n",
61
+ "Requirement already satisfied: torchaudio in /venv/main/lib/python3.12/site-packages (from -r requirements.txt (line 2)) (2.6.0+cu126)\n",
62
+ "Collecting munch (from -r requirements.txt (line 3))\n",
63
+ " Using cached munch-4.0.0-py2.py3-none-any.whl.metadata (5.9 kB)\n",
64
+ "Requirement already satisfied: torch in /venv/main/lib/python3.12/site-packages (from -r requirements.txt (line 4)) (2.6.0+cu126)\n",
65
+ "Collecting pydub (from -r requirements.txt (line 5))\n",
66
+ " Using cached pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)\n",
67
+ "Requirement already satisfied: pyyaml in /venv/main/lib/python3.12/site-packages (from -r requirements.txt (line 6)) (6.0.2)\n",
68
+ "Collecting librosa (from -r requirements.txt (line 7))\n",
69
+ " Using cached librosa-0.11.0-py3-none-any.whl.metadata (8.7 kB)\n",
70
+ "Collecting nltk (from -r requirements.txt (line 8))\n",
71
+ " Using cached nltk-3.9.1-py3-none-any.whl.metadata (2.9 kB)\n",
72
+ "Collecting matplotlib (from -r requirements.txt (line 9))\n",
73
+ " Downloading matplotlib-3.10.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)\n",
74
+ "Collecting accelerate (from -r requirements.txt (line 10))\n",
75
+ " Using cached accelerate-1.7.0-py3-none-any.whl.metadata (19 kB)\n",
76
+ "Collecting transformers (from -r requirements.txt (line 11))\n",
77
+ " Using cached transformers-4.52.4-py3-none-any.whl.metadata (38 kB)\n",
78
+ "Collecting einops (from -r requirements.txt (line 12))\n",
79
+ " Using cached einops-0.8.1-py3-none-any.whl.metadata (13 kB)\n",
80
+ "Collecting einops-exts (from -r requirements.txt (line 13))\n",
81
+ " Using cached einops_exts-0.0.4-py3-none-any.whl.metadata (621 bytes)\n",
82
+ "Requirement already satisfied: tqdm in /venv/main/lib/python3.12/site-packages (from -r requirements.txt (line 14)) (4.67.1)\n",
83
+ "Collecting typing (from -r requirements.txt (line 15))\n",
84
+ " Using cached typing-3.7.4.3.tar.gz (78 kB)\n",
85
+ " Preparing metadata (setup.py) ... \u001b[?25ldone\n",
86
+ "\u001b[?25hRequirement already satisfied: typing-extensions in /venv/main/lib/python3.12/site-packages (from -r requirements.txt (line 16)) (4.13.2)\n",
87
+ "Collecting cffi>=1.0 (from SoundFile->-r requirements.txt (line 1))\n",
88
+ " Downloading cffi-1.17.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.5 kB)\n",
89
+ "Requirement already satisfied: numpy in /venv/main/lib/python3.12/site-packages (from SoundFile->-r requirements.txt (line 1)) (2.1.2)\n",
90
+ "Requirement already satisfied: filelock in /venv/main/lib/python3.12/site-packages (from torch->-r requirements.txt (line 4)) (3.18.0)\n",
91
+ "Requirement already satisfied: setuptools in /venv/main/lib/python3.12/site-packages (from torch->-r requirements.txt (line 4)) (70.2.0)\n",
92
+ "Requirement already satisfied: sympy==1.13.1 in /venv/main/lib/python3.12/site-packages (from torch->-r requirements.txt (line 4)) (1.13.1)\n",
93
+ "Requirement already satisfied: networkx in /venv/main/lib/python3.12/site-packages (from torch->-r requirements.txt (line 4)) (3.3)\n",
94
+ "Requirement already satisfied: jinja2 in /venv/main/lib/python3.12/site-packages (from torch->-r requirements.txt (line 4)) (3.1.4)\n",
95
+ "Requirement already satisfied: fsspec in /venv/main/lib/python3.12/site-packages (from torch->-r requirements.txt (line 4)) (2025.3.2)\n",
96
+ "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.6.77 in /venv/main/lib/python3.12/site-packages (from torch->-r requirements.txt (line 4)) (12.6.77)\n",
97
+ "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.6.77 in /venv/main/lib/python3.12/site-packages (from torch->-r requirements.txt (line 4)) (12.6.77)\n",
98
+ "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.6.80 in /venv/main/lib/python3.12/site-packages (from torch->-r requirements.txt (line 4)) (12.6.80)\n",
99
+ "Requirement already satisfied: nvidia-cudnn-cu12==9.5.1.17 in /venv/main/lib/python3.12/site-packages (from torch->-r requirements.txt (line 4)) (9.5.1.17)\n",
100
+ "Requirement already satisfied: nvidia-cublas-cu12==12.6.4.1 in /venv/main/lib/python3.12/site-packages (from torch->-r requirements.txt (line 4)) (12.6.4.1)\n",
101
+ "Requirement already satisfied: nvidia-cufft-cu12==11.3.0.4 in /venv/main/lib/python3.12/site-packages (from torch->-r requirements.txt (line 4)) (11.3.0.4)\n",
102
+ "Requirement already satisfied: nvidia-curand-cu12==10.3.7.77 in /venv/main/lib/python3.12/site-packages (from torch->-r requirements.txt (line 4)) (10.3.7.77)\n",
103
+ "Requirement already satisfied: nvidia-cusolver-cu12==11.7.1.2 in /venv/main/lib/python3.12/site-packages (from torch->-r requirements.txt (line 4)) (11.7.1.2)\n",
104
+ "Requirement already satisfied: nvidia-cusparse-cu12==12.5.4.2 in /venv/main/lib/python3.12/site-packages (from torch->-r requirements.txt (line 4)) (12.5.4.2)\n",
105
+ "Requirement already satisfied: nvidia-cusparselt-cu12==0.6.3 in /venv/main/lib/python3.12/site-packages (from torch->-r requirements.txt (line 4)) (0.6.3)\n",
106
+ "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /venv/main/lib/python3.12/site-packages (from torch->-r requirements.txt (line 4)) (2.21.5)\n",
107
+ "Requirement already satisfied: nvidia-nvtx-cu12==12.6.77 in /venv/main/lib/python3.12/site-packages (from torch->-r requirements.txt (line 4)) (12.6.77)\n",
108
+ "Requirement already satisfied: nvidia-nvjitlink-cu12==12.6.85 in /venv/main/lib/python3.12/site-packages (from torch->-r requirements.txt (line 4)) (12.6.85)\n",
109
+ "Requirement already satisfied: triton==3.2.0 in /venv/main/lib/python3.12/site-packages (from torch->-r requirements.txt (line 4)) (3.2.0)\n",
110
+ "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /venv/main/lib/python3.12/site-packages (from sympy==1.13.1->torch->-r requirements.txt (line 4)) (1.3.0)\n",
111
+ "Collecting audioread>=2.1.9 (from librosa->-r requirements.txt (line 7))\n",
112
+ " Using cached audioread-3.0.1-py3-none-any.whl.metadata (8.4 kB)\n",
113
+ "Collecting numba>=0.51.0 (from librosa->-r requirements.txt (line 7))\n",
114
+ " Downloading numba-0.61.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (2.8 kB)\n",
115
+ "Collecting scipy>=1.6.0 (from librosa->-r requirements.txt (line 7))\n",
116
+ " Downloading scipy-1.15.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)\n",
117
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.0/62.0 kB\u001b[0m \u001b[31m1.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
118
+ "\u001b[?25hCollecting scikit-learn>=1.1.0 (from librosa->-r requirements.txt (line 7))\n",
119
+ " Downloading scikit_learn-1.7.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (17 kB)\n",
120
+ "Collecting joblib>=1.0 (from librosa->-r requirements.txt (line 7))\n",
121
+ " Using cached joblib-1.5.1-py3-none-any.whl.metadata (5.6 kB)\n",
122
+ "Requirement already satisfied: decorator>=4.3.0 in /venv/main/lib/python3.12/site-packages (from librosa->-r requirements.txt (line 7)) (5.2.1)\n",
123
+ "Collecting pooch>=1.1 (from librosa->-r requirements.txt (line 7))\n",
124
+ " Using cached pooch-1.8.2-py3-none-any.whl.metadata (10 kB)\n",
125
+ "Collecting soxr>=0.3.2 (from librosa->-r requirements.txt (line 7))\n",
126
+ " Downloading soxr-0.5.0.post1-cp312-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB)\n",
127
+ "Collecting lazy_loader>=0.1 (from librosa->-r requirements.txt (line 7))\n",
128
+ " Using cached lazy_loader-0.4-py3-none-any.whl.metadata (7.6 kB)\n",
129
+ "Collecting msgpack>=1.0 (from librosa->-r requirements.txt (line 7))\n",
130
+ " Downloading msgpack-1.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.4 kB)\n",
131
+ "Collecting click (from nltk->-r requirements.txt (line 8))\n",
132
+ " Using cached click-8.2.1-py3-none-any.whl.metadata (2.5 kB)\n",
133
+ "Collecting regex>=2021.8.3 (from nltk->-r requirements.txt (line 8))\n",
134
+ " Downloading regex-2024.11.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (40 kB)\n",
135
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.5/40.5 kB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
136
+ "\u001b[?25hCollecting contourpy>=1.0.1 (from matplotlib->-r requirements.txt (line 9))\n",
137
+ " Downloading contourpy-1.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.5 kB)\n",
138
+ "Collecting cycler>=0.10 (from matplotlib->-r requirements.txt (line 9))\n",
139
+ " Using cached cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)\n",
140
+ "Collecting fonttools>=4.22.0 (from matplotlib->-r requirements.txt (line 9))\n",
141
+ " Downloading fonttools-4.58.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (106 kB)\n",
142
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m106.3/106.3 kB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
143
+ "\u001b[?25hCollecting kiwisolver>=1.3.1 (from matplotlib->-r requirements.txt (line 9))\n",
144
+ " Downloading kiwisolver-1.4.8-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.2 kB)\n",
145
+ "Requirement already satisfied: packaging>=20.0 in /venv/main/lib/python3.12/site-packages (from matplotlib->-r requirements.txt (line 9)) (25.0)\n",
146
+ "Requirement already satisfied: pillow>=8 in /venv/main/lib/python3.12/site-packages (from matplotlib->-r requirements.txt (line 9)) (11.0.0)\n",
147
+ "Collecting pyparsing>=2.3.1 (from matplotlib->-r requirements.txt (line 9))\n",
148
+ " Using cached pyparsing-3.2.3-py3-none-any.whl.metadata (5.0 kB)\n",
149
+ "Requirement already satisfied: python-dateutil>=2.7 in /venv/main/lib/python3.12/site-packages (from matplotlib->-r requirements.txt (line 9)) (2.9.0.post0)\n",
150
+ "Requirement already satisfied: psutil in /venv/main/lib/python3.12/site-packages (from accelerate->-r requirements.txt (line 10)) (7.0.0)\n",
151
+ "Requirement already satisfied: huggingface-hub>=0.21.0 in /venv/main/lib/python3.12/site-packages (from accelerate->-r requirements.txt (line 10)) (0.30.2)\n",
152
+ "Collecting safetensors>=0.4.3 (from accelerate->-r requirements.txt (line 10))\n",
153
+ " Using cached safetensors-0.5.3-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)\n",
154
+ "Requirement already satisfied: requests in /venv/main/lib/python3.12/site-packages (from transformers->-r requirements.txt (line 11)) (2.32.3)\n",
155
+ "Collecting tokenizers<0.22,>=0.21 (from transformers->-r requirements.txt (line 11))\n",
156
+ " Using cached tokenizers-0.21.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)\n",
157
+ "Collecting pycparser (from cffi>=1.0->SoundFile->-r requirements.txt (line 1))\n",
158
+ " Using cached pycparser-2.22-py3-none-any.whl.metadata (943 bytes)\n",
159
+ "Collecting llvmlite<0.45,>=0.44.0dev0 (from numba>=0.51.0->librosa->-r requirements.txt (line 7))\n",
160
+ " Downloading llvmlite-0.44.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.0 kB)\n",
161
+ "Requirement already satisfied: platformdirs>=2.5.0 in /venv/main/lib/python3.12/site-packages (from pooch>=1.1->librosa->-r requirements.txt (line 7)) (4.3.7)\n",
162
+ "Requirement already satisfied: six>=1.5 in /venv/main/lib/python3.12/site-packages (from python-dateutil>=2.7->matplotlib->-r requirements.txt (line 9)) (1.17.0)\n",
163
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /venv/main/lib/python3.12/site-packages (from requests->transformers->-r requirements.txt (line 11)) (3.4.1)\n",
164
+ "Requirement already satisfied: idna<4,>=2.5 in /venv/main/lib/python3.12/site-packages (from requests->transformers->-r requirements.txt (line 11)) (3.10)\n",
165
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /venv/main/lib/python3.12/site-packages (from requests->transformers->-r requirements.txt (line 11)) (2.4.0)\n",
166
+ "Requirement already satisfied: certifi>=2017.4.17 in /venv/main/lib/python3.12/site-packages (from requests->transformers->-r requirements.txt (line 11)) (2025.4.26)\n",
167
+ "Collecting threadpoolctl>=3.1.0 (from scikit-learn>=1.1.0->librosa->-r requirements.txt (line 7))\n",
168
+ " Using cached threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)\n",
169
+ "Requirement already satisfied: MarkupSafe>=2.0 in /venv/main/lib/python3.12/site-packages (from jinja2->torch->-r requirements.txt (line 4)) (2.1.5)\n",
170
+ "Using cached soundfile-0.13.1-py2.py3-none-manylinux_2_28_x86_64.whl (1.3 MB)\n",
171
+ "Using cached munch-4.0.0-py2.py3-none-any.whl (9.9 kB)\n",
172
+ "Using cached pydub-0.25.1-py2.py3-none-any.whl (32 kB)\n",
173
+ "Using cached librosa-0.11.0-py3-none-any.whl (260 kB)\n",
174
+ "Using cached nltk-3.9.1-py3-none-any.whl (1.5 MB)\n",
175
+ "Downloading matplotlib-3.10.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (8.6 MB)\n",
176
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.6/8.6 MB\u001b[0m \u001b[31m28.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
177
+ "\u001b[?25hUsing cached accelerate-1.7.0-py3-none-any.whl (362 kB)\n",
178
+ "Using cached transformers-4.52.4-py3-none-any.whl (10.5 MB)\n",
179
+ "Using cached einops-0.8.1-py3-none-any.whl (64 kB)\n",
180
+ "Using cached einops_exts-0.0.4-py3-none-any.whl (3.9 kB)\n",
181
+ "Using cached audioread-3.0.1-py3-none-any.whl (23 kB)\n",
182
+ "Downloading cffi-1.17.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (479 kB)\n",
183
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m479.4/479.4 kB\u001b[0m \u001b[31m169.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
184
+ "\u001b[?25hDownloading contourpy-1.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (323 kB)\n",
185
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m323.7/323.7 kB\u001b[0m \u001b[31m127.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
186
+ "\u001b[?25hUsing cached cycler-0.12.1-py3-none-any.whl (8.3 kB)\n",
187
+ "Downloading fonttools-4.58.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.9 MB)\n",
188
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m87.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mta \u001b[36m0:00:01\u001b[0m\n",
189
+ "\u001b[?25hUsing cached joblib-1.5.1-py3-none-any.whl (307 kB)\n",
190
+ "Downloading kiwisolver-1.4.8-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.5 MB)\n",
191
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.5/1.5 MB\u001b[0m \u001b[31m185.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
192
+ "\u001b[?25hUsing cached lazy_loader-0.4-py3-none-any.whl (12 kB)\n",
193
+ "Downloading msgpack-1.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (401 kB)\n",
194
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m401.4/401.4 kB\u001b[0m \u001b[31m192.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
195
+ "\u001b[?25hDownloading numba-0.61.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (3.9 MB)\n",
196
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.9/3.9 MB\u001b[0m \u001b[31m42.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mta \u001b[36m0:00:01\u001b[0m\n",
197
+ "\u001b[?25hUsing cached pooch-1.8.2-py3-none-any.whl (64 kB)\n",
198
+ "Using cached pyparsing-3.2.3-py3-none-any.whl (111 kB)\n",
199
+ "Downloading regex-2024.11.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (796 kB)\n",
200
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m796.9/796.9 kB\u001b[0m \u001b[31m125.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
201
+ "\u001b[?25hUsing cached safetensors-0.5.3-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (471 kB)\n",
202
+ "Downloading scikit_learn-1.7.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.5 MB)\n",
203
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.5/12.5 MB\u001b[0m \u001b[31m43.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
204
+ "\u001b[?25hDownloading scipy-1.15.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (37.3 MB)\n",
205
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m37.3/37.3 MB\u001b[0m \u001b[31m26.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
206
+ "\u001b[?25hDownloading soxr-0.5.0.post1-cp312-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (248 kB)\n",
207
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m248.5/248.5 kB\u001b[0m \u001b[31m36.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
208
+ "\u001b[?25hUsing cached tokenizers-0.21.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)\n",
209
+ "Using cached click-8.2.1-py3-none-any.whl (102 kB)\n",
210
+ "Downloading llvmlite-0.44.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (42.4 MB)\n",
211
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m42.4/42.4 MB\u001b[0m \u001b[31m14.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
212
+ "\u001b[?25hUsing cached threadpoolctl-3.6.0-py3-none-any.whl (18 kB)\n",
213
+ "Using cached pycparser-2.22-py3-none-any.whl (117 kB)\n",
214
+ "Building wheels for collected packages: typing, monotonic_align\n",
215
+ " Building wheel for typing (setup.py) ... \u001b[?25ldone\n",
216
+ "\u001b[?25h Created wheel for typing: filename=typing-3.7.4.3-py3-none-any.whl size=26304 sha256=7bd8523fe1f7cb4e20da87ee646956891addbdea2d87074f6bbf77fe282e8720\n",
217
+ " Stored in directory: /root/.cache/pip/wheels/12/98/52/2bffe242a9a487f00886e43b8ed8dac46456702e11a0d6abef\n",
218
+ " Building wheel for monotonic_align (pyproject.toml) ... \u001b[?25ldone\n",
219
+ "\u001b[?25h Created wheel for monotonic_align: filename=monotonic_align-1.2-cp312-cp312-linux_x86_64.whl size=1543517 sha256=dc9566d3e5a0656ebf939e760d934e0926d435f336db84e0019c7391576cd4cc\n",
220
+ " Stored in directory: /tmp/pip-ephem-wheel-cache-0gzg26zy/wheels/76/0a/37/00634137cd000799e060087bd1cb49a060ac6a48fc42a15488\n",
221
+ "Successfully built typing monotonic_align\n",
222
+ "Installing collected packages: pydub, typing, threadpoolctl, soxr, scipy, safetensors, regex, pyparsing, pycparser, munch, msgpack, monotonic_align, llvmlite, lazy_loader, kiwisolver, joblib, fonttools, einops, cycler, contourpy, click, audioread, scikit-learn, pooch, numba, nltk, matplotlib, einops-exts, cffi, tokenizers, SoundFile, transformers, librosa, accelerate\n",
223
+ "Successfully installed SoundFile-0.13.1 accelerate-1.7.0 audioread-3.0.1 cffi-1.17.1 click-8.2.1 contourpy-1.3.2 cycler-0.12.1 einops-0.8.1 einops-exts-0.0.4 fonttools-4.58.2 joblib-1.5.1 kiwisolver-1.4.8 lazy_loader-0.4 librosa-0.11.0 llvmlite-0.44.0 matplotlib-3.10.3 monotonic_align-1.2 msgpack-1.1.0 munch-4.0.0 nltk-3.9.1 numba-0.61.2 pooch-1.8.2 pycparser-2.22 pydub-0.25.1 pyparsing-3.2.3 regex-2024.11.6 safetensors-0.5.3 scikit-learn-1.7.0 scipy-1.15.3 soxr-0.5.0.post1 threadpoolctl-3.6.0 tokenizers-0.21.1 transformers-4.52.4 typing-3.7.4.3\n"
224
+ ]
225
+ }
226
+ ],
227
+ "source": [
228
+ "!pip install -r requirements.txt"
229
+ ]
230
+ },
231
+ {
232
+ "cell_type": "code",
233
+ "execution_count": 2,
234
  "id": "da84c60f",
235
  "metadata": {},
236
+ "outputs": [
237
+ {
238
+ "name": "stdout",
239
+ "output_type": "stream",
240
+ "text": [
241
+ "/workspace/styletts2\n"
242
+ ]
243
+ }
244
+ ],
245
  "source": [
246
  "%cd .."
247
  ]
248
  },
249
  {
250
  "cell_type": "code",
251
+ "execution_count": 5,
252
  "id": "5a3ddcc8",
253
  "metadata": {},
254
+ "outputs": [
255
+ {
256
+ "name": "stdout",
257
+ "output_type": "stream",
258
+ "text": [
259
+ "177\n"
260
+ ]
261
+ }
262
+ ],
263
  "source": [
264
  "# load packages\n",
265
  "import time\n",
 
284
  },
285
  {
286
  "cell_type": "code",
287
+ "execution_count": 6,
288
  "id": "00ee05e1",
289
  "metadata": {},
290
  "outputs": [],
 
320
  },
321
  {
322
  "cell_type": "code",
323
+ "execution_count": 7,
324
  "id": "bbdc04c0",
325
  "metadata": {},
326
  "outputs": [],
 
328
  "device = 'cuda' if torch.cuda.is_available() else 'cpu'"
329
  ]
330
  },
331
+ {
332
+ "cell_type": "code",
333
+ "execution_count": 10,
334
+ "id": "bc8a517e-915c-427f-a3e0-b96310317bec",
335
+ "metadata": {
336
+ "scrolled": true
337
+ },
338
+ "outputs": [
339
+ {
340
+ "name": "stdout",
341
+ "output_type": "stream",
342
+ "text": [
343
+ "Requirement already satisfied: phonemizer in /venv/main/lib/python3.12/site-packages (3.3.0)\n",
344
+ "Requirement already satisfied: joblib in /venv/main/lib/python3.12/site-packages (from phonemizer) (1.5.1)\n",
345
+ "Requirement already satisfied: segments in /venv/main/lib/python3.12/site-packages (from phonemizer) (2.3.0)\n",
346
+ "Requirement already satisfied: attrs>=18.1 in /venv/main/lib/python3.12/site-packages (from phonemizer) (25.3.0)\n",
347
+ "Requirement already satisfied: dlinfo in /venv/main/lib/python3.12/site-packages (from phonemizer) (2.0.0)\n",
348
+ "Requirement already satisfied: typing-extensions in /venv/main/lib/python3.12/site-packages (from phonemizer) (4.13.2)\n",
349
+ "Requirement already satisfied: regex in /venv/main/lib/python3.12/site-packages (from segments->phonemizer) (2024.11.6)\n",
350
+ "Requirement already satisfied: csvw>=1.5.6 in /venv/main/lib/python3.12/site-packages (from segments->phonemizer) (3.5.1)\n",
351
+ "Requirement already satisfied: isodate in /venv/main/lib/python3.12/site-packages (from csvw>=1.5.6->segments->phonemizer) (0.7.2)\n",
352
+ "Requirement already satisfied: python-dateutil in /venv/main/lib/python3.12/site-packages (from csvw>=1.5.6->segments->phonemizer) (2.9.0.post0)\n",
353
+ "Requirement already satisfied: rfc3986<2 in /venv/main/lib/python3.12/site-packages (from csvw>=1.5.6->segments->phonemizer) (1.5.0)\n",
354
+ "Requirement already satisfied: uritemplate>=3.0.0 in /venv/main/lib/python3.12/site-packages (from csvw>=1.5.6->segments->phonemizer) (4.2.0)\n",
355
+ "Requirement already satisfied: babel in /venv/main/lib/python3.12/site-packages (from csvw>=1.5.6->segments->phonemizer) (2.17.0)\n",
356
+ "Requirement already satisfied: requests in /venv/main/lib/python3.12/site-packages (from csvw>=1.5.6->segments->phonemizer) (2.32.3)\n",
357
+ "Requirement already satisfied: language-tags in /venv/main/lib/python3.12/site-packages (from csvw>=1.5.6->segments->phonemizer) (1.2.0)\n",
358
+ "Requirement already satisfied: rdflib in /venv/main/lib/python3.12/site-packages (from csvw>=1.5.6->segments->phonemizer) (7.1.4)\n",
359
+ "Requirement already satisfied: colorama in /venv/main/lib/python3.12/site-packages (from csvw>=1.5.6->segments->phonemizer) (0.4.6)\n",
360
+ "Requirement already satisfied: jsonschema in /venv/main/lib/python3.12/site-packages (from csvw>=1.5.6->segments->phonemizer) (4.24.0)\n",
361
+ "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /venv/main/lib/python3.12/site-packages (from jsonschema->csvw>=1.5.6->segments->phonemizer) (2025.4.1)\n",
362
+ "Requirement already satisfied: referencing>=0.28.4 in /venv/main/lib/python3.12/site-packages (from jsonschema->csvw>=1.5.6->segments->phonemizer) (0.36.2)\n",
363
+ "Requirement already satisfied: rpds-py>=0.7.1 in /venv/main/lib/python3.12/site-packages (from jsonschema->csvw>=1.5.6->segments->phonemizer) (0.25.1)\n",
364
+ "Requirement already satisfied: six>=1.5 in /venv/main/lib/python3.12/site-packages (from python-dateutil->csvw>=1.5.6->segments->phonemizer) (1.17.0)\n",
365
+ "Requirement already satisfied: pyparsing<4,>=2.1.0 in /venv/main/lib/python3.12/site-packages (from rdflib->csvw>=1.5.6->segments->phonemizer) (3.2.3)\n",
366
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /venv/main/lib/python3.12/site-packages (from requests->csvw>=1.5.6->segments->phonemizer) (3.4.1)\n",
367
+ "Requirement already satisfied: idna<4,>=2.5 in /venv/main/lib/python3.12/site-packages (from requests->csvw>=1.5.6->segments->phonemizer) (3.10)\n",
368
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /venv/main/lib/python3.12/site-packages (from requests->csvw>=1.5.6->segments->phonemizer) (2.4.0)\n",
369
+ "Requirement already satisfied: certifi>=2017.4.17 in /venv/main/lib/python3.12/site-packages (from requests->csvw>=1.5.6->segments->phonemizer) (2025.4.26)\n"
370
+ ]
371
+ }
372
+ ],
373
+ "source": [
374
+ "!pip install phonemizer"
375
+ ]
376
+ },
377
+ {
378
+ "cell_type": "code",
379
+ "execution_count": 13,
380
+ "id": "48f471f2-ae4a-489e-9d6b-11caff294cf6",
381
+ "metadata": {
382
+ "scrolled": true
383
+ },
384
+ "outputs": [
385
+ {
386
+ "name": "stdout",
387
+ "output_type": "stream",
388
+ "text": [
389
+ "Reading package lists... Done\n",
390
+ "Building dependency tree... Done\n",
391
+ "Reading state information... Done\n",
392
+ "The following additional packages will be installed:\n",
393
+ " espeak-ng-data libespeak-ng1 libpcaudio0 libsonic0\n",
394
+ "The following NEW packages will be installed:\n",
395
+ " espeak-ng espeak-ng-data libespeak-ng1 libpcaudio0 libsonic0\n",
396
+ "0 upgraded, 5 newly installed, 0 to remove and 42 not upgraded.\n",
397
+ "Need to get 5128 kB of archives.\n",
398
+ "After this operation, 13.7 MB of additional disk space will be used.\n",
399
+ "Get:1 http://archive.ubuntu.com/ubuntu noble/main amd64 libpcaudio0 amd64 1.2-2build3 [9144 B]\n",
400
+ "Get:2 http://archive.ubuntu.com/ubuntu noble/main amd64 libsonic0 amd64 0.2.0-13build1 [10.3 kB]\n",
401
+ "Get:3 http://archive.ubuntu.com/ubuntu noble/main amd64 espeak-ng-data amd64 1.51+dfsg-12build1 [4538 kB]\n",
402
+ "Get:4 http://archive.ubuntu.com/ubuntu noble/main amd64 libespeak-ng1 amd64 1.51+dfsg-12build1 [206 kB]\n",
403
+ "Get:5 http://archive.ubuntu.com/ubuntu noble/universe amd64 espeak-ng amd64 1.51+dfsg-12build1 [364 kB]\n",
404
+ "Fetched 5128 kB in 2s (3310 kB/s) \n",
405
+ "debconf: delaying package configuration, since apt-utils is not installed\n",
406
+ "Selecting previously unselected package libpcaudio0:amd64.\n",
407
+ "(Reading database ... 41253 files and directories currently installed.)\n",
408
+ "Preparing to unpack .../libpcaudio0_1.2-2build3_amd64.deb ...\n",
409
+ "Unpacking libpcaudio0:amd64 (1.2-2build3) ...\n",
410
+ "Selecting previously unselected package libsonic0:amd64.\n",
411
+ "Preparing to unpack .../libsonic0_0.2.0-13build1_amd64.deb ...\n",
412
+ "Unpacking libsonic0:amd64 (0.2.0-13build1) ...\n",
413
+ "Selecting previously unselected package espeak-ng-data:amd64.\n",
414
+ "Preparing to unpack .../espeak-ng-data_1.51+dfsg-12build1_amd64.deb ...\n",
415
+ "Unpacking espeak-ng-data:amd64 (1.51+dfsg-12build1) ...\n",
416
+ "Selecting previously unselected package libespeak-ng1:amd64.\n",
417
+ "Preparing to unpack .../libespeak-ng1_1.51+dfsg-12build1_amd64.deb ...\n",
418
+ "Unpacking libespeak-ng1:amd64 (1.51+dfsg-12build1) ...\n",
419
+ "Selecting previously unselected package espeak-ng.\n",
420
+ "Preparing to unpack .../espeak-ng_1.51+dfsg-12build1_amd64.deb ...\n",
421
+ "Unpacking espeak-ng (1.51+dfsg-12build1) ...\n",
422
+ "Setting up libpcaudio0:amd64 (1.2-2build3) ...\n",
423
+ "Setting up libsonic0:amd64 (0.2.0-13build1) ...\n",
424
+ "Setting up espeak-ng-data:amd64 (1.51+dfsg-12build1) ...\n",
425
+ "Setting up libespeak-ng1:amd64 (1.51+dfsg-12build1) ...\n",
426
+ "Setting up espeak-ng (1.51+dfsg-12build1) ...\n",
427
+ "Processing triggers for man-db (2.12.0-4build2) ...\n",
428
+ "Processing triggers for libc-bin (2.39-0ubuntu8.4) ...\n"
429
+ ]
430
+ }
431
+ ],
432
+ "source": [
433
+ "!sudo apt-get install -y -V espeak-ng"
434
+ ]
435
+ },
436
  {
437
  "cell_type": "markdown",
438
  "id": "7b9cecbe",
 
443
  },
444
  {
445
  "cell_type": "code",
446
+ "execution_count": 14,
447
  "id": "64fc4c0f",
448
  "metadata": {},
449
  "outputs": [],
 
455
  },
456
  {
457
  "cell_type": "code",
458
+ "execution_count": 15,
459
  "id": "48e7b644",
460
  "metadata": {},
461
  "outputs": [],
462
  "source": [
463
+ "config = yaml.safe_load(open(\"logs/pod_90h_30k_second_lr1/config_ft_single.yml\"))\n",
464
  "\n",
465
  "# load pretrained ASR model\n",
466
  "ASR_config = config.get('ASR_config', False)\n",
 
479
  },
480
  {
481
  "cell_type": "code",
482
+ "execution_count": 16,
483
  "id": "ffc18cf7",
484
  "metadata": {},
485
+ "outputs": [
486
+ {
487
+ "name": "stderr",
488
+ "output_type": "stream",
489
+ "text": [
490
+ "/venv/main/lib/python3.12/site-packages/torch/nn/utils/weight_norm.py:143: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`.\n",
491
+ " WeightNorm.apply(module, name, dim)\n",
492
+ "/venv/main/lib/python3.12/site-packages/torch/nn/modules/rnn.py:123: UserWarning: dropout option adds dropout after all but last recurrent layer, so non-zero dropout expects num_layers greater than 1, but got dropout=0.2 and num_layers=1\n",
493
+ " warnings.warn(\n"
494
+ ]
495
+ }
496
+ ],
497
  "source": [
498
  "model_params = recursive_munch(config['model_params'])\n",
499
  "model = build_model(model_params, text_aligner, pitch_extractor, plbert)\n",
 
503
  },
504
  {
505
  "cell_type": "code",
506
+ "execution_count": 18,
507
  "id": "64529d5c",
508
  "metadata": {},
509
  "outputs": [],
510
  "source": [
511
+ "params_whole = torch.load(\"logs/pod_90h_30k_second_lr1/epoch_2nd_00018.pth\", map_location='cpu')\n",
512
  "params = params_whole['net']"
513
  ]
514
  },
515
  {
516
  "cell_type": "code",
517
+ "execution_count": 19,
518
  "id": "895d9706",
519
  "metadata": {},
520
+ "outputs": [
521
+ {
522
+ "name": "stdout",
523
+ "output_type": "stream",
524
+ "text": [
525
+ "bert loaded\n",
526
+ "bert_encoder loaded\n",
527
+ "predictor loaded\n",
528
+ "decoder loaded\n",
529
+ "text_encoder loaded\n",
530
+ "predictor_encoder loaded\n",
531
+ "style_encoder loaded\n",
532
+ "diffusion loaded\n",
533
+ "text_aligner loaded\n",
534
+ "pitch_extractor loaded\n",
535
+ "mpd loaded\n",
536
+ "msd loaded\n",
537
+ "wd loaded\n"
538
+ ]
539
+ }
540
+ ],
541
  "source": [
542
  "for key in model:\n",
543
  " if key in params:\n",
 
560
  },
561
  {
562
  "cell_type": "code",
563
+ "execution_count": 20,
564
  "id": "c1a59db2",
565
  "metadata": {},
566
  "outputs": [],
 
570
  },
571
  {
572
  "cell_type": "code",
573
+ "execution_count": 21,
574
  "id": "e30985ab",
575
  "metadata": {},
576
  "outputs": [],
 
593
  },
594
  {
595
  "cell_type": "code",
596
+ "execution_count": 22,
597
  "id": "ca57469c",
598
  "metadata": {},
599
  "outputs": [],
 
678
  },
679
  {
680
  "cell_type": "code",
681
+ "execution_count": 23,
682
  "id": "cace9787",
683
  "metadata": {},
684
  "outputs": [],
 
797
  " display(ipd.Audio(path, rate=24000, normalize=False))"
798
  ]
799
  },
800
+ {
801
+ "cell_type": "code",
802
+ "execution_count": null,
803
+ "id": "62747cbb-bb33-4be4-8275-8c292e306987",
804
+ "metadata": {},
805
+ "outputs": [],
806
+ "source": []
807
+ },
808
+ {
809
+ "cell_type": "code",
810
+ "execution_count": null,
811
+ "id": "ec8fb32f-91dd-4fca-a7c6-7f156449c296",
812
+ "metadata": {},
813
+ "outputs": [],
814
+ "source": []
815
+ },
816
  {
817
  "cell_type": "markdown",
818
  "id": "141e91b3",
 
829
  },
830
  {
831
  "cell_type": "code",
832
+ "execution_count": 25,
833
  "id": "81addda4",
834
  "metadata": {},
835
  "outputs": [],
 
840
  {
841
  "cell_type": "code",
842
  "execution_count": null,
843
+ "id": "c0deea36-de7c-4b65-bbc4-8e00697c6796",
844
  "metadata": {},
845
  "outputs": [],
846
+ "source": []
847
+ },
848
+ {
849
+ "cell_type": "code",
850
+ "execution_count": null,
851
+ "id": "41b18368-2fcb-4bc8-8963-00734227267c",
852
+ "metadata": {},
853
+ "outputs": [],
854
+ "source": []
855
+ },
856
+ {
857
+ "cell_type": "code",
858
+ "execution_count": 26,
859
+ "id": "be1b2a11",
860
+ "metadata": {
861
+ "scrolled": true
862
+ },
863
+ "outputs": [
864
+ {
865
+ "ename": "LookupError",
866
+ "evalue": "\n**********************************************************************\n Resource \u001b[93mpunkt_tab\u001b[0m not found.\n Please use the NLTK Downloader to obtain the resource:\n\n \u001b[31m>>> import nltk\n >>> nltk.download('punkt_tab')\n \u001b[0m\n For more information see: https://www.nltk.org/data.html\n\n Attempted to load \u001b[93mtokenizers/punkt_tab/english/\u001b[0m\n\n Searched in:\n - '/root/nltk_data'\n - '/venv/main/nltk_data'\n - '/venv/main/share/nltk_data'\n - '/venv/main/lib/nltk_data'\n - '/usr/share/nltk_data'\n - '/usr/local/share/nltk_data'\n - '/usr/lib/nltk_data'\n - '/usr/local/lib/nltk_data'\n**********************************************************************\n",
867
+ "output_type": "error",
868
+ "traceback": [
869
+ "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
870
+ "\u001b[31mLookupError\u001b[39m Traceback (most recent call last)",
871
+ "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[26]\u001b[39m\u001b[32m, line 8\u001b[39m\n\u001b[32m 5\u001b[39m texts[\u001b[33m'\u001b[39m\u001b[33mSurprised\u001b[39m\u001b[33m'\u001b[39m] = \u001b[33m\"\u001b[39m\u001b[33mI can\u001b[39m\u001b[33m'\u001b[39m\u001b[33mt believe it! You mean to tell me that you have discovered a new species of bacteria in this pond?\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 7\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m k,v \u001b[38;5;129;01min\u001b[39;00m texts.items():\n\u001b[32m----> \u001b[39m\u001b[32m8\u001b[39m wav = \u001b[43minference\u001b[49m\u001b[43m(\u001b[49m\u001b[43mv\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mref_s\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdiffusion_steps\u001b[49m\u001b[43m=\u001b[49m\u001b[32;43m10\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43malpha\u001b[49m\u001b[43m=\u001b[49m\u001b[32;43m0.3\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbeta\u001b[49m\u001b[43m=\u001b[49m\u001b[32;43m0.7\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43membedding_scale\u001b[49m\u001b[43m=\u001b[49m\u001b[32;43m1\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[32m 9\u001b[39m \u001b[38;5;28mprint\u001b[39m(k + \u001b[33m\"\u001b[39m\u001b[33m: \u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 10\u001b[39m display(ipd.Audio(wav, rate=\u001b[32m24000\u001b[39m, normalize=\u001b[38;5;28;01mFalse\u001b[39;00m))\n",
872
+ "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[22]\u001b[39m\u001b[32m, line 4\u001b[39m, in \u001b[36minference\u001b[39m\u001b[34m(text, ref_s, alpha, beta, diffusion_steps, embedding_scale)\u001b[39m\n\u001b[32m 2\u001b[39m text = text.strip()\n\u001b[32m 3\u001b[39m ps = global_phonemizer.phonemize([text])\n\u001b[32m----> \u001b[39m\u001b[32m4\u001b[39m ps = \u001b[43mword_tokenize\u001b[49m\u001b[43m(\u001b[49m\u001b[43mps\u001b[49m\u001b[43m[\u001b[49m\u001b[32;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 5\u001b[39m ps = \u001b[33m'\u001b[39m\u001b[33m \u001b[39m\u001b[33m'\u001b[39m.join(ps)\n\u001b[32m 6\u001b[39m tokens = textclenaer(ps)\n",
873
+ "\u001b[36mFile \u001b[39m\u001b[32m/venv/main/lib/python3.12/site-packages/nltk/tokenize/__init__.py:142\u001b[39m, in \u001b[36mword_tokenize\u001b[39m\u001b[34m(text, language, preserve_line)\u001b[39m\n\u001b[32m 127\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mword_tokenize\u001b[39m(text, language=\u001b[33m\"\u001b[39m\u001b[33menglish\u001b[39m\u001b[33m\"\u001b[39m, preserve_line=\u001b[38;5;28;01mFalse\u001b[39;00m):\n\u001b[32m 128\u001b[39m \u001b[38;5;250m \u001b[39m\u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m 129\u001b[39m \u001b[33;03m Return a tokenized copy of *text*,\u001b[39;00m\n\u001b[32m 130\u001b[39m \u001b[33;03m using NLTK's recommended word tokenizer\u001b[39;00m\n\u001b[32m (...)\u001b[39m\u001b[32m 140\u001b[39m \u001b[33;03m :type preserve_line: bool\u001b[39;00m\n\u001b[32m 141\u001b[39m \u001b[33;03m \"\"\"\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m142\u001b[39m sentences = [text] \u001b[38;5;28;01mif\u001b[39;00m preserve_line \u001b[38;5;28;01melse\u001b[39;00m \u001b[43msent_tokenize\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtext\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlanguage\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 143\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m [\n\u001b[32m 144\u001b[39m token \u001b[38;5;28;01mfor\u001b[39;00m sent \u001b[38;5;129;01min\u001b[39;00m sentences \u001b[38;5;28;01mfor\u001b[39;00m token \u001b[38;5;129;01min\u001b[39;00m _treebank_word_tokenizer.tokenize(sent)\n\u001b[32m 145\u001b[39m ]\n",
874
+ "\u001b[36mFile \u001b[39m\u001b[32m/venv/main/lib/python3.12/site-packages/nltk/tokenize/__init__.py:119\u001b[39m, in \u001b[36msent_tokenize\u001b[39m\u001b[34m(text, language)\u001b[39m\n\u001b[32m 109\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34msent_tokenize\u001b[39m(text, language=\u001b[33m\"\u001b[39m\u001b[33menglish\u001b[39m\u001b[33m\"\u001b[39m):\n\u001b[32m 110\u001b[39m \u001b[38;5;250m \u001b[39m\u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m 111\u001b[39m \u001b[33;03m Return a sentence-tokenized copy of *text*,\u001b[39;00m\n\u001b[32m 112\u001b[39m \u001b[33;03m using NLTK's recommended sentence tokenizer\u001b[39;00m\n\u001b[32m (...)\u001b[39m\u001b[32m 117\u001b[39m \u001b[33;03m :param language: the model name in the Punkt corpus\u001b[39;00m\n\u001b[32m 118\u001b[39m \u001b[33;03m \"\"\"\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m119\u001b[39m tokenizer = \u001b[43m_get_punkt_tokenizer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlanguage\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 120\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m tokenizer.tokenize(text)\n",
875
+ "\u001b[36mFile \u001b[39m\u001b[32m/venv/main/lib/python3.12/site-packages/nltk/tokenize/__init__.py:105\u001b[39m, in \u001b[36m_get_punkt_tokenizer\u001b[39m\u001b[34m(language)\u001b[39m\n\u001b[32m 96\u001b[39m \u001b[38;5;129m@functools\u001b[39m.lru_cache\n\u001b[32m 97\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m_get_punkt_tokenizer\u001b[39m(language=\u001b[33m\"\u001b[39m\u001b[33menglish\u001b[39m\u001b[33m\"\u001b[39m):\n\u001b[32m 98\u001b[39m \u001b[38;5;250m \u001b[39m\u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m 99\u001b[39m \u001b[33;03m A constructor for the PunktTokenizer that utilizes\u001b[39;00m\n\u001b[32m 100\u001b[39m \u001b[33;03m a lru cache for performance.\u001b[39;00m\n\u001b[32m (...)\u001b[39m\u001b[32m 103\u001b[39m \u001b[33;03m :type language: str\u001b[39;00m\n\u001b[32m 104\u001b[39m \u001b[33;03m \"\"\"\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m105\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mPunktTokenizer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlanguage\u001b[49m\u001b[43m)\u001b[49m\n",
876
+ "\u001b[36mFile \u001b[39m\u001b[32m/venv/main/lib/python3.12/site-packages/nltk/tokenize/punkt.py:1744\u001b[39m, in \u001b[36mPunktTokenizer.__init__\u001b[39m\u001b[34m(self, lang)\u001b[39m\n\u001b[32m 1742\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m, lang=\u001b[33m\"\u001b[39m\u001b[33menglish\u001b[39m\u001b[33m\"\u001b[39m):\n\u001b[32m 1743\u001b[39m PunktSentenceTokenizer.\u001b[34m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m)\n\u001b[32m-> \u001b[39m\u001b[32m1744\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mload_lang\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlang\u001b[49m\u001b[43m)\u001b[49m\n",
877
+ "\u001b[36mFile \u001b[39m\u001b[32m/venv/main/lib/python3.12/site-packages/nltk/tokenize/punkt.py:1749\u001b[39m, in \u001b[36mPunktTokenizer.load_lang\u001b[39m\u001b[34m(self, lang)\u001b[39m\n\u001b[32m 1746\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mload_lang\u001b[39m(\u001b[38;5;28mself\u001b[39m, lang=\u001b[33m\"\u001b[39m\u001b[33menglish\u001b[39m\u001b[33m\"\u001b[39m):\n\u001b[32m 1747\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mnltk\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdata\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m find\n\u001b[32m-> \u001b[39m\u001b[32m1749\u001b[39m lang_dir = \u001b[43mfind\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43mf\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mtokenizers/punkt_tab/\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mlang\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[33;43m/\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[32m 1750\u001b[39m \u001b[38;5;28mself\u001b[39m._params = load_punkt_params(lang_dir)\n\u001b[32m 1751\u001b[39m \u001b[38;5;28mself\u001b[39m._lang = lang\n",
878
+ "\u001b[36mFile \u001b[39m\u001b[32m/venv/main/lib/python3.12/site-packages/nltk/data.py:579\u001b[39m, in \u001b[36mfind\u001b[39m\u001b[34m(resource_name, paths)\u001b[39m\n\u001b[32m 577\u001b[39m sep = \u001b[33m\"\u001b[39m\u001b[33m*\u001b[39m\u001b[33m\"\u001b[39m * \u001b[32m70\u001b[39m\n\u001b[32m 578\u001b[39m resource_not_found = \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00msep\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mmsg\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00msep\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[33m\"\u001b[39m\n\u001b[32m--> \u001b[39m\u001b[32m579\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mLookupError\u001b[39;00m(resource_not_found)\n",
879
+ "\u001b[31mLookupError\u001b[39m: \n**********************************************************************\n Resource \u001b[93mpunkt_tab\u001b[0m not found.\n Please use the NLTK Downloader to obtain the resource:\n\n \u001b[31m>>> import nltk\n >>> nltk.download('punkt_tab')\n \u001b[0m\n For more information see: https://www.nltk.org/data.html\n\n Attempted to load \u001b[93mtokenizers/punkt_tab/english/\u001b[0m\n\n Searched in:\n - '/root/nltk_data'\n - '/venv/main/nltk_data'\n - '/venv/main/share/nltk_data'\n - '/venv/main/lib/nltk_data'\n - '/usr/share/nltk_data'\n - '/usr/local/share/nltk_data'\n - '/usr/lib/nltk_data'\n - '/usr/local/lib/nltk_data'\n**********************************************************************\n"
880
+ ]
881
+ }
882
+ ],
883
  "source": [
884
  "texts = {}\n",
885
  "texts['Happy'] = \"We are happy to invite you to join us on a journey to the past, where we will visit the most amazing monuments ever built by human hands.\"\n",
 
1309
  "cell_type": "code",
1310
  "execution_count": null,
1311
  "id": "6d0a3825",
1312
+ "metadata": {},
 
 
1313
  "outputs": [],
1314
  "source": [
1315
  "path = \"Demo/reference_audio/1221-135767-0014.wav\"\n",
 
1504
  "cell_type": "code",
1505
  "execution_count": null,
1506
  "id": "44a4cea1",
1507
+ "metadata": {},
 
 
1508
  "outputs": [],
1509
  "source": [
1510
  "start = time.time()\n",
 
1525
  ],
1526
  "metadata": {
1527
  "kernelspec": {
1528
+ "display_name": "Python3 (main venv)",
1529
  "language": "python",
1530
+ "name": "main"
1531
  },
1532
  "language_info": {
1533
  "codemirror_mode": {
 
1539
  "name": "python",
1540
  "nbconvert_exporter": "python",
1541
  "pygments_lexer": "ipython3",
1542
+ "version": "3.12.3"
1543
  }
1544
  },
1545
  "nbformat": 4,
__pycache__/models.cpython-310.pyc CHANGED
Binary files a/__pycache__/models.cpython-310.pyc and b/__pycache__/models.cpython-310.pyc differ
 
logs/pod_90h_30k_second_v2/.ipynb_checkpoints/train-checkpoint.log ADDED
The diff for this file is too large to render. See raw diff
 
logs/pod_90h_30k_second_v2/config_ft_single.yml ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {ASR_config: Utils/ASR/config.yml, ASR_path: Utils/ASR/epoch_00080.pth, F0_path: Utils/JDC/bst.t7,
2
+ PLBERT_dir: Utils/PLBERT/, batch_size: 12, data_params: {OOD_data: /workspace/styletts2/data/OOD_texts.txt,
3
+ min_length: 50, root_path: /workspace, train_data: /workspace/styletts2/data/train_list.txt,
4
+ val_data: /workspace/styletts2/data/val_list.txt}, device: cuda, epochs_1st: 25,
5
+ epochs_2nd: 20, first_stage_path: /workspace/styletts2/stage1_final.pth, load_only_params: false,
6
+ log_dir: logs/pod_90h_30k_second_v2, log_interval: 50, loss_params: {TMA_epoch: 14,
7
+ diff_epoch: 1, joint_epoch: 5, lambda_F0: 1.0, lambda_ce: 20.0, lambda_diff: 1.0,
8
+ lambda_dur: 1.0, lambda_gen: 1.0, lambda_mel: 5.0, lambda_mono: 1.0, lambda_norm: 1.0,
9
+ lambda_s2s: 1.0, lambda_slm: 1.0, lambda_sty: 1.0}, max_len: 300, model_params: {
10
+ decoder: {resblock_dilation_sizes: [[1, 3, 5], [1, 3, 5], [1, 3, 5]], resblock_kernel_sizes: [
11
+ 3, 7, 11], type: hifigan, upsample_initial_channel: 512, upsample_kernel_sizes: [
12
+ 20, 10, 6, 4], upsample_rates: [10, 5, 3, 2]}, diffusion: {dist: {estimate_sigma_data: true,
13
+ mean: -3.0, sigma_data: 0.3631309394446902, std: 1.0}, embedding_mask_proba: 0.1,
14
+ transformer: {head_features: 64, multiplier: 2, num_heads: 8, num_layers: 3}},
15
+ dim_in: 64, dropout: 0.2, hidden_dim: 512, max_conv_dim: 512, max_dur: 50, multispeaker: true,
16
+ n_layer: 3, n_mels: 80, n_token: 178, slm: {hidden: 768, initial_channel: 64,
17
+ model: microsoft/wavlm-base-plus, nlayers: 13, sr: 16000}, style_dim: 128},
18
+ optimizer_params: {bert_lr: 1.0e-05, ft_lr: 1.0e-05, grad_accum_steps: 2, lr: 0.0001},
19
+ preprocess_params: {spect_params: {hop_length: 300, n_fft: 2048, win_length: 1200},
20
+ sr: 24000}, pretrained_model: /workspace/styletts2/logs/pod_90h_30k_second_v2/epoch_2nd_00005.pth,
21
+ save_freq: 1, second_stage_load_pretrained: true, slmadv_params: {batch_percentage: 0.5,
22
+ iter: 20, max_len: 500, min_len: 400, scale: 0.01, sig: 1.5, thresh: 5}}
logs/pod_90h_30k_second_v2/epoch_2nd_00000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0928157cdc46e1d7e76b85e06e5264bd3cde20091d3042dc4665caf2bfe02526
3
+ size 1055973030
logs/pod_90h_30k_second_v2/epoch_2nd_00001.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e0e105ad5e3a8c28cd29e0b3f02eb057076d1a8be9eb2b683f1533635e8401e
3
+ size 1589850598
logs/pod_90h_30k_second_v2/epoch_2nd_00002.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26a7b2881b5543005c7581da0d2055750057dea608d01ea788adb1fafc3945f4
3
+ size 1589850598
logs/pod_90h_30k_second_v2/epoch_2nd_00003.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0065304e4bf5ed0559cb9a45c2d4bdd0e609e89d6e21474ffc4b1cf33922571
3
+ size 1589850598
logs/pod_90h_30k_second_v2/epoch_2nd_00004.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:256ee7584931924080191070f86a26ee9d32828465a52a33b0a7a94fdd109eb7
3
+ size 1589850598
logs/pod_90h_30k_second_v2/epoch_2nd_00005.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74fc75095ee7d28d84006849e9103d2640292e34f57fe58638cf0dc355e7ed0b
3
+ size 2144951284
logs/pod_90h_30k_second_v2/epoch_2nd_00006.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:637f68e2b35a7b60431920cfc9a9494d15ca971d3edab962b11c54c02c8728ad
3
+ size 2144951284
logs/pod_90h_30k_second_v2/epoch_2nd_00007.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f05c6d4bba2e93159df4e9b2bb8d3552e695476d173062467cadb57694875f49
3
+ size 2144951284
logs/pod_90h_30k_second_v2/epoch_2nd_00008.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d97bc932af34f115414e10ac988870e48e50921b0832d14ea142deb89e9d99e
3
+ size 2144951284
logs/pod_90h_30k_second_v2/epoch_2nd_00009.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b08801054bf85f09cbc03240bdf5e1ec1d834d97d0933d4df29385dc3808d3f
3
+ size 2144951284
logs/pod_90h_30k_second_v2/epoch_2nd_00010.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ae7cbe8be0a7f72aa499f92cbc17f6e7a379314675a5d4eae526eff30f1af72
3
+ size 2144951284
logs/pod_90h_30k_second_v2/epoch_2nd_00011.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:873f640d25ba128814ef68d1c63ff537bbf9d32e3d3c263584c9feeede06c5b5
3
+ size 2144951284
logs/pod_90h_30k_second_v2/epoch_2nd_00012.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a26b0348252ea0c3525df5b6f9f88c51291d7a191d5502fa7a604ebbeb89dcca
3
+ size 2144951284
logs/pod_90h_30k_second_v2/epoch_2nd_00013.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4b6e188d004bbb60c34f4807cb2c2a6593242deab4ec59e7c672a5485fe9988
3
+ size 2144951284
logs/pod_90h_30k_second_v2/epoch_2nd_00014.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2252d3258328915ef20c3569e7297723537e691479690ebd026fc426dbcf2384
3
+ size 2144951284
logs/pod_90h_30k_second_v2/epoch_2nd_00015.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43544b5d118dbdaacb98500b2f68c8e355ad53222189948e9ee2890e2fec4430
3
+ size 2144951284
logs/pod_90h_30k_second_v2/epoch_2nd_00016.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edfe04c4831cd8d61fafc6bb413572806f30a8346c033fcae3bedeb05f94e9ae
3
+ size 2144951284
logs/pod_90h_30k_second_v2/epoch_2nd_00017.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8684fe24d5bd9a1ae3e24529c19147503367aa61df2e550f34902b87179e1e10
3
+ size 2144951284
logs/pod_90h_30k_second_v2/epoch_2nd_00018.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71b08d3be791931c8c1534e2349e2d37dada50a72e700685fd8e4cf6b2d6e381
3
+ size 2144951284
logs/pod_90h_30k_second_v2/tensorboard/events.out.tfevents.1749758267.7f09b0e2c0b0.17026.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d06bd4591368eb36ccaedb27cf696641c63f468363381dee869e800814cb7a9
3
+ size 1984
logs/pod_90h_30k_second_v2/tensorboard/events.out.tfevents.1749758489.7f09b0e2c0b0.18353.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f67fd20bb1e7c93e62e1f9c7195dc5abcc7781698b70c679dc8ec3a16e4301e8
3
+ size 88
logs/pod_90h_30k_second_v2/tensorboard/events.out.tfevents.1749758524.7f09b0e2c0b0.18773.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6f25e7eb7c86858883dd108c75d5748b4c35d1c2523cd938aadd0c1d6aeb8b1
3
+ size 88
logs/pod_90h_30k_second_v2/tensorboard/events.out.tfevents.1749758552.7f09b0e2c0b0.19160.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2256999fdd31fea21179c68e82673acd9189401e9d6ae24fed4e3c2eba5c6fd
3
+ size 88
logs/pod_90h_30k_second_v2/tensorboard/events.out.tfevents.1749758602.7f09b0e2c0b0.19654.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de73c06f010bf27d9dddc26d28870a7b5cbeb43e7b1d8be67585861622895a2d
3
+ size 6761012
logs/pod_90h_30k_second_v2/tensorboard/events.out.tfevents.1749763142.7f09b0e2c0b0.41611.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:048f50f6a49f65fd244e93aed39d98e13e34c77499fc09d3bde9c1e80833a4f9
3
+ size 716
logs/pod_90h_30k_second_v2/tensorboard/events.out.tfevents.1749763329.7f09b0e2c0b0.42740.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2622d704b1a3eb973e99275d7ad036d28e17836bc139b3fe77060ca5332b2c96
3
+ size 1984
logs/pod_90h_30k_second_v2/tensorboard/events.out.tfevents.1749763548.7f09b0e2c0b0.44123.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:453ad43bcbf767292cc38b1b68b95bce85be1f0554f5773779d1c6d851056587
3
+ size 1953908
logs/pod_90h_30k_second_v2/tensorboard/events.out.tfevents.1749789808.7f09b0e2c0b0.1500.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f8e754635df505b43fc9ce9b08109bdcc715d84659fc6e8fa4dc93ba2727991
3
+ size 1344
logs/pod_90h_30k_second_v2/tensorboard/events.out.tfevents.1749790964.7f09b0e2c0b0.2345.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:704b38d288963faaa1da5a8302dc10f92e904b92545dd17d2c38f520d9bbea01
3
+ size 88
logs/pod_90h_30k_second_v2/tensorboard/events.out.tfevents.1749791414.7f09b0e2c0b0.1465.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6022662a58865c2f726d48b227e4a35f5643c01cdd8781d20152470fc8905558
3
+ size 3881480
logs/pod_90h_30k_second_v2/train.log ADDED
The diff for this file is too large to render. See raw diff
 
models.py CHANGED
@@ -703,8 +703,9 @@ def load_checkpoint(model, optimizer, path, load_only_params=True, ignore_module
703
  _ = [model[key].eval() for key in model]
704
 
705
  if not load_only_params:
706
- epoch = state["epoch"]
707
  iters = state["iters"]
 
708
  optimizer.load_state_dict(state["optimizer"])
709
  else:
710
  epoch = 0
 
703
  _ = [model[key].eval() for key in model]
704
 
705
  if not load_only_params:
706
+ epoch = state["epoch"] + 1
707
  iters = state["iters"]
708
+ print('Load checkpoint from %s, epoch %d, iters %d' % (path, epoch, iters))
709
  optimizer.load_state_dict(state["optimizer"])
710
  else:
711
  epoch = 0