Respair committed on
Commit
9e121db
·
verified ·
1 Parent(s): 6248e88

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +18 -0
  2. stylekan/Colab/StyleTTS2_Demo_LJSpeech.ipynb +486 -0
  3. stylekan/Colab/StyleTTS2_Demo_LibriTTS.ipynb +1218 -0
  4. stylekan/Colab/StyleTTS2_Finetune_Demo.ipynb +480 -0
  5. stylekan/Configs/config.yml +116 -0
  6. stylekan/Configs/config_ft.yml +123 -0
  7. stylekan/Configs/config_ft_kaede.yml +122 -0
  8. stylekan/Configs/config_kanade.yml +121 -0
  9. stylekan/Configs/config_kanade_test.yml +124 -0
  10. stylekan/Data/OOD_LargeScale_.csv +3 -0
  11. stylekan/Data/filtered_train_list.csv +3 -0
  12. stylekan/Data/metadata_cleanest/48khz_config_with_names_ids.csv +3 -0
  13. stylekan/Data/metadata_cleanest/FT_imas copy.csv +50 -0
  14. stylekan/Data/metadata_cleanest/FT_imas.csv +0 -0
  15. stylekan/Data/metadata_cleanest/FT_imas_remapped.csv +3 -0
  16. stylekan/Data/metadata_cleanest/FT_imas_valid.csv +131 -0
  17. stylekan/Data/metadata_cleanest/FT_imas_valid_less_than_20sec.csv +126 -0
  18. stylekan/Data/metadata_cleanest/FT_imas_valid_more_than_10sec.csv +17 -0
  19. stylekan/Data/metadata_cleanest/FT_saori.csv +0 -0
  20. stylekan/Data/metadata_cleanest/FT_saori_valid.csv +23 -0
  21. stylekan/Data/metadata_cleanest/filtered_train_list.csv +3 -0
  22. stylekan/Data/metadata_cleanest/filtered_train_list_no_nsp.csv +3 -0
  23. stylekan/Data/metadata_cleanest/filtered_train_list_no_nsp_HEADER_plus.csv +3 -0
  24. stylekan/Data/metadata_cleanest/filtered_train_list_no_nsp_plus.csv +3 -0
  25. stylekan/Data/metadata_cleanest/prelude.csv +3 -0
  26. stylekan/Data/metadata_cleanest/prelude_id.csv +3 -0
  27. stylekan/Data/metadata_cleanest/train_48_pure.csv +3 -0
  28. stylekan/Data/metadata_cleanest/val_48_pure.csv +126 -0
  29. stylekan/Data/metadata_cleanest/val_48_pure.txt +120 -0
  30. stylekan/Data/mg_valid.csv +198 -0
  31. stylekan/Data/mg_valid.txt +121 -0
  32. stylekan/Data/moe_res/imas_split/ranko/ranko_cgss/ranko_chara_198/ranko_chara_198.acb.tmp +0 -0
  33. stylekan/Data/moe_res/imas_split/shiki/shiki_fine/phonemizerASR_script_jpn.py +804 -0
  34. stylekan/Data/moe_res/imas_split/shiki/shiki_fine/shiki_finetune.csv +78 -0
  35. stylekan/Data/train_48_200k.csv +3 -0
  36. stylekan/Data/train_List.csv +3 -0
  37. stylekan/Data/train_List_updated.csv +3 -0
  38. stylekan/Data/val_48_200k.csv +223 -0
  39. stylekan/Demo/Inference_LJSpeech.ipynb +562 -0
  40. stylekan/Demo/Inference_LibriTTS.ipynb +3 -0
  41. stylekan/Demo/infer_24khz.ipynb +3 -0
  42. stylekan/Demo/syuko_style_vectors.csv +0 -0
  43. stylekan/LICENSE +21 -0
  44. stylekan/Models/Style_Kanade/2nd_phase_last.pth +3 -0
  45. stylekan/Models/Style_Kanade/NO_SLM_3_epoch_2nd_00002.pth +3 -0
  46. stylekan/Models/Style_Kanade/NO_SLM_epoch_2nd_00002.pth +3 -0
  47. stylekan/Models/Style_Kanade/NO_SLM_epoch_2nd_00004.pth +3 -0
  48. stylekan/Models/Style_Kanade/config_kanade.yml +23 -0
  49. stylekan/Models/Style_Kanade/epoch_1st_00013.pth +3 -0
  50. stylekan/Models/Style_Kanade/epoch_2nd_00000.pth +3 -0
.gitattributes CHANGED
@@ -40,3 +40,21 @@ stts_48khz/StyleTTS2_48khz/Utils/JDC/bst_rmvpe_48k.t7 filter=lfs diff=lfs merge=
40
  stts_48khz/StyleTTS2_48khz/infer.ipynb filter=lfs diff=lfs merge=lfs -text
41
  jp_p2g.jsonl filter=lfs diff=lfs merge=lfs -text
42
  LLM_Clean_TEKNIUM.jsonl filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  stts_48khz/StyleTTS2_48khz/infer.ipynb filter=lfs diff=lfs merge=lfs -text
41
  jp_p2g.jsonl filter=lfs diff=lfs merge=lfs -text
42
  LLM_Clean_TEKNIUM.jsonl filter=lfs diff=lfs merge=lfs -text
43
+ stylekan/Data/OOD_LargeScale_.csv filter=lfs diff=lfs merge=lfs -text
44
+ stylekan/Data/filtered_train_list.csv filter=lfs diff=lfs merge=lfs -text
45
+ stylekan/Data/metadata_cleanest/48khz_config_with_names_ids.csv filter=lfs diff=lfs merge=lfs -text
46
+ stylekan/Data/metadata_cleanest/FT_imas_remapped.csv filter=lfs diff=lfs merge=lfs -text
47
+ stylekan/Data/metadata_cleanest/filtered_train_list.csv filter=lfs diff=lfs merge=lfs -text
48
+ stylekan/Data/metadata_cleanest/filtered_train_list_no_nsp.csv filter=lfs diff=lfs merge=lfs -text
49
+ stylekan/Data/metadata_cleanest/filtered_train_list_no_nsp_HEADER_plus.csv filter=lfs diff=lfs merge=lfs -text
50
+ stylekan/Data/metadata_cleanest/filtered_train_list_no_nsp_plus.csv filter=lfs diff=lfs merge=lfs -text
51
+ stylekan/Data/metadata_cleanest/prelude.csv filter=lfs diff=lfs merge=lfs -text
52
+ stylekan/Data/metadata_cleanest/prelude_id.csv filter=lfs diff=lfs merge=lfs -text
53
+ stylekan/Data/metadata_cleanest/train_48_pure.csv filter=lfs diff=lfs merge=lfs -text
54
+ stylekan/Data/train_48_200k.csv filter=lfs diff=lfs merge=lfs -text
55
+ stylekan/Data/train_List.csv filter=lfs diff=lfs merge=lfs -text
56
+ stylekan/Data/train_List_updated.csv filter=lfs diff=lfs merge=lfs -text
57
+ stylekan/Demo/Inference_LibriTTS.ipynb filter=lfs diff=lfs merge=lfs -text
58
+ stylekan/Demo/infer_24khz.ipynb filter=lfs diff=lfs merge=lfs -text
59
+ stylekan/Utils/JDC/bst.t7 filter=lfs diff=lfs merge=lfs -text
60
+ stylekan/Utils/PLBERT/step_1050000.t7 filter=lfs diff=lfs merge=lfs -text
stylekan/Colab/StyleTTS2_Demo_LJSpeech.ipynb ADDED
@@ -0,0 +1,486 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": [],
7
+ "gpuType": "T4",
8
+ "authorship_tag": "ABX9TyM1x2mx2VnkYNFVlD+DFzmy",
9
+ "include_colab_link": true
10
+ },
11
+ "kernelspec": {
12
+ "name": "python3",
13
+ "display_name": "Python 3"
14
+ },
15
+ "language_info": {
16
+ "name": "python"
17
+ },
18
+ "accelerator": "GPU"
19
+ },
20
+ "cells": [
21
+ {
22
+ "cell_type": "markdown",
23
+ "metadata": {
24
+ "id": "view-in-github",
25
+ "colab_type": "text"
26
+ },
27
+ "source": [
28
+ "<a href=\"https://colab.research.google.com/github/yl4579/StyleTTS2/blob/main/Colab/StyleTTS2_Demo_LJSpeech.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
29
+ ]
30
+ },
31
+ {
32
+ "cell_type": "markdown",
33
+ "source": [
34
+ "### Install packages and download models"
35
+ ],
36
+ "metadata": {
37
+ "id": "nm653VK4CG9F"
38
+ }
39
+ },
40
+ {
41
+ "cell_type": "code",
42
+ "source": [
43
+ "%%shell\n",
44
+ "git clone https://github.com/yl4579/StyleTTS2.git\n",
45
+ "cd StyleTTS2\n",
46
+ "pip install SoundFile torchaudio munch torch pydub pyyaml librosa nltk matplotlib accelerate transformers phonemizer einops einops-exts tqdm typing-extensions git+https://github.com/resemble-ai/monotonic_align.git\n",
47
+ "sudo apt-get install espeak-ng\n",
48
+ "git-lfs clone https://huggingface.co/yl4579/StyleTTS2-LJSpeech\n",
49
+ "mv StyleTTS2-LJSpeech/Models ."
50
+ ],
51
+ "metadata": {
52
+ "id": "gciBKMqCCLvT"
53
+ },
54
+ "execution_count": null,
55
+ "outputs": []
56
+ },
57
+ {
58
+ "cell_type": "markdown",
59
+ "source": [
60
+ "### Load models"
61
+ ],
62
+ "metadata": {
63
+ "id": "OAA8lx-XCQnM"
64
+ }
65
+ },
66
+ {
67
+ "cell_type": "code",
68
+ "source": [
69
+ "%cd StyleTTS2\n",
70
+ "\n",
71
+ "import torch\n",
72
+ "torch.manual_seed(0)\n",
73
+ "torch.backends.cudnn.benchmark = False\n",
74
+ "torch.backends.cudnn.deterministic = True\n",
75
+ "\n",
76
+ "import random\n",
77
+ "random.seed(0)\n",
78
+ "\n",
79
+ "import numpy as np\n",
80
+ "np.random.seed(0)\n",
81
+ "\n",
82
+ "import nltk\n",
83
+ "nltk.download('punkt')\n",
84
+ "\n",
85
+ "# load packages\n",
86
+ "import time\n",
87
+ "import random\n",
88
+ "import yaml\n",
89
+ "from munch import Munch\n",
90
+ "import numpy as np\n",
91
+ "import torch\n",
92
+ "from torch import nn\n",
93
+ "import torch.nn.functional as F\n",
94
+ "import torchaudio\n",
95
+ "import librosa\n",
96
+ "from nltk.tokenize import word_tokenize\n",
97
+ "\n",
98
+ "from models import *\n",
99
+ "from utils import *\n",
100
+ "from text_utils import TextCleaner\n",
101
+ "textclenaer = TextCleaner()\n",
102
+ "\n",
103
+ "%matplotlib inline\n",
104
+ "\n",
105
+ "device = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
106
+ "\n",
107
+ "to_mel = torchaudio.transforms.MelSpectrogram(\n",
108
+ " n_mels=80, n_fft=2048, win_length=1200, hop_length=300)\n",
109
+ "mean, std = -4, 4\n",
110
+ "\n",
111
+ "def length_to_mask(lengths):\n",
112
+ " mask = torch.arange(lengths.max()).unsqueeze(0).expand(lengths.shape[0], -1).type_as(lengths)\n",
113
+ " mask = torch.gt(mask+1, lengths.unsqueeze(1))\n",
114
+ " return mask\n",
115
+ "\n",
116
+ "def preprocess(wave):\n",
117
+ " wave_tensor = torch.from_numpy(wave).float()\n",
118
+ " mel_tensor = to_mel(wave_tensor)\n",
119
+ " mel_tensor = (torch.log(1e-5 + mel_tensor.unsqueeze(0)) - mean) / std\n",
120
+ " return mel_tensor\n",
121
+ "\n",
122
+ "def compute_style(ref_dicts):\n",
123
+ " reference_embeddings = {}\n",
124
+ " for key, path in ref_dicts.items():\n",
125
+ " wave, sr = librosa.load(path, sr=24000)\n",
126
+ " audio, index = librosa.effects.trim(wave, top_db=30)\n",
127
+ " if sr != 24000:\n",
128
+ " audio = librosa.resample(audio, sr, 24000)\n",
129
+ " mel_tensor = preprocess(audio).to(device)\n",
130
+ "\n",
131
+ " with torch.no_grad():\n",
132
+ " ref = model.style_encoder(mel_tensor.unsqueeze(1))\n",
133
+ " reference_embeddings[key] = (ref.squeeze(1), audio)\n",
134
+ "\n",
135
+ " return reference_embeddings\n",
136
+ "\n",
137
+ "# load phonemizer\n",
138
+ "import phonemizer\n",
139
+ "global_phonemizer = phonemizer.backend.EspeakBackend(language='en-us', preserve_punctuation=True, with_stress=True, words_mismatch='ignore')\n",
140
+ "\n",
141
+ "config = yaml.safe_load(open(\"Models/LJSpeech/config.yml\"))\n",
142
+ "\n",
143
+ "# load pretrained ASR model\n",
144
+ "ASR_config = config.get('ASR_config', False)\n",
145
+ "ASR_path = config.get('ASR_path', False)\n",
146
+ "text_aligner = load_ASR_models(ASR_path, ASR_config)\n",
147
+ "\n",
148
+ "# load pretrained F0 model\n",
149
+ "F0_path = config.get('F0_path', False)\n",
150
+ "pitch_extractor = load_F0_models(F0_path)\n",
151
+ "\n",
152
+ "# load BERT model\n",
153
+ "from Utils.PLBERT.util import load_plbert\n",
154
+ "BERT_path = config.get('PLBERT_dir', False)\n",
155
+ "plbert = load_plbert(BERT_path)\n",
156
+ "\n",
157
+ "model = build_model(recursive_munch(config['model_params']), text_aligner, pitch_extractor, plbert)\n",
158
+ "_ = [model[key].eval() for key in model]\n",
159
+ "_ = [model[key].to(device) for key in model]\n",
160
+ "\n",
161
+ "params_whole = torch.load(\"Models/LJSpeech/epoch_2nd_00100.pth\", map_location='cpu')\n",
162
+ "params = params_whole['net']\n",
163
+ "\n",
164
+ "for key in model:\n",
165
+ " if key in params:\n",
166
+ " print('%s loaded' % key)\n",
167
+ " try:\n",
168
+ " model[key].load_state_dict(params[key])\n",
169
+ " except:\n",
170
+ " from collections import OrderedDict\n",
171
+ " state_dict = params[key]\n",
172
+ " new_state_dict = OrderedDict()\n",
173
+ " for k, v in state_dict.items():\n",
174
+ " name = k[7:] # remove `module.`\n",
175
+ " new_state_dict[name] = v\n",
176
+ " # load params\n",
177
+ " model[key].load_state_dict(new_state_dict, strict=False)\n",
178
+ "# except:\n",
179
+ "# _load(params[key], model[key])\n",
180
+ "_ = [model[key].eval() for key in model]\n",
181
+ "\n",
182
+ "from Modules.diffusion.sampler import DiffusionSampler, ADPM2Sampler, KarrasSchedule\n",
183
+ "\n",
184
+ "sampler = DiffusionSampler(\n",
185
+ " model.diffusion.diffusion,\n",
186
+ " sampler=ADPM2Sampler(),\n",
187
+ " sigma_schedule=KarrasSchedule(sigma_min=0.0001, sigma_max=3.0, rho=9.0), # empirical parameters\n",
188
+ " clamp=False\n",
189
+ ")\n",
190
+ "\n",
191
+ "def inference(text, noise, diffusion_steps=5, embedding_scale=1):\n",
192
+ " text = text.strip()\n",
193
+ " text = text.replace('\"', '')\n",
194
+ " ps = global_phonemizer.phonemize([text])\n",
195
+ " ps = word_tokenize(ps[0])\n",
196
+ " ps = ' '.join(ps)\n",
197
+ "\n",
198
+ " tokens = textclenaer(ps)\n",
199
+ " tokens.insert(0, 0)\n",
200
+ " tokens = torch.LongTensor(tokens).to(device).unsqueeze(0)\n",
201
+ "\n",
202
+ " with torch.no_grad():\n",
203
+ " input_lengths = torch.LongTensor([tokens.shape[-1]]).to(tokens.device)\n",
204
+ " text_mask = length_to_mask(input_lengths).to(tokens.device)\n",
205
+ "\n",
206
+ " t_en = model.text_encoder(tokens, input_lengths, text_mask)\n",
207
+ " bert_dur = model.bert(tokens, attention_mask=(~text_mask).int())\n",
208
+ " d_en = model.bert_encoder(bert_dur).transpose(-1, -2)\n",
209
+ "\n",
210
+ " s_pred = sampler(noise,\n",
211
+ " embedding=bert_dur[0].unsqueeze(0), num_steps=diffusion_steps,\n",
212
+ " embedding_scale=embedding_scale).squeeze(0)\n",
213
+ "\n",
214
+ " s = s_pred[:, 128:]\n",
215
+ " ref = s_pred[:, :128]\n",
216
+ "\n",
217
+ " d = model.predictor.text_encoder(d_en, s, input_lengths, text_mask)\n",
218
+ "\n",
219
+ " x, _ = model.predictor.lstm(d)\n",
220
+ " duration = model.predictor.duration_proj(x)\n",
221
+ " duration = torch.sigmoid(duration).sum(axis=-1)\n",
222
+ " pred_dur = torch.round(duration.squeeze()).clamp(min=1)\n",
223
+ "\n",
224
+ " pred_dur[-1] += 5\n",
225
+ "\n",
226
+ " pred_aln_trg = torch.zeros(input_lengths, int(pred_dur.sum().data))\n",
227
+ " c_frame = 0\n",
228
+ " for i in range(pred_aln_trg.size(0)):\n",
229
+ " pred_aln_trg[i, c_frame:c_frame + int(pred_dur[i].data)] = 1\n",
230
+ " c_frame += int(pred_dur[i].data)\n",
231
+ "\n",
232
+ " # encode prosody\n",
233
+ " en = (d.transpose(-1, -2) @ pred_aln_trg.unsqueeze(0).to(device))\n",
234
+ " F0_pred, N_pred = model.predictor.F0Ntrain(en, s)\n",
235
+ " out = model.decoder((t_en @ pred_aln_trg.unsqueeze(0).to(device)),\n",
236
+ " F0_pred, N_pred, ref.squeeze().unsqueeze(0))\n",
237
+ "\n",
238
+ " return out.squeeze().cpu().numpy()\n",
239
+ "\n",
240
+ "def LFinference(text, s_prev, noise, alpha=0.7, diffusion_steps=5, embedding_scale=1):\n",
241
+ " text = text.strip()\n",
242
+ " text = text.replace('\"', '')\n",
243
+ " ps = global_phonemizer.phonemize([text])\n",
244
+ " ps = word_tokenize(ps[0])\n",
245
+ " ps = ' '.join(ps)\n",
246
+ "\n",
247
+ " tokens = textclenaer(ps)\n",
248
+ " tokens.insert(0, 0)\n",
249
+ " tokens = torch.LongTensor(tokens).to(device).unsqueeze(0)\n",
250
+ "\n",
251
+ " with torch.no_grad():\n",
252
+ " input_lengths = torch.LongTensor([tokens.shape[-1]]).to(tokens.device)\n",
253
+ " text_mask = length_to_mask(input_lengths).to(tokens.device)\n",
254
+ "\n",
255
+ " t_en = model.text_encoder(tokens, input_lengths, text_mask)\n",
256
+ " bert_dur = model.bert(tokens, attention_mask=(~text_mask).int())\n",
257
+ " d_en = model.bert_encoder(bert_dur).transpose(-1, -2)\n",
258
+ "\n",
259
+ " s_pred = sampler(noise,\n",
260
+ " embedding=bert_dur[0].unsqueeze(0), num_steps=diffusion_steps,\n",
261
+ " embedding_scale=embedding_scale).squeeze(0)\n",
262
+ "\n",
263
+ " if s_prev is not None:\n",
264
+ " # convex combination of previous and current style\n",
265
+ " s_pred = alpha * s_prev + (1 - alpha) * s_pred\n",
266
+ "\n",
267
+ " s = s_pred[:, 128:]\n",
268
+ " ref = s_pred[:, :128]\n",
269
+ "\n",
270
+ " d = model.predictor.text_encoder(d_en, s, input_lengths, text_mask)\n",
271
+ "\n",
272
+ " x, _ = model.predictor.lstm(d)\n",
273
+ " duration = model.predictor.duration_proj(x)\n",
274
+ " duration = torch.sigmoid(duration).sum(axis=-1)\n",
275
+ " pred_dur = torch.round(duration.squeeze()).clamp(min=1)\n",
276
+ "\n",
277
+ " pred_aln_trg = torch.zeros(input_lengths, int(pred_dur.sum().data))\n",
278
+ " c_frame = 0\n",
279
+ " for i in range(pred_aln_trg.size(0)):\n",
280
+ " pred_aln_trg[i, c_frame:c_frame + int(pred_dur[i].data)] = 1\n",
281
+ " c_frame += int(pred_dur[i].data)\n",
282
+ "\n",
283
+ " # encode prosody\n",
284
+ " en = (d.transpose(-1, -2) @ pred_aln_trg.unsqueeze(0).to(device))\n",
285
+ " F0_pred, N_pred = model.predictor.F0Ntrain(en, s)\n",
286
+ " out = model.decoder((t_en @ pred_aln_trg.unsqueeze(0).to(device)),\n",
287
+ " F0_pred, N_pred, ref.squeeze().unsqueeze(0))\n",
288
+ "\n",
289
+ " return out.squeeze().cpu().numpy(), s_pred"
290
+ ],
291
+ "metadata": {
292
+ "id": "m0XRpbxSCSix"
293
+ },
294
+ "execution_count": null,
295
+ "outputs": []
296
+ },
297
+ {
298
+ "cell_type": "markdown",
299
+ "source": [
300
+ "### Synthesize speech"
301
+ ],
302
+ "metadata": {
303
+ "id": "vuCbS0gdArgJ"
304
+ }
305
+ },
306
+ {
307
+ "cell_type": "code",
308
+ "source": [
309
+ "# @title Input Text { display-mode: \"form\" }\n",
310
+ "# synthesize a text\n",
311
+ "text = \"StyleTTS 2 is a text-to-speech model that leverages style diffusion and adversarial training with large speech language models to achieve human-level text-to-speech synthesis.\" # @param {type:\"string\"}\n"
312
+ ],
313
+ "metadata": {
314
+ "id": "7Ud1Y-kbBPTw"
315
+ },
316
+ "execution_count": 3,
317
+ "outputs": []
318
+ },
319
+ {
320
+ "cell_type": "markdown",
321
+ "source": [
322
+ "#### Basic synthesis (5 diffusion steps)"
323
+ ],
324
+ "metadata": {
325
+ "id": "TM2NjuM7B6sz"
326
+ }
327
+ },
328
+ {
329
+ "cell_type": "code",
330
+ "source": [
331
+ "start = time.time()\n",
332
+ "noise = torch.randn(1,1,256).to(device)\n",
333
+ "wav = inference(text, noise, diffusion_steps=5, embedding_scale=1)\n",
334
+ "rtf = (time.time() - start) / (len(wav) / 24000)\n",
335
+ "print(f\"RTF = {rtf:5f}\")\n",
336
+ "import IPython.display as ipd\n",
337
+ "display(ipd.Audio(wav, rate=24000))"
338
+ ],
339
+ "metadata": {
340
+ "id": "KILqC-V-Ay5e"
341
+ },
342
+ "execution_count": null,
343
+ "outputs": []
344
+ },
345
+ {
346
+ "cell_type": "markdown",
347
+ "source": [
348
+ "#### With higher diffusion steps (more diverse)\n",
349
+ "Since the sampler is ancestral, the higher the number of steps, the more diverse the samples are, at the cost of slower synthesis speed."
350
+ ],
351
+ "metadata": {
352
+ "id": "oZk9o-EzCBVx"
353
+ }
354
+ },
355
+ {
356
+ "cell_type": "code",
357
+ "source": [
358
+ "start = time.time()\n",
359
+ "noise = torch.randn(1,1,256).to(device)\n",
360
+ "wav = inference(text, noise, diffusion_steps=10, embedding_scale=1)\n",
361
+ "rtf = (time.time() - start) / (len(wav) / 24000)\n",
362
+ "print(f\"RTF = {rtf:5f}\")\n",
363
+ "import IPython.display as ipd\n",
364
+ "display(ipd.Audio(wav, rate=24000))"
365
+ ],
366
+ "metadata": {
367
+ "id": "9_OHtzMbB9gL"
368
+ },
369
+ "execution_count": null,
370
+ "outputs": []
371
+ },
372
+ {
373
+ "cell_type": "markdown",
374
+ "source": [
375
+ "### Speech expressiveness\n",
376
+ "The following section recreates the samples shown in [Section 6](https://styletts2.github.io/#emo) of the demo page."
377
+ ],
378
+ "metadata": {
379
+ "id": "NyDACd-0CaqL"
380
+ }
381
+ },
382
+ {
383
+ "cell_type": "markdown",
384
+ "source": [
385
+ "#### With embedding_scale=1\n",
386
+ "This is the classifier-free guidance scale. The higher the scale, the more strongly the style is conditioned on the input text, and hence the more emotional the speech."
387
+ ],
388
+ "metadata": {
389
+ "id": "cRkS5VWxCck4"
390
+ }
391
+ },
392
+ {
393
+ "cell_type": "code",
394
+ "source": [
395
+ "texts = {}\n",
396
+ "texts['Happy'] = \"We are happy to invite you to join us on a journey to the past, where we will visit the most amazing monuments ever built by human hands.\"\n",
397
+ "texts['Sad'] = \"I am sorry to say that we have suffered a severe setback in our efforts to restore prosperity and confidence.\"\n",
398
+ "texts['Angry'] = \"The field of astronomy is a joke! Its theories are based on flawed observations and biased interpretations!\"\n",
399
+ "texts['Surprised'] = \"I can't believe it! You mean to tell me that you have discovered a new species of bacteria in this pond?\"\n",
400
+ "\n",
401
+ "for k,v in texts.items():\n",
402
+ " noise = torch.randn(1,1,256).to(device)\n",
403
+ " wav = inference(v, noise, diffusion_steps=10, embedding_scale=1)\n",
404
+ " print(k + \": \")\n",
405
+ " display(ipd.Audio(wav, rate=24000, normalize=False))"
406
+ ],
407
+ "metadata": {
408
+ "id": "H5g5RO-mCbZB"
409
+ },
410
+ "execution_count": null,
411
+ "outputs": []
412
+ },
413
+ {
414
+ "cell_type": "markdown",
415
+ "source": [
416
+ "#### With embedding_scale=2"
417
+ ],
418
+ "metadata": {
419
+ "id": "f4S8TXSpCgpA"
420
+ }
421
+ },
422
+ {
423
+ "cell_type": "code",
424
+ "source": [
425
+ "texts = {}\n",
426
+ "texts['Happy'] = \"We are happy to invite you to join us on a journey to the past, where we will visit the most amazing monuments ever built by human hands.\"\n",
427
+ "texts['Sad'] = \"I am sorry to say that we have suffered a severe setback in our efforts to restore prosperity and confidence.\"\n",
428
+ "texts['Angry'] = \"The field of astronomy is a joke! Its theories are based on flawed observations and biased interpretations!\"\n",
429
+ "texts['Surprised'] = \"I can't believe it! You mean to tell me that you have discovered a new species of bacteria in this pond?\"\n",
430
+ "\n",
431
+ "for k,v in texts.items():\n",
432
+ " noise = torch.randn(1,1,256).to(device)\n",
433
+ " wav = inference(v, noise, diffusion_steps=10, embedding_scale=2) # embedding_scale=2 for more pronounced emotion\n",
434
+ " print(k + \": \")\n",
435
+ " display(ipd.Audio(wav, rate=24000, normalize=False))"
436
+ ],
437
+ "metadata": {
438
+ "id": "xHHIdeNrCezC"
439
+ },
440
+ "execution_count": null,
441
+ "outputs": []
442
+ },
443
+ {
444
+ "cell_type": "markdown",
445
+ "source": [
446
+ "### Long-form generation\n",
447
+ "This section includes a basic implementation of Algorithm 1 in the paper for consistent long-form audio generation. The example passage is taken from [Section 5](https://styletts2.github.io/#long) of the demo page."
448
+ ],
449
+ "metadata": {
450
+ "id": "nAh7Tov4CkuH"
451
+ }
452
+ },
453
+ {
454
+ "cell_type": "code",
455
+ "source": [
456
+ "passage = '''If the supply of fruit is greater than the family needs, it may be made a source of income by sending the fresh fruit to the market if there is one near enough, or by preserving, canning, and making jelly for sale. To make such an enterprise a success the fruit and work must be first class. There is magic in the word \"Homemade,\" when the product appeals to the eye and the palate; but many careless and incompetent people have found to their sorrow that this word has not magic enough to float inferior goods on the market. As a rule large canning and preserving establishments are clean and have the best appliances, and they employ chemists and skilled labor. The home product must be very good to compete with the attractive goods that are sent out from such establishments. Yet for first-class homemade products there is a market in all large cities. All first-class grocers have customers who purchase such goods.''' # @param {type:\"string\"}"
457
+ ],
458
+ "metadata": {
459
+ "cellView": "form",
460
+ "id": "IJwUbgvACoDu"
461
+ },
462
+ "execution_count": 8,
463
+ "outputs": []
464
+ },
465
+ {
466
+ "cell_type": "code",
467
+ "source": [
468
+ "sentences = passage.split('.') # simple split by period\n",
469
+ "wavs = []\n",
470
+ "s_prev = None\n",
471
+ "for text in sentences:\n",
472
+ " if text.strip() == \"\": continue\n",
473
+ " text += '.' # add it back\n",
474
+ " noise = torch.randn(1,1,256).to(device)\n",
475
+ " wav, s_prev = LFinference(text, s_prev, noise, alpha=0.7, diffusion_steps=10, embedding_scale=1.5)\n",
476
+ " wavs.append(wav)\n",
477
+ "display(ipd.Audio(np.concatenate(wavs), rate=24000, normalize=False))"
478
+ ],
479
+ "metadata": {
480
+ "id": "nP-7i2QAC0JT"
481
+ },
482
+ "execution_count": null,
483
+ "outputs": []
484
+ }
485
+ ]
486
+ }
stylekan/Colab/StyleTTS2_Demo_LibriTTS.ipynb ADDED
@@ -0,0 +1,1218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {
6
+ "id": "view-in-github",
7
+ "colab_type": "text"
8
+ },
9
+ "source": [
10
+ "<a href=\"https://colab.research.google.com/github/yl4579/StyleTTS2/blob/main/Colab/StyleTTS2_Demo_LibriTTS.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "markdown",
15
+ "metadata": {
16
+ "id": "aAGQPfgYIR23"
17
+ },
18
+ "source": [
19
+ "### Install packages and download models"
20
+ ]
21
+ },
22
+ {
23
+ "cell_type": "code",
24
+ "execution_count": null,
25
+ "metadata": {
26
+ "colab": {
27
+ "base_uri": "https://localhost:8080/"
28
+ },
29
+ "id": "zDPW5uSpISd2",
30
+ "outputId": "6463ff79-18d5-4071-c6ad-01947beeb368"
31
+ },
32
+ "outputs": [
33
+ {
34
+ "output_type": "stream",
35
+ "name": "stdout",
36
+ "text": [
37
+
38
+ ]
39
+ }
40
+ ],
41
+ "source": [
42
+ "%%shell\n",
43
+ "git clone https://github.com/yl4579/StyleTTS2.git\n",
44
+ "cd StyleTTS2\n",
45
+ "pip install SoundFile torchaudio munch torch pydub pyyaml librosa nltk matplotlib accelerate transformers phonemizer einops einops-exts tqdm typing-extensions git+https://github.com/resemble-ai/monotonic_align.git\n",
46
+ "sudo apt-get install espeak-ng\n",
47
+ "git-lfs clone https://huggingface.co/yl4579/StyleTTS2-LibriTTS\n",
48
+ "mv StyleTTS2-LibriTTS/Models .\n",
49
+ "mv StyleTTS2-LibriTTS/reference_audio.zip .\n",
50
+ "unzip reference_audio.zip\n",
51
+ "mv reference_audio Demo/reference_audio"
52
+ ]
53
+ },
54
+ {
55
+ "cell_type": "markdown",
56
+ "metadata": {
57
+ "id": "eJdB_nCOIVIN"
58
+ },
59
+ "source": [
60
+ "### Load models"
61
+ ]
62
+ },
63
+ {
64
+ "cell_type": "code",
65
+ "execution_count": null,
66
+ "metadata": {
67
+ "id": "cha8Tr2uJwN0"
68
+ },
69
+ "outputs": [],
70
+ "source": [
71
+ "import nltk\n",
72
+ "nltk.download('punkt')"
73
+ ]
74
+ },
75
+ {
76
+ "cell_type": "code",
77
+ "execution_count": null,
78
+ "metadata": {
79
+ "id": "Qoow8Wd8ITtm"
80
+ },
81
+ "outputs": [],
82
+ "source": [
83
+ "%cd StyleTTS2\n",
84
+ "\n",
85
+ "import torch\n",
86
+ "torch.manual_seed(0)\n",
87
+ "torch.backends.cudnn.benchmark = False\n",
88
+ "torch.backends.cudnn.deterministic = True\n",
89
+ "\n",
90
+ "import random\n",
91
+ "random.seed(0)\n",
92
+ "\n",
93
+ "import numpy as np\n",
94
+ "np.random.seed(0)\n",
95
+ "\n",
96
+ "# load packages\n",
97
+ "import time\n",
98
+ "import random\n",
99
+ "import yaml\n",
100
+ "from munch import Munch\n",
101
+ "import numpy as np\n",
102
+ "import torch\n",
103
+ "from torch import nn\n",
104
+ "import torch.nn.functional as F\n",
105
+ "import torchaudio\n",
106
+ "import librosa\n",
107
+ "from nltk.tokenize import word_tokenize\n",
108
+ "\n",
109
+ "from models import *\n",
110
+ "from utils import *\n",
111
+ "from text_utils import TextCleaner\n",
112
+ "textclenaer = TextCleaner()\n",
113
+ "\n",
114
+ "%matplotlib inline\n",
115
+ "\n",
116
+ "to_mel = torchaudio.transforms.MelSpectrogram(\n",
117
+ " n_mels=80, n_fft=2048, win_length=1200, hop_length=300)\n",
118
+ "mean, std = -4, 4\n",
119
+ "\n",
120
+ "def length_to_mask(lengths):\n",
121
+ " mask = torch.arange(lengths.max()).unsqueeze(0).expand(lengths.shape[0], -1).type_as(lengths)\n",
122
+ " mask = torch.gt(mask+1, lengths.unsqueeze(1))\n",
123
+ " return mask\n",
124
+ "\n",
125
+ "def preprocess(wave):\n",
126
+ " wave_tensor = torch.from_numpy(wave).float()\n",
127
+ " mel_tensor = to_mel(wave_tensor)\n",
128
+ " mel_tensor = (torch.log(1e-5 + mel_tensor.unsqueeze(0)) - mean) / std\n",
129
+ " return mel_tensor\n",
130
+ "\n",
131
+ "def compute_style(path):\n",
132
+ " wave, sr = librosa.load(path, sr=24000)\n",
133
+ " audio, index = librosa.effects.trim(wave, top_db=30)\n",
134
+ " if sr != 24000:\n",
135
+ " audio = librosa.resample(audio, sr, 24000)\n",
136
+ " mel_tensor = preprocess(audio).to(device)\n",
137
+ "\n",
138
+ " with torch.no_grad():\n",
139
+ " ref_s = model.style_encoder(mel_tensor.unsqueeze(1))\n",
140
+ " ref_p = model.predictor_encoder(mel_tensor.unsqueeze(1))\n",
141
+ "\n",
142
+ " return torch.cat([ref_s, ref_p], dim=1)\n",
143
+ "\n",
144
+ "device = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
145
+ "\n",
146
+ "# load phonemizer\n",
147
+ "import phonemizer\n",
148
+ "global_phonemizer = phonemizer.backend.EspeakBackend(language='en-us', preserve_punctuation=True, with_stress=True)\n",
149
+ "\n",
150
+ "config = yaml.safe_load(open(\"Models/LibriTTS/config.yml\"))\n",
151
+ "\n",
152
+ "# load pretrained ASR model\n",
153
+ "ASR_config = config.get('ASR_config', False)\n",
154
+ "ASR_path = config.get('ASR_path', False)\n",
155
+ "text_aligner = load_ASR_models(ASR_path, ASR_config)\n",
156
+ "\n",
157
+ "# load pretrained F0 model\n",
158
+ "F0_path = config.get('F0_path', False)\n",
159
+ "pitch_extractor = load_F0_models(F0_path)\n",
160
+ "\n",
161
+ "# load BERT model\n",
162
+ "from Utils.PLBERT.util import load_plbert\n",
163
+ "BERT_path = config.get('PLBERT_dir', False)\n",
164
+ "plbert = load_plbert(BERT_path)\n",
165
+ "\n",
166
+ "model_params = recursive_munch(config['model_params'])\n",
167
+ "model = build_model(model_params, text_aligner, pitch_extractor, plbert)\n",
168
+ "_ = [model[key].eval() for key in model]\n",
169
+ "_ = [model[key].to(device) for key in model]\n",
170
+ "\n",
171
+ "params_whole = torch.load(\"Models/LibriTTS/epochs_2nd_00020.pth\", map_location='cpu')\n",
172
+ "params = params_whole['net']\n",
173
+ "\n",
174
+ "for key in model:\n",
175
+ " if key in params:\n",
176
+ " print('%s loaded' % key)\n",
177
+ " try:\n",
178
+ " model[key].load_state_dict(params[key])\n",
179
+ " except:\n",
180
+ " from collections import OrderedDict\n",
181
+ " state_dict = params[key]\n",
182
+ " new_state_dict = OrderedDict()\n",
183
+ " for k, v in state_dict.items():\n",
184
+ " name = k[7:] # remove `module.`\n",
185
+ " new_state_dict[name] = v\n",
186
+ " # load params\n",
187
+ " model[key].load_state_dict(new_state_dict, strict=False)\n",
188
+ "# except:\n",
189
+ "# _load(params[key], model[key])\n",
190
+ "_ = [model[key].eval() for key in model]\n",
191
+ "\n",
192
+ "from Modules.diffusion.sampler import DiffusionSampler, ADPM2Sampler, KarrasSchedule\n",
193
+ "\n",
194
+ "sampler = DiffusionSampler(\n",
195
+ " model.diffusion.diffusion,\n",
196
+ " sampler=ADPM2Sampler(),\n",
197
+ " sigma_schedule=KarrasSchedule(sigma_min=0.0001, sigma_max=3.0, rho=9.0), # empirical parameters\n",
198
+ " clamp=False\n",
199
+ ")\n",
200
+ "\n",
201
+ "def inference(text, ref_s, alpha = 0.3, beta = 0.7, diffusion_steps=5, embedding_scale=1):\n",
202
+ " text = text.strip()\n",
203
+ " ps = global_phonemizer.phonemize([text])\n",
204
+ " ps = word_tokenize(ps[0])\n",
205
+ " ps = ' '.join(ps)\n",
206
+ " tokens = textclenaer(ps)\n",
207
+ " tokens.insert(0, 0)\n",
208
+ " tokens = torch.LongTensor(tokens).to(device).unsqueeze(0)\n",
209
+ "\n",
210
+ " with torch.no_grad():\n",
211
+ " input_lengths = torch.LongTensor([tokens.shape[-1]]).to(device)\n",
212
+ " text_mask = length_to_mask(input_lengths).to(device)\n",
213
+ "\n",
214
+ " t_en = model.text_encoder(tokens, input_lengths, text_mask)\n",
215
+ " bert_dur = model.bert(tokens, attention_mask=(~text_mask).int())\n",
216
+ " d_en = model.bert_encoder(bert_dur).transpose(-1, -2)\n",
217
+ "\n",
218
+ " s_pred = sampler(noise = torch.randn((1, 256)).unsqueeze(1).to(device),\n",
219
+ " embedding=bert_dur,\n",
220
+ " embedding_scale=embedding_scale,\n",
221
+ " features=ref_s, # reference from the same speaker as the embedding\n",
222
+ " num_steps=diffusion_steps).squeeze(1)\n",
223
+ "\n",
224
+ "\n",
225
+ " s = s_pred[:, 128:]\n",
226
+ " ref = s_pred[:, :128]\n",
227
+ "\n",
228
+ " ref = alpha * ref + (1 - alpha) * ref_s[:, :128]\n",
229
+ " s = beta * s + (1 - beta) * ref_s[:, 128:]\n",
230
+ "\n",
231
+ " d = model.predictor.text_encoder(d_en,\n",
232
+ " s, input_lengths, text_mask)\n",
233
+ "\n",
234
+ " x, _ = model.predictor.lstm(d)\n",
235
+ " duration = model.predictor.duration_proj(x)\n",
236
+ "\n",
237
+ " duration = torch.sigmoid(duration).sum(axis=-1)\n",
238
+ " pred_dur = torch.round(duration.squeeze()).clamp(min=1)\n",
239
+ "\n",
240
+ "\n",
241
+ " pred_aln_trg = torch.zeros(input_lengths, int(pred_dur.sum().data))\n",
242
+ " c_frame = 0\n",
243
+ " for i in range(pred_aln_trg.size(0)):\n",
244
+ " pred_aln_trg[i, c_frame:c_frame + int(pred_dur[i].data)] = 1\n",
245
+ " c_frame += int(pred_dur[i].data)\n",
246
+ "\n",
247
+ " # encode prosody\n",
248
+ " en = (d.transpose(-1, -2) @ pred_aln_trg.unsqueeze(0).to(device))\n",
249
+ " if model_params.decoder.type == \"hifigan\":\n",
250
+ " asr_new = torch.zeros_like(en)\n",
251
+ " asr_new[:, :, 0] = en[:, :, 0]\n",
252
+ " asr_new[:, :, 1:] = en[:, :, 0:-1]\n",
253
+ " en = asr_new\n",
254
+ "\n",
255
+ " F0_pred, N_pred = model.predictor.F0Ntrain(en, s)\n",
256
+ "\n",
257
+ " asr = (t_en @ pred_aln_trg.unsqueeze(0).to(device))\n",
258
+ " if model_params.decoder.type == \"hifigan\":\n",
259
+ " asr_new = torch.zeros_like(asr)\n",
260
+ " asr_new[:, :, 0] = asr[:, :, 0]\n",
261
+ " asr_new[:, :, 1:] = asr[:, :, 0:-1]\n",
262
+ " asr = asr_new\n",
263
+ "\n",
264
+ " out = model.decoder(asr,\n",
265
+ " F0_pred, N_pred, ref.squeeze().unsqueeze(0))\n",
266
+ "\n",
267
+ "\n",
268
+ " return out.squeeze().cpu().numpy()[..., :-50] # weird pulse at the end of the model, need to be fixed later\n",
269
+ "\n",
270
+ "def LFinference(text, s_prev, ref_s, alpha = 0.3, beta = 0.7, t = 0.7, diffusion_steps=5, embedding_scale=1):\n",
271
+ " text = text.strip()\n",
272
+ " ps = global_phonemizer.phonemize([text])\n",
273
+ " ps = word_tokenize(ps[0])\n",
274
+ " ps = ' '.join(ps)\n",
275
+ " ps = ps.replace('``', '\"')\n",
276
+ " ps = ps.replace(\"''\", '\"')\n",
277
+ "\n",
278
+ " tokens = textclenaer(ps)\n",
279
+ " tokens.insert(0, 0)\n",
280
+ " tokens = torch.LongTensor(tokens).to(device).unsqueeze(0)\n",
281
+ "\n",
282
+ " with torch.no_grad():\n",
283
+ " input_lengths = torch.LongTensor([tokens.shape[-1]]).to(device)\n",
284
+ " text_mask = length_to_mask(input_lengths).to(device)\n",
285
+ "\n",
286
+ " t_en = model.text_encoder(tokens, input_lengths, text_mask)\n",
287
+ " bert_dur = model.bert(tokens, attention_mask=(~text_mask).int())\n",
288
+ " d_en = model.bert_encoder(bert_dur).transpose(-1, -2)\n",
289
+ "\n",
290
+ " s_pred = sampler(noise = torch.randn((1, 256)).unsqueeze(1).to(device),\n",
291
+ " embedding=bert_dur,\n",
292
+ " embedding_scale=embedding_scale,\n",
293
+ " features=ref_s, # reference from the same speaker as the embedding\n",
294
+ " num_steps=diffusion_steps).squeeze(1)\n",
295
+ "\n",
296
+ " if s_prev is not None:\n",
297
+ " # convex combination of previous and current style\n",
298
+ " s_pred = t * s_prev + (1 - t) * s_pred\n",
299
+ "\n",
300
+ " s = s_pred[:, 128:]\n",
301
+ " ref = s_pred[:, :128]\n",
302
+ "\n",
303
+ " ref = alpha * ref + (1 - alpha) * ref_s[:, :128]\n",
304
+ " s = beta * s + (1 - beta) * ref_s[:, 128:]\n",
305
+ "\n",
306
+ " s_pred = torch.cat([ref, s], dim=-1)\n",
307
+ "\n",
308
+ " d = model.predictor.text_encoder(d_en,\n",
309
+ " s, input_lengths, text_mask)\n",
310
+ "\n",
311
+ " x, _ = model.predictor.lstm(d)\n",
312
+ " duration = model.predictor.duration_proj(x)\n",
313
+ "\n",
314
+ " duration = torch.sigmoid(duration).sum(axis=-1)\n",
315
+ " pred_dur = torch.round(duration.squeeze()).clamp(min=1)\n",
316
+ "\n",
317
+ "\n",
318
+ " pred_aln_trg = torch.zeros(input_lengths, int(pred_dur.sum().data))\n",
319
+ " c_frame = 0\n",
320
+ " for i in range(pred_aln_trg.size(0)):\n",
321
+ " pred_aln_trg[i, c_frame:c_frame + int(pred_dur[i].data)] = 1\n",
322
+ " c_frame += int(pred_dur[i].data)\n",
323
+ "\n",
324
+ " # encode prosody\n",
325
+ " en = (d.transpose(-1, -2) @ pred_aln_trg.unsqueeze(0).to(device))\n",
326
+ " if model_params.decoder.type == \"hifigan\":\n",
327
+ " asr_new = torch.zeros_like(en)\n",
328
+ " asr_new[:, :, 0] = en[:, :, 0]\n",
329
+ " asr_new[:, :, 1:] = en[:, :, 0:-1]\n",
330
+ " en = asr_new\n",
331
+ "\n",
332
+ " F0_pred, N_pred = model.predictor.F0Ntrain(en, s)\n",
333
+ "\n",
334
+ " asr = (t_en @ pred_aln_trg.unsqueeze(0).to(device))\n",
335
+ " if model_params.decoder.type == \"hifigan\":\n",
336
+ " asr_new = torch.zeros_like(asr)\n",
337
+ " asr_new[:, :, 0] = asr[:, :, 0]\n",
338
+ " asr_new[:, :, 1:] = asr[:, :, 0:-1]\n",
339
+ " asr = asr_new\n",
340
+ "\n",
341
+ " out = model.decoder(asr,\n",
342
+ " F0_pred, N_pred, ref.squeeze().unsqueeze(0))\n",
343
+ "\n",
344
+ "\n",
345
+ " return out.squeeze().cpu().numpy()[..., :-100], s_pred # weird pulse at the end of the model, need to be fixed later\n",
346
+ "\n",
347
+ "def STinference(text, ref_s, ref_text, alpha = 0.3, beta = 0.7, diffusion_steps=5, embedding_scale=1):\n",
348
+ " text = text.strip()\n",
349
+ " ps = global_phonemizer.phonemize([text])\n",
350
+ " ps = word_tokenize(ps[0])\n",
351
+ " ps = ' '.join(ps)\n",
352
+ "\n",
353
+ " tokens = textclenaer(ps)\n",
354
+ " tokens.insert(0, 0)\n",
355
+ " tokens = torch.LongTensor(tokens).to(device).unsqueeze(0)\n",
356
+ "\n",
357
+ " ref_text = ref_text.strip()\n",
358
+ " ps = global_phonemizer.phonemize([ref_text])\n",
359
+ " ps = word_tokenize(ps[0])\n",
360
+ " ps = ' '.join(ps)\n",
361
+ "\n",
362
+ " ref_tokens = textclenaer(ps)\n",
363
+ " ref_tokens.insert(0, 0)\n",
364
+ " ref_tokens = torch.LongTensor(ref_tokens).to(device).unsqueeze(0)\n",
365
+ "\n",
366
+ "\n",
367
+ " with torch.no_grad():\n",
368
+ " input_lengths = torch.LongTensor([tokens.shape[-1]]).to(device)\n",
369
+ " text_mask = length_to_mask(input_lengths).to(device)\n",
370
+ "\n",
371
+ " t_en = model.text_encoder(tokens, input_lengths, text_mask)\n",
372
+ " bert_dur = model.bert(tokens, attention_mask=(~text_mask).int())\n",
373
+ " d_en = model.bert_encoder(bert_dur).transpose(-1, -2)\n",
374
+ "\n",
375
+ " ref_input_lengths = torch.LongTensor([ref_tokens.shape[-1]]).to(device)\n",
376
+ " ref_text_mask = length_to_mask(ref_input_lengths).to(device)\n",
377
+ " ref_bert_dur = model.bert(ref_tokens, attention_mask=(~ref_text_mask).int())\n",
378
+ " s_pred = sampler(noise = torch.randn((1, 256)).unsqueeze(1).to(device),\n",
379
+ " embedding=bert_dur,\n",
380
+ " embedding_scale=embedding_scale,\n",
381
+ " features=ref_s, # reference from the same speaker as the embedding\n",
382
+ " num_steps=diffusion_steps).squeeze(1)\n",
383
+ "\n",
384
+ "\n",
385
+ " s = s_pred[:, 128:]\n",
386
+ " ref = s_pred[:, :128]\n",
387
+ "\n",
388
+ " ref = alpha * ref + (1 - alpha) * ref_s[:, :128]\n",
389
+ " s = beta * s + (1 - beta) * ref_s[:, 128:]\n",
390
+ "\n",
391
+ " d = model.predictor.text_encoder(d_en,\n",
392
+ " s, input_lengths, text_mask)\n",
393
+ "\n",
394
+ " x, _ = model.predictor.lstm(d)\n",
395
+ " duration = model.predictor.duration_proj(x)\n",
396
+ "\n",
397
+ " duration = torch.sigmoid(duration).sum(axis=-1)\n",
398
+ " pred_dur = torch.round(duration.squeeze()).clamp(min=1)\n",
399
+ "\n",
400
+ "\n",
401
+ " pred_aln_trg = torch.zeros(input_lengths, int(pred_dur.sum().data))\n",
402
+ " c_frame = 0\n",
403
+ " for i in range(pred_aln_trg.size(0)):\n",
404
+ " pred_aln_trg[i, c_frame:c_frame + int(pred_dur[i].data)] = 1\n",
405
+ " c_frame += int(pred_dur[i].data)\n",
406
+ "\n",
407
+ " # encode prosody\n",
408
+ " en = (d.transpose(-1, -2) @ pred_aln_trg.unsqueeze(0).to(device))\n",
409
+ " if model_params.decoder.type == \"hifigan\":\n",
410
+ " asr_new = torch.zeros_like(en)\n",
411
+ " asr_new[:, :, 0] = en[:, :, 0]\n",
412
+ " asr_new[:, :, 1:] = en[:, :, 0:-1]\n",
413
+ " en = asr_new\n",
414
+ "\n",
415
+ " F0_pred, N_pred = model.predictor.F0Ntrain(en, s)\n",
416
+ "\n",
417
+ " asr = (t_en @ pred_aln_trg.unsqueeze(0).to(device))\n",
418
+ " if model_params.decoder.type == \"hifigan\":\n",
419
+ " asr_new = torch.zeros_like(asr)\n",
420
+ " asr_new[:, :, 0] = asr[:, :, 0]\n",
421
+ " asr_new[:, :, 1:] = asr[:, :, 0:-1]\n",
422
+ " asr = asr_new\n",
423
+ "\n",
424
+ " out = model.decoder(asr,\n",
425
+ " F0_pred, N_pred, ref.squeeze().unsqueeze(0))\n",
426
+ "\n",
427
+ "\n",
428
+ " return out.squeeze().cpu().numpy()[..., :-50] # weird pulse at the end of the model, need to be fixed later\n"
429
+ ]
430
+ },
431
+ {
432
+ "cell_type": "markdown",
433
+ "metadata": {
434
+ "id": "32S6U0LyJbCA"
435
+ },
436
+ "source": [
437
+ "### Synthesize speech"
438
+ ]
439
+ },
440
+ {
441
+ "cell_type": "markdown",
442
+ "metadata": {
443
+ "id": "ehK_0daMJdk_"
444
+ },
445
+ "source": [
446
+ "#### Basic synthesis (5 diffusion steps, seen speakers)"
447
+ ]
448
+ },
449
+ {
450
+ "cell_type": "code",
451
+ "execution_count": null,
452
+ "metadata": {
453
+ "id": "SJs2x41MJhM-"
454
+ },
455
+ "outputs": [],
456
+ "source": [
457
+ "text = ''' StyleTTS 2 is a text to speech model that leverages style diffusion and adversarial training with large speech language models to achieve human level text to speech synthesis. ''' # @param {type:\"string\"}\n"
458
+ ]
459
+ },
460
+ {
461
+ "cell_type": "code",
462
+ "execution_count": null,
463
+ "metadata": {
464
+ "id": "xuqIJe-IJb7A"
465
+ },
466
+ "outputs": [],
467
+ "source": [
468
+ "reference_dicts = {}\n",
469
+ "reference_dicts['696_92939'] = \"Demo/reference_audio/696_92939_000016_000006.wav\"\n",
470
+ "reference_dicts['1789_142896'] = \"Demo/reference_audio/1789_142896_000022_000005.wav\""
471
+ ]
472
+ },
473
+ {
474
+ "cell_type": "code",
475
+ "execution_count": null,
476
+ "metadata": {
477
+ "id": "H3ra3IxJJmF0"
478
+ },
479
+ "outputs": [],
480
+ "source": [
481
+ "noise = torch.randn(1,1,256).to(device)\n",
482
+ "for k, path in reference_dicts.items():\n",
483
+ " ref_s = compute_style(path)\n",
484
+ " start = time.time()\n",
485
+ " wav = inference(text, ref_s, alpha=0.3, beta=0.7, diffusion_steps=5, embedding_scale=1)\n",
486
+ " rtf = (time.time() - start) / (len(wav) / 24000)\n",
487
+ " print(f\"RTF = {rtf:5f}\")\n",
488
+ " import IPython.display as ipd\n",
489
+ " print(k + ' Synthesized:')\n",
490
+ " display(ipd.Audio(wav, rate=24000, normalize=False))\n",
491
+ " print('Reference:')\n",
492
+ " display(ipd.Audio(path, rate=24000, normalize=False))"
493
+ ]
494
+ },
495
+ {
496
+ "cell_type": "markdown",
497
+ "metadata": {
498
+ "id": "aB3wUz6yJ-P_"
499
+ },
500
+ "source": [
501
+ "#### With higher diffusion steps (more diverse)\n",
502
+ "\n",
503
+ "Since the sampler is ancestral, the higher the number of steps, the more diverse the samples are, at the cost of slower synthesis speed."
504
+ ]
505
+ },
506
+ {
507
+ "cell_type": "code",
508
+ "execution_count": null,
509
+ "metadata": {
510
+ "id": "lF27XUo4JrKk"
511
+ },
512
+ "outputs": [],
513
+ "source": [
514
+ "noise = torch.randn(1,1,256).to(device)\n",
515
+ "for k, path in reference_dicts.items():\n",
516
+ " ref_s = compute_style(path)\n",
517
+ " start = time.time()\n",
518
+ " wav = inference(text, ref_s, alpha=0.3, beta=0.7, diffusion_steps=10, embedding_scale=1)\n",
519
+ " rtf = (time.time() - start) / (len(wav) / 24000)\n",
520
+ " print(f\"RTF = {rtf:5f}\")\n",
521
+ " import IPython.display as ipd\n",
522
+ " print(k + ' Synthesized:')\n",
523
+ " display(ipd.Audio(wav, rate=24000, normalize=False))\n",
524
+ " print(k + ' Reference:')\n",
525
+ " display(ipd.Audio(path, rate=24000, normalize=False))"
526
+ ]
527
+ },
528
+ {
529
+ "cell_type": "markdown",
530
+ "metadata": {
531
+ "id": "pFT_vmJcKDs1"
532
+ },
533
+ "source": [
534
+ "#### Basic synthesis (5 diffusion steps, unseen speakers)\n",
535
+ "The following cells reproduce the samples in [Section 4](https://styletts2.github.io/#libri) of the demo page. All speakers are unseen during training. You can compare the generated samples to popular zero-shot TTS models like Vall-E and NaturalSpeech 2."
536
+ ]
537
+ },
538
+ {
539
+ "cell_type": "code",
540
+ "execution_count": null,
541
+ "metadata": {
542
+ "id": "HvNAeGPEKAWN"
543
+ },
544
+ "outputs": [],
545
+ "source": [
546
+ "reference_dicts = {}\n",
547
+ "# format: (path, text)\n",
548
+ "reference_dicts['1221-135767'] = (\"Demo/reference_audio/1221-135767-0014.wav\", \"Yea, his honourable worship is within, but he hath a godly minister or two with him, and likewise a leech.\")\n",
549
+ "reference_dicts['5639-40744'] = (\"Demo/reference_audio/5639-40744-0020.wav\", \"Thus did this humane and right minded father comfort his unhappy daughter, and her mother embracing her again, did all she could to soothe her feelings.\")\n",
550
+ "reference_dicts['908-157963'] = (\"Demo/reference_audio/908-157963-0027.wav\", \"And lay me down in my cold bed and leave my shining lot.\")\n",
551
+ "reference_dicts['4077-13754'] = (\"Demo/reference_audio/4077-13754-0000.wav\", \"The army found the people in poverty and left them in comparative wealth.\")"
552
+ ]
553
+ },
554
+ {
555
+ "cell_type": "code",
556
+ "execution_count": null,
557
+ "metadata": {
558
+ "id": "mFnyvYp5KAYN"
559
+ },
560
+ "outputs": [],
561
+ "source": [
562
+ "noise = torch.randn(1,1,256).to(device)\n",
563
+ "for k, v in reference_dicts.items():\n",
564
+ " path, text = v\n",
565
+ " ref_s = compute_style(path)\n",
566
+ " start = time.time()\n",
567
+ " wav = inference(text, ref_s, alpha=0.3, beta=0.7, diffusion_steps=5, embedding_scale=1)\n",
568
+ " rtf = (time.time() - start) / (len(wav) / 24000)\n",
569
+ " print(f\"RTF = {rtf:5f}\")\n",
570
+ " import IPython.display as ipd\n",
571
+ " print(k + ' Synthesized: ' + text)\n",
572
+ " display(ipd.Audio(wav, rate=24000, normalize=False))\n",
573
+ " print(k + ' Reference:')\n",
574
+ " display(ipd.Audio(path, rate=24000, normalize=False))"
575
+ ]
576
+ },
577
+ {
578
+ "cell_type": "markdown",
579
+ "metadata": {
580
+ "id": "QBZ53BQtKNQ6"
581
+ },
582
+ "source": [
583
+ "### Speech expressiveness\n",
584
+ "\n",
585
+ "The following section recreates the samples shown in [Section 6](https://styletts2.github.io/#emo) of the demo page. The speaker reference used is `1221-135767-0014.wav`, which is unseen during training.\n",
586
+ "\n",
587
+ "#### With `embedding_scale=1`\n",
588
+ "This is the classifier-free guidance scale. The higher the scale, the more strongly the style is conditioned on the input text, and hence the more emotional the speech."
589
+ ]
590
+ },
591
+ {
592
+ "cell_type": "code",
593
+ "execution_count": null,
594
+ "metadata": {
595
+ "id": "5FwE9CefKQk6"
596
+ },
597
+ "outputs": [],
598
+ "source": [
599
+ "ref_s = compute_style(\"Demo/reference_audio/1221-135767-0014.wav\")"
600
+ ]
601
+ },
602
+ {
603
+ "cell_type": "code",
604
+ "execution_count": null,
605
+ "metadata": {
606
+ "id": "0CKMI0ZsKUDh"
607
+ },
608
+ "outputs": [],
609
+ "source": [
610
+ "texts = {}\n",
611
+ "texts['Happy'] = \"We are happy to invite you to join us on a journey to the past, where we will visit the most amazing monuments ever built by human hands.\"\n",
612
+ "texts['Sad'] = \"I am sorry to say that we have suffered a severe setback in our efforts to restore prosperity and confidence.\"\n",
613
+ "texts['Angry'] = \"The field of astronomy is a joke! Its theories are based on flawed observations and biased interpretations!\"\n",
614
+ "texts['Surprised'] = \"I can't believe it! You mean to tell me that you have discovered a new species of bacteria in this pond?\"\n",
615
+ "\n",
616
+ "for k,v in texts.items():\n",
617
+ " wav = inference(v, ref_s, diffusion_steps=10, alpha=0.3, beta=0.7, embedding_scale=1)\n",
618
+ " print(k + \": \")\n",
619
+ " display(ipd.Audio(wav, rate=24000, normalize=False))"
620
+ ]
621
+ },
622
+ {
623
+ "cell_type": "markdown",
624
+ "metadata": {
625
+ "id": "reemQKVEKWAZ"
626
+ },
627
+ "source": [
628
+ "#### With `embedding_scale=2`"
629
+ ]
630
+ },
631
+ {
632
+ "cell_type": "code",
633
+ "execution_count": null,
634
+ "metadata": {
635
+ "id": "npIAiAUvKYGv"
636
+ },
637
+ "outputs": [],
638
+ "source": [
639
+ "texts = {}\n",
640
+ "texts['Happy'] = \"We are happy to invite you to join us on a journey to the past, where we will visit the most amazing monuments ever built by human hands.\"\n",
641
+ "texts['Sad'] = \"I am sorry to say that we have suffered a severe setback in our efforts to restore prosperity and confidence.\"\n",
642
+ "texts['Angry'] = \"The field of astronomy is a joke! Its theories are based on flawed observations and biased interpretations!\"\n",
643
+ "texts['Surprised'] = \"I can't believe it! You mean to tell me that you have discovered a new species of bacteria in this pond?\"\n",
644
+ "\n",
645
+ "for k,v in texts.items():\n",
646
+ " noise = torch.randn(1,1,256).to(device)\n",
647
+ " wav = inference(v, ref_s, diffusion_steps=10, alpha=0.3, beta=0.7, embedding_scale=2)\n",
648
+ " print(k + \": \")\n",
649
+ " display(ipd.Audio(wav, rate=24000, normalize=False))"
650
+ ]
651
+ },
652
+ {
653
+ "cell_type": "markdown",
654
+ "metadata": {
655
+ "id": "lqKZaXeYKbrH"
656
+ },
657
+ "source": [
658
+ "#### With `embedding_scale=2, alpha = 0.5, beta = 0.9`\n",
659
+ "`alpha` and `beta` are the factors that determine how much we use the style sampled based on the text instead of the reference. The higher the values of `alpha` and `beta`, the more suitable the style is to the text but the less similar it is to the reference. Using a higher `beta` makes the synthesized speech more emotional, at the cost of lower similarity to the reference. `alpha` determines the timbre of the speaker while `beta` determines the prosody."
660
+ ]
661
+ },
662
+ {
663
+ "cell_type": "code",
664
+ "execution_count": null,
665
+ "metadata": {
666
+ "id": "VjXuRCCWKcdN"
667
+ },
668
+ "outputs": [],
669
+ "source": [
670
+ "texts = {}\n",
671
+ "texts['Happy'] = \"We are happy to invite you to join us on a journey to the past, where we will visit the most amazing monuments ever built by human hands.\"\n",
672
+ "texts['Sad'] = \"I am sorry to say that we have suffered a severe setback in our efforts to restore prosperity and confidence.\"\n",
673
+ "texts['Angry'] = \"The field of astronomy is a joke! Its theories are based on flawed observations and biased interpretations!\"\n",
674
+ "texts['Surprised'] = \"I can't believe it! You mean to tell me that you have discovered a new species of bacteria in this pond?\"\n",
675
+ "\n",
676
+ "for k,v in texts.items():\n",
677
+ " noise = torch.randn(1,1,256).to(device)\n",
678
+ " wav = inference(v, ref_s, diffusion_steps=10, alpha=0.5, beta=0.9, embedding_scale=2)\n",
679
+ " print(k + \": \")\n",
680
+ " display(ipd.Audio(wav, rate=24000, normalize=False))"
681
+ ]
682
+ },
683
+ {
684
+ "cell_type": "markdown",
685
+ "metadata": {
686
+ "id": "xrwYXGh0KiIW"
687
+ },
688
+ "source": [
689
+ "### Zero-shot speaker adaptation\n",
690
+ "This section recreates the \"Acoustic Environment Maintenance\" and \"Speaker’s Emotion Maintenance\" demo in [Section 4](https://styletts2.github.io/#libri) of the demo page. You can compare the generated samples to popular zero-shot TTS models like Vall-E. Note that the model was trained only on LibriTTS, which is about 250 times less data than was used to train Vall-E, yet it achieves similar or better results on these maintenance tasks."
691
+ ]
692
+ },
693
+ {
694
+ "cell_type": "markdown",
695
+ "metadata": {
696
+ "id": "ETUywHHmKimE"
697
+ },
698
+ "source": [
699
+ "#### Acoustic Environment Maintenance\n",
700
+ "\n",
701
+ "Since we want to maintain the acoustic environment in the speaker (timbre), we set `alpha = 0` to make the speaker as close to the reference as possible while only changing the prosody according to the text. "
702
+ ]
703
+ },
704
+ {
705
+ "cell_type": "code",
706
+ "execution_count": null,
707
+ "metadata": {
708
+ "id": "yvjBK3syKnZL"
709
+ },
710
+ "outputs": [],
711
+ "source": [
712
+ "reference_dicts = {}\n",
713
+ "# format: (path, text)\n",
714
+ "reference_dicts['3'] = (\"Demo/reference_audio/3.wav\", \"As friends thing I definitely I've got more male friends.\")\n",
715
+ "reference_dicts['4'] = (\"Demo/reference_audio/4.wav\", \"Everything is run by computer but you got to know how to think before you can do a computer.\")\n",
716
+ "reference_dicts['5'] = (\"Demo/reference_audio/5.wav\", \"Then out in LA you guys got a whole another ball game within California to worry about.\")"
717
+ ]
718
+ },
719
+ {
720
+ "cell_type": "code",
721
+ "execution_count": null,
722
+ "metadata": {
723
+ "id": "jclowWp4KomJ"
724
+ },
725
+ "outputs": [],
726
+ "source": [
727
+ "noise = torch.randn(1,1,256).to(device)\n",
728
+ "for k, v in reference_dicts.items():\n",
729
+ " path, text = v\n",
730
+ " ref_s = compute_style(path)\n",
731
+ " start = time.time()\n",
732
+ " wav = inference(text, ref_s, alpha=0.0, beta=0.5, diffusion_steps=5, embedding_scale=1)\n",
733
+ " rtf = (time.time() - start) / (len(wav) / 24000)\n",
734
+ " print(f\"RTF = {rtf:5f}\")\n",
735
+ " import IPython.display as ipd\n",
736
+ " print('Synthesized: ' + text)\n",
737
+ " display(ipd.Audio(wav, rate=24000, normalize=False))\n",
738
+ " print('Reference:')\n",
739
+ " display(ipd.Audio(path, rate=24000, normalize=False))"
740
+ ]
741
+ },
742
+ {
743
+ "cell_type": "markdown",
744
+ "metadata": {
745
+ "id": "LgIm7M93KqVZ"
746
+ },
747
+ "source": [
748
+ "#### Speaker’s Emotion Maintenance\n",
749
+ "\n",
750
+ "Since we want to maintain the emotion in the speaker (prosody), we set `beta = 0.1` to make the speaker as close to the reference as possible while having some diversity through the slight timbre change."
751
+ ]
752
+ },
753
+ {
754
+ "cell_type": "code",
755
+ "execution_count": null,
756
+ "metadata": {
757
+ "id": "yzsNoP6oKulL"
758
+ },
759
+ "outputs": [],
760
+ "source": [
761
+ "reference_dicts = {}\n",
762
+ "# format: (path, text)\n",
763
+ "reference_dicts['Anger'] = (\"Demo/reference_audio/anger.wav\", \"We have to reduce the number of plastic bags.\")\n",
764
+ "reference_dicts['Sleepy'] = (\"Demo/reference_audio/sleepy.wav\", \"We have to reduce the number of plastic bags.\")\n",
765
+ "reference_dicts['Amused'] = (\"Demo/reference_audio/amused.wav\", \"We have to reduce the number of plastic bags.\")\n",
766
+ "reference_dicts['Disgusted'] = (\"Demo/reference_audio/disgusted.wav\", \"We have to reduce the number of plastic bags.\")"
767
+ ]
768
+ },
769
+ {
770
+ "cell_type": "code",
771
+ "execution_count": null,
772
+ "metadata": {
773
+ "id": "7h2-9cpfKwr4"
774
+ },
775
+ "outputs": [],
776
+ "source": [
777
+ "noise = torch.randn(1,1,256).to(device)\n",
778
+ "for k, v in reference_dicts.items():\n",
779
+ " path, text = v\n",
780
+ " ref_s = compute_style(path)\n",
781
+ " start = time.time()\n",
782
+ " wav = inference(text, ref_s, alpha=0.3, beta=0.1, diffusion_steps=10, embedding_scale=1)\n",
783
+ " rtf = (time.time() - start) / (len(wav) / 24000)\n",
784
+ " print(f\"RTF = {rtf:5f}\")\n",
785
+ " import IPython.display as ipd\n",
786
+ " print(k + ' Synthesized: ' + text)\n",
787
+ " display(ipd.Audio(wav, rate=24000, normalize=False))\n",
788
+ " print(k + ' Reference:')\n",
789
+ " display(ipd.Audio(path, rate=24000, normalize=False))"
790
+ ]
791
+ },
792
+ {
793
+ "cell_type": "markdown",
794
+ "metadata": {
795
+ "id": "aNS82PGwKzgg"
796
+ },
797
+ "source": [
798
+ "### Longform Narration\n",
799
+ "\n",
800
+ "This section includes a basic implementation of Algorithm 1 in the paper for consistent longform audio generation. The example passage is taken from [Section 5](https://styletts2.github.io/#long) of the demo page."
801
+ ]
802
+ },
803
+ {
804
+ "cell_type": "code",
805
+ "execution_count": null,
806
+ "metadata": {
807
+ "cellView": "form",
808
+ "id": "qs97nL5HK5DH"
809
+ },
810
+ "outputs": [],
811
+ "source": [
812
+ "passage = passage = '''If the supply of fruit is greater than the family needs, it may be made a source of income by sending the fresh fruit to the market if there is one near enough, or by preserving, canning, and making jelly for sale. To make such an enterprise a success the fruit and work must be first class. There is magic in the word \"Homemade,\" when the product appeals to the eye and the palate; but many careless and incompetent people have found to their sorrow that this word has not magic enough to float inferior goods on the market. As a rule large canning and preserving establishments are clean and have the best appliances, and they employ chemists and skilled labor. The home product must be very good to compete with the attractive goods that are sent out from such establishments. Yet for first class home made products there is a market in all large cities. All first-class grocers have customers who purchase such goods.''' # @param {type:\"string\"}"
813
+ ]
814
+ },
815
+ {
816
+ "cell_type": "code",
817
+ "execution_count": null,
818
+ "metadata": {
819
+ "colab": {
820
+ "background_save": true
821
+ },
822
+ "id": "8Mu9whHYK_1b"
823
+ },
824
+ "outputs": [],
825
+ "source": [
826
+ "# seen speaker\n",
827
+ "path = \"Demo/reference_audio/696_92939_000016_000006.wav\"\n",
828
+ "s_ref = compute_style(path)\n",
829
+ "sentences = passage.split('.') # simple split by period\n",
830
+ "wavs = []\n",
831
+ "s_prev = None\n",
832
+ "for text in sentences:\n",
833
+ " if text.strip() == \"\": continue\n",
834
+ " text += '.' # add it back\n",
835
+ "\n",
836
+ " wav, s_prev = LFinference(text,\n",
837
+ " s_prev,\n",
838
+ " s_ref,\n",
839
+ " alpha = 0.3,\n",
840
+ " beta = 0.9, # make it more suitable for the text\n",
841
+ " t = 0.7,\n",
842
+ " diffusion_steps=10, embedding_scale=1.5)\n",
843
+ " wavs.append(wav)\n",
844
+ "print('Synthesized: ')\n",
845
+ "display(ipd.Audio(np.concatenate(wavs), rate=24000, normalize=False))\n",
846
+ "print('Reference: ')\n",
847
+ "display(ipd.Audio(path, rate=24000, normalize=False))"
848
+ ]
849
+ },
850
+ {
851
+ "cell_type": "markdown",
852
+ "metadata": {
853
+ "id": "81Rh-lgWLB2i"
854
+ },
855
+ "source": [
856
+ "### Style Transfer\n",
857
+ "\n",
858
+ "The following section demonstrates the style transfer capacity for unseen speakers in [Section 6](https://styletts2.github.io/#emo) of the demo page. For this, we set `alpha=0.5, beta = 0.9` for the most pronounced effects (mostly using the sampled style)."
859
+ ]
860
+ },
861
+ {
862
+ "cell_type": "code",
863
+ "execution_count": null,
864
+ "metadata": {
865
+ "id": "CtIgr5kOLE9a"
866
+ },
867
+ "outputs": [],
868
+ "source": [
869
+ "# reference texts to sample styles\n",
870
+ "\n",
871
+ "ref_texts = {}\n",
872
+ "ref_texts['Happy'] = \"We are happy to invite you to join us on a journey to the past, where we will visit the most amazing monuments ever built by human hands.\"\n",
873
+ "ref_texts['Sad'] = \"I am sorry to say that we have suffered a severe setback in our efforts to restore prosperity and confidence.\"\n",
874
+ "ref_texts['Angry'] = \"The field of astronomy is a joke! Its theories are based on flawed observations and biased interpretations!\"\n",
875
+ "ref_texts['Surprised'] = \"I can't believe it! You mean to tell me that you have discovered a new species of bacteria in this pond?\""
876
+ ]
877
+ },
878
+ {
879
+ "cell_type": "code",
880
+ "execution_count": null,
881
+ "metadata": {
882
+ "id": "MlA1CbhzLIoI"
883
+ },
884
+ "outputs": [],
885
+ "source": [
886
+ "path = \"Demo/reference_audio/1221-135767-0014.wav\"\n",
887
+ "s_ref = compute_style(path)\n",
888
+ "\n",
889
+ "text = \"Yea, his honourable worship is within, but he hath a godly minister or two with him, and likewise a leech.\"\n",
890
+ "for k,v in ref_texts.items():\n",
891
+ " wav = STinference(text, s_ref, v, diffusion_steps=10, alpha=0.5, beta=0.9, embedding_scale=1.5)\n",
892
+ " print(k + \": \")\n",
893
+ " display(ipd.Audio(wav, rate=24000, normalize=False))"
894
+ ]
895
+ },
896
+ {
897
+ "cell_type": "markdown",
898
+ "metadata": {
899
+ "id": "2M0iaXlkLJUQ"
900
+ },
901
+ "source": [
902
+ "### Speech diversity\n",
903
+ "\n",
904
+ "This section reproduces samples in [Section 7](https://styletts2.github.io/#var) of the demo page.\n",
905
+ "\n",
906
+ "`alpha` and `beta` determine the diversity of the synthesized speech. There are two extreme cases:\n",
907
+ "- If `alpha = 1` and `beta = 1`, the synthesized speech sounds the most dissimilar to the reference speaker, but it is also the most diverse (each time you synthesize a speech it will be totally different).\n",
908
+ "- If `alpha = 0` and `beta = 0`, the synthesized speech sounds the most similar to the reference speaker, but it is deterministic (i.e., the sampled style is not used for speech synthesis).\n"
909
+ ]
910
+ },
911
+ {
912
+ "cell_type": "markdown",
913
+ "metadata": {
914
+ "id": "tSxZDvF2LNu4"
915
+ },
916
+ "source": [
917
+ "#### Default setting (`alpha = 0.3, beta=0.7`)\n",
918
+ "This setting uses 70% of the reference timbre and 30% of the reference prosody and uses the diffusion model to sample the remainder based on the text."
919
+ ]
920
+ },
921
+ {
922
+ "cell_type": "code",
923
+ "execution_count": null,
924
+ "metadata": {
925
+ "id": "AAomGCDZLIt5"
926
+ },
927
+ "outputs": [],
928
+ "source": [
929
+ "# unseen speaker\n",
930
+ "path = \"Demo/reference_audio/1221-135767-0014.wav\"\n",
931
+ "ref_s = compute_style(path)\n",
932
+ "\n",
933
+ "text = \"How much variation is there?\"\n",
934
+ "for _ in range(5):\n",
935
+ " wav = inference(text, ref_s, diffusion_steps=10, alpha=0.3, beta=0.7, embedding_scale=1)\n",
936
+ " display(ipd.Audio(wav, rate=24000, normalize=False))"
937
+ ]
938
+ },
939
+ {
940
+ "cell_type": "markdown",
941
+ "metadata": {
942
+ "id": "BKrSMdgcLQRP"
943
+ },
944
+ "source": [
945
+ "#### Less diverse setting (`alpha = 0.1, beta=0.3`)\n",
946
+ "This setting uses 90% of the reference timbre and 70% of the reference prosody. This makes it more similar to the reference speaker at the cost of less diverse samples."
947
+ ]
948
+ },
949
+ {
950
+ "cell_type": "code",
951
+ "execution_count": null,
952
+ "metadata": {
953
+ "id": "Uo7gVmFoLRfm"
954
+ },
955
+ "outputs": [],
956
+ "source": [
957
+ "# unseen speaker\n",
958
+ "path = \"Demo/reference_audio/1221-135767-0014.wav\"\n",
959
+ "ref_s = compute_style(path)\n",
960
+ "\n",
961
+ "text = \"How much variation is there?\"\n",
962
+ "for _ in range(5):\n",
963
+ " wav = inference(text, ref_s, diffusion_steps=10, alpha=0.1, beta=0.3, embedding_scale=1)\n",
964
+ " display(ipd.Audio(wav, rate=24000, normalize=False))"
965
+ ]
966
+ },
967
+ {
968
+ "cell_type": "markdown",
969
+ "metadata": {
970
+ "id": "nfQ0Xrg9LStd"
971
+ },
972
+ "source": [
973
+ "#### More diverse setting (`alpha = 0.5, beta=0.95`)\n",
974
+ "This setting uses 50% of the reference timbre and 5% of the reference prosody (so it uses 95% of the sampled prosody, which makes it more diverse), but this makes it more dissimilar to the reference speaker. "
975
+ ]
976
+ },
977
+ {
978
+ "cell_type": "code",
979
+ "execution_count": null,
980
+ "metadata": {
981
+ "id": "cPHz4BzVLT_u"
982
+ },
983
+ "outputs": [],
984
+ "source": [
985
+ "# unseen speaker\n",
986
+ "path = \"Demo/reference_audio/1221-135767-0014.wav\"\n",
987
+ "ref_s = compute_style(path)\n",
988
+ "\n",
989
+ "text = \"How much variation is there?\"\n",
990
+ "for _ in range(5):\n",
991
+ " wav = inference(text, ref_s, diffusion_steps=10, alpha=0.5, beta=0.95, embedding_scale=1)\n",
992
+ " display(ipd.Audio(wav, rate=24000, normalize=False))"
993
+ ]
994
+ },
995
+ {
996
+ "cell_type": "markdown",
997
+ "source": [
998
+ "#### Extreme setting (`alpha = 1, beta=1`)\n",
999
+ "This setting uses 0% of the reference timbre and prosody and uses the diffusion model to sample the entire style. This makes the speaker very dissimilar to the reference speaker."
1000
+ ],
1001
+ "metadata": {
1002
+ "id": "hPKg9eYpL00f"
1003
+ }
1004
+ },
1005
+ {
1006
+ "cell_type": "code",
1007
+ "source": [
1008
+ "# unseen speaker\n",
1009
+ "path = \"Demo/reference_audio/1221-135767-0014.wav\"\n",
1010
+ "ref_s = compute_style(path)\n",
1011
+ "\n",
1012
+ "text = \"How much variation is there?\"\n",
1013
+ "for _ in range(5):\n",
1014
+ " wav = inference(text, ref_s, diffusion_steps=10, alpha=1, beta=1, embedding_scale=1)\n",
1015
+ " display(ipd.Audio(wav, rate=24000, normalize=False))"
1016
+ ],
1017
+ "metadata": {
1018
+ "id": "Ei-7JOccL0bF"
1019
+ },
1020
+ "execution_count": null,
1021
+ "outputs": []
1022
+ },
1023
+ {
1024
+ "cell_type": "markdown",
1025
+ "source": [
1026
+ "#### No variation (`alpha = 0, beta=0`)\n",
1027
+ "This setting uses 100% of the reference timbre and prosody and does not use the diffusion model at all. This makes the speaker very similar to the reference speaker, but there is no variation."
1028
+ ],
1029
+ "metadata": {
1030
+ "id": "FVMPc3bhL3eL"
1031
+ }
1032
+ },
1033
+ {
1034
+ "cell_type": "code",
1035
+ "source": [
1036
+ "# unseen speaker\n",
1037
+ "path = \"Demo/reference_audio/1221-135767-0014.wav\"\n",
1038
+ "ref_s = compute_style(path)\n",
1039
+ "\n",
1040
+ "text = \"How much variation is there?\"\n",
1041
+ "for _ in range(5):\n",
1042
+ " wav = inference(text, ref_s, diffusion_steps=10, alpha=0, beta=0, embedding_scale=1)\n",
1043
+ " display(ipd.Audio(wav, rate=24000, normalize=False))"
1044
+ ],
1045
+ "metadata": {
1046
+ "id": "yh1QZ7uhL4wM"
1047
+ },
1048
+ "execution_count": null,
1049
+ "outputs": []
1050
+ },
1051
+ {
1052
+ "cell_type": "markdown",
1053
+ "source": [
1054
+ "### Extra fun!\n",
1055
+ "\n",
1056
+ "You can record your own voice and clone it using pre-trained StyleTTS 2 model here."
1057
+ ],
1058
+ "metadata": {
1059
+ "id": "T0EvkWrAMBDB"
1060
+ }
1061
+ },
1062
+ {
1063
+ "cell_type": "markdown",
1064
+ "source": [
1065
+ "#### Run the following cell to record your voice for 5 seconds. Please keep speaking to have the best effect."
1066
+ ],
1067
+ "metadata": {
1068
+ "id": "R985j5QONY8I"
1069
+ }
1070
+ },
1071
+ {
1072
+ "cell_type": "code",
1073
+ "source": [
1074
+ "# all imports\n",
1075
+ "from IPython.display import Javascript\n",
1076
+ "from google.colab import output\n",
1077
+ "from base64 import b64decode\n",
1078
+ "\n",
1079
+ "RECORD = \"\"\"\n",
1080
+ "const sleep = time => new Promise(resolve => setTimeout(resolve, time))\n",
1081
+ "const b2text = blob => new Promise(resolve => {\n",
1082
+ " const reader = new FileReader()\n",
1083
+ " reader.onloadend = e => resolve(e.srcElement.result)\n",
1084
+ " reader.readAsDataURL(blob)\n",
1085
+ "})\n",
1086
+ "var record = time => new Promise(async resolve => {\n",
1087
+ " stream = await navigator.mediaDevices.getUserMedia({ audio: true })\n",
1088
+ " recorder = new MediaRecorder(stream)\n",
1089
+ " chunks = []\n",
1090
+ " recorder.ondataavailable = e => chunks.push(e.data)\n",
1091
+ " recorder.start()\n",
1092
+ " await sleep(time)\n",
1093
+ " recorder.onstop = async ()=>{\n",
1094
+ " blob = new Blob(chunks)\n",
1095
+ " text = await b2text(blob)\n",
1096
+ " resolve(text)\n",
1097
+ " }\n",
1098
+ " recorder.stop()\n",
1099
+ "})\n",
1100
+ "\"\"\"\n",
1101
+ "\n",
1102
+ "def record(sec=3):\n",
1103
+ " display(Javascript(RECORD))\n",
1104
+ " s = output.eval_js('record(%d)' % (sec*1000))\n",
1105
+ " b = b64decode(s.split(',')[1])\n",
1106
+ " with open('audio.wav','wb') as f:\n",
1107
+ " f.write(b)\n",
1108
+ " return 'audio.wav' # or webm ?"
1109
+ ],
1110
+ "metadata": {
1111
+ "id": "MWrFs0KWMBpz"
1112
+ },
1113
+ "execution_count": null,
1114
+ "outputs": []
1115
+ },
1116
+ {
1117
+ "cell_type": "markdown",
1118
+ "source": [
1119
+ "#### Please run this cell and speak:"
1120
+ ],
1121
+ "metadata": {
1122
+ "id": "z35qXwM0Nhx1"
1123
+ }
1124
+ },
1125
+ {
1126
+ "cell_type": "code",
1127
+ "source": [
1128
+ "print('Speak now for 5 seconds.')\n",
1129
+ "audio = record(sec=5)\n",
1130
+ "import IPython.display as ipd\n",
1131
+ "display(ipd.Audio(audio, rate=24000, normalize=False))"
1132
+ ],
1133
+ "metadata": {
1134
+ "id": "KUEoFyQBMR-8"
1135
+ },
1136
+ "execution_count": null,
1137
+ "outputs": []
1138
+ },
1139
+ {
1140
+ "cell_type": "markdown",
1141
+ "source": [
1142
+ "#### Synthesize in your own voice"
1143
+ ],
1144
+ "metadata": {
1145
+ "id": "OQS_7IBpNmM1"
1146
+ }
1147
+ },
1148
+ {
1149
+ "cell_type": "code",
1150
+ "source": [
1151
+ "text = ''' StyleTTS 2 is a text to speech model that leverages style diffusion and adversarial training with large speech language models to achieve human level text to speech synthesis. ''' # @param {type:\"string\"}\n"
1152
+ ],
1153
+ "metadata": {
1154
+ "cellView": "form",
1155
+ "id": "c0I3LY7vM8Ta"
1156
+ },
1157
+ "execution_count": null,
1158
+ "outputs": []
1159
+ },
1160
+ {
1161
+ "cell_type": "code",
1162
+ "source": [
1163
+ "reference_dicts = {}\n",
1164
+ "reference_dicts['You'] = audio"
1165
+ ],
1166
+ "metadata": {
1167
+ "id": "80eW-pwxNCxu"
1168
+ },
1169
+ "execution_count": null,
1170
+ "outputs": []
1171
+ },
1172
+ {
1173
+ "cell_type": "code",
1174
+ "source": [
1175
+ "start = time.time()\n",
1176
+ "noise = torch.randn(1,1,256).to(device)\n",
1177
+ "for k, path in reference_dicts.items():\n",
1178
+ " ref_s = compute_style(path)\n",
1179
+ "\n",
1180
+ " wav = inference(text, ref_s, alpha=0.1, beta=0.5, diffusion_steps=5, embedding_scale=1)\n",
1181
+ " rtf = (time.time() - start) / (len(wav) / 24000)\n",
1182
+ " print('Speaker: ' + k)\n",
1183
+ " import IPython.display as ipd\n",
1184
+ " print('Synthesized:')\n",
1185
+ " display(ipd.Audio(wav, rate=24000, normalize=False))\n",
1186
+ " print('Reference:')\n",
1187
+ " display(ipd.Audio(path, rate=24000, normalize=False))"
1188
+ ],
1189
+ "metadata": {
1190
+ "id": "yIga6MTuNJaN"
1191
+ },
1192
+ "execution_count": null,
1193
+ "outputs": []
1194
+ }
1195
+ ],
1196
+ "metadata": {
1197
+ "accelerator": "GPU",
1198
+ "colab": {
1199
+ "provenance": [],
1200
+ "collapsed_sections": [
1201
+ "aAGQPfgYIR23",
1202
+ "eJdB_nCOIVIN",
1203
+ "R985j5QONY8I"
1204
+ ],
1205
+ "authorship_tag": "ABX9TyPQdFTqqVEknEG/ma/HMfU+",
1206
+ "include_colab_link": true
1207
+ },
1208
+ "kernelspec": {
1209
+ "display_name": "Python 3",
1210
+ "name": "python3"
1211
+ },
1212
+ "language_info": {
1213
+ "name": "python"
1214
+ }
1215
+ },
1216
+ "nbformat": 4,
1217
+ "nbformat_minor": 0
1218
+ }
stylekan/Colab/StyleTTS2_Finetune_Demo.ipynb ADDED
@@ -0,0 +1,480 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": [],
7
+ "gpuType": "T4",
8
+ "authorship_tag": "ABX9TyNiDU9ykIeYxO86Lmuid+ph",
9
+ "include_colab_link": true
10
+ },
11
+ "kernelspec": {
12
+ "name": "python3",
13
+ "display_name": "Python 3"
14
+ },
15
+ "language_info": {
16
+ "name": "python"
17
+ },
18
+ "accelerator": "GPU"
19
+ },
20
+ "cells": [
21
+ {
22
+ "cell_type": "markdown",
23
+ "metadata": {
24
+ "id": "view-in-github",
25
+ "colab_type": "text"
26
+ },
27
+ "source": [
28
+ "<a href=\"https://colab.research.google.com/github/yl4579/StyleTTS2/blob/main/Colab/StyleTTS2_Finetune_Demo.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
29
+ ]
30
+ },
31
+ {
32
+ "cell_type": "markdown",
33
+ "source": [
34
+ "### Install packages and download models"
35
+ ],
36
+ "metadata": {
37
+ "id": "yLqBa4uYPrqE"
38
+ }
39
+ },
40
+ {
41
+ "cell_type": "code",
42
+ "source": [
43
+ "%%shell\n",
44
+ "git clone https://github.com/yl4579/StyleTTS2.git\n",
45
+ "cd StyleTTS2\n",
46
+ "pip install SoundFile torchaudio munch torch pydub pyyaml librosa nltk matplotlib accelerate transformers phonemizer einops einops-exts tqdm typing-extensions git+https://github.com/resemble-ai/monotonic_align.git\n",
47
+ "sudo apt-get install espeak-ng\n",
48
+ "git-lfs clone https://huggingface.co/yl4579/StyleTTS2-LibriTTS\n",
49
+ "mv StyleTTS2-LibriTTS/Models ."
50
+ ],
51
+ "metadata": {
52
+ "id": "H72WF06ZPrTF"
53
+ },
54
+ "execution_count": null,
55
+ "outputs": []
56
+ },
57
+ {
58
+ "cell_type": "markdown",
59
+ "source": [
60
+ "### Download dataset (LJSpeech, 200 samples, ~15 minutes of data)\n",
61
+ "\n",
62
+ "You can definitely do it with fewer samples. This is just a proof of concept with 200 samples."
63
+ ],
64
+ "metadata": {
65
+ "id": "G398sL8wPzTB"
66
+ }
67
+ },
68
+ {
69
+ "cell_type": "code",
70
+ "source": [
71
+ "%cd StyleTTS2\n",
72
+ "!rm -rf Data"
73
+ ],
74
+ "metadata": {
75
+ "id": "kJuQUBrEPy5C"
76
+ },
77
+ "execution_count": null,
78
+ "outputs": []
79
+ },
80
+ {
81
+ "cell_type": "code",
82
+ "source": [
83
+ "!gdown --id 1vqz26D3yn7OXS2vbfYxfSnpLS6m6tOFP\n",
84
+ "!unzip Data.zip"
85
+ ],
86
+ "metadata": {
87
+ "id": "mDXW8ZZePuSb"
88
+ },
89
+ "execution_count": null,
90
+ "outputs": []
91
+ },
92
+ {
93
+ "cell_type": "markdown",
94
+ "source": [
95
+ "### Change the finetuning config\n",
96
+ "\n",
97
+ "Depending on the GPU you have, you may want to change the batch size, max audio length, epochs and so on."
98
+ ],
99
+ "metadata": {
100
+ "id": "_AlBQREWU8ud"
101
+ }
102
+ },
103
+ {
104
+ "cell_type": "code",
105
+ "source": [
106
+ "config_path = \"Configs/config_ft.yml\"\n",
107
+ "\n",
108
+ "import yaml\n",
109
+ "config = yaml.safe_load(open(config_path))"
110
+ ],
111
+ "metadata": {
112
+ "id": "7uEITi0hU4I2"
113
+ },
114
+ "execution_count": null,
115
+ "outputs": []
116
+ },
117
+ {
118
+ "cell_type": "code",
119
+ "source": [
120
+ "config['data_params']['root_path'] = \"Data/wavs\"\n",
121
+ "\n",
122
+ "config['batch_size'] = 2 # not enough RAM\n",
123
+ "config['max_len'] = 100 # not enough RAM\n",
124
+ "config['loss_params']['joint_epoch'] = 110 # we do not do SLM adversarial training due to not enough RAM\n",
125
+ "\n",
126
+ "with open(config_path, 'w') as outfile:\n",
127
+ " yaml.dump(config, outfile, default_flow_style=True)"
128
+ ],
129
+ "metadata": {
130
+ "id": "TPTRgOKSVT4K"
131
+ },
132
+ "execution_count": null,
133
+ "outputs": []
134
+ },
135
+ {
136
+ "cell_type": "markdown",
137
+ "source": [
138
+ "### Start finetuning\n"
139
+ ],
140
+ "metadata": {
141
+ "id": "uUuB_19NWj2Y"
142
+ }
143
+ },
144
+ {
145
+ "cell_type": "code",
146
+ "source": [
147
+ "!python train_finetune.py --config_path ./Configs/config_ft.yml"
148
+ ],
149
+ "metadata": {
150
+ "id": "HZVAD5GKWm-O"
151
+ },
152
+ "execution_count": null,
153
+ "outputs": []
154
+ },
155
+ {
156
+ "cell_type": "markdown",
157
+ "source": [
158
+ "### Test the model quality\n",
159
+ "\n",
160
+ "Note that this mainly serves as a proof of concept due to RAM limitation of free Colab instances. A lot of settings are suboptimal. In the future when DDP works for train_second.py, we will also add mixed precision finetuning to save time and RAM. You can also add SLM adversarial training run if you have paid Colab services (such as A100 with 40G of RAM)."
161
+ ],
162
+ "metadata": {
163
+ "id": "I0_7wsGkXGfc"
164
+ }
165
+ },
166
+ {
167
+ "cell_type": "code",
168
+ "source": [
169
+ "import nltk\n",
170
+ "nltk.download('punkt')"
171
+ ],
172
+ "metadata": {
173
+ "id": "OPLphjbncE7p"
174
+ },
175
+ "execution_count": null,
176
+ "outputs": []
177
+ },
178
+ {
179
+ "cell_type": "code",
180
+ "source": [
181
+ "import torch\n",
182
+ "torch.manual_seed(0)\n",
183
+ "torch.backends.cudnn.benchmark = False\n",
184
+ "torch.backends.cudnn.deterministic = True\n",
185
+ "\n",
186
+ "import random\n",
187
+ "random.seed(0)\n",
188
+ "\n",
189
+ "import numpy as np\n",
190
+ "np.random.seed(0)\n",
191
+ "\n",
192
+ "# load packages\n",
193
+ "import time\n",
194
+ "import random\n",
195
+ "import yaml\n",
196
+ "from munch import Munch\n",
197
+ "import numpy as np\n",
198
+ "import torch\n",
199
+ "from torch import nn\n",
200
+ "import torch.nn.functional as F\n",
201
+ "import torchaudio\n",
202
+ "import librosa\n",
203
+ "from nltk.tokenize import word_tokenize\n",
204
+ "\n",
205
+ "from models import *\n",
206
+ "from utils import *\n",
207
+ "from text_utils import TextCleaner\n",
208
+ "textclenaer = TextCleaner()\n",
209
+ "\n",
210
+ "%matplotlib inline\n",
211
+ "\n",
212
+ "to_mel = torchaudio.transforms.MelSpectrogram(\n",
213
+ " n_mels=80, n_fft=2048, win_length=1200, hop_length=300)\n",
214
+ "mean, std = -4, 4\n",
215
+ "\n",
216
+ "def length_to_mask(lengths):\n",
217
+ " mask = torch.arange(lengths.max()).unsqueeze(0).expand(lengths.shape[0], -1).type_as(lengths)\n",
218
+ " mask = torch.gt(mask+1, lengths.unsqueeze(1))\n",
219
+ " return mask\n",
220
+ "\n",
221
+ "def preprocess(wave):\n",
222
+ " wave_tensor = torch.from_numpy(wave).float()\n",
223
+ " mel_tensor = to_mel(wave_tensor)\n",
224
+ " mel_tensor = (torch.log(1e-5 + mel_tensor.unsqueeze(0)) - mean) / std\n",
225
+ " return mel_tensor\n",
226
+ "\n",
227
+ "def compute_style(path):\n",
228
+ " wave, sr = librosa.load(path, sr=24000)\n",
229
+ " audio, index = librosa.effects.trim(wave, top_db=30)\n",
230
+ " if sr != 24000:\n",
231
+ " audio = librosa.resample(audio, sr, 24000)\n",
232
+ " mel_tensor = preprocess(audio).to(device)\n",
233
+ "\n",
234
+ " with torch.no_grad():\n",
235
+ " ref_s = model.style_encoder(mel_tensor.unsqueeze(1))\n",
236
+ " ref_p = model.predictor_encoder(mel_tensor.unsqueeze(1))\n",
237
+ "\n",
238
+ " return torch.cat([ref_s, ref_p], dim=1)\n",
239
+ "\n",
240
+ "device = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
241
+ "\n",
242
+ "# load phonemizer\n",
243
+ "import phonemizer\n",
244
+ "global_phonemizer = phonemizer.backend.EspeakBackend(language='en-us', preserve_punctuation=True, with_stress=True)\n",
245
+ "\n",
246
+ "config = yaml.safe_load(open(\"Models/LJSpeech/config_ft.yml\"))\n",
247
+ "\n",
248
+ "# load pretrained ASR model\n",
249
+ "ASR_config = config.get('ASR_config', False)\n",
250
+ "ASR_path = config.get('ASR_path', False)\n",
251
+ "text_aligner = load_ASR_models(ASR_path, ASR_config)\n",
252
+ "\n",
253
+ "# load pretrained F0 model\n",
254
+ "F0_path = config.get('F0_path', False)\n",
255
+ "pitch_extractor = load_F0_models(F0_path)\n",
256
+ "\n",
257
+ "# load BERT model\n",
258
+ "from Utils.PLBERT.util import load_plbert\n",
259
+ "BERT_path = config.get('PLBERT_dir', False)\n",
260
+ "plbert = load_plbert(BERT_path)\n",
261
+ "\n",
262
+ "model_params = recursive_munch(config['model_params'])\n",
263
+ "model = build_model(model_params, text_aligner, pitch_extractor, plbert)\n",
264
+ "_ = [model[key].eval() for key in model]\n",
265
+ "_ = [model[key].to(device) for key in model]"
266
+ ],
267
+ "metadata": {
268
+ "id": "jIIAoDACXJL0"
269
+ },
270
+ "execution_count": null,
271
+ "outputs": []
272
+ },
273
+ {
274
+ "cell_type": "code",
275
+ "source": [
276
+ "files = [f for f in os.listdir(\"Models/LJSpeech/\") if f.endswith('.pth')]\n",
277
+ "sorted_files = sorted(files, key=lambda x: int(x.split('_')[-1].split('.')[0]))"
278
+ ],
279
+ "metadata": {
280
+ "id": "eKXRAyyzcMpQ"
281
+ },
282
+ "execution_count": null,
283
+ "outputs": []
284
+ },
285
+ {
286
+ "cell_type": "code",
287
+ "source": [
288
+ "params_whole = torch.load(\"Models/LJSpeech/\" + sorted_files[-1], map_location='cpu')\n",
289
+ "params = params_whole['net']"
290
+ ],
291
+ "metadata": {
292
+ "id": "ULuU9-VDb9Pk"
293
+ },
294
+ "execution_count": null,
295
+ "outputs": []
296
+ },
297
+ {
298
+ "cell_type": "code",
299
+ "source": [
300
+ "for key in model:\n",
301
+ " if key in params:\n",
302
+ " print('%s loaded' % key)\n",
303
+ " try:\n",
304
+ " model[key].load_state_dict(params[key])\n",
305
+ " except:\n",
306
+ " from collections import OrderedDict\n",
307
+ " state_dict = params[key]\n",
308
+ " new_state_dict = OrderedDict()\n",
309
+ " for k, v in state_dict.items():\n",
310
+ " name = k[7:] # remove `module.`\n",
311
+ " new_state_dict[name] = v\n",
312
+ " # load params\n",
313
+ " model[key].load_state_dict(new_state_dict, strict=False)\n",
314
+ "# except:\n",
315
+ "# _load(params[key], model[key])\n",
316
+ "_ = [model[key].eval() for key in model]"
317
+ ],
318
+ "metadata": {
319
+ "id": "J-U29yIYc2ea"
320
+ },
321
+ "execution_count": null,
322
+ "outputs": []
323
+ },
324
+ {
325
+ "cell_type": "code",
326
+ "source": [
327
+ "from Modules.diffusion.sampler import DiffusionSampler, ADPM2Sampler, KarrasSchedule"
328
+ ],
329
+ "metadata": {
330
+ "id": "jrPQ_Yrwc3n6"
331
+ },
332
+ "execution_count": null,
333
+ "outputs": []
334
+ },
335
+ {
336
+ "cell_type": "code",
337
+ "source": [
338
+ "sampler = DiffusionSampler(\n",
339
+ " model.diffusion.diffusion,\n",
340
+ " sampler=ADPM2Sampler(),\n",
341
+ " sigma_schedule=KarrasSchedule(sigma_min=0.0001, sigma_max=3.0, rho=9.0), # empirical parameters\n",
342
+ " clamp=False\n",
343
+ ")"
344
+ ],
345
+ "metadata": {
346
+ "id": "n2CWYNoqc455"
347
+ },
348
+ "execution_count": null,
349
+ "outputs": []
350
+ },
351
+ {
352
+ "cell_type": "code",
353
+ "source": [
354
+ "def inference(text, ref_s, alpha = 0.3, beta = 0.7, diffusion_steps=5, embedding_scale=1):\n",
355
+ " text = text.strip()\n",
356
+ " ps = global_phonemizer.phonemize([text])\n",
357
+ " ps = word_tokenize(ps[0])\n",
358
+ " ps = ' '.join(ps)\n",
359
+ " tokens = textclenaer(ps)\n",
360
+ " tokens.insert(0, 0)\n",
361
+ " tokens = torch.LongTensor(tokens).to(device).unsqueeze(0)\n",
362
+ "\n",
363
+ " with torch.no_grad():\n",
364
+ " input_lengths = torch.LongTensor([tokens.shape[-1]]).to(device)\n",
365
+ " text_mask = length_to_mask(input_lengths).to(device)\n",
366
+ "\n",
367
+ " t_en = model.text_encoder(tokens, input_lengths, text_mask)\n",
368
+ " bert_dur = model.bert(tokens, attention_mask=(~text_mask).int())\n",
369
+ " d_en = model.bert_encoder(bert_dur).transpose(-1, -2)\n",
370
+ "\n",
371
+ " s_pred = sampler(noise = torch.randn((1, 256)).unsqueeze(1).to(device),\n",
372
+ " embedding=bert_dur,\n",
373
+ " embedding_scale=embedding_scale,\n",
374
+ " features=ref_s, # reference from the same speaker as the embedding\n",
375
+ " num_steps=diffusion_steps).squeeze(1)\n",
376
+ "\n",
377
+ "\n",
378
+ " s = s_pred[:, 128:]\n",
379
+ " ref = s_pred[:, :128]\n",
380
+ "\n",
381
+ " ref = alpha * ref + (1 - alpha) * ref_s[:, :128]\n",
382
+ " s = beta * s + (1 - beta) * ref_s[:, 128:]\n",
383
+ "\n",
384
+ " d = model.predictor.text_encoder(d_en,\n",
385
+ " s, input_lengths, text_mask)\n",
386
+ "\n",
387
+ " x, _ = model.predictor.lstm(d)\n",
388
+ " duration = model.predictor.duration_proj(x)\n",
389
+ "\n",
390
+ " duration = torch.sigmoid(duration).sum(axis=-1)\n",
391
+ " pred_dur = torch.round(duration.squeeze()).clamp(min=1)\n",
392
+ "\n",
393
+ " pred_aln_trg = torch.zeros(input_lengths, int(pred_dur.sum().data))\n",
394
+ " c_frame = 0\n",
395
+ " for i in range(pred_aln_trg.size(0)):\n",
396
+ " pred_aln_trg[i, c_frame:c_frame + int(pred_dur[i].data)] = 1\n",
397
+ " c_frame += int(pred_dur[i].data)\n",
398
+ "\n",
399
+ " # encode prosody\n",
400
+ " en = (d.transpose(-1, -2) @ pred_aln_trg.unsqueeze(0).to(device))\n",
401
+ " if model_params.decoder.type == \"hifigan\":\n",
402
+ " asr_new = torch.zeros_like(en)\n",
403
+ " asr_new[:, :, 0] = en[:, :, 0]\n",
404
+ " asr_new[:, :, 1:] = en[:, :, 0:-1]\n",
405
+ " en = asr_new\n",
406
+ "\n",
407
+ " F0_pred, N_pred = model.predictor.F0Ntrain(en, s)\n",
408
+ "\n",
409
+ " asr = (t_en @ pred_aln_trg.unsqueeze(0).to(device))\n",
410
+ " if model_params.decoder.type == \"hifigan\":\n",
411
+ " asr_new = torch.zeros_like(asr)\n",
412
+ " asr_new[:, :, 0] = asr[:, :, 0]\n",
413
+ " asr_new[:, :, 1:] = asr[:, :, 0:-1]\n",
414
+ " asr = asr_new\n",
415
+ "\n",
416
+ " out = model.decoder(asr,\n",
417
+ " F0_pred, N_pred, ref.squeeze().unsqueeze(0))\n",
418
+ "\n",
419
+ "\n",
420
+ " return out.squeeze().cpu().numpy()[..., :-50] # weird pulse at the end of the model, need to be fixed later"
421
+ ],
422
+ "metadata": {
423
+ "id": "2x5kVb3nc_eY"
424
+ },
425
+ "execution_count": null,
426
+ "outputs": []
427
+ },
428
+ {
429
+ "cell_type": "markdown",
430
+ "source": [
431
+ "### Synthesize speech"
432
+ ],
433
+ "metadata": {
434
+ "id": "O159JnwCc6CC"
435
+ }
436
+ },
437
+ {
438
+ "cell_type": "code",
439
+ "source": [
440
+ "text = '''Maltby and Company would issue warrants on them deliverable to the importer, and the goods were then passed to be stored in neighboring warehouses.\n",
441
+ "'''"
442
+ ],
443
+ "metadata": {
444
+ "id": "ThciXQ6rc9Eq"
445
+ },
446
+ "execution_count": null,
447
+ "outputs": []
448
+ },
449
+ {
450
+ "cell_type": "code",
451
+ "source": [
452
+ "# get a random reference in the training set, note that it doesn't matter which one you use\n",
453
+ "path = \"Data/wavs/LJ001-0110.wav\"\n",
454
+ "# this style vector ref_s can be saved as a parameter together with the model weights\n",
455
+ "ref_s = compute_style(path)"
456
+ ],
457
+ "metadata": {
458
+ "id": "jldPkJyCc83a"
459
+ },
460
+ "execution_count": null,
461
+ "outputs": []
462
+ },
463
+ {
464
+ "cell_type": "code",
465
+ "source": [
466
+ "start = time.time()\n",
467
+ "wav = inference(text, ref_s, alpha=0.9, beta=0.9, diffusion_steps=10, embedding_scale=1)\n",
468
+ "rtf = (time.time() - start) / (len(wav) / 24000)\n",
469
+ "print(f\"RTF = {rtf:5f}\")\n",
470
+ "import IPython.display as ipd\n",
471
+ "display(ipd.Audio(wav, rate=24000, normalize=False))"
472
+ ],
473
+ "metadata": {
474
+ "id": "_mIU0jqDdQ-c"
475
+ },
476
+ "execution_count": null,
477
+ "outputs": []
478
+ }
479
+ ]
480
+ }
stylekan/Configs/config.yml ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ log_dir: "Models/LJSpeech"
2
+ first_stage_path: "first_stage.pth"
3
+ save_freq: 2
4
+ log_interval: 10
5
+ device: "cuda"
6
+ epochs_1st: 200 # number of epochs for first stage training (pre-training)
7
+ epochs_2nd: 100 # number of epochs for second stage training (joint training)
8
+ batch_size: 16
9
+ max_len: 400 # maximum number of frames
10
+ pretrained_model: ""
11
+ second_stage_load_pretrained: true # set to true if the pre-trained model is for 2nd stage
12
+ load_only_params: false # set to true if do not want to load epoch numbers and optimizer parameters
13
+
14
+ F0_path: "Utils/JDC/bst.t7"
15
+ ASR_config: "Utils/ASR/config.yml"
16
+ ASR_path: "Utils/ASR/epoch_00080.pth"
17
+ PLBERT_dir: 'Utils/PLBERT/'
18
+
19
+ data_params:
20
+ train_data: "Data/train_list.txt"
21
+ val_data: "Data/val_list.txt"
22
+ root_path: "/local/LJSpeech-1.1/wavs"
23
+ OOD_data: "Data/OOD_texts.txt"
24
+ min_length: 50 # sample until texts with this size are obtained for OOD texts
25
+
26
+ preprocess_params:
27
+ sr: 24000
28
+ spect_params:
29
+ n_fft: 2048
30
+ win_length: 1200
31
+ hop_length: 300
32
+
33
+ model_params:
34
+ multispeaker: false
35
+
36
+ dim_in: 64
37
+ hidden_dim: 512
38
+ max_conv_dim: 512
39
+ n_layer: 3
40
+ n_mels: 80
41
+
42
+ n_token: 178 # number of phoneme tokens
43
+ max_dur: 50 # maximum duration of a single phoneme
44
+ style_dim: 128 # style vector size
45
+
46
+ dropout: 0.2
47
+
48
+ # config for decoder
49
+ decoder:
50
+ type: 'istftnet' # either hifigan or istftnet
51
+ resblock_kernel_sizes: [3,7,11]
52
+ upsample_rates : [10, 6]
53
+ upsample_initial_channel: 512
54
+ resblock_dilation_sizes: [[1,3,5], [1,3,5], [1,3,5]]
55
+ upsample_kernel_sizes: [20, 12]
56
+ gen_istft_n_fft: 20
57
+ gen_istft_hop_size: 5
58
+
59
+ # speech language model config
60
+ slm:
61
+ model: 'microsoft/wavlm-base-plus'
62
+ sr: 16000 # sampling rate of SLM
63
+ hidden: 768 # hidden size of SLM
64
+ nlayers: 13 # number of layers of SLM
65
+ initial_channel: 64 # initial channels of SLM discriminator head
66
+
67
+ # style diffusion model config
68
+ diffusion:
69
+ embedding_mask_proba: 0.1
70
+ # transformer config
71
+ transformer:
72
+ num_layers: 3
73
+ num_heads: 8
74
+ head_features: 64
75
+ multiplier: 2
76
+
77
+ # diffusion distribution config
78
+ dist:
79
+ sigma_data: 0.2 # placeholder for estimate_sigma_data set to false
80
+ estimate_sigma_data: true # estimate sigma_data from the current batch if set to true
81
+ mean: -3.0
82
+ std: 1.0
83
+
84
+ loss_params:
85
+ lambda_mel: 5. # mel reconstruction loss
86
+ lambda_gen: 1. # generator loss
87
+ lambda_slm: 1. # slm feature matching loss
88
+
89
+ lambda_mono: 1. # monotonic alignment loss (1st stage, TMA)
90
+ lambda_s2s: 1. # sequence-to-sequence loss (1st stage, TMA)
91
+ TMA_epoch: 50 # TMA starting epoch (1st stage)
92
+
93
+ lambda_F0: 1. # F0 reconstruction loss (2nd stage)
94
+ lambda_norm: 1. # norm reconstruction loss (2nd stage)
95
+ lambda_dur: 1. # duration loss (2nd stage)
96
+ lambda_ce: 20. # duration predictor probability output CE loss (2nd stage)
97
+ lambda_sty: 1. # style reconstruction loss (2nd stage)
98
+ lambda_diff: 1. # score matching loss (2nd stage)
99
+
100
+ diff_epoch: 20 # style diffusion starting epoch (2nd stage)
101
+ joint_epoch: 50 # joint training starting epoch (2nd stage)
102
+
103
+ optimizer_params:
104
+ lr: 0.0001 # general learning rate
105
+ bert_lr: 0.00001 # learning rate for PLBERT
106
+ ft_lr: 0.00001 # learning rate for acoustic modules
107
+
108
+ slmadv_params:
109
+ min_len: 400 # minimum length of samples
110
+ max_len: 500 # maximum length of samples
111
+ batch_percentage: 0.5 # to prevent out of memory, only use half of the original batch size
112
+ iter: 10 # update the discriminator every this iterations of generator update
113
+ thresh: 5 # gradient norm above which the gradient is scaled
114
+ scale: 0.01 # gradient scaling factor for predictors from SLM discriminators
115
+ sig: 1.5 # sigma for differentiable duration modeling
116
+
stylekan/Configs/config_ft.yml ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ log_dir: "Models/IMAS_FineTuned"
2
+ save_freq: 1
3
+ log_interval: 10
4
+ device: "cuda"
5
+ epochs: 50 # number of finetuning epochs (1 hour of data)
6
+ batch_size: 3
7
+ max_len: 2500 # maximum number of frames
8
+ pretrained_model: "/home/austin/disk2/llmvcs/tt/stylekan/Models/Style_Kanade/NO_SLM_3_epoch_2nd_00002.pth"
9
+ second_stage_load_pretrained: true # set to true if the pre-trained model is for 2nd stage
10
+ load_only_params: true # set to true if do not want to load epoch numbers and optimizer parameters
11
+
12
+ # F0_path: "/home/ubuntu/STTS_48khz/StyleTTS2-48khz/Utils/JDC/bst_rmvpe_48k.t7"
13
+ # ASR_config: "Utils/ASR/config.yml"
14
+ # ASR_path: "/home/ubuntu/STTS_48khz/StyleTTS2-48khz/Utils/ASR/epoch_00050_48K.pth"
15
+
16
+ # CUDA_VISIBLE_DEVICES=0,1,2 accelerate launch multi_gpu_train_finetune_accelerate.py --config_path ./Configs/config_ft.yml
17
+ # CUDA_VISIBLE_DEVICES=5 accelerate launch train_finetune_accelerate.py --config_path ./Configs/config_ft.yml
18
+
19
+ F0_path: "/home/austin/disk2/llmvcs/tt/stylekan/Utils/JDC/bst.t7"
20
+ ASR_config: "/home/austin/disk2/llmvcs/tt/stylekan/Utils/ASR/config.yml"
21
+ ASR_path: "/home/austin/disk2/llmvcs/tt/stylekan/Utils/ASR/bst_00080.pth"
22
+
23
+ PLBERT_dir: 'Utils/PLBERT/'
24
+
25
+ data_params:
26
+ train_data: "/home/austin/disk2/llmvcs/tt/stylekan/Data/metadata_cleanest/FT_imas.csv"
27
+ val_data: "/home/austin/disk2/llmvcs/tt/stylekan/Data/metadata_cleanest/FT_imas_valid.csv"
28
+ root_path: ""
29
+ OOD_data: "/home/austin/disk2/llmvcs/tt/stylekan/Data/OOD_LargeScale_.csv"
30
+ min_length: 50 # sample until texts with this size are obtained for OOD texts
31
+
32
+
33
+ preprocess_params:
34
+ sr: 24000
35
+ spect_params:
36
+ n_fft: 2048
37
+ win_length: 1200
38
+ hop_length: 300
39
+
40
+ model_params:
41
+ multispeaker: true
42
+
43
+ dim_in: 64
44
+ hidden_dim: 512
45
+ max_conv_dim: 512
46
+ n_layer: 3
47
+ n_mels: 80
48
+
49
+ n_token: 178 # number of phoneme tokens
50
+ max_dur: 50 # maximum duration of a single phoneme
51
+ style_dim: 128 # style vector size
52
+
53
+ dropout: 0.2
54
+
55
+ decoder:
56
+ type: 'istftnet' # either hifigan or istftnet
57
+ resblock_kernel_sizes: [3,7,11]
58
+ upsample_rates : [10, 6]
59
+ upsample_initial_channel: 512
60
+ resblock_dilation_sizes: [[1,3,5], [1,3,5], [1,3,5]]
61
+ upsample_kernel_sizes: [20, 12]
62
+ gen_istft_n_fft: 20
63
+ gen_istft_hop_size: 5
64
+
65
+
66
+
67
+ # speech language model config
68
+ slm:
69
+ model: 'Respair/Whisper_Large_v2_Encoder_Block' # The model itself is hardcoded, change it through -> losses.py
70
+ sr: 16000 # sampling rate of SLM
71
+ hidden: 1280 # hidden size of SLM
72
+ nlayers: 33 # number of layers of SLM
73
+ initial_channel: 64 # initial channels of SLM discriminator head
74
+
75
+ # style diffusion model config
76
+ diffusion:
77
+ embedding_mask_proba: 0.1
78
+ # transformer config
79
+ transformer:
80
+ num_layers: 3
81
+ num_heads: 8
82
+ head_features: 64
83
+ multiplier: 2
84
+
85
+ # diffusion distribution config
86
+ dist:
87
+ sigma_data: 0.2 # placeholder for estimate_sigma_data set to false
88
+ estimate_sigma_data: true # estimate sigma_data from the current batch if set to true
89
+ mean: -3.0
90
+ std: 1.0
91
+
92
+ loss_params:
93
+ lambda_mel: 10. # mel reconstruction loss
94
+ lambda_gen: 1. # generator loss
95
+ lambda_slm: 1. # slm feature matching loss
96
+
97
+ lambda_mono: 1. # monotonic alignment loss (1st stage, TMA)
98
+ lambda_s2s: 1. # sequence-to-sequence loss (1st stage, TMA)
99
+ TMA_epoch: 9 # TMA starting epoch (1st stage)
100
+
101
+ lambda_F0: 1. # F0 reconstruction loss (2nd stage)
102
+ lambda_norm: 1. # norm reconstruction loss (2nd stage)
103
+ lambda_dur: 1. # duration loss (2nd stage)
104
+ lambda_ce: 20. # duration predictor probability output CE loss (2nd stage)
105
+ lambda_sty: 1. # style reconstruction loss (2nd stage)
106
+ lambda_diff: 1. # score matching loss (2nd stage)
107
+
108
+ diff_epoch: 0 # style diffusion starting epoch (2nd stage)
109
+ joint_epoch: 30 # joint training starting epoch (2nd stage)
110
+
111
+ optimizer_params:
112
+ lr: 0.0001 # general learning rate
113
+ bert_lr: 0.00001 # learning rate for PLBERT
114
+ ft_lr: 0.00001 # learning rate for acoustic modules
115
+
116
+ slmadv_params:
117
+ min_len: 400 # minimum length of samples
118
+ max_len: 500 # maximum length of samples
119
+ batch_percentage: 0.5 # to prevent out of memory, only use half of the original batch size
120
+ iter: 20 # update the discriminator every this iterations of generator update
121
+ thresh: 5 # gradient norm above which the gradient is scaled
122
+ scale: 0.01 # gradient scaling factor for predictors from SLM discriminators
123
+ sig: 1.5 # sigma for differentiable duration modeling
stylekan/Configs/config_ft_kaede.yml ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ log_dir: "Models/Kaede_FineTuned"
2
+ save_freq: 1
3
+ log_interval: 10
4
+ device: "cuda"
5
+ epochs: 50 # number of finetuning epochs (1 hour of data)
6
+ batch_size: 4
7
+ max_len: 436.90666666666666666666666666667 # maximum number of frames
8
+ pretrained_model: "/home/austin/disk2/llmvcs/tt/stylekan/Models/Kaede_FineTuned/epoch_2nd_00031.pth"
9
+ second_stage_load_pretrained: true # set to true if the pre-trained model is for 2nd stage
10
+ load_only_params: false # set to true if you do not want to load epoch numbers and optimizer parameters
11
+
12
+ # F0_path: "/home/ubuntu/STTS_48khz/StyleTTS2-48khz/Utils/JDC/bst_rmvpe_48k.t7"
13
+ # ASR_config: "Utils/ASR/config.yml"
14
+ # ASR_path: "/home/ubuntu/STTS_48khz/StyleTTS2-48khz/Utils/ASR/epoch_00050_48K.pth"
15
+
16
+
17
+ # CUDA_VISIBLE_DEVICES=5,6,7 accelerate launch --main_process_port 25001 --num_machines -1 train.py
18
+ F0_path: "/home/austin/disk2/llmvcs/tt/stylekan/Utils/JDC/bst.t7"
19
+ ASR_config: "/home/austin/disk2/llmvcs/tt/stylekan/Utils/ASR/config.yml"
20
+ ASR_path: "/home/austin/disk2/llmvcs/tt/stylekan/Utils/ASR/bst_00080.pth"
21
+
22
+ PLBERT_dir: 'Utils/PLBERT/'
23
+
24
+ data_params:
25
+ train_data: "/home/austin/disk1/stts-zs_cleaning/data/train_List_updated_plus_48khz.csv"
26
+ val_data: "/home/austin/disk2/llmvcs/tt/stylekan/Data/metadata_cleanest/FT_saori_valid.csv"
27
+ root_path: ""
28
+ OOD_data: "/home/austin/disk2/llmvcs/tt/stylekan/Data/OOD_LargeScale_.csv"
29
+ min_length: 50 # sample until texts with this size are obtained for OOD texts
30
+
31
+
32
+ preprocess_params:
33
+ sr: 24000
34
+ spect_params:
35
+ n_fft: 2048
36
+ win_length: 1200
37
+ hop_length: 300
38
+
39
+ model_params:
40
+ multispeaker: true
41
+
42
+ dim_in: 64
43
+ hidden_dim: 512
44
+ max_conv_dim: 512
45
+ n_layer: 3
46
+ n_mels: 80
47
+
48
+ n_token: 178 # number of phoneme tokens
49
+ max_dur: 50 # maximum duration of a single phoneme
50
+ style_dim: 128 # style vector size
51
+
52
+ dropout: 0.2
53
+
54
+ decoder:
55
+ type: 'istftnet' # either hifigan or istftnet
56
+ resblock_kernel_sizes: [3,7,11]
57
+ upsample_rates : [10, 6]
58
+ upsample_initial_channel: 512
59
+ resblock_dilation_sizes: [[1,3,5], [1,3,5], [1,3,5]]
60
+ upsample_kernel_sizes: [20, 12]
61
+ gen_istft_n_fft: 20
62
+ gen_istft_hop_size: 5
63
+
64
+
65
+
66
+ # speech language model config
67
+ slm:
68
+ model: 'Respair/Whisper_Large_v2_Encoder_Block' # The model itself is hardcoded, change it through -> losses.py
69
+ sr: 16000 # sampling rate of SLM
70
+ hidden: 1280 # hidden size of SLM
71
+ nlayers: 33 # number of layers of SLM
72
+ initial_channel: 64 # initial channels of SLM discriminator head
73
+
74
+ # style diffusion model config
75
+ diffusion:
76
+ embedding_mask_proba: 0.1
77
+ # transformer config
78
+ transformer:
79
+ num_layers: 3
80
+ num_heads: 8
81
+ head_features: 64
82
+ multiplier: 2
83
+
84
+ # diffusion distribution config
85
+ dist:
86
+ sigma_data: 0.2 # placeholder used when estimate_sigma_data is set to false
87
+ estimate_sigma_data: true # estimate sigma_data from the current batch if set to true
88
+ mean: -3.0
89
+ std: 1.0
90
+
91
+ loss_params:
92
+ lambda_mel: 10. # mel reconstruction loss
93
+ lambda_gen: 1. # generator loss
94
+ lambda_slm: 1. # slm feature matching loss
95
+
96
+ lambda_mono: 1. # monotonic alignment loss (1st stage, TMA)
97
+ lambda_s2s: 1. # sequence-to-sequence loss (1st stage, TMA)
98
+ TMA_epoch: 9 # TMA starting epoch (1st stage)
99
+
100
+ lambda_F0: 1. # F0 reconstruction loss (2nd stage)
101
+ lambda_norm: 1. # norm reconstruction loss (2nd stage)
102
+ lambda_dur: 1. # duration loss (2nd stage)
103
+ lambda_ce: 20. # duration predictor probability output CE loss (2nd stage)
104
+ lambda_sty: 1. # style reconstruction loss (2nd stage)
105
+ lambda_diff: 1. # score matching loss (2nd stage)
106
+
107
+ diff_epoch: 0 # style diffusion starting epoch (2nd stage)
108
+ joint_epoch: 30 # joint training starting epoch (2nd stage)
109
+
110
+ optimizer_params:
111
+ lr: 0.0001 # general learning rate
112
+ bert_lr: 0.00001 # learning rate for PLBERT
113
+ ft_lr: 0.00001 # learning rate for acoustic modules
114
+
115
+ slmadv_params:
116
+ min_len: 400 # minimum length of samples
117
+ max_len: 500 # maximum length of samples
118
+ batch_percentage: 0.5 # to prevent out of memory, only use half of the original batch size
119
+ iter: 20 # update the discriminator once every this many generator updates
120
+ thresh: 5 # gradient norm above which the gradient is scaled
121
+ scale: 0.01 # gradient scaling factor for predictors from SLM discriminators
122
+ sig: 1.5 # sigma for differentiable duration modeling
stylekan/Configs/config_kanade.yml ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ log_dir: "/home/austin/disk2/llmvcs/tt/stylekan/Models/Style_Kanade_v02"
2
+ first_stage_path: "/home/austin/disk2/llmvcs/tt/stylekan/Models/Style_Kanade_v02/epoch_1st_00026.pth"
3
+ save_freq: 1
4
+ log_interval: 10
5
+ device: "cuda"
6
+ epochs_1st: 30 # number of epochs for first stage training (pre-training)
7
+ epochs_2nd: 20 # number of epochs for second stage training (joint training)
8
+ batch_size: 24
9
+ max_len: 4000 # maximum number of frames
10
+ pretrained_model: "/home/austin/disk2/llmvcs/tt/stylekan/Models/Style_Kanade_v02/epoch_2nd_00004.pth"
11
+ second_stage_load_pretrained: true # set to true if the pre-trained model is for 2nd stage
12
+ load_only_params: false # set to true if you do not want to load epoch numbers and optimizer parameters
13
+
14
+ # CUDA_VISIBLE_DEVICES=1,2,3 accelerate launch train_first.py --config_path ./Configs/config_kanade.yml
15
+ # CUDA_VISIBLE_DEVICES=6,7 accelerate launch accelerate_train_second.py --config_path ./Configs/config_kanade_test.yml
16
+
17
+ F0_path: "/home/austin/disk2/llmvcs/tt/stylekan/Utils/JDC/bst.t7"
18
+ ASR_config: "/home/austin/disk2/llmvcs/tt/stylekan/Utils/ASR/config.yml"
19
+ ASR_path: "/home/austin/disk2/llmvcs/tt/stylekan/Utils/ASR/bst_00080.pth"
20
+
21
+ PLBERT_dir: 'Utils/PLBERT/'
22
+
23
+ data_params:
24
+ train_data: "/home/austin/disk2/llmvcs/tt/stylekan/Data/metadata_cleanest/filtered_train_list_no_nsp_plus.csv"
25
+ val_data: "/home/austin/disk2/llmvcs/tt/stylekan/Data/mg_valid.txt"
26
+ root_path: ""
27
+ OOD_data: "/home/austin/disk2/llmvcs/tt/stylekan/Data/OOD_LargeScale_.csv"
28
+ min_length: 50 # sample until texts with this size are obtained for OOD texts
29
+
30
+
31
+ preprocess_params:
32
+ sr: 24000
33
+ spect_params:
34
+ n_fft: 2048
35
+ win_length: 1200
36
+ hop_length: 300
37
+
38
+ model_params:
39
+ multispeaker: true
40
+
41
+ dim_in: 64
42
+ hidden_dim: 512
43
+ max_conv_dim: 512
44
+ n_layer: 3
45
+ n_mels: 80
46
+
47
+ n_token: 178 # number of phoneme tokens
48
+ max_dur: 50 # maximum duration of a single phoneme
49
+ style_dim: 128 # style vector size
50
+
51
+ dropout: 0.2
52
+
53
+ decoder:
54
+ type: 'istftnet' # either hifigan or istftnet
55
+ resblock_kernel_sizes: [3,7,11]
56
+ upsample_rates : [10, 6]
57
+ upsample_initial_channel: 512
58
+ resblock_dilation_sizes: [[1,3,5], [1,3,5], [1,3,5]]
59
+ upsample_kernel_sizes: [20, 12]
60
+ gen_istft_n_fft: 20
61
+ gen_istft_hop_size: 5
62
+
63
+
64
+
65
+ # speech language model config
66
+ slm:
67
+ model: 'Respair/Whisper_Large_v2_Encoder_Block' # The model itself is hardcoded, change it through -> losses.py
68
+ sr: 16000 # sampling rate of SLM
69
+ hidden: 1280 # hidden size of SLM
70
+ nlayers: 33 # number of layers of SLM
71
+ initial_channel: 64 # initial channels of SLM discriminator head
72
+
73
+ # style diffusion model config
74
+ diffusion:
75
+ embedding_mask_proba: 0.1
76
+ # transformer config
77
+ transformer:
78
+ num_layers: 3
79
+ num_heads: 8
80
+ head_features: 64
81
+ multiplier: 2
82
+
83
+ # diffusion distribution config
84
+ dist:
85
+ sigma_data: 0.2 # placeholder used when estimate_sigma_data is set to false
86
+ estimate_sigma_data: true # estimate sigma_data from the current batch if set to true
87
+ mean: -3.0
88
+ std: 1.0
89
+
90
+ loss_params:
91
+ lambda_mel: 10. # mel reconstruction loss
92
+ lambda_gen: 1. # generator loss
93
+ lambda_slm: 1. # slm feature matching loss
94
+
95
+ lambda_mono: 1. # monotonic alignment loss (1st stage, TMA)
96
+ lambda_s2s: 1. # sequence-to-sequence loss (1st stage, TMA)
97
+ TMA_epoch: 5 # TMA starting epoch (1st stage)
98
+
99
+ lambda_F0: 1. # F0 reconstruction loss (2nd stage)
100
+ lambda_norm: 1. # norm reconstruction loss (2nd stage)
101
+ lambda_dur: 1. # duration loss (2nd stage)
102
+ lambda_ce: 20. # duration predictor probability output CE loss (2nd stage)
103
+ lambda_sty: 1. # style reconstruction loss (2nd stage)
104
+ lambda_diff: 1. # score matching loss (2nd stage)
105
+
106
+ diff_epoch: 4 # style diffusion starting epoch (2nd stage)
107
+ joint_epoch: 999 # joint training starting epoch (2nd stage)
108
+
109
+ optimizer_params:
110
+ lr: 0.0001 # general learning rate
111
+ bert_lr: 0.00001 # learning rate for PLBERT
112
+ ft_lr: 0.00001 # learning rate for acoustic modules
113
+
114
+ slmadv_params:
115
+ min_len: 400 # minimum length of samples
116
+ max_len: 500 # maximum length of samples
117
+ batch_percentage: 0.5 # to prevent out of memory, only use half of the original batch size
118
+ iter: 20 # update the discriminator once every this many generator updates
119
+ thresh: 5 # gradient norm above which the gradient is scaled
120
+ scale: 0.01 # gradient scaling factor for predictors from SLM discriminators
121
+ sig: 1.5 # sigma for differentiable duration modeling
stylekan/Configs/config_kanade_test.yml ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ log_dir: "Models/Style_Kanade_test"
2
+ first_stage_path: "/home/austin/disk2/llmvcs/tt/stylekan/Models/Style_Kanade/epoch_1st_00013.pth"
3
+ save_freq: 1
4
+ log_interval: 10
5
+ device: "cuda"
6
+ epochs_1st: 25 # number of epochs for first stage training (pre-training)
7
+ epochs_2nd: 15 # number of epochs for second stage training (joint training)
8
+ batch_size: 6
9
+ max_len: 560 # maximum number of frames
10
+ pretrained_model: "/home/austin/disk2/llmvcs/tt/stylekan/Models/Style_Kanade/epoch_2nd_00003.pth"
11
+ second_stage_load_pretrained: true # set to true if the pre-trained model is for 2nd stage
12
+ load_only_params: false # set to true if you do not want to load epoch numbers and optimizer parameters
13
+
14
+ # F0_path: "/home/ubuntu/STTS_48khz/StyleTTS2-48khz/Utils/JDC/bst_rmvpe_48k.t7"
15
+ # ASR_config: "Utils/ASR/config.yml"
16
+ # ASR_path: "/home/ubuntu/STTS_48khz/StyleTTS2-48khz/Utils/ASR/epoch_00050_48K.pth"
17
+
18
+
19
+
20
+ F0_path: "/home/austin/disk2/llmvcs/tt/stylekan/Utils/JDC/bst.t7"
21
+ ASR_config: "/home/austin/disk2/llmvcs/tt/stylekan/Utils/ASR/config.yml"
22
+ ASR_path: "/home/austin/disk2/llmvcs/tt/stylekan/Utils/ASR/bst_00080.pth"
23
+
24
+ PLBERT_dir: 'Utils/PLBERT/'
25
+
26
+ data_params:
27
+ train_data: "/home/austin/disk2/llmvcs/tt/stylekan/Data/filtered_train_list.csv"
28
+ val_data: "/home/austin/disk2/llmvcs/tt/stylekan/Data/mg_valid.txt"
29
+ root_path: ""
30
+ OOD_data: "/home/austin/disk2/llmvcs/tt/stylekan/Data/OOD_LargeScale_.csv"
31
+ min_length: 50 # sample until texts with this size are obtained for OOD texts
32
+
33
+
34
+ preprocess_params:
35
+ sr: 24000
36
+ spect_params:
37
+ n_fft: 2048
38
+ win_length: 1200
39
+ hop_length: 300
40
+
41
+ model_params:
42
+ multispeaker: true
43
+
44
+ dim_in: 64
45
+ hidden_dim: 512
46
+ max_conv_dim: 512
47
+ n_layer: 3
48
+ n_mels: 80
49
+
50
+ n_token: 178 # number of phoneme tokens
51
+ max_dur: 50 # maximum duration of a single phoneme
52
+ style_dim: 128 # style vector size
53
+
54
+ dropout: 0.2
55
+
56
+ decoder:
57
+ type: 'istftnet' # either hifigan or istftnet
58
+ resblock_kernel_sizes: [3,7,11]
59
+ upsample_rates : [10, 6]
60
+ upsample_initial_channel: 512
61
+ resblock_dilation_sizes: [[1,3,5], [1,3,5], [1,3,5]]
62
+ upsample_kernel_sizes: [20, 12]
63
+ gen_istft_n_fft: 20
64
+ gen_istft_hop_size: 5
65
+
66
+
67
+
68
+ # speech language model config
69
+ slm:
70
+ model: 'Respair/Whisper_Large_v2_Encoder_Block' # The model itself is hardcoded, change it through -> losses.py
71
+ sr: 16000 # sampling rate of SLM
72
+ hidden: 1280 # hidden size of SLM
73
+ nlayers: 33 # number of layers of SLM
74
+ initial_channel: 64 # initial channels of SLM discriminator head
75
+
76
+ # style diffusion model config
77
+ diffusion:
78
+ embedding_mask_proba: 0.1
79
+ # transformer config
80
+ transformer:
81
+ num_layers: 3
82
+ num_heads: 8
83
+ head_features: 64
84
+ multiplier: 2
85
+
86
+ # diffusion distribution config
87
+ dist:
88
+ sigma_data: 0.2 # placeholder used when estimate_sigma_data is set to false
89
+ estimate_sigma_data: true # estimate sigma_data from the current batch if set to true
90
+ mean: -3.0
91
+ std: 1.0
92
+
93
+ loss_params:
94
+ lambda_mel: 10. # mel reconstruction loss
95
+ lambda_gen: 1. # generator loss
96
+ lambda_slm: 1. # slm feature matching loss
97
+
98
+ lambda_mono: 1. # monotonic alignment loss (1st stage, TMA)
99
+ lambda_s2s: 1. # sequence-to-sequence loss (1st stage, TMA)
100
+ TMA_epoch: 9 # TMA starting epoch (1st stage)
101
+
102
+ lambda_F0: 1. # F0 reconstruction loss (2nd stage)
103
+ lambda_norm: 1. # norm reconstruction loss (2nd stage)
104
+ lambda_dur: 1. # duration loss (2nd stage)
105
+ lambda_ce: 20. # duration predictor probability output CE loss (2nd stage)
106
+ lambda_sty: 1. # style reconstruction loss (2nd stage)
107
+ lambda_diff: 1. # score matching loss (2nd stage)
108
+
109
+ diff_epoch: 2 # style diffusion starting epoch (2nd stage)
110
+ joint_epoch: 3 # joint training starting epoch (2nd stage)
111
+
112
+ optimizer_params:
113
+ lr: 0.0001 # general learning rate
114
+ bert_lr: 0.00001 # learning rate for PLBERT
115
+ ft_lr: 0.00001 # learning rate for acoustic modules
116
+
117
+ slmadv_params:
118
+ min_len: 400 # minimum length of samples
119
+ max_len: 500 # maximum length of samples
120
+ batch_percentage: 1. # fraction of the original batch size to use (lower it, e.g. to 0.5, to prevent out of memory)
121
+ iter: 1 # update the discriminator once every this many generator updates
122
+ thresh: 5 # gradient norm above which the gradient is scaled
123
+ scale: 0.01 # gradient scaling factor for predictors from SLM discriminators
124
+ sig: 1.5 # sigma for differentiable duration modeling
stylekan/Data/OOD_LargeScale_.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9742a1f68c932e3a2812876e88e2d9328e747fa751c7167c0c9237d57fb36a7a
3
+ size 14627728
stylekan/Data/filtered_train_list.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8552b349addd76788632f458d89ec23515340fc71c0788562c1f92632037f8b
3
+ size 92255378
stylekan/Data/metadata_cleanest/48khz_config_with_names_ids.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:935ed8623d9cc6ac3015898d34599c8060c2b1b6f7a75b34c9e8713bda8cebbc
3
+ size 41507323
stylekan/Data/metadata_cleanest/FT_imas copy.csv ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Events_and_Card/Card_Commyuu/Work_Comyu/Work_Comyu_chunk114.wav|soɽeʔte tsɯmaɽiː.|486
2
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/arisu/arisu_cgss/arisu_card_200716/arisu_voice_200716_6_04.wav|kono iʔɕɯɴ o, omoide ni!|495
3
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Events_and_Card/Event/Hagoromo/Hagoromo_chunk87.wav|soɯsoɯ, tamani wa oanesaɴ no iɯ koto mo ɕindʑite minaʔte. soɽe de sae haɴ wa medetakɯ, gorʲoɯɕiɴ to wakai ɕite, sono ato ni wa gaiseɴ ɽaibɯ mo tɕanto jaʔtaɴ da jo ne.|486
4
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/52-MaxBeat.wav|go baɴ ɕoɯbɯ, desɯkaɽa....ima no wa tamatama ka mo ɕiɽemaseɴɕi.|563
5
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/arisu/arisu_mobamasu/4_arisu__0004_(Vocals)/4_arisu__0004_(Vocals)_chunk12.wav|taɕika ni, sɯkoɕi haɕaide ɕimaimaɕitaga...|495
6
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_Events_and_Card/Kanade_Events/lazylazy/lazylazy_chunk26.wav|ne.|482
7
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puri2-chunk230_DeepFilterNet3.wav|joːɕi o, oː— koɽe wa koɽe wa osoɽoɕiː ɽitorɯ monsɯtaː da okaɕi o agerɯkaɽa doɯ ka jɯrɯɕite okɯɽe|563
8
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/usamin/usamin_mobamas/usamin_mobamasu_0017/usamin_mobamasu_0017_chunk67.wav|kisekae nanatɕaɴ desɯ joː! tsɯgi hadʲ mo goaikʲoɯ desɯ...ija ne ɕimpiɴ pikapika no doːrɯ kaː...toɯdʑi wa saisentaɴ daʔtaɴ desɯ joː? de mo boːmɯ ga saʔte...ɯɯ...|481
9
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Mobamas/Syuko Voice/【モバマス】[フォレストヴィリディス]塩見周子【ボイス集】 - Niconico Video/【モバマス】[フォレストヴィリディス]塩見周子【ボイス集】 - Niconico Video_chunk35.wav|ne tsɯ i ɕiseɴ, kandʑirɯ naː. pɯɽodʲɯɯsaːsaɴ no hoɯ kaɽa.|486
10
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_mobamas/chie_mobamasu_0017/chie_mobamasu_0017_chunk66.wav|anzɯtɕaɴ wa iʔɕɯɴ de aidorɯ ni naɽerɯɕi.|483
11
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/shiburin/shiburin_mobamas/shiburin_mobamasu_0017/shiburin_mobamasu_0017_chunk54.wav|jɯrɯgi nai kakɯgo o idaite ɕɯɯ o fɯrɯɴ da.|494
12
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/mobamasu/karen_mobamasu_0003_cnk216.wav|pɯɽodʲɯɯsaːsaɴ.|488
13
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/jou_mika/jou_mika_cgss/jou_mika_card_301129/jou_mika_voice_301129_2_02.wav|wataɕi wa odoɽokasaɽerɯ joɽi, odoɽoka ɕitai hadaɕi! itazɯɽa to onadʑi da moː—!|485
14
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Events_and_Card/Event/Hagoromo/Hagoromo_chunk118.wav|kɯgiɽi, ka.|486
15
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Events_and_Card/Event/Hagoromo/Hagoromo_chunk98.wav|ɯɯɴ, saihaɴ wa goɯiɴ naɴ dakaɽa.|486
16
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/shiburin/shiburin_cgss/shiburin_card_200924/shiburin_voice_200924_2_06.wav|wakaɽe wa daidʑi daʔte kizɯitakaɽa. tɕanto iɯ jo, wataɕi kaɽa. sajonaɽa o.|494
17
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/shiburin/shiburin_cgss/shiburin_card_200924/shiburin_voice_200924_6_03.wav|ano giŋga no mɯkoɯ e.|494
18
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_Events_and_Card/Kanade_Events/Kanade_CGSS_Episodes/Kanade_CGSS_Episodes_chunk341.wav|tomodatɕi to hetoheto ni narɯ made asobɯ nante.|482
19
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/shiki/shiki_mobamas/shiki_mobamasu_0002/shiki_mobamasu_0002_chunk8_chunks/chunk_10.wav|akɯɕɯ kai niːkoɯ!|480
20
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/arisu/arisu_mobamasu/11_arisu__0012_(Vocals)/11_arisu__0012_(Vocals)_chunk44.wav|aɽisɯʔte, niʔponniɴʔpokɯ nai namae de ija deɕita.|495
21
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuko_CGSS_ShinAido_Home_Room/syuuko_card_200086/syuukovoice_200086_2_02.wav|o matɕtɕa tatete agejoɯ ka? a, koːçiː?|486
22
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/arisu/arisu_cgss/arisu_card_201370/arisu_voice_201370_6_05.wav|kawaikɯ, amazɯʔpakɯ!|495
23
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/karen_cgss/karen_cgss_card_200183/karen_cgss_voice_200183_2_11.wav|pɯɽodʲɯɯsaːsaɴ wa...oseʔkai na no wa...iː tokoɽo...ka na?|488
24
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/arisu/arisu_mobamasu/14_arisu__0015_(Vocals)/14_arisu__0015_(Vocals)_chunk171.wav|doɯzo.|495
25
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mifune/mifune_cgss/mifune_card_200368/mifune_voice_200368_6_06.wav|sasajaka de mo...tanoɕinde itadaketaɽanaɽa...saiwai desɯ.|490
26
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_voice_home_shinaido_room/kanade_card_200377/kanade_voice_200377_1_07.wav|tataite mo...nigenai wa ne, kɯɽage...ɕigeki mo, naɽerɯ to akirɯ no ka na?|482
27
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/anzu/anzu_cgss/anzu_card_100078/anzu_voice_100078_2_06.wav|pɯɽodʲɯɯsaː, kʲoɯ kɯɽerɯ ame wa nani adʑi?|498
28
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_Events_and_Card/Kanade_Events/NBK/NBK_chunk1.wav|katɕimakeʔte anata ne.|482
29
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mayu/mayu_cgss/mayu_cgss_card_100270/mayu_cgss_voice_100270_1_08.wav|ɯde no toge naɽa, toɽemaseɴ jo. moɯ, makitsɯite ɕimaʔta mono.|484
30
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/fumika/fumika_cgss/fumika_card_201355/fumika_voice_201355_2_07.wav|ɽei wa oːkiː desɯ ne. fɯtaɽi de iʔte mo, jojɯɯ soɯ de...taɕikamete mite mo?|493
31
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mayu/mayu_mobamas/mayu_mobamasu_0001/mayu_mobamasu_0001_chunk8.wav|pɯɽodʲɯɯsaːsaɴ to iʔɕo naɽa...|484
32
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Events_and_Card/Event/GirlsInTheFrontier/GirlsInTheFrontier_chunk38.wav|kʲoɯ mo aɕita mo asaʔte mo, ataɽaɕiː oɕigoto ga aʔte, soɽe naɽi no tɕoɯseɴ ga aʔte, de mo tanoɕikɯte, akarɯi mainitɕi ga zɯʔto tsɯzɯkɯɴ daʔte.|486
33
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/49-Neverends.wav|oçitoɽiːtɕi fɯɴ desɯkaɽa ne. itɕi bʲoɯ itɕi bʲoɯ o taisetsɯ ni ɕinai to....|563
34
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Mobamas/Syuko Voice/【モバマス】[ブルーサマーヘブン]塩見周子【ボイス集】 - Niconico Video/【モバマス】[ブルーサマーヘブン]塩見周子【ボイス集】 - Niconico Video_chunk44.wav|tsɯide ni pɯɽodʲɯɯsaːsaɴ kaɽa mo geʔto ɕitɕaoʔka naː.|486
35
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/miku/miku_cgss/miku_card_100430/miku_voice_100430_3_01.wav|hanamitɕi de wa ne, mae mo oɕiɽo mo sajɯɯ kaɽa mo kansei ga kikoete kite, kaɽadatɕɯɯ ni faɴ no koe ga tonde kɯrɯ no! soɽe ga ne, minna ni oɯeɴ saɽeterɯʔte, kandʑirɯɴ da!|487
36
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/usamin/usamin_cgss/usamin_card_100971/usamin_voice_100971_6_02.wav|minasaɴ! natsɯkaɕiː ano koɽo ni, taimɯ sɯɽiːʔpɯ!|481
37
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_voice_home_shinaido_room/kanade_card_200762/kanade_voice_200762_6_06.wav|bɯɕitsɯke na tsɯki wa, kaerɯ dʑikaɴ ne. otsɯkaɽesama.|482
38
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mayu/mayu_mobamas/mayu_mobamasu_0006/mayu_mobamasu_0006_chunk43.wav|çitoɕikɯte.|484
39
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mayu/mayu_mobamas/mayu_mobamasu_0008/mayu_mobamasu_0008_chunk33.wav|moʔto, moʔtoʔte...|484
40
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/momoka/momoka_mobamas/momoka_mobamasu_0012/momoka_mobamasu_0012_chunk60.wav|seiːʔpai, mabajɯkɯ!|489
41
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mio/mio_honda_cgss/mio_honda_card_300457/mio_honda_voice_300457_3_01.wav|ahaha! ɯgoitetaɽa dandaɴ aʔtomaʔte kita ne! pɯɽodʲɯɯsaː mo sonna kandʑi? joɽoɕiː! soɽe dʑa, kono mama gaŋgaɴ ictɕaoɯ!|492
42
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/PL5.wav|kanadetɕaɴ, ohanaɕi ga—ɽimasɯ...ɽeiʔte kaɽa mo mainitɕi, okoɕite kɯɽemaseɴ ka?|563
43
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_mobamas/chie_mobamasu_0014/chie_mobamasu_0014_chunk291.wav|koŋkai no oɕigoto ga dekite.|483
44
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_voice_home_shinaido_room/kanade_card_200459/kanade_voice_200459_2_02.wav|onseɴ wa sɯki dakedo, ɕɯmi ni sɯrɯ no wa otona ni naʔte kaɽa ni toʔte okɯ wa.|482
45
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mayu/mayu_mobamas/mayu_mobamasu_0009/mayu_mobamasu_0009_chunk178.wav|todʑikometɕaimaɕita.|484
46
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/arisu/arisu_cgss/arisu_card_200988/arisu_voice_200988_2_11.wav|pɯɽodʲɯɯsansaɴ ga mamoʔte kɯɽeterɯ koto...wataɕitatɕi wa ɕiʔtemasɯ jo.|495
47
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk1149.wav|dʑiʔto ɕitete kɯdasai ne.|563
48
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/55-NationBlue.wav|ɽiː natɕaɴ, komaɽasetɕaʔtegomeɴ nasai.|563
49
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/nagi/nagi_cgss/nagi_card_301123/nagi_voice_301123_2_01.wav|sns wa, daini no matɕi desɯ ne. sɯgoɕikata o matɕigaerɯto, taiheɴ na koto ni narɯ.|499
50
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/usamin/usamin_mobamas/usamin_mobamasu_0018/usamin_mobamasu_0018_chunk73.wav|pɯɽodʲɯɯsaːsaɴ mo minna mo! tɕiawase na itɕi neɴ ni naɽimasɯ joɯ ni! pɯɽodʲɯɯsaːsaɴ.|481
stylekan/Data/metadata_cleanest/FT_imas.csv ADDED
The diff for this file is too large to render. See raw diff
 
stylekan/Data/metadata_cleanest/FT_imas_remapped.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88548d781c2b51530f2f6729b00c35ba45d9af3085ff3cc8e72695539e5d9b0b
3
+ size 10502242
stylekan/Data/metadata_cleanest/FT_imas_valid.csv ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_cgss/chieri_card_100519/chieri_voice_100519_2_09.wav|hai, dasasete kɯdasai! baɽaeti baŋgɯmi he no ɕɯtsɯei, gambaɽimasɯ!|5
2
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/frederica/fredrica_cgss/fredrica_card_100685/fredrica_voice_100685_2_11.wav|fɯɽetɕaɴ wa kʲɯɯ goɯ dakaɽa, oboetoite neː—! iʔɕo ni eɽande mo iːkedo!|20
3
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk222.wav|osewa ni naɽimasɯ. kotoɕi mo.|2
4
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/fumika/fumika_mobamas/fumika_split/fumika__0013/fumika__0013_chunk71.wav|ko ki joki, ɽomaɴ sɯ no joɯ desɯ ne.|13
5
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/mobamasu/karen_mobamasu_0008_cnk29.wav|kiʔto matɕigaʔteta koto mo iʔpai arɯ.|7
6
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/shiburin/shiburin_mobamas/shiburin_mobamasu_0014/shiburin_mobamasu_0014_chunk5.wav|gaʔkoɯ?|6
7
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/31-MaxBeat.wav|ɯɯɴ, tokɯi de wa—ɽimaseɴ ne....çitoɽi de wa iɽenai ka mo ɕiɽemaseɴ.|2
8
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/anzu/anzu_cgss/anzu_card_100316/anzu_voice_100316_1_12.wav|maː, pɯɽodʲɯɯsaː wa, anzɯ no ɕitsɯjoɯ akɯʔte koto de...|11
9
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_Events_and_Card/Kanade_Events/twintail/twintail_chunk84.wav|aɽa?|3
10
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/shiburin/shiburin_cgss/shiburin_card_200002/shiburin_voice_200002_2_06.wav|pɯɽodʲɯɯsaːʔte...hana kazarɯ no to ka, doɯ omoɯɴ daɽoɯ...|6
11
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/40-CardComyu.wav|sonna—nata ni jakɯsokɯ ɕimasɯ. kiʔto, daɽe joɽi mo kiɽei ni narɯto. sɯɯpaː moderɯ joɽi mo, kanaɽazɯ. daʔte...hoɽa, oɯsama no meiɽei wa zeʔtai desɯkaɽa ne.|2
12
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Mobamas/Syuko Voice/【モバマス】[想いひとひら]塩見周子【ボイス集】 - Niconico Video/【モバマス】[想いひとひら]塩見周子【ボイス集】 - Niconico Video_chunk24.wav|konna hjoɯdʑoɯ...|0
13
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk177_chunk6.wav|mainitɕiːɽoiɽo na oɕigoto ga dekimasɯɕi|2
14
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/55-Neverends.wav|pɯɽodʲɯɯsaː ni kaɽentɕaɴ. ohajoɯ gozaimasɯ.|2
15
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_cgss/chieri_card_100108/chieri_voice_100108_6_02.wav|kʲoɯ wa sɯʔtoɽi, gambaɽimasɯ ne! sɯkoɕi, otona o mezaɕite!|5
16
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/nagi/nagi_cgss/nagi_card_300665/nagi_voice_300665_2_03.wav|ɽafɯ na kikonaɕi de, dʑimɯɕo e no konaɽi kaɴ o eɴɕɯtsɯ...nani ka matɕigaeta ka.|17
17
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puri2-chunk242_DeepFilterNet3.wav|omoidasɯ dake de kibɯɴ ga warɯkɯ narɯ jo|2
18
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/usamin/usamin_mobamas/usamin_mobamasu_0015/usamin_mobamasu_0015_chunk24.wav|wataɕitatɕi wa kiʔto, otagai ga taisetsɯ naɴ desɯ ne.|4
19
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/mobamasu/karen_mobamasu_0006_cnk179.wav|kono mɯne ni, ɕiʔkaɽi ɕimaɯ ne!|7
20
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/jou_mika/jou_mika_cgss/jou_mika_card_301268/jou_mika_voice_301268_1_05.wav|jozoɽa no hoɕi mitai ni...ɯɯɴ, jozoɽa joɽi mo tsɯjokɯ çikarɯ, sono gɯɽai no kimotɕi.|8
21
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/kanade_bonus_cd/kanade_bonus_cd_chunk0.wav|anata ga, wataɕi no?|3
22
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/35-MaxBeat.wav|soɽosoɽo deɕoɯ ka. ikimaɕoɯ, asɯkatɕaɴ.|2
23
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/32-PrettyLiarFin.wav|soɯ ieba, kʲoɯ wa ɕɯzai ga kɯrɯ çi deɕita ne.|2
24
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/arisu/arisu_mobamasu/9_arisu__0009_(Vocals)/9_arisu__0009_(Vocals)_chunk41.wav|kawaiː kao ɕite imasɯɕi.|1
25
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/TK19.wav|pɯɽodʲɯɯsaː, ano...nomi tomodatɕi ni naɽimaseɴ ka|2
26
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_Events_and_Card/Kanade_Events/Kande5Comyus/Kande5Comyus_chunk168.wav|fɯdaɴ no wataɕi dʑa, sonna saːbisɯ ɕinai mono ne.|3
27
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/frederica/fredrica_cgss/fredrica_card_101157/fredrica_voice_101157_6_07.wav|mata ɽaibɯ ni kitakɯ narɯ mahoɯ, kaketɕaʔta!|20
28
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/ranko/ranko_cgss/ranko_card_200074/ranko_voice_200074_1_05.wav|pɯɽodʲɯɯsaː no jɯme...kiʔto wataɕi ga, kono te de!|18
29
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/mobamasu/karen_mobamasu_0002_cnk58.wav|kʲoɯ asobiːkanai? dame? pɯɽodʲɯɯsaːsaɴ to...|7
30
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/arisu/arisu_cgss/arisu_card_200067/arisu_voice_200067_2_02.wav|dokidoki ɕiterɯ no ka na...wataɕi...|1
31
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/miku/miku_cgss/miku_card_100396/miku_voice_100396_6_01.wav|sekɯɕiːkʲaʔto mikɯ no ɽaibɯ, hadʑimete ikɯnʲa!|14
32
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/frederica/fredrica_cgss/fredrica_card_101090/fredrica_voice_101090_6_02.wav|wataɕi no koto, moʔto sɯki ni naʔte iːkaɽa ne!|20
33
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Events_and_Card/Event/yukyuu_no_hoshisuzushi/yukyuu_no_hoshisuzushi_chunk24.wav|saː, doɯ ka naː—?|0
34
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_mobamas/chie_mobamasu_0013/chie_mobamasu_0013_chunk36.wav|hei! kakemaɕita!|5
35
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk636.wav|tsɯgi no gotɕɯɯmoɴ wa doɯ nasaimasɯ?|2
36
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/frederica/fredrica_cgss/fredrica_card_101090/fredrica_voice_101090_4_03.wav|ammaɽi jɯʔkɯɽi kaŋgaeterɯto, seikai ga kawactɕaɯkaɽa gotɕɯɯi! ima mo seitɕoɯ ki! tsɯmaɽi, wataɕiɽaɕikɯ seitɕoɯtɕɯɯ, dakaɽa ne! me o hanaɕita sɯki ni, bidʑiɴ sɯgirɯ fɯɽetɕaɴ ni naʔtɕaɯ ka mo jo!|20
37
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/mobamasu/karen_mobamasu_0003_cnk212.wav|wataɕi ni mahoɯ o kaketa pɯɽodʲɯɯsaːsaɴ ni, sɯteːdʑi kaɽa kaɴɕa o!|7
38
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mayu/mayu_mobamas/mayu_mobamasu_0006/mayu_mobamasu_0006_chunk55.wav|zembɯ te ni hairɯɴ daʔte.|12
39
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/mobamas_voices/Episodes/kanade_saint/kanade_saint_chunk39.wav|kɯɽisɯmasɯ keːki to, koɯtɕa ga iː kaɕiɽa.|3
40
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/jou_mika/jou_mika_cgss/jou_mika_card_300112/jou_mika_voice_300112_1_08.wav|ataɕi doɯ? oiɕi soɯ deɕoɯ? toɯzeɴ, tabegoɽo daɕi?|8
41
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/ranko/ranko_mobamas/ranko_mobamasu_0016/ranko_mobamasu_0016_chunk208.wav|ano çito no omoi...|18
42
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/ranko/ranko_mobamas/ranko_mobamasu_0013/ranko_mobamasu_0013_chunk99.wav|ɴʔfɯfɯfɯ kaɴɕa ɕijoɯ.|18
43
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/ranko/ranko_mobamas/ranko_mobamasu_0009/ranko_mobamasu_0009_chunk12.wav|soɕite, saigo ni wa, hanajomesaɴ ga tiaɽa o nokosete saʔtɕaɯɴ desɯ!|18
44
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk189.wav|dakedo, anata ni deaiʔte, sonna çibi wa owaɽi o tsɯgemaɕita.|2
45
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/fumika/fumika_mobamas/fumika_split/fumika__0016/fumika__0016_chunk8.wav|pɯɽodʲɯɯsaːsaɴ e to tsɯtaemaɕoɯ.|13
46
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mayu/mayu_mobamas/mayu_mobamasu_0011/mayu_mobamasu_0011_chunk47.wav|soɕite...|12
47
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/miku/miku_mobamas/miku_mobamasu_0009/miku_mobamasu_0009_chunk32.wav|piː tɕaɴ, mikɯ no madʑime na tokoɽo o wakaʔta? wakaʔtadeɕo? naɽa kaeʔteː!|14
48
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_mobamas/chie_mobamasu_0005/chie_mobamasu_0005_chunk11.wav|semete dʑɯmbi dake o, minna to iʔɕo ni jaɽitai naʔte.|5
49
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/miku/miku_mobamas/miku_mobamasu_0001/miku_mobamasu_0001_chunk11.wav|haː, oçisama no nioi ga sɯrɯ nonʲaː!nʲɯɯ, mikaketa toki kaɽa zɯʔto ki ni naʔteta no!|14
50
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_mobamas/chie_mobamasu_0014/chie_mobamasu_0014_chunk143.wav|pɯɯɽodʲɯɯsaːsaɴ!|5
51
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_Events_and_Card/Kanade_Events/MainComyu_Kanade/MainComyu_Kanade_chunk104.wav|konomi de wa naikedo.|3
52
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/05-workkomyu.wav|kotɕiɽa no bako wa...komonoiɽe? kiɽei na mojoɯ de, oheja ni oite okitakɯ naɽimasɯ ne.|2
53
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/kanade_dorama_cd/kanade_dorama_cd_chunk116.wav|maː...|3
54
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/shiburin/shiburin_cgss/shiburin_card_200650/shiburin_voice_200650_5_01.wav|tamani keŋka ɕi taɽi, jɯzɯɽanakaʔtaɽi, haɽi aʔtaɽi...soɽe mo warɯkɯ nai jo ne. asemamiɽe de mo, kaʔko warɯkɯte mo,toɽai adopɯɽimɯsɯ no sei saŋkaʔkei wa, jɯgamanai mama, mae ni sɯsɯnde ikerɯkaɽa.|6
55
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_mobamas/chie_mobamasu_0014/chie_mobamasu_0014_chunk105.wav|e.|5
56
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/karen_cgss/karen_cgss_card_200313/karen_cgss_voice_200313_1_02.wav|mizɯ deʔpoɯ dʑa naideɕoɯ soɽeː—! ojɯ deʔpoɯ daɕi, ɕikamo ondo takame daɕi!|7
57
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/mobamasu/karen_mobamasu_0003_cnk22.wav|koɽe kaɽa mo, wataɕi ni egao o misete hoɕiː na.|7
58
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/37-NationBlue.wav|haːi!|2
59
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/shiki/shiki_cgss/shiki_card_101268/shiki_voice_101268_2_05.wav|wataɕi o wataɕi taɽaɕimerɯ mono, fɯetɕoʔta naː, itsɯ no aida ni ka.|9
60
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_Events_and_Card/Kanade_Events/NBK/NBK_chunk77.wav|jokaʔtanaɽa, ikimaɕoɯ.|3
61
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/mobamasu/karen_mobamasu_0006_cnk107.wav|wataɕi o tsɯɽedaɕite. mirɯ dake de, iː no?|7
62
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/mobamas_voices/Episodes/kanade_yasuragi/kanade_yasuragi_chunk7_AudioSR_Processed_48K.wav|itsɯ ni mo maɕite netsɯ ga haiʔterɯ no wa dʑidʑitsɯ ka na.|3
63
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/jou_mika/jou_mika_cgss/jou_mika_card_300537/jou_mika_voice_300537_1_07.wav|mite mite! ataɕi no peinto! fɯ!! osoɯɽi kiɴɕi!|8
64
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/usamin/usamin_mobamas/usamin_mobamasu_0018/usamin_mobamasu_0018_chunk55.wav|minni ɯsaminnʲɯɯjaː! merɯheɴ wa fɯkɯ ni tɕendʑi! desɯ jo! medetai desɯ! naɴ to nakɯ! oɕoɯgatsɯ wa iː desɯ jo ne! minna ga ɕiawase soɯ na kao o ɕitemasɯ! aː...atatamaɽimasɯ neː...|4
65
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_mobamas/chie_mobamasu_0012/chie_mobamasu_0012_chunk9.wav|dono kisetsɯ mo, sɯteki na koto ga takɯsaɴ arɯkedo.|5
66
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/fumika/fumika_mobamas/fumika_split/fumika__0012/fumika__0012_chunk23.wav|kawaɽi aɽimaseɴ.|13
67
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/arisu/arisu_cgss/arisu_card_200181/arisu_voice_200181_2_04.wav|komɯɽiko no dʑijɯɯka ni tsɯite wa, doɯ omoimasɯ ka? a, tɕoʔto...|1
68
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Events_and_Card/Card_Commyuu/Main_Comyu/Main_Comyu_chunk169.wav|igai to ne.|0
69
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mayu/mayu_mobamas/mayu_mobamasu_0012/mayu_mobamasu_0012_chunk27.wav|kanaɕikɯ narɯ kɯɽai ni.|12
70
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/shiki/shiki_cgss/shiki_card_100750/shiki_voice_100750_2_01.wav|sɯso ga nagai no wa, hakɯi de naɽeterɯ. ɕokosanai ka wa betsɯ mondai dakedo.|9
71
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/miku/miku_cgss/miku_card_100596/miku_voice_100596_1_02.wav|jɯɯkitɕaɴ no haɕagibɯɽi ga hampa nainʲa! koɽe ga, doːmɯ pawaː!|14
72
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puri2-chunk246_DeepFilterNet3.wav|jake ni dʑiɕiɴ taʔpɯɽi na no ga sɯkoɕi dake fwaɴ o kakitaterɯkedo|2
73
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mio/mio_honda_cgss/mio_honda_card_300351/mio_honda_voice_300351_2_12.wav|rɯɯmɯ no gaʔkʲɯɯ ɕimbɯɴ tsɯkɯʔte agerɯ! pɯɽodʲɯɯsaː!|16
74
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/fumika/fumika_mobamas/fumika_split/fumika__0014/fumika__0014_chunk21.wav|wataɕi no koi ga...|13
75
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/minami/minami_cgss/minami_card_200552/minami_voice_200552_1_01.wav|jɯɯitɕaɴ wa hogaɽaka de geŋki dakaɽa, wataɕi wa tɕoʔto otɕitsɯita fɯɴ'iki de.|19
76
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puri2-chunk371_DeepFilterNet3.wav|kimi mitai na çito ga itaɽa jɯɯɽei ni naɽazɯ ni sɯndaɴ daɽoɯ ne|2
77
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mayu/mayu_cgss/mayu_cgss_card_100166/mayu_cgss_voice_100166_2_10.wav|ɽiŋgɯ, taisetsɯ ni ɕimasɯ! ...o, ɽiaɽiŋgɯ? ...soɽe de mo, taisetsɯ ni.|12
78
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/shiburin/shiburin_cgss/shiburin_card_200001/shiburin_voice_200001_6_07.wav|faɴʔte, konna ni aʔtakai na...|6
79
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/47-MaxBeat.wav|kakosaɴ wa naɴ ni tɕoɯseɴ sɯrɯɴ desɯ ka?|2
80
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_mobamas/chie_mobamasu_0005/chie_mobamasu_0005_chunk36.wav|wa...|5
81
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Mobamas/Syuko Voice/【モバマス】[奇しき縁]塩見周子【ボイス集】 - Niconico Video/【モバマス】[奇しき縁]塩見周子【ボイス集】 - Niconico Video_chunk39.wav|ano toki çiɽoʔte itadaita kitsɯne koto ɕi jomi ɕɯɯko desɯ.|0
82
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/minami/minami_cgss/minami_card_201091/minami_voice_201091_1_01.wav|ajametɕantatɕi mo nijanija ɕitɕaʔte. hoɽa, hajakɯ kakenai to!|19
83
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/fumika/fumika_mobamas/fumika_split/fumika__0015/fumika__0015_chunk11.wav|makemaseɴ.|13
84
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/mobamas_voices/Serifu/voices_kanade_otomoe_plus/voices_kanade_otomoe_plus_chunk3.wav|kiʔto, minna mo onadʑi çikaɽi o mite irɯ no deɕoɯ?|3
85
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/ranko/ranko_mobamas/ranko_mobamasu_0016/ranko_mobamasu_0016_chunk181.wav|wataɕi mo motomete irɯ no.|18
86
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/mobamas_voices/Serifu/voices_kanade_otomoe_plus/voices_kanade_otomoe_plus_chunk22.wav|kono sɯgata ni mitoɽete mo iːkedo.|3
87
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mifune/mifune_cgss/mifune_card_201246/mifune_voice_201246_2_07.wav|jakei ga sɯteki na omise no dʲnaːkeɴ ga arɯɴ desɯga...gojotei, ikaga desɯ ka?|10
88
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/fumika/fumika_mobamas/fumika_split/fumika__0009/fumika__0009_chunk49.wav|mɯkaɕi kaɽa, çito no ɕiseɴ o sakete kimaɕita.|13
89
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/EP28.wav|fɯfɯʔ...aɴɕiɴ ɕite kɯdasai. hanabi ja tɕiɽi jɯkɯ sakɯɽa ga ɯtsɯkɯɕiːkaɽa to iʔte mo, wataɕi wa doko ka e kie taɽi nante ɕimaseɴ. daʔte wataɕi no i baɕo wa, koko naɴ desɯkaɽa.......|2
90
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/minami/minami_cgss/minami_card_200958/minami_voice_200958_3_01.wav|aɽa, keijakɯ o majoʔte irɯ no? kaɕikokɯte, totemo oɽoka ne. fɯfɯfɯ, itsɯ made gamaɴ ga dekirɯ no ka, mite ite agerɯ...kono amai amai jɯɯwakɯ ni, doko made aɽagaerɯ no kaɕiɽa?|19
91
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mio/mio_honda_cgss/mio_honda_card_300799/mio_honda_voice_300799_1_09.wav|kiɽaɽiɴ? sasɯga ni soɽosoɽo okigaerɯkaɽa sa. sono...ɯtsɯsanaide?|16
92
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk151.wav|iː desɯ ka? kʲoɯ wa moɯ oɕigoto ɕitɕa dame desɯ.|2
93
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/shiburin/shiburin_mobamas/shiburin_mobamasu_0007/shiburin_mobamasu_0007_chunk10.wav|maː, soɯ iɯ tokoɽoʔte ammaɽiːʔta koto naiɕi.|6
94
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_voice_home_shinaido_room/kanade_card_201272/kanade_voice_201272_1_01.wav|jɯbi o kaɽamete, netsɯ o tsɯtaeaʔte...fɯɽeta saki kaɽa, madʑiɽiaʔte ɕimai soɯ.|3
95
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puri2-chunk238_chunk4_DeepFilterNet3.wav|haɽoiɴ paːtiː da jo, tatemaedʑoɯ wa.|2
96
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk187.wav|mɯkaɕi no wataɕi wa, kiʔto tokɯni mokɯteki nante motɕiʔte inakaʔta joɯ ni omoimasɯ.|2
97
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/fumika/fumika_mobamas/fumika_split/fumika__0003/fumika__0003_chunk34.wav|ma, a, ano, sɯmimaseɴ, kizɯkazɯ ni...|13
98
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk204.wav|osewa ni naɽimasɯ. kotoɕi mo.|2
99
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puri2-chunk202_DeepFilterNet3.wav|soɯ? homekotoba to ɕite sɯnao ni ɯke toɽiʔte okoɯ ka na. aɽigatoɯ. kimi no kʲoɯrʲokwaʔte koso da jo. kimi ni tetsɯdaiʔte mo ɽaʔte, kono iɕoɯ to soɯbi ni kaɽada o naɽaɕite oitejo kaʔta. ɕikaɕi, me o mawaɕite irɯ kono otoko ni wa iːtai koto ga jama hodo aʔtakeɽedo, koɯ mo aʔsaɽi ɕimawaʔte ɕimaɯto, naɴ da ka hjoɯɕinɯke ɕite ɕimaʔta.|2
100
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_mobamas/chie_mobamasu_0017/chie_mobamasu_0017_chunk218.wav|oɕiete hoɕiː na.|5
101
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puri2-chunk55_DeepFilterNet3.wav|nasake wa çito no tame naɽazɯ to wa iɯkeɽedo, hakaɽazɯ mo oɽei o moɽaʔte ɕimaʔta ne. de mo...ka, kaʔpɯrɯ waɽibikitɕikeʔto ka...|2
102
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/miku/miku_cgss/miku_card_100935/miku_voice_100935_2_06.wav|mikɯtatɕi no tame niːtsɯ mo oɕigoto gambaʔte...pːtɕaɴ wa sɯgoi jo neː!|14
103
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/jou_mika/jou_mika_cgss/jou_mika_card_301268/jou_mika_voice_301268_2_10.wav|kameɽamansaɴ no noɽi mo itsɯ mo to zenzeɴ tɕigaʔte! aː, kintɕoɯ ɕita! ɯfɯfɯ!|8
104
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/frederica/fredrica_cgss/fredrica_card_100022/fredrica_voice_100022_2_11.wav|pɯɽodʲɯɯsaːʔte, ɽiɴwaɴ na no? kaʔkoɯ iː!|20
105
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/unxkaedepuriUBfin-chunk636_DeepFilterNet3.wav|akiɽamete narɯ mono ka! fɯɯɴʔ!|2
106
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/16-christmas.wav|gʲɯː.|2
107
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/nagi/nagi_cgss/nagi_card_301124/nagi_voice_301124_6_03.wav|mezase, toɽendo itɕiː!|17
108
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk1019.wav|ano...pɯɽodʲɯɯsaː...|2
109
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/momoka/momoka_cgss/momoka_card_100181/momoka_voice_100181_2_05.wav|pɯɽodʲɯɯsaːtɕama wa, takɯsaɴ no çito ni ɕɯkaɽetemasɯ no ne.|15
110
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mayu/mayu_mobamas/mayu_mobamasu_0013/mayu_mobamasu_0013_chunk0.wav|fɯtaɽi de osampoɯ.|12
111
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Mobamas/Syuko Voice/【モバマス】[フォレストヴィリディス]塩見周子【ボイス集】 - Niconico Video/【モバマス】[フォレストヴィリディス]塩見周子【ボイス集】 - Niconico Video_chunk23.wav|tsɯkaɽeta kaɽada ni, koɯkateki meɴ. de mo...|0
112
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/mobamasu/karen_mobamasu_0003_cnk209.wav|wataɕi o oɯeɴ ɕite kɯɽerɯ, minna no tame ni!|7
113
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/arisu/arisu_mobamasu/10_arisu__0010_(Vocals)/10_arisu__0010_(Vocals)_chunk2.wav|soɽe wa, doɯ iɯ imi naɴ deɕoɯ ka. wataɕi wa, kandʑoɯ o komete tɕanto eŋgi ɕita tsɯmoɽi desɯ.|1
114
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuko_CGSS_ShinAido_Home_Room/syuuko_card_200425/syuukovoice_200425_5_01.wav|hoɽa hoɽa, kɯɽaeː—! ɯmi ni kite sɯɯtsɯ no jatsɯ wa,nɯɽetɕaeː—! fɯfɯ!! minna de ɯmi e bakansɯ nante, fɯtsɯɯ no onnanokoʔpoi ja ne. minna o tanoɕimaserɯ aidorɯ mo, kʲoɯ wa tanoɕindʑaʔte koː—!|0
115
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Events_and_Card/Card_Commyuu/Work_Comyu/Work_Comyu_chunk115.wav|ɯtɕiɽa no koto, otabe ja sɯɯ.ʔteki na?ʔjaːɴ! ɕaihaɴʔte wa, daitaɴ!|0
116
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/anzu/anzu_cgss/anzu_card_101189/anzu_voice_101189_2_08.wav|ɯsagi kaɽa aɽaitate no nioi ga sɯrɯ...koko made kiɽei da to, sasɯga ni kizɯkɯ ne.|11
117
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Mobamas/Syuko Voice/【モバマス】[あやかし京娘]塩見周子【ボイス集】 - Niconico Video/【モバマス】[あやかし京娘]塩見周子【ボイス集】 - Niconico Video_chunk18.wav|saː saː, minna tsɯitoide! kʲoto ni wa oinaɽisaɴ no ɽeɴɕoɯ ga arɯɴ da jo ne. kitsɯne wa sono tsɯkai naɴ da.|0
118
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/fumika/fumika_cgss/fumika_card_200696/fumika_voice_200696_1_02.wav|sɯ na ha ma no sɯteːdʑi de ɯtaɯ nante...kono keikeɴ, ɕiʔkaɽi to kokoɽo ni kizamikomimasɯ.|13
119
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk833.wav|mɯkaɕi no wataɕi ga omoi moɕi nakaʔta ima ga koko ni aɽimasɯ|2
120
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/fumika/fumika_cgss/fumika_card_200439/fumika_voice_200439_6_07.wav|masaniːrʲɯɯɕoɴ no joɯ na...kiɽameki ni mitɕita, ɽaibɯ deɕita.|13
121
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/vocals-chunk21.wav|wataɕi ni desɯ ka?|2
122
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/miku/miku_cgss/miku_card_101107/miku_voice_101107_6_05.wav|miʔkɯ no gohoɯɕinʲa!|14
123
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_Events_and_Card/Kanade_Events/kimi_no_stage_isho/kimi_no_stage_isho_chunk29.wav|«fɯʔfɯʔfɯ».|3
124
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/karen_cgss/karen_cgss_card_201404/karen_cgss_voice_201404_1_11.wav|honto wa giɽigiɽi naɴ da? anata no kotae o matɕi tsɯzɯkete...mɯne ga...hoɽa, ne.|7
125
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_cgss/chieri_card_101063/chieri_voice_101063_5_01.wav|o, pɯɽodʲɯɯsaːsaɴ, koʔtɕi desɯ!...a, aite jokaʔta...e? ɯɽisode, miaʔtemasɯ ka?...ɯɽaɕiː desɯ. wataɕi ni wa daitaɴ na irɯ ka moʔte omoʔta no de...fɯfɯʔ, iː toɕi ni naɽi soɯ!|5
126
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/usamin/usamin_mobamas/usamin_mobamasu_0021/usamin_mobamasu_0021_chunk52.wav|moɯ iʔkai kakete okimaɕoɯ! eː i!|4
127
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puri2-chunk161_DeepFilterNet3.wav|zanneɴ dakedo kono ataɽi ni wa miataɽanai ne otoɕitanaɽa kono heɴ ni arɯ hazɯ naɴ dakeɽedo wataɕitatɕi wa saifɯ o mita baɕo kaɽa idoɯ mo ɕite inai hoka ni aʔta koto to ieba|2
128
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/momoka/momoka_cgss/momoka_card_100182/momoka_voice_100182_1_10.wav|wataɕi ga mi ni tsɯkerɯ baɽa ni, toge wa aɽimaseɴ! keʔɕite sasaɽimaseɴ no!|15
129
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mio/mio_honda_cgss/mio_honda_card_300811/mio_honda_voice_300811_1_10.wav|oʔto nataɽi! kami çikoɯ ki wa nagetɕa ikenai ze! okɯɽidasɯ kandʑi de ne!|16
130
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mifune/mifune_cgss/mifune_card_200163/mifune_voice_200163_2_09.wav|hea aɽendʑi wa, nigate de. itsɯ mo, kono kamigata ni...jahaɽi, dʑimi?|10
131
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puriUBfin-chunk90_DeepFilterNet3.wav|wataɕi ga kobanda no wa, tada soɽe dake daʔtakaɽa. kazokɯ no tatɕiba mo ɽikai wa ɕite irɯɕi, keʔɕite kiɽai na wake dʑa nai.|2
stylekan/Data/metadata_cleanest/FT_imas_valid_less_than_20sec.csv ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_cgss/chieri_card_100519/chieri_voice_100519_2_09.wav|hai, dasasete kɯdasai! baɽaeti baŋgɯmi he no ɕɯtsɯei, gambaɽimasɯ!|5
2
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/frederica/fredrica_cgss/fredrica_card_100685/fredrica_voice_100685_2_11.wav|fɯɽetɕaɴ wa kʲɯɯ goɯ dakaɽa, oboetoite neː—! iʔɕo ni eɽande mo iːkedo!|20
3
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk222.wav|osewa ni naɽimasɯ. kotoɕi mo.|2
4
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/fumika/fumika_mobamas/fumika_split/fumika__0013/fumika__0013_chunk71.wav|ko ki joki, ɽomaɴ sɯ no joɯ desɯ ne.|13
5
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/mobamasu/karen_mobamasu_0008_cnk29.wav|kiʔto matɕigaʔteta koto mo iʔpai arɯ.|7
6
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/shiburin/shiburin_mobamas/shiburin_mobamasu_0014/shiburin_mobamasu_0014_chunk5.wav|gaʔkoɯ?|6
7
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/31-MaxBeat.wav|ɯɯɴ, tokɯi de wa—ɽimaseɴ ne....çitoɽi de wa iɽenai ka mo ɕiɽemaseɴ.|2
8
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/anzu/anzu_cgss/anzu_card_100316/anzu_voice_100316_1_12.wav|maː, pɯɽodʲɯɯsaː wa, anzɯ no ɕitsɯjoɯ akɯʔte koto de...|11
9
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_Events_and_Card/Kanade_Events/twintail/twintail_chunk84.wav|aɽa?|3
10
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/shiburin/shiburin_cgss/shiburin_card_200002/shiburin_voice_200002_2_06.wav|pɯɽodʲɯɯsaːʔte...hana kazarɯ no to ka, doɯ omoɯɴ daɽoɯ...|6
11
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/40-CardComyu.wav|sonna—nata ni jakɯsokɯ ɕimasɯ. kiʔto, daɽe joɽi mo kiɽei ni narɯto. sɯɯpaː moderɯ joɽi mo, kanaɽazɯ. daʔte...hoɽa, oɯsama no meiɽei wa zeʔtai desɯkaɽa ne.|2
12
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Mobamas/Syuko Voice/【モバマス】[想いひとひら]塩見周子【ボイス集】 - Niconico Video/【モバマス】[想いひとひら]塩見周子【ボイス集】 - Niconico Video_chunk24.wav|konna hjoɯdʑoɯ...|0
13
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk177_chunk6.wav|mainitɕiːɽoiɽo na oɕigoto ga dekimasɯɕi|2
14
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/55-Neverends.wav|pɯɽodʲɯɯsaː ni kaɽentɕaɴ. ohajoɯ gozaimasɯ.|2
15
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_cgss/chieri_card_100108/chieri_voice_100108_6_02.wav|kʲoɯ wa sɯʔtoɽi, gambaɽimasɯ ne! sɯkoɕi, otona o mezaɕite!|5
16
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/nagi/nagi_cgss/nagi_card_300665/nagi_voice_300665_2_03.wav|ɽafɯ na kikonaɕi de, dʑimɯɕo e no konaɽi kaɴ o eɴɕɯtsɯ...nani ka matɕigaeta ka.|17
17
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puri2-chunk242_DeepFilterNet3.wav|omoidasɯ dake de kibɯɴ ga warɯkɯ narɯ jo|2
18
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/usamin/usamin_mobamas/usamin_mobamasu_0015/usamin_mobamasu_0015_chunk24.wav|wataɕitatɕi wa kiʔto, otagai ga taisetsɯ naɴ desɯ ne.|4
19
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/mobamasu/karen_mobamasu_0006_cnk179.wav|kono mɯne ni, ɕiʔkaɽi ɕimaɯ ne!|7
20
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/jou_mika/jou_mika_cgss/jou_mika_card_301268/jou_mika_voice_301268_1_05.wav|jozoɽa no hoɕi mitai ni...ɯɯɴ, jozoɽa joɽi mo tsɯjokɯ çikarɯ, sono gɯɽai no kimotɕi.|8
21
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/kanade_bonus_cd/kanade_bonus_cd_chunk0.wav|anata ga, wataɕi no?|3
22
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/35-MaxBeat.wav|soɽosoɽo deɕoɯ ka. ikimaɕoɯ, asɯkatɕaɴ.|2
23
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/32-PrettyLiarFin.wav|soɯ ieba, kʲoɯ wa ɕɯzai ga kɯrɯ çi deɕita ne.|2
24
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/arisu/arisu_mobamasu/9_arisu__0009_(Vocals)/9_arisu__0009_(Vocals)_chunk41.wav|kawaiː kao ɕite imasɯɕi.|1
25
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/TK19.wav|pɯɽodʲɯɯsaː, ano...nomi tomodatɕi ni naɽimaseɴ ka|2
26
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_Events_and_Card/Kanade_Events/Kande5Comyus/Kande5Comyus_chunk168.wav|fɯdaɴ no wataɕi dʑa, sonna saːbisɯ ɕinai mono ne.|3
27
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/frederica/fredrica_cgss/fredrica_card_101157/fredrica_voice_101157_6_07.wav|mata ɽaibɯ ni kitakɯ narɯ mahoɯ, kaketɕaʔta!|20
28
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/ranko/ranko_cgss/ranko_card_200074/ranko_voice_200074_1_05.wav|pɯɽodʲɯɯsaː no jɯme...kiʔto wataɕi ga, kono te de!|18
29
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/mobamasu/karen_mobamasu_0002_cnk58.wav|kʲoɯ asobiːkanai? dame? pɯɽodʲɯɯsaːsaɴ to...|7
30
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/arisu/arisu_cgss/arisu_card_200067/arisu_voice_200067_2_02.wav|dokidoki ɕiterɯ no ka na...wataɕi...|1
31
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/miku/miku_cgss/miku_card_100396/miku_voice_100396_6_01.wav|sekɯɕiːkʲaʔto mikɯ no ɽaibɯ, hadʑimete ikɯnʲa!|14
32
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/frederica/fredrica_cgss/fredrica_card_101090/fredrica_voice_101090_6_02.wav|wataɕi no koto, moʔto sɯki ni naʔte iːkaɽa ne!|20
33
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Events_and_Card/Event/yukyuu_no_hoshisuzushi/yukyuu_no_hoshisuzushi_chunk24.wav|saː, doɯ ka naː—?|0
34
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_mobamas/chie_mobamasu_0013/chie_mobamasu_0013_chunk36.wav|hei! kakemaɕita!|5
35
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk636.wav|tsɯgi no gotɕɯɯmoɴ wa doɯ nasaimasɯ?|2
36
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/mobamasu/karen_mobamasu_0003_cnk212.wav|wataɕi ni mahoɯ o kaketa pɯɽodʲɯɯsaːsaɴ ni, sɯteːdʑi kaɽa kaɴɕa o!|7
37
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mayu/mayu_mobamas/mayu_mobamasu_0006/mayu_mobamasu_0006_chunk55.wav|zembɯ te ni hairɯɴ daʔte.|12
38
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/mobamas_voices/Episodes/kanade_saint/kanade_saint_chunk39.wav|kɯɽisɯmasɯ keːki to, koɯtɕa ga iː kaɕiɽa.|3
39
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/jou_mika/jou_mika_cgss/jou_mika_card_300112/jou_mika_voice_300112_1_08.wav|ataɕi doɯ? oiɕi soɯ deɕoɯ? toɯzeɴ, tabegoɽo daɕi?|8
40
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/ranko/ranko_mobamas/ranko_mobamasu_0016/ranko_mobamasu_0016_chunk208.wav|ano çito no omoi...|18
41
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/ranko/ranko_mobamas/ranko_mobamasu_0013/ranko_mobamasu_0013_chunk99.wav|ɴʔfɯfɯfɯ kaɴɕa ɕijoɯ.|18
42
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/ranko/ranko_mobamas/ranko_mobamasu_0009/ranko_mobamasu_0009_chunk12.wav|soɕite, saigo ni wa, hanajomesaɴ ga tiaɽa o nokosete saʔtɕaɯɴ desɯ!|18
43
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk189.wav|dakedo, anata ni deaiʔte, sonna çibi wa owaɽi o tsɯgemaɕita.|2
44
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/fumika/fumika_mobamas/fumika_split/fumika__0016/fumika__0016_chunk8.wav|pɯɽodʲɯɯsaːsaɴ e to tsɯtaemaɕoɯ.|13
45
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mayu/mayu_mobamas/mayu_mobamasu_0011/mayu_mobamasu_0011_chunk47.wav|soɕite...|12
46
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/miku/miku_mobamas/miku_mobamasu_0009/miku_mobamasu_0009_chunk32.wav|piː tɕaɴ, mikɯ no madʑime na tokoɽo o wakaʔta? wakaʔtadeɕo? naɽa kaeʔteː!|14
47
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_mobamas/chie_mobamasu_0005/chie_mobamasu_0005_chunk11.wav|semete dʑɯmbi dake o, minna to iʔɕo ni jaɽitai naʔte.|5
48
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/miku/miku_mobamas/miku_mobamasu_0001/miku_mobamasu_0001_chunk11.wav|haː, oçisama no nioi ga sɯrɯ nonʲaː!nʲɯɯ, mikaketa toki kaɽa zɯʔto ki ni naʔteta no!|14
49
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_mobamas/chie_mobamasu_0014/chie_mobamasu_0014_chunk143.wav|pɯɯɽodʲɯɯsaːsaɴ!|5
50
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_Events_and_Card/Kanade_Events/MainComyu_Kanade/MainComyu_Kanade_chunk104.wav|konomi de wa naikedo.|3
51
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/05-workkomyu.wav|kotɕiɽa no bako wa...komonoiɽe? kiɽei na mojoɯ de, oheja ni oite okitakɯ naɽimasɯ ne.|2
52
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/kanade_dorama_cd/kanade_dorama_cd_chunk116.wav|maː...|3
53
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/shiburin/shiburin_cgss/shiburin_card_200650/shiburin_voice_200650_5_01.wav|tamani keŋka ɕi taɽi, jɯzɯɽanakaʔtaɽi, haɽi aʔtaɽi...soɽe mo warɯkɯ nai jo ne. asemamiɽe de mo, kaʔko warɯkɯte mo,toɽai adopɯɽimɯsɯ no sei saŋkaʔkei wa, jɯgamanai mama, mae ni sɯsɯnde ikerɯkaɽa.|6
54
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_mobamas/chie_mobamasu_0014/chie_mobamasu_0014_chunk105.wav|e.|5
55
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/karen_cgss/karen_cgss_card_200313/karen_cgss_voice_200313_1_02.wav|mizɯ deʔpoɯ dʑa naideɕoɯ soɽeː—! ojɯ deʔpoɯ daɕi, ɕikamo ondo takame daɕi!|7
56
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/mobamasu/karen_mobamasu_0003_cnk22.wav|koɽe kaɽa mo, wataɕi ni egao o misete hoɕiː na.|7
57
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/37-NationBlue.wav|haːi!|2
58
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/shiki/shiki_cgss/shiki_card_101268/shiki_voice_101268_2_05.wav|wataɕi o wataɕi taɽaɕimerɯ mono, fɯetɕoʔta naː, itsɯ no aida ni ka.|9
59
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_Events_and_Card/Kanade_Events/NBK/NBK_chunk77.wav|jokaʔtanaɽa, ikimaɕoɯ.|3
60
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/mobamasu/karen_mobamasu_0006_cnk107.wav|wataɕi o tsɯɽedaɕite. mirɯ dake de, iː no?|7
61
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/mobamas_voices/Episodes/kanade_yasuragi/kanade_yasuragi_chunk7_AudioSR_Processed_48K.wav|itsɯ ni mo maɕite netsɯ ga haiʔterɯ no wa dʑidʑitsɯ ka na.|3
62
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/jou_mika/jou_mika_cgss/jou_mika_card_300537/jou_mika_voice_300537_1_07.wav|mite mite! ataɕi no peinto! fɯ!! osoɯɽi kiɴɕi!|8
63
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_mobamas/chie_mobamasu_0012/chie_mobamasu_0012_chunk9.wav|dono kisetsɯ mo, sɯteki na koto ga takɯsaɴ arɯkedo.|5
64
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/fumika/fumika_mobamas/fumika_split/fumika__0012/fumika__0012_chunk23.wav|kawaɽi aɽimaseɴ.|13
65
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/arisu/arisu_cgss/arisu_card_200181/arisu_voice_200181_2_04.wav|komɯɽiko no dʑijɯɯka ni tsɯite wa, doɯ omoimasɯ ka? a, tɕoʔto...|1
66
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Events_and_Card/Card_Commyuu/Main_Comyu/Main_Comyu_chunk169.wav|igai to ne.|0
67
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mayu/mayu_mobamas/mayu_mobamasu_0012/mayu_mobamasu_0012_chunk27.wav|kanaɕikɯ narɯ kɯɽai ni.|12
68
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/shiki/shiki_cgss/shiki_card_100750/shiki_voice_100750_2_01.wav|sɯso ga nagai no wa, hakɯi de naɽeterɯ. ɕokosanai ka wa betsɯ mondai dakedo.|9
69
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/miku/miku_cgss/miku_card_100596/miku_voice_100596_1_02.wav|jɯɯkitɕaɴ no haɕagibɯɽi ga hampa nainʲa! koɽe ga, doːmɯ pawaː!|14
70
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puri2-chunk246_DeepFilterNet3.wav|jake ni dʑiɕiɴ taʔpɯɽi na no ga sɯkoɕi dake fwaɴ o kakitaterɯkedo|2
71
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mio/mio_honda_cgss/mio_honda_card_300351/mio_honda_voice_300351_2_12.wav|rɯɯmɯ no gaʔkʲɯɯ ɕimbɯɴ tsɯkɯʔte agerɯ! pɯɽodʲɯɯsaː!|16
72
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/fumika/fumika_mobamas/fumika_split/fumika__0014/fumika__0014_chunk21.wav|wataɕi no koi ga...|13
73
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/minami/minami_cgss/minami_card_200552/minami_voice_200552_1_01.wav|jɯɯitɕaɴ wa hogaɽaka de geŋki dakaɽa, wataɕi wa tɕoʔto otɕitsɯita fɯɴ'iki de.|19
74
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puri2-chunk371_DeepFilterNet3.wav|kimi mitai na çito ga itaɽa jɯɯɽei ni naɽazɯ ni sɯndaɴ daɽoɯ ne|2
75
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mayu/mayu_cgss/mayu_cgss_card_100166/mayu_cgss_voice_100166_2_10.wav|ɽiŋgɯ, taisetsɯ ni ɕimasɯ! ...o, ɽiaɽiŋgɯ? ...soɽe de mo, taisetsɯ ni.|12
76
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/shiburin/shiburin_cgss/shiburin_card_200001/shiburin_voice_200001_6_07.wav|faɴʔte, konna ni aʔtakai na...|6
77
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/47-MaxBeat.wav|kakosaɴ wa naɴ ni tɕoɯseɴ sɯrɯɴ desɯ ka?|2
78
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_mobamas/chie_mobamasu_0005/chie_mobamasu_0005_chunk36.wav|wa...|5
79
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Mobamas/Syuko Voice/【モバマス】[奇しき縁]塩見周子【ボイス集】 - Niconico Video/【モバマス】[奇しき縁]塩見周子【ボイス集】 - Niconico Video_chunk39.wav|ano toki çiɽoʔte itadaita kitsɯne koto ɕi jomi ɕɯɯko desɯ.|0
80
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/minami/minami_cgss/minami_card_201091/minami_voice_201091_1_01.wav|ajametɕantatɕi mo nijanija ɕitɕaʔte. hoɽa, hajakɯ kakenai to!|19
81
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/fumika/fumika_mobamas/fumika_split/fumika__0015/fumika__0015_chunk11.wav|makemaseɴ.|13
82
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/mobamas_voices/Serifu/voices_kanade_otomoe_plus/voices_kanade_otomoe_plus_chunk3.wav|kiʔto, minna mo onadʑi çikaɽi o mite irɯ no deɕoɯ?|3
83
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/ranko/ranko_mobamas/ranko_mobamasu_0016/ranko_mobamasu_0016_chunk181.wav|wataɕi mo motomete irɯ no.|18
84
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/mobamas_voices/Serifu/voices_kanade_otomoe_plus/voices_kanade_otomoe_plus_chunk22.wav|kono sɯgata ni mitoɽete mo iːkedo.|3
85
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mifune/mifune_cgss/mifune_card_201246/mifune_voice_201246_2_07.wav|jakei ga sɯteki na omise no dʲnaːkeɴ ga arɯɴ desɯga...gojotei, ikaga desɯ ka?|10
86
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/fumika/fumika_mobamas/fumika_split/fumika__0009/fumika__0009_chunk49.wav|mɯkaɕi kaɽa, çito no ɕiseɴ o sakete kimaɕita.|13
87
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/EP28.wav|fɯfɯʔ...aɴɕiɴ ɕite kɯdasai. hanabi ja tɕiɽi jɯkɯ sakɯɽa ga ɯtsɯkɯɕiːkaɽa to iʔte mo, wataɕi wa doko ka e kie taɽi nante ɕimaseɴ. daʔte wataɕi no i baɕo wa, koko naɴ desɯkaɽa.......|2
88
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/minami/minami_cgss/minami_card_200958/minami_voice_200958_3_01.wav|aɽa, keijakɯ o majoʔte irɯ no? kaɕikokɯte, totemo oɽoka ne. fɯfɯfɯ, itsɯ made gamaɴ ga dekirɯ no ka, mite ite agerɯ...kono amai amai jɯɯwakɯ ni, doko made aɽagaerɯ no kaɕiɽa?|19
89
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mio/mio_honda_cgss/mio_honda_card_300799/mio_honda_voice_300799_1_09.wav|kiɽaɽiɴ? sasɯga ni soɽosoɽo okigaerɯkaɽa sa. sono...ɯtsɯsanaide?|16
90
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk151.wav|iː desɯ ka? kʲoɯ wa moɯ oɕigoto ɕitɕa dame desɯ.|2
91
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/shiburin/shiburin_mobamas/shiburin_mobamasu_0007/shiburin_mobamasu_0007_chunk10.wav|maː, soɯ iɯ tokoɽoʔte ammaɽiːʔta koto naiɕi.|6
92
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_voice_home_shinaido_room/kanade_card_201272/kanade_voice_201272_1_01.wav|jɯbi o kaɽamete, netsɯ o tsɯtaeaʔte...fɯɽeta saki kaɽa, madʑiɽiaʔte ɕimai soɯ.|3
93
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puri2-chunk238_chunk4_DeepFilterNet3.wav|haɽoiɴ paːtiː da jo, tatemaedʑoɯ wa.|2
94
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk187.wav|mɯkaɕi no wataɕi wa, kiʔto tokɯni mokɯteki nante motɕiʔte inakaʔta joɯ ni omoimasɯ.|2
95
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/fumika/fumika_mobamas/fumika_split/fumika__0003/fumika__0003_chunk34.wav|ma, a, ano, sɯmimaseɴ, kizɯkazɯ ni...|13
96
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk204.wav|osewa ni naɽimasɯ. kotoɕi mo.|2
97
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_mobamas/chie_mobamasu_0017/chie_mobamasu_0017_chunk218.wav|oɕiete hoɕiː na.|5
98
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puri2-chunk55_DeepFilterNet3.wav|nasake wa çito no tame naɽazɯ to wa iɯkeɽedo, hakaɽazɯ mo oɽei o moɽaʔte ɕimaʔta ne. de mo...ka, kaʔpɯrɯ waɽibikitɕikeʔto ka...|2
99
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/miku/miku_cgss/miku_card_100935/miku_voice_100935_2_06.wav|mikɯtatɕi no tame niːtsɯ mo oɕigoto gambaʔte...pːtɕaɴ wa sɯgoi jo neː!|14
100
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/jou_mika/jou_mika_cgss/jou_mika_card_301268/jou_mika_voice_301268_2_10.wav|kameɽamansaɴ no noɽi mo itsɯ mo to zenzeɴ tɕigaʔte! aː, kintɕoɯ ɕita! ɯfɯfɯ!|8
101
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/frederica/fredrica_cgss/fredrica_card_100022/fredrica_voice_100022_2_11.wav|pɯɽodʲɯɯsaːʔte, ɽiɴwaɴ na no? kaʔkoɯ iː!|20
102
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/unxkaedepuriUBfin-chunk636_DeepFilterNet3.wav|akiɽamete narɯ mono ka! fɯɯɴʔ!|2
103
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/16-christmas.wav|gʲɯː.|2
104
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/nagi/nagi_cgss/nagi_card_301124/nagi_voice_301124_6_03.wav|mezase, toɽendo itɕiː!|17
105
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk1019.wav|ano...pɯɽodʲɯɯsaː...|2
106
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/momoka/momoka_cgss/momoka_card_100181/momoka_voice_100181_2_05.wav|pɯɽodʲɯɯsaːtɕama wa, takɯsaɴ no çito ni ɕɯkaɽetemasɯ no ne.|15
107
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mayu/mayu_mobamas/mayu_mobamasu_0013/mayu_mobamasu_0013_chunk0.wav|fɯtaɽi de osampoɯ.|12
108
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Mobamas/Syuko Voice/【モバマス】[フォレストヴィリディス]塩見周子【ボイス集】 - Niconico Video/【モバマス】[フォレストヴィリディス]塩見周子【ボイス集】 - Niconico Video_chunk23.wav|tsɯkaɽeta kaɽada ni, koɯkateki meɴ. de mo...|0
109
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/mobamasu/karen_mobamasu_0003_cnk209.wav|wataɕi o oɯeɴ ɕite kɯɽerɯ, minna no tame ni!|7
110
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/arisu/arisu_mobamasu/10_arisu__0010_(Vocals)/10_arisu__0010_(Vocals)_chunk2.wav|soɽe wa, doɯ iɯ imi naɴ deɕoɯ ka. wataɕi wa, kandʑoɯ o komete tɕanto eŋgi ɕita tsɯmoɽi desɯ.|1
111
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Events_and_Card/Card_Commyuu/Work_Comyu/Work_Comyu_chunk115.wav|ɯtɕiɽa no koto, otabe ja sɯɯ.ʔteki na?ʔjaːɴ! ɕaihaɴʔte wa, daitaɴ!|0
112
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/anzu/anzu_cgss/anzu_card_101189/anzu_voice_101189_2_08.wav|ɯsagi kaɽa aɽaitate no nioi ga sɯrɯ...koko made kiɽei da to, sasɯga ni kizɯkɯ ne.|11
113
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Mobamas/Syuko Voice/【モバマス】[あやかし京娘]塩見周子【ボイス集】 - Niconico Video/【モバマス】[あやかし京娘]塩見周子【ボイス集】 - Niconico Video_chunk18.wav|saː saː, minna tsɯitoide! kʲoto ni wa oinaɽisaɴ no ɽeɴɕoɯ ga arɯɴ da jo ne. kitsɯne wa sono tsɯkai naɴ da.|0
114
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/fumika/fumika_cgss/fumika_card_200696/fumika_voice_200696_1_02.wav|sɯ na ha ma no sɯteːdʑi de ɯtaɯ nante...kono keikeɴ, ɕiʔkaɽi to kokoɽo ni kizamikomimasɯ.|13
115
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk833.wav|mɯkaɕi no wataɕi ga omoi moɕi nakaʔta ima ga koko ni aɽimasɯ|2
116
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/fumika/fumika_cgss/fumika_card_200439/fumika_voice_200439_6_07.wav|masaniːrʲɯɯɕoɴ no joɯ na...kiɽameki ni mitɕita, ɽaibɯ deɕita.|13
117
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/vocals-chunk21.wav|wataɕi ni desɯ ka?|2
118
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/miku/miku_cgss/miku_card_101107/miku_voice_101107_6_05.wav|miʔkɯ no gohoɯɕinʲa!|14
119
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_Events_and_Card/Kanade_Events/kimi_no_stage_isho/kimi_no_stage_isho_chunk29.wav|«fɯʔfɯʔfɯ».|3
120
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/karen_cgss/karen_cgss_card_201404/karen_cgss_voice_201404_1_11.wav|honto wa giɽigiɽi naɴ da? anata no kotae o matɕi tsɯzɯkete...mɯne ga...hoɽa, ne.|7
121
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/usamin/usamin_mobamas/usamin_mobamasu_0021/usamin_mobamasu_0021_chunk52.wav|moɯ iʔkai kakete okimaɕoɯ! eː i!|4
122
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puri2-chunk161_DeepFilterNet3.wav|zanneɴ dakedo kono ataɽi ni wa miataɽanai ne otoɕitanaɽa kono heɴ ni arɯ hazɯ naɴ dakeɽedo wataɕitatɕi wa saifɯ o mita baɕo kaɽa idoɯ mo ɕite inai hoka ni aʔta koto to ieba|2
123
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/momoka/momoka_cgss/momoka_card_100182/momoka_voice_100182_1_10.wav|wataɕi ga mi ni tsɯkerɯ baɽa ni, toge wa aɽimaseɴ! keʔɕite sasaɽimaseɴ no!|15
124
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mio/mio_honda_cgss/mio_honda_card_300811/mio_honda_voice_300811_1_10.wav|oʔto nataɽi! kami çikoɯ ki wa nagetɕa ikenai ze! okɯɽidasɯ kandʑi de ne!|16
125
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mifune/mifune_cgss/mifune_card_200163/mifune_voice_200163_2_09.wav|hea aɽendʑi wa, nigate de. itsɯ mo, kono kamigata ni...jahaɽi, dʑimi?|10
126
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puriUBfin-chunk90_DeepFilterNet3.wav|wataɕi ga kobanda no wa, tada soɽe dake daʔtakaɽa. kazokɯ no tatɕiba mo ɽikai wa ɕite irɯɕi, keʔɕite kiɽai na wake dʑa nai.|2
stylekan/Data/metadata_cleanest/FT_imas_valid_more_than_10sec.csv ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/31-MaxBeat.wav|ɯɯɴ, tokɯi de wa—ɽimaseɴ ne....çitoɽi de wa iɽenai ka mo ɕiɽemaseɴ.|2
2
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/40-CardComyu.wav|sonna—nata ni jakɯsokɯ ɕimasɯ. kiʔto, daɽe joɽi mo kiɽei ni narɯto. sɯɯpaː moderɯ joɽi mo, kanaɽazɯ. daʔte...hoɽa, oɯsama no meiɽei wa zeʔtai desɯkaɽa ne.|2
3
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/frederica/fredrica_cgss/fredrica_card_101090/fredrica_voice_101090_4_03.wav|ammaɽi jɯʔkɯɽi kaŋgaeterɯto, seikai ga kawactɕaɯkaɽa gotɕɯɯi! ima mo seitɕoɯ ki! tsɯmaɽi, wataɕiɽaɕikɯ seitɕoɯtɕɯɯ, dakaɽa ne! me o hanaɕita sɯki ni, bidʑiɴ sɯgirɯ fɯɽetɕaɴ ni naʔtɕaɯ ka mo jo!|20
4
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/miku/miku_mobamas/miku_mobamasu_0001/miku_mobamasu_0001_chunk11.wav|haː, oçisama no nioi ga sɯrɯ nonʲaː!nʲɯɯ, mikaketa toki kaɽa zɯʔto ki ni naʔteta no!|14
5
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/05-workkomyu.wav|kotɕiɽa no bako wa...komonoiɽe? kiɽei na mojoɯ de, oheja ni oite okitakɯ naɽimasɯ ne.|2
6
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/shiburin/shiburin_cgss/shiburin_card_200650/shiburin_voice_200650_5_01.wav|tamani keŋka ɕi taɽi, jɯzɯɽanakaʔtaɽi, haɽi aʔtaɽi...soɽe mo warɯkɯ nai jo ne. asemamiɽe de mo, kaʔko warɯkɯte mo,toɽai adopɯɽimɯsɯ no sei saŋkaʔkei wa, jɯgamanai mama, mae ni sɯsɯnde ikerɯkaɽa.|6
7
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/usamin/usamin_mobamas/usamin_mobamasu_0018/usamin_mobamasu_0018_chunk55.wav|minni ɯsaminnʲɯɯjaː! merɯheɴ wa fɯkɯ ni tɕendʑi! desɯ jo! medetai desɯ! naɴ to nakɯ! oɕoɯgatsɯ wa iː desɯ jo ne! minna ga ɕiawase soɯ na kao o ɕitemasɯ! aː...atatamaɽimasɯ neː...|4
8
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/EP28.wav|fɯfɯʔ...aɴɕiɴ ɕite kɯdasai. hanabi ja tɕiɽi jɯkɯ sakɯɽa ga ɯtsɯkɯɕiːkaɽa to iʔte mo, wataɕi wa doko ka e kie taɽi nante ɕimaseɴ. daʔte wataɕi no i baɕo wa, koko naɴ desɯkaɽa.......|2
9
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/minami/minami_cgss/minami_card_200958/minami_voice_200958_3_01.wav|aɽa, keijakɯ o majoʔte irɯ no? kaɕikokɯte, totemo oɽoka ne. fɯfɯfɯ, itsɯ made gamaɴ ga dekirɯ no ka, mite ite agerɯ...kono amai amai jɯɯwakɯ ni, doko made aɽagaerɯ no kaɕiɽa?|19
10
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_voice_home_shinaido_room/kanade_card_201272/kanade_voice_201272_1_01.wav|jɯbi o kaɽamete, netsɯ o tsɯtaeaʔte...fɯɽeta saki kaɽa, madʑiɽiaʔte ɕimai soɯ.|3
11
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puri2-chunk202_DeepFilterNet3.wav|soɯ? homekotoba to ɕite sɯnao ni ɯke toɽiʔte okoɯ ka na. aɽigatoɯ. kimi no kʲoɯrʲokwaʔte koso da jo. kimi ni tetsɯdaiʔte mo ɽaʔte, kono iɕoɯ to soɯbi ni kaɽada o naɽaɕite oitejo kaʔta. ɕikaɕi, me o mawaɕite irɯ kono otoko ni wa iːtai koto ga jama hodo aʔtakeɽedo, koɯ mo aʔsaɽi ɕimawaʔte ɕimaɯto, naɴ da ka hjoɯɕinɯke ɕite ɕimaʔta.|2
12
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puri2-chunk55_DeepFilterNet3.wav|nasake wa çito no tame naɽazɯ to wa iɯkeɽedo, hakaɽazɯ mo oɽei o moɽaʔte ɕimaʔta ne. de mo...ka, kaʔpɯrɯ waɽibikitɕikeʔto ka...|2
13
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuko_CGSS_ShinAido_Home_Room/syuuko_card_200425/syuukovoice_200425_5_01.wav|hoɽa hoɽa, kɯɽaeː—! ɯmi ni kite sɯɯtsɯ no jatsɯ wa,nɯɽetɕaeː—! fɯfɯ!! minna de ɯmi e bakansɯ nante, fɯtsɯɯ no onnanokoʔpoi ja ne. minna o tanoɕimaserɯ aidorɯ mo, kʲoɯ wa tanoɕindʑaʔte koː—!|0
14
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/karen_cgss/karen_cgss_card_201404/karen_cgss_voice_201404_1_11.wav|honto wa giɽigiɽi naɴ da? anata no kotae o matɕi tsɯzɯkete...mɯne ga...hoɽa, ne.|7
15
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_cgss/chieri_card_101063/chieri_voice_101063_5_01.wav|o, pɯɽodʲɯɯsaːsaɴ, koʔtɕi desɯ!...a, aite jokaʔta...e? ɯɽisode, miaʔtemasɯ ka?...ɯɽaɕiː desɯ. wataɕi ni wa daitaɴ na irɯ ka moʔte omoʔta no de...fɯfɯʔ, iː toɕi ni naɽi soɯ!|5
16
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puri2-chunk161_DeepFilterNet3.wav|zanneɴ dakedo kono ataɽi ni wa miataɽanai ne otoɕitanaɽa kono heɴ ni arɯ hazɯ naɴ dakeɽedo wataɕitatɕi wa saifɯ o mita baɕo kaɽa idoɯ mo ɕite inai hoka ni aʔta koto to ieba|2
17
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mifune/mifune_cgss/mifune_card_200163/mifune_voice_200163_2_09.wav|hea aɽendʑi wa, nigate de. itsɯ mo, kono kamigata ni...jahaɽi, dʑimi?|10
stylekan/Data/metadata_cleanest/FT_saori.csv ADDED
The diff for this file is too large to render. See raw diff
 
stylekan/Data/metadata_cleanest/FT_saori_valid.csv ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/MG56.wav|fantadʑiː no oçimesama....masaka, ikinaɽi konna ni oːkina jakɯ o itadakerɯ nante, odoɽoki deɕita|563
2
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/unxkaedepuriUBfin-chunk738_DeepFilterNet3.wav|de mo soɯ da ne, koko made kitaɽa tanoɕimasete moɽaoɯ ka|563
3
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/DD44.wav|ɽaŋkotɕaɴ wa totemo otona de, jasaɕiː ko. makete iɽaɽemaseɴ ne|563
4
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/29-NationBlue.wav|a, otsɯkaɽesama. sono, moɕi ka ɕite kaedesaɴ mo ɽeʔsɯɴ na no dakedo,...nani ka, oka ɕikaʔta kaɕiɽa?—|563
5
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk300.wav|soɕite, pɯɽodʲɯɯsaː to no tanoɕiː kaiwa mo.|563
6
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk1172.wav|oita wa daːme oɕi joki|563
7
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk420.wav|kako no hanaɕi joɽi aɕita no hanaɕi o ɕimaɕoɯ, pɯɽodʲɯɯsaː.|563
8
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/COD39.wav|koko made tsɯite kite kɯɽeta çito mo, moɯ inai çito mo...sɯbete seoimaɕoɯ!|563
9
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/RC30.wav|daisɯki na çito no tame no, toʔte oki no omekaɕi...ɽiʔpɯ de ɕiagemaɕoɯ|563
10
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk417.wav|jɯme ga nakeɽeba sagaseba iːɴ desɯ.|563
11
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/18-InochiMoyashite.wav|de mo, sonna hanaɕi o sɯrɯ no ni, konna tokoɽo de wa ikemaseɴ ne. moʔto iː baɕo niːkanakɯtɕa.|563
12
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk1125.wav|omitoːɕi desɯ jo.|563
13
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puriUBfin-chunk82_DeepFilterNet3.wav|kazokɯ to no kizɯna o tatɕikiɽoɯ to ɕite irɯ dʑibɯɴ ga, ɕiʔte iː mono dʑa nai.|563
14
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/IM46.wav|pɯɽodʲɯɯsaː mo kiɴ'iɽo no sɯɯtsɯ o kite, osoɽoi ni!|563
15
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk1137.wav|jasaɕikɯ ɕite agemasɯʔ|563
16
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/28-MaxBeat.wav|hai. motɕiɽoɴ manteɴ no go emɯeieʔkɯsɯ desɯ!|563
17
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puri2-chunk187_DeepFilterNet3.wav|nigasɯ mono ka!|563
18
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/radio_imas-chunk11_chunk1_DeepFilterNet3_AudioSR_Processed_48K.wav|saikiɴ tɕoʔto jokɯbaɽi ni naʔte ɕimaʔta mitai naɴ desɯ|563
19
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puriUBfin-chunk93_DeepFilterNet3.wav|dʑibɯɴ ni mo kazokɯ ni mo|563
20
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/03-workkomyu.wav|koɽe wa, neko no matorʲoːɕika deɕoɯ ka?|563
21
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/78-workkomyu.wav|iː koto o omoitsɯkimaɕita. kanadetɕaɴ, kondo wa wataɕi ga toɽimasɯ ne?|563
22
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/25-PrettyLiarFin.wav|fɯfɯfɯ! wataɕitatɕi no fɯtaɽi gɯɽaɕi mo, daibɯ nagakɯ naʔte kimaɕita ne. kanadetɕaɴ no iɽonna meɴ o ɕiɽete, wataɕi wa ɯɽeɕiː desɯ. wataɕi no koto mo, tɕiʔte moɽaete irɯ to omoimasɯɕi.|563
23
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk967.wav|kiɴ to çieta kaɽakɯtɕi ga osɯki?|563
stylekan/Data/metadata_cleanest/filtered_train_list.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b2db964e1cc327e92e4382fca9a9d55e990cf932f436b92e4d4576e7f6fce09
3
+ size 92225988
stylekan/Data/metadata_cleanest/filtered_train_list_no_nsp.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d22b1f5dc29a0c229946e744b0b9a7d2fc5c652ecab0e6f46e9b3db42adce0b
3
+ size 91725454
stylekan/Data/metadata_cleanest/filtered_train_list_no_nsp_HEADER_plus.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7586721acfb629514c3f84e32813cf87636d9d414f015fbd33e1b1864da7fbe5
3
+ size 95952756
stylekan/Data/metadata_cleanest/filtered_train_list_no_nsp_plus.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14aa224b114909c939a22ef655923a720985abc216ccd555d470e3537a6bdbe1
3
+ size 91713050
stylekan/Data/metadata_cleanest/prelude.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e411b89fb4d4197df3937e16262f777cce59d6c0c3959e3f8c054277b1f09821
3
+ size 42196851
stylekan/Data/metadata_cleanest/prelude_id.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77e70f06bd8cc12764429356423d3cfc3e06b9975cfd1b0f2f05820ccfb8bdc5
3
+ size 40886844
stylekan/Data/metadata_cleanest/train_48_pure.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b956aafff61ab72fc5d69854b57e9d69d1686866c85aa05bfbcc87d1fbc41114
3
+ size 40216376
stylekan/Data/metadata_cleanest/val_48_pure.csv ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/ranko/ranko_bonus_cd/ranko_bonus_cd_chunk16.wav|wataɕi, zeʔtai anata no kitai ni kotaete miserɯkaɽa!|14
2
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/25714f7a/wav/25714f7a_1843.wav|naɴ de, osoɽakɯ ɽokɯ doɯ kiʔka ga ɕindaɽa.|91
3
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/00163dc9/wav/00163dc9_0902.wav|soʔtɕi no hoɯ ga iː to ka, sɯki to ka.|80
4
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/36ea135b/wav/36ea135b_0280.wav|deɕo? go, go, tabetɕaɯ zoː!|76
5
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_02/Sakurai_Takahiro_02_chunk1947.wav|dakaɽa.|4
6
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/1cc3c6c0/wav/1cc3c6c0_1333.wav|de mo, sonna ikikata wa, itsɯ ka kakegai no nai mono made akiɽamerɯ koto ni narɯ. kiʔto iʔɕoɯ koɯkai sɯrɯ koto ni narɯ.|85
7
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/013/S013_A_0067.wav|aː, kono tsɯkɯdaniː, sɯki daʔta wa jo ne. saːbisɯ sɯrɯ wa. ato, satoimo no niʔkoɽogaɕi ga sɯgɯ ni dekirɯkaɽa.|10
8
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/ee093a4f/wav/ee093a4f_2060.wav|sono seɴ wa aɽi enai to omoʔte iːdeɕo. tada, rɯnatɕomɯ no meidotɕoɯ to toɽikime o ɕite irɯ joɯ desɯkaɽa, sono naijoɯ ga kaɽande irɯ no ka mo ɕiɽemaseɴ.|73
9
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/449d5a0a/wav/449d5a0a_1813.wav|dezaia o seʔtei ɕi niːkimasɯ ka?|63
10
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/cbe5080e/wav/cbe5080e_1548.wav|berɯbeʔto wa, arɯdʑi o motanaiɕi, fɯtsɯɯ no niŋgeɴ ni mo mierɯ. odʑoɯtɕaɴ no ɕiʔte irɯ oni to wa, komponteki nitɕigaɯ sonzai na no sa.|68
11
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/002/S002_F_0075.wav|tiaɽa saizɯʔte koto de, tamani dʑotɕɯɯ ni tsɯkɯɽaserɯ o.|25
12
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/002/S002_F_0075.wav|tiaɽa saizɯʔte koto de, tamani dʑotɕɯɯ ni tsɯkɯɽaserɯ o.|25
13
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/002/S002_X_0059.wav|tɕinami ni, koŋkai wa naɴ kaitɕɯɯ to çiɽaite mo, koɽe ikoɯ wa heŋka naikaɽa ne.|25
14
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/773a4156/wav/773a4156_2767.wav|a, soɽe de, çitotsɯ dake onegai ga arɯɴ desɯkedo.|64
15
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Karen/mobamasu/karen_mobamasu_0003_cnk119.wav|kʲoɯ wa wataɕi kaɽa, sɯkoɕi de mo okɯɽitakɯte.|5
16
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki__02/Shinichiro_Miki__02_chunk1731.wav|daɽeʔte iwaɽerɯto, sono komarɯɴ dakedo.|7
17
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Kanade/mobamas_voices/Serifu/voices_kanade_otome/voices_kanade_otome_chunk8.wav|taikɯtsɯ na kʲɯɯdʑitsɯ nante, sɯgoɕitakɯ nai no?|18
18
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/chieri/chieri_mobamas/chie_mobamasu_0015/chie_mobamasu_0015_chunk302.wav|toʔte mo tajoɽi ni narɯ, goɕɯdʑinsama desɯ ne. pikapika ni migaki agetaɽa...|24
19
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/4ce0075b/wav/4ce0075b_0767.wav|okɯɽe ga detaɽa niɕikisaɴ ki ni ɕitɕoɯ jo ne.|65
20
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/6d565f54/wav/6d565f54_1259.wav|sempai mo ogeŋki soɯ de!|81
21
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/cc948b89/wav/cc948b89_1779.wav|de, aitɕaɴʔte daɽe?|86
22
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/011/S011_A_2228.wav|haʔ.|29
23
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/002/S002_B_0286.wav|hadʑimemaɕite. ikani mo fɯrʲoɯ to wa eɴ no nasa soɯ na kao ɕiterɯ wa ne.|25
24
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/017/S017_F_0327.wav|hamase, tsɯʔteɴ da!|47
25
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/36ea135b/wav/36ea135b_1961.wav|ɯɴ, nani ka tegakaɽi mitsɯketai ne. jaʔpaɽi, goiɴ ni dʑoɯbɯtsɯ sasetɕaɯ no wa kinodokɯ de.|76
26
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_01/Sakurai_Takahiro_01_chunk2702.wav|naɴ te iɯ ka, sono, gomeɴ.|4
27
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki__01/Shinichiro_Miki__01_chunk1260.wav|na de koɽa.|7
28
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/3c58f1c4/wav/3c58f1c4_0633.wav|iːe, wataɕi wa sofɯto na no naɽa, misaki eʔkɯsɯ ɕiɽo mo dʑɯɯbɯɴ aɽi da to omoimasɯ!|70
29
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/36ea135b/wav/36ea135b_0553.wav|wataɕi no koto...mieterɯ?|76
30
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_02/Sakurai_Takahiro_02_chunk2188.wav|tsɯkitsɯmeta tokoɽo, oɕi no ɕinobɯ wa tada, ɕokɯdʑi o ɕita dake na no dakaɽa.|4
31
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/saori/CN_026.wav|sɯzɯ no oto kitaɽite fɯɯsetsɯ o matoɯ|20
32
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/horie_yui/Horie_Yui_02/Horie_Yui_02_chunk1385.wav|wataɕi wa ɕoɯgakɯsei no toki wa, saʔkaː seɴɕɯ daʔtaɴ da jo.|0
33
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/ad28b91b/wav/ad28b91b_1047.wav|aiɕite irɯ. kono sakiːkanarɯ miɽai ga aɽoɯtomo, ima kono mɯne ni arɯ kimotɕi dake wa hommono da.|88
34
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/10/10005710.wav|soɽe ga doɯ iɯ imi na no ka wa wakaɽanai.|49
35
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/b8b5fe66/wav/b8b5fe66_2505.wav|omae wa, nani ka warɯi koto o ɕita wake dʑa nai. dʑibɯɴ no kimotɕi o oɽe ni tsɯtaeta dake da.|84
36
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/2cf01874/wav/2cf01874_0269.wav|soɽe de mo, natsɯkaɕikɯ kandʑirɯ mono da na.|66
37
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/chiwa_saito/Chiwa_Saito_02/Chiwa_Saito_02_chunk367.wav|ɕiɽaberɯ tsɯmoɽi nado naikeɽedo, ɕiɽabeta tokoɽo de, osoɽakɯ wa sɯisokɯ no iki o dezɯ, kakɯdʑitsɯ ni wa ɕiboɽi kiɽenaidaɽoɯ to iɯ koto daʔta.|3
38
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/449d5a0a/wav/449d5a0a_0374.wav|kowaɽeta ato ni ɕɯɯɽi no kanoɯsei mo aɽimasɯ.|63
39
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/10/10003350.wav|a. dʑibɯɴ no jɯme mo da.|49
40
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/005/S005_F_0011.wav|gomeɴ nasai, ɯmi ni saʔki ga ɯzɯmaiterɯkaɽa, mɯkaɕi no kɯse de ne.|58
41
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/6d565f54/wav/6d565f54_0566.wav|ɯɯɴ...de mo ɕimpai nai jo. kanaɽazɯ modorɯʔte, iʔteta dʑaɴ.|81
42
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/449d5a0a/wav/449d5a0a_2281.wav|moʔto ɯne daɽi ɕitai desɯ...oːnaː no kisɯ o kɯdasai...|63
43
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/84be23bd/wav/84be23bd_0028.wav|a, soɽe dʑa, hontoɯ ni, aʔpeiɽia ga, wataɕi to oanisaɴ no kodomo mitai desɯ ne.|83
44
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/horie_yui/Horie_Yui_01/Horie_Yui_01_chunk163.wav|doɯki o ɕizɯmerɯ.|0
45
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/84be23bd/wav/84be23bd_2140.wav|sonna ki wa maʔtakɯ ɕimaseŋkedo, oːnaka kɯrɯɕi sɯgite ɕini soɯ desɯ.|83
46
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/cc948b89/wav/cc948b89_2639.wav|nakama ni miserɯ no? teɴɕoɴ agarɯɴ dakaɽa jaʔte mi nasaiʔte.|86
47
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sawashiro_miyuki/Sawashiro_Miyuki_01/Sawashiro_Miyuki_01_chunk52.wav|aŋgai, dʑikakɯ wa nai no ka mo ɕiɽenai.|19
48
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/9febd2ae/wav/9febd2ae_1602.wav|zɯʔto tatɕiʔpanaɕi de kiːtetakaɽa tsɯkaɽeta joː.|87
49
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakamoto_maya/Sakamoto_Maya_01/Sakamoto_Maya_01_chunk1709.wav|soɯ iɯ imi de wa, sendʑoɯgawaɽa çitagi wa, kawaʔta no de mo, koɯsei ɕita no de mo, modoʔta no de mo toɽikaeɕita no de mo nakɯ, maɕite, deɽeta no de mo doɽota no de mo nakɯ.|16
50
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/9febd2ae/wav/9febd2ae_0591.wav|dʑosei ga mɯtɕɯɯ ni naʔtɕaɯ kɯɽai kaʔko iː otokoʔte koto ne.|87
51
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/02/02001850.wav|tefɯda ga babanɯki ni mɯitetakaɽa, çiʔɕi ni naʔterɯɴ da jo ne. soɽe kɯɽai, wataɕi dʑa nakɯte mo daɽe de mo wakarɯ jo.|6
52
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakamoto_maya/Sakamoto_Maya_01/Sakamoto_Maya_01_chunk935.wav|dʑaː ɕoɯbɯ kaiɕi de iː na? soko sɯwaɽe jo.|16
53
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/00163dc9/wav/00163dc9_1452.wav|sono sɯki wa...donna sɯki?|80
54
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/saori/unxkaedepuriUBfin-chunk48_DeepFilterNet3.wav|aitsɯ wa wataɕi ga moɽaɯ jo soɽe dʑaː mata!|20
55
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/nagi/nagi_cgss/nagi_card_300836/nagi_voice_300836_6_03.wav|seː no de, dʑampɯ!|43
56
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/ranko/ranko_mobamas/ranko_mobamasu_0002/ranko_mobamasu_0002_chunk3.wav|minna no ɽaibɯ ga kimaʔtaʔte kiːtaɴ desɯkedo, hontoɯ desɯ ka?mɯ, mɯɽhaʔhaː! kaŋki no koe ga kikoerɯ! me o todʑiɽeba, soko ni!|14
57
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki_03/Shinichiro_Miki_03_chunk344.wav|nadeko ni taisɯrɯ ikaɽi no keʔka to ɕite.|7
58
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/003/S003_B_0139.wav|iʔteta jatsɯɽa, çiki hadʑimeta zo.|55
59
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/017/S017_F_0067.wav|moɯ iːja. dʑa, ima wa ammaɽi momerɯ na jo.|47
60
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/kamiya_hiroshi/Kamiya_Hiroshi_01/Kamiya_Hiroshi_01_chunk1454.wav|omotai no jo.|13
61
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/momoka/momoka_mobamas/momoka_mobamasu_0014/momoka_mobamasu_0014_chunk38.wav|oɕigototɕɯɯ wa, esɯkoːto ɕite kɯdasaimasɯ? iː? koɽe kaɽa wa, fɯkɯsoɯ ni wa ki o tsɯkai nasai.|11
62
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki_03/Shinichiro_Miki_03_chunk2780.wav|sendʑoɯgahaɽasaɴ ga, ima iʔtai nani o kaŋgaete irɯ no ka o kaŋgaemaɕita.|7
63
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakamoto_maya/Sakamoto_Maya_03/Sakamoto_Maya_03_chunk1207.wav|kiʔto niŋgeɴ no soɯzoɯ o zeʔsɯrɯ mono ni narɯdaɽoɯ.|16
64
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/00163dc9/wav/00163dc9_1928.wav|dokiʔto sɯrɯ koto iwanaide.|80
65
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiburin/shiburin_mobamas/shiburin_mobamasu_0008/shiburin_mobamasu_0008_chunk12.wav|kiɽei na keɕiki ga mierɯ baɕo ni, kʲoɯmi nai? tɕiːsai koɽo, maitoɕi kazokɯ to iʔteta oki ni haiɽi no baɕo.|50
66
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/2cf01874/wav/2cf01874_3620.wav|çinai to iɯto, mae no ɕitɕɯɯ no.|66
67
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/usamin/usamin_cgss/usamin_card_100125/usamin_voice_100125_3_01.wav|goɽaidʑoɯ, aɽigatoɯ gozaimaɕita! kite kɯɽeta minasaɴ ni, hai! iʔpai zɯtsɯ! motɕiɽoɴ, pɯɽodʲɯɯsaːsaɴ ni mo!|46
68
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/usamin/usamin_cgss/usamin_card_100485/usamin_voice_100485_2_10.wav|ima wa, naite nai desɯ! de mo, ano toki no koto o omoidasɯto...rɯisei zɯɯɴ!|46
69
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/ad28b91b/wav/ad28b91b_1141.wav|ageaɕi o torɯ na, sonna tsɯmoɽi wa nai. ɯɴ, iː iɽozɯkai da na, soko wa hjoɯka ɕite mo iː.|88
70
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki_03/Shinichiro_Miki_03_chunk2088.wav|çitohada nonɯkɯmoɽi o, miɽerɯ no desɯ.|7
71
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/usamin/usamin_cgss/usamin_card_100343/usamin_voice_100343_1_09.wav|katakoɽi ga ɯso no joɯ ni kiete...! oteiɽe sɯɽeba, mada dʑɯɯ neɴ wa motɕi soɯ...|46
72
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Syuuko/Syuko_CGSS_ShinAido_Home_Room/syuuko_card_200086/syuukovoice_200086_1_05.wav|konna iɕoɯ made kitɕaʔte! aidorɯʔte kandʑi da jo ne! ɯfɯfɯ!|40
73
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/00163dc9/wav/00163dc9_1968.wav|sɯki na çito ga soba niːte kɯɽerɯ dake de.|80
74
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/46d6bf83/wav/46d6bf83_2493.wav|gomeɴ, ataɕi wa pasɯ da naː.|95
75
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/6d565f54/wav/6d565f54_1073.wav|fɯʔfɯʔfɯɴ, agehatɕaɴ sɯpeɕarɯ o misete jarɯ wa!|81
76
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/ad28b91b/wav/ad28b91b_0817.wav|doɯ ɕite wataɕi ga taniɴ no akiɽameta jɯme o oɕitsɯkeɽaɽenakɯtɕa ikenaiɴ da. sonna mono o kakaete itaɽa, wataɕi made zasetsɯ sɯrɯ koto ni naɽi soɯ da. iː meiwakɯ da.|88
77
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki__02/Shinichiro_Miki__02_chunk1099.wav|aɴ ni dokoɽo ka, tɕokɯsetsɯ iwaɽemaɕita. dairekɯto desɯ.|7
78
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/002/S002_F_0069.wav|soɽe wa, haʔɕɯɯ ɽemme mo kawaɽanai to omoɯ. tɕiba, ibaɽaki, saitama, gɯmma, kakɯtɕi no haiɽeberɯ na jaŋkiː ga dete kɯrɯ koto wa, amaɽi nai to omoɯ.|25
79
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/6489388e/wav/6489388e_0920.wav|wataɕi no ɕɯɯ ni mirʲokɯ wa aɽimaɕiɴ kaː—?|62
80
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/fumika/fumika_mobamas/fumika_split/fumika__0004/fumika__0004_chunk25.wav|dema ga kakarɯ no de...|1
81
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/saori/26-MizukiTomo.wav|onegai ɕi maː sɯ!|20
82
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/940de876/wav/940de876_4484.wav|wakaɽijasɯkɯ niʔpoɴ go ni ɕite mita wa.|77
83
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/b8b5fe66/wav/b8b5fe66_0656.wav|warɯi...naɴ ka, konna koto ni naʔtɕimaʔta.|84
84
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/36ea135b/wav/36ea135b_0089.wav|eːʔto...a, kʲɯɯkoɯɕa, kagi kakaʔte...|76
85
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/18460462/wav/18460462_1416.wav|doɯ ɕite kimitatɕi wa, kono ɽeʔɕa ni haiʔte iko, betsɯ koːdo o toʔte irɯɴ da?|96
86
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/940de876/wav/940de876_3617.wav|tɕoʔto, naɴ no hanaɕi ɕiterɯ no joː.|77
87
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/8b6e7173/wav/8b6e7173_1661.wav|zɯrɯi...nemɯi...|67
88
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_03/Sakurai_Takahiro_03_chunk2576.wav|mizɯ ka.|4
89
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sawashiro_miyuki/Sawashiro_Miyuki_03/Sawashiro_Miyuki_03_chunk699.wav|ja kimotɕi o jaite irɯʔte iɯ no ka, gikɯɽi.|19
90
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/011/S011_A_1542.wav|kitɕikɯ dʑa naiɕi ka!|29
91
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/saori/072.wav|iː koto de mo aɽimaɕita? sɯɯpaː de tokɯbaiçiɴ geʔto ɕita to ka. iːɴ desɯ ka? naɽa, onegai ɕimasɯ.|20
92
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/3c58f1c4/wav/3c58f1c4_1532.wav|sempai mo wataɕi to onadʑi kimotɕi da to omoʔteta no ni.|70
93
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/horie_yui/Horie_Yui_01/Horie_Yui_01_chunk1557.wav|soɽe de iː no daɽoɯ ka?|0
94
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/449d5a0a/wav/449d5a0a_0877.wav|eŋgi no dʑiʔkoɯ wa pozitibɯ desɯ. desɯga, kɯoɽiti ni tsɯite wa fɯtoɯmei desɯ.|63
95
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_03/Sakurai_Takahiro_03_chunk987.wav|ija, soɽe wa ɕiɽanaikedo.|4
96
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki__01/Shinichiro_Miki__01_chunk163.wav|naɴ daʔte, kɯtɕinawasaɴ wa kamisama na no desɯkaɽa.|7
97
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/04/04004090.wav|haha.|35
98
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/46d6bf83/wav/46d6bf83_1376.wav|ʔɴ? aː, ano ko ka. ɯɴ, oboeterɯ jo.|95
99
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/bb6ac6f1/wav/bb6ac6f1_0498.wav|sono bɯɴ, dʑibɯɴ no koto mo amaɽi hanasanai jo ne. kikeba oɕiete kɯɽerɯɴ daɽoɯkedo, kikanai hoɯ ga iː no ka na?|79
100
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/46d6bf83/wav/46d6bf83_2572.wav|ɯɯɴ, maɕiɽo ga soɯ iɯ no wakarɯ joɯ ni narɯ ni wa, motɕiʔto ɯmakɯ naɽanaito dame kanʲaː.|95
101
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/fumika/fumika_mobamas/fumika_split/fumika__0014/fumika__0014_chunk82.wav|sanisawa sensei wa...|1
102
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/horie_yui/Horie_Yui_02/Horie_Yui_02_chunk1770.wav|wataɕi to akɯma no naɽesome o.|0
103
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sawashiro_miyuki/Sawashiro_Miyuki_03/Sawashiro_Miyuki_03_chunk212.wav|iɽaʔɕai, tsɯbasatɕaɴ.|19
104
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/940de876/wav/940de876_3392.wav|sonna no fɯɕizeɴ da wa?|77
105
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/b8b5fe66/wav/b8b5fe66_0865.wav|soɽe o ɕiɽanai niŋgeɴ kaɽa, fɯɽeai no imi o çikidaɕite kɯɽeta çitotatɕi ga irɯkaɽa.|84
106
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/kamiya_hiroshi/Kamiya_Hiroshi_01/Kamiya_Hiroshi_01_chunk308.wav|godʑɯɯ kiɽo ɽaɕiː.|13
107
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/95c3bdd8/wav/95c3bdd8_1559.wav|ɯmaɽete kono kata, zɯʔto sono kɯgʲoɯ ni taiɕinonderɯ wataɕi ni taisɯrɯ ijami desɯ ka?|78
108
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/6d565f54/wav/6d565f54_0105.wav|wataɕi ga tsɯiteʔte mo, jakɯ ni tatanai wa ne.|81
109
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/chieri/chieri_cgss/chieri_card_100860/chieri_voice_100860_6_05.wav|gʲoɯsei kaɽa no, omotenaɕi desɯ!|24
110
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Kanade/Kanade_voice_home_shinaido_room/kanade_card_200063/kanade_voice_200063_6_04.wav|hontoɯ no wataɕi, misete agerɯ!|18
111
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/bc778ddb/wav/bc778ddb_1097.wav|...ija da!! sonna iʔpoɯteki ni iwaɽete mo nani ga naɴ da ka wakaɽanai jo!|90
112
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/minami/minami_cgss/minami_card_201308/minami_voice_201308_2_02.wav|tɕanto keŋkoɯkotsɯ ga çiɽaiterɯɴ desɯ jo? tsɯbasa ga haete mo daidʑoɯbɯ na joɯ ni.|15
113
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/mifune/mifune_cgss/mifune_card_201246/mifune_voice_201246_6_06.wav|jorɯ wa mada, hadʑimaʔta bakaɽi. koɽe dake dʑa, owaɽimaseɴ jo.|56
114
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/36ea135b/wav/36ea135b_0022.wav|seɴɕɯɯ made wa ne, keʔkoɯ hoŋki de kaŋgaeteta.|76
115
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/chieri/chieri_mobamas/chie_mobamasu_0017/chie_mobamasu_0017_chunk110.wav|mahoɯ de kɯɽoɯba, sagasenai ka na?|24
116
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/011/S011_A_1667.wav|ano deai o kɯɽeta no ga oja no kʲoɯikɯ naɽa, warɯkɯ wa nai aidʑoɯ daʔta sɯ wa.|29
117
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/ranko/ranko_mobamas/ranko_mobamasu_0017/ranko_mobamasu_0017_chunk84.wav|waga tomo to.|14
118
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/bb6ac6f1/wav/bb6ac6f1_0604.wav|koɯ iɯ no otɕitɕisaɴʔpokɯ de tɕɯki mainitɕi saɽerɯto komarɯkedo.|79
119
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_03/Sakurai_Takahiro_03_chunk2754.wav|kɯrɯma ni çikaɽete ɕindaɽaɕiː zo.|4
120
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/36ea135b/wav/36ea135b_1288.wav|osewa ni naʔterɯ tatɕiba dakaɽa, honto wa ammaɽiːenaiɴ dakedo neː.|76
121
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/horie_yui/Horie_Yui_01/Horie_Yui_01_chunk1557.wav|soɽe de iː no daɽoɯ ka?|0
122
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/449d5a0a/wav/449d5a0a_0877.wav|eŋgi no dʑiʔkoɯ wa pozitibɯ desɯ. desɯga, kɯoɽiti ni tsɯite wa fɯtoɯmei desɯ.|63
123
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_03/Sakurai_Takahiro_03_chunk987.wav|ija, soɽe wa ɕiɽanaikedo.|4
124
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki__01/Shinichiro_Miki__01_chunk163.wav|naɴ daʔte, kɯtɕinawasaɴ wa kamisama na no desɯkaɽa.|7
125
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/04/04004090.wav|haha.|35
126
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/46d6bf83/wav/46d6bf83_1376.wav|ɯɯɴ? aː, ano ko ka. ɯɴ, oboeterɯ jo.|95
stylekan/Data/metadata_cleanest/val_48_pure.txt ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/ranko/ranko_bonus_cd/ranko_bonus_cd_chunk16.wav|wataɕi, zeʔtai anata no kitai ni kotaete miserɯkaɽa!|14
2
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/25714f7a/wav/25714f7a_1843.wav|naɴ de, osoɽakɯ ɽokɯ doɯ kiʔka ga ɕindaɽa.|91
3
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/00163dc9/wav/00163dc9_0902.wav|soʔtɕi no hoɯ ga iː to ka, sɯki to ka.|80
4
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/36ea135b/wav/36ea135b_0280.wav|deɕo? go, go, tabetɕaɯ zoː!|76
5
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_02/Sakurai_Takahiro_02_chunk1947.wav|dakaɽa.|4
6
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/1cc3c6c0/wav/1cc3c6c0_1333.wav|de mo, sonna ikikata wa, itsɯ ka kakegai no nai mono made akiɽamerɯ koto ni narɯ. kiʔto iʔɕoɯ koɯkai sɯrɯ koto ni narɯ.|85
7
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/013/S013_A_0067.wav|aː, kono tsɯkɯdaniː, sɯki daʔta wa jo ne. saːbisɯ sɯrɯ wa. ato, satoimo no niʔkoɽogaɕi ga sɯgɯ ni dekirɯkaɽa.|10
8
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/ee093a4f/wav/ee093a4f_2060.wav|sono seɴ wa aɽi enai to omoʔte iːdeɕo. tada, rɯnatɕomɯ no meidotɕoɯ to toɽikime o ɕite irɯ joɯ desɯkaɽa, sono naijoɯ ga kaɽande irɯ no ka mo ɕiɽemaseɴ.|73
9
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/449d5a0a/wav/449d5a0a_1813.wav|dezaia o seʔtei ɕi niːkimasɯ ka?|63
10
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/cbe5080e/wav/cbe5080e_1548.wav|berɯbeʔto wa, arɯdʑi o motanaiɕi, fɯtsɯɯ no niŋgeɴ ni mo mierɯ. odʑoɯtɕaɴ no ɕiʔte irɯ oni to wa, komponteki nitɕigaɯ sonzai na no sa.|68
11
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/002/S002_F_0075.wav|tiaɽa saizɯʔte koto de, tamani dʑotɕɯɯ ni tsɯkɯɽaserɯ o.|25
12
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/001/S001_C_0854.wav|ɯ a a a a! tɕi kɯ biː i kageɴ hanaɕite!|22
13
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/002/S002_X_0059.wav|tɕinami ni, koŋkai wa naɴ kaitɕɯɯ to çiɽaite mo, koɽe ikoɯ wa heŋka naikaɽa ne.|25
14
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/773a4156/wav/773a4156_2767.wav|a, soɽe de, çitotsɯ dake onegai ga arɯɴ desɯkedo.|64
15
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Karen/mobamasu/karen_mobamasu_0003_cnk119.wav|kʲoɯ wa wataɕi kaɽa, sɯkoɕi de mo okɯɽitakɯte.|5
16
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki__02/Shinichiro_Miki__02_chunk1731.wav|daɽeʔte iwaɽerɯto, sono komarɯɴ dakedo.|7
17
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Kanade/mobamas_voices/Serifu/voices_kanade_otome/voices_kanade_otome_chunk8.wav|taikɯtsɯ na kʲɯɯdʑitsɯ nante, sɯgoɕitakɯ nai no?|18
18
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/chieri/chieri_mobamas/chie_mobamasu_0015/chie_mobamasu_0015_chunk302.wav|toʔte mo tajoɽi ni narɯ, goɕɯdʑinsama desɯ ne. pikapika ni migaki agetaɽa...|24
19
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/4ce0075b/wav/4ce0075b_0767.wav|okɯɽe ga detaɽa niɕikisaɴ ki ni ɕitɕoɯ jo ne.|65
20
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/6d565f54/wav/6d565f54_1259.wav|sempai mo ogeŋki soɯ de!|81
21
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/cc948b89/wav/cc948b89_1779.wav|de, aitɕaɴʔte daɽe?|86
22
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/011/S011_A_2228.wav|haʔ.|29
23
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/002/S002_B_0286.wav|hadʑimemaɕite. ikani mo fɯrʲoɯ to wa eɴ no nasa soɯ na kao ɕiterɯ wa ne.|25
24
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/017/S017_F_0327.wav|hamase, tsɯʔteɴ da!|47
25
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/36ea135b/wav/36ea135b_1961.wav|ɯɴ, nani ka tegakaɽi mitsɯketai ne. jaʔpaɽi, goiɴ ni dʑoɯbɯtsɯ sasetɕaɯ no wa kinodokɯ de.|76
26
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_01/Sakurai_Takahiro_01_chunk2702.wav|naɴ te iɯ ka, sono, gomeɴ.|4
27
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki__01/Shinichiro_Miki__01_chunk1260.wav|na de koɽa.|7
28
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/3c58f1c4/wav/3c58f1c4_0633.wav|iːe, wataɕi wa sofɯto na no naɽa, misaki eʔkɯsɯ ɕiɽo mo dʑɯɯbɯɴ aɽi da to omoimasɯ!|70
29
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/36ea135b/wav/36ea135b_0553.wav|wataɕi no koto...mieterɯ?|76
30
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_02/Sakurai_Takahiro_02_chunk2188.wav|tsɯkitsɯmeta tokoɽo, oɕi no ɕinobɯ wa tada, ɕokɯdʑi o ɕita dake na no dakaɽa.|4
31
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/saori/CN_026.wav|sɯzɯ no oto kitaɽite fɯɯsetsɯ o matoɯ|20
32
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/horie_yui/Horie_Yui_02/Horie_Yui_02_chunk1385.wav|wataɕi wa ɕoɯgakɯsei no toki wa, saʔkaː seɴɕɯ daʔtaɴ da jo.|0
33
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/ad28b91b/wav/ad28b91b_1047.wav|aiɕite irɯ. kono sakiːkanarɯ miɽai ga aɽoɯtomo, ima kono mɯne ni arɯ kimotɕi dake wa hommono da.|88
34
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/10/10005710.wav|soɽe ga doɯ iɯ imi na no ka wa wakaɽanai.|49
35
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/b8b5fe66/wav/b8b5fe66_2505.wav|omae wa, nani ka warɯi koto o ɕita wake dʑa nai. dʑibɯɴ no kimotɕi o oɽe ni tsɯtaeta dake da.|84
36
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/2cf01874/wav/2cf01874_0269.wav|soɽe de mo, natsɯkaɕikɯ kandʑirɯ mono da na.|66
37
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/chiwa_saito/Chiwa_Saito_02/Chiwa_Saito_02_chunk367.wav|ɕiɽaberɯ tsɯmoɽi nado naikeɽedo, ɕiɽabeta tokoɽo de, osoɽakɯ wa sɯisokɯ no iki o dezɯ, kakɯdʑitsɯ ni wa ɕiboɽi kiɽenaidaɽoɯ to iɯ koto daʔta.|3
38
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/449d5a0a/wav/449d5a0a_0374.wav|kowaɽeta ato ni ɕɯɯɽi no kanoɯsei mo aɽimasɯ.|63
39
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/10/10003350.wav|a. dʑibɯɴ no jɯme mo da.|49
40
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/005/S005_F_0011.wav|gomeɴ nasai, ɯmi ni saʔki ga ɯzɯmaiterɯkaɽa, mɯkaɕi no kɯse de ne.|58
41
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/6d565f54/wav/6d565f54_0566.wav|ɯɯɴ...de mo ɕimpai nai jo. kanaɽazɯ modorɯʔte, iʔteta dʑaɴ.|81
42
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/449d5a0a/wav/449d5a0a_2281.wav|moʔto ɯne daɽi ɕitai desɯ...oːnaː no kisɯ o kɯdasai...|63
43
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/84be23bd/wav/84be23bd_0028.wav|a, soɽe dʑa, hontoɯ ni, aʔpeiɽia ga, wataɕi to oanisaɴ no kodomo mitai desɯ ne.|83
44
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/horie_yui/Horie_Yui_01/Horie_Yui_01_chunk163.wav|doɯki o ɕizɯmerɯ.|0
45
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/84be23bd/wav/84be23bd_2140.wav|sonna ki wa maʔtakɯ ɕimaseŋkedo, oːnaka kɯrɯɕi sɯgite ɕini soɯ desɯ.|83
46
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/cc948b89/wav/cc948b89_2639.wav|nakama ni miserɯ no? teɴɕoɴ agarɯɴ dakaɽa jaʔte mi nasaiʔte.|86
47
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sawashiro_miyuki/Sawashiro_Miyuki_01/Sawashiro_Miyuki_01_chunk52.wav|aŋgai, dʑikakɯ wa nai no ka mo ɕiɽenai.|19
48
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/9febd2ae/wav/9febd2ae_1602.wav|zɯʔto tatɕiʔpanaɕi de kiːtetakaɽa tsɯkaɽeta joː.|87
49
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakamoto_maya/Sakamoto_Maya_01/Sakamoto_Maya_01_chunk1709.wav|soɯ iɯ imi de wa, sendʑoɯgawaɽa çitagi wa, kawaʔta no de mo, koɯsei ɕita no de mo, modoʔta no de mo toɽikaeɕita no de mo nakɯ, maɕite, deɽeta no de mo doɽota no de mo nakɯ.|16
50
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/9febd2ae/wav/9febd2ae_0591.wav|dʑosei ga mɯtɕɯɯ ni naʔtɕaɯ kɯɽai kaʔko iː otokoʔte koto ne.|87
51
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/02/02001850.wav|tefɯda ga babanɯki ni mɯitetakaɽa, çiʔɕi ni naʔterɯɴ da jo ne. soɽe kɯɽai, wataɕi dʑa nakɯte mo daɽe de mo wakarɯ jo.|6
52
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakamoto_maya/Sakamoto_Maya_01/Sakamoto_Maya_01_chunk935.wav|dʑaː ɕoɯbɯ kaiɕi de iː na? soko sɯwaɽe jo.|16
53
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/00163dc9/wav/00163dc9_1452.wav|sono sɯki wa...donna sɯki?|80
54
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/saori/unxkaedepuriUBfin-chunk48_DeepFilterNet3.wav|aitsɯ wa wataɕi ga moɽaɯ jo soɽe dʑaː mata!|20
55
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/nagi/nagi_cgss/nagi_card_300836/nagi_voice_300836_6_03.wav|seː no de, dʑampɯ!|43
56
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/ranko/ranko_mobamas/ranko_mobamasu_0002/ranko_mobamasu_0002_chunk3.wav|minna no ɽaibɯ ga kimaʔtaʔte kiːtaɴ desɯkedo, hontoɯ desɯ ka?mɯ, mɯɽhaʔhaː! kaŋki no koe ga kikoerɯ! me o todʑiɽeba, soko ni!|14
57
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki_03/Shinichiro_Miki_03_chunk344.wav|nadeko ni taisɯrɯ ikaɽi no keʔka to ɕite.|7
58
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/003/S003_B_0139.wav|iʔteta jatsɯɽa, çiki hadʑimeta zo.|55
59
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/017/S017_F_0067.wav|moɯ iːja. dʑa, ima wa ammaɽi momerɯ na jo.|47
60
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/kamiya_hiroshi/Kamiya_Hiroshi_01/Kamiya_Hiroshi_01_chunk1454.wav|omotai no jo.|13
61
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/momoka/momoka_mobamas/momoka_mobamasu_0014/momoka_mobamasu_0014_chunk38.wav|oɕigototɕɯɯ wa, esɯkoːto ɕite kɯdasaimasɯ? iː? koɽe kaɽa wa, fɯkɯsoɯ ni wa ki o tsɯkai nasai.|11
62
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki_03/Shinichiro_Miki_03_chunk2780.wav|sendʑoɯgahaɽasaɴ ga, ima iʔtai nani o kaŋgaete irɯ no ka o kaŋgaemaɕita.|7
63
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakamoto_maya/Sakamoto_Maya_03/Sakamoto_Maya_03_chunk1207.wav|kiʔto niŋgeɴ no soɯzoɯ o zeʔsɯrɯ mono ni narɯdaɽoɯ.|16
64
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/00163dc9/wav/00163dc9_1928.wav|dokiʔto sɯrɯ koto iwanaide.|80
65
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiburin/shiburin_mobamas/shiburin_mobamasu_0008/shiburin_mobamasu_0008_chunk12.wav|kiɽei na keɕiki ga mierɯ baɕo ni, kʲoɯmi nai? tɕiːsai koɽo, maitoɕi kazokɯ to iʔteta oki ni haiɽi no baɕo.|50
66
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/2cf01874/wav/2cf01874_3620.wav|çinai to iɯto, mae no ɕitɕɯɯ no.|66
67
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/usamin/usamin_cgss/usamin_card_100125/usamin_voice_100125_3_01.wav|goɽaidʑoɯ, aɽigatoɯ gozaimaɕita! kite kɯɽeta minasaɴ ni, hai! iʔpai zɯtsɯ! motɕiɽoɴ, pɯɽodʲɯɯsaːsaɴ ni mo!|46
68
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/usamin/usamin_cgss/usamin_card_100485/usamin_voice_100485_2_10.wav|ima wa, naite nai desɯ! de mo, ano toki no koto o omoidasɯto...rɯisei zɯɯɴ!|46
69
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/ad28b91b/wav/ad28b91b_1141.wav|ageaɕi o torɯ na, sonna tsɯmoɽi wa nai. ɯɴ, iː iɽozɯkai da na, soko wa hjoɯka ɕite mo iː.|88
70
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki_03/Shinichiro_Miki_03_chunk2088.wav|çitohada nonɯkɯmoɽi o, miɽerɯ no desɯ.|7
71
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/usamin/usamin_cgss/usamin_card_100343/usamin_voice_100343_1_09.wav|katakoɽi ga ɯso no joɯ ni kiete...! oteiɽe sɯɽeba, mada dʑɯɯ neɴ wa motɕi soɯ...|46
72
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Syuuko/Syuko_CGSS_ShinAido_Home_Room/syuuko_card_200086/syuukovoice_200086_1_05.wav|konna iɕoɯ made kitɕaʔte! aidorɯʔte kandʑi da jo ne! ɯfɯfɯ!|40
73
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/00163dc9/wav/00163dc9_1968.wav|sɯki na çito ga soba niːte kɯɽerɯ dake de.|80
74
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/46d6bf83/wav/46d6bf83_2493.wav|gomeɴ, ataɕi wa pasɯ da naː.|95
75
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/6d565f54/wav/6d565f54_1073.wav|fɯʔfɯʔfɯɴ, agehatɕaɴ sɯpeɕarɯ o misete jarɯ wa!|81
76
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/ad28b91b/wav/ad28b91b_0817.wav|doɯ ɕite wataɕi ga taniɴ no akiɽameta jɯme o oɕitsɯkeɽaɽenakɯtɕa ikenaiɴ da. sonna mono o kakaete itaɽa, wataɕi made zasetsɯ sɯrɯ koto ni naɽi soɯ da. iː meiwakɯ da.|88
77
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki__02/Shinichiro_Miki__02_chunk1099.wav|aɴ ni dokoɽo ka, tɕokɯsetsɯ iwaɽemaɕita. dairekɯto desɯ.|7
78
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/002/S002_F_0069.wav|soɽe wa, haʔɕɯɯ ɽemme mo kawaɽanai to omoɯ. tɕiba, ibaɽaki, saitama, gɯmma, kakɯtɕi no haiɽeberɯ na jaŋkiː ga dete kɯrɯ koto wa, amaɽi nai to omoɯ.|25
79
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/6489388e/wav/6489388e_0920.wav|wataɕi no ɕɯɯ ni mirʲokɯ wa aɽimaɕiɴ kaː—?|62
80
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/fumika/fumika_mobamas/fumika_split/fumika__0004/fumika__0004_chunk25.wav|dema ga kakarɯ no de...|1
81
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/saori/26-MizukiTomo.wav|onegai ɕi maː sɯ!|20
82
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/940de876/wav/940de876_4484.wav|wakaɽijasɯkɯ niʔpoɴ go ni ɕite mita wa.|77
83
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/b8b5fe66/wav/b8b5fe66_0656.wav|warɯi...naɴ ka, konna koto ni naʔtɕimaʔta.|84
84
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/36ea135b/wav/36ea135b_0089.wav|eː to...a, kʲɯɯkoɯɕa, kagi kakaʔte...|76
85
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/18460462/wav/18460462_1416.wav|doɯ ɕite kimitatɕi wa, kono ɽeʔɕa ni haiʔte iko, betsɯ koːdo o toʔte irɯɴ da?|96
86
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/940de876/wav/940de876_3617.wav|tɕoʔto, naɴ no hanaɕi ɕiterɯ no joː.|77
87
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/8b6e7173/wav/8b6e7173_1661.wav|zɯrɯi...nemɯi...|67
88
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_03/Sakurai_Takahiro_03_chunk2576.wav|mizɯ ka.|4
89
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sawashiro_miyuki/Sawashiro_Miyuki_03/Sawashiro_Miyuki_03_chunk699.wav|ja kimotɕi o jaite irɯʔte iɯ no ka, gikɯɽi.|19
90
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/011/S011_A_1542.wav|kitɕikɯ dʑa naiɕi ka!|29
91
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/saori/072.wav|iː koto de mo aɽimaɕita? sɯɯpaː de tokɯbaiçiɴ geʔto ɕita to ka. iːɴ desɯ ka? naɽa, onegai ɕimasɯ.|20
92
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/3c58f1c4/wav/3c58f1c4_1532.wav|sempai mo wataɕi to onadʑi kimotɕi da to omoʔteta no ni.|70
93
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/horie_yui/Horie_Yui_01/Horie_Yui_01_chunk1557.wav|soɽe de iː no daɽoɯ ka?|0
94
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/449d5a0a/wav/449d5a0a_0877.wav|eŋgi no dʑiʔkoɯ wa pozitibɯ desɯ. desɯga, kɯoɽiti ni tsɯite wa fɯtoɯmei desɯ.|63
95
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_03/Sakurai_Takahiro_03_chunk987.wav|ija, soɽe wa ɕiɽanaikedo.|4
96
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki__01/Shinichiro_Miki__01_chunk163.wav|naɴ daʔte, kɯtɕinawasaɴ wa kamisama na no desɯkaɽa.|7
97
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/04/04004090.wav|haha.|35
98
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/46d6bf83/wav/46d6bf83_1376.wav|ʔɴ? aː, ano ko ka. ɯɴ, oboeterɯ jo.|95
99
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/bb6ac6f1/wav/bb6ac6f1_0498.wav|sono bɯɴ, dʑibɯɴ no koto mo amaɽi hanasanai jo ne. kikeba oɕiete kɯɽerɯɴ daɽoɯkedo, kikanai hoɯ ga iː no ka na?|79
100
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/46d6bf83/wav/46d6bf83_2572.wav|ɯɯɴ, maɕiɽo ga soɯ iɯ no wakarɯ joɯ ni narɯ ni wa, motɕiʔto ɯmakɯ naɽanaito dame kanʲaː.|95
101
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/fumika/fumika_mobamas/fumika_split/fumika__0014/fumika__0014_chunk82.wav|sanisawa sensei wa...|1
102
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/horie_yui/Horie_Yui_02/Horie_Yui_02_chunk1770.wav|wataɕi to akɯma no naɽesome o.|0
103
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sawashiro_miyuki/Sawashiro_Miyuki_03/Sawashiro_Miyuki_03_chunk212.wav|iɽaʔɕai, tsɯbasatɕaɴ.|19
104
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/940de876/wav/940de876_3392.wav|sonna no fɯɕizeɴ da wa?|77
105
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/b8b5fe66/wav/b8b5fe66_0865.wav|soɽe o ɕiɽanai niŋgeɴ kaɽa, fɯɽeai no imi o çikidaɕite kɯɽeta çitotatɕi ga irɯkaɽa.|84
106
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/kamiya_hiroshi/Kamiya_Hiroshi_01/Kamiya_Hiroshi_01_chunk308.wav|godʑɯɯ kiɽo ɽaɕiː.|13
107
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/95c3bdd8/wav/95c3bdd8_1559.wav|ɯmaɽete kono hoɯ, zɯʔto sono kɯgʲoɯ ni taiɕinonderɯ wataɕi ni taisɯrɯ ijami desɯ ka?|78
108
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/6d565f54/wav/6d565f54_0105.wav|wataɕi ga tsɯiteʔte mo, jakɯ ni tatanai wa ne.|81
109
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/chieri/chieri_cgss/chieri_card_100860/chieri_voice_100860_6_05.wav|gʲoɯsei kaɽa no, omotenaɕi desɯ!|24
110
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Kanade/Kanade_voice_home_shinaido_room/kanade_card_200063/kanade_voice_200063_6_04.wav|hontoɯ no wataɕi, misete agerɯ!|18
111
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/bc778ddb/wav/bc778ddb_1097.wav|ija da! sonna iʔpoɯteki niːwaɽete mo nani ga naɴ do ka wakaɽanai jo!|90
112
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/minami/minami_cgss/minami_card_201308/minami_voice_201308_2_02.wav|tɕanto keŋkoɯkotsɯ ga çiɽaiterɯɴ desɯ jo? tsɯbasa ga haete mo daidʑoɯbɯ na joɯ ni.|15
113
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/mifune/mifune_cgss/mifune_card_201246/mifune_voice_201246_6_06.wav|jorɯ wa mada, hadʑimaʔta bakaɽi. koɽe dake dʑa, owaɽimaseɴ jo.|56
114
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/36ea135b/wav/36ea135b_0022.wav|seɴɕɯɯ made wa ne, keʔkoɯ hoŋki de kaŋgaeteta.|76
115
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/chieri/chieri_mobamas/chie_mobamasu_0017/chie_mobamasu_0017_chunk110.wav|mahoɯ de kɯɽoɯba, sagasenai ka na?|24
116
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/011/S011_A_1667.wav|ano deai o kɯɽeta no ga oja no kʲoɯikɯ naɽa, warɯkɯ wa nai aidʑoɯ daʔta sɯ wa.|29
117
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/ranko/ranko_mobamas/ranko_mobamasu_0017/ranko_mobamasu_0017_chunk84.wav|waga tomo to.|14
118
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/bb6ac6f1/wav/bb6ac6f1_0604.wav|koɯ iɯ no otɕitɕisaɴʔpokɯ de tɕɯki mainitɕi saɽerɯto komarɯkedo.|79
119
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_03/Sakurai_Takahiro_03_chunk2754.wav|kɯrɯma ni çikaɽete ɕindaɽaɕiː zo.|4
120
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/36ea135b/wav/36ea135b_1288.wav|osewa ni naʔterɯ tatɕiba dakaɽa, honto wa ammaɽiːenaiɴ dakedo neː.|76
stylekan/Data/mg_valid.csv ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/598c113f/wav/598c113f_109.wav|iɽaʔɕai! ɕɯɯgakɯ rʲokoɯ wa, tanoɕikato desɯ ka?|194
2
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/282cfa8c/wav/282cfa8c_1042.wav|ima wa magarɯ...itakaʔtakedo, magarɯ joɯ ni naʔtaɴ da jo.|16
3
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/MRD12.wav|kakɯbetsɯ desɯ. pɯɽodʲɯɯsaː kaɽa itadakɯ, kono iʔpai...!|563
4
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/35d789d2/wav/35d789d2_138.wav|te wa soko de iːɴ desɯ ka? koɕi ni mawasanakɯte iːɴ desɯ ka?|51
5
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/tsujido/vo/011/S011_A_2155.wav|hontoɯ da...|553
6
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/cec410a1/wav/cec410a1_328.wav|dʑosei no ɕaɕiɴ ga noʔte irɯ saito o, ɕiteki na joɯto de etsɯɽaɴ ɕite itaɽaɕiː.|434
7
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/6e22f5cd/wav/6e22f5cd_267.wav|soʔka, mada nomerɯ joɯ ni naʔta baʔkaɽi ka. kondo oanesaɴ to, baː de mo iʔte mirɯ?|236
8
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/ad28b91b/wav/ad28b91b_1315.wav|dʑibɯɴ de mo mɯtɕa o iʔterɯ no wa wakaʔterɯ. daga asaçi dakaɽa kiːte mitaɴ da.|343
9
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/momoka/momoka_cgss/momoka_card_100253/momoka_voice_100253_1_12.wav|fɯɯɯ...dʑibɯɴ no itaɽanasa ga, kɯjaɕiː...pɯɽodʲɯɯsaːtɕama...|489
10
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/0253acb6/wav/0253acb6_869.wav|motomoto, koɯ ɕite gaʔkoɯ ni koɽaɽeɽeba, soɽe de mokɯhjoɯ taʔsei daʔta koto dakaɽa.|28
11
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/monogatari/monogatari_voices/monogatari_split/sawashiro_miyuki/Sawashiro_Miyuki_02/Sawashiro_Miyuki_02_chunk1017.wav|aɽaɽa ɽike, naŋge ni fɯjɯɯ soɯ?|475
12
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/76981655/wav/76981655_0566.wav|mada hjakɯ maɴ kaitɕɯɯ sɯrɯ totɕɯɯ daɕi, mata kaisɯɯ wasɯɽeta ne. haiː—tɕi!|110
13
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/02153faa/wav/02153faa_484.wav|ɯɯɴ, bokɯ ga ɕirɯ kagiɽiːtsɯ mo konna kandʑi da na. hantsɯki mae ni tɕoʔto aɽe kimi no çi ga aʔtakedo, soɽe dake da na.|20
14
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/4e2f4ba6/wav/4e2f4ba6_0673.wav|fɯto naŋge nakɯ, aɕi o tometa.|143
15
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/37c014a1/wav/37c014a1_0474.wav|soɯ ɕi te kɯɽerɯ to kaɴɕa sɯrɯ. moɯ iʔkai, mɯne o jɯɽaɕite agerɯ.|6
16
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/99b5eb16/wav/99b5eb16_0264.wav|otakɯ no ofɯrɯ o kaɕite itadakenai kaɕiɽa? osamɯ ɕibaɽakɯ sentoɯ kajoɯ wa mɯɽi dakaɽa.|345
17
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/f6c4b7b2/wav/f6c4b7b2_0424.wav|dʑaː, paiɽoʔto aoikɯɴ de iːdʑa nai! nanni mo mondai nai wa!|439
18
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/bf7b3aa8/wav/bf7b3aa8_457.wav|oniːtɕaɴ, tɕinami mo ufo kansokɯjoɯ no boɯeŋkʲo o kaɯ tame ni, mata meidosaɴ ni naʔte iː ka na?|277
19
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/0f6fbea8/wav/0f6fbea8_0220.wav|oi, moɯ iː. omae no aiboɯ dʑimaɴ wa iːʔte no.|52
20
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/1967ee53/wav/1967ee53_0238.wav|madʑo wa ne, hoɴ o jomɯ no ga sɯki naɴ daʔte.|71
21
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/cc948b89/wav/cc948b89_1571.wav|fɯtsɯɯ no dʑiɕɯ kɯnɽeɴ naɽa ɯɽaniwa de jaɽeba iːdʑa nai.|423
22
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/5d68aedf/wav/5d68aedf_0376.wav|daʔte, tomodatɕi naŋkaitaʔte nani mo kawaɽanai moɴ!|183
23
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/e3ee19b2/wav/e3ee19b2_127.wav|toɯbɯɴ wa koko o tsɯkawasete moɽaoɯ ze. kiːtenai no ka?|370
24
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/cda4375a/wav/cda4375a_0871.wav|saiɕo kaɽa sono tsɯmoɽi. kikeɴ na ɽei dakaɽa, fɯtaɽi ni makaserɯ tsɯmoɽi wa nai.|385
25
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/05a45f91/wav/05a45f91_131.wav|negaɽakɯba ɕɯ ga ɕɯkɯfɯkɯ ɕi mamoʔte kɯdasaimasɯ joɯ ni.|70
26
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/6d19f294/wav/6d19f294_381.wav|desɯ ne. soɯ dekirɯ nante mitoːɕi, sɯkɯnakɯtomo wataɕi wa tateɽaɽenai desɯ.|189
27
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/6d250131/wav/6d250131_647.wav|kansai no oteɽa ni wa jokɯ arɯɽaɕiːkedo, koko no wa ɕiɽanai.|202
28
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/bce2a5af/wav/bce2a5af_2513.wav|ɯ, koɽe, sɯgokɯ kimotɕiː— ka mo. sɯgoi, naɴ da ka hazɯkaɕiː oto da ne.|261
29
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_mobamas/chie_mobamasu_0014/chie_mobamasu_0014_chunk201.wav|ne fɯsokɯ ni naʔtɕaimasɯ...ɯɯ...|483
30
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/miku/miku_cgss/miku_card_100395/miku_voice_100395_2_05.wav|pːtɕaɴ ga odoɽokɯ kɯɽai! sekɯɕiː ni naʔte miserɯnʲa!|487
31
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/1a5a3db8/wav/1a5a3db8_2430.wav|ija jo, soɽe mo ija! wataɕi ga mae de, ɯɯ ga ɯɕiɽo, zɯʔto soɯ ɕite kitadʑa nai! anta wa, damaʔte wataɕi no ɯɕiɽo o tsɯite kɯɽeba iː no!|91
32
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/f19b6190/wav/f19b6190_0934.wav|jɯɯ wa so no...ɯwaki to ka wa ɕinai to omoʔterɯkedo...|449
33
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/90fa05fd/wav/90fa05fd_1203.wav|daga, soɯ toɽaerɯ no ga itɕibaɴ, fɯ ni otɕirɯ.|274
34
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/cda4375a/wav/cda4375a_1309.wav|moɯ sono koɽo no kiokɯ wa hotondo naikeɽedo, wataɕi mo niŋgeɴ daʔta.|385
35
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/773a4156/wav/773a4156_2205.wav|moɯ iʔɕoɯ kakaʔte mo kaesenai ka mo ɕiɽenai. soɽe kɯɽai toko ni wa kaɴɕa ɕiterɯ.|214
36
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/0b8ae160/wav/0b8ae160_0536.wav|a, ano...dʑɯɯdaiʔte, daidʑoɯbɯ naɴ desɯ ka?|94
37
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/tsujido/vo/302/S302_E_0006.wav|sɯgokaʔta.|532
38
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/bca2cfac/wav/bca2cfac_1044.wav|moɕi ka ɕite, takɯ ja to wakaba iʔɕo daʔta no kai?|320
39
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/940de876/wav/940de876_3235.wav|jaɽa moɯ, mada oʔkikɯ narɯ no? ma, tɕoʔto wa te kɯgeɴ ɕinasaɴ joː.|319
40
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/cda4375a/wav/cda4375a_0579.wav|soɯ ne, fɯkɯsɯɯ no dansei to kaŋkei o moʔte inai kagiɽi wa...|385
41
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/1ed99743/wav/1ed99743_557.wav|ɴ. kimitatɕi no bɯtɕiageta, daikʲɯɯɕɯɯsl koɯsoɯ. bokɯ wa, aɽe ni me ga arɯ to mikonde irɯ.|1
42
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/karen_cgss/karen_cgss_card_201316/karen_cgss_voice_201316_2_01.wav|nita joɯ na fɯkɯ de mo kaŋkei nai! fɯkɯ wa hoɕiːkaɽa kaɯ! idʑoɯ!|488
43
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/940de876/wav/940de876_4015.wav|tɕoʔto josanteki ni ne, kitai bɯmoɴ no hoɯ de mo joteigai no josaɴ ga kakaɽi soɯ na no jo.|319
44
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki__02/Shinichiro_Miki__02_chunk297.wav|geikaiʔte no wa, kamisama ni toʔte meataɽaɕiːɴ da jo. ija, meataɽaɕi ka wa neː ka, kawaɽibaɽi ɕineː ze.|473
45
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/5e5993c5/wav/5e5993c5_615.wav|ne, neː, moɯ iʔkai kisɯ ɕite.|120
46
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/2af831b5/wav/2af831b5_521.wav|fɯɴ, itsɯ mo nigijaka na komɯsɯme da ze.|5
47
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/momoka/momoka_cgss/momoka_card_100410/momoka_voice_100410_4_02.wav|dakedo, ima wa ɯtaʔte odoʔte, tanoɕinde! haɕitanai ka mo to omoɯ kɯɽai, oːkina koe de waɽaʔte...fɯfɯ!! ojoɯgi wa warɯkɯte mo, tanoɕisa de iʔpai desɯ zo!|489
48
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/22d1fa2f/wav/22d1fa2f_362.wav|dʑoɯdaɴ da, oɽe wa haneda ni hanaɕi ga arɯɴ da jo.|84
49
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/653a1bc0/wav/653a1bc0_1068.wav|soɽe ni, ima wa kono fɯkɯ igai ni kigae ga aɽimaseɴ no de.|174
50
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/ee093a4f/wav/ee093a4f_0555.wav|somosomo anata no koɯgi no ɕikata ga ki ni haiɽimaseɴ. iedeʔte naɴ desɯ ka? zeʔsaɴ iedetɕɯɯ no wataɕi ga ieta giɽi de wa aɽimaseŋga, ɕoɯgakɯsei desɯ ka?|371
51
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/9ee921f6/wav/9ee921f6_0012.wav|soɯ naɴ desɯ, çito o atsɯmejoɯ to sendeɴ ɕite irɯ mitai de.|347
52
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/bb6ac6f1/wav/bb6ac6f1_1043.wav|jɯɯkotɕaɴ seidʑɯɴha da jo! baɽibaɽi ɯki dʑa nai?|263
53
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/84be23bd/wav/84be23bd_1022.wav|moɯ, sonna miɽai mo kimaseɴ jo.|284
54
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/fa4704bf/wav/fa4704bf_119.wav|konna dʑikaɴ ni konna baɕo ni jobidasɯ nante, johodo kikaɽetakɯ nai hanaɕi na no ne.|387
55
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/9d33dced/wav/9d33dced_811.wav|ɕikaɕi ɽogɯiɴ ɕite irɯ idʑoɯ, fɯtatsɯ no sekai de iɕiki wa tsɯnagaʔte irɯ.|275
56
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_voice_home_shinaido_room/kanade_card_201055/kanade_voice_201055_1_11.wav|aɽa, moɯ çitotsɯ? pɯɽodʲɯɯsaːsaɴ ga onedaɽi nante. fɯfɯ!!|482
57
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/46d6bf83/wav/46d6bf83_2379.wav|tɕigaɯ, kaʔtaɽi make taɽi sɯrɯ no wa ataɕi!|141
58
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/8f8acabb/wav/8f8acabb_426.wav|dʑasɯko wa bɯ tsɯ zoɯ to ka e bakaɽi ki ni haiʔteta ne. soɽe to kimono! hommono no maikosaɴ mita toki no dʑasɯko no teɴɕiɴ no agaɽi gwai, sɯgokaʔta!|244
59
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/74eb72c7/wav/74eb72c7_0253.wav|so, sonna koto nai jo, gɯɯzeɴ dʑa nai ka na.|184
60
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/c593ed00/wav/c593ed00_0583.wav|moɯ, hoʔtoite. koko de fɯjɯ o okosɯkaɽa.|466
61
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/a93da23d/wav/a93da23d_0512.wav|maː, nindʑa ɕɯʔɕiɴ no itɕizokɯ de arɯ koto wa çitei ɕimaseŋga.|317
62
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/e5d53ec4/wav/e5d53ec4_049.wav|neː, tanteisaɴ ɕiʔterɯ? koko saikiɴ, kanda de jakeɴ no çigai ga aitsɯiteta no.|460
63
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/3c58f1c4/wav/3c58f1c4_1376.wav|dʑitsɯ wa wataɕi, paːɴ!ʔte oːkina oto ga sɯrɯ, ɯtɕiage hanabi ga nigate naɴ desɯ.|117
64
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/7787d8bf/wav/7787d8bf_0621.wav|onadʑi dʑa nai jo. kotoɕi wa, ɕifɯki otokosaɴ to ka jobɯ joteinaɴ dakaɽa.|306
65
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/monogatari/monogatari_voices/monogatari_split/sawashiro_miyuki/Sawashiro_Miyuki_01/Sawashiro_Miyuki_01_chunk619.wav|mɯɽidʑi wa ɕinaikeɽedo, nani ka aʔta toki, çitoɽi de naɴ to ka ɕijoɯ to omoctɕa dame jo. anata wa imada ni sono keikoɯ ga tsɯjoikaɽa.|475
66
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_03/Sakurai_Takahiro_03_chunk1483.wav|nado to...|477
67
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/940de876/wav/940de876_0845.wav|moɯ iʔkai dake kikɯ wa? minna wa doɯ?|319
68
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/8e1072e6/wav/8e1072e6_0195.wav|soɽe o keʔte koko ka jo? soɽe mo ataɕi ni ɽenɽakɯ naɕi ka jo?|247
69
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/monogatari/monogatari_voices/monogatari_split/kamiya_hiroshi/Kamiya_Hiroshi_02/Kamiya_Hiroshi_02_chunk905.wav|ɕinseki ne.|478
70
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/2af831b5/wav/2af831b5_240.wav|naː, nosejasɯi dʑaɽo.|5
71
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/84be23bd/wav/84be23bd_0187.wav|oːgeza desɯ ne, taiɕita koto aɽimaseɴ. mada ɕiɽabeterɯ dake desɯɕi.|284
72
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/72921df9/wav/72921df9_190.wav|kao to namae, neː?|228
73
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/3ec57102/wav/3ec57102_137.wav|wataɕi no tame ni, oɕokɯdʑi o tɕɯɯdaɴ sɯrɯ çitsɯjoɯ wa aɽimaseɴ. doɯzo, oki ni nasaɽazɯ.|144
74
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/arisu/arisu_mobamasu/14_arisu__0015_(Vocals)/14_arisu__0015_(Vocals)_chunk153.wav|baɽentaiɴ de mo, itsɯ mo toːɽi desɯ.|495
75
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/d39532a8/wav/d39532a8_1523.wav|dakaɽa kaeʔte kite kɯɽetetaɴ daʔte.|384
76
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/tsujido/vo/011/S011_A_0310.wav|nʲa haha! jaʔpa sempai omoɕiɽoiʔsɯ!|553
77
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/4ded9fa1/wav/4ded9fa1_0342.wav|fɯfɯ, de wa toɽiaezɯ, hoːpɯ to jɯki no meija no tame ni, fɯtsɯɯ no fɯkɯ da to ɕindʑite agemaɕoɯ ka.|129
78
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/36ea135b/wav/36ea135b_2487.wav|ɯɯɴ, jaɕiɽokɯɴ no kaŋgae mo ataʔterɯ. nozomi sempai wa, kanzeɴ ni wasɯɽetɕaʔterɯ wake dʑa nai.|60
79
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/bce2a5af/wav/bce2a5af_0662.wav|ano mɯsɯme wa jokɯ wakaɽaɴ na.|261
80
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/f4169f28/wav/f4169f28_193.wav|dakaɽa soɯ iɯ imi dʑa nakɯte.|386
81
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/jou_mika/jou_mika_cgss/jou_mika_card_301206/jou_mika_voice_301206_1_10.wav|me, tsɯmɯʔte agejoɯ ka? daʔte hoɕiːdeɕoɯ? kisɯ matɕigao?|485
82
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_03/Sakurai_Takahiro_03_chunk2594.wav|to, kamawazɯ ni hanaɕi o tsɯzɯketa.|477
83
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/f04ee070/wav/f04ee070_0345.wav|kaɯrɯkosaɴ to baʔkaɽi nakajasa soɯ ni ɕitɕaʔte soɯ.|461
84
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/46d6bf83/wav/46d6bf83_0264.wav|masaja, koɽe de kʲoɯ no ɽeɴɕɯɯ wa oçiɽaki da jo neː.|141
85
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/631b0413/wav/631b0413_015.wav|jokaʔtaɽa, kʲoɯ mo iʔɕo ni, iː desɯ ka?|151
86
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/efb922ca/wav/efb922ca_0770.wav|ɯfɯfɯ, baːrɯ o fɯɽimawasɯkaɽa, moɯ tɕoʔto ɯɕiɽo ni sagaʔtete.|409
87
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/f19b6190/wav/f19b6190_0480.wav|dʑiʔsai no nami wa moʔto dʑoɯge ɯndoɯ mo hageɕiːɴ dakaɽa.|449
88
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/sakura_moyu/30/30000020.wav|kanodʑo wa, dʑibɯɴ o gisei ni ɕi, bokɯɽa no koto o, jorɯ no sekai o mamoɽoɯ to ɕite irɯ.|518
89
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/653a1bc0/wav/653a1bc0_1288.wav|de mo, kaɽadatɕɯɯ boɽoboɽo de, iki o s��rɯ no ga jaʔto deɕita. koko de wataɕi wa owaɽi naɴ da to, naɴ to nakɯ, wakaɽimaɕita.|174
90
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/1a5a3db8/wav/1a5a3db8_2704.wav|aɽe? wataɕi no koto ɕiʔterɯ?|91
91
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/99b5eb16/wav/99b5eb16_0339.wav|wataɕi mo itɕi do çinosakasaɴ no tokoɽo ni kao o daɕite mirɯ wa. kanodʑo ga ɕiŋgakɯ ɕita toki no fɯkɯzatsɯ na dʑidʑoɯ to ka, koʔtɕi kaɽa setsɯmei ɕite oita hoɯ ga iː to omoɯɕi.|345
92
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/3e02a4dc/wav/3e02a4dc_916.wav|dʑa a, tsɯitectɕa ikenaiɴ desɯ kaː?|213
93
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki_03/Shinichiro_Miki_03_chunk817.wav|sasɯga ni sɯgɯ ni nadekoɯ nadekoɯ da to, kampa ɕita joɯ desɯ. ma, iʔte ɕimaeba, maegami ga nakɯnaʔta dake desɯkaɽa, jokɯ miɽeba wakarɯɴ deɕoɯ ne.|473
94
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/00163dc9/wav/00163dc9_1842.wav|aɽe wa, daɽe kaɽa no okɯɽimono daʔta no ka naʔte.|69
95
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/25714f7a/wav/25714f7a_1007.wav|maː soɯ iɯnaɽa damaʔterɯsɯkedo.|95
96
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/monogatari/monogatari_voices/monogatari_split/sakamoto_maya/Sakamoto_Maya_01/Sakamoto_Maya_01_chunk1292.wav|heː, jaʔte mijoɯ. eʔto,ɴ?|474
97
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/monogatari/monogatari_voices/monogatari_split/sawashiro_miyuki/Sawashiro_Miyuki_03/Sawashiro_Miyuki_03_chunk1453.wav|ɯmaɽeotɕita tsɯgi no ɕɯŋkaɴ, wagahai wa wagahai o ɯmiotoɕita botai o mita.|475
98
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/c81c2b4d/wav/c81c2b4d_231.wav|ʔte ka, seitoɯ ɽitsɯ ga takai no wa, hatsɯnetɕaɴ ga adobaisɯ ɕite kɯɽerɯkaɽa de...a, dʑikaɴ daidʑoɯbɯ?|400
99
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/598c113f/wav/598c113f_109.wav|iɽaʔɕai! ɕɯɯgakɯ rʲokoɯ wa, tanoɕikato desɯ ka?|194
100
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/282cfa8c/wav/282cfa8c_1042.wav|ima wa magarɯ...itakaʔtakedo, magarɯ joɯ ni naʔtaɴ da jo.|16
101
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/MRD12.wav|kakɯbetsɯ desɯ. pɯɽodʲɯɯsaː kaɽa itadakɯ, kono iʔpai...!|563
102
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/35d789d2/wav/35d789d2_138.wav|te wa soko de iːɴ desɯ ka? koɕi ni mawasanakɯte iːɴ desɯ ka?|51
103
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/tsujido/vo/011/S011_A_2155.wav|hontoɯ da...|553
104
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/cec410a1/wav/cec410a1_328.wav|dʑosei no ɕaɕiɴ ga noʔte irɯ saito o, ɕiteki na joɯto de etsɯɽaɴ ɕite itaɽaɕiː.|434
105
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/6e22f5cd/wav/6e22f5cd_267.wav|soʔka, mada nomerɯ joɯ ni naʔta baʔkaɽi ka. kondo oanesaɴ to, baː de mo iʔte mirɯ?|236
106
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/ad28b91b/wav/ad28b91b_1315.wav|dʑibɯɴ de mo mɯtɕa o iʔterɯ no wa wakaʔterɯ. daga asaçi dakaɽa kiːte mitaɴ da.|343
107
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/momoka/momoka_cgss/momoka_card_100253/momoka_voice_100253_1_12.wav|fɯɯɯ...dʑibɯɴ no itaɽanasa ga, kɯjaɕiː...pɯɽodʲɯɯsaːtɕama...|489
108
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/0253acb6/wav/0253acb6_869.wav|motomoto, koɯ ɕite gaʔkoɯ ni koɽaɽeɽeba, soɽe de mokɯhjoɯ taʔsei daʔta koto dakaɽa.|28
109
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/monogatari/monogatari_voices/monogatari_split/sawashiro_miyuki/Sawashiro_Miyuki_02/Sawashiro_Miyuki_02_chunk1017.wav|aɽaɽa ɽike, naŋge ni fɯjɯɯ soɯ?|475
110
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/76981655/wav/76981655_0566.wav|mada hjakɯ maɴ kaitɕɯɯ sɯrɯ totɕɯɯ daɕi, mata kaisɯɯ wasɯɽeta ne. haiː—tɕi!|110
111
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/02153faa/wav/02153faa_484.wav|ɯɯɴ, bokɯ ga ɕirɯ kagiɽiːtsɯ mo konna kandʑi da na. hantsɯki mae ni tɕoʔto aɽe kimi no çi ga aʔtakedo, soɽe dake da na.|20
112
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/4e2f4ba6/wav/4e2f4ba6_0673.wav|fɯto naŋge nakɯ, aɕi o tometa.|143
113
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/37c014a1/wav/37c014a1_0474.wav|soɯ ɕi te kɯɽerɯ to kaɴɕa sɯrɯ. moɯ iʔkai, mɯne o jɯɽaɕite agerɯ.|6
114
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/99b5eb16/wav/99b5eb16_0264.wav|otakɯ no ofɯrɯ o kaɕite itadakenai kaɕiɽa? osamɯ ɕibaɽakɯ sentoɯ kajoɯ wa mɯɽi dakaɽa.|345
115
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/f6c4b7b2/wav/f6c4b7b2_0424.wav|dʑaː, paiɽoʔto aoikɯɴ de iːdʑa nai! nanni mo mondai nai wa!|439
116
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/bf7b3aa8/wav/bf7b3aa8_457.wav|oniːtɕaɴ, tɕinami mo ufo kansokɯjoɯ no boɯeŋkʲo o kaɯ tame ni, mata meidosaɴ ni naʔte iː ka na?|277
117
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/0f6fbea8/wav/0f6fbea8_0220.wav|oi, moɯ iː. omae no aiboɯ dʑimaɴ wa iːʔte no.|52
118
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/1967ee53/wav/1967ee53_0238.wav|madʑo wa ne, hoɴ o jomɯ no ga sɯki naɴ daʔte.|71
119
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/cc948b89/wav/cc948b89_1571.wav|fɯtsɯɯ no dʑiɕɯ kɯnɽeɴ naɽa ɯɽaniwa de jaɽeba iːdʑa nai.|423
120
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/5d68aedf/wav/5d68aedf_0376.wav|daʔte, tomodatɕi naŋkaitaʔte nani mo kawaɽanai moɴ!|183
121
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_05.wav|sɯgata mo, koe mo, namae sae kawaʔte mo, tɕitose ga kɯɽeta kiboɯ wa, kienai.|480
122
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_06.wav|koɯfɯkɯ wa doko ni de mo koɽogaʔterɯdeɕo? wataɕi wa, dakʲoɯ ɕitakɯ naikedo?|480
123
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_01.wav|kɯɽaŋkɯ aːʔpɯ! teɴɕi no oɕigoto ɕɯɯrʲoɯ! niŋgeɴ ni modoɽoʔka naːɴ!|480
124
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_02.wav|bʲoɯdoɯ de itainaɽa, mɯkaɴɕiɴ de irɯ koto. katamɯkerɯ saki ga nakɯnactɕoɯ.|480
125
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_12.wav|kiboɯ wa megɯrɯ. tatoe donna ni sɯrɯdokɯ, itakɯtomo. soɽe o anata ga ɕimeɕite kɯɽeta.|480
126
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_6_07.wav|çikaɽi ni tokerɯ no, kikata nonɯkɯmoɽi to.|480
127
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_07.wav|zensei to ka ɽaisei to ka, doɯ de mo iː. daʔte soɽe wa, ataɕi dʑa nai moɴ ne.|480
128
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_6_03.wav|owaɽi o koete.|480
129
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_4_01.wav|temɽiɴ wa itsɯ daʔte tsɯɽiaʔterɯ. kiboɯ to doɯtoɯ no ɕitsɯrʲa o motsɯ mono ga nani ka, ɕiʔterɯ? soɽe wa ne, zetsɯboɯ da jo.|480
130
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_02.wav|koɽe ga, owaɽi? naɽa, ɯɽeɕiː. daʔte, anata to no, tsɯgi no jakɯsokɯ.|480
131
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_11.wav|tsɯgi deaerɯ toki, wataɕitatɕi wa çitoɽi de wa nai, fɯfɯ, koɽe wa jogeɴ.|480
132
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_12.wav|josoɯ dekinai kimitatɕi de ite jo. tensai ɕikitɕaɴ no josoɯ o, ɯɽagiɽi tsɯzɯkete.|480
133
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_6_06.wav|mimamoʔterɯkaɽa. fɯtatabi, deaɯ made.|480
134
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_6_04.wav|teɴɕi no komoɽi ɯta o.|480
135
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_11.wav|kimi o ɽakɯeɴ e to izanaʔte agerɯ joɴ! ima o tanoɕimi kiʔta ato o, tɕitose tɕanto.|480
136
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_04.wav|osoɽa no ɯeʔte, donna nioi naɴ daɽoɯ ne? ɕiɽeɴ de, mɯkiɕitsɯ de, kaoɽi mo nai no ka na?|480
137
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_6_05.wav|tokɯbetsɯ ni narɯ no.|480
138
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_06.wav|sajoɯnaɽa, tɕitose. wataɕi ni owaɽi o oɕiete kɯɽeta, tokɯbetsɯ na çito.|480
139
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_6_02.wav|kagajaki no katɕi o, ɕimeɕite misete.|480
140
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_05.wav|çito ni kaŋka saɽerɯ no, betsɯ ni, ɕiɽoi dʑa nai jo. meʔta ni sono aite ga inai dake.|480
141
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_3_01.wav|çito no inotɕi ni kiseɴ ga nai nante, sonna no kiɽeigoto. sɯkɯɯ çito wa eɽabɯɕi, dʑibɯɴ no inotɕi nante taika de saɕidasenai. de mo kimi wa, saɕidaɕite ɕimai soɯ da jo ne. oçito joɕi dakaɽa.|480
142
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_04.wav|moɴ no soto e okɯɽidasenakɯte, gomeɴ nasai. moɴ no saki no koto, kondo oɕiete ne.|480
143
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_08.wav|ɕigo no sekai de tanoɕikɯ jarɯɴ dʑa nakɯte, ima o kiɽitorɯ, fɯɴ, dekirɯ jo.|480
144
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_03.wav|saigo wa nai no desɯ. negaeba doko made mo, fɯtaɽi no tokɯbetsɯ wa tsɯzɯkɯkaɽa.|480
145
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_07.wav|arɯdʑi ni wa kaɴɕa o, wataɕi no tokɯbetsɯ o minogaɕite kɯɽeta, anata to, deaeta.|480
146
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_10.wav|hakoni wa neː, wataɕi wa iɽanai na. teitaiʔte, dʑiɴrɯi to aiɕoɯ ga warɯiɴ daʔte.|480
147
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_03.wav|kempiɴ, dʑiʔkeɴ, a, ɕijokɯɴ ga todoitetaɴ daʔta, ɽabo ni komaɽoːʔto.|480
148
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_6_01.wav|jɯme no saki no zasetsɯ nante, kɯtsɯgaesoɯ ka.|480
149
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_5_01.wav|nakanaide, anata no çitomi ga, namida de nagaɽete ɕimaɯ, soɽe wa, koɽe kaɽa kiboɯ dake o mitsɯkerɯ tame no oɯseki, wataɕi o, seɴ neɴ saki no miɽai de mo, mitɕibiːte kɯɽerɯ çikaɽi, saigo wa, hohoemi de.|480
150
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_09.wav|fɯɽeɽaɽenakɯte mo wakarɯ, atatakasa, daʔte, zɯʔto, anata ni aʔta.|480
151
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_10.wav|zɯʔto omoʔteta, kiɽei na çitomi no iɽo daʔte, inotɕi no iɽo, naɴ da ne.|480
152
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_4_03.wav|kimi ga ima te ni moʔterɯ no wa, kiboɯ? soɽe to mo zetsɯboɯ? doʔtɕi de mo iː ka. wataɕi wa moɯ te niːɽeterɯkaɽa, hoɕikaʔtaɽa wakete agerɯ jo. katahoɯ wa, kimi ni moɽaʔta mono dakedo.|480
153
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_01.wav|namida. kanaɕikɯte, tsɯɽakɯte, kɯrɯɕiː mono. koɽe kaɽa no anata ni wa, çitsɯjoɯ nai mono.|480
154
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_09.wav|tobɯ no mo keʔkʲokɯ dʑibɯɴ no tɕikaɽa daɕi, de mo tsɯkaɽetɕaɯɕi. hakonde moɽaɯ no ga itɕibaɴ!|480
155
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_08.wav|sɯbete wa bʲoɯdoɯ, koɯhei ni fɯɽisosogɯ, ai mo, inoɽi mo, hontoɯ wa, anata ni daʔte.|480
156
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_4_02.wav|kotoba ni sɯrɯto nante dʑiːpɯ naɴ daɽoɯ ne. konna mono ni, ataɕitatɕi wa fɯɽimawasaɽete, çiʔɕi ni naʔterɯ, me ni mienai sei de, te ni ɕite mo dʑiʔka ga naikaɽa, jokei ni ne.|480
157
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_08.wav|kaisɯiʔte, konna ni ɕio kaɽakaʔta?, naitenai jo, kantɕigai, kiɴɕi.|480
158
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_07.wav|jorɯ no sampo, okoɽanaide ne. iʔɕo naɽa iː deɕo? ɕimpai naɽa, mihaʔtete.|480
159
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_05.wav|kikaɽetenakɯte jokaʔta. moɕi ka ɕitaɽa, toʔkɯ ni ɕiɽaɽeteta ka mo dakedo.|480
160
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_4_03.wav|moɕi nigete mo, doko made mo oʔte kɯrɯ ki deɕoɯ? jada jada, oni goʔko wa tanoɕiːkedo, zɯʔto wa tsɯkaɽetɕoɯ. dakaɽa, iʔɕo niːrɯ jo. toːi owaɽi no çi made.|480
161
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_02.wav|anna ni fɯkakɯ, soko ga naiʔte kandʑiteta no ni, sonna koto, nakaʔtaɴ da.|480
162
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_06.wav|kiɽei na ɕigikata nante nai, ka, soɽe de mo, jɯme kɯɽai mite itakaʔta jo.|480
163
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_10.wav|nɯɽeterɯ jo, iː no? sonna koto mo ki ni naɽanai kɯɽai, hoŋki de.|480
164
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_07.wav|otoɕi joɽiːki kiɽeterɯ, oːwatesɯgi, zeɴrʲoɯ da jo neː, soɯ iɯ tokoɽo.|480
165
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_05.wav|kimi wa itsɯ mo itsɯ mo, ataɕi o nigaɕite kɯɽenai jo ne. doko iʔte mo sa.|480
166
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_4_02.wav|daɽe mo kaɽe mo ga ɽisoɯ o oɕitsɯketerɯ. kimi mo soɯ, tɕitose tɕaɴ daʔte. soɽe de mo kimitatɕi wa, ɽisoɯ dʑa nai wataɕi de mo, tsɯkiaoɯ to ɕiterɯ.|480
167
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_6_02.wav|kagajaki no katɕi o, ɕimeɕite misete.|480
168
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_08.wav|taorɯ kaisanakʲa, wataɕi no koto o wasɯɽeɽaɽenakɯ narɯ koɯsɯi, pɯɕɯ!!|480
169
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_04.wav|ɯmi ni ɕizɯmɯki wa, fɯɯɴ. saː, doɯ daʔta ka naː, oki ni nagasaɽetɕaʔta mitai.|480
170
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_12.wav|tsɯmetai, kaɽa da no ɕiɴ ga hjoɯteŋka mitai, dakaɽa, tamete, donna hoɯhoɯ de mo.|480
171
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_6_07.wav|çikaɽi ni tokerɯ no, kikata nonɯkɯmoɽi to.|480
172
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_02.wav|kono ato wa motɕiɽoɴ omimai da joɴ! jowajowa kʲɯɯketsɯkisaɴ no tokoɽo ne.|480
173
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_3_01.wav|dʑɯndo no takai mono wa, soɽe dake moɽoiɴ da jo. joɽokobi mo, kanaɕimi mo, zembɯ zembɯ, kowaɽetɕaɯ. dakaɽa çito wa, mazaʔte ɯme aɯ no. kowaɽete ɯmaɽeta, sɯkima o.|480
174
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_5_01.wav|naɴ de, konna tokoɽo ni, wazawaza kita no? iː ko wa nerɯ dʑikaɴ de, ɕikitɕantatɕi wa, mite no toːɽi warɯi ko de, fɯfɯ, fɯɕigi, hoɴɕiɴ ɕitɕoʔta. kimi wa, okoʔterɯ no ni.|480
175
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_6_06.wav|mimamoʔterɯkaɽa. fɯtatabi, deaɯ made.|480
176
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_03.wav|gps o tsɯketeɽeba mendoɯ na ɽenɽakɯ wa iɽanai ka mo neːnʲɯɯ, kaɴɕi ɕakai.|480
177
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_12.wav|itsɯ mo doːɽi de gaʔkaɽi, ɕitenai? fɯɯɴ, itsɯ mo no ataɕi de iːɴ da.|480
178
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_01.wav|kono dʑiki no ɯmiʔte, zenzeɴ samɯkɯ naiɴ da neː. mɯɕiɽo nama noɽokɯte, betobetoː.|480
179
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_4_01.wav|fɯtsɯɯ dʑa nai ataɕi wa, iɽanakaʔta. tokɯbetsɯ na kagakɯɕa dʑa nai ataɕi wa, papa no ɕikai kaɽa hazɯɽeta. kawaiː dake dʑa nai ataɕi wa, aidorɯ dʑa iɽaɽenai.|480
180
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_01.wav|mabɯɕiː jo. sonna çikaɽi ni ateɽaɽetaɽa, toketenakɯ nactɕaɯ.|480
181
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_6_04.wav|teɴɕi no komoɽi ɯta o.|480
182
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_11.wav|aː, kɯʔtsɯiterɯto aʔtakaː. koɽe wa kimi no, jasaɕisa no ɯndo ka na?|480
183
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_06.wav|kaketsɯketa no? tɕitose tɕaɴ mo, kimi mo. doɯ ɕite soɯ, oseʔkai.|480
184
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_03.wav|modoʔte kitɕaʔta, çikaɽi no sekai, konna ni, aʔtakakaʔtaɴ da ne.|480
185
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_09.wav|ɯwa! mabɯɕiː! moʔto haʔkoɯrʲoɯ o osaeteː! dʑiɴrɯi ni wa mada hajai!|480
186
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_09.wav|manɯke na kao, ɕiterɯ, wataɕi mo? fɯfɯʔ, okaɕiː, aidorɯ na no ni ne.|480
187
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_10.wav|ameɽika dʑikomi no sɯieihoɯ de, tɕitose tɕaɴ no çitoɽi ja fɯtaɽi, ɕizɯmɯ—!|480
188
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_6_01.wav|jɯme no saki no zasetsɯ nante, kɯtsɯgaesoɯ ka.|480
189
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_6_05.wav|tokɯbetsɯ ni narɯ no.|480
190
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_6_03.wav|owaɽi o koete.|480
191
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_04.wav|heɴ na no? ɯmi wa, wataɕi o sɯikonde kɯɽerɯ hazɯ daʔta no ni, kʲoçi ɕite.|480
192
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_11.wav|ɕikaɽaɽerɯ no wa kiɽai. de mo, kʲoɯ wa, ɯɯɴ, naɴ de mo nai jo.|480
193
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/shiki_fine01.wav|haha, çiʔɕi da ne. ataɕi naŋka no tame ni. sonna toko hadʑimete mita. sonna fɯɯ ni, koe o aɽagerɯ toko. itsɯ mo itsɯ mo, kimi wa daɽe ka no tame ni. honto, heɴ na no.|480
194
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/shiki_fine02.wav|aʔ, kizɯtsɯita ɕikitɕaɴ o okoʔte kita çito da! ɽi fɯdʑiɴ! taoɽeta tɕitosetɕaɴ o tasɯketa no ni!, ma, ɯmi ni sasoʔta no wa—taɕi dakedo.|480
195
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/shiki_fine03.wav|ɯɯɴ, naɴ daɽoɯ, jokɯ wakannai. kietakaʔta no ka na. kʲɯɯ ni, doɯ de mo jokɯ nactɕaʔta. kimi to tɕitosetɕaɴ ga, naɴ ka, toːkɯte.|480
196
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/shiki_fine04.wav|"mɯmɯmɯ, kimi made soɯ jɯɯ koto iɯ. "" haihai, samiɕikaʔtandesɯɯ. ɯ eːɴ, kanaɕikaʔta joː kamaʔte kɯɽenakɯte itɕi ."" koɽe de iː?"|480
197
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/shiki_fine05.wav|aː, iː jo. hanɽoɴ wa motometenai. ataɕi ga hoɕiː no wa, kampeki na haɴɕoɯ. kimi no kasetsɯ o ɕoɯmei ɕite misete, owaɽi no çi made ni.|480
198
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/shiki_fine05.wav|kiboɯ wa—rɯ. akaɽi wa, kiʔto kie taɽi ɕinai. anata ga wataɕi ni mitɕi o ɕimeɕite kɯɽetakaɽa, kiʔto majowanaide mezaserɯ. owaɽi no saki no, çikaɽi o.|480
stylekan/Data/mg_valid.txt ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/1b74d271/wav/1b74d271_045.wav|erɯfʲɴ no rɯɴ o tsɯkaʔte, anata ni jasaɕiː sekai o tsɯkɯɽinaoseba iː no.|37
2
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/cda4375a/wav/cda4375a_1309.wav|moɯ sono koɽo no kiokɯ wa hotondo naikeɽedo, wataɕi mo niŋgeɴ daʔta.|385
3
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/6e22f5cd/wav/6e22f5cd_267.wav|soʔka, mada nomerɯ joɯ ni naʔta baʔkaɽi ka. kondo oanesaɴ to, baː de mo iʔte mirɯ?|236
4
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_voice_home_shinaido_room/kanade_card_201055/kanade_voice_201055_1_11.wav|aɽa, moɯ çitotsɯ? pɯɽodʲɯɯsaːsaɴ ga onedaɽi nante. fɯfɯ!!|482
5
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/46d6bf83/wav/46d6bf83_0264.wav|masaja, koɽe de kʲoɯ no ɽeɴɕɯɯ wa oçiɽaki da jo neː.|141
6
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/22d1fa2f/wav/22d1fa2f_362.wav|dʑoɯdaɴ da, oɽe wa haneda ni hanaɕi ga arɯɴ da jo.|84
7
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/miku/miku_cgss/miku_card_100395/miku_voice_100395_2_05.wav|pːtɕaɴ ga odoɽokɯ kɯɽai! sekɯɕiː ni naʔte miserɯnʲa!|487
8
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/7787d8bf/wav/7787d8bf_1574.wav|gomeɴ nasai, neːtɕaɴ no iɯ toːɽi desɯ.|306
9
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/a0fd12d7/wav/a0fd12d7_2005.wav|kaɽeɽa kanodʑoɽa no soɯɕitsɯ kaɴ wa, ika bakaɽi ka. ɽoʔkɯnaʔtosama o ɯɕinaʔta waɽewaɽe idʑoɯ no kanaɕimi no eɴ ni, irɯ no de wa naideɕoɯ ka.|333
10
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/1a5a3db8/wav/1a5a3db8_2430.wav|ija jo, soɽe mo ija! wataɕi ga mae de, ɯɯ ga ɯɕiɽo, zɯʔto soɯ ɕite kitadʑa nai! anta wa, damaʔte wataɕi no ɯɕiɽo o tsɯite kɯɽeba iː no!|91
11
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/653a1bc0/wav/653a1bc0_1288.wav|de mo, kaɽadatɕɯɯ boɽoboɽo de, iki o sɯrɯ no ga jaʔto deɕita. koko de wataɕi wa owaɽi naɴ da to, naɴ to nakɯ, wakaɽimaɕita.|174
12
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/631b0413/wav/631b0413_015.wav|jokaʔtaɽa, kʲoɯ mo iʔɕo ni, iː desɯ ka?|151
13
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/karen_cgss/karen_cgss_card_201316/karen_cgss_voice_201316_2_01.wav|nita joɯ na fɯkɯ de mo kaŋkei nai! fɯkɯ wa hoɕiːkaɽa kaɯ! idʑoɯ!|488
14
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/940de876/wav/940de876_3235.wav|jaɽa moɯ, mada oʔkikɯ narɯ no? ma, tɕoʔto wa te kɯgeɴ ɕinasaɴ joː.|319
15
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/f04ee070/wav/f04ee070_0345.wav|kaɯrɯkosaɴ to baʔkaɽi nakajasa soɯ ni ɕitɕaʔte soɯ.|461
16
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/79b0d13c/wav/79b0d13c_437.wav|ɴ? ɕitsɯ wa, saʔki kaɽa ɽe no kimotɕi josa soɯ na kao o mitetaɽa...kaɽada ga, atsɯkɯ naʔte...|267
17
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/sakura_moyu/30/30000020.wav|kanodʑo wa, dʑibɯɴ o gisei ni ɕi, bokɯɽa no koto o, jorɯ no sekai o mamoɽoɯ to ɕite irɯ.|518
18
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/monogatari/monogatari_voices/monogatari_split/sawashiro_miyuki/Sawashiro_Miyuki_02/Sawashiro_Miyuki_02_chunk1017.wav|aɽaɽa ɽike, naŋge ni fɯjɯɯ soɯ?|475
19
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/3e02a4dc/wav/3e02a4dc_916.wav|dʑa a, tsɯitectɕa ikenaiɴ desɯ kaː?|213
20
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/3c58f1c4/wav/3c58f1c4_1376.wav|dʑitsɯ wa wataɕi, paːɴ!ʔte oːkina oto ga sɯrɯ, ɯtɕiage hanabi ga nigate naɴ desɯ.|117
21
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/f19b6190/wav/f19b6190_0480.wav|dʑiʔsai no nami wa moʔto dʑoɯge ɯndoɯ mo hageɕiːɴ dakaɽa.|449
22
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/jou_mika/jou_mika_cgss/jou_mika_card_301206/jou_mika_voice_301206_1_10.wav|me, tsɯmɯʔte agejoɯ ka? daʔte hoɕiːdeɕoɯ? kisɯ matɕigao?|485
23
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/arisu/arisu_mobamasu/14_arisu__0015_(Vocals)/14_arisu__0015_(Vocals)_chunk153.wav|baɽentaiɴ de mo, itsɯ mo toːɽi desɯ.|495
24
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/cec410a1/wav/cec410a1_328.wav|dʑosei no ɕaɕiɴ ga noʔte irɯ saito o, ɕiteki na joɯto de etsɯɽaɴ ɕite itaɽaɕiː.|434
25
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/tsujido/vo/010/S010_A_0004.wav|ke!! moɯ tɕoʔto de kɯmi no jaɽoɯ o ɯtɕitoʔte jaɽeta no ni jo!|537
26
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/0253acb6/wav/0253acb6_869.wav|motomoto, koɯ ɕite gaʔkoɯ ni koɽaɽeɽeba, soɽe de mokɯhjoɯ taʔsei daʔta koto dakaɽa.|28
27
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/7787d8bf/wav/7787d8bf_0305.wav|ano sa, sono, moʔto te ga kakarɯ mono daʔtaɽa, tabɯɴ tetsɯdaʔte moɽaʔta to omoɯ jo.|306
28
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/ee093a4f/wav/ee093a4f_0555.wav|somosomo anata no koɯgi no ɕikata ga ki ni haiɽimaseɴ. iedeʔte naɴ desɯ ka? zeʔsaɴ iedetɕɯɯ no wataɕi ga ieta giɽi de wa aɽimaseŋga, ɕoɯgakɯsei desɯ ka?|371
29
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/90fa05fd/wav/90fa05fd_1203.wav|daga, soɯ toɽaerɯ no ga itɕibaɴ, fɯ ni otɕirɯ.|274
30
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki__02/Shinichiro_Miki__02_chunk297.wav|geikaiʔte no wa, kamisama ni toʔte meataɽaɕiːɴ da jo. ija, meataɽaɕi ka wa neː ka, kawaɽibaɽi ɕineː ze.|473
31
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/tsujido/vo/302/S302_E_0006.wav|sɯgokaʔta.|532
32
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_03/Sakurai_Takahiro_03_chunk1483.wav|nado to...|477
33
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/4ce0075b/wav/4ce0075b_1410.wav|e, masaka soɽe ni saŋka ɕiɽo to kaiwanaide ne. naɴ no dʑɯmbi mo ɕitenaiɕi, oːzei no çito no mae ni derɯ to ka mɯɽi dakaɽa ne.|187
34
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/5e5993c5/wav/5e5993c5_615.wav|ne, neː, moɯ iʔkai kisɯ ɕite.|120
35
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/ad28b91b/wav/ad28b91b_1315.wav|dʑibɯɴ de mo mɯtɕa o iʔterɯ no wa wakaʔterɯ. daga asaçi dakaɽa kiːte mitaɴ da.|343
36
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/35d789d2/wav/35d789d2_138.wav|te wa soko de iːɴ desɯ ka? koɕi ni mawasanakɯte iːɴ desɯ ka?|51
37
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/99b5eb16/wav/99b5eb16_0264.wav|otakɯ no ofɯrɯ o kaɕite itadakenai kaɕiɽa? osamɯ ɕibaɽakɯ sentoɯ kajoɯ wa mɯɽi dakaɽa.|345
38
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/0ee82b61/wav/0ee82b61_0232.wav|jake ni sawagaɕiː hade na sɯɯtsɯ no wakai otoko no çito ɽaɕiːɴ dakedo, sono çito ni mo oɽei o iwanakɯtɕa ne.|112
39
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/f6c4b7b2/wav/f6c4b7b2_0424.wav|dʑaː, paiɽoʔto aoikɯɴ de iːdʑa nai! nanni mo mondai nai wa!|439
40
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_mobamas/chie_mobamasu_0014/chie_mobamasu_0014_chunk201.wav|ne fɯsokɯ ni naʔtɕaimasɯ...ɯɯ...|483
41
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/84be23bd/wav/84be23bd_0187.wav|oːgeza desɯ ne, taiɕita koto aɽimaseɴ. mada ɕiɽabeterɯ dake desɯɕi.|284
42
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/36ea135b/wav/36ea135b_1952.wav|ima kazɯetakedo, ɕita no kaidaɴ mo dʑɯɯ joɴ da ne. kʲɯɯkoɯɕa no kaidaɴ, zembɯ dʑɯɯ joɴ?|60
43
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/72921df9/wav/72921df9_190.wav|kao to namae, neː?|228
44
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/bca2cfac/wav/bca2cfac_1044.wav|moɕi ka ɕite, takɯ ja to wakaba iʔɕo daʔta no kai?|320
45
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/9d33dced/wav/9d33dced_811.wav|ɕikaɕi ɽogɯiɴ ɕite irɯ idʑoɯ, fɯtatsɯ no sekai de iɕiki wa tsɯnagaʔte irɯ.|275
46
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/58a2282f/wav/58a2282f_0589.wav|tobisaɽasaɴ no koto ga ɕimpai na no wa wakaɽimasɯga, sono, ɕiai ga owaʔta tɕokɯgo desɯɕi, satoɯiɴ sempai no koto mo ki ni kakete hoɕiː desɯ.|186
47
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/54ba80a8/wav/54ba80a8_0026.wav|zɯɯzɯɯ ɕi, to omowanai no?|153
48
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/monogatari/monogatari_voices/monogatari_split/sawashiro_miyuki/Sawashiro_Miyuki_01/Sawashiro_Miyuki_01_chunk619.wav|mɯɽidʑi wa ɕinaikeɽedo, nani ka aʔta toki, çitoɽi de naɴ to ka ɕijoɯ to omoctɕa dame jo. anata wa imada ni sono keikoɯ ga tsɯjoikaɽa.|475
49
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/c593ed00/wav/c593ed00_0583.wav|moɯ, hoʔtoite. koko de fɯjɯ o okosɯkaɽa.|466
50
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/653a1bc0/wav/653a1bc0_1068.wav|soɽe ni, ima wa kono fɯkɯ igai ni kigae ga aɽimaseɴ no de.|174
51
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/1a5a3db8/wav/1a5a3db8_2704.wav|aɽe? wataɕi no koto ɕiʔterɯ?|91
52
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/2af831b5/wav/2af831b5_521.wav|fɯɴ, itsɯ mo nigijaka na komɯsɯme da ze.|5
53
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/monogatari/monogatari_voices/monogatari_split/horie_yui/Horie_Yui_02/Horie_Yui_02_chunk643.wav|warɯi koto wa iwaɴ, aɽawaɽetaɽa saʔsato kɯɽete jaɽe.|476
54
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/bce2a5af/wav/bce2a5af_2513.wav|ɯ, koɽe, sɯgokɯ kimotɕiː— ka mo. sɯgoi, naɴ da ka hazɯkaɕiː oto da ne.|261
55
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/0b8ae160/wav/0b8ae160_0536.wav|a, ano...dʑɯɯdaiʔte, daidʑoɯbɯ naɴ desɯ ka?|94
56
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/cda4375a/wav/cda4375a_0871.wav|saiɕo kaɽa sono tsɯmoɽi. kikeɴ na ɽei dakaɽa, fɯtaɽi ni makaserɯ tsɯmoɽi wa nai.|385
57
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/monogatari/monogatari_voices/monogatari_split/sakamoto_maya/Sakamoto_Maya_01/Sakamoto_Maya_01_chunk1292.wav|heː, jaʔte mijoɯ. eʔto,ɴ?|474
58
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/b1921b3f/wav/b1921b3f_0549.wav|oni ni kaete mo, aisɯrɯ mono to soitogetai. soɯ kaŋgaeta mono ga, oni o tsɯkaʔta kono çidʑɯtsɯ o amidaɕita.|294
59
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/monogatari/monogatari_voices/monogatari_split/kamiya_hiroshi/Kamiya_Hiroshi_02/Kamiya_Hiroshi_02_chunk905.wav|ɕinseki ne.|478
60
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/momoka/momoka_cgss/momoka_card_100410/momoka_voice_100410_4_02.wav|dakedo, ima wa ɯtaʔte odoʔte, tanoɕinde! haɕitanai ka mo to omoɯ kɯɽai, oːkina koe de waɽaʔte...fɯfɯ!! ojoɯgi wa warɯkɯte mo, tanoɕisa de iʔpai desɯ wa!|489
61
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/76981655/wav/76981655_0566.wav|mada hjakɯ maɴ kaitɕɯɯ sɯrɯ totɕɯɯ daɕi, mata kaisɯɯ wasɯɽeta ne. haiː—tɕi!|110
62
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/6d250131/wav/6d250131_647.wav|kansai no oteɽa ni wa jokɯ arɯɽaɕiːkedo, koko no wa ɕiɽanai.|202
63
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/36d0de98/wav/36d0de98_590.wav|soɽaoka, nani ka wakaʔta no ka?|29
64
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/d39532a8/wav/d39532a8_1523.wav|dakaɽa kaeʔte kite kɯɽetetaɴ daʔte.|384
65
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/a93da23d/wav/a93da23d_0512.wav|maː, nindʑa ɕɯʔɕiɴ no itɕizokɯ de arɯ koto wa çitei ɕimaseŋga.|317
66
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/e3ee19b2/wav/e3ee19b2_127.wav|toɯbɯɴ wa koko o tsɯkawasete moɽaoɯ ze. kiːtenai no ka?|370
67
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/33e59069/wav/33e59069_045.wav|çitoɽi daʔte, keʔɕite warɯi mono de wa nai no.|40
68
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/momoka/momoka_cgss/momoka_card_100253/momoka_voice_100253_1_12.wav|fɯɯɯ...dʑibɯɴ no itaɽanasa ga, kɯjaɕiː...pɯɽodʲɯɯsaːtɕama...|489
69
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/bce2a5af/wav/bce2a5af_0662.wav|ano mɯsɯme wa jokɯ wakaɽaɴ na.|261
70
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/8f8acabb/wav/8f8acabb_426.wav|dʑasɯko wa bɯ tsɯ zoɯ to ka e bakaɽi ki ni haiʔteta ne. soɽe to kimono! hommono no maikosaɴ mita toki no dʑasɯko no teɴɕiɴ no agaɽi gwai, sɯgokaʔta!|244
71
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/4e2f4ba6/wav/4e2f4ba6_0673.wav|fɯto naŋge nakɯ, aɕi o tometa.|143
72
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/84be23bd/wav/84be23bd_1022.wav|moɯ, sonna miɽai mo kimaseɴ jo.|284
73
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/36ea135b/wav/36ea135b_2487.wav|ɯɯɴ, jaɕiɽokɯɴ no kaŋgae mo ataʔterɯ. nozomi sempai wa, kanzeɴ ni wasɯɽetɕaʔterɯ wake dʑa nai.|60
74
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/940de876/wav/940de876_4015.wav|tɕoʔto josanteki ni ne, kitai bɯmoɴ no hoɯ de mo joteigai no josaɴ ga kakaɽi soɯ na no jo.|319
75
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/598c113f/wav/598c113f_109.wav|iɽaʔɕai! ɕɯɯgakɯ rʲokoɯ wa, tanoɕikato desɯ ka?|194
76
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/5d68aedf/wav/5d68aedf_0376.wav|daʔte, tomodatɕi naŋkaitaʔte nani mo kawaɽanai moɴ!|183
77
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/f19b6190/wav/f19b6190_0934.wav|jɯɯ wa so no...ɯwaki to ka wa ɕinai to omoʔterɯkedo...|449
78
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/8e1072e6/wav/8e1072e6_0195.wav|soɽe o keʔte koko ka jo? soɽe mo ataɕi ni ɽenɽakɯ naɕi ka jo?|247
79
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/79b0d13c/wav/79b0d13c_434.wav|jokaʔta ɕino ni kiːte mo doɽe mo niaʔterɯʔte iɯ dake de kimaɽanakaʔtakaɽa.|267
80
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/46d6bf83/wav/46d6bf83_2379.wav|tɕigaɯ, kaʔtaɽi make taɽi sɯrɯ no wa ataɕi!|141
81
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/02153faa/wav/02153faa_484.wav|ɯɯɴ, bokɯ ga ɕirɯ kagiɽiːtsɯ mo konna kandʑi da na. hantsɯki mae ni tɕoʔto aɽe kimi no çi ga aʔtakedo, soɽe dake da na.|20
82
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/940de876/wav/940de876_0845.wav|moɯ iʔkai dake kikɯ wa? minna wa doɯ?|319
83
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/282cfa8c/wav/282cfa8c_1042.wav|ima wa magarɯ...itakaʔtakedo, magarɯ joɯ ni naʔtaɴ da jo.|16
84
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/773a4156/wav/773a4156_2205.wav|moɯ iʔɕoɯ kakaʔte mo kaesenai ka mo ɕiɽenai. soɽe kɯɽai toko ni wa kaɴɕa ɕiterɯ.|214
85
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/1ed99743/wav/1ed99743_557.wav|ɴ. kimitatɕi no bɯtɕiageta, daikʲɯɯɕɯɯsl koɯsoɯ. bokɯ wa, aɽe ni me ga arɯ to mikonde irɯ.|1
86
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/00163dc9/wav/00163dc9_1842.wav|aɽe wa, daɽe kaɽa no okɯɽimono daʔta no ka naʔte.|69
87
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/ac0e6660/wav/ac0e6660_0269.wav|soɽe naɽa sa, kʲoɯ wa koɽe kaɽa aiterɯ?|246
88
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/1967ee53/wav/1967ee53_0238.wav|madʑo wa ne, hoɴ o jomɯ no ga sɯki naɴ daʔte.|71
89
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/c81c2b4d/wav/c81c2b4d_231.wav|ʔte ka, seitoɯ ɽitsɯ ga takai no wa, hatsɯnetɕaɴ ga adobaisɯ ɕite kɯɽerɯkaɽa de...a, dʑikaɴ daidʑoɯbɯ?|400
90
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/05a45f91/wav/05a45f91_131.wav|negaɽakɯba ɕɯ ga ɕɯkɯfɯkɯ ɕi mamoʔte kɯdasaimasɯ joɯ ni.|70
91
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/cc948b89/wav/cc948b89_1571.wav|fɯtsɯɯ no dʑiɕɯ kɯnɽeɴ naɽa ɯɽaniwa de jaɽeba iːdʑa nai.|423
92
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/37c014a1/wav/37c014a1_0474.wav|soɯ ɕi te kɯɽerɯ to kaɴɕa sɯrɯ. moɯ iʔkai, mɯne o jɯɽaɕite agerɯ.|6
93
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/cda4375a/wav/cda4375a_0579.wav|soɯ ne, fɯkɯsɯɯ no dansei to kaŋkei o moʔte inai kagiɽi wa...|385
94
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_03/Sakurai_Takahiro_03_chunk2594.wav|to, kamawazɯ ni hanaɕi o tsɯzɯketa.|477
95
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/f4169f28/wav/f4169f28_193.wav|dakaɽa soɯ iɯ imi dʑa nakɯte.|386
96
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/bca2cfac/wav/bca2cfac_0882.wav|dʑimi na kʲampasɯ ɽaifɯ dʑa jondete tsɯmaɽanaikaɽa. tsɯmaɽanai episoːdo wa omoɕiɽokɯ, omoɕiɽoi no wa saɽani omoɕiɽokɯ. taɽinakeɽeba itɕi kaɽa tsɯkɯʔta.|320
97
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/9ee921f6/wav/9ee921f6_0012.wav|soɯ naɴ desɯ, çito o atsɯmejoɯ to sendeɴ ɕite irɯ mitai de.|347
98
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/saori/MRD12.wav|kakɯbetsɯ desɯ. pɯɽodʲɯɯsaː kaɽa itadakɯ, kono iʔpai...!|563
99
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/5e85bf92/wav/5e85bf92_1326.wav|moɯ...gomeɴ ne, kaɽa kaɯ tsɯmoɽi dʑa nakaʔtaɴ dakedo...|217
100
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/4ded9fa1/wav/4ded9fa1_0342.wav|fɯfɯ, de wa toɽiaezɯ, hoːpɯ to jɯki no meija no tame ni, fɯtsɯɯ no fɯkɯ da to ɕindʑite agemaɕoɯ ka.|129
101
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/monogatari/monogatari_voices/monogatari_split/sawashiro_miyuki/Sawashiro_Miyuki_03/Sawashiro_Miyuki_03_chunk1453.wav|ɯmaɽeotɕita tsɯgi no ɕɯŋkaɴ, wagahai wa wagahai o ɯmiotoɕita botai o mita.|475
102
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/e5d53ec4/wav/e5d53ec4_049.wav|neː, tanteisaɴ ɕiʔterɯ? koko saikiɴ, kanda de jakeɴ no çigai ga aitsɯiteta no.|460
103
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki_03/Shinichiro_Miki_03_chunk817.wav|sasɯga ni sɯgɯ ni nadekoɯ nadekoɯ da to, kampa ɕita joɯ desɯ. ma, iʔte ɕimaeba, maegami ga nakɯnaʔta dake desɯkaɽa, jokɯ miɽeba wakarɯɴ deɕoɯ ne.|473
104
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/tsujido/vo/011/S011_A_2155.wav|hontoɯ da...|553
105
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/4e2f4ba6/wav/4e2f4ba6_1077.wav|ima kaɽa me o toːɕite, haijakɯ kimete, jomiawase ɕite, ɽokɯoɴ ɕite, oɯbo sɯrɯɴ deɕo?|143
106
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/fa4704bf/wav/fa4704bf_119.wav|konna dʑikaɴ ni konna baɕo ni jobidasɯ nante, johodo kikaɽetakɯ nai hanaɕi na no ne.|387
107
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/tsujido/vo/011/S011_A_0310.wav|nʲa haha! jaʔpa sempai omoɕiɽoiʔsɯ!|553
108
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/74eb72c7/wav/74eb72c7_0253.wav|so, sonna koto nai jo, gɯɯzeɴ dʑa nai ka na.|184
109
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/efb922ca/wav/efb922ca_0770.wav|ɯfɯfɯ, baːrɯ o fɯɽimawasɯkaɽa, moɯ tɕoʔto ɯɕiɽo ni sagaʔtete.|409
110
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/3ec57102/wav/3ec57102_137.wav|wataɕi no tame ni, oɕokɯdʑi o tɕɯɯdaɴ sɯrɯ çitsɯjoɯ wa aɽimaseɴ. doɯzo, oki ni nasaɽazɯ.|144
111
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/2af831b5/wav/2af831b5_240.wav|naː, nosejasɯi dʑaɽo.|5
112
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/99b5eb16/wav/99b5eb16_0339.wav|wataɕi mo itɕi do çinosakasaɴ no tokoɽo ni kao o daɕite mirɯ wa. kanodʑo ga ɕiŋgakɯ ɕita toki no fɯkɯzatsɯ na dʑidʑoɯ to ka, koʔtɕi kaɽa setsɯmei ɕite oita hoɯ ga iː to omoɯɕi.|345
113
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/bb6ac6f1/wav/bb6ac6f1_1043.wav|jɯɯkotɕaɴ seidʑɯɴha da jo! baɽibaɽi ɯki dʑa nai?|263
114
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/25714f7a/wav/25714f7a_1007.wav|maː soɯ iɯnaɽa damaʔterɯsɯkedo.|95
115
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/bf7b3aa8/wav/bf7b3aa8_457.wav|oniːtɕaɴ, tɕinami mo ufo kansokɯjoɯ no boɯeŋkʲo o kaɯ tame ni, mata meidosaɴ ni naʔte iː ka na?|277
116
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/sakura_moyu/06/06005910.wav|sɯgoi ne.|520
117
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/6d250131/wav/6d250131_427.wav|kaimeɴ ga, naɴ ka oiɕi soɯ na iɽo ɕiterɯ ne!|202
118
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/7787d8bf/wav/7787d8bf_0621.wav|onadʑi dʑa nai jo. kotoɕi wa, ɕifɯki otokosaɴ to ka jobɯ joteinaɴ dakaɽa.|306
119
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/6d19f294/wav/6d19f294_381.wav|desɯ ne. soɯ dekirɯ nante mitoːɕi, sɯkɯnakɯtomo wataɕi wa tateɽaɽenai desɯ.|189
120
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/0f6fbea8/wav/0f6fbea8_0220.wav|oi, moɯ iː. omae no aiboɯ dʑimaɴ wa iːʔte no.|52
121
+ /home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_05.wav|çito ni kaŋka saɽerɯ no, betsɯ ni, ɕiɽoi dʑa nai jo. meʔta ni sono aite ga inai dake.|480
stylekan/Data/moe_res/imas_split/ranko/ranko_cgss/ranko_chara_198/ranko_chara_198.acb.tmp ADDED
Binary file (405 kB). View file
 
stylekan/Data/moe_res/imas_split/shiki/shiki_fine/phonemizerASR_script_jpn.py ADDED
@@ -0,0 +1,804 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import WhisperProcessor, WhisperForConditionalGeneration, AutoModelForSpeechSeq2Seq
2
+ import csv
3
+ from tqdm import tqdm
4
+ from datasets import Dataset, Audio
5
+ import os
6
+ import torch
7
+ import re
8
+ import pykakasi
9
+
10
# Ordered hiragana -> IPA replacement table used by post_fix().
#
# Order matters: post_fix() applies the entries one after another with
# str.replace, so every two-character sequence has to be listed before the
# single kana it is built from.  Several values carry a leading space on
# purpose; those spaces end up in the output and are cleaned up later by the
# `spaces` table.
kana_mapper = dict([
    # --- ゔ row -------------------------------------------------------------
    ("ゔぁ","ba"),
    ("ゔぃ","bi"),
    ("ゔぇ","be"),
    ("ゔぉ","bo"),
    ("ゔゃ","bʲa"),
    ("ゔゅ","bʲɯ"),
    ("ゔょ","bʲo"),

    ("ゔ","bɯ"),

    # --- kana + small vowel / small y-kana (mostly long vowels) --------------
    ("あぁ"," aː"),
    ("いぃ"," iː"),
    ("いぇ"," je"),
    ("いゃ"," ja"),
    ("うぅ"," ɯː"),
    ("えぇ"," eː"),
    ("おぉ"," oː"),
    ("かぁ"," kaː"),
    ("きぃ"," kiː"),
    ("くぅ","kɯː"),
    ("くゃ","kʲa"),   # was "ka": palatalisation marker was missing
    ("くゅ","kʲɯ"),
    ("くょ","kʲo"),
    ("けぇ","keː"),
    ("こぉ","koː"),
    ("がぁ","gaː"),
    ("ぎぃ","giː"),
    ("ぐぅ","gɯː"),
    ("ぐゃ","gʲa"),
    ("ぐゅ","gʲɯ"),
    ("ぐょ","gʲo"),
    ("げぇ","geː"),
    ("ごぉ","goː"),
    ("さぁ","saː"),
    ("しぃ","ɕiː"),
    ("すぅ","sɯː"),
    ("すゃ","sʲa"),
    ("すゅ","sʲɯ"),
    ("すょ","sʲo"),
    ("せぇ","seː"),
    ("そぉ","soː"),
    ("ざぁ","zaː"),
    ("じぃ","dʑiː"),
    # NOTE(review): "ずぅ" is listed again further down with the value "zɯ";
    # the later entry wins, so the effective mapping is "zɯ", not "zɯː".
    ("ずぅ","zɯː"),
    ("ずゃ","zʲa"),
    ("ずゅ","zʲɯ"),
    ("ずょ","zʲo"),
    ("ぜぇ","zeː"),
    ("ぞぉ","zoː"),   # was "zeː": copy/paste slip from the ぜぇ line
    ("たぁ","taː"),
    ("ちぃ","tɕiː"),
    ("つぁ","tsa"),
    ("つぃ","tsi"),
    ("つぅ","tsɯː"),
    ("つゃ","tɕa"),
    ("つゅ","tɕɯ"),
    ("つょ","tɕo"),
    ("つぇ","tse"),
    ("つぉ","tso"),
    ("てぇ","teː"),
    ("とぉ","toː"),
    ("だぁ","daː"),
    ("ぢぃ","dʑiː"),
    ("づぅ","dɯː"),
    ("づゃ","zʲa"),
    ("づゅ","zʲɯ"),
    ("づょ","zʲo"),
    ("でぇ","deː"),
    ("どぉ","doː"),
    ("なぁ","naː"),
    ("にぃ","niː"),
    ("ぬぅ","nɯː"),
    ("ぬゃ","nʲa"),
    ("ぬゅ","nʲɯ"),
    ("ぬょ","nʲo"),
    ("ねぇ","neː"),
    ("のぉ","noː"),
    ("はぁ","haː"),
    ("ひぃ","çiː"),
    # NOTE(review): "ふぅ" is listed again further down with the value "ɸɯ";
    # the later entry wins, so the effective mapping is "ɸɯ", not "ɸɯː".
    ("ふぅ","ɸɯː"),
    ("ふゃ","ɸʲa"),
    ("ふゅ","ɸʲɯ"),
    ("ふょ","ɸʲo"),
    ("へぇ","heː"),
    ("ほぉ","hoː"),
    ("ばぁ","baː"),
    ("びぃ","biː"),
    ("ぶぅ","bɯː"),
    ("ぶゃ","bʲa"),   # was a second ("ふゃ","ɸʲa"): wrong row
    ("ぶゅ","bʲɯ"),
    ("ぶょ","bʲo"),   # was a second ("ふょ","ɸʲo"): wrong row
    ("べぇ","beː"),
    ("ぼぉ","boː"),
    ("ぱぁ","paː"),
    ("ぴぃ","piː"),
    ("ぷぅ","pɯː"),
    ("ぷゃ","pʲa"),
    ("ぷゅ","pʲɯ"),
    ("ぷょ","pʲo"),
    ("ぺぇ","peː"),
    ("ぽぉ","poː"),
    ("まぁ","maː"),
    ("みぃ","miː"),
    ("むぅ","mɯː"),
    ("むゃ","mʲa"),
    ("むゅ","mʲɯ"),
    ("むょ","mʲo"),
    ("めぇ","meː"),
    ("もぉ","moː"),
    ("やぁ","jaː"),
    ("ゆぅ","jɯː"),
    ("ゆゃ","jaː"),
    ("ゆゅ","jɯː"),
    ("ゆょ","joː"),
    ("よぉ","joː"),
    ("らぁ","ɽaː"),
    ("りぃ","ɽiː"),
    ("るぅ","ɽɯː"),
    ("るゃ","ɽʲa"),
    ("るゅ","ɽʲɯ"),
    ("るょ","ɽʲo"),
    ("れぇ","ɽeː"),
    ("ろぉ","ɽoː"),
    ("わぁ","ɯaː"),
    ("をぉ","oː"),

    # --- yōon and loan-word combinations -------------------------------------
    ("う゛","bɯ"),
    ("でぃ","di"),
    ("でゃ","dʲa"),
    ("でゅ","dʲɯ"),
    ("でょ","dʲo"),
    ("てぃ","ti"),
    ("てゃ","tʲa"),
    ("てゅ","tʲɯ"),
    ("てょ","tʲo"),
    ("すぃ","si"),
    ("ずぁ","zɯa"),
    ("ずぃ","zi"),
    ("ずぅ","zɯ"),    # overrides the "zɯː" entry above (kept as in the original)
    ("ずぇ","ze"),
    ("ずぉ","zo"),
    ("きゃ","kʲa"),
    ("きゅ","kʲɯ"),
    ("きょ","kʲo"),
    ("しゃ","ɕʲa"),
    ("しゅ","ɕʲɯ"),
    ("しぇ","ɕʲe"),
    ("しょ","ɕʲo"),
    ("ちゃ","tɕa"),
    ("ちゅ","tɕɯ"),
    ("ちぇ","tɕe"),
    ("ちょ","tɕo"),
    ("とぅ","tɯ"),
    ("とゃ","tʲa"),
    ("とゅ","tʲɯ"),
    ("とょ","tʲo"),
    ("どぁ","doa"),
    ("どぅ","dɯ"),
    ("どゃ","dʲa"),
    ("どゅ","dʲɯ"),
    ("どょ","dʲo"),
    ("にゃ","nʲa"),
    ("にゅ","nʲɯ"),
    ("にょ","nʲo"),
    ("ひゃ","çʲa"),
    ("ひゅ","çʲɯ"),
    ("ひょ","çʲo"),
    ("みゃ","mʲa"),
    ("みゅ","mʲɯ"),
    ("みょ","mʲo"),
    ("りゃ","ɽʲa"),
    ("りぇ","ɽʲe"),
    ("りゅ","ɽʲɯ"),
    ("りょ","ɽʲo"),
    ("ぎゃ","gʲa"),
    ("ぎゅ","gʲɯ"),
    ("ぎょ","gʲo"),
    ("ぢぇ","dʑe"),
    ("ぢゃ","dʑa"),
    ("ぢゅ","dʑɯ"),
    ("ぢょ","dʑo"),
    ("じぇ","dʑe"),
    ("じゃ","dʑa"),
    ("じゅ","dʑɯ"),
    ("じょ","dʑo"),
    ("びゃ","bʲa"),
    ("びゅ","bʲɯ"),
    ("びょ","bʲo"),
    ("ぴゃ","pʲa"),
    ("ぴゅ","pʲɯ"),
    ("ぴょ","pʲo"),
    ("うぁ","ɯa"),
    ("うぃ","ɯi"),
    ("うぇ","ɯe"),
    ("うぉ","ɯo"),
    ("うゃ","ɯʲa"),
    ("うゅ","ɯʲɯ"),
    ("うょ","ɯʲo"),
    ("ふぁ","ɸa"),
    ("ふぃ","ɸi"),
    ("ふぅ","ɸɯ"),    # overrides the "ɸɯː" entry above (kept as in the original)
    ("ふぇ","ɸe"),
    ("ふぉ","ɸo"),

    # --- single kana ---------------------------------------------------------
    ("あ"," a"),
    ("い"," i"),
    ("う","ɯ"),
    ("え"," e"),
    ("お"," o"),
    ("か"," ka"),
    ("き"," ki"),
    ("く"," kɯ"),
    ("け"," ke"),
    ("こ"," ko"),
    ("さ"," sa"),
    ("し"," ɕi"),
    ("す"," sɯ"),
    ("せ"," se"),
    ("そ"," so"),
    ("た"," ta"),
    ("ち"," tɕi"),
    ("つ"," tsɯ"),
    ("て"," te"),
    ("と"," to"),
    ("な"," na"),
    ("に"," ni"),
    ("ぬ"," nɯ"),
    ("ね"," ne"),
    ("の"," no"),
    ("は"," ha"),
    ("ひ"," çi"),
    ("ふ"," ɸɯ"),
    ("へ"," he"),
    ("ほ"," ho"),
    ("ま"," ma"),
    ("み"," mi"),
    ("む"," mɯ"),
    ("め"," me"),
    ("も"," mo"),
    ("ら"," ɽa"),
    ("り"," ɽi"),
    ("る"," ɽɯ"),
    ("れ"," ɽe"),
    ("ろ"," ɽo"),
    ("が"," ga"),
    ("ぎ"," gi"),
    ("ぐ"," gɯ"),
    ("げ"," ge"),
    ("ご"," go"),
    ("ざ"," za"),
    ("じ"," dʑi"),
    ("ず"," zɯ"),
    ("ぜ"," ze"),
    ("ぞ"," zo"),
    ("だ"," da"),
    ("ぢ"," dʑi"),
    ("づ"," zɯ"),
    ("で"," de"),
    ("ど"," do"),
    ("ば"," ba"),
    ("び"," bi"),
    ("ぶ"," bɯ"),
    ("べ"," be"),
    ("ぼ"," bo"),
    ("ぱ"," pa"),
    ("ぴ"," pi"),
    ("ぷ"," pɯ"),
    ("ぺ"," pe"),
    ("ぽ"," po"),
    ("や"," ja"),
    ("ゆ"," jɯ"),
    ("よ"," jo"),
    ("わ"," ɯa"),
    ("ゐ"," i"),
    ("ゑ"," e"),
    ("ん"," ɴ"),
    ("っ"," ʔ"),
    ("ー"," ː"),

    # --- stray small kana ----------------------------------------------------
    ("ぁ"," a"),
    ("ぃ"," i"),
    ("ぅ"," ɯ"),
    ("ぇ"," e"),
    ("ぉ"," o"),
    ("ゎ"," ɯa"),

    ("を","o")
])


def post_fix(text):
    """Replace every hiragana sequence listed in ``kana_mapper`` with its IPA string.

    The table is applied in insertion order with plain ``str.replace``, so
    multi-kana entries are consumed before the single kana they contain.
    Characters that have no entry in the table are left untouched.

    Args:
        text: string that may contain hiragana.

    Returns:
        The string with all mapped kana replaced.
    """
    for kana, ipa in kana_mapper.items():
        text = text.replace(kana, ipa)
    return text
319
+
320
def convert_to_kana(text):
    """Convert the Japanese-script parts of ``text`` to hiragana, leaving the rest as is.

    The text is split into runs of characters inside / outside the ranges
    U+3000-U+30FF, U+3400-U+4DBF and U+4E00-U+9FFF.  Runs that start with a
    character from those ranges are passed to pykakasi and replaced by the
    concatenated ``'hira'`` field of the result; every other run is copied
    through unchanged.

    The pykakasi converter is created lazily on first use and then reused:
    the original built a new converter on every call, even for input that
    contains no Japanese characters at all.
    # NOTE(review): reuse assumes kakasi.convert() keeps no per-call state - confirm
    # against the installed pykakasi version.

    Args:
        text: input string, possibly mixing Japanese script with other text.

    Returns:
        The string with Japanese-script runs rewritten in hiragana.
    """
    def convert_word(word):
        kks = getattr(convert_to_kana, "_kks", None)
        if kks is None:
            # Build the converter once and cache it on the function object.
            kks = pykakasi.kakasi()
            convert_to_kana._kks = kks
        return ''.join(item['hira'] for item in kks.convert(word))

    # The capture group keeps the non-Japanese separators in the result list.
    parts = re.split(r'([^\u3000-\u30ff\u3400-\u4dbf\u4e00-\u9fff]+)', text)

    converted_parts = [
        convert_word(part) if re.match(r'[\u3000-\u30ff\u3400-\u4dbf\u4e00-\u9fff]', part) else part
        for part in parts
    ]

    return ''.join(converted_parts)
335
+
336
+ import re
337
+
338
# Ordered table of literal clean-ups applied by random_space_fix().
#
# Entries are applied top to bottom with str.replace, so an earlier entry can
# change the text a later entry was meant to match.  Known consequences are
# flagged inline with NOTE(review) and deliberately left as they were.
spaces = dict([

    ("ɯ ɴ","ɯɴ"),
    ("na ɴ ","naɴ "),
    (" mina ", " miɴna "),
    ("ko ɴ ni tɕi ha","konnitɕiwa"),
    ("ha i","hai"),
    ("boɯtɕama","boʔtɕama"),
    ("i eːi","ieːi"),
    ("taiɕɯtsɯdʑoɯ","taiɕitsɯdʑoɯ"),
    ("soɴna ka ze ni","soɴna fɯɯ ni"),
    (" i e ","ke "),
    ("\ufffd",""),   # drop U+FFFD replacement characters
    ("×"," batsɯ "),
    ("se ka ɯndo","sekaɯndo"),
    ("i i","iː"),
    ("i tɕi","itɕi"),
    ("ka i","kai"),
    ("naɴ ga","nani ga"),
    ("i eː i","ieːi"),

    ("naɴ koɽe","nani koɽe"),
    ("naɴ soɽe","nani soɽe"),
    (" ɕeɴ "," seɴ "),

    # ("konna","koɴna"),
    # ("sonna"," soɴna "),
    # ("anna","aɴna"),
    # ("nn","ɴn"),

    ("en ","eɴ "),
    ("in ","iɴ "),
    ("an ","aɴ "),
    ("on ","oɴ "),
    ("ɯn ","ɯɴ "),
    # ("nd","ɴd"),

    ("koɴd o","kondo"),
    ("ko ɴ d o","kondo"),
    ("ko ɴ do","kondo"),

    ("oanitɕaɴ","oniːtɕaɴ"),
    ("oanisaɴ","oniːsaɴ"),
    ("oanisama","oniːsama"),
    ("hoːmɯrɯɴɯ","hoːmɯrɯːmɯ"),
    ("so ɴ na ","sonna"),
    # NOTE(review): the next two entries replace a string with itself (no-op).
    (" sonna "," sonna "),
    (" konna "," konna "),
    ("ko ɴ na ","konna"),
    (" ko to "," koto "),
    ("edʑdʑi","eʔtɕi"),
    (" edʑdʑ "," eʔtɕi "),
    (" dʑdʑ "," dʑiːdʑiː "),
    ("secɯnd","sekaɯndo"),

    ("ɴɯ","nɯ"),
    ("ɴe","ne"),
    ("ɴo","no"),
    ("ɴa","na"),
    ("ɴi","ni"),
    ("ɴʲ","nʲ"),

    ("hotond o","hotondo"),
    ("hakoɴd e","hakoɴde"),
    ("kaʔkaɽi","gaʔkaɽi"),

    ("gakɯtɕi ɽi","gaʔtɕiɽi "),

    (" ʔ","ʔ"),
    ("ʔ ","ʔ"),

    ("-","ː"),
    # NOTE(review): the two entries below can never match, because every "-"
    # has already been replaced by the entry above.  Left in place because the
    # intended order is not clear from this file.
    ("- ","ː"),
    ("--","~ː"),
    ("~","—"),
    ("、",","),

    (" ː","ː"),
    ('ka nade',"kanade"),

    ("ohahasaɴ","okaːsaɴ"),
    # Collapse a double space into one.  The entry previously mapped a single
    # space to a single space, which did nothing.
    (" " * 2," "),
    ("viː","bɯiː"),
    ("ːː","ː—"),

    ("d ʑ","dʑ"),
    ("d a","da"),
    ("d e","de"),
    ("d o","do"),
    ("d ɯ","dɯ"),

    ("niːɕiki","ni iɕiki"),
    ("anitɕaɴ","niːtɕaɴ"),
    ("daiːtɕi","dai itɕi"),

    ("naɴ sono","nani sono"),
    ("naɴ kono","nani kono"),
    ("naɴ ano","nani ano"), # works around a known mis-reading produced upstream by Cutlet
    (" niːtaɽa"," ni itaɽa"),
    ("doɽamaɕiːd","doɽama ɕiːdʲi"),
    ("aɴ ta","anta"),
    ("aɴta","anta"),
    ("naniːʔteɴ","nani iʔteɴ"),
    ("niːkite","ni ikite")

])
444
+
445
+
446
# Symbols that are followed by a space in the text, mapped to their spoken form.
#
# Every symbol was listed twice with an identical key, so the second entry of
# each pair had no effect.  The second slot now holds the full-width variant
# (written as an escape so it survives Unicode normalisation of this file).
sym_ws = dict([

    ("$ ","dorɯ"),
    ("\uff04 ","dorɯ"),       # full-width dollar sign

    ("〇 ","marɯ"),
    ("¥ ","eɴ"),
    ("\uffe5 ","eɴ"),         # full-width yen sign

    ("# ","haʔɕɯ tagɯ"),
    ("\uff03 ","haʔɕɯ tagɯ"),  # full-width number sign

    ("& ","ando"),
    ("\uff06 ","ando"),        # full-width ampersand

    ("% ","paːsento"),
    ("\uff05 ","paːsento"),    # full-width percent sign

    ("@ ","aʔto saiɴ"),
    ("\uff20 ","aʔto saiɴ")    # full-width commercial at

])


def random_sym_fix(text): # with space
    """Spell out symbols that are followed by a space, e.g. ``miku& sakura``.

    Each key of ``sym_ws`` (symbol plus trailing space) is replaced by its
    reading surrounded by single spaces.

    Args:
        text: transcription string.

    Returns:
        The string with the listed symbols replaced by their readings.
    """
    for symbol, reading in sym_ws.items():
        text = text.replace(symbol, f" {reading} ")
    return text
477
+
478
+
479
# Symbols without a trailing space, mapped to their spoken form.
#
# As in sym_ws, each symbol used to be listed twice with the same key; the
# dead second entries now hold the full-width variants (written as escapes so
# they survive Unicode normalisation of this file).
sym_ns = dict([

    ("$","dorɯ"),
    ("\uff04","dorɯ"),        # full-width dollar sign

    ("〇","marɯ"),
    ("¥","eɴ"),
    ("\uffe5","eɴ"),          # full-width yen sign

    ("#","haʔɕɯ tagɯ"),
    ("\uff03","haʔɕɯ tagɯ"),   # full-width number sign

    ("&","ando"),
    ("\uff06","ando"),         # full-width ampersand

    ("%","paːsento"),
    ("\uff05","paːsento"),     # full-width percent sign

    ("@","aʔto saiɴ"),
    ("\uff20","aʔto saiɴ"),    # full-width commercial at

    ("~","—"),
    ("kʲɯɯdʑɯɯkʲɯɯ.kʲɯɯdʑɯɯ","kʲɯɯdʑɯɯ kʲɯɯ teɴ kʲɯɯdʑɯɯ")

])


def random_sym_fix_no_space(text):
    """Spell out symbols that are not followed by a space, e.g. ``miku&sakura``.

    Each key of ``sym_ns`` is replaced by its value surrounded by single
    spaces.  Intended to run after ``random_sym_fix`` so that the
    "symbol + space" forms have already been consumed.

    Args:
        text: transcription string.

    Returns:
        The string with the listed symbols replaced by their readings.
    """
    for symbol, reading in sym_ns.items():
        text = text.replace(symbol, f" {reading} ")
    return text
515
+
516
def random_space_fix(text):
    """Apply every literal replacement from the ``spaces`` table, in table order.

    Args:
        text: transcription string.

    Returns:
        The string after all replacements have been applied one after another.
    """
    fixed = text
    for needle in spaces:
        fixed = fixed.replace(needle, spaces[needle])
    return fixed
523
+
524
def number_to_japanese(num):
    """Return the hiragana reading of a non-negative integer.

    Values from 1 to 9999 are read exactly as before.  The supported range is
    extended to everything below 10**12 by reading the number in groups of
    four digits followed by まん (10**4) and おく (10**8).  Higher groups are
    read with the same tables as the lowest one, so e.g. 1000万 comes out as
    せんまん rather than いっせんまん.

    Args:
        num: the integer to read.

    Returns:
        The reading as a string; "ゼロ" for 0; the literal string
        "Invalid input" when ``num`` is not an int, is negative, or is
        10**12 or larger.
    """
    if not isinstance(num, int) or num < 0 or num >= 10 ** 12:
        return "Invalid input"

    if num == 0:
        return "ゼロ"

    digits = ["", "いち", "に", "さん", "よん", "ご", "ろく", "なな", "はち", "きゅう"]
    tens = ["", "じゅう", "にじゅう", "さんじゅう", "よんじゅう", "ごじゅう", "ろくじゅう", "ななじゅう", "はちじゅう", "きゅうじゅう"]
    hundreds = ["", "ひゃく", "にひゃく", "さんびゃく", "よんひゃく", "ごひゃく", "ろっぴゃく", "ななひゃく", "はっぴゃく", "きゅうひゃく"]
    thousands = ["", "せん", "にせん", "さんぜん", "よんせん", "ごせん", "ろくせん", "ななせん", "はっせん", "きゅうせん"]

    def read_group(value):
        # Reading of a value in 1..9999; index 0 of each table is "" so
        # zero digits contribute nothing.
        return (
            thousands[value // 1000]
            + hundreds[value // 100 % 10]
            + tens[value // 10 % 10]
            + digits[value % 10]
        )

    oku, below_oku = divmod(num, 10 ** 8)
    man, below_man = divmod(below_oku, 10 ** 4)

    result = ""
    if oku:
        result += read_group(oku) + "おく"
    if man:
        result += read_group(man) + "まん"
    if below_man:
        result += read_group(below_man)
    return result


def convert_numbers_in_string(input_string):
    """Replace every run of digits in ``input_string`` with its hiragana reading.

    Runs of up to 12 digits are read as one number.  Longer runs are read
    digit by digit; previously any number above 9999 inserted the literal
    text "Invalid input" into the result.

    Args:
        input_string: text that may contain digit runs.

    Returns:
        The text with all digit runs spelled out.
    """
    def replace_with_japanese(match):
        token = match.group()
        if len(token) <= 12:
            # At most 12 digits means the value is below 10**12, which
            # number_to_japanese always accepts.
            return number_to_japanese(int(token))
        return ''.join(number_to_japanese(int(ch)) for ch in token)

    return re.sub(r'\d+', replace_with_japanese, input_string)
563
+
564
+
565
def remove_leading_dots(text):
    """Strip the run of '.' characters, if any, at the very start of ``text``.

    Dots anywhere else in the string are kept.
    """
    leading_dots = re.compile(r'^\.*')
    return leading_dots.sub('', text)
569
+
570
def remove_more_than_three_dots(text):
    """Drop leading dots and shorten long dot runs to an ellipsis.

    First the run of '.' at the start of the string is removed.  Then every
    run of four or more dots, together with any whitespace directly after
    it, is replaced by exactly '...'.  Runs of one to three dots are left
    as they are.
    """
    without_lead = re.sub(r'^\.*', '', text)
    return re.sub(r'\.{4,}\s*', '...', without_lead)
576
+
577
+ # def replace_repeating_patterns(text):
578
+ # def replace_repeats(match):
579
+ # pattern = match.group(1)
580
+ # if len(match.group(0)) // len(pattern) >= 3:
581
+ # return pattern + "~~~"
582
+ # return match.group(0)
583
+
584
+ # # Pattern for space-separated repeats
585
+ # pattern1 = r'((?:\S+\s+){1,5}?)(?:\1){2,}'
586
+ # # Pattern for continuous repeats without spaces
587
+ # pattern2 = r'(.+?)\1{2,}'
588
+
589
+ # text = re.sub(pattern1, replace_repeats, text)
590
+ # text = re.sub(pattern2, replace_repeats, text)
591
+ # return text
592
+
593
+
594
+ print("loading...")
595
+
596
+ # device = 'cuda:0'
597
+ # processor = WhisperProcessor.from_pretrained("openai/whisper-large-v3")
598
+ # # model = WhisperForConditionalGeneration.from_pretrained("/home/ubuntu/PL-whisper/Hibiki_ASR_Grapheme_Plus/checkpoint-5000").to("cuda:0")
599
+
600
+ # hibiki = "/home/ubuntu/PL-whisper/Hibiki_ASR_Grapheme_Plus/checkpoint-5000"
601
+ # model = AutoModelForSpeechSeq2Seq.from_pretrained(
602
+ # hibiki,
603
+ # torch_dtype=torch.float16,
604
+ # low_cpu_mem_usage=True,
605
+ # use_safetensors=True,
606
+ # attn_implementation="sdpa",
607
+ # )
608
+ # model.to(device)
609
+ # forced_decoder_ids = processor.get_decoder_prompt_ids(language="japanese", task="transcribe")
610
+
611
+ # print('lock and load')
612
+
613
+ # print('now loading the dataset...')
614
+ # # Define the root directory where you want to start the search
615
+ # root_directory = "/path/to/files"
616
+ # #/home/ubuntu/PL-whisper/split
617
+ # #/home/ubuntu/PL-whisper/myaudio_full/myaudio
618
+
619
+ # # Initialize an empty list to store the .wav file paths
620
+ # wav_files = []
621
+
622
+ # # Walk through all directories and files starting from the root directory
623
+ # for dirpath, dirnames, filenames in os.walk(root_directory):
624
+ # # Iterate over all files in the current directory
625
+ # for filename in filenames:
626
+ # # Check if the file is a .wav file
627
+ # if filename.endswith(".wav"):
628
+ # # Construct the full file path
629
+ # file_path = os.path.join(dirpath, filename)
630
+ # # Add the file path to the list
631
+ # wav_files.append(file_path)
632
+
633
+
634
+ # # Now wav_files contains a list of all .wav file paths
635
+ # print("loaded the dataset, length is ->", len(wav_files))
636
+
637
+ # samples = Dataset.from_dict({"audio": wav_files}).cast_column("audio", Audio(16000))
638
+ # # Prepare the CSV file
639
+ # csv_file_path = './phonemized_moe_25%.csv'
640
+
641
+ # with open(csv_file_path, mode='w', newline='') as csv_file:
642
+ # fieldnames = ['filename', 'transcription']
643
+ # writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
644
+ # writer.writeheader()
645
+
646
+ # # Process each audio file
647
+ # for sample in tqdm(samples):
648
+ # # Load the audio file and process it
649
+ # input_features = processor(sample["audio"]["array"], sampling_rate=sample["audio"]["sampling_rate"], return_tensors="pt").input_features.to(device)
650
+
651
+ # # Generate the transcription
652
+ # predicted_ids = model.generate(input_features, repetition_penalty=1.2,num_beams=4)
653
+ # transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
654
+ # if ' neɽitai ' in transcription:
655
+ # transcription = transcription.replace(' neɽitai ', "naɽitai")
656
+ # if 'tɕabiʔto' in transcription:
657
+ # transcription = transcription.replace('tɕabiʔto', "tɕobiʔto")
658
+ # if "ki ni ɕinai" in transcription:
659
+ # transcription = re.sub(r'(?<!\s)ki ni ɕinai', r' ki ni ɕinai', transcription)
660
+ # if 'ʔt' in transcription:
661
+ # transcription = re.sub(r'(?<!\s)ʔt', r'ʔt', transcription)
662
+ # if 'de aɽoɯ' in transcription:
663
+ # transcription = re.sub(r'(?<!\s)de aɽoɯ', r' de aɽoɯ', transcription)
664
+
665
+ # if ".ʔ" in transcription:
666
+ # transcription = transcription.replace(".ʔ","..")
667
+ # if "ʔ." in transcription:
668
+ # transcription = transcription.replace("ʔ.",".")
669
+
670
+ # transcription = convert_numbers_in_string(transcription)
671
+ # transcription = convert_to_kana(transcription)
672
+ # transcription = post_fix(transcription)
673
+
674
+ # if 'ɯa ta ɕi' in transcription:
675
+ # transcription = transcription.replace("ɯa ta ɕi","wataɕi")
676
+ # if ' ' in transcription:
677
+ # transcription = transcription.replace(" "," ")
678
+
679
+ # if 'monoːmoi' in transcription:
680
+ # transcription = transcription.replace("monoːmoi","mono omoi")
681
+
682
+
683
+ # if not re.search(r'[.?!。?!؟!.]$', transcription): # will add a dot at the end of each sentence if no other punctuation symbols exists
684
+ # transcription += '.'
685
+
686
+ # transcription = remove_leading_dots(transcription)
687
+ # transcription = remove_more_than_three_dots(transcription)
688
+ # # Write the transcription to the CSV file
689
+ # writer.writerow({'filename': sample['audio']['path'], 'transcription': transcription.lstrip()})
690
+
691
+ # print(f"Transcriptions have been saved to {csv_file_path}")
692
+
693
+
694
# ---------------------------------------------------------------------------
# Transcription script: walk a directory of audio files, transcribe each one
# with the fine-tuned Whisper phonemizer, clean the text up and append the
# result to a CSV file.  Files already present in the CSV are skipped, so the
# script can be re-run after an interruption.
# ---------------------------------------------------------------------------
device = 'cuda:0'
processor = WhisperProcessor.from_pretrained("openai/whisper-large-v3")

hibiki = "Respair/Hibiki_ASR_Phonemizer_v0.2"
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    hibiki,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    use_safetensors=True,
    attn_implementation="sdpa",
)
model.to(device)
# NOTE(review): forced_decoder_ids is computed but never passed to
# model.generate() below - confirm whether that is intentional.
forced_decoder_ids = processor.get_decoder_prompt_ids(language="japanese", task="transcribe")

print('lock and load')

print('now loading the dataset...')

root_directory = "/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine"

# Collect every audio file below root_directory.
wav_files = []
for dirpath, dirnames, filenames in os.walk(root_directory):
    for filename in filenames:
        if filename.endswith((".wav", ".ogg", ".mp3")):
            wav_files.append(os.path.join(dirpath, filename))

print("loaded the dataset, length is ->", len(wav_files))

csv_file_path = '/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/shiki_finetune.csv'

# Load the file names that already have a transcription.  The CSV holds IPA
# text, so it is always read and written as UTF-8 instead of relying on the
# platform default encoding.
existing_transcriptions = set()
if os.path.exists(csv_file_path):
    with open(csv_file_path, mode='r', newline='', encoding='utf-8') as csv_file:
        for row in csv.DictReader(csv_file):
            existing_transcriptions.add(row['filename'])

# Leave finished files out before the dataset is built, so their audio is
# never decoded just to be skipped inside the loop.
pending_files = [path for path in wav_files if path not in existing_transcriptions]

# Open the CSV file in append mode
with open(csv_file_path, mode='a', newline='', encoding='utf-8') as csv_file:
    fieldnames = ['filename', 'transcription']
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

    # Write header only if the file is empty
    if csv_file.tell() == 0:
        writer.writeheader()

    if pending_files:
        samples = Dataset.from_dict({"audio": pending_files}).cast_column("audio", Audio(16000))
    else:
        # Nothing left to do; avoid building a dataset from an empty list.
        samples = []

    # Process each audio file
    for sample in tqdm(samples):
        audio_path = sample['audio']['path']

        # Safety net in case the decoded path differs from the path that was
        # filtered on above.
        if audio_path in existing_transcriptions:
            continue

        # Extract features and move them to the model's device and dtype.
        input_features = processor(
            sample["audio"]["array"],
            sampling_rate=sample["audio"]["sampling_rate"],
            return_tensors="pt",
        ).input_features.to(device=device, dtype=torch.float16)

        # Generate the transcription
        predicted_ids = model.generate(input_features, repetition_penalty=1.2, num_beams=4)
        transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]

        # --- fixes applied to the raw model output ---------------------------
        transcription = transcription.replace(' neɽitai ', "naɽitai")
        transcription = transcription.replace('tɕabiʔto', "tɕobiʔto")
        transcription = re.sub(r'(?<!\s)ki ni ɕinai', r' ki ni ɕinai', transcription)
        # NOTE(review): the replacement below equals the matched text, so this
        # substitution changes nothing as written.
        transcription = re.sub(r'(?<!\s)ʔt', r'ʔt', transcription)
        transcription = re.sub(r'(?<!\s)de aɽoɯ', r' de aɽoɯ', transcription)

        transcription = transcription.replace(".ʔ", "..")
        transcription = transcription.replace("ʔ.", ".")

        # --- digits and any remaining Japanese script -> IPA ------------------
        transcription = convert_numbers_in_string(transcription)
        transcription = convert_to_kana(transcription)
        transcription = post_fix(transcription)

        transcription = transcription.replace("ɯa ta ɕi", "wataɕi")
        # Collapse runs of spaces.  The previous replace(" ", " ") swapped a
        # single space for a single space and therefore did nothing.
        transcription = re.sub(r' {2,}', ' ', transcription)

        transcription = transcription.replace("monoːmoi", "mono omoi")

        # Add a final dot when the line does not already end in punctuation.
        if not re.search(r'[.?!。?!؟!.]$', transcription):
            transcription += '.'

        transcription = remove_leading_dots(transcription)
        transcription = remove_more_than_three_dots(transcription)
        transcription = random_space_fix(transcription)

        transcription = random_sym_fix(transcription) # fixing some symbols, if they have a specific white space such as miku& sakura -> miku ando sakura
        transcription = random_sym_fix_no_space(transcription) # same as above but for those without white space such as miku&sakura -> miku ando sakura

        # Write the transcription to the CSV file and flush right away, so an
        # interrupted run keeps everything done so far and can be resumed.
        writer.writerow({'filename': audio_path, 'transcription': transcription.lstrip()})
        csv_file.flush()

print(f"Transcriptions have been saved to {csv_file_path}")
stylekan/Data/moe_res/imas_split/shiki/shiki_fine/shiki_finetune.csv ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_05.wav|sɯgata mo, koe mo, namae sae kawaʔte mo, tɕitose ga kɯɽeta kiboɯ wa, kienai.|480
2
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_06.wav|koɯfɯkɯ wa doko ni de mo koɽogaʔterɯdeɕo? wataɕi wa, dakʲoɯ ɕitakɯ naikedo?|480
3
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_01.wav|kɯɽaŋkɯ aːʔpɯ! teɴɕi no oɕigoto ɕɯɯrʲoɯ! niŋgeɴ ni modoɽoʔka naːɴ!|480
4
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_02.wav|bʲoɯdoɯ de itainaɽa, mɯkaɴɕiɴ de irɯ koto. katamɯkerɯ saki ga nakɯnactɕoɯ.|480
5
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_12.wav|kiboɯ wa megɯrɯ. tatoe donna ni sɯrɯdokɯ, itakɯtomo. soɽe o anata ga ɕimeɕite kɯɽeta.|480
6
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_6_07.wav|çikaɽi ni tokerɯ no, kikata nonɯkɯmoɽi to.|480
7
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_07.wav|zensei to ka ɽaisei to ka, doɯ de mo iː. daʔte soɽe wa, ataɕi dʑa nai moɴ ne.|480
8
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_6_03.wav|owaɽi o koete.|480
9
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_4_01.wav|temɽiɴ wa itsɯ daʔte tsɯɽiaʔterɯ. kiboɯ to doɯtoɯ no ɕitsɯrʲa o motsɯ mono ga nani ka, ɕiʔterɯ? soɽe wa ne, zetsɯboɯ da jo.|480
10
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_02.wav|koɽe ga, owaɽi? naɽa, ɯɽeɕiː. daʔte, anata to no, tsɯgi no jakɯsokɯ.|480
11
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_11.wav|tsɯgi deaerɯ toki, wataɕitatɕi wa çitoɽi de wa nai, fɯfɯ, koɽe wa jogeɴ.|480
12
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_12.wav|josoɯ dekinai kimitatɕi de ite jo. tensai ɕikitɕaɴ no josoɯ o, ɯɽagiɽi tsɯzɯkete.|480
13
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_6_06.wav|mimamoʔterɯkaɽa. fɯtatabi, deaɯ made.|480
14
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_6_04.wav|teɴɕi no komoɽi ɯta o.|480
15
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_11.wav|kimi o ɽakɯeɴ e to izanaʔte agerɯ joɴ! ima o tanoɕimi kiʔta ato o, tɕitose tɕanto.|480
16
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_04.wav|osoɽa no ɯeʔte, donna nioi naɴ daɽoɯ ne? ɕiɽeɴ de, mɯkiɕitsɯ de, kaoɽi mo nai no ka na?|480
17
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_6_05.wav|tokɯbetsɯ ni narɯ no.|480
18
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_06.wav|sajoɯnaɽa, tɕitose. wataɕi ni owaɽi o oɕiete kɯɽeta, tokɯbetsɯ na çito.|480
19
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_6_02.wav|kagajaki no katɕi o, ɕimeɕite misete.|480
20
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_05.wav|çito ni kaŋka saɽerɯ no, betsɯ ni, ɕiɽoi dʑa nai jo. meʔta ni sono aite ga inai dake.|480
21
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_3_01.wav|çito no inotɕi ni kiseɴ ga nai nante, sonna no kiɽeigoto. sɯkɯɯ çito wa eɽabɯɕi, dʑibɯɴ no inotɕi nante taika de saɕidasenai. de mo kimi wa, saɕidaɕite ɕimai soɯ da jo ne. oçito joɕi dakaɽa.|480
22
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_04.wav|moɴ no soto e okɯɽidasenakɯte, gomeɴ nasai. moɴ no saki no koto, kondo oɕiete ne.|480
23
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_08.wav|ɕigo no sekai de tanoɕikɯ jarɯɴ dʑa nakɯte, ima o kiɽitorɯ, fɯɴ, dekirɯ jo.|480
24
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_03.wav|saigo wa nai no desɯ. negaeba doko made mo, fɯtaɽi no tokɯbetsɯ wa tsɯzɯkɯkaɽa.|480
25
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_07.wav|arɯdʑi ni wa kaɴɕa o, wataɕi no tokɯbetsɯ o minogaɕite kɯɽeta, anata to, deaeta.|480
26
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_10.wav|hakoni wa neː, wataɕi wa iɽanai na. teitaiʔte, dʑiɴrɯi to aiɕoɯ ga warɯiɴ daʔte.|480
27
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_03.wav|kempiɴ, dʑiʔkeɴ, a, ɕijokɯɴ ga todoitetaɴ daʔta, ɽabo ni komaɽoːʔto.|480
28
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_6_01.wav|jɯme no saki no zasetsɯ nante, kɯtsɯgaesoɯ ka.|480
29
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_5_01.wav|nakanaide, anata no çitomi ga, namida de nagaɽete ɕimaɯ, soɽe wa, koɽe kaɽa kiboɯ dake o mitsɯkerɯ tame no oɯseki, wataɕi o, seɴ neɴ saki no miɽai de mo, mitɕibiːte kɯɽerɯ çikaɽi, saigo wa, hohoemi de.|480
30
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_09.wav|fɯɽeɽaɽenakɯte mo wakarɯ, atatakasa, daʔte, zɯʔto, anata ni aʔta.|480
31
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_10.wav|zɯʔto omoʔteta, kiɽei na çitomi no iɽo daʔte, inotɕi no iɽo, naɴ da ne.|480
32
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_4_03.wav|kimi ga ima te ni moʔterɯ no wa, kiboɯ? soɽe to mo zetsɯboɯ? doʔtɕi de mo iː ka. wataɕi wa moɯ te niːɽeterɯkaɽa, hoɕikaʔtaɽa wakete agerɯ jo. katahoɯ wa, kimi ni moɽaʔta mono dakedo.|480
33
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_01.wav|namida. kanaɕikɯte, tsɯɽakɯte, kɯrɯɕiː mono. koɽe kaɽa no anata ni wa, çitsɯjoɯ nai mono.|480
34
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_09.wav|tobɯ no mo keʔkʲokɯ dʑibɯɴ no tɕikaɽa daɕi, de mo tsɯkaɽetɕaɯɕi. hakonde moɽaɯ no ga itɕibaɴ!|480
35
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_08.wav|sɯbete wa bʲoɯdoɯ, koɯhei ni fɯɽisosogɯ, ai mo, inoɽi mo, hontoɯ wa, anata ni daʔte.|480
36
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_4_02.wav|kotoba ni sɯrɯto nante dʑiːpɯ naɴ daɽoɯ ne. konna mono ni, ataɕitatɕi wa fɯɽimawasaɽete, çiʔɕi ni naʔterɯ, me ni mienai sei de, te ni ɕite mo dʑiʔka ga naikaɽa, jokei ni ne.|480
37
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_08.wav|kaisɯiʔte, konna ni ɕio kaɽakaʔta?, naitenai jo, kantɕigai, kiɴɕi.|480
38
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_07.wav|jorɯ no sampo, okoɽanaide ne. iʔɕo naɽa iː deɕo? ɕimpai naɽa, mihaʔtete.|480
39
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_05.wav|kikaɽetenakɯte jokaʔta. moɕi ka ɕitaɽa, toʔkɯ ni ɕiɽaɽeteta ka mo dakedo.|480
40
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_4_03.wav|moɕi nigete mo, doko made mo oʔte kɯrɯ ki deɕoɯ? jada jada, oni goʔko wa tanoɕiːkedo, zɯʔto wa tsɯkaɽetɕoɯ. dakaɽa, iʔɕo niːrɯ jo. toːi owaɽi no çi made.|480
41
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_02.wav|anna ni fɯkakɯ, soko ga naiʔte kandʑiteta no ni, sonna koto, nakaʔtaɴ da.|480
42
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_06.wav|kiɽei na ɕigikata nante nai, ka, soɽe de mo, jɯme kɯɽai mite itakaʔta jo.|480
43
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_10.wav|nɯɽeterɯ jo, iː no? sonna koto mo ki ni naɽanai kɯɽai, hoŋki de.|480
44
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_07.wav|otoɕi joɽiːki kiɽeterɯ, oːwatesɯgi, zeɴrʲoɯ da jo neː, soɯ iɯ tokoɽo.|480
45
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_05.wav|kimi wa itsɯ mo itsɯ mo, ataɕi o nigaɕite kɯɽenai jo ne. doko iʔte mo sa.|480
46
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_4_02.wav|daɽe mo kaɽe mo ga ɽisoɯ o oɕitsɯketerɯ. kimi mo soɯ, tɕitose tɕaɴ daʔte. soɽe de mo kimitatɕi wa, ɽisoɯ dʑa nai wataɕi de mo, tsɯkiaoɯ to ɕiterɯ.|480
47
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_6_02.wav|kagajaki no katɕi o, ɕimeɕite misete.|480
48
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_08.wav|taorɯ kaisanakʲa, wataɕi no koto o wasɯɽeɽaɽenakɯ narɯ koɯsɯi, pɯɕɯ!!|480
49
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_04.wav|ɯmi ni ɕizɯmɯki wa, fɯɯɴ. saː, doɯ daʔta ka naː, oki ni nagasaɽetɕaʔta mitai.|480
50
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_12.wav|tsɯmetai, kaɽa da no ɕiɴ ga hjoɯteŋka mitai, dakaɽa, tamete, donna hoɯhoɯ de mo.|480
51
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_6_07.wav|çikaɽi ni tokerɯ no, kikata nonɯkɯmoɽi to.|480
52
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_02.wav|kono ato wa motɕiɽoɴ omimai da joɴ! jowajowa kʲɯɯketsɯkisaɴ no tokoɽo ne.|480
53
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_3_01.wav|dʑɯndo no takai mono wa, soɽe dake moɽoiɴ da jo. joɽokobi mo, kanaɕimi mo, zembɯ zembɯ, kowaɽetɕaɯ. dakaɽa çito wa, mazaʔte ɯme aɯ no. kowaɽete ɯmaɽeta, sɯkima o.|480
54
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_5_01.wav|naɴ de, konna tokoɽo ni, wazawaza kita no? iː ko wa nerɯ dʑikaɴ de, ɕikitɕantatɕi wa, mite no toːɽi warɯi ko de, fɯfɯ, fɯɕigi, hoɴɕiɴ ɕitɕoʔta. kimi wa, okoʔterɯ no ni.|480
55
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_6_06.wav|mimamoʔterɯkaɽa. fɯtatabi, deaɯ made.|480
56
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_03.wav|gps o tsɯketeɽeba mendoɯ na ɽenɽakɯ wa iɽanai ka mo neːnʲɯɯ, kaɴɕi ɕakai.|480
57
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_12.wav|itsɯ mo doːɽi de gaʔkaɽi, ɕitenai? fɯɯɴ, itsɯ mo no ataɕi de iːɴ da.|480
58
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_01.wav|kono dʑiki no ɯmiʔte, zenzeɴ samɯkɯ naiɴ da neː. mɯɕiɽo nama noɽokɯte, betobetoː.|480
59
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_4_01.wav|fɯtsɯɯ dʑa nai ataɕi wa, iɽanakaʔta. tokɯbetsɯ na kagakɯɕa dʑa nai ataɕi wa, papa no ɕikai kaɽa hazɯɽeta. kawaiː dake dʑa nai ataɕi wa, aidorɯ dʑa iɽaɽenai.|480
60
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_01.wav|mabɯɕiː jo. sonna çikaɽi ni ateɽaɽetaɽa, toketenakɯ nactɕaɯ.|480
61
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_6_04.wav|teɴɕi no komoɽi ɯta o.|480
62
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_11.wav|aː, kɯʔtsɯiterɯto aʔtakaː. koɽe wa kimi no, jasaɕisa no ɯndo ka na?|480
63
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_06.wav|kaketsɯketa no? tɕitose tɕaɴ mo, kimi mo. doɯ ɕite soɯ, oseʔkai.|480
64
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_03.wav|modoʔte kitɕaʔta, çikaɽi no sekai, konna ni, aʔtakakaʔtaɴ da ne.|480
65
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_09.wav|ɯwa! mabɯɕiː! moʔto haʔkoɯrʲoɯ o osaeteː! dʑiɴrɯi ni wa mada hajai!|480
66
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_09.wav|manɯke na kao, ɕiterɯ, wataɕi mo? fɯfɯʔ, okaɕiː, aidorɯ na no ni ne.|480
67
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_10.wav|ameɽika dʑikomi no sɯieihoɯ de, tɕitose tɕaɴ no çitoɽi ja fɯtaɽi, ɕizɯmɯ—!|480
68
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_6_01.wav|jɯme no saki no zasetsɯ nante, kɯtsɯgaesoɯ ka.|480
69
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_6_05.wav|tokɯbetsɯ ni narɯ no.|480
70
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_6_03.wav|owaɽi o koete.|480
71
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_04.wav|heɴ na no? ɯmi wa, wataɕi o sɯikonde kɯɽerɯ hazɯ daʔta no ni, kʲoçi ɕite.|480
72
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_11.wav|ɕikaɽaɽerɯ no wa kiɽai. de mo, kʲoɯ wa, ɯɯɴ, naɴ de mo nai jo.|480
73
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/shiki_fine01.wav|haha, çiʔɕi da ne. ataɕi naŋka no tame ni. sonna toko hadʑimete mita. sonna fɯɯ ni, koe o aɽagerɯ toko. itsɯ mo itsɯ mo, kimi wa daɽe ka no tame ni. honto, heɴ na no.|480
74
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/shiki_fine02.wav|aʔ, kizɯtsɯita ɕikitɕaɴ o okoʔte kita çito da! ɽi fɯdʑiɴ! taoɽeta tɕitosetɕaɴ o tasɯketa no ni!, ma, ɯmi ni sasoʔta no wa—taɕi dakedo.|480
75
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/shiki_fine03.wav|ɯɯɴ, naɴ daɽoɯ, jokɯ wakannai. kietakaʔta no ka na. kʲɯɯ ni, doɯ de mo jokɯ nactɕaʔta. kimi to tɕitosetɕaɴ ga, naɴ ka, toːkɯte.|480
76
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/shiki_fine04.wav|"mɯmɯmɯ, kimi made soɯ jɯɯ koto iɯ. "" haihai, samiɕikaʔtandesɯɯ. ɯ eːɴ, kanaɕikaʔta joː kamaʔte kɯɽenakɯte itɕi ."" koɽe de iː?"|480
77
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/shiki_fine05.wav|aː, iː jo. hanɽoɴ wa motometenai. ataɕi ga hoɕiː no wa, kampeki na haɴɕoɯ. kimi no kasetsɯ o ɕoɯmei ɕite misete, owaɽi no çi made ni.|480
78
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/shiki_fine05.wav|kiboɯ wa—rɯ. akaɽi wa, kiʔto kie taɽi ɕinai. anata ga wataɕi ni mitɕi o ɕimeɕite kɯɽetakaɽa, kiʔto majowanaide mezaserɯ. owaɽi no saki no, çikaɽi o.|480
stylekan/Data/train_48_200k.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06973b9e5e41925cfd98c756ff119c08f009df6237ba7c5eb856f3639d171958
3
+ size 41791357
stylekan/Data/train_List.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37f7dcbdecaa5994b553594ab3bc5ed09086f50cd3c6d33fe06465299984ba46
3
+ size 92439327
stylekan/Data/train_List_updated.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02541c84b9b6633a382af239974bbd8a0e796d66055ff3d2808f42da6d1fd410
3
+ size 92883947
stylekan/Data/val_48_200k.csv ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/bbd90363/wav/bbd90363_2472.wav|çinata no iɯ toːɽi, okɯnai de matasete ite seikai deɕita.|77
2
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/ad28b91b/wav/ad28b91b_1146.wav|maː maʔtakɯ, fɯjɯɯ no jozaɽa o mite sɯkoɕi dake çitaʔte ita no ni dainaɕi da. ɕikatanai, tsɯki o minagaɽa dakiaoɯ.|94
3
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/bbd90363/wav/bbd90363_0389.wav|baka na koto o iʔtenaide, otonaɕikɯ ɕite ite kɯdasai.|77
4
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/chinshiko/uma/uma_08_umabgv_017.wav|ᵻᵻᵻ.|39
5
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/momoka/momoka_mobamas/momoka_mobamasu_0010/momoka_mobamasu_0010_chunk95.wav|koɯka na no mo iːdeɕo?|11
6
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Syuuko/Syuuko_Events_and_Card/Event/NatsuKoi/NatsuKoi_chunk76.wav|ɯtɕi mo saːɴ.|43
7
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Karen/mobamasu/karen_mobamasu_0002_cnk84.wav|ɯɴ, oʔkeː. sɯgokɯ iː kikakɯ da to omoɯ.|5
8
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/2cf01874/wav/2cf01874_3239.wav|ɽokɯdʑɯɯ mei dato, itɕi nitɕi ni tsɯki, itɕi tane no sake ga dʑɯɯ ni ɕoɯ.|72
9
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/06/06002950.wav|neː, taiga.|55
10
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sawashiro_miyuki/Sawashiro_Miyuki_02/Sawashiro_Miyuki_02_chunk1160.wav|kite ɕimaʔtaɽa,nɯgɯ no ni mo mata teikoɯ ga aʔta no de, sono mama,fɯɯɴ, soɯ naɴ da,saizɯ wa piʔtaɽi da ne, to ka naɴ to ka, fɯtsɯɯ ni teɽe kakɯɕi ni mo naɽanai koto o iʔtsɯtsɯ, ɕɯɯɕiɴ mae no hamigaki o hadʑimeta no daʔta.|20
11
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Syuuko/Syuuko_Mobamas/Syuko Voice/【モバマス】[クーリッシュピクシー]塩見周子【ボイス集】 - Niconico Video_2/【モバマス】[クーリッシュピクシー]塩見周子【ボイス集】 - Niconico Video_2_chunk33.wav|pɯɽodʲɯɯsaːsaɴ to naɽa, moʔto moʔto ɯe niːki soɯ da jo ne.|43
12
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/449d5a0a/wav/449d5a0a_1376.wav|oːnaː wa kadʑino no eŋkɯɽoːdʑaː de saːdo ni ɽogɯ iːɴ ɕimaɕita. sono ato, saːdo kaɽa sekando ni ɽogɯ iːɴ ɕimaɕita.|69
13
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_03/Sakurai_Takahiro_03_chunk2060.wav|aitsɯ ga...|4
14
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/449d5a0a/wav/449d5a0a_1311.wav|ɽaːdʑa. esɯerɯdʲi kidoɯ negatibɯ sɯmaːto foɴ o ɕotei no itɕi ni daɕite kɯdasai.|69
15
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/4ce0075b/wav/4ce0075b_0418.wav|a, maː, minna ga gomaʔtete ojakɯ ni taterɯnaɽa kono kɯɽai...te iɯ ka, saizɯ tɕiːsakɯ nai?|71
16
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakamoto_maya/Sakamoto_Maya_01/Sakamoto_Maya_01_chunk1353.wav|kʲoto beɴ seidʑi ka?|17
17
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/1cc3c6c0/wav/1cc3c6c0_1007.wav|madoɽomi kaɽa sameta toki, daisɯki na çito no kao ga soba ni aʔte, tamaɽanakɯ mɯne ga kʲɯɴ to ɕita.|91
18
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/ee093a4f/wav/ee093a4f_1705.wav|ɯɯwa, imoɯto ga koiɕikɯte ɕikata ga nakaʔta hanaɕi to ka, koʔsoɽiːmoɯto ni nijaɯ fɯkɯ o tsɯkɯʔte ɕimaʔta to ka, soɯ iɯ hanaɕi ga kikitakaʔta no ni.|79
19
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/chiwa_saito/Chiwa_Saito_03/Chiwa_Saito_03_chunk82.wav|kiokɯrʲokɯ ni wa dʑiɕiɴ ga arɯ no jo. kamakɯɽa bakɯfɯ ga seidʑitsɯ ɕita toɕi no koto daʔte oboete irɯ wa.|3
20
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/bce2a5af/wav/bce2a5af_1050.wav|tokɯni baɕo ni kakawarɯ kiokɯ wa, kʲoɯjɯɯ sɯrɯ koto oːideɕo. gaʔkoɯ no koɯzoɯ to ka, dʑitakɯ made no mitɕi to ka sa. kʲoɯjɯɯ ɕitenake taiheɴ na koto bakaɽi dʑa nai.|98
21
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/00163dc9/wav/00163dc9_0212.wav|naɽa nani mo ɕimpai nasa soɯ ne.|86
22
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/013/S013_B_0278.wav|jaʔpaɽi kʲoɯrʲokɯ na no o eɽabitai wa ne.|10
23
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/18460462/wav/18460462_0164.wav|tsɯ, ɯmakɯ tateɴ na. kageɴ to iɯ mono o ɕiɽaɴ no dakaɽa, ɕigoto ni ɕiɕoɯ ga derɯde wa nai ka.|102
24
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/013/S013_B_0375.wav|keŋka ja oːkɯmaɴ, soɽe ni kawasɯ.|10
25
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/bc778ddb/wav/bc778ddb_0890.wav|ehehe, iʔpai de, naɴ ka aʔtakai jo.|96
26
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/4e2f4ba6/wav/4e2f4ba6_0523.wav|soɯ, baka ni ɕiterɯʔte omoʔtadeɕoɯ.|95
27
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/011/S011_A_1170.wav|mazɯ wa sempai kaɽa doɯzo.|31
28
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakamoto_maya/Sakamoto_Maya_02/Sakamoto_Maya_02_chunk989.wav|taimeɴ no seki ni koɕikakerɯ.|17
29
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/5d68aedf/wav/5d68aedf_1703.wav|de mo wataɕi, hontoɯ wa kaigai nante ikitakɯ nakaʔtaɴ desɯ.|88
30
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/6d565f54/wav/6d565f54_0704.wav|honto da jo, sonna koto ni naʔtaɽa, ano ko iʔɕoɯ neɴ ni motsɯ jo.|87
31
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/5d68aedf/wav/5d68aedf_0872.wav|wataɕi to nita joɯ na kʲoɯgɯɯ no ko wa, mawaɽi niːʔpaiːmaɕitaɕi, wataɕi joɽi çidoi tatɕiba niːrɯ ko daʔte imaɕita.|88
32
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/bc778ddb/wav/bc778ddb_2271.wav|dekirɯnaɽa, kisɯ no çitotsɯ mo ɕite kɯɽerɯto, ɯ oːkʲaː! ijaː!ʔte kandʑite, oːdasɯkaɽi naɴ dakedo.|96
33
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/006/S006_F_0157.wav|hoi, saʔki kɯrɯma ni nosetadʑa nai desɯ ka. kiɽi no bako ni haiʔtemasɯ.|38
34
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/a0fd12d7/wav/a0fd12d7_1743.wav|odʑoɯsama ni wa, zeçi, gonaimitsɯ ni onegai ɕitai no desɯga...joɽoɕiːdeɕoɯ ka?|100
35
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/02/02101640.wav|ima, taigakɯɴ, wataɕi to ki sɯ ɕitetaɴ da jo.|6
36
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/00163dc9/wav/00163dc9_0928.wav|minna! ɕitsɯmoɴ sɯme asoko made! harɯ nakɯɴ ga komaʔterɯdʑa nai!|86
37
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/013/S013_C_0012.wav|kondo dasoɯ to omoʔterɯ ɕiɴ menʲɯɯ no abokado toːsɯto jokaʔtaɽa kansoɯ o kikasete.|10
38
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/horie_yui/Horie_Yui_03/Horie_Yui_03_chunk1141.wav|soɕite, wataɕi ga ɽikai sɯrɯ no wa okaɕiː to iɯ kimotɕi mo tsɯjokɯ arɯ.|0
39
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/ochinbarai/voice/mzr/mzr_13_001_010.wav|ɯfɯ, soɯ iɯ koto jo. dakaɽa, hajakɯ kigaete ki nasai?|15
40
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/a0fd12d7/wav/a0fd12d7_1597.wav|iʔteɽaʔɕaimasɯ. aɕita o tanoɕimi ni ɕite oɽimasɯ.|100
41
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/arisu/arisu_mobamasu/12_arisu__0013_(Vocals)/12_arisu__0013_(Vocals)_chunk32.wav|jasaɕiː sojokaze ni miojɯ da ne.|18
42
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sawashiro_miyuki/Sawashiro_Miyuki_01/Sawashiro_Miyuki_01_chunk1083.wav|kaɽe o aː ɕite ɕimaʔta no wa wataɕi de arɯ to iɯ setsɯ wa, igai to sekeɴ de nezɯjoi no daʔta.|20
43
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/06/06002780.wav|otsɯɽi no keisaɴ to ka, mɯzɯkaɕiː jo. rʲoɯte no jɯbi no kazɯ joɽi, takɯsaɴ naɴ da moɯ.|55
44
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Syuuko/Syuuko_Mobamas/Syuko Voice/【モバマス】[ビーチスタイル]塩見周子【ボイス集】 - Niconico Video_2/【モバマス】[ビーチスタイル]塩見周子【ボイス集】 - Niconico Video_2_chunk36.wav|samɯi toki ni wa...|43
45
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/95c3bdd8/wav/95c3bdd8_0507.wav|maː iːja. de sa, ɕigoto doɯ? tanoɕiː?|84
46
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/04/04002110.wav|haː...|37
47
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki__01/Shinichiro_Miki__01_chunk500.wav|kaɴɕa ɕite imasɯ.|7
48
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/chieri/chieri_cgss/chieri_card_100612/chieri_voice_100612_1_06.wav|sɯteːdʑi ga owaʔtaɽa...ai niːkimasɯ! pɯɽodʲɯɯsaːsaɴ.|25
49
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/ochinbarai/voice/hsm/hsm_12_003h_040.wav|ᵻᵻᵻ wataɕi mo ɕiawase da jo! goɕɯdʑinsama no tɕintɕiɴ o kandʑinagaɽa iketeᵻᵻᵻ.|60
50
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/4e2f4ba6/wav/4e2f4ba6_1329.wav|mada tɕoʔto dʑikaɴ ga arɯkaɽa kimete okitaiɴ dakedo, haijakɯ doɯ ɕijoʔka.|95
51
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sawashiro_miyuki/Sawashiro_Miyuki_02/Sawashiro_Miyuki_02_chunk868.wav|wataɕi wa sɯkoɕi majoʔtakeɽedo, ɕikaɕi kakɯɕite okɯ no mo jaʔpaɽi okaɕiː ki ga ɕita no de, gaʔkoɯ de no dekigoto o hanaɕite okɯ koto ni ɕita.|20
52
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/4e2f4ba6/wav/4e2f4ba6_2732.wav|saiwai sampɯrɯ wa, mawaɽi niːkɯɽa de mo irɯɴ daɕi.|95
53
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/1a5a3db8/wav/1a5a3db8_1432.wav|sasɯga ni maniawanaiɴ dʑa nai kaɕiɽa.|75
54
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/usamin/usamin_cgss/usamin_card_100126/usamin_voice_100126_2_03.wav|kono kagɯ, ɯsa mise de zaiko naiʔte iwaɽeteta jatsɯ!|49
55
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/df6c208e/wav/df6c208e_0983.wav|ima made no sɯzɯɽitɕaɴ daʔtaɽa, doko ka toːmakɯɴ ni eɴrʲo ɕiteta no ni...iʔtai doɯ ɕitaɴ desɯ ka?|78
56
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/kamiya_hiroshi/Kamiya_Hiroshi_02/Kamiya_Hiroshi_02_chunk1592.wav|jokɯ kikɯɕi ne.|13
57
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/momoka/momoka_cgss/momoka_card_100191/momoka_voice_100191_1_08.wav|boɯhaɴ bɯzaː o çiʔpaʔtaɽa...doɯ naʔte mo ɕiɽimaseɴ wa jo.|11
58
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/002/S002_X_0035.wav|tonikakɯ tekitoɯ ni sawaʔte mi nasai! modoɕitakɯ naʔtaɽa, deforɯtoʔte toko o oseba, saiɕo no dʑoɯtai ni modorɯkaɽa.|26
59
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/chieri/chieri_mobamas/chie_mobamasu_0014/chie_mobamasu_0014_chunk145.wav|iʔɕo da to...|25
60
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/chinshiko/uma/uma_01_comn05_049.wav|moɯ tɕoʔto tɕikaɽa kageɴ wa tsɯjoi hoɯ ga okonomi desɯ ka?|39
61
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/chiwa_saito/Chiwa_Saito_02/Chiwa_Saito_02_chunk1091.wav|akeʔpiɽoge na jatsɯ da na. kaɽeɕi kanodʑo na no dakaɽa, toɽitatete jamaɕiː tokoɽo ga arɯ wake de mo nai no daga, deɽikaɕiː ni kakerɯʔte ki wa ɕinakɯ mo nai.|3
62
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/anzu/anzu_cgss/anzu_card_101059/anzu_voice_101059_1_08.wav|joso no kaigiɕitsɯ de taɽake sɯgi—? kʲɯɯ ni tsɯɽete kitaɴ dakaɽa, hoaʔkoɯ wa iko.|12
63
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/84be23bd/wav/84be23bd_1646.wav|koko wa hontoɯ ni geːmɯ no naka na no ka, desɯ jo ne.|89
64
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/cbe5080e/wav/cbe5080e_1063.wav|berɯbeʔto no tame o omoʔte sa, kami o kiɽeʔte.|74
65
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/nagi/nagi_cgss/nagi_card_300835/nagi_voice_300835_5_01.wav|fɯɽiː maːkeʔto e joɯ koso. nagi ga adobaizaː to naʔte, oneɯtɕiçiɴ kaɽa ɽea na iʔpiɴ made, kazɯkazɯ no aitemɯ o goannai ɕimasɯ. doɯzo eɴrʲo wa nagesɯtete, ana ga akɯ hodo mite iʔte kɯdasai.|46
66
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/917feebd/wav/917feebd_1103.wav|omae, koitsɯ no nakama dʑa nai no ka?|80
67
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/bb6ac6f1/wav/bb6ac6f1_1009.wav|soɽe de oniːtɕaɴ wa? donna kikakɯ o jɯɯdʑite kita no ka naː—? kikasɯte kikasɯte!|85
68
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/940de876/wav/940de876_1684.wav|honto ka doɯ ka wa ɕiɽanaikedo, daɽe ka ga sonna koto iʔteta joɯ na ki ga sɯrɯ wa.|83
69
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/mifune/mifune_cgss/mifune_card_201312/mifune_voice_201312_2_12.wav|tsɯnorɯ omoi. mɯne ga, osaetsɯkeɽaɽerɯ joɯ de...koɽe ga...omoi to iɯ koto?|62
70
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sawashiro_miyuki/Sawashiro_Miyuki_03/Sawashiro_Miyuki_03_chunk2217.wav|waɽikomenai.|20
71
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/cc948b89/wav/cc948b89_0441.wav|heː, tsɯmiki de tsɯkɯʔta waɽi ni wa, tawaː ni mienakɯ mo...te, omotɕa de asobɯ na!|92
72
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/1cc3c6c0/wav/1cc3c6c0_2083.wav|maʔtɕi goʔtorɯ ka doɯ ka o kimerɯɴ wa, jɯɯkɯɴ ja nai?|91
73
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sawashiro_miyuki/Sawashiro_Miyuki_01/Sawashiro_Miyuki_01_chunk1080.wav|nɯʔte orɯ.|20
74
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/24/24000470.wav|ano ko wa kʲoɯ mo, asoko de, daɽe ka ga jaʔte kɯrɯ no o, maʔte irɯ no kaɕiɽa?|67
75
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/013/S013_E_0017.wav|soɯ neː...|10
76
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/ochinbarai/voice/hsm/hsm_04_002h_002.wav|tɕi, a, ano...tɕi, sato...tɕaː...na, ɴ nani o ɕite...|60
77
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/chiwa_saito/Chiwa_Saito_03/Chiwa_Saito_03_chunk723.wav|bokɯ to sendʑoɯgahaɽa ga tsɯkiaɯ koto ni naʔta çi.|3
78
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/4e2f4ba6/wav/4e2f4ba6_1622.wav|kono jami bɯtoɯ kai de fɯtaɽi ga deaɯ no mo, ɽomedʑɯɽi de odorɯ kata no bɯtoɯ kai de deaɯ no ni kaketerɯɴ da to omoɯ wa.|95
79
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/bb6ac6f1/wav/bb6ac6f1_2145.wav|ohahasaɴ wa sekaiːtɕi no ohahasaɴ da jo. wataɕi wa ɕiawase da jo. hazɯkaɕisa mo wasɯɽete, mitɕi no mannaka de wataɕi wa naita. haha wa, zɯʔto ɕita o mɯita mama daʔta.|85
80
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/b8b5fe66/wav/b8b5fe66_0069.wav|tonikakɯ, aɽigatoɯ.|90
81
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/004/S004_B_0327.wav|wataɕi nante ɕoɯgaʔkoɯ no koɽo abaɽetetaʔte dake de, madʑime ni naʔta tɕɯɯgakɯ ikoɯ mo netɕinitɕiːwaɽeta wa.|32
82
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/saori/merged_vocals_chunk151.wav|iː desɯ ka? kʲoɯ wa moɯ oɕigoto ɕitɕa dame desɯ.|21
83
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/002/S002_A_0717.wav|neɽai wa nani?|26
84
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/bce2a5af/wav/bce2a5af_2331.wav|ɕiʔkaɕi, koɽe de ɕoɯmei saɽeta ne.|98
85
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/07/07004510.wav|okaeɽɽi, fɯtaɽi to mo. kʲoɯ wa tanoɕiː itɕi nitɕi daʔta ka na?|34
86
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/bc778ddb/wav/bc778ddb_1859.wav|çito ga ɯmaɽete ɕinɯ kɯɽai no dʑikaɴ. soɽe ga naɴ do mo kɯɽikaesɯ kɯɽai nagai dʑikaɴ. wataɕi wa çitoɽi de naite ita.|96
87
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Syuuko/Syuuko_Events_and_Card/Event/wish_you_happiness/wish_you_happiness_chunk20.wav|ɯɯɴ, iː nioi!|43
88
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/9febd2ae/wav/9febd2ae_1035.wav|hoɽa, iːdʑa nai, dʑoɕi kai!|93
89
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/4ce0075b/wav/4ce0075b_0169.wav|te kageɴ to ka, honto ɕinakɯte iː. tonikakɯ, itɕi do, kao o mizɯ ni tsɯkete minaito, dʑibɯɴ de mo, daidʑoɯbɯ na no ka, mɯɽi na no ka ga, wakannai. de mo, doɯ ɕite mo, kaɽada ga ɯgokanaikaɽa...|71
90
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/cbe5080e/wav/cbe5080e_0609.wav|kanaɴ tokoɽo ni, zaɕiki waɽaɕi ga irɯʔte. mae ni hanaɕitetadaɽo?|74
91
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakamoto_maya/Sakamoto_Maya_03/Sakamoto_Maya_03_chunk741.wav|so no tame niːnotɕi o sasageta.|17
92
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/mio/mio_honda_cgss/mio_honda_card_301058/mio_honda_voice_301058_2_01.wav|mv koɯkai çi ni geɽiɽa koɯkokɯ tɕaʔkɯ! aka to kɯɽo de somarɯ matɕi, soɯkaɴ da neː—!|41
93
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/chiwa_saito/Chiwa_Saito_02/Chiwa_Saito_02_chunk151.wav|kai ga, keɕite çinitɕidʑoɯ de nakɯnaʔte ɕimaʔte irɯ, sonna dʑibɯɴ ni.|3
94
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/chiwa_saito/Chiwa_Saito_01/Chiwa_Saito_01_chunk442.wav|hane kawa tsɯbasa.|3
95
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/cbe5080e/wav/cbe5080e_1216.wav|ajako no keɴ mo, itɕioɯ wa katazɯita. kɯɽaianto ni hoɯkokɯ mo ɕita. jarɯbeki koto wa sɯbete oeta.|74
96
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Syuuko/Syuuko_Events_and_Card/Card_Commyuu/Main_Comyu/Main_Comyu_chunk1.wav|soɕite, soɽe o maibaɴ ki ni ɕiterɯ saihaɴ mo, oçito joɕi ja neː.|43
97
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/ochinbarai/voice/cst/cst_04_003h_020.wav|ᵻᵻᵻ sɯgo i!! dʑitɕi ga okɯ de awaɽeterɯ—!|50
98
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/03/03012970.wav|…atoma ga, kokoɽo ga, paŋkɯ ɕi soɯ desɯ, sempai .|58
99
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/2cf01874/wav/2cf01874_3328.wav|jɯe ni jahaɽi, pɯɽasɯ arɯfa wa çitsɯjoɯ na no da. soɽe mo, dekirɯ dake kʲoɯrʲokɯ na...aʔta, seigeɴ jondʑɯɯ, joɕi!|72
100
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/ranko/ranko_cgss/ranko_card_200796/ranko_voice_200796_5_01.wav|soɽa o mitasɯ amata no kagajaki jo. ima koso, waɽeɽa no tɕikaɽa to naɽe! wataɕi no tsɯmɯgɯ senɽitsɯ to, minna no kanaɽerɯ oŋgakɯ wa, nanimono ni mo ɕibaɽanenɯ tɕijɯɯ o egakɯ wa! saː, haŋgʲakɯɕatatɕi jo! çimeta kokoɽo o tokihanate!|14
101
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/01/01011690.wav|otokonoko ga jorɯ no soto de me o samaɕita toki, moɯ maɕiɽo wa doko ni mo inakaʔta.|29
102
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/940de876/wav/940de876_4035.wav|sonna no ɽaʔkanteki sɯgirɯ wa jo. fɯtsɯɯ ni kaŋgaete, tatɕibateki ni mo ɕindʑoɯteki ni mo, kʲoɯrʲokɯ ɕite kɯɽerɯ wake nai wa.|83
103
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/9febd2ae/wav/9febd2ae_0214.wav|akɯjoɯ ɕita honniɴ to sono çigai no toɯdʑiɕa...maː, koko de wa ɕiwake iːɴ kai ni narɯ ka na.|93
104
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/00163dc9/wav/00163dc9_0779.wav|harɯnakɯɴ ga wataɕi to onadʑi gaʔkoɯ ni teʔnʲɯɯ ɕita no mo, otɕitɕisaɴ no saɕigane na no?|86
105
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/4e2f4ba6/wav/4e2f4ba6_0648.wav|ɯso de mo iːkaɽa, oiɕiː fɯɽi o ɕi nasai. soɽe de marɯkɯ osamaɽe wa.|95
106
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/cc948b89/wav/cc948b89_1614.wav|tɕoʔto matɕi doɯzo! maʔte, maʔte kɯdasai! ima no wa tɕoʔto ɕita dʑoːkɯ desɯ jo! dʑoːkɯ desɯʔteba!|92
107
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/449d5a0a/wav/449d5a0a_0729.wav|po dʑitibɯapeɽia ni gaitoɯ sɯrɯ toɽabɯrɯ ɕɯɯtiŋgɯ wa zembɯ de nidʑɯɯ nana koɯmokɯ koɽeɽa sɯbete o dʑiʔkoɯ ɕimasɯ ka?|69
108
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_03/Sakurai_Takahiro_03_chunk1616.wav|atama wa bojaːʔto ɕite irɯɕi.|4
109
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/ad28b91b/wav/ad28b91b_2318.wav|asaçi wa koɽe kaɽa mo, gɯrɯɯpɯ no itɕiːɴ to ɕite ɯgoite moɽaɯ. wataɕi no hoɯ mo tetsɯdaɯɕi, mizɯho mo tetsɯdaɯ.|94
110
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/kamiya_hiroshi/Kamiya_Hiroshi_01/Kamiya_Hiroshi_01_chunk2049.wav|bokɯ wa, kani ga.|13
111
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/cbe5080e/wav/cbe5080e_1161.wav|merɯbeʔto, koɽe aɽaʔte kite okɯɽe. ato tekitoɯ ni tabemono mo.|74
112
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/01/01013490.wav|wataɕi no koto o.|29
113
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/011/S011_A_3686.wav|zɯʔto gatagata ɯrɯsei no ni koɽe kaɽa hombaɴ ka ɯʔtoɯɕiː sɯ ne.|31
114
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/012/S012_A_0272.wav|geʔkoɯ da.|63
115
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_02/Sakurai_Takahiro_02_chunk1288.wav|eja.|4
116
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_02/Sakurai_Takahiro_02_chunk2816.wav|zɯibɯɴ to omoikomi no hageɕiː taipɯ to mierɯ.|4
117
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/46d6bf83/wav/46d6bf83_1231.wav|aː, mo wakaʔta! tɕikazɯite kawasɯ koto dake sɯɽeba iːɴ da ne!|101
118
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/013/S013_E_0031.wav|soɯ...|10
119
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/8b6e7173/wav/8b6e7173_1446.wav|koɽe wa...soɯ, kimotɕi jokɯ naʔte kɯɽeteɽɯɕaɯ ka jo ne. koɯ iɯ kandʑi de sɯrɯto, mikage wa kimotɕiː— no ne, fɯfɯ!!|73
120
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Karen/mobamasu/karen_mobamasu_0006_cnk61.wav|nani mo iwanakɯte mo, kokotɕi iːkaɽa.|5
121
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/chiwa_saito/Chiwa_Saito_01/Chiwa_Saito_01_chunk448.wav|a, eʔto...|3
122
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/67eeef73/wav/67eeef73_0121.wav|e. tia. wa matɕigaʔte nai. sekaitɕɯɯ no daɽe joɽi, oɽe ga hoɕoɯ sɯrɯ jo.|81
123
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Syuuko/Syuuko_Events_and_Card/Event/syuuko_lipps_first_event/syuuko_lipps_first_event_chunk6.wav|heː, naɴ kaigai ka mo. kanadetɕaɴʔte, soɯ iɯ koto iɯ çito dʑa nai to omoʔteta.|43
124
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/6489388e/wav/6489388e_1498.wav|ɕomboɽi desɯ ne.|68
125
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki_03/Shinichiro_Miki_03_chunk1932.wav|tsɯɯgɯnai?|7
126
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/8b6e7173/wav/8b6e7173_0710.wav|o, okimotɕi dake tɕoɯdai ɕimasɯ?|73
127
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/001/S001_F_0451.wav|ᵻᵻᵻ.|23
128
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_01/Sakurai_Takahiro_01_chunk2269.wav|omae wa imada ni keitai deɴwa no adoɽesɯtɕoɯ kinoɯ o tsɯkaenai no ka.|4
129
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/3c58f1c4/wav/3c58f1c4_1903.wav|haː, omaeʔte eɽoi waɽi ni hetaɽe da jo na.|76
130
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/4e2f4ba6/wav/4e2f4ba6_1956.wav|fɯfɯɴ, soɯ deɕo?|95
131
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_01/Sakurai_Takahiro_01_chunk1814.wav|seŋgokɯ, ano kɯɽoːzeʔto, akenaide!|4
132
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/7b3d6f79/wav/7b3d6f79_1017.wav|eɽai eɽai! doɯzo sɯwaʔte, sɯgɯ oçirɯ tsɯkɯrɯkaɽa.|99
133
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/5d68aedf/wav/5d68aedf_0764.wav|kɯonsaɴ wa koe ga oːkiː no de, ɯrɯsai kɯɽai da to omoimasɯkedo?|88
134
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/940de876/wav/940de876_0802.wav|betsɯ ni, okoʔtenai wa. ɕiʔto mo ɕitenai wa. ɯɽaimoɕikɯ naŋka nai wa.|83
135
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/4e2f4ba6/wav/4e2f4ba6_2240.wav|naniː—? ɯɽeɕiɴ da!|95
136
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/95c3bdd8/wav/95c3bdd8_2032.wav|osamɯkɯɴ wa osamɯkɯɴ da jo.|84
137
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/36ea135b/wav/36ea135b_1379.wav|ahaha! ɯtɕi no çitotatɕi minna kɯiɕimboɯ dakaɽa, fɯtsɯɯ ni joɽokobi soɯ dakedo ne.|82
138
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/anzu/anzu_cgss/anzu_card_100932/anzu_voice_100932_6_05.wav|koɽe ga anzɯ no gohoɯɕi da!|12
139
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/09/09006550.wav|sajoɯnaɽa.|65
140
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/bb6ac6f1/wav/bb6ac6f1_1718.wav|de mo, oniː ga, koko made hairʲo ɕite ozeɴ date ɕite kɯɽeta no ni, jabo na tsɯʔkomi wa iɽetakɯ naiɕi. daitai, wataɕi dʑiɕiɴ wa, toʔkɯ ni gamaɴ dekinai no de...|85
141
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/saori/puriUBfin-chunk83_DeepFilterNet3.wav|soɯ kaŋgaete ɕimaɯ ɕɯŋkaɴ ga...|21
142
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/a0fd12d7/wav/a0fd12d7_0346.wav|beirɯhaɯdaː, ɕisɯtemɯ ɽiʔpaː, keŋgeɴ dʑɯmbi.|100
143
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/05/05001580.wav|ɯwasa da jo, hontoɯ no koto dʑa nai.|36
144
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_03/Sakurai_Takahiro_03_chunk200.wav|to.|4
145
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakamoto_maya/Sakamoto_Maya_02/Sakamoto_Maya_02_chunk1721.wav|kɯtɕi ni sɯrɯ to waɽactɕaɯ joɯ na kanoɯsei de wa arɯga, ano fɯtaɽi wa, naɴ ka soɯ iɯ kʲaɽa da.|17
146
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/cbe5080e/wav/cbe5080e_1568.wav|hai hai, wakaʔta joɯ. kono mama ɕinaɽetɕimaʔtaɽa mezame ga warɯiɕi ne.|74
147
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/449d5a0a/wav/449d5a0a_0055.wav|rɯʔkɯmaɴ wa, gendʑitsɯ no kɯɯgaɴ ga ɽogɯ iɴ ɕite irɯ wake de wa aɽimaseɴ.|69
148
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/frederica/fredrica_cgss/fredrica_card_101005/fredrica_voice_101005_2_05.wav|fɯtaɽi de kɯrɯɯdʑiŋgɯ sɯrɯ ni wa, iɽoiɽo çitsɯjoɯ da jo neː! gambaʔte!|44
149
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/cc948b89/wav/cc948b89_1136.wav|iː hoɯhoɯ ga arɯ wa! iʔso kokɯsaɴ ni kaimei ɕi ɕoɯto ka!|92
150
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Kanade/bahakora/bahamoot_44.1khz/bahamoot_44.1khz_chunk31.wav|fesaːforɯkɯ naɽa, takai marʲokɯ o moʔte irɯ hazɯ...desɯʔte?|19
151
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/frederica/frederica_mobamas/frederica_split/3_fred__0003_(Vocals)/3_fred__0003_(Vocals)_chunk3.wav|sɯki na sɯpoːtsɯ?|44
152
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/1a5a3db8/wav/1a5a3db8_0073.wav|mezase! dʑoɯinʲɯɯɕoɯ!|75
153
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sawashiro_miyuki/Sawashiro_Miyuki_02/Sawashiro_Miyuki_02_chunk1463.wav|soɯ iɯ, ofɯzaketa metaɕiteɴ wa tomokakɯ to ɕite.|20
154
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/009/S009_A_0056.wav|soɽe ni koŋkai wa ɽenna mo onadʑi joɯ ni ɯgoitete, ɕiɽaberɯ no wa ɽakɯ deɕita. aitsɯ no ato o oeba iːɴ de.|40
155
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Karen/karen_cgss/karen_cgss_card_201336/karen_cgss_voice_201336_1_03.wav|fɯtaɽi de wakeaʔte odorɯɴ da. negai mo itami mo, takanarɯ mɯne no oto to, kaɽada no netsɯ mo.|5
156
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/nagi/nagi_cgss/nagi_chara_309/nagi_voice_309_4_04.wav|dekirɯ joɯ ni naʔta koto ga ɕikakɯteki ni hjoɯgeɴ saɽete imasɯ ne.|46
157
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/04/04011350.wav|onaka ga sɯkɯto ne, doɯ ɕite da ka atama no naka ga boɴ'jaɽi ɕitɕaʔte, maigo ga tsɯjokɯ naʔtɕaɯ jo.|37
158
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/minami/minami_cgss/minami_card_200210/minami_voice_200210_2_11.wav|pɯɽodʲɯɯsaːsaɴ no kotoba ga...nani joɽi no kɯɴɕoɯ desɯ!|16
159
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/kamiya_hiroshi/Kamiya_Hiroshi_01/Kamiya_Hiroshi_01_chunk2014.wav|oto mo ɕinaiɕi, hokoɽi mo mawa nai.|13
160
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/36ea135b/wav/36ea135b_2616.wav|kʲoɯ kaɽa wataɕi mo iː? iʔɕo ni.|82
161
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/3c58f1c4/wav/3c58f1c4_1447.wav|sempai, saikiɴ ɕitsɯmoɴ ni kɯrɯ koto heɽimaɕita ne.|76
162
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/00163dc9/wav/00163dc9_0953.wav|koibito ni naɽitaiʔte imi dʑa nakɯte ne.|86
163
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/012/S012_A_0273.wav|iɕa wa hada ni awamɯ.|63
164
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/cc948b89/wav/cc948b89_1430.wav|ɯ ɽɯ sa i! bambandʑi ni sɯrɯ wa joː!!|92
165
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/ranko/ranko_cgss/ranko_card_200073/ranko_voice_200073_2_06.wav|baɽa no aʔtɕi mo, pɯɽodʲɯɯsaː to kɯgɯʔta moɴ!|14
166
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/6489388e/wav/6489388e_1486.wav|idai na sendʑiɴ ni kaɴɕa desɯga, koɯdai no temmoɴ bɯ no tame ni, semete tentai boɯeŋkʲoɯ kɯɽai wa nokoɕite hoɕikaʔta desɯ ne.|68
167
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/saori/EP6.wav|ɽai neɴ no toɕikoɕi mo...ie, koɽe wa mata kondo. ima wa kono ɕɯŋkaɴ o tanoɕimimaɕoɯ|21
168
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/00163dc9/wav/00163dc9_0326.wav|koɽe kaɽa wa, oneːtɕaɴ ga hazɯki no ɕaɕiɴ, toʔte agerɯ.|86
169
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/arisu/arisu_mobamasu/11_arisu__0012_(Vocals)/11_arisu__0012_(Vocals)_chunk113.wav|ikimasɯ jo! adobaisɯ desɯ ka, pɯɽodʲɯɯsaːsaɴ...|18
170
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_01/Sakurai_Takahiro_01_chunk2518.wav|kʲoɯ ga noʔta no ka, tsɯzɯkezama ni, kondo wadeka bɯrɯɯ no kime zeɽifɯ o çiɽoɯ sɯrɯ kaɽe.|4
171
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/chiwa_saito/Chiwa_Saito_01/Chiwa_Saito_01_chunk2339.wav|naʔtokɯ ɕita fɯɯ no hanekawa daʔta.|3
172
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/917feebd/wav/917feebd_2714.wav|saihate no soɽa...soɽe koso ga, sɯbete ga tsɯi erɯ soɽa...tsɯi no soɽa!|80
173
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/3c58f1c4/wav/3c58f1c4_0167.wav|soɯ ka mo ɕiɽemaseŋkedo, iʔtai nani o ioɯ to ɕite irɯɴ desɯ ka?|76
174
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/bbd90363/wav/bbd90363_0520.wav|geiɴ no hoɯ wa wakaɽimaseŋga, ɕiːna wa erɯfʲɴ to ɕite kakɯsei ɕite ɕimaʔta joɯ desɯ.|77
175
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Kanade/Kanade_Events_and_Card/Kanade_Events/saite_jewel_kanade/saite_jewel_kanade_chunk32.wav|sɯki na çito o ɽikai ɕitai no naɽa, sɯki ni naʔte wa ikenai nante. kanaɕiː wa jo ne.|19
176
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/anzu/anzu_cgss/anzu_card_100652/anzu_voice_100652_6_06.wav|kiɽiːtsɯ! ɽeː! ojasɯmi...|12
177
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/saori/32-christmas.wav|ivɯtɕaɴ no, daɽe ka no tame niːʔɕoɯ kemmei de atatakai tokoɽo, hajatetɕaɴ no geɴʔte ita toːɽi, soɽe o ikasɯ koto ga dekiɽeba, kɯɽisɯmasɯerwaibɯiːiː mo, kiʔto sɯteki na mono ni narɯ to kandʑimaɕita.|21
178
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/1a5a3db8/wav/1a5a3db8_0535.wav|soko no fɯtaɽi wa saʔki kaɽa naɴ jaʔterɯ wake?|75
179
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/12/12005830.wav|moɯ çitotsɯ dake, nokoʔterɯ. jarɯbeki koto ga nokoʔterɯ.|9
180
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/chieri/chieri_mobamas/chie_mobamasu_0015/chie_mobamasu_0015_chunk241.wav|çitsɯjoɯ naɴ desɯ jo ne. pɯɽodʲɯɯsaːsaɴ mitai ni.|25
181
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/bce2a5af/wav/bce2a5af_1707.wav|iʔɕoɯ kemmei, daɽadaɽa ɕite jaɽimasɯ to mo.|98
182
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/84be23bd/wav/84be23bd_0978.wav|kaɽaʔpo...desɯ ne.|89
183
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/7b3d6f79/wav/7b3d6f79_0318.wav|fɯfɯfɯ! soɽe taɽa, naitɕoɯ ka mo ɕiɽenaikedo ne.|99
184
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/horie_yui/Horie_Yui_03/Horie_Yui_03_chunk68.wav|aidʲa to ka haʔsoɯ to kaʔte iɯ no wa, iʔɕɯɴ no çibana, iwajɯrɯ sɯpaːkɯ de ɕika naiɴ da jo.|0
185
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sawashiro_miyuki/Sawashiro_Miyuki_03/Sawashiro_Miyuki_03_chunk731.wav|sonnaa ta ma ga aɽeba, doɽe dake sɯbaɽaɕiː ka, to iɯ hanaɕi da.|20
186
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Kanade/Kanade_voice_home_shinaido_room/kanade_card_200580/kanade_voice_200580_2_01.wav|kaedesaɴ to tamani ɽinaː niːkɯ no. kondo, pɯɽodʲɯɯsaːsaɴ mo...doɯ?|19
187
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Kanade/Kanade_voice_home_shinaido_room/kanade_card_201271/kanade_voice_201271_2_01.wav|animarɯ seɽapiː, koɯka arɯ wa. ɯtɕi ni wa kitsɯne mo irɯɕi, joɽidoɽi midoɽi ne.|19
188
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiburin/shiburin_mobamas/shiburin_mobamasu_0010/shiburin_mobamasu_0010_chunk4.wav|wataɕi wa...|54
189
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/4e2f4ba6/wav/4e2f4ba6_2019.wav|betsɯ ni, moɯ tɕoʔto asobitai dake.|95
190
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/67eeef73/wav/67eeef73_1395.wav|sono toːɽi. toɯtotsɯ de omae mo komaʔte irɯdaɽoɯga, kiŋkʲɯɯ ni kaiseki o onegai ɕitai mono ga arɯ.|81
191
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/1cc3c6c0/wav/1cc3c6c0_0820.wav|fɯfɯʔ, moɯ iɯ kaɴ. soɯ iɯ onna koɽoɕi no ɕiɽifɯ wa, hoka no ko niːɯtaɽa akaɴ jo.|91
192
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/chiwa_saito/Chiwa_Saito_01/Chiwa_Saito_01_chunk1394.wav|mɯɕiɽo ɕendʑoɯgahaɽa wa, dʑibɯɴ wa moɯ daidʑoɯbɯ da to tɕitɕioja ni oɕietakɯʔte, kono tabi, hatsɯ deːto da to iɯ no ni, ɕendʑoɯgahaɽa tɕitɕi ni doɯhaɴ o negaʔta no de wa naidaɽoɯ ka.|3
193
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/mio/mio_honda_cgss/mio_honda_card_301057/mio_honda_voice_301057_1_03.wav|ta i joɯ mitai ni, egao de,ʔte. itsɯ no aida ni ka, kɯtɕigɯse ni naʔteta no ka naː—?|41
194
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakamoto_maya/Sakamoto_Maya_03/Sakamoto_Maya_03_chunk1209.wav|sɯkɯnakɯtomo bokɯ no ie no geŋkaɴ, soɽe ni tsɯkiçi no nikɯtai o hakai ɕita toki no ano tegiwa wa, bokɯ ga koɽe made mite kita dono kaiː ni mo çike o toɽanai.|17
195
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Syuuko/Syuuko_Mobamas/Syuko Voice/【モバマス】[ハイブリッドエッジ]塩見周子【ボイス集】 - Niconico Video/【モバマス】[ハイブリッドエッジ]塩見周子【ボイス集】 - Niconico Video_chunk34.wav|wataɕi no koe.|43
196
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/horie_yui/Horie_Yui_02/Horie_Yui_02_chunk10.wav|mʲoɯ ni kiʔpaɽi ɕigasa wa iʔta.|0
197
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/9febd2ae/wav/9febd2ae_0887.wav|kimitatɕi wa jowai çito, komaʔta çito o tasɯkerɯ no ga ɕindʑoɯ daʔtaɴ dʑa nai ka na?|93
198
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/ochinbarai/voice/hsm/hsm_06_003h_038.wav|ᵻᵻᵻ.|60
199
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/ad28b91b/wav/ad28b91b_2431.wav|ɕɯɯkʲoɯteki na koto wa ki ni ɕinakɯte iːɴ dʑa nai ka? wataɕi wa mɯɕiɴ ɽoɴɕa daɕi na.|94
200
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/449d5a0a/wav/449d5a0a_1967.wav|kaimono no oːdaː wa kampeki deɕita.ʔpeɽia wa ɕiʔkaɽi oboetemaɕita.|69
201
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/001/S001_F_0360.wav|ɯ, ɯrɯseː! tɕoʔto ɕiawase kaiɽo ga boɯsoɯ ɕiterɯ dake da!|23
202
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Kanade/Kanade_Events_and_Card/Kanade_Events/Kande5Comyus/Kande5Comyus_chunk32.wav|sonna ɯso, moɯ naɽetɕaʔta.|19
203
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/minami/minami_cgss/minami_card_201092/minami_voice_201092_6_02.wav|kiboɯ no tsɯbomi o, minoɽasemaɕoɯ!|16
204
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/010/S010_B_0116.wav|oɽe tɕi wa, aitsɯ o datɕi da to omoiteːkedo jo. ɽennasama no tame ni naɽaneːnaɽa, sokɯ, kirɯ tsɯmoɽi da ze.|47
205
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/bbd90363/wav/bbd90363_0900.wav|desɯga, neɴ no tame, kakɯniɴ o onegai ɕimasɯ. wataɕi mo, seŋkawa sensei to iʔɕo ni, tɕikakɯ o sagaɕimasɯkaɽa.|77
206
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/bbd90363/wav/bbd90363_1010.wav|soko kaɽa enerɯgiː o tomonaʔta rɯɯɴ o haʔsei sase, dʑizai ni ajatsɯrɯ koto made kanoɯ to wa omoemaseɴ.|77
207
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/9febd2ae/wav/9febd2ae_0305.wav|jɯɯtɕaɴ to itsɯ de mo iʔɕo niːtai no ni! wataɕi no koto sakete çidoi joː—!|93
208
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/miku/miku_cgss/miku_card_101176/miku_voice_101176_6_04.wav|meɽiː! kɯɽisɯnʲɯɯsɯ!|59
209
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/1cc3c6c0/wav/1cc3c6c0_1734.wav|na! çiʔto! ima no kanzeɴ ni ɕite kɯwaɽe ja! soɕi omoɴ ja de!|91
210
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/chieri/chieri_mobamas/chie_mobamasu_0016/chie_mobamasu_0016_chunk233.wav|ɕiawase ni, naɽetaɽa, iː na.|25
211
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/chiwa_saito/Chiwa_Saito_01/Chiwa_Saito_01_chunk371.wav|na no ka mo ɕiɽenaiga.|3
212
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/03/03012570.wav|fɯtaɽi ni, onamae o okɯʔte mo iː?|58
213
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/9febd2ae/wav/9febd2ae_1886.wav|sono kawaɽi ɕiʔnʲɯɯ bɯɴ'iɴ wa ne iʔpatsɯgei o çiɽoɯ ɕinakʲa ikenai no.|93
214
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakamoto_maya/Sakamoto_Maya_01/Sakamoto_Maya_01_chunk358.wav|sonna wake dakaɽa, kaiwa mo, bimʲoɯ ni sagɯɽisagɯɽi daʔta.|17
215
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/36ea135b/wav/36ea135b_2312.wav|kanzeɴ ni keɕisaʔtaʔte koto desɯ ka?|82
216
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/940de876/wav/940de876_4345.wav|nete iː wa jo. ojasɯmi.|83
217
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/36ea135b/wav/36ea135b_0585.wav|gomeɴ ne, wataɕi, moʔto ɯmakɯ jaɽete iɽeba...|82
218
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/saori/30-Main_Commyu.wav|saikiɴ, totemo ɽakɯ ɕikaʔtako to ga—ʔta no, wasɯɽete imaɕita.....naɴ da to omoimasɯ?|21
219
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/bce2a5af/wav/bce2a5af_0449.wav|anta saː, mae joɽi kaɽada oːkikɯ naʔterɯɴ dakaɽa saː, ki o tsɯkete jo. sasae kiɽenaiʔteba.|98
220
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/001/S001_B_0168.wav|narɯhodo.|23
221
+ /home/austin/disk1/stts-zs_cleaning/data/moe_48/18460462/wav/18460462_2152.wav|jakamaɕiː! waɽewaɽe wa baːmɯkɯɯheɴ o saigeɴ sɯrɯ no niːsogaɕiː no da! kisama no joɯ na bimboɯniɴ no iɽai ni kamaʔte irɯ jojɯɯ wa nai.|102
222
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_02/Sakurai_Takahiro_02_chunk2518.wav|ija, madʑ de odoɽoita.|4
223
+ /home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/ochinbarai/voice/cst/cst_099_bgv_047.wav|ᵻᵻᵻ.|50
stylekan/Demo/Inference_LJSpeech.ipynb ADDED
@@ -0,0 +1,562 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "9adb7bd1",
6
+ "metadata": {},
7
+ "source": [
8
+ "# StyleTTS 2 Demo (LJSpeech)\n"
9
+ ]
10
+ },
11
+ {
12
+ "cell_type": "markdown",
13
+ "id": "6108384d",
14
+ "metadata": {},
15
+ "source": [
16
+ "### Utils"
17
+ ]
18
+ },
19
+ {
20
+ "cell_type": "code",
21
+ "execution_count": null,
22
+ "id": "96e173bf",
23
+ "metadata": {},
24
+ "outputs": [],
25
+ "source": [
26
+ "import torch\n",
27
+ "torch.manual_seed(0)\n",
28
+ "torch.backends.cudnn.benchmark = False\n",
29
+ "torch.backends.cudnn.deterministic = True\n",
30
+ "\n",
31
+ "import random\n",
32
+ "random.seed(0)\n",
33
+ "\n",
34
+ "import numpy as np\n",
35
+ "np.random.seed(0)"
36
+ ]
37
+ },
38
+ {
39
+ "cell_type": "code",
40
+ "execution_count": null,
41
+ "id": "da84c60f",
42
+ "metadata": {},
43
+ "outputs": [],
44
+ "source": [
45
+ "%cd .."
46
+ ]
47
+ },
48
+ {
49
+ "cell_type": "code",
50
+ "execution_count": null,
51
+ "id": "5a3ddcc8",
52
+ "metadata": {},
53
+ "outputs": [],
54
+ "source": [
55
+ "# load packages\n",
56
+ "import time\n",
57
+ "import random\n",
58
+ "import yaml\n",
59
+ "from munch import Munch\n",
60
+ "import numpy as np\n",
61
+ "import torch\n",
62
+ "from torch import nn\n",
63
+ "import torch.nn.functional as F\n",
64
+ "import torchaudio\n",
65
+ "import librosa\n",
66
+ "from nltk.tokenize import word_tokenize\n",
67
+ "\n",
68
+ "from models import *\n",
69
+ "from utils import *\n",
70
+ "from text_utils import TextCleaner\n",
71
+ "textclenaer = TextCleaner()\n",
72
+ "\n",
73
+ "%matplotlib inline"
74
+ ]
75
+ },
76
+ {
77
+ "cell_type": "code",
78
+ "execution_count": null,
79
+ "id": "0229c7f8",
80
+ "metadata": {},
81
+ "outputs": [],
82
+ "source": []
83
+ },
84
+ {
85
+ "cell_type": "code",
86
+ "execution_count": null,
87
+ "id": "bbdc04c0",
88
+ "metadata": {},
89
+ "outputs": [],
90
+ "source": [
91
+ "device = 'cuda' if torch.cuda.is_available() else 'cpu'"
92
+ ]
93
+ },
94
+ {
95
+ "cell_type": "code",
96
+ "execution_count": null,
97
+ "id": "00ee05e1",
98
+ "metadata": {},
99
+ "outputs": [],
100
+ "source": [
101
+ "to_mel = torchaudio.transforms.MelSpectrogram(\n",
102
+ " n_mels=80, n_fft=2048, win_length=1200, hop_length=300)\n",
103
+ "mean, std = -4, 4\n",
104
+ "\n",
105
+ "def length_to_mask(lengths):\n",
106
+ " mask = torch.arange(lengths.max()).unsqueeze(0).expand(lengths.shape[0], -1).type_as(lengths)\n",
107
+ " mask = torch.gt(mask+1, lengths.unsqueeze(1))\n",
108
+ " return mask\n",
109
+ "\n",
110
+ "def preprocess(wave):\n",
111
+ " wave_tensor = torch.from_numpy(wave).float()\n",
112
+ " mel_tensor = to_mel(wave_tensor)\n",
113
+ " mel_tensor = (torch.log(1e-5 + mel_tensor.unsqueeze(0)) - mean) / std\n",
114
+ " return mel_tensor\n",
115
+ "\n",
116
+ "def compute_style(ref_dicts):\n",
117
+ " reference_embeddings = {}\n",
118
+ " for key, path in ref_dicts.items():\n",
119
+ " wave, sr = librosa.load(path, sr=24000)\n",
120
+ " audio, index = librosa.effects.trim(wave, top_db=30)\n",
121
+ " if sr != 24000:\n",
122
+ " audio = librosa.resample(audio, sr, 24000)\n",
123
+ " mel_tensor = preprocess(audio).to(device)\n",
124
+ "\n",
125
+ " with torch.no_grad():\n",
126
+ " ref = model.style_encoder(mel_tensor.unsqueeze(1))\n",
127
+ " reference_embeddings[key] = (ref.squeeze(1), audio)\n",
128
+ " \n",
129
+ " return reference_embeddings"
130
+ ]
131
+ },
132
+ {
133
+ "cell_type": "markdown",
134
+ "id": "7b9cecbe",
135
+ "metadata": {},
136
+ "source": [
137
+ "### Load models"
138
+ ]
139
+ },
140
+ {
141
+ "cell_type": "code",
142
+ "execution_count": null,
143
+ "id": "64fc4c0f",
144
+ "metadata": {},
145
+ "outputs": [],
146
+ "source": [
147
+ "# load phonemizer\n",
148
+ "import phonemizer\n",
149
+ "global_phonemizer = phonemizer.backend.EspeakBackend(language='en-us', preserve_punctuation=True, with_stress=True)"
150
+ ]
151
+ },
152
+ {
153
+ "cell_type": "code",
154
+ "execution_count": null,
155
+ "id": "48e7b644",
156
+ "metadata": {},
157
+ "outputs": [],
158
+ "source": [
159
+ "config = yaml.safe_load(open(\"Models/LJSpeech/config.yml\"))\n",
160
+ "\n",
161
+ "# load pretrained ASR model\n",
162
+ "ASR_config = config.get('ASR_config', False)\n",
163
+ "ASR_path = config.get('ASR_path', False)\n",
164
+ "text_aligner = load_ASR_models(ASR_path, ASR_config)\n",
165
+ "\n",
166
+ "# load pretrained F0 model\n",
167
+ "F0_path = config.get('F0_path', False)\n",
168
+ "pitch_extractor = load_F0_models(F0_path)\n",
169
+ "\n",
170
+ "# load BERT model\n",
171
+ "from Utils.PLBERT.util import load_plbert\n",
172
+ "BERT_path = config.get('PLBERT_dir', False)\n",
173
+ "plbert = load_plbert(BERT_path)"
174
+ ]
175
+ },
176
+ {
177
+ "cell_type": "code",
178
+ "execution_count": null,
179
+ "id": "ffc18cf7",
180
+ "metadata": {},
181
+ "outputs": [],
182
+ "source": [
183
+ "model = build_model(recursive_munch(config['model_params']), text_aligner, pitch_extractor, plbert)\n",
184
+ "_ = [model[key].eval() for key in model]\n",
185
+ "_ = [model[key].to(device) for key in model]"
186
+ ]
187
+ },
188
+ {
189
+ "cell_type": "code",
190
+ "execution_count": null,
191
+ "id": "64529d5c",
192
+ "metadata": {},
193
+ "outputs": [],
194
+ "source": [
195
+ "params_whole = torch.load(\"Models/LJSpeech/epoch_2nd_00100.pth\", map_location='cpu')\n",
196
+ "params = params_whole['net']"
197
+ ]
198
+ },
199
+ {
200
+ "cell_type": "code",
201
+ "execution_count": null,
202
+ "id": "895d9706",
203
+ "metadata": {},
204
+ "outputs": [],
205
+ "source": [
206
+ "for key in model:\n",
207
+ " if key in params:\n",
208
+ " print('%s loaded' % key)\n",
209
+ " try:\n",
210
+ " model[key].load_state_dict(params[key])\n",
211
+ " except:\n",
212
+ " from collections import OrderedDict\n",
213
+ " state_dict = params[key]\n",
214
+ " new_state_dict = OrderedDict()\n",
215
+ " for k, v in state_dict.items():\n",
216
+ " name = k[7:] # remove `module.`\n",
217
+ " new_state_dict[name] = v\n",
218
+ " # load params\n",
219
+ " model[key].load_state_dict(new_state_dict, strict=False)\n",
220
+ "# except:\n",
221
+ "# _load(params[key], model[key])\n",
222
+ "_ = [model[key].eval() for key in model]"
223
+ ]
224
+ },
225
+ {
226
+ "cell_type": "code",
227
+ "execution_count": null,
228
+ "id": "c1a59db2",
229
+ "metadata": {},
230
+ "outputs": [],
231
+ "source": [
232
+ "from Modules.diffusion.sampler import DiffusionSampler, ADPM2Sampler, KarrasSchedule"
233
+ ]
234
+ },
235
+ {
236
+ "cell_type": "code",
237
+ "execution_count": null,
238
+ "id": "e30985ab",
239
+ "metadata": {},
240
+ "outputs": [],
241
+ "source": [
242
+ "sampler = DiffusionSampler(\n",
243
+ " model.diffusion.diffusion,\n",
244
+ " sampler=ADPM2Sampler(),\n",
245
+ " sigma_schedule=KarrasSchedule(sigma_min=0.0001, sigma_max=3.0, rho=9.0), # empirical parameters\n",
246
+ " clamp=False\n",
247
+ ")"
248
+ ]
249
+ },
250
+ {
251
+ "cell_type": "markdown",
252
+ "id": "b803110e",
253
+ "metadata": {},
254
+ "source": [
255
+ "### Synthesize speech"
256
+ ]
257
+ },
258
+ {
259
+ "cell_type": "code",
260
+ "execution_count": null,
261
+ "id": "24655f46",
262
+ "metadata": {},
263
+ "outputs": [],
264
+ "source": [
265
+ "# synthesize a text\n",
266
+ "text = ''' StyleTTS 2 is a text-to-speech model that leverages style diffusion and adversarial training with large speech language models to achieve human-level text-to-speech synthesis. '''"
267
+ ]
268
+ },
269
+ {
270
+ "cell_type": "code",
271
+ "execution_count": null,
272
+ "id": "ca57469c",
273
+ "metadata": {},
274
+ "outputs": [],
275
+ "source": [
276
+ "def inference(text, noise, diffusion_steps=5, embedding_scale=1):\n",
277
+ " text = text.strip()\n",
278
+ " text = text.replace('\"', '')\n",
279
+ " ps = global_phonemizer.phonemize([text])\n",
280
+ " ps = word_tokenize(ps[0])\n",
281
+ " ps = ' '.join(ps)\n",
282
+ "\n",
283
+ " tokens = textclenaer(ps)\n",
284
+ " tokens.insert(0, 0)\n",
285
+ " tokens = torch.LongTensor(tokens).to(device).unsqueeze(0)\n",
286
+ " \n",
287
+ " with torch.no_grad():\n",
288
+ " input_lengths = torch.LongTensor([tokens.shape[-1]]).to(tokens.device)\n",
289
+ " text_mask = length_to_mask(input_lengths).to(tokens.device)\n",
290
+ "\n",
291
+ " t_en = model.text_encoder(tokens, input_lengths, text_mask)\n",
292
+ " bert_dur = model.bert(tokens, attention_mask=(~text_mask).int())\n",
293
+ " d_en = model.bert_encoder(bert_dur).transpose(-1, -2) \n",
294
+ "\n",
295
+ " s_pred = sampler(noise, \n",
296
+ " embedding=bert_dur[0].unsqueeze(0), num_steps=diffusion_steps,\n",
297
+ " embedding_scale=embedding_scale).squeeze(0)\n",
298
+ "\n",
299
+ " s = s_pred[:, 128:]\n",
300
+ " ref = s_pred[:, :128]\n",
301
+ "\n",
302
+ " d = model.predictor.text_encoder(d_en, s, input_lengths, text_mask)\n",
303
+ "\n",
304
+ " x, _ = model.predictor.lstm(d)\n",
305
+ " duration = model.predictor.duration_proj(x)\n",
306
+ " duration = torch.sigmoid(duration).sum(axis=-1)\n",
307
+ " pred_dur = torch.round(duration.squeeze()).clamp(min=1)\n",
308
+ "\n",
309
+ " pred_dur[-1] += 5\n",
310
+ "\n",
311
+ " pred_aln_trg = torch.zeros(input_lengths, int(pred_dur.sum().data))\n",
312
+ " c_frame = 0\n",
313
+ " for i in range(pred_aln_trg.size(0)):\n",
314
+ " pred_aln_trg[i, c_frame:c_frame + int(pred_dur[i].data)] = 1\n",
315
+ " c_frame += int(pred_dur[i].data)\n",
316
+ "\n",
317
+ " # encode prosody\n",
318
+ " en = (d.transpose(-1, -2) @ pred_aln_trg.unsqueeze(0).to(device))\n",
319
+ " F0_pred, N_pred = model.predictor.F0Ntrain(en, s)\n",
320
+ " out = model.decoder((t_en @ pred_aln_trg.unsqueeze(0).to(device)), \n",
321
+ " F0_pred, N_pred, ref.squeeze().unsqueeze(0))\n",
322
+ " \n",
323
+ " return out.squeeze().cpu().numpy()"
324
+ ]
325
+ },
326
+ {
327
+ "cell_type": "markdown",
328
+ "id": "d438ef4f",
329
+ "metadata": {},
330
+ "source": [
331
+ "#### Basic synthesis (5 diffusion steps)"
332
+ ]
333
+ },
334
+ {
335
+ "cell_type": "code",
336
+ "execution_count": null,
337
+ "id": "d3d7f7d5",
338
+ "metadata": {
339
+ "scrolled": true
340
+ },
341
+ "outputs": [],
342
+ "source": [
343
+ "start = time.time()\n",
344
+ "noise = torch.randn(1,1,256).to(device)\n",
345
+ "wav = inference(text, noise, diffusion_steps=5, embedding_scale=1)\n",
346
+ "rtf = (time.time() - start) / (len(wav) / 24000)\n",
347
+ "print(f\"RTF = {rtf:5f}\")\n",
348
+ "import IPython.display as ipd\n",
349
+ "display(ipd.Audio(wav, rate=24000))"
350
+ ]
351
+ },
352
+ {
353
+ "cell_type": "markdown",
354
+ "id": "2d5d9df0",
355
+ "metadata": {},
356
+ "source": [
357
+ "#### With more diffusion steps (more diverse)\n",
358
+ "Since the sampler is ancestral, the higher the number of steps, the more diverse the samples are, at the cost of slower synthesis speed."
359
+ ]
360
+ },
361
+ {
362
+ "cell_type": "code",
363
+ "execution_count": null,
364
+ "id": "a10129fd",
365
+ "metadata": {},
366
+ "outputs": [],
367
+ "source": [
368
+ "start = time.time()\n",
369
+ "noise = torch.randn(1,1,256).to(device)\n",
370
+ "wav = inference(text, noise, diffusion_steps=10, embedding_scale=1)\n",
371
+ "rtf = (time.time() - start) / (len(wav) / 24000)\n",
372
+ "print(f\"RTF = {rtf:5f}\")\n",
373
+ "import IPython.display as ipd\n",
374
+ "display(ipd.Audio(wav, rate=24000))"
375
+ ]
376
+ },
377
+ {
378
+ "cell_type": "markdown",
379
+ "id": "1877ea15",
380
+ "metadata": {},
381
+ "source": [
382
+ "### Speech expressiveness\n",
383
+ "The following section recreates the samples shown in [Section 6](https://styletts2.github.io/#emo) of the demo page."
384
+ ]
385
+ },
386
+ {
387
+ "cell_type": "markdown",
388
+ "id": "4c4777b7",
389
+ "metadata": {},
390
+ "source": [
391
+ "#### With embedding_scale=1\n",
392
+ "This is the classifier-free guidance scale. The higher the scale, the more strongly the style is conditioned on the input text, and hence the more emotional the speech. "
393
+ ]
394
+ },
395
+ {
396
+ "cell_type": "code",
397
+ "execution_count": null,
398
+ "id": "c29ea2f0",
399
+ "metadata": {},
400
+ "outputs": [],
401
+ "source": [
402
+ "texts = {}\n",
403
+ "texts['Happy'] = \"We are happy to invite you to join us on a journey to the past, where we will visit the most amazing monuments ever built by human hands.\"\n",
404
+ "texts['Sad'] = \"I am sorry to say that we have suffered a severe setback in our efforts to restore prosperity and confidence.\"\n",
405
+ "texts['Angry'] = \"The field of astronomy is a joke! Its theories are based on flawed observations and biased interpretations!\"\n",
406
+ "texts['Surprised'] = \"I can't believe it! You mean to tell me that you have discovered a new species of bacteria in this pond?\"\n",
407
+ "\n",
408
+ "for k,v in texts.items():\n",
409
+ " noise = torch.randn(1,1,256).to(device)\n",
410
+ " wav = inference(v, noise, diffusion_steps=10, embedding_scale=1)\n",
411
+ " print(k + \": \")\n",
412
+ " display(ipd.Audio(wav, rate=24000, normalize=False))"
413
+ ]
414
+ },
415
+ {
416
+ "cell_type": "markdown",
417
+ "id": "3c89499f",
418
+ "metadata": {},
419
+ "source": [
420
+ "#### With embedding_scale=2"
421
+ ]
422
+ },
423
+ {
424
+ "cell_type": "code",
425
+ "execution_count": null,
426
+ "id": "f73be3aa",
427
+ "metadata": {},
428
+ "outputs": [],
429
+ "source": [
430
+ "texts = {}\n",
431
+ "texts['Happy'] = \"We are happy to invite you to join us on a journey to the past, where we will visit the most amazing monuments ever built by human hands.\"\n",
432
+ "texts['Sad'] = \"I am sorry to say that we have suffered a severe setback in our efforts to restore prosperity and confidence.\"\n",
433
+ "texts['Angry'] = \"The field of astronomy is a joke! Its theories are based on flawed observations and biased interpretations!\"\n",
434
+ "texts['Surprised'] = \"I can't believe it! You mean to tell me that you have discovered a new species of bacteria in this pond?\"\n",
435
+ "\n",
436
+ "for k,v in texts.items():\n",
437
+ " noise = torch.randn(1,1,256).to(device)\n",
438
+ " wav = inference(v, noise, diffusion_steps=10, embedding_scale=2) # embedding_scale=2 for more pronounced emotion\n",
439
+ " print(k + \": \")\n",
440
+ " display(ipd.Audio(wav, rate=24000, normalize=False))"
441
+ ]
442
+ },
443
+ {
444
+ "cell_type": "markdown",
445
+ "id": "9320da63",
446
+ "metadata": {},
447
+ "source": [
448
+ "### Long-form generation\n",
449
+ "This section includes a basic implementation of Algorithm 1 from the paper for consistent long-form audio generation. The example passage is taken from [Section 5](https://styletts2.github.io/#long) of the demo page. "
450
+ ]
451
+ },
452
+ {
453
+ "cell_type": "code",
454
+ "execution_count": null,
455
+ "id": "cdd4db51",
456
+ "metadata": {},
457
+ "outputs": [],
458
+ "source": [
459
+ "passage = '''If the supply of fruit is greater than the family needs, it may be made a source of income by sending the fresh fruit to the market if there is one near enough, or by preserving, canning, and making jelly for sale. To make such an enterprise a success the fruit and work must be first class. There is magic in the word \"Homemade,\" when the product appeals to the eye and the palate; but many careless and incompetent people have found to their sorrow that this word has not magic enough to float inferior goods on the market. As a rule large canning and preserving establishments are clean and have the best appliances, and they employ chemists and skilled labor. The home product must be very good to compete with the attractive goods that are sent out from such establishments. Yet for first-class homemade products there is a market in all large cities. All first-class grocers have customers who purchase such goods.'''"
460
+ ]
461
+ },
462
+ {
463
+ "cell_type": "code",
464
+ "execution_count": null,
465
+ "id": "ebb941c8",
466
+ "metadata": {},
467
+ "outputs": [],
468
+ "source": [
469
+ "def LFinference(text, s_prev, noise, alpha=0.7, diffusion_steps=5, embedding_scale=1):\n",
470
+ " text = text.strip()\n",
471
+ " text = text.replace('\"', '')\n",
472
+ " ps = global_phonemizer.phonemize([text])\n",
473
+ " ps = word_tokenize(ps[0])\n",
474
+ " ps = ' '.join(ps)\n",
475
+ "\n",
476
+ " tokens = textclenaer(ps)\n",
477
+ " tokens.insert(0, 0)\n",
478
+ " tokens = torch.LongTensor(tokens).to(device).unsqueeze(0)\n",
479
+ " \n",
480
+ " with torch.no_grad():\n",
481
+ " input_lengths = torch.LongTensor([tokens.shape[-1]]).to(tokens.device)\n",
482
+ " text_mask = length_to_mask(input_lengths).to(tokens.device)\n",
483
+ "\n",
484
+ " t_en = model.text_encoder(tokens, input_lengths, text_mask)\n",
485
+ " bert_dur = model.bert(tokens, attention_mask=(~text_mask).int())\n",
486
+ " d_en = model.bert_encoder(bert_dur).transpose(-1, -2) \n",
487
+ "\n",
488
+ " s_pred = sampler(noise, \n",
489
+ " embedding=bert_dur[0].unsqueeze(0), num_steps=diffusion_steps,\n",
490
+ " embedding_scale=embedding_scale).squeeze(0)\n",
491
+ " \n",
492
+ " if s_prev is not None:\n",
493
+ " # convex combination of previous and current style\n",
494
+ " s_pred = alpha * s_prev + (1 - alpha) * s_pred\n",
495
+ " \n",
496
+ " s = s_pred[:, 128:]\n",
497
+ " ref = s_pred[:, :128]\n",
498
+ "\n",
499
+ " d = model.predictor.text_encoder(d_en, s, input_lengths, text_mask)\n",
500
+ "\n",
501
+ " x, _ = model.predictor.lstm(d)\n",
502
+ " duration = model.predictor.duration_proj(x)\n",
503
+ " duration = torch.sigmoid(duration).sum(axis=-1)\n",
504
+ " pred_dur = torch.round(duration.squeeze()).clamp(min=1)\n",
505
+ "\n",
506
+ " pred_aln_trg = torch.zeros(input_lengths, int(pred_dur.sum().data))\n",
507
+ " c_frame = 0\n",
508
+ " for i in range(pred_aln_trg.size(0)):\n",
509
+ " pred_aln_trg[i, c_frame:c_frame + int(pred_dur[i].data)] = 1\n",
510
+ " c_frame += int(pred_dur[i].data)\n",
511
+ "\n",
512
+ " # encode prosody\n",
513
+ " en = (d.transpose(-1, -2) @ pred_aln_trg.unsqueeze(0).to(device))\n",
514
+ " F0_pred, N_pred = model.predictor.F0Ntrain(en, s)\n",
515
+ " out = model.decoder((t_en @ pred_aln_trg.unsqueeze(0).to(device)), \n",
516
+ " F0_pred, N_pred, ref.squeeze().unsqueeze(0))\n",
517
+ " \n",
518
+ " return out.squeeze().cpu().numpy(), s_pred"
519
+ ]
520
+ },
521
+ {
522
+ "cell_type": "code",
523
+ "execution_count": null,
524
+ "id": "7ca0ef2e",
525
+ "metadata": {},
526
+ "outputs": [],
527
+ "source": [
528
+ "sentences = passage.split('.') # simple split by period\n",
529
+ "wavs = []\n",
530
+ "s_prev = None\n",
531
+ "for text in sentences:\n",
532
+ " if text.strip() == \"\": continue\n",
533
+ " text += '.' # add it back\n",
534
+ " noise = torch.randn(1,1,256).to(device)\n",
535
+ " wav, s_prev = LFinference(text, s_prev, noise, alpha=0.7, diffusion_steps=10, embedding_scale=1.5)\n",
536
+ " wavs.append(wav)\n",
537
+ "display(ipd.Audio(np.concatenate(wavs), rate=24000, normalize=False))"
538
+ ]
539
+ }
540
+ ],
541
+ "metadata": {
542
+ "kernelspec": {
543
+ "display_name": "NLP",
544
+ "language": "python",
545
+ "name": "nlp"
546
+ },
547
+ "language_info": {
548
+ "codemirror_mode": {
549
+ "name": "ipython",
550
+ "version": 3
551
+ },
552
+ "file_extension": ".py",
553
+ "mimetype": "text/x-python",
554
+ "name": "python",
555
+ "nbconvert_exporter": "python",
556
+ "pygments_lexer": "ipython3",
557
+ "version": "3.9.7"
558
+ }
559
+ },
560
+ "nbformat": 4,
561
+ "nbformat_minor": 5
562
+ }
stylekan/Demo/Inference_LibriTTS.ipynb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4a3308a129ac51e5e9e538817a0dfff3e1ab6d11bcf3918abe60c97e895a7cd
3
+ size 20886216
stylekan/Demo/infer_24khz.ipynb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:608ae17929cbf62af09f4f131ee6dfa60f1dca4683c7353fb4f91fa40fa3ffa2
3
+ size 26976110
stylekan/Demo/syuko_style_vectors.csv ADDED
The diff for this file is too large to render. See raw diff
 
stylekan/LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2023 Aaron (Yinghao) Li
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
stylekan/Models/Style_Kanade/2nd_phase_last.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22cfd8dcaebcc012708c21999546f93ca5ddb6b305a75a1002ce2c60aa820f04
3
+ size 2586719838
stylekan/Models/Style_Kanade/NO_SLM_3_epoch_2nd_00002.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c94e26293c1722ad7acf82e25de4e225f0dfa832edaab88559acf481eba1bac
3
+ size 2049397288
stylekan/Models/Style_Kanade/NO_SLM_epoch_2nd_00002.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78278bbaa7ccdf292985c8656dc543f3772d6d52789ed2d95c5b0cbaf86e362c
3
+ size 2049397288
stylekan/Models/Style_Kanade/NO_SLM_epoch_2nd_00004.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c47ad6a6a952ef7fd7642ec1e57ed278379d77668157c30bd798fa483efcf2e
3
+ size 2049397288
stylekan/Models/Style_Kanade/config_kanade.yml ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {ASR_config: /home/austin/disk2/llmvcs/tt/stylekan/Utils/ASR/config.yml, ASR_path: /home/austin/disk2/llmvcs/tt/stylekan/Utils/ASR/bst_00080.pth,
2
+ F0_path: /home/austin/disk2/llmvcs/tt/stylekan/Utils/JDC/bst.t7, PLBERT_dir: Utils/PLBERT/,
3
+ batch_size: 49, data_params: {OOD_data: /home/austin/disk2/llmvcs/tt/stylekan/Data/OOD_LargeScale_.csv,
4
+ min_length: 50, root_path: '', train_data: /home/austin/disk2/llmvcs/tt/stylekan/Data/filtered_train_list.csv,
5
+ val_data: /home/austin/disk2/llmvcs/tt/stylekan/Data/mg_valid.txt}, device: cuda,
6
+ epochs_1st: 25, epochs_2nd: 15, first_stage_path: /home/austin/disk2/llmvcs/tt/stylekan/Models/Style_Kanade/epoch_1st_00013.pth,
7
+ load_only_params: false, log_dir: Models/Style_Kanade, log_interval: 10, loss_params: {
8
+ TMA_epoch: 9, diff_epoch: 2, joint_epoch: 2, lambda_F0: 1.0, lambda_ce: 20.0,
9
+ lambda_diff: 1.0, lambda_dur: 1.0, lambda_gen: 1.0, lambda_mel: 10.0, lambda_mono: 1.0,
10
+ lambda_norm: 1.0, lambda_s2s: 1.0, lambda_slm: 1.0, lambda_sty: 1.0}, max_len: 560,
11
+ model_params: {decoder: {gen_istft_hop_size: 5, gen_istft_n_fft: 20, resblock_dilation_sizes: [
12
+ [1, 3, 5], [1, 3, 5], [1, 3, 5]], resblock_kernel_sizes: [3, 7, 11], type: istftnet,
13
+ upsample_initial_channel: 512, upsample_kernel_sizes: [20, 12], upsample_rates: [
14
+ 10, 6]}, diffusion: {dist: {estimate_sigma_data: true, mean: -3.0, sigma_data: 0.2782753203153678,
15
+ std: 1.0}, embedding_mask_proba: 0.1, transformer: {head_features: 64, multiplier: 2,
16
+ num_heads: 8, num_layers: 3}}, dim_in: 64, dropout: 0.2, hidden_dim: 512,
17
+ max_conv_dim: 512, max_dur: 50, multispeaker: true, n_layer: 3, n_mels: 80, n_token: 178,
18
+ slm: {hidden: 1280, initial_channel: 64, model: Respair/Whisper_Large_v2_Encoder_Block,
19
+ nlayers: 33, sr: 16000}, sr: 24000, style_dim: 128}, optimizer_params: {bert_lr: 1.0e-05,
20
+ ft_lr: 1.0e-05, lr: 0.0001}, preprocess_params: {spect_params: {hop_length: 300,
21
+ n_fft: 2048, win_length: 1200}, sr: 24000}, pretrained_model: /home/austin/disk2/llmvcs/tt/stylekan/Models/Style_Kanade/NO_SLM_3_epoch_2nd_00002.pth,
22
+ save_freq: 1, second_stage_load_pretrained: true, slmadv_params: {batch_percentage: 0.5,
23
+ iter: 20, max_len: 500, min_len: 400, scale: 0.01, sig: 1.5, thresh: 5}}
stylekan/Models/Style_Kanade/epoch_1st_00013.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15ad73479a2a1dd8b630376dd8d6b9ad81bc24da34517c45a15c7dd235cc3110
3
+ size 1918457960
stylekan/Models/Style_Kanade/epoch_2nd_00000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65f10185d1bf51609155699f9e53b8b887c422e9cf7275270994db41ddf382bb
3
+ size 1515513256