Upload folder using huggingface_hub
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +18 -0
- stylekan/Colab/StyleTTS2_Demo_LJSpeech.ipynb +486 -0
- stylekan/Colab/StyleTTS2_Demo_LibriTTS.ipynb +1218 -0
- stylekan/Colab/StyleTTS2_Finetune_Demo.ipynb +480 -0
- stylekan/Configs/config.yml +116 -0
- stylekan/Configs/config_ft.yml +123 -0
- stylekan/Configs/config_ft_kaede.yml +122 -0
- stylekan/Configs/config_kanade.yml +121 -0
- stylekan/Configs/config_kanade_test.yml +124 -0
- stylekan/Data/OOD_LargeScale_.csv +3 -0
- stylekan/Data/filtered_train_list.csv +3 -0
- stylekan/Data/metadata_cleanest/48khz_config_with_names_ids.csv +3 -0
- stylekan/Data/metadata_cleanest/FT_imas copy.csv +50 -0
- stylekan/Data/metadata_cleanest/FT_imas.csv +0 -0
- stylekan/Data/metadata_cleanest/FT_imas_remapped.csv +3 -0
- stylekan/Data/metadata_cleanest/FT_imas_valid.csv +131 -0
- stylekan/Data/metadata_cleanest/FT_imas_valid_less_than_20sec.csv +126 -0
- stylekan/Data/metadata_cleanest/FT_imas_valid_more_than_10sec.csv +17 -0
- stylekan/Data/metadata_cleanest/FT_saori.csv +0 -0
- stylekan/Data/metadata_cleanest/FT_saori_valid.csv +23 -0
- stylekan/Data/metadata_cleanest/filtered_train_list.csv +3 -0
- stylekan/Data/metadata_cleanest/filtered_train_list_no_nsp.csv +3 -0
- stylekan/Data/metadata_cleanest/filtered_train_list_no_nsp_HEADER_plus.csv +3 -0
- stylekan/Data/metadata_cleanest/filtered_train_list_no_nsp_plus.csv +3 -0
- stylekan/Data/metadata_cleanest/prelude.csv +3 -0
- stylekan/Data/metadata_cleanest/prelude_id.csv +3 -0
- stylekan/Data/metadata_cleanest/train_48_pure.csv +3 -0
- stylekan/Data/metadata_cleanest/val_48_pure.csv +126 -0
- stylekan/Data/metadata_cleanest/val_48_pure.txt +120 -0
- stylekan/Data/mg_valid.csv +198 -0
- stylekan/Data/mg_valid.txt +121 -0
- stylekan/Data/moe_res/imas_split/ranko/ranko_cgss/ranko_chara_198/ranko_chara_198.acb.tmp +0 -0
- stylekan/Data/moe_res/imas_split/shiki/shiki_fine/phonemizerASR_script_jpn.py +804 -0
- stylekan/Data/moe_res/imas_split/shiki/shiki_fine/shiki_finetune.csv +78 -0
- stylekan/Data/train_48_200k.csv +3 -0
- stylekan/Data/train_List.csv +3 -0
- stylekan/Data/train_List_updated.csv +3 -0
- stylekan/Data/val_48_200k.csv +223 -0
- stylekan/Demo/Inference_LJSpeech.ipynb +562 -0
- stylekan/Demo/Inference_LibriTTS.ipynb +3 -0
- stylekan/Demo/infer_24khz.ipynb +3 -0
- stylekan/Demo/syuko_style_vectors.csv +0 -0
- stylekan/LICENSE +21 -0
- stylekan/Models/Style_Kanade/2nd_phase_last.pth +3 -0
- stylekan/Models/Style_Kanade/NO_SLM_3_epoch_2nd_00002.pth +3 -0
- stylekan/Models/Style_Kanade/NO_SLM_epoch_2nd_00002.pth +3 -0
- stylekan/Models/Style_Kanade/NO_SLM_epoch_2nd_00004.pth +3 -0
- stylekan/Models/Style_Kanade/config_kanade.yml +23 -0
- stylekan/Models/Style_Kanade/epoch_1st_00013.pth +3 -0
- stylekan/Models/Style_Kanade/epoch_2nd_00000.pth +3 -0
.gitattributes
CHANGED
|
@@ -40,3 +40,21 @@ stts_48khz/StyleTTS2_48khz/Utils/JDC/bst_rmvpe_48k.t7 filter=lfs diff=lfs merge=
|
|
| 40 |
stts_48khz/StyleTTS2_48khz/infer.ipynb filter=lfs diff=lfs merge=lfs -text
|
| 41 |
jp_p2g.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 42 |
LLM_Clean_TEKNIUM.jsonl filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
stts_48khz/StyleTTS2_48khz/infer.ipynb filter=lfs diff=lfs merge=lfs -text
|
| 41 |
jp_p2g.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 42 |
LLM_Clean_TEKNIUM.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
stylekan/Data/OOD_LargeScale_.csv filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
stylekan/Data/filtered_train_list.csv filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
stylekan/Data/metadata_cleanest/48khz_config_with_names_ids.csv filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
stylekan/Data/metadata_cleanest/FT_imas_remapped.csv filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
stylekan/Data/metadata_cleanest/filtered_train_list.csv filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
stylekan/Data/metadata_cleanest/filtered_train_list_no_nsp.csv filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
stylekan/Data/metadata_cleanest/filtered_train_list_no_nsp_HEADER_plus.csv filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
stylekan/Data/metadata_cleanest/filtered_train_list_no_nsp_plus.csv filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
stylekan/Data/metadata_cleanest/prelude.csv filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
stylekan/Data/metadata_cleanest/prelude_id.csv filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
stylekan/Data/metadata_cleanest/train_48_pure.csv filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
stylekan/Data/train_48_200k.csv filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
stylekan/Data/train_List.csv filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
stylekan/Data/train_List_updated.csv filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
stylekan/Demo/Inference_LibriTTS.ipynb filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
stylekan/Demo/infer_24khz.ipynb filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
stylekan/Utils/JDC/bst.t7 filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
stylekan/Utils/PLBERT/step_1050000.t7 filter=lfs diff=lfs merge=lfs -text
|
stylekan/Colab/StyleTTS2_Demo_LJSpeech.ipynb
ADDED
|
@@ -0,0 +1,486 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"nbformat": 4,
|
| 3 |
+
"nbformat_minor": 0,
|
| 4 |
+
"metadata": {
|
| 5 |
+
"colab": {
|
| 6 |
+
"provenance": [],
|
| 7 |
+
"gpuType": "T4",
|
| 8 |
+
"authorship_tag": "ABX9TyM1x2mx2VnkYNFVlD+DFzmy",
|
| 9 |
+
"include_colab_link": true
|
| 10 |
+
},
|
| 11 |
+
"kernelspec": {
|
| 12 |
+
"name": "python3",
|
| 13 |
+
"display_name": "Python 3"
|
| 14 |
+
},
|
| 15 |
+
"language_info": {
|
| 16 |
+
"name": "python"
|
| 17 |
+
},
|
| 18 |
+
"accelerator": "GPU"
|
| 19 |
+
},
|
| 20 |
+
"cells": [
|
| 21 |
+
{
|
| 22 |
+
"cell_type": "markdown",
|
| 23 |
+
"metadata": {
|
| 24 |
+
"id": "view-in-github",
|
| 25 |
+
"colab_type": "text"
|
| 26 |
+
},
|
| 27 |
+
"source": [
|
| 28 |
+
"<a href=\"https://colab.research.google.com/github/yl4579/StyleTTS2/blob/main/Colab/StyleTTS2_Demo_LJSpeech.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
|
| 29 |
+
]
|
| 30 |
+
},
|
| 31 |
+
{
|
| 32 |
+
"cell_type": "markdown",
|
| 33 |
+
"source": [
|
| 34 |
+
"### Install packages and download models"
|
| 35 |
+
],
|
| 36 |
+
"metadata": {
|
| 37 |
+
"id": "nm653VK4CG9F"
|
| 38 |
+
}
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"cell_type": "code",
|
| 42 |
+
"source": [
|
| 43 |
+
"%%shell\n",
|
| 44 |
+
"git clone https://github.com/yl4579/StyleTTS2.git\n",
|
| 45 |
+
"cd StyleTTS2\n",
|
| 46 |
+
"pip install SoundFile torchaudio munch torch pydub pyyaml librosa nltk matplotlib accelerate transformers phonemizer einops einops-exts tqdm typing-extensions git+https://github.com/resemble-ai/monotonic_align.git\n",
|
| 47 |
+
"sudo apt-get install espeak-ng\n",
|
| 48 |
+
"git-lfs clone https://huggingface.co/yl4579/StyleTTS2-LJSpeech\n",
|
| 49 |
+
"mv StyleTTS2-LJSpeech/Models ."
|
| 50 |
+
],
|
| 51 |
+
"metadata": {
|
| 52 |
+
"id": "gciBKMqCCLvT"
|
| 53 |
+
},
|
| 54 |
+
"execution_count": null,
|
| 55 |
+
"outputs": []
|
| 56 |
+
},
|
| 57 |
+
{
|
| 58 |
+
"cell_type": "markdown",
|
| 59 |
+
"source": [
|
| 60 |
+
"### Load models"
|
| 61 |
+
],
|
| 62 |
+
"metadata": {
|
| 63 |
+
"id": "OAA8lx-XCQnM"
|
| 64 |
+
}
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"cell_type": "code",
|
| 68 |
+
"source": [
|
| 69 |
+
"%cd StyleTTS2\n",
|
| 70 |
+
"\n",
|
| 71 |
+
"import torch\n",
|
| 72 |
+
"torch.manual_seed(0)\n",
|
| 73 |
+
"torch.backends.cudnn.benchmark = False\n",
|
| 74 |
+
"torch.backends.cudnn.deterministic = True\n",
|
| 75 |
+
"\n",
|
| 76 |
+
"import random\n",
|
| 77 |
+
"random.seed(0)\n",
|
| 78 |
+
"\n",
|
| 79 |
+
"import numpy as np\n",
|
| 80 |
+
"np.random.seed(0)\n",
|
| 81 |
+
"\n",
|
| 82 |
+
"import nltk\n",
|
| 83 |
+
"nltk.download('punkt')\n",
|
| 84 |
+
"\n",
|
| 85 |
+
"# load packages\n",
|
| 86 |
+
"import time\n",
|
| 87 |
+
"import random\n",
|
| 88 |
+
"import yaml\n",
|
| 89 |
+
"from munch import Munch\n",
|
| 90 |
+
"import numpy as np\n",
|
| 91 |
+
"import torch\n",
|
| 92 |
+
"from torch import nn\n",
|
| 93 |
+
"import torch.nn.functional as F\n",
|
| 94 |
+
"import torchaudio\n",
|
| 95 |
+
"import librosa\n",
|
| 96 |
+
"from nltk.tokenize import word_tokenize\n",
|
| 97 |
+
"\n",
|
| 98 |
+
"from models import *\n",
|
| 99 |
+
"from utils import *\n",
|
| 100 |
+
"from text_utils import TextCleaner\n",
|
| 101 |
+
"textclenaer = TextCleaner()\n",
|
| 102 |
+
"\n",
|
| 103 |
+
"%matplotlib inline\n",
|
| 104 |
+
"\n",
|
| 105 |
+
"device = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
|
| 106 |
+
"\n",
|
| 107 |
+
"to_mel = torchaudio.transforms.MelSpectrogram(\n",
|
| 108 |
+
" n_mels=80, n_fft=2048, win_length=1200, hop_length=300)\n",
|
| 109 |
+
"mean, std = -4, 4\n",
|
| 110 |
+
"\n",
|
| 111 |
+
"def length_to_mask(lengths):\n",
|
| 112 |
+
" mask = torch.arange(lengths.max()).unsqueeze(0).expand(lengths.shape[0], -1).type_as(lengths)\n",
|
| 113 |
+
" mask = torch.gt(mask+1, lengths.unsqueeze(1))\n",
|
| 114 |
+
" return mask\n",
|
| 115 |
+
"\n",
|
| 116 |
+
"def preprocess(wave):\n",
|
| 117 |
+
" wave_tensor = torch.from_numpy(wave).float()\n",
|
| 118 |
+
" mel_tensor = to_mel(wave_tensor)\n",
|
| 119 |
+
" mel_tensor = (torch.log(1e-5 + mel_tensor.unsqueeze(0)) - mean) / std\n",
|
| 120 |
+
" return mel_tensor\n",
|
| 121 |
+
"\n",
|
| 122 |
+
"def compute_style(ref_dicts):\n",
|
| 123 |
+
" reference_embeddings = {}\n",
|
| 124 |
+
" for key, path in ref_dicts.items():\n",
|
| 125 |
+
" wave, sr = librosa.load(path, sr=24000)\n",
|
| 126 |
+
" audio, index = librosa.effects.trim(wave, top_db=30)\n",
|
| 127 |
+
" if sr != 24000:\n",
|
| 128 |
+
" audio = librosa.resample(audio, sr, 24000)\n",
|
| 129 |
+
" mel_tensor = preprocess(audio).to(device)\n",
|
| 130 |
+
"\n",
|
| 131 |
+
" with torch.no_grad():\n",
|
| 132 |
+
" ref = model.style_encoder(mel_tensor.unsqueeze(1))\n",
|
| 133 |
+
" reference_embeddings[key] = (ref.squeeze(1), audio)\n",
|
| 134 |
+
"\n",
|
| 135 |
+
" return reference_embeddings\n",
|
| 136 |
+
"\n",
|
| 137 |
+
"# load phonemizer\n",
|
| 138 |
+
"import phonemizer\n",
|
| 139 |
+
"global_phonemizer = phonemizer.backend.EspeakBackend(language='en-us', preserve_punctuation=True, with_stress=True, words_mismatch='ignore')\n",
|
| 140 |
+
"\n",
|
| 141 |
+
"config = yaml.safe_load(open(\"Models/LJSpeech/config.yml\"))\n",
|
| 142 |
+
"\n",
|
| 143 |
+
"# load pretrained ASR model\n",
|
| 144 |
+
"ASR_config = config.get('ASR_config', False)\n",
|
| 145 |
+
"ASR_path = config.get('ASR_path', False)\n",
|
| 146 |
+
"text_aligner = load_ASR_models(ASR_path, ASR_config)\n",
|
| 147 |
+
"\n",
|
| 148 |
+
"# load pretrained F0 model\n",
|
| 149 |
+
"F0_path = config.get('F0_path', False)\n",
|
| 150 |
+
"pitch_extractor = load_F0_models(F0_path)\n",
|
| 151 |
+
"\n",
|
| 152 |
+
"# load BERT model\n",
|
| 153 |
+
"from Utils.PLBERT.util import load_plbert\n",
|
| 154 |
+
"BERT_path = config.get('PLBERT_dir', False)\n",
|
| 155 |
+
"plbert = load_plbert(BERT_path)\n",
|
| 156 |
+
"\n",
|
| 157 |
+
"model = build_model(recursive_munch(config['model_params']), text_aligner, pitch_extractor, plbert)\n",
|
| 158 |
+
"_ = [model[key].eval() for key in model]\n",
|
| 159 |
+
"_ = [model[key].to(device) for key in model]\n",
|
| 160 |
+
"\n",
|
| 161 |
+
"params_whole = torch.load(\"Models/LJSpeech/epoch_2nd_00100.pth\", map_location='cpu')\n",
|
| 162 |
+
"params = params_whole['net']\n",
|
| 163 |
+
"\n",
|
| 164 |
+
"for key in model:\n",
|
| 165 |
+
" if key in params:\n",
|
| 166 |
+
" print('%s loaded' % key)\n",
|
| 167 |
+
" try:\n",
|
| 168 |
+
" model[key].load_state_dict(params[key])\n",
|
| 169 |
+
" except:\n",
|
| 170 |
+
" from collections import OrderedDict\n",
|
| 171 |
+
" state_dict = params[key]\n",
|
| 172 |
+
" new_state_dict = OrderedDict()\n",
|
| 173 |
+
" for k, v in state_dict.items():\n",
|
| 174 |
+
" name = k[7:] # remove `module.`\n",
|
| 175 |
+
" new_state_dict[name] = v\n",
|
| 176 |
+
" # load params\n",
|
| 177 |
+
" model[key].load_state_dict(new_state_dict, strict=False)\n",
|
| 178 |
+
"# except:\n",
|
| 179 |
+
"# _load(params[key], model[key])\n",
|
| 180 |
+
"_ = [model[key].eval() for key in model]\n",
|
| 181 |
+
"\n",
|
| 182 |
+
"from Modules.diffusion.sampler import DiffusionSampler, ADPM2Sampler, KarrasSchedule\n",
|
| 183 |
+
"\n",
|
| 184 |
+
"sampler = DiffusionSampler(\n",
|
| 185 |
+
" model.diffusion.diffusion,\n",
|
| 186 |
+
" sampler=ADPM2Sampler(),\n",
|
| 187 |
+
" sigma_schedule=KarrasSchedule(sigma_min=0.0001, sigma_max=3.0, rho=9.0), # empirical parameters\n",
|
| 188 |
+
" clamp=False\n",
|
| 189 |
+
")\n",
|
| 190 |
+
"\n",
|
| 191 |
+
"def inference(text, noise, diffusion_steps=5, embedding_scale=1):\n",
|
| 192 |
+
" text = text.strip()\n",
|
| 193 |
+
" text = text.replace('\"', '')\n",
|
| 194 |
+
" ps = global_phonemizer.phonemize([text])\n",
|
| 195 |
+
" ps = word_tokenize(ps[0])\n",
|
| 196 |
+
" ps = ' '.join(ps)\n",
|
| 197 |
+
"\n",
|
| 198 |
+
" tokens = textclenaer(ps)\n",
|
| 199 |
+
" tokens.insert(0, 0)\n",
|
| 200 |
+
" tokens = torch.LongTensor(tokens).to(device).unsqueeze(0)\n",
|
| 201 |
+
"\n",
|
| 202 |
+
" with torch.no_grad():\n",
|
| 203 |
+
" input_lengths = torch.LongTensor([tokens.shape[-1]]).to(tokens.device)\n",
|
| 204 |
+
" text_mask = length_to_mask(input_lengths).to(tokens.device)\n",
|
| 205 |
+
"\n",
|
| 206 |
+
" t_en = model.text_encoder(tokens, input_lengths, text_mask)\n",
|
| 207 |
+
" bert_dur = model.bert(tokens, attention_mask=(~text_mask).int())\n",
|
| 208 |
+
" d_en = model.bert_encoder(bert_dur).transpose(-1, -2)\n",
|
| 209 |
+
"\n",
|
| 210 |
+
" s_pred = sampler(noise,\n",
|
| 211 |
+
" embedding=bert_dur[0].unsqueeze(0), num_steps=diffusion_steps,\n",
|
| 212 |
+
" embedding_scale=embedding_scale).squeeze(0)\n",
|
| 213 |
+
"\n",
|
| 214 |
+
" s = s_pred[:, 128:]\n",
|
| 215 |
+
" ref = s_pred[:, :128]\n",
|
| 216 |
+
"\n",
|
| 217 |
+
" d = model.predictor.text_encoder(d_en, s, input_lengths, text_mask)\n",
|
| 218 |
+
"\n",
|
| 219 |
+
" x, _ = model.predictor.lstm(d)\n",
|
| 220 |
+
" duration = model.predictor.duration_proj(x)\n",
|
| 221 |
+
" duration = torch.sigmoid(duration).sum(axis=-1)\n",
|
| 222 |
+
" pred_dur = torch.round(duration.squeeze()).clamp(min=1)\n",
|
| 223 |
+
"\n",
|
| 224 |
+
" pred_dur[-1] += 5\n",
|
| 225 |
+
"\n",
|
| 226 |
+
" pred_aln_trg = torch.zeros(input_lengths, int(pred_dur.sum().data))\n",
|
| 227 |
+
" c_frame = 0\n",
|
| 228 |
+
" for i in range(pred_aln_trg.size(0)):\n",
|
| 229 |
+
" pred_aln_trg[i, c_frame:c_frame + int(pred_dur[i].data)] = 1\n",
|
| 230 |
+
" c_frame += int(pred_dur[i].data)\n",
|
| 231 |
+
"\n",
|
| 232 |
+
" # encode prosody\n",
|
| 233 |
+
" en = (d.transpose(-1, -2) @ pred_aln_trg.unsqueeze(0).to(device))\n",
|
| 234 |
+
" F0_pred, N_pred = model.predictor.F0Ntrain(en, s)\n",
|
| 235 |
+
" out = model.decoder((t_en @ pred_aln_trg.unsqueeze(0).to(device)),\n",
|
| 236 |
+
" F0_pred, N_pred, ref.squeeze().unsqueeze(0))\n",
|
| 237 |
+
"\n",
|
| 238 |
+
" return out.squeeze().cpu().numpy()\n",
|
| 239 |
+
"\n",
|
| 240 |
+
"def LFinference(text, s_prev, noise, alpha=0.7, diffusion_steps=5, embedding_scale=1):\n",
|
| 241 |
+
" text = text.strip()\n",
|
| 242 |
+
" text = text.replace('\"', '')\n",
|
| 243 |
+
" ps = global_phonemizer.phonemize([text])\n",
|
| 244 |
+
" ps = word_tokenize(ps[0])\n",
|
| 245 |
+
" ps = ' '.join(ps)\n",
|
| 246 |
+
"\n",
|
| 247 |
+
" tokens = textclenaer(ps)\n",
|
| 248 |
+
" tokens.insert(0, 0)\n",
|
| 249 |
+
" tokens = torch.LongTensor(tokens).to(device).unsqueeze(0)\n",
|
| 250 |
+
"\n",
|
| 251 |
+
" with torch.no_grad():\n",
|
| 252 |
+
" input_lengths = torch.LongTensor([tokens.shape[-1]]).to(tokens.device)\n",
|
| 253 |
+
" text_mask = length_to_mask(input_lengths).to(tokens.device)\n",
|
| 254 |
+
"\n",
|
| 255 |
+
" t_en = model.text_encoder(tokens, input_lengths, text_mask)\n",
|
| 256 |
+
" bert_dur = model.bert(tokens, attention_mask=(~text_mask).int())\n",
|
| 257 |
+
" d_en = model.bert_encoder(bert_dur).transpose(-1, -2)\n",
|
| 258 |
+
"\n",
|
| 259 |
+
" s_pred = sampler(noise,\n",
|
| 260 |
+
" embedding=bert_dur[0].unsqueeze(0), num_steps=diffusion_steps,\n",
|
| 261 |
+
" embedding_scale=embedding_scale).squeeze(0)\n",
|
| 262 |
+
"\n",
|
| 263 |
+
" if s_prev is not None:\n",
|
| 264 |
+
" # convex combination of previous and current style\n",
|
| 265 |
+
" s_pred = alpha * s_prev + (1 - alpha) * s_pred\n",
|
| 266 |
+
"\n",
|
| 267 |
+
" s = s_pred[:, 128:]\n",
|
| 268 |
+
" ref = s_pred[:, :128]\n",
|
| 269 |
+
"\n",
|
| 270 |
+
" d = model.predictor.text_encoder(d_en, s, input_lengths, text_mask)\n",
|
| 271 |
+
"\n",
|
| 272 |
+
" x, _ = model.predictor.lstm(d)\n",
|
| 273 |
+
" duration = model.predictor.duration_proj(x)\n",
|
| 274 |
+
" duration = torch.sigmoid(duration).sum(axis=-1)\n",
|
| 275 |
+
" pred_dur = torch.round(duration.squeeze()).clamp(min=1)\n",
|
| 276 |
+
"\n",
|
| 277 |
+
" pred_aln_trg = torch.zeros(input_lengths, int(pred_dur.sum().data))\n",
|
| 278 |
+
" c_frame = 0\n",
|
| 279 |
+
" for i in range(pred_aln_trg.size(0)):\n",
|
| 280 |
+
" pred_aln_trg[i, c_frame:c_frame + int(pred_dur[i].data)] = 1\n",
|
| 281 |
+
" c_frame += int(pred_dur[i].data)\n",
|
| 282 |
+
"\n",
|
| 283 |
+
" # encode prosody\n",
|
| 284 |
+
" en = (d.transpose(-1, -2) @ pred_aln_trg.unsqueeze(0).to(device))\n",
|
| 285 |
+
" F0_pred, N_pred = model.predictor.F0Ntrain(en, s)\n",
|
| 286 |
+
" out = model.decoder((t_en @ pred_aln_trg.unsqueeze(0).to(device)),\n",
|
| 287 |
+
" F0_pred, N_pred, ref.squeeze().unsqueeze(0))\n",
|
| 288 |
+
"\n",
|
| 289 |
+
" return out.squeeze().cpu().numpy(), s_pred"
|
| 290 |
+
],
|
| 291 |
+
"metadata": {
|
| 292 |
+
"id": "m0XRpbxSCSix"
|
| 293 |
+
},
|
| 294 |
+
"execution_count": null,
|
| 295 |
+
"outputs": []
|
| 296 |
+
},
|
| 297 |
+
{
|
| 298 |
+
"cell_type": "markdown",
|
| 299 |
+
"source": [
|
| 300 |
+
"### Synthesize speech"
|
| 301 |
+
],
|
| 302 |
+
"metadata": {
|
| 303 |
+
"id": "vuCbS0gdArgJ"
|
| 304 |
+
}
|
| 305 |
+
},
|
| 306 |
+
{
|
| 307 |
+
"cell_type": "code",
|
| 308 |
+
"source": [
|
| 309 |
+
"# @title Input Text { display-mode: \"form\" }\n",
|
| 310 |
+
"# synthesize a text\n",
|
| 311 |
+
"text = \"StyleTTS 2 is a text-to-speech model that leverages style diffusion and adversarial training with large speech language models to achieve human-level text-to-speech synthesis.\" # @param {type:\"string\"}\n"
|
| 312 |
+
],
|
| 313 |
+
"metadata": {
|
| 314 |
+
"id": "7Ud1Y-kbBPTw"
|
| 315 |
+
},
|
| 316 |
+
"execution_count": 3,
|
| 317 |
+
"outputs": []
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"cell_type": "markdown",
|
| 321 |
+
"source": [
|
| 322 |
+
"#### Basic synthesis (5 diffusion steps)"
|
| 323 |
+
],
|
| 324 |
+
"metadata": {
|
| 325 |
+
"id": "TM2NjuM7B6sz"
|
| 326 |
+
}
|
| 327 |
+
},
|
| 328 |
+
{
|
| 329 |
+
"cell_type": "code",
|
| 330 |
+
"source": [
|
| 331 |
+
"start = time.time()\n",
|
| 332 |
+
"noise = torch.randn(1,1,256).to(device)\n",
|
| 333 |
+
"wav = inference(text, noise, diffusion_steps=5, embedding_scale=1)\n",
|
| 334 |
+
"rtf = (time.time() - start) / (len(wav) / 24000)\n",
|
| 335 |
+
"print(f\"RTF = {rtf:5f}\")\n",
|
| 336 |
+
"import IPython.display as ipd\n",
|
| 337 |
+
"display(ipd.Audio(wav, rate=24000))"
|
| 338 |
+
],
|
| 339 |
+
"metadata": {
|
| 340 |
+
"id": "KILqC-V-Ay5e"
|
| 341 |
+
},
|
| 342 |
+
"execution_count": null,
|
| 343 |
+
"outputs": []
|
| 344 |
+
},
|
| 345 |
+
{
|
| 346 |
+
"cell_type": "markdown",
|
| 347 |
+
"source": [
|
| 348 |
+
"#### With higher diffusion steps (more diverse)\n",
|
| 349 |
+
"Since the sampler is ancestral, the more steps are used, the more diverse the samples are, at the cost of slower synthesis speed."
|
| 350 |
+
],
|
| 351 |
+
"metadata": {
|
| 352 |
+
"id": "oZk9o-EzCBVx"
|
| 353 |
+
}
|
| 354 |
+
},
|
| 355 |
+
{
|
| 356 |
+
"cell_type": "code",
|
| 357 |
+
"source": [
|
| 358 |
+
"start = time.time()\n",
|
| 359 |
+
"noise = torch.randn(1,1,256).to(device)\n",
|
| 360 |
+
"wav = inference(text, noise, diffusion_steps=10, embedding_scale=1)\n",
|
| 361 |
+
"rtf = (time.time() - start) / (len(wav) / 24000)\n",
|
| 362 |
+
"print(f\"RTF = {rtf:5f}\")\n",
|
| 363 |
+
"import IPython.display as ipd\n",
|
| 364 |
+
"display(ipd.Audio(wav, rate=24000))"
|
| 365 |
+
],
|
| 366 |
+
"metadata": {
|
| 367 |
+
"id": "9_OHtzMbB9gL"
|
| 368 |
+
},
|
| 369 |
+
"execution_count": null,
|
| 370 |
+
"outputs": []
|
| 371 |
+
},
|
| 372 |
+
{
|
| 373 |
+
"cell_type": "markdown",
|
| 374 |
+
"source": [
|
| 375 |
+
"### Speech expressiveness\n",
|
| 376 |
+
"The following section recreates the samples shown in [Section 6](https://styletts2.github.io/#emo) of the demo page."
|
| 377 |
+
],
|
| 378 |
+
"metadata": {
|
| 379 |
+
"id": "NyDACd-0CaqL"
|
| 380 |
+
}
|
| 381 |
+
},
|
| 382 |
+
{
|
| 383 |
+
"cell_type": "markdown",
|
| 384 |
+
"source": [
|
| 385 |
+
"#### With embedding_scale=1\n",
|
| 386 |
+
"This is the classifier-free guidance scale. The higher the scale, the more strongly the style is conditioned on the input text, and hence the more emotional the speech."
|
| 387 |
+
],
|
| 388 |
+
"metadata": {
|
| 389 |
+
"id": "cRkS5VWxCck4"
|
| 390 |
+
}
|
| 391 |
+
},
|
| 392 |
+
{
|
| 393 |
+
"cell_type": "code",
|
| 394 |
+
"source": [
|
| 395 |
+
"texts = {}\n",
|
| 396 |
+
"texts['Happy'] = \"We are happy to invite you to join us on a journey to the past, where we will visit the most amazing monuments ever built by human hands.\"\n",
|
| 397 |
+
"texts['Sad'] = \"I am sorry to say that we have suffered a severe setback in our efforts to restore prosperity and confidence.\"\n",
|
| 398 |
+
"texts['Angry'] = \"The field of astronomy is a joke! Its theories are based on flawed observations and biased interpretations!\"\n",
|
| 399 |
+
"texts['Surprised'] = \"I can't believe it! You mean to tell me that you have discovered a new species of bacteria in this pond?\"\n",
|
| 400 |
+
"\n",
|
| 401 |
+
"for k,v in texts.items():\n",
|
| 402 |
+
" noise = torch.randn(1,1,256).to(device)\n",
|
| 403 |
+
" wav = inference(v, noise, diffusion_steps=10, embedding_scale=1)\n",
|
| 404 |
+
" print(k + \": \")\n",
|
| 405 |
+
" display(ipd.Audio(wav, rate=24000, normalize=False))"
|
| 406 |
+
],
|
| 407 |
+
"metadata": {
|
| 408 |
+
"id": "H5g5RO-mCbZB"
|
| 409 |
+
},
|
| 410 |
+
"execution_count": null,
|
| 411 |
+
"outputs": []
|
| 412 |
+
},
|
| 413 |
+
{
|
| 414 |
+
"cell_type": "markdown",
|
| 415 |
+
"source": [
|
| 416 |
+
"#### With embedding_scale=2"
|
| 417 |
+
],
|
| 418 |
+
"metadata": {
|
| 419 |
+
"id": "f4S8TXSpCgpA"
|
| 420 |
+
}
|
| 421 |
+
},
|
| 422 |
+
{
|
| 423 |
+
"cell_type": "code",
|
| 424 |
+
"source": [
|
| 425 |
+
"texts = {}\n",
|
| 426 |
+
"texts['Happy'] = \"We are happy to invite you to join us on a journey to the past, where we will visit the most amazing monuments ever built by human hands.\"\n",
|
| 427 |
+
"texts['Sad'] = \"I am sorry to say that we have suffered a severe setback in our efforts to restore prosperity and confidence.\"\n",
|
| 428 |
+
"texts['Angry'] = \"The field of astronomy is a joke! Its theories are based on flawed observations and biased interpretations!\"\n",
|
| 429 |
+
"texts['Surprised'] = \"I can't believe it! You mean to tell me that you have discovered a new species of bacteria in this pond?\"\n",
|
| 430 |
+
"\n",
|
| 431 |
+
"for k,v in texts.items():\n",
|
| 432 |
+
" noise = torch.randn(1,1,256).to(device)\n",
|
| 433 |
+
" wav = inference(v, noise, diffusion_steps=10, embedding_scale=2) # embedding_scale=2 for more pronounced emotion\n",
|
| 434 |
+
" print(k + \": \")\n",
|
| 435 |
+
" display(ipd.Audio(wav, rate=24000, normalize=False))"
|
| 436 |
+
],
|
| 437 |
+
"metadata": {
|
| 438 |
+
"id": "xHHIdeNrCezC"
|
| 439 |
+
},
|
| 440 |
+
"execution_count": null,
|
| 441 |
+
"outputs": []
|
| 442 |
+
},
|
| 443 |
+
{
|
| 444 |
+
"cell_type": "markdown",
|
| 445 |
+
"source": [
|
| 446 |
+
"### Long-form generation\n",
|
| 447 |
+
"This section includes a basic implementation of Algorithm 1 in the paper for consistent long-form audio generation. The example passage is taken from [Section 5](https://styletts2.github.io/#long) of the demo page."
|
| 448 |
+
],
|
| 449 |
+
"metadata": {
|
| 450 |
+
"id": "nAh7Tov4CkuH"
|
| 451 |
+
}
|
| 452 |
+
},
|
| 453 |
+
{
|
| 454 |
+
"cell_type": "code",
|
| 455 |
+
"source": [
|
| 456 |
+
"passage = '''If the supply of fruit is greater than the family needs, it may be made a source of income by sending the fresh fruit to the market if there is one near enough, or by preserving, canning, and making jelly for sale. To make such an enterprise a success the fruit and work must be first class. There is magic in the word \"Homemade,\" when the product appeals to the eye and the palate; but many careless and incompetent people have found to their sorrow that this word has not magic enough to float inferior goods on the market. As a rule large canning and preserving establishments are clean and have the best appliances, and they employ chemists and skilled labor. The home product must be very good to compete with the attractive goods that are sent out from such establishments. Yet for first-class homemade products there is a market in all large cities. All first-class grocers have customers who purchase such goods.''' # @param {type:\"string\"}"
|
| 457 |
+
],
|
| 458 |
+
"metadata": {
|
| 459 |
+
"cellView": "form",
|
| 460 |
+
"id": "IJwUbgvACoDu"
|
| 461 |
+
},
|
| 462 |
+
"execution_count": 8,
|
| 463 |
+
"outputs": []
|
| 464 |
+
},
|
| 465 |
+
{
|
| 466 |
+
"cell_type": "code",
|
| 467 |
+
"source": [
|
| 468 |
+
"sentences = passage.split('.') # simple split by period\n",
|
| 469 |
+
"wavs = []\n",
|
| 470 |
+
"s_prev = None\n",
|
| 471 |
+
"for text in sentences:\n",
|
| 472 |
+
" if text.strip() == \"\": continue\n",
|
| 473 |
+
" text += '.' # add it back\n",
|
| 474 |
+
" noise = torch.randn(1,1,256).to(device)\n",
|
| 475 |
+
" wav, s_prev = LFinference(text, s_prev, noise, alpha=0.7, diffusion_steps=10, embedding_scale=1.5)\n",
|
| 476 |
+
" wavs.append(wav)\n",
|
| 477 |
+
"display(ipd.Audio(np.concatenate(wavs), rate=24000, normalize=False))"
|
| 478 |
+
],
|
| 479 |
+
"metadata": {
|
| 480 |
+
"id": "nP-7i2QAC0JT"
|
| 481 |
+
},
|
| 482 |
+
"execution_count": null,
|
| 483 |
+
"outputs": []
|
| 484 |
+
}
|
| 485 |
+
]
|
| 486 |
+
}
|
stylekan/Colab/StyleTTS2_Demo_LibriTTS.ipynb
ADDED
|
@@ -0,0 +1,1218 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {
|
| 6 |
+
"id": "view-in-github",
|
| 7 |
+
"colab_type": "text"
|
| 8 |
+
},
|
| 9 |
+
"source": [
|
| 10 |
+
"<a href=\"https://colab.research.google.com/github/yl4579/StyleTTS2/blob/main/Colab/StyleTTS2_Demo_LibriTTS.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
|
| 11 |
+
]
|
| 12 |
+
},
|
| 13 |
+
{
|
| 14 |
+
"cell_type": "markdown",
|
| 15 |
+
"metadata": {
|
| 16 |
+
"id": "aAGQPfgYIR23"
|
| 17 |
+
},
|
| 18 |
+
"source": [
|
| 19 |
+
"### Install packages and download models"
|
| 20 |
+
]
|
| 21 |
+
},
|
| 22 |
+
{
|
| 23 |
+
"cell_type": "code",
|
| 24 |
+
"execution_count": null,
|
| 25 |
+
"metadata": {
|
| 26 |
+
"colab": {
|
| 27 |
+
"base_uri": "https://localhost:8080/"
|
| 28 |
+
},
|
| 29 |
+
"id": "zDPW5uSpISd2",
|
| 30 |
+
"outputId": "6463ff79-18d5-4071-c6ad-01947beeb368"
|
| 31 |
+
},
|
| 32 |
+
"outputs": [
|
| 33 |
+
{
|
| 34 |
+
"output_type": "stream",
|
| 35 |
+
"name": "stdout",
|
| 36 |
+
"text": [
|
| 37 |
+
|
| 38 |
+
]
|
| 39 |
+
}
|
| 40 |
+
],
|
| 41 |
+
"source": [
|
| 42 |
+
"%%shell\n",
|
| 43 |
+
"git clone https://github.com/yl4579/StyleTTS2.git\n",
|
| 44 |
+
"cd StyleTTS2\n",
|
| 45 |
+
"pip install SoundFile torchaudio munch torch pydub pyyaml librosa nltk matplotlib accelerate transformers phonemizer einops einops-exts tqdm typing-extensions git+https://github.com/resemble-ai/monotonic_align.git\n",
|
| 46 |
+
"sudo apt-get install espeak-ng\n",
|
| 47 |
+
"git-lfs clone https://huggingface.co/yl4579/StyleTTS2-LibriTTS\n",
|
| 48 |
+
"mv StyleTTS2-LibriTTS/Models .\n",
|
| 49 |
+
"mv StyleTTS2-LibriTTS/reference_audio.zip .\n",
|
| 50 |
+
"unzip reference_audio.zip\n",
|
| 51 |
+
"mv reference_audio Demo/reference_audio"
|
| 52 |
+
]
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"cell_type": "markdown",
|
| 56 |
+
"metadata": {
|
| 57 |
+
"id": "eJdB_nCOIVIN"
|
| 58 |
+
},
|
| 59 |
+
"source": [
|
| 60 |
+
"### Load models"
|
| 61 |
+
]
|
| 62 |
+
},
|
| 63 |
+
{
|
| 64 |
+
"cell_type": "code",
|
| 65 |
+
"execution_count": null,
|
| 66 |
+
"metadata": {
|
| 67 |
+
"id": "cha8Tr2uJwN0"
|
| 68 |
+
},
|
| 69 |
+
"outputs": [],
|
| 70 |
+
"source": [
|
| 71 |
+
"import nltk\n",
|
| 72 |
+
"nltk.download('punkt')"
|
| 73 |
+
]
|
| 74 |
+
},
|
| 75 |
+
{
|
| 76 |
+
"cell_type": "code",
|
| 77 |
+
"execution_count": null,
|
| 78 |
+
"metadata": {
|
| 79 |
+
"id": "Qoow8Wd8ITtm"
|
| 80 |
+
},
|
| 81 |
+
"outputs": [],
|
| 82 |
+
"source": [
|
| 83 |
+
"%cd StyleTTS2\n",
|
| 84 |
+
"\n",
|
| 85 |
+
"import torch\n",
|
| 86 |
+
"torch.manual_seed(0)\n",
|
| 87 |
+
"torch.backends.cudnn.benchmark = False\n",
|
| 88 |
+
"torch.backends.cudnn.deterministic = True\n",
|
| 89 |
+
"\n",
|
| 90 |
+
"import random\n",
|
| 91 |
+
"random.seed(0)\n",
|
| 92 |
+
"\n",
|
| 93 |
+
"import numpy as np\n",
|
| 94 |
+
"np.random.seed(0)\n",
|
| 95 |
+
"\n",
|
| 96 |
+
"# load packages\n",
|
| 97 |
+
"import time\n",
|
| 98 |
+
"import random\n",
|
| 99 |
+
"import yaml\n",
|
| 100 |
+
"from munch import Munch\n",
|
| 101 |
+
"import numpy as np\n",
|
| 102 |
+
"import torch\n",
|
| 103 |
+
"from torch import nn\n",
|
| 104 |
+
"import torch.nn.functional as F\n",
|
| 105 |
+
"import torchaudio\n",
|
| 106 |
+
"import librosa\n",
|
| 107 |
+
"from nltk.tokenize import word_tokenize\n",
|
| 108 |
+
"\n",
|
| 109 |
+
"from models import *\n",
|
| 110 |
+
"from utils import *\n",
|
| 111 |
+
"from text_utils import TextCleaner\n",
|
| 112 |
+
"textclenaer = TextCleaner()\n",
|
| 113 |
+
"\n",
|
| 114 |
+
"%matplotlib inline\n",
|
| 115 |
+
"\n",
|
| 116 |
+
"to_mel = torchaudio.transforms.MelSpectrogram(\n",
|
| 117 |
+
" n_mels=80, n_fft=2048, win_length=1200, hop_length=300)\n",
|
| 118 |
+
"mean, std = -4, 4\n",
|
| 119 |
+
"\n",
|
| 120 |
+
"def length_to_mask(lengths):\n",
|
| 121 |
+
" mask = torch.arange(lengths.max()).unsqueeze(0).expand(lengths.shape[0], -1).type_as(lengths)\n",
|
| 122 |
+
" mask = torch.gt(mask+1, lengths.unsqueeze(1))\n",
|
| 123 |
+
" return mask\n",
|
| 124 |
+
"\n",
|
| 125 |
+
"def preprocess(wave):\n",
|
| 126 |
+
" wave_tensor = torch.from_numpy(wave).float()\n",
|
| 127 |
+
" mel_tensor = to_mel(wave_tensor)\n",
|
| 128 |
+
" mel_tensor = (torch.log(1e-5 + mel_tensor.unsqueeze(0)) - mean) / std\n",
|
| 129 |
+
" return mel_tensor\n",
|
| 130 |
+
"\n",
|
| 131 |
+
"def compute_style(path):\n",
|
| 132 |
+
" wave, sr = librosa.load(path, sr=24000)\n",
|
| 133 |
+
" audio, index = librosa.effects.trim(wave, top_db=30)\n",
|
| 134 |
+
" if sr != 24000:\n",
|
| 135 |
+
" audio = librosa.resample(audio, sr, 24000)\n",
|
| 136 |
+
" mel_tensor = preprocess(audio).to(device)\n",
|
| 137 |
+
"\n",
|
| 138 |
+
" with torch.no_grad():\n",
|
| 139 |
+
" ref_s = model.style_encoder(mel_tensor.unsqueeze(1))\n",
|
| 140 |
+
" ref_p = model.predictor_encoder(mel_tensor.unsqueeze(1))\n",
|
| 141 |
+
"\n",
|
| 142 |
+
" return torch.cat([ref_s, ref_p], dim=1)\n",
|
| 143 |
+
"\n",
|
| 144 |
+
"device = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
|
| 145 |
+
"\n",
|
| 146 |
+
"# load phonemizer\n",
|
| 147 |
+
"import phonemizer\n",
|
| 148 |
+
"global_phonemizer = phonemizer.backend.EspeakBackend(language='en-us', preserve_punctuation=True, with_stress=True)\n",
|
| 149 |
+
"\n",
|
| 150 |
+
"config = yaml.safe_load(open(\"Models/LibriTTS/config.yml\"))\n",
|
| 151 |
+
"\n",
|
| 152 |
+
"# load pretrained ASR model\n",
|
| 153 |
+
"ASR_config = config.get('ASR_config', False)\n",
|
| 154 |
+
"ASR_path = config.get('ASR_path', False)\n",
|
| 155 |
+
"text_aligner = load_ASR_models(ASR_path, ASR_config)\n",
|
| 156 |
+
"\n",
|
| 157 |
+
"# load pretrained F0 model\n",
|
| 158 |
+
"F0_path = config.get('F0_path', False)\n",
|
| 159 |
+
"pitch_extractor = load_F0_models(F0_path)\n",
|
| 160 |
+
"\n",
|
| 161 |
+
"# load BERT model\n",
|
| 162 |
+
"from Utils.PLBERT.util import load_plbert\n",
|
| 163 |
+
"BERT_path = config.get('PLBERT_dir', False)\n",
|
| 164 |
+
"plbert = load_plbert(BERT_path)\n",
|
| 165 |
+
"\n",
|
| 166 |
+
"model_params = recursive_munch(config['model_params'])\n",
|
| 167 |
+
"model = build_model(model_params, text_aligner, pitch_extractor, plbert)\n",
|
| 168 |
+
"_ = [model[key].eval() for key in model]\n",
|
| 169 |
+
"_ = [model[key].to(device) for key in model]\n",
|
| 170 |
+
"\n",
|
| 171 |
+
"params_whole = torch.load(\"Models/LibriTTS/epochs_2nd_00020.pth\", map_location='cpu')\n",
|
| 172 |
+
"params = params_whole['net']\n",
|
| 173 |
+
"\n",
|
| 174 |
+
"for key in model:\n",
|
| 175 |
+
" if key in params:\n",
|
| 176 |
+
" print('%s loaded' % key)\n",
|
| 177 |
+
" try:\n",
|
| 178 |
+
" model[key].load_state_dict(params[key])\n",
|
| 179 |
+
" except:\n",
|
| 180 |
+
" from collections import OrderedDict\n",
|
| 181 |
+
" state_dict = params[key]\n",
|
| 182 |
+
" new_state_dict = OrderedDict()\n",
|
| 183 |
+
" for k, v in state_dict.items():\n",
|
| 184 |
+
" name = k[7:] # remove `module.`\n",
|
| 185 |
+
" new_state_dict[name] = v\n",
|
| 186 |
+
" # load params\n",
|
| 187 |
+
" model[key].load_state_dict(new_state_dict, strict=False)\n",
|
| 188 |
+
"# except:\n",
|
| 189 |
+
"# _load(params[key], model[key])\n",
|
| 190 |
+
"_ = [model[key].eval() for key in model]\n",
|
| 191 |
+
"\n",
|
| 192 |
+
"from Modules.diffusion.sampler import DiffusionSampler, ADPM2Sampler, KarrasSchedule\n",
|
| 193 |
+
"\n",
|
| 194 |
+
"sampler = DiffusionSampler(\n",
|
| 195 |
+
" model.diffusion.diffusion,\n",
|
| 196 |
+
" sampler=ADPM2Sampler(),\n",
|
| 197 |
+
" sigma_schedule=KarrasSchedule(sigma_min=0.0001, sigma_max=3.0, rho=9.0), # empirical parameters\n",
|
| 198 |
+
" clamp=False\n",
|
| 199 |
+
")\n",
|
| 200 |
+
"\n",
|
| 201 |
+
"def inference(text, ref_s, alpha = 0.3, beta = 0.7, diffusion_steps=5, embedding_scale=1):\n",
|
| 202 |
+
" text = text.strip()\n",
|
| 203 |
+
" ps = global_phonemizer.phonemize([text])\n",
|
| 204 |
+
" ps = word_tokenize(ps[0])\n",
|
| 205 |
+
" ps = ' '.join(ps)\n",
|
| 206 |
+
" tokens = textclenaer(ps)\n",
|
| 207 |
+
" tokens.insert(0, 0)\n",
|
| 208 |
+
" tokens = torch.LongTensor(tokens).to(device).unsqueeze(0)\n",
|
| 209 |
+
"\n",
|
| 210 |
+
" with torch.no_grad():\n",
|
| 211 |
+
" input_lengths = torch.LongTensor([tokens.shape[-1]]).to(device)\n",
|
| 212 |
+
" text_mask = length_to_mask(input_lengths).to(device)\n",
|
| 213 |
+
"\n",
|
| 214 |
+
" t_en = model.text_encoder(tokens, input_lengths, text_mask)\n",
|
| 215 |
+
" bert_dur = model.bert(tokens, attention_mask=(~text_mask).int())\n",
|
| 216 |
+
" d_en = model.bert_encoder(bert_dur).transpose(-1, -2)\n",
|
| 217 |
+
"\n",
|
| 218 |
+
" s_pred = sampler(noise = torch.randn((1, 256)).unsqueeze(1).to(device),\n",
|
| 219 |
+
" embedding=bert_dur,\n",
|
| 220 |
+
" embedding_scale=embedding_scale,\n",
|
| 221 |
+
" features=ref_s, # reference from the same speaker as the embedding\n",
|
| 222 |
+
" num_steps=diffusion_steps).squeeze(1)\n",
|
| 223 |
+
"\n",
|
| 224 |
+
"\n",
|
| 225 |
+
" s = s_pred[:, 128:]\n",
|
| 226 |
+
" ref = s_pred[:, :128]\n",
|
| 227 |
+
"\n",
|
| 228 |
+
" ref = alpha * ref + (1 - alpha) * ref_s[:, :128]\n",
|
| 229 |
+
" s = beta * s + (1 - beta) * ref_s[:, 128:]\n",
|
| 230 |
+
"\n",
|
| 231 |
+
" d = model.predictor.text_encoder(d_en,\n",
|
| 232 |
+
" s, input_lengths, text_mask)\n",
|
| 233 |
+
"\n",
|
| 234 |
+
" x, _ = model.predictor.lstm(d)\n",
|
| 235 |
+
" duration = model.predictor.duration_proj(x)\n",
|
| 236 |
+
"\n",
|
| 237 |
+
" duration = torch.sigmoid(duration).sum(axis=-1)\n",
|
| 238 |
+
" pred_dur = torch.round(duration.squeeze()).clamp(min=1)\n",
|
| 239 |
+
"\n",
|
| 240 |
+
"\n",
|
| 241 |
+
" pred_aln_trg = torch.zeros(input_lengths, int(pred_dur.sum().data))\n",
|
| 242 |
+
" c_frame = 0\n",
|
| 243 |
+
" for i in range(pred_aln_trg.size(0)):\n",
|
| 244 |
+
" pred_aln_trg[i, c_frame:c_frame + int(pred_dur[i].data)] = 1\n",
|
| 245 |
+
" c_frame += int(pred_dur[i].data)\n",
|
| 246 |
+
"\n",
|
| 247 |
+
" # encode prosody\n",
|
| 248 |
+
" en = (d.transpose(-1, -2) @ pred_aln_trg.unsqueeze(0).to(device))\n",
|
| 249 |
+
" if model_params.decoder.type == \"hifigan\":\n",
|
| 250 |
+
" asr_new = torch.zeros_like(en)\n",
|
| 251 |
+
" asr_new[:, :, 0] = en[:, :, 0]\n",
|
| 252 |
+
" asr_new[:, :, 1:] = en[:, :, 0:-1]\n",
|
| 253 |
+
" en = asr_new\n",
|
| 254 |
+
"\n",
|
| 255 |
+
" F0_pred, N_pred = model.predictor.F0Ntrain(en, s)\n",
|
| 256 |
+
"\n",
|
| 257 |
+
" asr = (t_en @ pred_aln_trg.unsqueeze(0).to(device))\n",
|
| 258 |
+
" if model_params.decoder.type == \"hifigan\":\n",
|
| 259 |
+
" asr_new = torch.zeros_like(asr)\n",
|
| 260 |
+
" asr_new[:, :, 0] = asr[:, :, 0]\n",
|
| 261 |
+
" asr_new[:, :, 1:] = asr[:, :, 0:-1]\n",
|
| 262 |
+
" asr = asr_new\n",
|
| 263 |
+
"\n",
|
| 264 |
+
" out = model.decoder(asr,\n",
|
| 265 |
+
" F0_pred, N_pred, ref.squeeze().unsqueeze(0))\n",
|
| 266 |
+
"\n",
|
| 267 |
+
"\n",
|
| 268 |
+
" return out.squeeze().cpu().numpy()[..., :-50] # weird pulse at the end of the model, need to be fixed later\n",
|
| 269 |
+
"\n",
|
| 270 |
+
"def LFinference(text, s_prev, ref_s, alpha = 0.3, beta = 0.7, t = 0.7, diffusion_steps=5, embedding_scale=1):\n",
|
| 271 |
+
" text = text.strip()\n",
|
| 272 |
+
" ps = global_phonemizer.phonemize([text])\n",
|
| 273 |
+
" ps = word_tokenize(ps[0])\n",
|
| 274 |
+
" ps = ' '.join(ps)\n",
|
| 275 |
+
" ps = ps.replace('``', '\"')\n",
|
| 276 |
+
" ps = ps.replace(\"''\", '\"')\n",
|
| 277 |
+
"\n",
|
| 278 |
+
" tokens = textclenaer(ps)\n",
|
| 279 |
+
" tokens.insert(0, 0)\n",
|
| 280 |
+
" tokens = torch.LongTensor(tokens).to(device).unsqueeze(0)\n",
|
| 281 |
+
"\n",
|
| 282 |
+
" with torch.no_grad():\n",
|
| 283 |
+
" input_lengths = torch.LongTensor([tokens.shape[-1]]).to(device)\n",
|
| 284 |
+
" text_mask = length_to_mask(input_lengths).to(device)\n",
|
| 285 |
+
"\n",
|
| 286 |
+
" t_en = model.text_encoder(tokens, input_lengths, text_mask)\n",
|
| 287 |
+
" bert_dur = model.bert(tokens, attention_mask=(~text_mask).int())\n",
|
| 288 |
+
" d_en = model.bert_encoder(bert_dur).transpose(-1, -2)\n",
|
| 289 |
+
"\n",
|
| 290 |
+
" s_pred = sampler(noise = torch.randn((1, 256)).unsqueeze(1).to(device),\n",
|
| 291 |
+
" embedding=bert_dur,\n",
|
| 292 |
+
" embedding_scale=embedding_scale,\n",
|
| 293 |
+
" features=ref_s, # reference from the same speaker as the embedding\n",
|
| 294 |
+
" num_steps=diffusion_steps).squeeze(1)\n",
|
| 295 |
+
"\n",
|
| 296 |
+
" if s_prev is not None:\n",
|
| 297 |
+
" # convex combination of previous and current style\n",
|
| 298 |
+
" s_pred = t * s_prev + (1 - t) * s_pred\n",
|
| 299 |
+
"\n",
|
| 300 |
+
" s = s_pred[:, 128:]\n",
|
| 301 |
+
" ref = s_pred[:, :128]\n",
|
| 302 |
+
"\n",
|
| 303 |
+
" ref = alpha * ref + (1 - alpha) * ref_s[:, :128]\n",
|
| 304 |
+
" s = beta * s + (1 - beta) * ref_s[:, 128:]\n",
|
| 305 |
+
"\n",
|
| 306 |
+
" s_pred = torch.cat([ref, s], dim=-1)\n",
|
| 307 |
+
"\n",
|
| 308 |
+
" d = model.predictor.text_encoder(d_en,\n",
|
| 309 |
+
" s, input_lengths, text_mask)\n",
|
| 310 |
+
"\n",
|
| 311 |
+
" x, _ = model.predictor.lstm(d)\n",
|
| 312 |
+
" duration = model.predictor.duration_proj(x)\n",
|
| 313 |
+
"\n",
|
| 314 |
+
" duration = torch.sigmoid(duration).sum(axis=-1)\n",
|
| 315 |
+
" pred_dur = torch.round(duration.squeeze()).clamp(min=1)\n",
|
| 316 |
+
"\n",
|
| 317 |
+
"\n",
|
| 318 |
+
" pred_aln_trg = torch.zeros(input_lengths, int(pred_dur.sum().data))\n",
|
| 319 |
+
" c_frame = 0\n",
|
| 320 |
+
" for i in range(pred_aln_trg.size(0)):\n",
|
| 321 |
+
" pred_aln_trg[i, c_frame:c_frame + int(pred_dur[i].data)] = 1\n",
|
| 322 |
+
" c_frame += int(pred_dur[i].data)\n",
|
| 323 |
+
"\n",
|
| 324 |
+
" # encode prosody\n",
|
| 325 |
+
" en = (d.transpose(-1, -2) @ pred_aln_trg.unsqueeze(0).to(device))\n",
|
| 326 |
+
" if model_params.decoder.type == \"hifigan\":\n",
|
| 327 |
+
" asr_new = torch.zeros_like(en)\n",
|
| 328 |
+
" asr_new[:, :, 0] = en[:, :, 0]\n",
|
| 329 |
+
" asr_new[:, :, 1:] = en[:, :, 0:-1]\n",
|
| 330 |
+
" en = asr_new\n",
|
| 331 |
+
"\n",
|
| 332 |
+
" F0_pred, N_pred = model.predictor.F0Ntrain(en, s)\n",
|
| 333 |
+
"\n",
|
| 334 |
+
" asr = (t_en @ pred_aln_trg.unsqueeze(0).to(device))\n",
|
| 335 |
+
" if model_params.decoder.type == \"hifigan\":\n",
|
| 336 |
+
" asr_new = torch.zeros_like(asr)\n",
|
| 337 |
+
" asr_new[:, :, 0] = asr[:, :, 0]\n",
|
| 338 |
+
" asr_new[:, :, 1:] = asr[:, :, 0:-1]\n",
|
| 339 |
+
" asr = asr_new\n",
|
| 340 |
+
"\n",
|
| 341 |
+
" out = model.decoder(asr,\n",
|
| 342 |
+
" F0_pred, N_pred, ref.squeeze().unsqueeze(0))\n",
|
| 343 |
+
"\n",
|
| 344 |
+
"\n",
|
| 345 |
+
" return out.squeeze().cpu().numpy()[..., :-100], s_pred # weird pulse at the end of the model, need to be fixed later\n",
|
| 346 |
+
"\n",
|
| 347 |
+
"def STinference(text, ref_s, ref_text, alpha = 0.3, beta = 0.7, diffusion_steps=5, embedding_scale=1):\n",
|
| 348 |
+
" text = text.strip()\n",
|
| 349 |
+
" ps = global_phonemizer.phonemize([text])\n",
|
| 350 |
+
" ps = word_tokenize(ps[0])\n",
|
| 351 |
+
" ps = ' '.join(ps)\n",
|
| 352 |
+
"\n",
|
| 353 |
+
" tokens = textclenaer(ps)\n",
|
| 354 |
+
" tokens.insert(0, 0)\n",
|
| 355 |
+
" tokens = torch.LongTensor(tokens).to(device).unsqueeze(0)\n",
|
| 356 |
+
"\n",
|
| 357 |
+
" ref_text = ref_text.strip()\n",
|
| 358 |
+
" ps = global_phonemizer.phonemize([ref_text])\n",
|
| 359 |
+
" ps = word_tokenize(ps[0])\n",
|
| 360 |
+
" ps = ' '.join(ps)\n",
|
| 361 |
+
"\n",
|
| 362 |
+
" ref_tokens = textclenaer(ps)\n",
|
| 363 |
+
" ref_tokens.insert(0, 0)\n",
|
| 364 |
+
" ref_tokens = torch.LongTensor(ref_tokens).to(device).unsqueeze(0)\n",
|
| 365 |
+
"\n",
|
| 366 |
+
"\n",
|
| 367 |
+
" with torch.no_grad():\n",
|
| 368 |
+
" input_lengths = torch.LongTensor([tokens.shape[-1]]).to(device)\n",
|
| 369 |
+
" text_mask = length_to_mask(input_lengths).to(device)\n",
|
| 370 |
+
"\n",
|
| 371 |
+
" t_en = model.text_encoder(tokens, input_lengths, text_mask)\n",
|
| 372 |
+
" bert_dur = model.bert(tokens, attention_mask=(~text_mask).int())\n",
|
| 373 |
+
" d_en = model.bert_encoder(bert_dur).transpose(-1, -2)\n",
|
| 374 |
+
"\n",
|
| 375 |
+
" ref_input_lengths = torch.LongTensor([ref_tokens.shape[-1]]).to(device)\n",
|
| 376 |
+
" ref_text_mask = length_to_mask(ref_input_lengths).to(device)\n",
|
| 377 |
+
" ref_bert_dur = model.bert(ref_tokens, attention_mask=(~ref_text_mask).int())\n",
|
| 378 |
+
" s_pred = sampler(noise = torch.randn((1, 256)).unsqueeze(1).to(device),\n",
|
| 379 |
+
" embedding=bert_dur,\n",
|
| 380 |
+
" embedding_scale=embedding_scale,\n",
|
| 381 |
+
" features=ref_s, # reference from the same speaker as the embedding\n",
|
| 382 |
+
" num_steps=diffusion_steps).squeeze(1)\n",
|
| 383 |
+
"\n",
|
| 384 |
+
"\n",
|
| 385 |
+
" s = s_pred[:, 128:]\n",
|
| 386 |
+
" ref = s_pred[:, :128]\n",
|
| 387 |
+
"\n",
|
| 388 |
+
" ref = alpha * ref + (1 - alpha) * ref_s[:, :128]\n",
|
| 389 |
+
" s = beta * s + (1 - beta) * ref_s[:, 128:]\n",
|
| 390 |
+
"\n",
|
| 391 |
+
" d = model.predictor.text_encoder(d_en,\n",
|
| 392 |
+
" s, input_lengths, text_mask)\n",
|
| 393 |
+
"\n",
|
| 394 |
+
" x, _ = model.predictor.lstm(d)\n",
|
| 395 |
+
" duration = model.predictor.duration_proj(x)\n",
|
| 396 |
+
"\n",
|
| 397 |
+
" duration = torch.sigmoid(duration).sum(axis=-1)\n",
|
| 398 |
+
" pred_dur = torch.round(duration.squeeze()).clamp(min=1)\n",
|
| 399 |
+
"\n",
|
| 400 |
+
"\n",
|
| 401 |
+
" pred_aln_trg = torch.zeros(input_lengths, int(pred_dur.sum().data))\n",
|
| 402 |
+
" c_frame = 0\n",
|
| 403 |
+
" for i in range(pred_aln_trg.size(0)):\n",
|
| 404 |
+
" pred_aln_trg[i, c_frame:c_frame + int(pred_dur[i].data)] = 1\n",
|
| 405 |
+
" c_frame += int(pred_dur[i].data)\n",
|
| 406 |
+
"\n",
|
| 407 |
+
" # encode prosody\n",
|
| 408 |
+
" en = (d.transpose(-1, -2) @ pred_aln_trg.unsqueeze(0).to(device))\n",
|
| 409 |
+
" if model_params.decoder.type == \"hifigan\":\n",
|
| 410 |
+
" asr_new = torch.zeros_like(en)\n",
|
| 411 |
+
" asr_new[:, :, 0] = en[:, :, 0]\n",
|
| 412 |
+
" asr_new[:, :, 1:] = en[:, :, 0:-1]\n",
|
| 413 |
+
" en = asr_new\n",
|
| 414 |
+
"\n",
|
| 415 |
+
" F0_pred, N_pred = model.predictor.F0Ntrain(en, s)\n",
|
| 416 |
+
"\n",
|
| 417 |
+
" asr = (t_en @ pred_aln_trg.unsqueeze(0).to(device))\n",
|
| 418 |
+
" if model_params.decoder.type == \"hifigan\":\n",
|
| 419 |
+
" asr_new = torch.zeros_like(asr)\n",
|
| 420 |
+
" asr_new[:, :, 0] = asr[:, :, 0]\n",
|
| 421 |
+
" asr_new[:, :, 1:] = asr[:, :, 0:-1]\n",
|
| 422 |
+
" asr = asr_new\n",
|
| 423 |
+
"\n",
|
| 424 |
+
" out = model.decoder(asr,\n",
|
| 425 |
+
" F0_pred, N_pred, ref.squeeze().unsqueeze(0))\n",
|
| 426 |
+
"\n",
|
| 427 |
+
"\n",
|
| 428 |
+
" return out.squeeze().cpu().numpy()[..., :-50] # weird pulse at the end of the model, need to be fixed later\n"
|
| 429 |
+
]
|
| 430 |
+
},
|
| 431 |
+
{
|
| 432 |
+
"cell_type": "markdown",
|
| 433 |
+
"metadata": {
|
| 434 |
+
"id": "32S6U0LyJbCA"
|
| 435 |
+
},
|
| 436 |
+
"source": [
|
| 437 |
+
"### Synthesize speech"
|
| 438 |
+
]
|
| 439 |
+
},
|
| 440 |
+
{
|
| 441 |
+
"cell_type": "markdown",
|
| 442 |
+
"metadata": {
|
| 443 |
+
"id": "ehK_0daMJdk_"
|
| 444 |
+
},
|
| 445 |
+
"source": [
|
| 446 |
+
"#### Basic synthesis (5 diffusion steps, seen speakers)"
|
| 447 |
+
]
|
| 448 |
+
},
|
| 449 |
+
{
|
| 450 |
+
"cell_type": "code",
|
| 451 |
+
"execution_count": null,
|
| 452 |
+
"metadata": {
|
| 453 |
+
"id": "SJs2x41MJhM-"
|
| 454 |
+
},
|
| 455 |
+
"outputs": [],
|
| 456 |
+
"source": [
|
| 457 |
+
"text = ''' StyleTTS 2 is a text to speech model that leverages style diffusion and adversarial training with large speech language models to achieve human level text to speech synthesis. ''' # @param {type:\"string\"}\n"
|
| 458 |
+
]
|
| 459 |
+
},
|
| 460 |
+
{
|
| 461 |
+
"cell_type": "code",
|
| 462 |
+
"execution_count": null,
|
| 463 |
+
"metadata": {
|
| 464 |
+
"id": "xuqIJe-IJb7A"
|
| 465 |
+
},
|
| 466 |
+
"outputs": [],
|
| 467 |
+
"source": [
|
| 468 |
+
"reference_dicts = {}\n",
|
| 469 |
+
"reference_dicts['696_92939'] = \"Demo/reference_audio/696_92939_000016_000006.wav\"\n",
|
| 470 |
+
"reference_dicts['1789_142896'] = \"Demo/reference_audio/1789_142896_000022_000005.wav\""
|
| 471 |
+
]
|
| 472 |
+
},
|
| 473 |
+
{
|
| 474 |
+
"cell_type": "code",
|
| 475 |
+
"execution_count": null,
|
| 476 |
+
"metadata": {
|
| 477 |
+
"id": "H3ra3IxJJmF0"
|
| 478 |
+
},
|
| 479 |
+
"outputs": [],
|
| 480 |
+
"source": [
|
| 481 |
+
"noise = torch.randn(1,1,256).to(device)\n",
|
| 482 |
+
"for k, path in reference_dicts.items():\n",
|
| 483 |
+
" ref_s = compute_style(path)\n",
|
| 484 |
+
" start = time.time()\n",
|
| 485 |
+
" wav = inference(text, ref_s, alpha=0.3, beta=0.7, diffusion_steps=5, embedding_scale=1)\n",
|
| 486 |
+
" rtf = (time.time() - start) / (len(wav) / 24000)\n",
|
| 487 |
+
" print(f\"RTF = {rtf:5f}\")\n",
|
| 488 |
+
" import IPython.display as ipd\n",
|
| 489 |
+
" print(k + ' Synthesized:')\n",
|
| 490 |
+
" display(ipd.Audio(wav, rate=24000, normalize=False))\n",
|
| 491 |
+
" print('Reference:')\n",
|
| 492 |
+
" display(ipd.Audio(path, rate=24000, normalize=False))"
|
| 493 |
+
]
|
| 494 |
+
},
|
| 495 |
+
{
|
| 496 |
+
"cell_type": "markdown",
|
| 497 |
+
"metadata": {
|
| 498 |
+
"id": "aB3wUz6yJ-P_"
|
| 499 |
+
},
|
| 500 |
+
"source": [
|
| 501 |
+
"#### With higher diffusion steps (more diverse)\n",
|
| 502 |
+
"\n",
|
| 503 |
+
"Since the sampler is ancestral, the higher the number of steps, the more diverse the samples are, at the cost of slower synthesis speed."
|
| 504 |
+
]
|
| 505 |
+
},
|
| 506 |
+
{
|
| 507 |
+
"cell_type": "code",
|
| 508 |
+
"execution_count": null,
|
| 509 |
+
"metadata": {
|
| 510 |
+
"id": "lF27XUo4JrKk"
|
| 511 |
+
},
|
| 512 |
+
"outputs": [],
|
| 513 |
+
"source": [
|
| 514 |
+
"noise = torch.randn(1,1,256).to(device)\n",
|
| 515 |
+
"for k, path in reference_dicts.items():\n",
|
| 516 |
+
" ref_s = compute_style(path)\n",
|
| 517 |
+
" start = time.time()\n",
|
| 518 |
+
" wav = inference(text, ref_s, alpha=0.3, beta=0.7, diffusion_steps=10, embedding_scale=1)\n",
|
| 519 |
+
" rtf = (time.time() - start) / (len(wav) / 24000)\n",
|
| 520 |
+
" print(f\"RTF = {rtf:5f}\")\n",
|
| 521 |
+
" import IPython.display as ipd\n",
|
| 522 |
+
" print(k + ' Synthesized:')\n",
|
| 523 |
+
" display(ipd.Audio(wav, rate=24000, normalize=False))\n",
|
| 524 |
+
" print(k + ' Reference:')\n",
|
| 525 |
+
" display(ipd.Audio(path, rate=24000, normalize=False))"
|
| 526 |
+
]
|
| 527 |
+
},
|
| 528 |
+
{
|
| 529 |
+
"cell_type": "markdown",
|
| 530 |
+
"metadata": {
|
| 531 |
+
"id": "pFT_vmJcKDs1"
|
| 532 |
+
},
|
| 533 |
+
"source": [
|
| 534 |
+
"#### Basic synthesis (5 diffusion steps, unseen speakers)\n",
|
| 535 |
+
"The following samples reproduce the samples in [Section 4](https://styletts2.github.io/#libri) of the demo page. All speakers are unseen during training. You can compare the generated samples to popular zero-shot TTS models like Vall-E and NaturalSpeech 2."
|
| 536 |
+
]
|
| 537 |
+
},
|
| 538 |
+
{
|
| 539 |
+
"cell_type": "code",
|
| 540 |
+
"execution_count": null,
|
| 541 |
+
"metadata": {
|
| 542 |
+
"id": "HvNAeGPEKAWN"
|
| 543 |
+
},
|
| 544 |
+
"outputs": [],
|
| 545 |
+
"source": [
|
| 546 |
+
"reference_dicts = {}\n",
|
| 547 |
+
"# format: (path, text)\n",
|
| 548 |
+
"reference_dicts['1221-135767'] = (\"Demo/reference_audio/1221-135767-0014.wav\", \"Yea, his honourable worship is within, but he hath a godly minister or two with him, and likewise a leech.\")\n",
|
| 549 |
+
"reference_dicts['5639-40744'] = (\"Demo/reference_audio/5639-40744-0020.wav\", \"Thus did this humane and right minded father comfort his unhappy daughter, and her mother embracing her again, did all she could to soothe her feelings.\")\n",
|
| 550 |
+
"reference_dicts['908-157963'] = (\"Demo/reference_audio/908-157963-0027.wav\", \"And lay me down in my cold bed and leave my shining lot.\")\n",
|
| 551 |
+
"reference_dicts['4077-13754'] = (\"Demo/reference_audio/4077-13754-0000.wav\", \"The army found the people in poverty and left them in comparative wealth.\")"
|
| 552 |
+
]
|
| 553 |
+
},
|
| 554 |
+
{
|
| 555 |
+
"cell_type": "code",
|
| 556 |
+
"execution_count": null,
|
| 557 |
+
"metadata": {
|
| 558 |
+
"id": "mFnyvYp5KAYN"
|
| 559 |
+
},
|
| 560 |
+
"outputs": [],
|
| 561 |
+
"source": [
|
| 562 |
+
"noise = torch.randn(1,1,256).to(device)\n",
|
| 563 |
+
"for k, v in reference_dicts.items():\n",
|
| 564 |
+
" path, text = v\n",
|
| 565 |
+
" ref_s = compute_style(path)\n",
|
| 566 |
+
" start = time.time()\n",
|
| 567 |
+
" wav = inference(text, ref_s, alpha=0.3, beta=0.7, diffusion_steps=5, embedding_scale=1)\n",
|
| 568 |
+
" rtf = (time.time() - start) / (len(wav) / 24000)\n",
|
| 569 |
+
" print(f\"RTF = {rtf:5f}\")\n",
|
| 570 |
+
" import IPython.display as ipd\n",
|
| 571 |
+
" print(k + ' Synthesized: ' + text)\n",
|
| 572 |
+
" display(ipd.Audio(wav, rate=24000, normalize=False))\n",
|
| 573 |
+
" print(k + ' Reference:')\n",
|
| 574 |
+
" display(ipd.Audio(path, rate=24000, normalize=False))"
|
| 575 |
+
]
|
| 576 |
+
},
|
| 577 |
+
{
|
| 578 |
+
"cell_type": "markdown",
|
| 579 |
+
"metadata": {
|
| 580 |
+
"id": "QBZ53BQtKNQ6"
|
| 581 |
+
},
|
| 582 |
+
"source": [
|
| 583 |
+
"### Speech expressiveness\n",
|
| 584 |
+
"\n",
|
| 585 |
+
"The following section recreates the samples shown in [Section 6](https://styletts2.github.io/#emo) of the demo page. The speaker reference used is `1221-135767-0014.wav`, which is unseen during training.\n",
|
| 586 |
+
"\n",
|
| 587 |
+
"#### With `embedding_scale=1`\n",
|
| 588 |
+
"This is the classifier-free guidance scale. The higher the scale, the more strongly the style is conditioned on the input text, and hence the more emotional the speech."
|
| 589 |
+
]
|
| 590 |
+
},
|
| 591 |
+
{
|
| 592 |
+
"cell_type": "code",
|
| 593 |
+
"execution_count": null,
|
| 594 |
+
"metadata": {
|
| 595 |
+
"id": "5FwE9CefKQk6"
|
| 596 |
+
},
|
| 597 |
+
"outputs": [],
|
| 598 |
+
"source": [
|
| 599 |
+
"ref_s = compute_style(\"Demo/reference_audio/1221-135767-0014.wav\")"
|
| 600 |
+
]
|
| 601 |
+
},
|
| 602 |
+
{
|
| 603 |
+
"cell_type": "code",
|
| 604 |
+
"execution_count": null,
|
| 605 |
+
"metadata": {
|
| 606 |
+
"id": "0CKMI0ZsKUDh"
|
| 607 |
+
},
|
| 608 |
+
"outputs": [],
|
| 609 |
+
"source": [
|
| 610 |
+
"texts = {}\n",
|
| 611 |
+
"texts['Happy'] = \"We are happy to invite you to join us on a journey to the past, where we will visit the most amazing monuments ever built by human hands.\"\n",
|
| 612 |
+
"texts['Sad'] = \"I am sorry to say that we have suffered a severe setback in our efforts to restore prosperity and confidence.\"\n",
|
| 613 |
+
"texts['Angry'] = \"The field of astronomy is a joke! Its theories are based on flawed observations and biased interpretations!\"\n",
|
| 614 |
+
"texts['Surprised'] = \"I can't believe it! You mean to tell me that you have discovered a new species of bacteria in this pond?\"\n",
|
| 615 |
+
"\n",
|
| 616 |
+
"for k,v in texts.items():\n",
|
| 617 |
+
" wav = inference(v, ref_s, diffusion_steps=10, alpha=0.3, beta=0.7, embedding_scale=1)\n",
|
| 618 |
+
" print(k + \": \")\n",
|
| 619 |
+
" display(ipd.Audio(wav, rate=24000, normalize=False))"
|
| 620 |
+
]
|
| 621 |
+
},
|
| 622 |
+
{
|
| 623 |
+
"cell_type": "markdown",
|
| 624 |
+
"metadata": {
|
| 625 |
+
"id": "reemQKVEKWAZ"
|
| 626 |
+
},
|
| 627 |
+
"source": [
|
| 628 |
+
"#### With `embedding_scale=2`"
|
| 629 |
+
]
|
| 630 |
+
},
|
| 631 |
+
{
|
| 632 |
+
"cell_type": "code",
|
| 633 |
+
"execution_count": null,
|
| 634 |
+
"metadata": {
|
| 635 |
+
"id": "npIAiAUvKYGv"
|
| 636 |
+
},
|
| 637 |
+
"outputs": [],
|
| 638 |
+
"source": [
|
| 639 |
+
"texts = {}\n",
|
| 640 |
+
"texts['Happy'] = \"We are happy to invite you to join us on a journey to the past, where we will visit the most amazing monuments ever built by human hands.\"\n",
|
| 641 |
+
"texts['Sad'] = \"I am sorry to say that we have suffered a severe setback in our efforts to restore prosperity and confidence.\"\n",
|
| 642 |
+
"texts['Angry'] = \"The field of astronomy is a joke! Its theories are based on flawed observations and biased interpretations!\"\n",
|
| 643 |
+
"texts['Surprised'] = \"I can't believe it! You mean to tell me that you have discovered a new species of bacteria in this pond?\"\n",
|
| 644 |
+
"\n",
|
| 645 |
+
"for k,v in texts.items():\n",
|
| 646 |
+
" noise = torch.randn(1,1,256).to(device)\n",
|
| 647 |
+
" wav = inference(v, ref_s, diffusion_steps=10, alpha=0.3, beta=0.7, embedding_scale=2)\n",
|
| 648 |
+
" print(k + \": \")\n",
|
| 649 |
+
" display(ipd.Audio(wav, rate=24000, normalize=False))"
|
| 650 |
+
]
|
| 651 |
+
},
|
| 652 |
+
{
|
| 653 |
+
"cell_type": "markdown",
|
| 654 |
+
"metadata": {
|
| 655 |
+
"id": "lqKZaXeYKbrH"
|
| 656 |
+
},
|
| 657 |
+
"source": [
|
| 658 |
+
"#### With `embedding_scale=2, alpha = 0.5, beta = 0.9`\n",
|
| 659 |
+
"`alpha` and `beta` are the factors that determine how much we use the style sampled based on the text instead of the reference. The higher the values of `alpha` and `beta`, the more suitable the style is to the text but the less similar it is to the reference. Using higher beta makes the synthesized speech more emotional, at the cost of lower similarity to the reference. `alpha` determines the timbre of the speaker while `beta` determines the prosody."
|
| 660 |
+
]
|
| 661 |
+
},
|
| 662 |
+
{
|
| 663 |
+
"cell_type": "code",
|
| 664 |
+
"execution_count": null,
|
| 665 |
+
"metadata": {
|
| 666 |
+
"id": "VjXuRCCWKcdN"
|
| 667 |
+
},
|
| 668 |
+
"outputs": [],
|
| 669 |
+
"source": [
|
| 670 |
+
"texts = {}\n",
|
| 671 |
+
"texts['Happy'] = \"We are happy to invite you to join us on a journey to the past, where we will visit the most amazing monuments ever built by human hands.\"\n",
|
| 672 |
+
"texts['Sad'] = \"I am sorry to say that we have suffered a severe setback in our efforts to restore prosperity and confidence.\"\n",
|
| 673 |
+
"texts['Angry'] = \"The field of astronomy is a joke! Its theories are based on flawed observations and biased interpretations!\"\n",
|
| 674 |
+
"texts['Surprised'] = \"I can't believe it! You mean to tell me that you have discovered a new species of bacteria in this pond?\"\n",
|
| 675 |
+
"\n",
|
| 676 |
+
"for k,v in texts.items():\n",
|
| 677 |
+
" noise = torch.randn(1,1,256).to(device)\n",
|
| 678 |
+
" wav = inference(v, ref_s, diffusion_steps=10, alpha=0.5, beta=0.9, embedding_scale=2)\n",
|
| 679 |
+
" print(k + \": \")\n",
|
| 680 |
+
" display(ipd.Audio(wav, rate=24000, normalize=False))"
|
| 681 |
+
]
|
| 682 |
+
},
|
| 683 |
+
{
|
| 684 |
+
"cell_type": "markdown",
|
| 685 |
+
"metadata": {
|
| 686 |
+
"id": "xrwYXGh0KiIW"
|
| 687 |
+
},
|
| 688 |
+
"source": [
|
| 689 |
+
"### Zero-shot speaker adaptation\n",
|
| 690 |
+
"This section recreates the \"Acoustic Environment Maintenance\" and \"Speaker’s Emotion Maintenance\" demo in [Section 4](https://styletts2.github.io/#libri) of the demo page. You can compare the generated samples to popular zero-shot TTS models like Vall-E. Note that the model was trained only on LibriTTS, which is about 250 times less data than that used to train Vall-E, with similar or better effect for these maintenance tasks."
|
| 691 |
+
]
|
| 692 |
+
},
|
| 693 |
+
{
|
| 694 |
+
"cell_type": "markdown",
|
| 695 |
+
"metadata": {
|
| 696 |
+
"id": "ETUywHHmKimE"
|
| 697 |
+
},
|
| 698 |
+
"source": [
|
| 699 |
+
"#### Acoustic Environment Maintenance\n",
|
| 700 |
+
"\n",
|
| 701 |
+
"Since we want to maintain the acoustic environment in the speaker (timbre), we set `alpha = 0` to make the speaker as close to the reference as possible while only changing the prosody according to the text. "
|
| 702 |
+
]
|
| 703 |
+
},
|
| 704 |
+
{
|
| 705 |
+
"cell_type": "code",
|
| 706 |
+
"execution_count": null,
|
| 707 |
+
"metadata": {
|
| 708 |
+
"id": "yvjBK3syKnZL"
|
| 709 |
+
},
|
| 710 |
+
"outputs": [],
|
| 711 |
+
"source": [
|
| 712 |
+
"reference_dicts = {}\n",
|
| 713 |
+
"# format: (path, text)\n",
|
| 714 |
+
"reference_dicts['3'] = (\"Demo/reference_audio/3.wav\", \"As friends thing I definitely I've got more male friends.\")\n",
|
| 715 |
+
"reference_dicts['4'] = (\"Demo/reference_audio/4.wav\", \"Everything is run by computer but you got to know how to think before you can do a computer.\")\n",
|
| 716 |
+
"reference_dicts['5'] = (\"Demo/reference_audio/5.wav\", \"Then out in LA you guys got a whole another ball game within California to worry about.\")"
|
| 717 |
+
]
|
| 718 |
+
},
|
| 719 |
+
{
|
| 720 |
+
"cell_type": "code",
|
| 721 |
+
"execution_count": null,
|
| 722 |
+
"metadata": {
|
| 723 |
+
"id": "jclowWp4KomJ"
|
| 724 |
+
},
|
| 725 |
+
"outputs": [],
|
| 726 |
+
"source": [
|
| 727 |
+
"noise = torch.randn(1,1,256).to(device)\n",
|
| 728 |
+
"for k, v in reference_dicts.items():\n",
|
| 729 |
+
" path, text = v\n",
|
| 730 |
+
" ref_s = compute_style(path)\n",
|
| 731 |
+
" start = time.time()\n",
|
| 732 |
+
" wav = inference(text, ref_s, alpha=0.0, beta=0.5, diffusion_steps=5, embedding_scale=1)\n",
|
| 733 |
+
" rtf = (time.time() - start) / (len(wav) / 24000)\n",
|
| 734 |
+
" print(f\"RTF = {rtf:5f}\")\n",
|
| 735 |
+
" import IPython.display as ipd\n",
|
| 736 |
+
" print('Synthesized: ' + text)\n",
|
| 737 |
+
" display(ipd.Audio(wav, rate=24000, normalize=False))\n",
|
| 738 |
+
" print('Reference:')\n",
|
| 739 |
+
" display(ipd.Audio(path, rate=24000, normalize=False))"
|
| 740 |
+
]
|
| 741 |
+
},
|
| 742 |
+
{
|
| 743 |
+
"cell_type": "markdown",
|
| 744 |
+
"metadata": {
|
| 745 |
+
"id": "LgIm7M93KqVZ"
|
| 746 |
+
},
|
| 747 |
+
"source": [
|
| 748 |
+
"#### Speaker’s Emotion Maintenance\n",
|
| 749 |
+
"\n",
|
| 750 |
+
"Since we want to maintain the emotion in the speaker (prosody), we set `beta = 0.1` to make the speaker as close to the reference as possible while having some diversity through the slight timbre change."
|
| 751 |
+
]
|
| 752 |
+
},
|
| 753 |
+
{
|
| 754 |
+
"cell_type": "code",
|
| 755 |
+
"execution_count": null,
|
| 756 |
+
"metadata": {
|
| 757 |
+
"id": "yzsNoP6oKulL"
|
| 758 |
+
},
|
| 759 |
+
"outputs": [],
|
| 760 |
+
"source": [
|
| 761 |
+
"reference_dicts = {}\n",
|
| 762 |
+
"# format: (path, text)\n",
|
| 763 |
+
"reference_dicts['Anger'] = (\"Demo/reference_audio/anger.wav\", \"We have to reduce the number of plastic bags.\")\n",
|
| 764 |
+
"reference_dicts['Sleepy'] = (\"Demo/reference_audio/sleepy.wav\", \"We have to reduce the number of plastic bags.\")\n",
|
| 765 |
+
"reference_dicts['Amused'] = (\"Demo/reference_audio/amused.wav\", \"We have to reduce the number of plastic bags.\")\n",
|
| 766 |
+
"reference_dicts['Disgusted'] = (\"Demo/reference_audio/disgusted.wav\", \"We have to reduce the number of plastic bags.\")"
|
| 767 |
+
]
|
| 768 |
+
},
|
| 769 |
+
{
|
| 770 |
+
"cell_type": "code",
|
| 771 |
+
"execution_count": null,
|
| 772 |
+
"metadata": {
|
| 773 |
+
"id": "7h2-9cpfKwr4"
|
| 774 |
+
},
|
| 775 |
+
"outputs": [],
|
| 776 |
+
"source": [
|
| 777 |
+
"noise = torch.randn(1,1,256).to(device)\n",
|
| 778 |
+
"for k, v in reference_dicts.items():\n",
|
| 779 |
+
" path, text = v\n",
|
| 780 |
+
" ref_s = compute_style(path)\n",
|
| 781 |
+
" start = time.time()\n",
|
| 782 |
+
" wav = inference(text, ref_s, alpha=0.3, beta=0.1, diffusion_steps=10, embedding_scale=1)\n",
|
| 783 |
+
" rtf = (time.time() - start) / (len(wav) / 24000)\n",
|
| 784 |
+
" print(f\"RTF = {rtf:5f}\")\n",
|
| 785 |
+
" import IPython.display as ipd\n",
|
| 786 |
+
" print(k + ' Synthesized: ' + text)\n",
|
| 787 |
+
" display(ipd.Audio(wav, rate=24000, normalize=False))\n",
|
| 788 |
+
" print(k + ' Reference:')\n",
|
| 789 |
+
" display(ipd.Audio(path, rate=24000, normalize=False))"
|
| 790 |
+
]
|
| 791 |
+
},
|
| 792 |
+
{
|
| 793 |
+
"cell_type": "markdown",
|
| 794 |
+
"metadata": {
|
| 795 |
+
"id": "aNS82PGwKzgg"
|
| 796 |
+
},
|
| 797 |
+
"source": [
|
| 798 |
+
"### Longform Narration\n",
|
| 799 |
+
"\n",
|
| 800 |
+
"This section includes a basic implementation of Algorithm 1 in the paper for consistent longform audio generation. The example passage is taken from [Section 5](https://styletts2.github.io/#long) of the demo page."
|
| 801 |
+
]
|
| 802 |
+
},
|
| 803 |
+
{
|
| 804 |
+
"cell_type": "code",
|
| 805 |
+
"execution_count": null,
|
| 806 |
+
"metadata": {
|
| 807 |
+
"cellView": "form",
|
| 808 |
+
"id": "qs97nL5HK5DH"
|
| 809 |
+
},
|
| 810 |
+
"outputs": [],
|
| 811 |
+
"source": [
|
| 812 |
+
"passage = passage = '''If the supply of fruit is greater than the family needs, it may be made a source of income by sending the fresh fruit to the market if there is one near enough, or by preserving, canning, and making jelly for sale. To make such an enterprise a success the fruit and work must be first class. There is magic in the word \"Homemade,\" when the product appeals to the eye and the palate; but many careless and incompetent people have found to their sorrow that this word has not magic enough to float inferior goods on the market. As a rule large canning and preserving establishments are clean and have the best appliances, and they employ chemists and skilled labor. The home product must be very good to compete with the attractive goods that are sent out from such establishments. Yet for first class home made products there is a market in all large cities. All first-class grocers have customers who purchase such goods.''' # @param {type:\"string\"}"
|
| 813 |
+
]
|
| 814 |
+
},
|
| 815 |
+
{
|
| 816 |
+
"cell_type": "code",
|
| 817 |
+
"execution_count": null,
|
| 818 |
+
"metadata": {
|
| 819 |
+
"colab": {
|
| 820 |
+
"background_save": true
|
| 821 |
+
},
|
| 822 |
+
"id": "8Mu9whHYK_1b"
|
| 823 |
+
},
|
| 824 |
+
"outputs": [],
|
| 825 |
+
"source": [
|
| 826 |
+
"# seen speaker\n",
|
| 827 |
+
"path = \"Demo/reference_audio/696_92939_000016_000006.wav\"\n",
|
| 828 |
+
"s_ref = compute_style(path)\n",
|
| 829 |
+
"sentences = passage.split('.') # simple split by period\n",
|
| 830 |
+
"wavs = []\n",
|
| 831 |
+
"s_prev = None\n",
|
| 832 |
+
"for text in sentences:\n",
|
| 833 |
+
" if text.strip() == \"\": continue\n",
|
| 834 |
+
" text += '.' # add it back\n",
|
| 835 |
+
"\n",
|
| 836 |
+
" wav, s_prev = LFinference(text,\n",
|
| 837 |
+
" s_prev,\n",
|
| 838 |
+
" s_ref,\n",
|
| 839 |
+
" alpha = 0.3,\n",
|
| 840 |
+
" beta = 0.9, # make it more suitable for the text\n",
|
| 841 |
+
" t = 0.7,\n",
|
| 842 |
+
" diffusion_steps=10, embedding_scale=1.5)\n",
|
| 843 |
+
" wavs.append(wav)\n",
|
| 844 |
+
"print('Synthesized: ')\n",
|
| 845 |
+
"display(ipd.Audio(np.concatenate(wavs), rate=24000, normalize=False))\n",
|
| 846 |
+
"print('Reference: ')\n",
|
| 847 |
+
"display(ipd.Audio(path, rate=24000, normalize=False))"
|
| 848 |
+
]
|
| 849 |
+
},
|
| 850 |
+
{
|
| 851 |
+
"cell_type": "markdown",
|
| 852 |
+
"metadata": {
|
| 853 |
+
"id": "81Rh-lgWLB2i"
|
| 854 |
+
},
|
| 855 |
+
"source": [
|
| 856 |
+
"### Style Transfer\n",
|
| 857 |
+
"\n",
|
| 858 |
+
"The following section demonstrates the style transfer capacity for unseen speakers in [Section 6](https://styletts2.github.io/#emo) of the demo page. For this, we set `alpha=0.5, beta = 0.9` for the most pronounced effects (mostly using the sampled style)."
|
| 859 |
+
]
|
| 860 |
+
},
|
| 861 |
+
{
|
| 862 |
+
"cell_type": "code",
|
| 863 |
+
"execution_count": null,
|
| 864 |
+
"metadata": {
|
| 865 |
+
"id": "CtIgr5kOLE9a"
|
| 866 |
+
},
|
| 867 |
+
"outputs": [],
|
| 868 |
+
"source": [
|
| 869 |
+
"# reference texts to sample styles\n",
|
| 870 |
+
"\n",
|
| 871 |
+
"ref_texts = {}\n",
|
| 872 |
+
"ref_texts['Happy'] = \"We are happy to invite you to join us on a journey to the past, where we will visit the most amazing monuments ever built by human hands.\"\n",
|
| 873 |
+
"ref_texts['Sad'] = \"I am sorry to say that we have suffered a severe setback in our efforts to restore prosperity and confidence.\"\n",
|
| 874 |
+
"ref_texts['Angry'] = \"The field of astronomy is a joke! Its theories are based on flawed observations and biased interpretations!\"\n",
|
| 875 |
+
"ref_texts['Surprised'] = \"I can't believe it! You mean to tell me that you have discovered a new species of bacteria in this pond?\""
|
| 876 |
+
]
|
| 877 |
+
},
|
| 878 |
+
{
|
| 879 |
+
"cell_type": "code",
|
| 880 |
+
"execution_count": null,
|
| 881 |
+
"metadata": {
|
| 882 |
+
"id": "MlA1CbhzLIoI"
|
| 883 |
+
},
|
| 884 |
+
"outputs": [],
|
| 885 |
+
"source": [
|
| 886 |
+
"path = \"Demo/reference_audio/1221-135767-0014.wav\"\n",
|
| 887 |
+
"s_ref = compute_style(path)\n",
|
| 888 |
+
"\n",
|
| 889 |
+
"text = \"Yea, his honourable worship is within, but he hath a godly minister or two with him, and likewise a leech.\"\n",
|
| 890 |
+
"for k,v in ref_texts.items():\n",
|
| 891 |
+
" wav = STinference(text, s_ref, v, diffusion_steps=10, alpha=0.5, beta=0.9, embedding_scale=1.5)\n",
|
| 892 |
+
" print(k + \": \")\n",
|
| 893 |
+
" display(ipd.Audio(wav, rate=24000, normalize=False))"
|
| 894 |
+
]
|
| 895 |
+
},
|
| 896 |
+
{
|
| 897 |
+
"cell_type": "markdown",
|
| 898 |
+
"metadata": {
|
| 899 |
+
"id": "2M0iaXlkLJUQ"
|
| 900 |
+
},
|
| 901 |
+
"source": [
|
| 902 |
+
"### Speech diversity\n",
|
| 903 |
+
"\n",
|
| 904 |
+
"This section reproduces samples in [Section 7](https://styletts2.github.io/#var) of the demo page.\n",
|
| 905 |
+
"\n",
|
| 906 |
+
"`alpha` and `beta` determine the diversity of the synthesized speech. There are two extreme cases:\n",
|
| 907 |
+
"- If `alpha = 1` and `beta = 1`, the synthesized speech sounds the most dissimilar to the reference speaker, but it is also the most diverse (each time you synthesize a speech it will be totally different).\n",
|
| 908 |
+
"- If `alpha = 0` and `beta = 0`, the synthesized speech sounds the most similar to the reference speaker, but it is deterministic (i.e., the sampled style is not used for speech synthesis).\n"
|
| 909 |
+
]
|
| 910 |
+
},
|
| 911 |
+
{
|
| 912 |
+
"cell_type": "markdown",
|
| 913 |
+
"metadata": {
|
| 914 |
+
"id": "tSxZDvF2LNu4"
|
| 915 |
+
},
|
| 916 |
+
"source": [
|
| 917 |
+
"#### Default setting (`alpha = 0.3, beta=0.7`)\n",
|
| 918 |
+
"This setting uses 70% of the reference timbre and 30% of the reference prosody and uses the diffusion model to sample the rest based on the text."
|
| 919 |
+
]
|
| 920 |
+
},
|
| 921 |
+
{
|
| 922 |
+
"cell_type": "code",
|
| 923 |
+
"execution_count": null,
|
| 924 |
+
"metadata": {
|
| 925 |
+
"id": "AAomGCDZLIt5"
|
| 926 |
+
},
|
| 927 |
+
"outputs": [],
|
| 928 |
+
"source": [
|
| 929 |
+
"# unseen speaker\n",
|
| 930 |
+
"path = \"Demo/reference_audio/1221-135767-0014.wav\"\n",
|
| 931 |
+
"ref_s = compute_style(path)\n",
|
| 932 |
+
"\n",
|
| 933 |
+
"text = \"How much variation is there?\"\n",
|
| 934 |
+
"for _ in range(5):\n",
|
| 935 |
+
" wav = inference(text, ref_s, diffusion_steps=10, alpha=0.3, beta=0.7, embedding_scale=1)\n",
|
| 936 |
+
" display(ipd.Audio(wav, rate=24000, normalize=False))"
|
| 937 |
+
]
|
| 938 |
+
},
|
| 939 |
+
{
|
| 940 |
+
"cell_type": "markdown",
|
| 941 |
+
"metadata": {
|
| 942 |
+
"id": "BKrSMdgcLQRP"
|
| 943 |
+
},
|
| 944 |
+
"source": [
|
| 945 |
+
"#### Less diverse setting (`alpha = 0.1, beta=0.3`)\n",
|
| 946 |
+
"This setting uses 90% of the reference timbre and 70% of the reference prosody. This makes it more similar to the reference speaker at the cost of less diverse samples."
|
| 947 |
+
]
|
| 948 |
+
},
|
| 949 |
+
{
|
| 950 |
+
"cell_type": "code",
|
| 951 |
+
"execution_count": null,
|
| 952 |
+
"metadata": {
|
| 953 |
+
"id": "Uo7gVmFoLRfm"
|
| 954 |
+
},
|
| 955 |
+
"outputs": [],
|
| 956 |
+
"source": [
|
| 957 |
+
"# unseen speaker\n",
|
| 958 |
+
"path = \"Demo/reference_audio/1221-135767-0014.wav\"\n",
|
| 959 |
+
"ref_s = compute_style(path)\n",
|
| 960 |
+
"\n",
|
| 961 |
+
"text = \"How much variation is there?\"\n",
|
| 962 |
+
"for _ in range(5):\n",
|
| 963 |
+
" wav = inference(text, ref_s, diffusion_steps=10, alpha=0.1, beta=0.3, embedding_scale=1)\n",
|
| 964 |
+
" display(ipd.Audio(wav, rate=24000, normalize=False))"
|
| 965 |
+
]
|
| 966 |
+
},
|
| 967 |
+
{
|
| 968 |
+
"cell_type": "markdown",
|
| 969 |
+
"metadata": {
|
| 970 |
+
"id": "nfQ0Xrg9LStd"
|
| 971 |
+
},
|
| 972 |
+
"source": [
|
| 973 |
+
"#### More diverse setting (`alpha = 0.5, beta=0.95`)\n",
|
| 974 |
+
"This setting uses 50% of the reference timbre and 5% of the reference prosody (so it uses 95% of the sampled prosody, which makes it more diverse), but this makes it more dissimilar to the reference speaker. "
|
| 975 |
+
]
|
| 976 |
+
},
|
| 977 |
+
{
|
| 978 |
+
"cell_type": "code",
|
| 979 |
+
"execution_count": null,
|
| 980 |
+
"metadata": {
|
| 981 |
+
"id": "cPHz4BzVLT_u"
|
| 982 |
+
},
|
| 983 |
+
"outputs": [],
|
| 984 |
+
"source": [
|
| 985 |
+
"# unseen speaker\n",
|
| 986 |
+
"path = \"Demo/reference_audio/1221-135767-0014.wav\"\n",
|
| 987 |
+
"ref_s = compute_style(path)\n",
|
| 988 |
+
"\n",
|
| 989 |
+
"text = \"How much variation is there?\"\n",
|
| 990 |
+
"for _ in range(5):\n",
|
| 991 |
+
" wav = inference(text, ref_s, diffusion_steps=10, alpha=0.5, beta=0.95, embedding_scale=1)\n",
|
| 992 |
+
" display(ipd.Audio(wav, rate=24000, normalize=False))"
|
| 993 |
+
]
|
| 994 |
+
},
|
| 995 |
+
{
|
| 996 |
+
"cell_type": "markdown",
|
| 997 |
+
"source": [
|
| 998 |
+
"#### Extreme setting (`alpha = 1, beta=1`)\n",
|
| 999 |
+
"This setting uses 0% of the reference timbre and prosody and uses the diffusion model to sample the entire style. This makes the speaker very dissimilar to the reference speaker."
|
| 1000 |
+
],
|
| 1001 |
+
"metadata": {
|
| 1002 |
+
"id": "hPKg9eYpL00f"
|
| 1003 |
+
}
|
| 1004 |
+
},
|
| 1005 |
+
{
|
| 1006 |
+
"cell_type": "code",
|
| 1007 |
+
"source": [
|
| 1008 |
+
"# unseen speaker\n",
|
| 1009 |
+
"path = \"Demo/reference_audio/1221-135767-0014.wav\"\n",
|
| 1010 |
+
"ref_s = compute_style(path)\n",
|
| 1011 |
+
"\n",
|
| 1012 |
+
"text = \"How much variation is there?\"\n",
|
| 1013 |
+
"for _ in range(5):\n",
|
| 1014 |
+
" wav = inference(text, ref_s, diffusion_steps=10, alpha=1, beta=1, embedding_scale=1)\n",
|
| 1015 |
+
" display(ipd.Audio(wav, rate=24000, normalize=False))"
|
| 1016 |
+
],
|
| 1017 |
+
"metadata": {
|
| 1018 |
+
"id": "Ei-7JOccL0bF"
|
| 1019 |
+
},
|
| 1020 |
+
"execution_count": null,
|
| 1021 |
+
"outputs": []
|
| 1022 |
+
},
|
| 1023 |
+
{
|
| 1024 |
+
"cell_type": "markdown",
|
| 1025 |
+
"source": [
|
| 1026 |
+
"#### No variation (`alpha = 0, beta=0`)\n",
|
| 1027 |
+
"This setting uses 100% of the reference timbre and prosody and does not use the diffusion model at all. This makes the speaker very similar to the reference speaker, but there is no variation."
|
| 1028 |
+
],
|
| 1029 |
+
"metadata": {
|
| 1030 |
+
"id": "FVMPc3bhL3eL"
|
| 1031 |
+
}
|
| 1032 |
+
},
|
| 1033 |
+
{
|
| 1034 |
+
"cell_type": "code",
|
| 1035 |
+
"source": [
|
| 1036 |
+
"# unseen speaker\n",
|
| 1037 |
+
"path = \"Demo/reference_audio/1221-135767-0014.wav\"\n",
|
| 1038 |
+
"ref_s = compute_style(path)\n",
|
| 1039 |
+
"\n",
|
| 1040 |
+
"text = \"How much variation is there?\"\n",
|
| 1041 |
+
"for _ in range(5):\n",
|
| 1042 |
+
" wav = inference(text, ref_s, diffusion_steps=10, alpha=0, beta=0, embedding_scale=1)\n",
|
| 1043 |
+
" display(ipd.Audio(wav, rate=24000, normalize=False))"
|
| 1044 |
+
],
|
| 1045 |
+
"metadata": {
|
| 1046 |
+
"id": "yh1QZ7uhL4wM"
|
| 1047 |
+
},
|
| 1048 |
+
"execution_count": null,
|
| 1049 |
+
"outputs": []
|
| 1050 |
+
},
|
| 1051 |
+
{
|
| 1052 |
+
"cell_type": "markdown",
|
| 1053 |
+
"source": [
|
| 1054 |
+
"### Extra fun!\n",
|
| 1055 |
+
"\n",
|
| 1056 |
+
"You can record your own voice and clone it using pre-trained StyleTTS 2 model here."
|
| 1057 |
+
],
|
| 1058 |
+
"metadata": {
|
| 1059 |
+
"id": "T0EvkWrAMBDB"
|
| 1060 |
+
}
|
| 1061 |
+
},
|
| 1062 |
+
{
|
| 1063 |
+
"cell_type": "markdown",
|
| 1064 |
+
"source": [
|
| 1065 |
+
"#### Run the following cell to record your voice for 5 seconds. Please keep speaking to have the best effect."
|
| 1066 |
+
],
|
| 1067 |
+
"metadata": {
|
| 1068 |
+
"id": "R985j5QONY8I"
|
| 1069 |
+
}
|
| 1070 |
+
},
|
| 1071 |
+
{
|
| 1072 |
+
"cell_type": "code",
|
| 1073 |
+
"source": [
|
| 1074 |
+
"# all imports\n",
|
| 1075 |
+
"from IPython.display import Javascript\n",
|
| 1076 |
+
"from google.colab import output\n",
|
| 1077 |
+
"from base64 import b64decode\n",
|
| 1078 |
+
"\n",
|
| 1079 |
+
"RECORD = \"\"\"\n",
|
| 1080 |
+
"const sleep = time => new Promise(resolve => setTimeout(resolve, time))\n",
|
| 1081 |
+
"const b2text = blob => new Promise(resolve => {\n",
|
| 1082 |
+
" const reader = new FileReader()\n",
|
| 1083 |
+
" reader.onloadend = e => resolve(e.srcElement.result)\n",
|
| 1084 |
+
" reader.readAsDataURL(blob)\n",
|
| 1085 |
+
"})\n",
|
| 1086 |
+
"var record = time => new Promise(async resolve => {\n",
|
| 1087 |
+
" stream = await navigator.mediaDevices.getUserMedia({ audio: true })\n",
|
| 1088 |
+
" recorder = new MediaRecorder(stream)\n",
|
| 1089 |
+
" chunks = []\n",
|
| 1090 |
+
" recorder.ondataavailable = e => chunks.push(e.data)\n",
|
| 1091 |
+
" recorder.start()\n",
|
| 1092 |
+
" await sleep(time)\n",
|
| 1093 |
+
" recorder.onstop = async ()=>{\n",
|
| 1094 |
+
" blob = new Blob(chunks)\n",
|
| 1095 |
+
" text = await b2text(blob)\n",
|
| 1096 |
+
" resolve(text)\n",
|
| 1097 |
+
" }\n",
|
| 1098 |
+
" recorder.stop()\n",
|
| 1099 |
+
"})\n",
|
| 1100 |
+
"\"\"\"\n",
|
| 1101 |
+
"\n",
|
| 1102 |
+
"def record(sec=3):\n",
|
| 1103 |
+
" display(Javascript(RECORD))\n",
|
| 1104 |
+
" s = output.eval_js('record(%d)' % (sec*1000))\n",
|
| 1105 |
+
" b = b64decode(s.split(',')[1])\n",
|
| 1106 |
+
" with open('audio.wav','wb') as f:\n",
|
| 1107 |
+
" f.write(b)\n",
|
| 1108 |
+
" return 'audio.wav' # or webm ?"
|
| 1109 |
+
],
|
| 1110 |
+
"metadata": {
|
| 1111 |
+
"id": "MWrFs0KWMBpz"
|
| 1112 |
+
},
|
| 1113 |
+
"execution_count": null,
|
| 1114 |
+
"outputs": []
|
| 1115 |
+
},
|
| 1116 |
+
{
|
| 1117 |
+
"cell_type": "markdown",
|
| 1118 |
+
"source": [
|
| 1119 |
+
"#### Please run this cell and speak:"
|
| 1120 |
+
],
|
| 1121 |
+
"metadata": {
|
| 1122 |
+
"id": "z35qXwM0Nhx1"
|
| 1123 |
+
}
|
| 1124 |
+
},
|
| 1125 |
+
{
|
| 1126 |
+
"cell_type": "code",
|
| 1127 |
+
"source": [
|
| 1128 |
+
"print('Speak now for 5 seconds.')\n",
|
| 1129 |
+
"audio = record(sec=5)\n",
|
| 1130 |
+
"import IPython.display as ipd\n",
|
| 1131 |
+
"display(ipd.Audio(audio, rate=24000, normalize=False))"
|
| 1132 |
+
],
|
| 1133 |
+
"metadata": {
|
| 1134 |
+
"id": "KUEoFyQBMR-8"
|
| 1135 |
+
},
|
| 1136 |
+
"execution_count": null,
|
| 1137 |
+
"outputs": []
|
| 1138 |
+
},
|
| 1139 |
+
{
|
| 1140 |
+
"cell_type": "markdown",
|
| 1141 |
+
"source": [
|
| 1142 |
+
"#### Synthesize in your own voice"
|
| 1143 |
+
],
|
| 1144 |
+
"metadata": {
|
| 1145 |
+
"id": "OQS_7IBpNmM1"
|
| 1146 |
+
}
|
| 1147 |
+
},
|
| 1148 |
+
{
|
| 1149 |
+
"cell_type": "code",
|
| 1150 |
+
"source": [
|
| 1151 |
+
"text = ''' StyleTTS 2 is a text to speech model that leverages style diffusion and adversarial training with large speech language models to achieve human level text to speech synthesis. ''' # @param {type:\"string\"}\n"
|
| 1152 |
+
],
|
| 1153 |
+
"metadata": {
|
| 1154 |
+
"cellView": "form",
|
| 1155 |
+
"id": "c0I3LY7vM8Ta"
|
| 1156 |
+
},
|
| 1157 |
+
"execution_count": null,
|
| 1158 |
+
"outputs": []
|
| 1159 |
+
},
|
| 1160 |
+
{
|
| 1161 |
+
"cell_type": "code",
|
| 1162 |
+
"source": [
|
| 1163 |
+
"reference_dicts = {}\n",
|
| 1164 |
+
"reference_dicts['You'] = audio"
|
| 1165 |
+
],
|
| 1166 |
+
"metadata": {
|
| 1167 |
+
"id": "80eW-pwxNCxu"
|
| 1168 |
+
},
|
| 1169 |
+
"execution_count": null,
|
| 1170 |
+
"outputs": []
|
| 1171 |
+
},
|
| 1172 |
+
{
|
| 1173 |
+
"cell_type": "code",
|
| 1174 |
+
"source": [
|
| 1175 |
+
"start = time.time()\n",
|
| 1176 |
+
"noise = torch.randn(1,1,256).to(device)\n",
|
| 1177 |
+
"for k, path in reference_dicts.items():\n",
|
| 1178 |
+
" ref_s = compute_style(path)\n",
|
| 1179 |
+
"\n",
|
| 1180 |
+
" wav = inference(text, ref_s, alpha=0.1, beta=0.5, diffusion_steps=5, embedding_scale=1)\n",
|
| 1181 |
+
" rtf = (time.time() - start) / (len(wav) / 24000)\n",
|
| 1182 |
+
" print('Speaker: ' + k)\n",
|
| 1183 |
+
" import IPython.display as ipd\n",
|
| 1184 |
+
" print('Synthesized:')\n",
|
| 1185 |
+
" display(ipd.Audio(wav, rate=24000, normalize=False))\n",
|
| 1186 |
+
" print('Reference:')\n",
|
| 1187 |
+
" display(ipd.Audio(path, rate=24000, normalize=False))"
|
| 1188 |
+
],
|
| 1189 |
+
"metadata": {
|
| 1190 |
+
"id": "yIga6MTuNJaN"
|
| 1191 |
+
},
|
| 1192 |
+
"execution_count": null,
|
| 1193 |
+
"outputs": []
|
| 1194 |
+
}
|
| 1195 |
+
],
|
| 1196 |
+
"metadata": {
|
| 1197 |
+
"accelerator": "GPU",
|
| 1198 |
+
"colab": {
|
| 1199 |
+
"provenance": [],
|
| 1200 |
+
"collapsed_sections": [
|
| 1201 |
+
"aAGQPfgYIR23",
|
| 1202 |
+
"eJdB_nCOIVIN",
|
| 1203 |
+
"R985j5QONY8I"
|
| 1204 |
+
],
|
| 1205 |
+
"authorship_tag": "ABX9TyPQdFTqqVEknEG/ma/HMfU+",
|
| 1206 |
+
"include_colab_link": true
|
| 1207 |
+
},
|
| 1208 |
+
"kernelspec": {
|
| 1209 |
+
"display_name": "Python 3",
|
| 1210 |
+
"name": "python3"
|
| 1211 |
+
},
|
| 1212 |
+
"language_info": {
|
| 1213 |
+
"name": "python"
|
| 1214 |
+
}
|
| 1215 |
+
},
|
| 1216 |
+
"nbformat": 4,
|
| 1217 |
+
"nbformat_minor": 0
|
| 1218 |
+
}
|
stylekan/Colab/StyleTTS2_Finetune_Demo.ipynb
ADDED
|
@@ -0,0 +1,480 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"nbformat": 4,
|
| 3 |
+
"nbformat_minor": 0,
|
| 4 |
+
"metadata": {
|
| 5 |
+
"colab": {
|
| 6 |
+
"provenance": [],
|
| 7 |
+
"gpuType": "T4",
|
| 8 |
+
"authorship_tag": "ABX9TyNiDU9ykIeYxO86Lmuid+ph",
|
| 9 |
+
"include_colab_link": true
|
| 10 |
+
},
|
| 11 |
+
"kernelspec": {
|
| 12 |
+
"name": "python3",
|
| 13 |
+
"display_name": "Python 3"
|
| 14 |
+
},
|
| 15 |
+
"language_info": {
|
| 16 |
+
"name": "python"
|
| 17 |
+
},
|
| 18 |
+
"accelerator": "GPU"
|
| 19 |
+
},
|
| 20 |
+
"cells": [
|
| 21 |
+
{
|
| 22 |
+
"cell_type": "markdown",
|
| 23 |
+
"metadata": {
|
| 24 |
+
"id": "view-in-github",
|
| 25 |
+
"colab_type": "text"
|
| 26 |
+
},
|
| 27 |
+
"source": [
|
| 28 |
+
"<a href=\"https://colab.research.google.com/github/yl4579/StyleTTS2/blob/main/Colab/StyleTTS2_Finetune_Demo.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
|
| 29 |
+
]
|
| 30 |
+
},
|
| 31 |
+
{
|
| 32 |
+
"cell_type": "markdown",
|
| 33 |
+
"source": [
|
| 34 |
+
"### Install packages and download models"
|
| 35 |
+
],
|
| 36 |
+
"metadata": {
|
| 37 |
+
"id": "yLqBa4uYPrqE"
|
| 38 |
+
}
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"cell_type": "code",
|
| 42 |
+
"source": [
|
| 43 |
+
"%%shell\n",
|
| 44 |
+
"git clone https://github.com/yl4579/StyleTTS2.git\n",
|
| 45 |
+
"cd StyleTTS2\n",
|
| 46 |
+
"pip install SoundFile torchaudio munch torch pydub pyyaml librosa nltk matplotlib accelerate transformers phonemizer einops einops-exts tqdm typing-extensions git+https://github.com/resemble-ai/monotonic_align.git\n",
|
| 47 |
+
"sudo apt-get install espeak-ng\n",
|
| 48 |
+
"git-lfs clone https://huggingface.co/yl4579/StyleTTS2-LibriTTS\n",
|
| 49 |
+
"mv StyleTTS2-LibriTTS/Models ."
|
| 50 |
+
],
|
| 51 |
+
"metadata": {
|
| 52 |
+
"id": "H72WF06ZPrTF"
|
| 53 |
+
},
|
| 54 |
+
"execution_count": null,
|
| 55 |
+
"outputs": []
|
| 56 |
+
},
|
| 57 |
+
{
|
| 58 |
+
"cell_type": "markdown",
|
| 59 |
+
"source": [
|
| 60 |
+
"### Download dataset (LJSpeech, 200 samples, ~15 minutes of data)\n",
|
| 61 |
+
"\n",
|
| 62 |
+
"You can definitely do it with fewer samples. This is just a proof of concept with 200 samples."
|
| 63 |
+
],
|
| 64 |
+
"metadata": {
|
| 65 |
+
"id": "G398sL8wPzTB"
|
| 66 |
+
}
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"cell_type": "code",
|
| 70 |
+
"source": [
|
| 71 |
+
"%cd StyleTTS2\n",
|
| 72 |
+
"!rm -rf Data"
|
| 73 |
+
],
|
| 74 |
+
"metadata": {
|
| 75 |
+
"id": "kJuQUBrEPy5C"
|
| 76 |
+
},
|
| 77 |
+
"execution_count": null,
|
| 78 |
+
"outputs": []
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"cell_type": "code",
|
| 82 |
+
"source": [
|
| 83 |
+
"!gdown --id 1vqz26D3yn7OXS2vbfYxfSnpLS6m6tOFP\n",
|
| 84 |
+
"!unzip Data.zip"
|
| 85 |
+
],
|
| 86 |
+
"metadata": {
|
| 87 |
+
"id": "mDXW8ZZePuSb"
|
| 88 |
+
},
|
| 89 |
+
"execution_count": null,
|
| 90 |
+
"outputs": []
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"cell_type": "markdown",
|
| 94 |
+
"source": [
|
| 95 |
+
"### Change the finetuning config\n",
|
| 96 |
+
"\n",
|
| 97 |
+
"Depending on the GPU you have, you may want to change the batch size, max audio length, epochs and so on."
|
| 98 |
+
],
|
| 99 |
+
"metadata": {
|
| 100 |
+
"id": "_AlBQREWU8ud"
|
| 101 |
+
}
|
| 102 |
+
},
|
| 103 |
+
{
|
| 104 |
+
"cell_type": "code",
|
| 105 |
+
"source": [
|
| 106 |
+
"config_path = \"Configs/config_ft.yml\"\n",
|
| 107 |
+
"\n",
|
| 108 |
+
"import yaml\n",
|
| 109 |
+
"config = yaml.safe_load(open(config_path))"
|
| 110 |
+
],
|
| 111 |
+
"metadata": {
|
| 112 |
+
"id": "7uEITi0hU4I2"
|
| 113 |
+
},
|
| 114 |
+
"execution_count": null,
|
| 115 |
+
"outputs": []
|
| 116 |
+
},
|
| 117 |
+
{
|
| 118 |
+
"cell_type": "code",
|
| 119 |
+
"source": [
|
| 120 |
+
"config['data_params']['root_path'] = \"Data/wavs\"\n",
|
| 121 |
+
"\n",
|
| 122 |
+
"config['batch_size'] = 2 # not enough RAM\n",
|
| 123 |
+
"config['max_len'] = 100 # not enough RAM\n",
|
| 124 |
+
"config['loss_params']['joint_epoch'] = 110 # we do not do SLM adversarial training due to not enough RAM\n",
|
| 125 |
+
"\n",
|
| 126 |
+
"with open(config_path, 'w') as outfile:\n",
|
| 127 |
+
" yaml.dump(config, outfile, default_flow_style=True)"
|
| 128 |
+
],
|
| 129 |
+
"metadata": {
|
| 130 |
+
"id": "TPTRgOKSVT4K"
|
| 131 |
+
},
|
| 132 |
+
"execution_count": null,
|
| 133 |
+
"outputs": []
|
| 134 |
+
},
|
| 135 |
+
{
|
| 136 |
+
"cell_type": "markdown",
|
| 137 |
+
"source": [
|
| 138 |
+
"### Start finetuning\n"
|
| 139 |
+
],
|
| 140 |
+
"metadata": {
|
| 141 |
+
"id": "uUuB_19NWj2Y"
|
| 142 |
+
}
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"cell_type": "code",
|
| 146 |
+
"source": [
|
| 147 |
+
"!python train_finetune.py --config_path ./Configs/config_ft.yml"
|
| 148 |
+
],
|
| 149 |
+
"metadata": {
|
| 150 |
+
"id": "HZVAD5GKWm-O"
|
| 151 |
+
},
|
| 152 |
+
"execution_count": null,
|
| 153 |
+
"outputs": []
|
| 154 |
+
},
|
| 155 |
+
{
|
| 156 |
+
"cell_type": "markdown",
|
| 157 |
+
"source": [
|
| 158 |
+
"### Test the model quality\n",
|
| 159 |
+
"\n",
|
| 160 |
+
"Note that this mainly serves as a proof of concept due to the RAM limitations of free Colab instances. A lot of settings are suboptimal. In the future when DDP works for train_second.py, we will also add mixed precision finetuning to save time and RAM. You can also add an SLM adversarial training run if you have a paid Colab service (such as an A100 with 40G of RAM)."
|
| 161 |
+
],
|
| 162 |
+
"metadata": {
|
| 163 |
+
"id": "I0_7wsGkXGfc"
|
| 164 |
+
}
|
| 165 |
+
},
|
| 166 |
+
{
|
| 167 |
+
"cell_type": "code",
|
| 168 |
+
"source": [
|
| 169 |
+
"import nltk\n",
|
| 170 |
+
"nltk.download('punkt')"
|
| 171 |
+
],
|
| 172 |
+
"metadata": {
|
| 173 |
+
"id": "OPLphjbncE7p"
|
| 174 |
+
},
|
| 175 |
+
"execution_count": null,
|
| 176 |
+
"outputs": []
|
| 177 |
+
},
|
| 178 |
+
{
|
| 179 |
+
"cell_type": "code",
|
| 180 |
+
"source": [
|
| 181 |
+
"import torch\n",
|
| 182 |
+
"torch.manual_seed(0)\n",
|
| 183 |
+
"torch.backends.cudnn.benchmark = False\n",
|
| 184 |
+
"torch.backends.cudnn.deterministic = True\n",
|
| 185 |
+
"\n",
|
| 186 |
+
"import random\n",
|
| 187 |
+
"random.seed(0)\n",
|
| 188 |
+
"\n",
|
| 189 |
+
"import numpy as np\n",
|
| 190 |
+
"np.random.seed(0)\n",
|
| 191 |
+
"\n",
|
| 192 |
+
"# load packages\n",
|
| 193 |
+
"import time\n",
|
| 194 |
+
"import random\n",
|
| 195 |
+
"import yaml\n",
|
| 196 |
+
"from munch import Munch\n",
|
| 197 |
+
"import numpy as np\n",
|
| 198 |
+
"import torch\n",
|
| 199 |
+
"from torch import nn\n",
|
| 200 |
+
"import torch.nn.functional as F\n",
|
| 201 |
+
"import torchaudio\n",
|
| 202 |
+
"import librosa\n",
|
| 203 |
+
"from nltk.tokenize import word_tokenize\n",
|
| 204 |
+
"\n",
|
| 205 |
+
"from models import *\n",
|
| 206 |
+
"from utils import *\n",
|
| 207 |
+
"from text_utils import TextCleaner\n",
|
| 208 |
+
"textclenaer = TextCleaner()\n",
|
| 209 |
+
"\n",
|
| 210 |
+
"%matplotlib inline\n",
|
| 211 |
+
"\n",
|
| 212 |
+
"to_mel = torchaudio.transforms.MelSpectrogram(\n",
|
| 213 |
+
" n_mels=80, n_fft=2048, win_length=1200, hop_length=300)\n",
|
| 214 |
+
"mean, std = -4, 4\n",
|
| 215 |
+
"\n",
|
| 216 |
+
"def length_to_mask(lengths):\n",
|
| 217 |
+
" mask = torch.arange(lengths.max()).unsqueeze(0).expand(lengths.shape[0], -1).type_as(lengths)\n",
|
| 218 |
+
" mask = torch.gt(mask+1, lengths.unsqueeze(1))\n",
|
| 219 |
+
" return mask\n",
|
| 220 |
+
"\n",
|
| 221 |
+
"def preprocess(wave):\n",
|
| 222 |
+
" wave_tensor = torch.from_numpy(wave).float()\n",
|
| 223 |
+
" mel_tensor = to_mel(wave_tensor)\n",
|
| 224 |
+
" mel_tensor = (torch.log(1e-5 + mel_tensor.unsqueeze(0)) - mean) / std\n",
|
| 225 |
+
" return mel_tensor\n",
|
| 226 |
+
"\n",
|
| 227 |
+
"def compute_style(path):\n",
|
| 228 |
+
" wave, sr = librosa.load(path, sr=24000)\n",
|
| 229 |
+
" audio, index = librosa.effects.trim(wave, top_db=30)\n",
|
| 230 |
+
" if sr != 24000:\n",
|
| 231 |
+
" audio = librosa.resample(audio, sr, 24000)\n",
|
| 232 |
+
" mel_tensor = preprocess(audio).to(device)\n",
|
| 233 |
+
"\n",
|
| 234 |
+
" with torch.no_grad():\n",
|
| 235 |
+
" ref_s = model.style_encoder(mel_tensor.unsqueeze(1))\n",
|
| 236 |
+
" ref_p = model.predictor_encoder(mel_tensor.unsqueeze(1))\n",
|
| 237 |
+
"\n",
|
| 238 |
+
" return torch.cat([ref_s, ref_p], dim=1)\n",
|
| 239 |
+
"\n",
|
| 240 |
+
"device = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
|
| 241 |
+
"\n",
|
| 242 |
+
"# load phonemizer\n",
|
| 243 |
+
"import phonemizer\n",
|
| 244 |
+
"global_phonemizer = phonemizer.backend.EspeakBackend(language='en-us', preserve_punctuation=True, with_stress=True)\n",
|
| 245 |
+
"\n",
|
| 246 |
+
"config = yaml.safe_load(open(\"Models/LJSpeech/config_ft.yml\"))\n",
|
| 247 |
+
"\n",
|
| 248 |
+
"# load pretrained ASR model\n",
|
| 249 |
+
"ASR_config = config.get('ASR_config', False)\n",
|
| 250 |
+
"ASR_path = config.get('ASR_path', False)\n",
|
| 251 |
+
"text_aligner = load_ASR_models(ASR_path, ASR_config)\n",
|
| 252 |
+
"\n",
|
| 253 |
+
"# load pretrained F0 model\n",
|
| 254 |
+
"F0_path = config.get('F0_path', False)\n",
|
| 255 |
+
"pitch_extractor = load_F0_models(F0_path)\n",
|
| 256 |
+
"\n",
|
| 257 |
+
"# load BERT model\n",
|
| 258 |
+
"from Utils.PLBERT.util import load_plbert\n",
|
| 259 |
+
"BERT_path = config.get('PLBERT_dir', False)\n",
|
| 260 |
+
"plbert = load_plbert(BERT_path)\n",
|
| 261 |
+
"\n",
|
| 262 |
+
"model_params = recursive_munch(config['model_params'])\n",
|
| 263 |
+
"model = build_model(model_params, text_aligner, pitch_extractor, plbert)\n",
|
| 264 |
+
"_ = [model[key].eval() for key in model]\n",
|
| 265 |
+
"_ = [model[key].to(device) for key in model]"
|
| 266 |
+
],
|
| 267 |
+
"metadata": {
|
| 268 |
+
"id": "jIIAoDACXJL0"
|
| 269 |
+
},
|
| 270 |
+
"execution_count": null,
|
| 271 |
+
"outputs": []
|
| 272 |
+
},
|
| 273 |
+
{
|
| 274 |
+
"cell_type": "code",
|
| 275 |
+
"source": [
|
| 276 |
+
"files = [f for f in os.listdir(\"Models/LJSpeech/\") if f.endswith('.pth')]\n",
|
| 277 |
+
"sorted_files = sorted(files, key=lambda x: int(x.split('_')[-1].split('.')[0]))"
|
| 278 |
+
],
|
| 279 |
+
"metadata": {
|
| 280 |
+
"id": "eKXRAyyzcMpQ"
|
| 281 |
+
},
|
| 282 |
+
"execution_count": null,
|
| 283 |
+
"outputs": []
|
| 284 |
+
},
|
| 285 |
+
{
|
| 286 |
+
"cell_type": "code",
|
| 287 |
+
"source": [
|
| 288 |
+
"params_whole = torch.load(\"Models/LJSpeech/\" + sorted_files[-1], map_location='cpu')\n",
|
| 289 |
+
"params = params_whole['net']"
|
| 290 |
+
],
|
| 291 |
+
"metadata": {
|
| 292 |
+
"id": "ULuU9-VDb9Pk"
|
| 293 |
+
},
|
| 294 |
+
"execution_count": null,
|
| 295 |
+
"outputs": []
|
| 296 |
+
},
|
| 297 |
+
{
|
| 298 |
+
"cell_type": "code",
|
| 299 |
+
"source": [
|
| 300 |
+
"for key in model:\n",
|
| 301 |
+
" if key in params:\n",
|
| 302 |
+
" print('%s loaded' % key)\n",
|
| 303 |
+
" try:\n",
|
| 304 |
+
" model[key].load_state_dict(params[key])\n",
|
| 305 |
+
" except:\n",
|
| 306 |
+
" from collections import OrderedDict\n",
|
| 307 |
+
" state_dict = params[key]\n",
|
| 308 |
+
" new_state_dict = OrderedDict()\n",
|
| 309 |
+
" for k, v in state_dict.items():\n",
|
| 310 |
+
" name = k[7:] # remove `module.`\n",
|
| 311 |
+
" new_state_dict[name] = v\n",
|
| 312 |
+
" # load params\n",
|
| 313 |
+
" model[key].load_state_dict(new_state_dict, strict=False)\n",
|
| 314 |
+
"# except:\n",
|
| 315 |
+
"# _load(params[key], model[key])\n",
|
| 316 |
+
"_ = [model[key].eval() for key in model]"
|
| 317 |
+
],
|
| 318 |
+
"metadata": {
|
| 319 |
+
"id": "J-U29yIYc2ea"
|
| 320 |
+
},
|
| 321 |
+
"execution_count": null,
|
| 322 |
+
"outputs": []
|
| 323 |
+
},
|
| 324 |
+
{
|
| 325 |
+
"cell_type": "code",
|
| 326 |
+
"source": [
|
| 327 |
+
"from Modules.diffusion.sampler import DiffusionSampler, ADPM2Sampler, KarrasSchedule"
|
| 328 |
+
],
|
| 329 |
+
"metadata": {
|
| 330 |
+
"id": "jrPQ_Yrwc3n6"
|
| 331 |
+
},
|
| 332 |
+
"execution_count": null,
|
| 333 |
+
"outputs": []
|
| 334 |
+
},
|
| 335 |
+
{
|
| 336 |
+
"cell_type": "code",
|
| 337 |
+
"source": [
|
| 338 |
+
"sampler = DiffusionSampler(\n",
|
| 339 |
+
" model.diffusion.diffusion,\n",
|
| 340 |
+
" sampler=ADPM2Sampler(),\n",
|
| 341 |
+
" sigma_schedule=KarrasSchedule(sigma_min=0.0001, sigma_max=3.0, rho=9.0), # empirical parameters\n",
|
| 342 |
+
" clamp=False\n",
|
| 343 |
+
")"
|
| 344 |
+
],
|
| 345 |
+
"metadata": {
|
| 346 |
+
"id": "n2CWYNoqc455"
|
| 347 |
+
},
|
| 348 |
+
"execution_count": null,
|
| 349 |
+
"outputs": []
|
| 350 |
+
},
|
| 351 |
+
{
|
| 352 |
+
"cell_type": "code",
|
| 353 |
+
"source": [
|
| 354 |
+
"def inference(text, ref_s, alpha = 0.3, beta = 0.7, diffusion_steps=5, embedding_scale=1):\n",
|
| 355 |
+
" text = text.strip()\n",
|
| 356 |
+
" ps = global_phonemizer.phonemize([text])\n",
|
| 357 |
+
" ps = word_tokenize(ps[0])\n",
|
| 358 |
+
" ps = ' '.join(ps)\n",
|
| 359 |
+
" tokens = textclenaer(ps)\n",
|
| 360 |
+
" tokens.insert(0, 0)\n",
|
| 361 |
+
" tokens = torch.LongTensor(tokens).to(device).unsqueeze(0)\n",
|
| 362 |
+
"\n",
|
| 363 |
+
" with torch.no_grad():\n",
|
| 364 |
+
" input_lengths = torch.LongTensor([tokens.shape[-1]]).to(device)\n",
|
| 365 |
+
" text_mask = length_to_mask(input_lengths).to(device)\n",
|
| 366 |
+
"\n",
|
| 367 |
+
" t_en = model.text_encoder(tokens, input_lengths, text_mask)\n",
|
| 368 |
+
" bert_dur = model.bert(tokens, attention_mask=(~text_mask).int())\n",
|
| 369 |
+
" d_en = model.bert_encoder(bert_dur).transpose(-1, -2)\n",
|
| 370 |
+
"\n",
|
| 371 |
+
" s_pred = sampler(noise = torch.randn((1, 256)).unsqueeze(1).to(device),\n",
|
| 372 |
+
" embedding=bert_dur,\n",
|
| 373 |
+
" embedding_scale=embedding_scale,\n",
|
| 374 |
+
" features=ref_s, # reference from the same speaker as the embedding\n",
|
| 375 |
+
" num_steps=diffusion_steps).squeeze(1)\n",
|
| 376 |
+
"\n",
|
| 377 |
+
"\n",
|
| 378 |
+
" s = s_pred[:, 128:]\n",
|
| 379 |
+
" ref = s_pred[:, :128]\n",
|
| 380 |
+
"\n",
|
| 381 |
+
" ref = alpha * ref + (1 - alpha) * ref_s[:, :128]\n",
|
| 382 |
+
" s = beta * s + (1 - beta) * ref_s[:, 128:]\n",
|
| 383 |
+
"\n",
|
| 384 |
+
" d = model.predictor.text_encoder(d_en,\n",
|
| 385 |
+
" s, input_lengths, text_mask)\n",
|
| 386 |
+
"\n",
|
| 387 |
+
" x, _ = model.predictor.lstm(d)\n",
|
| 388 |
+
" duration = model.predictor.duration_proj(x)\n",
|
| 389 |
+
"\n",
|
| 390 |
+
" duration = torch.sigmoid(duration).sum(axis=-1)\n",
|
| 391 |
+
" pred_dur = torch.round(duration.squeeze()).clamp(min=1)\n",
|
| 392 |
+
"\n",
|
| 393 |
+
" pred_aln_trg = torch.zeros(input_lengths, int(pred_dur.sum().data))\n",
|
| 394 |
+
" c_frame = 0\n",
|
| 395 |
+
" for i in range(pred_aln_trg.size(0)):\n",
|
| 396 |
+
" pred_aln_trg[i, c_frame:c_frame + int(pred_dur[i].data)] = 1\n",
|
| 397 |
+
" c_frame += int(pred_dur[i].data)\n",
|
| 398 |
+
"\n",
|
| 399 |
+
" # encode prosody\n",
|
| 400 |
+
" en = (d.transpose(-1, -2) @ pred_aln_trg.unsqueeze(0).to(device))\n",
|
| 401 |
+
" if model_params.decoder.type == \"hifigan\":\n",
|
| 402 |
+
" asr_new = torch.zeros_like(en)\n",
|
| 403 |
+
" asr_new[:, :, 0] = en[:, :, 0]\n",
|
| 404 |
+
" asr_new[:, :, 1:] = en[:, :, 0:-1]\n",
|
| 405 |
+
" en = asr_new\n",
|
| 406 |
+
"\n",
|
| 407 |
+
" F0_pred, N_pred = model.predictor.F0Ntrain(en, s)\n",
|
| 408 |
+
"\n",
|
| 409 |
+
" asr = (t_en @ pred_aln_trg.unsqueeze(0).to(device))\n",
|
| 410 |
+
" if model_params.decoder.type == \"hifigan\":\n",
|
| 411 |
+
" asr_new = torch.zeros_like(asr)\n",
|
| 412 |
+
" asr_new[:, :, 0] = asr[:, :, 0]\n",
|
| 413 |
+
" asr_new[:, :, 1:] = asr[:, :, 0:-1]\n",
|
| 414 |
+
" asr = asr_new\n",
|
| 415 |
+
"\n",
|
| 416 |
+
" out = model.decoder(asr,\n",
|
| 417 |
+
" F0_pred, N_pred, ref.squeeze().unsqueeze(0))\n",
|
| 418 |
+
"\n",
|
| 419 |
+
"\n",
|
| 420 |
+
" return out.squeeze().cpu().numpy()[..., :-50] # weird pulse at the end of the model, need to be fixed later"
|
| 421 |
+
],
|
| 422 |
+
"metadata": {
|
| 423 |
+
"id": "2x5kVb3nc_eY"
|
| 424 |
+
},
|
| 425 |
+
"execution_count": null,
|
| 426 |
+
"outputs": []
|
| 427 |
+
},
|
| 428 |
+
{
|
| 429 |
+
"cell_type": "markdown",
|
| 430 |
+
"source": [
|
| 431 |
+
"### Synthesize speech"
|
| 432 |
+
],
|
| 433 |
+
"metadata": {
|
| 434 |
+
"id": "O159JnwCc6CC"
|
| 435 |
+
}
|
| 436 |
+
},
|
| 437 |
+
{
|
| 438 |
+
"cell_type": "code",
|
| 439 |
+
"source": [
|
| 440 |
+
"text = '''Maltby and Company would issue warrants on them deliverable to the importer, and the goods were then passed to be stored in neighboring warehouses.\n",
|
| 441 |
+
"'''"
|
| 442 |
+
],
|
| 443 |
+
"metadata": {
|
| 444 |
+
"id": "ThciXQ6rc9Eq"
|
| 445 |
+
},
|
| 446 |
+
"execution_count": null,
|
| 447 |
+
"outputs": []
|
| 448 |
+
},
|
| 449 |
+
{
|
| 450 |
+
"cell_type": "code",
|
| 451 |
+
"source": [
|
| 452 |
+
"# get a random reference in the training set, note that it doesn't matter which one you use\n",
|
| 453 |
+
"path = \"Data/wavs/LJ001-0110.wav\"\n",
|
| 454 |
+
"# this style vector ref_s can be saved as a parameter together with the model weights\n",
|
| 455 |
+
"ref_s = compute_style(path)"
|
| 456 |
+
],
|
| 457 |
+
"metadata": {
|
| 458 |
+
"id": "jldPkJyCc83a"
|
| 459 |
+
},
|
| 460 |
+
"execution_count": null,
|
| 461 |
+
"outputs": []
|
| 462 |
+
},
|
| 463 |
+
{
|
| 464 |
+
"cell_type": "code",
|
| 465 |
+
"source": [
|
| 466 |
+
"start = time.time()\n",
|
| 467 |
+
"wav = inference(text, ref_s, alpha=0.9, beta=0.9, diffusion_steps=10, embedding_scale=1)\n",
|
| 468 |
+
"rtf = (time.time() - start) / (len(wav) / 24000)\n",
|
| 469 |
+
"print(f\"RTF = {rtf:5f}\")\n",
|
| 470 |
+
"import IPython.display as ipd\n",
|
| 471 |
+
"display(ipd.Audio(wav, rate=24000, normalize=False))"
|
| 472 |
+
],
|
| 473 |
+
"metadata": {
|
| 474 |
+
"id": "_mIU0jqDdQ-c"
|
| 475 |
+
},
|
| 476 |
+
"execution_count": null,
|
| 477 |
+
"outputs": []
|
| 478 |
+
}
|
| 479 |
+
]
|
| 480 |
+
}
|
stylekan/Configs/config.yml
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
log_dir: "Models/LJSpeech"
|
| 2 |
+
first_stage_path: "first_stage.pth"
|
| 3 |
+
save_freq: 2
|
| 4 |
+
log_interval: 10
|
| 5 |
+
device: "cuda"
|
| 6 |
+
epochs_1st: 200 # number of epochs for first stage training (pre-training)
|
| 7 |
+
epochs_2nd: 100 # number of epochs for second stage training (joint training)
|
| 8 |
+
batch_size: 16
|
| 9 |
+
max_len: 400 # maximum number of frames
|
| 10 |
+
pretrained_model: ""
|
| 11 |
+
second_stage_load_pretrained: true # set to true if the pre-trained model is for 2nd stage
|
| 12 |
+
load_only_params: false # set to true if do not want to load epoch numbers and optimizer parameters
|
| 13 |
+
|
| 14 |
+
F0_path: "Utils/JDC/bst.t7"
|
| 15 |
+
ASR_config: "Utils/ASR/config.yml"
|
| 16 |
+
ASR_path: "Utils/ASR/epoch_00080.pth"
|
| 17 |
+
PLBERT_dir: 'Utils/PLBERT/'
|
| 18 |
+
|
| 19 |
+
data_params:
|
| 20 |
+
train_data: "Data/train_list.txt"
|
| 21 |
+
val_data: "Data/val_list.txt"
|
| 22 |
+
root_path: "/local/LJSpeech-1.1/wavs"
|
| 23 |
+
OOD_data: "Data/OOD_texts.txt"
|
| 24 |
+
min_length: 50 # sample until texts with this size are obtained for OOD texts
|
| 25 |
+
|
| 26 |
+
preprocess_params:
|
| 27 |
+
sr: 24000
|
| 28 |
+
spect_params:
|
| 29 |
+
n_fft: 2048
|
| 30 |
+
win_length: 1200
|
| 31 |
+
hop_length: 300
|
| 32 |
+
|
| 33 |
+
model_params:
|
| 34 |
+
multispeaker: false
|
| 35 |
+
|
| 36 |
+
dim_in: 64
|
| 37 |
+
hidden_dim: 512
|
| 38 |
+
max_conv_dim: 512
|
| 39 |
+
n_layer: 3
|
| 40 |
+
n_mels: 80
|
| 41 |
+
|
| 42 |
+
n_token: 178 # number of phoneme tokens
|
| 43 |
+
max_dur: 50 # maximum duration of a single phoneme
|
| 44 |
+
style_dim: 128 # style vector size
|
| 45 |
+
|
| 46 |
+
dropout: 0.2
|
| 47 |
+
|
| 48 |
+
# config for decoder
|
| 49 |
+
decoder:
|
| 50 |
+
type: 'istftnet' # either hifigan or istftnet
|
| 51 |
+
resblock_kernel_sizes: [3,7,11]
|
| 52 |
+
upsample_rates : [10, 6]
|
| 53 |
+
upsample_initial_channel: 512
|
| 54 |
+
resblock_dilation_sizes: [[1,3,5], [1,3,5], [1,3,5]]
|
| 55 |
+
upsample_kernel_sizes: [20, 12]
|
| 56 |
+
gen_istft_n_fft: 20
|
| 57 |
+
gen_istft_hop_size: 5
|
| 58 |
+
|
| 59 |
+
# speech language model config
|
| 60 |
+
slm:
|
| 61 |
+
model: 'microsoft/wavlm-base-plus'
|
| 62 |
+
sr: 16000 # sampling rate of SLM
|
| 63 |
+
hidden: 768 # hidden size of SLM
|
| 64 |
+
nlayers: 13 # number of layers of SLM
|
| 65 |
+
initial_channel: 64 # initial channels of SLM discriminator head
|
| 66 |
+
|
| 67 |
+
# style diffusion model config
|
| 68 |
+
diffusion:
|
| 69 |
+
embedding_mask_proba: 0.1
|
| 70 |
+
# transformer config
|
| 71 |
+
transformer:
|
| 72 |
+
num_layers: 3
|
| 73 |
+
num_heads: 8
|
| 74 |
+
head_features: 64
|
| 75 |
+
multiplier: 2
|
| 76 |
+
|
| 77 |
+
# diffusion distribution config
|
| 78 |
+
dist:
|
| 79 |
+
sigma_data: 0.2 # placeholder for estimate_sigma_data set to false
|
| 80 |
+
estimate_sigma_data: true # estimate sigma_data from the current batch if set to true
|
| 81 |
+
mean: -3.0
|
| 82 |
+
std: 1.0
|
| 83 |
+
|
| 84 |
+
loss_params:
|
| 85 |
+
lambda_mel: 5. # mel reconstruction loss
|
| 86 |
+
lambda_gen: 1. # generator loss
|
| 87 |
+
lambda_slm: 1. # slm feature matching loss
|
| 88 |
+
|
| 89 |
+
lambda_mono: 1. # monotonic alignment loss (1st stage, TMA)
|
| 90 |
+
lambda_s2s: 1. # sequence-to-sequence loss (1st stage, TMA)
|
| 91 |
+
TMA_epoch: 50 # TMA starting epoch (1st stage)
|
| 92 |
+
|
| 93 |
+
lambda_F0: 1. # F0 reconstruction loss (2nd stage)
|
| 94 |
+
lambda_norm: 1. # norm reconstruction loss (2nd stage)
|
| 95 |
+
lambda_dur: 1. # duration loss (2nd stage)
|
| 96 |
+
lambda_ce: 20. # duration predictor probability output CE loss (2nd stage)
|
| 97 |
+
lambda_sty: 1. # style reconstruction loss (2nd stage)
|
| 98 |
+
lambda_diff: 1. # score matching loss (2nd stage)
|
| 99 |
+
|
| 100 |
+
diff_epoch: 20 # style diffusion starting epoch (2nd stage)
|
| 101 |
+
joint_epoch: 50 # joint training starting epoch (2nd stage)
|
| 102 |
+
|
| 103 |
+
optimizer_params:
|
| 104 |
+
lr: 0.0001 # general learning rate
|
| 105 |
+
bert_lr: 0.00001 # learning rate for PLBERT
|
| 106 |
+
ft_lr: 0.00001 # learning rate for acoustic modules
|
| 107 |
+
|
| 108 |
+
slmadv_params:
|
| 109 |
+
min_len: 400 # minimum length of samples
|
| 110 |
+
max_len: 500 # maximum length of samples
|
| 111 |
+
batch_percentage: 0.5 # to prevent out of memory, only use half of the original batch size
|
| 112 |
+
iter: 10 # update the discriminator once every this many generator updates
|
| 113 |
+
thresh: 5 # gradient norm above which the gradient is scaled
|
| 114 |
+
scale: 0.01 # gradient scaling factor for predictors from SLM discriminators
|
| 115 |
+
sig: 1.5 # sigma for differentiable duration modeling
|
| 116 |
+
|
stylekan/Configs/config_ft.yml
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
log_dir: "Models/IMAS_FineTuned"
|
| 2 |
+
save_freq: 1
|
| 3 |
+
log_interval: 10
|
| 4 |
+
device: "cuda"
|
| 5 |
+
epochs: 50 # number of finetuning epochs (1 hour of data)
|
| 6 |
+
batch_size: 3
|
| 7 |
+
max_len: 2500 # maximum number of frames
|
| 8 |
+
pretrained_model: "/home/austin/disk2/llmvcs/tt/stylekan/Models/Style_Kanade/NO_SLM_3_epoch_2nd_00002.pth"
|
| 9 |
+
second_stage_load_pretrained: true # set to true if the pre-trained model is for 2nd stage
|
| 10 |
+
load_only_params: true # set to true if do not want to load epoch numbers and optimizer parameters
|
| 11 |
+
|
| 12 |
+
# F0_path: "/home/ubuntu/STTS_48khz/StyleTTS2-48khz/Utils/JDC/bst_rmvpe_48k.t7"
|
| 13 |
+
# ASR_config: "Utils/ASR/config.yml"
|
| 14 |
+
# ASR_path: "/home/ubuntu/STTS_48khz/StyleTTS2-48khz/Utils/ASR/epoch_00050_48K.pth"
|
| 15 |
+
|
| 16 |
+
# CUDA_VISIBLE_DEVICES=0,1,2 accelerate launch multi_gpu_train_finetune_accelerate.py --config_path ./Configs/config_ft.yml
|
| 17 |
+
# CUDA_VISIBLE_DEVICES=5 accelerate launch train_finetune_accelerate.py --config_path ./Configs/config_ft.yml
|
| 18 |
+
|
| 19 |
+
F0_path: "/home/austin/disk2/llmvcs/tt/stylekan/Utils/JDC/bst.t7"
|
| 20 |
+
ASR_config: "/home/austin/disk2/llmvcs/tt/stylekan/Utils/ASR/config.yml"
|
| 21 |
+
ASR_path: "/home/austin/disk2/llmvcs/tt/stylekan/Utils/ASR/bst_00080.pth"
|
| 22 |
+
|
| 23 |
+
PLBERT_dir: 'Utils/PLBERT/'
|
| 24 |
+
|
| 25 |
+
data_params:
|
| 26 |
+
train_data: "/home/austin/disk2/llmvcs/tt/stylekan/Data/metadata_cleanest/FT_imas.csv"
|
| 27 |
+
val_data: "/home/austin/disk2/llmvcs/tt/stylekan/Data/metadata_cleanest/FT_imas_valid.csv"
|
| 28 |
+
root_path: ""
|
| 29 |
+
OOD_data: "/home/austin/disk2/llmvcs/tt/stylekan/Data/OOD_LargeScale_.csv"
|
| 30 |
+
min_length: 50 # sample until texts with this size are obtained for OOD texts
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
preprocess_params:
|
| 34 |
+
sr: 24000
|
| 35 |
+
spect_params:
|
| 36 |
+
n_fft: 2048
|
| 37 |
+
win_length: 1200
|
| 38 |
+
hop_length: 300
|
| 39 |
+
|
| 40 |
+
model_params:
|
| 41 |
+
multispeaker: true
|
| 42 |
+
|
| 43 |
+
dim_in: 64
|
| 44 |
+
hidden_dim: 512
|
| 45 |
+
max_conv_dim: 512
|
| 46 |
+
n_layer: 3
|
| 47 |
+
n_mels: 80
|
| 48 |
+
|
| 49 |
+
n_token: 178 # number of phoneme tokens
|
| 50 |
+
max_dur: 50 # maximum duration of a single phoneme
|
| 51 |
+
style_dim: 128 # style vector size
|
| 52 |
+
|
| 53 |
+
dropout: 0.2
|
| 54 |
+
|
| 55 |
+
decoder:
|
| 56 |
+
type: 'istftnet' # either hifigan or istftnet
|
| 57 |
+
resblock_kernel_sizes: [3,7,11]
|
| 58 |
+
upsample_rates : [10, 6]
|
| 59 |
+
upsample_initial_channel: 512
|
| 60 |
+
resblock_dilation_sizes: [[1,3,5], [1,3,5], [1,3,5]]
|
| 61 |
+
upsample_kernel_sizes: [20, 12]
|
| 62 |
+
gen_istft_n_fft: 20
|
| 63 |
+
gen_istft_hop_size: 5
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
# speech language model config
|
| 68 |
+
slm:
|
| 69 |
+
model: 'Respair/Whisper_Large_v2_Encoder_Block' # The model itself is hardcoded, change it through -> losses.py
|
| 70 |
+
sr: 16000 # sampling rate of SLM
|
| 71 |
+
hidden: 1280 # hidden size of SLM
|
| 72 |
+
nlayers: 33 # number of layers of SLM
|
| 73 |
+
initial_channel: 64 # initial channels of SLM discriminator head
|
| 74 |
+
|
| 75 |
+
# style diffusion model config
|
| 76 |
+
diffusion:
|
| 77 |
+
embedding_mask_proba: 0.1
|
| 78 |
+
# transformer config
|
| 79 |
+
transformer:
|
| 80 |
+
num_layers: 3
|
| 81 |
+
num_heads: 8
|
| 82 |
+
head_features: 64
|
| 83 |
+
multiplier: 2
|
| 84 |
+
|
| 85 |
+
# diffusion distribution config
|
| 86 |
+
dist:
|
| 87 |
+
sigma_data: 0.2 # placeholder for estimate_sigma_data set to false
|
| 88 |
+
estimate_sigma_data: true # estimate sigma_data from the current batch if set to true
|
| 89 |
+
mean: -3.0
|
| 90 |
+
std: 1.0
|
| 91 |
+
|
| 92 |
+
loss_params:
|
| 93 |
+
lambda_mel: 10. # mel reconstruction loss
|
| 94 |
+
lambda_gen: 1. # generator loss
|
| 95 |
+
lambda_slm: 1. # slm feature matching loss
|
| 96 |
+
|
| 97 |
+
lambda_mono: 1. # monotonic alignment loss (1st stage, TMA)
|
| 98 |
+
lambda_s2s: 1. # sequence-to-sequence loss (1st stage, TMA)
|
| 99 |
+
TMA_epoch: 9 # TMA starting epoch (1st stage)
|
| 100 |
+
|
| 101 |
+
lambda_F0: 1. # F0 reconstruction loss (2nd stage)
|
| 102 |
+
lambda_norm: 1. # norm reconstruction loss (2nd stage)
|
| 103 |
+
lambda_dur: 1. # duration loss (2nd stage)
|
| 104 |
+
lambda_ce: 20. # duration predictor probability output CE loss (2nd stage)
|
| 105 |
+
lambda_sty: 1. # style reconstruction loss (2nd stage)
|
| 106 |
+
lambda_diff: 1. # score matching loss (2nd stage)
|
| 107 |
+
|
| 108 |
+
diff_epoch: 0 # style diffusion starting epoch (2nd stage)
|
| 109 |
+
joint_epoch: 30 # joint training starting epoch (2nd stage)
|
| 110 |
+
|
| 111 |
+
optimizer_params:
|
| 112 |
+
lr: 0.0001 # general learning rate
|
| 113 |
+
bert_lr: 0.00001 # learning rate for PLBERT
|
| 114 |
+
ft_lr: 0.00001 # learning rate for acoustic modules
|
| 115 |
+
|
| 116 |
+
slmadv_params:
|
| 117 |
+
min_len: 400 # minimum length of samples
|
| 118 |
+
max_len: 500 # maximum length of samples
|
| 119 |
+
batch_percentage: 0.5 # to prevent out of memory, only use half of the original batch size
|
| 120 |
+
iter: 20 # update the discriminator once every this many generator updates
|
| 121 |
+
thresh: 5 # gradient norm above which the gradient is scaled
|
| 122 |
+
scale: 0.01 # gradient scaling factor for predictors from SLM discriminators
|
| 123 |
+
sig: 1.5 # sigma for differentiable duration modeling
|
stylekan/Configs/config_ft_kaede.yml
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
log_dir: "Models/Kaede_FineTuned"
|
| 2 |
+
save_freq: 1
|
| 3 |
+
log_interval: 10
|
| 4 |
+
device: "cuda"
|
| 5 |
+
epochs: 50 # number of finetuning epochs (1 hour of data)
|
| 6 |
+
batch_size: 4
|
| 7 |
+
max_len: 436.90666666666666666666666666667 # maximum number of frames
|
| 8 |
+
pretrained_model: "/home/austin/disk2/llmvcs/tt/stylekan/Models/Kaede_FineTuned/epoch_2nd_00031.pth"
|
| 9 |
+
second_stage_load_pretrained: true # set to true if the pre-trained model is for 2nd stage
|
| 10 |
+
load_only_params: false # set to true if do not want to load epoch numbers and optimizer parameters
|
| 11 |
+
|
| 12 |
+
# F0_path: "/home/ubuntu/STTS_48khz/StyleTTS2-48khz/Utils/JDC/bst_rmvpe_48k.t7"
|
| 13 |
+
# ASR_config: "Utils/ASR/config.yml"
|
| 14 |
+
# ASR_path: "/home/ubuntu/STTS_48khz/StyleTTS2-48khz/Utils/ASR/epoch_00050_48K.pth"
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
# CUDA_VISIBLE_DEVICES=5,6,7 accelerate launch --main_process_port 25001 --num_machines -1 train.py
|
| 18 |
+
F0_path: "/home/austin/disk2/llmvcs/tt/stylekan/Utils/JDC/bst.t7"
|
| 19 |
+
ASR_config: "/home/austin/disk2/llmvcs/tt/stylekan/Utils/ASR/config.yml"
|
| 20 |
+
ASR_path: "/home/austin/disk2/llmvcs/tt/stylekan/Utils/ASR/bst_00080.pth"
|
| 21 |
+
|
| 22 |
+
PLBERT_dir: 'Utils/PLBERT/'
|
| 23 |
+
|
| 24 |
+
data_params:
|
| 25 |
+
train_data: "/home/austin/disk1/stts-zs_cleaning/data/train_List_updated_plus_48khz.csv"
|
| 26 |
+
val_data: "/home/austin/disk2/llmvcs/tt/stylekan/Data/metadata_cleanest/FT_saori_valid.csv"
|
| 27 |
+
root_path: ""
|
| 28 |
+
OOD_data: "/home/austin/disk2/llmvcs/tt/stylekan/Data/OOD_LargeScale_.csv"
|
| 29 |
+
min_length: 50 # sample until texts with this size are obtained for OOD texts
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
preprocess_params:
|
| 33 |
+
sr: 24000
|
| 34 |
+
spect_params:
|
| 35 |
+
n_fft: 2048
|
| 36 |
+
win_length: 1200
|
| 37 |
+
hop_length: 300
|
| 38 |
+
|
| 39 |
+
model_params:
|
| 40 |
+
multispeaker: true
|
| 41 |
+
|
| 42 |
+
dim_in: 64
|
| 43 |
+
hidden_dim: 512
|
| 44 |
+
max_conv_dim: 512
|
| 45 |
+
n_layer: 3
|
| 46 |
+
n_mels: 80
|
| 47 |
+
|
| 48 |
+
n_token: 178 # number of phoneme tokens
|
| 49 |
+
max_dur: 50 # maximum duration of a single phoneme
|
| 50 |
+
style_dim: 128 # style vector size
|
| 51 |
+
|
| 52 |
+
dropout: 0.2
|
| 53 |
+
|
| 54 |
+
decoder:
|
| 55 |
+
type: 'istftnet' # either hifigan or istftnet
|
| 56 |
+
resblock_kernel_sizes: [3,7,11]
|
| 57 |
+
upsample_rates : [10, 6]
|
| 58 |
+
upsample_initial_channel: 512
|
| 59 |
+
resblock_dilation_sizes: [[1,3,5], [1,3,5], [1,3,5]]
|
| 60 |
+
upsample_kernel_sizes: [20, 12]
|
| 61 |
+
gen_istft_n_fft: 20
|
| 62 |
+
gen_istft_hop_size: 5
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
# speech language model config
|
| 67 |
+
slm:
|
| 68 |
+
model: 'Respair/Whisper_Large_v2_Encoder_Block' # The model itself is hardcoded, change it through -> losses.py
|
| 69 |
+
sr: 16000 # sampling rate of SLM
|
| 70 |
+
hidden: 1280 # hidden size of SLM
|
| 71 |
+
nlayers: 33 # number of layers of SLM
|
| 72 |
+
initial_channel: 64 # initial channels of SLM discriminator head
|
| 73 |
+
|
| 74 |
+
# style diffusion model config
|
| 75 |
+
diffusion:
|
| 76 |
+
embedding_mask_proba: 0.1
|
| 77 |
+
# transformer config
|
| 78 |
+
transformer:
|
| 79 |
+
num_layers: 3
|
| 80 |
+
num_heads: 8
|
| 81 |
+
head_features: 64
|
| 82 |
+
multiplier: 2
|
| 83 |
+
|
| 84 |
+
# diffusion distribution config
|
| 85 |
+
dist:
|
| 86 |
+
sigma_data: 0.2 # placeholder for estimate_sigma_data set to false
|
| 87 |
+
estimate_sigma_data: true # estimate sigma_data from the current batch if set to true
|
| 88 |
+
mean: -3.0
|
| 89 |
+
std: 1.0
|
| 90 |
+
|
| 91 |
+
loss_params:
|
| 92 |
+
lambda_mel: 10. # mel reconstruction loss
|
| 93 |
+
lambda_gen: 1. # generator loss
|
| 94 |
+
lambda_slm: 1. # slm feature matching loss
|
| 95 |
+
|
| 96 |
+
lambda_mono: 1. # monotonic alignment loss (1st stage, TMA)
|
| 97 |
+
lambda_s2s: 1. # sequence-to-sequence loss (1st stage, TMA)
|
| 98 |
+
TMA_epoch: 9 # TMA starting epoch (1st stage)
|
| 99 |
+
|
| 100 |
+
lambda_F0: 1. # F0 reconstruction loss (2nd stage)
|
| 101 |
+
lambda_norm: 1. # norm reconstruction loss (2nd stage)
|
| 102 |
+
lambda_dur: 1. # duration loss (2nd stage)
|
| 103 |
+
lambda_ce: 20. # duration predictor probability output CE loss (2nd stage)
|
| 104 |
+
lambda_sty: 1. # style reconstruction loss (2nd stage)
|
| 105 |
+
lambda_diff: 1. # score matching loss (2nd stage)
|
| 106 |
+
|
| 107 |
+
diff_epoch: 0 # style diffusion starting epoch (2nd stage)
|
| 108 |
+
joint_epoch: 30 # joint training starting epoch (2nd stage)
|
| 109 |
+
|
| 110 |
+
optimizer_params:
|
| 111 |
+
lr: 0.0001 # general learning rate
|
| 112 |
+
bert_lr: 0.00001 # learning rate for PLBERT
|
| 113 |
+
ft_lr: 0.00001 # learning rate for acoustic modules
|
| 114 |
+
|
| 115 |
+
slmadv_params:
|
| 116 |
+
min_len: 400 # minimum length of samples
|
| 117 |
+
max_len: 500 # maximum length of samples
|
| 118 |
+
batch_percentage: 0.5 # to prevent out of memory, only use half of the original batch size
|
| 119 |
+
iter: 20 # update the discriminator once every this many generator updates
|
| 120 |
+
thresh: 5 # gradient norm above which the gradient is scaled
|
| 121 |
+
scale: 0.01 # gradient scaling factor for predictors from SLM discriminators
|
| 122 |
+
sig: 1.5 # sigma for differentiable duration modeling
|
stylekan/Configs/config_kanade.yml
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
log_dir: "/home/austin/disk2/llmvcs/tt/stylekan/Models/Style_Kanade_v02"
|
| 2 |
+
first_stage_path: "/home/austin/disk2/llmvcs/tt/stylekan/Models/Style_Kanade_v02/epoch_1st_00026.pth"
|
| 3 |
+
save_freq: 1
|
| 4 |
+
log_interval: 10
|
| 5 |
+
device: "cuda"
|
| 6 |
+
epochs_1st: 30 # number of epochs for first stage training (pre-training)
|
| 7 |
+
epochs_2nd: 20 # number of epochs for second stage training (joint training)
|
| 8 |
+
batch_size: 24
|
| 9 |
+
max_len: 4000 # maximum number of frames
|
| 10 |
+
pretrained_model: "/home/austin/disk2/llmvcs/tt/stylekan/Models/Style_Kanade_v02/epoch_2nd_00004.pth"
|
| 11 |
+
second_stage_load_pretrained: true # set to true if the pre-trained model is for 2nd stage
|
| 12 |
+
load_only_params: false # set to true if you do not want to load epoch numbers and optimizer parameters
|
| 13 |
+
|
| 14 |
+
# CUDA_VISIBLE_DEVICES=1,2,3 accelerate launch train_first.py --config_path ./Configs/config_kanade.yml
|
| 15 |
+
# CUDA_VISIBLE_DEVICES=6,7 accelerate launch accelerate_train_second.py --config_path ./Configs/config_kanade_test.yml
|
| 16 |
+
|
| 17 |
+
F0_path: "/home/austin/disk2/llmvcs/tt/stylekan/Utils/JDC/bst.t7"
|
| 18 |
+
ASR_config: "/home/austin/disk2/llmvcs/tt/stylekan/Utils/ASR/config.yml"
|
| 19 |
+
ASR_path: "/home/austin/disk2/llmvcs/tt/stylekan/Utils/ASR/bst_00080.pth"
|
| 20 |
+
|
| 21 |
+
PLBERT_dir: 'Utils/PLBERT/'
|
| 22 |
+
|
| 23 |
+
data_params:
|
| 24 |
+
train_data: "/home/austin/disk2/llmvcs/tt/stylekan/Data/metadata_cleanest/filtered_train_list_no_nsp_plus.csv"
|
| 25 |
+
val_data: "/home/austin/disk2/llmvcs/tt/stylekan/Data/mg_valid.txt"
|
| 26 |
+
root_path: ""
|
| 27 |
+
OOD_data: "/home/austin/disk2/llmvcs/tt/stylekan/Data/OOD_LargeScale_.csv"
|
| 28 |
+
min_length: 50 # sample until texts with this size are obtained for OOD texts
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
preprocess_params:
|
| 32 |
+
sr: 24000
|
| 33 |
+
spect_params:
|
| 34 |
+
n_fft: 2048
|
| 35 |
+
win_length: 1200
|
| 36 |
+
hop_length: 300
|
| 37 |
+
|
| 38 |
+
model_params:
|
| 39 |
+
multispeaker: true
|
| 40 |
+
|
| 41 |
+
dim_in: 64
|
| 42 |
+
hidden_dim: 512
|
| 43 |
+
max_conv_dim: 512
|
| 44 |
+
n_layer: 3
|
| 45 |
+
n_mels: 80
|
| 46 |
+
|
| 47 |
+
n_token: 178 # number of phoneme tokens
|
| 48 |
+
max_dur: 50 # maximum duration of a single phoneme
|
| 49 |
+
style_dim: 128 # style vector size
|
| 50 |
+
|
| 51 |
+
dropout: 0.2
|
| 52 |
+
|
| 53 |
+
decoder:
|
| 54 |
+
type: 'istftnet' # either hifigan or istftnet
|
| 55 |
+
resblock_kernel_sizes: [3,7,11]
|
| 56 |
+
upsample_rates : [10, 6]
|
| 57 |
+
upsample_initial_channel: 512
|
| 58 |
+
resblock_dilation_sizes: [[1,3,5], [1,3,5], [1,3,5]]
|
| 59 |
+
upsample_kernel_sizes: [20, 12]
|
| 60 |
+
gen_istft_n_fft: 20
|
| 61 |
+
gen_istft_hop_size: 5
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
# speech language model config
|
| 66 |
+
slm:
|
| 67 |
+
model: 'Respair/Whisper_Large_v2_Encoder_Block' # The model itself is hardcoded, change it through -> losses.py
|
| 68 |
+
sr: 16000 # sampling rate of SLM
|
| 69 |
+
hidden: 1280 # hidden size of SLM
|
| 70 |
+
nlayers: 33 # number of layers of SLM
|
| 71 |
+
initial_channel: 64 # initial channels of SLM discriminator head
|
| 72 |
+
|
| 73 |
+
# style diffusion model config
|
| 74 |
+
diffusion:
|
| 75 |
+
embedding_mask_proba: 0.1
|
| 76 |
+
# transformer config
|
| 77 |
+
transformer:
|
| 78 |
+
num_layers: 3
|
| 79 |
+
num_heads: 8
|
| 80 |
+
head_features: 64
|
| 81 |
+
multiplier: 2
|
| 82 |
+
|
| 83 |
+
# diffusion distribution config
|
| 84 |
+
dist:
|
| 85 |
+
sigma_data: 0.2 # placeholder value, used only when estimate_sigma_data is set to false
|
| 86 |
+
estimate_sigma_data: true # estimate sigma_data from the current batch if set to true
|
| 87 |
+
mean: -3.0
|
| 88 |
+
std: 1.0
|
| 89 |
+
|
| 90 |
+
loss_params:
|
| 91 |
+
lambda_mel: 10. # mel reconstruction loss
|
| 92 |
+
lambda_gen: 1. # generator loss
|
| 93 |
+
lambda_slm: 1. # slm feature matching loss
|
| 94 |
+
|
| 95 |
+
lambda_mono: 1. # monotonic alignment loss (1st stage, TMA)
|
| 96 |
+
lambda_s2s: 1. # sequence-to-sequence loss (1st stage, TMA)
|
| 97 |
+
TMA_epoch: 5 # TMA starting epoch (1st stage)
|
| 98 |
+
|
| 99 |
+
lambda_F0: 1. # F0 reconstruction loss (2nd stage)
|
| 100 |
+
lambda_norm: 1. # norm reconstruction loss (2nd stage)
|
| 101 |
+
lambda_dur: 1. # duration loss (2nd stage)
|
| 102 |
+
lambda_ce: 20. # duration predictor probability output CE loss (2nd stage)
|
| 103 |
+
lambda_sty: 1. # style reconstruction loss (2nd stage)
|
| 104 |
+
lambda_diff: 1. # score matching loss (2nd stage)
|
| 105 |
+
|
| 106 |
+
diff_epoch: 4 # style diffusion starting epoch (2nd stage)
|
| 107 |
+
joint_epoch: 999 # joint training starting epoch (2nd stage)
|
| 108 |
+
|
| 109 |
+
optimizer_params:
|
| 110 |
+
lr: 0.0001 # general learning rate
|
| 111 |
+
bert_lr: 0.00001 # learning rate for PLBERT
|
| 112 |
+
ft_lr: 0.00001 # learning rate for acoustic modules
|
| 113 |
+
|
| 114 |
+
slmadv_params:
|
| 115 |
+
min_len: 400 # minimum length of samples
|
| 116 |
+
max_len: 500 # maximum length of samples
|
| 117 |
+
batch_percentage: 0.5 # to prevent out of memory, only use half of the original batch size
|
| 118 |
+
iter: 20 # update the discriminator once every this many generator updates
|
| 119 |
+
thresh: 5 # gradient norm above which the gradient is scaled
|
| 120 |
+
scale: 0.01 # gradient scaling factor for predictors from SLM discriminators
|
| 121 |
+
sig: 1.5 # sigma for differentiable duration modeling
|
stylekan/Configs/config_kanade_test.yml
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
log_dir: "Models/Style_Kanade_test"
|
| 2 |
+
first_stage_path: "/home/austin/disk2/llmvcs/tt/stylekan/Models/Style_Kanade/epoch_1st_00013.pth"
|
| 3 |
+
save_freq: 1
|
| 4 |
+
log_interval: 10
|
| 5 |
+
device: "cuda"
|
| 6 |
+
epochs_1st: 25 # number of epochs for first stage training (pre-training)
|
| 7 |
+
epochs_2nd: 15 # number of epochs for second stage training (joint training)
|
| 8 |
+
batch_size: 6
|
| 9 |
+
max_len: 560 # maximum number of frames
|
| 10 |
+
pretrained_model: "/home/austin/disk2/llmvcs/tt/stylekan/Models/Style_Kanade/epoch_2nd_00003.pth"
|
| 11 |
+
second_stage_load_pretrained: true # set to true if the pre-trained model is for 2nd stage
|
| 12 |
+
load_only_params: false # set to true if you do not want to load epoch numbers and optimizer parameters
|
| 13 |
+
|
| 14 |
+
# F0_path: "/home/ubuntu/STTS_48khz/StyleTTS2-48khz/Utils/JDC/bst_rmvpe_48k.t7"
|
| 15 |
+
# ASR_config: "Utils/ASR/config.yml"
|
| 16 |
+
# ASR_path: "/home/ubuntu/STTS_48khz/StyleTTS2-48khz/Utils/ASR/epoch_00050_48K.pth"
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
F0_path: "/home/austin/disk2/llmvcs/tt/stylekan/Utils/JDC/bst.t7"
|
| 21 |
+
ASR_config: "/home/austin/disk2/llmvcs/tt/stylekan/Utils/ASR/config.yml"
|
| 22 |
+
ASR_path: "/home/austin/disk2/llmvcs/tt/stylekan/Utils/ASR/bst_00080.pth"
|
| 23 |
+
|
| 24 |
+
PLBERT_dir: 'Utils/PLBERT/'
|
| 25 |
+
|
| 26 |
+
data_params:
|
| 27 |
+
train_data: "/home/austin/disk2/llmvcs/tt/stylekan/Data/filtered_train_list.csv"
|
| 28 |
+
val_data: "/home/austin/disk2/llmvcs/tt/stylekan/Data/mg_valid.txt"
|
| 29 |
+
root_path: ""
|
| 30 |
+
OOD_data: "/home/austin/disk2/llmvcs/tt/stylekan/Data/OOD_LargeScale_.csv"
|
| 31 |
+
min_length: 50 # sample until texts with this size are obtained for OOD texts
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
preprocess_params:
|
| 35 |
+
sr: 24000
|
| 36 |
+
spect_params:
|
| 37 |
+
n_fft: 2048
|
| 38 |
+
win_length: 1200
|
| 39 |
+
hop_length: 300
|
| 40 |
+
|
| 41 |
+
model_params:
|
| 42 |
+
multispeaker: true
|
| 43 |
+
|
| 44 |
+
dim_in: 64
|
| 45 |
+
hidden_dim: 512
|
| 46 |
+
max_conv_dim: 512
|
| 47 |
+
n_layer: 3
|
| 48 |
+
n_mels: 80
|
| 49 |
+
|
| 50 |
+
n_token: 178 # number of phoneme tokens
|
| 51 |
+
max_dur: 50 # maximum duration of a single phoneme
|
| 52 |
+
style_dim: 128 # style vector size
|
| 53 |
+
|
| 54 |
+
dropout: 0.2
|
| 55 |
+
|
| 56 |
+
decoder:
|
| 57 |
+
type: 'istftnet' # either hifigan or istftnet
|
| 58 |
+
resblock_kernel_sizes: [3,7,11]
|
| 59 |
+
upsample_rates : [10, 6]
|
| 60 |
+
upsample_initial_channel: 512
|
| 61 |
+
resblock_dilation_sizes: [[1,3,5], [1,3,5], [1,3,5]]
|
| 62 |
+
upsample_kernel_sizes: [20, 12]
|
| 63 |
+
gen_istft_n_fft: 20
|
| 64 |
+
gen_istft_hop_size: 5
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
# speech language model config
|
| 69 |
+
slm:
|
| 70 |
+
model: 'Respair/Whisper_Large_v2_Encoder_Block' # The model itself is hardcoded, change it through -> losses.py
|
| 71 |
+
sr: 16000 # sampling rate of SLM
|
| 72 |
+
hidden: 1280 # hidden size of SLM
|
| 73 |
+
nlayers: 33 # number of layers of SLM
|
| 74 |
+
initial_channel: 64 # initial channels of SLM discriminator head
|
| 75 |
+
|
| 76 |
+
# style diffusion model config
|
| 77 |
+
diffusion:
|
| 78 |
+
embedding_mask_proba: 0.1
|
| 79 |
+
# transformer config
|
| 80 |
+
transformer:
|
| 81 |
+
num_layers: 3
|
| 82 |
+
num_heads: 8
|
| 83 |
+
head_features: 64
|
| 84 |
+
multiplier: 2
|
| 85 |
+
|
| 86 |
+
# diffusion distribution config
|
| 87 |
+
dist:
|
| 88 |
+
sigma_data: 0.2 # placeholder value, used only when estimate_sigma_data is set to false
|
| 89 |
+
estimate_sigma_data: true # estimate sigma_data from the current batch if set to true
|
| 90 |
+
mean: -3.0
|
| 91 |
+
std: 1.0
|
| 92 |
+
|
| 93 |
+
loss_params:
|
| 94 |
+
lambda_mel: 10. # mel reconstruction loss
|
| 95 |
+
lambda_gen: 1. # generator loss
|
| 96 |
+
lambda_slm: 1. # slm feature matching loss
|
| 97 |
+
|
| 98 |
+
lambda_mono: 1. # monotonic alignment loss (1st stage, TMA)
|
| 99 |
+
lambda_s2s: 1. # sequence-to-sequence loss (1st stage, TMA)
|
| 100 |
+
TMA_epoch: 9 # TMA starting epoch (1st stage)
|
| 101 |
+
|
| 102 |
+
lambda_F0: 1. # F0 reconstruction loss (2nd stage)
|
| 103 |
+
lambda_norm: 1. # norm reconstruction loss (2nd stage)
|
| 104 |
+
lambda_dur: 1. # duration loss (2nd stage)
|
| 105 |
+
lambda_ce: 20. # duration predictor probability output CE loss (2nd stage)
|
| 106 |
+
lambda_sty: 1. # style reconstruction loss (2nd stage)
|
| 107 |
+
lambda_diff: 1. # score matching loss (2nd stage)
|
| 108 |
+
|
| 109 |
+
diff_epoch: 2 # style diffusion starting epoch (2nd stage)
|
| 110 |
+
joint_epoch: 3 # joint training starting epoch (2nd stage)
|
| 111 |
+
|
| 112 |
+
optimizer_params:
|
| 113 |
+
lr: 0.0001 # general learning rate
|
| 114 |
+
bert_lr: 0.00001 # learning rate for PLBERT
|
| 115 |
+
ft_lr: 0.00001 # learning rate for acoustic modules
|
| 116 |
+
|
| 117 |
+
slmadv_params:
|
| 118 |
+
min_len: 400 # minimum length of samples
|
| 119 |
+
max_len: 500 # maximum length of samples
|
| 120 |
+
batch_percentage: 1. # fraction of the original batch size to use; lower it (e.g. 0.5) to prevent out of memory
|
| 121 |
+
iter: 1 # update the discriminator once every this many generator updates
|
| 122 |
+
thresh: 5 # gradient norm above which the gradient is scaled
|
| 123 |
+
scale: 0.01 # gradient scaling factor for predictors from SLM discriminators
|
| 124 |
+
sig: 1.5 # sigma for differentiable duration modeling
|
stylekan/Data/OOD_LargeScale_.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9742a1f68c932e3a2812876e88e2d9328e747fa751c7167c0c9237d57fb36a7a
|
| 3 |
+
size 14627728
|
stylekan/Data/filtered_train_list.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d8552b349addd76788632f458d89ec23515340fc71c0788562c1f92632037f8b
|
| 3 |
+
size 92255378
|
stylekan/Data/metadata_cleanest/48khz_config_with_names_ids.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:935ed8623d9cc6ac3015898d34599c8060c2b1b6f7a75b34c9e8713bda8cebbc
|
| 3 |
+
size 41507323
|
stylekan/Data/metadata_cleanest/FT_imas copy.csv
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Events_and_Card/Card_Commyuu/Work_Comyu/Work_Comyu_chunk114.wav|soɽeʔte tsɯmaɽiː.|486
|
| 2 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/arisu/arisu_cgss/arisu_card_200716/arisu_voice_200716_6_04.wav|kono iʔɕɯɴ o, omoide ni!|495
|
| 3 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Events_and_Card/Event/Hagoromo/Hagoromo_chunk87.wav|soɯsoɯ, tamani wa oanesaɴ no iɯ koto mo ɕindʑite minaʔte. soɽe de sae haɴ wa medetakɯ, gorʲoɯɕiɴ to wakai ɕite, sono ato ni wa gaiseɴ ɽaibɯ mo tɕanto jaʔtaɴ da jo ne.|486
|
| 4 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/52-MaxBeat.wav|go baɴ ɕoɯbɯ, desɯkaɽa....ima no wa tamatama ka mo ɕiɽemaseɴɕi.|563
|
| 5 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/arisu/arisu_mobamasu/4_arisu__0004_(Vocals)/4_arisu__0004_(Vocals)_chunk12.wav|taɕika ni, sɯkoɕi haɕaide ɕimaimaɕitaga...|495
|
| 6 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_Events_and_Card/Kanade_Events/lazylazy/lazylazy_chunk26.wav|ne.|482
|
| 7 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puri2-chunk230_DeepFilterNet3.wav|joːɕi o, oː— koɽe wa koɽe wa osoɽoɕiː ɽitorɯ monsɯtaː da okaɕi o agerɯkaɽa doɯ ka jɯrɯɕite okɯɽe|563
|
| 8 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/usamin/usamin_mobamas/usamin_mobamasu_0017/usamin_mobamasu_0017_chunk67.wav|kisekae nanatɕaɴ desɯ joː! tsɯgi hadʲ mo goaikʲoɯ desɯ...ija ne ɕimpiɴ pikapika no doːrɯ kaː...toɯdʑi wa saisentaɴ daʔtaɴ desɯ joː? de mo boːmɯ ga saʔte...ɯɯ...|481
|
| 9 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Mobamas/Syuko Voice/【モバマス】[フォレストヴィリディス]塩見周子【ボイス集】 - Niconico Video/【モバマス】[フォレストヴィリディス]塩見周子【ボイス集】 - Niconico Video_chunk35.wav|ne tsɯ i ɕiseɴ, kandʑirɯ naː. pɯɽodʲɯɯsaːsaɴ no hoɯ kaɽa.|486
|
| 10 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_mobamas/chie_mobamasu_0017/chie_mobamasu_0017_chunk66.wav|anzɯtɕaɴ wa iʔɕɯɴ de aidorɯ ni naɽerɯɕi.|483
|
| 11 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/shiburin/shiburin_mobamas/shiburin_mobamasu_0017/shiburin_mobamasu_0017_chunk54.wav|jɯrɯgi nai kakɯgo o idaite ɕɯɯ o fɯrɯɴ da.|494
|
| 12 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/mobamasu/karen_mobamasu_0003_cnk216.wav|pɯɽodʲɯɯsaːsaɴ.|488
|
| 13 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/jou_mika/jou_mika_cgss/jou_mika_card_301129/jou_mika_voice_301129_2_02.wav|wataɕi wa odoɽokasaɽerɯ joɽi, odoɽoka ɕitai hadaɕi! itazɯɽa to onadʑi da moː—!|485
|
| 14 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Events_and_Card/Event/Hagoromo/Hagoromo_chunk118.wav|kɯgiɽi, ka.|486
|
| 15 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Events_and_Card/Event/Hagoromo/Hagoromo_chunk98.wav|ɯɯɴ, saihaɴ wa goɯiɴ naɴ dakaɽa.|486
|
| 16 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/shiburin/shiburin_cgss/shiburin_card_200924/shiburin_voice_200924_2_06.wav|wakaɽe wa daidʑi daʔte kizɯitakaɽa. tɕanto iɯ jo, wataɕi kaɽa. sajonaɽa o.|494
|
| 17 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/shiburin/shiburin_cgss/shiburin_card_200924/shiburin_voice_200924_6_03.wav|ano giŋga no mɯkoɯ e.|494
|
| 18 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_Events_and_Card/Kanade_Events/Kanade_CGSS_Episodes/Kanade_CGSS_Episodes_chunk341.wav|tomodatɕi to hetoheto ni narɯ made asobɯ nante.|482
|
| 19 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/shiki/shiki_mobamas/shiki_mobamasu_0002/shiki_mobamasu_0002_chunk8_chunks/chunk_10.wav|akɯɕɯ kai niːkoɯ!|480
|
| 20 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/arisu/arisu_mobamasu/11_arisu__0012_(Vocals)/11_arisu__0012_(Vocals)_chunk44.wav|aɽisɯʔte, niʔponniɴʔpokɯ nai namae de ija deɕita.|495
|
| 21 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuko_CGSS_ShinAido_Home_Room/syuuko_card_200086/syuukovoice_200086_2_02.wav|o matɕtɕa tatete agejoɯ ka? a, koːçiː?|486
|
| 22 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/arisu/arisu_cgss/arisu_card_201370/arisu_voice_201370_6_05.wav|kawaikɯ, amazɯʔpakɯ!|495
|
| 23 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/karen_cgss/karen_cgss_card_200183/karen_cgss_voice_200183_2_11.wav|pɯɽodʲɯɯsaːsaɴ wa...oseʔkai na no wa...iː tokoɽo...ka na?|488
|
| 24 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/arisu/arisu_mobamasu/14_arisu__0015_(Vocals)/14_arisu__0015_(Vocals)_chunk171.wav|doɯzo.|495
|
| 25 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mifune/mifune_cgss/mifune_card_200368/mifune_voice_200368_6_06.wav|sasajaka de mo...tanoɕinde itadaketaɽanaɽa...saiwai desɯ.|490
|
| 26 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_voice_home_shinaido_room/kanade_card_200377/kanade_voice_200377_1_07.wav|tataite mo...nigenai wa ne, kɯɽage...ɕigeki mo, naɽerɯ to akirɯ no ka na?|482
|
| 27 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/anzu/anzu_cgss/anzu_card_100078/anzu_voice_100078_2_06.wav|pɯɽodʲɯɯsaː, kʲoɯ kɯɽerɯ ame wa nani adʑi?|498
|
| 28 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_Events_and_Card/Kanade_Events/NBK/NBK_chunk1.wav|katɕimakeʔte anata ne.|482
|
| 29 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mayu/mayu_cgss/mayu_cgss_card_100270/mayu_cgss_voice_100270_1_08.wav|ɯde no toge naɽa, toɽemaseɴ jo. moɯ, makitsɯite ɕimaʔta mono.|484
|
| 30 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/fumika/fumika_cgss/fumika_card_201355/fumika_voice_201355_2_07.wav|ɽei wa oːkiː desɯ ne. fɯtaɽi de iʔte mo, jojɯɯ soɯ de...taɕikamete mite mo?|493
|
| 31 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mayu/mayu_mobamas/mayu_mobamasu_0001/mayu_mobamasu_0001_chunk8.wav|pɯɽodʲɯɯsaːsaɴ to iʔɕo naɽa...|484
|
| 32 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Events_and_Card/Event/GirlsInTheFrontier/GirlsInTheFrontier_chunk38.wav|kʲoɯ mo aɕita mo asaʔte mo, ataɽaɕiː oɕigoto ga aʔte, soɽe naɽi no tɕoɯseɴ ga aʔte, de mo tanoɕikɯte, akarɯi mainitɕi ga zɯʔto tsɯzɯkɯɴ daʔte.|486
|
| 33 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/49-Neverends.wav|oçitoɽiːtɕi fɯɴ desɯkaɽa ne. itɕi bʲoɯ itɕi bʲoɯ o taisetsɯ ni ɕinai to....|563
|
| 34 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Mobamas/Syuko Voice/【モバマス】[ブルーサマーヘブン]塩見周子【ボイス集】 - Niconico Video/【モバマス】[ブルーサマーヘブン]塩見周子【ボイス集】 - Niconico Video_chunk44.wav|tsɯide ni pɯɽodʲɯɯsaːsaɴ kaɽa mo geʔto ɕitɕaoʔka naː.|486
|
| 35 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/miku/miku_cgss/miku_card_100430/miku_voice_100430_3_01.wav|hanamitɕi de wa ne, mae mo oɕiɽo mo sajɯɯ kaɽa mo kansei ga kikoete kite, kaɽadatɕɯɯ ni faɴ no koe ga tonde kɯrɯ no! soɽe ga ne, minna ni oɯeɴ saɽeterɯʔte, kandʑirɯɴ da!|487
|
| 36 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/usamin/usamin_cgss/usamin_card_100971/usamin_voice_100971_6_02.wav|minasaɴ! natsɯkaɕiː ano koɽo ni, taimɯ sɯɽiːʔpɯ!|481
|
| 37 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_voice_home_shinaido_room/kanade_card_200762/kanade_voice_200762_6_06.wav|bɯɕitsɯke na tsɯki wa, kaerɯ dʑikaɴ ne. otsɯkaɽesama.|482
|
| 38 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mayu/mayu_mobamas/mayu_mobamasu_0006/mayu_mobamasu_0006_chunk43.wav|çitoɕikɯte.|484
|
| 39 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mayu/mayu_mobamas/mayu_mobamasu_0008/mayu_mobamasu_0008_chunk33.wav|moʔto, moʔtoʔte...|484
|
| 40 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/momoka/momoka_mobamas/momoka_mobamasu_0012/momoka_mobamasu_0012_chunk60.wav|seiːʔpai, mabajɯkɯ!|489
|
| 41 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mio/mio_honda_cgss/mio_honda_card_300457/mio_honda_voice_300457_3_01.wav|ahaha! ɯgoitetaɽa dandaɴ aʔtomaʔte kita ne! pɯɽodʲɯɯsaː mo sonna kandʑi? joɽoɕiː! soɽe dʑa, kono mama gaŋgaɴ ictɕaoɯ!|492
|
| 42 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/PL5.wav|kanadetɕaɴ, ohanaɕi ga—ɽimasɯ...ɽeiʔte kaɽa mo mainitɕi, okoɕite kɯɽemaseɴ ka?|563
|
| 43 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_mobamas/chie_mobamasu_0014/chie_mobamasu_0014_chunk291.wav|koŋkai no oɕigoto ga dekite.|483
|
| 44 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_voice_home_shinaido_room/kanade_card_200459/kanade_voice_200459_2_02.wav|onseɴ wa sɯki dakedo, ɕɯmi ni sɯrɯ no wa otona ni naʔte kaɽa ni toʔte okɯ wa.|482
|
| 45 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mayu/mayu_mobamas/mayu_mobamasu_0009/mayu_mobamasu_0009_chunk178.wav|todʑikometɕaimaɕita.|484
|
| 46 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/arisu/arisu_cgss/arisu_card_200988/arisu_voice_200988_2_11.wav|pɯɽodʲɯɯsansaɴ ga mamoʔte kɯɽeterɯ koto...wataɕitatɕi wa ɕiʔtemasɯ jo.|495
|
| 47 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk1149.wav|dʑiʔto ɕitete kɯdasai ne.|563
|
| 48 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/55-NationBlue.wav|ɽiː natɕaɴ, komaɽasetɕaʔtegomeɴ nasai.|563
|
| 49 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/nagi/nagi_cgss/nagi_card_301123/nagi_voice_301123_2_01.wav|sns wa, daini no matɕi desɯ ne. sɯgoɕikata o matɕigaerɯto, taiheɴ na koto ni narɯ.|499
|
| 50 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/usamin/usamin_mobamas/usamin_mobamasu_0018/usamin_mobamasu_0018_chunk73.wav|pɯɽodʲɯɯsaːsaɴ mo minna mo! tɕiawase na itɕi neɴ ni naɽimasɯ joɯ ni! pɯɽodʲɯɯsaːsaɴ.|481
|
stylekan/Data/metadata_cleanest/FT_imas.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
stylekan/Data/metadata_cleanest/FT_imas_remapped.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:88548d781c2b51530f2f6729b00c35ba45d9af3085ff3cc8e72695539e5d9b0b
|
| 3 |
+
size 10502242
|
stylekan/Data/metadata_cleanest/FT_imas_valid.csv
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_cgss/chieri_card_100519/chieri_voice_100519_2_09.wav|hai, dasasete kɯdasai! baɽaeti baŋgɯmi he no ɕɯtsɯei, gambaɽimasɯ!|5
|
| 2 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/frederica/fredrica_cgss/fredrica_card_100685/fredrica_voice_100685_2_11.wav|fɯɽetɕaɴ wa kʲɯɯ goɯ dakaɽa, oboetoite neː—! iʔɕo ni eɽande mo iːkedo!|20
|
| 3 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk222.wav|osewa ni naɽimasɯ. kotoɕi mo.|2
|
| 4 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/fumika/fumika_mobamas/fumika_split/fumika__0013/fumika__0013_chunk71.wav|ko ki joki, ɽomaɴ sɯ no joɯ desɯ ne.|13
|
| 5 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/mobamasu/karen_mobamasu_0008_cnk29.wav|kiʔto matɕigaʔteta koto mo iʔpai arɯ.|7
|
| 6 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/shiburin/shiburin_mobamas/shiburin_mobamasu_0014/shiburin_mobamasu_0014_chunk5.wav|gaʔkoɯ?|6
|
| 7 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/31-MaxBeat.wav|ɯɯɴ, tokɯi de wa—ɽimaseɴ ne....çitoɽi de wa iɽenai ka mo ɕiɽemaseɴ.|2
|
| 8 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/anzu/anzu_cgss/anzu_card_100316/anzu_voice_100316_1_12.wav|maː, pɯɽodʲɯɯsaː wa, anzɯ no ɕitsɯjoɯ akɯʔte koto de...|11
|
| 9 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_Events_and_Card/Kanade_Events/twintail/twintail_chunk84.wav|aɽa?|3
|
| 10 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/shiburin/shiburin_cgss/shiburin_card_200002/shiburin_voice_200002_2_06.wav|pɯɽodʲɯɯsaːʔte...hana kazarɯ no to ka, doɯ omoɯɴ daɽoɯ...|6
|
| 11 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/40-CardComyu.wav|sonna—nata ni jakɯsokɯ ɕimasɯ. kiʔto, daɽe joɽi mo kiɽei ni narɯto. sɯɯpaː moderɯ joɽi mo, kanaɽazɯ. daʔte...hoɽa, oɯsama no meiɽei wa zeʔtai desɯkaɽa ne.|2
|
| 12 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Mobamas/Syuko Voice/【モバマス】[想いひとひら]塩見周子【ボイス集】 - Niconico Video/【モバマス】[想いひとひら]塩見周子【ボイス集】 - Niconico Video_chunk24.wav|konna hjoɯdʑoɯ...|0
|
| 13 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk177_chunk6.wav|mainitɕiːɽoiɽo na oɕigoto ga dekimasɯɕi|2
|
| 14 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/55-Neverends.wav|pɯɽodʲɯɯsaː ni kaɽentɕaɴ. ohajoɯ gozaimasɯ.|2
|
| 15 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_cgss/chieri_card_100108/chieri_voice_100108_6_02.wav|kʲoɯ wa sɯʔtoɽi, gambaɽimasɯ ne! sɯkoɕi, otona o mezaɕite!|5
|
| 16 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/nagi/nagi_cgss/nagi_card_300665/nagi_voice_300665_2_03.wav|ɽafɯ na kikonaɕi de, dʑimɯɕo e no konaɽi kaɴ o eɴɕɯtsɯ...nani ka matɕigaeta ka.|17
|
| 17 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puri2-chunk242_DeepFilterNet3.wav|omoidasɯ dake de kibɯɴ ga warɯkɯ narɯ jo|2
|
| 18 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/usamin/usamin_mobamas/usamin_mobamasu_0015/usamin_mobamasu_0015_chunk24.wav|wataɕitatɕi wa kiʔto, otagai ga taisetsɯ naɴ desɯ ne.|4
|
| 19 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/mobamasu/karen_mobamasu_0006_cnk179.wav|kono mɯne ni, ɕiʔkaɽi ɕimaɯ ne!|7
|
| 20 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/jou_mika/jou_mika_cgss/jou_mika_card_301268/jou_mika_voice_301268_1_05.wav|jozoɽa no hoɕi mitai ni...ɯɯɴ, jozoɽa joɽi mo tsɯjokɯ çikarɯ, sono gɯɽai no kimotɕi.|8
|
| 21 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/kanade_bonus_cd/kanade_bonus_cd_chunk0.wav|anata ga, wataɕi no?|3
|
| 22 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/35-MaxBeat.wav|soɽosoɽo deɕoɯ ka. ikimaɕoɯ, asɯkatɕaɴ.|2
|
| 23 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/32-PrettyLiarFin.wav|soɯ ieba, kʲoɯ wa ɕɯzai ga kɯrɯ çi deɕita ne.|2
|
| 24 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/arisu/arisu_mobamasu/9_arisu__0009_(Vocals)/9_arisu__0009_(Vocals)_chunk41.wav|kawaiː kao ɕite imasɯɕi.|1
|
| 25 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/TK19.wav|pɯɽodʲɯɯsaː, ano...nomi tomodatɕi ni naɽimaseɴ ka|2
|
| 26 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_Events_and_Card/Kanade_Events/Kande5Comyus/Kande5Comyus_chunk168.wav|fɯdaɴ no wataɕi dʑa, sonna saːbisɯ ɕinai mono ne.|3
|
| 27 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/frederica/fredrica_cgss/fredrica_card_101157/fredrica_voice_101157_6_07.wav|mata ɽaibɯ ni kitakɯ narɯ mahoɯ, kaketɕaʔta!|20
|
| 28 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/ranko/ranko_cgss/ranko_card_200074/ranko_voice_200074_1_05.wav|pɯɽodʲɯɯsaː no jɯme...kiʔto wataɕi ga, kono te de!|18
|
| 29 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/mobamasu/karen_mobamasu_0002_cnk58.wav|kʲoɯ asobiːkanai? dame? pɯɽodʲɯɯsaːsaɴ to...|7
|
| 30 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/arisu/arisu_cgss/arisu_card_200067/arisu_voice_200067_2_02.wav|dokidoki ɕiterɯ no ka na...wataɕi...|1
|
| 31 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/miku/miku_cgss/miku_card_100396/miku_voice_100396_6_01.wav|sekɯɕiːkʲaʔto mikɯ no ɽaibɯ, hadʑimete ikɯnʲa!|14
|
| 32 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/frederica/fredrica_cgss/fredrica_card_101090/fredrica_voice_101090_6_02.wav|wataɕi no koto, moʔto sɯki ni naʔte iːkaɽa ne!|20
|
| 33 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Events_and_Card/Event/yukyuu_no_hoshisuzushi/yukyuu_no_hoshisuzushi_chunk24.wav|saː, doɯ ka naː—?|0
|
| 34 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_mobamas/chie_mobamasu_0013/chie_mobamasu_0013_chunk36.wav|hei! kakemaɕita!|5
|
| 35 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk636.wav|tsɯgi no gotɕɯɯmoɴ wa doɯ nasaimasɯ?|2
|
| 36 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/frederica/fredrica_cgss/fredrica_card_101090/fredrica_voice_101090_4_03.wav|ammaɽi jɯʔkɯɽi kaŋgaeterɯto, seikai ga kawactɕaɯkaɽa gotɕɯɯi! ima mo seitɕoɯ ki! tsɯmaɽi, wataɕiɽaɕikɯ seitɕoɯtɕɯɯ, dakaɽa ne! me o hanaɕita sɯki ni, bidʑiɴ sɯgirɯ fɯɽetɕaɴ ni naʔtɕaɯ ka mo jo!|20
|
| 37 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/mobamasu/karen_mobamasu_0003_cnk212.wav|wataɕi ni mahoɯ o kaketa pɯɽodʲɯɯsaːsaɴ ni, sɯteːdʑi kaɽa kaɴɕa o!|7
|
| 38 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mayu/mayu_mobamas/mayu_mobamasu_0006/mayu_mobamasu_0006_chunk55.wav|zembɯ te ni hairɯɴ daʔte.|12
|
| 39 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/mobamas_voices/Episodes/kanade_saint/kanade_saint_chunk39.wav|kɯɽisɯmasɯ keːki to, koɯtɕa ga iː kaɕiɽa.|3
|
| 40 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/jou_mika/jou_mika_cgss/jou_mika_card_300112/jou_mika_voice_300112_1_08.wav|ataɕi doɯ? oiɕi soɯ deɕoɯ? toɯzeɴ, tabegoɽo daɕi?|8
|
| 41 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/ranko/ranko_mobamas/ranko_mobamasu_0016/ranko_mobamasu_0016_chunk208.wav|ano çito no omoi...|18
|
| 42 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/ranko/ranko_mobamas/ranko_mobamasu_0013/ranko_mobamasu_0013_chunk99.wav|ɴʔfɯfɯfɯ kaɴɕa ɕijoɯ.|18
|
| 43 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/ranko/ranko_mobamas/ranko_mobamasu_0009/ranko_mobamasu_0009_chunk12.wav|soɕite, saigo ni wa, hanajomesaɴ ga tiaɽa o nokosete saʔtɕaɯɴ desɯ!|18
|
| 44 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk189.wav|dakedo, anata ni deaiʔte, sonna çibi wa owaɽi o tsɯgemaɕita.|2
|
| 45 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/fumika/fumika_mobamas/fumika_split/fumika__0016/fumika__0016_chunk8.wav|pɯɽodʲɯɯsaːsaɴ e to tsɯtaemaɕoɯ.|13
|
| 46 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mayu/mayu_mobamas/mayu_mobamasu_0011/mayu_mobamasu_0011_chunk47.wav|soɕite...|12
|
| 47 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/miku/miku_mobamas/miku_mobamasu_0009/miku_mobamasu_0009_chunk32.wav|piː tɕaɴ, mikɯ no madʑime na tokoɽo o wakaʔta? wakaʔtadeɕo? naɽa kaeʔteː!|14
|
| 48 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_mobamas/chie_mobamasu_0005/chie_mobamasu_0005_chunk11.wav|semete dʑɯmbi dake o, minna to iʔɕo ni jaɽitai naʔte.|5
|
| 49 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/miku/miku_mobamas/miku_mobamasu_0001/miku_mobamasu_0001_chunk11.wav|haː, oçisama no nioi ga sɯrɯ nonʲaː!nʲɯɯ, mikaketa toki kaɽa zɯʔto ki ni naʔteta no!|14
|
| 50 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_mobamas/chie_mobamasu_0014/chie_mobamasu_0014_chunk143.wav|pɯɯɽodʲɯɯsaːsaɴ!|5
|
| 51 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_Events_and_Card/Kanade_Events/MainComyu_Kanade/MainComyu_Kanade_chunk104.wav|konomi de wa naikedo.|3
|
| 52 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/05-workkomyu.wav|kotɕiɽa no bako wa...komonoiɽe? kiɽei na mojoɯ de, oheja ni oite okitakɯ naɽimasɯ ne.|2
|
| 53 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/kanade_dorama_cd/kanade_dorama_cd_chunk116.wav|maː...|3
|
| 54 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/shiburin/shiburin_cgss/shiburin_card_200650/shiburin_voice_200650_5_01.wav|tamani keŋka ɕi taɽi, jɯzɯɽanakaʔtaɽi, haɽi aʔtaɽi...soɽe mo warɯkɯ nai jo ne. asemamiɽe de mo, kaʔko warɯkɯte mo,toɽai adopɯɽimɯsɯ no sei saŋkaʔkei wa, jɯgamanai mama, mae ni sɯsɯnde ikerɯkaɽa.|6
|
| 55 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_mobamas/chie_mobamasu_0014/chie_mobamasu_0014_chunk105.wav|e.|5
|
| 56 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/karen_cgss/karen_cgss_card_200313/karen_cgss_voice_200313_1_02.wav|mizɯ deʔpoɯ dʑa naideɕoɯ soɽeː—! ojɯ deʔpoɯ daɕi, ɕikamo ondo takame daɕi!|7
|
| 57 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/mobamasu/karen_mobamasu_0003_cnk22.wav|koɽe kaɽa mo, wataɕi ni egao o misete hoɕiː na.|7
|
| 58 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/37-NationBlue.wav|haːi!|2
|
| 59 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/shiki/shiki_cgss/shiki_card_101268/shiki_voice_101268_2_05.wav|wataɕi o wataɕi taɽaɕimerɯ mono, fɯetɕoʔta naː, itsɯ no aida ni ka.|9
|
| 60 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_Events_and_Card/Kanade_Events/NBK/NBK_chunk77.wav|jokaʔtanaɽa, ikimaɕoɯ.|3
|
| 61 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/mobamasu/karen_mobamasu_0006_cnk107.wav|wataɕi o tsɯɽedaɕite. mirɯ dake de, iː no?|7
|
| 62 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/mobamas_voices/Episodes/kanade_yasuragi/kanade_yasuragi_chunk7_AudioSR_Processed_48K.wav|itsɯ ni mo maɕite netsɯ ga haiʔterɯ no wa dʑidʑitsɯ ka na.|3
|
| 63 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/jou_mika/jou_mika_cgss/jou_mika_card_300537/jou_mika_voice_300537_1_07.wav|mite mite! ataɕi no peinto! fɯ!! osoɯɽi kiɴɕi!|8
|
| 64 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/usamin/usamin_mobamas/usamin_mobamasu_0018/usamin_mobamasu_0018_chunk55.wav|minni ɯsaminnʲɯɯjaː! merɯheɴ wa fɯkɯ ni tɕendʑi! desɯ jo! medetai desɯ! naɴ to nakɯ! oɕoɯgatsɯ wa iː desɯ jo ne! minna ga ɕiawase soɯ na kao o ɕitemasɯ! aː...atatamaɽimasɯ neː...|4
|
| 65 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_mobamas/chie_mobamasu_0012/chie_mobamasu_0012_chunk9.wav|dono kisetsɯ mo, sɯteki na koto ga takɯsaɴ arɯkedo.|5
|
| 66 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/fumika/fumika_mobamas/fumika_split/fumika__0012/fumika__0012_chunk23.wav|kawaɽi aɽimaseɴ.|13
|
| 67 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/arisu/arisu_cgss/arisu_card_200181/arisu_voice_200181_2_04.wav|komɯɽiko no dʑijɯɯka ni tsɯite wa, doɯ omoimasɯ ka? a, tɕoʔto...|1
|
| 68 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Events_and_Card/Card_Commyuu/Main_Comyu/Main_Comyu_chunk169.wav|igai to ne.|0
|
| 69 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mayu/mayu_mobamas/mayu_mobamasu_0012/mayu_mobamasu_0012_chunk27.wav|kanaɕikɯ narɯ kɯɽai ni.|12
|
| 70 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/shiki/shiki_cgss/shiki_card_100750/shiki_voice_100750_2_01.wav|sɯso ga nagai no wa, hakɯi de naɽeterɯ. ɕokosanai ka wa betsɯ mondai dakedo.|9
|
| 71 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/miku/miku_cgss/miku_card_100596/miku_voice_100596_1_02.wav|jɯɯkitɕaɴ no haɕagibɯɽi ga hampa nainʲa! koɽe ga, doːmɯ pawaː!|14
|
| 72 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puri2-chunk246_DeepFilterNet3.wav|jake ni dʑiɕiɴ taʔpɯɽi na no ga sɯkoɕi dake fwaɴ o kakitaterɯkedo|2
|
| 73 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mio/mio_honda_cgss/mio_honda_card_300351/mio_honda_voice_300351_2_12.wav|rɯɯmɯ no gaʔkʲɯɯ ɕimbɯɴ tsɯkɯʔte agerɯ! pɯɽodʲɯɯsaː!|16
|
| 74 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/fumika/fumika_mobamas/fumika_split/fumika__0014/fumika__0014_chunk21.wav|wataɕi no koi ga...|13
|
| 75 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/minami/minami_cgss/minami_card_200552/minami_voice_200552_1_01.wav|jɯɯitɕaɴ wa hogaɽaka de geŋki dakaɽa, wataɕi wa tɕoʔto otɕitsɯita fɯɴ'iki de.|19
|
| 76 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puri2-chunk371_DeepFilterNet3.wav|kimi mitai na çito ga itaɽa jɯɯɽei ni naɽazɯ ni sɯndaɴ daɽoɯ ne|2
|
| 77 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mayu/mayu_cgss/mayu_cgss_card_100166/mayu_cgss_voice_100166_2_10.wav|ɽiŋgɯ, taisetsɯ ni ɕimasɯ! ...o, ɽiaɽiŋgɯ? ...soɽe de mo, taisetsɯ ni.|12
|
| 78 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/shiburin/shiburin_cgss/shiburin_card_200001/shiburin_voice_200001_6_07.wav|faɴʔte, konna ni aʔtakai na...|6
|
| 79 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/47-MaxBeat.wav|kakosaɴ wa naɴ ni tɕoɯseɴ sɯrɯɴ desɯ ka?|2
|
| 80 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_mobamas/chie_mobamasu_0005/chie_mobamasu_0005_chunk36.wav|wa...|5
|
| 81 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Mobamas/Syuko Voice/【モバマス】[奇しき縁]塩見周子【ボイス集】 - Niconico Video/【モバマス】[奇しき縁]塩見周子【ボイス集】 - Niconico Video_chunk39.wav|ano toki çiɽoʔte itadaita kitsɯne koto ɕi jomi ɕɯɯko desɯ.|0
|
| 82 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/minami/minami_cgss/minami_card_201091/minami_voice_201091_1_01.wav|ajametɕantatɕi mo nijanija ɕitɕaʔte. hoɽa, hajakɯ kakenai to!|19
|
| 83 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/fumika/fumika_mobamas/fumika_split/fumika__0015/fumika__0015_chunk11.wav|makemaseɴ.|13
|
| 84 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/mobamas_voices/Serifu/voices_kanade_otomoe_plus/voices_kanade_otomoe_plus_chunk3.wav|kiʔto, minna mo onadʑi çikaɽi o mite irɯ no deɕoɯ?|3
|
| 85 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/ranko/ranko_mobamas/ranko_mobamasu_0016/ranko_mobamasu_0016_chunk181.wav|wataɕi mo motomete irɯ no.|18
|
| 86 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/mobamas_voices/Serifu/voices_kanade_otomoe_plus/voices_kanade_otomoe_plus_chunk22.wav|kono sɯgata ni mitoɽete mo iːkedo.|3
|
| 87 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mifune/mifune_cgss/mifune_card_201246/mifune_voice_201246_2_07.wav|jakei ga sɯteki na omise no dʲnaːkeɴ ga arɯɴ desɯga...gojotei, ikaga desɯ ka?|10
|
| 88 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/fumika/fumika_mobamas/fumika_split/fumika__0009/fumika__0009_chunk49.wav|mɯkaɕi kaɽa, çito no ɕiseɴ o sakete kimaɕita.|13
|
| 89 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/EP28.wav|fɯfɯʔ...aɴɕiɴ ɕite kɯdasai. hanabi ja tɕiɽi jɯkɯ sakɯɽa ga ɯtsɯkɯɕiːkaɽa to iʔte mo, wataɕi wa doko ka e kie taɽi nante ɕimaseɴ. daʔte wataɕi no i baɕo wa, koko naɴ desɯkaɽa.......|2
|
| 90 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/minami/minami_cgss/minami_card_200958/minami_voice_200958_3_01.wav|aɽa, keijakɯ o majoʔte irɯ no? kaɕikokɯte, totemo oɽoka ne. fɯfɯfɯ, itsɯ made gamaɴ ga dekirɯ no ka, mite ite agerɯ...kono amai amai jɯɯwakɯ ni, doko made aɽagaerɯ no kaɕiɽa?|19
|
| 91 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mio/mio_honda_cgss/mio_honda_card_300799/mio_honda_voice_300799_1_09.wav|kiɽaɽiɴ? sasɯga ni soɽosoɽo okigaerɯkaɽa sa. sono...ɯtsɯsanaide?|16
|
| 92 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk151.wav|iː desɯ ka? kʲoɯ wa moɯ oɕigoto ɕitɕa dame desɯ.|2
|
| 93 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/shiburin/shiburin_mobamas/shiburin_mobamasu_0007/shiburin_mobamasu_0007_chunk10.wav|maː, soɯ iɯ tokoɽoʔte ammaɽiːʔta koto naiɕi.|6
|
| 94 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_voice_home_shinaido_room/kanade_card_201272/kanade_voice_201272_1_01.wav|jɯbi o kaɽamete, netsɯ o tsɯtaeaʔte...fɯɽeta saki kaɽa, madʑiɽiaʔte ɕimai soɯ.|3
|
| 95 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puri2-chunk238_chunk4_DeepFilterNet3.wav|haɽoiɴ paːtiː da jo, tatemaedʑoɯ wa.|2
|
| 96 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk187.wav|mɯkaɕi no wataɕi wa, kiʔto tokɯni mokɯteki nante motɕiʔte inakaʔta joɯ ni omoimasɯ.|2
|
| 97 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/fumika/fumika_mobamas/fumika_split/fumika__0003/fumika__0003_chunk34.wav|ma, a, ano, sɯmimaseɴ, kizɯkazɯ ni...|13
|
| 98 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk204.wav|osewa ni naɽimasɯ. kotoɕi mo.|2
|
| 99 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puri2-chunk202_DeepFilterNet3.wav|soɯ? homekotoba to ɕite sɯnao ni ɯke toɽiʔte okoɯ ka na. aɽigatoɯ. kimi no kʲoɯrʲokwaʔte koso da jo. kimi ni tetsɯdaiʔte mo ɽaʔte, kono iɕoɯ to soɯbi ni kaɽada o naɽaɕite oitejo kaʔta. ɕikaɕi, me o mawaɕite irɯ kono otoko ni wa iːtai koto ga jama hodo aʔtakeɽedo, koɯ mo aʔsaɽi ɕimawaʔte ɕimaɯto, naɴ da ka hjoɯɕinɯke ɕite ɕimaʔta.|2
|
| 100 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_mobamas/chie_mobamasu_0017/chie_mobamasu_0017_chunk218.wav|oɕiete hoɕiː na.|5
|
| 101 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puri2-chunk55_DeepFilterNet3.wav|nasake wa çito no tame naɽazɯ to wa iɯkeɽedo, hakaɽazɯ mo oɽei o moɽaʔte ɕimaʔta ne. de mo...ka, kaʔpɯrɯ waɽibikitɕikeʔto ka...|2
|
| 102 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/miku/miku_cgss/miku_card_100935/miku_voice_100935_2_06.wav|mikɯtatɕi no tame niːtsɯ mo oɕigoto gambaʔte...pːtɕaɴ wa sɯgoi jo neː!|14
|
| 103 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/jou_mika/jou_mika_cgss/jou_mika_card_301268/jou_mika_voice_301268_2_10.wav|kameɽamansaɴ no noɽi mo itsɯ mo to zenzeɴ tɕigaʔte! aː, kintɕoɯ ɕita! ɯfɯfɯ!|8
|
| 104 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/frederica/fredrica_cgss/fredrica_card_100022/fredrica_voice_100022_2_11.wav|pɯɽodʲɯɯsaːʔte, ɽiɴwaɴ na no? kaʔkoɯ iː!|20
|
| 105 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/unxkaedepuriUBfin-chunk636_DeepFilterNet3.wav|akiɽamete narɯ mono ka! fɯɯɴʔ!|2
|
| 106 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/16-christmas.wav|gʲɯː.|2
|
| 107 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/nagi/nagi_cgss/nagi_card_301124/nagi_voice_301124_6_03.wav|mezase, toɽendo itɕiː!|17
|
| 108 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk1019.wav|ano...pɯɽodʲɯɯsaː...|2
|
| 109 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/momoka/momoka_cgss/momoka_card_100181/momoka_voice_100181_2_05.wav|pɯɽodʲɯɯsaːtɕama wa, takɯsaɴ no çito ni ɕɯkaɽetemasɯ no ne.|15
|
| 110 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mayu/mayu_mobamas/mayu_mobamasu_0013/mayu_mobamasu_0013_chunk0.wav|fɯtaɽi de osampoɯ.|12
|
| 111 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Mobamas/Syuko Voice/【モバマス】[フォレストヴィリディス]塩見周子【ボイス集】 - Niconico Video/【モバマス】[フォレストヴィリディス]塩見周子【ボイス集】 - Niconico Video_chunk23.wav|tsɯkaɽeta kaɽada ni, koɯkateki meɴ. de mo...|0
|
| 112 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/mobamasu/karen_mobamasu_0003_cnk209.wav|wataɕi o oɯeɴ ɕite kɯɽerɯ, minna no tame ni!|7
|
| 113 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/arisu/arisu_mobamasu/10_arisu__0010_(Vocals)/10_arisu__0010_(Vocals)_chunk2.wav|soɽe wa, doɯ iɯ imi naɴ deɕoɯ ka. wataɕi wa, kandʑoɯ o komete tɕanto eŋgi ɕita tsɯmoɽi desɯ.|1
|
| 114 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuko_CGSS_ShinAido_Home_Room/syuuko_card_200425/syuukovoice_200425_5_01.wav|hoɽa hoɽa, kɯɽaeː—! ɯmi ni kite sɯɯtsɯ no jatsɯ wa,nɯɽetɕaeː—! fɯfɯ!! minna de ɯmi e bakansɯ nante, fɯtsɯɯ no onnanokoʔpoi ja ne. minna o tanoɕimaserɯ aidorɯ mo, kʲoɯ wa tanoɕindʑaʔte koː—!|0
|
| 115 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Events_and_Card/Card_Commyuu/Work_Comyu/Work_Comyu_chunk115.wav|ɯtɕiɽa no koto, otabe ja sɯɯ.ʔteki na?ʔjaːɴ! ɕaihaɴʔte wa, daitaɴ!|0
|
| 116 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/anzu/anzu_cgss/anzu_card_101189/anzu_voice_101189_2_08.wav|ɯsagi kaɽa aɽaitate no nioi ga sɯrɯ...koko made kiɽei da to, sasɯga ni kizɯkɯ ne.|11
|
| 117 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Mobamas/Syuko Voice/【モバマス】[あやかし京娘]塩見周子【ボイス集】 - Niconico Video/【モバマス】[あやかし京娘]塩見周子【ボイス集】 - Niconico Video_chunk18.wav|saː saː, minna tsɯitoide! kʲoto ni wa oinaɽisaɴ no ɽeɴɕoɯ ga arɯɴ da jo ne. kitsɯne wa sono tsɯkai naɴ da.|0
|
| 118 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/fumika/fumika_cgss/fumika_card_200696/fumika_voice_200696_1_02.wav|sɯ na ha ma no sɯteːdʑi de ɯtaɯ nante...kono keikeɴ, ɕiʔkaɽi to kokoɽo ni kizamikomimasɯ.|13
|
| 119 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk833.wav|mɯkaɕi no wataɕi ga omoi moɕi nakaʔta ima ga koko ni aɽimasɯ|2
|
| 120 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/fumika/fumika_cgss/fumika_card_200439/fumika_voice_200439_6_07.wav|masaniːrʲɯɯɕoɴ no joɯ na...kiɽameki ni mitɕita, ɽaibɯ deɕita.|13
|
| 121 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/vocals-chunk21.wav|wataɕi ni desɯ ka?|2
|
| 122 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/miku/miku_cgss/miku_card_101107/miku_voice_101107_6_05.wav|miʔkɯ no gohoɯɕinʲa!|14
|
| 123 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_Events_and_Card/Kanade_Events/kimi_no_stage_isho/kimi_no_stage_isho_chunk29.wav|«fɯʔfɯʔfɯ».|3
|
| 124 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/karen_cgss/karen_cgss_card_201404/karen_cgss_voice_201404_1_11.wav|honto wa giɽigiɽi naɴ da? anata no kotae o matɕi tsɯzɯkete...mɯne ga...hoɽa, ne.|7
|
| 125 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_cgss/chieri_card_101063/chieri_voice_101063_5_01.wav|o, pɯɽodʲɯɯsaːsaɴ, koʔtɕi desɯ!...a, aite jokaʔta...e? ɯɽisode, miaʔtemasɯ ka?...ɯɽaɕiː desɯ. wataɕi ni wa daitaɴ na irɯ ka moʔte omoʔta no de...fɯfɯʔ, iː toɕi ni naɽi soɯ!|5
|
| 126 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/usamin/usamin_mobamas/usamin_mobamasu_0021/usamin_mobamasu_0021_chunk52.wav|moɯ iʔkai kakete okimaɕoɯ! eː i!|4
|
| 127 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puri2-chunk161_DeepFilterNet3.wav|zanneɴ dakedo kono ataɽi ni wa miataɽanai ne otoɕitanaɽa kono heɴ ni arɯ hazɯ naɴ dakeɽedo wataɕitatɕi wa saifɯ o mita baɕo kaɽa idoɯ mo ɕite inai hoka ni aʔta koto to ieba|2
|
| 128 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/momoka/momoka_cgss/momoka_card_100182/momoka_voice_100182_1_10.wav|wataɕi ga mi ni tsɯkerɯ baɽa ni, toge wa aɽimaseɴ! keʔɕite sasaɽimaseɴ no!|15
|
| 129 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mio/mio_honda_cgss/mio_honda_card_300811/mio_honda_voice_300811_1_10.wav|oʔto nataɽi! kami çikoɯ ki wa nagetɕa ikenai ze! okɯɽidasɯ kandʑi de ne!|16
|
| 130 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mifune/mifune_cgss/mifune_card_200163/mifune_voice_200163_2_09.wav|hea aɽendʑi wa, nigate de. itsɯ mo, kono kamigata ni...jahaɽi, dʑimi?|10
|
| 131 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puriUBfin-chunk90_DeepFilterNet3.wav|wataɕi ga kobanda no wa, tada soɽe dake daʔtakaɽa. kazokɯ no tatɕiba mo ɽikai wa ɕite irɯɕi, keʔɕite kiɽai na wake dʑa nai.|2
|
stylekan/Data/metadata_cleanest/FT_imas_valid_less_than_20sec.csv
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_cgss/chieri_card_100519/chieri_voice_100519_2_09.wav|hai, dasasete kɯdasai! baɽaeti baŋgɯmi he no ɕɯtsɯei, gambaɽimasɯ!|5
|
| 2 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/frederica/fredrica_cgss/fredrica_card_100685/fredrica_voice_100685_2_11.wav|fɯɽetɕaɴ wa kʲɯɯ goɯ dakaɽa, oboetoite neː—! iʔɕo ni eɽande mo iːkedo!|20
|
| 3 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk222.wav|osewa ni naɽimasɯ. kotoɕi mo.|2
|
| 4 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/fumika/fumika_mobamas/fumika_split/fumika__0013/fumika__0013_chunk71.wav|ko ki joki, ɽomaɴ sɯ no joɯ desɯ ne.|13
|
| 5 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/mobamasu/karen_mobamasu_0008_cnk29.wav|kiʔto matɕigaʔteta koto mo iʔpai arɯ.|7
|
| 6 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/shiburin/shiburin_mobamas/shiburin_mobamasu_0014/shiburin_mobamasu_0014_chunk5.wav|gaʔkoɯ?|6
|
| 7 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/31-MaxBeat.wav|ɯɯɴ, tokɯi de wa—ɽimaseɴ ne....çitoɽi de wa iɽenai ka mo ɕiɽemaseɴ.|2
|
| 8 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/anzu/anzu_cgss/anzu_card_100316/anzu_voice_100316_1_12.wav|maː, pɯɽodʲɯɯsaː wa, anzɯ no ɕitsɯjoɯ akɯʔte koto de...|11
|
| 9 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_Events_and_Card/Kanade_Events/twintail/twintail_chunk84.wav|aɽa?|3
|
| 10 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/shiburin/shiburin_cgss/shiburin_card_200002/shiburin_voice_200002_2_06.wav|pɯɽodʲɯɯsaːʔte...hana kazarɯ no to ka, doɯ omoɯɴ daɽoɯ...|6
|
| 11 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/40-CardComyu.wav|sonna—nata ni jakɯsokɯ ɕimasɯ. kiʔto, daɽe joɽi mo kiɽei ni narɯto. sɯɯpaː moderɯ joɽi mo, kanaɽazɯ. daʔte...hoɽa, oɯsama no meiɽei wa zeʔtai desɯkaɽa ne.|2
|
| 12 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Mobamas/Syuko Voice/【モバマス】[想いひとひら]塩見周子【ボイス集】 - Niconico Video/【モバマス】[想いひとひら]塩見周子【ボイス集】 - Niconico Video_chunk24.wav|konna hjoɯdʑoɯ...|0
|
| 13 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk177_chunk6.wav|mainitɕiːɽoiɽo na oɕigoto ga dekimasɯɕi|2
|
| 14 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/55-Neverends.wav|pɯɽodʲɯɯsaː ni kaɽentɕaɴ. ohajoɯ gozaimasɯ.|2
|
| 15 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_cgss/chieri_card_100108/chieri_voice_100108_6_02.wav|kʲoɯ wa sɯʔtoɽi, gambaɽimasɯ ne! sɯkoɕi, otona o mezaɕite!|5
|
| 16 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/nagi/nagi_cgss/nagi_card_300665/nagi_voice_300665_2_03.wav|ɽafɯ na kikonaɕi de, dʑimɯɕo e no konaɽi kaɴ o eɴɕɯtsɯ...nani ka matɕigaeta ka.|17
|
| 17 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puri2-chunk242_DeepFilterNet3.wav|omoidasɯ dake de kibɯɴ ga warɯkɯ narɯ jo|2
|
| 18 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/usamin/usamin_mobamas/usamin_mobamasu_0015/usamin_mobamasu_0015_chunk24.wav|wataɕitatɕi wa kiʔto, otagai ga taisetsɯ naɴ desɯ ne.|4
|
| 19 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/mobamasu/karen_mobamasu_0006_cnk179.wav|kono mɯne ni, ɕiʔkaɽi ɕimaɯ ne!|7
|
| 20 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/jou_mika/jou_mika_cgss/jou_mika_card_301268/jou_mika_voice_301268_1_05.wav|jozoɽa no hoɕi mitai ni...ɯɯɴ, jozoɽa joɽi mo tsɯjokɯ çikarɯ, sono gɯɽai no kimotɕi.|8
|
| 21 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/kanade_bonus_cd/kanade_bonus_cd_chunk0.wav|anata ga, wataɕi no?|3
|
| 22 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/35-MaxBeat.wav|soɽosoɽo deɕoɯ ka. ikimaɕoɯ, asɯkatɕaɴ.|2
|
| 23 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/32-PrettyLiarFin.wav|soɯ ieba, kʲoɯ wa ɕɯzai ga kɯrɯ çi deɕita ne.|2
|
| 24 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/arisu/arisu_mobamasu/9_arisu__0009_(Vocals)/9_arisu__0009_(Vocals)_chunk41.wav|kawaiː kao ɕite imasɯɕi.|1
|
| 25 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/TK19.wav|pɯɽodʲɯɯsaː, ano...nomi tomodatɕi ni naɽimaseɴ ka|2
|
| 26 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_Events_and_Card/Kanade_Events/Kande5Comyus/Kande5Comyus_chunk168.wav|fɯdaɴ no wataɕi dʑa, sonna saːbisɯ ɕinai mono ne.|3
|
| 27 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/frederica/fredrica_cgss/fredrica_card_101157/fredrica_voice_101157_6_07.wav|mata ɽaibɯ ni kitakɯ narɯ mahoɯ, kaketɕaʔta!|20
|
| 28 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/ranko/ranko_cgss/ranko_card_200074/ranko_voice_200074_1_05.wav|pɯɽodʲɯɯsaː no jɯme...kiʔto wataɕi ga, kono te de!|18
|
| 29 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/mobamasu/karen_mobamasu_0002_cnk58.wav|kʲoɯ asobiːkanai? dame? pɯɽodʲɯɯsaːsaɴ to...|7
|
| 30 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/arisu/arisu_cgss/arisu_card_200067/arisu_voice_200067_2_02.wav|dokidoki ɕiterɯ no ka na...wataɕi...|1
|
| 31 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/miku/miku_cgss/miku_card_100396/miku_voice_100396_6_01.wav|sekɯɕiːkʲaʔto mikɯ no ɽaibɯ, hadʑimete ikɯnʲa!|14
|
| 32 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/frederica/fredrica_cgss/fredrica_card_101090/fredrica_voice_101090_6_02.wav|wataɕi no koto, moʔto sɯki ni naʔte iːkaɽa ne!|20
|
| 33 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Events_and_Card/Event/yukyuu_no_hoshisuzushi/yukyuu_no_hoshisuzushi_chunk24.wav|saː, doɯ ka naː—?|0
|
| 34 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_mobamas/chie_mobamasu_0013/chie_mobamasu_0013_chunk36.wav|hei! kakemaɕita!|5
|
| 35 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk636.wav|tsɯgi no gotɕɯɯmoɴ wa doɯ nasaimasɯ?|2
|
| 36 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/mobamasu/karen_mobamasu_0003_cnk212.wav|wataɕi ni mahoɯ o kaketa pɯɽodʲɯɯsaːsaɴ ni, sɯteːdʑi kaɽa kaɴɕa o!|7
|
| 37 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mayu/mayu_mobamas/mayu_mobamasu_0006/mayu_mobamasu_0006_chunk55.wav|zembɯ te ni hairɯɴ daʔte.|12
|
| 38 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/mobamas_voices/Episodes/kanade_saint/kanade_saint_chunk39.wav|kɯɽisɯmasɯ keːki to, koɯtɕa ga iː kaɕiɽa.|3
|
| 39 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/jou_mika/jou_mika_cgss/jou_mika_card_300112/jou_mika_voice_300112_1_08.wav|ataɕi doɯ? oiɕi soɯ deɕoɯ? toɯzeɴ, tabegoɽo daɕi?|8
|
| 40 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/ranko/ranko_mobamas/ranko_mobamasu_0016/ranko_mobamasu_0016_chunk208.wav|ano çito no omoi...|18
|
| 41 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/ranko/ranko_mobamas/ranko_mobamasu_0013/ranko_mobamasu_0013_chunk99.wav|ɴʔfɯfɯfɯ kaɴɕa ɕijoɯ.|18
|
| 42 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/ranko/ranko_mobamas/ranko_mobamasu_0009/ranko_mobamasu_0009_chunk12.wav|soɕite, saigo ni wa, hanajomesaɴ ga tiaɽa o nokosete saʔtɕaɯɴ desɯ!|18
|
| 43 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk189.wav|dakedo, anata ni deaiʔte, sonna çibi wa owaɽi o tsɯgemaɕita.|2
|
| 44 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/fumika/fumika_mobamas/fumika_split/fumika__0016/fumika__0016_chunk8.wav|pɯɽodʲɯɯsaːsaɴ e to tsɯtaemaɕoɯ.|13
|
| 45 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mayu/mayu_mobamas/mayu_mobamasu_0011/mayu_mobamasu_0011_chunk47.wav|soɕite...|12
|
| 46 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/miku/miku_mobamas/miku_mobamasu_0009/miku_mobamasu_0009_chunk32.wav|piː tɕaɴ, mikɯ no madʑime na tokoɽo o wakaʔta? wakaʔtadeɕo? naɽa kaeʔteː!|14
|
| 47 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_mobamas/chie_mobamasu_0005/chie_mobamasu_0005_chunk11.wav|semete dʑɯmbi dake o, minna to iʔɕo ni jaɽitai naʔte.|5
|
| 48 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/miku/miku_mobamas/miku_mobamasu_0001/miku_mobamasu_0001_chunk11.wav|haː, oçisama no nioi ga sɯrɯ nonʲaː!nʲɯɯ, mikaketa toki kaɽa zɯʔto ki ni naʔteta no!|14
|
| 49 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_mobamas/chie_mobamasu_0014/chie_mobamasu_0014_chunk143.wav|pɯɯɽodʲɯɯsaːsaɴ!|5
|
| 50 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_Events_and_Card/Kanade_Events/MainComyu_Kanade/MainComyu_Kanade_chunk104.wav|konomi de wa naikedo.|3
|
| 51 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/05-workkomyu.wav|kotɕiɽa no bako wa...komonoiɽe? kiɽei na mojoɯ de, oheja ni oite okitakɯ naɽimasɯ ne.|2
|
| 52 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/kanade_dorama_cd/kanade_dorama_cd_chunk116.wav|maː...|3
|
| 53 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/shiburin/shiburin_cgss/shiburin_card_200650/shiburin_voice_200650_5_01.wav|tamani keŋka ɕi taɽi, jɯzɯɽanakaʔtaɽi, haɽi aʔtaɽi...soɽe mo warɯkɯ nai jo ne. asemamiɽe de mo, kaʔko warɯkɯte mo,toɽai adopɯɽimɯsɯ no sei saŋkaʔkei wa, jɯgamanai mama, mae ni sɯsɯnde ikerɯkaɽa.|6
|
| 54 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_mobamas/chie_mobamasu_0014/chie_mobamasu_0014_chunk105.wav|e.|5
|
| 55 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/karen_cgss/karen_cgss_card_200313/karen_cgss_voice_200313_1_02.wav|mizɯ deʔpoɯ dʑa naideɕoɯ soɽeː—! ojɯ deʔpoɯ daɕi, ɕikamo ondo takame daɕi!|7
|
| 56 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/mobamasu/karen_mobamasu_0003_cnk22.wav|koɽe kaɽa mo, wataɕi ni egao o misete hoɕiː na.|7
|
| 57 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/37-NationBlue.wav|haːi!|2
|
| 58 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/shiki/shiki_cgss/shiki_card_101268/shiki_voice_101268_2_05.wav|wataɕi o wataɕi taɽaɕimerɯ mono, fɯetɕoʔta naː, itsɯ no aida ni ka.|9
|
| 59 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_Events_and_Card/Kanade_Events/NBK/NBK_chunk77.wav|jokaʔtanaɽa, ikimaɕoɯ.|3
|
| 60 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/mobamasu/karen_mobamasu_0006_cnk107.wav|wataɕi o tsɯɽedaɕite. mirɯ dake de, iː no?|7
|
| 61 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/mobamas_voices/Episodes/kanade_yasuragi/kanade_yasuragi_chunk7_AudioSR_Processed_48K.wav|itsɯ ni mo maɕite netsɯ ga haiʔterɯ no wa dʑidʑitsɯ ka na.|3
|
| 62 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/jou_mika/jou_mika_cgss/jou_mika_card_300537/jou_mika_voice_300537_1_07.wav|mite mite! ataɕi no peinto! fɯ!! osoɯɽi kiɴɕi!|8
|
| 63 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_mobamas/chie_mobamasu_0012/chie_mobamasu_0012_chunk9.wav|dono kisetsɯ mo, sɯteki na koto ga takɯsaɴ arɯkedo.|5
|
| 64 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/fumika/fumika_mobamas/fumika_split/fumika__0012/fumika__0012_chunk23.wav|kawaɽi aɽimaseɴ.|13
|
| 65 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/arisu/arisu_cgss/arisu_card_200181/arisu_voice_200181_2_04.wav|komɯɽiko no dʑijɯɯka ni tsɯite wa, doɯ omoimasɯ ka? a, tɕoʔto...|1
|
| 66 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Events_and_Card/Card_Commyuu/Main_Comyu/Main_Comyu_chunk169.wav|igai to ne.|0
|
| 67 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mayu/mayu_mobamas/mayu_mobamasu_0012/mayu_mobamasu_0012_chunk27.wav|kanaɕikɯ narɯ kɯɽai ni.|12
|
| 68 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/shiki/shiki_cgss/shiki_card_100750/shiki_voice_100750_2_01.wav|sɯso ga nagai no wa, hakɯi de naɽeterɯ. ɕokosanai ka wa betsɯ mondai dakedo.|9
|
| 69 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/miku/miku_cgss/miku_card_100596/miku_voice_100596_1_02.wav|jɯɯkitɕaɴ no haɕagibɯɽi ga hampa nainʲa! koɽe ga, doːmɯ pawaː!|14
|
| 70 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puri2-chunk246_DeepFilterNet3.wav|jake ni dʑiɕiɴ taʔpɯɽi na no ga sɯkoɕi dake fwaɴ o kakitaterɯkedo|2
|
| 71 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mio/mio_honda_cgss/mio_honda_card_300351/mio_honda_voice_300351_2_12.wav|rɯɯmɯ no gaʔkʲɯɯ ɕimbɯɴ tsɯkɯʔte agerɯ! pɯɽodʲɯɯsaː!|16
|
| 72 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/fumika/fumika_mobamas/fumika_split/fumika__0014/fumika__0014_chunk21.wav|wataɕi no koi ga...|13
|
| 73 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/minami/minami_cgss/minami_card_200552/minami_voice_200552_1_01.wav|jɯɯitɕaɴ wa hogaɽaka de geŋki dakaɽa, wataɕi wa tɕoʔto otɕitsɯita fɯɴ'iki de.|19
|
| 74 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puri2-chunk371_DeepFilterNet3.wav|kimi mitai na çito ga itaɽa jɯɯɽei ni naɽazɯ ni sɯndaɴ daɽoɯ ne|2
|
| 75 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mayu/mayu_cgss/mayu_cgss_card_100166/mayu_cgss_voice_100166_2_10.wav|ɽiŋgɯ, taisetsɯ ni ɕimasɯ! ...o, ɽiaɽiŋgɯ? ...soɽe de mo, taisetsɯ ni.|12
|
| 76 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/shiburin/shiburin_cgss/shiburin_card_200001/shiburin_voice_200001_6_07.wav|faɴʔte, konna ni aʔtakai na...|6
|
| 77 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/47-MaxBeat.wav|kakosaɴ wa naɴ ni tɕoɯseɴ sɯrɯɴ desɯ ka?|2
|
| 78 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_mobamas/chie_mobamasu_0005/chie_mobamasu_0005_chunk36.wav|wa...|5
|
| 79 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Mobamas/Syuko Voice/【モバマス】[奇しき縁]塩見周子【ボイス集】 - Niconico Video/【モバマス】[奇しき縁]塩見周子【ボイス集】 - Niconico Video_chunk39.wav|ano toki çiɽoʔte itadaita kitsɯne koto ɕi jomi ɕɯɯko desɯ.|0
|
| 80 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/minami/minami_cgss/minami_card_201091/minami_voice_201091_1_01.wav|ajametɕantatɕi mo nijanija ɕitɕaʔte. hoɽa, hajakɯ kakenai to!|19
|
| 81 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/fumika/fumika_mobamas/fumika_split/fumika__0015/fumika__0015_chunk11.wav|makemaseɴ.|13
|
| 82 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/mobamas_voices/Serifu/voices_kanade_otomoe_plus/voices_kanade_otomoe_plus_chunk3.wav|kiʔto, minna mo onadʑi çikaɽi o mite irɯ no deɕoɯ?|3
|
| 83 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/ranko/ranko_mobamas/ranko_mobamasu_0016/ranko_mobamasu_0016_chunk181.wav|wataɕi mo motomete irɯ no.|18
|
| 84 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/mobamas_voices/Serifu/voices_kanade_otomoe_plus/voices_kanade_otomoe_plus_chunk22.wav|kono sɯgata ni mitoɽete mo iːkedo.|3
|
| 85 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mifune/mifune_cgss/mifune_card_201246/mifune_voice_201246_2_07.wav|jakei ga sɯteki na omise no dʲnaːkeɴ ga arɯɴ desɯga...gojotei, ikaga desɯ ka?|10
|
| 86 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/fumika/fumika_mobamas/fumika_split/fumika__0009/fumika__0009_chunk49.wav|mɯkaɕi kaɽa, çito no ɕiseɴ o sakete kimaɕita.|13
|
| 87 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/EP28.wav|fɯfɯʔ...aɴɕiɴ ɕite kɯdasai. hanabi ja tɕiɽi jɯkɯ sakɯɽa ga ɯtsɯkɯɕiːkaɽa to iʔte mo, wataɕi wa doko ka e kie taɽi nante ɕimaseɴ. daʔte wataɕi no i baɕo wa, koko naɴ desɯkaɽa.......|2
|
| 88 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/minami/minami_cgss/minami_card_200958/minami_voice_200958_3_01.wav|aɽa, keijakɯ o majoʔte irɯ no? kaɕikokɯte, totemo oɽoka ne. fɯfɯfɯ, itsɯ made gamaɴ ga dekirɯ no ka, mite ite agerɯ...kono amai amai jɯɯwakɯ ni, doko made aɽagaerɯ no kaɕiɽa?|19
|
| 89 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mio/mio_honda_cgss/mio_honda_card_300799/mio_honda_voice_300799_1_09.wav|kiɽaɽiɴ? sasɯga ni soɽosoɽo okigaerɯkaɽa sa. sono...ɯtsɯsanaide?|16
|
| 90 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk151.wav|iː desɯ ka? kʲoɯ wa moɯ oɕigoto ɕitɕa dame desɯ.|2
|
| 91 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/shiburin/shiburin_mobamas/shiburin_mobamasu_0007/shiburin_mobamasu_0007_chunk10.wav|maː, soɯ iɯ tokoɽoʔte ammaɽiːʔta koto naiɕi.|6
|
| 92 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_voice_home_shinaido_room/kanade_card_201272/kanade_voice_201272_1_01.wav|jɯbi o kaɽamete, netsɯ o tsɯtaeaʔte...fɯɽeta saki kaɽa, madʑiɽiaʔte ɕimai soɯ.|3
|
| 93 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puri2-chunk238_chunk4_DeepFilterNet3.wav|haɽoiɴ paːtiː da jo, tatemaedʑoɯ wa.|2
|
| 94 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk187.wav|mɯkaɕi no wataɕi wa, kiʔto tokɯni mokɯteki nante motɕiʔte inakaʔta joɯ ni omoimasɯ.|2
|
| 95 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/fumika/fumika_mobamas/fumika_split/fumika__0003/fumika__0003_chunk34.wav|ma, a, ano, sɯmimaseɴ, kizɯkazɯ ni...|13
|
| 96 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk204.wav|osewa ni naɽimasɯ. kotoɕi mo.|2
|
| 97 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_mobamas/chie_mobamasu_0017/chie_mobamasu_0017_chunk218.wav|oɕiete hoɕiː na.|5
|
| 98 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puri2-chunk55_DeepFilterNet3.wav|nasake wa çito no tame naɽazɯ to wa iɯkeɽedo, hakaɽazɯ mo oɽei o moɽaʔte ɕimaʔta ne. de mo...ka, kaʔpɯrɯ waɽibikitɕikeʔto ka...|2
|
| 99 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/miku/miku_cgss/miku_card_100935/miku_voice_100935_2_06.wav|mikɯtatɕi no tame niːtsɯ mo oɕigoto gambaʔte...pːtɕaɴ wa sɯgoi jo neː!|14
|
| 100 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/jou_mika/jou_mika_cgss/jou_mika_card_301268/jou_mika_voice_301268_2_10.wav|kameɽamansaɴ no noɽi mo itsɯ mo to zenzeɴ tɕigaʔte! aː, kintɕoɯ ɕita! ɯfɯfɯ!|8
|
| 101 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/frederica/fredrica_cgss/fredrica_card_100022/fredrica_voice_100022_2_11.wav|pɯɽodʲɯɯsaːʔte, ɽiɴwaɴ na no? kaʔkoɯ iː!|20
|
| 102 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/unxkaedepuriUBfin-chunk636_DeepFilterNet3.wav|akiɽamete narɯ mono ka! fɯɯɴʔ!|2
|
| 103 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/16-christmas.wav|gʲɯː.|2
|
| 104 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/nagi/nagi_cgss/nagi_card_301124/nagi_voice_301124_6_03.wav|mezase, toɽendo itɕiː!|17
|
| 105 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk1019.wav|ano...pɯɽodʲɯɯsaː...|2
|
| 106 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/momoka/momoka_cgss/momoka_card_100181/momoka_voice_100181_2_05.wav|pɯɽodʲɯɯsaːtɕama wa, takɯsaɴ no çito ni ɕɯkaɽetemasɯ no ne.|15
|
| 107 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mayu/mayu_mobamas/mayu_mobamasu_0013/mayu_mobamasu_0013_chunk0.wav|fɯtaɽi de osampoɯ.|12
|
| 108 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Mobamas/Syuko Voice/【モバマス】[フォレストヴィリディス]塩見周子【ボイス集】 - Niconico Video/【モバマス】[フォレストヴィリディス]塩見周子【ボイス集】 - Niconico Video_chunk23.wav|tsɯkaɽeta kaɽada ni, koɯkateki meɴ. de mo...|0
|
| 109 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/mobamasu/karen_mobamasu_0003_cnk209.wav|wataɕi o oɯeɴ ɕite kɯɽerɯ, minna no tame ni!|7
|
| 110 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/arisu/arisu_mobamasu/10_arisu__0010_(Vocals)/10_arisu__0010_(Vocals)_chunk2.wav|soɽe wa, doɯ iɯ imi naɴ deɕoɯ ka. wataɕi wa, kandʑoɯ o komete tɕanto eŋgi ɕita tsɯmoɽi desɯ.|1
|
| 111 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Events_and_Card/Card_Commyuu/Work_Comyu/Work_Comyu_chunk115.wav|ɯtɕiɽa no koto, otabe ja sɯɯ.ʔteki na?ʔjaːɴ! ɕaihaɴʔte wa, daitaɴ!|0
|
| 112 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/anzu/anzu_cgss/anzu_card_101189/anzu_voice_101189_2_08.wav|ɯsagi kaɽa aɽaitate no nioi ga sɯrɯ...koko made kiɽei da to, sasɯga ni kizɯkɯ ne.|11
|
| 113 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuuko_Mobamas/Syuko Voice/【モバマス】[あやかし京娘]塩見周子【ボイス集】 - Niconico Video/【モバマス】[あやかし京娘]塩見周子【ボイス集】 - Niconico Video_chunk18.wav|saː saː, minna tsɯitoide! kʲoto ni wa oinaɽisaɴ no ɽeɴɕoɯ ga arɯɴ da jo ne. kitsɯne wa sono tsɯkai naɴ da.|0
|
| 114 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/fumika/fumika_cgss/fumika_card_200696/fumika_voice_200696_1_02.wav|sɯ na ha ma no sɯteːdʑi de ɯtaɯ nante...kono keikeɴ, ɕiʔkaɽi to kokoɽo ni kizamikomimasɯ.|13
|
| 115 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk833.wav|mɯkaɕi no wataɕi ga omoi moɕi nakaʔta ima ga koko ni aɽimasɯ|2
|
| 116 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/fumika/fumika_cgss/fumika_card_200439/fumika_voice_200439_6_07.wav|masaniːrʲɯɯɕoɴ no joɯ na...kiɽameki ni mitɕita, ɽaibɯ deɕita.|13
|
| 117 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/vocals-chunk21.wav|wataɕi ni desɯ ka?|2
|
| 118 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/miku/miku_cgss/miku_card_101107/miku_voice_101107_6_05.wav|miʔkɯ no gohoɯɕinʲa!|14
|
| 119 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_Events_and_Card/Kanade_Events/kimi_no_stage_isho/kimi_no_stage_isho_chunk29.wav|«fɯʔfɯʔfɯ».|3
|
| 120 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/karen_cgss/karen_cgss_card_201404/karen_cgss_voice_201404_1_11.wav|honto wa giɽigiɽi naɴ da? anata no kotae o matɕi tsɯzɯkete...mɯne ga...hoɽa, ne.|7
|
| 121 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/usamin/usamin_mobamas/usamin_mobamasu_0021/usamin_mobamasu_0021_chunk52.wav|moɯ iʔkai kakete okimaɕoɯ! eː i!|4
|
| 122 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puri2-chunk161_DeepFilterNet3.wav|zanneɴ dakedo kono ataɽi ni wa miataɽanai ne otoɕitanaɽa kono heɴ ni arɯ hazɯ naɴ dakeɽedo wataɕitatɕi wa saifɯ o mita baɕo kaɽa idoɯ mo ɕite inai hoka ni aʔta koto to ieba|2
|
| 123 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/momoka/momoka_cgss/momoka_card_100182/momoka_voice_100182_1_10.wav|wataɕi ga mi ni tsɯkerɯ baɽa ni, toge wa aɽimaseɴ! keʔɕite sasaɽimaseɴ no!|15
|
| 124 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mio/mio_honda_cgss/mio_honda_card_300811/mio_honda_voice_300811_1_10.wav|oʔto nataɽi! kami çikoɯ ki wa nagetɕa ikenai ze! okɯɽidasɯ kandʑi de ne!|16
|
| 125 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mifune/mifune_cgss/mifune_card_200163/mifune_voice_200163_2_09.wav|hea aɽendʑi wa, nigate de. itsɯ mo, kono kamigata ni...jahaɽi, dʑimi?|10
|
| 126 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puriUBfin-chunk90_DeepFilterNet3.wav|wataɕi ga kobanda no wa, tada soɽe dake daʔtakaɽa. kazokɯ no tatɕiba mo ɽikai wa ɕite irɯɕi, keʔɕite kiɽai na wake dʑa nai.|2
|
stylekan/Data/metadata_cleanest/FT_imas_valid_more_than_10sec.csv
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/31-MaxBeat.wav|ɯɯɴ, tokɯi de wa—ɽimaseɴ ne....çitoɽi de wa iɽenai ka mo ɕiɽemaseɴ.|2
|
| 2 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/40-CardComyu.wav|sonna—nata ni jakɯsokɯ ɕimasɯ. kiʔto, daɽe joɽi mo kiɽei ni narɯto. sɯɯpaː moderɯ joɽi mo, kanaɽazɯ. daʔte...hoɽa, oɯsama no meiɽei wa zeʔtai desɯkaɽa ne.|2
|
| 3 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/frederica/fredrica_cgss/fredrica_card_101090/fredrica_voice_101090_4_03.wav|ammaɽi jɯʔkɯɽi kaŋgaeterɯto, seikai ga kawactɕaɯkaɽa gotɕɯɯi! ima mo seitɕoɯ ki! tsɯmaɽi, wataɕiɽaɕikɯ seitɕoɯtɕɯɯ, dakaɽa ne! me o hanaɕita sɯki ni, bidʑiɴ sɯgirɯ fɯɽetɕaɴ ni naʔtɕaɯ ka mo jo!|20
|
| 4 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/miku/miku_mobamas/miku_mobamasu_0001/miku_mobamasu_0001_chunk11.wav|haː, oçisama no nioi ga sɯrɯ nonʲaː!nʲɯɯ, mikaketa toki kaɽa zɯʔto ki ni naʔteta no!|14
|
| 5 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/05-workkomyu.wav|kotɕiɽa no bako wa...komonoiɽe? kiɽei na mojoɯ de, oheja ni oite okitakɯ naɽimasɯ ne.|2
|
| 6 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/shiburin/shiburin_cgss/shiburin_card_200650/shiburin_voice_200650_5_01.wav|tamani keŋka ɕi taɽi, jɯzɯɽanakaʔtaɽi, haɽi aʔtaɽi...soɽe mo warɯkɯ nai jo ne. asemamiɽe de mo, kaʔko warɯkɯte mo,toɽai adopɯɽimɯsɯ no sei saŋkaʔkei wa, jɯgamanai mama, mae ni sɯsɯnde ikerɯkaɽa.|6
|
| 7 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/usamin/usamin_mobamas/usamin_mobamasu_0018/usamin_mobamasu_0018_chunk55.wav|minni ɯsaminnʲɯɯjaː! merɯheɴ wa fɯkɯ ni tɕendʑi! desɯ jo! medetai desɯ! naɴ to nakɯ! oɕoɯgatsɯ wa iː desɯ jo ne! minna ga ɕiawase soɯ na kao o ɕitemasɯ! aː...atatamaɽimasɯ neː...|4
|
| 8 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/EP28.wav|fɯfɯʔ...aɴɕiɴ ɕite kɯdasai. hanabi ja tɕiɽi jɯkɯ sakɯɽa ga ɯtsɯkɯɕiːkaɽa to iʔte mo, wataɕi wa doko ka e kie taɽi nante ɕimaseɴ. daʔte wataɕi no i baɕo wa, koko naɴ desɯkaɽa.......|2
|
| 9 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/minami/minami_cgss/minami_card_200958/minami_voice_200958_3_01.wav|aɽa, keijakɯ o majoʔte irɯ no? kaɕikokɯte, totemo oɽoka ne. fɯfɯfɯ, itsɯ made gamaɴ ga dekirɯ no ka, mite ite agerɯ...kono amai amai jɯɯwakɯ ni, doko made aɽagaerɯ no kaɕiɽa?|19
|
| 10 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_voice_home_shinaido_room/kanade_card_201272/kanade_voice_201272_1_01.wav|jɯbi o kaɽamete, netsɯ o tsɯtaeaʔte...fɯɽeta saki kaɽa, madʑiɽiaʔte ɕimai soɯ.|3
|
| 11 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puri2-chunk202_DeepFilterNet3.wav|soɯ? homekotoba to ɕite sɯnao ni ɯke toɽiʔte okoɯ ka na. aɽigatoɯ. kimi no kʲoɯrʲokwaʔte koso da jo. kimi ni tetsɯdaiʔte mo ɽaʔte, kono iɕoɯ to soɯbi ni kaɽada o naɽaɕite oitejo kaʔta. ɕikaɕi, me o mawaɕite irɯ kono otoko ni wa iːtai koto ga jama hodo aʔtakeɽedo, koɯ mo aʔsaɽi ɕimawaʔte ɕimaɯto, naɴ da ka hjoɯɕinɯke ɕite ɕimaʔta.|2
|
| 12 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puri2-chunk55_DeepFilterNet3.wav|nasake wa çito no tame naɽazɯ to wa iɯkeɽedo, hakaɽazɯ mo oɽei o moɽaʔte ɕimaʔta ne. de mo...ka, kaʔpɯrɯ waɽibikitɕikeʔto ka...|2
|
| 13 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Syuuko/Syuko_CGSS_ShinAido_Home_Room/syuuko_card_200425/syuukovoice_200425_5_01.wav|hoɽa hoɽa, kɯɽaeː—! ɯmi ni kite sɯɯtsɯ no jatsɯ wa,nɯɽetɕaeː—! fɯfɯ!! minna de ɯmi e bakansɯ nante, fɯtsɯɯ no onnanokoʔpoi ja ne. minna o tanoɕimaserɯ aidorɯ mo, kʲoɯ wa tanoɕindʑaʔte koː—!|0
|
| 14 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/karen_cgss/karen_cgss_card_201404/karen_cgss_voice_201404_1_11.wav|honto wa giɽigiɽi naɴ da? anata no kotae o matɕi tsɯzɯkete...mɯne ga...hoɽa, ne.|7
|
| 15 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_cgss/chieri_card_101063/chieri_voice_101063_5_01.wav|o, pɯɽodʲɯɯsaːsaɴ, koʔtɕi desɯ!...a, aite jokaʔta...e? ɯɽisode, miaʔtemasɯ ka?...ɯɽaɕiː desɯ. wataɕi ni wa daitaɴ na irɯ ka moʔte omoʔta no de...fɯfɯʔ, iː toɕi ni naɽi soɯ!|5
|
| 16 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puri2-chunk161_DeepFilterNet3.wav|zanneɴ dakedo kono ataɽi ni wa miataɽanai ne otoɕitanaɽa kono heɴ ni arɯ hazɯ naɴ dakeɽedo wataɕitatɕi wa saifɯ o mita baɕo kaɽa idoɯ mo ɕite inai hoka ni aʔta koto to ieba|2
|
| 17 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/mifune/mifune_cgss/mifune_card_200163/mifune_voice_200163_2_09.wav|hea aɽendʑi wa, nigate de. itsɯ mo, kono kamigata ni...jahaɽi, dʑimi?|10
|
stylekan/Data/metadata_cleanest/FT_saori.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
stylekan/Data/metadata_cleanest/FT_saori_valid.csv
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/MG56.wav|fantadʑiː no oçimesama....masaka, ikinaɽi konna ni oːkina jakɯ o itadakerɯ nante, odoɽoki deɕita|563
|
| 2 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/unxkaedepuriUBfin-chunk738_DeepFilterNet3.wav|de mo soɯ da ne, koko made kitaɽa tanoɕimasete moɽaoɯ ka|563
|
| 3 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/DD44.wav|ɽaŋkotɕaɴ wa totemo otona de, jasaɕiː ko. makete iɽaɽemaseɴ ne|563
|
| 4 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/29-NationBlue.wav|a, otsɯkaɽesama. sono, moɕi ka ɕite kaedesaɴ mo ɽeʔsɯɴ na no dakedo,...nani ka, oka ɕikaʔta kaɕiɽa?—|563
|
| 5 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk300.wav|soɕite, pɯɽodʲɯɯsaː to no tanoɕiː kaiwa mo.|563
|
| 6 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk1172.wav|oita wa daːme oɕi joki|563
|
| 7 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk420.wav|kako no hanaɕi joɽi aɕita no hanaɕi o ɕimaɕoɯ, pɯɽodʲɯɯsaː.|563
|
| 8 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/COD39.wav|koko made tsɯite kite kɯɽeta çito mo, moɯ inai çito mo...sɯbete seoimaɕoɯ!|563
|
| 9 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/RC30.wav|daisɯki na çito no tame no, toʔte oki no omekaɕi...ɽiʔpɯ de ɕiagemaɕoɯ|563
|
| 10 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk417.wav|jɯme ga nakeɽeba sagaseba iːɴ desɯ.|563
|
| 11 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/18-InochiMoyashite.wav|de mo, sonna hanaɕi o sɯrɯ no ni, konna tokoɽo de wa ikemaseɴ ne. moʔto iː baɕo niːkanakɯtɕa.|563
|
| 12 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk1125.wav|omitoːɕi desɯ jo.|563
|
| 13 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puriUBfin-chunk82_DeepFilterNet3.wav|kazokɯ to no kizɯna o tatɕikiɽoɯ to ɕite irɯ dʑibɯɴ ga, ɕiʔte iː mono dʑa nai.|563
|
| 14 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/IM46.wav|pɯɽodʲɯɯsaː mo kiɴ'iɽo no sɯɯtsɯ o kite, osoɽoi ni!|563
|
| 15 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk1137.wav|jasaɕikɯ ɕite agemasɯʔ|563
|
| 16 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/28-MaxBeat.wav|hai. motɕiɽoɴ manteɴ no go emɯeieʔkɯsɯ desɯ!|563
|
| 17 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puri2-chunk187_DeepFilterNet3.wav|nigasɯ mono ka!|563
|
| 18 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/radio_imas-chunk11_chunk1_DeepFilterNet3_AudioSR_Processed_48K.wav|saikiɴ tɕoʔto jokɯbaɽi ni naʔte ɕimaʔta mitai naɴ desɯ|563
|
| 19 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/puriUBfin-chunk93_DeepFilterNet3.wav|dʑibɯɴ ni mo kazokɯ ni mo|563
|
| 20 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/03-workkomyu.wav|koɽe wa, neko no matorʲoːɕika deɕoɯ ka?|563
|
| 21 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/78-workkomyu.wav|iː koto o omoitsɯkimaɕita. kanadetɕaɴ, kondo wa wataɕi ga toɽimasɯ ne?|563
|
| 22 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/25-PrettyLiarFin.wav|fɯfɯfɯ! wataɕitatɕi no fɯtaɽi gɯɽaɕi mo, daibɯ nagakɯ naʔte kimaɕita ne. kanadetɕaɴ no iɽonna meɴ o ɕiɽete, wataɕi wa ɯɽeɕiː desɯ. wataɕi no koto mo, tɕiʔte moɽaete irɯ to omoimasɯɕi.|563
|
| 23 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/merged_vocals_chunk967.wav|kiɴ to çieta kaɽakɯtɕi ga osɯki?|563
|
stylekan/Data/metadata_cleanest/filtered_train_list.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5b2db964e1cc327e92e4382fca9a9d55e990cf932f436b92e4d4576e7f6fce09
|
| 3 |
+
size 92225988
|
stylekan/Data/metadata_cleanest/filtered_train_list_no_nsp.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4d22b1f5dc29a0c229946e744b0b9a7d2fc5c652ecab0e6f46e9b3db42adce0b
|
| 3 |
+
size 91725454
|
stylekan/Data/metadata_cleanest/filtered_train_list_no_nsp_HEADER_plus.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7586721acfb629514c3f84e32813cf87636d9d414f015fbd33e1b1864da7fbe5
|
| 3 |
+
size 95952756
|
stylekan/Data/metadata_cleanest/filtered_train_list_no_nsp_plus.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:14aa224b114909c939a22ef655923a720985abc216ccd555d470e3537a6bdbe1
|
| 3 |
+
size 91713050
|
stylekan/Data/metadata_cleanest/prelude.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e411b89fb4d4197df3937e16262f777cce59d6c0c3959e3f8c054277b1f09821
|
| 3 |
+
size 42196851
|
stylekan/Data/metadata_cleanest/prelude_id.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:77e70f06bd8cc12764429356423d3cfc3e06b9975cfd1b0f2f05820ccfb8bdc5
|
| 3 |
+
size 40886844
|
stylekan/Data/metadata_cleanest/train_48_pure.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b956aafff61ab72fc5d69854b57e9d69d1686866c85aa05bfbcc87d1fbc41114
|
| 3 |
+
size 40216376
|
stylekan/Data/metadata_cleanest/val_48_pure.csv
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/ranko/ranko_bonus_cd/ranko_bonus_cd_chunk16.wav|wataɕi, zeʔtai anata no kitai ni kotaete miserɯkaɽa!|14
|
| 2 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/25714f7a/wav/25714f7a_1843.wav|naɴ de, osoɽakɯ ɽokɯ doɯ kiʔka ga ɕindaɽa.|91
|
| 3 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/00163dc9/wav/00163dc9_0902.wav|soʔtɕi no hoɯ ga iː to ka, sɯki to ka.|80
|
| 4 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/36ea135b/wav/36ea135b_0280.wav|deɕo? go, go, tabetɕaɯ zoː!|76
|
| 5 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_02/Sakurai_Takahiro_02_chunk1947.wav|dakaɽa.|4
|
| 6 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/1cc3c6c0/wav/1cc3c6c0_1333.wav|de mo, sonna ikikata wa, itsɯ ka kakegai no nai mono made akiɽamerɯ koto ni narɯ. kiʔto iʔɕoɯ koɯkai sɯrɯ koto ni narɯ.|85
|
| 7 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/013/S013_A_0067.wav|aː, kono tsɯkɯdaniː, sɯki daʔta wa jo ne. saːbisɯ sɯrɯ wa. ato, satoimo no niʔkoɽogaɕi ga sɯgɯ ni dekirɯkaɽa.|10
|
| 8 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/ee093a4f/wav/ee093a4f_2060.wav|sono seɴ wa aɽi enai to omoʔte iːdeɕo. tada, rɯnatɕomɯ no meidotɕoɯ to toɽikime o ɕite irɯ joɯ desɯkaɽa, sono naijoɯ ga kaɽande irɯ no ka mo ɕiɽemaseɴ.|73
|
| 9 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/449d5a0a/wav/449d5a0a_1813.wav|dezaia o seʔtei ɕi niːkimasɯ ka?|63
|
| 10 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/cbe5080e/wav/cbe5080e_1548.wav|berɯbeʔto wa, arɯdʑi o motanaiɕi, fɯtsɯɯ no niŋgeɴ ni mo mierɯ. odʑoɯtɕaɴ no ɕiʔte irɯ oni to wa, komponteki nitɕigaɯ sonzai na no sa.|68
|
| 11 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/002/S002_F_0075.wav|tiaɽa saizɯʔte koto de, tamani dʑotɕɯɯ ni tsɯkɯɽaserɯ o.|25
|
| 12 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/002/S002_F_0075.wav|tiaɽa saizɯʔte koto de, tamani dʑotɕɯɯ ni tsɯkɯɽaserɯ o.|25
|
| 13 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/002/S002_X_0059.wav|tɕinami ni, koŋkai wa naɴ kaitɕɯɯ to çiɽaite mo, koɽe ikoɯ wa heŋka naikaɽa ne.|25
|
| 14 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/773a4156/wav/773a4156_2767.wav|a, soɽe de, çitotsɯ dake onegai ga arɯɴ desɯkedo.|64
|
| 15 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Karen/mobamasu/karen_mobamasu_0003_cnk119.wav|kʲoɯ wa wataɕi kaɽa, sɯkoɕi de mo okɯɽitakɯte.|5
|
| 16 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki__02/Shinichiro_Miki__02_chunk1731.wav|daɽeʔte iwaɽerɯto, sono komarɯɴ dakedo.|7
|
| 17 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Kanade/mobamas_voices/Serifu/voices_kanade_otome/voices_kanade_otome_chunk8.wav|taikɯtsɯ na kʲɯɯdʑitsɯ nante, sɯgoɕitakɯ nai no?|18
|
| 18 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/chieri/chieri_mobamas/chie_mobamasu_0015/chie_mobamasu_0015_chunk302.wav|toʔte mo tajoɽi ni narɯ, goɕɯdʑinsama desɯ ne. pikapika ni migaki agetaɽa...|24
|
| 19 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/4ce0075b/wav/4ce0075b_0767.wav|okɯɽe ga detaɽa niɕikisaɴ ki ni ɕitɕoɯ jo ne.|65
|
| 20 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/6d565f54/wav/6d565f54_1259.wav|sempai mo ogeŋki soɯ de!|81
|
| 21 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/cc948b89/wav/cc948b89_1779.wav|de, aitɕaɴʔte daɽe?|86
|
| 22 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/011/S011_A_2228.wav|haʔ.|29
|
| 23 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/002/S002_B_0286.wav|hadʑimemaɕite. ikani mo fɯrʲoɯ to wa eɴ no nasa soɯ na kao ɕiterɯ wa ne.|25
|
| 24 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/017/S017_F_0327.wav|hamase, tsɯʔteɴ da!|47
|
| 25 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/36ea135b/wav/36ea135b_1961.wav|ɯɴ, nani ka tegakaɽi mitsɯketai ne. jaʔpaɽi, goiɴ ni dʑoɯbɯtsɯ sasetɕaɯ no wa kinodokɯ de.|76
|
| 26 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_01/Sakurai_Takahiro_01_chunk2702.wav|naɴ te iɯ ka, sono, gomeɴ.|4
|
| 27 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki__01/Shinichiro_Miki__01_chunk1260.wav|na de koɽa.|7
|
| 28 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/3c58f1c4/wav/3c58f1c4_0633.wav|iːe, wataɕi wa sofɯto na no naɽa, misaki eʔkɯsɯ ɕiɽo mo dʑɯɯbɯɴ aɽi da to omoimasɯ!|70
|
| 29 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/36ea135b/wav/36ea135b_0553.wav|wataɕi no koto...mieterɯ?|76
|
| 30 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_02/Sakurai_Takahiro_02_chunk2188.wav|tsɯkitsɯmeta tokoɽo, oɕi no ɕinobɯ wa tada, ɕokɯdʑi o ɕita dake na no dakaɽa.|4
|
| 31 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/saori/CN_026.wav|sɯzɯ no oto kitaɽite fɯɯsetsɯ o matoɯ|20
|
| 32 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/horie_yui/Horie_Yui_02/Horie_Yui_02_chunk1385.wav|wataɕi wa ɕoɯgakɯsei no toki wa, saʔkaː seɴɕɯ daʔtaɴ da jo.|0
|
| 33 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/ad28b91b/wav/ad28b91b_1047.wav|aiɕite irɯ. kono sakiːkanarɯ miɽai ga aɽoɯtomo, ima kono mɯne ni arɯ kimotɕi dake wa hommono da.|88
|
| 34 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/10/10005710.wav|soɽe ga doɯ iɯ imi na no ka wa wakaɽanai.|49
|
| 35 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/b8b5fe66/wav/b8b5fe66_2505.wav|omae wa, nani ka warɯi koto o ɕita wake dʑa nai. dʑibɯɴ no kimotɕi o oɽe ni tsɯtaeta dake da.|84
|
| 36 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/2cf01874/wav/2cf01874_0269.wav|soɽe de mo, natsɯkaɕikɯ kandʑirɯ mono da na.|66
|
| 37 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/chiwa_saito/Chiwa_Saito_02/Chiwa_Saito_02_chunk367.wav|ɕiɽaberɯ tsɯmoɽi nado naikeɽedo, ɕiɽabeta tokoɽo de, osoɽakɯ wa sɯisokɯ no iki o dezɯ, kakɯdʑitsɯ ni wa ɕiboɽi kiɽenaidaɽoɯ to iɯ koto daʔta.|3
|
| 38 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/449d5a0a/wav/449d5a0a_0374.wav|kowaɽeta ato ni ɕɯɯɽi no kanoɯsei mo aɽimasɯ.|63
|
| 39 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/10/10003350.wav|a. dʑibɯɴ no jɯme mo da.|49
|
| 40 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/005/S005_F_0011.wav|gomeɴ nasai, ɯmi ni saʔki ga ɯzɯmaiterɯkaɽa, mɯkaɕi no kɯse de ne.|58
|
| 41 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/6d565f54/wav/6d565f54_0566.wav|ɯɯɴ...de mo ɕimpai nai jo. kanaɽazɯ modorɯʔte, iʔteta dʑaɴ.|81
|
| 42 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/449d5a0a/wav/449d5a0a_2281.wav|moʔto ɯne daɽi ɕitai desɯ...oːnaː no kisɯ o kɯdasai...|63
|
| 43 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/84be23bd/wav/84be23bd_0028.wav|a, soɽe dʑa, hontoɯ ni, aʔpeiɽia ga, wataɕi to oanisaɴ no kodomo mitai desɯ ne.|83
|
| 44 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/horie_yui/Horie_Yui_01/Horie_Yui_01_chunk163.wav|doɯki o ɕizɯmerɯ.|0
|
| 45 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/84be23bd/wav/84be23bd_2140.wav|sonna ki wa maʔtakɯ ɕimaseŋkedo, oːnaka kɯrɯɕi sɯgite ɕini soɯ desɯ.|83
|
| 46 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/cc948b89/wav/cc948b89_2639.wav|nakama ni miserɯ no? teɴɕoɴ agarɯɴ dakaɽa jaʔte mi nasaiʔte.|86
|
| 47 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sawashiro_miyuki/Sawashiro_Miyuki_01/Sawashiro_Miyuki_01_chunk52.wav|aŋgai, dʑikakɯ wa nai no ka mo ɕiɽenai.|19
|
| 48 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/9febd2ae/wav/9febd2ae_1602.wav|zɯʔto tatɕiʔpanaɕi de kiːtetakaɽa tsɯkaɽeta joː.|87
|
| 49 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakamoto_maya/Sakamoto_Maya_01/Sakamoto_Maya_01_chunk1709.wav|soɯ iɯ imi de wa, sendʑoɯgawaɽa çitagi wa, kawaʔta no de mo, koɯsei ɕita no de mo, modoʔta no de mo toɽikaeɕita no de mo nakɯ, maɕite, deɽeta no de mo doɽota no de mo nakɯ.|16
|
| 50 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/9febd2ae/wav/9febd2ae_0591.wav|dʑosei ga mɯtɕɯɯ ni naʔtɕaɯ kɯɽai kaʔko iː otokoʔte koto ne.|87
|
| 51 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/02/02001850.wav|tefɯda ga babanɯki ni mɯitetakaɽa, çiʔɕi ni naʔterɯɴ da jo ne. soɽe kɯɽai, wataɕi dʑa nakɯte mo daɽe de mo wakarɯ jo.|6
|
| 52 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakamoto_maya/Sakamoto_Maya_01/Sakamoto_Maya_01_chunk935.wav|dʑaː ɕoɯbɯ kaiɕi de iː na? soko sɯwaɽe jo.|16
|
| 53 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/00163dc9/wav/00163dc9_1452.wav|sono sɯki wa...donna sɯki?|80
|
| 54 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/saori/unxkaedepuriUBfin-chunk48_DeepFilterNet3.wav|aitsɯ wa wataɕi ga moɽaɯ jo soɽe dʑaː mata!|20
|
| 55 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/nagi/nagi_cgss/nagi_card_300836/nagi_voice_300836_6_03.wav|seː no de, dʑampɯ!|43
|
| 56 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/ranko/ranko_mobamas/ranko_mobamasu_0002/ranko_mobamasu_0002_chunk3.wav|minna no ɽaibɯ ga kimaʔtaʔte kiːtaɴ desɯkedo, hontoɯ desɯ ka?mɯ, mɯɽhaʔhaː! kaŋki no koe ga kikoerɯ! me o todʑiɽeba, soko ni!|14
|
| 57 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki_03/Shinichiro_Miki_03_chunk344.wav|nadeko ni taisɯrɯ ikaɽi no keʔka to ɕite.|7
|
| 58 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/003/S003_B_0139.wav|iʔteta jatsɯɽa, çiki hadʑimeta zo.|55
|
| 59 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/017/S017_F_0067.wav|moɯ iːja. dʑa, ima wa ammaɽi momerɯ na jo.|47
|
| 60 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/kamiya_hiroshi/Kamiya_Hiroshi_01/Kamiya_Hiroshi_01_chunk1454.wav|omotai no jo.|13
|
| 61 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/momoka/momoka_mobamas/momoka_mobamasu_0014/momoka_mobamasu_0014_chunk38.wav|oɕigototɕɯɯ wa, esɯkoːto ɕite kɯdasaimasɯ? iː? koɽe kaɽa wa, fɯkɯsoɯ ni wa ki o tsɯkai nasai.|11
|
| 62 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki_03/Shinichiro_Miki_03_chunk2780.wav|sendʑoɯgahaɽasaɴ ga, ima iʔtai nani o kaŋgaete irɯ no ka o kaŋgaemaɕita.|7
|
| 63 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakamoto_maya/Sakamoto_Maya_03/Sakamoto_Maya_03_chunk1207.wav|kiʔto niŋgeɴ no soɯzoɯ o zeʔsɯrɯ mono ni narɯdaɽoɯ.|16
|
| 64 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/00163dc9/wav/00163dc9_1928.wav|dokiʔto sɯrɯ koto iwanaide.|80
|
| 65 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiburin/shiburin_mobamas/shiburin_mobamasu_0008/shiburin_mobamasu_0008_chunk12.wav|kiɽei na keɕiki ga mierɯ baɕo ni, kʲoɯmi nai? tɕiːsai koɽo, maitoɕi kazokɯ to iʔteta oki ni haiɽi no baɕo.|50
|
| 66 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/2cf01874/wav/2cf01874_3620.wav|çinai to iɯto, mae no ɕitɕɯɯ no.|66
|
| 67 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/usamin/usamin_cgss/usamin_card_100125/usamin_voice_100125_3_01.wav|goɽaidʑoɯ, aɽigatoɯ gozaimaɕita! kite kɯɽeta minasaɴ ni, hai! iʔpai zɯtsɯ! motɕiɽoɴ, pɯɽodʲɯɯsaːsaɴ ni mo!|46
|
| 68 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/usamin/usamin_cgss/usamin_card_100485/usamin_voice_100485_2_10.wav|ima wa, naite nai desɯ! de mo, ano toki no koto o omoidasɯto...rɯisei zɯɯɴ!|46
|
| 69 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/ad28b91b/wav/ad28b91b_1141.wav|ageaɕi o torɯ na, sonna tsɯmoɽi wa nai. ɯɴ, iː iɽozɯkai da na, soko wa hjoɯka ɕite mo iː.|88
|
| 70 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki_03/Shinichiro_Miki_03_chunk2088.wav|çitohada nonɯkɯmoɽi o, miɽerɯ no desɯ.|7
|
| 71 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/usamin/usamin_cgss/usamin_card_100343/usamin_voice_100343_1_09.wav|katakoɽi ga ɯso no joɯ ni kiete...! oteiɽe sɯɽeba, mada dʑɯɯ neɴ wa motɕi soɯ...|46
|
| 72 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Syuuko/Syuko_CGSS_ShinAido_Home_Room/syuuko_card_200086/syuukovoice_200086_1_05.wav|konna iɕoɯ made kitɕaʔte! aidorɯʔte kandʑi da jo ne! ɯfɯfɯ!|40
|
| 73 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/00163dc9/wav/00163dc9_1968.wav|sɯki na çito ga soba niːte kɯɽerɯ dake de.|80
|
| 74 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/46d6bf83/wav/46d6bf83_2493.wav|gomeɴ, ataɕi wa pasɯ da naː.|95
|
| 75 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/6d565f54/wav/6d565f54_1073.wav|fɯʔfɯʔfɯɴ, agehatɕaɴ sɯpeɕarɯ o misete jarɯ wa!|81
|
| 76 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/ad28b91b/wav/ad28b91b_0817.wav|doɯ ɕite wataɕi ga taniɴ no akiɽameta jɯme o oɕitsɯkeɽaɽenakɯtɕa ikenaiɴ da. sonna mono o kakaete itaɽa, wataɕi made zasetsɯ sɯrɯ koto ni naɽi soɯ da. iː meiwakɯ da.|88
|
| 77 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki__02/Shinichiro_Miki__02_chunk1099.wav|aɴ ni dokoɽo ka, tɕokɯsetsɯ iwaɽemaɕita. dairekɯto desɯ.|7
|
| 78 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/002/S002_F_0069.wav|soɽe wa, haʔɕɯɯ ɽemme mo kawaɽanai to omoɯ. tɕiba, ibaɽaki, saitama, gɯmma, kakɯtɕi no haiɽeberɯ na jaŋkiː ga dete kɯrɯ koto wa, amaɽi nai to omoɯ.|25
|
| 79 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/6489388e/wav/6489388e_0920.wav|wataɕi no ɕɯɯ ni mirʲokɯ wa aɽimaɕiɴ kaː—?|62
|
| 80 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/fumika/fumika_mobamas/fumika_split/fumika__0004/fumika__0004_chunk25.wav|dema ga kakarɯ no de...|1
|
| 81 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/saori/26-MizukiTomo.wav|onegai ɕi maː sɯ!|20
|
| 82 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/940de876/wav/940de876_4484.wav|wakaɽijasɯkɯ niʔpoɴ go ni ɕite mita wa.|77
|
| 83 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/b8b5fe66/wav/b8b5fe66_0656.wav|warɯi...naɴ ka, konna koto ni naʔtɕimaʔta.|84
|
| 84 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/36ea135b/wav/36ea135b_0089.wav|eːʔto...a, kʲɯɯkoɯɕa, kagi kakaʔte...|76
|
| 85 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/18460462/wav/18460462_1416.wav|doɯ ɕite kimitatɕi wa, kono ɽeʔɕa ni haiʔte iko, betsɯ koːdo o toʔte irɯɴ da?|96
|
| 86 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/940de876/wav/940de876_3617.wav|tɕoʔto, naɴ no hanaɕi ɕiterɯ no joː.|77
|
| 87 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/8b6e7173/wav/8b6e7173_1661.wav|zɯrɯi...nemɯi...|67
|
| 88 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_03/Sakurai_Takahiro_03_chunk2576.wav|mizɯ ka.|4
|
| 89 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sawashiro_miyuki/Sawashiro_Miyuki_03/Sawashiro_Miyuki_03_chunk699.wav|ja kimotɕi o jaite irɯʔte iɯ no ka, gikɯɽi.|19
|
| 90 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/011/S011_A_1542.wav|kitɕikɯ dʑa naiɕi ka!|29
|
| 91 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/saori/072.wav|iː koto de mo aɽimaɕita? sɯɯpaː de tokɯbaiçiɴ geʔto ɕita to ka. iːɴ desɯ ka? naɽa, onegai ɕimasɯ.|20
|
| 92 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/3c58f1c4/wav/3c58f1c4_1532.wav|sempai mo wataɕi to onadʑi kimotɕi da to omoʔteta no ni.|70
|
| 93 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/horie_yui/Horie_Yui_01/Horie_Yui_01_chunk1557.wav|soɽe de iː no daɽoɯ ka?|0
|
| 94 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/449d5a0a/wav/449d5a0a_0877.wav|eŋgi no dʑiʔkoɯ wa pozitibɯ desɯ. desɯga, kɯoɽiti ni tsɯite wa fɯtoɯmei desɯ.|63
|
| 95 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_03/Sakurai_Takahiro_03_chunk987.wav|ija, soɽe wa ɕiɽanaikedo.|4
|
| 96 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki__01/Shinichiro_Miki__01_chunk163.wav|naɴ daʔte, kɯtɕinawasaɴ wa kamisama na no desɯkaɽa.|7
|
| 97 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/04/04004090.wav|haha.|35
|
| 98 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/46d6bf83/wav/46d6bf83_1376.wav|ʔɴ? aː, ano ko ka. ɯɴ, oboeterɯ jo.|95
|
| 99 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/bb6ac6f1/wav/bb6ac6f1_0498.wav|sono bɯɴ, dʑibɯɴ no koto mo amaɽi hanasanai jo ne. kikeba oɕiete kɯɽerɯɴ daɽoɯkedo, kikanai hoɯ ga iː no ka na?|79
|
| 100 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/46d6bf83/wav/46d6bf83_2572.wav|ɯɯɴ, maɕiɽo ga soɯ iɯ no wakarɯ joɯ ni narɯ ni wa, motɕiʔto ɯmakɯ naɽanaito dame kanʲaː.|95
|
| 101 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/fumika/fumika_mobamas/fumika_split/fumika__0014/fumika__0014_chunk82.wav|sanisawa sensei wa...|1
|
| 102 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/horie_yui/Horie_Yui_02/Horie_Yui_02_chunk1770.wav|wataɕi to akɯma no naɽesome o.|0
|
| 103 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sawashiro_miyuki/Sawashiro_Miyuki_03/Sawashiro_Miyuki_03_chunk212.wav|iɽaʔɕai, tsɯbasatɕaɴ.|19
|
| 104 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/940de876/wav/940de876_3392.wav|sonna no fɯɕizeɴ da wa?|77
|
| 105 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/b8b5fe66/wav/b8b5fe66_0865.wav|soɽe o ɕiɽanai niŋgeɴ kaɽa, fɯɽeai no imi o çikidaɕite kɯɽeta çitotatɕi ga irɯkaɽa.|84
|
| 106 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/kamiya_hiroshi/Kamiya_Hiroshi_01/Kamiya_Hiroshi_01_chunk308.wav|godʑɯɯ kiɽo ɽaɕiː.|13
|
| 107 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/95c3bdd8/wav/95c3bdd8_1559.wav|ɯmaɽete kono kata, zɯʔto sono kɯgʲoɯ ni taiɕinonderɯ wataɕi ni taisɯrɯ ijami desɯ ka?|78
|
| 108 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/6d565f54/wav/6d565f54_0105.wav|wataɕi ga tsɯiteʔte mo, jakɯ ni tatanai wa ne.|81
|
| 109 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/chieri/chieri_cgss/chieri_card_100860/chieri_voice_100860_6_05.wav|gʲoɯsei kaɽa no, omotenaɕi desɯ!|24
|
| 110 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Kanade/Kanade_voice_home_shinaido_room/kanade_card_200063/kanade_voice_200063_6_04.wav|hontoɯ no wataɕi, misete agerɯ!|18
|
| 111 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/bc778ddb/wav/bc778ddb_1097.wav|...ija da!! sonna iʔpoɯteki ni iwaɽete mo nani ga naɴ da ka wakaɽanai jo!|90
|
| 112 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/minami/minami_cgss/minami_card_201308/minami_voice_201308_2_02.wav|tɕanto keŋkoɯkotsɯ ga çiɽaiterɯɴ desɯ jo? tsɯbasa ga haete mo daidʑoɯbɯ na joɯ ni.|15
|
| 113 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/mifune/mifune_cgss/mifune_card_201246/mifune_voice_201246_6_06.wav|jorɯ wa mada, hadʑimaʔta bakaɽi. koɽe dake dʑa, owaɽimaseɴ jo.|56
|
| 114 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/36ea135b/wav/36ea135b_0022.wav|se��ɕɯɯ made wa ne, keʔkoɯ hoŋki de kaŋgaeteta.|76
|
| 115 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/chieri/chieri_mobamas/chie_mobamasu_0017/chie_mobamasu_0017_chunk110.wav|mahoɯ de kɯɽoɯba, sagasenai ka na?|24
|
| 116 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/011/S011_A_1667.wav|ano deai o kɯɽeta no ga oja no kʲoɯikɯ naɽa, warɯkɯ wa nai aidʑoɯ daʔta sɯ wa.|29
|
| 117 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/ranko/ranko_mobamas/ranko_mobamasu_0017/ranko_mobamasu_0017_chunk84.wav|waga tomo to.|14
|
| 118 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/bb6ac6f1/wav/bb6ac6f1_0604.wav|koɯ iɯ no otɕitɕisaɴʔpokɯ de tɕɯki mainitɕi saɽerɯto komarɯkedo.|79
|
| 119 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_03/Sakurai_Takahiro_03_chunk2754.wav|kɯrɯma ni çikaɽete ɕindaɽaɕiː zo.|4
|
| 120 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/36ea135b/wav/36ea135b_1288.wav|osewa ni naʔterɯ tatɕiba dakaɽa, honto wa ammaɽiːenaiɴ dakedo neː.|76
|
| 121 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/horie_yui/Horie_Yui_01/Horie_Yui_01_chunk1557.wav|soɽe de iː no daɽoɯ ka?|0
|
| 122 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/449d5a0a/wav/449d5a0a_0877.wav|eŋgi no dʑiʔkoɯ wa pozitibɯ desɯ. desɯga, kɯoɽiti ni tsɯite wa fɯtoɯmei desɯ.|63
|
| 123 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_03/Sakurai_Takahiro_03_chunk987.wav|ija, soɽe wa ɕiɽanaikedo.|4
|
| 124 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki__01/Shinichiro_Miki__01_chunk163.wav|naɴ daʔte, kɯtɕinawasaɴ wa kamisama na no desɯkaɽa.|7
|
| 125 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/04/04004090.wav|haha.|35
|
| 126 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/46d6bf83/wav/46d6bf83_1376.wav|ɯɯɴ? aː, ano ko ka. ɯɴ, oboeterɯ jo.|95
|
stylekan/Data/metadata_cleanest/val_48_pure.txt
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/ranko/ranko_bonus_cd/ranko_bonus_cd_chunk16.wav|wataɕi, zeʔtai anata no kitai ni kotaete miserɯkaɽa!|14
|
| 2 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/25714f7a/wav/25714f7a_1843.wav|naɴ de, osoɽakɯ ɽokɯ doɯ kiʔka ga ɕindaɽa.|91
|
| 3 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/00163dc9/wav/00163dc9_0902.wav|soʔtɕi no hoɯ ga iː to ka, sɯki to ka.|80
|
| 4 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/36ea135b/wav/36ea135b_0280.wav|deɕo? go, go, tabetɕaɯ zoː!|76
|
| 5 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_02/Sakurai_Takahiro_02_chunk1947.wav|dakaɽa.|4
|
| 6 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/1cc3c6c0/wav/1cc3c6c0_1333.wav|de mo, sonna ikikata wa, itsɯ ka kakegai no nai mono made akiɽamerɯ koto ni narɯ. kiʔto iʔɕoɯ koɯkai sɯrɯ koto ni narɯ.|85
|
| 7 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/013/S013_A_0067.wav|aː, kono tsɯkɯdaniː, sɯki daʔta wa jo ne. saːbisɯ sɯrɯ wa. ato, satoimo no niʔkoɽogaɕi ga sɯgɯ ni dekirɯkaɽa.|10
|
| 8 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/ee093a4f/wav/ee093a4f_2060.wav|sono seɴ wa aɽi enai to omoʔte iːdeɕo. tada, rɯnatɕomɯ no meidotɕoɯ to toɽikime o ɕite irɯ joɯ desɯkaɽa, sono naijoɯ ga kaɽande irɯ no ka mo ɕiɽemaseɴ.|73
|
| 9 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/449d5a0a/wav/449d5a0a_1813.wav|dezaia o seʔtei ɕi niːkimasɯ ka?|63
|
| 10 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/cbe5080e/wav/cbe5080e_1548.wav|berɯbeʔto wa, arɯdʑi o motanaiɕi, fɯtsɯɯ no niŋgeɴ ni mo mierɯ. odʑoɯtɕaɴ no ɕiʔte irɯ oni to wa, komponteki nitɕigaɯ sonzai na no sa.|68
|
| 11 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/002/S002_F_0075.wav|tiaɽa saizɯʔte koto de, tamani dʑotɕɯɯ ni tsɯkɯɽaserɯ o.|25
|
| 12 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/001/S001_C_0854.wav|ɯ a a a a! tɕi kɯ biː i kageɴ hanaɕite!|22
|
| 13 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/002/S002_X_0059.wav|tɕinami ni, koŋkai wa naɴ kaitɕɯɯ to çiɽaite mo, koɽe ikoɯ wa heŋka naikaɽa ne.|25
|
| 14 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/773a4156/wav/773a4156_2767.wav|a, soɽe de, çitotsɯ dake onegai ga arɯɴ desɯkedo.|64
|
| 15 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Karen/mobamasu/karen_mobamasu_0003_cnk119.wav|kʲoɯ wa wataɕi kaɽa, sɯkoɕi de mo okɯɽitakɯte.|5
|
| 16 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki__02/Shinichiro_Miki__02_chunk1731.wav|daɽeʔte iwaɽerɯto, sono komarɯɴ dakedo.|7
|
| 17 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Kanade/mobamas_voices/Serifu/voices_kanade_otome/voices_kanade_otome_chunk8.wav|taikɯtsɯ na kʲɯɯdʑitsɯ nante, sɯgoɕitakɯ nai no?|18
|
| 18 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/chieri/chieri_mobamas/chie_mobamasu_0015/chie_mobamasu_0015_chunk302.wav|toʔte mo tajoɽi ni narɯ, goɕɯdʑinsama desɯ ne. pikapika ni migaki agetaɽa...|24
|
| 19 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/4ce0075b/wav/4ce0075b_0767.wav|okɯɽe ga detaɽa niɕikisaɴ ki ni ɕitɕoɯ jo ne.|65
|
| 20 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/6d565f54/wav/6d565f54_1259.wav|sempai mo ogeŋki soɯ de!|81
|
| 21 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/cc948b89/wav/cc948b89_1779.wav|de, aitɕaɴʔte daɽe?|86
|
| 22 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/011/S011_A_2228.wav|haʔ.|29
|
| 23 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/002/S002_B_0286.wav|hadʑimemaɕite. ikani mo fɯrʲoɯ to wa eɴ no nasa soɯ na kao ɕiterɯ wa ne.|25
|
| 24 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/017/S017_F_0327.wav|hamase, tsɯʔteɴ da!|47
|
| 25 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/36ea135b/wav/36ea135b_1961.wav|ɯɴ, nani ka tegakaɽi mitsɯketai ne. jaʔpaɽi, goiɴ ni dʑoɯbɯtsɯ sasetɕaɯ no wa kinodokɯ de.|76
|
| 26 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_01/Sakurai_Takahiro_01_chunk2702.wav|naɴ te iɯ ka, sono, gomeɴ.|4
|
| 27 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki__01/Shinichiro_Miki__01_chunk1260.wav|na de koɽa.|7
|
| 28 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/3c58f1c4/wav/3c58f1c4_0633.wav|iːe, wataɕi wa sofɯto na no naɽa, misaki eʔkɯsɯ ɕiɽo mo dʑɯɯbɯɴ aɽi da to omoimasɯ!|70
|
| 29 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/36ea135b/wav/36ea135b_0553.wav|wataɕi no koto...mieterɯ?|76
|
| 30 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_02/Sakurai_Takahiro_02_chunk2188.wav|tsɯkitsɯmeta tokoɽo, oɕi no ɕinobɯ wa tada, ɕokɯdʑi o ɕita dake na no dakaɽa.|4
|
| 31 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/saori/CN_026.wav|sɯzɯ no oto kitaɽite fɯɯsetsɯ o matoɯ|20
|
| 32 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/horie_yui/Horie_Yui_02/Horie_Yui_02_chunk1385.wav|wataɕi wa ɕoɯgakɯsei no toki wa, saʔkaː seɴɕɯ daʔtaɴ da jo.|0
|
| 33 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/ad28b91b/wav/ad28b91b_1047.wav|aiɕite irɯ. kono sakiːkanarɯ miɽai ga aɽoɯtomo, ima kono mɯne ni arɯ kimotɕi dake wa hommono da.|88
|
| 34 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/10/10005710.wav|soɽe ga doɯ iɯ imi na no ka wa wakaɽanai.|49
|
| 35 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/b8b5fe66/wav/b8b5fe66_2505.wav|omae wa, nani ka warɯi koto o ɕita wake dʑa nai. dʑibɯɴ no kimotɕi o oɽe ni tsɯtaeta dake da.|84
|
| 36 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/2cf01874/wav/2cf01874_0269.wav|soɽe de mo, natsɯkaɕikɯ kandʑirɯ mono da na.|66
|
| 37 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/chiwa_saito/Chiwa_Saito_02/Chiwa_Saito_02_chunk367.wav|ɕiɽaberɯ tsɯmoɽi nado naikeɽedo, ɕiɽabeta tokoɽo de, osoɽakɯ wa sɯisokɯ no iki o dezɯ, kakɯdʑitsɯ ni wa ɕiboɽi kiɽenaidaɽoɯ to iɯ koto daʔta.|3
|
| 38 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/449d5a0a/wav/449d5a0a_0374.wav|kowaɽeta ato ni ɕɯɯɽi no kanoɯsei mo aɽimasɯ.|63
|
| 39 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/10/10003350.wav|a. dʑibɯɴ no jɯme mo da.|49
|
| 40 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/005/S005_F_0011.wav|gomeɴ nasai, ɯmi ni saʔki ga ɯzɯmaiterɯkaɽa, mɯkaɕi no kɯse de ne.|58
|
| 41 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/6d565f54/wav/6d565f54_0566.wav|ɯɯɴ...de mo ɕimpai nai jo. kanaɽazɯ modorɯʔte, iʔteta dʑaɴ.|81
|
| 42 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/449d5a0a/wav/449d5a0a_2281.wav|moʔto ɯne daɽi ɕitai desɯ...oːnaː no kisɯ o kɯdasai...|63
|
| 43 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/84be23bd/wav/84be23bd_0028.wav|a, soɽe dʑa, hontoɯ ni, aʔpeiɽia ga, wataɕi to oanisaɴ no kodomo mitai desɯ ne.|83
|
| 44 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/horie_yui/Horie_Yui_01/Horie_Yui_01_chunk163.wav|doɯki o ɕizɯmerɯ.|0
|
| 45 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/84be23bd/wav/84be23bd_2140.wav|sonna ki wa maʔtakɯ ɕimaseŋkedo, oːnaka kɯrɯɕi sɯgite ɕini soɯ desɯ.|83
|
| 46 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/cc948b89/wav/cc948b89_2639.wav|nakama ni miserɯ no? teɴɕoɴ agarɯɴ dakaɽa jaʔte mi nasaiʔte.|86
|
| 47 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sawashiro_miyuki/Sawashiro_Miyuki_01/Sawashiro_Miyuki_01_chunk52.wav|aŋgai, dʑikakɯ wa nai no ka mo ɕiɽenai.|19
|
| 48 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/9febd2ae/wav/9febd2ae_1602.wav|zɯʔto tatɕiʔpanaɕi de kiːtetakaɽa tsɯkaɽeta joː.|87
|
| 49 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakamoto_maya/Sakamoto_Maya_01/Sakamoto_Maya_01_chunk1709.wav|soɯ iɯ imi de wa, sendʑoɯgawaɽa çitagi wa, kawaʔta no de mo, koɯsei ɕita no de mo, modoʔta no de mo toɽikaeɕita no de mo nakɯ, maɕite, deɽeta no de mo doɽota no de mo nakɯ.|16
|
| 50 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/9febd2ae/wav/9febd2ae_0591.wav|dʑosei ga mɯtɕɯɯ ni naʔtɕaɯ kɯɽai kaʔko iː otokoʔte koto ne.|87
|
| 51 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/02/02001850.wav|tefɯda ga babanɯki ni mɯitetakaɽa, çiʔɕi ni naʔterɯɴ da jo ne. soɽe kɯɽai, wataɕi dʑa nakɯte mo daɽe de mo wakarɯ jo.|6
|
| 52 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakamoto_maya/Sakamoto_Maya_01/Sakamoto_Maya_01_chunk935.wav|dʑaː ɕoɯbɯ kaiɕi de iː na? soko sɯwaɽe jo.|16
|
| 53 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/00163dc9/wav/00163dc9_1452.wav|sono sɯki wa...donna sɯki?|80
|
| 54 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/saori/unxkaedepuriUBfin-chunk48_DeepFilterNet3.wav|aitsɯ wa wataɕi ga moɽaɯ jo soɽe dʑaː mata!|20
|
| 55 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/nagi/nagi_cgss/nagi_card_300836/nagi_voice_300836_6_03.wav|seː no de, dʑampɯ!|43
|
| 56 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/ranko/ranko_mobamas/ranko_mobamasu_0002/ranko_mobamasu_0002_chunk3.wav|minna no ɽaibɯ ga kimaʔtaʔte kiːtaɴ desɯkedo, hontoɯ desɯ ka?mɯ, mɯɽhaʔhaː! kaŋki no koe ga kikoerɯ! me o todʑiɽeba, soko ni!|14
|
| 57 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki_03/Shinichiro_Miki_03_chunk344.wav|nadeko ni taisɯrɯ ikaɽi no keʔka to ɕite.|7
|
| 58 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/003/S003_B_0139.wav|iʔteta jatsɯɽa, çiki hadʑimeta zo.|55
|
| 59 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/017/S017_F_0067.wav|moɯ iːja. dʑa, ima wa ammaɽi momerɯ na jo.|47
|
| 60 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/kamiya_hiroshi/Kamiya_Hiroshi_01/Kamiya_Hiroshi_01_chunk1454.wav|omotai no jo.|13
|
| 61 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/momoka/momoka_mobamas/momoka_mobamasu_0014/momoka_mobamasu_0014_chunk38.wav|oɕigototɕɯɯ wa, esɯkoːto ɕite kɯdasaimasɯ? iː? koɽe kaɽa wa, fɯkɯsoɯ ni wa ki o tsɯkai nasai.|11
|
| 62 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki_03/Shinichiro_Miki_03_chunk2780.wav|sendʑoɯgahaɽasaɴ ga, ima iʔtai nani o kaŋgaete irɯ no ka o kaŋgaemaɕita.|7
|
| 63 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakamoto_maya/Sakamoto_Maya_03/Sakamoto_Maya_03_chunk1207.wav|kiʔto niŋgeɴ no soɯzoɯ o zeʔsɯrɯ mono ni narɯdaɽoɯ.|16
|
| 64 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/00163dc9/wav/00163dc9_1928.wav|dokiʔto sɯrɯ koto iwanaide.|80
|
| 65 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiburin/shiburin_mobamas/shiburin_mobamasu_0008/shiburin_mobamasu_0008_chunk12.wav|kiɽei na keɕiki ga mierɯ baɕo ni, kʲoɯmi nai? tɕiːsai koɽo, maitoɕi kazokɯ to iʔteta oki ni haiɽi no baɕo.|50
|
| 66 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/2cf01874/wav/2cf01874_3620.wav|çinai to iɯto, mae no ɕitɕɯɯ no.|66
|
| 67 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/usamin/usamin_cgss/usamin_card_100125/usamin_voice_100125_3_01.wav|goɽaidʑoɯ, aɽigatoɯ gozaimaɕita! kite kɯɽeta minasaɴ ni, hai! iʔpai zɯtsɯ! motɕiɽoɴ, pɯɽodʲɯɯsaːsaɴ ni mo!|46
|
| 68 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/usamin/usamin_cgss/usamin_card_100485/usamin_voice_100485_2_10.wav|ima wa, naite nai desɯ! de mo, ano toki no koto o omoidasɯto...rɯisei zɯɯɴ!|46
|
| 69 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/ad28b91b/wav/ad28b91b_1141.wav|ageaɕi o torɯ na, sonna tsɯmoɽi wa nai. ɯɴ, iː iɽozɯkai da na, soko wa hjoɯka ɕite mo iː.|88
|
| 70 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki_03/Shinichiro_Miki_03_chunk2088.wav|çitohada nonɯkɯmoɽi o, miɽerɯ no desɯ.|7
|
| 71 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/usamin/usamin_cgss/usamin_card_100343/usamin_voice_100343_1_09.wav|katakoɽi ga ɯso no joɯ ni kiete...! oteiɽe sɯɽeba, mada dʑɯɯ neɴ wa motɕi soɯ...|46
|
| 72 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Syuuko/Syuko_CGSS_ShinAido_Home_Room/syuuko_card_200086/syuukovoice_200086_1_05.wav|konna iɕoɯ made kitɕaʔte! aidorɯʔte kandʑi da jo ne! ɯfɯfɯ!|40
|
| 73 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/00163dc9/wav/00163dc9_1968.wav|sɯki na çito ga soba niːte kɯɽerɯ dake de.|80
|
| 74 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/46d6bf83/wav/46d6bf83_2493.wav|gomeɴ, ataɕi wa pasɯ da naː.|95
|
| 75 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/6d565f54/wav/6d565f54_1073.wav|fɯʔfɯʔfɯɴ, agehatɕaɴ sɯpeɕarɯ o misete jarɯ wa!|81
|
| 76 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/ad28b91b/wav/ad28b91b_0817.wav|doɯ ɕite wataɕi ga taniɴ no akiɽameta jɯme o oɕitsɯkeɽaɽenakɯtɕa ikenaiɴ da. sonna mono o kakaete itaɽa, wataɕi made zasetsɯ sɯrɯ koto ni naɽi soɯ da. iː meiwakɯ da.|88
|
| 77 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki__02/Shinichiro_Miki__02_chunk1099.wav|aɴ ni dokoɽo ka, tɕokɯsetsɯ iwaɽemaɕita. dairekɯto desɯ.|7
|
| 78 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/002/S002_F_0069.wav|soɽe wa, haʔɕɯɯ ɽemme mo kawaɽanai to omoɯ. tɕiba, ibaɽaki, saitama, gɯmma, kakɯtɕi no haiɽeberɯ na jaŋkiː ga dete kɯrɯ koto wa, amaɽi nai to omoɯ.|25
|
| 79 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/6489388e/wav/6489388e_0920.wav|wataɕi no ɕɯɯ ni mirʲokɯ wa aɽimaɕiɴ kaː—?|62
|
| 80 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/fumika/fumika_mobamas/fumika_split/fumika__0004/fumika__0004_chunk25.wav|dema ga kakarɯ no de...|1
|
| 81 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/saori/26-MizukiTomo.wav|onegai ɕi maː sɯ!|20
|
| 82 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/940de876/wav/940de876_4484.wav|wakaɽijasɯkɯ niʔpoɴ go ni ɕite mita wa.|77
|
| 83 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/b8b5fe66/wav/b8b5fe66_0656.wav|warɯi...naɴ ka, konna koto ni naʔtɕimaʔta.|84
|
| 84 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/36ea135b/wav/36ea135b_0089.wav|eː to...a, kʲɯɯkoɯɕa, kagi kakaʔte...|76
|
| 85 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/18460462/wav/18460462_1416.wav|doɯ ɕite kimitatɕi wa, kono ɽeʔɕa ni haiʔte iko, betsɯ koːdo o toʔte irɯɴ da?|96
|
| 86 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/940de876/wav/940de876_3617.wav|tɕoʔto, naɴ no hanaɕi ɕiterɯ no joː.|77
|
| 87 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/8b6e7173/wav/8b6e7173_1661.wav|zɯrɯi...nemɯi...|67
|
| 88 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_03/Sakurai_Takahiro_03_chunk2576.wav|mizɯ ka.|4
|
| 89 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sawashiro_miyuki/Sawashiro_Miyuki_03/Sawashiro_Miyuki_03_chunk699.wav|ja kimotɕi o jaite irɯʔte iɯ no ka, gikɯɽi.|19
|
| 90 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/011/S011_A_1542.wav|kitɕikɯ dʑa naiɕi ka!|29
|
| 91 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/saori/072.wav|iː koto de mo aɽimaɕita? sɯɯpaː de tokɯbaiçiɴ geʔto ɕita to ka. iːɴ desɯ ka? naɽa, onegai ɕimasɯ.|20
|
| 92 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/3c58f1c4/wav/3c58f1c4_1532.wav|sempai mo wataɕi to onadʑi kimotɕi da to omoʔteta no ni.|70
|
| 93 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/horie_yui/Horie_Yui_01/Horie_Yui_01_chunk1557.wav|soɽe de iː no daɽoɯ ka?|0
|
| 94 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/449d5a0a/wav/449d5a0a_0877.wav|eŋgi no dʑiʔkoɯ wa pozitibɯ desɯ. desɯga, kɯoɽiti ni tsɯite wa fɯtoɯmei desɯ.|63
|
| 95 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_03/Sakurai_Takahiro_03_chunk987.wav|ija, soɽe wa ɕiɽanaikedo.|4
|
| 96 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki__01/Shinichiro_Miki__01_chunk163.wav|naɴ daʔte, kɯtɕinawasaɴ wa kamisama na no desɯkaɽa.|7
|
| 97 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/04/04004090.wav|haha.|35
|
| 98 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/46d6bf83/wav/46d6bf83_1376.wav|ʔɴ? aː, ano ko ka. ɯɴ, oboeterɯ jo.|95
|
| 99 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/bb6ac6f1/wav/bb6ac6f1_0498.wav|sono bɯɴ, dʑibɯɴ no koto mo amaɽi hanasanai jo ne. kikeba oɕiete kɯɽerɯɴ daɽoɯkedo, kikanai hoɯ ga iː no ka na?|79
|
| 100 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/46d6bf83/wav/46d6bf83_2572.wav|ɯɯɴ, maɕiɽo ga soɯ iɯ no wakarɯ joɯ ni narɯ ni wa, motɕiʔto ɯmakɯ naɽanaito dame kanʲaː.|95
|
| 101 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/fumika/fumika_mobamas/fumika_split/fumika__0014/fumika__0014_chunk82.wav|sanisawa sensei wa...|1
|
| 102 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/horie_yui/Horie_Yui_02/Horie_Yui_02_chunk1770.wav|wataɕi to akɯma no naɽesome o.|0
|
| 103 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sawashiro_miyuki/Sawashiro_Miyuki_03/Sawashiro_Miyuki_03_chunk212.wav|iɽaʔɕai, tsɯbasatɕaɴ.|19
|
| 104 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/940de876/wav/940de876_3392.wav|sonna no fɯɕizeɴ da wa?|77
|
| 105 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/b8b5fe66/wav/b8b5fe66_0865.wav|soɽe o ɕiɽanai niŋgeɴ kaɽa, fɯɽeai no imi o çikidaɕite kɯɽeta çitotatɕi ga irɯkaɽa.|84
|
| 106 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/kamiya_hiroshi/Kamiya_Hiroshi_01/Kamiya_Hiroshi_01_chunk308.wav|godʑɯɯ kiɽo ɽaɕiː.|13
|
| 107 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/95c3bdd8/wav/95c3bdd8_1559.wav|ɯmaɽete kono hoɯ, zɯʔto sono kɯgʲoɯ ni taiɕinonderɯ wataɕi ni taisɯrɯ ijami desɯ ka?|78
|
| 108 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/6d565f54/wav/6d565f54_0105.wav|wataɕi ga tsɯiteʔte mo, jakɯ ni tatanai wa ne.|81
|
| 109 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/chieri/chieri_cgss/chieri_card_100860/chieri_voice_100860_6_05.wav|gʲoɯsei kaɽa no, omotenaɕi desɯ!|24
|
| 110 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Kanade/Kanade_voice_home_shinaido_room/kanade_card_200063/kanade_voice_200063_6_04.wav|hontoɯ no wataɕi, misete agerɯ!|18
|
| 111 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/bc778ddb/wav/bc778ddb_1097.wav|ija da! sonna iʔpoɯteki niːwaɽete mo nani ga naɴ do ka wakaɽanai jo!|90
|
| 112 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/minami/minami_cgss/minami_card_201308/minami_voice_201308_2_02.wav|tɕanto keŋkoɯkotsɯ ga çiɽaiterɯɴ desɯ jo? tsɯbasa ga haete mo daidʑoɯbɯ na joɯ ni.|15
|
| 113 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/mifune/mifune_cgss/mifune_card_201246/mifune_voice_201246_6_06.wav|jorɯ wa mada, hadʑimaʔta bakaɽi. koɽe dake dʑa, owaɽimaseɴ jo.|56
|
| 114 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/36ea135b/wav/36ea135b_0022.wav|seɴɕɯɯ made wa ne, keʔko�� hoŋki de kaŋgaeteta.|76
|
| 115 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/chieri/chieri_mobamas/chie_mobamasu_0017/chie_mobamasu_0017_chunk110.wav|mahoɯ de kɯɽoɯba, sagasenai ka na?|24
|
| 116 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/011/S011_A_1667.wav|ano deai o kɯɽeta no ga oja no kʲoɯikɯ naɽa, warɯkɯ wa nai aidʑoɯ daʔta sɯ wa.|29
|
| 117 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/ranko/ranko_mobamas/ranko_mobamasu_0017/ranko_mobamasu_0017_chunk84.wav|waga tomo to.|14
|
| 118 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/bb6ac6f1/wav/bb6ac6f1_0604.wav|koɯ iɯ no otɕitɕisaɴʔpokɯ de tɕɯki mainitɕi saɽerɯto komarɯkedo.|79
|
| 119 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_03/Sakurai_Takahiro_03_chunk2754.wav|kɯrɯma ni çikaɽete ɕindaɽaɕiː zo.|4
|
| 120 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/36ea135b/wav/36ea135b_1288.wav|osewa ni naʔterɯ tatɕiba dakaɽa, honto wa ammaɽiːenaiɴ dakedo neː.|76
|
stylekan/Data/mg_valid.csv
ADDED
|
@@ -0,0 +1,198 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/598c113f/wav/598c113f_109.wav|iɽaʔɕai! ɕɯɯgakɯ rʲokoɯ wa, tanoɕikato desɯ ka?|194
|
| 2 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/282cfa8c/wav/282cfa8c_1042.wav|ima wa magarɯ...itakaʔtakedo, magarɯ joɯ ni naʔtaɴ da jo.|16
|
| 3 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/MRD12.wav|kakɯbetsɯ desɯ. pɯɽodʲɯɯsaː kaɽa itadakɯ, kono iʔpai...!|563
|
| 4 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/35d789d2/wav/35d789d2_138.wav|te wa soko de iːɴ desɯ ka? koɕi ni mawasanakɯte iːɴ desɯ ka?|51
|
| 5 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/tsujido/vo/011/S011_A_2155.wav|hontoɯ da...|553
|
| 6 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/cec410a1/wav/cec410a1_328.wav|dʑosei no ɕaɕiɴ ga noʔte irɯ saito o, ɕiteki na joɯto de etsɯɽaɴ ɕite itaɽaɕiː.|434
|
| 7 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/6e22f5cd/wav/6e22f5cd_267.wav|soʔka, mada nomerɯ joɯ ni naʔta baʔkaɽi ka. kondo oanesaɴ to, baː de mo iʔte mirɯ?|236
|
| 8 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/ad28b91b/wav/ad28b91b_1315.wav|dʑibɯɴ de mo mɯtɕa o iʔterɯ no wa wakaʔterɯ. daga asaçi dakaɽa kiːte mitaɴ da.|343
|
| 9 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/momoka/momoka_cgss/momoka_card_100253/momoka_voice_100253_1_12.wav|fɯɯɯ...dʑibɯɴ no itaɽanasa ga, kɯjaɕiː...pɯɽodʲɯɯsaːtɕama...|489
|
| 10 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/0253acb6/wav/0253acb6_869.wav|motomoto, koɯ ɕite gaʔkoɯ ni koɽaɽeɽeba, soɽe de mokɯhjoɯ taʔsei daʔta koto dakaɽa.|28
|
| 11 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/monogatari/monogatari_voices/monogatari_split/sawashiro_miyuki/Sawashiro_Miyuki_02/Sawashiro_Miyuki_02_chunk1017.wav|aɽaɽa ɽike, naŋge ni fɯjɯɯ soɯ?|475
|
| 12 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/76981655/wav/76981655_0566.wav|mada hjakɯ maɴ kaitɕɯɯ sɯrɯ totɕɯɯ daɕi, mata kaisɯɯ wasɯɽeta ne. haiː—tɕi!|110
|
| 13 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/02153faa/wav/02153faa_484.wav|ɯɯɴ, bokɯ ga ɕirɯ kagiɽiːtsɯ mo konna kandʑi da na. hantsɯki mae ni tɕoʔto aɽe kimi no çi ga aʔtakedo, soɽe dake da na.|20
|
| 14 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/4e2f4ba6/wav/4e2f4ba6_0673.wav|fɯto naŋge nakɯ, aɕi o tometa.|143
|
| 15 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/37c014a1/wav/37c014a1_0474.wav|soɯ ɕi te kɯɽerɯ to kaɴɕa sɯrɯ. moɯ iʔkai, mɯne o jɯɽaɕite agerɯ.|6
|
| 16 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/99b5eb16/wav/99b5eb16_0264.wav|otakɯ no ofɯrɯ o kaɕite itadakenai kaɕiɽa? osamɯ ɕibaɽakɯ sentoɯ kajoɯ wa mɯɽi dakaɽa.|345
|
| 17 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/f6c4b7b2/wav/f6c4b7b2_0424.wav|dʑaː, paiɽoʔto aoikɯɴ de iːdʑa nai! nanni mo mondai nai wa!|439
|
| 18 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/bf7b3aa8/wav/bf7b3aa8_457.wav|oniːtɕaɴ, tɕinami mo ufo kansokɯjoɯ no boɯeŋkʲo o kaɯ tame ni, mata meidosaɴ ni naʔte iː ka na?|277
|
| 19 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/0f6fbea8/wav/0f6fbea8_0220.wav|oi, moɯ iː. omae no aiboɯ dʑimaɴ wa iːʔte no.|52
|
| 20 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/1967ee53/wav/1967ee53_0238.wav|madʑo wa ne, hoɴ o jomɯ no ga sɯki naɴ daʔte.|71
|
| 21 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/cc948b89/wav/cc948b89_1571.wav|fɯtsɯɯ no dʑiɕɯ kɯnɽeɴ naɽa ɯɽaniwa de jaɽeba iːdʑa nai.|423
|
| 22 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/5d68aedf/wav/5d68aedf_0376.wav|daʔte, tomodatɕi naŋkaitaʔte nani mo kawaɽanai moɴ!|183
|
| 23 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/e3ee19b2/wav/e3ee19b2_127.wav|toɯbɯɴ wa koko o tsɯkawasete moɽaoɯ ze. kiːtenai no ka?|370
|
| 24 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/cda4375a/wav/cda4375a_0871.wav|saiɕo kaɽa sono tsɯmoɽi. kikeɴ na ɽei dakaɽa, fɯtaɽi ni makaserɯ tsɯmoɽi wa nai.|385
|
| 25 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/05a45f91/wav/05a45f91_131.wav|negaɽakɯba ɕɯ ga ɕɯkɯfɯkɯ ɕi mamoʔte kɯdasaimasɯ joɯ ni.|70
|
| 26 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/6d19f294/wav/6d19f294_381.wav|desɯ ne. soɯ dekirɯ nante mitoːɕi, sɯkɯnakɯtomo wataɕi wa tateɽaɽenai desɯ.|189
|
| 27 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/6d250131/wav/6d250131_647.wav|kansai no oteɽa ni wa jokɯ arɯɽaɕiːkedo, koko no wa ɕiɽanai.|202
|
| 28 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/bce2a5af/wav/bce2a5af_2513.wav|ɯ, koɽe, sɯgokɯ kimotɕiː— ka mo. sɯgoi, naɴ da ka hazɯkaɕiː oto da ne.|261
|
| 29 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_mobamas/chie_mobamasu_0014/chie_mobamasu_0014_chunk201.wav|ne fɯsokɯ ni naʔtɕaimasɯ...ɯɯ...|483
|
| 30 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/miku/miku_cgss/miku_card_100395/miku_voice_100395_2_05.wav|pːtɕaɴ ga odoɽokɯ kɯɽai! sekɯɕiː ni naʔte miserɯnʲa!|487
|
| 31 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/1a5a3db8/wav/1a5a3db8_2430.wav|ija jo, soɽe mo ija! wataɕi ga mae de, ɯɯ ga ɯɕiɽo, zɯʔto soɯ ɕite kitadʑa nai! anta wa, damaʔte wataɕi no ɯɕiɽo o tsɯite kɯɽeba iː no!|91
|
| 32 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/f19b6190/wav/f19b6190_0934.wav|jɯɯ wa so no...ɯwaki to ka wa ɕinai to omoʔterɯkedo...|449
|
| 33 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/90fa05fd/wav/90fa05fd_1203.wav|daga, soɯ toɽaerɯ no ga itɕibaɴ, fɯ ni otɕirɯ.|274
|
| 34 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/cda4375a/wav/cda4375a_1309.wav|moɯ sono koɽo no kiokɯ wa hotondo naikeɽedo, wataɕi mo niŋgeɴ daʔta.|385
|
| 35 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/773a4156/wav/773a4156_2205.wav|moɯ iʔɕoɯ kakaʔte mo kaesenai ka mo ɕiɽenai. soɽe kɯɽai toko ni wa kaɴɕa ɕiterɯ.|214
|
| 36 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/0b8ae160/wav/0b8ae160_0536.wav|a, ano...dʑɯɯdaiʔte, daidʑoɯbɯ naɴ desɯ ka?|94
|
| 37 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/tsujido/vo/302/S302_E_0006.wav|sɯgokaʔta.|532
|
| 38 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/bca2cfac/wav/bca2cfac_1044.wav|moɕi ka ɕite, takɯ ja to wakaba iʔɕo daʔta no kai?|320
|
| 39 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/940de876/wav/940de876_3235.wav|jaɽa moɯ, mada oʔkikɯ narɯ no? ma, tɕoʔto wa te kɯgeɴ ɕinasaɴ joː.|319
|
| 40 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/cda4375a/wav/cda4375a_0579.wav|soɯ ne, fɯkɯsɯɯ no dansei to kaŋkei o moʔte inai kagiɽi wa...|385
|
| 41 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/1ed99743/wav/1ed99743_557.wav|ɴ. kimitatɕi no bɯtɕiageta, daikʲɯɯɕɯɯsl koɯsoɯ. bokɯ wa, aɽe ni me ga arɯ to mikonde irɯ.|1
|
| 42 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/karen_cgss/karen_cgss_card_201316/karen_cgss_voice_201316_2_01.wav|nita joɯ na fɯkɯ de mo kaŋkei nai! fɯkɯ wa hoɕiːkaɽa kaɯ! idʑoɯ!|488
|
| 43 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/940de876/wav/940de876_4015.wav|tɕoʔto josanteki ni ne, kitai bɯmoɴ no hoɯ de mo joteigai no josaɴ ga kakaɽi soɯ na no jo.|319
|
| 44 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki__02/Shinichiro_Miki__02_chunk297.wav|geikaiʔte no wa, kamisama ni toʔte meataɽaɕiːɴ da jo. ija, meataɽaɕi ka wa neː ka, kawaɽibaɽi ɕineː ze.|473
|
| 45 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/5e5993c5/wav/5e5993c5_615.wav|ne, neː, moɯ iʔkai kisɯ ɕite.|120
|
| 46 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/2af831b5/wav/2af831b5_521.wav|fɯɴ, itsɯ mo nigijaka na komɯsɯme da ze.|5
|
| 47 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/momoka/momoka_cgss/momoka_card_100410/momoka_voice_100410_4_02.wav|dakedo, ima wa ɯtaʔte odoʔte, tanoɕinde! haɕitanai ka mo to omoɯ kɯɽai, oːkina koe de waɽaʔte...fɯfɯ!! ojoɯgi wa warɯkɯte mo, tanoɕisa de iʔpai desɯ zo!|489
|
| 48 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/22d1fa2f/wav/22d1fa2f_362.wav|dʑoɯdaɴ da, oɽe wa haneda ni hanaɕi ga arɯɴ da jo.|84
|
| 49 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/653a1bc0/wav/653a1bc0_1068.wav|soɽe ni, ima wa kono fɯkɯ igai ni kigae ga aɽimaseɴ no de.|174
|
| 50 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/ee093a4f/wav/ee093a4f_0555.wav|somosomo anata no koɯgi no ɕikata ga ki ni haiɽimaseɴ. iedeʔte naɴ desɯ ka? zeʔsaɴ iedetɕɯɯ no wataɕi ga ieta giɽi de wa aɽimaseŋga, ɕoɯgakɯsei desɯ ka?|371
|
| 51 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/9ee921f6/wav/9ee921f6_0012.wav|soɯ naɴ desɯ, çito o atsɯmejoɯ to sendeɴ ɕite irɯ mitai de.|347
|
| 52 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/bb6ac6f1/wav/bb6ac6f1_1043.wav|jɯɯkotɕaɴ seidʑɯɴha da jo! baɽibaɽi ɯki dʑa nai?|263
|
| 53 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/84be23bd/wav/84be23bd_1022.wav|moɯ, sonna miɽai mo kimaseɴ jo.|284
|
| 54 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/fa4704bf/wav/fa4704bf_119.wav|konna dʑikaɴ ni konna baɕo ni jobidasɯ nante, johodo kikaɽetakɯ nai hanaɕi na no ne.|387
|
| 55 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/9d33dced/wav/9d33dced_811.wav|ɕikaɕi ɽogɯiɴ ɕite irɯ idʑoɯ, fɯtatsɯ no sekai de iɕiki wa tsɯnagaʔte irɯ.|275
|
| 56 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_voice_home_shinaido_room/kanade_card_201055/kanade_voice_201055_1_11.wav|aɽa, moɯ çitotsɯ? pɯɽodʲɯɯsaːsaɴ ga onedaɽi nante. fɯfɯ!!|482
|
| 57 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/46d6bf83/wav/46d6bf83_2379.wav|tɕigaɯ, kaʔtaɽi make taɽi sɯrɯ no wa ataɕi!|141
|
| 58 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/8f8acabb/wav/8f8acabb_426.wav|dʑasɯko wa bɯ tsɯ zoɯ to ka e bakaɽi ki ni haiʔteta ne. soɽe to kimono! hommono no maikosaɴ mita toki no dʑasɯko no teɴɕiɴ no agaɽi gwai, sɯgokaʔta!|244
|
| 59 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/74eb72c7/wav/74eb72c7_0253.wav|so, sonna koto nai jo, gɯɯzeɴ dʑa nai ka na.|184
|
| 60 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/c593ed00/wav/c593ed00_0583.wav|moɯ, hoʔtoite. koko de fɯjɯ o okosɯkaɽa.|466
|
| 61 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/a93da23d/wav/a93da23d_0512.wav|maː, nindʑa ɕɯʔɕiɴ no itɕizokɯ de arɯ koto wa çitei ɕimaseŋga.|317
|
| 62 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/e5d53ec4/wav/e5d53ec4_049.wav|neː, tanteisaɴ ɕiʔterɯ? koko saikiɴ, kanda de jakeɴ no çigai ga aitsɯiteta no.|460
|
| 63 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/3c58f1c4/wav/3c58f1c4_1376.wav|dʑitsɯ wa wataɕi, paːɴ!ʔte oːkina oto ga sɯrɯ, ɯtɕiage hanabi ga nigate naɴ desɯ.|117
|
| 64 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/7787d8bf/wav/7787d8bf_0621.wav|onadʑi dʑa nai jo. kotoɕi wa, ɕifɯki otokosaɴ to ka jobɯ joteinaɴ dakaɽa.|306
|
| 65 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/monogatari/monogatari_voices/monogatari_split/sawashiro_miyuki/Sawashiro_Miyuki_01/Sawashiro_Miyuki_01_chunk619.wav|mɯɽidʑi wa ɕinaikeɽedo, nani ka aʔta toki, çitoɽi de naɴ to ka ɕijoɯ to omoctɕa dame jo. anata wa imada ni sono keikoɯ ga tsɯjoikaɽa.|475
|
| 66 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_03/Sakurai_Takahiro_03_chunk1483.wav|nado to...|477
|
| 67 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/940de876/wav/940de876_0845.wav|moɯ iʔkai dake kikɯ wa? minna wa doɯ?|319
|
| 68 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/8e1072e6/wav/8e1072e6_0195.wav|soɽe o keʔte koko ka jo? soɽe mo ataɕi ni ɽenɽakɯ naɕi ka jo?|247
|
| 69 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/monogatari/monogatari_voices/monogatari_split/kamiya_hiroshi/Kamiya_Hiroshi_02/Kamiya_Hiroshi_02_chunk905.wav|ɕinseki ne.|478
|
| 70 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/2af831b5/wav/2af831b5_240.wav|naː, nosejasɯi dʑaɽo.|5
|
| 71 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/84be23bd/wav/84be23bd_0187.wav|oːgeza desɯ ne, taiɕita koto aɽimaseɴ. mada ɕiɽabeterɯ dake desɯɕi.|284
|
| 72 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/72921df9/wav/72921df9_190.wav|kao to namae, neː?|228
|
| 73 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/3ec57102/wav/3ec57102_137.wav|wataɕi no tame ni, oɕokɯdʑi o tɕɯɯdaɴ sɯrɯ çitsɯjoɯ wa aɽimaseɴ. doɯzo, oki ni nasaɽazɯ.|144
|
| 74 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/arisu/arisu_mobamasu/14_arisu__0015_(Vocals)/14_arisu__0015_(Vocals)_chunk153.wav|baɽentaiɴ de mo, itsɯ mo toːɽi desɯ.|495
|
| 75 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/d39532a8/wav/d39532a8_1523.wav|dakaɽa kaeʔte kite kɯɽetetaɴ daʔte.|384
|
| 76 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/tsujido/vo/011/S011_A_0310.wav|nʲa haha! jaʔpa sempai omoɕiɽoiʔsɯ!|553
|
| 77 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/4ded9fa1/wav/4ded9fa1_0342.wav|fɯfɯ, de wa toɽiaezɯ, hoːpɯ to jɯki no meija no tame ni, fɯtsɯɯ no fɯkɯ da to ɕindʑite agemaɕoɯ ka.|129
|
| 78 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/36ea135b/wav/36ea135b_2487.wav|ɯɯɴ, jaɕiɽokɯɴ no kaŋgae mo ataʔterɯ. nozomi sempai wa, kanzeɴ ni wasɯɽetɕaʔterɯ wake dʑa nai.|60
|
| 79 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/bce2a5af/wav/bce2a5af_0662.wav|ano mɯsɯme wa jokɯ wakaɽaɴ na.|261
|
| 80 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/f4169f28/wav/f4169f28_193.wav|dakaɽa soɯ iɯ imi dʑa nakɯte.|386
|
| 81 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/jou_mika/jou_mika_cgss/jou_mika_card_301206/jou_mika_voice_301206_1_10.wav|me, tsɯmɯʔte agejoɯ ka? daʔte hoɕiːdeɕoɯ? kisɯ matɕigao?|485
|
| 82 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_03/Sakurai_Takahiro_03_chunk2594.wav|to, kamawazɯ ni hanaɕi o tsɯzɯketa.|477
|
| 83 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/f04ee070/wav/f04ee070_0345.wav|kaɯrɯkosaɴ to baʔkaɽi nakajasa soɯ ni ɕitɕaʔte soɯ.|461
|
| 84 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/46d6bf83/wav/46d6bf83_0264.wav|masaja, koɽe de kʲoɯ no ɽeɴɕɯɯ wa oçiɽaki da jo neː.|141
|
| 85 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/631b0413/wav/631b0413_015.wav|jokaʔtaɽa, kʲoɯ mo iʔɕo ni, iː desɯ ka?|151
|
| 86 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/efb922ca/wav/efb922ca_0770.wav|ɯfɯfɯ, baːrɯ o fɯɽimawasɯkaɽa, moɯ tɕoʔto ɯɕiɽo ni sagaʔtete.|409
|
| 87 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/f19b6190/wav/f19b6190_0480.wav|dʑiʔsai no nami wa moʔto dʑoɯge ɯndoɯ mo hageɕiːɴ dakaɽa.|449
|
| 88 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/sakura_moyu/30/30000020.wav|kanodʑo wa, dʑibɯɴ o gisei ni ɕi, bokɯɽa no koto o, jorɯ no sekai o mamoɽoɯ to ɕite irɯ.|518
|
| 89 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/653a1bc0/wav/653a1bc0_1288.wav|de mo, kaɽadatɕɯɯ boɽoboɽo de, iki o s��rɯ no ga jaʔto deɕita. koko de wataɕi wa owaɽi naɴ da to, naɴ to nakɯ, wakaɽimaɕita.|174
|
| 90 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/1a5a3db8/wav/1a5a3db8_2704.wav|aɽe? wataɕi no koto ɕiʔterɯ?|91
|
| 91 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/99b5eb16/wav/99b5eb16_0339.wav|wataɕi mo itɕi do çinosakasaɴ no tokoɽo ni kao o daɕite mirɯ wa. kanodʑo ga ɕiŋgakɯ ɕita toki no fɯkɯzatsɯ na dʑidʑoɯ to ka, koʔtɕi kaɽa setsɯmei ɕite oita hoɯ ga iː to omoɯɕi.|345
|
| 92 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/3e02a4dc/wav/3e02a4dc_916.wav|dʑa a, tsɯitectɕa ikenaiɴ desɯ kaː?|213
|
| 93 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki_03/Shinichiro_Miki_03_chunk817.wav|sasɯga ni sɯgɯ ni nadekoɯ nadekoɯ da to, kampa ɕita joɯ desɯ. ma, iʔte ɕimaeba, maegami ga nakɯnaʔta dake desɯkaɽa, jokɯ miɽeba wakarɯɴ deɕoɯ ne.|473
|
| 94 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/00163dc9/wav/00163dc9_1842.wav|aɽe wa, daɽe kaɽa no okɯɽimono daʔta no ka naʔte.|69
|
| 95 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/25714f7a/wav/25714f7a_1007.wav|maː soɯ iɯnaɽa damaʔterɯsɯkedo.|95
|
| 96 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/monogatari/monogatari_voices/monogatari_split/sakamoto_maya/Sakamoto_Maya_01/Sakamoto_Maya_01_chunk1292.wav|heː, jaʔte mijoɯ. eʔto,ɴ?|474
|
| 97 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/monogatari/monogatari_voices/monogatari_split/sawashiro_miyuki/Sawashiro_Miyuki_03/Sawashiro_Miyuki_03_chunk1453.wav|ɯmaɽeotɕita tsɯgi no ɕɯŋkaɴ, wagahai wa wagahai o ɯmiotoɕita botai o mita.|475
|
| 98 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/c81c2b4d/wav/c81c2b4d_231.wav|ʔte ka, seitoɯ ɽitsɯ ga takai no wa, hatsɯnetɕaɴ ga adobaisɯ ɕite kɯɽerɯkaɽa de...a, dʑikaɴ daidʑoɯbɯ?|400
|
| 99 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/598c113f/wav/598c113f_109.wav|iɽaʔɕai! ɕɯɯgakɯ rʲokoɯ wa, tanoɕikato desɯ ka?|194
|
| 100 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/282cfa8c/wav/282cfa8c_1042.wav|ima wa magarɯ...itakaʔtakedo, magarɯ joɯ ni naʔtaɴ da jo.|16
|
| 101 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/MRD12.wav|kakɯbetsɯ desɯ. pɯɽodʲɯɯsaː kaɽa itadakɯ, kono iʔpai...!|563
|
| 102 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/35d789d2/wav/35d789d2_138.wav|te wa soko de iːɴ desɯ ka? koɕi ni mawasanakɯte iːɴ desɯ ka?|51
|
| 103 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/tsujido/vo/011/S011_A_2155.wav|hontoɯ da...|553
|
| 104 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/cec410a1/wav/cec410a1_328.wav|dʑosei no ɕaɕiɴ ga noʔte irɯ saito o, ɕiteki na joɯto de etsɯɽaɴ ɕite itaɽaɕiː.|434
|
| 105 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/6e22f5cd/wav/6e22f5cd_267.wav|soʔka, mada nomerɯ joɯ ni naʔta baʔkaɽi ka. kondo oanesaɴ to, baː de mo iʔte mirɯ?|236
|
| 106 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/ad28b91b/wav/ad28b91b_1315.wav|dʑibɯɴ de mo mɯtɕa o iʔterɯ no wa wakaʔterɯ. daga asaçi dakaɽa kiːte mitaɴ da.|343
|
| 107 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/momoka/momoka_cgss/momoka_card_100253/momoka_voice_100253_1_12.wav|fɯɯɯ...dʑibɯɴ no itaɽanasa ga, kɯjaɕiː...pɯɽodʲɯɯsaːtɕama...|489
|
| 108 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/0253acb6/wav/0253acb6_869.wav|motomoto, koɯ ɕite gaʔkoɯ ni koɽaɽeɽeba, soɽe de mokɯhjoɯ taʔsei daʔta koto dakaɽa.|28
|
| 109 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/monogatari/monogatari_voices/monogatari_split/sawashiro_miyuki/Sawashiro_Miyuki_02/Sawashiro_Miyuki_02_chunk1017.wav|aɽaɽa ɽike, naŋge ni fɯjɯɯ soɯ?|475
|
| 110 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/76981655/wav/76981655_0566.wav|mada hjakɯ maɴ kaitɕɯɯ sɯrɯ totɕɯɯ daɕi, mata kaisɯɯ wasɯɽeta ne. haiː—tɕi!|110
|
| 111 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/02153faa/wav/02153faa_484.wav|ɯɯɴ, bokɯ ga ɕirɯ kagiɽiːtsɯ mo konna kandʑi da na. hantsɯki mae ni tɕoʔto aɽe kimi no çi ga aʔtakedo, soɽe dake da na.|20
|
| 112 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/4e2f4ba6/wav/4e2f4ba6_0673.wav|fɯto naŋge nakɯ, aɕi o tometa.|143
|
| 113 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/37c014a1/wav/37c014a1_0474.wav|soɯ ɕi te kɯɽerɯ to kaɴɕa sɯrɯ. moɯ iʔkai, mɯne o jɯɽaɕite agerɯ.|6
|
| 114 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/99b5eb16/wav/99b5eb16_0264.wav|otakɯ no ofɯrɯ o kaɕite itadakenai kaɕiɽa? osamɯ ɕibaɽakɯ sentoɯ kajoɯ wa mɯɽi dakaɽa.|345
|
| 115 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/f6c4b7b2/wav/f6c4b7b2_0424.wav|dʑaː, paiɽoʔto aoikɯɴ de iːdʑa nai! nanni mo mondai nai wa!|439
|
| 116 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/bf7b3aa8/wav/bf7b3aa8_457.wav|oniːtɕaɴ, tɕinami mo ufo kansokɯjoɯ no boɯeŋkʲo o kaɯ tame ni, mata meidosaɴ ni naʔte iː ka na?|277
|
| 117 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/0f6fbea8/wav/0f6fbea8_0220.wav|oi, moɯ iː. omae no aiboɯ dʑimaɴ wa iːʔte no.|52
|
| 118 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/1967ee53/wav/1967ee53_0238.wav|madʑo wa ne, hoɴ o jomɯ no ga sɯki naɴ daʔte.|71
|
| 119 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/cc948b89/wav/cc948b89_1571.wav|fɯtsɯɯ no dʑiɕɯ kɯnɽeɴ naɽa ɯɽaniwa de jaɽeba iːdʑa nai.|423
|
| 120 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/5d68aedf/wav/5d68aedf_0376.wav|daʔte, tomodatɕi naŋkaitaʔte nani mo kawaɽanai moɴ!|183
|
| 121 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_05.wav|sɯgata mo, koe mo, namae sae kawaʔte mo, tɕitose ga kɯɽeta kiboɯ wa, kienai.|480
|
| 122 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_06.wav|koɯfɯkɯ wa doko ni de mo koɽogaʔterɯdeɕo? wataɕi wa, dakʲoɯ ɕitakɯ naikedo?|480
|
| 123 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_01.wav|kɯɽaŋkɯ aːʔpɯ! teɴɕi no oɕigoto ɕɯɯrʲoɯ! niŋgeɴ ni modoɽoʔka naːɴ!|480
|
| 124 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_02.wav|bʲoɯdoɯ de itainaɽa, mɯkaɴɕiɴ de irɯ koto. katamɯkerɯ saki ga nakɯnactɕoɯ.|480
|
| 125 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_12.wav|kiboɯ wa megɯrɯ. tatoe donna ni sɯrɯdokɯ, itakɯtomo. soɽe o anata ga ɕimeɕite kɯɽeta.|480
|
| 126 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_6_07.wav|çikaɽi ni tokerɯ no, kikata nonɯkɯmoɽi to.|480
|
| 127 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_07.wav|zensei to ka ɽaisei to ka, doɯ de mo iː. daʔte soɽe wa, ataɕi dʑa nai moɴ ne.|480
|
| 128 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_6_03.wav|owaɽi o koete.|480
|
| 129 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_4_01.wav|temɽiɴ wa itsɯ daʔte tsɯɽiaʔterɯ. kiboɯ to doɯtoɯ no ɕitsɯrʲa o motsɯ mono ga nani ka, ɕiʔterɯ? soɽe wa ne, zetsɯboɯ da jo.|480
|
| 130 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_02.wav|koɽe ga, owaɽi? naɽa, ɯɽeɕiː. daʔte, anata to no, tsɯgi no jakɯsokɯ.|480
|
| 131 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_11.wav|tsɯgi deaerɯ toki, wataɕitatɕi wa çitoɽi de wa nai, fɯfɯ, koɽe wa jogeɴ.|480
|
| 132 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_12.wav|josoɯ dekinai kimitatɕi de ite jo. tensai ɕikitɕaɴ no josoɯ o, ɯɽagiɽi tsɯzɯkete.|480
|
| 133 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_6_06.wav|mimamoʔterɯkaɽa. fɯtatabi, deaɯ made.|480
|
| 134 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_6_04.wav|teɴɕi no komoɽi ɯta o.|480
|
| 135 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_11.wav|kimi o ɽakɯeɴ e to izanaʔte agerɯ joɴ! ima o tanoɕimi kiʔta ato o, tɕitose tɕanto.|480
|
| 136 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_04.wav|osoɽa no ɯeʔte, donna nioi naɴ daɽoɯ ne? ɕiɽeɴ de, mɯkiɕitsɯ de, kaoɽi mo nai no ka na?|480
|
| 137 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_6_05.wav|tokɯbetsɯ ni narɯ no.|480
|
| 138 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_06.wav|sajoɯnaɽa, tɕitose. wataɕi ni owaɽi o oɕiete kɯɽeta, tokɯbetsɯ na çito.|480
|
| 139 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_6_02.wav|kagajaki no katɕi o, ɕimeɕite misete.|480
|
| 140 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_05.wav|çito ni kaŋka saɽerɯ no, betsɯ ni, ɕiɽoi dʑa nai jo. meʔta ni sono aite ga inai dake.|480
|
| 141 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_3_01.wav|çito no inotɕi ni kiseɴ ga nai nante, sonna no kiɽeigoto. sɯkɯɯ çito wa eɽabɯɕi, dʑibɯɴ no inotɕi nante taika de saɕidasenai. de mo kimi wa, saɕidaɕite ɕimai soɯ da jo ne. oçito joɕi dakaɽa.|480
|
| 142 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_04.wav|moɴ no soto e okɯɽidasenakɯte, gomeɴ nasai. moɴ no saki no koto, kondo oɕiete ne.|480
|
| 143 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_08.wav|ɕigo no sekai de tanoɕikɯ jarɯɴ dʑa nakɯte, ima o kiɽitorɯ, fɯɴ, dekirɯ jo.|480
|
| 144 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_03.wav|saigo wa nai no desɯ. negaeba doko made mo, fɯtaɽi no tokɯbetsɯ wa tsɯzɯkɯkaɽa.|480
|
| 145 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_07.wav|arɯdʑi ni wa kaɴɕa o, wataɕi no tokɯbetsɯ o minogaɕite kɯɽeta, anata to, deaeta.|480
|
| 146 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_10.wav|hakoni wa neː, wataɕi wa iɽanai na. teitaiʔte, dʑiɴrɯi to aiɕoɯ ga warɯiɴ daʔte.|480
|
| 147 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_03.wav|kempiɴ, dʑiʔkeɴ, a, ɕijokɯɴ ga todoitetaɴ daʔta, ɽabo ni komaɽoːʔto.|480
|
| 148 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_6_01.wav|jɯme no saki no zasetsɯ nante, kɯtsɯgaesoɯ ka.|480
|
| 149 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_5_01.wav|nakanaide, anata no çitomi ga, namida de nagaɽete ɕimaɯ, soɽe wa, koɽe kaɽa kiboɯ dake o mitsɯkerɯ tame no oɯseki, wataɕi o, seɴ neɴ saki no miɽai de mo, mitɕibiːte kɯɽerɯ çikaɽi, saigo wa, hohoemi de.|480
|
| 150 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_09.wav|fɯɽeɽaɽenakɯte mo wakarɯ, atatakasa, daʔte, zɯʔto, anata ni aʔta.|480
|
| 151 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_10.wav|zɯʔto omoʔteta, kiɽei na çitomi no iɽo daʔte, inotɕi no iɽo, naɴ da ne.|480
|
| 152 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_4_03.wav|kimi ga ima te ni moʔterɯ no wa, kiboɯ? soɽe to mo zetsɯboɯ? doʔtɕi de mo iː ka. wataɕi wa moɯ te niːɽeterɯkaɽa, hoɕikaʔtaɽa wakete agerɯ jo. katahoɯ wa, kimi ni moɽaʔta mono dakedo.|480
|
| 153 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_01.wav|namida. kanaɕikɯte, tsɯɽakɯte, kɯrɯɕiː mono. koɽe kaɽa no anata ni wa, çitsɯjoɯ nai mono.|480
|
| 154 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_09.wav|tobɯ no mo keʔkʲokɯ dʑibɯɴ no tɕikaɽa daɕi, de mo tsɯkaɽetɕaɯɕi. hakonde moɽaɯ no ga itɕibaɴ!|480
|
| 155 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_08.wav|sɯbete wa bʲoɯdoɯ, koɯhei ni fɯɽisosogɯ, ai mo, inoɽi mo, hontoɯ wa, anata ni daʔte.|480
|
| 156 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_4_02.wav|kotoba ni sɯrɯto nante dʑiːpɯ naɴ daɽoɯ ne. konna mono ni, ataɕitatɕi wa fɯɽimawasaɽete, çiʔɕi ni naʔterɯ, me ni mienai sei de, te ni ɕite mo dʑiʔka ga naikaɽa, jokei ni ne.|480
|
| 157 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_08.wav|kaisɯiʔte, konna ni ɕio kaɽakaʔta?, naitenai jo, kantɕigai, kiɴɕi.|480
|
| 158 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_07.wav|jorɯ no sampo, okoɽanaide ne. iʔɕo naɽa iː deɕo? ɕimpai naɽa, mihaʔtete.|480
|
| 159 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_05.wav|kikaɽetenakɯte jokaʔta. moɕi ka ɕitaɽa, toʔkɯ ni ɕiɽaɽeteta ka mo dakedo.|480
|
| 160 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_4_03.wav|moɕi nigete mo, doko made mo oʔte kɯrɯ ki deɕoɯ? jada jada, oni goʔko wa tanoɕiːkedo, zɯʔto wa tsɯkaɽetɕoɯ. dakaɽa, iʔɕo niːrɯ jo. toːi owaɽi no çi made.|480
|
| 161 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_02.wav|anna ni fɯkakɯ, soko ga naiʔte kandʑiteta no ni, sonna koto, nakaʔtaɴ da.|480
|
| 162 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_06.wav|kiɽei na ɕigikata nante nai, ka, soɽe de mo, jɯme kɯɽai mite itakaʔta jo.|480
|
| 163 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_10.wav|nɯɽeterɯ jo, iː no? sonna koto mo ki ni naɽanai kɯɽai, hoŋki de.|480
|
| 164 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_07.wav|otoɕi joɽiːki kiɽeterɯ, oːwatesɯgi, zeɴrʲoɯ da jo neː, soɯ iɯ tokoɽo.|480
|
| 165 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_05.wav|kimi wa itsɯ mo itsɯ mo, ataɕi o nigaɕite kɯɽenai jo ne. doko iʔte mo sa.|480
|
| 166 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_4_02.wav|daɽe mo kaɽe mo ga ɽisoɯ o oɕitsɯketerɯ. kimi mo soɯ, tɕitose tɕaɴ daʔte. soɽe de mo kimitatɕi wa, ɽisoɯ dʑa nai wataɕi de mo, tsɯkiaoɯ to ɕiterɯ.|480
|
| 167 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_6_02.wav|kagajaki no katɕi o, ɕimeɕite misete.|480
|
| 168 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_08.wav|taorɯ kaisanakʲa, wataɕi no koto o wasɯɽeɽaɽenakɯ narɯ koɯsɯi, pɯɕɯ!!|480
|
| 169 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_04.wav|ɯmi ni ɕizɯmɯki wa, fɯɯɴ. saː, doɯ daʔta ka naː, oki ni nagasaɽetɕaʔta mitai.|480
|
| 170 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_12.wav|tsɯmetai, kaɽa da no ɕiɴ ga hjoɯteŋka mitai, dakaɽa, tamete, donna hoɯhoɯ de mo.|480
|
| 171 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_6_07.wav|çikaɽi ni tokerɯ no, kikata nonɯkɯmoɽi to.|480
|
| 172 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_02.wav|kono ato wa motɕiɽoɴ omimai da joɴ! jowajowa kʲɯɯketsɯkisaɴ no tokoɽo ne.|480
|
| 173 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_3_01.wav|dʑɯndo no takai mono wa, soɽe dake moɽoiɴ da jo. joɽokobi mo, kanaɕimi mo, zembɯ zembɯ, kowaɽetɕaɯ. dakaɽa çito wa, mazaʔte ɯme aɯ no. kowaɽete ɯmaɽeta, sɯkima o.|480
|
| 174 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_5_01.wav|naɴ de, konna tokoɽo ni, wazawaza kita no? iː ko wa nerɯ dʑikaɴ de, ɕikitɕantatɕi wa, mite no toːɽi warɯi ko de, fɯfɯ, fɯɕigi, hoɴɕiɴ ɕitɕoʔta. kimi wa, okoʔterɯ no ni.|480
|
| 175 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_6_06.wav|mimamoʔterɯkaɽa. fɯtatabi, deaɯ made.|480
|
| 176 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_03.wav|gps o tsɯketeɽeba mendoɯ na ɽenɽakɯ wa iɽanai ka mo neːnʲɯɯ, kaɴɕi ɕakai.|480
|
| 177 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_12.wav|itsɯ mo doːɽi de gaʔkaɽi, ɕitenai? fɯɯɴ, itsɯ mo no ataɕi de iːɴ da.|480
|
| 178 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_01.wav|kono dʑiki no ɯmiʔte, zenzeɴ samɯkɯ naiɴ da neː. mɯɕiɽo nama noɽokɯte, betobetoː.|480
|
| 179 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_4_01.wav|fɯtsɯɯ dʑa nai ataɕi wa, iɽanakaʔta. tokɯbetsɯ na kagakɯɕa dʑa nai ataɕi wa, papa no ɕikai kaɽa hazɯɽeta. kawaiː dake dʑa nai ataɕi wa, aidorɯ dʑa iɽaɽenai.|480
|
| 180 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_01.wav|mabɯɕiː jo. sonna çikaɽi ni ateɽaɽetaɽa, toketenakɯ nactɕaɯ.|480
|
| 181 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_6_04.wav|teɴɕi no komoɽi ɯta o.|480
|
| 182 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_11.wav|aː, kɯʔtsɯiterɯto aʔtakaː. koɽe wa kimi no, jasaɕisa no ɯndo ka na?|480
|
| 183 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_06.wav|kaketsɯketa no? tɕitose tɕaɴ mo, kimi mo. doɯ ɕite soɯ, oseʔkai.|480
|
| 184 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_03.wav|modoʔte kitɕaʔta, çikaɽi no sekai, konna ni, aʔtakakaʔtaɴ da ne.|480
|
| 185 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_09.wav|ɯwa! mabɯɕiː! moʔto haʔkoɯrʲoɯ o osaeteː! dʑiɴrɯi ni wa mada hajai!|480
|
| 186 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_09.wav|manɯke na kao, ɕiterɯ, wataɕi mo? fɯfɯʔ, okaɕiː, aidorɯ na no ni ne.|480
|
| 187 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_10.wav|ameɽika dʑikomi no sɯieihoɯ de, tɕitose tɕaɴ no çitoɽi ja fɯtaɽi, ɕizɯmɯ—!|480
|
| 188 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_6_01.wav|jɯme no saki no zasetsɯ nante, kɯtsɯgaesoɯ ka.|480
|
| 189 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_6_05.wav|tokɯbetsɯ ni narɯ no.|480
|
| 190 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_6_03.wav|owaɽi o koete.|480
|
| 191 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_04.wav|heɴ na no? ɯmi wa, wataɕi o sɯikonde kɯɽerɯ hazɯ daʔta no ni, kʲoçi ɕite.|480
|
| 192 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_11.wav|ɕikaɽaɽerɯ no wa kiɽai. de mo, kʲoɯ wa, ɯɯɴ, naɴ de mo nai jo.|480
|
| 193 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/shiki_fine01.wav|haha, çiʔɕi da ne. ataɕi naŋka no tame ni. sonna toko hadʑimete mita. sonna fɯɯ ni, koe o aɽagerɯ toko. itsɯ mo itsɯ mo, kimi wa daɽe ka no tame ni. honto, heɴ na no.|480
|
| 194 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/shiki_fine02.wav|aʔ, kizɯtsɯita ɕikitɕaɴ o okoʔte kita çito da! ɽi fɯdʑiɴ! taoɽeta tɕitosetɕaɴ o tasɯketa no ni!, ma, ɯmi ni sasoʔta no wa—taɕi dakedo.|480
|
| 195 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/shiki_fine03.wav|ɯɯɴ, naɴ daɽoɯ, jokɯ wakannai. kietakaʔta no ka na. kʲɯɯ ni, doɯ de mo jokɯ nactɕaʔta. kimi to tɕitosetɕaɴ ga, naɴ ka, toːkɯte.|480
|
| 196 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/shiki_fine04.wav|"mɯmɯmɯ, kimi made soɯ jɯɯ koto iɯ. "" haihai, samiɕikaʔtandesɯɯ. ɯ eːɴ, kanaɕikaʔta joː kamaʔte kɯɽenakɯte itɕi ."" koɽe de iː?"|480
|
| 197 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/shiki_fine05.wav|aː, iː jo. hanɽoɴ wa motometenai. ataɕi ga hoɕiː no wa, kampeki na haɴɕoɯ. kimi no kasetsɯ o ɕoɯmei ɕite misete, owaɽi no çi made ni.|480
|
| 198 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/shiki_fine05.wav|kiboɯ wa—rɯ. akaɽi wa, kiʔto kie taɽi ɕinai. anata ga wataɕi ni mitɕi o ɕimeɕite kɯɽetakaɽa, kiʔto majowanaide mezaserɯ. owaɽi no saki no, çikaɽi o.|480
|
stylekan/Data/mg_valid.txt
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/1b74d271/wav/1b74d271_045.wav|erɯfʲɴ no rɯɴ o tsɯkaʔte, anata ni jasaɕiː sekai o tsɯkɯɽinaoseba iː no.|37
|
| 2 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/cda4375a/wav/cda4375a_1309.wav|moɯ sono koɽo no kiokɯ wa hotondo naikeɽedo, wataɕi mo niŋgeɴ daʔta.|385
|
| 3 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/6e22f5cd/wav/6e22f5cd_267.wav|soʔka, mada nomerɯ joɯ ni naʔta baʔkaɽi ka. kondo oanesaɴ to, baː de mo iʔte mirɯ?|236
|
| 4 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Kanade/Kanade_voice_home_shinaido_room/kanade_card_201055/kanade_voice_201055_1_11.wav|aɽa, moɯ çitotsɯ? pɯɽodʲɯɯsaːsaɴ ga onedaɽi nante. fɯfɯ!!|482
|
| 5 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/46d6bf83/wav/46d6bf83_0264.wav|masaja, koɽe de kʲoɯ no ɽeɴɕɯɯ wa oçiɽaki da jo neː.|141
|
| 6 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/22d1fa2f/wav/22d1fa2f_362.wav|dʑoɯdaɴ da, oɽe wa haneda ni hanaɕi ga arɯɴ da jo.|84
|
| 7 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/miku/miku_cgss/miku_card_100395/miku_voice_100395_2_05.wav|pːtɕaɴ ga odoɽokɯ kɯɽai! sekɯɕiː ni naʔte miserɯnʲa!|487
|
| 8 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/7787d8bf/wav/7787d8bf_1574.wav|gomeɴ nasai, neːtɕaɴ no iɯ toːɽi desɯ.|306
|
| 9 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/a0fd12d7/wav/a0fd12d7_2005.wav|kaɽeɽa kanodʑoɽa no soɯɕitsɯ kaɴ wa, ika bakaɽi ka. ɽoʔkɯnaʔtosama o ɯɕinaʔta waɽewaɽe idʑoɯ no kanaɕimi no eɴ ni, irɯ no de wa naideɕoɯ ka.|333
|
| 10 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/1a5a3db8/wav/1a5a3db8_2430.wav|ija jo, soɽe mo ija! wataɕi ga mae de, ɯɯ ga ɯɕiɽo, zɯʔto soɯ ɕite kitadʑa nai! anta wa, damaʔte wataɕi no ɯɕiɽo o tsɯite kɯɽeba iː no!|91
|
| 11 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/653a1bc0/wav/653a1bc0_1288.wav|de mo, kaɽadatɕɯɯ boɽoboɽo de, iki o sɯrɯ no ga jaʔto deɕita. koko de wataɕi wa owaɽi naɴ da to, naɴ to nakɯ, wakaɽimaɕita.|174
|
| 12 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/631b0413/wav/631b0413_015.wav|jokaʔtaɽa, kʲoɯ mo iʔɕo ni, iː desɯ ka?|151
|
| 13 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/Karen/karen_cgss/karen_cgss_card_201316/karen_cgss_voice_201316_2_01.wav|nita joɯ na fɯkɯ de mo kaŋkei nai! fɯkɯ wa hoɕiːkaɽa kaɯ! idʑoɯ!|488
|
| 14 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/940de876/wav/940de876_3235.wav|jaɽa moɯ, mada oʔkikɯ narɯ no? ma, tɕoʔto wa te kɯgeɴ ɕinasaɴ joː.|319
|
| 15 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/f04ee070/wav/f04ee070_0345.wav|kaɯrɯkosaɴ to baʔkaɽi nakajasa soɯ ni ɕitɕaʔte soɯ.|461
|
| 16 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/79b0d13c/wav/79b0d13c_437.wav|ɴ? ɕitsɯ wa, saʔki kaɽa ɽe no kimotɕi josa soɯ na kao o mitetaɽa...kaɽada ga, atsɯkɯ naʔte...|267
|
| 17 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/sakura_moyu/30/30000020.wav|kanodʑo wa, dʑibɯɴ o gisei ni ɕi, bokɯɽa no koto o, jorɯ no sekai o mamoɽoɯ to ɕite irɯ.|518
|
| 18 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/monogatari/monogatari_voices/monogatari_split/sawashiro_miyuki/Sawashiro_Miyuki_02/Sawashiro_Miyuki_02_chunk1017.wav|aɽaɽa ɽike, naŋge ni fɯjɯɯ soɯ?|475
|
| 19 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/3e02a4dc/wav/3e02a4dc_916.wav|dʑa a, tsɯitectɕa ikenaiɴ desɯ kaː?|213
|
| 20 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/3c58f1c4/wav/3c58f1c4_1376.wav|dʑitsɯ wa wataɕi, paːɴ!ʔte oːkina oto ga sɯrɯ, ɯtɕiage hanabi ga nigate naɴ desɯ.|117
|
| 21 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/f19b6190/wav/f19b6190_0480.wav|dʑiʔsai no nami wa moʔto dʑoɯge ɯndoɯ mo hageɕiːɴ dakaɽa.|449
|
| 22 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/jou_mika/jou_mika_cgss/jou_mika_card_301206/jou_mika_voice_301206_1_10.wav|me, tsɯmɯʔte agejoɯ ka? daʔte hoɕiːdeɕoɯ? kisɯ matɕigao?|485
|
| 23 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/arisu/arisu_mobamasu/14_arisu__0015_(Vocals)/14_arisu__0015_(Vocals)_chunk153.wav|baɽentaiɴ de mo, itsɯ mo toːɽi desɯ.|495
|
| 24 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/cec410a1/wav/cec410a1_328.wav|dʑosei no ɕaɕiɴ ga noʔte irɯ saito o, ɕiteki na joɯto de etsɯɽaɴ ɕite itaɽaɕiː.|434
|
| 25 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/tsujido/vo/010/S010_A_0004.wav|ke!! moɯ tɕoʔto de kɯmi no jaɽoɯ o ɯtɕitoʔte jaɽeta no ni jo!|537
|
| 26 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/0253acb6/wav/0253acb6_869.wav|motomoto, koɯ ɕite gaʔkoɯ ni koɽaɽeɽeba, soɽe de mokɯhjoɯ taʔsei daʔta koto dakaɽa.|28
|
| 27 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/7787d8bf/wav/7787d8bf_0305.wav|ano sa, sono, moʔto te ga kakarɯ mono daʔtaɽa, tabɯɴ tetsɯdaʔte moɽaʔta to omoɯ jo.|306
|
| 28 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/ee093a4f/wav/ee093a4f_0555.wav|somosomo anata no koɯgi no ɕikata ga ki ni haiɽimaseɴ. iedeʔte naɴ desɯ ka? zeʔsaɴ iedetɕɯɯ no wataɕi ga ieta giɽi de wa aɽimaseŋga, ɕoɯgakɯsei desɯ ka?|371
|
| 29 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/90fa05fd/wav/90fa05fd_1203.wav|daga, soɯ toɽaerɯ no ga itɕibaɴ, fɯ ni otɕirɯ.|274
|
| 30 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki__02/Shinichiro_Miki__02_chunk297.wav|geikaiʔte no wa, kamisama ni toʔte meataɽaɕiːɴ da jo. ija, meataɽaɕi ka wa neː ka, kawaɽibaɽi ɕineː ze.|473
|
| 31 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/tsujido/vo/302/S302_E_0006.wav|sɯgokaʔta.|532
|
| 32 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_03/Sakurai_Takahiro_03_chunk1483.wav|nado to...|477
|
| 33 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/4ce0075b/wav/4ce0075b_1410.wav|e, masaka soɽe ni saŋka ɕiɽo to kaiwanaide ne. naɴ no dʑɯmbi mo ɕitenaiɕi, oːzei no çito no mae ni derɯ to ka mɯɽi dakaɽa ne.|187
|
| 34 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/5e5993c5/wav/5e5993c5_615.wav|ne, neː, moɯ iʔkai kisɯ ɕite.|120
|
| 35 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/ad28b91b/wav/ad28b91b_1315.wav|dʑibɯɴ de mo mɯtɕa o iʔterɯ no wa wakaʔterɯ. daga asaçi dakaɽa kiːte mitaɴ da.|343
|
| 36 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/35d789d2/wav/35d789d2_138.wav|te wa soko de iːɴ desɯ ka? koɕi ni mawasanakɯte iːɴ desɯ ka?|51
|
| 37 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/99b5eb16/wav/99b5eb16_0264.wav|otakɯ no ofɯrɯ o kaɕite itadakenai kaɕiɽa? osamɯ ɕibaɽakɯ sentoɯ kajoɯ wa mɯɽi dakaɽa.|345
|
| 38 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/0ee82b61/wav/0ee82b61_0232.wav|jake ni sawagaɕiː hade na sɯɯtsɯ no wakai otoko no çito ɽaɕiːɴ dakedo, sono çito ni mo oɽei o iwanakɯtɕa ne.|112
|
| 39 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/f6c4b7b2/wav/f6c4b7b2_0424.wav|dʑaː, paiɽoʔto aoikɯɴ de iːdʑa nai! nanni mo mondai nai wa!|439
|
| 40 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/chieri/chieri_mobamas/chie_mobamasu_0014/chie_mobamasu_0014_chunk201.wav|ne fɯsokɯ ni naʔtɕaimasɯ...ɯɯ...|483
|
| 41 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/84be23bd/wav/84be23bd_0187.wav|oːgeza desɯ ne, taiɕita koto aɽimaseɴ. mada ɕiɽabeterɯ dake desɯɕi.|284
|
| 42 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/36ea135b/wav/36ea135b_1952.wav|ima kazɯetakedo, ɕita no kaidaɴ mo dʑɯɯ joɴ da ne. kʲɯɯkoɯɕa no kaidaɴ, zembɯ dʑɯɯ joɴ?|60
|
| 43 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/72921df9/wav/72921df9_190.wav|kao to namae, neː?|228
|
| 44 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/bca2cfac/wav/bca2cfac_1044.wav|moɕi ka ɕite, takɯ ja to wakaba iʔɕo daʔta no kai?|320
|
| 45 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/9d33dced/wav/9d33dced_811.wav|ɕikaɕi ɽogɯiɴ ɕite irɯ idʑoɯ, fɯtatsɯ no sekai de iɕiki wa tsɯnagaʔte irɯ.|275
|
| 46 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/58a2282f/wav/58a2282f_0589.wav|tobisaɽasaɴ no koto ga ɕimpai na no wa wakaɽimasɯga, sono, ɕiai ga owaʔta tɕokɯgo desɯɕi, satoɯiɴ sempai no koto mo ki ni kakete hoɕiː desɯ.|186
|
| 47 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/54ba80a8/wav/54ba80a8_0026.wav|zɯɯzɯɯ ɕi, to omowanai no?|153
|
| 48 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/monogatari/monogatari_voices/monogatari_split/sawashiro_miyuki/Sawashiro_Miyuki_01/Sawashiro_Miyuki_01_chunk619.wav|mɯɽidʑi wa ɕinaikeɽedo, nani ka aʔta toki, çitoɽi de naɴ to ka ɕijoɯ to omoctɕa dame jo. anata wa imada ni sono keikoɯ ga tsɯjoikaɽa.|475
|
| 49 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/c593ed00/wav/c593ed00_0583.wav|moɯ, hoʔtoite. koko de fɯjɯ o okosɯkaɽa.|466
|
| 50 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/653a1bc0/wav/653a1bc0_1068.wav|soɽe ni, ima wa kono fɯkɯ igai ni kigae ga aɽimaseɴ no de.|174
|
| 51 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/1a5a3db8/wav/1a5a3db8_2704.wav|aɽe? wataɕi no koto ɕiʔterɯ?|91
|
| 52 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/2af831b5/wav/2af831b5_521.wav|fɯɴ, itsɯ mo nigijaka na komɯsɯme da ze.|5
|
| 53 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/monogatari/monogatari_voices/monogatari_split/horie_yui/Horie_Yui_02/Horie_Yui_02_chunk643.wav|warɯi koto wa iwaɴ, aɽawaɽetaɽa saʔsato kɯɽete jaɽe.|476
|
| 54 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/bce2a5af/wav/bce2a5af_2513.wav|ɯ, koɽe, sɯgokɯ kimotɕiː— ka mo. sɯgoi, naɴ da ka hazɯkaɕiː oto da ne.|261
|
| 55 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/0b8ae160/wav/0b8ae160_0536.wav|a, ano...dʑɯɯdaiʔte, daidʑoɯbɯ naɴ desɯ ka?|94
|
| 56 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/cda4375a/wav/cda4375a_0871.wav|saiɕo kaɽa sono tsɯmoɽi. kikeɴ na ɽei dakaɽa, fɯtaɽi ni makaserɯ tsɯmoɽi wa nai.|385
|
| 57 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/monogatari/monogatari_voices/monogatari_split/sakamoto_maya/Sakamoto_Maya_01/Sakamoto_Maya_01_chunk1292.wav|heː, jaʔte mijoɯ. eʔto,ɴ?|474
|
| 58 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/b1921b3f/wav/b1921b3f_0549.wav|oni ni kaete mo, aisɯrɯ mono to soitogetai. soɯ kaŋgaeta mono ga, oni o tsɯkaʔta kono çidʑɯtsɯ o amidaɕita.|294
|
| 59 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/monogatari/monogatari_voices/monogatari_split/kamiya_hiroshi/Kamiya_Hiroshi_02/Kamiya_Hiroshi_02_chunk905.wav|ɕinseki ne.|478
|
| 60 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/momoka/momoka_cgss/momoka_card_100410/momoka_voice_100410_4_02.wav|dakedo, ima wa ɯtaʔte odoʔte, tanoɕinde! haɕitanai ka mo to omoɯ kɯɽai, oːkina koe de waɽaʔte...fɯfɯ!! ojoɯgi wa warɯkɯte mo, tanoɕisa de iʔpai desɯ wa!|489
|
| 61 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/76981655/wav/76981655_0566.wav|mada hjakɯ maɴ kaitɕɯɯ sɯrɯ totɕɯɯ daɕi, mata kaisɯɯ wasɯɽeta ne. haiː—tɕi!|110
|
| 62 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/6d250131/wav/6d250131_647.wav|kansai no oteɽa ni wa jokɯ arɯɽaɕiːkedo, koko no wa ɕiɽanai.|202
|
| 63 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/36d0de98/wav/36d0de98_590.wav|soɽaoka, nani ka wakaʔta no ka?|29
|
| 64 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/d39532a8/wav/d39532a8_1523.wav|dakaɽa kaeʔte kite kɯɽetetaɴ daʔte.|384
|
| 65 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/a93da23d/wav/a93da23d_0512.wav|maː, nindʑa ɕɯʔɕiɴ no itɕizokɯ de arɯ koto wa çitei ɕimaseŋga.|317
|
| 66 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/e3ee19b2/wav/e3ee19b2_127.wav|toɯbɯɴ wa koko o tsɯkawasete moɽaoɯ ze. kiːtenai no ka?|370
|
| 67 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/33e59069/wav/33e59069_045.wav|çitoɽi daʔte, keʔɕite warɯi mono de wa nai no.|40
|
| 68 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/momoka/momoka_cgss/momoka_card_100253/momoka_voice_100253_1_12.wav|fɯɯɯ...dʑibɯɴ no itaɽanasa ga, kɯjaɕiː...pɯɽodʲɯɯsaːtɕama...|489
|
| 69 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/bce2a5af/wav/bce2a5af_0662.wav|ano mɯsɯme wa jokɯ wakaɽaɴ na.|261
|
| 70 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/8f8acabb/wav/8f8acabb_426.wav|dʑasɯko wa bɯ tsɯ zoɯ to ka e bakaɽi ki ni haiʔteta ne. soɽe to kimono! hommono no maikosaɴ mita toki no dʑasɯko no teɴɕiɴ no agaɽi gwai, sɯgokaʔta!|244
|
| 71 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/4e2f4ba6/wav/4e2f4ba6_0673.wav|fɯto naŋge nakɯ, aɕi o tometa.|143
|
| 72 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/84be23bd/wav/84be23bd_1022.wav|moɯ, sonna miɽai mo kimaseɴ jo.|284
|
| 73 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/36ea135b/wav/36ea135b_2487.wav|ɯɯɴ, jaɕiɽokɯɴ no kaŋgae mo ataʔterɯ. nozomi sempai wa, kanzeɴ ni wasɯɽetɕaʔterɯ wake dʑa nai.|60
|
| 74 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/940de876/wav/940de876_4015.wav|tɕoʔto josanteki ni ne, kitai bɯmoɴ no hoɯ de mo joteigai no josaɴ ga kakaɽi soɯ na no jo.|319
|
| 75 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/598c113f/wav/598c113f_109.wav|iɽaʔɕai! ɕɯɯgakɯ rʲokoɯ wa, tanoɕikato desɯ ka?|194
|
| 76 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/5d68aedf/wav/5d68aedf_0376.wav|daʔte, tomodatɕi naŋkaitaʔte nani mo kawaɽanai moɴ!|183
|
| 77 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/f19b6190/wav/f19b6190_0934.wav|jɯɯ wa so no...ɯwaki to ka wa ɕinai to omoʔterɯkedo...|449
|
| 78 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/8e1072e6/wav/8e1072e6_0195.wav|soɽe o keʔte koko ka jo? soɽe mo ataɕi ni ɽenɽakɯ naɕi ka jo?|247
|
| 79 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/79b0d13c/wav/79b0d13c_434.wav|jokaʔta ɕino ni kiːte mo doɽe mo niaʔterɯʔte iɯ dake de kimaɽanakaʔtakaɽa.|267
|
| 80 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/46d6bf83/wav/46d6bf83_2379.wav|tɕigaɯ, kaʔtaɽi make taɽi sɯrɯ no wa ataɕi!|141
|
| 81 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/02153faa/wav/02153faa_484.wav|ɯɯɴ, bokɯ ga ɕirɯ kagiɽiːtsɯ mo konna kandʑi da na. hantsɯki mae ni tɕoʔto aɽe kimi no çi ga aʔtakedo, soɽe dake da na.|20
|
| 82 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/940de876/wav/940de876_0845.wav|moɯ iʔkai dake kikɯ wa? minna wa doɯ?|319
|
| 83 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/282cfa8c/wav/282cfa8c_1042.wav|ima wa magarɯ...itakaʔtakedo, magarɯ joɯ ni naʔtaɴ da jo.|16
|
| 84 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/773a4156/wav/773a4156_2205.wav|moɯ iʔɕoɯ kakaʔte mo kaesenai ka mo ɕiɽenai. soɽe kɯɽai toko ni wa kaɴɕa ɕiterɯ.|214
|
| 85 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/1ed99743/wav/1ed99743_557.wav|ɴ. kimitatɕi no bɯtɕiageta, daikʲɯɯɕɯɯsl koɯsoɯ. bokɯ wa, aɽe ni me ga arɯ to mikonde irɯ.|1
|
| 86 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/00163dc9/wav/00163dc9_1842.wav|aɽe wa, daɽe kaɽa no okɯɽimono daʔta no ka naʔte.|69
|
| 87 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/ac0e6660/wav/ac0e6660_0269.wav|soɽe naɽa sa, kʲoɯ wa koɽe kaɽa aiterɯ?|246
|
| 88 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/1967ee53/wav/1967ee53_0238.wav|madʑo wa ne, hoɴ o jomɯ no ga sɯki naɴ daʔte.|71
|
| 89 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/c81c2b4d/wav/c81c2b4d_231.wav|ʔte ka, seitoɯ ɽitsɯ ga takai no wa, hatsɯnetɕaɴ ga adobaisɯ ɕite kɯɽerɯkaɽa de...a, dʑikaɴ daidʑoɯbɯ?|400
|
| 90 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/05a45f91/wav/05a45f91_131.wav|negaɽakɯba ɕɯ ga ɕɯkɯfɯkɯ ɕi mamoʔte kɯdasaimasɯ joɯ ni.|70
|
| 91 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/cc948b89/wav/cc948b89_1571.wav|fɯtsɯɯ no dʑiɕɯ kɯnɽeɴ naɽa ɯɽaniwa de jaɽeba iːdʑa nai.|423
|
| 92 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/37c014a1/wav/37c014a1_0474.wav|soɯ ɕi te kɯɽerɯ to kaɴɕa sɯrɯ. moɯ iʔkai, mɯne o jɯɽaɕite agerɯ.|6
|
| 93 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/cda4375a/wav/cda4375a_0579.wav|soɯ ne, fɯkɯsɯɯ no dansei to kaŋkei o moʔte inai kagiɽi wa...|385
|
| 94 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_03/Sakurai_Takahiro_03_chunk2594.wav|to, kamawazɯ ni hanaɕi o tsɯzɯketa.|477
|
| 95 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/f4169f28/wav/f4169f28_193.wav|dakaɽa soɯ iɯ imi dʑa nakɯte.|386
|
| 96 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/bca2cfac/wav/bca2cfac_0882.wav|dʑimi na kʲampasɯ ɽaifɯ dʑa jondete tsɯmaɽanaikaɽa. tsɯmaɽanai episoːdo wa omoɕiɽokɯ, omoɕiɽoi no wa saɽani omoɕiɽokɯ. taɽinakeɽeba itɕi kaɽa tsɯkɯʔta.|320
|
| 97 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/9ee921f6/wav/9ee921f6_0012.wav|soɯ naɴ desɯ, çito o atsɯmejoɯ to sendeɴ ɕite irɯ mitai de.|347
|
| 98 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/saori/MRD12.wav|kakɯbetsɯ desɯ. pɯɽodʲɯɯsaː kaɽa itadakɯ, kono iʔpai...!|563
|
| 99 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/5e85bf92/wav/5e85bf92_1326.wav|moɯ...gomeɴ ne, kaɽa kaɯ tsɯmoɽi dʑa nakaʔtaɴ dakedo...|217
|
| 100 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/4ded9fa1/wav/4ded9fa1_0342.wav|fɯfɯ, de wa toɽiaezɯ, hoːpɯ to jɯki no meija no tame ni, fɯtsɯɯ no fɯkɯ da to ɕindʑite agemaɕoɯ ka.|129
|
| 101 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/monogatari/monogatari_voices/monogatari_split/sawashiro_miyuki/Sawashiro_Miyuki_03/Sawashiro_Miyuki_03_chunk1453.wav|ɯmaɽeotɕita tsɯgi no ɕɯŋkaɴ, wagahai wa wagahai o ɯmiotoɕita botai o mita.|475
|
| 102 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/e5d53ec4/wav/e5d53ec4_049.wav|neː, tanteisaɴ ɕiʔterɯ? koko saikiɴ, kanda de jakeɴ no çigai ga aitsɯiteta no.|460
|
| 103 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki_03/Shinichiro_Miki_03_chunk817.wav|sasɯga ni sɯgɯ ni nadekoɯ nadekoɯ da to, kampa ɕita joɯ desɯ. ma, iʔte ɕimaeba, maegami ga nakɯnaʔta dake desɯkaɽa, jokɯ miɽeba wakarɯɴ deɕoɯ ne.|473
|
| 104 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/tsujido/vo/011/S011_A_2155.wav|hontoɯ da...|553
|
| 105 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/4e2f4ba6/wav/4e2f4ba6_1077.wav|ima kaɽa me o toːɕite, haijakɯ kimete, jomiawase ɕite, ɽokɯoɴ ɕite, oɯbo sɯrɯɴ deɕo?|143
|
| 106 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/fa4704bf/wav/fa4704bf_119.wav|konna dʑikaɴ ni konna baɕo ni jobidasɯ nante, johodo kikaɽetakɯ nai hanaɕi na no ne.|387
|
| 107 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/tsujido/vo/011/S011_A_0310.wav|nʲa haha! jaʔpa sempai omoɕiɽoiʔsɯ!|553
|
| 108 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/74eb72c7/wav/74eb72c7_0253.wav|so, sonna koto nai jo, gɯɯzeɴ dʑa nai ka na.|184
|
| 109 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/efb922ca/wav/efb922ca_0770.wav|ɯfɯfɯ, baːrɯ o fɯɽimawasɯkaɽa, moɯ tɕoʔto ɯɕiɽo ni sagaʔtete.|409
|
| 110 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/3ec57102/wav/3ec57102_137.wav|wataɕi no tame ni, oɕokɯdʑi o tɕɯɯdaɴ sɯrɯ çitsɯjoɯ wa aɽimaseɴ. doɯzo, oki ni nasaɽazɯ.|144
|
| 111 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/2af831b5/wav/2af831b5_240.wav|naː, nosejasɯi dʑaɽo.|5
|
| 112 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/99b5eb16/wav/99b5eb16_0339.wav|wataɕi mo itɕi do çinosakasaɴ no tokoɽo ni kao o daɕite mirɯ wa. kanodʑo ga ɕiŋgakɯ ɕita toki no fɯkɯzatsɯ na dʑidʑoɯ to ka, koʔtɕi kaɽa setsɯmei ɕite oita hoɯ ga iː to omoɯɕi.|345
|
| 113 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/bb6ac6f1/wav/bb6ac6f1_1043.wav|jɯɯkotɕaɴ seidʑɯɴha da jo! baɽibaɽi ɯki dʑa nai?|263
|
| 114 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/25714f7a/wav/25714f7a_1007.wav|maː soɯ iɯnaɽa damaʔterɯsɯkedo.|95
|
| 115 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/bf7b3aa8/wav/bf7b3aa8_457.wav|oniːtɕaɴ, tɕinami mo ufo kansokɯjoɯ no boɯeŋkʲo o kaɯ tame ni, mata meidosaɴ ni naʔte iː ka na?|277
|
| 116 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/sakura_moyu/06/06005910.wav|sɯgoi ne.|520
|
| 117 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/6d250131/wav/6d250131_427.wav|kaimeɴ ga, naɴ ka oiɕi soɯ na iɽo ɕiterɯ ne!|202
|
| 118 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/7787d8bf/wav/7787d8bf_0621.wav|onadʑi dʑa nai jo. kotoɕi wa, ɕifɯki otokosaɴ to ka jobɯ joteinaɴ dakaɽa.|306
|
| 119 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/6d19f294/wav/6d19f294_381.wav|desɯ ne. soɯ dekirɯ nante mitoːɕi, sɯkɯnakɯtomo wataɕi wa tateɽaɽenai desɯ.|189
|
| 120 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_sp/data/0f6fbea8/wav/0f6fbea8_0220.wav|oi, moɯ iː. omae no aiboɯ dʑimaɴ wa iːʔte no.|52
|
| 121 |
+
/home/austin/disk2/llmvcs/tt/stylekan/Data/moe_res/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_05.wav|çito ni kaŋka saɽerɯ no, betsɯ ni, ɕiɽoi dʑa nai jo. meʔta ni sono aite ga inai dake.|480
|
stylekan/Data/moe_res/imas_split/ranko/ranko_cgss/ranko_chara_198/ranko_chara_198.acb.tmp
ADDED
|
Binary file (405 kB). View file
|
|
|
stylekan/Data/moe_res/imas_split/shiki/shiki_fine/phonemizerASR_script_jpn.py
ADDED
|
@@ -0,0 +1,804 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from transformers import WhisperProcessor, WhisperForConditionalGeneration, AutoModelForSpeechSeq2Seq
|
| 2 |
+
import csv
|
| 3 |
+
from tqdm import tqdm
|
| 4 |
+
from datasets import Dataset, Audio
|
| 5 |
+
import os
|
| 6 |
+
import torch
|
| 7 |
+
import re
|
| 8 |
+
import pykakasi
|
| 9 |
+
|
| 10 |
+
# Hiragana -> IPA replacement table used by post_fix().
#
# Order matters: post_fix() applies the rules top to bottom with str.replace,
# so every multi-character sequence must be listed before the single kana it
# contains. Keys are unique; the earlier revision listed several keys twice,
# which silently kept only the last value.
kana_mapper = dict([
    # --- ゔ row --------------------------------------------------------
    ("ゔぁ","ba"),
    ("ゔぃ","bi"),
    ("ゔぇ","be"),
    ("ゔぉ","bo"),
    ("ゔゃ","bʲa"),
    ("ゔゅ","bʲɯ"),
    ("ゔょ","bʲo"),

    ("ゔ","bɯ"),

    # --- kana + small vowel (long vowels) and u-row + small ya/yu/yo ----
    ("あぁ"," aː"),
    ("いぃ"," iː"),
    ("いぇ"," je"),
    ("いゃ"," ja"),
    ("うぅ"," ɯː"),
    ("えぇ"," eː"),
    ("おぉ"," oː"),
    ("かぁ"," kaː"),
    ("きぃ"," kiː"),
    ("くぅ","kɯː"),
    ("くゃ","kʲa"),  # was "ka": palatalisation was missing, unlike くゅ / くょ
    ("くゅ","kʲɯ"),
    ("くょ","kʲo"),
    ("けぇ","keː"),
    ("こぉ","koː"),
    ("がぁ","gaː"),
    ("ぎぃ","giː"),
    ("ぐぅ","gɯː"),
    ("ぐゃ","gʲa"),
    ("ぐゅ","gʲɯ"),
    ("ぐょ","gʲo"),
    ("げぇ","geː"),
    ("ごぉ","goː"),
    ("さぁ","saː"),
    ("しぃ","ɕiː"),
    ("すぅ","sɯː"),
    ("すゃ","sʲa"),
    ("すゅ","sʲɯ"),
    ("すょ","sʲo"),
    ("せぇ","seː"),
    ("そぉ","soː"),
    ("ざぁ","zaː"),
    ("じぃ","dʑiː"),
    # NOTE(review): this key used to appear twice ("zɯː", then "zɯ"); the later
    # short value is what actually took effect, so it is kept here.
    ("ずぅ","zɯ"),
    ("ずゃ","zʲa"),
    ("ずゅ","zʲɯ"),
    ("ずょ","zʲo"),
    ("ぜぇ","zeː"),
    ("ぞぉ","zoː"),  # was "zeː" (copy/paste slip from the ぜぇ line)
    ("たぁ","taː"),
    ("ちぃ","tɕiː"),
    ("つぁ","tsa"),
    ("つぃ","tsi"),
    ("つぅ","tsɯː"),
    ("つゃ","tɕa"),
    ("つゅ","tɕɯ"),
    ("つょ","tɕo"),
    ("つぇ","tse"),
    ("つぉ","tso"),
    ("てぇ","teː"),
    ("とぉ","toː"),
    ("だぁ","daː"),
    ("ぢぃ","dʑiː"),
    ("づぅ","dɯː"),
    ("づゃ","zʲa"),
    ("づゅ","zʲɯ"),
    ("づょ","zʲo"),
    ("でぇ","deː"),
    ("どぉ","doː"),
    ("なぁ","naː"),
    ("にぃ","niː"),
    ("ぬぅ","nɯː"),
    ("ぬゃ","nʲa"),
    ("ぬゅ","nʲɯ"),
    ("ぬょ","nʲo"),
    ("ねぇ","neː"),
    ("のぉ","noː"),
    ("はぁ","haː"),
    ("ひぃ","çiː"),
    # NOTE(review): this key used to appear twice ("ɸɯː", then "ɸɯ"); the later
    # short value is what actually took effect, so it is kept here.
    ("ふぅ","ɸɯ"),
    ("ふゃ","ɸʲa"),
    ("ふゅ","ɸʲɯ"),
    ("ふょ","ɸʲo"),
    ("へぇ","heː"),
    ("ほぉ","hoː"),
    ("ばぁ","baː"),
    ("びぃ","biː"),
    ("ぶぅ","bɯː"),
    ("ぶゃ","bʲa"),  # was keyed "ふゃ", so ぶゃ was never matched
    ("ぶゅ","bʲɯ"),
    ("ぶょ","bʲo"),  # was keyed "ふょ", so ぶょ was never matched
    ("べぇ","beː"),
    ("ぼぉ","boː"),
    ("ぱぁ","paː"),
    ("ぴぃ","piː"),
    ("ぷぅ","pɯː"),
    ("ぷゃ","pʲa"),
    ("ぷゅ","pʲɯ"),
    ("ぷょ","pʲo"),
    ("ぺぇ","peː"),
    ("ぽぉ","poː"),
    ("まぁ","maː"),
    ("みぃ","miː"),
    ("むぅ","mɯː"),
    ("むゃ","mʲa"),
    ("むゅ","mʲɯ"),
    ("むょ","mʲo"),
    ("めぇ","meː"),
    ("もぉ","moː"),
    ("やぁ","jaː"),
    ("ゆぅ","jɯː"),
    ("ゆゃ","jaː"),
    ("ゆゅ","jɯː"),
    ("ゆょ","joː"),
    ("よぉ","joː"),
    ("らぁ","ɽaː"),
    ("りぃ","ɽiː"),
    ("るぅ","ɽɯː"),
    ("るゃ","ɽʲa"),
    ("るゅ","ɽʲɯ"),
    ("るょ","ɽʲo"),
    ("れぇ","ɽeː"),
    ("ろぉ","ɽoː"),
    ("わぁ","ɯaː"),
    ("をぉ","oː"),

    # --- contracted sounds (yōon) and loanword combinations -------------
    ("う゛","bɯ"),
    ("でぃ","di"),
    ("でゃ","dʲa"),
    ("でゅ","dʲɯ"),
    ("でょ","dʲo"),
    ("てぃ","ti"),
    ("てゃ","tʲa"),
    ("てゅ","tʲɯ"),
    ("てょ","tʲo"),
    ("すぃ","si"),
    ("ずぁ","zɯa"),
    ("ずぃ","zi"),
    ("ずぇ","ze"),
    ("ずぉ","zo"),
    ("きゃ","kʲa"),
    ("きゅ","kʲɯ"),
    ("きょ","kʲo"),
    ("しゃ","ɕʲa"),
    ("しゅ","ɕʲɯ"),
    ("しぇ","ɕʲe"),
    ("しょ","ɕʲo"),
    ("ちゃ","tɕa"),
    ("ちゅ","tɕɯ"),
    ("ちぇ","tɕe"),
    ("ちょ","tɕo"),
    ("とぅ","tɯ"),
    ("とゃ","tʲa"),
    ("とゅ","tʲɯ"),
    ("とょ","tʲo"),
    ("どぁ","doa"),
    ("どぅ","dɯ"),
    ("どゃ","dʲa"),
    ("どゅ","dʲɯ"),
    ("どょ","dʲo"),
    ("にゃ","nʲa"),
    ("にゅ","nʲɯ"),
    ("にょ","nʲo"),
    ("ひゃ","çʲa"),
    ("ひゅ","çʲɯ"),
    ("ひょ","çʲo"),
    ("みゃ","mʲa"),
    ("みゅ","mʲɯ"),
    ("みょ","mʲo"),
    ("りゃ","ɽʲa"),
    ("りぇ","ɽʲe"),
    ("りゅ","ɽʲɯ"),
    ("りょ","ɽʲo"),
    ("ぎゃ","gʲa"),
    ("ぎゅ","gʲɯ"),
    ("ぎょ","gʲo"),
    ("ぢぇ","dʑe"),
    ("ぢゃ","dʑa"),
    ("ぢゅ","dʑɯ"),
    ("ぢょ","dʑo"),
    ("じぇ","dʑe"),
    ("じゃ","dʑa"),
    ("じゅ","dʑɯ"),
    ("じょ","dʑo"),
    ("びゃ","bʲa"),
    ("びゅ","bʲɯ"),
    ("びょ","bʲo"),
    ("ぴゃ","pʲa"),
    ("ぴゅ","pʲɯ"),
    ("ぴょ","pʲo"),
    ("うぁ","ɯa"),
    ("うぃ","ɯi"),
    ("うぇ","ɯe"),
    ("うぉ","ɯo"),
    ("うゃ","ɯʲa"),
    ("うゅ","ɯʲɯ"),
    ("うょ","ɯʲo"),
    ("ふぁ","ɸa"),
    ("ふぃ","ɸi"),
    ("ふぇ","ɸe"),
    ("ふぉ","ɸo"),

    # --- single kana (each emitted with a leading space, except う) ------
    ("あ"," a"),
    ("い"," i"),
    ("う","ɯ"),
    ("え"," e"),
    ("お"," o"),
    ("か"," ka"),
    ("き"," ki"),
    ("く"," kɯ"),
    ("け"," ke"),
    ("こ"," ko"),
    ("さ"," sa"),
    ("し"," ɕi"),
    ("す"," sɯ"),
    ("せ"," se"),
    ("そ"," so"),
    ("た"," ta"),
    ("ち"," tɕi"),
    ("つ"," tsɯ"),
    ("て"," te"),
    ("と"," to"),
    ("な"," na"),
    ("に"," ni"),
    ("ぬ"," nɯ"),
    ("ね"," ne"),
    ("の"," no"),
    ("は"," ha"),
    ("ひ"," çi"),
    ("ふ"," ɸɯ"),
    ("へ"," he"),
    ("ほ"," ho"),
    ("ま"," ma"),
    ("み"," mi"),
    ("む"," mɯ"),
    ("め"," me"),
    ("も"," mo"),
    ("ら"," ɽa"),
    ("り"," ɽi"),
    ("る"," ɽɯ"),
    ("れ"," ɽe"),
    ("ろ"," ɽo"),
    ("が"," ga"),
    ("ぎ"," gi"),
    ("ぐ"," gɯ"),
    ("げ"," ge"),
    ("ご"," go"),
    ("ざ"," za"),
    ("じ"," dʑi"),
    ("ず"," zɯ"),
    ("ぜ"," ze"),
    ("ぞ"," zo"),
    ("だ"," da"),
    ("ぢ"," dʑi"),
    ("づ"," zɯ"),
    ("で"," de"),
    ("ど"," do"),
    ("ば"," ba"),
    ("び"," bi"),
    ("ぶ"," bɯ"),
    ("べ"," be"),
    ("ぼ"," bo"),
    ("ぱ"," pa"),
    ("ぴ"," pi"),
    ("ぷ"," pɯ"),
    ("ぺ"," pe"),
    ("ぽ"," po"),
    ("や"," ja"),
    ("ゆ"," jɯ"),
    ("よ"," jo"),
    ("わ"," ɯa"),
    ("ゐ"," i"),
    ("ゑ"," e"),
    ("ん"," ɴ"),
    ("っ"," ʔ"),
    ("ー"," ː"),

    # --- stray small kana ------------------------------------------------
    ("ぁ"," a"),
    ("ぃ"," i"),
    ("ぅ"," ɯ"),
    ("ぇ"," e"),
    ("ぉ"," o"),
    ("ゎ"," ɯa"),

    ("を","o")
])


def post_fix(text):
    """Replace any hiragana left in ``text`` with its IPA spelling.

    Every ``kana_mapper`` rule is applied in table order with ``str.replace``,
    so multi-kana sequences are consumed before the single kana they contain.

    Args:
        text: String that may still contain hiragana.

    Returns:
        The string with every mapped kana sequence replaced. Characters with
        no entry in ``kana_mapper`` are left untouched.
    """
    for kana, ipa in kana_mapper.items():
        text = text.replace(kana, ipa)

    return text
|
| 319 |
+
|
| 320 |
+
def convert_to_kana(text):
    """Convert the Japanese runs of ``text`` to hiragana, leaving the rest as is.

    The text is split into alternating runs of characters inside and outside
    the ranges U+3000-U+30FF, U+3400-U+4DBF and U+4E00-U+9FFF. Runs inside
    those ranges are passed through pykakasi and replaced by the concatenated
    ``'hira'`` readings; every other run is copied through unchanged.

    The pykakasi converter is created on first use and cached on the function
    object, so calling this once per sample does not rebuild it every time
    (and text with no Japanese characters never touches pykakasi at all).

    Args:
        text: Input string, possibly mixing Japanese script with other text.

    Returns:
        The string with Japanese runs replaced by hiragana.
    """
    japanese_start = r'[\u3000-\u30ff\u3400-\u4dbf\u4e00-\u9fff]'
    non_japanese_run = r'([^\u3000-\u30ff\u3400-\u4dbf\u4e00-\u9fff]+)'

    def convert_word(word):
        # Lazily build the converter once and reuse it across calls.
        kks = getattr(convert_to_kana, "_kks", None)
        if kks is None:
            kks = pykakasi.kakasi()
            convert_to_kana._kks = kks
        return ''.join(item['hira'] for item in kks.convert(word))

    # The capturing group keeps the non-Japanese separators in the result list,
    # so joining the parts back together reproduces the original layout.
    parts = re.split(non_japanese_run, text)

    return ''.join(
        convert_word(part) if re.match(japanese_start, part) else part
        for part in parts
    )
|
| 335 |
+
|
| 336 |
+
import re
|
| 337 |
+
|
| 338 |
+
# Ordered clean-up rules applied by random_space_fix() with str.replace.
#
# Order matters: each rule sees the output of all rules before it. The table
# is a collection of hand-collected corrections for spacing and for specific
# mis-phonemised words.
spaces = dict([

    ("ɯ ɴ","ɯɴ"),
    ("na ɴ ","naɴ "),
    (" mina ", " miɴna "),
    ("ko ɴ ni tɕi ha","konnitɕiwa"),
    ("ha i","hai"),
    ("boɯtɕama","boʔtɕama"),
    ("i eːi","ieːi"),
    ("taiɕɯtsɯdʑoɯ","taiɕitsɯdʑoɯ"),
    ("soɴna ka ze ni","soɴna fɯɯ ni"),
    (" i e ","ke "),
    ("\ufffd",""),  # drop U+FFFD replacement characters
    ("×"," batsɯ "),
    ("se ka ɯndo","sekaɯndo"),
    ("i i","iː"),
    ("i tɕi","itɕi"),
    ("ka i","kai"),
    ("naɴ ga","nani ga"),
    ("i eː i","ieːi"),

    ("naɴ koɽe","nani koɽe"),
    ("naɴ soɽe","nani soɽe"),
    (" ɕeɴ "," seɴ "),

    # ("konna","koɴna"),
    # ("sonna"," soɴna "),
    # ("anna","aɴna"),
    # ("nn","ɴn"),

    ("en ","eɴ "),
    ("in ","iɴ "),
    ("an ","aɴ "),
    ("on ","oɴ "),
    ("ɯn ","ɯɴ "),
    # ("nd","ɴd"),

    ("koɴd o","kondo"),
    ("ko ɴ d o","kondo"),
    ("ko ɴ do","kondo"),

    ("oanitɕaɴ","oniːtɕaɴ"),
    ("oanisaɴ","oniːsaɴ"),
    ("oanisama","oniːsama"),
    ("hoːmɯrɯɴɯ","hoːmɯrɯːmɯ"),
    ("so ɴ na ","sonna"),
    # NOTE(review): the next two rules map a string to itself and so do nothing.
    (" sonna "," sonna "),
    (" konna "," konna "),
    ("ko ɴ na ","konna"),
    (" ko to "," koto "),
    ("edʑdʑi","eʔtɕi"),
    (" edʑdʑ "," eʔtɕi "),
    (" dʑdʑ "," dʑiːdʑiː "),
    ("secɯnd","sekaɯndo"),

    ("ɴɯ","nɯ"),
    ("ɴe","ne"),
    ("ɴo","no"),
    ("ɴa","na"),
    ("ɴi","ni"),
    ("ɴʲ","nʲ"),

    ("hotond o","hotondo"),
    ("hakoɴd e","hakoɴde"),
    ("kaʔkaɽi","gaʔkaɽi"),

    ("gakɯtɕi ɽi","gaʔtɕiɽi "),

    (" ʔ","ʔ"),
    ("ʔ ","ʔ"),

    # NOTE(review): "-" is rewritten first, so the "- " and "--" rules below can
    # never match. The order is left untouched because changing it would alter
    # the output for existing data ("--" currently ends up as "ː—").
    ("-","ː"),
    ("- ","ː"),
    ("--","~ː"),
    ("~","—"),
    ("\uff5e","—"),  # full-width tilde, same treatment as "~"
    ("、",","),

    (" ː","ː"),
    ('ka nade',"kanade"),

    ("ohahasaɴ","okaːsaɴ"),
    # Whitespace normalisation. The previous entry mapped a single space to a
    # single space, which did nothing; collapsing a double space (and the
    # ideographic space) is presumably what was intended.
    ("\u3000"," "),
    ("  "," "),
    ("viː","bɯiː"),
    ("ːː","ː—"),

    ("d ʑ","dʑ"),
    ("d a","da"),
    ("d e","de"),
    ("d o","do"),
    ("d ɯ","dɯ"),

    ("niːɕiki","ni iɕiki"),
    ("anitɕaɴ","niːtɕaɴ"),
    ("daiːtɕi","dai itɕi"),

    ("naɴ sono","nani sono"),
    ("naɴ kono","nani kono"),
    ("naɴ ano","nani ano"),  # works around a known mis-reading in the upstream romaniser
    (" niːtaɽa"," ni itaɽa"),
    ("doɽamaɕiːd","doɽama ɕiːdʲi"),
    ("aɴ ta","anta"),
    ("aɴta","anta"),
    ("naniːʔteɴ","nani iʔteɴ"),
    ("niːkite","ni ikite")

])
|
| 444 |
+
|
| 445 |
+
|
| 446 |
+
# Symbols that are followed by a space, mapped to their spoken form.
# Each symbol is listed in its ASCII form and its full-width form (written as
# an escape so the two cannot be confused or folded together). The earlier
# table listed every key twice with identical text, so the second entry of
# each pair was dead.
sym_ws = dict([

    ("$ ","dorɯ"),
    ("\uff04 ","dorɯ"),       # full-width dollar sign

    ("〇 ","marɯ"),
    ("¥ ","eɴ"),
    ("\uffe5 ","eɴ"),         # full-width yen sign

    ("# ","haʔɕɯ tagɯ"),
    ("\uff03 ","haʔɕɯ tagɯ"), # full-width number sign

    ("& ","ando"),
    ("\uff06 ","ando"),       # full-width ampersand

    ("% ","paːsento"),
    ("\uff05 ","paːsento"),   # full-width percent sign

    ("@ ","aʔto saiɴ"),
    ("\uff20 ","aʔto saiɴ")   # full-width commercial at

])


def random_sym_fix(text):  # with space
    """Spell out symbols that are followed by a space.

    Every ``sym_ws`` key (a symbol plus its trailing space) is replaced by the
    spoken form padded with one space on each side, e.g.
    ``"miku& sakura"`` becomes ``"miku ando sakura"``.

    Args:
        text: Phonemised transcription.

    Returns:
        The text with the listed symbols replaced by their readings.
    """
    for symbol, reading in sym_ws.items():
        text = text.replace(symbol, f" {reading} ")

    return text
|
| 477 |
+
|
| 478 |
+
|
| 479 |
+
# Symbols with no surrounding space, mapped to their spoken form.
# Each symbol is listed in its ASCII form and its full-width form (written as
# an escape so the two cannot be confused or folded together). The earlier
# table listed every key twice with identical text, so the second entry of
# each pair was dead.
sym_ns = dict([

    ("$","dorɯ"),
    ("\uff04","dorɯ"),        # full-width dollar sign

    ("〇","marɯ"),
    ("¥","eɴ"),
    ("\uffe5","eɴ"),          # full-width yen sign

    ("#","haʔɕɯ tagɯ"),
    ("\uff03","haʔɕɯ tagɯ"),  # full-width number sign

    ("&","ando"),
    ("\uff06","ando"),        # full-width ampersand

    ("%","paːsento"),
    ("\uff05","paːsento"),    # full-width percent sign

    ("@","aʔto saiɴ"),
    ("\uff20","aʔto saiɴ"),   # full-width commercial at

    ("~","—"),
    ("\uff5e","—"),           # full-width tilde
    ("kʲɯɯdʑɯɯkʲɯɯ.kʲɯɯdʑɯɯ","kʲɯɯdʑɯɯ kʲɯɯ teɴ kʲɯɯdʑɯɯ")

])


def random_sym_fix_no_space(text):
    """Spell out symbols that appear without a trailing space.

    Every ``sym_ns`` key is replaced by its spoken form padded with one space
    on each side, e.g. ``"miku&sakura"`` becomes ``"miku ando sakura"``.

    Args:
        text: Phonemised transcription.

    Returns:
        The text with the listed symbols replaced by their readings.
    """
    for symbol, reading in sym_ns.items():
        text = text.replace(symbol, f" {reading} ")

    return text
|
| 515 |
+
|
| 516 |
+
def random_space_fix(text):
    """Apply every rule in ``spaces`` to ``text``, in table order.

    Each rule is a plain ``str.replace``; later rules operate on the output of
    earlier ones, so the order of the ``spaces`` table is significant.

    Args:
        text: Phonemised transcription.

    Returns:
        The transcription after all replacements.
    """
    for pattern, replacement in spaces.items():
        text = text.replace(pattern, replacement)

    return text
|
| 523 |
+
|
| 524 |
+
def number_to_japanese(num):
    """Return the hiragana reading of a non-negative integer.

    Values 0-9999 are read exactly as before. Larger values are now read in
    groups of four digits with the 万 (まん) and 億 (おく) units, up to
    999,999,999,999.

    Args:
        num: The integer to read.

    Returns:
        The reading as a string ("ゼロ" for zero), or the sentinel string
        "Invalid input" when ``num`` is not an int, is negative, or is larger
        than the supported maximum.
    """
    max_supported = 10 ** 12 - 1
    if not isinstance(num, int) or num < 0 or num > max_supported:
        return "Invalid input"

    digits = ["", "いち", "に", "さん", "よん", "ご", "ろく", "なな", "はち", "きゅう"]
    tens = ["", "じゅう", "にじゅう", "さんじゅう", "よんじゅう", "ごじゅう", "ろくじゅう", "ななじゅう", "はちじゅう", "きゅうじゅう"]
    hundreds = ["", "ひゃく", "にひゃく", "さんびゃく", "よんひゃく", "ごひゃく", "ろっぴゃく", "ななひゃく", "はっぴゃく", "きゅうひゃく"]
    thousands = ["", "せん", "にせん", "さんぜん", "よんせん", "ごせん", "ろくせん", "ななせん", "はっせん", "きゅうせん"]
    # Unit appended to each successive four-digit group, least significant first.
    group_units = ["", "まん", "おく"]

    if num == 0:
        return "ゼロ"

    def read_group(value):
        # Reading of 1..9999; the index-0 entries are empty so zeros vanish.
        return (
            thousands[value // 1000]
            + hundreds[value // 100 % 10]
            + tens[value // 10 % 10]
            + digits[value % 10]
        )

    pieces = []
    unit_index = 0
    while num > 0:
        num, group = divmod(num, 10000)
        if group:
            pieces.append(read_group(group) + group_units[unit_index])
        unit_index += 1

    return "".join(reversed(pieces))


def convert_numbers_in_string(input_string):
    """Replace every run of digits in ``input_string`` with its Japanese reading.

    Runs that ``number_to_japanese`` cannot read (it returns the sentinel
    "Invalid input") are read out digit by digit instead, so the sentinel text
    is never written into the result.

    Args:
        input_string: Text that may contain digit runs.

    Returns:
        The text with each digit run replaced by hiragana.
    """
    # Regular expression to find numbers in the string
    number_pattern = re.compile(r'\d+')
    digit_names = ["ゼロ", "いち", "に", "さん", "よん", "ご", "ろく", "なな", "はち", "きゅう"]

    # Function to replace numbers with their Japanese pronunciation
    def replace_with_japanese(match):
        token = match.group()
        reading = number_to_japanese(int(token))
        if reading == "Invalid input":
            # Out of range: spell the digits one at a time.
            return "".join(digit_names[int(ch)] for ch in token)
        return reading

    # Replace all occurrences of numbers in the string
    return number_pattern.sub(replace_with_japanese, input_string)
|
| 563 |
+
|
| 564 |
+
|
| 565 |
+
def remove_leading_dots(text):
    """Strip any run of '.' characters from the very start of ``text``.

    Only dots at the beginning of the string are removed; dots elsewhere are
    left in place.
    """
    leading_dots = re.compile(r'^\.*')
    return leading_dots.sub('', text)
|
| 569 |
+
|
| 570 |
+
def remove_more_than_three_dots(text):
    """Drop leading dots and shorten long dot runs to an ellipsis.

    First any dots at the start of ``text`` are removed. Then every run of
    four or more dots, together with the whitespace that directly follows it,
    is replaced by exactly three dots. Runs of three dots or fewer are left
    untouched.
    """
    without_leading = re.sub(r'^\.*', '', text)
    shortened = re.sub(r'\.{4,}\s*', '...', without_leading)
    return shortened
|
| 576 |
+
|
| 577 |
+
# def replace_repeating_patterns(text):
|
| 578 |
+
# def replace_repeats(match):
|
| 579 |
+
# pattern = match.group(1)
|
| 580 |
+
# if len(match.group(0)) // len(pattern) >= 3:
|
| 581 |
+
# return pattern + "~~~"
|
| 582 |
+
# return match.group(0)
|
| 583 |
+
|
| 584 |
+
# # Pattern for space-separated repeats
|
| 585 |
+
# pattern1 = r'((?:\S+\s+){1,5}?)(?:\1){2,}'
|
| 586 |
+
# # Pattern for continuous repeats without spaces
|
| 587 |
+
# pattern2 = r'(.+?)\1{2,}'
|
| 588 |
+
|
| 589 |
+
# text = re.sub(pattern1, replace_repeats, text)
|
| 590 |
+
# text = re.sub(pattern2, replace_repeats, text)
|
| 591 |
+
# return text
|
| 592 |
+
|
| 593 |
+
|
| 594 |
+
print("loading...")
|
| 595 |
+
|
| 596 |
+
# device = 'cuda:0'
|
| 597 |
+
# processor = WhisperProcessor.from_pretrained("openai/whisper-large-v3")
|
| 598 |
+
# # model = WhisperForConditionalGeneration.from_pretrained("/home/ubuntu/PL-whisper/Hibiki_ASR_Grapheme_Plus/checkpoint-5000").to("cuda:0")
|
| 599 |
+
|
| 600 |
+
# hibiki = "/home/ubuntu/PL-whisper/Hibiki_ASR_Grapheme_Plus/checkpoint-5000"
|
| 601 |
+
# model = AutoModelForSpeechSeq2Seq.from_pretrained(
|
| 602 |
+
# hibiki,
|
| 603 |
+
# torch_dtype=torch.float16,
|
| 604 |
+
# low_cpu_mem_usage=True,
|
| 605 |
+
# use_safetensors=True,
|
| 606 |
+
# attn_implementation="sdpa",
|
| 607 |
+
# )
|
| 608 |
+
# model.to(device)
|
| 609 |
+
# forced_decoder_ids = processor.get_decoder_prompt_ids(language="japanese", task="transcribe")
|
| 610 |
+
|
| 611 |
+
# print('lock and load')
|
| 612 |
+
|
| 613 |
+
# print('now loading the dataset...')
|
| 614 |
+
# # Define the root directory where you want to start the search
|
| 615 |
+
# root_directory = "/path/to/files"
|
| 616 |
+
# #/home/ubuntu/PL-whisper/split
|
| 617 |
+
# #/home/ubuntu/PL-whisper/myaudio_full/myaudio
|
| 618 |
+
|
| 619 |
+
# # Initialize an empty list to store the .wav file paths
|
| 620 |
+
# wav_files = []
|
| 621 |
+
|
| 622 |
+
# # Walk through all directories and files starting from the root directory
|
| 623 |
+
# for dirpath, dirnames, filenames in os.walk(root_directory):
|
| 624 |
+
# # Iterate over all files in the current directory
|
| 625 |
+
# for filename in filenames:
|
| 626 |
+
# # Check if the file is a .wav file
|
| 627 |
+
# if filename.endswith(".wav"):
|
| 628 |
+
# # Construct the full file path
|
| 629 |
+
# file_path = os.path.join(dirpath, filename)
|
| 630 |
+
# # Add the file path to the list
|
| 631 |
+
# wav_files.append(file_path)
|
| 632 |
+
|
| 633 |
+
|
| 634 |
+
# # Now wav_files contains a list of all .wav file paths
|
| 635 |
+
# print("loaded the dataset, length is ->", len(wav_files))
|
| 636 |
+
|
| 637 |
+
# samples = Dataset.from_dict({"audio": wav_files}).cast_column("audio", Audio(16000))
|
| 638 |
+
# # Prepare the CSV file
|
| 639 |
+
# csv_file_path = './phonemized_moe_25%.csv'
|
| 640 |
+
|
| 641 |
+
# with open(csv_file_path, mode='w', newline='') as csv_file:
|
| 642 |
+
# fieldnames = ['filename', 'transcription']
|
| 643 |
+
# writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
|
| 644 |
+
# writer.writeheader()
|
| 645 |
+
|
| 646 |
+
# # Process each audio file
|
| 647 |
+
# for sample in tqdm(samples):
|
| 648 |
+
# # Load the audio file and process it
|
| 649 |
+
# input_features = processor(sample["audio"]["array"], sampling_rate=sample["audio"]["sampling_rate"], return_tensors="pt").input_features.to(device)
|
| 650 |
+
|
| 651 |
+
# # Generate the transcription
|
| 652 |
+
# predicted_ids = model.generate(input_features, repetition_penalty=1.2,num_beams=4)
|
| 653 |
+
# transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
|
| 654 |
+
# if ' neɽitai ' in transcription:
|
| 655 |
+
# transcription = transcription.replace(' neɽitai ', "naɽitai")
|
| 656 |
+
# if 'tɕabiʔto' in transcription:
|
| 657 |
+
# transcription = transcription.replace('tɕabiʔto', "tɕobiʔto")
|
| 658 |
+
# if "ki ni ɕinai" in transcription:
|
| 659 |
+
# transcription = re.sub(r'(?<!\s)ki ni ɕinai', r' ki ni ɕinai', transcription)
|
| 660 |
+
# if 'ʔt' in transcription:
|
| 661 |
+
# transcription = re.sub(r'(?<!\s)ʔt', r'ʔt', transcription)
|
| 662 |
+
# if 'de aɽoɯ' in transcription:
|
| 663 |
+
# transcription = re.sub(r'(?<!\s)de aɽoɯ', r' de aɽoɯ', transcription)
|
| 664 |
+
|
| 665 |
+
# if ".ʔ" in transcription:
|
| 666 |
+
# transcription = transcription.replace(".ʔ","..")
|
| 667 |
+
# if "ʔ." in transcription:
|
| 668 |
+
# transcription = transcription.replace("ʔ.",".")
|
| 669 |
+
|
| 670 |
+
# transcription = convert_numbers_in_string(transcription)
|
| 671 |
+
# transcription = convert_to_kana(transcription)
|
| 672 |
+
# transcription = post_fix(transcription)
|
| 673 |
+
|
| 674 |
+
# if 'ɯa ta ɕi' in transcription:
|
| 675 |
+
# transcription = transcription.replace("ɯa ta ɕi","wataɕi")
|
| 676 |
+
# if ' ' in transcription:
|
| 677 |
+
# transcription = transcription.replace(" "," ")
|
| 678 |
+
|
| 679 |
+
# if 'monoːmoi' in transcription:
|
| 680 |
+
# transcription = transcription.replace("monoːmoi","mono omoi")
|
| 681 |
+
|
| 682 |
+
|
| 683 |
+
# if not re.search(r'[.?!。?!؟!.]$', transcription): # will add a dot at the end of each sentence if no other punctuation symbols exists
|
| 684 |
+
# transcription += '.'
|
| 685 |
+
|
| 686 |
+
# transcription = remove_leading_dots(transcription)
|
| 687 |
+
# transcription = remove_more_than_three_dots(transcription)
|
| 688 |
+
# # Write the transcription to the CSV file
|
| 689 |
+
# writer.writerow({'filename': sample['audio']['path'], 'transcription': transcription.lstrip()})
|
| 690 |
+
|
| 691 |
+
# print(f"Transcriptions have been saved to {csv_file_path}")
|
| 692 |
+
|
| 693 |
+
|
| 694 |
+
# ---------------------------------------------------------------------------
# Transcribe every audio file under `root_directory` with the Hibiki ASR
# phonemizer and append the cleaned-up IPA transcription to a CSV file.
# Files already listed in the CSV are skipped, so the script can be re-run
# to resume an interrupted job.
# ---------------------------------------------------------------------------

device = 'cuda:0'
processor = WhisperProcessor.from_pretrained("openai/whisper-large-v3")

hibiki = "Respair/Hibiki_ASR_Phonemizer_v0.2"
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    hibiki,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    use_safetensors=True,
    attn_implementation="sdpa",
)
model.to(device)
# NOTE(review): computed but never passed to model.generate() below.
forced_decoder_ids = processor.get_decoder_prompt_ids(language="japanese", task="transcribe")

print('lock and load')

print('now loading the dataset...')

root_directory = "/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine"

# Collect every audio file below the root directory.
wav_files = []

for dirpath, dirnames, filenames in os.walk(root_directory):
    for filename in filenames:
        if filename.endswith((".wav", ".ogg", ".mp3")):
            file_path = os.path.join(dirpath, filename)
            wav_files.append(file_path)

print("loaded the dataset, length is ->", len(wav_files))

# Audio is decoded and resampled to 16 kHz when a sample is accessed.
samples = Dataset.from_dict({"audio": wav_files}).cast_column("audio", Audio(16000))

csv_file_path = '/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/shiki_finetune.csv'

# Load existing transcriptions
existing_transcriptions = set()
if os.path.exists(csv_file_path):
    # The CSV holds IPA text, so the encoding is pinned to UTF-8 instead of
    # depending on the locale default.
    with open(csv_file_path, mode='r', newline='', encoding='utf-8') as csv_file:
        reader = csv.DictReader(csv_file)
        for row in reader:
            existing_transcriptions.add(row['filename'])

# Open the CSV file in append mode
with open(csv_file_path, mode='a', newline='', encoding='utf-8') as csv_file:
    fieldnames = ['filename', 'transcription']
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

    # Write header only if the file is empty
    if csv_file.tell() == 0:
        writer.writeheader()

    # Process each audio file
    for sample in tqdm(samples):
        audio_path = sample['audio']['path']

        # Skip if already transcribed
        if audio_path in existing_transcriptions:
            continue

        # Load the audio file and process it
        input_features = processor(sample["audio"]["array"], sampling_rate=sample["audio"]["sampling_rate"], return_tensors="pt").input_features.to(device)
        # The model weights are float16, so the features must match.
        input_features = input_features.to(torch.float16)

        # Generate the transcription
        predicted_ids = model.generate(input_features, repetition_penalty=1.2, num_beams=4)
        transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]

        # Targeted corrections for known mis-transcriptions. str.replace is a
        # no-op when the pattern is absent, so no membership test is needed.
        transcription = transcription.replace(' neɽitai ', "naɽitai")
        transcription = transcription.replace('tɕabiʔto', "tɕobiʔto")
        # Make sure these phrases are preceded by whitespace.
        transcription = re.sub(r'(?<!\s)ki ni ɕinai', r' ki ni ɕinai', transcription)
        # NOTE(review): the replacement equals the match, so this is currently a no-op.
        transcription = re.sub(r'(?<!\s)ʔt', r'ʔt', transcription)
        transcription = re.sub(r'(?<!\s)de aɽoɯ', r' de aɽoɯ', transcription)

        # Glottal-stop marks next to a full stop.
        transcription = transcription.replace(".ʔ", "..")
        transcription = transcription.replace("ʔ.", ".")

        # Digits -> kana, remaining Japanese script -> hiragana, hiragana -> IPA.
        transcription = convert_numbers_in_string(transcription)
        transcription = convert_to_kana(transcription)
        transcription = post_fix(transcription)

        transcription = transcription.replace("ɯa ta ɕi", "wataɕi")
        # Collapse double spaces. The earlier code replaced a single space
        # with a single space, which did nothing; presumably the doubled
        # space was lost at some point.
        transcription = transcription.replace("  ", " ")

        transcription = transcription.replace("monoːmoi", "mono omoi")

        # Add a final full stop when the sentence has no closing punctuation.
        # ASCII, ideographic, full-width and Arabic forms are all accepted.
        if not re.search(r'[.?!\u3002\uff1f\uff01\u061f\uff0e]$', transcription):
            transcription += '.'

        transcription = remove_leading_dots(transcription)
        transcription = remove_more_than_three_dots(transcription)
        transcription = random_space_fix(transcription)

        transcription = random_sym_fix(transcription)  # fixing some symbols, if they have a specific white space such as miku& sakura -> miku ando sakura
        transcription = random_sym_fix_no_space(transcription)  # same as above but for those without white space such as miku&sakura -> miku ando sakura

        # Write the transcription to the CSV file
        writer.writerow({'filename': audio_path, 'transcription': transcription.lstrip()})
        # Flush per row so an interrupted run keeps everything done so far.
        csv_file.flush()

print(f"Transcriptions have been saved to {csv_file_path}")
|
stylekan/Data/moe_res/imas_split/shiki/shiki_fine/shiki_finetune.csv
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_05.wav|sɯgata mo, koe mo, namae sae kawaʔte mo, tɕitose ga kɯɽeta kiboɯ wa, kienai.|480
|
| 2 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_06.wav|koɯfɯkɯ wa doko ni de mo koɽogaʔterɯdeɕo? wataɕi wa, dakʲoɯ ɕitakɯ naikedo?|480
|
| 3 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_01.wav|kɯɽaŋkɯ aːʔpɯ! teɴɕi no oɕigoto ɕɯɯrʲoɯ! niŋgeɴ ni modoɽoʔka naːɴ!|480
|
| 4 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_02.wav|bʲoɯdoɯ de itainaɽa, mɯkaɴɕiɴ de irɯ koto. katamɯkerɯ saki ga nakɯnactɕoɯ.|480
|
| 5 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_12.wav|kiboɯ wa megɯrɯ. tatoe donna ni sɯrɯdokɯ, itakɯtomo. soɽe o anata ga ɕimeɕite kɯɽeta.|480
|
| 6 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_6_07.wav|çikaɽi ni tokerɯ no, kikata nonɯkɯmoɽi to.|480
|
| 7 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_07.wav|zensei to ka ɽaisei to ka, doɯ de mo iː. daʔte soɽe wa, ataɕi dʑa nai moɴ ne.|480
|
| 8 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_6_03.wav|owaɽi o koete.|480
|
| 9 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_4_01.wav|temɽiɴ wa itsɯ daʔte tsɯɽiaʔterɯ. kiboɯ to doɯtoɯ no ɕitsɯrʲa o motsɯ mono ga nani ka, ɕiʔterɯ? soɽe wa ne, zetsɯboɯ da jo.|480
|
| 10 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_02.wav|koɽe ga, owaɽi? naɽa, ɯɽeɕiː. daʔte, anata to no, tsɯgi no jakɯsokɯ.|480
|
| 11 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_11.wav|tsɯgi deaerɯ toki, wataɕitatɕi wa çitoɽi de wa nai, fɯfɯ, koɽe wa jogeɴ.|480
|
| 12 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_12.wav|josoɯ dekinai kimitatɕi de ite jo. tensai ɕikitɕaɴ no josoɯ o, ɯɽagiɽi tsɯzɯkete.|480
|
| 13 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_6_06.wav|mimamoʔterɯkaɽa. fɯtatabi, deaɯ made.|480
|
| 14 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_6_04.wav|teɴɕi no komoɽi ɯta o.|480
|
| 15 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_11.wav|kimi o ɽakɯeɴ e to izanaʔte agerɯ joɴ! ima o tanoɕimi kiʔta ato o, tɕitose tɕanto.|480
|
| 16 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_04.wav|osoɽa no ɯeʔte, donna nioi naɴ daɽoɯ ne? ɕiɽeɴ de, mɯkiɕitsɯ de, kaoɽi mo nai no ka na?|480
|
| 17 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_6_05.wav|tokɯbetsɯ ni narɯ no.|480
|
| 18 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_06.wav|sajoɯnaɽa, tɕitose. wataɕi ni owaɽi o oɕiete kɯɽeta, tokɯbetsɯ na çito.|480
|
| 19 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_6_02.wav|kagajaki no katɕi o, ɕimeɕite misete.|480
|
| 20 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_05.wav|çito ni kaŋka saɽerɯ no, betsɯ ni, ɕiɽoi dʑa nai jo. meʔta ni sono aite ga inai dake.|480
|
| 21 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_3_01.wav|çito no inotɕi ni kiseɴ ga nai nante, sonna no kiɽeigoto. sɯkɯɯ çito wa eɽabɯɕi, dʑibɯɴ no inotɕi nante taika de saɕidasenai. de mo kimi wa, saɕidaɕite ɕimai soɯ da jo ne. oçito joɕi dakaɽa.|480
|
| 22 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_04.wav|moɴ no soto e okɯɽidasenakɯte, gomeɴ nasai. moɴ no saki no koto, kondo oɕiete ne.|480
|
| 23 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_08.wav|ɕigo no sekai de tanoɕikɯ jarɯɴ dʑa nakɯte, ima o kiɽitorɯ, fɯɴ, dekirɯ jo.|480
|
| 24 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_03.wav|saigo wa nai no desɯ. negaeba doko made mo, fɯtaɽi no tokɯbetsɯ wa tsɯzɯkɯkaɽa.|480
|
| 25 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_07.wav|arɯdʑi ni wa kaɴɕa o, wataɕi no tokɯbetsɯ o minogaɕite kɯɽeta, anata to, deaeta.|480
|
| 26 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_10.wav|hakoni wa neː, wataɕi wa iɽanai na. teitaiʔte, dʑiɴrɯi to aiɕoɯ ga warɯiɴ daʔte.|480
|
| 27 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_03.wav|kempiɴ, dʑiʔkeɴ, a, ɕijokɯɴ ga todoitetaɴ daʔta, ɽabo ni komaɽoːʔto.|480
|
| 28 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_6_01.wav|jɯme no saki no zasetsɯ nante, kɯtsɯgaesoɯ ka.|480
|
| 29 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_5_01.wav|nakanaide, anata no çitomi ga, namida de nagaɽete ɕimaɯ, soɽe wa, koɽe kaɽa kiboɯ dake o mitsɯkerɯ tame no oɯseki, wataɕi o, seɴ neɴ saki no miɽai de mo, mitɕibiːte kɯɽerɯ çikaɽi, saigo wa, hohoemi de.|480
|
| 30 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_09.wav|fɯɽeɽaɽenakɯte mo wakarɯ, atatakasa, daʔte, zɯʔto, anata ni aʔta.|480
|
| 31 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_10.wav|zɯʔto omoʔteta, kiɽei na çitomi no iɽo daʔte, inotɕi no iɽo, naɴ da ne.|480
|
| 32 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_4_03.wav|kimi ga ima te ni moʔterɯ no wa, kiboɯ? soɽe to mo zetsɯboɯ? doʔtɕi de mo iː ka. wataɕi wa moɯ te niːɽeterɯkaɽa, hoɕikaʔtaɽa wakete agerɯ jo. katahoɯ wa, kimi ni moɽaʔta mono dakedo.|480
|
| 33 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_01.wav|namida. kanaɕikɯte, tsɯɽakɯte, kɯrɯɕiː mono. koɽe kaɽa no anata ni wa, çitsɯjoɯ nai mono.|480
|
| 34 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_2_09.wav|tobɯ no mo keʔkʲokɯ dʑibɯɴ no tɕikaɽa daɕi, de mo tsɯkaɽetɕaɯɕi. hakonde moɽaɯ no ga itɕibaɴ!|480
|
| 35 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_1_08.wav|sɯbete wa bʲoɯdoɯ, koɯhei ni fɯɽisosogɯ, ai mo, inoɽi mo, hontoɯ wa, anata ni daʔte.|480
|
| 36 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101360/shiki_voice_101360_4_02.wav|kotoba ni sɯrɯto nante dʑiːpɯ naɴ daɽoɯ ne. konna mono ni, ataɕitatɕi wa fɯɽimawasaɽete, çiʔɕi ni naʔterɯ, me ni mienai sei de, te ni ɕite mo dʑiʔka ga naikaɽa, jokei ni ne.|480
|
| 37 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_08.wav|kaisɯiʔte, konna ni ɕio kaɽakaʔta?, naitenai jo, kantɕigai, kiɴɕi.|480
|
| 38 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_07.wav|jorɯ no sampo, okoɽanaide ne. iʔɕo naɽa iː deɕo? ɕimpai naɽa, mihaʔtete.|480
|
| 39 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_05.wav|kikaɽetenakɯte jokaʔta. moɕi ka ɕitaɽa, toʔkɯ ni ɕiɽaɽeteta ka mo dakedo.|480
|
| 40 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_4_03.wav|moɕi nigete mo, doko made mo oʔte kɯrɯ ki deɕoɯ? jada jada, oni goʔko wa tanoɕiːkedo, zɯʔto wa tsɯkaɽetɕoɯ. dakaɽa, iʔɕo niːrɯ jo. toːi owaɽi no çi made.|480
|
| 41 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_02.wav|anna ni fɯkakɯ, soko ga naiʔte kandʑiteta no ni, sonna koto, nakaʔtaɴ da.|480
|
| 42 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_06.wav|kiɽei na ɕigikata nante nai, ka, soɽe de mo, jɯme kɯɽai mite itakaʔta jo.|480
|
| 43 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_10.wav|nɯɽeterɯ jo, iː no? sonna koto mo ki ni naɽanai kɯɽai, hoŋki de.|480
|
| 44 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_07.wav|otoɕi joɽiːki kiɽeterɯ, oːwatesɯgi, zeɴrʲoɯ da jo neː, soɯ iɯ tokoɽo.|480
|
| 45 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_05.wav|kimi wa itsɯ mo itsɯ mo, ataɕi o nigaɕite kɯɽenai jo ne. doko iʔte mo sa.|480
|
| 46 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_4_02.wav|daɽe mo kaɽe mo ga ɽisoɯ o oɕitsɯketerɯ. kimi mo soɯ, tɕitose tɕaɴ daʔte. soɽe de mo kimitatɕi wa, ɽisoɯ dʑa nai wataɕi de mo, tsɯkiaoɯ to ɕiterɯ.|480
|
| 47 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_6_02.wav|kagajaki no katɕi o, ɕimeɕite misete.|480
|
| 48 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_08.wav|taorɯ kaisanakʲa, wataɕi no koto o wasɯɽeɽaɽenakɯ narɯ koɯsɯi, pɯɕɯ!!|480
|
| 49 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_04.wav|ɯmi ni ɕizɯmɯki wa, fɯɯɴ. saː, doɯ daʔta ka naː, oki ni nagasaɽetɕaʔta mitai.|480
|
| 50 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_12.wav|tsɯmetai, kaɽa da no ɕiɴ ga hjoɯteŋka mitai, dakaɽa, tamete, donna hoɯhoɯ de mo.|480
|
| 51 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_6_07.wav|çikaɽi ni tokerɯ no, kikata nonɯkɯmoɽi to.|480
|
| 52 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_02.wav|kono ato wa motɕiɽoɴ omimai da joɴ! jowajowa kʲɯɯketsɯkisaɴ no tokoɽo ne.|480
|
| 53 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_3_01.wav|dʑɯndo no takai mono wa, soɽe dake moɽoiɴ da jo. joɽokobi mo, kanaɕimi mo, zembɯ zembɯ, kowaɽetɕaɯ. dakaɽa çito wa, mazaʔte ɯme aɯ no. kowaɽete ɯmaɽeta, sɯkima o.|480
|
| 54 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_5_01.wav|naɴ de, konna tokoɽo ni, wazawaza kita no? iː ko wa nerɯ dʑikaɴ de, ɕikitɕantatɕi wa, mite no toːɽi warɯi ko de, fɯfɯ, fɯɕigi, hoɴɕiɴ ɕitɕoʔta. kimi wa, okoʔterɯ no ni.|480
|
| 55 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_6_06.wav|mimamoʔterɯkaɽa. fɯtatabi, deaɯ made.|480
|
| 56 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_03.wav|gps o tsɯketeɽeba mendoɯ na ɽenɽakɯ wa iɽanai ka mo neːnʲɯɯ, kaɴɕi ɕakai.|480
|
| 57 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_12.wav|itsɯ mo doːɽi de gaʔkaɽi, ɕitenai? fɯɯɴ, itsɯ mo no ataɕi de iːɴ da.|480
|
| 58 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_01.wav|kono dʑiki no ɯmiʔte, zenzeɴ samɯkɯ naiɴ da neː. mɯɕiɽo nama noɽokɯte, betobetoː.|480
|
| 59 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_4_01.wav|fɯtsɯɯ dʑa nai ataɕi wa, iɽanakaʔta. tokɯbetsɯ na kagakɯɕa dʑa nai ataɕi wa, papa no ɕikai kaɽa hazɯɽeta. kawaiː dake dʑa nai ataɕi wa, aidorɯ dʑa iɽaɽenai.|480
|
| 60 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_01.wav|mabɯɕiː jo. sonna çikaɽi ni ateɽaɽetaɽa, toketenakɯ nactɕaɯ.|480
|
| 61 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_6_04.wav|teɴɕi no komoɽi ɯta o.|480
|
| 62 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_11.wav|aː, kɯʔtsɯiterɯto aʔtakaː. koɽe wa kimi no, jasaɕisa no ɯndo ka na?|480
|
| 63 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_06.wav|kaketsɯketa no? tɕitose tɕaɴ mo, kimi mo. doɯ ɕite soɯ, oseʔkai.|480
|
| 64 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_03.wav|modoʔte kitɕaʔta, çikaɽi no sekai, konna ni, aʔtakakaʔtaɴ da ne.|480
|
| 65 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_09.wav|ɯwa! mabɯɕiː! moʔto haʔkoɯrʲoɯ o osaeteː! dʑiɴrɯi ni wa mada hajai!|480
|
| 66 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_09.wav|manɯke na kao, ɕiterɯ, wataɕi mo? fɯfɯʔ, okaɕiː, aidorɯ na no ni ne.|480
|
| 67 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_2_10.wav|ameɽika dʑikomi no sɯieihoɯ de, tɕitose tɕaɴ no çitoɽi ja fɯtaɽi, ɕizɯmɯ—!|480
|
| 68 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_6_01.wav|jɯme no saki no zasetsɯ nante, kɯtsɯgaesoɯ ka.|480
|
| 69 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_6_05.wav|tokɯbetsɯ ni narɯ no.|480
|
| 70 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_6_03.wav|owaɽi o koete.|480
|
| 71 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_04.wav|heɴ na no? ɯmi wa, wataɕi o sɯikonde kɯɽerɯ hazɯ daʔta no ni, kʲoçi ɕite.|480
|
| 72 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/dialogue_shiki_fine/shiki_card_101359/shiki_voice_101359_1_11.wav|ɕikaɽaɽerɯ no wa kiɽai. de mo, kʲoɯ wa, ɯɯɴ, naɴ de mo nai jo.|480
|
| 73 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/shiki_fine01.wav|haha, çiʔɕi da ne. ataɕi naŋka no tame ni. sonna toko hadʑimete mita. sonna fɯɯ ni, koe o aɽagerɯ toko. itsɯ mo itsɯ mo, kimi wa daɽe ka no tame ni. honto, heɴ na no.|480
|
| 74 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/shiki_fine02.wav|aʔ, kizɯtsɯita ɕikitɕaɴ o okoʔte kita çito da! ɽi fɯdʑiɴ! taoɽeta tɕitosetɕaɴ o tasɯketa no ni!, ma, ɯmi ni sasoʔta no wa—taɕi dakedo.|480
|
| 75 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/shiki_fine03.wav|ɯɯɴ, naɴ daɽoɯ, jokɯ wakannai. kietakaʔta no ka na. kʲɯɯ ni, doɯ de mo jokɯ nactɕaʔta. kimi to tɕitosetɕaɴ ga, naɴ ka, toːkɯte.|480
|
| 76 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/shiki_fine04.wav|"mɯmɯmɯ, kimi made soɯ jɯɯ koto iɯ. "" haihai, samiɕikaʔtandesɯɯ. ɯ eːɴ, kanaɕikaʔta joː kamaʔte kɯɽenakɯte itɕi ."" koɽe de iː?"|480
|
| 77 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/shiki_fine05.wav|aː, iː jo. hanɽoɴ wa motometenai. ataɕi ga hoɕiː no wa, kampeki na haɴɕoɯ. kimi no kasetsɯ o ɕoɯmei ɕite misete, owaɽi no çi made ni.|480
|
| 78 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/shiki_fine05.wav|kiboɯ wa—rɯ. akaɽi wa, kiʔto kie taɽi ɕinai. anata ga wataɕi ni mitɕi o ɕimeɕite kɯɽetakaɽa, kiʔto majowanaide mezaserɯ. owaɽi no saki no, çikaɽi o.|480
|
stylekan/Data/train_48_200k.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:06973b9e5e41925cfd98c756ff119c08f009df6237ba7c5eb856f3639d171958
|
| 3 |
+
size 41791357
|
stylekan/Data/train_List.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:37f7dcbdecaa5994b553594ab3bc5ed09086f50cd3c6d33fe06465299984ba46
|
| 3 |
+
size 92439327
|
stylekan/Data/train_List_updated.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:02541c84b9b6633a382af239974bbd8a0e796d66055ff3d2808f42da6d1fd410
|
| 3 |
+
size 92883947
|
stylekan/Data/val_48_200k.csv
ADDED
|
@@ -0,0 +1,223 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/bbd90363/wav/bbd90363_2472.wav|çinata no iɯ toːɽi, okɯnai de matasete ite seikai deɕita.|77
|
| 2 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/ad28b91b/wav/ad28b91b_1146.wav|maː maʔtakɯ, fɯjɯɯ no jozaɽa o mite sɯkoɕi dake çitaʔte ita no ni dainaɕi da. ɕikatanai, tsɯki o minagaɽa dakiaoɯ.|94
|
| 3 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/bbd90363/wav/bbd90363_0389.wav|baka na koto o iʔtenaide, otonaɕikɯ ɕite ite kɯdasai.|77
|
| 4 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/chinshiko/uma/uma_08_umabgv_017.wav|ᵻᵻᵻ.|39
|
| 5 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/momoka/momoka_mobamas/momoka_mobamasu_0010/momoka_mobamasu_0010_chunk95.wav|koɯka na no mo iːdeɕo?|11
|
| 6 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Syuuko/Syuuko_Events_and_Card/Event/NatsuKoi/NatsuKoi_chunk76.wav|ɯtɕi mo saːɴ.|43
|
| 7 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Karen/mobamasu/karen_mobamasu_0002_cnk84.wav|ɯɴ, oʔkeː. sɯgokɯ iː kikakɯ da to omoɯ.|5
|
| 8 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/2cf01874/wav/2cf01874_3239.wav|ɽokɯdʑɯɯ mei dato, itɕi nitɕi ni tsɯki, itɕi tane no sake ga dʑɯɯ ni ɕoɯ.|72
|
| 9 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/06/06002950.wav|neː, taiga.|55
|
| 10 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sawashiro_miyuki/Sawashiro_Miyuki_02/Sawashiro_Miyuki_02_chunk1160.wav|kite ɕimaʔtaɽa,nɯgɯ no ni mo mata teikoɯ ga aʔta no de, sono mama,fɯɯɴ, soɯ naɴ da,saizɯ wa piʔtaɽi da ne, to ka naɴ to ka, fɯtsɯɯ ni teɽe kakɯɕi ni mo naɽanai koto o iʔtsɯtsɯ, ɕɯɯɕiɴ mae no hamigaki o hadʑimeta no daʔta.|20
|
| 11 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Syuuko/Syuuko_Mobamas/Syuko Voice/【モバマス】[クーリッシュピクシー]塩見周子【ボイス集】 - Niconico Video_2/【モバマス】[クーリッシュピクシー]塩見周子【ボイス集】 - Niconico Video_2_chunk33.wav|pɯɽodʲɯɯsaːsaɴ to naɽa, moʔto moʔto ɯe niːki soɯ da jo ne.|43
|
| 12 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/449d5a0a/wav/449d5a0a_1376.wav|oːnaː wa kadʑino no eŋkɯɽoːdʑaː de saːdo ni ɽogɯ iːɴ ɕimaɕita. sono ato, saːdo kaɽa sekando ni ɽogɯ iːɴ ɕimaɕita.|69
|
| 13 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_03/Sakurai_Takahiro_03_chunk2060.wav|aitsɯ ga...|4
|
| 14 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/449d5a0a/wav/449d5a0a_1311.wav|ɽaːdʑa. esɯerɯdʲi kidoɯ negatibɯ sɯmaːto foɴ o ɕotei no itɕi ni daɕite kɯdasai.|69
|
| 15 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/4ce0075b/wav/4ce0075b_0418.wav|a, maː, minna ga gomaʔtete ojakɯ ni taterɯnaɽa kono kɯɽai...te iɯ ka, saizɯ tɕiːsakɯ nai?|71
|
| 16 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakamoto_maya/Sakamoto_Maya_01/Sakamoto_Maya_01_chunk1353.wav|kʲoto beɴ seidʑi ka?|17
|
| 17 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/1cc3c6c0/wav/1cc3c6c0_1007.wav|madoɽomi kaɽa sameta toki, daisɯki na çito no kao ga soba ni aʔte, tamaɽanakɯ mɯne ga kʲɯɴ to ɕita.|91
|
| 18 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/ee093a4f/wav/ee093a4f_1705.wav|ɯɯwa, imoɯto ga koiɕikɯte ɕikata ga nakaʔta hanaɕi to ka, koʔsoɽiːmoɯto ni nijaɯ fɯkɯ o tsɯkɯʔte ɕimaʔta to ka, soɯ iɯ hanaɕi ga kikitakaʔta no ni.|79
|
| 19 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/chiwa_saito/Chiwa_Saito_03/Chiwa_Saito_03_chunk82.wav|kiokɯrʲokɯ ni wa dʑiɕiɴ ga arɯ no jo. kamakɯɽa bakɯfɯ ga seidʑitsɯ ɕita toɕi no koto daʔte oboete irɯ wa.|3
|
| 20 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/bce2a5af/wav/bce2a5af_1050.wav|tokɯni baɕo ni kakawarɯ kiokɯ wa, kʲoɯjɯɯ sɯrɯ koto oːideɕo. gaʔkoɯ no koɯzoɯ to ka, dʑitakɯ made no mitɕi to ka sa. kʲoɯjɯɯ ɕitenake taiheɴ na koto bakaɽi dʑa nai.|98
|
| 21 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/00163dc9/wav/00163dc9_0212.wav|naɽa nani mo ɕimpai nasa soɯ ne.|86
|
| 22 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/013/S013_B_0278.wav|jaʔpaɽi kʲoɯrʲokɯ na no o eɽabitai wa ne.|10
|
| 23 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/18460462/wav/18460462_0164.wav|tsɯ, ɯmakɯ tateɴ na. kageɴ to iɯ mono o ɕiɽaɴ no dakaɽa, ɕigoto ni ɕiɕoɯ ga derɯde wa nai ka.|102
|
| 24 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/013/S013_B_0375.wav|keŋka ja oːkɯmaɴ, soɽe ni kawasɯ.|10
|
| 25 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/bc778ddb/wav/bc778ddb_0890.wav|ehehe, iʔpai de, naɴ ka aʔtakai jo.|96
|
| 26 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/4e2f4ba6/wav/4e2f4ba6_0523.wav|soɯ, baka ni ɕiterɯʔte omoʔtadeɕoɯ.|95
|
| 27 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/011/S011_A_1170.wav|mazɯ wa sempai kaɽa doɯzo.|31
|
| 28 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakamoto_maya/Sakamoto_Maya_02/Sakamoto_Maya_02_chunk989.wav|taimeɴ no seki ni koɕikakerɯ.|17
|
| 29 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/5d68aedf/wav/5d68aedf_1703.wav|de mo wataɕi, hontoɯ wa kaigai nante ikitakɯ nakaʔtaɴ desɯ.|88
|
| 30 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/6d565f54/wav/6d565f54_0704.wav|honto da jo, sonna koto ni naʔtaɽa, ano ko iʔɕoɯ neɴ ni motsɯ jo.|87
|
| 31 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/5d68aedf/wav/5d68aedf_0872.wav|wataɕi to nita joɯ na kʲoɯgɯɯ no ko wa, mawaɽi niːʔpaiːmaɕitaɕi, wataɕi joɽi çidoi tatɕiba niːrɯ ko daʔte imaɕita.|88
|
| 32 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/bc778ddb/wav/bc778ddb_2271.wav|dekirɯnaɽa, kisɯ no çitotsɯ mo ɕite kɯɽerɯto, ɯ oːkʲaː! ijaː!ʔte kandʑite, oːdasɯkaɽi naɴ dakedo.|96
|
| 33 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/006/S006_F_0157.wav|hoi, saʔki kɯrɯma ni nosetadʑa nai desɯ ka. kiɽi no bako ni haiʔtemasɯ.|38
|
| 34 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/a0fd12d7/wav/a0fd12d7_1743.wav|odʑoɯsama ni wa, zeçi, gonaimitsɯ ni onegai ɕitai no desɯga...joɽoɕiːdeɕoɯ ka?|100
|
| 35 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/02/02101640.wav|ima, taigakɯɴ, wataɕi to ki sɯ ɕitetaɴ da jo.|6
|
| 36 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/00163dc9/wav/00163dc9_0928.wav|minna! ɕitsɯmoɴ sɯme asoko made! harɯ nakɯɴ ga komaʔterɯdʑa nai!|86
|
| 37 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/013/S013_C_0012.wav|kondo dasoɯ to omoʔterɯ ɕiɴ menʲɯɯ no abokado toːsɯto jokaʔtaɽa kansoɯ o kikasete.|10
|
| 38 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/horie_yui/Horie_Yui_03/Horie_Yui_03_chunk1141.wav|soɕite, wataɕi ga ɽikai sɯrɯ no wa okaɕiː to iɯ kimotɕi mo tsɯjokɯ arɯ.|0
|
| 39 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/ochinbarai/voice/mzr/mzr_13_001_010.wav|ɯfɯ, soɯ iɯ koto jo. dakaɽa, hajakɯ kigaete ki nasai?|15
|
| 40 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/a0fd12d7/wav/a0fd12d7_1597.wav|iʔteɽaʔɕaimasɯ. aɕita o tanoɕimi ni ɕite oɽimasɯ.|100
|
| 41 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/arisu/arisu_mobamasu/12_arisu__0013_(Vocals)/12_arisu__0013_(Vocals)_chunk32.wav|jasaɕiː sojokaze ni miojɯ da ne.|18
|
| 42 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sawashiro_miyuki/Sawashiro_Miyuki_01/Sawashiro_Miyuki_01_chunk1083.wav|kaɽe o aː ɕite ɕimaʔta no wa wataɕi de arɯ to iɯ setsɯ wa, igai to sekeɴ de nezɯjoi no daʔta.|20
|
| 43 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/06/06002780.wav|otsɯɽi no keisaɴ to ka, mɯzɯkaɕiː jo. rʲoɯte no jɯbi no kazɯ joɽi, takɯsaɴ naɴ da moɯ.|55
|
| 44 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Syuuko/Syuuko_Mobamas/Syuko Voice/【モバマス】[ビーチスタイル]塩見周子【ボイス集】 - Niconico Video_2/【モバマス】[ビーチスタイル]塩見周子【ボイス集】 - Niconico Video_2_chunk36.wav|samɯi toki ni wa...|43
|
| 45 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/95c3bdd8/wav/95c3bdd8_0507.wav|maː iːja. de sa, ɕigoto doɯ? tanoɕiː?|84
|
| 46 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/04/04002110.wav|haː...|37
|
| 47 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki__01/Shinichiro_Miki__01_chunk500.wav|kaɴɕa ɕite imasɯ.|7
|
| 48 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/chieri/chieri_cgss/chieri_card_100612/chieri_voice_100612_1_06.wav|sɯteːdʑi ga owaʔtaɽa...ai niːkimasɯ! pɯɽodʲɯɯsaːsaɴ.|25
|
| 49 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/ochinbarai/voice/hsm/hsm_12_003h_040.wav|ᵻᵻᵻ wataɕi mo ɕiawase da jo! goɕɯdʑinsama no tɕintɕiɴ o kandʑinagaɽa iketeᵻᵻᵻ.|60
|
| 50 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/4e2f4ba6/wav/4e2f4ba6_1329.wav|mada tɕoʔto dʑikaɴ ga arɯkaɽa kimete okitaiɴ dakedo, haijakɯ doɯ ɕijoʔka.|95
|
| 51 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sawashiro_miyuki/Sawashiro_Miyuki_02/Sawashiro_Miyuki_02_chunk868.wav|wataɕi wa sɯkoɕi majoʔtakeɽedo, ɕikaɕi kakɯɕite okɯ no mo jaʔpaɽi okaɕiː ki ga ɕita no de, gaʔkoɯ de no dekigoto o hanaɕite okɯ koto ni ɕita.|20
|
| 52 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/4e2f4ba6/wav/4e2f4ba6_2732.wav|saiwai sampɯrɯ wa, mawaɽi niːkɯɽa de mo irɯɴ daɕi.|95
|
| 53 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/1a5a3db8/wav/1a5a3db8_1432.wav|sasɯga ni maniawanaiɴ dʑa nai kaɕiɽa.|75
|
| 54 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/usamin/usamin_cgss/usamin_card_100126/usamin_voice_100126_2_03.wav|kono kagɯ, ɯsa mise de zaiko naiʔte iwaɽeteta jatsɯ!|49
|
| 55 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/df6c208e/wav/df6c208e_0983.wav|ima made no sɯzɯɽitɕaɴ daʔtaɽa, doko ka toːmakɯɴ ni eɴrʲo ɕiteta no ni...iʔtai doɯ ɕitaɴ desɯ ka?|78
|
| 56 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/kamiya_hiroshi/Kamiya_Hiroshi_02/Kamiya_Hiroshi_02_chunk1592.wav|jokɯ kikɯɕi ne.|13
|
| 57 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/momoka/momoka_cgss/momoka_card_100191/momoka_voice_100191_1_08.wav|boɯhaɴ bɯzaː o çiʔpaʔtaɽa...doɯ naʔte mo ɕiɽimaseɴ wa jo.|11
|
| 58 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/002/S002_X_0035.wav|tonikakɯ tekitoɯ ni sawaʔte mi nasai! modoɕitakɯ naʔtaɽa, deforɯtoʔte toko o oseba, saiɕo no dʑoɯtai ni modorɯkaɽa.|26
|
| 59 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/chieri/chieri_mobamas/chie_mobamasu_0014/chie_mobamasu_0014_chunk145.wav|iʔɕo da to...|25
|
| 60 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/chinshiko/uma/uma_01_comn05_049.wav|moɯ tɕoʔto tɕikaɽa kageɴ wa tsɯjoi hoɯ ga okonomi desɯ ka?|39
|
| 61 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/chiwa_saito/Chiwa_Saito_02/Chiwa_Saito_02_chunk1091.wav|akeʔpiɽoge na jatsɯ da na. kaɽeɕi kanodʑo na no dakaɽa, toɽitatete jamaɕiː tokoɽo ga arɯ wake de mo nai no daga, deɽikaɕiː ni kakerɯʔte ki wa ɕinakɯ mo nai.|3
|
| 62 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/anzu/anzu_cgss/anzu_card_101059/anzu_voice_101059_1_08.wav|joso no kaigiɕitsɯ de taɽake sɯgi—? kʲɯɯ ni tsɯɽete kitaɴ dakaɽa, hoaʔkoɯ wa iko.|12
|
| 63 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/84be23bd/wav/84be23bd_1646.wav|koko wa hontoɯ ni geːmɯ no naka na no ka, desɯ jo ne.|89
|
| 64 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/cbe5080e/wav/cbe5080e_1063.wav|berɯbeʔto no tame o omoʔte sa, kami o kiɽeʔte.|74
|
| 65 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/nagi/nagi_cgss/nagi_card_300835/nagi_voice_300835_5_01.wav|fɯɽiː maːkeʔto e joɯ koso. nagi ga adobaizaː to naʔte, oneɯtɕiçiɴ kaɽa ɽea na iʔpiɴ made, kazɯkazɯ no aitemɯ o goannai ɕimasɯ. doɯzo eɴrʲo wa nagesɯtete, ana ga akɯ hodo mite iʔte kɯdasai.|46
|
| 66 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/917feebd/wav/917feebd_1103.wav|omae, koitsɯ no nakama dʑa nai no ka?|80
|
| 67 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/bb6ac6f1/wav/bb6ac6f1_1009.wav|soɽe de oniːtɕaɴ wa? donna kikakɯ o jɯɯdʑite kita no ka naː—? kikasɯte kikasɯte!|85
|
| 68 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/940de876/wav/940de876_1684.wav|honto ka doɯ ka wa ɕiɽanaikedo, daɽe ka ga sonna koto iʔteta joɯ na ki ga sɯrɯ wa.|83
|
| 69 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/mifune/mifune_cgss/mifune_card_201312/mifune_voice_201312_2_12.wav|tsɯnorɯ omoi. mɯne ga, osaetsɯkeɽaɽerɯ joɯ de...koɽe ga...omoi to iɯ koto?|62
|
| 70 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sawashiro_miyuki/Sawashiro_Miyuki_03/Sawashiro_Miyuki_03_chunk2217.wav|waɽikomenai.|20
|
| 71 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/cc948b89/wav/cc948b89_0441.wav|heː, tsɯmiki de tsɯkɯʔta waɽi ni wa, tawaː ni mienakɯ mo...te, omotɕa de asobɯ na!|92
|
| 72 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/1cc3c6c0/wav/1cc3c6c0_2083.wav|maʔtɕi goʔtorɯ ka doɯ ka o kimerɯɴ wa, jɯɯkɯɴ ja nai?|91
|
| 73 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sawashiro_miyuki/Sawashiro_Miyuki_01/Sawashiro_Miyuki_01_chunk1080.wav|nɯʔte orɯ.|20
|
| 74 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/24/24000470.wav|ano ko wa kʲoɯ mo, asoko de, daɽe ka ga jaʔte kɯrɯ no o, maʔte irɯ no kaɕiɽa?|67
|
| 75 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/013/S013_E_0017.wav|soɯ neː...|10
|
| 76 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/ochinbarai/voice/hsm/hsm_04_002h_002.wav|tɕi, a, ano...tɕi, sato...tɕaː...na, ɴ nani o ɕite...|60
|
| 77 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/chiwa_saito/Chiwa_Saito_03/Chiwa_Saito_03_chunk723.wav|bokɯ to sendʑoɯgahaɽa ga tsɯkiaɯ koto ni naʔta çi.|3
|
| 78 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/4e2f4ba6/wav/4e2f4ba6_1622.wav|kono jami bɯtoɯ kai de fɯtaɽi ga deaɯ no mo, ɽomedʑɯɽi de odorɯ kata no bɯtoɯ kai de deaɯ no ni kaketerɯɴ da to omoɯ wa.|95
|
| 79 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/bb6ac6f1/wav/bb6ac6f1_2145.wav|ohahasaɴ wa sekaiːtɕi no ohahasaɴ da jo. wataɕi wa ɕiawase da jo. hazɯkaɕisa mo wasɯɽete, mitɕi no mannaka de wataɕi wa naita. haha wa, zɯʔto ɕita o mɯita mama daʔta.|85
|
| 80 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/b8b5fe66/wav/b8b5fe66_0069.wav|tonikakɯ, aɽigatoɯ.|90
|
| 81 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/004/S004_B_0327.wav|wataɕi nante ɕoɯgaʔkoɯ no koɽo abaɽetetaʔte dake de, madʑime ni naʔta tɕɯɯgakɯ ikoɯ mo netɕinitɕiːwaɽeta wa.|32
|
| 82 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/saori/merged_vocals_chunk151.wav|iː desɯ ka? kʲoɯ wa moɯ oɕigoto ɕitɕa dame desɯ.|21
|
| 83 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/002/S002_A_0717.wav|neɽai wa nani?|26
|
| 84 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/bce2a5af/wav/bce2a5af_2331.wav|ɕiʔkaɕi, koɽe de ɕoɯmei saɽeta ne.|98
|
| 85 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/07/07004510.wav|okaeɽɽi, fɯtaɽi to mo. kʲoɯ wa tanoɕiː itɕi nitɕi daʔta ka na?|34
|
| 86 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/bc778ddb/wav/bc778ddb_1859.wav|çito ga ɯmaɽete ɕinɯ kɯɽai no dʑikaɴ. soɽe ga naɴ do mo kɯɽikaesɯ kɯɽai nagai dʑikaɴ. wataɕi wa çitoɽi de naite ita.|96
|
| 87 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Syuuko/Syuuko_Events_and_Card/Event/wish_you_happiness/wish_you_happiness_chunk20.wav|ɯɯɴ, iː nioi!|43
|
| 88 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/9febd2ae/wav/9febd2ae_1035.wav|hoɽa, iːdʑa nai, dʑoɕi kai!|93
|
| 89 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/4ce0075b/wav/4ce0075b_0169.wav|te kageɴ to ka, honto ɕinakɯte iː. tonikakɯ, itɕi do, kao o mizɯ ni tsɯkete minaito, dʑibɯɴ de mo, daidʑoɯbɯ na no ka, mɯɽi na no ka ga, wakannai. de mo, doɯ ɕite mo, kaɽada ga ɯgokanaikaɽa...|71
|
| 90 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/cbe5080e/wav/cbe5080e_0609.wav|kanaɴ tokoɽo ni, zaɕiki waɽaɕi ga irɯʔte. mae ni hanaɕitetadaɽo?|74
|
| 91 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakamoto_maya/Sakamoto_Maya_03/Sakamoto_Maya_03_chunk741.wav|so no tame niːnotɕi o sasageta.|17
|
| 92 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/mio/mio_honda_cgss/mio_honda_card_301058/mio_honda_voice_301058_2_01.wav|mv koɯkai çi ni geɽiɽa koɯkokɯ tɕaʔkɯ! aka to kɯɽo de somarɯ matɕi, soɯkaɴ da neː—!|41
|
| 93 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/chiwa_saito/Chiwa_Saito_02/Chiwa_Saito_02_chunk151.wav|kai ga, keɕite çinitɕidʑoɯ de nakɯnaʔte ɕimaʔte irɯ, sonna dʑibɯɴ ni.|3
|
| 94 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/chiwa_saito/Chiwa_Saito_01/Chiwa_Saito_01_chunk442.wav|hane kawa tsɯbasa.|3
|
| 95 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/cbe5080e/wav/cbe5080e_1216.wav|ajako no keɴ mo, itɕioɯ wa katazɯita. kɯɽaianto ni hoɯkokɯ mo ɕita. jarɯbeki koto wa sɯbete oeta.|74
|
| 96 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Syuuko/Syuuko_Events_and_Card/Card_Commyuu/Main_Comyu/Main_Comyu_chunk1.wav|soɕite, soɽe o maibaɴ ki ni ɕiterɯ saihaɴ mo, oçito joɕi ja neː.|43
|
| 97 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/ochinbarai/voice/cst/cst_04_003h_020.wav|ᵻᵻᵻ sɯgo i!! dʑitɕi ga okɯ de awaɽeterɯ—!|50
|
| 98 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/03/03012970.wav|…atoma ga, kokoɽo ga, paŋkɯ ɕi soɯ desɯ, sempai .|58
|
| 99 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/2cf01874/wav/2cf01874_3328.wav|jɯe ni jahaɽi, pɯɽasɯ arɯfa wa çitsɯjoɯ na no da. soɽe mo, dekirɯ dake kʲoɯrʲokɯ na...aʔta, seigeɴ jondʑɯɯ, joɕi!|72
|
| 100 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/ranko/ranko_cgss/ranko_card_200796/ranko_voice_200796_5_01.wav|soɽa o mitasɯ amata no kagajaki jo. ima koso, waɽeɽa no tɕikaɽa to naɽe! wataɕi no tsɯmɯgɯ senɽitsɯ to, minna no kanaɽerɯ oŋgakɯ wa, nanimono ni mo ɕibaɽanenɯ tɕijɯɯ o egakɯ wa! saː, haŋgʲakɯɕatatɕi jo! çimeta kokoɽo o tokihanate!|14
|
| 101 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/01/01011690.wav|otokonoko ga jorɯ no soto de me o samaɕita toki, moɯ maɕiɽo wa doko ni mo inakaʔta.|29
|
| 102 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/940de876/wav/940de876_4035.wav|sonna no ɽaʔkanteki sɯgirɯ wa jo. fɯtsɯɯ ni kaŋgaete, tatɕibateki ni mo ɕindʑoɯteki ni mo, kʲoɯrʲokɯ ɕite kɯɽerɯ wake nai wa.|83
|
| 103 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/9febd2ae/wav/9febd2ae_0214.wav|akɯjoɯ ɕita honniɴ to sono çigai no toɯdʑiɕa...maː, koko de wa ɕiwake iːɴ kai ni narɯ ka na.|93
|
| 104 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/00163dc9/wav/00163dc9_0779.wav|harɯnakɯɴ ga wataɕi to onadʑi gaʔkoɯ ni teʔnʲɯɯ ɕita no mo, otɕitɕisaɴ no saɕigane na no?|86
|
| 105 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/4e2f4ba6/wav/4e2f4ba6_0648.wav|ɯso de mo iːkaɽa, oiɕiː fɯɽi o ɕi nasai. soɽe de marɯkɯ osamaɽe wa.|95
|
| 106 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/cc948b89/wav/cc948b89_1614.wav|tɕoʔto matɕi doɯzo! maʔte, maʔte kɯdasai! ima no wa tɕoʔto ɕita dʑoːkɯ desɯ jo! dʑoːkɯ desɯʔteba!|92
|
| 107 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/449d5a0a/wav/449d5a0a_0729.wav|po dʑitibɯapeɽia ni gaitoɯ sɯrɯ toɽabɯrɯ ɕɯɯtiŋgɯ wa zembɯ de nidʑɯɯ nana koɯmokɯ koɽeɽa sɯbete o dʑiʔkoɯ ɕimasɯ ka?|69
|
| 108 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_03/Sakurai_Takahiro_03_chunk1616.wav|atama wa bojaːʔto ɕite irɯɕi.|4
|
| 109 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/ad28b91b/wav/ad28b91b_2318.wav|asaçi wa koɽe kaɽa mo, gɯrɯɯpɯ no itɕiːɴ to ɕite ɯgoite moɽaɯ. wataɕi no hoɯ mo tetsɯdaɯɕi, mizɯho mo tetsɯdaɯ.|94
|
| 110 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/kamiya_hiroshi/Kamiya_Hiroshi_01/Kamiya_Hiroshi_01_chunk2049.wav|bokɯ wa, kani ga.|13
|
| 111 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/cbe5080e/wav/cbe5080e_1161.wav|merɯbeʔto, koɽe aɽaʔte kite okɯɽe. ato tekitoɯ ni tabemono mo.|74
|
| 112 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/01/01013490.wav|wataɕi no koto o.|29
|
| 113 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/011/S011_A_3686.wav|zɯʔto gatagata ɯrɯsei no ni koɽe kaɽa hombaɴ ka ɯʔtoɯɕiː sɯ ne.|31
|
| 114 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/012/S012_A_0272.wav|geʔkoɯ da.|63
|
| 115 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_02/Sakurai_Takahiro_02_chunk1288.wav|eja.|4
|
| 116 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_02/Sakurai_Takahiro_02_chunk2816.wav|zɯibɯɴ to omoikomi no hageɕiː taipɯ to mierɯ.|4
|
| 117 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/46d6bf83/wav/46d6bf83_1231.wav|aː, mo wakaʔta! tɕikazɯite kawasɯ koto dake sɯɽeba iːɴ da ne!|101
|
| 118 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/013/S013_E_0031.wav|soɯ...|10
|
| 119 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/8b6e7173/wav/8b6e7173_1446.wav|koɽe wa...soɯ, kimotɕi jokɯ naʔte kɯɽeteɽɯɕaɯ ka jo ne. koɯ iɯ kandʑi de sɯrɯto, mikage wa kimotɕiː— no ne, fɯfɯ!!|73
|
| 120 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Karen/mobamasu/karen_mobamasu_0006_cnk61.wav|nani mo iwanakɯte mo, kokotɕi iːkaɽa.|5
|
| 121 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/chiwa_saito/Chiwa_Saito_01/Chiwa_Saito_01_chunk448.wav|a, eʔto...|3
|
| 122 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/67eeef73/wav/67eeef73_0121.wav|e. tia. wa matɕigaʔte nai. sekaitɕɯɯ no daɽe joɽi, oɽe ga hoɕoɯ sɯrɯ jo.|81
|
| 123 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Syuuko/Syuuko_Events_and_Card/Event/syuuko_lipps_first_event/syuuko_lipps_first_event_chunk6.wav|heː, naɴ kaigai ka mo. kanadetɕaɴʔte, soɯ iɯ koto iɯ çito dʑa nai to omoʔteta.|43
|
| 124 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/6489388e/wav/6489388e_1498.wav|ɕomboɽi desɯ ne.|68
|
| 125 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/shinichiro_miki/Shinichiro_Miki_03/Shinichiro_Miki_03_chunk1932.wav|tsɯɯgɯnai?|7
|
| 126 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/8b6e7173/wav/8b6e7173_0710.wav|o, okimotɕi dake tɕoɯdai ɕimasɯ?|73
|
| 127 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/001/S001_F_0451.wav|ᵻᵻᵻ.|23
|
| 128 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_01/Sakurai_Takahiro_01_chunk2269.wav|omae wa imada ni keitai deɴwa no adoɽesɯtɕoɯ kinoɯ o tsɯkaenai no ka.|4
|
| 129 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/3c58f1c4/wav/3c58f1c4_1903.wav|haː, omaeʔte eɽoi waɽi ni hetaɽe da jo na.|76
|
| 130 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/4e2f4ba6/wav/4e2f4ba6_1956.wav|fɯfɯɴ, soɯ deɕo?|95
|
| 131 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_01/Sakurai_Takahiro_01_chunk1814.wav|seŋgokɯ, ano kɯɽoːzeʔto, akenaide!|4
|
| 132 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/7b3d6f79/wav/7b3d6f79_1017.wav|eɽai eɽai! doɯzo sɯwaʔte, sɯgɯ oçirɯ tsɯkɯrɯkaɽa.|99
|
| 133 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/5d68aedf/wav/5d68aedf_0764.wav|kɯonsaɴ wa koe ga oːkiː no de, ɯrɯsai kɯɽai da to omoimasɯkedo?|88
|
| 134 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/940de876/wav/940de876_0802.wav|betsɯ ni, okoʔtenai wa. ɕiʔto mo ɕitenai wa. ɯɽaimoɕikɯ naŋka nai wa.|83
|
| 135 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/4e2f4ba6/wav/4e2f4ba6_2240.wav|naniː—? ɯɽeɕiɴ da!|95
|
| 136 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/95c3bdd8/wav/95c3bdd8_2032.wav|osamɯkɯɴ wa osamɯkɯɴ da jo.|84
|
| 137 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/36ea135b/wav/36ea135b_1379.wav|ahaha! ɯtɕi no çitotatɕi minna kɯiɕimboɯ dakaɽa, fɯtsɯɯ ni joɽokobi soɯ dakedo ne.|82
|
| 138 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/anzu/anzu_cgss/anzu_card_100932/anzu_voice_100932_6_05.wav|koɽe ga anzɯ no gohoɯɕi da!|12
|
| 139 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/09/09006550.wav|sajoɯnaɽa.|65
|
| 140 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/bb6ac6f1/wav/bb6ac6f1_1718.wav|de mo, oniː ga, koko made hairʲo ɕite ozeɴ date ɕite kɯɽeta no ni, jabo na tsɯʔkomi wa iɽetakɯ naiɕi. daitai, wataɕi dʑiɕiɴ wa, toʔkɯ ni gamaɴ dekinai no de...|85
|
| 141 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/saori/puriUBfin-chunk83_DeepFilterNet3.wav|soɯ kaŋgaete ɕimaɯ ɕɯŋkaɴ ga...|21
|
| 142 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/a0fd12d7/wav/a0fd12d7_0346.wav|beirɯhaɯdaː, ɕisɯtemɯ ɽiʔpaː, keŋgeɴ dʑɯmbi.|100
|
| 143 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/05/05001580.wav|ɯwasa da jo, hontoɯ no koto dʑa nai.|36
|
| 144 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_03/Sakurai_Takahiro_03_chunk200.wav|to.|4
|
| 145 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakamoto_maya/Sakamoto_Maya_02/Sakamoto_Maya_02_chunk1721.wav|kɯtɕi ni sɯrɯ to waɽactɕaɯ joɯ na kanoɯsei de wa arɯga, ano fɯtaɽi wa, naɴ ka soɯ iɯ kʲaɽa da.|17
|
| 146 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/cbe5080e/wav/cbe5080e_1568.wav|hai hai, wakaʔta joɯ. kono mama ɕinaɽetɕimaʔtaɽa mezame ga warɯiɕi ne.|74
|
| 147 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/449d5a0a/wav/449d5a0a_0055.wav|rɯʔkɯmaɴ wa, gendʑitsɯ no kɯɯgaɴ ga ɽogɯ iɴ ɕite irɯ wake de wa aɽimaseɴ.|69
|
| 148 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/frederica/fredrica_cgss/fredrica_card_101005/fredrica_voice_101005_2_05.wav|fɯtaɽi de kɯrɯɯdʑiŋgɯ sɯrɯ ni wa, iɽoiɽo çitsɯjoɯ da jo neː! gambaʔte!|44
|
| 149 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/cc948b89/wav/cc948b89_1136.wav|iː hoɯhoɯ ga arɯ wa! iʔso kokɯsaɴ ni kaimei ɕi ɕoɯto ka!|92
|
| 150 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Kanade/bahakora/bahamoot_44.1khz/bahamoot_44.1khz_chunk31.wav|fesaːforɯkɯ naɽa, takai marʲokɯ o moʔte irɯ hazɯ...desɯʔte?|19
|
| 151 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/frederica/frederica_mobamas/frederica_split/3_fred__0003_(Vocals)/3_fred__0003_(Vocals)_chunk3.wav|sɯki na sɯpoːtsɯ?|44
|
| 152 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/1a5a3db8/wav/1a5a3db8_0073.wav|mezase! dʑoɯinʲɯɯɕoɯ!|75
|
| 153 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sawashiro_miyuki/Sawashiro_Miyuki_02/Sawashiro_Miyuki_02_chunk1463.wav|soɯ iɯ, ofɯzaketa metaɕiteɴ wa tomokakɯ to ɕite.|20
|
| 154 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/009/S009_A_0056.wav|soɽe ni koŋkai wa ɽenna mo onadʑi joɯ ni ɯgoitete, ɕiɽaberɯ no wa ɽakɯ deɕita. aitsɯ no ato o oeba iːɴ de.|40
|
| 155 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Karen/karen_cgss/karen_cgss_card_201336/karen_cgss_voice_201336_1_03.wav|fɯtaɽi de wakeaʔte odorɯɴ da. negai mo itami mo, takanarɯ mɯne no oto to, kaɽada no netsɯ mo.|5
|
| 156 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/nagi/nagi_cgss/nagi_chara_309/nagi_voice_309_4_04.wav|dekirɯ joɯ ni naʔta koto ga ɕikakɯteki ni hjoɯgeɴ saɽete imasɯ ne.|46
|
| 157 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/04/04011350.wav|onaka ga sɯkɯto ne, doɯ ɕite da ka atama no naka ga boɴ'jaɽi ɕitɕaʔte, maigo ga tsɯjokɯ naʔtɕaɯ jo.|37
|
| 158 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/minami/minami_cgss/minami_card_200210/minami_voice_200210_2_11.wav|pɯɽodʲɯɯsaːsaɴ no kotoba ga...nani joɽi no kɯɴɕoɯ desɯ!|16
|
| 159 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/kamiya_hiroshi/Kamiya_Hiroshi_01/Kamiya_Hiroshi_01_chunk2014.wav|oto mo ɕinaiɕi, hokoɽi mo mawa nai.|13
|
| 160 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/36ea135b/wav/36ea135b_2616.wav|kʲoɯ kaɽa wataɕi mo iː? iʔɕo ni.|82
|
| 161 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/3c58f1c4/wav/3c58f1c4_1447.wav|sempai, saikiɴ ɕitsɯmoɴ ni kɯrɯ koto heɽimaɕita ne.|76
|
| 162 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/00163dc9/wav/00163dc9_0953.wav|koibito ni naɽitaiʔte imi dʑa nakɯte ne.|86
|
| 163 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/012/S012_A_0273.wav|iɕa wa hada ni awamɯ.|63
|
| 164 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/cc948b89/wav/cc948b89_1430.wav|ɯ ɽɯ sa i! bambandʑi ni sɯrɯ wa joː!!|92
|
| 165 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/ranko/ranko_cgss/ranko_card_200073/ranko_voice_200073_2_06.wav|baɽa no aʔtɕi mo, pɯɽodʲɯɯsaː to kɯgɯʔta moɴ!|14
|
| 166 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/6489388e/wav/6489388e_1486.wav|idai na sendʑiɴ ni kaɴɕa desɯga, koɯdai no temmoɴ bɯ no tame ni, semete tentai boɯeŋkʲoɯ kɯɽai wa nokoɕite hoɕikaʔta desɯ ne.|68
|
| 167 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/saori/EP6.wav|ɽai neɴ no toɕikoɕi mo...ie, koɽe wa mata kondo. ima wa kono ɕɯŋkaɴ o tanoɕimimaɕoɯ|21
|
| 168 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/00163dc9/wav/00163dc9_0326.wav|koɽe kaɽa wa, oneːtɕaɴ ga hazɯki no ɕaɕiɴ, toʔte agerɯ.|86
|
| 169 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/arisu/arisu_mobamasu/11_arisu__0012_(Vocals)/11_arisu__0012_(Vocals)_chunk113.wav|ikimasɯ jo! adobaisɯ desɯ ka, pɯɽodʲɯɯsaːsaɴ...|18
|
| 170 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_01/Sakurai_Takahiro_01_chunk2518.wav|kʲoɯ ga noʔta no ka, tsɯzɯkezama ni, kondo wadeka bɯrɯɯ no kime zeɽifɯ o çiɽoɯ sɯrɯ kaɽe.|4
|
| 171 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/chiwa_saito/Chiwa_Saito_01/Chiwa_Saito_01_chunk2339.wav|naʔtokɯ ɕita fɯɯ no hanekawa daʔta.|3
|
| 172 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/917feebd/wav/917feebd_2714.wav|saihate no soɽa...soɽe koso ga, sɯbete ga tsɯi erɯ soɽa...tsɯi no soɽa!|80
|
| 173 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/3c58f1c4/wav/3c58f1c4_0167.wav|soɯ ka mo ɕiɽemaseŋkedo, iʔtai nani o ioɯ to ɕite irɯɴ desɯ ka?|76
|
| 174 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/bbd90363/wav/bbd90363_0520.wav|geiɴ no hoɯ wa wakaɽimaseŋga, ɕiːna wa erɯfʲɴ to ɕite kakɯsei ɕite ɕimaʔta joɯ desɯ.|77
|
| 175 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Kanade/Kanade_Events_and_Card/Kanade_Events/saite_jewel_kanade/saite_jewel_kanade_chunk32.wav|sɯki na çito o ɽikai ɕitai no naɽa, sɯki ni naʔte wa ikenai nante. kanaɕiː wa jo ne.|19
|
| 176 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/anzu/anzu_cgss/anzu_card_100652/anzu_voice_100652_6_06.wav|kiɽiːtsɯ! ɽeː! ojasɯmi...|12
|
| 177 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/saori/32-christmas.wav|ivɯtɕaɴ no, daɽe ka no tame niːʔɕoɯ kemmei de atatakai tokoɽo, hajatetɕaɴ no geɴʔte ita toːɽi, soɽe o ikasɯ koto ga dekiɽeba, kɯɽisɯmasɯerwaibɯiːiː mo, kiʔto sɯteki na mono ni narɯ to kandʑimaɕita.|21
|
| 178 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/1a5a3db8/wav/1a5a3db8_0535.wav|soko no fɯtaɽi wa saʔki kaɽa naɴ jaʔterɯ wake?|75
|
| 179 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/12/12005830.wav|moɯ çitotsɯ dake, nokoʔterɯ. jarɯbeki koto ga nokoʔterɯ.|9
|
| 180 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/chieri/chieri_mobamas/chie_mobamasu_0015/chie_mobamasu_0015_chunk241.wav|çitsɯjoɯ naɴ desɯ jo ne. pɯɽodʲɯɯsaːsaɴ mitai ni.|25
|
| 181 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/bce2a5af/wav/bce2a5af_1707.wav|iʔɕoɯ kemmei, daɽadaɽa ɕite jaɽimasɯ to mo.|98
|
| 182 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/84be23bd/wav/84be23bd_0978.wav|kaɽaʔpo...desɯ ne.|89
|
| 183 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/7b3d6f79/wav/7b3d6f79_0318.wav|fɯfɯfɯ! soɽe taɽa, naitɕoɯ ka mo ɕiɽenaikedo ne.|99
|
| 184 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/horie_yui/Horie_Yui_03/Horie_Yui_03_chunk68.wav|aidʲa to ka haʔsoɯ to kaʔte iɯ no wa, iʔɕɯɴ no çibana, iwajɯrɯ sɯpaːkɯ de ɕika naiɴ da jo.|0
|
| 185 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sawashiro_miyuki/Sawashiro_Miyuki_03/Sawashiro_Miyuki_03_chunk731.wav|sonnaa ta ma ga aɽeba, doɽe dake sɯbaɽaɕiː ka, to iɯ hanaɕi da.|20
|
| 186 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Kanade/Kanade_voice_home_shinaido_room/kanade_card_200580/kanade_voice_200580_2_01.wav|kaedesaɴ to tamani ɽinaː niːkɯ no. kondo, pɯɽodʲɯɯsaːsaɴ mo...doɯ?|19
|
| 187 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Kanade/Kanade_voice_home_shinaido_room/kanade_card_201271/kanade_voice_201271_2_01.wav|animarɯ seɽapiː, koɯka arɯ wa. ɯtɕi ni wa kitsɯne mo irɯɕi, joɽidoɽi midoɽi ne.|19
|
| 188 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiburin/shiburin_mobamas/shiburin_mobamasu_0010/shiburin_mobamasu_0010_chunk4.wav|wataɕi wa...|54
|
| 189 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/4e2f4ba6/wav/4e2f4ba6_2019.wav|betsɯ ni, moɯ tɕoʔto asobitai dake.|95
|
| 190 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/67eeef73/wav/67eeef73_1395.wav|sono toːɽi. toɯtotsɯ de omae mo komaʔte irɯdaɽoɯga, kiŋkʲɯɯ ni kaiseki o onegai ɕitai mono ga arɯ.|81
|
| 191 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/1cc3c6c0/wav/1cc3c6c0_0820.wav|fɯfɯʔ, moɯ iɯ kaɴ. soɯ iɯ onna koɽoɕi no ɕiɽifɯ wa, hoka no ko niːɯtaɽa akaɴ jo.|91
|
| 192 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/chiwa_saito/Chiwa_Saito_01/Chiwa_Saito_01_chunk1394.wav|mɯɕiɽo ɕendʑoɯgahaɽa wa, dʑibɯɴ wa moɯ daidʑoɯbɯ da to tɕitɕioja ni oɕietakɯʔte, kono tabi, hatsɯ deːto da to iɯ no ni, ɕendʑoɯgahaɽa tɕitɕi ni doɯhaɴ o negaʔta no de wa naidaɽoɯ ka.|3
|
| 193 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/mio/mio_honda_cgss/mio_honda_card_301057/mio_honda_voice_301057_1_03.wav|ta i joɯ mitai ni, egao de,ʔte. itsɯ no aida ni ka, kɯtɕigɯse ni naʔteta no ka naː—?|41
|
| 194 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakamoto_maya/Sakamoto_Maya_03/Sakamoto_Maya_03_chunk1209.wav|sɯkɯnakɯtomo bokɯ no ie no geŋkaɴ, soɽe ni tsɯkiçi no nikɯtai o hakai ɕita toki no ano tegiwa wa, bokɯ ga koɽe made mite kita dono kaiː ni mo çike o toɽanai.|17
|
| 195 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Syuuko/Syuuko_Mobamas/Syuko Voice/【モバマス】[ハイブリッドエッジ]塩見周子【ボイス集】 - Niconico Video/【モバマス】[ハイブリッドエッジ]塩見周子【ボイス集】 - Niconico Video_chunk34.wav|wataɕi no koe.|43
|
| 196 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/horie_yui/Horie_Yui_02/Horie_Yui_02_chunk10.wav|mʲoɯ ni kiʔpaɽi ɕigasa wa iʔta.|0
|
| 197 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/9febd2ae/wav/9febd2ae_0887.wav|kimitatɕi wa jowai çito, komaʔta çito o tasɯkerɯ no ga ɕindʑoɯ daʔtaɴ dʑa nai ka na?|93
|
| 198 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/ochinbarai/voice/hsm/hsm_06_003h_038.wav|ᵻᵻᵻ.|60
|
| 199 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/ad28b91b/wav/ad28b91b_2431.wav|ɕɯɯkʲoɯteki na koto wa ki ni ɕinakɯte iːɴ dʑa nai ka? wataɕi wa mɯɕiɴ ɽoɴɕa daɕi na.|94
|
| 200 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/449d5a0a/wav/449d5a0a_1967.wav|kaimono no oːdaː wa kampeki deɕita.ʔpeɽia wa ɕiʔkaɽi oboetemaɕita.|69
|
| 201 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/001/S001_F_0360.wav|ɯ, ɯrɯseː! tɕoʔto ɕiawase kaiɽo ga boɯsoɯ ɕiterɯ dake da!|23
|
| 202 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/Kanade/Kanade_Events_and_Card/Kanade_Events/Kande5Comyus/Kande5Comyus_chunk32.wav|sonna ɯso, moɯ naɽetɕaʔta.|19
|
| 203 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/minami/minami_cgss/minami_card_201092/minami_voice_201092_6_02.wav|kiboɯ no tsɯbomi o, minoɽasemaɕoɯ!|16
|
| 204 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/010/S010_B_0116.wav|oɽe tɕi wa, aitsɯ o datɕi da to omoiteːkedo jo. ɽennasama no tame ni naɽaneːnaɽa, sokɯ, kirɯ tsɯmoɽi da ze.|47
|
| 205 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/bbd90363/wav/bbd90363_0900.wav|desɯga, neɴ no tame, kakɯniɴ o onegai ɕimasɯ. wataɕi mo, seŋkawa sensei to iʔɕo ni, tɕikakɯ o sagaɕimasɯkaɽa.|77
|
| 206 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/bbd90363/wav/bbd90363_1010.wav|soko kaɽa enerɯgiː o tomonaʔta rɯɯɴ o haʔsei sase, dʑizai ni ajatsɯrɯ koto made kanoɯ to wa omoemaseɴ.|77
|
| 207 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/9febd2ae/wav/9febd2ae_0305.wav|jɯɯtɕaɴ to itsɯ de mo iʔɕo niːtai no ni! wataɕi no koto sakete çidoi joː—!|93
|
| 208 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/miku/miku_cgss/miku_card_101176/miku_voice_101176_6_04.wav|meɽiː! kɯɽisɯnʲɯɯsɯ!|59
|
| 209 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/1cc3c6c0/wav/1cc3c6c0_1734.wav|na! çiʔto! ima no kanzeɴ ni ɕite kɯwaɽe ja! soɕi omoɴ ja de!|91
|
| 210 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/chieri/chieri_mobamas/chie_mobamasu_0016/chie_mobamasu_0016_chunk233.wav|ɕiawase ni, naɽetaɽa, iː na.|25
|
| 211 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/chiwa_saito/Chiwa_Saito_01/Chiwa_Saito_01_chunk371.wav|na no ka mo ɕiɽenaiga.|3
|
| 212 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/sakura_moyu/03/03012570.wav|fɯtaɽi ni, onamae o okɯʔte mo iː?|58
|
| 213 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/9febd2ae/wav/9febd2ae_1886.wav|sono kawaɽi ɕiʔnʲɯɯ bɯɴ'iɴ wa ne iʔpatsɯgei o çiɽoɯ ɕinakʲa ikenai no.|93
|
| 214 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakamoto_maya/Sakamoto_Maya_01/Sakamoto_Maya_01_chunk358.wav|sonna wake dakaɽa, kaiwa mo, bimʲoɯ ni sagɯɽisagɯɽi daʔta.|17
|
| 215 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/36ea135b/wav/36ea135b_2312.wav|kanzeɴ ni keɕisaʔtaʔte koto desɯ ka?|82
|
| 216 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/940de876/wav/940de876_4345.wav|nete iː wa jo. ojasɯmi.|83
|
| 217 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/36ea135b/wav/36ea135b_0585.wav|gomeɴ ne, wataɕi, moʔto ɯmakɯ jaɽete iɽeba...|82
|
| 218 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/saori/30-Main_Commyu.wav|saikiɴ, totemo ɽakɯ ɕikaʔtako to ga—ʔta no, wasɯɽete imaɕita.....naɴ da to omoimasɯ?|21
|
| 219 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/bce2a5af/wav/bce2a5af_0449.wav|anta saː, mae joɽi kaɽada oːkikɯ naʔterɯɴ dakaɽa saː, ki o tsɯkete jo. sasae kiɽenaiʔteba.|98
|
| 220 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/tsujido/vo/001/S001_B_0168.wav|narɯhodo.|23
|
| 221 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_48/18460462/wav/18460462_2152.wav|jakamaɕiː! waɽewaɽe wa baːmɯkɯɯheɴ o saigeɴ sɯrɯ no niːsogaɕiː no da! kisama no joɯ na bimboɯniɴ no iɽai ni kamaʔte irɯ jojɯɯ wa nai.|102
|
| 222 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/monogatari/monogatari_voices/monogatari_split/sakurai_takahiro/Sakurai_Takahiro_02/Sakurai_Takahiro_02_chunk2518.wav|ija, madʑ de odoɽoita.|4
|
| 223 |
+
/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/ochinbarai/voice/cst/cst_099_bgv_047.wav|ᵻᵻᵻ.|50
|
stylekan/Demo/Inference_LJSpeech.ipynb
ADDED
|
@@ -0,0 +1,562 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"id": "9adb7bd1",
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"source": [
|
| 8 |
+
"# StyleTTS 2 Demo (LJSpeech)\n"
|
| 9 |
+
]
|
| 10 |
+
},
|
| 11 |
+
{
|
| 12 |
+
"cell_type": "markdown",
|
| 13 |
+
"id": "6108384d",
|
| 14 |
+
"metadata": {},
|
| 15 |
+
"source": [
|
| 16 |
+
"### Utils"
|
| 17 |
+
]
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"cell_type": "code",
|
| 21 |
+
"execution_count": null,
|
| 22 |
+
"id": "96e173bf",
|
| 23 |
+
"metadata": {},
|
| 24 |
+
"outputs": [],
|
| 25 |
+
"source": [
|
| 26 |
+
"import torch\n",
|
| 27 |
+
"torch.manual_seed(0)\n",
|
| 28 |
+
"torch.backends.cudnn.benchmark = False\n",
|
| 29 |
+
"torch.backends.cudnn.deterministic = True\n",
|
| 30 |
+
"\n",
|
| 31 |
+
"import random\n",
|
| 32 |
+
"random.seed(0)\n",
|
| 33 |
+
"\n",
|
| 34 |
+
"import numpy as np\n",
|
| 35 |
+
"np.random.seed(0)"
|
| 36 |
+
]
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"cell_type": "code",
|
| 40 |
+
"execution_count": null,
|
| 41 |
+
"id": "da84c60f",
|
| 42 |
+
"metadata": {},
|
| 43 |
+
"outputs": [],
|
| 44 |
+
"source": [
|
| 45 |
+
"%cd .."
|
| 46 |
+
]
|
| 47 |
+
},
|
| 48 |
+
{
|
| 49 |
+
"cell_type": "code",
|
| 50 |
+
"execution_count": null,
|
| 51 |
+
"id": "5a3ddcc8",
|
| 52 |
+
"metadata": {},
|
| 53 |
+
"outputs": [],
|
| 54 |
+
"source": [
|
| 55 |
+
"# load packages\n",
|
| 56 |
+
"import time\n",
|
| 57 |
+
"import random\n",
|
| 58 |
+
"import yaml\n",
|
| 59 |
+
"from munch import Munch\n",
|
| 60 |
+
"import numpy as np\n",
|
| 61 |
+
"import torch\n",
|
| 62 |
+
"from torch import nn\n",
|
| 63 |
+
"import torch.nn.functional as F\n",
|
| 64 |
+
"import torchaudio\n",
|
| 65 |
+
"import librosa\n",
|
| 66 |
+
"from nltk.tokenize import word_tokenize\n",
|
| 67 |
+
"\n",
|
| 68 |
+
"from models import *\n",
|
| 69 |
+
"from utils import *\n",
|
| 70 |
+
"from text_utils import TextCleaner\n",
|
| 71 |
+
"textclenaer = TextCleaner()\n",
|
| 72 |
+
"\n",
|
| 73 |
+
"%matplotlib inline"
|
| 74 |
+
]
|
| 75 |
+
},
|
| 76 |
+
{
|
| 77 |
+
"cell_type": "code",
|
| 78 |
+
"execution_count": null,
|
| 79 |
+
"id": "0229c7f8",
|
| 80 |
+
"metadata": {},
|
| 81 |
+
"outputs": [],
|
| 82 |
+
"source": []
|
| 83 |
+
},
|
| 84 |
+
{
|
| 85 |
+
"cell_type": "code",
|
| 86 |
+
"execution_count": null,
|
| 87 |
+
"id": "bbdc04c0",
|
| 88 |
+
"metadata": {},
|
| 89 |
+
"outputs": [],
|
| 90 |
+
"source": [
|
| 91 |
+
"device = 'cuda' if torch.cuda.is_available() else 'cpu'"
|
| 92 |
+
]
|
| 93 |
+
},
|
| 94 |
+
{
|
| 95 |
+
"cell_type": "code",
|
| 96 |
+
"execution_count": null,
|
| 97 |
+
"id": "00ee05e1",
|
| 98 |
+
"metadata": {},
|
| 99 |
+
"outputs": [],
|
| 100 |
+
"source": [
|
| 101 |
+
"to_mel = torchaudio.transforms.MelSpectrogram(\n",
|
| 102 |
+
" n_mels=80, n_fft=2048, win_length=1200, hop_length=300)\n",
|
| 103 |
+
"mean, std = -4, 4\n",
|
| 104 |
+
"\n",
|
| 105 |
+
"def length_to_mask(lengths):\n",
|
| 106 |
+
" mask = torch.arange(lengths.max()).unsqueeze(0).expand(lengths.shape[0], -1).type_as(lengths)\n",
|
| 107 |
+
" mask = torch.gt(mask+1, lengths.unsqueeze(1))\n",
|
| 108 |
+
" return mask\n",
|
| 109 |
+
"\n",
|
| 110 |
+
"def preprocess(wave):\n",
|
| 111 |
+
" wave_tensor = torch.from_numpy(wave).float()\n",
|
| 112 |
+
" mel_tensor = to_mel(wave_tensor)\n",
|
| 113 |
+
" mel_tensor = (torch.log(1e-5 + mel_tensor.unsqueeze(0)) - mean) / std\n",
|
| 114 |
+
" return mel_tensor\n",
|
| 115 |
+
"\n",
|
| 116 |
+
"def compute_style(ref_dicts):\n",
|
| 117 |
+
" reference_embeddings = {}\n",
|
| 118 |
+
" for key, path in ref_dicts.items():\n",
|
| 119 |
+
" wave, sr = librosa.load(path, sr=24000)\n",
|
| 120 |
+
" audio, index = librosa.effects.trim(wave, top_db=30)\n",
|
| 121 |
+
" if sr != 24000:\n",
|
| 122 |
+
" audio = librosa.resample(audio, sr, 24000)\n",
|
| 123 |
+
" mel_tensor = preprocess(audio).to(device)\n",
|
| 124 |
+
"\n",
|
| 125 |
+
" with torch.no_grad():\n",
|
| 126 |
+
" ref = model.style_encoder(mel_tensor.unsqueeze(1))\n",
|
| 127 |
+
" reference_embeddings[key] = (ref.squeeze(1), audio)\n",
|
| 128 |
+
" \n",
|
| 129 |
+
" return reference_embeddings"
|
| 130 |
+
]
|
| 131 |
+
},
|
| 132 |
+
{
|
| 133 |
+
"cell_type": "markdown",
|
| 134 |
+
"id": "7b9cecbe",
|
| 135 |
+
"metadata": {},
|
| 136 |
+
"source": [
|
| 137 |
+
"### Load models"
|
| 138 |
+
]
|
| 139 |
+
},
|
| 140 |
+
{
|
| 141 |
+
"cell_type": "code",
|
| 142 |
+
"execution_count": null,
|
| 143 |
+
"id": "64fc4c0f",
|
| 144 |
+
"metadata": {},
|
| 145 |
+
"outputs": [],
|
| 146 |
+
"source": [
|
| 147 |
+
"# load phonemizer\n",
|
| 148 |
+
"import phonemizer\n",
|
| 149 |
+
"global_phonemizer = phonemizer.backend.EspeakBackend(language='en-us', preserve_punctuation=True, with_stress=True)"
|
| 150 |
+
]
|
| 151 |
+
},
|
| 152 |
+
{
|
| 153 |
+
"cell_type": "code",
|
| 154 |
+
"execution_count": null,
|
| 155 |
+
"id": "48e7b644",
|
| 156 |
+
"metadata": {},
|
| 157 |
+
"outputs": [],
|
| 158 |
+
"source": [
|
| 159 |
+
"config = yaml.safe_load(open(\"Models/LJSpeech/config.yml\"))\n",
|
| 160 |
+
"\n",
|
| 161 |
+
"# load pretrained ASR model\n",
|
| 162 |
+
"ASR_config = config.get('ASR_config', False)\n",
|
| 163 |
+
"ASR_path = config.get('ASR_path', False)\n",
|
| 164 |
+
"text_aligner = load_ASR_models(ASR_path, ASR_config)\n",
|
| 165 |
+
"\n",
|
| 166 |
+
"# load pretrained F0 model\n",
|
| 167 |
+
"F0_path = config.get('F0_path', False)\n",
|
| 168 |
+
"pitch_extractor = load_F0_models(F0_path)\n",
|
| 169 |
+
"\n",
|
| 170 |
+
"# load BERT model\n",
|
| 171 |
+
"from Utils.PLBERT.util import load_plbert\n",
|
| 172 |
+
"BERT_path = config.get('PLBERT_dir', False)\n",
|
| 173 |
+
"plbert = load_plbert(BERT_path)"
|
| 174 |
+
]
|
| 175 |
+
},
|
| 176 |
+
{
|
| 177 |
+
"cell_type": "code",
|
| 178 |
+
"execution_count": null,
|
| 179 |
+
"id": "ffc18cf7",
|
| 180 |
+
"metadata": {},
|
| 181 |
+
"outputs": [],
|
| 182 |
+
"source": [
|
| 183 |
+
"model = build_model(recursive_munch(config['model_params']), text_aligner, pitch_extractor, plbert)\n",
|
| 184 |
+
"_ = [model[key].eval() for key in model]\n",
|
| 185 |
+
"_ = [model[key].to(device) for key in model]"
|
| 186 |
+
]
|
| 187 |
+
},
|
| 188 |
+
{
|
| 189 |
+
"cell_type": "code",
|
| 190 |
+
"execution_count": null,
|
| 191 |
+
"id": "64529d5c",
|
| 192 |
+
"metadata": {},
|
| 193 |
+
"outputs": [],
|
| 194 |
+
"source": [
|
| 195 |
+
"params_whole = torch.load(\"Models/LJSpeech/epoch_2nd_00100.pth\", map_location='cpu')\n",
|
| 196 |
+
"params = params_whole['net']"
|
| 197 |
+
]
|
| 198 |
+
},
|
| 199 |
+
{
|
| 200 |
+
"cell_type": "code",
|
| 201 |
+
"execution_count": null,
|
| 202 |
+
"id": "895d9706",
|
| 203 |
+
"metadata": {},
|
| 204 |
+
"outputs": [],
|
| 205 |
+
"source": [
|
| 206 |
+
"for key in model:\n",
|
| 207 |
+
" if key in params:\n",
|
| 208 |
+
" print('%s loaded' % key)\n",
|
| 209 |
+
" try:\n",
|
| 210 |
+
" model[key].load_state_dict(params[key])\n",
|
| 211 |
+
" except:\n",
|
| 212 |
+
" from collections import OrderedDict\n",
|
| 213 |
+
" state_dict = params[key]\n",
|
| 214 |
+
" new_state_dict = OrderedDict()\n",
|
| 215 |
+
" for k, v in state_dict.items():\n",
|
| 216 |
+
" name = k[7:] # remove `module.`\n",
|
| 217 |
+
" new_state_dict[name] = v\n",
|
| 218 |
+
" # load params\n",
|
| 219 |
+
" model[key].load_state_dict(new_state_dict, strict=False)\n",
|
| 220 |
+
"# except:\n",
|
| 221 |
+
"# _load(params[key], model[key])\n",
|
| 222 |
+
"_ = [model[key].eval() for key in model]"
|
| 223 |
+
]
|
| 224 |
+
},
|
| 225 |
+
{
|
| 226 |
+
"cell_type": "code",
|
| 227 |
+
"execution_count": null,
|
| 228 |
+
"id": "c1a59db2",
|
| 229 |
+
"metadata": {},
|
| 230 |
+
"outputs": [],
|
| 231 |
+
"source": [
|
| 232 |
+
"from Modules.diffusion.sampler import DiffusionSampler, ADPM2Sampler, KarrasSchedule"
|
| 233 |
+
]
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"cell_type": "code",
|
| 237 |
+
"execution_count": null,
|
| 238 |
+
"id": "e30985ab",
|
| 239 |
+
"metadata": {},
|
| 240 |
+
"outputs": [],
|
| 241 |
+
"source": [
|
| 242 |
+
"sampler = DiffusionSampler(\n",
|
| 243 |
+
" model.diffusion.diffusion,\n",
|
| 244 |
+
" sampler=ADPM2Sampler(),\n",
|
| 245 |
+
" sigma_schedule=KarrasSchedule(sigma_min=0.0001, sigma_max=3.0, rho=9.0), # empirical parameters\n",
|
| 246 |
+
" clamp=False\n",
|
| 247 |
+
")"
|
| 248 |
+
]
|
| 249 |
+
},
|
| 250 |
+
{
|
| 251 |
+
"cell_type": "markdown",
|
| 252 |
+
"id": "b803110e",
|
| 253 |
+
"metadata": {},
|
| 254 |
+
"source": [
|
| 255 |
+
"### Synthesize speech"
|
| 256 |
+
]
|
| 257 |
+
},
|
| 258 |
+
{
|
| 259 |
+
"cell_type": "code",
|
| 260 |
+
"execution_count": null,
|
| 261 |
+
"id": "24655f46",
|
| 262 |
+
"metadata": {},
|
| 263 |
+
"outputs": [],
|
| 264 |
+
"source": [
|
| 265 |
+
"# synthesize a text\n",
|
| 266 |
+
"text = ''' StyleTTS 2 is a text-to-speech model that leverages style diffusion and adversarial training with large speech language models to achieve human-level text-to-speech synthesis. '''"
|
| 267 |
+
]
|
| 268 |
+
},
|
| 269 |
+
{
|
| 270 |
+
"cell_type": "code",
|
| 271 |
+
"execution_count": null,
|
| 272 |
+
"id": "ca57469c",
|
| 273 |
+
"metadata": {},
|
| 274 |
+
"outputs": [],
|
| 275 |
+
"source": [
|
| 276 |
+
"def inference(text, noise, diffusion_steps=5, embedding_scale=1):\n",
|
| 277 |
+
" text = text.strip()\n",
|
| 278 |
+
" text = text.replace('\"', '')\n",
|
| 279 |
+
" ps = global_phonemizer.phonemize([text])\n",
|
| 280 |
+
" ps = word_tokenize(ps[0])\n",
|
| 281 |
+
" ps = ' '.join(ps)\n",
|
| 282 |
+
"\n",
|
| 283 |
+
" tokens = textclenaer(ps)\n",
|
| 284 |
+
" tokens.insert(0, 0)\n",
|
| 285 |
+
" tokens = torch.LongTensor(tokens).to(device).unsqueeze(0)\n",
|
| 286 |
+
" \n",
|
| 287 |
+
" with torch.no_grad():\n",
|
| 288 |
+
" input_lengths = torch.LongTensor([tokens.shape[-1]]).to(tokens.device)\n",
|
| 289 |
+
" text_mask = length_to_mask(input_lengths).to(tokens.device)\n",
|
| 290 |
+
"\n",
|
| 291 |
+
" t_en = model.text_encoder(tokens, input_lengths, text_mask)\n",
|
| 292 |
+
" bert_dur = model.bert(tokens, attention_mask=(~text_mask).int())\n",
|
| 293 |
+
" d_en = model.bert_encoder(bert_dur).transpose(-1, -2) \n",
|
| 294 |
+
"\n",
|
| 295 |
+
" s_pred = sampler(noise, \n",
|
| 296 |
+
" embedding=bert_dur[0].unsqueeze(0), num_steps=diffusion_steps,\n",
|
| 297 |
+
" embedding_scale=embedding_scale).squeeze(0)\n",
|
| 298 |
+
"\n",
|
| 299 |
+
" s = s_pred[:, 128:]\n",
|
| 300 |
+
" ref = s_pred[:, :128]\n",
|
| 301 |
+
"\n",
|
| 302 |
+
" d = model.predictor.text_encoder(d_en, s, input_lengths, text_mask)\n",
|
| 303 |
+
"\n",
|
| 304 |
+
" x, _ = model.predictor.lstm(d)\n",
|
| 305 |
+
" duration = model.predictor.duration_proj(x)\n",
|
| 306 |
+
" duration = torch.sigmoid(duration).sum(axis=-1)\n",
|
| 307 |
+
" pred_dur = torch.round(duration.squeeze()).clamp(min=1)\n",
|
| 308 |
+
"\n",
|
| 309 |
+
" pred_dur[-1] += 5\n",
|
| 310 |
+
"\n",
|
| 311 |
+
" pred_aln_trg = torch.zeros(input_lengths, int(pred_dur.sum().data))\n",
|
| 312 |
+
" c_frame = 0\n",
|
| 313 |
+
" for i in range(pred_aln_trg.size(0)):\n",
|
| 314 |
+
" pred_aln_trg[i, c_frame:c_frame + int(pred_dur[i].data)] = 1\n",
|
| 315 |
+
" c_frame += int(pred_dur[i].data)\n",
|
| 316 |
+
"\n",
|
| 317 |
+
" # encode prosody\n",
|
| 318 |
+
" en = (d.transpose(-1, -2) @ pred_aln_trg.unsqueeze(0).to(device))\n",
|
| 319 |
+
" F0_pred, N_pred = model.predictor.F0Ntrain(en, s)\n",
|
| 320 |
+
" out = model.decoder((t_en @ pred_aln_trg.unsqueeze(0).to(device)), \n",
|
| 321 |
+
" F0_pred, N_pred, ref.squeeze().unsqueeze(0))\n",
|
| 322 |
+
" \n",
|
| 323 |
+
" return out.squeeze().cpu().numpy()"
|
| 324 |
+
]
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"cell_type": "markdown",
|
| 328 |
+
"id": "d438ef4f",
|
| 329 |
+
"metadata": {},
|
| 330 |
+
"source": [
|
| 331 |
+
"#### Basic synthesis (5 diffusion steps)"
|
| 332 |
+
]
|
| 333 |
+
},
|
| 334 |
+
{
|
| 335 |
+
"cell_type": "code",
|
| 336 |
+
"execution_count": null,
|
| 337 |
+
"id": "d3d7f7d5",
|
| 338 |
+
"metadata": {
|
| 339 |
+
"scrolled": true
|
| 340 |
+
},
|
| 341 |
+
"outputs": [],
|
| 342 |
+
"source": [
|
| 343 |
+
"start = time.time()\n",
|
| 344 |
+
"noise = torch.randn(1,1,256).to(device)\n",
|
| 345 |
+
"wav = inference(text, noise, diffusion_steps=5, embedding_scale=1)\n",
|
| 346 |
+
"rtf = (time.time() - start) / (len(wav) / 24000)\n",
|
| 347 |
+
"print(f\"RTF = {rtf:5f}\")\n",
|
| 348 |
+
"import IPython.display as ipd\n",
|
| 349 |
+
"display(ipd.Audio(wav, rate=24000))"
|
| 350 |
+
]
|
| 351 |
+
},
|
| 352 |
+
{
|
| 353 |
+
"cell_type": "markdown",
|
| 354 |
+
"id": "2d5d9df0",
|
| 355 |
+
"metadata": {},
|
| 356 |
+
"source": [
|
| 357 |
+
"#### With higher diffusion steps (more diverse)\n",
|
| 358 |
+
"Since the sampler is ancestral, the higher the number of steps, the more diverse the samples are, at the cost of slower synthesis speed."
|
| 359 |
+
]
|
| 360 |
+
},
|
| 361 |
+
{
|
| 362 |
+
"cell_type": "code",
|
| 363 |
+
"execution_count": null,
|
| 364 |
+
"id": "a10129fd",
|
| 365 |
+
"metadata": {},
|
| 366 |
+
"outputs": [],
|
| 367 |
+
"source": [
|
| 368 |
+
"start = time.time()\n",
|
| 369 |
+
"noise = torch.randn(1,1,256).to(device)\n",
|
| 370 |
+
"wav = inference(text, noise, diffusion_steps=10, embedding_scale=1)\n",
|
| 371 |
+
"rtf = (time.time() - start) / (len(wav) / 24000)\n",
|
| 372 |
+
"print(f\"RTF = {rtf:5f}\")\n",
|
| 373 |
+
"import IPython.display as ipd\n",
|
| 374 |
+
"display(ipd.Audio(wav, rate=24000))"
|
| 375 |
+
]
|
| 376 |
+
},
|
| 377 |
+
{
|
| 378 |
+
"cell_type": "markdown",
|
| 379 |
+
"id": "1877ea15",
|
| 380 |
+
"metadata": {},
|
| 381 |
+
"source": [
|
| 382 |
+
"### Speech expressiveness\n",
|
| 383 |
+
"The following section recreates the samples shown in [Section 6](https://styletts2.github.io/#emo) of the demo page."
|
| 384 |
+
]
|
| 385 |
+
},
|
| 386 |
+
{
|
| 387 |
+
"cell_type": "markdown",
|
| 388 |
+
"id": "4c4777b7",
|
| 389 |
+
"metadata": {},
|
| 390 |
+
"source": [
|
| 391 |
+
"#### With embedding_scale=1\n",
|
| 392 |
+
"This is the classifier-free guidance scale. The higher the scale, the more strongly the style is conditioned on the input text, and hence the more emotional the speech. "
|
| 393 |
+
]
|
| 394 |
+
},
|
| 395 |
+
{
|
| 396 |
+
"cell_type": "code",
|
| 397 |
+
"execution_count": null,
|
| 398 |
+
"id": "c29ea2f0",
|
| 399 |
+
"metadata": {},
|
| 400 |
+
"outputs": [],
|
| 401 |
+
"source": [
|
| 402 |
+
"texts = {}\n",
|
| 403 |
+
"texts['Happy'] = \"We are happy to invite you to join us on a journey to the past, where we will visit the most amazing monuments ever built by human hands.\"\n",
|
| 404 |
+
"texts['Sad'] = \"I am sorry to say that we have suffered a severe setback in our efforts to restore prosperity and confidence.\"\n",
|
| 405 |
+
"texts['Angry'] = \"The field of astronomy is a joke! Its theories are based on flawed observations and biased interpretations!\"\n",
|
| 406 |
+
"texts['Surprised'] = \"I can't believe it! You mean to tell me that you have discovered a new species of bacteria in this pond?\"\n",
|
| 407 |
+
"\n",
|
| 408 |
+
"for k,v in texts.items():\n",
|
| 409 |
+
" noise = torch.randn(1,1,256).to(device)\n",
|
| 410 |
+
" wav = inference(v, noise, diffusion_steps=10, embedding_scale=1)\n",
|
| 411 |
+
" print(k + \": \")\n",
|
| 412 |
+
" display(ipd.Audio(wav, rate=24000, normalize=False))"
|
| 413 |
+
]
|
| 414 |
+
},
|
| 415 |
+
{
|
| 416 |
+
"cell_type": "markdown",
|
| 417 |
+
"id": "3c89499f",
|
| 418 |
+
"metadata": {},
|
| 419 |
+
"source": [
|
| 420 |
+
"#### With embedding_scale=2"
|
| 421 |
+
]
|
| 422 |
+
},
|
| 423 |
+
{
|
| 424 |
+
"cell_type": "code",
|
| 425 |
+
"execution_count": null,
|
| 426 |
+
"id": "f73be3aa",
|
| 427 |
+
"metadata": {},
|
| 428 |
+
"outputs": [],
|
| 429 |
+
"source": [
|
| 430 |
+
"texts = {}\n",
|
| 431 |
+
"texts['Happy'] = \"We are happy to invite you to join us on a journey to the past, where we will visit the most amazing monuments ever built by human hands.\"\n",
|
| 432 |
+
"texts['Sad'] = \"I am sorry to say that we have suffered a severe setback in our efforts to restore prosperity and confidence.\"\n",
|
| 433 |
+
"texts['Angry'] = \"The field of astronomy is a joke! Its theories are based on flawed observations and biased interpretations!\"\n",
|
| 434 |
+
"texts['Surprised'] = \"I can't believe it! You mean to tell me that you have discovered a new species of bacteria in this pond?\"\n",
|
| 435 |
+
"\n",
|
| 436 |
+
"for k,v in texts.items():\n",
|
| 437 |
+
" noise = torch.randn(1,1,256).to(device)\n",
|
| 438 |
+
" wav = inference(v, noise, diffusion_steps=10, embedding_scale=2) # embedding_scale=2 for more pronounced emotion\n",
|
| 439 |
+
" print(k + \": \")\n",
|
| 440 |
+
" display(ipd.Audio(wav, rate=24000, normalize=False))"
|
| 441 |
+
]
|
| 442 |
+
},
|
| 443 |
+
{
|
| 444 |
+
"cell_type": "markdown",
|
| 445 |
+
"id": "9320da63",
|
| 446 |
+
"metadata": {},
|
| 447 |
+
"source": [
|
| 448 |
+
"### Long-form generation\n",
|
| 449 |
+
"This section includes a basic implementation of Algorithm 1 in the paper for consistent long-form audio generation. The example passage is taken from [Section 5](https://styletts2.github.io/#long) of the demo page. "
|
| 450 |
+
]
|
| 451 |
+
},
|
| 452 |
+
{
|
| 453 |
+
"cell_type": "code",
|
| 454 |
+
"execution_count": null,
|
| 455 |
+
"id": "cdd4db51",
|
| 456 |
+
"metadata": {},
|
| 457 |
+
"outputs": [],
|
| 458 |
+
"source": [
|
| 459 |
+
"passage = '''If the supply of fruit is greater than the family needs, it may be made a source of income by sending the fresh fruit to the market if there is one near enough, or by preserving, canning, and making jelly for sale. To make such an enterprise a success the fruit and work must be first class. There is magic in the word \"Homemade,\" when the product appeals to the eye and the palate; but many careless and incompetent people have found to their sorrow that this word has not magic enough to float inferior goods on the market. As a rule large canning and preserving establishments are clean and have the best appliances, and they employ chemists and skilled labor. The home product must be very good to compete with the attractive goods that are sent out from such establishments. Yet for first-class homemade products there is a market in all large cities. All first-class grocers have customers who purchase such goods.'''"
|
| 460 |
+
]
|
| 461 |
+
},
|
| 462 |
+
{
|
| 463 |
+
"cell_type": "code",
|
| 464 |
+
"execution_count": null,
|
| 465 |
+
"id": "ebb941c8",
|
| 466 |
+
"metadata": {},
|
| 467 |
+
"outputs": [],
|
| 468 |
+
"source": [
|
| 469 |
+
"def LFinference(text, s_prev, noise, alpha=0.7, diffusion_steps=5, embedding_scale=1):\n",
|
| 470 |
+
" text = text.strip()\n",
|
| 471 |
+
" text = text.replace('\"', '')\n",
|
| 472 |
+
" ps = global_phonemizer.phonemize([text])\n",
|
| 473 |
+
" ps = word_tokenize(ps[0])\n",
|
| 474 |
+
" ps = ' '.join(ps)\n",
|
| 475 |
+
"\n",
|
| 476 |
+
" tokens = textclenaer(ps)\n",
|
| 477 |
+
" tokens.insert(0, 0)\n",
|
| 478 |
+
" tokens = torch.LongTensor(tokens).to(device).unsqueeze(0)\n",
|
| 479 |
+
" \n",
|
| 480 |
+
" with torch.no_grad():\n",
|
| 481 |
+
" input_lengths = torch.LongTensor([tokens.shape[-1]]).to(tokens.device)\n",
|
| 482 |
+
" text_mask = length_to_mask(input_lengths).to(tokens.device)\n",
|
| 483 |
+
"\n",
|
| 484 |
+
" t_en = model.text_encoder(tokens, input_lengths, text_mask)\n",
|
| 485 |
+
" bert_dur = model.bert(tokens, attention_mask=(~text_mask).int())\n",
|
| 486 |
+
" d_en = model.bert_encoder(bert_dur).transpose(-1, -2) \n",
|
| 487 |
+
"\n",
|
| 488 |
+
" s_pred = sampler(noise, \n",
|
| 489 |
+
" embedding=bert_dur[0].unsqueeze(0), num_steps=diffusion_steps,\n",
|
| 490 |
+
" embedding_scale=embedding_scale).squeeze(0)\n",
|
| 491 |
+
" \n",
|
| 492 |
+
" if s_prev is not None:\n",
|
| 493 |
+
" # convex combination of previous and current style\n",
|
| 494 |
+
" s_pred = alpha * s_prev + (1 - alpha) * s_pred\n",
|
| 495 |
+
" \n",
|
| 496 |
+
" s = s_pred[:, 128:]\n",
|
| 497 |
+
" ref = s_pred[:, :128]\n",
|
| 498 |
+
"\n",
|
| 499 |
+
" d = model.predictor.text_encoder(d_en, s, input_lengths, text_mask)\n",
|
| 500 |
+
"\n",
|
| 501 |
+
" x, _ = model.predictor.lstm(d)\n",
|
| 502 |
+
" duration = model.predictor.duration_proj(x)\n",
|
| 503 |
+
" duration = torch.sigmoid(duration).sum(axis=-1)\n",
|
| 504 |
+
" pred_dur = torch.round(duration.squeeze()).clamp(min=1)\n",
|
| 505 |
+
"\n",
|
| 506 |
+
" pred_aln_trg = torch.zeros(input_lengths, int(pred_dur.sum().data))\n",
|
| 507 |
+
" c_frame = 0\n",
|
| 508 |
+
" for i in range(pred_aln_trg.size(0)):\n",
|
| 509 |
+
" pred_aln_trg[i, c_frame:c_frame + int(pred_dur[i].data)] = 1\n",
|
| 510 |
+
" c_frame += int(pred_dur[i].data)\n",
|
| 511 |
+
"\n",
|
| 512 |
+
" # encode prosody\n",
|
| 513 |
+
" en = (d.transpose(-1, -2) @ pred_aln_trg.unsqueeze(0).to(device))\n",
|
| 514 |
+
" F0_pred, N_pred = model.predictor.F0Ntrain(en, s)\n",
|
| 515 |
+
" out = model.decoder((t_en @ pred_aln_trg.unsqueeze(0).to(device)), \n",
|
| 516 |
+
" F0_pred, N_pred, ref.squeeze().unsqueeze(0))\n",
|
| 517 |
+
" \n",
|
| 518 |
+
" return out.squeeze().cpu().numpy(), s_pred"
|
| 519 |
+
]
|
| 520 |
+
},
|
| 521 |
+
{
|
| 522 |
+
"cell_type": "code",
|
| 523 |
+
"execution_count": null,
|
| 524 |
+
"id": "7ca0ef2e",
|
| 525 |
+
"metadata": {},
|
| 526 |
+
"outputs": [],
|
| 527 |
+
"source": [
|
| 528 |
+
"sentences = passage.split('.') # simple split by period\n",
|
| 529 |
+
"wavs = []\n",
|
| 530 |
+
"s_prev = None\n",
|
| 531 |
+
"for text in sentences:\n",
|
| 532 |
+
" if text.strip() == \"\": continue\n",
|
| 533 |
+
" text += '.' # add it back\n",
|
| 534 |
+
" noise = torch.randn(1,1,256).to(device)\n",
|
| 535 |
+
" wav, s_prev = LFinference(text, s_prev, noise, alpha=0.7, diffusion_steps=10, embedding_scale=1.5)\n",
|
| 536 |
+
" wavs.append(wav)\n",
|
| 537 |
+
"display(ipd.Audio(np.concatenate(wavs), rate=24000, normalize=False))"
|
| 538 |
+
]
|
| 539 |
+
}
|
| 540 |
+
],
|
| 541 |
+
"metadata": {
|
| 542 |
+
"kernelspec": {
|
| 543 |
+
"display_name": "NLP",
|
| 544 |
+
"language": "python",
|
| 545 |
+
"name": "nlp"
|
| 546 |
+
},
|
| 547 |
+
"language_info": {
|
| 548 |
+
"codemirror_mode": {
|
| 549 |
+
"name": "ipython",
|
| 550 |
+
"version": 3
|
| 551 |
+
},
|
| 552 |
+
"file_extension": ".py",
|
| 553 |
+
"mimetype": "text/x-python",
|
| 554 |
+
"name": "python",
|
| 555 |
+
"nbconvert_exporter": "python",
|
| 556 |
+
"pygments_lexer": "ipython3",
|
| 557 |
+
"version": "3.9.7"
|
| 558 |
+
}
|
| 559 |
+
},
|
| 560 |
+
"nbformat": 4,
|
| 561 |
+
"nbformat_minor": 5
|
| 562 |
+
}
|
stylekan/Demo/Inference_LibriTTS.ipynb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e4a3308a129ac51e5e9e538817a0dfff3e1ab6d11bcf3918abe60c97e895a7cd
|
| 3 |
+
size 20886216
|
stylekan/Demo/infer_24khz.ipynb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:608ae17929cbf62af09f4f131ee6dfa60f1dca4683c7353fb4f91fa40fa3ffa2
|
| 3 |
+
size 26976110
|
stylekan/Demo/syuko_style_vectors.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
stylekan/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MIT License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2023 Aaron (Yinghao) Li
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
SOFTWARE.
|
stylekan/Models/Style_Kanade/2nd_phase_last.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:22cfd8dcaebcc012708c21999546f93ca5ddb6b305a75a1002ce2c60aa820f04
|
| 3 |
+
size 2586719838
|
stylekan/Models/Style_Kanade/NO_SLM_3_epoch_2nd_00002.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5c94e26293c1722ad7acf82e25de4e225f0dfa832edaab88559acf481eba1bac
|
| 3 |
+
size 2049397288
|
stylekan/Models/Style_Kanade/NO_SLM_epoch_2nd_00002.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:78278bbaa7ccdf292985c8656dc543f3772d6d52789ed2d95c5b0cbaf86e362c
|
| 3 |
+
size 2049397288
|
stylekan/Models/Style_Kanade/NO_SLM_epoch_2nd_00004.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2c47ad6a6a952ef7fd7642ec1e57ed278379d77668157c30bd798fa483efcf2e
|
| 3 |
+
size 2049397288
|
stylekan/Models/Style_Kanade/config_kanade.yml
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{ASR_config: /home/austin/disk2/llmvcs/tt/stylekan/Utils/ASR/config.yml, ASR_path: /home/austin/disk2/llmvcs/tt/stylekan/Utils/ASR/bst_00080.pth,
|
| 2 |
+
F0_path: /home/austin/disk2/llmvcs/tt/stylekan/Utils/JDC/bst.t7, PLBERT_dir: Utils/PLBERT/,
|
| 3 |
+
batch_size: 49, data_params: {OOD_data: /home/austin/disk2/llmvcs/tt/stylekan/Data/OOD_LargeScale_.csv,
|
| 4 |
+
min_length: 50, root_path: '', train_data: /home/austin/disk2/llmvcs/tt/stylekan/Data/filtered_train_list.csv,
|
| 5 |
+
val_data: /home/austin/disk2/llmvcs/tt/stylekan/Data/mg_valid.txt}, device: cuda,
|
| 6 |
+
epochs_1st: 25, epochs_2nd: 15, first_stage_path: /home/austin/disk2/llmvcs/tt/stylekan/Models/Style_Kanade/epoch_1st_00013.pth,
|
| 7 |
+
load_only_params: false, log_dir: Models/Style_Kanade, log_interval: 10, loss_params: {
|
| 8 |
+
TMA_epoch: 9, diff_epoch: 2, joint_epoch: 2, lambda_F0: 1.0, lambda_ce: 20.0,
|
| 9 |
+
lambda_diff: 1.0, lambda_dur: 1.0, lambda_gen: 1.0, lambda_mel: 10.0, lambda_mono: 1.0,
|
| 10 |
+
lambda_norm: 1.0, lambda_s2s: 1.0, lambda_slm: 1.0, lambda_sty: 1.0}, max_len: 560,
|
| 11 |
+
model_params: {decoder: {gen_istft_hop_size: 5, gen_istft_n_fft: 20, resblock_dilation_sizes: [
|
| 12 |
+
[1, 3, 5], [1, 3, 5], [1, 3, 5]], resblock_kernel_sizes: [3, 7, 11], type: istftnet,
|
| 13 |
+
upsample_initial_channel: 512, upsample_kernel_sizes: [20, 12], upsample_rates: [
|
| 14 |
+
10, 6]}, diffusion: {dist: {estimate_sigma_data: true, mean: -3.0, sigma_data: 0.2782753203153678,
|
| 15 |
+
std: 1.0}, embedding_mask_proba: 0.1, transformer: {head_features: 64, multiplier: 2,
|
| 16 |
+
num_heads: 8, num_layers: 3}}, dim_in: 64, dropout: 0.2, hidden_dim: 512,
|
| 17 |
+
max_conv_dim: 512, max_dur: 50, multispeaker: true, n_layer: 3, n_mels: 80, n_token: 178,
|
| 18 |
+
slm: {hidden: 1280, initial_channel: 64, model: Respair/Whisper_Large_v2_Encoder_Block,
|
| 19 |
+
nlayers: 33, sr: 16000}, sr: 24000, style_dim: 128}, optimizer_params: {bert_lr: 1.0e-05,
|
| 20 |
+
ft_lr: 1.0e-05, lr: 0.0001}, preprocess_params: {spect_params: {hop_length: 300,
|
| 21 |
+
n_fft: 2048, win_length: 1200}, sr: 24000}, pretrained_model: /home/austin/disk2/llmvcs/tt/stylekan/Models/Style_Kanade/NO_SLM_3_epoch_2nd_00002.pth,
|
| 22 |
+
save_freq: 1, second_stage_load_pretrained: true, slmadv_params: {batch_percentage: 0.5,
|
| 23 |
+
iter: 20, max_len: 500, min_len: 400, scale: 0.01, sig: 1.5, thresh: 5}}
|
stylekan/Models/Style_Kanade/epoch_1st_00013.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:15ad73479a2a1dd8b630376dd8d6b9ad81bc24da34517c45a15c7dd235cc3110
|
| 3 |
+
size 1918457960
|
stylekan/Models/Style_Kanade/epoch_2nd_00000.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:65f10185d1bf51609155699f9e53b8b887c422e9cf7275270994db41ddf382bb
|
| 3 |
+
size 1515513256
|