drrobot9 committed on
Commit
3a53cbe
·
verified ·
1 Parent(s): 26ce7cd

Upload 62 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. yarngpt/README.md +114 -0
  3. yarngpt/__init__.py +1 -0
  4. yarngpt/__pycache__/__init__.cpython-312.pyc +0 -0
  5. yarngpt/__pycache__/audiotokenizer.cpython-312.pyc +0 -0
  6. yarngpt/audiotokenizer.py +319 -0
  7. yarngpt/default_speakers/azeez.json +413 -0
  8. yarngpt/default_speakers/chinenye.json +274 -0
  9. yarngpt/default_speakers/emma.json +441 -0
  10. yarngpt/default_speakers/idera.json +396 -0
  11. yarngpt/default_speakers/joke.json +430 -0
  12. yarngpt/default_speakers/jude.json +263 -0
  13. yarngpt/default_speakers/onye.json +621 -0
  14. yarngpt/default_speakers/osagie.json +486 -0
  15. yarngpt/default_speakers/regina.json +574 -0
  16. yarngpt/default_speakers/remi.json +382 -0
  17. yarngpt/default_speakers/saheed.json +564 -0
  18. yarngpt/default_speakers/tayo.json +523 -0
  19. yarngpt/default_speakers/umar.json +469 -0
  20. yarngpt/default_speakers/zainab.json +457 -0
  21. yarngpt/default_speakers_local/hausa_female1.json +273 -0
  22. yarngpt/default_speakers_local/hausa_female2.json +273 -0
  23. yarngpt/default_speakers_local/hausa_male1.json +367 -0
  24. yarngpt/default_speakers_local/hausa_male2.json +207 -0
  25. yarngpt/default_speakers_local/igbo_female1.json +246 -0
  26. yarngpt/default_speakers_local/igbo_female2.json +202 -0
  27. yarngpt/default_speakers_local/igbo_male2.json +277 -0
  28. yarngpt/default_speakers_local/yoruba_female1.json +416 -0
  29. yarngpt/default_speakers_local/yoruba_female2.json +193 -0
  30. yarngpt/default_speakers_local/yoruba_male1.json +234 -0
  31. yarngpt/default_speakers_local/yoruba_male2.json +238 -0
  32. yarngpt/default_speakers_local/yoruba_male3.json +234 -0
  33. yarngpt/notebooks/Merge_datasets.ipynb +851 -0
  34. yarngpt/notebooks/Merge_datasets_local (1).ipynb +258 -0
  35. yarngpt/notebooks/Yoruba_prepare_data_naij (2).ipynb +0 -0
  36. yarngpt/notebooks/audio_0c026c21-f432-4d20-a86b-899a10d9ed60.webp +3 -0
  37. yarngpt/notebooks/train_YarnGPT.ipynb +0 -0
  38. yarngpt/notebooks/train_YarnGPT_local.ipynb +0 -0
  39. yarngpt/python-wrapper/README.md +80 -0
  40. yarngpt/python-wrapper/audiotokenizer.py +317 -0
  41. yarngpt/python-wrapper/default_speakers/.ipynb_checkpoints/Yoruba_prepare_data_naij (2)-checkpoint.ipynb +0 -0
  42. yarngpt/python-wrapper/default_speakers/.ipynb_checkpoints/emma-checkpoint.json +441 -0
  43. yarngpt/python-wrapper/default_speakers/.ipynb_checkpoints/idera-checkpoint.json +396 -0
  44. yarngpt/python-wrapper/default_speakers/.ipynb_checkpoints/onye-checkpoint.json +621 -0
  45. yarngpt/python-wrapper/default_speakers/Yoruba_prepare_data_naij (2).ipynb +0 -0
  46. yarngpt/python-wrapper/default_speakers/chinenye.json +274 -0
  47. yarngpt/python-wrapper/default_speakers/emma.json +441 -0
  48. yarngpt/python-wrapper/default_speakers/idera.json +396 -0
  49. yarngpt/python-wrapper/default_speakers/joke.json +430 -0
  50. yarngpt/python-wrapper/default_speakers/jude.json +263 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ yarngpt/notebooks/audio_0c026c21-f432-4d20-a86b-899a10d9ed60.webp filter=lfs diff=lfs merge=lfs -text
yarngpt/README.md ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # YarnGPT 🎙️
2
+ ![image/png](https://github.com/saheedniyi02/yarngpt/blob/main/notebooks%2Faudio_0c026c21-f432-4d20-a86b-899a10d9ed60.webp)
3
+ A text-to-speech model generating natural Nigerian-accented English speech. Built on pure language modeling without external adapters.
4
+
5
+ Website: https://yarngpt.co/
6
+
7
+ ## Quick Start
8
+
9
+ ```python
10
+
11
+ !git clone https://github.com/saheedniyi02/yarngpt.git
12
+
13
+ pip install outetts uroman
14
+
15
+ import os
16
+ import re
17
+ import json
18
+ import torch
19
+ import inflect
20
+ import random
21
+ import uroman as ur
22
+ import numpy as np
23
+ import torchaudio
24
+ import IPython
25
+ from transformers import AutoModelForCausalLM, AutoTokenizer
26
+ from outetts.wav_tokenizer.decoder import WavTokenizer
27
+
28
+
29
+ !wget https://huggingface.co/novateur/WavTokenizer-medium-speech-75token/resolve/main/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml
30
+ !gdown 1-ASeEkrn4HY49yZWHTASgfGFNXdVnLTt
31
+
32
+
33
+ from yarngpt.audiotokenizer import AudioTokenizerV2
34
+
35
+ tokenizer_path="saheedniyi/YarnGPT2"
36
+ wav_tokenizer_config_path="/content/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml"
37
+ wav_tokenizer_model_path = "/content/wavtokenizer_large_speech_320_24k.ckpt"
38
+
39
+
40
+ audio_tokenizer=AudioTokenizerV2(
41
+ tokenizer_path,wav_tokenizer_model_path,wav_tokenizer_config_path
42
+ )
43
+
44
+
45
+ model = AutoModelForCausalLM.from_pretrained(tokenizer_path,torch_dtype="auto").to(audio_tokenizer.device)
46
+
47
+ #change the text
48
+ text="The election was won by businessman and politician, Moshood Abiola, but Babangida annulled the results, citing concerns over national security."
49
+
50
+ # change the language and voice
51
+ prompt=audio_tokenizer.create_prompt(text,lang="english",speaker_name="idera")
52
+
53
+ input_ids=audio_tokenizer.tokenize_prompt(prompt)
54
+
55
+ output = model.generate(
56
+ input_ids=input_ids,
57
+ temperature=0.1,
58
+ repetition_penalty=1.1,
59
+ max_length=4000,
60
+ #num_beams=5,# using a beam size helps for the local languages but not english
61
+ )
62
+
63
+ codes=audio_tokenizer.get_codes(output)
64
+ audio=audio_tokenizer.get_audio(codes)
65
+ IPython.display.Audio(audio,rate=24000)
66
+ torchaudio.save(f"Sample.wav", audio, sample_rate=24000)
67
+
68
+ ```
69
+
70
+ ## Features
71
+
72
+ - 🗣️ 12 preset voices (6 male, 6 female)
73
+ - 🎯 Trained on 2000+ hours of Nigerian audio
74
+ - 🔊 24kHz high-quality audio output
75
+ - 🚀 Simple API for quick integration
76
+ - 📝 Support for long-form text
77
+
78
+ ## Available Voices
79
+ - Female: zainab, idera, regina, chinenye, joke, remi
80
+ - Male: jude, tayo, umar, osagie, onye, emma
81
+
82
+ ## Examples
83
+
84
+ Check out our [demo notebook](link-to-notebook) or listen to [sample outputs](https://huggingface.co/saheedniyi/YarnGPT/tree/main/audio).
85
+
86
+ ## Model Details
87
+
88
+ - Base: [HuggingFaceTB/SmolLM2-360M](https://huggingface.co/HuggingFaceTB/SmolLM2-360M)
89
+ - Training: 5 epochs on A100 GPU
90
+ - Data: Nigerian movies, podcasts, and open-source audio
91
+ - Architecture: Pure language modeling approach
92
+
93
+ ## Limitations
94
+
95
+ - English to Nigerian-accented English only
96
+ - May not capture all Nigerian accent variations
97
+ - Training data includes auto-generated content
98
+
99
+ ## Citation
100
+
101
+ ```bibtex
102
+ @misc{yarngpt2025,
103
+ author = {Saheed Azeez},
104
+ title = {YarnGPT: Nigerian-Accented English Text-to-Speech Model},
105
+ year = {2025},
106
+ publisher = {Hugging Face}
107
+ }
108
+ ```
109
+
110
+ ## License
111
+ MIT
112
+
113
+ ## Acknowledgments
114
+ Built with [WavTokenizer](https://github.com/jishengpeng/WavTokenizer) and inspired by [OuteTTS](https://huggingface.co/OuteAI/OuteTTS-0.2-500M/).
yarngpt/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+
yarngpt/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (148 Bytes). View file
 
yarngpt/__pycache__/audiotokenizer.cpython-312.pyc ADDED
Binary file (20.1 kB). View file
 
yarngpt/audiotokenizer.py ADDED
@@ -0,0 +1,319 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import json
4
+ import torch
5
+ import inflect
6
+ import random
7
+ import uroman as ur
8
+ import numpy as np
9
+ import torchaudio
10
+ from transformers import AutoTokenizer
11
+ from outetts.wav_tokenizer.decoder import WavTokenizer
12
+ from outetts.wav_tokenizer.encoder.utils import convert_audio
13
+
14
class AudioTokenizer:
    """Build speaker-conditioned prompts and decode generated audio codes.

    The class pairs a Hugging Face text tokenizer (loaded with
    ``AutoTokenizer.from_pretrained``) with a ``WavTokenizer`` codec. A prompt
    is the normalised transcript of a reference speaker followed by the new
    text, then the reference speaker's per-word audio codes.
    """

    def __init__(self, tokenizer_path, wav_tokenizer_model_path, wav_tokenizer_config_path,):
        """Load the text tokenizer and the WavTokenizer codec.

        Args:
            tokenizer_path: Identifier or path passed to
                ``AutoTokenizer.from_pretrained``.
            wav_tokenizer_model_path: Checkpoint path for the WavTokenizer.
            wav_tokenizer_config_path: Config (YAML) path for the WavTokenizer.
        """
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.text_prompt = "{bos}\n{text_start}{words}{text_end}\n{audio_start}\n"
        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
        self.bos = "<|im_start|>"
        self.eos = "<|im_end|>"
        # Length (in tokens) of the most recently tokenized prompt; used by
        # get_codes() to skip the prompt part of a generated sequence.
        self.input_length = 0
        self.special_tokens = {
            "audio_code": "<|{}|>",
            "text_start": "<|text_start|>",
            "text_end": "<|text_end|>",
            "audio_start": "<|audio_start|>",
            "audio_end": "<|audio_end|>",
            "time": "<|t_{:.2f}|>",
            "code_start": "<|code_start|>",
            "code_end": "<|code_end|>",
            "text_sep": "<|text_sep|>"
        }
        self.lec = inflect.engine()
        # NOTE: from_pretrained0802 takes the config path first, then the checkpoint.
        self.wavtokenizer = WavTokenizer.from_pretrained0802(wav_tokenizer_config_path, wav_tokenizer_model_path)
        self.wavtokenizer = self.wavtokenizer.to(self.device)
        self.BASE_DIR = os.path.dirname(__file__)
        self.DEFAULT_SPEAKERS_DIR = os.path.join(self.BASE_DIR, "default_speakers")
        self.speakers = ["idera", "emma", "onye", "jude", "osagie", "tayo", "zainab", "joke", "regina", "remi", "umar", "chinenye"]

    def get_speaker_path(self, speaker_name):
        """Return the path of ``<speaker_name>.json`` in the default speakers directory."""
        return os.path.join(self.DEFAULT_SPEAKERS_DIR, f"{speaker_name}.json")

    def load_speaker(self, path: str):
        """Load and return the parsed JSON speaker profile stored at ``path``."""
        # Explicit UTF-8 so loading does not depend on the platform's locale encoding.
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)

    def load_default_speaker(self, name: str):
        """Load a bundled speaker profile by name (case-insensitive, surrounding whitespace ignored)."""
        name = name.lower().strip()
        return self.load_speaker(self.get_speaker_path(name))

    def process_text(self, text: str):
        """Normalise ``text`` and return it as a list of lowercase words.

        Numbers are spelled out with ``inflect``, the characters ``- _ / , . \\``
        become spaces, every remaining character outside ``a-z`` and whitespace
        is dropped, and runs of whitespace are collapsed.
        """
        text = re.sub(r'\d+(\.\d+)?', lambda x: self.lec.number_to_words(x.group()), text.lower())
        text = re.sub(r'[-_/,\.\\]', ' ', text)
        text = re.sub(r'[^a-z\s]', '', text)
        text = re.sub(r'\s+', ' ', text).strip()
        return text.split()

    def create_audio_prompt(self, words: list) -> str:
        """Serialise per-word speaker entries into the audio part of a prompt.

        Args:
            words: Dicts with the keys ``"word"``, ``"duration"`` and ``"codes"``.

        Returns:
            One line per word: the word, its duration token, then its audio
            codes wrapped in the code-start / code-end tokens.
        """
        code_token = self.special_tokens["audio_code"]
        code_start = self.special_tokens["code_start"]
        code_end = self.special_tokens["code_end"]
        lines = []
        for entry in words:
            duration = self.special_tokens["time"].format(float(entry["duration"]))
            tokens = "".join(code_token.format(c) for c in entry["codes"])
            lines.append(f'{entry["word"]}{duration}{code_start}{tokens}{code_end}')
        return "\n".join(lines)

    def create_prompt(self, text, speaker_name="idera"):
        """Build the full generation prompt for ``text`` using a bundled speaker.

        The speaker's own transcript is prepended to ``text`` so the words line
        up with the speaker's audio codes that are appended after the header.
        """
        speaker = self.load_default_speaker(speaker_name)
        input_words = self.process_text(speaker["text"]) + self.process_text(text)

        inputs_words_strings = self.special_tokens['text_sep'].join(w.strip() for w in input_words)
        prompt = self.text_prompt.format(
            bos=self.bos,
            text_start=self.special_tokens['text_start'],
            words=inputs_words_strings,
            text_end=self.special_tokens['text_end'],
            audio_start=self.special_tokens['audio_start']
        )
        prompt += self.create_audio_prompt(speaker["words"])

        return prompt

    def tokenize_prompt(self, prompt):
        """Encode ``prompt`` to a tensor on ``self.device`` and remember its token length."""
        input_ids = self.tokenizer.encode(
            prompt,
            add_special_tokens=False,
            return_tensors="pt"
        ).to(self.device)
        self.input_length = input_ids.shape[1]
        return input_ids

    def get_audio(self, discrete_code):
        """Decode audio codes with the WavTokenizer and return the result on the CPU.

        Args:
            discrete_code: The codes to decode, e.g. the list returned by
                ``get_codes``; it is wrapped in two extra list levels before
                being turned into a tensor.
        """
        discrete_code = torch.tensor([[discrete_code]]).to(self.device)
        features = self.wavtokenizer.codes_to_features(discrete_code).to(self.device)
        bandwidth_id = torch.tensor([0]).to(self.device)
        audio_out = self.wavtokenizer.decode(features, bandwidth_id=bandwidth_id)
        return audio_out.to("cpu")

    def extract_integers(self, s):
        """Return every integer that appears as ``|<int>|`` in ``s``, in order."""
        return [int(match) for match in re.findall(r'\|(-?\d+)\|', s)]

    def get_codes(self, output):
        """Extract the generated audio codes from a model output.

        Only the tokens after the prompt are decoded; the prompt length is the
        one recorded by the most recent ``tokenize_prompt`` call.
        """
        new_output = self.tokenizer.decode(output[0][self.input_length:])
        return self.extract_integers(new_output)
118
+
119
+
120
class AudioTokenizerForLocal(AudioTokenizer):
    """Prompt builder for Hausa, Igbo and Yoruba.

    Differs from the base class by adding a language token line to the prompt,
    romanising the input text with ``uroman`` before normalisation, and reading
    speakers from the ``default_speakers_local`` directory.
    """

    def __init__(self, tokenizer_path, wav_tokenizer_model_path, wav_tokenizer_config_path,):
        """Initialise the base tokenizer, then override prompt template, tokens and speakers."""
        super().__init__(tokenizer_path, wav_tokenizer_model_path, wav_tokenizer_config_path)
        self.text_prompt = "{bos}\n{text_start}{words}{text_end}\n{lang}\n{audio_start}\n"
        self.special_tokens = {
            "audio_code": "<|{}|>",
            "text_start": "<|text_start|>",
            "text_end": "<|text_end|>",
            "audio_start": "<|audio_start|>",
            "audio_end": "<|audio_end|>",
            "word_start": "<|word_start|>",
            "word_end": "<|word_end|>",
            "time": "<|t_{:.2f}|>",
            "code_start": "<|code_start|>",
            "code_end": "<|code_end|>",
            "text_sep": "<|text_sep|>",
            "hausa": "<|hausa|>",
            "igbo": "<|igbo|>",
            "yoruba": "<|yoruba|>",
        }
        self.uroman = ur.Uroman()
        self.DEFAULT_SPEAKERS_DIR = os.path.join(self.BASE_DIR, "default_speakers_local")
        # The comma after "igbo_male2" is required: without it Python joins the
        # adjacent literals into the single bogus name "igbo_male2hausa_female1".
        self.speakers = [
            "hausa_male1", "hausa_male2", "yoruba_male1", "yoruba_male2", "igbo_male2",  # "igbo_male1" is not available
            "hausa_female1", "hausa_female2", "igbo_female1", "igbo_female2", "yoruba_female1", "yoruba_female2",
        ]

    def process_text(self, text: str):
        """Romanise ``text`` with uroman, then apply the base-class normalisation.

        Returns:
            The list of lowercase ``a-z`` words produced by
            ``AudioTokenizer.process_text``.
        """
        return super().process_text(self.uroman.romanize_string(text))

    def create_prompt(self, text, lang, speaker_name=None):
        """Build the generation prompt for ``text`` in ``lang``.

        Args:
            text: Text to synthesise.
            lang: One of ``"hausa"``, ``"igbo"`` or ``"yoruba"``.
            speaker_name: Name of a speaker file in the local speakers
                directory. When ``None`` a speaker of the requested language
                is picked at random.

        Raises:
            AssertionError: If ``lang`` is not a supported language.
        """
        assert lang in ["hausa","igbo","yoruba"], f"Invalid language: {lang}, language must be one of ['hausa','igbo','yoruba']"
        if speaker_name is None:
            # NOTE(review): "yoruba_male1" is listed in self.speakers but is not
            # part of the random Yoruba pool below - confirm this is intentional.
            if lang == "hausa":
                speaker_name = random.choice(["hausa_male1", "hausa_male2", "hausa_female1", "hausa_female2"])
            elif lang == "igbo":
                speaker_name = random.choice(["igbo_female1", "igbo_female2", "igbo_male2"])
            else:
                speaker_name = random.choice(["yoruba_male2", "yoruba_female1", "yoruba_female2"])
        speaker = self.load_default_speaker(speaker_name)
        # The speaker transcript comes first so it aligns with the speaker's audio codes.
        input_words = self.process_text(speaker["text"]) + self.process_text(text)

        inputs_words_strings = self.special_tokens['text_sep'].join(w.strip() for w in input_words)
        prompt = self.text_prompt.format(
            bos=self.bos,
            text_start=self.special_tokens['text_start'],
            words=inputs_words_strings,
            text_end=self.special_tokens['text_end'],
            lang=self.special_tokens[lang],
            audio_start=self.special_tokens['audio_start']
        )
        prompt += self.create_audio_prompt(speaker["words"])

        return prompt
182
+
183
+
184
class AudioTokenizerV2(AudioTokenizer):
    """Prompt builder covering English, Hausa, Igbo and Yoruba, plus ASR prompts.

    Adds a language token line to the text prompt, romanises input text with
    ``uroman``, keeps separate speaker directories for English and for the
    local languages, and can turn an audio file into an ASR prompt.
    """

    def __init__(self, tokenizer_path, wav_tokenizer_model_path, wav_tokenizer_config_path,):
        """Initialise the base tokenizer, then set V2 prompt templates, tokens and speakers."""
        super().__init__(tokenizer_path, wav_tokenizer_model_path, wav_tokenizer_config_path)
        self.text_prompt = "{bos}\n{text_start}{words}{text_end}\n{lang}\n{audio_start}\n"
        self.asr_prompt = "{bos}\n{code_start}{codes}{code_end}\n{asr}\n"
        self.special_tokens = {
            "audio_code": "<|{}|>",
            "text_start": "<|text_start|>",
            "text_end": "<|text_end|>",
            "audio_start": "<|audio_start|>",
            "audio_end": "<|audio_end|>",
            "word_start": "<|word_start|>",
            "word_end": "<|word_end|>",
            "time": "<|t_{:.2f}|>",
            "code_start": "<|code_start|>",
            "code_end": "<|code_end|>",
            "text_sep": "<|text_sep|>",
            "hausa": "<|hausa|>",
            "igbo": "<|igbo|>",
            "yoruba": "<|yoruba|>",
            "english": "<|english|>",
            "asr": "<|asr|>"
        }
        self.uroman = ur.Uroman()
        self.DEFAULT_SPEAKERS_DIR_LOCAL = os.path.join(self.BASE_DIR, "default_speakers_local")
        self.DEFAULT_SPEAKERS_ENG = os.path.join(self.BASE_DIR, "default_speakers")
        # The comma after "igbo_male2" is required: without it Python joins the
        # adjacent literals into the single bogus name "igbo_male2hausa_female1".
        self.speakers_local = [
            "hausa_male1", "hausa_male2", "yoruba_male1", "yoruba_male2", "igbo_male2",  # "igbo_male1" is not available
            "hausa_female1", "hausa_female2", "igbo_female1", "igbo_female2", "yoruba_female1", "yoruba_female2",
        ]
        self.speakers_eng = ["idera", "emma", "onye", "jude", "osagie", "tayo", "zainab", "joke", "regina", "remi", "umar", "chinenye", "saheed"]
        # (token, replacement) pairs applied in this order by replace_tokens().
        self.changed_tokens = [('<|1836|>', '<|453|><|453|>'),
                               ('<|1837|>', '<|1836|><|1836|>'),
                               ('<|1838|>', '<|1837|><|1837|>'),
                               ('<|1840|>', '<|244|><|167|>'),
                               ('<|1841|>', '<|235|><|219|>'),
                               ('<|1844|>', '<|453|><|244|>'),
                               ('<|1845|>', '<|1838|><|1838|>')]

    def process_text(self, text: str):
        """Romanise ``text`` with uroman, then normalise it to a list of lowercase ``a-z`` words.

        Numbers are spelled out with ``inflect``, the characters ``- _ / , . \\``
        become spaces, and all other characters outside ``a-z``/whitespace are
        dropped.
        """
        text = self.uroman.romanize_string(text)
        text = re.sub(r'\d+(\.\d+)?', lambda x: self.lec.number_to_words(x.group()), text.lower())
        text = re.sub(r'[-_/,\.\\]', ' ', text)
        text = re.sub(r'[^a-z\s]', '', text)
        text = re.sub(r'\s+', ' ', text).strip()
        return text.split()

    def get_speaker_path(self, speaker_name, dir):
        """Return the path of ``<speaker_name>.json`` inside directory ``dir``."""
        return os.path.join(dir, f"{speaker_name}.json")

    def load_speaker(self, path: str):
        """Load and return the parsed JSON speaker profile stored at ``path``."""
        # Explicit UTF-8 so loading does not depend on the platform's locale encoding.
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)

    def load_default_speaker(self, name: str, dir: str):
        """Load speaker ``name`` (case-insensitive, whitespace-trimmed) from directory ``dir``."""
        name = name.lower().strip()
        return self.load_speaker(self.get_speaker_path(name, dir))

    def create_prompt(self, text, lang, speaker_name=None):
        """Build the generation prompt for ``text`` in ``lang``.

        Args:
            text: Text to synthesise.
            lang: One of ``"hausa"``, ``"igbo"``, ``"yoruba"`` or ``"english"``.
            speaker_name: Speaker file name. English speakers are read from
                the English directory, every other language from the local
                directory. When ``None`` a speaker is picked at random for the
                requested language.

        Raises:
            AssertionError: If ``lang`` is not a supported language.
        """
        assert lang in ["hausa","igbo","yoruba","english"], f"Invalid language: {lang}, language must be one of ['hausa','igbo','yoruba','english']"
        if speaker_name is None:
            # NOTE(review): "yoruba_male1" is listed in self.speakers_local but is
            # not part of the random Yoruba pool below - confirm this is intentional.
            if lang == "hausa":
                speaker_name = random.choice(["hausa_male1", "hausa_male2", "hausa_female1", "hausa_female2"])
            elif lang == "igbo":
                speaker_name = random.choice(["igbo_female1", "igbo_female2", "igbo_male2"])
            elif lang == "yoruba":
                speaker_name = random.choice(["yoruba_male2", "yoruba_female1", "yoruba_female2"])
            else:
                speaker_name = random.choice(self.speakers_eng)

        if lang == "english":
            speaker_dir = self.DEFAULT_SPEAKERS_ENG
        else:
            speaker_dir = self.DEFAULT_SPEAKERS_DIR_LOCAL
        speaker = self.load_default_speaker(speaker_name, speaker_dir)
        # The speaker transcript comes first so it aligns with the speaker's audio codes.
        input_words = self.process_text(speaker["text"]) + self.process_text(text)

        inputs_words_strings = self.special_tokens['text_sep'].join(w.strip() for w in input_words)
        prompt = self.text_prompt.format(
            bos=self.bos,
            text_start=self.special_tokens['text_start'],
            words=inputs_words_strings,
            text_end=self.special_tokens['text_end'],
            lang=self.special_tokens[lang],
            audio_start=self.special_tokens['audio_start']
        )
        prompt += self.create_audio_prompt(speaker["words"])

        return prompt

    def replace_tokens(self, text):
        """Return ``text`` with each token in ``self.changed_tokens`` replaced, in list order."""
        for old, new in self.changed_tokens:
            text = text.replace(old, new)
        return text

    def resample(self, audio: torch.Tensor, sr: int, target_sr: int):
        """Convert ``audio`` to float32 mono at ``target_sr`` and move it to ``self.device``.

        Args:
            audio: Audio tensor; a leading dimension is added before conversion.
            sr: Sample rate of ``audio``.
            target_sr: Sample rate to convert to.
        """
        audio = audio.to(dtype=torch.float32)
        audio = audio.unsqueeze(0)
        # 1 as last arg corresponds to mono audio
        resampled = convert_audio(audio, sr, target_sr, 1)
        return resampled.to(self.device)

    def quantize_wavtokenizer(self, path):
        """Encode the audio file at ``path`` into a string of ``<|code|>`` tokens.

        The file is loaded with ``torchaudio``, resampled to 24000 Hz mono and
        passed through ``WavTokenizer.encode_infer``.
        """
        audio_data, sample_rate = torchaudio.load(path)
        audio_data = audio_data.squeeze()
        audio = self.resample(audio_data, sample_rate, 24000)
        if audio.ndim == 3:
            audio = audio.squeeze(1)
        bandwidth_id = torch.tensor([0]).to(self.device)
        _, codes = self.wavtokenizer.encode_infer(audio, bandwidth_id=bandwidth_id)
        codes = codes.squeeze(1).to(self.device)
        return "".join(f"<|{code}|>" for code in codes[0].tolist())

    def create_asr_prompt(self, audio_path):
        """Build an ASR prompt from the audio file at ``audio_path``."""
        codes = self.quantize_wavtokenizer(audio_path)
        prompt = self.asr_prompt.format(
            bos=self.bos,
            code_start=self.special_tokens['code_start'],
            codes=codes,
            code_end=self.special_tokens['code_end'],
            asr=self.special_tokens["asr"],
        )
        return prompt

    def get_asr_results(self, output):
        """Extract the transcript from a generated ASR output.

        Decodes ``output[0]``, keeps the text between the last
        ``<|text_start|>`` and the following ``<|text_end|>``, and joins the
        content found between ``<|word_start|>`` and ``<|word_end|>`` on each
        line with single spaces.
        """
        decoded = self.tokenizer.decode(output[0])
        section = decoded.split("<|text_start|>")[-1].split("<|text_end|>")[0]
        words = [
            line.split("<|word_start|>")[-1].split("<|word_end|>")[0]
            for line in section.split("\n")
        ]
        return " ".join(words).strip()
yarngpt/default_speakers/azeez.json ADDED
@@ -0,0 +1,413 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "text": "Hello! My name is Saheed azeez and I am testing the audio feature",
3
+ "words": [
4
+ {
5
+ "word": "hello",
6
+ "duration": 1.22,
7
+ "codes": [
8
+ 219,
9
+ 244,
10
+ 244,
11
+ 167,
12
+ 453,
13
+ 453,
14
+ 453,
15
+ 453,
16
+ 453,
17
+ 453,
18
+ 453,
19
+ 453,
20
+ 453,
21
+ 453,
22
+ 453,
23
+ 453,
24
+ 453,
25
+ 453,
26
+ 453,
27
+ 453,
28
+ 453,
29
+ 453,
30
+ 453,
31
+ 453,
32
+ 453,
33
+ 453,
34
+ 453,
35
+ 453,
36
+ 453,
37
+ 453,
38
+ 453,
39
+ 453,
40
+ 453,
41
+ 453,
42
+ 244,
43
+ 219,
44
+ 237,
45
+ 864,
46
+ 1041,
47
+ 1048,
48
+ 1372,
49
+ 1780,
50
+ 1554,
51
+ 1024,
52
+ 702,
53
+ 1814,
54
+ 1754,
55
+ 1315,
56
+ 1697,
57
+ 1719,
58
+ 1682,
59
+ 307,
60
+ 621,
61
+ 901,
62
+ 355,
63
+ 783,
64
+ 1726,
65
+ 353,
66
+ 1416,
67
+ 729,
68
+ 803,
69
+ 1494,
70
+ 353,
71
+ 876,
72
+ 1818,
73
+ 932,
74
+ 1068,
75
+ 1813,
76
+ 875,
77
+ 1774,
78
+ 766,
79
+ 1453,
80
+ 1466,
81
+ 792,
82
+ 1388,
83
+ 1495,
84
+ 1236,
85
+ 1462,
86
+ 431,
87
+ 1025,
88
+ 1429,
89
+ 1128,
90
+ 1236,
91
+ 1483,
92
+ 1305,
93
+ 1352,
94
+ 1681,
95
+ 5,
96
+ 1758,
97
+ 1481,
98
+ 1339
99
+ ]
100
+ },
101
+ {
102
+ "word": "my",
103
+ "duration": 0.18,
104
+ "codes": [
105
+ 1333,
106
+ 1339,
107
+ 1388,
108
+ 1373,
109
+ 974,
110
+ 723,
111
+ 1776,
112
+ 1001,
113
+ 1160,
114
+ 1769,
115
+ 1048,
116
+ 1646,
117
+ 1321,
118
+ 912
119
+ ]
120
+ },
121
+ {
122
+ "word": "name",
123
+ "duration": 0.2,
124
+ "codes": [
125
+ 1596,
126
+ 325,
127
+ 876,
128
+ 1303,
129
+ 973,
130
+ 1707,
131
+ 1332,
132
+ 1300,
133
+ 145,
134
+ 1136,
135
+ 1266,
136
+ 1353,
137
+ 845,
138
+ 913,
139
+ 989
140
+ ]
141
+ },
142
+ {
143
+ "word": "is",
144
+ "duration": 0.12,
145
+ "codes": [
146
+ 1257,
147
+ 1372,
148
+ 1617,
149
+ 1800,
150
+ 1568,
151
+ 1679,
152
+ 1798,
153
+ 1476,
154
+ 1759
155
+ ]
156
+ },
157
+ {
158
+ "word": "saheed",
159
+ "duration": 0.5,
160
+ "codes": [
161
+ 1807,
162
+ 1354,
163
+ 1737,
164
+ 1738,
165
+ 1060,
166
+ 1122,
167
+ 1195,
168
+ 1275,
169
+ 1129,
170
+ 1473,
171
+ 688,
172
+ 1675,
173
+ 1724,
174
+ 1392,
175
+ 1146,
176
+ 1605,
177
+ 1784,
178
+ 1476,
179
+ 1454,
180
+ 1743,
181
+ 1824,
182
+ 706,
183
+ 1706,
184
+ 669,
185
+ 91,
186
+ 1079,
187
+ 1456,
188
+ 1645,
189
+ 1041,
190
+ 1687,
191
+ 1425,
192
+ 1205,
193
+ 830,
194
+ 1525,
195
+ 1007,
196
+ 1291,
197
+ 723
198
+ ]
199
+ },
200
+ {
201
+ "word": "azeez",
202
+ "duration": 0.48,
203
+ "codes": [
204
+ 829,
205
+ 926,
206
+ 1438,
207
+ 1124,
208
+ 1282,
209
+ 1745,
210
+ 1019,
211
+ 1430,
212
+ 1657,
213
+ 1715,
214
+ 1637,
215
+ 1653,
216
+ 1713,
217
+ 1370,
218
+ 1534,
219
+ 1410,
220
+ 1767,
221
+ 814,
222
+ 22,
223
+ 1703,
224
+ 1534,
225
+ 1797,
226
+ 1488,
227
+ 1812,
228
+ 1637,
229
+ 1791,
230
+ 1720,
231
+ 1677,
232
+ 1807,
233
+ 1459,
234
+ 1779,
235
+ 1767,
236
+ 1145,
237
+ 1239,
238
+ 1622,
239
+ 1264
240
+ ]
241
+ },
242
+ {
243
+ "word": "and",
244
+ "duration": 0.24,
245
+ "codes": [
246
+ 1780,
247
+ 1291,
248
+ 1174,
249
+ 1435,
250
+ 1494,
251
+ 1807,
252
+ 662,
253
+ 1760,
254
+ 1694,
255
+ 363,
256
+ 1225,
257
+ 1775,
258
+ 1264,
259
+ 1455,
260
+ 1014,
261
+ 1758,
262
+ 1620,
263
+ 1013
264
+ ]
265
+ },
266
+ {
267
+ "word": "i",
268
+ "duration": 0.06,
269
+ "codes": [
270
+ 1823,
271
+ 1295,
272
+ 1397,
273
+ 1108,
274
+ 1275
275
+ ]
276
+ },
277
+ {
278
+ "word": "am",
279
+ "duration": 0.14,
280
+ "codes": [
281
+ 1129,
282
+ 1697,
283
+ 835,
284
+ 1589,
285
+ 1719,
286
+ 1534,
287
+ 1495,
288
+ 1025,
289
+ 1405,
290
+ 766
291
+ ]
292
+ },
293
+ {
294
+ "word": "testing",
295
+ "duration": 0.42,
296
+ "codes": [
297
+ 196,
298
+ 1118,
299
+ 761,
300
+ 1314,
301
+ 1770,
302
+ 1138,
303
+ 1429,
304
+ 728,
305
+ 1497,
306
+ 1792,
307
+ 1049,
308
+ 1430,
309
+ 1062,
310
+ 1788,
311
+ 1354,
312
+ 1555,
313
+ 1735,
314
+ 1728,
315
+ 954,
316
+ 1754,
317
+ 343,
318
+ 1418,
319
+ 636,
320
+ 1501,
321
+ 1301,
322
+ 901,
323
+ 763,
324
+ 1620,
325
+ 1687,
326
+ 177,
327
+ 1706,
328
+ 325
329
+ ]
330
+ },
331
+ {
332
+ "word": "the",
333
+ "duration": 0.14,
334
+ "codes": [
335
+ 810,
336
+ 1421,
337
+ 1404,
338
+ 1093,
339
+ 781,
340
+ 752,
341
+ 1780,
342
+ 1749,
343
+ 850,
344
+ 1435
345
+ ]
346
+ },
347
+ {
348
+ "word": "audio",
349
+ "duration": 0.3,
350
+ "codes": [
351
+ 1792,
352
+ 1381,
353
+ 1309,
354
+ 1472,
355
+ 1449,
356
+ 1785,
357
+ 114,
358
+ 601,
359
+ 866,
360
+ 1764,
361
+ 1212,
362
+ 1453,
363
+ 1152,
364
+ 1777,
365
+ 853,
366
+ 1735,
367
+ 1052,
368
+ 355,
369
+ 1421,
370
+ 1605,
371
+ 1761,
372
+ 1664,
373
+ 540
374
+ ]
375
+ },
376
+ {
377
+ "word": "feature",
378
+ "duration": 0.4,
379
+ "codes": [
380
+ 1682,
381
+ 1442,
382
+ 1819,
383
+ 1818,
384
+ 710,
385
+ 1776,
386
+ 1205,
387
+ 646,
388
+ 1688,
389
+ 1572,
390
+ 875,
391
+ 1367,
392
+ 476,
393
+ 1285,
394
+ 460,
395
+ 342,
396
+ 1784,
397
+ 28,
398
+ 1621,
399
+ 1745,
400
+ 1462,
401
+ 988,
402
+ 1780,
403
+ 1697,
404
+ 1249,
405
+ 1348,
406
+ 1120,
407
+ 1590,
408
+ 803,
409
+ 1205
410
+ ]
411
+ }
412
+ ]
413
+ }
yarngpt/default_speakers/chinenye.json ADDED
@@ -0,0 +1,274 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "text": "and once I got that out of the way",
3
+ "words": [
4
+ {
5
+ "word": "and",
6
+ "duration": 1.18,
7
+ "codes": [
8
+ 1073,
9
+ 1804,
10
+ 1510,
11
+ 1562,
12
+ 377,
13
+ 1287,
14
+ 1615,
15
+ 175,
16
+ 631,
17
+ 1702,
18
+ 1700,
19
+ 1590,
20
+ 1158,
21
+ 1676,
22
+ 758,
23
+ 1727,
24
+ 1548,
25
+ 1464,
26
+ 1605,
27
+ 1469,
28
+ 1291,
29
+ 1755,
30
+ 1656,
31
+ 1323,
32
+ 1372,
33
+ 269,
34
+ 1252,
35
+ 1466,
36
+ 1677,
37
+ 1192,
38
+ 1220,
39
+ 1815,
40
+ 1658,
41
+ 1818,
42
+ 1514,
43
+ 1480,
44
+ 1747,
45
+ 1413,
46
+ 1440,
47
+ 1403,
48
+ 28,
49
+ 1806,
50
+ 1536,
51
+ 1269,
52
+ 1673,
53
+ 1616,
54
+ 1619,
55
+ 1745,
56
+ 1532,
57
+ 1659,
58
+ 1682,
59
+ 1777,
60
+ 1764,
61
+ 1766,
62
+ 1796,
63
+ 1827,
64
+ 719,
65
+ 1768,
66
+ 1761,
67
+ 1524,
68
+ 1782,
69
+ 1410,
70
+ 1748,
71
+ 1764,
72
+ 1447,
73
+ 1791,
74
+ 1790,
75
+ 1528,
76
+ 1550,
77
+ 1491,
78
+ 1764,
79
+ 1324,
80
+ 790,
81
+ 1307,
82
+ 664,
83
+ 719,
84
+ 1224,
85
+ 1571,
86
+ 1740,
87
+ 1062,
88
+ 1775,
89
+ 1494,
90
+ 486,
91
+ 1544,
92
+ 1828,
93
+ 961,
94
+ 1115,
95
+ 1308
96
+ ]
97
+ },
98
+ {
99
+ "word": "once",
100
+ "duration": 0.46,
101
+ "codes": [
102
+ 996,
103
+ 1407,
104
+ 892,
105
+ 1326,
106
+ 1223,
107
+ 362,
108
+ 36,
109
+ 1103,
110
+ 1734,
111
+ 1755,
112
+ 1798,
113
+ 749,
114
+ 1603,
115
+ 1748,
116
+ 519,
117
+ 1643,
118
+ 1744,
119
+ 176,
120
+ 1709,
121
+ 749,
122
+ 1615,
123
+ 1801,
124
+ 1438,
125
+ 1719,
126
+ 1491,
127
+ 1802,
128
+ 1575,
129
+ 1750,
130
+ 1180,
131
+ 1077,
132
+ 855,
133
+ 1511,
134
+ 961,
135
+ 1739,
136
+ 632
137
+ ]
138
+ },
139
+ {
140
+ "word": "i",
141
+ "duration": 0.16,
142
+ "codes": [
143
+ 398,
144
+ 1055,
145
+ 767,
146
+ 57,
147
+ 1777,
148
+ 1706,
149
+ 34,
150
+ 1025,
151
+ 1745,
152
+ 1796,
153
+ 1266,
154
+ 1348
155
+ ]
156
+ },
157
+ {
158
+ "word": "got",
159
+ "duration": 0.24,
160
+ "codes": [
161
+ 1555,
162
+ 639,
163
+ 1708,
164
+ 813,
165
+ 1152,
166
+ 753,
167
+ 718,
168
+ 1742,
169
+ 756,
170
+ 1109,
171
+ 1796,
172
+ 85,
173
+ 1623,
174
+ 1769,
175
+ 1759,
176
+ 1491,
177
+ 1769,
178
+ 1693
179
+ ]
180
+ },
181
+ {
182
+ "word": "that",
183
+ "duration": 0.28,
184
+ "codes": [
185
+ 1555,
186
+ 1732,
187
+ 1301,
188
+ 755,
189
+ 1224,
190
+ 1192,
191
+ 1241,
192
+ 1192,
193
+ 1102,
194
+ 944,
195
+ 1358,
196
+ 855,
197
+ 1342,
198
+ 1603,
199
+ 1693,
200
+ 1783,
201
+ 1689,
202
+ 1803,
203
+ 1126,
204
+ 1089,
205
+ 839
206
+ ]
207
+ },
208
+ {
209
+ "word": "out",
210
+ "duration": 0.16,
211
+ "codes": [
212
+ 887,
213
+ 1726,
214
+ 1411,
215
+ 1758,
216
+ 839,
217
+ 9,
218
+ 1686,
219
+ 1642,
220
+ 1695,
221
+ 998,
222
+ 828,
223
+ 1755
224
+ ]
225
+ },
226
+ {
227
+ "word": "of",
228
+ "duration": 0.08,
229
+ "codes": [
230
+ 1825,
231
+ 1734,
232
+ 1281,
233
+ 1794,
234
+ 1518,
235
+ 1696
236
+ ]
237
+ },
238
+ {
239
+ "word": "the",
240
+ "duration": 0.14,
241
+ "codes": [
242
+ 1565,
243
+ 1608,
244
+ 1541,
245
+ 1258,
246
+ 1798,
247
+ 1499,
248
+ 1685,
249
+ 1554,
250
+ 1776,
251
+ 1602,
252
+ 1381
253
+ ]
254
+ },
255
+ {
256
+ "word": "way",
257
+ "duration": 0.16,
258
+ "codes": [
259
+ 1822,
260
+ 1773,
261
+ 1663,
262
+ 1710,
263
+ 1554,
264
+ 1493,
265
+ 4,
266
+ 1620,
267
+ 1755,
268
+ 416,
269
+ 1384,
270
+ 1688
271
+ ]
272
+ }
273
+ ]
274
+ }
yarngpt/default_speakers/emma.json ADDED
@@ -0,0 +1,441 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "text": "Scientists have discovered a new planet that may be capable of supporting life!",
3
+ "words": [
4
+ {
5
+ "word": "scientists",
6
+ "duration": 0.82,
7
+ "codes": [
8
+ 1334,
9
+ 1359,
10
+ 619,
11
+ 1057,
12
+ 1528,
13
+ 817,
14
+ 1175,
15
+ 884,
16
+ 527,
17
+ 1519,
18
+ 323,
19
+ 980,
20
+ 608,
21
+ 1104,
22
+ 1271,
23
+ 1265,
24
+ 1237,
25
+ 191,
26
+ 1308,
27
+ 203,
28
+ 1126,
29
+ 1226,
30
+ 1265,
31
+ 1073,
32
+ 1661,
33
+ 903,
34
+ 502,
35
+ 197,
36
+ 127,
37
+ 1712,
38
+ 877,
39
+ 1717,
40
+ 1735,
41
+ 1076,
42
+ 1284,
43
+ 1629,
44
+ 784,
45
+ 62,
46
+ 175,
47
+ 432,
48
+ 767,
49
+ 533,
50
+ 990,
51
+ 1258,
52
+ 823,
53
+ 1651,
54
+ 1801,
55
+ 701,
56
+ 1382,
57
+ 554,
58
+ 527,
59
+ 117,
60
+ 323,
61
+ 989,
62
+ 884,
63
+ 817,
64
+ 495,
65
+ 781,
66
+ 1214,
67
+ 1099,
68
+ 1104
69
+ ]
70
+ },
71
+ {
72
+ "word": "have",
73
+ "duration": 0.24,
74
+ "codes": [
75
+ 930,
76
+ 1393,
77
+ 1303,
78
+ 1001,
79
+ 1438,
80
+ 628,
81
+ 1774,
82
+ 973,
83
+ 1758,
84
+ 1501,
85
+ 1761,
86
+ 1428,
87
+ 1725,
88
+ 669,
89
+ 1780,
90
+ 487,
91
+ 866,
92
+ 1762
93
+ ]
94
+ },
95
+ {
96
+ "word": "discovered",
97
+ "duration": 0.66,
98
+ "codes": [
99
+ 820,
100
+ 1592,
101
+ 1737,
102
+ 731,
103
+ 1325,
104
+ 1644,
105
+ 884,
106
+ 1300,
107
+ 323,
108
+ 596,
109
+ 231,
110
+ 296,
111
+ 943,
112
+ 990,
113
+ 1214,
114
+ 1039,
115
+ 1039,
116
+ 1430,
117
+ 866,
118
+ 19,
119
+ 1675,
120
+ 1824,
121
+ 1030,
122
+ 1630,
123
+ 1758,
124
+ 783,
125
+ 1598,
126
+ 1832,
127
+ 1330,
128
+ 1319,
129
+ 1730,
130
+ 1449,
131
+ 1414,
132
+ 1511,
133
+ 695,
134
+ 1526,
135
+ 1410,
136
+ 95,
137
+ 1686,
138
+ 1400,
139
+ 961,
140
+ 1809,
141
+ 1303,
142
+ 355,
143
+ 544,
144
+ 1671,
145
+ 1493,
146
+ 1290,
147
+ 1732,
148
+ 1808
149
+ ]
150
+ },
151
+ {
152
+ "word": "a",
153
+ "duration": 0.14,
154
+ "codes": [
155
+ 968,
156
+ 1281,
157
+ 895,
158
+ 1827,
159
+ 1819,
160
+ 694,
161
+ 1509,
162
+ 1346,
163
+ 928,
164
+ 1449,
165
+ 1512
166
+ ]
167
+ },
168
+ {
169
+ "word": "new",
170
+ "duration": 0.24,
171
+ "codes": [
172
+ 1433,
173
+ 1689,
174
+ 1685,
175
+ 1598,
176
+ 1547,
177
+ 1369,
178
+ 1228,
179
+ 1708,
180
+ 1285,
181
+ 1722,
182
+ 1257,
183
+ 625,
184
+ 1114,
185
+ 1425,
186
+ 465,
187
+ 950,
188
+ 651,
189
+ 561
190
+ ]
191
+ },
192
+ {
193
+ "word": "planet",
194
+ "duration": 0.48,
195
+ "codes": [
196
+ 1707,
197
+ 821,
198
+ 1225,
199
+ 1228,
200
+ 1168,
201
+ 1291,
202
+ 1739,
203
+ 813,
204
+ 1738,
205
+ 966,
206
+ 1829,
207
+ 1229,
208
+ 1751,
209
+ 1280,
210
+ 1120,
211
+ 1537,
212
+ 1145,
213
+ 1257,
214
+ 1145,
215
+ 1490,
216
+ 1565,
217
+ 41,
218
+ 1677,
219
+ 1796,
220
+ 1258,
221
+ 1228,
222
+ 1389,
223
+ 1145,
224
+ 1433,
225
+ 763,
226
+ 1255,
227
+ 355,
228
+ 509,
229
+ 869,
230
+ 1144,
231
+ 501
232
+ ]
233
+ },
234
+ {
235
+ "word": "that",
236
+ "duration": 0.26,
237
+ "codes": [
238
+ 1571,
239
+ 1404,
240
+ 1484,
241
+ 1716,
242
+ 1136,
243
+ 1720,
244
+ 1237,
245
+ 1420,
246
+ 1680,
247
+ 892,
248
+ 1458,
249
+ 1697,
250
+ 669,
251
+ 1658,
252
+ 859,
253
+ 1128,
254
+ 804,
255
+ 1157,
256
+ 1694
257
+ ]
258
+ },
259
+ {
260
+ "word": "may",
261
+ "duration": 0.18,
262
+ "codes": [
263
+ 1339,
264
+ 761,
265
+ 820,
266
+ 1150,
267
+ 823,
268
+ 1706,
269
+ 1815,
270
+ 1354,
271
+ 1417,
272
+ 820,
273
+ 744,
274
+ 1413,
275
+ 995,
276
+ 733
277
+ ]
278
+ },
279
+ {
280
+ "word": "be",
281
+ "duration": 0.18,
282
+ "codes": [
283
+ 20,
284
+ 1763,
285
+ 1417,
286
+ 821,
287
+ 1384,
288
+ 1784,
289
+ 968,
290
+ 1767,
291
+ 501,
292
+ 795,
293
+ 378,
294
+ 242,
295
+ 447
296
+ ]
297
+ },
298
+ {
299
+ "word": "capable",
300
+ "duration": 0.56,
301
+ "codes": [
302
+ 666,
303
+ 1170,
304
+ 1637,
305
+ 1746,
306
+ 1042,
307
+ 1331,
308
+ 695,
309
+ 1739,
310
+ 1136,
311
+ 1471,
312
+ 1823,
313
+ 1185,
314
+ 1231,
315
+ 459,
316
+ 1071,
317
+ 168,
318
+ 418,
319
+ 513,
320
+ 431,
321
+ 669,
322
+ 840,
323
+ 938,
324
+ 1463,
325
+ 1640,
326
+ 1741,
327
+ 86,
328
+ 1273,
329
+ 724,
330
+ 1006,
331
+ 544,
332
+ 1408,
333
+ 1352,
334
+ 1721,
335
+ 1490,
336
+ 1321,
337
+ 1674,
338
+ 792,
339
+ 1765,
340
+ 1093,
341
+ 1731,
342
+ 1506,
343
+ 1742,
344
+ 1465
345
+ ]
346
+ },
347
+ {
348
+ "word": "of",
349
+ "duration": 0.16,
350
+ "codes": [
351
+ 1697,
352
+ 1435,
353
+ 42,
354
+ 1593,
355
+ 1573,
356
+ 1146,
357
+ 1600,
358
+ 980,
359
+ 878,
360
+ 713,
361
+ 796,
362
+ 1364
363
+ ]
364
+ },
365
+ {
366
+ "word": "supporting",
367
+ "duration": 0.62,
368
+ "codes": [
369
+ 541,
370
+ 833,
371
+ 1546,
372
+ 1230,
373
+ 1232,
374
+ 1417,
375
+ 1473,
376
+ 1486,
377
+ 1759,
378
+ 1327,
379
+ 1806,
380
+ 544,
381
+ 918,
382
+ 526,
383
+ 418,
384
+ 950,
385
+ 669,
386
+ 1749,
387
+ 1499,
388
+ 959,
389
+ 1806,
390
+ 203,
391
+ 1771,
392
+ 1651,
393
+ 1433,
394
+ 686,
395
+ 967,
396
+ 484,
397
+ 649,
398
+ 884,
399
+ 176,
400
+ 323,
401
+ 1349,
402
+ 722,
403
+ 1230,
404
+ 1218,
405
+ 1430,
406
+ 1663,
407
+ 1648,
408
+ 1808,
409
+ 1629,
410
+ 1822,
411
+ 1813,
412
+ 1663,
413
+ 1418,
414
+ 1742
415
+ ]
416
+ },
417
+ {
418
+ "word": "life",
419
+ "duration": 0.22,
420
+ "codes": [
421
+ 1622,
422
+ 1648,
423
+ 1141,
424
+ 1682,
425
+ 1353,
426
+ 1351,
427
+ 1822,
428
+ 1229,
429
+ 1621,
430
+ 1435,
431
+ 1766,
432
+ 1428,
433
+ 1727,
434
+ 1343,
435
+ 1769,
436
+ 823,
437
+ 1050
438
+ ]
439
+ }
440
+ ]
441
+ }
yarngpt/default_speakers/idera.json ADDED
@@ -0,0 +1,396 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "text": "Scientists have discovered a new planet that may be capable of supporting life!",
3
+ "words": [
4
+ {
5
+ "word": "scientists",
6
+ "duration": "1.00",
7
+ "codes": [
8
+ 258,
9
+ 551,
10
+ 21,
11
+ 401,
12
+ 509,
13
+ 235,
14
+ 151,
15
+ 94,
16
+ 194,
17
+ 496,
18
+ 241,
19
+ 420,
20
+ 606,
21
+ 256,
22
+ 311,
23
+ 464,
24
+ 343,
25
+ 765,
26
+ 56,
27
+ 23,
28
+ 209,
29
+ 72,
30
+ 851,
31
+ 360,
32
+ 442,
33
+ 257,
34
+ 457,
35
+ 75,
36
+ 265,
37
+ 227,
38
+ 16,
39
+ 167,
40
+ 194,
41
+ 391,
42
+ 68,
43
+ 786,
44
+ 1642,
45
+ 888,
46
+ 884,
47
+ 1688,
48
+ 1021,
49
+ 1270,
50
+ 1250,
51
+ 640,
52
+ 1471,
53
+ 1193,
54
+ 1117,
55
+ 95,
56
+ 158,
57
+ 587,
58
+ 1484,
59
+ 1054,
60
+ 947,
61
+ 521,
62
+ 234,
63
+ 502,
64
+ 1172,
65
+ 1379,
66
+ 1332,
67
+ 1267,
68
+ 1659,
69
+ 226,
70
+ 325,
71
+ 404,
72
+ 634,
73
+ 713,
74
+ 333,
75
+ 1210,
76
+ 1028,
77
+ 700,
78
+ 1804,
79
+ 1549,
80
+ 1552,
81
+ 1527,
82
+ 701,
83
+ 895
84
+ ]
85
+ },
86
+ {
87
+ "word": "have",
88
+ "duration": "0.16",
89
+ "codes": [
90
+ 652,
91
+ 1487,
92
+ 1045,
93
+ 665,
94
+ 384,
95
+ 908,
96
+ 1073,
97
+ 903,
98
+ 169,
99
+ 91,
100
+ 1242,
101
+ 59,
102
+ 1614
103
+ ]
104
+ },
105
+ {
106
+ "word": "discovered",
107
+ "duration": "0.52",
108
+ "codes": [
109
+ 1523,
110
+ 519,
111
+ 1311,
112
+ 1166,
113
+ 1049,
114
+ 368,
115
+ 176,
116
+ 1546,
117
+ 990,
118
+ 546,
119
+ 1091,
120
+ 872,
121
+ 975,
122
+ 224,
123
+ 419,
124
+ 1714,
125
+ 1247,
126
+ 1769,
127
+ 1141,
128
+ 811,
129
+ 1149,
130
+ 320,
131
+ 1161,
132
+ 982,
133
+ 732,
134
+ 473,
135
+ 1025,
136
+ 470,
137
+ 1253,
138
+ 1345,
139
+ 965,
140
+ 916,
141
+ 407,
142
+ 844,
143
+ 594,
144
+ 1710,
145
+ 193,
146
+ 740,
147
+ 761,
148
+ 1740
149
+ ]
150
+ },
151
+ {
152
+ "word": "a",
153
+ "duration": "0.08",
154
+ "codes": [
155
+ 5,
156
+ 414,
157
+ 1608,
158
+ 449,
159
+ 1643,
160
+ 1732,
161
+ 1653
162
+ ]
163
+ },
164
+ {
165
+ "word": "new",
166
+ "duration": "0.18",
167
+ "codes": [
168
+ 396,
169
+ 1599,
170
+ 1733,
171
+ 250,
172
+ 1624,
173
+ 485,
174
+ 1645,
175
+ 771,
176
+ 1630,
177
+ 736,
178
+ 336,
179
+ 476,
180
+ 641,
181
+ 345
182
+ ]
183
+ },
184
+ {
185
+ "word": "planet",
186
+ "duration": "0.38",
187
+ "codes": [
188
+ 21,
189
+ 131,
190
+ 1743,
191
+ 1082,
192
+ 1707,
193
+ 86,
194
+ 1075,
195
+ 883,
196
+ 944,
197
+ 1103,
198
+ 790,
199
+ 978,
200
+ 860,
201
+ 1738,
202
+ 1060,
203
+ 749,
204
+ 171,
205
+ 679,
206
+ 1144,
207
+ 966,
208
+ 1532,
209
+ 1179,
210
+ 714,
211
+ 1123,
212
+ 1308,
213
+ 1524,
214
+ 752,
215
+ 1613,
216
+ 1266
217
+ ]
218
+ },
219
+ {
220
+ "word": "that",
221
+ "duration": "0.14",
222
+ "codes": [
223
+ 64,
224
+ 32,
225
+ 1457,
226
+ 1095,
227
+ 931,
228
+ 1774,
229
+ 1017,
230
+ 1661,
231
+ 1713,
232
+ 355,
233
+ 1708
234
+ ]
235
+ },
236
+ {
237
+ "word": "may",
238
+ "duration": "0.12",
239
+ "codes": [
240
+ 1800,
241
+ 1070,
242
+ 1452,
243
+ 1185,
244
+ 1295,
245
+ 26,
246
+ 638,
247
+ 240,
248
+ 1480,
249
+ 1461
250
+ ]
251
+ },
252
+ {
253
+ "word": "be",
254
+ "duration": "0.12",
255
+ "codes": [
256
+ 859,
257
+ 729,
258
+ 848,
259
+ 1131,
260
+ 1618,
261
+ 928,
262
+ 331,
263
+ 504,
264
+ 487,
265
+ 417
266
+ ]
267
+ },
268
+ {
269
+ "word": "capable",
270
+ "duration": "0.42",
271
+ "codes": [
272
+ 686,
273
+ 1040,
274
+ 28,
275
+ 1456,
276
+ 1056,
277
+ 1133,
278
+ 901,
279
+ 1127,
280
+ 693,
281
+ 1406,
282
+ 20,
283
+ 118,
284
+ 141,
285
+ 572,
286
+ 845,
287
+ 1280,
288
+ 353,
289
+ 1726,
290
+ 338,
291
+ 1413,
292
+ 484,
293
+ 272,
294
+ 1569,
295
+ 144,
296
+ 1581,
297
+ 437,
298
+ 1502,
299
+ 963,
300
+ 1415,
301
+ 655,
302
+ 949,
303
+ 1289
304
+ ]
305
+ },
306
+ {
307
+ "word": "of",
308
+ "duration": "0.10",
309
+ "codes": [
310
+ 1198,
311
+ 1755,
312
+ 1478,
313
+ 1548,
314
+ 802,
315
+ 1513,
316
+ 1290,
317
+ 636
318
+ ]
319
+ },
320
+ {
321
+ "word": "supporting",
322
+ "duration": "0.54",
323
+ "codes": [
324
+ 541,
325
+ 867,
326
+ 750,
327
+ 1505,
328
+ 754,
329
+ 1344,
330
+ 1032,
331
+ 734,
332
+ 505,
333
+ 559,
334
+ 220,
335
+ 288,
336
+ 342,
337
+ 591,
338
+ 1459,
339
+ 1721,
340
+ 490,
341
+ 825,
342
+ 80,
343
+ 1221,
344
+ 1234,
345
+ 639,
346
+ 1052,
347
+ 450,
348
+ 1557,
349
+ 1302,
350
+ 784,
351
+ 1547,
352
+ 823,
353
+ 527,
354
+ 1667,
355
+ 1437,
356
+ 832,
357
+ 1366,
358
+ 674,
359
+ 1607,
360
+ 486,
361
+ 893,
362
+ 1748,
363
+ 792,
364
+ 1757
365
+ ]
366
+ },
367
+ {
368
+ "word": "life",
369
+ "duration": "0.28",
370
+ "codes": [
371
+ 1761,
372
+ 149,
373
+ 1501,
374
+ 1342,
375
+ 1063,
376
+ 1124,
377
+ 117,
378
+ 1225,
379
+ 1115,
380
+ 1155,
381
+ 1815,
382
+ 1035,
383
+ 936,
384
+ 807,
385
+ 930,
386
+ 1514,
387
+ 837,
388
+ 1104,
389
+ 1145,
390
+ 1164,
391
+ 1687,
392
+ 1589
393
+ ]
394
+ }
395
+ ]
396
+ }
yarngpt/default_speakers/joke.json ADDED
@@ -0,0 +1,430 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "text": "i still said you and i was like mister so this is what you are doing with",
3
+ "words": [
4
+ {
5
+ "word": "i",
6
+ "duration": 0.34,
7
+ "codes": [
8
+ 1737,
9
+ 1555,
10
+ 1439,
11
+ 1679,
12
+ 1634,
13
+ 1661,
14
+ 1764,
15
+ 1698,
16
+ 1715,
17
+ 862,
18
+ 1516,
19
+ 1427,
20
+ 1350,
21
+ 1136,
22
+ 1472,
23
+ 1113,
24
+ 1686,
25
+ 1596,
26
+ 1005,
27
+ 1365,
28
+ 1180,
29
+ 1473,
30
+ 1296,
31
+ 1337,
32
+ 1579
33
+ ]
34
+ },
35
+ {
36
+ "word": "still",
37
+ "duration": 0.26,
38
+ "codes": [
39
+ 848,
40
+ 1653,
41
+ 1756,
42
+ 1711,
43
+ 1693,
44
+ 1722,
45
+ 1580,
46
+ 1552,
47
+ 502,
48
+ 1416,
49
+ 1463,
50
+ 1341,
51
+ 1449,
52
+ 1542,
53
+ 1700,
54
+ 1786,
55
+ 428,
56
+ 1728,
57
+ 1624,
58
+ 1624
59
+ ]
60
+ },
61
+ {
62
+ "word": "said",
63
+ "duration": 0.24,
64
+ "codes": [
65
+ 1657,
66
+ 1744,
67
+ 1657,
68
+ 1634,
69
+ 1615,
70
+ 1534,
71
+ 996,
72
+ 1296,
73
+ 1542,
74
+ 577,
75
+ 1047,
76
+ 1506,
77
+ 440,
78
+ 1756,
79
+ 1783,
80
+ 1593,
81
+ 906,
82
+ 1810
83
+ ]
84
+ },
85
+ {
86
+ "word": "you",
87
+ "duration": 0.62,
88
+ "codes": [
89
+ 1610,
90
+ 409,
91
+ 1534,
92
+ 1685,
93
+ 1709,
94
+ 1756,
95
+ 363,
96
+ 1441,
97
+ 1789,
98
+ 1594,
99
+ 863,
100
+ 1773,
101
+ 1612,
102
+ 1535,
103
+ 1602,
104
+ 1615,
105
+ 1426,
106
+ 48,
107
+ 1690,
108
+ 1740,
109
+ 1650,
110
+ 1824,
111
+ 1613,
112
+ 1807,
113
+ 1041,
114
+ 1778,
115
+ 719,
116
+ 1002,
117
+ 1759,
118
+ 1403,
119
+ 1766,
120
+ 1826,
121
+ 1002,
122
+ 1769,
123
+ 1661,
124
+ 1278,
125
+ 1759,
126
+ 1351,
127
+ 1638,
128
+ 1740,
129
+ 1395,
130
+ 1722,
131
+ 1765,
132
+ 1751,
133
+ 1461,
134
+ 1492
135
+ ]
136
+ },
137
+ {
138
+ "word": "and",
139
+ "duration": 0.14,
140
+ "codes": [
141
+ 1056,
142
+ 1494,
143
+ 1389,
144
+ 1002,
145
+ 1452,
146
+ 1413,
147
+ 1345,
148
+ 1401,
149
+ 1593,
150
+ 1073,
151
+ 775
152
+ ]
153
+ },
154
+ {
155
+ "word": "i",
156
+ "duration": 0.08,
157
+ "codes": [
158
+ 1812,
159
+ 547,
160
+ 1581,
161
+ 1468,
162
+ 949,
163
+ 1740
164
+ ]
165
+ },
166
+ {
167
+ "word": "was",
168
+ "duration": 0.16,
169
+ "codes": [
170
+ 1662,
171
+ 1542,
172
+ 363,
173
+ 1374,
174
+ 1598,
175
+ 1563,
176
+ 1394,
177
+ 473,
178
+ 863,
179
+ 1587,
180
+ 1685,
181
+ 1729
182
+ ]
183
+ },
184
+ {
185
+ "word": "like",
186
+ "duration": 0.28,
187
+ "codes": [
188
+ 1407,
189
+ 1444,
190
+ 1286,
191
+ 1506,
192
+ 1366,
193
+ 1286,
194
+ 1013,
195
+ 502,
196
+ 631,
197
+ 1449,
198
+ 1374,
199
+ 1711,
200
+ 1413,
201
+ 1660,
202
+ 1679,
203
+ 1783,
204
+ 1772,
205
+ 1723,
206
+ 1549,
207
+ 1674,
208
+ 1388
209
+ ]
210
+ },
211
+ {
212
+ "word": "mister",
213
+ "duration": 0.84,
214
+ "codes": [
215
+ 1591,
216
+ 1765,
217
+ 1653,
218
+ 1549,
219
+ 1449,
220
+ 1341,
221
+ 473,
222
+ 1363,
223
+ 1605,
224
+ 1554,
225
+ 1387,
226
+ 1641,
227
+ 1439,
228
+ 362,
229
+ 1606,
230
+ 319,
231
+ 1691,
232
+ 1582,
233
+ 1617,
234
+ 1756,
235
+ 1286,
236
+ 1409,
237
+ 1221,
238
+ 1372,
239
+ 1584,
240
+ 794,
241
+ 1636,
242
+ 1488,
243
+ 1280,
244
+ 1366,
245
+ 1753,
246
+ 1636,
247
+ 882,
248
+ 1723,
249
+ 1796,
250
+ 1769,
251
+ 1717,
252
+ 1549,
253
+ 1518,
254
+ 1633,
255
+ 175,
256
+ 1678,
257
+ 1679,
258
+ 1549,
259
+ 1732,
260
+ 1710,
261
+ 1662,
262
+ 1744,
263
+ 1641,
264
+ 1696,
265
+ 1565,
266
+ 1769,
267
+ 1789,
268
+ 719,
269
+ 1831,
270
+ 1786,
271
+ 1451,
272
+ 1728,
273
+ 1646,
274
+ 1713,
275
+ 1672,
276
+ 1774,
277
+ 1734
278
+ ]
279
+ },
280
+ {
281
+ "word": "so",
282
+ "duration": 0.14,
283
+ "codes": [
284
+ 1354,
285
+ 1518,
286
+ 1791,
287
+ 1374,
288
+ 277,
289
+ 1542,
290
+ 1366,
291
+ 700,
292
+ 1444,
293
+ 1744,
294
+ 1217
295
+ ]
296
+ },
297
+ {
298
+ "word": "this",
299
+ "duration": 0.2,
300
+ "codes": [
301
+ 1461,
302
+ 1588,
303
+ 1672,
304
+ 1712,
305
+ 1679,
306
+ 175,
307
+ 63,
308
+ 426,
309
+ 293,
310
+ 1654,
311
+ 57,
312
+ 1616,
313
+ 1394,
314
+ 1789,
315
+ 175
316
+ ]
317
+ },
318
+ {
319
+ "word": "is",
320
+ "duration": 0.06,
321
+ "codes": [
322
+ 1394,
323
+ 1605,
324
+ 1596,
325
+ 1800,
326
+ 269
327
+ ]
328
+ },
329
+ {
330
+ "word": "what",
331
+ "duration": 0.16,
332
+ "codes": [
333
+ 1706,
334
+ 759,
335
+ 1047,
336
+ 1493,
337
+ 637,
338
+ 1723,
339
+ 1772,
340
+ 1748,
341
+ 1634,
342
+ 4,
343
+ 1387,
344
+ 1710
345
+ ]
346
+ },
347
+ {
348
+ "word": "you",
349
+ "duration": 0.1,
350
+ "codes": [
351
+ 890,
352
+ 1374,
353
+ 1019,
354
+ 848,
355
+ 1415,
356
+ 1341,
357
+ 1073
358
+ ]
359
+ },
360
+ {
361
+ "word": "are",
362
+ "duration": 0.1,
363
+ "codes": [
364
+ 1286,
365
+ 127,
366
+ 949,
367
+ 870,
368
+ 1734,
369
+ 1593,
370
+ 1761,
371
+ 1717
372
+ ]
373
+ },
374
+ {
375
+ "word": "doing",
376
+ "duration": 0.22,
377
+ "codes": [
378
+ 1643,
379
+ 1485,
380
+ 1708,
381
+ 1394,
382
+ 1469,
383
+ 348,
384
+ 1676,
385
+ 1685,
386
+ 428,
387
+ 1584,
388
+ 1695,
389
+ 1596,
390
+ 1613,
391
+ 1286,
392
+ 1787,
393
+ 1374
394
+ ]
395
+ },
396
+ {
397
+ "word": "with",
398
+ "duration": 0.36,
399
+ "codes": [
400
+ 1382,
401
+ 615,
402
+ 1127,
403
+ 1742,
404
+ 1591,
405
+ 239,
406
+ 1810,
407
+ 1778,
408
+ 719,
409
+ 1616,
410
+ 1549,
411
+ 519,
412
+ 1804,
413
+ 1416,
414
+ 1636,
415
+ 1584,
416
+ 1437,
417
+ 1698,
418
+ 1625,
419
+ 1494,
420
+ 1633,
421
+ 1545,
422
+ 1747,
423
+ 1737,
424
+ 1672,
425
+ 1646,
426
+ 1778
427
+ ]
428
+ }
429
+ ]
430
+ }
yarngpt/default_speakers/jude.json ADDED
@@ -0,0 +1,263 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "text": "know what I'm saying what I'm saying is that if you say",
3
+ "words": [
4
+ {
5
+ "word": "know",
6
+ "duration": 0.44,
7
+ "codes": [
8
+ 1824,
9
+ 1820,
10
+ 1743,
11
+ 1819,
12
+ 1171,
13
+ 1796,
14
+ 1613,
15
+ 1126,
16
+ 1500,
17
+ 1346,
18
+ 1429,
19
+ 1810,
20
+ 1655,
21
+ 1462,
22
+ 1780,
23
+ 1812,
24
+ 1518,
25
+ 1431,
26
+ 741,
27
+ 1206,
28
+ 1325,
29
+ 1392,
30
+ 920,
31
+ 409,
32
+ 4,
33
+ 1270,
34
+ 416,
35
+ 1759,
36
+ 1141,
37
+ 708,
38
+ 1022,
39
+ 1769,
40
+ 1384
41
+ ]
42
+ },
43
+ {
44
+ "word": "what",
45
+ "duration": 0.12,
46
+ "codes": [
47
+ 607,
48
+ 787,
49
+ 48,
50
+ 1350,
51
+ 1340,
52
+ 297,
53
+ 364,
54
+ 825,
55
+ 1775
56
+ ]
57
+ },
58
+ {
59
+ "word": "im",
60
+ "duration": 0.1,
61
+ "codes": [
62
+ 1668,
63
+ 1311,
64
+ 1651,
65
+ 1048,
66
+ 176,
67
+ 430,
68
+ 333
69
+ ]
70
+ },
71
+ {
72
+ "word": "saying",
73
+ "duration": 0.56,
74
+ "codes": [
75
+ 822,
76
+ 648,
77
+ 1568,
78
+ 1660,
79
+ 1071,
80
+ 1399,
81
+ 890,
82
+ 1396,
83
+ 1381,
84
+ 1818,
85
+ 124,
86
+ 1623,
87
+ 361,
88
+ 1588,
89
+ 1688,
90
+ 1280,
91
+ 1805,
92
+ 1659,
93
+ 1605,
94
+ 1412,
95
+ 1672,
96
+ 1752,
97
+ 1741,
98
+ 1514,
99
+ 1817,
100
+ 1796,
101
+ 1763,
102
+ 1790,
103
+ 1595,
104
+ 1788,
105
+ 1823,
106
+ 758,
107
+ 1466,
108
+ 1802,
109
+ 1788,
110
+ 1649,
111
+ 1614,
112
+ 1751,
113
+ 1718,
114
+ 1585,
115
+ 1637,
116
+ 1773
117
+ ]
118
+ },
119
+ {
120
+ "word": "what",
121
+ "duration": 0.12,
122
+ "codes": [
123
+ 1666,
124
+ 1680,
125
+ 1431,
126
+ 411,
127
+ 1687,
128
+ 695,
129
+ 1629,
130
+ 1678,
131
+ 664,
132
+ 1087
133
+ ]
134
+ },
135
+ {
136
+ "word": "im",
137
+ "duration": 0.16,
138
+ "codes": [
139
+ 117,
140
+ 408,
141
+ 1813,
142
+ 1729,
143
+ 1336,
144
+ 1710,
145
+ 1833,
146
+ 1615,
147
+ 276,
148
+ 362,
149
+ 1364,
150
+ 687
151
+ ]
152
+ },
153
+ {
154
+ "word": "saying",
155
+ "duration": 0.26,
156
+ "codes": [
157
+ 28,
158
+ 440,
159
+ 1376,
160
+ 1196,
161
+ 1147,
162
+ 1636,
163
+ 1272,
164
+ 1449,
165
+ 198,
166
+ 1277,
167
+ 1470,
168
+ 1485,
169
+ 1100,
170
+ 1588,
171
+ 1673,
172
+ 1620,
173
+ 1710,
174
+ 1753,
175
+ 806
176
+ ]
177
+ },
178
+ {
179
+ "word": "is",
180
+ "duration": 0.06,
181
+ "codes": [
182
+ 1621,
183
+ 1636,
184
+ 1833,
185
+ 529,
186
+ 1653
187
+ ]
188
+ },
189
+ {
190
+ "word": "that",
191
+ "duration": 0.24,
192
+ "codes": [
193
+ 1773,
194
+ 1004,
195
+ 1796,
196
+ 907,
197
+ 239,
198
+ 1804,
199
+ 565,
200
+ 1432,
201
+ 1534,
202
+ 1718,
203
+ 1643,
204
+ 1432,
205
+ 1447,
206
+ 1273,
207
+ 1824,
208
+ 1657,
209
+ 1776,
210
+ 1651
211
+ ]
212
+ },
213
+ {
214
+ "word": "if",
215
+ "duration": 0.12,
216
+ "codes": [
217
+ 1649,
218
+ 1620,
219
+ 1342,
220
+ 176,
221
+ 1773,
222
+ 178,
223
+ 1710,
224
+ 1710,
225
+ 1521
226
+ ]
227
+ },
228
+ {
229
+ "word": "you",
230
+ "duration": 0.16,
231
+ "codes": [
232
+ 959,
233
+ 1728,
234
+ 1651,
235
+ 361,
236
+ 822,
237
+ 1661,
238
+ 1341,
239
+ 780,
240
+ 1518,
241
+ 335,
242
+ 452,
243
+ 736
244
+ ]
245
+ },
246
+ {
247
+ "word": "say",
248
+ "duration": 0.14,
249
+ "codes": [
250
+ 372,
251
+ 1217,
252
+ 713,
253
+ 848,
254
+ 1140,
255
+ 1420,
256
+ 1549,
257
+ 483,
258
+ 125,
259
+ 1353
260
+ ]
261
+ }
262
+ ]
263
+ }
yarngpt/default_speakers/onye.json ADDED
@@ -0,0 +1,621 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "text": "out to another level also going through in the shop chop scotch bonnet peppers",
3
+ "words": [
4
+ {
5
+ "word": "out",
6
+ "duration": 0.34,
7
+ "codes": [
8
+ 546,
9
+ 416,
10
+ 1519,
11
+ 1673,
12
+ 1806,
13
+ 1015,
14
+ 693,
15
+ 1447,
16
+ 9,
17
+ 1306,
18
+ 1485,
19
+ 1477,
20
+ 1178,
21
+ 1543,
22
+ 1830,
23
+ 1558,
24
+ 1801,
25
+ 1423,
26
+ 1487,
27
+ 1165,
28
+ 1743,
29
+ 1726,
30
+ 1772,
31
+ 368,
32
+ 1555
33
+ ]
34
+ },
35
+ {
36
+ "word": "to",
37
+ "duration": 0.28,
38
+ "codes": [
39
+ 1823,
40
+ 1713,
41
+ 1734,
42
+ 368,
43
+ 1547,
44
+ 1741,
45
+ 1737,
46
+ 1784,
47
+ 1801,
48
+ 1732,
49
+ 1389,
50
+ 994,
51
+ 1158,
52
+ 1278,
53
+ 1800,
54
+ 1658,
55
+ 519,
56
+ 1542,
57
+ 1792,
58
+ 1700,
59
+ 1415
60
+ ]
61
+ },
62
+ {
63
+ "word": "another",
64
+ "duration": 0.4,
65
+ "codes": [
66
+ 1541,
67
+ 1824,
68
+ 1624,
69
+ 1757,
70
+ 1294,
71
+ 1734,
72
+ 1756,
73
+ 1821,
74
+ 1147,
75
+ 1663,
76
+ 1697,
77
+ 1156,
78
+ 1069,
79
+ 53,
80
+ 1223,
81
+ 1212,
82
+ 1736,
83
+ 1748,
84
+ 1744,
85
+ 758,
86
+ 1494,
87
+ 374,
88
+ 1187,
89
+ 1448,
90
+ 1410,
91
+ 1356,
92
+ 1732,
93
+ 1452,
94
+ 1295,
95
+ 1656
96
+ ]
97
+ },
98
+ {
99
+ "word": "level",
100
+ "duration": 1.86,
101
+ "codes": [
102
+ 1688,
103
+ 1527,
104
+ 1417,
105
+ 1486,
106
+ 384,
107
+ 1378,
108
+ 1342,
109
+ 1075,
110
+ 1046,
111
+ 1247,
112
+ 1660,
113
+ 1525,
114
+ 719,
115
+ 1769,
116
+ 1628,
117
+ 1810,
118
+ 1078,
119
+ 1429,
120
+ 1483,
121
+ 1280,
122
+ 1814,
123
+ 1115,
124
+ 184,
125
+ 1014,
126
+ 1686,
127
+ 1341,
128
+ 1347,
129
+ 1502,
130
+ 1350,
131
+ 1666,
132
+ 1686,
133
+ 1823,
134
+ 1749,
135
+ 1412,
136
+ 1651,
137
+ 1832,
138
+ 1701,
139
+ 1782,
140
+ 1741,
141
+ 1798,
142
+ 1828,
143
+ 1701,
144
+ 1796,
145
+ 1807,
146
+ 1701,
147
+ 1768,
148
+ 1817,
149
+ 1524,
150
+ 1786,
151
+ 1400,
152
+ 1717,
153
+ 1722,
154
+ 1773,
155
+ 1202,
156
+ 1098,
157
+ 1161,
158
+ 1750,
159
+ 822,
160
+ 1420,
161
+ 1434,
162
+ 979,
163
+ 1764,
164
+ 1313,
165
+ 1734,
166
+ 1458,
167
+ 1660,
168
+ 1200,
169
+ 370,
170
+ 1636,
171
+ 1186,
172
+ 768,
173
+ 855,
174
+ 599,
175
+ 1632,
176
+ 1164,
177
+ 1041,
178
+ 1791,
179
+ 1714,
180
+ 368,
181
+ 1715,
182
+ 1500,
183
+ 1817,
184
+ 1817,
185
+ 1772,
186
+ 1805,
187
+ 1825,
188
+ 1818,
189
+ 1828,
190
+ 1395,
191
+ 1718,
192
+ 1818,
193
+ 0,
194
+ 1696,
195
+ 1808,
196
+ 1637,
197
+ 1796,
198
+ 1701,
199
+ 1796,
200
+ 1824,
201
+ 1646,
202
+ 1702,
203
+ 1714,
204
+ 895,
205
+ 1764,
206
+ 1637,
207
+ 1717,
208
+ 1747,
209
+ 1751,
210
+ 1696,
211
+ 639,
212
+ 1436,
213
+ 1828,
214
+ 1818,
215
+ 1737,
216
+ 1832,
217
+ 1646,
218
+ 1796,
219
+ 1822,
220
+ 1741,
221
+ 1791,
222
+ 1701,
223
+ 1796,
224
+ 1779,
225
+ 1638,
226
+ 1783,
227
+ 1751,
228
+ 1781,
229
+ 1768,
230
+ 1412,
231
+ 1744,
232
+ 1720,
233
+ 1403,
234
+ 1802,
235
+ 1638,
236
+ 1734,
237
+ 1802,
238
+ 1826,
239
+ 1785,
240
+ 1443,
241
+ 1167
242
+ ]
243
+ },
244
+ {
245
+ "word": "also",
246
+ "duration": 0.26,
247
+ "codes": [
248
+ 973,
249
+ 1187,
250
+ 1333,
251
+ 359,
252
+ 1494,
253
+ 1222,
254
+ 1759,
255
+ 749,
256
+ 533,
257
+ 4,
258
+ 1599,
259
+ 1608,
260
+ 1280,
261
+ 1167,
262
+ 1015,
263
+ 1526,
264
+ 1662,
265
+ 1728,
266
+ 1016,
267
+ 1796
268
+ ]
269
+ },
270
+ {
271
+ "word": "going",
272
+ "duration": 0.26,
273
+ "codes": [
274
+ 1789,
275
+ 1291,
276
+ 1209,
277
+ 828,
278
+ 1452,
279
+ 1749,
280
+ 1052,
281
+ 1460,
282
+ 1783,
283
+ 1656,
284
+ 1542,
285
+ 1281,
286
+ 1710,
287
+ 1716,
288
+ 1404,
289
+ 1734,
290
+ 495,
291
+ 1624,
292
+ 1747
293
+ ]
294
+ },
295
+ {
296
+ "word": "through",
297
+ "duration": 0.34,
298
+ "codes": [
299
+ 1465,
300
+ 1664,
301
+ 1786,
302
+ 231,
303
+ 1826,
304
+ 1318,
305
+ 1494,
306
+ 1505,
307
+ 1063,
308
+ 1311,
309
+ 1656,
310
+ 1265,
311
+ 1720,
312
+ 1226,
313
+ 940,
314
+ 1490,
315
+ 1447,
316
+ 1730,
317
+ 1348,
318
+ 1637,
319
+ 1118,
320
+ 1710,
321
+ 841,
322
+ 795,
323
+ 298,
324
+ 1216
325
+ ]
326
+ },
327
+ {
328
+ "word": "in",
329
+ "duration": 0.42,
330
+ "codes": [
331
+ 899,
332
+ 1240,
333
+ 869,
334
+ 679,
335
+ 1343,
336
+ 1280,
337
+ 1681,
338
+ 1221,
339
+ 1632,
340
+ 1221,
341
+ 1479,
342
+ 1431,
343
+ 1623,
344
+ 1372,
345
+ 1722,
346
+ 1494,
347
+ 1011,
348
+ 1636,
349
+ 957,
350
+ 1661,
351
+ 939,
352
+ 1772,
353
+ 1096,
354
+ 1688,
355
+ 1537,
356
+ 1360,
357
+ 1734,
358
+ 1595,
359
+ 1781,
360
+ 1284,
361
+ 1413
362
+ ]
363
+ },
364
+ {
365
+ "word": "the",
366
+ "duration": 1.08,
367
+ "codes": [
368
+ 1701,
369
+ 1447,
370
+ 1328,
371
+ 1690,
372
+ 1281,
373
+ 1401,
374
+ 700,
375
+ 1295,
376
+ 1494,
377
+ 1326,
378
+ 1218,
379
+ 361,
380
+ 922,
381
+ 1210,
382
+ 1300,
383
+ 19,
384
+ 1403,
385
+ 1272,
386
+ 1150,
387
+ 1062,
388
+ 1457,
389
+ 1344,
390
+ 1167,
391
+ 1742,
392
+ 996,
393
+ 1158,
394
+ 1245,
395
+ 1210,
396
+ 1720,
397
+ 1823,
398
+ 85,
399
+ 1829,
400
+ 1555,
401
+ 1718,
402
+ 979,
403
+ 1665,
404
+ 1783,
405
+ 1088,
406
+ 1810,
407
+ 1828,
408
+ 1795,
409
+ 1419,
410
+ 1795,
411
+ 1826,
412
+ 1779,
413
+ 1741,
414
+ 1719,
415
+ 1809,
416
+ 1646,
417
+ 1765,
418
+ 1818,
419
+ 1713,
420
+ 1821,
421
+ 1737,
422
+ 1348,
423
+ 1821,
424
+ 1400,
425
+ 1748,
426
+ 1278,
427
+ 1521,
428
+ 758,
429
+ 1701,
430
+ 1798,
431
+ 1817,
432
+ 1646,
433
+ 1672,
434
+ 1825,
435
+ 1796,
436
+ 957,
437
+ 1808,
438
+ 1807,
439
+ 1833,
440
+ 1798,
441
+ 1425,
442
+ 1830,
443
+ 1037,
444
+ 1251,
445
+ 554,
446
+ 1395,
447
+ 175,
448
+ 919
449
+ ]
450
+ },
451
+ {
452
+ "word": "shop",
453
+ "duration": 0.3,
454
+ "codes": [
455
+ 1611,
456
+ 154,
457
+ 1329,
458
+ 1701,
459
+ 1677,
460
+ 1210,
461
+ 880,
462
+ 660,
463
+ 816,
464
+ 1276,
465
+ 1471,
466
+ 41,
467
+ 1779,
468
+ 1465,
469
+ 1298,
470
+ 1817,
471
+ 1777,
472
+ 1073,
473
+ 1713,
474
+ 1808,
475
+ 1818,
476
+ 1348,
477
+ 1711
478
+ ]
479
+ },
480
+ {
481
+ "word": "chop",
482
+ "duration": 0.3,
483
+ "codes": [
484
+ 1439,
485
+ 4,
486
+ 315,
487
+ 1751,
488
+ 1731,
489
+ 53,
490
+ 1184,
491
+ 1132,
492
+ 755,
493
+ 1429,
494
+ 1464,
495
+ 1483,
496
+ 1770,
497
+ 1749,
498
+ 1278,
499
+ 1769,
500
+ 1511,
501
+ 1683,
502
+ 1779,
503
+ 1660,
504
+ 183,
505
+ 1535,
506
+ 416
507
+ ]
508
+ },
509
+ {
510
+ "word": "scotch",
511
+ "duration": 0.4,
512
+ "codes": [
513
+ 1518,
514
+ 1679,
515
+ 0,
516
+ 1695,
517
+ 1682,
518
+ 1098,
519
+ 1764,
520
+ 1256,
521
+ 1808,
522
+ 1609,
523
+ 1745,
524
+ 1318,
525
+ 632,
526
+ 1197,
527
+ 271,
528
+ 1683,
529
+ 1774,
530
+ 1824,
531
+ 1783,
532
+ 1671,
533
+ 1805,
534
+ 22,
535
+ 631,
536
+ 117,
537
+ 1345,
538
+ 800,
539
+ 1707,
540
+ 1466,
541
+ 1005,
542
+ 1462
543
+ ]
544
+ },
545
+ {
546
+ "word": "bonnet",
547
+ "duration": 0.34,
548
+ "codes": [
549
+ 1677,
550
+ 1826,
551
+ 1277,
552
+ 524,
553
+ 1001,
554
+ 789,
555
+ 973,
556
+ 1509,
557
+ 1817,
558
+ 546,
559
+ 1260,
560
+ 1117,
561
+ 782,
562
+ 142,
563
+ 1455,
564
+ 947,
565
+ 1814,
566
+ 1815,
567
+ 0,
568
+ 1538,
569
+ 1766,
570
+ 1744,
571
+ 1824,
572
+ 239,
573
+ 1710
574
+ ]
575
+ },
576
+ {
577
+ "word": "peppers",
578
+ "duration": 0.5,
579
+ "codes": [
580
+ 1817,
581
+ 1287,
582
+ 1769,
583
+ 1309,
584
+ 446,
585
+ 1173,
586
+ 1183,
587
+ 375,
588
+ 1342,
589
+ 1815,
590
+ 1382,
591
+ 1685,
592
+ 1797,
593
+ 1351,
594
+ 1798,
595
+ 1631,
596
+ 749,
597
+ 1717,
598
+ 1324,
599
+ 1147,
600
+ 1186,
601
+ 955,
602
+ 577,
603
+ 1736,
604
+ 827,
605
+ 1240,
606
+ 1484,
607
+ 847,
608
+ 1661,
609
+ 1475,
610
+ 1287,
611
+ 1535,
612
+ 595,
613
+ 1286,
614
+ 1734,
615
+ 1256,
616
+ 319,
617
+ 1688
618
+ ]
619
+ }
620
+ ]
621
+ }
yarngpt/default_speakers/osagie.json ADDED
@@ -0,0 +1,486 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "text": "do Charlotte Douglas shallots be me shut up dummy Libby shallots foolish storms",
3
+ "words": [
4
+ {
5
+ "word": "do",
6
+ "duration": 1.18,
7
+ "codes": [
8
+ 1798,
9
+ 858,
10
+ 1653,
11
+ 1400,
12
+ 1441,
13
+ 1810,
14
+ 1180,
15
+ 892,
16
+ 1487,
17
+ 380,
18
+ 208,
19
+ 452,
20
+ 181,
21
+ 714,
22
+ 521,
23
+ 152,
24
+ 1180,
25
+ 2,
26
+ 142,
27
+ 756,
28
+ 208,
29
+ 874,
30
+ 380,
31
+ 565,
32
+ 422,
33
+ 656,
34
+ 81,
35
+ 860,
36
+ 146,
37
+ 1042,
38
+ 1685,
39
+ 1580,
40
+ 50,
41
+ 137,
42
+ 132,
43
+ 170,
44
+ 1633,
45
+ 648,
46
+ 1819,
47
+ 898,
48
+ 1247,
49
+ 1646,
50
+ 1491,
51
+ 438,
52
+ 85,
53
+ 46,
54
+ 170,
55
+ 664,
56
+ 2,
57
+ 236,
58
+ 65,
59
+ 100,
60
+ 393,
61
+ 324,
62
+ 170,
63
+ 1499,
64
+ 1619,
65
+ 519,
66
+ 123,
67
+ 798,
68
+ 79,
69
+ 1447,
70
+ 132,
71
+ 146,
72
+ 779,
73
+ 380,
74
+ 221,
75
+ 1588,
76
+ 228,
77
+ 1443,
78
+ 152,
79
+ 1366,
80
+ 1441,
81
+ 189,
82
+ 320,
83
+ 1387,
84
+ 368,
85
+ 1599,
86
+ 295,
87
+ 65,
88
+ 1353,
89
+ 13,
90
+ 920,
91
+ 1341,
92
+ 55,
93
+ 315,
94
+ 1542,
95
+ 315
96
+ ]
97
+ },
98
+ {
99
+ "word": "charlotte",
100
+ "duration": 0.42,
101
+ "codes": [
102
+ 543,
103
+ 769,
104
+ 69,
105
+ 714,
106
+ 725,
107
+ 212,
108
+ 374,
109
+ 1439,
110
+ 25,
111
+ 1453,
112
+ 637,
113
+ 291,
114
+ 1212,
115
+ 106,
116
+ 1671,
117
+ 146,
118
+ 82,
119
+ 1261,
120
+ 1710,
121
+ 686,
122
+ 1571,
123
+ 213,
124
+ 298,
125
+ 510,
126
+ 452,
127
+ 1396,
128
+ 1635,
129
+ 1760,
130
+ 1469,
131
+ 1793,
132
+ 1233,
133
+ 851
134
+ ]
135
+ },
136
+ {
137
+ "word": "douglas",
138
+ "duration": 0.42,
139
+ "codes": [
140
+ 1539,
141
+ 2,
142
+ 679,
143
+ 51,
144
+ 215,
145
+ 1068,
146
+ 295,
147
+ 115,
148
+ 1150,
149
+ 753,
150
+ 1806,
151
+ 287,
152
+ 85,
153
+ 725,
154
+ 1312,
155
+ 293,
156
+ 614,
157
+ 1610,
158
+ 380,
159
+ 260,
160
+ 1014,
161
+ 104,
162
+ 777,
163
+ 1697,
164
+ 270,
165
+ 580,
166
+ 794,
167
+ 1345,
168
+ 1552,
169
+ 7,
170
+ 178
171
+ ]
172
+ },
173
+ {
174
+ "word": "shallots",
175
+ "duration": 0.48,
176
+ "codes": [
177
+ 315,
178
+ 290,
179
+ 333,
180
+ 1761,
181
+ 412,
182
+ 520,
183
+ 125,
184
+ 367,
185
+ 1001,
186
+ 700,
187
+ 1258,
188
+ 955,
189
+ 388,
190
+ 880,
191
+ 324,
192
+ 637,
193
+ 642,
194
+ 1723,
195
+ 1480,
196
+ 990,
197
+ 507,
198
+ 652,
199
+ 69,
200
+ 1670,
201
+ 1073,
202
+ 1433,
203
+ 830,
204
+ 1737,
205
+ 1769,
206
+ 1829,
207
+ 1524,
208
+ 1605,
209
+ 1737,
210
+ 1660,
211
+ 1782,
212
+ 1687,
213
+ 1802
214
+ ]
215
+ },
216
+ {
217
+ "word": "be",
218
+ "duration": 0.16,
219
+ "codes": [
220
+ 1715,
221
+ 687,
222
+ 1365,
223
+ 49,
224
+ 98,
225
+ 357,
226
+ 1416,
227
+ 245,
228
+ 1058,
229
+ 870,
230
+ 1689,
231
+ 1588
232
+ ]
233
+ },
234
+ {
235
+ "word": "me",
236
+ "duration": 0.36,
237
+ "codes": [
238
+ 1469,
239
+ 1221,
240
+ 1783,
241
+ 127,
242
+ 372,
243
+ 519,
244
+ 98,
245
+ 50,
246
+ 1439,
247
+ 876,
248
+ 362,
249
+ 1439,
250
+ 1506,
251
+ 1452,
252
+ 736,
253
+ 1740,
254
+ 1715,
255
+ 1641,
256
+ 1628,
257
+ 1807,
258
+ 1654,
259
+ 1601,
260
+ 911,
261
+ 788,
262
+ 1451,
263
+ 356,
264
+ 1450
265
+ ]
266
+ },
267
+ {
268
+ "word": "shut",
269
+ "duration": 0.34,
270
+ "codes": [
271
+ 202,
272
+ 543,
273
+ 1527,
274
+ 1345,
275
+ 105,
276
+ 721,
277
+ 128,
278
+ 571,
279
+ 1180,
280
+ 1366,
281
+ 1187,
282
+ 860,
283
+ 1113,
284
+ 1089,
285
+ 270,
286
+ 113,
287
+ 525,
288
+ 992,
289
+ 1588,
290
+ 975,
291
+ 668,
292
+ 780,
293
+ 399,
294
+ 233,
295
+ 510
296
+ ]
297
+ },
298
+ {
299
+ "word": "up",
300
+ "duration": 0.1,
301
+ "codes": [
302
+ 1715,
303
+ 1833,
304
+ 1719,
305
+ 363,
306
+ 1763,
307
+ 1784,
308
+ 1765,
309
+ 85
310
+ ]
311
+ },
312
+ {
313
+ "word": "dummy",
314
+ "duration": 0.36,
315
+ "codes": [
316
+ 101,
317
+ 47,
318
+ 1127,
319
+ 205,
320
+ 164,
321
+ 647,
322
+ 300,
323
+ 737,
324
+ 300,
325
+ 910,
326
+ 549,
327
+ 1598,
328
+ 333,
329
+ 900,
330
+ 1521,
331
+ 1287,
332
+ 917,
333
+ 362,
334
+ 290,
335
+ 1353,
336
+ 917,
337
+ 407,
338
+ 1588,
339
+ 1396,
340
+ 1415,
341
+ 440,
342
+ 1565
343
+ ]
344
+ },
345
+ {
346
+ "word": "libby",
347
+ "duration": 0.36,
348
+ "codes": [
349
+ 935,
350
+ 479,
351
+ 153,
352
+ 127,
353
+ 162,
354
+ 782,
355
+ 932,
356
+ 1023,
357
+ 1262,
358
+ 343,
359
+ 1728,
360
+ 502,
361
+ 1401,
362
+ 996,
363
+ 350,
364
+ 1445,
365
+ 856,
366
+ 298,
367
+ 48,
368
+ 1698,
369
+ 1470,
370
+ 1736,
371
+ 26,
372
+ 1342,
373
+ 328,
374
+ 372,
375
+ 1451
376
+ ]
377
+ },
378
+ {
379
+ "word": "shallots",
380
+ "duration": 0.4,
381
+ "codes": [
382
+ 7,
383
+ 50,
384
+ 519,
385
+ 1221,
386
+ 212,
387
+ 238,
388
+ 1083,
389
+ 844,
390
+ 333,
391
+ 182,
392
+ 472,
393
+ 839,
394
+ 609,
395
+ 656,
396
+ 208,
397
+ 291,
398
+ 1234,
399
+ 1678,
400
+ 1151,
401
+ 867,
402
+ 290,
403
+ 546,
404
+ 848,
405
+ 1700,
406
+ 1740,
407
+ 26,
408
+ 1617,
409
+ 1238,
410
+ 183,
411
+ 1693
412
+ ]
413
+ },
414
+ {
415
+ "word": "foolish",
416
+ "duration": 0.38,
417
+ "codes": [
418
+ 863,
419
+ 176,
420
+ 1546,
421
+ 1470,
422
+ 1435,
423
+ 716,
424
+ 1460,
425
+ 1013,
426
+ 217,
427
+ 1374,
428
+ 736,
429
+ 91,
430
+ 959,
431
+ 767,
432
+ 1678,
433
+ 1541,
434
+ 903,
435
+ 362,
436
+ 1336,
437
+ 1345,
438
+ 546,
439
+ 848,
440
+ 253,
441
+ 335,
442
+ 510,
443
+ 69,
444
+ 546,
445
+ 1166,
446
+ 1677
447
+ ]
448
+ },
449
+ {
450
+ "word": "storms",
451
+ "duration": 0.4,
452
+ "codes": [
453
+ 939,
454
+ 1361,
455
+ 1719,
456
+ 1428,
457
+ 1691,
458
+ 319,
459
+ 1596,
460
+ 236,
461
+ 757,
462
+ 1625,
463
+ 123,
464
+ 1297,
465
+ 55,
466
+ 132,
467
+ 708,
468
+ 92,
469
+ 1344,
470
+ 848,
471
+ 1232,
472
+ 518,
473
+ 695,
474
+ 1726,
475
+ 1502,
476
+ 1759,
477
+ 363,
478
+ 1751,
479
+ 1524,
480
+ 409,
481
+ 189,
482
+ 0
483
+ ]
484
+ }
485
+ ]
486
+ }
yarngpt/default_speakers/regina.json ADDED
@@ -0,0 +1,574 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "text": "was just like is that what is amazing to you your marriage is",
3
+ "words": [
4
+ {
5
+ "word": "was",
6
+ "duration": 1.02,
7
+ "codes": [
8
+ 1514,
9
+ 571,
10
+ 892,
11
+ 386,
12
+ 186,
13
+ 1403,
14
+ 1082,
15
+ 636,
16
+ 851,
17
+ 1287,
18
+ 1678,
19
+ 1166,
20
+ 162,
21
+ 1345,
22
+ 282,
23
+ 104,
24
+ 1345,
25
+ 329,
26
+ 637,
27
+ 844,
28
+ 537,
29
+ 1366,
30
+ 537,
31
+ 282,
32
+ 1485,
33
+ 537,
34
+ 637,
35
+ 844,
36
+ 537,
37
+ 1710,
38
+ 375,
39
+ 452,
40
+ 1588,
41
+ 537,
42
+ 1382,
43
+ 714,
44
+ 206,
45
+ 333,
46
+ 330,
47
+ 344,
48
+ 281,
49
+ 1523,
50
+ 44,
51
+ 1557,
52
+ 315,
53
+ 479,
54
+ 271,
55
+ 370,
56
+ 110,
57
+ 498,
58
+ 768,
59
+ 560,
60
+ 579,
61
+ 847,
62
+ 961,
63
+ 293,
64
+ 1351,
65
+ 1141,
66
+ 138,
67
+ 1229,
68
+ 2,
69
+ 847,
70
+ 1245,
71
+ 1345,
72
+ 1829,
73
+ 1811,
74
+ 1326,
75
+ 955,
76
+ 1314,
77
+ 137,
78
+ 270,
79
+ 1743,
80
+ 324,
81
+ 1389,
82
+ 1027,
83
+ 863
84
+ ]
85
+ },
86
+ {
87
+ "word": "just",
88
+ "duration": 0.28,
89
+ "codes": [
90
+ 333,
91
+ 38,
92
+ 1518,
93
+ 1296,
94
+ 146,
95
+ 1077,
96
+ 1204,
97
+ 665,
98
+ 658,
99
+ 1005,
100
+ 944,
101
+ 1136,
102
+ 519,
103
+ 749,
104
+ 1061,
105
+ 69,
106
+ 1363,
107
+ 415,
108
+ 1679,
109
+ 1741,
110
+ 138
111
+ ]
112
+ },
113
+ {
114
+ "word": "like",
115
+ "duration": 1.68,
116
+ "codes": [
117
+ 1796,
118
+ 714,
119
+ 65,
120
+ 13,
121
+ 664,
122
+ 1077,
123
+ 463,
124
+ 232,
125
+ 461,
126
+ 1210,
127
+ 356,
128
+ 346,
129
+ 1196,
130
+ 202,
131
+ 631,
132
+ 1804,
133
+ 1096,
134
+ 450,
135
+ 23,
136
+ 1535,
137
+ 415,
138
+ 582,
139
+ 328,
140
+ 546,
141
+ 1571,
142
+ 344,
143
+ 1512,
144
+ 1242,
145
+ 141,
146
+ 194,
147
+ 220,
148
+ 258,
149
+ 246,
150
+ 220,
151
+ 246,
152
+ 542,
153
+ 258,
154
+ 246,
155
+ 220,
156
+ 151,
157
+ 246,
158
+ 542,
159
+ 342,
160
+ 220,
161
+ 75,
162
+ 246,
163
+ 220,
164
+ 246,
165
+ 542,
166
+ 246,
167
+ 220,
168
+ 542,
169
+ 161,
170
+ 450,
171
+ 419,
172
+ 246,
173
+ 542,
174
+ 246,
175
+ 542,
176
+ 246,
177
+ 220,
178
+ 542,
179
+ 246,
180
+ 246,
181
+ 542,
182
+ 246,
183
+ 542,
184
+ 342,
185
+ 542,
186
+ 342,
187
+ 246,
188
+ 542,
189
+ 342,
190
+ 220,
191
+ 75,
192
+ 246,
193
+ 75,
194
+ 246,
195
+ 542,
196
+ 246,
197
+ 220,
198
+ 75,
199
+ 161,
200
+ 542,
201
+ 342,
202
+ 220,
203
+ 258,
204
+ 246,
205
+ 220,
206
+ 75,
207
+ 342,
208
+ 220,
209
+ 258,
210
+ 194,
211
+ 220,
212
+ 436,
213
+ 246,
214
+ 220,
215
+ 194,
216
+ 194,
217
+ 1442,
218
+ 246,
219
+ 220,
220
+ 246,
221
+ 246,
222
+ 246,
223
+ 151,
224
+ 1551,
225
+ 1522,
226
+ 1362,
227
+ 652,
228
+ 1557,
229
+ 333,
230
+ 273,
231
+ 928,
232
+ 1551,
233
+ 180,
234
+ 1570,
235
+ 652,
236
+ 1664,
237
+ 6,
238
+ 654,
239
+ 281,
240
+ 1578,
241
+ 1557,
242
+ 1346,
243
+ 756
244
+ ]
245
+ },
246
+ {
247
+ "word": "is",
248
+ "duration": 0.06,
249
+ "codes": [
250
+ 1337,
251
+ 1662,
252
+ 198,
253
+ 33
254
+ ]
255
+ },
256
+ {
257
+ "word": "that",
258
+ "duration": 0.12,
259
+ "codes": [
260
+ 1679,
261
+ 236,
262
+ 934,
263
+ 1056,
264
+ 208,
265
+ 609,
266
+ 860,
267
+ 1318,
268
+ 1340
269
+ ]
270
+ },
271
+ {
272
+ "word": "what",
273
+ "duration": 0.14,
274
+ "codes": [
275
+ 1618,
276
+ 806,
277
+ 1068,
278
+ 113,
279
+ 1686,
280
+ 428,
281
+ 230,
282
+ 409,
283
+ 263,
284
+ 415,
285
+ 175
286
+ ]
287
+ },
288
+ {
289
+ "word": "is",
290
+ "duration": 0.1,
291
+ "codes": [
292
+ 415,
293
+ 1773,
294
+ 1539,
295
+ 124,
296
+ 1563,
297
+ 700,
298
+ 579
299
+ ]
300
+ },
301
+ {
302
+ "word": "amazing",
303
+ "duration": 0.34,
304
+ "codes": [
305
+ 973,
306
+ 695,
307
+ 1247,
308
+ 1737,
309
+ 1609,
310
+ 1664,
311
+ 1006,
312
+ 134,
313
+ 409,
314
+ 416,
315
+ 774,
316
+ 848,
317
+ 1542,
318
+ 10,
319
+ 1441,
320
+ 1539,
321
+ 129,
322
+ 1698,
323
+ 687,
324
+ 1620,
325
+ 1340,
326
+ 749,
327
+ 469,
328
+ 1695,
329
+ 448,
330
+ 448
331
+ ]
332
+ },
333
+ {
334
+ "word": "to",
335
+ "duration": 0.12,
336
+ "codes": [
337
+ 189,
338
+ 198,
339
+ 124,
340
+ 1753,
341
+ 510,
342
+ 1825,
343
+ 856,
344
+ 1441,
345
+ 1688
346
+ ]
347
+ },
348
+ {
349
+ "word": "you",
350
+ "duration": 1.62,
351
+ "codes": [
352
+ 1552,
353
+ 1546,
354
+ 1698,
355
+ 166,
356
+ 101,
357
+ 1457,
358
+ 137,
359
+ 864,
360
+ 790,
361
+ 794,
362
+ 1615,
363
+ 454,
364
+ 1512,
365
+ 328,
366
+ 634,
367
+ 1578,
368
+ 409,
369
+ 1592,
370
+ 176,
371
+ 1441,
372
+ 1644,
373
+ 356,
374
+ 1641,
375
+ 1580,
376
+ 510,
377
+ 1609,
378
+ 407,
379
+ 882,
380
+ 1580,
381
+ 218,
382
+ 1616,
383
+ 865,
384
+ 409,
385
+ 1570,
386
+ 1376,
387
+ 1734,
388
+ 34,
389
+ 687,
390
+ 1592,
391
+ 556,
392
+ 640,
393
+ 1592,
394
+ 6,
395
+ 1362,
396
+ 4,
397
+ 1546,
398
+ 1302,
399
+ 1376,
400
+ 1570,
401
+ 34,
402
+ 652,
403
+ 180,
404
+ 1569,
405
+ 203,
406
+ 1744,
407
+ 282,
408
+ 945,
409
+ 362,
410
+ 931,
411
+ 1662,
412
+ 631,
413
+ 1580,
414
+ 452,
415
+ 329,
416
+ 725,
417
+ 140,
418
+ 277,
419
+ 1113,
420
+ 537,
421
+ 1332,
422
+ 560,
423
+ 282,
424
+ 1056,
425
+ 270,
426
+ 940,
427
+ 755,
428
+ 860,
429
+ 104,
430
+ 903,
431
+ 537,
432
+ 1310,
433
+ 579,
434
+ 282,
435
+ 848,
436
+ 371,
437
+ 844,
438
+ 1808,
439
+ 400,
440
+ 1772,
441
+ 1166,
442
+ 213,
443
+ 1485,
444
+ 1502,
445
+ 276,
446
+ 1594,
447
+ 1599,
448
+ 1819,
449
+ 1197,
450
+ 441,
451
+ 1318,
452
+ 1237,
453
+ 679,
454
+ 1186,
455
+ 384,
456
+ 609,
457
+ 637,
458
+ 157,
459
+ 609,
460
+ 637,
461
+ 157,
462
+ 790,
463
+ 157,
464
+ 547,
465
+ 452,
466
+ 452,
467
+ 870,
468
+ 162,
469
+ 320,
470
+ 1649,
471
+ 1272,
472
+ 1318,
473
+ 860
474
+ ]
475
+ },
476
+ {
477
+ "word": "your",
478
+ "duration": 0.16,
479
+ "codes": [
480
+ 1477,
481
+ 67,
482
+ 113,
483
+ 1149,
484
+ 479,
485
+ 901,
486
+ 1232,
487
+ 295,
488
+ 9,
489
+ 1129,
490
+ 67,
491
+ 1825
492
+ ]
493
+ },
494
+ {
495
+ "word": "marriage",
496
+ "duration": 0.8,
497
+ "codes": [
498
+ 529,
499
+ 697,
500
+ 695,
501
+ 1429,
502
+ 282,
503
+ 626,
504
+ 1355,
505
+ 192,
506
+ 1671,
507
+ 100,
508
+ 95,
509
+ 1310,
510
+ 388,
511
+ 1155,
512
+ 1494,
513
+ 104,
514
+ 104,
515
+ 587,
516
+ 1156,
517
+ 67,
518
+ 57,
519
+ 1437,
520
+ 697,
521
+ 714,
522
+ 1221,
523
+ 1443,
524
+ 2,
525
+ 1357,
526
+ 931,
527
+ 931,
528
+ 1298,
529
+ 388,
530
+ 1136,
531
+ 1604,
532
+ 428,
533
+ 1240,
534
+ 1698,
535
+ 65,
536
+ 1272,
537
+ 128,
538
+ 755,
539
+ 79,
540
+ 794,
541
+ 1698,
542
+ 1518,
543
+ 1546,
544
+ 1696,
545
+ 448,
546
+ 233,
547
+ 1599,
548
+ 1732,
549
+ 1240,
550
+ 110,
551
+ 775,
552
+ 483,
553
+ 100,
554
+ 1075,
555
+ 346,
556
+ 863,
557
+ 1498
558
+ ]
559
+ },
560
+ {
561
+ "word": "is",
562
+ "duration": 0.1,
563
+ "codes": [
564
+ 631,
565
+ 18,
566
+ 679,
567
+ 430,
568
+ 176,
569
+ 10,
570
+ 52
571
+ ]
572
+ }
573
+ ]
574
+ }
yarngpt/default_speakers/remi.json ADDED
@@ -0,0 +1,382 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "text": "animal noral human being",
3
+ "words": [
4
+ {
5
+ "word": "animal",
6
+ "duration": 2.79,
7
+ "codes": [
8
+ 1679,
9
+ 1711,
10
+ 714,
11
+ 1588,
12
+ 906,
13
+ 725,
14
+ 789,
15
+ 456,
16
+ 79,
17
+ 230,
18
+ 1127,
19
+ 532,
20
+ 200,
21
+ 834,
22
+ 29,
23
+ 753,
24
+ 1420,
25
+ 595,
26
+ 997,
27
+ 557,
28
+ 205,
29
+ 488,
30
+ 775,
31
+ 63,
32
+ 1520,
33
+ 1600,
34
+ 1394,
35
+ 1811,
36
+ 1715,
37
+ 473,
38
+ 805,
39
+ 128,
40
+ 502,
41
+ 1353,
42
+ 1636,
43
+ 1832,
44
+ 182,
45
+ 381,
46
+ 281,
47
+ 1540,
48
+ 748,
49
+ 1341,
50
+ 1744,
51
+ 374,
52
+ 1767,
53
+ 182,
54
+ 621,
55
+ 495,
56
+ 234,
57
+ 909,
58
+ 1383,
59
+ 92,
60
+ 1545,
61
+ 1394,
62
+ 1794,
63
+ 1641,
64
+ 319,
65
+ 1452,
66
+ 1240,
67
+ 217,
68
+ 1815,
69
+ 388,
70
+ 828,
71
+ 1664,
72
+ 184,
73
+ 1239,
74
+ 319,
75
+ 1469,
76
+ 1810,
77
+ 36,
78
+ 1019,
79
+ 1451,
80
+ 774,
81
+ 1819,
82
+ 1521,
83
+ 761,
84
+ 23,
85
+ 1609,
86
+ 273,
87
+ 52,
88
+ 1670,
89
+ 524,
90
+ 813,
91
+ 806,
92
+ 79,
93
+ 1141,
94
+ 1677,
95
+ 138,
96
+ 1409,
97
+ 1468,
98
+ 1633,
99
+ 1573,
100
+ 782,
101
+ 1655,
102
+ 1669,
103
+ 1239,
104
+ 458,
105
+ 1495,
106
+ 258,
107
+ 544,
108
+ 1532,
109
+ 1567,
110
+ 1627,
111
+ 1641,
112
+ 851,
113
+ 1573,
114
+ 1569,
115
+ 265,
116
+ 686,
117
+ 72,
118
+ 151,
119
+ 342,
120
+ 194,
121
+ 75,
122
+ 419,
123
+ 342,
124
+ 542,
125
+ 419,
126
+ 75,
127
+ 342,
128
+ 246,
129
+ 75,
130
+ 342,
131
+ 246,
132
+ 56,
133
+ 161,
134
+ 246,
135
+ 442,
136
+ 161,
137
+ 56,
138
+ 156,
139
+ 420,
140
+ 161,
141
+ 75,
142
+ 219,
143
+ 194,
144
+ 56,
145
+ 156,
146
+ 220,
147
+ 453,
148
+ 156,
149
+ 1019,
150
+ 490,
151
+ 1415,
152
+ 742,
153
+ 1533,
154
+ 412,
155
+ 828,
156
+ 138,
157
+ 1487,
158
+ 128,
159
+ 660,
160
+ 1339,
161
+ 882,
162
+ 154,
163
+ 1533,
164
+ 47,
165
+ 312,
166
+ 730,
167
+ 1087,
168
+ 764,
169
+ 346,
170
+ 1394,
171
+ 179,
172
+ 959,
173
+ 1344,
174
+ 324,
175
+ 1457,
176
+ 388,
177
+ 57,
178
+ 514,
179
+ 1323,
180
+ 631,
181
+ 6,
182
+ 479,
183
+ 815,
184
+ 1599,
185
+ 384,
186
+ 952,
187
+ 1650,
188
+ 57,
189
+ 314,
190
+ 320,
191
+ 787,
192
+ 1488,
193
+ 147,
194
+ 203,
195
+ 1078,
196
+ 192,
197
+ 1663,
198
+ 236,
199
+ 1501,
200
+ 270,
201
+ 1280,
202
+ 716,
203
+ 631,
204
+ 1584,
205
+ 1605,
206
+ 1779,
207
+ 1239,
208
+ 363,
209
+ 1437,
210
+ 430,
211
+ 1554,
212
+ 1069,
213
+ 189,
214
+ 319,
215
+ 856,
216
+ 143
217
+ ]
218
+ },
219
+ {
220
+ "word": "noral",
221
+ "duration": 0.56,
222
+ "codes": [
223
+ 1831,
224
+ 201,
225
+ 1674,
226
+ 1707,
227
+ 1807,
228
+ 487,
229
+ 1577,
230
+ 1394,
231
+ 1341,
232
+ 412,
233
+ 814,
234
+ 205,
235
+ 1633,
236
+ 79,
237
+ 1267,
238
+ 1625,
239
+ 315,
240
+ 1649,
241
+ 4,
242
+ 780,
243
+ 368,
244
+ 592,
245
+ 1633,
246
+ 592,
247
+ 1431,
248
+ 1563,
249
+ 599,
250
+ 176,
251
+ 10,
252
+ 725,
253
+ 1468,
254
+ 76,
255
+ 593,
256
+ 714,
257
+ 146,
258
+ 974,
259
+ 725,
260
+ 549,
261
+ 57,
262
+ 1068,
263
+ 1729,
264
+ 52
265
+ ]
266
+ },
267
+ {
268
+ "word": "human",
269
+ "duration": 0.82,
270
+ "codes": [
271
+ 1552,
272
+ 233,
273
+ 298,
274
+ 949,
275
+ 1636,
276
+ 380,
277
+ 363,
278
+ 1520,
279
+ 1768,
280
+ 85,
281
+ 483,
282
+ 876,
283
+ 125,
284
+ 153,
285
+ 564,
286
+ 200,
287
+ 1221,
288
+ 803,
289
+ 1712,
290
+ 117,
291
+ 804,
292
+ 688,
293
+ 787,
294
+ 1345,
295
+ 592,
296
+ 291,
297
+ 472,
298
+ 158,
299
+ 132,
300
+ 1827,
301
+ 617,
302
+ 157,
303
+ 36,
304
+ 1186,
305
+ 1008,
306
+ 324,
307
+ 961,
308
+ 644,
309
+ 179,
310
+ 931,
311
+ 1400,
312
+ 688,
313
+ 1015,
314
+ 488,
315
+ 532,
316
+ 500,
317
+ 952,
318
+ 945,
319
+ 29,
320
+ 1497,
321
+ 529,
322
+ 749,
323
+ 1733,
324
+ 439,
325
+ 63,
326
+ 1773,
327
+ 1527,
328
+ 1622,
329
+ 728,
330
+ 1613,
331
+ 1274,
332
+ 136
333
+ ]
334
+ },
335
+ {
336
+ "word": "being",
337
+ "duration": 0.54,
338
+ "codes": [
339
+ 546,
340
+ 1287,
341
+ 166,
342
+ 315,
343
+ 1678,
344
+ 882,
345
+ 1753,
346
+ 1018,
347
+ 1449,
348
+ 1581,
349
+ 298,
350
+ 1710,
351
+ 1799,
352
+ 1772,
353
+ 1406,
354
+ 1538,
355
+ 1728,
356
+ 1657,
357
+ 1778,
358
+ 182,
359
+ 921,
360
+ 217,
361
+ 1615,
362
+ 133,
363
+ 217,
364
+ 1516,
365
+ 1830,
366
+ 844,
367
+ 1584,
368
+ 338,
369
+ 1639,
370
+ 644,
371
+ 417,
372
+ 774,
373
+ 1724,
374
+ 648,
375
+ 749,
376
+ 4,
377
+ 315,
378
+ 1497
379
+ ]
380
+ }
381
+ ]
382
+ }
yarngpt/default_speakers/saheed.json ADDED
@@ -0,0 +1,564 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "text": "Hello! My name is Saheed azeez and I am testing the audio feature",
3
+ "words": [
4
+ {
5
+ "word": "hello",
6
+ "duration": 2.38,
7
+ "codes": [
8
+ 219,
9
+ 244,
10
+ 244,
11
+ 167,
12
+ 453,
13
+ 453,
14
+ 453,
15
+ 453,
16
+ 453,
17
+ 453,
18
+ 453,
19
+ 453,
20
+ 453,
21
+ 453,
22
+ 453,
23
+ 453,
24
+ 453,
25
+ 453,
26
+ 453,
27
+ 453,
28
+ 453,
29
+ 453,
30
+ 453,
31
+ 453,
32
+ 453,
33
+ 453,
34
+ 453,
35
+ 453,
36
+ 453,
37
+ 453,
38
+ 453,
39
+ 453,
40
+ 453,
41
+ 453,
42
+ 244,
43
+ 219,
44
+ 139,
45
+ 966,
46
+ 1099,
47
+ 1299,
48
+ 1433,
49
+ 1128,
50
+ 1266,
51
+ 1517,
52
+ 649,
53
+ 196,
54
+ 1731,
55
+ 1405,
56
+ 830,
57
+ 1771,
58
+ 964,
59
+ 476,
60
+ 1803,
61
+ 584,
62
+ 875,
63
+ 1683,
64
+ 986,
65
+ 363,
66
+ 1489,
67
+ 465,
68
+ 5,
69
+ 1067,
70
+ 606,
71
+ 1590,
72
+ 1397,
73
+ 265,
74
+ 1446,
75
+ 1279,
76
+ 799,
77
+ 1491,
78
+ 1367,
79
+ 606,
80
+ 1593,
81
+ 1279,
82
+ 360,
83
+ 256,
84
+ 1705,
85
+ 1425,
86
+ 58,
87
+ 1210,
88
+ 1357,
89
+ 1379,
90
+ 752,
91
+ 1640,
92
+ 837,
93
+ 734,
94
+ 1787,
95
+ 1406,
96
+ 1052,
97
+ 1796,
98
+ 686,
99
+ 1446,
100
+ 1716,
101
+ 564,
102
+ 595,
103
+ 1716,
104
+ 728,
105
+ 847,
106
+ 732,
107
+ 935,
108
+ 1253,
109
+ 752,
110
+ 1019,
111
+ 1455,
112
+ 564,
113
+ 1492,
114
+ 733,
115
+ 1645,
116
+ 1391,
117
+ 728,
118
+ 1501,
119
+ 1822,
120
+ 1339,
121
+ 1677,
122
+ 1456,
123
+ 807,
124
+ 1738,
125
+ 710,
126
+ 1381,
127
+ 1292,
128
+ 406,
129
+ 1517,
130
+ 1458,
131
+ 761,
132
+ 1361,
133
+ 649,
134
+ 17,
135
+ 1367,
136
+ 606,
137
+ 1771,
138
+ 1028,
139
+ 464,
140
+ 1309,
141
+ 691,
142
+ 1023,
143
+ 1314,
144
+ 692,
145
+ 1373,
146
+ 837,
147
+ 442,
148
+ 1683,
149
+ 838,
150
+ 476,
151
+ 1475,
152
+ 950,
153
+ 136,
154
+ 1309,
155
+ 465,
156
+ 17,
157
+ 19,
158
+ 765,
159
+ 1553,
160
+ 1305,
161
+ 534,
162
+ 1309,
163
+ 666,
164
+ 761,
165
+ 1067,
166
+ 442,
167
+ 1704,
168
+ 1128,
169
+ 633,
170
+ 1438,
171
+ 1011,
172
+ 406,
173
+ 1489,
174
+ 136,
175
+ 1813,
176
+ 1589,
177
+ 763,
178
+ 1489,
179
+ 696,
180
+ 643,
181
+ 1305,
182
+ 246,
183
+ 406,
184
+ 1421,
185
+ 37
186
+ ]
187
+ },
188
+ {
189
+ "word": "my",
190
+ "duration": 0.2,
191
+ "codes": [
192
+ 1187,
193
+ 1770,
194
+ 646,
195
+ 1174,
196
+ 1771,
197
+ 1192,
198
+ 800,
199
+ 310,
200
+ 1318,
201
+ 1500,
202
+ 909,
203
+ 1104,
204
+ 1792,
205
+ 1218,
206
+ 1832
207
+ ]
208
+ },
209
+ {
210
+ "word": "name",
211
+ "duration": 0.24,
212
+ "codes": [
213
+ 875,
214
+ 1583,
215
+ 1632,
216
+ 671,
217
+ 1002,
218
+ 905,
219
+ 1073,
220
+ 1294,
221
+ 595,
222
+ 1684,
223
+ 1501,
224
+ 1797,
225
+ 850,
226
+ 1761,
227
+ 1751,
228
+ 935,
229
+ 1443,
230
+ 1781
231
+ ]
232
+ },
233
+ {
234
+ "word": "is",
235
+ "duration": 0.14,
236
+ "codes": [
237
+ 1780,
238
+ 1215,
239
+ 1674,
240
+ 1815,
241
+ 1451,
242
+ 1673,
243
+ 1303,
244
+ 1660,
245
+ 1613,
246
+ 1379,
247
+ 1756
248
+ ]
249
+ },
250
+ {
251
+ "word": "saheed",
252
+ "duration": 0.68,
253
+ "codes": [
254
+ 1419,
255
+ 1568,
256
+ 1643,
257
+ 1099,
258
+ 1795,
259
+ 970,
260
+ 1184,
261
+ 1498,
262
+ 877,
263
+ 1162,
264
+ 902,
265
+ 1537,
266
+ 1192,
267
+ 1565,
268
+ 1472,
269
+ 1109,
270
+ 1225,
271
+ 1321,
272
+ 1453,
273
+ 1654,
274
+ 1274,
275
+ 1811,
276
+ 1695,
277
+ 946,
278
+ 1631,
279
+ 1590,
280
+ 1152,
281
+ 820,
282
+ 272,
283
+ 1458,
284
+ 1378,
285
+ 240,
286
+ 1421,
287
+ 174,
288
+ 925,
289
+ 1126,
290
+ 1346,
291
+ 1600,
292
+ 1716,
293
+ 258,
294
+ 1611,
295
+ 442,
296
+ 625,
297
+ 1448,
298
+ 246,
299
+ 957,
300
+ 226,
301
+ 338,
302
+ 1190,
303
+ 921,
304
+ 1505
305
+ ]
306
+ },
307
+ {
308
+ "word": "azeez",
309
+ "duration": 0.8,
310
+ "codes": [
311
+ 1195,
312
+ 646,
313
+ 1505,
314
+ 1014,
315
+ 250,
316
+ 837,
317
+ 729,
318
+ 121,
319
+ 1715,
320
+ 1446,
321
+ 1430,
322
+ 1608,
323
+ 1575,
324
+ 1057,
325
+ 1643,
326
+ 1514,
327
+ 1795,
328
+ 893,
329
+ 1718,
330
+ 1383,
331
+ 840,
332
+ 1802,
333
+ 426,
334
+ 1414,
335
+ 1573,
336
+ 1784,
337
+ 1285,
338
+ 852,
339
+ 1246,
340
+ 896,
341
+ 1744,
342
+ 1299,
343
+ 495,
344
+ 1796,
345
+ 1570,
346
+ 1665,
347
+ 505,
348
+ 888,
349
+ 1654,
350
+ 343,
351
+ 1120,
352
+ 1474,
353
+ 16,
354
+ 1035,
355
+ 505,
356
+ 1699,
357
+ 862,
358
+ 692,
359
+ 1623,
360
+ 633,
361
+ 566,
362
+ 1037,
363
+ 342,
364
+ 950,
365
+ 261,
366
+ 729,
367
+ 1317,
368
+ 177,
369
+ 1213,
370
+ 1333
371
+ ]
372
+ },
373
+ {
374
+ "word": "and",
375
+ "duration": 0.34,
376
+ "codes": [
377
+ 908,
378
+ 1203,
379
+ 1683,
380
+ 926,
381
+ 1278,
382
+ 564,
383
+ 1067,
384
+ 1003,
385
+ 90,
386
+ 459,
387
+ 568,
388
+ 272,
389
+ 1117,
390
+ 1396,
391
+ 1411,
392
+ 1233,
393
+ 193,
394
+ 1197,
395
+ 970,
396
+ 1065,
397
+ 1611,
398
+ 883,
399
+ 1216,
400
+ 1776,
401
+ 747
402
+ ]
403
+ },
404
+ {
405
+ "word": "i",
406
+ "duration": 0.06,
407
+ "codes": [
408
+ 924,
409
+ 1628,
410
+ 988,
411
+ 1116,
412
+ 1388
413
+ ]
414
+ },
415
+ {
416
+ "word": "am",
417
+ "duration": 0.18,
418
+ "codes": [
419
+ 1199,
420
+ 1188,
421
+ 593,
422
+ 953,
423
+ 459,
424
+ 272,
425
+ 869,
426
+ 1321,
427
+ 145,
428
+ 1306,
429
+ 272,
430
+ 406,
431
+ 1479
432
+ ]
433
+ },
434
+ {
435
+ "word": "testing",
436
+ "duration": 0.44,
437
+ "codes": [
438
+ 237,
439
+ 1003,
440
+ 1638,
441
+ 638,
442
+ 1180,
443
+ 1666,
444
+ 811,
445
+ 1178,
446
+ 1565,
447
+ 814,
448
+ 1211,
449
+ 1654,
450
+ 1779,
451
+ 1313,
452
+ 1619,
453
+ 1684,
454
+ 1230,
455
+ 419,
456
+ 891,
457
+ 28,
458
+ 1231,
459
+ 1379,
460
+ 729,
461
+ 1682,
462
+ 338,
463
+ 1468,
464
+ 136,
465
+ 1630,
466
+ 1215,
467
+ 251,
468
+ 1464,
469
+ 781,
470
+ 598
471
+ ]
472
+ },
473
+ {
474
+ "word": "the",
475
+ "duration": 0.22,
476
+ "codes": [
477
+ 555,
478
+ 692,
479
+ 663,
480
+ 1632,
481
+ 905,
482
+ 807,
483
+ 1085,
484
+ 752,
485
+ 1433,
486
+ 392,
487
+ 921,
488
+ 1820,
489
+ 363,
490
+ 987,
491
+ 1328,
492
+ 734,
493
+ 1063
494
+ ]
495
+ },
496
+ {
497
+ "word": "audio",
498
+ "duration": 0.34,
499
+ "codes": [
500
+ 1294,
501
+ 814,
502
+ 1423,
503
+ 1750,
504
+ 747,
505
+ 672,
506
+ 651,
507
+ 250,
508
+ 1478,
509
+ 37,
510
+ 1760,
511
+ 1021,
512
+ 850,
513
+ 58,
514
+ 438,
515
+ 953,
516
+ 1668,
517
+ 771,
518
+ 729,
519
+ 1456,
520
+ 322,
521
+ 591,
522
+ 1474,
523
+ 1440,
524
+ 1170
525
+ ]
526
+ },
527
+ {
528
+ "word": "feature",
529
+ "duration": 0.4,
530
+ "codes": [
531
+ 332,
532
+ 1333,
533
+ 1146,
534
+ 1025,
535
+ 19,
536
+ 501,
537
+ 169,
538
+ 1250,
539
+ 734,
540
+ 1629,
541
+ 1383,
542
+ 355,
543
+ 1747,
544
+ 584,
545
+ 237,
546
+ 1428,
547
+ 240,
548
+ 1298,
549
+ 999,
550
+ 1338,
551
+ 1438,
552
+ 1727,
553
+ 987,
554
+ 1455,
555
+ 792,
556
+ 932,
557
+ 1199,
558
+ 355,
559
+ 1185,
560
+ 772
561
+ ]
562
+ }
563
+ ]
564
+ }
yarngpt/default_speakers/tayo.json ADDED
@@ -0,0 +1,523 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "text": "and enjoy ourselves we need more parties let party start again now we know",
3
+ "words": [
4
+ {
5
+ "word": "and",
6
+ "duration": 0.5,
7
+ "codes": [
8
+ 82,
9
+ 1201,
10
+ 329,
11
+ 992,
12
+ 908,
13
+ 847,
14
+ 925,
15
+ 1666,
16
+ 1057,
17
+ 1266,
18
+ 1448,
19
+ 1737,
20
+ 1251,
21
+ 1031,
22
+ 1759,
23
+ 1459,
24
+ 1094,
25
+ 1750,
26
+ 1739,
27
+ 1521,
28
+ 594,
29
+ 1625,
30
+ 732,
31
+ 1326,
32
+ 1095,
33
+ 828,
34
+ 239,
35
+ 752,
36
+ 1221,
37
+ 1382,
38
+ 705,
39
+ 1716,
40
+ 865,
41
+ 1503,
42
+ 478,
43
+ 1692,
44
+ 938
45
+ ]
46
+ },
47
+ {
48
+ "word": "enjoy",
49
+ "duration": 0.4,
50
+ "codes": [
51
+ 844,
52
+ 192,
53
+ 737,
54
+ 344,
55
+ 276,
56
+ 138,
57
+ 48,
58
+ 1616,
59
+ 28,
60
+ 1530,
61
+ 1550,
62
+ 1383,
63
+ 1712,
64
+ 69,
65
+ 1261,
66
+ 547,
67
+ 249,
68
+ 1047,
69
+ 500,
70
+ 182,
71
+ 63,
72
+ 1445,
73
+ 935,
74
+ 865,
75
+ 1478,
76
+ 1670,
77
+ 479,
78
+ 116,
79
+ 1674,
80
+ 886
81
+ ]
82
+ },
83
+ {
84
+ "word": "ourselves",
85
+ "duration": 0.7,
86
+ "codes": [
87
+ 467,
88
+ 1534,
89
+ 901,
90
+ 569,
91
+ 1740,
92
+ 882,
93
+ 1579,
94
+ 507,
95
+ 276,
96
+ 1296,
97
+ 543,
98
+ 399,
99
+ 404,
100
+ 1624,
101
+ 1666,
102
+ 153,
103
+ 102,
104
+ 1323,
105
+ 1552,
106
+ 65,
107
+ 898,
108
+ 1577,
109
+ 757,
110
+ 1446,
111
+ 1022,
112
+ 363,
113
+ 124,
114
+ 947,
115
+ 1441,
116
+ 581,
117
+ 1677,
118
+ 1269,
119
+ 1525,
120
+ 1170,
121
+ 505,
122
+ 1681,
123
+ 1212,
124
+ 1273,
125
+ 1364,
126
+ 1513,
127
+ 1826,
128
+ 1139,
129
+ 1756,
130
+ 639,
131
+ 1450,
132
+ 1810,
133
+ 1638,
134
+ 1644,
135
+ 1669,
136
+ 1519,
137
+ 851,
138
+ 1362,
139
+ 1672
140
+ ]
141
+ },
142
+ {
143
+ "word": "we",
144
+ "duration": 0.1,
145
+ "codes": [
146
+ 875,
147
+ 1558,
148
+ 1249,
149
+ 1445,
150
+ 181,
151
+ 738,
152
+ 1641
153
+ ]
154
+ },
155
+ {
156
+ "word": "need",
157
+ "duration": 0.14,
158
+ "codes": [
159
+ 1603,
160
+ 177,
161
+ 195,
162
+ 65,
163
+ 1600,
164
+ 104,
165
+ 143,
166
+ 1574,
167
+ 1416,
168
+ 160,
169
+ 50
170
+ ]
171
+ },
172
+ {
173
+ "word": "more",
174
+ "duration": 0.18,
175
+ "codes": [
176
+ 48,
177
+ 1597,
178
+ 39,
179
+ 1414,
180
+ 74,
181
+ 1192,
182
+ 84,
183
+ 1345,
184
+ 748,
185
+ 1269,
186
+ 1672,
187
+ 686,
188
+ 1820,
189
+ 1442
190
+ ]
191
+ },
192
+ {
193
+ "word": "parties",
194
+ "duration": 0.56,
195
+ "codes": [
196
+ 1640,
197
+ 1030,
198
+ 138,
199
+ 147,
200
+ 413,
201
+ 110,
202
+ 282,
203
+ 1633,
204
+ 1659,
205
+ 1524,
206
+ 176,
207
+ 350,
208
+ 137,
209
+ 1004,
210
+ 92,
211
+ 1240,
212
+ 1521,
213
+ 1376,
214
+ 502,
215
+ 1558,
216
+ 592,
217
+ 473,
218
+ 1021,
219
+ 1805,
220
+ 1346,
221
+ 1393,
222
+ 1759,
223
+ 1786,
224
+ 231,
225
+ 1728,
226
+ 117,
227
+ 1366,
228
+ 1754,
229
+ 1073,
230
+ 1786,
231
+ 1354,
232
+ 1532,
233
+ 1572,
234
+ 1754,
235
+ 16,
236
+ 257,
237
+ 273
238
+ ]
239
+ },
240
+ {
241
+ "word": "let",
242
+ "duration": 0.16,
243
+ "codes": [
244
+ 1312,
245
+ 961,
246
+ 372,
247
+ 212,
248
+ 1253,
249
+ 115,
250
+ 656,
251
+ 1374,
252
+ 78,
253
+ 1322,
254
+ 1284,
255
+ 343
256
+ ]
257
+ },
258
+ {
259
+ "word": "party",
260
+ "duration": 0.24,
261
+ "codes": [
262
+ 1572,
263
+ 1662,
264
+ 25,
265
+ 390,
266
+ 892,
267
+ 212,
268
+ 637,
269
+ 576,
270
+ 176,
271
+ 1702,
272
+ 640,
273
+ 276,
274
+ 52,
275
+ 648,
276
+ 577,
277
+ 1240,
278
+ 276,
279
+ 155
280
+ ]
281
+ },
282
+ {
283
+ "word": "start",
284
+ "duration": 0.3,
285
+ "codes": [
286
+ 213,
287
+ 356,
288
+ 1603,
289
+ 1284,
290
+ 1442,
291
+ 1599,
292
+ 705,
293
+ 82,
294
+ 65,
295
+ 764,
296
+ 349,
297
+ 370,
298
+ 856,
299
+ 1524,
300
+ 1508,
301
+ 209,
302
+ 495,
303
+ 1552,
304
+ 50,
305
+ 1588,
306
+ 863,
307
+ 63
308
+ ]
309
+ },
310
+ {
311
+ "word": "again",
312
+ "duration": 0.3,
313
+ "codes": [
314
+ 1267,
315
+ 273,
316
+ 298,
317
+ 1409,
318
+ 101,
319
+ 1548,
320
+ 733,
321
+ 625,
322
+ 1728,
323
+ 1283,
324
+ 286,
325
+ 1645,
326
+ 1363,
327
+ 368,
328
+ 153,
329
+ 289,
330
+ 716,
331
+ 1756,
332
+ 865,
333
+ 1376,
334
+ 688,
335
+ 332,
336
+ 731
337
+ ]
338
+ },
339
+ {
340
+ "word": "now",
341
+ "duration": 0.44,
342
+ "codes": [
343
+ 983,
344
+ 385,
345
+ 1002,
346
+ 806,
347
+ 1798,
348
+ 95,
349
+ 1776,
350
+ 825,
351
+ 1790,
352
+ 737,
353
+ 1595,
354
+ 907,
355
+ 932,
356
+ 1786,
357
+ 626,
358
+ 831,
359
+ 1823,
360
+ 1680,
361
+ 1780,
362
+ 1502,
363
+ 1206,
364
+ 1078,
365
+ 47,
366
+ 829,
367
+ 868,
368
+ 69,
369
+ 277,
370
+ 429,
371
+ 125,
372
+ 132,
373
+ 14,
374
+ 1497,
375
+ 444
376
+ ]
377
+ },
378
+ {
379
+ "word": "we",
380
+ "duration": 1.32,
381
+ "codes": [
382
+ 1692,
383
+ 648,
384
+ 481,
385
+ 155,
386
+ 483,
387
+ 126,
388
+ 1283,
389
+ 12,
390
+ 108,
391
+ 429,
392
+ 828,
393
+ 128,
394
+ 1161,
395
+ 725,
396
+ 155,
397
+ 107,
398
+ 1610,
399
+ 228,
400
+ 1492,
401
+ 1560,
402
+ 368,
403
+ 1138,
404
+ 810,
405
+ 1572,
406
+ 1562,
407
+ 320,
408
+ 112,
409
+ 520,
410
+ 52,
411
+ 49,
412
+ 1008,
413
+ 1635,
414
+ 1728,
415
+ 1523,
416
+ 62,
417
+ 190,
418
+ 648,
419
+ 592,
420
+ 384,
421
+ 969,
422
+ 1441,
423
+ 519,
424
+ 1536,
425
+ 1571,
426
+ 1587,
427
+ 1539,
428
+ 15,
429
+ 1156,
430
+ 376,
431
+ 1022,
432
+ 642,
433
+ 483,
434
+ 1794,
435
+ 1335,
436
+ 1712,
437
+ 1449,
438
+ 529,
439
+ 1558,
440
+ 1463,
441
+ 1559,
442
+ 1706,
443
+ 1460,
444
+ 249,
445
+ 1308,
446
+ 293,
447
+ 529,
448
+ 841,
449
+ 201,
450
+ 1256,
451
+ 931,
452
+ 132,
453
+ 1173,
454
+ 479,
455
+ 286,
456
+ 1075,
457
+ 153,
458
+ 13,
459
+ 1503,
460
+ 398,
461
+ 415,
462
+ 432,
463
+ 7,
464
+ 183,
465
+ 103,
466
+ 409,
467
+ 736,
468
+ 15,
469
+ 940,
470
+ 1459,
471
+ 15,
472
+ 1631,
473
+ 1580,
474
+ 1773,
475
+ 624,
476
+ 1417,
477
+ 926,
478
+ 531,
479
+ 1159,
480
+ 1257
481
+ ]
482
+ },
483
+ {
484
+ "word": "know",
485
+ "duration": 0.44,
486
+ "codes": [
487
+ 777,
488
+ 1240,
489
+ 446,
490
+ 303,
491
+ 153,
492
+ 263,
493
+ 1402,
494
+ 317,
495
+ 1365,
496
+ 481,
497
+ 848,
498
+ 1280,
499
+ 354,
500
+ 1415,
501
+ 245,
502
+ 408,
503
+ 462,
504
+ 466,
505
+ 253,
506
+ 943,
507
+ 472,
508
+ 215,
509
+ 143,
510
+ 519,
511
+ 202,
512
+ 1389,
513
+ 1608,
514
+ 714,
515
+ 1599,
516
+ 399,
517
+ 944,
518
+ 124,
519
+ 844
520
+ ]
521
+ }
522
+ ]
523
+ }
yarngpt/default_speakers/umar.json ADDED
@@ -0,0 +1,469 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "text": "that i'd like to share with everybody in the world yes sometimes you go all the way",
3
+ "words": [
4
+ {
5
+ "word": "that",
6
+ "duration": 0.48,
7
+ "codes": [
8
+ 519,
9
+ 848,
10
+ 1374,
11
+ 416,
12
+ 940,
13
+ 1445,
14
+ 416,
15
+ 753,
16
+ 1616,
17
+ 774,
18
+ 803,
19
+ 1697,
20
+ 1541,
21
+ 1047,
22
+ 200,
23
+ 462,
24
+ 1417,
25
+ 1313,
26
+ 1296,
27
+ 184,
28
+ 1396,
29
+ 1568,
30
+ 1416,
31
+ 1444,
32
+ 1631,
33
+ 1463,
34
+ 702,
35
+ 1831,
36
+ 1564,
37
+ 1374,
38
+ 1580,
39
+ 1643,
40
+ 1681,
41
+ 1660,
42
+ 1124,
43
+ 1720
44
+ ]
45
+ },
46
+ {
47
+ "word": "id",
48
+ "duration": 0.38,
49
+ "codes": [
50
+ 4,
51
+ 705,
52
+ 1534,
53
+ 1290,
54
+ 1661,
55
+ 302,
56
+ 1798,
57
+ 844,
58
+ 197,
59
+ 1027,
60
+ 1606,
61
+ 903,
62
+ 1414,
63
+ 794,
64
+ 871,
65
+ 882,
66
+ 941,
67
+ 1310,
68
+ 871,
69
+ 1247,
70
+ 1140,
71
+ 1247,
72
+ 718,
73
+ 1422,
74
+ 1509,
75
+ 1678,
76
+ 1093,
77
+ 1734
78
+ ]
79
+ },
80
+ {
81
+ "word": "like",
82
+ "duration": 0.18,
83
+ "codes": [
84
+ 647,
85
+ 1824,
86
+ 474,
87
+ 1111,
88
+ 599,
89
+ 221,
90
+ 1435,
91
+ 822,
92
+ 1409,
93
+ 1717,
94
+ 1748,
95
+ 1550,
96
+ 1738,
97
+ 1717
98
+ ]
99
+ },
100
+ {
101
+ "word": "to",
102
+ "duration": 0.14,
103
+ "codes": [
104
+ 1535,
105
+ 231,
106
+ 1794,
107
+ 1553,
108
+ 1351,
109
+ 1365,
110
+ 1296,
111
+ 1781,
112
+ 1599,
113
+ 1082
114
+ ]
115
+ },
116
+ {
117
+ "word": "share",
118
+ "duration": 0.18,
119
+ "codes": [
120
+ 1737,
121
+ 0,
122
+ 979,
123
+ 1688,
124
+ 546,
125
+ 1807,
126
+ 319,
127
+ 252,
128
+ 1805,
129
+ 714,
130
+ 580,
131
+ 1524,
132
+ 798,
133
+ 1779
134
+ ]
135
+ },
136
+ {
137
+ "word": "with",
138
+ "duration": 0.14,
139
+ "codes": [
140
+ 1698,
141
+ 702,
142
+ 966,
143
+ 1461,
144
+ 127,
145
+ 1681,
146
+ 85,
147
+ 1741,
148
+ 1588,
149
+ 718
150
+ ]
151
+ },
152
+ {
153
+ "word": "everybody",
154
+ "duration": 0.4,
155
+ "codes": [
156
+ 1600,
157
+ 806,
158
+ 1770,
159
+ 1078,
160
+ 1727,
161
+ 679,
162
+ 1569,
163
+ 1452,
164
+ 1685,
165
+ 774,
166
+ 1598,
167
+ 1382,
168
+ 1520,
169
+ 1786,
170
+ 1702,
171
+ 1607,
172
+ 1747,
173
+ 828,
174
+ 1553,
175
+ 983,
176
+ 1103,
177
+ 882,
178
+ 1427,
179
+ 1679,
180
+ 1613,
181
+ 1636,
182
+ 1433,
183
+ 519,
184
+ 853,
185
+ 1451
186
+ ]
187
+ },
188
+ {
189
+ "word": "in",
190
+ "duration": 0.06,
191
+ "codes": [
192
+ 1369,
193
+ 1654,
194
+ 1581,
195
+ 1600,
196
+ 1452
197
+ ]
198
+ },
199
+ {
200
+ "word": "the",
201
+ "duration": 0.12,
202
+ "codes": [
203
+ 1241,
204
+ 1769,
205
+ 678,
206
+ 1751,
207
+ 1280,
208
+ 1711,
209
+ 1663,
210
+ 1772,
211
+ 1655
212
+ ]
213
+ },
214
+ {
215
+ "word": "world",
216
+ "duration": 0.74,
217
+ "codes": [
218
+ 973,
219
+ 1231,
220
+ 1015,
221
+ 1052,
222
+ 1415,
223
+ 721,
224
+ 1822,
225
+ 825,
226
+ 1076,
227
+ 1431,
228
+ 1357,
229
+ 1389,
230
+ 744,
231
+ 1263,
232
+ 1525,
233
+ 1794,
234
+ 319,
235
+ 1678,
236
+ 1732,
237
+ 1395,
238
+ 1695,
239
+ 1827,
240
+ 1059,
241
+ 1719,
242
+ 1675,
243
+ 1714,
244
+ 1635,
245
+ 1466,
246
+ 1730,
247
+ 1750,
248
+ 1395,
249
+ 1525,
250
+ 1827,
251
+ 1313,
252
+ 1440,
253
+ 1447,
254
+ 1292,
255
+ 1762,
256
+ 1226,
257
+ 1418,
258
+ 1750,
259
+ 719,
260
+ 1549,
261
+ 1761,
262
+ 1459,
263
+ 1717,
264
+ 1800,
265
+ 1404,
266
+ 1702,
267
+ 1795,
268
+ 1711,
269
+ 1789,
270
+ 1808,
271
+ 1759,
272
+ 385,
273
+ 415
274
+ ]
275
+ },
276
+ {
277
+ "word": "yes",
278
+ "duration": 0.32,
279
+ "codes": [
280
+ 302,
281
+ 1704,
282
+ 485,
283
+ 983,
284
+ 234,
285
+ 63,
286
+ 462,
287
+ 483,
288
+ 82,
289
+ 827,
290
+ 999,
291
+ 1143,
292
+ 102,
293
+ 1655,
294
+ 117,
295
+ 1619,
296
+ 519,
297
+ 1217,
298
+ 1518,
299
+ 1476,
300
+ 333,
301
+ 1660,
302
+ 1238,
303
+ 1679
304
+ ]
305
+ },
306
+ {
307
+ "word": "sometimes",
308
+ "duration": 0.58,
309
+ "codes": [
310
+ 1287,
311
+ 546,
312
+ 1552,
313
+ 1736,
314
+ 1647,
315
+ 836,
316
+ 575,
317
+ 354,
318
+ 1156,
319
+ 1264,
320
+ 1194,
321
+ 1761,
322
+ 1629,
323
+ 1452,
324
+ 1241,
325
+ 1394,
326
+ 856,
327
+ 1313,
328
+ 1653,
329
+ 736,
330
+ 556,
331
+ 1387,
332
+ 1824,
333
+ 966,
334
+ 373,
335
+ 1424,
336
+ 1342,
337
+ 221,
338
+ 580,
339
+ 1412,
340
+ 940,
341
+ 626,
342
+ 1797,
343
+ 858,
344
+ 972,
345
+ 1525,
346
+ 1744,
347
+ 738,
348
+ 1695,
349
+ 1542,
350
+ 1604,
351
+ 1394,
352
+ 1627
353
+ ]
354
+ },
355
+ {
356
+ "word": "you",
357
+ "duration": 0.12,
358
+ "codes": [
359
+ 1460,
360
+ 546,
361
+ 1427,
362
+ 1451,
363
+ 1081,
364
+ 1760,
365
+ 1463,
366
+ 1628,
367
+ 1692
368
+ ]
369
+ },
370
+ {
371
+ "word": "go",
372
+ "duration": 0.26,
373
+ "codes": [
374
+ 1521,
375
+ 1734,
376
+ 753,
377
+ 770,
378
+ 1640,
379
+ 1757,
380
+ 297,
381
+ 462,
382
+ 702,
383
+ 1826,
384
+ 1440,
385
+ 1828,
386
+ 1747,
387
+ 1651,
388
+ 1729,
389
+ 1087,
390
+ 580,
391
+ 1698,
392
+ 1194,
393
+ 1308
394
+ ]
395
+ },
396
+ {
397
+ "word": "all",
398
+ "duration": 0.42,
399
+ "codes": [
400
+ 863,
401
+ 610,
402
+ 429,
403
+ 443,
404
+ 1087,
405
+ 183,
406
+ 782,
407
+ 613,
408
+ 222,
409
+ 1047,
410
+ 1492,
411
+ 154,
412
+ 955,
413
+ 429,
414
+ 443,
415
+ 613,
416
+ 983,
417
+ 328,
418
+ 382,
419
+ 359,
420
+ 341,
421
+ 217,
422
+ 456,
423
+ 289,
424
+ 1324,
425
+ 714,
426
+ 756,
427
+ 369,
428
+ 211,
429
+ 127,
430
+ 1827,
431
+ 1563
432
+ ]
433
+ },
434
+ {
435
+ "word": "the",
436
+ "duration": 0.12,
437
+ "codes": [
438
+ 1686,
439
+ 949,
440
+ 1296,
441
+ 829,
442
+ 1463,
443
+ 1731,
444
+ 1222,
445
+ 1353,
446
+ 1780
447
+ ]
448
+ },
449
+ {
450
+ "word": "way",
451
+ "duration": 0.18,
452
+ "codes": [
453
+ 1263,
454
+ 890,
455
+ 683,
456
+ 289,
457
+ 217,
458
+ 326,
459
+ 335,
460
+ 1059,
461
+ 1204,
462
+ 213,
463
+ 1340,
464
+ 289,
465
+ 191
466
+ ]
467
+ }
468
+ ]
469
+ }
yarngpt/default_speakers/zainab.json ADDED
@@ -0,0 +1,457 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "text": "mama giver her because she gave so",
3
+ "words": [
4
+ {
5
+ "word": "mama",
6
+ "duration": 1.46,
7
+ "codes": [
8
+ 1734,
9
+ 1812,
10
+ 1759,
11
+ 1721,
12
+ 1765,
13
+ 1769,
14
+ 1805,
15
+ 1800,
16
+ 1734,
17
+ 1380,
18
+ 1706,
19
+ 1724,
20
+ 1695,
21
+ 1769,
22
+ 1772,
23
+ 1689,
24
+ 1511,
25
+ 339,
26
+ 1077,
27
+ 1492,
28
+ 1494,
29
+ 1353,
30
+ 890,
31
+ 753,
32
+ 29,
33
+ 607,
34
+ 1812,
35
+ 1310,
36
+ 1326,
37
+ 1497,
38
+ 818,
39
+ 1716,
40
+ 1776,
41
+ 1155,
42
+ 1645,
43
+ 1545,
44
+ 1371,
45
+ 1454,
46
+ 1205,
47
+ 1464,
48
+ 703,
49
+ 1096,
50
+ 1285,
51
+ 1811,
52
+ 1494,
53
+ 738,
54
+ 1248,
55
+ 1725,
56
+ 952,
57
+ 230,
58
+ 1415,
59
+ 1691,
60
+ 1718,
61
+ 41,
62
+ 1685,
63
+ 1783,
64
+ 1092,
65
+ 1346,
66
+ 954,
67
+ 776,
68
+ 702,
69
+ 1157,
70
+ 1152,
71
+ 1768,
72
+ 572,
73
+ 1025,
74
+ 1750,
75
+ 1231,
76
+ 900,
77
+ 1764,
78
+ 1246,
79
+ 1572,
80
+ 1711,
81
+ 1534,
82
+ 1320,
83
+ 1389,
84
+ 197,
85
+ 1584,
86
+ 1019,
87
+ 1576,
88
+ 1027,
89
+ 1402,
90
+ 506,
91
+ 1402,
92
+ 617,
93
+ 1490,
94
+ 1358,
95
+ 770,
96
+ 1666,
97
+ 1025,
98
+ 921,
99
+ 1658,
100
+ 830,
101
+ 1062,
102
+ 1598,
103
+ 1095,
104
+ 1174,
105
+ 1680,
106
+ 1501,
107
+ 1332,
108
+ 1827,
109
+ 1588,
110
+ 231,
111
+ 1633,
112
+ 1591,
113
+ 736,
114
+ 1825,
115
+ 1696,
116
+ 1614
117
+ ]
118
+ },
119
+ {
120
+ "word": "giver",
121
+ "duration": 0.36,
122
+ "codes": [
123
+ 1346,
124
+ 404,
125
+ 1270,
126
+ 1389,
127
+ 1363,
128
+ 1426,
129
+ 1008,
130
+ 473,
131
+ 1341,
132
+ 1604,
133
+ 1773,
134
+ 385,
135
+ 1685,
136
+ 736,
137
+ 1778,
138
+ 1577,
139
+ 1189,
140
+ 1830,
141
+ 973,
142
+ 1192,
143
+ 1624,
144
+ 1766,
145
+ 1344,
146
+ 1542,
147
+ 1463,
148
+ 1253,
149
+ 1554
150
+ ]
151
+ },
152
+ {
153
+ "word": "her",
154
+ "duration": 1.89,
155
+ "codes": [
156
+ 1828,
157
+ 1287,
158
+ 1520,
159
+ 1671,
160
+ 1546,
161
+ 932,
162
+ 1367,
163
+ 1176,
164
+ 953,
165
+ 1225,
166
+ 1508,
167
+ 1822,
168
+ 1642,
169
+ 381,
170
+ 1003,
171
+ 1288,
172
+ 355,
173
+ 627,
174
+ 256,
175
+ 1231,
176
+ 822,
177
+ 863,
178
+ 1826,
179
+ 788,
180
+ 1786,
181
+ 1796,
182
+ 1585,
183
+ 1266,
184
+ 1236,
185
+ 1157,
186
+ 476,
187
+ 1425,
188
+ 1814,
189
+ 1488,
190
+ 1763,
191
+ 343,
192
+ 385,
193
+ 1419,
194
+ 1413,
195
+ 1537,
196
+ 1465,
197
+ 1413,
198
+ 1689,
199
+ 975,
200
+ 27,
201
+ 1804,
202
+ 1766,
203
+ 1750,
204
+ 1612,
205
+ 1293,
206
+ 1613,
207
+ 1629,
208
+ 1011,
209
+ 1572,
210
+ 1708,
211
+ 1669,
212
+ 1440,
213
+ 1598,
214
+ 1514,
215
+ 1773,
216
+ 1166,
217
+ 1769,
218
+ 923,
219
+ 1792,
220
+ 1764,
221
+ 1491,
222
+ 1807,
223
+ 1768,
224
+ 1157,
225
+ 1808,
226
+ 1491,
227
+ 1721,
228
+ 1816,
229
+ 1783,
230
+ 901,
231
+ 1468,
232
+ 1824,
233
+ 1743,
234
+ 1801,
235
+ 1745,
236
+ 1656,
237
+ 1425,
238
+ 1745,
239
+ 1775,
240
+ 1807,
241
+ 714,
242
+ 1755,
243
+ 1704,
244
+ 1661,
245
+ 1493,
246
+ 776,
247
+ 1783,
248
+ 416,
249
+ 1670,
250
+ 1406,
251
+ 1769,
252
+ 362,
253
+ 1636,
254
+ 1464,
255
+ 1651,
256
+ 1403,
257
+ 1800,
258
+ 1426,
259
+ 1831,
260
+ 1827,
261
+ 1160,
262
+ 1759,
263
+ 1720,
264
+ 1651,
265
+ 1762,
266
+ 1331,
267
+ 1746,
268
+ 1433,
269
+ 1466,
270
+ 1023,
271
+ 1425,
272
+ 1742,
273
+ 486,
274
+ 1771,
275
+ 1816,
276
+ 1301,
277
+ 1583,
278
+ 320,
279
+ 1300,
280
+ 315,
281
+ 52,
282
+ 1217,
283
+ 67,
284
+ 502,
285
+ 1485,
286
+ 848,
287
+ 1734,
288
+ 1387,
289
+ 1783,
290
+ 1626,
291
+ 920,
292
+ 361,
293
+ 1715,
294
+ 1657,
295
+ 1560,
296
+ 85,
297
+ 1562
298
+ ]
299
+ },
300
+ {
301
+ "word": "because",
302
+ "duration": 0.48,
303
+ "codes": [
304
+ 1756,
305
+ 844,
306
+ 245,
307
+ 1310,
308
+ 312,
309
+ 344,
310
+ 1734,
311
+ 1319,
312
+ 1722,
313
+ 1386,
314
+ 1230,
315
+ 461,
316
+ 1344,
317
+ 847,
318
+ 658,
319
+ 1078,
320
+ 1554,
321
+ 537,
322
+ 987,
323
+ 848,
324
+ 1055,
325
+ 840,
326
+ 1710,
327
+ 736,
328
+ 1679,
329
+ 213,
330
+ 844,
331
+ 731,
332
+ 631,
333
+ 1638,
334
+ 166,
335
+ 858,
336
+ 1535,
337
+ 50,
338
+ 1651,
339
+ 713
340
+ ]
341
+ },
342
+ {
343
+ "word": "she",
344
+ "duration": 0.38,
345
+ "codes": [
346
+ 556,
347
+ 1735,
348
+ 654,
349
+ 1524,
350
+ 1769,
351
+ 1387,
352
+ 639,
353
+ 1463,
354
+ 1625,
355
+ 1726,
356
+ 1664,
357
+ 1691,
358
+ 1531,
359
+ 1603,
360
+ 1833,
361
+ 121,
362
+ 1627,
363
+ 1757,
364
+ 736,
365
+ 1583,
366
+ 1684,
367
+ 1741,
368
+ 1831,
369
+ 1791,
370
+ 1034,
371
+ 1807,
372
+ 1338,
373
+ 1737
374
+ ]
375
+ },
376
+ {
377
+ "word": "gave",
378
+ "duration": 0.76,
379
+ "codes": [
380
+ 1790,
381
+ 430,
382
+ 1310,
383
+ 399,
384
+ 599,
385
+ 1542,
386
+ 1394,
387
+ 1075,
388
+ 834,
389
+ 428,
390
+ 1015,
391
+ 249,
392
+ 362,
393
+ 945,
394
+ 108,
395
+ 1308,
396
+ 29,
397
+ 362,
398
+ 1766,
399
+ 448,
400
+ 1370,
401
+ 197,
402
+ 298,
403
+ 1353,
404
+ 1566,
405
+ 1485,
406
+ 1341,
407
+ 1544,
408
+ 1468,
409
+ 1366,
410
+ 849,
411
+ 1584,
412
+ 1441,
413
+ 1696,
414
+ 1610,
415
+ 1702,
416
+ 702,
417
+ 1508,
418
+ 1653,
419
+ 1508,
420
+ 1535,
421
+ 502,
422
+ 1485,
423
+ 232,
424
+ 648,
425
+ 863,
426
+ 631,
427
+ 348,
428
+ 372,
429
+ 129,
430
+ 1296,
431
+ 253,
432
+ 1599,
433
+ 1364,
434
+ 315,
435
+ 920,
436
+ 18,
437
+ 183
438
+ ]
439
+ },
440
+ {
441
+ "word": "so",
442
+ "duration": 0.14,
443
+ "codes": [
444
+ 428,
445
+ 372,
446
+ 15,
447
+ 202,
448
+ 286,
449
+ 1344,
450
+ 714,
451
+ 966,
452
+ 1341,
453
+ 184
454
+ ]
455
+ }
456
+ ]
457
+ }
yarngpt/default_speakers_local/hausa_female1.json ADDED
@@ -0,0 +1,273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "text": "Idan hira tayi \u0257a\u0257i bana son na tashi.",
3
+ "words": [
4
+ {
5
+ "word": "idan",
6
+ "duration": "0.52",
7
+ "codes": [
8
+ 165,
9
+ 338,
10
+ 781,
11
+ 661,
12
+ 601,
13
+ 691,
14
+ 1154,
15
+ 762,
16
+ 691,
17
+ 523,
18
+ 641,
19
+ 378,
20
+ 1464,
21
+ 38,
22
+ 1280,
23
+ 243,
24
+ 1784,
25
+ 195,
26
+ 5,
27
+ 1679,
28
+ 77,
29
+ 530,
30
+ 1527,
31
+ 270,
32
+ 243,
33
+ 374,
34
+ 200,
35
+ 157,
36
+ 152,
37
+ 228,
38
+ 768,
39
+ 743,
40
+ 104,
41
+ 221,
42
+ 968,
43
+ 479,
44
+ 321,
45
+ 1679,
46
+ 1279
47
+ ]
48
+ },
49
+ {
50
+ "word": "hira",
51
+ "duration": "0.38",
52
+ "codes": [
53
+ 1587,
54
+ 1544,
55
+ 683,
56
+ 92,
57
+ 1255,
58
+ 46,
59
+ 106,
60
+ 636,
61
+ 320,
62
+ 53,
63
+ 249,
64
+ 123,
65
+ 1140,
66
+ 1290,
67
+ 93,
68
+ 553,
69
+ 0,
70
+ 1192,
71
+ 210,
72
+ 587,
73
+ 1184,
74
+ 764,
75
+ 215,
76
+ 221,
77
+ 2,
78
+ 1115,
79
+ 1079,
80
+ 1033
81
+ ]
82
+ },
83
+ {
84
+ "word": "tayi",
85
+ "duration": "0.38",
86
+ "codes": [
87
+ 447,
88
+ 1292,
89
+ 198,
90
+ 50,
91
+ 1439,
92
+ 1191,
93
+ 1399,
94
+ 106,
95
+ 880,
96
+ 844,
97
+ 306,
98
+ 466,
99
+ 74,
100
+ 260,
101
+ 152,
102
+ 723,
103
+ 723,
104
+ 687,
105
+ 306,
106
+ 195,
107
+ 648,
108
+ 466,
109
+ 30,
110
+ 1110,
111
+ 637,
112
+ 384,
113
+ 1131,
114
+ 342,
115
+ 392
116
+ ]
117
+ },
118
+ {
119
+ "word": "dadi",
120
+ "duration": "0.38",
121
+ "codes": [
122
+ 751,
123
+ 412,
124
+ 212,
125
+ 306,
126
+ 388,
127
+ 589,
128
+ 446,
129
+ 479,
130
+ 880,
131
+ 768,
132
+ 467,
133
+ 699,
134
+ 128,
135
+ 665,
136
+ 882,
137
+ 908,
138
+ 171,
139
+ 1146,
140
+ 1297,
141
+ 687,
142
+ 901,
143
+ 1110,
144
+ 153,
145
+ 386,
146
+ 1330,
147
+ 1283,
148
+ 1181,
149
+ 1070,
150
+ 766
151
+ ]
152
+ },
153
+ {
154
+ "word": "bana",
155
+ "duration": "0.46",
156
+ "codes": [
157
+ 534,
158
+ 1440,
159
+ 1102,
160
+ 1194,
161
+ 474,
162
+ 252,
163
+ 39,
164
+ 367,
165
+ 116,
166
+ 212,
167
+ 36,
168
+ 115,
169
+ 76,
170
+ 1173,
171
+ 931,
172
+ 1285,
173
+ 1630,
174
+ 678,
175
+ 1087,
176
+ 208,
177
+ 1055,
178
+ 441,
179
+ 545,
180
+ 324,
181
+ 1192,
182
+ 179,
183
+ 1147,
184
+ 897,
185
+ 1387,
186
+ 1283,
187
+ 10,
188
+ 1,
189
+ 654,
190
+ 863,
191
+ 103
192
+ ]
193
+ },
194
+ {
195
+ "word": "son",
196
+ "duration": "0.22",
197
+ "codes": [
198
+ 198,
199
+ 507,
200
+ 1477,
201
+ 915,
202
+ 215,
203
+ 267,
204
+ 1232,
205
+ 1041,
206
+ 569,
207
+ 1596,
208
+ 1759,
209
+ 229,
210
+ 901,
211
+ 1774,
212
+ 1487,
213
+ 51
214
+ ]
215
+ },
216
+ {
217
+ "word": "na",
218
+ "duration": "0.16",
219
+ "codes": [
220
+ 251,
221
+ 243,
222
+ 965,
223
+ 215,
224
+ 135,
225
+ 711,
226
+ 105,
227
+ 1350,
228
+ 1556,
229
+ 226,
230
+ 459,
231
+ 68
232
+ ]
233
+ },
234
+ {
235
+ "word": "tashi",
236
+ "duration": "0.42",
237
+ "codes": [
238
+ 20,
239
+ 502,
240
+ 610,
241
+ 179,
242
+ 711,
243
+ 800,
244
+ 424,
245
+ 352,
246
+ 102,
247
+ 569,
248
+ 67,
249
+ 262,
250
+ 855,
251
+ 413,
252
+ 63,
253
+ 701,
254
+ 1719,
255
+ 262,
256
+ 383,
257
+ 1166,
258
+ 358,
259
+ 1331,
260
+ 596,
261
+ 383,
262
+ 1351,
263
+ 96,
264
+ 1170,
265
+ 1061,
266
+ 1059,
267
+ 1392,
268
+ 328,
269
+ 1471
270
+ ]
271
+ }
272
+ ]
273
+ }
yarngpt/default_speakers_local/hausa_female2.json ADDED
@@ -0,0 +1,273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "text": "Idan hira tayi \u0257a\u0257i bana son na tashi.",
3
+ "words": [
4
+ {
5
+ "word": "idan",
6
+ "duration": "0.52",
7
+ "codes": [
8
+ 165,
9
+ 338,
10
+ 781,
11
+ 661,
12
+ 601,
13
+ 691,
14
+ 1154,
15
+ 762,
16
+ 691,
17
+ 523,
18
+ 641,
19
+ 378,
20
+ 1464,
21
+ 38,
22
+ 1280,
23
+ 243,
24
+ 1784,
25
+ 195,
26
+ 5,
27
+ 1679,
28
+ 77,
29
+ 530,
30
+ 1527,
31
+ 270,
32
+ 243,
33
+ 374,
34
+ 200,
35
+ 157,
36
+ 152,
37
+ 228,
38
+ 768,
39
+ 743,
40
+ 104,
41
+ 221,
42
+ 968,
43
+ 479,
44
+ 321,
45
+ 1679,
46
+ 1279
47
+ ]
48
+ },
49
+ {
50
+ "word": "hira",
51
+ "duration": "0.38",
52
+ "codes": [
53
+ 1587,
54
+ 1544,
55
+ 683,
56
+ 92,
57
+ 1255,
58
+ 46,
59
+ 106,
60
+ 636,
61
+ 320,
62
+ 53,
63
+ 249,
64
+ 123,
65
+ 1140,
66
+ 1290,
67
+ 93,
68
+ 553,
69
+ 0,
70
+ 1192,
71
+ 210,
72
+ 587,
73
+ 1184,
74
+ 764,
75
+ 215,
76
+ 221,
77
+ 2,
78
+ 1115,
79
+ 1079,
80
+ 1033
81
+ ]
82
+ },
83
+ {
84
+ "word": "tayi",
85
+ "duration": "0.38",
86
+ "codes": [
87
+ 447,
88
+ 1292,
89
+ 198,
90
+ 50,
91
+ 1439,
92
+ 1191,
93
+ 1399,
94
+ 106,
95
+ 880,
96
+ 844,
97
+ 306,
98
+ 466,
99
+ 74,
100
+ 260,
101
+ 152,
102
+ 723,
103
+ 723,
104
+ 687,
105
+ 306,
106
+ 195,
107
+ 648,
108
+ 466,
109
+ 30,
110
+ 1110,
111
+ 637,
112
+ 384,
113
+ 1131,
114
+ 342,
115
+ 392
116
+ ]
117
+ },
118
+ {
119
+ "word": "dadi",
120
+ "duration": "0.38",
121
+ "codes": [
122
+ 751,
123
+ 412,
124
+ 212,
125
+ 306,
126
+ 388,
127
+ 589,
128
+ 446,
129
+ 479,
130
+ 880,
131
+ 768,
132
+ 467,
133
+ 699,
134
+ 128,
135
+ 665,
136
+ 882,
137
+ 908,
138
+ 171,
139
+ 1146,
140
+ 1297,
141
+ 687,
142
+ 901,
143
+ 1110,
144
+ 153,
145
+ 386,
146
+ 1330,
147
+ 1283,
148
+ 1181,
149
+ 1070,
150
+ 766
151
+ ]
152
+ },
153
+ {
154
+ "word": "bana",
155
+ "duration": "0.46",
156
+ "codes": [
157
+ 534,
158
+ 1440,
159
+ 1102,
160
+ 1194,
161
+ 474,
162
+ 252,
163
+ 39,
164
+ 367,
165
+ 116,
166
+ 212,
167
+ 36,
168
+ 115,
169
+ 76,
170
+ 1173,
171
+ 931,
172
+ 1285,
173
+ 1630,
174
+ 678,
175
+ 1087,
176
+ 208,
177
+ 1055,
178
+ 441,
179
+ 545,
180
+ 324,
181
+ 1192,
182
+ 179,
183
+ 1147,
184
+ 897,
185
+ 1387,
186
+ 1283,
187
+ 10,
188
+ 1,
189
+ 654,
190
+ 863,
191
+ 103
192
+ ]
193
+ },
194
+ {
195
+ "word": "son",
196
+ "duration": "0.22",
197
+ "codes": [
198
+ 198,
199
+ 507,
200
+ 1477,
201
+ 915,
202
+ 215,
203
+ 267,
204
+ 1232,
205
+ 1041,
206
+ 569,
207
+ 1596,
208
+ 1759,
209
+ 229,
210
+ 901,
211
+ 1774,
212
+ 1487,
213
+ 51
214
+ ]
215
+ },
216
+ {
217
+ "word": "na",
218
+ "duration": "0.16",
219
+ "codes": [
220
+ 251,
221
+ 243,
222
+ 965,
223
+ 215,
224
+ 135,
225
+ 711,
226
+ 105,
227
+ 1350,
228
+ 1556,
229
+ 226,
230
+ 459,
231
+ 68
232
+ ]
233
+ },
234
+ {
235
+ "word": "tashi",
236
+ "duration": "0.42",
237
+ "codes": [
238
+ 20,
239
+ 502,
240
+ 610,
241
+ 179,
242
+ 711,
243
+ 800,
244
+ 424,
245
+ 352,
246
+ 102,
247
+ 569,
248
+ 67,
249
+ 262,
250
+ 855,
251
+ 413,
252
+ 63,
253
+ 701,
254
+ 1719,
255
+ 262,
256
+ 383,
257
+ 1166,
258
+ 358,
259
+ 1331,
260
+ 596,
261
+ 383,
262
+ 1351,
263
+ 96,
264
+ 1170,
265
+ 1061,
266
+ 1059,
267
+ 1392,
268
+ 328,
269
+ 1471
270
+ ]
271
+ }
272
+ ]
273
+ }
yarngpt/default_speakers_local/hausa_male1.json ADDED
@@ -0,0 +1,367 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "text": "Eh, mun za\u0253i yin wasan kwaikwayo don nuna al'adunmu yayin ranar al'ada.",
3
+ "words": [
4
+ {
5
+ "word": "eh",
6
+ "duration": "0.86",
7
+ "codes": [
8
+ 165,
9
+ 226,
10
+ 1145,
11
+ 284,
12
+ 77,
13
+ 187,
14
+ 459,
15
+ 77,
16
+ 691,
17
+ 278,
18
+ 643,
19
+ 247,
20
+ 156,
21
+ 204,
22
+ 89,
23
+ 1247,
24
+ 52,
25
+ 1350,
26
+ 433,
27
+ 812,
28
+ 328,
29
+ 553,
30
+ 648,
31
+ 602,
32
+ 1075,
33
+ 243,
34
+ 557,
35
+ 507,
36
+ 645,
37
+ 352,
38
+ 29,
39
+ 451,
40
+ 83,
41
+ 787,
42
+ 10,
43
+ 1000,
44
+ 1791,
45
+ 620,
46
+ 188,
47
+ 1681,
48
+ 447,
49
+ 752,
50
+ 1405,
51
+ 1070,
52
+ 861,
53
+ 1142,
54
+ 163,
55
+ 1293,
56
+ 674,
57
+ 250,
58
+ 724,
59
+ 259,
60
+ 624,
61
+ 676,
62
+ 259,
63
+ 1114,
64
+ 526,
65
+ 199,
66
+ 724,
67
+ 163,
68
+ 168,
69
+ 447,
70
+ 663,
71
+ 1471
72
+ ]
73
+ },
74
+ {
75
+ "word": "mun",
76
+ "duration": "0.22",
77
+ "codes": [
78
+ 651,
79
+ 617,
80
+ 1411,
81
+ 389,
82
+ 1329,
83
+ 491,
84
+ 1680,
85
+ 1053,
86
+ 618,
87
+ 488,
88
+ 1494,
89
+ 1224,
90
+ 1259,
91
+ 1317,
92
+ 1457,
93
+ 508,
94
+ 1341
95
+ ]
96
+ },
97
+ {
98
+ "word": "zabi",
99
+ "duration": "0.40",
100
+ "codes": [
101
+ 1777,
102
+ 0,
103
+ 1794,
104
+ 83,
105
+ 74,
106
+ 462,
107
+ 1170,
108
+ 1212,
109
+ 159,
110
+ 1361,
111
+ 384,
112
+ 373,
113
+ 218,
114
+ 613,
115
+ 1583,
116
+ 1311,
117
+ 188,
118
+ 1466,
119
+ 338,
120
+ 405,
121
+ 1321,
122
+ 307,
123
+ 1161,
124
+ 1623,
125
+ 293,
126
+ 1644,
127
+ 858,
128
+ 703,
129
+ 911,
130
+ 326
131
+ ]
132
+ },
133
+ {
134
+ "word": "yin",
135
+ "duration": "0.20",
136
+ "codes": [
137
+ 1715,
138
+ 870,
139
+ 341,
140
+ 1711,
141
+ 1542,
142
+ 429,
143
+ 1565,
144
+ 326,
145
+ 1771,
146
+ 966,
147
+ 91,
148
+ 614,
149
+ 620,
150
+ 647,
151
+ 1755
152
+ ]
153
+ },
154
+ {
155
+ "word": "wasan",
156
+ "duration": "0.44",
157
+ "codes": [
158
+ 1070,
159
+ 520,
160
+ 973,
161
+ 754,
162
+ 83,
163
+ 997,
164
+ 1253,
165
+ 982,
166
+ 359,
167
+ 537,
168
+ 1115,
169
+ 1677,
170
+ 1358,
171
+ 1250,
172
+ 1403,
173
+ 1637,
174
+ 881,
175
+ 382,
176
+ 1754,
177
+ 589,
178
+ 1131,
179
+ 88,
180
+ 1256,
181
+ 988,
182
+ 83,
183
+ 672,
184
+ 644,
185
+ 847,
186
+ 322,
187
+ 983,
188
+ 1305,
189
+ 31,
190
+ 967
191
+ ]
192
+ },
193
+ {
194
+ "word": "kwaikwayo",
195
+ "duration": "0.58",
196
+ "codes": [
197
+ 1071,
198
+ 1003,
199
+ 1811,
200
+ 684,
201
+ 1210,
202
+ 553,
203
+ 1535,
204
+ 491,
205
+ 398,
206
+ 222,
207
+ 315,
208
+ 439,
209
+ 205,
210
+ 174,
211
+ 1742,
212
+ 1373,
213
+ 259,
214
+ 1185,
215
+ 1787,
216
+ 516,
217
+ 1440,
218
+ 646,
219
+ 1402,
220
+ 267,
221
+ 1677,
222
+ 553,
223
+ 344,
224
+ 429,
225
+ 202,
226
+ 389,
227
+ 782,
228
+ 662,
229
+ 388,
230
+ 177,
231
+ 553,
232
+ 1413,
233
+ 491,
234
+ 554,
235
+ 222,
236
+ 759,
237
+ 111,
238
+ 1719,
239
+ 1305,
240
+ 437
241
+ ]
242
+ },
243
+ {
244
+ "word": "don",
245
+ "duration": "0.24",
246
+ "codes": [
247
+ 144,
248
+ 824,
249
+ 90,
250
+ 637,
251
+ 439,
252
+ 138,
253
+ 593,
254
+ 609,
255
+ 617,
256
+ 1247,
257
+ 444,
258
+ 793,
259
+ 600,
260
+ 1425,
261
+ 1379,
262
+ 283,
263
+ 995,
264
+ 1804
265
+ ]
266
+ },
267
+ {
268
+ "word": "nuna",
269
+ "duration": "0.40",
270
+ "codes": [
271
+ 389,
272
+ 669,
273
+ 1804,
274
+ 506,
275
+ 1668,
276
+ 1621,
277
+ 341,
278
+ 913,
279
+ 1495,
280
+ 1819,
281
+ 112,
282
+ 647,
283
+ 743,
284
+ 1612,
285
+ 506,
286
+ 1320,
287
+ 1648,
288
+ 106,
289
+ 1107,
290
+ 579,
291
+ 326,
292
+ 140,
293
+ 1220,
294
+ 936,
295
+ 661,
296
+ 729,
297
+ 1183,
298
+ 441,
299
+ 797,
300
+ 309
301
+ ]
302
+ },
303
+ {
304
+ "word": "aladunmu",
305
+ "duration": "0.76",
306
+ "codes": [
307
+ 1260,
308
+ 179,
309
+ 1240,
310
+ 68,
311
+ 753,
312
+ 807,
313
+ 1808,
314
+ 894,
315
+ 140,
316
+ 791,
317
+ 1486,
318
+ 1276,
319
+ 1471,
320
+ 1132,
321
+ 573,
322
+ 797,
323
+ 1307,
324
+ 271,
325
+ 632,
326
+ 1059,
327
+ 699,
328
+ 816,
329
+ 282,
330
+ 908,
331
+ 1240,
332
+ 41,
333
+ 144,
334
+ 1721,
335
+ 322,
336
+ 237,
337
+ 1284,
338
+ 1312,
339
+ 1444,
340
+ 521,
341
+ 593,
342
+ 753,
343
+ 506,
344
+ 1024,
345
+ 439,
346
+ 1142,
347
+ 1790,
348
+ 478,
349
+ 1164,
350
+ 953,
351
+ 1727,
352
+ 1078,
353
+ 564,
354
+ 1665,
355
+ 482,
356
+ 976,
357
+ 910,
358
+ 727,
359
+ 297,
360
+ 677,
361
+ 297,
362
+ 507,
363
+ 1157
364
+ ]
365
+ }
366
+ ]
367
+ }
yarngpt/default_speakers_local/hausa_male2.json ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "text": "Audu ya hau jirgi a Kaduna.",
3
+ "words": [
4
+ {
5
+ "word": "audu",
6
+ "duration": "0.75",
7
+ "codes": [
8
+ 165,
9
+ 167,
10
+ 68,
11
+ 567,
12
+ 156,
13
+ 351,
14
+ 337,
15
+ 156,
16
+ 351,
17
+ 337,
18
+ 337,
19
+ 219,
20
+ 584,
21
+ 156,
22
+ 762,
23
+ 334,
24
+ 185,
25
+ 156,
26
+ 334,
27
+ 762,
28
+ 156,
29
+ 337,
30
+ 612,
31
+ 219,
32
+ 691,
33
+ 185,
34
+ 156,
35
+ 204,
36
+ 862,
37
+ 777,
38
+ 589,
39
+ 173,
40
+ 550,
41
+ 128,
42
+ 489,
43
+ 182,
44
+ 74,
45
+ 255,
46
+ 427,
47
+ 1554,
48
+ 945,
49
+ 289,
50
+ 79,
51
+ 875,
52
+ 442,
53
+ 1664,
54
+ 464,
55
+ 230,
56
+ 1500,
57
+ 181,
58
+ 1152,
59
+ 286,
60
+ 103,
61
+ 662,
62
+ 125
63
+ ]
64
+ },
65
+ {
66
+ "word": "ya",
67
+ "duration": "0.22",
68
+ "codes": [
69
+ 201,
70
+ 1332,
71
+ 67,
72
+ 1041,
73
+ 248,
74
+ 901,
75
+ 352,
76
+ 969,
77
+ 642,
78
+ 105,
79
+ 215,
80
+ 411,
81
+ 408,
82
+ 1235,
83
+ 1212,
84
+ 1264,
85
+ 653
86
+ ]
87
+ },
88
+ {
89
+ "word": "hau",
90
+ "duration": "0.22",
91
+ "codes": [
92
+ 1083,
93
+ 913,
94
+ 1026,
95
+ 1295,
96
+ 1473,
97
+ 1399,
98
+ 41,
99
+ 629,
100
+ 1081,
101
+ 623,
102
+ 536,
103
+ 890,
104
+ 1554,
105
+ 384,
106
+ 1664,
107
+ 921,
108
+ 325
109
+ ]
110
+ },
111
+ {
112
+ "word": "jirgi",
113
+ "duration": "0.48",
114
+ "codes": [
115
+ 486,
116
+ 1536,
117
+ 597,
118
+ 1088,
119
+ 1743,
120
+ 1286,
121
+ 340,
122
+ 949,
123
+ 116,
124
+ 1441,
125
+ 1550,
126
+ 28,
127
+ 1073,
128
+ 973,
129
+ 233,
130
+ 1319,
131
+ 733,
132
+ 465,
133
+ 1152,
134
+ 1644,
135
+ 773,
136
+ 1651,
137
+ 175,
138
+ 1281,
139
+ 1563,
140
+ 11,
141
+ 1773,
142
+ 1323,
143
+ 30,
144
+ 10,
145
+ 424,
146
+ 293,
147
+ 1437,
148
+ 1484,
149
+ 1072,
150
+ 370
151
+ ]
152
+ },
153
+ {
154
+ "word": "a",
155
+ "duration": "0.10",
156
+ "codes": [
157
+ 159,
158
+ 697,
159
+ 53,
160
+ 1040,
161
+ 1256,
162
+ 264,
163
+ 710,
164
+ 1251
165
+ ]
166
+ },
167
+ {
168
+ "word": "kaduna",
169
+ "duration": "0.44",
170
+ "codes": [
171
+ 1203,
172
+ 764,
173
+ 1473,
174
+ 1156,
175
+ 400,
176
+ 212,
177
+ 1698,
178
+ 1217,
179
+ 145,
180
+ 1569,
181
+ 1151,
182
+ 1056,
183
+ 1700,
184
+ 1527,
185
+ 629,
186
+ 1747,
187
+ 1350,
188
+ 738,
189
+ 1734,
190
+ 55,
191
+ 1595,
192
+ 890,
193
+ 55,
194
+ 1364,
195
+ 203,
196
+ 281,
197
+ 952,
198
+ 1234,
199
+ 452,
200
+ 93,
201
+ 1036,
202
+ 565,
203
+ 969
204
+ ]
205
+ }
206
+ ]
207
+ }
yarngpt/default_speakers_local/igbo_female1.json ADDED
@@ -0,0 +1,246 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "text": "Codeine na-agba ah\u1ee5 \u1ecbnweta.",
3
+ "words": [
4
+ {
5
+ "word": "codeine",
6
+ "duration": "0.68",
7
+ "codes": [
8
+ 165,
9
+ 336,
10
+ 1359,
11
+ 661,
12
+ 199,
13
+ 379,
14
+ 585,
15
+ 1742,
16
+ 210,
17
+ 303,
18
+ 388,
19
+ 412,
20
+ 1772,
21
+ 794,
22
+ 1607,
23
+ 467,
24
+ 622,
25
+ 201,
26
+ 575,
27
+ 447,
28
+ 319,
29
+ 352,
30
+ 234,
31
+ 1797,
32
+ 405,
33
+ 1703,
34
+ 1831,
35
+ 1163,
36
+ 1826,
37
+ 1152,
38
+ 563,
39
+ 696,
40
+ 1284,
41
+ 157,
42
+ 100,
43
+ 402,
44
+ 315,
45
+ 1036,
46
+ 1298,
47
+ 592,
48
+ 1177,
49
+ 665,
50
+ 7,
51
+ 794,
52
+ 509,
53
+ 192,
54
+ 1092,
55
+ 821,
56
+ 1022,
57
+ 834,
58
+ 132
59
+ ]
60
+ },
61
+ {
62
+ "word": "na",
63
+ "duration": "0.20",
64
+ "codes": [
65
+ 1764,
66
+ 1340,
67
+ 1394,
68
+ 1341,
69
+ 146,
70
+ 303,
71
+ 1102,
72
+ 172,
73
+ 366,
74
+ 1263,
75
+ 708,
76
+ 164,
77
+ 836,
78
+ 1424,
79
+ 81
80
+ ]
81
+ },
82
+ {
83
+ "word": "agba",
84
+ "duration": "0.76",
85
+ "codes": [
86
+ 994,
87
+ 841,
88
+ 816,
89
+ 744,
90
+ 1743,
91
+ 1051,
92
+ 1023,
93
+ 1556,
94
+ 331,
95
+ 1706,
96
+ 160,
97
+ 160,
98
+ 403,
99
+ 142,
100
+ 565,
101
+ 723,
102
+ 140,
103
+ 874,
104
+ 339,
105
+ 186,
106
+ 1229,
107
+ 309,
108
+ 461,
109
+ 1015,
110
+ 81,
111
+ 297,
112
+ 1206,
113
+ 1041,
114
+ 585,
115
+ 960,
116
+ 1007,
117
+ 223,
118
+ 578,
119
+ 1142,
120
+ 242,
121
+ 1215,
122
+ 261,
123
+ 857,
124
+ 1390,
125
+ 334,
126
+ 837,
127
+ 735,
128
+ 334,
129
+ 649,
130
+ 563,
131
+ 544,
132
+ 672,
133
+ 316,
134
+ 544,
135
+ 630,
136
+ 337,
137
+ 601,
138
+ 978,
139
+ 956,
140
+ 642,
141
+ 552,
142
+ 164
143
+ ]
144
+ },
145
+ {
146
+ "word": "ahu",
147
+ "duration": "0.72",
148
+ "codes": [
149
+ 254,
150
+ 1014,
151
+ 571,
152
+ 208,
153
+ 1388,
154
+ 393,
155
+ 467,
156
+ 1453,
157
+ 402,
158
+ 361,
159
+ 1464,
160
+ 665,
161
+ 1468,
162
+ 1643,
163
+ 858,
164
+ 1663,
165
+ 1381,
166
+ 1596,
167
+ 1420,
168
+ 1235,
169
+ 1287,
170
+ 1483,
171
+ 277,
172
+ 1753,
173
+ 949,
174
+ 483,
175
+ 1554,
176
+ 787,
177
+ 1407,
178
+ 1100,
179
+ 1035,
180
+ 578,
181
+ 591,
182
+ 504,
183
+ 460,
184
+ 712,
185
+ 838,
186
+ 516,
187
+ 620,
188
+ 460,
189
+ 223,
190
+ 928,
191
+ 1422,
192
+ 1513,
193
+ 1699,
194
+ 513,
195
+ 896,
196
+ 242,
197
+ 313,
198
+ 1634,
199
+ 1237,
200
+ 249,
201
+ 153,
202
+ 1056,
203
+ 508
204
+ ]
205
+ },
206
+ {
207
+ "word": "inweta",
208
+ "duration": "0.44",
209
+ "codes": [
210
+ 1391,
211
+ 416,
212
+ 182,
213
+ 488,
214
+ 500,
215
+ 1544,
216
+ 1237,
217
+ 577,
218
+ 1813,
219
+ 860,
220
+ 749,
221
+ 679,
222
+ 51,
223
+ 682,
224
+ 506,
225
+ 79,
226
+ 49,
227
+ 254,
228
+ 987,
229
+ 348,
230
+ 1418,
231
+ 1688,
232
+ 1735,
233
+ 1658,
234
+ 544,
235
+ 16,
236
+ 1777,
237
+ 309,
238
+ 25,
239
+ 1317,
240
+ 146,
241
+ 1333,
242
+ 147
243
+ ]
244
+ }
245
+ ]
246
+ }
yarngpt/default_speakers_local/igbo_female2.json ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "text": "Umunwoke n\u1ecd na \u1ecct\u1ee5t\u1ee5 \u1ecdr\u1ee5 \u1ecdch\u1ecbch\u1ecb",
3
+ "words": [
4
+ {
5
+ "word": "umunwoke",
6
+ "duration": "0.79",
7
+ "codes": [
8
+ 156,
9
+ 1807,
10
+ 1225,
11
+ 976,
12
+ 950,
13
+ 1205,
14
+ 957,
15
+ 669,
16
+ 838,
17
+ 1142,
18
+ 781,
19
+ 666,
20
+ 1151,
21
+ 1219,
22
+ 1044,
23
+ 42,
24
+ 51,
25
+ 1712,
26
+ 893,
27
+ 963,
28
+ 438,
29
+ 30,
30
+ 529,
31
+ 792,
32
+ 1769,
33
+ 102,
34
+ 834,
35
+ 1398,
36
+ 1258,
37
+ 1460,
38
+ 1407,
39
+ 1265,
40
+ 1615,
41
+ 682,
42
+ 455,
43
+ 488,
44
+ 395,
45
+ 376,
46
+ 1136,
47
+ 1391,
48
+ 79,
49
+ 1052,
50
+ 1747,
51
+ 1739,
52
+ 351,
53
+ 1421,
54
+ 423,
55
+ 344,
56
+ 253,
57
+ 1098,
58
+ 479,
59
+ 1077,
60
+ 243,
61
+ 364,
62
+ 1812,
63
+ 315,
64
+ 1073,
65
+ 832
66
+ ]
67
+ },
68
+ {
69
+ "word": "no",
70
+ "duration": "0.16",
71
+ "codes": [
72
+ 175,
73
+ 1407,
74
+ 458,
75
+ 860,
76
+ 1025,
77
+ 65,
78
+ 1443,
79
+ 1482,
80
+ 371,
81
+ 1257,
82
+ 890,
83
+ 1161,
84
+ 449
85
+ ]
86
+ },
87
+ {
88
+ "word": "na",
89
+ "duration": "0.10",
90
+ "codes": [
91
+ 1650,
92
+ 639,
93
+ 322,
94
+ 1596,
95
+ 741,
96
+ 987,
97
+ 1452
98
+ ]
99
+ },
100
+ {
101
+ "word": "otutu",
102
+ "duration": "0.38",
103
+ "codes": [
104
+ 371,
105
+ 1107,
106
+ 1444,
107
+ 794,
108
+ 1517,
109
+ 504,
110
+ 930,
111
+ 767,
112
+ 990,
113
+ 507,
114
+ 1314,
115
+ 1766,
116
+ 1073,
117
+ 1229,
118
+ 1525,
119
+ 1664,
120
+ 460,
121
+ 896,
122
+ 1230,
123
+ 640,
124
+ 507,
125
+ 919,
126
+ 1104,
127
+ 1320,
128
+ 1022,
129
+ 234,
130
+ 520,
131
+ 583,
132
+ 959
133
+ ]
134
+ },
135
+ {
136
+ "word": "oru",
137
+ "duration": "0.28",
138
+ "codes": [
139
+ 324,
140
+ 943,
141
+ 65,
142
+ 613,
143
+ 709,
144
+ 128,
145
+ 384,
146
+ 681,
147
+ 1071,
148
+ 1732,
149
+ 1392,
150
+ 616,
151
+ 706,
152
+ 679,
153
+ 510,
154
+ 934,
155
+ 37,
156
+ 76,
157
+ 1032,
158
+ 1618,
159
+ 944
160
+ ]
161
+ },
162
+ {
163
+ "word": "ochichi",
164
+ "duration": "0.44",
165
+ "codes": [
166
+ 1234,
167
+ 1267,
168
+ 295,
169
+ 1278,
170
+ 891,
171
+ 1652,
172
+ 1142,
173
+ 435,
174
+ 356,
175
+ 599,
176
+ 70,
177
+ 517,
178
+ 1303,
179
+ 788,
180
+ 1314,
181
+ 57,
182
+ 1700,
183
+ 1790,
184
+ 432,
185
+ 1495,
186
+ 435,
187
+ 823,
188
+ 1583,
189
+ 350,
190
+ 290,
191
+ 656,
192
+ 70,
193
+ 1074,
194
+ 1104,
195
+ 911,
196
+ 1297,
197
+ 1708,
198
+ 1826
199
+ ]
200
+ }
201
+ ]
202
+ }
yarngpt/default_speakers_local/igbo_male2.json ADDED
@@ -0,0 +1,277 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "text": "Any\u1ecb na-eji nkw\u1ee5 n'ihu na-eme fan aka",
3
+ "words": [
4
+ {
5
+ "word": "anyi",
6
+ "duration": "0.79",
7
+ "codes": [
8
+ 165,
9
+ 226,
10
+ 672,
11
+ 278,
12
+ 1279,
13
+ 924,
14
+ 1648,
15
+ 1079,
16
+ 1010,
17
+ 1321,
18
+ 869,
19
+ 964,
20
+ 1118,
21
+ 964,
22
+ 691,
23
+ 1033,
24
+ 964,
25
+ 762,
26
+ 981,
27
+ 772,
28
+ 630,
29
+ 967,
30
+ 676,
31
+ 676,
32
+ 460,
33
+ 567,
34
+ 680,
35
+ 301,
36
+ 334,
37
+ 981,
38
+ 301,
39
+ 334,
40
+ 981,
41
+ 316,
42
+ 316,
43
+ 316,
44
+ 223,
45
+ 1007,
46
+ 571,
47
+ 524,
48
+ 402,
49
+ 147,
50
+ 367,
51
+ 402,
52
+ 303,
53
+ 182,
54
+ 1729,
55
+ 510,
56
+ 914,
57
+ 293,
58
+ 1636,
59
+ 683,
60
+ 500,
61
+ 1369,
62
+ 451,
63
+ 756,
64
+ 1339,
65
+ 1619
66
+ ]
67
+ },
68
+ {
69
+ "word": "na",
70
+ "duration": "0.12",
71
+ "codes": [
72
+ 1756,
73
+ 593,
74
+ 1446,
75
+ 48,
76
+ 67,
77
+ 96,
78
+ 759,
79
+ 488,
80
+ 69
81
+ ]
82
+ },
83
+ {
84
+ "word": "eji",
85
+ "duration": "0.26",
86
+ "codes": [
87
+ 367,
88
+ 890,
89
+ 357,
90
+ 966,
91
+ 654,
92
+ 41,
93
+ 1478,
94
+ 1637,
95
+ 1381,
96
+ 654,
97
+ 330,
98
+ 844,
99
+ 372,
100
+ 1147,
101
+ 202,
102
+ 206,
103
+ 148,
104
+ 455,
105
+ 50,
106
+ 592
107
+ ]
108
+ },
109
+ {
110
+ "word": "nkwu",
111
+ "duration": "0.28",
112
+ "codes": [
113
+ 506,
114
+ 515,
115
+ 1363,
116
+ 1663,
117
+ 1464,
118
+ 1383,
119
+ 1770,
120
+ 1251,
121
+ 1639,
122
+ 1705,
123
+ 1634,
124
+ 1464,
125
+ 583,
126
+ 1008,
127
+ 1384,
128
+ 557,
129
+ 1002,
130
+ 716,
131
+ 952,
132
+ 1552,
133
+ 506
134
+ ]
135
+ },
136
+ {
137
+ "word": "nihu",
138
+ "duration": "0.36",
139
+ "codes": [
140
+ 1366,
141
+ 1650,
142
+ 716,
143
+ 890,
144
+ 1494,
145
+ 189,
146
+ 687,
147
+ 439,
148
+ 15,
149
+ 45,
150
+ 297,
151
+ 48,
152
+ 33,
153
+ 335,
154
+ 1591,
155
+ 1560,
156
+ 1574,
157
+ 1368,
158
+ 1069,
159
+ 1394,
160
+ 1166,
161
+ 1457,
162
+ 109,
163
+ 143,
164
+ 1574,
165
+ 1663,
166
+ 286
167
+ ]
168
+ },
169
+ {
170
+ "word": "na",
171
+ "duration": "0.14",
172
+ "codes": [
173
+ 1748,
174
+ 1454,
175
+ 1238,
176
+ 407,
177
+ 148,
178
+ 30,
179
+ 49,
180
+ 789,
181
+ 488,
182
+ 137,
183
+ 1166
184
+ ]
185
+ },
186
+ {
187
+ "word": "eme",
188
+ "duration": "0.32",
189
+ "codes": [
190
+ 537,
191
+ 471,
192
+ 1136,
193
+ 1296,
194
+ 1284,
195
+ 217,
196
+ 1516,
197
+ 593,
198
+ 704,
199
+ 1002,
200
+ 433,
201
+ 205,
202
+ 263,
203
+ 1247,
204
+ 665,
205
+ 428,
206
+ 269,
207
+ 22,
208
+ 519,
209
+ 1400,
210
+ 400,
211
+ 1400,
212
+ 1171,
213
+ 493
214
+ ]
215
+ },
216
+ {
217
+ "word": "fan",
218
+ "duration": "0.40",
219
+ "codes": [
220
+ 1212,
221
+ 911,
222
+ 640,
223
+ 1265,
224
+ 386,
225
+ 352,
226
+ 102,
227
+ 252,
228
+ 642,
229
+ 1182,
230
+ 985,
231
+ 115,
232
+ 730,
233
+ 347,
234
+ 173,
235
+ 1676,
236
+ 794,
237
+ 363,
238
+ 1217,
239
+ 1388,
240
+ 736,
241
+ 843,
242
+ 1422,
243
+ 660,
244
+ 1160,
245
+ 474,
246
+ 1403,
247
+ 142,
248
+ 1278,
249
+ 147
250
+ ]
251
+ },
252
+ {
253
+ "word": "aka",
254
+ "duration": "0.24",
255
+ "codes": [
256
+ 1492,
257
+ 402,
258
+ 1280,
259
+ 595,
260
+ 1732,
261
+ 1697,
262
+ 838,
263
+ 1809,
264
+ 1199,
265
+ 724,
266
+ 337,
267
+ 516,
268
+ 948,
269
+ 1700,
270
+ 1129,
271
+ 901,
272
+ 934,
273
+ 1110
274
+ ]
275
+ }
276
+ ]
277
+ }
yarngpt/default_speakers_local/yoruba_female1.json ADDED
@@ -0,0 +1,416 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "text": "Kulikuli j\u1eb9\u0301 \u01f9kan \u00ecpanu t\u00ed w\u00f3\u0323n \u1e63e n\u00edpa l\u00edlo \u1eb9\u0300p\u00e0, p\u1eb9lu or\u00eds\u00ec\u00edr\u00eds\u00ec\u00ed \u01f9kan",
3
+ "words": [
4
+ {
5
+ "word": "kulikuli",
6
+ "duration": "0.50",
7
+ "codes": [
8
+ 156,
9
+ 1777,
10
+ 479,
11
+ 1086,
12
+ 243,
13
+ 127,
14
+ 170,
15
+ 1275,
16
+ 1470,
17
+ 392,
18
+ 278,
19
+ 837,
20
+ 1142,
21
+ 284,
22
+ 1411,
23
+ 1742,
24
+ 1280,
25
+ 87,
26
+ 898,
27
+ 228,
28
+ 67,
29
+ 1499,
30
+ 1568,
31
+ 1035,
32
+ 978,
33
+ 157,
34
+ 1078,
35
+ 243,
36
+ 1708,
37
+ 170,
38
+ 1498,
39
+ 346,
40
+ 344,
41
+ 526,
42
+ 1039,
43
+ 316,
44
+ 526
45
+ ]
46
+ },
47
+ {
48
+ "word": "je",
49
+ "duration": "0.28",
50
+ "codes": [
51
+ 1570,
52
+ 1290,
53
+ 654,
54
+ 328,
55
+ 816,
56
+ 270,
57
+ 402,
58
+ 271,
59
+ 76,
60
+ 43,
61
+ 1259,
62
+ 303,
63
+ 371,
64
+ 1077,
65
+ 560,
66
+ 1117,
67
+ 1108,
68
+ 1110,
69
+ 1481,
70
+ 691,
71
+ 1825
72
+ ]
73
+ },
74
+ {
75
+ "word": "nkan",
76
+ "duration": "0.26",
77
+ "codes": [
78
+ 1465,
79
+ 1312,
80
+ 538,
81
+ 1807,
82
+ 1152,
83
+ 27,
84
+ 20,
85
+ 379,
86
+ 1378,
87
+ 1505,
88
+ 84,
89
+ 959,
90
+ 756,
91
+ 107,
92
+ 949,
93
+ 996,
94
+ 1358,
95
+ 1286,
96
+ 755,
97
+ 1686
98
+ ]
99
+ },
100
+ {
101
+ "word": "ipanu",
102
+ "duration": "0.54",
103
+ "codes": [
104
+ 371,
105
+ 1224,
106
+ 458,
107
+ 1601,
108
+ 241,
109
+ 247,
110
+ 620,
111
+ 423,
112
+ 584,
113
+ 905,
114
+ 411,
115
+ 1209,
116
+ 309,
117
+ 88,
118
+ 1511,
119
+ 164,
120
+ 552,
121
+ 1104,
122
+ 140,
123
+ 737,
124
+ 1699,
125
+ 595,
126
+ 1257,
127
+ 544,
128
+ 1733,
129
+ 169,
130
+ 1339,
131
+ 1830,
132
+ 123,
133
+ 1048,
134
+ 1378,
135
+ 1817,
136
+ 775,
137
+ 1093,
138
+ 669,
139
+ 1663,
140
+ 464,
141
+ 1536,
142
+ 696,
143
+ 1120,
144
+ 781
145
+ ]
146
+ },
147
+ {
148
+ "word": "ti",
149
+ "duration": "0.22",
150
+ "codes": [
151
+ 724,
152
+ 1120,
153
+ 1250,
154
+ 885,
155
+ 432,
156
+ 1556,
157
+ 1803,
158
+ 759,
159
+ 234,
160
+ 1104,
161
+ 1264,
162
+ 205,
163
+ 892,
164
+ 1223,
165
+ 1051,
166
+ 1141
167
+ ]
168
+ },
169
+ {
170
+ "word": "won",
171
+ "duration": "0.26",
172
+ "codes": [
173
+ 205,
174
+ 1004,
175
+ 1107,
176
+ 386,
177
+ 951,
178
+ 53,
179
+ 339,
180
+ 1186,
181
+ 664,
182
+ 874,
183
+ 1245,
184
+ 547,
185
+ 1320,
186
+ 918,
187
+ 1363,
188
+ 1638,
189
+ 654,
190
+ 279,
191
+ 1040,
192
+ 739
193
+ ]
194
+ },
195
+ {
196
+ "word": "se",
197
+ "duration": "0.22",
198
+ "codes": [
199
+ 1082,
200
+ 878,
201
+ 760,
202
+ 1094,
203
+ 973,
204
+ 656,
205
+ 142,
206
+ 10,
207
+ 170,
208
+ 1744,
209
+ 170,
210
+ 495,
211
+ 2,
212
+ 379,
213
+ 725,
214
+ 1816
215
+ ]
216
+ },
217
+ {
218
+ "word": "nipa",
219
+ "duration": "0.36",
220
+ "codes": [
221
+ 963,
222
+ 1436,
223
+ 49,
224
+ 43,
225
+ 386,
226
+ 1731,
227
+ 537,
228
+ 121,
229
+ 496,
230
+ 666,
231
+ 423,
232
+ 668,
233
+ 851,
234
+ 811,
235
+ 737,
236
+ 25,
237
+ 260,
238
+ 1313,
239
+ 300,
240
+ 303,
241
+ 951,
242
+ 1153,
243
+ 172,
244
+ 589,
245
+ 1831,
246
+ 1088,
247
+ 378
248
+ ]
249
+ },
250
+ {
251
+ "word": "lilo",
252
+ "duration": "0.30",
253
+ "codes": [
254
+ 451,
255
+ 1801,
256
+ 1800,
257
+ 967,
258
+ 1313,
259
+ 49,
260
+ 1814,
261
+ 659,
262
+ 858,
263
+ 534,
264
+ 1217,
265
+ 727,
266
+ 609,
267
+ 651,
268
+ 1411,
269
+ 688,
270
+ 321,
271
+ 47,
272
+ 1271,
273
+ 79,
274
+ 362,
275
+ 816,
276
+ 157
277
+ ]
278
+ },
279
+ {
280
+ "word": "epa",
281
+ "duration": "0.40",
282
+ "codes": [
283
+ 1272,
284
+ 497,
285
+ 1192,
286
+ 67,
287
+ 986,
288
+ 54,
289
+ 351,
290
+ 423,
291
+ 1154,
292
+ 561,
293
+ 584,
294
+ 417,
295
+ 209,
296
+ 1017,
297
+ 424,
298
+ 1122,
299
+ 25,
300
+ 1191,
301
+ 475,
302
+ 140,
303
+ 1184,
304
+ 730,
305
+ 1459,
306
+ 1266,
307
+ 379,
308
+ 799,
309
+ 567,
310
+ 460,
311
+ 379,
312
+ 676
313
+ ]
314
+ },
315
+ {
316
+ "word": "pelu",
317
+ "duration": "0.28",
318
+ "codes": [
319
+ 381,
320
+ 926,
321
+ 433,
322
+ 811,
323
+ 76,
324
+ 774,
325
+ 1179,
326
+ 380,
327
+ 1668,
328
+ 1646,
329
+ 1364,
330
+ 1446,
331
+ 1241,
332
+ 1503,
333
+ 1384,
334
+ 902,
335
+ 1073,
336
+ 443,
337
+ 74,
338
+ 1015,
339
+ 1107
340
+ ]
341
+ },
342
+ {
343
+ "word": "orisiirisii",
344
+ "duration": "0.64",
345
+ "codes": [
346
+ 51,
347
+ 1047,
348
+ 367,
349
+ 674,
350
+ 1117,
351
+ 734,
352
+ 498,
353
+ 1504,
354
+ 1045,
355
+ 656,
356
+ 773,
357
+ 382,
358
+ 198,
359
+ 792,
360
+ 1662,
361
+ 760,
362
+ 1261,
363
+ 1094,
364
+ 1091,
365
+ 1505,
366
+ 602,
367
+ 1670,
368
+ 1497,
369
+ 1447,
370
+ 465,
371
+ 135,
372
+ 98,
373
+ 528,
374
+ 682,
375
+ 812,
376
+ 269,
377
+ 175,
378
+ 290,
379
+ 547,
380
+ 340,
381
+ 382,
382
+ 1073,
383
+ 528,
384
+ 1033,
385
+ 700,
386
+ 195,
387
+ 529,
388
+ 37,
389
+ 687,
390
+ 1022,
391
+ 343,
392
+ 1335,
393
+ 1092
394
+ ]
395
+ },
396
+ {
397
+ "word": "nkan",
398
+ "duration": "0.16",
399
+ "codes": [
400
+ 1339,
401
+ 1657,
402
+ 859,
403
+ 1288,
404
+ 544,
405
+ 207,
406
+ 459,
407
+ 1735,
408
+ 1736,
409
+ 959,
410
+ 106,
411
+ 427,
412
+ 107
413
+ ]
414
+ }
415
+ ]
416
+ }
yarngpt/default_speakers_local/yoruba_female2.json ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "text": "Irin\u1e63\u1eb9\u0301 \u00e0gb\u1eb9\u0300 ni katakata.",
3
+ "words": [
4
+ {
5
+ "word": "irinse",
6
+ "duration": "1.19",
7
+ "codes": [
8
+ 219,
9
+ 219,
10
+ 219,
11
+ 219,
12
+ 805,
13
+ 636,
14
+ 459,
15
+ 918,
16
+ 820,
17
+ 918,
18
+ 950,
19
+ 795,
20
+ 447,
21
+ 1284,
22
+ 447,
23
+ 378,
24
+ 641,
25
+ 77,
26
+ 939,
27
+ 316,
28
+ 278,
29
+ 16,
30
+ 223,
31
+ 776,
32
+ 374,
33
+ 1810,
34
+ 110,
35
+ 967,
36
+ 51,
37
+ 717,
38
+ 1289,
39
+ 155,
40
+ 1731,
41
+ 1199,
42
+ 195,
43
+ 1332,
44
+ 1106,
45
+ 940,
46
+ 328,
47
+ 1493,
48
+ 230,
49
+ 687,
50
+ 510,
51
+ 356,
52
+ 1178,
53
+ 253,
54
+ 24,
55
+ 318,
56
+ 70,
57
+ 1002,
58
+ 977,
59
+ 719,
60
+ 113,
61
+ 228,
62
+ 1556,
63
+ 1316,
64
+ 88,
65
+ 79,
66
+ 1316,
67
+ 1316,
68
+ 628,
69
+ 79,
70
+ 1492,
71
+ 915,
72
+ 1671,
73
+ 492,
74
+ 1758,
75
+ 334,
76
+ 470,
77
+ 1038,
78
+ 223,
79
+ 68,
80
+ 563,
81
+ 223,
82
+ 224,
83
+ 185,
84
+ 244,
85
+ 417,
86
+ 337,
87
+ 244,
88
+ 360,
89
+ 165,
90
+ 224,
91
+ 187,
92
+ 1821,
93
+ 1119,
94
+ 958,
95
+ 192,
96
+ 200
97
+ ]
98
+ },
99
+ {
100
+ "word": "agbe",
101
+ "duration": "0.32",
102
+ "codes": [
103
+ 74,
104
+ 456,
105
+ 1156,
106
+ 49,
107
+ 1409,
108
+ 414,
109
+ 1437,
110
+ 145,
111
+ 17,
112
+ 1121,
113
+ 237,
114
+ 1442,
115
+ 389,
116
+ 698,
117
+ 30,
118
+ 30,
119
+ 489,
120
+ 1558,
121
+ 30,
122
+ 721,
123
+ 994,
124
+ 201,
125
+ 1702,
126
+ 835
127
+ ]
128
+ },
129
+ {
130
+ "word": "ni",
131
+ "duration": "0.12",
132
+ "codes": [
133
+ 1540,
134
+ 310,
135
+ 29,
136
+ 890,
137
+ 952,
138
+ 319,
139
+ 196,
140
+ 272,
141
+ 1536
142
+ ]
143
+ },
144
+ {
145
+ "word": "katakata",
146
+ "duration": "0.56",
147
+ "codes": [
148
+ 274,
149
+ 993,
150
+ 1624,
151
+ 855,
152
+ 1065,
153
+ 152,
154
+ 610,
155
+ 1170,
156
+ 775,
157
+ 1541,
158
+ 1806,
159
+ 1592,
160
+ 713,
161
+ 1539,
162
+ 1424,
163
+ 1229,
164
+ 93,
165
+ 1194,
166
+ 1310,
167
+ 1392,
168
+ 727,
169
+ 1428,
170
+ 32,
171
+ 902,
172
+ 1643,
173
+ 1304,
174
+ 977,
175
+ 1316,
176
+ 587,
177
+ 777,
178
+ 1258,
179
+ 830,
180
+ 562,
181
+ 1720,
182
+ 34,
183
+ 667,
184
+ 415,
185
+ 1194,
186
+ 1477,
187
+ 352,
188
+ 1187,
189
+ 1345
190
+ ]
191
+ }
192
+ ]
193
+ }
yarngpt/default_speakers_local/yoruba_male1.json ADDED
@@ -0,0 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "text": "\u00ccj\u1ecdba t\u00ed f\u00ed \u00f2fin d\u00e9 t\u00edta \u1ecdt\u00ed l\u00edle.",
3
+ "words": [
4
+ {
5
+ "word": "ijoba",
6
+ "duration": "0.67",
7
+ "codes": [
8
+ 165,
9
+ 1236,
10
+ 1667,
11
+ 933,
12
+ 729,
13
+ 1699,
14
+ 1425,
15
+ 1080,
16
+ 1255,
17
+ 458,
18
+ 795,
19
+ 1348,
20
+ 334,
21
+ 1458,
22
+ 458,
23
+ 566,
24
+ 584,
25
+ 187,
26
+ 1774,
27
+ 296,
28
+ 123,
29
+ 190,
30
+ 1787,
31
+ 1470,
32
+ 558,
33
+ 1392,
34
+ 1693,
35
+ 885,
36
+ 1315,
37
+ 760,
38
+ 609,
39
+ 357,
40
+ 864,
41
+ 575,
42
+ 74,
43
+ 798,
44
+ 1401,
45
+ 1380,
46
+ 169,
47
+ 1157,
48
+ 871,
49
+ 208,
50
+ 622,
51
+ 146,
52
+ 1232,
53
+ 107,
54
+ 382,
55
+ 801,
56
+ 1707
57
+ ]
58
+ },
59
+ {
60
+ "word": "ti",
61
+ "duration": "0.16",
62
+ "codes": [
63
+ 459,
64
+ 1475,
65
+ 833,
66
+ 1082,
67
+ 1496,
68
+ 1241,
69
+ 1342,
70
+ 211,
71
+ 153,
72
+ 1709,
73
+ 1640,
74
+ 468
75
+ ]
76
+ },
77
+ {
78
+ "word": "fi",
79
+ "duration": "0.14",
80
+ "codes": [
81
+ 1752,
82
+ 1230,
83
+ 854,
84
+ 1420,
85
+ 854,
86
+ 1146,
87
+ 1257,
88
+ 388,
89
+ 1686,
90
+ 539,
91
+ 289
92
+ ]
93
+ },
94
+ {
95
+ "word": "ofin",
96
+ "duration": "0.26",
97
+ "codes": [
98
+ 341,
99
+ 1008,
100
+ 1701,
101
+ 359,
102
+ 1696,
103
+ 1250,
104
+ 1226,
105
+ 781,
106
+ 1292,
107
+ 1432,
108
+ 989,
109
+ 998,
110
+ 236,
111
+ 962,
112
+ 1308,
113
+ 749,
114
+ 1462,
115
+ 1460,
116
+ 1039,
117
+ 932
118
+ ]
119
+ },
120
+ {
121
+ "word": "de",
122
+ "duration": "0.16",
123
+ "codes": [
124
+ 1020,
125
+ 1808,
126
+ 907,
127
+ 276,
128
+ 597,
129
+ 1069,
130
+ 217,
131
+ 648,
132
+ 1068,
133
+ 468,
134
+ 981,
135
+ 1003
136
+ ]
137
+ },
138
+ {
139
+ "word": "tita",
140
+ "duration": "0.46",
141
+ "codes": [
142
+ 645,
143
+ 1041,
144
+ 605,
145
+ 947,
146
+ 1505,
147
+ 162,
148
+ 1820,
149
+ 688,
150
+ 101,
151
+ 1764,
152
+ 418,
153
+ 885,
154
+ 513,
155
+ 1569,
156
+ 1082,
157
+ 446,
158
+ 711,
159
+ 294,
160
+ 326,
161
+ 1203,
162
+ 1190,
163
+ 524,
164
+ 408,
165
+ 222,
166
+ 1490,
167
+ 1162,
168
+ 1486,
169
+ 885,
170
+ 247,
171
+ 899,
172
+ 513,
173
+ 1187,
174
+ 614,
175
+ 424,
176
+ 184
177
+ ]
178
+ },
179
+ {
180
+ "word": "oti",
181
+ "duration": "0.28",
182
+ "codes": [
183
+ 979,
184
+ 997,
185
+ 1581,
186
+ 620,
187
+ 967,
188
+ 460,
189
+ 1430,
190
+ 1731,
191
+ 279,
192
+ 499,
193
+ 769,
194
+ 517,
195
+ 1077,
196
+ 263,
197
+ 1443,
198
+ 397,
199
+ 166,
200
+ 1554,
201
+ 440,
202
+ 1009,
203
+ 1427
204
+ ]
205
+ },
206
+ {
207
+ "word": "lile",
208
+ "duration": "0.28",
209
+ "codes": [
210
+ 409,
211
+ 1677,
212
+ 599,
213
+ 296,
214
+ 629,
215
+ 74,
216
+ 129,
217
+ 1740,
218
+ 11,
219
+ 1404,
220
+ 920,
221
+ 10,
222
+ 269,
223
+ 1604,
224
+ 990,
225
+ 1200,
226
+ 1217,
227
+ 1178,
228
+ 293,
229
+ 30,
230
+ 36
231
+ ]
232
+ }
233
+ ]
234
+ }
yarngpt/default_speakers_local/yoruba_male2.json ADDED
@@ -0,0 +1,238 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "text": "\u1ecdk\u1ecd\u0300 \u00f2furuf\u00fa t\u00ed jay\u00e9 w\u1ecd \u0144 bal\u00e8 l\u00f3w\u00f3.",
3
+ "words": [
4
+ {
5
+ "word": "oko",
6
+ "duration": "0.42",
7
+ "codes": [
8
+ 165,
9
+ 1480,
10
+ 1405,
11
+ 1428,
12
+ 761,
13
+ 1343,
14
+ 591,
15
+ 311,
16
+ 345,
17
+ 1209,
18
+ 545,
19
+ 346,
20
+ 880,
21
+ 413,
22
+ 112,
23
+ 882,
24
+ 1051,
25
+ 831,
26
+ 866,
27
+ 918,
28
+ 1622,
29
+ 1776,
30
+ 1213,
31
+ 945,
32
+ 942,
33
+ 455,
34
+ 1217,
35
+ 675,
36
+ 268,
37
+ 683,
38
+ 536
39
+ ]
40
+ },
41
+ {
42
+ "word": "ofurufu",
43
+ "duration": "0.52",
44
+ "codes": [
45
+ 317,
46
+ 1016,
47
+ 354,
48
+ 1467,
49
+ 1626,
50
+ 1686,
51
+ 1012,
52
+ 1450,
53
+ 1090,
54
+ 849,
55
+ 1230,
56
+ 1774,
57
+ 992,
58
+ 148,
59
+ 395,
60
+ 1446,
61
+ 909,
62
+ 1712,
63
+ 1624,
64
+ 327,
65
+ 283,
66
+ 1554,
67
+ 1796,
68
+ 952,
69
+ 1450,
70
+ 184,
71
+ 689,
72
+ 604,
73
+ 902,
74
+ 989,
75
+ 1517,
76
+ 983,
77
+ 250,
78
+ 39,
79
+ 792,
80
+ 289,
81
+ 865,
82
+ 272,
83
+ 336,
84
+ 694
85
+ ]
86
+ },
87
+ {
88
+ "word": "ti",
89
+ "duration": "0.16",
90
+ "codes": [
91
+ 1818,
92
+ 279,
93
+ 96,
94
+ 1097,
95
+ 383,
96
+ 876,
97
+ 14,
98
+ 1700,
99
+ 515,
100
+ 1713,
101
+ 1033,
102
+ 59
103
+ ]
104
+ },
105
+ {
106
+ "word": "jaye",
107
+ "duration": "0.36",
108
+ "codes": [
109
+ 1522,
110
+ 774,
111
+ 452,
112
+ 303,
113
+ 695,
114
+ 648,
115
+ 809,
116
+ 679,
117
+ 1015,
118
+ 626,
119
+ 398,
120
+ 1720,
121
+ 1,
122
+ 1497,
123
+ 748,
124
+ 46,
125
+ 1744,
126
+ 644,
127
+ 190,
128
+ 1060,
129
+ 455,
130
+ 529,
131
+ 111,
132
+ 1515,
133
+ 1762,
134
+ 150,
135
+ 1560
136
+ ]
137
+ },
138
+ {
139
+ "word": "wo",
140
+ "duration": "0.34",
141
+ "codes": [
142
+ 484,
143
+ 503,
144
+ 1388,
145
+ 61,
146
+ 289,
147
+ 1422,
148
+ 294,
149
+ 831,
150
+ 1328,
151
+ 462,
152
+ 1612,
153
+ 905,
154
+ 1541,
155
+ 785,
156
+ 509,
157
+ 1185,
158
+ 1802,
159
+ 845,
160
+ 1440,
161
+ 986,
162
+ 360,
163
+ 281,
164
+ 1703,
165
+ 1456,
166
+ 1674,
167
+ 1776
168
+ ]
169
+ },
170
+ {
171
+ "word": "n",
172
+ "duration": "0.12",
173
+ "codes": [
174
+ 1002,
175
+ 289,
176
+ 47,
177
+ 616,
178
+ 1594,
179
+ 852,
180
+ 831,
181
+ 458,
182
+ 220
183
+ ]
184
+ },
185
+ {
186
+ "word": "bale",
187
+ "duration": "0.32",
188
+ "codes": [
189
+ 953,
190
+ 1426,
191
+ 159,
192
+ 1758,
193
+ 474,
194
+ 1347,
195
+ 579,
196
+ 699,
197
+ 599,
198
+ 1433,
199
+ 483,
200
+ 1142,
201
+ 1088,
202
+ 988,
203
+ 906,
204
+ 552,
205
+ 128,
206
+ 1648,
207
+ 474,
208
+ 1678,
209
+ 668,
210
+ 1060,
211
+ 101,
212
+ 1478
213
+ ]
214
+ },
215
+ {
216
+ "word": "lowo",
217
+ "duration": "0.22",
218
+ "codes": [
219
+ 612,
220
+ 326,
221
+ 1661,
222
+ 978,
223
+ 88,
224
+ 1620,
225
+ 169,
226
+ 811,
227
+ 98,
228
+ 363,
229
+ 31,
230
+ 425,
231
+ 1531,
232
+ 394,
233
+ 1248,
234
+ 809
235
+ ]
236
+ }
237
+ ]
238
+ }
yarngpt/default_speakers_local/yoruba_male3.json ADDED
@@ -0,0 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "text": "\u00ccj\u1ecdba t\u00ed f\u00ed \u00f2fin d\u00e9 t\u00edta \u1ecdt\u00ed l\u00edle.",
3
+ "words": [
4
+ {
5
+ "word": "\u00ccj\u1ecdba",
6
+ "duration": "0.67",
7
+ "codes": [
8
+ 165,
9
+ 1236,
10
+ 1667,
11
+ 933,
12
+ 729,
13
+ 1699,
14
+ 1425,
15
+ 1080,
16
+ 1255,
17
+ 458,
18
+ 795,
19
+ 1348,
20
+ 334,
21
+ 1458,
22
+ 458,
23
+ 566,
24
+ 584,
25
+ 187,
26
+ 1774,
27
+ 296,
28
+ 123,
29
+ 190,
30
+ 1787,
31
+ 1470,
32
+ 558,
33
+ 1392,
34
+ 1693,
35
+ 885,
36
+ 1315,
37
+ 760,
38
+ 609,
39
+ 357,
40
+ 864,
41
+ 575,
42
+ 74,
43
+ 798,
44
+ 1401,
45
+ 1380,
46
+ 169,
47
+ 1157,
48
+ 871,
49
+ 208,
50
+ 622,
51
+ 146,
52
+ 1232,
53
+ 107,
54
+ 382,
55
+ 801,
56
+ 1707
57
+ ]
58
+ },
59
+ {
60
+ "word": "t\u00ed",
61
+ "duration": "0.16",
62
+ "codes": [
63
+ 459,
64
+ 1475,
65
+ 833,
66
+ 1082,
67
+ 1496,
68
+ 1241,
69
+ 1342,
70
+ 211,
71
+ 153,
72
+ 1709,
73
+ 1640,
74
+ 468
75
+ ]
76
+ },
77
+ {
78
+ "word": "f\u00ed",
79
+ "duration": "0.14",
80
+ "codes": [
81
+ 1752,
82
+ 1230,
83
+ 854,
84
+ 1420,
85
+ 854,
86
+ 1146,
87
+ 1257,
88
+ 388,
89
+ 1686,
90
+ 539,
91
+ 289
92
+ ]
93
+ },
94
+ {
95
+ "word": "\u00f2fin",
96
+ "duration": "0.26",
97
+ "codes": [
98
+ 341,
99
+ 1008,
100
+ 1701,
101
+ 359,
102
+ 1696,
103
+ 1250,
104
+ 1226,
105
+ 781,
106
+ 1292,
107
+ 1432,
108
+ 989,
109
+ 998,
110
+ 236,
111
+ 962,
112
+ 1308,
113
+ 749,
114
+ 1462,
115
+ 1460,
116
+ 1039,
117
+ 932
118
+ ]
119
+ },
120
+ {
121
+ "word": "d\u00e9",
122
+ "duration": "0.16",
123
+ "codes": [
124
+ 1020,
125
+ 1808,
126
+ 907,
127
+ 276,
128
+ 597,
129
+ 1069,
130
+ 217,
131
+ 648,
132
+ 1068,
133
+ 468,
134
+ 981,
135
+ 1003
136
+ ]
137
+ },
138
+ {
139
+ "word": "t\u00edta",
140
+ "duration": "0.46",
141
+ "codes": [
142
+ 645,
143
+ 1041,
144
+ 605,
145
+ 947,
146
+ 1505,
147
+ 162,
148
+ 1820,
149
+ 688,
150
+ 101,
151
+ 1764,
152
+ 418,
153
+ 885,
154
+ 513,
155
+ 1569,
156
+ 1082,
157
+ 446,
158
+ 711,
159
+ 294,
160
+ 326,
161
+ 1203,
162
+ 1190,
163
+ 524,
164
+ 408,
165
+ 222,
166
+ 1490,
167
+ 1162,
168
+ 1486,
169
+ 885,
170
+ 247,
171
+ 899,
172
+ 513,
173
+ 1187,
174
+ 614,
175
+ 424,
176
+ 184
177
+ ]
178
+ },
179
+ {
180
+ "word": "\u1ecdt\u00ed",
181
+ "duration": "0.28",
182
+ "codes": [
183
+ 979,
184
+ 997,
185
+ 1581,
186
+ 620,
187
+ 967,
188
+ 460,
189
+ 1430,
190
+ 1731,
191
+ 279,
192
+ 499,
193
+ 769,
194
+ 517,
195
+ 1077,
196
+ 263,
197
+ 1443,
198
+ 397,
199
+ 166,
200
+ 1554,
201
+ 440,
202
+ 1009,
203
+ 1427
204
+ ]
205
+ },
206
+ {
207
+ "word": "l\u00edle.",
208
+ "duration": "0.28",
209
+ "codes": [
210
+ 409,
211
+ 1677,
212
+ 599,
213
+ 296,
214
+ 629,
215
+ 74,
216
+ 129,
217
+ 1740,
218
+ 11,
219
+ 1404,
220
+ 920,
221
+ 10,
222
+ 269,
223
+ 1604,
224
+ 990,
225
+ 1200,
226
+ 1217,
227
+ 1178,
228
+ 293,
229
+ 30,
230
+ 36
231
+ ]
232
+ }
233
+ ]
234
+ }
yarngpt/notebooks/Merge_datasets.ipynb ADDED
@@ -0,0 +1,851 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "metadata": {
7
+ "id": "mKb-4Hv4xNpF",
8
+ "colab": {
9
+ "base_uri": "https://localhost:8080/"
10
+ },
11
+ "outputId": "8f45fbf9-5e31-4995-a18b-b5d2b9a9e9f5"
12
+ },
13
+ "outputs": [
14
+ {
15
+ "output_type": "stream",
16
+ "name": "stdout",
17
+ "text": [
18
+ "Collecting datasets\n",
19
+ " Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)\n",
20
+ "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from datasets) (3.16.1)\n",
21
+ "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from datasets) (1.26.4)\n",
22
+ "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (17.0.0)\n",
23
+ "Collecting dill<0.3.9,>=0.3.0 (from datasets)\n",
24
+ " Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)\n",
25
+ "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets) (2.2.2)\n",
26
+ "Requirement already satisfied: requests>=2.32.2 in /usr/local/lib/python3.10/dist-packages (from datasets) (2.32.3)\n",
27
+ "Requirement already satisfied: tqdm>=4.66.3 in /usr/local/lib/python3.10/dist-packages (from datasets) (4.67.1)\n",
28
+ "Collecting xxhash (from datasets)\n",
29
+ " Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n",
30
+ "Collecting multiprocess<0.70.17 (from datasets)\n",
31
+ " Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)\n",
32
+ "Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)\n",
33
+ " Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)\n",
34
+ "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets) (3.11.10)\n",
35
+ "Requirement already satisfied: huggingface-hub>=0.23.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.27.0)\n",
36
+ "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from datasets) (24.2)\n",
37
+ "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (6.0.2)\n",
38
+ "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (2.4.4)\n",
39
+ "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.2)\n",
40
+ "Requirement already satisfied: async-timeout<6.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (4.0.3)\n",
41
+ "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (24.3.0)\n",
42
+ "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.5.0)\n",
43
+ "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (6.1.0)\n",
44
+ "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (0.2.1)\n",
45
+ "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.18.3)\n",
46
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.23.0->datasets) (4.12.2)\n",
47
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets) (3.4.0)\n",
48
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets) (3.10)\n",
49
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets) (2.2.3)\n",
50
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets) (2024.12.14)\n",
51
+ "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2.8.2)\n",
52
+ "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2024.2)\n",
53
+ "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2024.2)\n",
54
+ "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.17.0)\n",
55
+ "Downloading datasets-3.2.0-py3-none-any.whl (480 kB)\n",
56
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m480.6/480.6 kB\u001b[0m \u001b[31m9.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
57
+ "\u001b[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)\n",
58
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m6.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
59
+ "\u001b[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (179 kB)\n",
60
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m179.3/179.3 kB\u001b[0m \u001b[31m7.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
61
+ "\u001b[?25hDownloading multiprocess-0.70.16-py310-none-any.whl (134 kB)\n",
62
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m4.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
63
+ "\u001b[?25hDownloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n",
64
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m9.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
65
+ "\u001b[?25hInstalling collected packages: xxhash, fsspec, dill, multiprocess, datasets\n",
66
+ " Attempting uninstall: fsspec\n",
67
+ " Found existing installation: fsspec 2024.10.0\n",
68
+ " Uninstalling fsspec-2024.10.0:\n",
69
+ " Successfully uninstalled fsspec-2024.10.0\n",
70
+ "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
71
+ "gcsfs 2024.10.0 requires fsspec==2024.10.0, but you have fsspec 2024.9.0 which is incompatible.\u001b[0m\u001b[31m\n",
72
+ "\u001b[0mSuccessfully installed datasets-3.2.0 dill-0.3.8 fsspec-2024.9.0 multiprocess-0.70.16 xxhash-3.5.0\n"
73
+ ]
74
+ }
75
+ ],
76
+ "source": [
77
+ "pip install datasets"
78
+ ]
79
+ },
80
+ {
81
+ "cell_type": "code",
82
+ "execution_count": null,
83
+ "metadata": {
84
+ "id": "-Oz-lmmExH_F"
85
+ },
86
+ "outputs": [],
87
+ "source": [
88
+ "import os\n",
89
+ "import pandas as pd\n",
90
+ "import huggingface_hub\n",
91
+ "import datasets\n",
92
+ "from datasets import load_dataset, load_from_disk,concatenate_datasets"
93
+ ]
94
+ },
95
+ {
96
+ "cell_type": "code",
97
+ "source": [
98
+ "all_df=[]\n",
99
+ "for df_path in os.listdir(\"/content/drive/MyDrive/Tokenized\"):\n",
100
+ " if ('.gsheet' not in df_path) and ((\"yt\" in df_path) or (\"mv\" in df_path)):\n",
101
+ " print(df_path)\n",
102
+ " all_df.append( pd.read_csv(f\"/content/drive/MyDrive/Tokenized/{df_path}\"))\n",
103
+ " #print(df_path)"
104
+ ],
105
+ "metadata": {
106
+ "colab": {
107
+ "base_uri": "https://localhost:8080/"
108
+ },
109
+ "id": "ACU3ZWjAfTsI",
110
+ "outputId": "d57c0bd1-6161-4ef3-f722-480e1f40af05"
111
+ },
112
+ "execution_count": null,
113
+ "outputs": [
114
+ {
115
+ "output_type": "stream",
116
+ "name": "stdout",
117
+ "text": [
118
+ "tokenized_yt0.csv\n",
119
+ "tokenized_mv0.csv\n",
120
+ "tokenized_yt1.csv\n",
121
+ "tokenized_mv1.csv\n",
122
+ "tokenized_yt2.csv\n",
123
+ "tokenized_mv2.csv\n",
124
+ "tokenized_yt3.csv\n",
125
+ "tokenized_mv3.csv\n",
126
+ "tokenized_yt4.csv\n",
127
+ "tokenized_mv4.csv\n",
128
+ "tokenized_yt5.csv\n",
129
+ "tokenized_mv5.csv\n",
130
+ "tokenized_yt6.csv\n",
131
+ "tokenized_mv6.csv\n",
132
+ "tokenized_yt7.csv\n",
133
+ "tokenized_mv7.csv\n",
134
+ "tokenized_yt8.csv\n",
135
+ "tokenized_mv8.csv\n",
136
+ "tokenized_yt9.csv\n",
137
+ "tokenized_mv9.csv\n",
138
+ "tokenized_yt10.csv\n",
139
+ "tokenized_mv10.csv\n",
140
+ "tokenized_yt11.csv\n",
141
+ "tokenized_mv11.csv\n",
142
+ "tokenized_yt12.csv\n",
143
+ "tokenized_mv12.csv\n",
144
+ "tokenized_yt13.csv\n",
145
+ "tokenized_mv13.csv\n",
146
+ "tokenized_yt14.csv\n",
147
+ "tokenized_mv14.csv\n",
148
+ "tokenized_yt15.csv\n",
149
+ "tokenized_mv15.csv\n",
150
+ "tokenized_yt16.csv\n",
151
+ "tokenized_mv16.csv\n",
152
+ "tokenized_yt17.csv\n",
153
+ "tokenized_mv17.csv\n",
154
+ "tokenized_yt18.csv\n",
155
+ "tokenized_mv18.csv\n",
156
+ "tokenized_yt19.csv\n",
157
+ "tokenized_mv19.csv\n",
158
+ "tokenized_yt20.csv\n",
159
+ "tokenized_mv20.csv\n",
160
+ "tokenized_yt21.csv\n",
161
+ "tokenized_mv21.csv\n",
162
+ "tokenized_yt22.csv\n",
163
+ "tokenized_mv22.csv\n",
164
+ "tokenized_yt23.csv\n",
165
+ "tokenized_mv23.csv\n",
166
+ "tokenized_yt24.csv\n",
167
+ "tokenized_mv24.csv\n",
168
+ "tokenized_yt25.csv\n",
169
+ "tokenized_mv25.csv\n",
170
+ "tokenized_yt26.csv\n",
171
+ "tokenized_mv26.csv\n",
172
+ "tokenized_mv27.csv\n",
173
+ "tokenized_yt27.csv\n",
174
+ "tokenized_mv28.csv\n",
175
+ "tokenized_yt28.csv\n",
176
+ "tokenized_mv29.csv\n",
177
+ "tokenized_yt29.csv\n",
178
+ "tokenized_mv30.csv\n",
179
+ "tokenized_yt30.csv\n",
180
+ "tokenized_mv31.csv\n",
181
+ "tokenized_yt31.csv\n",
182
+ "tokenized_mv32.csv\n",
183
+ "tokenized_yt32.csv\n",
184
+ "tokenized_mv33.csv\n",
185
+ "tokenized_yt33.csv\n",
186
+ "tokenized_mv34.csv\n",
187
+ "tokenized_yt34.csv\n",
188
+ "tokenized_mv35.csv\n",
189
+ "tokenized_mv37.csv\n",
190
+ "tokenized_mv38.csv\n",
191
+ "tokenized_yt37.csv\n",
192
+ "tokenized_mv39.csv\n",
193
+ "tokenized_yt38.csv\n",
194
+ "tokenized_mv40.csv\n",
195
+ "tokenized_yt39.csv\n",
196
+ "tokenized_mv41.csv\n",
197
+ "tokenized_yt40.csv\n",
198
+ "tokenized_mv42.csv\n",
199
+ "tokenized_yt41.csv\n",
200
+ "tokenized_mv43.csv\n",
201
+ "tokenized_yt42.csv\n",
202
+ "tokenized_mv44.csv\n",
203
+ "tokenized_yt43.csv\n",
204
+ "tokenized_mv45.csv\n",
205
+ "tokenized_mv46.csv\n",
206
+ "tokenized_yt44.csv\n",
207
+ "tokenized_mv47.csv\n",
208
+ "tokenized_yt45.csv\n",
209
+ "tokenized_mv48.csv\n",
210
+ "tokenized_yt46.csv\n",
211
+ "tokenized_mv49.csv\n",
212
+ "tokenized_yt47.csv\n",
213
+ "tokenized_mv50.csv\n",
214
+ "tokenized_yt48.csv\n",
215
+ "tokenized_mv51.csv\n",
216
+ "tokenized_mv70.csv\n",
217
+ "tokenized_yt70.csv\n",
218
+ "tokenized_mv71.csv\n",
219
+ "tokenized_mv72.csv\n",
220
+ "tokenized_yt71.csv\n",
221
+ "tokenized_mv73.csv\n",
222
+ "tokenized_yt72.csv\n",
223
+ "tokenized_mv74.csv\n",
224
+ "tokenized_yt73.csv\n",
225
+ "tokenized_mv75.csv\n",
226
+ "tokenized_yt74.csv\n",
227
+ "tokenized_mv76.csv\n",
228
+ "tokenized_yt75.csv\n",
229
+ "tokenized_mv77.csv\n",
230
+ "tokenized_yt76.csv\n",
231
+ "tokenized_mv78.csv\n",
232
+ "tokenized_yt77.csv\n",
233
+ "tokenized_mv79.csv\n",
234
+ "tokenized_yt78.csv\n",
235
+ "tokenized_mv80.csv\n",
236
+ "tokenized_mv81.csv\n",
237
+ "tokenized_yt79.csv\n",
238
+ "tokenized_mv82.csv\n",
239
+ "tokenized_yt80.csv\n",
240
+ "tokenized_mv83.csv\n",
241
+ "tokenized_yt81.csv\n",
242
+ "tokenized_mv84.csv\n",
243
+ "tokenized_yt82.csv\n",
244
+ "tokenized_mv85.csv\n",
245
+ "tokenized_yt83.csv\n",
246
+ "tokenized_mv86.csv\n",
247
+ "tokenized_yt84.csv\n",
248
+ "tokenized_mv87.csv\n",
249
+ "tokenized_yt85.csv\n",
250
+ "tokenized_mv88.csv\n",
251
+ "tokenized_yt86.csv\n",
252
+ "tokenized_mv89.csv\n",
253
+ "tokenized_mv90.csv\n",
254
+ "tokenized_yt87.csv\n",
255
+ "tokenized_mv91.csv\n",
256
+ "tokenized_yt88.csv\n",
257
+ "tokenized_mv92.csv\n",
258
+ "tokenized_yt89.csv\n",
259
+ "tokenized_mv93.csv\n",
260
+ "tokenized_yt90.csv\n",
261
+ "tokenized_mv94.csv\n",
262
+ "tokenized_mv95.csv\n",
263
+ "tokenized_yt91.csv\n",
264
+ "tokenized_mv96.csv\n",
265
+ "tokenized_yt92.csv\n",
266
+ "tokenized_mv97.csv\n",
267
+ "tokenized_yt93.csv\n",
268
+ "tokenized_mv98.csv\n",
269
+ "tokenized_yt94.csv\n",
270
+ "tokenized_mv99.csv\n",
271
+ "tokenized_mv100.csv\n",
272
+ "tokenized_yt95.csv\n",
273
+ "tokenized_mv101.csv\n",
274
+ "tokenized_yt96.csv\n",
275
+ "tokenized_mv102.csv\n",
276
+ "tokenized_yt97.csv\n",
277
+ "tokenized_mv103.csv\n",
278
+ "tokenized_yt98.csv\n",
279
+ "tokenized_mv104.csv\n",
280
+ "tokenized_mv105.csv\n",
281
+ "tokenized_yt99.csv\n",
282
+ "tokenized_mv106.csv\n",
283
+ "tokenized_yt100.csv\n",
284
+ "tokenized_mv107.csv\n",
285
+ "tokenized_yt101.csv\n",
286
+ "tokenized_mv108.csv\n",
287
+ "tokenized_yt102.csv\n",
288
+ "tokenized_mv109.csv\n",
289
+ "tokenized_mv110.csv\n",
290
+ "tokenized_yt103.csv\n",
291
+ "tokenized_mv112.csv\n",
292
+ "tokenized_yt104.csv\n",
293
+ "tokenized_yt105.csv\n",
294
+ "tokenized_yt106.csv\n",
295
+ "tokenized_yt107.csv\n",
296
+ "tokenized_yt108.csv\n",
297
+ "tokenized_yt109.csv\n",
298
+ "tokenized_yt110.csv\n",
299
+ "tokenized_yt111.csv\n",
300
+ "tokenized_yt112.csv\n",
301
+ "tokenized_yt113.csv\n",
302
+ "tokenized_yt114.csv\n",
303
+ "tokenized_yt115.csv\n",
304
+ "tokenized_yt116.csv\n",
305
+ "tokenized_yt117.csv\n",
306
+ "tokenized_yt118.csv\n",
307
+ "tokenized_yt119.csv\n",
308
+ "tokenized_yt120.csv\n",
309
+ "tokenized_yt121.csv\n",
310
+ "tokenized_yt122.csv\n",
311
+ "tokenized_yt123.csv\n",
312
+ "tokenized_yt124.csv\n",
313
+ "tokenized_yt125.csv\n",
314
+ "tokenized_yt126.csv\n",
315
+ "tokenized_yt127.csv\n",
316
+ "tokenized_yt128.csv\n",
317
+ "tokenized_yt129.csv\n",
318
+ "tokenized_yt130.csv\n",
319
+ "tokenized_yt131.csv\n",
320
+ "tokenized_yt132.csv\n",
321
+ "tokenized_yt133.csv\n",
322
+ "tokenized_yt134.csv\n",
323
+ "tokenized_yt135.csv\n",
324
+ "tokenized_yt136.csv\n",
325
+ "tokenized_yt137.csv\n",
326
+ "tokenized_yt138.csv\n",
327
+ "tokenized_yt139.csv\n",
328
+ "tokenized_yt140.csv\n",
329
+ "tokenized_yt141.csv\n",
330
+ "tokenized_yt142.csv\n",
331
+ "tokenized_yt143.csv\n",
332
+ "tokenized_yt144.csv\n",
333
+ "tokenized_yt145.csv\n",
334
+ "tokenized_yt146.csv\n",
335
+ "tokenized_yt147.csv\n",
336
+ "tokenized_yt148.csv\n",
337
+ "tokenized_yt149.csv\n",
338
+ "tokenized_yt150.csv\n",
339
+ "tokenized_yt151.csv\n",
340
+ "tokenized_yt152.csv\n",
341
+ "tokenized_yt153.csv\n",
342
+ "tokenized_yt154.csv\n",
343
+ "tokenized_yt155.csv\n",
344
+ "tokenized_yt156.csv\n",
345
+ "tokenized_yt157.csv\n",
346
+ "tokenized_yt158.csv\n",
347
+ "tokenized_yt159.csv\n",
348
+ "tokenized_yt160.csv\n",
349
+ "tokenized_yt161.csv\n",
350
+ "tokenized_yt162.csv\n",
351
+ "tokenized_yt163.csv\n",
352
+ "tokenized_yt164.csv\n",
353
+ "tokenized_yt165.csv\n",
354
+ "tokenized_yt166.csv\n",
355
+ "tokenized_yt167.csv\n",
356
+ "tokenized_yt168.csv\n",
357
+ "tokenized_yt169.csv\n",
358
+ "tokenized_yt170.csv\n",
359
+ "tokenized_yt171.csv\n",
360
+ "tokenized_yt172.csv\n",
361
+ "tokenized_yt173.csv\n",
362
+ "tokenized_yt174.csv\n",
363
+ "tokenized_yt175.csv\n",
364
+ "tokenized_yt176.csv\n",
365
+ "tokenized_yt177.csv\n",
366
+ "tokenized_yt178.csv\n",
367
+ "tokenized_yt179.csv\n",
368
+ "tokenized_yt180.csv\n",
369
+ "tokenized_yt181.csv\n",
370
+ "tokenized_yt182.csv\n",
371
+ "tokenized_yt183.csv\n",
372
+ "tokenized_yt184.csv\n",
373
+ "tokenized_yt185.csv\n",
374
+ "tokenized_yt186.csv\n",
375
+ "tokenized_yt187.csv\n",
376
+ "tokenized_yt188.csv\n",
377
+ "tokenized_yt189.csv\n",
378
+ "tokenized_yt190.csv\n",
379
+ "tokenized_yt191.csv\n",
380
+ "tokenized_yt192.csv\n",
381
+ "tokenized_yt193.csv\n",
382
+ "tokenized_yt194.csv\n",
383
+ "tokenized_yt195.csv\n",
384
+ "tokenized_yt196.csv\n",
385
+ "tokenized_yt197.csv\n",
386
+ "tokenized_yt198.csv\n",
387
+ "tokenized_yt199.csv\n",
388
+ "tokenized_yt200.csv\n",
389
+ "tokenized_yt201.csv\n",
390
+ "tokenized_yt202.csv\n",
391
+ "tokenized_yt203.csv\n",
392
+ "tokenized_yt204.csv\n",
393
+ "tokenized_yt205.csv\n",
394
+ "tokenized_yt206.csv\n",
395
+ "tokenized_yt210.csv\n",
396
+ "tokenized_yt211.csv\n",
397
+ "tokenized_yt212.csv\n",
398
+ "tokenized_yt213.csv\n",
399
+ "tokenized_yt214.csv\n",
400
+ "tokenized_yt215.csv\n",
401
+ "tokenized_yt216.csv\n",
402
+ "tokenized_yt217.csv\n",
403
+ "tokenized_yt218.csv\n",
404
+ "tokenized_yt219.csv\n",
405
+ "tokenized_yt220.csv\n",
406
+ "tokenized_yt221.csv\n",
407
+ "tokenized_yt222.csv\n",
408
+ "tokenized_yt223.csv\n",
409
+ "tokenized_yt224.csv\n",
410
+ "tokenized_yt225.csv\n",
411
+ "tokenized_yt226.csv\n",
412
+ "tokenized_yt227.csv\n",
413
+ "tokenized_yt228.csv\n",
414
+ "tokenized_yt230.csv\n",
415
+ "tokenized_yt1101.csv\n"
416
+ ]
417
+ }
418
+ ]
419
+ },
420
+ {
421
+ "cell_type": "code",
422
+ "source": [
423
+ "train_data=pd.concat(all_df)"
424
+ ],
425
+ "metadata": {
426
+ "id": "9f8aFlgOfb6c"
427
+ },
428
+ "execution_count": null,
429
+ "outputs": []
430
+ },
431
+ {
432
+ "cell_type": "code",
433
+ "source": [
434
+ "train_data"
435
+ ],
436
+ "metadata": {
437
+ "colab": {
438
+ "base_uri": "https://localhost:8080/",
439
+ "height": 461
440
+ },
441
+ "id": "Hsq8k7WogC_j",
442
+ "outputId": "be6ce9a2-b1a0-4a7b-9251-40bc6e1e538f"
443
+ },
444
+ "execution_count": null,
445
+ "outputs": [
446
+ {
447
+ "output_type": "execute_result",
448
+ "data": {
449
+ "text/plain": [
450
+ " Unnamed: 0 0\n",
451
+ "0 0 <|im_start|>\\n<|text_start|>music<|text_sep|>h...\n",
452
+ "1 1 <|im_start|>\\n<|text_start|>money<|text_sep|>d...\n",
453
+ "2 2 <|im_start|>\\n<|text_start|>you<|text_sep|>no<...\n",
454
+ "3 3 <|im_start|>\\n<|text_start|>morning<|text_sep|...\n",
455
+ "4 4 <|im_start|>\\n<|text_start|>um<|text_sep|>im<|...\n",
456
+ ".. ... ...\n",
457
+ "209 209 <|im_start|>\\n<|text_start|>there<|text_sep|>g...\n",
458
+ "210 210 <|im_start|>\\n<|text_start|>im<|text_sep|>look...\n",
459
+ "211 211 <|im_start|>\\n<|text_start|>all<|text_sep|>of<...\n",
460
+ "212 212 <|im_start|>\\n<|text_start|>good<|text_sep|>ti...\n",
461
+ "213 213 <|im_start|>\\n<|text_start|>have<|text_sep|>be...\n",
462
+ "\n",
463
+ "[295292 rows x 2 columns]"
464
+ ],
465
+ "text/html": [
466
+ "\n",
467
+ " <div id=\"df-b8e001d3-2f9d-47f0-b7d3-3e4be94d8818\" class=\"colab-df-container\">\n",
468
+ " <div>\n",
469
+ "<style scoped>\n",
470
+ " .dataframe tbody tr th:only-of-type {\n",
471
+ " vertical-align: middle;\n",
472
+ " }\n",
473
+ "\n",
474
+ " .dataframe tbody tr th {\n",
475
+ " vertical-align: top;\n",
476
+ " }\n",
477
+ "\n",
478
+ " .dataframe thead th {\n",
479
+ " text-align: right;\n",
480
+ " }\n",
481
+ "</style>\n",
482
+ "<table border=\"1\" class=\"dataframe\">\n",
483
+ " <thead>\n",
484
+ " <tr style=\"text-align: right;\">\n",
485
+ " <th></th>\n",
486
+ " <th>Unnamed: 0</th>\n",
487
+ " <th>0</th>\n",
488
+ " </tr>\n",
489
+ " </thead>\n",
490
+ " <tbody>\n",
491
+ " <tr>\n",
492
+ " <th>0</th>\n",
493
+ " <td>0</td>\n",
494
+ " <td>&lt;|im_start|&gt;\\n&lt;|text_start|&gt;music&lt;|text_sep|&gt;h...</td>\n",
495
+ " </tr>\n",
496
+ " <tr>\n",
497
+ " <th>1</th>\n",
498
+ " <td>1</td>\n",
499
+ " <td>&lt;|im_start|&gt;\\n&lt;|text_start|&gt;money&lt;|text_sep|&gt;d...</td>\n",
500
+ " </tr>\n",
501
+ " <tr>\n",
502
+ " <th>2</th>\n",
503
+ " <td>2</td>\n",
504
+ " <td>&lt;|im_start|&gt;\\n&lt;|text_start|&gt;you&lt;|text_sep|&gt;no&lt;...</td>\n",
505
+ " </tr>\n",
506
+ " <tr>\n",
507
+ " <th>3</th>\n",
508
+ " <td>3</td>\n",
509
+ " <td>&lt;|im_start|&gt;\\n&lt;|text_start|&gt;morning&lt;|text_sep|...</td>\n",
510
+ " </tr>\n",
511
+ " <tr>\n",
512
+ " <th>4</th>\n",
513
+ " <td>4</td>\n",
514
+ " <td>&lt;|im_start|&gt;\\n&lt;|text_start|&gt;um&lt;|text_sep|&gt;im&lt;|...</td>\n",
515
+ " </tr>\n",
516
+ " <tr>\n",
517
+ " <th>...</th>\n",
518
+ " <td>...</td>\n",
519
+ " <td>...</td>\n",
520
+ " </tr>\n",
521
+ " <tr>\n",
522
+ " <th>209</th>\n",
523
+ " <td>209</td>\n",
524
+ " <td>&lt;|im_start|&gt;\\n&lt;|text_start|&gt;there&lt;|text_sep|&gt;g...</td>\n",
525
+ " </tr>\n",
526
+ " <tr>\n",
527
+ " <th>210</th>\n",
528
+ " <td>210</td>\n",
529
+ " <td>&lt;|im_start|&gt;\\n&lt;|text_start|&gt;im&lt;|text_sep|&gt;look...</td>\n",
530
+ " </tr>\n",
531
+ " <tr>\n",
532
+ " <th>211</th>\n",
533
+ " <td>211</td>\n",
534
+ " <td>&lt;|im_start|&gt;\\n&lt;|text_start|&gt;all&lt;|text_sep|&gt;of&lt;...</td>\n",
535
+ " </tr>\n",
536
+ " <tr>\n",
537
+ " <th>212</th>\n",
538
+ " <td>212</td>\n",
539
+ " <td>&lt;|im_start|&gt;\\n&lt;|text_start|&gt;good&lt;|text_sep|&gt;ti...</td>\n",
540
+ " </tr>\n",
541
+ " <tr>\n",
542
+ " <th>213</th>\n",
543
+ " <td>213</td>\n",
544
+ " <td>&lt;|im_start|&gt;\\n&lt;|text_start|&gt;have&lt;|text_sep|&gt;be...</td>\n",
545
+ " </tr>\n",
546
+ " </tbody>\n",
547
+ "</table>\n",
548
+ "<p>295292 rows × 2 columns</p>\n",
549
+ "</div>\n",
550
+ " <div class=\"colab-df-buttons\">\n",
551
+ "\n",
552
+ " <div class=\"colab-df-container\">\n",
553
+ " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-b8e001d3-2f9d-47f0-b7d3-3e4be94d8818')\"\n",
554
+ " title=\"Convert this dataframe to an interactive table.\"\n",
555
+ " style=\"display:none;\">\n",
556
+ "\n",
557
+ " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
558
+ " <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
559
+ " </svg>\n",
560
+ " </button>\n",
561
+ "\n",
562
+ " <style>\n",
563
+ " .colab-df-container {\n",
564
+ " display:flex;\n",
565
+ " gap: 12px;\n",
566
+ " }\n",
567
+ "\n",
568
+ " .colab-df-convert {\n",
569
+ " background-color: #E8F0FE;\n",
570
+ " border: none;\n",
571
+ " border-radius: 50%;\n",
572
+ " cursor: pointer;\n",
573
+ " display: none;\n",
574
+ " fill: #1967D2;\n",
575
+ " height: 32px;\n",
576
+ " padding: 0 0 0 0;\n",
577
+ " width: 32px;\n",
578
+ " }\n",
579
+ "\n",
580
+ " .colab-df-convert:hover {\n",
581
+ " background-color: #E2EBFA;\n",
582
+ " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
583
+ " fill: #174EA6;\n",
584
+ " }\n",
585
+ "\n",
586
+ " .colab-df-buttons div {\n",
587
+ " margin-bottom: 4px;\n",
588
+ " }\n",
589
+ "\n",
590
+ " [theme=dark] .colab-df-convert {\n",
591
+ " background-color: #3B4455;\n",
592
+ " fill: #D2E3FC;\n",
593
+ " }\n",
594
+ "\n",
595
+ " [theme=dark] .colab-df-convert:hover {\n",
596
+ " background-color: #434B5C;\n",
597
+ " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
598
+ " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
599
+ " fill: #FFFFFF;\n",
600
+ " }\n",
601
+ " </style>\n",
602
+ "\n",
603
+ " <script>\n",
604
+ " const buttonEl =\n",
605
+ " document.querySelector('#df-b8e001d3-2f9d-47f0-b7d3-3e4be94d8818 button.colab-df-convert');\n",
606
+ " buttonEl.style.display =\n",
607
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
608
+ "\n",
609
+ " async function convertToInteractive(key) {\n",
610
+ " const element = document.querySelector('#df-b8e001d3-2f9d-47f0-b7d3-3e4be94d8818');\n",
611
+ " const dataTable =\n",
612
+ " await google.colab.kernel.invokeFunction('convertToInteractive',\n",
613
+ " [key], {});\n",
614
+ " if (!dataTable) return;\n",
615
+ "\n",
616
+ " const docLinkHtml = 'Like what you see? Visit the ' +\n",
617
+ " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
618
+ " + ' to learn more about interactive tables.';\n",
619
+ " element.innerHTML = '';\n",
620
+ " dataTable['output_type'] = 'display_data';\n",
621
+ " await google.colab.output.renderOutput(dataTable, element);\n",
622
+ " const docLink = document.createElement('div');\n",
623
+ " docLink.innerHTML = docLinkHtml;\n",
624
+ " element.appendChild(docLink);\n",
625
+ " }\n",
626
+ " </script>\n",
627
+ " </div>\n",
628
+ "\n",
629
+ "\n",
630
+ "<div id=\"df-a078f78c-88de-41c2-a0a4-09f4550f52fb\">\n",
631
+ " <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-a078f78c-88de-41c2-a0a4-09f4550f52fb')\"\n",
632
+ " title=\"Suggest charts\"\n",
633
+ " style=\"display:none;\">\n",
634
+ "\n",
635
+ "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
636
+ " width=\"24px\">\n",
637
+ " <g>\n",
638
+ " <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
639
+ " </g>\n",
640
+ "</svg>\n",
641
+ " </button>\n",
642
+ "\n",
643
+ "<style>\n",
644
+ " .colab-df-quickchart {\n",
645
+ " --bg-color: #E8F0FE;\n",
646
+ " --fill-color: #1967D2;\n",
647
+ " --hover-bg-color: #E2EBFA;\n",
648
+ " --hover-fill-color: #174EA6;\n",
649
+ " --disabled-fill-color: #AAA;\n",
650
+ " --disabled-bg-color: #DDD;\n",
651
+ " }\n",
652
+ "\n",
653
+ " [theme=dark] .colab-df-quickchart {\n",
654
+ " --bg-color: #3B4455;\n",
655
+ " --fill-color: #D2E3FC;\n",
656
+ " --hover-bg-color: #434B5C;\n",
657
+ " --hover-fill-color: #FFFFFF;\n",
658
+ " --disabled-bg-color: #3B4455;\n",
659
+ " --disabled-fill-color: #666;\n",
660
+ " }\n",
661
+ "\n",
662
+ " .colab-df-quickchart {\n",
663
+ " background-color: var(--bg-color);\n",
664
+ " border: none;\n",
665
+ " border-radius: 50%;\n",
666
+ " cursor: pointer;\n",
667
+ " display: none;\n",
668
+ " fill: var(--fill-color);\n",
669
+ " height: 32px;\n",
670
+ " padding: 0;\n",
671
+ " width: 32px;\n",
672
+ " }\n",
673
+ "\n",
674
+ " .colab-df-quickchart:hover {\n",
675
+ " background-color: var(--hover-bg-color);\n",
676
+ " box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
677
+ " fill: var(--button-hover-fill-color);\n",
678
+ " }\n",
679
+ "\n",
680
+ " .colab-df-quickchart-complete:disabled,\n",
681
+ " .colab-df-quickchart-complete:disabled:hover {\n",
682
+ " background-color: var(--disabled-bg-color);\n",
683
+ " fill: var(--disabled-fill-color);\n",
684
+ " box-shadow: none;\n",
685
+ " }\n",
686
+ "\n",
687
+ " .colab-df-spinner {\n",
688
+ " border: 2px solid var(--fill-color);\n",
689
+ " border-color: transparent;\n",
690
+ " border-bottom-color: var(--fill-color);\n",
691
+ " animation:\n",
692
+ " spin 1s steps(1) infinite;\n",
693
+ " }\n",
694
+ "\n",
695
+ " @keyframes spin {\n",
696
+ " 0% {\n",
697
+ " border-color: transparent;\n",
698
+ " border-bottom-color: var(--fill-color);\n",
699
+ " border-left-color: var(--fill-color);\n",
700
+ " }\n",
701
+ " 20% {\n",
702
+ " border-color: transparent;\n",
703
+ " border-left-color: var(--fill-color);\n",
704
+ " border-top-color: var(--fill-color);\n",
705
+ " }\n",
706
+ " 30% {\n",
707
+ " border-color: transparent;\n",
708
+ " border-left-color: var(--fill-color);\n",
709
+ " border-top-color: var(--fill-color);\n",
710
+ " border-right-color: var(--fill-color);\n",
711
+ " }\n",
712
+ " 40% {\n",
713
+ " border-color: transparent;\n",
714
+ " border-right-color: var(--fill-color);\n",
715
+ " border-top-color: var(--fill-color);\n",
716
+ " }\n",
717
+ " 60% {\n",
718
+ " border-color: transparent;\n",
719
+ " border-right-color: var(--fill-color);\n",
720
+ " }\n",
721
+ " 80% {\n",
722
+ " border-color: transparent;\n",
723
+ " border-right-color: var(--fill-color);\n",
724
+ " border-bottom-color: var(--fill-color);\n",
725
+ " }\n",
726
+ " 90% {\n",
727
+ " border-color: transparent;\n",
728
+ " border-bottom-color: var(--fill-color);\n",
729
+ " }\n",
730
+ " }\n",
731
+ "</style>\n",
732
+ "\n",
733
+ " <script>\n",
734
+ " async function quickchart(key) {\n",
735
+ " const quickchartButtonEl =\n",
736
+ " document.querySelector('#' + key + ' button');\n",
737
+ " quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
738
+ " quickchartButtonEl.classList.add('colab-df-spinner');\n",
739
+ " try {\n",
740
+ " const charts = await google.colab.kernel.invokeFunction(\n",
741
+ " 'suggestCharts', [key], {});\n",
742
+ " } catch (error) {\n",
743
+ " console.error('Error during call to suggestCharts:', error);\n",
744
+ " }\n",
745
+ " quickchartButtonEl.classList.remove('colab-df-spinner');\n",
746
+ " quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
747
+ " }\n",
748
+ " (() => {\n",
749
+ " let quickchartButtonEl =\n",
750
+ " document.querySelector('#df-a078f78c-88de-41c2-a0a4-09f4550f52fb button');\n",
751
+ " quickchartButtonEl.style.display =\n",
752
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
753
+ " })();\n",
754
+ " </script>\n",
755
+ "</div>\n",
756
+ "\n",
757
+ " <div id=\"id_0bd96b93-aa89-458f-a577-12a9d983d031\">\n",
758
+ " <style>\n",
759
+ " .colab-df-generate {\n",
760
+ " background-color: #E8F0FE;\n",
761
+ " border: none;\n",
762
+ " border-radius: 50%;\n",
763
+ " cursor: pointer;\n",
764
+ " display: none;\n",
765
+ " fill: #1967D2;\n",
766
+ " height: 32px;\n",
767
+ " padding: 0 0 0 0;\n",
768
+ " width: 32px;\n",
769
+ " }\n",
770
+ "\n",
771
+ " .colab-df-generate:hover {\n",
772
+ " background-color: #E2EBFA;\n",
773
+ " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
774
+ " fill: #174EA6;\n",
775
+ " }\n",
776
+ "\n",
777
+ " [theme=dark] .colab-df-generate {\n",
778
+ " background-color: #3B4455;\n",
779
+ " fill: #D2E3FC;\n",
780
+ " }\n",
781
+ "\n",
782
+ " [theme=dark] .colab-df-generate:hover {\n",
783
+ " background-color: #434B5C;\n",
784
+ " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
785
+ " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
786
+ " fill: #FFFFFF;\n",
787
+ " }\n",
788
+ " </style>\n",
789
+ " <button class=\"colab-df-generate\" onclick=\"generateWithVariable('train_data')\"\n",
790
+ " title=\"Generate code using this dataframe.\"\n",
791
+ " style=\"display:none;\">\n",
792
+ "\n",
793
+ " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
794
+ " width=\"24px\">\n",
795
+ " <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n",
796
+ " </svg>\n",
797
+ " </button>\n",
798
+ " <script>\n",
799
+ " (() => {\n",
800
+ " const buttonEl =\n",
801
+ " document.querySelector('#id_0bd96b93-aa89-458f-a577-12a9d983d031 button.colab-df-generate');\n",
802
+ " buttonEl.style.display =\n",
803
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
804
+ "\n",
805
+ " buttonEl.onclick = () => {\n",
806
+ " google.colab.notebook.generateWithVariable('train_data');\n",
807
+ " }\n",
808
+ " })();\n",
809
+ " </script>\n",
810
+ " </div>\n",
811
+ "\n",
812
+ " </div>\n",
813
+ " </div>\n"
814
+ ],
815
+ "application/vnd.google.colaboratory.intrinsic+json": {
816
+ "type": "dataframe",
817
+ "variable_name": "train_data"
818
+ }
819
+ },
820
+ "metadata": {},
821
+ "execution_count": 6
822
+ }
823
+ ]
824
+ },
825
+ {
826
+ "cell_type": "code",
827
+ "source": [
828
+ "train_data.to_csv(\"/content/drive/MyDrive/Tokenized2/all_data.csv\")"
829
+ ],
830
+ "metadata": {
831
+ "id": "S0011JRDtLO2"
832
+ },
833
+ "execution_count": null,
834
+ "outputs": []
835
+ }
836
+ ],
837
+ "metadata": {
838
+ "colab": {
839
+ "provenance": []
840
+ },
841
+ "kernelspec": {
842
+ "display_name": "Python 3",
843
+ "name": "python3"
844
+ },
845
+ "language_info": {
846
+ "name": "python"
847
+ }
848
+ },
849
+ "nbformat": 4,
850
+ "nbformat_minor": 0
851
+ }
yarngpt/notebooks/Merge_datasets_local (1).ipynb ADDED
@@ -0,0 +1,258 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "metadata": {
7
+ "id": "mKb-4Hv4xNpF"
8
+ },
9
+ "outputs": [],
10
+ "source": [
11
+ "pip install datasets"
12
+ ]
13
+ },
14
+ {
15
+ "cell_type": "code",
16
+ "execution_count": null,
17
+ "metadata": {
18
+ "id": "1QHW2w8cdupP"
19
+ },
20
+ "outputs": [],
21
+ "source": [
22
+ "import huggingface_hub"
23
+ ]
24
+ },
25
+ {
26
+ "cell_type": "code",
27
+ "execution_count": null,
28
+ "metadata": {
29
+ "id": "-Oz-lmmExH_F"
30
+ },
31
+ "outputs": [],
32
+ "source": [
33
+ "import os\n",
34
+ "import pandas as pd\n",
35
+ "import huggingface_hub\n",
36
+ "import datasets\n",
37
+ "from datasets import load_dataset, load_from_disk,concatenate_datasets"
38
+ ]
39
+ },
40
+ {
41
+ "cell_type": "code",
42
+ "execution_count": null,
43
+ "metadata": {
44
+ "id": "NfVKJ5xgdyc1"
45
+ },
46
+ "outputs": [],
47
+ "source": [
48
+ "huggingface_hub.login()"
49
+ ]
50
+ },
51
+ {
52
+ "cell_type": "code",
53
+ "execution_count": null,
54
+ "metadata": {
55
+ "id": "llvhheVWjDwi"
56
+ },
57
+ "outputs": [],
58
+ "source": [
59
+ "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
60
+ "checkpoint=\"saheedniyi/YarnGPT\"\n",
61
+ "#checkpoint=\"saheedniyi/public_extra2\"#device = \"cuda\" # for GPU usage or \"cpu\" for CPU usage\n",
62
+ "tokenizer = AutoTokenizer.from_pretrained(checkpoint)"
63
+ ]
64
+ },
65
+ {
66
+ "cell_type": "code",
67
+ "execution_count": null,
68
+ "metadata": {
69
+ "id": "sfVhIZEpisZg"
70
+ },
71
+ "outputs": [],
72
+ "source": [
73
+ "def token_length(prompt):\n",
74
+ " return len(tokenizer(prompt)[\"input_ids\"])"
75
+ ]
76
+ },
77
+ {
78
+ "cell_type": "code",
79
+ "execution_count": null,
80
+ "metadata": {
81
+ "id": "ACU3ZWjAfTsI"
82
+ },
83
+ "outputs": [],
84
+ "source": [
85
+ "all_df=[]\n",
86
+ "for df_path in os.listdir(\"/content/drive/MyDrive/naij_tokenized\"):\n",
87
+ " if ('.gsheet' not in df_path):\n",
88
+ " df=pd.read_csv(f\"/content/drive/MyDrive/naij_tokenized/{df_path}\")\n",
89
+ " df[\"length\"]=df[\"tts\"].apply(token_length)\n",
90
+ " print(df_path)\n",
91
+ " all_df.append(df)"
92
+ ]
93
+ },
94
+ {
95
+ "cell_type": "code",
96
+ "execution_count": null,
97
+ "metadata": {
98
+ "id": "9f8aFlgOfb6c"
99
+ },
100
+ "outputs": [],
101
+ "source": [
102
+ "train_data=pd.concat(all_df)"
103
+ ]
104
+ },
105
+ {
106
+ "cell_type": "code",
107
+ "execution_count": null,
108
+ "metadata": {
109
+ "id": "Hsq8k7WogC_j"
110
+ },
111
+ "outputs": [],
112
+ "source": [
113
+ "train_data_1=train_data.drop_duplicates(\"tts\")"
114
+ ]
115
+ },
116
+ {
117
+ "cell_type": "code",
118
+ "execution_count": null,
119
+ "metadata": {
120
+ "id": "dVYrYSISn9hE"
121
+ },
122
+ "outputs": [],
123
+ "source": [
124
+ "train_data_1.shape"
125
+ ]
126
+ },
127
+ {
128
+ "cell_type": "code",
129
+ "execution_count": null,
130
+ "metadata": {
131
+ "id": "bec2mRTdoAf_"
132
+ },
133
+ "outputs": [],
134
+ "source": [
135
+ "train_data.shape"
136
+ ]
137
+ },
138
+ {
139
+ "cell_type": "code",
140
+ "source": [
141
+ "train_data_1.drop(\"stt\",axis=1, inplace=True)"
142
+ ],
143
+ "metadata": {
144
+ "id": "pythz_XcgF9t"
145
+ },
146
+ "execution_count": null,
147
+ "outputs": []
148
+ },
149
+ {
150
+ "cell_type": "code",
151
+ "source": [
152
+ "train_data_1.drop([\"Unnamed: 0\",\"__index_level_0__\"],axis=1, inplace=True)"
153
+ ],
154
+ "metadata": {
155
+ "id": "9yebXiwvgZMa"
156
+ },
157
+ "execution_count": null,
158
+ "outputs": []
159
+ },
160
+ {
161
+ "cell_type": "code",
162
+ "source": [
163
+ "def replace_text(txt):\n",
164
+ " txt=txt.replace(\"<|hausa|\\n\",\"<|hausa|>\\n\")\n",
165
+ " txt=txt.replace(\"<|igbo|\\n\",\"<|igbo|>\\n\")\n",
166
+ " txt=txt.replace(\"<|yoruba|\\n\",\"<|yoruba|>\\n\")#hausa\":\"<|hausa|\",\n",
167
+ " txt=txt.replace(\"\\n<|tts|>\",\"\")\n",
168
+ " return txt"
169
+ ],
170
+ "metadata": {
171
+ "id": "kUvi1ItQg6HT"
172
+ },
173
+ "execution_count": null,
174
+ "outputs": []
175
+ },
176
+ {
177
+ "cell_type": "code",
178
+ "execution_count": null,
179
+ "metadata": {
180
+ "id": "TgWv4W8Wiwd1"
181
+ },
182
+ "outputs": [],
183
+ "source": [
184
+ "train_data_1=train_data_1[train_data_1[\"tts\"]!=\"An error occurred\"]"
185
+ ]
186
+ },
187
+ {
188
+ "cell_type": "code",
189
+ "source": [
190
+ "train_data_1[\"tts\"]=train_data_1[\"tts\"].apply(replace_text)"
191
+ ],
192
+ "metadata": {
193
+ "id": "CP7bVwRqh_qH"
194
+ },
195
+ "execution_count": null,
196
+ "outputs": []
197
+ },
198
+ {
199
+ "cell_type": "code",
200
+ "source": [
201
+ "train_data_1.shape"
202
+ ],
203
+ "metadata": {
204
+ "id": "YMk40ZsIrW44"
205
+ },
206
+ "execution_count": null,
207
+ "outputs": []
208
+ },
209
+ {
210
+ "cell_type": "code",
211
+ "source": [
212
+ "train_data_1"
213
+ ],
214
+ "metadata": {
215
+ "id": "RVD3f0frrcfi"
216
+ },
217
+ "execution_count": null,
218
+ "outputs": []
219
+ },
220
+ {
221
+ "cell_type": "code",
222
+ "source": [
223
+ "train_data_1=train_data_1[train_data_1[\"length\"]<4000]"
224
+ ],
225
+ "metadata": {
226
+ "id": "utJLIf1orhnE"
227
+ },
228
+ "execution_count": null,
229
+ "outputs": []
230
+ },
231
+ {
232
+ "cell_type": "code",
233
+ "source": [
234
+ "train_data_1.to_csv(\"/content/drive/MyDrive/naij_tokenized/final_all_lang.csv\")"
235
+ ],
236
+ "metadata": {
237
+ "id": "BOynFjQlruWJ"
238
+ },
239
+ "execution_count": null,
240
+ "outputs": []
241
+ }
242
+ ],
243
+ "metadata": {
244
+ "colab": {
245
+ "machine_shape": "hm",
246
+ "provenance": []
247
+ },
248
+ "kernelspec": {
249
+ "display_name": "Python 3",
250
+ "name": "python3"
251
+ },
252
+ "language_info": {
253
+ "name": "python"
254
+ }
255
+ },
256
+ "nbformat": 4,
257
+ "nbformat_minor": 0
258
+ }
yarngpt/notebooks/Yoruba_prepare_data_naij (2).ipynb ADDED
The diff for this file is too large to render. See raw diff
 
yarngpt/notebooks/audio_0c026c21-f432-4d20-a86b-899a10d9ed60.webp ADDED

Git LFS Details

  • SHA256: 2340310c5cad3c8bbb5a435bb61546ba30452d681264047babd448d7ca02f52e
  • Pointer size: 131 Bytes
  • Size of remote file: 229 kB
yarngpt/notebooks/train_YarnGPT.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
yarngpt/notebooks/train_YarnGPT_local.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
yarngpt/python-wrapper/README.md ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # YarnGPT Python Wrapper Library
2
+
3
+ ## Description
4
+ YarnGPT is a Python wrapper for the YarnGPT text-to-speech model, designed to synthesize natural Nigerian-accented English speech using a pure language modeling approach. This library provides a simple API to convert text into audio output, allowing users to select from various preset voices and adjust generation parameters.
5
+
6
+ ## Features
7
+ - Supports 6 preset voices (idera, jude, joke, umar, osagie, onye)
8
+ - Utilizes Hugging Face's model caching for efficient model loading
9
+ - Exposes a straightforward API function: generate_speech(text, speaker, temperature, repetition_penalty, max_length)
10
+ - Allows customization of generation parameters such as temperature, repetition penalty, and maximum token length
11
+ - Includes unit tests to ensure core functionality
12
+
13
+ ## Installation
14
+ 1. Create and activate a virtual environment:
15
+ - On Linux/macOS:
16
+ ```bash
17
+ python3 -m venv env
18
+ source env/bin/activate
19
+ ```
20
+ - On Windows:
21
+ ```bash
22
+ python -m venv env
23
+ env\Scripts\activate
24
+ ```
25
+
26
+ 2. Install the package:
27
+ ```bash
28
+ pip install yarngpt
29
+ ```
30
+
31
+ ## Usage
32
+ Basic usage to generate and save audio:
33
+ ```python
34
+ from yarngpt import generate_speech
35
+ import torchaudio
36
+
37
+ # Generate speech with the default speaker (idera)
38
+ audio = generate_speech("Hello, this is a test.")
39
+
40
+ # Save the generated audio
41
+ torchaudio.save("output.wav", audio, sample_rate=24000)
42
+ ```
43
+
44
+ For Jupyter Notebook users, you can also play the audio directly:
45
+ ```python
46
+ from yarngpt import generate_speech
47
+ import torchaudio
48
+ from IPython.display import Audio
49
+
50
+ # Generate and save speech
51
+ audio = generate_speech("Hello, this is a test.", speaker="joke")
52
+ torchaudio.save("output.wav", audio, sample_rate=24000)
53
+
54
+ # Play the audio in the notebook
55
+ Audio("output.wav")
56
+ ```
57
+
58
+ ## Parameter Options
59
+ - `text`: The input string to convert to speech
60
+ - `speaker`: Choose from available speakers: idera, jude, joke, umar, osagie, onye (default is "idera")
61
+ - `temperature`: Controls the randomness of generation (default is 0.1)
62
+ - `repetition_penalty`: A factor to reduce repetitive output (default is 1.1)
63
+ - `max_length`: The maximum length of the generated output tokens (default is 4000)
64
+
65
+ ## Testing
66
+ Run the unit tests to verify functionality:
67
+ ```bash
68
+ python -m unittest discover -s tests
69
+ ```
70
+
71
+
72
+ ## License
73
+ This project is licensed under the MIT License.
74
+
75
+ ## Acknowledgments
76
+ - Built as a contribution to the YarnGPT project
77
+ - Utilizes Hugging Face's model caching and the transformers library
78
+ - Special thanks to the open-source community for their ongoing support
79
+
80
+ For more details and documentation, visit the GitHub repository: https://github.com/jerryola1
yarngpt/python-wrapper/audiotokenizer.py ADDED
@@ -0,0 +1,317 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import json
4
+ import torch
5
+ import inflect
6
+ import random
7
+ import uroman as ur
8
+ import numpy as np
9
+ import torchaudio
10
+ from transformers import AutoTokenizer
11
+ from outetts.wav_tokenizer.decoder import WavTokenizer
12
+ from outetts.wav_tokenizer.encoder.utils import convert_audio
13
+
14
class AudioTokenizer:
    """Prompt builder and audio-code decoder for the English YarnGPT model.

    Combines a Hugging Face text tokenizer with a WavTokenizer codec.
    ``create_prompt`` / ``tokenize_prompt`` turn text plus a reference speaker
    into model input ids; ``get_codes`` / ``get_audio`` turn generated ids back
    into a waveform tensor.
    """

    def __init__(self, tokenizer_path, wav_tokenizer_model_path, wav_tokenizer_config_path,):
        """Load the text tokenizer and the WavTokenizer codec.

        Args:
            tokenizer_path: Name or path passed to ``AutoTokenizer.from_pretrained``.
            wav_tokenizer_model_path: WavTokenizer checkpoint path.
            wav_tokenizer_config_path: WavTokenizer config path.
        """
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.text_prompt = "{bos}\n{text_start}{words}{text_end}\n{audio_start}\n"
        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
        self.bos = "<|im_start|>"
        self.eos = "<|im_end|>"
        # Length (in tokens) of the most recently tokenized prompt; used by
        # get_codes() to skip the prompt part of the generated sequence.
        self.input_length = 0
        self.special_tokens = {
            "audio_code": "<|{}|>",
            "text_start": "<|text_start|>",
            "text_end": "<|text_end|>",
            "audio_start": "<|audio_start|>",
            "audio_end": "<|audio_end|>",
            "time": "<|t_{:.2f}|>",
            "code_start": "<|code_start|>",
            "code_end": "<|code_end|>",
            "text_sep": "<|text_sep|>"
        }
        self.lec = inflect.engine()
        # Note the argument order: config path first, then checkpoint path.
        self.wavtokenizer = WavTokenizer.from_pretrained0802(wav_tokenizer_config_path, wav_tokenizer_model_path)
        self.wavtokenizer = self.wavtokenizer.to(self.device)
        self.BASE_DIR = os.path.dirname(__file__)
        self.DEFAULT_SPEAKERS_DIR = os.path.join(self.BASE_DIR, "default_speakers")
        self.speakers = ["idera", "emma", "onye", "jude", "osagie", "tayo", "zainab", "joke", "regina", "remi", "umar", "chinenye"]

    def get_speaker_path(self, speaker_name):
        """Return the path of ``<speaker_name>.json`` inside the default speakers directory."""
        return os.path.join(self.DEFAULT_SPEAKERS_DIR, f"{speaker_name}.json")

    def load_speaker(self, path: str):
        """Load and return the speaker profile stored as JSON at ``path``.

        Raises:
            FileNotFoundError: If ``path`` does not exist.
        """
        # JSON is UTF-8 by specification; do not depend on the platform's
        # default encoding (which is not UTF-8 on every system).
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)

    def load_default_speaker(self, name: str):
        """Load a bundled speaker by name (case-insensitive, surrounding whitespace ignored)."""
        name = name.lower().strip()
        speaker_path = self.get_speaker_path(name)
        return self.load_speaker(speaker_path)

    def process_text(self, text: str):
        """Normalize ``text`` and return it as a list of lowercase a-z words.

        Steps, in order: lowercase; spell out integers/decimals with inflect;
        turn ``- _ / , . \\`` into spaces; drop every character that is not
        a-z or whitespace; collapse whitespace; split.
        """
        # self.lec is only touched inside the lambda, i.e. only when a number
        # is actually present in the text.
        text = re.sub(r'\d+(\.\d+)?', lambda x: self.lec.number_to_words(x.group()), text.lower())
        text = re.sub(r'[-_/,\.\\]', ' ', text)
        text = re.sub(r'[^a-z\s]', '', text)
        text = re.sub(r'\s+', ' ', text).strip()
        return text.split()

    def create_audio_prompt(self, words: list) -> str:
        """Serialize speaker word entries into the audio part of the prompt.

        Each entry must provide ``"word"``, ``"duration"`` (anything accepted
        by ``float``) and ``"codes"`` (an iterable of audio codes). One line is
        produced per word: word, time token, then the codes wrapped in the
        code_start / code_end tokens.
        """
        code_start = self.special_tokens["code_start"]
        code_end = self.special_tokens["code_end"]
        prompt = []
        for entry in words:
            word = entry["word"]
            duration = self.special_tokens["time"].format(float(entry["duration"]))
            tokens = "".join(self.special_tokens["audio_code"].format(c) for c in entry["codes"])
            prompt.append(f'{word}{duration}{code_start}{tokens}{code_end}')
        return "\n".join(prompt)

    def create_prompt(self, text, speaker_name="idera"):
        """Build the full generation prompt for ``text`` in the voice of ``speaker_name``.

        The speaker's reference transcript is prepended to the normalized
        input words, and the speaker's audio codes are appended after the
        audio_start token.
        """
        speaker = self.load_default_speaker(speaker_name)
        input_words = self.process_text(speaker["text"]) + self.process_text(text)

        inputs_words_strings = self.special_tokens['text_sep'].join(i.strip() for i in input_words)
        prompt = self.text_prompt.format(
            bos=self.bos,
            text_start=self.special_tokens['text_start'],
            words=inputs_words_strings,
            text_end=self.special_tokens['text_end'],
            audio_start=self.special_tokens['audio_start']
        )
        prompt += self.create_audio_prompt(speaker["words"])

        return prompt

    def tokenize_prompt(self, prompt):
        """Encode ``prompt`` to input ids on ``self.device`` and remember its token length."""
        input_ids = self.tokenizer.encode(
            prompt,
            add_special_tokens=False,
            return_tensors="pt"
        ).to(self.device)
        self.input_length = input_ids.shape[1]
        # Already on self.device; no second transfer needed.
        return input_ids

    def get_audio(self, discrete_code):
        """Decode audio codes with the WavTokenizer and return the result on CPU.

        ``discrete_code`` is wrapped in two extra list levels before being
        converted to a tensor (presumably a flat list of ints, as returned by
        ``get_codes`` - confirm against callers).
        """
        discrete_code = torch.tensor([[discrete_code]]).to(self.device)
        features = self.wavtokenizer.codes_to_features(discrete_code).to(self.device)
        bandwidth_id = torch.tensor([0]).to(self.device)
        audio_out = self.wavtokenizer.decode(features, bandwidth_id=bandwidth_id)
        return audio_out.to("cpu")

    def extract_integers(self, s):
        """Return every integer that appears as ``|<int>|`` in ``s``, in order."""
        # Tokens such as <|t_0.25|> do not match because of the non-digit content.
        matches = re.findall(r'\|(-?\d+)\|', s)
        return [int(match) for match in matches]

    def get_codes(self, output):
        """Extract audio codes from a generated sequence.

        Only the tokens after the last tokenized prompt (``self.input_length``)
        in ``output[0]`` are decoded, so ``tokenize_prompt`` must have been
        called for the prompt that produced ``output``.
        """
        new_output = self.tokenizer.decode(output[0][self.input_length:])
        codes = self.extract_integers(new_output)
        return codes
118
+
119
+
120
class AudioTokenizerForLocal(AudioTokenizer):
    """Prompt builder for the Hausa / Igbo / Yoruba YarnGPT model.

    Differs from the base class in three ways: the prompt carries a language
    token, input text is romanized with uroman before normalization, and
    speakers are loaded from the ``default_speakers_local`` directory.
    """

    def __init__(self, tokenizer_path, wav_tokenizer_model_path, wav_tokenizer_config_path,):
        """Initialize the base tokenizer, then override prompt template, tokens and speakers."""
        super().__init__(tokenizer_path, wav_tokenizer_model_path, wav_tokenizer_config_path)
        self.text_prompt = "{bos}\n{text_start}{words}{text_end}\n{lang}\n{audio_start}\n"
        self.special_tokens = {
            "audio_code": "<|{}|>",
            "text_start": "<|text_start|>",
            "text_end": "<|text_end|>",
            "audio_start": "<|audio_start|>",
            "audio_end": "<|audio_end|>",
            "word_start": "<|word_start|>",
            "word_end": "<|word_end|>",
            "time": "<|t_{:.2f}|>",
            "code_start": "<|code_start|>",
            "code_end": "<|code_end|>",
            "text_sep": "<|text_sep|>",
            "hausa": "<|hausa|>",
            "igbo": "<|igbo|>",
            "yoruba": "<|yoruba|>",
        }
        self.uroman = ur.Uroman()
        self.DEFAULT_SPEAKERS_DIR = os.path.join(self.BASE_DIR, "default_speakers_local")
        # Every entry needs its own trailing comma: adjacent string literals
        # are silently concatenated by Python, which previously merged
        # "igbo_male2" and "hausa_female1" into one invalid name.
        # "igbo_male1" is intentionally not listed.
        self.speakers = [
            "hausa_male1", "hausa_male2", "yoruba_male1", "yoruba_male2", "igbo_male2",
            "hausa_female1", "hausa_female2", "igbo_female1", "igbo_female2", "yoruba_female1", "yoruba_female2",
        ]

    def process_text(self, text: str):
        """Romanize ``text`` with uroman, then normalize it to a list of a-z words.

        After romanization the steps match the base class: lowercase, spell
        out numbers, turn ``- _ / , . \\`` into spaces, drop non a-z
        characters, collapse whitespace, split.
        """
        text = self.uroman.romanize_string(text)
        text = re.sub(r'\d+(\.\d+)?', lambda x: self.lec.number_to_words(x.group()), text.lower())
        text = re.sub(r'[-_/,\.\\]', ' ', text)
        text = re.sub(r'[^a-z\s]', '', text)
        text = re.sub(r'\s+', ' ', text).strip()
        return text.split()

    def create_prompt(self, text, lang, speaker_name=None):
        """Build the generation prompt for ``text`` spoken in ``lang``.

        Args:
            text: Text to synthesize.
            lang: One of ``"hausa"``, ``"igbo"``, ``"yoruba"``.
            speaker_name: Bundled speaker to imitate. When ``None`` a speaker
                of the requested language is picked at random.

        Raises:
            AssertionError: If ``lang`` is not one of the supported languages.
        """
        assert lang in ["hausa","igbo","yoruba"], f"Invalid language: {lang}, language must be one of ['hausa','igbo','yoruba']"
        # No speaker requested: pick a random voice for the language.
        if speaker_name is None:
            if lang == "hausa":
                speaker_name = random.choice(["hausa_male1", "hausa_male2", "hausa_female1", "hausa_female2"])
            elif lang == "igbo":
                speaker_name = random.choice(["igbo_female1", "igbo_female2", "igbo_male2"])
            else:
                speaker_name = random.choice(["yoruba_male2", "yoruba_female1", "yoruba_female2"])
        speaker = self.load_default_speaker(speaker_name)
        input_words = self.process_text(speaker["text"]) + self.process_text(text)

        inputs_words_strings = self.special_tokens['text_sep'].join(i.strip() for i in input_words)
        prompt = self.text_prompt.format(
            bos=self.bos,
            text_start=self.special_tokens['text_start'],
            words=inputs_words_strings,
            text_end=self.special_tokens['text_end'],
            lang=self.special_tokens[lang],
            audio_start=self.special_tokens['audio_start']
        )
        prompt += self.create_audio_prompt(speaker["words"])

        return prompt
182
+
183
+
184
class AudioTokenizerV2(AudioTokenizer):
    """Prompt builder for the multilingual YarnGPT model (TTS and ASR prompts).

    Supports Hausa, Igbo, Yoruba and English text-to-speech prompts, and can
    also turn an audio file into an ASR prompt and parse the model's ASR
    output back into text.
    """

    def __init__(self, tokenizer_path, wav_tokenizer_model_path, wav_tokenizer_config_path,):
        """Initialize the base tokenizer, then set V2 templates, tokens and speaker lists."""
        super().__init__(tokenizer_path, wav_tokenizer_model_path, wav_tokenizer_config_path)
        self.text_prompt = "{bos}\n{text_start}{words}{text_end}\n{lang}\n{audio_start}\n"
        self.asr_prompt = "{bos}\n{code_start}{codes}{code_end}\n{asr}\n"
        self.special_tokens = {
            "audio_code": "<|{}|>",
            "text_start": "<|text_start|>",
            "text_end": "<|text_end|>",
            "audio_start": "<|audio_start|>",
            "audio_end": "<|audio_end|>",
            "word_start": "<|word_start|>",
            "word_end": "<|word_end|>",
            "time": "<|t_{:.2f}|>",
            "code_start": "<|code_start|>",
            "code_end": "<|code_end|>",
            "text_sep": "<|text_sep|>",
            "hausa": "<|hausa|>",
            "igbo": "<|igbo|>",
            "yoruba": "<|yoruba|>",
            "english": "<|english|>",
            "asr": "<|asr|>"
        }
        self.uroman = ur.Uroman()
        self.DEFAULT_SPEAKERS_DIR_LOCAL = os.path.join(self.BASE_DIR, "default_speakers_local")
        self.DEFAULT_SPEAKERS_ENG = os.path.join(self.BASE_DIR, "default_speakers")
        # Every entry needs its own trailing comma: adjacent string literals
        # are silently concatenated by Python, which previously merged
        # "igbo_male2" and "hausa_female1" into one invalid name.
        # "igbo_male1" is intentionally not listed.
        self.speakers_local = [
            "hausa_male1", "hausa_male2", "yoruba_male1", "yoruba_male2", "igbo_male2",
            "hausa_female1", "hausa_female2", "igbo_female1", "igbo_female2", "yoruba_female1", "yoruba_female2",
        ]
        self.speakers_eng = ["idera", "emma", "onye", "jude", "osagie", "tayo", "zainab", "joke", "regina", "remi", "umar", "chinenye", "saheed"]
        # (token, replacement) pairs applied in this order by replace_tokens().
        self.changed_tokens = [('<|1836|>', '<|453|><|453|>'),
                               ('<|1837|>', '<|1836|><|1836|>'),
                               ('<|1838|>', '<|1837|><|1837|>'),
                               ('<|1840|>', '<|244|><|167|>'),
                               ('<|1841|>', '<|235|><|219|>'),
                               ('<|1844|>', '<|453|><|244|>'),
                               ('<|1845|>', '<|1838|><|1838|>')]

    def process_text(self, text: str):
        """Romanize ``text`` with uroman, then normalize it to a list of a-z words.

        Steps after romanization: lowercase, spell out numbers with inflect,
        turn ``- _ / , . \\`` into spaces, drop non a-z characters, collapse
        whitespace, split.
        """
        text = self.uroman.romanize_string(text)
        text = re.sub(r'\d+(\.\d+)?', lambda x: self.lec.number_to_words(x.group()), text.lower())
        text = re.sub(r'[-_/,\.\\]', ' ', text)
        text = re.sub(r'[^a-z\s]', '', text)
        text = re.sub(r'\s+', ' ', text).strip()
        return text.split()

    # NOTE: the parameter name ``dir`` shadows the builtin but is kept so that
    # existing keyword-argument callers continue to work.
    def get_speaker_path(self, speaker_name, dir):
        """Return the path of ``<speaker_name>.json`` inside directory ``dir``."""
        return os.path.join(dir, f"{speaker_name}.json")

    def load_speaker(self, path: str):
        """Load and return the speaker profile stored as JSON at ``path``.

        Raises:
            FileNotFoundError: If ``path`` does not exist.
        """
        # JSON is UTF-8 by specification; do not depend on the platform's
        # default encoding when reading transcripts with non-ASCII characters.
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)

    def load_default_speaker(self, name: str, dir: str):
        """Load speaker ``name`` (case-insensitive, whitespace-trimmed) from directory ``dir``."""
        name = name.lower().strip()
        speaker_path = self.get_speaker_path(name, dir)
        return self.load_speaker(speaker_path)

    def create_prompt(self, text, lang, speaker_name=None):
        """Build the TTS generation prompt for ``text`` spoken in ``lang``.

        Args:
            text: Text to synthesize.
            lang: One of ``"hausa"``, ``"igbo"``, ``"yoruba"``, ``"english"``.
            speaker_name: Bundled speaker to imitate. When ``None`` a speaker
                of the requested language is picked at random.

        Raises:
            AssertionError: If ``lang`` is not one of the supported languages.
        """
        assert lang in ["hausa","igbo","yoruba","english"], f"Invalid language: {lang}, language must be one of ['hausa','igbo','yoruba','english']"
        # No speaker requested: pick a random voice for the language.
        if speaker_name is None:
            if lang == "hausa":
                speaker_name = random.choice(["hausa_male1", "hausa_male2", "hausa_female1", "hausa_female2"])
            elif lang == "igbo":
                speaker_name = random.choice(["igbo_female1", "igbo_female2", "igbo_male2"])
            elif lang == "yoruba":
                speaker_name = random.choice(["yoruba_male2", "yoruba_female1", "yoruba_female2"])
            else:
                speaker_name = random.choice(self.speakers_eng)

        # English voices live in a different directory than the local-language ones.
        if lang == "english":
            speaker_dir = self.DEFAULT_SPEAKERS_ENG
        else:
            speaker_dir = self.DEFAULT_SPEAKERS_DIR_LOCAL
        speaker = self.load_default_speaker(speaker_name, speaker_dir)
        input_words = self.process_text(speaker["text"]) + self.process_text(text)

        inputs_words_strings = self.special_tokens['text_sep'].join(i.strip() for i in input_words)
        prompt = self.text_prompt.format(
            bos=self.bos,
            text_start=self.special_tokens['text_start'],
            words=inputs_words_strings,
            text_end=self.special_tokens['text_end'],
            lang=self.special_tokens[lang],
            audio_start=self.special_tokens['audio_start']
        )
        prompt += self.create_audio_prompt(speaker["words"])

        return prompt

    def replace_tokens(self, text):
        """Return ``text`` with every token in ``self.changed_tokens`` replaced.

        Replacements are applied sequentially in list order, so the output of
        an earlier pair is not re-expanded by that same pair later.
        """
        # ``self`` was missing from the signature, which made the method
        # unusable (it reads self.changed_tokens).
        for old, new in self.changed_tokens:
            text = text.replace(old, new)
        return text

    def resample(self, audio: torch.Tensor, sr: int, target_sr: int):
        """Convert ``audio`` to float32 mono at ``target_sr`` and move it to ``self.device``.

        ``audio`` must be a torch tensor (tensor-only methods are used); a
        leading dimension is added before calling ``convert_audio``.
        """
        audio = audio.to(dtype=torch.float32)
        audio = audio.unsqueeze(0)
        # 1 as last arg corresponds to mono audio
        resampled = convert_audio(audio, sr, target_sr, 1)
        return resampled.to(self.device)

    def quantize_wavtokenizer(self, path):
        """Encode the audio file at ``path`` into a string of ``<|code|>`` tokens.

        The audio is loaded with torchaudio, resampled to 24000 Hz and encoded
        with the WavTokenizer.
        """
        audio_data, sample_rate = torchaudio.load(path)
        audio_data = audio_data.squeeze()
        audio = self.resample(audio_data, sample_rate, 24000).to(self.device)
        bandwidth_id = torch.tensor([0]).to(self.device)
        _, codes = self.wavtokenizer.encode_infer(audio, bandwidth_id=bandwidth_id)
        codes = codes.squeeze(1).to(self.device)
        return "".join(f"<|{code}|>" for code in codes[0].tolist())

    def load_asr_prompt(self, audio_path):
        """Build the ASR prompt (audio codes followed by the asr token) for ``audio_path``."""
        codes = self.quantize_wavtokenizer(audio_path)
        prompt = self.asr_prompt.format(
            bos=self.bos,
            code_start=self.special_tokens['code_start'],
            codes=codes,
            code_end=self.special_tokens['code_end'],
            asr=self.special_tokens["asr"],
        )
        return prompt

    def get_asr_results(self, output):
        """Extract the transcript from a generated ASR sequence.

        Decodes ``output[0]``, keeps the span between the last
        ``<|text_start|>`` and the following ``<|text_end|>``, and for each
        line keeps the text between ``<|word_start|>`` and ``<|word_end|>``.
        The pieces are joined with single spaces.
        """
        decoded = self.tokenizer.decode(output[0])
        transcript = decoded.split("<|text_start|>")[-1].split("<|text_end|>")[0]
        pieces = [
            line.split("<|word_start|>")[-1].split("<|word_end|>")[0]
            for line in transcript.split("\n")
        ]
        return " ".join(pieces).strip()
yarngpt/python-wrapper/default_speakers/.ipynb_checkpoints/Yoruba_prepare_data_naij (2)-checkpoint.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
yarngpt/python-wrapper/default_speakers/.ipynb_checkpoints/emma-checkpoint.json ADDED
@@ -0,0 +1,441 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "text": "Scientists have discovered a new planet that may be capable of supporting life!",
3
+ "words": [
4
+ {
5
+ "word": "scientists",
6
+ "duration": 0.82,
7
+ "codes": [
8
+ 1334,
9
+ 1359,
10
+ 619,
11
+ 1057,
12
+ 1528,
13
+ 817,
14
+ 1175,
15
+ 884,
16
+ 527,
17
+ 1519,
18
+ 323,
19
+ 980,
20
+ 608,
21
+ 1104,
22
+ 1271,
23
+ 1265,
24
+ 1237,
25
+ 191,
26
+ 1308,
27
+ 203,
28
+ 1126,
29
+ 1226,
30
+ 1265,
31
+ 1073,
32
+ 1661,
33
+ 903,
34
+ 502,
35
+ 197,
36
+ 127,
37
+ 1712,
38
+ 877,
39
+ 1717,
40
+ 1735,
41
+ 1076,
42
+ 1284,
43
+ 1629,
44
+ 784,
45
+ 62,
46
+ 175,
47
+ 432,
48
+ 767,
49
+ 533,
50
+ 990,
51
+ 1258,
52
+ 823,
53
+ 1651,
54
+ 1801,
55
+ 701,
56
+ 1382,
57
+ 554,
58
+ 527,
59
+ 117,
60
+ 323,
61
+ 989,
62
+ 884,
63
+ 817,
64
+ 495,
65
+ 781,
66
+ 1214,
67
+ 1099,
68
+ 1104
69
+ ]
70
+ },
71
+ {
72
+ "word": "have",
73
+ "duration": 0.24,
74
+ "codes": [
75
+ 930,
76
+ 1393,
77
+ 1303,
78
+ 1001,
79
+ 1438,
80
+ 628,
81
+ 1774,
82
+ 973,
83
+ 1758,
84
+ 1501,
85
+ 1761,
86
+ 1428,
87
+ 1725,
88
+ 669,
89
+ 1780,
90
+ 487,
91
+ 866,
92
+ 1762
93
+ ]
94
+ },
95
+ {
96
+ "word": "discovered",
97
+ "duration": 0.66,
98
+ "codes": [
99
+ 820,
100
+ 1592,
101
+ 1737,
102
+ 731,
103
+ 1325,
104
+ 1644,
105
+ 884,
106
+ 1300,
107
+ 323,
108
+ 596,
109
+ 231,
110
+ 296,
111
+ 943,
112
+ 990,
113
+ 1214,
114
+ 1039,
115
+ 1039,
116
+ 1430,
117
+ 866,
118
+ 19,
119
+ 1675,
120
+ 1824,
121
+ 1030,
122
+ 1630,
123
+ 1758,
124
+ 783,
125
+ 1598,
126
+ 1832,
127
+ 1330,
128
+ 1319,
129
+ 1730,
130
+ 1449,
131
+ 1414,
132
+ 1511,
133
+ 695,
134
+ 1526,
135
+ 1410,
136
+ 95,
137
+ 1686,
138
+ 1400,
139
+ 961,
140
+ 1809,
141
+ 1303,
142
+ 355,
143
+ 544,
144
+ 1671,
145
+ 1493,
146
+ 1290,
147
+ 1732,
148
+ 1808
149
+ ]
150
+ },
151
+ {
152
+ "word": "a",
153
+ "duration": 0.14,
154
+ "codes": [
155
+ 968,
156
+ 1281,
157
+ 895,
158
+ 1827,
159
+ 1819,
160
+ 694,
161
+ 1509,
162
+ 1346,
163
+ 928,
164
+ 1449,
165
+ 1512
166
+ ]
167
+ },
168
+ {
169
+ "word": "new",
170
+ "duration": 0.24,
171
+ "codes": [
172
+ 1433,
173
+ 1689,
174
+ 1685,
175
+ 1598,
176
+ 1547,
177
+ 1369,
178
+ 1228,
179
+ 1708,
180
+ 1285,
181
+ 1722,
182
+ 1257,
183
+ 625,
184
+ 1114,
185
+ 1425,
186
+ 465,
187
+ 950,
188
+ 651,
189
+ 561
190
+ ]
191
+ },
192
+ {
193
+ "word": "planet",
194
+ "duration": 0.48,
195
+ "codes": [
196
+ 1707,
197
+ 821,
198
+ 1225,
199
+ 1228,
200
+ 1168,
201
+ 1291,
202
+ 1739,
203
+ 813,
204
+ 1738,
205
+ 966,
206
+ 1829,
207
+ 1229,
208
+ 1751,
209
+ 1280,
210
+ 1120,
211
+ 1537,
212
+ 1145,
213
+ 1257,
214
+ 1145,
215
+ 1490,
216
+ 1565,
217
+ 41,
218
+ 1677,
219
+ 1796,
220
+ 1258,
221
+ 1228,
222
+ 1389,
223
+ 1145,
224
+ 1433,
225
+ 763,
226
+ 1255,
227
+ 355,
228
+ 509,
229
+ 869,
230
+ 1144,
231
+ 501
232
+ ]
233
+ },
234
+ {
235
+ "word": "that",
236
+ "duration": 0.26,
237
+ "codes": [
238
+ 1571,
239
+ 1404,
240
+ 1484,
241
+ 1716,
242
+ 1136,
243
+ 1720,
244
+ 1237,
245
+ 1420,
246
+ 1680,
247
+ 892,
248
+ 1458,
249
+ 1697,
250
+ 669,
251
+ 1658,
252
+ 859,
253
+ 1128,
254
+ 804,
255
+ 1157,
256
+ 1694
257
+ ]
258
+ },
259
+ {
260
+ "word": "may",
261
+ "duration": 0.18,
262
+ "codes": [
263
+ 1339,
264
+ 761,
265
+ 820,
266
+ 1150,
267
+ 823,
268
+ 1706,
269
+ 1815,
270
+ 1354,
271
+ 1417,
272
+ 820,
273
+ 744,
274
+ 1413,
275
+ 995,
276
+ 733
277
+ ]
278
+ },
279
+ {
280
+ "word": "be",
281
+ "duration": 0.18,
282
+ "codes": [
283
+ 20,
284
+ 1763,
285
+ 1417,
286
+ 821,
287
+ 1384,
288
+ 1784,
289
+ 968,
290
+ 1767,
291
+ 501,
292
+ 795,
293
+ 378,
294
+ 242,
295
+ 447
296
+ ]
297
+ },
298
+ {
299
+ "word": "capable",
300
+ "duration": 0.56,
301
+ "codes": [
302
+ 666,
303
+ 1170,
304
+ 1637,
305
+ 1746,
306
+ 1042,
307
+ 1331,
308
+ 695,
309
+ 1739,
310
+ 1136,
311
+ 1471,
312
+ 1823,
313
+ 1185,
314
+ 1231,
315
+ 459,
316
+ 1071,
317
+ 168,
318
+ 418,
319
+ 513,
320
+ 431,
321
+ 669,
322
+ 840,
323
+ 938,
324
+ 1463,
325
+ 1640,
326
+ 1741,
327
+ 86,
328
+ 1273,
329
+ 724,
330
+ 1006,
331
+ 544,
332
+ 1408,
333
+ 1352,
334
+ 1721,
335
+ 1490,
336
+ 1321,
337
+ 1674,
338
+ 792,
339
+ 1765,
340
+ 1093,
341
+ 1731,
342
+ 1506,
343
+ 1742,
344
+ 1465
345
+ ]
346
+ },
347
+ {
348
+ "word": "of",
349
+ "duration": 0.16,
350
+ "codes": [
351
+ 1697,
352
+ 1435,
353
+ 42,
354
+ 1593,
355
+ 1573,
356
+ 1146,
357
+ 1600,
358
+ 980,
359
+ 878,
360
+ 713,
361
+ 796,
362
+ 1364
363
+ ]
364
+ },
365
+ {
366
+ "word": "supporting",
367
+ "duration": 0.62,
368
+ "codes": [
369
+ 541,
370
+ 833,
371
+ 1546,
372
+ 1230,
373
+ 1232,
374
+ 1417,
375
+ 1473,
376
+ 1486,
377
+ 1759,
378
+ 1327,
379
+ 1806,
380
+ 544,
381
+ 918,
382
+ 526,
383
+ 418,
384
+ 950,
385
+ 669,
386
+ 1749,
387
+ 1499,
388
+ 959,
389
+ 1806,
390
+ 203,
391
+ 1771,
392
+ 1651,
393
+ 1433,
394
+ 686,
395
+ 967,
396
+ 484,
397
+ 649,
398
+ 884,
399
+ 176,
400
+ 323,
401
+ 1349,
402
+ 722,
403
+ 1230,
404
+ 1218,
405
+ 1430,
406
+ 1663,
407
+ 1648,
408
+ 1808,
409
+ 1629,
410
+ 1822,
411
+ 1813,
412
+ 1663,
413
+ 1418,
414
+ 1742
415
+ ]
416
+ },
417
+ {
418
+ "word": "life",
419
+ "duration": 0.22,
420
+ "codes": [
421
+ 1622,
422
+ 1648,
423
+ 1141,
424
+ 1682,
425
+ 1353,
426
+ 1351,
427
+ 1822,
428
+ 1229,
429
+ 1621,
430
+ 1435,
431
+ 1766,
432
+ 1428,
433
+ 1727,
434
+ 1343,
435
+ 1769,
436
+ 823,
437
+ 1050
438
+ ]
439
+ }
440
+ ]
441
+ }
yarngpt/python-wrapper/default_speakers/.ipynb_checkpoints/idera-checkpoint.json ADDED
@@ -0,0 +1,396 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "text": "Scientists have discovered a new planet that may be capable of supporting life!",
3
+ "words": [
4
+ {
5
+ "word": "scientists",
6
+ "duration": "1.00",
7
+ "codes": [
8
+ 258,
9
+ 551,
10
+ 21,
11
+ 401,
12
+ 509,
13
+ 235,
14
+ 151,
15
+ 94,
16
+ 194,
17
+ 496,
18
+ 241,
19
+ 420,
20
+ 606,
21
+ 256,
22
+ 311,
23
+ 464,
24
+ 343,
25
+ 765,
26
+ 56,
27
+ 23,
28
+ 209,
29
+ 72,
30
+ 851,
31
+ 360,
32
+ 442,
33
+ 257,
34
+ 457,
35
+ 75,
36
+ 265,
37
+ 227,
38
+ 16,
39
+ 167,
40
+ 194,
41
+ 391,
42
+ 68,
43
+ 786,
44
+ 1642,
45
+ 888,
46
+ 884,
47
+ 1688,
48
+ 1021,
49
+ 1270,
50
+ 1250,
51
+ 640,
52
+ 1471,
53
+ 1193,
54
+ 1117,
55
+ 95,
56
+ 158,
57
+ 587,
58
+ 1484,
59
+ 1054,
60
+ 947,
61
+ 521,
62
+ 234,
63
+ 502,
64
+ 1172,
65
+ 1379,
66
+ 1332,
67
+ 1267,
68
+ 1659,
69
+ 226,
70
+ 325,
71
+ 404,
72
+ 634,
73
+ 713,
74
+ 333,
75
+ 1210,
76
+ 1028,
77
+ 700,
78
+ 1804,
79
+ 1549,
80
+ 1552,
81
+ 1527,
82
+ 701,
83
+ 895
84
+ ]
85
+ },
86
+ {
87
+ "word": "have",
88
+ "duration": "0.16",
89
+ "codes": [
90
+ 652,
91
+ 1487,
92
+ 1045,
93
+ 665,
94
+ 384,
95
+ 908,
96
+ 1073,
97
+ 903,
98
+ 169,
99
+ 91,
100
+ 1242,
101
+ 59,
102
+ 1614
103
+ ]
104
+ },
105
+ {
106
+ "word": "discovered",
107
+ "duration": "0.52",
108
+ "codes": [
109
+ 1523,
110
+ 519,
111
+ 1311,
112
+ 1166,
113
+ 1049,
114
+ 368,
115
+ 176,
116
+ 1546,
117
+ 990,
118
+ 546,
119
+ 1091,
120
+ 872,
121
+ 975,
122
+ 224,
123
+ 419,
124
+ 1714,
125
+ 1247,
126
+ 1769,
127
+ 1141,
128
+ 811,
129
+ 1149,
130
+ 320,
131
+ 1161,
132
+ 982,
133
+ 732,
134
+ 473,
135
+ 1025,
136
+ 470,
137
+ 1253,
138
+ 1345,
139
+ 965,
140
+ 916,
141
+ 407,
142
+ 844,
143
+ 594,
144
+ 1710,
145
+ 193,
146
+ 740,
147
+ 761,
148
+ 1740
149
+ ]
150
+ },
151
+ {
152
+ "word": "a",
153
+ "duration": "0.08",
154
+ "codes": [
155
+ 5,
156
+ 414,
157
+ 1608,
158
+ 449,
159
+ 1643,
160
+ 1732,
161
+ 1653
162
+ ]
163
+ },
164
+ {
165
+ "word": "new",
166
+ "duration": "0.18",
167
+ "codes": [
168
+ 396,
169
+ 1599,
170
+ 1733,
171
+ 250,
172
+ 1624,
173
+ 485,
174
+ 1645,
175
+ 771,
176
+ 1630,
177
+ 736,
178
+ 336,
179
+ 476,
180
+ 641,
181
+ 345
182
+ ]
183
+ },
184
+ {
185
+ "word": "planet",
186
+ "duration": "0.38",
187
+ "codes": [
188
+ 21,
189
+ 131,
190
+ 1743,
191
+ 1082,
192
+ 1707,
193
+ 86,
194
+ 1075,
195
+ 883,
196
+ 944,
197
+ 1103,
198
+ 790,
199
+ 978,
200
+ 860,
201
+ 1738,
202
+ 1060,
203
+ 749,
204
+ 171,
205
+ 679,
206
+ 1144,
207
+ 966,
208
+ 1532,
209
+ 1179,
210
+ 714,
211
+ 1123,
212
+ 1308,
213
+ 1524,
214
+ 752,
215
+ 1613,
216
+ 1266
217
+ ]
218
+ },
219
+ {
220
+ "word": "that",
221
+ "duration": "0.14",
222
+ "codes": [
223
+ 64,
224
+ 32,
225
+ 1457,
226
+ 1095,
227
+ 931,
228
+ 1774,
229
+ 1017,
230
+ 1661,
231
+ 1713,
232
+ 355,
233
+ 1708
234
+ ]
235
+ },
236
+ {
237
+ "word": "may",
238
+ "duration": "0.12",
239
+ "codes": [
240
+ 1800,
241
+ 1070,
242
+ 1452,
243
+ 1185,
244
+ 1295,
245
+ 26,
246
+ 638,
247
+ 240,
248
+ 1480,
249
+ 1461
250
+ ]
251
+ },
252
+ {
253
+ "word": "be",
254
+ "duration": "0.12",
255
+ "codes": [
256
+ 859,
257
+ 729,
258
+ 848,
259
+ 1131,
260
+ 1618,
261
+ 928,
262
+ 331,
263
+ 504,
264
+ 487,
265
+ 417
266
+ ]
267
+ },
268
+ {
269
+ "word": "capable",
270
+ "duration": "0.42",
271
+ "codes": [
272
+ 686,
273
+ 1040,
274
+ 28,
275
+ 1456,
276
+ 1056,
277
+ 1133,
278
+ 901,
279
+ 1127,
280
+ 693,
281
+ 1406,
282
+ 20,
283
+ 118,
284
+ 141,
285
+ 572,
286
+ 845,
287
+ 1280,
288
+ 353,
289
+ 1726,
290
+ 338,
291
+ 1413,
292
+ 484,
293
+ 272,
294
+ 1569,
295
+ 144,
296
+ 1581,
297
+ 437,
298
+ 1502,
299
+ 963,
300
+ 1415,
301
+ 655,
302
+ 949,
303
+ 1289
304
+ ]
305
+ },
306
+ {
307
+ "word": "of",
308
+ "duration": "0.10",
309
+ "codes": [
310
+ 1198,
311
+ 1755,
312
+ 1478,
313
+ 1548,
314
+ 802,
315
+ 1513,
316
+ 1290,
317
+ 636
318
+ ]
319
+ },
320
+ {
321
+ "word": "supporting",
322
+ "duration": "0.54",
323
+ "codes": [
324
+ 541,
325
+ 867,
326
+ 750,
327
+ 1505,
328
+ 754,
329
+ 1344,
330
+ 1032,
331
+ 734,
332
+ 505,
333
+ 559,
334
+ 220,
335
+ 288,
336
+ 342,
337
+ 591,
338
+ 1459,
339
+ 1721,
340
+ 490,
341
+ 825,
342
+ 80,
343
+ 1221,
344
+ 1234,
345
+ 639,
346
+ 1052,
347
+ 450,
348
+ 1557,
349
+ 1302,
350
+ 784,
351
+ 1547,
352
+ 823,
353
+ 527,
354
+ 1667,
355
+ 1437,
356
+ 832,
357
+ 1366,
358
+ 674,
359
+ 1607,
360
+ 486,
361
+ 893,
362
+ 1748,
363
+ 792,
364
+ 1757
365
+ ]
366
+ },
367
+ {
368
+ "word": "life",
369
+ "duration": "0.28",
370
+ "codes": [
371
+ 1761,
372
+ 149,
373
+ 1501,
374
+ 1342,
375
+ 1063,
376
+ 1124,
377
+ 117,
378
+ 1225,
379
+ 1115,
380
+ 1155,
381
+ 1815,
382
+ 1035,
383
+ 936,
384
+ 807,
385
+ 930,
386
+ 1514,
387
+ 837,
388
+ 1104,
389
+ 1145,
390
+ 1164,
391
+ 1687,
392
+ 1589
393
+ ]
394
+ }
395
+ ]
396
+ }
yarngpt/python-wrapper/default_speakers/.ipynb_checkpoints/onye-checkpoint.json ADDED
@@ -0,0 +1,621 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "text": "out to another level also going through in the shop chop scotch bonnet peppers",
3
+ "words": [
4
+ {
5
+ "word": "out",
6
+ "duration": 0.34,
7
+ "codes": [
8
+ 546,
9
+ 416,
10
+ 1519,
11
+ 1673,
12
+ 1806,
13
+ 1015,
14
+ 693,
15
+ 1447,
16
+ 9,
17
+ 1306,
18
+ 1485,
19
+ 1477,
20
+ 1178,
21
+ 1543,
22
+ 1830,
23
+ 1558,
24
+ 1801,
25
+ 1423,
26
+ 1487,
27
+ 1165,
28
+ 1743,
29
+ 1726,
30
+ 1772,
31
+ 368,
32
+ 1555
33
+ ]
34
+ },
35
+ {
36
+ "word": "to",
37
+ "duration": 0.28,
38
+ "codes": [
39
+ 1823,
40
+ 1713,
41
+ 1734,
42
+ 368,
43
+ 1547,
44
+ 1741,
45
+ 1737,
46
+ 1784,
47
+ 1801,
48
+ 1732,
49
+ 1389,
50
+ 994,
51
+ 1158,
52
+ 1278,
53
+ 1800,
54
+ 1658,
55
+ 519,
56
+ 1542,
57
+ 1792,
58
+ 1700,
59
+ 1415
60
+ ]
61
+ },
62
+ {
63
+ "word": "another",
64
+ "duration": 0.4,
65
+ "codes": [
66
+ 1541,
67
+ 1824,
68
+ 1624,
69
+ 1757,
70
+ 1294,
71
+ 1734,
72
+ 1756,
73
+ 1821,
74
+ 1147,
75
+ 1663,
76
+ 1697,
77
+ 1156,
78
+ 1069,
79
+ 53,
80
+ 1223,
81
+ 1212,
82
+ 1736,
83
+ 1748,
84
+ 1744,
85
+ 758,
86
+ 1494,
87
+ 374,
88
+ 1187,
89
+ 1448,
90
+ 1410,
91
+ 1356,
92
+ 1732,
93
+ 1452,
94
+ 1295,
95
+ 1656
96
+ ]
97
+ },
98
+ {
99
+ "word": "level",
100
+ "duration": 1.86,
101
+ "codes": [
102
+ 1688,
103
+ 1527,
104
+ 1417,
105
+ 1486,
106
+ 384,
107
+ 1378,
108
+ 1342,
109
+ 1075,
110
+ 1046,
111
+ 1247,
112
+ 1660,
113
+ 1525,
114
+ 719,
115
+ 1769,
116
+ 1628,
117
+ 1810,
118
+ 1078,
119
+ 1429,
120
+ 1483,
121
+ 1280,
122
+ 1814,
123
+ 1115,
124
+ 184,
125
+ 1014,
126
+ 1686,
127
+ 1341,
128
+ 1347,
129
+ 1502,
130
+ 1350,
131
+ 1666,
132
+ 1686,
133
+ 1823,
134
+ 1749,
135
+ 1412,
136
+ 1651,
137
+ 1832,
138
+ 1701,
139
+ 1782,
140
+ 1741,
141
+ 1798,
142
+ 1828,
143
+ 1701,
144
+ 1796,
145
+ 1807,
146
+ 1701,
147
+ 1768,
148
+ 1817,
149
+ 1524,
150
+ 1786,
151
+ 1400,
152
+ 1717,
153
+ 1722,
154
+ 1773,
155
+ 1202,
156
+ 1098,
157
+ 1161,
158
+ 1750,
159
+ 822,
160
+ 1420,
161
+ 1434,
162
+ 979,
163
+ 1764,
164
+ 1313,
165
+ 1734,
166
+ 1458,
167
+ 1660,
168
+ 1200,
169
+ 370,
170
+ 1636,
171
+ 1186,
172
+ 768,
173
+ 855,
174
+ 599,
175
+ 1632,
176
+ 1164,
177
+ 1041,
178
+ 1791,
179
+ 1714,
180
+ 368,
181
+ 1715,
182
+ 1500,
183
+ 1817,
184
+ 1817,
185
+ 1772,
186
+ 1805,
187
+ 1825,
188
+ 1818,
189
+ 1828,
190
+ 1395,
191
+ 1718,
192
+ 1818,
193
+ 0,
194
+ 1696,
195
+ 1808,
196
+ 1637,
197
+ 1796,
198
+ 1701,
199
+ 1796,
200
+ 1824,
201
+ 1646,
202
+ 1702,
203
+ 1714,
204
+ 895,
205
+ 1764,
206
+ 1637,
207
+ 1717,
208
+ 1747,
209
+ 1751,
210
+ 1696,
211
+ 639,
212
+ 1436,
213
+ 1828,
214
+ 1818,
215
+ 1737,
216
+ 1832,
217
+ 1646,
218
+ 1796,
219
+ 1822,
220
+ 1741,
221
+ 1791,
222
+ 1701,
223
+ 1796,
224
+ 1779,
225
+ 1638,
226
+ 1783,
227
+ 1751,
228
+ 1781,
229
+ 1768,
230
+ 1412,
231
+ 1744,
232
+ 1720,
233
+ 1403,
234
+ 1802,
235
+ 1638,
236
+ 1734,
237
+ 1802,
238
+ 1826,
239
+ 1785,
240
+ 1443,
241
+ 1167
242
+ ]
243
+ },
244
+ {
245
+ "word": "also",
246
+ "duration": 0.26,
247
+ "codes": [
248
+ 973,
249
+ 1187,
250
+ 1333,
251
+ 359,
252
+ 1494,
253
+ 1222,
254
+ 1759,
255
+ 749,
256
+ 533,
257
+ 4,
258
+ 1599,
259
+ 1608,
260
+ 1280,
261
+ 1167,
262
+ 1015,
263
+ 1526,
264
+ 1662,
265
+ 1728,
266
+ 1016,
267
+ 1796
268
+ ]
269
+ },
270
+ {
271
+ "word": "going",
272
+ "duration": 0.26,
273
+ "codes": [
274
+ 1789,
275
+ 1291,
276
+ 1209,
277
+ 828,
278
+ 1452,
279
+ 1749,
280
+ 1052,
281
+ 1460,
282
+ 1783,
283
+ 1656,
284
+ 1542,
285
+ 1281,
286
+ 1710,
287
+ 1716,
288
+ 1404,
289
+ 1734,
290
+ 495,
291
+ 1624,
292
+ 1747
293
+ ]
294
+ },
295
+ {
296
+ "word": "through",
297
+ "duration": 0.34,
298
+ "codes": [
299
+ 1465,
300
+ 1664,
301
+ 1786,
302
+ 231,
303
+ 1826,
304
+ 1318,
305
+ 1494,
306
+ 1505,
307
+ 1063,
308
+ 1311,
309
+ 1656,
310
+ 1265,
311
+ 1720,
312
+ 1226,
313
+ 940,
314
+ 1490,
315
+ 1447,
316
+ 1730,
317
+ 1348,
318
+ 1637,
319
+ 1118,
320
+ 1710,
321
+ 841,
322
+ 795,
323
+ 298,
324
+ 1216
325
+ ]
326
+ },
327
+ {
328
+ "word": "in",
329
+ "duration": 0.42,
330
+ "codes": [
331
+ 899,
332
+ 1240,
333
+ 869,
334
+ 679,
335
+ 1343,
336
+ 1280,
337
+ 1681,
338
+ 1221,
339
+ 1632,
340
+ 1221,
341
+ 1479,
342
+ 1431,
343
+ 1623,
344
+ 1372,
345
+ 1722,
346
+ 1494,
347
+ 1011,
348
+ 1636,
349
+ 957,
350
+ 1661,
351
+ 939,
352
+ 1772,
353
+ 1096,
354
+ 1688,
355
+ 1537,
356
+ 1360,
357
+ 1734,
358
+ 1595,
359
+ 1781,
360
+ 1284,
361
+ 1413
362
+ ]
363
+ },
364
+ {
365
+ "word": "the",
366
+ "duration": 1.08,
367
+ "codes": [
368
+ 1701,
369
+ 1447,
370
+ 1328,
371
+ 1690,
372
+ 1281,
373
+ 1401,
374
+ 700,
375
+ 1295,
376
+ 1494,
377
+ 1326,
378
+ 1218,
379
+ 361,
380
+ 922,
381
+ 1210,
382
+ 1300,
383
+ 19,
384
+ 1403,
385
+ 1272,
386
+ 1150,
387
+ 1062,
388
+ 1457,
389
+ 1344,
390
+ 1167,
391
+ 1742,
392
+ 996,
393
+ 1158,
394
+ 1245,
395
+ 1210,
396
+ 1720,
397
+ 1823,
398
+ 85,
399
+ 1829,
400
+ 1555,
401
+ 1718,
402
+ 979,
403
+ 1665,
404
+ 1783,
405
+ 1088,
406
+ 1810,
407
+ 1828,
408
+ 1795,
409
+ 1419,
410
+ 1795,
411
+ 1826,
412
+ 1779,
413
+ 1741,
414
+ 1719,
415
+ 1809,
416
+ 1646,
417
+ 1765,
418
+ 1818,
419
+ 1713,
420
+ 1821,
421
+ 1737,
422
+ 1348,
423
+ 1821,
424
+ 1400,
425
+ 1748,
426
+ 1278,
427
+ 1521,
428
+ 758,
429
+ 1701,
430
+ 1798,
431
+ 1817,
432
+ 1646,
433
+ 1672,
434
+ 1825,
435
+ 1796,
436
+ 957,
437
+ 1808,
438
+ 1807,
439
+ 1833,
440
+ 1798,
441
+ 1425,
442
+ 1830,
443
+ 1037,
444
+ 1251,
445
+ 554,
446
+ 1395,
447
+ 175,
448
+ 919
449
+ ]
450
+ },
451
+ {
452
+ "word": "shop",
453
+ "duration": 0.3,
454
+ "codes": [
455
+ 1611,
456
+ 154,
457
+ 1329,
458
+ 1701,
459
+ 1677,
460
+ 1210,
461
+ 880,
462
+ 660,
463
+ 816,
464
+ 1276,
465
+ 1471,
466
+ 41,
467
+ 1779,
468
+ 1465,
469
+ 1298,
470
+ 1817,
471
+ 1777,
472
+ 1073,
473
+ 1713,
474
+ 1808,
475
+ 1818,
476
+ 1348,
477
+ 1711
478
+ ]
479
+ },
480
+ {
481
+ "word": "chop",
482
+ "duration": 0.3,
483
+ "codes": [
484
+ 1439,
485
+ 4,
486
+ 315,
487
+ 1751,
488
+ 1731,
489
+ 53,
490
+ 1184,
491
+ 1132,
492
+ 755,
493
+ 1429,
494
+ 1464,
495
+ 1483,
496
+ 1770,
497
+ 1749,
498
+ 1278,
499
+ 1769,
500
+ 1511,
501
+ 1683,
502
+ 1779,
503
+ 1660,
504
+ 183,
505
+ 1535,
506
+ 416
507
+ ]
508
+ },
509
+ {
510
+ "word": "scotch",
511
+ "duration": 0.4,
512
+ "codes": [
513
+ 1518,
514
+ 1679,
515
+ 0,
516
+ 1695,
517
+ 1682,
518
+ 1098,
519
+ 1764,
520
+ 1256,
521
+ 1808,
522
+ 1609,
523
+ 1745,
524
+ 1318,
525
+ 632,
526
+ 1197,
527
+ 271,
528
+ 1683,
529
+ 1774,
530
+ 1824,
531
+ 1783,
532
+ 1671,
533
+ 1805,
534
+ 22,
535
+ 631,
536
+ 117,
537
+ 1345,
538
+ 800,
539
+ 1707,
540
+ 1466,
541
+ 1005,
542
+ 1462
543
+ ]
544
+ },
545
+ {
546
+ "word": "bonnet",
547
+ "duration": 0.34,
548
+ "codes": [
549
+ 1677,
550
+ 1826,
551
+ 1277,
552
+ 524,
553
+ 1001,
554
+ 789,
555
+ 973,
556
+ 1509,
557
+ 1817,
558
+ 546,
559
+ 1260,
560
+ 1117,
561
+ 782,
562
+ 142,
563
+ 1455,
564
+ 947,
565
+ 1814,
566
+ 1815,
567
+ 0,
568
+ 1538,
569
+ 1766,
570
+ 1744,
571
+ 1824,
572
+ 239,
573
+ 1710
574
+ ]
575
+ },
576
+ {
577
+ "word": "peppers",
578
+ "duration": 0.5,
579
+ "codes": [
580
+ 1817,
581
+ 1287,
582
+ 1769,
583
+ 1309,
584
+ 446,
585
+ 1173,
586
+ 1183,
587
+ 375,
588
+ 1342,
589
+ 1815,
590
+ 1382,
591
+ 1685,
592
+ 1797,
593
+ 1351,
594
+ 1798,
595
+ 1631,
596
+ 749,
597
+ 1717,
598
+ 1324,
599
+ 1147,
600
+ 1186,
601
+ 955,
602
+ 577,
603
+ 1736,
604
+ 827,
605
+ 1240,
606
+ 1484,
607
+ 847,
608
+ 1661,
609
+ 1475,
610
+ 1287,
611
+ 1535,
612
+ 595,
613
+ 1286,
614
+ 1734,
615
+ 1256,
616
+ 319,
617
+ 1688
618
+ ]
619
+ }
620
+ ]
621
+ }
yarngpt/python-wrapper/default_speakers/Yoruba_prepare_data_naij (2).ipynb ADDED
The diff for this file is too large to render. See raw diff
 
yarngpt/python-wrapper/default_speakers/chinenye.json ADDED
@@ -0,0 +1,274 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "text": "and once I got that out of the way",
3
+ "words": [
4
+ {
5
+ "word": "and",
6
+ "duration": 1.18,
7
+ "codes": [
8
+ 1073,
9
+ 1804,
10
+ 1510,
11
+ 1562,
12
+ 377,
13
+ 1287,
14
+ 1615,
15
+ 175,
16
+ 631,
17
+ 1702,
18
+ 1700,
19
+ 1590,
20
+ 1158,
21
+ 1676,
22
+ 758,
23
+ 1727,
24
+ 1548,
25
+ 1464,
26
+ 1605,
27
+ 1469,
28
+ 1291,
29
+ 1755,
30
+ 1656,
31
+ 1323,
32
+ 1372,
33
+ 269,
34
+ 1252,
35
+ 1466,
36
+ 1677,
37
+ 1192,
38
+ 1220,
39
+ 1815,
40
+ 1658,
41
+ 1818,
42
+ 1514,
43
+ 1480,
44
+ 1747,
45
+ 1413,
46
+ 1440,
47
+ 1403,
48
+ 28,
49
+ 1806,
50
+ 1536,
51
+ 1269,
52
+ 1673,
53
+ 1616,
54
+ 1619,
55
+ 1745,
56
+ 1532,
57
+ 1659,
58
+ 1682,
59
+ 1777,
60
+ 1764,
61
+ 1766,
62
+ 1796,
63
+ 1827,
64
+ 719,
65
+ 1768,
66
+ 1761,
67
+ 1524,
68
+ 1782,
69
+ 1410,
70
+ 1748,
71
+ 1764,
72
+ 1447,
73
+ 1791,
74
+ 1790,
75
+ 1528,
76
+ 1550,
77
+ 1491,
78
+ 1764,
79
+ 1324,
80
+ 790,
81
+ 1307,
82
+ 664,
83
+ 719,
84
+ 1224,
85
+ 1571,
86
+ 1740,
87
+ 1062,
88
+ 1775,
89
+ 1494,
90
+ 486,
91
+ 1544,
92
+ 1828,
93
+ 961,
94
+ 1115,
95
+ 1308
96
+ ]
97
+ },
98
+ {
99
+ "word": "once",
100
+ "duration": 0.46,
101
+ "codes": [
102
+ 996,
103
+ 1407,
104
+ 892,
105
+ 1326,
106
+ 1223,
107
+ 362,
108
+ 36,
109
+ 1103,
110
+ 1734,
111
+ 1755,
112
+ 1798,
113
+ 749,
114
+ 1603,
115
+ 1748,
116
+ 519,
117
+ 1643,
118
+ 1744,
119
+ 176,
120
+ 1709,
121
+ 749,
122
+ 1615,
123
+ 1801,
124
+ 1438,
125
+ 1719,
126
+ 1491,
127
+ 1802,
128
+ 1575,
129
+ 1750,
130
+ 1180,
131
+ 1077,
132
+ 855,
133
+ 1511,
134
+ 961,
135
+ 1739,
136
+ 632
137
+ ]
138
+ },
139
+ {
140
+ "word": "i",
141
+ "duration": 0.16,
142
+ "codes": [
143
+ 398,
144
+ 1055,
145
+ 767,
146
+ 57,
147
+ 1777,
148
+ 1706,
149
+ 34,
150
+ 1025,
151
+ 1745,
152
+ 1796,
153
+ 1266,
154
+ 1348
155
+ ]
156
+ },
157
+ {
158
+ "word": "got",
159
+ "duration": 0.24,
160
+ "codes": [
161
+ 1555,
162
+ 639,
163
+ 1708,
164
+ 813,
165
+ 1152,
166
+ 753,
167
+ 718,
168
+ 1742,
169
+ 756,
170
+ 1109,
171
+ 1796,
172
+ 85,
173
+ 1623,
174
+ 1769,
175
+ 1759,
176
+ 1491,
177
+ 1769,
178
+ 1693
179
+ ]
180
+ },
181
+ {
182
+ "word": "that",
183
+ "duration": 0.28,
184
+ "codes": [
185
+ 1555,
186
+ 1732,
187
+ 1301,
188
+ 755,
189
+ 1224,
190
+ 1192,
191
+ 1241,
192
+ 1192,
193
+ 1102,
194
+ 944,
195
+ 1358,
196
+ 855,
197
+ 1342,
198
+ 1603,
199
+ 1693,
200
+ 1783,
201
+ 1689,
202
+ 1803,
203
+ 1126,
204
+ 1089,
205
+ 839
206
+ ]
207
+ },
208
+ {
209
+ "word": "out",
210
+ "duration": 0.16,
211
+ "codes": [
212
+ 887,
213
+ 1726,
214
+ 1411,
215
+ 1758,
216
+ 839,
217
+ 9,
218
+ 1686,
219
+ 1642,
220
+ 1695,
221
+ 998,
222
+ 828,
223
+ 1755
224
+ ]
225
+ },
226
+ {
227
+ "word": "of",
228
+ "duration": 0.08,
229
+ "codes": [
230
+ 1825,
231
+ 1734,
232
+ 1281,
233
+ 1794,
234
+ 1518,
235
+ 1696
236
+ ]
237
+ },
238
+ {
239
+ "word": "the",
240
+ "duration": 0.14,
241
+ "codes": [
242
+ 1565,
243
+ 1608,
244
+ 1541,
245
+ 1258,
246
+ 1798,
247
+ 1499,
248
+ 1685,
249
+ 1554,
250
+ 1776,
251
+ 1602,
252
+ 1381
253
+ ]
254
+ },
255
+ {
256
+ "word": "way",
257
+ "duration": 0.16,
258
+ "codes": [
259
+ 1822,
260
+ 1773,
261
+ 1663,
262
+ 1710,
263
+ 1554,
264
+ 1493,
265
+ 4,
266
+ 1620,
267
+ 1755,
268
+ 416,
269
+ 1384,
270
+ 1688
271
+ ]
272
+ }
273
+ ]
274
+ }
yarngpt/python-wrapper/default_speakers/emma.json ADDED
@@ -0,0 +1,441 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "text": "Scientists have discovered a new planet that may be capable of supporting life!",
3
+ "words": [
4
+ {
5
+ "word": "scientists",
6
+ "duration": 0.82,
7
+ "codes": [
8
+ 1334,
9
+ 1359,
10
+ 619,
11
+ 1057,
12
+ 1528,
13
+ 817,
14
+ 1175,
15
+ 884,
16
+ 527,
17
+ 1519,
18
+ 323,
19
+ 980,
20
+ 608,
21
+ 1104,
22
+ 1271,
23
+ 1265,
24
+ 1237,
25
+ 191,
26
+ 1308,
27
+ 203,
28
+ 1126,
29
+ 1226,
30
+ 1265,
31
+ 1073,
32
+ 1661,
33
+ 903,
34
+ 502,
35
+ 197,
36
+ 127,
37
+ 1712,
38
+ 877,
39
+ 1717,
40
+ 1735,
41
+ 1076,
42
+ 1284,
43
+ 1629,
44
+ 784,
45
+ 62,
46
+ 175,
47
+ 432,
48
+ 767,
49
+ 533,
50
+ 990,
51
+ 1258,
52
+ 823,
53
+ 1651,
54
+ 1801,
55
+ 701,
56
+ 1382,
57
+ 554,
58
+ 527,
59
+ 117,
60
+ 323,
61
+ 989,
62
+ 884,
63
+ 817,
64
+ 495,
65
+ 781,
66
+ 1214,
67
+ 1099,
68
+ 1104
69
+ ]
70
+ },
71
+ {
72
+ "word": "have",
73
+ "duration": 0.24,
74
+ "codes": [
75
+ 930,
76
+ 1393,
77
+ 1303,
78
+ 1001,
79
+ 1438,
80
+ 628,
81
+ 1774,
82
+ 973,
83
+ 1758,
84
+ 1501,
85
+ 1761,
86
+ 1428,
87
+ 1725,
88
+ 669,
89
+ 1780,
90
+ 487,
91
+ 866,
92
+ 1762
93
+ ]
94
+ },
95
+ {
96
+ "word": "discovered",
97
+ "duration": 0.66,
98
+ "codes": [
99
+ 820,
100
+ 1592,
101
+ 1737,
102
+ 731,
103
+ 1325,
104
+ 1644,
105
+ 884,
106
+ 1300,
107
+ 323,
108
+ 596,
109
+ 231,
110
+ 296,
111
+ 943,
112
+ 990,
113
+ 1214,
114
+ 1039,
115
+ 1039,
116
+ 1430,
117
+ 866,
118
+ 19,
119
+ 1675,
120
+ 1824,
121
+ 1030,
122
+ 1630,
123
+ 1758,
124
+ 783,
125
+ 1598,
126
+ 1832,
127
+ 1330,
128
+ 1319,
129
+ 1730,
130
+ 1449,
131
+ 1414,
132
+ 1511,
133
+ 695,
134
+ 1526,
135
+ 1410,
136
+ 95,
137
+ 1686,
138
+ 1400,
139
+ 961,
140
+ 1809,
141
+ 1303,
142
+ 355,
143
+ 544,
144
+ 1671,
145
+ 1493,
146
+ 1290,
147
+ 1732,
148
+ 1808
149
+ ]
150
+ },
151
+ {
152
+ "word": "a",
153
+ "duration": 0.14,
154
+ "codes": [
155
+ 968,
156
+ 1281,
157
+ 895,
158
+ 1827,
159
+ 1819,
160
+ 694,
161
+ 1509,
162
+ 1346,
163
+ 928,
164
+ 1449,
165
+ 1512
166
+ ]
167
+ },
168
+ {
169
+ "word": "new",
170
+ "duration": 0.24,
171
+ "codes": [
172
+ 1433,
173
+ 1689,
174
+ 1685,
175
+ 1598,
176
+ 1547,
177
+ 1369,
178
+ 1228,
179
+ 1708,
180
+ 1285,
181
+ 1722,
182
+ 1257,
183
+ 625,
184
+ 1114,
185
+ 1425,
186
+ 465,
187
+ 950,
188
+ 651,
189
+ 561
190
+ ]
191
+ },
192
+ {
193
+ "word": "planet",
194
+ "duration": 0.48,
195
+ "codes": [
196
+ 1707,
197
+ 821,
198
+ 1225,
199
+ 1228,
200
+ 1168,
201
+ 1291,
202
+ 1739,
203
+ 813,
204
+ 1738,
205
+ 966,
206
+ 1829,
207
+ 1229,
208
+ 1751,
209
+ 1280,
210
+ 1120,
211
+ 1537,
212
+ 1145,
213
+ 1257,
214
+ 1145,
215
+ 1490,
216
+ 1565,
217
+ 41,
218
+ 1677,
219
+ 1796,
220
+ 1258,
221
+ 1228,
222
+ 1389,
223
+ 1145,
224
+ 1433,
225
+ 763,
226
+ 1255,
227
+ 355,
228
+ 509,
229
+ 869,
230
+ 1144,
231
+ 501
232
+ ]
233
+ },
234
+ {
235
+ "word": "that",
236
+ "duration": 0.26,
237
+ "codes": [
238
+ 1571,
239
+ 1404,
240
+ 1484,
241
+ 1716,
242
+ 1136,
243
+ 1720,
244
+ 1237,
245
+ 1420,
246
+ 1680,
247
+ 892,
248
+ 1458,
249
+ 1697,
250
+ 669,
251
+ 1658,
252
+ 859,
253
+ 1128,
254
+ 804,
255
+ 1157,
256
+ 1694
257
+ ]
258
+ },
259
+ {
260
+ "word": "may",
261
+ "duration": 0.18,
262
+ "codes": [
263
+ 1339,
264
+ 761,
265
+ 820,
266
+ 1150,
267
+ 823,
268
+ 1706,
269
+ 1815,
270
+ 1354,
271
+ 1417,
272
+ 820,
273
+ 744,
274
+ 1413,
275
+ 995,
276
+ 733
277
+ ]
278
+ },
279
+ {
280
+ "word": "be",
281
+ "duration": 0.18,
282
+ "codes": [
283
+ 20,
284
+ 1763,
285
+ 1417,
286
+ 821,
287
+ 1384,
288
+ 1784,
289
+ 968,
290
+ 1767,
291
+ 501,
292
+ 795,
293
+ 378,
294
+ 242,
295
+ 447
296
+ ]
297
+ },
298
+ {
299
+ "word": "capable",
300
+ "duration": 0.56,
301
+ "codes": [
302
+ 666,
303
+ 1170,
304
+ 1637,
305
+ 1746,
306
+ 1042,
307
+ 1331,
308
+ 695,
309
+ 1739,
310
+ 1136,
311
+ 1471,
312
+ 1823,
313
+ 1185,
314
+ 1231,
315
+ 459,
316
+ 1071,
317
+ 168,
318
+ 418,
319
+ 513,
320
+ 431,
321
+ 669,
322
+ 840,
323
+ 938,
324
+ 1463,
325
+ 1640,
326
+ 1741,
327
+ 86,
328
+ 1273,
329
+ 724,
330
+ 1006,
331
+ 544,
332
+ 1408,
333
+ 1352,
334
+ 1721,
335
+ 1490,
336
+ 1321,
337
+ 1674,
338
+ 792,
339
+ 1765,
340
+ 1093,
341
+ 1731,
342
+ 1506,
343
+ 1742,
344
+ 1465
345
+ ]
346
+ },
347
+ {
348
+ "word": "of",
349
+ "duration": 0.16,
350
+ "codes": [
351
+ 1697,
352
+ 1435,
353
+ 42,
354
+ 1593,
355
+ 1573,
356
+ 1146,
357
+ 1600,
358
+ 980,
359
+ 878,
360
+ 713,
361
+ 796,
362
+ 1364
363
+ ]
364
+ },
365
+ {
366
+ "word": "supporting",
367
+ "duration": 0.62,
368
+ "codes": [
369
+ 541,
370
+ 833,
371
+ 1546,
372
+ 1230,
373
+ 1232,
374
+ 1417,
375
+ 1473,
376
+ 1486,
377
+ 1759,
378
+ 1327,
379
+ 1806,
380
+ 544,
381
+ 918,
382
+ 526,
383
+ 418,
384
+ 950,
385
+ 669,
386
+ 1749,
387
+ 1499,
388
+ 959,
389
+ 1806,
390
+ 203,
391
+ 1771,
392
+ 1651,
393
+ 1433,
394
+ 686,
395
+ 967,
396
+ 484,
397
+ 649,
398
+ 884,
399
+ 176,
400
+ 323,
401
+ 1349,
402
+ 722,
403
+ 1230,
404
+ 1218,
405
+ 1430,
406
+ 1663,
407
+ 1648,
408
+ 1808,
409
+ 1629,
410
+ 1822,
411
+ 1813,
412
+ 1663,
413
+ 1418,
414
+ 1742
415
+ ]
416
+ },
417
+ {
418
+ "word": "life",
419
+ "duration": 0.22,
420
+ "codes": [
421
+ 1622,
422
+ 1648,
423
+ 1141,
424
+ 1682,
425
+ 1353,
426
+ 1351,
427
+ 1822,
428
+ 1229,
429
+ 1621,
430
+ 1435,
431
+ 1766,
432
+ 1428,
433
+ 1727,
434
+ 1343,
435
+ 1769,
436
+ 823,
437
+ 1050
438
+ ]
439
+ }
440
+ ]
441
+ }
yarngpt/python-wrapper/default_speakers/idera.json ADDED
@@ -0,0 +1,396 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "text": "Scientists have discovered a new planet that may be capable of supporting life!",
3
+ "words": [
4
+ {
5
+ "word": "scientists",
6
+ "duration": "1.00",
7
+ "codes": [
8
+ 258,
9
+ 551,
10
+ 21,
11
+ 401,
12
+ 509,
13
+ 235,
14
+ 151,
15
+ 94,
16
+ 194,
17
+ 496,
18
+ 241,
19
+ 420,
20
+ 606,
21
+ 256,
22
+ 311,
23
+ 464,
24
+ 343,
25
+ 765,
26
+ 56,
27
+ 23,
28
+ 209,
29
+ 72,
30
+ 851,
31
+ 360,
32
+ 442,
33
+ 257,
34
+ 457,
35
+ 75,
36
+ 265,
37
+ 227,
38
+ 16,
39
+ 167,
40
+ 194,
41
+ 391,
42
+ 68,
43
+ 786,
44
+ 1642,
45
+ 888,
46
+ 884,
47
+ 1688,
48
+ 1021,
49
+ 1270,
50
+ 1250,
51
+ 640,
52
+ 1471,
53
+ 1193,
54
+ 1117,
55
+ 95,
56
+ 158,
57
+ 587,
58
+ 1484,
59
+ 1054,
60
+ 947,
61
+ 521,
62
+ 234,
63
+ 502,
64
+ 1172,
65
+ 1379,
66
+ 1332,
67
+ 1267,
68
+ 1659,
69
+ 226,
70
+ 325,
71
+ 404,
72
+ 634,
73
+ 713,
74
+ 333,
75
+ 1210,
76
+ 1028,
77
+ 700,
78
+ 1804,
79
+ 1549,
80
+ 1552,
81
+ 1527,
82
+ 701,
83
+ 895
84
+ ]
85
+ },
86
+ {
87
+ "word": "have",
88
+ "duration": "0.16",
89
+ "codes": [
90
+ 652,
91
+ 1487,
92
+ 1045,
93
+ 665,
94
+ 384,
95
+ 908,
96
+ 1073,
97
+ 903,
98
+ 169,
99
+ 91,
100
+ 1242,
101
+ 59,
102
+ 1614
103
+ ]
104
+ },
105
+ {
106
+ "word": "discovered",
107
+ "duration": "0.52",
108
+ "codes": [
109
+ 1523,
110
+ 519,
111
+ 1311,
112
+ 1166,
113
+ 1049,
114
+ 368,
115
+ 176,
116
+ 1546,
117
+ 990,
118
+ 546,
119
+ 1091,
120
+ 872,
121
+ 975,
122
+ 224,
123
+ 419,
124
+ 1714,
125
+ 1247,
126
+ 1769,
127
+ 1141,
128
+ 811,
129
+ 1149,
130
+ 320,
131
+ 1161,
132
+ 982,
133
+ 732,
134
+ 473,
135
+ 1025,
136
+ 470,
137
+ 1253,
138
+ 1345,
139
+ 965,
140
+ 916,
141
+ 407,
142
+ 844,
143
+ 594,
144
+ 1710,
145
+ 193,
146
+ 740,
147
+ 761,
148
+ 1740
149
+ ]
150
+ },
151
+ {
152
+ "word": "a",
153
+ "duration": "0.08",
154
+ "codes": [
155
+ 5,
156
+ 414,
157
+ 1608,
158
+ 449,
159
+ 1643,
160
+ 1732,
161
+ 1653
162
+ ]
163
+ },
164
+ {
165
+ "word": "new",
166
+ "duration": "0.18",
167
+ "codes": [
168
+ 396,
169
+ 1599,
170
+ 1733,
171
+ 250,
172
+ 1624,
173
+ 485,
174
+ 1645,
175
+ 771,
176
+ 1630,
177
+ 736,
178
+ 336,
179
+ 476,
180
+ 641,
181
+ 345
182
+ ]
183
+ },
184
+ {
185
+ "word": "planet",
186
+ "duration": "0.38",
187
+ "codes": [
188
+ 21,
189
+ 131,
190
+ 1743,
191
+ 1082,
192
+ 1707,
193
+ 86,
194
+ 1075,
195
+ 883,
196
+ 944,
197
+ 1103,
198
+ 790,
199
+ 978,
200
+ 860,
201
+ 1738,
202
+ 1060,
203
+ 749,
204
+ 171,
205
+ 679,
206
+ 1144,
207
+ 966,
208
+ 1532,
209
+ 1179,
210
+ 714,
211
+ 1123,
212
+ 1308,
213
+ 1524,
214
+ 752,
215
+ 1613,
216
+ 1266
217
+ ]
218
+ },
219
+ {
220
+ "word": "that",
221
+ "duration": "0.14",
222
+ "codes": [
223
+ 64,
224
+ 32,
225
+ 1457,
226
+ 1095,
227
+ 931,
228
+ 1774,
229
+ 1017,
230
+ 1661,
231
+ 1713,
232
+ 355,
233
+ 1708
234
+ ]
235
+ },
236
+ {
237
+ "word": "may",
238
+ "duration": "0.12",
239
+ "codes": [
240
+ 1800,
241
+ 1070,
242
+ 1452,
243
+ 1185,
244
+ 1295,
245
+ 26,
246
+ 638,
247
+ 240,
248
+ 1480,
249
+ 1461
250
+ ]
251
+ },
252
+ {
253
+ "word": "be",
254
+ "duration": "0.12",
255
+ "codes": [
256
+ 859,
257
+ 729,
258
+ 848,
259
+ 1131,
260
+ 1618,
261
+ 928,
262
+ 331,
263
+ 504,
264
+ 487,
265
+ 417
266
+ ]
267
+ },
268
+ {
269
+ "word": "capable",
270
+ "duration": "0.42",
271
+ "codes": [
272
+ 686,
273
+ 1040,
274
+ 28,
275
+ 1456,
276
+ 1056,
277
+ 1133,
278
+ 901,
279
+ 1127,
280
+ 693,
281
+ 1406,
282
+ 20,
283
+ 118,
284
+ 141,
285
+ 572,
286
+ 845,
287
+ 1280,
288
+ 353,
289
+ 1726,
290
+ 338,
291
+ 1413,
292
+ 484,
293
+ 272,
294
+ 1569,
295
+ 144,
296
+ 1581,
297
+ 437,
298
+ 1502,
299
+ 963,
300
+ 1415,
301
+ 655,
302
+ 949,
303
+ 1289
304
+ ]
305
+ },
306
+ {
307
+ "word": "of",
308
+ "duration": "0.10",
309
+ "codes": [
310
+ 1198,
311
+ 1755,
312
+ 1478,
313
+ 1548,
314
+ 802,
315
+ 1513,
316
+ 1290,
317
+ 636
318
+ ]
319
+ },
320
+ {
321
+ "word": "supporting",
322
+ "duration": "0.54",
323
+ "codes": [
324
+ 541,
325
+ 867,
326
+ 750,
327
+ 1505,
328
+ 754,
329
+ 1344,
330
+ 1032,
331
+ 734,
332
+ 505,
333
+ 559,
334
+ 220,
335
+ 288,
336
+ 342,
337
+ 591,
338
+ 1459,
339
+ 1721,
340
+ 490,
341
+ 825,
342
+ 80,
343
+ 1221,
344
+ 1234,
345
+ 639,
346
+ 1052,
347
+ 450,
348
+ 1557,
349
+ 1302,
350
+ 784,
351
+ 1547,
352
+ 823,
353
+ 527,
354
+ 1667,
355
+ 1437,
356
+ 832,
357
+ 1366,
358
+ 674,
359
+ 1607,
360
+ 486,
361
+ 893,
362
+ 1748,
363
+ 792,
364
+ 1757
365
+ ]
366
+ },
367
+ {
368
+ "word": "life",
369
+ "duration": "0.28",
370
+ "codes": [
371
+ 1761,
372
+ 149,
373
+ 1501,
374
+ 1342,
375
+ 1063,
376
+ 1124,
377
+ 117,
378
+ 1225,
379
+ 1115,
380
+ 1155,
381
+ 1815,
382
+ 1035,
383
+ 936,
384
+ 807,
385
+ 930,
386
+ 1514,
387
+ 837,
388
+ 1104,
389
+ 1145,
390
+ 1164,
391
+ 1687,
392
+ 1589
393
+ ]
394
+ }
395
+ ]
396
+ }
yarngpt/python-wrapper/default_speakers/joke.json ADDED
@@ -0,0 +1,430 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "text": "i still said you and i was like mister so this is what you are doing with",
3
+ "words": [
4
+ {
5
+ "word": "i",
6
+ "duration": 0.34,
7
+ "codes": [
8
+ 1737,
9
+ 1555,
10
+ 1439,
11
+ 1679,
12
+ 1634,
13
+ 1661,
14
+ 1764,
15
+ 1698,
16
+ 1715,
17
+ 862,
18
+ 1516,
19
+ 1427,
20
+ 1350,
21
+ 1136,
22
+ 1472,
23
+ 1113,
24
+ 1686,
25
+ 1596,
26
+ 1005,
27
+ 1365,
28
+ 1180,
29
+ 1473,
30
+ 1296,
31
+ 1337,
32
+ 1579
33
+ ]
34
+ },
35
+ {
36
+ "word": "still",
37
+ "duration": 0.26,
38
+ "codes": [
39
+ 848,
40
+ 1653,
41
+ 1756,
42
+ 1711,
43
+ 1693,
44
+ 1722,
45
+ 1580,
46
+ 1552,
47
+ 502,
48
+ 1416,
49
+ 1463,
50
+ 1341,
51
+ 1449,
52
+ 1542,
53
+ 1700,
54
+ 1786,
55
+ 428,
56
+ 1728,
57
+ 1624,
58
+ 1624
59
+ ]
60
+ },
61
+ {
62
+ "word": "said",
63
+ "duration": 0.24,
64
+ "codes": [
65
+ 1657,
66
+ 1744,
67
+ 1657,
68
+ 1634,
69
+ 1615,
70
+ 1534,
71
+ 996,
72
+ 1296,
73
+ 1542,
74
+ 577,
75
+ 1047,
76
+ 1506,
77
+ 440,
78
+ 1756,
79
+ 1783,
80
+ 1593,
81
+ 906,
82
+ 1810
83
+ ]
84
+ },
85
+ {
86
+ "word": "you",
87
+ "duration": 0.62,
88
+ "codes": [
89
+ 1610,
90
+ 409,
91
+ 1534,
92
+ 1685,
93
+ 1709,
94
+ 1756,
95
+ 363,
96
+ 1441,
97
+ 1789,
98
+ 1594,
99
+ 863,
100
+ 1773,
101
+ 1612,
102
+ 1535,
103
+ 1602,
104
+ 1615,
105
+ 1426,
106
+ 48,
107
+ 1690,
108
+ 1740,
109
+ 1650,
110
+ 1824,
111
+ 1613,
112
+ 1807,
113
+ 1041,
114
+ 1778,
115
+ 719,
116
+ 1002,
117
+ 1759,
118
+ 1403,
119
+ 1766,
120
+ 1826,
121
+ 1002,
122
+ 1769,
123
+ 1661,
124
+ 1278,
125
+ 1759,
126
+ 1351,
127
+ 1638,
128
+ 1740,
129
+ 1395,
130
+ 1722,
131
+ 1765,
132
+ 1751,
133
+ 1461,
134
+ 1492
135
+ ]
136
+ },
137
+ {
138
+ "word": "and",
139
+ "duration": 0.14,
140
+ "codes": [
141
+ 1056,
142
+ 1494,
143
+ 1389,
144
+ 1002,
145
+ 1452,
146
+ 1413,
147
+ 1345,
148
+ 1401,
149
+ 1593,
150
+ 1073,
151
+ 775
152
+ ]
153
+ },
154
+ {
155
+ "word": "i",
156
+ "duration": 0.08,
157
+ "codes": [
158
+ 1812,
159
+ 547,
160
+ 1581,
161
+ 1468,
162
+ 949,
163
+ 1740
164
+ ]
165
+ },
166
+ {
167
+ "word": "was",
168
+ "duration": 0.16,
169
+ "codes": [
170
+ 1662,
171
+ 1542,
172
+ 363,
173
+ 1374,
174
+ 1598,
175
+ 1563,
176
+ 1394,
177
+ 473,
178
+ 863,
179
+ 1587,
180
+ 1685,
181
+ 1729
182
+ ]
183
+ },
184
+ {
185
+ "word": "like",
186
+ "duration": 0.28,
187
+ "codes": [
188
+ 1407,
189
+ 1444,
190
+ 1286,
191
+ 1506,
192
+ 1366,
193
+ 1286,
194
+ 1013,
195
+ 502,
196
+ 631,
197
+ 1449,
198
+ 1374,
199
+ 1711,
200
+ 1413,
201
+ 1660,
202
+ 1679,
203
+ 1783,
204
+ 1772,
205
+ 1723,
206
+ 1549,
207
+ 1674,
208
+ 1388
209
+ ]
210
+ },
211
+ {
212
+ "word": "mister",
213
+ "duration": 0.84,
214
+ "codes": [
215
+ 1591,
216
+ 1765,
217
+ 1653,
218
+ 1549,
219
+ 1449,
220
+ 1341,
221
+ 473,
222
+ 1363,
223
+ 1605,
224
+ 1554,
225
+ 1387,
226
+ 1641,
227
+ 1439,
228
+ 362,
229
+ 1606,
230
+ 319,
231
+ 1691,
232
+ 1582,
233
+ 1617,
234
+ 1756,
235
+ 1286,
236
+ 1409,
237
+ 1221,
238
+ 1372,
239
+ 1584,
240
+ 794,
241
+ 1636,
242
+ 1488,
243
+ 1280,
244
+ 1366,
245
+ 1753,
246
+ 1636,
247
+ 882,
248
+ 1723,
249
+ 1796,
250
+ 1769,
251
+ 1717,
252
+ 1549,
253
+ 1518,
254
+ 1633,
255
+ 175,
256
+ 1678,
257
+ 1679,
258
+ 1549,
259
+ 1732,
260
+ 1710,
261
+ 1662,
262
+ 1744,
263
+ 1641,
264
+ 1696,
265
+ 1565,
266
+ 1769,
267
+ 1789,
268
+ 719,
269
+ 1831,
270
+ 1786,
271
+ 1451,
272
+ 1728,
273
+ 1646,
274
+ 1713,
275
+ 1672,
276
+ 1774,
277
+ 1734
278
+ ]
279
+ },
280
+ {
281
+ "word": "so",
282
+ "duration": 0.14,
283
+ "codes": [
284
+ 1354,
285
+ 1518,
286
+ 1791,
287
+ 1374,
288
+ 277,
289
+ 1542,
290
+ 1366,
291
+ 700,
292
+ 1444,
293
+ 1744,
294
+ 1217
295
+ ]
296
+ },
297
+ {
298
+ "word": "this",
299
+ "duration": 0.2,
300
+ "codes": [
301
+ 1461,
302
+ 1588,
303
+ 1672,
304
+ 1712,
305
+ 1679,
306
+ 175,
307
+ 63,
308
+ 426,
309
+ 293,
310
+ 1654,
311
+ 57,
312
+ 1616,
313
+ 1394,
314
+ 1789,
315
+ 175
316
+ ]
317
+ },
318
+ {
319
+ "word": "is",
320
+ "duration": 0.06,
321
+ "codes": [
322
+ 1394,
323
+ 1605,
324
+ 1596,
325
+ 1800,
326
+ 269
327
+ ]
328
+ },
329
+ {
330
+ "word": "what",
331
+ "duration": 0.16,
332
+ "codes": [
333
+ 1706,
334
+ 759,
335
+ 1047,
336
+ 1493,
337
+ 637,
338
+ 1723,
339
+ 1772,
340
+ 1748,
341
+ 1634,
342
+ 4,
343
+ 1387,
344
+ 1710
345
+ ]
346
+ },
347
+ {
348
+ "word": "you",
349
+ "duration": 0.1,
350
+ "codes": [
351
+ 890,
352
+ 1374,
353
+ 1019,
354
+ 848,
355
+ 1415,
356
+ 1341,
357
+ 1073
358
+ ]
359
+ },
360
+ {
361
+ "word": "are",
362
+ "duration": 0.1,
363
+ "codes": [
364
+ 1286,
365
+ 127,
366
+ 949,
367
+ 870,
368
+ 1734,
369
+ 1593,
370
+ 1761,
371
+ 1717
372
+ ]
373
+ },
374
+ {
375
+ "word": "doing",
376
+ "duration": 0.22,
377
+ "codes": [
378
+ 1643,
379
+ 1485,
380
+ 1708,
381
+ 1394,
382
+ 1469,
383
+ 348,
384
+ 1676,
385
+ 1685,
386
+ 428,
387
+ 1584,
388
+ 1695,
389
+ 1596,
390
+ 1613,
391
+ 1286,
392
+ 1787,
393
+ 1374
394
+ ]
395
+ },
396
+ {
397
+ "word": "with",
398
+ "duration": 0.36,
399
+ "codes": [
400
+ 1382,
401
+ 615,
402
+ 1127,
403
+ 1742,
404
+ 1591,
405
+ 239,
406
+ 1810,
407
+ 1778,
408
+ 719,
409
+ 1616,
410
+ 1549,
411
+ 519,
412
+ 1804,
413
+ 1416,
414
+ 1636,
415
+ 1584,
416
+ 1437,
417
+ 1698,
418
+ 1625,
419
+ 1494,
420
+ 1633,
421
+ 1545,
422
+ 1747,
423
+ 1737,
424
+ 1672,
425
+ 1646,
426
+ 1778
427
+ ]
428
+ }
429
+ ]
430
+ }
yarngpt/python-wrapper/default_speakers/jude.json ADDED
@@ -0,0 +1,263 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "text": "know what I'm saying what I'm saying is that if you say",
3
+ "words": [
4
+ {
5
+ "word": "know",
6
+ "duration": 0.44,
7
+ "codes": [
8
+ 1824,
9
+ 1820,
10
+ 1743,
11
+ 1819,
12
+ 1171,
13
+ 1796,
14
+ 1613,
15
+ 1126,
16
+ 1500,
17
+ 1346,
18
+ 1429,
19
+ 1810,
20
+ 1655,
21
+ 1462,
22
+ 1780,
23
+ 1812,
24
+ 1518,
25
+ 1431,
26
+ 741,
27
+ 1206,
28
+ 1325,
29
+ 1392,
30
+ 920,
31
+ 409,
32
+ 4,
33
+ 1270,
34
+ 416,
35
+ 1759,
36
+ 1141,
37
+ 708,
38
+ 1022,
39
+ 1769,
40
+ 1384
41
+ ]
42
+ },
43
+ {
44
+ "word": "what",
45
+ "duration": 0.12,
46
+ "codes": [
47
+ 607,
48
+ 787,
49
+ 48,
50
+ 1350,
51
+ 1340,
52
+ 297,
53
+ 364,
54
+ 825,
55
+ 1775
56
+ ]
57
+ },
58
+ {
59
+ "word": "im",
60
+ "duration": 0.1,
61
+ "codes": [
62
+ 1668,
63
+ 1311,
64
+ 1651,
65
+ 1048,
66
+ 176,
67
+ 430,
68
+ 333
69
+ ]
70
+ },
71
+ {
72
+ "word": "saying",
73
+ "duration": 0.56,
74
+ "codes": [
75
+ 822,
76
+ 648,
77
+ 1568,
78
+ 1660,
79
+ 1071,
80
+ 1399,
81
+ 890,
82
+ 1396,
83
+ 1381,
84
+ 1818,
85
+ 124,
86
+ 1623,
87
+ 361,
88
+ 1588,
89
+ 1688,
90
+ 1280,
91
+ 1805,
92
+ 1659,
93
+ 1605,
94
+ 1412,
95
+ 1672,
96
+ 1752,
97
+ 1741,
98
+ 1514,
99
+ 1817,
100
+ 1796,
101
+ 1763,
102
+ 1790,
103
+ 1595,
104
+ 1788,
105
+ 1823,
106
+ 758,
107
+ 1466,
108
+ 1802,
109
+ 1788,
110
+ 1649,
111
+ 1614,
112
+ 1751,
113
+ 1718,
114
+ 1585,
115
+ 1637,
116
+ 1773
117
+ ]
118
+ },
119
+ {
120
+ "word": "what",
121
+ "duration": 0.12,
122
+ "codes": [
123
+ 1666,
124
+ 1680,
125
+ 1431,
126
+ 411,
127
+ 1687,
128
+ 695,
129
+ 1629,
130
+ 1678,
131
+ 664,
132
+ 1087
133
+ ]
134
+ },
135
+ {
136
+ "word": "im",
137
+ "duration": 0.16,
138
+ "codes": [
139
+ 117,
140
+ 408,
141
+ 1813,
142
+ 1729,
143
+ 1336,
144
+ 1710,
145
+ 1833,
146
+ 1615,
147
+ 276,
148
+ 362,
149
+ 1364,
150
+ 687
151
+ ]
152
+ },
153
+ {
154
+ "word": "saying",
155
+ "duration": 0.26,
156
+ "codes": [
157
+ 28,
158
+ 440,
159
+ 1376,
160
+ 1196,
161
+ 1147,
162
+ 1636,
163
+ 1272,
164
+ 1449,
165
+ 198,
166
+ 1277,
167
+ 1470,
168
+ 1485,
169
+ 1100,
170
+ 1588,
171
+ 1673,
172
+ 1620,
173
+ 1710,
174
+ 1753,
175
+ 806
176
+ ]
177
+ },
178
+ {
179
+ "word": "is",
180
+ "duration": 0.06,
181
+ "codes": [
182
+ 1621,
183
+ 1636,
184
+ 1833,
185
+ 529,
186
+ 1653
187
+ ]
188
+ },
189
+ {
190
+ "word": "that",
191
+ "duration": 0.24,
192
+ "codes": [
193
+ 1773,
194
+ 1004,
195
+ 1796,
196
+ 907,
197
+ 239,
198
+ 1804,
199
+ 565,
200
+ 1432,
201
+ 1534,
202
+ 1718,
203
+ 1643,
204
+ 1432,
205
+ 1447,
206
+ 1273,
207
+ 1824,
208
+ 1657,
209
+ 1776,
210
+ 1651
211
+ ]
212
+ },
213
+ {
214
+ "word": "if",
215
+ "duration": 0.12,
216
+ "codes": [
217
+ 1649,
218
+ 1620,
219
+ 1342,
220
+ 176,
221
+ 1773,
222
+ 178,
223
+ 1710,
224
+ 1710,
225
+ 1521
226
+ ]
227
+ },
228
+ {
229
+ "word": "you",
230
+ "duration": 0.16,
231
+ "codes": [
232
+ 959,
233
+ 1728,
234
+ 1651,
235
+ 361,
236
+ 822,
237
+ 1661,
238
+ 1341,
239
+ 780,
240
+ 1518,
241
+ 335,
242
+ 452,
243
+ 736
244
+ ]
245
+ },
246
+ {
247
+ "word": "say",
248
+ "duration": 0.14,
249
+ "codes": [
250
+ 372,
251
+ 1217,
252
+ 713,
253
+ 848,
254
+ 1140,
255
+ 1420,
256
+ 1549,
257
+ 483,
258
+ 125,
259
+ 1353
260
+ ]
261
+ }
262
+ ]
263
+ }