Spaces:

Stanley03
/

SwaGPT

Paused

App Files Files Community

Stanley03 committed 14 days ago

Commit

d19665e

verified ·

1 Parent(s): de7f0d0

Upload folder using huggingface_hub

Browse files

Files changed (45) hide show

__init__.py +1 -0
app.py +374 -69
audiotokenizer.py +319 -0
default_speakers/azeez.json +413 -0
default_speakers/baraka.json +469 -0
default_speakers/chinenye.json +274 -0
default_speakers/emma.json +441 -0
default_speakers/idera.json +396 -0
default_speakers/joke.json +430 -0
default_speakers/jude.json +263 -0
default_speakers/onye.json +621 -0
default_speakers/osagie.json +486 -0
default_speakers/regina.json +574 -0
default_speakers/remi.json +382 -0
default_speakers/saheed.json +564 -0
default_speakers/tayo.json +523 -0
default_speakers/umar.json +469 -0
default_speakers/wanjiku.json +574 -0
default_speakers/zainab.json +457 -0
default_speakers/zawadi.json +396 -0
default_speakers_local/hausa_female1.json +273 -0
default_speakers_local/hausa_female2.json +273 -0
default_speakers_local/hausa_male1.json +367 -0
default_speakers_local/hausa_male2.json +207 -0
default_speakers_local/igbo_female1.json +246 -0
default_speakers_local/igbo_female2.json +202 -0
default_speakers_local/igbo_male2.json +277 -0
default_speakers_local/yoruba_female1.json +416 -0
default_speakers_local/yoruba_female2.json +193 -0
default_speakers_local/yoruba_male1.json +234 -0
default_speakers_local/yoruba_male2.json +238 -0
default_speakers_local/yoruba_male3.json +234 -0
download_models.py +96 -0
python-wrapper/audiotokenizer.py +317 -0
python-wrapper/requirements.txt +10 -0
python-wrapper/yarngpt/__init__.py +4 -0
python-wrapper/yarngpt/core.py +125 -0
requirements.txt +6 -2
swagpt/__init__.py +3 -0
swagpt/__pycache__/__init__.cpython-311.pyc +0 -0
swagpt/__pycache__/audiotokenizer.cpython-311.pyc +0 -0
swagpt/audiotokenizer.py +255 -0
swagpt/prepare_dataset.py +181 -0
swagpt/train.py +195 -0
test_swagpt.py +80 -0

__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+

app.py CHANGED Viewed

@@ -1,91 +1,396 @@
 import torch
-from transformers import pipeline, VitsModel, AutoTokenizer
-import scipy.io.wavfile
 import gradio as gr
-import tempfile
-import os
-import google.generativeai as genai
-# ====================================================================
-# SwaGPT Final Deployment Script (Optimized for Hugging Face Spaces)
-# This script loads the GEMINI_API_KEY from the Space Secrets for security.
-# ====================================================================
-# 1. Setup Models (Optimized for Free/CPU environments)
-STT_MODEL = "openai/whisper-tiny"
-# IMPORTANT: Replace with your custom trained model ID once uploaded!
-TTS_MODEL_ID = "facebook/mms-tts-swh"
-print("Loading AI components...")
-stt_pipe = pipeline("automatic-speech-recognition", model=STT_MODEL, device="cpu")
-tts_tokenizer = AutoTokenizer.from_pretrained(TTS_MODEL_ID)
-tts_model = VitsModel.from_pretrained(TTS_MODEL_ID)
-# 2. Configure Gemini API
-# The key is loaded from the environment variable set in Hugging Face Secrets
-GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
-if GEMINI_API_KEY:
-    genai.configure(api_key=GEMINI_API_KEY)
-    print("Gemini API configured successfully from Secrets.")
-else:
-    print("WARNING: GEMINI_API_KEY not found in environment variables. LLM will not work.")
-def voice_agent_chat(audio_path):
-    if not GEMINI_API_KEY:
-        return "ERROR: Gemini API Key is missing. Please set the GEMINI_API_KEY secret in your Space settings.", None
-    if audio_path is None:
-        return "Tafadhali rekodi sauti yako.", None
-    # Step 1: Speech-to-Text (Listen)
-    stt_result = stt_pipe(audio_path, generate_kwargs={"language": "swahili"})
-    user_text = stt_result["text"]
-    # Step 2: Gemini Intelligence (Think)
     try:
-        model = genai.GenerativeModel('gemini-1.5-flash')
-        # System Prompt for Kiswahili AI Personality
-        system_instruction = "Wewe ni SwaGPT, msaidizi wa akili mnemba unayezungumza Kiswahili sanifu. Jibu kwa ufupi sana (sentensi 1-2)."
-        prompt = f"{system_instruction}\n\nMtumiaji: {user_text}"
-        response = model.generate_content(prompt)
-        ai_response = response.text
     except Exception as e:
-        ai_response = f"Tatizo la API: {str(e)}. Huenda umefikia kikomo cha matumizi ya bure."
-    # Step 3: Text-to-Speech (Speak)
-    inputs = tts_tokenizer(ai_response, return_tensors="pt")
-    with torch.no_grad():
-        output = tts_model(**inputs).waveform
-    sampling_rate = tts_model.config.sampling_rate
-    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
-    scipy.io.wavfile.write(temp_file.name, rate=sampling_rate, data=output.float().numpy().T)
-    return ai_response, temp_file.name
-# Create Gradio Interface
-with gr.Blocks(title="SwaGPT Intelligent Voice Agent") as demo:
-    gr.Markdown("# 🤖 SwaGPT Intelligent Voice Agent")
-    gr.Markdown("Zungumza na SwaGPT! Mfumo huu unatumia Gemini kufikiri na SwaGPT kuzungumza.")
-    with gr.Row():
-        with gr.Column():
-            gr.Markdown("### 1. Zungumza (Talk)")
-            audio_input = gr.Audio(label="Rekodi Sauti", type="filepath")
-            submit_btn = gr.Button("Anza Mazungumzo", variant="primary")
-        with gr.Column():
-            gr.Markdown("### 2. Jibu (Response)")
-            chat_text = gr.Textbox(label="Maandishi ya AI")
-            audio_output = gr.Audio(label="Sauti ya AI")
-    submit_btn.click(
-        fn=voice_agent_chat,
-        inputs=audio_input,
-        outputs=[chat_text, audio_output]
     )
 if __name__ == "__main__":
-    demo.launch()

+#!/usr/bin/env python3
+"""
+SwaGPT Interactive Playground
+Stunning dark-themed web dashboard for Swahili & Kenyan speech generation,
+zero-shot voice cloning, and model downloader utilities.
+"""
+import os
+import sys
+import json
 import torch
+import torchaudio
+import subprocess
 import gradio as gr
+from transformers import AutoModelForCausalLM
+# Initialize global state variables
+audio_tokenizer = None
+model = None
+def check_models_cached():
+    """Tell whether the WavTokenizer config, its checkpoint and the YarnGPT2 folder all exist under ``models/``."""
+    here = os.path.dirname(__file__)
+    models_dir = os.path.abspath(os.path.join(here, "models"))
+    # Every entry must be present for the local (offline) loading path to be usable.
+    required_entries = (
+        "wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml",
+        "wavtokenizer_large_speech_320_24k.ckpt",
+        "YarnGPT2",
+    )
+    return all(os.path.exists(os.path.join(models_dir, entry)) for entry in required_entries)
+def run_downloader():
+    """Runs download_models.py and streams its console output.
+    This is a generator: after every line printed by the child process it
+    yields the log text accumulated so far, so the UI textbox can show
+    progress while the download is running.
+    Yields:
+        The accumulated stdout/stderr text. A final entry is appended when the
+        script exits with a non-zero code or when launching it fails.
+    """
+    # Resolve the script next to this file so the button also works when the
+    # app is started from a different working directory.
+    script_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "download_models.py")
+    output_logs = ""
+    try:
+        # Used as a context manager so the pipe is closed and the child is
+        # waited for when the block is left.
+        with subprocess.Popen(
+            [sys.executable, script_path],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            text=True
+        ) as process:
+            for line in process.stdout:
+                output_logs += line
+                yield output_logs
+            return_code = process.wait()
+        if return_code != 0:
+            # Surface failures instead of silently ending the log stream.
+            yield f"{output_logs}\n[!] Downloader exited with code {return_code}"
+    except Exception as e:
+        # Keep whatever was already logged so the user can see how far it got.
+        yield f"{output_logs}Error running downloader: {str(e)}"
+def load_tts_model():
+    """Loads the SwaGPT tokenizer and causal LM weights into the module globals.
+    Does nothing when both globals are already set. When check_models_cached()
+    reports the files present, everything is read from the ``models`` folder;
+    otherwise the LM/tokenizer are loaded by the hub id "saheedniyi/YarnGPT2"
+    and any missing WavTokenizer config/checkpoint is fetched into ``models``
+    with urllib first.
+    Returns:
+        A human-readable status string. Failures are not raised: the returned
+        text then starts with "Error loading model:" and includes the traceback.
+    """
+    global audio_tokenizer, model
+    if audio_tokenizer is not None and model is not None:
+        return "Model already loaded and running! 🚀"
+    try:
+        # Imported lazily so the module can be imported before the package's heavy dependencies are needed.
+        from swagpt.audiotokenizer import AudioTokenizerSwa
+        BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+        models_dir = os.path.join(BASE_DIR, "models")
+        # Determine local vs remote loading
+        is_cached = check_models_cached()
+        if is_cached:
+            print("[*] Loading local weights from models directory...")
+            tokenizer_path = os.path.join(models_dir, "YarnGPT2")
+            config_path = os.path.join(models_dir, "wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml")
+            model_path = os.path.join(models_dir, "wavtokenizer_large_speech_320_24k.ckpt")
+            local_only = True
+        else:
+            print("[!] Local weights not found. Falling back to downloading/caching via Hugging Face...")
+            tokenizer_path = "saheedniyi/YarnGPT2"
+            # Temporary downloads of config/ckpt if not local
+            os.makedirs(models_dir, exist_ok=True)
+            config_path = os.path.join(models_dir, "wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml")
+            model_path = os.path.join(models_dir, "wavtokenizer_large_speech_320_24k.ckpt")
+            # Simple downloads if missing
+            import urllib.request
+            # NOTE(review): an interrupted download would leave a partial file that later passes the exists() check — confirm whether that needs handling.
+            if not os.path.exists(config_path):
+                print("[*] Downloading WavTokenizer config from HF...")
+                urllib.request.urlretrieve(
+                    "https://huggingface.co/novateur/WavTokenizer-medium-speech-75token/resolve/main/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml",
+                    config_path
+                )
+            if not os.path.exists(model_path):
+                print("[*] Downloading WavTokenizer checkpoint from HF (LLMVoX)...")
+                urllib.request.urlretrieve(
+                    "https://huggingface.co/MBZUAI/LLMVoX/resolve/main/wavtokenizer_large_speech_320_24k.ckpt",
+                    model_path
+                )
+            local_only = False
+        audio_tokenizer = AudioTokenizerSwa(
+            tokenizer_path=tokenizer_path,
+            wav_tokenizer_model_path=model_path,
+            wav_tokenizer_config_path=config_path,
+            local_weights_only=local_only
+        )
+        # Load Causal LLM model
+        print("[*] Initializing AutoModelForCausalLM...")
+        # The LM is placed on the same device the audio tokenizer selected.
+        model = AutoModelForCausalLM.from_pretrained(
+            tokenizer_path,
+            torch_dtype="auto",
+            local_files_only=local_only
+        ).to(audio_tokenizer.device)
+        return "SwaGPT Model & WavTokenizer Successfully Loaded! 🚀 Ready for Voice Synthesis!"
+    except Exception as e:
+        # Callers in this file detect failure by looking for "Error" in the returned text.
+        import traceback
+        error_details = traceback.format_exc()
+        return f"Error loading model: {str(e)}\n\nDetails:\n{error_details}"
+def generate_speech(text, lang, speaker_name, temperature, repetition_penalty):
+    """Synthesizes text input into Swahili/Kenyan speech using speaker reference.
+    Loads the model on first use when the globals are still unset.
+    Args:
+        text: Text to synthesize.
+        lang: Language name forwarded to ``create_prompt``.
+        speaker_name: Speaker profile name forwarded to ``create_prompt``.
+        temperature: Converted to float and passed as ``temperature`` to ``model.generate``.
+        repetition_penalty: Converted to float and passed as ``repetition_penalty`` to ``model.generate``.
+    Returns:
+        A ``(wav_path, info)`` tuple. On success ``wav_path`` is the saved WAV
+        file and ``info`` summarizes the generated codes (at most the first 20
+        are shown). On failure ``wav_path`` is None and ``info`` holds the
+        error text; nothing is raised to the caller.
+    """
+    global audio_tokenizer, model
+    if audio_tokenizer is None or model is None:
+        # Auto-load model if not loaded yet
+        load_status = load_tts_model()
+        # load_tts_model reports failure through its message text rather than by raising.
+        if "Error" in load_status:
+            return None, f"[!] {load_status}"
+    try:
+        print(f"[*] Generating speech. Text: '{text}' | Lang: {lang} | Speaker: {speaker_name}")
+        # 1. Create prompt using selected language and speaker
+        prompt = audio_tokenizer.create_prompt(text, lang=lang, speaker_name=speaker_name)
+        # 2. Tokenize prompt
+        input_ids = audio_tokenizer.tokenize_prompt(prompt)
+        # 3. Generate tokens
+        with torch.no_grad():
+            output = model.generate(
+                input_ids=input_ids,
+                temperature=float(temperature),
+                repetition_penalty=float(repetition_penalty),
+                max_length=4000
+            )
+        # 4. Decode codes back to audio
+        codes = audio_tokenizer.get_codes(output)
+        audio = audio_tokenizer.get_audio(codes)
+        # Save audio file
+        # NOTE(review): the output filename is fixed, so each call overwrites the previous result — confirm this is acceptable with concurrent users.
+        output_wav_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "generated_output.wav"))
+        # assumes the decoder produces 24 kHz audio — TODO confirm against the WavTokenizer checkpoint
+        torchaudio.save(output_wav_path, audio, sample_rate=24000)
+        code_string = f"Audio Codes Generated: {len(codes)} codes\n" + " ".join([f"<|{c}|>" for c in codes[:20]]) + " ... [truncated]"
+        return output_wav_path, code_string
+    except Exception as e:
+        import traceback
+        return None, f"Synthesis failed: {str(e)}\n{traceback.format_exc()}"
+def enroll_custom_speaker(audio_file, transcript, speaker_name):
+    """Clones a 10-second Swahili/Kenyan audio file and registers it as a speaker.
+    Args:
+        audio_file: Path of the uploaded reference clip.
+        transcript: Text spoken in the reference clip.
+        speaker_name: Name for the new speaker. It is lower-cased, trimmed and
+            has spaces turned into underscores to form the profile file name;
+            the result may only contain letters, digits and underscores.
+    Returns:
+        A status message for the UI: a validation error, the success text, or
+        the failure text with traceback. Nothing is raised to the caller.
+    """
+    global audio_tokenizer
+    if not audio_file:
+        return "Please upload an audio file first."
+    if not transcript or not transcript.strip():
+        return "Please enter the transcript corresponding to the audio file."
+    if not speaker_name or not speaker_name.strip():
+        return "Please provide a name for the custom speaker."
+    import re
+    clean_name = speaker_name.lower().strip().replace(" ", "_")
+    # The name becomes part of a file path, so refuse anything outside the
+    # character set the UI advertises (this blocks "../"-style path traversal).
+    if not re.fullmatch(r"[a-z0-9_]+", clean_name):
+        return "Invalid speaker name. Please use letters, numbers and underscores only."
+    if audio_tokenizer is None:
+        load_status = load_tts_model()
+        # load_tts_model reports failure through its message text rather than by raising.
+        if "Error" in load_status:
+            return f"[!] {load_status}"
     try:
+        from swagpt.prepare_dataset import align_and_tokenize
+        profile = align_and_tokenize(audio_file, transcript, audio_tokenizer)
+        # Save JSON speaker profile
+        output_path = audio_tokenizer.get_speaker_path(clean_name)
+        with open(output_path, "w", encoding="utf-8") as f:
+            json.dump(profile, f, indent=4)
+        return f"🎉 Success! Speaker '{clean_name}' enrolled successfully!\nProfile saved at: {output_path}\nYou can now select '{clean_name}' in the speaker dropdown of the TTS tab."
     except Exception as e:
+        import traceback
+        return f"Error enrolling speaker: {str(e)}\n{traceback.format_exc()}"
+def get_available_speakers():
+    """Return the sorted speaker names found as ``*.json`` files in ``default_speakers`` (created if absent)."""
+    speakers_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "default_speakers")
+    os.makedirs(speakers_dir, exist_ok=True)
+    names = []
+    for entry in os.listdir(speakers_dir):
+        if entry.endswith(".json"):
+            names.append(entry.replace(".json", ""))
+    if not names:
+        # Nothing on disk: offer a fixed set of names so the dropdown is never empty.
+        return ["zawadi", "baraka", "wanjiku"]
+    return sorted(names)
+def refresh_speakers():
+    """Return a Dropdown whose choices are the speaker names currently on disk."""
+    return gr.Dropdown(choices=get_available_speakers())
+# Build Premium Gradio Interface
+theme = gr.themes.Soft(
+    primary_hue="purple",
+    secondary_hue="indigo",
+    neutral_hue="slate"
+)
+with gr.Blocks(title="SwaGPT Web Dashboard") as demo:
+    gr.HTML("""
+    <div style="text-align: center; padding: 20px; border-radius: 12px; background: linear-gradient(135deg, #2E0854, #0A0015); margin-bottom: 25px; box-shadow: 0 4px 20px rgba(0,0,0,0.5);">
+        <h1 style="color: #E2ADFF; font-family: 'Outfit', sans-serif; font-size: 3rem; margin-bottom: 5px; font-weight: bold; letter-spacing: 1px;">🎙️ SwaGPT & Multilingual Kenyan TTS</h1>
+        <p style="color: #BCA3CD; font-size: 1.1rem; max-width: 700px; margin: 0 auto; line-height: 1.5;">
+            An East African adaptation of Saheed Azeez's YarnGPT framework. Synthesize speech in Kiswahili and Kenyan languages with high-fidelity zero-shot voice cloning.
+        </p>
+    </div>
+    """)
+    with gr.Tabs():
+        # TAB 1: SPEECH GENERATION (TTS)
+        with gr.Tab("✨ Speech Synthesis"):
+            with gr.Row():
+                with gr.Column(scale=1):
+                    gr.HTML("<h3 style='color: #C084FC;'>1. Speech Configuration</h3>")
+                    text_input = gr.Textbox(
+                        label="Text to Synthesize",
+                        value="Habari gani rafiki yangu mpendwa? Leo ni siku ya kipekee sana kwa maendeleo ya akili mnemba ya kiafrika.",
+                        placeholder="Type Swahili or Kenyan language text here...",
+                        lines=5
+                    )
+                    with gr.Row():
+                        lang_dropdown = gr.Dropdown(
+                            choices=["swahili", "kikuyu", "luo", "luhya", "kamba", "kalenjin", "english", "hausa", "igbo", "yoruba"],
+                            label="Target Language",
+                            value="swahili"
+                        )
+                        # Populate speakers list dynamic dropdown
+                        initial_speakers = get_available_speakers()
+                        speaker_dropdown = gr.Dropdown(
+                            choices=initial_speakers,
+                            label="Speaker Voice Profile",
+                            value="zawadi" if "zawadi" in initial_speakers else initial_speakers[0]
+                        )
+                    refresh_btn = gr.Button("🔄 Refresh Speaker List", size="sm")
+                    with gr.Accordion("🛠️ Advanced Parameters", open=False):
+                        temp_slider = gr.Slider(
+                            minimum=0.05,
+                            maximum=1.0,
+                            value=0.1,
+                            step=0.05,
+                            label="Temperature (Creativity)"
+                        )
+                        rep_slider = gr.Slider(
+                            minimum=1.0,
+                            maximum=2.0,
+                            value=1.1,
+                            step=0.05,
+                            label="Repetition Penalty"
+                        )
+                    synth_btn = gr.Button("Synthesize Speech ⚡", variant="primary")
+                with gr.Column(scale=1):
+                    gr.HTML("<h3 style='color: #C084FC;'>2. Generated Output</h3>")
+                    audio_output = gr.Audio(
+                        label="Generated Audio Playback",
+                        type="filepath",
+                        interactive=False
+                    )
+                    code_output = gr.Textbox(
+                        label="Discrete WavTokenizer Codes",
+                        placeholder="Causal tokens generated by YarnGPT will print here...",
+                        lines=5,
+                        interactive=False
+                    )
+                    gr.HTML("""
+                    <div style="margin-top: 20px; padding: 15px; border-radius: 8px; background-color: rgba(192, 132, 252, 0.1); border: 1px solid rgba(192, 132, 252, 0.2);">
+                        <p style="color: #D8B4FE; margin: 0; font-size: 0.95rem;">
+                            💡 <b>Tip:</b> If synthesizing local Bantu languages (like Kikuyu or Swahili), the model automatically maps phonetic parsing to high-similarity features to produce fluid native pronunciations!
+                        </p>
+                    </div>
+                    """)
+        # TAB 2: ZERO-SHOT VOICE CLONING (VOICE ENROLLMENT)
+        with gr.Tab("🎯 Zero-Shot Voice Cloning"):
+            gr.HTML("""
+            <div style="padding: 10px; margin-bottom: 15px;">
+                <h2 style="color: #C084FC; margin-bottom: 5px;">Clone Any Voice Instantly</h2>
+                <p style="color: #94A3B8; margin: 0;">Upload a 10-second reference audio of a Swahili or Kenyan speaker, type what they said, and SwaGPT will build a personalized speaker profile file!</p>
+            </div>
+            """)
+            with gr.Row():
+                with gr.Column():
+                    ref_audio = gr.Audio(
+                        label="Upload 10-second Reference Clip (.wav / .mp3)",
+                        type="filepath"
+                    )
+                    ref_transcript = gr.Textbox(
+                        label="Reference Text Transcript",
+                        placeholder="Enter exactly what the reference voice is saying (in Swahili or English)...",
+                        lines=3
+                    )
+                    ref_name = gr.Textbox(
+                        label="Reference Voice Name (e.g., baraka, my_voice)",
+                        placeholder="Enter speaker name (letters/numbers/underscores only)..."
+                    )
+                    enroll_btn = gr.Button("Enroll Reference Voice 🎭", variant="primary")
+                with gr.Column():
+                    gr.HTML("<h3 style='color: #C084FC;'>Enrollment Status</h3>")
+                    status_log = gr.Textbox(
+                        label="Output Log",
+                        placeholder="Enrollment logs will display here...",
+                        lines=10,
+                        interactive=False
+                    )
+        # TAB 3: LOCAL MODEL WEIGHTS DOWNLOADER
+        with gr.Tab("💾 Weight Downloader"):
+            gr.HTML("""
+            <div style="padding: 10px; margin-bottom: 15px;">
+                <h2 style="color: #C084FC; margin-bottom: 5px;">Local Weight Management</h2>
+                <p style="color: #94A3B8; margin: 0;">Configure your workspace for complete offline/local model execution. Grabs the 1.75 GB WavTokenizer CKPT and the 750 MB YarnGPT2 weights directly to your workspace.</p>
+            </div>
+            """)
+            with gr.Row():
+                with gr.Column(scale=1):
+                    # Check cache status
+                    cached = check_models_cached()
+                    status_text = "🟢 Cached Locally (Ready)" if cached else "🔴 Not Cached Locally (Fallback to Hugging Face Hub Enabled)"
+                    status_color = "green" if cached else "red"
+                    gr.HTML(f"""
+                    <div style="padding: 15px; border-radius: 8px; background-color: rgba(255,255,255,0.05); margin-bottom: 20px;">
+                        <h4 style="margin-top: 0; color: #E2ADFF;">Current Cache Status:</h4>
+                        <span style="display: inline-block; padding: 5px 10px; border-radius: 4px; font-weight: bold; background-color: {'rgba(34, 197, 94, 0.2)' if cached else 'rgba(239, 68, 68, 0.2)'}; color: {'#4ADE80' if cached else '#FCA5A5'};">
+                            {status_text}
+                        </span>
+                    </div>
+                    """)
+                    start_download_btn = gr.Button("Download Weights Locally 📥", variant="primary")
+                with gr.Column(scale=2):
+                    console_log = gr.Textbox(
+                        label="Download Console Output",
+                        placeholder="Click 'Download Weights' to begin downloading and caching models directly in your folder...",
+                        lines=15,
+                        interactive=False
+                    )
+    # Wire up Events
+    synth_btn.click(
+        fn=generate_speech,
+        inputs=[text_input, lang_dropdown, speaker_dropdown, temp_slider, rep_slider],
+        outputs=[audio_output, code_output]
+    )
+    enroll_btn.click(
+        fn=enroll_custom_speaker,
+        inputs=[ref_audio, ref_transcript, ref_name],
+        outputs=[status_log]
+    )
+    refresh_btn.click(
+        fn=refresh_speakers,
+        inputs=[],
+        outputs=[speaker_dropdown]
+    )
+    start_download_btn.click(
+        fn=run_downloader,
+        inputs=[],
+        outputs=[console_log]
     )
 if __name__ == "__main__":
+    # NOTE(review): this binds to the loopback address only, on port 7863 — confirm the app is reachable in the intended deployment.
+    # NOTE(review): the theme is passed to launch() rather than gr.Blocks() — confirm the installed Gradio version accepts it there.
+    demo.launch(server_name="127.0.0.1", server_port=7863, theme=theme)

audiotokenizer.py ADDED Viewed

	@@ -0,0 +1,319 @@

+import os
+import re
+import json
+import torch
+import inflect
+import random
+import uroman as ur
+import numpy as np
+import torchaudio
+from transformers import AutoTokenizer
+from outetts.wav_tokenizer.decoder import WavTokenizer
+from outetts.wav_tokenizer.encoder.utils import convert_audio
+class AudioTokenizer:
+    """Builds speaker-conditioned prompts for the language model and turns generated audio codes back into audio with WavTokenizer."""
+    def __init__(self,tokenizer_path,wav_tokenizer_model_path,wav_tokenizer_config_path,):
+        """Load the text tokenizer and the WavTokenizer model.
+        Args:
+            tokenizer_path: Name or path given to ``AutoTokenizer.from_pretrained``.
+            wav_tokenizer_model_path: WavTokenizer checkpoint file.
+            wav_tokenizer_config_path: WavTokenizer config file.
+        """
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.text_prompt = "{bos}\n{text_start}{words}{text_end}\n{audio_start}\n"
+        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
+        self.bos = "<|im_start|>"
+        self.eos = "<|im_end|>"
+        # Token length of the last prompt; set by tokenize_prompt and used by
+        # get_codes to skip the prompt part of the model output.
+        self.input_length=0
+        self.special_tokens = {
+            "audio_code": "<|{}|>",
+            "text_start": "<|text_start|>",
+            "text_end": "<|text_end|>",
+            "audio_start": "<|audio_start|>",
+            "audio_end": "<|audio_end|>",
+            "time": "<|t_{:.2f}|>",
+            "code_start": "<|code_start|>",
+            "code_end": "<|code_end|>",
+            "text_sep": "<|text_sep|>"
+        }
+        # inflect engine used by process_text to spell numbers out as words.
+        self.lec = inflect.engine()
+        self.wavtokenizer = WavTokenizer.from_pretrained0802(wav_tokenizer_config_path, wav_tokenizer_model_path)
+        self.wavtokenizer = self.wavtokenizer.to(self.device)
+        self.BASE_DIR = os.path.dirname(__file__)
+        self.DEFAULT_SPEAKERS_DIR = os.path.join(self.BASE_DIR, "default_speakers")
+        self.speakers=["idera","emma","onye","jude","osagie","tayo","zainab","joke","regina","remi","umar","chinenye"]
+    def get_speaker_path(self,speaker_name):
+        """Return the JSON profile path for ``speaker_name`` inside the default speakers directory."""
+        return os.path.join(self.DEFAULT_SPEAKERS_DIR, f"{speaker_name}.json")
+    def load_speaker(self, path: str):
+        """Read and return the speaker profile stored as JSON at ``path``."""
+        # Explicit UTF-8 so profiles read the same way on every platform
+        # regardless of the locale's default encoding.
+        with open(path, "r", encoding="utf-8") as f:
+            return json.load(f)
+    def load_default_speaker(self, name: str):
+        """Load a bundled speaker profile by name (case-insensitive, surrounding whitespace ignored)."""
+        name = name.lower().strip()
+        speaker_path=self.get_speaker_path(name)
+        return self.load_speaker(speaker_path)
+    def process_text(self, text: str):
+        """Normalize ``text`` and return it as a list of lower-case words.
+        Numbers are spelled out, the separators ``- _ / , . \\`` become spaces,
+        and every remaining character other than a-z or whitespace is removed.
+        """
+        text = re.sub(r'\d+(\.\d+)?', lambda x: self.lec.number_to_words(x.group()), text.lower())
+        text = re.sub(r'[-_/,\.\\]', ' ', text)
+        text = re.sub(r'[^a-z\s]', '', text)
+        text = re.sub(r'\s+', ' ', text).strip()
+        return text.split()
+    def create_audio_prompt(self,words: list) -> str:
+        """Render speaker word entries as prompt lines.
+        Each entry must provide ``word``, ``duration`` and ``codes``; it becomes
+        one line made of the word, its time token and its audio-code tokens
+        wrapped in the code start/end markers.
+        """
+        prompt = []
+        for i in words:
+            word = i["word"]
+            duration = self.special_tokens["time"].format(float(i["duration"]))
+            tokens = "".join([self.special_tokens["audio_code"].format(c) for c in i["codes"]])
+            prompt.append(f'{word}{duration}{self.special_tokens["code_start"]}{tokens}{self.special_tokens["code_end"]}')
+        return "\n".join(prompt)
+    def create_prompt(self,text,speaker_name="idera"):
+        """Build the full prompt: the speaker's reference words followed by the words of ``text``, then the speaker's audio codes."""
+        speaker=self.load_default_speaker(speaker_name)
+        # The reference transcript comes first so the appended audio codes line up with it.
+        input_words = self.process_text(speaker["text"]) +  self.process_text(text)
+        inputs_words_strings = f"{self.special_tokens['text_sep']}".join([i.strip() for i in input_words])
+        prompt = self.text_prompt.format(
+          bos=self.bos,
+          text_start=self.special_tokens['text_start'],
+          words=inputs_words_strings,
+          text_end=self.special_tokens['text_end'],
+          audio_start=self.special_tokens['audio_start']
+      )
+        prompt += self.create_audio_prompt(speaker["words"])
+        return prompt
+    def tokenize_prompt(self, prompt):
+        """Encode ``prompt`` to a tensor of token ids on ``self.device`` and remember its length in ``self.input_length``."""
+        input_ids = self.tokenizer.encode(
+            prompt,
+            add_special_tokens=False,
+            return_tensors="pt"
+        ).to(self.device)
+        self.input_length=input_ids.shape[1]
+        # Already on self.device; no second transfer needed.
+        return input_ids
+    def get_audio(self,discrete_code):
+        """Decode a list of integer audio codes with WavTokenizer and return the audio tensor on the CPU."""
+        discrete_code=torch.tensor([[discrete_code]]).to(self.device)
+        features = self.wavtokenizer.codes_to_features(discrete_code).to(self.device)
+        bandwidth_id = torch.tensor([0]).to(self.device)
+        audio_out = self.wavtokenizer.decode(features, bandwidth_id=bandwidth_id)
+        return audio_out.to("cpu")
+    def extract_integers(self,s):
+        """Return every integer written as ``|n|`` in ``s``, in order of appearance."""
+        # Match integers enclosed in vertical bars |integer|
+        matches = re.findall(r'\|(-?\d+)\|', s)
+        # Convert matches to integers
+        return [int(match) for match in matches]
+    def get_codes(self, output):
+        """Decode the generated part of ``output`` (tokens after the prompt) and return the audio codes found in it."""
+        new_output=self.tokenizer.decode(output[0][self.input_length:])
+        codes=self.extract_integers(new_output)
+        return codes
+class AudioTokenizerForLocal(AudioTokenizer):
+    """AudioTokenizer variant for Hausa, Igbo and Yoruba: adds a language token to the prompt and romanizes text with uroman."""
+    def __init__(self,tokenizer_path,wav_tokenizer_model_path,wav_tokenizer_config_path,):
+        """Initialize the base tokenizer, then switch to the language-aware prompt, token table and speaker directory."""
+        super().__init__(tokenizer_path, wav_tokenizer_model_path, wav_tokenizer_config_path)
+        self.text_prompt = "{bos}\n{text_start}{words}{text_end}\n{lang}\n{audio_start}\n"
+        self.special_tokens = {
+            "audio_code": "<|{}|>",
+            "text_start": "<|text_start|>",
+            "text_end": "<|text_end|>",
+            "audio_start": "<|audio_start|>",
+            "audio_end": "<|audio_end|>",
+            "word_start": "<|word_start|>",
+            "word_end": "<|word_end|>",
+            "time": "<|t_{:.2f}|>",
+            "code_start": "<|code_start|>",
+            "code_end": "<|code_end|>",
+            "text_sep": "<|text_sep|>",
+            "hausa":"<|hausa|>",
+            "igbo":"<|igbo|>",
+            "yoruba":"<|yoruba|>",
+        }
+        self.uroman = ur.Uroman()
+        self.DEFAULT_SPEAKERS_DIR = os.path.join(self.BASE_DIR, "default_speakers_local")
+        # The comma after "igbo_male2" matters: without it Python joins the
+        # adjacent string literals into "igbo_male2hausa_female1".
+        self.speakers = [
+            "hausa_male1", "hausa_male2","yoruba_male1", "yoruba_male2","igbo_male2",
+            "hausa_female1", "hausa_female2", "igbo_female1", "igbo_female2", "yoruba_female1", "yoruba_female2"
+        ]
+    def process_text(self, text: str):
+        """Romanize ``text`` with uroman, then normalize it exactly like the base class and return the list of words."""
+        text = self.uroman.romanize_string(text)
+        text = re.sub(r'\d+(\.\d+)?', lambda x: self.lec.number_to_words(x.group()), text.lower())
+        text = re.sub(r'[-_/,\.\\]', ' ', text)
+        text = re.sub(r'[^a-z\s]', '', text)
+        text = re.sub(r'\s+', ' ', text).strip()
+        return text.split()
+    def create_prompt(self,text,lang,speaker_name=None):
+        """Build the prompt for ``text`` in ``lang``.
+        Args:
+            text: Text to synthesize.
+            lang: One of "hausa", "igbo" or "yoruba".
+            speaker_name: Speaker profile to use; when None a speaker for
+                ``lang`` is picked at random.
+        Returns:
+            The prompt string: text section, language token, audio start
+            marker, then the speaker's audio codes.
+        Raises:
+            AssertionError: if ``lang`` is not one of the supported languages.
+        """
+        assert lang in ["hausa","igbo","yoruba"], f"Invalid language: {lang}, language must be one of ['hausa','igbo','yoruba']"
+        #if no speaker
+        if speaker_name is None:
+            if lang=="hausa":
+                speaker_name=random.choice(["hausa_male1","hausa_male2","hausa_female1","hausa_female2"])
+            elif lang=="igbo":
+                speaker_name=random.choice(["igbo_female1","igbo_female2","igbo_male2"])#"igbo_male1"])
+            else:
+                speaker_name=random.choice(["yoruba_male2","yoruba_female1","yoruba_female2"])
+        speaker=self.load_default_speaker(speaker_name)
+        # The reference transcript comes first so the appended audio codes line up with it.
+        input_words = self.process_text(speaker["text"]) +  self.process_text(text)
+        inputs_words_strings = f"{self.special_tokens['text_sep']}".join([i.strip() for i in input_words])
+        prompt = self.text_prompt.format(
+          bos=self.bos,
+          text_start=self.special_tokens['text_start'],
+          words=inputs_words_strings,
+          text_end=self.special_tokens['text_end'],
+          lang=self.special_tokens[lang],
+          audio_start=self.special_tokens['audio_start']
+      )
+        prompt += self.create_audio_prompt(speaker["words"])
+        return prompt
+class AudioTokenizerV2(AudioTokenizer):
+    def __init__(self,tokenizer_path,wav_tokenizer_model_path,wav_tokenizer_config_path,):
+        super().__init__(tokenizer_path, wav_tokenizer_model_path, wav_tokenizer_config_path)
+        self.text_prompt = "{bos}\n{text_start}{words}{text_end}\n{lang}\n{audio_start}\n"
+        self.asr_prompt="{bos}\n{code_start}{codes}{code_end}\n{asr}\n"
+        self.special_tokens = {
+            "audio_code": "<|{}|>",
+            "text_start": "<|text_start|>",
+            "text_end": "<|text_end|>",
+            "audio_start": "<|audio_start|>",
+            "audio_end": "<|audio_end|>",
+            "word_start": "<|word_start|>",
+            "word_end": "<|word_end|>",
+            "time": "<|t_{:.2f}|>",
+            "code_start": "<|code_start|>",
+            "code_end": "<|code_end|>",
+            "text_sep": "<|text_sep|>",
+            "hausa":"<|hausa|>",
+            "igbo":"<|igbo|>",
+            "yoruba":"<|yoruba|>",
+            "english":"<|english|>",#<|english|>
+            "asr":"<|asr|>"
+        }
+        self.uroman = ur.Uroman()
+        self.DEFAULT_SPEAKERS_DIR_LOCAL = os.path.join(self.BASE_DIR, "default_speakers_local")
+        self.DEFAULT_SPEAKERS_ENG = os.path.join(self.BASE_DIR, "default_speakers")
+        self.speakers_local = [
+            "hausa_male1", "hausa_male2","yoruba_male1", "yoruba_male2","igbo_male2" #"igbo_male1", "igbo_male2",
+            "hausa_female1", "hausa_female2", "igbo_female1", "igbo_female2", "yoruba_female1", "yoruba_female2"
+        ]
+        self.speakers_eng = ["idera","emma","onye","jude","osagie","tayo","zainab","joke","regina","remi","umar","chinenye","saheed"]
+        self.changed_tokens=[('<|1836|>', '<|453|><|453|>'),
+                             ('<|1837|>', '<|1836|><|1836|>'),
+                             ('<|1838|>', '<|1837|><|1837|>'),
+                             ('<|1840|>', '<|244|><|167|>'),
+                             ('<|1841|>', '<|235|><|219|>'),
+                             ('<|1844|>', '<|453|><|244|>'),
+                             ('<|1845|>', '<|1838|><|1838|>')]
+    def process_text(self, text: str):
+        text = self.uroman.romanize_string(text)
+        text = re.sub(r'\d+(\.\d+)?', lambda x: self.lec.number_to_words(x.group()), text.lower())
+        text = re.sub(r'[-_/,\.\\]', ' ', text)
+        text = re.sub(r'[^a-z\s]', '', text)
+        text = re.sub(r'\s+', ' ', text).strip()
+        return text.split()
+    def get_speaker_path(self,speaker_name,dir):
+        return os.path.join(dir, f"{speaker_name}.json")
+    def load_speaker(self, path: str):
+        with open(path, "r") as f:
+            return json.load(f)
+    def load_default_speaker(self, name: str,dir: str):
+        name = name.lower().strip()
+        speaker_path=self.get_speaker_path(name,dir)
+        return self.load_speaker(speaker_path)
+    def create_prompt(self,text,lang,speaker_name=None):
+        assert lang in ["hausa","igbo","yoruba","english"], f"Invalid language: {lang}, language must be one of ['hausa','igbo','yoruba','english']"
+        #if no speaker
+        dir=self.DEFAULT_SPEAKERS_DIR_LOCAL
+        if speaker_name is None:
+            if lang=="hausa":
+                speaker_name=random.choice(["hausa_male1","hausa_male2","hausa_female1","hausa_female2"])
+            elif lang=="igbo":
+                speaker_name=random.choice(["igbo_female1","igbo_female2","igbo_male2"])#"igbo_male1"])
+            elif lang=="yoruba":
+                speaker_name=random.choice(["yoruba_male2","yoruba_female1","yoruba_female2"])
+            else:
+                speaker_name=random.choice(self.speakers_eng)
+        if lang=="english":
+            dir=self.DEFAULT_SPEAKERS_ENG
+        speaker=self.load_default_speaker(speaker_name,dir)
+        input_words = self.process_text(speaker["text"]) +  self.process_text(text)
+        #input_words = process_text(speaker["text"]) + input_words
+        inputs_words_strings = f"{self.special_tokens['text_sep']}".join([i.strip() for i in input_words])
+        prompt = self.text_prompt.format(
+          bos=self.bos,
+          text_start=self.special_tokens['text_start'],
+          words=inputs_words_strings,
+          text_end=self.special_tokens['text_end'],
+          lang=self.special_tokens[lang],
+          audio_start=self.special_tokens['audio_start']
+      )
+        prompt += self.create_audio_prompt(speaker["words"])
+        return prompt
+    def replace_tokens(text):
+      for pair in self.changed_tokens:
+        text=text.replace(pair[0],pair[-1])
+      return text
+    def resample(self,audio: np.ndarray, sr: int, target_sr: int):
+        audio = audio.to(dtype=torch.float32)
+        #.clone().detach()
+        audio = audio.unsqueeze(0)
+        # 1 as last arg corresponds to mono audio
+        resampled = convert_audio(audio, sr, target_sr, 1)
+        return resampled.to(self.device )
+    def quantize_wavtokenizer(self, path):
+        audio_data, sample_rate = torchaudio.load(path)
+        audio_data=audio_data.squeeze()
+        audio = self.resample(audio_data, sample_rate, 24000).to(self.device)
+        if audio.ndim==3:
+            audio=audio.squeeze(1)
+        bandwidth_id = torch.tensor([0]).to(self.device )
+        _, codes = self.wavtokenizer.encode_infer(audio, bandwidth_id=bandwidth_id)
+        codes = codes.squeeze(1).to(self.device)#+last_text_token
+        res=""
+        for code in codes[0].tolist():
+            res+=f"<|{code}|>"
+        return res
+    def create_asr_prompt(self,audio_path):
+        codes=self.quantize_wavtokenizer(audio_path)
+        prompt = self.asr_prompt.format(
+          bos=self.bos,
+          code_start=self.special_tokens['code_start'],
+          codes=codes,
+          code_end=self.special_tokens['code_end'],
+          asr=self.special_tokens["asr"],
+        )
+        return prompt
+    def get_asr_results(self,output):
+        res=""
+        for text in self.tokenizer.decode(output[0]).split("<|text_start|>")[-1].split("<|text_end|>")[0].split("\n"):
+            res+=text.split("<|word_start|>")[-1].split("<|word_end|>")[0]
+            res+=" "
+        return res.strip()

default_speakers/azeez.json ADDED Viewed

	@@ -0,0 +1,413 @@

+{
+    "text": "Hello! My name is Saheed azeez and I am testing the audio feature",
+    "words": [
+        {
+            "word": "hello",
+            "duration": 1.22,
+            "codes": [
+                219,
+                244,
+                244,
+                167,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                244,
+                219,
+                237,
+                864,
+                1041,
+                1048,
+                1372,
+                1780,
+                1554,
+                1024,
+                702,
+                1814,
+                1754,
+                1315,
+                1697,
+                1719,
+                1682,
+                307,
+                621,
+                901,
+                355,
+                783,
+                1726,
+                353,
+                1416,
+                729,
+                803,
+                1494,
+                353,
+                876,
+                1818,
+                932,
+                1068,
+                1813,
+                875,
+                1774,
+                766,
+                1453,
+                1466,
+                792,
+                1388,
+                1495,
+                1236,
+                1462,
+                431,
+                1025,
+                1429,
+                1128,
+                1236,
+                1483,
+                1305,
+                1352,
+                1681,
+                5,
+                1758,
+                1481,
+                1339
+            ]
+        },
+        {
+            "word": "my",
+            "duration": 0.18,
+            "codes": [
+                1333,
+                1339,
+                1388,
+                1373,
+                974,
+                723,
+                1776,
+                1001,
+                1160,
+                1769,
+                1048,
+                1646,
+                1321,
+                912
+            ]
+        },
+        {
+            "word": "name",
+            "duration": 0.2,
+            "codes": [
+                1596,
+                325,
+                876,
+                1303,
+                973,
+                1707,
+                1332,
+                1300,
+                145,
+                1136,
+                1266,
+                1353,
+                845,
+                913,
+                989
+            ]
+        },
+        {
+            "word": "is",
+            "duration": 0.12,
+            "codes": [
+                1257,
+                1372,
+                1617,
+                1800,
+                1568,
+                1679,
+                1798,
+                1476,
+                1759
+            ]
+        },
+        {
+            "word": "saheed",
+            "duration": 0.5,
+            "codes": [
+                1807,
+                1354,
+                1737,
+                1738,
+                1060,
+                1122,
+                1195,
+                1275,
+                1129,
+                1473,
+                688,
+                1675,
+                1724,
+                1392,
+                1146,
+                1605,
+                1784,
+                1476,
+                1454,
+                1743,
+                1824,
+                706,
+                1706,
+                669,
+                91,
+                1079,
+                1456,
+                1645,
+                1041,
+                1687,
+                1425,
+                1205,
+                830,
+                1525,
+                1007,
+                1291,
+                723
+            ]
+        },
+        {
+            "word": "azeez",
+            "duration": 0.48,
+            "codes": [
+                829,
+                926,
+                1438,
+                1124,
+                1282,
+                1745,
+                1019,
+                1430,
+                1657,
+                1715,
+                1637,
+                1653,
+                1713,
+                1370,
+                1534,
+                1410,
+                1767,
+                814,
+                22,
+                1703,
+                1534,
+                1797,
+                1488,
+                1812,
+                1637,
+                1791,
+                1720,
+                1677,
+                1807,
+                1459,
+                1779,
+                1767,
+                1145,
+                1239,
+                1622,
+                1264
+            ]
+        },
+        {
+            "word": "and",
+            "duration": 0.24,
+            "codes": [
+                1780,
+                1291,
+                1174,
+                1435,
+                1494,
+                1807,
+                662,
+                1760,
+                1694,
+                363,
+                1225,
+                1775,
+                1264,
+                1455,
+                1014,
+                1758,
+                1620,
+                1013
+            ]
+        },
+        {
+            "word": "i",
+            "duration": 0.06,
+            "codes": [
+                1823,
+                1295,
+                1397,
+                1108,
+                1275
+            ]
+        },
+        {
+            "word": "am",
+            "duration": 0.14,
+            "codes": [
+                1129,
+                1697,
+                835,
+                1589,
+                1719,
+                1534,
+                1495,
+                1025,
+                1405,
+                766
+            ]
+        },
+        {
+            "word": "testing",
+            "duration": 0.42,
+            "codes": [
+                196,
+                1118,
+                761,
+                1314,
+                1770,
+                1138,
+                1429,
+                728,
+                1497,
+                1792,
+                1049,
+                1430,
+                1062,
+                1788,
+                1354,
+                1555,
+                1735,
+                1728,
+                954,
+                1754,
+                343,
+                1418,
+                636,
+                1501,
+                1301,
+                901,
+                763,
+                1620,
+                1687,
+                177,
+                1706,
+                325
+            ]
+        },
+        {
+            "word": "the",
+            "duration": 0.14,
+            "codes": [
+                810,
+                1421,
+                1404,
+                1093,
+                781,
+                752,
+                1780,
+                1749,
+                850,
+                1435
+            ]
+        },
+        {
+            "word": "audio",
+            "duration": 0.3,
+            "codes": [
+                1792,
+                1381,
+                1309,
+                1472,
+                1449,
+                1785,
+                114,
+                601,
+                866,
+                1764,
+                1212,
+                1453,
+                1152,
+                1777,
+                853,
+                1735,
+                1052,
+                355,
+                1421,
+                1605,
+                1761,
+                1664,
+                540
+            ]
+        },
+        {
+            "word": "feature",
+            "duration": 0.4,
+            "codes": [
+                1682,
+                1442,
+                1819,
+                1818,
+                710,
+                1776,
+                1205,
+                646,
+                1688,
+                1572,
+                875,
+                1367,
+                476,
+                1285,
+                460,
+                342,
+                1784,
+                28,
+                1621,
+                1745,
+                1462,
+                988,
+                1780,
+                1697,
+                1249,
+                1348,
+                1120,
+                1590,
+                803,
+                1205
+            ]
+        }
+    ]
+}

default_speakers/baraka.json ADDED Viewed

	@@ -0,0 +1,469 @@

+{
+    "text": "that i'd like to share with everybody in the world yes sometimes you go all the way",
+    "words": [
+        {
+            "word": "that",
+            "duration": 0.48,
+            "codes": [
+                519,
+                848,
+                1374,
+                416,
+                940,
+                1445,
+                416,
+                753,
+                1616,
+                774,
+                803,
+                1697,
+                1541,
+                1047,
+                200,
+                462,
+                1417,
+                1313,
+                1296,
+                184,
+                1396,
+                1568,
+                1416,
+                1444,
+                1631,
+                1463,
+                702,
+                1831,
+                1564,
+                1374,
+                1580,
+                1643,
+                1681,
+                1660,
+                1124,
+                1720
+            ]
+        },
+        {
+            "word": "id",
+            "duration": 0.38,
+            "codes": [
+                4,
+                705,
+                1534,
+                1290,
+                1661,
+                302,
+                1798,
+                844,
+                197,
+                1027,
+                1606,
+                903,
+                1414,
+                794,
+                871,
+                882,
+                941,
+                1310,
+                871,
+                1247,
+                1140,
+                1247,
+                718,
+                1422,
+                1509,
+                1678,
+                1093,
+                1734
+            ]
+        },
+        {
+            "word": "like",
+            "duration": 0.18,
+            "codes": [
+                647,
+                1824,
+                474,
+                1111,
+                599,
+                221,
+                1435,
+                822,
+                1409,
+                1717,
+                1748,
+                1550,
+                1738,
+                1717
+            ]
+        },
+        {
+            "word": "to",
+            "duration": 0.14,
+            "codes": [
+                1535,
+                231,
+                1794,
+                1553,
+                1351,
+                1365,
+                1296,
+                1781,
+                1599,
+                1082
+            ]
+        },
+        {
+            "word": "share",
+            "duration": 0.18,
+            "codes": [
+                1737,
+                0,
+                979,
+                1688,
+                546,
+                1807,
+                319,
+                252,
+                1805,
+                714,
+                580,
+                1524,
+                798,
+                1779
+            ]
+        },
+        {
+            "word": "with",
+            "duration": 0.14,
+            "codes": [
+                1698,
+                702,
+                966,
+                1461,
+                127,
+                1681,
+                85,
+                1741,
+                1588,
+                718
+            ]
+        },
+        {
+            "word": "everybody",
+            "duration": 0.4,
+            "codes": [
+                1600,
+                806,
+                1770,
+                1078,
+                1727,
+                679,
+                1569,
+                1452,
+                1685,
+                774,
+                1598,
+                1382,
+                1520,
+                1786,
+                1702,
+                1607,
+                1747,
+                828,
+                1553,
+                983,
+                1103,
+                882,
+                1427,
+                1679,
+                1613,
+                1636,
+                1433,
+                519,
+                853,
+                1451
+            ]
+        },
+        {
+            "word": "in",
+            "duration": 0.06,
+            "codes": [
+                1369,
+                1654,
+                1581,
+                1600,
+                1452
+            ]
+        },
+        {
+            "word": "the",
+            "duration": 0.12,
+            "codes": [
+                1241,
+                1769,
+                678,
+                1751,
+                1280,
+                1711,
+                1663,
+                1772,
+                1655
+            ]
+        },
+        {
+            "word": "world",
+            "duration": 0.74,
+            "codes": [
+                973,
+                1231,
+                1015,
+                1052,
+                1415,
+                721,
+                1822,
+                825,
+                1076,
+                1431,
+                1357,
+                1389,
+                744,
+                1263,
+                1525,
+                1794,
+                319,
+                1678,
+                1732,
+                1395,
+                1695,
+                1827,
+                1059,
+                1719,
+                1675,
+                1714,
+                1635,
+                1466,
+                1730,
+                1750,
+                1395,
+                1525,
+                1827,
+                1313,
+                1440,
+                1447,
+                1292,
+                1762,
+                1226,
+                1418,
+                1750,
+                719,
+                1549,
+                1761,
+                1459,
+                1717,
+                1800,
+                1404,
+                1702,
+                1795,
+                1711,
+                1789,
+                1808,
+                1759,
+                385,
+                415
+            ]
+        },
+        {
+            "word": "yes",
+            "duration": 0.32,
+            "codes": [
+                302,
+                1704,
+                485,
+                983,
+                234,
+                63,
+                462,
+                483,
+                82,
+                827,
+                999,
+                1143,
+                102,
+                1655,
+                117,
+                1619,
+                519,
+                1217,
+                1518,
+                1476,
+                333,
+                1660,
+                1238,
+                1679
+            ]
+        },
+        {
+            "word": "sometimes",
+            "duration": 0.58,
+            "codes": [
+                1287,
+                546,
+                1552,
+                1736,
+                1647,
+                836,
+                575,
+                354,
+                1156,
+                1264,
+                1194,
+                1761,
+                1629,
+                1452,
+                1241,
+                1394,
+                856,
+                1313,
+                1653,
+                736,
+                556,
+                1387,
+                1824,
+                966,
+                373,
+                1424,
+                1342,
+                221,
+                580,
+                1412,
+                940,
+                626,
+                1797,
+                858,
+                972,
+                1525,
+                1744,
+                738,
+                1695,
+                1542,
+                1604,
+                1394,
+                1627
+            ]
+        },
+        {
+            "word": "you",
+            "duration": 0.12,
+            "codes": [
+                1460,
+                546,
+                1427,
+                1451,
+                1081,
+                1760,
+                1463,
+                1628,
+                1692
+            ]
+        },
+        {
+            "word": "go",
+            "duration": 0.26,
+            "codes": [
+                1521,
+                1734,
+                753,
+                770,
+                1640,
+                1757,
+                297,
+                462,
+                702,
+                1826,
+                1440,
+                1828,
+                1747,
+                1651,
+                1729,
+                1087,
+                580,
+                1698,
+                1194,
+                1308
+            ]
+        },
+        {
+            "word": "all",
+            "duration": 0.42,
+            "codes": [
+                863,
+                610,
+                429,
+                443,
+                1087,
+                183,
+                782,
+                613,
+                222,
+                1047,
+                1492,
+                154,
+                955,
+                429,
+                443,
+                613,
+                983,
+                328,
+                382,
+                359,
+                341,
+                217,
+                456,
+                289,
+                1324,
+                714,
+                756,
+                369,
+                211,
+                127,
+                1827,
+                1563
+            ]
+        },
+        {
+            "word": "the",
+            "duration": 0.12,
+            "codes": [
+                1686,
+                949,
+                1296,
+                829,
+                1463,
+                1731,
+                1222,
+                1353,
+                1780
+            ]
+        },
+        {
+            "word": "way",
+            "duration": 0.18,
+            "codes": [
+                1263,
+                890,
+                683,
+                289,
+                217,
+                326,
+                335,
+                1059,
+                1204,
+                213,
+                1340,
+                289,
+                191
+            ]
+        }
+    ]
+}

default_speakers/chinenye.json ADDED Viewed

	@@ -0,0 +1,274 @@

+{
+    "text": "and once I got that out of the way",
+    "words": [
+        {
+            "word": "and",
+            "duration": 1.18,
+            "codes": [
+                1073,
+                1804,
+                1510,
+                1562,
+                377,
+                1287,
+                1615,
+                175,
+                631,
+                1702,
+                1700,
+                1590,
+                1158,
+                1676,
+                758,
+                1727,
+                1548,
+                1464,
+                1605,
+                1469,
+                1291,
+                1755,
+                1656,
+                1323,
+                1372,
+                269,
+                1252,
+                1466,
+                1677,
+                1192,
+                1220,
+                1815,
+                1658,
+                1818,
+                1514,
+                1480,
+                1747,
+                1413,
+                1440,
+                1403,
+                28,
+                1806,
+                1536,
+                1269,
+                1673,
+                1616,
+                1619,
+                1745,
+                1532,
+                1659,
+                1682,
+                1777,
+                1764,
+                1766,
+                1796,
+                1827,
+                719,
+                1768,
+                1761,
+                1524,
+                1782,
+                1410,
+                1748,
+                1764,
+                1447,
+                1791,
+                1790,
+                1528,
+                1550,
+                1491,
+                1764,
+                1324,
+                790,
+                1307,
+                664,
+                719,
+                1224,
+                1571,
+                1740,
+                1062,
+                1775,
+                1494,
+                486,
+                1544,
+                1828,
+                961,
+                1115,
+                1308
+            ]
+        },
+        {
+            "word": "once",
+            "duration": 0.46,
+            "codes": [
+                996,
+                1407,
+                892,
+                1326,
+                1223,
+                362,
+                36,
+                1103,
+                1734,
+                1755,
+                1798,
+                749,
+                1603,
+                1748,
+                519,
+                1643,
+                1744,
+                176,
+                1709,
+                749,
+                1615,
+                1801,
+                1438,
+                1719,
+                1491,
+                1802,
+                1575,
+                1750,
+                1180,
+                1077,
+                855,
+                1511,
+                961,
+                1739,
+                632
+            ]
+        },
+        {
+            "word": "i",
+            "duration": 0.16,
+            "codes": [
+                398,
+                1055,
+                767,
+                57,
+                1777,
+                1706,
+                34,
+                1025,
+                1745,
+                1796,
+                1266,
+                1348
+            ]
+        },
+        {
+            "word": "got",
+            "duration": 0.24,
+            "codes": [
+                1555,
+                639,
+                1708,
+                813,
+                1152,
+                753,
+                718,
+                1742,
+                756,
+                1109,
+                1796,
+                85,
+                1623,
+                1769,
+                1759,
+                1491,
+                1769,
+                1693
+            ]
+        },
+        {
+            "word": "that",
+            "duration": 0.28,
+            "codes": [
+                1555,
+                1732,
+                1301,
+                755,
+                1224,
+                1192,
+                1241,
+                1192,
+                1102,
+                944,
+                1358,
+                855,
+                1342,
+                1603,
+                1693,
+                1783,
+                1689,
+                1803,
+                1126,
+                1089,
+                839
+            ]
+        },
+        {
+            "word": "out",
+            "duration": 0.16,
+            "codes": [
+                887,
+                1726,
+                1411,
+                1758,
+                839,
+                9,
+                1686,
+                1642,
+                1695,
+                998,
+                828,
+                1755
+            ]
+        },
+        {
+            "word": "of",
+            "duration": 0.08,
+            "codes": [
+                1825,
+                1734,
+                1281,
+                1794,
+                1518,
+                1696
+            ]
+        },
+        {
+            "word": "the",
+            "duration": 0.14,
+            "codes": [
+                1565,
+                1608,
+                1541,
+                1258,
+                1798,
+                1499,
+                1685,
+                1554,
+                1776,
+                1602,
+                1381
+            ]
+        },
+        {
+            "word": "way",
+            "duration": 0.16,
+            "codes": [
+                1822,
+                1773,
+                1663,
+                1710,
+                1554,
+                1493,
+                4,
+                1620,
+                1755,
+                416,
+                1384,
+                1688
+            ]
+        }
+    ]
+}

default_speakers/emma.json ADDED Viewed

	@@ -0,0 +1,441 @@

+{
+    "text": "Scientists have discovered a new planet that may be capable of supporting life!",
+    "words": [
+        {
+            "word": "scientists",
+            "duration": 0.82,
+            "codes": [
+                1334,
+                1359,
+                619,
+                1057,
+                1528,
+                817,
+                1175,
+                884,
+                527,
+                1519,
+                323,
+                980,
+                608,
+                1104,
+                1271,
+                1265,
+                1237,
+                191,
+                1308,
+                203,
+                1126,
+                1226,
+                1265,
+                1073,
+                1661,
+                903,
+                502,
+                197,
+                127,
+                1712,
+                877,
+                1717,
+                1735,
+                1076,
+                1284,
+                1629,
+                784,
+                62,
+                175,
+                432,
+                767,
+                533,
+                990,
+                1258,
+                823,
+                1651,
+                1801,
+                701,
+                1382,
+                554,
+                527,
+                117,
+                323,
+                989,
+                884,
+                817,
+                495,
+                781,
+                1214,
+                1099,
+                1104
+            ]
+        },
+        {
+            "word": "have",
+            "duration": 0.24,
+            "codes": [
+                930,
+                1393,
+                1303,
+                1001,
+                1438,
+                628,
+                1774,
+                973,
+                1758,
+                1501,
+                1761,
+                1428,
+                1725,
+                669,
+                1780,
+                487,
+                866,
+                1762
+            ]
+        },
+        {
+            "word": "discovered",
+            "duration": 0.66,
+            "codes": [
+                820,
+                1592,
+                1737,
+                731,
+                1325,
+                1644,
+                884,
+                1300,
+                323,
+                596,
+                231,
+                296,
+                943,
+                990,
+                1214,
+                1039,
+                1039,
+                1430,
+                866,
+                19,
+                1675,
+                1824,
+                1030,
+                1630,
+                1758,
+                783,
+                1598,
+                1832,
+                1330,
+                1319,
+                1730,
+                1449,
+                1414,
+                1511,
+                695,
+                1526,
+                1410,
+                95,
+                1686,
+                1400,
+                961,
+                1809,
+                1303,
+                355,
+                544,
+                1671,
+                1493,
+                1290,
+                1732,
+                1808
+            ]
+        },
+        {
+            "word": "a",
+            "duration": 0.14,
+            "codes": [
+                968,
+                1281,
+                895,
+                1827,
+                1819,
+                694,
+                1509,
+                1346,
+                928,
+                1449,
+                1512
+            ]
+        },
+        {
+            "word": "new",
+            "duration": 0.24,
+            "codes": [
+                1433,
+                1689,
+                1685,
+                1598,
+                1547,
+                1369,
+                1228,
+                1708,
+                1285,
+                1722,
+                1257,
+                625,
+                1114,
+                1425,
+                465,
+                950,
+                651,
+                561
+            ]
+        },
+        {
+            "word": "planet",
+            "duration": 0.48,
+            "codes": [
+                1707,
+                821,
+                1225,
+                1228,
+                1168,
+                1291,
+                1739,
+                813,
+                1738,
+                966,
+                1829,
+                1229,
+                1751,
+                1280,
+                1120,
+                1537,
+                1145,
+                1257,
+                1145,
+                1490,
+                1565,
+                41,
+                1677,
+                1796,
+                1258,
+                1228,
+                1389,
+                1145,
+                1433,
+                763,
+                1255,
+                355,
+                509,
+                869,
+                1144,
+                501
+            ]
+        },
+        {
+            "word": "that",
+            "duration": 0.26,
+            "codes": [
+                1571,
+                1404,
+                1484,
+                1716,
+                1136,
+                1720,
+                1237,
+                1420,
+                1680,
+                892,
+                1458,
+                1697,
+                669,
+                1658,
+                859,
+                1128,
+                804,
+                1157,
+                1694
+            ]
+        },
+        {
+            "word": "may",
+            "duration": 0.18,
+            "codes": [
+                1339,
+                761,
+                820,
+                1150,
+                823,
+                1706,
+                1815,
+                1354,
+                1417,
+                820,
+                744,
+                1413,
+                995,
+                733
+            ]
+        },
+        {
+            "word": "be",
+            "duration": 0.18,
+            "codes": [
+                20,
+                1763,
+                1417,
+                821,
+                1384,
+                1784,
+                968,
+                1767,
+                501,
+                795,
+                378,
+                242,
+                447
+            ]
+        },
+        {
+            "word": "capable",
+            "duration": 0.56,
+            "codes": [
+                666,
+                1170,
+                1637,
+                1746,
+                1042,
+                1331,
+                695,
+                1739,
+                1136,
+                1471,
+                1823,
+                1185,
+                1231,
+                459,
+                1071,
+                168,
+                418,
+                513,
+                431,
+                669,
+                840,
+                938,
+                1463,
+                1640,
+                1741,
+                86,
+                1273,
+                724,
+                1006,
+                544,
+                1408,
+                1352,
+                1721,
+                1490,
+                1321,
+                1674,
+                792,
+                1765,
+                1093,
+                1731,
+                1506,
+                1742,
+                1465
+            ]
+        },
+        {
+            "word": "of",
+            "duration": 0.16,
+            "codes": [
+                1697,
+                1435,
+                42,
+                1593,
+                1573,
+                1146,
+                1600,
+                980,
+                878,
+                713,
+                796,
+                1364
+            ]
+        },
+        {
+            "word": "supporting",
+            "duration": 0.62,
+            "codes": [
+                541,
+                833,
+                1546,
+                1230,
+                1232,
+                1417,
+                1473,
+                1486,
+                1759,
+                1327,
+                1806,
+                544,
+                918,
+                526,
+                418,
+                950,
+                669,
+                1749,
+                1499,
+                959,
+                1806,
+                203,
+                1771,
+                1651,
+                1433,
+                686,
+                967,
+                484,
+                649,
+                884,
+                176,
+                323,
+                1349,
+                722,
+                1230,
+                1218,
+                1430,
+                1663,
+                1648,
+                1808,
+                1629,
+                1822,
+                1813,
+                1663,
+                1418,
+                1742
+            ]
+        },
+        {
+            "word": "life",
+            "duration": 0.22,
+            "codes": [
+                1622,
+                1648,
+                1141,
+                1682,
+                1353,
+                1351,
+                1822,
+                1229,
+                1621,
+                1435,
+                1766,
+                1428,
+                1727,
+                1343,
+                1769,
+                823,
+                1050
+            ]
+        }
+    ]
+}

default_speakers/idera.json ADDED Viewed

	@@ -0,0 +1,396 @@

+{
+    "text": "Scientists have discovered a new planet that may be capable of supporting life!",
+    "words": [
+        {
+            "word": "scientists",
+            "duration": "1.00",
+            "codes": [
+                258,
+                551,
+                21,
+                401,
+                509,
+                235,
+                151,
+                94,
+                194,
+                496,
+                241,
+                420,
+                606,
+                256,
+                311,
+                464,
+                343,
+                765,
+                56,
+                23,
+                209,
+                72,
+                851,
+                360,
+                442,
+                257,
+                457,
+                75,
+                265,
+                227,
+                16,
+                167,
+                194,
+                391,
+                68,
+                786,
+                1642,
+                888,
+                884,
+                1688,
+                1021,
+                1270,
+                1250,
+                640,
+                1471,
+                1193,
+                1117,
+                95,
+                158,
+                587,
+                1484,
+                1054,
+                947,
+                521,
+                234,
+                502,
+                1172,
+                1379,
+                1332,
+                1267,
+                1659,
+                226,
+                325,
+                404,
+                634,
+                713,
+                333,
+                1210,
+                1028,
+                700,
+                1804,
+                1549,
+                1552,
+                1527,
+                701,
+                895
+            ]
+        },
+        {
+            "word": "have",
+            "duration": "0.16",
+            "codes": [
+                652,
+                1487,
+                1045,
+                665,
+                384,
+                908,
+                1073,
+                903,
+                169,
+                91,
+                1242,
+                59,
+                1614
+            ]
+        },
+        {
+            "word": "discovered",
+            "duration": "0.52",
+            "codes": [
+                1523,
+                519,
+                1311,
+                1166,
+                1049,
+                368,
+                176,
+                1546,
+                990,
+                546,
+                1091,
+                872,
+                975,
+                224,
+                419,
+                1714,
+                1247,
+                1769,
+                1141,
+                811,
+                1149,
+                320,
+                1161,
+                982,
+                732,
+                473,
+                1025,
+                470,
+                1253,
+                1345,
+                965,
+                916,
+                407,
+                844,
+                594,
+                1710,
+                193,
+                740,
+                761,
+                1740
+            ]
+        },
+        {
+            "word": "a",
+            "duration": "0.08",
+            "codes": [
+                5,
+                414,
+                1608,
+                449,
+                1643,
+                1732,
+                1653
+            ]
+        },
+        {
+            "word": "new",
+            "duration": "0.18",
+            "codes": [
+                396,
+                1599,
+                1733,
+                250,
+                1624,
+                485,
+                1645,
+                771,
+                1630,
+                736,
+                336,
+                476,
+                641,
+                345
+            ]
+        },
+        {
+            "word": "planet",
+            "duration": "0.38",
+            "codes": [
+                21,
+                131,
+                1743,
+                1082,
+                1707,
+                86,
+                1075,
+                883,
+                944,
+                1103,
+                790,
+                978,
+                860,
+                1738,
+                1060,
+                749,
+                171,
+                679,
+                1144,
+                966,
+                1532,
+                1179,
+                714,
+                1123,
+                1308,
+                1524,
+                752,
+                1613,
+                1266
+            ]
+        },
+        {
+            "word": "that",
+            "duration": "0.14",
+            "codes": [
+                64,
+                32,
+                1457,
+                1095,
+                931,
+                1774,
+                1017,
+                1661,
+                1713,
+                355,
+                1708
+            ]
+        },
+        {
+            "word": "may",
+            "duration": "0.12",
+            "codes": [
+                1800,
+                1070,
+                1452,
+                1185,
+                1295,
+                26,
+                638,
+                240,
+                1480,
+                1461
+            ]
+        },
+        {
+            "word": "be",
+            "duration": "0.12",
+            "codes": [
+                859,
+                729,
+                848,
+                1131,
+                1618,
+                928,
+                331,
+                504,
+                487,
+                417
+            ]
+        },
+        {
+            "word": "capable",
+            "duration": "0.42",
+            "codes": [
+                686,
+                1040,
+                28,
+                1456,
+                1056,
+                1133,
+                901,
+                1127,
+                693,
+                1406,
+                20,
+                118,
+                141,
+                572,
+                845,
+                1280,
+                353,
+                1726,
+                338,
+                1413,
+                484,
+                272,
+                1569,
+                144,
+                1581,
+                437,
+                1502,
+                963,
+                1415,
+                655,
+                949,
+                1289
+            ]
+        },
+        {
+            "word": "of",
+            "duration": "0.10",
+            "codes": [
+                1198,
+                1755,
+                1478,
+                1548,
+                802,
+                1513,
+                1290,
+                636
+            ]
+        },
+        {
+            "word": "supporting",
+            "duration": "0.54",
+            "codes": [
+                541,
+                867,
+                750,
+                1505,
+                754,
+                1344,
+                1032,
+                734,
+                505,
+                559,
+                220,
+                288,
+                342,
+                591,
+                1459,
+                1721,
+                490,
+                825,
+                80,
+                1221,
+                1234,
+                639,
+                1052,
+                450,
+                1557,
+                1302,
+                784,
+                1547,
+                823,
+                527,
+                1667,
+                1437,
+                832,
+                1366,
+                674,
+                1607,
+                486,
+                893,
+                1748,
+                792,
+                1757
+            ]
+        },
+        {
+            "word": "life",
+            "duration": "0.28",
+            "codes": [
+                1761,
+                149,
+                1501,
+                1342,
+                1063,
+                1124,
+                117,
+                1225,
+                1115,
+                1155,
+                1815,
+                1035,
+                936,
+                807,
+                930,
+                1514,
+                837,
+                1104,
+                1145,
+                1164,
+                1687,
+                1589
+            ]
+        }
+    ]
+}

default_speakers/joke.json ADDED Viewed

	@@ -0,0 +1,430 @@

+{
+    "text": "i still said you and i was like mister so this is what you are doing with",
+    "words": [
+        {
+            "word": "i",
+            "duration": 0.34,
+            "codes": [
+                1737,
+                1555,
+                1439,
+                1679,
+                1634,
+                1661,
+                1764,
+                1698,
+                1715,
+                862,
+                1516,
+                1427,
+                1350,
+                1136,
+                1472,
+                1113,
+                1686,
+                1596,
+                1005,
+                1365,
+                1180,
+                1473,
+                1296,
+                1337,
+                1579
+            ]
+        },
+        {
+            "word": "still",
+            "duration": 0.26,
+            "codes": [
+                848,
+                1653,
+                1756,
+                1711,
+                1693,
+                1722,
+                1580,
+                1552,
+                502,
+                1416,
+                1463,
+                1341,
+                1449,
+                1542,
+                1700,
+                1786,
+                428,
+                1728,
+                1624,
+                1624
+            ]
+        },
+        {
+            "word": "said",
+            "duration": 0.24,
+            "codes": [
+                1657,
+                1744,
+                1657,
+                1634,
+                1615,
+                1534,
+                996,
+                1296,
+                1542,
+                577,
+                1047,
+                1506,
+                440,
+                1756,
+                1783,
+                1593,
+                906,
+                1810
+            ]
+        },
+        {
+            "word": "you",
+            "duration": 0.62,
+            "codes": [
+                1610,
+                409,
+                1534,
+                1685,
+                1709,
+                1756,
+                363,
+                1441,
+                1789,
+                1594,
+                863,
+                1773,
+                1612,
+                1535,
+                1602,
+                1615,
+                1426,
+                48,
+                1690,
+                1740,
+                1650,
+                1824,
+                1613,
+                1807,
+                1041,
+                1778,
+                719,
+                1002,
+                1759,
+                1403,
+                1766,
+                1826,
+                1002,
+                1769,
+                1661,
+                1278,
+                1759,
+                1351,
+                1638,
+                1740,
+                1395,
+                1722,
+                1765,
+                1751,
+                1461,
+                1492
+            ]
+        },
+        {
+            "word": "and",
+            "duration": 0.14,
+            "codes": [
+                1056,
+                1494,
+                1389,
+                1002,
+                1452,
+                1413,
+                1345,
+                1401,
+                1593,
+                1073,
+                775
+            ]
+        },
+        {
+            "word": "i",
+            "duration": 0.08,
+            "codes": [
+                1812,
+                547,
+                1581,
+                1468,
+                949,
+                1740
+            ]
+        },
+        {
+            "word": "was",
+            "duration": 0.16,
+            "codes": [
+                1662,
+                1542,
+                363,
+                1374,
+                1598,
+                1563,
+                1394,
+                473,
+                863,
+                1587,
+                1685,
+                1729
+            ]
+        },
+        {
+            "word": "like",
+            "duration": 0.28,
+            "codes": [
+                1407,
+                1444,
+                1286,
+                1506,
+                1366,
+                1286,
+                1013,
+                502,
+                631,
+                1449,
+                1374,
+                1711,
+                1413,
+                1660,
+                1679,
+                1783,
+                1772,
+                1723,
+                1549,
+                1674,
+                1388
+            ]
+        },
+        {
+            "word": "mister",
+            "duration": 0.84,
+            "codes": [
+                1591,
+                1765,
+                1653,
+                1549,
+                1449,
+                1341,
+                473,
+                1363,
+                1605,
+                1554,
+                1387,
+                1641,
+                1439,
+                362,
+                1606,
+                319,
+                1691,
+                1582,
+                1617,
+                1756,
+                1286,
+                1409,
+                1221,
+                1372,
+                1584,
+                794,
+                1636,
+                1488,
+                1280,
+                1366,
+                1753,
+                1636,
+                882,
+                1723,
+                1796,
+                1769,
+                1717,
+                1549,
+                1518,
+                1633,
+                175,
+                1678,
+                1679,
+                1549,
+                1732,
+                1710,
+                1662,
+                1744,
+                1641,
+                1696,
+                1565,
+                1769,
+                1789,
+                719,
+                1831,
+                1786,
+                1451,
+                1728,
+                1646,
+                1713,
+                1672,
+                1774,
+                1734
+            ]
+        },
+        {
+            "word": "so",
+            "duration": 0.14,
+            "codes": [
+                1354,
+                1518,
+                1791,
+                1374,
+                277,
+                1542,
+                1366,
+                700,
+                1444,
+                1744,
+                1217
+            ]
+        },
+        {
+            "word": "this",
+            "duration": 0.2,
+            "codes": [
+                1461,
+                1588,
+                1672,
+                1712,
+                1679,
+                175,
+                63,
+                426,
+                293,
+                1654,
+                57,
+                1616,
+                1394,
+                1789,
+                175
+            ]
+        },
+        {
+            "word": "is",
+            "duration": 0.06,
+            "codes": [
+                1394,
+                1605,
+                1596,
+                1800,
+                269
+            ]
+        },
+        {
+            "word": "what",
+            "duration": 0.16,
+            "codes": [
+                1706,
+                759,
+                1047,
+                1493,
+                637,
+                1723,
+                1772,
+                1748,
+                1634,
+                4,
+                1387,
+                1710
+            ]
+        },
+        {
+            "word": "you",
+            "duration": 0.1,
+            "codes": [
+                890,
+                1374,
+                1019,
+                848,
+                1415,
+                1341,
+                1073
+            ]
+        },
+        {
+            "word": "are",
+            "duration": 0.1,
+            "codes": [
+                1286,
+                127,
+                949,
+                870,
+                1734,
+                1593,
+                1761,
+                1717
+            ]
+        },
+        {
+            "word": "doing",
+            "duration": 0.22,
+            "codes": [
+                1643,
+                1485,
+                1708,
+                1394,
+                1469,
+                348,
+                1676,
+                1685,
+                428,
+                1584,
+                1695,
+                1596,
+                1613,
+                1286,
+                1787,
+                1374
+            ]
+        },
+        {
+            "word": "with",
+            "duration": 0.36,
+            "codes": [
+                1382,
+                615,
+                1127,
+                1742,
+                1591,
+                239,
+                1810,
+                1778,
+                719,
+                1616,
+                1549,
+                519,
+                1804,
+                1416,
+                1636,
+                1584,
+                1437,
+                1698,
+                1625,
+                1494,
+                1633,
+                1545,
+                1747,
+                1737,
+                1672,
+                1646,
+                1778
+            ]
+        }
+    ]
+}

default_speakers/jude.json ADDED Viewed

	@@ -0,0 +1,263 @@

+{
+    "text": "know what I'm saying what I'm saying is that if you say",
+    "words": [
+        {
+            "word": "know",
+            "duration": 0.44,
+            "codes": [
+                1824,
+                1820,
+                1743,
+                1819,
+                1171,
+                1796,
+                1613,
+                1126,
+                1500,
+                1346,
+                1429,
+                1810,
+                1655,
+                1462,
+                1780,
+                1812,
+                1518,
+                1431,
+                741,
+                1206,
+                1325,
+                1392,
+                920,
+                409,
+                4,
+                1270,
+                416,
+                1759,
+                1141,
+                708,
+                1022,
+                1769,
+                1384
+            ]
+        },
+        {
+            "word": "what",
+            "duration": 0.12,
+            "codes": [
+                607,
+                787,
+                48,
+                1350,
+                1340,
+                297,
+                364,
+                825,
+                1775
+            ]
+        },
+        {
+            "word": "im",
+            "duration": 0.1,
+            "codes": [
+                1668,
+                1311,
+                1651,
+                1048,
+                176,
+                430,
+                333
+            ]
+        },
+        {
+            "word": "saying",
+            "duration": 0.56,
+            "codes": [
+                822,
+                648,
+                1568,
+                1660,
+                1071,
+                1399,
+                890,
+                1396,
+                1381,
+                1818,
+                124,
+                1623,
+                361,
+                1588,
+                1688,
+                1280,
+                1805,
+                1659,
+                1605,
+                1412,
+                1672,
+                1752,
+                1741,
+                1514,
+                1817,
+                1796,
+                1763,
+                1790,
+                1595,
+                1788,
+                1823,
+                758,
+                1466,
+                1802,
+                1788,
+                1649,
+                1614,
+                1751,
+                1718,
+                1585,
+                1637,
+                1773
+            ]
+        },
+        {
+            "word": "what",
+            "duration": 0.12,
+            "codes": [
+                1666,
+                1680,
+                1431,
+                411,
+                1687,
+                695,
+                1629,
+                1678,
+                664,
+                1087
+            ]
+        },
+        {
+            "word": "im",
+            "duration": 0.16,
+            "codes": [
+                117,
+                408,
+                1813,
+                1729,
+                1336,
+                1710,
+                1833,
+                1615,
+                276,
+                362,
+                1364,
+                687
+            ]
+        },
+        {
+            "word": "saying",
+            "duration": 0.26,
+            "codes": [
+                28,
+                440,
+                1376,
+                1196,
+                1147,
+                1636,
+                1272,
+                1449,
+                198,
+                1277,
+                1470,
+                1485,
+                1100,
+                1588,
+                1673,
+                1620,
+                1710,
+                1753,
+                806
+            ]
+        },
+        {
+            "word": "is",
+            "duration": 0.06,
+            "codes": [
+                1621,
+                1636,
+                1833,
+                529,
+                1653
+            ]
+        },
+        {
+            "word": "that",
+            "duration": 0.24,
+            "codes": [
+                1773,
+                1004,
+                1796,
+                907,
+                239,
+                1804,
+                565,
+                1432,
+                1534,
+                1718,
+                1643,
+                1432,
+                1447,
+                1273,
+                1824,
+                1657,
+                1776,
+                1651
+            ]
+        },
+        {
+            "word": "if",
+            "duration": 0.12,
+            "codes": [
+                1649,
+                1620,
+                1342,
+                176,
+                1773,
+                178,
+                1710,
+                1710,
+                1521
+            ]
+        },
+        {
+            "word": "you",
+            "duration": 0.16,
+            "codes": [
+                959,
+                1728,
+                1651,
+                361,
+                822,
+                1661,
+                1341,
+                780,
+                1518,
+                335,
+                452,
+                736
+            ]
+        },
+        {
+            "word": "say",
+            "duration": 0.14,
+            "codes": [
+                372,
+                1217,
+                713,
+                848,
+                1140,
+                1420,
+                1549,
+                483,
+                125,
+                1353
+            ]
+        }
+    ]
+}

default_speakers/onye.json ADDED Viewed

	@@ -0,0 +1,621 @@

+{
+    "text": "out to another level also going through in the shop chop scotch bonnet peppers",
+    "words": [
+        {
+            "word": "out",
+            "duration": 0.34,
+            "codes": [
+                546,
+                416,
+                1519,
+                1673,
+                1806,
+                1015,
+                693,
+                1447,
+                9,
+                1306,
+                1485,
+                1477,
+                1178,
+                1543,
+                1830,
+                1558,
+                1801,
+                1423,
+                1487,
+                1165,
+                1743,
+                1726,
+                1772,
+                368,
+                1555
+            ]
+        },
+        {
+            "word": "to",
+            "duration": 0.28,
+            "codes": [
+                1823,
+                1713,
+                1734,
+                368,
+                1547,
+                1741,
+                1737,
+                1784,
+                1801,
+                1732,
+                1389,
+                994,
+                1158,
+                1278,
+                1800,
+                1658,
+                519,
+                1542,
+                1792,
+                1700,
+                1415
+            ]
+        },
+        {
+            "word": "another",
+            "duration": 0.4,
+            "codes": [
+                1541,
+                1824,
+                1624,
+                1757,
+                1294,
+                1734,
+                1756,
+                1821,
+                1147,
+                1663,
+                1697,
+                1156,
+                1069,
+                53,
+                1223,
+                1212,
+                1736,
+                1748,
+                1744,
+                758,
+                1494,
+                374,
+                1187,
+                1448,
+                1410,
+                1356,
+                1732,
+                1452,
+                1295,
+                1656
+            ]
+        },
+        {
+            "word": "level",
+            "duration": 1.86,
+            "codes": [
+                1688,
+                1527,
+                1417,
+                1486,
+                384,
+                1378,
+                1342,
+                1075,
+                1046,
+                1247,
+                1660,
+                1525,
+                719,
+                1769,
+                1628,
+                1810,
+                1078,
+                1429,
+                1483,
+                1280,
+                1814,
+                1115,
+                184,
+                1014,
+                1686,
+                1341,
+                1347,
+                1502,
+                1350,
+                1666,
+                1686,
+                1823,
+                1749,
+                1412,
+                1651,
+                1832,
+                1701,
+                1782,
+                1741,
+                1798,
+                1828,
+                1701,
+                1796,
+                1807,
+                1701,
+                1768,
+                1817,
+                1524,
+                1786,
+                1400,
+                1717,
+                1722,
+                1773,
+                1202,
+                1098,
+                1161,
+                1750,
+                822,
+                1420,
+                1434,
+                979,
+                1764,
+                1313,
+                1734,
+                1458,
+                1660,
+                1200,
+                370,
+                1636,
+                1186,
+                768,
+                855,
+                599,
+                1632,
+                1164,
+                1041,
+                1791,
+                1714,
+                368,
+                1715,
+                1500,
+                1817,
+                1817,
+                1772,
+                1805,
+                1825,
+                1818,
+                1828,
+                1395,
+                1718,
+                1818,
+                0,
+                1696,
+                1808,
+                1637,
+                1796,
+                1701,
+                1796,
+                1824,
+                1646,
+                1702,
+                1714,
+                895,
+                1764,
+                1637,
+                1717,
+                1747,
+                1751,
+                1696,
+                639,
+                1436,
+                1828,
+                1818,
+                1737,
+                1832,
+                1646,
+                1796,
+                1822,
+                1741,
+                1791,
+                1701,
+                1796,
+                1779,
+                1638,
+                1783,
+                1751,
+                1781,
+                1768,
+                1412,
+                1744,
+                1720,
+                1403,
+                1802,
+                1638,
+                1734,
+                1802,
+                1826,
+                1785,
+                1443,
+                1167
+            ]
+        },
+        {
+            "word": "also",
+            "duration": 0.26,
+            "codes": [
+                973,
+                1187,
+                1333,
+                359,
+                1494,
+                1222,
+                1759,
+                749,
+                533,
+                4,
+                1599,
+                1608,
+                1280,
+                1167,
+                1015,
+                1526,
+                1662,
+                1728,
+                1016,
+                1796
+            ]
+        },
+        {
+            "word": "going",
+            "duration": 0.26,
+            "codes": [
+                1789,
+                1291,
+                1209,
+                828,
+                1452,
+                1749,
+                1052,
+                1460,
+                1783,
+                1656,
+                1542,
+                1281,
+                1710,
+                1716,
+                1404,
+                1734,
+                495,
+                1624,
+                1747
+            ]
+        },
+        {
+            "word": "through",
+            "duration": 0.34,
+            "codes": [
+                1465,
+                1664,
+                1786,
+                231,
+                1826,
+                1318,
+                1494,
+                1505,
+                1063,
+                1311,
+                1656,
+                1265,
+                1720,
+                1226,
+                940,
+                1490,
+                1447,
+                1730,
+                1348,
+                1637,
+                1118,
+                1710,
+                841,
+                795,
+                298,
+                1216
+            ]
+        },
+        {
+            "word": "in",
+            "duration": 0.42,
+            "codes": [
+                899,
+                1240,
+                869,
+                679,
+                1343,
+                1280,
+                1681,
+                1221,
+                1632,
+                1221,
+                1479,
+                1431,
+                1623,
+                1372,
+                1722,
+                1494,
+                1011,
+                1636,
+                957,
+                1661,
+                939,
+                1772,
+                1096,
+                1688,
+                1537,
+                1360,
+                1734,
+                1595,
+                1781,
+                1284,
+                1413
+            ]
+        },
+        {
+            "word": "the",
+            "duration": 1.08,
+            "codes": [
+                1701,
+                1447,
+                1328,
+                1690,
+                1281,
+                1401,
+                700,
+                1295,
+                1494,
+                1326,
+                1218,
+                361,
+                922,
+                1210,
+                1300,
+                19,
+                1403,
+                1272,
+                1150,
+                1062,
+                1457,
+                1344,
+                1167,
+                1742,
+                996,
+                1158,
+                1245,
+                1210,
+                1720,
+                1823,
+                85,
+                1829,
+                1555,
+                1718,
+                979,
+                1665,
+                1783,
+                1088,
+                1810,
+                1828,
+                1795,
+                1419,
+                1795,
+                1826,
+                1779,
+                1741,
+                1719,
+                1809,
+                1646,
+                1765,
+                1818,
+                1713,
+                1821,
+                1737,
+                1348,
+                1821,
+                1400,
+                1748,
+                1278,
+                1521,
+                758,
+                1701,
+                1798,
+                1817,
+                1646,
+                1672,
+                1825,
+                1796,
+                957,
+                1808,
+                1807,
+                1833,
+                1798,
+                1425,
+                1830,
+                1037,
+                1251,
+                554,
+                1395,
+                175,
+                919
+            ]
+        },
+        {
+            "word": "shop",
+            "duration": 0.3,
+            "codes": [
+                1611,
+                154,
+                1329,
+                1701,
+                1677,
+                1210,
+                880,
+                660,
+                816,
+                1276,
+                1471,
+                41,
+                1779,
+                1465,
+                1298,
+                1817,
+                1777,
+                1073,
+                1713,
+                1808,
+                1818,
+                1348,
+                1711
+            ]
+        },
+        {
+            "word": "chop",
+            "duration": 0.3,
+            "codes": [
+                1439,
+                4,
+                315,
+                1751,
+                1731,
+                53,
+                1184,
+                1132,
+                755,
+                1429,
+                1464,
+                1483,
+                1770,
+                1749,
+                1278,
+                1769,
+                1511,
+                1683,
+                1779,
+                1660,
+                183,
+                1535,
+                416
+            ]
+        },
+        {
+            "word": "scotch",
+            "duration": 0.4,
+            "codes": [
+                1518,
+                1679,
+                0,
+                1695,
+                1682,
+                1098,
+                1764,
+                1256,
+                1808,
+                1609,
+                1745,
+                1318,
+                632,
+                1197,
+                271,
+                1683,
+                1774,
+                1824,
+                1783,
+                1671,
+                1805,
+                22,
+                631,
+                117,
+                1345,
+                800,
+                1707,
+                1466,
+                1005,
+                1462
+            ]
+        },
+        {
+            "word": "bonnet",
+            "duration": 0.34,
+            "codes": [
+                1677,
+                1826,
+                1277,
+                524,
+                1001,
+                789,
+                973,
+                1509,
+                1817,
+                546,
+                1260,
+                1117,
+                782,
+                142,
+                1455,
+                947,
+                1814,
+                1815,
+                0,
+                1538,
+                1766,
+                1744,
+                1824,
+                239,
+                1710
+            ]
+        },
+        {
+            "word": "peppers",
+            "duration": 0.5,
+            "codes": [
+                1817,
+                1287,
+                1769,
+                1309,
+                446,
+                1173,
+                1183,
+                375,
+                1342,
+                1815,
+                1382,
+                1685,
+                1797,
+                1351,
+                1798,
+                1631,
+                749,
+                1717,
+                1324,
+                1147,
+                1186,
+                955,
+                577,
+                1736,
+                827,
+                1240,
+                1484,
+                847,
+                1661,
+                1475,
+                1287,
+                1535,
+                595,
+                1286,
+                1734,
+                1256,
+                319,
+                1688
+            ]
+        }
+    ]
+}

default_speakers/osagie.json ADDED Viewed

	@@ -0,0 +1,486 @@

+{
+    "text": "do Charlotte Douglas shallots be me shut up dummy Libby shallots foolish storms",
+    "words": [
+        {
+            "word": "do",
+            "duration": 1.18,
+            "codes": [
+                1798,
+                858,
+                1653,
+                1400,
+                1441,
+                1810,
+                1180,
+                892,
+                1487,
+                380,
+                208,
+                452,
+                181,
+                714,
+                521,
+                152,
+                1180,
+                2,
+                142,
+                756,
+                208,
+                874,
+                380,
+                565,
+                422,
+                656,
+                81,
+                860,
+                146,
+                1042,
+                1685,
+                1580,
+                50,
+                137,
+                132,
+                170,
+                1633,
+                648,
+                1819,
+                898,
+                1247,
+                1646,
+                1491,
+                438,
+                85,
+                46,
+                170,
+                664,
+                2,
+                236,
+                65,
+                100,
+                393,
+                324,
+                170,
+                1499,
+                1619,
+                519,
+                123,
+                798,
+                79,
+                1447,
+                132,
+                146,
+                779,
+                380,
+                221,
+                1588,
+                228,
+                1443,
+                152,
+                1366,
+                1441,
+                189,
+                320,
+                1387,
+                368,
+                1599,
+                295,
+                65,
+                1353,
+                13,
+                920,
+                1341,
+                55,
+                315,
+                1542,
+                315
+            ]
+        },
+        {
+            "word": "charlotte",
+            "duration": 0.42,
+            "codes": [
+                543,
+                769,
+                69,
+                714,
+                725,
+                212,
+                374,
+                1439,
+                25,
+                1453,
+                637,
+                291,
+                1212,
+                106,
+                1671,
+                146,
+                82,
+                1261,
+                1710,
+                686,
+                1571,
+                213,
+                298,
+                510,
+                452,
+                1396,
+                1635,
+                1760,
+                1469,
+                1793,
+                1233,
+                851
+            ]
+        },
+        {
+            "word": "douglas",
+            "duration": 0.42,
+            "codes": [
+                1539,
+                2,
+                679,
+                51,
+                215,
+                1068,
+                295,
+                115,
+                1150,
+                753,
+                1806,
+                287,
+                85,
+                725,
+                1312,
+                293,
+                614,
+                1610,
+                380,
+                260,
+                1014,
+                104,
+                777,
+                1697,
+                270,
+                580,
+                794,
+                1345,
+                1552,
+                7,
+                178
+            ]
+        },
+        {
+            "word": "shallots",
+            "duration": 0.48,
+            "codes": [
+                315,
+                290,
+                333,
+                1761,
+                412,
+                520,
+                125,
+                367,
+                1001,
+                700,
+                1258,
+                955,
+                388,
+                880,
+                324,
+                637,
+                642,
+                1723,
+                1480,
+                990,
+                507,
+                652,
+                69,
+                1670,
+                1073,
+                1433,
+                830,
+                1737,
+                1769,
+                1829,
+                1524,
+                1605,
+                1737,
+                1660,
+                1782,
+                1687,
+                1802
+            ]
+        },
+        {
+            "word": "be",
+            "duration": 0.16,
+            "codes": [
+                1715,
+                687,
+                1365,
+                49,
+                98,
+                357,
+                1416,
+                245,
+                1058,
+                870,
+                1689,
+                1588
+            ]
+        },
+        {
+            "word": "me",
+            "duration": 0.36,
+            "codes": [
+                1469,
+                1221,
+                1783,
+                127,
+                372,
+                519,
+                98,
+                50,
+                1439,
+                876,
+                362,
+                1439,
+                1506,
+                1452,
+                736,
+                1740,
+                1715,
+                1641,
+                1628,
+                1807,
+                1654,
+                1601,
+                911,
+                788,
+                1451,
+                356,
+                1450
+            ]
+        },
+        {
+            "word": "shut",
+            "duration": 0.34,
+            "codes": [
+                202,
+                543,
+                1527,
+                1345,
+                105,
+                721,
+                128,
+                571,
+                1180,
+                1366,
+                1187,
+                860,
+                1113,
+                1089,
+                270,
+                113,
+                525,
+                992,
+                1588,
+                975,
+                668,
+                780,
+                399,
+                233,
+                510
+            ]
+        },
+        {
+            "word": "up",
+            "duration": 0.1,
+            "codes": [
+                1715,
+                1833,
+                1719,
+                363,
+                1763,
+                1784,
+                1765,
+                85
+            ]
+        },
+        {
+            "word": "dummy",
+            "duration": 0.36,
+            "codes": [
+                101,
+                47,
+                1127,
+                205,
+                164,
+                647,
+                300,
+                737,
+                300,
+                910,
+                549,
+                1598,
+                333,
+                900,
+                1521,
+                1287,
+                917,
+                362,
+                290,
+                1353,
+                917,
+                407,
+                1588,
+                1396,
+                1415,
+                440,
+                1565
+            ]
+        },
+        {
+            "word": "libby",
+            "duration": 0.36,
+            "codes": [
+                935,
+                479,
+                153,
+                127,
+                162,
+                782,
+                932,
+                1023,
+                1262,
+                343,
+                1728,
+                502,
+                1401,
+                996,
+                350,
+                1445,
+                856,
+                298,
+                48,
+                1698,
+                1470,
+                1736,
+                26,
+                1342,
+                328,
+                372,
+                1451
+            ]
+        },
+        {
+            "word": "shallots",
+            "duration": 0.4,
+            "codes": [
+                7,
+                50,
+                519,
+                1221,
+                212,
+                238,
+                1083,
+                844,
+                333,
+                182,
+                472,
+                839,
+                609,
+                656,
+                208,
+                291,
+                1234,
+                1678,
+                1151,
+                867,
+                290,
+                546,
+                848,
+                1700,
+                1740,
+                26,
+                1617,
+                1238,
+                183,
+                1693
+            ]
+        },
+        {
+            "word": "foolish",
+            "duration": 0.38,
+            "codes": [
+                863,
+                176,
+                1546,
+                1470,
+                1435,
+                716,
+                1460,
+                1013,
+                217,
+                1374,
+                736,
+                91,
+                959,
+                767,
+                1678,
+                1541,
+                903,
+                362,
+                1336,
+                1345,
+                546,
+                848,
+                253,
+                335,
+                510,
+                69,
+                546,
+                1166,
+                1677
+            ]
+        },
+        {
+            "word": "storms",
+            "duration": 0.4,
+            "codes": [
+                939,
+                1361,
+                1719,
+                1428,
+                1691,
+                319,
+                1596,
+                236,
+                757,
+                1625,
+                123,
+                1297,
+                55,
+                132,
+                708,
+                92,
+                1344,
+                848,
+                1232,
+                518,
+                695,
+                1726,
+                1502,
+                1759,
+                363,
+                1751,
+                1524,
+                409,
+                189,
+                0
+            ]
+        }
+    ]
+}

default_speakers/regina.json ADDED Viewed

	@@ -0,0 +1,574 @@

+{
+    "text": "was just like is that what is amazing to you your marriage is",
+    "words": [
+        {
+            "word": "was",
+            "duration": 1.02,
+            "codes": [
+                1514,
+                571,
+                892,
+                386,
+                186,
+                1403,
+                1082,
+                636,
+                851,
+                1287,
+                1678,
+                1166,
+                162,
+                1345,
+                282,
+                104,
+                1345,
+                329,
+                637,
+                844,
+                537,
+                1366,
+                537,
+                282,
+                1485,
+                537,
+                637,
+                844,
+                537,
+                1710,
+                375,
+                452,
+                1588,
+                537,
+                1382,
+                714,
+                206,
+                333,
+                330,
+                344,
+                281,
+                1523,
+                44,
+                1557,
+                315,
+                479,
+                271,
+                370,
+                110,
+                498,
+                768,
+                560,
+                579,
+                847,
+                961,
+                293,
+                1351,
+                1141,
+                138,
+                1229,
+                2,
+                847,
+                1245,
+                1345,
+                1829,
+                1811,
+                1326,
+                955,
+                1314,
+                137,
+                270,
+                1743,
+                324,
+                1389,
+                1027,
+                863
+            ]
+        },
+        {
+            "word": "just",
+            "duration": 0.28,
+            "codes": [
+                333,
+                38,
+                1518,
+                1296,
+                146,
+                1077,
+                1204,
+                665,
+                658,
+                1005,
+                944,
+                1136,
+                519,
+                749,
+                1061,
+                69,
+                1363,
+                415,
+                1679,
+                1741,
+                138
+            ]
+        },
+        {
+            "word": "like",
+            "duration": 1.68,
+            "codes": [
+                1796,
+                714,
+                65,
+                13,
+                664,
+                1077,
+                463,
+                232,
+                461,
+                1210,
+                356,
+                346,
+                1196,
+                202,
+                631,
+                1804,
+                1096,
+                450,
+                23,
+                1535,
+                415,
+                582,
+                328,
+                546,
+                1571,
+                344,
+                1512,
+                1242,
+                141,
+                194,
+                220,
+                258,
+                246,
+                220,
+                246,
+                542,
+                258,
+                246,
+                220,
+                151,
+                246,
+                542,
+                342,
+                220,
+                75,
+                246,
+                220,
+                246,
+                542,
+                246,
+                220,
+                542,
+                161,
+                450,
+                419,
+                246,
+                542,
+                246,
+                542,
+                246,
+                220,
+                542,
+                246,
+                246,
+                542,
+                246,
+                542,
+                342,
+                542,
+                342,
+                246,
+                542,
+                342,
+                220,
+                75,
+                246,
+                75,
+                246,
+                542,
+                246,
+                220,
+                75,
+                161,
+                542,
+                342,
+                220,
+                258,
+                246,
+                220,
+                75,
+                342,
+                220,
+                258,
+                194,
+                220,
+                436,
+                246,
+                220,
+                194,
+                194,
+                1442,
+                246,
+                220,
+                246,
+                246,
+                246,
+                151,
+                1551,
+                1522,
+                1362,
+                652,
+                1557,
+                333,
+                273,
+                928,
+                1551,
+                180,
+                1570,
+                652,
+                1664,
+                6,
+                654,
+                281,
+                1578,
+                1557,
+                1346,
+                756
+            ]
+        },
+        {
+            "word": "is",
+            "duration": 0.06,
+            "codes": [
+                1337,
+                1662,
+                198,
+                33
+            ]
+        },
+        {
+            "word": "that",
+            "duration": 0.12,
+            "codes": [
+                1679,
+                236,
+                934,
+                1056,
+                208,
+                609,
+                860,
+                1318,
+                1340
+            ]
+        },
+        {
+            "word": "what",
+            "duration": 0.14,
+            "codes": [
+                1618,
+                806,
+                1068,
+                113,
+                1686,
+                428,
+                230,
+                409,
+                263,
+                415,
+                175
+            ]
+        },
+        {
+            "word": "is",
+            "duration": 0.1,
+            "codes": [
+                415,
+                1773,
+                1539,
+                124,
+                1563,
+                700,
+                579
+            ]
+        },
+        {
+            "word": "amazing",
+            "duration": 0.34,
+            "codes": [
+                973,
+                695,
+                1247,
+                1737,
+                1609,
+                1664,
+                1006,
+                134,
+                409,
+                416,
+                774,
+                848,
+                1542,
+                10,
+                1441,
+                1539,
+                129,
+                1698,
+                687,
+                1620,
+                1340,
+                749,
+                469,
+                1695,
+                448,
+                448
+            ]
+        },
+        {
+            "word": "to",
+            "duration": 0.12,
+            "codes": [
+                189,
+                198,
+                124,
+                1753,
+                510,
+                1825,
+                856,
+                1441,
+                1688
+            ]
+        },
+        {
+            "word": "you",
+            "duration": 1.62,
+            "codes": [
+                1552,
+                1546,
+                1698,
+                166,
+                101,
+                1457,
+                137,
+                864,
+                790,
+                794,
+                1615,
+                454,
+                1512,
+                328,
+                634,
+                1578,
+                409,
+                1592,
+                176,
+                1441,
+                1644,
+                356,
+                1641,
+                1580,
+                510,
+                1609,
+                407,
+                882,
+                1580,
+                218,
+                1616,
+                865,
+                409,
+                1570,
+                1376,
+                1734,
+                34,
+                687,
+                1592,
+                556,
+                640,
+                1592,
+                6,
+                1362,
+                4,
+                1546,
+                1302,
+                1376,
+                1570,
+                34,
+                652,
+                180,
+                1569,
+                203,
+                1744,
+                282,
+                945,
+                362,
+                931,
+                1662,
+                631,
+                1580,
+                452,
+                329,
+                725,
+                140,
+                277,
+                1113,
+                537,
+                1332,
+                560,
+                282,
+                1056,
+                270,
+                940,
+                755,
+                860,
+                104,
+                903,
+                537,
+                1310,
+                579,
+                282,
+                848,
+                371,
+                844,
+                1808,
+                400,
+                1772,
+                1166,
+                213,
+                1485,
+                1502,
+                276,
+                1594,
+                1599,
+                1819,
+                1197,
+                441,
+                1318,
+                1237,
+                679,
+                1186,
+                384,
+                609,
+                637,
+                157,
+                609,
+                637,
+                157,
+                790,
+                157,
+                547,
+                452,
+                452,
+                870,
+                162,
+                320,
+                1649,
+                1272,
+                1318,
+                860
+            ]
+        },
+        {
+            "word": "your",
+            "duration": 0.16,
+            "codes": [
+                1477,
+                67,
+                113,
+                1149,
+                479,
+                901,
+                1232,
+                295,
+                9,
+                1129,
+                67,
+                1825
+            ]
+        },
+        {
+            "word": "marriage",
+            "duration": 0.8,
+            "codes": [
+                529,
+                697,
+                695,
+                1429,
+                282,
+                626,
+                1355,
+                192,
+                1671,
+                100,
+                95,
+                1310,
+                388,
+                1155,
+                1494,
+                104,
+                104,
+                587,
+                1156,
+                67,
+                57,
+                1437,
+                697,
+                714,
+                1221,
+                1443,
+                2,
+                1357,
+                931,
+                931,
+                1298,
+                388,
+                1136,
+                1604,
+                428,
+                1240,
+                1698,
+                65,
+                1272,
+                128,
+                755,
+                79,
+                794,
+                1698,
+                1518,
+                1546,
+                1696,
+                448,
+                233,
+                1599,
+                1732,
+                1240,
+                110,
+                775,
+                483,
+                100,
+                1075,
+                346,
+                863,
+                1498
+            ]
+        },
+        {
+            "word": "is",
+            "duration": 0.1,
+            "codes": [
+                631,
+                18,
+                679,
+                430,
+                176,
+                10,
+                52
+            ]
+        }
+    ]
+}

default_speakers/remi.json ADDED Viewed

	@@ -0,0 +1,382 @@

+{
+    "text": "animal noral human being",
+    "words": [
+        {
+            "word": "animal",
+            "duration": 2.79,
+            "codes": [
+                1679,
+                1711,
+                714,
+                1588,
+                906,
+                725,
+                789,
+                456,
+                79,
+                230,
+                1127,
+                532,
+                200,
+                834,
+                29,
+                753,
+                1420,
+                595,
+                997,
+                557,
+                205,
+                488,
+                775,
+                63,
+                1520,
+                1600,
+                1394,
+                1811,
+                1715,
+                473,
+                805,
+                128,
+                502,
+                1353,
+                1636,
+                1832,
+                182,
+                381,
+                281,
+                1540,
+                748,
+                1341,
+                1744,
+                374,
+                1767,
+                182,
+                621,
+                495,
+                234,
+                909,
+                1383,
+                92,
+                1545,
+                1394,
+                1794,
+                1641,
+                319,
+                1452,
+                1240,
+                217,
+                1815,
+                388,
+                828,
+                1664,
+                184,
+                1239,
+                319,
+                1469,
+                1810,
+                36,
+                1019,
+                1451,
+                774,
+                1819,
+                1521,
+                761,
+                23,
+                1609,
+                273,
+                52,
+                1670,
+                524,
+                813,
+                806,
+                79,
+                1141,
+                1677,
+                138,
+                1409,
+                1468,
+                1633,
+                1573,
+                782,
+                1655,
+                1669,
+                1239,
+                458,
+                1495,
+                258,
+                544,
+                1532,
+                1567,
+                1627,
+                1641,
+                851,
+                1573,
+                1569,
+                265,
+                686,
+                72,
+                151,
+                342,
+                194,
+                75,
+                419,
+                342,
+                542,
+                419,
+                75,
+                342,
+                246,
+                75,
+                342,
+                246,
+                56,
+                161,
+                246,
+                442,
+                161,
+                56,
+                156,
+                420,
+                161,
+                75,
+                219,
+                194,
+                56,
+                156,
+                220,
+                453,
+                156,
+                1019,
+                490,
+                1415,
+                742,
+                1533,
+                412,
+                828,
+                138,
+                1487,
+                128,
+                660,
+                1339,
+                882,
+                154,
+                1533,
+                47,
+                312,
+                730,
+                1087,
+                764,
+                346,
+                1394,
+                179,
+                959,
+                1344,
+                324,
+                1457,
+                388,
+                57,
+                514,
+                1323,
+                631,
+                6,
+                479,
+                815,
+                1599,
+                384,
+                952,
+                1650,
+                57,
+                314,
+                320,
+                787,
+                1488,
+                147,
+                203,
+                1078,
+                192,
+                1663,
+                236,
+                1501,
+                270,
+                1280,
+                716,
+                631,
+                1584,
+                1605,
+                1779,
+                1239,
+                363,
+                1437,
+                430,
+                1554,
+                1069,
+                189,
+                319,
+                856,
+                143
+            ]
+        },
+        {
+            "word": "noral",
+            "duration": 0.56,
+            "codes": [
+                1831,
+                201,
+                1674,
+                1707,
+                1807,
+                487,
+                1577,
+                1394,
+                1341,
+                412,
+                814,
+                205,
+                1633,
+                79,
+                1267,
+                1625,
+                315,
+                1649,
+                4,
+                780,
+                368,
+                592,
+                1633,
+                592,
+                1431,
+                1563,
+                599,
+                176,
+                10,
+                725,
+                1468,
+                76,
+                593,
+                714,
+                146,
+                974,
+                725,
+                549,
+                57,
+                1068,
+                1729,
+                52
+            ]
+        },
+        {
+            "word": "human",
+            "duration": 0.82,
+            "codes": [
+                1552,
+                233,
+                298,
+                949,
+                1636,
+                380,
+                363,
+                1520,
+                1768,
+                85,
+                483,
+                876,
+                125,
+                153,
+                564,
+                200,
+                1221,
+                803,
+                1712,
+                117,
+                804,
+                688,
+                787,
+                1345,
+                592,
+                291,
+                472,
+                158,
+                132,
+                1827,
+                617,
+                157,
+                36,
+                1186,
+                1008,
+                324,
+                961,
+                644,
+                179,
+                931,
+                1400,
+                688,
+                1015,
+                488,
+                532,
+                500,
+                952,
+                945,
+                29,
+                1497,
+                529,
+                749,
+                1733,
+                439,
+                63,
+                1773,
+                1527,
+                1622,
+                728,
+                1613,
+                1274,
+                136
+            ]
+        },
+        {
+            "word": "being",
+            "duration": 0.54,
+            "codes": [
+                546,
+                1287,
+                166,
+                315,
+                1678,
+                882,
+                1753,
+                1018,
+                1449,
+                1581,
+                298,
+                1710,
+                1799,
+                1772,
+                1406,
+                1538,
+                1728,
+                1657,
+                1778,
+                182,
+                921,
+                217,
+                1615,
+                133,
+                217,
+                1516,
+                1830,
+                844,
+                1584,
+                338,
+                1639,
+                644,
+                417,
+                774,
+                1724,
+                648,
+                749,
+                4,
+                315,
+                1497
+            ]
+        }
+    ]
+}

default_speakers/saheed.json ADDED Viewed

	@@ -0,0 +1,564 @@

+{
+    "text": "Hello! My name is Saheed azeez and I am testing the audio feature",
+    "words": [
+        {
+            "word": "hello",
+            "duration": 2.38,
+            "codes": [
+                219,
+                244,
+                244,
+                167,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                453,
+                244,
+                219,
+                139,
+                966,
+                1099,
+                1299,
+                1433,
+                1128,
+                1266,
+                1517,
+                649,
+                196,
+                1731,
+                1405,
+                830,
+                1771,
+                964,
+                476,
+                1803,
+                584,
+                875,
+                1683,
+                986,
+                363,
+                1489,
+                465,
+                5,
+                1067,
+                606,
+                1590,
+                1397,
+                265,
+                1446,
+                1279,
+                799,
+                1491,
+                1367,
+                606,
+                1593,
+                1279,
+                360,
+                256,
+                1705,
+                1425,
+                58,
+                1210,
+                1357,
+                1379,
+                752,
+                1640,
+                837,
+                734,
+                1787,
+                1406,
+                1052,
+                1796,
+                686,
+                1446,
+                1716,
+                564,
+                595,
+                1716,
+                728,
+                847,
+                732,
+                935,
+                1253,
+                752,
+                1019,
+                1455,
+                564,
+                1492,
+                733,
+                1645,
+                1391,
+                728,
+                1501,
+                1822,
+                1339,
+                1677,
+                1456,
+                807,
+                1738,
+                710,
+                1381,
+                1292,
+                406,
+                1517,
+                1458,
+                761,
+                1361,
+                649,
+                17,
+                1367,
+                606,
+                1771,
+                1028,
+                464,
+                1309,
+                691,
+                1023,
+                1314,
+                692,
+                1373,
+                837,
+                442,
+                1683,
+                838,
+                476,
+                1475,
+                950,
+                136,
+                1309,
+                465,
+                17,
+                19,
+                765,
+                1553,
+                1305,
+                534,
+                1309,
+                666,
+                761,
+                1067,
+                442,
+                1704,
+                1128,
+                633,
+                1438,
+                1011,
+                406,
+                1489,
+                136,
+                1813,
+                1589,
+                763,
+                1489,
+                696,
+                643,
+                1305,
+                246,
+                406,
+                1421,
+                37
+            ]
+        },
+        {
+            "word": "my",
+            "duration": 0.2,
+            "codes": [
+                1187,
+                1770,
+                646,
+                1174,
+                1771,
+                1192,
+                800,
+                310,
+                1318,
+                1500,
+                909,
+                1104,
+                1792,
+                1218,
+                1832
+            ]
+        },
+        {
+            "word": "name",
+            "duration": 0.24,
+            "codes": [
+                875,
+                1583,
+                1632,
+                671,
+                1002,
+                905,
+                1073,
+                1294,
+                595,
+                1684,
+                1501,
+                1797,
+                850,
+                1761,
+                1751,
+                935,
+                1443,
+                1781
+            ]
+        },
+        {
+            "word": "is",
+            "duration": 0.14,
+            "codes": [
+                1780,
+                1215,
+                1674,
+                1815,
+                1451,
+                1673,
+                1303,
+                1660,
+                1613,
+                1379,
+                1756
+            ]
+        },
+        {
+            "word": "saheed",
+            "duration": 0.68,
+            "codes": [
+                1419,
+                1568,
+                1643,
+                1099,
+                1795,
+                970,
+                1184,
+                1498,
+                877,
+                1162,
+                902,
+                1537,
+                1192,
+                1565,
+                1472,
+                1109,
+                1225,
+                1321,
+                1453,
+                1654,
+                1274,
+                1811,
+                1695,
+                946,
+                1631,
+                1590,
+                1152,
+                820,
+                272,
+                1458,
+                1378,
+                240,
+                1421,
+                174,
+                925,
+                1126,
+                1346,
+                1600,
+                1716,
+                258,
+                1611,
+                442,
+                625,
+                1448,
+                246,
+                957,
+                226,
+                338,
+                1190,
+                921,
+                1505
+            ]
+        },
+        {
+            "word": "azeez",
+            "duration": 0.8,
+            "codes": [
+                1195,
+                646,
+                1505,
+                1014,
+                250,
+                837,
+                729,
+                121,
+                1715,
+                1446,
+                1430,
+                1608,
+                1575,
+                1057,
+                1643,
+                1514,
+                1795,
+                893,
+                1718,
+                1383,
+                840,
+                1802,
+                426,
+                1414,
+                1573,
+                1784,
+                1285,
+                852,
+                1246,
+                896,
+                1744,
+                1299,
+                495,
+                1796,
+                1570,
+                1665,
+                505,
+                888,
+                1654,
+                343,
+                1120,
+                1474,
+                16,
+                1035,
+                505,
+                1699,
+                862,
+                692,
+                1623,
+                633,
+                566,
+                1037,
+                342,
+                950,
+                261,
+                729,
+                1317,
+                177,
+                1213,
+                1333
+            ]
+        },
+        {
+            "word": "and",
+            "duration": 0.34,
+            "codes": [
+                908,
+                1203,
+                1683,
+                926,
+                1278,
+                564,
+                1067,
+                1003,
+                90,
+                459,
+                568,
+                272,
+                1117,
+                1396,
+                1411,
+                1233,
+                193,
+                1197,
+                970,
+                1065,
+                1611,
+                883,
+                1216,
+                1776,
+                747
+            ]
+        },
+        {
+            "word": "i",
+            "duration": 0.06,
+            "codes": [
+                924,
+                1628,
+                988,
+                1116,
+                1388
+            ]
+        },
+        {
+            "word": "am",
+            "duration": 0.18,
+            "codes": [
+                1199,
+                1188,
+                593,
+                953,
+                459,
+                272,
+                869,
+                1321,
+                145,
+                1306,
+                272,
+                406,
+                1479
+            ]
+        },
+        {
+            "word": "testing",
+            "duration": 0.44,
+            "codes": [
+                237,
+                1003,
+                1638,
+                638,
+                1180,
+                1666,
+                811,
+                1178,
+                1565,
+                814,
+                1211,
+                1654,
+                1779,
+                1313,
+                1619,
+                1684,
+                1230,
+                419,
+                891,
+                28,
+                1231,
+                1379,
+                729,
+                1682,
+                338,
+                1468,
+                136,
+                1630,
+                1215,
+                251,
+                1464,
+                781,
+                598
+            ]
+        },
+        {
+            "word": "the",
+            "duration": 0.22,
+            "codes": [
+                555,
+                692,
+                663,
+                1632,
+                905,
+                807,
+                1085,
+                752,
+                1433,
+                392,
+                921,
+                1820,
+                363,
+                987,
+                1328,
+                734,
+                1063
+            ]
+        },
+        {
+            "word": "audio",
+            "duration": 0.34,
+            "codes": [
+                1294,
+                814,
+                1423,
+                1750,
+                747,
+                672,
+                651,
+                250,
+                1478,
+                37,
+                1760,
+                1021,
+                850,
+                58,
+                438,
+                953,
+                1668,
+                771,
+                729,
+                1456,
+                322,
+                591,
+                1474,
+                1440,
+                1170
+            ]
+        },
+        {
+            "word": "feature",
+            "duration": 0.4,
+            "codes": [
+                332,
+                1333,
+                1146,
+                1025,
+                19,
+                501,
+                169,
+                1250,
+                734,
+                1629,
+                1383,
+                355,
+                1747,
+                584,
+                237,
+                1428,
+                240,
+                1298,
+                999,
+                1338,
+                1438,
+                1727,
+                987,
+                1455,
+                792,
+                932,
+                1199,
+                355,
+                1185,
+                772
+            ]
+        }
+    ]
+}

default_speakers/tayo.json ADDED Viewed

	@@ -0,0 +1,523 @@

+{
+    "text": "and enjoy ourselves we need more parties let party start again now we know",
+    "words": [
+        {
+            "word": "and",
+            "duration": 0.5,
+            "codes": [
+                82,
+                1201,
+                329,
+                992,
+                908,
+                847,
+                925,
+                1666,
+                1057,
+                1266,
+                1448,
+                1737,
+                1251,
+                1031,
+                1759,
+                1459,
+                1094,
+                1750,
+                1739,
+                1521,
+                594,
+                1625,
+                732,
+                1326,
+                1095,
+                828,
+                239,
+                752,
+                1221,
+                1382,
+                705,
+                1716,
+                865,
+                1503,
+                478,
+                1692,
+                938
+            ]
+        },
+        {
+            "word": "enjoy",
+            "duration": 0.4,
+            "codes": [
+                844,
+                192,
+                737,
+                344,
+                276,
+                138,
+                48,
+                1616,
+                28,
+                1530,
+                1550,
+                1383,
+                1712,
+                69,
+                1261,
+                547,
+                249,
+                1047,
+                500,
+                182,
+                63,
+                1445,
+                935,
+                865,
+                1478,
+                1670,
+                479,
+                116,
+                1674,
+                886
+            ]
+        },
+        {
+            "word": "ourselves",
+            "duration": 0.7,
+            "codes": [
+                467,
+                1534,
+                901,
+                569,
+                1740,
+                882,
+                1579,
+                507,
+                276,
+                1296,
+                543,
+                399,
+                404,
+                1624,
+                1666,
+                153,
+                102,
+                1323,
+                1552,
+                65,
+                898,
+                1577,
+                757,
+                1446,
+                1022,
+                363,
+                124,
+                947,
+                1441,
+                581,
+                1677,
+                1269,
+                1525,
+                1170,
+                505,
+                1681,
+                1212,
+                1273,
+                1364,
+                1513,
+                1826,
+                1139,
+                1756,
+                639,
+                1450,
+                1810,
+                1638,
+                1644,
+                1669,
+                1519,
+                851,
+                1362,
+                1672
+            ]
+        },
+        {
+            "word": "we",
+            "duration": 0.1,
+            "codes": [
+                875,
+                1558,
+                1249,
+                1445,
+                181,
+                738,
+                1641
+            ]
+        },
+        {
+            "word": "need",
+            "duration": 0.14,
+            "codes": [
+                1603,
+                177,
+                195,
+                65,
+                1600,
+                104,
+                143,
+                1574,
+                1416,
+                160,
+                50
+            ]
+        },
+        {
+            "word": "more",
+            "duration": 0.18,
+            "codes": [
+                48,
+                1597,
+                39,
+                1414,
+                74,
+                1192,
+                84,
+                1345,
+                748,
+                1269,
+                1672,
+                686,
+                1820,
+                1442
+            ]
+        },
+        {
+            "word": "parties",
+            "duration": 0.56,
+            "codes": [
+                1640,
+                1030,
+                138,
+                147,
+                413,
+                110,
+                282,
+                1633,
+                1659,
+                1524,
+                176,
+                350,
+                137,
+                1004,
+                92,
+                1240,
+                1521,
+                1376,
+                502,
+                1558,
+                592,
+                473,
+                1021,
+                1805,
+                1346,
+                1393,
+                1759,
+                1786,
+                231,
+                1728,
+                117,
+                1366,
+                1754,
+                1073,
+                1786,
+                1354,
+                1532,
+                1572,
+                1754,
+                16,
+                257,
+                273
+            ]
+        },
+        {
+            "word": "let",
+            "duration": 0.16,
+            "codes": [
+                1312,
+                961,
+                372,
+                212,
+                1253,
+                115,
+                656,
+                1374,
+                78,
+                1322,
+                1284,
+                343
+            ]
+        },
+        {
+            "word": "party",
+            "duration": 0.24,
+            "codes": [
+                1572,
+                1662,
+                25,
+                390,
+                892,
+                212,
+                637,
+                576,
+                176,
+                1702,
+                640,
+                276,
+                52,
+                648,
+                577,
+                1240,
+                276,
+                155
+            ]
+        },
+        {
+            "word": "start",
+            "duration": 0.3,
+            "codes": [
+                213,
+                356,
+                1603,
+                1284,
+                1442,
+                1599,
+                705,
+                82,
+                65,
+                764,
+                349,
+                370,
+                856,
+                1524,
+                1508,
+                209,
+                495,
+                1552,
+                50,
+                1588,
+                863,
+                63
+            ]
+        },
+        {
+            "word": "again",
+            "duration": 0.3,
+            "codes": [
+                1267,
+                273,
+                298,
+                1409,
+                101,
+                1548,
+                733,
+                625,
+                1728,
+                1283,
+                286,
+                1645,
+                1363,
+                368,
+                153,
+                289,
+                716,
+                1756,
+                865,
+                1376,
+                688,
+                332,
+                731
+            ]
+        },
+        {
+            "word": "now",
+            "duration": 0.44,
+            "codes": [
+                983,
+                385,
+                1002,
+                806,
+                1798,
+                95,
+                1776,
+                825,
+                1790,
+                737,
+                1595,
+                907,
+                932,
+                1786,
+                626,
+                831,
+                1823,
+                1680,
+                1780,
+                1502,
+                1206,
+                1078,
+                47,
+                829,
+                868,
+                69,
+                277,
+                429,
+                125,
+                132,
+                14,
+                1497,
+                444
+            ]
+        },
+        {
+            "word": "we",
+            "duration": 1.32,
+            "codes": [
+                1692,
+                648,
+                481,
+                155,
+                483,
+                126,
+                1283,
+                12,
+                108,
+                429,
+                828,
+                128,
+                1161,
+                725,
+                155,
+                107,
+                1610,
+                228,
+                1492,
+                1560,
+                368,
+                1138,
+                810,
+                1572,
+                1562,
+                320,
+                112,
+                520,
+                52,
+                49,
+                1008,
+                1635,
+                1728,
+                1523,
+                62,
+                190,
+                648,
+                592,
+                384,
+                969,
+                1441,
+                519,
+                1536,
+                1571,
+                1587,
+                1539,
+                15,
+                1156,
+                376,
+                1022,
+                642,
+                483,
+                1794,
+                1335,
+                1712,
+                1449,
+                529,
+                1558,
+                1463,
+                1559,
+                1706,
+                1460,
+                249,
+                1308,
+                293,
+                529,
+                841,
+                201,
+                1256,
+                931,
+                132,
+                1173,
+                479,
+                286,
+                1075,
+                153,
+                13,
+                1503,
+                398,
+                415,
+                432,
+                7,
+                183,
+                103,
+                409,
+                736,
+                15,
+                940,
+                1459,
+                15,
+                1631,
+                1580,
+                1773,
+                624,
+                1417,
+                926,
+                531,
+                1159,
+                1257
+            ]
+        },
+        {
+            "word": "know",
+            "duration": 0.44,
+            "codes": [
+                777,
+                1240,
+                446,
+                303,
+                153,
+                263,
+                1402,
+                317,
+                1365,
+                481,
+                848,
+                1280,
+                354,
+                1415,
+                245,
+                408,
+                462,
+                466,
+                253,
+                943,
+                472,
+                215,
+                143,
+                519,
+                202,
+                1389,
+                1608,
+                714,
+                1599,
+                399,
+                944,
+                124,
+                844
+            ]
+        }
+    ]
+}

default_speakers/umar.json ADDED Viewed

	@@ -0,0 +1,469 @@

+{
+    "text": "that i'd like to share with everybody in the world yes sometimes you go all the way",
+    "words": [
+        {
+            "word": "that",
+            "duration": 0.48,
+            "codes": [
+                519,
+                848,
+                1374,
+                416,
+                940,
+                1445,
+                416,
+                753,
+                1616,
+                774,
+                803,
+                1697,
+                1541,
+                1047,
+                200,
+                462,
+                1417,
+                1313,
+                1296,
+                184,
+                1396,
+                1568,
+                1416,
+                1444,
+                1631,
+                1463,
+                702,
+                1831,
+                1564,
+                1374,
+                1580,
+                1643,
+                1681,
+                1660,
+                1124,
+                1720
+            ]
+        },
+        {
+            "word": "id",
+            "duration": 0.38,
+            "codes": [
+                4,
+                705,
+                1534,
+                1290,
+                1661,
+                302,
+                1798,
+                844,
+                197,
+                1027,
+                1606,
+                903,
+                1414,
+                794,
+                871,
+                882,
+                941,
+                1310,
+                871,
+                1247,
+                1140,
+                1247,
+                718,
+                1422,
+                1509,
+                1678,
+                1093,
+                1734
+            ]
+        },
+        {
+            "word": "like",
+            "duration": 0.18,
+            "codes": [
+                647,
+                1824,
+                474,
+                1111,
+                599,
+                221,
+                1435,
+                822,
+                1409,
+                1717,
+                1748,
+                1550,
+                1738,
+                1717
+            ]
+        },
+        {
+            "word": "to",
+            "duration": 0.14,
+            "codes": [
+                1535,
+                231,
+                1794,
+                1553,
+                1351,
+                1365,
+                1296,
+                1781,
+                1599,
+                1082
+            ]
+        },
+        {
+            "word": "share",
+            "duration": 0.18,
+            "codes": [
+                1737,
+                0,
+                979,
+                1688,
+                546,
+                1807,
+                319,
+                252,
+                1805,
+                714,
+                580,
+                1524,
+                798,
+                1779
+            ]
+        },
+        {
+            "word": "with",
+            "duration": 0.14,
+            "codes": [
+                1698,
+                702,
+                966,
+                1461,
+                127,
+                1681,
+                85,
+                1741,
+                1588,
+                718
+            ]
+        },
+        {
+            "word": "everybody",
+            "duration": 0.4,
+            "codes": [
+                1600,
+                806,
+                1770,
+                1078,
+                1727,
+                679,
+                1569,
+                1452,
+                1685,
+                774,
+                1598,
+                1382,
+                1520,
+                1786,
+                1702,
+                1607,
+                1747,
+                828,
+                1553,
+                983,
+                1103,
+                882,
+                1427,
+                1679,
+                1613,
+                1636,
+                1433,
+                519,
+                853,
+                1451
+            ]
+        },
+        {
+            "word": "in",
+            "duration": 0.06,
+            "codes": [
+                1369,
+                1654,
+                1581,
+                1600,
+                1452
+            ]
+        },
+        {
+            "word": "the",
+            "duration": 0.12,
+            "codes": [
+                1241,
+                1769,
+                678,
+                1751,
+                1280,
+                1711,
+                1663,
+                1772,
+                1655
+            ]
+        },
+        {
+            "word": "world",
+            "duration": 0.74,
+            "codes": [
+                973,
+                1231,
+                1015,
+                1052,
+                1415,
+                721,
+                1822,
+                825,
+                1076,
+                1431,
+                1357,
+                1389,
+                744,
+                1263,
+                1525,
+                1794,
+                319,
+                1678,
+                1732,
+                1395,
+                1695,
+                1827,
+                1059,
+                1719,
+                1675,
+                1714,
+                1635,
+                1466,
+                1730,
+                1750,
+                1395,
+                1525,
+                1827,
+                1313,
+                1440,
+                1447,
+                1292,
+                1762,
+                1226,
+                1418,
+                1750,
+                719,
+                1549,
+                1761,
+                1459,
+                1717,
+                1800,
+                1404,
+                1702,
+                1795,
+                1711,
+                1789,
+                1808,
+                1759,
+                385,
+                415
+            ]
+        },
+        {
+            "word": "yes",
+            "duration": 0.32,
+            "codes": [
+                302,
+                1704,
+                485,
+                983,
+                234,
+                63,
+                462,
+                483,
+                82,
+                827,
+                999,
+                1143,
+                102,
+                1655,
+                117,
+                1619,
+                519,
+                1217,
+                1518,
+                1476,
+                333,
+                1660,
+                1238,
+                1679
+            ]
+        },
+        {
+            "word": "sometimes",
+            "duration": 0.58,
+            "codes": [
+                1287,
+                546,
+                1552,
+                1736,
+                1647,
+                836,
+                575,
+                354,
+                1156,
+                1264,
+                1194,
+                1761,
+                1629,
+                1452,
+                1241,
+                1394,
+                856,
+                1313,
+                1653,
+                736,
+                556,
+                1387,
+                1824,
+                966,
+                373,
+                1424,
+                1342,
+                221,
+                580,
+                1412,
+                940,
+                626,
+                1797,
+                858,
+                972,
+                1525,
+                1744,
+                738,
+                1695,
+                1542,
+                1604,
+                1394,
+                1627
+            ]
+        },
+        {
+            "word": "you",
+            "duration": 0.12,
+            "codes": [
+                1460,
+                546,
+                1427,
+                1451,
+                1081,
+                1760,
+                1463,
+                1628,
+                1692
+            ]
+        },
+        {
+            "word": "go",
+            "duration": 0.26,
+            "codes": [
+                1521,
+                1734,
+                753,
+                770,
+                1640,
+                1757,
+                297,
+                462,
+                702,
+                1826,
+                1440,
+                1828,
+                1747,
+                1651,
+                1729,
+                1087,
+                580,
+                1698,
+                1194,
+                1308
+            ]
+        },
+        {
+            "word": "all",
+            "duration": 0.42,
+            "codes": [
+                863,
+                610,
+                429,
+                443,
+                1087,
+                183,
+                782,
+                613,
+                222,
+                1047,
+                1492,
+                154,
+                955,
+                429,
+                443,
+                613,
+                983,
+                328,
+                382,
+                359,
+                341,
+                217,
+                456,
+                289,
+                1324,
+                714,
+                756,
+                369,
+                211,
+                127,
+                1827,
+                1563
+            ]
+        },
+        {
+            "word": "the",
+            "duration": 0.12,
+            "codes": [
+                1686,
+                949,
+                1296,
+                829,
+                1463,
+                1731,
+                1222,
+                1353,
+                1780
+            ]
+        },
+        {
+            "word": "way",
+            "duration": 0.18,
+            "codes": [
+                1263,
+                890,
+                683,
+                289,
+                217,
+                326,
+                335,
+                1059,
+                1204,
+                213,
+                1340,
+                289,
+                191
+            ]
+        }
+    ]
+}

default_speakers/wanjiku.json ADDED Viewed

	@@ -0,0 +1,574 @@

+{
+    "text": "was just like is that what is amazing to you your marriage is",
+    "words": [
+        {
+            "word": "was",
+            "duration": 1.02,
+            "codes": [
+                1514,
+                571,
+                892,
+                386,
+                186,
+                1403,
+                1082,
+                636,
+                851,
+                1287,
+                1678,
+                1166,
+                162,
+                1345,
+                282,
+                104,
+                1345,
+                329,
+                637,
+                844,
+                537,
+                1366,
+                537,
+                282,
+                1485,
+                537,
+                637,
+                844,
+                537,
+                1710,
+                375,
+                452,
+                1588,
+                537,
+                1382,
+                714,
+                206,
+                333,
+                330,
+                344,
+                281,
+                1523,
+                44,
+                1557,
+                315,
+                479,
+                271,
+                370,
+                110,
+                498,
+                768,
+                560,
+                579,
+                847,
+                961,
+                293,
+                1351,
+                1141,
+                138,
+                1229,
+                2,
+                847,
+                1245,
+                1345,
+                1829,
+                1811,
+                1326,
+                955,
+                1314,
+                137,
+                270,
+                1743,
+                324,
+                1389,
+                1027,
+                863
+            ]
+        },
+        {
+            "word": "just",
+            "duration": 0.28,
+            "codes": [
+                333,
+                38,
+                1518,
+                1296,
+                146,
+                1077,
+                1204,
+                665,
+                658,
+                1005,
+                944,
+                1136,
+                519,
+                749,
+                1061,
+                69,
+                1363,
+                415,
+                1679,
+                1741,
+                138
+            ]
+        },
+        {
+            "word": "like",
+            "duration": 1.68,
+            "codes": [
+                1796,
+                714,
+                65,
+                13,
+                664,
+                1077,
+                463,
+                232,
+                461,
+                1210,
+                356,
+                346,
+                1196,
+                202,
+                631,
+                1804,
+                1096,
+                450,
+                23,
+                1535,
+                415,
+                582,
+                328,
+                546,
+                1571,
+                344,
+                1512,
+                1242,
+                141,
+                194,
+                220,
+                258,
+                246,
+                220,
+                246,
+                542,
+                258,
+                246,
+                220,
+                151,
+                246,
+                542,
+                342,
+                220,
+                75,
+                246,
+                220,
+                246,
+                542,
+                246,
+                220,
+                542,
+                161,
+                450,
+                419,
+                246,
+                542,
+                246,
+                542,
+                246,
+                220,
+                542,
+                246,
+                246,
+                542,
+                246,
+                542,
+                342,
+                542,
+                342,
+                246,
+                542,
+                342,
+                220,
+                75,
+                246,
+                75,
+                246,
+                542,
+                246,
+                220,
+                75,
+                161,
+                542,
+                342,
+                220,
+                258,
+                246,
+                220,
+                75,
+                342,
+                220,
+                258,
+                194,
+                220,
+                436,
+                246,
+                220,
+                194,
+                194,
+                1442,
+                246,
+                220,
+                246,
+                246,
+                246,
+                151,
+                1551,
+                1522,
+                1362,
+                652,
+                1557,
+                333,
+                273,
+                928,
+                1551,
+                180,
+                1570,
+                652,
+                1664,
+                6,
+                654,
+                281,
+                1578,
+                1557,
+                1346,
+                756
+            ]
+        },
+        {
+            "word": "is",
+            "duration": 0.06,
+            "codes": [
+                1337,
+                1662,
+                198,
+                33
+            ]
+        },
+        {
+            "word": "that",
+            "duration": 0.12,
+            "codes": [
+                1679,
+                236,
+                934,
+                1056,
+                208,
+                609,
+                860,
+                1318,
+                1340
+            ]
+        },
+        {
+            "word": "what",
+            "duration": 0.14,
+            "codes": [
+                1618,
+                806,
+                1068,
+                113,
+                1686,
+                428,
+                230,
+                409,
+                263,
+                415,
+                175
+            ]
+        },
+        {
+            "word": "is",
+            "duration": 0.1,
+            "codes": [
+                415,
+                1773,
+                1539,
+                124,
+                1563,
+                700,
+                579
+            ]
+        },
+        {
+            "word": "amazing",
+            "duration": 0.34,
+            "codes": [
+                973,
+                695,
+                1247,
+                1737,
+                1609,
+                1664,
+                1006,
+                134,
+                409,
+                416,
+                774,
+                848,
+                1542,
+                10,
+                1441,
+                1539,
+                129,
+                1698,
+                687,
+                1620,
+                1340,
+                749,
+                469,
+                1695,
+                448,
+                448
+            ]
+        },
+        {
+            "word": "to",
+            "duration": 0.12,
+            "codes": [
+                189,
+                198,
+                124,
+                1753,
+                510,
+                1825,
+                856,
+                1441,
+                1688
+            ]
+        },
+        {
+            "word": "you",
+            "duration": 1.62,
+            "codes": [
+                1552,
+                1546,
+                1698,
+                166,
+                101,
+                1457,
+                137,
+                864,
+                790,
+                794,
+                1615,
+                454,
+                1512,
+                328,
+                634,
+                1578,
+                409,
+                1592,
+                176,
+                1441,
+                1644,
+                356,
+                1641,
+                1580,
+                510,
+                1609,
+                407,
+                882,
+                1580,
+                218,
+                1616,
+                865,
+                409,
+                1570,
+                1376,
+                1734,
+                34,
+                687,
+                1592,
+                556,
+                640,
+                1592,
+                6,
+                1362,
+                4,
+                1546,
+                1302,
+                1376,
+                1570,
+                34,
+                652,
+                180,
+                1569,
+                203,
+                1744,
+                282,
+                945,
+                362,
+                931,
+                1662,
+                631,
+                1580,
+                452,
+                329,
+                725,
+                140,
+                277,
+                1113,
+                537,
+                1332,
+                560,
+                282,
+                1056,
+                270,
+                940,
+                755,
+                860,
+                104,
+                903,
+                537,
+                1310,
+                579,
+                282,
+                848,
+                371,
+                844,
+                1808,
+                400,
+                1772,
+                1166,
+                213,
+                1485,
+                1502,
+                276,
+                1594,
+                1599,
+                1819,
+                1197,
+                441,
+                1318,
+                1237,
+                679,
+                1186,
+                384,
+                609,
+                637,
+                157,
+                609,
+                637,
+                157,
+                790,
+                157,
+                547,
+                452,
+                452,
+                870,
+                162,
+                320,
+                1649,
+                1272,
+                1318,
+                860
+            ]
+        },
+        {
+            "word": "your",
+            "duration": 0.16,
+            "codes": [
+                1477,
+                67,
+                113,
+                1149,
+                479,
+                901,
+                1232,
+                295,
+                9,
+                1129,
+                67,
+                1825
+            ]
+        },
+        {
+            "word": "marriage",
+            "duration": 0.8,
+            "codes": [
+                529,
+                697,
+                695,
+                1429,
+                282,
+                626,
+                1355,
+                192,
+                1671,
+                100,
+                95,
+                1310,
+                388,
+                1155,
+                1494,
+                104,
+                104,
+                587,
+                1156,
+                67,
+                57,
+                1437,
+                697,
+                714,
+                1221,
+                1443,
+                2,
+                1357,
+                931,
+                931,
+                1298,
+                388,
+                1136,
+                1604,
+                428,
+                1240,
+                1698,
+                65,
+                1272,
+                128,
+                755,
+                79,
+                794,
+                1698,
+                1518,
+                1546,
+                1696,
+                448,
+                233,
+                1599,
+                1732,
+                1240,
+                110,
+                775,
+                483,
+                100,
+                1075,
+                346,
+                863,
+                1498
+            ]
+        },
+        {
+            "word": "is",
+            "duration": 0.1,
+            "codes": [
+                631,
+                18,
+                679,
+                430,
+                176,
+                10,
+                52
+            ]
+        }
+    ]
+}

default_speakers/zainab.json ADDED Viewed

	@@ -0,0 +1,457 @@

+{
+    "text": "mama giver her because she gave so",
+    "words": [
+        {
+            "word": "mama",
+            "duration": 1.46,
+            "codes": [
+                1734,
+                1812,
+                1759,
+                1721,
+                1765,
+                1769,
+                1805,
+                1800,
+                1734,
+                1380,
+                1706,
+                1724,
+                1695,
+                1769,
+                1772,
+                1689,
+                1511,
+                339,
+                1077,
+                1492,
+                1494,
+                1353,
+                890,
+                753,
+                29,
+                607,
+                1812,
+                1310,
+                1326,
+                1497,
+                818,
+                1716,
+                1776,
+                1155,
+                1645,
+                1545,
+                1371,
+                1454,
+                1205,
+                1464,
+                703,
+                1096,
+                1285,
+                1811,
+                1494,
+                738,
+                1248,
+                1725,
+                952,
+                230,
+                1415,
+                1691,
+                1718,
+                41,
+                1685,
+                1783,
+                1092,
+                1346,
+                954,
+                776,
+                702,
+                1157,
+                1152,
+                1768,
+                572,
+                1025,
+                1750,
+                1231,
+                900,
+                1764,
+                1246,
+                1572,
+                1711,
+                1534,
+                1320,
+                1389,
+                197,
+                1584,
+                1019,
+                1576,
+                1027,
+                1402,
+                506,
+                1402,
+                617,
+                1490,
+                1358,
+                770,
+                1666,
+                1025,
+                921,
+                1658,
+                830,
+                1062,
+                1598,
+                1095,
+                1174,
+                1680,
+                1501,
+                1332,
+                1827,
+                1588,
+                231,
+                1633,
+                1591,
+                736,
+                1825,
+                1696,
+                1614
+            ]
+        },
+        {
+            "word": "giver",
+            "duration": 0.36,
+            "codes": [
+                1346,
+                404,
+                1270,
+                1389,
+                1363,
+                1426,
+                1008,
+                473,
+                1341,
+                1604,
+                1773,
+                385,
+                1685,
+                736,
+                1778,
+                1577,
+                1189,
+                1830,
+                973,
+                1192,
+                1624,
+                1766,
+                1344,
+                1542,
+                1463,
+                1253,
+                1554
+            ]
+        },
+        {
+            "word": "her",
+            "duration": 1.89,
+            "codes": [
+                1828,
+                1287,
+                1520,
+                1671,
+                1546,
+                932,
+                1367,
+                1176,
+                953,
+                1225,
+                1508,
+                1822,
+                1642,
+                381,
+                1003,
+                1288,
+                355,
+                627,
+                256,
+                1231,
+                822,
+                863,
+                1826,
+                788,
+                1786,
+                1796,
+                1585,
+                1266,
+                1236,
+                1157,
+                476,
+                1425,
+                1814,
+                1488,
+                1763,
+                343,
+                385,
+                1419,
+                1413,
+                1537,
+                1465,
+                1413,
+                1689,
+                975,
+                27,
+                1804,
+                1766,
+                1750,
+                1612,
+                1293,
+                1613,
+                1629,
+                1011,
+                1572,
+                1708,
+                1669,
+                1440,
+                1598,
+                1514,
+                1773,
+                1166,
+                1769,
+                923,
+                1792,
+                1764,
+                1491,
+                1807,
+                1768,
+                1157,
+                1808,
+                1491,
+                1721,
+                1816,
+                1783,
+                901,
+                1468,
+                1824,
+                1743,
+                1801,
+                1745,
+                1656,
+                1425,
+                1745,
+                1775,
+                1807,
+                714,
+                1755,
+                1704,
+                1661,
+                1493,
+                776,
+                1783,
+                416,
+                1670,
+                1406,
+                1769,
+                362,
+                1636,
+                1464,
+                1651,
+                1403,
+                1800,
+                1426,
+                1831,
+                1827,
+                1160,
+                1759,
+                1720,
+                1651,
+                1762,
+                1331,
+                1746,
+                1433,
+                1466,
+                1023,
+                1425,
+                1742,
+                486,
+                1771,
+                1816,
+                1301,
+                1583,
+                320,
+                1300,
+                315,
+                52,
+                1217,
+                67,
+                502,
+                1485,
+                848,
+                1734,
+                1387,
+                1783,
+                1626,
+                920,
+                361,
+                1715,
+                1657,
+                1560,
+                85,
+                1562
+            ]
+        },
+        {
+            "word": "because",
+            "duration": 0.48,
+            "codes": [
+                1756,
+                844,
+                245,
+                1310,
+                312,
+                344,
+                1734,
+                1319,
+                1722,
+                1386,
+                1230,
+                461,
+                1344,
+                847,
+                658,
+                1078,
+                1554,
+                537,
+                987,
+                848,
+                1055,
+                840,
+                1710,
+                736,
+                1679,
+                213,
+                844,
+                731,
+                631,
+                1638,
+                166,
+                858,
+                1535,
+                50,
+                1651,
+                713
+            ]
+        },
+        {
+            "word": "she",
+            "duration": 0.38,
+            "codes": [
+                556,
+                1735,
+                654,
+                1524,
+                1769,
+                1387,
+                639,
+                1463,
+                1625,
+                1726,
+                1664,
+                1691,
+                1531,
+                1603,
+                1833,
+                121,
+                1627,
+                1757,
+                736,
+                1583,
+                1684,
+                1741,
+                1831,
+                1791,
+                1034,
+                1807,
+                1338,
+                1737
+            ]
+        },
+        {
+            "word": "gave",
+            "duration": 0.76,
+            "codes": [
+                1790,
+                430,
+                1310,
+                399,
+                599,
+                1542,
+                1394,
+                1075,
+                834,
+                428,
+                1015,
+                249,
+                362,
+                945,
+                108,
+                1308,
+                29,
+                362,
+                1766,
+                448,
+                1370,
+                197,
+                298,
+                1353,
+                1566,
+                1485,
+                1341,
+                1544,
+                1468,
+                1366,
+                849,
+                1584,
+                1441,
+                1696,
+                1610,
+                1702,
+                702,
+                1508,
+                1653,
+                1508,
+                1535,
+                502,
+                1485,
+                232,
+                648,
+                863,
+                631,
+                348,
+                372,
+                129,
+                1296,
+                253,
+                1599,
+                1364,
+                315,
+                920,
+                18,
+                183
+            ]
+        },
+        {
+            "word": "so",
+            "duration": 0.14,
+            "codes": [
+                428,
+                372,
+                15,
+                202,
+                286,
+                1344,
+                714,
+                966,
+                1341,
+                184
+            ]
+        }
+    ]
+}

default_speakers/zawadi.json ADDED Viewed

	@@ -0,0 +1,396 @@

+{
+    "text": "Scientists have discovered a new planet that may be capable of supporting life!",
+    "words": [
+        {
+            "word": "scientists",
+            "duration": "1.00",
+            "codes": [
+                258,
+                551,
+                21,
+                401,
+                509,
+                235,
+                151,
+                94,
+                194,
+                496,
+                241,
+                420,
+                606,
+                256,
+                311,
+                464,
+                343,
+                765,
+                56,
+                23,
+                209,
+                72,
+                851,
+                360,
+                442,
+                257,
+                457,
+                75,
+                265,
+                227,
+                16,
+                167,
+                194,
+                391,
+                68,
+                786,
+                1642,
+                888,
+                884,
+                1688,
+                1021,
+                1270,
+                1250,
+                640,
+                1471,
+                1193,
+                1117,
+                95,
+                158,
+                587,
+                1484,
+                1054,
+                947,
+                521,
+                234,
+                502,
+                1172,
+                1379,
+                1332,
+                1267,
+                1659,
+                226,
+                325,
+                404,
+                634,
+                713,
+                333,
+                1210,
+                1028,
+                700,
+                1804,
+                1549,
+                1552,
+                1527,
+                701,
+                895
+            ]
+        },
+        {
+            "word": "have",
+            "duration": "0.16",
+            "codes": [
+                652,
+                1487,
+                1045,
+                665,
+                384,
+                908,
+                1073,
+                903,
+                169,
+                91,
+                1242,
+                59,
+                1614
+            ]
+        },
+        {
+            "word": "discovered",
+            "duration": "0.52",
+            "codes": [
+                1523,
+                519,
+                1311,
+                1166,
+                1049,
+                368,
+                176,
+                1546,
+                990,
+                546,
+                1091,
+                872,
+                975,
+                224,
+                419,
+                1714,
+                1247,
+                1769,
+                1141,
+                811,
+                1149,
+                320,
+                1161,
+                982,
+                732,
+                473,
+                1025,
+                470,
+                1253,
+                1345,
+                965,
+                916,
+                407,
+                844,
+                594,
+                1710,
+                193,
+                740,
+                761,
+                1740
+            ]
+        },
+        {
+            "word": "a",
+            "duration": "0.08",
+            "codes": [
+                5,
+                414,
+                1608,
+                449,
+                1643,
+                1732,
+                1653
+            ]
+        },
+        {
+            "word": "new",
+            "duration": "0.18",
+            "codes": [
+                396,
+                1599,
+                1733,
+                250,
+                1624,
+                485,
+                1645,
+                771,
+                1630,
+                736,
+                336,
+                476,
+                641,
+                345
+            ]
+        },
+        {
+            "word": "planet",
+            "duration": "0.38",
+            "codes": [
+                21,
+                131,
+                1743,
+                1082,
+                1707,
+                86,
+                1075,
+                883,
+                944,
+                1103,
+                790,
+                978,
+                860,
+                1738,
+                1060,
+                749,
+                171,
+                679,
+                1144,
+                966,
+                1532,
+                1179,
+                714,
+                1123,
+                1308,
+                1524,
+                752,
+                1613,
+                1266
+            ]
+        },
+        {
+            "word": "that",
+            "duration": "0.14",
+            "codes": [
+                64,
+                32,
+                1457,
+                1095,
+                931,
+                1774,
+                1017,
+                1661,
+                1713,
+                355,
+                1708
+            ]
+        },
+        {
+            "word": "may",
+            "duration": "0.12",
+            "codes": [
+                1800,
+                1070,
+                1452,
+                1185,
+                1295,
+                26,
+                638,
+                240,
+                1480,
+                1461
+            ]
+        },
+        {
+            "word": "be",
+            "duration": "0.12",
+            "codes": [
+                859,
+                729,
+                848,
+                1131,
+                1618,
+                928,
+                331,
+                504,
+                487,
+                417
+            ]
+        },
+        {
+            "word": "capable",
+            "duration": "0.42",
+            "codes": [
+                686,
+                1040,
+                28,
+                1456,
+                1056,
+                1133,
+                901,
+                1127,
+                693,
+                1406,
+                20,
+                118,
+                141,
+                572,
+                845,
+                1280,
+                353,
+                1726,
+                338,
+                1413,
+                484,
+                272,
+                1569,
+                144,
+                1581,
+                437,
+                1502,
+                963,
+                1415,
+                655,
+                949,
+                1289
+            ]
+        },
+        {
+            "word": "of",
+            "duration": "0.10",
+            "codes": [
+                1198,
+                1755,
+                1478,
+                1548,
+                802,
+                1513,
+                1290,
+                636
+            ]
+        },
+        {
+            "word": "supporting",
+            "duration": "0.54",
+            "codes": [
+                541,
+                867,
+                750,
+                1505,
+                754,
+                1344,
+                1032,
+                734,
+                505,
+                559,
+                220,
+                288,
+                342,
+                591,
+                1459,
+                1721,
+                490,
+                825,
+                80,
+                1221,
+                1234,
+                639,
+                1052,
+                450,
+                1557,
+                1302,
+                784,
+                1547,
+                823,
+                527,
+                1667,
+                1437,
+                832,
+                1366,
+                674,
+                1607,
+                486,
+                893,
+                1748,
+                792,
+                1757
+            ]
+        },
+        {
+            "word": "life",
+            "duration": "0.28",
+            "codes": [
+                1761,
+                149,
+                1501,
+                1342,
+                1063,
+                1124,
+                117,
+                1225,
+                1115,
+                1155,
+                1815,
+                1035,
+                936,
+                807,
+                930,
+                1514,
+                837,
+                1104,
+                1145,
+                1164,
+                1687,
+                1589
+            ]
+        }
+    ]
+}

default_speakers_local/hausa_female1.json ADDED Viewed

	@@ -0,0 +1,273 @@

+{
+    "text": "Idan hira tayi \u0257a\u0257i bana son na tashi.",
+    "words": [
+        {
+            "word": "idan",
+            "duration": "0.52",
+            "codes": [
+                165,
+                338,
+                781,
+                661,
+                601,
+                691,
+                1154,
+                762,
+                691,
+                523,
+                641,
+                378,
+                1464,
+                38,
+                1280,
+                243,
+                1784,
+                195,
+                5,
+                1679,
+                77,
+                530,
+                1527,
+                270,
+                243,
+                374,
+                200,
+                157,
+                152,
+                228,
+                768,
+                743,
+                104,
+                221,
+                968,
+                479,
+                321,
+                1679,
+                1279
+            ]
+        },
+        {
+            "word": "hira",
+            "duration": "0.38",
+            "codes": [
+                1587,
+                1544,
+                683,
+                92,
+                1255,
+                46,
+                106,
+                636,
+                320,
+                53,
+                249,
+                123,
+                1140,
+                1290,
+                93,
+                553,
+                0,
+                1192,
+                210,
+                587,
+                1184,
+                764,
+                215,
+                221,
+                2,
+                1115,
+                1079,
+                1033
+            ]
+        },
+        {
+            "word": "tayi",
+            "duration": "0.38",
+            "codes": [
+                447,
+                1292,
+                198,
+                50,
+                1439,
+                1191,
+                1399,
+                106,
+                880,
+                844,
+                306,
+                466,
+                74,
+                260,
+                152,
+                723,
+                723,
+                687,
+                306,
+                195,
+                648,
+                466,
+                30,
+                1110,
+                637,
+                384,
+                1131,
+                342,
+                392
+            ]
+        },
+        {
+            "word": "dadi",
+            "duration": "0.38",
+            "codes": [
+                751,
+                412,
+                212,
+                306,
+                388,
+                589,
+                446,
+                479,
+                880,
+                768,
+                467,
+                699,
+                128,
+                665,
+                882,
+                908,
+                171,
+                1146,
+                1297,
+                687,
+                901,
+                1110,
+                153,
+                386,
+                1330,
+                1283,
+                1181,
+                1070,
+                766
+            ]
+        },
+        {
+            "word": "bana",
+            "duration": "0.46",
+            "codes": [
+                534,
+                1440,
+                1102,
+                1194,
+                474,
+                252,
+                39,
+                367,
+                116,
+                212,
+                36,
+                115,
+                76,
+                1173,
+                931,
+                1285,
+                1630,
+                678,
+                1087,
+                208,
+                1055,
+                441,
+                545,
+                324,
+                1192,
+                179,
+                1147,
+                897,
+                1387,
+                1283,
+                10,
+                1,
+                654,
+                863,
+                103
+            ]
+        },
+        {
+            "word": "son",
+            "duration": "0.22",
+            "codes": [
+                198,
+                507,
+                1477,
+                915,
+                215,
+                267,
+                1232,
+                1041,
+                569,
+                1596,
+                1759,
+                229,
+                901,
+                1774,
+                1487,
+                51
+            ]
+        },
+        {
+            "word": "na",
+            "duration": "0.16",
+            "codes": [
+                251,
+                243,
+                965,
+                215,
+                135,
+                711,
+                105,
+                1350,
+                1556,
+                226,
+                459,
+                68
+            ]
+        },
+        {
+            "word": "tashi",
+            "duration": "0.42",
+            "codes": [
+                20,
+                502,
+                610,
+                179,
+                711,
+                800,
+                424,
+                352,
+                102,
+                569,
+                67,
+                262,
+                855,
+                413,
+                63,
+                701,
+                1719,
+                262,
+                383,
+                1166,
+                358,
+                1331,
+                596,
+                383,
+                1351,
+                96,
+                1170,
+                1061,
+                1059,
+                1392,
+                328,
+                1471
+            ]
+        }
+    ]
+}

default_speakers_local/hausa_female2.json ADDED Viewed

	@@ -0,0 +1,273 @@

+{
+    "text": "Idan hira tayi \u0257a\u0257i bana son na tashi.",
+    "words": [
+        {
+            "word": "idan",
+            "duration": "0.52",
+            "codes": [
+                165,
+                338,
+                781,
+                661,
+                601,
+                691,
+                1154,
+                762,
+                691,
+                523,
+                641,
+                378,
+                1464,
+                38,
+                1280,
+                243,
+                1784,
+                195,
+                5,
+                1679,
+                77,
+                530,
+                1527,
+                270,
+                243,
+                374,
+                200,
+                157,
+                152,
+                228,
+                768,
+                743,
+                104,
+                221,
+                968,
+                479,
+                321,
+                1679,
+                1279
+            ]
+        },
+        {
+            "word": "hira",
+            "duration": "0.38",
+            "codes": [
+                1587,
+                1544,
+                683,
+                92,
+                1255,
+                46,
+                106,
+                636,
+                320,
+                53,
+                249,
+                123,
+                1140,
+                1290,
+                93,
+                553,
+                0,
+                1192,
+                210,
+                587,
+                1184,
+                764,
+                215,
+                221,
+                2,
+                1115,
+                1079,
+                1033
+            ]
+        },
+        {
+            "word": "tayi",
+            "duration": "0.38",
+            "codes": [
+                447,
+                1292,
+                198,
+                50,
+                1439,
+                1191,
+                1399,
+                106,
+                880,
+                844,
+                306,
+                466,
+                74,
+                260,
+                152,
+                723,
+                723,
+                687,
+                306,
+                195,
+                648,
+                466,
+                30,
+                1110,
+                637,
+                384,
+                1131,
+                342,
+                392
+            ]
+        },
+        {
+            "word": "dadi",
+            "duration": "0.38",
+            "codes": [
+                751,
+                412,
+                212,
+                306,
+                388,
+                589,
+                446,
+                479,
+                880,
+                768,
+                467,
+                699,
+                128,
+                665,
+                882,
+                908,
+                171,
+                1146,
+                1297,
+                687,
+                901,
+                1110,
+                153,
+                386,
+                1330,
+                1283,
+                1181,
+                1070,
+                766
+            ]
+        },
+        {
+            "word": "bana",
+            "duration": "0.46",
+            "codes": [
+                534,
+                1440,
+                1102,
+                1194,
+                474,
+                252,
+                39,
+                367,
+                116,
+                212,
+                36,
+                115,
+                76,
+                1173,
+                931,
+                1285,
+                1630,
+                678,
+                1087,
+                208,
+                1055,
+                441,
+                545,
+                324,
+                1192,
+                179,
+                1147,
+                897,
+                1387,
+                1283,
+                10,
+                1,
+                654,
+                863,
+                103
+            ]
+        },
+        {
+            "word": "son",
+            "duration": "0.22",
+            "codes": [
+                198,
+                507,
+                1477,
+                915,
+                215,
+                267,
+                1232,
+                1041,
+                569,
+                1596,
+                1759,
+                229,
+                901,
+                1774,
+                1487,
+                51
+            ]
+        },
+        {
+            "word": "na",
+            "duration": "0.16",
+            "codes": [
+                251,
+                243,
+                965,
+                215,
+                135,
+                711,
+                105,
+                1350,
+                1556,
+                226,
+                459,
+                68
+            ]
+        },
+        {
+            "word": "tashi",
+            "duration": "0.42",
+            "codes": [
+                20,
+                502,
+                610,
+                179,
+                711,
+                800,
+                424,
+                352,
+                102,
+                569,
+                67,
+                262,
+                855,
+                413,
+                63,
+                701,
+                1719,
+                262,
+                383,
+                1166,
+                358,
+                1331,
+                596,
+                383,
+                1351,
+                96,
+                1170,
+                1061,
+                1059,
+                1392,
+                328,
+                1471
+            ]
+        }
+    ]
+}

default_speakers_local/hausa_male1.json ADDED Viewed

	@@ -0,0 +1,367 @@

+{
+    "text": "Eh, mun za\u0253i yin wasan kwaikwayo don nuna al'adunmu yayin ranar al'ada.",
+    "words": [
+        {
+            "word": "eh",
+            "duration": "0.86",
+            "codes": [
+                165,
+                226,
+                1145,
+                284,
+                77,
+                187,
+                459,
+                77,
+                691,
+                278,
+                643,
+                247,
+                156,
+                204,
+                89,
+                1247,
+                52,
+                1350,
+                433,
+                812,
+                328,
+                553,
+                648,
+                602,
+                1075,
+                243,
+                557,
+                507,
+                645,
+                352,
+                29,
+                451,
+                83,
+                787,
+                10,
+                1000,
+                1791,
+                620,
+                188,
+                1681,
+                447,
+                752,
+                1405,
+                1070,
+                861,
+                1142,
+                163,
+                1293,
+                674,
+                250,
+                724,
+                259,
+                624,
+                676,
+                259,
+                1114,
+                526,
+                199,
+                724,
+                163,
+                168,
+                447,
+                663,
+                1471
+            ]
+        },
+        {
+            "word": "mun",
+            "duration": "0.22",
+            "codes": [
+                651,
+                617,
+                1411,
+                389,
+                1329,
+                491,
+                1680,
+                1053,
+                618,
+                488,
+                1494,
+                1224,
+                1259,
+                1317,
+                1457,
+                508,
+                1341
+            ]
+        },
+        {
+            "word": "zabi",
+            "duration": "0.40",
+            "codes": [
+                1777,
+                0,
+                1794,
+                83,
+                74,
+                462,
+                1170,
+                1212,
+                159,
+                1361,
+                384,
+                373,
+                218,
+                613,
+                1583,
+                1311,
+                188,
+                1466,
+                338,
+                405,
+                1321,
+                307,
+                1161,
+                1623,
+                293,
+                1644,
+                858,
+                703,
+                911,
+                326
+            ]
+        },
+        {
+            "word": "yin",
+            "duration": "0.20",
+            "codes": [
+                1715,
+                870,
+                341,
+                1711,
+                1542,
+                429,
+                1565,
+                326,
+                1771,
+                966,
+                91,
+                614,
+                620,
+                647,
+                1755
+            ]
+        },
+        {
+            "word": "wasan",
+            "duration": "0.44",
+            "codes": [
+                1070,
+                520,
+                973,
+                754,
+                83,
+                997,
+                1253,
+                982,
+                359,
+                537,
+                1115,
+                1677,
+                1358,
+                1250,
+                1403,
+                1637,
+                881,
+                382,
+                1754,
+                589,
+                1131,
+                88,
+                1256,
+                988,
+                83,
+                672,
+                644,
+                847,
+                322,
+                983,
+                1305,
+                31,
+                967
+            ]
+        },
+        {
+            "word": "kwaikwayo",
+            "duration": "0.58",
+            "codes": [
+                1071,
+                1003,
+                1811,
+                684,
+                1210,
+                553,
+                1535,
+                491,
+                398,
+                222,
+                315,
+                439,
+                205,
+                174,
+                1742,
+                1373,
+                259,
+                1185,
+                1787,
+                516,
+                1440,
+                646,
+                1402,
+                267,
+                1677,
+                553,
+                344,
+                429,
+                202,
+                389,
+                782,
+                662,
+                388,
+                177,
+                553,
+                1413,
+                491,
+                554,
+                222,
+                759,
+                111,
+                1719,
+                1305,
+                437
+            ]
+        },
+        {
+            "word": "don",
+            "duration": "0.24",
+            "codes": [
+                144,
+                824,
+                90,
+                637,
+                439,
+                138,
+                593,
+                609,
+                617,
+                1247,
+                444,
+                793,
+                600,
+                1425,
+                1379,
+                283,
+                995,
+                1804
+            ]
+        },
+        {
+            "word": "nuna",
+            "duration": "0.40",
+            "codes": [
+                389,
+                669,
+                1804,
+                506,
+                1668,
+                1621,
+                341,
+                913,
+                1495,
+                1819,
+                112,
+                647,
+                743,
+                1612,
+                506,
+                1320,
+                1648,
+                106,
+                1107,
+                579,
+                326,
+                140,
+                1220,
+                936,
+                661,
+                729,
+                1183,
+                441,
+                797,
+                309
+            ]
+        },
+        {
+            "word": "aladunmu",
+            "duration": "0.76",
+            "codes": [
+                1260,
+                179,
+                1240,
+                68,
+                753,
+                807,
+                1808,
+                894,
+                140,
+                791,
+                1486,
+                1276,
+                1471,
+                1132,
+                573,
+                797,
+                1307,
+                271,
+                632,
+                1059,
+                699,
+                816,
+                282,
+                908,
+                1240,
+                41,
+                144,
+                1721,
+                322,
+                237,
+                1284,
+                1312,
+                1444,
+                521,
+                593,
+                753,
+                506,
+                1024,
+                439,
+                1142,
+                1790,
+                478,
+                1164,
+                953,
+                1727,
+                1078,
+                564,
+                1665,
+                482,
+                976,
+                910,
+                727,
+                297,
+                677,
+                297,
+                507,
+                1157
+            ]
+        }
+    ]
+}

default_speakers_local/hausa_male2.json ADDED Viewed

	@@ -0,0 +1,207 @@

+{
+    "text": "Audu ya hau jirgi a Kaduna.",
+    "words": [
+        {
+            "word": "audu",
+            "duration": "0.75",
+            "codes": [
+                165,
+                167,
+                68,
+                567,
+                156,
+                351,
+                337,
+                156,
+                351,
+                337,
+                337,
+                219,
+                584,
+                156,
+                762,
+                334,
+                185,
+                156,
+                334,
+                762,
+                156,
+                337,
+                612,
+                219,
+                691,
+                185,
+                156,
+                204,
+                862,
+                777,
+                589,
+                173,
+                550,
+                128,
+                489,
+                182,
+                74,
+                255,
+                427,
+                1554,
+                945,
+                289,
+                79,
+                875,
+                442,
+                1664,
+                464,
+                230,
+                1500,
+                181,
+                1152,
+                286,
+                103,
+                662,
+                125
+            ]
+        },
+        {
+            "word": "ya",
+            "duration": "0.22",
+            "codes": [
+                201,
+                1332,
+                67,
+                1041,
+                248,
+                901,
+                352,
+                969,
+                642,
+                105,
+                215,
+                411,
+                408,
+                1235,
+                1212,
+                1264,
+                653
+            ]
+        },
+        {
+            "word": "hau",
+            "duration": "0.22",
+            "codes": [
+                1083,
+                913,
+                1026,
+                1295,
+                1473,
+                1399,
+                41,
+                629,
+                1081,
+                623,
+                536,
+                890,
+                1554,
+                384,
+                1664,
+                921,
+                325
+            ]
+        },
+        {
+            "word": "jirgi",
+            "duration": "0.48",
+            "codes": [
+                486,
+                1536,
+                597,
+                1088,
+                1743,
+                1286,
+                340,
+                949,
+                116,
+                1441,
+                1550,
+                28,
+                1073,
+                973,
+                233,
+                1319,
+                733,
+                465,
+                1152,
+                1644,
+                773,
+                1651,
+                175,
+                1281,
+                1563,
+                11,
+                1773,
+                1323,
+                30,
+                10,
+                424,
+                293,
+                1437,
+                1484,
+                1072,
+                370
+            ]
+        },
+        {
+            "word": "a",
+            "duration": "0.10",
+            "codes": [
+                159,
+                697,
+                53,
+                1040,
+                1256,
+                264,
+                710,
+                1251
+            ]
+        },
+        {
+            "word": "kaduna",
+            "duration": "0.44",
+            "codes": [
+                1203,
+                764,
+                1473,
+                1156,
+                400,
+                212,
+                1698,
+                1217,
+                145,
+                1569,
+                1151,
+                1056,
+                1700,
+                1527,
+                629,
+                1747,
+                1350,
+                738,
+                1734,
+                55,
+                1595,
+                890,
+                55,
+                1364,
+                203,
+                281,
+                952,
+                1234,
+                452,
+                93,
+                1036,
+                565,
+                969
+            ]
+        }
+    ]
+}

default_speakers_local/igbo_female1.json ADDED Viewed

	@@ -0,0 +1,246 @@

+{
+    "text": "Codeine na-agba ah\u1ee5 \u1ecbnweta.",
+    "words": [
+        {
+            "word": "codeine",
+            "duration": "0.68",
+            "codes": [
+                165,
+                336,
+                1359,
+                661,
+                199,
+                379,
+                585,
+                1742,
+                210,
+                303,
+                388,
+                412,
+                1772,
+                794,
+                1607,
+                467,
+                622,
+                201,
+                575,
+                447,
+                319,
+                352,
+                234,
+                1797,
+                405,
+                1703,
+                1831,
+                1163,
+                1826,
+                1152,
+                563,
+                696,
+                1284,
+                157,
+                100,
+                402,
+                315,
+                1036,
+                1298,
+                592,
+                1177,
+                665,
+                7,
+                794,
+                509,
+                192,
+                1092,
+                821,
+                1022,
+                834,
+                132
+            ]
+        },
+        {
+            "word": "na",
+            "duration": "0.20",
+            "codes": [
+                1764,
+                1340,
+                1394,
+                1341,
+                146,
+                303,
+                1102,
+                172,
+                366,
+                1263,
+                708,
+                164,
+                836,
+                1424,
+                81
+            ]
+        },
+        {
+            "word": "agba",
+            "duration": "0.76",
+            "codes": [
+                994,
+                841,
+                816,
+                744,
+                1743,
+                1051,
+                1023,
+                1556,
+                331,
+                1706,
+                160,
+                160,
+                403,
+                142,
+                565,
+                723,
+                140,
+                874,
+                339,
+                186,
+                1229,
+                309,
+                461,
+                1015,
+                81,
+                297,
+                1206,
+                1041,
+                585,
+                960,
+                1007,
+                223,
+                578,
+                1142,
+                242,
+                1215,
+                261,
+                857,
+                1390,
+                334,
+                837,
+                735,
+                334,
+                649,
+                563,
+                544,
+                672,
+                316,
+                544,
+                630,
+                337,
+                601,
+                978,
+                956,
+                642,
+                552,
+                164
+            ]
+        },
+        {
+            "word": "ahu",
+            "duration": "0.72",
+            "codes": [
+                254,
+                1014,
+                571,
+                208,
+                1388,
+                393,
+                467,
+                1453,
+                402,
+                361,
+                1464,
+                665,
+                1468,
+                1643,
+                858,
+                1663,
+                1381,
+                1596,
+                1420,
+                1235,
+                1287,
+                1483,
+                277,
+                1753,
+                949,
+                483,
+                1554,
+                787,
+                1407,
+                1100,
+                1035,
+                578,
+                591,
+                504,
+                460,
+                712,
+                838,
+                516,
+                620,
+                460,
+                223,
+                928,
+                1422,
+                1513,
+                1699,
+                513,
+                896,
+                242,
+                313,
+                1634,
+                1237,
+                249,
+                153,
+                1056,
+                508
+            ]
+        },
+        {
+            "word": "inweta",
+            "duration": "0.44",
+            "codes": [
+                1391,
+                416,
+                182,
+                488,
+                500,
+                1544,
+                1237,
+                577,
+                1813,
+                860,
+                749,
+                679,
+                51,
+                682,
+                506,
+                79,
+                49,
+                254,
+                987,
+                348,
+                1418,
+                1688,
+                1735,
+                1658,
+                544,
+                16,
+                1777,
+                309,
+                25,
+                1317,
+                146,
+                1333,
+                147
+            ]
+        }
+    ]
+}

default_speakers_local/igbo_female2.json ADDED Viewed

	@@ -0,0 +1,202 @@

+{
+    "text": "Umunwoke n\u1ecd na \u1ecct\u1ee5t\u1ee5 \u1ecdr\u1ee5 \u1ecdch\u1ecbch\u1ecb",
+    "words": [
+        {
+            "word": "umunwoke",
+            "duration": "0.79",
+            "codes": [
+                156,
+                1807,
+                1225,
+                976,
+                950,
+                1205,
+                957,
+                669,
+                838,
+                1142,
+                781,
+                666,
+                1151,
+                1219,
+                1044,
+                42,
+                51,
+                1712,
+                893,
+                963,
+                438,
+                30,
+                529,
+                792,
+                1769,
+                102,
+                834,
+                1398,
+                1258,
+                1460,
+                1407,
+                1265,
+                1615,
+                682,
+                455,
+                488,
+                395,
+                376,
+                1136,
+                1391,
+                79,
+                1052,
+                1747,
+                1739,
+                351,
+                1421,
+                423,
+                344,
+                253,
+                1098,
+                479,
+                1077,
+                243,
+                364,
+                1812,
+                315,
+                1073,
+                832
+            ]
+        },
+        {
+            "word": "no",
+            "duration": "0.16",
+            "codes": [
+                175,
+                1407,
+                458,
+                860,
+                1025,
+                65,
+                1443,
+                1482,
+                371,
+                1257,
+                890,
+                1161,
+                449
+            ]
+        },
+        {
+            "word": "na",
+            "duration": "0.10",
+            "codes": [
+                1650,
+                639,
+                322,
+                1596,
+                741,
+                987,
+                1452
+            ]
+        },
+        {
+            "word": "otutu",
+            "duration": "0.38",
+            "codes": [
+                371,
+                1107,
+                1444,
+                794,
+                1517,
+                504,
+                930,
+                767,
+                990,
+                507,
+                1314,
+                1766,
+                1073,
+                1229,
+                1525,
+                1664,
+                460,
+                896,
+                1230,
+                640,
+                507,
+                919,
+                1104,
+                1320,
+                1022,
+                234,
+                520,
+                583,
+                959
+            ]
+        },
+        {
+            "word": "oru",
+            "duration": "0.28",
+            "codes": [
+                324,
+                943,
+                65,
+                613,
+                709,
+                128,
+                384,
+                681,
+                1071,
+                1732,
+                1392,
+                616,
+                706,
+                679,
+                510,
+                934,
+                37,
+                76,
+                1032,
+                1618,
+                944
+            ]
+        },
+        {
+            "word": "ochichi",
+            "duration": "0.44",
+            "codes": [
+                1234,
+                1267,
+                295,
+                1278,
+                891,
+                1652,
+                1142,
+                435,
+                356,
+                599,
+                70,
+                517,
+                1303,
+                788,
+                1314,
+                57,
+                1700,
+                1790,
+                432,
+                1495,
+                435,
+                823,
+                1583,
+                350,
+                290,
+                656,
+                70,
+                1074,
+                1104,
+                911,
+                1297,
+                1708,
+                1826
+            ]
+        }
+    ]
+}

default_speakers_local/igbo_male2.json ADDED Viewed

	@@ -0,0 +1,277 @@

+{
+    "text": "Any\u1ecb na-eji nkw\u1ee5 n'ihu na-eme fan aka",
+    "words": [
+        {
+            "word": "anyi",
+            "duration": "0.79",
+            "codes": [
+                165,
+                226,
+                672,
+                278,
+                1279,
+                924,
+                1648,
+                1079,
+                1010,
+                1321,
+                869,
+                964,
+                1118,
+                964,
+                691,
+                1033,
+                964,
+                762,
+                981,
+                772,
+                630,
+                967,
+                676,
+                676,
+                460,
+                567,
+                680,
+                301,
+                334,
+                981,
+                301,
+                334,
+                981,
+                316,
+                316,
+                316,
+                223,
+                1007,
+                571,
+                524,
+                402,
+                147,
+                367,
+                402,
+                303,
+                182,
+                1729,
+                510,
+                914,
+                293,
+                1636,
+                683,
+                500,
+                1369,
+                451,
+                756,
+                1339,
+                1619
+            ]
+        },
+        {
+            "word": "na",
+            "duration": "0.12",
+            "codes": [
+                1756,
+                593,
+                1446,
+                48,
+                67,
+                96,
+                759,
+                488,
+                69
+            ]
+        },
+        {
+            "word": "eji",
+            "duration": "0.26",
+            "codes": [
+                367,
+                890,
+                357,
+                966,
+                654,
+                41,
+                1478,
+                1637,
+                1381,
+                654,
+                330,
+                844,
+                372,
+                1147,
+                202,
+                206,
+                148,
+                455,
+                50,
+                592
+            ]
+        },
+        {
+            "word": "nkwu",
+            "duration": "0.28",
+            "codes": [
+                506,
+                515,
+                1363,
+                1663,
+                1464,
+                1383,
+                1770,
+                1251,
+                1639,
+                1705,
+                1634,
+                1464,
+                583,
+                1008,
+                1384,
+                557,
+                1002,
+                716,
+                952,
+                1552,
+                506
+            ]
+        },
+        {
+            "word": "nihu",
+            "duration": "0.36",
+            "codes": [
+                1366,
+                1650,
+                716,
+                890,
+                1494,
+                189,
+                687,
+                439,
+                15,
+                45,
+                297,
+                48,
+                33,
+                335,
+                1591,
+                1560,
+                1574,
+                1368,
+                1069,
+                1394,
+                1166,
+                1457,
+                109,
+                143,
+                1574,
+                1663,
+                286
+            ]
+        },
+        {
+            "word": "na",
+            "duration": "0.14",
+            "codes": [
+                1748,
+                1454,
+                1238,
+                407,
+                148,
+                30,
+                49,
+                789,
+                488,
+                137,
+                1166
+            ]
+        },
+        {
+            "word": "eme",
+            "duration": "0.32",
+            "codes": [
+                537,
+                471,
+                1136,
+                1296,
+                1284,
+                217,
+                1516,
+                593,
+                704,
+                1002,
+                433,
+                205,
+                263,
+                1247,
+                665,
+                428,
+                269,
+                22,
+                519,
+                1400,
+                400,
+                1400,
+                1171,
+                493
+            ]
+        },
+        {
+            "word": "fan",
+            "duration": "0.40",
+            "codes": [
+                1212,
+                911,
+                640,
+                1265,
+                386,
+                352,
+                102,
+                252,
+                642,
+                1182,
+                985,
+                115,
+                730,
+                347,
+                173,
+                1676,
+                794,
+                363,
+                1217,
+                1388,
+                736,
+                843,
+                1422,
+                660,
+                1160,
+                474,
+                1403,
+                142,
+                1278,
+                147
+            ]
+        },
+        {
+            "word": "aka",
+            "duration": "0.24",
+            "codes": [
+                1492,
+                402,
+                1280,
+                595,
+                1732,
+                1697,
+                838,
+                1809,
+                1199,
+                724,
+                337,
+                516,
+                948,
+                1700,
+                1129,
+                901,
+                934,
+                1110
+            ]
+        }
+    ]
+}

default_speakers_local/yoruba_female1.json ADDED Viewed

	@@ -0,0 +1,416 @@

+{
+    "text": "Kulikuli j\u1eb9\u0301 \u01f9kan \u00ecpanu t\u00ed w\u00f3\u0323n \u1e63e n\u00edpa l\u00edlo \u1eb9\u0300p\u00e0, p\u1eb9lu or\u00eds\u00ec\u00edr\u00eds\u00ec\u00ed \u01f9kan",
+    "words": [
+        {
+            "word": "kulikuli",
+            "duration": "0.50",
+            "codes": [
+                156,
+                1777,
+                479,
+                1086,
+                243,
+                127,
+                170,
+                1275,
+                1470,
+                392,
+                278,
+                837,
+                1142,
+                284,
+                1411,
+                1742,
+                1280,
+                87,
+                898,
+                228,
+                67,
+                1499,
+                1568,
+                1035,
+                978,
+                157,
+                1078,
+                243,
+                1708,
+                170,
+                1498,
+                346,
+                344,
+                526,
+                1039,
+                316,
+                526
+            ]
+        },
+        {
+            "word": "je",
+            "duration": "0.28",
+            "codes": [
+                1570,
+                1290,
+                654,
+                328,
+                816,
+                270,
+                402,
+                271,
+                76,
+                43,
+                1259,
+                303,
+                371,
+                1077,
+                560,
+                1117,
+                1108,
+                1110,
+                1481,
+                691,
+                1825
+            ]
+        },
+        {
+            "word": "nkan",
+            "duration": "0.26",
+            "codes": [
+                1465,
+                1312,
+                538,
+                1807,
+                1152,
+                27,
+                20,
+                379,
+                1378,
+                1505,
+                84,
+                959,
+                756,
+                107,
+                949,
+                996,
+                1358,
+                1286,
+                755,
+                1686
+            ]
+        },
+        {
+            "word": "ipanu",
+            "duration": "0.54",
+            "codes": [
+                371,
+                1224,
+                458,
+                1601,
+                241,
+                247,
+                620,
+                423,
+                584,
+                905,
+                411,
+                1209,
+                309,
+                88,
+                1511,
+                164,
+                552,
+                1104,
+                140,
+                737,
+                1699,
+                595,
+                1257,
+                544,
+                1733,
+                169,
+                1339,
+                1830,
+                123,
+                1048,
+                1378,
+                1817,
+                775,
+                1093,
+                669,
+                1663,
+                464,
+                1536,
+                696,
+                1120,
+                781
+            ]
+        },
+        {
+            "word": "ti",
+            "duration": "0.22",
+            "codes": [
+                724,
+                1120,
+                1250,
+                885,
+                432,
+                1556,
+                1803,
+                759,
+                234,
+                1104,
+                1264,
+                205,
+                892,
+                1223,
+                1051,
+                1141
+            ]
+        },
+        {
+            "word": "won",
+            "duration": "0.26",
+            "codes": [
+                205,
+                1004,
+                1107,
+                386,
+                951,
+                53,
+                339,
+                1186,
+                664,
+                874,
+                1245,
+                547,
+                1320,
+                918,
+                1363,
+                1638,
+                654,
+                279,
+                1040,
+                739
+            ]
+        },
+        {
+            "word": "se",
+            "duration": "0.22",
+            "codes": [
+                1082,
+                878,
+                760,
+                1094,
+                973,
+                656,
+                142,
+                10,
+                170,
+                1744,
+                170,
+                495,
+                2,
+                379,
+                725,
+                1816
+            ]
+        },
+        {
+            "word": "nipa",
+            "duration": "0.36",
+            "codes": [
+                963,
+                1436,
+                49,
+                43,
+                386,
+                1731,
+                537,
+                121,
+                496,
+                666,
+                423,
+                668,
+                851,
+                811,
+                737,
+                25,
+                260,
+                1313,
+                300,
+                303,
+                951,
+                1153,
+                172,
+                589,
+                1831,
+                1088,
+                378
+            ]
+        },
+        {
+            "word": "lilo",
+            "duration": "0.30",
+            "codes": [
+                451,
+                1801,
+                1800,
+                967,
+                1313,
+                49,
+                1814,
+                659,
+                858,
+                534,
+                1217,
+                727,
+                609,
+                651,
+                1411,
+                688,
+                321,
+                47,
+                1271,
+                79,
+                362,
+                816,
+                157
+            ]
+        },
+        {
+            "word": "epa",
+            "duration": "0.40",
+            "codes": [
+                1272,
+                497,
+                1192,
+                67,
+                986,
+                54,
+                351,
+                423,
+                1154,
+                561,
+                584,
+                417,
+                209,
+                1017,
+                424,
+                1122,
+                25,
+                1191,
+                475,
+                140,
+                1184,
+                730,
+                1459,
+                1266,
+                379,
+                799,
+                567,
+                460,
+                379,
+                676
+            ]
+        },
+        {
+            "word": "pelu",
+            "duration": "0.28",
+            "codes": [
+                381,
+                926,
+                433,
+                811,
+                76,
+                774,
+                1179,
+                380,
+                1668,
+                1646,
+                1364,
+                1446,
+                1241,
+                1503,
+                1384,
+                902,
+                1073,
+                443,
+                74,
+                1015,
+                1107
+            ]
+        },
+        {
+            "word": "orisiirisii",
+            "duration": "0.64",
+            "codes": [
+                51,
+                1047,
+                367,
+                674,
+                1117,
+                734,
+                498,
+                1504,
+                1045,
+                656,
+                773,
+                382,
+                198,
+                792,
+                1662,
+                760,
+                1261,
+                1094,
+                1091,
+                1505,
+                602,
+                1670,
+                1497,
+                1447,
+                465,
+                135,
+                98,
+                528,
+                682,
+                812,
+                269,
+                175,
+                290,
+                547,
+                340,
+                382,
+                1073,
+                528,
+                1033,
+                700,
+                195,
+                529,
+                37,
+                687,
+                1022,
+                343,
+                1335,
+                1092
+            ]
+        },
+        {
+            "word": "nkan",
+            "duration": "0.16",
+            "codes": [
+                1339,
+                1657,
+                859,
+                1288,
+                544,
+                207,
+                459,
+                1735,
+                1736,
+                959,
+                106,
+                427,
+                107
+            ]
+        }
+    ]
+}

default_speakers_local/yoruba_female2.json ADDED Viewed

	@@ -0,0 +1,193 @@

+{
+    "text": "Irin\u1e63\u1eb9\u0301 \u00e0gb\u1eb9\u0300 ni katakata.",
+    "words": [
+        {
+            "word": "irinse",
+            "duration": "1.19",
+            "codes": [
+                219,
+                219,
+                219,
+                219,
+                805,
+                636,
+                459,
+                918,
+                820,
+                918,
+                950,
+                795,
+                447,
+                1284,
+                447,
+                378,
+                641,
+                77,
+                939,
+                316,
+                278,
+                16,
+                223,
+                776,
+                374,
+                1810,
+                110,
+                967,
+                51,
+                717,
+                1289,
+                155,
+                1731,
+                1199,
+                195,
+                1332,
+                1106,
+                940,
+                328,
+                1493,
+                230,
+                687,
+                510,
+                356,
+                1178,
+                253,
+                24,
+                318,
+                70,
+                1002,
+                977,
+                719,
+                113,
+                228,
+                1556,
+                1316,
+                88,
+                79,
+                1316,
+                1316,
+                628,
+                79,
+                1492,
+                915,
+                1671,
+                492,
+                1758,
+                334,
+                470,
+                1038,
+                223,
+                68,
+                563,
+                223,
+                224,
+                185,
+                244,
+                417,
+                337,
+                244,
+                360,
+                165,
+                224,
+                187,
+                1821,
+                1119,
+                958,
+                192,
+                200
+            ]
+        },
+        {
+            "word": "agbe",
+            "duration": "0.32",
+            "codes": [
+                74,
+                456,
+                1156,
+                49,
+                1409,
+                414,
+                1437,
+                145,
+                17,
+                1121,
+                237,
+                1442,
+                389,
+                698,
+                30,
+                30,
+                489,
+                1558,
+                30,
+                721,
+                994,
+                201,
+                1702,
+                835
+            ]
+        },
+        {
+            "word": "ni",
+            "duration": "0.12",
+            "codes": [
+                1540,
+                310,
+                29,
+                890,
+                952,
+                319,
+                196,
+                272,
+                1536
+            ]
+        },
+        {
+            "word": "katakata",
+            "duration": "0.56",
+            "codes": [
+                274,
+                993,
+                1624,
+                855,
+                1065,
+                152,
+                610,
+                1170,
+                775,
+                1541,
+                1806,
+                1592,
+                713,
+                1539,
+                1424,
+                1229,
+                93,
+                1194,
+                1310,
+                1392,
+                727,
+                1428,
+                32,
+                902,
+                1643,
+                1304,
+                977,
+                1316,
+                587,
+                777,
+                1258,
+                830,
+                562,
+                1720,
+                34,
+                667,
+                415,
+                1194,
+                1477,
+                352,
+                1187,
+                1345
+            ]
+        }
+    ]
+}

default_speakers_local/yoruba_male1.json ADDED Viewed

	@@ -0,0 +1,234 @@

+{
+    "text": "\u00ccj\u1ecdba t\u00ed f\u00ed \u00f2fin d\u00e9 t\u00edta \u1ecdt\u00ed l\u00edle.",
+    "words": [
+        {
+            "word": "ijoba",
+            "duration": "0.67",
+            "codes": [
+                165,
+                1236,
+                1667,
+                933,
+                729,
+                1699,
+                1425,
+                1080,
+                1255,
+                458,
+                795,
+                1348,
+                334,
+                1458,
+                458,
+                566,
+                584,
+                187,
+                1774,
+                296,
+                123,
+                190,
+                1787,
+                1470,
+                558,
+                1392,
+                1693,
+                885,
+                1315,
+                760,
+                609,
+                357,
+                864,
+                575,
+                74,
+                798,
+                1401,
+                1380,
+                169,
+                1157,
+                871,
+                208,
+                622,
+                146,
+                1232,
+                107,
+                382,
+                801,
+                1707
+            ]
+        },
+        {
+            "word": "ti",
+            "duration": "0.16",
+            "codes": [
+                459,
+                1475,
+                833,
+                1082,
+                1496,
+                1241,
+                1342,
+                211,
+                153,
+                1709,
+                1640,
+                468
+            ]
+        },
+        {
+            "word": "fi",
+            "duration": "0.14",
+            "codes": [
+                1752,
+                1230,
+                854,
+                1420,
+                854,
+                1146,
+                1257,
+                388,
+                1686,
+                539,
+                289
+            ]
+        },
+        {
+            "word": "ofin",
+            "duration": "0.26",
+            "codes": [
+                341,
+                1008,
+                1701,
+                359,
+                1696,
+                1250,
+                1226,
+                781,
+                1292,
+                1432,
+                989,
+                998,
+                236,
+                962,
+                1308,
+                749,
+                1462,
+                1460,
+                1039,
+                932
+            ]
+        },
+        {
+            "word": "de",
+            "duration": "0.16",
+            "codes": [
+                1020,
+                1808,
+                907,
+                276,
+                597,
+                1069,
+                217,
+                648,
+                1068,
+                468,
+                981,
+                1003
+            ]
+        },
+        {
+            "word": "tita",
+            "duration": "0.46",
+            "codes": [
+                645,
+                1041,
+                605,
+                947,
+                1505,
+                162,
+                1820,
+                688,
+                101,
+                1764,
+                418,
+                885,
+                513,
+                1569,
+                1082,
+                446,
+                711,
+                294,
+                326,
+                1203,
+                1190,
+                524,
+                408,
+                222,
+                1490,
+                1162,
+                1486,
+                885,
+                247,
+                899,
+                513,
+                1187,
+                614,
+                424,
+                184
+            ]
+        },
+        {
+            "word": "oti",
+            "duration": "0.28",
+            "codes": [
+                979,
+                997,
+                1581,
+                620,
+                967,
+                460,
+                1430,
+                1731,
+                279,
+                499,
+                769,
+                517,
+                1077,
+                263,
+                1443,
+                397,
+                166,
+                1554,
+                440,
+                1009,
+                1427
+            ]
+        },
+        {
+            "word": "lile",
+            "duration": "0.28",
+            "codes": [
+                409,
+                1677,
+                599,
+                296,
+                629,
+                74,
+                129,
+                1740,
+                11,
+                1404,
+                920,
+                10,
+                269,
+                1604,
+                990,
+                1200,
+                1217,
+                1178,
+                293,
+                30,
+                36
+            ]
+        }
+    ]
+}

default_speakers_local/yoruba_male2.json ADDED Viewed

	@@ -0,0 +1,238 @@

+{
+    "text": "\u1ecdk\u1ecd\u0300 \u00f2furuf\u00fa t\u00ed jay\u00e9 w\u1ecd \u0144 bal\u00e8 l\u00f3w\u00f3.",
+    "words": [
+        {
+            "word": "oko",
+            "duration": "0.42",
+            "codes": [
+                165,
+                1480,
+                1405,
+                1428,
+                761,
+                1343,
+                591,
+                311,
+                345,
+                1209,
+                545,
+                346,
+                880,
+                413,
+                112,
+                882,
+                1051,
+                831,
+                866,
+                918,
+                1622,
+                1776,
+                1213,
+                945,
+                942,
+                455,
+                1217,
+                675,
+                268,
+                683,
+                536
+            ]
+        },
+        {
+            "word": "ofurufu",
+            "duration": "0.52",
+            "codes": [
+                317,
+                1016,
+                354,
+                1467,
+                1626,
+                1686,
+                1012,
+                1450,
+                1090,
+                849,
+                1230,
+                1774,
+                992,
+                148,
+                395,
+                1446,
+                909,
+                1712,
+                1624,
+                327,
+                283,
+                1554,
+                1796,
+                952,
+                1450,
+                184,
+                689,
+                604,
+                902,
+                989,
+                1517,
+                983,
+                250,
+                39,
+                792,
+                289,
+                865,
+                272,
+                336,
+                694
+            ]
+        },
+        {
+            "word": "ti",
+            "duration": "0.16",
+            "codes": [
+                1818,
+                279,
+                96,
+                1097,
+                383,
+                876,
+                14,
+                1700,
+                515,
+                1713,
+                1033,
+                59
+            ]
+        },
+        {
+            "word": "jaye",
+            "duration": "0.36",
+            "codes": [
+                1522,
+                774,
+                452,
+                303,
+                695,
+                648,
+                809,
+                679,
+                1015,
+                626,
+                398,
+                1720,
+                1,
+                1497,
+                748,
+                46,
+                1744,
+                644,
+                190,
+                1060,
+                455,
+                529,
+                111,
+                1515,
+                1762,
+                150,
+                1560
+            ]
+        },
+        {
+            "word": "wo",
+            "duration": "0.34",
+            "codes": [
+                484,
+                503,
+                1388,
+                61,
+                289,
+                1422,
+                294,
+                831,
+                1328,
+                462,
+                1612,
+                905,
+                1541,
+                785,
+                509,
+                1185,
+                1802,
+                845,
+                1440,
+                986,
+                360,
+                281,
+                1703,
+                1456,
+                1674,
+                1776
+            ]
+        },
+        {
+            "word": "n",
+            "duration": "0.12",
+            "codes": [
+                1002,
+                289,
+                47,
+                616,
+                1594,
+                852,
+                831,
+                458,
+                220
+            ]
+        },
+        {
+            "word": "bale",
+            "duration": "0.32",
+            "codes": [
+                953,
+                1426,
+                159,
+                1758,
+                474,
+                1347,
+                579,
+                699,
+                599,
+                1433,
+                483,
+                1142,
+                1088,
+                988,
+                906,
+                552,
+                128,
+                1648,
+                474,
+                1678,
+                668,
+                1060,
+                101,
+                1478
+            ]
+        },
+        {
+            "word": "lowo",
+            "duration": "0.22",
+            "codes": [
+                612,
+                326,
+                1661,
+                978,
+                88,
+                1620,
+                169,
+                811,
+                98,
+                363,
+                31,
+                425,
+                1531,
+                394,
+                1248,
+                809
+            ]
+        }
+    ]
+}

default_speakers_local/yoruba_male3.json ADDED Viewed

	@@ -0,0 +1,234 @@

+{
+    "text": "\u00ccj\u1ecdba t\u00ed f\u00ed \u00f2fin d\u00e9 t\u00edta \u1ecdt\u00ed l\u00edle.",
+    "words": [
+        {
+            "word": "\u00ccj\u1ecdba",
+            "duration": "0.67",
+            "codes": [
+                165,
+                1236,
+                1667,
+                933,
+                729,
+                1699,
+                1425,
+                1080,
+                1255,
+                458,
+                795,
+                1348,
+                334,
+                1458,
+                458,
+                566,
+                584,
+                187,
+                1774,
+                296,
+                123,
+                190,
+                1787,
+                1470,
+                558,
+                1392,
+                1693,
+                885,
+                1315,
+                760,
+                609,
+                357,
+                864,
+                575,
+                74,
+                798,
+                1401,
+                1380,
+                169,
+                1157,
+                871,
+                208,
+                622,
+                146,
+                1232,
+                107,
+                382,
+                801,
+                1707
+            ]
+        },
+        {
+            "word": "t\u00ed",
+            "duration": "0.16",
+            "codes": [
+                459,
+                1475,
+                833,
+                1082,
+                1496,
+                1241,
+                1342,
+                211,
+                153,
+                1709,
+                1640,
+                468
+            ]
+        },
+        {
+            "word": "f\u00ed",
+            "duration": "0.14",
+            "codes": [
+                1752,
+                1230,
+                854,
+                1420,
+                854,
+                1146,
+                1257,
+                388,
+                1686,
+                539,
+                289
+            ]
+        },
+        {
+            "word": "\u00f2fin",
+            "duration": "0.26",
+            "codes": [
+                341,
+                1008,
+                1701,
+                359,
+                1696,
+                1250,
+                1226,
+                781,
+                1292,
+                1432,
+                989,
+                998,
+                236,
+                962,
+                1308,
+                749,
+                1462,
+                1460,
+                1039,
+                932
+            ]
+        },
+        {
+            "word": "d\u00e9",
+            "duration": "0.16",
+            "codes": [
+                1020,
+                1808,
+                907,
+                276,
+                597,
+                1069,
+                217,
+                648,
+                1068,
+                468,
+                981,
+                1003
+            ]
+        },
+        {
+            "word": "t\u00edta",
+            "duration": "0.46",
+            "codes": [
+                645,
+                1041,
+                605,
+                947,
+                1505,
+                162,
+                1820,
+                688,
+                101,
+                1764,
+                418,
+                885,
+                513,
+                1569,
+                1082,
+                446,
+                711,
+                294,
+                326,
+                1203,
+                1190,
+                524,
+                408,
+                222,
+                1490,
+                1162,
+                1486,
+                885,
+                247,
+                899,
+                513,
+                1187,
+                614,
+                424,
+                184
+            ]
+        },
+        {
+            "word": "\u1ecdt\u00ed",
+            "duration": "0.28",
+            "codes": [
+                979,
+                997,
+                1581,
+                620,
+                967,
+                460,
+                1430,
+                1731,
+                279,
+                499,
+                769,
+                517,
+                1077,
+                263,
+                1443,
+                397,
+                166,
+                1554,
+                440,
+                1009,
+                1427
+            ]
+        },
+        {
+            "word": "l\u00edle.",
+            "duration": "0.28",
+            "codes": [
+                409,
+                1677,
+                599,
+                296,
+                629,
+                74,
+                129,
+                1740,
+                11,
+                1404,
+                920,
+                10,
+                269,
+                1604,
+                990,
+                1200,
+                1217,
+                1178,
+                293,
+                30,
+                36
+            ]
+        }
+    ]
+}

download_models.py ADDED Viewed

	@@ -0,0 +1,96 @@

+#!/usr/bin/env python3
+"""
+SwaGPT Model Downloader Utility
+Downloads YarnGPT and WavTokenizer model weights and configurations locally.
+"""
+import os
+import sys
+def main():
+    print("=" * 60)
+    print("           SwaGPT Model & Weights Downloader")
+    print("=" * 60)
+    try:
+        from huggingface_hub import hf_hub_download, snapshot_download
+    except ImportError:
+        print("[!] huggingface_hub is not installed.")
+        print("[i] Installing required package 'huggingface_hub'...")
+        import subprocess
+        subprocess.check_call([sys.executable, "-m", "pip", "install", "huggingface_hub"])
+        from huggingface_hub import hf_hub_download, snapshot_download
+    # Create models directory
+    models_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "models"))
+    os.makedirs(models_dir, exist_ok=True)
+    print(f"[*] Local models directory: {models_dir}")
+    # 1. Download WavTokenizer configuration
+    print("\n[1/3] Downloading WavTokenizer configuration...")
+    try:
+        config_path = hf_hub_download(
+            repo_id="novateur/WavTokenizer-medium-speech-75token",
+            filename="wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml",
+            local_dir=models_dir,
+            local_dir_use_symlinks=False
+        )
+        print(f"[✓] WavTokenizer config saved to: {config_path}")
+    except Exception as e:
+        print(f"[!] Error downloading WavTokenizer configuration: {e}")
+        print("[i] Attempting fallback download...")
+        import urllib.request
+        fallback_url = "https://huggingface.co/novateur/WavTokenizer-medium-speech-75token/resolve/main/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml"
+        config_path = os.path.join(models_dir, "wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml")
+        urllib.request.urlretrieve(fallback_url, config_path)
+        print(f"[✓] WavTokenizer config fallback saved to: {config_path}")
+    # 2. Download WavTokenizer checkpoint
+    print("\n[2/3] Downloading WavTokenizer model checkpoint (~1.75 GB)...")
+    try:
+        checkpoint_path = hf_hub_download(
+            repo_id="MBZUAI/LLMVoX",
+            filename="wavtokenizer_large_speech_320_24k.ckpt",
+            local_dir=models_dir,
+            local_dir_use_symlinks=False
+        )
+        print(f"[✓] WavTokenizer checkpoint saved to: {checkpoint_path}")
+    except Exception as e:
+        print(f"[!] Error downloading WavTokenizer from primary repo: {e}")
+        print("[i] Trying fallback repository (novateur)...")
+        try:
+            checkpoint_path = hf_hub_download(
+                repo_id="novateur/WavTokenizer-large-speech-75token",
+                filename="wavtokenizer_large_speech_320_24k.ckpt",
+                local_dir=models_dir,
+                local_dir_use_symlinks=False
+            )
+            print(f"[✓] WavTokenizer checkpoint fallback saved to: {checkpoint_path}")
+        except Exception as e2:
+            print(f"[!] Fallback failed: {e2}")
+            print("[!] Please download the checkpoint manually from Hugging Face and place it in the 'models' directory.")
+            checkpoint_path = os.path.join(models_dir, "wavtokenizer_large_speech_320_24k.ckpt")
+    # 3. Download YarnGPT2 model snapshot
+    print("\n[3/3] Downloading YarnGPT2 weights and tokenizer (~750 MB)...")
+    yarngpt_dir = os.path.join(models_dir, "YarnGPT2")
+    try:
+        snapshot_dir = snapshot_download(
+            repo_id="saheedniyi/YarnGPT2",
+            local_dir=yarngpt_dir,
+            local_dir_use_symlinks=False
+        )
+        print(f"[✓] YarnGPT2 weights downloaded successfully to: {snapshot_dir}")
+    except Exception as e:
+        print(f"[!] Error downloading YarnGPT2: {e}")
+        print("[i] Note: You can also load it on-the-fly via HF Hub during inference if needed.")
+    print("\n" + "=" * 60)
+    print("                  Download Process Complete!")
+    print("=" * 60)
+    print(f"All files have been set up in your local workspace under:\n  {models_dir}")
+    print("You are fully set to build on top of these models offline/locally.")
+    print("=" * 60)
+# Script entry point: run the downloader only when this file is executed
+# directly, so importing the module has no side effects.
+if __name__ == "__main__":
+    main()

python-wrapper/audiotokenizer.py ADDED Viewed

	@@ -0,0 +1,317 @@

+import os
+import re
+import json
+import torch
+import inflect
+import random
+import uroman as ur
+import numpy as np
+import torchaudio
+from transformers import AutoTokenizer
+from outetts.wav_tokenizer.decoder import WavTokenizer
+from outetts.wav_tokenizer.encoder.utils import convert_audio
+class AudioTokenizer:
+    def __init__(self,tokenizer_path,wav_tokenizer_model_path,wav_tokenizer_config_path,):
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.text_prompt = "{bos}\n{text_start}{words}{text_end}\n{audio_start}\n"
+        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
+        self.bos = "<|im_start|>"
+        self.eos = "<|im_end|>"
+        self.input_length=0
+        self.special_tokens = {
+            "audio_code": "<|{}|>",
+            "text_start": "<|text_start|>",
+            "text_end": "<|text_end|>",
+            "audio_start": "<|audio_start|>",
+            "audio_end": "<|audio_end|>",
+            "time": "<|t_{:.2f}|>",
+            "code_start": "<|code_start|>",
+            "code_end": "<|code_end|>",
+            "text_sep": "<|text_sep|>"
+        }
+        self.lec = inflect.engine()
+        #self.text_prompt = "{bos}\n{text_start}{words}{text_end}\n{audio_start}\n"
+        #self.config_path = "/content/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml"
+        #self.model_path = "/content/wavtokenizer_large_speech_320_24k.ckpt"
+        self.wavtokenizer = WavTokenizer.from_pretrained0802(wav_tokenizer_config_path, wav_tokenizer_model_path)
+        self.wavtokenizer = self.wavtokenizer.to(self.device)
+        self.BASE_DIR = os.path.dirname(__file__)
+        self.DEFAULT_SPEAKERS_DIR = os.path.join(self.BASE_DIR, "default_speakers")
+        self.speakers=["idera","emma","onye","jude","osagie","tayo","zainab","joke","regina","remi","umar","chinenye"]
+    def get_speaker_path(self,speaker_name):
+        return os.path.join(self.DEFAULT_SPEAKERS_DIR, f"{speaker_name}.json")
+    def load_speaker(self, path: str):
+        with open(path, "r") as f:
+            return json.load(f)
+    def load_default_speaker(self, name: str):
+        name = name.lower().strip()
+        speaker_path=self.get_speaker_path(name)
+        return self.load_speaker(speaker_path)
+    def process_text(self, text: str):
+        text = re.sub(r'\d+(\.\d+)?', lambda x: self.lec.number_to_words(x.group()), text.lower())
+        text = re.sub(r'[-_/,\.\\]', ' ', text)
+        text = re.sub(r'[^a-z\s]', '', text)
+        text = re.sub(r'\s+', ' ', text).strip()
+        return text.split()
+    def create_audio_prompt(self,words: list) -> str:
+        prompt = []
+        for i in words:
+            word = i["word"]
+            duration = self.special_tokens["time"].format(float(i["duration"]))
+            tokens = "".join([self.special_tokens["audio_code"].format(c) for c in i["codes"]])
+            prompt.append(f'{word}{duration}{self.special_tokens["code_start"]}{tokens}{self.special_tokens["code_end"]}')
+        return "\n".join(prompt)
+    def create_prompt(self,text,speaker_name="idera"):
+        speaker=self.load_default_speaker(speaker_name)
+        input_words = self.process_text(speaker["text"]) +  self.process_text(text)
+        #input_words = process_text(speaker["text"]) + input_words
+        inputs_words_strings = f"{self.special_tokens['text_sep']}".join([i.strip() for i in input_words])
+        prompt = self.text_prompt.format(
+          bos=self.bos,
+          text_start=self.special_tokens['text_start'],
+          words=inputs_words_strings,
+          text_end=self.special_tokens['text_end'],
+          audio_start=self.special_tokens['audio_start']
+      )
+        prompt += self.create_audio_prompt(speaker["words"])
+        return prompt
+    def tokenize_prompt(self, prompt):
+        input_ids = self.tokenizer.encode(
+            prompt,
+            add_special_tokens=False,
+            return_tensors="pt"
+        ).to(self.device)
+        self.input_length=input_ids.shape[1]
+        return input_ids.to(self.device)
+    def get_audio(self,discrete_code):
+        discrete_code=torch.tensor([[discrete_code]]).to(self.device)
+        features = self.wavtokenizer.codes_to_features(discrete_code).to(self.device)
+        bandwidth_id = torch.tensor([0]).to(self.device)
+        audio_out = self.wavtokenizer.decode(features, bandwidth_id=bandwidth_id)
+        return audio_out.to("cpu")
+    def extract_integers(self,s):
+        # Match integers enclosed in vertical bars |integer|
+        matches = re.findall(r'\|(-?\d+)\|', s)
+        # Convert matches to integers
+        return [int(match) for match in matches]
+    def get_codes(self, output):
+        new_output=self.tokenizer.decode(output[0][self.input_length:])
+        codes=self.extract_integers(new_output)
+        return codes
+class AudioTokenizerForLocal(AudioTokenizer):
+    def __init__(self,tokenizer_path,wav_tokenizer_model_path,wav_tokenizer_config_path,):
+        super().__init__(tokenizer_path, wav_tokenizer_model_path, wav_tokenizer_config_path)
+        self.text_prompt = "{bos}\n{text_start}{words}{text_end}\n{lang}\n{audio_start}\n"
+        self.special_tokens = {
+            "audio_code": "<|{}|>",
+            "text_start": "<|text_start|>",
+            "text_end": "<|text_end|>",
+            "audio_start": "<|audio_start|>",
+            "audio_end": "<|audio_end|>",
+            "word_start": "<|word_start|>",
+            "word_end": "<|word_end|>",
+            "time": "<|t_{:.2f}|>",
+            "code_start": "<|code_start|>",
+            "code_end": "<|code_end|>",
+            "text_sep": "<|text_sep|>",
+            "hausa":"<|hausa|>",
+            "igbo":"<|igbo|>",
+            "yoruba":"<|yoruba|>",
+        }
+        self.uroman = ur.Uroman()
+        self.DEFAULT_SPEAKERS_DIR = os.path.join(self.BASE_DIR, "default_speakers_local")
+        self.speakers = [
+            "hausa_male1", "hausa_male2","yoruba_male1", "yoruba_male2","igbo_male2" #"igbo_male1", "igbo_male2",
+            "hausa_female1", "hausa_female2", "igbo_female1", "igbo_female2", "yoruba_female1", "yoruba_female2"
+        ]
+    def process_text(self, text: str):
+        text = self.uroman.romanize_string(text)
+        text = re.sub(r'\d+(\.\d+)?', lambda x: self.lec.number_to_words(x.group()), text.lower())
+        text = re.sub(r'[-_/,\.\\]', ' ', text)
+        text = re.sub(r'[^a-z\s]', '', text)
+        text = re.sub(r'\s+', ' ', text).strip()
+        return text.split()
+    def create_prompt(self,text,lang,speaker_name=None):
+        assert lang in ["hausa","igbo","yoruba"], f"Invalid language: {lang}, language must be one of ['hausa','igbo','yoruba']"
+        #if no speaker
+        if speaker_name is None:
+            if lang=="hausa":
+                speaker_name=random.choice(["hausa_male1","hausa_male2","hausa_female1","hausa_female2"])
+            elif lang=="igbo":
+                speaker_name=random.choice(["igbo_female1","igbo_female2","igbo_male2"])#"igbo_male1"])
+            else:
+                speaker_name=random.choice(["yoruba_male2","yoruba_female1","yoruba_female2"])
+        speaker=self.load_default_speaker(speaker_name)
+        input_words = self.process_text(speaker["text"]) +  self.process_text(text)
+        #input_words = process_text(speaker["text"]) + input_words
+        inputs_words_strings = f"{self.special_tokens['text_sep']}".join([i.strip() for i in input_words])
+        prompt = self.text_prompt.format(
+          bos=self.bos,
+          text_start=self.special_tokens['text_start'],
+          words=inputs_words_strings,
+          text_end=self.special_tokens['text_end'],
+          lang=self.special_tokens[lang],
+          audio_start=self.special_tokens['audio_start']
+      )
+        prompt += self.create_audio_prompt(speaker["words"])
+        return prompt
+class AudioTokenizerV2(AudioTokenizer):
+    def __init__(self,tokenizer_path,wav_tokenizer_model_path,wav_tokenizer_config_path,):
+        super().__init__(tokenizer_path, wav_tokenizer_model_path, wav_tokenizer_config_path)
+        self.text_prompt = "{bos}\n{text_start}{words}{text_end}\n{lang}\n{audio_start}\n"
+        self.asr_prompt="{bos}\n{code_start}{codes}{code_end}\n{asr}\n"
+        self.special_tokens = {
+            "audio_code": "<|{}|>",
+            "text_start": "<|text_start|>",
+            "text_end": "<|text_end|>",
+            "audio_start": "<|audio_start|>",
+            "audio_end": "<|audio_end|>",
+            "word_start": "<|word_start|>",
+            "word_end": "<|word_end|>",
+            "time": "<|t_{:.2f}|>",
+            "code_start": "<|code_start|>",
+            "code_end": "<|code_end|>",
+            "text_sep": "<|text_sep|>",
+            "hausa":"<|hausa|>",
+            "igbo":"<|igbo|>",
+            "yoruba":"<|yoruba|>",
+            "english":"<|english|>",#<|english|>
+            "asr":"<|asr|>"
+        }
+        self.uroman = ur.Uroman()
+        self.DEFAULT_SPEAKERS_DIR_LOCAL = os.path.join(self.BASE_DIR, "default_speakers_local")
+        self.DEFAULT_SPEAKERS_ENG = os.path.join(self.BASE_DIR, "default_speakers")
+        self.speakers_local = [
+            "hausa_male1", "hausa_male2","yoruba_male1", "yoruba_male2","igbo_male2" #"igbo_male1", "igbo_male2",
+            "hausa_female1", "hausa_female2", "igbo_female1", "igbo_female2", "yoruba_female1", "yoruba_female2"
+        ]
+        self.speakers_eng = ["idera","emma","onye","jude","osagie","tayo","zainab","joke","regina","remi","umar","chinenye","saheed"]
+        self.changed_tokens=[('<|1836|>', '<|453|><|453|>'),
+                             ('<|1837|>', '<|1836|><|1836|>'),
+                             ('<|1838|>', '<|1837|><|1837|>'),
+                             ('<|1840|>', '<|244|><|167|>'),
+                             ('<|1841|>', '<|235|><|219|>'),
+                             ('<|1844|>', '<|453|><|244|>'),
+                             ('<|1845|>', '<|1838|><|1838|>')]
+    def process_text(self, text: str):
+        text = self.uroman.romanize_string(text)
+        text = re.sub(r'\d+(\.\d+)?', lambda x: self.lec.number_to_words(x.group()), text.lower())
+        text = re.sub(r'[-_/,\.\\]', ' ', text)
+        text = re.sub(r'[^a-z\s]', '', text)
+        text = re.sub(r'\s+', ' ', text).strip()
+        return text.split()
+    def get_speaker_path(self,speaker_name,dir):
+        return os.path.join(dir, f"{speaker_name}.json")
+    def load_speaker(self, path: str):
+        with open(path, "r") as f:
+            return json.load(f)
+    def load_default_speaker(self, name: str,dir: str):
+        name = name.lower().strip()
+        speaker_path=self.get_speaker_path(name,dir)
+        return self.load_speaker(speaker_path)
+    def create_prompt(self,text,lang,speaker_name=None):
+        assert lang in ["hausa","igbo","yoruba","english"], f"Invalid language: {lang}, language must be one of ['hausa','igbo','yoruba','english']"
+        #if no speaker
+        dir=self.DEFAULT_SPEAKERS_DIR_LOCAL
+        if speaker_name is None:
+            if lang=="hausa":
+                speaker_name=random.choice(["hausa_male1","hausa_male2","hausa_female1","hausa_female2"])
+            elif lang=="igbo":
+                speaker_name=random.choice(["igbo_female1","igbo_female2","igbo_male2"])#"igbo_male1"])
+            elif lang=="yoruba":
+                speaker_name=random.choice(["yoruba_male2","yoruba_female1","yoruba_female2"])
+            else:
+                speaker_name=random.choice(self.speakers_eng)
+        if lang=="english":
+            dir=self.DEFAULT_SPEAKERS_ENG
+        speaker=self.load_default_speaker(speaker_name,dir)
+        input_words = self.process_text(speaker["text"]) +  self.process_text(text)
+        #input_words = process_text(speaker["text"]) + input_words
+        inputs_words_strings = f"{self.special_tokens['text_sep']}".join([i.strip() for i in input_words])
+        prompt = self.text_prompt.format(
+          bos=self.bos,
+          text_start=self.special_tokens['text_start'],
+          words=inputs_words_strings,
+          text_end=self.special_tokens['text_end'],
+          lang=self.special_tokens[lang],
+          audio_start=self.special_tokens['audio_start']
+      )
+        prompt += self.create_audio_prompt(speaker["words"])
+        return prompt
+    def replace_tokens(text):
+      for pair in self.changed_tokens:
+        text=text.replace(pair[0],pair[-1])
+      return text
+    def resample(self,audio: np.ndarray, sr: int, target_sr: int):
+        audio = audio.to(dtype=torch.float32)
+        #.clone().detach()
+        audio = audio.unsqueeze(0)
+        # 1 as last arg corresponds to mono audio
+        resampled = convert_audio(audio, sr, target_sr, 1)
+        return resampled.to(self.device )
+    def quantize_wavtokenizer(self, path):
+        audio_data, sample_rate = torchaudio.load(path)
+        audio_data=audio_data.squeeze()
+        audio = self.resample(audio_data, sample_rate, 24000).to(self.device)
+        bandwidth_id = torch.tensor([0]).to(self.device )
+        _, codes = self.wavtokenizer.encode_infer(audio, bandwidth_id=bandwidth_id)
+        codes = codes.squeeze(1).to(self.device)#+last_text_token
+        res=""
+        for code in codes[0].tolist():
+            res+=f"<|{code}|>"
+        return res
+    def load_asr_prompt(self,audio_path):
+        codes=self.quantize_wavtokenizer(audio_path)
+        prompt = self.asr_prompt.format(
+          bos=self.bos,
+          code_start=self.special_tokens['code_start'],
+          codes=codes,
+          code_end=self.special_tokens['code_end'],
+          asr=self.special_tokens["asr"],
+        )
+        return prompt
+    def get_asr_results(self,output):
+        res=""
+        for text in self.tokenizer.decode(output[0]).split("<|text_start|>")[-1].split("<|text_end|>")[0].split("\n"):
+            res+=text.split("<|word_start|>")[-1].split("<|word_end|>")[0]
+            res+=" "
+        return res.strip()

python-wrapper/requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+torch
+transformers
+torchaudio
+outetts==0.2.3
+uroman
+numpy
+inflect
+IPython
+build
+tqdm

python-wrapper/yarngpt/__init__.py ADDED Viewed

	@@ -0,0 +1,4 @@

+# Package entry point: re-export generate_speech from yarngpt.core.
+from yarngpt.core import generate_speech
+# Version string of this package.
+__version__ = "0.1.5"
+# Names exported by `from yarngpt import *`.
+__all__ = ["generate_speech"]

python-wrapper/yarngpt/core.py ADDED Viewed

	@@ -0,0 +1,125 @@

+import os
+import torch
+import requests
+from transformers import AutoModelForCausalLM
+from audiotokenizer import AudioTokenizer
+from tqdm import tqdm
+# Directory under the user's home where the WavTokenizer files are stored.
+MODEL_DIR = os.path.expanduser("~/.yarngpt/models")
+# Created when this module is imported; exist_ok avoids an error if it is already there.
+os.makedirs(MODEL_DIR, exist_ok=True)
+# Local paths of the WavTokenizer config (YAML) and checkpoint files.
+CONFIG_PATH = os.path.join(MODEL_DIR, "wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml")
+MODEL_PATH = os.path.join(MODEL_DIR, "wavtokenizer_large_speech_320_24k.ckpt")
+# Hugging Face URLs of the two files. Note: the config comes from the
+# "medium-speech" repository and the checkpoint from the "large-speech" one.
+CONFIG_URL = "https://huggingface.co/novateur/WavTokenizer-medium-speech-75token/resolve/main/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml"
+MODEL_URL = "https://huggingface.co/novateur/WavTokenizer-large-speech-75token/resolve/main/wavtokenizer_large_speech_320_24k.ckpt"
+def download_file(url, dest_path):
+    """Downloads a file with a progress bar if it doesn't already exist."""
+    if os.path.exists(dest_path):
+        print(f"{dest_path} already exists. Skipping download.")
+        return
+    print(f"Downloading {url} to {dest_path}...")
+    response = requests.get(url, stream=True)
+    total_size = int(response.headers.get('content-length', 0))
+    with open(dest_path, "wb") as f, tqdm(
+        total=total_size, unit="B", unit_scale=True, desc=os.path.basename(dest_path)
+    ) as progress_bar:
+        for chunk in response.iter_content(chunk_size=8192):
+            f.write(chunk)
+            progress_bar.update(len(chunk))
+    print("Download complete.")
+# Fetch the WavTokenizer config and checkpoint when this module is imported;
+# download_file returns early for files that already exist.
+download_file(CONFIG_URL, CONFIG_PATH)
+download_file(MODEL_URL, MODEL_PATH)
+# Speaker names accepted by generate_speech.
+AVAILABLE_SPEAKERS = [
+    "idera", "jude", "joke", "umar", "osagie", "onye"
+]
+def load_model_and_tokenizer():
+    """Loads the YarnGPT model and tokenizer."""
+    hf_path = "saheedniyi/YarnGPT"
+    #initialize tokenizer
+    audio_tokenizer = AudioTokenizer(hf_path, MODEL_PATH, CONFIG_PATH)
+    #load model using Hugging Face's caching system
+    model = AutoModelForCausalLM.from_pretrained(hf_path, torch_dtype="auto")
+    model = model.to(audio_tokenizer.device)
+    return model, audio_tokenizer
+def generate_speech(text, speaker="idera", temperature=0.1, repetition_penalty=1.1, max_length=4000):
+    """Generate speech audio from input text using the selected speaker.
+    This function converts text to speech using YarnGPT's text-to-speech model with
+    Nigerian-accented English. It supports multiple preset voices and allows customization
+    of generation parameters.
+    Args:
+        text (str): The input text to convert to speech.
+        speaker (str, optional): The voice to use for speech generation.
+            Must be one of: idera, jude, joke, umar, osagie, onye.
+            Defaults to "idera".
+        temperature (float, optional): Controls randomness in generation.
+            Higher values (e.g., 0.8) make output more random,
+            lower values (e.g., 0.1) make it more deterministic.
+            Defaults to 0.1.
+        repetition_penalty (float, optional): Penalizes repetition in generated speech.
+            Values > 1.0 reduce repetition. Defaults to 1.1.
+        max_length (int, optional): Maximum length of generated sequence.
+            Longer text needs higher values. Defaults to 4000.
+    Returns:
+        torch.Tensor: A 2D tensor containing the generated audio waveform
+            with shape (1, num_samples) and sample rate of 24kHz.
+    Raises:
+        ValueError: If speaker is not one of the available preset voices.
+    Example:
+        >>> from yarngpt import generate_speech
+        >>> import torchaudio
+        >>>
+        >>> # Generate speech with default settings
+        >>> audio = generate_speech("Hello, how are you?")
+        >>>
+        >>> # Save the generated audio
+        >>> torchaudio.save("output.wav", audio, sample_rate=24000)
+        >>>
+        >>> # Use a different speaker with custom parameters
+        >>> audio = generate_speech(
+        ...     "This is a test.",
+        ...     speaker="joke",
+        ...     temperature=0.2,
+        ...     repetition_penalty=1.2
+        ... )
+    """
+    if speaker not in AVAILABLE_SPEAKERS:
+        raise ValueError(f"Speaker must be one of: {', '.join(AVAILABLE_SPEAKERS)}")
+    model, audio_tokenizer = load_model_and_tokenizer()
+    prompt = audio_tokenizer.create_prompt(text, speaker)
+    input_ids = audio_tokenizer.tokenize_prompt(prompt)
+    output = model.generate(
+        input_ids=input_ids,
+        temperature=temperature,
+        repetition_penalty=repetition_penalty,
+        max_length=max_length
+    )
+    codes = audio_tokenizer.get_codes(output)
+    audio = audio_tokenizer.get_audio(codes)
+    return audio

requirements.txt CHANGED Viewed

@@ -1,4 +1,8 @@
 torch
-transformers
-scipy
 gradio

+outetts==0.2.3
+uroman
 torch
+torchaudio
+transformers==4.47.1
+inflect
+huggingface_hub
 gradio

swagpt/__init__.py ADDED Viewed

	@@ -0,0 +1,3 @@

+# SwaGPT Package Init File
+# AudioTokenizerSwa is imported lazily (not at module load) to avoid
+# triggering the outetts MeCab DLL chain on Windows.

swagpt/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (158 Bytes). View file

swagpt/__pycache__/audiotokenizer.cpython-311.pyc ADDED Viewed

Binary file (15.1 kB). View file

swagpt/audiotokenizer.py ADDED Viewed

	@@ -0,0 +1,255 @@

+"""
+swagpt/audiotokenizer.py
+------------------------
+AudioTokenizerSwa: East African TTS tokenizer extending YarnGPT's approach.
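+WavTokenizer is imported from the outetts wav_tokenizer sub-package with regular
+import statements. NOTE: a regular sub-package import still executes
+outetts/__init__.py, which pulls in MeCab (broken DLL on Windows without a full
+MeCab binary install); importlib is imported below but is not used to bypass it.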
+"""
+import os
+import re
+import sys
+import json
+import random
+import importlib.util
+import torch
+import torchaudio
+import inflect
+# Use uroman for romanization of African scripts
+try:
+    import uroman as ur
+    _uroman_available = True
+except ImportError:
+    _uroman_available = False
+from transformers import AutoTokenizer
+from outetts.wav_tokenizer.decoder.pretrained import WavTokenizer
+from outetts.wav_tokenizer.encoder.utils import convert_audio
+class AudioTokenizerSwa:
+    """
+    Swahili & East African Language Audio Tokenizer.
+    Extends the YarnGPT2 prompting convention to support Kiswahili
+    and Kenyan languages (Kikuyu, Luo, Luhya, Kamba, Kalenjin).
+    """
+    SUPPORTED_LANGS = [
+        "swahili", "kikuyu", "luo", "luhya", "kamba", "kalenjin",
+        "english", "hausa", "igbo", "yoruba"
+    ]
+    def __init__(
+        self,
+        tokenizer_path: str,
+        wav_tokenizer_model_path: str,
+        wav_tokenizer_config_path: str,
+        local_weights_only: bool = False,
+    ):
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.local_weights_only = local_weights_only
+        self.text_prompt = "{bos}\n{text_start}{words}{text_end}\n{lang}\n{audio_start}\n"
+        self.special_tokens = {
+            "audio_code":  "<|{}|>",
+            "text_start":  "<|text_start|>",
+            "text_end":    "<|text_end|>",
+            "audio_start": "<|audio_start|>",
+            "audio_end":   "<|audio_end|>",
+            "word_start":  "<|word_start|>",
+            "word_end":    "<|word_end|>",
+            "time":        "<|t_{:.2f}|>",
+            "code_start":  "<|code_start|>",
+            "code_end":    "<|code_end|>",
+            "text_sep":    "<|text_sep|>",
+            # West African tokens from base YarnGPT2 vocab
+            "hausa":       "<|hausa|>",
+            "igbo":        "<|igbo|>",
+            "yoruba":      "<|yoruba|>",
+            "english":     "<|english|>",
+            # East African tokens mapped to closest phonetic proxies
+            "swahili":     "<|hausa|>",
+            "kikuyu":      "<|igbo|>",
+            "luo":         "<|igbo|>",
+            "luhya":       "<|yoruba|>",
+            "kamba":       "<|hausa|>",
+            "kalenjin":    "<|igbo|>",
+        }
+        self.bos = "<|im_start|>"
+        self.eos = "<|im_end|>"
+        self.input_length = 0
+        self.lec = inflect.engine()
+        self.uroman = ur.Uroman() if _uroman_available else None
+        print(f"[*] Loading tokenizer from: {tokenizer_path}")
+        self.tokenizer = AutoTokenizer.from_pretrained(
+            tokenizer_path,
+            local_files_only=local_weights_only
+        )
+        print(f"[*] Loading WavTokenizer checkpoint...")
+        self.wavtokenizer = WavTokenizer.from_pretrained0802(
+            wav_tokenizer_config_path,
+            wav_tokenizer_model_path
+        ).to(self.device)
+        print("[*] WavTokenizer ready!")
+        self.BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+        self.DEFAULT_SPEAKERS_DIR       = os.path.join(self.BASE_DIR, "default_speakers")
+        self.DEFAULT_SPEAKERS_LOCAL_DIR = os.path.join(self.BASE_DIR, "default_speakers_local")
+        self.speakers_swahili = ["baraka", "zawadi", "amani", "imani"]
+        self.speakers_kikuyu  = ["wanjiku", "kamau", "njeri", "mwangi"]
+        self.speakers_luo     = ["otieno", "achieng", "odhiambo"]
+        self.speakers_eng     = ["idera", "emma", "onye", "jude", "osagie",
+                                  "tayo", "zainab", "joke", "regina"]
+    # ------------------------------------------------------------------
+    # Speaker management
+    # ------------------------------------------------------------------
+    def get_speaker_path(self, speaker_name: str) -> str:
+        return os.path.join(self.DEFAULT_SPEAKERS_DIR, f"{speaker_name}.json")
+    def load_speaker(self, path: str) -> dict:
+        with open(path, "r", encoding="utf-8") as f:
+            return json.load(f)
+    def load_default_speaker(self, name: str, speakers_dir: str = None) -> dict:
+        name = name.lower().strip()
+        if speakers_dir is None:
+            speakers_dir = self.DEFAULT_SPEAKERS_DIR
+        path = os.path.join(speakers_dir, f"{name}.json")
+        if not os.path.exists(path):
+            path = os.path.join(self.DEFAULT_SPEAKERS_LOCAL_DIR, f"{name}.json")
+        return self.load_speaker(path)
+    # ------------------------------------------------------------------
+    # Text normalisation
+    # ------------------------------------------------------------------
+    def process_text(self, text: str):
+        if self.uroman is not None:
+            text = self.uroman.romanize_string(text)
+        text = re.sub(
+            r'\d+(\.\d+)?',
+            lambda x: self.lec.number_to_words(x.group()),
+            text.lower()
+        )
+        text = re.sub(r'[-_/,\.\\]', ' ', text)
+        text = re.sub(r'[^a-z\s]', '', text)
+        text = re.sub(r'\s+', ' ', text).strip()
+        return text.split()
+    # ------------------------------------------------------------------
+    # Prompt construction
+    # ------------------------------------------------------------------
+    def create_audio_prompt(self, words: list) -> str:
+        prompt = []
+        for item in words:
+            word     = item["word"]
+            duration = self.special_tokens["time"].format(float(item["duration"]))
+            tokens   = "".join(
+                [self.special_tokens["audio_code"].format(c) for c in item["codes"]]
+            )
+            prompt.append(
+                f'{word}{duration}'
+                f'{self.special_tokens["code_start"]}{tokens}{self.special_tokens["code_end"]}'
+            )
+        return "\n".join(prompt)
+    def create_prompt(self, text: str, lang: str = "swahili", speaker_name: str = None) -> str:
+        lang = lang.lower()
+        assert lang in self.SUPPORTED_LANGS, \
+            f"Invalid language '{lang}'. Choose from: {self.SUPPORTED_LANGS}"
+        if speaker_name is None:
+            if lang == "swahili":
+                speaker_name = random.choice(self.speakers_swahili)
+            elif lang == "kikuyu":
+                speaker_name = random.choice(self.speakers_kikuyu)
+            elif lang == "luo":
+                speaker_name = random.choice(self.speakers_luo)
+            elif lang == "english":
+                speaker_name = random.choice(self.speakers_eng)
+            else:
+                speaker_name = random.choice(self.speakers_swahili)
+        try:
+            speaker = self.load_default_speaker(speaker_name)
+        except FileNotFoundError:
+            fallback = "emma"
+            print(f"[!] Speaker '{speaker_name}' not found, using '{fallback}'")
+            speaker = self.load_default_speaker(fallback, self.DEFAULT_SPEAKERS_DIR)
+        input_words  = self.process_text(speaker["text"]) + self.process_text(text)
+        words_string = self.special_tokens["text_sep"].join([w.strip() for w in input_words])
+        lang_token   = self.special_tokens.get(lang, self.special_tokens["hausa"])
+        prompt = self.text_prompt.format(
+            bos=self.bos,
+            text_start=self.special_tokens["text_start"],
+            words=words_string,
+            text_end=self.special_tokens["text_end"],
+            lang=lang_token,
+            audio_start=self.special_tokens["audio_start"],
+        )
+        prompt += self.create_audio_prompt(speaker["words"])
+        return prompt
+    # ------------------------------------------------------------------
+    # Tokenization & decoding
+    # ------------------------------------------------------------------
+    def tokenize_prompt(self, prompt: str):
+        input_ids = self.tokenizer.encode(
+            prompt,
+            add_special_tokens=False,
+            return_tensors="pt"
+        ).to(self.device)
+        self.input_length = input_ids.shape[1]
+        return input_ids
+    def get_codes(self, output) -> list:
+        new_output = self.tokenizer.decode(output[0][self.input_length:])
+        return [int(m) for m in re.findall(r'\|(-?\d+)\|', new_output)]
+    # ------------------------------------------------------------------
+    # Audio synthesis
+    # ------------------------------------------------------------------
+    def get_audio(self, discrete_code: list):
+        code_tensor  = torch.tensor([[discrete_code]]).to(self.device)
+        features     = self.wavtokenizer.codes_to_features(code_tensor).to(self.device)
+        bandwidth_id = torch.tensor([0]).to(self.device)
+        audio_out    = self.wavtokenizer.decode(features, bandwidth_id=bandwidth_id)
+        return audio_out.to("cpu")
+    # ------------------------------------------------------------------
+    # Audio quantization (for voice enrollment)
+    # ------------------------------------------------------------------
+    def resample(self, audio: torch.Tensor, sr: int, target_sr: int = 24000):
+        audio = audio.to(dtype=torch.float32)
+        if audio.dim() == 1:
+            audio = audio.unsqueeze(0)
+        return convert_audio(audio, sr, target_sr, 1).to(self.device)
+    def quantize_audio(self, audio_path: str) -> list:
+        audio_data, sample_rate = torchaudio.load(audio_path)
+        audio = self.resample(audio_data.squeeze(), sample_rate)
+        if audio.ndim == 3:
+            audio = audio.squeeze(1)
+        bandwidth_id = torch.tensor([0]).to(self.device)
+        _, codes = self.wavtokenizer.encode_infer(audio, bandwidth_id=bandwidth_id)
+        return codes.squeeze(1).to(self.device)[0].tolist()

swagpt/prepare_dataset.py ADDED Viewed

	@@ -0,0 +1,181 @@

+#!/usr/bin/env python3
+"""
+SwaGPT Alignment & Dataset Preparation Tool
+Uses Whisper (for word-level alignments) and WavTokenizer (for discrete codes)
+to build high-fidelity custom speaker JSON profiles or training datasets.
+"""
+import os
+import sys
+import json
+import torch
+import torchaudio
+import numpy as np
+def align_and_tokenize(audio_path, transcript, audiotokenizer, whisper_model="openai/whisper-tiny"):
+    """
+    Aligns audio with its text transcript and extracts discrete tokens.
+    Returns a dictionary matching the speaker JSON format.
+    """
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    print(f"[*] Processing audio: {audio_path}")
+    print(f"[*] Transcript: {transcript}")
+    # 1. Load Whisper pipeline for word-level timestamps
+    try:
+        from transformers import pipeline
+    except ImportError:
+        print("[!] transformers package is missing. Installing...")
+        import subprocess
+        subprocess.check_call([sys.executable, "-m", "pip", "install", "transformers"])
+        from transformers import pipeline
+    print(f"[*] Initializing Whisper alignment pipeline ({whisper_model})...")
+    pipe = pipeline(
+        "automatic-speech-recognition",
+        model=whisper_model,
+        chunk_length_s=30,
+        device=0 if torch.cuda.is_available() else -1,
+        return_timestamps="word"
+    )
+    # Load and resample audio for Whisper (16kHz)
+    audio_data, sample_rate = torchaudio.load(audio_path)
+    audio_mono = audio_data.mean(dim=0).numpy()
+    # Get Whisper word timestamps
+    print("[*] Running Whisper word alignment...")
+    result = pipe({"sampling_rate": sample_rate, "raw": audio_mono})
+    chunks = result.get("chunks", [])
+    if not chunks:
+        print("[!] Whisper was unable to segment words. Creating synthetic uniform segments.")
+        # Fallback: segment transcript uniformly across the audio length
+        duration = len(audio_mono) / sample_rate
+        words = audiotokenizer.process_text(transcript)
+        word_dur = duration / max(1, len(words))
+        chunks = []
+        for idx, w in enumerate(words):
+            chunks.append({
+                "text": w,
+                "timestamp": (idx * word_dur, (idx + 1) * word_dur)
+            })
+    print(f"[✓] Extracted {len(chunks)} word segments.")
+    # 2. Get WavTokenizer discrete codes for the audio (24kHz)
+    print("[*] Running WavTokenizer audio quantization...")
+    # Quantize the entire audio file using audiotokenizer's built-in WavTokenizer
+    # Load audio, resample to 24000
+    audio_24k = audiotokenizer.resample(audio_data.squeeze(), sample_rate, 24000)
+    if audio_24k.ndim == 3:
+        audio_24k = audio_24k.squeeze(1)
+    bandwidth_id = torch.tensor([0]).to(device)
+    _, codes = audiotokenizer.wavtokenizer.encode_infer(audio_24k, bandwidth_id=bandwidth_id)
+    codes = codes.squeeze(0).squeeze(0).tolist() # flat list of integer codes
+    total_duration = len(audio_mono) / sample_rate
+    num_codes = len(codes)
+    codes_per_sec = num_codes / total_duration
+    print(f"[i] Total duration: {total_duration:.2f}s | Discrete audio codes: {num_codes} ({codes_per_sec:.1f} codes/sec)")
+    # 3. Align Whisper word timestamps with WavTokenizer code segments
+    aligned_words = []
+    for chunk in chunks:
+        word_text = chunk["text"].strip().lower()
+        if not word_text:
+            continue
+        timestamp = chunk["timestamp"]
+        start_time, end_time = timestamp[0], timestamp[1]
+        # Handle cases where timestamp is None
+        if start_time is None:
+            start_time = 0.0
+        if end_time is None:
+            end_time = total_duration
+        duration = end_time - start_time
+        # Calculate indices of codes corresponding to this word
+        start_idx = int(start_time * codes_per_sec)
+        end_idx = int(end_time * codes_per_sec)
+        # Ensure indices stay within bounds
+        start_idx = max(0, min(start_idx, num_codes - 1))
+        end_idx = max(start_idx + 1, min(end_idx, num_codes))
+        word_codes = codes[start_idx:end_idx]
+        # If uroman Romanization changes the word, clean it up
+        processed_word_list = audiotokenizer.process_text(word_text)
+        processed_word = processed_word_list[0] if processed_word_list else word_text
+        aligned_words.append({
+            "word": processed_word,
+            "duration": f"{duration:.2f}",
+            "codes": word_codes
+        })
+    speaker_profile = {
+        "text": transcript,
+        "words": aligned_words
+    }
+    return speaker_profile
+def main():
+    """Create a speaker profile JSON from a reference recording and its transcript.
+
+    Command-line arguments (read from ``sys.argv``):
+        audio_path: path of the reference recording.
+        transcript_text: text spoken in the recording.
+        output_speaker_name: optional profile name, ``"custom_speaker"`` when omitted.
+
+    Exits with status 1 when fewer than two arguments are supplied, or when the
+    WavTokenizer config/checkpoint are missing from the ``models`` directory
+    located one level above this file's directory.
+    """
+    cli_args = sys.argv[1:]
+    if len(cli_args) < 2:
+        print("Usage: python prepare_dataset.py <audio_path> <transcript_text> [output_speaker_name]")
+        print('Example: python prepare_dataset.py reference.wav "Habari gani, jina langu ni Baraka." baraka')
+        sys.exit(1)
+    audio_path, transcript = cli_args[:2]
+    speaker_name = cli_args[2] if len(cli_args) > 2 else "custom_speaker"
+    # Imported here rather than at module import time, as in the original script.
+    from swagpt.audiotokenizer import AudioTokenizerSwa
+    # Resolve the models directory relative to this file.
+    repo_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+    models_dir = os.path.join(repo_root, "models")
+    # Prefer a local text tokenizer copy; otherwise use the Hugging Face hub id.
+    local_tokenizer = os.path.join(models_dir, "YarnGPT2")
+    tokenizer_path = local_tokenizer if os.path.exists(local_tokenizer) else "saheedniyi/YarnGPT2"
+    config_path = os.path.join(models_dir, "wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml")
+    model_path = os.path.join(models_dir, "wavtokenizer_large_speech_320_24k.ckpt")
+    # Both WavTokenizer files must be present locally before going any further.
+    weights_present = os.path.exists(config_path) and os.path.exists(model_path)
+    if not weights_present:
+        print("[!] WavTokenizer weights not found locally.")
+        print("[i] Please run 'python download_models.py' first or configure custom paths.")
+        sys.exit(1)
+    audiotokenizer = AudioTokenizerSwa(
+        tokenizer_path=tokenizer_path,
+        wav_tokenizer_model_path=model_path,
+        wav_tokenizer_config_path=config_path,
+    )
+    # Align the transcript with the audio and collect the per-word codes.
+    profile = align_and_tokenize(audio_path, transcript, audiotokenizer)
+    # The tokenizer decides where the profile for this speaker name is stored.
+    output_path = audiotokenizer.get_speaker_path(speaker_name)
+    with open(output_path, "w", encoding="utf-8") as profile_file:
+        json.dump(profile, profile_file, indent=4)
+    banner = "=" * 60
+    print("\n" + banner)
+    print("[✓] SPEAKER PROFILE CREATED SUCCESSFULLY!")
+    print(f"[*] Speaker Name: {speaker_name}")
+    print(f"[*] Profile Path: {output_path}")
+    print(banner)
+# Run the CLI only when this file is executed as a script, not when it is imported.
+if __name__ == "__main__":
+    main()

swagpt/train.py ADDED Viewed

	@@ -0,0 +1,195 @@

+#!/usr/bin/env python3
+"""
+SwaGPT Supervised Fine-Tuning (SFT) Training Script
+Fine-tunes the YarnGPT/SmolLM2 causal language model on East African language audio.
+Supports Parameter-Efficient Fine-Tuning (LoRA) via Hugging Face PEFT.
+"""
+import os
+import sys
+import json
+import argparse
+import torch
+from torch.utils.data import Dataset, DataLoader
+from transformers import AutoModelForCausalLM, AdamW, get_scheduler
+class SwaGPTDataset(Dataset):
+    """
+    Map-style dataset that turns entries of a JSON list into SFT training examples.
+
+    Each entry must provide "text" and "lang" and may provide "speaker_name".
+    The supplied audiotokenizer builds the prompt and converts it to token ids.
+    """
+    def __init__(self, data_list_path, audiotokenizer):
+        """Load the JSON list at `data_list_path` and keep the tokenizer for later use."""
+        self.audiotokenizer = audiotokenizer
+        with open(data_list_path, "r", encoding="utf-8") as handle:
+            self.data = json.load(handle)
+        print(f"[*] Loaded {len(self.data)} training instances.")
+    def __len__(self):
+        """Return the number of loaded entries."""
+        return len(self.data)
+    def __getitem__(self, idx):
+        """Return {"input_ids", "labels"} for entry `idx`; labels are a copy of input_ids."""
+        record = self.data[idx]
+        prompt = self.audiotokenizer.create_prompt(
+            record["text"],
+            lang=record["lang"],
+            speaker_name=record.get("speaker_name", None),
+        )
+        # tokenize_prompt's result is squeezed on dim 0
+        # (presumably a leading batch dimension of size 1 - TODO confirm).
+        token_ids = self.audiotokenizer.tokenize_prompt(prompt).squeeze(0)
+        # Labels mirror the inputs unchanged. Prompt text tokens could instead be
+        # set to -100 so that only the audio codes contribute to the loss.
+        return {"input_ids": token_ids, "labels": token_ids.clone()}
+def collate_fn(batch, pad_token_id=50256):
+    """
+    Pad the sequences of a batch to a common length and build the attention mask.
+
+    Args:
+        batch: list of dicts holding "input_ids" and "labels" tensors
+            (assumed to be 1-D, one sequence per example).
+        pad_token_id: id used to fill padded input positions. Defaults to 50256,
+            the value that was previously hard-coded.
+
+    Returns:
+        dict with "input_ids" (padded with `pad_token_id`), "attention_mask"
+        (bool, True on real tokens) and "labels" (padded with -100).
+    """
+    input_ids = [item["input_ids"] for item in batch]
+    labels = [item["labels"] for item in batch]
+    padded_inputs = torch.nn.utils.rnn.pad_sequence(
+        input_ids,
+        batch_first=True,
+        padding_value=pad_token_id
+    )
+    padded_labels = torch.nn.utils.rnn.pad_sequence(
+        labels,
+        batch_first=True,
+        padding_value=-100 # -100 is ignored in CrossEntropyLoss
+    )
+    # Derive the mask from the true sequence lengths. Comparing against the pad
+    # id would also hide genuine tokens whose id happens to equal it.
+    lengths = torch.tensor(
+        [seq.size(0) for seq in input_ids],
+        device=padded_inputs.device
+    )
+    positions = torch.arange(padded_inputs.size(1), device=padded_inputs.device)
+    attention_mask = positions.unsqueeze(0) < lengths.unsqueeze(1)
+    return {
+        "input_ids": padded_inputs,
+        "attention_mask": attention_mask,
+        "labels": padded_labels
+    }
+def train_model(args):
+    """
+    Fine-tune a causal language model on a SwaGPT dataset, saving a checkpoint per epoch.
+
+    Args:
+        args: parsed CLI namespace. Fields read here: `model_path`,
+            `wav_tokenizer_model`, `wav_tokenizer_config`, `dataset_path`,
+            `batch_size`, `lora`, `lora_r`, `lora_alpha`, `learning_rate`,
+            `epochs` and `output_dir`.
+
+    Raises:
+        ValueError: if the dataset yields no batches, since there would be
+            nothing to train on and no average loss to report.
+    """
+    print("=" * 60)
+    print("                SwaGPT Model Fine-Tuning")
+    print("=" * 60)
+    from swagpt.audiotokenizer import AudioTokenizerSwa
+    # 1. Initialize tokenizer
+    print("[*] Initializing SwaGPT AudioTokenizer...")
+    audiotokenizer = AudioTokenizerSwa(
+        tokenizer_path=args.model_path,
+        wav_tokenizer_model_path=args.wav_tokenizer_model,
+        wav_tokenizer_config_path=args.wav_tokenizer_config
+    )
+    device = audiotokenizer.device
+    # 2. Load Dataset
+    print(f"[*] Loading dataset from {args.dataset_path}...")
+    dataset = SwaGPTDataset(args.dataset_path, audiotokenizer)
+    dataloader = DataLoader(
+        dataset,
+        batch_size=args.batch_size,
+        shuffle=True,
+        collate_fn=collate_fn
+    )
+    num_batches = len(dataloader)
+    # Fail early, before loading the model, instead of dividing by zero later.
+    if num_batches == 0:
+        raise ValueError(f"Dataset at {args.dataset_path} is empty; nothing to train on.")
+    # 3. Load Base Model
+    print(f"[*] Loading causal LM from {args.model_path}...")
+    # Weights are kept in float32: the optimizer updates them directly (no
+    # gradient scaler or master copy), and doing that on float16 weights is
+    # prone to overflow and NaN losses.
+    model = AutoModelForCausalLM.from_pretrained(
+        args.model_path,
+        torch_dtype=torch.float32
+    )
+    # Enable LoRA PEFT if requested
+    if args.lora:
+        try:
+            from peft import LoraConfig, get_peft_model
+            print("[*] Configuring Parameter-Efficient Fine-Tuning (LoRA)...")
+            lora_config = LoraConfig(
+                r=args.lora_r,
+                lora_alpha=args.lora_alpha,
+                target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],
+                lora_dropout=0.05,
+                bias="none",
+                task_type="CAUSAL_LM"
+            )
+            model = get_peft_model(model, lora_config)
+            model.print_trainable_parameters()
+        except ImportError:
+            # Deliberate best-effort: without PEFT the run continues as full fine-tuning.
+            print("[!] PEFT module not installed. Proceeding with full fine-tuning...")
+    model = model.to(device)
+    # 4. Optimizer & Scheduler
+    # torch.optim.AdamW replaces the deprecated transformers.AdamW. eps and
+    # weight_decay are set to that class's defaults to stay close to the
+    # previous behavior.
+    optimizer = torch.optim.AdamW(
+        model.parameters(),
+        lr=args.learning_rate,
+        eps=1e-6,
+        weight_decay=0.0
+    )
+    num_training_steps = args.epochs * num_batches
+    lr_scheduler = get_scheduler(
+        "linear",
+        optimizer=optimizer,
+        num_warmup_steps=int(0.1 * num_training_steps),
+        num_training_steps=num_training_steps
+    )
+    # 5. Training Loop
+    print(f"\n[*] Starting training on {device}...")
+    print(f"[*] Epochs: {args.epochs} | Batch size: {args.batch_size} | LR: {args.learning_rate}")
+    model.train()
+    for epoch in range(args.epochs):
+        total_loss = 0.0
+        for step, batch in enumerate(dataloader):
+            input_ids = batch["input_ids"].to(device)
+            attention_mask = batch["attention_mask"].to(device)
+            labels = batch["labels"].to(device)
+            outputs = model(
+                input_ids=input_ids,
+                attention_mask=attention_mask,
+                labels=labels
+            )
+            loss = outputs.loss
+            loss.backward()
+            optimizer.step()
+            lr_scheduler.step()
+            optimizer.zero_grad()
+            # Read the scalar once; each .item() call forces a device sync.
+            loss_value = loss.item()
+            total_loss += loss_value
+            if (step + 1) % 5 == 0 or (step + 1) == num_batches:
+                print(f"  Epoch [{epoch+1}/{args.epochs}] | Step [{step+1}/{num_batches}] | Loss: {loss_value:.4f}")
+        avg_loss = total_loss / num_batches
+        print(f"[✓] Epoch {epoch+1} Complete. Average Loss: {avg_loss:.4f}")
+        # Save checkpoint
+        checkpoint_dir = os.path.join(args.output_dir, f"checkpoint-epoch-{epoch+1}")
+        os.makedirs(checkpoint_dir, exist_ok=True)
+        model.save_pretrained(checkpoint_dir)
+        audiotokenizer.tokenizer.save_pretrained(checkpoint_dir)
+        print(f"[*] Saved checkpoint to {checkpoint_dir}")
+    print("\n" + "=" * 60)
+    print(f"[✓] TRAINING COMPLETE! Final model saved to {args.output_dir}")
+    print("=" * 60)
+# CLI entry point: parse the training options and start fine-tuning.
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="SwaGPT SFT Training CLI")
+    parser.add_argument("--dataset_path", type=str, required=True, help="Path to SwaGPT JSON dataset list")
+    parser.add_argument("--model_path", type=str, default="saheedniyi/YarnGPT2", help="Pre-trained YarnGPT2 or SmolLM2 path")
+    parser.add_argument("--wav_tokenizer_model", type=str, required=True, help="WavTokenizer CKPT file path")
+    parser.add_argument("--wav_tokenizer_config", type=str, required=True, help="WavTokenizer YAML file path")
+    parser.add_argument("--output_dir", type=str, default="./outputs_swagpt", help="Output model directory")
+    # `type` was previously passed twice (str and int), which is a SyntaxError
+    # that prevented the whole module from being compiled.
+    parser.add_argument("--epochs", type=int, default=3, help="Number of training epochs")
+    parser.add_argument("--batch_size", type=int, default=2, help="Batch size per training step")
+    parser.add_argument("--learning_rate", type=float, default=2e-5, help="Learning rate")
+    parser.add_argument("--lora", action="store_true", help="Enable parameter-efficient fine-tuning using LoRA")
+    parser.add_argument("--lora_r", type=int, default=8, help="LoRA dimension rank")
+    parser.add_argument("--lora_alpha", type=int, default=16, help="LoRA alpha scaling factor")
+    args = parser.parse_args()
+    train_model(args)

test_swagpt.py ADDED Viewed

	@@ -0,0 +1,80 @@

+#!/usr/bin/env python3
+"""
+SwaGPT Automated Verification Script
+Validates tokenizer loading, Swahili text normalizations, and prompt compilation.
+"""
+import sys
+import os
+def run_tests():
+    """
+    Run the SwaGPT smoke checks and print a report to stdout.
+
+    Steps: try importing AudioTokenizerSwa (a failure is only reported), create and
+    list the local models directory, then check text normalization with a
+    lightweight stand-in tokenizer. Exits with status 1 if that last check fails.
+    """
+    import re
+    banner = "=" * 60
+    print(banner)
+    print("           SwaGPT Verification Suite")
+    print(banner)
+    try:
+        from swagpt.audiotokenizer import AudioTokenizerSwa
+    except ImportError as e:
+        print(f"[!] Note: swagpt.audiotokenizer could not be fully imported due to missing dependencies ({e}).")
+        print("[i] We will skip outetts-dependent tests and proceed with automated phonetic normalizers and uroman tests.")
+    else:
+        print("[OK] Successfully imported AudioTokenizerSwa!")
+    # Expected locations of the WavTokenizer files, relative to this script.
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    models_dir = os.path.join(script_dir, "models")
+    config_path = os.path.join(models_dir, "wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml")
+    model_path = os.path.join(models_dir, "wavtokenizer_large_speech_320_24k.ckpt")
+    # Only report whether the files exist; nothing is loaded here.
+    print("\n[*] Checking local models directory structure...")
+    os.makedirs(models_dir, exist_ok=True)
+    print(f"[*] Models path: {models_dir}")
+    print(f"[*] WavTokenizer config: {config_path} (Exists: {os.path.exists(config_path)})")
+    print(f"[*] WavTokenizer checkpoint: {model_path} (Exists: {os.path.exists(model_path)})")
+    print("\n[*] Testing Swahili & East African phonetic normalizations...")
+    try:
+        class _LightNormalizer:
+            """Stand-in carrying only the romanizer and number speller used below."""
+            def __init__(self):
+                import uroman as ur
+                import inflect
+                self.uroman = ur.Uroman()
+                self.lec = inflect.engine()
+            def process_text(self, text: str):
+                """Romanize, spell out numbers, drop non-letters and split into words."""
+                def spell_number(match):
+                    return self.lec.number_to_words(match.group())
+                lowered = self.uroman.romanize_string(text).lower()
+                spelled = re.sub(r'\d+(\.\d+)?', spell_number, lowered)
+                spaced = re.sub(r'[-_/,\.\\]', ' ', spelled)
+                letters_only = re.sub(r'[^a-z\s]', '', spaced)
+                collapsed = re.sub(r'\s+', ' ', letters_only).strip()
+                return collapsed.split()
+        normalizer = _LightNormalizer()
+        sample = "Habari za mchana, 2026 ni mwaka mwema kabisa!"
+        words = normalizer.process_text(sample)
+        print(f"  Input:  '{sample}'")
+        print(f"  Output: {words}")
+        assert "habari" in words, "Failed to normalize words"
+        assert any(token in words for token in ("twenty", "two")), "Failed to convert digits to words"
+        print("[OK] Swahili text processing and uroman Romanization normalizations are 100% correct!")
+    except Exception as e:
+        print(f"[!] Normalization test failed: {e}")
+        sys.exit(1)
+    print("\n" + banner)
+    print("             Verification Suite Completed Successfully!")
+    print(banner)
+# Run the verification suite only when this file is executed as a script.
+if __name__ == "__main__":
+    run_tests()