D3vShoaib commited on
Commit
ed16331
·
1 Parent(s): fa6c114

Optimized for CPU, inspired by hadadxyz/pocket-tts-hf-cpu-optimized

Browse files
Files changed (6) hide show
  1. .gitattributes +0 -35
  2. Dockerfile.txt +10 -0
  3. README.md +6 -6
  4. app.py +386 -50
  5. packages.txt +0 -1
  6. requirements.txt +0 -66
.gitattributes DELETED
@@ -1,35 +0,0 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Dockerfile.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ FROM hadadrjt/pocket-tts:hf
7
+
8
+ WORKDIR /app
9
+
10
+ COPY app.py .
README.md CHANGED
@@ -1,13 +1,13 @@
1
  ---
2
- title: Pocket TTS
3
  emoji: ⚡
 
 
4
  colorFrom: green
5
  colorTo: green
6
- sdk: gradio
7
- sdk_version: 6.3.0
8
- app_file: app.py
9
  pinned: true
10
- short_description: A 100M text-to-speech (TTS) by Kyutai-Labs
11
  ---
12
 
13
  # PocketTTS Gradio Interface
@@ -66,4 +66,4 @@ The app supports the following built-in voices:
66
 
67
  Built by [D3vShoaib](https://github.com/D3vShoaib).
68
 
69
- _Note: This is an independent demonstration of the PocketTTS model and is not officially affiliated with Kyutai Labs._
 
1
  ---
2
+ title: Pocket TTS (CPU)
3
  emoji: ⚡
4
+ short_description: A 100M-parameter text-to-speech (TTS) model by Kyutai-Labs
5
+ license: apache-2.0
6
  colorFrom: green
7
  colorTo: green
8
+ sdk: docker
9
+ app_port: 7860
 
10
  pinned: true
 
11
  ---
12
 
13
  # PocketTTS Gradio Interface
 
66
 
67
  Built by [D3vShoaib](https://github.com/D3vShoaib).
68
 
69
+ _Note: This is an independent demonstration of the PocketTTS model and is not officially affiliated with Kyutai Labs._
app.py CHANGED
@@ -1,56 +1,266 @@
1
  import gradio as gr
2
  import numpy as np
3
  import os
 
 
 
 
 
 
4
  from huggingface_hub import login
5
  from pocket_tts import TTSModel
6
 
 
 
 
 
7
  # HF Token for gated models in Spaces
8
  hf_token = os.getenv("HF_TOKEN")
9
  if hf_token:
10
  print("HF_TOKEN found, logging in...")
11
  login(token=hf_token)
12
 
13
- # Load model once at startup
14
- print("Loading PocketTTS model...")
15
- model = TTSModel.load_model()
16
- print("Model loaded.")
17
-
18
  VOICES = ['alba', 'marius', 'javert', 'jean', 'fantine', 'cosette', 'eponine', 'azelma']
19
 
20
- import traceback
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
- def generate_speech(text, voice_mode, voice_dropdown, voice_upload):
23
- if not text:
24
- return None
25
-
26
  try:
27
- if voice_mode == "Kyutai Voices":
28
- voice_path = voice_dropdown
 
 
 
 
 
 
 
 
 
 
 
29
  else:
30
- if not voice_upload:
31
- raise gr.Error("Please upload an audio file for voice cloning.")
32
- voice_path = voice_upload
33
-
34
- print(f"Generating with voice: {voice_path}")
35
- try:
36
- voice_state = model.get_state_for_audio_prompt(voice_path)
37
- audio = model.generate_audio(voice_state, text)
38
- except Exception as e:
39
- full_error = traceback.format_exc()
40
- print(f"Error in model processing: {full_error}")
41
- raise gr.Error(f"Model error: {str(e)}")
42
-
43
- # Convert to 16-bit PCM to avoid Gradio warnings
44
- audio_np = audio.cpu().numpy()
45
- audio_int16 = (audio_np * 32767).astype(np.int16)
46
 
47
- return (model.sample_rate, audio_int16)
 
 
 
 
 
 
 
 
 
 
 
 
48
  except gr.Error:
49
  raise
50
  except Exception as e:
51
  full_error = traceback.format_exc()
52
  print(f"Unexpected error: {full_error}")
53
  raise gr.Error(f"An unexpected error occurred: {str(e)}")
 
 
 
 
54
 
55
 
56
  # Load custom theme with fallback
@@ -216,7 +426,8 @@ with gr.Blocks() as demo:
216
  text_input = gr.Textbox(
217
  label="Text to Speak",
218
  placeholder="Enter text here...",
219
- lines=8,
 
220
  elem_id="text-input"
221
  )
222
  voice_mode = gr.Radio(
@@ -240,9 +451,64 @@ with gr.Blocks() as demo:
240
  type="filepath",
241
  elem_id="voice-upload"
242
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
243
  with gr.Row():
244
  clear_btn = gr.Button("🗑️ Clear", variant="secondary")
245
  generate_btn = gr.Button("⚡ Generate", variant="primary")
 
246
 
247
  with gr.Column(scale=1):
248
  audio_output = gr.Audio(
@@ -250,22 +516,19 @@ with gr.Blocks() as demo:
250
  autoplay=True,
251
  elem_id="audio-output"
252
  )
253
- gr.Markdown("""
254
- ### 🚀 Performance
255
- - **Latency**: ~200ms first chunk (local install)
256
- - **Speed**: 6x real-time
257
- - **Engine**: CPU Optimized
258
- - **Note**: Demo limited by Gradio hosting
259
- """)
260
-
261
- gr.Examples(
262
- examples=[
263
- ["Hello! This is a test of the pocket-tts system. It's incredibly fast and runs right on your CPU.", "Kyutai Voices", "alba", None],
264
- ["The quick brown fox jumps over the lazy dog.", "Kyutai Voices", "marius", None],
265
- ["Would you like some tea? It's freshly brewed.", "Kyutai Voices", "javert", None]
266
- ],
267
- inputs=[text_input, voice_mode, voice_select, voice_upload],
268
- )
269
 
270
  gr.HTML("""
271
  <div class="disclaimer">
@@ -301,22 +564,95 @@ with gr.Blocks() as demo:
301
  outputs=[standard_voice_col, cloning_voice_col]
302
  )
303
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
304
  # Event handlers
305
  generate_btn.click(
 
 
 
306
  fn=generate_speech,
307
- inputs=[text_input, voice_mode, voice_select, voice_upload],
308
  outputs=audio_output
 
 
 
309
  )
310
 
311
  text_input.submit(
 
 
 
312
  fn=generate_speech,
313
- inputs=[text_input, voice_mode, voice_select, voice_upload],
314
  outputs=audio_output
 
 
 
 
 
 
 
 
 
315
  )
316
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
317
  clear_btn.click(
318
- fn=lambda: ("", "Kyutai Voices", "alba", None, None),
319
- outputs=[text_input, voice_mode, voice_select, voice_upload, audio_output]
 
 
 
 
 
 
 
 
 
 
 
 
320
  )
321
 
322
  if __name__ == "__main__":
 
1
  import gradio as gr
2
  import numpy as np
3
  import os
4
+ import time
5
+ import torch
6
+ import tempfile
7
+ import threading
8
+ import scipy.io.wavfile
9
+ import traceback
10
  from huggingface_hub import login
11
  from pocket_tts import TTSModel
12
 
13
+ # Configure PyTorch threading behavior for CPU optimization
14
+ torch.set_num_threads(1)
15
+ torch.set_num_interop_threads(1)
16
+
17
  # HF Token for gated models in Spaces
18
  hf_token = os.getenv("HF_TOKEN")
19
  if hf_token:
20
  print("HF_TOKEN found, logging in...")
21
  login(token=hf_token)
22
 
 
 
 
 
 
23
  VOICES = ['alba', 'marius', 'javert', 'jean', 'fantine', 'cosette', 'eponine', 'azelma']
24
 
25
# Default configuration values for model loading and generation.
DEFAULT_VOICE = "alba"
DEFAULT_MODEL_VARIANT = "b6369a24"
DEFAULT_TEMPERATURE = 0.8
DEFAULT_LSD_DECODE_STEPS = 1
DEFAULT_EOS_THRESHOLD = -4.0
DEFAULT_NOISE_CLAMP = 0.0  # 0.0 means "disabled"; mapped to None when loading the model
DEFAULT_FRAMES_AFTER_EOS = 10
MAXIMUM_INPUT_LENGTH = 1000  # hard cap on input characters per request
TEMPORARY_FILE_LIFETIME_SECONDS = 7200  # 2 hours

# Serializes generation: only one synthesis may run at a time.
# Both flags below are read/written under this lock by generate_speech.
generation_state_lock = threading.Lock()
is_currently_generating = False
stop_generation_requested = False

# Maps generated temp WAV path -> creation timestamp; guarded by temporary_files_lock.
temporary_files_registry = {}
temporary_files_lock = threading.Lock()
42
+
43
+
44
class TextToSpeechManager:
    """
    Manages TTS model lifecycle and speech generation operations.

    Implements lazy loading (the model is only (re)loaded when the requested
    configuration differs from the cached one) and per-preset voice-state
    caching. Not thread-safe on its own; callers serialize requests via the
    module-level generation_state_lock.
    """

    def __init__(self):
        self.loaded_model = None          # cached TTSModel instance
        self.current_configuration = {}   # config dict the cached model was built with
        self.voice_state_cache = {}       # preset voice name -> prompt state

    def load_or_get_model(
        self,
        model_variant,
        temperature,
        lsd_decode_steps,
        noise_clamp,
        eos_threshold
    ):
        """Load a TTS model or return the cached instance if the configuration matches.

        Any argument passed as None falls back to the module default.
        Reloading invalidates the voice-state cache, since cached states are
        tied to the model instance that produced them.
        """
        processed_variant = str(model_variant or DEFAULT_MODEL_VARIANT).strip()
        processed_temperature = float(temperature) if temperature is not None else DEFAULT_TEMPERATURE
        processed_lsd_steps = int(lsd_decode_steps) if lsd_decode_steps is not None else DEFAULT_LSD_DECODE_STEPS
        # A clamp of 0 (or None) means "disabled" and is passed through as None.
        # (Original converted noise_clamp to float twice; do it once.)
        clamp_value = float(noise_clamp) if noise_clamp is not None else 0.0
        processed_noise_clamp = clamp_value if clamp_value > 0 else None
        processed_eos_threshold = float(eos_threshold) if eos_threshold is not None else DEFAULT_EOS_THRESHOLD

        requested_configuration = {
            "variant": processed_variant,
            "temp": processed_temperature,
            "lsd_decode_steps": processed_lsd_steps,
            "noise_clamp": processed_noise_clamp,
            "eos_threshold": processed_eos_threshold
        }

        if self.loaded_model is None or self.current_configuration != requested_configuration:
            print(f"Loading model with config: {requested_configuration}")
            self.loaded_model = TTSModel.load_model(**requested_configuration)
            self.current_configuration = requested_configuration
            self.voice_state_cache = {}  # states from the previous model are invalid
            print("Model loaded.")

        return self.loaded_model

    def get_voice_state_for_preset(self, voice_name):
        """Get or compute the prompt state for a preset voice, with caching.

        Unknown voice names silently fall back to DEFAULT_VOICE.
        """
        validated_voice = voice_name if voice_name in VOICES else DEFAULT_VOICE

        if validated_voice not in self.voice_state_cache:
            self.voice_state_cache[validated_voice] = self.loaded_model.get_state_for_audio_prompt(
                audio_conditioning=validated_voice,
                truncate=False
            )

        return self.voice_state_cache[validated_voice]

    def get_voice_state_for_clone(self, audio_file_path):
        """Compute a prompt state from an uploaded audio file (voice cloning).

        Not cached: uploads are transient, one-off files.
        """
        return self.loaded_model.get_state_for_audio_prompt(
            audio_conditioning=audio_file_path,
            truncate=False
        )

    def generate_audio(self, text_content, voice_state, frames_after_eos, enable_custom_frames):
        """Generate speech audio from text using the given voice state.

        frames_after_eos is honored only when enable_custom_frames is truthy;
        otherwise None lets the model use its default post-EOS behavior.
        copy_state=True keeps the (possibly cached) voice state reusable.
        """
        processed_frames = int(frames_after_eos) if enable_custom_frames else None

        return self.loaded_model.generate_audio(
            model_state=voice_state,
            text_to_generate=text_content,
            frames_after_eos=processed_frames,
            copy_state=True
        )

    def save_audio_to_file(self, audio_tensor):
        """Save a generated audio tensor to a temporary WAV file.

        Returns the file path and registers it for delayed cleanup.
        """
        audio_numpy_data = audio_tensor.numpy()
        audio_sample_rate = self.loaded_model.sample_rate

        # Fix: the original kept the NamedTemporaryFile handle open forever
        # (fd leak) while writing to the same path, which also fails on
        # Windows where an open temp file cannot be reopened for writing.
        # Close the handle first, then write by path.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as output_file:
            output_path = output_file.name
        scipy.io.wavfile.write(output_path, audio_sample_rate, audio_numpy_data)

        with temporary_files_lock:
            temporary_files_registry[output_path] = time.time()

        return output_path
129
+
130
+
131
# Create the single, process-wide TTS manager instance.
tts_manager = TextToSpeechManager()

# Load the model eagerly at import time (with the default parameters) so the
# first user request does not pay the model-loading cost.
print("Loading PocketTTS model with default parameters...")
tts_manager.load_or_get_model(
    DEFAULT_MODEL_VARIANT,
    DEFAULT_TEMPERATURE,
    DEFAULT_LSD_DECODE_STEPS,
    DEFAULT_NOISE_CLAMP,
    DEFAULT_EOS_THRESHOLD
)
print("Model ready!")
144
+
145
def cleanup_expired_temporary_files():
    """Remove temporary files that have exceeded their lifetime.

    Registry entries are pruned while holding temporary_files_lock; the
    actual filesystem deletes happen outside the lock so slow disk I/O
    never blocks concurrent registrations. Best-effort: a file that cannot
    be removed now is still dropped from the registry (it lives in the OS
    temp directory and will be reclaimed eventually anyway).
    """
    current_timestamp = time.time()

    with temporary_files_lock:
        expired_files = [
            file_path
            for file_path, creation_timestamp in temporary_files_registry.items()
            if current_timestamp - creation_timestamp > TEMPORARY_FILE_LIFETIME_SECONDS
        ]
        # Fix: the original deleted registry entries outside the lock,
        # racing with save_audio_to_file's registrations; prune here.
        for file_path in expired_files:
            del temporary_files_registry[file_path]

    for file_path in expired_files:
        try:
            if os.path.exists(file_path):
                os.remove(file_path)
        except OSError:
            # Best effort only; never let cleanup break a generation request.
            pass
162
+
163
+
164
def validate_text_input(text_content):
    """Validate and normalize raw text input for speech generation.

    Returns (True, cleaned_text) when the input is usable, otherwise
    (False, message) where message is "" for empty/non-string input and an
    explanatory string when the length cap is exceeded.
    """
    if not isinstance(text_content, str):
        return False, ""

    cleaned_text = text_content.strip()
    if not cleaned_text:
        return False, ""

    if len(cleaned_text) > MAXIMUM_INPUT_LENGTH:
        return False, f"Input exceeds maximum length of {MAXIMUM_INPUT_LENGTH} characters."

    return True, cleaned_text
178
+
179
+
180
def request_generation_stop():
    """Flag the in-flight generation for cancellation.

    generate_speech polls stop_generation_requested between pipeline stages
    and bails out with None when it is set. Returns a Gradio update that
    disables the stop button so it cannot be clicked again.
    """
    global stop_generation_requested
    stop_generation_requested = True
    return gr.update(interactive=False)
185
+
186
+ # Speech generation function
187
def generate_speech(
    text,
    voice_mode,
    voice_dropdown,
    voice_upload,
    temperature,
    lsd_decode_steps,
    noise_clamp,
    eos_threshold,
    frames_after_eos,
    enable_custom_frames
):
    """Run the complete text-to-speech workflow for a single request.

    Validates the input, (re)loads the model when parameters changed,
    resolves the voice prompt state, synthesizes audio, and writes it to a
    temporary WAV file whose path is returned. The module-level stop flag is
    polled between stages; when set, the function returns None early.
    Raises gr.Error for all user-facing problems.
    """
    global is_currently_generating, stop_generation_requested

    # Opportunistic housekeeping before doing any real work.
    cleanup_expired_temporary_files()

    text_ok, cleaned_or_message = validate_text_input(text)
    if not text_ok:
        # A non-empty second element carries a specific validation message.
        if cleaned_or_message:
            raise gr.Error(cleaned_or_message)
        raise gr.Error("Please enter valid text to generate speech.")

    if voice_mode == "Voice Cloning" and not voice_upload:
        raise gr.Error("Please upload an audio file for voice cloning.")

    # Admit at most one generation at a time.
    with generation_state_lock:
        if is_currently_generating:
            raise gr.Error("A generation is already in progress. Please wait.")
        is_currently_generating = True
        stop_generation_requested = False

    try:
        tts_manager.load_or_get_model(
            DEFAULT_MODEL_VARIANT,
            temperature,
            lsd_decode_steps,
            noise_clamp,
            eos_threshold
        )
        if stop_generation_requested:
            return None

        if voice_mode == "Voice Cloning":
            prompt_state = tts_manager.get_voice_state_for_clone(voice_upload)
        else:
            prompt_state = tts_manager.get_voice_state_for_preset(voice_dropdown)
        if stop_generation_requested:
            return None

        print(f"Generating with voice mode: {voice_mode}, temp: {temperature}, lsd_steps: {lsd_decode_steps}")

        synthesized_audio = tts_manager.generate_audio(
            cleaned_or_message,
            prompt_state,
            frames_after_eos,
            enable_custom_frames
        )
        if stop_generation_requested:
            return None

        return tts_manager.save_audio_to_file(synthesized_audio)
    except gr.Error:
        raise
    except Exception as e:
        full_error = traceback.format_exc()
        print(f"Unexpected error: {full_error}")
        raise gr.Error(f"An unexpected error occurred: {str(e)}")
    finally:
        # Always release the single-generation slot and reset the stop flag.
        with generation_state_lock:
            is_currently_generating = False
            stop_generation_requested = False
264
 
265
 
266
  # Load custom theme with fallback
 
426
  text_input = gr.Textbox(
427
  label="Text to Speak",
428
  placeholder="Enter text here...",
429
+ value="Hello! Welcome to Pocket TTS. This lightweight text to speech model runs entirely on your CPU. Try changing the voice or adjusting the generation parameters below.",
430
+ lines=9,
431
  elem_id="text-input"
432
  )
433
  voice_mode = gr.Radio(
 
451
  type="filepath",
452
  elem_id="voice-upload"
453
  )
454
+
455
+ # Generation Parameters Accordion
456
+ with gr.Accordion("⚙️ Generation Parameters", open=False):
457
+ with gr.Row():
458
+ temperature_slider = gr.Slider(
459
+ label="Temperature",
460
+ minimum=0.1,
461
+ maximum=2.0,
462
+ step=0.05,
463
+ value=DEFAULT_TEMPERATURE,
464
+ info="Higher values produce more expressive speech"
465
+ )
466
+ lsd_decode_steps_slider = gr.Slider(
467
+ label="LSD Decode Steps",
468
+ minimum=1,
469
+ maximum=20,
470
+ step=1,
471
+ value=DEFAULT_LSD_DECODE_STEPS,
472
+ info="More steps may improve quality but slower"
473
+ )
474
+
475
+ with gr.Row():
476
+ noise_clamp_slider = gr.Slider(
477
+ label="Noise Clamp",
478
+ minimum=0.0,
479
+ maximum=2.0,
480
+ step=0.05,
481
+ value=DEFAULT_NOISE_CLAMP,
482
+ info="Maximum noise sampling value (0 = disabled)"
483
+ )
484
+ eos_threshold_slider = gr.Slider(
485
+ label="End of Sequence Threshold",
486
+ minimum=-10.0,
487
+ maximum=0.0,
488
+ step=0.25,
489
+ value=DEFAULT_EOS_THRESHOLD,
490
+ info="Smaller values cause earlier completion"
491
+ )
492
+
493
+ with gr.Row():
494
+ enable_custom_frames_checkbox = gr.Checkbox(
495
+ label="Enable Custom Frames After EOS",
496
+ value=False,
497
+ info="Manually control post-EOS frame generation"
498
+ )
499
+ frames_after_eos_slider = gr.Slider(
500
+ label="Frames After EOS",
501
+ minimum=0,
502
+ maximum=100,
503
+ step=1,
504
+ value=DEFAULT_FRAMES_AFTER_EOS,
505
+ info="Additional frames after end-of-sequence (80ms per frame)"
506
+ )
507
+
508
  with gr.Row():
509
  clear_btn = gr.Button("🗑️ Clear", variant="secondary")
510
  generate_btn = gr.Button("⚡ Generate", variant="primary")
511
+ stop_btn = gr.Button("🔴 Stop", variant="stop", visible=False)
512
 
513
  with gr.Column(scale=1):
514
  audio_output = gr.Audio(
 
516
  autoplay=True,
517
  elem_id="audio-output"
518
  )
519
+ gr.Examples(
520
+ examples=[
521
+ ["On Tuesday, the seventeenth of October, two thousand twenty-five, at exactly six forty-five in the morning, the outdoor temperature dropped to twelve point eight degrees Celsius. The forecast predicts a high of twenty-two degrees by noon.", "alba"],
522
+ ["Welcome to Station Forty-Seven. Your train to Platform Nineteen B will arrive in approximately fifteen minutes. Please have your tickets ready for inspection.", "marius"],
523
+ ["You dare defy me? I have spent twenty long years hunting you across every shadow and every corner of this wretched kingdom. There is no escape. Justice will find you, and when it does, you will kneel before me and beg for mercy that will never come!", "javert"],
524
+ ["Flight Seven Ninety-Two to London Heathrow is now boarding at Gate Twenty-Three A. Final call for passengers Smith and Johnson. Departure is scheduled for fourteen thirty hours.", "jean"],
525
+ ["Our quarterly revenue reached four point seven million dollars, up eighteen percent from last year. The board meeting is scheduled for the twenty-fifth of November at two fifteen in the afternoon.", "fantine"],
526
+ ["The recipe calls for three hundred fifty grams of flour, two hundred milliliters of milk, and one point five teaspoons of vanilla extract. Bake at one hundred eighty degrees for forty-five minutes.", "cosette"],
527
+ ["Chapter Fourteen, Page Two Hundred Thirty-Seven. The mysterious traveler arrived at the inn precisely at midnight. He carried nothing but a worn leather satchel and spoke with an accent no one could place.", "eponine"],
528
+ ["Exercise routine: Run five kilometers in under thirty minutes. Complete three sets of fifteen push-ups. Rest for ninety seconds between each set. Cool down with ten minutes of stretching.", "azelma"]
529
+ ],
530
+ inputs=[text_input, voice_select],
531
+ )
 
 
 
532
 
533
  gr.HTML("""
534
  <div class="disclaimer">
 
564
  outputs=[standard_voice_col, cloning_voice_col]
565
  )
566
 
567
+ # Define generation inputs list
568
+ generation_inputs = [
569
+ text_input,
570
+ voice_mode,
571
+ voice_select,
572
+ voice_upload,
573
+ temperature_slider,
574
+ lsd_decode_steps_slider,
575
+ noise_clamp_slider,
576
+ eos_threshold_slider,
577
+ frames_after_eos_slider,
578
+ enable_custom_frames_checkbox
579
+ ]
580
+
581
+ # UI state management functions
582
+ def switch_to_generating_state():
583
+ return (
584
+ gr.update(visible=False), # Hide generate button
585
+ gr.update(visible=True, interactive=True) # Show stop button
586
+ )
587
+
588
+ def switch_to_idle_state():
589
+ return (
590
+ gr.update(visible=True), # Show generate button
591
+ gr.update(visible=False) # Hide stop button
592
+ )
593
+
594
  # Event handlers
595
  generate_btn.click(
596
+ fn=switch_to_generating_state,
597
+ outputs=[generate_btn, stop_btn]
598
+ ).then(
599
  fn=generate_speech,
600
+ inputs=generation_inputs,
601
  outputs=audio_output
602
+ ).then(
603
+ fn=switch_to_idle_state,
604
+ outputs=[generate_btn, stop_btn]
605
  )
606
 
607
  text_input.submit(
608
+ fn=switch_to_generating_state,
609
+ outputs=[generate_btn, stop_btn]
610
+ ).then(
611
  fn=generate_speech,
612
+ inputs=generation_inputs,
613
  outputs=audio_output
614
+ ).then(
615
+ fn=switch_to_idle_state,
616
+ outputs=[generate_btn, stop_btn]
617
+ )
618
+
619
+ # Stop button handler
620
+ stop_btn.click(
621
+ fn=request_generation_stop,
622
+ outputs=[stop_btn]
623
  )
624
 
625
+ # Clear button handler - also reset generation parameters
626
+ def perform_clear_action():
627
+ return (
628
+ "", # text_input
629
+ "Kyutai Voices", # voice_mode
630
+ "alba", # voice_select
631
+ None, # voice_upload
632
+ None, # audio_output
633
+ DEFAULT_TEMPERATURE, # temperature_slider
634
+ DEFAULT_LSD_DECODE_STEPS, # lsd_decode_steps_slider
635
+ DEFAULT_NOISE_CLAMP, # noise_clamp_slider
636
+ DEFAULT_EOS_THRESHOLD, # eos_threshold_slider
637
+ DEFAULT_FRAMES_AFTER_EOS, # frames_after_eos_slider
638
+ False # enable_custom_frames_checkbox
639
+ )
640
+
641
  clear_btn.click(
642
+ fn=perform_clear_action,
643
+ outputs=[
644
+ text_input,
645
+ voice_mode,
646
+ voice_select,
647
+ voice_upload,
648
+ audio_output,
649
+ temperature_slider,
650
+ lsd_decode_steps_slider,
651
+ noise_clamp_slider,
652
+ eos_threshold_slider,
653
+ frames_after_eos_slider,
654
+ enable_custom_frames_checkbox
655
+ ]
656
  )
657
 
658
  if __name__ == "__main__":
packages.txt DELETED
@@ -1 +0,0 @@
1
- ffmpeg
 
 
requirements.txt DELETED
@@ -1,66 +0,0 @@
1
- aiofiles==24.1.0
2
- annotated-doc==0.0.4
3
- annotated-types==0.7.0
4
- anyio==4.12.1
5
- beartype==0.22.9
6
- brotli==1.2.0
7
- certifi==2026.1.4
8
- charset-normalizer==3.4.4
9
- click==8.3.1
10
- colorama==0.4.6
11
- einops==0.8.1
12
- exceptiongroup==1.3.1
13
- fastapi==0.128.0
14
- ffmpy==1.0.0
15
- filelock==3.20.3
16
- fsspec==2026.1.0
17
- gradio==6.3.0
18
- gradio_client==2.0.3
19
- groovy==0.1.2
20
- h11==0.16.0
21
- hf-xet==1.2.0
22
- httpcore==1.0.9
23
- httpx==0.28.1
24
- huggingface_hub==1.3.1
25
- idna==3.11
26
- Jinja2==3.1.6
27
- markdown-it-py==4.0.0
28
- MarkupSafe==3.0.3
29
- mdurl==0.1.2
30
- mpmath==1.3.0
31
- networkx==3.4.2
32
- numpy==2.2.6
33
- orjson==3.11.5
34
- packaging==25.0
35
- pandas==2.3.3
36
- pillow==12.1.0
37
- pocket-tts==1.0.1
38
- pydantic==2.12.5
39
- pydantic_core==2.41.5
40
- pydub==0.25.1
41
- Pygments==2.19.2
42
- python-dateutil==2.9.0.post0
43
- python-multipart==0.0.21
44
- pytz==2025.2
45
- PyYAML==6.0.3
46
- requests==2.32.5
47
- rich==14.2.0
48
- safehttpx==0.1.7
49
- safetensors==0.7.0
50
- scipy==1.15.3
51
- semantic-version==2.10.0
52
- sentencepiece==0.2.1
53
- shellingham==1.5.4
54
- six==1.17.0
55
- starlette==0.50.0
56
- sympy==1.14.0
57
- tomlkit==0.13.3
58
- torch==2.9.1
59
- tqdm==4.67.1
60
- typer==0.21.1
61
- typer-slim==0.21.1
62
- typing-inspection==0.4.2
63
- typing_extensions==4.15.0
64
- tzdata==2025.3
65
- urllib3==2.6.3
66
- uvicorn==0.40.0