KaniTTS

Sleeping

App Files Community

jblast94 committed on Nov 8

Commit

24c936f

verified ·

1 Parent(s): 6fd45d2

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -26

app.py CHANGED Viewed

@@ -5,8 +5,40 @@ import os
 # You must use the exact same model name as your repo
 MODEL_ID = "nineninesix/Kani-TTS-370m"
 @spaces.GPU
 def generate_speech(text: str, model_choice: str, speaker_display: str):
     if not text.strip():
         return "Please enter text for speech generation.", None
@@ -14,13 +46,18 @@ def generate_speech(text: str, model_choice: str, speaker_display: str):
         device = "cuda" if torch.cuda.is_available() else "cpu"
         print(f"Using device: {device}")
-        # --- This is the key part to load a specific model ---
         if model_choice not in MODELS:
             return f"Model '{model_choice}' not found.", None
         selected_model = MODELS[model_choice]
-        # --- This part handles speakers ---
         cfg = selected_model[1]  # Model config
         speaker_map = cfg.get('speaker_id', {}) if cfg is not None else {}
         if speaker_display and speaker_map:
@@ -31,7 +68,6 @@ def generate_speech(text: str, model_choice: str, speaker_display: str):
         print(f"Generating speech with {model_choice}...")
         # --- Use the specific part of the model for generation ---
-        model_to_generate = selected_model[0]
         audio, _, time_report = model_to_generate.run_model(
             text=text,
             speaker_id=speaker_id,
@@ -45,25 +81,7 @@ def generate_speech(text: str, model_choice: str, speaker_display: str):
         return (sample_rate, audio), time_report
-def load_models():
-    global MODELS
-    if not MODELS:
-        print("Loading models into GPU memory...")
-        from transformers import AutoModel
-        model_path = MODEL_ID
-        # Load both the main model and its config
-        model = AutoModel.from_pretrained(model_path, trust_remote_code=True)
-        config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
-        MODELS = {
-            "Kani TTS 370M": (model, config)
-        }
-        print(f"Models loaded. Available speakers: {list(config.speaker_id.keys()) if config.speaker_id else []}")
-        return MODELS
-# --- Gradio interface setup ---
 MODELS = load_models()
 with gr.Blocks(title="😻 KaniTTS - Text to Speech") as demo:
@@ -76,7 +94,10 @@ with gr.Blocks(title="😻 KaniTTS - Text to Speech") as demo:
     )
     # --- Speaker selector (populated on model load) ---
-    all_speakers = list(MODELS[list(MODELS.keys())[0]][1].speaker_id.keys()) if MODELS and MODELS[list(MODELS.keys())[0]][1] and MODELS[list(MODELS.keys())[0]][1].speaker_id else []
     speaker_dropdown = gr.Dropdown(
         choices=all_speakers,
         value=None,
@@ -91,18 +112,19 @@ with gr.Blocks(title="😻 KaniTTS - Text to Speech") as demo:
     audio_output = gr.Audio(label="Generated Audio", type="numpy")
-    # --- Event handlers ---
     model_dropdown.change(
         fn=lambda choice: gr.update(choices=list(MODELS[choice][1].speaker_id.keys()), value=None, visible=True) if MODELS and MODELS[choice][1].speaker_id else gr.update(visible=False),
         inputs=[model_dropdown],
         outputs=[speaker_dropdown]
     )
     generate_btn.click(
         fn=generate_speech,
         inputs=[text_input, model_dropdown, speaker_dropdown],
         outputs=[audio_output]
     )
-    # --- This is the API enabling line ---
-    demo.queue().launch(show_api=True)

 # You must use the exact same model name as your repo
 MODEL_ID = "nineninesix/Kani-TTS-370m"
+# --- Global variable to store loaded models ---
+MODELS = {}
 @spaces.GPU
+def load_models():
+    """Load models into GPU memory and store in a global variable."""
+    global MODELS
+    if not MODELS:
+        print("Loading models into GPU memory...")
+        from transformers import AutoModel, AutoConfig
+        model_path = MODEL_ID
+        # Load both the main model and its configuration
+        model = AutoModel.from_pretrained(model_path, trust_remote_code=True)
+        config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
+        # Store the loaded model and its configuration in the global variable
+        MODELS = {
+            "Kani TTS 370M": (model, config)
+        }
+        print(f"Models loaded. Available speakers: {list(config.speaker_id.keys()) if config.speaker_id else []}")
+        return MODELS
+# --- Define a separate function for updating the stats display ---
+def update_stats_display():
+    """This function gets the agent's stats and returns a formatted string for Gradio."""
+    # This assumes 'agent' is a global instance of your ConversationalAgent class
+    stats_text = agent.get_memory_stats()
+    return gr.Markdown(f"### 📊 Memory Stats\n{stats_text}")
 def generate_speech(text: str, model_choice: str, speaker_display: str):
+    """Generate speech using the selected model."""
     if not text.strip():
         return "Please enter text for speech generation.", None
         device = "cuda" if torch.cuda.is_available() else "cpu"
         print(f"Using device: {device}")
+        # Ensure models are loaded
+        if not MODELS:
+            load_models()
+        # Get the selected model from the global variable
         if model_choice not in MODELS:
             return f"Model '{model_choice}' not found.", None
         selected_model = MODELS[model_choice]
+        # --- This is the key part to load a specific model ---
+        model_to_generate = selected_model[0]
         cfg = selected_model[1]  # Model config
         speaker_map = cfg.get('speaker_id', {}) if cfg is not None else {}
         if speaker_display and speaker_map:
         print(f"Generating speech with {model_choice}...")
         # --- Use the specific part of the model for generation ---
         audio, _, time_report = model_to_generate.run_model(
             text=text,
             speaker_id=speaker_id,
         return (sample_rate, audio), time_report
+# --- Create and configure the Gradio interface ---
 MODELS = load_models()
 with gr.Blocks(title="😻 KaniTTS - Text to Speech") as demo:
     )
     # --- Speaker selector (populated on model load) ---
+    all_speakers = []
+    if MODELS and list(MODELS.keys())[0] and MODELS[list(MODELS.keys())[0]][1]:
+        all_speakers.extend(list(MODELS[list(MODELS.keys())[0]][1].speaker_id.keys()))
+    all_speakers = sorted(list(set(all_speakers)))
     speaker_dropdown = gr.Dropdown(
         choices=all_speakers,
         value=None,
     audio_output = gr.Audio(label="Generated Audio", type="numpy")
+    # --- Define the event to update the speakers when the model changes ---
     model_dropdown.change(
         fn=lambda choice: gr.update(choices=list(MODELS[choice][1].speaker_id.keys()), value=None, visible=True) if MODELS and MODELS[choice][1].speaker_id else gr.update(visible=False),
         inputs=[model_dropdown],
         outputs=[speaker_dropdown]
     )
+    # --- Wire up the main generation button ---
     generate_btn.click(
         fn=generate_speech,
         inputs=[text_input, model_dropdown, speaker_dropdown],
         outputs=[audio_output]
     )
+    # --- This is the API-enabling line ---
+    demo.queue().launch(show_api=True)