Spaces:

Neomindapp
/

trained_tts

Sleeping

App Files Files Community

Neomindapp committed on Sep 2, 2024

Commit

f9d0c83

verified ·

1 Parent(s): e68f823

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -52

app.py CHANGED Viewed

@@ -1,64 +1,34 @@
-import torch
 import gradio as gr
-import json
-import numpy as np
-import soundfile as sf
-# Import your Glow-TTS model and related utilities
-from glow_tts.models import GlowTTS
-from glow_tts.utils import text_to_sequence, sequence_to_mel  # Replace with actual functions if different
-# Define paths to your model and configuration (relative paths)
-MODEL_PATH = 'best_model.pth'
-CONFIG_PATH = 'config.json'
-# Load configuration and model
-def load_model(model_path, config_path):
-    # Load the model configuration
-    with open(config_path, 'r') as f:
-        config = json.load(f)
-    # Initialize the Glow-TTS model
-    model = GlowTTS(config)
-    # Load the trained model weights
-    model.load_state_dict(torch.load(model_path))
-    model.eval()
-    return model
-# Load the model
-model = load_model(MODEL_PATH, CONFIG_PATH)
-# Define the function to generate speech
 def generate_speech(text):
-    # Convert text to sequence
-    sequence = text_to_sequence(text)
-    inputs = torch.tensor(sequence).unsqueeze(0)  # Add batch dimension
-    with torch.no_grad():
-        # Generate mel spectrogram from text sequence
-        mel_output = model(inputs)
-    # Convert mel spectrogram to waveform
-    # This step might require a vocoder (e.g., HiFi-GAN) to convert mel spectrograms to audio
-    audio_waveform = mel_to_audio(mel_output)  # Replace with actual conversion if needed
-    # Save the waveform to a temporary file
-    temp_file = 'temp.wav'
-    sf.write(temp_file, audio_waveform, 22050)  # Adjust sample rate if necessary
-    return temp_file
-# Define Gradio interface
-interface = gr.Interface(
     fn=generate_speech,
-    inputs="text",
-    outputs="audio",
-    title="Glow-TTS Model",
-    description="Generate speech from text using the Glow-TTS model."
 )
-# Launch the Gradio interface
 if __name__ == "__main__":
-    interface.launch()

 import gradio as gr
+import torch
+from your_model_module import YourTTSModel, YourTTSProcessor  # Replace with your actual imports
+# Load the model and processor
+model = YourTTSModel.from_pretrained("config.json")
+model.load_state_dict(torch.load("best_model.pth"))
+model.eval()  # Set the model to evaluation mode
+processor = YourTTSProcessor.from_pretrained("config.json")
 def generate_speech(text):
+    # Process the input text
+    inputs = processor(text, return_tensors="pt")
+    # Generate speech using the model
+    with torch.no_grad():  # No need to compute gradients
+        outputs = model.generate(**inputs)
+    # Process the output to an audio format
+    audio = outputs.squeeze().numpy()  # Adjust this based on how your model outputs data
+    return audio
+# Define the Gradio interface
+iface = gr.Interface(
     fn=generate_speech,
+    inputs=gr.Textbox(lines=2, placeholder="Enter text here..."),
+    outputs=gr.Audio(type="numpy"),
+    title="Text-to-Speech with Coqui TTS",
+    description="Generate speech from text using a custom Coqui TTS model."
 )
 if __name__ == "__main__":
+    iface.launch()