Update app.py
Browse files
app.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
| 1 |
import torch
|
| 2 |
import gradio as gr
|
| 3 |
import torchaudio
|
| 4 |
-
# from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
|
| 5 |
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech
|
| 6 |
from transformers.models.speecht5 import SpeechT5HifiGan
|
| 7 |
|
|
@@ -15,7 +14,6 @@ vocoder = vocoder.to(device)
|
|
| 15 |
|
| 16 |
speaker_embedding = torch.zeros(1, 512).to(device)
|
| 17 |
|
| 18 |
-
# Load model and processor
|
| 19 |
# processor = SpeechT5Processor.from_pretrained("nambn0321/TTS_with_T5_4")
|
| 20 |
# model = SpeechT5ForTextToSpeech.from_pretrained(
|
| 21 |
# "nambn0321/TTS_with_T5_4",
|
|
@@ -24,12 +22,10 @@ speaker_embedding = torch.zeros(1, 512).to(device)
|
|
| 24 |
# )
|
| 25 |
# vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
|
| 26 |
|
| 27 |
-
# # Move to CUDA if available
|
| 28 |
# device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 29 |
# model = model.to(device)
|
| 30 |
# vocoder = vocoder.to(device)
|
| 31 |
|
| 32 |
-
# # # Dummy speaker embedding (or load your real one here)
|
| 33 |
# speaker_embedding = torch.tensor([[-0.06632216, -0.02325863, 0.04376163, 0.01112046, -0.02864115,
|
| 34 |
# -0.03048201, -0.04865832, 0.00598873, 0.03105048, 0.01635859,
|
| 35 |
# -0.07552029, -0.09258246, 0.04839027, 0.04307159, 0.05019059,
|
|
@@ -135,37 +131,30 @@ speaker_embedding = torch.zeros(1, 512).to(device)
|
|
| 135 |
# 0.02549847, -0.06043207]]).to(device)
|
| 136 |
|
| 137 |
def tts_generate(text):
|
| 138 |
-
print(f"π Input text: {text}")
|
| 139 |
try:
|
| 140 |
# Preprocess input
|
| 141 |
-
print("π Processing input...")
|
| 142 |
inputs = processor(text=text, return_tensors="pt").to(device)
|
| 143 |
-
print("β
Text processed.")
|
| 144 |
|
| 145 |
# Generate waveform directly (with vocoder)
|
| 146 |
-
print("π€ Generating speech waveform...")
|
| 147 |
with torch.no_grad():
|
| 148 |
waveform = model.generate_speech(
|
| 149 |
inputs["input_ids"],
|
| 150 |
speaker_embedding,
|
| 151 |
vocoder=vocoder
|
| 152 |
)
|
| 153 |
-
print("β
Waveform generated.")
|
| 154 |
|
| 155 |
# Save waveform
|
| 156 |
output_path = "output.wav"
|
| 157 |
if waveform.dim() == 1:
|
| 158 |
waveform = waveform.unsqueeze(0)
|
| 159 |
torchaudio.save(output_path, waveform.cpu(), sample_rate=16000)
|
| 160 |
-
print(f"πΎ Audio saved to {output_path}")
|
| 161 |
|
| 162 |
return output_path
|
| 163 |
|
| 164 |
except Exception as e:
|
| 165 |
-
print("
|
| 166 |
return "Error during speech synthesis."
|
| 167 |
|
| 168 |
-
# Gradio interface
|
| 169 |
demo = gr.Interface(
|
| 170 |
fn=tts_generate,
|
| 171 |
inputs=gr.Textbox(label="Enter text"),
|
|
@@ -175,6 +164,6 @@ demo = gr.Interface(
|
|
| 175 |
)
|
| 176 |
|
| 177 |
if __name__ == "__main__":
|
| 178 |
-
print("
|
| 179 |
demo.launch()
|
| 180 |
|
|
|
|
| 1 |
import torch
|
| 2 |
import gradio as gr
|
| 3 |
import torchaudio
|
|
|
|
| 4 |
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech
|
| 5 |
from transformers.models.speecht5 import SpeechT5HifiGan
|
| 6 |
|
|
|
|
| 14 |
|
| 15 |
speaker_embedding = torch.zeros(1, 512).to(device)
|
| 16 |
|
|
|
|
| 17 |
# processor = SpeechT5Processor.from_pretrained("nambn0321/TTS_with_T5_4")
|
| 18 |
# model = SpeechT5ForTextToSpeech.from_pretrained(
|
| 19 |
# "nambn0321/TTS_with_T5_4",
|
|
|
|
| 22 |
# )
|
| 23 |
# vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
|
| 24 |
|
|
|
|
| 25 |
# device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 26 |
# model = model.to(device)
|
| 27 |
# vocoder = vocoder.to(device)
|
| 28 |
|
|
|
|
| 29 |
# speaker_embedding = torch.tensor([[-0.06632216, -0.02325863, 0.04376163, 0.01112046, -0.02864115,
|
| 30 |
# -0.03048201, -0.04865832, 0.00598873, 0.03105048, 0.01635859,
|
| 31 |
# -0.07552029, -0.09258246, 0.04839027, 0.04307159, 0.05019059,
|
|
|
|
| 131 |
# 0.02549847, -0.06043207]]).to(device)
|
| 132 |
|
| 133 |
def tts_generate(text):
    """Synthesize speech from *text* with SpeechT5 and return a WAV path.

    Parameters
    ----------
    text : str
        Input sentence to synthesize.

    Returns
    -------
    str
        Path to the generated 16 kHz WAV file, or the error-message
        string ``"Error during speech synthesis."`` when synthesis fails
        (the Gradio UI displays whatever this returns).
    """
    # Robustness: an empty/whitespace prompt has nothing to synthesize;
    # bail out with the same message the error path already uses.
    if not text or not text.strip():
        return "Error during speech synthesis."

    try:
        # Tokenize the input and move the tensors to the model's device.
        inputs = processor(text=text, return_tensors="pt").to(device)

        # Generate the waveform directly: passing vocoder= makes
        # generate_speech return audio samples instead of a spectrogram.
        with torch.no_grad():
            waveform = model.generate_speech(
                inputs["input_ids"],
                speaker_embedding,
                vocoder=vocoder,
            )

        # torchaudio.save expects a (channels, samples) tensor, so lift a
        # mono 1-D waveform to shape (1, samples).
        output_path = "output.wav"
        if waveform.dim() == 1:
            waveform = waveform.unsqueeze(0)
        # 16 kHz is the rate the SpeechT5 / HiFi-GAN pipeline produces
        # — presumably; TODO confirm against the trained checkpoint.
        torchaudio.save(output_path, waveform.cpu(), sample_rate=16000)

        return output_path
    except Exception as e:
        # Deliberate UI-boundary handler: log the cause and hand the UI a
        # message instead of crashing the Gradio worker.
        print("Error during TTS generation:", e)
        return "Error during speech synthesis."
|
| 157 |
|
|
|
|
| 158 |
demo = gr.Interface(
|
| 159 |
fn=tts_generate,
|
| 160 |
inputs=gr.Textbox(label="Enter text"),
|
|
|
|
| 164 |
)
|
| 165 |
|
| 166 |
# Script entry point: launch the Gradio web UI only when run directly
# (not when this module is imported, e.g. by a Space runner).
if __name__ == "__main__":
    print("Launching Gradio demo")
    demo.launch()
|
| 169 |
|