Spaces:

nambn0321
/

TTS_run

Runtime error

App Files Community

nambn0321 committed on Aug 3, 2025

Commit

1cd841e

verified ·

1 Parent(s): 9a99cf5

Update app.py

Browse files

Files changed (1) hide show

app.py +82 -19

app.py CHANGED Viewed

@@ -5,33 +5,96 @@ import torchaudio
 from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech
 from transformers.models.speecht5 import SpeechT5HifiGan
-processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
-model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
-vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
-device = "cuda" if torch.cuda.is_available() else "cpu"
-model = model.to(device)
-vocoder = vocoder.to(device)
-speaker_embedding = torch.zeros(1, 512).to(device)
-# Load model and processor
-# processor = SpeechT5Processor.from_pretrained("nambn0321/TTS_with_T5")
-# model = SpeechT5ForTextToSpeech.from_pretrained(
-#     "nambn0321/TTS_with_T5",
-#     use_safetensors=True,
-#     trust_remote_code=True
-# )
 # vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
-# # Move to CUDA if available
 # device = "cuda" if torch.cuda.is_available() else "cpu"
 # model = model.to(device)
 # vocoder = vocoder.to(device)
-# # Dummy speaker embedding (or load your real one here)
 # speaker_embedding = torch.zeros(1, 512).to(device)
 def tts_generate(text):
     print(f"📝 Input text: {text}")
     try:

 from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech
 from transformers.models.speecht5 import SpeechT5HifiGan
+# processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
+# model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
 # vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
 # device = "cuda" if torch.cuda.is_available() else "cpu"
 # model = model.to(device)
 # vocoder = vocoder.to(device)
 # speaker_embedding = torch.zeros(1, 512).to(device)
+# Load model and processor
+processor = SpeechT5Processor.from_pretrained("nambn0321/TTS_with_T5")
+model = SpeechT5ForTextToSpeech.from_pretrained(
+    "nambn0321/TTS_with_T5",
+    use_safetensors=True,
+    trust_remote_code=True
+)
+vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
+# Move to CUDA if available
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model = model.to(device)
+vocoder = vocoder.to(device)
+# # Dummy speaker embedding (or load your real one here)
+speaker_embeddings = torch.tensor([[-0.0663, -0.0233,  0.0438,  0.0111, -0.0286, -0.0305, -0.0487,  0.0060,
+          0.0311,  0.0164, -0.0755, -0.0926,  0.0484,  0.0431,  0.0502,  0.0557,
+          0.0053,  0.0197,  0.0127,  0.0058,  0.0300,  0.0077, -0.0154, -0.0416,
+         -0.0567, -0.0077, -0.0547,  0.0099,  0.0571,  0.0217, -0.0028,  0.0495,
+          0.0475, -0.0174,  0.0359, -0.0379,  0.0302,  0.0693, -0.0105, -0.0734,
+          0.0115, -0.0035,  0.0224,  0.0335,  0.0118, -0.1125, -0.0200,  0.0133,
+         -0.0815,  0.0618,  0.0405,  0.0118,  0.0588,  0.0163, -0.1139, -0.0136,
+         -0.0106,  0.0119,  0.0393,  0.0244,  0.0038, -0.0081, -0.0142, -0.0360,
+          0.0299,  0.0261,  0.0307, -0.0516, -0.0624, -0.0655, -0.0068,  0.0090,
+          0.0380,  0.0029,  0.0206,  0.0426,  0.0073, -0.0028, -0.0544, -0.0757,
+         -0.0796, -0.0405, -0.0633, -0.0804, -0.0316, -0.0558, -0.0556,  0.0566,
+          0.0393, -0.0027,  0.0300, -0.0526,  0.0105, -0.0550,  0.0473, -0.0364,
+          0.0447,  0.0426, -0.0897, -0.0682,  0.0533, -0.0653, -0.0917,  0.0249,
+          0.0406,  0.0114,  0.0402,  0.0187,  0.0287,  0.0150, -0.0771,  0.0080,
+          0.0779, -0.0084,  0.0279,  0.0383, -0.0716,  0.0013, -0.0565,  0.0294,
+          0.0399, -0.0730,  0.0113,  0.0415, -0.0586,  0.0624, -0.0720,  0.0289,
+          0.0370,  0.0526,  0.0385,  0.0302,  0.0451,  0.0400,  0.0104, -0.0805,
+         -0.0610, -0.0278, -0.0395,  0.0081, -0.0022,  0.0050, -0.0027,  0.0500,
+         -0.0832,  0.0167, -0.0224, -0.0416,  0.0369, -0.1350,  0.0280, -0.0435,
+         -0.0639,  0.0163,  0.0037,  0.0344,  0.0058, -0.0856, -0.0878,  0.0121,
+          0.0193,  0.0305,  0.0019,  0.0112, -0.0621,  0.0029,  0.0437,  0.0387,
+          0.0216,  0.0296,  0.0391, -0.0647,  0.0016, -0.0421,  0.0279,  0.0206,
+         -0.0546, -0.0025,  0.0222, -0.0787,  0.0481, -0.0451, -0.0149,  0.0208,
+         -0.0761, -0.0118,  0.0312,  0.0593, -0.0504, -0.0632,  0.0108,  0.0354,
+         -0.0646,  0.0229,  0.0369,  0.0287,  0.0108,  0.0084,  0.0474, -0.0335,
+          0.0408,  0.0169, -0.0683,  0.0106,  0.0184,  0.0124,  0.0262, -0.0235,
+          0.0079,  0.0267,  0.0047, -0.0836, -0.0301,  0.0451, -0.0400,  0.0327,
+          0.0200, -0.0782,  0.0459,  0.0333, -0.0959,  0.0191, -0.0684, -0.0309,
+         -0.0256,  0.0330,  0.0309,  0.0798,  0.0342,  0.0248, -0.0348,  0.0114,
+          0.0223, -0.0250,  0.0214, -0.0198,  0.0143,  0.0336,  0.0016,  0.0506,
+         -0.0593,  0.0198, -0.0944,  0.0044, -0.0675,  0.0455,  0.0453,  0.0284,
+          0.0118,  0.0131,  0.0335, -0.0050,  0.0086, -0.0567,  0.0133,  0.0298,
+          0.0361,  0.0057, -0.0683,  0.0769,  0.0006, -0.0764,  0.0048,  0.0436,
+          0.0014,  0.0543,  0.0219,  0.0353,  0.0326, -0.0171,  0.0510,  0.0041,
+         -0.0491,  0.0163, -0.0496,  0.0455,  0.0577,  0.0144, -0.0824, -0.0613,
+         -0.0399,  0.0322,  0.0040, -0.0596,  0.0255,  0.0433,  0.0056,  0.0341,
+         -0.1160,  0.0023,  0.0274,  0.0127,  0.0228, -0.0661,  0.0046,  0.0077,
+          0.0152, -0.0358,  0.0503,  0.0318, -0.0281,  0.0293,  0.0226, -0.0621,
+          0.0105,  0.0176,  0.0243, -0.0141,  0.0356,  0.0329,  0.0471,  0.0259,
+          0.0185,  0.0097, -0.0906, -0.0619, -0.0214,  0.0247, -0.0555,  0.0395,
+         -0.0400,  0.0354, -0.0566,  0.0069,  0.0273, -0.0684,  0.0471,  0.0696,
+         -0.0575, -0.0837, -0.0660, -0.0268,  0.0286,  0.0609, -0.0569,  0.0270,
+          0.0306, -0.0449, -0.0314,  0.0113,  0.0182, -0.0581,  0.0267,  0.0289,
+         -0.0544,  0.0280,  0.0431,  0.0013, -0.0631, -0.0490, -0.0565,  0.0227,
+         -0.0673,  0.1090,  0.0386, -0.0459, -0.0050,  0.0019, -0.0533,  0.0523,
+         -0.1068,  0.0178,  0.0118,  0.0564,  0.0470,  0.0004, -0.0812, -0.0034,
+          0.0106,  0.0216,  0.0065,  0.0542, -0.0544,  0.0299, -0.0156, -0.0019,
+          0.0435,  0.0218,  0.0449,  0.0526, -0.0901, -0.1279,  0.0270,  0.0128,
+          0.0349,  0.0103,  0.0374, -0.0805,  0.0337,  0.0479,  0.0225,  0.0276,
+         -0.0562,  0.0335, -0.0329,  0.0067,  0.0264, -0.0684, -0.0354,  0.0412,
+          0.0478, -0.0529,  0.0513,  0.0155,  0.0362,  0.0096,  0.0117, -0.0675,
+         -0.0536, -0.0773, -0.0690,  0.0061,  0.0337, -0.0110,  0.0232, -0.0375,
+         -0.0397, -0.0738, -0.1551, -0.0238, -0.0733, -0.0216,  0.0374, -0.0015,
+         -0.0518,  0.0203,  0.0419,  0.0508,  0.0252, -0.0481, -0.0391, -0.0130,
+         -0.0215,  0.0204,  0.0232,  0.0182,  0.0350,  0.0065,  0.0098, -0.0634,
+          0.0016,  0.0125,  0.0525,  0.0006, -0.0354,  0.0453,  0.0206,  0.0060,
+          0.0044, -0.0044,  0.0485,  0.0207,  0.0067,  0.0348, -0.0036, -0.0098,
+          0.0124, -0.0660, -0.0699,  0.0416, -0.0060,  0.0153,  0.0480,  0.0448,
+         -0.0679,  0.0470, -0.0159, -0.0685,  0.0396,  0.0329, -0.0017,  0.0527,
+          0.0256,  0.0036, -0.0468, -0.0553,  0.0057, -0.0058,  0.0229,  0.0624,
+         -0.0378, -0.0404,  0.0180,  0.0268,  0.0569,  0.0400, -0.0515,  0.0313,
+         -0.0391, -0.0106, -0.0590,  0.0244, -0.0388,  0.0394,  0.0221,  0.0069,
+         -0.0553, -0.0123, -0.0625, -0.0730,  0.0094,  0.0574, -0.0653,  0.0365,
+          0.0620, -0.0156, -0.0476,  0.0452,  0.0350, -0.0138,  0.0255, -0.0604]]).unsqueeze(0)
 def tts_generate(text):
     print(f"📝 Input text: {text}")
     try: