Update app.py
Browse files
app.py
CHANGED
|
@@ -5,134 +5,134 @@ import torchaudio
|
|
| 5 |
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech
|
| 6 |
from transformers.models.speecht5 import SpeechT5HifiGan
|
| 7 |
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
# vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
|
| 11 |
-
|
| 12 |
-
# device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 13 |
-
# model = model.to(device)
|
| 14 |
-
# vocoder = vocoder.to(device)
|
| 15 |
-
|
| 16 |
-
# speaker_embedding = torch.zeros(1, 512).to(device)
|
| 17 |
-
|
| 18 |
-
# Load model and processor
|
| 19 |
-
processor = SpeechT5Processor.from_pretrained("nambn0321/TTS_with_T5_4")
|
| 20 |
-
model = SpeechT5ForTextToSpeech.from_pretrained(
|
| 21 |
-
"nambn0321/TTS_with_T5_4",
|
| 22 |
-
use_safetensors=True,
|
| 23 |
-
trust_remote_code=True
|
| 24 |
-
)
|
| 25 |
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
|
| 26 |
|
| 27 |
-
# Move to CUDA if available
|
| 28 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 29 |
model = model.to(device)
|
| 30 |
vocoder = vocoder.to(device)
|
| 31 |
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
0.
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
0.
|
| 55 |
-
|
| 56 |
-
0.
|
| 57 |
-
|
| 58 |
-
0.
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
0.
|
| 64 |
-
|
| 65 |
-
0.
|
| 66 |
-
0.
|
| 67 |
-
|
| 68 |
-
0.
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
0.
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
0.
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
0.
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
0.
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
0.
|
| 113 |
-
0.
|
| 114 |
-
0.
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
0.
|
| 121 |
-
0.
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
0.
|
| 133 |
-
0.
|
| 134 |
-
|
| 135 |
-
0.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
|
| 137 |
def tts_generate(text):
|
| 138 |
print(f"📝 Input text: {text}")
|
|
|
|
| 5 |
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech
|
| 6 |
from transformers.models.speecht5 import SpeechT5HifiGan
|
| 7 |
|
| 8 |
+
processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
|
| 9 |
+
model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
|
| 11 |
|
|
|
|
| 12 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 13 |
model = model.to(device)
|
| 14 |
vocoder = vocoder.to(device)
|
| 15 |
|
| 16 |
+
speaker_embedding = torch.zeros(1, 512).to(device)
|
| 17 |
+
|
| 18 |
+
# Load model and processor
|
| 19 |
+
# processor = SpeechT5Processor.from_pretrained("nambn0321/TTS_with_T5_4")
|
| 20 |
+
# model = SpeechT5ForTextToSpeech.from_pretrained(
|
| 21 |
+
# "nambn0321/TTS_with_T5_4",
|
| 22 |
+
# use_safetensors=True,
|
| 23 |
+
# trust_remote_code=True
|
| 24 |
+
# )
|
| 25 |
+
# vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
|
| 26 |
+
|
| 27 |
+
# # Move to CUDA if available
|
| 28 |
+
# device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 29 |
+
# model = model.to(device)
|
| 30 |
+
# vocoder = vocoder.to(device)
|
| 31 |
+
|
| 32 |
+
# # # Dummy speaker embedding (or load your real one here)
|
| 33 |
+
# speaker_embedding = torch.tensor([[-0.06632216, -0.02325863, 0.04376163, 0.01112046, -0.02864115,
|
| 34 |
+
# -0.03048201, -0.04865832, 0.00598873, 0.03105048, 0.01635859,
|
| 35 |
+
# -0.07552029, -0.09258246, 0.04839027, 0.04307159, 0.05019059,
|
| 36 |
+
# 0.05565156, 0.00533272, 0.0197331 , 0.01269842, 0.00576971,
|
| 37 |
+
# 0.02997943, 0.00765277, -0.01538683, -0.04164617, -0.05669912,
|
| 38 |
+
# -0.00767612, -0.05466911, 0.00988977, 0.05714991, 0.0216927 ,
|
| 39 |
+
# -0.00281803, 0.04948897, 0.04745187, -0.01738331, 0.03589115,
|
| 40 |
+
# -0.03788823, 0.03018526, 0.06933809, -0.01054026, -0.07338727,
|
| 41 |
+
# 0.01145766, -0.00347575, 0.02236829, 0.03353192, 0.01183521,
|
| 42 |
+
# -0.11246844, -0.01998361, 0.01333049, -0.08154028, 0.06184796,
|
| 43 |
+
# 0.04050031, 0.01181497, 0.0588 , 0.01634772, -0.11387676,
|
| 44 |
+
# -0.01355756, -0.01059065, 0.01194482, 0.03934296, 0.02436676,
|
| 45 |
+
# 0.00376559, -0.00813801, -0.01421188, -0.03595341, 0.02987706,
|
| 46 |
+
# 0.02612724, 0.03072971, -0.05161813, -0.06241557, -0.06545018,
|
| 47 |
+
# -0.00679519, 0.00900955, 0.03801987, 0.00294477, 0.02057374,
|
| 48 |
+
# 0.04256874, 0.00730863, -0.00282256, -0.05437343, -0.07569141,
|
| 49 |
+
# -0.07964483, -0.04049463, -0.06325456, -0.08040556, -0.03161319,
|
| 50 |
+
# -0.0557906 , -0.05558824, 0.05661038, 0.03932756, -0.00269612,
|
| 51 |
+
# 0.02999815, -0.05263155, 0.01048327, -0.05502405, 0.04730757,
|
| 52 |
+
# -0.03641531, 0.04466332, 0.04261209, -0.08965097, -0.06816243,
|
| 53 |
+
# 0.05328364, -0.0652955 , -0.09165341, 0.02487748, 0.04061233,
|
| 54 |
+
# 0.01143007, 0.04024159, 0.01869776, 0.02870329, 0.01503909,
|
| 55 |
+
# -0.07710361, 0.00802833, 0.07786133, -0.008355 , 0.02792075,
|
| 56 |
+
# 0.03834949, -0.07156748, 0.00127211, -0.05645351, 0.0293999 ,
|
| 57 |
+
# 0.03988929, -0.07301504, 0.01131906, 0.0415033 , -0.05863927,
|
| 58 |
+
# 0.0623733 , -0.07197598, 0.02887617, 0.03702732, 0.05255475,
|
| 59 |
+
# 0.03850314, 0.03016165, 0.04511765, 0.0400167 , 0.01042124,
|
| 60 |
+
# -0.08053102, -0.06103503, -0.02782067, -0.03948715, 0.00812866,
|
| 61 |
+
# -0.00215283, 0.00496819, -0.00270994, 0.04999355, -0.08324838,
|
| 62 |
+
# 0.01673055, -0.0224449 , -0.04158457, 0.03688109, -0.13497816,
|
| 63 |
+
# 0.02797874, -0.04349126, -0.06393341, 0.01634013, 0.00367471,
|
| 64 |
+
# 0.03441324, 0.00576339, -0.08563808, -0.08777589, 0.01206557,
|
| 65 |
+
# 0.01930428, 0.03046028, 0.00186808, 0.01118185, -0.06207091,
|
| 66 |
+
# 0.00285664, 0.04373416, 0.03865229, 0.02155851, 0.02963249,
|
| 67 |
+
# 0.03907783, -0.06465862, 0.00155482, -0.04207559, 0.02787214,
|
| 68 |
+
# 0.02055759, -0.05460549, -0.0024652 , 0.02217332, -0.07867457,
|
| 69 |
+
# 0.04810029, -0.0450572 , -0.01488631, 0.02080196, -0.07611465,
|
| 70 |
+
# -0.01182817, 0.03117848, 0.0593022 , -0.05042631, -0.06321163,
|
| 71 |
+
# 0.01080927, 0.03538311, -0.06461193, 0.02289902, 0.03690634,
|
| 72 |
+
# 0.02868471, 0.01077593, 0.00843379, 0.04739143, -0.03351105,
|
| 73 |
+
# 0.04080784, 0.01689551, -0.06830349, 0.01059405, 0.01843624,
|
| 74 |
+
# 0.01237972, 0.02619306, -0.02353077, 0.00792623, 0.02665057,
|
| 75 |
+
# 0.00471944, -0.08360166, -0.0301204 , 0.04510773, -0.03999252,
|
| 76 |
+
# 0.03273777, 0.02000749, -0.07822321, 0.04588151, 0.03334309,
|
| 77 |
+
# -0.09588112, 0.01911022, -0.06844518, -0.03093524, -0.02563222,
|
| 78 |
+
# 0.03301362, 0.03092113, 0.07978717, 0.03420616, 0.02481706,
|
| 79 |
+
# -0.03479896, 0.01136372, 0.02234516, -0.02502409, 0.02136666,
|
| 80 |
+
# -0.01978885, 0.01426617, 0.0336206 , 0.00164481, 0.05059334,
|
| 81 |
+
# -0.05926166, 0.01984084, -0.09437344, 0.00440842, -0.06748072,
|
| 82 |
+
# 0.04547653, 0.04531173, 0.02839815, 0.01182417, 0.01309258,
|
| 83 |
+
# 0.03345039, -0.0050239 , 0.00861029, -0.05667242, 0.01330826,
|
| 84 |
+
# 0.02976079, 0.03610174, 0.0056701 , -0.06830816, 0.07686577,
|
| 85 |
+
# 0.00055387, -0.07641901, 0.00479465, 0.0435739 , 0.00137714,
|
| 86 |
+
# 0.054296 , 0.02192332, 0.03526516, 0.03261713, -0.01711978,
|
| 87 |
+
# 0.05103486, 0.004091 , -0.04905723, 0.01632674, -0.04963868,
|
| 88 |
+
# 0.04549154, 0.05771144, 0.01438812, -0.08240737, -0.06134431,
|
| 89 |
+
# -0.03986251, 0.03224541, 0.00400033, -0.05963603, 0.02552675,
|
| 90 |
+
# 0.04327708, 0.00562372, 0.03411512, -0.11604068, 0.00232808,
|
| 91 |
+
# 0.02742139, 0.01270449, 0.02279026, -0.06613689, 0.00456405,
|
| 92 |
+
# 0.00770958, 0.01518244, -0.03575909, 0.05028789, 0.03181706,
|
| 93 |
+
# -0.02811741, 0.02930666, 0.02258663, -0.06209057, 0.01053006,
|
| 94 |
+
# 0.01761598, 0.02432001, -0.0141328 , 0.03561908, 0.03293756,
|
| 95 |
+
# 0.04713007, 0.02588944, 0.0185135 , 0.00973485, -0.09059389,
|
| 96 |
+
# -0.06192823, -0.0214373 , 0.02466835, -0.05554106, 0.03954491,
|
| 97 |
+
# -0.03995424, 0.03540933, -0.05664941, 0.00685676, 0.02727092,
|
| 98 |
+
# -0.06838219, 0.04708575, 0.06957678, -0.0574585 , -0.08372921,
|
| 99 |
+
# -0.06601643, -0.02683325, 0.02862075, 0.06086589, -0.05693608,
|
| 100 |
+
# 0.02700268, 0.03062632, -0.0449043 , -0.03139404, 0.01131762,
|
| 101 |
+
# 0.018201 , -0.05808553, 0.02667459, 0.02892675, -0.05436037,
|
| 102 |
+
# 0.02801878, 0.04307706, 0.0013432 , -0.06306062, -0.04901182,
|
| 103 |
+
# -0.05647411, 0.0226799 , -0.06727529, 0.10902219, 0.03856311,
|
| 104 |
+
# -0.04592182, -0.00500258, 0.00186311, -0.05330509, 0.05230814,
|
| 105 |
+
# -0.10676292, 0.01777823, 0.01183014, 0.05641989, 0.04702727,
|
| 106 |
+
# 0.00042184, -0.08117392, -0.00340278, 0.01055175, 0.02158776,
|
| 107 |
+
# 0.00645116, 0.05420727, -0.05439884, 0.02988858, -0.0155564 ,
|
| 108 |
+
# -0.00187941, 0.04348213, 0.02176837, 0.04492295, 0.05255244,
|
| 109 |
+
# -0.09009198, -0.12785755, 0.0270214 , 0.01281871, 0.03488814,
|
| 110 |
+
# 0.01032432, 0.03737413, -0.08046219, 0.03366841, 0.04788679,
|
| 111 |
+
# 0.02247225, 0.02758352, -0.05623886, 0.03350434, -0.03293617,
|
| 112 |
+
# 0.00674522, 0.02637025, -0.06836043, -0.03543041, 0.04120062,
|
| 113 |
+
# 0.04781871, -0.0528533 , 0.05126699, 0.01553862, 0.03617714,
|
| 114 |
+
# 0.0096033 , 0.01169565, -0.06753531, -0.05359954, -0.07725069,
|
| 115 |
+
# -0.0690423 , 0.00608264, 0.03367587, -0.01095485, 0.02317013,
|
| 116 |
+
# -0.03748006, -0.0396716 , -0.07376339, -0.15511133, -0.02377705,
|
| 117 |
+
# -0.0733289 , -0.02155393, 0.03737415, -0.00152944, -0.05182485,
|
| 118 |
+
# 0.0202742 , 0.04189592, 0.05077221, 0.02522502, -0.04805434,
|
| 119 |
+
# -0.03909 , -0.01301163, -0.02148154, 0.02039445, 0.02322994,
|
| 120 |
+
# 0.01821164, 0.03498985, 0.00654902, 0.00980544, -0.06337985,
|
| 121 |
+
# 0.00158023, 0.01253585, 0.05249537, 0.00056358, -0.03539167,
|
| 122 |
+
# 0.04533946, 0.02057356, 0.00598625, 0.00438659, -0.00444954,
|
| 123 |
+
# 0.04846435, 0.02074119, 0.00665891, 0.0347768 , -0.00355295,
|
| 124 |
+
# -0.00983169, 0.01239159, -0.06600927, -0.06987962, 0.04164324,
|
| 125 |
+
# -0.00596055, 0.01529142, 0.04804419, 0.04481226, -0.06791846,
|
| 126 |
+
# 0.04703787, -0.01586268, -0.06848218, 0.03964271, 0.03287267,
|
| 127 |
+
# -0.00166699, 0.05269769, 0.02563164, 0.00356486, -0.04681876,
|
| 128 |
+
# -0.05530458, 0.00568418, -0.00581932, 0.0229376 , 0.06235321,
|
| 129 |
+
# -0.03780747, -0.04042193, 0.01800834, 0.02682916, 0.05686411,
|
| 130 |
+
# 0.03996282, -0.05146077, 0.0312879 , -0.03907526, -0.01055358,
|
| 131 |
+
# -0.05896859, 0.02441409, -0.03880213, 0.03941878, 0.02211095,
|
| 132 |
+
# 0.00688374, -0.05528738, -0.01232414, -0.06249457, -0.07299529,
|
| 133 |
+
# 0.00938593, 0.05738097, -0.06533916, 0.03651554, 0.06204324,
|
| 134 |
+
# -0.01556815, -0.04757515, 0.0451969 , 0.03502326, -0.01376748,
|
| 135 |
+
# 0.02549847, -0.06043207]]).to(device)
|
| 136 |
|
| 137 |
def tts_generate(text):
|
| 138 |
print(f"📝 Input text: {text}")
|