Spaces:

nambn0321
/

TTS_run

Runtime error

File size: 9,673 Bytes

import tempfile

import gradio as gr
import torch
import torchaudio
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech
from transformers.models.speecht5 import SpeechT5HifiGan

# Pick the compute device first so each model can be moved to it as it loads.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Text processor, acoustic model and vocoder, all from the stock Microsoft
# SpeechT5 checkpoints.
processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts").to(device)
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan").to(device)

# All-zero (1, 512) speaker embedding passed to model.generate_speech.
# NOTE(review): presumably a placeholder for a real speaker vector — confirm
# the resulting audio quality is acceptable.
speaker_embedding = torch.zeros(1, 512, device=device)

# processor = SpeechT5Processor.from_pretrained("nambn0321/TTS_with_T5_4")
# model = SpeechT5ForTextToSpeech.from_pretrained(
#     "nambn0321/TTS_with_T5_4",
#     use_safetensors=True,
#     trust_remote_code=True
# )
# vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")

# device = "cuda" if torch.cuda.is_available() else "cpu"
# model = model.to(device)
# vocoder = vocoder.to(device)

# speaker_embedding = torch.tensor([[-0.06632216, -0.02325863,  0.04376163,  0.01112046, -0.02864115,
#        -0.03048201, -0.04865832,  0.00598873,  0.03105048,  0.01635859,
#        -0.07552029, -0.09258246,  0.04839027,  0.04307159,  0.05019059,
#         0.05565156,  0.00533272,  0.0197331 ,  0.01269842,  0.00576971,
#         0.02997943,  0.00765277, -0.01538683, -0.04164617, -0.05669912,
#        -0.00767612, -0.05466911,  0.00988977,  0.05714991,  0.0216927 ,
#        -0.00281803,  0.04948897,  0.04745187, -0.01738331,  0.03589115,
#        -0.03788823,  0.03018526,  0.06933809, -0.01054026, -0.07338727,
#         0.01145766, -0.00347575,  0.02236829,  0.03353192,  0.01183521,
#        -0.11246844, -0.01998361,  0.01333049, -0.08154028,  0.06184796,
#         0.04050031,  0.01181497,  0.0588    ,  0.01634772, -0.11387676,
#        -0.01355756, -0.01059065,  0.01194482,  0.03934296,  0.02436676,
#         0.00376559, -0.00813801, -0.01421188, -0.03595341,  0.02987706,
#         0.02612724,  0.03072971, -0.05161813, -0.06241557, -0.06545018,
#        -0.00679519,  0.00900955,  0.03801987,  0.00294477,  0.02057374,
#         0.04256874,  0.00730863, -0.00282256, -0.05437343, -0.07569141,
#        -0.07964483, -0.04049463, -0.06325456, -0.08040556, -0.03161319,
#        -0.0557906 , -0.05558824,  0.05661038,  0.03932756, -0.00269612,
#         0.02999815, -0.05263155,  0.01048327, -0.05502405,  0.04730757,
#        -0.03641531,  0.04466332,  0.04261209, -0.08965097, -0.06816243,
#         0.05328364, -0.0652955 , -0.09165341,  0.02487748,  0.04061233,
#         0.01143007,  0.04024159,  0.01869776,  0.02870329,  0.01503909,
#        -0.07710361,  0.00802833,  0.07786133, -0.008355  ,  0.02792075,
#         0.03834949, -0.07156748,  0.00127211, -0.05645351,  0.0293999 ,
#         0.03988929, -0.07301504,  0.01131906,  0.0415033 , -0.05863927,
#         0.0623733 , -0.07197598,  0.02887617,  0.03702732,  0.05255475,
#         0.03850314,  0.03016165,  0.04511765,  0.0400167 ,  0.01042124,
#        -0.08053102, -0.06103503, -0.02782067, -0.03948715,  0.00812866,
#        -0.00215283,  0.00496819, -0.00270994,  0.04999355, -0.08324838,
#         0.01673055, -0.0224449 , -0.04158457,  0.03688109, -0.13497816,
#         0.02797874, -0.04349126, -0.06393341,  0.01634013,  0.00367471,
#         0.03441324,  0.00576339, -0.08563808, -0.08777589,  0.01206557,
#         0.01930428,  0.03046028,  0.00186808,  0.01118185, -0.06207091,
#         0.00285664,  0.04373416,  0.03865229,  0.02155851,  0.02963249,
#         0.03907783, -0.06465862,  0.00155482, -0.04207559,  0.02787214,
#         0.02055759, -0.05460549, -0.0024652 ,  0.02217332, -0.07867457,
#         0.04810029, -0.0450572 , -0.01488631,  0.02080196, -0.07611465,
#        -0.01182817,  0.03117848,  0.0593022 , -0.05042631, -0.06321163,
#         0.01080927,  0.03538311, -0.06461193,  0.02289902,  0.03690634,
#         0.02868471,  0.01077593,  0.00843379,  0.04739143, -0.03351105,
#         0.04080784,  0.01689551, -0.06830349,  0.01059405,  0.01843624,
#         0.01237972,  0.02619306, -0.02353077,  0.00792623,  0.02665057,
#         0.00471944, -0.08360166, -0.0301204 ,  0.04510773, -0.03999252,
#         0.03273777,  0.02000749, -0.07822321,  0.04588151,  0.03334309,
#        -0.09588112,  0.01911022, -0.06844518, -0.03093524, -0.02563222,
#         0.03301362,  0.03092113,  0.07978717,  0.03420616,  0.02481706,
#        -0.03479896,  0.01136372,  0.02234516, -0.02502409,  0.02136666,
#        -0.01978885,  0.01426617,  0.0336206 ,  0.00164481,  0.05059334,
#        -0.05926166,  0.01984084, -0.09437344,  0.00440842, -0.06748072,
#         0.04547653,  0.04531173,  0.02839815,  0.01182417,  0.01309258,
#         0.03345039, -0.0050239 ,  0.00861029, -0.05667242,  0.01330826,
#         0.02976079,  0.03610174,  0.0056701 , -0.06830816,  0.07686577,
#         0.00055387, -0.07641901,  0.00479465,  0.0435739 ,  0.00137714,
#         0.054296  ,  0.02192332,  0.03526516,  0.03261713, -0.01711978,
#         0.05103486,  0.004091  , -0.04905723,  0.01632674, -0.04963868,
#         0.04549154,  0.05771144,  0.01438812, -0.08240737, -0.06134431,
#        -0.03986251,  0.03224541,  0.00400033, -0.05963603,  0.02552675,
#         0.04327708,  0.00562372,  0.03411512, -0.11604068,  0.00232808,
#         0.02742139,  0.01270449,  0.02279026, -0.06613689,  0.00456405,
#         0.00770958,  0.01518244, -0.03575909,  0.05028789,  0.03181706,
#        -0.02811741,  0.02930666,  0.02258663, -0.06209057,  0.01053006,
#         0.01761598,  0.02432001, -0.0141328 ,  0.03561908,  0.03293756,
#         0.04713007,  0.02588944,  0.0185135 ,  0.00973485, -0.09059389,
#        -0.06192823, -0.0214373 ,  0.02466835, -0.05554106,  0.03954491,
#        -0.03995424,  0.03540933, -0.05664941,  0.00685676,  0.02727092,
#        -0.06838219,  0.04708575,  0.06957678, -0.0574585 , -0.08372921,
#        -0.06601643, -0.02683325,  0.02862075,  0.06086589, -0.05693608,
#         0.02700268,  0.03062632, -0.0449043 , -0.03139404,  0.01131762,
#         0.018201  , -0.05808553,  0.02667459,  0.02892675, -0.05436037,
#         0.02801878,  0.04307706,  0.0013432 , -0.06306062, -0.04901182,
#        -0.05647411,  0.0226799 , -0.06727529,  0.10902219,  0.03856311,
#        -0.04592182, -0.00500258,  0.00186311, -0.05330509,  0.05230814,
#        -0.10676292,  0.01777823,  0.01183014,  0.05641989,  0.04702727,
#         0.00042184, -0.08117392, -0.00340278,  0.01055175,  0.02158776,
#         0.00645116,  0.05420727, -0.05439884,  0.02988858, -0.0155564 ,
#        -0.00187941,  0.04348213,  0.02176837,  0.04492295,  0.05255244,
#        -0.09009198, -0.12785755,  0.0270214 ,  0.01281871,  0.03488814,
#         0.01032432,  0.03737413, -0.08046219,  0.03366841,  0.04788679,
#         0.02247225,  0.02758352, -0.05623886,  0.03350434, -0.03293617,
#         0.00674522,  0.02637025, -0.06836043, -0.03543041,  0.04120062,
#         0.04781871, -0.0528533 ,  0.05126699,  0.01553862,  0.03617714,
#         0.0096033 ,  0.01169565, -0.06753531, -0.05359954, -0.07725069,
#        -0.0690423 ,  0.00608264,  0.03367587, -0.01095485,  0.02317013,
#        -0.03748006, -0.0396716 , -0.07376339, -0.15511133, -0.02377705,
#        -0.0733289 , -0.02155393,  0.03737415, -0.00152944, -0.05182485,
#         0.0202742 ,  0.04189592,  0.05077221,  0.02522502, -0.04805434,
#        -0.03909   , -0.01301163, -0.02148154,  0.02039445,  0.02322994,
#         0.01821164,  0.03498985,  0.00654902,  0.00980544, -0.06337985,
#         0.00158023,  0.01253585,  0.05249537,  0.00056358, -0.03539167,
#         0.04533946,  0.02057356,  0.00598625,  0.00438659, -0.00444954,
#         0.04846435,  0.02074119,  0.00665891,  0.0347768 , -0.00355295,
#        -0.00983169,  0.01239159, -0.06600927, -0.06987962,  0.04164324,
#        -0.00596055,  0.01529142,  0.04804419,  0.04481226, -0.06791846,
#         0.04703787, -0.01586268, -0.06848218,  0.03964271,  0.03287267,
#        -0.00166699,  0.05269769,  0.02563164,  0.00356486, -0.04681876,
#        -0.05530458,  0.00568418, -0.00581932,  0.0229376 ,  0.06235321,
#        -0.03780747, -0.04042193,  0.01800834,  0.02682916,  0.05686411,
#         0.03996282, -0.05146077,  0.0312879 , -0.03907526, -0.01055358,
#        -0.05896859,  0.02441409, -0.03880213,  0.03941878,  0.02211095,
#         0.00688374, -0.05528738, -0.01232414, -0.06249457, -0.07299529,
#         0.00938593,  0.05738097, -0.06533916,  0.03651554,  0.06204324,
#        -0.01556815, -0.04757515,  0.0451969 ,  0.03502326, -0.01376748,
#         0.02549847, -0.06043207]]).to(device)

def tts_generate(text):
    """Synthesize speech for *text* and return the path of the WAV file.

    The text is tokenized with the module-level ``processor``, turned into a
    waveform by ``model.generate_speech`` (using ``speaker_embedding`` and
    ``vocoder``), and written to a fresh temporary ``.wav`` file.

    Args:
        text: The text to speak, as entered in the Gradio textbox.

    Returns:
        Filesystem path of the generated WAV file (saved with a 16 kHz
        sample rate).

    Raises:
        gr.Error: If *text* is empty or whitespace-only, or if tokenization,
            synthesis or saving fails. Raising (instead of returning an error
            string) matters because the output component is an audio
            file path: a returned message would be treated as a path.
    """
    # Reject empty input up front, outside the try block, so this message is
    # not replaced by the generic failure message below.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")

    try:
        # Preprocess input
        inputs = processor(text=text, return_tensors="pt").to(device)

        # Generate waveform directly (with vocoder)
        with torch.no_grad():
            waveform = model.generate_speech(
                inputs["input_ids"],
                speaker_embedding,
                vocoder=vocoder,
            )

        # A 1-D waveform gets a leading channel axis before saving.
        if waveform.dim() == 1:
            waveform = waveform.unsqueeze(0)

        # Use a unique file per call so overlapping requests cannot overwrite
        # each other's audio. The handle is closed before torchaudio writes
        # to the path; the file is intentionally kept (delete=False) so the
        # caller can read it after this function returns.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            output_path = tmp.name
        torchaudio.save(output_path, waveform.cpu(), sample_rate=16000)
    except Exception as e:
        # Keep the detailed cause in the server log; show a short message
        # to the user.
        print("Error during TTS generation:", e)
        raise gr.Error("Error during speech synthesis.") from e

    return output_path

# Gradio front end: a single text box in, a single audio player out,
# with tts_generate doing the work in between.
text_input = gr.Textbox(label="Enter text")
speech_output = gr.Audio(label="Generated Speech", type="filepath")

demo = gr.Interface(
    fn=tts_generate,
    inputs=text_input,
    outputs=speech_output,
    title="SpeechT5 Text-to-Speech",
    description="Enter text and hear it with my custom SpeechT5.",
)

if __name__ == "__main__":
    # Start the web app only when this file is executed directly,
    # not when it is imported.
    print("Launching Gradio demo")
    demo.launch()