TGPro1 committed on
Commit
8e1bb8b
·
verified ·
1 Parent(s): 45e1de1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -29
app.py CHANGED
@@ -1,41 +1,33 @@
1
  import os
2
  import gradio as gr
3
- import torch
4
- from TTS.tts.configs.xtts_config import XttsConfig
5
- from TTS.tts.models.xtts import Xtts
6
 
7
  os.environ["COQUI_TOS_AGREED"] = "1"
8
- device = "cuda" if torch.cuda.is_available() else "cpu"
9
 
10
- print("Loading XTTS from Hugging Face Hub...")
11
- config = XttsConfig()
12
- config.load_json("https://huggingface.co/coqui/XTTS-v2/raw/main/config.json")
13
- model = Xtts.init_from_config(config)
14
- model.load_checkpoint(config, checkpoint_dir="coqui/XTTS-v2", use_deepspeed=False)
15
- if device == "cuda":
16
- model.cuda()
17
 
18
- def clone(text, lang, ref):
19
- out = "out.wav"
20
- outputs = model.synthesize(
21
- text,
22
- config,
23
- speaker_wav=ref,
24
- language=lang,
25
- gpt_cond_len=3,
26
- temperature=0.75
27
  )
28
- import scipy.io.wavfile
29
- scipy.io.wavfile.write(out, 24000, outputs["wav"])
30
- return out
31
 
32
  LANGS = ["en","es","fr","de","it","pt","pl","tr","ru","nl","cs","ar","zh-cn","ja","ko","hu"]
33
 
34
- with gr.Blocks() as demo:
35
- t=gr.Textbox(label="Text")
36
- l=gr.Dropdown(label="Lang", choices=LANGS, value="fr")
37
- r=gr.Audio(label="Ref", type="filepath")
38
- o=gr.Audio(label="Out")
39
- gr.Button("Generate").click(clone, [t,l,r], [o])
 
 
 
 
40
 
41
  demo.launch()
 
1
  import os
2
  import gradio as gr
3
+ from TTS.api import TTS
 
 
4
 
5
  os.environ["COQUI_TOS_AGREED"] = "1"
 
6
 
7
+ print("Loading XTTS model...")
8
+ tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
 
 
 
 
 
9
 
10
+ def clone_voice(text, language, reference_audio):
11
+ output_path = "output.wav"
12
+ tts.tts_to_file(
13
+ text=text,
14
+ file_path=output_path,
15
+ speaker_wav=reference_audio,
16
+ language=language
 
 
17
  )
18
+ return output_path
 
 
19
 
20
  LANGS = ["en","es","fr","de","it","pt","pl","tr","ru","nl","cs","ar","zh-cn","ja","ko","hu"]
21
 
22
+ demo = gr.Interface(
23
+ fn=clone_voice,
24
+ inputs=[
25
+ gr.Textbox(label="Text to synthesize"),
26
+ gr.Dropdown(choices=LANGS, value="fr", label="Language"),
27
+ gr.Audio(label="Reference Voice (5-30s)", type="filepath")
28
+ ],
29
+ outputs=gr.Audio(label="Generated Audio"),
30
+ title="XTTS v2 Voice Cloning"
31
+ )
32
 
33
  demo.launch()