TGPro1 committed on
Commit
45e1de1
·
verified ·
1 Parent(s): 64d1822

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -15
app.py CHANGED
@@ -1,28 +1,32 @@
1
  import os
2
  import gradio as gr
3
- from TTS.api import TTS
4
  import torch
 
 
5
 
6
  os.environ["COQUI_TOS_AGREED"] = "1"
7
  device = "cuda" if torch.cuda.is_available() else "cpu"
8
 
9
- print("Loading XTTS...")
10
- try:
11
- # Syntax 1: Standard ID
12
- tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
13
- except Exception as e:
14
- print(f"Error 1: {e}")
15
- try:
16
- # Syntax 2: Explicit model_name kwarg
17
- tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", progress_bar=False).to(device)
18
- except Exception as e2:
19
- print(f"Error 2: {e2}")
20
- # Syntax 3: Short ID (sometimes works depending on mapping)
21
- tts = TTS("xtts_v2").to(device)
22
 
23
  def clone(text, lang, ref):
24
  out = "out.wav"
25
- tts.tts_to_file(text=text, file_path=out, speaker_wav=ref, language=lang)
 
 
 
 
 
 
 
 
 
26
  return out
27
 
28
  LANGS = ["en","es","fr","de","it","pt","pl","tr","ru","nl","cs","ar","zh-cn","ja","ko","hu"]
 
1
  import os
2
  import gradio as gr
 
3
  import torch
4
+ from TTS.tts.configs.xtts_config import XttsConfig
5
+ from TTS.tts.models.xtts import Xtts
6
 
7
  os.environ["COQUI_TOS_AGREED"] = "1"
8
  device = "cuda" if torch.cuda.is_available() else "cpu"
9
 
10
+ print("Loading XTTS from Hugging Face Hub...")
11
+ config = XttsConfig()
12
+ config.load_json("https://huggingface.co/coqui/XTTS-v2/raw/main/config.json")
13
+ model = Xtts.init_from_config(config)
14
+ model.load_checkpoint(config, checkpoint_dir="coqui/XTTS-v2", use_deepspeed=False)
15
+ if device == "cuda":
16
+ model.cuda()
 
 
 
 
 
 
17
 
18
  def clone(text, lang, ref):
19
  out = "out.wav"
20
+ outputs = model.synthesize(
21
+ text,
22
+ config,
23
+ speaker_wav=ref,
24
+ language=lang,
25
+ gpt_cond_len=3,
26
+ temperature=0.75
27
+ )
28
+ import scipy.io.wavfile
29
+ scipy.io.wavfile.write(out, 24000, outputs["wav"])
30
  return out
31
 
32
  LANGS = ["en","es","fr","de","it","pt","pl","tr","ru","nl","cs","ar","zh-cn","ja","ko","hu"]