Ray-D-Song committed on
Commit
c01bf88
·
1 Parent(s): af0acf2
Files changed (2) hide show
  1. app.py +15 -8
  2. requirements.txt +1 -0
app.py CHANGED
@@ -2,6 +2,7 @@
2
  # Demo also available on HF Spaces: https://huggingface.co/spaces/mrfakename/MeloTTS
3
  import gradio as gr
4
  import os, torch, io
 
5
  os.system('python -m unidic download')
6
  # print("Make sure you've downloaded unidic (python -m unidic download) for this WebUI to work.")
7
  from melo.api import TTS
@@ -10,14 +11,19 @@ import tempfile
10
  import nltk
11
  nltk.download('averaged_perceptron_tagger_eng')
12
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
13
- models = {
14
- 'EN': TTS(language='EN', device=device),
15
- 'ES': TTS(language='ES', device=device),
16
- 'FR': TTS(language='FR', device=device),
17
- 'ZH': TTS(language='ZH', device=device),
18
- 'JP': TTS(language='JP', device=device),
19
- 'KR': TTS(language='KR', device=device),
20
- }
 
 
 
 
 
21
  speaker_ids = models['EN'].hps.data.spk2id
22
 
23
  default_text_dict = {
@@ -29,6 +35,7 @@ default_text_dict = {
29
  'KR': '์ตœ๊ทผ ํ…์ŠคํŠธ ์Œ์„ฑ ๋ณ€ํ™˜ ๋ถ„์•ผ๊ฐ€ ๊ธ‰์†๋„๋กœ ๋ฐœ์ „ํ•˜๊ณ  ์žˆ์Šต๋‹ˆ๋‹ค.',
30
  }
31
 
 
32
  def synthesize(text, speaker, speed, language, progress=gr.Progress()):
33
  bio = io.BytesIO()
34
  models[language].tts_to_file(text, models[language].hps.data.spk2id[speaker], bio, speed=speed, pbar=progress.tqdm, format='wav')
 
2
  # Demo also available on HF Spaces: https://huggingface.co/spaces/mrfakename/MeloTTS
3
  import gradio as gr
4
  import os, torch, io
5
+ import spaces
6
  os.system('python -m unidic download')
7
  # print("Make sure you've downloaded unidic (python -m unidic download) for this WebUI to work.")
8
  from melo.api import TTS
 
11
  import nltk
12
  nltk.download('averaged_perceptron_tagger_eng')
13
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
14
+
15
+ @spaces.GPU
16
+ def load_models():
17
+ return {
18
+ 'EN': TTS(language='EN', device=device),
19
+ 'ES': TTS(language='ES', device=device),
20
+ 'FR': TTS(language='FR', device=device),
21
+ 'ZH': TTS(language='ZH', device=device),
22
+ 'JP': TTS(language='JP', device=device),
23
+ 'KR': TTS(language='KR', device=device),
24
+ }
25
+
26
+ models = load_models()
27
  speaker_ids = models['EN'].hps.data.spk2id
28
 
29
  default_text_dict = {
 
35
  'KR': '์ตœ๊ทผ ํ…์ŠคํŠธ ์Œ์„ฑ ๋ณ€ํ™˜ ๋ถ„์•ผ๊ฐ€ ๊ธ‰์†๋„๋กœ ๋ฐœ์ „ํ•˜๊ณ  ์žˆ์Šต๋‹ˆ๋‹ค.',
36
  }
37
 
38
+ @spaces.GPU
39
  def synthesize(text, speaker, speed, language, progress=gr.Progress()):
40
  bio = io.BytesIO()
41
  models[language].tts_to_file(text, models[language].hps.data.spk2id[speaker], bio, speed=speed, pbar=progress.tqdm, format='wav')
requirements.txt CHANGED
@@ -24,6 +24,7 @@ pypinyin==0.50.0
24
  cn2an==0.5.22
25
  jieba==0.42.1
26
  gradio
 
27
  langid==1.1.6
28
  tqdm
29
  tensorboard==2.16.2
 
24
  cn2an==0.5.22
25
  jieba==0.42.1
26
  gradio
27
+ spaces
28
  langid==1.1.6
29
  tqdm
30
  tensorboard==2.16.2