Upload app.py with huggingface_hub
Browse files
app.py
CHANGED
|
@@ -22,9 +22,16 @@ pipelines['b'].g2p.lexicon.golds['kokoro'] = 'kˈQkəɹQ'
|
|
| 22 |
def forward_gpu(ps, ref_s, speed):
    """Invoke the model stored under the ``True`` key of ``models``.

    The three arguments are forwarded positionally and unchanged, and the
    model's result is returned as-is.

    NOTE(review): the ``True`` key presumably selects the CUDA-resident
    model -- confirm where ``models`` is built.
    """
    gpu_model = models[True]
    return gpu_model(ps, ref_s, speed)
|
| 24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
def generate_first(text, voice='af_heart', speed=1, use_gpu=CUDA_AVAILABLE):
|
| 26 |
text = text if CHAR_LIMIT is None else text.strip()[:CHAR_LIMIT]
|
| 27 |
pipeline = pipelines[voice[0]]
|
|
|
|
| 28 |
pack = pipeline.load_voice(voice)
|
| 29 |
use_gpu = use_gpu and CUDA_AVAILABLE
|
| 30 |
for _, ps, _ in pipeline(text, voice, speed):
|
|
@@ -44,7 +51,6 @@ def generate_first(text, voice='af_heart', speed=1, use_gpu=CUDA_AVAILABLE):
|
|
| 44 |
return (24000, audio.numpy()), ps
|
| 45 |
return None, ''
|
| 46 |
|
| 47 |
-
# Arena API
|
| 48 |
def predict(text, voice='af_heart', speed=1):
    """Return the first element of ``generate_first``'s result.

    ``generate_first`` is always called with ``use_gpu=False``. Its visible
    return sites yield either a ``(24000, samples)`` pair or ``None`` in
    that first position.
    """
    result = generate_first(text, voice, speed, use_gpu=False)
    return result[0]
|
| 50 |
|
|
@@ -57,6 +63,7 @@ def tokenize_first(text, voice='af_heart'):
|
|
| 57 |
def generate_all(text, voice='af_heart', speed=1, use_gpu=CUDA_AVAILABLE):
|
| 58 |
text = text if CHAR_LIMIT is None else text.strip()[:CHAR_LIMIT]
|
| 59 |
pipeline = pipelines[voice[0]]
|
|
|
|
| 60 |
pack = pipeline.load_voice(voice)
|
| 61 |
use_gpu = use_gpu and CUDA_AVAILABLE
|
| 62 |
first = True
|
|
@@ -123,13 +130,11 @@ CHOICES = {
|
|
| 123 |
'🇬🇧 🚹 Lewis': 'bm_lewis',
|
| 124 |
'🇬🇧 🚹 Daniel': 'bm_daniel',
|
| 125 |
}
|
| 126 |
-
for v in CHOICES.values():
|
| 127 |
-
pipelines[v[0]].load_voice(v)
|
| 128 |
|
| 129 |
TOKEN_NOTE = '''
|
| 130 |
💡 Customize pronunciation with Markdown link syntax and /slashes/ like `[Kokoro](/kˈOkəɹO/)`
|
| 131 |
|
| 132 |
-
💬 To adjust intonation, try punctuation `;:,.!?—…"()
|
| 133 |
|
| 134 |
⬇️ Lower stress `[1 level](-1)` or `[2 levels](-2)`
|
| 135 |
|
|
|
|
| 22 |
def forward_gpu(ps, ref_s, speed):
    """Invoke the model stored under the ``True`` key of ``models``.

    The three arguments are forwarded positionally and unchanged, and the
    model's result is returned as-is.

    NOTE(review): the ``True`` key presumably selects the CUDA-resident
    model -- confirm where ``models`` is built.
    """
    gpu_model = models[True]
    return gpu_model(ps, ref_s, speed)
|
| 24 |
|
| 25 |
+
# Voice ids that this module has already passed through ``load_voice``.
_loaded_voices = set()


def _ensure_voice(voice):
    """Load ``voice`` into its pipeline the first time it is requested.

    The pipeline is selected by the first character of the voice id.
    Later calls with an id that is already recorded return immediately.
    """
    if voice in _loaded_voices:
        return
    pipelines[voice[0]].load_voice(voice)
    # Recorded only after load_voice returns, so a load that raises is
    # attempted again on the next call.
    _loaded_voices.add(voice)
|
| 30 |
+
|
| 31 |
def generate_first(text, voice='af_heart', speed=1, use_gpu=CUDA_AVAILABLE):
|
| 32 |
text = text if CHAR_LIMIT is None else text.strip()[:CHAR_LIMIT]
|
| 33 |
pipeline = pipelines[voice[0]]
|
| 34 |
+
_ensure_voice(voice)
|
| 35 |
pack = pipeline.load_voice(voice)
|
| 36 |
use_gpu = use_gpu and CUDA_AVAILABLE
|
| 37 |
for _, ps, _ in pipeline(text, voice, speed):
|
|
|
|
| 51 |
return (24000, audio.numpy()), ps
|
| 52 |
return None, ''
|
| 53 |
|
|
|
|
| 54 |
def predict(text, voice='af_heart', speed=1):
    """Return the first element of ``generate_first``'s result.

    ``generate_first`` is always called with ``use_gpu=False``. Its visible
    return sites yield either a ``(24000, samples)`` pair or ``None`` in
    that first position.
    """
    result = generate_first(text, voice, speed, use_gpu=False)
    return result[0]
|
| 56 |
|
|
|
|
| 63 |
def generate_all(text, voice='af_heart', speed=1, use_gpu=CUDA_AVAILABLE):
|
| 64 |
text = text if CHAR_LIMIT is None else text.strip()[:CHAR_LIMIT]
|
| 65 |
pipeline = pipelines[voice[0]]
|
| 66 |
+
_ensure_voice(voice)
|
| 67 |
pack = pipeline.load_voice(voice)
|
| 68 |
use_gpu = use_gpu and CUDA_AVAILABLE
|
| 69 |
first = True
|
|
|
|
| 130 |
'🇬🇧 🚹 Lewis': 'bm_lewis',
|
| 131 |
'🇬🇧 🚹 Daniel': 'bm_daniel',
|
| 132 |
}
|
|
|
|
|
|
|
| 133 |
|
| 134 |
TOKEN_NOTE = '''
|
| 135 |
💡 Customize pronunciation with Markdown link syntax and /slashes/ like `[Kokoro](/kˈOkəɹO/)`
|
| 136 |
|
| 137 |
+
💬 To adjust intonation, try punctuation `;:,.!?—…"()""` or stress `ˈ` and `ˌ`
|
| 138 |
|
| 139 |
⬇️ Lower stress `[1 level](-1)` or `[2 levels](-2)`
|
| 140 |
|