Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
import gradio as gr
|
| 3 |
import spaces
|
| 4 |
import torch
|
| 5 |
-
|
| 6 |
from kokoro import KModel, KPipeline
|
| 7 |
|
| 8 |
# Pre-Initialize
|
|
@@ -15,7 +15,6 @@ torch.set_num_threads(4)
|
|
| 15 |
|
| 16 |
# Variables
|
| 17 |
CHAR_LIMIT = 2000
|
| 18 |
-
|
| 19 |
DEFAULT_INPUT = ""
|
| 20 |
DEFAULT_VOICE = "af_heart"
|
| 21 |
|
|
@@ -59,8 +58,13 @@ for v in CHOICES.values():
|
|
| 59 |
VOICE_PACKS[v] = PIPELINES[v[0]].load_voice(v)
|
| 60 |
|
| 61 |
model_instance = KModel().to(DEVICE).eval()
|
| 62 |
-
MODEL = torch.jit.script(model_instance)
|
| 63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
css = '''
|
| 65 |
.gradio-container{max-width: 560px !important}
|
| 66 |
h1{text-align:center}
|
|
@@ -69,16 +73,26 @@ footer {
|
|
| 69 |
}
|
| 70 |
'''
|
| 71 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
# Functions
|
| 73 |
def generate(text=DEFAULT_INPUT, voice=DEFAULT_VOICE, speed=1):
|
| 74 |
text = text.strip()[:CHAR_LIMIT] + "."
|
| 75 |
pipeline = PIPELINES[voice[0]]
|
| 76 |
pack = VOICE_PACKS[voice]
|
| 77 |
-
|
| 78 |
for _, ps, _ in pipeline(text, voice, speed):
|
| 79 |
ref_s = pack[len(ps) - 1]
|
| 80 |
audio = MODEL(ps, ref_s, speed)
|
| 81 |
-
|
|
|
|
|
|
|
| 82 |
|
| 83 |
def cloud():
|
| 84 |
print("[CLOUD] | Space maintained.")
|
|
@@ -93,13 +107,10 @@ with gr.Blocks(css=css) as main:
|
|
| 93 |
input = gr.Textbox(lines=1, value=DEFAULT_INPUT, label="Input")
|
| 94 |
voice_input = gr.Dropdown(list(CHOICES.items()), value=DEFAULT_VOICE, label="Voice")
|
| 95 |
speed_input = gr.Slider(minimum=0.5, maximum=2, value=1, step=0.1, label="Speed")
|
| 96 |
-
|
| 97 |
submit = gr.Button("▶")
|
| 98 |
maintain = gr.Button("☁️")
|
| 99 |
-
|
| 100 |
with gr.Column():
|
| 101 |
output = gr.Audio(label="Output")
|
| 102 |
-
|
| 103 |
submit.click(fn=generate, inputs=[input, voice_input, speed_input], outputs=output)
|
| 104 |
maintain.click(cloud, inputs=[], outputs=[], queue=False)
|
| 105 |
|
|
|
|
| 2 |
import gradio as gr
|
| 3 |
import spaces
|
| 4 |
import torch
|
| 5 |
+
import numpy as np
|
| 6 |
from kokoro import KModel, KPipeline
|
| 7 |
|
| 8 |
# Pre-Initialize
|
|
|
|
| 15 |
|
| 16 |
# Variables
|
| 17 |
CHAR_LIMIT = 2000
|
|
|
|
| 18 |
DEFAULT_INPUT = ""
|
| 19 |
DEFAULT_VOICE = "af_heart"
|
| 20 |
|
|
|
|
| 58 |
VOICE_PACKS[v] = PIPELINES[v[0]].load_voice(v)
|
| 59 |
|
| 60 |
def _script_or_eager(model):
    """Return a TorchScript-compiled version of *model*, or *model* itself.

    Scripting is treated as an optional optimisation: if
    ``torch.jit.script`` raises for any reason, the failure is printed and
    the original (eager) module is returned so the app can still start.
    """
    try:
        return torch.jit.script(model)
    except Exception as e:
        print("torch.jit.script failed, using original model:", e)
        return model


# Build the model once at import time, in eval mode on the configured device.
model_instance = KModel().to(DEVICE).eval()

# MODEL is what generate() calls; it may be scripted or eager (see helper).
MODEL = _script_or_eager(model_instance)
|
| 67 |
+
|
| 68 |
css = '''
|
| 69 |
.gradio-container{max-width: 560px !important}
|
| 70 |
h1{text-align:center}
|
|
|
|
| 73 |
}
|
| 74 |
'''
|
| 75 |
|
| 76 |
+
def trim_silence(audio, threshold=0.001):
    """Strip leading and trailing near-silent samples from *audio*.

    A sample counts as audible when its absolute value is strictly greater
    than *threshold*. The result is the slice of *audio* running from the
    first audible position to the last one (inclusive). Positions are taken
    along the first axis of *audio*.

    If nothing exceeds the threshold (including empty input), *audio* is
    returned unchanged rather than an empty slice.
    """
    # First-axis positions of every sample louder than the threshold.
    audible = np.nonzero(np.abs(audio) > threshold)[0]

    # Guard: fully silent input is passed through untouched.
    if audible.size == 0:
        return audio

    # +1 because the slice end is exclusive and the last audible sample
    # must be kept.
    return audio[audible[0]:audible[-1] + 1]
|
| 84 |
+
|
| 85 |
# Functions
|
| 86 |
def generate(text=DEFAULT_INPUT, voice=DEFAULT_VOICE, speed=1):
    """Synthesize speech for *text* and return it in Gradio audio format.

    Args:
        text: Input text. It is stripped, truncated to ``CHAR_LIMIT``
            characters, and a trailing "." is appended before synthesis.
        voice: Voice identifier. Its first character selects the pipeline
            in ``PIPELINES``; the full value selects the voice pack in
            ``VOICE_PACKS``.
        speed: Speed value forwarded to both the pipeline and the model.

    Returns:
        A ``(24000, samples)`` tuple, where ``samples`` is a NumPy array
        with leading/trailing silence trimmed, or ``None`` when the
        pipeline produced no segments.
    """
    text = text.strip()[:CHAR_LIMIT] + "."
    pipeline = PIPELINES[voice[0]]
    pack = VOICE_PACKS[voice]

    # The pipeline may split the input into several segments; synthesize
    # every one of them instead of returning after the first.
    segments = []
    for _, ps, _ in pipeline(text, voice, speed):
        # The voice pack is indexed by the length of the segment's `ps`
        # sequence (presumably its phoneme string - confirm against kokoro).
        ref_s = pack[len(ps) - 1]
        audio = MODEL(ps, ref_s, speed)
        # Tensor.numpy() only works for CPU tensors that do not require
        # grad; detach and move to host first so this also works when the
        # model runs on another device. Both calls are no-ops otherwise.
        segments.append(audio.detach().cpu().numpy())

    # Nothing was produced: return None explicitly instead of falling
    # through with an undefined result.
    if not segments:
        return None

    # Join along the sample axis, then trim once over the whole utterance
    # so that pauses between segments are preserved.
    joined = np.concatenate(segments, axis=-1)
    return (24000, trim_silence(joined))
|
| 96 |
|
| 97 |
def cloud():
|
| 98 |
print("[CLOUD] | Space maintained.")
|
|
|
|
| 107 |
input = gr.Textbox(lines=1, value=DEFAULT_INPUT, label="Input")
|
| 108 |
voice_input = gr.Dropdown(list(CHOICES.items()), value=DEFAULT_VOICE, label="Voice")
|
| 109 |
speed_input = gr.Slider(minimum=0.5, maximum=2, value=1, step=0.1, label="Speed")
|
|
|
|
| 110 |
submit = gr.Button("▶")
|
| 111 |
maintain = gr.Button("☁️")
|
|
|
|
| 112 |
with gr.Column():
|
| 113 |
output = gr.Audio(label="Output")
|
|
|
|
| 114 |
submit.click(fn=generate, inputs=[input, voice_input, speed_input], outputs=output)
|
| 115 |
maintain.click(cloud, inputs=[], outputs=[], queue=False)
|
| 116 |
|