dur
Browse files
app.py
CHANGED
|
@@ -9,13 +9,15 @@ audiogen = AudioGen().eval().to('cpu')
|
|
| 9 |
|
| 10 |
|
| 11 |
def audionar_tts(text='frogs',
|
| 12 |
-
|
| 13 |
-
|
|
|
|
|
|
|
| 14 |
|
| 15 |
if text and text.strip():
|
| 16 |
|
| 17 |
|
| 18 |
-
dur_seconds = max(
|
| 19 |
# Sink Attn
|
| 20 |
background_audio = audiogen.generate(
|
| 21 |
text[:64], # soundscape text - discard if too long cross attention T5
|
|
@@ -38,20 +40,24 @@ def audionar_tts(text='frogs',
|
|
| 38 |
|
| 39 |
with gr.Blocks() as demo:
|
| 40 |
with gr.Row():
|
| 41 |
-
|
| 42 |
label="AudioGen Txt:",
|
| 43 |
placeholder="Describe sound - Type Any language",
|
| 44 |
lines=2,
|
| 45 |
value='dogs barg',
|
| 46 |
)
|
| 47 |
-
|
| 48 |
-
label="
|
| 49 |
-
value=
|
| 50 |
)
|
| 51 |
n_tokens = gr.Number(
|
| 52 |
label="Tokens",
|
| 53 |
value=24,
|
| 54 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
generate_button = gr.Button("Generate Audio",
|
| 56 |
variant="primary")
|
| 57 |
|
|
@@ -59,7 +65,7 @@ with gr.Blocks() as demo:
|
|
| 59 |
|
| 60 |
generate_button.click(
|
| 61 |
fn=audionar_tts,
|
| 62 |
-
inputs=[text, n_tokens, cache_lim],
|
| 63 |
outputs=[output_audio]
|
| 64 |
)
|
| 65 |
demo.launch(debug=True)
|
|
|
|
| 9 |
|
| 10 |
|
| 11 |
def audionar_tts(text='frogs',
|
| 12 |
+
duration=20.4, # seconds
|
| 13 |
+
max_tokens=24, # True A/R steps (repeats the rest of duration)
|
| 14 |
+
cache_lim=-1
|
| 15 |
+
):
|
| 16 |
|
| 17 |
if text and text.strip():
|
| 18 |
|
| 19 |
|
| 20 |
+
dur_seconds = max(duration + 0.74, 2.0)
|
| 21 |
# Sink Attn
|
| 22 |
background_audio = audiogen.generate(
|
| 23 |
text[:64], # soundscape text - discard if too long cross attention T5
|
|
|
|
| 40 |
|
| 41 |
with gr.Blocks() as demo:
|
| 42 |
with gr.Row():
|
| 43 |
+
text = gr.Textbox(
|
| 44 |
label="AudioGen Txt:",
|
| 45 |
placeholder="Describe sound - Type Any language",
|
| 46 |
lines=2,
|
| 47 |
value='dogs barg',
|
| 48 |
)
|
| 49 |
+
duration = gr.Number(
|
| 50 |
+
label="Duration (s)",
|
| 51 |
+
value=7.1,
|
| 52 |
)
|
| 53 |
n_tokens = gr.Number(
|
| 54 |
label="Tokens",
|
| 55 |
value=24,
|
| 56 |
)
|
| 57 |
+
cache_lim = gr.Number(
|
| 58 |
+
label="kv Cache Flush:",
|
| 59 |
+
value=71,
|
| 60 |
+
)
|
| 61 |
generate_button = gr.Button("Generate Audio",
|
| 62 |
variant="primary")
|
| 63 |
|
|
|
|
| 65 |
|
| 66 |
generate_button.click(
|
| 67 |
fn=audionar_tts,
|
| 68 |
+
inputs=[text, duration, n_tokens, cache_lim],
|
| 69 |
outputs=[output_audio]
|
| 70 |
)
|
| 71 |
demo.launch(debug=True)
|