Spaces:

dkounadis
/

audiogen2

Sleeping

Dionyssos committed on Sep 27, 2025

Commit

f653575

1 Parent(s): 7aa86f1

dur

Files changed (1) hide show

app.py CHANGED Viewed

@@ -9,13 +9,15 @@ audiogen = AudioGen().eval().to('cpu')
 def audionar_tts(text='frogs',
-                 max_tokens=24,
-                 cache_lim=-1):
     if text and text.strip():
-        dur_seconds = max(max_tokens * 320 / 16000 + 0.74, 2.0)
         # Sink Attn
         background_audio = audiogen.generate(
             text[:64],  # soundscape text - discard if too long cross attention T5
@@ -38,20 +40,24 @@ def audionar_tts(text='frogs',
 with gr.Blocks() as demo:
     with gr.Row():
-        text_input = gr.Textbox(
             label="AudioGen Txt:",
             placeholder="Describe sound - Type Any language",
             lines=2,
             value='dogs barg',
         )
-        cache_lim = gr.Number(
-            label="kv Cache Flush:",
-            value=71,
         )
         n_tokens = gr.Number(
             label="Tokens",
             value=24,
         )
         generate_button = gr.Button("Generate Audio",
                                     variant="primary")
@@ -59,7 +65,7 @@ with gr.Blocks() as demo:
     generate_button.click(
         fn=audionar_tts,
-        inputs=[text, n_tokens, cache_lim],
         outputs=[output_audio]
     )
 demo.launch(debug=True)

 def audionar_tts(text='frogs',
+                 duration=20.4,  # seconds
+                 max_tokens=24,  # True A/R steps (repeats the rest of duration)
+                 cache_lim=-1
+                 ):
     if text and text.strip():
+        dur_seconds = max(duration + 0.74, 2.0)
         # Sink Attn
         background_audio = audiogen.generate(
             text[:64],  # soundscape text - discard if too long cross attention T5
 with gr.Blocks() as demo:
     with gr.Row():
+        text = gr.Textbox(
             label="AudioGen Txt:",
             placeholder="Describe sound - Type Any language",
             lines=2,
             value='dogs barg',
         )
+        duration = gr.Number(
+            label="Duration (s)",
+            value=7.1,
         )
         n_tokens = gr.Number(
             label="Tokens",
             value=24,
         )
+        cache_lim = gr.Number(
+            label="kv Cache Flush:",
+            value=71,
+        )
         generate_button = gr.Button("Generate Audio",
                                     variant="primary")
     generate_button.click(
         fn=audionar_tts,
+        inputs=[text, duration, n_tokens, cache_lim],
         outputs=[output_audio]
     )
 demo.launch(debug=True)