Spaces:

ak0601
/

Text_2_audio

Runtime error

App Files Files Community

ak0601 committed on Apr 2, 2024

Commit

e605e02

verified ·

1 Parent(s): ad7c71c

Update app.py

Browse files

Files changed (1) hide show

app.py +84 -9

app.py CHANGED Viewed

@@ -140,7 +140,7 @@ css = """
 iface = gr.Blocks(css=css)
 with iface:
-    gr.HTML(
         """
             <div style="text-align: center; max-width: 700px; margin: 0 auto;">
               <div
@@ -149,14 +149,89 @@ with iface:
                 "
               >
                 <h1 style="font-weight: 900; margin-bottom: 7px; line-height: normal;">
-                  AudioLDM: Text-to-Audio Generation with Latent Diffusion Models
                 </h1>
-              </div> <p style="margin-bottom: 10px; font-size: 94%">
-                <a href="https://arxiv.org/abs/2301.12503">[Paper]</a> <a href="https://audioldm.github.io/">[Project
-                page]</a> <a href="https://huggingface.co/docs/diffusers/main/en/api/pipelines/audioldm">[🧨
-                Diffusers]</a>
-              </p>
-            </div>
         """
     )
-iface.queue(max_size=10).launch(debug=True)

 iface = gr.Blocks(css=css)
 with iface:
+        gr.HTML(
         """
             <div style="text-align: center; max-width: 700px; margin: 0 auto;">
               <div
                 "
               >
                 <h1 style="font-weight: 900; margin-bottom: 7px; line-height: normal;">
+                  AudioLDM: Text-to-Audio Generation Diffusion Models
                 </h1>
         """
     )
+    with gr.Group():
+        with gr.Box():
+            textbox = gr.Textbox(
+                value="A hammer is hitting a wooden surface",
+                max_lines=1,
+                label="Input text",
+                info="Your text is important for the audio quality. Please ensure it is descriptive by using more adjectives.",
+                elem_id="prompt-in",
+            )
+            negative_textbox = gr.Textbox(
+                value="low quality, average quality",
+                max_lines=1,
+                label="Negative prompt",
+                info="Enter a negative prompt not to guide the audio generation. Selecting appropriate negative prompts can improve the audio quality significantly.",
+                elem_id="prompt-in",
+            )
+            with gr.Accordion("Click to modify detailed configurations", open=False):
+                seed = gr.Number(
+                    value=45,
+                    label="Seed",
+                    info="Change this value (any integer number) will lead to a different generation result.",
+                )
+                duration = gr.Slider(2.5, 10, value=5, step=2.5, label="Duration (seconds)")
+                guidance_scale = gr.Slider(
+                    0,
+                    5,
+                    value=3.5,
+                    step=0.5,
+                    label="Guidance scale",
+                    info="Large => better quality and relevancy to text; Small => better diversity",
+                )
+                n_candidates = gr.Slider(
+                    1,
+                    3,
+                    value=3,
+                    step=1,
+                    label="Number waveforms to generate",
+                    info="Automatic quality control. This number control the number of candidates (e.g., generate three audios and choose the best to show you). A Larger value usually lead to better quality with heavier computation",
+                )
+            outputs = gr.Video(label="Output", elem_id="output-video")
+            btn = gr.Button("Submit").style(full_width=True)
+        with gr.Group(elem_id="share-btn-container", visible=False):
+            community_icon = gr.HTML(community_icon_html)
+            loading_icon = gr.HTML(loading_icon_html)
+            share_button = gr.Button("Share to community", elem_id="share-btn")
+        btn.click(
+            text2audio,
+            inputs=[textbox, negative_textbox, duration, guidance_scale, seed, n_candidates],
+            outputs=[outputs],
+        )
+        share_button.click(None, [], [], _js=share_js)
+        gr.HTML(
+        gr.Examples(
+            [
+                ["A hammer is hitting a wooden surface", "low quality, average quality", 5, 2.5, 45, 3],
+                ["Peaceful and calming ambient music with singing bowl and other instruments.", "low quality, average quality", 5, 2.5, 45, 3],
+                ["A man is speaking in a small room.", "low quality, average quality", 5, 2.5, 45, 3],
+                ["A female is speaking followed by footstep sound", "low quality, average quality", 5, 2.5, 45, 3],
+                ["Wooden table tapping sound followed by water pouring sound.", "low quality, average quality", 5, 2.5, 45, 3],
+            ],
+            fn=text2audio,
+            inputs=[textbox, negative_textbox, duration, guidance_scale, seed, n_candidates],
+            outputs=[outputs],
+            cache_examples=True,
+        )
+        gr.HTML(
+            """
+                <div class="acknowledgements"> <p>Essential Tricks for Enhancing the Quality of Your Generated
+                Audio</p> <p>1. Try to use more adjectives to describe your sound. For example: "A man is speaking
+                clearly and slowly in a large room" is better than "A man is speaking". This can make sure AudioLDM
+                understands what you want.</p> <p>2. Try to use different random seeds, which can affect the generation
+                quality significantly sometimes.</p> <p>3. It's better to use general terms like 'man' or 'woman'
+                instead of specific names for individuals or abstract objects that humans may not be familiar with,
+                such as 'mummy'.</p> <p>4. Using a negative prompt to not guide the diffusion process can improve the
+                audio quality significantly. Try using negative prompts like 'low quality'.</p> </div>
+                """
+        )