Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -153,85 +153,85 @@ with iface:
|
|
| 153 |
</h1>
|
| 154 |
"""
|
| 155 |
)
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
)
|
| 165 |
-
negative_textbox = gr.Textbox(
|
| 166 |
-
value="low quality, average quality",
|
| 167 |
-
max_lines=1,
|
| 168 |
-
label="Negative prompt",
|
| 169 |
-
info="Enter a negative prompt not to guide the audio generation. Selecting appropriate negative prompts can improve the audio quality significantly.",
|
| 170 |
-
elem_id="prompt-in",
|
| 171 |
-
)
|
| 172 |
-
|
| 173 |
-
with gr.Accordion("Click to modify detailed configurations", open=False):
|
| 174 |
-
seed = gr.Number(
|
| 175 |
-
value=45,
|
| 176 |
-
label="Seed",
|
| 177 |
-
info="Change this value (any integer number) will lead to a different generation result.",
|
| 178 |
)
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
label="Guidance scale",
|
| 186 |
-
info="Large => better quality and relevancy to text; Small => better diversity",
|
| 187 |
)
|
| 188 |
-
n_candidates = gr.Slider(
|
| 189 |
-
1,
|
| 190 |
-
3,
|
| 191 |
-
value=3,
|
| 192 |
-
step=1,
|
| 193 |
-
label="Number waveforms to generate",
|
| 194 |
-
info="Automatic quality control. This number control the number of candidates (e.g., generate three audios and choose the best to show you). A Larger value usually lead to better quality with heavier computation",
|
| 195 |
-
)
|
| 196 |
-
|
| 197 |
-
outputs = gr.Video(label="Output", elem_id="output-video")
|
| 198 |
-
btn = gr.Button("Submit").style(full_width=True)
|
| 199 |
-
|
| 200 |
-
with gr.Group(elem_id="share-btn-container", visible=False):
|
| 201 |
-
community_icon = gr.HTML(community_icon_html)
|
| 202 |
-
loading_icon = gr.HTML(loading_icon_html)
|
| 203 |
-
share_button = gr.Button("Share to community", elem_id="share-btn")
|
| 204 |
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 236 |
"""
|
| 237 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
</h1>
|
| 154 |
"""
|
| 155 |
)
|
| 156 |
+
with gr.Group():
|
| 157 |
+
with gr.Box():
|
| 158 |
+
textbox = gr.Textbox(
|
| 159 |
+
value="A hammer is hitting a wooden surface",
|
| 160 |
+
max_lines=1,
|
| 161 |
+
label="Input text",
|
| 162 |
+
info="Your text is important for the audio quality. Please ensure it is descriptive by using more adjectives.",
|
| 163 |
+
elem_id="prompt-in",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
)
|
| 165 |
+
negative_textbox = gr.Textbox(
|
| 166 |
+
value="low quality, average quality",
|
| 167 |
+
max_lines=1,
|
| 168 |
+
label="Negative prompt",
|
| 169 |
+
info="Enter a negative prompt describing what the audio generation should steer away from. Selecting appropriate negative prompts can improve the audio quality significantly.",
|
| 170 |
+
elem_id="prompt-in",
|
|
|
|
|
|
|
| 171 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
|
| 173 |
+
with gr.Accordion("Click to modify detailed configurations", open=False):
|
| 174 |
+
seed = gr.Number(
|
| 175 |
+
value=45,
|
| 176 |
+
label="Seed",
|
| 177 |
+
info="Changing this value (any integer) will lead to a different generation result.",
|
| 178 |
+
)
|
| 179 |
+
duration = gr.Slider(2.5, 10, value=5, step=2.5, label="Duration (seconds)")
|
| 180 |
+
guidance_scale = gr.Slider(
|
| 181 |
+
0,
|
| 182 |
+
5,
|
| 183 |
+
value=3.5,
|
| 184 |
+
step=0.5,
|
| 185 |
+
label="Guidance scale",
|
| 186 |
+
info="Large => better quality and relevancy to text; Small => better diversity",
|
| 187 |
+
)
|
| 188 |
+
n_candidates = gr.Slider(
|
| 189 |
+
1,
|
| 190 |
+
3,
|
| 191 |
+
value=3,
|
| 192 |
+
step=1,
|
| 193 |
+
label="Number of waveforms to generate",
|
| 194 |
+
info="Automatic quality control. This number controls the number of candidates (e.g., generate three audio clips and choose the best to show you). A larger value usually leads to better quality at the cost of heavier computation.",
|
| 195 |
+
)
|
| 196 |
+
|
| 197 |
+
outputs = gr.Video(label="Output", elem_id="output-video")
|
| 198 |
+
btn = gr.Button("Submit").style(full_width=True)
|
| 199 |
+
|
| 200 |
+
with gr.Group(elem_id="share-btn-container", visible=False):
|
| 201 |
+
community_icon = gr.HTML(community_icon_html)
|
| 202 |
+
loading_icon = gr.HTML(loading_icon_html)
|
| 203 |
+
share_button = gr.Button("Share to community", elem_id="share-btn")
|
| 204 |
+
|
| 205 |
+
btn.click(
|
| 206 |
+
text2audio,
|
| 207 |
+
inputs=[textbox, negative_textbox, duration, guidance_scale, seed, n_candidates],
|
| 208 |
+
outputs=[outputs],
|
| 209 |
+
)
|
| 210 |
+
|
| 211 |
+
share_button.click(None, [], [], _js=share_js)
|
| 212 |
+
gr.HTML(
|
| 213 |
+
gr.Examples(
|
| 214 |
+
[
|
| 215 |
+
["A hammer is hitting a wooden surface", "low quality, average quality", 5, 2.5, 45, 3],
|
| 216 |
+
["Peaceful and calming ambient music with singing bowl and other instruments.", "low quality, average quality", 5, 2.5, 45, 3],
|
| 217 |
+
["A man is speaking in a small room.", "low quality, average quality", 5, 2.5, 45, 3],
|
| 218 |
+
["A female is speaking followed by footstep sound", "low quality, average quality", 5, 2.5, 45, 3],
|
| 219 |
+
["Wooden table tapping sound followed by water pouring sound.", "low quality, average quality", 5, 2.5, 45, 3],
|
| 220 |
+
],
|
| 221 |
+
fn=text2audio,
|
| 222 |
+
inputs=[textbox, negative_textbox, duration, guidance_scale, seed, n_candidates],
|
| 223 |
+
outputs=[outputs],
|
| 224 |
+
cache_examples=True,
|
| 225 |
+
)
|
| 226 |
+
gr.HTML(
|
| 227 |
"""
|
| 228 |
+
<div class="acknowledgements"> <p>Essential Tricks for Enhancing the Quality of Your Generated
|
| 229 |
+
Audio</p> <p>1. Try to use more adjectives to describe your sound. For example: "A man is speaking
|
| 230 |
+
clearly and slowly in a large room" is better than "A man is speaking". This can make sure AudioLDM
|
| 231 |
+
understands what you want.</p> <p>2. Try to use different random seeds, which can affect the generation
|
| 232 |
+
quality significantly sometimes.</p> <p>3. It's better to use general terms like 'man' or 'woman'
|
| 233 |
+
instead of specific names for individuals or abstract objects that humans may not be familiar with,
|
| 234 |
+
such as 'mummy'.</p> <p>4. Using a negative prompt to steer the diffusion process away from unwanted qualities can improve the
|
| 235 |
+
audio quality significantly. Try using negative prompts like 'low quality'.</p> </div>
|
| 236 |
+
"""
|
| 237 |
+
)
|