Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
694aa5b
1
Parent(s):
59f0768
decapitalizing
Browse files
app.py
CHANGED
|
@@ -339,86 +339,87 @@ def calculate_optimal_bars(bpm):
|
|
| 339 |
|
| 340 |
# ========== GRADIO INTERFACE ==========
|
| 341 |
|
| 342 |
-
with gr.Blocks(title="
|
| 343 |
-
gr.Markdown("#
|
| 344 |
-
gr.Markdown("**
|
| 345 |
|
| 346 |
# ========== MODELS & PROJECT INFO ==========
|
| 347 |
-
with gr.Accordion("
|
| 348 |
|
| 349 |
with gr.Accordion("π stable-audio-open-small", open=False):
|
| 350 |
gr.Markdown("""
|
| 351 |
**stable-audio-open-small** is an incredibly fast model from the zachs and friends at Stability AI. It's capable of generating 12 seconds of audio in under a second, which gives rise to a lot of very interesting kinds of UX.
|
| 352 |
|
| 353 |
-
**
|
| 354 |
|
| 355 |
-
**
|
| 356 |
-
- 🤗 [
|
| 357 |
-
|
|
|
|
| 358 |
""")
|
| 359 |
|
| 360 |
-
with gr.Accordion("ποΈ
|
| 361 |
gr.Markdown("""
|
| 362 |
-
**MelodyFlow** is a model by
|
| 363 |
|
| 364 |
-
It's not officially a part of the audiocraft repo yet, but we use it as a docker container in the backend for gary4live.
|
| 365 |
|
| 366 |
-
**
|
| 367 |
-
- 🤗 [MelodyFlow Space](https://huggingface.co/spaces/Facebook/MelodyFlow)
|
| 368 |
-
|
|
|
|
| 369 |
""")
|
| 370 |
|
| 371 |
-
with gr.Accordion("
|
| 372 |
gr.Markdown("""
|
| 373 |
-
**gary4live** is a free/open source project that uses these models, along with
|
| 374 |
|
| 375 |
-
**
|
| 376 |
-
-
|
| 377 |
-
-
|
| 378 |
|
| 379 |
-
**
|
| 380 |
-
-
|
| 381 |
""")
|
| 382 |
|
| 383 |
-
with gr.Accordion("
|
| 384 |
gr.Markdown("""
|
| 385 |
-
**
|
| 386 |
-
1. **
|
| 387 |
-
2. **
|
| 388 |
-
3. **
|
| 389 |
-
4. **
|
| 390 |
-
5. **
|
| 391 |
-
|
| 392 |
-
**
|
| 393 |
-
-
|
| 394 |
-
-
|
| 395 |
-
-
|
| 396 |
-
- MelodyFlow integration for advanced style transfer
|
| 397 |
""")
|
| 398 |
|
| 399 |
# ========== GLOBAL CONTROLS ==========
|
| 400 |
-
gr.Markdown("## ποΈ
|
| 401 |
|
| 402 |
with gr.Row():
|
| 403 |
global_bpm = gr.Dropdown(
|
| 404 |
-
label="
|
| 405 |
choices=[90, 100, 110, 120, 130, 140, 150],
|
| 406 |
value=120,
|
| 407 |
-
info="
|
| 408 |
)
|
| 409 |
|
| 410 |
global_bars = gr.Dropdown(
|
| 411 |
-
label="
|
| 412 |
-
choices=[1, 2, 4
|
| 413 |
value=4,
|
| 414 |
-
info="
|
| 415 |
)
|
| 416 |
|
| 417 |
base_prompt = gr.Textbox(
|
| 418 |
-
label="
|
| 419 |
-
value="
|
| 420 |
-
placeholder="e.g., 'techno', '
|
| 421 |
-
info="
|
| 422 |
)
|
| 423 |
|
| 424 |
# Auto-suggest optimal bars based on BPM
|
|
@@ -429,64 +430,64 @@ with gr.Blocks(title="🎵 Stable Audio Loop Generator") as iface:
|
|
| 429 |
global_bpm.change(update_suggested_bars, inputs=[global_bpm], outputs=[global_bars])
|
| 430 |
|
| 431 |
# ========== LOOP GENERATION ==========
|
| 432 |
-
gr.Markdown("##
|
| 433 |
|
| 434 |
with gr.Row():
|
| 435 |
with gr.Column():
|
| 436 |
-
gr.Markdown("###
|
| 437 |
-
generate_drums_btn = gr.Button("
|
| 438 |
-
drums_audio = gr.Audio(label="
|
| 439 |
-
drums_status = gr.Textbox(label="
|
| 440 |
|
| 441 |
with gr.Column():
|
| 442 |
-
gr.Markdown("###
|
| 443 |
-
generate_instruments_btn = gr.Button("
|
| 444 |
-
instruments_audio = gr.Audio(label="
|
| 445 |
-
instruments_status = gr.Textbox(label="
|
| 446 |
|
| 447 |
# Seed controls
|
| 448 |
with gr.Row():
|
| 449 |
-
drums_seed = gr.Number(label="
|
| 450 |
-
instruments_seed = gr.Number(label="
|
| 451 |
|
| 452 |
# ========== COMBINATION ==========
|
| 453 |
-
gr.Markdown("##
|
| 454 |
|
| 455 |
with gr.Row():
|
| 456 |
num_repeats = gr.Slider(
|
| 457 |
-
label="
|
| 458 |
minimum=1,
|
| 459 |
maximum=5,
|
| 460 |
step=1,
|
| 461 |
value=2,
|
| 462 |
-
info="
|
| 463 |
)
|
| 464 |
-
combine_btn = gr.Button("
|
| 465 |
|
| 466 |
-
combined_audio = gr.Audio(label="
|
| 467 |
-
combine_status = gr.Textbox(label="
|
| 468 |
|
| 469 |
# ========== MELODYFLOW TRANSFORMATION ==========
|
| 470 |
-
gr.Markdown("##
|
| 471 |
|
| 472 |
with gr.Row():
|
| 473 |
with gr.Column():
|
| 474 |
transform_prompt = gr.Textbox(
|
| 475 |
-
label="
|
| 476 |
value="aggressive industrial techno with distorted sounds",
|
| 477 |
-
placeholder="
|
| 478 |
lines=2
|
| 479 |
)
|
| 480 |
|
| 481 |
with gr.Column():
|
| 482 |
transform_solver = gr.Dropdown(
|
| 483 |
-
label="
|
| 484 |
choices=["euler", "midpoint"],
|
| 485 |
value="euler",
|
| 486 |
info="EULER: faster (25 steps), MIDPOINT: slower (64 steps)"
|
| 487 |
)
|
| 488 |
transform_flowstep = gr.Slider(
|
| 489 |
-
label="
|
| 490 |
minimum=0.0,
|
| 491 |
maximum=0.15,
|
| 492 |
step=0.01,
|
|
@@ -494,9 +495,9 @@ with gr.Blocks(title="🎵 Stable Audio Loop Generator") as iface:
|
|
| 494 |
info="Lower = more dramatic transformation"
|
| 495 |
)
|
| 496 |
|
| 497 |
-
transform_btn = gr.Button("
|
| 498 |
-
transformed_audio = gr.Audio(label="
|
| 499 |
-
transform_status = gr.Textbox(label="
|
| 500 |
|
| 501 |
# ========== EVENT HANDLERS ==========
|
| 502 |
|
|
@@ -528,19 +529,19 @@ with gr.Blocks(title="🎵 Stable Audio Loop Generator") as iface:
|
|
| 528 |
outputs=[transformed_audio, transform_status]
|
| 529 |
)
|
| 530 |
|
| 531 |
-
# ========== EXAMPLES ==========
|
| 532 |
-
gr.Markdown("## 🎯 Example Workflows")
|
| 533 |
|
| 534 |
-
examples = gr.Examples(
|
| 535 |
-
|
| 536 |
-
|
| 537 |
-
|
| 538 |
-
|
| 539 |
-
|
| 540 |
-
|
| 541 |
-
|
| 542 |
-
|
| 543 |
-
)
|
| 544 |
|
| 545 |
if __name__ == "__main__":
|
| 546 |
iface.launch()
|
|
|
|
| 339 |
|
| 340 |
# ========== GRADIO INTERFACE ==========
|
| 341 |
|
| 342 |
+
with gr.Blocks(title="stable-melodyflow") as iface:
|
| 343 |
+
gr.Markdown("# stable-melodyflow (aka jerry and terry)")
|
| 344 |
+
gr.Markdown("**generate synchronized drum and instrument loops with stable-audio-open-small (jerry), then transform with melodyflow (terry)!**")
|
| 345 |
|
| 346 |
# ========== MODELS & PROJECT INFO ==========
|
| 347 |
+
with gr.Accordion(" some info about these models", open=False):
|
| 348 |
|
| 349 |
with gr.Accordion("π stable-audio-open-small", open=False):
|
| 350 |
gr.Markdown("""
|
| 351 |
**stable-audio-open-small** is an incredibly fast model from the zachs and friends at Stability AI. It's capable of generating 12 seconds of audio in under a second, which gives rise to a lot of very interesting kinds of UX.
|
| 352 |
|
| 353 |
+
**note about generation speed in this zerogpu space:** you'll notice generation times are a little slower here than if you were to use the model on a local gpu. that's just a result of the way zerogpu spaces work i think... let me know if there's a way to keep the model loaded in a zerogpu space!
|
| 354 |
|
| 355 |
+
**links:**
|
| 356 |
+
- 🤗 [model on HuggingFace](https://huggingface.co/stabilityai/stable-audio-open-small)
|
| 357 |
+
there's a docker container at this repo that can be spun up as a standalone api specifically for stable-audio-open-small:
|
| 358 |
+
- [stable-audio-api](https://github.com/betweentwomidnights/stable-audio-api)
|
| 359 |
""")
|
| 360 |
|
| 361 |
+
with gr.Accordion("ποΈ melodyflow", open=False):
|
| 362 |
gr.Markdown("""
|
| 363 |
+
**MelodyFlow** is a model by meta that can use regularized latent inversion to do transformations of input audio.
|
| 364 |
|
| 365 |
+
It's not officially a part of the audiocraft repo yet, but we use it as a docker container in the backend for gary4live. i really enjoy turning my guitar riffs into orchestra.
|
| 366 |
|
| 367 |
+
**links:**
|
| 368 |
+
- 🤗 [Official MelodyFlow Space](https://huggingface.co/spaces/Facebook/MelodyFlow)
|
| 369 |
+
|
| 370 |
+
- [our melodyflow api](https://github.com/betweentwomidnights/melodyflow)
|
| 371 |
""")
|
| 372 |
|
| 373 |
+
with gr.Accordion("gary4live Project", open=False):
|
| 374 |
gr.Markdown("""
|
| 375 |
+
**gary4live** is a free/open source project that uses these models, along with musicGen, inside of ableton live to iterate on your projects with you. i run a backend myself so that we can all experiment with it, but you can also spin the backend up locally using docker-compose with our repo.
|
| 376 |
|
| 377 |
+
**project Links:**
|
| 378 |
+
- [frontend repo](https://github.com/betweentwomidnights/gary4live)
|
| 379 |
+
- [backend repo](https://github.com/betweentwomidnights/gary-backend-combined)
|
| 380 |
|
| 381 |
+
**installers:**
|
| 382 |
+
- [p.c. & mac installers on gumroad](https://thepatch.gumroad.com/l/gary4live)
|
| 383 |
""")
|
| 384 |
|
| 385 |
+
with gr.Accordion("how this works", open=False):
|
| 386 |
gr.Markdown("""
|
| 387 |
+
**workflow:**
|
| 388 |
+
1. **set global bpm and bars** - affects both drum and instrument generation
|
| 389 |
+
2. **generate drum loop** - creates BPM-aware percussion with negative prompting to attempt to get rid of instruments
|
| 390 |
+
3. **generate instrument loop** - creates melodic/harmonic content with negative prompting to attempt to get rid of drums
|
| 391 |
+
4. **combine loops** - layer them together with repetitions (up to 30s)
|
| 392 |
+
5. **transform** - use melodyflow to stylistically transform the combined result
|
| 393 |
+
|
| 394 |
+
**features:**
|
| 395 |
+
- bpm-aware generation ensures perfect sync between loops (most the time lol)
|
| 396 |
+
- negative prompting separates drums from instruments (most the time)
|
| 397 |
+
- smart bar calculation optimizes loop length for the BPM
|
|
|
|
| 398 |
""")
|
| 399 |
|
| 400 |
# ========== GLOBAL CONTROLS ==========
|
| 401 |
+
gr.Markdown("## ποΈ global settings")
|
| 402 |
|
| 403 |
with gr.Row():
|
| 404 |
global_bpm = gr.Dropdown(
|
| 405 |
+
label="global bpm",
|
| 406 |
choices=[90, 100, 110, 120, 130, 140, 150],
|
| 407 |
value=120,
|
| 408 |
+
info="bpm applied to both drum and instrument generation. keep this the same for the combine step to work correctly"
|
| 409 |
)
|
| 410 |
|
| 411 |
global_bars = gr.Dropdown(
|
| 412 |
+
label="loop length (bars)",
|
| 413 |
+
choices=[1, 2, 4],
|
| 414 |
value=4,
|
| 415 |
+
info="number of bars for each loop. keep this the same for both pieces of audio"
|
| 416 |
)
|
| 417 |
|
| 418 |
base_prompt = gr.Textbox(
|
| 419 |
+
label="base prompt",
|
| 420 |
+
value="lofi hiphop with pianos",
|
| 421 |
+
placeholder="e.g., 'aggressive techno', 'lofi hiphop', 'chillwave', 'liquid drum and bass'",
|
| 422 |
+
info="prompt applied to either loop. make it more drum/instrument specific for best results"
|
| 423 |
)
|
| 424 |
|
| 425 |
# Auto-suggest optimal bars based on BPM
|
|
|
|
| 430 |
global_bpm.change(update_suggested_bars, inputs=[global_bpm], outputs=[global_bars])
|
| 431 |
|
| 432 |
# ========== LOOP GENERATION ==========
|
| 433 |
+
gr.Markdown("## step one: generate individual loops")
|
| 434 |
|
| 435 |
with gr.Row():
|
| 436 |
with gr.Column():
|
| 437 |
+
gr.Markdown("### drums")
|
| 438 |
+
generate_drums_btn = gr.Button("generate drums", variant="primary", size="lg")
|
| 439 |
+
drums_audio = gr.Audio(label="drum loop", type="filepath")
|
| 440 |
+
drums_status = gr.Textbox(label="status", value="ready to generate")
|
| 441 |
|
| 442 |
with gr.Column():
|
| 443 |
+
gr.Markdown("### instruments")
|
| 444 |
+
generate_instruments_btn = gr.Button("generate instruments", variant="secondary", size="lg")
|
| 445 |
+
instruments_audio = gr.Audio(label="instrument loop", type="filepath")
|
| 446 |
+
instruments_status = gr.Textbox(label="status", value="Ready to generate")
|
| 447 |
|
| 448 |
# Seed controls
|
| 449 |
with gr.Row():
|
| 450 |
+
drums_seed = gr.Number(label="drums seed", value=-1, info="-1 for random")
|
| 451 |
+
instruments_seed = gr.Number(label="instruments seed", value=-1, info="-1 for random")
|
| 452 |
|
| 453 |
# ========== COMBINATION ==========
|
| 454 |
+
gr.Markdown("## step two: combine loops")
|
| 455 |
|
| 456 |
with gr.Row():
|
| 457 |
num_repeats = gr.Slider(
|
| 458 |
+
label="number of repetitions",
|
| 459 |
minimum=1,
|
| 460 |
maximum=5,
|
| 461 |
step=1,
|
| 462 |
value=2,
|
| 463 |
+
info="how many times to repeat each loop (creates longer audio). aim for 30 seconds max"
|
| 464 |
)
|
| 465 |
+
combine_btn = gr.Button("combine", variant="primary", size="lg")
|
| 466 |
|
| 467 |
+
combined_audio = gr.Audio(label="combined loops", type="filepath")
|
| 468 |
+
combine_status = gr.Textbox(label="status", value="Generate loops first")
|
| 469 |
|
| 470 |
# ========== MELODYFLOW TRANSFORMATION ==========
|
| 471 |
+
gr.Markdown("## step three: transform with melodyflow")
|
| 472 |
|
| 473 |
with gr.Row():
|
| 474 |
with gr.Column():
|
| 475 |
transform_prompt = gr.Textbox(
|
| 476 |
+
label="transformation prompt",
|
| 477 |
value="aggressive industrial techno with distorted sounds",
|
| 478 |
+
placeholder="describe the style of transformation",
|
| 479 |
lines=2
|
| 480 |
)
|
| 481 |
|
| 482 |
with gr.Column():
|
| 483 |
transform_solver = gr.Dropdown(
|
| 484 |
+
label="solver",
|
| 485 |
choices=["euler", "midpoint"],
|
| 486 |
value="euler",
|
| 487 |
info="EULER: faster (25 steps), MIDPOINT: slower (64 steps)"
|
| 488 |
)
|
| 489 |
transform_flowstep = gr.Slider(
|
| 490 |
+
label="transform intensity",
|
| 491 |
minimum=0.0,
|
| 492 |
maximum=0.15,
|
| 493 |
step=0.01,
|
|
|
|
| 495 |
info="Lower = more dramatic transformation"
|
| 496 |
)
|
| 497 |
|
| 498 |
+
transform_btn = gr.Button("transform audio", variant="secondary", size="lg")
|
| 499 |
+
transformed_audio = gr.Audio(label="transformed audio", type="filepath")
|
| 500 |
+
transform_status = gr.Textbox(label="status", value="Combine audio first")
|
| 501 |
|
| 502 |
# ========== EVENT HANDLERS ==========
|
| 503 |
|
|
|
|
| 529 |
outputs=[transformed_audio, transform_status]
|
| 530 |
)
|
| 531 |
|
| 532 |
+
# # ========== EXAMPLES ==========
|
| 533 |
+
# gr.Markdown("## 🎯 Example Workflows")
|
| 534 |
|
| 535 |
+
# examples = gr.Examples(
|
| 536 |
+
# examples=[
|
| 537 |
+
# ["techno", 128, 4, "aggressive industrial techno"],
|
| 538 |
+
# ["jazz", 110, 2, "smooth lo-fi jazz with vinyl crackle"],
|
| 539 |
+
# ["ambient", 90, 8, "ethereal ambient soundscape"],
|
| 540 |
+
# ["hip-hop", 100, 4, "classic boom bap hip-hop"],
|
| 541 |
+
# ["drum and bass", 140, 4, "liquid drum and bass"],
|
| 542 |
+
# ],
|
| 543 |
+
# inputs=[base_prompt, global_bpm, global_bars, transform_prompt],
|
| 544 |
+
# )
|
| 545 |
|
| 546 |
if __name__ == "__main__":
|
| 547 |
iface.launch()
|