Spaces:
Running
Running
compact grid UI, 4B default, show training model
Browse files
app.py
CHANGED
|
@@ -398,131 +398,48 @@ def build_ui():
|
|
| 398 |
caption_input = gr.Textbox(
|
| 399 |
label="Music Description",
|
| 400 |
placeholder="e.g. upbeat electronic dance music, 120 BPM",
|
| 401 |
-
lines=
|
| 402 |
value="upbeat electronic dance music, energetic synth leads, driving bassline",
|
| 403 |
)
|
| 404 |
lyrics_input = gr.Textbox(
|
| 405 |
-
label="Lyrics (
|
| 406 |
-
|
| 407 |
-
lines=3,
|
| 408 |
value="[Instrumental]",
|
| 409 |
)
|
| 410 |
-
instrumental_cb = gr.Checkbox(
|
| 411 |
-
label="Instrumental (no vocals)",
|
| 412 |
-
value=True,
|
| 413 |
-
)
|
| 414 |
with gr.Column(scale=1):
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
value=120,
|
| 418 |
-
minimum=0,
|
| 419 |
-
maximum=300,
|
| 420 |
-
)
|
| 421 |
-
duration_input = gr.Slider(
|
| 422 |
-
label="Duration (seconds)",
|
| 423 |
-
minimum=10,
|
| 424 |
-
maximum=120,
|
| 425 |
-
value=10,
|
| 426 |
-
step=5,
|
| 427 |
-
)
|
| 428 |
-
seed_input = gr.Number(
|
| 429 |
-
label="Seed (-1 = random)",
|
| 430 |
-
value=-1,
|
| 431 |
-
)
|
| 432 |
-
steps_input = gr.Slider(
|
| 433 |
-
label="Inference Steps (fewer = faster)",
|
| 434 |
-
minimum=1,
|
| 435 |
-
maximum=32,
|
| 436 |
-
value=8,
|
| 437 |
-
step=1,
|
| 438 |
-
)
|
| 439 |
-
lm_size_input = gr.Dropdown(
|
| 440 |
-
label="LM Model Size",
|
| 441 |
-
choices=["0.6B (fast)", "1.7B (balanced)", "4B (best quality)"],
|
| 442 |
-
value="1.7B (balanced)",
|
| 443 |
-
info="Language model for music understanding",
|
| 444 |
-
)
|
| 445 |
-
lora_select = gr.Dropdown(
|
| 446 |
-
label="Use Trained LoRA",
|
| 447 |
-
choices=get_trained_loras(),
|
| 448 |
-
value="None (no LoRA)",
|
| 449 |
-
info="Select a LoRA you trained to apply it",
|
| 450 |
-
)
|
| 451 |
-
|
| 452 |
-
generate_btn = gr.Button("Generate Music", variant="primary")
|
| 453 |
-
|
| 454 |
with gr.Row():
|
| 455 |
-
|
| 456 |
-
|
| 457 |
-
|
| 458 |
-
)
|
| 459 |
-
|
| 460 |
-
label="
|
| 461 |
-
|
| 462 |
-
|
| 463 |
-
)
|
| 464 |
|
| 465 |
generate_btn.click(
|
| 466 |
fn=generate_music,
|
| 467 |
-
inputs=[
|
| 468 |
-
caption_input,
|
| 469 |
-
lyrics_input,
|
| 470 |
-
instrumental_cb,
|
| 471 |
-
bpm_input,
|
| 472 |
-
duration_input,
|
| 473 |
-
seed_input,
|
| 474 |
-
steps_input,
|
| 475 |
-
lm_size_input,
|
| 476 |
-
lora_select,
|
| 477 |
-
],
|
| 478 |
outputs=[audio_output, gen_status],
|
| 479 |
)
|
| 480 |
|
| 481 |
# ---- Train LoRA Tab ----
|
| 482 |
with gr.Tab("Train LoRA"):
|
| 483 |
-
gr.Markdown(
|
| 484 |
-
"### Train a LoRA adapter on your audio files\n"
|
| 485 |
-
"Upload WAV/MP3/FLAC files to fine-tune the model. "
|
| 486 |
-
"Training runs on CPU so keep epochs low and files short."
|
| 487 |
-
)
|
| 488 |
with gr.Row():
|
| 489 |
-
|
| 490 |
-
|
| 491 |
-
|
| 492 |
-
|
| 493 |
-
|
| 494 |
-
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
value="my_lora",
|
| 498 |
-
)
|
| 499 |
-
with gr.Column():
|
| 500 |
-
epochs_input = gr.Slider(
|
| 501 |
-
label="Epochs",
|
| 502 |
-
minimum=1,
|
| 503 |
-
maximum=10,
|
| 504 |
-
value=1,
|
| 505 |
-
step=1,
|
| 506 |
-
)
|
| 507 |
-
lr_input = gr.Number(
|
| 508 |
-
label="Learning Rate",
|
| 509 |
-
value=1e-4,
|
| 510 |
-
)
|
| 511 |
-
rank_input = gr.Slider(
|
| 512 |
-
label="LoRA Rank",
|
| 513 |
-
minimum=1,
|
| 514 |
-
maximum=64,
|
| 515 |
-
value=8,
|
| 516 |
-
step=1,
|
| 517 |
-
)
|
| 518 |
-
|
| 519 |
train_btn = gr.Button("Start Training", variant="primary")
|
| 520 |
-
train_log = gr.Textbox(
|
| 521 |
-
label="Training Log",
|
| 522 |
-
interactive=False,
|
| 523 |
-
lines=15,
|
| 524 |
-
elem_classes="status-box",
|
| 525 |
-
)
|
| 526 |
|
| 527 |
def train_and_refresh(*args):
|
| 528 |
log = train_lora(*args)
|
|
@@ -531,13 +448,7 @@ def build_ui():
|
|
| 531 |
|
| 532 |
train_btn.click(
|
| 533 |
fn=train_and_refresh,
|
| 534 |
-
inputs=[
|
| 535 |
-
audio_upload,
|
| 536 |
-
lora_name_input,
|
| 537 |
-
epochs_input,
|
| 538 |
-
lr_input,
|
| 539 |
-
rank_input,
|
| 540 |
-
],
|
| 541 |
outputs=[train_log, lora_select],
|
| 542 |
)
|
| 543 |
|
|
|
|
| 398 |
caption_input = gr.Textbox(
|
| 399 |
label="Music Description",
|
| 400 |
placeholder="e.g. upbeat electronic dance music, 120 BPM",
|
| 401 |
+
lines=2,
|
| 402 |
value="upbeat electronic dance music, energetic synth leads, driving bassline",
|
| 403 |
)
|
| 404 |
lyrics_input = gr.Textbox(
|
| 405 |
+
label="Lyrics ([Instrumental] for no vocals)",
|
| 406 |
+
lines=2,
|
|
|
|
| 407 |
value="[Instrumental]",
|
| 408 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 409 |
with gr.Column(scale=1):
|
| 410 |
+
audio_output = gr.Audio(label="Output", type="filepath")
|
| 411 |
+
gen_status = gr.Textbox(label="Status", interactive=False, lines=1, elem_classes="status-box")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 412 |
with gr.Row():
|
| 413 |
+
instrumental_cb = gr.Checkbox(label="Instrumental", value=True, scale=1)
|
| 414 |
+
bpm_input = gr.Number(label="BPM", value=120, minimum=0, maximum=300, scale=1)
|
| 415 |
+
duration_input = gr.Slider(label="Duration (s)", minimum=10, maximum=120, value=10, step=5, scale=1)
|
| 416 |
+
steps_input = gr.Slider(label="Steps", minimum=1, maximum=32, value=8, step=1, scale=1)
|
| 417 |
+
with gr.Row():
|
| 418 |
+
seed_input = gr.Number(label="Seed", value=-1, scale=1)
|
| 419 |
+
lm_size_input = gr.Dropdown(label="LM Size", choices=["0.6B (fast)", "1.7B (balanced)", "4B (best quality)"], value="4B (best quality)", scale=1)
|
| 420 |
+
lora_select = gr.Dropdown(label="LoRA", choices=get_trained_loras(), value="None (no LoRA)", scale=1)
|
| 421 |
+
generate_btn = gr.Button("Generate Music", variant="primary")
|
| 422 |
|
| 423 |
generate_btn.click(
|
| 424 |
fn=generate_music,
|
| 425 |
+
inputs=[caption_input, lyrics_input, instrumental_cb, bpm_input, duration_input, seed_input, steps_input, lm_size_input, lora_select],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 426 |
outputs=[audio_output, gen_status],
|
| 427 |
)
|
| 428 |
|
| 429 |
# ---- Train LoRA Tab ----
|
| 430 |
with gr.Tab("Train LoRA"):
|
| 431 |
+
gr.Markdown("Upload audio files to train a LoRA adapter. Training on CPU, keep epochs low.")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 432 |
with gr.Row():
|
| 433 |
+
audio_upload = gr.File(label="Audio Files", file_count="multiple", file_types=["audio"], scale=2)
|
| 434 |
+
with gr.Column(scale=1):
|
| 435 |
+
lora_name_input = gr.Textbox(label="LoRA Name", value="my_lora")
|
| 436 |
+
train_model_info = gr.Textbox(label="Training Model", value="acestep-v15-turbo (DiT decoder)", interactive=False)
|
| 437 |
+
with gr.Row():
|
| 438 |
+
epochs_input = gr.Slider(label="Epochs", minimum=1, maximum=10, value=1, step=1, scale=1)
|
| 439 |
+
lr_input = gr.Number(label="LR", value=1e-4, scale=1)
|
| 440 |
+
rank_input = gr.Slider(label="LoRA Rank", minimum=1, maximum=64, value=8, step=1, scale=1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 441 |
train_btn = gr.Button("Start Training", variant="primary")
|
| 442 |
+
train_log = gr.Textbox(label="Training Log", interactive=False, lines=10, elem_classes="status-box")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 443 |
|
| 444 |
def train_and_refresh(*args):
|
| 445 |
log = train_lora(*args)
|
|
|
|
| 448 |
|
| 449 |
train_btn.click(
|
| 450 |
fn=train_and_refresh,
|
| 451 |
+
inputs=[audio_upload, lora_name_input, epochs_input, lr_input, rank_input],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 452 |
outputs=[train_log, lora_select],
|
| 453 |
)
|
| 454 |
|