Nekochu committed on
Commit
4b376ab
·
1 Parent(s): 562fa54

compact grid UI, 4B default, show training model

Browse files
Files changed (1) hide show
  1. app.py +26 -115
app.py CHANGED
@@ -398,131 +398,48 @@ def build_ui():
398
  caption_input = gr.Textbox(
399
  label="Music Description",
400
  placeholder="e.g. upbeat electronic dance music, 120 BPM",
401
- lines=3,
402
  value="upbeat electronic dance music, energetic synth leads, driving bassline",
403
  )
404
  lyrics_input = gr.Textbox(
405
- label="Lyrics (use [Instrumental] for no vocals)",
406
- placeholder="[Instrumental]",
407
- lines=3,
408
  value="[Instrumental]",
409
  )
410
- instrumental_cb = gr.Checkbox(
411
- label="Instrumental (no vocals)",
412
- value=True,
413
- )
414
  with gr.Column(scale=1):
415
- bpm_input = gr.Number(
416
- label="BPM (0 = auto)",
417
- value=120,
418
- minimum=0,
419
- maximum=300,
420
- )
421
- duration_input = gr.Slider(
422
- label="Duration (seconds)",
423
- minimum=10,
424
- maximum=120,
425
- value=10,
426
- step=5,
427
- )
428
- seed_input = gr.Number(
429
- label="Seed (-1 = random)",
430
- value=-1,
431
- )
432
- steps_input = gr.Slider(
433
- label="Inference Steps (fewer = faster)",
434
- minimum=1,
435
- maximum=32,
436
- value=8,
437
- step=1,
438
- )
439
- lm_size_input = gr.Dropdown(
440
- label="LM Model Size",
441
- choices=["0.6B (fast)", "1.7B (balanced)", "4B (best quality)"],
442
- value="1.7B (balanced)",
443
- info="Language model for music understanding",
444
- )
445
- lora_select = gr.Dropdown(
446
- label="Use Trained LoRA",
447
- choices=get_trained_loras(),
448
- value="None (no LoRA)",
449
- info="Select a LoRA you trained to apply it",
450
- )
451
-
452
- generate_btn = gr.Button("Generate Music", variant="primary")
453
-
454
  with gr.Row():
455
- audio_output = gr.Audio(
456
- label="Generated Audio",
457
- type="filepath",
458
- )
459
- gen_status = gr.Textbox(
460
- label="Status",
461
- interactive=False,
462
- elem_classes="status-box",
463
- )
464
 
465
  generate_btn.click(
466
  fn=generate_music,
467
- inputs=[
468
- caption_input,
469
- lyrics_input,
470
- instrumental_cb,
471
- bpm_input,
472
- duration_input,
473
- seed_input,
474
- steps_input,
475
- lm_size_input,
476
- lora_select,
477
- ],
478
  outputs=[audio_output, gen_status],
479
  )
480
 
481
  # ---- Train LoRA Tab ----
482
  with gr.Tab("Train LoRA"):
483
- gr.Markdown(
484
- "### Train a LoRA adapter on your audio files\n"
485
- "Upload WAV/MP3/FLAC files to fine-tune the model. "
486
- "Training runs on CPU so keep epochs low and files short."
487
- )
488
  with gr.Row():
489
- with gr.Column():
490
- audio_upload = gr.File(
491
- label="Upload Audio Files",
492
- file_count="multiple",
493
- file_types=["audio"],
494
- )
495
- lora_name_input = gr.Textbox(
496
- label="LoRA Name",
497
- value="my_lora",
498
- )
499
- with gr.Column():
500
- epochs_input = gr.Slider(
501
- label="Epochs",
502
- minimum=1,
503
- maximum=10,
504
- value=1,
505
- step=1,
506
- )
507
- lr_input = gr.Number(
508
- label="Learning Rate",
509
- value=1e-4,
510
- )
511
- rank_input = gr.Slider(
512
- label="LoRA Rank",
513
- minimum=1,
514
- maximum=64,
515
- value=8,
516
- step=1,
517
- )
518
-
519
  train_btn = gr.Button("Start Training", variant="primary")
520
- train_log = gr.Textbox(
521
- label="Training Log",
522
- interactive=False,
523
- lines=15,
524
- elem_classes="status-box",
525
- )
526
 
527
  def train_and_refresh(*args):
528
  log = train_lora(*args)
@@ -531,13 +448,7 @@ def build_ui():
531
 
532
  train_btn.click(
533
  fn=train_and_refresh,
534
- inputs=[
535
- audio_upload,
536
- lora_name_input,
537
- epochs_input,
538
- lr_input,
539
- rank_input,
540
- ],
541
  outputs=[train_log, lora_select],
542
  )
543
 
 
398
  caption_input = gr.Textbox(
399
  label="Music Description",
400
  placeholder="e.g. upbeat electronic dance music, 120 BPM",
401
+ lines=2,
402
  value="upbeat electronic dance music, energetic synth leads, driving bassline",
403
  )
404
  lyrics_input = gr.Textbox(
405
+ label="Lyrics ([Instrumental] for no vocals)",
406
+ lines=2,
 
407
  value="[Instrumental]",
408
  )
 
 
 
 
409
  with gr.Column(scale=1):
410
+ audio_output = gr.Audio(label="Output", type="filepath")
411
+ gen_status = gr.Textbox(label="Status", interactive=False, lines=1, elem_classes="status-box")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
412
  with gr.Row():
413
+ instrumental_cb = gr.Checkbox(label="Instrumental", value=True, scale=1)
414
+ bpm_input = gr.Number(label="BPM", value=120, minimum=0, maximum=300, scale=1)
415
+ duration_input = gr.Slider(label="Duration (s)", minimum=10, maximum=120, value=10, step=5, scale=1)
416
+ steps_input = gr.Slider(label="Steps", minimum=1, maximum=32, value=8, step=1, scale=1)
417
+ with gr.Row():
418
+ seed_input = gr.Number(label="Seed", value=-1, scale=1)
419
+ lm_size_input = gr.Dropdown(label="LM Size", choices=["0.6B (fast)", "1.7B (balanced)", "4B (best quality)"], value="4B (best quality)", scale=1)
420
+ lora_select = gr.Dropdown(label="LoRA", choices=get_trained_loras(), value="None (no LoRA)", scale=1)
421
+ generate_btn = gr.Button("Generate Music", variant="primary")
422
 
423
  generate_btn.click(
424
  fn=generate_music,
425
+ inputs=[caption_input, lyrics_input, instrumental_cb, bpm_input, duration_input, seed_input, steps_input, lm_size_input, lora_select],
 
 
 
 
 
 
 
 
 
 
426
  outputs=[audio_output, gen_status],
427
  )
428
 
429
  # ---- Train LoRA Tab ----
430
  with gr.Tab("Train LoRA"):
431
+ gr.Markdown("Upload audio files to train a LoRA adapter. Training on CPU, keep epochs low.")
 
 
 
 
432
  with gr.Row():
433
+ audio_upload = gr.File(label="Audio Files", file_count="multiple", file_types=["audio"], scale=2)
434
+ with gr.Column(scale=1):
435
+ lora_name_input = gr.Textbox(label="LoRA Name", value="my_lora")
436
+ train_model_info = gr.Textbox(label="Training Model", value="acestep-v15-turbo (DiT decoder)", interactive=False)
437
+ with gr.Row():
438
+ epochs_input = gr.Slider(label="Epochs", minimum=1, maximum=10, value=1, step=1, scale=1)
439
+ lr_input = gr.Number(label="LR", value=1e-4, scale=1)
440
+ rank_input = gr.Slider(label="LoRA Rank", minimum=1, maximum=64, value=8, step=1, scale=1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
441
  train_btn = gr.Button("Start Training", variant="primary")
442
+ train_log = gr.Textbox(label="Training Log", interactive=False, lines=10, elem_classes="status-box")
 
 
 
 
 
443
 
444
  def train_and_refresh(*args):
445
  log = train_lora(*args)
 
448
 
449
  train_btn.click(
450
  fn=train_and_refresh,
451
+ inputs=[audio_upload, lora_name_input, epochs_input, lr_input, rank_input],
 
 
 
 
 
 
452
  outputs=[train_log, lora_select],
453
  )
454