Spaces:
Sleeping
Sleeping
Variable Segment Length
Browse files
app.py
CHANGED
|
@@ -44,11 +44,10 @@ MAX_PROMPT_INDEX = 0
|
|
| 44 |
git = os.environ.get('GIT', "git")
|
| 45 |
#s.environ["CUDA_LAUNCH_BLOCKING"] = "1"
|
| 46 |
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128,expandable_segments:True"
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
|
| 53 |
def interrupt_callback():
|
| 54 |
return INTERRUPTED
|
|
@@ -134,7 +133,7 @@ def git_tag():
|
|
| 134 |
except Exception:
|
| 135 |
return "<none>"
|
| 136 |
|
| 137 |
-
def load_melody_filepath(melody_filepath, title, assigned_model,topp, temperature, cfg_coef):
|
| 138 |
# get melody filename
|
| 139 |
#$Union[str, os.PathLike]
|
| 140 |
symbols = ['_', '.', '-']
|
|
@@ -161,14 +160,14 @@ def load_melody_filepath(melody_filepath, title, assigned_model,topp, temperatur
|
|
| 161 |
# get melody length in number of segments and modify the UI
|
| 162 |
melody = get_melody(melody_filepath)
|
| 163 |
sr, melody_data = melody[0], melody[1]
|
| 164 |
-
segment_samples = sr * 30
|
| 165 |
total_melodys = max(min((len(melody_data) // segment_samples), 25), 0)
|
| 166 |
print(f"Melody length: {len(melody_data)}, Melody segments: {total_melodys}\n")
|
| 167 |
MAX_PROMPT_INDEX = total_melodys
|
| 168 |
|
| 169 |
return gr.update(value=melody_name), gr.update(maximum=MAX_PROMPT_INDEX, value=0), gr.update(value=assigned_model, interactive=True), gr.update(value=topp), gr.update(value=temperature), gr.update(value=cfg_coef)
|
| 170 |
|
| 171 |
-
def predict(model, text, melody_filepath, duration, dimension, topk, topp, temperature, cfg_coef, background, title, settings_font, settings_font_color, seed, overlap=1, prompt_index = 0, include_title = True, include_settings = True, harmony_only = False, profile = gr.OAuthProfile, progress=gr.Progress(track_tqdm=True)):
|
| 172 |
global MODEL, INTERRUPTED, INTERRUPTING, MOVE_TO_CPU
|
| 173 |
output_segments = None
|
| 174 |
melody_name = "Not Used"
|
|
@@ -219,6 +218,8 @@ def predict(model, text, melody_filepath, duration, dimension, topk, topp, tempe
|
|
| 219 |
segment_duration = duration + overlap
|
| 220 |
else:
|
| 221 |
segment_duration = MODEL.lm.cfg.dataset.segment_duration
|
|
|
|
|
|
|
| 222 |
# implement seed
|
| 223 |
if seed < 0:
|
| 224 |
seed = random.randint(0, 0xffff_ffff_ffff)
|
|
@@ -243,7 +244,7 @@ def predict(model, text, melody_filepath, duration, dimension, topk, topp, tempe
|
|
| 243 |
if melody and ("melody" in model):
|
| 244 |
# return excess duration, load next model and continue in loop structure building up output_segments
|
| 245 |
if duration > MODEL.lm.cfg.dataset.segment_duration:
|
| 246 |
-
output_segments, duration = generate_music_segments(text, melody, seed, MODEL, duration, overlap, MODEL.lm.cfg.dataset.segment_duration, prompt_index, harmony_only)
|
| 247 |
else:
|
| 248 |
# pure original code
|
| 249 |
sr, melody = melody[0], torch.from_numpy(melody[1]).to(MODEL.device).float().t().unsqueeze(0)
|
|
@@ -449,10 +450,12 @@ def ui(**kwargs):
|
|
| 449 |
with gr.Row():
|
| 450 |
with gr.Column():
|
| 451 |
with gr.Row():
|
| 452 |
-
text = gr.Text(label="Describe your music", interactive=True, value="4/4 100bpm 320kbps 48khz, Industrial/Electronic Soundtrack, Dark, Intense, Sci-Fi, soft fade-in, soft fade-out")
|
| 453 |
with gr.Column():
|
| 454 |
-
|
| 455 |
-
|
|
|
|
|
|
|
|
|
|
| 456 |
with gr.Row():
|
| 457 |
submit = gr.Button("Generate", elem_id="btn-generate")
|
| 458 |
# Adapted from https://github.com/rkfg/audiocraft/blob/long/app.py, MIT license.
|
|
@@ -460,42 +463,44 @@ def ui(**kwargs):
|
|
| 460 |
with gr.Row():
|
| 461 |
with gr.Column():
|
| 462 |
radio = gr.Radio(["file", "mic"], value="file", label="Condition on a melody (optional) File or Mic")
|
| 463 |
-
melody_filepath = gr.Audio(sources=["upload"], type="filepath", label="Melody Condition (optional)", interactive=True, elem_id="melody-input")
|
| 464 |
with gr.Column():
|
| 465 |
-
harmony_only = gr.Radio(label="Use Harmony Only",choices=["No", "Yes"], value="No", interactive=True, info="Remove Drums?")
|
| 466 |
-
prompt_index = gr.Slider(label="Melody Condition Sample Segment", minimum=-1, maximum=MAX_PROMPT_INDEX, step=1, value=0, interactive=True, info="Which 30 second segment to condition with, - 1
|
| 467 |
with gr.Accordion("Video", open=False):
|
| 468 |
with gr.Row():
|
| 469 |
-
background= gr.Image(value="./assets/background.png", sources=["upload"], label="Background", width=768, height=512, type="filepath", interactive=True)
|
| 470 |
with gr.Column():
|
| 471 |
-
include_title = gr.Checkbox(label="Add Title", value=True, interactive=True)
|
| 472 |
-
include_settings = gr.Checkbox(label="Add Settings to background", value=True, interactive=True)
|
| 473 |
with gr.Row():
|
| 474 |
-
title = gr.Textbox(label="Title", value="UnlimitedMusicGen", interactive=True)
|
| 475 |
settings_font = gr.Text(label="Settings Font", value="./assets/arial.ttf", interactive=True)
|
| 476 |
-
settings_font_color = gr.ColorPicker(label="Settings Font Color", value="#c87f05", interactive=True)
|
| 477 |
with gr.Accordion("Expert", open=False):
|
| 478 |
with gr.Row():
|
| 479 |
-
|
|
|
|
| 480 |
dimension = gr.Slider(minimum=-2, maximum=2, value=2, step=1, label="Dimension", info="determines which direction to add new segements of audio. (1 = stack tracks, 2 = lengthen, -2..0 = ?)", interactive=True)
|
| 481 |
with gr.Row():
|
| 482 |
-
topk = gr.Number(label="Top-k", value=280, precision=0, interactive=True)
|
| 483 |
-
topp = gr.Number(label="Top-p", value=1150, precision=0, interactive=True, info="overwrites Top-k if not zero")
|
| 484 |
-
temperature = gr.Number(label="Randomness Temperature", value=0.7, precision=None, interactive=True)
|
| 485 |
-
cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.
|
| 486 |
with gr.Row():
|
| 487 |
-
seed = gr.Number(label="Seed", value=-1, precision=0, interactive=True)
|
| 488 |
gr.Button('\U0001f3b2\ufe0f', elem_classes="small-btn").click(fn=lambda: -1, outputs=[seed], queue=False)
|
| 489 |
reuse_seed = gr.Button('\u267b\ufe0f', elem_classes="small-btn")
|
| 490 |
with gr.Column() as c:
|
| 491 |
-
output = gr.Video(label="Generated Music")
|
| 492 |
wave_file = gr.File(label=".wav file", elem_id="output_wavefile", interactive=True)
|
| 493 |
seed_used = gr.Number(label='Seed used', value=-1, interactive=False)
|
| 494 |
|
| 495 |
radio.change(toggle_audio_src, radio, [melody_filepath], queue=False, show_progress=False)
|
| 496 |
-
melody_filepath.change(load_melody_filepath, inputs=[melody_filepath, title, model,topp, temperature, cfg_coef], outputs=[title, prompt_index , model, topp, temperature, cfg_coef], api_name="melody_filepath_change", queue=False)
|
| 497 |
-
reuse_seed.click(fn=lambda x: x, inputs=[seed_used], outputs=[seed], queue=False, api_name="
|
| 498 |
-
|
|
|
|
| 499 |
gr.Examples(
|
| 500 |
examples=[
|
| 501 |
[
|
|
@@ -505,7 +510,7 @@ def ui(**kwargs):
|
|
| 505 |
"80s Pop Synth",
|
| 506 |
950,
|
| 507 |
0.6,
|
| 508 |
-
3.
|
| 509 |
],
|
| 510 |
[
|
| 511 |
"4/4 120bpm 320kbps 48khz, A cheerful country song with acoustic guitars",
|
|
@@ -514,7 +519,7 @@ def ui(**kwargs):
|
|
| 514 |
"Country Guitar",
|
| 515 |
750,
|
| 516 |
0.7,
|
| 517 |
-
|
| 518 |
],
|
| 519 |
[
|
| 520 |
"4/4 120bpm 320kbps 48khz, 90s rock song with electric guitar and heavy drums",
|
|
@@ -523,7 +528,7 @@ def ui(**kwargs):
|
|
| 523 |
"90s Rock Guitar",
|
| 524 |
1150,
|
| 525 |
0.7,
|
| 526 |
-
3.
|
| 527 |
],
|
| 528 |
[
|
| 529 |
"4/4 120bpm 320kbps 48khz, a light and cheerly EDM track, with syncopated drums, aery pads, and strong emotions",
|
|
@@ -532,7 +537,7 @@ def ui(**kwargs):
|
|
| 532 |
"EDM my Bach",
|
| 533 |
500,
|
| 534 |
0.7,
|
| 535 |
-
3.
|
| 536 |
],
|
| 537 |
[
|
| 538 |
"4/4 320kbps 48khz, lofi slow bpm electro chill with organic samples",
|
|
@@ -563,8 +568,8 @@ def ui(**kwargs):
|
|
| 563 |
api_name="submit"
|
| 564 |
).then(
|
| 565 |
predict,
|
| 566 |
-
inputs=[model, text,melody_filepath, duration, dimension, topk, topp, temperature, cfg_coef, background, title, settings_font, settings_font_color, seed, overlap, prompt_index, include_title, include_settings, harmony_only, user_profile],
|
| 567 |
-
outputs=[output, wave_file, seed_used])
|
| 568 |
|
| 569 |
# Show the interface
|
| 570 |
launch_kwargs = {}
|
|
@@ -578,11 +583,9 @@ def ui(**kwargs):
|
|
| 578 |
launch_kwargs['server_port'] = server_port
|
| 579 |
if share:
|
| 580 |
launch_kwargs['share'] = share
|
| 581 |
-
launch_kwargs['favicon_path']= "./assets/favicon.ico"
|
| 582 |
-
|
| 583 |
|
| 584 |
|
| 585 |
-
demo.queue(max_size=10, api_open=False).launch(**launch_kwargs)
|
| 586 |
|
| 587 |
if __name__ == "__main__":
|
| 588 |
parser = argparse.ArgumentParser()
|
|
|
|
| 44 |
git = os.environ.get('GIT', "git")
|
| 45 |
#s.environ["CUDA_LAUNCH_BLOCKING"] = "1"
|
| 46 |
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128,expandable_segments:True"
|
| 47 |
+
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
|
| 48 |
+
os.environ['CUDA_MODULE_LOADING']='LAZY'
|
| 49 |
+
os.environ['USE_FLASH_ATTENTION'] = '1'
|
| 50 |
+
os.environ['XFORMERS_FORCE_DISABLE_TRITON']= '1'
|
|
|
|
| 51 |
|
| 52 |
def interrupt_callback():
|
| 53 |
return INTERRUPTED
|
|
|
|
| 133 |
except Exception:
|
| 134 |
return "<none>"
|
| 135 |
|
| 136 |
+
def load_melody_filepath(melody_filepath, title, assigned_model,topp, temperature, cfg_coef, segment_duration = 30):
|
| 137 |
# get melody filename
|
| 138 |
#$Union[str, os.PathLike]
|
| 139 |
symbols = ['_', '.', '-']
|
|
|
|
| 160 |
# get melody length in number of segments and modify the UI
|
| 161 |
melody = get_melody(melody_filepath)
|
| 162 |
sr, melody_data = melody[0], melody[1]
|
| 163 |
+
segment_samples = sr * segment_duration
|
| 164 |
total_melodys = max(min((len(melody_data) // segment_samples), 25), 0)
|
| 165 |
print(f"Melody length: {len(melody_data)}, Melody segments: {total_melodys}\n")
|
| 166 |
MAX_PROMPT_INDEX = total_melodys
|
| 167 |
|
| 168 |
return gr.update(value=melody_name), gr.update(maximum=MAX_PROMPT_INDEX, value=0), gr.update(value=assigned_model, interactive=True), gr.update(value=topp), gr.update(value=temperature), gr.update(value=cfg_coef)
|
| 169 |
|
| 170 |
+
def predict(model, text, melody_filepath, duration, dimension, topk, topp, temperature, cfg_coef, background, title, settings_font, settings_font_color, seed, overlap=1, prompt_index = 0, include_title = True, include_settings = True, harmony_only = False, profile = gr.OAuthProfile, segment_length = 30, progress=gr.Progress(track_tqdm=True)):
|
| 171 |
global MODEL, INTERRUPTED, INTERRUPTING, MOVE_TO_CPU
|
| 172 |
output_segments = None
|
| 173 |
melody_name = "Not Used"
|
|
|
|
| 218 |
segment_duration = duration + overlap
|
| 219 |
else:
|
| 220 |
segment_duration = MODEL.lm.cfg.dataset.segment_duration
|
| 221 |
+
if (segment_length + overlap) < segment_duration:
|
| 222 |
+
segment_duration = segment_length + overlap
|
| 223 |
# implement seed
|
| 224 |
if seed < 0:
|
| 225 |
seed = random.randint(0, 0xffff_ffff_ffff)
|
|
|
|
| 244 |
if melody and ("melody" in model):
|
| 245 |
# return excess duration, load next model and continue in loop structure building up output_segments
|
| 246 |
if duration > MODEL.lm.cfg.dataset.segment_duration:
|
| 247 |
+
output_segments, duration = generate_music_segments(text, melody, seed, MODEL, duration, overlap, MODEL.lm.cfg.dataset.segment_duration, prompt_index, harmony_only, progress=gr.Progress(track_tqdm=True))
|
| 248 |
else:
|
| 249 |
# pure original code
|
| 250 |
sr, melody = melody[0], torch.from_numpy(melody[1]).to(MODEL.device).float().t().unsqueeze(0)
|
|
|
|
| 450 |
with gr.Row():
|
| 451 |
with gr.Column():
|
| 452 |
with gr.Row():
|
|
|
|
| 453 |
with gr.Column():
|
| 454 |
+
text = gr.Text(label="Describe your music", interactive=True, value="4/4 100bpm 320kbps 48khz, Industrial/Electronic Soundtrack, Dark, Intense, Sci-Fi, soft fade-in, soft fade-out", key="prompt", lines=4)
|
| 455 |
+
autoplay_cb = gr.Checkbox(value=False, label="Autoplay?", key="autoplay_cb")
|
| 456 |
+
with gr.Column():
|
| 457 |
+
duration = gr.Slider(minimum=1, maximum=720, value=10, label="Duration (s)", interactive=True, key="total_duration")
|
| 458 |
+
model = gr.Radio(["melody", "medium", "small", "large", "melody-large", "stereo-small", "stereo-medium", "stereo-large", "stereo-melody", "stereo-melody-large"], label="AI Model", value="medium", interactive=True, key="chosen_model")
|
| 459 |
with gr.Row():
|
| 460 |
submit = gr.Button("Generate", elem_id="btn-generate")
|
| 461 |
# Adapted from https://github.com/rkfg/audiocraft/blob/long/app.py, MIT license.
|
|
|
|
| 463 |
with gr.Row():
|
| 464 |
with gr.Column():
|
| 465 |
radio = gr.Radio(["file", "mic"], value="file", label="Condition on a melody (optional) File or Mic")
|
| 466 |
+
melody_filepath = gr.Audio(sources=["upload"], type="filepath", label="Melody Condition (optional)", interactive=True, elem_id="melody-input", key="melody_input")
|
| 467 |
with gr.Column():
|
| 468 |
+
harmony_only = gr.Radio(label="Use Harmony Only",choices=["No", "Yes"], value="No", interactive=True, info="Remove Drums?", key="use_harmony")
|
| 469 |
+
prompt_index = gr.Slider(label="Melody Condition Sample Segment", minimum=-1, maximum=MAX_PROMPT_INDEX, step=1, value=0, interactive=True, info="Which 15-30 second segment to condition with, - 1 = align with conditioning melody", key="melody_index")
|
| 470 |
with gr.Accordion("Video", open=False):
|
| 471 |
with gr.Row():
|
| 472 |
+
background= gr.Image(value="./assets/background.png", sources=["upload"], label="Background", width=768, height=512, type="filepath", interactive=True, key="background_imagepath")
|
| 473 |
with gr.Column():
|
| 474 |
+
include_title = gr.Checkbox(label="Add Title", value=True, interactive=True,key="add_title")
|
| 475 |
+
include_settings = gr.Checkbox(label="Add Settings to background", value=True, interactive=True, key="add_settings")
|
| 476 |
with gr.Row():
|
| 477 |
+
title = gr.Textbox(label="Title", value="UnlimitedMusicGen", interactive=True, key="song_title")
|
| 478 |
settings_font = gr.Text(label="Settings Font", value="./assets/arial.ttf", interactive=True)
|
| 479 |
+
settings_font_color = gr.ColorPicker(label="Settings Font Color", value="#c87f05", interactive=True, key="settings_font_color")
|
| 480 |
with gr.Accordion("Expert", open=False):
|
| 481 |
with gr.Row():
|
| 482 |
+
segment_duration = gr.Slider(minimum=10, maximum=30, value=30, step =1,label="Music Generation Segment Length (s)", interactive=True)
|
| 483 |
+
overlap = gr.Slider(minimum=0, maximum=15, value=1, step=1, label="Segment Overlap", interactive=True)
|
| 484 |
dimension = gr.Slider(minimum=-2, maximum=2, value=2, step=1, label="Dimension", info="determines which direction to add new segements of audio. (1 = stack tracks, 2 = lengthen, -2..0 = ?)", interactive=True)
|
| 485 |
with gr.Row():
|
| 486 |
+
topk = gr.Number(label="Top-k", value=280, precision=0, interactive=True, info="more structured", key="topk")
|
| 487 |
+
topp = gr.Number(label="Top-p", value=1150, precision=0, interactive=True, info="more variation, overwrites Top-k if not zero", key="topp")
|
| 488 |
+
temperature = gr.Number(label="Randomness Temperature", value=0.7, precision=None, interactive=True, info="less than one to follow Melody Condition song closely", key="temperature")
|
| 489 |
+
cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.75, precision=None, interactive=True, info="3.0-4.0, stereo and small need more", key="cfg_coef")
|
| 490 |
with gr.Row():
|
| 491 |
+
seed = gr.Number(label="Seed", value=-1, precision=0, interactive=True, key="seed")
|
| 492 |
gr.Button('\U0001f3b2\ufe0f', elem_classes="small-btn").click(fn=lambda: -1, outputs=[seed], queue=False)
|
| 493 |
reuse_seed = gr.Button('\u267b\ufe0f', elem_classes="small-btn")
|
| 494 |
with gr.Column() as c:
|
| 495 |
+
output = gr.Video(label="Generated Music", interactive=False, show_download_button=True, show_share_button=True, autoplay=False)
|
| 496 |
wave_file = gr.File(label=".wav file", elem_id="output_wavefile", interactive=True)
|
| 497 |
seed_used = gr.Number(label='Seed used', value=-1, interactive=False)
|
| 498 |
|
| 499 |
radio.change(toggle_audio_src, radio, [melody_filepath], queue=False, show_progress=False)
|
| 500 |
+
melody_filepath.change(load_melody_filepath, inputs=[melody_filepath, title, model,topp, temperature, cfg_coef, segment_duration], outputs=[title, prompt_index , model, topp, temperature, cfg_coef], api_name="melody_filepath_change", queue=False)
|
| 501 |
+
reuse_seed.click(fn=lambda x: x, inputs=[seed_used], outputs=[seed], queue=False, api_name="reuse_seed_click")
|
| 502 |
+
autoplay_cb.change(fn=lambda x: gr.update(autoplay=x), inputs=[autoplay_cb], outputs=[output], queue=False, api_name="autoplay_cb_change")
|
| 503 |
+
|
| 504 |
gr.Examples(
|
| 505 |
examples=[
|
| 506 |
[
|
|
|
|
| 510 |
"80s Pop Synth",
|
| 511 |
950,
|
| 512 |
0.6,
|
| 513 |
+
3.5
|
| 514 |
],
|
| 515 |
[
|
| 516 |
"4/4 120bpm 320kbps 48khz, A cheerful country song with acoustic guitars",
|
|
|
|
| 519 |
"Country Guitar",
|
| 520 |
750,
|
| 521 |
0.7,
|
| 522 |
+
4.0
|
| 523 |
],
|
| 524 |
[
|
| 525 |
"4/4 120bpm 320kbps 48khz, 90s rock song with electric guitar and heavy drums",
|
|
|
|
| 528 |
"90s Rock Guitar",
|
| 529 |
1150,
|
| 530 |
0.7,
|
| 531 |
+
3.75
|
| 532 |
],
|
| 533 |
[
|
| 534 |
"4/4 120bpm 320kbps 48khz, a light and cheerly EDM track, with syncopated drums, aery pads, and strong emotions",
|
|
|
|
| 537 |
"EDM my Bach",
|
| 538 |
500,
|
| 539 |
0.7,
|
| 540 |
+
3.75
|
| 541 |
],
|
| 542 |
[
|
| 543 |
"4/4 320kbps 48khz, lofi slow bpm electro chill with organic samples",
|
|
|
|
| 568 |
api_name="submit"
|
| 569 |
).then(
|
| 570 |
predict,
|
| 571 |
+
inputs=[model, text,melody_filepath, duration, dimension, topk, topp, temperature, cfg_coef, background, title, settings_font, settings_font_color, seed, overlap, prompt_index, include_title, include_settings, harmony_only, user_profile, segment_duration],
|
| 572 |
+
outputs=[output, wave_file, seed_used], scroll_to_output=True)
|
| 573 |
|
| 574 |
# Show the interface
|
| 575 |
launch_kwargs = {}
|
|
|
|
| 583 |
launch_kwargs['server_port'] = server_port
|
| 584 |
if share:
|
| 585 |
launch_kwargs['share'] = share
|
|
|
|
|
|
|
| 586 |
|
| 587 |
|
| 588 |
+
demo.queue(max_size=10, api_open=False).launch(**launch_kwargs, allowed_paths=["assets","./assets","images","./images", 'e:/TMP'], favicon_path="./assets/favicon.ico")
|
| 589 |
|
| 590 |
if __name__ == "__main__":
|
| 591 |
parser = argparse.ArgumentParser()
|