Spaces:
Paused
Paused
added video visualizer export
Browse files
app.py
CHANGED
|
@@ -8,6 +8,7 @@ from gradio_client import Client
|
|
| 8 |
|
| 9 |
client = Client("https://fffiloni-test-llama-api.hf.space/", hf_token=hf_token)
|
| 10 |
lyrics_client = Client("https://fffiloni-music-to-lyrics.hf.space/")
|
|
|
|
| 11 |
|
| 12 |
from share_btn import community_icon_html, loading_icon_html, share_js
|
| 13 |
|
|
@@ -60,23 +61,31 @@ def get_text_after_colon(input_text):
|
|
| 60 |
|
| 61 |
|
| 62 |
def solo_xd(prompt):
    """Generate one more SD-XL image for an already available prompt.

    Args:
        prompt: Text prompt. It is encoded with ``compel`` and the resulting
            embeddings are handed to ``pipe``.

    Returns:
        The first entry of the ``images`` collection produced by ``pipe``.
    """
    print("""βββ
Calling SD-XL for another image...
""")
    # The pipeline receives the compel embeddings instead of the raw text.
    prompt_embeds, pooled_embeds = compel(prompt)
    generation = pipe(prompt_embeds=prompt_embeds, pooled_prompt_embeds=pooled_embeds)
    first_image = generation.images[0]

    print("Finished")
    return first_image
|
| 73 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
def infer(audio_file, has_lyrics):
|
| 75 |
print("NEW INFERENCE ...")
|
| 76 |
-
|
| 77 |
truncated_audio = cut_audio(audio_file, "trunc_audio.mp3")
|
|
|
|
| 78 |
|
| 79 |
print("Calling LP Music Caps...")
|
|
|
|
| 80 |
cap_result = lpmc_client(
|
| 81 |
truncated_audio, # str (filepath or URL to file) in 'audio_path' Audio component
|
| 82 |
api_name="predict"
|
|
@@ -87,6 +96,7 @@ def infer(audio_file, has_lyrics):
|
|
| 87 |
print("""βββ
|
| 88 |
Getting Lyrics ...
|
| 89 |
""")
|
|
|
|
| 90 |
lyrics_result = lyrics_client.predict(
|
| 91 |
audio_file, # str (filepath or URL to file) in 'Song input' Audio component
|
| 92 |
fn_index=0
|
|
@@ -123,6 +133,7 @@ def infer(audio_file, has_lyrics):
|
|
| 123 |
print("""βββ
|
| 124 |
Calling Llama2 ...
|
| 125 |
""")
|
|
|
|
| 126 |
result = client.predict(
|
| 127 |
llama_q, # str in 'Message' Textbox component
|
| 128 |
api_name="/predict"
|
|
@@ -132,18 +143,32 @@ def infer(audio_file, has_lyrics):
|
|
| 132 |
|
| 133 |
print(f"Llama2 result: {result}")
|
| 134 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
# βββ
|
| 136 |
print("""βββ
|
| 137 |
Calling SD-XL ...
|
| 138 |
""")
|
| 139 |
-
|
|
|
|
| 140 |
conditioning, pooled = compel(prompt)
|
| 141 |
images = pipe(prompt_embeds=conditioning, pooled_prompt_embeds=pooled).images[0]
|
| 142 |
|
| 143 |
print("Finished")
|
| 144 |
|
| 145 |
#return cap_result, result, images
|
| 146 |
-
return images, result, gr.update(visible=True), gr.Group.update(visible=True)
|
| 147 |
|
| 148 |
css = """
|
| 149 |
#col-container {max-width: 780px; margin-left: auto; margin-right: auto;}
|
|
@@ -245,25 +270,43 @@ with gr.Blocks(css=css) as demo:
|
|
| 245 |
</div>""")
|
| 246 |
|
| 247 |
audio_input = gr.Audio(label="Music input", type="filepath", source="upload")
|
|
|
|
| 248 |
with gr.Row():
|
| 249 |
has_lyrics = gr.Radio(label="Does your audio has lyrics ?", choices=["Yes", "No"], value="No", info="If yes, the image should reflect the lyrics, but be aware that because we add a step (getting lyrics), inference will take more time.")
|
| 250 |
-
song_title = gr.Textbox(label="Song Title",
|
|
|
|
| 251 |
infer_btn = gr.Button("Generate Image from Music")
|
| 252 |
#lpmc_cap = gr.Textbox(label="Lp Music Caps caption")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 253 |
with gr.Row():
|
| 254 |
-
|
| 255 |
-
img_result = gr.Image(label="Image Result", elem_id="image-out")
|
| 256 |
-
with gr.Row():
|
| 257 |
tryagain_btn = gr.Button("Try another image ?", visible=False)
|
|
|
|
| 258 |
with gr.Group(elem_id="share-btn-container", visible=False) as share_group:
|
| 259 |
community_icon = gr.HTML(community_icon_html)
|
| 260 |
loading_icon = gr.HTML(loading_icon_html)
|
| 261 |
share_button = gr.Button("Share to community", elem_id="share-btn")
|
| 262 |
-
|
| 263 |
gr.Examples(examples=[["./examples/electronic.mp3", "No"],["./examples/folk.wav", "No"], ["./examples/orchestra.wav", "No"]],
|
| 264 |
fn=infer,
|
| 265 |
inputs=[audio_input, has_lyrics],
|
| 266 |
-
outputs=[img_result, llama_trans_cap, tryagain_btn, share_group],
|
| 267 |
cache_examples=True
|
| 268 |
)
|
| 269 |
|
|
@@ -286,8 +329,9 @@ with gr.Blocks(css=css) as demo:
|
|
| 286 |
""")
|
| 287 |
|
| 288 |
#infer_btn.click(fn=infer, inputs=[audio_input], outputs=[lpmc_cap, llama_trans_cap, img_result])
|
| 289 |
-
infer_btn.click(fn=infer, inputs=[audio_input, has_lyrics], outputs=[img_result, llama_trans_cap, tryagain_btn, share_group])
|
| 290 |
share_button.click(None, [], [], _js=share_js)
|
| 291 |
tryagain_btn.click(fn=solo_xd, inputs=[llama_trans_cap], outputs=[img_result])
|
|
|
|
| 292 |
|
| 293 |
demo.queue(max_size=20).launch()
|
|
|
|
| 8 |
|
| 9 |
client = Client("https://fffiloni-test-llama-api.hf.space/", hf_token=hf_token)
|
| 10 |
lyrics_client = Client("https://fffiloni-music-to-lyrics.hf.space/")
|
| 11 |
+
visualizer_client = Client("https://fffiloni-animated-audio-visualizer.hf.space/")
|
| 12 |
|
| 13 |
from share_btn import community_icon_html, loading_icon_html, share_js
|
| 14 |
|
|
|
|
| 61 |
|
| 62 |
|
| 63 |
def solo_xd(prompt):
    """Generate a single image from a raw text prompt.

    Args:
        prompt: Text passed unchanged to ``pipe`` as its ``prompt`` argument.

    Returns:
        The first entry of the ``images`` collection produced by ``pipe``.
    """
    generation = pipe(prompt=prompt)
    return generation.images[0]
|
| 66 |
|
| 67 |
+
def get_visualizer_video(audio_in, image_in, song_title):
    """Request an audio-visualizer video from the remote visualizer Space.

    Args:
        audio_in: Audio file path forwarded to the visualizer client.
        image_in: Image file path forwarded to the visualizer client.
        song_title: Title shown in the video; it is upper-cased and followed
            by the demo credit line. ``None`` is treated as an empty title.

    Returns:
        The first element of the value returned by
        ``visualizer_client.predict``.

    Raises:
        gr.Error: If the audio or the image is missing, which happens when
            the export button is clicked before an image has been generated.
    """
    # The export button is always visible, so this handler can run before
    # `infer` has filled the audio and image components.
    if not audio_in or not image_in:
        raise gr.Error("Please generate an image from your music before exporting the video.")

    # Guard against a missing title so `.upper()` cannot fail on None.
    heading = (song_title or "").upper()
    title = f"{heading}\nMusic-to-Image demo by @fffiloni | HuggingFace\n"

    visualizer_video = visualizer_client.predict(
        title,     # str in 'title' Textbox component
        audio_in,  # str (filepath or URL to file) in 'audio_in' Audio component
        image_in,  # str (filepath or URL to image) in 'image_in' Image component
        api_name="/predict",
    )

    return visualizer_video[0]
|
| 80 |
+
|
| 81 |
def infer(audio_file, has_lyrics):
|
| 82 |
print("NEW INFERENCE ...")
|
| 83 |
+
gr.Info('Truncating your audio to the first 30 seconds')
|
| 84 |
truncated_audio = cut_audio(audio_file, "trunc_audio.mp3")
|
| 85 |
+
processed_audio = truncated_audio
|
| 86 |
|
| 87 |
print("Calling LP Music Caps...")
|
| 88 |
+
gr.Info('Calling LP Music Caps...')
|
| 89 |
cap_result = lpmc_client(
|
| 90 |
truncated_audio, # str (filepath or URL to file) in 'audio_path' Audio component
|
| 91 |
api_name="predict"
|
|
|
|
| 96 |
print("""βββ
|
| 97 |
Getting Lyrics ...
|
| 98 |
""")
|
| 99 |
+
gr.Info("Getting Lyrics ...")
|
| 100 |
lyrics_result = lyrics_client.predict(
|
| 101 |
audio_file, # str (filepath or URL to file) in 'Song input' Audio component
|
| 102 |
fn_index=0
|
|
|
|
| 133 |
print("""βββ
|
| 134 |
Calling Llama2 ...
|
| 135 |
""")
|
| 136 |
+
gr.Info("Calling Llama2 ...")
|
| 137 |
result = client.predict(
|
| 138 |
llama_q, # str in 'Message' Textbox component
|
| 139 |
api_name="/predict"
|
|
|
|
| 143 |
|
| 144 |
print(f"Llama2 result: {result}")
|
| 145 |
|
| 146 |
+
gr.Info("Prompt Optimization ...")
|
| 147 |
+
get_shorter_prompt = f"""
|
| 148 |
+
From this image description, please provide a short but efficient summary for a good Stable Diffusion prompt:
|
| 149 |
+
'{result}'
|
| 150 |
+
"""
|
| 151 |
+
|
| 152 |
+
shorten = client.predict(
|
| 153 |
+
get_shorter_prompt, # str in 'Message' Textbox component
|
| 154 |
+
api_name="/predict"
|
| 155 |
+
)
|
| 156 |
+
|
| 157 |
+
print(f'SHORTEN PROMPT: {shorten}')
|
| 158 |
+
|
| 159 |
# βββ
|
| 160 |
print("""βββ
|
| 161 |
Calling SD-XL ...
|
| 162 |
""")
|
| 163 |
+
gr.Info('Calling SD-XL ...')
|
| 164 |
+
prompt = shorten
|
| 165 |
conditioning, pooled = compel(prompt)
|
| 166 |
images = pipe(prompt_embeds=conditioning, pooled_prompt_embeds=pooled).images[0]
|
| 167 |
|
| 168 |
print("Finished")
|
| 169 |
|
| 170 |
#return cap_result, result, images
|
| 171 |
+
return processed_audio, images, result, gr.update(visible=True), gr.Group.update(visible=True)
|
| 172 |
|
| 173 |
css = """
|
| 174 |
#col-container {max-width: 780px; margin-left: auto; margin-right: auto;}
|
|
|
|
| 270 |
</div>""")
|
| 271 |
|
| 272 |
audio_input = gr.Audio(label="Music input", type="filepath", source="upload")
|
| 273 |
+
|
| 274 |
with gr.Row():
|
| 275 |
has_lyrics = gr.Radio(label="Does your audio has lyrics ?", choices=["Yes", "No"], value="No", info="If yes, the image should reflect the lyrics, but be aware that because we add a step (getting lyrics), inference will take more time.")
|
| 276 |
+
song_title = gr.Textbox(label="Song Title", placeholder="Title: ", interactive=True, info="If you want to share your result, please provide the title of your audio sample :)", elem_id="song-title")
|
| 277 |
+
|
| 278 |
infer_btn = gr.Button("Generate Image from Music")
|
| 279 |
#lpmc_cap = gr.Textbox(label="Lp Music Caps caption")
|
| 280 |
+
|
| 281 |
+
with gr.Group():
|
| 282 |
+
|
| 283 |
+
with gr.Row():
|
| 284 |
+
|
| 285 |
+
llama_trans_cap = gr.Textbox(label="Llama Image Suggestion", placeholder="Llama2 image prompt suggestion will be displayed here ;)", visible=True, lines=12, max_lines=18, elem_id="llama-prompt")
|
| 286 |
+
|
| 287 |
+
with gr.Tab("Image Result"):
|
| 288 |
+
img_result = gr.Image(label="Image Result", elem_id="image-out", interactive=False, type="filepath")
|
| 289 |
+
|
| 290 |
+
with gr.Tab("Video visualizer"):
|
| 291 |
+
|
| 292 |
+
with gr.Column():
|
| 293 |
+
processed_audio = gr.Audio(type="filepath", visible=False)
|
| 294 |
+
visualizer_video = gr.Video(label="Video visualizer output")
|
| 295 |
+
get_visualizer_vid = gr.Button("Export as video !")
|
| 296 |
+
|
| 297 |
with gr.Row():
|
| 298 |
+
|
|
|
|
|
|
|
| 299 |
tryagain_btn = gr.Button("Try another image ?", visible=False)
|
| 300 |
+
|
| 301 |
with gr.Group(elem_id="share-btn-container", visible=False) as share_group:
|
| 302 |
community_icon = gr.HTML(community_icon_html)
|
| 303 |
loading_icon = gr.HTML(loading_icon_html)
|
| 304 |
share_button = gr.Button("Share to community", elem_id="share-btn")
|
| 305 |
+
|
| 306 |
gr.Examples(examples=[["./examples/electronic.mp3", "No"],["./examples/folk.wav", "No"], ["./examples/orchestra.wav", "No"]],
|
| 307 |
fn=infer,
|
| 308 |
inputs=[audio_input, has_lyrics],
|
| 309 |
+
outputs=[processed_audio, img_result, llama_trans_cap, tryagain_btn, share_group],
|
| 310 |
cache_examples=True
|
| 311 |
)
|
| 312 |
|
|
|
|
| 329 |
""")
|
| 330 |
|
| 331 |
#infer_btn.click(fn=infer, inputs=[audio_input], outputs=[lpmc_cap, llama_trans_cap, img_result])
|
| 332 |
+
infer_btn.click(fn=infer, inputs=[audio_input, has_lyrics], outputs=[processed_audio, img_result, llama_trans_cap, tryagain_btn, share_group])
|
| 333 |
share_button.click(None, [], [], _js=share_js)
|
| 334 |
tryagain_btn.click(fn=solo_xd, inputs=[llama_trans_cap], outputs=[img_result])
|
| 335 |
+
get_visualizer_vid.click(fn=get_visualizer_video, inputs=[processed_audio, img_result, song_title], outputs=[visualizer_video], queue=False)
|
| 336 |
|
| 337 |
demo.queue(max_size=20).launch()
|