Spaces:

fffiloni
/

Music-To-Image

Paused

App Files Files Community

fffiloni committed on Aug 10, 2023

Commit

3e7eefe

1 Parent(s): a169bce

added video visualizer export

Browse files

Files changed (1) hide show

app.py +63 -19

app.py CHANGED Viewed

@@ -8,6 +8,7 @@ from gradio_client import Client
 client = Client("https://fffiloni-test-llama-api.hf.space/", hf_token=hf_token)
 lyrics_client = Client("https://fffiloni-music-to-lyrics.hf.space/")
 from share_btn import community_icon_html, loading_icon_html, share_js
@@ -60,23 +61,31 @@ def get_text_after_colon(input_text):
 def solo_xd(prompt):
-    # ———
-    print("""———
-    Calling SD-XL for another image...
-    """)
-    prompt = prompt
-    conditioning, pooled = compel(prompt)
-    images = pipe(prompt_embeds=conditioning, pooled_prompt_embeds=pooled).images[0]
-    print("Finished")
     return images
 def infer(audio_file, has_lyrics):
     print("NEW INFERENCE ...")
     truncated_audio = cut_audio(audio_file, "trunc_audio.mp3")
     print("Calling LP Music Caps...")
     cap_result = lpmc_client(
     				truncated_audio,	# str (filepath or URL to file) in 'audio_path' Audio component
     				api_name="predict"
@@ -87,6 +96,7 @@ def infer(audio_file, has_lyrics):
         print("""———
         Getting Lyrics ...
         """)
         lyrics_result = lyrics_client.predict(
         				audio_file,	# str (filepath or URL to file) in 'Song input' Audio component
         				fn_index=0
@@ -123,6 +133,7 @@ def infer(audio_file, has_lyrics):
     print("""———
     Calling Llama2 ...
     """)
     result = client.predict(
     				llama_q,	# str in 'Message' Textbox component
     				api_name="/predict"
@@ -132,18 +143,32 @@ def infer(audio_file, has_lyrics):
     print(f"Llama2 result: {result}")
     # ———
     print("""———
     Calling SD-XL ...
     """)
-    prompt = result
     conditioning, pooled = compel(prompt)
     images = pipe(prompt_embeds=conditioning, pooled_prompt_embeds=pooled).images[0]
     print("Finished")
     #return cap_result, result, images
-    return images, result, gr.update(visible=True), gr.Group.update(visible=True)
 css = """
 #col-container {max-width: 780px; margin-left: auto; margin-right: auto;}
@@ -245,25 +270,43 @@ with gr.Blocks(css=css) as demo:
             </div>""")
         audio_input = gr.Audio(label="Music input", type="filepath", source="upload")
         with gr.Row():
             has_lyrics = gr.Radio(label="Does your audio has lyrics ?", choices=["Yes", "No"], value="No", info="If yes, the image should reflect the lyrics, but be aware that because we add a step (getting lyrics), inference will take more time.")
-            song_title = gr.Textbox(label="Song Title", value="Title: ", interactive=True, info="If you want to share your result, please provide the title of your audio sample :)", elem_id="song-title")
         infer_btn = gr.Button("Generate Image from Music")
         #lpmc_cap = gr.Textbox(label="Lp Music Caps caption")
         with gr.Row():
-            llama_trans_cap = gr.Textbox(label="Llama Image Suggestion", placeholder="Llama2 image prompt suggestion will be displayed here ;)", visible=True, lines=12, elem_id="llama-prompt")
-            img_result = gr.Image(label="Image Result", elem_id="image-out")
-        with gr.Row():
             tryagain_btn = gr.Button("Try another image ?", visible=False)
             with gr.Group(elem_id="share-btn-container", visible=False) as share_group:
                     community_icon = gr.HTML(community_icon_html)
                     loading_icon = gr.HTML(loading_icon_html)
                     share_button = gr.Button("Share to community", elem_id="share-btn")
         gr.Examples(examples=[["./examples/electronic.mp3", "No"],["./examples/folk.wav", "No"], ["./examples/orchestra.wav", "No"]],
                     fn=infer,
                     inputs=[audio_input, has_lyrics],
-                    outputs=[img_result, llama_trans_cap, tryagain_btn, share_group],
                     cache_examples=True
                    )
@@ -286,8 +329,9 @@ with gr.Blocks(css=css) as demo:
         """)
     #infer_btn.click(fn=infer, inputs=[audio_input], outputs=[lpmc_cap, llama_trans_cap, img_result])
-    infer_btn.click(fn=infer, inputs=[audio_input, has_lyrics], outputs=[img_result, llama_trans_cap, tryagain_btn, share_group])
     share_button.click(None, [], [], _js=share_js)
     tryagain_btn.click(fn=solo_xd, inputs=[llama_trans_cap], outputs=[img_result])
 demo.queue(max_size=20).launch()

 client = Client("https://fffiloni-test-llama-api.hf.space/", hf_token=hf_token)
 lyrics_client = Client("https://fffiloni-music-to-lyrics.hf.space/")
+visualizer_client = Client("https://fffiloni-animated-audio-visualizer.hf.space/")
 from share_btn import community_icon_html, loading_icon_html, share_js
 def solo_xd(prompt):  # Generate one image from a text prompt; wired to the "Try another image ?" button via tryagain_btn.click.
+    images = pipe(prompt=prompt).images[0]  # `pipe` is defined outside this view; the prompt string is passed as-is and only the first image of the result is kept
     return images
+def get_visualizer_video(audio_in, image_in, song_title):  # Ask the remote visualizer Space (visualizer_client) for a video built from a title, an audio file and an image.
+    title = f"""{song_title.upper()}\nMusic-to-Image demo by @fffiloni | HuggingFace
+    """  # NOTE(review): the literal ends with a newline plus indentation spaces, so that whitespace is sent as part of the title; song_title must be a str (.upper() is called on it)
+    visualizer_video = visualizer_client.predict(
+    				title,	# str in 'title' Textbox component
+    				audio_in,	# str (filepath or URL to file) in 'audio_in' Audio component
+    				image_in,	# str (filepath or URL to image) in 'image_in' Image component
+    				api_name="/predict"
+    )
+    return visualizer_video[0]  # only element 0 of the predict() result is returned — presumably the video file path; confirm against the visualizer Space's API
 def infer(audio_file, has_lyrics):
     print("NEW INFERENCE ...")
+    gr.Info('Truncating your audio to the first 30 seconds')
     truncated_audio = cut_audio(audio_file, "trunc_audio.mp3")
+    processed_audio = truncated_audio
     print("Calling LP Music Caps...")
+    gr.Info('Calling LP Music Caps...')
     cap_result = lpmc_client(
     				truncated_audio,	# str (filepath or URL to file) in 'audio_path' Audio component
     				api_name="predict"
         print("""———
         Getting Lyrics ...
         """)
+        gr.Info("Getting Lyrics ...")
         lyrics_result = lyrics_client.predict(
         				audio_file,	# str (filepath or URL to file) in 'Song input' Audio component
         				fn_index=0
     print("""———
     Calling Llama2 ...
     """)
+    gr.Info("Calling Llama2 ...")
     result = client.predict(
     				llama_q,	# str in 'Message' Textbox component
     				api_name="/predict"
     print(f"Llama2 result: {result}")
+    gr.Info("Prompt Optimization ...")
+    get_shorter_prompt = f"""
+    From this image description, please provide a short but efficient summary for a good Stable Diffusion prompt:
+    '{result}'
+    """
+    shorten = client.predict(
+    				get_shorter_prompt,	# str in 'Message' Textbox component
+    				api_name="/predict"
+    )
+    print(f'SHORTEN PROMPT: {shorten}')
     # ———
     print("""———
     Calling SD-XL ...
     """)
+    gr.Info('Calling SD-XL ...')
+    prompt = shorten
     conditioning, pooled = compel(prompt)
     images = pipe(prompt_embeds=conditioning, pooled_prompt_embeds=pooled).images[0]
     print("Finished")
     #return cap_result, result, images
+    return processed_audio, images, result, gr.update(visible=True), gr.Group.update(visible=True)
 css = """
 #col-container {max-width: 780px; margin-left: auto; margin-right: auto;}
             </div>""")
         audio_input = gr.Audio(label="Music input", type="filepath", source="upload")
         with gr.Row():
             has_lyrics = gr.Radio(label="Does your audio has lyrics ?", choices=["Yes", "No"], value="No", info="If yes, the image should reflect the lyrics, but be aware that because we add a step (getting lyrics), inference will take more time.")
+            song_title = gr.Textbox(label="Song Title", placeholder="Title: ", interactive=True, info="If you want to share your result, please provide the title of your audio sample :)", elem_id="song-title")
         infer_btn = gr.Button("Generate Image from Music")
         #lpmc_cap = gr.Textbox(label="Lp Music Caps caption")
+        with gr.Group():
+            with gr.Row():
+                llama_trans_cap = gr.Textbox(label="Llama Image Suggestion", placeholder="Llama2 image prompt suggestion will be displayed here ;)", visible=True, lines=12, max_lines=18, elem_id="llama-prompt")
+                with gr.Tab("Image Result"):
+                    img_result = gr.Image(label="Image Result", elem_id="image-out", interactive=False, type="filepath")
+                with gr.Tab("Video visualizer"):
+                    with gr.Column():
+                        processed_audio = gr.Audio(type="filepath", visible=False)
+                        visualizer_video = gr.Video(label="Video visualizer output")
+                        get_visualizer_vid = gr.Button("Export as video !")
         with gr.Row():
             tryagain_btn = gr.Button("Try another image ?", visible=False)
             with gr.Group(elem_id="share-btn-container", visible=False) as share_group:
                     community_icon = gr.HTML(community_icon_html)
                     loading_icon = gr.HTML(loading_icon_html)
                     share_button = gr.Button("Share to community", elem_id="share-btn")
         gr.Examples(examples=[["./examples/electronic.mp3", "No"],["./examples/folk.wav", "No"], ["./examples/orchestra.wav", "No"]],
                     fn=infer,
                     inputs=[audio_input, has_lyrics],
+                    outputs=[processed_audio, img_result, llama_trans_cap, tryagain_btn, share_group],
                     cache_examples=True
                    )
         """)
     #infer_btn.click(fn=infer, inputs=[audio_input], outputs=[lpmc_cap, llama_trans_cap, img_result])
+    infer_btn.click(fn=infer, inputs=[audio_input, has_lyrics], outputs=[processed_audio, img_result, llama_trans_cap, tryagain_btn, share_group])
     share_button.click(None, [], [], _js=share_js)
     tryagain_btn.click(fn=solo_xd, inputs=[llama_trans_cap], outputs=[img_result])
+    get_visualizer_vid.click(fn=get_visualizer_video, inputs=[processed_audio, img_result, song_title], outputs=[visualizer_video], queue=False)
 demo.queue(max_size=20).launch()