Spaces:

fffiloni
/

YuE

Paused

App Files Files Community

fffiloni committed on Jan 29

Commit

40d0762

verified ·

1 Parent(s): 6481c18

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -26

app.py CHANGED Viewed

@@ -67,19 +67,10 @@ def empty_output_folder(output_dir):
 # Function to create a temporary file with string content
 def create_temp_file(content, prefix, suffix=".txt"):
-    temp_file = tempfile.NamedTemporaryFile(delete=False, mode="w", prefix=prefix, suffix=suffix)
-    # Ensure content ends with newline and normalize line endings
-    content = content.strip() + "\n\n"  # Add extra newline at end
-    content = content.replace("\r\n", "\n").replace("\r", "\n")
-    temp_file.write(content)
-    temp_file.close()
-    # Debug: Print file contents
-    print(f"\nContent written to {prefix}{suffix}:")
-    print(content)
-    print("---")
-    return temp_file.name
 def get_last_mp3_file(output_dir):
     # List all files in the output directory
@@ -121,13 +112,13 @@ def infer(genre_txt_content, lyrics_txt_content, num_segments, max_new_tokens):
         "python", "infer.py",
         "--stage1_model", "m-a-p/YuE-s1-7B-anneal-en-cot",
         "--stage2_model", "m-a-p/YuE-s2-1B-general",
-        "--genre_txt", f"{genre_txt_path}",
-        "--lyrics_txt", f"{lyrics_txt_path}",
-        "--run_n_segments", f"{num_segments}",
         "--stage2_batch_size", "4",
-        "--output_dir", f"{output_dir}",
         "--cuda_idx", "0",
-        "--max_new_tokens", f"{max_new_tokens}",
         "--disable_offload_model"
     ]
@@ -191,16 +182,38 @@ with gr.Blocks() as demo:
         """)
         with gr.Row():
             with gr.Column():
-                genre_txt = gr.Textbox(label="Genre")
-                lyrics_txt = gr.Textbox(label="Lyrics")
             with gr.Column():
-                if is_shared_ui:
-                    num_segments = gr.Number(label="Number of Segments", value=2, interactive=True)
-                    max_new_tokens = gr.Slider(label="Max New Tokens", minimum=500, maximum="3000", step=500, value=1500, interactive=True)
-                else:
-                    num_segments = gr.Number(label="Number of Song Segments", value=2, interactive=True)
-                    max_new_tokens = gr.Slider(label="Max New Tokens", minimum=500, maximum="24000", step=500, value=3000, interactive=True)
                 submit_btn = gr.Button("Submit")
                 music_out = gr.Audio(label="Audio Result")

 # Function to create a temporary file with string content
 def create_temp_file(content, prefix, suffix=".txt"):
+    fd, path = tempfile.mkstemp(prefix=prefix, suffix=suffix)
+    with os.fdopen(fd, "w", encoding="utf-8") as f:
+        f.write(content)
+    return path
 def get_last_mp3_file(output_dir):
     # List all files in the output directory
         "python", "infer.py",
         "--stage1_model", "m-a-p/YuE-s1-7B-anneal-en-cot",
         "--stage2_model", "m-a-p/YuE-s2-1B-general",
+        "--genre_txt", f"'{genre_txt_path}'",
+        "--lyrics_txt", f"'{lyrics_txt_path}'",
+        "--run_n_segments", str(num_segments),
         "--stage2_batch_size", "4",
+        "--output_dir", f"'{output_dir}'",
         "--cuda_idx", "0",
+        "--max_new_tokens", str(max_new_tokens),
         "--disable_offload_model"
     ]
         """)
         with gr.Row():
             with gr.Column():
+                with gr.Accordion("Pro Tips", open=False):
+                    gr.Markdown(f"""
+                        **Tips:**
+                        1. `genres` should include details like instruments, genre, mood, vocal timbre, and vocal gender.
+                        2. The length of `lyrics` segments and the `--max_new_tokens` value should be matched. For example, if `--max_new_tokens` is set to 3000, the maximum duration for a segment is around 30 seconds. Ensure your lyrics fit this time frame.
+                        **Notice:**
+                        1. A suitable [Genre] tag consists of five components: genre, instrument, mood, gender, and timbre. All five should be included if possible, separated by spaces. The values of timbre should include "vocal" (e.g., "bright vocal").
+                        2. Although our tags have an open vocabulary, we have provided the 200 most commonly used <a href="https://github.com/multimodal-art-projection/YuE/blob/main/top_200_tags.json" id="tags_link" target="_blank">tags</a>. It is recommended to select tags from this list for more stable results.
+                        3. The order of the tags is flexible. For example, a stable genre control string might look like: "inspiring female uplifting pop airy vocal electronic bright vocal vocal."
+                        4. Additionally, we have introduced the "Mandarin" and "Cantonese" tags to distinguish between Mandarin and Cantonese, as their lyrics often share similarities.
+                        """)
+                genre_txt = gr.Textbox(
+                    label="Genre",
+                    placeholder="Example: inspiring female uplifting pop airy vocal...",
+                    info="Text containing genre tags that describe the musical style or characteristics (e.g., instrumental, genre, mood, vocal timbre, vocal gender). This is used as part of the generation prompt."
+                )
+                lyrics_txt = gr.Textbox(
+                    label="Lyrics", lines=12,
+                    placeholder="Type the lyrics here...",
+                    info="Text containing the lyrics for the music generation. These lyrics will be processed and split into structured segments to guide the generation process."
+                )
             with gr.Column():
+                num_segments = gr.Number(label="Number of Segments", value=2, interactive=True)
+                max_new_tokens = gr.Slider(label="Max New Tokens", minimum=500, maximum="3000", step=500, value=1500, interactive=True)
                 submit_btn = gr.Button("Submit")
                 music_out = gr.Audio(label="Audio Result")