Hugging Face Spaces — Runtime error. Commit: "Update app.py" (Browse files). Diff of app.py (CHANGED):
|
@@ -1,5 +1,5 @@
|
|
| 1 |
from audiocraft.models import MusicGen
|
| 2 |
-
import
|
| 3 |
import os
|
| 4 |
import torch
|
| 5 |
import torchaudio
|
|
@@ -7,101 +7,70 @@ import numpy as np
|
|
| 7 |
import base64
|
| 8 |
from dotenv import load_dotenv
|
| 9 |
import google.generativeai as genai
|
| 10 |
-
load_dotenv()
|
| 11 |
|
|
|
|
|
|
|
| 12 |
genai.configure(api_key=os.getenv("API_KEY"))
|
| 13 |
llm = genai.GenerativeModel("gemini-pro")
|
| 14 |
|
| 15 |
-
|
| 16 |
def load_model():
|
| 17 |
model = MusicGen.get_pretrained("facebook/musicgen-small")
|
| 18 |
return model
|
| 19 |
|
| 20 |
-
|
| 21 |
-
print(f"Description: {description}")
|
| 22 |
-
print(f"Duration: {duration}")
|
| 23 |
-
model = load_model()
|
| 24 |
-
|
| 25 |
-
model.set_generation_params(
|
| 26 |
-
use_sampling=True,
|
| 27 |
-
top_k=250,
|
| 28 |
-
duration=duration
|
| 29 |
-
)
|
| 30 |
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
-
|
|
|
|
| 40 |
sample_rate = 32000
|
| 41 |
-
save_path = "
|
| 42 |
-
|
| 43 |
-
assert samples.dim() == 2 or samples.dim() == 3
|
| 44 |
samples = samples.detach().cpu()
|
| 45 |
-
|
| 46 |
if samples.dim() == 2:
|
| 47 |
samples = samples[None, ...]
|
| 48 |
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
def
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
)
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
ORIGINAL PROMPT : {text_area}
|
| 78 |
-
YOUR OUTPUT PROMPT :
|
| 79 |
-
"""
|
| 80 |
-
llm_result = llm.generate_content(context)
|
| 81 |
-
prompt = llm_result.text
|
| 82 |
-
print("")
|
| 83 |
-
|
| 84 |
-
if text_area and time_slider:
|
| 85 |
-
st.json(
|
| 86 |
-
{
|
| 87 |
-
"Description": prompt,
|
| 88 |
-
"Duration": time_slider
|
| 89 |
-
}
|
| 90 |
-
)
|
| 91 |
-
|
| 92 |
-
st.subheader("Generated Music")
|
| 93 |
-
|
| 94 |
-
music_tensors = generate_music_tensors(prompt, time_slider)
|
| 95 |
-
print(f"Music Tensors: {music_tensors}")
|
| 96 |
-
|
| 97 |
-
save_music_file = save_audio(music_tensors)
|
| 98 |
-
|
| 99 |
-
audio_filepath = "saved_audio/audio_0.wav"
|
| 100 |
-
audio_file = open(audio_filepath, 'rb')
|
| 101 |
-
audio_bytes = audio_file.read()
|
| 102 |
-
|
| 103 |
-
st.audio(audio_bytes)
|
| 104 |
-
st.markdown(download_music(audio_filepath, 'Audio'), unsafe_allow_html=True)
|
| 105 |
|
| 106 |
-
|
| 107 |
-
main()
|
|
|
|
| 1 |
from audiocraft.models import MusicGen
|
| 2 |
+
import gradio as gr
|
| 3 |
import os
|
| 4 |
import torch
|
| 5 |
import torchaudio
|
|
|
|
| 7 |
import base64
|
| 8 |
from dotenv import load_dotenv
|
| 9 |
import google.generativeai as genai
|
|
|
|
| 10 |
|
| 11 |
+
# Load API key from environment
|
| 12 |
+
load_dotenv()
|
| 13 |
genai.configure(api_key=os.getenv("API_KEY"))
|
| 14 |
llm = genai.GenerativeModel("gemini-pro")
|
| 15 |
|
| 16 |
+
# Load MusicGen Model
def load_model(model_name: str = "facebook/musicgen-small"):
    """Load and return a pretrained MusicGen model.

    Parameters
    ----------
    model_name : str
        Hugging Face checkpoint identifier. Defaults to the small
        MusicGen model, preserving the original behavior.

    Returns
    -------
    MusicGen
        The pretrained model instance.
    """
    model = MusicGen.get_pretrained(model_name)
    return model
|
| 20 |
|
| 21 |
+
model = load_model()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
+
# Function to generate music
|
| 24 |
+
def generate_music(description, duration):
|
| 25 |
+
context = f"""Enhance the following music prompt by adding relevant musical terms, structure, and flow.
|
| 26 |
+
Ensure it's concise but descriptive:
|
| 27 |
+
ORIGINAL PROMPT: {description}
|
| 28 |
+
YOUR OUTPUT PROMPT:"""
|
| 29 |
+
|
| 30 |
+
llm_result = llm.generate_content(context)
|
| 31 |
+
enhanced_prompt = llm_result.text.strip()
|
| 32 |
|
| 33 |
+
model.set_generation_params(use_sampling=True, top_k=250, duration=duration)
|
| 34 |
+
output = model.generate(descriptions=[enhanced_prompt], progress=True, return_tokens=True)
|
| 35 |
+
|
| 36 |
+
return output[0], enhanced_prompt
|
| 37 |
|
| 38 |
+
# Save and return music file path
|
| 39 |
+
def save_audio(samples):
|
| 40 |
sample_rate = 32000
|
| 41 |
+
save_path = "generated_audio.wav"
|
| 42 |
+
|
|
|
|
| 43 |
samples = samples.detach().cpu()
|
|
|
|
| 44 |
if samples.dim() == 2:
|
| 45 |
samples = samples[None, ...]
|
| 46 |
|
| 47 |
+
torchaudio.save(save_path, samples[0], sample_rate)
|
| 48 |
+
return save_path
|
| 49 |
+
|
| 50 |
+
# Function to integrate with Gradio
|
| 51 |
+
def generate_music_and_return(description, duration):
|
| 52 |
+
music_tensors, enhanced_prompt = generate_music(description, duration)
|
| 53 |
+
audio_file_path = save_audio(music_tensors)
|
| 54 |
+
|
| 55 |
+
return enhanced_prompt, audio_file_path
|
| 56 |
+
|
| 57 |
+
# Gradio UI
|
| 58 |
+
with gr.Blocks() as app:
|
| 59 |
+
gr.Markdown("# 🎵 Text-to-Music Generator")
|
| 60 |
+
gr.Markdown("Enter a music description, and our AI will generate a unique audio clip.")
|
| 61 |
+
|
| 62 |
+
with gr.Row():
|
| 63 |
+
description_input = gr.Textbox(label="Enter music description")
|
| 64 |
+
duration_input = gr.Slider(2, 20, value=5, step=1, label="Select duration (seconds)")
|
| 65 |
+
|
| 66 |
+
generate_button = gr.Button("🎼 Generate Music")
|
| 67 |
+
enhanced_description_output = gr.Textbox(label="Enhanced Description", interactive=False)
|
| 68 |
+
audio_output = gr.Audio(label="Generated Audio")
|
| 69 |
+
|
| 70 |
+
generate_button.click(
|
| 71 |
+
generate_music_and_return,
|
| 72 |
+
inputs=[description_input, duration_input],
|
| 73 |
+
outputs=[enhanced_description_output, audio_output]
|
| 74 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
|
| 76 |
+
app.launch()
|
|
|