RohitCSharp committed on
Commit
244dbca
·
verified ·
1 Parent(s): 99711d6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -24
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # MusicGen + Gradio + GPT Demo App (Optimized for Hugging Face Spaces)
2
 
3
  import gradio as gr
4
  import os
@@ -8,15 +8,18 @@ from transformers import AutoProcessor, MusicgenForConditionalGeneration
8
  from openai import OpenAI
9
  import scipy.io.wavfile
10
 
11
- # Load MusicGen model
 
 
 
12
  model_name = "facebook/musicgen-small"
13
- model = MusicgenForConditionalGeneration.from_pretrained(model_name)
14
  processor = AutoProcessor.from_pretrained(model_name)
15
 
16
- # Initialize OpenAI client with Hugging Face environment variable
17
  client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
18
 
19
- # Function to enhance user input prompt
20
  def refine_prompt(user_input):
21
  completion = client.chat.completions.create(
22
  model="gpt-4",
@@ -27,10 +30,11 @@ def refine_prompt(user_input):
27
  )
28
  return completion.choices[0].message.content.strip()
29
 
30
- # Function to generate music
31
- def generate_music(prompt):
32
- inputs = processor(text=[prompt], return_tensors="pt")
33
- audio_values = model.generate(**inputs, max_new_tokens=256)
 
34
  sampling_rate = model.config.audio_encoder.sampling_rate
35
  audio = audio_values[0].cpu().numpy()
36
 
@@ -38,7 +42,7 @@ def generate_music(prompt):
38
  audio = audio / np.max(np.abs(audio))
39
  audio = audio.astype(np.float32)
40
 
41
- # Save as .wav file for optional download (not used in UI here)
42
  int_audio = (audio * 32767).astype(np.int16)
43
  scipy.io.wavfile.write("/tmp/output.wav", sampling_rate, int_audio)
44
 
@@ -50,21 +54,24 @@ def main(user_input):
50
  sampling_rate, audio = generate_music(detailed_prompt)
51
  return detailed_prompt, (sampling_rate, audio)
52
 
53
- # Gradio interface
54
- def build_ui():
55
- with gr.Blocks() as demo:
56
- gr.Markdown("""# 🎡 AI Music Generator\nEnter a music idea or mood and get a short AI-generated track.""")
57
-
58
- user_input = gr.Textbox(label="Describe the mood or style of music")
59
- generate_btn = gr.Button("Generate Music")
60
 
61
- refined_output = gr.Textbox(label="Enhanced Prompt by GPT")
62
- audio_output = gr.Audio(label="Generated Audio", type="numpy")
 
63
 
64
- generate_btn.click(main, inputs=user_input, outputs=[refined_output, audio_output])
 
 
65
 
66
- return demo
 
 
 
 
67
 
68
- # Launch app in SSR mode (better for Spaces)
69
- demo = build_ui()
70
- demo.launch()
 
1
+ # MusicGen + Gradio + GPT Demo App (CPU-Optimized for Hugging Face Spaces)
2
 
3
  import gradio as gr
4
  import os
 
8
  from openai import OpenAI
9
  import scipy.io.wavfile
10
 
11
+ # Force CPU device (no GPU required)
12
+ device = torch.device("cpu")
13
+
14
+ # Load MusicGen model onto CPU
15
  model_name = "facebook/musicgen-small"
16
+ model = MusicgenForConditionalGeneration.from_pretrained(model_name).to(device)
17
  processor = AutoProcessor.from_pretrained(model_name)
18
 
19
+ # Initialize OpenAI client (set OPENAI_API_KEY in HF Spaces Secrets)
20
  client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
21
 
22
+ # Refine user prompt via GPT
23
  def refine_prompt(user_input):
24
  completion = client.chat.completions.create(
25
  model="gpt-4",
 
30
  )
31
  return completion.choices[0].message.content.strip()
32
 
33
+ # Generate music (shorter tokens for CPU speed)
34
+ def generate_music(prompt, max_new_tokens: int = 128):
35
+ inputs = processor(text=[prompt], return_tensors="pt").to(device)
36
+ # Warning: Generation on CPU may be slow
37
+ audio_values = model.generate(**inputs, max_new_tokens=max_new_tokens)
38
  sampling_rate = model.config.audio_encoder.sampling_rate
39
  audio = audio_values[0].cpu().numpy()
40
 
 
42
  audio = audio / np.max(np.abs(audio))
43
  audio = audio.astype(np.float32)
44
 
45
+ # Save as .wav file (in /tmp for Spaces)
46
  int_audio = (audio * 32767).astype(np.int16)
47
  scipy.io.wavfile.write("/tmp/output.wav", sampling_rate, int_audio)
48
 
 
54
  sampling_rate, audio = generate_music(detailed_prompt)
55
  return detailed_prompt, (sampling_rate, audio)
56
 
57
+ # Build Gradio UI
58
def _on_generate(user_text, token_count):
    """Handle a click on "Generate Music".

    Runs the pipeline exactly once per click and forwards the slider value
    to the generator. The previous inline lambda called ``main`` three
    times per click (three GPT requests and three generations) and ignored
    the token slider entirely.

    Args:
        user_text: Text the user typed into the description box.
        token_count: Value of the length slider; cast to ``int`` before
            being passed as ``max_new_tokens``.

    Returns:
        A tuple ``(refined_prompt, (sampling_rate, audio), wav_path)``
        matching the three output components wired below.
    """
    # NOTE(review): this mirrors what main() appears to do (refine, then
    # generate) but calls the helpers directly, because main() has no
    # parameter through which the token count could be forwarded.
    detailed_prompt = refine_prompt(user_text)
    sampling_rate, audio = generate_music(
        detailed_prompt, max_new_tokens=int(token_count)
    )
    # generate_music() writes the 16-bit WAV to this fixed path.
    # NOTE(review): the path is shared, so overlapping requests would
    # overwrite each other's file.
    return detailed_prompt, (sampling_rate, audio), "/tmp/output.wav"


# Build Gradio UI
with gr.Blocks() as demo:
    # Single-line literal with an explicit "\n" so that source indentation
    # cannot end up inside the rendered Markdown.
    gr.Markdown(
        "# 🎵 AI Music Generator\n"
        "Enter a music idea or mood and get a short AI-generated track. (CPU mode)"
    )

    user_input = gr.Textbox(label="Describe the mood or style of music")
    max_tokens = gr.Slider(32, 256, value=128, step=32, label="Length (tokens) for CPU")
    generate_btn = gr.Button("Generate Music")

    refined_output = gr.Textbox(label="Enhanced Prompt by GPT")
    audio_output = gr.Audio(label="Generated Audio", type="numpy")
    # gr.File takes no `file_name` argument; the download name comes from
    # the path returned by the handler.
    download_wav = gr.File(label="Download .wav file")

    generate_btn.click(
        _on_generate,
        inputs=[user_input, max_tokens],
        outputs=[refined_output, audio_output, download_wav],
    )
75
 
76
+ # Launch in SSR mode
77
# `enable_queue` is not a launch() argument in current Gradio releases
# (deprecated in 3.x, removed in 4.x); queuing is enabled via queue().
demo.queue()
demo.launch(server_name="0.0.0.0", server_port=7860, share=False)