kmaes committed on
Commit
5838ea5
·
verified ·
1 Parent(s): f9d7844

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -109
app.py CHANGED
@@ -1,12 +1,9 @@
1
  """
2
  VR Music Generator - HuggingFace Spaces Version
3
  Generates music from text descriptions using the text2midi AI model.
4
- Exposes a Gradio API for Unity integration.
5
- Audio is streamed directly - no files are persisted.
6
  """
7
  import gradio as gr
8
  import torch
9
- import torch.nn as nn
10
  import subprocess
11
  import os
12
  import sys
@@ -94,38 +91,30 @@ def generate_midi_with_model(prompt: str, output_path: str, max_len: int = 512,
94
  """Generate MIDI using the text2midi model."""
95
  global text2midi_model, midi_tokenizer, text_tokenizer
96
 
97
- # Tokenize input text
98
  inputs = text_tokenizer(prompt, return_tensors='pt', padding=True, truncation=True)
99
  input_ids = inputs.input_ids.to(device)
100
  attention_mask = inputs.attention_mask.to(device)
101
 
102
- # Generate MIDI tokens
103
  with torch.no_grad():
104
  output = text2midi_model.generate(input_ids, attention_mask, max_len=max_len, temperature=temperature)
105
 
106
  output_list = output[0].tolist()
107
-
108
- # Decode to MIDI
109
  generated_midi = midi_tokenizer.decode(output_list)
110
  generated_midi.dump_midi(output_path)
111
-
112
  return output_path
113
 
114
  def midi_to_wav(midi_path: str, wav_path: str, sample_rate: int = 44100) -> bool:
115
- """
116
- Convert MIDI to WAV using FluidSynth.
117
- Returns True if successful, False otherwise.
118
- """
119
  if not SOUNDFONT_PATH:
120
  return False
121
 
122
  result = subprocess.run([
123
  "fluidsynth",
124
- "-ni", # No interactive mode
125
- "-F", wav_path, # Output file
126
- "-r", str(sample_rate), # Sample rate
127
- SOUNDFONT_PATH, # SoundFont file
128
- midi_path, # MIDI file
129
  ], capture_output=True, text=True, timeout=120)
130
 
131
  if result.returncode != 0:
@@ -134,24 +123,13 @@ def midi_to_wav(midi_path: str, wav_path: str, sample_rate: int = 44100) -> bool
134
 
135
  return os.path.exists(wav_path)
136
 
137
- def generate_music(prompt: str, max_length: int = 512, temperature: float = 0.9):
138
- """
139
- Generate music from text prompt.
140
- Returns audio file path for Gradio to serve.
141
-
142
- Args:
143
- prompt: Text description of the music to generate
144
- max_length: Maximum length in tokens (256-2048)
145
- temperature: Generation temperature (0.1-1.5)
146
-
147
- Returns:
148
- Tuple of (audio_filepath, status_message)
149
- """
150
  if not prompt or not prompt.strip():
151
- return None, "Please enter a music prompt"
152
 
153
  try:
154
- # Create temporary files - Gradio will handle cleanup of the WAV
155
  midi_file = tempfile.NamedTemporaryFile(suffix='.mid', delete=False)
156
  midi_path = midi_file.name
157
  midi_file.close()
@@ -161,13 +139,10 @@ def generate_music(prompt: str, max_length: int = 512, temperature: float = 0.9)
161
  wav_file.close()
162
 
163
  try:
164
- # Generate MIDI using the model or fallback
165
  if MODEL_LOADED:
166
- status_prefix = "AI-generated"
167
- generate_midi_with_model(prompt, midi_path, max_len=int(max_length), temperature=temperature)
168
  else:
169
- status_prefix = "Simple"
170
- # Fallback: create simple MIDI
171
  from midiutil import MIDIFile
172
  midi = MIDIFile(1)
173
  midi.addTempo(0, 0, 120)
@@ -177,18 +152,13 @@ def generate_music(prompt: str, max_length: int = 512, temperature: float = 0.9)
177
  with open(midi_path, "wb") as f:
178
  midi.writeFile(f)
179
 
180
- # Convert MIDI to WAV
181
- if SOUNDFONT_PATH:
182
- if midi_to_wav(midi_path, wav_path):
183
- status_msg = f"{status_prefix} audio for: '{prompt[:50]}...'" if len(prompt) > 50 else f"{status_prefix} audio for: '{prompt}'"
184
- return wav_path, status_msg
185
- else:
186
- return None, "Error: FluidSynth conversion failed"
187
  else:
188
- return None, "Error: FluidSynth/SoundFont not available"
189
 
190
  finally:
191
- # Clean up MIDI file (WAV is kept for Gradio to serve)
192
  try:
193
  os.unlink(midi_path)
194
  except:
@@ -197,67 +167,27 @@ def generate_music(prompt: str, max_length: int = 512, temperature: float = 0.9)
197
  except Exception as e:
198
  import traceback
199
  traceback.print_exc()
200
- return None, f"Error: {str(e)}"
201
-
202
- # Create Gradio interface with API enabled
203
- with gr.Blocks(title="VR Music Generator") as demo:
204
- gr.Markdown("# VR Game Music Generator")
205
- gr.Markdown("Generate music from text descriptions using the text2midi AI model")
206
-
207
- if not MODEL_LOADED:
208
- gr.Markdown("**Warning:** AI model not loaded - using simple placeholder MIDI")
209
- if not SOUNDFONT_PATH:
210
- gr.Markdown("**Note:** FluidSynth not configured - audio generation disabled")
211
-
212
- with gr.Row():
213
- with gr.Column():
214
- prompt_input = gr.Textbox(
215
- label="Music Prompt",
216
- placeholder="A cheerful pop song with piano and drums in C major at 120 BPM",
217
- lines=3
218
- )
219
- with gr.Row():
220
- max_length = gr.Slider(
221
- minimum=256,
222
- maximum=2048,
223
- value=512,
224
- step=256,
225
- label="Max Length (tokens)"
226
- )
227
- temperature = gr.Slider(
228
- minimum=0.1,
229
- maximum=1.5,
230
- value=0.9,
231
- step=0.1,
232
- label="Temperature"
233
- )
234
- generate_btn = gr.Button("Generate Music", variant="primary")
235
-
236
- with gr.Column():
237
- audio_output = gr.Audio(label="Generated Music", type="filepath")
238
- status_output = gr.Textbox(label="Status", lines=2)
239
-
240
- generate_btn.click(
241
- fn=generate_music,
242
- inputs=[prompt_input, max_length, temperature],
243
- outputs=[audio_output, status_output]
244
- )
245
-
246
- gr.Markdown("---")
247
- gr.Markdown("""
248
- **Example prompts:**
249
- - A cheerful and melodic pop Christmas song featuring piano, acoustic guitar, and drums
250
- - An energetic electronic trance track with synth bass and drums at 138 BPM
251
- - A slow and emotional classical piece featuring cello and violin in C minor
252
- - A cinematic electronic soundtrack with an epic and dark atmosphere
253
-
254
- **API Usage (for Unity):**
255
- ```csharp
256
- // POST to: https://YOUR-SPACE.hf.space/api/generate
257
- // Body: {"data": ["your music prompt", 512, 0.9]}
258
- // Response: {"data": [{"path": "audio_url", ...}, "status"]}
259
- ```
260
- """)
261
-
262
- # Launch the app
263
  demo.launch()
 
1
  """
2
  VR Music Generator - HuggingFace Spaces Version
3
  Generates music from text descriptions using the text2midi AI model.
 
 
4
  """
5
  import gradio as gr
6
  import torch
 
7
  import subprocess
8
  import os
9
  import sys
 
91
  """Generate MIDI using the text2midi model."""
92
  global text2midi_model, midi_tokenizer, text_tokenizer
93
 
 
94
  inputs = text_tokenizer(prompt, return_tensors='pt', padding=True, truncation=True)
95
  input_ids = inputs.input_ids.to(device)
96
  attention_mask = inputs.attention_mask.to(device)
97
 
 
98
  with torch.no_grad():
99
  output = text2midi_model.generate(input_ids, attention_mask, max_len=max_len, temperature=temperature)
100
 
101
  output_list = output[0].tolist()
 
 
102
  generated_midi = midi_tokenizer.decode(output_list)
103
  generated_midi.dump_midi(output_path)
 
104
  return output_path
105
 
106
  def midi_to_wav(midi_path: str, wav_path: str, sample_rate: int = 44100) -> bool:
107
+ """Convert MIDI to WAV using FluidSynth."""
 
 
 
108
  if not SOUNDFONT_PATH:
109
  return False
110
 
111
  result = subprocess.run([
112
  "fluidsynth",
113
+ "-ni",
114
+ "-F", wav_path,
115
+ "-r", str(sample_rate),
116
+ SOUNDFONT_PATH,
117
+ midi_path,
118
  ], capture_output=True, text=True, timeout=120)
119
 
120
  if result.returncode != 0:
 
123
 
124
  return os.path.exists(wav_path)
125
 
126
+ def generate_music(prompt: str):
127
+ """Generate music from text prompt. Returns audio file path."""
 
 
 
 
 
 
 
 
 
 
 
128
  if not prompt or not prompt.strip():
129
+ return None
130
 
131
  try:
132
+ # Create temporary files
133
  midi_file = tempfile.NamedTemporaryFile(suffix='.mid', delete=False)
134
  midi_path = midi_file.name
135
  midi_file.close()
 
139
  wav_file.close()
140
 
141
  try:
142
+ # Generate MIDI
143
  if MODEL_LOADED:
144
+ generate_midi_with_model(prompt, midi_path, max_len=512, temperature=0.9)
 
145
  else:
 
 
146
  from midiutil import MIDIFile
147
  midi = MIDIFile(1)
148
  midi.addTempo(0, 0, 120)
 
152
  with open(midi_path, "wb") as f:
153
  midi.writeFile(f)
154
 
155
+ # Convert to WAV
156
+ if SOUNDFONT_PATH and midi_to_wav(midi_path, wav_path):
157
+ return wav_path
 
 
 
 
158
  else:
159
+ return None
160
 
161
  finally:
 
162
  try:
163
  os.unlink(midi_path)
164
  except:
 
167
  except Exception as e:
168
  import traceback
169
  traceback.print_exc()
170
+ return None
171
+
172
+ # Create simple Gradio Interface (avoids schema generation bugs in gr.Blocks)
173
+ demo = gr.Interface(
174
+ fn=generate_music,
175
+ inputs=gr.Textbox(
176
+ label="Music Prompt",
177
+ placeholder="A cheerful pop song with piano and drums in C major",
178
+ lines=2
179
+ ),
180
+ outputs=gr.Audio(label="Generated Music", type="filepath"),
181
+ title="VR Game Music Generator",
182
+ description="Generate music from text descriptions using AI. Enter a prompt describing the music you want.",
183
+ examples=[
184
+ ["A cheerful pop song with piano and drums"],
185
+ ["An energetic electronic trance track at 138 BPM"],
186
+ ["A slow emotional classical piece with violin"],
187
+ ["Epic cinematic soundtrack with dark atmosphere"],
188
+ ],
189
+ allow_flagging="never"
190
+ )
191
+
192
+ # Launch
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
  demo.launch()