kmaes committed on
Commit
ad462a2
·
verified ·
1 Parent(s): 94e758a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -38
app.py CHANGED
@@ -12,9 +12,6 @@ import os
12
  import sys
13
  import pickle
14
  import tempfile
15
- import io
16
- import numpy as np
17
- from scipy.io import wavfile
18
  from huggingface_hub import hf_hub_download
19
 
20
  # Add text2midi model to path
@@ -114,45 +111,33 @@ def generate_midi_with_model(prompt: str, output_path: str, max_len: int = 512,
114
 
115
  return output_path
116
 
117
- def midi_to_audio_bytes(midi_path: str, sample_rate: int = 44100) -> tuple:
118
  """
119
- Convert MIDI to audio using FluidSynth, returning numpy array.
120
- Uses stdout piping to avoid creating intermediate files.
121
  """
122
  if not SOUNDFONT_PATH:
123
- return None
124
 
125
- # Use FluidSynth to render MIDI to raw audio via stdout
126
- # -T raw outputs raw audio, -F - outputs to stdout
127
  result = subprocess.run([
128
  "fluidsynth",
129
  "-ni", # No interactive mode
130
- "-T", "raw", # Output raw audio format
131
- "-F", "-", # Output to stdout
132
  "-r", str(sample_rate), # Sample rate
133
  SOUNDFONT_PATH, # SoundFont file
134
  midi_path, # MIDI file
135
- ], capture_output=True, timeout=120)
136
 
137
  if result.returncode != 0:
138
- print(f"FluidSynth error: {result.stderr.decode()}")
139
- return None
140
-
141
- # Convert raw audio bytes to numpy array (16-bit signed, stereo)
142
- audio_data = np.frombuffer(result.stdout, dtype=np.int16)
143
-
144
- # FluidSynth outputs stereo by default, reshape if needed
145
- if len(audio_data) > 0:
146
- # Convert to float32 normalized [-1, 1] for Gradio
147
- audio_float = audio_data.astype(np.float32) / 32768.0
148
- return (sample_rate, audio_float)
149
 
150
- return None
151
 
152
  def generate_music(prompt: str, max_length: int = 512, temperature: float = 0.9):
153
  """
154
  Generate music from text prompt.
155
- Returns audio data directly without saving files.
156
 
157
  Args:
158
  prompt: Text description of the music to generate
@@ -160,16 +145,20 @@ def generate_music(prompt: str, max_length: int = 512, temperature: float = 0.9)
160
  temperature: Generation temperature (0.1-1.5)
161
 
162
  Returns:
163
- Tuple of (audio_data, status_message)
164
- audio_data is (sample_rate, numpy_array) for Gradio
165
  """
166
  if not prompt or not prompt.strip():
167
  return None, "Please enter a music prompt"
168
 
169
  try:
170
- # Create temporary MIDI file (auto-deleted when closed)
171
- with tempfile.NamedTemporaryFile(suffix='.mid', delete=False) as midi_file:
172
- midi_path = midi_file.name
 
 
 
 
 
173
 
174
  try:
175
  # Generate MIDI using the model or fallback
@@ -188,18 +177,18 @@ def generate_music(prompt: str, max_length: int = 512, temperature: float = 0.9)
188
  with open(midi_path, "wb") as f:
189
  midi.writeFile(f)
190
 
191
- # Convert MIDI to audio
192
  if SOUNDFONT_PATH:
193
- audio_result = midi_to_audio_bytes(midi_path)
194
- if audio_result:
195
- return audio_result, f"{status_prefix} audio for: '{prompt[:50]}...'" if len(prompt) > 50 else f"{status_prefix} audio for: '{prompt}'"
196
  else:
197
- return None, f"Error: FluidSynth conversion failed"
198
  else:
199
- return None, f"Error: FluidSynth/SoundFont not available"
200
 
201
  finally:
202
- # Clean up temporary MIDI file
203
  try:
204
  os.unlink(midi_path)
205
  except:
@@ -245,7 +234,7 @@ with gr.Blocks(title="VR Music Generator") as demo:
245
  generate_btn = gr.Button("Generate Music", variant="primary")
246
 
247
  with gr.Column():
248
- audio_output = gr.Audio(label="Generated Music", type="numpy")
249
  status_output = gr.Textbox(label="Status", lines=2)
250
 
251
  generate_btn.click(
 
12
  import sys
13
  import pickle
14
  import tempfile
 
 
 
15
  from huggingface_hub import hf_hub_download
16
 
17
  # Add text2midi model to path
 
111
 
112
  return output_path
113
 
114
+ def midi_to_wav(midi_path: str, wav_path: str, sample_rate: int = 44100) -> bool:
115
  """
116
+ Convert MIDI to WAV using FluidSynth.
117
+ Returns True if successful, False otherwise.
118
  """
119
  if not SOUNDFONT_PATH:
120
+ return False
121
 
 
 
122
  result = subprocess.run([
123
  "fluidsynth",
124
  "-ni", # No interactive mode
125
+ "-F", wav_path, # Output file
 
126
  "-r", str(sample_rate), # Sample rate
127
  SOUNDFONT_PATH, # SoundFont file
128
  midi_path, # MIDI file
129
+ ], capture_output=True, text=True, timeout=120)
130
 
131
  if result.returncode != 0:
132
+ print(f"FluidSynth error: {result.stderr}")
133
+ return False
 
 
 
 
 
 
 
 
 
134
 
135
+ return os.path.exists(wav_path)
136
 
137
  def generate_music(prompt: str, max_length: int = 512, temperature: float = 0.9):
138
  """
139
  Generate music from text prompt.
140
+ Returns audio file path for Gradio to serve.
141
 
142
  Args:
143
  prompt: Text description of the music to generate
 
145
  temperature: Generation temperature (0.1-1.5)
146
 
147
  Returns:
148
+ Tuple of (audio_filepath, status_message)
 
149
  """
150
  if not prompt or not prompt.strip():
151
  return None, "Please enter a music prompt"
152
 
153
  try:
154
+ # Create temporary files - Gradio will handle cleanup of the WAV
155
+ midi_file = tempfile.NamedTemporaryFile(suffix='.mid', delete=False)
156
+ midi_path = midi_file.name
157
+ midi_file.close()
158
+
159
+ wav_file = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
160
+ wav_path = wav_file.name
161
+ wav_file.close()
162
 
163
  try:
164
  # Generate MIDI using the model or fallback
 
177
  with open(midi_path, "wb") as f:
178
  midi.writeFile(f)
179
 
180
+ # Convert MIDI to WAV
181
  if SOUNDFONT_PATH:
182
+ if midi_to_wav(midi_path, wav_path):
183
+ status_msg = f"{status_prefix} audio for: '{prompt[:50]}...'" if len(prompt) > 50 else f"{status_prefix} audio for: '{prompt}'"
184
+ return wav_path, status_msg
185
  else:
186
+ return None, "Error: FluidSynth conversion failed"
187
  else:
188
+ return None, "Error: FluidSynth/SoundFont not available"
189
 
190
  finally:
191
+ # Clean up MIDI file (WAV is kept for Gradio to serve)
192
  try:
193
  os.unlink(midi_path)
194
  except:
 
234
  generate_btn = gr.Button("Generate Music", variant="primary")
235
 
236
  with gr.Column():
237
+ audio_output = gr.Audio(label="Generated Music", type="filepath")
238
  status_output = gr.Textbox(label="Status", lines=2)
239
 
240
  generate_btn.click(