Spaces:

kmaes
/

TextToAudio

Sleeping

App Files Community

kmaes committed on Jan 19

Commit

ad462a2

verified ·

1 Parent(s): 94e758a

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -38

app.py CHANGED Viewed

@@ -12,9 +12,6 @@ import os
 import sys
 import pickle
 import tempfile
-import io
-import numpy as np
-from scipy.io import wavfile
 from huggingface_hub import hf_hub_download
 # Add text2midi model to path
@@ -114,45 +111,33 @@ def generate_midi_with_model(prompt: str, output_path: str, max_len: int = 512,
     return output_path
-def midi_to_audio_bytes(midi_path: str, sample_rate: int = 44100) -> tuple:
     """
-    Convert MIDI to audio using FluidSynth, returning numpy array.
-    Uses stdout piping to avoid creating intermediate files.
     """
     if not SOUNDFONT_PATH:
-        return None
-    # Use FluidSynth to render MIDI to raw audio via stdout
-    # -T raw outputs raw audio, -F - outputs to stdout
     result = subprocess.run([
         "fluidsynth",
         "-ni",                    # No interactive mode
-        "-T", "raw",              # Output raw audio format
-        "-F", "-",                # Output to stdout
         "-r", str(sample_rate),   # Sample rate
         SOUNDFONT_PATH,           # SoundFont file
         midi_path,                # MIDI file
-    ], capture_output=True, timeout=120)
     if result.returncode != 0:
-        print(f"FluidSynth error: {result.stderr.decode()}")
-        return None
-    # Convert raw audio bytes to numpy array (16-bit signed, stereo)
-    audio_data = np.frombuffer(result.stdout, dtype=np.int16)
-    # FluidSynth outputs stereo by default, reshape if needed
-    if len(audio_data) > 0:
-        # Convert to float32 normalized [-1, 1] for Gradio
-        audio_float = audio_data.astype(np.float32) / 32768.0
-        return (sample_rate, audio_float)
-    return None
 def generate_music(prompt: str, max_length: int = 512, temperature: float = 0.9):
     """
     Generate music from text prompt.
-    Returns audio data directly without saving files.
     Args:
         prompt: Text description of the music to generate
@@ -160,16 +145,20 @@ def generate_music(prompt: str, max_length: int = 512, temperature: float = 0.9)
         temperature: Generation temperature (0.1-1.5)
     Returns:
-        Tuple of (audio_data, status_message)
-        audio_data is (sample_rate, numpy_array) for Gradio
     """
     if not prompt or not prompt.strip():
         return None, "Please enter a music prompt"
     try:
-        # Create temporary MIDI file (auto-deleted when closed)
-        with tempfile.NamedTemporaryFile(suffix='.mid', delete=False) as midi_file:
-            midi_path = midi_file.name
         try:
             # Generate MIDI using the model or fallback
@@ -188,18 +177,18 @@ def generate_music(prompt: str, max_length: int = 512, temperature: float = 0.9)
                 with open(midi_path, "wb") as f:
                     midi.writeFile(f)
-            # Convert MIDI to audio
             if SOUNDFONT_PATH:
-                audio_result = midi_to_audio_bytes(midi_path)
-                if audio_result:
-                    return audio_result, f"{status_prefix} audio for: '{prompt[:50]}...'" if len(prompt) > 50 else f"{status_prefix} audio for: '{prompt}'"
                 else:
-                    return None, f"Error: FluidSynth conversion failed"
             else:
-                return None, f"Error: FluidSynth/SoundFont not available"
         finally:
-            # Clean up temporary MIDI file
             try:
                 os.unlink(midi_path)
             except:
@@ -245,7 +234,7 @@ with gr.Blocks(title="VR Music Generator") as demo:
             generate_btn = gr.Button("Generate Music", variant="primary")
         with gr.Column():
-            audio_output = gr.Audio(label="Generated Music", type="numpy")
             status_output = gr.Textbox(label="Status", lines=2)
     generate_btn.click(

 import sys
 import pickle
 import tempfile
 from huggingface_hub import hf_hub_download
 # Add text2midi model to path
     return output_path
+def midi_to_wav(midi_path: str, wav_path: str, sample_rate: int = 44100) -> bool:
     """
+    Convert MIDI to WAV using FluidSynth.
+    Returns True if successful, False otherwise.
     """
     if not SOUNDFONT_PATH:
+        return False
     result = subprocess.run([
         "fluidsynth",
         "-ni",                    # No interactive mode
+        "-F", wav_path,           # Output file
         "-r", str(sample_rate),   # Sample rate
         SOUNDFONT_PATH,           # SoundFont file
         midi_path,                # MIDI file
+    ], capture_output=True, text=True, timeout=120)
     if result.returncode != 0:
+        print(f"FluidSynth error: {result.stderr}")
+        return False
+    return os.path.exists(wav_path)
 def generate_music(prompt: str, max_length: int = 512, temperature: float = 0.9):
     """
     Generate music from text prompt.
+    Returns audio file path for Gradio to serve.
     Args:
         prompt: Text description of the music to generate
         temperature: Generation temperature (0.1-1.5)
     Returns:
+        Tuple of (audio_filepath, status_message)
     """
     if not prompt or not prompt.strip():
         return None, "Please enter a music prompt"
     try:
+        # Create temporary files - Gradio will handle cleanup of the WAV
+        midi_file = tempfile.NamedTemporaryFile(suffix='.mid', delete=False)
+        midi_path = midi_file.name
+        midi_file.close()
+        wav_file = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
+        wav_path = wav_file.name
+        wav_file.close()
         try:
             # Generate MIDI using the model or fallback
                 with open(midi_path, "wb") as f:
                     midi.writeFile(f)
+            # Convert MIDI to WAV
             if SOUNDFONT_PATH:
+                if midi_to_wav(midi_path, wav_path):
+                    status_msg = f"{status_prefix} audio for: '{prompt[:50]}...'" if len(prompt) > 50 else f"{status_prefix} audio for: '{prompt}'"
+                    return wav_path, status_msg
                 else:
+                    return None, "Error: FluidSynth conversion failed"
             else:
+                return None, "Error: FluidSynth/SoundFont not available"
         finally:
+            # Clean up MIDI file (WAV is kept for Gradio to serve)
             try:
                 os.unlink(midi_path)
             except:
             generate_btn = gr.Button("Generate Music", variant="primary")
         with gr.Column():
+            audio_output = gr.Audio(label="Generated Music", type="filepath")
             status_output = gr.Textbox(label="Status", lines=2)
     generate_btn.click(