Spaces:

kmaes
/

TextToAudio

Sleeping

App Files Community

kmaes committed on Jan 19

Commit

5838ea5

verified ·

1 Parent(s): f9d7844

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -109

app.py CHANGED Viewed

@@ -1,12 +1,9 @@
 """
 VR Music Generator - HuggingFace Spaces Version
 Generates music from text descriptions using the text2midi AI model.
-Exposes a Gradio API for Unity integration.
-Audio is streamed directly - no files are persisted.
 """
 import gradio as gr
 import torch
-import torch.nn as nn
 import subprocess
 import os
 import sys
@@ -94,38 +91,30 @@ def generate_midi_with_model(prompt: str, output_path: str, max_len: int = 512,
     """Generate MIDI using the text2midi model."""
     global text2midi_model, midi_tokenizer, text_tokenizer
-    # Tokenize input text
     inputs = text_tokenizer(prompt, return_tensors='pt', padding=True, truncation=True)
     input_ids = inputs.input_ids.to(device)
     attention_mask = inputs.attention_mask.to(device)
-    # Generate MIDI tokens
     with torch.no_grad():
         output = text2midi_model.generate(input_ids, attention_mask, max_len=max_len, temperature=temperature)
     output_list = output[0].tolist()
-    # Decode to MIDI
     generated_midi = midi_tokenizer.decode(output_list)
     generated_midi.dump_midi(output_path)
     return output_path
 def midi_to_wav(midi_path: str, wav_path: str, sample_rate: int = 44100) -> bool:
-    """
-    Convert MIDI to WAV using FluidSynth.
-    Returns True if successful, False otherwise.
-    """
     if not SOUNDFONT_PATH:
         return False
     result = subprocess.run([
         "fluidsynth",
-        "-ni",                    # No interactive mode
-        "-F", wav_path,           # Output file
-        "-r", str(sample_rate),   # Sample rate
-        SOUNDFONT_PATH,           # SoundFont file
-        midi_path,                # MIDI file
     ], capture_output=True, text=True, timeout=120)
     if result.returncode != 0:
@@ -134,24 +123,13 @@ def midi_to_wav(midi_path: str, wav_path: str, sample_rate: int = 44100) -> bool
     return os.path.exists(wav_path)
-def generate_music(prompt: str, max_length: int = 512, temperature: float = 0.9):
-    """
-    Generate music from text prompt.
-    Returns audio file path for Gradio to serve.
-    Args:
-        prompt: Text description of the music to generate
-        max_length: Maximum length in tokens (256-2048)
-        temperature: Generation temperature (0.1-1.5)
-    Returns:
-        Tuple of (audio_filepath, status_message)
-    """
     if not prompt or not prompt.strip():
-        return None, "Please enter a music prompt"
     try:
-        # Create temporary files - Gradio will handle cleanup of the WAV
         midi_file = tempfile.NamedTemporaryFile(suffix='.mid', delete=False)
         midi_path = midi_file.name
         midi_file.close()
@@ -161,13 +139,10 @@ def generate_music(prompt: str, max_length: int = 512, temperature: float = 0.9)
         wav_file.close()
         try:
-            # Generate MIDI using the model or fallback
             if MODEL_LOADED:
-                status_prefix = "AI-generated"
-                generate_midi_with_model(prompt, midi_path, max_len=int(max_length), temperature=temperature)
             else:
-                status_prefix = "Simple"
-                # Fallback: create simple MIDI
                 from midiutil import MIDIFile
                 midi = MIDIFile(1)
                 midi.addTempo(0, 0, 120)
@@ -177,18 +152,13 @@ def generate_music(prompt: str, max_length: int = 512, temperature: float = 0.9)
                 with open(midi_path, "wb") as f:
                     midi.writeFile(f)
-            # Convert MIDI to WAV
-            if SOUNDFONT_PATH:
-                if midi_to_wav(midi_path, wav_path):
-                    status_msg = f"{status_prefix} audio for: '{prompt[:50]}...'" if len(prompt) > 50 else f"{status_prefix} audio for: '{prompt}'"
-                    return wav_path, status_msg
-                else:
-                    return None, "Error: FluidSynth conversion failed"
             else:
-                return None, "Error: FluidSynth/SoundFont not available"
         finally:
-            # Clean up MIDI file (WAV is kept for Gradio to serve)
             try:
                 os.unlink(midi_path)
             except:
@@ -197,67 +167,27 @@ def generate_music(prompt: str, max_length: int = 512, temperature: float = 0.9)
     except Exception as e:
         import traceback
         traceback.print_exc()
-        return None, f"Error: {str(e)}"
-# Create Gradio interface with API enabled
-with gr.Blocks(title="VR Music Generator") as demo:
-    gr.Markdown("# VR Game Music Generator")
-    gr.Markdown("Generate music from text descriptions using the text2midi AI model")
-    if not MODEL_LOADED:
-        gr.Markdown("**Warning:** AI model not loaded - using simple placeholder MIDI")
-    if not SOUNDFONT_PATH:
-        gr.Markdown("**Note:** FluidSynth not configured - audio generation disabled")
-    with gr.Row():
-        with gr.Column():
-            prompt_input = gr.Textbox(
-                label="Music Prompt",
-                placeholder="A cheerful pop song with piano and drums in C major at 120 BPM",
-                lines=3
-            )
-            with gr.Row():
-                max_length = gr.Slider(
-                    minimum=256,
-                    maximum=2048,
-                    value=512,
-                    step=256,
-                    label="Max Length (tokens)"
-                )
-                temperature = gr.Slider(
-                    minimum=0.1,
-                    maximum=1.5,
-                    value=0.9,
-                    step=0.1,
-                    label="Temperature"
-                )
-            generate_btn = gr.Button("Generate Music", variant="primary")
-        with gr.Column():
-            audio_output = gr.Audio(label="Generated Music", type="filepath")
-            status_output = gr.Textbox(label="Status", lines=2)
-    generate_btn.click(
-        fn=generate_music,
-        inputs=[prompt_input, max_length, temperature],
-        outputs=[audio_output, status_output]
-    )
-    gr.Markdown("---")
-    gr.Markdown("""
-**Example prompts:**
-- A cheerful and melodic pop Christmas song featuring piano, acoustic guitar, and drums
-- An energetic electronic trance track with synth bass and drums at 138 BPM
-- A slow and emotional classical piece featuring cello and violin in C minor
-- A cinematic electronic soundtrack with an epic and dark atmosphere
-**API Usage (for Unity):**
-```csharp
-// POST to: https://YOUR-SPACE.hf.space/api/generate
-// Body: {"data": ["your music prompt", 512, 0.9]}
-// Response: {"data": [{"path": "audio_url", ...}, "status"]}
-```
-    """)
-# Launch the app
 demo.launch()

 """
 VR Music Generator - HuggingFace Spaces Version
 Generates music from text descriptions using the text2midi AI model.
 """
 import gradio as gr
 import torch
 import subprocess
 import os
 import sys
     """Generate MIDI using the text2midi model."""
     global text2midi_model, midi_tokenizer, text_tokenizer
     inputs = text_tokenizer(prompt, return_tensors='pt', padding=True, truncation=True)
     input_ids = inputs.input_ids.to(device)
     attention_mask = inputs.attention_mask.to(device)
     with torch.no_grad():
         output = text2midi_model.generate(input_ids, attention_mask, max_len=max_len, temperature=temperature)
     output_list = output[0].tolist()
     generated_midi = midi_tokenizer.decode(output_list)
     generated_midi.dump_midi(output_path)
     return output_path
 def midi_to_wav(midi_path: str, wav_path: str, sample_rate: int = 44100) -> bool:
+    """Convert MIDI to WAV using FluidSynth."""
     if not SOUNDFONT_PATH:
         return False
     result = subprocess.run([
         "fluidsynth",
+        "-ni",
+        "-F", wav_path,
+        "-r", str(sample_rate),
+        SOUNDFONT_PATH,
+        midi_path,
     ], capture_output=True, text=True, timeout=120)
     if result.returncode != 0:
     return os.path.exists(wav_path)
+def generate_music(prompt: str):
+    """Generate music from text prompt. Returns audio file path."""
     if not prompt or not prompt.strip():
+        return None
     try:
+        # Create temporary files
         midi_file = tempfile.NamedTemporaryFile(suffix='.mid', delete=False)
         midi_path = midi_file.name
         midi_file.close()
         wav_file.close()
         try:
+            # Generate MIDI
             if MODEL_LOADED:
+                generate_midi_with_model(prompt, midi_path, max_len=512, temperature=0.9)
             else:
                 from midiutil import MIDIFile
                 midi = MIDIFile(1)
                 midi.addTempo(0, 0, 120)
                 with open(midi_path, "wb") as f:
                     midi.writeFile(f)
+            # Convert to WAV
+            if SOUNDFONT_PATH and midi_to_wav(midi_path, wav_path):
+                return wav_path
             else:
+                return None
         finally:
             try:
                 os.unlink(midi_path)
             except:
     except Exception as e:
         import traceback
         traceback.print_exc()
+        return None
+# Create simple Gradio Interface (avoids schema generation bugs in gr.Blocks)
+demo = gr.Interface(
+    fn=generate_music,
+    inputs=gr.Textbox(
+        label="Music Prompt",
+        placeholder="A cheerful pop song with piano and drums in C major",
+        lines=2
+    ),
+    outputs=gr.Audio(label="Generated Music", type="filepath"),
+    title="VR Game Music Generator",
+    description="Generate music from text descriptions using AI. Enter a prompt describing the music you want.",
+    examples=[
+        ["A cheerful pop song with piano and drums"],
+        ["An energetic electronic trance track at 138 BPM"],
+        ["A slow emotional classical piece with violin"],
+        ["Epic cinematic soundtrack with dark atmosphere"],
+    ],
+    allow_flagging="never"
+)
+# Launch
 demo.launch()