Spaces:

Carter-123
/

anycoder-a72297b2

Runtime error

App Files Files Community

Carter-123 committed 14 days ago

Commit

8ac663f

verified ·

1 Parent(s): 098bb99

Upload folder using huggingface_hub

Browse files

Files changed (2) hide show

app.py +356 -0
requirements.txt +5 -0

app.py ADDED Viewed

	@@ -0,0 +1,356 @@

+import gradio as gr
+import torch
+import torchaudio
+import numpy as np
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import os
+import tempfile
+from typing import Tuple, Optional
+# Global variables for model caching
+_model = None
+_tokenizer = None
+def load_model():
+    """Lazy load the SongGen model"""
+    global _model, _tokenizer
+    if _model is None or _tokenizer is None:
+        print("Loading SongGen model...")
+        # Using a smaller music generation model optimized for CPU
+        # SongGen or similar music generation model
+        model_name = "facebook/musicgen-small"
+        try:
+            from transformers import MusicgenForConditionalGeneration, AutoProcessor
+            _model = MusicgenForConditionalGeneration.from_pretrained(
+                model_name,
+                torch_dtype=torch.float32,  # Use float32 for CPU compatibility
+                low_cpu_mem_usage=True
+            )
+            _tokenizer = AutoProcessor.from_pretrained(model_name)
+            # Move to CPU
+            _model = _model.to("cpu")
+            _model.eval()
+            print("Model loaded successfully!")
+        except Exception as e:
+            print(f"Error loading model: {e}")
+            # Fallback to a simpler approach if model fails to load
+            raise gr.Error(f"Failed to load model: {str(e)}")
+    return _model, _tokenizer
+def generate_music(
+    prompt: str,
+    duration: float,
+    guidance_scale: float,
+    num_inference_steps: int,
+    temperature: float,
+    top_k: int,
+    top_p: float,
+    progress: gr.Progress = gr.Progress()
+) -> Tuple[Optional[str], str]:
+    """
+    Generate music based on text prompt using SongGen/MusicGen model.
+    Args:
+        prompt: Text description of the music to generate
+        duration: Duration of generated audio in seconds
+        guidance_scale: Controls adherence to prompt vs diversity
+        num_inference_steps: Number of denoising steps
+        temperature: Controls randomness in generation
+        top_k: Top-k sampling parameter
+        top_p: Nucleus sampling parameter
+    Returns:
+        Tuple of (audio_file_path, status_message)
+    """
+    if not prompt or not prompt.strip():
+        return None, "❌ Please enter a prompt describing the music you want to generate."
+    try:
+        progress(0.1, desc="Loading model...")
+        model, processor = load_model()
+        progress(0.2, desc="Preparing inputs...")
+        # Process the prompt
+        inputs = processor(
+            text=[prompt],
+            return_tensors="pt",
+            padding=True
+        )
+        # Calculate max length based on duration (50 tokens per second approximately)
+        max_length = min(int(duration * 50), 1500)  # Cap at reasonable length
+        progress(0.3, desc="Generating music...")
+        # Generate with progress tracking
+        def progress_callback(step, timestep, latents):
+            progress_val = 0.3 + (0.6 * step / num_inference_steps)
+            progress(progress_val, desc=f"Generating... step {step}/{num_inference_steps}")
+        # Generate audio
+        with torch.no_grad():
+            audio_values = model.generate(
+                **inputs,
+                max_new_tokens=max_length,
+                guidance_scale=guidance_scale,
+                num_inference_steps=num_inference_steps,
+                temperature=temperature,
+                top_k=top_k,
+                top_p=top_p,
+                do_sample=True,
+            )
+        progress(0.9, desc="Processing audio...")
+        # Convert to numpy and save
+        audio_np = audio_values[0, 0].cpu().numpy()
+        # Normalize audio to [-1, 1] range
+        audio_np = audio_np / np.max(np.abs(audio_np))
+        # Get sample rate from model config
+        sample_rate = model.config.audio_encoder.sampling_rate
+        # Save to temporary file
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
+            output_path = tmp_file.name
+        # Save as wav file
+        torchaudio.save(
+            output_path,
+            torch.tensor(audio_np).unsqueeze(0),
+            sample_rate=sample_rate,
+            format="wav"
+        )
+        progress(1.0, desc="Complete!")
+        # Create success message with metadata
+        info_msg = f"""✅ Music generated successfully!
+🎵 **Prompt:** {prompt}
+⏱️ **Duration:** {len(audio_np) / sample_rate:.2f}s
+🎚️ **Sample Rate:** {sample_rate}Hz
+🔧 **Settings:** guidance={guidance_scale}, steps={num_inference_steps}, temp={temperature}"""
+        return output_path, info_msg
+    except Exception as e:
+        return None, f"❌ Error generating music: {str(e)}"
+def create_examples():
+    """Create example prompts for the UI"""
+    return [
+        ["Upbeat electronic dance music with a strong bass line and energetic synths", 8.0, 3.0, 50, 1.0, 250, 0.99],
+        ["Calm ambient piano music with soft strings, peaceful and relaxing", 10.0, 3.5, 50, 0.8, 250, 0.95],
+        ["Epic orchestral soundtrack with brass and percussion, cinematic and dramatic", 10.0, 4.0, 50, 1.0, 250, 0.99],
+        ["Lo-fi hip hop beats with jazzy chords, chill and study music", 8.0, 2.5, 50, 0.9, 250, 0.95],
+        ["Acoustic guitar folk melody, warm and nostalgic", 6.0, 3.0, 50, 0.85, 250, 0.95],
+        ["Cyberpunk synthwave with retro 80s vibes, driving and energetic", 8.0, 3.0, 50, 1.0, 250, 0.99],
+    ]
+# Custom theme for modern UI: starts from Gradio's built-in Soft theme with an
+# indigo/violet/slate palette, the "Inter" Google font and large text, spacing
+# and corner-radius presets.
+custom_theme = gr.themes.Soft(
+    primary_hue="indigo",
+    secondary_hue="violet",
+    neutral_hue="slate",
+    font=gr.themes.GoogleFont("Inter"),
+    text_size="lg",
+    spacing_size="lg",
+    radius_size="lg"
+).set(
+    # Per-variable overrides; values starting with "*" refer to other theme
+    # variables (e.g. a shade of the primary hue) rather than literal values.
+    button_primary_background_fill="*primary_600",
+    button_primary_background_fill_hover="*primary_700",
+    block_title_text_weight="600",
+    block_title_text_size="*text_lg",
+    block_background_fill="*neutral_50",
+    block_border_width="1px",
+    block_border_color="*neutral_200",
+)
+# Build the UI. Components are laid out in the order they are created inside
+# the nested Row/Column context managers, so statement order here matters.
+with gr.Blocks() as demo:
+    # Header with branding
+    with gr.Row():
+        with gr.Column():
+            gr.Markdown("""
+            # 🎵 SongGen Music Generator
+            Generate custom music from text descriptions using AI.
+            Powered by MusicGen - Meta's state-of-the-art music generation model.
+            [Built with anycoder](https://huggingface.co/spaces/akhaliq/anycoder)
+            """)
+    # Main area: two equally weighted columns (controls | output)
+    with gr.Row():
+        # Left panel - Controls
+        with gr.Column(scale=1):
+            gr.Markdown("### 🎛️ Generation Settings")
+            prompt_input = gr.Textbox(
+                label="Music Description",
+                placeholder="Describe the music you want to generate...",
+                lines=3,
+                info="Be specific about genre, instruments, mood, and tempo",
+                value="Upbeat electronic dance music with energetic synths and strong bass"
+            )
+            # Sampling controls, collapsed by default. Together with the
+            # prompt, these sliders are the inputs handed to generate_music.
+            with gr.Accordion("Advanced Settings", open=False):
+                duration_slider = gr.Slider(
+                    minimum=3,
+                    maximum=15,
+                    value=8,
+                    step=0.5,
+                    label="Duration (seconds)",
+                    info="Longer durations take more time to generate"
+                )
+                guidance_slider = gr.Slider(
+                    minimum=1.0,
+                    maximum=5.0,
+                    value=3.0,
+                    step=0.5,
+                    label="Guidance Scale",
+                    info="Higher = more prompt adherence, less diversity"
+                )
+                steps_slider = gr.Slider(
+                    minimum=10,
+                    maximum=100,
+                    value=50,
+                    step=5,
+                    label="Inference Steps",
+                    info="More steps = higher quality, slower generation"
+                )
+                temperature_slider = gr.Slider(
+                    minimum=0.5,
+                    maximum=1.5,
+                    value=1.0,
+                    step=0.05,
+                    label="Temperature",
+                    info="Higher = more random/creative"
+                )
+                topk_slider = gr.Slider(
+                    minimum=50,
+                    maximum=500,
+                    value=250,
+                    step=50,
+                    label="Top-K",
+                    info="Limits vocabulary for sampling"
+                )
+                topp_slider = gr.Slider(
+                    minimum=0.8,
+                    maximum=1.0,
+                    value=0.99,
+                    step=0.01,
+                    label="Top-P (Nucleus)",
+                    info="Cumulative probability threshold"
+                )
+            generate_btn = gr.Button(
+                "🎵 Generate Music",
+                variant="primary",
+                size="lg"
+            )
+            # Status and info: read-only box that receives the second value
+            # returned by generate_music (success summary or error text)
+            status_output = gr.Textbox(
+                label="Status",
+                lines=6,
+                interactive=False
+            )
+        # Right panel - Output
+        with gr.Column(scale=1):
+            gr.Markdown("### 🎧 Generated Music")
+            # type="filepath" matches generate_music, which returns the path
+            # of the generated .wav file (or None on failure).
+            # NOTE(review): confirm `show_download_button` is still accepted
+            # by the Gradio version pinned in requirements.txt.
+            audio_output = gr.Audio(
+                label="Generated Audio",
+                type="filepath",
+                autoplay=False,
+                show_download_button=True,
+                waveform_options=gr.WaveformOptions(
+                    waveform_color="#4f46e5",
+                    waveform_progress_color="#7c3aed",
+                    show_recording_waveform=False
+                )
+            )
+            # Tips section
+            with gr.Accordion("💡 Tips for Better Results", open=True):
+                gr.Markdown("""
+                **Prompt Engineering Tips:**
+                1. **Be specific about genre:** "electronic", "classical", "jazz", "rock"
+                2. **Mention instruments:** "piano", "synthesizers", "drums", "strings"
+                3. **Describe the mood:** "upbeat", "melancholic", "energetic", "calm"
+                4. **Add tempo hints:** "fast tempo", "slow ballad", "medium groove"
+                5. **Use reference styles:** "like 80s synthwave", "cinematic soundtrack"
+                **Example prompts:**
+                - "Upbeat pop with catchy synth melody and electronic drums"
+                - "Sad piano ballad with emotional strings, slow tempo"
+                - "Heavy metal with distorted guitars and aggressive drums"
+                """)
+    # Examples section: each row from create_examples() maps onto the seven
+    # inputs below, in the same order. Only `inputs` is supplied here (no
+    # `fn`/`outputs`).
+    gr.Markdown("### 🎯 Quick Examples")
+    examples = gr.Examples(
+        examples=create_examples(),
+        inputs=[prompt_input, duration_slider, guidance_slider, steps_slider,
+                temperature_slider, topk_slider, topp_slider],
+        label="Click to load example",
+        examples_per_page=3
+    )
+    # Footer
+    gr.Markdown("""
+    ---
+    Made with ❤️ using Gradio and MusicGen |
+    [Built with anycoder](https://huggingface.co/spaces/akhaliq/anycoder)
+    """)
+    # Event handlers: the inputs are listed in the same order as the
+    # parameters of generate_music, and the two outputs receive its
+    # (audio_file_path, status_message) return tuple.
+    generate_btn.click(
+        fn=generate_music,
+        inputs=[
+            prompt_input,
+            duration_slider,
+            guidance_slider,
+            steps_slider,
+            temperature_slider,
+            topk_slider,
+            topp_slider
+        ],
+        outputs=[audio_output, status_output],
+        api_visibility="public"
+    )
+# Launch with Gradio 6 syntax - all parameters in launch()
+# The theme, page title and footer links are supplied here rather than to
+# gr.Blocks(). This runs at import time (there is no __main__ guard).
+# NOTE(review): confirm that `title` and dict-style `footer_links` entries are
+# accepted by launch() in the Gradio version pinned in requirements.txt.
+demo.launch(
+    theme=custom_theme,
+    title="SongGen Music Generator",
+    footer_links=[
+        {"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"},
+        {"label": "Gradio", "url": "https://gradio.app"},
+        {"label": "Settings", "url": "#"}
+    ],
+    show_error=True,
+    quiet=False
+)

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+gradio>=6.0.2
+numpy
+torch
+torchaudio
+transformers