Spaces:

Carter-123
/

anycoder-6b2d8e1d

Running

App Files Files Community

Carter-123 committed 22 days ago

Commit

96fc729

verified ·

1 Parent(s): 4d712b3

Upload folder using huggingface_hub

Browse files

Files changed (1) hide show

index.html +228 -19

index.html CHANGED Viewed

@@ -1,19 +1,228 @@
-<!doctype html>
-<html>
-	<head>
-		<meta charset="utf-8" />
-		<meta name="viewport" content="width=device-width" />
-		<title>My static Space</title>
-		<link rel="stylesheet" href="style.css" />
-	</head>
-	<body>
-		<div class="card">
-			<h1>Welcome to your static Space!</h1>
-			<p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
-			<p>
-				Also don't forget to check the
-				<a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
-			</p>
-		</div>
-	</body>
-</html>

+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>ComfyUI Workflow</title>
+    <style>
+        body {
+            font-family: -apple-system, BlinkMacSystemFont, 'SF Pro Text', sans-serif;
+            background-color: #000000;
+            color: #f5f5f7;
+            padding: 40px;
+        }
+        pre {
+            background: #1d1d1f;
+            padding: 24px;
+            border-radius: 12px;
+            overflow-x: auto;
+        }
+    </style>
+</head>
+<body>
+    <h1>ComfyUI Workflow</h1>
+    <p>Error: Invalid JSON format</p>
+    <pre>#!/usr/bin/env python3
+"""
+Text-to-Music Gradio Demo using Riffusion
+Generates music from text prompts via spectrogram diffusion.
+"""
+import gradio as gr
+import torch
+from diffusers import StableDiffusionPipeline
+import numpy as np
+import io
+import os
+from riffusion.spectrogram_image_converter import SpectrogramImageConverter
+from riffusion.audio_utils import audio_buffer_to_wav, normalize_audio
+# Global model cache: filled lazily by get_pipeline() and get_converter()
+_pipe = None  # holds the StableDiffusionPipeline once get_pipeline() has loaded it
+_converter = None  # holds the SpectrogramImageConverter once get_converter() has built it
+def get_pipeline():
+    """Lazy load the Riffusion pipeline."""
+    global _pipe
+    if _pipe is None:
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        print(f"Loading Riffusion model on {device}...")
+        _pipe = StableDiffusionPipeline.from_pretrained(
+            "riffusion/riffusion-model-v1",
+            torch_dtype=torch.float16 if device == "cuda" else torch.float32,
+        )
+        _pipe = _pipe.to(device)
+        _pipe.enable_attention_slicing()
+        print("Model loaded!")
+    return _pipe
+def get_converter():
+    """Lazy load the spectrogram converter."""
+    global _converter
+    if _converter is None:
+        _converter = SpectrogramImageConverter()
+    return _converter
+def generate_music(prompt: str, duration: float, bpm: float, seed: int = None):
+    """
+    Generate music from text prompt using Riffusion.
+    Args:
+        prompt: Text description of desired music
+        duration: Duration in seconds (clamped to model limits)
+        bpm: Beats per minute (affects spectrogram parameters)
+        seed: Random seed for reproducibility
+    Returns:
+        Tuple of (audio_path, spectrogram_path) for Gradio
+    """
+    # Clamp duration to reasonable range (Riffusion works best ~5-10s)
+    duration = max(2.0, min(duration, 10.0))
+    # Adjust prompt with BPM hint if provided
+    full_prompt = f"{prompt}, {int(bpm)} bpm" if bpm > 0 else prompt
+    pipe = get_pipeline()
+    converter = get_converter()
+    # Set seed for reproducibility
+    if seed is None or seed < 0:
+        seed = np.random.randint(0, 2**32)
+    generator = torch.Generator(device=pipe.device).manual_seed(seed)
+    print(f"Generating: '{full_prompt}' ({duration}s @ {bpm} BPM, seed={seed})")
+    # Generate spectrogram image
+    # Riffusion generates 512x512 spectrograms ~5 seconds of audio
+    image = pipe(
+        full_prompt,
+        num_inference_steps=50,
+        guidance_scale=7.5,
+        generator=generator,
+        height=512,
+        width=512,
+    ).images[0]
+    # Convert spectrogram to audio
+    audio = converter.spectrogram_to_audio(image, duration=duration)
+    audio = normalize_audio(audio)
+    # Save outputs
+    os.makedirs("outputs", exist_ok=True)
+    base_name = f"output_{seed % 10000:04d}"
+    audio_path = f"outputs/{base_name}.wav"
+    spec_path = f"outputs/{base_name}_spectrogram.png"
+    # Save audio
+    wav_buffer = audio_buffer_to_wav(audio, sample_rate=converter.sample_rate)
+    with open(audio_path, "wb") as f:
+        f.write(wav_buffer.getvalue())
+    # Save spectrogram for visualization
+    image.save(spec_path)
+    print(f"Saved: {audio_path}")
+    return audio_path, spec_path
+def create_interface():
+    """Create and configure the Gradio interface."""
+    with gr.Blocks(title="Text-to-Music with Riffusion") as demo:
+        gr.Markdown("""
+        # 🎵 Text-to-Music Generator
+        Generate music from text descriptions using **Riffusion** -
+        a Stable Diffusion model trained on spectrograms.
+        *Examples: "jazz piano solo", "upbeat electronic dance music",
+        "acoustic guitar folk melody", "dark ambient synth drone"*
+        """)
+        with gr.Row():
+            with gr.Column(scale=2):
+                prompt_input = gr.Textbox(
+                    label="Music Description",
+                    placeholder="Describe the music you want to hear...",
+                    value="smooth jazz saxophone solo, relaxing, nighttime",
+                    lines=2,
+                )
+                with gr.Row():
+                    duration_slider = gr.Slider(
+                        minimum=2.0,
+                        maximum=10.0,
+                        value=5.0,
+                        step=0.5,
+                        label="Duration (seconds)",
+                    )
+                    bpm_slider = gr.Slider(
+                        minimum=60,
+                        maximum=180,
+                        value=120,
+                        step=5,
+                        label="Tempo (BPM)",
+                    )
+                seed_input = gr.Number(
+                    label="Seed (-1 for random)",
+                    value=-1,
+                    precision=0,
+                )
+                generate_btn = gr.Button("🎹 Generate Music", variant="primary")
+            with gr.Column(scale=1):
+                audio_output = gr.Audio(
+                    label="Generated Music",
+                    type="filepath",
+                )
+                spec_output = gr.Image(
+                    label="Spectrogram Visualization",
+                    type="filepath",
+                )
+        # Examples
+        gr.Examples(
+            examples=[
+                ["piano ballad, emotional, cinematic", 6.0, 70, -1],
+                ["funky bass guitar groove, 1970s style", 5.0, 110, -1],
+                ["ethereal ambient pads, space atmosphere", 8.0, 60, -1],
+                ["heavy metal guitar riff, aggressive", 4.0, 140, -1],
+                ["classical violin concerto, elegant", 7.0, 90, -1],
+            ],
+            inputs=[prompt_input, duration_slider, bpm_slider, seed_input],
+            outputs=[audio_output, spec_output],
+            fn=generate_music,
+            cache_examples=False,
+        )
+        gr.Markdown("""
+        ### How it works
+        1. Your text prompt is used to generate a **spectrogram image** via Stable Diffusion
+        2. The spectrogram is converted back to **audio waveforms** using the Short-Time Fourier Transform (STFT)
+        3. The resulting audio is normalized and returned as a playable WAV file
+        *Note: First generation will download the model (~1.5GB).*
+        """)
+        # Event handlers
+        generate_btn.click(
+            fn=generate_music,
+            inputs=[prompt_input, duration_slider, bpm_slider, seed_input],
+            outputs=[audio_output, spec_output],
+        )
+    return demo
+if __name__ == "__main__":  # start the demo only when run as a script
+    demo = create_interface()
+    demo.launch(
+        server_name="0.0.0.0",  # listen on all network interfaces, not just localhost
+        server_port=7860,
+        share=False,  # no public share link is requested
+        show_error=True,
+    )</pre>
+</body>
+</html>