SonicDiffusion / app.py
alpercagann's picture
Update: audio-to-image GPU-ready Gradio app
d9df526
raw
history blame contribute delete
963 Bytes
import os
import tempfile

import gradio as gr
import torch
from diffusers import StableDiffusionPipeline
from pydub import AudioSegment
# === Pick the compute device: CUDA when torch reports it, CPU otherwise ===
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# === Load the pretrained pipeline and place it on the chosen device ===
model_id = "stabilityai/stable-diffusion-2-1"
pipe = StableDiffusionPipeline.from_pretrained(model_id).to(device)
# === Define function ===
def generate_image(audio, prompt):
    """Generate an image from ``prompt`` once an audio clip has been supplied.

    Args:
        audio: The uploaded clip. Either an object exposing a pydub-style
            ``export(path, format=...)`` method, or a filesystem path
            (``str`` / ``os.PathLike``) that is decoded with
            ``AudioSegment.from_file``. ``None`` means nothing was uploaded.
        prompt: Text prompt forwarded to the diffusion pipeline.

    Returns:
        The first image produced by ``pipe``, or ``None`` when ``audio`` is
        ``None``.
    """
    if audio is None:
        return None

    # A Gradio Audio component configured with type="filepath" hands over a
    # path rather than a decoded clip; decode it so the export below works
    # for both kinds of input.
    if isinstance(audio, (str, os.PathLike)):
        audio = AudioSegment.from_file(audio)

    # Write the WAV into a per-call scratch directory instead of a fixed
    # "train.wav" in the working directory: concurrent requests would
    # otherwise overwrite each other's file, and the file was never removed.
    with tempfile.TemporaryDirectory() as scratch_dir:
        audio_path = os.path.join(scratch_dir, "train.wav")
        exported = audio.export(audio_path, format="wav")
        # pydub returns the still-open output handle; close it so the
        # scratch directory can be cleaned up on every platform.
        if hasattr(exported, "close"):
            exported.close()

        # NOTE(review): the exported audio is not passed to the pipeline;
        # only the text prompt conditions the generated image.
        result = pipe(prompt, guidance_scale=7.5, num_inference_steps=30).images[0]
    return result
def _generate_from_upload(audio_path, prompt):
    """Decode the uploaded file into an ``AudioSegment`` for ``generate_image``.

    Args:
        audio_path: Path to the uploaded audio file as delivered by the
            ``gr.Audio`` input, or ``None`` when nothing was uploaded.
        prompt: Text prompt entered by the user.

    Returns:
        Whatever ``generate_image`` returns for the decoded clip.
    """
    if audio_path is None:
        return generate_image(None, prompt)
    return generate_image(AudioSegment.from_file(audio_path), prompt)


# Build the web UI: one audio upload plus a text prompt in, one image out.
interface = gr.Interface(
    fn=_generate_from_upload,
    inputs=[
        # gr.Audio only supports type="numpy" or type="filepath"; "pydub" is
        # rejected when the component is constructed, so request a file path
        # and decode it in _generate_from_upload.
        # NOTE(review): `source=` is the Gradio 3.x spelling; Gradio 4+
        # expects `sources=["upload"]` — confirm against the pinned version.
        gr.Audio(source="upload", type="filepath"),
        gr.Textbox(label="Prompt", value="A surreal dreamscape made of music"),
    ],
    outputs=gr.Image(type="pil"),
    # The arrow was mis-encoded ("β†’") in the original title.
    title="🎧 SonicDiffusion: Audio → Image Generator",
)

# Start the Gradio server as soon as the module is executed.
interface.launch()