# audioldm_api / app.py
# Johann Diedrick
# returning audio
# 83ec4af
from diffusers import AudioLDMPipeline
import torch
import scipy
import gradio as gr
# Hugging Face Hub identifier of the AudioLDM checkpoint served by this app.
repo_id = "cvssp/audioldm-m-full"

# Load the pipeline with float16 weights and move it to the CUDA device.
# This runs once at import time; the inference function reuses `pipe`.
pipe = AudioLDMPipeline.from_pretrained(
    repo_id,
    torch_dtype=torch.float16,
).to("cuda")
def my_inference_function(
    prompt,
    *,
    negative_prompt="low quality, average quality, noisy, distortion, realistic",
    seed=51,
    steps=5,
    guidance=0.1,
    length=10,
):
    """Generate one audio clip for a text prompt with the module-level pipeline.

    Args:
        prompt: Text description of the sound to generate.
        negative_prompt: Text describing what the audio should not sound like.
        seed: Seed for the CUDA random generator, so runs are repeatable.
        steps: Value passed as ``num_inference_steps``.
        guidance: Value passed as ``guidance_scale``.
        length: Requested clip length in seconds (``audio_length_in_s``).

    Returns:
        A ``(sample_rate, waveform)`` tuple, where ``waveform`` is the first
        entry of the pipeline's ``audios`` output. This is the form Gradio's
        audio output component accepts directly.
    """
    # Sample rate the original code used both for playback and for its
    # (removed) wavfile.write call.
    sample_rate = 16000

    # NOTE(review): per the diffusers docs, guidance is only enabled when
    # guidance_scale > 1 and the negative prompt is ignored otherwise, so
    # with the default of 0.1 `negative_prompt` likely has no effect.
    # The default is kept to preserve existing output; confirm intent.
    generator = torch.Generator(device="cuda").manual_seed(seed)

    audio = pipe(
        prompt,
        num_inference_steps=steps,
        audio_length_in_s=length,
        negative_prompt=negative_prompt,
        guidance_scale=guidance,
        num_waveforms_per_prompt=1,
        generator=generator,
    ).audios[0]

    # Return raw audio rather than gr.make_waveform(...): that helper renders
    # a waveform *video*, which does not match the interface's "audio" output.
    return sample_rate, audio
# Minimal Gradio UI: one text box in, one audio player out, backed by
# my_inference_function.
gradio_interface = gr.Interface(my_inference_function, "text", "audio")

# Start the web server for the interface.
gradio_interface.launch()