import gradio as gr
import torch
import random
import numpy as np
from diffrhythm.infer.infer_utils import (
prepare_model,
get_lrc_token,
get_audio_style_prompt,
get_text_style_prompt,
get_negative_style_prompt,
get_reference_latent
)
from diffrhythm.infer.infer import inference
# FORCE CPU FOR FREE TIER
# Free-tier Spaces have no GPU, so pin all model work to the CPU.
device = "cpu"
print("Running in CPU mode (Free tier safe)")
# Largest 32-bit signed seed (2**31 - 1).
# NOTE(review): defined but never used below — generate_music seeds with 0.
MAX_SEED = np.iinfo(np.int32).max
# Load every model component once at import time so all requests reuse them.
# max_frames=2048 matches the frame budget passed throughout generate_music.
cfm, tokenizer, muq, vae, eval_model, eval_muq = prepare_model(
    max_frames=2048,
    device=device
)
def generate_music(lrc_text, style_text="emotional piano", duration=95, steps=10, seed=0):
    """Generate a song from LRC-format lyrics and return the audio file path.

    Previously hard-coded values are now keyword parameters whose defaults
    reproduce the original behavior exactly, so the Gradio wiring
    (which passes only ``lrc_text``) is unaffected.

    Args:
        lrc_text: Lyrics text (LRC format) fed to the tokenizer stage.
        style_text: Free-text style description for the style prompt
            (default: the previously hard-coded "emotional piano").
        duration: Value forwarded to ``get_lrc_token`` — presumably the
            target song duration in seconds; TODO confirm against
            ``get_lrc_token``'s signature (default 95, as before).
        steps: Diffusion sampling step count (default 10, as before).
        seed: Torch RNG seed for reproducible generation (default 0,
            as before; MAX_SEED above bounds valid user-supplied seeds).

    Returns:
        Whatever ``inference`` returns for file_type="mp3" — used by the
        Gradio Audio output as a filepath.
    """
    torch.manual_seed(seed)
    lrc_prompt, start_time, end_frame, song_duration = get_lrc_token(
        2048, lrc_text, tokenizer, duration, device
    )
    # Text-only style conditioning; no reference audio is provided.
    style_prompt = get_text_style_prompt(muq, style_text)
    negative_style_prompt = get_negative_style_prompt(device)
    # No reference latent (edit flag False, no paths) — generate from scratch.
    latent_prompt, pred_frames = get_reference_latent(
        device, 2048, False, None, None, vae
    )
    song = inference(
        cfm_model=cfm,
        vae_model=vae,
        eval_model=eval_model,
        eval_muq=eval_muq,
        cond=latent_prompt,
        text=lrc_prompt,
        duration=end_frame,
        style_prompt=style_prompt,
        negative_style_prompt=negative_style_prompt,
        steps=steps,
        cfg_strength=3.0,
        sway_sampling_coef=None,
        start_time=start_time,
        file_type="mp3",
        vocal_flag=False,
        odeint_method="euler",
        pred_frames=pred_frames,
        batch_infer_num=1,
        song_duration=song_duration
    )
    return song
# Minimal UI: one multiline lyrics box in, one generated audio file out.
lyrics_box = gr.Textbox(lines=10, label="LRC Lyrics")
audio_out = gr.Audio(type="filepath")
demo = gr.Interface(fn=generate_music, inputs=lyrics_box, outputs=audio_out)

if __name__ == "__main__":
    demo.launch()