# DiffRhythm / app.py
# NOTE: the following lines are Hugging Face page chrome accidentally pasted
# into the source ("Coiland's picture", "Update app.py", "96b2a8c verified");
# kept here as comments so the file remains valid Python.
import gradio as gr
import torch
import random
import numpy as np
from diffrhythm.infer.infer_utils import (
prepare_model,
get_lrc_token,
get_audio_style_prompt,
get_text_style_prompt,
get_negative_style_prompt,
get_reference_latent
)
from diffrhythm.infer.infer import inference
# FORCE CPU FOR FREE TIER
# Hard-pin inference to CPU so the Space runs on free-tier hardware.
device = "cpu"
print("Running in CPU mode (Free tier safe)")

# Upper bound for randomly drawn seeds (32-bit signed int max).
MAX_SEED = np.iinfo(np.int32).max

# Load every model component once at startup; 2048 is the frame budget
# shared with the generation path below.
cfm, tokenizer, muq, vae, eval_model, eval_muq = prepare_model(max_frames=2048, device=device)
def generate_music(lrc_text, style_text="emotional piano", duration=95,
                   steps=10, cfg_strength=3.0, seed=0, file_type="mp3"):
    """Generate a song from LRC-format lyrics with DiffRhythm.

    All keyword defaults reproduce the previously hard-coded values, so
    existing callers (the Gradio interface passes only ``lrc_text``) are
    unaffected.

    Parameters:
        lrc_text: Lyrics in LRC format (timestamped lines).
        style_text: Text prompt describing the musical style.
        duration: Target length passed to ``get_lrc_token`` (presumably
            seconds — TODO confirm against get_lrc_token's contract).
        steps: Number of diffusion sampling steps.
        cfg_strength: Classifier-free-guidance strength.
        seed: Torch RNG seed, for reproducible generation.
        file_type: Output audio container format (e.g. "mp3").

    Returns:
        Whatever ``inference`` returns; consumed by
        ``gr.Audio(type="filepath")``, so presumably a file path.
    """
    # Must match the max_frames used in prepare_model() at startup.
    max_frames = 2048

    torch.manual_seed(seed)

    # Tokenize the lyrics and derive timing metadata for generation.
    lrc_prompt, start_time, end_frame, song_duration = get_lrc_token(
        max_frames, lrc_text, tokenizer, duration, device
    )

    # Style conditioning: positive prompt from text, plus the model's
    # stock negative prompt for the given device.
    style_prompt = get_text_style_prompt(muq, style_text)
    negative_style_prompt = get_negative_style_prompt(device)

    # No reference audio is supplied (False/None/None), so this yields
    # an unconditioned starting latent.
    latent_prompt, pred_frames = get_reference_latent(
        device, max_frames, False, None, None, vae
    )

    song = inference(
        cfm_model=cfm,
        vae_model=vae,
        eval_model=eval_model,
        eval_muq=eval_muq,
        cond=latent_prompt,
        text=lrc_prompt,
        duration=end_frame,
        style_prompt=style_prompt,
        negative_style_prompt=negative_style_prompt,
        steps=steps,
        cfg_strength=cfg_strength,
        sway_sampling_coef=None,
        start_time=start_time,
        file_type=file_type,
        vocal_flag=False,
        odeint_method="euler",
        pred_frames=pred_frames,
        batch_infer_num=1,
        song_duration=song_duration
    )
    return song
# Build the UI from named components: a multi-line lyrics box in,
# a playable audio file path out.
lyrics_box = gr.Textbox(lines=10, label="LRC Lyrics")
audio_out = gr.Audio(type="filepath")

demo = gr.Interface(fn=generate_music, inputs=lyrics_box, outputs=audio_out)

if __name__ == "__main__":
    demo.launch()