"""Gradio app: CPU-only DiffRhythm song generation from LRC-format lyrics."""

import gradio as gr
import torch
import random  # NOTE(review): currently unused; presumably intended for seed randomization
import numpy as np

from diffrhythm.infer.infer_utils import (
    prepare_model,
    get_lrc_token,
    get_audio_style_prompt,  # NOTE(review): unused here; text-style prompt is used instead
    get_text_style_prompt,
    get_negative_style_prompt,
    get_reference_latent,
)
from diffrhythm.infer.infer import inference

# Force CPU so the app stays within free-tier (no-GPU) hardware limits.
device = "cpu"
print("Running in CPU mode (Free tier safe)")

MAX_SEED = np.iinfo(np.int32).max  # NOTE(review): unused; kept for future seed randomization
# Frame budget shared by tokenization, latent preparation, and model loading.
MAX_FRAMES = 2048
# Maximum song duration (seconds, presumably) passed to get_lrc_token — TODO confirm units.
MAX_DURATION = 95

# Load every model once at import time; they are reused across requests.
cfm, tokenizer, muq, vae, eval_model, eval_muq = prepare_model(
    max_frames=MAX_FRAMES,
    device=device,
)


def generate_music(lrc_text, style_text="emotional piano", seed=0):
    """Generate a song from LRC lyrics and return the rendered audio.

    Args:
        lrc_text: Lyrics in LRC format (timestamped lines), from the UI textbox.
        style_text: Free-text style prompt describing the desired sound.
            Defaults to the original hard-coded "emotional piano".
        seed: RNG seed for reproducible sampling. Defaults to the original
            fixed seed of 0.

    Returns:
        Whatever ``inference`` returns — consumed by ``gr.Audio(type="filepath")``,
        so presumably a path to the generated mp3 file (TODO confirm).
    """
    torch.manual_seed(seed)

    # Tokenize lyrics; also yields timing info needed by the sampler.
    lrc_prompt, start_time, end_frame, song_duration = get_lrc_token(
        MAX_FRAMES, lrc_text, tokenizer, MAX_DURATION, device
    )

    style_prompt = get_text_style_prompt(muq, style_text)
    negative_style_prompt = get_negative_style_prompt(device)

    # No reference audio: request an unconditioned latent (edit=False, no paths).
    latent_prompt, pred_frames = get_reference_latent(
        device, MAX_FRAMES, False, None, None, vae
    )

    song = inference(
        cfm_model=cfm,
        vae_model=vae,
        eval_model=eval_model,
        eval_muq=eval_muq,
        cond=latent_prompt,
        text=lrc_prompt,
        duration=end_frame,
        style_prompt=style_prompt,
        negative_style_prompt=negative_style_prompt,
        steps=10,  # low step count keeps CPU inference time tolerable
        cfg_strength=3.0,
        sway_sampling_coef=None,
        start_time=start_time,
        file_type="mp3",
        vocal_flag=False,
        odeint_method="euler",
        pred_frames=pred_frames,
        batch_infer_num=1,
        song_duration=song_duration,
    )
    return song


demo = gr.Interface(
    fn=generate_music,
    inputs=gr.Textbox(lines=10, label="LRC Lyrics"),
    outputs=gr.Audio(type="filepath"),
)

if __name__ == "__main__":
    demo.launch()