# Spaces status (page-header residue captured with the file): Sleeping
import gradio as gr
import torch
import random
import numpy as np

from diffrhythm.infer.infer_utils import (
    prepare_model,
    get_lrc_token,
    get_audio_style_prompt,
    get_text_style_prompt,
    get_negative_style_prompt,
    get_reference_latent,
)
from diffrhythm.infer.infer import inference

# Inference is pinned to the CPU; the original note gives the free hosting
# tier as the reason.
device = "cpu"
print("Running in CPU mode (Free tier safe)")

# Largest value representable as a signed 32-bit integer. Not referenced in
# the visible part of this file.
_int32_limits = np.iinfo(np.int32)
MAX_SEED = _int32_limits.max

# Load everything once at import time. prepare_model returns six objects that
# the rest of the module reads as globals.
_loaded_models = prepare_model(max_frames=2048, device=device)
cfm, tokenizer, muq, vae, eval_model, eval_muq = _loaded_models
def generate_music(
    lrc_text,
    *,
    style_text="emotional piano",
    seed=0,
    steps=10,
    cfg_strength=3.0,
):
    """Generate a song from lyrics using the module-level models.

    The function seeds torch, tokenizes the lyrics, builds the style and
    negative-style prompts and the reference latent, and then hands
    everything to ``inference``.

    Args:
        lrc_text: Lyrics text; the UI feeds this from the "LRC Lyrics"
            textbox.
        style_text: Text description passed to ``get_text_style_prompt``.
        seed: Value given to ``torch.manual_seed`` before generation.
        steps: Forwarded to ``inference`` as ``steps``.
        cfg_strength: Forwarded to ``inference`` as ``cfg_strength``.

    Returns:
        Whatever ``inference`` returns for ``file_type="mp3"``; the UI routes
        it to a ``gr.Audio`` output.

    All keyword arguments default to the values that were previously
    hard-coded, so calling ``generate_music(lrc_text)`` behaves as before.
    """
    # Same frame budget the module passes to prepare_model.
    # NOTE(review): presumably these must agree -- confirm against diffrhythm.
    max_frames = 2048
    # NOTE(review): 95 looks like the target song length paired with the
    # 2048-frame budget -- confirm units against get_lrc_token.
    target_duration = 95

    torch.manual_seed(seed)

    lrc_prompt, start_time, end_frame, song_duration = get_lrc_token(
        max_frames, lrc_text, tokenizer, target_duration, device
    )

    style_prompt = get_text_style_prompt(muq, style_text)
    negative_style_prompt = get_negative_style_prompt(device)

    # Positional arguments after max_frames are (False, None, None) exactly as
    # in the original call; their meaning is defined by get_reference_latent.
    latent_prompt, pred_frames = get_reference_latent(
        device, max_frames, False, None, None, vae
    )

    song = inference(
        cfm_model=cfm,
        vae_model=vae,
        eval_model=eval_model,
        eval_muq=eval_muq,
        cond=latent_prompt,
        text=lrc_prompt,
        duration=end_frame,
        style_prompt=style_prompt,
        negative_style_prompt=negative_style_prompt,
        steps=steps,
        cfg_strength=cfg_strength,
        sway_sampling_coef=None,
        start_time=start_time,
        file_type="mp3",
        vocal_flag=False,
        odeint_method="euler",
        pred_frames=pred_frames,
        batch_infer_num=1,
        song_duration=song_duration,
    )
    return song
# One multi-line textbox in, one audio player out.
_lyrics_input = gr.Textbox(lines=10, label="LRC Lyrics")
_audio_output = gr.Audio(type="filepath")

demo = gr.Interface(
    fn=generate_music,
    inputs=_lyrics_input,
    outputs=_audio_output,
)

# Only start the web server when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()