"""Gradio app: CPU-only DiffRhythm song generation from LRC-format lyrics."""

import gradio as gr
import torch
import random  # NOTE(review): currently unused; presumably intended for seed randomization
import numpy as np

from diffrhythm.infer.infer_utils import (
    prepare_model,
    get_lrc_token,
    get_audio_style_prompt,  # NOTE(review): unused here; text-style prompt is used instead
    get_text_style_prompt,
    get_negative_style_prompt,
    get_reference_latent,
)
from diffrhythm.infer.infer import inference

# Force CPU so the app stays within free-tier (no-GPU) hardware limits.
device = "cpu"
print("Running in CPU mode (Free tier safe)")

MAX_SEED = np.iinfo(np.int32).max  # NOTE(review): unused; kept for future seed randomization
# Frame budget shared by tokenization, latent preparation, and model loading.
MAX_FRAMES = 2048
# Maximum song duration (seconds, presumably) passed to get_lrc_token — TODO confirm units.
MAX_DURATION = 95

# Load every model once at import time; they are reused across requests.
cfm, tokenizer, muq, vae, eval_model, eval_muq = prepare_model(
    max_frames=MAX_FRAMES,
    device=device,
)


def generate_music(lrc_text, style_text="emotional piano", seed=0):
    """Generate a song from LRC lyrics and return the rendered audio.

    Args:
        lrc_text: Lyrics in LRC format (timestamped lines), from the UI textbox.
        style_text: Free-text style prompt describing the desired sound.
            Defaults to the original hard-coded "emotional piano".
        seed: RNG seed for reproducible sampling. Defaults to the original
            fixed seed of 0.

    Returns:
        Whatever ``inference`` returns — consumed by ``gr.Audio(type="filepath")``,
        so presumably a path to the generated mp3 file (TODO confirm).
    """
    torch.manual_seed(seed)

    # Tokenize lyrics; also yields timing info needed by the sampler.
    lrc_prompt, start_time, end_frame, song_duration = get_lrc_token(
        MAX_FRAMES, lrc_text, tokenizer, MAX_DURATION, device
    )

    style_prompt = get_text_style_prompt(muq, style_text)
    negative_style_prompt = get_negative_style_prompt(device)

    # No reference audio: request an unconditioned latent (edit=False, no paths).
    latent_prompt, pred_frames = get_reference_latent(
        device, MAX_FRAMES, False, None, None, vae
    )

    song = inference(
        cfm_model=cfm,
        vae_model=vae,
        eval_model=eval_model,
        eval_muq=eval_muq,
        cond=latent_prompt,
        text=lrc_prompt,
        duration=end_frame,
        style_prompt=style_prompt,
        negative_style_prompt=negative_style_prompt,
        steps=10,  # low step count keeps CPU inference time tolerable
        cfg_strength=3.0,
        sway_sampling_coef=None,
        start_time=start_time,
        file_type="mp3",
        vocal_flag=False,
        odeint_method="euler",
        pred_frames=pred_frames,
        batch_infer_num=1,
        song_duration=song_duration,
    )
    return song


demo = gr.Interface(
    fn=generate_music,
    inputs=gr.Textbox(lines=10, label="LRC Lyrics"),
    outputs=gr.Audio(type="filepath"),
)

if __name__ == "__main__":
    demo.launch()