"""Gradio chat UI for the Ilograph chat model.

The model wrapper is instantiated once at import time and shared by all
requests; ``respond`` streams partial completions back to the interface.
"""

import gradio as gr
from chat_model import IlographChatModel

# Load the model once at startup so every request reuses it.
chat_model = IlographChatModel()


def respond(
    message,
    history,
    system_message,
    # FIX: ChatInterface supplies only one additional input (the system-message
    # Textbox), so these three parameters receive no value from the UI. Without
    # defaults every chat message raised
    # ``TypeError: respond() missing 3 required positional arguments``.
    # Defaults keep the signature backward-compatible for any direct callers.
    max_tokens=512,
    temperature=0.7,
    top_p=0.95,
):
    """Stream a chat completion for *message* given the prior *history*.

    Yields progressively longer partial responses, which ChatInterface
    renders as a live-updating assistant reply.
    """
    messages = chat_model.build_messages(
        system_prompt=system_message,
        history=history,
        user_message=message,
    )
    # Delegate token streaming to the model wrapper; forward each partial
    # string it produces unchanged.
    yield from chat_model.generate_stream(
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )


# For information on how to customize the ChatInterface, peruse the gradio
# docs: https://www.gradio.app/docs/chatinterface
chatbot = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(
            value=chat_model.default_system_prompt,
            label="System message",
        ),
    ],
)

with gr.Blocks() as demo:
    gr.Markdown(
        "Note: **this is a lightweight 3B Ilograph model intended for basic diagrams.** "
        "For more complex modeling tasks, use the newer, larger Ilograph models. The current model is **Brigham-Young-University/Qwen2.5-Coder-3B-Ilograph-Instruct**."
        " If you want to use the larger model, you can check the [Hugging Face model page](https://huggingface.co/models?other=ilograph)."
    )
    gr.Markdown(
        "The model might take a few seconds to load, I don't have GPU so it's slow."
    )
    chatbot.render()


if __name__ == "__main__":
    demo.launch()