"""Gradio chat UI for the Ilograph chat model.

The model wrapper is instantiated once at import time and shared by all
requests; ``respond`` streams partial completions back to the interface.
"""

import gradio as gr
from chat_model import IlographChatModel

# Load the model once at startup so every request reuses it.
chat_model = IlographChatModel()


def respond(
    message,
    history,
    system_message,
    # FIX: ChatInterface supplies only one additional input (the system-message
    # Textbox), so these three parameters receive no value from the UI. Without
    # defaults every chat message raised
    # ``TypeError: respond() missing 3 required positional arguments``.
    # Defaults keep the signature backward-compatible for any direct callers.
    max_tokens=512,
    temperature=0.7,
    top_p=0.95,
):
    """Stream a chat completion for *message* given the prior *history*.

    Yields progressively longer partial responses, which ChatInterface
    renders as a live-updating assistant reply.
    """
    messages = chat_model.build_messages(
        system_prompt=system_message,
        history=history,
        user_message=message,
    )
    # Delegate token streaming to the model wrapper; forward each partial
    # string it produces unchanged.
    yield from chat_model.generate_stream(
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )


# For information on how to customize the ChatInterface, peruse the gradio
# docs: https://www.gradio.app/docs/chatinterface
chatbot = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(
            value=chat_model.default_system_prompt,
            label="System message",
        ),
    ],
)

with gr.Blocks() as demo:
    gr.Markdown(
        "Note: **this is a lightweight 3B Ilograph model intended for basic diagrams.** "
        "For more complex modeling tasks, use the newer, larger Ilograph models. The current model is **Brigham-Young-University/Qwen2.5-Coder-3B-Ilograph-Instruct**."
        " If you want to use the larger model, you can check the [Hugging Face model page](https://huggingface.co/models?other=ilograph)."
    )
    gr.Markdown(
        "The model might take a few seconds to load, I don't have GPU so it's slow."
    )
    chatbot.render()


if __name__ == "__main__":
    demo.launch()