"""Minimal Gradio chat UI that streams model output and shows runtime stats."""

import gradio as gr

from open_cortex.ui.gradio_history import history_to_chat_messages
from open_cortex.runtime.client import stream_chat_events

# Text shown in the runtime panel when no request is in flight.
IDLE_RUNTIME_TEXT = "Phase: idle"


def _one_decimal(value, unit: str) -> str:
    """Render ``value`` with one decimal place followed by ``unit``.

    Returns ``"n/a"`` when ``value`` is ``None`` so a missing metric does not
    raise ``TypeError`` in the middle of a streamed response.
    """
    if value is None:
        return "n/a"
    return f"{value:.1f} {unit}"


def format_runtime(event) -> str:
    """Build the multi-line runtime panel text for a single stream event.

    Args:
        event: A stream event exposing ``kind`` and, depending on the kind,
            ``snapshot``, ``ttft_ms``, ``prompt_tokens``,
            ``completion_tokens``, ``prompt_tps`` and ``decode_tps``.

    Returns:
        A human-readable status string. Any event that is not a request
        start, a first token carrying a snapshot, or a completion is
        reported as ``"Phase: decoding"``.
    """
    if event.kind == "request_started":
        return "Phase: request started\nWaiting for first token..."

    if event.kind == "first_token" and event.snapshot is not None:
        snapshot = event.snapshot
        # Only the first slot's context usage is displayed.
        context_tokens = (
            snapshot.slot_context_tokens[0]
            if snapshot.slot_context_tokens
            else None
        )
        return "\n".join(
            [
                "Phase: first token",
                f"TTFT: {_one_decimal(event.ttft_ms, 'ms')}",
                f"Context: {context_tokens} / {snapshot.slot_context_size}",
                f"Token Stream: {snapshot.decode_tps} tok/s",
                (
                    "Engine: "
                    f"processing={snapshot.requests_processing} "
                    f"deferred={snapshot.requests_deferred}"
                ),
            ]
        )

    if event.kind == "request_completed":
        return "\n".join(
            [
                "Phase: completed",
                f"Prompt tokens: {event.prompt_tokens}",
                f"Output tokens: {event.completion_tokens}",
                f"Prefill: {_one_decimal(event.prompt_tps, 'tok/s')}",
                f"Decode: {_one_decimal(event.decode_tps, 'tok/s')}",
            ]
        )

    return "Phase: decoding"


def user(user_message: str, history: list[dict]) -> tuple[str, list[dict]]:
    """Append the submitted text to the chat history as a user turn.

    Args:
        user_message: Raw contents of the input textbox.
        history: Current chat history as role/content dicts.

    Returns:
        A pair ``("", new_history)``. The empty string clears the textbox.
        Whitespace-only submissions leave the history unchanged.
    """
    if not user_message.strip():
        return "", history
    return "", history + [
        {
            "role": "user",
            "content": user_message,
        }
    ]


def bot(history: list):
    """Stream the assistant reply for the latest user turn.

    Args:
        history: Chat history as role/content dicts; mutated in place as the
            reply streams in.

    Yields:
        ``(history, runtime_text)`` after every stream event, where
        ``runtime_text`` is produced by ``format_runtime``.
    """
    # ``user`` leaves the history untouched for blank submissions, yet this
    # handler is still chained after it. Without a pending user turn there is
    # nothing to answer, so leave both outputs as they are.
    if not history or history[-1].get("role") != "user":
        yield history, gr.update()
        return

    messages = history_to_chat_messages(history)

    # Exactly one placeholder: the streamed deltas are accumulated into it.
    history.append(
        {
            "role": "assistant",
            "content": "",
        }
    )

    for event in stream_chat_events(messages):
        runtime_text = format_runtime(event)
        if event.text_delta:
            history[-1]["content"] += event.text_delta
        yield history, runtime_text


with gr.Blocks() as demo:
    gr.Markdown("# OpenCortex Minimal Chat")

    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(
                height=480,
            )
            msg = gr.Textbox(
                placeholder="Ask the local model...",
                show_label=False,
            )
            clear = gr.Button("Clear")

        with gr.Column(scale=1):
            runtime = gr.Textbox(
                label="Runtime",
                value=IDLE_RUNTIME_TEXT,
                lines=10,
                interactive=False,
            )

    # First record the user turn without queueing, then stream the reply.
    msg.submit(
        user,
        [msg, chatbot],
        [msg, chatbot],
        queue=False,
    ).then(
        bot,
        chatbot,
        [chatbot, runtime],
    )

    # Reset both the conversation and the runtime panel.
    clear.click(
        lambda: ([], IDLE_RUNTIME_TEXT),
        None,
        [chatbot, runtime],
        queue=False,
    )


if __name__ == "__main__":
    demo.queue()
    demo.launch()