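# open-cortex/scripts/gradio_chat.py
# Commit a1231c7 (peninsula123): "feat(main): finish data struct process"
# NOTE: the file-viewer page chrome captured with this source ("Raw",
# "History", "Blame", "Contribute", "Delete", "3.17 kB") is not part of
# the program.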
import gradio as gr
from open_cortex.ui.gradio_history import history_to_chat_messages
from open_cortex.runtime.client import stream_chat_events
def format_runtime(event) -> str:
    """Render one streaming event as the text shown in the runtime panel.

    The result depends on ``event.kind``:

    * ``"request_started"`` -> a fixed two-line "waiting" message.
    * ``"first_token"`` with a snapshot attached -> TTFT, context usage,
      token-stream rate and engine queue counters read from the snapshot.
    * ``"request_completed"`` -> prompt/output token counts and the
      prefill/decode throughput.
    * anything else (including ``"first_token"`` without a snapshot)
      -> ``"Phase: decoding"``.

    Args:
        event: Object exposing ``kind`` plus the attributes read by the
            branch matching that kind.

    Returns:
        A newline-separated status string.
    """
    kind = event.kind

    if kind == "request_started":
        return "Phase: request started\nWaiting for first token..."

    if kind == "first_token":
        snap = event.snapshot
        if snap is not None:
            # Only the first slot's token count is reported; an empty or
            # missing slot list is rendered as ``None``.
            used_tokens = None
            if snap.slot_context_tokens:
                used_tokens = snap.slot_context_tokens[0]

            lines = ["Phase: first token"]
            lines.append(f"TTFT: {event.ttft_ms:.1f} ms")
            lines.append(f"Context: {used_tokens} / {snap.slot_context_size}")
            lines.append(f"Token Stream: {snap.decode_tps} tok/s")
            lines.append(
                f"Engine: processing={snap.requests_processing} "
                f"deferred={snap.requests_deferred}"
            )
            return "\n".join(lines)

    if kind == "request_completed":
        summary = (
            "Phase: completed",
            f"Prompt tokens: {event.prompt_tokens}",
            f"Output tokens: {event.completion_tokens}",
            f"Prefill: {event.prompt_tps:.1f} tok/s",
            f"Decode: {event.decode_tps:.1f} tok/s",
        )
        return "\n".join(summary)

    # Every other event is treated as part of the decoding phase.
    return "Phase: decoding"
def user(user_message: str, history: list[dict]) -> tuple[str, list[dict]]:
    """Record the submitted text as a new user turn and clear the input box.

    Args:
        user_message: Raw text from the input box.
        history: Current transcript as a list of ``role``/``content`` dicts.

    Returns:
        A ``(textbox_value, history)`` pair. The textbox value is always
        ``""``. For a whitespace-only message the same ``history`` object
        is returned untouched; otherwise a new list with the user turn
        appended is returned and the input list is left unmodified.
    """
    cleared_input = ""

    if user_message.strip():
        # The message is stored exactly as typed; stripping is only used
        # to decide whether there is anything to send.
        new_turn = {"role": "user", "content": user_message}
        return cleared_input, history + [new_turn]

    return cleared_input, history
def bot(history: list):
    """Stream the assistant's reply to the latest user turn.

    Generator event handler: every yielded ``(history, runtime_text)`` pair
    carries the transcript with the reply accumulated so far and the status
    text for the runtime panel.

    Args:
        history: Transcript as a list of ``role``/``content`` dicts. It is
            mutated in place: one assistant message is appended and grown
            as text deltas arrive.

    Yields:
        ``(history, runtime_text)`` after each event from the runtime, or a
        single unchanged ``(history, "Phase: idle")`` pair when there is no
        pending user turn to answer.
    """
    runtime_text = "Phase: idle"

    # ``user`` returns the transcript unchanged for a blank submission, yet
    # this handler still runs afterwards. Without a trailing user turn there
    # is nothing to answer, so skip the runtime call entirely.
    if not history or history[-1].get("role") != "user":
        yield history, runtime_text
        return

    # Build the request before the placeholder is appended so the empty
    # assistant message is not part of what gets sent.
    messages = history_to_chat_messages(history)

    # Append exactly one placeholder; a second one would remain in the
    # transcript as a stray empty assistant message.
    history.append({"role": "assistant", "content": ""})

    for event in stream_chat_events(messages):
        runtime_text = format_runtime(event)
        if event.text_delta:
            history[-1]["content"] += event.text_delta
        # Yield on every event (not only text deltas) so the runtime panel
        # also reflects events that carry no text.
        yield history, runtime_text
# Page layout: the chat column (transcript, input box, clear button) sits
# next to a narrower column holding the read-only runtime status panel.
with gr.Blocks() as demo:
    gr.Markdown("# OpenCortex Minimal Chat")

    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(height=480)
            msg = gr.Textbox(
                show_label=False,
                placeholder="Ask the local model...",
            )
            clear = gr.Button("Clear")

        with gr.Column(scale=1):
            runtime = gr.Textbox(
                value="Phase: idle",
                label="Runtime",
                interactive=False,
                lines=10,
            )

    # Submitting is a two-step chain: first record the user turn and clear
    # the input box (unqueued), then stream the reply into the transcript
    # and the runtime panel.
    submit_event = msg.submit(
        fn=user,
        inputs=[msg, chatbot],
        outputs=[msg, chatbot],
        queue=False,
    )
    submit_event.then(
        fn=bot,
        inputs=chatbot,
        outputs=[chatbot, runtime],
    )

    # "Clear" empties the transcript and resets the runtime panel.
    clear.click(
        fn=lambda: ([], "Phase: idle"),
        inputs=None,
        outputs=[chatbot, runtime],
        queue=False,
    )

# Launch the app only when run as a script, with the queue enabled first.
if __name__ == "__main__":
    demo.queue()
    demo.launch()