Update app.py
Browse files
app.py
CHANGED
|
@@ -17,7 +17,6 @@ import gradio as gr
|
|
| 17 |
|
| 18 |
# ── Model to use ──────────────────────────────────────────────────────────────
|
| 19 |
MODEL_ID = "Trillim/BitNet-TRNQ"
|
| 20 |
-
# Change to e.g. "Trillim/BitNet-GenZ-TRNQ" for a different bundle.
|
| 21 |
|
| 22 |
# ── Global runtime handle ─────────────────────────────────────────────────────
|
| 23 |
_runtime = None
|
|
@@ -27,16 +26,11 @@ _startup_error: str | None = None
|
|
| 27 |
|
| 28 |
def _pull_model() -> None:
|
| 29 |
"""Pull the model bundle into the Trillim managed store via the CLI binary."""
|
| 30 |
-
# `trillim` installs a console-script entry point next to the Python binary.
|
| 31 |
-
# shutil.which finds it on PATH; fallback to same dir as the interpreter.
|
| 32 |
trillim_bin = shutil.which("trillim") or str(
|
| 33 |
Path(sys.executable).parent / "trillim"
|
| 34 |
)
|
| 35 |
print(f"[trillim] Pulling {MODEL_ID} using '{trillim_bin}' β¦", flush=True)
|
| 36 |
-
result = subprocess.run(
|
| 37 |
-
[trillim_bin, "pull", MODEL_ID],
|
| 38 |
-
capture_output=False,
|
| 39 |
-
)
|
| 40 |
if result.returncode != 0:
|
| 41 |
raise RuntimeError(f"trillim pull exited with code {result.returncode}")
|
| 42 |
print("[trillim] Pull complete.", flush=True)
|
|
@@ -47,28 +41,24 @@ def _start_runtime() -> None:
|
|
| 47 |
global _runtime, _startup_error
|
| 48 |
try:
|
| 49 |
_pull_model()
|
| 50 |
-
|
| 51 |
-
from trillim import LLM, Runtime # noqa: PLC0415
|
| 52 |
-
|
| 53 |
print(f"[trillim] Starting Runtime with {MODEL_ID} β¦", flush=True)
|
| 54 |
_runtime = Runtime(LLM(MODEL_ID))
|
| 55 |
-
_runtime.__enter__()
|
| 56 |
print("[trillim] Runtime ready.", flush=True)
|
| 57 |
-
except Exception as exc:
|
| 58 |
_startup_error = str(exc)
|
| 59 |
print(f"[trillim] Startup failed: {exc}", file=sys.stderr, flush=True)
|
| 60 |
finally:
|
| 61 |
_ready.set()
|
| 62 |
|
| 63 |
|
| 64 |
-
# Start loading in the background so Gradio can serve the UI immediately.
|
| 65 |
threading.Thread(target=_start_runtime, daemon=True).start()
|
| 66 |
|
| 67 |
|
| 68 |
# ── Chat logic ────────────────────────────────────────────────────────────────
|
| 69 |
|
| 70 |
def _wait_or_raise(timeout: float = 300.0) -> None:
|
| 71 |
-
"""Block until the runtime is ready, or raise a clear error."""
|
| 72 |
if not _ready.wait(timeout=timeout):
|
| 73 |
raise RuntimeError("Trillim runtime did not become ready within 5 minutes.")
|
| 74 |
if _startup_error:
|
|
@@ -77,20 +67,15 @@ def _wait_or_raise(timeout: float = 300.0) -> None:
|
|
| 77 |
|
| 78 |
def chat_fn(
|
| 79 |
message: str,
|
| 80 |
-
history: list[dict],
|
| 81 |
system_prompt: str,
|
| 82 |
temperature: float,
|
| 83 |
max_new_tokens: int,
|
| 84 |
):
|
| 85 |
-
"""
|
| 86 |
-
Gradio streaming chat handler.
|
| 87 |
-
|
| 88 |
-
`history` β list of {"role": ..., "content": ...} dicts (Gradio 'messages' format).
|
| 89 |
-
Yields partial strings so the UI streams tokens in real time.
|
| 90 |
-
"""
|
| 91 |
_wait_or_raise()
|
| 92 |
|
| 93 |
-
from trillim.components.llm import ChatDoneEvent, ChatTokenEvent
|
| 94 |
|
| 95 |
messages: list[dict] = []
|
| 96 |
if system_prompt.strip():
|
|
@@ -111,7 +96,11 @@ def chat_fn(
|
|
| 111 |
break
|
| 112 |
|
| 113 |
|
| 114 |
-
# ── Gradio
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
|
| 116 |
DESCRIPTION = f"""
|
| 117 |
## π§ Trillim Chat
|
|
@@ -120,17 +109,14 @@ Powered by [Trillim](https://trillim.com) β privacy-first, CPU-native local AI
|
|
| 120 |
Model: **{MODEL_ID}**
|
| 121 |
"""
|
| 122 |
|
| 123 |
-
# In Gradio 6, theme and css belong in launch(), not Blocks().
|
| 124 |
with gr.Blocks(title="Trillim Chat") as demo:
|
| 125 |
gr.Markdown(DESCRIPTION)
|
| 126 |
|
| 127 |
gr.ChatInterface(
|
| 128 |
fn=chat_fn,
|
| 129 |
-
type="messages",
|
| 130 |
chatbot=gr.Chatbot(
|
| 131 |
elem_id="chatbot",
|
| 132 |
show_label=False,
|
| 133 |
-
# bubble_full_width was removed in Gradio 6 — omit it.
|
| 134 |
render_markdown=True,
|
| 135 |
),
|
| 136 |
additional_inputs_accordion=gr.Accordion(label="βοΈ Parameters", open=False),
|
|
@@ -140,14 +126,8 @@ with gr.Blocks(title="Trillim Chat") as demo:
|
|
| 140 |
label="System prompt",
|
| 141 |
lines=2,
|
| 142 |
),
|
| 143 |
-
gr.Slider(
|
| 144 |
-
|
| 145 |
-
label="Temperature",
|
| 146 |
-
),
|
| 147 |
-
gr.Slider(
|
| 148 |
-
minimum=64, maximum=8192, value=512, step=64,
|
| 149 |
-
label="Max new tokens",
|
| 150 |
-
),
|
| 151 |
],
|
| 152 |
title=None,
|
| 153 |
submit_btn="Send",
|
|
@@ -166,7 +146,6 @@ if __name__ == "__main__":
|
|
| 166 |
server_name="0.0.0.0",
|
| 167 |
server_port=7860,
|
| 168 |
show_error=True,
|
| 169 |
-
# Gradio 6: theme and css go in launch(), not Blocks().
|
| 170 |
theme=gr.themes.Soft(
|
| 171 |
primary_hue="indigo",
|
| 172 |
secondary_hue="purple",
|
|
|
|
| 17 |
|
| 18 |
# ── Model to use ──────────────────────────────────────────────────────────────
|
| 19 |
MODEL_ID = "Trillim/BitNet-TRNQ"
|
|
|
|
| 20 |
|
| 21 |
# ── Global runtime handle ─────────────────────────────────────────────────────
|
| 22 |
_runtime = None
|
|
|
|
| 26 |
|
| 27 |
def _pull_model() -> None:
|
| 28 |
"""Pull the model bundle into the Trillim managed store via the CLI binary."""
|
|
|
|
|
|
|
| 29 |
trillim_bin = shutil.which("trillim") or str(
|
| 30 |
Path(sys.executable).parent / "trillim"
|
| 31 |
)
|
| 32 |
print(f"[trillim] Pulling {MODEL_ID} using '{trillim_bin}' β¦", flush=True)
|
| 33 |
+
result = subprocess.run([trillim_bin, "pull", MODEL_ID], capture_output=False)
|
|
|
|
|
|
|
|
|
|
| 34 |
if result.returncode != 0:
|
| 35 |
raise RuntimeError(f"trillim pull exited with code {result.returncode}")
|
| 36 |
print("[trillim] Pull complete.", flush=True)
|
|
|
|
| 41 |
global _runtime, _startup_error
|
| 42 |
try:
|
| 43 |
_pull_model()
|
| 44 |
+
from trillim import LLM, Runtime
|
|
|
|
|
|
|
| 45 |
print(f"[trillim] Starting Runtime with {MODEL_ID} β¦", flush=True)
|
| 46 |
_runtime = Runtime(LLM(MODEL_ID))
|
| 47 |
+
_runtime.__enter__()
|
| 48 |
print("[trillim] Runtime ready.", flush=True)
|
| 49 |
+
except Exception as exc:
|
| 50 |
_startup_error = str(exc)
|
| 51 |
print(f"[trillim] Startup failed: {exc}", file=sys.stderr, flush=True)
|
| 52 |
finally:
|
| 53 |
_ready.set()
|
| 54 |
|
| 55 |
|
|
|
|
| 56 |
threading.Thread(target=_start_runtime, daemon=True).start()
|
| 57 |
|
| 58 |
|
| 59 |
# ── Chat logic ────────────────────────────────────────────────────────────────
|
| 60 |
|
| 61 |
def _wait_or_raise(timeout: float = 300.0) -> None:
|
|
|
|
| 62 |
if not _ready.wait(timeout=timeout):
|
| 63 |
raise RuntimeError("Trillim runtime did not become ready within 5 minutes.")
|
| 64 |
if _startup_error:
|
|
|
|
| 67 |
|
| 68 |
def chat_fn(
|
| 69 |
message: str,
|
| 70 |
+
history: list[dict], # Gradio 6: always [{"role":β¦, "content":β¦}, β¦]
|
| 71 |
system_prompt: str,
|
| 72 |
temperature: float,
|
| 73 |
max_new_tokens: int,
|
| 74 |
):
|
| 75 |
+
"""Streaming chat handler β yields partial assistant strings."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
_wait_or_raise()
|
| 77 |
|
| 78 |
+
from trillim.components.llm import ChatDoneEvent, ChatTokenEvent
|
| 79 |
|
| 80 |
messages: list[dict] = []
|
| 81 |
if system_prompt.strip():
|
|
|
|
| 96 |
break
|
| 97 |
|
| 98 |
|
| 99 |
+
# ── Gradio 6 UI ───────────────────────────────────────────────────────────────
|
| 100 |
+
# Removed from Gradio 6:
|
| 101 |
+
# • Blocks(theme=…, css=…) → move to launch()
|
| 102 |
+
# • ChatInterface(type=…) → removed; history is always messages-format
|
| 103 |
+
# • Chatbot(bubble_full_width=…) → removed
|
| 104 |
|
| 105 |
DESCRIPTION = f"""
|
| 106 |
## π§ Trillim Chat
|
|
|
|
| 109 |
Model: **{MODEL_ID}**
|
| 110 |
"""
|
| 111 |
|
|
|
|
| 112 |
with gr.Blocks(title="Trillim Chat") as demo:
|
| 113 |
gr.Markdown(DESCRIPTION)
|
| 114 |
|
| 115 |
gr.ChatInterface(
|
| 116 |
fn=chat_fn,
|
|
|
|
| 117 |
chatbot=gr.Chatbot(
|
| 118 |
elem_id="chatbot",
|
| 119 |
show_label=False,
|
|
|
|
| 120 |
render_markdown=True,
|
| 121 |
),
|
| 122 |
additional_inputs_accordion=gr.Accordion(label="βοΈ Parameters", open=False),
|
|
|
|
| 126 |
label="System prompt",
|
| 127 |
lines=2,
|
| 128 |
),
|
| 129 |
+
gr.Slider(0.0, 2.0, value=0.7, step=0.05, label="Temperature"),
|
| 130 |
+
gr.Slider(64, 8192, value=512, step=64, label="Max new tokens"),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
],
|
| 132 |
title=None,
|
| 133 |
submit_btn="Send",
|
|
|
|
| 146 |
server_name="0.0.0.0",
|
| 147 |
server_port=7860,
|
| 148 |
show_error=True,
|
|
|
|
| 149 |
theme=gr.themes.Soft(
|
| 150 |
primary_hue="indigo",
|
| 151 |
secondary_hue="purple",
|