wop committed on
Commit
abbee5f
·
verified ·
1 Parent(s): 599a2b7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -35
app.py CHANGED
@@ -17,7 +17,6 @@ import gradio as gr
17
 
18
  # ── Model to use ──────────────────────────────────────────────────────────────
19
  MODEL_ID = "Trillim/BitNet-TRNQ"
20
- # Change to e.g. "Trillim/BitNet-GenZ-TRNQ" for a different bundle.
21
 
22
  # ── Global runtime handle ─────────────────────────────────────────────────────
23
  _runtime = None
@@ -27,16 +26,11 @@ _startup_error: str | None = None
27
 
28
  def _pull_model() -> None:
29
  """Pull the model bundle into the Trillim managed store via the CLI binary."""
30
- # `trillim` installs a console-script entry point next to the Python binary.
31
- # shutil.which finds it on PATH; fallback to same dir as the interpreter.
32
  trillim_bin = shutil.which("trillim") or str(
33
  Path(sys.executable).parent / "trillim"
34
  )
35
  print(f"[trillim] Pulling {MODEL_ID} using '{trillim_bin}' …", flush=True)
36
- result = subprocess.run(
37
- [trillim_bin, "pull", MODEL_ID],
38
- capture_output=False,
39
- )
40
  if result.returncode != 0:
41
  raise RuntimeError(f"trillim pull exited with code {result.returncode}")
42
  print("[trillim] Pull complete.", flush=True)
@@ -47,28 +41,24 @@ def _start_runtime() -> None:
47
  global _runtime, _startup_error
48
  try:
49
  _pull_model()
50
-
51
- from trillim import LLM, Runtime # noqa: PLC0415
52
-
53
  print(f"[trillim] Starting Runtime with {MODEL_ID} …", flush=True)
54
  _runtime = Runtime(LLM(MODEL_ID))
55
- _runtime.__enter__() # same as `with Runtime(...) as runtime:`
56
  print("[trillim] Runtime ready.", flush=True)
57
- except Exception as exc: # noqa: BLE001
58
  _startup_error = str(exc)
59
  print(f"[trillim] Startup failed: {exc}", file=sys.stderr, flush=True)
60
  finally:
61
  _ready.set()
62
 
63
 
64
- # Start loading in the background so Gradio can serve the UI immediately.
65
  threading.Thread(target=_start_runtime, daemon=True).start()
66
 
67
 
68
  # ── Chat logic ────────────────────────────────────────────────────────────────
69
 
70
  def _wait_or_raise(timeout: float = 300.0) -> None:
71
- """Block until the runtime is ready, or raise a clear error."""
72
  if not _ready.wait(timeout=timeout):
73
  raise RuntimeError("Trillim runtime did not become ready within 5 minutes.")
74
  if _startup_error:
@@ -77,20 +67,15 @@ def _wait_or_raise(timeout: float = 300.0) -> None:
77
 
78
  def chat_fn(
79
  message: str,
80
- history: list[dict],
81
  system_prompt: str,
82
  temperature: float,
83
  max_new_tokens: int,
84
  ):
85
- """
86
- Gradio streaming chat handler.
87
-
88
- `history` — list of {"role": ..., "content": ...} dicts (Gradio 'messages' format).
89
- Yields partial strings so the UI streams tokens in real time.
90
- """
91
  _wait_or_raise()
92
 
93
- from trillim.components.llm import ChatDoneEvent, ChatTokenEvent # noqa: PLC0415
94
 
95
  messages: list[dict] = []
96
  if system_prompt.strip():
@@ -111,7 +96,11 @@ def chat_fn(
111
  break
112
 
113
 
114
- # ── Gradio UI (Gradio 6 compatible) ──────────────────────────────────────────
 
 
 
 
115
 
116
  DESCRIPTION = f"""
117
  ## 🧠 Trillim Chat
@@ -120,17 +109,14 @@ Powered by [Trillim](https://trillim.com) — privacy-first, CPU-native local AI
120
  Model: **{MODEL_ID}**
121
  """
122
 
123
- # In Gradio 6, theme and css belong in launch(), not Blocks().
124
  with gr.Blocks(title="Trillim Chat") as demo:
125
  gr.Markdown(DESCRIPTION)
126
 
127
  gr.ChatInterface(
128
  fn=chat_fn,
129
- type="messages",
130
  chatbot=gr.Chatbot(
131
  elem_id="chatbot",
132
  show_label=False,
133
- # bubble_full_width was removed in Gradio 6 — omit it.
134
  render_markdown=True,
135
  ),
136
  additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False),
@@ -140,14 +126,8 @@ with gr.Blocks(title="Trillim Chat") as demo:
140
  label="System prompt",
141
  lines=2,
142
  ),
143
- gr.Slider(
144
- minimum=0.0, maximum=2.0, value=0.7, step=0.05,
145
- label="Temperature",
146
- ),
147
- gr.Slider(
148
- minimum=64, maximum=8192, value=512, step=64,
149
- label="Max new tokens",
150
- ),
151
  ],
152
  title=None,
153
  submit_btn="Send",
@@ -166,7 +146,6 @@ if __name__ == "__main__":
166
  server_name="0.0.0.0",
167
  server_port=7860,
168
  show_error=True,
169
- # Gradio 6: theme and css go in launch(), not Blocks().
170
  theme=gr.themes.Soft(
171
  primary_hue="indigo",
172
  secondary_hue="purple",
 
17
 
18
  # ── Model to use ──────────────────────────────────────────────────────────────
19
  MODEL_ID = "Trillim/BitNet-TRNQ"
 
20
 
21
  # ── Global runtime handle ─────────────────────────────────────────────────────
22
  _runtime = None
 
26
 
27
  def _pull_model() -> None:
28
  """Pull the model bundle into the Trillim managed store via the CLI binary."""
 
 
29
  trillim_bin = shutil.which("trillim") or str(
30
  Path(sys.executable).parent / "trillim"
31
  )
32
  print(f"[trillim] Pulling {MODEL_ID} using '{trillim_bin}' …", flush=True)
33
+ result = subprocess.run([trillim_bin, "pull", MODEL_ID], capture_output=False)
 
 
 
34
  if result.returncode != 0:
35
  raise RuntimeError(f"trillim pull exited with code {result.returncode}")
36
  print("[trillim] Pull complete.", flush=True)
 
41
  global _runtime, _startup_error
42
  try:
43
  _pull_model()
44
+ from trillim import LLM, Runtime
 
 
45
  print(f"[trillim] Starting Runtime with {MODEL_ID} …", flush=True)
46
  _runtime = Runtime(LLM(MODEL_ID))
47
+ _runtime.__enter__()
48
  print("[trillim] Runtime ready.", flush=True)
49
+ except Exception as exc:
50
  _startup_error = str(exc)
51
  print(f"[trillim] Startup failed: {exc}", file=sys.stderr, flush=True)
52
  finally:
53
  _ready.set()
54
 
55
 
 
56
  threading.Thread(target=_start_runtime, daemon=True).start()
57
 
58
 
59
  # ── Chat logic ────────────────────────────────────────────────────────────────
60
 
61
  def _wait_or_raise(timeout: float = 300.0) -> None:
 
62
  if not _ready.wait(timeout=timeout):
63
  raise RuntimeError("Trillim runtime did not become ready within 5 minutes.")
64
  if _startup_error:
 
67
 
68
  def chat_fn(
69
  message: str,
70
+ history: list[dict], # Gradio 6: always [{"role":…, "content":…}, …]
71
  system_prompt: str,
72
  temperature: float,
73
  max_new_tokens: int,
74
  ):
75
+ """Streaming chat handler — yields partial assistant strings."""
 
 
 
 
 
76
  _wait_or_raise()
77
 
78
+ from trillim.components.llm import ChatDoneEvent, ChatTokenEvent
79
 
80
  messages: list[dict] = []
81
  if system_prompt.strip():
 
96
  break
97
 
98
 
99
+ # ── Gradio 6 UI ───────────────────────────────────────────────────────────────
100
+ # Removed from Gradio 6:
101
+ # • Blocks(theme=…, css=…) → move to launch()
102
+ # • ChatInterface(type=…) → removed; history is always messages-format
103
+ # • Chatbot(bubble_full_width=…) → removed
104
 
105
  DESCRIPTION = f"""
106
  ## 🧠 Trillim Chat
 
109
  Model: **{MODEL_ID}**
110
  """
111
 
 
112
  with gr.Blocks(title="Trillim Chat") as demo:
113
  gr.Markdown(DESCRIPTION)
114
 
115
  gr.ChatInterface(
116
  fn=chat_fn,
 
117
  chatbot=gr.Chatbot(
118
  elem_id="chatbot",
119
  show_label=False,
 
120
  render_markdown=True,
121
  ),
122
  additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False),
 
126
  label="System prompt",
127
  lines=2,
128
  ),
129
+ gr.Slider(0.0, 2.0, value=0.7, step=0.05, label="Temperature"),
130
+ gr.Slider(64, 8192, value=512, step=64, label="Max new tokens"),
 
 
 
 
 
 
131
  ],
132
  title=None,
133
  submit_btn="Send",
 
146
  server_name="0.0.0.0",
147
  server_port=7860,
148
  show_error=True,
 
149
  theme=gr.themes.Soft(
150
  primary_hue="indigo",
151
  secondary_hue="purple",