Spaces:

wop
/

Trillim

Paused

App Files Files Community

wop committed on Apr 25

Commit

abbee5f

verified ·

1 Parent(s): 599a2b7

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -35

app.py CHANGED Viewed

@@ -17,7 +17,6 @@ import gradio as gr
 # ── Model to use ──────────────────────────────────────────────────────────────
 MODEL_ID = "Trillim/BitNet-TRNQ"
-# Change to e.g. "Trillim/BitNet-GenZ-TRNQ" for a different bundle.
 # ── Global runtime handle ─────────────────────────────────────────────────────
 _runtime = None
@@ -27,16 +26,11 @@ _startup_error: str | None = None
 def _pull_model() -> None:
     """Pull the model bundle into the Trillim managed store via the CLI binary."""
-    # `trillim` installs a console-script entry point next to the Python binary.
-    # shutil.which finds it on PATH; fallback to same dir as the interpreter.
     trillim_bin = shutil.which("trillim") or str(
         Path(sys.executable).parent / "trillim"
     )
     print(f"[trillim] Pulling {MODEL_ID} using '{trillim_bin}' …", flush=True)
-    result = subprocess.run(
-        [trillim_bin, "pull", MODEL_ID],
-        capture_output=False,
-    )
     if result.returncode != 0:
         raise RuntimeError(f"trillim pull exited with code {result.returncode}")
     print("[trillim] Pull complete.", flush=True)
@@ -47,28 +41,24 @@ def _start_runtime() -> None:
     global _runtime, _startup_error
     try:
         _pull_model()
-        from trillim import LLM, Runtime  # noqa: PLC0415
         print(f"[trillim] Starting Runtime with {MODEL_ID} …", flush=True)
         _runtime = Runtime(LLM(MODEL_ID))
-        _runtime.__enter__()   # same as `with Runtime(...) as runtime:`
         print("[trillim] Runtime ready.", flush=True)
-    except Exception as exc:   # noqa: BLE001
         _startup_error = str(exc)
         print(f"[trillim] Startup failed: {exc}", file=sys.stderr, flush=True)
     finally:
         _ready.set()
-# Start loading in the background so Gradio can serve the UI immediately.
 threading.Thread(target=_start_runtime, daemon=True).start()
 # ── Chat logic ────────────────────────────────────────────────────────────────
 def _wait_or_raise(timeout: float = 300.0) -> None:
-    """Block until the runtime is ready, or raise a clear error."""
     if not _ready.wait(timeout=timeout):
         raise RuntimeError("Trillim runtime did not become ready within 5 minutes.")
     if _startup_error:
@@ -77,20 +67,15 @@ def _wait_or_raise(timeout: float = 300.0) -> None:
 def chat_fn(
     message: str,
-    history: list[dict],
     system_prompt: str,
     temperature: float,
     max_new_tokens: int,
 ):
-    """
-    Gradio streaming chat handler.
-    `history` — list of {"role": ..., "content": ...} dicts (Gradio 'messages' format).
-    Yields partial strings so the UI streams tokens in real time.
-    """
     _wait_or_raise()
-    from trillim.components.llm import ChatDoneEvent, ChatTokenEvent  # noqa: PLC0415
     messages: list[dict] = []
     if system_prompt.strip():
@@ -111,7 +96,11 @@ def chat_fn(
             break
-# ── Gradio UI (Gradio 6 compatible) ──────────────────────────────────────────
 DESCRIPTION = f"""
 ## 🧠 Trillim Chat
@@ -120,17 +109,14 @@ Powered by [Trillim](https://trillim.com) — privacy-first, CPU-native local AI
 Model: **{MODEL_ID}**
 """
-# In Gradio 6, theme and css belong in launch(), not Blocks().
 with gr.Blocks(title="Trillim Chat") as demo:
     gr.Markdown(DESCRIPTION)
     gr.ChatInterface(
         fn=chat_fn,
-        type="messages",
         chatbot=gr.Chatbot(
             elem_id="chatbot",
             show_label=False,
-            # bubble_full_width was removed in Gradio 6 — omit it.
             render_markdown=True,
         ),
         additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False),
@@ -140,14 +126,8 @@ with gr.Blocks(title="Trillim Chat") as demo:
                 label="System prompt",
                 lines=2,
             ),
-            gr.Slider(
-                minimum=0.0, maximum=2.0, value=0.7, step=0.05,
-                label="Temperature",
-            ),
-            gr.Slider(
-                minimum=64, maximum=8192, value=512, step=64,
-                label="Max new tokens",
-            ),
         ],
         title=None,
         submit_btn="Send",
@@ -166,7 +146,6 @@ if __name__ == "__main__":
         server_name="0.0.0.0",
         server_port=7860,
         show_error=True,
-        # Gradio 6: theme and css go in launch(), not Blocks().
         theme=gr.themes.Soft(
             primary_hue="indigo",
             secondary_hue="purple",

 # ── Model to use ──────────────────────────────────────────────────────────────
 MODEL_ID = "Trillim/BitNet-TRNQ"
 # ── Global runtime handle ─────────────────────────────────────────────────────
 _runtime = None
 def _pull_model() -> None:
     """Pull the model bundle into the Trillim managed store via the CLI binary."""
     trillim_bin = shutil.which("trillim") or str(
         Path(sys.executable).parent / "trillim"
     )
     print(f"[trillim] Pulling {MODEL_ID} using '{trillim_bin}' …", flush=True)
+    result = subprocess.run([trillim_bin, "pull", MODEL_ID], capture_output=False)
     if result.returncode != 0:
         raise RuntimeError(f"trillim pull exited with code {result.returncode}")
     print("[trillim] Pull complete.", flush=True)
     global _runtime, _startup_error
     try:
         _pull_model()
+        from trillim import LLM, Runtime
         print(f"[trillim] Starting Runtime with {MODEL_ID} …", flush=True)
         _runtime = Runtime(LLM(MODEL_ID))
+        _runtime.__enter__()
         print("[trillim] Runtime ready.", flush=True)
+    except Exception as exc:
         _startup_error = str(exc)
         print(f"[trillim] Startup failed: {exc}", file=sys.stderr, flush=True)
     finally:
         _ready.set()
 threading.Thread(target=_start_runtime, daemon=True).start()
 # ── Chat logic ────────────────────────────────────────────────────────────────
 def _wait_or_raise(timeout: float = 300.0) -> None:
     if not _ready.wait(timeout=timeout):
         raise RuntimeError("Trillim runtime did not become ready within 5 minutes.")
     if _startup_error:
 def chat_fn(
     message: str,
+    history: list[dict],   # Gradio 6: always [{"role":…, "content":…}, …]
     system_prompt: str,
     temperature: float,
     max_new_tokens: int,
 ):
+    """Streaming chat handler — yields partial assistant strings."""
     _wait_or_raise()
+    from trillim.components.llm import ChatDoneEvent, ChatTokenEvent
     messages: list[dict] = []
     if system_prompt.strip():
             break
+# ── Gradio 6 UI ───────────────────────────────────────────────────────────────
+# Removed from Gradio 6:
+#   • Blocks(theme=…, css=…)  → move to launch()
+#   • ChatInterface(type=…)   → removed; history is always messages-format
+#   • Chatbot(bubble_full_width=…) → removed
 DESCRIPTION = f"""
 ## 🧠 Trillim Chat
 Model: **{MODEL_ID}**
 """
 with gr.Blocks(title="Trillim Chat") as demo:
     gr.Markdown(DESCRIPTION)
     gr.ChatInterface(
         fn=chat_fn,
         chatbot=gr.Chatbot(
             elem_id="chatbot",
             show_label=False,
             render_markdown=True,
         ),
         additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False),
                 label="System prompt",
                 lines=2,
             ),
+            gr.Slider(0.0, 2.0, value=0.7, step=0.05, label="Temperature"),
+            gr.Slider(64, 8192, value=512, step=64, label="Max new tokens"),
         ],
         title=None,
         submit_btn="Send",
         server_name="0.0.0.0",
         server_port=7860,
         show_error=True,
         theme=gr.themes.Soft(
             primary_hue="indigo",
             secondary_hue="purple",