NextTokenPredictor

Sleeping

App Files Community

PeterPinetree committed on Aug 16, 2025

Commit

44777fd

verified ·

1 Parent(s): c9b807e

Update app.py

Browse files

Files changed (1) hide show

app.py +87 -108

app.py CHANGED Viewed

@@ -1,25 +1,27 @@
 # app.py
 import json
 from pathlib import Path
-import threading, time
-import anywidget
-import traitlets as t
 import solara
 import pandas as pd
 import plotly.graph_objects as go
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
-# ---------- versions (shows up in Space logs) ----------
-import plotly
-print("VERSIONS:", "solara", solara.__version__, "plotly", plotly.__version__, "torch", torch.__version__)
 # ---------- Model ----------
-MODEL_ID = "Qwen/Qwen3-0.6B"
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
-# ---------- Theme & layout fixes ----------
 theme_css = """
 :root{
   --primary:#38bdf8;     /* light blue */
@@ -29,30 +31,18 @@ theme_css = """
   --border:#e5e7eb;      /* gray-200 */
 }
 body{ background:var(--bg); color:var(--text); }
-.badge{ display:inline-block; padding:2px 8px; border:1px solid var(--border); border-radius:999px; margin:2px; }
-/* Highlight hovered prediction token */
-.badge:hover {
-  background: var(--primary);
-  color: white;
-  border-color: var(--primary);
-  cursor: pointer;
-  transition: all 0.2s ease;
-}
-/* Optional: style an active hovered token */
-.badge.hovered {
-  background: var(--primary);
-  color: white;
-  border-color: var(--primary);
 }
-/* Make sure the prediction list can receive pointer events even if Plotly expands */
-.predictions-panel { position: relative; z-index: 5; }
-.plot-panel        { position: relative; z-index: 1; }
-.plot-panel .js-plotly-plot { position: relative; z-index: 1; }
-/* Row style */
 .rowbtn{
   width:100%; padding:10px 12px; border-radius:12px;
   border:1px solid var(--border); background:#fff; color:var(--text);
@@ -64,15 +54,17 @@ body{ background:var(--bg); color:var(--text); }
 .rowbtn:hover{ background:#f7fbff; border-color:#c3e8fb; }
 """
-# ---------- App state ----------
 text_rx = solara.reactive("twinkle, twinkle, little ")
-preds_rx = solara.reactive(pd.DataFrame(columns=["probs","id","tok"]))
 selected_token_id_rx = solara.reactive(None)
 neighbor_list_rx = solara.reactive([])
 last_hovered_id_rx = solara.reactive(None)
 notice_rx = solara.reactive("Click a candidate (or hover to preview).")
 auto_running_rx = solara.reactive(True)
 # ---------- Embedding assets ----------
 ASSETS = Path("assets/embeddings")
 COORDS_PATH = ASSETS / "pca_top5k_coords.json"
@@ -89,6 +81,7 @@ if COORDS_PATH.exists() and NEIGH_PATH.exists():
 else:
     notice_rx.set("Embedding files not found — add assets/embeddings/*.json to enable the map.")
 # ---------- Helpers ----------
 def display_token_from_id(tid: int) -> str:
     toks = tokenizer.convert_ids_to_tokens([int(tid)], skip_special_tokens=True)
@@ -97,48 +90,47 @@ def display_token_from_id(tid: int) -> str:
         if t.startswith(lead):
             t = t[len(lead):]
     t = t.replace("\n","↵")
-    if t.strip() == "":
-        return "␠"
-    return t
 def fmt_row(idx: int, prob: str, tid: int, tok_disp: str) -> str:
     return f"{idx:<2}  {prob:<7}  {tid:<6}  {tok_disp}"
-# ---------- Predict ----------
 def predict_top10(prompt: str) -> pd.DataFrame:
     if not prompt:
-        return pd.DataFrame(columns=["probs","id","tok"])
-    tokens = tokenizer.encode(prompt, return_tensors="pt")
     out = model.generate(
-        tokens,
         max_new_tokens=1,
         output_scores=True,
         return_dict_in_generate=True,
         pad_token_id=tokenizer.eos_token_id,
-        do_sample=False, temperature=0.0, top_k=1, top_p=1.0,
     )
     scores = torch.softmax(out.scores[0], dim=-1)
     topk = torch.topk(scores, 10)
     ids = [int(topk.indices[0, i]) for i in range(10)]
     probs = [float(topk.values[0, i]) for i in range(10)]
-    toks = [tokenizer.decode([i]) for i in ids]  # used for append only
     df = pd.DataFrame({"probs": probs, "id": ids, "tok": toks})
     df["probs"] = df["probs"].map(lambda p: f"{p:.2%}")
     return df
 def on_predict():
-    """Update predictions; keep current highlight unless none yet."""
     df = predict_top10(text_rx.value)
     preds_rx.set(df)
     if len(df) == 0:
         return
     if selected_token_id_rx.value is None:
-        preview_token(int(df.iloc[0]["id"]))  # first time only
     else:
-        # keep the user's last selection/hover
-        fig_rx.set(highlight(int(selected_token_id_rx.value)))
-# ---------- Plot ----------
 def base_scatter():
     fig = go.Figure()
     if coords:
@@ -192,9 +184,9 @@ def highlight(token_id: int):
     ))
     return fig
 def preview_token(token_id: int):
-    # DEBUG: confirm events reach Python
-    print("preview ->", token_id)
     token_id = int(token_id)
     if last_hovered_id_rx.value == token_id:
         return
@@ -203,14 +195,14 @@ def preview_token(token_id: int):
     fig_rx.set(highlight(token_id))
 def append_token(token_id: int):
-    # DEBUG
-    print("append ->", token_id)
     decoded = tokenizer.decode([int(token_id)])
     text_rx.set(text_rx.value + decoded)
     preview_token(int(token_id))
     on_predict()
-# ---------- Auto-predict (debounced) ----------
 @solara.component
 def AutoPredictWatcher():
     text = text_rx.value
@@ -237,29 +229,30 @@ def AutoPredictWatcher():
     solara.use_effect(effect, [text, auto])
     return solara.Text("", style={"display": "none"})
 class HoverList(anywidget.AnyWidget):
     """
-    Renders the prediction rows in the browser and streams hover/click
-    events back to Python via synced traitlets.
     """
-    # Browser code: builds the list and wires events
     _esm = """
     export function render({ model, el }) {
-      const make = () => {
         const items = model.get('items') || [];
         el.innerHTML = "";
         const wrap = document.createElement('div');
         wrap.style.display = 'flex';
         wrap.style.flexDirection = 'column';
         items.forEach(({tid, label}) => {
           const btn = document.createElement('button');
           btn.textContent = label;
-          btn.className = 'rowbtn';           // your existing CSS
           btn.setAttribute('type', 'button');
           btn.setAttribute('role', 'button');
           btn.setAttribute('tabindex', '0');
-          // hover → preview
           const preview = () => {
             model.set('hovered_id', tid);
             model.save_changes();
@@ -269,7 +262,6 @@ class HoverList(anywidget.AnyWidget):
           btn.addEventListener('mousemove',  preview);
           btn.addEventListener('focus',      preview);
-          // click → append
           btn.addEventListener('click', () => {
             model.set('clicked_id', tid);
             model.save_changes();
@@ -277,26 +269,23 @@ class HoverList(anywidget.AnyWidget):
           wrap.appendChild(btn);
         });
         el.appendChild(wrap);
       };
-      // initial render
-      make();
-      // re-render when items change
-      model.on('change:items', make);
     }
     """
-    # Data flowing between JS and Python
-    items       = t.List(trait=t.Dict()).tag(sync=True)   # [{tid:int, label:str}, ...]
-    hovered_id  = t.Int(allow_none=True).tag(sync=True)
-    clicked_id  = t.Int(allow_none=True).tag(sync=True)
-# ---------- Predictions list ----------
 @solara.component
 def PredictionsList():
-    df = preds_rx.value  # your DataFrame with columns: probs, id, tok
     with solara.Column(gap="6px", style={"maxWidth": "720px"}):
         solara.Markdown("### Prediction")
         solara.Text(
@@ -307,40 +296,33 @@ def PredictionsList():
             },
         )
         for i, row in df.iterrows():
             tid = int(row["id"])
-            prob = row["probs"]            # already formatted like "3.21%"
             tok_disp = display_token_from_id(tid)
-            row_label = fmt_row(i, prob, tid, tok_disp)
-            # Wrapper DIV handles hover reliably
-            with solara.Div(
-                classes=["rowbtn"],  # styling on wrapper
-                style={"justifyContent": "flex-start", "width": "100%"},
-                attributes={"tabindex": "0", "role": "button"},
-                # --- HOVER = preview neighborhood ---
-                on_mouse_enter=lambda *args, tid=tid: preview_token(tid),
-                on_mouse_over=lambda *args, tid=tid: preview_token(tid),
-                on_mouse_move=lambda *args, tid=tid: preview_token(tid),
-                on_pointer_enter=lambda *args, tid=tid: preview_token(tid),
-                on_pointer_move=lambda *args, tid=tid: preview_token(tid),
-                on_focus=lambda *args, tid=tid: preview_token(tid),   # keyboard
-            ):
-                # Inner BUTTON handles click-to-append (and also binds hover for extra safety)
-                solara.Button(
-                    row_label,
-                    classes=[],  # keep wrapper styled; button unstyled
-                    style={"justifyContent": "flex-start", "width": "100%"},
-                    # --- CLICK = append token to text ---
-                    on_click=lambda *args, tid=tid: append_token(tid),
-                    # redundant hover hooks (helps on some builds)
-                    on_mouse_enter=lambda *args, tid=tid: preview_token(tid),
-                    on_mouse_over=lambda *args, tid=tid: preview_token(tid),
-                    on_mouse_move=lambda *args, tid=tid: preview_token(tid),
-                    on_pointer_enter=lambda *args, tid=tid: preview_token(tid),
-                    on_pointer_move=lambda *args, tid=tid: preview_token(tid),
-                    on_focus=lambda *args, tid=tid: preview_token(tid),
-                )
 # ---------- Page ----------
 @solara.component
@@ -354,34 +336,31 @@ def Page():
             "Click a candidate to append it and highlight its **semantic neighborhood**. "
             "Hover a candidate to preview its neighborhood."
         )
         solara.InputText("Enter text", value=text_rx, continuous_update=True, style={"minWidth":"520px"})
         solara.Markdown(f"*{notice_rx.value}*")
         with solara.Row(classes=["app-row"]):
-            # Left column: predictions list (fixed width, sits above plot for events)
             with solara.Column(classes=["predictions-panel"]):
                 PredictionsList()
-            # Right column: plot + neighbor chips
             with solara.Column(classes=["plot-panel"]):
                 solara.Markdown("### Semantic Neighborhood")
                 if not coords:
                     solara.Markdown("> Embedding map unavailable – add `assets/embeddings/*.json`.")
                 else:
                     solara.FigurePlotly(fig_rx.value)
                 if neighbor_list_rx.value:
                     solara.Markdown("**Nearest neighbors:**")
-                    with solara.Row(style={"flex-wrap": "wrap"}):
                         for tok, sim in neighbor_list_rx.value:
-                            solara.HTML(
-                                tag="span",
-                                unsafe_innerHTML=f'<span class="badge">{tok} &nbsp; {(sim*100):.1f}%</span>',
-                            )
         AutoPredictWatcher()
 # ---------- Kickoff ----------
 on_predict()
 Page()

 # app.py
 import json
+import threading
+import time
 from pathlib import Path
 import solara
 import pandas as pd
 import plotly.graph_objects as go
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
+# for robust hover/click from the browser
+import anywidget
+import traitlets as t
 # ---------- Model ----------
+MODEL_ID = "Qwen/Qwen3-0.6B"  # same as the working HF Space
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
+# ---------- Theme & layout (light blue / white / black accents) ----------
 theme_css = """
 :root{
   --primary:#38bdf8;     /* light blue */
   --border:#e5e7eb;      /* gray-200 */
 }
 body{ background:var(--bg); color:var(--text); }
+.badge{
+  display:inline-block; padding:2px 8px; border:1px solid var(--border);
+  border-radius:999px; margin:2px;
 }
+/* Two-column layout with clear stacking (predictions above plot for events) */
+.app-row { display:flex; align-items:flex-start; gap:24px; }
+.predictions-panel { flex:0 0 360px; position:relative; z-index:10; }
+.plot-panel        { flex:1 1 auto;  position:relative; z-index:1; overflow:hidden; }
+/* Prediction rows (styled on a button or wrapper div) */
 .rowbtn{
   width:100%; padding:10px 12px; border-radius:12px;
   border:1px solid var(--border); background:#fff; color:var(--text);
 .rowbtn:hover{ background:#f7fbff; border-color:#c3e8fb; }
 """
+# ---------- Reactive state ----------
 text_rx = solara.reactive("twinkle, twinkle, little ")
+preds_rx = solara.reactive(pd.DataFrame(columns=["probs", "id", "tok"]))
 selected_token_id_rx = solara.reactive(None)
 neighbor_list_rx = solara.reactive([])
 last_hovered_id_rx = solara.reactive(None)
 notice_rx = solara.reactive("Click a candidate (or hover to preview).")
 auto_running_rx = solara.reactive(True)
 # ---------- Embedding assets ----------
 ASSETS = Path("assets/embeddings")
 COORDS_PATH = ASSETS / "pca_top5k_coords.json"
 else:
     notice_rx.set("Embedding files not found — add assets/embeddings/*.json to enable the map.")
 # ---------- Helpers ----------
 def display_token_from_id(tid: int) -> str:
     toks = tokenizer.convert_ids_to_tokens([int(tid)], skip_special_tokens=True)
         if t.startswith(lead):
             t = t[len(lead):]
     t = t.replace("\n","↵")
+    return t if t.strip() else "␠"
 def fmt_row(idx: int, prob: str, tid: int, tok_disp: str) -> str:
+    # columns: index, probability, token id, token text
     return f"{idx:<2}  {prob:<7}  {tid:<6}  {tok_disp}"
+# ---------- Prediction ----------
 def predict_top10(prompt: str) -> pd.DataFrame:
     if not prompt:
+        return pd.DataFrame(columns=["probs", "id", "tok"])
+    tokens = tokenizer(prompt, return_tensors="pt", padding=False)
     out = model.generate(
+        **tokens,
         max_new_tokens=1,
         output_scores=True,
         return_dict_in_generate=True,
         pad_token_id=tokenizer.eos_token_id,
+        do_sample=False,  # greedy; temp/top_k are ignored (by design)
     )
     scores = torch.softmax(out.scores[0], dim=-1)
     topk = torch.topk(scores, 10)
     ids = [int(topk.indices[0, i]) for i in range(10)]
     probs = [float(topk.values[0, i]) for i in range(10)]
+    toks = [tokenizer.decode([i]) for i in ids]  # for append
     df = pd.DataFrame({"probs": probs, "id": ids, "tok": toks})
     df["probs"] = df["probs"].map(lambda p: f"{p:.2%}")
     return df
 def on_predict():
     df = predict_top10(text_rx.value)
     preds_rx.set(df)
     if len(df) == 0:
         return
     if selected_token_id_rx.value is None:
+        preview_token(int(df.iloc[0]["id"]))   # only first time
     else:
+        fig_rx.set(highlight(int(selected_token_id_rx.value)))  # preserve selection
+# ---------- Plot / neighborhood ----------
 def base_scatter():
     fig = go.Figure()
     if coords:
     ))
     return fig
 def preview_token(token_id: int):
+    # print("preview ->", token_id)  # enable for debugging in Space logs
     token_id = int(token_id)
     if last_hovered_id_rx.value == token_id:
         return
     fig_rx.set(highlight(token_id))
 def append_token(token_id: int):
+    # print("append ->", token_id)
     decoded = tokenizer.decode([int(token_id)])
     text_rx.set(text_rx.value + decoded)
     preview_token(int(token_id))
     on_predict()
+# ---------- Debounced auto-predict ----------
 @solara.component
 def AutoPredictWatcher():
     text = text_rx.value
     solara.use_effect(effect, [text, auto])
     return solara.Text("", style={"display": "none"})
+# ---------- Hover-enabled list (browser) ----------
 class HoverList(anywidget.AnyWidget):
     """
+    Renders the prediction rows in the browser and streams hover/click events
+    back to Python via synced traitlets.
     """
     _esm = """
     export function render({ model, el }) {
+      const renderList = () => {
         const items = model.get('items') || [];
         el.innerHTML = "";
         const wrap = document.createElement('div');
         wrap.style.display = 'flex';
         wrap.style.flexDirection = 'column';
         items.forEach(({tid, label}) => {
           const btn = document.createElement('button');
           btn.textContent = label;
+          btn.className = 'rowbtn';
           btn.setAttribute('type', 'button');
           btn.setAttribute('role', 'button');
           btn.setAttribute('tabindex', '0');
           const preview = () => {
             model.set('hovered_id', tid);
             model.save_changes();
           btn.addEventListener('mousemove',  preview);
           btn.addEventListener('focus',      preview);
           btn.addEventListener('click', () => {
             model.set('clicked_id', tid);
             model.save_changes();
           wrap.appendChild(btn);
         });
         el.appendChild(wrap);
       };
+      renderList();
+      model.on('change:items', renderList);
     }
     """
+    items      = t.List(trait=t.Dict()).tag(sync=True)   # [{tid:int, label:str}, ...]
+    hovered_id = t.Int(allow_none=True).tag(sync=True)
+    clicked_id = t.Int(allow_none=True).tag(sync=True)
+# ---------- Predictions list (uses HoverList) ----------
 @solara.component
 def PredictionsList():
+    df = preds_rx.value
     with solara.Column(gap="6px", style={"maxWidth": "720px"}):
         solara.Markdown("### Prediction")
         solara.Text(
             },
         )
+        # Build items for the browser widget
+        items = []
         for i, row in df.iterrows():
             tid = int(row["id"])
+            prob = row["probs"]                 # already a formatted string like "4.12%"
             tok_disp = display_token_from_id(tid)
+            items.append({"tid": tid, "label": fmt_row(i, prob, tid, tok_disp)})
+        w = HoverList()
+        w.items = items
+        # Hover → preview (updates plot + neighbor chips)
+        def _on_hover(change):
+            tid = change["new"]
+            if tid is not None:
+                preview_token(int(tid))
+        w.observe(_on_hover, names="hovered_id")
+        # Click → append
+        def _on_click(change):
+            tid = change["new"]
+            if tid is not None:
+                append_token(int(tid))
+        w.observe(_on_click, names="clicked_id")
+        solara.display(w)
 # ---------- Page ----------
 @solara.component
             "Click a candidate to append it and highlight its **semantic neighborhood**. "
             "Hover a candidate to preview its neighborhood."
         )
         solara.InputText("Enter text", value=text_rx, continuous_update=True, style={"minWidth":"520px"})
         solara.Markdown(f"*{notice_rx.value}*")
         with solara.Row(classes=["app-row"]):
             with solara.Column(classes=["predictions-panel"]):
                 PredictionsList()
             with solara.Column(classes=["plot-panel"]):
                 solara.Markdown("### Semantic Neighborhood")
                 if not coords:
                     solara.Markdown("> Embedding map unavailable – add `assets/embeddings/*.json`.")
                 else:
                     solara.FigurePlotly(fig_rx.value)
                 if neighbor_list_rx.value:
                     solara.Markdown("**Nearest neighbors:**")
+                    with solara.Row(style={"flex-wrap":"wrap"}):
                         for tok, sim in neighbor_list_rx.value:
+                            solara.HTML(tag="span",
+                                unsafe_innerHTML=f'<span class="badge">{tok} &nbsp; {(sim*100):.1f}%</span>')
         AutoPredictWatcher()
 # ---------- Kickoff ----------
 on_predict()
 Page()