Spaces:

chyams
/

embedding-explorer

Sleeping

chyams Claude Opus 4.6 committed on Feb 24

Commit

c18f969

1 Parent(s): aa46db6

Embedding Explorer: new examples, auto-execute, neighbors dropdown, zoom

- Replace 10 example queries with lecture-aligned progression
- Auto-execute on example click via exp_in.change() with example guard
- Add neighbors dropdown (3-12, default 4) next to radio, persisted in share URL
- Zoom default camera in to (1.0, 1.0, 0.8) from (1.5, 1.5, 1.2)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (1) hide show

app.py +111 -39

app.py CHANGED Viewed

@@ -28,19 +28,19 @@ warnings.filterwarnings("ignore", category=FutureWarning, module="sklearn")
 # ── Configuration (all changeable via HF Space env vars) ─────
 EXAMPLES = json.loads(os.environ.get("EXAMPLES", json.dumps([
-    "bird bee elephant helicopter spitball",
-    "woman @woman",
-    "car truck dog cat fish",
-    "man - woman, uncle - aunt",
-    "man - woman + aunt, uncle",
-    "woman - man + nephew, niece",
-    "woman - man + king, queen",
-    "paris - france + italy, rome",
-    "hitler - germany + italy, mussolini",
-    "0.5 king - 0.5 man + 0.5 woman, queen",
 ])))
-N_NEIGHBORS = int(os.environ.get("N_NEIGHBORS", "8"))
 # ── Share URL infrastructure ─────────────────────────────────
@@ -310,7 +310,7 @@ def layout_3d(axis_range=1.3, camera=None):
     ax_x["range"] = fixed
     ax_y["range"] = fixed
     ax_z["range"] = fixed
-    default_camera = dict(eye=dict(x=1.5, y=1.5, z=1.2))
     return dict(
         scene=dict(
             xaxis=ax_x,
@@ -520,7 +520,7 @@ def _encode_camera(camera_json):
 # ── Main visualization ───────────────────────────────────────
-def explore(input_text, selected, hidden=None, camera=None):
     """Unified 3D visualization of words and vector expressions.
     Args:
@@ -528,6 +528,7 @@ def explore(input_text, selected, hidden=None, camera=None):
         selected: Currently selected item for neighbor display (or None).
         hidden: Set of labels to hide from rendering (MDS still uses all items).
         camera: Plotly camera dict to set initial view.
     Returns:
         (fig, status_md, radio_update, all_labels)
@@ -611,20 +612,21 @@ def explore(input_text, selected, hidden=None, camera=None):
                     break
     # Gather neighbors if something is selected (and not hidden)
     nbr_data = []
     if selected is not None:
         sel_item = items[sel_idx]
         if sel_item[2]:  # expression
-            raw = model.similar_by_vector(sel_item[1], topn=N_NEIGHBORS + 20)
         else:
-            raw = model.most_similar(sel_item[0], topn=N_NEIGHBORS + 20)
         all_op_words = set()
         for _, _, _, ops, _ in items:
             all_op_words.update(ops)
         label_set = set(labels)
         nbr_data = [(w, s) for w, s in raw
                      if w not in all_op_words and w not in label_set
-                     ][:N_NEIGHBORS]
     # ── MDS on all operand words + neighbors ──
     mds_words = all_words + [w for w, _ in nbr_data]
@@ -913,6 +915,14 @@ h1 { color: #63348d !important; }
     height: 100% !important;
 }
 /* Hidden camera state textbox (visible=False prevents DOM rendering in Gradio 6) */
 .camera-hidden { display: none !important; }
@@ -1075,12 +1085,22 @@ with gr.Blocks(title="Embedding Explorer") as demo:
         visible=False, interactive=True,
         elem_classes=["vis-cbg"],
     )
-    exp_radio = gr.Radio(
-        label="Click to see nearest neighbors",
-        choices=[], value=None,
-        visible=False, interactive=True,
-        elem_classes=["nbr-radio"],
-    )
     # ── Event handlers ──
@@ -1093,44 +1113,66 @@ with gr.Blocks(title="Embedding Explorer") as demo:
         except (json.JSONDecodeError, TypeError):
             return None
-    def on_explore(input_text):
         """Fresh explore — compute MDS, show all items, reset checkboxes.
         Supports @word syntax to auto-select a word for neighbors:
             dog cat fish @dog  →  plots all 3, shows dog's neighbors
         """
         selected = None
         if input_text and "@" in input_text:
             match = re.search(r"@(\S+)", input_text)
             if match:
                 selected = match.group(1).lower()
                 input_text = re.sub(r"\s*@\S+", "", input_text).strip()
-        fig, status, radio, labels = explore(input_text, selected)
         cbg = gr.update(choices=labels, value=labels, visible=bool(labels))
         return fig, status, radio, labels, cbg, gr.update(value=input_text)
-    def on_radio(input_text, selected, all_labels, visible, camera_json, is_loading):
         """Neighbor selection — re-render with current visibility + camera."""
         if is_loading:
             return gr.update(), gr.update(), gr.update(), False
         hidden = set(all_labels) - set(visible) if all_labels and visible else set()
         camera = _parse_camera_json(camera_json)
-        fig, status, radio, _ = explore(input_text, selected, hidden=hidden or None, camera=camera)
         return fig, status, radio, False
-    def on_visibility(input_text, selected, all_labels, visible, camera_json, is_loading):
         """Visibility toggle — re-render with updated hidden set + camera."""
         if is_loading:
             return gr.update(), gr.update(), gr.update(), False
         hidden = set(all_labels) - set(visible) if all_labels else set()
         # If selected item is now hidden, clear selection
         if selected and selected != "(clear)" and selected in hidden:
             selected = None
         camera = _parse_camera_json(camera_json)
-        fig, status, radio, _ = explore(input_text, selected, hidden=hidden or None, camera=camera)
         return fig, status, radio, False
-    def on_share(input_text, selected, visible, camera_json, request: gr.Request):
         """Build share URL encoding current state."""
         params = {}
         if input_text and input_text.strip():
@@ -1144,6 +1186,9 @@ with gr.Blocks(title="Embedding Explorer") as demo:
             encoded = _encode_camera(camera_json)
             if encoded:
                 params["cam"] = encoded
         if not params.get("q"):
             return gr.update(value="Nothing to share", visible=True)
         # Build base URL from request (gets correct port for local dev)
@@ -1162,32 +1207,50 @@ with gr.Blocks(title="Embedding Explorer") as demo:
     # ── Wire up events ──
     exp_btn.click(
         on_explore,
-        inputs=[exp_in],
         outputs=[exp_plot, exp_status, exp_radio, all_labels_state, vis_cbg, exp_in],
     )
     exp_in.submit(
         on_explore,
-        inputs=[exp_in],
         outputs=[exp_plot, exp_status, exp_radio, all_labels_state, vis_cbg, exp_in],
     )
-    # Radio + visibility: camera_txt is kept up-to-date by polling script
     exp_radio.change(
         on_radio,
-        inputs=[exp_in, exp_radio, all_labels_state, vis_cbg, camera_txt, loading_share],
         outputs=[exp_plot, exp_status, exp_radio, loading_share],
     )
     vis_cbg.change(
         on_visibility,
-        inputs=[exp_in, exp_radio, all_labels_state, vis_cbg, camera_txt, loading_share],
         outputs=[exp_plot, exp_status, exp_radio, loading_share],
     )
     # Share: camera_txt kept up-to-date by polling script
     share_btn.click(
         fn=on_share,
-        inputs=[exp_in, exp_radio, vis_cbg, camera_txt],
         outputs=[share_url],
     )
@@ -1199,8 +1262,11 @@ with gr.Blocks(title="Embedding Explorer") as demo:
         return qp
     def apply_share_params(params):
-        """Step 2: Apply share params — set input, run explore, apply visibility + camera."""
         if not params or "q" not in params:
             return (
                 gr.update(),  # exp_in
                 gr.update(),  # exp_plot
@@ -1210,6 +1276,7 @@ with gr.Blocks(title="Embedding Explorer") as demo:
                 [],           # all_labels_state
                 gr.update(),  # camera_txt
                 False,        # loading_share
             )
         input_text = params.get("q", "")
@@ -1218,11 +1285,13 @@ with gr.Blocks(title="Embedding Explorer") as demo:
             selected = None
         vis_str = params.get("vis")
         cam_str = params.get("cam")
         camera = _parse_camera(cam_str)
         # First explore with all items visible to get labels
-        _, _, _, labels = explore(input_text, None, camera=camera)
         # Apply visibility
         if vis_str:
@@ -1233,7 +1302,7 @@ with gr.Blocks(title="Embedding Explorer") as demo:
             hidden = set()
         fig, status, radio, _ = explore(
-            input_text, selected, hidden=hidden or None, camera=camera
         )
         cbg = gr.update(
@@ -1245,6 +1314,8 @@ with gr.Blocks(title="Embedding Explorer") as demo:
         # Pre-populate camera_txt so subsequent re-renders preserve camera
         camera_json = json.dumps(camera) if camera else ""
         return (
             gr.update(value=input_text),
             fig,
@@ -1254,6 +1325,7 @@ with gr.Blocks(title="Embedding Explorer") as demo:
             labels,
             gr.update(value=camera_json),
             True,  # loading_share — suppress cascading events
         )
     demo.load(
@@ -1262,7 +1334,7 @@ with gr.Blocks(title="Embedding Explorer") as demo:
     ).then(
         fn=apply_share_params,
         inputs=[share_params],
-        outputs=[exp_in, exp_plot, exp_status, exp_radio, vis_cbg, all_labels_state, camera_txt, loading_share],
     )
 demo.launch(theme=THEME, css=CSS, head=FORCE_LIGHT)

 # ── Configuration (all changeable via HF Space env vars) ─────
 EXAMPLES = json.loads(os.environ.get("EXAMPLES", json.dumps([
+    "dog cat fish car truck",
+    "paris france berlin germany tokyo japan",
+    "man woman king queen prince princess",
+    "man - woman, uncle - aunt, man woman uncle aunt",
+    "aunt - woman + man, man woman uncle aunt",
+    "nephew - man + woman, man woman nephew niece",
+    "king - man + woman, man woman king queen",
+    "paris - france + italy, paris france italy rome",
+    "sushi - japan + germany, sushi japan germany bratwurst",
+    "hitler - germany + italy, germany italy hitler mussolini",
 ])))
+N_NEIGHBORS = int(os.environ.get("N_NEIGHBORS", "4"))
 # ── Share URL infrastructure ─────────────────────────────────
     ax_x["range"] = fixed
     ax_y["range"] = fixed
     ax_z["range"] = fixed
+    default_camera = dict(eye=dict(x=1.0, y=1.0, z=0.8))
     return dict(
         scene=dict(
             xaxis=ax_x,
 # ── Main visualization ───────────────────────────────────────
+def explore(input_text, selected, hidden=None, camera=None, n_neighbors=None):
     """Unified 3D visualization of words and vector expressions.
     Args:
         selected: Currently selected item for neighbor display (or None).
         hidden: Set of labels to hide from rendering (MDS still uses all items).
         camera: Plotly camera dict to set initial view.
+        n_neighbors: Number of nearest neighbors to show (default N_NEIGHBORS).
     Returns:
         (fig, status_md, radio_update, all_labels)
                     break
     # Gather neighbors if something is selected (and not hidden)
+    nn = n_neighbors if n_neighbors is not None else N_NEIGHBORS
     nbr_data = []
     if selected is not None:
         sel_item = items[sel_idx]
         if sel_item[2]:  # expression
+            raw = model.similar_by_vector(sel_item[1], topn=nn + 20)
         else:
+            raw = model.most_similar(sel_item[0], topn=nn + 20)
         all_op_words = set()
         for _, _, _, ops, _ in items:
             all_op_words.update(ops)
         label_set = set(labels)
         nbr_data = [(w, s) for w, s in raw
                      if w not in all_op_words and w not in label_set
+                     ][:nn]
     # ── MDS on all operand words + neighbors ──
     mds_words = all_words + [w for w, _ in nbr_data]
     height: 100% !important;
 }
+/* Neighbors dropdown — compact */
+.nn-dropdown {
+    max-width: 100px !important;
+}
+.nn-dropdown select {
+    color: #63348d !important;
+}
 /* Hidden camera state textbox (visible=False prevents DOM rendering in Gradio 6) */
 .camera-hidden { display: none !important; }
         visible=False, interactive=True,
         elem_classes=["vis-cbg"],
     )
+    with gr.Row():
+        exp_radio = gr.Radio(
+            label="Click to see nearest neighbors",
+            choices=[], value=None,
+            visible=False, interactive=True,
+            elem_classes=["nbr-radio"],
+        )
+        nn_dropdown = gr.Dropdown(
+            label="Neighbors",
+            choices=[str(i) for i in range(3, 13)],
+            value=str(N_NEIGHBORS),
+            interactive=True,
+            scale=0,
+            min_width=90,
+            elem_classes=["nn-dropdown"],
+        )
     # ── Event handlers ──
         except (json.JSONDecodeError, TypeError):
             return None
+    def _get_nn(nn_val):
+        """Parse neighbor count from dropdown value."""
+        try:
+            return int(nn_val)
+        except (TypeError, ValueError):
+            return N_NEIGHBORS
+    def on_explore(input_text, nn_val=None):
         """Fresh explore — compute MDS, show all items, reset checkboxes.
         Supports @word syntax to auto-select a word for neighbors:
             dog cat fish @dog  →  plots all 3, shows dog's neighbors
         """
+        nn = _get_nn(nn_val)
         selected = None
         if input_text and "@" in input_text:
             match = re.search(r"@(\S+)", input_text)
             if match:
                 selected = match.group(1).lower()
                 input_text = re.sub(r"\s*@\S+", "", input_text).strip()
+        fig, status, radio, labels = explore(input_text, selected, n_neighbors=nn)
         cbg = gr.update(choices=labels, value=labels, visible=bool(labels))
         return fig, status, radio, labels, cbg, gr.update(value=input_text)
+    def on_radio(input_text, selected, all_labels, visible, camera_json, is_loading, nn_val):
         """Neighbor selection — re-render with current visibility + camera."""
         if is_loading:
             return gr.update(), gr.update(), gr.update(), False
+        nn = _get_nn(nn_val)
         hidden = set(all_labels) - set(visible) if all_labels and visible else set()
         camera = _parse_camera_json(camera_json)
+        fig, status, radio, _ = explore(input_text, selected, hidden=hidden or None, camera=camera, n_neighbors=nn)
         return fig, status, radio, False
+    def on_visibility(input_text, selected, all_labels, visible, camera_json, is_loading, nn_val):
         """Visibility toggle — re-render with updated hidden set + camera."""
         if is_loading:
             return gr.update(), gr.update(), gr.update(), False
+        nn = _get_nn(nn_val)
         hidden = set(all_labels) - set(visible) if all_labels else set()
         # If selected item is now hidden, clear selection
         if selected and selected != "(clear)" and selected in hidden:
             selected = None
         camera = _parse_camera_json(camera_json)
+        fig, status, radio, _ = explore(input_text, selected, hidden=hidden or None, camera=camera, n_neighbors=nn)
         return fig, status, radio, False
+    def on_nn_change(input_text, selected, all_labels, visible, camera_json, is_loading, nn_val):
+        """Neighbor count changed — re-render if a word is selected."""
+        if is_loading:
+            return gr.update(), gr.update(), gr.update(), False
+        if not selected or selected == "(clear)":
+            return gr.update(), gr.update(), gr.update(), False
+        nn = _get_nn(nn_val)
+        hidden = set(all_labels) - set(visible) if all_labels and visible else set()
+        camera = _parse_camera_json(camera_json)
+        fig, status, radio, _ = explore(input_text, selected, hidden=hidden or None, camera=camera, n_neighbors=nn)
+        return fig, status, radio, False
+    def on_share(input_text, selected, visible, camera_json, nn_val, request: gr.Request):
         """Build share URL encoding current state."""
         params = {}
         if input_text and input_text.strip():
             encoded = _encode_camera(camera_json)
             if encoded:
                 params["cam"] = encoded
+        nn = _get_nn(nn_val)
+        if nn != N_NEIGHBORS:
+            params["nn"] = str(nn)
         if not params.get("q"):
             return gr.update(value="Nothing to share", visible=True)
         # Build base URL from request (gets correct port for local dev)
     # ── Wire up events ──
+    _EXAMPLE_SET = set(EXAMPLES)
+    def on_input_change(input_text, nn_val):
+        """Auto-explore when input matches an example (set by gr.Examples click)."""
+        if input_text and input_text.strip() in _EXAMPLE_SET:
+            return on_explore(input_text, nn_val)
+        return gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update()
+    exp_in.change(
+        on_input_change,
+        inputs=[exp_in, nn_dropdown],
+        outputs=[exp_plot, exp_status, exp_radio, all_labels_state, vis_cbg, exp_in],
+    )
     exp_btn.click(
         on_explore,
+        inputs=[exp_in, nn_dropdown],
         outputs=[exp_plot, exp_status, exp_radio, all_labels_state, vis_cbg, exp_in],
     )
     exp_in.submit(
         on_explore,
+        inputs=[exp_in, nn_dropdown],
         outputs=[exp_plot, exp_status, exp_radio, all_labels_state, vis_cbg, exp_in],
     )
+    # Radio + visibility + nn: camera_txt is kept up-to-date by polling script
     exp_radio.change(
         on_radio,
+        inputs=[exp_in, exp_radio, all_labels_state, vis_cbg, camera_txt, loading_share, nn_dropdown],
         outputs=[exp_plot, exp_status, exp_radio, loading_share],
     )
     vis_cbg.change(
         on_visibility,
+        inputs=[exp_in, exp_radio, all_labels_state, vis_cbg, camera_txt, loading_share, nn_dropdown],
+        outputs=[exp_plot, exp_status, exp_radio, loading_share],
+    )
+    nn_dropdown.change(
+        on_nn_change,
+        inputs=[exp_in, exp_radio, all_labels_state, vis_cbg, camera_txt, loading_share, nn_dropdown],
         outputs=[exp_plot, exp_status, exp_radio, loading_share],
     )
     # Share: camera_txt kept up-to-date by polling script
     share_btn.click(
         fn=on_share,
+        inputs=[exp_in, exp_radio, vis_cbg, camera_txt, nn_dropdown],
         outputs=[share_url],
     )
         return qp
     def apply_share_params(params):
+        """Step 2: Apply share params — set input, run explore, apply visibility + camera + nn."""
         if not params or "q" not in params:
+            # Check if nn param is present even without q
+            nn_str = params.get("nn") if params else None
+            nn_update = gr.update(value=nn_str) if nn_str else gr.update()
             return (
                 gr.update(),  # exp_in
                 gr.update(),  # exp_plot
                 [],           # all_labels_state
                 gr.update(),  # camera_txt
                 False,        # loading_share
+                nn_update,    # nn_dropdown
             )
         input_text = params.get("q", "")
             selected = None
         vis_str = params.get("vis")
         cam_str = params.get("cam")
+        nn_str = params.get("nn")
         camera = _parse_camera(cam_str)
+        nn = int(nn_str) if nn_str and nn_str.isdigit() else None
         # First explore with all items visible to get labels
+        _, _, _, labels = explore(input_text, None, camera=camera, n_neighbors=nn)
         # Apply visibility
         if vis_str:
             hidden = set()
         fig, status, radio, _ = explore(
+            input_text, selected, hidden=hidden or None, camera=camera, n_neighbors=nn
         )
         cbg = gr.update(
         # Pre-populate camera_txt so subsequent re-renders preserve camera
         camera_json = json.dumps(camera) if camera else ""
+        nn_update = gr.update(value=str(nn)) if nn else gr.update()
         return (
             gr.update(value=input_text),
             fig,
             labels,
             gr.update(value=camera_json),
             True,  # loading_share — suppress cascading events
+            nn_update,
         )
     demo.load(
     ).then(
         fn=apply_share_params,
         inputs=[share_params],
+        outputs=[exp_in, exp_plot, exp_status, exp_radio, vis_cbg, all_labels_state, camera_txt, loading_share, nn_dropdown],
     )
 demo.launch(theme=THEME, css=CSS, head=FORCE_LIGHT)