chyams Claude Opus 4.6 committed on
Commit
c18f969
·
1 Parent(s): aa46db6

Embedding Explorer: new examples, auto-execute, neighbors dropdown, zoom

Browse files

- Replace 10 example queries with lecture-aligned progression
- Auto-execute on example click via exp_in.change() with example guard
- Add neighbors dropdown (3-12, default 4) next to radio, persisted in share URL
- Zoom default camera in to (1.0, 1.0, 0.8) from (1.5, 1.5, 1.2)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. app.py +111 -39
app.py CHANGED
@@ -28,19 +28,19 @@ warnings.filterwarnings("ignore", category=FutureWarning, module="sklearn")
28
  # ── Configuration (all changeable via HF Space env vars) ─────
29
 
30
  EXAMPLES = json.loads(os.environ.get("EXAMPLES", json.dumps([
31
- "bird bee elephant helicopter spitball",
32
- "woman @woman",
33
- "car truck dog cat fish",
34
- "man - woman, uncle - aunt",
35
- "man - woman + aunt, uncle",
36
- "woman - man + nephew, niece",
37
- "woman - man + king, queen",
38
- "paris - france + italy, rome",
39
- "hitler - germany + italy, mussolini",
40
- "0.5 king - 0.5 man + 0.5 woman, queen",
41
  ])))
42
 
43
- N_NEIGHBORS = int(os.environ.get("N_NEIGHBORS", "8"))
44
 
45
  # ── Share URL infrastructure ─────────────────────────────────
46
 
@@ -310,7 +310,7 @@ def layout_3d(axis_range=1.3, camera=None):
310
  ax_x["range"] = fixed
311
  ax_y["range"] = fixed
312
  ax_z["range"] = fixed
313
- default_camera = dict(eye=dict(x=1.5, y=1.5, z=1.2))
314
  return dict(
315
  scene=dict(
316
  xaxis=ax_x,
@@ -520,7 +520,7 @@ def _encode_camera(camera_json):
520
 
521
  # ── Main visualization ───────────────────────────────────────
522
 
523
- def explore(input_text, selected, hidden=None, camera=None):
524
  """Unified 3D visualization of words and vector expressions.
525
 
526
  Args:
@@ -528,6 +528,7 @@ def explore(input_text, selected, hidden=None, camera=None):
528
  selected: Currently selected item for neighbor display (or None).
529
  hidden: Set of labels to hide from rendering (MDS still uses all items).
530
  camera: Plotly camera dict to set initial view.
 
531
 
532
  Returns:
533
  (fig, status_md, radio_update, all_labels)
@@ -611,20 +612,21 @@ def explore(input_text, selected, hidden=None, camera=None):
611
  break
612
 
613
  # Gather neighbors if something is selected (and not hidden)
 
614
  nbr_data = []
615
  if selected is not None:
616
  sel_item = items[sel_idx]
617
  if sel_item[2]: # expression
618
- raw = model.similar_by_vector(sel_item[1], topn=N_NEIGHBORS + 20)
619
  else:
620
- raw = model.most_similar(sel_item[0], topn=N_NEIGHBORS + 20)
621
  all_op_words = set()
622
  for _, _, _, ops, _ in items:
623
  all_op_words.update(ops)
624
  label_set = set(labels)
625
  nbr_data = [(w, s) for w, s in raw
626
  if w not in all_op_words and w not in label_set
627
- ][:N_NEIGHBORS]
628
 
629
  # ── MDS on all operand words + neighbors ──
630
  mds_words = all_words + [w for w, _ in nbr_data]
@@ -913,6 +915,14 @@ h1 { color: #63348d !important; }
913
  height: 100% !important;
914
  }
915
 
 
 
 
 
 
 
 
 
916
  /* Hidden camera state textbox (visible=False prevents DOM rendering in Gradio 6) */
917
  .camera-hidden { display: none !important; }
918
 
@@ -1075,12 +1085,22 @@ with gr.Blocks(title="Embedding Explorer") as demo:
1075
  visible=False, interactive=True,
1076
  elem_classes=["vis-cbg"],
1077
  )
1078
- exp_radio = gr.Radio(
1079
- label="Click to see nearest neighbors",
1080
- choices=[], value=None,
1081
- visible=False, interactive=True,
1082
- elem_classes=["nbr-radio"],
1083
- )
 
 
 
 
 
 
 
 
 
 
1084
 
1085
  # ── Event handlers ──
1086
 
@@ -1093,44 +1113,66 @@ with gr.Blocks(title="Embedding Explorer") as demo:
1093
  except (json.JSONDecodeError, TypeError):
1094
  return None
1095
 
1096
- def on_explore(input_text):
 
 
 
 
 
 
 
1097
  """Fresh explore — compute MDS, show all items, reset checkboxes.
1098
 
1099
  Supports @word syntax to auto-select a word for neighbors:
1100
  dog cat fish @dog → plots all 3, shows dog's neighbors
1101
  """
 
1102
  selected = None
1103
  if input_text and "@" in input_text:
1104
  match = re.search(r"@(\S+)", input_text)
1105
  if match:
1106
  selected = match.group(1).lower()
1107
  input_text = re.sub(r"\s*@\S+", "", input_text).strip()
1108
- fig, status, radio, labels = explore(input_text, selected)
1109
  cbg = gr.update(choices=labels, value=labels, visible=bool(labels))
1110
  return fig, status, radio, labels, cbg, gr.update(value=input_text)
1111
 
1112
- def on_radio(input_text, selected, all_labels, visible, camera_json, is_loading):
1113
  """Neighbor selection — re-render with current visibility + camera."""
1114
  if is_loading:
1115
  return gr.update(), gr.update(), gr.update(), False
 
1116
  hidden = set(all_labels) - set(visible) if all_labels and visible else set()
1117
  camera = _parse_camera_json(camera_json)
1118
- fig, status, radio, _ = explore(input_text, selected, hidden=hidden or None, camera=camera)
1119
  return fig, status, radio, False
1120
 
1121
- def on_visibility(input_text, selected, all_labels, visible, camera_json, is_loading):
1122
  """Visibility toggle — re-render with updated hidden set + camera."""
1123
  if is_loading:
1124
  return gr.update(), gr.update(), gr.update(), False
 
1125
  hidden = set(all_labels) - set(visible) if all_labels else set()
1126
  # If selected item is now hidden, clear selection
1127
  if selected and selected != "(clear)" and selected in hidden:
1128
  selected = None
1129
  camera = _parse_camera_json(camera_json)
1130
- fig, status, radio, _ = explore(input_text, selected, hidden=hidden or None, camera=camera)
1131
  return fig, status, radio, False
1132
 
1133
- def on_share(input_text, selected, visible, camera_json, request: gr.Request):
 
 
 
 
 
 
 
 
 
 
 
 
1134
  """Build share URL encoding current state."""
1135
  params = {}
1136
  if input_text and input_text.strip():
@@ -1144,6 +1186,9 @@ with gr.Blocks(title="Embedding Explorer") as demo:
1144
  encoded = _encode_camera(camera_json)
1145
  if encoded:
1146
  params["cam"] = encoded
 
 
 
1147
  if not params.get("q"):
1148
  return gr.update(value="Nothing to share", visible=True)
1149
  # Build base URL from request (gets correct port for local dev)
@@ -1162,32 +1207,50 @@ with gr.Blocks(title="Embedding Explorer") as demo:
1162
 
1163
  # ── Wire up events ──
1164
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1165
  exp_btn.click(
1166
  on_explore,
1167
- inputs=[exp_in],
1168
  outputs=[exp_plot, exp_status, exp_radio, all_labels_state, vis_cbg, exp_in],
1169
  )
1170
  exp_in.submit(
1171
  on_explore,
1172
- inputs=[exp_in],
1173
  outputs=[exp_plot, exp_status, exp_radio, all_labels_state, vis_cbg, exp_in],
1174
  )
1175
- # Radio + visibility: camera_txt is kept up-to-date by polling script
1176
  exp_radio.change(
1177
  on_radio,
1178
- inputs=[exp_in, exp_radio, all_labels_state, vis_cbg, camera_txt, loading_share],
1179
  outputs=[exp_plot, exp_status, exp_radio, loading_share],
1180
  )
1181
  vis_cbg.change(
1182
  on_visibility,
1183
- inputs=[exp_in, exp_radio, all_labels_state, vis_cbg, camera_txt, loading_share],
 
 
 
 
 
1184
  outputs=[exp_plot, exp_status, exp_radio, loading_share],
1185
  )
1186
 
1187
  # Share: camera_txt kept up-to-date by polling script
1188
  share_btn.click(
1189
  fn=on_share,
1190
- inputs=[exp_in, exp_radio, vis_cbg, camera_txt],
1191
  outputs=[share_url],
1192
  )
1193
 
@@ -1199,8 +1262,11 @@ with gr.Blocks(title="Embedding Explorer") as demo:
1199
  return qp
1200
 
1201
  def apply_share_params(params):
1202
- """Step 2: Apply share params — set input, run explore, apply visibility + camera."""
1203
  if not params or "q" not in params:
 
 
 
1204
  return (
1205
  gr.update(), # exp_in
1206
  gr.update(), # exp_plot
@@ -1210,6 +1276,7 @@ with gr.Blocks(title="Embedding Explorer") as demo:
1210
  [], # all_labels_state
1211
  gr.update(), # camera_txt
1212
  False, # loading_share
 
1213
  )
1214
 
1215
  input_text = params.get("q", "")
@@ -1218,11 +1285,13 @@ with gr.Blocks(title="Embedding Explorer") as demo:
1218
  selected = None
1219
  vis_str = params.get("vis")
1220
  cam_str = params.get("cam")
 
1221
 
1222
  camera = _parse_camera(cam_str)
 
1223
 
1224
  # First explore with all items visible to get labels
1225
- _, _, _, labels = explore(input_text, None, camera=camera)
1226
 
1227
  # Apply visibility
1228
  if vis_str:
@@ -1233,7 +1302,7 @@ with gr.Blocks(title="Embedding Explorer") as demo:
1233
  hidden = set()
1234
 
1235
  fig, status, radio, _ = explore(
1236
- input_text, selected, hidden=hidden or None, camera=camera
1237
  )
1238
 
1239
  cbg = gr.update(
@@ -1245,6 +1314,8 @@ with gr.Blocks(title="Embedding Explorer") as demo:
1245
  # Pre-populate camera_txt so subsequent re-renders preserve camera
1246
  camera_json = json.dumps(camera) if camera else ""
1247
 
 
 
1248
  return (
1249
  gr.update(value=input_text),
1250
  fig,
@@ -1254,6 +1325,7 @@ with gr.Blocks(title="Embedding Explorer") as demo:
1254
  labels,
1255
  gr.update(value=camera_json),
1256
  True, # loading_share — suppress cascading events
 
1257
  )
1258
 
1259
  demo.load(
@@ -1262,7 +1334,7 @@ with gr.Blocks(title="Embedding Explorer") as demo:
1262
  ).then(
1263
  fn=apply_share_params,
1264
  inputs=[share_params],
1265
- outputs=[exp_in, exp_plot, exp_status, exp_radio, vis_cbg, all_labels_state, camera_txt, loading_share],
1266
  )
1267
 
1268
  demo.launch(theme=THEME, css=CSS, head=FORCE_LIGHT)
 
28
  # ── Configuration (all changeable via HF Space env vars) ─────
29
 
30
  EXAMPLES = json.loads(os.environ.get("EXAMPLES", json.dumps([
31
+ "dog cat fish car truck",
32
+ "paris france berlin germany tokyo japan",
33
+ "man woman king queen prince princess",
34
+ "man - woman, uncle - aunt, man woman uncle aunt",
35
+ "aunt - woman + man, man woman uncle aunt",
36
+ "nephew - man + woman, man woman nephew niece",
37
+ "king - man + woman, man woman king queen",
38
+ "paris - france + italy, paris france italy rome",
39
+ "sushi - japan + germany, sushi japan germany bratwurst",
40
+ "hitler - germany + italy, germany italy hitler mussolini",
41
  ])))
42
 
43
+ N_NEIGHBORS = int(os.environ.get("N_NEIGHBORS", "4"))
44
 
45
  # ── Share URL infrastructure ─────────────────────────────────
46
 
 
310
  ax_x["range"] = fixed
311
  ax_y["range"] = fixed
312
  ax_z["range"] = fixed
313
+ default_camera = dict(eye=dict(x=1.0, y=1.0, z=0.8))
314
  return dict(
315
  scene=dict(
316
  xaxis=ax_x,
 
520
 
521
  # ── Main visualization ───────────────────────────────────────
522
 
523
+ def explore(input_text, selected, hidden=None, camera=None, n_neighbors=None):
524
  """Unified 3D visualization of words and vector expressions.
525
 
526
  Args:
 
528
  selected: Currently selected item for neighbor display (or None).
529
  hidden: Set of labels to hide from rendering (MDS still uses all items).
530
  camera: Plotly camera dict to set initial view.
531
+ n_neighbors: Number of nearest neighbors to show (default N_NEIGHBORS).
532
 
533
  Returns:
534
  (fig, status_md, radio_update, all_labels)
 
612
  break
613
 
614
  # Gather neighbors if something is selected (and not hidden)
615
+ nn = n_neighbors if n_neighbors is not None else N_NEIGHBORS
616
  nbr_data = []
617
  if selected is not None:
618
  sel_item = items[sel_idx]
619
  if sel_item[2]: # expression
620
+ raw = model.similar_by_vector(sel_item[1], topn=nn + 20)
621
  else:
622
+ raw = model.most_similar(sel_item[0], topn=nn + 20)
623
  all_op_words = set()
624
  for _, _, _, ops, _ in items:
625
  all_op_words.update(ops)
626
  label_set = set(labels)
627
  nbr_data = [(w, s) for w, s in raw
628
  if w not in all_op_words and w not in label_set
629
+ ][:nn]
630
 
631
  # ── MDS on all operand words + neighbors ──
632
  mds_words = all_words + [w for w, _ in nbr_data]
 
915
  height: 100% !important;
916
  }
917
 
918
+ /* Neighbors dropdown — compact */
919
+ .nn-dropdown {
920
+ max-width: 100px !important;
921
+ }
922
+ .nn-dropdown select {
923
+ color: #63348d !important;
924
+ }
925
+
926
  /* Hidden camera state textbox (visible=False prevents DOM rendering in Gradio 6) */
927
  .camera-hidden { display: none !important; }
928
 
 
1085
  visible=False, interactive=True,
1086
  elem_classes=["vis-cbg"],
1087
  )
1088
+ with gr.Row():
1089
+ exp_radio = gr.Radio(
1090
+ label="Click to see nearest neighbors",
1091
+ choices=[], value=None,
1092
+ visible=False, interactive=True,
1093
+ elem_classes=["nbr-radio"],
1094
+ )
1095
+ nn_dropdown = gr.Dropdown(
1096
+ label="Neighbors",
1097
+ choices=[str(i) for i in range(3, 13)],
1098
+ value=str(N_NEIGHBORS),
1099
+ interactive=True,
1100
+ scale=0,
1101
+ min_width=90,
1102
+ elem_classes=["nn-dropdown"],
1103
+ )
1104
 
1105
  # ── Event handlers ──
1106
 
 
1113
  except (json.JSONDecodeError, TypeError):
1114
  return None
1115
 
1116
+ def _get_nn(nn_val):
1117
+ """Parse neighbor count from dropdown value."""
1118
+ try:
1119
+ return int(nn_val)
1120
+ except (TypeError, ValueError):
1121
+ return N_NEIGHBORS
1122
+
1123
+ def on_explore(input_text, nn_val=None):
1124
  """Fresh explore — compute MDS, show all items, reset checkboxes.
1125
 
1126
  Supports @word syntax to auto-select a word for neighbors:
1127
  dog cat fish @dog → plots all 3, shows dog's neighbors
1128
  """
1129
+ nn = _get_nn(nn_val)
1130
  selected = None
1131
  if input_text and "@" in input_text:
1132
  match = re.search(r"@(\S+)", input_text)
1133
  if match:
1134
  selected = match.group(1).lower()
1135
  input_text = re.sub(r"\s*@\S+", "", input_text).strip()
1136
+ fig, status, radio, labels = explore(input_text, selected, n_neighbors=nn)
1137
  cbg = gr.update(choices=labels, value=labels, visible=bool(labels))
1138
  return fig, status, radio, labels, cbg, gr.update(value=input_text)
1139
 
1140
+ def on_radio(input_text, selected, all_labels, visible, camera_json, is_loading, nn_val):
1141
  """Neighbor selection — re-render with current visibility + camera."""
1142
  if is_loading:
1143
  return gr.update(), gr.update(), gr.update(), False
1144
+ nn = _get_nn(nn_val)
1145
  hidden = set(all_labels) - set(visible) if all_labels and visible else set()
1146
  camera = _parse_camera_json(camera_json)
1147
+ fig, status, radio, _ = explore(input_text, selected, hidden=hidden or None, camera=camera, n_neighbors=nn)
1148
  return fig, status, radio, False
1149
 
1150
+ def on_visibility(input_text, selected, all_labels, visible, camera_json, is_loading, nn_val):
1151
  """Visibility toggle — re-render with updated hidden set + camera."""
1152
  if is_loading:
1153
  return gr.update(), gr.update(), gr.update(), False
1154
+ nn = _get_nn(nn_val)
1155
  hidden = set(all_labels) - set(visible) if all_labels else set()
1156
  # If selected item is now hidden, clear selection
1157
  if selected and selected != "(clear)" and selected in hidden:
1158
  selected = None
1159
  camera = _parse_camera_json(camera_json)
1160
+ fig, status, radio, _ = explore(input_text, selected, hidden=hidden or None, camera=camera, n_neighbors=nn)
1161
  return fig, status, radio, False
1162
 
1163
+ def on_nn_change(input_text, selected, all_labels, visible, camera_json, is_loading, nn_val):
1164
+ """Neighbor count changed — re-render if a word is selected."""
1165
+ if is_loading:
1166
+ return gr.update(), gr.update(), gr.update(), False
1167
+ if not selected or selected == "(clear)":
1168
+ return gr.update(), gr.update(), gr.update(), False
1169
+ nn = _get_nn(nn_val)
1170
+ hidden = set(all_labels) - set(visible) if all_labels and visible else set()
1171
+ camera = _parse_camera_json(camera_json)
1172
+ fig, status, radio, _ = explore(input_text, selected, hidden=hidden or None, camera=camera, n_neighbors=nn)
1173
+ return fig, status, radio, False
1174
+
1175
+ def on_share(input_text, selected, visible, camera_json, nn_val, request: gr.Request):
1176
  """Build share URL encoding current state."""
1177
  params = {}
1178
  if input_text and input_text.strip():
 
1186
  encoded = _encode_camera(camera_json)
1187
  if encoded:
1188
  params["cam"] = encoded
1189
+ nn = _get_nn(nn_val)
1190
+ if nn != N_NEIGHBORS:
1191
+ params["nn"] = str(nn)
1192
  if not params.get("q"):
1193
  return gr.update(value="Nothing to share", visible=True)
1194
  # Build base URL from request (gets correct port for local dev)
 
1207
 
1208
  # ── Wire up events ──
1209
 
1210
+ _EXAMPLE_SET = set(EXAMPLES)
1211
+
1212
+ def on_input_change(input_text, nn_val):
1213
+ """Auto-explore when input matches an example (set by gr.Examples click)."""
1214
+ if input_text and input_text.strip() in _EXAMPLE_SET:
1215
+ return on_explore(input_text, nn_val)
1216
+ return gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update()
1217
+
1218
+ exp_in.change(
1219
+ on_input_change,
1220
+ inputs=[exp_in, nn_dropdown],
1221
+ outputs=[exp_plot, exp_status, exp_radio, all_labels_state, vis_cbg, exp_in],
1222
+ )
1223
  exp_btn.click(
1224
  on_explore,
1225
+ inputs=[exp_in, nn_dropdown],
1226
  outputs=[exp_plot, exp_status, exp_radio, all_labels_state, vis_cbg, exp_in],
1227
  )
1228
  exp_in.submit(
1229
  on_explore,
1230
+ inputs=[exp_in, nn_dropdown],
1231
  outputs=[exp_plot, exp_status, exp_radio, all_labels_state, vis_cbg, exp_in],
1232
  )
1233
+ # Radio + visibility + nn: camera_txt is kept up-to-date by polling script
1234
  exp_radio.change(
1235
  on_radio,
1236
+ inputs=[exp_in, exp_radio, all_labels_state, vis_cbg, camera_txt, loading_share, nn_dropdown],
1237
  outputs=[exp_plot, exp_status, exp_radio, loading_share],
1238
  )
1239
  vis_cbg.change(
1240
  on_visibility,
1241
+ inputs=[exp_in, exp_radio, all_labels_state, vis_cbg, camera_txt, loading_share, nn_dropdown],
1242
+ outputs=[exp_plot, exp_status, exp_radio, loading_share],
1243
+ )
1244
+ nn_dropdown.change(
1245
+ on_nn_change,
1246
+ inputs=[exp_in, exp_radio, all_labels_state, vis_cbg, camera_txt, loading_share, nn_dropdown],
1247
  outputs=[exp_plot, exp_status, exp_radio, loading_share],
1248
  )
1249
 
1250
  # Share: camera_txt kept up-to-date by polling script
1251
  share_btn.click(
1252
  fn=on_share,
1253
+ inputs=[exp_in, exp_radio, vis_cbg, camera_txt, nn_dropdown],
1254
  outputs=[share_url],
1255
  )
1256
 
 
1262
  return qp
1263
 
1264
  def apply_share_params(params):
1265
+ """Step 2: Apply share params — set input, run explore, apply visibility + camera + nn."""
1266
  if not params or "q" not in params:
1267
+ # Check if nn param is present even without q
1268
+ nn_str = params.get("nn") if params else None
1269
+ nn_update = gr.update(value=nn_str) if nn_str else gr.update()
1270
  return (
1271
  gr.update(), # exp_in
1272
  gr.update(), # exp_plot
 
1276
  [], # all_labels_state
1277
  gr.update(), # camera_txt
1278
  False, # loading_share
1279
+ nn_update, # nn_dropdown
1280
  )
1281
 
1282
  input_text = params.get("q", "")
 
1285
  selected = None
1286
  vis_str = params.get("vis")
1287
  cam_str = params.get("cam")
1288
+ nn_str = params.get("nn")
1289
 
1290
  camera = _parse_camera(cam_str)
1291
+ nn = int(nn_str) if nn_str and nn_str.isdigit() else None
1292
 
1293
  # First explore with all items visible to get labels
1294
+ _, _, _, labels = explore(input_text, None, camera=camera, n_neighbors=nn)
1295
 
1296
  # Apply visibility
1297
  if vis_str:
 
1302
  hidden = set()
1303
 
1304
  fig, status, radio, _ = explore(
1305
+ input_text, selected, hidden=hidden or None, camera=camera, n_neighbors=nn
1306
  )
1307
 
1308
  cbg = gr.update(
 
1314
  # Pre-populate camera_txt so subsequent re-renders preserve camera
1315
  camera_json = json.dumps(camera) if camera else ""
1316
 
1317
+ nn_update = gr.update(value=str(nn)) if nn else gr.update()
1318
+
1319
  return (
1320
  gr.update(value=input_text),
1321
  fig,
 
1325
  labels,
1326
  gr.update(value=camera_json),
1327
  True, # loading_share — suppress cascading events
1328
+ nn_update,
1329
  )
1330
 
1331
  demo.load(
 
1334
  ).then(
1335
  fn=apply_share_params,
1336
  inputs=[share_params],
1337
+ outputs=[exp_in, exp_plot, exp_status, exp_radio, vis_cbg, all_labels_state, camera_txt, loading_share, nn_dropdown],
1338
  )
1339
 
1340
  demo.launch(theme=THEME, css=CSS, head=FORCE_LIGHT)