Spaces:

chyams
/

embedding-explorer

Sleeping

chyams Claude Opus 4.6 committed 22 days ago

Commit

aa46db6

1 Parent(s): c17d07c

Add Color Math Explorer + Embedding Explorer fractional coefficients

- New tool: Color Math Explorer (tools/color-math/index.html)
  - 3D RBY color mixing visualization using Plotly.js
  - 949 xkcd color names for nearest-neighbor labeling
  - Trilinear interpolation for subtractive color mixing
  - Mirrors Embedding Explorer input parsing (commas, +/-, spaces)
- Embedding Explorer: fractional coefficient support (0.5 king - 0.3 man + 0.5 woman)
  - Updated parser, vector math, labels, chain construction
  - Backward compatible — existing expressions unchanged
- Updated tools/CLAUDE.md with Color Math Explorer docs
- Updated L7 discussion.md with color→vectors→attention narrative arc

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (1) hide show

app.py +50 -25

app.py CHANGED Viewed

@@ -37,6 +37,7 @@ EXAMPLES = json.loads(os.environ.get("EXAMPLES", json.dumps([
     "woman - man + king, queen",
     "paris - france + italy, rome",
     "hitler - germany + italy, mussolini",
 ])))
 N_NEIGHBORS = int(os.environ.get("N_NEIGHBORS", "8"))
@@ -173,20 +174,35 @@ print(f"Ready: {len(VOCAB):,} words, {DIMS} dimensions each")
 def parse_expression(expr):
     """Parse 'king - man + woman' → (positives, negatives, ordered).
-    ordered is [(word, sign), ...] for display formatting.
     """
-    tokens = re.findall(r"[a-z']+|[+-]", expr.lower())
     pos, neg, ordered = [], [], []
     sign = "+"
     for t in tokens:
         if t in "+-":
             sign = t
         elif t in VOCAB:
-            (pos if sign == "+" else neg).append(t)
-            ordered.append((t, sign))
     return pos, neg, ordered
 def parse_items(text):
     """Parse comma-separated input into items (words or vector expressions).
@@ -207,8 +223,8 @@ def parse_items(text):
         if not part:
             continue
-        # Detect expression: contains + or - between word characters
-        if re.search(r"[a-z']\s*[+\-]\s*[a-z']", part.lower()):
             # It's an arithmetic expression
             pos, neg, ordered = parse_expression(part)
             if len(pos) + len(neg) < 2:
@@ -218,23 +234,24 @@ def parse_items(text):
                 continue
             # Compute result vector
             vec = np.zeros(DIMS)
-            for w in pos:
-                vec += model[w]
-            for w in neg:
-                vec -= model[w]
             # Build label
             label_parts = []
-            for w, s in ordered:
                 if not label_parts:
-                    label_parts.append(w)
                 elif s == "+":
-                    label_parts.append(f"+ {w}")
                 else:
-                    label_parts.append(f"− {w}")
             label = " ".join(label_parts)
             if label not in seen_labels:
                 seen_labels.add(label)
-                operand_words = set(pos + neg)
                 items.append((label, vec, True, operand_words, list(ordered)))
         else:
             # Plain words — each word is a separate item
@@ -628,24 +645,27 @@ def explore(input_text, selected, hidden=None, camera=None):
     for label, vec, is_expr, ops, ordered in items:
         if not is_expr:
             continue
-        pos_words = [w for w, s in ordered if s == "+"]
-        neg_words = [w for w, s in ordered if s == "-"]
         if len(neg_words) == 0 and len(pos_words) == 2:
             # Simple addition: chain tip-to-tail + gold result from origin
             a_3d = word_3d[pos_words[0]]
             b_3d = word_3d[pos_words[1]]
-            result_3d = a_3d + b_3d
             extra_points.append(result_3d)
-            expr_info[label] = ('add', pos_words[0], pos_words[1], result_3d)
         else:
             # General: chain through operands + gold result from origin
             cursor = np.zeros(3)
             chain = []
-            for w, s in ordered:
                 prev = cursor.copy()
                 c = word_3d[w]
-                cursor = cursor + c if s == "+" else cursor - c
                 chain.append((prev.copy(), cursor.copy(), w, s))
                 extra_points.append(cursor.copy())
             expr_info[label] = ('chain', chain, cursor.copy())
@@ -710,8 +730,8 @@ def explore(input_text, selected, hidden=None, camera=None):
         else:
             info = expr_info[label]
-            # ── Operand arrows from origin ──
-            for w, s in ordered:
                 c = word_3d[w]
                 add_arrow(fig, c[0], c[1], c[2], arr_color, width=arr_width)
                 txt = f"<b>{w}</b>" if is_sel else w
@@ -746,12 +766,17 @@ def explore(input_text, selected, hidden=None, camera=None):
             if info[0] == 'add':
                 # Second operand drawn from tip of first (chain)
                 a = word_3d[info[1]]
-                result_3d = info[3]
                 chain_color = lighten(color, 0.3) if is_dim else lighten(color, 0.2)
                 add_arrow(fig, result_3d[0], result_3d[1], result_3d[2],
                           chain_color, width=arr_width,
-                          sx=a[0], sy=a[1], sz=a[2], dash="dot")
                 # Gold result from origin
                 add_arrow(fig, result_3d[0], result_3d[1], result_3d[2],
                           gold, width=gold_width)

     "woman - man + king, queen",
     "paris - france + italy, rome",
     "hitler - germany + italy, mussolini",
+    "0.5 king - 0.5 man + 0.5 woman, queen",
 ])))
 N_NEIGHBORS = int(os.environ.get("N_NEIGHBORS", "8"))
 def parse_expression(expr):
     """Parse 'king - man + woman' → (positives, negatives, ordered).
+    ordered is [(word, sign, coeff), ...] for display formatting.
+    Supports fractional coefficients: '0.5 king - 0.3 man + 1.5 woman'.
     """
+    tokens = re.findall(r"\d*\.?\d+|[a-z']+|[+-]", expr.lower())
     pos, neg, ordered = [], [], []
     sign = "+"
+    coeff = 1.0
     for t in tokens:
         if t in "+-":
             sign = t
+            coeff = 1.0
+        elif re.match(r"^\d*\.?\d+$", t):
+            coeff = float(t)
         elif t in VOCAB:
+            (pos if sign == "+" else neg).append((t, coeff))
+            ordered.append((t, sign, coeff))
+            coeff = 1.0
     return pos, neg, ordered
+def _coeff_str(c):
+    """Format coefficient for display. Returns '' for 1.0, '0.5\u00b7' otherwise."""
+    if c == 1.0:
+        return ""
+    if c == int(c):
+        return f"{int(c)}\u00b7"
+    return f"{c:g}\u00b7"
 def parse_items(text):
     """Parse comma-separated input into items (words or vector expressions).
         if not part:
             continue
+        # Detect expression: contains + or - between word characters or digits
+        if re.search(r"(?:[a-z']|\d)\s*[+\-]\s*(?:[a-z']|\d)", part.lower()):
             # It's an arithmetic expression
             pos, neg, ordered = parse_expression(part)
             if len(pos) + len(neg) < 2:
                 continue
             # Compute result vector
             vec = np.zeros(DIMS)
+            for w, c in pos:
+                vec += c * model[w]
+            for w, c in neg:
+                vec -= c * model[w]
             # Build label
             label_parts = []
+            for w, s, c in ordered:
+                cstr = _coeff_str(c)
                 if not label_parts:
+                    label_parts.append(f"{cstr}{w}")
                 elif s == "+":
+                    label_parts.append(f"+ {cstr}{w}")
                 else:
+                    label_parts.append(f"− {cstr}{w}")
             label = " ".join(label_parts)
             if label not in seen_labels:
                 seen_labels.add(label)
+                operand_words = set(w for w, c in pos + neg)
                 items.append((label, vec, True, operand_words, list(ordered)))
         else:
             # Plain words — each word is a separate item
     for label, vec, is_expr, ops, ordered in items:
         if not is_expr:
             continue
+        pos_words = [w for w, s, c in ordered if s == "+"]
+        neg_words = [w for w, s, c in ordered if s == "-"]
+        pos_coeffs = [c for w, s, c in ordered if s == "+"]
+        neg_coeffs = [c for w, s, c in ordered if s == "-"]
         if len(neg_words) == 0 and len(pos_words) == 2:
             # Simple addition: chain tip-to-tail + gold result from origin
             a_3d = word_3d[pos_words[0]]
             b_3d = word_3d[pos_words[1]]
+            result_3d = pos_coeffs[0] * a_3d + pos_coeffs[1] * b_3d
             extra_points.append(result_3d)
+            expr_info[label] = ('add', pos_words[0], pos_words[1],
+                                pos_coeffs[0], pos_coeffs[1], result_3d)
         else:
             # General: chain through operands + gold result from origin
             cursor = np.zeros(3)
             chain = []
+            for w, s, coeff in ordered:
                 prev = cursor.copy()
                 c = word_3d[w]
+                cursor = cursor + coeff * c if s == "+" else cursor - coeff * c
                 chain.append((prev.copy(), cursor.copy(), w, s))
                 extra_points.append(cursor.copy())
             expr_info[label] = ('chain', chain, cursor.copy())
         else:
             info = expr_info[label]
+            # ── Operand arrows from origin (full length — shows where the word IS) ──
+            for w, s, coeff in ordered:
                 c = word_3d[w]
                 add_arrow(fig, c[0], c[1], c[2], arr_color, width=arr_width)
                 txt = f"<b>{w}</b>" if is_sel else w
             if info[0] == 'add':
                 # Second operand drawn from tip of first (chain)
+                # info = ('add', word_a, word_b, coeff_a, coeff_b, result_3d)
                 a = word_3d[info[1]]
+                coeff_a = info[3]
+                coeff_b = info[4]
+                result_3d = info[5]
+                chain_start = coeff_a * a
                 chain_color = lighten(color, 0.3) if is_dim else lighten(color, 0.2)
                 add_arrow(fig, result_3d[0], result_3d[1], result_3d[2],
                           chain_color, width=arr_width,
+                          sx=chain_start[0], sy=chain_start[1], sz=chain_start[2],
+                          dash="dot")
                 # Gold result from origin
                 add_arrow(fig, result_3d[0], result_3d[1], result_3d[2],
                           gold, width=gold_width)