theapemachine committed on
Commit
4389ea4
·
verified ·
1 Parent(s): df69a9e

critique fix: fhrr.py

Browse files
Files changed (1) hide show
  1. tensegrity/engine/fhrr.py +72 -5
tensegrity/engine/fhrr.py CHANGED
@@ -265,11 +265,45 @@ def bind(a: np.ndarray, b: np.ndarray) -> np.ndarray:
265
  """Bind: element-wise complex multiplication."""
266
  return a * b
267
 
268
- def bundle(*vectors: np.ndarray) -> np.ndarray:
269
- """Bundle: element-wise addition + normalize to unit circle."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
  if not vectors:
271
  return np.array([], dtype=np.complex64)
272
- stacked = np.stack([np.asarray(v, dtype=np.complex128) for v in vectors], axis=0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
273
  result = np.sum(stacked, axis=0).astype(np.complex128)
274
  magnitude = np.maximum(np.abs(result), 1e-8)
275
  return (result / magnitude).astype(np.complex64)
@@ -374,9 +408,42 @@ class FHRREncoder:
374
  bound_pairs = [self.encode_binding(r, f) for r, f in bindings.items()]
375
  return bundle(*bound_pairs) if bound_pairs else np.ones(self.dim, dtype=np.complex64)
376
 
377
- def encode_sequence(self, tokens: List[str]) -> np.ndarray:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
378
  elements = [permute(self.features.get(t), shift=i) for i, t in enumerate(tokens)]
379
- return bundle(*elements) if elements else np.ones(self.dim, dtype=np.complex64)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
380
 
381
  def encode_numeric_vector(self, values: np.ndarray) -> np.ndarray:
382
  bound = [bind(self.encode_position(i), self.encode_value(float(v))) for i, v in enumerate(values)]
 
265
  """Bind: element-wise complex multiplication."""
266
  return a * b
267
 
268
def bundle(*vectors: np.ndarray, top_k: Optional[int] = None) -> np.ndarray:
    """Bundle phasor vectors: element-wise sum, renormalized to the unit circle.

    Each input is promoted to complex128, the inputs are summed element-wise,
    and every element of the sum is divided by its magnitude (floored at 1e-8
    to avoid division by zero). The result is returned as complex64.

    When ``top_k`` is a positive integer smaller than the vector length, each
    input is sparsified before the sum: exactly ``top_k`` of its
    largest-magnitude elements are kept and the rest are set to zero. An
    element that is zero in every sparsified input stays zero in the output
    instead of being pushed onto the unit circle.

    NOTE(review): for inputs that are already unit-magnitude phasors, all
    magnitudes are (nearly) equal, so which elements survive is decided by
    rounding noise and tie-breaking rather than by salience. Confirm that
    callers pass vectors with meaningful magnitudes before relying on
    ``top_k``.

    Args:
        *vectors: Complex vectors of identical shape (sparsification acts
            along the last axis).
        top_k: Number of elements to keep per vector before summing. ``None``,
            zero, or a negative value disables sparsification, as does a
            value greater than or equal to the vector length.

    Returns:
        The normalized bundle as a complex64 array, or an empty complex64
        array when no vectors are given.
    """
    if not vectors:
        return np.array([], dtype=np.complex64)

    stacked = np.stack(
        [np.asarray(v, dtype=np.complex128) for v in vectors], axis=0
    )

    sparsify = (
        top_k is not None
        and stacked.ndim > 1
        and 0 < top_k < stacked.shape[-1]
    )
    if sparsify:
        # Select indices rather than thresholding on magnitude: a threshold
        # test (`>=`) keeps every element tied with the k-th largest, which
        # can retain far more than top_k entries (all of them when the
        # magnitudes are equal).
        magnitudes = np.abs(stacked)
        keep = np.argpartition(magnitudes, -top_k, axis=-1)[..., -top_k:]
        mask = np.zeros(stacked.shape, dtype=bool)
        np.put_along_axis(mask, keep, True, axis=-1)
        stacked = np.where(mask, stacked, 0.0)

    total = stacked.sum(axis=0)
    # Floor the magnitude so all-zero (or fully cancelled) elements divide
    # cleanly to zero instead of producing NaN.
    magnitude = np.maximum(np.abs(total), 1e-8)
    return (total / magnitude).astype(np.complex64)
 
408
  bound_pairs = [self.encode_binding(r, f) for r, f in bindings.items()]
409
  return bundle(*bound_pairs) if bound_pairs else np.ones(self.dim, dtype=np.complex64)
410
 
411
def encode_sequence(self, tokens: List[str],
                    window_size: int = 16) -> np.ndarray:
    """Encode a token sequence with two-level (windowed) bundling.

    Each token's feature vector (``self.features.get(token)``) is passed
    through ``permute`` with ``shift`` equal to the token's position.

    * Empty input returns an all-ones complex64 vector of length
      ``self.dim``.
    * Sequences of at most ``window_size`` tokens are bundled directly.
    * Longer sequences are split into consecutive, non-overlapping windows
      of ``window_size`` tokens (the last window may be shorter). Each
      window is bundled on its own, and the window summaries are then
      bundled with ``top_k = max(self.dim // 4, 64)``.

    NOTE(review): when ``self.dim <= 64`` the chosen ``top_k`` is not
    smaller than the vector length, so the second-level bundle is dense.
    Each window summary is a normalized bundle, so a short trailing window
    contributes on the same footing as a full one -- confirm this is
    intended.

    Args:
        tokens: List of string tokens.
        window_size: Tokens per local window (default 16). Must be >= 1.

    Returns:
        The encoded sequence as produced by ``bundle``.

    Raises:
        ValueError: If ``window_size`` is less than 1.
    """
    # Reject bad window sizes up front: a negative step would make the
    # window range empty and yield a zero-length bundle, and zero would fail
    # inside range() with an unrelated message.
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")

    if not tokens:
        return np.ones(self.dim, dtype=np.complex64)

    elements = [permute(self.features.get(t), shift=i) for i, t in enumerate(tokens)]

    if len(elements) <= window_size:
        # Short sequence: a single flat bundle.
        return bundle(*elements)

    # First level: one summary per consecutive window.
    window_summaries = [
        bundle(*elements[start:start + window_size])
        for start in range(0, len(elements), window_size)
    ]

    # Second level: combine the summaries, sparsified via top_k.
    sparse_k = max(self.dim // 4, 64)
    return bundle(*window_summaries, top_k=sparse_k)
447
 
448
  def encode_numeric_vector(self, values: np.ndarray) -> np.ndarray:
449
  bound = [bind(self.encode_position(i), self.encode_value(float(v))) for i, v in enumerate(values)]