Fix bipolar encoding, add permutation temporal binding, add DreamEngine consolidator

Files changed (5) hide show

src/audio_ear/__init__.py +1 -0
src/dream_engine/__init__.py +1 -0
src/dream_engine/consolidator.py +150 -0
src/hdc_encoder/__init__.py +1 -0
src/hdc_encoder/encoder.py +128 -0

src/audio_ear/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+

src/dream_engine/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+

src/dream_engine/consolidator.py ADDED Viewed

	@@ -0,0 +1,150 @@

+"""
+DreamEngine — Vitalis FSI
+Runs during idle time. Clusters recent hypervectors,
+compresses them into HelixMemory prototypes.
+This is how Vitalis consolidates experience into long-term patterns.
+No external dependencies beyond NumPy. Pure HDC clustering.
+"""
+import numpy as np
+from collections import deque
+from datetime import datetime, timedelta
+from pathlib import Path
+from typing import List, Optional
+from src.dream_engine.helix_memory import HelixMemory
+class DreamEngine:
+    DREAM_INTERVAL_MINUTES = 30
+    MIN_BUFFER_SIZE = 50
+    N_CLUSTERS = 4
+    CLUSTER_ITERATIONS = 8
+    def __init__(
+        self,
+        helix: HelixMemory,
+        buffer_max: int = 500,
+    ):
+        self.helix = helix
+        self.buffer: deque = deque(maxlen=buffer_max)
+        self.last_dream: datetime = datetime.min
+        self.dream_count: int = 0
+    # ------------------------------------------------------------------
+    # Public API
+    # ------------------------------------------------------------------
+    def ingest(self, hv: np.ndarray, meta: Optional[dict] = None) -> None:
+        """Accept one hypervector into the episodic buffer."""
+        self.buffer.append((datetime.utcnow(), hv, meta or {}))
+    def dream(self, force: bool = False) -> bool:
+        """
+        Consolidate episodic buffer into HelixMemory.
+        Returns True if consolidation ran, False if skipped.
+        """
+        now = datetime.utcnow()
+        if not force:
+            if (now - self.last_dream) < timedelta(minutes=self.DREAM_INTERVAL_MINUTES):
+                return False
+        if len(self.buffer) < self.MIN_BUFFER_SIZE:
+            print(f"[DREAM] Buffer too small ({len(self.buffer)}). Skipping.")
+            return False
+        print(f"[DREAM] Consolidating {len(self.buffer)} vectors...")
+        # Extract hypervectors and metadata
+        hvs = np.stack([hv for _, hv, _ in self.buffer]).astype(np.int8)
+        metas = [meta for _, _, meta in self.buffer]
+        # Cluster
+        centroids, assignments = self._cluster(hvs)
+        # Store each centroid as a helix code
+        consolidated = 0
+        for i, centroid in enumerate(centroids):
+            cluster_mask = assignments == i
+            if not np.any(cluster_mask):
+                continue
+            # Aggregate metadata from this cluster
+            cluster_metas = [metas[j] for j in range(len(metas)) if cluster_mask[j]]
+            merged_meta = self._merge_meta(cluster_metas)
+            merged_meta["cluster_size"] = int(np.sum(cluster_mask))
+            merged_meta["dream_cycle"] = self.dream_count
+            self.helix.add(centroid, merged_meta)
+            consolidated += 1
+        # Generative replay — re-ingest perturbed versions of rare patterns
+        self._replay(hvs, assignments)
+        # Clean up
+        self.buffer.clear()
+        self.last_dream = now
+        self.dream_count += 1
+        print(f"[DREAM] Cycle {self.dream_count} complete. "
+              f"{consolidated} prototypes stored in HelixMemory.")
+        return True
+    # ------------------------------------------------------------------
+    # Internal
+    # ------------------------------------------------------------------
+    def _cluster(
+        self, hvs: np.ndarray
+    ):
+        """
+        Online bipolar k-means.
+        Distance metric: Hamming (count of differing bits).
+        Centroids are binarized after each update.
+        """
+        k = min(self.N_CLUSTERS, len(hvs))
+        indices = np.random.choice(len(hvs), k, replace=False)
+        centroids = hvs[indices].copy().astype(np.int8)
+        assignments = np.zeros(len(hvs), dtype=np.int32)
+        for _ in range(self.CLUSTER_ITERATIONS):
+            # Hamming distance: count positions where they differ
+            diffs = np.stack(
+                [np.sum(hvs != c, axis=1) for c in centroids],
+                axis=1
+            )
+            assignments = np.argmin(diffs, axis=1)
+            # Update centroids via majority vote (bipolar sign)
+            for i in range(k):
+                mask = assignments == i
+                if np.any(mask):
+                    summed = hvs[mask].astype(np.int32).sum(axis=0)
+                    new_centroid = np.sign(summed).astype(np.int8)
+                    new_centroid[new_centroid == 0] = 1
+                    centroids[i] = new_centroid
+        return centroids, assignments
+    def _replay(self, hvs: np.ndarray, assignments: np.ndarray) -> None:
+        """
+        Generative replay: add small noise to rare cluster members
+        and re-ingest them. Prevents forgetting of low-frequency patterns.
+        """
+        cluster_sizes = np.bincount(assignments, minlength=self.N_CLUSTERS)
+        rare_threshold = np.percentile(cluster_sizes, 25)
+        for i, size in enumerate(cluster_sizes):
+            if size <= rare_threshold and size > 0:
+                rare_hvs = hvs[assignments == i]
+                for hv in rare_hvs[:2]:  # replay at most 2 per rare cluster
+                    noise = np.random.choice(
+                        [-1, 1],
+                        size=len(hv),
+                        p=[0.02, 0.98]
+                    ).astype(np.int8)
+                    perturbed = (hv * noise).astype(np.int8)
+                    self.buffer.append((datetime.utcnow(), perturbed, {"replayed": True}))
+    @staticmethod
+    def _merge_meta(metas: list) -> dict:
+        """Merge a list of metadata dicts into one summary."""
+        merged = {}
+        for m in metas:
+            for k, v in m.items():
+                if k not in merged:
+                    merged[k] = v
+        return merged

src/hdc_encoder/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+

src/hdc_encoder/encoder.py ADDED Viewed

	@@ -0,0 +1,128 @@

+import numpy as np
+from typing import Dict
+# Number of components in every hypervector built by this module.
+DIM = 10_000
+# Seed for the module-level generator below.
+SEED = 42
+# NOTE: the base vectors below are drawn from this generator in statement
+# order; reordering or inserting draws changes every vector after that point.
+_rng = np.random.default_rng(SEED)
+# Bipolar base vectors (-1/1) to match hdc_engine.bind and VitalisKernel
+# One row of DIM components per MFCC coefficient index (13 rows).
+BASE_MFCC = _rng.choice([-1, 1], size=(13, DIM)).astype(np.int8)
+# One base vector per prosody feature name.
+BASE_PROSODY = {
+    "pitch":       _rng.choice([-1, 1], size=DIM).astype(np.int8),
+    "energy":      _rng.choice([-1, 1], size=DIM).astype(np.int8),
+    "tempo":       _rng.choice([-1, 1], size=DIM).astype(np.int8),
+    "pause_ratio": _rng.choice([-1, 1], size=DIM).astype(np.int8),
+}
+# Divisors applied to raw prosody values in encode() before the result is
+# capped at 1.0.
+# NOTE(review): units are not stated in this file (presumably Hz for pitch
+# and BPM for tempo) — confirm against the feature extractor.
+PROSODY_SCALE = {
+    "pitch": 300.0,
+    "energy": 0.5,
+    "tempo": 200.0,
+    "pause_ratio": 1.0,
+}
+def _bipolar_binarize(val: float) -> np.ndarray:
+    """Map a scalar [0,1] to a bipolar hypervector."""
+    bits = (_rng.random(DIM) < val).astype(np.int8)
+    bits[bits == 0] = -1
+    return bits
+def _permute(vec: np.ndarray, shift: int) -> np.ndarray:
+    """Cyclic shift — encodes temporal position."""
+    return np.roll(vec, shift % DIM)
+def _bind(a: np.ndarray, b: np.ndarray) -> np.ndarray:
+    """Bipolar binding: element-wise multiply (-1/1 * -1/1 = -1/1)."""
+    return (a * b).astype(np.int8)
+def _bundle(vecs: list) -> np.ndarray:
+    """
+    Bipolar bundling: sum then binarize via sign.
+    Ties broken toward +1.
+    """
+    stacked = np.stack(vecs, axis=0).astype(np.int32)
+    result = np.sign(stacked.sum(axis=0)).astype(np.int8)
+    result[result == 0] = 1
+    return result
+def encode(
+    mfcc: np.ndarray,
+    prosody: Dict[str, float],
+    chunk_size: int = 5,
+) -> np.ndarray:
+    """
+    Convert one utterance (MFCC matrix + prosody dict) into a single
+    bipolar 10k-dim hypervector that preserves temporal order.
+    Temporal encoding equation:
+        S = V_1 * rho(V_2) * rho^2(V_3) ... rho^n(V_n)
+    where rho is cyclic shift and * is bipolar binding.
+    """
+    n_frames = mfcc.shape[1]
+    # ------------------------------------------------------------------
+    # 1. Frame-level bipolar vectors
+    #    Each frame: 13 MFCC coefficients bound with their base vectors
+    # ------------------------------------------------------------------
+    frame_hvs = []
+    for t in range(n_frames):
+        frame_components = []
+        for i in range(13):
+            coeff_val = float(mfcc[i, t])
+            # Threshold against coefficient median → bipolar
+            bit = np.int8(1) if coeff_val > 0 else np.int8(-1)
+            coeff_vec = np.full(DIM, bit, dtype=np.int8)
+            frame_components.append(_bind(coeff_vec, BASE_MFCC[i]))
+        frame_hvs.append(_bundle(frame_components))
+    # ------------------------------------------------------------------
+    # 2. Forward temporal binding (preserves order)
+    #    S_fwd = frame_0 * rho(frame_1) * rho^2(frame_2) ...
+    # ------------------------------------------------------------------
+    forward_hv = frame_hvs[0].copy() if frame_hvs else np.ones(DIM, dtype=np.int8)
+    for t in range(1, len(frame_hvs)):
+        forward_hv = _bind(forward_hv, _permute(frame_hvs[t], shift=t))
+    # ------------------------------------------------------------------
+    # 3. Backward temporal binding (reverse rhythm)
+    # ------------------------------------------------------------------
+    backward_hv = frame_hvs[-1].copy() if frame_hvs else np.ones(DIM, dtype=np.int8)
+    for t in range(len(frame_hvs) - 2, -1, -1):
+        backward_hv = _bind(backward_hv, _permute(frame_hvs[t], shift=-(t + 1)))
+    # ------------------------------------------------------------------
+    # 4. Chunk-level binding (mid-scale temporal structure)
+    # ------------------------------------------------------------------
+    n_chunks = max(1, n_frames // chunk_size)
+    chunk_hvs = []
+    for c in range(n_chunks):
+        start = c * chunk_size
+        end = min(start + chunk_size, n_frames)
+        chunk_bundle = _bundle(frame_hvs[start:end])
+        chunk_hvs.append(_permute(chunk_bundle, shift=c))
+    chunk_hv = _bundle(chunk_hvs) if chunk_hvs else np.ones(DIM, dtype=np.int8)
+    # ------------------------------------------------------------------
+    # 5. Prosody binding (tone, energy, rhythm, silence)
+    #    Each prosody feature bound with its base vector and
+    #    permuted by frame count (ties prosody to utterance length)
+    # ------------------------------------------------------------------
+    prosody_hvs = []
+    for key, val in prosody.items():
+        norm = min(val / PROSODY_SCALE.get(key, 1.0), 1.0)
+        pv = _bind(_bipolar_binarize(norm), BASE_PROSODY[key])
+        pv = _permute(pv, shift=n_frames)
+        prosody_hvs.append(pv)
+    # ------------------------------------------------------------------
+    # 6. Final composition: bundle all levels
+    #    forward captures sequence, backward captures rhythm,
+    #    chunks capture phrase structure, prosody captures tone
+    # ------------------------------------------------------------------
+    all_components = [forward_hv, backward_hv, chunk_hv] + prosody_hvs
+    return _bundle(all_components)