FerrellSyntheticIntelligence committed on
Commit ·
5f07cd7
1
Parent(s): 0e2503a
Fix bipolar encoding, add permutation temporal binding, add DreamEngine consolidator
Browse files- src/audio_ear/__init__.py +1 -0
- src/dream_engine/__init__.py +1 -0
- src/dream_engine/consolidator.py +150 -0
- src/hdc_encoder/__init__.py +1 -0
- src/hdc_encoder/encoder.py +128 -0
src/audio_ear/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
src/dream_engine/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
src/dream_engine/consolidator.py
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
DreamEngine — Vitalis FSI
|
| 3 |
+
|
| 4 |
+
Runs during idle time. Clusters recent hypervectors,
|
| 5 |
+
compresses them into HelixMemory prototypes.
|
| 6 |
+
This is how Vitalis consolidates experience into long-term patterns.
|
| 7 |
+
No external dependencies beyond NumPy. Pure HDC clustering.
|
| 8 |
+
"""
|
| 9 |
+
import numpy as np
|
| 10 |
+
from collections import deque
|
| 11 |
+
from datetime import datetime, timedelta
|
| 12 |
+
from pathlib import Path
|
| 13 |
+
from typing import List, Optional
|
| 14 |
+
from src.dream_engine.helix_memory import HelixMemory
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class DreamEngine:
    """
    Idle-time consolidator for bipolar hypervectors.

    Incoming hypervectors are collected in a bounded episodic buffer.
    A "dream" cycle clusters the buffered vectors, hands one prototype per
    non-empty cluster to ``helix.add(vector, meta)``, empties the buffer and
    finally re-ingests a few perturbed members of the rarest clusters so
    they are seen again in the next cycle.
    """

    # Minimum time between two non-forced dream cycles.
    DREAM_INTERVAL_MINUTES = 30
    # Non-forced cycles are skipped while the buffer holds fewer entries.
    MIN_BUFFER_SIZE = 50
    # Upper bound on the number of prototypes produced per cycle.
    N_CLUSTERS = 4
    # Number of assign/update rounds performed by the clustering step.
    CLUSTER_ITERATIONS = 8

    def __init__(
        self,
        helix: "HelixMemory",
        buffer_max: int = 500,
    ):
        """
        Args:
            helix: Long-term store; only its ``add(vector, meta)`` method is
                used by this class.
            buffer_max: Capacity of the episodic buffer. Once full, the
                oldest entries are dropped (``deque(maxlen=...)``).
        """
        self.helix = helix
        # Entries are (timestamp, hypervector, metadata dict) tuples.
        self.buffer: deque = deque(maxlen=buffer_max)
        # datetime.min guarantees the very first interval check passes.
        self.last_dream: datetime = datetime.min
        self.dream_count: int = 0

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def ingest(self, hv: np.ndarray, meta: Optional[dict] = None) -> None:
        """Accept one hypervector (and optional metadata) into the episodic buffer."""
        self.buffer.append((datetime.utcnow(), hv, meta or {}))

    def dream(self, force: bool = False) -> bool:
        """
        Consolidate the episodic buffer into HelixMemory.

        A non-forced call is skipped when the last cycle ran less than
        ``DREAM_INTERVAL_MINUTES`` ago or when the buffer holds fewer than
        ``MIN_BUFFER_SIZE`` entries. ``force=True`` bypasses both checks.
        An empty buffer is always skipped because there is nothing to cluster.

        Returns:
            True if consolidation ran, False if skipped.
        """
        now = datetime.utcnow()
        if not force:
            if (now - self.last_dream) < timedelta(minutes=self.DREAM_INTERVAL_MINUTES):
                return False
            if len(self.buffer) < self.MIN_BUFFER_SIZE:
                print(f"[DREAM] Buffer too small ({len(self.buffer)}). Skipping.")
                return False

        if not self.buffer:
            # np.stack() cannot handle an empty sequence; a forced cycle on
            # an empty buffer is a no-op rather than a crash.
            print("[DREAM] Buffer empty. Skipping.")
            return False

        print(f"[DREAM] Consolidating {len(self.buffer)} vectors...")

        # Extract hypervectors and metadata from a snapshot of the buffer
        snapshot = list(self.buffer)
        hvs = np.stack([hv for _, hv, _ in snapshot]).astype(np.int8)
        metas = [meta for _, _, meta in snapshot]

        # Cluster
        centroids, assignments = self._cluster(hvs)

        # Store each non-empty cluster's centroid as a helix code
        consolidated = 0
        for i, centroid in enumerate(centroids):
            cluster_mask = assignments == i
            if not np.any(cluster_mask):
                continue
            # Aggregate metadata from this cluster
            cluster_metas = [m for m, member in zip(metas, cluster_mask) if member]
            merged_meta = self._merge_meta(cluster_metas)
            merged_meta["cluster_size"] = int(np.sum(cluster_mask))
            # Recorded before the counter is incremented, i.e. 0-based.
            merged_meta["dream_cycle"] = self.dream_count
            self.helix.add(centroid, merged_meta)
            consolidated += 1

        # Clean up the consolidated episodes FIRST, then replay. Replay
        # appends to self.buffer, so clearing afterwards would silently
        # discard every replayed vector.
        self.buffer.clear()

        # Generative replay — re-ingest perturbed versions of rare patterns
        self._replay(hvs, assignments)

        self.last_dream = now
        self.dream_count += 1
        print(f"[DREAM] Cycle {self.dream_count} complete. "
              f"{consolidated} prototypes stored in HelixMemory.")
        return True

    # ------------------------------------------------------------------
    # Internal
    # ------------------------------------------------------------------
    def _cluster(
        self, hvs: np.ndarray
    ):
        """
        Batch bipolar k-means.

        Centroids are seeded with ``k = min(N_CLUSTERS, len(hvs))`` distinct
        random rows of ``hvs``. Each of the ``CLUSTER_ITERATIONS`` rounds
        assigns every vector to the centroid with the smallest Hamming
        distance (count of differing positions) and then recomputes each
        non-empty cluster's centroid by element-wise majority vote, with
        ties resolved to +1. A cluster that receives no members keeps its
        previous centroid.

        Returns:
            ``(centroids, assignments)`` where ``centroids`` is a
            ``(k, dim)`` int8 array and ``assignments`` holds the cluster
            index of every row of ``hvs``.
        """
        k = min(self.N_CLUSTERS, len(hvs))
        indices = np.random.choice(len(hvs), k, replace=False)
        centroids = hvs[indices].copy().astype(np.int8)

        assignments = np.zeros(len(hvs), dtype=np.int32)
        for _ in range(self.CLUSTER_ITERATIONS):
            # Hamming distance: count positions where they differ
            diffs = np.stack(
                [np.sum(hvs != c, axis=1) for c in centroids],
                axis=1
            )
            assignments = np.argmin(diffs, axis=1)

            # Update centroids via majority vote (bipolar sign)
            for i in range(k):
                mask = assignments == i
                if np.any(mask):
                    # Sum in int32 so large clusters cannot overflow int8.
                    summed = hvs[mask].astype(np.int32).sum(axis=0)
                    new_centroid = np.sign(summed).astype(np.int8)
                    new_centroid[new_centroid == 0] = 1
                    centroids[i] = new_centroid

        return centroids, assignments

    def _replay(self, hvs: np.ndarray, assignments: np.ndarray) -> None:
        """
        Generative replay: add small noise to rare cluster members
        and re-ingest them. Prevents forgetting of low-frequency patterns.

        A cluster counts as rare when it is non-empty and its size is at or
        below the 25th percentile of all cluster sizes. At most two members
        of each rare cluster are replayed; every component of a replayed
        vector has its sign flipped with probability 0.02. Replayed entries
        are appended to ``self.buffer`` with metadata ``{"replayed": True}``.
        """
        cluster_sizes = np.bincount(assignments, minlength=self.N_CLUSTERS)
        rare_threshold = np.percentile(cluster_sizes, 25)

        for i, size in enumerate(cluster_sizes):
            if size <= rare_threshold and size > 0:
                rare_hvs = hvs[assignments == i]
                for hv in rare_hvs[:2]:  # replay at most 2 per rare cluster
                    # -1 entries (2 %) flip the sign, +1 entries keep it.
                    noise = np.random.choice(
                        [-1, 1],
                        size=len(hv),
                        p=[0.02, 0.98]
                    ).astype(np.int8)
                    perturbed = (hv * noise).astype(np.int8)
                    self.buffer.append((datetime.utcnow(), perturbed, {"replayed": True}))

    @staticmethod
    def _merge_meta(metas: list) -> dict:
        """
        Merge a list of metadata dicts into one summary.

        For every key the value from the first dict that contains it wins;
        later values for the same key are ignored.
        """
        merged = {}
        for m in metas:
            for k, v in m.items():
                merged.setdefault(k, v)
        return merged
|
src/hdc_encoder/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
src/hdc_encoder/encoder.py
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
from typing import Dict
|
| 3 |
+
|
| 4 |
+
# Length of every hypervector produced in this module.
DIM = 10_000
# Seed for the module-level generator that draws the base vectors below.
SEED = 42
_rng = np.random.default_rng(SEED)

# Bipolar base vectors (-1/1) to match hdc_engine.bind and VitalisKernel
# One row per MFCC coefficient.
BASE_MFCC = _rng.choice([-1, 1], size=(13, DIM)).astype(np.int8)
# One base vector per prosody feature; the tuple order is the order in
# which vectors are drawn from _rng, so it must not be rearranged.
BASE_PROSODY = {
    feature: _rng.choice([-1, 1], size=DIM).astype(np.int8)
    for feature in ("pitch", "energy", "tempo", "pause_ratio")
}

# Divisors applied to raw prosody values before they are encoded.
PROSODY_SCALE = dict(
    pitch=300.0,
    energy=0.5,
    tempo=200.0,
    pause_ratio=1.0,
)
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def _bipolar_binarize(val: float) -> np.ndarray:
    """
    Map a scalar [0,1] to a bipolar hypervector.

    Each of the DIM components is +1 when a fresh uniform draw from the
    module generator falls below ``val`` and -1 otherwise, so successive
    calls with the same ``val`` consume new random numbers.
    """
    draws = _rng.random(DIM)
    return np.where(draws < val, 1, -1).astype(np.int8)
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def _permute(vec: np.ndarray, shift: int) -> np.ndarray:
    """Cyclic shift — encodes temporal position.

    The shift is reduced modulo DIM first, so negative shifts wrap around.
    """
    offset = shift % DIM
    return np.roll(vec, offset)
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def _bind(a: np.ndarray, b: np.ndarray) -> np.ndarray:
    """Bind two bipolar vectors by element-wise product; the result is int8."""
    product = np.multiply(a, b)
    return product.astype(np.int8)
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def _bundle(vecs: list) -> np.ndarray:
    """
    Bundle bipolar vectors by element-wise majority vote.

    The vectors are summed in int32 and every position is mapped to -1
    when its sum is negative and to +1 otherwise (so ties go to +1).
    """
    totals = np.stack(vecs, axis=0).astype(np.int32).sum(axis=0)
    return np.where(totals < 0, -1, 1).astype(np.int8)
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def encode(
    mfcc: np.ndarray,
    prosody: Dict[str, float],
    chunk_size: int = 5,
) -> np.ndarray:
    """
    Convert one utterance (MFCC matrix + prosody dict) into a single
    bipolar 10k-dim hypervector that preserves temporal order.

    Temporal encoding equation (frames V_0 ... V_{n-1}):
        S = V_0 * rho(V_1) * rho^2(V_2) ... rho^(n-1)(V_{n-1})
    where rho is cyclic shift and * is bipolar binding.

    Args:
        mfcc: Matrix indexed as ``mfcc[coefficient, frame]``. Rows 0..12
            are read; ``mfcc.shape[1]`` is the number of frames. A matrix
            with zero frames is accepted and contributes neutral (all +1)
            temporal components.
        prosody: Feature name -> raw value. Every key must be present in
            ``BASE_PROSODY``. Values are divided by ``PROSODY_SCALE[key]``
            (1.0 for unlisted keys) and capped at 1.0 before encoding.
        chunk_size: Number of frames bundled into one chunk. Only
            ``n_frames // chunk_size`` full chunks are formed (at least
            one), so frames past the last full chunk do not contribute to
            the chunk-level component.

    Returns:
        An int8 vector of length ``DIM`` with entries in {-1, +1}.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
        KeyError: If ``prosody`` contains a key missing from ``BASE_PROSODY``.

    Note:
        Prosody values are encoded through ``_bipolar_binarize``, which
        draws from the module-level random generator, so two calls with
        identical non-empty ``prosody`` are not bit-for-bit identical.
    """
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be >= 1, got {chunk_size}")

    n_frames = mfcc.shape[1]

    # ------------------------------------------------------------------
    # 1. Frame-level bipolar vectors
    #    Each frame: 13 MFCC coefficients bound with their base vectors
    # ------------------------------------------------------------------
    frame_hvs = []
    for t in range(n_frames):
        frame_components = []
        for i in range(13):
            coeff_val = float(mfcc[i, t])
            # Threshold the coefficient at zero -> bipolar sign
            bit = np.int8(1) if coeff_val > 0 else np.int8(-1)
            # Binding a constant +/-1 vector equals scaling the base
            # vector by that sign, so the scalar is broadcast directly.
            frame_components.append(_bind(BASE_MFCC[i], bit))
        frame_hvs.append(_bundle(frame_components))

    # ------------------------------------------------------------------
    # 2. Forward temporal binding (preserves order)
    #    S_fwd = frame_0 * rho(frame_1) * rho^2(frame_2) ...
    # ------------------------------------------------------------------
    forward_hv = frame_hvs[0].copy() if frame_hvs else np.ones(DIM, dtype=np.int8)
    for t in range(1, len(frame_hvs)):
        forward_hv = _bind(forward_hv, _permute(frame_hvs[t], shift=t))

    # ------------------------------------------------------------------
    # 3. Backward temporal binding (reverse rhythm)
    #    Starts from the last frame and binds earlier frames shifted
    #    by -(t + 1).
    # ------------------------------------------------------------------
    backward_hv = frame_hvs[-1].copy() if frame_hvs else np.ones(DIM, dtype=np.int8)
    for t in range(len(frame_hvs) - 2, -1, -1):
        backward_hv = _bind(backward_hv, _permute(frame_hvs[t], shift=-(t + 1)))

    # ------------------------------------------------------------------
    # 4. Chunk-level binding (mid-scale temporal structure)
    # ------------------------------------------------------------------
    chunk_hvs = []
    if frame_hvs:
        # Guarded: with zero frames the max(1, ...) below would still
        # request one chunk and _bundle() would fail on an empty slice.
        n_chunks = max(1, n_frames // chunk_size)
        for c in range(n_chunks):
            start = c * chunk_size
            end = min(start + chunk_size, n_frames)
            chunk_bundle = _bundle(frame_hvs[start:end])
            chunk_hvs.append(_permute(chunk_bundle, shift=c))
    chunk_hv = _bundle(chunk_hvs) if chunk_hvs else np.ones(DIM, dtype=np.int8)

    # ------------------------------------------------------------------
    # 5. Prosody binding (tone, energy, rhythm, silence)
    #    Each prosody feature bound with its base vector and
    #    permuted by frame count (ties prosody to utterance length)
    # ------------------------------------------------------------------
    prosody_hvs = []
    for key, val in prosody.items():
        norm = min(val / PROSODY_SCALE.get(key, 1.0), 1.0)
        pv = _bind(_bipolar_binarize(norm), BASE_PROSODY[key])
        pv = _permute(pv, shift=n_frames)
        prosody_hvs.append(pv)

    # ------------------------------------------------------------------
    # 6. Final composition: bundle all levels
    #    forward captures sequence, backward captures rhythm,
    #    chunks capture phrase structure, prosody captures tone
    # ------------------------------------------------------------------
    all_components = [forward_hv, backward_hv, chunk_hv] + prosody_hvs
    return _bundle(all_components)
|