""" Convergence Loop: the core reasoning mechanism. Replaces attention in transformers. Multi-hop spatial search with query anchor. How it works: 1. Encode query → vector 2. Search nearest neurons in the DB 3. Blend neighbors weighted by confidence → activation 4. Mix activation with original query (anchor prevents drift) 5. Repeat until vector stabilizes (converged) or max hops (abort) Converged = answer found. Not converged = "I don't know." (Invariant #4) Each hop is inspectable — the trace shows exactly why the answer was found. (Invariant #2: every answer has a source.) """ from dataclasses import dataclass, field import numpy as np from neuron import Neuron, NeuronDB @dataclass class Hop: """One step in the convergence trace. For inspectability.""" hop_number: int neighbors: list # [(neuron_id, confidence, similarity)] activation: np.ndarray # blended vector before anchor current: np.ndarray # vector after anchor blend movement: float # cosine distance from previous step @dataclass class ConvergenceResult: """Result of a convergence loop.""" converged: bool vector: np.ndarray # final vector position concepts: list # neurons that participated in final hop hops: list = field(default_factory=list) # full trace confidence: float = 0.0 # aggregate confidence of result def trace(self) -> str: """Human-readable trace of the convergence path. Invariant #2.""" lines = [] for hop in self.hops: neighbors_str = ", ".join( f"n{nid}({conf:.2f})" for nid, conf, _ in hop.neighbors ) lines.append( f" Hop {hop.hop_number}: [{neighbors_str}] " f"movement={hop.movement:.4f}" ) status = "CONVERGED" if self.converged else "DID NOT CONVERGE" lines.insert(0, f"Convergence: {status} (confidence={self.confidence:.3f})") return "\n".join(lines) class ConvergenceLoop: """ Multi-hop reasoning through spatial search. The convergence loop IS attention — but each hop is inspectable. Query anchor IS a residual connection — prevents drift. Convergence check IS the stopping criterion — no convergence = abstain. Transformer correspondence: - _weighted_blend() uses softmax (exponential sharpening) over confidence scores, identical to scaled dot-product attention. - Per-hop k and threshold schedules give functional layer specialization: early hops explore broadly, later hops focus. - Concept-to-concept attention (NxN among discovered neighbors) provides compositional reasoning — same as token-to-token attention in transformer self-attention. """ def __init__(self, db: NeuronDB = None, max_hops: int = 10, k: int = 5, convergence_threshold: float = 0.99, min_confidence: float = 0.1, min_relevance: float = 0.3, temperature: float = 1.0, search_fn=None, blend_fn=None, cosine_fn=None): """ Args: db: NeuronDB to search in (optional if search_fn provided) max_hops: maximum reasoning steps before abort k: number of neighbors to retrieve per hop convergence_threshold: cosine sim threshold for "stable" min_confidence: minimum neuron confidence to participate min_relevance: minimum cosine similarity between query and best neighbor to accept convergence. Below this, the system says "I don't know" even if the vector stabilized. Invariant #4: honest about failure. temperature: softmax temperature for confidence weighting. Higher = more uniform, lower = sharper. Default 1.0 gives true softmax behavior. Use float('inf') to recover pre-softmax linear normalization for backward compatibility. search_fn: optional callable(query, k) → list of Neuron-like objects. Allows plugging in sparse search or any other backend. Each returned object must have .id, .vector, .confidence. blend_fn: optional callable(neurons) → blended vector. If None, uses default _weighted_blend. cosine_fn: optional callable(a, b) → float similarity. If None, uses default numpy cosine. """ self.db = db self.max_hops = max_hops self.k = k self.convergence_threshold = convergence_threshold self.min_confidence = min_confidence self.min_relevance = min_relevance self.temperature = temperature self._search_fn = search_fn self._blend_fn = blend_fn self._cosine_fn = cosine_fn def converge(self, query_vector: np.ndarray) -> ConvergenceResult: """ Run the convergence loop. Returns ConvergenceResult with converged=True if stable, converged=False if max hops reached (honest abstention). Per-hop specialization (like transformer layers having different learned parameters): early hops explore broadly (higher k, lower confidence threshold), later hops focus narrowly (lower k, higher threshold). This gives functional layer specialization without learned weights. """ query = np.array(query_vector, dtype=np.float32) query_norm = np.linalg.norm(query) if query_norm == 0: return ConvergenceResult( converged=False, vector=query, concepts=[], confidence=0.0 ) query = query / query_norm current = query.copy() hops = [] last_concepts = [] for hop_num in range(self.max_hops): previous = current.copy() # --- Per-hop specialization (transformer layer analogy) --- # Progress through the loop: 0.0 (first hop) → 1.0 (last hop) progress = hop_num / max(self.max_hops - 1, 1) # Early hops: explore broadly (more neighbors) # Later hops: focus narrowly (fewer neighbors) hop_k = max(2, int(self.k * (1.5 - 0.7 * progress))) # Early hops: accept lower confidence (explore) # Later hops: require higher confidence (focus) hop_min_conf = self.min_confidence * (1.0 + 0.5 * progress) # 1. Search nearest neurons with per-hop k if self._search_fn: neighbors = self._search_fn(current, k=hop_k) else: neighbors = self.db.search(current, k=hop_k) # Filter by per-hop minimum confidence neighbors = [n for n in neighbors if n.confidence >= hop_min_conf] if not neighbors: # No neurons above confidence threshold — honest abort return ConvergenceResult( converged=False, vector=current, concepts=[], hops=hops, confidence=0.0, ) # 2. Concept-to-concept attention (NxN among neighbors) # Transformers compute attention between all tokens. # Here we compute pairwise similarity among discovered # neighbors and boost those that are mutually relevant. # This gives compositional reasoning: concepts that # "attend to each other" get amplified. neighbors = self._mutual_attention(neighbors) # 3. Blend neighbors weighted by confidence → activation # Uses softmax (exponential sharpening) over confidences. if self._blend_fn: activation = self._blend_fn(neighbors) else: activation = self._weighted_blend(neighbors) # 4. Anchor to query (prevents drift) # Early hops: explore (more activation) # Later hops: contract (more query anchor) alpha = hop_num / self.max_hops # 0→1 current = (1 - alpha) * activation + alpha * query # Re-normalize norm = np.linalg.norm(current) if norm > 0: current = current / norm # Compute movement (how much the vector changed) movement = 1.0 - float(self._cosine_sim(current, previous)) # Compute similarities for the trace neighbor_info = [] for n in neighbors: sim = float(self._cosine_sim(n.vector, current)) neighbor_info.append((n.id, n.confidence, sim)) hops.append(Hop( hop_number=hop_num, neighbors=neighbor_info, activation=activation.copy(), current=current.copy(), movement=movement, )) last_concepts = neighbors # 5. Check convergence: has the vector stopped moving? sim = self._cosine_sim(current, previous) if sim >= self.convergence_threshold: # Vector stabilized — but are the neighbors actually relevant? best_relevance = max( self._cosine_sim(n.vector, query) for n in neighbors ) if best_relevance < self.min_relevance: # Converged on garbage — honest abstention return ConvergenceResult( converged=False, vector=current, concepts=last_concepts, hops=hops, confidence=0.0, ) # CONVERGED on relevant neurons avg_confidence = np.mean([n.confidence for n in neighbors]) return ConvergenceResult( converged=True, vector=current, concepts=last_concepts, hops=hops, confidence=float(avg_confidence), ) # DID NOT CONVERGE — "I don't know" (Invariant #4) avg_confidence = ( np.mean([n.confidence for n in last_concepts]) if last_concepts else 0.0 ) return ConvergenceResult( converged=False, vector=current, concepts=last_concepts, hops=hops, confidence=float(avg_confidence) * 0.5, # penalize non-convergence ) def _weighted_blend(self, neurons: list) -> np.ndarray: """ Blend neuron vectors weighted by softmax over confidence scores. This IS softmax attention: exp(c / T) / sum(exp(c / T)). Temperature controls sharpening: - T → 0: winner-take-all (hard attention) - T = 1: standard softmax - T → ∞: uniform weighting (recovers old linear normalization) """ vectors = np.array([n.vector for n in neurons]) confidences = np.array([n.confidence for n in neurons], dtype=np.float32) # Floor at 0 for weighting (negative confidence = no contribution) confidences = np.maximum(confidences, 0) if confidences.sum() == 0: weights = np.ones(len(neurons), dtype=np.float32) / len(neurons) elif self.temperature == float('inf'): # Backward compat: infinite temperature = linear normalization weights = confidences / confidences.sum() else: # Softmax with temperature: exp(c/T) / sum(exp(c/T)) # Subtract max for numerical stability (standard softmax trick) scaled = confidences / max(self.temperature, 1e-8) scaled = scaled - scaled.max() exp_scaled = np.exp(scaled) weights = exp_scaled / exp_scaled.sum() blended = np.average(vectors, axis=0, weights=weights).astype(np.float32) norm = np.linalg.norm(blended) if norm > 0: blended = blended / norm return blended def _mutual_attention(self, neurons: list) -> list: """ Concept-to-concept attention: NxN similarity among discovered neighbors. Boost neurons that are mutually relevant — they "attend to each other." This is the compositional reasoning step that makes transformers work: tokens don't just attend to the query, they attend to each other. Here, concepts that form a coherent cluster get boosted, while isolated concepts get dampened. Returns the same neurons with confidence adjusted by mutual relevance. Does NOT modify the original neuron objects — creates lightweight wrappers. """ if len(neurons) <= 1: return neurons n = len(neurons) # Compute pairwise similarity — uses pluggable cosine if provided if self._cosine_fn: sim_matrix = np.zeros((n, n), dtype=np.float32) for i in range(n): for j in range(i + 1, n): s = self._cosine_fn(neurons[i].vector, neurons[j].vector) sim_matrix[i, j] = s sim_matrix[j, i] = s else: vectors = np.array([nn.vector for nn in neurons]) norms = np.linalg.norm(vectors, axis=1, keepdims=True) norms = np.maximum(norms, 1e-8) normed = vectors / norms sim_matrix = normed @ normed.T # NxN np.fill_diagonal(sim_matrix, 0.0) # Each neuron's mutual relevance = mean similarity to all others mutual_scores = sim_matrix.sum(axis=1) / max(n - 1, 1) # Boost confidence by mutual relevance: # new_confidence = original * (1 + mutual_score) # This preserves ordering but amplifies coherent clusters. boosted = [] for i, neuron in enumerate(neurons): boost_factor = 1.0 + float(mutual_scores[i]) # Create a lightweight copy with boosted confidence boosted_neuron = Neuron( id=neuron.id, vector=neuron.vector, confidence=neuron.confidence * boost_factor, successors=neuron.successors, predecessors=neuron.predecessors, timestamp=neuron.timestamp, temporal=neuron.temporal, level=neuron.level, ) boosted.append(boosted_neuron) return boosted def _cosine_sim(self, a, b) -> float: """Cosine similarity. Uses pluggable cosine_fn if provided.""" if self._cosine_fn: return self._cosine_fn(a, b) dot = float(np.dot(a, b)) na = np.linalg.norm(a) nb = np.linalg.norm(b) if na == 0 or nb == 0: return 0.0 return dot / (na * nb) @dataclass class MultiHopResult: """Result of multi-hop reasoning across convergence rounds.""" converged: bool concepts: list # merged concept set from all rounds rounds: list = field(default_factory=list) # list of ConvergenceResult per round confidence: float = 0.0 vector: np.ndarray = None # final vector position def trace(self) -> str: """Human-readable trace across all rounds. Invariant #2.""" lines = [] for i, r in enumerate(self.rounds): lines.append(f"=== Round {i + 1} ===") lines.append(r.trace()) status = "CONVERGED" if self.converged else "DID NOT CONVERGE" concept_count = len(self.concepts) lines.insert(0, f"Multi-hop: {status} in {len(self.rounds)} round(s), " f"{concept_count} concepts (confidence={self.confidence:.3f})" ) return "\n".join(lines) class MultiHopConvergence: """ Chained convergence: each round's discovered concepts shift the query for the next round, allowing reasoning to cross concept boundaries. Round 1: query → converge → concepts A Round 2: query + concepts_A blend → converge → concepts B ... Stop when: no new concepts found, or max rounds reached. This is iterative retrieval-generation (ITER-RETGEN) done without a neural component. Each round is inspectable. The query anchor prevents drift across rounds. """ def __init__(self, loop: ConvergenceLoop, max_rounds: int = 3, concept_blend_weight: float = 0.4): """ Args: loop: the underlying ConvergenceLoop max_rounds: maximum reasoning rounds concept_blend_weight: how much discovered concepts shift the query (0 = ignore concepts, 1 = ignore query) """ self.loop = loop self.max_rounds = max_rounds self.concept_blend_weight = concept_blend_weight def reason(self, query_vector: np.ndarray) -> MultiHopResult: """ Run multi-hop reasoning. Each round discovers concepts. Those concepts' vectors get blended into the query for the next round, shifting the search into new regions of concept space. """ query = np.array(query_vector, dtype=np.float32) norm = np.linalg.norm(query) if norm == 0: return MultiHopResult( converged=False, concepts=[], confidence=0.0, vector=query, ) query = query / norm all_concepts = [] # merged across rounds seen_ids = set() # avoid duplicates rounds = [] current_query = query.copy() for round_num in range(self.max_rounds): # Run convergence with the current (possibly shifted) query result = self.loop.converge(current_query) rounds.append(result) # Collect new concepts from this round new_concepts = [] for c in result.concepts: if c.id not in seen_ids: new_concepts.append(c) seen_ids.add(c.id) all_concepts.extend(new_concepts) # Stop conditions: # 1. First round didn't converge at all → no point continuing if round_num == 0 and not result.converged and not result.concepts: break # 2. No new concepts found → we've exhausted this reasoning chain if not new_concepts and round_num > 0: break # 3. Last round → don't prepare next query if round_num == self.max_rounds - 1: break # Prepare next round: blend discovered concepts into query # This shifts the search to a new region of concept space if new_concepts: concept_blend = self._blend_concepts(new_concepts) w = self.concept_blend_weight current_query = (1 - w) * query + w * concept_blend norm = np.linalg.norm(current_query) if norm > 0: current_query = current_query / norm # Determine overall result any_converged = any(r.converged for r in rounds) if all_concepts and any_converged: avg_conf = float(np.mean([c.confidence for c in all_concepts])) final_vec = rounds[-1].vector if rounds else query return MultiHopResult( converged=True, concepts=all_concepts, rounds=rounds, confidence=avg_conf, vector=final_vec, ) else: return MultiHopResult( converged=False, concepts=all_concepts, rounds=rounds, confidence=0.0, vector=rounds[-1].vector if rounds else query, ) def _blend_concepts(self, concepts: list) -> np.ndarray: """Blend concept vectors weighted by confidence.""" vectors = np.array([c.vector for c in concepts], dtype=np.float32) confs = np.array([max(c.confidence, 0.01) for c in concepts], dtype=np.float32) total = confs.sum() if total == 0: weights = np.ones(len(concepts), dtype=np.float32) / len(concepts) else: weights = confs / total blended = np.average(vectors, axis=0, weights=weights).astype(np.float32) norm = np.linalg.norm(blended) if norm > 0: blended = blended / norm return blended