Spaces:

sadidft
/

CogniEngine

Sleeping

App Files Files Community

sadidft committed on Mar 8

Commit

d82945e

verified ·

1 Parent(s): 52c9fff

Create knowledge.py

Browse files

Files changed (1) hide show

knowledge.py +1271 -0

knowledge.py ADDED Viewed

	@@ -0,0 +1,1271 @@

+"""
+Cogni-Engine v1 — Knowledge Graph Engine
+In-memory graph structure with nodes, edges, traversal, similarity search.
+This is the core data structure that represents all knowledge.
+The "brain matter" — where concepts live and connect.
+"""
+import time
+import threading
+import json
+from typing import List, Dict, Optional, Set, Tuple, Any
+from collections import defaultdict
+import numpy as np
+import config
+import utils
+from memory import Memory
+# ═══════════════════════════════════════════════════════════
+# DATA STRUCTURES
+# ═══════════════════════════════════════════════════════════
+class Node:
+    """A single knowledge node in the graph."""
+    __slots__ = [
+        'id', 'type', 'content', 'vector', 'weight',
+        'connections', 'source', 'created_at', 'updated_at',
+        '_dirty'
+    ]
+    def __init__(
+        self,
+        node_id: str,
+        node_type: str,
+        content: str,
+        vector: np.ndarray = None,
+        weight: float = 1.0,
+        connections: int = 0,
+        source: str = "data",
+        created_at: str = "",
+        updated_at: str = ""
+    ):
+        self.id = node_id
+        self.type = node_type
+        self.content = content
+        self.vector = vector if vector is not None else np.zeros(config.VECTOR_DIM, dtype=np.float32)
+        self.weight = weight
+        self.connections = connections
+        self.source = source
+        self.created_at = created_at or utils.timestamp_now()
+        self.updated_at = updated_at or utils.timestamp_now()
+        self._dirty = False
+    def to_dict(self) -> dict:
+        """Serialize to dict for DB storage."""
+        return {
+            "id": self.id,
+            "type": self.type,
+            "content": self.content,
+            "vector": utils.vector_to_list(self.vector),
+            "weight": round(self.weight, 6),
+            "connections": self.connections,
+            "source": self.source,
+            "created_at": self.created_at,
+            "updated_at": self.updated_at
+        }
+    @staticmethod
+    def from_dict(data: dict) -> 'Node':
+        """Deserialize from dict."""
+        vector = None
+        if data.get("vector"):
+            vector = utils.list_to_vector(data["vector"])
+        return Node(
+            node_id=data["id"],
+            node_type=data.get("type", "fact"),
+            content=data.get("content", ""),
+            vector=vector,
+            weight=float(data.get("weight", 1.0)),
+            connections=int(data.get("connections", 0)),
+            source=data.get("source", "data"),
+            created_at=data.get("created_at", ""),
+            updated_at=data.get("updated_at", "")
+        )
+    def mark_dirty(self):
+        """Mark this node as needing DB sync."""
+        self._dirty = True
+        self.updated_at = utils.timestamp_now()
+class Edge:
+    """A directed relationship between two nodes."""
+    __slots__ = [
+        'id', 'from_node', 'to_node', 'relation', 'weight',
+        'confidence', 'source', 'used_count', 'created_at',
+        '_dirty'
+    ]
+    def __init__(
+        self,
+        edge_id: str,
+        from_node: str,
+        to_node: str,
+        relation: str = "related_to",
+        weight: float = 1.0,
+        confidence: float = 1.0,
+        source: str = "data",
+        used_count: int = 0,
+        created_at: str = ""
+    ):
+        self.id = edge_id
+        self.from_node = from_node
+        self.to_node = to_node
+        self.relation = relation
+        self.weight = weight
+        self.confidence = confidence
+        self.source = source
+        self.used_count = used_count
+        self.created_at = created_at or utils.timestamp_now()
+        self._dirty = False
+    def to_dict(self) -> dict:
+        """Serialize to dict for DB storage."""
+        return {
+            "id": self.id,
+            "from_node": self.from_node,
+            "to_node": self.to_node,
+            "relation": self.relation,
+            "weight": round(self.weight, 6),
+            "confidence": round(self.confidence, 6),
+            "source": self.source,
+            "used_count": self.used_count,
+            "created_at": self.created_at
+        }
+    @staticmethod
+    def from_dict(data: dict) -> 'Edge':
+        """Deserialize from dict."""
+        return Edge(
+            edge_id=data["id"],
+            from_node=data["from_node"],
+            to_node=data["to_node"],
+            relation=data.get("relation", "related_to"),
+            weight=float(data.get("weight", 1.0)),
+            confidence=float(data.get("confidence", 1.0)),
+            source=data.get("source", "data"),
+            used_count=int(data.get("used_count", 0)),
+            created_at=data.get("created_at", "")
+        )
+    def mark_dirty(self):
+        """Mark edge as needing DB sync."""
+        self._dirty = True
+class ReasoningChain:
+    """A discovered path of reasoning through the graph."""
+    __slots__ = [
+        'id', 'path', 'conclusion', 'confidence',
+        'used_count', 'created_at'
+    ]
+    def __init__(
+        self,
+        chain_id: str,
+        path: list,
+        conclusion: str = "",
+        confidence: float = 0.5,
+        used_count: int = 0,
+        created_at: str = ""
+    ):
+        self.id = chain_id
+        self.path = path  # [node_id, edge_id, node_id, edge_id, ...]
+        self.conclusion = conclusion
+        self.confidence = confidence
+        self.used_count = used_count
+        self.created_at = created_at or utils.timestamp_now()
+    def to_dict(self) -> dict:
+        return {
+            "id": self.id,
+            "path": self.path,
+            "conclusion": self.conclusion,
+            "confidence": round(self.confidence, 6),
+            "used_count": self.used_count,
+            "created_at": self.created_at
+        }
+    @staticmethod
+    def from_dict(data: dict) -> 'ReasoningChain':
+        return ReasoningChain(
+            chain_id=data["id"],
+            path=data.get("path", []),
+            conclusion=data.get("conclusion", ""),
+            confidence=float(data.get("confidence", 0.5)),
+            used_count=int(data.get("used_count", 0)),
+            created_at=data.get("created_at", "")
+        )
+# ═══════════════════════════════════════════════════════════
+# KNOWLEDGE GRAPH
+# ═══════════════════════════════════════════════════════════
+class KnowledgeGraph:
+    """
+    In-memory knowledge graph with persistence via Memory.
+    Structure:
+    - nodes: dict of Node objects indexed by id
+    - edges: dict of Edge objects indexed by id
+    - _adj_out: node_id → list of edge_ids (outgoing)
+    - _adj_in: node_id → list of edge_ids (incoming)
+    - _vector_matrix / _vector_node_ids: numpy matrix of node vectors for fast search, plus the node id of each row
+    - chains: dict of ReasoningChain objects indexed by id
+    Thread safety via a single re-entrant lock (threading.RLock):
+    - Mutating operations take the lock, so writers are serialized
+    - Simple read accessors (e.g. get_node) do not take the lock
+    """
+    def __init__(self, memory: Memory):
+        self.memory = memory
+        # Core data
+        self.nodes: Dict[str, Node] = {}
+        self.edges: Dict[str, Edge] = {}
+        self.chains: Dict[str, ReasoningChain] = {}
+        # Adjacency indexes
+        self._adj_out: Dict[str, List[str]] = defaultdict(list)  # node → [edge_ids outgoing]
+        self._adj_in: Dict[str, List[str]] = defaultdict(list)   # node → [edge_ids incoming]
+        # Vector index for fast similarity search
+        self._vector_matrix: Optional[np.ndarray] = None
+        self._vector_node_ids: List[str] = []
+        self._vector_index_dirty = True
+        # Thread safety
+        self._lock = threading.RLock()
+        # Stats
+        self._stats = {
+            "total_nodes": 0,
+            "total_edges": 0,
+            "total_chains": 0,
+            "inferred_nodes": 0,
+            "inferred_edges": 0,
+            "max_abstraction_depth": 0,
+            "avg_connections": 0.0,
+            "avg_confidence": 0.0
+        }
+    # ───────────────────────────────────────────────────
+    # INITIALIZATION
+    # ───────────────────────────────────────────────────
+    def load_from_memory(self) -> bool:
+        """
+        Load entire graph from TiDB via Memory.
+        Called once at startup.
+        """
+        state = self.memory.load_full_state()
+        if not state.get("loaded", False) and not state["nodes"]:
+            print("[GRAPH] No existing state found. Starting fresh.")
+            self._rebuild_stats()
+            return True
+        with self._lock:
+            # Load nodes
+            for node_data in state["nodes"]:
+                node = Node.from_dict(node_data)
+                self.nodes[node.id] = node
+            # Load edges
+            for edge_data in state["edges"]:
+                edge = Edge.from_dict(edge_data)
+                self.edges[edge.id] = edge
+                self._adj_out[edge.from_node].append(edge.id)
+                self._adj_in[edge.to_node].append(edge.id)
+            # Load chains
+            for chain_data in state["chains"]:
+                chain = ReasoningChain.from_dict(chain_data)
+                self.chains[chain.id] = chain
+            # Rebuild vector index
+            self._rebuild_vector_index()
+            self._rebuild_stats()
+        print(f"[GRAPH] Loaded: {len(self.nodes)} nodes, "
+              f"{len(self.edges)} edges, {len(self.chains)} chains")
+        return True
+    # ───────────────────────────────────────────────────
+    # NODE OPERATIONS
+    # ───────────────────────────────────────────────────
+    def add_node(
+        self,
+        content: str,
+        node_type: str = "fact",
+        source: str = "data",
+        weight: float = None,
+        vector: np.ndarray = None,
+        node_id: str = None,
+        tags: List[str] = None
+    ) -> Optional[Node]:
+        """
+        Add a new node to the graph.
+        If node with same id exists, update it instead.
+        Returns the node, or None if invalid.
+        """
+        if not content or not content.strip():
+            return None
+        content = content.strip()
+        if node_id is None:
+            node_id = config.generate_node_id(content, node_type)
+        # Generate vector if not provided
+        if vector is None:
+            vector = utils.text_to_vector_tfidf(content)
+        # Register content with TF-IDF corpus
+        tokens = utils.tokenize(content, remove_stopwords=True)
+        utils.tfidf.add_document(tokens)
+        if weight is None:
+            weight = (config.DATA_KNOWLEDGE_CONFIDENCE
+                      if source == "data"
+                      else config.USER_KNOWLEDGE_CONFIDENCE)
+        with self._lock:
+            if node_id in self.nodes:
+                # Update existing node
+                existing = self.nodes[node_id]
+                # Reinforce weight if seen again
+                existing.weight = min(
+                    existing.weight * config.WEIGHT_REINFORCE,
+                    config.WEIGHT_MAX
+                )
+                existing.mark_dirty()
+                self.memory.save_node(existing.to_dict())
+                return existing
+            # Create new node
+            node = Node(
+                node_id=node_id,
+                node_type=node_type,
+                content=content,
+                vector=vector,
+                weight=weight,
+                connections=0,
+                source=source
+            )
+            # Safety check
+            if len(self.nodes) >= config.MAX_GRAPH_MEMORY_NODES:
+                print(f"[GRAPH] Node limit reached ({config.MAX_GRAPH_MEMORY_NODES}). Skipping.")
+                return None
+            self.nodes[node_id] = node
+            self._vector_index_dirty = True
+            # Buffer for DB write
+            node._dirty = True
+            self.memory.save_node(node.to_dict())
+            # Create edges from tags
+            if tags:
+                for tag in tags:
+                    tag_id = config.generate_node_id(tag, "concept")
+                    if tag_id not in self.nodes:
+                        self.add_node(
+                            content=tag,
+                            node_type="concept",
+                            source=source,
+                            weight=weight * 0.8
+                        )
+                    self.add_edge(
+                        from_id=node_id,
+                        to_id=tag_id,
+                        relation="related_to",
+                        source=source,
+                        confidence=weight * 0.7
+                    )
+        return node
+    def get_node(self, node_id: str) -> Optional[Node]:
+        """Get a node by id."""
+        return self.nodes.get(node_id)
+    def get_node_by_content(self, content: str, node_type: str = "") -> Optional[Node]:
+        """Find node by exact content match."""
+        node_id = config.generate_node_id(content.strip(), node_type)
+        return self.nodes.get(node_id)
+    def remove_node(self, node_id: str) -> bool:
+        """Remove a node and all its edges."""
+        with self._lock:
+            if node_id not in self.nodes:
+                return False
+            # Remove connected edges
+            edge_ids_to_remove = []
+            edge_ids_to_remove.extend(self._adj_out.get(node_id, []))
+            edge_ids_to_remove.extend(self._adj_in.get(node_id, []))
+            for edge_id in set(edge_ids_to_remove):
+                self._remove_edge_internal(edge_id)
+            # Remove adjacency entries
+            self._adj_out.pop(node_id, None)
+            self._adj_in.pop(node_id, None)
+            # Remove node
+            del self.nodes[node_id]
+            self._vector_index_dirty = True
+            # Buffer for DB delete
+            self.memory.delete_node(node_id)
+        return True
+    def update_node_weight(self, node_id: str, new_weight: float):
+        """Update a node's weight."""
+        with self._lock:
+            node = self.nodes.get(node_id)
+            if node:
+                node.weight = utils.clamp(new_weight, config.WEIGHT_MIN, config.WEIGHT_MAX)
+                node.mark_dirty()
+                self.memory.save_node(node.to_dict())
+    def get_nodes_by_type(self, node_type: str) -> List[Node]:
+        """Get all nodes of a specific type."""
+        return [n for n in self.nodes.values() if n.type == node_type]
+    def get_nodes_by_source(self, source: str) -> List[Node]:
+        """Get all nodes from a specific source."""
+        return [n for n in self.nodes.values() if n.source == source]
+    def get_weakest_nodes(self, limit: int = 50) -> List[Node]:
+        """Get nodes with lowest weight (candidates for pruning)."""
+        sorted_nodes = sorted(self.nodes.values(), key=lambda n: n.weight)
+        return sorted_nodes[:limit]
+    def get_least_connected_nodes(self, limit: int = 50) -> List[Node]:
+        """Get nodes with fewest connections (candidates for connecting)."""
+        sorted_nodes = sorted(self.nodes.values(), key=lambda n: n.connections)
+        return sorted_nodes[:limit]
+    # ───────────────────────────────────────────────────
+    # EDGE OPERATIONS
+    # ───────────────────────────────────────────────────
+    def add_edge(
+        self,
+        from_id: str,
+        to_id: str,
+        relation: str = "related_to",
+        weight: float = 1.0,
+        confidence: float = 1.0,
+        source: str = "data",
+        edge_id: str = None
+    ) -> Optional[Edge]:
+        """
+        Add a directed edge between two nodes.
+        If edge exists, reinforce it.
+        """
+        if from_id == to_id:
+            return None  # No self-loops
+        if from_id not in self.nodes or to_id not in self.nodes:
+            return None  # Both nodes must exist
+        if edge_id is None:
+            edge_id = config.generate_edge_id(from_id, to_id, relation)
+        with self._lock:
+            if edge_id in self.edges:
+                # Reinforce existing edge
+                existing = self.edges[edge_id]
+                existing.weight = min(
+                    existing.weight * config.WEIGHT_REINFORCE,
+                    config.WEIGHT_MAX
+                )
+                existing.confidence = min(
+                    (existing.confidence + confidence) / 2.0 * 1.05,
+                    1.0
+                )
+                existing.mark_dirty()
+                self.memory.save_edge(existing.to_dict())
+                return existing
+            # Safety check
+            if len(self.edges) >= config.MAX_GRAPH_MEMORY_EDGES:
+                print(f"[GRAPH] Edge limit reached ({config.MAX_GRAPH_MEMORY_EDGES}). Skipping.")
+                return None
+            edge = Edge(
+                edge_id=edge_id,
+                from_node=from_id,
+                to_node=to_id,
+                relation=relation,
+                weight=weight,
+                confidence=confidence,
+                source=source
+            )
+            self.edges[edge_id] = edge
+            self._adj_out[from_id].append(edge_id)
+            self._adj_in[to_id].append(edge_id)
+            # Update connection counts
+            self.nodes[from_id].connections += 1
+            self.nodes[to_id].connections += 1
+            # Buffer for DB
+            edge._dirty = True
+            self.memory.save_edge(edge.to_dict())
+        return edge
+    def get_edge(self, edge_id: str) -> Optional[Edge]:
+        """Get an edge by id."""
+        return self.edges.get(edge_id)
+    def get_edges_from(self, node_id: str) -> List[Edge]:
+        """Get all outgoing edges from a node."""
+        edge_ids = self._adj_out.get(node_id, [])
+        return [self.edges[eid] for eid in edge_ids if eid in self.edges]
+    def get_edges_to(self, node_id: str) -> List[Edge]:
+        """Get all incoming edges to a node."""
+        edge_ids = self._adj_in.get(node_id, [])
+        return [self.edges[eid] for eid in edge_ids if eid in self.edges]
+    def get_all_edges_for(self, node_id: str) -> List[Edge]:
+        """Get all edges (in + out) connected to a node."""
+        edges = self.get_edges_from(node_id)
+        edges.extend(self.get_edges_to(node_id))
+        return edges
+    def get_neighbors(self, node_id: str) -> List[Tuple[Node, Edge]]:
+        """Get all neighboring nodes with their connecting edges."""
+        neighbors = []
+        for edge in self.get_edges_from(node_id):
+            target = self.nodes.get(edge.to_node)
+            if target:
+                neighbors.append((target, edge))
+        for edge in self.get_edges_to(node_id):
+            source = self.nodes.get(edge.from_node)
+            if source:
+                neighbors.append((source, edge))
+        return neighbors
+    def edge_exists(self, from_id: str, to_id: str, relation: str = None) -> bool:
+        """Check if an edge exists between two nodes."""
+        for edge_id in self._adj_out.get(from_id, []):
+            edge = self.edges.get(edge_id)
+            if edge and edge.to_node == to_id:
+                if relation is None or edge.relation == relation:
+                    return True
+        return False
+    def remove_edge(self, edge_id: str) -> bool:
+        """Remove an edge."""
+        with self._lock:
+            return self._remove_edge_internal(edge_id)
+    def _remove_edge_internal(self, edge_id: str) -> bool:
+        """Internal edge removal (must be called under lock)."""
+        edge = self.edges.get(edge_id)
+        if not edge:
+            return False
+        # Remove from adjacency
+        if edge_id in self._adj_out.get(edge.from_node, []):
+            self._adj_out[edge.from_node].remove(edge_id)
+        if edge_id in self._adj_in.get(edge.to_node, []):
+            self._adj_in[edge.to_node].remove(edge_id)
+        # Update connection counts
+        from_node = self.nodes.get(edge.from_node)
+        to_node = self.nodes.get(edge.to_node)
+        if from_node:
+            from_node.connections = max(0, from_node.connections - 1)
+        if to_node:
+            to_node.connections = max(0, to_node.connections - 1)
+        # Remove edge
+        del self.edges[edge_id]
+        self.memory.delete_edge(edge_id)
+        return True
+    def reinforce_edge(self, edge_id: str, factor: float = None):
+        """Increase edge weight (used when edge participates in response)."""
+        if factor is None:
+            factor = config.WEIGHT_REINFORCE
+        with self._lock:
+            edge = self.edges.get(edge_id)
+            if edge:
+                edge.weight = min(edge.weight * factor, config.WEIGHT_MAX)
+                edge.used_count += 1
+                edge.mark_dirty()
+                self.memory.save_edge(edge.to_dict())
+    def decay_edge(self, edge_id: str, factor: float = None):
+        """Decrease edge weight (unused edge decay)."""
+        if factor is None:
+            factor = config.WEIGHT_DECAY_RATE
+        with self._lock:
+            edge = self.edges.get(edge_id)
+            if edge:
+                edge.weight = max(edge.weight * factor, config.WEIGHT_MIN)
+                edge.mark_dirty()
+                self.memory.save_edge(edge.to_dict())
+    def get_weakest_edges(self, limit: int = 100, source_filter: str = "inferred") -> List[Edge]:
+        """Get edges with lowest weight (candidates for pruning)."""
+        filtered = [
+            e for e in self.edges.values()
+            if source_filter is None or e.source == source_filter
+        ]
+        sorted_edges = sorted(filtered, key=lambda e: e.weight)
+        return sorted_edges[:limit]
+    # ───────────────────────────────────────────────────
+    # VECTOR INDEX & SIMILARITY SEARCH
+    # ───────────────────────────────────────────────────
+    def _rebuild_vector_index(self):
+        """Rebuild the vector matrix for fast batch similarity search."""
+        with self._lock:
+            if not self.nodes:
+                self._vector_matrix = np.zeros((0, config.VECTOR_DIM), dtype=np.float32)
+                self._vector_node_ids = []
+                self._vector_index_dirty = False
+                return
+            node_ids = []
+            vectors = []
+            for nid, node in self.nodes.items():
+                if node.vector is not None and len(node.vector) == config.VECTOR_DIM:
+                    node_ids.append(nid)
+                    vectors.append(node.vector)
+            if vectors:
+                self._vector_matrix = np.array(vectors, dtype=np.float32)
+            else:
+                self._vector_matrix = np.zeros((0, config.VECTOR_DIM), dtype=np.float32)
+            self._vector_node_ids = node_ids
+            self._vector_index_dirty = False
+    def _ensure_vector_index(self):
+        """Rebuild vector index if dirty."""
+        if self._vector_index_dirty:
+            self._rebuild_vector_index()
+    def find_similar_nodes(
+        self,
+        query_vector: np.ndarray,
+        top_k: int = None,
+        min_similarity: float = 0.0,
+        exclude_ids: Set[str] = None,
+        type_filter: str = None
+    ) -> List[Tuple[Node, float]]:
+        """
+        Find nodes most similar to query vector.
+        Returns list of (node, similarity_score) sorted by similarity desc.
+        """
+        if top_k is None:
+            top_k = config.MAX_NODES_PER_SEARCH
+        self._ensure_vector_index()
+        if self._vector_matrix.shape[0] == 0:
+            return []
+        # Batch cosine similarity
+        similarities = utils.batch_cosine_similarity(query_vector, self._vector_matrix)
+        # Apply filters and sort
+        results = []
+        for i, sim in enumerate(similarities):
+            sim_val = float(sim)
+            if sim_val < min_similarity:
+                continue
+            node_id = self._vector_node_ids[i]
+            if exclude_ids and node_id in exclude_ids:
+                continue
+            node = self.nodes.get(node_id)
+            if not node:
+                continue
+            if type_filter and node.type != type_filter:
+                continue
+            results.append((node, sim_val))
+        # Sort by similarity descending
+        results.sort(key=lambda x: x[1], reverse=True)
+        return results[:top_k]
+    def find_similar_to_text(
+        self,
+        text: str,
+        top_k: int = None,
+        min_similarity: float = 0.0,
+        exclude_ids: Set[str] = None,
+        type_filter: str = None
+    ) -> List[Tuple[Node, float]]:
+        """
+        Find nodes most similar to a text query.
+        Convenience wrapper around find_similar_nodes.
+        """
+        query_vector = utils.text_to_vector_tfidf(text)
+        return self.find_similar_nodes(
+            query_vector, top_k, min_similarity,
+            exclude_ids, type_filter
+        )
+    def find_similar_to_node(
+        self,
+        node_id: str,
+        top_k: int = None,
+        min_similarity: float = None
+    ) -> List[Tuple[Node, float]]:
+        """Find nodes most similar to an existing node."""
+        node = self.nodes.get(node_id)
+        if not node:
+            return []
+        if min_similarity is None:
+            min_similarity = config.SIMILARITY_THRESHOLD
+        return self.find_similar_nodes(
+            node.vector, top_k, min_similarity,
+            exclude_ids={node_id}
+        )
+    # ───────────────────────────────────────────────────
+    # GRAPH TRAVERSAL
+    # ───────────────────────────────────────────────────
+    def traverse_bfs(
+        self,
+        start_ids: List[str],
+        max_depth: int = None,
+        max_nodes: int = 100
+    ) -> Dict[str, Tuple[int, List[str]]]:
+        """
+        Breadth-first traversal from starting nodes.
+        Returns: {node_id: (depth, [path_from_start])}
+        """
+        if max_depth is None:
+            max_depth = config.MAX_TRAVERSAL_DEPTH
+        visited = {}  # node_id → (depth, path)
+        queue = []
+        for sid in start_ids:
+            if sid in self.nodes:
+                visited[sid] = (0, [sid])
+                queue.append((sid, 0, [sid]))
+        while queue and len(visited) < max_nodes:
+            current_id, depth, path = queue.pop(0)
+            if depth >= max_depth:
+                continue
+            for neighbor, edge in self.get_neighbors(current_id):
+                if neighbor.id not in visited:
+                    new_path = path + [edge.id, neighbor.id]
+                    visited[neighbor.id] = (depth + 1, new_path)
+                    queue.append((neighbor.id, depth + 1, new_path))
+        return visited
+    def traverse_weighted_random(
+        self,
+        start_id: str,
+        max_depth: int = None,
+        temperature: float = 0.7
+    ) -> List[Tuple[str, str]]:
+        """
+        Weighted random walk from a starting node.
+        Edge weight determines probability of following that edge.
+        Returns: [(node_id, edge_id), ...] — the path taken.
+        """
+        if max_depth is None:
+            max_depth = config.MAX_TRAVERSAL_DEPTH
+        if start_id not in self.nodes:
+            return []
+        path = [(start_id, "")]
+        visited = {start_id}
+        current = start_id
+        for _ in range(max_depth):
+            neighbors = self.get_neighbors(current)
+            # Filter out already visited
+            unvisited = [
+                (node, edge) for node, edge in neighbors
+                if node.id not in visited
+            ]
+            if not unvisited:
+                break
+            # Weight-based selection
+            items = unvisited
+            weights = [
+                edge.weight * edge.confidence * node.weight
+                for node, edge in items
+            ]
+            chosen_node, chosen_edge = utils.weighted_choice(
+                items, weights, temperature
+            )
+            visited.add(chosen_node.id)
+            path.append((chosen_node.id, chosen_edge.id))
+            current = chosen_node.id
+        return path
+    def find_paths(
+        self,
+        from_id: str,
+        to_id: str,
+        max_depth: int = None,
+        max_paths: int = 5
+    ) -> List[List[str]]:
+        """
+        Find paths between two nodes using DFS.
+        Returns list of paths, each path is [node_id, edge_id, node_id, ...].
+        """
+        if max_depth is None:
+            max_depth = config.MAX_TRAVERSAL_DEPTH
+        if from_id not in self.nodes or to_id not in self.nodes:
+            return []
+        all_paths = []
+        def dfs(current: str, target: str, path: list, visited: set, depth: int):
+            if len(all_paths) >= max_paths:
+                return
+            if depth > max_depth:
+                return
+            if current == target:
+                all_paths.append(list(path))
+                return
+            for neighbor, edge in self.get_neighbors(current):
+                if neighbor.id not in visited:
+                    visited.add(neighbor.id)
+                    path.extend([edge.id, neighbor.id])
+                    dfs(neighbor.id, target, path, visited, depth + 1)
+                    # Backtrack
+                    path.pop()
+                    path.pop()
+                    visited.discard(neighbor.id)
+        dfs(from_id, to_id, [from_id], {from_id}, 0)
+        return all_paths
+    # ───────────────────────────────────────────────────
+    # REASONING CHAINS
+    # ───────────────────────────────────────────────────
+    def build_reasoning_chains(
+        self,
+        start_nodes: List[str],
+        max_chains: int = None,
+        max_depth: int = None
+    ) -> List[ReasoningChain]:
+        """
+        Build reasoning chains from starting nodes.
+        Combines BFS exploration with weighted random walks.
+        Returns scored and sorted chains.
+        """
+        if max_chains is None:
+            max_chains = config.MAX_CHAINS_PER_RESPONSE
+        if max_depth is None:
+            max_depth = config.MAX_TRAVERSAL_DEPTH
+        chains = []
+        for start_id in start_nodes:
+            if start_id not in self.nodes:
+                continue
+            # Strategy 1: Weighted random walks (multiple)
+            for _ in range(min(3, max_chains)):
+                walk = self.traverse_weighted_random(start_id, max_depth)
+                if len(walk) >= 2:
+                    path = []
+                    for node_id, edge_id in walk:
+                        if edge_id:
+                            path.append(edge_id)
+                        path.append(node_id)
+                    confidence = self._score_chain(path)
+                    conclusion = self._chain_to_conclusion(path)
+                    chain = ReasoningChain(
+                        chain_id=config.generate_chain_id(path),
+                        path=path,
+                        conclusion=conclusion,
+                        confidence=confidence
+                    )
+                    chains.append(chain)
+            # Strategy 2: Follow high-weight edges
+            high_weight_path = self._follow_strongest_path(start_id, max_depth)
+            if len(high_weight_path) >= 3:
+                confidence = self._score_chain(high_weight_path)
+                conclusion = self._chain_to_conclusion(high_weight_path)
+                chain = ReasoningChain(
+                    chain_id=config.generate_chain_id(high_weight_path),
+                    path=high_weight_path,
+                    conclusion=conclusion,
+                    confidence=confidence
+                )
+                chains.append(chain)
+        # Deduplicate by chain id
+        seen = set()
+        unique_chains = []
+        for c in chains:
+            if c.id not in seen:
+                seen.add(c.id)
+                unique_chains.append(c)
+        # Sort by confidence descending
+        unique_chains.sort(key=lambda c: c.confidence, reverse=True)
+        return unique_chains[:max_chains]
+    def _follow_strongest_path(self, start_id: str, max_depth: int) -> list:
+        """Follow the highest-weight edges from a starting node."""
+        path = [start_id]
+        visited = {start_id}
+        current = start_id
+        for _ in range(max_depth):
+            edges = self.get_edges_from(current)
+            # Filter unvisited
+            candidates = [
+                e for e in edges
+                if e.to_node not in visited and e.to_node in self.nodes
+            ]
+            if not candidates:
+                break
+            # Pick strongest edge
+            best_edge = max(candidates, key=lambda e: e.weight * e.confidence)
+            path.append(best_edge.id)
+            path.append(best_edge.to_node)
+            visited.add(best_edge.to_node)
+            current = best_edge.to_node
+        return path
+    def _score_chain(self, path: list) -> float:
+        """
+        Score a reasoning chain.
+        Considers: edge weights, confidences, chain length, node weights.
+        """
+        if len(path) < 3:
+            return 0.0
+        edge_scores = []
+        node_weights = []
+        for item_id in path:
+            if item_id in self.edges:
+                edge = self.edges[item_id]
+                edge_scores.append(edge.weight * edge.confidence)
+            elif item_id in self.nodes:
+                node_weights.append(self.nodes[item_id].weight)
+        if not edge_scores:
+            return 0.0
+        avg_edge_score = sum(edge_scores) / len(edge_scores)
+        avg_node_weight = sum(node_weights) / len(node_weights) if node_weights else 0.5
+        # Shorter chains are generally more reliable
+        length_penalty = 1.0 / (1.0 + 0.1 * len(edge_scores))
+        score = avg_edge_score * avg_node_weight * length_penalty
+        return utils.clamp(score, 0.0, 1.0)
+    def _chain_to_conclusion(self, path: list) -> str:
+        """
+        Generate a text conclusion from a reasoning chain path.
+        Extracts content from nodes in the path.
+        """
+        node_contents = []
+        for item_id in path:
+            node = self.nodes.get(item_id)
+            if node:
+                node_contents.append(node.content)
+        if not node_contents:
+            return ""
+        return " → ".join(node_contents)
+    def save_chain(self, chain: ReasoningChain):
+        """Save a reasoning chain."""
+        with self._lock:
+            self.chains[chain.id] = chain
+            self.memory.save_chain(chain.to_dict())
+    def reinforce_chain(self, chain_id: str):
+        """Reinforce a chain that was used in a response."""
+        with self._lock:
+            chain = self.chains.get(chain_id)
+            if chain:
+                chain.used_count += 1
+                chain.confidence = min(chain.confidence * 1.02, 1.0)
+                self.memory.save_chain(chain.to_dict())
+                # Also reinforce all edges in the chain
+                for item_id in chain.path:
+                    if item_id in self.edges:
+                        self.reinforce_edge(item_id)
+    # ───────────────────────────────────────────────────
+    # MERGE & PRUNE
+    # ───────────────────────────────────────────────────
+    def merge_nodes(self, node_id_keep: str, node_id_remove: str) -> bool:
+        """
+        Merge two redundant nodes. Keep the first, remove the second.
+        Redirect all edges from removed node to kept node.
+        """
+        with self._lock:
+            keep = self.nodes.get(node_id_keep)
+            remove = self.nodes.get(node_id_remove)
+            if not keep or not remove:
+                return False
+            # Combine weights
+            keep.weight = min(keep.weight + remove.weight * 0.5, config.WEIGHT_MAX)
+            # Average vectors
+            keep.vector = utils.normalize(
+                utils.vector_add(keep.vector, remove.vector) / 2.0
+            )
+            # Redirect edges
+            edges_to_redirect = self.get_all_edges_for(node_id_remove)
+            for edge in edges_to_redirect:
+                new_from = node_id_keep if edge.from_node == node_id_remove else edge.from_node
+                new_to = node_id_keep if edge.to_node == node_id_remove else edge.to_node
+                if new_from == new_to:
+                    continue  # Would create self-loop
+                # Create redirected edge if doesn't exist
+                if not self.edge_exists(new_from, new_to, edge.relation):
+                    self.add_edge(
+                        from_id=new_from,
+                        to_id=new_to,
+                        relation=edge.relation,
+                        weight=edge.weight,
+                        confidence=edge.confidence,
+                        source=edge.source
+                    )
+            # Remove the merged node (and its old edges)
+            self.remove_node(node_id_remove)
+            keep.mark_dirty()
+            self.memory.save_node(keep.to_dict())
+            self._vector_index_dirty = True
+        return True
+    def prune_weak_edges(self, threshold: float = None) -> int:
+        """Remove edges below weight threshold. Returns count removed."""
+        if threshold is None:
+            threshold = config.PRUNE_WEIGHT_THRESHOLD
+        to_remove = []
+        for edge in self.edges.values():
+            if edge.weight < threshold and edge.source == "inferred":
+                to_remove.append(edge.id)
+        with self._lock:
+            for edge_id in to_remove:
+                self._remove_edge_internal(edge_id)
+        return len(to_remove)
+    def prune_orphan_nodes(self) -> int:
+        """Remove nodes with no connections and low weight. Returns count removed."""
+        to_remove = []
+        for node in self.nodes.values():
+            if (node.connections == 0 and
+                    node.weight < config.WEIGHT_MIN * 2 and
+                    node.source == "inferred"):
+                to_remove.append(node.id)
+        with self._lock:
+            for node_id in to_remove:
+                if node_id in self.nodes:
+                    del self.nodes[node_id]
+                    self.memory.delete_node(node_id)
+        if to_remove:
+            self._vector_index_dirty = True
+        return len(to_remove)
+    def find_redundant_pairs(self, limit: int = 20) -> List[Tuple[str, str, float]]:
+        """
+        Find pairs of nodes that might be redundant (very high similarity).
+        Returns [(node_id_1, node_id_2, similarity), ...]
+        """
+        self._ensure_vector_index()
+        pairs = []
+        node_list = list(self.nodes.values())
+        # Sample to avoid O(n²) for large graphs
+        if len(node_list) > 500:
+            sample_indices = np.random.choice(len(node_list), 500, replace=False)
+            node_list = [node_list[i] for i in sample_indices]
+        for i in range(len(node_list)):
+            for j in range(i + 1, len(node_list)):
+                n1 = node_list[i]
+                n2 = node_list[j]
+                if n1.type != n2.type:
+                    continue  # Only merge same-type nodes
+                sim = utils.cosine_similarity(n1.vector, n2.vector)
+                if sim >= config.MERGE_THRESHOLD:
+                    pairs.append((n1.id, n2.id, sim))
+                    if len(pairs) >= limit:
+                        return pairs
+        return pairs
+    # ───────────────────────────────────────────────────
+    # STATISTICS
+    # ───────────────────────────────────────────────────
+    def _rebuild_stats(self):
+        """Rebuild graph statistics."""
+        total_nodes = len(self.nodes)
+        total_edges = len(self.edges)
+        inferred_nodes = sum(1 for n in self.nodes.values() if n.source == "inferred")
+        inferred_edges = sum(1 for e in self.edges.values() if e.source == "inferred")
+        avg_connections = 0.0
+        if total_nodes > 0:
+            avg_connections = sum(n.connections for n in self.nodes.values()) / total_nodes
+        avg_confidence = 0.0
+        if total_edges > 0:
+            avg_confidence = sum(e.confidence for e in self.edges.values()) / total_edges
+        # Max abstraction depth
+        max_depth = 0
+        for node in self.nodes.values():
+            if node.type == "abstraction":
+                depth = self._get_abstraction_depth(node.id)
+                max_depth = max(max_depth, depth)
+        self._stats = {
+            "total_nodes": total_nodes,
+            "total_edges": total_edges,
+            "total_chains": len(self.chains),
+            "inferred_nodes": inferred_nodes,
+            "inferred_edges": inferred_edges,
+            "max_abstraction_depth": max_depth,
+            "avg_connections": round(avg_connections, 2),
+            "avg_confidence": round(avg_confidence, 4),
+            "inference_ratio": round(
+                inferred_edges / max(total_edges, 1), 4
+            ),
+            "avg_chain_length": round(
+                sum(len(c.path) for c in self.chains.values()) / max(len(self.chains), 1), 2
+            )
+        }
+    def _get_abstraction_depth(self, node_id: str, visited: set = None) -> int:
+        """Get the abstraction depth of a node (recursive)."""
+        if visited is None:
+            visited = set()
+        if node_id in visited:
+            return 0
+        visited.add(node_id)
+        max_child_depth = 0
+        for edge in self.get_edges_to(node_id):
+            if edge.relation == "instance_of":
+                child_depth = self._get_abstraction_depth(edge.from_node, visited)
+                max_child_depth = max(max_child_depth, child_depth)
+        return max_child_depth + 1 if max_child_depth > 0 else (
+            1 if self.nodes.get(node_id, Node("", "", "")).type in ("abstraction", "meta_abstraction") else 0
+        )
+    def get_stats(self) -> dict:
+        """Get current graph statistics."""
+        self._rebuild_stats()
+        return dict(self._stats)
+    def get_intelligence_score(self) -> float:
+        """Calculate and return intelligence score."""
+        self._rebuild_stats()
+        return utils.calculate_intelligence_score(self._stats)
+    # ───────────────────────────────────────────────────
+    # SYNC
+    # ───────────────────────────────────────────────────
+    def sync(self) -> Optional[dict]:
+        """Flush buffered changes to DB if needed."""
+        return self.memory.flush_if_needed()
+    def force_sync(self) -> dict:
+        """Force flush all buffered changes to DB."""
+        return self.memory.flush()
+    # ───────────────────────────────────────────────────
+    # DEBUG / INSPECTION
+    # ───────────────────────────────────────────────────
+    def describe_node(self, node_id: str) -> Optional[dict]:
+        """Get detailed description of a node and its connections."""
+        node = self.nodes.get(node_id)
+        if not node:
+            return None
+        neighbors = self.get_neighbors(node_id)
+        return {
+            "id": node.id,
+            "type": node.type,
+            "content": node.content,
+            "weight": node.weight,
+            "connections": node.connections,
+            "source": node.source,
+            "neighbors": [
+                {
+                    "node_id": n.id,
+                    "content": utils.truncate_text(n.content, 80),
+                    "relation": e.relation,
+                    "edge_weight": e.weight,
+                    "edge_confidence": e.confidence
+                }
+                for n, e in neighbors
+            ]
+        }