Spaces:

sadidft
/

CogniEngine

Sleeping

App Files Files Community

CogniEngine / knowledge.py

sadidft

Create knowledge.py

d82945e verified 2 months ago

raw

history blame contribute delete

45.8 kB

	"""
	Cogni-Engine v1 — Knowledge Graph Engine
	In-memory graph structure with nodes, edges, traversal, similarity search.
	This is the core data structure that represents all knowledge.
	The "brain matter" — where concepts live and connect.
	"""

	import time
	import threading
	import json
	from typing import List, Dict, Optional, Set, Tuple, Any
	from collections import defaultdict

	import numpy as np

	import config
	import utils
	from memory import Memory


	# ═══════════════════════════════════════════════════════════
	# DATA STRUCTURES
	# ═══════════════════════════════════════════════════════════

	class Node:
	"""A single knowledge node in the graph."""

	__slots__ = [
	'id', 'type', 'content', 'vector', 'weight',
	'connections', 'source', 'created_at', 'updated_at',
	'_dirty'
	]

	def __init__(
	self,
	node_id: str,
	node_type: str,
	content: str,
	vector: np.ndarray = None,
	weight: float = 1.0,
	connections: int = 0,
	source: str = "data",
	created_at: str = "",
	updated_at: str = ""
	):
	self.id = node_id
	self.type = node_type
	self.content = content
	self.vector = vector if vector is not None else np.zeros(config.VECTOR_DIM, dtype=np.float32)
	self.weight = weight
	self.connections = connections
	self.source = source
	self.created_at = created_at or utils.timestamp_now()
	self.updated_at = updated_at or utils.timestamp_now()
	self._dirty = False

	def to_dict(self) -> dict:
	"""Serialize to dict for DB storage."""
	return {
	"id": self.id,
	"type": self.type,
	"content": self.content,
	"vector": utils.vector_to_list(self.vector),
	"weight": round(self.weight, 6),
	"connections": self.connections,
	"source": self.source,
	"created_at": self.created_at,
	"updated_at": self.updated_at
	}

	@staticmethod
	def from_dict(data: dict) -> 'Node':
	"""Deserialize from dict."""
	vector = None
	if data.get("vector"):
	vector = utils.list_to_vector(data["vector"])
	return Node(
	node_id=data["id"],
	node_type=data.get("type", "fact"),
	content=data.get("content", ""),
	vector=vector,
	weight=float(data.get("weight", 1.0)),
	connections=int(data.get("connections", 0)),
	source=data.get("source", "data"),
	created_at=data.get("created_at", ""),
	updated_at=data.get("updated_at", "")
	)

	def mark_dirty(self):
	"""Mark this node as needing DB sync."""
	self._dirty = True
	self.updated_at = utils.timestamp_now()


	class Edge:
	"""A directed relationship between two nodes."""

	__slots__ = [
	'id', 'from_node', 'to_node', 'relation', 'weight',
	'confidence', 'source', 'used_count', 'created_at',
	'_dirty'
	]

	def __init__(
	self,
	edge_id: str,
	from_node: str,
	to_node: str,
	relation: str = "related_to",
	weight: float = 1.0,
	confidence: float = 1.0,
	source: str = "data",
	used_count: int = 0,
	created_at: str = ""
	):
	self.id = edge_id
	self.from_node = from_node
	self.to_node = to_node
	self.relation = relation
	self.weight = weight
	self.confidence = confidence
	self.source = source
	self.used_count = used_count
	self.created_at = created_at or utils.timestamp_now()
	self._dirty = False

	def to_dict(self) -> dict:
	"""Serialize to dict for DB storage."""
	return {
	"id": self.id,
	"from_node": self.from_node,
	"to_node": self.to_node,
	"relation": self.relation,
	"weight": round(self.weight, 6),
	"confidence": round(self.confidence, 6),
	"source": self.source,
	"used_count": self.used_count,
	"created_at": self.created_at
	}

	@staticmethod
	def from_dict(data: dict) -> 'Edge':
	"""Deserialize from dict."""
	return Edge(
	edge_id=data["id"],
	from_node=data["from_node"],
	to_node=data["to_node"],
	relation=data.get("relation", "related_to"),
	weight=float(data.get("weight", 1.0)),
	confidence=float(data.get("confidence", 1.0)),
	source=data.get("source", "data"),
	used_count=int(data.get("used_count", 0)),
	created_at=data.get("created_at", "")
	)

	def mark_dirty(self):
	"""Mark edge as needing DB sync."""
	self._dirty = True


	class ReasoningChain:
	"""A discovered path of reasoning through the graph."""

	__slots__ = [
	'id', 'path', 'conclusion', 'confidence',
	'used_count', 'created_at'
	]

	def __init__(
	self,
	chain_id: str,
	path: list,
	conclusion: str = "",
	confidence: float = 0.5,
	used_count: int = 0,
	created_at: str = ""
	):
	self.id = chain_id
	self.path = path # [node_id, edge_id, node_id, edge_id, ...]
	self.conclusion = conclusion
	self.confidence = confidence
	self.used_count = used_count
	self.created_at = created_at or utils.timestamp_now()

	def to_dict(self) -> dict:
	return {
	"id": self.id,
	"path": self.path,
	"conclusion": self.conclusion,
	"confidence": round(self.confidence, 6),
	"used_count": self.used_count,
	"created_at": self.created_at
	}

	@staticmethod
	def from_dict(data: dict) -> 'ReasoningChain':
	return ReasoningChain(
	chain_id=data["id"],
	path=data.get("path", []),
	conclusion=data.get("conclusion", ""),
	confidence=float(data.get("confidence", 0.5)),
	used_count=int(data.get("used_count", 0)),
	created_at=data.get("created_at", "")
	)


	# ═══════════════════════════════════════════════════════════
	# KNOWLEDGE GRAPH
	# ═══════════════════════════════════════════════════════════

	class KnowledgeGraph:
	"""
	In-memory knowledge graph with persistence via Memory.

	Structure:
	- nodes: dict of Node objects indexed by id
	- edges: dict of Edge objects indexed by id
	- adjacency_out: node_id → list of edge_ids (outgoing)
	- adjacency_in: node_id → list of edge_ids (incoming)
	- vector_index: numpy matrix of all node vectors for fast search
	- chains: dict of ReasoningChain objects

	Thread-safe via read-write lock:
	- Multiple readers allowed simultaneously
	- Writers get exclusive access
	"""

	def __init__(self, memory: Memory):
	self.memory = memory

	# Core data
	self.nodes: Dict[str, Node] = {}
	self.edges: Dict[str, Edge] = {}
	self.chains: Dict[str, ReasoningChain] = {}

	# Adjacency indexes
	self._adj_out: Dict[str, List[str]] = defaultdict(list) # node → [edge_ids outgoing]
	self._adj_in: Dict[str, List[str]] = defaultdict(list) # node → [edge_ids incoming]

	# Vector index for fast similarity search
	self._vector_matrix: Optional[np.ndarray] = None
	self._vector_node_ids: List[str] = []
	self._vector_index_dirty = True

	# Thread safety
	self._lock = threading.RLock()

	# Stats
	self._stats = {
	"total_nodes": 0,
	"total_edges": 0,
	"total_chains": 0,
	"inferred_nodes": 0,
	"inferred_edges": 0,
	"max_abstraction_depth": 0,
	"avg_connections": 0.0,
	"avg_confidence": 0.0
	}

	# ───────────────────────────────────────────────────
	# INITIALIZATION
	# ───────────────────────────────────────────────────

	def load_from_memory(self) -> bool:
	"""
	Load entire graph from TiDB via Memory.
	Called once at startup.
	"""
	state = self.memory.load_full_state()

	if not state.get("loaded", False) and not state["nodes"]:
	print("[GRAPH] No existing state found. Starting fresh.")
	self._rebuild_stats()
	return True

	with self._lock:
	# Load nodes
	for node_data in state["nodes"]:
	node = Node.from_dict(node_data)
	self.nodes[node.id] = node

	# Load edges
	for edge_data in state["edges"]:
	edge = Edge.from_dict(edge_data)
	self.edges[edge.id] = edge
	self._adj_out[edge.from_node].append(edge.id)
	self._adj_in[edge.to_node].append(edge.id)

	# Load chains
	for chain_data in state["chains"]:
	chain = ReasoningChain.from_dict(chain_data)
	self.chains[chain.id] = chain

	# Rebuild vector index
	self._rebuild_vector_index()
	self._rebuild_stats()

	print(f"[GRAPH] Loaded: {len(self.nodes)} nodes, "
	f"{len(self.edges)} edges, {len(self.chains)} chains")
	return True

	# ───────────────────────────────────────────────────
	# NODE OPERATIONS
	# ───────────────────────────────────────────────────

	def add_node(
	self,
	content: str,
	node_type: str = "fact",
	source: str = "data",
	weight: float = None,
	vector: np.ndarray = None,
	node_id: str = None,
	tags: List[str] = None
	) -> Optional[Node]:
	"""
	Add a new node to the graph.
	If node with same id exists, update it instead.
	Returns the node, or None if invalid.
	"""
	if not content or not content.strip():
	return None

	content = content.strip()

	if node_id is None:
	node_id = config.generate_node_id(content, node_type)

	# Generate vector if not provided
	if vector is None:
	vector = utils.text_to_vector_tfidf(content)

	# Register content with TF-IDF corpus
	tokens = utils.tokenize(content, remove_stopwords=True)
	utils.tfidf.add_document(tokens)

	if weight is None:
	weight = (config.DATA_KNOWLEDGE_CONFIDENCE
	if source == "data"
	else config.USER_KNOWLEDGE_CONFIDENCE)

	with self._lock:
	if node_id in self.nodes:
	# Update existing node
	existing = self.nodes[node_id]
	# Reinforce weight if seen again
	existing.weight = min(
	existing.weight * config.WEIGHT_REINFORCE,
	config.WEIGHT_MAX
	)
	existing.mark_dirty()
	self.memory.save_node(existing.to_dict())
	return existing

	# Create new node
	node = Node(
	node_id=node_id,
	node_type=node_type,
	content=content,
	vector=vector,
	weight=weight,
	connections=0,
	source=source
	)

	# Safety check
	if len(self.nodes) >= config.MAX_GRAPH_MEMORY_NODES:
	print(f"[GRAPH] Node limit reached ({config.MAX_GRAPH_MEMORY_NODES}). Skipping.")
	return None

	self.nodes[node_id] = node
	self._vector_index_dirty = True

	# Buffer for DB write
	node._dirty = True
	self.memory.save_node(node.to_dict())

	# Create edges from tags
	if tags:
	for tag in tags:
	tag_id = config.generate_node_id(tag, "concept")
	if tag_id not in self.nodes:
	self.add_node(
	content=tag,
	node_type="concept",
	source=source,
	weight=weight * 0.8
	)
	self.add_edge(
	from_id=node_id,
	to_id=tag_id,
	relation="related_to",
	source=source,
	confidence=weight * 0.7
	)

	return node

	def get_node(self, node_id: str) -> Optional[Node]:
	"""Get a node by id."""
	return self.nodes.get(node_id)

	def get_node_by_content(self, content: str, node_type: str = "") -> Optional[Node]:
	"""Find node by exact content match."""
	node_id = config.generate_node_id(content.strip(), node_type)
	return self.nodes.get(node_id)

	def remove_node(self, node_id: str) -> bool:
	"""Remove a node and all its edges."""
	with self._lock:
	if node_id not in self.nodes:
	return False

	# Remove connected edges
	edge_ids_to_remove = []
	edge_ids_to_remove.extend(self._adj_out.get(node_id, []))
	edge_ids_to_remove.extend(self._adj_in.get(node_id, []))

	for edge_id in set(edge_ids_to_remove):
	self._remove_edge_internal(edge_id)

	# Remove adjacency entries
	self._adj_out.pop(node_id, None)
	self._adj_in.pop(node_id, None)

	# Remove node
	del self.nodes[node_id]
	self._vector_index_dirty = True

	# Buffer for DB delete
	self.memory.delete_node(node_id)

	return True

	def update_node_weight(self, node_id: str, new_weight: float):
	"""Update a node's weight."""
	with self._lock:
	node = self.nodes.get(node_id)
	if node:
	node.weight = utils.clamp(new_weight, config.WEIGHT_MIN, config.WEIGHT_MAX)
	node.mark_dirty()
	self.memory.save_node(node.to_dict())

	def get_nodes_by_type(self, node_type: str) -> List[Node]:
	"""Get all nodes of a specific type."""
	return [n for n in self.nodes.values() if n.type == node_type]

	def get_nodes_by_source(self, source: str) -> List[Node]:
	"""Get all nodes from a specific source."""
	return [n for n in self.nodes.values() if n.source == source]

	def get_weakest_nodes(self, limit: int = 50) -> List[Node]:
	"""Get nodes with lowest weight (candidates for pruning)."""
	sorted_nodes = sorted(self.nodes.values(), key=lambda n: n.weight)
	return sorted_nodes[:limit]

	def get_least_connected_nodes(self, limit: int = 50) -> List[Node]:
	"""Get nodes with fewest connections (candidates for connecting)."""
	sorted_nodes = sorted(self.nodes.values(), key=lambda n: n.connections)
	return sorted_nodes[:limit]

	# ───────────────────────────────────────────────────
	# EDGE OPERATIONS
	# ───────────────────────────────────────────────────

	def add_edge(
	self,
	from_id: str,
	to_id: str,
	relation: str = "related_to",
	weight: float = 1.0,
	confidence: float = 1.0,
	source: str = "data",
	edge_id: str = None
	) -> Optional[Edge]:
	"""
	Add a directed edge between two nodes.
	If edge exists, reinforce it.
	"""
	if from_id == to_id:
	return None # No self-loops

	if from_id not in self.nodes or to_id not in self.nodes:
	return None # Both nodes must exist

	if edge_id is None:
	edge_id = config.generate_edge_id(from_id, to_id, relation)

	with self._lock:
	if edge_id in self.edges:
	# Reinforce existing edge
	existing = self.edges[edge_id]
	existing.weight = min(
	existing.weight * config.WEIGHT_REINFORCE,
	config.WEIGHT_MAX
	)
	existing.confidence = min(
	(existing.confidence + confidence) / 2.0 * 1.05,
	1.0
	)
	existing.mark_dirty()
	self.memory.save_edge(existing.to_dict())
	return existing

	# Safety check
	if len(self.edges) >= config.MAX_GRAPH_MEMORY_EDGES:
	print(f"[GRAPH] Edge limit reached ({config.MAX_GRAPH_MEMORY_EDGES}). Skipping.")
	return None

	edge = Edge(
	edge_id=edge_id,
	from_node=from_id,
	to_node=to_id,
	relation=relation,
	weight=weight,
	confidence=confidence,
	source=source
	)

	self.edges[edge_id] = edge
	self._adj_out[from_id].append(edge_id)
	self._adj_in[to_id].append(edge_id)

	# Update connection counts
	self.nodes[from_id].connections += 1
	self.nodes[to_id].connections += 1

	# Buffer for DB
	edge._dirty = True
	self.memory.save_edge(edge.to_dict())

	return edge

	def get_edge(self, edge_id: str) -> Optional[Edge]:
	"""Get an edge by id."""
	return self.edges.get(edge_id)

	def get_edges_from(self, node_id: str) -> List[Edge]:
	"""Get all outgoing edges from a node."""
	edge_ids = self._adj_out.get(node_id, [])
	return [self.edges[eid] for eid in edge_ids if eid in self.edges]

	def get_edges_to(self, node_id: str) -> List[Edge]:
	"""Get all incoming edges to a node."""
	edge_ids = self._adj_in.get(node_id, [])
	return [self.edges[eid] for eid in edge_ids if eid in self.edges]

	def get_all_edges_for(self, node_id: str) -> List[Edge]:
	"""Get all edges (in + out) connected to a node."""
	edges = self.get_edges_from(node_id)
	edges.extend(self.get_edges_to(node_id))
	return edges

	def get_neighbors(self, node_id: str) -> List[Tuple[Node, Edge]]:
	"""Get all neighboring nodes with their connecting edges."""
	neighbors = []
	for edge in self.get_edges_from(node_id):
	target = self.nodes.get(edge.to_node)
	if target:
	neighbors.append((target, edge))
	for edge in self.get_edges_to(node_id):
	source = self.nodes.get(edge.from_node)
	if source:
	neighbors.append((source, edge))
	return neighbors

	def edge_exists(self, from_id: str, to_id: str, relation: str = None) -> bool:
	"""Check if an edge exists between two nodes."""
	for edge_id in self._adj_out.get(from_id, []):
	edge = self.edges.get(edge_id)
	if edge and edge.to_node == to_id:
	if relation is None or edge.relation == relation:
	return True
	return False

	def remove_edge(self, edge_id: str) -> bool:
	"""Remove an edge."""
	with self._lock:
	return self._remove_edge_internal(edge_id)

	def _remove_edge_internal(self, edge_id: str) -> bool:
	"""Internal edge removal (must be called under lock)."""
	edge = self.edges.get(edge_id)
	if not edge:
	return False

	# Remove from adjacency
	if edge_id in self._adj_out.get(edge.from_node, []):
	self._adj_out[edge.from_node].remove(edge_id)
	if edge_id in self._adj_in.get(edge.to_node, []):
	self._adj_in[edge.to_node].remove(edge_id)

	# Update connection counts
	from_node = self.nodes.get(edge.from_node)
	to_node = self.nodes.get(edge.to_node)
	if from_node:
	from_node.connections = max(0, from_node.connections - 1)
	if to_node:
	to_node.connections = max(0, to_node.connections - 1)

	# Remove edge
	del self.edges[edge_id]
	self.memory.delete_edge(edge_id)

	return True

	def reinforce_edge(self, edge_id: str, factor: float = None):
	"""Increase edge weight (used when edge participates in response)."""
	if factor is None:
	factor = config.WEIGHT_REINFORCE
	with self._lock:
	edge = self.edges.get(edge_id)
	if edge:
	edge.weight = min(edge.weight * factor, config.WEIGHT_MAX)
	edge.used_count += 1
	edge.mark_dirty()
	self.memory.save_edge(edge.to_dict())

	def decay_edge(self, edge_id: str, factor: float = None):
	"""Decrease edge weight (unused edge decay)."""
	if factor is None:
	factor = config.WEIGHT_DECAY_RATE
	with self._lock:
	edge = self.edges.get(edge_id)
	if edge:
	edge.weight = max(edge.weight * factor, config.WEIGHT_MIN)
	edge.mark_dirty()
	self.memory.save_edge(edge.to_dict())

	def get_weakest_edges(self, limit: int = 100, source_filter: str = "inferred") -> List[Edge]:
	"""Get edges with lowest weight (candidates for pruning)."""
	filtered = [
	e for e in self.edges.values()
	if source_filter is None or e.source == source_filter
	]
	sorted_edges = sorted(filtered, key=lambda e: e.weight)
	return sorted_edges[:limit]

	# ───────────────────────────────────────────────────
	# VECTOR INDEX & SIMILARITY SEARCH
	# ───────────────────────────────────────────────────

	def _rebuild_vector_index(self):
	"""Rebuild the vector matrix for fast batch similarity search."""
	with self._lock:
	if not self.nodes:
	self._vector_matrix = np.zeros((0, config.VECTOR_DIM), dtype=np.float32)
	self._vector_node_ids = []
	self._vector_index_dirty = False
	return

	node_ids = []
	vectors = []
	for nid, node in self.nodes.items():
	if node.vector is not None and len(node.vector) == config.VECTOR_DIM:
	node_ids.append(nid)
	vectors.append(node.vector)

	if vectors:
	self._vector_matrix = np.array(vectors, dtype=np.float32)
	else:
	self._vector_matrix = np.zeros((0, config.VECTOR_DIM), dtype=np.float32)
	self._vector_node_ids = node_ids
	self._vector_index_dirty = False

	def _ensure_vector_index(self):
	"""Rebuild vector index if dirty."""
	if self._vector_index_dirty:
	self._rebuild_vector_index()

	def find_similar_nodes(
	self,
	query_vector: np.ndarray,
	top_k: int = None,
	min_similarity: float = 0.0,
	exclude_ids: Set[str] = None,
	type_filter: str = None
	) -> List[Tuple[Node, float]]:
	"""
	Find nodes most similar to query vector.
	Returns list of (node, similarity_score) sorted by similarity desc.
	"""
	if top_k is None:
	top_k = config.MAX_NODES_PER_SEARCH

	self._ensure_vector_index()

	if self._vector_matrix.shape[0] == 0:
	return []

	# Batch cosine similarity
	similarities = utils.batch_cosine_similarity(query_vector, self._vector_matrix)

	# Apply filters and sort
	results = []
	for i, sim in enumerate(similarities):
	sim_val = float(sim)
	if sim_val < min_similarity:
	continue
	node_id = self._vector_node_ids[i]
	if exclude_ids and node_id in exclude_ids:
	continue
	node = self.nodes.get(node_id)
	if not node:
	continue
	if type_filter and node.type != type_filter:
	continue
	results.append((node, sim_val))

	# Sort by similarity descending
	results.sort(key=lambda x: x[1], reverse=True)

	return results[:top_k]

	def find_similar_to_text(
	self,
	text: str,
	top_k: int = None,
	min_similarity: float = 0.0,
	exclude_ids: Set[str] = None,
	type_filter: str = None
	) -> List[Tuple[Node, float]]:
	"""
	Find nodes most similar to a text query.
	Convenience wrapper around find_similar_nodes.
	"""
	query_vector = utils.text_to_vector_tfidf(text)
	return self.find_similar_nodes(
	query_vector, top_k, min_similarity,
	exclude_ids, type_filter
	)

	def find_similar_to_node(
	self,
	node_id: str,
	top_k: int = None,
	min_similarity: float = None
	) -> List[Tuple[Node, float]]:
	"""Find nodes most similar to an existing node."""
	node = self.nodes.get(node_id)
	if not node:
	return []
	if min_similarity is None:
	min_similarity = config.SIMILARITY_THRESHOLD
	return self.find_similar_nodes(
	node.vector, top_k, min_similarity,
	exclude_ids={node_id}
	)

	# ───────────────────────────────────────────────────
	# GRAPH TRAVERSAL
	# ───────────────────────────────────────────────────

	def traverse_bfs(
	self,
	start_ids: List[str],
	max_depth: int = None,
	max_nodes: int = 100
	) -> Dict[str, Tuple[int, List[str]]]:
	"""
	Breadth-first traversal from starting nodes.
	Returns: {node_id: (depth, [path_from_start])}
	"""
	if max_depth is None:
	max_depth = config.MAX_TRAVERSAL_DEPTH

	visited = {} # node_id → (depth, path)
	queue = []

	for sid in start_ids:
	if sid in self.nodes:
	visited[sid] = (0, [sid])
	queue.append((sid, 0, [sid]))

	while queue and len(visited) < max_nodes:
	current_id, depth, path = queue.pop(0)
	if depth >= max_depth:
	continue

	for neighbor, edge in self.get_neighbors(current_id):
	if neighbor.id not in visited:
	new_path = path + [edge.id, neighbor.id]
	visited[neighbor.id] = (depth + 1, new_path)
	queue.append((neighbor.id, depth + 1, new_path))

	return visited

	def traverse_weighted_random(
	self,
	start_id: str,
	max_depth: int = None,
	temperature: float = 0.7
	) -> List[Tuple[str, str]]:
	"""
	Weighted random walk from a starting node.
	Edge weight determines probability of following that edge.
	Returns: [(node_id, edge_id), ...] — the path taken.
	"""
	if max_depth is None:
	max_depth = config.MAX_TRAVERSAL_DEPTH

	if start_id not in self.nodes:
	return []

	path = [(start_id, "")]
	visited = {start_id}
	current = start_id

	for _ in range(max_depth):
	neighbors = self.get_neighbors(current)
	# Filter out already visited
	unvisited = [
	(node, edge) for node, edge in neighbors
	if node.id not in visited
	]

	if not unvisited:
	break

	# Weight-based selection
	items = unvisited
	weights = [
	edge.weight * edge.confidence * node.weight
	for node, edge in items
	]

	chosen_node, chosen_edge = utils.weighted_choice(
	items, weights, temperature
	)

	visited.add(chosen_node.id)
	path.append((chosen_node.id, chosen_edge.id))
	current = chosen_node.id

	return path

	def find_paths(
	self,
	from_id: str,
	to_id: str,
	max_depth: int = None,
	max_paths: int = 5
	) -> List[List[str]]:
	"""
	Find paths between two nodes using DFS.
	Returns list of paths, each path is [node_id, edge_id, node_id, ...].
	"""
	if max_depth is None:
	max_depth = config.MAX_TRAVERSAL_DEPTH

	if from_id not in self.nodes or to_id not in self.nodes:
	return []

	all_paths = []

	def dfs(current: str, target: str, path: list, visited: set, depth: int):
	if len(all_paths) >= max_paths:
	return
	if depth > max_depth:
	return
	if current == target:
	all_paths.append(list(path))
	return

	for neighbor, edge in self.get_neighbors(current):
	if neighbor.id not in visited:
	visited.add(neighbor.id)
	path.extend([edge.id, neighbor.id])
	dfs(neighbor.id, target, path, visited, depth + 1)
	# Backtrack
	path.pop()
	path.pop()
	visited.discard(neighbor.id)

	dfs(from_id, to_id, [from_id], {from_id}, 0)
	return all_paths

	# ───────────────────────────────────────────────────
	# REASONING CHAINS
	# ───────────────────────────────────────────────────

	def build_reasoning_chains(
	self,
	start_nodes: List[str],
	max_chains: int = None,
	max_depth: int = None
	) -> List[ReasoningChain]:
	"""
	Build reasoning chains from starting nodes.
	Combines BFS exploration with weighted random walks.
	Returns scored and sorted chains.
	"""
	if max_chains is None:
	max_chains = config.MAX_CHAINS_PER_RESPONSE
	if max_depth is None:
	max_depth = config.MAX_TRAVERSAL_DEPTH

	chains = []

	for start_id in start_nodes:
	if start_id not in self.nodes:
	continue

	# Strategy 1: Weighted random walks (multiple)
	for _ in range(min(3, max_chains)):
	walk = self.traverse_weighted_random(start_id, max_depth)
	if len(walk) >= 2:
	path = []
	for node_id, edge_id in walk:
	if edge_id:
	path.append(edge_id)
	path.append(node_id)

	confidence = self._score_chain(path)
	conclusion = self._chain_to_conclusion(path)

	chain = ReasoningChain(
	chain_id=config.generate_chain_id(path),
	path=path,
	conclusion=conclusion,
	confidence=confidence
	)
	chains.append(chain)

	# Strategy 2: Follow high-weight edges
	high_weight_path = self._follow_strongest_path(start_id, max_depth)
	if len(high_weight_path) >= 3:
	confidence = self._score_chain(high_weight_path)
	conclusion = self._chain_to_conclusion(high_weight_path)

	chain = ReasoningChain(
	chain_id=config.generate_chain_id(high_weight_path),
	path=high_weight_path,
	conclusion=conclusion,
	confidence=confidence
	)
	chains.append(chain)

	# Deduplicate by chain id
	seen = set()
	unique_chains = []
	for c in chains:
	if c.id not in seen:
	seen.add(c.id)
	unique_chains.append(c)

	# Sort by confidence descending
	unique_chains.sort(key=lambda c: c.confidence, reverse=True)
	return unique_chains[:max_chains]

	def _follow_strongest_path(self, start_id: str, max_depth: int) -> list:
	"""Follow the highest-weight edges from a starting node."""
	path = [start_id]
	visited = {start_id}
	current = start_id

	for _ in range(max_depth):
	edges = self.get_edges_from(current)
	# Filter unvisited
	candidates = [
	e for e in edges
	if e.to_node not in visited and e.to_node in self.nodes
	]
	if not candidates:
	break

	# Pick strongest edge
	best_edge = max(candidates, key=lambda e: e.weight * e.confidence)
	path.append(best_edge.id)
	path.append(best_edge.to_node)
	visited.add(best_edge.to_node)
	current = best_edge.to_node

	return path

	def _score_chain(self, path: list) -> float:
	"""
	Score a reasoning chain.
	Considers: edge weights, confidences, chain length, node weights.
	"""
	if len(path) < 3:
	return 0.0

	edge_scores = []
	node_weights = []

	for item_id in path:
	if item_id in self.edges:
	edge = self.edges[item_id]
	edge_scores.append(edge.weight * edge.confidence)
	elif item_id in self.nodes:
	node_weights.append(self.nodes[item_id].weight)

	if not edge_scores:
	return 0.0

	avg_edge_score = sum(edge_scores) / len(edge_scores)
	avg_node_weight = sum(node_weights) / len(node_weights) if node_weights else 0.5

	# Shorter chains are generally more reliable
	length_penalty = 1.0 / (1.0 + 0.1 * len(edge_scores))

	score = avg_edge_score * avg_node_weight * length_penalty
	return utils.clamp(score, 0.0, 1.0)

	def _chain_to_conclusion(self, path: list) -> str:
	"""
	Generate a text conclusion from a reasoning chain path.
	Extracts content from nodes in the path.
	"""
	node_contents = []
	for item_id in path:
	node = self.nodes.get(item_id)
	if node:
	node_contents.append(node.content)

	if not node_contents:
	return ""
	return " → ".join(node_contents)

	def save_chain(self, chain: ReasoningChain):
	"""Save a reasoning chain."""
	with self._lock:
	self.chains[chain.id] = chain
	self.memory.save_chain(chain.to_dict())

	def reinforce_chain(self, chain_id: str):
	"""Reinforce a chain that was used in a response."""
	with self._lock:
	chain = self.chains.get(chain_id)
	if chain:
	chain.used_count += 1
	chain.confidence = min(chain.confidence * 1.02, 1.0)
	self.memory.save_chain(chain.to_dict())

	# Also reinforce all edges in the chain
	for item_id in chain.path:
	if item_id in self.edges:
	self.reinforce_edge(item_id)

	# ───────────────────────────────────────────────────
	# MERGE & PRUNE
	# ───────────────────────────────────────────────────

	def merge_nodes(self, node_id_keep: str, node_id_remove: str) -> bool:
	"""
	Merge two redundant nodes. Keep the first, remove the second.
	Redirect all edges from removed node to kept node.
	"""
	with self._lock:
	keep = self.nodes.get(node_id_keep)
	remove = self.nodes.get(node_id_remove)

	if not keep or not remove:
	return False

	# Combine weights
	keep.weight = min(keep.weight + remove.weight * 0.5, config.WEIGHT_MAX)

	# Average vectors
	keep.vector = utils.normalize(
	utils.vector_add(keep.vector, remove.vector) / 2.0
	)

	# Redirect edges
	edges_to_redirect = self.get_all_edges_for(node_id_remove)
	for edge in edges_to_redirect:
	new_from = node_id_keep if edge.from_node == node_id_remove else edge.from_node
	new_to = node_id_keep if edge.to_node == node_id_remove else edge.to_node

	if new_from == new_to:
	continue # Would create self-loop

	# Create redirected edge if doesn't exist
	if not self.edge_exists(new_from, new_to, edge.relation):
	self.add_edge(
	from_id=new_from,
	to_id=new_to,
	relation=edge.relation,
	weight=edge.weight,
	confidence=edge.confidence,
	source=edge.source
	)

	# Remove the merged node (and its old edges)
	self.remove_node(node_id_remove)
	keep.mark_dirty()
	self.memory.save_node(keep.to_dict())

	self._vector_index_dirty = True

	return True

	def prune_weak_edges(self, threshold: float = None) -> int:
	"""Remove edges below weight threshold. Returns count removed."""
	if threshold is None:
	threshold = config.PRUNE_WEIGHT_THRESHOLD

	to_remove = []
	for edge in self.edges.values():
	if edge.weight < threshold and edge.source == "inferred":
	to_remove.append(edge.id)

	with self._lock:
	for edge_id in to_remove:
	self._remove_edge_internal(edge_id)

	return len(to_remove)

	def prune_orphan_nodes(self) -> int:
	"""Remove nodes with no connections and low weight. Returns count removed."""
	to_remove = []
	for node in self.nodes.values():
	if (node.connections == 0 and
	node.weight < config.WEIGHT_MIN * 2 and
	node.source == "inferred"):
	to_remove.append(node.id)

	with self._lock:
	for node_id in to_remove:
	if node_id in self.nodes:
	del self.nodes[node_id]
	self.memory.delete_node(node_id)

	if to_remove:
	self._vector_index_dirty = True

	return len(to_remove)

	def find_redundant_pairs(self, limit: int = 20) -> List[Tuple[str, str, float]]:
	"""
	Find pairs of nodes that might be redundant (very high similarity).
	Returns [(node_id_1, node_id_2, similarity), ...]
	"""
	self._ensure_vector_index()
	pairs = []

	node_list = list(self.nodes.values())
	# Sample to avoid O(n²) for large graphs
	if len(node_list) > 500:
	sample_indices = np.random.choice(len(node_list), 500, replace=False)
	node_list = [node_list[i] for i in sample_indices]

	for i in range(len(node_list)):
	for j in range(i + 1, len(node_list)):
	n1 = node_list[i]
	n2 = node_list[j]
	if n1.type != n2.type:
	continue # Only merge same-type nodes
	sim = utils.cosine_similarity(n1.vector, n2.vector)
	if sim >= config.MERGE_THRESHOLD:
	pairs.append((n1.id, n2.id, sim))
	if len(pairs) >= limit:
	return pairs

	return pairs

	# ───────────────────────────────────────────────────
	# STATISTICS
	# ───────────────────────────────────────────────────

	def _rebuild_stats(self):
	"""Rebuild graph statistics."""
	total_nodes = len(self.nodes)
	total_edges = len(self.edges)

	inferred_nodes = sum(1 for n in self.nodes.values() if n.source == "inferred")
	inferred_edges = sum(1 for e in self.edges.values() if e.source == "inferred")

	avg_connections = 0.0
	if total_nodes > 0:
	avg_connections = sum(n.connections for n in self.nodes.values()) / total_nodes

	avg_confidence = 0.0
	if total_edges > 0:
	avg_confidence = sum(e.confidence for e in self.edges.values()) / total_edges

	# Max abstraction depth
	max_depth = 0
	for node in self.nodes.values():
	if node.type == "abstraction":
	depth = self._get_abstraction_depth(node.id)
	max_depth = max(max_depth, depth)

	self._stats = {
	"total_nodes": total_nodes,
	"total_edges": total_edges,
	"total_chains": len(self.chains),
	"inferred_nodes": inferred_nodes,
	"inferred_edges": inferred_edges,
	"max_abstraction_depth": max_depth,
	"avg_connections": round(avg_connections, 2),
	"avg_confidence": round(avg_confidence, 4),
	"inference_ratio": round(
	inferred_edges / max(total_edges, 1), 4
	),
	"avg_chain_length": round(
	sum(len(c.path) for c in self.chains.values()) / max(len(self.chains), 1), 2
	)
	}

	def _get_abstraction_depth(self, node_id: str, visited: set = None) -> int:
	"""Get the abstraction depth of a node (recursive)."""
	if visited is None:
	visited = set()
	if node_id in visited:
	return 0
	visited.add(node_id)

	max_child_depth = 0
	for edge in self.get_edges_to(node_id):
	if edge.relation == "instance_of":
	child_depth = self._get_abstraction_depth(edge.from_node, visited)
	max_child_depth = max(max_child_depth, child_depth)

	return max_child_depth + 1 if max_child_depth > 0 else (
	1 if self.nodes.get(node_id, Node("", "", "")).type in ("abstraction", "meta_abstraction") else 0
	)

	def get_stats(self) -> dict:
	"""Get current graph statistics."""
	self._rebuild_stats()
	return dict(self._stats)

	def get_intelligence_score(self) -> float:
	"""Calculate and return intelligence score."""
	self._rebuild_stats()
	return utils.calculate_intelligence_score(self._stats)

	# ───────────────────────────────────────────────────
	# SYNC
	# ───────────────────────────────────────────────────

	def sync(self) -> Optional[dict]:
	"""Flush buffered changes to DB if needed."""
	return self.memory.flush_if_needed()

	def force_sync(self) -> dict:
	"""Force flush all buffered changes to DB."""
	return self.memory.flush()

	# ───────────────────────────────────────────────────
	# DEBUG / INSPECTION
	# ───────────────────────────────────────────────────

	def describe_node(self, node_id: str) -> Optional[dict]:
	"""Get detailed description of a node and its connections."""
	node = self.nodes.get(node_id)
	if not node:
	return None

	neighbors = self.get_neighbors(node_id)

	return {
	"id": node.id,
	"type": node.type,
	"content": node.content,
	"weight": node.weight,
	"connections": node.connections,
	"source": node.source,
	"neighbors": [
	{
	"node_id": n.id,
	"content": utils.truncate_text(n.content, 80),
	"relation": e.relation,
	"edge_weight": e.weight,
	"edge_confidence": e.confidence
	}
	for n, e in neighbors
	]
	}