sadidft committed on
Commit
d82945e
·
verified ·
1 Parent(s): 52c9fff

Create knowledge.py

Browse files
Files changed (1) hide show
  1. knowledge.py +1271 -0
knowledge.py ADDED
@@ -0,0 +1,1271 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Cogni-Engine v1 — Knowledge Graph Engine
3
+ In-memory graph structure with nodes, edges, traversal, similarity search.
4
+ This is the core data structure that represents all knowledge.
5
+ The "brain matter" — where concepts live and connect.
6
+ """
7
+
8
+ import time
9
+ import threading
10
+ import json
11
+ from typing import List, Dict, Optional, Set, Tuple, Any
12
+ from collections import defaultdict
13
+
14
+ import numpy as np
15
+
16
+ import config
17
+ import utils
18
+ from memory import Memory
19
+
20
+
21
+ # ═══════════════════════════════════════════════════════════
22
+ # DATA STRUCTURES
23
+ # ═══════════════════════════════════════════════════════════
24
+
25
class Node:
    """One unit of knowledge held by the graph.

    Instances are slot-based (no per-instance ``__dict__``). Fields:
    ``id``, ``type``, ``content``, ``vector`` (numpy array), ``weight``,
    ``connections`` (edge count), ``source``, ``created_at``, ``updated_at``
    and the private ``_dirty`` flag set by :meth:`mark_dirty`.
    """

    __slots__ = [
        'id',
        'type',
        'content',
        'vector',
        'weight',
        'connections',
        'source',
        'created_at',
        'updated_at',
        '_dirty',
    ]

    def __init__(
        self,
        node_id: str,
        node_type: str,
        content: str,
        vector: np.ndarray = None,
        weight: float = 1.0,
        connections: int = 0,
        source: str = "data",
        created_at: str = "",
        updated_at: str = ""
    ):
        """Build a node; missing vector/timestamps are filled with defaults.

        A missing ``vector`` becomes a float32 zero vector of length
        ``config.VECTOR_DIM``; empty timestamps are replaced by
        ``utils.timestamp_now()``.
        """
        if vector is None:
            vector = np.zeros(config.VECTOR_DIM, dtype=np.float32)
        if not created_at:
            created_at = utils.timestamp_now()
        if not updated_at:
            updated_at = utils.timestamp_now()

        self.id = node_id
        self.type = node_type
        self.content = content
        self.vector = vector
        self.weight = weight
        self.connections = connections
        self.source = source
        self.created_at = created_at
        self.updated_at = updated_at
        # A freshly built node has nothing pending for the DB yet.
        self._dirty = False

    def to_dict(self) -> dict:
        """Return a plain-dict snapshot for storage (``_dirty`` is omitted)."""
        vector_payload = utils.vector_to_list(self.vector)
        rounded_weight = round(self.weight, 6)
        return {
            "id": self.id,
            "type": self.type,
            "content": self.content,
            "vector": vector_payload,
            "weight": rounded_weight,
            "connections": self.connections,
            "source": self.source,
            "created_at": self.created_at,
            "updated_at": self.updated_at,
        }

    @staticmethod
    def from_dict(data: dict) -> 'Node':
        """Rebuild a node from a dict; only ``"id"`` is mandatory."""
        # An absent or empty "vector" entry leaves the constructor default in place.
        vector = utils.list_to_vector(data["vector"]) if data.get("vector") else None
        return Node(
            data["id"],
            data.get("type", "fact"),
            data.get("content", ""),
            vector,
            float(data.get("weight", 1.0)),
            int(data.get("connections", 0)),
            data.get("source", "data"),
            data.get("created_at", ""),
            data.get("updated_at", ""),
        )

    def mark_dirty(self):
        """Flag the node as needing a DB sync and refresh ``updated_at``."""
        self._dirty = True
        self.updated_at = utils.timestamp_now()
93
+
94
+
95
class Edge:
    """A directed, typed link from one node id to another.

    Slot-based record carrying the relation label, a ``weight``, a
    ``confidence``, an origin ``source``, a usage counter and a creation
    timestamp, plus the private ``_dirty`` flag set by :meth:`mark_dirty`.
    """

    __slots__ = [
        'id',
        'from_node',
        'to_node',
        'relation',
        'weight',
        'confidence',
        'source',
        'used_count',
        'created_at',
        '_dirty',
    ]

    def __init__(
        self,
        edge_id: str,
        from_node: str,
        to_node: str,
        relation: str = "related_to",
        weight: float = 1.0,
        confidence: float = 1.0,
        source: str = "data",
        used_count: int = 0,
        created_at: str = ""
    ):
        """Build an edge; an empty ``created_at`` is replaced by ``utils.timestamp_now()``."""
        if not created_at:
            created_at = utils.timestamp_now()

        self.id = edge_id
        self.from_node, self.to_node = from_node, to_node
        self.relation = relation
        self.weight, self.confidence = weight, confidence
        self.source = source
        self.used_count = used_count
        self.created_at = created_at
        # A freshly built edge has nothing pending for the DB yet.
        self._dirty = False

    def to_dict(self) -> dict:
        """Return a plain-dict snapshot for storage; floats are rounded to 6 places."""
        snapshot = {
            "id": self.id,
            "from_node": self.from_node,
            "to_node": self.to_node,
            "relation": self.relation,
        }
        snapshot["weight"] = round(self.weight, 6)
        snapshot["confidence"] = round(self.confidence, 6)
        snapshot["source"] = self.source
        snapshot["used_count"] = self.used_count
        snapshot["created_at"] = self.created_at
        return snapshot

    @staticmethod
    def from_dict(data: dict) -> 'Edge':
        """Rebuild an edge from a dict; ``id``, ``from_node`` and ``to_node`` are mandatory."""
        return Edge(
            data["id"],
            data["from_node"],
            data["to_node"],
            data.get("relation", "related_to"),
            float(data.get("weight", 1.0)),
            float(data.get("confidence", 1.0)),
            data.get("source", "data"),
            int(data.get("used_count", 0)),
            data.get("created_at", ""),
        )

    def mark_dirty(self):
        """Flag the edge as needing a DB sync."""
        self._dirty = True
159
+
160
+
161
class ReasoningChain:
    """A path through the graph together with its textual conclusion and score.

    ``path`` is a flat list alternating node ids and edge ids:
    ``[node_id, edge_id, node_id, edge_id, ...]``.
    """

    __slots__ = [
        'id',
        'path',
        'conclusion',
        'confidence',
        'used_count',
        'created_at',
    ]

    def __init__(
        self,
        chain_id: str,
        path: list,
        conclusion: str = "",
        confidence: float = 0.5,
        used_count: int = 0,
        created_at: str = ""
    ):
        """Build a chain; an empty ``created_at`` is replaced by ``utils.timestamp_now()``."""
        if not created_at:
            created_at = utils.timestamp_now()

        self.id = chain_id
        self.path = path  # stored by reference, not copied
        self.conclusion = conclusion
        self.confidence = confidence
        self.used_count = used_count
        self.created_at = created_at

    def to_dict(self) -> dict:
        """Return a plain-dict snapshot; ``confidence`` is rounded to 6 places."""
        rounded_confidence = round(self.confidence, 6)
        return {
            "id": self.id,
            "path": self.path,
            "conclusion": self.conclusion,
            "confidence": rounded_confidence,
            "used_count": self.used_count,
            "created_at": self.created_at,
        }

    @staticmethod
    def from_dict(data: dict) -> 'ReasoningChain':
        """Rebuild a chain from a dict; only ``"id"`` is mandatory."""
        return ReasoningChain(
            data["id"],
            data.get("path", []),
            data.get("conclusion", ""),
            float(data.get("confidence", 0.5)),
            int(data.get("used_count", 0)),
            data.get("created_at", ""),
        )
205
+
206
+
207
+ # ═══════════════════════════════════════════════════════════
208
+ # KNOWLEDGE GRAPH
209
+ # ═══════════════════════════════════════════════════════════
210
+
211
+ class KnowledgeGraph:
212
+ """
213
+ In-memory knowledge graph with persistence via Memory.
214
+
215
+ Structure:
216
+ - nodes: dict of Node objects indexed by id
217
+ - edges: dict of Edge objects indexed by id
218
+ - adjacency_out: node_id → list of edge_ids (outgoing)
219
+ - adjacency_in: node_id → list of edge_ids (incoming)
220
+ - vector_index: numpy matrix of all node vectors for fast search
221
+ - chains: dict of ReasoningChain objects
222
+
223
+ Thread safety: mutating methods serialize on one reentrant lock (threading.RLock).
224
+ - Read-only accessors do not take the lock
225
+ - Writers get exclusive access with respect to other writers
226
+ """
227
+
228
def __init__(self, memory: Memory):
    """Create an empty graph bound to ``memory`` (the persistence backend).

    Nothing is loaded here; all containers start empty and the vector
    index is flagged as needing a rebuild.
    """
    self.memory = memory

    # Guards every mutating operation; reentrant so locked methods may nest.
    self._lock = threading.RLock()

    # Primary stores, each keyed by the object's id.
    self.nodes: Dict[str, Node] = {}
    self.edges: Dict[str, Edge] = {}
    self.chains: Dict[str, ReasoningChain] = {}

    # node id -> ids of edges leaving / entering that node.
    self._adj_out: Dict[str, List[str]] = defaultdict(list)
    self._adj_in: Dict[str, List[str]] = defaultdict(list)

    # Row-aligned similarity index: row i of the matrix belongs to
    # self._vector_node_ids[i]. Built lazily, hence dirty from the start.
    self._vector_index_dirty = True
    self._vector_node_ids: List[str] = []
    self._vector_matrix: Optional[np.ndarray] = None

    # Aggregate counters; all start at zero.
    self._stats = dict(
        total_nodes=0,
        total_edges=0,
        total_chains=0,
        inferred_nodes=0,
        inferred_edges=0,
        max_abstraction_depth=0,
        avg_connections=0.0,
        avg_confidence=0.0,
    )
259
+
260
+ # ───────────────────────────────────────────────────
261
+ # INITIALIZATION
262
+ # ───────────────────────────────────────────────────
263
+
264
def load_from_memory(self) -> bool:
    """Populate the graph from ``self.memory.load_full_state()``.

    The state is a dict; ``"loaded"`` is optional, ``"nodes"`` is read
    whenever ``"loaded"`` is falsy, and ``"edges"``/``"chains"`` are read
    when there is something to load. Existing in-memory content is not
    cleared first. Returns True on both the fresh-start and the loaded path.
    """
    state = self.memory.load_full_state()

    has_saved_state = state.get("loaded", False) or state["nodes"]
    if not has_saved_state:
        print("[GRAPH] No existing state found. Starting fresh.")
        self._rebuild_stats()
        return True

    with self._lock:
        # Nodes
        for raw_node in state["nodes"]:
            loaded_node = Node.from_dict(raw_node)
            self.nodes[loaded_node.id] = loaded_node

        # Edges, registered in both adjacency indexes
        for raw_edge in state["edges"]:
            loaded_edge = Edge.from_dict(raw_edge)
            self.edges[loaded_edge.id] = loaded_edge
            self._adj_out[loaded_edge.from_node].append(loaded_edge.id)
            self._adj_in[loaded_edge.to_node].append(loaded_edge.id)

        # Reasoning chains
        for raw_chain in state["chains"]:
            loaded_chain = ReasoningChain.from_dict(raw_chain)
            self.chains[loaded_chain.id] = loaded_chain

        # Derived structures are recomputed from the freshly loaded data.
        self._rebuild_vector_index()
        self._rebuild_stats()

    print(f"[GRAPH] Loaded: {len(self.nodes)} nodes, "
          f"{len(self.edges)} edges, {len(self.chains)} chains")
    return True
301
+
302
+ # ───────────────────────────────────────────────────
303
+ # NODE OPERATIONS
304
+ # ───────────────────────────────────────────────────
305
+
306
def add_node(
    self,
    content: str,
    node_type: str = "fact",
    source: str = "data",
    weight: float = None,
    vector: np.ndarray = None,
    node_id: str = None,
    tags: List[str] = None
) -> Optional[Node]:
    """Insert a node, or reinforce the existing node with the same id.

    Args:
        content: Node text; surrounding whitespace is stripped.
        node_type: Category label stored on the node.
        source: Origin label; also selects the default weight.
        weight: Initial weight. When None, ``config.DATA_KNOWLEDGE_CONFIDENCE``
            is used for ``source == "data"`` and
            ``config.USER_KNOWLEDGE_CONFIDENCE`` otherwise.
        vector: Precomputed vector; when None it is derived from ``content``
            via ``utils.text_to_vector_tfidf``.
        node_id: Explicit id; when None it comes from
            ``config.generate_node_id(content, node_type)``.
        tags: Optional labels. For a newly created node, each tag becomes a
            ``"concept"`` node (if absent) linked by a ``"related_to"`` edge.
            Tags are ignored when an existing node is reinforced.

    Returns:
        The new or reinforced node, or None when ``content`` is empty/blank
        or the node limit (``config.MAX_GRAPH_MEMORY_NODES``) is reached.
    """
    if not content or not content.strip():
        return None

    content = content.strip()

    if node_id is None:
        node_id = config.generate_node_id(content, node_type)

    # Generate vector if not provided
    if vector is None:
        vector = utils.text_to_vector_tfidf(content)

    # Register content with TF-IDF corpus
    tokens = utils.tokenize(content, remove_stopwords=True)
    utils.tfidf.add_document(tokens)

    if weight is None:
        weight = (config.DATA_KNOWLEDGE_CONFIDENCE
                  if source == "data"
                  else config.USER_KNOWLEDGE_CONFIDENCE)

    with self._lock:
        existing = self.nodes.get(node_id)
        if existing is not None:
            # Seen again: multiply the weight up, capped at WEIGHT_MAX.
            existing.weight = min(
                existing.weight * config.WEIGHT_REINFORCE,
                config.WEIGHT_MAX
            )
            existing.mark_dirty()
            self.memory.save_node(existing.to_dict())
            return existing

        # Safety check
        if len(self.nodes) >= config.MAX_GRAPH_MEMORY_NODES:
            print(f"[GRAPH] Node limit reached ({config.MAX_GRAPH_MEMORY_NODES}). Skipping.")
            return None

        node = Node(
            node_id=node_id,
            node_type=node_type,
            content=content,
            vector=vector,
            weight=weight,
            connections=0,
            source=source
        )
        self.nodes[node_id] = node
        self._vector_index_dirty = True

        # Buffer for DB write
        node._dirty = True
        self.memory.save_node(node.to_dict())

    # Create edges from tags. add_node()/add_edge() take the (reentrant) lock
    # themselves, so this part does not need to hold it.
    for tag in tags or ():
        if not tag or not tag.strip():
            continue  # blank/None tags can never become a node
        # Normalize exactly like `content` above; otherwise the id computed
        # here can differ from the id the concept node is stored under and
        # the edge below would be dropped silently.
        tag = tag.strip()
        tag_id = config.generate_node_id(tag, "concept")
        if tag_id not in self.nodes:
            self.add_node(
                content=tag,
                node_type="concept",
                source=source,
                weight=weight * 0.8
            )
        self.add_edge(
            from_id=node_id,
            to_id=tag_id,
            relation="related_to",
            source=source,
            confidence=weight * 0.7
        )

    return node
398
+
399
def get_node(self, node_id: str) -> Optional[Node]:
    """Return the node stored under ``node_id``, or None when there is none."""
    try:
        return self.nodes[node_id]
    except KeyError:
        return None
402
+
403
def get_node_by_content(self, content: str, node_type: str = "") -> Optional[Node]:
    """Look a node up by the id derived from its stripped content and type.

    NOTE(review): the default ``node_type`` here is ``""`` while ``add_node``
    defaults to ``"fact"``; if ``config.generate_node_id`` folds the type into
    the id, callers must pass the type explicitly to find such nodes — confirm.
    """
    normalized = content.strip()
    derived_id = config.generate_node_id(normalized, node_type)
    return self.nodes.get(derived_id)
407
+
408
def remove_node(self, node_id: str) -> bool:
    """Delete a node together with every edge that touches it.

    Returns False when the id is unknown, True once the node, its edges and
    its adjacency entries are gone and the deletion was handed to
    ``self.memory.delete_node``.
    """
    with self._lock:
        if node_id not in self.nodes:
            return False

        # Collect outgoing and incoming edge ids; the set drops duplicates.
        outgoing = self._adj_out.get(node_id, [])
        incoming = self._adj_in.get(node_id, [])
        for incident_id in set([*outgoing, *incoming]):
            self._remove_edge_internal(incident_id)

        # Drop the (now stale) adjacency buckets of the node itself.
        for adjacency in (self._adj_out, self._adj_in):
            adjacency.pop(node_id, None)

        del self.nodes[node_id]
        # The similarity matrix still contains the removed row.
        self._vector_index_dirty = True

        # Buffer for DB delete
        self.memory.delete_node(node_id)
        return True
434
+
435
def update_node_weight(self, node_id: str, new_weight: float):
    """Set a node's weight, clamped to ``[config.WEIGHT_MIN, config.WEIGHT_MAX]``.

    Unknown ids are ignored. On success the node is marked dirty and handed
    to ``self.memory.save_node``.
    """
    with self._lock:
        target = self.nodes.get(node_id)
        if not target:
            return
        target.weight = utils.clamp(new_weight, config.WEIGHT_MIN, config.WEIGHT_MAX)
        target.mark_dirty()
        self.memory.save_node(target.to_dict())
443
+
444
def get_nodes_by_type(self, node_type: str) -> List[Node]:
    """Return every node whose ``type`` equals ``node_type`` (store order)."""
    matching = []
    for candidate in self.nodes.values():
        if candidate.type == node_type:
            matching.append(candidate)
    return matching
447
+
448
def get_nodes_by_source(self, source: str) -> List[Node]:
    """Return every node whose ``source`` equals ``source`` (store order)."""
    matching = []
    for candidate in self.nodes.values():
        if candidate.source == source:
            matching.append(candidate)
    return matching
451
+
452
def get_weakest_nodes(self, limit: int = 50) -> List[Node]:
    """Return up to ``limit`` nodes in ascending weight order (pruning candidates).

    Ties keep their store order because the sort is stable.
    """
    ranked = list(self.nodes.values())
    ranked.sort(key=lambda candidate: candidate.weight)
    return ranked[:limit]
456
+
457
def get_least_connected_nodes(self, limit: int = 50) -> List[Node]:
    """Return up to ``limit`` nodes in ascending ``connections`` order.

    Ties keep their store order because the sort is stable.
    """
    ranked = list(self.nodes.values())
    ranked.sort(key=lambda candidate: candidate.connections)
    return ranked[:limit]
461
+
462
+ # ───────────────────────────────────────────────────
463
+ # EDGE OPERATIONS
464
+ # ───────────────────────────────────────────────────
465
+
466
def add_edge(
    self,
    from_id: str,
    to_id: str,
    relation: str = "related_to",
    weight: float = 1.0,
    confidence: float = 1.0,
    source: str = "data",
    edge_id: str = None
) -> Optional[Edge]:
    """Insert a directed edge, or reinforce the edge that already has this id.

    Args:
        from_id, to_id: Ids of the endpoint nodes; both must exist and differ.
        relation: Relation label stored on the edge.
        weight, confidence: Initial values for a new edge. For an existing
            edge ``confidence`` is averaged with the stored confidence.
        source: Origin label stored on a new edge.
        edge_id: Explicit id; when None it comes from
            ``config.generate_edge_id(from_id, to_id, relation)``.

    Returns:
        The new or reinforced edge, or None for a self-loop, a missing
        endpoint, or when ``config.MAX_GRAPH_MEMORY_EDGES`` is reached.
    """
    if from_id == to_id:
        return None  # No self-loops

    with self._lock:
        # Endpoint validation happens under the lock: the connection counters
        # below index self.nodes directly, so a concurrent remove_node()
        # between an unlocked check and this point would raise KeyError.
        if from_id not in self.nodes or to_id not in self.nodes:
            return None  # Both nodes must exist

        if edge_id is None:
            edge_id = config.generate_edge_id(from_id, to_id, relation)

        existing = self.edges.get(edge_id)
        if existing is not None:
            # Reinforce existing edge: scale the weight up (capped) and move
            # the confidence to the mean of old and new, nudged up 5%, max 1.0.
            existing.weight = min(
                existing.weight * config.WEIGHT_REINFORCE,
                config.WEIGHT_MAX
            )
            existing.confidence = min(
                (existing.confidence + confidence) / 2.0 * 1.05,
                1.0
            )
            existing.mark_dirty()
            self.memory.save_edge(existing.to_dict())
            return existing

        # Safety check
        if len(self.edges) >= config.MAX_GRAPH_MEMORY_EDGES:
            print(f"[GRAPH] Edge limit reached ({config.MAX_GRAPH_MEMORY_EDGES}). Skipping.")
            return None

        edge = Edge(
            edge_id=edge_id,
            from_node=from_id,
            to_node=to_id,
            relation=relation,
            weight=weight,
            confidence=confidence,
            source=source
        )

        self.edges[edge_id] = edge
        self._adj_out[from_id].append(edge_id)
        self._adj_in[to_id].append(edge_id)

        # Update connection counts on both endpoints
        self.nodes[from_id].connections += 1
        self.nodes[to_id].connections += 1

        # Buffer for DB
        edge._dirty = True
        self.memory.save_edge(edge.to_dict())

        return edge
533
+
534
def get_edge(self, edge_id: str) -> Optional[Edge]:
    """Return the edge stored under ``edge_id``, or None when there is none."""
    try:
        return self.edges[edge_id]
    except KeyError:
        return None
537
+
538
def get_edges_from(self, node_id: str) -> List[Edge]:
    """Return the outgoing edges of ``node_id`` as a new list.

    Edge ids listed in the adjacency index but absent from ``self.edges``
    are skipped.
    """
    outgoing = []
    for edge_id in self._adj_out.get(node_id, []):
        if edge_id in self.edges:
            outgoing.append(self.edges[edge_id])
    return outgoing
542
+
543
def get_edges_to(self, node_id: str) -> List[Edge]:
    """Return the incoming edges of ``node_id`` as a new list.

    Edge ids listed in the adjacency index but absent from ``self.edges``
    are skipped.
    """
    incoming = []
    for edge_id in self._adj_in.get(node_id, []):
        if edge_id in self.edges:
            incoming.append(self.edges[edge_id])
    return incoming
547
+
548
def get_all_edges_for(self, node_id: str) -> List[Edge]:
    """Return the outgoing edges of ``node_id`` followed by its incoming edges."""
    outgoing = self.get_edges_from(node_id)
    incoming = self.get_edges_to(node_id)
    return [*outgoing, *incoming]
553
+
554
def get_neighbors(self, node_id: str) -> List[Tuple[Node, Edge]]:
    """Return ``(neighbor_node, connecting_edge)`` pairs for ``node_id``.

    Targets of outgoing edges come first, then sources of incoming edges.
    Edges whose other endpoint is not in ``self.nodes`` are left out.
    """
    via_outgoing = [
        (self.nodes.get(edge.to_node), edge)
        for edge in self.get_edges_from(node_id)
    ]
    via_incoming = [
        (self.nodes.get(edge.from_node), edge)
        for edge in self.get_edges_to(node_id)
    ]
    return [
        (other, edge)
        for other, edge in via_outgoing + via_incoming
        if other
    ]
566
+
567
def edge_exists(self, from_id: str, to_id: str, relation: str = None) -> bool:
    """Tell whether an edge runs from ``from_id`` to ``to_id``.

    With ``relation`` given, only an edge carrying exactly that relation
    counts; with None any relation matches. Direction matters: only the
    outgoing edges of ``from_id`` are inspected.
    """
    outgoing = (
        self.edges.get(edge_id)
        for edge_id in self._adj_out.get(from_id, [])
    )
    return any(
        edge
        and edge.to_node == to_id
        and (relation is None or edge.relation == relation)
        for edge in outgoing
    )
575
+
576
def remove_edge(self, edge_id: str) -> bool:
    """Remove one edge under the graph lock; True if it existed."""
    with self._lock:
        was_removed = self._remove_edge_internal(edge_id)
    return was_removed
580
+
581
def _remove_edge_internal(self, edge_id: str) -> bool:
    """Remove an edge without locking; the caller must already hold ``self._lock``.

    Unlinks the edge from both adjacency indexes, lowers the connection
    counters of its endpoints (never below zero), deletes it from
    ``self.edges`` and hands the deletion to ``self.memory.delete_edge``.
    Returns False when the id is unknown.
    """
    edge = self.edges.get(edge_id)
    if not edge:
        return False

    # Unlink from adjacency. `.get` avoids creating empty buckets in the
    # defaultdict for endpoints that have none.
    for adjacency, endpoint in (
        (self._adj_out, edge.from_node),
        (self._adj_in, edge.to_node),
    ):
        bucket = adjacency.get(endpoint, [])
        if edge_id in bucket:
            bucket.remove(edge_id)

    # One connection less on each endpoint that still exists.
    for endpoint in (edge.from_node, edge.to_node):
        endpoint_node = self.nodes.get(endpoint)
        if endpoint_node:
            endpoint_node.connections = max(0, endpoint_node.connections - 1)

    del self.edges[edge_id]
    self.memory.delete_edge(edge_id)
    return True
606
+
607
def reinforce_edge(self, edge_id: str, factor: float = None):
    """Multiply an edge's weight by ``factor`` (capped) and count one use.

    ``factor`` defaults to ``config.WEIGHT_REINFORCE``; the result is capped
    at ``config.WEIGHT_MAX``. Unknown ids are ignored. The edge is marked
    dirty and handed to ``self.memory.save_edge``.
    """
    multiplier = config.WEIGHT_REINFORCE if factor is None else factor
    with self._lock:
        target = self.edges.get(edge_id)
        if not target:
            return
        target.weight = min(target.weight * multiplier, config.WEIGHT_MAX)
        target.used_count += 1
        target.mark_dirty()
        self.memory.save_edge(target.to_dict())
618
+
619
def decay_edge(self, edge_id: str, factor: float = None):
    """Multiply an edge's weight by ``factor``, never dropping below the floor.

    ``factor`` defaults to ``config.WEIGHT_DECAY_RATE``; the floor is
    ``config.WEIGHT_MIN``. Unknown ids are ignored. The edge is marked dirty
    and handed to ``self.memory.save_edge``.
    """
    multiplier = config.WEIGHT_DECAY_RATE if factor is None else factor
    with self._lock:
        target = self.edges.get(edge_id)
        if not target:
            return
        target.weight = max(target.weight * multiplier, config.WEIGHT_MIN)
        target.mark_dirty()
        self.memory.save_edge(target.to_dict())
629
+
630
def get_weakest_edges(self, limit: int = 100, source_filter: str = "inferred") -> List[Edge]:
    """Return up to ``limit`` edges in ascending weight order (pruning candidates).

    Only edges whose ``source`` equals ``source_filter`` are considered;
    pass ``source_filter=None`` to consider every edge. Ties keep store order.
    """
    if source_filter is None:
        pool = list(self.edges.values())
    else:
        pool = [edge for edge in self.edges.values() if edge.source == source_filter]
    pool.sort(key=lambda edge: edge.weight)
    return pool[:limit]
638
+
639
+ # ───────────────────────────────────────────────────
640
+ # VECTOR INDEX & SIMILARITY SEARCH
641
+ # ───────────────────────────────────────────────────
642
+
643
def _rebuild_vector_index(self):
    """Recompute the similarity matrix and its row-to-node-id mapping.

    Only nodes whose vector is present and has length ``config.VECTOR_DIM``
    get a row. With no usable vectors the matrix is an empty
    ``(0, VECTOR_DIM)`` float32 array. Clears the dirty flag.
    """
    with self._lock:
        indexed_ids = []
        rows = []
        for node_id, node in self.nodes.items():
            vec = node.vector
            if vec is None or len(vec) != config.VECTOR_DIM:
                continue  # unusable vector: leave the node out of the index
            indexed_ids.append(node_id)
            rows.append(vec)

        if rows:
            matrix = np.array(rows, dtype=np.float32)
        else:
            # Covers both an empty graph and a graph without usable vectors.
            matrix = np.zeros((0, config.VECTOR_DIM), dtype=np.float32)

        self._vector_matrix = matrix
        self._vector_node_ids = indexed_ids
        self._vector_index_dirty = False
665
+
666
def _ensure_vector_index(self):
    """Rebuild the vector index, but only when it is flagged dirty."""
    if not self._vector_index_dirty:
        return
    self._rebuild_vector_index()
670
+
671
def find_similar_nodes(
    self,
    query_vector: np.ndarray,
    top_k: int = None,
    min_similarity: float = 0.0,
    exclude_ids: Set[str] = None,
    type_filter: str = None
) -> List[Tuple[Node, float]]:
    """Rank indexed nodes by similarity to ``query_vector``.

    Args:
        query_vector: Vector compared against every indexed node via
            ``utils.batch_cosine_similarity``.
        top_k: Maximum number of results; defaults to
            ``config.MAX_NODES_PER_SEARCH``.
        min_similarity: Scores below this value are dropped.
        exclude_ids: Node ids to leave out.
        type_filter: When set, only nodes of this type are returned.

    Returns:
        ``(node, score)`` pairs, highest score first; empty when the index
        has no rows.
    """
    if top_k is None:
        top_k = config.MAX_NODES_PER_SEARCH

    self._ensure_vector_index()
    if self._vector_matrix.shape[0] == 0:
        return []

    # One score per matrix row; row i belongs to self._vector_node_ids[i].
    scores = utils.batch_cosine_similarity(query_vector, self._vector_matrix)

    hits = []
    for row, raw_score in enumerate(scores):
        score = float(raw_score)
        if score < min_similarity:
            continue
        candidate_id = self._vector_node_ids[row]
        if exclude_ids and candidate_id in exclude_ids:
            continue
        candidate = self.nodes.get(candidate_id)
        # Skip rows whose node vanished since the index was built, and
        # nodes of the wrong type when a type filter is active.
        if candidate and not (type_filter and candidate.type != type_filter):
            hits.append((candidate, score))

    ranked = sorted(hits, key=lambda hit: hit[1], reverse=True)
    return ranked[:top_k]
714
+
715
def find_similar_to_text(
    self,
    text: str,
    top_k: int = None,
    min_similarity: float = 0.0,
    exclude_ids: Set[str] = None,
    type_filter: str = None
) -> List[Tuple[Node, float]]:
    """Rank nodes by similarity to a piece of text.

    The text is vectorized with ``utils.text_to_vector_tfidf`` and every
    other argument is forwarded unchanged to ``find_similar_nodes``.
    """
    text_vector = utils.text_to_vector_tfidf(text)
    ranked = self.find_similar_nodes(
        text_vector,
        top_k,
        min_similarity,
        exclude_ids,
        type_filter,
    )
    return ranked
732
+
733
def find_similar_to_node(
    self,
    node_id: str,
    top_k: int = None,
    min_similarity: float = None
) -> List[Tuple[Node, float]]:
    """Rank the other nodes by similarity to an existing node's vector.

    Returns an empty list for an unknown id. ``min_similarity`` defaults to
    ``config.SIMILARITY_THRESHOLD``; the reference node itself is excluded.
    """
    reference = self.nodes.get(node_id)
    if not reference:
        return []

    threshold = min_similarity
    if threshold is None:
        threshold = config.SIMILARITY_THRESHOLD

    return self.find_similar_nodes(
        reference.vector, top_k, threshold,
        exclude_ids={node_id}
    )
749
+
750
+ # ───────────────────────────────────────────────────
751
+ # GRAPH TRAVERSAL
752
+ # ───────────────────────────────────────────────────
753
+
754
def traverse_bfs(
    self,
    start_ids: List[str],
    max_depth: int = None,
    max_nodes: int = 100
) -> Dict[str, Tuple[int, List[str]]]:
    """Breadth-first exploration outward from one or more start nodes.

    Edges are followed in both directions (via ``get_neighbors``).

    Args:
        start_ids: Seed node ids; unknown ids and repeats are ignored.
        max_depth: Maximum number of hops from a seed; defaults to
            ``config.MAX_TRAVERSAL_DEPTH``.
        max_nodes: Cap on the number of nodes discovered. Seeds are always
            reported, so the result can exceed the cap only when there are
            more valid seeds than ``max_nodes``.

    Returns:
        ``{node_id: (depth, path)}`` where ``path`` alternates node and edge
        ids from the seed: ``[seed, edge_id, node_id, ...]``.
    """
    from collections import deque  # O(1) pops from the left, unlike list.pop(0)

    if max_depth is None:
        max_depth = config.MAX_TRAVERSAL_DEPTH

    visited = {}  # node_id -> (depth, path)
    queue = deque()

    for sid in start_ids:
        # Skip repeats so a seed is not expanded twice.
        if sid in self.nodes and sid not in visited:
            visited[sid] = (0, [sid])
            queue.append((sid, 0, [sid]))

    while queue and len(visited) < max_nodes:
        current_id, depth, path = queue.popleft()
        if depth >= max_depth:
            continue

        for neighbor, edge in self.get_neighbors(current_id):
            if neighbor.id in visited:
                continue
            new_path = path + [edge.id, neighbor.id]
            visited[neighbor.id] = (depth + 1, new_path)
            queue.append((neighbor.id, depth + 1, new_path))
            # Enforce the cap per discovery: checking only once per dequeue
            # lets a single high-degree node overshoot max_nodes arbitrarily.
            if len(visited) >= max_nodes:
                return visited

    return visited
787
+
788
def traverse_weighted_random(
    self,
    start_id: str,
    max_depth: int = None,
    temperature: float = 0.7
) -> List[Tuple[str, str]]:
    """Random walk that prefers strong edges and never revisits a node.

    At each step every unvisited neighbor is scored with
    ``edge.weight * edge.confidence * node.weight`` and one is drawn by
    ``utils.weighted_choice(items, weights, temperature)``.

    Args:
        start_id: Node to start from; an unknown id yields ``[]``.
        max_depth: Maximum number of steps; defaults to
            ``config.MAX_TRAVERSAL_DEPTH``.
        temperature: Passed through to ``utils.weighted_choice``.

    Returns:
        ``[(node_id, edge_id), ...]`` in visiting order. The first entry is
        ``(start_id, "")``; each later entry carries the edge used to arrive.
    """
    if max_depth is None:
        max_depth = config.MAX_TRAVERSAL_DEPTH

    if start_id not in self.nodes:
        return []

    seen = {start_id}
    trail = [(start_id, "")]
    cursor = start_id

    for _ in range(max_depth):
        # Gather unvisited neighbors together with their selection scores.
        options = []
        scores = []
        for node, edge in self.get_neighbors(cursor):
            if node.id in seen:
                continue
            options.append((node, edge))
            scores.append(edge.weight * edge.confidence * node.weight)

        if not options:
            break  # dead end: every neighbor was already visited

        picked_node, picked_edge = utils.weighted_choice(
            options, scores, temperature
        )

        seen.add(picked_node.id)
        trail.append((picked_node.id, picked_edge.id))
        cursor = picked_node.id

    return trail
836
+
837
def find_paths(
    self,
    from_id: str,
    to_id: str,
    max_depth: int = None,
    max_paths: int = 5
) -> List[List[str]]:
    """Enumerate simple paths between two nodes with a depth-first search.

    Edges are followed in both directions (via ``get_neighbors``) and no node
    is visited twice within one path.

    Args:
        from_id, to_id: Endpoint node ids; an unknown id yields ``[]``.
        max_depth: Maximum number of edges per path; defaults to
            ``config.MAX_TRAVERSAL_DEPTH``.
        max_paths: Stop after this many paths were found.

    Returns:
        Paths in discovery order, each ``[node_id, edge_id, node_id, ...]``.
    """
    if max_depth is None:
        max_depth = config.MAX_TRAVERSAL_DEPTH

    if from_id not in self.nodes or to_id not in self.nodes:
        return []

    all_paths = []

    def dfs(current: str, path: list, visited: set, depth: int):
        if len(all_paths) >= max_paths:
            return
        if depth > max_depth:
            return
        if current == to_id:
            all_paths.append(list(path))
            return
        # Every child of a node at the depth limit would be rejected anyway,
        # so do not bother fetching its neighbors.
        if depth >= max_depth:
            return

        for neighbor, edge in self.get_neighbors(current):
            if neighbor.id in visited:
                continue
            visited.add(neighbor.id)
            path.extend([edge.id, neighbor.id])
            dfs(neighbor.id, path, visited, depth + 1)
            # Backtrack
            path.pop()
            path.pop()
            visited.discard(neighbor.id)
            # Quota filled: remaining siblings cannot contribute.
            if len(all_paths) >= max_paths:
                return

    dfs(from_id, [from_id], {from_id}, 0)
    return all_paths
877
+
878
+ # ───────────────────────────────────────────────────
879
+ # REASONING CHAINS
880
+ # ───────────────────────────────────────────────────
881
+
882
def build_reasoning_chains(
    self,
    start_nodes: List[str],
    max_chains: int = None,
    max_depth: int = None
) -> List[ReasoningChain]:
    """Generate, deduplicate and rank reasoning chains from the given nodes.

    For every known start node two strategies are used: up to three weighted
    random walks, plus one greedy walk along the strongest outgoing edges.
    Each resulting path is scored with ``_score_chain`` and summarized with
    ``_chain_to_conclusion``.

    Args:
        start_nodes: Node ids to start from; unknown ids are skipped.
        max_chains: Maximum number of chains returned; defaults to
            ``config.MAX_CHAINS_PER_RESPONSE``.
        max_depth: Step limit for both strategies; defaults to
            ``config.MAX_TRAVERSAL_DEPTH``.

    Returns:
        Unique chains (by chain id), highest confidence first.
    """
    if max_chains is None:
        max_chains = config.MAX_CHAINS_PER_RESPONSE
    if max_depth is None:
        max_depth = config.MAX_TRAVERSAL_DEPTH

    def as_chain(path):
        # Score first, then summarize, then derive the id from the path.
        confidence = self._score_chain(path)
        conclusion = self._chain_to_conclusion(path)
        return ReasoningChain(
            chain_id=config.generate_chain_id(path),
            path=path,
            conclusion=conclusion,
            confidence=confidence
        )

    walks_per_start = min(3, max_chains)
    candidates = []

    for start_id in start_nodes:
        if start_id not in self.nodes:
            continue

        # Strategy 1: several weighted random walks.
        for _ in range(walks_per_start):
            walk = self.traverse_weighted_random(start_id, max_depth)
            if len(walk) < 2:
                continue  # the walk never left the start node
            # Flatten [(node, edge_used_to_arrive), ...] into
            # [node, edge, node, ...]; the start entry has an empty edge id.
            flat_path = []
            for node_id, edge_id in walk:
                if edge_id:
                    flat_path.append(edge_id)
                flat_path.append(node_id)
            candidates.append(as_chain(flat_path))

        # Strategy 2: greedy walk along the strongest edges.
        strongest = self._follow_strongest_path(start_id, max_depth)
        if len(strongest) >= 3:
            candidates.append(as_chain(strongest))

    # Keep the first chain seen for every chain id.
    first_by_id = {}
    for chain in candidates:
        first_by_id.setdefault(chain.id, chain)

    ranked = sorted(first_by_id.values(), key=lambda chain: chain.confidence, reverse=True)
    return ranked[:max_chains]
950
+
951
def _follow_strongest_path(self, start_id: str, max_depth: int) -> list:
    """Walk greedily along outgoing edges, always taking the strongest one.

    "Strongest" means the highest ``weight * confidence``; on a tie the edge
    listed first wins. Only outgoing edges to existing, not yet visited nodes
    are considered. Returns ``[start_id, edge_id, node_id, ...]``, which is
    just ``[start_id]`` when no step was possible.
    """
    route = [start_id]
    seen = {start_id}
    here = start_id

    for _ in range(max_depth):
        strongest = None
        strongest_score = None
        for edge in self.get_edges_from(here):
            target = edge.to_node
            if target in seen or target not in self.nodes:
                continue
            score = edge.weight * edge.confidence
            # Strict '>' keeps the earliest edge among equals.
            if strongest is None or score > strongest_score:
                strongest = edge
                strongest_score = score

        if strongest is None:
            break  # nowhere left to go

        route += [strongest.id, strongest.to_node]
        seen.add(strongest.to_node)
        here = strongest.to_node

    return route
975
+
976
def _score_chain(self, path: list) -> float:
    """
    Compute a confidence score for a reasoning chain.

    ``path`` mixes node ids and edge ids. Ids found in ``self.edges``
    contribute ``weight * confidence``; ids found in ``self.nodes``
    contribute their ``weight``; unknown ids are ignored.

    The score is the mean edge strength multiplied by the mean node weight
    (0.5 when no node was recognised) and by a penalty that shrinks as the
    number of edges grows. The result is passed through ``utils.clamp``
    with bounds 0.0 and 1.0.

    Returns 0.0 for paths shorter than three items or containing no known
    edge.
    """
    if len(path) < 3:
        return 0.0

    strengths = []
    weights = []
    for item_id in path:
        if item_id in self.edges:
            link = self.edges[item_id]
            strengths.append(link.weight * link.confidence)
            continue
        if item_id in self.nodes:
            weights.append(self.nodes[item_id].weight)

    if not strengths:
        return 0.0

    hop_count = len(strengths)
    mean_strength = sum(strengths) / hop_count
    if weights:
        mean_weight = sum(weights) / len(weights)
    else:
        mean_weight = 0.5

    # Longer chains are trusted less than shorter ones
    brevity_factor = 1.0 / (1.0 + 0.1 * hop_count)

    raw_score = mean_strength * mean_weight * brevity_factor
    return utils.clamp(raw_score, 0.0, 1.0)
1005
+
1006
def _chain_to_conclusion(self, path: list) -> str:
    """
    Render a chain path as a human-readable conclusion string.

    Every id in ``path`` that resolves to a (truthy) entry of
    ``self.nodes`` contributes that node's ``content``; all other ids
    (edge ids, unknown ids) are skipped. The contents are joined with
    an arrow separator. An empty string is returned when no id resolves.
    """
    resolved = (self.nodes.get(item_id) for item_id in path)
    return " → ".join(node.content for node in resolved if node)
1020
+
1021
def save_chain(self, chain: ReasoningChain):
    """
    Register a reasoning chain and hand it to the memory layer.

    While holding the graph lock, the chain is stored in ``self.chains``
    under ``chain.id`` and its dict form is passed to
    ``self.memory.save_chain``.
    """
    with self._lock:
        self.chains[chain.id] = chain
        serialized = chain.to_dict()
        self.memory.save_chain(serialized)
1026
+
1027
def reinforce_chain(self, chain_id: str):
    """
    Strengthen a stored chain after it has been used in a response.

    Unknown chain ids are ignored. For a known chain, under the graph
    lock: the usage counter is incremented, the confidence is raised by
    2% (capped at 1.0), the chain is re-saved through the memory layer,
    and every edge id appearing in the chain's path is reinforced via
    ``self.reinforce_edge``.
    """
    with self._lock:
        chain = self.chains.get(chain_id)
        if not chain:
            return

        chain.used_count += 1
        boosted = chain.confidence * 1.02
        chain.confidence = min(boosted, 1.0)
        self.memory.save_chain(chain.to_dict())

        # The path mixes node and edge ids; only edges get reinforced
        for edge_id in (item for item in chain.path if item in self.edges):
            self.reinforce_edge(edge_id)
1040
+
1041
+ # ───────────────────────────────────────────────────
1042
+ # MERGE & PRUNE
1043
+ # ───────────────────────────────────────────────────
1044
+
1045
def merge_nodes(self, node_id_keep: str, node_id_remove: str) -> bool:
    """
    Merge two redundant nodes, keeping the first and removing the second.

    The kept node absorbs half of the removed node's weight (capped at
    ``config.WEIGHT_MAX``) and its vector becomes the normalized average
    of both vectors. Every edge touching the removed node is re-created
    against the kept node unless that would produce a self-loop or an
    edge with the same endpoints and relation already exists (in which
    case the existing edge is left untouched). Finally the removed node
    is deleted through ``remove_node`` and the kept node is persisted.

    Args:
        node_id_keep: Id of the node that survives the merge.
        node_id_remove: Id of the node that is folded into the survivor.

    Returns:
        True when the merge was performed; False when both ids are the
        same or either node does not exist.
    """
    # Merging a node into itself would boost its weight and then delete
    # it via remove_node(), silently destroying the node.
    if node_id_keep == node_id_remove:
        return False

    # NOTE(review): add_edge/remove_node are called while the lock is
    # held, so self._lock is presumably reentrant - confirm.
    with self._lock:
        keep = self.nodes.get(node_id_keep)
        remove = self.nodes.get(node_id_remove)

        if not keep or not remove:
            return False

        # Combine weights
        keep.weight = min(keep.weight + remove.weight * 0.5, config.WEIGHT_MAX)

        # Average vectors
        keep.vector = utils.normalize(
            utils.vector_add(keep.vector, remove.vector) / 2.0
        )

        # Redirect edges. Snapshot first: add_edge() below may change the
        # collection this result is derived from.
        edges_to_redirect = list(self.get_all_edges_for(node_id_remove))
        for edge in edges_to_redirect:
            new_from = node_id_keep if edge.from_node == node_id_remove else edge.from_node
            new_to = node_id_keep if edge.to_node == node_id_remove else edge.to_node

            if new_from == new_to:
                continue  # Would create self-loop

            # Create redirected edge if doesn't exist
            if not self.edge_exists(new_from, new_to, edge.relation):
                self.add_edge(
                    from_id=new_from,
                    to_id=new_to,
                    relation=edge.relation,
                    weight=edge.weight,
                    confidence=edge.confidence,
                    source=edge.source
                )

        # Remove the merged node (and its old edges)
        self.remove_node(node_id_remove)
        keep.mark_dirty()
        self.memory.save_node(keep.to_dict())

        # The kept node's vector changed and a node disappeared
        self._vector_index_dirty = True

        return True
1093
+
1094
def prune_weak_edges(self, threshold: float = None) -> int:
    """
    Remove inferred edges whose weight is below a threshold.

    Only edges with ``source == "inferred"`` are eligible. Selection and
    removal happen inside one critical section so that ``self.edges``
    cannot be mutated by another thread while it is being scanned.

    Args:
        threshold: Weight cutoff; edges strictly below it are removed.
            Defaults to ``config.PRUNE_WEIGHT_THRESHOLD`` when None.

    Returns:
        Number of edges selected and passed to ``_remove_edge_internal``.
    """
    if threshold is None:
        threshold = config.PRUNE_WEIGHT_THRESHOLD

    with self._lock:
        # Collect ids first: removal mutates self.edges, which must not
        # happen while iterating over it.
        to_remove = [
            edge.id for edge in self.edges.values()
            if edge.weight < threshold and edge.source == "inferred"
        ]
        for edge_id in to_remove:
            self._remove_edge_internal(edge_id)

    return len(to_remove)
1109
+
1110
def prune_orphan_nodes(self) -> int:
    """
    Remove isolated, low-weight, inferred nodes.

    A node is pruned when it has zero connections, its weight is below
    twice ``config.WEIGHT_MIN`` and its source is ``"inferred"``.
    Selection and deletion happen inside one critical section, so a node
    cannot gain a connection between being selected and being deleted.
    When at least one node is removed the vector index is flagged dirty.

    Returns:
        Number of nodes actually removed from ``self.nodes``.
    """
    weight_cutoff = config.WEIGHT_MIN * 2
    removed = 0

    with self._lock:
        # Collect ids first: deleting while iterating the dict is an error
        orphan_ids = [
            node.id for node in self.nodes.values()
            if (node.connections == 0 and
                node.weight < weight_cutoff and
                node.source == "inferred")
        ]

        for node_id in orphan_ids:
            if self.nodes.pop(node_id, None) is None:
                continue
            self.memory.delete_node(node_id)
            removed += 1

        if removed:
            self._vector_index_dirty = True

    return removed
1129
+
1130
def find_redundant_pairs(self, limit: int = 20) -> List[Tuple[str, str, float]]:
    """
    Look for same-type node pairs whose vectors are nearly identical.

    All unordered pairs of nodes are compared (after drawing a random
    sample of 500 nodes when the graph holds more than that). A pair
    qualifies when both nodes share the same ``type`` and their cosine
    similarity is at least ``config.MERGE_THRESHOLD``. The search stops
    as soon as ``limit`` pairs have been collected.

    Returns ``[(node_id_1, node_id_2, similarity), ...]`` in discovery
    order.
    """
    self._ensure_vector_index()

    candidates = list(self.nodes.values())
    # Cap the quadratic pairwise comparison on large graphs
    if len(candidates) > 500:
        chosen = np.random.choice(len(candidates), 500, replace=False)
        candidates = [candidates[idx] for idx in chosen]

    matches = []
    for position, first in enumerate(candidates):
        for second in candidates[position + 1:]:
            # Nodes of different types are never merge candidates
            if first.type != second.type:
                continue
            similarity = utils.cosine_similarity(first.vector, second.vector)
            if similarity >= config.MERGE_THRESHOLD:
                matches.append((first.id, second.id, similarity))
                if len(matches) >= limit:
                    return matches

    return matches
1157
+
1158
+ # ───────────────────────────────────────────────────
1159
+ # STATISTICS
1160
+ # ───────────────────────────────────────────────────
1161
+
1162
def _rebuild_stats(self):
    """
    Recompute ``self._stats`` from the current nodes, edges and chains.

    The resulting dict holds element counts, the number of nodes/edges
    whose source is ``"inferred"``, the largest abstraction depth among
    nodes of type ``"abstraction"``, and rounded averages (connections
    per node, edge confidence, share of inferred edges, chain path
    length). Averages fall back to zero when there is nothing to average.
    """
    node_count = len(self.nodes)
    edge_count = len(self.edges)

    inferred_node_count = len(
        [node for node in self.nodes.values() if node.source == "inferred"]
    )
    inferred_edge_count = len(
        [edge for edge in self.edges.values() if edge.source == "inferred"]
    )

    if node_count > 0:
        mean_connections = sum(node.connections for node in self.nodes.values()) / node_count
    else:
        mean_connections = 0.0

    if edge_count > 0:
        mean_confidence = sum(edge.confidence for edge in self.edges.values()) / edge_count
    else:
        mean_confidence = 0.0

    # Deepest abstraction hierarchy, never reported below zero
    abstraction_depths = [
        self._get_abstraction_depth(node.id)
        for node in self.nodes.values()
        if node.type == "abstraction"
    ]
    deepest = max([0, *abstraction_depths])

    chain_count = len(self.chains)
    total_path_items = sum(len(chain.path) for chain in self.chains.values())

    self._stats = {
        "total_nodes": node_count,
        "total_edges": edge_count,
        "total_chains": chain_count,
        "inferred_nodes": inferred_node_count,
        "inferred_edges": inferred_edge_count,
        "max_abstraction_depth": deepest,
        "avg_connections": round(mean_connections, 2),
        "avg_confidence": round(mean_confidence, 4),
        "inference_ratio": round(inferred_edge_count / max(edge_count, 1), 4),
        "avg_chain_length": round(total_path_items / max(chain_count, 1), 2),
    }
1201
+
1202
def _get_abstraction_depth(self, node_id: str, visited: set = None) -> int:
    """
    Get the abstraction depth of a node (recursive).

    The depth is one more than the deepest depth found among nodes that
    point at ``node_id`` through an ``"instance_of"`` edge. When no such
    node has a positive depth, the result is 1 if the node itself is of
    type ``"abstraction"`` or ``"meta_abstraction"`` and 0 otherwise
    (including when the node id is unknown).

    Args:
        node_id: Id of the node to measure.
        visited: Ids on the current recursion path, used to break cycles.
            The set is restored to its incoming contents before returning.

    Returns:
        The abstraction depth; a node already on the current path counts
        as depth 0.
    """
    if visited is None:
        visited = set()
    if node_id in visited:
        return 0  # Cycle: this node is already on the current path

    visited.add(node_id)
    try:
        max_child_depth = 0
        for edge in self.get_edges_to(node_id):
            if edge.relation == "instance_of":
                child_depth = self._get_abstraction_depth(edge.from_node, visited)
                max_child_depth = max(max_child_depth, child_depth)
    finally:
        # Track only the current path. Keeping the id in the set would
        # make a node reached through one branch count as depth 0 on
        # every other branch, so the result depended on edge order.
        # NOTE: on heavily shared hierarchies this revisits sub-trees.
        visited.discard(node_id)

    if max_child_depth > 0:
        return max_child_depth + 1

    node = self.nodes.get(node_id)
    if node is not None and node.type in ("abstraction", "meta_abstraction"):
        return 1
    return 0
1219
+
1220
def get_stats(self) -> dict:
    """
    Refresh the graph statistics and return them.

    The returned dict is a shallow copy, so callers can modify it without
    touching ``self._stats``.
    """
    self._rebuild_stats()
    snapshot = dict(self._stats)
    return snapshot
1224
+
1225
def get_intelligence_score(self) -> float:
    """
    Refresh the graph statistics and derive the intelligence score.

    The score is whatever ``utils.calculate_intelligence_score`` returns
    for the freshly rebuilt ``self._stats``.
    """
    self._rebuild_stats()
    current_stats = self._stats
    return utils.calculate_intelligence_score(current_stats)
1229
+
1230
+ # ───────────────────────────────────────────────────
1231
+ # SYNC
1232
+ # ───────────────────────────────────────────────────
1233
+
1234
def sync(self) -> Optional[dict]:
    """
    Ask the memory layer to flush buffered changes if it deems it needed.

    Returns whatever ``self.memory.flush_if_needed()`` returns.
    """
    outcome = self.memory.flush_if_needed()
    return outcome
1237
+
1238
def force_sync(self) -> dict:
    """
    Unconditionally flush buffered changes through the memory layer.

    Returns whatever ``self.memory.flush()`` returns.
    """
    outcome = self.memory.flush()
    return outcome
1241
+
1242
+ # ───────────────────────────────────────────────────
1243
+ # DEBUG / INSPECTION
1244
+ # ───────────────────────────────────────────────────
1245
+
1246
def describe_node(self, node_id: str) -> Optional[dict]:
    """
    Build an inspection-friendly summary of a node and its neighbors.

    Returns None when ``node_id`` is unknown. Otherwise the result holds
    the node's id, type, content, weight, connection count and source,
    plus a ``"neighbors"`` list with one entry per ``(node, edge)`` pair
    from ``self.get_neighbors``. Each entry carries the neighbor's id,
    its content truncated to 80 characters, and the connecting edge's
    relation, weight and confidence.
    """
    node = self.nodes.get(node_id)
    if not node:
        return None

    adjacent = self.get_neighbors(node_id)

    summary = {
        "id": node.id,
        "type": node.type,
        "content": node.content,
        "weight": node.weight,
        "connections": node.connections,
        "source": node.source,
    }

    neighbor_rows = []
    for peer, link in adjacent:
        neighbor_rows.append({
            "node_id": peer.id,
            "content": utils.truncate_text(peer.content, 80),
            "relation": link.relation,
            "edge_weight": link.weight,
            "edge_confidence": link.confidence,
        })
    summary["neighbors"] = neighbor_rows

    return summary