Spaces:
Sleeping
Sleeping
Create thinker.py
Browse files- thinker.py +1565 -0
thinker.py
ADDED
|
@@ -0,0 +1,1565 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Cogni-Engine v1 — 24/7 Thinking Loop
|
| 3 |
+
The autonomous cognitive process that never stops.
|
| 4 |
+
8 phases of continuous reasoning that make the AI smarter over time.
|
| 5 |
+
|
| 6 |
+
Phases:
|
| 7 |
+
1. INGEST — Scan /data/ folder, parse new JSONL files
|
| 8 |
+
2. CONNECT — Find hidden connections between nodes via similarity
|
| 9 |
+
3. INFER — Transitive & analogical inference to discover new knowledge
|
| 10 |
+
4. ABSTRACT — Cluster similar nodes into higher-level abstractions
|
| 11 |
+
5. STRENGTHEN/WEAKEN — Reinforce used edges, decay unused ones
|
| 12 |
+
6. COMPRESS — Merge redundant nodes, prune dead edges
|
| 13 |
+
7. VALIDATE — Check logical consistency, resolve contradictions
|
| 14 |
+
8. SELF-QUESTION — Generate and answer internal questions to find gaps
|
| 15 |
+
"""
|
| 16 |
+
|
| 17 |
+
import os
|
| 18 |
+
import json
|
| 19 |
+
import time
|
| 20 |
+
import threading
|
| 21 |
+
import traceback
|
| 22 |
+
from typing import List, Dict, Optional, Tuple
|
| 23 |
+
|
| 24 |
+
import numpy as np
|
| 25 |
+
|
| 26 |
+
import config
|
| 27 |
+
import utils
|
| 28 |
+
from knowledge import KnowledgeGraph, Node, Edge, ReasoningChain
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
# ═══════════════════════════════════════════════════════════
|
| 32 |
+
# THINKER ENGINE
|
| 33 |
+
# ═══════════════════════════════════════════════════════════
|
| 34 |
+
|
| 35 |
+
class Thinker:
|
| 36 |
+
"""
|
| 37 |
+
Autonomous thinking engine.
|
| 38 |
+
Runs in a background thread, continuously processing
|
| 39 |
+
and enriching the knowledge graph.
|
| 40 |
+
"""
|
| 41 |
+
|
| 42 |
+
def __init__(self, graph: KnowledgeGraph):
    """
    Bind the thinker to ``graph`` and initialise it stopped and un-paused.

    No thread is created here; ``start()`` creates and launches it.
    """
    self.graph = graph

    # Worker thread handle and loop control flags.
    self._thread: Optional[threading.Thread] = None
    self._running, self._paused = False, False

    # Loop bookkeeping: cycle counters, phase cursor and pacing.
    self._cycle_count = self._total_cycles = 0
    self._phase_index = 0
    self._current_phase = "init"
    self._operations_this_cycle = 0
    self._interval = config.THINKING_INTERVAL_FAST

    # Last seen checksum per data file name.
    self._file_checksums: Dict[str, str] = {}

    # Phase table, executed round-robin in exactly this order.
    self._phases = [
        ("ingest", self._phase_ingest),
        ("connect", self._phase_connect),
        ("infer", self._phase_infer),
        ("abstract", self._phase_abstract),
        ("strengthen", self._phase_strengthen_weaken),
        ("compress", self._phase_compress),
        ("validate", self._phase_validate),
        ("self_question", self._phase_self_question),
    ]

    # Running totals; every counter starts at zero.
    self._metrics = dict.fromkeys(
        (
            "nodes_ingested",
            "edges_ingested",
            "connections_found",
            "inferences_made",
            "abstractions_created",
            "edges_reinforced",
            "edges_decayed",
            "nodes_merged",
            "edges_pruned",
            "nodes_pruned",
            "contradictions_resolved",
            "self_questions_asked",
            "self_questions_answered",
        ),
        0,
    )
|
| 87 |
+
|
| 88 |
+
# ───────────────────────────────────────────────────
|
| 89 |
+
# LIFECYCLE
|
| 90 |
+
# ───────────────────────────────────────────────────
|
| 91 |
+
|
| 92 |
+
def start(self):
    """Launch the thinking loop on a daemon thread; does nothing if already running."""
    if self._running:
        print("[THINKER] Already running.")
        return

    # Restore saved counters, metrics and file checksums before looping.
    self._load_state()

    self._running = True
    worker = threading.Thread(
        target=self._thinking_loop,
        name="CogniThinker",
        daemon=True,
    )
    self._thread = worker
    worker.start()
    print(f"[THINKER] Started. Resuming from cycle {self._total_cycles}.")
|
| 109 |
+
|
| 110 |
+
def stop(self):
    """
    Ask the loop to exit, wait up to 30 seconds for the thread, then save state.

    Does nothing when the thinker is not running.
    """
    if not self._running:
        return

    print("[THINKER] Stopping...")
    self._running = False  # the loop checks this flag on each iteration

    worker = self._thread
    if worker:
        worker.join(timeout=30)

    self._save_state()
    print("[THINKER] Stopped.")
|
| 120 |
+
|
| 121 |
+
def pause(self) -> None:
    """Set the pause flag; the loop then idles in 1-second sleeps until resumed."""
    self._paused = True
|
| 124 |
+
|
| 125 |
+
def resume(self) -> None:
    """Clear the pause flag so the loop continues with its next phase."""
    self._paused = False
|
| 128 |
+
|
| 129 |
+
@property
def is_running(self) -> bool:
    """Whether the run flag is set (set by ``start()``, cleared by ``stop()``)."""
    return self._running
|
| 132 |
+
|
| 133 |
+
@property
def current_phase(self) -> str:
    """Name of the phase most recently selected by the loop ("init" before any)."""
    return self._current_phase
|
| 136 |
+
|
| 137 |
+
@property
def total_cycles(self) -> int:
    """Number of completed full phase rotations, including any restored from saved state."""
    return self._total_cycles
|
| 140 |
+
|
| 141 |
+
@property
def metrics(self) -> dict:
    """Shallow copy of the metric counters, so callers cannot mutate the live table."""
    return {**self._metrics}
|
| 144 |
+
|
| 145 |
+
def get_status(self) -> dict:
    """
    Return a snapshot of the thinker's control flags, counters and metrics.

    The ``metrics`` value is a copy; the other values are read directly
    from the instance at call time.
    """
    return dict(
        running=self._running,
        paused=self._paused,
        current_phase=self._current_phase,
        cycle_count=self._cycle_count,
        total_cycles=self._total_cycles,
        interval_seconds=self._interval,
        operations_last_cycle=self._operations_this_cycle,
        metrics=self._metrics.copy(),
    )
|
| 157 |
+
|
| 158 |
+
# ───────────────────────────────────────────────────
|
| 159 |
+
# STATE PERSISTENCE
|
| 160 |
+
# ───────────────────────────────────────────────────
|
| 161 |
+
|
| 162 |
+
def _load_state(self):
|
| 163 |
+
"""Load thinking state from DB."""
|
| 164 |
+
state = self.graph.memory.load_thinking_state()
|
| 165 |
+
self._total_cycles = state.get("total_cycles", 0)
|
| 166 |
+
self._cycle_count = state.get("current_cycle", 0)
|
| 167 |
+
self._current_phase = state.get("phase", "init")
|
| 168 |
+
|
| 169 |
+
saved_metrics = state.get("metrics", {})
|
| 170 |
+
if saved_metrics:
|
| 171 |
+
for key in self._metrics:
|
| 172 |
+
if key in saved_metrics:
|
| 173 |
+
self._metrics[key] = saved_metrics[key]
|
| 174 |
+
|
| 175 |
+
# Load file checksums
|
| 176 |
+
self._file_checksums = self.graph.memory.load_file_checksums()
|
| 177 |
+
|
| 178 |
+
def _save_state(self):
|
| 179 |
+
"""Save thinking state to DB."""
|
| 180 |
+
self.graph.memory.save_thinking_state({
|
| 181 |
+
"current_cycle": self._cycle_count,
|
| 182 |
+
"total_cycles": self._total_cycles,
|
| 183 |
+
"cursor_position": "",
|
| 184 |
+
"phase": self._current_phase,
|
| 185 |
+
"metrics": dict(self._metrics)
|
| 186 |
+
})
|
| 187 |
+
|
| 188 |
+
# ───────────────────────────────────────────────────
|
| 189 |
+
# MAIN LOOP
|
| 190 |
+
# ───────────────────────────────────────────────────
|
| 191 |
+
|
| 192 |
+
def _thinking_loop(self):
    """
    Main thinking loop. Runs until ``self._running`` is cleared.

    Each iteration executes one phase from ``self._phases`` and then sleeps
    for ``self._interval / len(self._phases)`` seconds. One full rotation
    through the phase table counts as one cycle.

    Per-cycle housekeeping (state save + graph sync, speed adaptation and
    the progress log) runs once, right after the last phase of a cycle.
    ``self._operations_this_cycle`` is reset at the start of a cycle, not
    before every phase, so it accumulates the work of the whole rotation.

    A failing phase is logged and skipped. Any other error in the loop body
    is logged, followed by a 5-second recovery pause.
    """
    print("[THINKER] Thinking loop started.")

    while self._running:
        try:
            # Wait if paused
            if self._paused:
                time.sleep(1)
                continue

            # A new rotation begins: start counting operations afresh.
            if self._phase_index == 0:
                self._operations_this_cycle = 0

            # Execute current phase
            phase_name, phase_func = self._phases[self._phase_index]
            self._current_phase = phase_name

            try:
                phase_func()
            except Exception as e:
                # One broken phase must not stop the remaining phases.
                print(f"[THINKER] Error in phase '{phase_name}': {e}")
                if config.LOG_THINKING_DETAILS:
                    traceback.print_exc()

            # Advance to next phase
            self._phase_index = (self._phase_index + 1) % len(self._phases)

            # Wrapping back to index 0 means one full rotation is complete.
            if self._phase_index == 0:
                self._cycle_count += 1
                self._total_cycles += 1

                # Periodic state save. Doing this only on cycle completion
                # avoids repeating it for every phase while the counter
                # sits on a multiple of the sync interval.
                if self._total_cycles % config.SYNC_INTERVAL_CYCLES == 0:
                    self._save_state()
                    self.graph.sync()

                # Adaptive speed, based on the whole cycle's operations.
                self._adapt_speed()

                # Log progress periodically
                if self._total_cycles % 100 == 0 and config.LOG_THINKING_DETAILS:
                    stats = self.graph.get_stats()
                    score = self.graph.get_intelligence_score()
                    print(
                        f"[THINKER] Cycle {self._total_cycles}: "
                        f"nodes={stats['total_nodes']}, "
                        f"edges={stats['total_edges']}, "
                        f"inferred={stats['inferred_edges']}, "
                        f"score={score:.2f}"
                    )

            # Sleep between phases
            time.sleep(self._interval / len(self._phases))

        except Exception as e:
            print(f"[THINKER] Loop error: {e}")
            traceback.print_exc()
            time.sleep(5)  # Recovery pause

    print("[THINKER] Thinking loop ended.")
|
| 255 |
+
|
| 256 |
+
def _adapt_speed(self):
    """
    Nudge the loop interval by 10% according to recent activity.

    Above ``config.THINKING_STABILITY_THRESHOLD`` operations the interval
    shrinks, floored at ``config.THINKING_INTERVAL_FAST``; otherwise it
    grows, capped at ``config.THINKING_INTERVAL_SLOW``.
    """
    busy = self._operations_this_cycle > config.THINKING_STABILITY_THRESHOLD
    if busy:
        # Active: shorten the pause between phases.
        shorter = self._interval * 0.9
        self._interval = max(config.THINKING_INTERVAL_FAST, shorter)
    else:
        # Stable: lengthen the pause between phases.
        longer = self._interval * 1.1
        self._interval = min(config.THINKING_INTERVAL_SLOW, longer)
|
| 270 |
+
|
| 271 |
+
# ═══════════════════════════════════════════════════
|
| 272 |
+
# PHASE 1: INGEST
|
| 273 |
+
# ═══════════════════════════════════════════════════
|
| 274 |
+
|
| 275 |
+
def _phase_ingest(self):
    """
    Scan ``config.DATA_DIR`` for new or changed data files and ingest them.

    A file is a candidate when its name ends with one of
    ``config.SUPPORTED_DATA_EXTENSIONS``. Each candidate is read fully and
    hashed; files whose checksum matches the recorded one are skipped.
    Every non-empty line is parsed as JSON and passed to ``_ingest_entry``.
    Lines that are not valid JSON, or whose JSON value is not an object,
    are skipped.

    At most ``config.MAX_LINES_PER_INGEST`` entries are ingested per file
    per call. The number ingested is added to ``_operations_this_cycle``.
    """
    if not os.path.exists(config.DATA_DIR):
        return

    suffixes = tuple(config.SUPPORTED_DATA_EXTENSIONS)
    limit = config.MAX_LINES_PER_INGEST

    for fname in os.listdir(config.DATA_DIR):
        if not fname.endswith(suffixes):
            continue

        filepath = os.path.join(config.DATA_DIR, fname)

        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                content = f.read()
        except Exception as e:
            print(f"[THINKER/INGEST] Error reading {fname}: {e}")
            continue

        # Check if file has changed
        checksum = utils.hash_file_content(content)
        if self._file_checksums.get(fname) == checksum:
            continue  # File unchanged, skip

        print(f"[THINKER/INGEST] Processing file: {fname}")

        lines = content.strip().split('\n')
        processed = 0
        unread = 0

        for index, line in enumerate(lines):
            if processed >= limit:
                unread = len(lines) - index
                break

            line = line.strip()
            if not line:
                continue

            try:
                entry = json.loads(line)
            except json.JSONDecodeError:
                continue

            # A JSON array/string/number has no fields to ingest. Passing it
            # on would raise and abort the phase before the checksum is
            # stored, making the same file fail again on every cycle.
            if not isinstance(entry, dict):
                continue

            self._ingest_entry(entry, source=fname)
            processed += 1

        if unread:
            # NOTE(review): the file is still marked as processed below, so
            # these lines are not revisited until the file content changes.
            # Confirm whether resuming from an offset is wanted instead.
            print(
                f"[THINKER/INGEST] {fname}: reached MAX_LINES_PER_INGEST "
                f"({limit}); {unread} line(s) were not ingested"
            )

        # Mark file as processed
        self._file_checksums[fname] = checksum
        self.graph.memory.save_file_checksum(fname, checksum, processed)
        self._operations_this_cycle += processed

        print(f"[THINKER/INGEST] Processed {processed} entries from {fname}")
|
| 333 |
+
|
| 334 |
+
def _ingest_entry(self, entry: dict, source: str = "data"):
|
| 335 |
+
"""
|
| 336 |
+
Ingest a single data entry into the knowledge graph.
|
| 337 |
+
Creates nodes and edges based on entry type and fields.
|
| 338 |
+
"""
|
| 339 |
+
entry_type = entry.get("type", "fact")
|
| 340 |
+
content = entry.get("content", "").strip()
|
| 341 |
+
|
| 342 |
+
if not content:
|
| 343 |
+
return
|
| 344 |
+
|
| 345 |
+
tags = entry.get("tags", [])
|
| 346 |
+
confidence = entry.get("confidence", config.DATA_KNOWLEDGE_CONFIDENCE)
|
| 347 |
+
domain = entry.get("domain", "")
|
| 348 |
+
related = entry.get("related", [])
|
| 349 |
+
|
| 350 |
+
# ── Create main content node ──
|
| 351 |
+
main_node = self.graph.add_node(
|
| 352 |
+
content=content,
|
| 353 |
+
node_type=self._map_entry_type_to_node_type(entry_type),
|
| 354 |
+
source="data",
|
| 355 |
+
weight=confidence,
|
| 356 |
+
tags=tags
|
| 357 |
+
)
|
| 358 |
+
|
| 359 |
+
if not main_node:
|
| 360 |
+
return
|
| 361 |
+
|
| 362 |
+
self._metrics["nodes_ingested"] += 1
|
| 363 |
+
|
| 364 |
+
# ── Handle domain as a concept node ──
|
| 365 |
+
if domain:
|
| 366 |
+
domain_node = self.graph.add_node(
|
| 367 |
+
content=domain,
|
| 368 |
+
node_type="concept",
|
| 369 |
+
source="data",
|
| 370 |
+
weight=0.8
|
| 371 |
+
)
|
| 372 |
+
if domain_node:
|
| 373 |
+
self.graph.add_edge(
|
| 374 |
+
from_id=main_node.id,
|
| 375 |
+
to_id=domain_node.id,
|
| 376 |
+
relation="part_of",
|
| 377 |
+
confidence=0.8,
|
| 378 |
+
source="data"
|
| 379 |
+
)
|
| 380 |
+
self._metrics["edges_ingested"] += 1
|
| 381 |
+
|
| 382 |
+
# ── Handle related topics ──
|
| 383 |
+
for rel_topic in related:
|
| 384 |
+
rel_node = self.graph.add_node(
|
| 385 |
+
content=rel_topic,
|
| 386 |
+
node_type="concept",
|
| 387 |
+
source="data",
|
| 388 |
+
weight=0.7
|
| 389 |
+
)
|
| 390 |
+
if rel_node:
|
| 391 |
+
self.graph.add_edge(
|
| 392 |
+
from_id=main_node.id,
|
| 393 |
+
to_id=rel_node.id,
|
| 394 |
+
relation="related_to",
|
| 395 |
+
confidence=0.7,
|
| 396 |
+
source="data"
|
| 397 |
+
)
|
| 398 |
+
self._metrics["edges_ingested"] += 1
|
| 399 |
+
|
| 400 |
+
# ── Type-specific handling ──
|
| 401 |
+
self._ingest_type_specific(entry, main_node, entry_type)
|
| 402 |
+
|
| 403 |
+
def _ingest_type_specific(self, entry: dict, main_node: Node, entry_type: str):
    """
    Handle type-specific fields for data entries.

    Depending on ``entry_type``, extra nodes are created from the entry's
    fields and linked with directed edges:

    - relation: ``from`` -> ``to`` using ``relation`` (default "related_to")
    - definition / term: ``term`` -> main node ("defined_as")
    - cause_effect: ``cause`` -> ``effect`` ("causes")
    - hierarchy: each of ``children`` -> ``parent`` ("is_a")
    - comparison: ``subject_a`` <-> ``subject_b`` ("related_to", both ways)
    - qa: ``question`` -> ``answer`` ("defined_as")
    - synonym: every pair in ``terms`` ("synonym_of", both ways)
    - process / procedure: main node -> ``title`` ("defined_as"); each of
      ``steps`` -> main node ("part_of"); consecutive steps ("follows")
    - quote: main node -> ``author`` ("created_by")
    - event: each of ``actors`` -> main node ("related_to");
      main node -> ``location`` ("located_in")
    - analogy: ``subject`` -> ``analogy`` ("analogous_to")
    - paragraph: main node -> each extracted keyword ("related_to")

    Other types add nothing. Every edge added here increments
    ``self._metrics["edges_ingested"]`` by one. An edge is only added when
    both endpoint nodes were returned truthy by ``graph.add_node``.

    Args:
        entry: Parsed JSON object for one data line.
        main_node: Node already created from the entry's ``content``.
        entry_type: Value of the entry's ``type`` field.
    """
    graph = self.graph
    metrics = self._metrics

    def make_node(text, kind, **extra):
        # Every node created here is tagged source="data".
        return graph.add_node(content=text, node_type=kind, source="data", **extra)

    def link(src, dst, relation, confidence):
        # Add one directed edge and count it in the ingest metrics.
        graph.add_edge(
            from_id=src.id, to_id=dst.id,
            relation=relation, confidence=confidence, source="data"
        )
        metrics["edges_ingested"] += 1

    def as_list(value):
        # A bare string would otherwise be iterated character by character;
        # anything that is not a string, list or tuple yields no items.
        if isinstance(value, str):
            return [value] if value else []
        if isinstance(value, (list, tuple)):
            return list(value)
        return []

    # ── relation type: explicit from/to ──
    if entry_type == "relation":
        from_content = entry.get("from", "")
        to_content = entry.get("to", "")
        relation = entry.get("relation", "related_to")

        if from_content and to_content:
            from_node = make_node(from_content, "entity")
            to_node = make_node(to_content, "entity")
            if from_node and to_node:
                link(from_node, to_node, relation, entry.get("confidence", 0.9))

    # ── definition / term: term node + defined_as edge ──
    elif entry_type in ("definition", "term"):
        term = entry.get("term", "")
        if term:
            term_node = make_node(term, "entity")
            if term_node:
                link(term_node, main_node, "defined_as", 0.95)

    # ── cause_effect: cause → effect ──
    elif entry_type == "cause_effect":
        cause = entry.get("cause", "")
        effect = entry.get("effect", "")
        if cause and effect:
            cause_node = make_node(cause, "concept")
            effect_node = make_node(effect, "concept")
            if cause_node and effect_node:
                link(cause_node, effect_node, "causes", entry.get("confidence", 0.85))

    # ── hierarchy: parent → children ──
    elif entry_type == "hierarchy":
        parent = entry.get("parent", "")
        children = as_list(entry.get("children", []))
        if parent and children:
            parent_node = make_node(parent, "concept")
            if parent_node:
                for child in children:
                    child_node = make_node(child, "entity")
                    if child_node:
                        link(child_node, parent_node, "is_a", 0.9)

    # ── comparison: subject_a ↔ subject_b ──
    elif entry_type == "comparison":
        subj_a = entry.get("subject_a", "")
        subj_b = entry.get("subject_b", "")
        if subj_a and subj_b:
            node_a = make_node(subj_a, "entity")
            node_b = make_node(subj_b, "entity")
            if node_a and node_b:
                link(node_a, node_b, "related_to", 0.8)
                link(node_b, node_a, "related_to", 0.8)

    # ── qa: question → answer ──
    elif entry_type == "qa":
        question = entry.get("question", "")
        answer = entry.get("answer", "")
        if question and answer:
            q_node = make_node(question, "concept")
            a_node = make_node(answer, "fact")
            if q_node and a_node:
                link(q_node, a_node, "defined_as", 0.9)

    # ── synonym: bidirectional synonym_of ──
    elif entry_type == "synonym":
        terms = as_list(entry.get("terms", []))
        for i, first in enumerate(terms):
            for second in terms[i + 1:]:
                # Nodes are requested once per pair, as before.
                node_i = make_node(first, "entity")
                node_j = make_node(second, "entity")
                if node_i and node_j:
                    link(node_i, node_j, "synonym_of", 0.9)
                    link(node_j, node_i, "synonym_of", 0.9)

    # ── process / procedure: sequential steps ──
    elif entry_type in ("process", "procedure"):
        steps = as_list(entry.get("steps", []))
        title = entry.get("title", "")
        if title:
            title_node = make_node(title, "concept")
            if title_node:
                # Counted in edges_ingested like every other edge here.
                link(main_node, title_node, "defined_as", 0.85)

        prev_step_node = None
        for step_text in steps:
            step_node = make_node(step_text, "fact")
            if step_node:
                link(step_node, main_node, "part_of", 0.8)
                if prev_step_node:
                    link(prev_step_node, step_node, "follows", 0.9)
                prev_step_node = step_node

    # ── quote: author + content ──
    elif entry_type == "quote":
        author = entry.get("author", "")
        if author:
            author_node = make_node(author, "entity")
            if author_node:
                link(main_node, author_node, "created_by", 0.9)

    # ── event: actors, location, date ──
    elif entry_type == "event":
        actors = as_list(entry.get("actors", []))
        location = entry.get("location", "")
        for actor in actors:
            actor_node = make_node(actor, "entity")
            if actor_node:
                link(actor_node, main_node, "related_to", 0.85)
        if location:
            loc_node = make_node(location, "entity")
            if loc_node:
                link(main_node, loc_node, "located_in", 0.85)

    # ── analogy: subject ↔ analogy ──
    elif entry_type == "analogy":
        subject = entry.get("subject", "")
        analogy_text = entry.get("analogy", "")
        if subject and analogy_text:
            subj_node = make_node(subject, "concept")
            ana_node = make_node(analogy_text, "concept")
            if subj_node and ana_node:
                link(subj_node, ana_node, "analogous_to", 0.75)

    # ── Paragraph: extract keywords as connected entities ──
    elif entry_type == "paragraph":
        keywords = utils.extract_keywords(entry.get("content", ""), max_keywords=10)
        for kw in keywords:
            kw_node = make_node(kw, "concept", weight=0.6)
            if kw_node:
                link(main_node, kw_node, "related_to", 0.6)
|
| 656 |
+
|
| 657 |
+
def _map_entry_type_to_node_type(self, entry_type: str) -> str:
|
| 658 |
+
"""Map data entry type to graph node type."""
|
| 659 |
+
type_map = {
|
| 660 |
+
"fact": "fact",
|
| 661 |
+
"definition": "definition",
|
| 662 |
+
"explanation": "fact",
|
| 663 |
+
"description": "fact",
|
| 664 |
+
"property": "fact",
|
| 665 |
+
"statistic": "fact",
|
| 666 |
+
"measurement": "fact",
|
| 667 |
+
"term": "definition",
|
| 668 |
+
"abbreviation": "definition",
|
| 669 |
+
"jargon": "definition",
|
| 670 |
+
"slang": "definition",
|
| 671 |
+
"idiom": "definition",
|
| 672 |
+
"synonym": "entity",
|
| 673 |
+
"antonym": "entity",
|
| 674 |
+
"quote": "fact",
|
| 675 |
+
"rule": "fact",
|
| 676 |
+
"example": "fact",
|
| 677 |
+
"analogy": "concept",
|
| 678 |
+
"opinion": "fact",
|
| 679 |
+
"paragraph": "fact",
|
| 680 |
+
"relation": "fact",
|
| 681 |
+
"cause_effect": "fact",
|
| 682 |
+
"comparison": "fact",
|
| 683 |
+
"hierarchy": "concept",
|
| 684 |
+
"composition": "concept",
|
| 685 |
+
"dependency": "fact",
|
| 686 |
+
"contradiction": "fact",
|
| 687 |
+
"timeline": "fact",
|
| 688 |
+
"process": "fact",
|
| 689 |
+
"procedure": "fact",
|
| 690 |
+
"event": "fact",
|
| 691 |
+
"history": "fact",
|
| 692 |
+
"change": "fact",
|
| 693 |
+
"qa": "fact",
|
| 694 |
+
}
|
| 695 |
+
if entry_type.startswith("custom_"):
|
| 696 |
+
return "fact"
|
| 697 |
+
return type_map.get(entry_type, "fact")
|
| 698 |
+
|
| 699 |
+
# ═══════════════════════════════════════════════════
|
| 700 |
+
# PHASE 2: CONNECT
|
| 701 |
+
# ═══════════════════════════════════════════════════
|
| 702 |
+
|
| 703 |
+
def _phase_connect(self):
    """
    Find hidden connections between nodes.

    Starts from the nodes returned by ``graph.get_least_connected_nodes``
    and, for each one, asks the graph for up to ten similar nodes (using
    ``min_similarity=config.SIMILARITY_THRESHOLD``). Every similar node that
    is not yet linked to it in either direction gets an inferred
    ``similar_to`` edge with weight = similarity and
    confidence = 0.9 * similarity.

    At most ``config.THINKING_BATCH_SIZE`` edges are created per call. The
    budget is checked before every edge, not only between nodes, so it
    cannot be overshot by a node with many unlinked neighbours.
    """
    batch_limit = config.THINKING_BATCH_SIZE
    candidates = self.graph.get_least_connected_nodes(limit=batch_limit)

    connections_made = 0

    for node in candidates:
        if connections_made >= batch_limit:
            break

        # Find similar nodes
        similar = self.graph.find_similar_to_node(
            node.id,
            top_k=10,
            min_similarity=config.SIMILARITY_THRESHOLD
        )

        for similar_node, similarity in similar:
            # Enforce the budget inside the neighbour loop as well.
            if connections_made >= batch_limit:
                break

            # Skip if edge already exists (either direction)
            if self.graph.edge_exists(node.id, similar_node.id):
                continue
            if self.graph.edge_exists(similar_node.id, node.id):
                continue

            # Create new connection
            edge = self.graph.add_edge(
                from_id=node.id,
                to_id=similar_node.id,
                relation="similar_to",
                weight=similarity,
                confidence=similarity * 0.9,
                source="inferred"
            )

            if edge:
                connections_made += 1
                self._operations_this_cycle += 1

    if connections_made > 0:
        self._metrics["connections_found"] += connections_made
        if config.LOG_THINKING_DETAILS:
            print(f"[THINKER/CONNECT] Found {connections_made} new connections")
|
| 751 |
+
|
| 752 |
+
# ═══════════════════════════════════════════════════
|
| 753 |
+
# PHASE 3: INFER
|
| 754 |
+
# ═══════════════════════════════════════════════════
|
| 755 |
+
|
| 756 |
+
def _phase_infer(self):
    """
    Run the inference strategies and record how many edges they produced.

    Transitive inference runs first, then analogical inference. When at
    least one inference was made, the total is added to the
    ``inferences_made`` metric and to the per-cycle operation counter.
    """
    # Left operand is evaluated first: transitive, then analogical.
    inferences_made = self._transitive_inference() + self._analogical_inference()

    if inferences_made > 0:
        self._metrics["inferences_made"] += inferences_made
        self._operations_this_cycle += inferences_made
        if config.LOG_THINKING_DETAILS:
            print(f"[THINKER/INFER] Made {inferences_made} inferences")
|
| 775 |
+
|
| 776 |
+
def _transitive_inference(self) -> int:
    """
    Infer A→C edges from existing A→B and B→C edges.

    The confidence of an inferred edge is the product of both hop
    confidences and ``config.INFERENCE_DECAY``; candidates below
    ``config.INFERENCE_CONFIDENCE_MIN`` are dropped. Start nodes are a
    random sample of at most ``config.THINKING_BATCH_SIZE`` nodes, and no
    more than ``config.MAX_INFERENCES_PER_CYCLE`` edges are created.

    Returns:
        Number of inferred edges created.
    """
    made = 0

    # Work on a random subset when the graph is larger than one batch
    pool = list(self.graph.nodes.keys())
    if len(pool) > config.THINKING_BATCH_SIZE:
        picks = np.random.choice(
            len(pool), config.THINKING_BATCH_SIZE, replace=False
        )
        pool = [pool[k] for k in picks]

    for start_id in pool:
        if made >= config.MAX_INFERENCES_PER_CYCLE:
            break

        for first_hop in self.graph.get_edges_from(start_id):
            if made >= config.MAX_INFERENCES_PER_CYCLE:
                break

            middle_id = first_hop.to_node

            for second_hop in self.graph.get_edges_from(middle_id):
                end_id = second_hop.to_node

                # Neither self-loops nor already-known edges are inferred
                if end_id == start_id or self.graph.edge_exists(start_id, end_id):
                    continue

                strength = (
                    first_hop.confidence *
                    second_hop.confidence *
                    config.INFERENCE_DECAY
                )
                if strength < config.INFERENCE_CONFIDENCE_MIN:
                    continue

                created = self.graph.add_edge(
                    from_id=start_id,
                    to_id=end_id,
                    relation=self._infer_relation(
                        first_hop.relation, second_hop.relation
                    ),
                    weight=strength,
                    confidence=strength,
                    source="inferred"
                )
                if not created:
                    continue

                made += 1
                if made >= config.MAX_INFERENCES_PER_CYCLE:
                    # The enclosing loops see the same condition and stop too
                    break

    return made
|
| 845 |
+
|
| 846 |
+
def _analogical_inference(self) -> int:
    """
    Infer edges by analogy using vector arithmetic.

    If A relates to B like C relates to ?, then A - B ≈ C - ?, so
    ? ≈ C - A + B.

    Up to 20 randomly sampled "strong" edges A→B are used (confidence above
    0.7 and a relation other than ``similar_to`` / ``related_to``). For each
    node C similar to A, the nodes nearest to ``C - A + B`` are offered as
    targets, and the first one that yields a new edge C→? with the relation
    of A→B is kept (one analogy per C). The confidence of the new edge is
    ``sim(A, C) * sim(target, ?) * config.INFERENCE_DECAY``; candidates
    below ``config.INFERENCE_CONFIDENCE_MIN`` are skipped.

    Returns:
        Number of edges created; never more than
        ``config.MAX_INFERENCES_PER_CYCLE // 4``.
    """
    budget = config.MAX_INFERENCES_PER_CYCLE // 4
    count = 0

    # Find pairs with strong, specific relations
    strong_edges = [
        e for e in self.graph.edges.values()
        if e.confidence > 0.7 and e.relation not in ("similar_to", "related_to")
    ]

    if len(strong_edges) < 2:
        return 0

    # Sample pairs to compare
    sample_size = min(20, len(strong_edges))
    sampled = np.random.choice(len(strong_edges), sample_size, replace=False)

    for i in sampled:
        if count >= budget:
            break

        edge = strong_edges[i]
        node_a = self.graph.get_node(edge.from_node)
        node_b = self.graph.get_node(edge.to_node)

        if not node_a or not node_b:
            continue

        # Find nodes similar to A (potential C candidates)
        similar_to_a = self.graph.find_similar_to_node(
            node_a.id, top_k=5,
            min_similarity=config.ANALOGICAL_SIMILARITY_MIN
        )

        for node_c, sim_ac in similar_to_a:
            # Enforce the budget per C as well, so one sampled edge with
            # several similar nodes cannot overshoot it.
            if count >= budget:
                break

            if node_c.id == node_b.id:
                continue

            # Vector arithmetic: ? ≈ C - A + B
            target_vector = utils.normalize(
                node_c.vector - node_a.vector + node_b.vector
            )

            # Find nearest to target vector
            candidates = self.graph.find_similar_nodes(
                target_vector, top_k=3,
                min_similarity=config.ANALOGICAL_SIMILARITY_MIN,
                exclude_ids={node_a.id, node_b.id, node_c.id}
            )

            for candidate_node, sim_score in candidates:
                if self.graph.edge_exists(node_c.id, candidate_node.id, edge.relation):
                    continue

                inferred_confidence = sim_ac * sim_score * config.INFERENCE_DECAY

                if inferred_confidence < config.INFERENCE_CONFIDENCE_MIN:
                    continue

                new_edge = self.graph.add_edge(
                    from_id=node_c.id,
                    to_id=candidate_node.id,
                    relation=edge.relation,
                    weight=inferred_confidence,
                    confidence=inferred_confidence,
                    source="inferred"
                )

                if new_edge:
                    count += 1
                    break  # One analogy per C

    return count
|
| 922 |
+
|
| 923 |
+
def _infer_relation(self, rel_ab: str, rel_bc: str) -> str:
|
| 924 |
+
"""Determine relation type for transitive inference A→C from A→B→C."""
|
| 925 |
+
# Same relation → same
|
| 926 |
+
if rel_ab == rel_bc:
|
| 927 |
+
return rel_ab
|
| 928 |
+
|
| 929 |
+
# Specific known transitive patterns
|
| 930 |
+
transitive_map = {
|
| 931 |
+
("is_a", "is_a"): "is_a",
|
| 932 |
+
("part_of", "part_of"): "part_of",
|
| 933 |
+
("is_a", "has"): "has",
|
| 934 |
+
("is_a", "located_in"): "located_in",
|
| 935 |
+
("part_of", "located_in"): "located_in",
|
| 936 |
+
("is_a", "used_for"): "used_for",
|
| 937 |
+
("causes", "causes"): "causes",
|
| 938 |
+
("follows", "follows"): "follows",
|
| 939 |
+
("requires", "requires"): "requires",
|
| 940 |
+
("instance_of", "is_a"): "instance_of",
|
| 941 |
+
}
|
| 942 |
+
|
| 943 |
+
return transitive_map.get((rel_ab, rel_bc), "inferred_relation")
|
| 944 |
+
|
| 945 |
+
# ═══════════════════════════════════════════════════
|
| 946 |
+
# PHASE 4: ABSTRACT
|
| 947 |
+
# ═══════════════════════════════════════════════════
|
| 948 |
+
|
| 949 |
+
def _phase_abstract(self):
    """
    Group similar nodes under abstraction nodes.

    Level 1 runs on every call and clusters ``entity``, ``fact`` and
    ``concept`` nodes into ``abstraction`` nodes. On cycles that are a
    multiple of ``2 * config.COMPRESS_INTERVAL``, and only when enough
    abstractions exist, those are clustered again into
    ``meta_abstraction`` nodes. Nothing happens while the graph holds fewer
    than ``2 * config.CLUSTER_MIN_SIZE`` nodes.
    """
    min_population = config.CLUSTER_MIN_SIZE * 2
    if len(self.graph.nodes) < min_population:
        return

    # ── Level 1: Concrete → Abstraction ──
    abstractions_created = self._create_abstractions(
        source_types=["entity", "fact", "concept"],
        abstraction_type="abstraction"
    )

    # ── Level 2+: Abstraction → Meta-Abstraction ──
    meta_round_due = self._total_cycles % (config.COMPRESS_INTERVAL * 2) == 0
    if meta_round_due and (
        len(self.graph.get_nodes_by_type("abstraction"))
        >= config.CLUSTER_MIN_SIZE * 2
    ):
        abstractions_created += self._create_abstractions(
            source_types=["abstraction"],
            abstraction_type="meta_abstraction"
        )

    if abstractions_created > 0:
        self._metrics["abstractions_created"] += abstractions_created
        self._operations_this_cycle += abstractions_created
        if config.LOG_THINKING_DETAILS:
            print(f"[THINKER/ABSTRACT] Created {abstractions_created} abstractions")
|
| 981 |
+
|
| 982 |
+
def _create_abstractions(
    self,
    source_types: List[str],
    abstraction_type: str
) -> int:
    """
    Create abstraction nodes from clusters of source-typed nodes.

    All nodes of the given ``source_types`` are clustered by vector
    similarity. A cluster is skipped when any member already has an
    outgoing ``instance_of`` edge, or when the abstraction depth limit is
    reached. Otherwise a node of type ``abstraction_type`` is created at
    the normalized cluster centroid, labelled with up to three keywords
    from the members' content, and every member is linked to it with an
    ``instance_of`` edge.

    Returns:
        Number of abstraction nodes created, at most
        ``config.THINKING_BATCH_SIZE // 2``.
    """
    # Gather source nodes
    pool = [
        node
        for stype in source_types
        for node in self.graph.get_nodes_by_type(stype)
    ]
    if len(pool) < config.CLUSTER_MIN_SIZE:
        return 0

    # Build vector matrix for clustering
    vectors = np.array([member.vector for member in pool], dtype=np.float32)
    node_ids = [member.id for member in pool]

    clusters = utils.find_natural_clusters(
        vectors,
        similarity_threshold=config.CLUSTER_SIMILARITY_INTRA
    )

    created = 0
    for cluster_indices in clusters:
        if created >= config.THINKING_BATCH_SIZE // 2:
            break

        member_ids = [node_ids[i] for i in cluster_indices]

        # A single member with an outgoing "instance_of" edge marks the
        # whole cluster as already abstracted.
        if any(
            edge.relation == "instance_of"
            for mid in member_ids
            for edge in self.graph.get_edges_from(mid)
        ):
            continue

        # Compute centroid
        centroid = utils.vector_mean(list(vectors[cluster_indices]))

        # Generate label from common keywords
        member_texts = (
            self.graph.nodes[mid].content
            for mid in member_ids
            if mid in self.graph.nodes
        )
        keywords = utils.extract_keywords(" ".join(member_texts), max_keywords=5)
        label = " + ".join(keywords[:3]) if keywords else "abstract_concept"

        # Check depth limit
        current_depth = 0
        if abstraction_type == "meta_abstraction":
            for mid in member_ids:
                current_depth = max(
                    current_depth, self.graph._get_abstraction_depth(mid)
                )

        if current_depth >= config.MAX_ABSTRACTION_DEPTH:
            continue

        # Create abstraction node
        abs_node = self.graph.add_node(
            content=f"[{abstraction_type}] {label}",
            node_type=abstraction_type,
            source="inferred",
            vector=utils.normalize(centroid),
            weight=config.ABSTRACTION_MIN_CONFIDENCE
        )
        if not abs_node:
            continue

        # Link members to abstraction
        for mid in member_ids:
            self.graph.add_edge(
                from_id=mid,
                to_id=abs_node.id,
                relation="instance_of",
                weight=0.8,
                confidence=0.8,
                source="inferred"
            )

        created += 1

    return created
|
| 1080 |
+
|
| 1081 |
+
# ═══════════════════════════════════════════════════
|
| 1082 |
+
# PHASE 5: STRENGTHEN / WEAKEN
|
| 1083 |
+
# ═══════════════════════════════════════════════════
|
| 1084 |
+
|
| 1085 |
+
def _phase_strengthen_weaken(self):
    """
    Decay unused inferred edges and boost well-connected nodes.

    Every ``config.WEIGHT_DECAY_INTERVAL_CYCLES`` cycles, each inferred edge
    whose ``used_count`` is zero is decayed. On every call, a random sample
    of at most ``config.THINKING_BATCH_SIZE`` nodes is inspected; nodes with
    more than three connections get a weight bonus proportional to their
    connection count (counted up to 20), capped at ``config.WEIGHT_MAX``.
    The number of boosted nodes is recorded under the ``edges_reinforced``
    metric.
    """
    # ── Weaken unused edges (periodic) ──
    if self._total_cycles % config.WEIGHT_DECAY_INTERVAL_CYCLES == 0:
        decay_count = 0

        # Iterate over a snapshot of the edge collection
        for edge in list(self.graph.edges.values()):
            if edge.used_count != 0 or edge.source != "inferred":
                continue
            self.graph.decay_edge(edge.id)
            decay_count += 1

        self._metrics["edges_decayed"] += decay_count
        self._operations_this_cycle += decay_count

        if config.LOG_THINKING_DETAILS and decay_count > 0:
            print(f"[THINKER/WEAKEN] Decayed {decay_count} unused edges")

    # ── Boost well-connected nodes ──
    all_nodes = list(self.graph.nodes.values())
    sample_size = min(config.THINKING_BATCH_SIZE, len(all_nodes))
    if sample_size == 0:
        return

    reinforced = 0
    for pick in np.random.choice(len(all_nodes), sample_size, replace=False):
        node = all_nodes[pick]
        if not node.connections > 3:
            continue

        bonus = config.NODE_WEIGHT_CONNECTION_BONUS * min(node.connections, 20)
        boosted = min(node.weight + bonus, config.WEIGHT_MAX)
        if boosted != node.weight:
            self.graph.update_node_weight(node.id, boosted)
            reinforced += 1

    self._metrics["edges_reinforced"] += reinforced
    self._operations_this_cycle += reinforced
|
| 1127 |
+
|
| 1128 |
+
# ═══════════════════════════════════════════════════
|
| 1129 |
+
# PHASE 6: COMPRESS
|
| 1130 |
+
# ═══════════════════════════════════════════════════
|
| 1131 |
+
|
| 1132 |
+
def _phase_compress(self):
    """
    Keep the graph compact.

    Runs only on cycles that are a multiple of ``config.COMPRESS_INTERVAL``.
    In order: merges up to ten redundant node pairs, prunes weak edges, then
    prunes orphan nodes, and records the three counts in the metrics.
    """
    if self._total_cycles % config.COMPRESS_INTERVAL != 0:
        return

    # ── Merge redundant nodes ──
    merged = sum(
        1
        for id_keep, id_remove, _similarity
        in self.graph.find_redundant_pairs(limit=10)
        if self.graph.merge_nodes(id_keep, id_remove)
    )

    # ── Prune weak edges, then orphan nodes ──
    pruned_edges = self.graph.prune_weak_edges()
    pruned_nodes = self.graph.prune_orphan_nodes()

    total_ops = merged + pruned_edges + pruned_nodes

    for metric_name, amount in (
        ("nodes_merged", merged),
        ("edges_pruned", pruned_edges),
        ("nodes_pruned", pruned_nodes),
    ):
        self._metrics[metric_name] += amount
    self._operations_this_cycle += total_ops

    if config.LOG_THINKING_DETAILS and total_ops > 0:
        print(
            f"[THINKER/COMPRESS] Merged {merged} nodes, "
            f"pruned {pruned_edges} edges, {pruned_nodes} orphan nodes"
        )
|
| 1166 |
+
|
| 1167 |
+
# ═══════════════════════════════════════════════════
|
| 1168 |
+
# PHASE 7: VALIDATE
|
| 1169 |
+
# ═══════════════════════════════════════════════════
|
| 1170 |
+
|
| 1171 |
+
def _phase_validate(self):
    """
    Check the graph for logical problems and repair them.

    Runs only on cycles that are a multiple of ``config.VALIDATE_INTERVAL``.
    Contradictions are resolved first, then circular inferences are broken.
    The combined count is recorded under ``contradictions_resolved``.
    """
    if self._total_cycles % config.VALIDATE_INTERVAL != 0:
        return

    # Left operand runs first: contradictions, then circular inferences.
    resolved = self._resolve_contradictions() + self._break_circular_inferences()

    if resolved > 0:
        self._metrics["contradictions_resolved"] += resolved
        self._operations_this_cycle += resolved
        if config.LOG_THINKING_DETAILS:
            print(f"[THINKER/VALIDATE] Resolved {resolved} issues")
|
| 1193 |
+
|
| 1194 |
+
def _resolve_contradictions(self) -> int:
    """
    Find and resolve contradictory edges between the same pair of nodes.

    For a random sample of at most ``config.THINKING_BATCH_SIZE`` nodes, the
    outgoing edges are grouped by target node. When two edges to the same
    target carry opposing relations (for example ``causes`` and
    ``prevents``), the edge with the lower confidence is removed; on a tie
    the edge that comes first in the group is kept.

    An edge that has been removed is never compared again, so each edge is
    removed, and counted, at most once.

    Returns:
        Number of edges removed.
    """
    # Sample nodes to check
    node_ids = list(self.graph.nodes.keys())
    sample_size = min(config.THINKING_BATCH_SIZE, len(node_ids))
    if sample_size == 0:
        return 0

    # Unordered pairs of relations that cannot both hold for the same A→B.
    # frozenset makes the lookup independent of which edge comes first.
    contradictory_pairs = {
        frozenset(("causes", "prevents")),
        frozenset(("is_a", "opposite_of")),
        frozenset(("synonym_of", "opposite_of")),
        frozenset(("requires", "prevents")),
    }

    resolved = 0
    sampled = np.random.choice(len(node_ids), sample_size, replace=False)

    for idx in sampled:
        # Group edges by target
        target_edges: Dict[str, List[Edge]] = {}
        for edge in self.graph.get_edges_from(node_ids[idx]):
            target_edges.setdefault(edge.to_node, []).append(edge)

        # Check for contradictory relations to same target
        for edges in target_edges.values():
            if len(edges) < 2:
                continue

            removed_ids = set()
            for i, first in enumerate(edges):
                if first.id in removed_ids:
                    continue

                for second in edges[i + 1:]:
                    if second.id in removed_ids:
                        continue
                    relations = frozenset((first.relation, second.relation))
                    if relations not in contradictory_pairs:
                        continue

                    # Keep higher confidence, remove lower
                    loser = second if first.confidence >= second.confidence else first
                    self.graph.remove_edge(loser.id)
                    removed_ids.add(loser.id)
                    resolved += 1

                    if loser is first:
                        # `first` no longer exists; stop comparing it.
                        break

    return resolved
|
| 1248 |
+
|
| 1249 |
+
def _break_circular_inferences(self) -> int:
    """
    Detect inference chains that loop back on themselves and break them.

    A random sample of at most ``config.THINKING_BATCH_SIZE`` inferred edges
    is checked. An edge closes a cycle when a path (depth ≤ 4) leads from
    its target back to its source. In that case the inferred edge with the
    lowest weight on the cycle is removed.

    Sampled edges that an earlier iteration already removed are skipped:
    without the edge there is no cycle left to break.

    Returns:
        Number of edges removed.
    """
    broken = 0

    # Sample inferred edges
    inferred_edges = [
        e for e in self.graph.edges.values()
        if e.source == "inferred"
    ]

    sample_size = min(config.THINKING_BATCH_SIZE, len(inferred_edges))
    if sample_size == 0:
        return 0

    sampled_indices = np.random.choice(
        len(inferred_edges), sample_size, replace=False
    )

    for idx in sampled_indices:
        edge = inferred_edges[idx]

        # The list above is a snapshot; this edge may have been removed as
        # the weakest link of a cycle handled earlier in this loop.
        if edge.id not in self.graph.edges:
            continue

        # Does a path exist from to_node back to from_node?
        paths = self.graph.find_paths(
            edge.to_node, edge.from_node,
            max_depth=4, max_paths=1
        )
        if not paths:
            continue

        # Cycle detected — remove weakest inferred edge in cycle.
        # NOTE(review): assumes a path is a list containing edge ids
        # (possibly mixed with node ids); entries that are not edge ids
        # are ignored here — confirm against graph.find_paths.
        cycle_path = [edge.from_node, edge.id] + paths[0]
        inferred_on_cycle = [
            self.graph.edges[item_id]
            for item_id in cycle_path
            if item_id in self.graph.edges
            and self.graph.edges[item_id].source == "inferred"
        ]
        if not inferred_on_cycle:
            continue

        # min() returns the first of several equally weak edges.
        weakest = min(inferred_on_cycle, key=lambda e: e.weight)
        self.graph.remove_edge(weakest.id)
        broken += 1

    return broken
|
| 1298 |
+
|
| 1299 |
+
# ═══════════════════════════════════════════════════
|
| 1300 |
+
# PHASE 8: SELF-QUESTION
|
| 1301 |
+
# ═══════════════════════════════════════════════════
|
| 1302 |
+
|
| 1303 |
+
def _phase_self_question(self):
    """
    Ask internal questions to fill knowledge gaps.

    Runs only on cycles that are a multiple of
    ``config.SELF_QUESTION_INTERVAL`` and only when the graph holds at least
    ten nodes. Three strategies run in order: bridging disconnected nodes,
    filling missing relations on important nodes, and challenging weak
    inferences. Each strategy counts as a fixed number of questions asked
    (3, 3 and 2); its return value counts as questions answered.
    """
    if self._total_cycles % config.SELF_QUESTION_INTERVAL != 0:
        return

    if len(self.graph.nodes) < 10:
        return

    # (strategy, number of questions it counts as)
    strategies = (
        (self._bridge_disconnected, 3),
        (self._fill_relation_gaps, 3),
        (self._challenge_weak_inferences, 2),
    )

    questions_asked = 0
    questions_answered = 0
    for run_strategy, asked in strategies:
        questions_answered += run_strategy()
        questions_asked += asked

    self._metrics["self_questions_asked"] += questions_asked
    self._metrics["self_questions_answered"] += questions_answered
    self._operations_this_cycle += questions_answered

    if config.LOG_THINKING_DETAILS and questions_answered > 0:
        print(
            f"[THINKER/SELF-Q] Asked {questions_asked} questions, "
            f"answered {questions_answered}"
        )
|
| 1339 |
+
|
| 1340 |
+
def _bridge_disconnected(self) -> int:
    """
    Try to link nodes that have no path between them.

    Three random node pairs are drawn. A pair with no path from the first
    to the second (depth ≤ 4), but with cosine similarity above 80 % of
    ``config.SIMILARITY_THRESHOLD``, is joined by an inferred
    ``inferred_relation`` edge.

    Returns:
        Number of edges created.
    """
    node_ids = list(self.graph.nodes.keys())
    if len(node_ids) < 10:
        return 0

    connected = 0
    attempts_left = 3

    while attempts_left > 0:
        attempts_left -= 1

        first, second = np.random.choice(len(node_ids), 2, replace=False)
        id_a, id_b = node_ids[first], node_ids[second]

        node_a = self.graph.get_node(id_a)
        node_b = self.graph.get_node(id_b)
        if not (node_a and node_b):
            continue

        # Already reachable → nothing to bridge
        if self.graph.find_paths(id_a, id_b, max_depth=4, max_paths=1):
            continue

        # Only bridge pairs that are at least somewhat similar
        sim = utils.cosine_similarity(node_a.vector, node_b.vector)
        if not sim > config.SIMILARITY_THRESHOLD * 0.8:
            continue

        edge = self.graph.add_edge(
            from_id=id_a,
            to_id=id_b,
            relation="inferred_relation",
            weight=sim * 0.7,
            confidence=sim * 0.6,
            source="inferred"
        )
        if edge:
            connected += 1

    return connected
|
| 1379 |
+
|
| 1380 |
+
def _fill_relation_gaps(self) -> int:
    """
    Find high-weight nodes missing common relations and try to fill them.

    The five nodes with the highest ``weight * connections`` are inspected.
    For each common relation a node has no outgoing edge for, the first
    similar node that does have that relation serves as a template: the
    targets of its edges with that relation are offered to the node as
    inferred edges, stopping at the first edge actually created. Only that
    first template is tried, whether or not it produced an edge.

    The confidence of a new edge is
    ``similarity * template confidence * config.INFERENCE_DECAY``; offers
    below ``config.INFERENCE_CONFIDENCE_MIN`` are skipped, as are edges that
    would point from the node to itself.

    Returns:
        Number of edges created; never more than five.
    """
    max_fills = 5
    filled = 0

    # Get well-known nodes
    important_nodes = sorted(
        self.graph.nodes.values(),
        key=lambda n: n.weight * n.connections,
        reverse=True
    )[:5]

    common_relations = ["is_a", "part_of", "has", "used_for", "related_to"]

    for node in important_nodes:
        existing_relations = {
            edge.relation for edge in self.graph.get_edges_from(node.id)
        }

        for relation in common_relations:
            if relation in existing_relations:
                continue

            # Look for similar nodes that have this relation
            candidates = self.graph.find_similar_to_node(
                node.id, top_k=5,
                min_similarity=config.SIMILARITY_THRESHOLD
            )

            for candidate, sim in candidates:
                # Fetch the candidate's edges once and keep the relevant ones
                template_edges = [
                    e for e in self.graph.get_edges_from(candidate.id)
                    if e.relation == relation
                ]
                if not template_edges:
                    continue

                # This candidate has the relation → node might too
                for template in template_edges:
                    target = self.graph.get_node(template.to_node)
                    if not target or target.id == node.id:
                        # Missing target, or the edge would be a self-loop
                        continue
                    if self.graph.edge_exists(node.id, target.id, relation):
                        continue

                    confidence = sim * template.confidence * config.INFERENCE_DECAY
                    if not confidence >= config.INFERENCE_CONFIDENCE_MIN:
                        continue

                    new_edge = self.graph.add_edge(
                        from_id=node.id,
                        to_id=target.id,
                        relation=relation,
                        weight=confidence,
                        confidence=confidence,
                        source="inferred"
                    )
                    if new_edge:
                        filled += 1
                        if filled >= max_fills:
                            # Enforce the cap the moment it is reached
                            return filled
                        break

                break  # One fill per missing relation

    return filled
|
| 1441 |
+
|
| 1442 |
+
def _challenge_weak_inferences(self) -> int:
    """
    Re-examine the weakest inferred edges.

    For each of up to 20 weakest inferred edges, other paths (depth ≤ 4,
    at most three) between the same endpoints are searched:

    - If a path that does not use the edge itself exists, the edge's weight
      and confidence are multiplied by ``1 + 0.05 * number_of_paths``
      (capped at ``config.WEIGHT_MAX`` and 1.0) and the edge is persisted.
    - If there is no such path and the weight is below
      ``2 * config.PRUNE_WEIGHT_THRESHOLD``, the edge is removed.

    Returns:
        Number of edges strengthened or removed.
    """
    improved = 0

    weak_edges = self.graph.get_weakest_edges(
        limit=20, source_filter="inferred"
    )

    for edge in weak_edges:
        # Always work on the object stored in the graph, so that the values
        # persisted below are the ones just written. Skip edges that are no
        # longer stored.
        if edge.id not in self.graph.edges:
            continue
        live_edge = self.graph.edges[edge.id]

        from_node = self.graph.get_node(live_edge.from_node)
        to_node = self.graph.get_node(live_edge.to_node)

        if not from_node or not to_node:
            continue

        # Check if there's additional evidence
        # (other paths between these nodes)
        paths = self.graph.find_paths(
            live_edge.from_node, live_edge.to_node,
            max_depth=4, max_paths=3
        )

        # Filter paths that don't use this edge
        alternative_paths = [p for p in paths if live_edge.id not in p]

        if alternative_paths:
            # Multiple paths support this edge → strengthen
            support_factor = 1.0 + 0.05 * len(alternative_paths)
            live_edge.weight = min(
                live_edge.weight * support_factor,
                config.WEIGHT_MAX
            )
            live_edge.confidence = min(
                live_edge.confidence * support_factor, 1.0
            )
            live_edge.mark_dirty()
            self.graph.memory.save_edge(live_edge.to_dict())
            improved += 1
        elif live_edge.weight < config.PRUNE_WEIGHT_THRESHOLD * 2:
            # No alternative support and already very weak → remove
            self.graph.remove_edge(live_edge.id)
            improved += 1

    return improved
|
| 1495 |
+
|
| 1496 |
+
# ═══════════════════════════════════════════════════
|
| 1497 |
+
# USER KNOWLEDGE EXTRACTION
|
| 1498 |
+
# ═══════════════════════════════════════════════════
|
| 1499 |
+
|
| 1500 |
+
def extract_from_user_message(self, message: str):
    """
    Extract knowledge from a user's chat message.

    Called by brain.py after processing a user request.

    Adds graph nodes with source "user_chat":
      - an "entity" node for each of the first 5 extracted entities,
      - a "concept" node for every keyword that is not also an entity,
      - a "fact" node when the message is longer than 30 characters and
        contains a definitional marker word (e.g. "is", "means",
        "adalah"); the fact node gets a "related_to" edge to each entity
        node created above.

    NOTE: the fact node's content is the message itself, truncated to
    500 characters. Apart from that node, only extracted keywords and
    entities are stored.

    Args:
        message: Raw chat text. Ignored when empty/None, shorter than 10
            characters after stripping, or yielding fewer than 2 keywords.

    Returns:
        None.
    """
    # Imported locally so this method does not depend on the module's
    # top-of-file import block.
    import re

    if not message:
        return

    message = message.strip()
    if len(message) < 10:
        return

    # Extract keywords; too few keywords means nothing worth storing.
    keywords = utils.extract_keywords(message, max_keywords=15)
    if len(keywords) < 2:
        return

    # Extract entities
    entities = utils.extract_entities_simple(message)

    # Create entity nodes (capped at 5 per message).
    entity_nodes = []
    for entity in entities[:5]:
        node = self.graph.add_node(
            content=entity,
            node_type="entity",
            source="user_chat",
            weight=config.USER_KNOWLEDGE_CONFIDENCE
        )
        if node:
            entity_nodes.append(node)

    # Create concept nodes from keywords not already entities.
    # Concepts get a lower weight than entities (factor 0.7).
    entity_lower = {e.lower() for e in entities}
    for kw in keywords:
        if kw.lower() not in entity_lower:
            self.graph.add_node(
                content=kw,
                node_type="concept",
                source="user_chat",
                weight=config.USER_KNOWLEDGE_CONFIDENCE * 0.7
            )

    # If message contains informational content, create fact node.
    # Markers are matched as whole words: plain substring matching would
    # fire on "is" inside "this"/"list" or "are" inside "share", turning
    # almost every message into a "fact".
    fact_markers = (
        "adalah", "merupakan", "yaitu", "ialah",
        "is", "are", "means", "defined",
    )
    marker_pattern = r"\b(?:" + "|".join(fact_markers) + r")\b"
    if len(message) <= 30 or not re.search(marker_pattern, message.lower()):
        return

    fact_node = self.graph.add_node(
        content=message[:500],
        node_type="fact",
        source="user_chat",
        weight=config.USER_KNOWLEDGE_CONFIDENCE
    )
    if not fact_node:
        return

    # Connect fact to entities mentioned
    for en in entity_nodes:
        self.graph.add_edge(
            from_id=fact_node.id,
            to_id=en.id,
            relation="related_to",
            confidence=config.USER_KNOWLEDGE_CONFIDENCE * 0.8,
            source="user_chat"
        )
|