Spaces:

sadidft
/

CogniEngine

Sleeping

App Files Files Community

sadidft committed on Mar 8

Commit

f43310f

verified ·

1 Parent(s): d82945e

Create thinker.py

Browse files

Files changed (1) hide show

thinker.py +1565 -0

thinker.py ADDED Viewed

	@@ -0,0 +1,1565 @@

+"""
+Cogni-Engine v1 — 24/7 Thinking Loop
+The autonomous cognitive process that never stops.
+8 phases of continuous reasoning that make the AI smarter over time.
+Phases:
+1. INGEST   — Scan /data/ folder, parse new JSONL files
+2. CONNECT  — Find hidden connections between nodes via similarity
+3. INFER    — Transitive & analogical inference to discover new knowledge
+4. ABSTRACT — Cluster similar nodes into higher-level abstractions
+5. STRENGTHEN/WEAKEN — Reinforce used edges, decay unused ones
+6. COMPRESS — Merge redundant nodes, prune dead edges
+7. VALIDATE — Check logical consistency, resolve contradictions
+8. SELF-QUESTION — Generate and answer internal questions to find gaps
+"""
+import os
+import json
+import time
+import threading
+import traceback
+from typing import List, Dict, Optional, Tuple
+import numpy as np
+import config
+import utils
+from knowledge import KnowledgeGraph, Node, Edge, ReasoningChain
+# ═══════════════════════════════════════════════════════════
+# THINKER ENGINE
+# ═══════════════════════════════════════════════════════════
+class Thinker:
+    """
+    Autonomous thinking engine.
+    Runs in a background thread, continuously processing
+    and enriching the knowledge graph.
+    """
+    def __init__(self, graph: KnowledgeGraph):
+        self.graph = graph
+        self._thread: Optional[threading.Thread] = None
+        self._running = False
+        self._paused = False
+        # Thinking state
+        self._cycle_count = 0
+        self._total_cycles = 0
+        self._current_phase = "init"
+        self._phase_index = 0
+        self._interval = config.THINKING_INTERVAL_FAST
+        self._operations_this_cycle = 0
+        # File tracking
+        self._file_checksums: Dict[str, str] = {}
+        # Phase definitions (ordered)
+        self._phases = [
+            ("ingest", self._phase_ingest),
+            ("connect", self._phase_connect),
+            ("infer", self._phase_infer),
+            ("abstract", self._phase_abstract),
+            ("strengthen", self._phase_strengthen_weaken),
+            ("compress", self._phase_compress),
+            ("validate", self._phase_validate),
+            ("self_question", self._phase_self_question),
+        ]
+        # Metrics
+        self._metrics = {
+            "nodes_ingested": 0,
+            "edges_ingested": 0,
+            "connections_found": 0,
+            "inferences_made": 0,
+            "abstractions_created": 0,
+            "edges_reinforced": 0,
+            "edges_decayed": 0,
+            "nodes_merged": 0,
+            "edges_pruned": 0,
+            "nodes_pruned": 0,
+            "contradictions_resolved": 0,
+            "self_questions_asked": 0,
+            "self_questions_answered": 0,
+        }
+    # ───────────────────────────────────────────────────
+    # LIFECYCLE
+    # ───────────────────────────────────────────────────
+    def start(self):
+        """Start the thinking loop in a background thread."""
+        if self._running:
+            print("[THINKER] Already running.")
+            return
+        # Load previous state
+        self._load_state()
+        self._running = True
+        self._thread = threading.Thread(
+            target=self._thinking_loop,
+            name="CogniThinker",
+            daemon=True
+        )
+        self._thread.start()
+        print(f"[THINKER] Started. Resuming from cycle {self._total_cycles}.")
+    def stop(self):
+        """Stop the thinking loop gracefully."""
+        if not self._running:
+            return
+        print("[THINKER] Stopping...")
+        self._running = False
+        if self._thread:
+            self._thread.join(timeout=30)
+        self._save_state()
+        print("[THINKER] Stopped.")
+    def pause(self):
+        """
+        Pause thinking (for heavy API load).
+        Only sets a flag: while it is set the loop sleeps in 1-second steps
+        and runs no phases. The background thread itself keeps running.
+        """
+        self._paused = True
+    def resume(self):
+        """
+        Resume thinking after pause().
+        Clears the pause flag; the loop picks it up on its next 1-second check.
+        """
+        self._paused = False
+    @property
+    def is_running(self) -> bool:
+        """True between a successful start() and the next stop()."""
+        return self._running
+    @property
+    def current_phase(self) -> str:
+        """Name of the phase the loop selected most recently ("init" by default)."""
+        return self._current_phase
+    @property
+    def total_cycles(self) -> int:
+        """Number of full passes through all phases, including restored history."""
+        return self._total_cycles
+    @property
+    def metrics(self) -> dict:
+        return dict(self._metrics)
+    def get_status(self) -> dict:
+        """Get detailed thinker status."""
+        return {
+            "running": self._running,
+            "paused": self._paused,
+            "current_phase": self._current_phase,
+            "cycle_count": self._cycle_count,
+            "total_cycles": self._total_cycles,
+            "interval_seconds": self._interval,
+            "operations_last_cycle": self._operations_this_cycle,
+            "metrics": dict(self._metrics)
+        }
+    # ───────────────────────────────────────────────────
+    # STATE PERSISTENCE
+    # ───────────────────────────────────────────────────
+    def _load_state(self):
+        """Load thinking state from DB."""
+        state = self.graph.memory.load_thinking_state()
+        self._total_cycles = state.get("total_cycles", 0)
+        self._cycle_count = state.get("current_cycle", 0)
+        self._current_phase = state.get("phase", "init")
+        saved_metrics = state.get("metrics", {})
+        if saved_metrics:
+            for key in self._metrics:
+                if key in saved_metrics:
+                    self._metrics[key] = saved_metrics[key]
+        # Load file checksums
+        self._file_checksums = self.graph.memory.load_file_checksums()
+    def _save_state(self):
+        """Save thinking state to DB."""
+        self.graph.memory.save_thinking_state({
+            "current_cycle": self._cycle_count,
+            "total_cycles": self._total_cycles,
+            "cursor_position": "",
+            "phase": self._current_phase,
+            "metrics": dict(self._metrics)
+        })
+    # ───────────────────────────────────────────────────
+    # MAIN LOOP
+    # ───────────────────────────────────────────────────
+    def _thinking_loop(self):
+        """
+        Main thinking loop. Runs continuously until stopped.
+        Cycles through all 8 phases, adapting speed based on activity.
+        """
+        print("[THINKER] Thinking loop started.")
+        while self._running:
+            try:
+                # Wait if paused
+                if self._paused:
+                    time.sleep(1)
+                    continue
+                # Execute current phase
+                self._operations_this_cycle = 0
+                phase_name, phase_func = self._phases[self._phase_index]
+                self._current_phase = phase_name
+                try:
+                    phase_func()
+                except Exception as e:
+                    print(f"[THINKER] Error in phase '{phase_name}': {e}")
+                    if config.LOG_THINKING_DETAILS:
+                        traceback.print_exc()
+                # Advance to next phase
+                self._phase_index = (self._phase_index + 1) % len(self._phases)
+                # Count cycles (one full rotation = 1 cycle)
+                if self._phase_index == 0:
+                    self._cycle_count += 1
+                    self._total_cycles += 1
+                    # Periodic state save
+                    if self._total_cycles % config.SYNC_INTERVAL_CYCLES == 0:
+                        self._save_state()
+                        self.graph.sync()
+                    # Adaptive speed
+                    self._adapt_speed()
+                    # Log progress periodically
+                    if self._total_cycles % 100 == 0 and config.LOG_THINKING_DETAILS:
+                        stats = self.graph.get_stats()
+                        score = self.graph.get_intelligence_score()
+                        print(
+                            f"[THINKER] Cycle {self._total_cycles}: "
+                            f"nodes={stats['total_nodes']}, "
+                            f"edges={stats['total_edges']}, "
+                            f"inferred={stats['inferred_edges']}, "
+                            f"score={score:.2f}"
+                        )
+                # Sleep between phases
+                time.sleep(self._interval / len(self._phases))
+            except Exception as e:
+                print(f"[THINKER] Loop error: {e}")
+                traceback.print_exc()
+                time.sleep(5)  # Recovery pause
+        print("[THINKER] Thinking loop ended.")
+    def _adapt_speed(self):
+        """Adjust thinking speed based on activity."""
+        if self._operations_this_cycle > config.THINKING_STABILITY_THRESHOLD:
+            # Active: think faster
+            self._interval = max(
+                config.THINKING_INTERVAL_FAST,
+                self._interval * 0.9
+            )
+        else:
+            # Stable: think slower
+            self._interval = min(
+                config.THINKING_INTERVAL_SLOW,
+                self._interval * 1.1
+            )
+    # ═══════════════════════════════════════════════════
+    # PHASE 1: INGEST
+    # ═══════════════════════════════════════════════════
+    def _phase_ingest(self):
+        """
+        Scan /data/ folder for new or changed JSONL files.
+        Parse entries and create nodes + edges in graph.
+        """
+        if not os.path.exists(config.DATA_DIR):
+            return
+        files = []
+        for fname in os.listdir(config.DATA_DIR):
+            if any(fname.endswith(ext) for ext in config.SUPPORTED_DATA_EXTENSIONS):
+                files.append(fname)
+        if not files:
+            return
+        for fname in files:
+            filepath = os.path.join(config.DATA_DIR, fname)
+            try:
+                with open(filepath, 'r', encoding='utf-8') as f:
+                    content = f.read()
+            except Exception as e:
+                print(f"[THINKER/INGEST] Error reading {fname}: {e}")
+                continue
+            # Check if file has changed
+            checksum = utils.hash_file_content(content)
+            if self._file_checksums.get(fname) == checksum:
+                continue  # File unchanged, skip
+            print(f"[THINKER/INGEST] Processing file: {fname}")
+            lines = content.strip().split('\n')
+            processed = 0
+            for line_num, line in enumerate(lines):
+                if processed >= config.MAX_LINES_PER_INGEST:
+                    break
+                line = line.strip()
+                if not line:
+                    continue
+                try:
+                    entry = json.loads(line)
+                except json.JSONDecodeError:
+                    continue
+                self._ingest_entry(entry, source=fname)
+                processed += 1
+            # Mark file as processed
+            self._file_checksums[fname] = checksum
+            self.graph.memory.save_file_checksum(fname, checksum, processed)
+            self._operations_this_cycle += processed
+            print(f"[THINKER/INGEST] Processed {processed} entries from {fname}")
+    def _ingest_entry(self, entry: dict, source: str = "data"):
+        """
+        Ingest a single data entry into the knowledge graph.
+        Creates nodes and edges based on entry type and fields.
+        """
+        entry_type = entry.get("type", "fact")
+        content = entry.get("content", "").strip()
+        if not content:
+            return
+        tags = entry.get("tags", [])
+        confidence = entry.get("confidence", config.DATA_KNOWLEDGE_CONFIDENCE)
+        domain = entry.get("domain", "")
+        related = entry.get("related", [])
+        # ── Create main content node ──
+        main_node = self.graph.add_node(
+            content=content,
+            node_type=self._map_entry_type_to_node_type(entry_type),
+            source="data",
+            weight=confidence,
+            tags=tags
+        )
+        if not main_node:
+            return
+        self._metrics["nodes_ingested"] += 1
+        # ── Handle domain as a concept node ──
+        if domain:
+            domain_node = self.graph.add_node(
+                content=domain,
+                node_type="concept",
+                source="data",
+                weight=0.8
+            )
+            if domain_node:
+                self.graph.add_edge(
+                    from_id=main_node.id,
+                    to_id=domain_node.id,
+                    relation="part_of",
+                    confidence=0.8,
+                    source="data"
+                )
+                self._metrics["edges_ingested"] += 1
+        # ── Handle related topics ──
+        for rel_topic in related:
+            rel_node = self.graph.add_node(
+                content=rel_topic,
+                node_type="concept",
+                source="data",
+                weight=0.7
+            )
+            if rel_node:
+                self.graph.add_edge(
+                    from_id=main_node.id,
+                    to_id=rel_node.id,
+                    relation="related_to",
+                    confidence=0.7,
+                    source="data"
+                )
+                self._metrics["edges_ingested"] += 1
+        # ── Type-specific handling ──
+        self._ingest_type_specific(entry, main_node, entry_type)
+    def _ingest_type_specific(self, entry: dict, main_node: Node, entry_type: str):
+        """Handle type-specific fields for data entries."""
+        # ── relation type: explicit from/to ──
+        if entry_type == "relation":
+            from_content = entry.get("from", "")
+            to_content = entry.get("to", "")
+            relation = entry.get("relation", "related_to")
+            if from_content and to_content:
+                from_node = self.graph.add_node(
+                    content=from_content, node_type="entity", source="data"
+                )
+                to_node = self.graph.add_node(
+                    content=to_content, node_type="entity", source="data"
+                )
+                if from_node and to_node:
+                    self.graph.add_edge(
+                        from_id=from_node.id,
+                        to_id=to_node.id,
+                        relation=relation,
+                        confidence=entry.get("confidence", 0.9),
+                        source="data"
+                    )
+                    self._metrics["edges_ingested"] += 1
+        # ── definition / term: term node + defined_as edge ──
+        elif entry_type in ("definition", "term"):
+            term = entry.get("term", "")
+            if term:
+                term_node = self.graph.add_node(
+                    content=term, node_type="entity", source="data"
+                )
+                if term_node:
+                    self.graph.add_edge(
+                        from_id=term_node.id,
+                        to_id=main_node.id,
+                        relation="defined_as",
+                        confidence=0.95,
+                        source="data"
+                    )
+                    self._metrics["edges_ingested"] += 1
+        # ── cause_effect: cause → effect ──
+        elif entry_type == "cause_effect":
+            cause = entry.get("cause", "")
+            effect = entry.get("effect", "")
+            if cause and effect:
+                cause_node = self.graph.add_node(
+                    content=cause, node_type="concept", source="data"
+                )
+                effect_node = self.graph.add_node(
+                    content=effect, node_type="concept", source="data"
+                )
+                if cause_node and effect_node:
+                    self.graph.add_edge(
+                        from_id=cause_node.id,
+                        to_id=effect_node.id,
+                        relation="causes",
+                        confidence=entry.get("confidence", 0.85),
+                        source="data"
+                    )
+                    self._metrics["edges_ingested"] += 1
+        # ── hierarchy: parent → children ──
+        elif entry_type == "hierarchy":
+            parent = entry.get("parent", "")
+            children = entry.get("children", [])
+            if parent and children:
+                parent_node = self.graph.add_node(
+                    content=parent, node_type="concept", source="data"
+                )
+                if parent_node:
+                    for child in children:
+                        child_node = self.graph.add_node(
+                            content=child, node_type="entity", source="data"
+                        )
+                        if child_node:
+                            self.graph.add_edge(
+                                from_id=child_node.id,
+                                to_id=parent_node.id,
+                                relation="is_a",
+                                confidence=0.9,
+                                source="data"
+                            )
+                            self._metrics["edges_ingested"] += 1
+        # ── comparison: subject_a ↔ subject_b ──
+        elif entry_type == "comparison":
+            subj_a = entry.get("subject_a", "")
+            subj_b = entry.get("subject_b", "")
+            if subj_a and subj_b:
+                node_a = self.graph.add_node(
+                    content=subj_a, node_type="entity", source="data"
+                )
+                node_b = self.graph.add_node(
+                    content=subj_b, node_type="entity", source="data"
+                )
+                if node_a and node_b:
+                    self.graph.add_edge(
+                        from_id=node_a.id, to_id=node_b.id,
+                        relation="related_to", confidence=0.8, source="data"
+                    )
+                    self.graph.add_edge(
+                        from_id=node_b.id, to_id=node_a.id,
+                        relation="related_to", confidence=0.8, source="data"
+                    )
+                    self._metrics["edges_ingested"] += 2
+        # ── qa: question → answer ──
+        elif entry_type == "qa":
+            question = entry.get("question", "")
+            answer = entry.get("answer", "")
+            if question and answer:
+                q_node = self.graph.add_node(
+                    content=question, node_type="concept", source="data"
+                )
+                a_node = self.graph.add_node(
+                    content=answer, node_type="fact", source="data"
+                )
+                if q_node and a_node:
+                    self.graph.add_edge(
+                        from_id=q_node.id, to_id=a_node.id,
+                        relation="defined_as", confidence=0.9, source="data"
+                    )
+                    self._metrics["edges_ingested"] += 1
+        # ── synonym: bidirectional synonym_of ──
+        elif entry_type == "synonym":
+            terms = entry.get("terms", [])
+            for i in range(len(terms)):
+                for j in range(i + 1, len(terms)):
+                    node_i = self.graph.add_node(
+                        content=terms[i], node_type="entity", source="data"
+                    )
+                    node_j = self.graph.add_node(
+                        content=terms[j], node_type="entity", source="data"
+                    )
+                    if node_i and node_j:
+                        self.graph.add_edge(
+                            from_id=node_i.id, to_id=node_j.id,
+                            relation="synonym_of", confidence=0.9, source="data"
+                        )
+                        self.graph.add_edge(
+                            from_id=node_j.id, to_id=node_i.id,
+                            relation="synonym_of", confidence=0.9, source="data"
+                        )
+                        self._metrics["edges_ingested"] += 2
+        # ── process / procedure: sequential steps ──
+        elif entry_type in ("process", "procedure"):
+            steps = entry.get("steps", [])
+            title = entry.get("title", "")
+            if title:
+                title_node = self.graph.add_node(
+                    content=title, node_type="concept", source="data"
+                )
+                if title_node:
+                    self.graph.add_edge(
+                        from_id=main_node.id, to_id=title_node.id,
+                        relation="defined_as", confidence=0.85, source="data"
+                    )
+            prev_step_node = None
+            for step_text in steps:
+                step_node = self.graph.add_node(
+                    content=step_text, node_type="fact", source="data"
+                )
+                if step_node:
+                    self.graph.add_edge(
+                        from_id=step_node.id, to_id=main_node.id,
+                        relation="part_of", confidence=0.8, source="data"
+                    )
+                    if prev_step_node:
+                        self.graph.add_edge(
+                            from_id=prev_step_node.id, to_id=step_node.id,
+                            relation="follows", confidence=0.9, source="data"
+                        )
+                        self._metrics["edges_ingested"] += 1
+                    prev_step_node = step_node
+                    self._metrics["edges_ingested"] += 1
+        # ── quote: author + content ──
+        elif entry_type == "quote":
+            author = entry.get("author", "")
+            if author:
+                author_node = self.graph.add_node(
+                    content=author, node_type="entity", source="data"
+                )
+                if author_node:
+                    self.graph.add_edge(
+                        from_id=main_node.id, to_id=author_node.id,
+                        relation="created_by", confidence=0.9, source="data"
+                    )
+                    self._metrics["edges_ingested"] += 1
+        # ── event: actors, location, date ──
+        elif entry_type == "event":
+            actors = entry.get("actors", [])
+            location = entry.get("location", "")
+            for actor in actors:
+                actor_node = self.graph.add_node(
+                    content=actor, node_type="entity", source="data"
+                )
+                if actor_node:
+                    self.graph.add_edge(
+                        from_id=actor_node.id, to_id=main_node.id,
+                        relation="related_to", confidence=0.85, source="data"
+                    )
+                    self._metrics["edges_ingested"] += 1
+            if location:
+                loc_node = self.graph.add_node(
+                    content=location, node_type="entity", source="data"
+                )
+                if loc_node:
+                    self.graph.add_edge(
+                        from_id=main_node.id, to_id=loc_node.id,
+                        relation="located_in", confidence=0.85, source="data"
+                    )
+                    self._metrics["edges_ingested"] += 1
+        # ── analogy: subject ↔ analogy ──
+        elif entry_type == "analogy":
+            subject = entry.get("subject", "")
+            analogy_text = entry.get("analogy", "")
+            if subject and analogy_text:
+                subj_node = self.graph.add_node(
+                    content=subject, node_type="concept", source="data"
+                )
+                ana_node = self.graph.add_node(
+                    content=analogy_text, node_type="concept", source="data"
+                )
+                if subj_node and ana_node:
+                    self.graph.add_edge(
+                        from_id=subj_node.id, to_id=ana_node.id,
+                        relation="analogous_to", confidence=0.75, source="data"
+                    )
+                    self._metrics["edges_ingested"] += 1
+        # ── Paragraph: extract keywords as connected entities ──
+        elif entry_type == "paragraph":
+            keywords = utils.extract_keywords(entry.get("content", ""), max_keywords=10)
+            for kw in keywords:
+                kw_node = self.graph.add_node(
+                    content=kw, node_type="concept", source="data",
+                    weight=0.6
+                )
+                if kw_node:
+                    self.graph.add_edge(
+                        from_id=main_node.id, to_id=kw_node.id,
+                        relation="related_to", confidence=0.6, source="data"
+                    )
+                    self._metrics["edges_ingested"] += 1
+    def _map_entry_type_to_node_type(self, entry_type: str) -> str:
+        """Map data entry type to graph node type."""
+        type_map = {
+            "fact": "fact",
+            "definition": "definition",
+            "explanation": "fact",
+            "description": "fact",
+            "property": "fact",
+            "statistic": "fact",
+            "measurement": "fact",
+            "term": "definition",
+            "abbreviation": "definition",
+            "jargon": "definition",
+            "slang": "definition",
+            "idiom": "definition",
+            "synonym": "entity",
+            "antonym": "entity",
+            "quote": "fact",
+            "rule": "fact",
+            "example": "fact",
+            "analogy": "concept",
+            "opinion": "fact",
+            "paragraph": "fact",
+            "relation": "fact",
+            "cause_effect": "fact",
+            "comparison": "fact",
+            "hierarchy": "concept",
+            "composition": "concept",
+            "dependency": "fact",
+            "contradiction": "fact",
+            "timeline": "fact",
+            "process": "fact",
+            "procedure": "fact",
+            "event": "fact",
+            "history": "fact",
+            "change": "fact",
+            "qa": "fact",
+        }
+        if entry_type.startswith("custom_"):
+            return "fact"
+        return type_map.get(entry_type, "fact")
+    # ═══════════════════════════════════════════════════
+    # PHASE 2: CONNECT
+    # ═══════════════════════════════════════════════════
+    def _phase_connect(self):
+        """
+        Find hidden connections between nodes.
+        Nodes with high vector similarity but no edge → create edge.
+        Focuses on least-connected nodes first.
+        """
+        candidates = self.graph.get_least_connected_nodes(
+            limit=config.THINKING_BATCH_SIZE
+        )
+        connections_made = 0
+        for node in candidates:
+            if connections_made >= config.THINKING_BATCH_SIZE:
+                break
+            # Find similar nodes
+            similar = self.graph.find_similar_to_node(
+                node.id,
+                top_k=10,
+                min_similarity=config.SIMILARITY_THRESHOLD
+            )
+            for similar_node, similarity in similar:
+                # Skip if edge already exists (either direction)
+                if self.graph.edge_exists(node.id, similar_node.id):
+                    continue
+                if self.graph.edge_exists(similar_node.id, node.id):
+                    continue
+                # Create new connection
+                edge = self.graph.add_edge(
+                    from_id=node.id,
+                    to_id=similar_node.id,
+                    relation="similar_to",
+                    weight=similarity,
+                    confidence=similarity * 0.9,
+                    source="inferred"
+                )
+                if edge:
+                    connections_made += 1
+                    self._operations_this_cycle += 1
+        if connections_made > 0:
+            self._metrics["connections_found"] += connections_made
+            if config.LOG_THINKING_DETAILS:
+                print(f"[THINKER/CONNECT] Found {connections_made} new connections")
+    # ═══════════════════════════════════════════════════
+    # PHASE 3: INFER
+    # ═══════════════════════════════════════════════════
+    def _phase_infer(self):
+        """
+        Transitive and analogical inference.
+        If A→B and B→C, maybe A→C.
+        Discovers knowledge not present in original data.
+        """
+        inferences_made = 0
+        # ── Transitive Inference ──
+        inferences_made += self._transitive_inference()
+        # ── Analogical Inference ──
+        inferences_made += self._analogical_inference()
+        if inferences_made > 0:
+            self._metrics["inferences_made"] += inferences_made
+            self._operations_this_cycle += inferences_made
+            if config.LOG_THINKING_DETAILS:
+                print(f"[THINKER/INFER] Made {inferences_made} inferences")
+    def _transitive_inference(self) -> int:
+        """
+        If A→B and B→C exist, infer A→C with decayed confidence.
+        Limited per cycle to prevent explosion.
+        Starting nodes are a random sample of at most
+        config.THINKING_BATCH_SIZE node ids. A candidate A→C is dropped when
+        C is A itself, when an A→C edge already exists, or when the decayed
+        confidence falls below config.INFERENCE_CONFIDENCE_MIN.
+        Returns the number of edges actually added, at most
+        config.MAX_INFERENCES_PER_CYCLE.
+        """
+        count = 0
+        # Sample a batch of nodes to check
+        node_ids = list(self.graph.nodes.keys())
+        if len(node_ids) > config.THINKING_BATCH_SIZE:
+            # Sampling without replacement: each start node is visited once
+            sample_indices = np.random.choice(
+                len(node_ids), config.THINKING_BATCH_SIZE, replace=False
+            )
+            node_ids = [node_ids[i] for i in sample_indices]
+        for node_a_id in node_ids:
+            if count >= config.MAX_INFERENCES_PER_CYCLE:
+                break
+            edges_ab = self.graph.get_edges_from(node_a_id)
+            for edge_ab in edges_ab:
+                if count >= config.MAX_INFERENCES_PER_CYCLE:
+                    break
+                node_b_id = edge_ab.to_node
+                edges_bc = self.graph.get_edges_from(node_b_id)
+                for edge_bc in edges_bc:
+                    node_c_id = edge_bc.to_node
+                    # Skip self-loops and existing edges
+                    if node_c_id == node_a_id:
+                        continue
+                    if self.graph.edge_exists(node_a_id, node_c_id):
+                        continue
+                    # Calculate inferred confidence
+                    # (product of both hops, scaled by the configured decay)
+                    inferred_confidence = (
+                        edge_ab.confidence *
+                        edge_bc.confidence *
+                        config.INFERENCE_DECAY
+                    )
+                    if inferred_confidence < config.INFERENCE_CONFIDENCE_MIN:
+                        continue
+                    # Determine inferred relation
+                    inferred_relation = self._infer_relation(
+                        edge_ab.relation, edge_bc.relation
+                    )
+                    # Create inferred edge
+                    edge = self.graph.add_edge(
+                        from_id=node_a_id,
+                        to_id=node_c_id,
+                        relation=inferred_relation,
+                        weight=inferred_confidence,
+                        confidence=inferred_confidence,
+                        source="inferred"
+                    )
+                    # Only edges the graph accepted count towards the cap
+                    if edge:
+                        count += 1
+                    # Leaves only the innermost loop; the outer loops re-check
+                    # the same cap at the top of their next iteration
+                    if count >= config.MAX_INFERENCES_PER_CYCLE:
+                        break
+        return count
+    def _analogical_inference(self) -> int:
+        """
+        If A relates to B like C relates to ?, find ? using vector arithmetic.
+        A - B ≈ C - ? → ? ≈ C - A + B
+        """
+        count = 0
+        # Find pairs with strong, specific relations
+        strong_edges = [
+            e for e in self.graph.edges.values()
+            if e.confidence > 0.7 and e.relation not in ("similar_to", "related_to")
+        ]
+        if len(strong_edges) < 2:
+            return 0
+        # Sample pairs to compare
+        sample_size = min(20, len(strong_edges))
+        sampled = np.random.choice(len(strong_edges), sample_size, replace=False)
+        for i in sampled:
+            if count >= config.MAX_INFERENCES_PER_CYCLE // 4:
+                break
+            edge = strong_edges[i]
+            node_a = self.graph.get_node(edge.from_node)
+            node_b = self.graph.get_node(edge.to_node)
+            if not node_a or not node_b:
+                continue
+            # Find nodes similar to A (potential C candidates)
+            similar_to_a = self.graph.find_similar_to_node(
+                node_a.id, top_k=5,
+                min_similarity=config.ANALOGICAL_SIMILARITY_MIN
+            )
+            for node_c, sim_ac in similar_to_a:
+                if node_c.id == node_b.id:
+                    continue
+                # Vector arithmetic: ? ≈ C - A + B
+                target_vector = utils.normalize(
+                    node_c.vector - node_a.vector + node_b.vector
+                )
+                # Find nearest to target vector
+                candidates = self.graph.find_similar_nodes(
+                    target_vector, top_k=3,
+                    min_similarity=config.ANALOGICAL_SIMILARITY_MIN,
+                    exclude_ids={node_a.id, node_b.id, node_c.id}
+                )
+                for candidate_node, sim_score in candidates:
+                    if self.graph.edge_exists(node_c.id, candidate_node.id, edge.relation):
+                        continue
+                    inferred_confidence = sim_ac * sim_score * config.INFERENCE_DECAY
+                    if inferred_confidence < config.INFERENCE_CONFIDENCE_MIN:
+                        continue
+                    new_edge = self.graph.add_edge(
+                        from_id=node_c.id,
+                        to_id=candidate_node.id,
+                        relation=edge.relation,
+                        weight=inferred_confidence,
+                        confidence=inferred_confidence,
+                        source="inferred"
+                    )
+                    if new_edge:
+                        count += 1
+                        break  # One analogy per C
+        return count
+    def _infer_relation(self, rel_ab: str, rel_bc: str) -> str:
+        """Determine relation type for transitive inference A→C from A→B→C."""
+        # Same relation → same
+        if rel_ab == rel_bc:
+            return rel_ab
+        # Specific known transitive patterns
+        transitive_map = {
+            ("is_a", "is_a"): "is_a",
+            ("part_of", "part_of"): "part_of",
+            ("is_a", "has"): "has",
+            ("is_a", "located_in"): "located_in",
+            ("part_of", "located_in"): "located_in",
+            ("is_a", "used_for"): "used_for",
+            ("causes", "causes"): "causes",
+            ("follows", "follows"): "follows",
+            ("requires", "requires"): "requires",
+            ("instance_of", "is_a"): "instance_of",
+        }
+        return transitive_map.get((rel_ab, rel_bc), "inferred_relation")
+    # ═══════════════════════════════════════════════════
+    # PHASE 4: ABSTRACT
+    # ═══════════════════════════════════════════════════
+    def _phase_abstract(self):
+        """
+        Cluster similar nodes into abstraction nodes.
+        Creates higher-level concepts from concrete instances.
+        Recursive: abstractions can be abstracted further.
+        """
+        # Skip if graph is too small
+        if len(self.graph.nodes) < config.CLUSTER_MIN_SIZE * 2:
+            return
+        abstractions_created = 0
+        # ── Level 1: Concrete → Abstraction ──
+        abstractions_created += self._create_abstractions(
+            source_types=["entity", "fact", "concept"],
+            abstraction_type="abstraction"
+        )
+        # ── Level 2+: Abstraction → Meta-Abstraction ──
+        if self._total_cycles % (config.COMPRESS_INTERVAL * 2) == 0:
+            existing_abstractions = self.graph.get_nodes_by_type("abstraction")
+            if len(existing_abstractions) >= config.CLUSTER_MIN_SIZE * 2:
+                abstractions_created += self._create_abstractions(
+                    source_types=["abstraction"],
+                    abstraction_type="meta_abstraction"
+                )
+        if abstractions_created > 0:
+            self._metrics["abstractions_created"] += abstractions_created
+            self._operations_this_cycle += abstractions_created
+            if config.LOG_THINKING_DETAILS:
+                print(f"[THINKER/ABSTRACT] Created {abstractions_created} abstractions")
+    def _create_abstractions(
+        self,
+        source_types: List[str],
+        abstraction_type: str
+    ) -> int:
+        """Create abstraction nodes from clusters of source-typed nodes."""
+        # Gather source nodes
+        source_nodes = []
+        for stype in source_types:
+            source_nodes.extend(self.graph.get_nodes_by_type(stype))
+        if len(source_nodes) < config.CLUSTER_MIN_SIZE:
+            return 0
+        # Build vector matrix for clustering
+        vectors = np.array(
+            [n.vector for n in source_nodes],
+            dtype=np.float32
+        )
+        node_ids = [n.id for n in source_nodes]
+        # Find natural clusters
+        clusters = utils.find_natural_clusters(
+            vectors,
+            similarity_threshold=config.CLUSTER_SIMILARITY_INTRA
+        )
+        count = 0
+        for cluster_indices in clusters:
+            if count >= config.THINKING_BATCH_SIZE // 2:
+                break
+            # Check if this cluster already has an abstraction
+            member_ids = [node_ids[i] for i in cluster_indices]
+            already_abstracted = False
+            for mid in member_ids:
+                for edge in self.graph.get_edges_from(mid):
+                    if edge.relation == "instance_of":
+                        already_abstracted = True
+                        break
+                if already_abstracted:
+                    break
+            if already_abstracted:
+                continue
+            # Compute centroid
+            cluster_vectors = vectors[cluster_indices]
+            centroid = utils.vector_mean(list(cluster_vectors))
+            # Generate label from common keywords
+            all_content = " ".join(
+                self.graph.nodes[node_ids[i]].content
+                for i in cluster_indices
+                if node_ids[i] in self.graph.nodes
+            )
+            keywords = utils.extract_keywords(all_content, max_keywords=5)
+            label = " + ".join(keywords[:3]) if keywords else "abstract_concept"
+            # Check depth limit
+            current_depth = 0
+            if abstraction_type == "meta_abstraction":
+                for i in cluster_indices:
+                    nid = node_ids[i]
+                    depth = self.graph._get_abstraction_depth(nid)
+                    current_depth = max(current_depth, depth)
+            if current_depth >= config.MAX_ABSTRACTION_DEPTH:
+                continue
+            # Create abstraction node
+            abs_node = self.graph.add_node(
+                content=f"[{abstraction_type}] {label}",
+                node_type=abstraction_type,
+                source="inferred",
+                vector=utils.normalize(centroid),
+                weight=config.ABSTRACTION_MIN_CONFIDENCE
+            )
+            if not abs_node:
+                continue
+            # Link members to abstraction
+            for i in cluster_indices:
+                member_id = node_ids[i]
+                self.graph.add_edge(
+                    from_id=member_id,
+                    to_id=abs_node.id,
+                    relation="instance_of",
+                    weight=0.8,
+                    confidence=0.8,
+                    source="inferred"
+                )
+            count += 1
+        return count
+    # ═══════════════════════════════════════════════════
+    # PHASE 5: STRENGTHEN / WEAKEN
+    # ═══════════════════════════════════════════════════
+    def _phase_strengthen_weaken(self):
+        """
+        Strengthen edges that are frequently used.
+        Weaken edges that haven't been used.
+        Nodes with more connections get slight weight boost.
+        """
+        # ── Weaken unused edges (periodic) ──
+        if self._total_cycles % config.WEIGHT_DECAY_INTERVAL_CYCLES == 0:
+            decay_count = 0
+            edges = list(self.graph.edges.values())
+            for edge in edges:
+                if edge.used_count == 0 and edge.source == "inferred":
+                    self.graph.decay_edge(edge.id)
+                    decay_count += 1
+            self._metrics["edges_decayed"] += decay_count
+            self._operations_this_cycle += decay_count
+            if config.LOG_THINKING_DETAILS and decay_count > 0:
+                print(f"[THINKER/WEAKEN] Decayed {decay_count} unused edges")
+        # ── Boost well-connected nodes ──
+        nodes = list(self.graph.nodes.values())
+        sample_size = min(config.THINKING_BATCH_SIZE, len(nodes))
+        if sample_size == 0:
+            return
+        sampled = np.random.choice(len(nodes), sample_size, replace=False)
+        reinforced = 0
+        for idx in sampled:
+            node = nodes[idx]
+            if node.connections > 3:
+                bonus = config.NODE_WEIGHT_CONNECTION_BONUS * min(node.connections, 20)
+                new_weight = min(node.weight + bonus, config.WEIGHT_MAX)
+                if new_weight != node.weight:
+                    self.graph.update_node_weight(node.id, new_weight)
+                    reinforced += 1
+        self._metrics["edges_reinforced"] += reinforced
+        self._operations_this_cycle += reinforced
+    # ═══════════════════════════════════════════════════
+    # PHASE 6: COMPRESS
+    # ═══════════════════════════════════════════════════
+    def _phase_compress(self):
+        """
+        Merge redundant nodes.
+        Prune dead edges.
+        Prune orphan nodes.
+        Keep the graph efficient and clean.
+        """
+        if self._total_cycles % config.COMPRESS_INTERVAL != 0:
+            return
+        # ── Merge redundant nodes ──
+        redundant_pairs = self.graph.find_redundant_pairs(limit=10)
+        merged = 0
+        for id_keep, id_remove, similarity in redundant_pairs:
+            if self.graph.merge_nodes(id_keep, id_remove):
+                merged += 1
+        # ── Prune weak edges ──
+        pruned_edges = self.graph.prune_weak_edges()
+        # ── Prune orphan nodes ──
+        pruned_nodes = self.graph.prune_orphan_nodes()
+        self._metrics["nodes_merged"] += merged
+        self._metrics["edges_pruned"] += pruned_edges
+        self._metrics["nodes_pruned"] += pruned_nodes
+        self._operations_this_cycle += merged + pruned_edges + pruned_nodes
+        total_ops = merged + pruned_edges + pruned_nodes
+        if config.LOG_THINKING_DETAILS and total_ops > 0:
+            print(
+                f"[THINKER/COMPRESS] Merged {merged} nodes, "
+                f"pruned {pruned_edges} edges, {pruned_nodes} orphan nodes"
+            )
+    # ═══════════════════════════════════════════════════
+    # PHASE 7: VALIDATE
+    # ═══════════════════════════════════════════════════
+    def _phase_validate(self):
+        """
+        Check logical consistency of the graph.
+        Resolve contradictions.
+        Detect and break circular inferences.
+        """
+        if self._total_cycles % config.VALIDATE_INTERVAL != 0:
+            return
+        resolved = 0
+        # ── Detect contradictions ──
+        resolved += self._resolve_contradictions()
+        # ── Detect circular inferences ──
+        resolved += self._break_circular_inferences()
+        if resolved > 0:
+            self._metrics["contradictions_resolved"] += resolved
+            self._operations_this_cycle += resolved
+            if config.LOG_THINKING_DETAILS:
+                print(f"[THINKER/VALIDATE] Resolved {resolved} issues")
+    def _resolve_contradictions(self) -> int:
+        """
+        Find and resolve contradictions.
+        If A→B (positive) and A→¬B (opposite_of) exist, keep higher confidence.
+        """
+        resolved = 0
+        # Sample nodes to check
+        node_ids = list(self.graph.nodes.keys())
+        sample_size = min(config.THINKING_BATCH_SIZE, len(node_ids))
+        if sample_size == 0:
+            return 0
+        sampled = np.random.choice(len(node_ids), sample_size, replace=False)
+        for idx in sampled:
+            node_id = node_ids[idx]
+            edges_out = self.graph.get_edges_from(node_id)
+            # Group edges by target
+            target_edges: Dict[str, List[Edge]] = {}
+            for edge in edges_out:
+                key = edge.to_node
+                if key not in target_edges:
+                    target_edges[key] = []
+                target_edges[key].append(edge)
+            # Check for contradictory relations to same target
+            for target_id, edges in target_edges.items():
+                if len(edges) < 2:
+                    continue
+                # Check for opposing relations
+                contradictory_pairs = {
+                    ("causes", "prevents"),
+                    ("is_a", "opposite_of"),
+                    ("synonym_of", "opposite_of"),
+                    ("requires", "prevents"),
+                }
+                for i in range(len(edges)):
+                    for j in range(i + 1, len(edges)):
+                        pair = (edges[i].relation, edges[j].relation)
+                        reverse_pair = (edges[j].relation, edges[i].relation)
+                        if pair in contradictory_pairs or reverse_pair in contradictory_pairs:
+                            # Keep higher confidence, remove lower
+                            if edges[i].confidence >= edges[j].confidence:
+                                self.graph.remove_edge(edges[j].id)
+                            else:
+                                self.graph.remove_edge(edges[i].id)
+                            resolved += 1
+        return resolved
+    def _break_circular_inferences(self) -> int:
+        """
+        Detect inference chains that loop back on themselves.
+        Break the weakest link in each cycle.
+        """
+        broken = 0
+        # Sample inferred edges
+        inferred_edges = [
+            e for e in self.graph.edges.values()
+            if e.source == "inferred"
+        ]
+        sample_size = min(config.THINKING_BATCH_SIZE, len(inferred_edges))
+        if sample_size == 0:
+            return 0
+        sampled_indices = np.random.choice(
+            len(inferred_edges), sample_size, replace=False
+        )
+        for idx in sampled_indices:
+            edge = inferred_edges[idx]
+            # Check if this edge creates a cycle
+            # Simple: does a path exist from to_node back to from_node?
+            paths = self.graph.find_paths(
+                edge.to_node, edge.from_node,
+                max_depth=4, max_paths=1
+            )
+            if paths:
+                # Cycle detected — remove weakest edge in cycle
+                cycle_path = [edge.from_node, edge.id] + paths[0]
+                weakest_edge_id = None
+                weakest_weight = float('inf')
+                for item_id in cycle_path:
+                    if item_id in self.graph.edges:
+                        e = self.graph.edges[item_id]
+                        if e.weight < weakest_weight and e.source == "inferred":
+                            weakest_weight = e.weight
+                            weakest_edge_id = e.id
+                if weakest_edge_id:
+                    self.graph.remove_edge(weakest_edge_id)
+                    broken += 1
+        return broken
+    # ═══════════════════════════════════════════════════
+    # PHASE 8: SELF-QUESTION
+    # ═══════════════════════════════════════════════════
+    def _phase_self_question(self):
+        """
+        Generate internal questions to fill knowledge gaps.
+        Ask: "What connects X to Y?" where X and Y are distant but possibly related.
+        If a new chain is found, save it.
+        """
+        if self._total_cycles % config.SELF_QUESTION_INTERVAL != 0:
+            return
+        if len(self.graph.nodes) < 10:
+            return
+        questions_asked = 0
+        questions_answered = 0
+        # Strategy 1: Find disconnected clusters and try to bridge them
+        questions_answered += self._bridge_disconnected()
+        questions_asked += 3
+        # Strategy 2: Explore high-weight nodes that lack certain relation types
+        questions_answered += self._fill_relation_gaps()
+        questions_asked += 3
+        # Strategy 3: Challenge existing weak inferences
+        questions_answered += self._challenge_weak_inferences()
+        questions_asked += 2
+        self._metrics["self_questions_asked"] += questions_asked
+        self._metrics["self_questions_answered"] += questions_answered
+        self._operations_this_cycle += questions_answered
+        if config.LOG_THINKING_DETAILS and questions_answered > 0:
+            print(
+                f"[THINKER/SELF-Q] Asked {questions_asked} questions, "
+                f"answered {questions_answered}"
+            )
+    def _bridge_disconnected(self) -> int:
+        """Try to find connections between disconnected subgraphs."""
+        connected = 0
+        # Pick two random nodes that have no path between them
+        node_ids = list(self.graph.nodes.keys())
+        if len(node_ids) < 10:
+            return 0
+        for _ in range(3):
+            idx = np.random.choice(len(node_ids), 2, replace=False)
+            id_a, id_b = node_ids[idx[0]], node_ids[idx[1]]
+            node_a = self.graph.get_node(id_a)
+            node_b = self.graph.get_node(id_b)
+            if not node_a or not node_b:
+                continue
+            # Are they already connected?
+            paths = self.graph.find_paths(id_a, id_b, max_depth=4, max_paths=1)
+            if paths:
+                continue
+            # Can we connect them via vector similarity?
+            sim = utils.cosine_similarity(node_a.vector, node_b.vector)
+            if sim > config.SIMILARITY_THRESHOLD * 0.8:
+                # They're somewhat similar but not connected → connect
+                edge = self.graph.add_edge(
+                    from_id=id_a,
+                    to_id=id_b,
+                    relation="inferred_relation",
+                    weight=sim * 0.7,
+                    confidence=sim * 0.6,
+                    source="inferred"
+                )
+                if edge:
+                    connected += 1
+        return connected
+    def _fill_relation_gaps(self) -> int:
+        """Find high-weight nodes missing common relations and try to fill them."""
+        filled = 0
+        # Get well-known nodes
+        important_nodes = sorted(
+            self.graph.nodes.values(),
+            key=lambda n: n.weight * n.connections,
+            reverse=True
+        )[:20]
+        common_relations = ["is_a", "part_of", "has", "used_for", "related_to"]
+        for node in important_nodes[:5]:
+            existing_relations = set()
+            for edge in self.graph.get_edges_from(node.id):
+                existing_relations.add(edge.relation)
+            for relation in common_relations:
+                if relation in existing_relations:
+                    continue
+                # Can we find a target for this relation via similarity?
+                # Look for nodes that commonly have this relation
+                candidates = self.graph.find_similar_to_node(
+                    node.id, top_k=5,
+                    min_similarity=config.SIMILARITY_THRESHOLD
+                )
+                for candidate, sim in candidates:
+                    # Check if candidate has this relation type outgoing
+                    candidate_rels = [
+                        e.relation for e in self.graph.get_edges_from(candidate.id)
+                    ]
+                    if relation in candidate_rels:
+                        # This candidate has the relation → node might too
+                        for edge in self.graph.get_edges_from(candidate.id):
+                            if edge.relation == relation:
+                                target = self.graph.get_node(edge.to_node)
+                                if target and not self.graph.edge_exists(
+                                    node.id, target.id, relation
+                                ):
+                                    confidence = sim * edge.confidence * config.INFERENCE_DECAY
+                                    if confidence >= config.INFERENCE_CONFIDENCE_MIN:
+                                        new_edge = self.graph.add_edge(
+                                            from_id=node.id,
+                                            to_id=target.id,
+                                            relation=relation,
+                                            weight=confidence,
+                                            confidence=confidence,
+                                            source="inferred"
+                                        )
+                                        if new_edge:
+                                            filled += 1
+                                            break
+                        break  # One fill per missing relation
+            if filled >= 5:
+                break
+        return filled
+    def _challenge_weak_inferences(self) -> int:
+        """
+        Re-examine weak inferred edges.
+        If supporting evidence exists, strengthen.
+        If contradicting evidence exists, remove.
+        """
+        improved = 0
+        weak_edges = self.graph.get_weakest_edges(
+            limit=20, source_filter="inferred"
+        )
+        for edge in weak_edges:
+            from_node = self.graph.get_node(edge.from_node)
+            to_node = self.graph.get_node(edge.to_node)
+            if not from_node or not to_node:
+                continue
+            # Check if there's additional evidence
+            # (other paths between these nodes)
+            paths = self.graph.find_paths(
+                edge.from_node, edge.to_node,
+                max_depth=4, max_paths=3
+            )
+            # Filter paths that don't use this edge
+            alternative_paths = [
+                p for p in paths
+                if edge.id not in p
+            ]
+            if alternative_paths:
+                # Multiple paths support this edge → strengthen
+                support_factor = 1.0 + 0.05 * len(alternative_paths)
+                new_weight = min(
+                    edge.weight * support_factor,
+                    config.WEIGHT_MAX
+                )
+                self.graph.edges[edge.id].weight = new_weight
+                self.graph.edges[edge.id].confidence = min(
+                    edge.confidence * support_factor, 1.0
+                )
+                self.graph.edges[edge.id].mark_dirty()
+                self.graph.memory.save_edge(edge.to_dict())
+                improved += 1
+            else:
+                # No alternative support → further weaken
+                if edge.weight < config.PRUNE_WEIGHT_THRESHOLD * 2:
+                    self.graph.remove_edge(edge.id)
+                    improved += 1
+        return improved
+    # ═══════════════════════════════════════════════════
+    # USER KNOWLEDGE EXTRACTION
+    # ═══════════════════════════════════════════════════
+    def extract_from_user_message(self, message: str):
+        """
+        Extract knowledge from a user's chat message.
+        Called by brain.py after processing a user request.
+        Does NOT store the raw message as-is — only extracted entity, concept
+        and (for informational messages) truncated fact nodes.
+        Args:
+            message: Raw chat text. Ignored when empty or shorter than
+                10 characters after stripping.
+        """
+        # Ignore empty or very short messages
+        if not message or len(message.strip()) < 10:
+            return
+        message = message.strip()
+        # Extract keywords
+        keywords = utils.extract_keywords(message, max_keywords=15)
+        # Fewer than two keywords: nothing worth extracting
+        if len(keywords) < 2:
+            return
+        # Extract entities
+        entities = utils.extract_entities_simple(message)
+        # Create entity nodes (at most the first five entities)
+        entity_nodes = []
+        for entity in entities[:5]:
+            node = self.graph.add_node(
+                content=entity,
+                node_type="entity",
+                source="user_chat",
+                weight=config.USER_KNOWLEDGE_CONFIDENCE
+            )
+            if node:
+                entity_nodes.append(node)
+        # Create concept nodes from keywords not already entities
+        # (compared case-insensitively against ALL entities, not just the first five)
+        entity_lower = {e.lower() for e in entities}
+        for kw in keywords:
+            if kw.lower() not in entity_lower:
+                # NOTE(review): the returned node is not used in this visible part
+                node = self.graph.add_node(
+                    content=kw,
+                    node_type="concept",
+                    source="user_chat",
+                    weight=config.USER_KNOWLEDGE_CONFIDENCE * 0.7
+                )
+        # If message contains informational content, create fact node
+        # NOTE(review): this is a substring test, so "is" / "are" also match
+        # inside longer words (e.g. "this", "care") — confirm that is intended
+        if len(message) > 30 and any(
+            p in message.lower() for p in [
+                "adalah", "merupakan", "yaitu", "ialah",
+                "is", "are", "means", "defined"
+            ]
+        ):
+            # Fact content is the message truncated to 500 characters
+            fact_node = self.graph.add_node(
+                content=message[:500],
+                node_type="fact",
+                source="user_chat",
+                weight=config.USER_KNOWLEDGE_CONFIDENCE
+            )
+            # Connect fact to entities mentioned
+            if fact_node:
+                for en in entity_nodes:
+                    self.graph.add_edge(
+                        from_id=fact_node.id,
+                        to_id=en.id,
+                        relation="related_to",
+                        confidence=config.USER_KNOWLEDGE_CONFIDENCE * 0.8,
+                        source="user_chat"
+                    )