"""
Cogni-Engine v1 — 24/7 Thinking Loop
The autonomous cognitive process that never stops.
8 phases of continuous reasoning that make the AI smarter over time.
Phases:
1. INGEST — Scan /data/ folder, parse new JSONL files
2. CONNECT — Find hidden connections between nodes via similarity
3. INFER — Transitive & analogical inference to discover new knowledge
4. ABSTRACT — Cluster similar nodes into higher-level abstractions
5. STRENGTHEN/WEAKEN — Reinforce used edges, decay unused ones
6. COMPRESS — Merge redundant nodes, prune dead edges
7. VALIDATE — Check logical consistency, resolve contradictions
8. SELF-QUESTION — Generate and answer internal questions to find gaps
"""
import os
import json
import time
import threading
import traceback
from typing import List, Dict, Optional
import numpy as np
import config
import utils
from knowledge import KnowledgeGraph, Node, Edge
# ═══════════════════════════════════════════════════════════
# THINKER ENGINE
# ═══════════════════════════════════════════════════════════
class Thinker:
"""
Autonomous thinking engine.
Runs in a background thread, continuously processing
and enriching the knowledge graph.
"""
def __init__(self, graph: KnowledgeGraph):
self.graph = graph
self._thread: Optional[threading.Thread] = None
self._running = False
self._paused = False
# Thinking state
self._cycle_count = 0
self._total_cycles = 0
self._current_phase = "init"
self._phase_index = 0
self._interval = config.THINKING_INTERVAL_FAST
self._operations_this_cycle = 0
# File tracking
self._file_checksums: Dict[str, str] = {}
# Phase definitions (ordered)
self._phases = [
("ingest", self._phase_ingest),
("connect", self._phase_connect),
("infer", self._phase_infer),
("abstract", self._phase_abstract),
("strengthen", self._phase_strengthen_weaken),
("compress", self._phase_compress),
("validate", self._phase_validate),
("self_question", self._phase_self_question),
]
# Metrics
self._metrics = {
"nodes_ingested": 0,
"edges_ingested": 0,
"connections_found": 0,
"inferences_made": 0,
"abstractions_created": 0,
"edges_reinforced": 0,
"edges_decayed": 0,
"nodes_merged": 0,
"edges_pruned": 0,
"nodes_pruned": 0,
"contradictions_resolved": 0,
"self_questions_asked": 0,
"self_questions_answered": 0,
}
# ───────────────────────────────────────────────────
# LIFECYCLE
# ───────────────────────────────────────────────────
def start(self):
"""Start the thinking loop in a background thread."""
if self._running:
print("[THINKER] Already running.")
return
# Load previous state
self._load_state()
self._running = True
self._thread = threading.Thread(
target=self._thinking_loop,
name="CogniThinker",
daemon=True
)
self._thread.start()
print(f"[THINKER] Started. Resuming from cycle {self._total_cycles}.")
def stop(self):
"""Stop the thinking loop gracefully."""
if not self._running:
return
print("[THINKER] Stopping...")
self._running = False
if self._thread:
self._thread.join(timeout=30)
self._save_state()
print("[THINKER] Stopped.")
def pause(self):
"""Pause thinking (for heavy API load)."""
self._paused = True
def resume(self):
"""Resume thinking."""
self._paused = False
@property
def is_running(self) -> bool:
return self._running
@property
def current_phase(self) -> str:
return self._current_phase
@property
def total_cycles(self) -> int:
return self._total_cycles
@property
def metrics(self) -> dict:
return dict(self._metrics)
def get_status(self) -> dict:
"""Get detailed thinker status."""
return {
"running": self._running,
"paused": self._paused,
"current_phase": self._current_phase,
"cycle_count": self._cycle_count,
"total_cycles": self._total_cycles,
"interval_seconds": self._interval,
"operations_last_cycle": self._operations_this_cycle,
"metrics": dict(self._metrics)
}
# ───────────────────────────────────────────────────
# STATE PERSISTENCE
# ───────────────────────────────────────────────────
def _load_state(self):
"""Load thinking state from DB."""
state = self.graph.memory.load_thinking_state()
self._total_cycles = state.get("total_cycles", 0)
self._cycle_count = state.get("current_cycle", 0)
self._current_phase = state.get("phase", "init")
saved_metrics = state.get("metrics", {})
if saved_metrics:
for key in self._metrics:
if key in saved_metrics:
self._metrics[key] = saved_metrics[key]
# Load file checksums
self._file_checksums = self.graph.memory.load_file_checksums()
def _save_state(self):
"""Save thinking state to DB."""
self.graph.memory.save_thinking_state({
"current_cycle": self._cycle_count,
"total_cycles": self._total_cycles,
"cursor_position": "",
"phase": self._current_phase,
"metrics": dict(self._metrics)
})
# ───────────────────────────────────────────────────
# MAIN LOOP
# ───────────────────────────────────────────────────
def _thinking_loop(self):
"""
Main thinking loop. Runs continuously until stopped.
Cycles through all 8 phases, adapting speed based on activity.
"""
print("[THINKER] Thinking loop started.")
while self._running:
try:
# Wait if paused
if self._paused:
time.sleep(1)
continue
# Execute current phase
self._operations_this_cycle = 0
phase_name, phase_func = self._phases[self._phase_index]
self._current_phase = phase_name
try:
phase_func()
except Exception as e:
print(f"[THINKER] Error in phase '{phase_name}': {e}")
if config.LOG_THINKING_DETAILS:
traceback.print_exc()
# Advance to next phase
self._phase_index = (self._phase_index + 1) % len(self._phases)
                # Count cycles (one full rotation through all phases = 1 cycle)
                if self._phase_index == 0:
                    self._cycle_count += 1
                    self._total_cycles += 1
                    # Periodic state save (once per completed cycle, not once per phase)
                    if self._total_cycles % config.SYNC_INTERVAL_CYCLES == 0:
                        self._save_state()
                        self.graph.sync()
                    # Log progress every 100 cycles
                    if self._total_cycles % 100 == 0 and config.LOG_THINKING_DETAILS:
                        stats = self.graph.get_stats()
                        score = self.graph.get_intelligence_score()
                        print(
                            f"[THINKER] Cycle {self._total_cycles}: "
                            f"nodes={stats['total_nodes']}, "
                            f"edges={stats['total_edges']}, "
                            f"inferred={stats['inferred_edges']}, "
                            f"score={score:.2f}"
                        )
                # Adaptive speed
                self._adapt_speed()
# Sleep between phases
time.sleep(self._interval / len(self._phases))
except Exception as e:
print(f"[THINKER] Loop error: {e}")
traceback.print_exc()
time.sleep(5) # Recovery pause
print("[THINKER] Thinking loop ended.")
def _adapt_speed(self):
"""Adjust thinking speed based on activity."""
if self._operations_this_cycle > config.THINKING_STABILITY_THRESHOLD:
# Active: think faster
self._interval = max(
config.THINKING_INTERVAL_FAST,
self._interval * 0.9
)
else:
# Stable: think slower
self._interval = min(
config.THINKING_INTERVAL_SLOW,
self._interval * 1.1
)
# ═══════════════════════════════════════════════════
# PHASE 1: INGEST
# ═══════════════════════════════════════════════════
def _phase_ingest(self):
"""
Scan /data/ folder for new or changed JSONL files.
Parse entries and create nodes + edges in graph.
"""
if not os.path.exists(config.DATA_DIR):
return
files = []
for fname in os.listdir(config.DATA_DIR):
if any(fname.endswith(ext) for ext in config.SUPPORTED_DATA_EXTENSIONS):
files.append(fname)
if not files:
return
for fname in files:
filepath = os.path.join(config.DATA_DIR, fname)
try:
with open(filepath, 'r', encoding='utf-8') as f:
content = f.read()
except Exception as e:
print(f"[THINKER/INGEST] Error reading {fname}: {e}")
continue
# Check if file has changed
checksum = utils.hash_file_content(content)
if self._file_checksums.get(fname) == checksum:
continue # File unchanged, skip
print(f"[THINKER/INGEST] Processing file: {fname}")
lines = content.strip().split('\n')
processed = 0
            for line in lines:
if processed >= config.MAX_LINES_PER_INGEST:
break
line = line.strip()
if not line:
continue
try:
entry = json.loads(line)
except json.JSONDecodeError:
continue
self._ingest_entry(entry, source=fname)
processed += 1
            # Mark file as processed. Note: entries beyond MAX_LINES_PER_INGEST
            # are skipped permanently, since the checksum covers the whole file.
self._file_checksums[fname] = checksum
self.graph.memory.save_file_checksum(fname, checksum, processed)
self._operations_this_cycle += processed
print(f"[THINKER/INGEST] Processed {processed} entries from {fname}")
    def _ingest_entry(self, entry: dict, source: str = "data"):
        """
        Ingest a single data entry into the knowledge graph.
        Creates nodes and edges based on entry type and fields.
        Note: `source` receives the originating filename but is currently
        unused; nodes are tagged with the category "data".
        """
entry_type = entry.get("type", "fact")
content = entry.get("content", "").strip()
if not content:
return
tags = entry.get("tags", [])
confidence = entry.get("confidence", config.DATA_KNOWLEDGE_CONFIDENCE)
domain = entry.get("domain", "")
related = entry.get("related", [])
# ── Create main content node ──
main_node = self.graph.add_node(
content=content,
node_type=self._map_entry_type_to_node_type(entry_type),
source="data",
weight=confidence,
tags=tags
)
if not main_node:
return
self._metrics["nodes_ingested"] += 1
# ── Handle domain as a concept node ──
if domain:
domain_node = self.graph.add_node(
content=domain,
node_type="concept",
source="data",
weight=0.8
)
if domain_node:
self.graph.add_edge(
from_id=main_node.id,
to_id=domain_node.id,
relation="part_of",
confidence=0.8,
source="data"
)
self._metrics["edges_ingested"] += 1
# ── Handle related topics ──
for rel_topic in related:
rel_node = self.graph.add_node(
content=rel_topic,
node_type="concept",
source="data",
weight=0.7
)
if rel_node:
self.graph.add_edge(
from_id=main_node.id,
to_id=rel_node.id,
relation="related_to",
confidence=0.7,
source="data"
)
self._metrics["edges_ingested"] += 1
# ── Type-specific handling ──
self._ingest_type_specific(entry, main_node, entry_type)
def _ingest_type_specific(self, entry: dict, main_node: Node, entry_type: str):
"""Handle type-specific fields for data entries."""
# ── relation type: explicit from/to ──
if entry_type == "relation":
from_content = entry.get("from", "")
to_content = entry.get("to", "")
relation = entry.get("relation", "related_to")
if from_content and to_content:
from_node = self.graph.add_node(
content=from_content, node_type="entity", source="data"
)
to_node = self.graph.add_node(
content=to_content, node_type="entity", source="data"
)
if from_node and to_node:
self.graph.add_edge(
from_id=from_node.id,
to_id=to_node.id,
relation=relation,
confidence=entry.get("confidence", 0.9),
source="data"
)
self._metrics["edges_ingested"] += 1
# ── definition / term: term node + defined_as edge ──
elif entry_type in ("definition", "term"):
term = entry.get("term", "")
if term:
term_node = self.graph.add_node(
content=term, node_type="entity", source="data"
)
if term_node:
self.graph.add_edge(
from_id=term_node.id,
to_id=main_node.id,
relation="defined_as",
confidence=0.95,
source="data"
)
self._metrics["edges_ingested"] += 1
# ── cause_effect: cause → effect ──
elif entry_type == "cause_effect":
cause = entry.get("cause", "")
effect = entry.get("effect", "")
if cause and effect:
cause_node = self.graph.add_node(
content=cause, node_type="concept", source="data"
)
effect_node = self.graph.add_node(
content=effect, node_type="concept", source="data"
)
if cause_node and effect_node:
self.graph.add_edge(
from_id=cause_node.id,
to_id=effect_node.id,
relation="causes",
confidence=entry.get("confidence", 0.85),
source="data"
)
self._metrics["edges_ingested"] += 1
# ── hierarchy: parent → children ──
elif entry_type == "hierarchy":
parent = entry.get("parent", "")
children = entry.get("children", [])
if parent and children:
parent_node = self.graph.add_node(
content=parent, node_type="concept", source="data"
)
if parent_node:
for child in children:
child_node = self.graph.add_node(
content=child, node_type="entity", source="data"
)
if child_node:
self.graph.add_edge(
from_id=child_node.id,
to_id=parent_node.id,
relation="is_a",
confidence=0.9,
source="data"
)
self._metrics["edges_ingested"] += 1
# ── comparison: subject_a ↔ subject_b ──
elif entry_type == "comparison":
subj_a = entry.get("subject_a", "")
subj_b = entry.get("subject_b", "")
if subj_a and subj_b:
node_a = self.graph.add_node(
content=subj_a, node_type="entity", source="data"
)
node_b = self.graph.add_node(
content=subj_b, node_type="entity", source="data"
)
if node_a and node_b:
self.graph.add_edge(
from_id=node_a.id, to_id=node_b.id,
relation="related_to", confidence=0.8, source="data"
)
self.graph.add_edge(
from_id=node_b.id, to_id=node_a.id,
relation="related_to", confidence=0.8, source="data"
)
self._metrics["edges_ingested"] += 2
# ── qa: question → answer ──
elif entry_type == "qa":
question = entry.get("question", "")
answer = entry.get("answer", "")
if question and answer:
q_node = self.graph.add_node(
content=question, node_type="concept", source="data"
)
a_node = self.graph.add_node(
content=answer, node_type="fact", source="data"
)
if q_node and a_node:
self.graph.add_edge(
from_id=q_node.id, to_id=a_node.id,
relation="defined_as", confidence=0.9, source="data"
)
self._metrics["edges_ingested"] += 1
# ── synonym: bidirectional synonym_of ──
elif entry_type == "synonym":
terms = entry.get("terms", [])
for i in range(len(terms)):
for j in range(i + 1, len(terms)):
node_i = self.graph.add_node(
content=terms[i], node_type="entity", source="data"
)
node_j = self.graph.add_node(
content=terms[j], node_type="entity", source="data"
)
if node_i and node_j:
self.graph.add_edge(
from_id=node_i.id, to_id=node_j.id,
relation="synonym_of", confidence=0.9, source="data"
)
self.graph.add_edge(
from_id=node_j.id, to_id=node_i.id,
relation="synonym_of", confidence=0.9, source="data"
)
self._metrics["edges_ingested"] += 2
# ── process / procedure: sequential steps ──
elif entry_type in ("process", "procedure"):
steps = entry.get("steps", [])
title = entry.get("title", "")
if title:
title_node = self.graph.add_node(
content=title, node_type="concept", source="data"
)
if title_node:
                self.graph.add_edge(
                    from_id=main_node.id, to_id=title_node.id,
                    relation="defined_as", confidence=0.85, source="data"
                )
                self._metrics["edges_ingested"] += 1
prev_step_node = None
for step_text in steps:
step_node = self.graph.add_node(
content=step_text, node_type="fact", source="data"
)
if step_node:
self.graph.add_edge(
from_id=step_node.id, to_id=main_node.id,
relation="part_of", confidence=0.8, source="data"
)
if prev_step_node:
self.graph.add_edge(
from_id=prev_step_node.id, to_id=step_node.id,
relation="follows", confidence=0.9, source="data"
)
self._metrics["edges_ingested"] += 1
prev_step_node = step_node
self._metrics["edges_ingested"] += 1
# ── quote: author + content ──
elif entry_type == "quote":
author = entry.get("author", "")
if author:
author_node = self.graph.add_node(
content=author, node_type="entity", source="data"
)
if author_node:
self.graph.add_edge(
from_id=main_node.id, to_id=author_node.id,
relation="created_by", confidence=0.9, source="data"
)
self._metrics["edges_ingested"] += 1
# ── event: actors, location, date ──
elif entry_type == "event":
actors = entry.get("actors", [])
location = entry.get("location", "")
for actor in actors:
actor_node = self.graph.add_node(
content=actor, node_type="entity", source="data"
)
if actor_node:
self.graph.add_edge(
from_id=actor_node.id, to_id=main_node.id,
relation="related_to", confidence=0.85, source="data"
)
self._metrics["edges_ingested"] += 1
if location:
loc_node = self.graph.add_node(
content=location, node_type="entity", source="data"
)
if loc_node:
self.graph.add_edge(
from_id=main_node.id, to_id=loc_node.id,
relation="located_in", confidence=0.85, source="data"
)
self._metrics["edges_ingested"] += 1
# ── analogy: subject ↔ analogy ──
elif entry_type == "analogy":
subject = entry.get("subject", "")
analogy_text = entry.get("analogy", "")
if subject and analogy_text:
subj_node = self.graph.add_node(
content=subject, node_type="concept", source="data"
)
ana_node = self.graph.add_node(
content=analogy_text, node_type="concept", source="data"
)
if subj_node and ana_node:
self.graph.add_edge(
from_id=subj_node.id, to_id=ana_node.id,
relation="analogous_to", confidence=0.75, source="data"
)
self._metrics["edges_ingested"] += 1
# ── Paragraph: extract keywords as connected entities ──
elif entry_type == "paragraph":
keywords = utils.extract_keywords(entry.get("content", ""), max_keywords=10)
for kw in keywords:
kw_node = self.graph.add_node(
content=kw, node_type="concept", source="data",
weight=0.6
)
if kw_node:
self.graph.add_edge(
from_id=main_node.id, to_id=kw_node.id,
relation="related_to", confidence=0.6, source="data"
)
self._metrics["edges_ingested"] += 1
def _map_entry_type_to_node_type(self, entry_type: str) -> str:
"""Map data entry type to graph node type."""
type_map = {
"fact": "fact",
"definition": "definition",
"explanation": "fact",
"description": "fact",
"property": "fact",
"statistic": "fact",
"measurement": "fact",
"term": "definition",
"abbreviation": "definition",
"jargon": "definition",
"slang": "definition",
"idiom": "definition",
"synonym": "entity",
"antonym": "entity",
"quote": "fact",
"rule": "fact",
"example": "fact",
"analogy": "concept",
"opinion": "fact",
"paragraph": "fact",
"relation": "fact",
"cause_effect": "fact",
"comparison": "fact",
"hierarchy": "concept",
"composition": "concept",
"dependency": "fact",
"contradiction": "fact",
"timeline": "fact",
"process": "fact",
"procedure": "fact",
"event": "fact",
"history": "fact",
"change": "fact",
"qa": "fact",
}
        # Unknown and custom_* entry types default to "fact"
        return type_map.get(entry_type, "fact")
# ═══════════════════════════════════════════════════
# PHASE 2: CONNECT
# ═══════════════════════════════════════════════════
def _phase_connect(self):
"""
Find hidden connections between nodes.
Nodes with high vector similarity but no edge → create edge.
Focuses on least-connected nodes first.
"""
candidates = self.graph.get_least_connected_nodes(
limit=config.THINKING_BATCH_SIZE
)
connections_made = 0
for node in candidates:
if connections_made >= config.THINKING_BATCH_SIZE:
break
# Find similar nodes
similar = self.graph.find_similar_to_node(
node.id,
top_k=10,
min_similarity=config.SIMILARITY_THRESHOLD
)
for similar_node, similarity in similar:
# Skip if edge already exists (either direction)
if self.graph.edge_exists(node.id, similar_node.id):
continue
if self.graph.edge_exists(similar_node.id, node.id):
continue
# Create new connection
edge = self.graph.add_edge(
from_id=node.id,
to_id=similar_node.id,
relation="similar_to",
weight=similarity,
confidence=similarity * 0.9,
source="inferred"
)
if edge:
connections_made += 1
self._operations_this_cycle += 1
if connections_made > 0:
self._metrics["connections_found"] += connections_made
if config.LOG_THINKING_DETAILS:
print(f"[THINKER/CONNECT] Found {connections_made} new connections")
# ═══════════════════════════════════════════════════
# PHASE 3: INFER
# ═══════════════════════════════════════════════════
def _phase_infer(self):
"""
Transitive and analogical inference.
If A→B and B→C, maybe A→C.
Discovers knowledge not present in original data.
"""
inferences_made = 0
# ── Transitive Inference ──
inferences_made += self._transitive_inference()
# ── Analogical Inference ──
inferences_made += self._analogical_inference()
if inferences_made > 0:
self._metrics["inferences_made"] += inferences_made
self._operations_this_cycle += inferences_made
if config.LOG_THINKING_DETAILS:
print(f"[THINKER/INFER] Made {inferences_made} inferences")
def _transitive_inference(self) -> int:
"""
If A→B and B→C exist, infer A→C with decayed confidence.
Limited per cycle to prevent explosion.
"""
count = 0
# Sample a batch of nodes to check
node_ids = list(self.graph.nodes.keys())
if len(node_ids) > config.THINKING_BATCH_SIZE:
sample_indices = np.random.choice(
len(node_ids), config.THINKING_BATCH_SIZE, replace=False
)
node_ids = [node_ids[i] for i in sample_indices]
for node_a_id in node_ids:
if count >= config.MAX_INFERENCES_PER_CYCLE:
break
edges_ab = self.graph.get_edges_from(node_a_id)
for edge_ab in edges_ab:
if count >= config.MAX_INFERENCES_PER_CYCLE:
break
node_b_id = edge_ab.to_node
edges_bc = self.graph.get_edges_from(node_b_id)
for edge_bc in edges_bc:
node_c_id = edge_bc.to_node
# Skip self-loops and existing edges
if node_c_id == node_a_id:
continue
if self.graph.edge_exists(node_a_id, node_c_id):
continue
# Calculate inferred confidence
inferred_confidence = (
edge_ab.confidence *
edge_bc.confidence *
config.INFERENCE_DECAY
)
if inferred_confidence < config.INFERENCE_CONFIDENCE_MIN:
continue
# Determine inferred relation
inferred_relation = self._infer_relation(
edge_ab.relation, edge_bc.relation
)
# Create inferred edge
edge = self.graph.add_edge(
from_id=node_a_id,
to_id=node_c_id,
relation=inferred_relation,
weight=inferred_confidence,
confidence=inferred_confidence,
source="inferred"
)
if edge:
count += 1
if count >= config.MAX_INFERENCES_PER_CYCLE:
break
return count
def _analogical_inference(self) -> int:
"""
If A relates to B like C relates to ?, find ? using vector arithmetic.
A - B ≈ C - ? → ? ≈ C - A + B
"""
count = 0
# Find pairs with strong, specific relations
strong_edges = [
e for e in self.graph.edges.values()
if e.confidence > 0.7 and e.relation not in ("similar_to", "related_to")
]
if len(strong_edges) < 2:
return 0
# Sample pairs to compare
sample_size = min(20, len(strong_edges))
sampled = np.random.choice(len(strong_edges), sample_size, replace=False)
for i in sampled:
if count >= config.MAX_INFERENCES_PER_CYCLE // 4:
break
edge = strong_edges[i]
node_a = self.graph.get_node(edge.from_node)
node_b = self.graph.get_node(edge.to_node)
if not node_a or not node_b:
continue
# Find nodes similar to A (potential C candidates)
similar_to_a = self.graph.find_similar_to_node(
node_a.id, top_k=5,
min_similarity=config.ANALOGICAL_SIMILARITY_MIN
)
for node_c, sim_ac in similar_to_a:
if node_c.id == node_b.id:
continue
# Vector arithmetic: ? ≈ C - A + B
target_vector = utils.normalize(
node_c.vector - node_a.vector + node_b.vector
)
# Find nearest to target vector
candidates = self.graph.find_similar_nodes(
target_vector, top_k=3,
min_similarity=config.ANALOGICAL_SIMILARITY_MIN,
exclude_ids={node_a.id, node_b.id, node_c.id}
)
for candidate_node, sim_score in candidates:
if self.graph.edge_exists(node_c.id, candidate_node.id, edge.relation):
continue
inferred_confidence = sim_ac * sim_score * config.INFERENCE_DECAY
if inferred_confidence < config.INFERENCE_CONFIDENCE_MIN:
continue
new_edge = self.graph.add_edge(
from_id=node_c.id,
to_id=candidate_node.id,
relation=edge.relation,
weight=inferred_confidence,
confidence=inferred_confidence,
source="inferred"
)
if new_edge:
count += 1
break # One analogy per C
return count
def _infer_relation(self, rel_ab: str, rel_bc: str) -> str:
"""Determine relation type for transitive inference A→C from A→B→C."""
# Same relation → same
if rel_ab == rel_bc:
return rel_ab
# Specific known transitive patterns
transitive_map = {
("is_a", "is_a"): "is_a",
("part_of", "part_of"): "part_of",
("is_a", "has"): "has",
("is_a", "located_in"): "located_in",
("part_of", "located_in"): "located_in",
("is_a", "used_for"): "used_for",
("causes", "causes"): "causes",
("follows", "follows"): "follows",
("requires", "requires"): "requires",
("instance_of", "is_a"): "instance_of",
}
return transitive_map.get((rel_ab, rel_bc), "inferred_relation")
# ═══════════════════════════════════════════════════
# PHASE 4: ABSTRACT
# ═══════════════════════════════════════════════════
def _phase_abstract(self):
"""
Cluster similar nodes into abstraction nodes.
Creates higher-level concepts from concrete instances.
Recursive: abstractions can be abstracted further.
"""
# Skip if graph is too small
if len(self.graph.nodes) < config.CLUSTER_MIN_SIZE * 2:
return
abstractions_created = 0
# ── Level 1: Concrete → Abstraction ──
abstractions_created += self._create_abstractions(
source_types=["entity", "fact", "concept"],
abstraction_type="abstraction"
)
# ── Level 2+: Abstraction → Meta-Abstraction ──
if self._total_cycles % (config.COMPRESS_INTERVAL * 2) == 0:
existing_abstractions = self.graph.get_nodes_by_type("abstraction")
if len(existing_abstractions) >= config.CLUSTER_MIN_SIZE * 2:
abstractions_created += self._create_abstractions(
source_types=["abstraction"],
abstraction_type="meta_abstraction"
)
if abstractions_created > 0:
self._metrics["abstractions_created"] += abstractions_created
self._operations_this_cycle += abstractions_created
if config.LOG_THINKING_DETAILS:
print(f"[THINKER/ABSTRACT] Created {abstractions_created} abstractions")
def _create_abstractions(
self,
source_types: List[str],
abstraction_type: str
) -> int:
"""Create abstraction nodes from clusters of source-typed nodes."""
# Gather source nodes
source_nodes = []
for stype in source_types:
source_nodes.extend(self.graph.get_nodes_by_type(stype))
if len(source_nodes) < config.CLUSTER_MIN_SIZE:
return 0
# Build vector matrix for clustering
vectors = np.array(
[n.vector for n in source_nodes],
dtype=np.float32
)
node_ids = [n.id for n in source_nodes]
# Find natural clusters
clusters = utils.find_natural_clusters(
vectors,
similarity_threshold=config.CLUSTER_SIMILARITY_INTRA
)
count = 0
for cluster_indices in clusters:
if count >= config.THINKING_BATCH_SIZE // 2:
break
# Check if this cluster already has an abstraction
member_ids = [node_ids[i] for i in cluster_indices]
already_abstracted = False
for mid in member_ids:
for edge in self.graph.get_edges_from(mid):
if edge.relation == "instance_of":
already_abstracted = True
break
if already_abstracted:
break
if already_abstracted:
continue
# Compute centroid
cluster_vectors = vectors[cluster_indices]
centroid = utils.vector_mean(list(cluster_vectors))
# Generate label from common keywords
all_content = " ".join(
self.graph.nodes[node_ids[i]].content
for i in cluster_indices
if node_ids[i] in self.graph.nodes
)
keywords = utils.extract_keywords(all_content, max_keywords=5)
label = " + ".join(keywords[:3]) if keywords else "abstract_concept"
# Check depth limit
current_depth = 0
if abstraction_type == "meta_abstraction":
for i in cluster_indices:
nid = node_ids[i]
depth = self.graph._get_abstraction_depth(nid)
current_depth = max(current_depth, depth)
if current_depth >= config.MAX_ABSTRACTION_DEPTH:
continue
# Create abstraction node
abs_node = self.graph.add_node(
content=f"[{abstraction_type}] {label}",
node_type=abstraction_type,
source="inferred",
vector=utils.normalize(centroid),
weight=config.ABSTRACTION_MIN_CONFIDENCE
)
if not abs_node:
continue
# Link members to abstraction
for i in cluster_indices:
member_id = node_ids[i]
self.graph.add_edge(
from_id=member_id,
to_id=abs_node.id,
relation="instance_of",
weight=0.8,
confidence=0.8,
source="inferred"
)
count += 1
return count
# ═══════════════════════════════════════════════════
# PHASE 5: STRENGTHEN / WEAKEN
# ═══════════════════════════════════════════════════
def _phase_strengthen_weaken(self):
"""
Strengthen edges that are frequently used.
Weaken edges that haven't been used.
Nodes with more connections get slight weight boost.
"""
# ── Weaken unused edges (periodic) ──
if self._total_cycles % config.WEIGHT_DECAY_INTERVAL_CYCLES == 0:
decay_count = 0
edges = list(self.graph.edges.values())
for edge in edges:
if edge.used_count == 0 and edge.source == "inferred":
self.graph.decay_edge(edge.id)
decay_count += 1
self._metrics["edges_decayed"] += decay_count
self._operations_this_cycle += decay_count
if config.LOG_THINKING_DETAILS and decay_count > 0:
print(f"[THINKER/WEAKEN] Decayed {decay_count} unused edges")
# ── Boost well-connected nodes ──
nodes = list(self.graph.nodes.values())
sample_size = min(config.THINKING_BATCH_SIZE, len(nodes))
if sample_size == 0:
return
sampled = np.random.choice(len(nodes), sample_size, replace=False)
reinforced = 0
for idx in sampled:
node = nodes[idx]
if node.connections > 3:
bonus = config.NODE_WEIGHT_CONNECTION_BONUS * min(node.connections, 20)
new_weight = min(node.weight + bonus, config.WEIGHT_MAX)
if new_weight != node.weight:
self.graph.update_node_weight(node.id, new_weight)
reinforced += 1
self._metrics["edges_reinforced"] += reinforced
self._operations_this_cycle += reinforced
# ═══════════════════════════════════════════════════
# PHASE 6: COMPRESS
# ═══════════════════════════════════════════════════
def _phase_compress(self):
"""
Merge redundant nodes.
Prune dead edges.
Prune orphan nodes.
Keep the graph efficient and clean.
"""
if self._total_cycles % config.COMPRESS_INTERVAL != 0:
return
# ── Merge redundant nodes ──
redundant_pairs = self.graph.find_redundant_pairs(limit=10)
merged = 0
for id_keep, id_remove, similarity in redundant_pairs:
if self.graph.merge_nodes(id_keep, id_remove):
merged += 1
# ── Prune weak edges ──
pruned_edges = self.graph.prune_weak_edges()
# ── Prune orphan nodes ──
pruned_nodes = self.graph.prune_orphan_nodes()
self._metrics["nodes_merged"] += merged
self._metrics["edges_pruned"] += pruned_edges
self._metrics["nodes_pruned"] += pruned_nodes
        total_ops = merged + pruned_edges + pruned_nodes
        self._operations_this_cycle += total_ops
        if config.LOG_THINKING_DETAILS and total_ops > 0:
print(
f"[THINKER/COMPRESS] Merged {merged} nodes, "
f"pruned {pruned_edges} edges, {pruned_nodes} orphan nodes"
)
# ═══════════════════════════════════════════════════
# PHASE 7: VALIDATE
# ═══════════════════════════════════════════════════
def _phase_validate(self):
"""
Check logical consistency of the graph.
Resolve contradictions.
Detect and break circular inferences.
"""
if self._total_cycles % config.VALIDATE_INTERVAL != 0:
return
resolved = 0
# ── Detect contradictions ──
resolved += self._resolve_contradictions()
# ── Detect circular inferences ──
resolved += self._break_circular_inferences()
if resolved > 0:
self._metrics["contradictions_resolved"] += resolved
self._operations_this_cycle += resolved
if config.LOG_THINKING_DETAILS:
print(f"[THINKER/VALIDATE] Resolved {resolved} issues")
def _resolve_contradictions(self) -> int:
"""
Find and resolve contradictions.
If A→B (positive) and A→¬B (opposite_of) exist, keep higher confidence.
"""
resolved = 0
# Sample nodes to check
node_ids = list(self.graph.nodes.keys())
sample_size = min(config.THINKING_BATCH_SIZE, len(node_ids))
if sample_size == 0:
return 0
sampled = np.random.choice(len(node_ids), sample_size, replace=False)
for idx in sampled:
node_id = node_ids[idx]
edges_out = self.graph.get_edges_from(node_id)
# Group edges by target
target_edges: Dict[str, List[Edge]] = {}
for edge in edges_out:
key = edge.to_node
if key not in target_edges:
target_edges[key] = []
target_edges[key].append(edge)
            # Relations considered mutually contradictory
            contradictory_pairs = {
                ("causes", "prevents"),
                ("is_a", "opposite_of"),
                ("synonym_of", "opposite_of"),
                ("requires", "prevents"),
            }
            # Check for contradictory relations to the same target
            for target_id, edges in target_edges.items():
                if len(edges) < 2:
                    continue
for i in range(len(edges)):
for j in range(i + 1, len(edges)):
pair = (edges[i].relation, edges[j].relation)
reverse_pair = (edges[j].relation, edges[i].relation)
if pair in contradictory_pairs or reverse_pair in contradictory_pairs:
# Keep higher confidence, remove lower
if edges[i].confidence >= edges[j].confidence:
self.graph.remove_edge(edges[j].id)
else:
self.graph.remove_edge(edges[i].id)
resolved += 1
return resolved
def _break_circular_inferences(self) -> int:
"""
Detect inference chains that loop back on themselves.
Break the weakest link in each cycle.
"""
broken = 0
# Sample inferred edges
inferred_edges = [
e for e in self.graph.edges.values()
if e.source == "inferred"
]
sample_size = min(config.THINKING_BATCH_SIZE, len(inferred_edges))
if sample_size == 0:
return 0
sampled_indices = np.random.choice(
len(inferred_edges), sample_size, replace=False
)
for idx in sampled_indices:
edge = inferred_edges[idx]
# Check if this edge creates a cycle
# Simple: does a path exist from to_node back to from_node?
paths = self.graph.find_paths(
edge.to_node, edge.from_node,
max_depth=4, max_paths=1
)
if paths:
# Cycle detected — remove weakest edge in cycle
cycle_path = [edge.from_node, edge.id] + paths[0]
weakest_edge_id = None
weakest_weight = float('inf')
for item_id in cycle_path:
if item_id in self.graph.edges:
e = self.graph.edges[item_id]
if e.weight < weakest_weight and e.source == "inferred":
weakest_weight = e.weight
weakest_edge_id = e.id
if weakest_edge_id:
self.graph.remove_edge(weakest_edge_id)
broken += 1
return broken
# ═══════════════════════════════════════════════════
# PHASE 8: SELF-QUESTION
# ═══════════════════════════════════════════════════
def _phase_self_question(self):
"""
Generate internal questions to fill knowledge gaps.
Ask: "What connects X to Y?" where X and Y are distant but possibly related.
If a new chain is found, save it.
"""
if self._total_cycles % config.SELF_QUESTION_INTERVAL != 0:
return
if len(self.graph.nodes) < 10:
return
questions_asked = 0
questions_answered = 0
# Strategy 1: Find disconnected clusters and try to bridge them
questions_answered += self._bridge_disconnected()
questions_asked += 3
# Strategy 2: Explore high-weight nodes that lack certain relation types
questions_answered += self._fill_relation_gaps()
questions_asked += 3
# Strategy 3: Challenge existing weak inferences
questions_answered += self._challenge_weak_inferences()
questions_asked += 2
self._metrics["self_questions_asked"] += questions_asked
self._metrics["self_questions_answered"] += questions_answered
self._operations_this_cycle += questions_answered
if config.LOG_THINKING_DETAILS and questions_answered > 0:
print(
f"[THINKER/SELF-Q] Asked {questions_asked} questions, "
f"answered {questions_answered}"
)
def _bridge_disconnected(self) -> int:
"""Try to find connections between disconnected subgraphs."""
connected = 0
# Pick two random nodes that have no path between them
node_ids = list(self.graph.nodes.keys())
if len(node_ids) < 10:
return 0
for _ in range(3):
idx = np.random.choice(len(node_ids), 2, replace=False)
id_a, id_b = node_ids[idx[0]], node_ids[idx[1]]
node_a = self.graph.get_node(id_a)
node_b = self.graph.get_node(id_b)
if not node_a or not node_b:
continue
# Are they already connected?
paths = self.graph.find_paths(id_a, id_b, max_depth=4, max_paths=1)
if paths:
continue
# Can we connect them via vector similarity?
sim = utils.cosine_similarity(node_a.vector, node_b.vector)
if sim > config.SIMILARITY_THRESHOLD * 0.8:
# They're somewhat similar but not connected → connect
edge = self.graph.add_edge(
from_id=id_a,
to_id=id_b,
relation="inferred_relation",
weight=sim * 0.7,
confidence=sim * 0.6,
source="inferred"
)
if edge:
connected += 1
return connected
def _fill_relation_gaps(self) -> int:
"""Find high-weight nodes missing common relations and try to fill them."""
filled = 0
# Get well-known nodes
important_nodes = sorted(
self.graph.nodes.values(),
key=lambda n: n.weight * n.connections,
reverse=True
)[:20]
common_relations = ["is_a", "part_of", "has", "used_for", "related_to"]
for node in important_nodes[:5]:
existing_relations = set()
for edge in self.graph.get_edges_from(node.id):
existing_relations.add(edge.relation)
for relation in common_relations:
if relation in existing_relations:
continue
# Can we find a target for this relation via similarity?
# Look for nodes that commonly have this relation
candidates = self.graph.find_similar_to_node(
node.id, top_k=5,
min_similarity=config.SIMILARITY_THRESHOLD
)
for candidate, sim in candidates:
# Check if candidate has this relation type outgoing
candidate_rels = [
e.relation for e in self.graph.get_edges_from(candidate.id)
]
if relation in candidate_rels:
# This candidate has the relation → node might too
for edge in self.graph.get_edges_from(candidate.id):
if edge.relation == relation:
target = self.graph.get_node(edge.to_node)
if target and not self.graph.edge_exists(
node.id, target.id, relation
):
confidence = sim * edge.confidence * config.INFERENCE_DECAY
if confidence >= config.INFERENCE_CONFIDENCE_MIN:
new_edge = self.graph.add_edge(
from_id=node.id,
to_id=target.id,
relation=relation,
weight=confidence,
confidence=confidence,
source="inferred"
)
if new_edge:
filled += 1
break
break # One fill per missing relation
if filled >= 5:
break
return filled
def _challenge_weak_inferences(self) -> int:
"""
Re-examine weak inferred edges.
If supporting evidence exists, strengthen.
If contradicting evidence exists, remove.
"""
improved = 0
weak_edges = self.graph.get_weakest_edges(
limit=20, source_filter="inferred"
)
for edge in weak_edges:
from_node = self.graph.get_node(edge.from_node)
to_node = self.graph.get_node(edge.to_node)
if not from_node or not to_node:
continue
# Check if there's additional evidence
# (other paths between these nodes)
paths = self.graph.find_paths(
edge.from_node, edge.to_node,
max_depth=4, max_paths=3
)
            # Keep only alternative paths that do not use this edge itself
            alternative_paths = [
                p for p in paths
                if edge.id not in p
            ]
if alternative_paths:
# Multiple paths support this edge → strengthen
support_factor = 1.0 + 0.05 * len(alternative_paths)
new_weight = min(
edge.weight * support_factor,
config.WEIGHT_MAX
)
self.graph.edges[edge.id].weight = new_weight
self.graph.edges[edge.id].confidence = min(
edge.confidence * support_factor, 1.0
)
self.graph.edges[edge.id].mark_dirty()
self.graph.memory.save_edge(edge.to_dict())
improved += 1
else:
# No alternative support → further weaken
if edge.weight < config.PRUNE_WEIGHT_THRESHOLD * 2:
self.graph.remove_edge(edge.id)
improved += 1
return improved
# ═══════════════════════════════════════════════════
# USER KNOWLEDGE EXTRACTION
# ═══════════════════════════════════════════════════
    def extract_from_user_message(self, message: str):
        """
        Extract knowledge from a user's chat message.
        Called by brain.py after processing a user request.
        Stores extracted entities and keywords, plus a truncated fact
        snippet (first 500 chars) only when the message looks definitional.
        """
if not message or len(message.strip()) < 10:
return
message = message.strip()
# Extract keywords
keywords = utils.extract_keywords(message, max_keywords=15)
if len(keywords) < 2:
return
# Extract entities
entities = utils.extract_entities_simple(message)
# Create entity nodes
entity_nodes = []
for entity in entities[:5]:
node = self.graph.add_node(
content=entity,
node_type="entity",
source="user_chat",
weight=config.USER_KNOWLEDGE_CONFIDENCE
)
if node:
entity_nodes.append(node)
# Create concept nodes from keywords not already entities
entity_lower = {e.lower() for e in entities}
        for kw in keywords:
            if kw.lower() not in entity_lower:
                self.graph.add_node(
                    content=kw,
                    node_type="concept",
                    source="user_chat",
                    weight=config.USER_KNOWLEDGE_CONFIDENCE * 0.7
                )
        # If the message looks definitional, create a fact node. The cue
        # words include Indonesian copulas ("adalah", "merupakan", "yaitu",
        # "ialah", all roughly meaning "is"/"namely") alongside English ones.
        if len(message) > 30 and any(
            p in message.lower() for p in [
                "adalah", "merupakan", "yaitu", "ialah",
                "is", "are", "means", "defined"
            ]
        ):
fact_node = self.graph.add_node(
content=message[:500],
node_type="fact",
source="user_chat",
weight=config.USER_KNOWLEDGE_CONFIDENCE
)
# Connect fact to entities mentioned
if fact_node:
for en in entity_nodes:
self.graph.add_edge(
from_id=fact_node.id,
to_id=en.id,
relation="related_to",
confidence=config.USER_KNOWLEDGE_CONFIDENCE * 0.8,
source="user_chat"
)
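

# ═══════════════════════════════════════════════════════════
# MANUAL SMOKE TEST
# ═══════════════════════════════════════════════════════════
# A minimal sketch for running the engine standalone. It assumes
# KnowledgeGraph() takes no required arguments; check knowledge.py
# for the actual constructor signature.
if __name__ == "__main__":
    kg = KnowledgeGraph()
    thinker = Thinker(kg)
    thinker.start()
    try:
        time.sleep(30)  # let a few phases run
        print(json.dumps(thinker.get_status(), indent=2))
    finally:
        thinker.stop()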