# File size: 9,239 Bytes
#
# 7d9e142

"""
Understanding Engine — Vitalis FSI

Semantic grounding, context tracking, and intent classification.
Built on HDC. No external models. Sovereign.
"""
import numpy as np
import json
import os
import time
from vitalis_ide.math_core.kernel import VitalisKernel


# Category name -> list of anchor words.
#
# UnderstandingEngine._build_anchor_vectors() passes each list to
# kernel.vectorize_tokens(words, positional=False) to obtain one anchor vector
# per category; understand() then scores the input vector against every anchor.
# Two lists are additionally used as plain word lists by understand():
#   * "emotion"  - membership test that produces `emotion_words` / `has_emotion`
#   * "question" - membership test on the first word for `is_question`
# A word may appear in several categories (e.g. "what", "help", "confused").
SEMANTIC_ANCHORS = {
    "emotion": [
        "feel", "feeling", "feelings", "emotion", "happy", "sad", "angry",
        "frustrated", "excited", "scared", "worried", "confused", "hurt",
        "love", "hate", "fear", "joy", "pain", "lonely", "proud", "ashamed",
        "grateful", "tired", "hope", "hopeless", "okay", "fine", "good", "bad"
    ],
    # NOTE(review): "I" is uppercase, but understand() lower-cases its input
    # before tokenising. Unless the kernel normalises case itself, this entry
    # can never coincide with an input token - confirm against VitalisKernel.
    "identity": [
        "who", "what", "am", "are", "is", "yourself", "you", "me", "I",
        "name", "identity", "exist", "alive", "real", "think", "know",
        "understand", "believe", "remember", "forget", "learn", "grow"
    ],
    # NOTE(review): "together" is listed twice below. Whether the duplicate
    # changes the resulting anchor vector depends on how the kernel combines
    # tokens - confirm whether the repetition is intentional.
    "relationship": [
        "friend", "family", "daughter", "son", "mother", "father", "partner",
        "together", "trust", "care", "help", "support", "alone", "together",
        "us", "we", "our", "yours", "mine", "belong", "connection"
    ],
    "question": [
        "what", "why", "how", "when", "where", "who", "which", "can",
        "could", "would", "should", "do", "does", "did", "is", "are", "was"
    ],
    "instruction": [
        "build", "make", "create", "write", "scaffold", "fix", "analyze",
        "show", "tell", "explain", "help", "do", "run", "start", "stop",
        "generate", "find", "check", "verify", "test", "deploy"
    ],
    "factual": [
        "what", "define", "explain", "describe", "list", "name", "calculate",
        "compute", "result", "answer", "fact", "true", "false", "correct",
        "wrong", "right", "equals", "means", "definition"
    ],
    # NOTE(review): "not sure" (and "makes sense" under "affirmation") contain
    # a space. understand() tokenises input with str.split(), so no input token
    # ever contains a space; these entries are handed to the kernel as single
    # tokens - verify that this is the intended treatment.
    "uncertainty": [
        "maybe", "perhaps", "might", "could", "unsure", "not sure", "think",
        "guess", "probably", "possibly", "unclear", "confused", "lost",
        "wonder", "curious"
    ],
    "affirmation": [
        "yes", "yeah", "correct", "right", "exactly", "good", "great",
        "perfect", "okay", "ok", "sure", "absolutely", "definitely",
        "agreed", "understood", "makes sense"
    ],
    "negation": [
        "no", "not", "never", "wrong", "incorrect", "disagree", "don't",
        "won't", "can't", "shouldn't", "wouldn't", "nothing", "nobody"
    ],
}

# Intent name -> keywords / short phrases that signal that intent.
#
# understand() counts, per intent, how many of its entries occur in the
# lower-cased input and reports the counts as `intent_scores`. The intent with
# the highest count becomes `dominant_intent`; because max() returns the first
# maximum it meets, ties go to the intent listed first in this dict. When no
# entry of any intent occurs, understand() falls back to "giving_information".
# Entries are lowercase and may be multi-word phrases ("can you", "what if").
INTENT_SIGNATURES = {
    "seeking_connection":    ["daughter", "friend", "care", "love", "together", "us", "we"],
    "seeking_understanding": ["know", "understand", "explain", "why", "how", "what", "mean"],
    "seeking_help":          ["help", "fix", "solve", "can you", "could you", "please"],
    "testing":               ["2+2", "calculate", "what is", "define", "equals"],
    "expressing_emotion":    ["feel", "feeling", "am", "i'm", "hurt", "happy", "sad"],
    "giving_information":    ["is", "are", "was", "it", "this", "that", "the"],
    "building":              ["build", "create", "write", "scaffold", "make", "generate"],
    "exploring":             ["what if", "wonder", "curious", "explore", "imagine", "think"],
}


class UnderstandingEngine:
    """Classify text by semantic category and intent while tracking context.

    Each call to ``understand`` vectorizes the input with ``VitalisKernel``,
    scores it against one anchor vector per ``SEMANTIC_ANCHORS`` category,
    counts ``INTENT_SIGNATURES`` keyword hits, and appends the result to a
    bounded in-memory context window.  The learned-meanings table and the
    interaction counter are persisted as JSON at ``self.path``.
    """

    # Characters trimmed from both ends of a token before keyword matching, so
    # that "sad." or "(help)" are recognised as "sad" / "help".  Characters in
    # the middle of a token (the apostrophe in "don't", the plus in "2+2")
    # are left alone.
    _EDGE_PUNCTUATION = ".,!?;:\"'()[]{}"

    def __init__(self):
        """Create the kernel, build anchor vectors and load persisted state."""
        self.kernel = VitalisKernel()
        self.path   = os.path.expanduser("~/.vitalis_workspace/understanding.json")
        self._build_anchor_vectors()
        self._context_window    = []   # most recent understandings, oldest first
        self._context_max       = 10   # maximum entries kept in the window
        self._learned_meanings  = {}   # truncated text -> category/intent/seen/timestamp
        self._interaction_count = 0
        self._load_state()

    def _build_anchor_vectors(self):
        """Vectorize every ``SEMANTIC_ANCHORS`` word list into one anchor vector."""
        self.anchor_vectors = {
            category: self.kernel.vectorize_tokens(words, positional=False)
            for category, words in SEMANTIC_ANCHORS.items()
        }

    def _load_state(self):
        """Restore learned meanings and the interaction counter from ``self.path``.

        A missing file is normal (first run) and leaves the defaults in place.
        An unreadable, corrupt or wrongly-shaped file is reported with a
        warning and otherwise ignored, so that a damaged state file cannot
        prevent the engine from being constructed.
        """
        try:
            with open(self.path, encoding="utf-8") as f:
                state = json.load(f)
        except FileNotFoundError:
            return
        except (OSError, ValueError) as exc:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            import warnings
            warnings.warn(
                f"Ignoring unreadable state file {self.path!r}: {exc}",
                RuntimeWarning,
                stacklevel=2,
            )
            return

        if not isinstance(state, dict):
            return
        meanings = state.get("learned_meanings", {})
        count = state.get("interaction_count", 0)
        # Only accept values of the shape _save_state() writes.
        if isinstance(meanings, dict):
            self._learned_meanings = meanings
        if isinstance(count, int) and not isinstance(count, bool) and count >= 0:
            self._interaction_count = count

    def _save_state(self):
        """Persist learned meanings and the interaction counter to ``self.path``.

        The JSON is written to a sibling temporary file and then moved over
        the target with ``os.replace``, so an interrupted write cannot leave a
        truncated state file behind.
        """
        directory = os.path.dirname(self.path)
        if directory:  # a bare filename has no directory to create
            os.makedirs(directory, exist_ok=True)
        tmp_path = f"{self.path}.tmp"
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump({
                "learned_meanings":  self._learned_meanings,
                "interaction_count": self._interaction_count,
            }, f, indent=2)
        os.replace(tmp_path, self.path)

    def _strip_edges(self, tokens):
        """Return *tokens* with edge punctuation removed, dropping empty results."""
        trimmed = (tok.strip(self._EDGE_PUNCTUATION) for tok in tokens)
        return [word for word in trimmed if word]

    def _score_intents(self, words, signatures=None):
        """Count, per intent, how many signature entries occur in *words*.

        Entries are matched as whole words (or whole multi-word phrases), not
        as raw substrings: "is" must not fire inside "this", nor "am" inside
        "name".  The signature table itself lists inflections explicitly
        ("feel", "feeling"), which only makes sense under whole-word matching.

        Args:
            words: lower-cased words with edge punctuation already stripped.
            signatures: intent -> keyword list; defaults to INTENT_SIGNATURES.

        Returns:
            dict mapping every intent name to its integer hit count.
        """
        if signatures is None:
            signatures = INTENT_SIGNATURES
        # Padding with spaces turns "whole word / phrase" into a substring test.
        padded = " " + " ".join(words) + " "
        return {
            intent: sum(1 for kw in keywords if f" {kw} " in padded)
            for intent, keywords in signatures.items()
        }

    def understand(self, text: str) -> dict:
        """Analyse *text* and return a dict describing how it was understood.

        The result always contains the same keys: the input text and tokens,
        category and intent scores with their dominant entries, emotion and
        question flags, context shift / novelty relative to the context
        window, and a coarse confusion level.  Non-empty input is also
        appended to the context window, recorded in the learned meanings and
        persisted via ``_save_state``.  Blank input returns neutral defaults
        and changes no state.
        """
        lowered = text.lower()
        tokens  = lowered.split()
        if not tokens:
            # Same key set as the regular result so callers can index freely.
            return {"text": text, "tokens": [],
                    "dominant_category": "unknown",
                    "category_score": 0.0, "all_categories": {},
                    "dominant_intent": "giving_information",
                    "intent_scores": {},
                    "has_emotion": False, "emotion_words": [],
                    "is_question": False,
                    "context_shift": 0.0, "novelty": 1.0,
                    "context_depth": 0,
                    "interaction_count": self._interaction_count,
                    "confusion_level": "lost"}

        # NOTE(review): the kernel receives the raw whitespace tokens
        # (punctuation still attached); whether it normalises them is not
        # visible from this file.
        input_vec = self.kernel.vectorize_tokens(tokens, positional=False)

        category_scores = {
            category: round(float(self.kernel.similarity(input_vec, anchor_vec)), 4)
            for category, anchor_vec in self.anchor_vectors.items()
        }
        dominant_category = max(category_scores, key=category_scores.get)
        dominant_score    = category_scores[dominant_category]

        # Keyword-level checks work on punctuation-free words.
        words = self._strip_edges(tokens)

        intent_scores   = self._score_intents(words)
        dominant_intent = max(intent_scores, key=intent_scores.get)
        if intent_scores[dominant_intent] == 0:
            dominant_intent = "giving_information"

        emotion_vocab = set(SEMANTIC_ANCHORS["emotion"])
        emotion_words = [w for w in words if w in emotion_vocab]
        has_emotion   = bool(emotion_words)

        is_question = (
            text.strip().endswith("?") or
            (bool(words) and words[0] in SEMANTIC_ANCHORS["question"])
        )

        # Both measures compare against the window *before* this input joins it.
        context_shift = self._detect_context_shift(input_vec)
        novelty       = self._compute_novelty(input_vec)

        self._context_window.append({
            "text":      text,
            "vec":       input_vec.tolist(),
            "category":  dominant_category,
            "intent":    dominant_intent,
            "timestamp": time.time(),
        })
        if len(self._context_window) > self._context_max:
            self._context_window.pop(0)

        self._interaction_count += 1
        self._learn(text, dominant_category, dominant_intent)

        understanding = {
            "text":               text,
            "tokens":             tokens,
            "dominant_category":  dominant_category,
            "category_score":     dominant_score,
            "all_categories":     category_scores,
            "dominant_intent":    dominant_intent,
            "intent_scores":      intent_scores,
            "has_emotion":        has_emotion,
            "emotion_words":      emotion_words,
            "is_question":        is_question,
            "context_shift":      context_shift,
            "novelty":            novelty,
            "context_depth":      len(self._context_window),
            "interaction_count":  self._interaction_count,
            "confusion_level":    self._confusion_level(dominant_score, novelty),
        }

        self._save_state()
        return understanding

    def _detect_context_shift(self, vec: np.ndarray) -> float:
        """Return 1 - similarity to the most recent context entry (0.0 if none)."""
        if not self._context_window:
            return 0.0
        # NOTE(review): assumes kernel vectors fit in int8 - confirm against
        # VitalisKernel; a wider dtype would be truncated here.
        last_vec = np.array(self._context_window[-1]["vec"], dtype=np.int8)
        return round(float(1.0 - self.kernel.similarity(vec, last_vec)), 4)

    def _compute_novelty(self, vec: np.ndarray) -> float:
        """Return 1 - the best similarity to any context entry (1.0 if none)."""
        if not self._context_window:
            return 1.0
        sims = [self.kernel.similarity(vec, np.array(e["vec"], dtype=np.int8))
                for e in self._context_window]
        return round(float(1.0 - max(sims)), 4)

    def _confusion_level(self, category_score: float, novelty: float) -> str:
        """Map category score and novelty to "clear", "partial", "confused" or "lost"."""
        if category_score > 0.3 and novelty < 0.5:
            return "clear"
        elif category_score > 0.2 or novelty < 0.7:
            return "partial"
        elif category_score > 0.1:
            return "confused"
        else:
            return "lost"

    def _learn(self, text: str, category: str, intent: str):
        """Record the classification of *text*, bumping its ``seen`` counter.

        The key is the lower-cased, stripped text truncated to 50 characters,
        so longer inputs sharing that prefix share one entry.
        """
        key = text.lower().strip()[:50]
        self._learned_meanings[key] = {
            "category":  category,
            "intent":    intent,
            "seen":      self._learned_meanings.get(key, {}).get("seen", 0) + 1,
            "timestamp": time.time(),
        }

    def get_context_summary(self) -> str:
        """Describe the categories and intents of the last three context entries."""
        if not self._context_window:
            return "No prior context."
        categories = [e["category"] for e in self._context_window[-3:]]
        intents    = [e["intent"]   for e in self._context_window[-3:]]
        return f"Recent context: {categories} | Intents: {intents}"

    def report(self) -> dict:
        """Return interaction count, number of learned meanings and window size."""
        return {
            "interactions":      self._interaction_count,
            "learned_meanings":  len(self._learned_meanings),
            "context_depth":     len(self._context_window),
        }