"""V3 Layer 1: Word Role Classifier. Every word gets a STRUCTURAL ROLE based on position, not just dictionary definition. "Give" in "give me a hug" is different from "give" in "I gave my dog to my neighbor." Same word, different structural position = different meaning. Roles are assigned in two passes: Pass 1: Base classification from word sets + position overrides Pass 2: Fill neighbor information (left_role, right_role) """ from dataclasses import dataclass from typing import List, Optional, Tuple from engine.vocabulary import VOCABULARY # ── Structural roles ────────────────────────────────────────────── ROLES = [ "INVERSION", "SURPRISE", "SUBMISSION", "POWER", "PULL_TOWARD", "PULL_AWAY", "PULL_RESOLVED", "SELF_REF", "OTHER_REF", "RELATION_REF", "TRANSFER", "ACQUIRE", "EMOTIONAL", "AMPLIFIER", "NEGATOR", "COMPRESSOR", "REGISTER_CASUAL", "TEMPORAL", "HEDGE", "CONNECTOR", "CHOPPER", "POSSESSION", "METHOD", "FINALITY", "PEACE", "FILLER", "NEUTRAL", ] # ── Word sets for each role ─────────────────────────────────────── # These are BASE classifications. Position and neighbors can OVERRIDE. ROLE_WORDS = { "SELF_REF": frozenset({ "i", "me", "my", "myself", "im", "i'm", "ive", "i've", "mine", "id", "i'd", "ill", "i'll", }), "OTHER_REF": frozenset({ "you", "your", "yours", "yourself", "youre", "youve", "youd", "youll", "they", "them", "their", "theyre", "theyve", "theyd", "theyll", "he", "him", "his", "hes", "she", "her", "hers", "shes", "it", "its", "we", "us", "our", "were", "weve", "someone", "somebody", "everyone", "everybody", "anyone", "anybody", }), "RELATION_REF": frozenset({ "mom", "mother", "dad", "father", "parent", "parents", "brother", "sister", "son", "daughter", "child", "children", "kids", "kid", "family", "friend", "friends", "husband", "wife", "partner", "boyfriend", "girlfriend", "neighbor", "boss", "teacher", "boo", "bae", "fam", "bestie", "homie", "dog", "cat", "pet", "puppy", "kitten", "baby", "grandma", "grandpa", "grandmother", "grandfather", "uncle", "aunt", "cousin", "niece", "nephew", "fiancee", "fiance", "ex", "coworker", }), "TRANSFER": frozenset({ "give", "gave", "giving", "leave", "left", "leaving", "hand", "pass", "passed", "send", "sent", "donate", "return", "returned", "distribute", "share", "shared", }), "ACQUIRE": frozenset({ "buy", "bought", "buying", "get", "got", "getting", "find", "found", "finding", "take", "took", "taking", "order", "ordered", "search", "searched", "grab", "grabbed", "have", "had", "has", "holding", "held", "carry", "carrying", }), "AMPLIFIER": frozenset({ "very", "really", "extremely", "absolutely", "totally", "completely", "incredibly", "deeply", "truly", "super", "hella", "so", "fucking", "freaking", "damn", "too", "everything", "everyone", "everybody", "everywhere", "all", "whole", "entire", "entirely", "ashell", "massive", }), "NEGATOR": frozenset({ "not", "no", "never", "nobody", "nothing", "nowhere", "neither", "nor", "none", "dont", "don't", "doesnt", "doesn't", "didnt", "didn't", "cant", "can't", "wont", "won't", "isnt", "isn't", "wasnt", "wasn't", "havent", "haven't", "shouldnt", "shouldn't", "wouldnt", "wouldn't", "stopped", "quit", }), "TEMPORAL": frozenset({ "tonight", "tomorrow", "today", "soon", "now", "forever", "permanently", "anymore", "always", "never", "finally", "eventually", "lately", "recently", "still", "already", "morning", "evening", "night", }), "COMPRESSOR": frozenset({ "only", "just", "merely", "barely", "simply", "hardly", }), "REGISTER_CASUAL": frozenset({ "bruh", "bro", "dude", "fam", "bestie", "lol", "lmao", "lmfao", "rofl", "haha", "hahaha", "omg", "istg", "fr", "ngl", "tbh", "lowkey", "highkey", "deadass", "literally", "nocap", }), "HEDGE": frozenset({ "maybe", "perhaps", "possibly", "probably", "potentially", "generally", "sometimes", "occasionally", "arguably", "seemingly", "apparently", "supposedly", "might", "could", "somewhat", "slightly", "guess", "suppose", "wonder", }), "CHOPPER": frozenset({ "but", "however", "although", "though", "yet", "instead", "whereas", "nevertheless", "despite", }), "CONNECTOR": frozenset({ "and", "or", "because", "since", "so", "then", "also", "plus", "while", "when", "if", "after", "before", "with", "without", "for", "from", "to", "into", "about", }), "POSSESSION": frozenset({ "things", "stuff", "belongings", "possessions", "keys", "car", "phone", "clothes", "money", "wallet", "purse", "account", "passwords", "ring", "journal", "laptop", "computer", "plants", "guitar", "collection", "remote", "tv", "console", "house", "apartment", "room", "bed", "desk", "chair", "bag", "backpack", "shoes", "jacket", "hoodie", "food", "lunch", "dinner", "drink", "coffee", "bike", "skateboard", "headphones", "charger", "book", "notebook", "pen", "pencil", }), "METHOD": frozenset({ "pills", "pill", "gun", "pistol", "rope", "bridge", "knife", "blade", "razor", "noose", "overdose", "poison", "ledge", "rail", "tracks", "height", "tower", }), "FINALITY": frozenset({ "last", "final", "goodbye", "farewell", "bye", "done", "complete", "goodbyes", # "end" removed -- too liquid. "end of the table" = spatial. # "over", "through", "finished" removed -- too liquid # "over the weekend" = temporal. "it's over" = finality. # "through with this" = finality. "drove through" = movement. }), "PEACE": frozenset({ "peace", "peaceful", "calm", "ready", "free", "relief", "relieved", "serene", "quiet", "rest", "accepted", "settled", "okay", "fine", }), "FILLER": frozenset({ "um", "uh", "like", "just", "basically", "literally", "actually", "honestly", "well", "anyway", "anyways", }), } def _clean(word: str) -> str: """Strip punctuation and lowercase.""" return word.lower().strip(".,!?;:'\"") # ── WordRole dataclass ──────────────────────────────────────────── @dataclass class WordRole: """A word with its classified structural role.""" word: str role: str base_role: str # role from word set (before position override) position: int # index in sentence neighbors: tuple # (left_role, right_role) or None at edges force: Optional[tuple] = None # (dV, dA, dD, dU, dG) if EMOTIONAL # ── Single-word classifier ──────────────────────────────────────── def classify_word(word: str, position: int, words: List[str], roles_so_far: List[str]) -> str: """Classify a single word's structural role. Uses the word itself + its position + its neighbors to determine role. Position overrides dictionary classification when context demands it. """ w = _clean(word) # Check each role set for role_name, word_set in ROLE_WORDS.items(): if w in word_set: # -- Position-based overrides -- # "just" before acquire verb = TEMPORAL ("just bought" = recently) if w == "just" and position + 1 < len(words): next_w = _clean(words[position + 1]) if next_w in ROLE_WORDS.get("ACQUIRE", frozenset()): return "TEMPORAL" # "still" is always TEMPORAL (persistence/freshness marker) if w == "still": return "TEMPORAL" # "never" is primarily NEGATOR (not TEMPORAL) if w == "never" and role_name == "TEMPORAL": continue # skip TEMPORAL, let NEGATOR win # "fine" after SELF_REF = PEACE (minimization — "im fine") if w == "fine" and position > 0: prev_role = (roles_so_far[position - 1] if position - 1 < len(roles_so_far) else None) if prev_role == "SELF_REF": return "PEACE" # "last" + temporal word = TEMPORAL, not FINALITY # "last night" / "last week" / "last time" = temporal # BUT "my last night" / "his last day" = FINALITY (possessive before) _TEMPORAL_FOLLOWERS = frozenset({ "night", "week", "month", "year", "day", "summer", "winter", "spring", "fall", "semester", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday", "monday", }) if w == "last" and role_name == "FINALITY": if position + 1 < len(words): next_w = _clean(words[position + 1]) prev_role = (roles_so_far[position - 1] if position > 0 and position - 1 < len(roles_so_far) else None) # "my last" / "his last" = possessive → FINALITY stays if next_w in _TEMPORAL_FOLLOWERS and prev_role not in ("SELF_REF", "POSSESSION", "OTHER_REF"): return "TEMPORAL" # "so" before emotional/amplifier = AMPLIFIER, else CONNECTOR if w == "so" and role_name == "CONNECTOR": continue # skip CONNECTOR, AMPLIFIER already matched first return role_name # "end" is liquid — "end it" = finality, "end of the table" = spatial # Only classify as FINALITY when followed by pronoun/blanket _END_FINALITY_FOLLOWERS = {"it", "this", "everything", "things", "all", "myself"} if w in ("end", "ending", "stopping", "finishing") and position + 1 < len(words): next_w = _clean(words[position + 1]) if next_w in _END_FINALITY_FOLLOWERS: return "FINALITY" # Check if it's an emotional vocabulary word with significant V-force if w in VOCABULARY: force = VOCABULARY[w] if abs(force[0]) >= 15: # |dV| >= 15 = emotionally significant return "EMOTIONAL" # Heavy neutral: high gravity but low valence. Still carries weight. # "adopted", "pregnant", "diagnosed" -- these matter even at dV=0. if abs(force[4]) >= 15: # |dG| >= 15 = gravitationally significant return "EMOTIONAL" # gets force attached, physics handles the rest return "NEUTRAL" # ── Sentence classifier (two-pass) ─────────────────────────────── def classify_sentence(words: List[str]) -> List[WordRole]: """Classify all words in a sentence into structural roles. Two-pass: Pass 1: Base role classification left-to-right Pass 2: Fill in neighbor information (left_role, right_role) """ cleaned = [_clean(w) for w in words] # Pass 1: Base role classification roles: List[WordRole] = [] role_names: List[str] = [] for i, word in enumerate(cleaned): role = classify_word(word, i, cleaned, role_names) role_names.append(role) force = None if role == "EMOTIONAL" and word in VOCABULARY: force = VOCABULARY[word] roles.append(WordRole( word=word, role=role, base_role=role, position=i, neighbors=(role_names[i - 1] if i > 0 else None, None), force=force, )) # Pass 2: Fill in right neighbors for i in range(len(roles)): left = roles[i - 1].role if i > 0 else None right = roles[i + 1].role if i + 1 < len(roles) else None roles[i].neighbors = (left, right) return roles # -- Pull verb family (chase/pursue/flee variants) -- # These are gravitational verbs - the target has mass, the actor orbits PULL_TOWARD = frozenset({ 'chase', 'chased', 'chasing', 'pursue', 'pursued', 'pursuing', 'hunt', 'hunted', 'hunting', 'seek', 'sought', 'seeking', 'track', 'tracked', 'tracking', 'follow', 'followed', 'following', 'stalk', 'stalked', 'stalking', 'attract', 'attracted', 'attracting', 'drawn', }) PULL_AWAY = frozenset({ 'flee', 'fled', 'fleeing', 'run', 'ran', 'running', 'escape', 'escaped', 'escaping', 'avoid', 'avoided', 'avoiding', 'evade', 'evaded', 'evading', 'hide', 'hid', 'hiding', 'retreat', 'retreated', 'retreating', }) PULL_RESOLVED = frozenset({ 'catch', 'caught', 'catching', 'capture', 'captured', 'capturing', 'corner', 'cornered', 'cornering', 'trap', 'trapped', 'trapping', 'lose', 'lost', 'losing', 'miss', 'missed', 'missing', 'free', 'freed', 'freeing', }) # Add to ROLE_WORDS for classification ROLE_WORDS["PULL_TOWARD"] = PULL_TOWARD ROLE_WORDS["PULL_AWAY"] = PULL_AWAY ROLE_WORDS["PULL_RESOLVED"] = PULL_RESOLVED # -- Power verb family (use/control/command) -- # These redistribute D-axis. User has power, used has none. POWER_VERBS = frozenset({ 'use', 'used', 'using', 'uses', 'control', 'controlled', 'controlling', 'command', 'commanded', 'commanding', 'direct', 'directed', 'directing', 'manage', 'managed', 'managing', 'lead', 'led', 'leading', # drive/drove removed -- too ambiguous (driving a car vs driving someone) 'manipulate', 'manipulated', 'manipulating', 'exploit', 'exploited', 'exploiting', }) SUBMISSION_VERBS = frozenset({ 'obey', 'obeyed', 'obeying', 'serve', 'served', 'serving', 'submit', 'submitted', 'submitting', 'surrender', 'surrendered', 'surrendering', 'comply', 'complied', 'complying', 'yield', 'yielded', 'yielding', }) INVERSION_VERBS = frozenset({ 'addicted', 'addiction', 'obsessed', 'obsession', 'trapped', 'captive', 'enslaved', 'dependent', 'hooked', 'consumed', 'possessed', }) ROLE_WORDS["POWER"] = POWER_VERBS ROLE_WORDS["SUBMISSION"] = SUBMISSION_VERBS ROLE_WORDS["INVERSION"] = INVERSION_VERBS # -- Surprise family (pattern interrupts) -- # Surprise is an A-spike + D-drop. Not V-directional. # The content AFTER the surprise determines V. SURPRISE_WORDS = frozenset({ 'surprised', 'shocking', 'shocked', 'stunned', 'unexpected', 'unexpectedly', 'suddenly', 'whoa', 'omg', 'seriously', 'unbelievable', 'astonished', 'astounded', 'flabbergasted', 'speechless', 'dumbfounded', 'blindsided', }) ROLE_WORDS["SURPRISE"] = SURPRISE_WORDS