"""
Shared constants for the reasoning engine.

All configurable thresholds, word sets, and user-facing strings
live here — single source of truth, no duplication across modules.
"""

# --- User-facing strings ---
# Default wording only; a future config layer may override these values.

# Reply used when the engine abstains from answering.
ABSTAIN_MESSAGE: str = "I don't know."

# Abstain reply for the case where no query word is in the vocabulary.
ABSTAIN_OOV_MESSAGE: str = "I don't know — none of those words are in my vocabulary."

# --- Word classification ---
# Function (stop) words occur very frequently and carry grammar rather
# than content. Consumers use this set to:
#   * filter query words,
#   * detect generic neurons,
#   * keep convergence jumps from landing on non-content words.
# Entries are grouped by rough grammatical role for readability only;
# the result is a single flat, immutable set.
FUNCTION_WORDS = frozenset({
    # Articles
    "the", "a", "an",
    # Forms of "be"
    "is", "are", "was", "were", "be", "been", "being",
    # "have" / "do" auxiliaries
    "have", "has", "had", "do", "does", "did",
    # Modal verbs
    "will", "would", "could", "should", "may", "might", "shall", "can",
    # Prepositions and similar linking words
    "to", "of", "in", "for", "on", "with", "at", "by", "from",
    "as", "into", "through", "during", "before", "after",
    # Conjunctions, negation, and coordinators
    "and", "but", "or", "nor", "not", "no", "so", "yet", "both",
    # Pronouns and demonstratives
    "it", "its", "this", "that", "these", "those",
    # Question words
    "who", "what", "which", "where", "when", "how", "why",
})

# Structural words show up in templates as fixed text rather than slots.
# The set extends FUNCTION_WORDS with common verbs that shape a sentence
# instead of contributing unique content, so it is always a superset.
STRUCTURAL_WORDS = FUNCTION_WORDS.union((
    "wrote",
    "discovered",
    "invented",
    "created",
    "founded",
    "born",
    "died",
    "lived",
    "made",
    "built",
    "designed",
))

# --- Generation thresholds ---

# Successor walk: a confidence above this value means the token is
# treated as a grammar token (fast path).
GRAMMAR_CONFIDENCE_THRESHOLD: float = 0.8

# Convergence jump: cap on the number of sentence-boundary crossings.
MAX_CONVERGENCE_JUMPS: int = 2

# Query anchor: the query vector never weighs less than this in the
# context blend, so generation does not forget what was asked.
QUERY_ANCHOR_FLOOR: float = 0.4

# Paragraph: a sentence is dropped when its score falls below this
# fraction of the best sentence's score (keeps noise sentences out).
PARAGRAPH_RELEVANCE_FLOOR: float = 0.5