uslap-query / Code_files /amr_aql.py
uslap's picture
Upload folder using huggingface_hub
7cc8e29 verified
Raw
History Blame Contribute Delete
102 kB
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
أَمْر عَقْل — INTELLECT ENGINE
بِسْمِ اللَّهِ الرَّحْمَٰنِ الرَّحِيمِ
Layer 4 of the أَمْر full-stack computing system.
Reasons from roots the way roots derive from letters.
The عَقْل does not guess. It computes:
- Letter values → root meaning (no DB needed)
- Downstream word → candidate roots (reverse shift)
- Root → full knowledge tree (expansion)
- Root × Root → structural relationship (cross-reasoning)
- Root × Time → deployment timeline (temporal)
Every output traces to 28 letters with fixed values.
No statistical weights. No training data. No hallucination.
"""
import sys
import os
from collections import defaultdict
from itertools import product
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from amr_alphabet import ABJAD, ALPHABET
try:
from uslap_db_connect import connect as _connect
_HAS_DB = True
except ImportError:
_HAS_DB = False
# ═══════════════════════════════════════════════════════════════════════
# LETTER SEMANTICS — computed from amr_alphabet.py, cached here
# ═══════════════════════════════════════════════════════════════════════
# Map every letter in ALPHABET to one upper-cased keyword: the first word of
# the first clause (text before the first '.' or ',') of the letter's
# 'semantic_tendency' entry. Letters without a usable entry map to 'UNKNOWN'.
LETTER_SEMANTIC = {}
for _letter, _meta in ALPHABET.items():
    _sem = _meta.get('semantic_tendency', '')
    # Extract first word (the core semantic). Splitting into words first and
    # checking the result avoids an IndexError at import time when the text
    # is whitespace-only or begins with '.' / ',' (empty first clause).
    _words = _sem.split('.')[0].split(',')[0].split() if _sem else []
    _core = _words[0].upper() if _words else 'UNKNOWN'
    LETTER_SEMANTIC[_letter] = _core
# Hamza variants all map to ORIGIN (overrides anything computed above)
for _h in ['ء', 'أ', 'إ', 'آ', 'ٱ']:
    LETTER_SEMANTIC[_h] = 'ORIGIN'
# ═══════════════════════════════════════════════════════════════════════
# REVERSE SHIFT TABLE — downstream consonant → [(aa_letter, shift_id)]
# Built from shift_lookup in DB, hardcoded here for DB-free operation
# ═══════════════════════════════════════════════════════════════════════
# Keys are lower-case downstream consonants or two-letter digraphs; each value
# is a list of (AA letter, shift id) candidates for that symbol.
# NOTE(review): extract_consonants() also emits the digraph 'ph', which has no
# key here, so _trace_consonants_to_roots() resolves it to ('?', 'UNKNOWN') and
# skips every combination containing it — confirm whether an entry is missing.
REVERSE_SHIFT = {
    'b': [('ب', 'S09')],
    'c': [('ق', 'S01'), ('ك', 'S20'), ('ح', 'S03'), ('ص', 'S13'), ('س', 'S21')],
    'd': [('ض', 'S06'), ('ذ', 'S12'), ('د', 'S19')],
    'f': [('ف', 'S08')],
    'g': [('ق', 'S01'), ('ج', 'S02'), ('غ', 'S14'), ('ك', 'S20')],
    'h': [('ح', 'S03'), ('ه', 'S23')],
    'j': [('ج', 'S02')],
    'k': [('ق', 'S01'), ('خ', 'S11'), ('ك', 'S20')],
    'l': [('ل', 'S16')],
    'm': [('م', 'S17')],
    'n': [('ن', 'S18')],
    'p': [('ف', 'S08'), ('ب', 'S09')],
    'q': [('ق', 'S01')],
    'r': [('ر', 'S15'), ('و', 'S10')],
    's': [('ش', 'S05'), ('ص', 'S13'), ('س', 'S21'), ('ز', 'S22')],
    't': [('ط', 'S04'), ('د', 'S19'), ('ت', 'S24')],
    'v': [('ف', 'S08'), ('ب', 'S09'), ('و', 'S10')],
    'w': [('و', 'S10')],
    'x': [('خ', 'S11')],
    'z': [('ص', 'S13'), ('س', 'S21'), ('ز', 'S22'), ('ظ', 'S25')],
    # Digraphs (matched before single letters by extract_consonants)
    'sh': [('ش', 'S05')],
    'ch': [('خ', 'S11'), ('ك', 'S20')],
    'th': [('ض', 'S06'), ('ذ', 'S12'), ('ظ', 'S25'), ('ث', 'S26')],
    'gh': [('غ', 'S14')],
    # 'y' uses the non-numeric shift id 'YA'
    'y': [('ي', 'YA')],
}
# Vowels that can represent dropped AA letters.
# Same value shape as REVERSE_SHIFT: vowel -> [(AA letter, shift id), ...].
# Consulted by _trace_consonants_to_roots() only as a fallback when a symbol
# has no REVERSE_SHIFT entry.
# NOTE(review): extract_consonants() discards a/e/i/o/u, so skeletons built
# through it never contain these keys — confirm which caller relies on this.
VOWEL_DROPS = {
    'a': [('ع', 'S07'), ('ء', 'HAMZA-DROP')],
    'e': [('ع', 'S07'), ('ء', 'HAMZA-DROP')],
    'i': [('ع', 'S07'), ('ء', 'HAMZA-DROP')],
    'o': [('و', 'S10'), ('ع', 'S07')],
    'u': [('و', 'S10'), ('ع', 'S07')],
}
# ═══════════════════════════════════════════════════════════════════════
# 1. ROOT COMPOSITION REASONING — deduce meaning from letters alone
# ═══════════════════════════════════════════════════════════════════════
def deduce_meaning(root_letters):
"""Compute root meaning from letter values. No DB needed.
Args:
root_letters: list of AA letters, e.g. ['ك', 'ف', 'ر']
or hyphenated string 'ك-ف-ر'
Returns:
dict with:
letters: [{letter, abjad, semantic}, ...]
abjad_sum: total
composition: human-readable formula
deduction: computed meaning
"""
if isinstance(root_letters, str):
root_letters = [l for l in root_letters.split('-') if l.strip()]
letters = []
total = 0
parts = []
for letter in root_letters:
letter = letter.strip()
if not letter:
continue
abjad = ABJAD.get(letter, 0)
semantic = LETTER_SEMANTIC.get(letter, 'UNKNOWN')
letters.append({
'letter': letter,
'abjad': abjad,
'semantic': semantic,
})
total += abjad
parts.append(f"{letter}({abjad})={semantic}")
# Compose meaning from semantic fields
semantics = [l['semantic'] for l in letters]
composition = " + ".join(parts) + f" [={total}]"
# Generate deduction based on position
if len(semantics) >= 3:
deduction = (
f"The first radical ({letters[0]['letter']}) provides the DOMAIN: {semantics[0]}. "
f"The second radical ({letters[1]['letter']}) provides the ACTION: {semantics[1]}. "
f"The third radical ({letters[2]['letter']}) provides the RESULT: {semantics[2]}. "
f"Together: {semantics[0]} through {semantics[1]} producing {semantics[2]}."
)
elif len(semantics) == 2:
deduction = f"{semantics[0]} meeting {semantics[1]}."
else:
deduction = semantics[0] if semantics else "EMPTY"
return {
'letters': letters,
'abjad_sum': total,
'composition': composition,
'deduction': deduction,
'semantic_fields': semantics,
}
# ═══════════════════════════════════════════════════════════════════════
# 2. BACKWARD CHAINING — downstream word → candidate AA roots
# ═══════════════════════════════════════════════════════════════════════
# ── PREFIX/SUFFIX STRIPPING ────────────────────────────────────────
# Downstream languages ADD material. Roots are INSIDE. Strip to find them.
# ── AFFIX TABLES — ALL DOWNSTREAM LANGUAGES ──────────────────────
# Downstream languages ADD material around the root. These tables
# identify the additions so we can strip them to find the root inside.
# Per-language affix tables keyed by lower-case language code. Each language
# maps 'prefixes' and 'suffixes' to a list of strings that strip_affixes()
# may remove. Authoring order is free: the loop below normalises every list.
AFFIXES = {
    'en': {
        'prefixes': [
            'trans', 'inter', 'super', 'under', 'over', 'counter', 'dis', 'mis',
            'pre', 'pro', 'anti', 'non', 'un', 're', 'de', 'ex', 'in', 'im',
            'en', 'em',
        ],
        'suffixes': [
            'tion', 'sion', 'ment', 'ness', 'ance', 'ence', 'able', 'ible',
            'ious', 'eous', 'ous', 'ing', 'ful', 'less', 'ity',
            'ive', 'ise', 'ize', 'ure', 'ate', 'ery', 'ory', 'ary',
            'ty', 'ly', 'al', 'er', 'or', 'ed', 'es', 'ry',
            'ic', 'y',
        ],
    },
    'fr': {
        'prefixes': [
            'trans', 'inter', 'super', 'contre', 'entre', 'sur',
            'pré', 'pro', 'anti', 'dé', 'dés', 'dis', 'in', 'im',
            'en', 'em', 're', 'ré',
        ],
        'suffixes': [
            'tion', 'sion', 'ment', 'ence', 'ance', 'eur', 'euse',
            'ique', 'isme', 'iste', 'able', 'ible', 'eux', 'euse',
            'ure', 'age', 'ade', 'ée', 'ier', 'ière', 'if', 'ive',
            'el', 'elle', 'al', 'er', 'ir', 'oir', 'é', 'ée',
        ],
    },
    'es': {
        'prefixes': [
            'trans', 'inter', 'super', 'contra', 'sobre', 'entre',
            'pre', 'pro', 'anti', 'des', 'dis', 'in', 'im',
            'en', 'em', 're',
        ],
        'suffixes': [
            'ción', 'sión', 'miento', 'encia', 'ancia', 'ador', 'adora',
            'ismo', 'ista', 'able', 'ible', 'oso', 'osa', 'ura',
            'aje', 'ado', 'ido', 'ero', 'era', 'ivo', 'iva',
            'al', 'ar', 'er', 'ir', 'ón', 'dad', 'tad',
        ],
    },
    'it': {
        'prefixes': [
            'trans', 'inter', 'super', 'contra', 'sopra', 'sotto',
            'pre', 'pro', 'anti', 'dis', 'in', 'im', 'ri', 's',
        ],
        'suffixes': [
            'zione', 'sione', 'mento', 'enza', 'anza', 'atore', 'atrice',
            'ismo', 'ista', 'abile', 'ibile', 'oso', 'osa', 'ura',
            'aggio', 'ato', 'ito', 'iere', 'iera', 'ivo', 'iva',
            'ale', 'are', 'ere', 'ire', 'one', 'tà',
        ],
    },
    'pt': {
        'prefixes': [
            'trans', 'inter', 'super', 'contra', 'sobre', 'entre',
            'pre', 'pro', 'anti', 'des', 'dis', 'in', 'im',
            'en', 'em', 're',
        ],
        'suffixes': [
            'ção', 'são', 'mento', 'ência', 'ância', 'ador', 'adora',
            'ismo', 'ista', 'ável', 'ível', 'oso', 'osa', 'ura',
            'agem', 'ado', 'ido', 'eiro', 'eira', 'ivo', 'iva',
            'al', 'ar', 'er', 'ir', 'ão', 'dade',
        ],
    },
    'de': {
        'prefixes': [
            'über', 'unter', 'hinter', 'durch', 'gegen', 'wider',
            'vor', 'ver', 'zer', 'ent', 'emp', 'miss', 'un',
            'be', 'ge', 'er', 'ab', 'an', 'auf', 'aus', 'ein',
        ],
        'suffixes': [
            'tion', 'ung', 'heit', 'keit', 'schaft', 'lich', 'isch',
            'isch', 'bar', 'sam', 'haft', 'los', 'ig', 'en',
            'er', 'el', 'nis', 'tum', 'sal',
        ],
    },
    'ru': {
        'prefixes': [
            'пере', 'рассм', 'расс', 'рас', 'раз', 'про', 'при', 'пре',
            'под', 'пол', 'над', 'вос', 'воз', 'вы', 'до', 'за', 'из',
            'на', 'об', 'от', 'по', 'с', 'у',
        ],
        'suffixes': [
            'ность', 'ство', 'тель', 'ение', 'ание', 'ость', 'ция',
            'ище', 'ишк', 'ник', 'щик', 'чик', 'ище', 'тор',
            'ный', 'ной', 'ский', 'ской', 'овый', 'евый',
            'ать', 'ять', 'еть', 'ить', 'уть', 'оть',
            'ка', 'ок', 'ек', 'ик',
        ],
    },
    'fa': {
        'prefixes': [
            'بی', 'نا', 'بر', 'در', 'فرا', 'پیش', 'باز', 'هم',
        ],
        'suffixes': [
            'گاه', 'ستان', 'مند', 'بان', 'دان', 'گر', 'کار',
            'وار', 'انه', 'گان', 'ها', 'ان', 'ات', 'ین',
            'ی', 'ه', 'گی', 'شی', 'ور', 'ار',
        ],
    },
}
# Normalise every affix list: drop repeated entries (the literals above
# contain a few, e.g. 'euse', 'ée', 'isch', 'ище') and order longest-first so
# the first match found by strip_affixes() is always the longest one.
# dict.fromkeys keeps the first occurrence and sorted() is stable, so the
# relative order of equal-length affixes is unchanged.
for _lang, _aff in AFFIXES.items():
    for _kind in ('prefixes', 'suffixes'):
        _aff[_kind] = sorted(dict.fromkeys(_aff[_kind]), key=len, reverse=True)
def strip_affixes(word, language='en'):
    """Strip at most one known suffix and one known prefix from a word.

    The affix tables come from ``AFFIXES`` (en, fr, es, it, pt, de, ru, fa).
    The language code is matched case-insensitively, so ``'FR'`` and ``'fr'``
    select the same table; an unknown code falls back to the English table.

    Args:
        word: downstream word; it is lower-cased and stripped of surrounding
            whitespace before matching.
        language: language code selecting the affix table (default ``'en'``).

    Returns:
        ``(stem, prefix_stripped, suffix_stripped)``; each affix slot is
        ``None`` when nothing was removed. An affix is removed only when at
        least 3 characters remain afterwards.
    """
    w = word.lower().strip()
    prefix = None
    suffix = None
    # AFFIXES keys are lower-case; normalise so upper-case codes do not
    # silently fall through to the English table.
    lang_key = language.strip().lower() if isinstance(language, str) else language
    affix_data = AFFIXES.get(lang_key, AFFIXES.get('en', {}))
    if not affix_data:
        return w, None, None
    # Strip suffix first (more reliable for root extraction). The lists are
    # ordered longest-first, so the first hit is the longest match.
    for suf in affix_data.get('suffixes', []):
        if w.endswith(suf) and len(w) - len(suf) >= 3:
            w = w[:-len(suf)]
            suffix = suf
            break
    # Strip prefix; the length check applies to the already suffix-stripped word.
    for pref in affix_data.get('prefixes', []):
        if w.startswith(pref) and len(w) - len(pref) >= 3:
            w = w[len(pref):]
            prefix = pref
            break
    return w, prefix, suffix
def extract_consonants(word):
    """Return the consonant skeleton of a downstream word as a list.

    The word is lower-cased and trimmed, then scanned left to right. The
    digraphs sh/ch/th/gh/ph are emitted as single two-letter items; any other
    alphabetic character that is not one of a, e, i, o, u is emitted on its
    own. Non-alphabetic characters are dropped. Only those five letters count
    as vowels, so 'y' and accented letters are kept.
    """
    text = word.lower().strip()
    digraphs = {'sh', 'ch', 'th', 'gh', 'ph'}
    skeleton = []
    pos, end = 0, len(text)
    while pos < end:
        # At the last character this slice is one char long and never matches.
        pair = text[pos:pos + 2]
        if pair in digraphs:
            skeleton.append(pair)
            pos += 2
            continue
        ch = text[pos]
        if ch.isalpha() and ch not in 'aeiou':
            skeleton.append(ch)
        pos += 1
    return skeleton
# ═══════════════════════════════════════════════════════════════════════
# PHONETIC OPERATIONS — documented patterns beyond simple shift reversal
# ═══════════════════════════════════════════════════════════════════════
def dedup_gemination(consonants):
    """OP: Gemination deduplication.

    Collapse every run of identical adjacent items to a single item
    (COFFEE: ff→f, MOHAMMED: mm→m, MATTER: tt→t). An empty input is returned
    unchanged; otherwise a new list is returned.
    """
    if not consonants:
        return consonants
    collapsed = [consonants[0]]
    # Walk neighbouring pairs and keep an item only when it differs from
    # the one immediately before it.
    for before, current in zip(consonants, consonants[1:]):
        if current != before:
            collapsed.append(current)
    return collapsed
def strip_nasal_insertion(consonants):
    """OP_NASAL: variants with one 'n' removed.

    N may appear in the downstream form with no AA source consonant
    (GOVERN, FURNISH, ANTIQUE). The result starts with the input itself,
    followed by one variant per 'n' position with that 'n' deleted; variants
    shorter than two items are discarded.
    """
    without_one_n = (
        consonants[:pos] + consonants[pos + 1:]
        for pos, symbol in enumerate(consonants)
        if symbol == 'n'
    )
    return [consonants] + [v for v in without_one_n if len(v) >= 2]
def strip_epenthetic_stop(consonants):
    """OP_STOP: variants with an epenthetic stop removed.

    NN→ND and MM→MB: the D/B following the nasal is not a root consonant
    (TANDOOR, HIND). The result starts with the input itself, followed by one
    variant for every adjacent n+d or m+b pair with the stop deleted.
    """
    nasal_stop_pairs = {('n', 'd'), ('m', 'b')}
    results = [consonants]
    for pos, pair in enumerate(zip(consonants, consonants[1:])):
        if pair in nasal_stop_pairs:
            # Keep the nasal at `pos`, drop the stop at `pos + 1`.
            results.append(consonants[:pos + 1] + consonants[pos + 2:])
    return results
def strip_tamarbuta(consonants):
    """OP_TAMARBUTA: variant with a final 't' removed.

    A final T may be the tāʾ marbūṭa (ة) feminine marker surfacing rather
    than a root consonant (KISMET, RACKET). The result starts with the input
    itself; the T-less variant is added only when the input ends in 't' and
    at least two items remain.
    """
    results = [consonants]
    if not consonants or consonants[-1] != 't':
        return results
    head = consonants[:-1]
    if len(head) >= 2:
        results.append(head)
    return results
def strip_mu_prefix(consonants):
    """R08: مُ prefix stripping.

    A leading M may be the مُ participle prefix rather than a radical
    (MIRACLE: strip مُ → R-C-L). The result starts with the input itself; the
    M-less variant is added only when the input begins with 'm' and has at
    least four items, so that three remain.
    """
    has_mu = bool(consonants) and len(consonants) >= 4 and consonants[0] == 'm'
    if has_mu:
        return [consonants, consonants[1:]]
    return [consonants]
def strip_ba_prefix(consonants):
    """OP_PREFIX: بَ prefix fusion.

    A leading B or P may be the بَ preposition fused onto the word rather
    than a radical (PROPHET). The result starts with the input itself; the
    variant without the first item is added only when the input begins with
    'b' or 'p' and has at least four items.
    """
    fused = bool(consonants) and len(consonants) >= 4 and consonants[0] in ('b', 'p')
    if fused:
        return [consonants, consonants[1:]]
    return [consonants]
def generate_metathesis(consonants):
    """R02: Metathesis / root transposition.

    Consonants may be reordered in downstream forms (SACRIFICE: word order
    S-C-R → root ش-ك-ر). For a three-item input, return the input followed by
    every *distinct* reordering, in ``itertools.permutations`` order. Inputs
    of any other length are returned alone.

    Repeated consonants (e.g. ``['a', 'a', 'b']``) yield each ordering once,
    and the identity ordering is never repeated even when the input is a
    tuple rather than a list.

    Args:
        consonants: sequence of consonant symbols.

    Returns:
        list whose first element is ``consonants`` itself; the remaining
        elements are new lists.
    """
    from itertools import permutations
    if len(consonants) != 3:
        return [consonants]
    variants = [consonants]
    # Track emitted orderings as tuples so duplicates caused by repeated
    # consonants are skipped.
    emitted = {tuple(consonants)}
    for perm in permutations(consonants):
        if perm not in emitted:
            emitted.add(perm)
            variants.append(list(perm))
    return variants
def apply_liquid_interchange(consonants):
    """L/R liquid interchange (ل↔ر).

    The result starts with the input itself, followed by one variant per
    'l' or 'r' position with that single item swapped for the other liquid.
    """
    other_liquid = {'l': 'r', 'r': 'l'}
    return [consonants] + [
        consonants[:pos] + [other_liquid[symbol]] + consonants[pos + 1:]
        for pos, symbol in enumerate(consonants)
        if symbol in other_liquid
    ]
def apply_voicing_alternation(consonants):
    """OP_VOICE: systematic voicing/devoicing (b↔p, d↔t, g↔k, v↔f, z↔s).

    The result starts with the input itself, followed by one variant per
    position holding one of those ten consonants, with that single item
    replaced by its voiced/voiceless partner.
    """
    partner = {}
    for voiced, voiceless in (('b', 'p'), ('d', 't'), ('g', 'k'), ('v', 'f'), ('z', 's')):
        partner[voiced] = voiceless
        partner[voiceless] = voiced
    results = [consonants]
    for pos, symbol in enumerate(consonants):
        flipped = partner.get(symbol)
        if flipped is None:
            continue
        results.append(consonants[:pos] + [flipped] + consonants[pos + 1:])
    return results
def apply_nasal_assimilation(consonants):
    """OP_NASSIM: undo nasal assimilation ن→م before a bilabial.

    عنبر→amber shows n→m before b. Working backwards, every 'm' that is
    directly followed by 'b' or 'p' gets a variant with 'n' in its place.
    The result starts with the input itself.
    """
    results = [consonants]
    for pos, (current, following) in enumerate(zip(consonants, consonants[1:])):
        if current == 'm' and following in ('b', 'p'):
            results.append(consonants[:pos] + ['n'] + consonants[pos + 1:])
    return results
def _score_shift(sid):
    """Score a shift ID by commonality: 3, 2 or 1 by tier, 0 for anything else."""
    tiers = (
        (3, ('S01', 'S08', 'S09', 'S15', 'S16', 'S17', 'S18')),          # very common
        (2, ('S02', 'S03', 'S05', 'S10', 'S19', 'S20', 'S21', 'S24')),   # common
        (1, ('S04', 'S06', 'S07', 'S11', 'S12', 'S13', 'S14')),          # less common
    )
    for points, shift_ids in tiers:
        if sid in shift_ids:
            return points
    return 0  # exotic
def _generate_consonant_variants(consonants):
    """Apply the phonetic operations to generate consonant variants.

    Starting from the input skeleton and its gemination-deduplicated form,
    each stripping/transforming operation is applied once to each base; then
    every three-item variant is permuted (R02); finally voicing alternation
    is applied to the input skeleton only. A variant is emitted once, tagged
    with the first operation that produced it.

    The bases are processed in a fixed order (input first, deduplicated
    second). They used to be iterated out of a ``set`` of string tuples,
    whose order depends on per-process string hash randomisation, so the
    variant order — and everything derived from it downstream — could differ
    between runs.

    Args:
        consonants: consonant skeleton (sequence of symbols).

    Returns:
        list of ``(variant, op_label)`` tuples, where ``variant`` is a list
        and ``op_label`` is e.g. ``'BASE'``, ``'OP_NASAL'``, ``'BASE+R02'``
        or ``'OP_VOICE'``.
    """
    original = list(consonants)
    deduped = list(dedup_gemination(original))

    # Deterministic base order: the raw skeleton, then its deduped form.
    bases = [original]
    if deduped != original:
        bases.append(deduped)
    # `seen` holds every skeleton already emitted or reserved as a base.
    seen = {tuple(base) for base in bases}

    all_variants = []
    for base in bases:
        # Each operation returns a list of variants (first item is the base).
        ops_and_variants = [
            ('BASE', [base]),
            ('OP_NASAL', strip_nasal_insertion(base)),
            ('OP_STOP', strip_epenthetic_stop(base)),
            ('OP_TAMARBUTA', strip_tamarbuta(base)),
            ('R08_MU', strip_mu_prefix(base)),
            ('OP_PREFIX_BA', strip_ba_prefix(base)),
            ('OP_NASSIM', apply_nasal_assimilation(base)),
            ('OP_LIQUID', apply_liquid_interchange(base)),
        ]
        for op_name, variants in ops_and_variants:
            for v in variants:
                key = tuple(v)
                # Bases are pre-registered in `seen`, so they are only
                # emitted through their own 'BASE' entry.
                if op_name == 'BASE' or key not in seen:
                    all_variants.append((v, op_name))
                    seen.add(key)

    # Metathesis on triliteral variants (R02)
    metathesis_variants = []
    for v, op in all_variants:
        if len(v) == 3:
            for perm in generate_metathesis(v):
                key = tuple(perm)
                if key not in seen:
                    metathesis_variants.append((perm, f'{op}+R02'))
                    seen.add(key)
    all_variants.extend(metathesis_variants)

    # Voicing alternation (OP_VOICE) — applied sparingly to avoid explosion:
    # only to the input skeleton. The first returned item is the skeleton
    # itself and is skipped.
    for v in apply_voicing_alternation(original)[1:]:
        key = tuple(v)
        if key not in seen:
            all_variants.append((v, 'OP_VOICE'))
            seen.add(key)
    return all_variants
def _trace_consonants_to_roots(consonants, word, language, seen_roots, ops_applied=''):
    """Map one consonant list through the reverse shift table to root candidates.

    Each symbol is resolved via REVERSE_SHIFT, then VOWEL_DROPS, then the
    placeholder ('?', 'UNKNOWN'). Position subsets are chosen by length:
    2 → both positions; 3 → all three; longer → every 3-position combination
    (plus all four positions when the length is exactly 4). For each subset
    at most the first 200 letter combinations are considered. Combinations
    containing '?' or whose hyphen-joined root is already in ``seen_roots``
    are skipped; accepted roots are added to ``seen_roots`` (mutated in place).

    Returns a list of candidate dicts; inputs with fewer than two items
    yield an empty list.
    """
    if not consonants or len(consonants) < 2:
        return []

    per_position = [
        REVERSE_SHIFT.get(symbol) or VOWEL_DROPS.get(symbol) or [('?', 'UNKNOWN')]
        for symbol in consonants
    ]
    size = len(per_position)

    # Which positions to combine into a root (size is >= 2 here).
    if size == 2:
        index_sets = [(0, 1)]
    elif size == 3:
        index_sets = [(0, 1, 2)]
    else:
        from itertools import combinations
        index_sets = list(combinations(range(size), 3))
        if size == 4:
            index_sets.append((0, 1, 2, 3))

    found = []
    for positions in index_sets:
        options = [per_position[p] for p in positions]
        sources = [consonants[p] for p in positions]
        # Cap the cartesian product at 200 combinations per position subset.
        for combo in list(product(*options))[:200]:
            aa_letters = [pair[0] for pair in combo]
            shift_ids = [pair[1] for pair in combo]
            root_str = '-'.join(aa_letters)
            if '?' in aa_letters or root_str in seen_roots:
                continue
            seen_roots.add(root_str)
            meaning = deduce_meaning(aa_letters)
            chain = [
                f"{src}{aa}({sid})" for src, (aa, sid) in zip(sources, combo)
            ]
            score = sum(_score_shift(sid) for sid in shift_ids)
            if len(aa_letters) == 3:
                score += 2  # triliteral bonus
            found.append({
                'root_letters': root_str,
                'aa_letters': aa_letters,
                'shift_chain': chain,
                'shift_ids': shift_ids,
                'abjad_sum': meaning['abjad_sum'],
                'composition': meaning['composition'],
                'deduction': meaning['deduction'],
                'score': score,
                'downstream_word': word,
                'language': language,
                'ops_applied': ops_applied,
            })
    return found
def _build_candidates_from_consonants(consonants, word, language, seen_roots):
    """Build root candidates from a consonant list using all phonetic operations.

    Every variant from _generate_consonant_variants() is traced through
    _trace_consonants_to_roots(), and the resulting candidates' scores are
    adjusted by the operation label attached to the variant:

    - 'BASE' (direct match)                                   +3
    - OP_NASAL, OP_STOP, OP_TAMARBUTA, R08_MU, OP_PREFIX_BA   +2 (stripping)
    - OP_LIQUID, OP_VOICE, OP_NASSIM                          -2 (transforming)
    - any other label containing 'R02' (metathesis)           -1

    ``seen_roots`` is shared across variants and mutated by the tracing
    step, so a root is reported only for the first variant that reaches it.
    Returns the concatenated candidate list; empty input yields [].
    """
    if not consonants:
        return []

    stripping_ops = ('OP_NASAL', 'OP_STOP', 'OP_TAMARBUTA', 'R08_MU', 'OP_PREFIX_BA')
    transforming_ops = ('OP_LIQUID', 'OP_VOICE', 'OP_NASSIM')

    collected = []
    for variant, op_label in _generate_consonant_variants(consonants):
        traced = _trace_consonants_to_roots(
            variant, word, language, seen_roots, op_label
        )
        if op_label == 'BASE':
            delta = 3    # direct match bonus — no operation needed
        elif op_label in stripping_ops:
            delta = 2    # stripping bonus — removes downstream addition
        elif op_label in transforming_ops:
            delta = -2   # transformation penalty — changes a consonant
        elif 'R02' in op_label:
            delta = -1   # metathesis penalty — reorders consonants
        else:
            delta = 0
        if delta:
            for cand in traced:
                cand['score'] += delta
        collected.extend(traced)
    return collected
def reverse_trace(word, language='en', max_candidates=10):
    """Given a downstream word, find candidate AA roots via reverse shift.

    Two passes share one ``seen_roots`` set, so each root is reported once:

    1. Affix-stripped stem: candidates from its consonant skeleton. When a
       prefix or suffix was actually stripped, each gets +3 and a
       'stripped_from' key of the form "prefix+stem+suffix".
    2. Raw word: run only when its skeleton differs from the stem's.

    Args:
        word: downstream word (e.g. 'cover', 'mercy')
        language: source language (default 'en')
        max_candidates: max roots to return

    Returns:
        Candidate dicts sorted by 'score', highest first, truncated to
        ``max_candidates``. Keys include root_letters, shift_chain,
        abjad_sum, composition, deduction and score.
    """
    seen_roots = set()
    ranked = []

    # ── PASS 1: stripped form (primary) ──
    stem, prefix, suffix = strip_affixes(word, language)
    stem_skeleton = extract_consonants(stem)
    if stem_skeleton:
        primary = _build_candidates_from_consonants(
            stem_skeleton, word, language, seen_roots
        )
        if prefix or suffix:
            provenance = f"{prefix or ''}+{stem}+{suffix or ''}"
            for cand in primary:
                cand['score'] += 3  # stripping bonus
                cand['stripped_from'] = provenance
        ranked += primary

    # ── PASS 2: raw form (fallback) ──
    raw_skeleton = extract_consonants(word)
    if raw_skeleton != stem_skeleton:
        ranked += _build_candidates_from_consonants(
            raw_skeleton, word, language, seen_roots
        )

    # Stable sort: ties keep their discovery order.
    return sorted(ranked, key=lambda cand: cand['score'], reverse=True)[:max_candidates]
def verify_candidate(candidate):
    """Verify a candidate root against the DB intelligence layer.

    Opens a connection, overlays the lookup results onto ``candidate`` in
    place, and always closes the connection — also when a query raises.

    Args:
        candidate: dict from reverse_trace(); must contain 'root_letters'
            and 'score', and may contain 'shift_ids'.

    Returns:
        The same dict. Without a DB it only gains ``verified=False`` and a
        'note'. With a DB see _apply_db_overlay() for the keys added.
    """
    if not _HAS_DB:
        candidate['verified'] = False
        candidate['note'] = 'No DB connection'
        return candidate
    conn = _connect()
    try:
        _apply_db_overlay(conn, candidate)
    finally:
        # Release the connection even if one of the lookups fails.
        conn.close()
    return candidate


def _apply_db_overlay(conn, candidate):
    """Run every lookup for ``candidate['root_letters']`` and record the results.

    Always set: root_id, quranic_tokens, existing_entries, verified,
    qv_entries, qv_count, dp_hits, disputed_words, attested_shifts.
    Set only when rows exist: primary_meaning, blacklist_hits, type_c_pair,
    names_of_allah, quran_known_forms, morpheme_map, child_entries,
    bitig_degradation, bitig_convergence, deployment, chronology,
    decay_level, name_root_hub, isnad.
    'score' is raised for QV entries (max +5), disputed words (max +3),
    HIGH-reliability attested shifts (+1 each), a Name of Allah (+2) and
    known Qur'anic forms (max +3).
    """
    root_letters = candidate['root_letters']
    root_bare = root_letters.replace('-', '')

    # ── 1. ROOTS TABLE ──
    row = conn.execute(
        "SELECT root_id, quran_tokens, primary_meaning FROM roots WHERE root_letters = ?",
        (root_letters,)
    ).fetchone()
    if row:
        candidate['root_id'] = row[0]
        candidate['quranic_tokens'] = row[1]
        candidate['primary_meaning'] = row[2]
        candidate['verified'] = True
        # ── 2. ENTRIES COUNT ──
        candidate['existing_entries'] = conn.execute(
            "SELECT count(*) FROM entries WHERE root_id = ?", (row[0],)
        ).fetchone()[0]
    else:
        candidate['root_id'] = None
        candidate['quranic_tokens'] = 0
        candidate['existing_entries'] = 0
        candidate['verified'] = False
    root_id = candidate.get('root_id')

    # ── 3. QV TRANSLATION REGISTER ──
    qv_rows = conn.execute(
        "SELECT QV_ID, CORRUPTION_TYPE, CORRECT_TRANSLATION, COMMON_MISTRANSLATION "
        "FROM qv_translation_register WHERE ROOT = ?",
        (root_letters,)
    ).fetchall()
    candidate['qv_entries'] = [{
        'qv_id': r[0], 'corruption_type': r[1],
        'correct': r[2], 'mistranslation': r[3]
    } for r in qv_rows]
    candidate['qv_count'] = len(qv_rows)
    if qv_rows:
        # QV presence = this root has DOCUMENTED translation corruption.
        # Boost it — it's a known target, more likely to be the correct root.
        candidate['score'] += min(len(qv_rows), 5)

    # ── 4. DP REGISTER ──
    dp_rows = conn.execute(
        "SELECT dp_code, name FROM dp_register WHERE example LIKE ?",
        (f'%{root_letters}%',)
    ).fetchall()
    candidate['dp_hits'] = [{'code': r[0], 'name': r[1]} for r in dp_rows]

    # ── 5. DISPUTED WORDS ──
    disp_rows = conn.execute(
        "SELECT lemma, derivation FROM disputed_words "
        "WHERE root_assigned = ? OR root_hyphenated = ?",
        (root_bare, root_letters)
    ).fetchall()
    candidate['disputed_words'] = [{'lemma': r[0], 'derivation': r[1]} for r in disp_rows]
    if disp_rows:
        # Disputed word presence = this root has Qur'anic word attestation
        candidate['score'] += min(len(disp_rows), 3)

    # ── 6. PHONETIC REVERSAL ──
    # Check if any documented shift in phonetic_reversal matches this
    # candidate's chain. The key defaults to [] so it is always present.
    candidate['attested_shifts'] = []
    shift_ids = candidate.get('shift_ids', [])
    if shift_ids:
        placeholders = ','.join('?' * len(shift_ids))
        pr_rows = conn.execute(
            f"SELECT shift_code, from_modern, to_orig, reliability "
            f"FROM phonetic_reversal WHERE shift_code IN ({placeholders})",
            shift_ids
        ).fetchall()
        if pr_rows:
            candidate['attested_shifts'] = [
                {'code': r[0], 'from': r[1], 'to': r[2], 'reliability': r[3]}
                for r in pr_rows
            ]
            # Attested shifts = higher confidence
            candidate['score'] += sum(1 for r in pr_rows if r[3] == 'HIGH')

    # ── 7. CONTAMINATION BLACKLIST ──
    bl_rows = conn.execute(
        "SELECT bl_id, contaminated_term, correct_translation "
        "FROM contamination_blacklist WHERE correct_translation LIKE ?",
        (f'%{root_letters}%',)
    ).fetchall()
    if bl_rows:
        candidate['blacklist_hits'] = [
            {'bl_id': r[0], 'term': r[1], 'correct': r[2]} for r in bl_rows
        ]

    # ── 8. TYPE C SCAN — check if reversed root exists ──
    letters = [part for part in root_letters.split('-') if part]
    if len(letters) == 3:
        reversed_root = '-'.join(reversed(letters))
        if reversed_root != root_letters:
            rev_row = conn.execute(
                "SELECT root_id, quran_tokens FROM roots WHERE root_letters = ?",
                (reversed_root,)
            ).fetchone()
            if rev_row:
                # Token counts may be NULL in the DB; treat NULL as 0 so the
                # ratio cannot raise TypeError.
                own_tokens = candidate.get('quranic_tokens') or 0
                rev_tokens = rev_row[1] or 0
                candidate['type_c_pair'] = {
                    'reversed_root': reversed_root,
                    'reversed_root_id': rev_row[0],
                    'reversed_tokens': rev_row[1],
                    'token_ratio': round(
                        max(own_tokens, rev_tokens) /
                        max(min(own_tokens, rev_tokens), 1),
                        1
                    ),
                }

    # ── 9. NAMES OF ALLAH — is this root a Name of Allah? ──
    if root_id:
        allah_rows = conn.execute(
            "SELECT allah_id, aa_name, transliteration, meaning "
            "FROM names_of_allah WHERE root_id = ?",
            (root_id,)
        ).fetchall()
        if allah_rows:
            candidate['names_of_allah'] = [
                {'id': r[0], 'aa_term': r[1], 'translit': r[2], 'meaning': r[3]}
                for r in allah_rows
            ]
            # Name of Allah = significant root — informational boost (not dominant)
            candidate['score'] += 2

    # ── 10. QURAN KNOWN FORMS — fast form→root lookup ──
    qkf_count = conn.execute(
        "SELECT COUNT(*) FROM quran_known_forms WHERE root_unhyphenated = ?",
        (root_bare,)
    ).fetchone()[0]
    if qkf_count:
        candidate['quran_known_forms'] = qkf_count
        # More known forms = better attested root
        candidate['score'] += min(qkf_count // 5, 3)

    # ── 11. AA MORPHEME MAP — does this root generate EN/LA prefixes? ──
    morph_rows = conn.execute(
        "SELECT morpheme, morpheme_type, qur_meaning FROM aa_morpheme_map WHERE aa_root = ?",
        (root_letters,)
    ).fetchall()
    if morph_rows:
        candidate['morpheme_map'] = [
            {'morpheme': r[0], 'type': r[1], 'meaning': r[2]} for r in morph_rows
        ]

    # ── 12. CHILD ENTRIES — people/nation names from this root ──
    if root_id:
        child_rows = conn.execute(
            "SELECT child_id, shell_name, orig_meaning, operation_role "
            "FROM child_entries WHERE orig_root = ?",
            (root_letters,)
        ).fetchall()
        if child_rows:
            candidate['child_entries'] = [
                {'id': r[0], 'name': r[1], 'meaning': r[2], 'role': r[3]}
                for r in child_rows
            ]

    # ── 13. BITIG DEGRADATION REGISTER — degraded Bitig forms ──
    bitig_deg = conn.execute(
        "SELECT deg_id, bitig_original, original_meaning, degradation_type "
        "FROM bitig_degradation_register WHERE dp_codes LIKE ? OR bitig_original LIKE ?",
        (f'%{root_letters}%', f'%{root_bare}%')
    ).fetchall()
    if bitig_deg:
        candidate['bitig_degradation'] = [
            {'id': r[0], 'original': r[1], 'meaning': r[2], 'type': r[3]}
            for r in bitig_deg
        ]

    # ── 14. BITIG CONVERGENCE REGISTER — ORIG1/ORIG2 convergence ──
    bitig_conv = conn.execute(
        "SELECT conv_id, orig2_term, convergence_type "
        "FROM bitig_convergence_register WHERE orig1_root_letters = ?",
        (root_letters,)
    ).fetchall()
    if bitig_conv:
        candidate['bitig_convergence'] = [
            {'id': r[0], 'orig2_term': r[1], 'type': r[2]} for r in bitig_conv
        ]

    # ── 15. WORD DEPLOYMENT MAP — operation context ──
    deploy_rows = conn.execute(
        "SELECT deploy_id, operation_phase, deployed_words, mechanism "
        "FROM word_deployment_map WHERE aa_roots LIKE ?",
        (f'%{root_letters}%',)
    ).fetchall()
    if deploy_rows:
        candidate['deployment'] = [
            {'id': r[0], 'phase': r[1], 'words': r[2], 'mechanism': r[3]}
            for r in deploy_rows
        ]

    # ── 16. CHRONOLOGY — historical deployment ──
    if root_id:
        chrono_rows = conn.execute(
            "SELECT id, date, event FROM chronology WHERE qur_ref LIKE ? OR event LIKE ? LIMIT 5",
            (f'%{root_letters}%', f'%{root_bare}%')
        ).fetchall()
        if chrono_rows:
            # `event` can be NULL when the row matched on qur_ref only.
            candidate['chronology'] = [
                {'id': r[0], 'date': r[1], 'event': (r[2] or '')[:100]}
                for r in chrono_rows
            ]

    # ── 17. DECAY LEVEL — if root has entries, what's the decay? ──
    if root_id:
        decay_row = conn.execute(
            "SELECT DISTINCT decay_level FROM entries WHERE root_id = ? AND decay_level IS NOT NULL LIMIT 1",
            (root_id,)
        ).fetchone()
        if decay_row:
            candidate['decay_level'] = decay_row[0]

    # ── 18. NAME ROOT HUB — prophet/special name breakdown ──
    nrh_rows = conn.execute(
        "SELECT name_id, aa_name, corrected_meaning FROM name_root_hub WHERE root_letters = ?",
        (root_letters,)
    ).fetchall()
    if nrh_rows:
        # Guard against a NULL corrected_meaning before truncating.
        candidate['name_root_hub'] = [
            {'id': r[0], 'aa_term': r[1], 'meaning': (r[2] or '')[:100]}
            for r in nrh_rows
        ]

    # ── 19. ISNAD — protocol chain traces ──
    isnad_rows = conn.execute(
        "SELECT isnad_id, chain FROM isnad WHERE traces_to_root = ?",
        (root_letters,)
    ).fetchall()
    if isnad_rows:
        candidate['isnad'] = [{'id': r[0], 'chain': r[1]} for r in isnad_rows]
# ═══════════════════════════════════════════════════════════════════════
# 3. ROOT EXPANSION — one root → full knowledge tree
# ═══════════════════════════════════════════════════════════════════════
def expand_root(root_id_or_letters):
    """Given a root, generate the complete knowledge tree.

    Args:
        root_id_or_letters: a root id (any string starting with 'R' or 'T',
            e.g. 'R24') or hyphenated root letters (e.g. 'ك-ف-ر').

    Returns:
        ``{'error': ...}`` when no DB is available or the root is not found;
        otherwise a dict with 'root', 'entries' (en/ru/fa), 'european',
        'latin', 'bitig', 'uzbek', 'derivatives', 'cross_refs',
        'quranic_words' (at most 50), 'qv_entries', 'names_of_allah' and a
        'summary' of counts.

    The connection is closed on every path, including when a query raises.
    Rows are accessed by column name, so the connection must yield
    mapping-style rows.
    """
    if not _HAS_DB:
        return {'error': 'No DB connection'}
    conn = _connect()
    try:
        def fetch_dicts(sql, params):
            # Run a query and materialise each row as a plain dict.
            return [dict(row) for row in conn.execute(sql, params)]

        # Resolve to a roots row, by id or by letters.
        lookup_column = (
            'root_id' if root_id_or_letters.startswith(('R', 'T')) else 'root_letters'
        )
        root = conn.execute(
            f"SELECT * FROM roots WHERE {lookup_column} = ?", (root_id_or_letters,)
        ).fetchone()
        if not root:
            return {'error': f'Root not found: {root_id_or_letters}'}
        root_id = root['root_id']
        root_letters = root['root_letters']

        tree = {
            'root': {
                'root_id': root_id,
                'root_letters': root_letters,
                'primary_meaning': root['primary_meaning'],
                'quran_tokens': root['quran_tokens'],
                # Meaning computed from the letters alone
                'computed_meaning': deduce_meaning(root_letters),
            },
            'entries': {'en': [], 'ru': [], 'fa': []},
            'european': [],
            'latin': [],
            'bitig': [],
            'uzbek': [],
            'derivatives': [],
            'cross_refs': [],
            'quranic_words': [],
            'qv_entries': [],
            'names_of_allah': [],
        }

        # EN/RU/FA entries: one row is listed under every language it has a term for.
        for entry in fetch_dicts("SELECT * FROM entries WHERE root_id = ?", (root_id,)):
            for lang in ('en', 'ru', 'fa'):
                if entry.get(f'{lang}_term'):
                    tree['entries'][lang].append(entry)

        tree['european'] = fetch_dicts(
            "SELECT * FROM european_a1_entries WHERE root_id = ?", (root_id,))
        tree['latin'] = fetch_dicts(
            "SELECT * FROM latin_a1_entries WHERE root_id = ?", (root_id,))
        tree['bitig'] = fetch_dicts(
            "SELECT * FROM bitig_a1_entries WHERE root_id = ?", (root_id,))
        tree['uzbek'] = fetch_dicts(
            "SELECT * FROM uzbek_vocabulary WHERE aa_root_id = ?", (root_id,))

        # Derivatives and cross-refs hang off entry ids. An entry listed under
        # several languages would repeat its id, so de-duplicate (order kept)
        # to keep the bound-parameter list as short as possible.
        entry_ids = list(dict.fromkeys(
            e['entry_id'] for lang in tree['entries'].values() for e in lang
        ))
        if entry_ids:
            placeholders = ','.join('?' * len(entry_ids))
            tree['derivatives'] = fetch_dicts(
                f"SELECT * FROM a4_derivatives WHERE entry_id IN ({placeholders})",
                entry_ids)
            tree['cross_refs'] = fetch_dicts(
                f"SELECT * FROM a5_cross_refs WHERE from_entry_id IN ({placeholders})",
                entry_ids)

        # Qur'anic word occurrences
        tree['quranic_words'] = fetch_dicts(
            "SELECT surah, ayah, word_position, aa_word, correct_translation, confidence "
            "FROM quran_word_roots WHERE root = ? ORDER BY surah, ayah, word_position LIMIT 50",
            (root_letters,))
        # QV register
        tree['qv_entries'] = fetch_dicts(
            "SELECT * FROM qv_translation_register WHERE ROOT = ?", (root_letters,))
        # Names of Allah
        tree['names_of_allah'] = fetch_dicts(
            "SELECT * FROM names_of_allah WHERE root_id = ?", (root_id,))

        # Summary counts
        summary = {
            'en_entries': len(tree['entries']['en']),
            'ru_entries': len(tree['entries']['ru']),
            'fa_entries': len(tree['entries']['fa']),
            'european': len(tree['european']),
            'latin': len(tree['latin']),
            'bitig': len(tree['bitig']),
            'uzbek': len(tree['uzbek']),
            'derivatives': len(tree['derivatives']),
            'cross_refs': len(tree['cross_refs']),
            'quranic_words': len(tree['quranic_words']),
            'names_of_allah': len(tree['names_of_allah']),
        }
        summary['total_downstream'] = sum(
            summary[key] for key in (
                'en_entries', 'ru_entries', 'fa_entries', 'european',
                'latin', 'bitig', 'uzbek', 'derivatives',
            )
        )
        tree['summary'] = summary
        return tree
    finally:
        conn.close()
# ═══════════════════════════════════════════════════════════════════════
# 4. CROSS-ROOT REASONING — relate two roots structurally
# ═══════════════════════════════════════════════════════════════════════
def relate_roots(root_a, root_b):
    """Find structural relationships between two roots.

    Detects:
    - TYPE_C_ACTIVE_INVERSION: the same radicals (counted with multiplicity)
      in a different order
    - SHARED_RADICALS: radical letters the two roots have in common
    - SHARED_SEMANTIC: overlapping semantic fields
    - ABJAD_EQUAL / ABJAD_MULTIPLE: relationship between the abjad sums

    Args:
        root_a, root_b: hyphen-separated root_letters strings
            (e.g. 'ر-ح-م', 'م-ر-ح')

    Returns:
        dict with each root's letters and deduced meaning, the list of
        relationships found, and ``relationship_count``.
    """
    letters_a = set(root_a.replace('-', ''))
    letters_b = set(root_b.replace('-', ''))
    radicals_a = [radical for radical in root_a.split('-') if radical]
    radicals_b = [radical for radical in root_b.split('-') if radical]
    meaning_a = deduce_meaning(root_a)
    meaning_b = deduce_meaning(root_b)
    sum_a = meaning_a['abjad_sum']
    sum_b = meaning_b['abjad_sum']
    relationships = []

    # A rearrangement must preserve how often each radical occurs. Comparing
    # plain sets would also accept pairs such as م-د-د / د-م-م, which are not
    # permutations of each other.
    same_radicals = sorted(radicals_a) == sorted(radicals_b)

    # Check metathesis (Type C Active Inversion)
    if same_radicals and radicals_a != radicals_b:
        relationships.append({
            'type': 'TYPE_C_ACTIVE_INVERSION',
            'description': (
                f'Same consonants rearranged: {root_a} ↔ {root_b}. '
                f'Operator takes root letters, REARRANGES them, builds COUNTER-SYSTEM. '
                f'{root_a} [{sum_a}] vs {root_b} [{sum_b}].'
            ),
            'severity': 'HIGH',
        })

    # Shared letters (sorted so the output is stable between runs)
    shared = sorted(letters_a & letters_b)
    if shared and not same_radicals:
        relationships.append({
            'type': 'SHARED_RADICALS',
            'shared': shared,
            'count': len(shared),
            'description': f'Share {len(shared)} radical(s): {", ".join(shared)}',
        })

    # Shared semantic fields
    shared_sem = sorted(
        set(meaning_a['semantic_fields']) & set(meaning_b['semantic_fields'])
    )
    if shared_sem:
        relationships.append({
            'type': 'SHARED_SEMANTIC',
            'shared_fields': shared_sem,
            'description': f'Share semantic field(s): {", ".join(shared_sem)}',
        })

    # Abjad relationship
    if sum_a == sum_b:
        relationships.append({
            'type': 'ABJAD_EQUAL',
            'value': sum_a,
            'description': f'Same abjad sum: {sum_a}',
        })
    elif sum_a > 0 and sum_b > 0:
        if sum_a % sum_b == 0 or sum_b % sum_a == 0:
            ratio = max(sum_a, sum_b) // min(sum_a, sum_b)
            relationships.append({
                'type': 'ABJAD_MULTIPLE',
                'ratio': ratio,
                'description': f'Abjad ratio {ratio}:1 ({sum_a} vs {sum_b})',
            })

    return {
        'root_a': {'letters': root_a, 'meaning': meaning_a},
        'root_b': {'letters': root_b, 'meaning': meaning_b},
        'relationships': relationships,
        'relationship_count': len(relationships),
    }
# ═══════════════════════════════════════════════════════════════════════
# 4b. INTELLIGENCE LAYER — wired to DP, QV, op_codes, phonetic_reversal
# Schema has the muscles. These functions connect them to the skeleton.
# ═══════════════════════════════════════════════════════════════════════
def find_type_c_pairs(min_tokens_original=10, min_tokens_inversion=0):
    """Scan the DB for reversed triliteral root pairs (Type C Active Inversion).

    A pair qualifies when both root_letters strings are five characters long
    (three radicals plus two separators) and one is the other read backwards.
    Within a pair the root with the higher Qur'anic token count is treated
    as the original and the other as the inversion.

    Args:
        min_tokens_original: minimum token count required of the original
            (the higher-count member of the pair).
        min_tokens_inversion: minimum token count required of the inversion.

    Returns:
        list of dicts with original, inversion, abjad_sum, token_ratio and
        severity, ordered by the original's token count, highest first.
        Empty list when no DB is available.
    """
    if not _HAS_DB:
        return []

    conn = _connect()
    try:
        # The token thresholds are applied below, once each row's roles are
        # known. Filtering on a/b in SQL would tie the thresholds to rowid
        # order and drop pairs whose original happens to have the larger rowid.
        rows = conn.execute("""
            SELECT a.root_id, a.root_letters, a.quran_tokens,
                   b.root_id, b.root_letters, b.quran_tokens
            FROM roots a, roots b
            WHERE a.rowid < b.rowid
              AND length(a.root_letters) = 5 AND length(b.root_letters) = 5
              AND a.root_letters != b.root_letters
              AND substr(a.root_letters,1,1) = substr(b.root_letters,5,1)
              AND substr(a.root_letters,3,1) = substr(b.root_letters,3,1)
              AND substr(a.root_letters,5,1) = substr(b.root_letters,1,1)
              AND a.quran_tokens IS NOT NULL AND b.quran_tokens IS NOT NULL
            ORDER BY a.rowid, b.rowid
        """).fetchall()

        def first_meaning(root_letters):
            # Qur'anic meaning comes from the DB (NOT from weights)
            hit = conn.execute(
                "SELECT DISTINCT correct_translation FROM quran_word_roots WHERE root = ? LIMIT 1",
                (root_letters,)
            ).fetchone()
            return hit[0] if hit else None

        pairs = []
        for row in rows:
            first = (row[0], row[1], row[2])
            second = (row[3], row[4], row[5])
            # Original = higher token count (ties keep the lower rowid first)
            original, inversion = (first, second) if first[2] >= second[2] else (second, first)
            orig_id, orig_letters, orig_tokens = original
            inv_id, inv_letters, inv_tokens = inversion
            if orig_tokens < min_tokens_original or inv_tokens < min_tokens_inversion:
                continue

            # Reuse the module's severity ladder so both code paths agree
            grading = severity_score(orig_tokens, inv_tokens)

            # Reversal keeps the same radicals, so one abjad sum covers both
            radicals = [radical for radical in orig_letters.split('-') if radical]
            abjad = sum(ABJAD.get(radical, 0) for radical in radicals)

            pairs.append({
                'original': {'root_id': orig_id, 'root_letters': orig_letters,
                             'tokens': orig_tokens,
                             'db_meaning': first_meaning(orig_letters)},
                'inversion': {'root_id': inv_id, 'root_letters': inv_letters,
                              'tokens': inv_tokens,
                              'db_meaning': first_meaning(inv_letters)},
                'abjad_sum': abjad,
                'token_ratio': grading['ratio'],
                'severity': grading['severity'],
            })
    finally:
        # Release the connection even if a query fails
        conn.close()

    pairs.sort(key=lambda pair: pair['original']['tokens'], reverse=True)
    return pairs
def detect_inversion_levels(root_a, root_b):
    """Multi-level inversion detection using the QV register and Qur'anic meanings.

    Level 1: Letter inversion (same radicals, different order, same abjad)
    Level 2: Meaning inversion (both roots have Qur'anic translations on record)
    Level 3: Translation inversion (a root has entries in the QV register)

    Args:
        root_a, root_b: hyphen-separated root_letters strings.

    Returns:
        dict with the levels detected, token counts per root, QV and DP
        register hits and disputed-word evidence; ``{'error': ...}`` when no
        DB is available.
    """
    if not _HAS_DB:
        return {'error': 'No DB connection'}

    radicals_a = [radical for radical in root_a.split('-') if radical]
    radicals_b = [radical for radical in root_b.split('-') if radical]
    result = {
        'root_a': root_a, 'root_b': root_b,
        'levels': [], 'evidence': [], 'qv_hits': [], 'dp_hits': []
    }

    conn = _connect()
    try:
        # LEVEL 1: Letter inversion check. Compare radicals as multisets:
        # only a true permutation guarantees the equal abjad sum reported
        # below (plain sets would also accept م-د-د vs د-م-م).
        if sorted(radicals_a) == sorted(radicals_b) and radicals_a != radicals_b:
            abjad = sum(ABJAD.get(radical, 0) for radical in radicals_a)
            result['levels'].append({
                'level': 1,
                'type': 'LETTER_INVERSION',
                'detail': f'Same letters rearranged. Abjad sum: {abjad} (both).'
            })

        # Get token counts
        for root_letters, label in [(root_a, 'a'), (root_b, 'b')]:
            row = conn.execute(
                "SELECT quran_tokens FROM roots WHERE root_letters = ?",
                (root_letters,)
            ).fetchone()
            result[f'tokens_{label}'] = row[0] if row else 0

        # LEVEL 2: Meaning inversion — compare Qur'anic washed translations
        meaning_a = conn.execute(
            "SELECT DISTINCT correct_translation FROM quran_word_roots WHERE root = ?",
            (root_a,)
        ).fetchall()
        meaning_b = conn.execute(
            "SELECT DISTINCT correct_translation FROM quran_word_roots WHERE root = ?",
            (root_b,)
        ).fetchall()
        if meaning_a and meaning_b:
            result['db_meaning_a'] = [r[0] for r in meaning_a]
            result['db_meaning_b'] = [r[0] for r in meaning_b]
            result['levels'].append({
                'level': 2,
                'type': 'MEANING_INVERSION',
                'detail': f'Qur\'anic meanings — {root_a}: {result["db_meaning_a"][0]} | {root_b}: {result["db_meaning_b"][0]}'
            })

        # LEVEL 3: Translation inversion — check QV register for either root
        # NOTE(review): another query in this file filters this table on a
        # column named ROOT; confirm which column name the schema really has.
        for root_letters in [root_a, root_b]:
            qv_rows = conn.execute(
                "SELECT qv_id, corruption_type, original_meaning, corrupted_meaning "
                "FROM qv_translation_register WHERE root_letters = ?",
                (root_letters,)
            ).fetchall()
            for qv in qv_rows:
                result['qv_hits'].append({
                    'qv_id': qv[0], 'corruption_type': qv[1],
                    'original': qv[2], 'corrupted': qv[3],
                    'root': root_letters
                })
            if qv_rows:
                result['levels'].append({
                    'level': 3,
                    'type': 'TRANSLATION_INVERSION',
                    'detail': f'{root_letters} has {len(qv_rows)} QV entries — downstream translation masks Qur\'anic meaning.'
                })

        # Check DP register for relevant codes
        for root_letters in [root_a, root_b]:
            dp_rows = conn.execute(
                "SELECT dp_code, name FROM dp_register WHERE example LIKE ?",
                (f'%{root_letters}%',)
            ).fetchall()
            for dp in dp_rows:
                result['dp_hits'].append({
                    'dp_code': dp[0], 'name': dp[1], 'root': root_letters
                })

        # Check disputed_words
        for root_letters in [root_a, root_b]:
            disp = conn.execute(
                "SELECT * FROM disputed_words WHERE root_assigned = ? OR root_hyphenated = ?",
                (root_letters.replace('-', ''), root_letters)
            ).fetchall()
            if disp:
                result['evidence'].append({
                    'type': 'DISPUTED_WORD',
                    'root': root_letters,
                    'count': len(disp)
                })
    finally:
        # Release the connection even when one of the lookups fails
        conn.close()

    return result
def cross_wash(words, language='en'):
    """Derivative cross-wash: feed a word FAMILY, extract the common skeleton.

    Each word is reduced to its consonants; the consonants present in at
    least 75% of the words, in the order they occur in the first word, form
    the skeleton that is traced back to candidate roots.

    Args:
        words: list of related downstream words
            (e.g. ['mercy', 'merchant', 'market', 'commerce'])
        language: source language

    Returns:
        dict with common_consonants, majority_consonants, skeleton, the top
        15 candidates, type_c_flags and per-word breakdowns;
        ``{'error': ...}`` when no words are given.
    """
    from collections import Counter

    if not words:
        return {'error': 'No words provided'}

    # Step 1: Extract consonant skeletons for each word
    word_data = []
    for word in words:
        stem, pfx, sfx = strip_affixes(word, language)
        word_data.append({
            'word': word,
            'stem': stem,
            'prefix': pfx,
            'suffix': sfx,
            'consonants_stripped': extract_consonants(stem),
            'consonants_raw': extract_consonants(word),
        })

    # Step 2: Find common consonants across ALL words
    # Use raw consonants (more complete)
    consonant_sets = [set(wd['consonants_raw']) for wd in word_data]
    common_cons = set.intersection(*consonant_sets)

    # Step 3: Find consonants in MOST words (>= 75%); each word counts a
    # consonant once no matter how often it repeats inside the word.
    cons_freq = Counter(c for cons in consonant_sets for c in cons)
    threshold = len(words) * 0.75
    majority_cons = {c for c, count in cons_freq.items() if count >= threshold}

    # Step 4: Ordered common skeleton (preserve order from first word)
    skeleton = [c for c in word_data[0]['consonants_raw'] if c in majority_cons]

    # Step 5: Map skeleton through reverse shift to find root candidates
    seen = set()
    candidates = _trace_consonants_to_roots(skeleton, words[0], language, seen, 'CROSS_WASH')

    # Step 6: Verify top candidates against DB
    if _HAS_DB:
        candidates[:20] = [verify_candidate(c) for c in candidates[:20]]

    # Sort by tokens, then score. Done with or without a DB so the top-15
    # slice below is always the best-ranked one.
    candidates.sort(key=lambda x: (x.get('quranic_tokens', 0), x.get('score', 0)), reverse=True)

    # Step 7: Check for Type C pairs among top candidates. Radicals are
    # compared as multisets: an inversion is a reordering, so each radical
    # must occur equally often in both roots.
    type_c_flags = []
    top_roots = [c['root_letters'] for c in candidates[:10] if c.get('verified')]
    for i, ra in enumerate(top_roots):
        radicals_a = sorted(radical for radical in ra.split('-') if radical)
        for rb in top_roots[i + 1:]:
            radicals_b = sorted(radical for radical in rb.split('-') if radical)
            if radicals_a == radicals_b and ra != rb:
                type_c_flags.append({
                    'root_a': ra, 'root_b': rb,
                    'type': 'TYPE_C_ACTIVE_INVERSION'
                })

    return {
        'input_words': words,
        'common_consonants': sorted(common_cons),
        'majority_consonants': sorted(majority_cons),
        'skeleton': skeleton,
        'candidates': candidates[:15],
        'type_c_flags': type_c_flags,
        'word_breakdowns': word_data,
    }
def severity_score(original_tokens, inversion_tokens):
    """Classify an inversion by the token ratio between original and inverted root.

    The ratio is ``original_tokens`` divided by ``inversion_tokens``, with the
    divisor floored at 1. A larger ratio maps to a more severe label.

    Args:
        original_tokens: token count of the original root.
        inversion_tokens: token count of the inverted root.

    Returns:
        dict with ratio (rounded to one decimal), severity label, the two
        input counts, and a description.
    """
    ratio = original_tokens / max(inversion_tokens, 1)

    # (exclusive lower bound, label, description template), strictest first
    tiers = (
        (100, 'EXTREME',
         '{:.0f}:1 — Original dominates. Inversion is near-absent from Qur\'an.'),
        (20, 'SEVERE',
         '{:.0f}:1 — Original strongly dominant. Inversion used only to condemn.'),
        (5, 'HIGH',
         '{:.1f}:1 — Clear dominance. Inversion appears but is clearly secondary.'),
        (2, 'MODERATE',
         '{:.1f}:1 — Both present. Original still dominant.'),
    )

    # Fallback tier when no bound is exceeded
    label = 'MILD'
    template = '{:.1f}:1 — Near equal. Both carry legitimate meaning, direction still visible.'
    for bound, tier_label, tier_template in tiers:
        if ratio > bound:
            label, template = tier_label, tier_template
            break

    return {
        'ratio': round(ratio, 1),
        'severity': label,
        'original_tokens': original_tokens,
        'inversion_tokens': inversion_tokens,
        'description': template.format(ratio),
    }
# ═══════════════════════════════════════════════════════════════════════
# 5. TEMPORAL REASONING — root × time → deployment history
# ═══════════════════════════════════════════════════════════════════════
def trace_timeline(root_id_or_letters):
    """Trace when and how a root's downstream forms were deployed.

    Args:
        root_id_or_letters: a root_id (a value starting with 'R' or 'T') or
            a root_letters string.

    Returns:
        dict with root_id, root_letters, ``deployments`` (rows of
        word_deployment_map ordered by date_period) and ``corridors``
        (entries of this root that carry a ds_corridor value);
        ``{'error': ...}`` when no DB is available or the root is unknown.
    """
    if not _HAS_DB:
        return {'error': 'No DB connection'}

    conn = _connect()
    try:
        # Resolve root: IDs are recognised by their leading letter
        if root_id_or_letters.startswith(('R', 'T')):
            lookup = "SELECT root_id, root_letters FROM roots WHERE root_id = ?"
        else:
            lookup = "SELECT root_id, root_letters FROM roots WHERE root_letters = ?"
        root = conn.execute(lookup, (root_id_or_letters,)).fetchone()
        if not root:
            return {'error': 'Root not found'}

        root_id = root['root_id']
        root_letters = root['root_letters']
        timeline = {
            'root_id': root_id,
            'root_letters': root_letters,
            'deployments': [],
            'corridors': [],
        }

        # Word deployment map
        # NOTE(review): the substring match also returns rows whose aa_roots
        # merely contains a longer ID starting with root_id (for example a
        # search for 'R1' would hit 'R12'); confirm the aa_roots format
        # before tightening this.
        for row in conn.execute(
            "SELECT * FROM word_deployment_map WHERE aa_roots LIKE ? ORDER BY date_period",
            (f'%{root_id}%',)
        ):
            timeline['deployments'].append(dict(row))

        # Entries with corridor info
        for row in conn.execute(
            "SELECT entry_id, en_term, ds_corridor, decay_level, dp_codes FROM entries WHERE root_id = ?",
            (root_id,)
        ):
            entry = dict(row)
            if entry.get('ds_corridor'):
                timeline['corridors'].append(entry)

        return timeline
    finally:
        # Runs on every exit path, including errors raised by the queries
        conn.close()
# ═══════════════════════════════════════════════════════════════════════
# 6. HYPOTHESIS GENERATION — given unknowns, generate candidates
# ═══════════════════════════════════════════════════════════════════════
def hypothesise(word, language='en'):
    """Full hypothesis pipeline: word → candidates → verify → rank.

    Given a word in a downstream language, this:
    1. Extracts the consonant skeleton
    2. Reverses it through the shift table
    3. Boosts candidates whose abjad sum matches the direct-map abjad
    4. Verifies candidates against the DB (if available)
    5. Returns the candidates ranked by score

    Args:
        word: any downstream word
        language: source language

    Returns:
        list of candidate dicts sorted by descending score
    """
    import math
    from itertools import combinations

    # ═══ FULL-WORD-FIRST RULE (2026-03-30) ═══
    # Wash the full word BEFORE any stripping/splitting.
    # If cross-language wash reveals 4+ stable consonants,
    # attempt compound detection before triliteral trace.
    compound_hints = []
    try:
        code_dir = os.path.dirname(os.path.abspath(__file__))
        if code_dir not in sys.path:
            sys.path.insert(0, code_dir)
        from uslap_quf import cross_language_wash, detect_compound
        wash_result = cross_language_wash(word, language)
        if wash_result['consonant_count'] >= 4:
            # Compound candidate — keep the first three decompositions
            compound_candidates = detect_compound(wash_result['skeleton'], language)
            if compound_candidates:
                compound_hints = compound_candidates[:3]
    except Exception:
        # Compound detection is optional: any failure here (module missing,
        # unexpected wash result) falls back to the plain trace.
        compound_hints = []

    # Step 1-3: reverse trace (wide net pre-verification)
    candidates = reverse_trace(word, language, max_candidates=60)

    # Step 3.5: ABJAD PROXIMITY SCORING
    # Compute the "direct-map abjad" sums from the highest-priority shifts of
    # each consonant. Candidates whose abjad sum matches one get boosted.
    # This is pure letter mathematics — no semantics, no external data.
    stem, _, _ = strip_affixes(word, language)
    cons_stripped = extract_consonants(stem)
    cons_raw = extract_consonants(word)
    # Prefer whichever skeleton has more consonants (raw wins ties)
    cons_for_abjad = cons_raw if len(cons_raw or []) >= len(cons_stripped or []) else cons_stripped
    if not cons_for_abjad:
        cons_for_abjad = cons_stripped or cons_raw

    if cons_for_abjad and len(cons_for_abjad) >= 2:
        # Top-2 shifts per consonant position: each consonant can map to
        # several AA letters (e.g. c→ق or c→ك); two per position covers the
        # most common shifts without combinatorial explosion.
        position_options = []  # one list of (letter, abjad) per mapped position
        for c in cons_for_abjad:
            mapping = REVERSE_SHIFT.get(c, VOWEL_DROPS.get(c, []))
            if mapping:
                position_options.append(
                    [(m[0], ABJAD.get(m[0], 0)) for m in mapping[:2]]
                )

        # Abjad sums of every triliteral selection of positions (or of the
        # single biliteral one when only two positions mapped)
        direct_abjads = set()
        n_pos = len(position_options)
        if n_pos >= 2:
            for selected in combinations(position_options, min(n_pos, 3)):
                for opt_combo in product(*selected):
                    direct_abjads.add(sum(o[1] for o in opt_combo))

        # Apply abjad proximity boost to candidates
        for cand in candidates:
            cand_abjad = cand.get('abjad_sum', 0)
            if cand_abjad in direct_abjads:
                cand['score'] += 8  # EXACT abjad match — strong structural signal
                cand['abjad_match'] = 'EXACT'
            elif any(abs(cand_abjad - da) <= da * 0.15 for da in direct_abjads):
                # CLOSE abjad — within 15% of one of the direct-map sums.
                # The tolerance is measured against the same sum the distance
                # is taken from, not against the largest sum in the set.
                cand['score'] += 4
                cand['abjad_match'] = 'CLOSE'
            else:
                cand['abjad_match'] = 'FAR'

    # Step 4: verify against DB
    # Strategy: verify ALL triliteral candidates (most likely correct roots)
    # plus top quadriliterals. Triliteral roots dominate AA.
    if _HAS_DB:
        triliteral_idx = [i for i, c in enumerate(candidates) if len(c['aa_letters']) == 3]
        other_idx = [i for i, c in enumerate(candidates) if len(c['aa_letters']) != 3]
        # Verify all triliterals (capped at 50 for sanity)
        for i in triliteral_idx[:50]:
            candidates[i] = verify_candidate(candidates[i])
        # Verify top 10 quadriliterals
        for i in other_idx[:10]:
            candidates[i] = verify_candidate(candidates[i])

        # Boost verified candidates
        for cand in candidates:
            if cand.get('verified'):
                cand['score'] += 10
                if cand.get('quranic_tokens', 0) > 0:
                    # Logarithmic scaling, floor(log2(tokens)) capped at 10:
                    # 1→0, 10→3, 50→5, 100→6, 339→8, 660→9
                    tokens = cand['quranic_tokens']
                    cand['score'] += min(int(math.log2(max(tokens, 1))), 10)
                if cand.get('existing_entries', 0) > 0:
                    cand['score'] += min(cand['existing_entries'], 5)

    # ═══ P11 ENFORCEMENT (2026-03-30) ═══
    # Phonetic FIRST, semantic SECOND.
    # If a candidate has a WEAK phonetic chain (fewer than 2/3 of its root
    # letters mapped), CAP its score. This prevents meaning-first leakage
    # (e.g. September→س-ب-ع because both mean "seven" even though the
    # consonants don't match).
    for cand in candidates:
        n_root = len(cand.get('aa_letters', []))
        n_mapped = cand.get('mapped_count', n_root)  # assume full if not tracked
        if n_root > 0 and n_mapped < (n_root * 2 / 3):
            cand['score'] = min(cand['score'], 15)
            cand['p11_capped'] = True

    # ═══ COMPOUND BOOST (2026-03-30) ═══
    # If compound detection found valid decompositions, inject them
    # as extra candidates so they appear in results.
    for hint in compound_hints:
        candidates.append({
            'root_letters': str(hint),
            'aa_letters': [],
            'score': hint.get('score', 0) + 5,
            'composition': f"COMPOUND: {hint}",
            'deduction': "Detected via cross-language wash + compound detector",
            'shift_chain': [],
            'verified': False,
            'compound': True,
            'compound_data': hint,
        })

    # Re-sort
    candidates.sort(key=lambda x: x['score'], reverse=True)
    return candidates
# ═══════════════════════════════════════════════════════════════════════
# QUF GATES — Called by amr_quf.py router
# ═══════════════════════════════════════════════════════════════════════
def _quf_result(q='PENDING', u='PENDING', f='PENDING',
q_ev=None, u_ev=None, f_ev=None):
"""Create a QUF layer result dict."""
GRADE_ORDER = {'HIGH': 4, 'MEDIUM': 3, 'LOW': 2, 'FAIL': 1, 'PENDING': 0}
passes = all(GRADE_ORDER.get(g, 0) >= 3 for g in [q, u, f])
return {
'q': q, 'u': u, 'f': f, 'pass': passes,
'q_evidence': q_ev or [], 'u_evidence': u_ev or [], 'f_evidence': f_ev or [],
}
def linguistic_quf(data: dict) -> dict:
    """
    PHONETIC CHAIN VERIFICATION (function name kept for backward compatibility).

    Q: documented shift chain or score, raised to HIGH by 10+ Qur'anic tokens
    U: cross-sibling coverage (sibling tables holding this root, surah spread)
    F: contamination blacklist, then phonetic chain and score

    Args:
        data: row dict; reads root_letters/root, en_term/term/orig2_term,
            root_id/aa_root_id, phonetic_chain and score.

    Returns:
        QUF result dict built by ``_quf_result``.
    """
    import re

    root_letters = data.get('root_letters', '') or data.get('root', '') or ''
    en_term = data.get('en_term', '') or data.get('term', '') or data.get('orig2_term', '') or ''
    root_id = data.get('root_id', '') or data.get('aa_root_id', '') or ''
    phonetic_chain = data.get('phonetic_chain', '') or ''
    score = data.get('score', 0) or 0

    if not root_letters and not root_id:
        return _quf_result('PENDING', 'PENDING', 'PENDING',
                           ['No root letters or root_id'])

    # ── Q: QUANTIFICATION — consonant alignment + token count ──
    q_evidence = []
    if en_term and root_letters:
        if phonetic_chain and len(phonetic_chain) > 5:
            shifts = re.findall(r'S\d{2}', phonetic_chain)
            if shifts:
                q_grade = 'HIGH'
                q_evidence.append(f'{len(shifts)} shifts documented: {",".join(shifts[:5])}')
            else:
                q_grade = 'MEDIUM'
                q_evidence.append(f'Chain present but no shift IDs: {phonetic_chain[:40]}')
        elif score and score >= 8:
            q_grade = 'HIGH'
            q_evidence.append(f'Score {score}/10, chain: {phonetic_chain[:30]}')
        elif score and score >= 5:
            q_grade = 'MEDIUM'
            q_evidence.append(f'Score {score}/10')
        else:
            q_grade = 'LOW'
            q_evidence.append(f'Score {score}/10, weak chain')
    elif root_letters:
        q_grade = 'MEDIUM'
        q_evidence.append(f'Root {root_letters} present, no downstream word to align')
    else:
        q_grade = 'LOW'
        q_evidence.append('No root letters')

    # Add Quranic token count (best effort: DB trouble leaves the grade as is)
    if root_letters and _HAS_DB:
        try:
            conn = _connect()
            try:
                tokens = conn.execute(
                    "SELECT COUNT(*) FROM quran_word_roots WHERE root = ?",
                    (root_letters,)
                ).fetchone()[0]
            finally:
                # Close even when the query fails, so the handle is not leaked
                conn.close()
            if tokens > 0:
                q_evidence.append(f'{tokens} Quranic tokens')
                if tokens >= 10:
                    q_grade = 'HIGH'
            else:
                q_evidence.append('0 Quranic tokens')
        except Exception:
            pass

    # ── U: UNIVERSALITY — cross-sibling coverage ──
    u_evidence = []
    sibling_count = 0
    surah_count = 0
    if _HAS_DB and root_id:
        try:
            conn = _connect()
            try:
                # Count sibling tables with entries for this root
                sibling_tables = [
                    ('entries', 'root_id'),
                    ('european_a1_entries', 'root_id'),
                    ('latin_a1_entries', 'root_id'),
                    ('bitig_a1_entries', 'root_id'),
                    ('uzbek_vocabulary', 'aa_root_id'),
                ]
                for tbl, col in sibling_tables:
                    # A missing table or column only means "no coverage there"
                    try:
                        cnt = conn.execute(
                            f'SELECT COUNT(*) FROM "{tbl}" WHERE "{col}" = ?', (root_id,)
                        ).fetchone()[0]
                        if cnt > 0:
                            sibling_count += 1
                    except Exception:
                        pass
                # Surah spread
                if root_letters:
                    try:
                        surah_count = conn.execute(
                            "SELECT COUNT(DISTINCT surah) FROM quran_word_roots WHERE root = ?",
                            (root_letters,)
                        ).fetchone()[0]
                    except Exception:
                        pass
            finally:
                conn.close()
        except Exception:
            pass

    # The stricter tier (>= 20 surahs or >= 4 sibling tables) used to be a
    # separate branch, but it graded HIGH as well, so one test covers both.
    if surah_count >= 5 or sibling_count >= 2:
        u_grade = 'HIGH'
    elif surah_count >= 1 or sibling_count >= 1:
        u_grade = 'MEDIUM'
    else:
        u_grade = 'LOW'
    u_evidence.append(f'{surah_count} surahs, {sibling_count} sibling tables')

    # ── F: FALSIFICATION — competing roots + blacklist ──
    f_evidence = []
    if _HAS_DB and root_id:
        try:
            conn = _connect()
            try:
                # Check blacklist (only if en_term is populated — empty matches
                # everything). Exact match: en_term must BE a blacklisted term,
                # not a SUBSTRING of one ("STUDY" must not match
                # "The Study Quran" in BL19).
                bl_check = 0
                if en_term and len(en_term) >= 3:
                    _bl_term = en_term.lower().strip()
                    bl_check = conn.execute(
                        "SELECT COUNT(*) FROM contamination_blacklist "
                        "WHERE LOWER(contaminated_term) = ? "
                        "OR LOWER(contaminated_translation) = ?",
                        (_bl_term, _bl_term)
                    ).fetchone()[0]
                if bl_check > 0:
                    f_grade = 'FAIL'
                    f_evidence.append('Term matches contamination blacklist')
                elif phonetic_chain and score and score >= 8:
                    f_grade = 'HIGH'
                    f_evidence.append(f'Unique root trace, score {score}/10')
                elif phonetic_chain:
                    f_grade = 'MEDIUM'
                    f_evidence.append(f'Chain documented, score {score}/10')
                else:
                    f_grade = 'LOW'
                    f_evidence.append('No phonetic chain documented')
            finally:
                conn.close()
        except Exception:
            f_grade = 'MEDIUM'
            f_evidence.append('DB check unavailable')
    else:
        f_grade = 'MEDIUM' if phonetic_chain else 'LOW'
        f_evidence.append(f'Chain: {bool(phonetic_chain)}')

    return _quf_result(q_grade, u_grade, f_grade, q_evidence, u_evidence, f_evidence)
def divine_quf(data: dict) -> dict:
    """Grade a Names of Allah row (L3); the U and F gates are fixed at HIGH.

    Q is HIGH when root_id, qur_ref and aa_name are all present, MEDIUM when
    only the name is guaranteed, and LOW when the name is missing.
    """
    root_id = data.get('root_id', '') or ''
    qur_ref = data.get('qur_ref', '') or ''
    name = data.get('aa_name', '') or ''

    if not name:
        q_grade = 'LOW'
    elif root_id and qur_ref:
        q_grade = 'HIGH'
    else:
        q_grade = 'MEDIUM'

    q_evidence = [f'root={root_id}, qur_ref={bool(qur_ref)}, name={bool(name)}']
    # U is treated as universal by definition, F as axiomatic
    return _quf_result(
        q_grade, 'HIGH', 'HIGH',
        q_evidence,
        ['Divine name — universal'],
        ['Divine name — axiomatic'],
    )
def quran_form_quf(data: dict) -> dict:
    """QUF for Qur'anic forms — L4. Compiler output.

    Q: root + word + surah documented (particles need no root)
    U: root found in the roots table, or a particle attested in a surah
    F: compiler confidence, never below MEDIUM

    Args:
        data: row dict; reads root/root_unhyphenated, aa_word/aa_form,
            surah, confidence and word_type.

    Returns:
        QUF result dict built by ``_quf_result``.
    """
    root = data.get('root', '') or data.get('root_unhyphenated', '') or ''
    word = data.get('aa_word', '') or data.get('aa_form', '') or ''
    surah = data.get('surah', 0)
    conf = data.get('confidence', '') or ''
    word_type = data.get('word_type', '') or ''

    # PARTICLEs may not have roots — that's correct, not a failure
    is_particle = word_type.upper() == 'PARTICLE'

    if root and word and surah:
        q = 'HIGH'
    elif word and surah and is_particle:
        q = 'HIGH'  # Particle in Qur'an with valid surah = attested
    elif root and word:
        q = 'MEDIUM'
    else:
        q = 'LOW'

    # U: root in roots table? Particles are universal if they appear in Qur'an
    u = 'MEDIUM'
    if is_particle and word and surah:
        u = 'HIGH'  # Qur'anic particle = universal by presence
    elif _HAS_DB and root:
        try:
            conn = _connect()
            try:
                found = conn.execute(
                    "SELECT COUNT(*) FROM roots WHERE root_bare = ? OR root_letters = ?",
                    (root.replace('-', ''), root)
                ).fetchone()[0]
            finally:
                # Close even when the lookup fails, so the handle is not leaked
                conn.close()
            if found:
                u = 'HIGH'
        except Exception:
            # Best effort: without a usable DB the grade stays MEDIUM
            pass

    # F: compiler confidence. Anything other than the two top grades,
    # including missing or unknown values, is graded MEDIUM.
    conf_upper = str(conf).upper()
    f = 'HIGH' if conf_upper in ('HIGH', 'MEDIUM_A') else 'MEDIUM'

    return _quf_result(q, u, f,
        [f'root={root}, word={bool(word)}, surah={surah}, particle={is_particle}'],
        [f'Root in roots table: {u}' if not is_particle else f'Particle in Quran: universal'],
        [f'Compiler confidence: {conf_upper or "ungraded"}'])
def diwan_quf(data: dict) -> dict:
    """QUF for diwan_roots — PRIMARY SOURCE but LLM extraction needs validation.

    Q: headword + root_letters + gloss documented
    U: volume + ms_page traceable to manuscript
    F: word_class classified + kitab section identified

    Args:
        data: row dict; reads headword, root_letters, aa_gloss, volume,
            ms_page, word_class and kitab.

    Returns:
        QUF result dict built by ``_quf_result``.
    """
    headword = data.get('headword', '') or ''
    root_letters = data.get('root_letters', '') or ''
    aa_gloss = data.get('aa_gloss', '') or ''
    volume = data.get('volume', 0) or 0
    ms_page = data.get('ms_page', 0) or 0
    word_class = data.get('word_class', '') or ''
    kitab = data.get('kitab', '') or ''

    # Q: headword + gloss documented (extraction quality)
    if not headword:
        q = 'FAIL'
    elif not aa_gloss:
        q = 'LOW'
    elif root_letters:
        q = 'HIGH'
    else:
        q = 'MEDIUM'

    # U: traceable to manuscript (volume + page)
    if volume:
        u = 'HIGH' if ms_page else 'MEDIUM'
    else:
        u = 'LOW'

    # F: word_class classified + kitab section identified
    if word_class:
        f = 'HIGH' if kitab else 'MEDIUM'
    else:
        f = 'LOW'

    return _quf_result(q, u, f,
        [f'headword={bool(headword)}, root={bool(root_letters)}, gloss={bool(aa_gloss)}'],
        [f'vol={volume}, ms_page={ms_page}'],
        [f'word_class={word_class}, kitab={bool(kitab)}'])
def infrastructure_quf(data: dict) -> dict:
    """Grade an infrastructure/config/protocol row by how complete it is.

    Q: share of the non-bookkeeping fields that are populated
    U: an identifying field and a descriptive field are populated
    F: a structural field (status/category/type, ...) is populated
    """
    # QUF bookkeeping columns and timestamps never count towards completeness
    ignored = {'quf_q', 'quf_u', 'quf_f', 'quf_pass', 'quf_date', 'quf_token',
               'created_at', 'modified_at', 'modified_by', 'version', 'rowid', '_rowid'}
    id_names = ('name', 'label', 'code')
    descriptive = (
        'description', 'content', 'item_text', 'preamble_text', 'template_text',
        'notes', 'mechanism', 'specific_data', 'phase_name', 'surah_name')
    structural = (
        'status', 'category', 'section', 'type', 'class', 'level_code',
        'access_level', 'shift_type', 'pattern', 'step_name', 'heptad')

    considered = [key for key in data if key not in ignored]
    # Populated = neither None nor blank once rendered as text
    filled = {}
    for key in considered:
        value = data[key]
        if value is not None and str(value).strip():
            filled[key] = value

    total_fields = len(considered)
    populated = len(filled)
    ratio = populated / max(total_fields, 1)

    # Q: at least 50% of fields populated
    if ratio >= 0.5:
        q = 'HIGH'
    elif ratio >= 0.3:
        q = 'MEDIUM'
    else:
        q = 'LOW'

    # U: has an ID field and at least one descriptive field
    has_id = False
    for key in filled:
        if key.endswith('_id') or key in id_names:
            has_id = True
            break
    has_desc = any(key in descriptive for key in filled)
    if has_id and has_desc:
        u = 'HIGH'
    elif has_id or has_desc:
        u = 'MEDIUM'
    else:
        u = 'LOW'

    # F: has status/category/type (structured, not freeform)
    has_structure = any(key in structural for key in filled)
    if has_structure and ratio >= 0.4:
        f = 'HIGH'
    elif has_structure or ratio >= 0.5:
        f = 'MEDIUM'
    else:
        f = 'LOW'

    completeness = f'Completeness: {ratio:.0%} ({populated}/{total_fields})'
    identity = f'ID={has_id}, description={has_desc}'
    structure = f'Structured: {has_structure}'
    return _quf_result(q, u, f, [completeness], [identity], [structure])
def quran_ayat_quf(data: dict) -> dict:
    """QUF for Qur'anic ayat — L4. Verses, not word forms.

    Q: ayah text present together with surah and ayah numbers
    U: surah number within 1-114, plus a root translation
    F: root translation present

    Args:
        data: row dict; reads surah, ayah, aa_text and root_translation.

    Returns:
        QUF result dict built by ``_quf_result``.
    """
    surah = data.get('surah', 0) or 0
    ayah = data.get('ayah', 0) or 0
    aa_text = data.get('aa_text', '') or ''
    root_translation = data.get('root_translation', '') or ''

    # Q: ayah text exists + surah/ayah numbers valid
    q = 'HIGH' if (aa_text and surah and ayah) else ('MEDIUM' if aa_text else 'LOW')

    # U: ayah in valid surah range (1-114) — structural check, no DB query.
    # A surah value that cannot be read as an integer is graded as invalid
    # instead of aborting the whole gate.
    try:
        valid_surah = 1 <= int(surah) <= 114 if surah else False
    except (TypeError, ValueError, OverflowError):
        valid_surah = False
    u = 'HIGH' if (valid_surah and root_translation) else ('MEDIUM' if valid_surah else 'LOW')

    # F: root_translation exists (meaning the ayah has been root-parsed)
    f = 'HIGH' if root_translation else 'MEDIUM'

    return _quf_result(q, u, f,
        [f'surah={surah}, ayah={ayah}, text={bool(aa_text)}'],
        [f'Valid surah range: {valid_surah}, root_translation: {bool(root_translation)}'],
        [f'Root translation: {bool(root_translation)}'])
def quran_known_form_quf(data: dict) -> dict:
    """QUF for known Qur'anic forms — L4. Form catalog, not per-ayah.

    Q: form documented together with a root (particles may lack roots)
    U: form has a root, or is a classified particle
    F: word_type classified

    All three gates are structural checks on the row itself; no DB query
    is made here.

    Args:
        data: row dict; reads aa_form, root_unhyphenated and word_type.

    Returns:
        QUF result dict built by ``_quf_result``.
    """
    aa_form = data.get('aa_form', '') or ''
    root = data.get('root_unhyphenated', '') or ''
    word_type = data.get('word_type', '') or ''
    is_particle = word_type.upper() == 'PARTICLE' if word_type else False

    # Q: form + root documented (particles may lack roots)
    if aa_form and (root or is_particle):
        q = 'HIGH'
    elif aa_form:
        q = 'MEDIUM'
    else:
        q = 'LOW'

    # U: form has root or is classified particle
    if (root and aa_form) or is_particle:
        u = 'HIGH'
    elif aa_form:
        u = 'MEDIUM'
    else:
        u = 'LOW'

    # F: word_type classified
    f = 'HIGH' if word_type else 'MEDIUM'

    return _quf_result(q, u, f,
        [f'form={bool(aa_form)}, root={root}, type={word_type}'],
        [f'Attestation in quran_word_roots: {u}'],
        [f'Classification: {word_type or "unclassified"}'])
def bitig_quf(data: dict) -> dict:
    """QUF for Bitig entries — L6.

    Source hierarchy (strict precedence):
        PRIMARY:   kashgari_attestation (SRC01 Kashgari 1072 CE)
        SECONDARY: ibn_sina_attestation (SRC02) | navoi_attestation (SRC07)
                   + manuscript sources (SRC08-10 Orkhon/Irk Bitig/Talas)
        TERTIARY:  tertiary_attestation (SRC11-14: ESTYA, Shipova, Baskakov,
                   Suleimenov)
                   — ONLY valid when primary+secondary silent,
                   OR as supplementary data on already-confirmed entries.

    NOTE(review): only ``ibn_sina_attestation`` and ``navoi_attestation`` are
    read as secondary evidence here; no manuscript-source field is consulted.
    Confirm whether that is intended.

    Grades:
        Q: HIGH with a primary source; MEDIUM with only a secondary source
           or only a ``root_id``; LOW with only a tertiary source; PENDING
           with nothing.
        U: HIGH with ``root_id`` and ``dispersal_range``; MEDIUM with a
           ``root_id`` or any primary/secondary source; LOW with only a
           tertiary source; PENDING with nothing.
        F: FAIL when the tertiary text contains a direction or framework
           violation; MEDIUM for compound roots (``root_letters`` contains
           '+') and for clean tertiary-only entries; HIGH for primary- or
           secondary-attested entries; MEDIUM otherwise.

    Lattice-check review notes (entry has a ``root_id`` and the tertiary
    text talks about origin/source/"from") never change the grade, but are
    always included in the F evidence so the request for manual review is
    not lost.

    Returns:
        The value produced by ``_quf_result``.
    """
    import re as _re_tert

    kash = data.get('kashgari_attestation', '') or ''
    ibn_sina = data.get('ibn_sina_attestation', '') or ''
    navoi = data.get('navoi_attestation', '') or ''
    tertiary = data.get('tertiary_attestation', '') or ''
    root_id = data.get('root_id', '') or ''
    dispersal = data.get('dispersal_range', '') or ''

    has_primary = bool(kash)
    has_secondary = bool(ibn_sina or navoi)
    has_tertiary = bool(tertiary)

    # ── Q (quality of attestation) ──
    # PRIMARY = HIGH, SECONDARY = MEDIUM, TERTIARY alone = LOW,
    # TERTIARY supplementing confirmed = no change (keeps PRIMARY/SECONDARY grade)
    q_ev = []
    if has_primary:
        q = 'HIGH'
        q_ev.append('PRIMARY: Kashgari attested')
        if has_secondary:
            q_ev.append(f'SECONDARY corroboration: ibn_sina={bool(ibn_sina)}, navoi={bool(navoi)}')
        if has_tertiary:
            q_ev.append(f'TERTIARY supplementary: {data.get("tertiary_source", "?")}')
    elif has_secondary:
        q = 'MEDIUM'
        q_ev.append(f'SECONDARY only: ibn_sina={bool(ibn_sina)}, navoi={bool(navoi)}')
        if has_tertiary:
            q_ev.append(f'TERTIARY supplementary: {data.get("tertiary_source", "?")}')
    elif has_tertiary:
        q = 'LOW'
        q_ev.append(f'TERTIARY only (no primary/secondary): {data.get("tertiary_source", "?")}')
    elif root_id:
        q = 'MEDIUM'
        q_ev.append('root_id present, no attestation sources')
    else:
        q = 'PENDING'
        q_ev.append('No attestation sources and no root_id')

    # ── U (usage precedent) ──
    # root_id+dispersal=HIGH, root_id or primary/secondary=MEDIUM,
    # tertiary alone=LOW, nothing=PENDING
    u_ev = []
    if root_id and dispersal:
        u = 'HIGH'
        u_ev.append(f'root={root_id}, dispersal present')
    elif root_id or has_primary or has_secondary:
        u = 'MEDIUM'
        u_ev.append(f'root={root_id}, primary={has_primary}, secondary={has_secondary}')
    elif has_tertiary:
        u = 'LOW'
        u_ev.append('Tertiary only — no primary/secondary/root_id')
    else:
        u = 'PENDING'
        u_ev.append('No sources, no root_id')

    # ── TERTIARY TEXT SCAN ──
    # Fires on tertiary_attestation content. Catches direction violations
    # that slip past the pre-write gate's explicit banned terms:
    #   1. Downstream language named as ORIGIN (Russian, Slavic, Greek, etc.)
    #   2. Proto-reconstruction claims (*kor-, *proto-X)
    #   3. Reverse direction by implication ("related to RU", "cf. Slavic")
    #   4. Lattice contradiction (if AA root exists, tertiary must not claim
    #      the word originates from a downstream form)
    tert_violations = []  # direction/framework violations: force F = FAIL
    review_notes = []     # lattice-check notes: evidence only, grade unchanged
    if has_tertiary:
        tert_lower = tertiary.lower()

        # Downstream languages positioned as sources/origins.
        # The bare abbreviation "ru" carries its own word boundary so that
        # ordinary words starting with "ru" (e.g. "from runic inscriptions")
        # are not mistaken for a Russian-origin claim.
        _DS_AS_SOURCE = [
            r'\b(?:from|origin(?:ates?)?|derives?|related to|cf\.?|compare)\s+'
            r'(?:russian|ru\b|slavic|greek|latin|persian|french|german|english|'
            r'sanskrit|hindi|chinese|japanese|korean|hungarian|finnish)',
            r'\b(?:russian|slavic|greek|latin|persian|french|german|english|'
            r'sanskrit)\s+(?:word|term|root|origin|source|etymon)\b',
            r'\bfrom\s+(?:old\s+)?(?:russian|slavic|greek|latin|french|german)\b',
        ]
        for pat in _DS_AS_SOURCE:
            m = _re_tert.search(pat, tert_lower)
            if m:
                tert_violations.append(
                    f'TERTIARY_DIRECTION: downstream-as-source "{m.group()}" '
                    f'in tertiary_attestation. Direction is AA/ORIG2 -> downstream. ALWAYS.'
                )

        # Proto-reconstruction used as DIRECTION CLAIM (framework contamination).
        # The asterisk notation (*kor-, *qar-) is fine as consonant skeleton data.
        # It's only contamination when combined with origin/direction language:
        #   "*kor- is the proto-Turkic origin" = FAIL (direction claim)
        #   "ESTYA reconstructs *kor-"         = OK (consonant skeleton data point)
        _PROTO_DIRECTION = [
            r'\*[a-z]{2,}.*?\b(?:origin|source|gave|produced|ancestor|proto-)\b',
            r'\b(?:from|origin(?:ates?)?|derives?|ancestor|proto-)\b.*?\*[a-z]{2,}',
            r'\bproto-\w+\s+(?:root|form|origin|source|ancestor)\b',
        ]
        for pat in _PROTO_DIRECTION:
            m = _re_tert.search(pat, tert_lower)
            if m:
                tert_violations.append(
                    f'TERTIARY_FRAMEWORK: proto-form used as direction claim '
                    f'"{m.group()}" in tertiary_attestation. '
                    f'Asterisk notation OK for consonant skeletons. '
                    f'NOT OK as origin/direction claim.'
                )

        # Lattice contradiction check: if entry has AA root AND tertiary
        # mentions origin/source, flag for review. No per-row DB query.
        if root_id and any(
            _re_tert.search(p, tert_lower) for p in [
                r'\b(?:origin|source|etymon)\b',
                r'\bfrom\b',
            ]
        ):
            review_notes.append(
                f'TERTIARY_LATTICE_CHECK: entry has AA root {root_id}. '
                f'Tertiary source claims about origin must be consistent. '
                f'Verify direction manually.'
            )

    # Only direction/framework hits contaminate the entry; a lattice note
    # on its own is a request for review, not a failure.
    tertiary_contaminated = bool(tert_violations)

    # ── COMPOUND MORPHEME VALIDATION ──
    # Flag compound entries (root_letters contains '+') for review.
    # No per-row DB query — compound check is structural.
    root_letters = data.get('root_letters', '') or ''
    is_compound = '+' in root_letters

    # ── F (falsifiability — attestation level + contamination flags) ──
    f = 'MEDIUM'
    f_ev = []
    if tertiary_contaminated:
        f = 'FAIL'
        f_ev.extend(tert_violations)
    elif is_compound:
        f = 'MEDIUM'
        f_ev.append(f'COMPOUND: {root_letters} — components to verify')
    elif has_primary:
        f = 'HIGH'
        f_ev.append('Kashgari-attested, not blacklisted')
    elif has_secondary:
        f = 'HIGH'
        f_ev.append('Secondary-attested, not blacklisted')
    elif has_tertiary:
        f = 'MEDIUM'
        f_ev.append('Tertiary-only, no direction violations')
    if not f_ev:
        # Reached only when no branch above fired, i.e. f is still MEDIUM.
        f_ev.append('Not blacklisted')

    # Surface the manual-review request whatever the grade is.
    f_ev.extend(review_notes)

    return _quf_result(q, u, f, q_ev, u_ev, f_ev)
def sibling_quf(data: dict) -> dict:
    """Grade a sibling entry (EU, LA, UZ — layer L7) on the Q/U/F axes.

    Reads ``root_id`` (falling back to ``aa_root_id``), ``phonetic_chain``
    and ``score`` from *data*; missing or ``None`` values count as empty/0.

    Grades:
        Q: HIGH when root, chain and a positive score are all present;
           MEDIUM when at least the root is present; LOW otherwise.
        U: HIGH when root and chain are present; MEDIUM with only the root;
           LOW otherwise.
        F: HIGH when the chain contains at least one shift id of the form
           ``S`` + two digits; MEDIUM for a chain without one; LOW when
           there is no chain.

    Returns:
        The value produced by ``_quf_result``.
    """
    import re

    root_id = data.get('root_id', '') or data.get('aa_root_id', '') or ''
    chain = data.get('phonetic_chain', '') or ''
    score = data.get('score', 0) or 0

    # Q and U share the same root/chain ladder; only Q also looks at score.
    # No per-row DB query.
    if root_id and chain:
        u = 'HIGH'
        q = 'HIGH' if score > 0 else 'MEDIUM'
    elif root_id:
        q = u = 'MEDIUM'
    else:
        q = u = 'LOW'

    # F: a chain that names explicit shift ids can be checked step by step.
    if not chain:
        f = 'LOW'
    elif re.search(r'S\d{2}', chain):
        f = 'HIGH'
    else:
        f = 'MEDIUM'

    q_evidence = [f'root={root_id}, chain={bool(chain)}, score={score}']
    u_evidence = [f'EN parent exists: {u}']
    f_evidence = [f'Shift chain: {bool(chain)}']
    return _quf_result(q, u, f, q_evidence, u_evidence, f_evidence)
def derivative_quf(data: dict) -> dict:
    """Grade a derivative row (layer L8) on the Q/U/F axes.

    Reads ``entry_id``, ``link_type`` and ``derivative`` (falling back to
    ``term``) from *data*. No per-row DB queries, for batch performance.

    Grades:
        Q: FAIL without ``entry_id``; LOW with only ``entry_id``; MEDIUM
           with ``entry_id`` and ``link_type``; HIGH when the derivative
           text is present as well.
        U: MEDIUM when ``entry_id`` is present; LOW otherwise.
        F: FAIL for a banned link type; HIGH for a permitted one; MEDIUM
           for any other non-empty link type; LOW when it is empty.
           Link types are compared case-insensitively.

    Returns:
        The value produced by ``_quf_result``.
    """
    permitted_links = {
        'DIRECT', 'COMPOUND', 'SAME_ROOT', 'PHONETIC', 'SEMANTIC',
        'PREFIX', 'SUFFIX', 'ROOT', 'SIBLING',
    }
    banned_links = {'COGNATE', 'LOANWORD', 'BORROWING'}

    entry_id = data.get('entry_id', '')
    link_type = data.get('link_type', '') or ''
    derivative = data.get('derivative', '') or data.get('term', '') or ''

    # Q: entry_id + link_type + derivative documented
    if not entry_id:
        q = 'FAIL'
    elif not link_type:
        q = 'LOW'
    elif derivative:
        q = 'HIGH'
    else:
        q = 'MEDIUM'

    u = 'MEDIUM' if entry_id else 'LOW'

    # F: banned vocabulary outranks everything else.
    if not link_type:
        f = 'LOW'
    else:
        normalized = link_type.upper()
        if normalized in banned_links:
            f = 'FAIL'
        elif normalized in permitted_links:
            f = 'HIGH'
        else:
            f = 'MEDIUM'

    q_evidence = [f'parent={entry_id}, link={link_type}, deriv={bool(derivative)}']
    u_evidence = [f'Parent entry: {entry_id}']
    f_evidence = [f'Link type: {link_type}']
    return _quf_result(q, u, f, q_evidence, u_evidence, f_evidence)
def xref_quf(data: dict) -> dict:
    """Grade a cross-reference row (layer L8) on the Q/U/F axes.

    Reads ``from_id`` (or ``from_entry_id``), ``to_id`` (or
    ``to_entry_id``), ``link_type`` and ``description`` from *data*.
    Structural check only: the endpoints are tested for presence in *data*,
    not looked up anywhere.

    Grades:
        Q: LOW unless both endpoints are present; HIGH when ``link_type``
           is present too; MEDIUM otherwise.
        U: LOW unless both endpoints are present; HIGH when a description
           is present too; MEDIUM otherwise.
        F: HIGH when the upper-cased link type is in the permitted set;
           MEDIUM for any other non-empty link type; LOW when it is empty.

    NOTE(review): unlike ``derivative_quf``, link types such as COGNATE are
    not failed here; they grade MEDIUM. Confirm that this is intended.

    Returns:
        The value produced by ``_quf_result``.
    """
    permitted_links = {
        'SAME_ROOT', 'PHONETIC', 'SEMANTIC', 'COMPOUND', 'DIRECT',
        'PREFIX', 'SUFFIX', 'ROOT', 'SIBLING', 'FORMULA', 'BODY',
    }

    from_id = data.get('from_id', '') or data.get('from_entry_id', '') or ''
    to_id = data.get('to_id', '') or data.get('to_entry_id', '') or ''
    link_type = data.get('link_type', '') or ''
    description = data.get('description', '') or ''

    # Q and U both require the two endpoints; they differ only in the
    # extra field that lifts MEDIUM to HIGH. No DB query.
    if from_id and to_id:
        q = 'HIGH' if link_type else 'MEDIUM'
        u = 'HIGH' if description else 'MEDIUM'
    else:
        q = u = 'LOW'

    # F: link type is in permitted set
    lt_upper = link_type.upper() if link_type else ''
    if lt_upper in permitted_links:
        f = 'HIGH'
    else:
        f = 'MEDIUM' if link_type else 'LOW'

    q_evidence = [f'from={from_id}, to={to_id}, link={link_type}']
    u_evidence = [f'Both endpoints exist: {u}']
    f_evidence = [f'Link type: {lt_upper}']
    return _quf_result(q, u, f, q_evidence, u_evidence, f_evidence)
def foundation_quf(data: dict) -> dict:
    """Grade a foundation-data row (F1-F7) on the Q/U/F axes.

    Reads ``layer``, ``subtable``, ``specific_data`` and ``orig_id`` from
    *data*. ``specific_data`` counts as content when it is truthy and its
    string form is longer than 10 characters.

    Grades:
        Q: HIGH with layer, subtable and content; MEDIUM with layer and
           content; LOW otherwise.
        U: HIGH with layer and ``orig_id``; MEDIUM with only the layer;
           LOW otherwise.
        F: MEDIUM without content. With content, a string is parsed as
           JSON (any other value is used as-is): HIGH when the result has
           length >= 2, MEDIUM when shorter. If parsing or measuring
           fails, MEDIUM when the string form exceeds 20 characters,
           LOW otherwise.

    Returns:
        The value produced by ``_quf_result``.
    """
    layer = data.get('layer', '') or ''
    subtable = data.get('subtable', '') or ''
    specific_data = data.get('specific_data', '') or ''
    orig_id = data.get('orig_id', '') or ''

    # Q: layer + subtable + content documented
    has_content = bool(specific_data) and len(str(specific_data)) > 10
    if layer and has_content:
        q = 'HIGH' if subtable else 'MEDIUM'
    else:
        q = 'LOW'

    # U: foundation is definitional — layer and orig_id documented
    if not layer:
        u = 'LOW'
    elif orig_id:
        u = 'HIGH'
    else:
        u = 'MEDIUM'

    # F: content is parseable with at least two top-level items
    f = 'MEDIUM'
    if has_content:
        import json
        try:
            if isinstance(specific_data, str):
                parsed = json.loads(specific_data)
            else:
                parsed = specific_data
            item_count = len(parsed)
        except Exception:
            # Best effort: unparseable content is still graded by its size.
            f = 'MEDIUM' if len(str(specific_data)) > 20 else 'LOW'
        else:
            f = 'HIGH' if item_count >= 2 else 'MEDIUM'

    q_evidence = [f'layer={layer}, subtable={subtable}, content={has_content}']
    u_evidence = [f'Foundation layer={layer}, orig={orig_id}']
    f_evidence = [f'Data parseable: {f}']
    return _quf_result(q, u, f, q_evidence, u_evidence, f_evidence)
def mechanism_quf(data: dict) -> dict:
    """Grade a mechanism-data row (M1-M5) on the Q/U/F axes.

    Reads ``layer``, ``subtable``, ``specific_data`` and ``orig_id`` from
    *data*. ``specific_data`` counts as content when it is truthy and its
    string form is longer than 10 characters.

    Grades:
        Q: HIGH with layer, subtable and content; MEDIUM with layer and
           content; LOW otherwise.
        U: HIGH with layer and ``orig_id``; MEDIUM with only the layer;
           LOW otherwise.
        F: MEDIUM without content. With content, a string is parsed as
           JSON (any other value is used as-is): HIGH when the result has
           length >= 2, MEDIUM when shorter. If parsing or measuring
           fails, MEDIUM when the string form exceeds 20 characters,
           LOW otherwise.

    Returns:
        The value produced by ``_quf_result``.
    """
    layer = data.get('layer', '') or ''
    subtable = data.get('subtable', '') or ''
    specific_data = data.get('specific_data', '') or ''
    orig_id = data.get('orig_id', '') or ''

    # Q: layer + subtable + content
    has_content = bool(specific_data) and len(str(specific_data)) > 10
    if layer and has_content:
        q = 'HIGH' if subtable else 'MEDIUM'
    else:
        q = 'LOW'

    # U: layer + orig_id documented
    if not layer:
        u = 'LOW'
    elif orig_id:
        u = 'HIGH'
    else:
        u = 'MEDIUM'

    # F: content parseable with multiple top-level items
    f = 'MEDIUM'
    if has_content:
        import json
        try:
            if isinstance(specific_data, str):
                parsed = json.loads(specific_data)
            else:
                parsed = specific_data
            item_count = len(parsed)
        except Exception:
            # Best effort: unparseable content is still graded by its size.
            f = 'MEDIUM' if len(str(specific_data)) > 20 else 'LOW'
        else:
            f = 'HIGH' if item_count >= 2 else 'MEDIUM'

    q_evidence = [f'layer={layer}, subtable={subtable}, content={has_content}']
    u_evidence = [f'Mechanism layer={layer}, orig={orig_id}']
    f_evidence = [f'Data parseable: {f}']
    return _quf_result(q, u, f, q_evidence, u_evidence, f_evidence)
# ═══════════════════════════════════════════════════════════════════════
# CLI INTERFACE
# ═══════════════════════════════════════════════════════════════════════
def main():
    """Command-line entry point.

    Expects ``sys.argv[1]`` to be a command and ``sys.argv[2]`` its
    argument; ``relate`` additionally needs ``sys.argv[3]``. Supported
    commands: ``deduce``, ``reverse``, ``expand``, ``relate``, ``think``,
    ``timeline``. Results are printed to stdout.

    Exits with status 0 after printing usage when fewer than two arguments
    are given, and with status 1 for an unknown command or a ``relate``
    call without its second root.
    """
    if len(sys.argv) < 3:
        print("أَمْر عَقْل — Intellect Engine")
        print()
        print("Usage:")
        print(" python3 amr_aql.py deduce ك-ف-ر # compute meaning from letters")
        print(" python3 amr_aql.py reverse cover # find AA root for English word")
        print(" python3 amr_aql.py expand R24 # full knowledge tree for root")
        print(" python3 amr_aql.py expand ك-ف-ر # same, by letters")
        print(" python3 amr_aql.py relate ر-ح-م م-ر-ح # structural relationship")
        print(" python3 amr_aql.py think cover # full hypothesis pipeline")
        print(" python3 amr_aql.py timeline R24 # deployment history")
        sys.exit(0)

    cmd = sys.argv[1]
    arg = sys.argv[2]

    if cmd == 'deduce':
        result = deduce_meaning(arg)
        print(f"\nROOT: {arg}")
        print(f"COMPOSITION: {result['composition']}")
        print(f"DEDUCTION: {result['deduction']}")
        for letter in result['letters']:
            print(f" {letter['letter']} = {letter['abjad']:>4} = {letter['semantic']}")

    elif cmd == 'reverse':
        candidates = reverse_trace(arg)
        print(f"\nREVERSE TRACE: {arg}")
        print(f"CANDIDATES: {len(candidates)}")
        # Only the first five candidates are shown.
        for rank, c in enumerate(candidates[:5], start=1):
            print(f"\n [{rank}] {c['root_letters']} (score={c['score']})")
            print(f" {c['composition']}")
            print(f" Chain: {' | '.join(c['shift_chain'])}")

    elif cmd == 'expand':
        tree = expand_root(arg)
        if 'error' in tree:
            print(f"ERROR: {tree['error']}")
        else:
            r = tree['root']
            s = tree['summary']
            print(f"\nROOT: {r['root_id']} | {r['root_letters']}")
            print(f"COMPUTED: {r['computed_meaning']['composition']}")
            print(f"QURANIC TOKENS: {r['quran_tokens']}")
            print("\nDOWNSTREAM:")
            print(f" EN: {s['en_entries']} | RU: {s['ru_entries']} | FA: {s['fa_entries']}")
            print(f" EU: {s['european']} | Latin: {s['latin']} | Bitig: {s['bitig']} | Uzbek: {s['uzbek']}")
            print(f" Derivatives: {s['derivatives']} | Cross-refs: {s['cross_refs']}")
            print(f" Qur'anic words: {s['quranic_words']} | Names of Allah: {s['names_of_allah']}")
            print(f" TOTAL DOWNSTREAM: {s['total_downstream']}")

    elif cmd == 'relate':
        if len(sys.argv) < 4:
            print("Usage: amr_aql.py relate ROOT_A ROOT_B")
            sys.exit(1)
        root_b = sys.argv[3]
        result = relate_roots(arg, root_b)
        # Root and its composition are separated so they do not run together.
        print(f"\nROOT A: {arg} → {result['root_a']['meaning']['composition']}")
        print(f"ROOT B: {root_b} → {result['root_b']['meaning']['composition']}")
        print(f"\nRELATIONSHIPS: {result['relationship_count']}")
        for rel in result['relationships']:
            print(f" [{rel['type']}] {rel['description']}")

    elif cmd == 'think':
        candidates = hypothesise(arg)
        print(f"\nHYPOTHESIS: {arg}")
        print(f"CANDIDATES: {len(candidates)}")
        # Only the first five candidates are shown.
        for rank, c in enumerate(candidates[:5], start=1):
            is_verified = c.get('verified')
            verified = "✓ VERIFIED" if is_verified else "○ unverified"
            tokens = f"Q:{c.get('quranic_tokens', '?')}" if is_verified else ""
            entries = f"E:{c.get('existing_entries', '?')}" if is_verified else ""
            print(f"\n [{rank}] {c['root_letters']} (score={c['score']}) {verified} {tokens} {entries}")
            print(f" {c['composition']}")
            print(f" {c['deduction']}")
            print(f" Chain: {' | '.join(c['shift_chain'])}")

    elif cmd == 'timeline':
        result = trace_timeline(arg)
        if 'error' in result:
            print(f"ERROR: {result['error']}")
        else:
            print(f"\nTIMELINE: {result['root_id']} | {result['root_letters']}")
            print(f"DEPLOYMENTS: {len(result['deployments'])}")
            for d in result['deployments']:
                # The key may be present with a None value; fall back to '?'
                # before slicing so such a row does not raise TypeError.
                words = (d.get('deployed_words') or '?')[:60]
                print(f" {d.get('date_period', '?')} | {d.get('operation_phase', '?')} | {words}")
            print(f"CORRIDORS: {len(result['corridors'])}")
            for c in result['corridors']:
                print(f" {c.get('en_term', '?')} → {c.get('ds_corridor', '?')} | {c.get('decay_level', '?')}")

    else:
        print(f"Unknown command: {cmd}")
        sys.exit(1)
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()