Spaces:

uslap
/

uslap-query

Sleeping

App Files Files Community

uslap-query / Code_files /archive /USLaP_Engine.py

uslap

Upload folder using huggingface_hub

7cc8e29 verified 3 months ago

Raw

History Blame Contribute Delete

220 kB

	#!/usr/bin/env python3
	# -*- coding: utf-8 -*-
	"""
	USLaP Autonomous Engine v1.0
	Unified Source Language Proof — Linguistic Intelligence Engine

	Processes English words / Arabic roots / ratios / phrases through the QUF pipeline.
	Produces: (A) Lattice placement in master file, (B) 360-degree HTML report.

	Architecture (8 components):
	1. InputRouter — detects input type, routes to pipeline
	2. PhoneticReversal — English consonants → ORIG root candidates
	3. QGate — Qur'anic attestation (binary PASS/FAIL)
	4. UGate — Phonetic unity verification
	5. FGate — Foundation layer (DS/DP/network assignment)
	6. ClusterExpander — root → all English words sharing that root
	7. EntryWriter — writes to A1_ENTRIES + A4/A5/M4/SESSION_INDEX/ENGINE_QUEUE
	8. ReportGenerator — 360-degree HTML report from all domains

	بِسْمِ اللَّهِ الرَّحْمَٰنِ الرَّحِيمِ
	"""

	import sys
	import os
	import re
	import json
	import shutil
	import itertools
	from datetime import datetime
	from pathlib import Path
	from typing import Optional, List, Dict

	import openpyxl
	from openpyxl import load_workbook

	# ─── FILE PATHS ───────────────────────────────────────────────────────────────
	# Absolute locations of the workbooks, the Kashgari corpus and the report
	# folder. Files that live in the same folder are spelled relative to it.
	WORKSPACE_DIR = "/Users/mmsetubal/Documents/USLaP workplace"
	# The folder name really ends with a space before the path separator.
	_LINGUISTIC_DIR = "/Users/mmsetubal/Documents/USLaP Master Folder/Linguistic "
	MASTER_FILE = f"{WORKSPACE_DIR}/USLaP_Final_Data_Consolidated_Master_v3.xlsx"
	QURAN_FILE = f"{_LINGUISTIC_DIR}/USLaP_Quran_Root_Count.xlsx"
	KASHGARI_FILE = f"{_LINGUISTIC_DIR}/Kashgari 1.2.3.txt"
	# NOTE(review): "workspace" here vs "workplace" in WORKSPACE_DIR — confirm
	# that the reports folder is really a different directory tree.
	REPORTS_DIR = "/Users/mmsetubal/Documents/USLaP workspace/Reports"

	# ─── THRESHOLDS ───────────────────────────────────────────────────────────────
	# Score bands used when deciding what happens to a processed term:
	#   score >= 9  → queued as auto-write candidate (CONFIRMED_HIGH)
	#   score 7-8   → queued for oversight (PENDING_REVIEW)
	#   score <= 6  → rejected (log only)
	# v2.1: CONFIRMED_HIGH target = 15-25% of batch. If > 33% → scoring inflated.
	# Tightening: require Q+U+positional all pass (score>=9 alone is no longer enough)
	SCORE_AUTO_WRITE = 9
	SCORE_QUEUE = 7
	SCORE_REJECT = 6

	# Maximum recursion depth in ClusterExpander.
	MAX_CLUSTER_DEPTH = 3

	# ─── v3.4: MODERN TERMINOLOGY (EN→RU direction — exception to RU>EN rule) ────
	# Russian words that entered Russian FROM English (modern tech / medicine /
	# finance). For these the EN cognate IS authoritative; for every other word
	# RU is treated as closer to both originals and the EN cognate is only
	# confirmatory.
	MODERN_TECH_TERMS = {
	    # computing + electronics
	    'КОМПЬЮТЕР', 'ИНТЕРНЕТ', 'ТЕЛЕФОН', 'ТЕЛЕВИЗОР',
	    'ПРИНТЕР', 'СЕРВЕР', 'ПРОЦЕССОР', 'МОНИТОР',
	    'ДИСПЛЕЙ', 'МОДЕМ', 'РОУТЕР', 'БРАУЗЕР',
	    'ФАКС', 'ИМЕЙЛ', 'ЛАЗЕР', 'РАДАР',
	    # business + finance
	    'МЕНЕДЖЕР', 'МАРКЕТИНГ', 'БИЗНЕС', 'ОФИС',
	    'ДИЗАЙН', 'БРЕНД', 'ИНВЕСТОР', 'ДИЛЕР',
	    'БРОКЕР', 'ЛИЗИНГ',
	    # medicine
	    'ПЕНИЦИЛЛИН', 'ИНСУЛИН', 'АНТИБИОТИК', 'ВАКЦИНА',
	    # materials
	    'ПЛАСТИК', 'НЕЙЛОН', 'ТЕФЛОН', 'СИЛИКОН',
	}

	# ─── SUFFIX LIST (OP_SUFFIX stripping — longest first) ───────────────────────
	# Declared in authoring order, then sorted in place by descending length.
	# The sort is stable, so suffixes of equal length keep their declared order.
	LATIN_SUFFIXES = [
	    'ation', 'ition', 'ment', 'ness', 'ance', 'ence', 'ancy', 'ency',
	    'ical', 'ary', 'ory', 'ery', 'ity', 'ous', 'ious', 'ion', 'ism',
	    'ist', 'ize', 'ise', 'ify', 'ship', 'hood', 'ward', 'wise', 'ic',
	    # OP_SUFFIX: Latin -ant (COVENANT→COVEN, PLEASANT→PLEAS, SERVANT→SERV)
	    # Note: -ent NOT added — strips too aggressively (ANCIENT, MOMENT broken)
	    'ant',
	    'al', 'fy', 'ed', 'ing', 'ive', 'ly', 'er', 'or', 'ar', 'es', 'e', 's',
	]
	LATIN_SUFFIXES.sort(key=len, reverse=True)

	# ─── FUNCTION WORDS (for phrase parsing) ─────────────────────────────────────
	# Lower-case English function words that are dropped when a multi-word
	# input is reduced to its content words.
	FUNCTION_WORDS = set(
	    "the a an and or but in on at to for of "
	    "with by from is are was were be been it its "
	    "this that these those as into not no if then".split()
	)

	# ─── KNOWN مَفْعَل PATTERNS (Gate 3e: M-prefix place noun skeletons) ──────────
	# When a word starts with M and the remaining consonants match a known مَفْعَل
	# skeleton, the مَفْعَل candidate is boosted (same logic as N15 priority for
	# C/G/K-R-N). The resulting dict maps remaining-consonant skeleton → forced
	# root; it is built from (root, skeletons) families listed below.
	# Sources: 8 existing lattice entries confirmed as مَفْعَل + new patterns.
	_MAFAL_FAMILIES = (
	    # مَرْكَز markaz (center/market) — MARKET #249
	    ('ر-ك-ز', ('rkt', 'rkz', 'rks')),
	    # مَخْزَن makhzan (storehouse/magazine) — MAGAZINE #13
	    ('خ-ز-ن', ('khzn', 'gzn', 'kzn')),
	    # مَسْجِد masjid (mosque) — MOSQUE #20
	    ('س-ج-د', ('sjd', 'zgd')),
	    # مَنْزِل manzil (station/house) — from MINARET #21 family
	    ('ن-ز-ل', ('nzl',)),
	    # مَدْرَسَة madrasa (school) — MADRASA #19
	    ('د-ر-س', ('drs',)),
	    # مَطْرَح maṭraḥ (place of throwing) — MATTRESS #54
	    ('ط-ر-ح', ('trh', 'trs')),
	    # مَقْبَرَة maqbara (graveyard) — MACABRE #82
	    ('ق-ب-ر', ('qbr', 'kbr', 'cbr')),
	    # مِرْآة mirʾāh (mirror) — MIRROR #130
	    ('ر-أ-ي', ('rr',)),
	    # مَنَارَة manāra (lighthouse) — MINARET #21
	    ('ن-و-ر', ('nrt', 'nr')),
	    # مَوْصِل Mawṣil (junction) — MUSLIN #77
	    ('و-ص-ل', ('sl', 'zl')),
	    # مِنْهَاج minhāj (methodology) — MANAGER #143
	    ('ن-ه-ج', ('nhj', 'ngr')),
	)
	MAFAL_SKELETONS = {
	    _skeleton: _root
	    for _root, _skeletons in _MAFAL_FAMILIES
	    for _skeleton in _skeletons
	}

	# ─── DERIVATIVE CHAINS (Gate 3f: known parent→child word families) ──────────
	# When a word is a known derivative of a confirmed lattice entry, route it to
	# A4_DERIVATIVES instead of creating a new A1 entry. The resulting dict maps
	# lower-case derivative → upper-case parent EN_TERM; it is built from the
	# (parent, derivatives) families listed below.
	# Built from: A4_DERIVATIVES (582 entries) + batch root families.
	_DERIVATIVE_FAMILIES = (
	    # MARKET family (R211 ر-ك-ز)
	    ('MARKET', ('merchant', 'merchandise', 'mercantile', 'mercenary',
	                'mercy', 'commerce', 'commercial', 'marketing',
	                'marketplace', 'supermarket')),
	    # HORN/N15 family (R133 ق-ر-ن) — derivatives of existing entries
	    ('HORN', ('grain', 'corn', 'caravan', 'cornet', 'corona',
	              'coronation', 'cornea')),
	    # GOVERN family (R08 ج-ب-ر)
	    ('GOVERN', ('governor', 'government', 'governance')),
	    # EMPIRE family (R01 أ-م-ر)
	    ('EMPIRE', ('emperor', 'empress', 'imperial')),
	    # ALGEBRA family (R08 ج-ب-ر)
	    ('ALGEBRA', ('algebraic', 'algebraist')),
	    # COFFEE family (R168 ق-ه-ر)
	    ('COFFEE', ('cafe', 'cafeteria', 'caffeine')),
	    # SULTAN family (R07 س-ل-ط)
	    ('SULTAN', ('sultanate',)),
	    # ALCOHOL family (R27 ك-ح-ل)
	    ('ALCOHOL', ('alcoholic', 'alcoholism')),
	    # SUGAR family (R53 ش-ك-ر)
	    ('SUGAR', ('sugary', 'sugarcane')),
	    # CRIME family (R10 ح-ر-م)
	    ('CRIME', ('criminal', 'criminology')),
	    # COTTON family
	    ('COTTON', ('cottonseed', 'cottontail')),
	    # CALIBRE family (R31 ق-ل-ب)
	    ('CALIBRE', ('calibrate', 'calibration')),
	    # PATTERN family (R85 ف-ط-ر)
	    ('PATTERN', ('patterning', 'patterned')),
	    # SACRIFICE family (R200 ش-ك-ر)
	    ('SACRIFICE', ('sacrificial', 'sacrificing')),
	    # REVOLUTION family (R195 ب-ل-و)
	    ('REVOLUTION', ('revolutionary', 'revolt', 'revolve', 'rebellion', 'rebel')),
	    # MEDICINE family (R160 م-ي-د)
	    ('MEDICINE', ('medical', 'medic', 'medication')),
	    # MILITARY family (R02 م-ل-ك)
	    ('MILITARY', ('militia', 'militant')),
	    # MORTAL family (R103 م-و-ت)
	    ('MORTAL', ('mortality', 'immortal')),
	    # SORCERY family (R10 ح-ر-م)
	    ('SORCERY', ('sorcerer', 'sorceress')),
	    # PHILOSOPHY family
	    ('PHILOSOPHY', ('philosopher', 'philosophical')),
	    # PROTOCOL family
	    ('PROTOCOL', ('protocolar',)),
	)
	KNOWN_DERIVATIVES = {
	    _child: _parent
	    for _parent, _children in _DERIVATIVE_FAMILIES
	    for _child in _children
	}

	# ─── COGNATE CROSSREF (v3.3: Russian↔English sibling lookup) ──────────────────
	# Maps Russian words to their known English cognates. When the engine processes
	# a Russian word that has an English cousin, it also processes the ENGLISH form
	# through the English PhoneticReversal pipeline and compares results.
	# Rationale: English preserves root consonants that French→Russian corridor loses.
	# Example: ДЕСАНТ lost the К from سَكَنَ (S-K-N-D→DESCENT), but Russian only has
	# Д-С-Н-Т. The English pipeline recovers the root; the Russian pipeline cannot.
	# Format: { 'russian_lower': 'ENGLISH_UPPER', ... }
	# Bidirectional: the reverse mapping is generated right after the literal.
	COGNATE_CROSSREF_RU_TO_EN = {
	    # ── MILITARY + WARFARE ──
	    'десант': 'DESCENT', 'бастион': 'BASTION', 'батарея': 'BATTERY',
	    'гарнизон': 'GARRISON', 'мушкет': 'MUSKET', 'арсенал': 'ARSENAL',
	    'маршал': 'MARSHAL', 'генерал': 'GENERAL', 'адмирал': 'ADMIRAL',
	    'артиллерия': 'ARTILLERY', 'барьер': 'BARRIER',
	    # ── TRADE + ECONOMY ──
	    'банк': 'BANK', 'тариф': 'TARIFF', 'караван': 'CARAVAN',
	    'магазин': 'MAGAZINE', 'талант': 'TALENT', 'базар': 'BAZAAR',
	    'баланс': 'BALANCE',
	    # ── GOVERNANCE + LAW ──
	    'султан': 'SULTAN', 'эмир': 'EMIR', 'грамота': 'GRAMMAR',
	    # ── RELIGION + FAITH ──
	    'минарет': 'MINARET', 'масджид': 'MOSQUE', 'джихад': 'JIHAD',
	    'намаз': 'NAMAZ', 'халиф': 'CALIPH', 'муфтий': 'MUFTI',
	    'шариат': 'SHARIAT',
	    # ── SCIENCE + CRAFT ──
	    'алхимия': 'ALCHEMY', 'алгебра': 'ALGEBRA', 'зенит': 'ZENITH',
	    'азимут': 'AZIMUTH', 'алкоголь': 'ALCOHOL', 'эликсир': 'ELIXIR',
	    'бальзам': 'BALSAM', 'химия': 'CHEMISTRY',
	    # ── FOOD + DRINK ──
	    'кофе': 'COFFEE', 'сахар': 'SUGAR', 'лимон': 'LEMON',
	    'шафран': 'SAFFRON', 'йогурт': 'YOGURT',
	    # ── TEXTILES + HOUSEHOLD ──
	    'хлопок': 'COTTON', 'матрас': 'MATTRESS', 'диван': 'DIVAN',
	    'лак': 'LACQUER',
	    # ── NATURE + GEOGRAPHY ──
	    'муссон': 'MONSOON', 'тундра': 'TUNDRA',
	    # ── BODY + HEALTH ──
	    'массаж': 'MASSAGE',
	    # ── BORROWED INTERNATIONAL (Latin/Greek corridor shared) ──
	    'крепость': 'FORTRESS', 'пошлина': 'CUSTOMS', 'рубль': 'RUBLE',
	    'самовар': 'SAMOVAR',   # no English cousin — but included for completeness
	    'кинжал': 'DAGGER',     # loose cognate — different corridors
	    'шахта': 'SHAFT', 'табурет': 'TABOURET', 'лакировка': 'LACQUER',
	}

	# Reverse map EN → [RU, ...]; one English term may have several Russian
	# siblings, which are kept in the order they appear above.
	COGNATE_CROSSREF_EN_TO_RU = {}
	for _ru, _en in COGNATE_CROSSREF_RU_TO_EN.items():
	    if _en not in COGNATE_CROSSREF_EN_TO_RU:
	        COGNATE_CROSSREF_EN_TO_RU[_en] = []
	    COGNATE_CROSSREF_EN_TO_RU[_en].append(_ru)

	# ─── BANNED TERMS (auto-scan before write) ───────────────────────────────────
	# Lower-case phrases, listed one per line.
	BANNED_TERMS = [
	    'semitic',
	    'loanword',
	    'loan word',
	    'borrowed from',
	    'cognate',
	    'proto-indo-european',
	    'proto indo european',
	    'pie root',
	    'prosthetic vowel',
	    'pre-greek substrate',
	    'adoption',
	    # Wrapper terms (CLAUDE.md §7 + Turkish≠Turkic rule)
	    'islamic origin',
	    'islamic civilization',
	    'islamic science',
	    'turkish origin',
	    'turkish language',
	    'from turkish',
	    'old turkish',
	]


	# ═══════════════════════════════════════════════════════════════════════════════
	# DATA CLASSES
	# ═══════════════════════════════════════════════════════════════════════════════

	class GateResult:
	    """Outcome of one QUF gate: a pass/fail flag plus a dict of details.

	    The instance is truthy exactly when the gate passed, so it can be used
	    directly in an ``if``.
	    """

	    def __init__(self, passed: bool, details: dict = None):
	        self.passed = passed
	        # A missing or empty mapping is replaced by a fresh dict per instance.
	        self.details = details if details else {}

	    def __bool__(self):
	        return self.passed

	    def __repr__(self):
	        verdict = 'PASS' if self.passed else 'FAIL'
	        return f"GateResult({verdict}, {self.details})"


	class RootCandidate:
	    """One candidate ORIG1 Arabic root proposed for a given English word."""

	    def __init__(self, letters: str, token_count: int = 0, lemma_count: int = 0,
	                 ar_word: str = '', operations: list = None):
	        # Identity and attestation counts supplied by the caller.
	        self.letters = letters              # e.g. "ق-ر-ن"
	        self.token_count = token_count
	        self.lemma_count = lemma_count
	        self.ar_word = ar_word              # e.g. "قَرَن"
	        self.operations = operations if operations else []

	        # Scoring state, initialised to neutral defaults.
	        self.phonetic_chain = ''
	        self.score = 0
	        # R11: consonant ORDER fidelity (0.0–1.0); 0.5 = neutral
	        self.positional_score = 0.5
	        # R11: True = consonant ORDER inverted vs root order
	        self.transposition_flag = False
	        # R09: True = N15 skeleton forced this candidate first
	        self._n15_priority = False
	        # Coverage: word consonants NOT covered by root
	        self.extra_consonants = 0

	    def __repr__(self):
	        return (
	            f"RootCandidate({self.letters}, "
	            f"tokens={self.token_count}, score={self.score})"
	        )


	class EntryRecord:
	    """A full 14-column lattice entry ready for writing to A1_ENTRIES."""

	    # Attribute names in A1_ENTRIES column order; drives to_row().
	    _COLUMNS = (
	        'entry_id', 'score', 'en_term', 'ar_word',
	        'root_id', 'root_letters', 'qur_meaning', 'pattern',
	        'allah_name_id', 'network_id', 'phonetic_chain',
	        'inversion_type', 'source_form', 'foundation_ref',
	    )

	    def __init__(self):
	        self.entry_id: int = 0
	        self.score: int = 0
	        self.en_term: str = ''
	        self.ar_word: str = ''
	        self.root_id: str = ''
	        self.root_letters: str = ''
	        self.qur_meaning: str = ''
	        self.pattern: str = 'A'
	        self.allah_name_id: str = ''
	        self.network_id: str = ''
	        self.phonetic_chain: str = ''
	        self.inversion_type: str = 'HIDDEN'
	        self.source_form: str = ''
	        self.foundation_ref: str = ''

	    def to_row(self) -> tuple:
	        """Return the record as a 14-tuple in A1_ENTRIES column order."""
	        return tuple(getattr(self, column) for column in self._COLUMNS)


	class ProcessResult:
	    """Everything the engine learned while processing one input term."""

	    def __init__(self, input_term: str, input_type: str):
	        self.input_term = input_term
	        self.input_type = input_type

	        # Lookup / candidate state.
	        self.existing_entry_id: Optional[int] = None
	        self.root_candidates: List[RootCandidate] = []
	        self.confirmed_root: Optional[RootCandidate] = None

	        # One slot per QUF gate.
	        self.q_gate: Optional[GateResult] = None
	        self.u_gate: Optional[GateResult] = None
	        self.f_gate: Optional[GateResult] = None

	        # Outputs.
	        self.entry_record: Optional[EntryRecord] = None
	        self.cluster_members: list = []
	        self.queue_id: Optional[str] = None
	        self.report_path: Optional[str] = None
	        # (parent_name, parent_id) if derivative chain detected
	        self.derivative_of: Optional[tuple] = None
	        # True if routed through ORIG2/Kashgari track
	        self.orig2_track: bool = False
	        # Kashgari attestation details
	        self.orig2_details: Optional[dict] = None
	        self.log: list = []

	    def add_log(self, msg: str):
	        """Append a time-stamped copy of *msg* to the log and echo *msg* to stdout."""
	        stamp = f"{datetime.now():%H:%M:%S}"
	        self.log.append(f"[{stamp}] {msg}")
	        print(f" {msg}")


	# ═══════════════════════════════════════════════════════════════════════════════
	# COMPONENT 1 — InputRouter
	# ═══════════════════════════════════════════════════════════════════════════════

	class InputRouter:
	    """Detects input type and routes to appropriate processing pipeline."""

	    ARABIC_CHARS = set(
	        'ابتثجحخدذرزسشصضطظعغفقكلمنهوي'
	        'أإآءةىًٌٍَُِّْ'
	    )

	    CYRILLIC_CHARS = set(
	        'абвгдежзийклмнопрстуфхцчшщъыьэюя'
	        'АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ'
	        'ёЁ'
	    )

	    # Patterns are compiled once at class creation instead of on every call.
	    _QURAN_REF_RE = re.compile(r'^Q\d+:\d+$', re.IGNORECASE)
	    _RATIO_RE = re.compile(r'^\d+/\d+$')
	    _ROOT_SEPARATOR_RE = re.compile(r'[\s\-—–]+')
	    _NON_CYRILLIC_RE = re.compile(r'[^а-яА-ЯёЁ\-]')
	    _NON_LATIN_RE = re.compile(r'[^a-zA-Z\-]')

	    def detect(self, raw: str) -> tuple:
	        """Classify *raw* and return ``(input_type, cleaned, key_terms)``.

	        input_type is one of 'quran_ref', 'ratio', 'arabic_root',
	        'russian_phrase', 'russian_word', 'phrase' or 'english_word';
	        the checks run in that priority order. key_terms is the list of
	        individual terms to process.
	        """
	        s = raw.strip()

	        # Qur'anic reference: Q18:83
	        if self._QURAN_REF_RE.match(s):
	            ref = s.upper()
	            return ('quran_ref', ref, [ref])

	        # Ratio: 4/3, 19/7
	        if self._RATIO_RE.match(s):
	            return ('ratio', s, [s])

	        # Arabic root: at least two Arabic characters; runs of whitespace,
	        # hyphens and dashes are normalised to single hyphens.
	        if sum(1 for c in s if c in self.ARABIC_CHARS) >= 2:
	            cleaned = self._ROOT_SEPARATOR_RE.sub('-', s).strip('-')
	            return ('arabic_root', cleaned, [cleaned])

	        words = s.split()

	        # Russian: at least two Cyrillic characters.
	        if sum(1 for c in s if c in self.CYRILLIC_CHARS) >= 2:
	            if len(words) > 1:
	                content = [w for w in words
	                           if any(ch in self.CYRILLIC_CHARS for ch in w)]
	                if len(content) > 1:
	                    return ('russian_phrase', s, content)
	                if content:
	                    return ('russian_word', content[0], [content[0]])
	            clean = self._NON_CYRILLIC_RE.sub('', s)
	            return ('russian_word', clean, [clean])

	        # Phrase (multiple words): keep alphabetic non-function words.
	        if len(words) > 1:
	            content = [w for w in words
	                       if w.lower() not in FUNCTION_WORDS and w.isalpha()]
	            if len(content) > 1:
	                return ('phrase', s, content)
	            if content:
	                return ('english_word', content[0], [content[0]])
	            # No content word left: fall through to the single-word cleanup.

	        # Single English word: drop everything except ASCII letters and hyphens.
	        clean = self._NON_LATIN_RE.sub('', s)
	        return ('english_word', clean, [clean])


	# ═══════════════════════════════════════════════════════════════════════════════
	# COMPONENT 3 — QGate (loaded first; PhoneticReversal depends on it)
	# ═══════════════════════════════════════════════════════════════════════════════

	class QGate:
	    """Binary PASS/FAIL against 1,681 Qur'anic roots in ROOT_LIST."""

	    # Arabic short-vowel / tanwin / shadda / sukun marks stripped before lookup.
	    _HARAKAT_RE = re.compile(r'[ًٌٍَُِّْ]')

	    def __init__(self, quran_root_file: str):
	        # bare root string (no hyphens) → {'token_count', 'lemma_count', 'ar_word'}
	        self.roots: Dict[str, dict] = {}
	        self._load_roots(quran_root_file)
	        print(f" QGate: {len(self.roots)} Qur'anic roots loaded")

	    @staticmethod
	    def _to_int(value) -> int:
	        """Coerce a spreadsheet cell to int; empty or unparsable cells give 0."""
	        try:
	            return int(value or 0)
	        except (ValueError, TypeError):
	            return 0

	    def _load_roots(self, filepath: str):
	        """Fill ``self.roots`` from the ROOT_LIST sheet of *filepath*.

	        Loading is best-effort: any failure is printed and whatever was read
	        so far is kept. The workbook is closed even when reading fails.
	        """
	        try:
	            wb = load_workbook(filepath, read_only=True, data_only=True)
	            try:
	                ws = wb['ROOT_LIST']
	                headers = None
	                for row in ws.iter_rows(values_only=True):
	                    if headers is None:
	                        # First row holds the column names.
	                        headers = [str(h).strip() if h else '' for h in row]
	                        continue
	                    if not any(row):
	                        continue
	                    d = dict(zip(headers, row))
	                    # ROOT_LIST: 'Root' column = bare Arabic string (no hyphens)
	                    # 'Letters' column = integer count of letters (3 or 4)
	                    bare_root = str(d.get('Root', '') or '').strip()
	                    if bare_root and len(bare_root) >= 2:
	                        self.roots[bare_root] = {
	                            'token_count': self._to_int(d.get('Token Count')),
	                            'lemma_count': self._to_int(d.get('Lemma Count')),
	                            'ar_word': bare_root,
	                        }
	            finally:
	                # Read-only workbooks keep the file open until closed.
	                wb.close()
	        except Exception as e:
	            print(f" QGate load error: {e}")

	    def check(self, root_letters: str) -> 'GateResult':
	        """Return a passing GateResult when *root_letters* is a loaded root.

	        Separators (whitespace, hyphens, dashes) are normalised to single
	        hyphens; the lookup itself uses the separator-free form with
	        diacritics removed, then the hyphenated form with diacritics removed.
	        """
	        normalized = re.sub(r'[\s\-—–]+', '-', root_letters.strip()).strip('-')
	        # ROOT_LIST stores bare strings without hyphens — strip them for lookup
	        bare_lookup = self._HARAKAT_RE.sub('', re.sub(r'[\-\s]', '', normalized))
	        if bare_lookup in self.roots:
	            return GateResult(True, {**self.roots[bare_lookup],
	                                     'root_letters': normalized})

	        # Also try with harakat stripped from normalized (hyphenated) form
	        bare_hyph = self._HARAKAT_RE.sub('', normalized)
	        if bare_hyph in self.roots:
	            return GateResult(True, {**self.roots[bare_hyph],
	                                     'root_letters': bare_hyph})

	        return GateResult(False, {
	            'root_letters': normalized,
	            'reason': 'Not in 1,681 Qur\'anic roots — flagged ORIG2 candidate for human review',
	            # two-track gate: human must check Kashgari corpus next
	            'orig2_candidate': True,
	        })


	# ═══════════════════════════════════════════════════════════════════════════════
	# COMPONENT 2b — KashgariIndex + KashgariGate (ORIG2 track)
	# ═══════════════════════════════════════════════════════════════════════════════

	class KashgariIndex:
	    """
	    Parses and indexes the Kashgari Dīwān corpus (Dankoff & Kelly, Harvard 1982-1985)
	    for ORIG2 attestation. 74K-line OCR'd plain text → searchable by consonant skeleton.

	    Three search modes:
	    1. search_skeleton: consonant skeleton of English word matches Kashgari entry
	    2. search_translit: direct transliteration lookup
	    3. search_english: English word found in Kashgari definition/gloss
	    """

	    # Turkic vowels (broader set than English — includes ü, ö, ı, etc.)
	    TURKIC_VOWELS = set('aeiouüöıäəāēīōūAEIOUÜÖ')

	    # Corpus lines before this 1-based line number are header/intro and skipped.
	    _FIRST_ENTRY_LINE = 6300

	    # HEADWORD (CAPS, possibly with ' or - or special chars)
	    # + transliteration (mixed case — OCR uses uppercase for č→C, ş→S, etc.)
	    # + definition (in quotes or following text)
	    _ENTRY_RE = re.compile(
	        r"""^['"]?                                 # optional leading quote
	        ([A-Z][A-Z0-9'\-§_\^]+)                    # headword in CAPS
	        \s+
	        ([a-zA-ZüöıçşğÜÖ][a-zA-Z\-üöıçşğÜÖ]*)      # transliteration (mixed case for OCR)
	        \s+
	        [""\"]?                                    # optional opening quote
	        (.+?)                                      # definition text
	        [""\"]?\s*                                 # optional closing quote
	        (?:0\s*)?$                                 # optional entry-end marker '0'
	        """, re.VERBOSE)
	    # Page refs / footnotes ("[x...") and numerals-only lines are not entries.
	    _BRACKET_LINE_RE = re.compile(r'^\[\w')
	    _NUMERAL_LINE_RE = re.compile(r'^\d+\.?\s*$')
	    # First parenthesised span of a definition, e.g. "city (madina)" → "madina".
	    _GLOSS_RE = re.compile(r'\(([^)]+)\)')

	    def __init__(self, corpus_file: str):
	        self.entries: Dict[str, list] = {}          # translit → [entry_dicts]
	        self.skeleton_index: Dict[str, list] = {}   # consonant_skeleton → [entry_dicts]
	        self._parse(corpus_file)
	        self._build_skeleton_index()

	    @staticmethod
	    def extract_consonants(translit: str) -> str:
	        """Strip vowels and non-letters from *translit* to get its consonant skeleton."""
	        vowels = KashgariIndex.TURKIC_VOWELS
	        return ''.join(c for c in translit.lower()
	                       if c.isalpha() and c not in vowels)

	    def _parse(self, filepath: str):
	        """Parse the corpus line by line, filling ``self.entries``.

	        A missing file is reported on stdout and leaves the index empty.
	        """
	        if not os.path.exists(filepath):
	            print(f" KashgariIndex: corpus file not found: {filepath}")
	            return

	        count = 0
	        with open(filepath, 'r', encoding='utf-8', errors='replace') as f:
	            for line_no, line in enumerate(f, 1):
	                if line_no < self._FIRST_ENTRY_LINE:  # skip header/intro
	                    continue
	                stripped = line.strip()
	                if not stripped or len(stripped) < 5:
	                    continue
	                # Skip page refs, footnotes, numerals-only lines
	                if (self._BRACKET_LINE_RE.match(stripped)
	                        or self._NUMERAL_LINE_RE.match(stripped)):
	                    continue

	                m = self._ENTRY_RE.match(stripped)
	                if not m:
	                    continue

	                headword = m.group(1).strip('-')
	                translit = m.group(2).lower().strip('-')  # normalize to lowercase
	                meaning = m.group(3).strip(' ."\'')

	                # Skip very short or noise entries
	                if len(translit) < 1 or len(meaning) < 3:
	                    continue

	                # Extract Arabic gloss in parentheses. The previous pattern
	                # used '$' anchors where escaped parentheses were intended
	                # and therefore never matched.
	                gloss_match = self._GLOSS_RE.search(meaning)
	                arabic_gloss = gloss_match.group(1) if gloss_match else ''

	                self.entries.setdefault(translit, []).append({
	                    'headword': headword,
	                    'translit': translit,
	                    'meaning': meaning,
	                    'arabic_gloss': arabic_gloss,
	                    'line': line_no,
	                })
	                count += 1

	        print(f" KashgariIndex: parsed {count} entries from corpus "
	              f"({len(self.entries)} unique transliterations)")

	    def _build_skeleton_index(self):
	        """Build the consonant skeleton → entries index from ``self.entries``."""
	        for translit, entries_list in self.entries.items():
	            skel = self.extract_consonants(translit)
	            if not skel:
	                continue  # vowel-only transliteration: nothing to index
	            bucket = self.skeleton_index.setdefault(skel, [])
	            # Each indexed record is a copy of the entry plus its skeleton.
	            bucket.extend({**e, 'skeleton': skel} for e in entries_list)
	        print(f" KashgariIndex: {len(self.skeleton_index)} unique consonant skeletons indexed")

	    def search_skeleton(self, consonants: str) -> list:
	        """Search by consonant skeleton (e.g., 'blq' → balıq = city)."""
	        return self.skeleton_index.get(consonants.lower(), [])

	    def search_translit(self, term: str) -> list:
	        """Search by exact transliteration."""
	        return self.entries.get(term.lower(), [])

	    def search_english(self, english_word: str) -> list:
	        """Search all definitions for an English word (whole-word match only)."""
	        # Require whole-word match to avoid false positives
	        # e.g., "head" should NOT match "thread" or "heading"
	        pattern = re.compile(r'\b' + re.escape(english_word.lower()) + r'\b')
	        return [e
	                for entries_list in self.entries.values()
	                for e in entries_list
	                if pattern.search(e['meaning'].lower())]


	class KashgariGate:
	    """
	    ORIG2 attestation gate — checks Kashgari corpus for Turkic roots.
	    Fires when Q-Gate (ORIG1) FAILS. Implements B01-B07 phonology checks.

	    Protocol (from CLAUDE.md §6 two-track gate):
	    ROOT_LIST FAIL → Kashgari search → if attested → ORIG2 entry → BITIG_A1_ENTRIES
	    If BOTH fail → entry cannot exceed score 7.
	    """

	    # ── KNOWN ORIG2 ENTRIES ─────────────────────────────────────────────────
	    # v3.2: Manually verified entries from Kashgari corpus research that
	    # the KashgariIndex parser misses (thematic sections, OCR issues).
	    # Keyed by consonant skeleton → list of entry dicts.
	    # Source: verified with page/line citations from Dankoff & Kelly.
	    KNOWN_ORIG2_ENTRIES = {
	        'çp': [{'translit': 'çap-', 'meaning': 'beat, strike (neck), swim [ḍaraba]',
	                'line': 57461, 'headword': 'ÇAP-', 'skeleton': 'çp'}],
	        'cp': [{'translit': 'çap-', 'meaning': 'beat, strike (neck), swim [ḍaraba]',
	                'line': 57461, 'headword': 'ÇAP-', 'skeleton': 'cp'}],
	        'sp': [{'translit': 'sap-', 'meaning': 'thread (needle), bind, mend',
	                'line': 61433, 'headword': 'SAP-', 'skeleton': 'sp'}],
	        'sb': [{'translit': 'sap-', 'meaning': 'thread (needle), bind, mend [OP_VOICE p→b]',
	                'line': 61433, 'headword': 'SAP-', 'skeleton': 'sb'},
	               {'translit': 'çap-', 'meaning': 'beat, strike [OP_VOICE ç→s, p→b]',
	                'line': 57461, 'headword': 'ÇAP-', 'skeleton': 'sb'}],
	        'qlc': [{'translit': 'qılıç', 'meaning': 'sword (sayf)',
	                 'line': 19685, 'headword': 'QILIÇ', 'skeleton': 'qlc'}],
	        'qlç': [{'translit': 'qılıç', 'meaning': 'sword (sayf)',
	                 'line': 19685, 'headword': 'QILIÇ', 'skeleton': 'qlç'}],
	        'krt': [{'translit': 'kirit', 'meaning': 'key (miftāḥ) — al-Kashgari: close to iqlīd',
	                 'line': 19635, 'headword': 'KIRIT', 'skeleton': 'krt'}],
	        'kld': [{'translit': 'kirit', 'meaning': 'key (miftāḥ) — Kashgari: iqlīd with q→k, l→r, d→t',
	                 'line': 19635, 'headword': 'KIRIT', 'skeleton': 'kld'}],
	        'bl': [{'translit': 'böl-', 'meaning': 'divide into groups',
	                'line': 57002, 'headword': 'BÖL-', 'skeleton': 'bl'}],
	        'tn': [{'translit': 'ton-', 'meaning': 'freeze, become ice',
	                'line': 19820, 'headword': 'TON-', 'skeleton': 'tn'}],
	        'tmn': [{'translit': 'tuman', 'meaning': 'fog, mist (10,000)',
	                 'line': 19780, 'headword': 'TUMAN', 'skeleton': 'tmn'}],
	        # v3.2: English ch = Turkic ç (same sound, different notation)
	        'chp': [{'translit': 'çap-', 'meaning': 'beat, strike (neck), swim [ḍaraba]',
	                 'line': 57461, 'headword': 'ÇAP-', 'skeleton': 'chp'}],
	        'chb': [{'translit': 'çap-', 'meaning': 'beat, strike [OP_VOICE p→b]',
	                 'line': 57461, 'headword': 'ÇAP-', 'skeleton': 'chb'}],
	    }

	    # Bitig consonant equivalences used to widen a skeleton search.
	    _BITIG_EQUIVS = {
	        'p': ['b'],            # B01: /f/→/p/→/b/ in Bitig
	        'b': ['p'],
	        'k': ['q', 'g'],       # velar variants
	        'q': ['k', 'g'],
	        'g': ['k', 'q'],
	        'c': ['s', 'z', 'j'],  # sibilant variants
	        's': ['z', 'c'],
	        'z': ['s'],
	        't': ['d'],
	        'd': ['t'],
	        'j': ['c'],
	        'f': ['p', 'b'],       # B01: /f/ non-native, closest Bitig equivalents
	    }
	    # Upper bound on the number of variants tried per skeleton.
	    _MAX_VARIANTS = 12

	    # B03: suffixes whose presence on a long form triggers a warning.
	    _TURKIC_SUFFIXES = ('lar', 'ler', 'liq', 'lik', 'chi', 'ci',
	                        'mak', 'mek', 'gan', 'gen', 'diq', 'dik')

	    def __init__(self, kashgari_index: 'KashgariIndex'):
	        self.index = kashgari_index

	    def check(self, en_word: str, consonants: list) -> 'GateResult':
	        """
	        ORIG2 attestation check.
	        Args:
	        en_word: the English word being processed
	        consonants: extracted consonant list from PhoneticReversal
	        Returns:
	        GateResult with Kashgari attestation details if found

	        Search order: exact skeleton, consonant variants of it, then the
	        skeleton with one or two trailing consonants removed (and variants of
	        that), and finally a whole-word search of the definitions.
	        """
	        skel = ''.join(consonants).lower()

	        # 1-2. Exact skeleton, then Bitig consonant variants (q↔k↔g, p↔b, etc.)
	        hits = self._lookup(skel) or self._lookup_variants(skel)

	        # 3. Try shorter skeletons (strip suffixes — B03 agglutinative)
	        if not hits and len(skel) >= 3:
	            for trim in range(1, min(3, len(skel) - 1)):
	                trimmed = skel[:-trim]
	                hits = self._lookup(trimmed) or self._lookup_variants(trimmed)
	                if hits:
	                    break

	        if hits:
	            return self._attested(hits, en_word,
	                                  hits[0].get('skeleton', skel),
	                                  'skeleton_match')

	        # 4. Meaning search fallback
	        meaning_hits = self.index.search_english(en_word)
	        if meaning_hits:
	            return self._attested(
	                meaning_hits, en_word,
	                KashgariIndex.extract_consonants(meaning_hits[0]['translit']),
	                'meaning_match')

	        return GateResult(False, {
	            'reason': f"Not in Kashgari corpus (skeleton '{skel}' unattested)",
	            'skeleton_searched': skel,
	        })

	    def _lookup(self, skeleton: str) -> list:
	        """Hits for *skeleton* from the parsed index, else from KNOWN_ORIG2_ENTRIES."""
	        hits = self.index.search_skeleton(skeleton)
	        if not hits:
	            # v3.2: manually verified entries the parser missed
	            hits = self.KNOWN_ORIG2_ENTRIES.get(skeleton, [])
	        return list(hits)

	    def _lookup_variants(self, skeleton: str) -> list:
	        """Hits for the first consonant variant of *skeleton* that is attested."""
	        for variant in self._bitig_variants(skeleton):
	            hits = self._lookup(variant)
	            if hits:
	                return hits  # first successful variant is enough
	        return []

	    def _attested(self, hits: list, en_word: str, skeleton: str,
	                  attestation_type: str) -> 'GateResult':
	        """Build the passing GateResult for the first entry of *hits*."""
	        best = hits[0]
	        warnings = self._phonology_checks(best.get('translit', ''), en_word)
	        return GateResult(True, {
	            'kashgari_translit': best['translit'],
	            'kashgari_meaning': best['meaning'],
	            'kashgari_line': best['line'],
	            'kashgari_headword': best['headword'],
	            'skeleton': skeleton,
	            'all_hits': len(hits),
	            'bitig_warnings': warnings,
	            'attestation_type': attestation_type,
	        })

	    def _bitig_variants(self, skeleton: str) -> list:
	        """Generate Bitig consonant equivalences for broader search (B01-B05 informed).

	        Each variant replaces exactly one character of *skeleton*. Variants
	        are returned left to right, in the order the alternatives are listed
	        in the equivalence table, capped at 12. The order is deterministic,
	        so the "first successful variant" chosen by the caller no longer
	        depends on set iteration order.
	        """
	        variants = []
	        for i, c in enumerate(skeleton):
	            for alt in self._BITIG_EQUIVS.get(c, []):
	                v = skeleton[:i] + alt + skeleton[i + 1:]
	                if v != skeleton and v not in variants:
	                    variants.append(v)
	        return variants[:self._MAX_VARIANTS]

	    def _phonology_checks(self, translit: str, en_word: str) -> list:
	        """Run the automated phonology checks on *translit*; return warning strings.

	        Only B01, B05 and B03 are checked here. *en_word* is accepted but
	        not used by these checks.
	        """
	        warnings = []
	        tl = translit.lower()

	        # B01: No /f/ phoneme in Bitig
	        if 'f' in tl:
	            warnings.append("B01: /f/ in Turkic form — foreign contamination flag")

	        # B05: No /w/ phoneme in old Bitig
	        if 'w' in tl:
	            warnings.append("B05: /w/ detected — possible ORIG1, not ORIG2")

	        # B03: Agglutinative morphology — flag long forms for suffix stripping.
	        # Only the first matching suffix is reported.
	        for sfx in self._TURKIC_SUFFIXES:
	            if tl.endswith(sfx) and len(tl) > len(sfx) + 2:
	                warnings.append(f"B03: suffix -{sfx} detected — strip before root trace")
	                break

	        return warnings


	# ═══════════════════════════════════════════════════════════════════════════════
	# COMPONENT 2 — PhoneticReversal Engine
	# ═══════════════════════════════════════════════════════════════════════════════

	class PhoneticReversal:
	"""
	The core missing engine: English word → ranked ORIG1 root candidates.
	Works backward through M1_PHONETIC_SHIFTS.
	"""

	def __init__(self, master_file: str, q_gate: QGate):
	    """Load the phonetic shift table from *master_file* and build the reverse map.

	    *q_gate* is stored on the instance for later use.
	    """
	    self.q_gate = q_gate
	    self.shift_data: List[dict] = []
	    self.forward_map: Dict[str, tuple] = {}   # AR_letter → (shift_id, [en_chars])
	    self.reverse_map: Dict[str, list] = {}    # EN_char → [(AR_letter, shift_id)]

	    self._load_shifts(master_file)
	    self._build_reverse_map()

	    shift_total = len(self.shift_data)
	    pattern_total = len(self.reverse_map)
	    print(f" PhoneticReversal: {shift_total} shifts, "
	          f"{pattern_total} EN patterns in reverse map")

	def _load_shifts(self, filepath: str):
	    """Fill ``shift_data`` and ``forward_map`` from the M1_PHONETIC_SHIFTS sheet.

	    Loading is best-effort: any failure is printed and whatever was read so
	    far is kept. The workbook is closed even when reading fails.
	    """
	    try:
	        wb = load_workbook(filepath, read_only=True, data_only=True)
	        try:
	            ws = wb['M1_PHONETIC_SHIFTS']
	            headers = None
	            for row in ws.iter_rows(values_only=True):
	                if headers is None:
	                    # First row holds the column names.
	                    headers = [str(h).strip() if h else '' for h in row]
	                    continue
	                if not any(row):
	                    continue
	                d = dict(zip(headers, row))
	                sid = str(d.get('SHIFT_ID', '') or '').strip()
	                ar = str(d.get('AR_LETTER', '') or '').strip()
	                en_raw = str(d.get('EN_OUTPUTS', '') or '').strip()
	                if sid and ar:
	                    # EN_OUTPUTS is a comma-separated list; blanks are dropped.
	                    en_list = [x.strip().lower() for x in en_raw.split(',') if x.strip()]
	                    self.shift_data.append(
	                        {'shift_id': sid, 'ar_letter': ar, 'en_outputs': en_list})
	                    # A later row for the same AR letter replaces the earlier one.
	                    self.forward_map[ar] = (sid, en_list)
	        finally:
	            # Read-only workbooks keep the file open until closed.
	            wb.close()
	    except Exception as e:
	        print(f" PhoneticReversal load error: {e}")

	def _build_reverse_map(self):
	    """Invert ``shift_data`` into ``reverse_map``: EN output → [(AR letter, shift id)].

	    Every EN output gets a key (lower-cased); duplicate pairs are added once.
	    """
	    for shift in self.shift_data:
	        pair = (shift['ar_letter'], shift['shift_id'])
	        for en_output in shift['en_outputs']:
	            bucket = self.reverse_map.setdefault(en_output.lower(), [])
	            if pair not in bucket:
	                bucket.append(pair)

	# ── string-level helpers ──────────────────────────────────────────────────

	def strip_operations(self, word: str) -> tuple:
	    """
	    Strip OP_SUFFIX from English word.
	    Returns: (stripped_word, operations_list, suffix_removed)

	    v2.4: OP_STOP removed from here — now a separate candidate generation
	    path in reverse() via _generate_op_stop_variants(). This prevents
	    universal ND→N from destroying CALENDAR, CYLINDER, BOUNDARY etc.
	    OP_STOP is a HYPOTHESIS about geminated nasals, not a certainty.

	    v2.3 FIX — Minimum-consonant guard: if suffix stripping leaves
	    fewer than 3 consonants, UNDO the strip. Root letters > suffix.
	    """
	    w = word.lower().strip()
	    ops = []
	    suffix_removed = ''
	    vowels = set('aeiou')  # loop-invariant, built once

	    # OP_SUFFIX — with minimum-consonant guard. LATIN_SUFFIXES is ordered
	    # longest first, so the first suffix that fits is the longest one.
	    for suffix in LATIN_SUFFIXES:
	        # The suffix must match and leave at least two characters.
	        if not (w.endswith(suffix) and len(w) - len(suffix) >= 2):
	            continue
	        candidate = w[:-len(suffix)]
	        # Count consonants in stripped form
	        cons_count = sum(1 for ch in candidate if ch.isalpha() and ch not in vowels)
	        if cons_count >= 3:
	            w = candidate
	            suffix_removed = suffix
	            ops.append(f'OP_SUFFIX(-{suffix})')
	        # Otherwise stripping would leave < 3 consonants and the word is
	        # kept whole. Either way only this first fitting suffix is considered.
	        # NOTE(review): the source this was recovered from had lost its
	        # indentation; confirm the break belongs at this level and not inside
	        # the consonant-count branch.
	        break

	    return w, ops, suffix_removed

	def extract_consonants(self, word: str) -> list:
	    """Return the ordered consonant skeleton of ``word``.

	    The skeleton is first extracted with digraphs (TH, SH, ...) kept as one
	    unit. If that gives fewer than 3 units, the word is extracted again with
	    digraphs split into single letters, and the split result is used only
	    when it is strictly longer (FAITH: ['f','th'] → ['f','t','h']).

	    Args:
	        word: English word.

	    Returns:
	        List of consonant units in word order.
	    """
	    with_digraphs = self._extract_consonants_inner(word, use_digraphs=True)
	    if len(with_digraphs) >= 3:
	        return with_digraphs
	    # Short skeleton: splitting digraphs may expose extra root consonants.
	    letters_only = self._extract_consonants_inner(word, use_digraphs=False)
	    return letters_only if len(letters_only) > len(with_digraphs) else with_digraphs

	def _extract_consonants_inner(self, word: str, use_digraphs: bool = True) -> list:
	"""Inner extraction with optional digraph handling."""
	vowels = set('aeiou')
	w = word.lower()
	# Strip terminal-Y before processing (terminal Y = vowel in English)
	if w.endswith('y') and len(w) > 1 and w[-2] not in ('a', 'e', 'i', 'o', 'u'):
	w = w[:-1] # e.g. century→centur, glory→glor, territory→territor
	DIGRAPHS = ('sh', 'ch', 'gh', 'th', 'ph', 'wh', 'qu')
	result = []
	i = 0
	while i < len(w):
	digraph = w[i:i+2] if i + 1 < len(w) else ''
	if use_digraphs and digraph in DIGRAPHS:
	result.append(digraph)
	i += 2
	elif w[i] not in vowels:
	result.append(w[i])
	i += 1
	else:
	i += 1
	return result

	def map_consonants_to_arabic(self, consonants: list) -> list:
	    """Look up the Arabic candidates for every consonant position.

	    Args:
	        consonants: Consonant units as produced by extract_consonants().

	    Returns:
	        One list per input unit, holding the ``(AR_letter, shift_id)`` pairs
	        found in ``self.reverse_map``. A two-character unit with no entry of
	        its own falls back to the entry of its first character. Units with
	        no match give an empty list.
	    """
	    lookup = self.reverse_map.get
	    per_position = []
	    for unit in consonants:
	        pairs = lookup(unit, [])
	        if len(unit) == 2 and not pairs:
	            # Unknown digraph: retry with its leading letter only.
	            pairs = lookup(unit[0], [])
	        per_position.append(pairs)
	    return per_position

	def generate_root_permutations(self, mapped: list) -> list:
	    """Build candidate root strings from per-position Arabic candidates.

	    Positions are chosen in word order (combinations, not reorderings):
	    three at a time when at least three positions exist, otherwise two.
	    For every choice, one Arabic letter is taken per position and joined
	    with '-'. A position with no candidates contributes nothing.

	    Args:
	        mapped: Output of map_consonants_to_arabic(): one list of
	            ``(AR_letter, shift_id)`` pairs per consonant position.

	    Returns:
	        Sorted list of unique root strings; [] when fewer than two
	        positions are given. Sorting makes the order reproducible; the
	        previous ``list(set)`` order changed with string-hash randomisation.
	    """
	    positions = len(mapped)
	    if positions < 2:
	        return []
	    letters_per_pos = [sorted({ar for ar, _sid in pos}) for pos in mapped]
	    size = 3 if positions >= 3 else positions
	    roots = set()
	    for pos_combo in itertools.combinations(range(positions), size):
	        for combo in itertools.product(*(letters_per_pos[p] for p in pos_combo)):
	            if all(combo):  # skip combos containing an empty letter string
	                roots.add('-'.join(combo))
	    return sorted(roots)

	def reverse(self, en_word: str) -> List[RootCandidate]:
	"""
	Main public method: English word → ranked list of Qur'anic root candidates.
	Returns only candidates that pass Q-Gate.

	v2.4 — VOWEL-STRIP-FIRST ARCHITECTURE:
	PRIMARY: raw consonant skeleton (strip vowels only) — full skeleton
	SECONDARY: suffix-stripped consonants (OP_SUFFIX applied) — Latin/Greek
	OP_STOP: ND→N / MB→M variants on PRIMARY — separate candidate path
	N15, Gate 3e, R08a all run on PRIMARY consonants.

	This replaces v2.3's dual-path (suffix-first + raw fallback) architecture.
	The user's insight: "strip vowels first → consonants are immediately visible.
	POWER minus O and E leaves PWR. No suffix issue."

	Operations are now CANDIDATE GENERATORS, not destructive pre-processors.

	Args:
	en_word: English word to trace back to root candidates.

	Returns:
	List of RootCandidate objects. Forced hits (N15 / Gate 3e) come first,
	then the remaining candidates ordered by (positional_score, token_count)
	descending. The list is cut to 15 and then extended with the first
	candidate of every extra_consonants tier not already present.
	"""
	# ═══ PRIMARY: raw vowel extraction (vowel-strip-first principle) ══════════
	primary_cons = self.extract_consonants(en_word)

	# ═══ SECONDARY: suffix-stripped consonants (OP_SUFFIX only) ═══════════════
	stripped, ops, suffix = self.strip_operations(en_word)
	secondary_cons = self.extract_consonants(stripped)

	# ═══ OP_STOP: generate ND→N / MB→M variants from PRIMARY ═════════════════
	op_stop_variants = self._generate_op_stop_variants(primary_cons)

	# passing collects accepted candidates; seen holds root strings (and
	# path-specific keys) that were already submitted to the Q-Gate.
	passing = []
	seen = set()

	# ── N15 PRIORITY (R09): C/G/K-R-N skeleton → force ق-ر-ن first ──────────
	if self._check_n15_priority(primary_cons):
	n15 = 'ق-ر-ن'
	qr = self.q_gate.check(n15)
	# seen is still empty at this point, so the "not in seen" test cannot fail here.
	if qr.passed and n15 not in seen:
	seen.add(n15)
	c = RootCandidate(
	letters = n15,
	token_count = qr.details.get('token_count', 0),
	lemma_count = qr.details.get('lemma_count', 0),
	ar_word = qr.details.get('ar_word', ''),
	operations = []
	)
	c.positional_score = 1.0 # N15 priority — forced to head
	c.transposition_flag = False
	c._n15_priority = True # R09: sentinel — survives sort
	c.extra_consonants = max(0, len(primary_cons) - 3)
	passing.append(c)

	# ── GATE 3e: مَفْعَل SKELETON PRIORITY ──────────────────────────────────────
	# The leading 'm' is dropped, the rest is suffix-stripped, and the joined
	# skeleton is looked up in MAFAL_SKELETONS (defined outside this method).
	if en_word.lower().startswith('m'):
	m_remaining = self.extract_consonants(
	self.strip_operations(en_word[1:])[0]
	)
	m_skel = ''.join(m_remaining)
	forced_root = MAFAL_SKELETONS.get(m_skel)
	if forced_root:
	mf_key = forced_root + '__MAFAL'
	qr = self.q_gate.check(forced_root)
	if qr.passed and mf_key not in seen:
	seen.add(mf_key)
	seen.add(forced_root) # prevent standard path duplicate
	mf_c = RootCandidate(
	letters = forced_root,
	token_count = qr.details.get('token_count', 0),
	lemma_count = qr.details.get('lemma_count', 0),
	ar_word = qr.details.get('ar_word', ''),
	operations = ['OP_PREFIX(مَفْعَل→m)']
	)
	mf_c.positional_score = 1.0 # forced to head
	mf_c.transposition_flag = False
	mf_c._n15_priority = True # reuse N15 sentinel for sort
	mf_c.extra_consonants = max(0, len(m_remaining) - 3)
	passing.append(mf_c)

	# ── PRIMARY PATH: all permutations from raw consonants ───────────────────
	mapped_primary = self.map_consonants_to_arabic(primary_cons)
	primary_roots = self.generate_root_permutations(mapped_primary)

	for rs in primary_roots:
	if rs in seen:
	continue
	seen.add(rs)
	qr = self.q_gate.check(rs)
	if qr.passed:
	pos_s, trans = self._positional_score(primary_cons, rs)
	# root_size = number of letters in the '-'-separated root string.
	root_size = len([l for l in re.split(r'[\-\s]+', rs) if l.strip()])
	c = RootCandidate(
	letters = rs,
	token_count = qr.details.get('token_count', 0),
	lemma_count = qr.details.get('lemma_count', 0),
	ar_word = qr.details.get('ar_word', ''),
	operations = []
	)
	c.positional_score = pos_s
	c.transposition_flag = trans
	c.extra_consonants = max(0, len(primary_cons) - root_size)
	passing.append(c)

	# ── SECONDARY PATH: suffix-stripped consonants (if different) ─────────────
	# v2.4 FIX: If a root was already found via PRIMARY, the SECONDARY path
	# may have FEWER extra_consonants (suffix stripped → closer to root size).
	# Update the existing candidate's extra_consonants if secondary is better.
	# This prevents the coverage penalty from over-penalizing raw-path roots.
	if secondary_cons != primary_cons and len(secondary_cons) >= 2:
	mapped_sec = self.map_consonants_to_arabic(secondary_cons)
	sec_roots = self.generate_root_permutations(mapped_sec)
	for rs in sec_roots:
	root_size = len([l for l in re.split(r'[\-\s]+', rs) if l.strip()])
	sec_extra = max(0, len(secondary_cons) - root_size)
	if rs in seen:
	# Root already found via PRIMARY — update extra_consonants
	# if suffix-stripped path gives better coverage
	for existing in passing:
	if existing.letters == rs and sec_extra < existing.extra_consonants:
	existing.extra_consonants = sec_extra
	if ops and not existing.operations:
	existing.operations = ops
	break
	continue
	seen.add(rs)
	qr = self.q_gate.check(rs)
	if qr.passed:
	pos_s, trans = self._positional_score(secondary_cons, rs)
	# NOTE: every candidate built here receives the same ops list object, not a copy.
	c = RootCandidate(
	letters = rs,
	token_count = qr.details.get('token_count', 0),
	lemma_count = qr.details.get('lemma_count', 0),
	ar_word = qr.details.get('ar_word', ''),
	operations = ops
	)
	c.positional_score = pos_s
	c.transposition_flag = trans
	c.extra_consonants = sec_extra
	passing.append(c)

	# ── OP_STOP PATH: ND→N / MB→M variants ──────────────────────────────────
	for stop_cons, stop_label in op_stop_variants:
	if len(stop_cons) >= 2:
	mapped_stop = self.map_consonants_to_arabic(stop_cons)
	stop_roots = self.generate_root_permutations(mapped_stop)
	for rs in stop_roots:
	root_size = len([l for l in re.split(r'[\-\s]+', rs) if l.strip()])
	stop_extra = max(0, len(stop_cons) - root_size)
	if rs in seen:
	# Update extra_consonants if OP_STOP path is better
	for existing in passing:
	if existing.letters == rs and stop_extra < existing.extra_consonants:
	existing.extra_consonants = stop_extra
	existing.operations = [stop_label]
	break
	continue
	seen.add(rs)
	qr = self.q_gate.check(rs)
	if qr.passed:
	pos_s, trans = self._positional_score(stop_cons, rs)
	c = RootCandidate(
	letters = rs,
	token_count = qr.details.get('token_count', 0),
	lemma_count = qr.details.get('lemma_count', 0),
	ar_word = qr.details.get('ar_word', ''),
	operations = [stop_label]
	)
	c.positional_score = pos_s
	c.transposition_flag = trans
	c.extra_consonants = stop_extra
	passing.append(c)

	# ── R08a: M-PREFIX PARALLEL PATH ─────────────────────────────────────────
	# Gate 3d (v2.1): TWO M-prefix patterns tested in parallel:
	# مُ (mu-) = active participle prefix (مُرْسَل → MIRACLE)
	# مَ (ma-) = مَفْعَل place noun prefix (مَرْكَز → MARKET)
	# NOTE(review): the strip_operations() of this class only emits OP_SUFFIX
	# labels, so the 'OP_PREFIX' test below appears to be always true — confirm
	# whether it is still needed.
	if en_word.lower().startswith('m') and not any('OP_PREFIX' in op for op in ops):
	m_stripped, m_ops, m_sfx = self.strip_operations(en_word[1:])
	m_consonants = self.extract_consonants(m_stripped)
	if len(m_consonants) >= 2:
	m_mapped = self.map_consonants_to_arabic(m_consonants)
	m_roots = self.generate_root_permutations(m_mapped)
	prefix_labels = ['OP_PREFIX(مُ→m)', 'OP_PREFIX(مَفْعَل→m)']
	for pfx_label in prefix_labels:
	for rs in m_roots:
	# The dedup key includes the label, so a passing root yields one
	# candidate per prefix label, independent of the plain-root keys in seen.
	# NOTE(review): Gate 3e registers '<root>__MAFAL', which never equals a
	# key built here, so a Gate-3e root can be appended again — confirm intended.
	seen_key = rs + '__' + pfx_label
	if seen_key in seen:
	continue
	seen.add(seen_key)
	qr = self.q_gate.check(rs)
	if qr.passed:
	pos_s, trans = self._positional_score(m_consonants, rs)
	m_c = RootCandidate(
	letters = rs,
	token_count = qr.details.get('token_count', 0),
	lemma_count = qr.details.get('lemma_count', 0),
	ar_word = qr.details.get('ar_word', ''),
	operations = [pfx_label] + m_ops
	)
	m_c.positional_score = pos_s
	m_c.transposition_flag = trans
	m_root_size = len([l for l in re.split(r'[\-\s]+', rs) if l.strip()])
	m_c.extra_consonants = max(0, len(m_consonants) - m_root_size)
	passing.append(m_c)

	# ── SORT: N15 always first (R09), then positional_score DESC, token_count DESC ──
	n15_hits = [c for c in passing if getattr(c, '_n15_priority', False)]
	others = [c for c in passing if not getattr(c, '_n15_priority', False)]
	others.sort(key=lambda r: (r.positional_score, r.token_count), reverse=True)
	# v2.5: Increase candidate pool to 15 (was 10) for multi-candidate scoring.
	# Also guarantee TIER DIVERSITY: include the best candidate from each
	# extra_consonants tier, even if it would otherwise be cut by [:15].
	top_n = (n15_hits + others)[:15]
	# Collect tiers already represented
	tiers_present = set(getattr(c, 'extra_consonants', 99) for c in top_n)
	# Add best-from-missing-tiers from the full 'others' list
	# NOTE(review): only others[15:] is scanned. When n15_hits is non-empty, the
	# entries others[15 - len(n15_hits):15] are cut from top_n but never
	# considered here — confirm whether the slice should start earlier.
	for cand in others[15:]:
	tier = getattr(cand, 'extra_consonants', 99)
	if tier not in tiers_present:
	top_n.append(cand)
	tiers_present.add(tier)
	return top_n

	def _positional_score(self, consonants: list, root_letters: str) -> tuple:
	"""
	R11 — Transposition as Semantic-First Diagnostic.
	Scores how closely the consonant ORDER in the English word matches the root order.
	If an engine assigned the wrong root because of semantic pull, the consonant
	positions will be OUT of order (transposed) — this catches that failure.

	Algorithm:
	For each root letter (in order), find the FIRST English consonant that
	could map to it (via forward_map). Collect the position indices.
	If positions are monotonically increasing → correct order → score 1.0.
	If strictly reversed → transposition detected → score 0.1, flag True.
	Partial disorder → score 0.4, flag True.

	Returns: (positional_score: float, transposition_flag: bool)
	"""
	root_list = [l.strip() for l in re.split(r'[\-\s]+', root_letters) if l.strip()]
	if not root_list or not consonants:
	return 0.5, False

	match_positions = []
	for ar in root_list:
	info = self.forward_map.get(ar)
	if not info:
	continue
	_, en_outputs = info
	found_pos = None
	for i, c in enumerate(consonants):
	if c in en_outputs:
	found_pos = i
	break
	# Partial: single-char match against first char of digraph outputs
	if len(c) == 1 and any(c == eo[0] for eo in en_outputs if eo):
	found_pos = i
	break
	if found_pos is not None:
	match_positions.append(found_pos)

	if len(match_positions) < 2:
	return 0.5, False # Not enough data — neutral

	# Monotonically increasing = correct order
	in_order = all(match_positions[i] < match_positions[i + 1]
	for i in range(len(match_positions) - 1))
	if in_order:
	return 1.0, False

	# Strictly reversed = mirror transposition (strongest R11 signal)
	reversed_order = all(match_positions[i] > match_positions[i + 1]
	for i in range(len(match_positions) - 1))
	if reversed_order:
	return 0.1, True

	# Partial disorder
	return 0.4, True

	def _check_n15_priority(self, consonants: list) -> bool:
	"""
	R09: Check if consonant skeleton matches N15 pattern → force ق-ر-ن first.
	N15 network = القَرْن DERIVATIVE FAMILY. Triggered if ANY valid triple
	(ci, ri, ni) exists where ci < ri < ni — handles words like CONCERN where
	a nasal appears early before 'r' but another 'n' follows (c-n-c-R-N).
	"""
	ck_set = {'c', 'k', 'g', 'q'}
	ck_pos = [i for i, c in enumerate(consonants) if c in ck_set]
	r_pos = [i for i, c in enumerate(consonants) if c == 'r']
	n_pos = [i for i, c in enumerate(consonants) if c == 'n']
	if not (ck_pos and r_pos and n_pos):
	return False
	# Check if ANY valid triple exists with ci < ri < ni
	for ci in ck_pos:
	for ri in r_pos:
	if ri <= ci:
	continue
	for ni in n_pos:
	if ni > ri:
	return True
	return False

	def _generate_op_stop_variants(self, consonants: list) -> list:
	"""
	v2.4: OP_STOP as candidate generator, not destructive pre-processor.
	Finds consecutive N-D or M-B in consonant skeleton and generates
	variants with the stop removed (ND→N, MB→M).

	This is the correct architecture: OP_STOP is a HYPOTHESIS about the
	word's history, not a certainty. CALENDAR has ND but it's NOT from NN
	gemination. TANDOOR has ND from NN (تَنُّور). By generating BOTH
	variants (with and without OP_STOP), the Q-gate and positional score
	determine which is correct.

	Returns: list of (modified_consonants, op_label) tuples
	"""
	variants = []
	# ND→N: find consecutive ['n', 'd'] and remove 'd'
	for i in range(len(consonants) - 1):
	if consonants[i] == 'n' and consonants[i + 1] == 'd':
	new_cons = consonants[:i + 1] + consonants[i + 2:]
	variants.append((new_cons, 'OP_STOP(ND→N)'))
	break # only first occurrence
	# MB→M: find consecutive ['m', 'b'] and remove 'b'
	for i in range(len(consonants) - 1):
	if consonants[i] == 'm' and consonants[i + 1] == 'b':
	new_cons = consonants[:i + 1] + consonants[i + 2:]
	variants.append((new_cons, 'OP_STOP(MB→M)'))
	break
	return variants


	# ═══════════════════════════════════════════════════════════════════════════════
	# COMPONENT 2b — RussianPhoneticReversal (ORIG1 + ORIG2 dual track)
	# ═══════════════════════════════════════════════════════════════════════════════

	# Russian suffixes for stripping (longest first) — grammatical + derivational.
	# Each suffix is listed once ('ация' and 'ной' used to appear twice). sorted()
	# is stable, so suffixes of equal length keep the order written here.
	RUSSIAN_SUFFIXES = sorted([
	    'ность', 'ство', 'ение', 'ание', 'ация',
	    'тель', 'ский', 'ская', 'ское', 'ские',
	    'ость', 'ник', 'чик', 'щик', 'ция',
	    'ный', 'ная', 'ное', 'ные', 'ной',
	    'ить', 'ать', 'ять', 'еть', 'ова',
	    'ка', 'ок', 'ик', 'ек', 'ёк',
	    'ый', 'ая', 'ое', 'ые', 'ий', 'ой',
	    'ь',  # soft sign at word end — strip
	], key=len, reverse=True)


	class RussianPhoneticReversal:
	"""
	Russian word → ranked ORIG1/ORIG2 root candidates.
	Works backward through M1_ФОНЕТИЧЕСКИЕ_СДВИГИ (Russian shift table).

	Key differences from English PhoneticReversal:
	- Cyrillic consonant extraction (no digraphs — each letter = one phoneme)
	- Russian vowels: а,е,ё,и,о,у,ы,э,ю,я
	- Russian soft/hard signs (ь,ъ) treated as modifiers, not consonants
	- Loads from M1_ФОНЕТИЧЕСКИЕ_СДВИГИ sheet (Russian column names)
	- >50% Bitig (ORIG2) influence — dual-track processing
	v3.1 additions:
	- Compound word detection (САМ+О+ВАР, ПАРО+ВОЗ patterns)
	- Palatalization stripping (Д↔Ж, Т↔Ч, С↔Ш, К↔Ч, Г↔Ж, СТ↔Щ)
	- Latin-to-Cyrillic transliteration (user has no Cyrillic keyboard)
	"""

	CYRILLIC_VOWELS = set('аеёиоуыэюя')
	CYRILLIC_MODIFIERS = set('ьъ') # soft/hard signs
	CYRILLIC_CONSONANTS = set('бвгджзклмнпрстфхцчшщ')

	# ── LATIN → CYRILLIC TRANSLITERATION TABLE ──────────────────────────────
	# User types Latin script → engine converts to Cyrillic before processing.
	# Digraphs FIRST (longest match), then single chars.
	LATIN_TO_CYRILLIC_DIGRAPHS = [
	('shch', 'щ'), ('sch', 'щ'),
	('zh', 'ж'), ('kh', 'х'), ('ch', 'ч'), ('sh', 'ш'),
	('ts', 'ц'), ('yu', 'ю'), ('ya', 'я'), ('yo', 'ё'),
	]
	LATIN_TO_CYRILLIC_SINGLE = {
	'a': 'а', 'b': 'б', 'v': 'в', 'g': 'г', 'd': 'д',
	'e': 'е', 'z': 'з', 'i': 'и', 'j': 'й',
	'k': 'к', 'l': 'л', 'm': 'м', 'n': 'н', 'o': 'о',
	'p': 'п', 'r': 'р', 's': 'с', 't': 'т', 'u': 'у',
	'f': 'ф', 'h': 'х', 'c': 'ц', 'w': 'в',
	'x': 'кс', 'y': 'ы',
	}

	# ── COMPOUND WORD PREFIXES ──────────────────────────────────────────────
	# Russian (like German) merges roots using О or Е as bridge vowels.
	# Pattern: PREFIX + О/Е + ROOT. Engine splits at bridge, processes each part.
	COMPOUND_PREFIXES = {
	# prefix_cyrillic: (meaning, expects_bridge) — expects_bridge=True makes detect_compound look for an О/Е bridge vowel after the prefix
	'само': ('self/auto', True), # самовар, самолёт, самосвал
	'сам': ('self/auto', True), # when bridge vowel is already next char
	'паро': ('steam', True), # паровоз, пароход
	'пар': ('steam', True),
	'водо': ('water', True), # водопровод, водопад
	'вод': ('water', True),
	'полу': ('half', False), # полуостров — no bridge vowel
	'обще': ('common', False), # общежитие
	'ледо': ('ice', True), # ледокол
	'звуко': ('sound', True), # звукозапись
	'земле': ('earth', True), # землетрясение
	'тепло': ('warmth', True), # теплоход
	'хлебо': ('bread', True), # хлебозавод
	'нефте': ('oil', True), # нефтепровод
	'верто': ('spin', True), # вертолёт
	}

	# ── PALATALIZATION MAP ──────────────────────────────────────────────────
	# Russian morphological alternations — these are NOT separate consonants.
	# The palatalized form must be UN-palatalized to recover the true root.
	# Direction: palatalized → base (what the engine should trace).
	DEPALATALIZE = {
	'ж': ['д', 'г', 'з'], # водить→вождь, бег→бежать, возить→вожу
	'ч': ['т', 'к'], # крутить→кручение, рука→ручной
	'ш': ['с', 'х'], # писать→пишу, тихий→тишина
	'щ': ['ст', 'ск', 'т'], # простить→прощение, искать→ищу, светить→свещение
	}
	# Reverse: which consonants CAN palatalize
	CAN_PALATALIZE = {'д', 'г', 'з', 'т', 'к', 'с', 'х', 'ст', 'ск'}

	# ── CYRILLIC → LATIN CONVERSION (for Kashgari ORIG2 search) ──────────────
	# v3.2: Russian consonants must be converted to Latin equivalents before
	# searching Kashgari corpus (which uses Latin transliteration).
	# Single-char mapping for skeleton matching.
	CYRILLIC_TO_LATIN_SIMPLE = {
	'б': 'b', 'в': 'v', 'г': 'g', 'д': 'd', 'ж': 'j',
	'з': 'z', 'к': 'k', 'л': 'l', 'м': 'm', 'н': 'n',
	'п': 'p', 'р': 'r', 'с': 's', 'т': 't', 'ф': 'f',
	'х': 'h', 'ц': 'c', 'ч': 'c', 'ш': 's', 'щ': 's',
	'дж': 'j',
	}
	# Russian → Turkic voicing equivalences (for broader Kashgari search)
	# These pairs represent systematic voicing alternations in the
	# Turkic→Russian borrowing corridor.
	RU_TURKIC_VOICING = {
	'б': ['п'], # б↔p voicing
	'п': ['б'],
	'г': ['к', 'q'], # г↔k velar voicing
	'к': ['г', 'q'],
	'д': ['т'], # д↔t voicing
	'т': ['д'],
	'ж': ['ч', 'ш'], # affricate/sibilant alternation
	'ч': ['ж', 'ц'],
	'з': ['с'], # з↔s voicing
	'с': ['з'],
	}

	def to_latin_skeleton(self, cyrillic_consonants: list) -> str:
	"""Convert Cyrillic consonant list to Latin skeleton for Kashgari search."""
	return ''.join(self.CYRILLIC_TO_LATIN_SIMPLE.get(c, c) for c in cyrillic_consonants)

	def to_latin_skeleton_variants(self, cyrillic_consonants: list) -> list:
	"""Generate Latin skeleton + voicing variants for Kashgari search.

	Returns list of Latin skeleton strings (main + voicing alternations).
	Also generates suffix-stripped variants (B03 agglutinative morphology).
	"""
	main = self.to_latin_skeleton(cyrillic_consonants)
	variants = [main]

	# Voicing variants (swap one consonant at a time)
	for i, cyr_c in enumerate(cyrillic_consonants):
	for alt_cyr in self.RU_TURKIC_VOICING.get(cyr_c, []):
	alt_lat = self.CYRILLIC_TO_LATIN_SIMPLE.get(alt_cyr, alt_cyr)
	v = main[:i] + alt_lat + main[i+1:]
	if v != main and v not in variants:
	variants.append(v)

	# Suffix-stripped variants (Russian endings that aren't root consonants)
	# Common Russian noun/adj endings: -ля, -ка, -ня, -ра etc.
	if len(main) >= 3:
	stripped_1 = main[:-1] # drop last consonant
	if stripped_1 not in variants:
	variants.append(stripped_1)
	# Also voicing variants of stripped
	for i, cyr_c in enumerate(cyrillic_consonants[:-1]):
	for alt_cyr in self.RU_TURKIC_VOICING.get(cyr_c, []):
	alt_lat = self.CYRILLIC_TO_LATIN_SIMPLE.get(alt_cyr, alt_cyr)
	v = stripped_1[:i] + alt_lat + stripped_1[i+1:]
	if v != stripped_1 and v not in variants:
	variants.append(v)

	return variants[:20] # cap to prevent explosion

	def __init__(self, master_file: str, q_gate: 'QGate'):
	self.q_gate = q_gate
	self.shift_data : List[dict] = []
	self.forward_map : Dict[str, tuple] = {} # AR_letter → (shift_id, [ru_chars])
	self.reverse_map : Dict[str, list] = {} # RU_char → [(AR_letter, shift_id)]
	self._load_russian_shifts(master_file)
	self._build_reverse_map()
	# Extra mappings not in shift table (gap fill)
	self._add_gap_mappings()
	print(f" RussianPhoneticReversal: {len(self.shift_data)} shifts, "
	f"{len(self.reverse_map)} RU patterns in reverse map")

	def _load_russian_shifts(self, filepath: str):
	    """Load the Russian shift table from the M1_ФОНЕТИЧЕСКИЕ_СДВИГИ sheet.

	    The first row is used as the header row. Every later non-empty row that
	    has both ``СДВИГ_ID`` and ``АР_БУКВА`` is appended to ``self.shift_data``
	    and stored in ``self.forward_map``; a later row for the same Arabic
	    letter replaces the earlier ``forward_map`` entry. ``РУС_ВЫХОДЫ`` is
	    split on commas and lower-cased; the marker "(выпадает)" (= drops) and
	    empty items are left out.

	    Loading is best-effort: any exception is printed and swallowed, and the
	    rows read before the failure stay in place.

	    Args:
	        filepath: Path of the master workbook.
	    """
	    wb = None
	    try:
	        wb = load_workbook(filepath, read_only=True, data_only=True)
	        ws = wb['M1_ФОНЕТИЧЕСКИЕ_СДВИГИ']
	        headers = None
	        for row in ws.iter_rows(values_only=True):
	            if headers is None:
	                headers = [str(h).strip() if h else '' for h in row]
	                continue
	            if not any(row):
	                continue
	            d = dict(zip(headers, row))
	            sid = str(d.get('СДВИГ_ID', '') or '').strip()
	            ar = str(d.get('АР_БУКВА', '') or '').strip()
	            ru_raw = str(d.get('РУС_ВЫХОДЫ', '') or '').strip()
	            if sid and ar:
	                # Parse Russian outputs — handle "(выпадает)" = drops
	                ru_list = []
	                for x in ru_raw.split(','):
	                    x = x.strip().lower()
	                    if x and x != '(выпадает)':
	                        ru_list.append(x)
	                self.shift_data.append({
	                    'shift_id': sid, 'ar_letter': ar, 'ru_outputs': ru_list
	                })
	                self.forward_map[ar] = (sid, ru_list)
	    except Exception as e:
	        print(f" RussianPhoneticReversal load error: {e}")
	    finally:
	        # Close on every path: previously a missing sheet or a bad row
	        # jumped straight to the except clause and left the workbook open.
	        if wb is not None:
	            wb.close()

	def _build_reverse_map(self):
	"""Build reverse map: Russian char → [(Arabic letter, shift_id)]."""
	for shift in self.shift_data:
	ar, sid = shift['ar_letter'], shift['shift_id']
	for ru in shift['ru_outputs']:
	key = ru.lower()
	self.reverse_map.setdefault(key, [])
	if (ar, sid) not in self.reverse_map[key]:
	self.reverse_map[key].append((ar, sid))

	def _add_gap_mappings(self):
	"""Add consonant mappings not explicitly in the shift table."""
	# ч (ch) — maps to ج (S02) or ش (S05) or ت+ش compound
	# Common in Turkic loanwords: чай (tea), чулан (closet)
	if 'ч' not in self.reverse_map:
	self.reverse_map['ч'] = [('ج', 'S02'), ('ش', 'S05')]
	# Ensure дж is mapped
	if 'дж' not in self.reverse_map:
	self.reverse_map['дж'] = [('ج', 'S02')]

	# ── v3.1: LATIN → CYRILLIC TRANSLITERATION ──────────────────────────────

	def transliterate_latin(self, text: str) -> str:
	"""
	Convert Latin-script Russian to Cyrillic.
	User has no Cyrillic keyboard — types: samovar, moloko, vodka, etc.
	Returns Cyrillic string.

	Handles digraphs first (longest match): zh→ж, kh→х, ch→ч, sh→ш, ts→ц
	Then single chars: a→а, b→б, etc.
	"""
	t = text.lower().strip()
	result = []
	i = 0
	while i < len(t):
	matched = False
	# Try digraphs (longest first — shch before sh)
	for lat, cyr in self.LATIN_TO_CYRILLIC_DIGRAPHS:
	if t[i:i+len(lat)] == lat:
	result.append(cyr)
	i += len(lat)
	matched = True
	break
	if not matched:
	ch = t[i]
	if ch in self.LATIN_TO_CYRILLIC_SINGLE:
	result.append(self.LATIN_TO_CYRILLIC_SINGLE[ch])
	else:
	result.append(ch) # spaces, hyphens, digits pass through
	i += 1
	return ''.join(result)

	def _is_latin_russian(self, text: str) -> bool:
	"""
	Detect if a string is Latin-script Russian (not English).
	Heuristic: contains common Russian transliteration patterns
	OR matches a known Russian word transliteration.
	"""
	t = text.lower().strip()
	# If it has any Cyrillic already → not Latin-Russian
	if any(c in self.CYRILLIC_VOWELS or c in self.CYRILLIC_CONSONANTS
	or c in self.CYRILLIC_MODIFIERS for c in t):
	return False
	# Check for Russian transliteration digraph markers
	ru_digraphs = ['zh', 'kh', 'shch', 'ya', 'yu', 'yo', 'ts']
	if any(d in t for d in ru_digraphs):
	return True
	# Check for Russian word-ending patterns
	ru_endings = ['ov', 'ev', 'aya', 'iya', 'ost', 'nik', 'tel',
	'stvo', 'ok', 'ka', 'ko', 'da', 'lo']
	if any(t.endswith(e) for e in ru_endings):
	# Also check it's NOT a common English word
	english_words = {'book', 'look', 'cook', 'hook', 'took',
	'like', 'make', 'take', 'wake', 'bake',
	'also', 'into', 'onto', 'undo', 'solo'}
	if t not in english_words:
	return True
	return False

	# ── v3.1: COMPOUND WORD DETECTION ────────────────────────────────────────

	def detect_compound(self, word: str) -> tuple:
	"""
	Detect Russian compound words with О/Е bridge vowels.

	Russian and German both merge roots: PREFIX + О/Е + ROOT.
	Examples:
	САМОВАР = САМ + О + ВАР (self + cook/boil)
	САМОЛЁТ = САМ + О + ЛЁТ (self + fly)
	ПАРОВОЗ = ПАР + О + ВОЗ (steam + carry)
	ВОДОПАД = ВОД + О + ПАД (water + fall)
	ВЕРТОЛЁТ = ВЕРТ + О + ЛЁТ (spin + fly)
	ЛЕДОКОЛ = ЛЕД + О + КОЛ (ice + split)

	Two strategies are tried in order: (1) known prefixes from
	COMPOUND_PREFIXES, longest first; (2) a generic ROOT+О/Е+ROOT scan for
	words of at least 8 characters. The first strategy that produces a
	result returns immediately.

	Args:
	word: Cyrillic word; it is lower-cased and stripped before matching.

	Returns: (is_compound, prefix_str, root_str, bridge_vowel, compound_label)
	or (False, None, None, None, None) if not compound.
	bridge_vowel is '' when no separate bridge vowel was split off.
	"""
	w = word.lower().strip()

	# Sort compound prefixes by length (longest first) to avoid partial matches
	sorted_prefixes = sorted(self.COMPOUND_PREFIXES.keys(), key=len, reverse=True)

	for prefix in sorted_prefixes:
	if not w.startswith(prefix):
	continue
	# NOTE: meaning is unpacked here but not used anywhere in this method.
	meaning, expects_bridge = self.COMPOUND_PREFIXES[prefix]
	remainder = w[len(prefix):]

	if expects_bridge:
	# Check for О/Е bridge vowel after prefix
	if remainder and remainder[0] in ('о', 'е'):
	bridge = remainder[0]
	root_part = remainder[1:]
	if len(root_part) >= 2: # root must have at least 2 chars
	label = f"COMPOUND({prefix.upper()}+{bridge}+{root_part.upper()})"
	return (True, prefix, root_part, bridge, label)
	# Also check: prefix already ends with the vowel (like САМО, ПАРО, ВОДО)
	# In this case the bridge is already included in the prefix
	# NOTE(review): indentation is lost in this copy, so it is unclear whether
	# this elif pairs with the bridge-vowel test or with the root-length test
	# above; the two readings differ (e.g. for 'водоем') — confirm against the
	# canonical file.
	elif len(remainder) >= 2:
	label = f"COMPOUND({prefix.upper()}+{remainder.upper()})"
	return (True, prefix, remainder, '', label)
	else:
	# No bridge vowel expected (ПОЛУ, ОБЩЕ)
	if len(remainder) >= 2:
	label = f"COMPOUND({prefix.upper()}+{remainder.upper()})"
	return (True, prefix, remainder, '', label)

	# Also detect non-prefix compounds: ROOT+О/Е+ROOT pattern
	# CONSERVATIVE: only fire when BOTH parts have >= 3 consonants each
	# and the word is long enough (>= 8 chars) to avoid false positives
	# like ХЛОПОК, МОЛОКО, ПОЛОСА which are NOT compounds.
	if len(w) >= 8:
	# Candidate bridge positions leave at least 3 characters on each side.
	for i in range(3, len(w) - 3):
	if w[i] in ('о', 'е'):
	# Check: consonant immediately before AND after bridge
	if (w[i-1] in self.CYRILLIC_CONSONANTS and
	w[i+1] in self.CYRILLIC_CONSONANTS):
	left = w[:i]
	right = w[i+1:]
	left_cons = sum(1 for c in left if c in self.CYRILLIC_CONSONANTS)
	right_cons = sum(1 for c in right if c in self.CYRILLIC_CONSONANTS)
	# The first qualifying split (scanning left to right) is returned.
	if left_cons >= 3 and right_cons >= 3:
	label = f"COMPOUND({left.upper()}+{w[i]}+{right.upper()})"
	return (True, left, right, w[i], label)

	return (False, None, None, None, None)

	# ── v3.1: PALATALIZATION STRIPPING ───────────────────────────────────────

	def depalatalize(self, consonants: list) -> list:
	"""
	Generate de-palatalized consonant variants.

	Russian has systematic morphological alternations:
	Д → Ж (водить → вождь)
	Т → Ч (крутить → кручение)
	С → Ш (писать → пишу)
	К → Ч (рука → ручной)
	Г → Ж (бег → бежать)
	З → Ж (возить → вожу)
	СТ → Щ (простить → прощение)
	СК → Щ (искать → ищу)

	These are NOT separate consonants — they are surface alternations
	of the SAME underlying root consonant.

	Returns: list of (new_consonants, op_label) tuples.
	Each tuple represents one possible de-palatalization.
	"""
	variants = []

	for i, c in enumerate(consonants):
	if c in self.DEPALATALIZE:
	for base in self.DEPALATALIZE[c]:
	if len(base) == 1:
	# Single consonant replacement: ж→д, ч→т, etc.
	new_cons = consonants[:i] + [base] + consonants[i+1:]
	label = f'OP_DEPALATAL({c.upper()}→{base.upper()})'
	variants.append((new_cons, label))
	elif len(base) == 2:
	# Cluster replacement: щ→ст, щ→ск — one consonant expands to two
	new_cons = consonants[:i] + list(base) + consonants[i+1:]
	label = f'OP_DEPALATAL({c.upper()}→{base.upper()})'
	variants.append((new_cons, label))

	return variants

	# ── consonant extraction ──────────────────────────────────────────────────

	def extract_consonants(self, word: str) -> list:
	"""
	Extract ordered consonant skeleton from Russian word.
	Each Cyrillic letter = one phoneme (no digraphs like English TH/SH).
	Exception: дж = one phoneme (affricate).
	"""
	w = word.lower().strip()
	result = []
	i = 0
	while i < len(w):
	# Check for дж digraph
	if i + 1 < len(w) and w[i:i+2] == 'дж':
	result.append('дж')
	i += 2
	elif w[i] in self.CYRILLIC_CONSONANTS:
	result.append(w[i])
	i += 1
	else:
	# vowels, modifiers, spaces — skip
	i += 1
	return result

	def strip_operations(self, word: str) -> tuple:
	"""
	Strip Russian suffixes (OP_SUFFIX equivalent).
	Returns: (stripped_word, operations_list, suffix_removed)
	Minimum-consonant guard: if stripping leaves < 3 consonants, undo.

	Args:
	word: Russian word; it is lower-cased and stripped of surrounding
	whitespace before any suffix test.

	Returns:
	A 3-tuple (w, ops, suffix_removed). When no suffix is removed, w is
	the normalised input, ops is [] and suffix_removed is ''. When a suffix
	is removed, ops holds the single label 'OP_SUFFIX(-<suffix>)'.
	"""
	w = word.lower().strip()
	ops = []
	suffix_removed = ''

	# RUSSIAN_SUFFIXES is a module-level list sorted longest first.
	for suffix in RUSSIAN_SUFFIXES:
	# A suffix only qualifies if at least 2 characters remain after removing it.
	if w.endswith(suffix) and len(w) - len(suffix) >= 2:
	candidate = w[:-len(suffix)]
	# Count consonants in stripped form
	cons_count = sum(1 for ch in candidate if ch in self.CYRILLIC_CONSONANTS)
	if cons_count >= 3:
	w = candidate
	suffix_removed = suffix
	ops.append(f'OP_SUFFIX(-{suffix})')
	# NOTE(review): indentation is lost in this copy, so it is unclear whether
	# this break runs only after a successful strip or after the first matching
	# suffix regardless of the guard — confirm against the canonical file.
	break
	return w, ops, suffix_removed

	def map_consonants_to_arabic(self, consonants: list) -> list:
	"""Each consonant position → list of (AR_letter, shift_id) pairs."""
	mapped = []
	for c in consonants:
	candidates = self.reverse_map.get(c, [])
	mapped.append(candidates)
	return mapped

	def generate_root_permutations(self, mapped: list) -> list:
	    """Build candidate root strings from per-position Arabic candidates.

	    Positions are chosen in word order (combinations, not reorderings):
	    three at a time when at least three positions exist, otherwise two.
	    For every choice, one Arabic letter is taken per position and joined
	    with '-'. A position with no candidates contributes nothing.

	    Args:
	        mapped: Output of map_consonants_to_arabic(): one list of
	            ``(AR_letter, shift_id)`` pairs per consonant position.

	    Returns:
	        Sorted list of unique root strings; [] when fewer than two
	        positions are given. Sorting makes the order reproducible; the
	        previous ``list(set)`` order changed with string-hash randomisation.
	    """
	    positions = len(mapped)
	    if positions < 2:
	        return []
	    letters_per_pos = [sorted({ar for ar, _sid in pos}) for pos in mapped]
	    size = 3 if positions >= 3 else positions
	    roots = set()
	    for pos_combo in itertools.combinations(range(positions), size):
	        for combo in itertools.product(*(letters_per_pos[p] for p in pos_combo)):
	            if all(combo):  # skip combos containing an empty letter string
	                roots.add('-'.join(combo))
	    return sorted(roots)

	def _positional_score(self, consonants: list, root_letters: str) -> tuple:
	"""
	R11 — Positional score (same logic as English).
	Scores how closely consonant ORDER in Russian word matches root order.
	"""
	root_list = [l.strip() for l in re.split(r'[\-\s]+', root_letters) if l.strip()]
	if not root_list or not consonants:
	return 0.5, False

	match_positions = []
	for ar in root_list:
	info = self.forward_map.get(ar)
	if not info:
	continue
	_, ru_outputs = info
	found_pos = None
	for i, c in enumerate(consonants):
	if c in ru_outputs:
	found_pos = i
	break
	if found_pos is not None:
	match_positions.append(found_pos)

	if len(match_positions) < 2:
	return 0.5, False

	monotone = all(match_positions[i] <= match_positions[i + 1]
	for i in range(len(match_positions) - 1))
	if monotone:
	return 1.0, False

	reversed_check = all(match_positions[i] >= match_positions[i + 1]
	for i in range(len(match_positions) - 1))
	if reversed_check:
	return 0.1, True
	return 0.4, True

	def _generate_op_stop_variants(self, consonants: list) -> list:
	"""OP_STOP: НД→Н / МБ→М variants (same logic as English)."""
	variants = []
	# НД→Н: find consecutive ['н', 'д'] and remove 'д'
	for i in range(len(consonants) - 1):
	if consonants[i] == 'н' and consonants[i + 1] == 'д':
	new_cons = consonants[:i + 1] + consonants[i + 2:]
	variants.append((new_cons, 'OP_STOP(НД→Н)'))
	break
	# МБ→М: find consecutive ['м', 'б'] and remove 'б'
	for i in range(len(consonants) - 1):
	if consonants[i] == 'м' and consonants[i + 1] == 'б':
	new_cons = consonants[:i + 1] + consonants[i + 2:]
	variants.append((new_cons, 'OP_STOP(МБ→М)'))
	break
	return variants

	def reverse(self, ru_word: str) -> List[RootCandidate]:
	    """
	    Public entry point: Russian word → ranked Qur'anic root candidates.

	    Only candidates that pass the Q-gate are returned.

	    Architecture v3.1 (the per-path work happens in _reverse_inner):
	        COMPOUND:  detect САМ+О+ВАР type → split + process each part
	        PRIMARY:   raw vowel extraction (vowel-strip-first)
	        SECONDARY: suffix-stripped consonants
	        DEPALATAL: undo Д↔Ж, Т↔Ч, С↔Ш alternations → re-run
	        OP_STOP:   НД→Н / МБ→М variants
	        M-PREFIX:  М-prefix parallel path (same as English R08a)

	    For a detected compound, the root part is traced first (tagged with
	    the compound label), then the whole word. Whole-word candidates are
	    appended only when their letters were not produced by the root part,
	    so compound-rooted candidates keep priority.
	    """
	    is_compound, _prefix, root_part, _bridge, compound_label = self.detect_compound(ru_word)
	    if not is_compound:
	        return self._reverse_inner(ru_word, None)

	    # The prefix is a known morpheme, so only the root part carries the label.
	    merged = self._reverse_inner(root_part, compound_label)
	    # Some compounds are fused enough that the full word has its own Q-gate entry.
	    whole_word = self._reverse_inner(ru_word, None)

	    known = {cand.letters for cand in merged}
	    merged.extend(cand for cand in whole_word if cand.letters not in known)
	    return merged

	def _reverse_inner(self, ru_word: str, compound_label: str = None) -> List[RootCandidate]:
	    """
	    Inner reverse logic — processes a single word (or compound root part).

	    Separated from reverse() so compound detection can run it on just the
	    root portion. Each path below turns a consonant skeleton into root
	    strings (map → permute) and keeps the roots that pass the Q-gate:

	        PRIMARY        raw consonants of the word
	        SECONDARY      consonants after suffix stripping
	        OP_STOP        НД→Н / МБ→М variants of the primary skeleton
	        DEPALATAL      depalatalized variants of the primary skeleton
	        M-PREFIX       the word minus a leading 'м' (parallel path, R08a)
	        OP_RU_PREFIX   the word minus a Russian grammatical prefix
	        INITIAL STRIP  primary skeleton minus its first consonant; only
	                       when no candidate so far has extra_consonants == 0

	    A root already seen on an earlier path is not gated again. If the
	    later path explains the word with fewer leftover consonants, the
	    existing candidate's extra_consonants (and operations) are updated.
	    The M-PREFIX path is the exception: it always adds its own candidate.

	    Args:
	        ru_word: word (or compound root part) to trace.
	        compound_label: if given, inserted at the front of every
	            candidate's operations.

	    Returns:
	        The 15 best candidates by (positional_score, token_count),
	        descending, plus the first lower-ranked candidate of every
	        extra_consonants tier not already represented among them.
	    """
	    passing = []
	    seen = set()

	    # Skeletons shared by several paths.
	    primary_cons = self.extract_consonants(ru_word)
	    stripped, ops, _suffix = self.strip_operations(ru_word)
	    secondary_cons = self.extract_consonants(stripped)
	    op_stop_variants = self._generate_op_stop_variants(primary_cons)

	    # ── PRIMARY PATH ───────────────────────────────────────────────────────
	    for rs in self.generate_root_permutations(self.map_consonants_to_arabic(primary_cons)):
	        if rs in seen:
	            continue
	        seen.add(rs)
	        self._q_gated_append(passing, rs, primary_cons, [])

	    # ── SECONDARY PATH ─────────────────────────────────────────────────────
	    if secondary_cons != primary_cons and len(secondary_cons) >= 2:
	        for rs in self.generate_root_permutations(self.map_consonants_to_arabic(secondary_cons)):
	            if rs in seen:
	                extra = self._extra_consonant_count(secondary_cons, rs)
	                existing = self._first_improvable(passing, rs, extra)
	                if existing is not None:
	                    existing.extra_consonants = extra
	                    if ops and not existing.operations:
	                        # Copy: later paths append to operations in place, and a
	                        # shared list would leak labels across candidates.
	                        existing.operations = list(ops)
	                continue
	            seen.add(rs)
	            self._q_gated_append(passing, rs, secondary_cons, ops)

	    # ── OP_STOP PATH ───────────────────────────────────────────────────────
	    for stop_cons, stop_label in op_stop_variants:
	        if len(stop_cons) < 2:
	            continue
	        for rs in self.generate_root_permutations(self.map_consonants_to_arabic(stop_cons)):
	            if rs in seen:
	                extra = self._extra_consonant_count(stop_cons, rs)
	                existing = self._first_improvable(passing, rs, extra)
	                if existing is not None:
	                    existing.extra_consonants = extra
	                    existing.operations = [stop_label]
	                continue
	            seen.add(rs)
	            self._q_gated_append(passing, rs, stop_cons, [stop_label])

	    # ── v3.1: DEPALATALIZATION PATH ────────────────────────────────────────
	    # Russian Д↔Ж, Т↔Ч, С↔Ш, К↔Ч, Г↔Ж are morphological alternations,
	    # NOT separate consonants. Try un-palatalizing and re-running.
	    for depal_cons, depal_label in self.depalatalize(primary_cons):
	        if len(depal_cons) < 2:
	            continue
	        for rs in self.generate_root_permutations(self.map_consonants_to_arabic(depal_cons)):
	            depal_key = rs + '__' + depal_label
	            if depal_key in seen:
	                continue
	            # Same root already found without depalatalization: update, don't duplicate.
	            if rs in seen:
	                extra = self._extra_consonant_count(depal_cons, rs)
	                existing = self._first_improvable(passing, rs, extra)
	                if existing is not None:
	                    existing.extra_consonants = extra
	                    if depal_label not in existing.operations:
	                        existing.operations.append(depal_label)
	                continue
	            seen.add(depal_key)
	            seen.add(rs)
	            self._q_gated_append(passing, rs, depal_cons, [depal_label])

	    # ── M-PREFIX PARALLEL PATH (R08a) ──────────────────────────────────────
	    if ru_word.lower().startswith('м'):
	        m_stripped, m_ops, _m_sfx = self.strip_operations(ru_word[1:])
	        m_consonants = self.extract_consonants(m_stripped)
	        if len(m_consonants) >= 2:
	            for rs in self.generate_root_permutations(self.map_consonants_to_arabic(m_consonants)):
	                # Deduplicated only against itself, so a root found on another
	                # path also gets a separate M-prefix candidate.
	                seen_key = rs + '__OP_PREFIX(مُ→م)'
	                if seen_key in seen:
	                    continue
	                seen.add(seen_key)
	                self._q_gated_append(passing, rs, m_consonants,
	                                     ['OP_PREFIX(مُ→м)'] + m_ops)

	    # ── v3.4b: OP_RU_PREFIX — Russian grammatical prefix stripping ─────────
	    # Russian is FULL of prefixes (ДО-, ПО-, НА-, ПРИ-, ПРО-, ЗА-, etc.)
	    # These are NOT root consonants — strip before tracing.
	    # Same principle as OP_SUFFIX for Latin/Greek but at the FRONT.
	    # Example: ДОГОВОР: strip ДО- → ГОВОР → Г-В-Р → ج-ب-ر (21 tokens)
	    #          ЗАГОВОР: strip ЗА- → ГОВОР → Г-В-Р → ج-ب-ر (same root)
	    RU_PREFIXES = [
	        ('пере', 4), ('рас', 3), ('раз', 3), ('вос', 3), ('воз', 3),
	        ('при', 3), ('пре', 3), ('про', 3), ('под', 3), ('над', 3),
	        ('по', 2), ('на', 2), ('за', 2), ('до', 2), ('от', 2),
	        ('из', 2), ('вы', 2), ('об', 2),
	        ('у', 1), ('с', 1),
	    ]
	    ru_lower = ru_word.lower()
	    for pfx, pfx_len in RU_PREFIXES:
	        # Every matching prefix is tried; the remainder must keep more than two characters.
	        if not (ru_lower.startswith(pfx) and len(ru_lower) > pfx_len + 2):
	            continue
	        remainder = ru_lower[pfx_len:]
	        rem_cons = self.extract_consonants(remainder)
	        if len(rem_cons) < 2:
	            continue
	        op_label = f'OP_RU_PREFIX({pfx.upper()}-)'
	        for rs in self.generate_root_permutations(self.map_consonants_to_arabic(rem_cons)):
	            seen_key = rs + '__' + op_label
	            if seen_key in seen:
	                continue
	            if rs in seen:
	                # Same root found without the prefix strip — keep whichever
	                # reading leaves fewer extra consonants.
	                extra = self._extra_consonant_count(rem_cons, rs)
	                existing = self._first_improvable(passing, rs, extra)
	                if existing is not None:
	                    existing.extra_consonants = extra
	                    if op_label not in existing.operations:
	                        existing.operations.append(op_label)
	                continue
	            seen.add(seen_key)
	            seen.add(rs)
	            self._q_gated_append(passing, rs, rem_cons,
	                                 [op_label, f'remainder={remainder}'])

	    # ── v3.4: INITIAL STRIP — fallback for imperfect coverage ──────────────
	    # When NO candidate has perfect consonant coverage (extra=0), try
	    # stripping the initial consonant. This catches words like ТАЛАНТ/
	    # ГАЛАНТ where the initial T/G varies — the root is in the shared
	    # ending (-ЛАНТ → Л-Н-Т).
	    # Fires when: (a) no candidates at all, OR (b) all candidates have
	    # extra_consonants >= 1 (no clean trilateral match found).
	    no_clean_match = (not passing or
	                      all(getattr(c, 'extra_consonants', 99) >= 1
	                          for c in passing))
	    if no_clean_match and len(primary_cons) >= 3:
	        init_stripped = primary_cons[1:]  # drop first consonant
	        if len(init_stripped) >= 2:
	            strip_label = f'OP_INITIAL_STRIP({primary_cons[0].upper()}-)'
	            for rs in self.generate_root_permutations(self.map_consonants_to_arabic(init_stripped)):
	                if rs in seen:
	                    continue
	                seen.add(rs)
	                self._q_gated_append(passing, rs, init_stripped, [strip_label])

	    # ── v3.1: COMPOUND LABEL PROPAGATION ───────────────────────────────────
	    if compound_label:
	        for cand in passing:
	            if compound_label not in cand.operations:
	                cand.operations.insert(0, compound_label)

	    # ── SORT + TIER DIVERSITY (same as English v2.5) ───────────────────────
	    passing.sort(key=lambda r: (r.positional_score, r.token_count), reverse=True)
	    top_n = passing[:15]
	    tiers_present = set(getattr(c, 'extra_consonants', 99) for c in top_n)
	    for cand in passing[15:]:
	        tier = getattr(cand, 'extra_consonants', 99)
	        if tier not in tiers_present:
	            top_n.append(cand)
	            tiers_present.add(tier)
	    return top_n

	def _extra_consonant_count(self, consonants: list, rs: str) -> int:
	    """Return how many word consonants exceed the number of letters in root string ``rs``."""
	    root_size = len([l for l in re.split(r'[\-\s]+', rs) if l.strip()])
	    return max(0, len(consonants) - root_size)

	def _first_improvable(self, passing: list, rs: str, extra: int):
	    """Return the first candidate with letters ``rs`` and more extra consonants than ``extra``, else None."""
	    return next(
	        (cand for cand in passing
	         if cand.letters == rs and extra < cand.extra_consonants),
	        None,
	    )

	def _q_gated_append(self, passing: list, rs: str, consonants: list, operations: list) -> None:
	    """Append a scored RootCandidate for ``rs`` to ``passing`` if ``rs`` passes the Q-gate."""
	    qr = self.q_gate.check(rs)
	    if not qr.passed:
	        return
	    pos_s, trans = self._positional_score(consonants, rs)
	    cand = RootCandidate(
	        letters=rs,
	        token_count=qr.details.get('token_count', 0),
	        lemma_count=qr.details.get('lemma_count', 0),
	        ar_word=qr.details.get('ar_word', ''),
	        # Each candidate owns its list; callers may pass a shared one.
	        operations=list(operations),
	    )
	    cand.positional_score = pos_s
	    cand.transposition_flag = trans
	    cand.extra_consonants = self._extra_consonant_count(consonants, rs)
	    passing.append(cand)


	# ═══════════════════════════════════════════════════════════════════════════════
	# COMPONENT 4 — UGate
	# ═══════════════════════════════════════════════════════════════════════════════

	class UGate:
	    """
	    Phonetic unity gate — every root letter accounted for via M1 shifts.

	    verify() checks that each Arabic root letter has at least one of its
	    forward-map outputs (or a voicing partner of one) present in the word.
	    """

	    # OP_VOICE pairs: voicing/devoicing equivalences (documented phonological process)
	    # Maps each consonant to ALL possible voicing/devoicing partners.
	    VOICE_PAIRS_EN = {
	        'z': ['t', 's'],  # ز→t (markaz→market), ز→s (sibilant)
	        't': ['d', 'z'],  # ت→d (DEBT), ت→z
	        's': ['z'],       # sibilant voicing
	        'd': ['t'],       # dental devoicing
	        'p': ['b'],       # bilabial voicing
	        'b': ['p'],       # bilabial devoicing
	        'f': ['v'],       # labiodental voicing
	        'v': ['f'],       # labiodental devoicing
	        'k': ['g'],       # velar voicing
	        'g': ['k'],       # velar devoicing
	    }

	    # Russian Cyrillic OP_VOICE pairs (v3.0)
	    VOICE_PAIRS_RU = {
	        'з': ['т', 'с'],  # з↔т, з↔с (sibilant)
	        'т': ['д', 'з'],  # т↔д, т↔з
	        'с': ['з'],       # sibilant voicing
	        'д': ['т'],       # dental devoicing
	        'п': ['б'],       # bilabial voicing
	        'б': ['п'],       # bilabial devoicing
	        'ф': ['в'],       # labiodental voicing
	        'в': ['ф'],       # labiodental devoicing
	        'к': ['г'],       # velar voicing
	        'г': ['к'],       # velar devoicing
	        'ш': ['ж'],       # hushing sibilants: voicing
	        'ж': ['ш'],       # hushing sibilants: devoicing
	    }

	    def __init__(self, reversal):
	        """Bind the gate to a reversal object and pick the matching voice-pair table."""
	        self.reversal = reversal
	        # Select voice pairs based on reversal type
	        self.VOICE_PAIRS = (self.VOICE_PAIRS_RU
	                            if isinstance(reversal, RussianPhoneticReversal)
	                            else self.VOICE_PAIRS_EN)

	    def verify(self, en_word: str, root_letters: str,
	               operations: list = None) -> GateResult:
	        """
	        Check that every letter of ``root_letters`` is reflected in ``en_word``.

	        The suffix-stripped word is tried first. If any root letter stays
	        unmapped and the raw word has a different consonant skeleton, the
	        raw word is tried too.

	        Args:
	            en_word: the word being traced.
	            root_letters: root letters separated by '-' or whitespace.
	            operations: labels already applied upstream; the suffix
	                operations found here are appended after them.

	        Returns:
	            GateResult(True, {...}) with 'phonetic_chain', 'operations',
	            'consonant_skeleton' and 'stripped_word' (plus 'note' when the
	            raw-word fallback succeeded), or GateResult(False, {...}) with
	            a 'reason'.
	        """
	        stripped, ops_applied, _suffix = self.reversal.strip_operations(en_word)
	        consonants = self.reversal.extract_consonants(stripped)
	        root_list = [l.strip() for l in re.split(r'[\-\s]+', root_letters) if l.strip()]

	        if not root_list:
	            return GateResult(False, {'reason': 'Could not parse root letters'})

	        chain_parts, unmapped = self._trace_roots(root_list, consonants, stripped.lower())

	        all_ops = (operations or []) + ops_applied
	        # The separator is kept byte-for-byte (space, backslash, pipe, space),
	        # written with an escaped backslash because '\|' is an invalid escape.
	        # NOTE(review): the backslash looks like a markdown-escaping artifact —
	        # confirm whether a plain ' | ' was intended.
	        ops_str = ' \\| '.join(all_ops) if all_ops else ''

	        if not unmapped:
	            return GateResult(True, {
	                'phonetic_chain': ', '.join(chain_parts),
	                'operations': ops_str,
	                'consonant_skeleton': ''.join(consonants),
	                'stripped_word': stripped
	            })

	        # v2.4: RAW CONSONANT FALLBACK — try matching against the full word
	        # before declaring U-gate failure. Catches CALENDAR (OP_SUFFIX strips
	        # -ar removing ر→r) and similar cases where suffix stripping removes
	        # a root consonant that IS present in the original word.
	        raw_cons = self.reversal.extract_consonants(en_word)
	        if raw_cons != consonants:
	            raw_chain_parts, raw_unmapped = self._trace_roots(
	                root_list, raw_cons, en_word.lower())
	            if not raw_unmapped:
	                # Raw consonants pass — use the raw chain.
	                return GateResult(True, {
	                    'phonetic_chain': ', '.join(raw_chain_parts),
	                    'operations': ops_str,
	                    'consonant_skeleton': ''.join(raw_cons),
	                    'stripped_word': en_word.lower(),
	                    'note': 'Verified via raw consonants (vowel-strip-first)'
	                })

	        return GateResult(False, {
	            'phonetic_chain': ', '.join(chain_parts),
	            'unmapped': unmapped,
	            'reason': f"Unmapped root letters: {', '.join(unmapped)}"
	        })

	    def _trace_roots(self, root_list: list, consonants: list, text: str) -> tuple:
	        """Match each root letter against a word; return (chain_parts, unmapped_letters)."""
	        def present(token):
	            # A token counts if it is an extracted consonant or occurs in the word text.
	            return token in consonants or token in text

	        chain_parts = []
	        unmapped = []
	        for ar in root_list:
	            info = self.reversal.forward_map.get(ar)
	            if not info:
	                unmapped.append(ar)
	                continue
	            sid, en_outputs = info

	            # Direct match: first forward-map output present in the word.
	            # NOTE(review): an empty-string output is "present" in any text and
	            # ends this search while still counting as no match below — confirm
	            # whether forward_map can contain '' and what it should mean here.
	            matched = next((out for out in en_outputs if present(out)), None)

	            # OP_VOICE fallback: if direct match failed, check voicing pairs.
	            voice_match = False
	            if not matched:
	                partner = next(
	                    (voiced
	                     for out in en_outputs
	                     for voiced in self.VOICE_PAIRS.get(out, [])
	                     if present(voiced)),
	                    None,
	                )
	                if partner is not None:
	                    matched = partner
	                    voice_match = True

	            if not matched:
	                unmapped.append(ar)
	            elif voice_match:
	                chain_parts.append(f"{ar}→{matched}({sid}+OP_VOICE)")
	            else:
	                chain_parts.append(f"{ar}→{matched}({sid})")
	        return chain_parts, unmapped


	# ═══════════════════════════════════════════════════════════════════════════════
	# COMPONENT 5 — FGate
	# ═══════════════════════════════════════════════════════════════════════════════

	class FGate:
	    """
	    Foundation layer assignment — DS corridor, DP codes, network membership.

	    On construction the master workbook is read twice: sheet M4_NETWORKS
	    fills ``self.networks`` and sheet A1_ENTRIES fills
	    ``self.root_to_network``. Load failures are reported on stdout and
	    leave the affected mapping empty or partially filled.
	    """

	    # Words that trigger DP08 by exact (lower-cased) match.
	    DP08_TRIGGERS = {
	        'philosophy', 'medicine', 'algebra', 'algorithm', 'chemistry',
	        'geometry', 'astronomy', 'physics', 'biology', 'science'
	    }

	    def __init__(self, master_file: str):
	        self.networks: Dict[str, dict] = {}         # NETWORK_ID → title / link_verse / entry_ids
	        self.root_to_network: Dict[str, str] = {}   # ROOT_LETTERS → NETWORK_ID
	        self._load_networks(master_file)
	        self._load_entry_networks(master_file)

	    @staticmethod
	    def _sheet_records(ws):
	        """Yield one {header: cell_value} dict per non-empty data row of a worksheet."""
	        headers = None
	        for row in ws.iter_rows(values_only=True):
	            if headers is None:
	                # First row is the header row.
	                headers = [str(h).strip() if h else '' for h in row]
	                continue
	            if not any(row):
	                continue
	            yield dict(zip(headers, row))

	    @staticmethod
	    def _text(record: dict, key: str) -> str:
	        """Return a record field as stripped text ('' for missing or empty cells)."""
	        return str(record.get(key, '') or '').strip()

	    def _load_networks(self, filepath: str):
	        """Fill ``self.networks`` from sheet M4_NETWORKS; best-effort, errors are printed."""
	        try:
	            wb = load_workbook(filepath, read_only=True, data_only=True)
	            try:
	                for d in self._sheet_records(wb['M4_NETWORKS']):
	                    nid = self._text(d, 'NETWORK_ID')
	                    if nid:
	                        self.networks[nid] = {
	                            'title': self._text(d, 'TITLE'),
	                            'link_verse': self._text(d, 'LINK_VERSE'),
	                            'entry_ids': self._text(d, 'ENTRY_IDS')
	                        }
	            finally:
	                # Close even when a row fails to parse, so the file handle is released.
	                wb.close()
	        except Exception as e:
	            print(f" FGate networks load error: {e}")

	    def _load_entry_networks(self, filepath: str):
	        """Fill ``self.root_to_network`` from sheet A1_ENTRIES; best-effort, errors are printed."""
	        try:
	            wb = load_workbook(filepath, read_only=True, data_only=True)
	            try:
	                for d in self._sheet_records(wb['A1_ENTRIES']):
	                    rl = self._text(d, 'ROOT_LETTERS')
	                    nid = self._text(d, 'NETWORK_ID')
	                    if rl and nid:
	                        # A later row with the same root overwrites an earlier one.
	                        self.root_to_network[rl] = nid
	            finally:
	                wb.close()
	        except Exception as e:
	            print(f" FGate entries load error: {e}")

	    def assign(self, en_word: str, root_letters: str,
	               phonetic_chain: str = '') -> GateResult:
	        """
	        Assign foundation-layer metadata to a word/root pair.

	        Always returns a passing GateResult whose details hold 'ds_code',
	        'dp_codes', 'network_id' ('' when the root has no known network)
	        and a combined 'foundation_ref' string. ``phonetic_chain`` is
	        accepted but not used.
	        """
	        ds_code = self._detect_corridor(en_word)
	        dp_codes = self._detect_dp(en_word)
	        network = self.root_to_network.get(root_letters.strip(), '')

	        # NOTE(review): the default corridor is 'DS05→AL', so this renders as
	        # 'F2: DS05→AL→AL' — confirm whether the doubled suffix is intended.
	        parts = [f"F2: {ds_code}→AL"]
	        # The separator is kept byte-for-byte (space, backslash, pipe, space),
	        # written with an escaped backslash because '\|' is an invalid escape.
	        # NOTE(review): the backslash looks like a markdown-escaping artifact —
	        # confirm whether a plain ' | ' was intended.
	        sep = ' \\| '
	        if dp_codes:
	            parts.append(sep.join(dp_codes))
	        if network:
	            parts.append(network)

	        return GateResult(True, {
	            'ds_code': ds_code,
	            'dp_codes': dp_codes,
	            'network_id': network,
	            'foundation_ref': sep.join(parts)
	        })

	    def _detect_corridor(self, word: str) -> str:
	        """Pick a DS corridor code from spelling cues; the first matching rule wins."""
	        w = word.lower()
	        if any(x in w for x in ('ph', 'th', 'ys', 'ps', 'mn')):
	            return 'DS04→DS05'
	        if w.endswith(('tion', 'ment', 'ance', 'ence', 'ity', 'ous')):
	            return 'DS05'
	        if any(x in w for x in ('sch', 'tz', 'gh', 'wh')):
	            return 'DS06'
	        return 'DS05→AL'

	    def _detect_dp(self, word: str) -> list:
	        """Return ['DP08'] for trigger words and -ology / -ics / -phy endings, else []."""
	        w = word.lower()
	        dp = []
	        if w in self.DP08_TRIGGERS or w.endswith(('ology', 'ics', 'phy')):
	            dp.append('DP08')
	        return dp


	# ═══════════════════════════════════════════════════════════════════════════════
	# SCORING ENGINE
	# ═══════════════════════════════════════════════════════════════════════════════

	class Scorer:
	    """10-point scoring for a candidate entry."""

	    def score(self, candidate: RootCandidate, en_word: str,
	              q_result: GateResult, u_result: GateResult,
	              f_result: GateResult) -> tuple:
	        """
	        Score one candidate; returns (score: int, breakdown: dict).

	        v2 scoring — corrected per USLaP_BATCH_ENGINE_PROTOCOL:
	            Token count is capped at 1 point (was 3 — caused semantic-first bias).
	            Positional fidelity (R11) contributes up to 2 points.
	            A transposition flag from R11 costs 2 points instead.
	        Max score breakdown:
	            Q(2) + tokens(1) + positional(2) + U(2) + F(1) + chain(1) + network(1) = 10

	        Penalties are applied at the point they occur and never push the
	        running total below 0; the final score is capped at 10. The
	        breakdown maps each rule that fired to its signed point value.
	        ``en_word`` is accepted but not used.
	        """
	        total = 0
	        breakdown = {}

	        def award(rule, points):
	            nonlocal total
	            total += points
	            breakdown[rule] = points

	        def penalise(rule, points):
	            # Clamp at zero right away, so later awards build on 0, not a negative.
	            nonlocal total
	            total = max(0, total - points)
	            breakdown[rule] = -points

	        # Q-gate pass (+2)
	        if q_result and q_result.passed:
	            award('q_gate_pass', 2)

	        # Token count: a single point, only for 100+ tokens.
	        # The former >=20 and >=5 tiers were removed (semantic-first root selection).
	        token_total = q_result.details.get('token_count', 0) if q_result else 0
	        if token_total >= 100:
	            award('token_count_100', 1)

	        # Positional fidelity (R11), or its transposition penalty.
	        fidelity = getattr(candidate, 'positional_score', 0.5)
	        transposed = getattr(candidate, 'transposition_flag', False)
	        if transposed:
	            penalise('r11_transposition_penalty', 2)
	        elif fidelity >= 0.8:
	            award('positional_fidelity_high', 2)
	        elif fidelity >= 0.5:
	            award('positional_fidelity_ok', 1)

	        # U-gate pass (+2)
	        u_ok = u_result and u_result.passed
	        if u_ok:
	            award('u_gate_pass', 2)

	        # F-gate pass (+1)
	        if f_result and f_result.passed:
	            award('f_gate_pass', 1)

	        # Clean chain (+1): U-gate passed with a chain and nothing unmapped.
	        if u_ok:
	            u_details = u_result.details
	            if u_details.get('phonetic_chain', '') and not u_details.get('unmapped'):
	                award('clean_chain', 1)

	        # Network membership found (+1)
	        if f_result and f_result.details.get('network_id'):
	            award('network_found', 1)

	        # Coverage penalty: the root explains too few of the word's consonants.
	        # One extra consonant is tolerated (OP_NASAL / OP_STOP); two or more are not.
	        surplus = getattr(candidate, 'extra_consonants', 0)
	        if surplus >= 3:
	            penalise('excess_consonants_major', 3)
	        elif surplus == 2:
	            penalise('excess_consonants_minor', 2)

	        return min(total, 10), breakdown


	# ═══════════════════════════════════════════════════════════════════════════════
	# COMPONENT 6 — ClusterExpander
	# ═══════════════════════════════════════════════════════════════════════════════

	class ClusterExpander:
	    """
	    Given a confirmed root, discovers English words sharing that root.

	    Candidate words come from the system wordlist at WORDLIST_PATH and are
	    matched against consonant skeletons built by forward-mapping the root
	    letters. If the wordlist cannot be read, expansion yields nothing.
	    """

	    WORDLIST_PATH = '/usr/share/dict/words'

	    def __init__(self, reversal: PhoneticReversal, existing_terms: dict):
	        self.reversal = reversal
	        self.existing_terms = existing_terms  # en_term.upper() → entry_id
	        self._wordlist: Optional[List[str]] = None
	        self._load_wordlist()

	    def _load_wordlist(self):
	        """Load purely alphabetic words of 4+ characters, lower-cased; [] if the file is unusable."""
	        try:
	            with open(self.WORDLIST_PATH, 'r', encoding='utf-8', errors='ignore') as f:
	                self._wordlist = [word.lower() for word in map(str.strip, f)
	                                  if word.isalpha() and len(word) >= 4]
	            print(f" ClusterExpander: {len(self._wordlist):,} words in wordlist")
	        except FileNotFoundError:
	            print(f" ClusterExpander: wordlist not found at {self.WORDLIST_PATH} — cluster expansion limited")
	            self._wordlist = []
	        except OSError as e:
	            # Unreadable for another reason (permissions, path is a directory, ...):
	            # degrade the same way instead of failing construction.
	            print(f" ClusterExpander: wordlist unreadable at {self.WORDLIST_PATH} ({e}) — cluster expansion limited")
	            self._wordlist = []

	    def _build_consonant_patterns(self, root_letters: str) -> list:
	        """
	        Forward-map root letters to all EN consonant skeletons.

	        Each root letter contributes its forward-map outputs of length 1-2
	        (empty outputs are excluded). A letter with no usable output makes
	        the whole result empty. Returns the unique skeletons, sorted.
	        """
	        root_list = [l.strip() for l in re.split(r'[\-\s]+', root_letters) if l.strip()]
	        per_root = []
	        for ar in root_list:
	            info = self.reversal.forward_map.get(ar)
	            if info:
	                _, en_outputs = info
	                per_root.append([e for e in en_outputs if 1 <= len(e) <= 2])
	            else:
	                per_root.append([])
	        # One skeleton per combination of outputs; skip the empty skeleton
	        # produced when there are no root letters at all.
	        skeletons = {''.join(combo) for combo in itertools.product(*per_root)}
	        skeletons.discard('')
	        return sorted(skeletons)

	    def _word_matches_pattern(self, word: str, patterns: list) -> bool:
	        """
	        Check if the word's consonant skeleton contains a root pattern with enough coverage.

	        v2 rules:
	        - Patterns shorter than 3 characters are ignored (prevents spurious
	          1-2 char matches like 'bc').
	        - The pattern must cover at least 40% of the word's consonants
	          (prevents long words matching on a tiny skeleton).
	        """
	        consonants = ''.join(self.reversal.extract_consonants(word))
	        if not consonants:
	            return False
	        return any(
	            len(pat) >= 3
	            and pat in consonants
	            and len(pat) / len(consonants) >= 0.40
	            for pat in patterns
	        )

	    def expand(self, root_letters: str, source_en_term: str,
	               depth: int = 0, limit: int = 15) -> list:
	        """
	        Find English words sharing ``root_letters``.

	        Args:
	            root_letters: root letters separated by '-' or whitespace.
	            source_en_term: the word that led to this root; never returned.
	            depth: current expansion depth; nothing is returned once it
	                reaches MAX_CLUSTER_DEPTH.
	            limit: maximum number of words returned (non-negative). The
	                default of 15 replaced an earlier 50, which produced
	                dictionary dumps.

	        Returns:
	            Up to ``limit`` words in wordlist order, excluding
	            ``source_en_term`` and anything already in ``existing_terms``.
	        """
	        if depth >= MAX_CLUSTER_DEPTH or not self._wordlist:
	            return []

	        # Patterns under 3 characters can never match, so drop them once here.
	        patterns = [pat for pat in self._build_consonant_patterns(root_letters)
	                    if len(pat) >= 3]
	        if not patterns:
	            return []

	        source_key = source_en_term.upper()
	        matches = (
	            word for word in self._wordlist
	            if word.upper() != source_key
	            and word.upper() not in self.existing_terms
	            and self._word_matches_pattern(word, patterns)
	        )
	        # Stop scanning as soon as the cap is reached.
	        return list(itertools.islice(matches, limit))


	# ═══════════════════════════════════════════════════════════════════════════════
	# COMPONENT 7 — EntryWriter
	# ═══════════════════════════════════════════════════════════════════════════════

	class EntryWriter:
	    """
	    Writes confirmed entries to the master workbook.

	    write_entry() appends to A1_ENTRIES and logs to SESSION_INDEX, after
	    copying the workbook into a 'backups' folder next to it; the copy is
	    restored if the write fails. queue_for_oversight() appends to
	    ENGINE_QUEUE and export_queue_json() dumps its PENDING rows to JSON.
	    """

	    # Floor for new entry ids — last confirmed entry is NORM #248.
	    BASELINE_ENTRY_ID = 248

	    def __init__(self, master_file: str):
	        self.master_file = master_file
	        self.backup_dir = str(Path(master_file).parent / 'backups')
	        os.makedirs(self.backup_dir, exist_ok=True)

	    def _backup(self) -> str:
	        """Copy the master file to a timestamped file in the backup folder; return its path."""
	        ts = datetime.now().strftime('%Y%m%d_%H%M%S')
	        dest = os.path.join(self.backup_dir, f"Master_backup_{ts}.xlsx")
	        shutil.copy2(self.master_file, dest)
	        return dest

	    def _next_entry_id(self, ws) -> int:
	        """Return one more than the largest numeric id in column A (never below the baseline + 1)."""
	        max_id = self.BASELINE_ENTRY_ID
	        for row in ws.iter_rows(min_row=2, values_only=True):
	            first = row[0]
	            if first and isinstance(first, (int, float)):
	                max_id = max(max_id, int(first))
	        return max_id + 1

	    def _next_empty_row(self, ws) -> int:
	        """Return the 1-based index of the first data row with no truthy cell, else max_row + 1."""
	        for i, row in enumerate(ws.iter_rows(min_row=2, values_only=True), start=2):
	            if not any(row):
	                return i
	        return ws.max_row + 1

	    # ── public methods ────────────────────────────────────────────────────

	    def write_entry(self, entry: EntryRecord) -> int:
	        """
	        Write ``entry`` to A1_ENTRIES and return the assigned ENTRY_ID.

	        The entry is audited for banned terms, the workbook is backed up,
	        and the row is written into the first empty data row. On any
	        failure the backup is copied back over the master file and the
	        exception is re-raised. ``entry.entry_id`` is set as a side effect.
	        """
	        # Self-audit: scan for banned terms (warns only, never blocks).
	        self._self_audit(entry)

	        backup = self._backup()
	        try:
	            wb = load_workbook(self.master_file)
	            try:
	                ws = wb['A1_ENTRIES']

	                entry.entry_id = self._next_entry_id(ws)
	                target = self._next_empty_row(ws)

	                for col, val in enumerate(entry.to_row(), start=1):
	                    ws.cell(row=target, column=col, value=val)

	                self._log_session(wb, entry)
	                wb.save(self.master_file)
	            finally:
	                wb.close()
	        except Exception as e:
	            print(f" ✗ Write failed: {e} — restoring backup")
	            shutil.copy2(backup, self.master_file)
	            raise

	        # Reported outside the rollback block: a console that cannot print
	        # this line must not undo a write that already succeeded.
	        print(f" ✓ Written: #{entry.entry_id} {entry.en_term} → {entry.root_letters} (row {target})")
	        return entry.entry_id

	    def queue_for_oversight(self, entry: EntryRecord, flag_reason: str,
	                            q_pass: bool, u_pass: bool, f_pass: bool) -> str:
	        """
	        Append ``entry`` to ENGINE_QUEUE as PENDING; return its QUEUE_ID.

	        The sheet (with its header row) is created if missing. Errors are
	        printed and '' is returned.

	        NOTE(review): unlike write_entry, no backup is taken before saving —
	        confirm whether that is intended.
	        """
	        try:
	            wb = load_workbook(self.master_file)
	            try:
	                if 'ENGINE_QUEUE' not in wb.sheetnames:
	                    ws = wb.create_sheet('ENGINE_QUEUE')
	                    headers = [
	                        'QUEUE_ID', 'STATUS', 'INPUT_TERM', 'ENTRY_CLASS', 'CANDIDATE_ROOT',
	                        'Q_GATE', 'U_GATE', 'F_GATE', 'PHONETIC_CHAIN', 'DRAFT_ENTRY_ID',
	                        'SCORE', 'FLAG_REASON', 'DISCOVERED_VIA', 'TIMESTAMP',
	                        'USER_DECISION', 'DECISION_TIMESTAMP'
	                    ]
	                    for ci, h in enumerate(headers, 1):
	                        ws.cell(row=1, column=ci, value=h)
	                else:
	                    ws = wb['ENGINE_QUEUE']

	                # Queue ids follow the row position: the first data row (row 2) is Q0001.
	                nxt = ws.max_row + 1
	                queue_id = f"Q{nxt - 1:04d}"
	                ts = datetime.now().strftime('%Y-%m-%d %H:%M')

	                row_vals = [
	                    queue_id, 'PENDING', entry.en_term, 'LINGUISTIC', entry.root_letters,
	                    'PASS' if q_pass else 'FAIL',
	                    'PASS' if u_pass else 'FAIL',
	                    'PASS' if f_pass else 'FAIL',
	                    entry.phonetic_chain, None,
	                    entry.score, flag_reason, 'ENGINE_AUTO', ts, None, None
	                ]
	                for ci, v in enumerate(row_vals, 1):
	                    ws.cell(row=nxt, column=ci, value=v)

	                wb.save(self.master_file)
	            finally:
	                wb.close()
	            print(f" → Queued: {queue_id} ({entry.en_term}) — {flag_reason}")
	            return queue_id
	        except Exception as e:
	            print(f" ENGINE_QUEUE write error: {e}")
	            return ''

	    def export_queue_json(self, output_dir: str):
	        """
	        Export PENDING ENGINE_QUEUE rows to <output_dir>/engine_queue_export.json.

	        Does nothing when the sheet is missing. All cell values are written
	        as strings ('' for empty cells). Errors are printed, not raised.
	        """
	        try:
	            wb = load_workbook(self.master_file, read_only=True, data_only=True)
	            try:
	                if 'ENGINE_QUEUE' not in wb.sheetnames:
	                    return
	                headers = None
	                rows = []
	                for row in wb['ENGINE_QUEUE'].iter_rows(values_only=True):
	                    if headers is None:
	                        headers = list(row)
	                        continue
	                    if not any(row):
	                        continue
	                    d = dict(zip(headers, row))
	                    if str(d.get('STATUS', '')).upper() == 'PENDING':
	                        rows.append({k: (str(v) if v is not None else '') for k, v in d.items()})
	            finally:
	                wb.close()

	            os.makedirs(output_dir, exist_ok=True)
	            out = os.path.join(output_dir, 'engine_queue_export.json')
	            with open(out, 'w', encoding='utf-8') as f:
	                json.dump({'pending_count': len(rows), 'entries': rows, 'exported': datetime.now().isoformat()}, f,
	                          ensure_ascii=False, indent=2)
	            print(f" Queue exported: {len(rows)} PENDING → {out}")
	        except Exception as e:
	            print(f" Queue export error: {e}")

	    # ── internal helpers ──────────────────────────────────────────────────

	    def _self_audit(self, entry: EntryRecord):
	        """Print a warning for each banned term found in the entry's text fields."""
	        fields = [entry.qur_meaning, entry.foundation_ref, entry.phonetic_chain]
	        text = ' '.join(f for f in fields if f).lower()
	        for term in BANNED_TERMS:
	            if term in text:
	                print(f" ⚠ SELF-AUDIT: banned term '{term}' detected in entry {entry.en_term} — please review")

	    def _log_session(self, wb, entry: EntryRecord):
	        """Append a gate-closure row to SESSION_INDEX; failures are printed, not raised."""
	        try:
	            ws = wb['SESSION_INDEX']
	            nxt = ws.max_row + 1
	            ts = datetime.now().strftime('%Y-%m-%d %H:%M')
	            # The separators are kept byte-for-byte (backslash + pipe), written
	            # with escaped backslashes because '\|' is an invalid escape.
	            # NOTE(review): the backslashes look like a markdown-escaping
	            # artifact — confirm whether plain ' | ' was intended.
	            vals = [
	                'GATE CLOSURE', entry.entry_id,
	                f"{entry.en_term} → {entry.root_letters} \\| Q+U+F \\| Score {entry.score}/10",
	                'A1_ENTRIES', 'CONFIRMED', f"Engine auto-write {ts}"
	            ]
	            for ci, v in enumerate(vals, 1):
	                ws.cell(row=nxt, column=ci, value=v)
	        except Exception as e:
	            print(f" SESSION_INDEX log error: {e}")


	# ═══════════════════════════════════════════════════════════════════════════════
	# COMPONENT 8 — ReportGenerator (360-degree HTML output)
	# ═══════════════════════════════════════════════════════════════════════════════

	# Stylesheet text for the HTML reports: dark background, gold accents.
	# Colours are declared once as CSS custom properties in :root (--gold, --dark,
	# --panel, --border, --text, --accent, --pass, --fail); the rules below style
	# the report header, sections, Arabic text, phonetic chains, gate pass/fail
	# boxes, score/DP/network tags and tables.
	# NOTE(review): presumably embedded by ReportGenerator — the usage is outside this view.
	DARK_GOLD_CSS = """
	:root{--gold:#C9A84C;--dark:#1a1a1a;--panel:#242424;--border:#3a3a2a;
	--text:#e8e0d0;--accent:#8B6914;--pass:#4a7c4e;--fail:#7c4a4a;}
	*{box-sizing:border-box;margin:0;padding:0;}
	body{background:var(--dark);color:var(--text);font-family:Georgia,serif;
	line-height:1.7;padding:2rem;}
	.report-header{border:2px solid var(--gold);padding:1.5rem;margin-bottom:2rem;
	background:var(--panel);}
	.report-title{color:var(--gold);font-size:1.8rem;font-weight:bold;}
	.report-meta{color:#999;font-size:.85rem;margin-top:.5rem;}
	.section{border-left:3px solid var(--gold);margin-bottom:1.5rem;
	padding:1rem 1.5rem;background:#1e1e1e;}
	.section-title{color:var(--gold);font-size:1.1rem;font-weight:bold;
	margin-bottom:.75rem;text-transform:uppercase;letter-spacing:.1em;
	border-bottom:1px solid var(--border);padding-bottom:.5rem;}
	.att{color:#d4c4a0;font-style:italic;margin:.3rem 0;}
	.arabic{font-size:1.3rem;direction:rtl;color:var(--gold);}
	.chain{font-family:monospace;background:#2a2a1a;padding:.4rem .8rem;
	border-radius:3px;color:#d4c080;margin:.3rem 0;display:block;}
	.gate-pass{color:#6fbf73;font-weight:bold;}
	.gate-fail{color:#f44336;font-weight:bold;}
	.score-badge{display:inline-block;background:var(--gold);color:var(--dark);
	padding:.2rem .6rem;border-radius:12px;font-weight:bold;font-size:.9rem;}
	.dp-tag{display:inline-block;background:#3a1a1a;border:1px solid #7c4a4a;
	color:#f08080;padding:.1rem .4rem;border-radius:3px;font-size:.8rem;margin:.1rem;}
	.network-tag{display:inline-block;background:#1a2a3a;border:1px solid var(--gold);
	color:var(--gold);padding:.2rem .5rem;border-radius:3px;font-size:.85rem;}
	.decay-arrow{color:#888;}
	table{width:100%;border-collapse:collapse;margin-top:.5rem;}
	th{background:var(--accent);color:var(--dark);padding:.4rem .8rem;text-align:left;}
	td{padding:.35rem .8rem;border-bottom:1px solid var(--border);}
	tr:nth-child(even) td{background:#222;}
	.quf-grid{display:grid;grid-template-columns:repeat(3,1fr);gap:1rem;margin:.75rem 0;}
	.gate-box{padding:.75rem;border:1px solid var(--border);text-align:center;border-radius:4px;}
	.gate-box.pass{border-color:var(--pass);background:#1a2a1a;}
	.gate-box.fail{border-color:var(--fail);background:#2a1a1a;}
	.no-data{color:#666;font-style:italic;}
	"""


	class ReportGenerator:
	    """
	    Generates comprehensive 360-degree HTML reports from all lattice domains.
	    8 sections: Linguistic | Qur'anic | Cluster | Degradation |
	    Intelligence | Mathematical | Current vs Original | Open Investigations

	    Sheets of the master workbook are read on first use and cached per
	    instance. Every data value interpolated into the markup is HTML-escaped,
	    and empty cells are rendered as a placeholder instead of the text "None".
	    """

	    def __init__(self, master_file: str, reports_dir: str):
	        """Remember the workbook path and make sure *reports_dir* exists."""
	        self.master_file = master_file
	        self.reports_dir = reports_dir
	        os.makedirs(reports_dir, exist_ok=True)
	        # sheet name -> list of row dicts (header text -> cell value)
	        self._cache: Dict[str, list] = {}

	    # ── small helpers ─────────────────────────────────────────────────────────

	    @staticmethod
	    def _esc(value) -> str:
	        """Return *value* as text that is safe inside HTML element content."""
	        # Function-scope import keeps this class self-contained.
	        from html import escape
	        # quote=False: values are only placed in text content, never in
	        # attribute values, so apostrophes/quotes can stay readable.
	        return escape(str(value), quote=False)

	    @staticmethod
	    def _cell(row: dict, key: str, default='—'):
	        """Return ``row[key]``, or *default* if the key is absent or the cell is empty."""
	        value = row.get(key) if row else None
	        return default if value is None or value == '' else value

	    # ── sheet loader ──────────────────────────────────────────────────────────

	    def _load_sheet(self, name: str) -> list:
	        """Return the rows of sheet *name* as dicts keyed by the header row.

	        The first row supplies the headers (blank headers become ``c<index>``);
	        rows whose cells are all falsy are skipped. Successful loads are
	        cached. A missing sheet or any read error yields ``[]``.
	        """
	        if name in self._cache:
	            return self._cache[name]
	        wb = None
	        try:
	            wb = load_workbook(self.master_file, read_only=True, data_only=True)
	            if name not in wb.sheetnames:
	                return []
	            headers = None
	            rows = []
	            for row in wb[name].iter_rows(values_only=True):
	                if headers is None:
	                    headers = [str(h).strip() if h else f'c{i}' for i, h in enumerate(row)]
	                    continue
	                if not any(row):
	                    continue
	                rows.append(dict(zip(headers, row)))
	            self._cache[name] = rows
	            return rows
	        except Exception as e:
	            print(f" Report: cannot load {name}: {e}")
	            return []
	        finally:
	            # Read-only workbooks keep the file handle open until closed,
	            # so close on every path, including errors.
	            if wb is not None:
	                wb.close()

	    def _find_entry(self, en_term: str) -> dict:
	        """Return the A1_ENTRIES row whose EN_TERM equals *en_term*, else ``{}``.

	        Comparison ignores case and surrounding whitespace. A blank query
	        or a blank EN_TERM cell never matches.
	        """
	        wanted = str(en_term or '').strip().upper()
	        if not wanted:
	            return {}
	        for e in self._load_sheet('A1_ENTRIES'):
	            # `or ''` stops an empty cell from becoming the string "None".
	            if str(e.get('EN_TERM', '') or '').strip().upper() == wanted:
	                return e
	        return {}

	    def _get_derivatives(self, root_id: str) -> list:
	        """Return the A4_DERIVATIVES rows whose ROOT_ID equals *root_id*."""
	        wanted = str(root_id or '').strip()
	        if not wanted:
	            return []
	        return [d for d in self._load_sheet('A4_DERIVATIVES')
	                if str(d.get('ROOT_ID', '')).strip() == wanted]

	    def _get_network_members(self, network_id: str) -> list:
	        """Return the A1_ENTRIES rows whose NETWORK_ID equals *network_id*."""
	        wanted = str(network_id or '').strip()
	        if not wanted:
	            return []
	        return [e for e in self._load_sheet('A1_ENTRIES')
	                if str(e.get('NETWORK_ID', '')).strip() == wanted]

	    def _search_consolidated(self, term: str, limit: int = 8) -> list:
	        """Return up to *limit* EXCEL_DATA_CONSOLIDATED rows containing *term*.

	        The match is a case-insensitive substring test over all non-empty
	        cell values of a row. A blank *term* returns ``[]``; without that
	        guard the empty string would "match" every row.
	        """
	        needle = str(term or '').strip().lower()
	        if not needle:
	            return []
	        matches = []
	        for row in self._load_sheet('EXCEL_DATA_CONSOLIDATED'):
	            text = ' '.join(str(v) for v in row.values() if v).lower()
	            if needle in text:
	                matches.append(row)
	                if len(matches) >= limit:
	                    break
	        return matches

	    # ── section builders ──────────────────────────────────────────────────────

	    def _s1_linguistic(self, en_term: str, entry: dict, result: "ProcessResult") -> str:
	        """Build Section 1 (root, score, phonetic chain, pattern, Q/U/F gates).

	        Values come from the lattice *entry* when one exists (all three gates
	        are then shown as PASS); otherwise they are taken from *result*.
	        """
	        esc, cell = self._esc, self._cell
	        if entry:
	            ar_word = cell(entry, 'AR_WORD')
	            root_let = cell(entry, 'ROOT_LETTERS')
	            root_id = cell(entry, 'ROOT_ID')
	            score = cell(entry, 'SCORE')
	            pattern = str(cell(entry, 'PATTERN', 'A')).split('+')[0]
	            chain = cell(entry, 'PHONETIC_CHAIN')
	            inv = cell(entry, 'INVERSION_TYPE', 'HIDDEN')
	            qur_mean = cell(entry, 'QUR_MEANING')
	            found_ref = cell(entry, 'FOUNDATION_REF')
	            q_cls = u_cls = f_cls = 'pass'
	        else:
	            rc = result.confirmed_root
	            ar_word = rc.ar_word if rc else '—'
	            root_let = rc.letters if rc else '—'
	            root_id = '—'
	            score = result.entry_record.score if result.entry_record else '—'
	            pattern = 'A'
	            chain = rc.phonetic_chain if rc else '—'
	            inv = 'HIDDEN'
	            qur_mean = '—'
	            found_ref = result.f_gate.details.get('foundation_ref', '—') if result.f_gate else '—'
	            q_cls = 'pass' if result.q_gate and result.q_gate.passed else 'fail'
	            u_cls = 'pass' if result.u_gate and result.u_gate.passed else 'fail'
	            f_cls = 'pass' if result.f_gate and result.f_gate.passed else 'fail'
	        # The label is simply the CSS class in capitals ('pass' -> 'PASS').
	        q_lbl, u_lbl, f_lbl = q_cls.upper(), u_cls.upper(), f_cls.upper()

	        pat_desc = {
	            'A': "Hidden — Allah's Arabic origin invisible to English speaker",
	            'B': "Weaponised — term deployed against its source population",
	            'C': "Confessional — English word confesses its Qur'anic origin",
	            'D': "Jāhilīan — Qur'anic weight stripped within the Arabic-speaking community"
	        }.get(pattern, pattern)

	        return f"""
	<div class="section">
	<div class="section-title">Section 1 — Linguistic Core</div>
	<p class="arabic">{esc(ar_word)}</p>
	<p>Root: <strong>{esc(root_let)}</strong>   ID: {esc(root_id)}
	Score: <span class="score-badge">{esc(score)}/10</span></p>
	<span class="chain">{esc(chain)}</span>
	<p>Pattern: <strong>{esc(pattern)}</strong> — {esc(pat_desc)}</p>
	<p>Inversion: {esc(inv)}</p>
	<p style="color:#aaa;margin-top:.5rem;">{esc(qur_mean)}</p>
	<div class="quf-grid">
	<div class="gate-box {q_cls}"><strong>Q-Gate</strong><br>
	<span class="gate-{q_cls}">{q_lbl}</span><br>Qur'anic attestation</div>
	<div class="gate-box {u_cls}"><strong>U-Gate</strong><br>
	<span class="gate-{u_cls}">{u_lbl}</span><br>Phonetic unity</div>
	<div class="gate-box {f_cls}"><strong>F-Gate</strong><br>
	<span class="gate-{f_cls}">{f_lbl}</span><br>Foundation layer</div>
	</div>
	<p style="color:#888;font-size:.85rem;margin-top:.5rem;">{esc(found_ref)}</p>
	</div>"""

	    def _s2_quranic(self, entry: dict) -> str:
	        """Build Section 2: linked Name of Allah (if any) and up to 10 verse rows."""
	        esc, cell = self._esc, self._cell
	        root_id = str(cell(entry, 'ROOT_ID', '')).strip()
	        refs = []
	        if root_id:
	            refs = [r for r in self._load_sheet('A3_QURAN_REFS')
	                    if str(r.get('ROOT_ID', '')).strip() == root_id]

	        allah_id = str(cell(entry, 'ALLAH_NAME_ID', '')).strip()
	        allah_html = ''
	        if allah_id:
	            for n in self._load_sheet('A2_NAMES_OF_ALLAH'):
	                if str(n.get('ALLAH_NAME_ID', '')).strip() == allah_id:
	                    allah_html = (f'<p>🌟 Name of Allah: '
	                                  f'<strong>{esc(cell(n, "ALLAH_NAME", ""))}'
	                                  f' / {esc(cell(n, "TRANSLITERATION", ""))}'
	                                  f' / {esc(cell(n, "MEANING", ""))}</strong> ({esc(allah_id)})</p>')
	                    break

	        if refs:
	            rows_html = ''.join(
	                f'<tr><td>{esc(cell(r, "VERSE_REF", ""))}</td>'
	                f'<td class="arabic" style="font-size:1.1rem;">{esc(cell(r, "AR_TEXT", ""))}</td>'
	                f'<td><em>{esc(cell(r, "TRANSLITERATION", ""))}</em></td>'
	                f'<td>{esc(cell(r, "MEANING", ""))}</td></tr>'
	                for r in refs[:10]
	            )
	        else:
	            rows_html = '<tr><td colspan="4" class="no-data">No Qur\'anic references indexed for this root yet</td></tr>'

	        return f"""
	<div class="section">
	<div class="section-title">Section 2 — Qur'anic Context</div>
	{allah_html}
	<table>
	<tr><th>Verse</th><th>Arabic</th><th>Transliteration</th><th>Meaning</th></tr>
	{rows_html}
	</table>
	</div>"""

	    def _s3_cluster(self, entry: dict) -> str:
	        """Build Section 3: network title, network members, first 25 derivatives."""
	        esc, cell = self._esc, self._cell
	        net_id = str(cell(entry, 'NETWORK_ID', '')).strip()
	        root_id = str(cell(entry, 'ROOT_ID', '')).strip()
	        net_title = ''

	        if net_id:
	            for n in self._load_sheet('M4_NETWORKS'):
	                if str(n.get('NETWORK_ID', '')).strip() == net_id:
	                    net_title = f"{net_id}: {cell(n, 'TITLE', '')} — {cell(n, 'LINK_VERSE', '')}"
	                    break

	        members = self._get_network_members(net_id)
	        mem_html = ''.join(
	            f'<tr><td>#{esc(cell(m, "ENTRY_ID", ""))}</td><td>{esc(cell(m, "EN_TERM", ""))}</td>'
	            f'<td class="arabic">{esc(cell(m, "AR_WORD", ""))}</td>'
	            f'<td>{esc(cell(m, "ROOT_LETTERS", ""))}</td></tr>'
	            for m in members
	        ) or '<tr><td colspan="4" class="no-data">No network membership — new network candidate</td></tr>'

	        derivs = self._get_derivatives(root_id)
	        deriv_list = ', '.join(str(cell(d, 'EN_DERIVATIVE', '')) for d in derivs[:25])
	        deriv_ct = len(derivs)
	        # Fallback markup is chosen before escaping so only data is escaped.
	        net_html = esc(net_title) if net_title else 'No network assigned'
	        deriv_html = esc(deriv_list) if deriv_list else '<span class="no-data">None indexed yet</span>'

	        return f"""
	<div class="section">
	<div class="section-title">Section 3 — Cluster / Network</div>
	<p><span class="network-tag">{net_html}</span></p>
	<table style="margin-top:.75rem;">
	<tr><th>#</th><th>Term</th><th>Arabic</th><th>Root</th></tr>
	{mem_html}
	</table>
	<p style="margin-top:.75rem;"><strong>Derivatives ({deriv_ct}+):</strong>
	{deriv_html}</p>
	</div>"""

	    def _s4_degradation(self, entry: dict, result: "ProcessResult") -> str:
	        """Build Section 4: the decay trail from the Arabic word to the input term.

	        The DS code is read from the foundation reference; ``DS05`` is assumed
	        when none is present.
	        """
	        esc, cell = self._esc, self._cell
	        ar_word = cell(entry, 'AR_WORD')
	        if entry:
	            found_ref = cell(entry, 'FOUNDATION_REF', '')
	        else:
	            found_ref = result.f_gate.details.get('foundation_ref', '') if result.f_gate else ''
	        ds_table = {
	            'DS04': '"Greek" — Stage 3 decay',
	            'DS05': '"Latin" — Stage 3-4 decay',
	            'DS06': 'Germanic — Stage 4-5 decay',
	            'DS04→DS05': '"Greek" → "Latin" corridor',
	        }
	        # Also capture an optional "→DSnn" tail; with a bare DS\d+ pattern the
	        # corridor key in the table above could never be selected.
	        ds_match = re.search(r'DS\d+(?:\s*→\s*DS\d+)?', str(found_ref))
	        ds_code = re.sub(r'\s+', '', ds_match.group(0)) if ds_match else 'DS05'
	        if ds_code not in ds_table:
	            # Unknown corridor: describe its first stage, as before.
	            ds_code = ds_code.split('→')[0]
	        ds_desc = ds_table.get(ds_code, f'{ds_code} — downstream corridor')

	        return f"""
	<div class="section">
	<div class="section-title">Section 4 — Degradation Trail</div>
	<p>
	<span class="arabic">{esc(ar_word)}</span> (Allah's Arabic — ORIG1)
	<span class="decay-arrow"> → </span>{ds_desc}
	<span class="decay-arrow"> → </span>{esc(result.input_term.upper())} (current downstream form)
	</p>
	<p style="color:#888;font-size:.85rem;margin-top:.5rem;">{esc(found_ref)}</p>
	<p style="margin-top:.5rem;color:#aaa;">
	Direction of flow: Allah's Arabic is ALWAYS the source.
	All downstream forms are degradations — never origins.
	</p>
	</div>"""

	    def _s5_intelligence(self, en_term: str, root_letters: str) -> str:
	        """Build Section 5: up to 5 EXCEL_DATA_CONSOLIDATED rows mentioning the term.

	        Falls back to searching for the root letters (hyphens removed) when
	        the term itself has no match.
	        """
	        esc = self._esc
	        matches = self._search_consolidated(en_term)
	        if not matches:
	            matches = self._search_consolidated(str(root_letters or '').replace('-', ''))

	        if matches:
	            # Joined outside an f-string expression: a backslash there is a
	            # SyntaxError before Python 3.12.
	            rows_html = ''.join(
	                '<tr><td>'
	                + ' | '.join(esc(str(v)[:100]) for v in list(m.values())[:4] if v)
	                + '</td></tr>'
	                for m in matches[:5]
	            )
	        else:
	            rows_html = '<tr><td class="no-data">No mentions found in EXCEL_DATA_CONSOLIDATED for this term</td></tr>'

	        return f"""
	<div class="section">
	<div class="section-title">Section 5 — Historical & Intelligence</div>
	<table>
	<tr><th>EXCEL_DATA_CONSOLIDATED search (top 5 matches)</th></tr>
	{rows_html}
	</table>
	<p style="color:#666;font-size:.8rem;margin-top:.5rem;">
	For full operator network data check: Historic Lattice / Updated Intelligence /
	</p>
	</div>"""

	    def _s6_mathematical(self, root_letters: str) -> str:
	        """Build Section 6: a static pointer to the ratio/formula domain."""
	        root_html = self._esc(root_letters) if root_letters else '—'
	        return f"""
	<div class="section">
	<div class="section-title">Section 6 — Mathematical / Ratio</div>
	<p class="no-data">
	Ratio/formula connections for root {root_html} —
	check A1_ENTRIES for F-series entries sharing this root, or
	submit a ratio query (e.g. "4/3") for formula domain analysis.
	</p>
	</div>"""

	    def _s7_current_vs_original(self, en_term: str, entry: dict) -> str:
	        """Build Section 7: current usage vs. Qur'anic meaning and pattern class."""
	        esc, cell = self._esc, self._cell
	        pattern = str(cell(entry, 'PATTERN', 'A')).split('+')[0]
	        qur_mean = cell(entry, 'QUR_MEANING')
	        gap_desc = {
	            'A': 'HIDDEN — complete etymological amnesia in the downstream speaker',
	            'B': 'WEAPONISED — term deployed as attack tool against the source civilisation',
	            'C': 'CONFESSIONAL — the downstream form confesses its own Qur\'anic origin in its sounds',
	            'D': 'JĀHILĪAN — Qur\'anic weight stripped within the Arabic-speaking community itself'
	        }.get(pattern, pattern)

	        return f"""
	<div class="section">
	<div class="section-title">Section 7 — Current Usage vs. Original Meaning</div>
	<table>
	<tr><th>Dimension</th><th>Content</th></tr>
	<tr><td>Current English usage</td><td>{esc(en_term.lower())}</td></tr>
	<tr><td>Qur'anic original meaning</td><td>{esc(qur_mean)}</td></tr>
	<tr><td>Classification</td><td>{esc(gap_desc)}</td></tr>
	<tr><td>Decay summary</td>
	<td>The downstream form preserves the consonant skeleton but has lost the Qur'anic semantic weight</td></tr>
	</table>
	</div>"""

	    def _s8_open_investigations(self, root_letters: str) -> str:
	        """Build Section 8: PENDING ENGINE_QUEUE rows whose candidate root matches."""
	        esc, cell = self._esc, self._cell
	        wanted = str(root_letters or '').strip()
	        pending = []
	        if wanted:  # with no root known there is nothing to match against
	            pending = [q for q in self._load_sheet('ENGINE_QUEUE')
	                       if str(q.get('STATUS', '')).upper() == 'PENDING'
	                       and str(q.get('CANDIDATE_ROOT', '')).strip() == wanted]

	        if pending:
	            rows_html = ''.join(
	                f'<tr><td>{esc(cell(p, "QUEUE_ID", ""))}</td><td>{esc(cell(p, "INPUT_TERM", ""))}</td>'
	                f'<td>{esc(cell(p, "FLAG_REASON", ""))}</td><td>PENDING</td></tr>'
	                for p in pending
	            )
	        else:
	            rows_html = '<tr><td colspan="4" class="no-data">No pending queue entries for this root</td></tr>'

	        return f"""
	<div class="section">
	<div class="section-title">Section 8 — Open Investigations</div>
	<table>
	<tr><th>Queue ID</th><th>Term</th><th>Flag Reason</th><th>Status</th></tr>
	{rows_html}
	</table>
	<p style="color:#666;font-size:.8rem;margin-top:.5rem;">
	Open USLaP_Oversight_Dashboard.html to approve/reject pending entries.
	</p>
	</div>"""

	    # ── main generate method ──────────────────────────────────────────────────

	    def generate(self, result: "ProcessResult") -> str:
	        """Generate full 360-degree HTML report. Returns file path.

	        The file is written to ``reports_dir`` as
	        ``<TERM>_360_Report_<YYYYMMDD>.html`` (non-word characters of the
	        term replaced by ``_``), overwriting a report of the same name.
	        """
	        esc = self._esc
	        en_term = result.input_term.upper()
	        entry = self._find_entry(en_term)
	        if entry:
	            root_letters = entry.get('ROOT_LETTERS')
	        else:
	            root_letters = result.confirmed_root.letters if result.confirmed_root else ''
	        # An empty ROOT_LETTERS cell is None; normalise so the section
	        # builders can call string methods on it.
	        root_letters = str(root_letters or '')

	        s1 = self._s1_linguistic(en_term, entry, result)
	        s2 = self._s2_quranic(entry)
	        s3 = self._s3_cluster(entry)
	        s4 = self._s4_degradation(entry, result)
	        s5 = self._s5_intelligence(en_term, root_letters)
	        s6 = self._s6_mathematical(root_letters)
	        s7 = self._s7_current_vs_original(en_term, entry)
	        s8 = self._s8_open_investigations(root_letters)

	        date_str = datetime.now().strftime('%Y-%m-%d')
	        entry_id = self._cell(entry, 'ENTRY_ID')
	        root_html = esc(root_letters) if root_letters else '—'

	        # Separators are plain '|': the former '\|' was an invalid escape
	        # sequence that leaked a backslash into the page.
	        page = f"""<!DOCTYPE html>
	<html lang="en" dir="ltr">
	<head>
	<meta charset="UTF-8">
	<meta name="viewport" content="width=device-width, initial-scale=1.0">
	<title>USLaP 360 Report — {esc(en_term)}</title>
	<style>{DARK_GOLD_CSS}</style>
	</head>
	<body>
	<div class="report-header">
	<div class="report-title">USLaP 360° REPORT — {esc(en_term)}</div>
	<div class="report-meta">
	Generated: {date_str}  |
	Entry: #{esc(entry_id)}  |
	Root: {root_html}  |
	Query type: {esc(result.input_type)}
	</div>
	</div>
	{s1}{s2}{s3}{s4}{s5}{s6}{s7}{s8}
	<div style="margin-top:2rem;color:#555;font-size:.75rem;border-top:1px solid #333;padding-top:1rem;">
	USLaP Unified Linguistic Lattice — Q14:24: أصلها ثابت وفرعها في السماء
	</div>
	</body>
	</html>"""

	        # Preserve Arabic chars in filename; replace only non-word chars
	        fname = re.sub(r'[^\w]', '_', en_term, flags=re.UNICODE).strip('_') or 'REPORT'
	        fpath = os.path.join(self.reports_dir, f"{fname}_360_Report_{date_str.replace('-','')}.html")
	        with open(fpath, 'w', encoding='utf-8') as f:
	            f.write(page)
	        print(f" Report saved: {fpath}")
	        return fpath


	# ═══════════════════════════════════════════════════════════════════════════════
	# MAIN ORCHESTRATOR — USLaPEngine
	# ═══════════════════════════════════════════════════════════════════════════════

	class USLaPEngine:
	"""
	Coordinates all 8 components.
	Single public entry point: engine.process(input_term)
	"""

	def __init__(self, master_file: str = MASTER_FILE,
	             quran_file: str = QURAN_FILE,
	             reports_dir: str = REPORTS_DIR,
	             skip_reports: bool = False):
	    """Print the banner, validate the input files and build every component.

	    Raises FileNotFoundError when *master_file* or *quran_file* does not
	    exist. The Kashgari corpus is optional: if its file is absent a warning
	    is printed and ``kashgari_index`` / ``kashgari_gate`` are set to None.
	    """
	    rule = "═" * 62
	    print("\n" + rule)
	    print(" USLaP Autonomous Engine v3.3 (cognate-crossref+parallel-ORIG2+depal-competition)")
	    print(" بِسْمِ اللَّهِ الرَّحْمَٰنِ الرَّحِيمِ")
	    print(rule)
	    self.skip_reports = skip_reports

	    # Fail fast on the first required file that is absent.
	    missing = next((candidate for candidate in (master_file, quran_file)
	                    if not os.path.exists(candidate)), None)
	    if missing is not None:
	        raise FileNotFoundError(f"Required file not found: {missing}")

	    print("\nLoading components...")
	    self.master_file = master_file
	    self.router = InputRouter()
	    self.q_gate = QGate(quran_file)

	    # English reversal and its U-Gate
	    self.reversal = PhoneticReversal(master_file, self.q_gate)
	    self.u_gate = UGate(self.reversal)

	    # Russian reversal (v3.0); UGate is parameterised by the reversal it wraps
	    self.ru_reversal = RussianPhoneticReversal(master_file, self.q_gate)
	    self.ru_u_gate = UGate(self.ru_reversal)

	    # Components shared by both languages
	    self.f_gate = FGate(master_file)
	    self.scorer = Scorer()
	    self.writer = EntryWriter(master_file)
	    self.reporter = ReportGenerator(master_file, reports_dir)

	    # ORIG2 track (v2.2): Kashgari corpus index + gate, only if the file exists
	    if not os.path.exists(KASHGARI_FILE):
	        print(f" WARNING: Kashgari corpus not found at {KASHGARI_FILE} — ORIG2 track disabled")
	        self.kashgari_index = None
	        self.kashgari_gate = None
	    else:
	        self.kashgari_index = KashgariIndex(KASHGARI_FILE)
	        self.kashgari_gate = KashgariGate(self.kashgari_index)

	    # Term -> entry-id maps, filled by _build_dedup_cache()
	    self.existing_terms: Dict[str, int] = {}
	    self.existing_ru_terms: Dict[str, int] = {}
	    self._build_dedup_cache()
	    self.expander = ClusterExpander(self.reversal, self.existing_terms)
	    print("\n✓ Engine ready (EN + RU).\n" + rule)

	def _build_dedup_cache(self):
	    """Fill ``existing_terms`` / ``existing_ru_terms`` from the master workbook.

	    Reads ``A1_ENTRIES`` (EN_TERM -> ENTRY_ID) and ``A1_ЗАПИСИ``
	    (РУС_ТЕРМИН -> ЗАПИСЬ_ID). Terms are stored stripped and upper-cased.
	    Best-effort: errors are printed, never raised, and the workbook is
	    closed on every path.
	    """
	    wb = None
	    try:
	        wb = load_workbook(self.master_file, read_only=True, data_only=True)
	        # English entries
	        self._load_dedup_sheet(wb, 'A1_ENTRIES', 'EN_TERM', 'ENTRY_ID',
	                               self.existing_terms)
	        # Russian entries (v3.0)
	        self._load_dedup_sheet(wb, 'A1_ЗАПИСИ', 'РУС_ТЕРМИН', 'ЗАПИСЬ_ID',
	                               self.existing_ru_terms)
	        print(f" Dedup cache: {len(self.existing_terms)} EN terms, "
	              f"{len(self.existing_ru_terms)} RU terms")
	    except Exception as e:
	        print(f" Dedup cache error: {e}")
	    finally:
	        # A read-only workbook holds the file open until it is closed.
	        if wb is not None:
	            wb.close()

	@staticmethod
	def _load_dedup_sheet(wb, sheet_name: str, term_col: str, id_col: str,
	                      target: Dict[str, int]) -> None:
	    """Index one sheet of *wb* into *target* as ``TERM -> int(id)``."""
	    if sheet_name not in wb.sheetnames:
	        # A workbook without this sheet must not abort the other index.
	        print(f" Dedup cache: sheet '{sheet_name}' not found — skipped")
	        return
	    headers = None
	    for row in wb[sheet_name].iter_rows(values_only=True):
	        if headers is None:
	            headers = [str(h).strip() if h else '' for h in row]
	            continue
	        if not any(row):
	            continue
	        record = dict(zip(headers, row))
	        term = str(record.get(term_col, '') or '').strip().upper()
	        raw_id = record.get(id_col)
	        if not (term and raw_id):
	            continue
	        try:
	            target[term] = int(raw_id)
	        except (TypeError, ValueError):
	            # One malformed ID should not discard every other row.
	            continue

	# ── public process method ─────────────────────────────────────────────────

	def process(self, raw_input: str, dry_run: bool = False) -> ProcessResult:
	    """Classify *raw_input* and run it through the matching pipeline.

	    Routing by detected type: ``phrase`` -> ``_process_phrase``;
	    ``russian_phrase`` -> every key term processed as ``russian_word``,
	    with only the last result returned; ``ratio`` -> a result carrying a
	    hand-off log line; ``quran_ref`` -> ``_process_quran_ref``; anything
	    else -> ``_process_single_term`` on the first key term (or the
	    cleaned input when there are no key terms).
	    """
	    input_type, cleaned, key_terms = self.router.detect(raw_input)

	    # v3.1: an "English" word that is really transliterated Russian is
	    # converted to Cyrillic and re-routed to the Russian pipeline.
	    latin_russian = (input_type == 'english_word'
	                     and self.ru_reversal._is_latin_russian(raw_input))
	    if latin_russian:
	        cyrillic = self.ru_reversal.transliterate_latin(raw_input)
	        print(f" [v3.1] Latin-Russian detected: '{raw_input}' → '{cyrillic}'")
	        input_type, cleaned, key_terms = 'russian_word', cyrillic, [cyrillic]

	    divider = '─' * 62
	    print(f"\n{divider}")
	    print(f" Processing: '{raw_input}'")
	    print(f" Type detected: {input_type}")
	    print(f"{divider}")

	    if input_type == 'phrase':
	        return self._process_phrase(raw_input, key_terms, dry_run)

	    if input_type == 'russian_phrase':
	        # Each Russian word goes through the single-term pipeline on its own.
	        per_word = [self._process_single_term(word, 'russian_word', dry_run)
	                    for word in key_terms]
	        if per_word:
	            return per_word[-1]
	        return ProcessResult(raw_input, 'russian_phrase')

	    if input_type == 'ratio':
	        ratio_result = ProcessResult(raw_input, 'ratio')
	        ratio_result.add_log("Ratio input — hand off to formula domain (run forest_v3 GeneratorEngine)")
	        return ratio_result

	    if input_type == 'quran_ref':
	        return self._process_quran_ref(cleaned)

	    term = key_terms[0] if key_terms else cleaned
	    return self._process_single_term(term, input_type, dry_run)

	# ── single-term pipeline ──────────────────────────────────────────────────

	def _process_single_term(self, term: str, input_type: str,
	dry_run: bool) -> ProcessResult:
	result = ProcessResult(term, input_type)

	# ── LANGUAGE ROUTING (v3.0) ────────────────────────────────────────────
	is_russian = input_type == 'russian_word'
	active_reversal = self.ru_reversal if is_russian else self.reversal
	active_u_gate = self.ru_u_gate if is_russian else self.u_gate
	lang_label = 'RU' if is_russian else 'EN'

	# DEDUP CHECK
	if input_type == 'english_word':
	existing_id = self.existing_terms.get(term.upper())
	if existing_id:
	result.add_log(f"EXISTING ENTRY: {term.upper()} = #{existing_id} (already in lattice)")
	result.existing_entry_id = existing_id
	if not self.skip_reports:
	result.report_path = self.reporter.generate(result)
	return result
	elif is_russian:
	existing_id = self.existing_ru_terms.get(term.upper())
	if existing_id:
	result.add_log(f"EXISTING RU ENTRY: {term.upper()} = #{existing_id} (already in lattice)")
	result.existing_entry_id = existing_id
	if not self.skip_reports:
	result.report_path = self.reporter.generate(result)
	return result

	# DERIVATIVE CHAIN CHECK (Gate 3f) — English only for now
	if input_type == 'english_word':
	parent = KNOWN_DERIVATIVES.get(term.lower())
	if parent:
	parent_id = self.existing_terms.get(parent.upper())
	if parent_id:
	result.add_log(
	f"DERIVATIVE CHAIN: '{term}' is derivative of {parent} (#{parent_id}). "
	f"Route to A4_DERIVATIVES, not A1_ENTRIES."
	)
	result.derivative_of = (parent, parent_id)
	else:
	result.add_log(
	f"DERIVATIVE CHAIN: '{term}' maps to parent '{parent}' but parent "
	f"not yet in lattice. Processing as independent candidate."
	)

	# ═══ v3.3: COGNATE CROSS-REFERENCE ═══════════════════════════════════
	# Sibling Database Principle automated: when processing a Russian word,
	# check if there's an English cognate. Process the ENGLISH form through
	# the English pipeline — it often preserves root consonants better.
	# Example: ДЕСАНТ lost the K from سَكَنَ, but DESCENT preserves it as
	# S-C-N-D → root س-ك-ن at score 9+.
	cognate_result = None
	if input_type in ('english_word', 'russian_word'):
	cognate_result = self._try_cognate_crossref(term, is_russian)
	if cognate_result:
	src = cognate_result.get('source', '')
	if src == 'LATTICE_ENTRY':
	result.add_log(
	f"v3.3 COGNATE: {cognate_result.get('note', '')}"
	)
	elif src == 'EN_PIPELINE':
	result.add_log(
	f"v3.3 COGNATE: English cousin '{cognate_result['en_cousin']}' "
	f"→ root {cognate_result['root_letters']} "
	f"({cognate_result['token_count']} tok, "
	f"EN score={cognate_result['score']}/10, "
	f"chain: {cognate_result.get('phonetic_chain', '—')})"
	)
	elif src == 'ERROR':
	result.add_log(
	f"v3.3 COGNATE: English cousin '{cognate_result['en_cousin']}' "
	f"pipeline error: {cognate_result.get('error', '?')}"
	)
	cognate_result = None # discard errors
	result.cognate_crossref = cognate_result

	# REVERSAL (word → root candidates) — uses language-specific reversal
	if input_type in ('english_word', 'russian_word'):
	result.add_log(f"Running {lang_label} phonetic reversal for '{term}'...")
	candidates = active_reversal.reverse(term)
	result.root_candidates = candidates
	if not candidates:
	# ══ ORIG2 TRACK (v2.2) ══════════════════════════════════════════
	# Q-Gate failed for all candidates → try Kashgari/ORIG2 track
	# Protocol: ROOT_LIST FAIL → Kashgari search → BITIG scoring
	result.add_log("No ORIG1 root found — routing to ORIG2 (Kashgari) track...")
	# v3.3: If cognate found a root, try it before giving up
	if cognate_result and cognate_result.get('source') == 'EN_PIPELINE':
	result.add_log(
	f"v3.3 COGNATE RESCUE: No {lang_label} candidates, but "
	f"English cousin '{cognate_result['en_cousin']}' found "
	f"root {cognate_result['root_letters']} — using cognate root."
	)
	# Inject the cognate root as the primary candidate
	cog_cand = RootCandidate(
	letters=cognate_result['root_letters'],
	token_count=cognate_result['token_count'],
	ar_word=cognate_result.get('ar_word', '')
	)
	cog_cand.phonetic_chain = cognate_result.get('phonetic_chain', '')
	cog_cand.positional_score = cognate_result.get('positional', 0.5)
	cog_cand.operations = cognate_result.get('operations', [])
	candidates = [cog_cand]
	result.root_candidates = candidates
	# Continue to normal scoring below
	else:
	orig2_result = self._try_orig2_track(term, result, dry_run, is_russian=is_russian)
	if orig2_result is not None:
	return orig2_result
	# Both ORIG1 and ORIG2 failed
	result.add_log("Neither ORIG1 nor ORIG2 attested — entry cannot enter lattice")
	return result
	# ═════════════════════════════════════════════════════════════════
	# ═══ v2.5 MULTI-CANDIDATE SCORING ═══════════════════════════════
	# Score top N candidates (up to 3) through full Q→U→F→Scorer pipeline.
	# Pick the candidate with the HIGHEST final score.
	# This prevents the coverage penalty from penalising a root that
	# a lower-ranked candidate (with fewer extra_consonants) would
	# score higher on. COMMAND: ح-م-د (extra=2, score=6) vs
	# س-م-ن via OP_STOP (extra=0, score=8) — the OP_STOP candidate
	# is in the list but was never scored in v2.4.
	# ════════════════════════════════════════════════════════════════
	# v2.5: Score top N candidates. With tier-diverse candidate pools,
	# we may have >3 candidates representing different extra_consonants
	# tiers. Score up to 5 to ensure each tier gets evaluated.
	MULTI_N = min(5, len(candidates))
	best_score = -1
	best_top = candidates[0]
	best_q = None
	best_u = None
	best_f = None
	best_bdown = {}

	for ci in range(MULTI_N):
	cand = candidates[ci]
	cq = self.q_gate.check(cand.letters)
	if not cq.passed:
	continue
	cu = active_u_gate.verify(term, cand.letters, cand.operations)
	cf = self.f_gate.assign(term, cand.letters,
	cu.details.get('phonetic_chain', ''))
	cs, cb = self.scorer.score(cand, term, cq, cu, cf)
	if ci == 0:
	result.add_log(
	f"Candidate #{ci+1}: {cand.letters} "
	f"({cand.token_count} tok, extra={getattr(cand,'extra_consonants',0)}) "
	f"→ score={cs}")
	if cs > best_score:
	best_score = cs
	best_top = cand
	best_q = cq
	best_u = cu
	best_f = cf
	best_bdown = cb
	if ci > 0:
	result.add_log(
	f"Candidate #{ci+1}: {cand.letters} "
	f"({cand.token_count} tok, extra={getattr(cand,'extra_consonants',0)}) "
	f"→ score={cs} ← BETTER than #{1}")

	top = best_top
	q_result = best_q
	u_result = best_u
	f_result = best_f
	score = best_score
	breakdown = best_bdown
	result.add_log(f"Selected: {top.letters} ({top.token_count} tokens, score={score})")

	# ═══ v3.2: PARALLEL ORIG2 CHECK ═══════════════════════════════════
	# Run Kashgari check ALONGSIDE ORIG1 (not just as fallback).
	# If ORIG2 finds a hit while ORIG1 also passed → COMPETING TRACKS.
	# Critical for Russian where >50% of vocabulary is Bitig-corridor.
	if self.kashgari_gate is not None and score >= SCORE_QUEUE:
	orig2_par = self._try_orig2_parallel(term, is_russian)
	if orig2_par and orig2_par.get('passed'):
	o2 = orig2_par
	result.add_log(
	f"v3.2 PARALLEL ORIG2 HIT: Kashgari '{o2['kashgari_translit']}' "
	f"= \"{o2['kashgari_meaning']}\" "
	f"(line {o2['kashgari_line']}, {o2['attestation_type']}, "
	f"ORIG2 score={o2['orig2_score']}/10, "
	f"searched='{o2.get('search_skeleton', '?')}')"
	)
	result.orig2_parallel = orig2_par

	# ═══ v3.2: DEPALATALIZATION COMPETITION ═══════════════════════════
	# If both palatalized AND depalatalized candidates exist with
	# different roots and both pass Q-gate → flag for human review.
	# Example: ВОЖДЬ → و-ج-د (via ж→ج) vs و-د-د (via ж→д depal)
	depal_competitor = None
	if is_russian and score >= SCORE_QUEUE:
	for ci in range(min(5, len(candidates))):
	cand = candidates[ci]
	cand_ops = getattr(cand, 'operations', [])
	if (any('OP_DEPALATAL' in str(op) for op in cand_ops)
	and cand.letters != top.letters):
	depal_competitor = cand
	result.add_log(
	f"v3.2 DEPAL COMPETITION: primary {top.letters} "
	f"({top.token_count} tok) vs depalatalized "
	f"{cand.letters} ({cand.token_count} tok, "
	f"ops={cand_ops})"
	)
	result.competing_depal = cand
	break

	# ═══ v3.3/v3.2: LOW SCORE → COGNATE RESCUE then ORIG2 FALLBACK ═══
	# If ORIG1 candidates scored ≤ SCORE_REJECT:
	# 1. Check if cognate found a better root (v3.3)
	# 2. If not, try Kashgari ORIG2 (v3.2)
	if score <= SCORE_REJECT:
	# v3.3: COGNATE RESCUE — if cognate scored higher, use its root
	if (cognate_result and cognate_result.get('source') == 'EN_PIPELINE'
	and cognate_result.get('score', 0) > score):
	cog = cognate_result
	result.add_log(
	f"v3.3 COGNATE RESCUE: {lang_label} score {score} too low, "
	f"but EN cousin '{cog['en_cousin']}' → {cog['root_letters']} "
	f"(EN score {cog['score']}/10, {cog['token_count']} tok). "
	f"Using cognate root."
	)
	# Replace the top candidate with the cognate root
	cog_cand = RootCandidate(
	letters=cog['root_letters'],
	token_count=cog['token_count'],
	ar_word=cog.get('ar_word', '')
	)
	cog_cand.phonetic_chain = cog.get('phonetic_chain', '')
	cog_cand.positional_score = cog.get('positional', 0.5)
	cog_cand.operations = cog.get('operations', [])
	# Re-score through local Q→U gates
	cq = self.q_gate.check(cog_cand.letters)
	if cq.passed:
	cu = active_u_gate.verify(term, cog_cand.letters,
	cog_cand.operations)
	cf = self.f_gate.assign(term, cog_cand.letters,
	cu.details.get('phonetic_chain', ''))
	cs, cb = self.scorer.score(cog_cand, term, cq, cu, cf)
	# Accept if cognate root scores better locally too
	if cs > score:
	top = cog_cand
	q_result = cq
	u_result = cu
	f_result = cf
	score = cs
	breakdown = cb
	result.add_log(
	f"v3.3 COGNATE RESCUE SUCCESS: local re-score "
	f"{cog_cand.letters} = {cs}/10 (was {best_score})")
	else:
	result.add_log(
	f"v3.3 COGNATE: EN root {cog_cand.letters} scored "
	f"{cs} locally — not better than {score}. "
	f"Keeping original, falling to ORIG2.")

	# v3.2: ORIG2 FALLBACK — if still low, try Kashgari
	if score <= SCORE_REJECT and self.kashgari_gate is not None:
	result.add_log(
	f"ORIG1 score {score} <= {SCORE_REJECT} — trying ORIG2 fallback...")
	orig2_fallback = self._try_orig2_track(
	term, result, dry_run, is_russian=is_russian)
	if orig2_fallback is not None:
	return orig2_fallback
	result.add_log("ORIG2 fallback: no Kashgari match either")

	# ═════════════════════════════════════════════════════════════════
	else:
	# Direct Arabic root
	result.add_log(f"Direct root input: {term}")
	qr = self.q_gate.check(term)
	if not qr.passed:
	result.add_log(f"Q-Gate FAIL: {term} not in Qur'anic root list")
	return result
	top = RootCandidate(letters=term,
	token_count=qr.details.get('token_count', 0),
	ar_word=qr.details.get('ar_word', ''))
	q_result = qr
	u_result = GateResult(True, {'phonetic_chain': '', 'note': 'Direct root — U-Gate N/A'})
	f_result = self.f_gate.assign(term, top.letters, '')
	score, breakdown = self.scorer.score(top, term, q_result, u_result, f_result)

	result.confirmed_root = top

	# Store gate results
	result.q_gate = q_result
	result.add_log(f"Q-Gate: {'PASS' if q_result.passed else 'FAIL'} \| tokens={q_result.details.get('token_count',0)}")

	result.u_gate = u_result
	top.phonetic_chain = u_result.details.get('phonetic_chain', '')
	result.add_log(f"U-Gate: {'PASS' if u_result.passed else 'FAIL'} \| chain: {top.phonetic_chain}")

	result.f_gate = f_result
	result.add_log(f"F-Gate: PASS \| DS: {f_result.details.get('ds_code','—')} "
	f"\| Network: {f_result.details.get('network_id','none')}")

	# Score already computed in multi-candidate loop (or direct root path)
	top.score = score
	result.add_log(f"Score: {score}/10 \| {breakdown}")

	# ═══ v3.4: COMPOUND PARTS ANALYSIS ═══════════════════════════════════════
	# For Russian compound words (САМО+ВАР, ПАРО+ВОЗ), trace both parts
	# INDEPENDENTLY for dual-root reporting. Diagnostic only — the primary
	# result (above) stands unchanged. Human reviewer sees both root traces.
	if is_russian and self.ru_reversal is not None:
	is_cmpd, pfx_part, root_part, bridge, cmpd_label = \
	self.ru_reversal.detect_compound(term)
	if is_cmpd:
	result.add_log(f"v3.4 COMPOUND: {cmpd_label}")
	compound_analysis = {
	'label': cmpd_label, 'prefix': None,
	'root': None, 'bridge': bridge
	}

	# ── Trace PREFIX part (e.g. САМО) ────────────────────────
	# v3.4b: САМО/САМА = pronoun "self/auto" — NOT a root.
	# Do NOT trace. Only trace the ROOT part.
	PRONOUN_PREFIXES = {'само', 'сам', 'сама'}
	if pfx_part.lower() in PRONOUN_PREFIXES:
	compound_analysis['prefix'] = {
	'part': pfx_part.upper(),
	'root': None,
	'token_count': 0,
	'chain': 'PRONOUN (self/auto) — not traced',
	'is_pronoun': True,
	}
	result.add_log(
	f" PREFIX '{pfx_part.upper()}' = pronoun 'self/auto' "
	f"(R13: not a root, not traced)")
	else:
	pfx_cands = self.ru_reversal._reverse_inner(pfx_part)
	for pc in pfx_cands[:3]:
	pq = self.q_gate.check(pc.letters)
	if pq.passed:
	pu = self.ru_u_gate.verify(
	pfx_part, pc.letters, pc.operations or [])
	compound_analysis['prefix'] = {
	'part': pfx_part.upper(),
	'root': pc.letters,
	'token_count': pc.token_count,
	'chain': pu.details.get('phonetic_chain', ''),
	}
	result.add_log(
	f" PREFIX '{pfx_part.upper()}' → {pc.letters} "
	f"({pc.token_count} tok)")
	break
	if not compound_analysis.get('prefix'):
	result.add_log(
	f" PREFIX '{pfx_part.upper()}' → no ORIG1 trilateral root "
	f"(2-consonant prefix, or may be ORIG2)")

	# ── Trace ROOT part (e.g. ВАР) ──────────────────────────
	root_cands = self.ru_reversal._reverse_inner(root_part)
	for rc in root_cands[:3]:
	rq = self.q_gate.check(rc.letters)
	if rq.passed:
	ru_check = self.ru_u_gate.verify(
	root_part, rc.letters, rc.operations or [])
	compound_analysis['root'] = {
	'part': root_part.upper(),
	'root': rc.letters,
	'token_count': rc.token_count,
	'chain': ru_check.details.get('phonetic_chain', ''),
	}
	result.add_log(
	f" ROOT '{root_part.upper()}' → {rc.letters} "
	f"({rc.token_count} tok)")
	break
	if not compound_analysis['root']:
	# Try ORIG2 for root part
	if self.kashgari_gate is not None:
	root_cons = self.ru_reversal.extract_consonants(root_part)
	if root_cons:
	latin_skel = self.ru_reversal.to_latin_skeleton(root_cons)
	k_result_cmp = self.kashgari_gate.check(
	root_part, list(latin_skel))
	if k_result_cmp.passed:
	kd_cmp = k_result_cmp.details
	compound_analysis['root'] = {
	'part': root_part.upper(),
	'root': kd_cmp.get('skeleton', latin_skel),
	'token_count': 0,
	'chain': f"ORIG2: {kd_cmp.get('kashgari_translit', '')}",
	'is_orig2': True,
	'kashgari_meaning': kd_cmp.get('kashgari_meaning', ''),
	}
	result.add_log(
	f" ROOT '{root_part.upper()}' → ORIG2 Kashgari: "
	f"'{kd_cmp.get('kashgari_translit', '')}' "
	f"= \"{kd_cmp.get('kashgari_meaning', '')}\"")
	if not compound_analysis.get('root'):
	result.add_log(
	f" ROOT '{root_part.upper()}' → no ORIG1/ORIG2 root found")

	result.compound_parts = compound_analysis

	# ═══ v3.4: SEMANTIC REVIEW FLAG ═══════════════════════════════════════════
	# Engine checks phonetics only — NEVER semantics. Flag ALL entries for
	# semantic review. Like DEPAL flag: informational, does NOT change score.
	# Prevents false confidence from high phonetic scores with no semantic path
	# (e.g. САБЛЯ score=9 but سَبِيل=way/path ≠ saber/sword).
	result.sem_review = True

	# BUILD ENTRY RECORD
	entry = EntryRecord()
	entry.en_term = term.upper()
	entry.ar_word = top.ar_word or top.letters
	entry.root_letters = top.letters
	entry.phonetic_chain = top.phonetic_chain
	entry.score = score
	entry.pattern = 'A'
	entry.network_id = f_result.details.get('network_id', '')
	entry.foundation_ref = f_result.details.get('foundation_ref', '')
	entry.inversion_type = 'HIDDEN'
	result.entry_record = entry

	# CLUSTER EXPANSION (for direct root input or high-score words)
	if input_type == 'arabic_root' or score >= SCORE_AUTO_WRITE:
	result.add_log("Running cluster expansion...")
	cluster_candidates = self.expander.expand(top.letters, term)
	result.cluster_members = cluster_candidates
	result.add_log(f"Cluster: {len(cluster_candidates)} candidate(s) found")

	# WRITE DECISION
	# v2.1 tightened: CONFIRMED_HIGH requires score>=9 AND Q-pass AND U-pass
	# AND positional_score >= 0.8 (no transposition). Previously score>=9 alone
	# was sufficient — this produced 33.5% CONFIRMED_HIGH vs target 15-25%.
	# v3.2: competition flags (ORIG2 parallel, depal) force PENDING_REVIEW.
	# v3.3: cognate cross-ref competition flag added.
	pos_ok = getattr(top, 'positional_score', 0.5) >= 0.8
	is_derivative = hasattr(result, 'derivative_of') and result.derivative_of
	has_competing_orig2 = hasattr(result, 'orig2_parallel') and result.orig2_parallel
	has_competing_depal = hasattr(result, 'competing_depal') and result.competing_depal

	# v3.3/v3.4: COGNATE CROSS-REFERENCE EVALUATION
	# v3.4 DIRECTION OF TRANSMISSION:
	# Russian is closer to BOTH originals than English.
	# EN cognate agreement = CONFIRMATION (both degraded forms converge).
	# EN cognate disagreement = NOTE only (English is more degraded).
	# Exception: MODERN_TECH_TERMS where EN→RU direction applies.
	cognate_ref = getattr(result, 'cognate_crossref', None)
	has_cognate_competition = False
	cognate_agrees = False
	cognate_note_only = False # v3.4: downgraded competition
	if cognate_ref and cognate_ref.get('source') == 'EN_PIPELINE':
	cog_root = cognate_ref.get('root_letters', '')
	local_root = top.letters if top else ''
	if cog_root and local_root and cog_root != local_root:
	cog_score = cognate_ref.get('score', 0)
	if cog_score >= SCORE_QUEUE:
	# v3.4: Check direction of transmission
	is_modern_term = (is_russian and
	term.upper() in MODERN_TECH_TERMS)
	if is_russian and not is_modern_term:
	# RU is closer to originals — EN disagreement is
	# informational only, does NOT force PENDING_REVIEW
	cognate_note_only = True
	result.add_log(
	f"v3.4 COGNATE NOTE (not competition): "
	f"{lang_label} → {local_root} (score {score}) vs "
	f"EN cousin '{cognate_ref['en_cousin']}' → {cog_root} "
	f"(EN score {cog_score}). EN is more degraded — "
	f"RU pipeline authoritative."
	)
	else:
	# EN word or modern tech term — full competition
	has_cognate_competition = True
	result.add_log(
	f"v3.3 COGNATE COMPETITION: {lang_label} pipeline "
	f"→ {local_root} (score {score}) vs EN cousin "
	f"'{cognate_ref['en_cousin']}' → {cog_root} "
	f"(score {cog_score})"
	)
	elif cog_root and local_root and cog_root == local_root:
	cognate_agrees = True
	result.add_log(
	f"v3.3 COGNATE AGREES: both {lang_label} and EN cousin "
	f"'{cognate_ref['en_cousin']}' → {cog_root} ✓"
	)

	if not dry_run:
	# ── v3.4: Build common flag suffixes ─────────────────────────
	# SEM_REVIEW: appended to ALL queue entries (engine never checks
	# semantics — every entry needs semantic verification by human)
	sem_suffix = " \| SEM_REVIEW: semantic pathway not verified"

	# COMPOUND suffix: if compound detected, show both root traces
	compound_suffix = ''
	cp = getattr(result, 'compound_parts', None)
	if cp:
	cp_parts = []
	if cp.get('prefix'):
	cp_parts.append(
	f"PREFIX '{cp['prefix']['part']}'→{cp['prefix']['root']}")
	if cp.get('root'):
	rt = cp['root']
	rtag = 'ORIG2' if rt.get('is_orig2') else 'ORIG1'
	cp_parts.append(
	f"ROOT '{rt['part']}'→{rt['root']}({rtag})")
	if cp_parts:
	compound_suffix = (
	f" \| COMPOUND: {cp['label']} — "
	+ ' + '.join(cp_parts))

	# COGNATE NOTE suffix: v3.4 downgraded competition for RU words
	cognate_note_suffix = ''
	if cognate_note_only and cognate_ref:
	cognate_note_suffix = (
	f" \| COGNATE NOTE (EN more degraded): "
	f"'{cognate_ref['en_cousin']}' → "
	f"{cognate_ref['root_letters']} "
	f"(EN score {cognate_ref.get('score', '?')})")

	if is_derivative:
	parent_name, parent_id = result.derivative_of
	result.add_log(
	f"DERIVATIVE: route to A4_DERIVATIVES of {parent_name} (#{parent_id}), "
	f"NOT A1_ENTRIES"
	)
	qid = self.writer.queue_for_oversight(
	entry,
	f"DERIVATIVE of {parent_name} #{parent_id} — write to A4_DERIVATIVES"
	+ sem_suffix + compound_suffix,
	q_result.passed, u_result.passed, f_result.passed
	)
	result.queue_id = qid

	elif has_competing_orig2:
	# v3.2: COMPETING TRACKS — ORIG1 passed but ORIG2 also found.
	# ALWAYS force PENDING_REVIEW for human decision.
	o2 = result.orig2_parallel
	flag = (
	f"COMPETING TRACKS [{lang_label}]: "
	f"ORIG1 ({top.letters}, {top.token_count} tok, score {score}/10) vs "
	f"ORIG2 (Kashgari '{o2['kashgari_translit']}' "
	f"= \"{o2['kashgari_meaning']}\", line {o2['kashgari_line']}, "
	f"ORIG2 score {o2['orig2_score']}/10). "
	f"Human review required — determine correct origin track."
	+ sem_suffix + compound_suffix + cognate_note_suffix
	)
	result.add_log(f"COMPETING TRACKS → forced PENDING [{lang_label}]")
	qid = self.writer.queue_for_oversight(
	entry, flag,
	q_result.passed, u_result.passed, f_result.passed
	)
	result.queue_id = qid

	elif has_competing_depal:
	# v3.2: DEPALATALIZATION COMPETITION — primary root differs from
	# depalatalized root. Force PENDING_REVIEW.
	dc = result.competing_depal
	dc_ops = getattr(dc, 'operations', [])
	flag = (
	f"DEPAL COMPETITION [{lang_label}]: "
	f"primary {top.letters} ({top.token_count} tok, score {score}/10) vs "
	f"depalatalized {dc.letters} ({dc.token_count} tok, ops={dc_ops}). "
	f"Palatalized consonant may mask true root. Human review required."
	+ sem_suffix + compound_suffix + cognate_note_suffix
	)
	result.add_log(f"DEPAL COMPETITION → forced PENDING [{lang_label}]")
	qid = self.writer.queue_for_oversight(
	entry, flag,
	q_result.passed, u_result.passed, f_result.passed
	)
	result.queue_id = qid

	elif has_cognate_competition:
	# v3.3: COGNATE COMPETITION — local pipeline root disagrees with
	# English cognate root. Force PENDING_REVIEW for human decision.
	# (v3.4: only fires for EN words or MODERN_TECH_TERMS now)
	cog = cognate_ref
	cog_root = cog['root_letters']
	cog_score = cog['score']
	flag = (
	f"COGNATE COMPETITION [{lang_label}]: "
	f"{lang_label} pipeline → {top.letters} ({top.token_count} tok, "
	f"score {score}/10) vs EN cousin '{cog['en_cousin']}' "
	f"→ {cog_root} ({cog['token_count']} tok, "
	f"EN score {cog_score}/10, "
	f"chain: {cog.get('phonetic_chain', '—')}). "
	f"Sibling root disagreement — human review required."
	+ sem_suffix + compound_suffix
	)
	result.add_log(f"COGNATE COMPETITION → forced PENDING [{lang_label}]")
	qid = self.writer.queue_for_oversight(
	entry, flag,
	q_result.passed, u_result.passed, f_result.passed
	)
	result.queue_id = qid

	elif (score >= SCORE_AUTO_WRITE and q_result.passed
	and u_result.passed and pos_ok):
	result.add_log(f"Score {score}/10 >= {SCORE_AUTO_WRITE} + positional OK — queued as AUTO-WRITE candidate [{lang_label}]")
	aw_flag = f"Score {score}/10 [{lang_label}] — auto-write candidate. Confirm ROOT_ID and QUR_MEANING before final write."
	if cognate_agrees and cognate_ref:
	aw_flag += f" \| COGNATE CONFIRMS: EN cousin '{cognate_ref['en_cousin']}' agrees → {cognate_ref['root_letters']}"
	aw_flag += sem_suffix + compound_suffix + cognate_note_suffix
	qid = self.writer.queue_for_oversight(
	entry, aw_flag,
	q_result.passed, u_result.passed, f_result.passed
	)
	result.queue_id = qid
	if is_russian:
	self.existing_ru_terms[term.upper()] = -1
	else:
	self.existing_terms[term.upper()] = -1 # prevent re-queuing
	elif SCORE_QUEUE <= score:
	result.add_log(f"Score {score}/10 — queued for oversight")
	pq_flag = f"Score {score}/10 — review required (positional={getattr(top, 'positional_score', '?')})"
	if cognate_agrees and cognate_ref:
	pq_flag += f" \| COGNATE CONFIRMS: EN cousin '{cognate_ref['en_cousin']}' agrees → {cognate_ref['root_letters']}"
	elif cognate_ref and cognate_ref.get('source') == 'EN_PIPELINE':
	pq_flag += (f" \| COGNATE NOTE: EN cousin '{cognate_ref['en_cousin']}' "
	f"→ {cognate_ref['root_letters']} (EN score {cognate_ref.get('score','?')})")
	pq_flag += sem_suffix + compound_suffix + cognate_note_suffix
	qid = self.writer.queue_for_oversight(
	entry, pq_flag,
	q_result.passed, u_result.passed, f_result.passed
	)
	result.queue_id = qid
	else:
	result.add_log(f"Score {score}/10 <= {SCORE_REJECT} — rejected (not queued)")
	else:
	result.add_log("[DRY RUN] No writes performed")

	# 360-DEGREE REPORT
	if not self.skip_reports:
	result.report_path = self.reporter.generate(result)

	# EXPORT QUEUE JSON for Oversight Dashboard
	self.writer.export_queue_json(WORKSPACE_DIR)

	return result

	# ── phrase pipeline ───────────────────────────────────────────────────────

	def _process_phrase(self, raw: str, key_terms: list, dry_run: bool) -> ProcessResult:
	    """
	    Run every content word of a phrase through the single-term pipeline.

	    Each entry of ``key_terms`` is passed, in order, to
	    ``_process_single_term`` as an 'english_word' with the caller's
	    ``dry_run`` flag. The per-term results are stored on the phrase-level
	    result as ``cluster_members``.

	    Returns a ProcessResult of type 'phrase' built from ``raw``.
	    """
	    phrase_result = ProcessResult(raw, 'phrase')
	    log = phrase_result.add_log
	    log(f"Phrase: {len(key_terms)} content words → {key_terms}")

	    def run_term(word):
	        # Announce the term on the phrase log before its own pipeline runs.
	        log(f"\n → Processing term: {word}")
	        return self._process_single_term(word, 'english_word', dry_run)

	    per_term = [run_term(word) for word in key_terms]
	    phrase_result.cluster_members = per_term
	    log(f"\nPhrase complete. {len(per_term)} terms processed.")
	    return phrase_result

	# ── Qur'anic ref lookup ───────────────────────────────────────────────────

	def _process_quran_ref(self, ref: str) -> ProcessResult:
	    """
	    List the A3_QURAN_REFS rows linked to a Qur'anic reference.

	    Opens the master workbook read-only, treats the first row of the
	    A3_QURAN_REFS sheet as the header row, and collects every data row
	    whose VERSE_REF contains ``ref`` or is contained in ``ref``. Rows
	    with a blank VERSE_REF are ignored. Nothing is written.

	    Any exception raised during the lookup is logged on the result
	    instead of propagating; the workbook is closed either way.

	    Returns a ProcessResult of type 'quran_ref' carrying the log lines.
	    """
	    result = ProcessResult(ref, 'quran_ref')
	    result.add_log(f"Looking up entries for {ref}...")
	    try:
	        wb = load_workbook(self.master_file, read_only=True, data_only=True)
	        try:
	            ws = wb['A3_QURAN_REFS']
	            headers = None
	            found = []
	            for row in ws.iter_rows(values_only=True):
	                if headers is None:
	                    # First row is the header row.
	                    headers = [str(h).strip() if h else '' for h in row]
	                    continue
	                if not any(row):
	                    continue
	                d = dict(zip(headers, row))
	                vr = str(d.get('VERSE_REF', '') or '').strip()
	                if not vr:
	                    # '' is a substring of every string, so a blank cell
	                    # would otherwise match any reference.
	                    continue
	                if ref in vr or vr in ref:
	                    found.append(d)
	        finally:
	            # Read-only workbooks keep the file handle open until closed.
	            wb.close()
	        result.add_log(f"Found {len(found)} entries linked to {ref}")
	        for fd in found:
	            result.add_log(f" #{fd.get('ENTRY_ID','?')} {fd.get('EN_TERM','?')} "
	                           f"(root: {fd.get('ROOT_LETTERS','?')})")
	    except Exception as e:
	        # Best-effort lookup: report the failure in the log, do not raise.
	        result.add_log(f"Qur'an ref lookup error: {e}")
	    return result

	# ── ORIG2 / Kashgari track (v2.2) ──────────────────────────────────────────

	def _try_orig2_track(self, term: str, result: ProcessResult,
	dry_run: bool,
	is_russian: bool = False) -> Optional[ProcessResult]:
	"""
	Attempt ORIG2 (Kashgari/Bitig) attestation after ORIG1 (Q-Gate) fails.

	Protocol (CLAUDE.md §6 two-track gate):
	ROOT_LIST FAIL → Kashgari search → if attested → ORIG2 entry
	Score under Bitig protocol. Route to ENGINE_QUEUE with ORIG2 flag.

	v3.2: language-aware — uses Russian extraction + Latin conversion
	when is_russian=True.

	Returns ProcessResult if ORIG2 found, None if both tracks fail.
	"""
	if self.kashgari_gate is None:
	result.add_log("ORIG2 track disabled (Kashgari corpus not loaded)")
	return None

	# v3.2: language-aware consonant extraction for ORIG2 search
	if is_russian and self.ru_reversal is not None:
	cyrillic_cons = self.ru_reversal.extract_consonants(term)
	if not cyrillic_cons:
	result.add_log("ORIG2: no consonants to search (Russian)")
	return None
	# Convert Cyrillic → Latin for Kashgari search
	latin_skel = self.ru_reversal.to_latin_skeleton(cyrillic_cons)
	consonants = list(latin_skel)
	_, ops, suffix = self.ru_reversal.strip_operations(term)
	else:
	consonants = self.reversal.extract_consonants(term)
	_, ops, suffix = self.reversal.strip_operations(term) # ops for metadata only

	if not consonants:
	result.add_log("ORIG2: no consonants to search")
	return None

	result.add_log(f"ORIG2: searching Kashgari for skeleton '{(''.join(consonants))}'...")
	k_result = self.kashgari_gate.check(term, consonants)

	if not k_result.passed:
	result.add_log(f"ORIG2 FAIL: {k_result.details.get('reason', 'not in Kashgari')}")
	return None

	# ── ORIG2 PASS ──────────────────────────────────────────────────────
	kd = k_result.details
	result.add_log(
	f"ORIG2 PASS: Kashgari attests '{kd['kashgari_translit']}' "
	f"= \"{kd['kashgari_meaning']}\" "
	f"(line {kd['kashgari_line']}, {kd['attestation_type']})"
	)

	# Log B01-B07 warnings
	for w in kd.get('bitig_warnings', []):
	result.add_log(f" ⚠ {w}")

	# Create ORIG2 root candidate
	orig2_root = RootCandidate(
	letters = kd.get('skeleton', ''.join(consonants)),
	token_count = 0, # no Qur'anic tokens — ORIG2
	ar_word = kd.get('kashgari_translit', ''),
	operations = ['ORIG2_BITIG'] + ops,
	)
	orig2_root.positional_score = 0.5 # neutral for ORIG2

	result.confirmed_root = orig2_root
	result.orig2_track = True
	result.orig2_details = kd

	# Score ORIG2 entry
	score = self._score_orig2(k_result, consonants, term)
	orig2_root.score = score
	result.add_log(f"ORIG2 Score: {score}/10")

	# Build entry record (BITIG format — still uses EntryRecord for queue)
	entry = EntryRecord()
	entry.en_term = term.upper()
	entry.ar_word = kd.get('kashgari_translit', '')
	entry.root_letters = kd.get('skeleton', '')
	entry.score = score
	entry.pattern = 'A' # Hidden — English speaker unaware of Bitig origin
	entry.inversion_type = 'HIDDEN'
	entry.source_form = kd.get('kashgari_translit', '')
	entry.phonetic_chain = f"ORIG2: {kd.get('kashgari_headword', '')} → {term}"
	entry.foundation_ref = (
	f"ORIG2 BITIG: Kashgari Dīwān line {kd.get('kashgari_line', '?')} \| "
	f"Meaning: {kd.get('kashgari_meaning', '?')} \| "
	f"Attestation: {kd.get('attestation_type', '?')} \| "
	f"Hits: {kd.get('all_hits', 0)}"
	)
	result.entry_record = entry

	# Queue for oversight — ORIG2 ALWAYS needs human review
	if not dry_run:
	bitig_warns = kd.get('bitig_warnings', [])
	flag = (
	f"ORIG2 candidate — Kashgari '{kd.get('kashgari_translit', '')}' "
	f"({kd.get('attestation_type', '')}, score {score}/10)"
	)
	if bitig_warns:
	flag += f" \| Warnings: {'; '.join(bitig_warns)}"

	# v3.3b: Append cognate intelligence if available
	cog_ref = getattr(result, 'cognate_crossref', None)
	if cog_ref and cog_ref.get('source') == 'EN_PIPELINE':
	flag += (
	f" \| COGNATE: EN cousin '{cog_ref['en_cousin']}' → "
	f"{cog_ref['root_letters']} ({cog_ref.get('token_count', 0)} tok, "
	f"EN score {cog_ref.get('score', '?')}/10, "
	f"chain: {cog_ref.get('phonetic_chain', '—')})"
	)

	qid = self.writer.queue_for_oversight(
	entry, flag,
	False, # Q-gate (ORIG1) = FAIL
	False, # U-gate N/A for ORIG2
	True # F-gate = PASS (auto)
	)
	result.queue_id = qid
	result.add_log(f"ORIG2 queued for oversight: {qid}")

	# Generate report
	if not self.skip_reports:
	result.report_path = self.reporter.generate(result)
	self.writer.export_queue_json(WORKSPACE_DIR)
	return result

	def _score_orig2(self, kashgari_result: GateResult,
	consonants: list, en_word: str) -> int:
	"""
	Score ORIG2 entry. Parallel to Scorer but Kashgari-based.

	v2.3 FIX — meaning_match phonetic verification:
	meaning_match alone (no consonant overlap) is R10 violation.
	WIFE→'kis' scores LOW because W-F ≠ K-S (zero consonant overlap).
	meaning_match now REQUIRES partial skeleton overlap to score above 5.

	Max score breakdown:
	Kashgari attestation (+3)
	Match quality: skeleton(+2) or meaning_with_overlap(+1) or meaning_only(+0)
	Multiple attestations (+1)
	No B01-B07 warnings (+2)
	Consonant coverage (+2)
	Total possible: 10
	"""
	score = 0
	kd = kashgari_result.details

	# Kashgari attestation (+3) — equivalent to Q-gate for ORIG1
	score += 3

	# Match quality (+2/+1/+0) — v2.3: meaning_match now verified
	att_type = kd.get('attestation_type', '')
	if att_type == 'skeleton_match':
	score += 2
	elif att_type == 'meaning_match':
	# v2.3: check if ANY consonants overlap between English word
	# and the Kashgari transliteration. No overlap = R10 violation.
	k_translit = kd.get('kashgari_translit', '')
	en_cons = set(c for c in en_word.lower() if c.isalpha() and c not in 'aeiou')
	# Bitig consonant equivalences for overlap check
	equiv = {'q': 'kgc', 'k': 'qgc', 'g': 'qk', 'p': 'b', 'b': 'p',
	'c': 'sjz', 's': 'cjz', 'z': 'cs', 'j': 'csz',
	't': 'd', 'd': 't', 'f': 'pv', 'v': 'fpw', 'w': 'v'}
	k_cons = set(c for c in k_translit.lower() if c.isalpha() and c not in 'aeiouüöıäəāēīōū')
	# Check direct or equivalent overlap
	overlap = 0
	for ec in en_cons:
	if ec in k_cons:
	overlap += 1
	elif any(eq in k_cons for eq in equiv.get(ec, '')):
	overlap += 1
	if overlap >= 2:
	score += 1 # meaning_match WITH phonetic overlap — acceptable
	else:
	score += 0 # meaning_match WITHOUT overlap — R10 violation, no bonus
	kd['meaning_only_warning'] = (
	f"meaning_match but only {overlap} consonant overlap "
	f"(EN={sorted(en_cons)}, Kashgari={sorted(k_cons)})"
	)

	# Multiple attestations (+1)
	if kd.get('all_hits', 0) > 1:
	score += 1

	# No B01-B07 warnings (+2)
	warnings = kd.get('bitig_warnings', [])
	if not warnings:
	score += 2
	elif len(warnings) == 1:
	score += 1

	# Consonant coverage (+2)
	skel_len = len(kd.get('skeleton', ''))
	word_cons = len([c for c in en_word.lower()
	if c.isalpha() and c not in 'aeiou'])
	if att_type == 'skeleton_match' and skel_len > 0 and abs(word_cons - skel_len) <= 1:
	score += 2
	elif att_type == 'skeleton_match' and skel_len > 0 and abs(word_cons - skel_len) <= 2:
	score += 1
	elif att_type == 'meaning_match':
	# meaning_match coverage capped at +1 regardless of skeleton
	if skel_len > 0 and abs(word_cons - skel_len) <= 1:
	score += 1

	return min(score, 10)

	# ── v3.2: PARALLEL ORIG2 CHECK ─────────────────────────────────────────────

	def _try_orig2_parallel(self, term: str, is_russian: bool) -> Optional[dict]:
	"""
	Parallel ORIG2 check — runs ALONGSIDE ORIG1, not as fallback.

	v3.2: For words where ORIG1 passes but the word might actually be
	ORIG2 (Bitig/Turkic). Especially critical for Russian words where
	>50% are Bitig-corridor.

	IMPORTANT: This parallel check searches ONLY KNOWN_ORIG2_ENTRIES
	(manually verified Kashgari entries), NOT the full KashgariIndex.
	The full index is too broad and produces false positives.
	The full Kashgari search is reserved for the ORIG2 fallback path
	(when ORIG1 fails completely).

	Converts Russian consonants to Latin for Kashgari search.
	Returns dict with ORIG2 details if found, None otherwise.
	"""
	if self.kashgari_gate is None:
	return None

	# Extract consonants and convert to Latin for Kashgari search
	if is_russian:
	cyrillic_cons = self.ru_reversal.extract_consonants(term)
	if not cyrillic_cons:
	return None
	# Generate all Latin skeleton variants (main + voicing + suffix-stripped)
	latin_variants = self.ru_reversal.to_latin_skeleton_variants(cyrillic_cons)
	else:
	consonants = self.reversal.extract_consonants(term)
	if not consonants:
	return None
	# For English: also generate suffix-stripped and voicing variants
	main_skel = ''.join(consonants).lower()
	latin_variants = [main_skel]
	# Add suffix-stripped (1 consonant)
	if len(main_skel) >= 3:
	latin_variants.append(main_skel[:-1])

	# Search ONLY KNOWN_ORIG2_ENTRIES (not the full KashgariIndex)
	# This prevents false positives from broad skeleton matching.
	known_entries = KashgariGate.KNOWN_ORIG2_ENTRIES
	for latin_skel in latin_variants:
	hits = known_entries.get(latin_skel, [])
	if hits:
	best = hits[0]
	# Build a GateResult-like dict for scoring
	orig2_score = 3 + 2 # Kashgari attested (+3) + skeleton_match (+2)
	if len(hits) > 1:
	orig2_score += 1 # multiple hits
	# Check consonant coverage
	skel_len = len(best.get('skeleton', ''))
	word_cons = len(latin_skel)
	if skel_len > 0 and abs(word_cons - skel_len) <= 1:
	orig2_score += 2
	elif skel_len > 0 and abs(word_cons - skel_len) <= 2:
	orig2_score += 1
	orig2_score = min(orig2_score, 10)

	return {
	'passed': True,
	'kashgari_translit': best.get('translit', ''),
	'kashgari_meaning': best.get('meaning', ''),
	'kashgari_line': best.get('line', 0),
	'attestation_type': 'known_orig2_match',
	'skeleton': best.get('skeleton', latin_skel),
	'orig2_score': orig2_score,
	'all_hits': len(hits),
	'bitig_warnings': [],
	'search_skeleton': latin_skel,
	}
	return None

	# ── v3.3: COGNATE CROSS-REFERENCING ───────────────────────────────────────
	def _try_cognate_crossref(self, term: str, is_russian: bool) -> Optional[dict]:
	    """
	    Sibling Database Principle — automated.

	    Russian input: look up COGNATE_CROSSREF_RU_TO_EN for a known English
	    equivalent. If that English word is already recorded in
	    ``self.existing_terms``, report the existing entry. Otherwise run the
	    English form (plus prefix-/suffix-stripped variants) through the
	    English PhoneticReversal → Q → U → F → Scorer pipeline and keep the
	    best-scoring candidate.

	    English input: look up COGNATE_CROSSREF_EN_TO_RU and report the first
	    Russian sibling already recorded in ``self.existing_ru_terms``.

	    Returns one of:
	        - None: no cognate known, no sibling recorded, or the best English
	          candidate scored below SCORE_QUEUE.
	        - source 'LATTICE_ENTRY': keys en_cousin (or ru_sibling),
	          entry_id, note.
	        - source 'EN_PIPELINE': keys en_cousin, root_letters, token_count,
	          score, phonetic_chain, ar_word, operations, positional,
	          variant_used, word_form_used.
	        - source 'ERROR': keys en_cousin, error — the English pipeline
	          raised; the exception text is returned instead of propagating.
	    """
	    if not is_russian:
	        # EN→RU: check if any Russian siblings exist
	        ru_siblings = COGNATE_CROSSREF_EN_TO_RU.get(term.upper(), [])
	        for sib in ru_siblings:
	            existing_ru = self.existing_ru_terms.get(sib.upper())
	            if existing_ru:
	                return {
	                    'ru_sibling': sib,
	                    'source': 'LATTICE_ENTRY',
	                    'entry_id': existing_ru,
	                    'note': f'Russian sibling {sib.upper()} in A1_ЗАПИСИ #{existing_ru}',
	                }
	        return None

	    en_cousin = COGNATE_CROSSREF_RU_TO_EN.get(term.lower())
	    if not en_cousin:
	        return None

	    # Check if English cousin already in lattice (strongest signal)
	    # NOTE(review): the write path stores -1 in existing_terms for queued
	    # terms; -1 is truthy, so such terms are reported here as entry #-1 —
	    # confirm that is intended.
	    existing_en = self.existing_terms.get(en_cousin.upper())
	    if existing_en:
	        return {
	            'en_cousin': en_cousin,
	            'source': 'LATTICE_ENTRY',
	            'entry_id': existing_en,
	            'note': f'{en_cousin} already in A1_ENTRIES #{existing_en}',
	        }

	    # Process English cousin through English pipeline
	    # v3.3b: Try the full word first, then prefix-stripped variants,
	    # then suffix-stripped variants (-ent/-ence which main pipeline skips).
	    # DESCENT → DE-SCENT → S-C-N → سَكَنَ ; TALENT → TAL-ENT → T-L-N
	    LATIN_PREFIXES = ['de', 're', 'in', 'con', 'dis', 'ex', 'pre',
	                      'pro', 'ad', 'com', 'per', 'sub', 'trans', 'un']
	    COGNATE_EXTRA_SUFFIXES = ['ent', 'ment', 'ure', 'ude',
	                              'ance', 'ence', 'ant']

	    try:
	        # Word variants to try: (word_form, extra_ops, label).
	        # Order matters: on equal scores the earliest variant wins.
	        variants = [(en_cousin, [], 'FULL')]
	        lw = en_cousin.lower()

	        # ── Prefix-stripped variants ─────────────────────────────────
	        for pfx in LATIN_PREFIXES:
	            if lw.startswith(pfx) and len(lw) - len(pfx) >= 3:
	                variants.append((en_cousin[len(pfx):],
	                                 [f'OP_PREFIX({pfx}-)'], f'PREFIX_{pfx}'))

	        # ── Suffix-stripped variants (for -ent/-ence not in main list) ─
	        for sfx in COGNATE_EXTRA_SUFFIXES:
	            if lw.endswith(sfx) and len(lw) - len(sfx) >= 3:
	                variants.append((en_cousin[:-len(sfx)],
	                                 [f'OP_SUFFIX(-{sfx})'], f'SUFFIX_{sfx}'))

	        # ── Prefix+suffix combined (e.g. DE-SCEN-T with -t suffix) ──
	        for pfx in LATIN_PREFIXES:
	            if not lw.startswith(pfx):
	                continue
	            inner = en_cousin[len(pfx):]
	            for sfx in COGNATE_EXTRA_SUFFIXES:
	                if inner.lower().endswith(sfx) and len(inner) - len(sfx) >= 2:
	                    variants.append((inner[:-len(sfx)],
	                                     [f'OP_PREFIX({pfx}-)', f'OP_SUFFIX(-{sfx})'],
	                                     f'BOTH_{pfx}_{sfx}'))

	        # ── Score all variants, keep the absolute best ──────────────
	        best_score = -1
	        best_cand = None
	        best_u = None
	        best_variant_label = ''
	        best_ops = []
	        best_word_used = en_cousin

	        for word_form, extra_ops, label in variants:
	            if len(word_form) < 2:
	                continue
	            cands = self.reversal.reverse(word_form)
	            if not cands:
	                continue
	            # Only the top three reversal candidates are considered.
	            for cand in cands[:3]:
	                cq = self.q_gate.check(cand.letters)
	                if not cq.passed:
	                    continue
	                # operations may be unset on a candidate; treat as empty
	                cand_ops = cand.operations or []
	                cu = self.u_gate.verify(word_form, cand.letters,
	                                        cand_ops + extra_ops)
	                cf = self.f_gate.assign(word_form, cand.letters,
	                                        cu.details.get('phonetic_chain', ''))
	                cs, _breakdown = self.scorer.score(cand, word_form, cq, cu, cf)
	                if cs > best_score:
	                    best_score = cs
	                    best_cand = cand
	                    best_u = cu
	                    best_variant_label = label
	                    best_ops = extra_ops + cand_ops
	                    best_word_used = word_form

	        if best_cand is None or best_score < SCORE_QUEUE:
	            return None

	        return {
	            'en_cousin': en_cousin,
	            'source': 'EN_PIPELINE',
	            'root_letters': best_cand.letters,
	            'token_count': best_cand.token_count,
	            'score': best_score,
	            'phonetic_chain': best_u.details.get('phonetic_chain', '') if best_u else '',
	            'ar_word': best_cand.ar_word or '',
	            'operations': best_ops,
	            'positional': getattr(best_cand, 'positional_score', 0.5),
	            'variant_used': best_variant_label,
	            'word_form_used': best_word_used,
	        }
	    except Exception as e:
	        # Cross-referencing is advisory: surface the failure to the caller
	        # as an ERROR record rather than aborting the main pipeline.
	        return {'en_cousin': en_cousin, 'source': 'ERROR', 'error': str(e)}


	# ═══════════════════════════════════════════════════════════════════════════════
	# INTERACTIVE CLI — main()
	# ═══════════════════════════════════════════════════════════════════════════════

	def main():
	    """
	    Interactive command-line front end for the engine.

	    Prints the banner, resolves the master workbook path (first command-line
	    argument, else MASTER_FILE), builds a USLaPEngine and then loops on a
	    "Query >" prompt:

	        1            prompt for a query and process it (writes enabled)
	        2            prompt for a query and process it as a dry run
	        3            export ENGINE_QUEUE to JSON in WORKSPACE_DIR
	        4/quit/exit/q   leave the loop
	        anything else   processed directly as a query (writes enabled)

	    End-of-input or Ctrl-C at any prompt leaves the loop cleanly. Exits
	    with status 1 if the master file is missing or the engine fails to
	    initialise.
	    """
	    print("""
	╔══════════════════════════════════════════════════════════════╗
	║ USLaP Autonomous Engine v1.0 ║
	║ Unified Source Language Proof ║
	║ بِسْمِ اللَّهِ الرَّحْمَٰنِ الرَّحِيمِ ║
	╚══════════════════════════════════════════════════════════════╝

	Menu:
	1. Process word/root/ratio/phrase (full 360-degree analysis + lattice placement)
	2. Dry run (analysis only, no writes)
	3. Export ENGINE_QUEUE to JSON (for Oversight Dashboard)
	4. Quit

	Or type your query directly (no menu number needed).
	""")

	    master = sys.argv[1] if len(sys.argv) > 1 else MASTER_FILE
	    if not os.path.exists(master):
	        print(f"ERROR: Master file not found:\n {master}")
	        sys.exit(1)

	    try:
	        engine = USLaPEngine(master_file=master)
	    except Exception as e:
	        print(f"Engine initialisation failed: {e}")
	        sys.exit(1)

	    def read_line(prompt):
	        # Return the stripped input, or None on end-of-input / Ctrl-C.
	        try:
	            return input(prompt).strip()
	        except (EOFError, KeyboardInterrupt):
	            return None

	    while True:
	        user_input = read_line("\nQuery > ")
	        if user_input is None:
	            print("\nExiting.")
	            break

	        if not user_input:
	            continue

	        if user_input in ('1', '2'):
	            dry = user_input == '2'
	            term = read_line("Enter query (DRY RUN): " if dry else "Enter query: ")
	            if term is None:
	                # Same clean exit as at the top-level prompt.
	                print("\nExiting.")
	                break
	            if term:
	                engine.process(term, dry_run=dry)

	        elif user_input == '3':
	            engine.writer.export_queue_json(WORKSPACE_DIR)

	        elif user_input in ('4', 'quit', 'exit', 'q'):
	            print("Exiting engine.")
	            break

	        else:
	            # Direct query — no menu number
	            engine.process(user_input, dry_run=False)


	if __name__ == '__main__':
	    main()