# Source: uslap-query/Code_files/amr_basar.py (uploaded with huggingface_hub, commit 7cc8e29, 35.7 kB)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
أَمْر بَصَر — PERCEPTION ENGINE
بِسْمِ اللَّهِ الرَّحْمَٰنِ الرَّحِيمِ
Root: ب-ص-ر — to see, perceive, discern
Q67:4 يَنقَلِبْ إِلَيْكَ ٱلْبَصَرُ — the vision returns to you
The بَصَر perceives. It understands what the user MEANS.
Input layer of the أَمْر AI. Takes raw user input and produces
structured intent that the عَقْل can reason about and the نُطْق can articulate.
Functions:
perceive(user_input) → structured intent + parameters
decompose(complex_query) → ordered sub-queries
detect_root(word, lang) → root_id via DB + shift reversal
track_context(perception_result) → context summary (current focus root, focus history, related roots)
classify_input(text) → input type classification
"""
import sys
import os
import re
from collections import defaultdict
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
try:
from uslap_db_connect import connect as _connect
_HAS_DB = True
except ImportError:
_HAS_DB = False
try:
from amr_aql import (
deduce_meaning, reverse_trace, expand_root, relate_roots,
hypothesise, verify_candidate, extract_consonants
)
_HAS_AQL = True
except ImportError:
_HAS_AQL = False
from amr_alphabet import ABJAD
# ═══════════════════════════════════════════════════════════════════════
# INPUT TYPE CLASSIFICATION
# ═══════════════════════════════════════════════════════════════════════
# Intent patterns — what the user is asking for
# Maps intent name -> list of regexes. classify_input() walks this dict in
# insertion order and returns on the FIRST pattern that matches (searched
# case-insensitively), so ORDER MATTERS: a more specific intent must be listed
# before any broader intent whose patterns would also match the same text.
INTENT_PATTERNS = {
    # Root operations
    'explain_root': [
        r'explain\s+(?:root\s+)?([A-Z]\d+|[\u0621-\u064A][\-\u0621-\u064A]+)',
        r'what\s+(?:does|is)\s+(?:root\s+)?([A-Z]\d+|[\u0621-\u064A][\-\u0621-\u064A]+)',
        r'tell\s+me\s+about\s+(?:root\s+)?([A-Z]\d+|[\u0621-\u064A][\-\u0621-\u064A]+)',
    ],
    'trace_word': [
        r'trace\s+(?:the\s+word\s+)?["\']?(\w+)["\']?',
        r'where\s+does\s+["\']?(\w+)["\']?\s+come\s+from',
        r'root\s+of\s+["\']?(\w+)["\']?',
        r'find\s+root\s+(?:for\s+)?["\']?(\w+)["\']?',
    ],
    'compare_roots': [
        r'compare\s+([\u0621-\u064A][\-\u0621-\u064A]+)\s+(?:and|vs|with|to)\s+([\u0621-\u064A][\-\u0621-\u064A]+)',
        r'relate\s+([\u0621-\u064A][\-\u0621-\u064A]+)\s+(?:and|to|with)\s+([\u0621-\u064A][\-\u0621-\u064A]+)',
        r'([\u0621-\u064A][\-\u0621-\u064A]+)\s+vs\s+([\u0621-\u064A][\-\u0621-\u064A]+)',
    ],
    'search_lattice': [
        r'search\s+(?:for\s+)?["\']?(.+?)["\']?$',
        r'find\s+(?:entry\s+)?["\']?(.+?)["\']?$',
        r'look\s+up\s+["\']?(.+?)["\']?$',
    ],
    'get_entry': [
        r'(?:show|get|display)\s+entry\s+([A-Z]{2}\d+)',
        r'entry\s+([A-Z]{2}\d+)',
    ],
    'lattice_state': [
        r'(?:show\s+)?(?:lattice\s+)?state',
        r'(?:show\s+)?(?:lattice\s+)?summary',
        r'how\s+many',
        r'current\s+state',
    ],
    'report': [
        r'(?:generate|create|make)\s+(?:a\s+)?report\s+(?:for\s+)?([A-Z]\d+|[\u0621-\u064A][\-\u0621-\u064A]+)',
        r'intelligence\s+(?:on|for|about)\s+([A-Z]\d+|[\u0621-\u064A][\-\u0621-\u064A]+)',
    ],
    # Computational fabric — full cross-domain root scan
    'fabric_root': [
        r'fabric\s+(.+)',
        r'computational[_\s]?fabric\s+(.+)',
    ],
    # Domain reasoning intents
    'explain_body': [
        r'(?:what\s+(?:does|is)\s+)?(?:root\s+)?(.+?)\s+(?:in|for)\s+(?:the\s+)?body',
        r'body\s+(?:of|for)\s+(.+)',
        r'(?:which|what)\s+(?:root|organ|system)\s+governs?\s+(?:the\s+)?(.+)',
        r'(?:heal|cure|therapy)\s+(?:for\s+)?(.+)',
    ],
    'body_system': [
        r'(?:show|explain|describe)\s+(?:the\s+)?(?:body\s+)?(heart|nafs|sensory|skeletal|nutrition|prayer|lifecycle|therapy|architecture|diagnostic)\s*(?:system)?',
        r'(heart|nafs|sensory|skeletal|nutrition|prayer|lifecycle)\s+(?:system|lattice|map)',
    ],
    'explain_formula': [
        r'(?:what\s+)?formula(?:s)?\s+(?:for|of|using)\s+(.+)',
        r'(?:show|explain)\s+formula\s+(.+)',
        r'ratio(?:s)?\s+(?:for|of|in)\s+(.+)',
    ],
    'explain_history': [
        r'(?:when|how)\s+was\s+(.+?)\s+deployed',
        r'timeline\s+(?:of|for)\s+(.+)',
        r'(?:show|explain)\s+era\s+(\d+)',
        r'(?:deployment|history)\s+(?:of|for)\s+(.+)',
    ],
    'naming_op': [
        r'(?:how\s+was\s+)?(.+?)\s+renamed',
        r'naming\s+(?:operation|inversion)\s+(?:of|for)\s+(.+)',
        r'(?:original|real)\s+name\s+(?:of|for)\s+(.+)',
    ],
    'explain_intel': [
        r'(?:what\s+)?(?:intelligence|intel)\s+(?:on|for|about)?\s*(.+)',
        r'confession(?:s)?\s+(?:about|for|on)?\s*(.+)',
        r'extraction\s+(?:of|for|in)?\s*(.+)',
        r'(?:who|what)\s+confessed\s+(?:about\s+)?(.+)',
    ],
    'batch_operation': [
        r'batch\s+(.+)',
        r'process\s+all\s+(.+)',
    ],
    # QUF status MUST come before QUF validate: the catch-all
    # r'quf\s+([\w_]+)' below also matches "quf status" / "quf coverage",
    # which made this intent unreachable when it was listed second.
    'quf_status': [
        r'quf\s+status',
        r'quf\s+coverage',
        r'coverage',
    ],
    # QUF operations
    'quf_validate': [
        r'quf\s+(?:validate\s+)?(?:entry\s+)?(\d+)',
        r'validate\s+(?:entry\s+)?(\d+)',
        r'quf\s+(\w+)\s+(\d+)',
        r'quf\s+([\w_]+)',
    ],
    # Detection patterns
    'explain_detection': [
        r'(?:what\s+is\s+)?(?:detection\s+pattern\s+)?(DP\d+)',
        r'(?:explain\s+)?(DP\d+)',
        r'detection\s+(?:pattern\s+)?(.+)',
    ],
    # Keywords
    'explain_keyword': [
        r'keyword\s+([\u0621-\u064A]+)',
        r'(?:explain\s+)?keyword\s+(\w+)',
    ],
    # Bitig tasrif MUST come before tasrif (longer prefix match)
    'bitig_tasrif': [
        r'bitig\s+tasrif\s+status',
        r'bitig\s+tasrif\s+pattern\s+(\w+)',
        r'bitig\s+tasrif\s+harmony\s+(.+)',
        r'bitig\s+tasrif\s+compound\s+(.+)',
        r'bitig\s+tasrif\s+analyze\s+(.+)',
        r'bitig\s+tasrif\s+(\w+)',
    ],
    # AA Tasrif (must NOT match "bitig tasrif")
    'tasrif': [
        r'(?<!bitig\s)tasrif\s+status',
        r'(?<!bitig\s)tasrif\s+broken_plurals',
        r'(?<!bitig\s)tasrif\s+pattern\s+(\w+)',
        r'(?<!bitig\s)tasrif\s+([\u0621-\u064A][\-\u0621-\u064A]+)',
        r'(?<!bitig\s)tasrif\s+(\w[\-\w]+)',
    ],
}
def classify_input(text):
    """Map raw user text to an intent, its parameters and a confidence level.

    Resolution order:
      1. Bare identifiers (hyphenated AA root, root id, entry id) -> HIGH.
      2. First matching regex in INTENT_PATTERNS, searched case-insensitively
         in dict order -> HIGH. Only the first capture group is kept (as
         ``query``), except for ``compare_roots`` which keeps two groups.
      3. A single Latin or Cyrillic word -> ``trace_word`` (MEDIUM);
         a single Arabic-script word -> ``search_lattice`` (MEDIUM).
      4. Anything else -> ``search_lattice`` on the whole text (LOW).

    Args:
        text: raw user input string

    Returns:
        dict with keys ``intent``, ``params`` and ``confidence``
        (HIGH/MEDIUM/LOW).
    """
    def _answer(intent_name, extracted, level):
        return {'intent': intent_name, 'params': extracted, 'confidence': level}

    cleaned = text.strip()

    # Step 1: the whole input is itself an identifier.
    direct_forms = (
        (r'^[\u0621-\u064A][\-\u0621-\u064A]+$', 'explain_root', 'root_letters'),
        (r'^[RT]\d+$', 'explain_root', 'root_id'),
        (r'^(EN|RU|FA|EU|BI|LA|UZ)\d+$', 'get_entry', 'entry_id'),
    )
    for regex, intent_name, param_key in direct_forms:
        if re.match(regex, cleaned):
            return _answer(intent_name, {param_key: cleaned}, 'HIGH')

    # Step 2: phrase patterns — first hit wins.
    for intent_name, regexes in INTENT_PATTERNS.items():
        for regex in regexes:
            found = re.search(regex, cleaned, re.IGNORECASE)
            if not found:
                continue
            captured = found.groups()
            if intent_name == 'compare_roots':
                extracted = {'root_a': captured[0], 'root_b': captured[1]}
            elif captured:
                extracted = {'query': captured[0]}
            else:
                extracted = {}
            return _answer(intent_name, extracted, 'HIGH')

    # Step 3: a lone word, routed by script.
    if re.match(r'^[a-zA-Z]+$', cleaned):
        return _answer('trace_word', {'word': cleaned.lower(), 'language': 'en'}, 'MEDIUM')
    if re.match(r'^[а-яА-ЯёЁ]+$', cleaned):
        return _answer('trace_word', {'word': cleaned.lower(), 'language': 'ru'}, 'MEDIUM')
    if re.match(r'^[\u0621-\u064A\u0640-\u065F]+$', cleaned):
        return _answer('search_lattice', {'query': cleaned}, 'MEDIUM')

    # Step 4: nothing recognised — fall back to a general search.
    return _answer('search_lattice', {'query': cleaned}, 'LOW')
# ═══════════════════════════════════════════════════════════════════════
# PERCEIVE — main perception function
# ═══════════════════════════════════════════════════════════════════════
def perceive(user_input):
    """Turn raw user text into a structured, DB-enriched perception.

    Primary entry point of the perception layer: classifies the input with
    classify_input() and then lets _enrich() add database context in place.

    Args:
        user_input: raw text from user

    Returns:
        dict with:
            raw_input: the text exactly as received
            intent: what the user wants
            params: extracted parameters
            confidence: HIGH/MEDIUM/LOW
            enriched: additional context from DB (empty dict if none)
            sub_queries: always an empty list here; this function does not
                call decompose()
    """
    verdict = classify_input(user_input)
    perception = dict(
        raw_input=user_input,
        intent=verdict['intent'],
        params=verdict['params'],
        confidence=verdict['confidence'],
        enriched={},
        sub_queries=[],
    )
    # _enrich mutates the dict in place and returns nothing.
    _enrich(perception)
    return perception
def _enrich(result):
    """Attach best-effort database context to a classified input, in place.

    Fills ``result['enriched']`` for the intents ``explain_root``,
    ``trace_word``, ``get_entry`` and ``search_lattice``; other intents are
    left untouched. Does nothing when the DB module could not be imported.

    Enrichment is deliberately non-fatal: any error while connecting or
    querying is logged as a warning and the result is returned without
    (further) enrichment, so classification still works when the DB is down.

    Args:
        result: perception dict holding at least ``intent``, ``params`` and
            ``enriched``; mutated in place.

    Returns:
        None
    """
    if not _HAS_DB:
        return

    import logging  # function-scope import: only needed on the error path

    root_columns = "root_id, root_letters, quran_tokens, primary_meaning"
    intent = result['intent']
    params = result['params']
    conn = None
    try:
        conn = _connect()

        if intent == 'explain_root':
            root_ref = params.get('root_id') or params.get('root_letters') or params.get('query')
            if root_ref:
                if re.match(r'^[A-Za-z]\d+$', root_ref):
                    # Patterns are matched case-insensitively, so an id may
                    # arrive as "r12"; stored ids are upper-case.
                    sql = f"SELECT {root_columns} FROM roots WHERE root_id = ?"
                    lookup = root_ref.upper()
                elif root_ref.startswith(('R', 'T')):
                    sql = f"SELECT {root_columns} FROM roots WHERE root_id = ?"
                    lookup = root_ref
                else:
                    sql = f"SELECT {root_columns} FROM roots WHERE root_letters = ?"
                    lookup = root_ref
                row = conn.execute(sql, (lookup,)).fetchone()
                if row:
                    result['enriched'] = {
                        'root_found': True,
                        'root_id': row['root_id'],
                        'root_letters': row['root_letters'],
                        'quran_tokens': row['quran_tokens'],
                        'primary_meaning': row['primary_meaning'],
                    }
                else:
                    result['enriched'] = {'root_found': False}

        elif intent == 'trace_word':
            word = params.get('word') or params.get('query', '')
            lang = params.get('language', 'en')
            # Check whether the word already exists as an entry.
            if lang == 'en':
                row = conn.execute(
                    "SELECT entry_id, root_id, en_term FROM entries WHERE LOWER(en_term) = ? LIMIT 1",
                    (word.lower(),)
                ).fetchone()
            elif lang == 'ru':
                row = conn.execute(
                    "SELECT entry_id, root_id, ru_term FROM entries WHERE LOWER(ru_term) = ? LIMIT 1",
                    (word.lower(),)
                ).fetchone()
            else:
                row = None
            if row:
                result['enriched'] = {
                    'existing_entry': True,
                    'entry_id': row['entry_id'],
                    'root_id': row['root_id'],
                }
            else:
                result['enriched'] = {'existing_entry': False}

        elif intent == 'get_entry':
            entry_id = params.get('entry_id') or params.get('query', '')
            row = conn.execute(
                "SELECT entry_id, en_term, root_id, root_letters FROM entries WHERE entry_id = ?",
                (entry_id,)
            ).fetchone()
            if row:
                result['enriched'] = {
                    'entry_found': True,
                    'entry_id': row['entry_id'],
                    'en_term': row['en_term'],
                    'root_id': row['root_id'],
                    'root_letters': row['root_letters'],
                }
            else:
                result['enriched'] = {'entry_found': False}

        elif intent == 'search_lattice':
            query = params.get('query', '')
            needle = f'%{query.lower()}%'
            # Quick substring search across the three term columns.
            hits = conn.execute(
                "SELECT entry_id, en_term, root_id FROM entries "
                "WHERE LOWER(en_term) LIKE ? OR LOWER(ru_term) LIKE ? "
                "OR LOWER(fa_term) LIKE ? LIMIT 5",
                (needle, needle, needle)
            ).fetchall()
            result['enriched'] = {
                'hit_count': len(hits),
                'hits': [dict(h) for h in hits],
            }
    except Exception as exc:
        # Best-effort by design, but no longer silent.
        logging.getLogger(__name__).warning(
            "DB enrichment failed for intent %r: %s", intent, exc
        )
    finally:
        if conn is not None:
            conn.close()
# DECOMPOSE — break complex queries into sub-queries
# ═══════════════════════════════════════════════════════════════════════
def decompose(complex_query):
    """Break a complex query into ordered sub-queries.

    The text is split on commas and on the connectives ``and`` / ``then`` /
    ``also``; each piece is classified with classify_input(). Two pieces
    joined by ``and`` are kept together when, re-joined, they form a
    ``compare_roots`` request whose two roots sit on either side of the
    ``and`` (e.g. "compare X and Y"), since splitting there would destroy
    the comparison.

    If the result is a single LOW-confidence sub-query, the words are scanned
    for root references (hyphenated AA letters or R/T ids): exactly two give
    one ``compare_roots`` sub-query, exactly one gives ``explain_root``.

    Args:
        complex_query: user's full query text

    Returns:
        list of sub-query dicts, each with intent + params + confidence
    """
    sub_queries = []

    # One outer capture group makes re.split return pieces and separators
    # alternately: [piece0, sep0, piece1, sep1, ...]. A comma needs no
    # surrounding whitespace and may absorb a following connective (", then").
    tokens = re.split(
        r'(\s*,\s*(?:(?:and|then|also)\s+)?|\s+(?:and|then|also)\s+)',
        complex_query,
        flags=re.IGNORECASE,
    )
    pieces = [chunk.strip() for chunk in tokens[0::2]]
    separators = tokens[1::2]

    idx = 0
    while idx < len(pieces):
        piece = pieces[idx]
        if idx + 1 < len(pieces) and separators[idx].strip().lower() == 'and':
            following = pieces[idx + 1]
            merged = classify_input(f"{piece} and {following}")
            # Merge only when the comparison genuinely spans the "and".
            if (merged['intent'] == 'compare_roots'
                    and merged['params']['root_a'] in piece
                    and merged['params']['root_b'] in following):
                sub_queries.append(merged)
                idx += 2
                continue
        if piece:
            sub_queries.append(classify_input(piece))
        idx += 1

    # Single unclear part: look for bare root references among the words.
    if len(sub_queries) == 1 and sub_queries[0]['confidence'] == 'LOW':
        roots_found = [
            w for w in complex_query.split()
            if re.match(r'^[\u0621-\u064A][\-\u0621-\u064A]+$', w)
            or re.match(r'^[RT]\d+$', w)
        ]
        if len(roots_found) == 2:
            sub_queries = [{
                'intent': 'compare_roots',
                'params': {'root_a': roots_found[0], 'root_b': roots_found[1]},
                'confidence': 'MEDIUM',
            }]
        elif len(roots_found) == 1:
            sub_queries = [{
                'intent': 'explain_root',
                'params': {'root_letters': roots_found[0]},
                'confidence': 'MEDIUM',
            }]
    return sub_queries
# ═══════════════════════════════════════════════════════════════════════
# DETECT ROOT — find root for any word in any language
# ═══════════════════════════════════════════════════════════════════════
def detect_root(word, language='auto'):
    """Detect the AA root of a word.

    Pipeline:
        1. Auto-detect the language from the script when ``language='auto'``.
        2. If the DB module is available, look for an existing entry
           (see _lookup_root_in_db) and return it with HIGH confidence.
        3. Otherwise, if the reasoning module is available, take the first
           candidate from hypothesise(word, language).

    The DB connection is always closed, even when a query raises.

    Args:
        word: input word
        language: 'en', 'ru', 'fa', or 'auto' (auto-detect)

    Returns:
        dict with word, language, root_id, root_letters, confidence,
        source ('DB', 'DB_UZBEK', 'COMPUTED' or None) and shift_chain.
        Unresolved fields stay None (shift_chain stays []).
    """
    if language == 'auto':
        language = _detect_language(word)

    result = {
        'word': word,
        'language': language,
        'root_id': None,
        'root_letters': None,
        'confidence': None,
        'source': None,
        'shift_chain': [],
    }

    # Step 1: existing DB entry.
    if _HAS_DB:
        conn = _connect()
        try:
            found = _lookup_root_in_db(conn, word, language, result)
        finally:
            conn.close()
        if found:
            return result

    # Step 2: computed hypothesis from the reasoning module.
    if _HAS_AQL:
        candidates = hypothesise(word, language)
        if candidates:
            top = candidates[0]
            result['root_letters'] = top['root_letters']
            result['root_id'] = top.get('root_id')
            result['source'] = 'COMPUTED'
            result['shift_chain'] = top['shift_chain']
            if top.get('verified'):
                # "or 0" guards against an explicit None token count.
                tokens = top.get('quranic_tokens') or 0
                result['confidence'] = 'HIGH' if tokens > 50 else 'MEDIUM'
            else:
                result['confidence'] = 'LOW'
    return result


def _lookup_root_in_db(conn, word, language, result):
    """Fill ``result`` from the first DB table that knows ``word``.

    Tables are tried in order: ``entries`` (language-specific term column for
    en/ru/fa only), ``european_a1_entries``, ``bitig_a1_entries``, then
    ``uzbek_vocabulary``. An Uzbek row without a linked root id is treated
    as a miss, so the caller can still fall back to a computed hypothesis
    instead of reporting a HIGH-confidence result with no root.

    Returns:
        True when ``result`` was populated from the DB, else False.
    """
    needle = word.lower()
    root_letters_sql = "SELECT root_letters FROM roots WHERE root_id = ?"

    row = None
    # Column name comes from this fixed whitelist, never from user input.
    term_column = {'en': 'en_term', 'ru': 'ru_term', 'fa': 'fa_term'}.get(language)
    if term_column:
        row = conn.execute(
            "SELECT entry_id, root_id, root_letters, phonetic_chain FROM entries "
            f"WHERE LOWER({term_column}) = ? LIMIT 1",
            (needle,)
        ).fetchone()

    # European, then Bitig.
    for table in ('european_a1_entries', 'bitig_a1_entries'):
        if row:
            break
        row = conn.execute(
            f"SELECT entry_id, root_id FROM {table} "
            "WHERE LOWER(term) = ? LIMIT 1",
            (needle,)
        ).fetchone()

    # Uzbek uses a different schema (id / aa_root_id).
    if not row:
        uz_row = conn.execute(
            "SELECT id, aa_root_id FROM uzbek_vocabulary "
            "WHERE LOWER(latin_form) = ? OR LOWER(cyrillic_form) = ? LIMIT 1",
            (needle, needle)
        ).fetchone()
        if not uz_row or not uz_row['aa_root_id']:
            return False
        result['root_id'] = uz_row['aa_root_id']
        result['source'] = 'DB_UZBEK'
        root_row = conn.execute(root_letters_sql, (result['root_id'],)).fetchone()
        if root_row:
            result['root_letters'] = root_row['root_letters']
        result['confidence'] = 'HIGH'
        return True

    row = dict(row)
    result['root_id'] = row['root_id']
    result['root_letters'] = row.get('root_letters', '')
    result['source'] = 'DB'
    result['confidence'] = 'HIGH'
    if row.get('phonetic_chain'):
        result['shift_chain'] = row['phonetic_chain'].split(',')
    # European/Bitig rows carry no letters: resolve them via the roots table.
    if result['root_id'] and not result['root_letters']:
        root_row = conn.execute(root_letters_sql, (result['root_id'],)).fetchone()
        if root_row:
            result['root_letters'] = root_row['root_letters']
    return True
def _detect_language(word):
    """Guess a language code from the script the word is written in.

    Checks are applied in order and the first full match wins:
    'ar' for the basic Arabic letter/diacritic range, 'ru' for Russian
    Cyrillic, 'fa' for anything else inside the Arabic Unicode block
    (the two share a script, so this only separates words that use
    characters outside the basic Arabic range). Everything else,
    including the empty string, is reported as 'en'.
    """
    script_checks = (
        (r'^[\u0621-\u064A\u0640-\u065F]+$', 'ar'),
        (r'^[а-яА-ЯёЁ]+$', 'ru'),
        (r'^[\u0600-\u06FF]+$', 'fa'),
    )
    for regex, code in script_checks:
        if re.match(regex, word):
            return code
    return 'en'
# ═══════════════════════════════════════════════════════════════════════
# CONTEXT TRACKER — maintain focus across a session
# ═══════════════════════════════════════════════════════════════════════
class ContextTracker:
    """Tracks the user's focus root/topic across a session.

    Maintains:
        - focus_roots: up to 5 root ids, oldest first, most recent last
        - recent_queries: the last ``max_history`` queries
        - related_roots: root letters seen during the session
        - pending_ops: operations waiting for user confirmation
    """

    def __init__(self):
        self.focus_roots = []       # root ids in focus; last item is current
        self.recent_queries = []    # last N queries
        self.related_roots = set()  # root letters discovered during session
        self.pending_ops = []       # operations waiting for user confirmation
        self.max_history = 20

    def update(self, perception_result):
        """Record a new perception result.

        A root id that is already in focus is moved to the top of the stack,
        so re-visiting an earlier root makes it the current focus again.

        Args:
            perception_result: dict from perceive(); must hold ``raw_input``,
                ``intent`` and ``params``; ``enriched`` is optional.
        """
        self.recent_queries.append({
            'input': perception_result['raw_input'],
            'intent': perception_result['intent'],
            'params': perception_result['params'],
        })
        if len(self.recent_queries) > self.max_history:
            self.recent_queries.pop(0)

        # "or {}" tolerates an explicit None for 'enriched'.
        enriched = perception_result.get('enriched') or {}
        root_id = enriched.get('root_id')
        root_letters = enriched.get('root_letters')
        if root_id:
            if root_id in self.focus_roots:
                self.focus_roots.remove(root_id)
            self.focus_roots.append(root_id)
            if len(self.focus_roots) > 5:
                self.focus_roots.pop(0)
        if root_letters:
            self.related_roots.add(root_letters)

    def get_current_focus(self):
        """Return the most recently focused root id, or None if there is none."""
        return self.focus_roots[-1] if self.focus_roots else None

    def get_context_summary(self):
        """Return a snapshot dict of the current context.

        ``related_roots`` is sorted so the summary is deterministic.
        """
        return {
            'focus_root': self.get_current_focus(),
            'focus_history': list(self.focus_roots),
            'query_count': len(self.recent_queries),
            'related_roots': sorted(self.related_roots),
            'pending_ops': len(self.pending_ops),
        }

    def suggest_next(self):
        """Suggest follow-up operations based on the current context.

        Returns:
            list of human-readable suggestion strings.
        """
        suggestions = []
        focus = self.get_current_focus()
        if focus:
            suggestions.append(f"expand {focus} — view full downstream tree")
            suggestions.append(f"report {focus} — generate intelligence report")
        if len(self.focus_roots) >= 2:
            suggestions.append(
                f"compare {self.focus_roots[-1]} {self.focus_roots[-2]} — structural comparison"
            )
        if not focus:
            suggestions.append("Type any word to trace its root")
            suggestions.append("Type a root (e.g. ك-ف-ر) to explain it")
        return suggestions
# Module-wide tracker shared by the three convenience functions below.
_tracker = ContextTracker()


def track_context(perception_result):
    """Feed a perception result to the shared tracker and return its summary."""
    _tracker.update(perception_result)
    summary = _tracker.get_context_summary()
    return summary


def get_context():
    """Return the shared tracker's current context summary without changing it."""
    summary = _tracker.get_context_summary()
    return summary


def suggest_next():
    """Return the shared tracker's suggestions for the next action."""
    ideas = _tracker.suggest_next()
    return ideas
# ═══════════════════════════════════════════════════════════════════════
# CLI INTERFACE
# ═══════════════════════════════════════════════════════════════════════
def main():
    """Command-line front end: perceive / classify / detect / decompose.

    With no command, prints usage and exits with status 0. An unknown
    command prints a message and exits with status 1. Everything after the
    command is joined with spaces into a single argument string.
    """
    argv = sys.argv
    if len(argv) < 2:
        usage = (
            "أَمْر بَصَر — Perception Engine",
            "",
            "Usage:",
            " python3 amr_basar.py perceive 'explain ك-ف-ر' # full perception",
            " python3 amr_basar.py classify 'cover' # classify input",
            " python3 amr_basar.py detect cover # detect root",
            " python3 amr_basar.py detect cover en # detect with language",
            " python3 amr_basar.py decompose 'trace X and Y' # decompose query",
        )
        for line in usage:
            print(line)
        sys.exit(0)

    def _show_core(info):
        # The three lines shared by the perceive and classify reports.
        print(f" INTENT: {info['intent']}")
        print(f" PARAMS: {info['params']}")
        print(f" CONFIDENCE: {info['confidence']}")

    cmd = argv[1]
    arg = ' '.join(argv[2:])

    if cmd == 'perceive':
        perception = perceive(arg)
        print(f"\nPERCEPTION: '{arg}'")
        _show_core(perception)
        if perception['enriched']:
            print(f" ENRICHED: {perception['enriched']}")
        if perception['sub_queries']:
            print(f" SUB-QUERIES: {len(perception['sub_queries'])}")
        return

    if cmd == 'classify':
        verdict = classify_input(arg)
        print(f"\nCLASSIFICATION: '{arg}'")
        _show_core(verdict)
        return

    if cmd == 'detect':
        # First token is the word, optional second token is the language.
        tokens = arg.split()
        word = tokens[0] if tokens else ''
        lang = tokens[1] if len(tokens) > 1 else 'auto'
        detection = detect_root(word, lang)
        print(f"\nROOT DETECTION: '{word}' ({detection['language']})")
        print(f" ROOT_ID: {detection['root_id']}")
        print(f" ROOT: {detection['root_letters']}")
        print(f" SOURCE: {detection['source']}")
        print(f" CONFIDENCE: {detection['confidence']}")
        if detection['shift_chain']:
            print(f" CHAIN: {' | '.join(str(s) for s in detection['shift_chain'])}")
        return

    if cmd == 'decompose':
        parts = decompose(arg)
        print(f"\nDECOMPOSITION: '{arg}'")
        print(f" SUB-QUERIES: {len(parts)}")
        for number, sq in enumerate(parts, 1):
            print(f" [{number}] {sq['intent']} ({sq['confidence']}): {sq['params']}")
        return

    print(f"Unknown command: {cmd}")
    sys.exit(1)
# ═══════════════════════════════════════════════════════════════════════
# QUF GATE — Called by amr_quf.py router
# ═══════════════════════════════════════════════════════════════════════
def detection_quf(data: dict) -> dict:
    """Grade a detection-type record on the three QUF axes (Q, U, F).

    Reads the record through several alternative key names so that rows
    from differently shaped tables can be graded by one function:
        - ROOT / root / root_letters
        - CORRUPTION_TYPE / corruption_type / class
        - CORRECT_TRANSLATION / correct_form / mechanism
        - COMMON_MISTRANSLATION / corrupted_form / contaminated_translation
        - AYAT_COUNT / ayat_count
        - dp_id / DP_ID / dp_code
        - qur_anchor / qur_ref
        - name / contaminated_term, status, example, distinct_from

    Grading:
        Q: number of evidence items present (root-or-name, type,
           Quranic reference, DP id): >=3 HIGH, 2 MEDIUM, 1 LOW, 0 FAIL.
        U: HIGH if the type is a known one AND a mechanism/example exists,
           or the status is CONFIRMED; MEDIUM if only one of type/mechanism;
           otherwise LOW.
        F: HIGH if a Quranic anchor comes with a mechanism or distinction,
           or the correct and wrong forms are both given and differ;
           MEDIUM if only an anchor or a mechanism; otherwise LOW.

    A non-numeric or missing ayat count is treated as zero rather than
    raising.

    Args:
        data: one record as a dict.

    Returns:
        dict with grades ``q``, ``u``, ``f``, boolean ``pass`` (all three
        at least MEDIUM) and one-item evidence lists ``q_evidence``,
        ``u_evidence``, ``f_evidence``.
    """
    GRADE_ORDER = {'HIGH': 4, 'MEDIUM': 3, 'LOW': 2, 'FAIL': 1, 'PENDING': 0}

    # Unified field extraction across table schemas
    root = (data.get('ROOT', '') or data.get('root', '') or
            data.get('root_letters', '') or '')
    corruption_type = (data.get('CORRUPTION_TYPE', '') or data.get('corruption_type', '') or
                       data.get('class', '') or '')
    correct = (data.get('CORRECT_TRANSLATION', '') or data.get('correct_form', '') or
               data.get('mechanism', '') or '')
    wrong = (data.get('COMMON_MISTRANSLATION', '') or data.get('corrupted_form', '') or
             data.get('contaminated_translation', '') or '')
    ayat_count = data.get('AYAT_COUNT', 0) or data.get('ayat_count', 0) or 0
    dp_id = (data.get('dp_id', '') or data.get('DP_ID', '') or
             data.get('dp_code', '') or '')
    qur_anchor = (data.get('qur_anchor', '') or data.get('qur_ref', '') or '')
    name = data.get('name', '') or data.get('contaminated_term', '') or ''
    status = data.get('status', '') or ''
    example = data.get('example', '') or ''

    # The count may arrive as free text (e.g. "unknown"); never let that
    # abort the whole grading.
    try:
        ayat_total = int(ayat_count)
    except (TypeError, ValueError):
        ayat_total = 0

    # Q: evidence counted
    q_items = sum([bool(root) or bool(name), bool(corruption_type),
                   ayat_total > 0 or bool(qur_anchor), bool(dp_id)])
    q = 'HIGH' if q_items >= 3 else ('MEDIUM' if q_items >= 2 else ('LOW' if q_items >= 1 else 'FAIL'))
    q_ev = [f'root/name={bool(root or name)}, type={str(corruption_type)[:20]}, qur={bool(qur_anchor)}, dp={dp_id}']

    # U: pattern documented with examples/mechanism
    valid_types = {'ROOT_FLATTENED', 'ACTION_TO_ETHNIC', 'ATTRIBUTE_TO_GENERIC',
                   'SCOPE_NARROWED', 'ROOT_REPLACED', 'ROOT_INVERTED',
                   'LINGUISTIC', 'CIVILISATION', 'COVENANTAL'}
    # Substring match: the stored type may embed a known type in longer text.
    type_valid = any(vt in str(corruption_type).upper() for vt in valid_types) if corruption_type else False
    has_mechanism = bool(correct) or bool(example)
    confirmed = str(status).upper() == 'CONFIRMED'
    if (type_valid and has_mechanism) or confirmed:
        u = 'HIGH'
    elif type_valid or has_mechanism:
        u = 'MEDIUM'
    else:
        u = 'LOW'
    u_ev = [f'Valid type: {type_valid}, mechanism: {has_mechanism}, confirmed: {confirmed}']

    # F: verifiable — qur_anchor or washed≠corrupted or distinct_from documented
    distinct = data.get('distinct_from', '') or ''
    if qur_anchor and (correct or distinct):
        f = 'HIGH'
        f_ev = ['Quranic anchor + mechanism/distinction documented']
    elif correct and wrong and str(correct).strip() != str(wrong).strip():
        f = 'HIGH'
        f_ev = [f'Washed ({str(correct)[:20]}) != corrupted ({str(wrong)[:20]})']
    elif qur_anchor or correct:
        f = 'MEDIUM'
        f_ev = [f'Partial: qur_anchor={bool(qur_anchor)}, mechanism={bool(correct)}']
    else:
        f = 'LOW'
        f_ev = ['No Quranic anchor or mechanism documented']

    passes = all(GRADE_ORDER.get(g, 0) >= 3 for g in [q, u, f])
    return {
        'q': q, 'u': u, 'f': f, 'pass': passes,
        'q_evidence': q_ev, 'u_evidence': u_ev, 'f_evidence': f_ev,
    }
def blacklist_quf(data: dict) -> dict:
    """Grade a contamination_blacklist record on the three QUF axes.

    Blacklist rows contain banned terms by design; this check only verifies
    that the contamination and its correction are documented.

    Grading:
        Q: HIGH when term, wrong translation and correct translation are all
           present; MEDIUM when term and correct translation are; else LOW.
        U: HIGH when a source of correction is given; MEDIUM when only a
           correct translation is; else LOW.
        F: HIGH when the reason for contamination and a correct translation
           are both given; MEDIUM when only the reason is; else LOW.

    Args:
        data: one record as a dict; missing or None fields count as empty.

    Returns:
        dict with ``q``, ``u``, ``f``, boolean ``pass`` (all three at least
        MEDIUM) and one-item evidence lists for each axis.
    """
    rank = {'HIGH': 4, 'MEDIUM': 3, 'LOW': 2, 'FAIL': 1, 'PENDING': 0}

    def field(key):
        # Missing keys and None both collapse to the empty string.
        return data.get(key, '') or ''

    term = field('contaminated_term')
    wrong = field('contaminated_translation')
    correct = field('correct_translation')
    source = field('source_of_correction')
    why = field('why_contaminated')

    # Q: contaminated term + both translations documented
    if term and wrong and correct:
        q = 'HIGH'
    elif term and correct:
        q = 'MEDIUM'
    else:
        q = 'LOW'

    # U: correction source documented
    if source:
        u = 'HIGH'
    elif correct:
        u = 'MEDIUM'
    else:
        u = 'LOW'

    # F: the reason for contamination is documented (falsifiable claim)
    if why and correct:
        f = 'HIGH'
    elif why:
        f = 'MEDIUM'
    else:
        f = 'LOW'

    lowest = min(rank.get(grade, 0) for grade in (q, u, f))
    return {
        'q': q,
        'u': u,
        'f': f,
        'pass': lowest >= 3,
        'q_evidence': [f'term={bool(term)}, wrong={bool(wrong)}, correct={bool(correct)}'],
        'u_evidence': [f'Source of correction: {bool(source)}'],
        'f_evidence': [f'Why contaminated: {bool(why)}, correction: {bool(correct)}'],
    }
def reversal_quf(data: dict) -> dict:
    """Grade a phonetic_reversal record on the three QUF axes.

    Reversal rows contain reversed forms by design; this check only verifies
    that the shift is described, exemplified and assessed.

    Grading:
        Q: HIGH when shift code and both endpoints (from_modern, to_orig)
           are present; MEDIUM when only the shift code is; else LOW.
        U: HIGH when mechanism and attested example are both present;
           MEDIUM when only the mechanism is; else LOW.
        F: HIGH when status is CONFIRMED and reliability is HIGH or MEDIUM
           (both compared case-insensitively); MEDIUM when only one of the
           two holds; else LOW.

    Args:
        data: one record as a dict; missing or None fields count as empty.

    Returns:
        dict with ``q``, ``u``, ``f``, boolean ``pass`` (all three at least
        MEDIUM) and one-item evidence lists for each axis.
    """
    rank = {'HIGH': 4, 'MEDIUM': 3, 'LOW': 2, 'FAIL': 1, 'PENDING': 0}

    def field(key):
        # Missing keys and None both collapse to the empty string.
        return data.get(key, '') or ''

    shift_code = field('shift_code')
    from_modern = field('from_modern')
    to_orig = field('to_orig')
    mechanism = field('mechanism')
    example = field('attested_example')
    reliability = field('reliability')
    status = field('status')

    # Q: shift code + both endpoints documented
    if shift_code and from_modern and to_orig:
        q = 'HIGH'
    elif shift_code:
        q = 'MEDIUM'
    else:
        q = 'LOW'

    # U: mechanism + attested example
    if mechanism and example:
        u = 'HIGH'
    elif mechanism:
        u = 'MEDIUM'
    else:
        u = 'LOW'

    # F: reliability assessed + status confirmed
    is_confirmed = str(status).upper() == 'CONFIRMED'
    is_reliable = str(reliability).upper() in ('HIGH', 'MEDIUM')
    if is_confirmed and is_reliable:
        f = 'HIGH'
    elif is_confirmed or is_reliable:
        f = 'MEDIUM'
    else:
        f = 'LOW'

    lowest = min(rank.get(grade, 0) for grade in (q, u, f))
    return {
        'q': q,
        'u': u,
        'f': f,
        'pass': lowest >= 3,
        'q_evidence': [f'shift={shift_code}, from={from_modern}, to_orig={bool(to_orig)}'],
        'u_evidence': [f'mechanism={bool(mechanism)}, example={bool(example)}'],
        'f_evidence': [f'reliability={reliability}, status={status}'],
    }
if __name__ == "__main__":
main()