Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| # -*- coding: utf-8 -*- | |
| """ | |
| al_masudi_setup.py — Create al_masudi_entries table + anchors + triggers. | |
| Run ONCE to bootstrap the al-Masudi extraction infrastructure. | |
| Modelled on ibn_hawqal_setup.py, adapted for: | |
| - DarFikr 2nd edition 1990 (Volume 2: political history with scattered trade data) | |
| - Barbier de Meynard / Pavet de Courteille critical edition (future Vol 1) | |
| - Author: Abu al-Hasan Ali ibn al-Husayn al-Masudi (d. 345 AH / 956 CE) | |
| - Work: Muruj al-Dhahab wa Ma'adin al-Jawhar | |
| """ | |
| import os | |
| import sys | |
| import sqlite3 | |
| DB_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), | |
| "uslap_database_v3.db") | |
| def setup(): | |
| conn = sqlite3.connect(DB_PATH) | |
| c = conn.cursor() | |
| # ═══ 1. CREATE al_masudi_entries TABLE ═══ | |
| c.execute(""" | |
| CREATE TABLE IF NOT EXISTS al_masudi_entries ( | |
| am_id INTEGER PRIMARY KEY AUTOINCREMENT, | |
| -- ═══ Provenance ═══ | |
| source_ms TEXT NOT NULL, -- DARFIKR_1990 | BDM_PARIS_1861 | |
| edition_page INTEGER, -- page in printed edition | |
| volume INTEGER, -- volume number (1-4) | |
| section_name TEXT, -- section name | |
| -- ═══ Content ═══ | |
| entry_type TEXT NOT NULL CHECK(entry_type IN ( | |
| 'ROUTE','PERSON','PEOPLE','PLACE','CARGO','DISTANCE', | |
| 'NARRATIVE','OFFICIAL','NETWORK','TRADE','GEOGRAPHIC_NOTE', | |
| 'HEADING','INTERPOLATION','EUNUCH_TRADE','SLAVE_TRADE', | |
| 'MILITARY','COMMODITY','CITY_DESCRIPTION','SECTION_OVERVIEW', | |
| 'REVENUE','TREASURY','PROVINCE' | |
| )), | |
| arabic_text TEXT NOT NULL, | |
| transliteration TEXT, | |
| aa_gloss TEXT, -- meaning derived FROM ROOTS | |
| -- ═══ Intel classification ═══ | |
| intel_category TEXT, | |
| dp_codes TEXT, | |
| root_letters TEXT, | |
| root_ids TEXT, | |
| -- ═══ Cross-references ═══ | |
| period_start TEXT, | |
| period_end TEXT, | |
| region TEXT, | |
| qur_refs TEXT, | |
| cross_ref TEXT, | |
| keywords TEXT, | |
| -- ═══ Quality/contamination flags ═══ | |
| operator_flag INTEGER DEFAULT 0 CHECK(operator_flag IN (0,1)), | |
| scribal_interpolation INTEGER DEFAULT 0 CHECK(scribal_interpolation IN (0,1)), | |
| notes TEXT, | |
| -- ═══ QUF columns ═══ | |
| quf_q TEXT, | |
| quf_u TEXT, | |
| quf_f TEXT, | |
| quf_pass TEXT, | |
| quf_date TEXT, | |
| quf_token TEXT, | |
| created_date TEXT DEFAULT (datetime('now')) | |
| ) | |
| """) | |
| print(" TABLE al_masudi_entries created") | |
| # ═══ 2. CREATE al_masudi_anchors TABLE ═══ | |
| c.execute(""" | |
| CREATE TABLE IF NOT EXISTS al_masudi_anchors ( | |
| anchor_id TEXT PRIMARY KEY, | |
| arabic_text TEXT NOT NULL, | |
| root_letters TEXT, | |
| primary_source TEXT NOT NULL, | |
| qur_anchor TEXT, | |
| notes TEXT, | |
| created_date TEXT DEFAULT (datetime('now')) | |
| ) | |
| """) | |
| print(" TABLE al_masudi_anchors created") | |
| # ═══ 3. SEED ANCHORS ═══ | |
| anchors = [ | |
| ("ABBASID_ADMIN", "الإدارة العباسية", "ع-ب-س", "al-Masudi Vol.2", None, "Abbasid administrative data"), | |
| ("UMAYYAD_ADMIN", "الإدارة الأموية", "أ-م-و", "al-Masudi Vol.2", None, "Umayyad administrative data"), | |
| ("TRADE_GOODS", "البضائع", "ب-ض-ع", "al-Masudi", None, "Commodities and merchandise"), | |
| ("CITY_DESCRIPTION","وصف المدينة", "م-د-ن", "al-Masudi", None, "City descriptions"), | |
| ("BAYT_AL_MAL", "بيت المال", "م-و-ل", "al-Masudi", None, "Treasury data"), | |
| ("KHARAJ", "الخراج", "خ-ر-ج", "al-Masudi", None, "Provincial revenue/taxation"), | |
| ("CONQUEST", "الفتح", "ف-ت-ح", "al-Masudi", None, "Conquest narratives with geographic data"), | |
| ("SAQALIBA", "الصقالبة", "ص-ق-ل-ب","al-Masudi", None, "Saqaliba references"), | |
| ("KHAZAR", "الخزر", "خ-ز-ر", "al-Masudi", None, "Khazar references"), | |
| ("KHURASAN", "خراسان", None, "al-Masudi", None, "Khurasan province"), | |
| ("MA_WARA_AL_NAHR", "ما وراء النهر", None, "al-Masudi", None, "Transoxiana"), | |
| ("BILAD_AL_RUM", "بلاد الروم", "ر-و-م", "al-Masudi", "30:2", "Byzantine territories"), | |
| ("MISR", "مصر", "م-ص-ر", "al-Masudi", None, "Egypt"), | |
| ("AL_SHAM", "الشام", "ش-أ-م", "al-Masudi", None, "Greater Syria"), | |
| ("AL_IRAQ", "العراق", "ع-ر-ق", "al-Masudi", None, "Iraq"), | |
| ("INVOCATION", "بسملة وحمدلة", None, "al-Masudi", None, "Opening invocations"), | |
| ("AUTHOR_NOTE", "قال المسعودي", None, "al-Masudi", None, "Author self-references and methodology notes"), | |
| ("SLAVE_TRADE", "تجارة الرقيق", "ر-ق-ق", "al-Masudi", None, "Captive/slave trade references"), | |
| ("EUNUCH_TRADE", "تجارة الخصيان", "خ-ص-ي", "al-Masudi", None, "Eunuch trade references"), | |
| ("AL_ANDALUS", "الأندلس", None, "al-Masudi", None, "al-Andalus"), | |
| ("IFRIQIYA", "إفريقية", None, "al-Masudi", None, "North Africa"), | |
| ("FARS", "فارس", None, "al-Masudi", None, "Fars province"), | |
| ("AL_JAZIRA", "الجزيرة", "ج-ز-ر", "al-Masudi", None, "Upper Mesopotamia"), | |
| ] | |
| for a in anchors: | |
| c.execute(""" | |
| INSERT OR IGNORE INTO al_masudi_anchors | |
| (anchor_id, arabic_text, root_letters, primary_source, qur_anchor, notes) | |
| VALUES (?, ?, ?, ?, ?, ?) | |
| """, a) | |
| print(f" ANCHORS seeded: {len(anchors)}") | |
| # ═══ 4. QUF ENFORCE TRIGGER ═══ | |
| c.execute(""" | |
| CREATE TRIGGER IF NOT EXISTS al_masudi_entries_quf_enforce | |
| BEFORE INSERT ON al_masudi_entries | |
| FOR EACH ROW | |
| WHEN NEW.quf_token IS NULL | |
| BEGIN | |
| SELECT RAISE(ABORT, 'QUF BLOCKED: al_masudi_entries INSERT requires quf_token from handler.write_entry()'); | |
| END | |
| """) | |
| print(" TRIGGER al_masudi_entries_quf_enforce created") | |
| # ═══ 5. INTEL CATEGORY FK TRIGGER ═══ | |
| c.execute(""" | |
| CREATE TRIGGER IF NOT EXISTS trg_al_masudi_intel_category_fk | |
| BEFORE INSERT ON al_masudi_entries | |
| WHEN NEW.intel_category IS NOT NULL | |
| AND NEW.intel_category NOT IN (SELECT anchor_id FROM al_masudi_anchors) | |
| BEGIN | |
| SELECT RAISE(ABORT, 'FK BLOCKED: intel_category must be a registered anchor in al_masudi_anchors'); | |
| END | |
| """) | |
| print(" TRIGGER trg_al_masudi_intel_category_fk created") | |
| # ═══ 6. CONTAMINATION TRIGGER ═══ | |
| c.execute(""" | |
| CREATE TRIGGER IF NOT EXISTS trg_contamination_al_masudi_entries | |
| BEFORE INSERT ON al_masudi_entries | |
| WHEN | |
| INSTR(LOWER(COALESCE(NEW.arabic_text,'') || ' ' || COALESCE(NEW.transliteration,'') || ' ' || COALESCE(NEW.aa_gloss,'') || ' ' || COALESCE(NEW.notes,'') || ' ' || COALESCE(NEW.keywords,'')), | |
| 'borrowed from') > 0 | |
| OR INSTR(LOWER(COALESCE(NEW.arabic_text,'') || ' ' || COALESCE(NEW.transliteration,'') || ' ' || COALESCE(NEW.aa_gloss,'') || ' ' || COALESCE(NEW.notes,'') || ' ' || COALESCE(NEW.keywords,'')), | |
| 'loan from') > 0 | |
| OR INSTR(LOWER(COALESCE(NEW.arabic_text,'') || ' ' || COALESCE(NEW.transliteration,'') || ' ' || COALESCE(NEW.aa_gloss,'') || ' ' || COALESCE(NEW.notes,'') || ' ' || COALESCE(NEW.keywords,'')), | |
| 'cognate') > 0 | |
| OR INSTR(LOWER(COALESCE(NEW.arabic_text,'') || ' ' || COALESCE(NEW.transliteration,'') || ' ' || COALESCE(NEW.aa_gloss,'') || ' ' || COALESCE(NEW.notes,'') || ' ' || COALESCE(NEW.keywords,'')), | |
| 'proto-indo-european') > 0 | |
| OR INSTR(LOWER(COALESCE(NEW.arabic_text,'') || ' ' || COALESCE(NEW.transliteration,'') || ' ' || COALESCE(NEW.aa_gloss,'') || ' ' || COALESCE(NEW.notes,'') || ' ' || COALESCE(NEW.keywords,'')), | |
| 'semitic') > 0 | |
| OR INSTR(LOWER(COALESCE(NEW.arabic_text,'') || ' ' || COALESCE(NEW.transliteration,'') || ' ' || COALESCE(NEW.aa_gloss,'') || ' ' || COALESCE(NEW.notes,'') || ' ' || COALESCE(NEW.keywords,'')), | |
| 'indo-european') > 0 | |
| OR INSTR(LOWER(COALESCE(NEW.arabic_text,'') || ' ' || COALESCE(NEW.transliteration,'') || ' ' || COALESCE(NEW.aa_gloss,'') || ' ' || COALESCE(NEW.notes,'') || ' ' || COALESCE(NEW.keywords,'')), | |
| 'loanword') > 0 | |
| BEGIN | |
| SELECT RAISE(ABORT, 'CONTAMINATION BLOCKED: banned directional/framework term detected in al_masudi_entries'); | |
| END | |
| """) | |
| print(" TRIGGER trg_contamination_al_masudi_entries created") | |
| conn.commit() | |
| conn.close() | |
| # ═══ 7. COUNT TRIGGERS ═══ | |
| conn2 = sqlite3.connect(DB_PATH) | |
| total = conn2.execute( | |
| "SELECT COUNT(*) FROM sqlite_master WHERE type='trigger'" | |
| ).fetchone()[0] | |
| am_count = conn2.execute( | |
| "SELECT COUNT(*) FROM sqlite_master WHERE type='trigger' AND name LIKE '%al_masudi%'" | |
| ).fetchone()[0] | |
| conn2.close() | |
| print(f"\n TOTAL triggers now: {total} (al_masudi: {am_count})") | |
| print(" DONE — al_masudi infrastructure ready") | |
| if __name__ == "__main__": | |
| setup() | |