uslap-query / Code_files /al_masudi_setup.py
uslap's picture
Upload folder using huggingface_hub
7cc8e29 verified
Raw
History Blame Contribute Delete
9.93 kB
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
al_masudi_setup.py — Create al_masudi_entries table + anchors + triggers.
Run ONCE to bootstrap the al-Masudi extraction infrastructure.
Modelled on ibn_hawqal_setup.py, adapted for:
- DarFikr 2nd edition 1990 (Volume 2: political history with scattered trade data)
- Barbier de Meynard / Pavet de Courteille critical edition (future Vol 1)
- Author: Abu al-Hasan Ali ibn al-Husayn al-Masudi (d. 345 AH / 956 CE)
- Work: Muruj al-Dhahab wa Ma'adin al-Jawhar
"""
import os
import sys
import sqlite3
DB_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)),
"uslap_database_v3.db")
def setup():
conn = sqlite3.connect(DB_PATH)
c = conn.cursor()
# ═══ 1. CREATE al_masudi_entries TABLE ═══
c.execute("""
CREATE TABLE IF NOT EXISTS al_masudi_entries (
am_id INTEGER PRIMARY KEY AUTOINCREMENT,
-- ═══ Provenance ═══
source_ms TEXT NOT NULL, -- DARFIKR_1990 | BDM_PARIS_1861
edition_page INTEGER, -- page in printed edition
volume INTEGER, -- volume number (1-4)
section_name TEXT, -- section name
-- ═══ Content ═══
entry_type TEXT NOT NULL CHECK(entry_type IN (
'ROUTE','PERSON','PEOPLE','PLACE','CARGO','DISTANCE',
'NARRATIVE','OFFICIAL','NETWORK','TRADE','GEOGRAPHIC_NOTE',
'HEADING','INTERPOLATION','EUNUCH_TRADE','SLAVE_TRADE',
'MILITARY','COMMODITY','CITY_DESCRIPTION','SECTION_OVERVIEW',
'REVENUE','TREASURY','PROVINCE'
)),
arabic_text TEXT NOT NULL,
transliteration TEXT,
aa_gloss TEXT, -- meaning derived FROM ROOTS
-- ═══ Intel classification ═══
intel_category TEXT,
dp_codes TEXT,
root_letters TEXT,
root_ids TEXT,
-- ═══ Cross-references ═══
period_start TEXT,
period_end TEXT,
region TEXT,
qur_refs TEXT,
cross_ref TEXT,
keywords TEXT,
-- ═══ Quality/contamination flags ═══
operator_flag INTEGER DEFAULT 0 CHECK(operator_flag IN (0,1)),
scribal_interpolation INTEGER DEFAULT 0 CHECK(scribal_interpolation IN (0,1)),
notes TEXT,
-- ═══ QUF columns ═══
quf_q TEXT,
quf_u TEXT,
quf_f TEXT,
quf_pass TEXT,
quf_date TEXT,
quf_token TEXT,
created_date TEXT DEFAULT (datetime('now'))
)
""")
print(" TABLE al_masudi_entries created")
# ═══ 2. CREATE al_masudi_anchors TABLE ═══
c.execute("""
CREATE TABLE IF NOT EXISTS al_masudi_anchors (
anchor_id TEXT PRIMARY KEY,
arabic_text TEXT NOT NULL,
root_letters TEXT,
primary_source TEXT NOT NULL,
qur_anchor TEXT,
notes TEXT,
created_date TEXT DEFAULT (datetime('now'))
)
""")
print(" TABLE al_masudi_anchors created")
# ═══ 3. SEED ANCHORS ═══
anchors = [
("ABBASID_ADMIN", "الإدارة العباسية", "ع-ب-س", "al-Masudi Vol.2", None, "Abbasid administrative data"),
("UMAYYAD_ADMIN", "الإدارة الأموية", "أ-م-و", "al-Masudi Vol.2", None, "Umayyad administrative data"),
("TRADE_GOODS", "البضائع", "ب-ض-ع", "al-Masudi", None, "Commodities and merchandise"),
("CITY_DESCRIPTION","وصف المدينة", "م-د-ن", "al-Masudi", None, "City descriptions"),
("BAYT_AL_MAL", "بيت المال", "م-و-ل", "al-Masudi", None, "Treasury data"),
("KHARAJ", "الخراج", "خ-ر-ج", "al-Masudi", None, "Provincial revenue/taxation"),
("CONQUEST", "الفتح", "ف-ت-ح", "al-Masudi", None, "Conquest narratives with geographic data"),
("SAQALIBA", "الصقالبة", "ص-ق-ل-ب","al-Masudi", None, "Saqaliba references"),
("KHAZAR", "الخزر", "خ-ز-ر", "al-Masudi", None, "Khazar references"),
("KHURASAN", "خراسان", None, "al-Masudi", None, "Khurasan province"),
("MA_WARA_AL_NAHR", "ما وراء النهر", None, "al-Masudi", None, "Transoxiana"),
("BILAD_AL_RUM", "بلاد الروم", "ر-و-م", "al-Masudi", "30:2", "Byzantine territories"),
("MISR", "مصر", "م-ص-ر", "al-Masudi", None, "Egypt"),
("AL_SHAM", "الشام", "ش-أ-م", "al-Masudi", None, "Greater Syria"),
("AL_IRAQ", "العراق", "ع-ر-ق", "al-Masudi", None, "Iraq"),
("INVOCATION", "بسملة وحمدلة", None, "al-Masudi", None, "Opening invocations"),
("AUTHOR_NOTE", "قال المسعودي", None, "al-Masudi", None, "Author self-references and methodology notes"),
("SLAVE_TRADE", "تجارة الرقيق", "ر-ق-ق", "al-Masudi", None, "Captive/slave trade references"),
("EUNUCH_TRADE", "تجارة الخصيان", "خ-ص-ي", "al-Masudi", None, "Eunuch trade references"),
("AL_ANDALUS", "الأندلس", None, "al-Masudi", None, "al-Andalus"),
("IFRIQIYA", "إفريقية", None, "al-Masudi", None, "North Africa"),
("FARS", "فارس", None, "al-Masudi", None, "Fars province"),
("AL_JAZIRA", "الجزيرة", "ج-ز-ر", "al-Masudi", None, "Upper Mesopotamia"),
]
for a in anchors:
c.execute("""
INSERT OR IGNORE INTO al_masudi_anchors
(anchor_id, arabic_text, root_letters, primary_source, qur_anchor, notes)
VALUES (?, ?, ?, ?, ?, ?)
""", a)
print(f" ANCHORS seeded: {len(anchors)}")
# ═══ 4. QUF ENFORCE TRIGGER ═══
c.execute("""
CREATE TRIGGER IF NOT EXISTS al_masudi_entries_quf_enforce
BEFORE INSERT ON al_masudi_entries
FOR EACH ROW
WHEN NEW.quf_token IS NULL
BEGIN
SELECT RAISE(ABORT, 'QUF BLOCKED: al_masudi_entries INSERT requires quf_token from handler.write_entry()');
END
""")
print(" TRIGGER al_masudi_entries_quf_enforce created")
# ═══ 5. INTEL CATEGORY FK TRIGGER ═══
c.execute("""
CREATE TRIGGER IF NOT EXISTS trg_al_masudi_intel_category_fk
BEFORE INSERT ON al_masudi_entries
WHEN NEW.intel_category IS NOT NULL
AND NEW.intel_category NOT IN (SELECT anchor_id FROM al_masudi_anchors)
BEGIN
SELECT RAISE(ABORT, 'FK BLOCKED: intel_category must be a registered anchor in al_masudi_anchors');
END
""")
print(" TRIGGER trg_al_masudi_intel_category_fk created")
# ═══ 6. CONTAMINATION TRIGGER ═══
c.execute("""
CREATE TRIGGER IF NOT EXISTS trg_contamination_al_masudi_entries
BEFORE INSERT ON al_masudi_entries
WHEN
INSTR(LOWER(COALESCE(NEW.arabic_text,'') || ' ' || COALESCE(NEW.transliteration,'') || ' ' || COALESCE(NEW.aa_gloss,'') || ' ' || COALESCE(NEW.notes,'') || ' ' || COALESCE(NEW.keywords,'')),
'borrowed from') > 0
OR INSTR(LOWER(COALESCE(NEW.arabic_text,'') || ' ' || COALESCE(NEW.transliteration,'') || ' ' || COALESCE(NEW.aa_gloss,'') || ' ' || COALESCE(NEW.notes,'') || ' ' || COALESCE(NEW.keywords,'')),
'loan from') > 0
OR INSTR(LOWER(COALESCE(NEW.arabic_text,'') || ' ' || COALESCE(NEW.transliteration,'') || ' ' || COALESCE(NEW.aa_gloss,'') || ' ' || COALESCE(NEW.notes,'') || ' ' || COALESCE(NEW.keywords,'')),
'cognate') > 0
OR INSTR(LOWER(COALESCE(NEW.arabic_text,'') || ' ' || COALESCE(NEW.transliteration,'') || ' ' || COALESCE(NEW.aa_gloss,'') || ' ' || COALESCE(NEW.notes,'') || ' ' || COALESCE(NEW.keywords,'')),
'proto-indo-european') > 0
OR INSTR(LOWER(COALESCE(NEW.arabic_text,'') || ' ' || COALESCE(NEW.transliteration,'') || ' ' || COALESCE(NEW.aa_gloss,'') || ' ' || COALESCE(NEW.notes,'') || ' ' || COALESCE(NEW.keywords,'')),
'semitic') > 0
OR INSTR(LOWER(COALESCE(NEW.arabic_text,'') || ' ' || COALESCE(NEW.transliteration,'') || ' ' || COALESCE(NEW.aa_gloss,'') || ' ' || COALESCE(NEW.notes,'') || ' ' || COALESCE(NEW.keywords,'')),
'indo-european') > 0
OR INSTR(LOWER(COALESCE(NEW.arabic_text,'') || ' ' || COALESCE(NEW.transliteration,'') || ' ' || COALESCE(NEW.aa_gloss,'') || ' ' || COALESCE(NEW.notes,'') || ' ' || COALESCE(NEW.keywords,'')),
'loanword') > 0
BEGIN
SELECT RAISE(ABORT, 'CONTAMINATION BLOCKED: banned directional/framework term detected in al_masudi_entries');
END
""")
print(" TRIGGER trg_contamination_al_masudi_entries created")
conn.commit()
conn.close()
# ═══ 7. COUNT TRIGGERS ═══
conn2 = sqlite3.connect(DB_PATH)
total = conn2.execute(
"SELECT COUNT(*) FROM sqlite_master WHERE type='trigger'"
).fetchone()[0]
am_count = conn2.execute(
"SELECT COUNT(*) FROM sqlite_master WHERE type='trigger' AND name LIKE '%al_masudi%'"
).fetchone()[0]
conn2.close()
print(f"\n TOTAL triggers now: {total} (al_masudi: {am_count})")
print(" DONE — al_masudi infrastructure ready")
if __name__ == "__main__":
setup()