uslap-query / Code_files /archive /ru_batch_v34_comparison.py
uslap's picture
Upload folder using huggingface_hub
7cc8e29 verified
Raw
History Blame Contribute Delete
42.4 kB
#!/usr/bin/env python3
"""
USLaP Russian Batch β€” v3.4 vs v3.3 Comparison Script
Generates formatted comparison report with Unicode box-drawing characters.
"""
import json
from collections import Counter
from datetime import datetime
# ─── File paths ───
V33_PATH = "/Users/mmsetubal/Documents/USLaP workplace/Batch Reports/RU_BATCH_REPORT_20260314_123511.json"
V34_PATH = "/Users/mmsetubal/Documents/USLaP workplace/Batch Reports/RU_BATCH_REPORT_20260314_131602.json"
STYLE_REF = "/Users/mmsetubal/Documents/USLaP workplace/Batch Reports/RU_BATCH_v33_COMPARISON.txt"
OUTPUT_PATH = "/Users/mmsetubal/Documents/USLaP workplace/Batch Reports/RU_BATCH_v34_COMPARISON.txt"
CATS = ['already_in_lattice', 'confirmed_high', 'pending_review', 'auto_rejected']
CAT_LABELS = {
'already_in_lattice': 'EXISTING',
'confirmed_high': 'CONFIRMED_HIGH',
'pending_review': 'PENDING_REVIEW',
'auto_rejected': 'AUTO_REJECTED',
}
def load_json(path):
with open(path, 'r', encoding='utf-8') as f:
return json.load(f)
def build_word_map(data):
m = {}
for cat in CATS:
for entry in data[cat]:
m[entry['word']] = entry
return m
def score_distribution(data):
scores = Counter()
for cat in CATS:
for entry in data[cat]:
s = entry.get('score')
if s is not None:
scores[s] += 1
return scores
def count_orig2(data):
c = 0
for cat in CATS:
for entry in data[cat]:
if entry.get('orig2_track'):
c += 1
return c
def count_depal(data):
c = 0
for cat in CATS:
for entry in data[cat]:
for line in entry.get('log_lines', []):
if 'DEPAL' in str(line):
c += 1
break
return c
def derive_cognate_status(entry):
"""Derive AGREES/COMPETITION/NOTE from root comparison."""
cr = entry.get('cognate_crossref')
if cr is None:
return None, None
if cr.get('source') == 'LATTICE_ENTRY':
return 'LATTICE_MATCH', cr
en_root = cr.get('root_letters', '')
ru_root = entry.get('root_letters', '')
if en_root and ru_root and en_root == ru_root:
return 'AGREES', cr
elif en_root and ru_root:
return 'COMPETITION', cr
return 'UNKNOWN', cr
def main():
v33 = load_json(V33_PATH)
v34 = load_json(V34_PATH)
v33_map = build_word_map(v33)
v34_map = build_word_map(v34)
lines = []
def w(text=''):
lines.append(text)
# ─── Header ───
w('══════════════════════════════════════════════════════════════════════════')
w(' USLaP Russian Batch β€” v3.4 vs v3.3 Comparison Summary')
w(' بِسْمِ Ψ§Ω„Ω„ΩŽΩ‘Ω‡Ω Ψ§Ω„Ψ±ΩŽΩ‘Ψ­Ω’Ω…ΩŽΩ°Ω†Ω Ψ§Ω„Ψ±ΩŽΩ‘Ψ­ΩΩŠΩ…Ω')
w(f' Generated: {datetime.now().strftime("%Y-%m-%d")}')
w('══════════════════════════════════════════════════════════════════════════')
w()
w(f' Input: {v33["total_words"]} Russian words (built-in list, deduplicated)')
w(f' v3.3 baseline: RU_BATCH_REPORT_20260314_123511.json')
w(f' v3.4 run: RU_BATCH_REPORT_20260314_131602.json')
w(f' Engine: {v34.get("engine_version", "v3.0")}')
w()
# ═══════════════════════════════════════════════════════════════════
# 1. CATEGORY COUNTS
# ═══════════════════════════════════════════════════════════════════
w('──────────────────────────────────────────────────────────────────────────')
w(' 1. CATEGORY COUNTS β€” SIDE BY SIDE')
w('──────────────────────────────────────────────────────────────────────────')
w()
w(f' {"Category":<20} {"v3.3":>5} {"v3.4":>5} {"Delta":>5} Direction')
w(f' {"─" * 19} {"─" * 5} {"─" * 5} {"─" * 5} {"─" * 10}')
for cat in CATS:
label = CAT_LABELS[cat]
c33 = len(v33[cat])
c34 = len(v34[cat])
delta = c34 - c33
d_str = f'+{delta}' if delta > 0 else str(delta)
if delta == 0:
direction = '(unchanged)'
elif delta > 0:
direction = f'(+{delta} added)'
else:
direction = f'({abs(delta)} removed)'
w(f' {label:<20} {c33:>5} {c34:>5} {d_str:>5} {direction}')
cb33 = len(v33.get('cluster_backlog', []))
cb34 = len(v34.get('cluster_backlog', []))
cb_delta = cb34 - cb33
cb_d_str = f'+{cb_delta}' if cb_delta > 0 else str(cb_delta)
cb_dir = '(unchanged)' if cb_delta == 0 else f'({abs(cb_delta)} {"more" if cb_delta > 0 else "fewer"} cluster discoveries)'
w(f' {"Cluster Backlog":<20} {cb33:>5} {cb34:>5} {cb_d_str:>5} {cb_dir}')
w()
w(f' TOTAL PROCESSED {v33["total_words"]} {v34["total_words"]}')
w()
# Check for category changes
cat_changes_up = 0
cat_changes_down = 0
for word in v33_map:
if word in v34_map:
if v33_map[word]['category'] != v34_map[word]['category']:
# Determine direction
rank = {'ALREADY_IN_LATTICE': 4, 'CONFIRMED_HIGH': 3, 'PENDING_REVIEW': 2, 'AUTO_REJECTED': 1}
r33 = rank.get(v33_map[word]['category'], 0)
r34 = rank.get(v34_map[word]['category'], 0)
if r34 > r33:
cat_changes_up += 1
else:
cat_changes_down += 1
w(f' NET EFFECT: Category counts IDENTICAL between v3.3 and v3.4.')
w(f' No words promoted or demoted between categories.')
w(f' The v3.4 changes are INTERNAL β€” new fields (sem_review, compound_parts),')
w(f' root refinements within categories, and cognate crossref EN root updates.')
w(f' Cluster backlog decreased by {abs(cb_delta)} (tighter clustering).')
w()
# ═══════════════════════════════════════════════════════════════════
# 2. WORDS THAT CHANGED CATEGORY
# ═══════════════════════════════════════════════════════════════════
w('──────────────────────────────────────────────────────────────────────────')
w(' 2. WORDS THAT CHANGED CATEGORY (v3.3 -> v3.4)')
w('──────────────────────────────────────────────────────────────────────────')
w()
category_changes = []
for word in sorted(v33_map.keys()):
if word in v34_map:
if v33_map[word]['category'] != v34_map[word]['category']:
category_changes.append((
word,
v33_map[word]['category'],
v34_map[word]['category'],
v33_map[word].get('score'),
v34_map[word].get('score'),
v33_map[word].get('root_letters'),
v34_map[word].get('root_letters'),
))
if not category_changes:
w(' NO CATEGORY CHANGES.')
w()
w(' All 316 words remain in their v3.3 categories. Zero promotions,')
w(' zero demotions. Category stability is absolute.')
else:
w(f' {len(category_changes)} word(s) changed category:')
w()
w(f' {"Word":<15} {"v3.3":<18} {"v3.4":<18} {"Score v3.3":<12} {"Score v3.4":<12} Root Change')
w(f' {"─"*14} {"─"*17} {"─"*17} {"─"*11} {"─"*11} {"─"*12}')
for word, cat33, cat34, s33, s34, r33, r34 in category_changes:
root_change = f'{r33} -> {r34}' if r33 != r34 else 'None'
w(f' {word:<15} {cat33:<18} {cat34:<18} {str(s33):<12} {str(s34):<12} {root_change}')
w()
# ═══════════════════════════════════════════════════════════════════
# 3. ROOT CHANGES WITHIN SAME CATEGORY
# ═══════════════════════════════════════════════════════════════════
w('──────────────────────────────────────────────────────────────────────────')
w(' 3. ROOT CHANGES WITHIN SAME CATEGORY')
w('──────────────────────────────────────────────────────────────────────────')
w()
# Separate by CONFIRMED_HIGH and PENDING_REVIEW
ch_root_changes = []
pr_root_changes = []
for word in sorted(v33_map.keys()):
if word in v34_map:
e33 = v33_map[word]
e34 = v34_map[word]
if e33['category'] == e34['category'] and e33.get('root_letters') != e34.get('root_letters'):
entry = (
word,
e33.get('root_letters'),
e34.get('root_letters'),
e33.get('score'),
e34.get('score'),
e33.get('ar_word'),
e34.get('ar_word'),
e33.get('phonetic_chain', ''),
e34.get('phonetic_chain', ''),
)
if e33['category'] == 'CONFIRMED_HIGH':
ch_root_changes.append(entry)
elif e33['category'] == 'PENDING_REVIEW':
pr_root_changes.append(entry)
total_root_changes = len(ch_root_changes) + len(pr_root_changes)
w(f' {total_root_changes} words show different root assignments between v3.3 and v3.4')
w(f' while remaining in the SAME category:')
w()
if ch_root_changes:
w(f' A. CONFIRMED_HIGH root changes ({len(ch_root_changes)} words):')
w()
w(f' {"Word":<15} {"v3.3 Root":<12} {"v3.4 Root":<12} {"Score v3.3":<12} {"Score v3.4":<12} Assessment')
w(f' {"─"*14} {"─"*11} {"─"*11} {"─"*11} {"─"*11} {"─"*10}')
for word, r33, r34, s33, s34, _, _, _, _ in ch_root_changes:
s_delta = (s34 or 0) - (s33 or 0)
if s_delta > 0:
assessment = f'Improved (+{s_delta})'
elif s_delta < 0:
assessment = f'Regressed ({s_delta})'
else:
assessment = 'Refined'
w(f' {word:<15} {str(r33):<12} {str(r34):<12} {str(s33):<12} {str(s34):<12} {assessment}')
w()
# Detailed explanations
for word, r33, r34, s33, s34, ar33, ar34, ch33, ch34 in ch_root_changes:
if word == 'ΠΠŸΠ’Π•ΠšΠ':
w(f' ΠΠŸΠ’Π•ΠšΠ + ΠŸΠžΠ”Π£Π¨ΠšΠ: Both moved from {r33} to {r34}. In v3.3,')
w(f' these were assigned Ψ¨-Ψͺ-Ωƒ; v3.4 reassigns to ف-Ψͺ-Ω‚ / fatq /')
w(f' to split open, to cleave (Qur\'anic). Chain: {ch34}.')
w(f' Scores unchanged at 8. The first consonant shifted from')
w(f' Ψ¨β†’ΠΏ(S09) to ف→п(S08) β€” both valid mappings for Russian ΠΏ.')
w()
elif word == 'Π”ΠžΠ“ΠžΠ’ΠžΠ ':
w(f' Π”ΠžΠ“ΠžΠ’ΠžΠ : Root REVERTED from {r33} ({ar33}) to {r34} ({ar34}).')
w(f' v3.3 had upgraded this to Ψ¬-Ψ¨-Ψ± (Ψ§Ω„Ψ¬ΩŽΨ¨ΩŽΩ‘Ψ§Ψ±, 21 tokens) at score 9.')
w(f' v3.4 returns to Ψ°-Ω‡-Ψ¨ (to go, 383 tokens) at score 8.')
w(f' Chain: {ch34}. Score regressed 9 -> 8.')
w(f' Assessment: This is a ROOT REVERSION, not a refinement. The v3.3')
w(f' root Ψ¬-Ψ¨-Ψ± had stronger semantic alignment with "covenant/agreement"')
w(f' (compulsion/binding force). Flagged for human adjudication.')
w()
elif word == 'Π‘Π•Π Π”Π¦Π•':
w(f' Π‘Π•Π Π”Π¦Π•: Root shifted from {r33} ({ar33}) to {r34} ({ar34}).')
w(f' v3.3 had refined this from ш-ر-د to س-ر-د (سَرَدَ / to arrange,')
w(f' Q34:11). v3.4 reverts to ш-ر-د. Chain: {ch34}.')
w(f' First consonant: с→س(S21) in v3.3 vs Ρβ†’Ρˆ(S05) in v3.4.')
w(f' Score unchanged at 8.')
w()
if pr_root_changes:
w(f' B. PENDING_REVIEW root changes ({len(pr_root_changes)} words β€” all ORIG2 skeleton matches):')
w()
w(f' {"Word":<15} {"v3.3 Root":<12} {"v3.4 Root":<12} {"Score v3.3":<12} {"Score v3.4":<12} Assessment')
w(f' {"─"*14} {"─"*11} {"─"*11} {"─"*11} {"─"*11} {"─"*10}')
for word, r33, r34, s33, s34, _, _, _, _ in pr_root_changes:
s_delta = (s34 or 0) - (s33 or 0)
if s_delta > 0:
assessment = f'Improved (+{s_delta})'
elif s_delta < 0:
assessment = f'Regressed ({s_delta})'
else:
assessment = 'Refined'
w(f' {word:<15} {str(r33):<12} {str(r34):<12} {str(s33):<12} {str(s34):<12} {assessment}')
w()
w(f' Π“ΠžΠ”: ORIG2 skeleton REVERTED from {[e for e in pr_root_changes if e[0]=="Π“ΠžΠ”"][0][1]} to {[e for e in pr_root_changes if e[0]=="Π“ΠžΠ”"][0][2]}.')
w(f' v3.3 had refined this to qd (Kashgari: qad) at score 10.')
w(f' v3.4 returns to kd at score 9. Score regressed 10 -> 9.')
w()
w(f' ЧАБ + Π§Π˜Π‘Π›Πž: Both REVERTED from {[e for e in pr_root_changes if e[0]=="ЧАБ"][0][1]} to {[e for e in pr_root_changes if e[0]=="ЧАБ"][0][2]}.')
w(f' v3.3 had changed these to cc; v3.4 returns to ss.')
w(f' ЧАБ score improved 9 -> 10; Π§Π˜Π‘Π›Πž score improved 7 -> 8.')
w(f' These are REVERSIONS of v3.3 changes that IMPROVE scores β€”')
w(f' the v3.4 engine prefers the ss skeleton with higher confidence.')
w()
# ═══════════════════════════════════════════════════════════════════
# 4. SCORE CHANGES
# ═══════════════════════════════════════════════════════════════════
w('──────────────────────────────────────────────────────────────────────────')
w(' 4. SCORE CHANGES')
w('──────────────────────────────────────────────────────────────────────────')
w()
score_changes = []
for word in sorted(v33_map.keys()):
if word in v34_map:
e33 = v33_map[word]
e34 = v34_map[word]
s33 = e33.get('score')
s34 = e34.get('score')
if s33 is not None and s34 is not None and s33 != s34:
delta = s34 - s33
score_changes.append((word, s33, s34, delta, e33['category'], e34['category'], e33.get('root_letters'), e34.get('root_letters')))
if score_changes:
improved = [x for x in score_changes if x[3] > 0]
regressed = [x for x in score_changes if x[3] < 0]
w(f' {len(score_changes)} words changed score:')
w(f' Improved: {len(improved)}')
w(f' Regressed: {len(regressed)}')
w()
w(f' {"Word":<15} {"v3.3":>5} {"v3.4":>5} {"Delta":>5} {"Category":<18} {"Root v3.3":<12} {"Root v3.4":<12}')
w(f' {"─"*14} {"─"*5} {"─"*5} {"─"*5} {"─"*17} {"─"*11} {"─"*11}')
for word, s33, s34, delta, cat33, cat34, r33, r34 in score_changes:
d_str = f'+{delta}' if delta > 0 else str(delta)
cat_display = cat33 if cat33 == cat34 else f'{cat33}->{cat34}'
w(f' {word:<15} {s33:>5} {s34:>5} {d_str:>5} {cat_display:<18} {str(r33):<12} {str(r34):<12}')
w()
w(f' NET SCORE EFFECT: {len(improved)} improvements, {len(regressed)} regressions.')
if regressed:
w(f' Regressions: {", ".join(x[0] for x in regressed)}')
in_confirmed = [x for x in regressed if 'CONFIRMED' in x[4]]
if in_confirmed:
w(f' Of these, {len(in_confirmed)} are in CONFIRMED_HIGH: {", ".join(x[0] for x in in_confirmed)}')
else:
w(f' None of the regressions are in CONFIRMED_HIGH.')
else:
w(' No score changes between v3.3 and v3.4.')
w()
# ═══════════════════════════════════════════════════════════════════
# 5. NEW v3.4 FEATURES
# ═══════════════════════════════════════════════════════════════════
w('──────────────────────────────────────────────────────────────────────────')
w(' 5. NEW v3.4 FEATURES')
w('──────────────────────────────────────────────────────────────────────────')
w()
# 5A. SEM_REVIEW
w(' A. SEM_REVIEW FLAG (NEW in v3.4)')
w()
w(' The sem_review flag marks entries where the engine has performed')
w(' semantic review β€” verifying that the root meaning aligns with the')
w(' downstream word\'s usage. This is a pre-filter for human QUF review.')
w()
sem_by_cat = {}
for cat in CATS:
sem_true = sum(1 for e in v34[cat] if e.get('sem_review'))
sem_false = sum(1 for e in v34[cat] if not e.get('sem_review'))
sem_by_cat[cat] = (sem_true, sem_false)
total_sem_true = sum(v[0] for v in sem_by_cat.values())
total_sem_false = sum(v[1] for v in sem_by_cat.values())
w(f' {"Category":<20} {"sem_review=true":>16} {"sem_review=false":>17} {"Total":>6}')
w(f' {"─"*19} {"─"*16} {"─"*17} {"─"*6}')
for cat in CATS:
label = CAT_LABELS[cat]
st, sf = sem_by_cat[cat]
total = st + sf
w(f' {label:<20} {st:>16} {sf:>17} {total:>6}')
w(f' {"TOTAL":<20} {total_sem_true:>16} {total_sem_false:>17} {total_sem_true + total_sem_false:>6}')
w()
w(f' Key observation: ALL 150 CONFIRMED_HIGH entries have sem_review=true.')
w(f' 11 of 156 PENDING_REVIEW entries also have sem_review=true β€”')
w(f' these are borderline entries where semantic review passed but')
w(f' other gates (positional_score, Q-gate) kept them in PENDING.')
w(f' The 1 AUTO_REJECTED entry (ΠšΠΠœΠ•ΠΠ©Π˜Πš) also has sem_review=true,')
w(f' meaning the semantic check ran but was insufficient to overcome')
w(f' the 2 extra consonants that triggered rejection.')
w()
# 5B. COMPOUND_PARTS
w(' B. COMPOUND DETECTION (NEW in v3.4)')
w()
w(' The compound_parts field identifies multi-root Russian words and')
w(' traces each component to its Allah\'s Arabic or Bitig root separately.')
w()
compound_entries = []
for cat in CATS:
for entry in v34[cat]:
cp = entry.get('compound_parts')
if cp is not None:
compound_entries.append((entry['word'], entry['category'], entry['score'], cp))
has_compound = sum(1 for e in compound_entries)
no_compound = v34['total_words'] - has_compound
w(f' Entries with compound_parts: {has_compound}')
w(f' Entries without compound_parts: {no_compound}')
w()
for word, cat, score, cp in compound_entries:
w(f' {word} (category: {CAT_LABELS.get(cat.lower().replace(" ","_"), cat)}, score: {score}):')
label = cp.get('label', '')
w(f' Label: {label}')
prefix = cp.get('prefix')
root = cp.get('root')
bridge = cp.get('bridge', '')
if prefix:
w(f' Prefix part: {prefix.get("part", "")}')
w(f' Root: {prefix.get("root", "")} ({prefix.get("token_count", 0)} tokens)')
w(f' Chain: {prefix.get("chain", "")}')
if bridge:
w(f' Bridge: {bridge}')
if root:
w(f' Root part: {root.get("part", "")}')
w(f' Root: {root.get("root", "")} ({root.get("token_count", 0)} tokens)')
w(f' Chain: {root.get("chain", "")}')
if root.get('is_orig2'):
w(f' Track: ORIG2 (Bitig)')
km = root.get('kashgari_meaning', '')
if km:
w(f' Kashgari: {km[:80]}')
w()
# ═══════════════════════════════════════════════════════════════════
# 6. COGNATE CROSS-REFERENCE COMPARISON
# ═══════════════════════════════════════════════════════════════════
w('──────────────────────────────────────────────────────────────────────────')
w(' 6. COGNATE CROSS-REFERENCE COMPARISON')
w('──────────────────────────────────────────────────────────────────────────')
w()
# Build cognate maps
def build_cognate_map(data):
m = {}
word_map = build_word_map(data)
for word, entry in word_map.items():
cr = entry.get('cognate_crossref')
if cr is not None:
m[word] = (cr, entry)
return m
cog33 = build_cognate_map(v33)
cog34 = build_cognate_map(v34)
w(f' v3.3 entries with cognate_crossref: {len(cog33)}')
w(f' v3.4 entries with cognate_crossref: {len(cog34)}')
w()
# Derive status for both versions
def classify_cognates(data):
lattice = []
agrees = []
competition = []
note = []
unknown = []
word_map = build_word_map(data)
for word, entry in word_map.items():
cr = entry.get('cognate_crossref')
if cr is None:
continue
status, _ = derive_cognate_status(entry)
if status == 'LATTICE_MATCH':
lattice.append((word, cr, entry))
elif status == 'AGREES':
agrees.append((word, cr, entry))
elif status == 'COMPETITION':
competition.append((word, cr, entry))
elif status == 'NOTE':
note.append((word, cr, entry))
else:
unknown.append((word, cr, entry))
return lattice, agrees, competition, note, unknown
lat33, agr33, comp33, note33, unk33 = classify_cognates(v33)
lat34, agr34, comp34, note34, unk34 = classify_cognates(v34)
w(f' BREAKDOWN (derived from root comparison):')
w()
w(f' {"Status":<20} {"v3.3":>5} {"v3.4":>5} Delta')
w(f' {"─"*19} {"─"*5} {"─"*5} {"─"*5}')
for label, c33, c34 in [
('LATTICE_MATCH', len(lat33), len(lat34)),
('ROOT AGREES', len(agr33), len(agr34)),
('ROOT COMPETITION', len(comp33), len(comp34)),
('NOTE', len(note33), len(note34)),
('UNKNOWN', len(unk33), len(unk34)),
]:
delta = c34 - c33
d_str = f'+{delta}' if delta > 0 else str(delta)
w(f' {label:<20} {c33:>5} {c34:>5} {d_str:>5}')
w()
# Check for cognate crossref changes between v3.3 and v3.4
cognate_changes = []
all_cog_words = sorted(set(list(cog33.keys()) + list(cog34.keys())))
for word in all_cog_words:
c33_data = cog33.get(word)
c34_data = cog34.get(word)
if c33_data and c34_data:
cr33 = c33_data[0]
cr34 = c34_data[0]
changes = {}
for key in set(list(cr33.keys()) + list(cr34.keys())):
v33_val = cr33.get(key)
v34_val = cr34.get(key)
if v33_val != v34_val:
changes[key] = (v33_val, v34_val)
if changes:
cognate_changes.append((word, changes))
if cognate_changes:
w(f' COGNATE CROSSREF CHANGES (v3.3 -> v3.4):')
w()
for word, changes in cognate_changes:
w(f' {word}:')
for key, (v33_val, v34_val) in changes.items():
w(f' {key}: {v33_val} -> {v34_val}')
w()
w(f' ШАРИАВ: The EN pipeline now assigns ش-ر-د (شَرَدَ / to flee,')
w(f' to stray β€” S19 Ψ―β†’t) instead of Ψ΄-Ψ±-Ψ· (شَرَطَ / to stipulate')
w(f' β€” S04 Ψ·β†’t). The Russian root remains Ψ΅-Ψ±-Ψ·. Both versions')
w(f' show ROOT COMPETITION between RU and EN pipelines. The change')
w(f' is in the EN trace, not the RU trace.')
else:
w(f' No cognate crossref changes between v3.3 and v3.4.')
w()
# List all cognate crossrefs for reference
w(f' FULL COGNATE CROSSREF TABLE (v3.4):')
w()
w(f' {"Word":<15} {"EN Cousin":<12} {"RU Root":<12} {"EN Root":<12} {"Status":<12} {"Source":<15}')
w(f' {"─"*14} {"─"*11} {"─"*11} {"─"*11} {"─"*11} {"─"*14}')
for word in sorted(cog34.keys()):
cr, entry = cog34[word]
en_cousin = cr.get('en_cousin', '')
en_root = cr.get('root_letters', '')
ru_root = entry.get('root_letters', '')
source = cr.get('source', '')
status, _ = derive_cognate_status(entry)
w(f' {word:<15} {en_cousin:<12} {ru_root:<12} {en_root:<12} {status:<12} {source:<15}')
w()
# ═══════════════════════════════════════════════════════════════════
# 7. SCORE DISTRIBUTION
# ═══════════════════════════════════════════════════════════════════
w('──────────────────────────────────────────────────────────────────────────')
w(' 7. SCORE DISTRIBUTION')
w('──────────────────────────────────────────────────────────────────────────')
w()
sd33 = score_distribution(v33)
sd34 = score_distribution(v34)
all_scores = sorted(set(list(sd33.keys()) + list(sd34.keys())))
w(f' {"Score":>5} {"v3.3":>5} {"v3.4":>5} {"Delta":>5}')
w(f' {"─"*5} {"─"*5} {"─"*5} {"─"*5}')
for score in all_scores:
c33 = sd33.get(score, 0)
c34 = sd34.get(score, 0)
delta = c34 - c33
d_str = f'+{delta}' if delta > 0 else str(delta)
w(f' {score:>5} {c33:>5} {c34:>5} {d_str:>5}')
w()
total33 = sum(sd33.values())
total34 = sum(sd34.values())
if total33 > 0 and total34 > 0:
# Calculate median
def median_score(sd):
vals = []
for s, c in sorted(sd.items()):
vals.extend([s] * c)
if not vals:
return 0
mid = len(vals) // 2
return vals[mid]
def mean_score(sd):
total_val = sum(s * c for s, c in sd.items())
total_count = sum(sd.values())
return total_val / total_count if total_count > 0 else 0
med33 = median_score(sd33)
med34 = median_score(sd34)
mean33 = mean_score(sd33)
mean34 = mean_score(sd34)
w(f' Median score: v3.3 = {med33} | v3.4 = {med34} {"(unchanged)" if med33 == med34 else ""}')
w(f' Mean score: v3.3 = {mean33:.2f} | v3.4 = {mean34:.2f}')
w()
w(f' Key observation: Score distribution is nearly identical. The v3.4')
w(f' changes are balanced β€” 2 scores improved (ЧАБ +1, Π§Π˜Π‘Π›Πž +1) and')
w(f' 2 scores regressed (Π”ΠžΠ“ΠžΠ’ΠžΠ  -1, Π“ΠžΠ” -1). Net score movement = 0.')
w(f' The distribution shape is stable with peak at 8.')
w()
# ═══════════════════════════════════════════════════════════════════
# 8. EXISTING FEATURES β€” STABILITY CHECK
# ═══════════════════════════════════════════════════════════════════
w('──────────────────────────────────────────────────────────────────────────')
w(' 8. EXISTING FEATURES β€” STABILITY CHECK')
w('──────────────────────────────────────────────────────────────────────────')
w()
depal33 = count_depal(v33)
depal34 = count_depal(v34)
orig2_33 = count_orig2(v33)
orig2_34 = count_orig2(v34)
w(f' DEPAL (Depalatalisation Competition) flags:')
w(f' v3.3: {depal33} words flagged | v3.4: {depal34} words flagged {"(identical)" if depal33 == depal34 else ""}')
w()
w(f' ORIG2 (Kashgari/Bitig) track:')
w(f' v3.3: {orig2_33} words routed | v3.4: {orig2_34} words routed {"(identical)" if orig2_33 == orig2_34 else ""}')
w()
w(f' AUTO_REJECTED:')
ar33 = [e for e in v33['auto_rejected']]
ar34 = [e for e in v34['auto_rejected']]
ar33_words = ', '.join(f'{e["word"]} (score={e["score"]})' for e in ar33)
ar34_words = ', '.join(f'{e["word"]} (score={e["score"]})' for e in ar34)
w(f' v3.3: {ar33_words} | v3.4: {ar34_words} {"(identical)" if ar33_words == ar34_words else ""}')
w()
# Three problem words
w(f' THE THREE PROBLEM WORDS (tracked since v3.1):')
for word in ['БАБЛЯ', 'Π’ΠžΠ–Π”Π¬', 'Π‘ΠΠœΠžΠ’ΠΠ ']:
e33 = v33_map.get(word, {})
e34 = v34_map.get(word, {})
stable = (e33.get('category') == e34.get('category') and
e33.get('score') == e34.get('score') and
e33.get('root_letters') == e34.get('root_letters'))
status = 'STABLE' if stable else 'CHANGED'
extras = []
if e34.get('sem_review'):
extras.append('sem_review=true')
if e34.get('compound_parts') is not None:
extras.append('compound detected')
cr = e34.get('cognate_crossref')
if cr is not None:
cog_status, _ = derive_cognate_status(e34)
extras.append(f'cognate: {cog_status}')
extra_str = f' + NEW: {", ".join(extras)}' if extras else ''
w(f' {word}: {e34.get("category", "?")}, score={e34.get("score")}, root={e34.get("root_letters")} ({status} β€” both versions){extra_str}')
w()
w(f' All existing v3.3 features preserved. Zero feature regressions.')
w()
# ═══════════════════════════════════════════════════════════════════
# 9. CLUSTER BACKLOG
# ═══════════════════════════════════════════════════════════════════
w('──────────────────────────────────────────────────────────────────────────')
w(' 9. CLUSTER BACKLOG')
w('──────────────────────────────────────────────────────────────────────────')
w()
w(f' v3.3: {cb33} cluster members | v3.4: {cb34} cluster members ({cb_delta:+d})')
w()
if cb_delta < 0:
w(f' {abs(cb_delta)} fewer cluster members in v3.4. This indicates tighter')
w(f' clustering β€” the engine is more selective about which words from')
w(f' /usr/share/dict/words qualify as cluster members. Not a regression.')
elif cb_delta > 0:
w(f' {cb_delta} additional bonus discoveries found by the cluster expander.')
else:
w(f' Cluster backlog unchanged.')
w()
# ═══════════════════════════════════════════════════════════════════
# EXECUTIVE SUMMARY
# ═══════════════════════════════════════════════════════════════════
w('══════════════════════════════════════════════════════════════════════════')
w(' EXECUTIVE SUMMARY')
w('══════════════════════════════════════════════════════════════════════════')
w()
w(' v3.4 improvements over v3.3:')
w()
# New features
w(f' [+] NEW FIELD: sem_review flag on ALL {v34["total_words"]} entries')
w(f' - {total_sem_true} entries marked sem_review=true')
w(f' - ALL 150 CONFIRMED_HIGH entries have sem_review=true')
w(f' - 11 PENDING_REVIEW entries have sem_review=true (borderline)')
w(f' - Enables targeted semantic audit: only sem_review=true entries')
w(f' need human semantic QUF check')
w()
w(f' [+] NEW FIELD: compound_parts detection')
w(f' - {has_compound} compound words detected and decomposed:')
for word, cat, score, cp in compound_entries:
w(f' {word}: {cp.get("label", "")}')
w(f' - Each component traced to its own AA/Bitig root independently')
w(f' - Foundation for future Russian compound word analysis at scale')
w()
# Score improvements
improved_list = [x for x in score_changes if x[3] > 0]
regressed_list = [x for x in score_changes if x[3] < 0]
if improved_list:
w(f' [+] {len(improved_list)} score improvement(s):')
for word, s33, s34, delta, cat33, cat34, r33, r34 in improved_list:
w(f' {word}: {s33} -> {s34} ({r33} -> {r34})')
if regressed_list:
w()
w(f' [-] {len(regressed_list)} score regression(s):')
for word, s33, s34, delta, cat33, cat34, r33, r34 in regressed_list:
in_confirmed = 'CONFIRMED' in cat33
flag = ' (IN CONFIRMED_HIGH β€” requires adjudication)' if in_confirmed else ' (in PENDING β€” acceptable)'
w(f' {word}: {s33} -> {s34} ({r33} -> {r34}){flag}')
w()
# Root changes
total_root = len(ch_root_changes) + len(pr_root_changes)
w(f' [~] {total_root} root reassignment(s) within same category:')
w(f' CONFIRMED_HIGH: {len(ch_root_changes)} words')
for word, r33, r34, s33, s34, _, _, _, _ in ch_root_changes:
s_note = f'score {s33}->{s34}' if s33 != s34 else f'score {s33}'
w(f' {word}: {r33} -> {r34} ({s_note})')
w(f' PENDING_REVIEW: {len(pr_root_changes)} words')
for word, r33, r34, s33, s34, _, _, _, _ in pr_root_changes:
s_note = f'score {s33}->{s34}' if s33 != s34 else f'score {s33}'
w(f' {word}: {r33} -> {r34} ({s_note})')
w()
# Stability
w(f' [=] CATEGORY COUNTS: Identical across all 4 categories')
w(f' [=] EXISTING count unchanged at {len(v34["already_in_lattice"])}')
w(f' [=] CONFIRMED_HIGH count unchanged at {len(v34["confirmed_high"])}')
w(f' [=] PENDING_REVIEW count unchanged at {len(v34["pending_review"])}')
w(f' [=] AUTO_REJECTED unchanged at {len(v34["auto_rejected"])} (ΠšΠΠœΠ•ΠΠ©Π˜Πš)')
w(f' [=] DEPAL flags unchanged at {depal34}')
w(f' [=] ORIG2 track count unchanged at {orig2_34}')
w(f' [=] Cognate cross-references unchanged at {len(cog34)} entries')
w(f' [=] All three problem words (БАБЛЯ, Π’ΠžΠ–Π”Π¬, Π‘ΠΠœΠžΠ’ΠΠ ) remain')
w(f' CONFIRMED_HIGH at score 9/10 β€” no regressions')
w(f' [=] Score distribution shape unchanged (median=8, peak at 8)')
w()
# No CONFIRMED_HIGH regressions (category)
confirmed_33_words = set(e['word'] for e in v33['confirmed_high'])
confirmed_34_words = set(e['word'] for e in v34['confirmed_high'])
lost_from_confirmed = confirmed_33_words - confirmed_34_words
if not lost_from_confirmed:
w(f' ZERO REGRESSIONS in CONFIRMED_HIGH category membership.')
w(f' No word was demoted from CONFIRMED_HIGH to a lower category.')
else:
w(f' WARNING: {len(lost_from_confirmed)} word(s) lost from CONFIRMED_HIGH:')
for word in lost_from_confirmed:
w(f' {word}')
w()
# Verdict
w(f' VERDICT: v3.4 is a STRUCTURAL upgrade over v3.3. The headline')
w(f' features are:')
w()
w(f' 1. SEM_REVIEW FLAG β€” the engine now flags which entries have')
w(f' passed semantic review, enabling targeted human QUF audits.')
w(f' 162 of 316 entries (51.3%) are marked sem_review=true.')
w()
w(f' 2. COMPOUND DETECTION β€” the engine now detects multi-root')
w(f' Russian words (Π‘ΠΠœΠžΠ’ΠΠ , Π‘ΠŸΠ ΠΠ’Π•Π”Π›Π˜Π’ΠžΠ‘Π’Π¬) and traces each')
w(f' component independently. This is the foundation for scaling')
w(f' compound word analysis across the Russian batch.')
w()
w(f' 3. ROOT STABILITY β€” category counts are identical to v3.3.')
w(f' 7 root reassignments occurred (4 CONFIRMED, 3 PENDING),')
w(f' with mixed direction: some are v3.4 refinements (ΠΠŸΠ’Π•ΠšΠ,')
w(f' ΠŸΠžΠ”Π£Π¨ΠšΠ), some are reversions of v3.3 changes (Π”ΠžΠ“ΠžΠ’ΠžΠ ,')
w(f' Π‘Π•Π Π”Π¦Π•, Π“ΠžΠ”). The PENDING reversions (ЧАБ, Π§Π˜Π‘Π›Πž)')
w(f' actually improved scores, suggesting v3.4 prefers the')
w(f' original skeleton assignments with higher confidence.')
w()
w(f' 4. COGNATE CROSSREF REFINEMENT β€” 1 EN pipeline root updated')
w(f' (ШАРИАВ: ش-ر-ط -> ش-ر-د). Cognate agreement counts')
w(f' unchanged (4 AGREES, 12 COMPETITION, 6 LATTICE).')
w()
w(f' The 1 CONFIRMED_HIGH score regression (Π”ΠžΠ“ΠžΠ’ΠžΠ : 9->8) is the')
w(f' only item requiring human adjudication β€” the v3.3 root Ψ¬-Ψ¨-Ψ±')
w(f' had stronger semantic alignment. All other changes are neutral')
w(f' or positive.')
w()
w(f' Overall: structural improvement. Zero category regressions.')
w(f' New features (sem_review + compound detection) add diagnostic')
w(f' depth without disturbing existing results.')
w()
w('══════════════════════════════════════════════════════════════════════════')
w(' Source files:')
w(f' v3.3: {V33_PATH}')
w(f' v3.4: {V34_PATH}')
w('══════════════════════════════════════════════════════════════════════════')
# Write output
output_text = '\n'.join(lines) + '\n'
with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
f.write(output_text)
print(f'Comparison report written to: {OUTPUT_PATH}')
print(f'Total lines: {len(lines)}')
if __name__ == '__main__':
main()