"""
Generator of draw.io (.drawio) activity diagrams, one per detector.
Style: 30px title, black start dot, ringed end state, RHOMBUS (diamond)
for decisions, rounded boxes for actions. Every node is measured so that it
fits the text inside it. English words are wrapped in <i>...</i> so they
render in italics.
Output: docs/diagrams/<name>.drawio (mxGraph XML, validated as well-formed).
Run: python scripts/gen_drawio.py
"""
from __future__ import annotations
import math
import re
import xml.etree.ElementTree as ET
from pathlib import Path
# Output directory: <two levels above this script>/docs/diagrams.
OUT = Path(__file__).resolve().parent.parent.joinpath("docs", "diagrams")

# Shared style fragments (draw.io style strings are ';'-terminated key=value lists).
_BLACK_FILL = "fillColor=#000000;strokeColor=#000000;"
_WHITE_FILL = "fillColor=#ffffff;strokeColor=#000000;"
_PLAIN_TEXT = "text;html=1;strokeColor=none;fillColor=none;"

# Node styles.
START = "ellipse;whiteSpace=wrap;html=1;" + _BLACK_FILL        # start dot
END = "ellipse;html=1;shape=endState;" + _BLACK_FILL           # ringed end state
DEC = "rhombus;whiteSpace=wrap;html=1;" + _WHITE_FILL          # decision
ACT = "rounded=1;whiteSpace=wrap;html=1;arcSize=20;" + _WHITE_FILL  # action
BAR = "rounded=0;whiteSpace=wrap;html=1;" + _BLACK_FILL        # solid bar

# Text styles.
TITLE = (_PLAIN_TEXT
         + "align=center;verticalAlign=middle;"
         + "whiteSpace=wrap;fontSize=30;fontStyle=1;")
NOTE = (_PLAIN_TEXT
        + "align=left;verticalAlign=middle;"
        + "whiteSpace=wrap;fontSize=12;fontStyle=2;")

# Base style shared by every edge.
EDGE = "edgeStyle=orthogonalEdgeStyle;rounded=0;html=1;endArrow=block;"

# Layout constants (pixels).
CX = 460   # x of the spine centre
RX = 720   # left edge of the right-hand branch
LXR = 250  # right edge of the left-hand branch (right side of the left box)
GAP = 42   # vertical gap between consecutive spine nodes
CW = 6.7   # average character width at fontSize 12
def strip_tags(text: str) -> str:
    """Return *text* without its ``<...>`` tags.

    Used when measuring a label, so that markup does not count towards the
    displayed text length.
    """
    tag_pattern = r'<[^>]+>'
    without_tags = re.sub(tag_pattern, '', text)
    return without_tags
def esc(s: str) -> str:
    """Escape *s* for use inside a double-quoted XML attribute value.

    The markup characters ``&``, ``<``, ``>`` and ``"`` become entities, so
    HTML written in a label (for example an italic tag) is carried as escaped
    text inside the attribute and the document stays well-formed. A newline
    becomes the character reference ``&#10;`` so that it is not subject to
    attribute-value whitespace normalisation.

    Args:
        s: Raw label text, possibly containing HTML tags and newlines.

    Returns:
        The escaped text.
    """
    # '&' must be handled first, otherwise the entities produced by the later
    # replacements would themselves be escaped a second time.
    s = s.replace("&", "&amp;")
    s = s.replace("<", "&lt;").replace(">", "&gt;")
    # Labels are written into value="..." attributes, so a literal double
    # quote would end the attribute early.
    s = s.replace('"', "&quot;")
    return s.replace("\n", "&#10;")
def fit_act(text, max_w=280, pad=14, lh=17):
    """Estimate the ``(width, height)`` of an action box that fits *text*.

    Tags are removed before measuring and explicit ``\\n`` breaks are honoured.
    The width follows the longest segment at ``CW`` pixels per character plus
    padding on both sides, is at least 130 and is then capped at *max_w*. The
    height assumes each segment wraps at the resulting characters-per-line and
    is at least 40.

    Args:
        text: Label text (may contain tags and newlines).
        max_w: Upper bound for the box width.
        pad: Padding applied on each side, horizontally and vertically.
        lh: Line height in pixels.

    Returns:
        Tuple ``(w, h)`` in pixels.
    """
    rows = strip_tags(text).split("\n")
    both_pads = 2 * pad
    widest = max(map(len, rows))

    w = max(int(widest * CW) + both_pads, 130)
    if w > max_w:
        w = max_w

    # Characters that fit on one line of the final (possibly capped) width.
    cpl = max(1, int((w - both_pads) / CW))
    lines = 0
    for row in rows:
        lines += max(1, math.ceil(len(row) / cpl))

    h = max(40, lines * lh + both_pads)
    return w, h
def fit_dec(text, pad=14, lh=17):
    """Estimate the ``(width, height)`` of a decision rhombus that fits *text*.

    A rhombus only offers its central area to the text, so fixed margins
    (140 horizontally, 96 vertically) are added around the measured content.
    The content width is clamped to the range 100..210 before wrapping is
    computed.

    Args:
        text: Label text (may contain tags and newlines).
        pad: Accepted for signature symmetry with ``fit_act``; not used here.
        lh: Line height in pixels.

    Returns:
        Tuple ``(w, h)`` of ints in pixels.
    """
    rows = strip_tags(text).split("\n")
    widest = max(map(len, rows))

    content_w = max(int(widest * CW), 100)
    content_w = min(content_w, 210)

    cpl = max(1, int(content_w / CW))
    lines = sum(max(1, math.ceil(len(row) / cpl)) for row in rows)

    return int(content_w + 140), int(lines * lh + 96)
def fit_note(text):
    """Return ``(width, height)`` for a single-line note label.

    The width is 6.6 pixels per visible character (tags excluded) plus 8; the
    height is always 22.
    """
    visible_chars = len(strip_tags(text))
    width = int(visible_chars * 6.6) + 8
    return width, 22
class Dia:
    """Builder that accumulates the mxGraph cells of one draw.io page.

    Spine nodes (``start``, ``act``, ``dec``, ``end``) are centred on ``CX``
    and advance the running cursor ``self.y``. Branch nodes (``right``,
    ``left``, ``side_end``) are placed beside an existing node, vertically
    centred on it, and do not move the cursor. ``xml()`` serialises the
    collected cells as an uncompressed ``.drawio`` document.

    Node ids are supplied by the caller. The ids ``"0"``, ``"1"``, ``"title"``
    and ``"edge-<n>"`` are used internally and should not be passed as node
    ids.
    """

    def __init__(self, title: str, page_w=1000):
        """Start a page of width *page_w* whose heading is *title*.

        Args:
            title: Heading text; may contain HTML tags.
            page_w: Page width in pixels.
        """
        # self.title is the diagram name: plain text, HTML tags removed.
        self.title = strip_tags(title)
        self.page_w = page_w
        self.cells: list[str] = []
        # cid -> (x, y, w, h), used to align branches and loop lanes.
        self.geo: dict[str, tuple[int, int, int, int]] = {}
        # Top of the next spine node; the band above it holds the heading.
        self.y = 96
        # The heading cell keeps the HTML of the original title (escaped),
        # unlike self.title which is tag-free.
        self.cells.append(
            f'<mxCell id="title" value="{esc(title)}" style="{TITLE}" '
            f'vertex="1" parent="1">'
            f'<mxGeometry x="0" y="20" width="{page_w}" height="50" '
            f'as="geometry"/></mxCell>')

    def _add(self, cid, style, label, x, y, w, h):
        """Record the geometry of vertex *cid*, emit its cell and return *cid*."""
        self.geo[cid] = (x, y, w, h)
        self.cells.append(
            f'<mxCell id="{cid}" value="{esc(label)}" style="{style}" '
            f'vertex="1" parent="1">'
            f'<mxGeometry x="{x}" y="{y}" width="{w}" height="{h}" '
            f'as="geometry"/></mxCell>')
        return cid

    def cy(self, ref):
        """Return the vertical centre of the already-added node *ref*."""
        _, top, _, height = self.geo[ref]
        return top + height // 2

    # ---- spine (centred on CX, y assigned automatically)

    def start(self, cid="s"):
        """Add the 30x30 start dot on the spine and advance the cursor."""
        self._add(cid, START, "", CX - 15, self.y, 30, 30)
        self.y += 30 + GAP
        return cid

    def act(self, cid, label, max_w=280):
        """Add an action box sized by ``fit_act`` on the spine."""
        w, h = fit_act(label, max_w)
        self._add(cid, ACT, label, CX - w // 2, self.y, w, h)
        self.y += h + GAP
        return cid

    def dec(self, cid, label):
        """Add a decision rhombus sized by ``fit_dec`` on the spine."""
        w, h = fit_dec(label)
        self._add(cid, DEC, label, CX - w // 2, self.y, w, h)
        self.y += h + GAP
        return cid

    def end(self, cid="e"):
        """Add the 30x30 end state on the spine and advance the cursor."""
        self._add(cid, END, "", CX - 15, self.y, 30, 30)
        self.y += 30 + GAP
        return cid

    # ---- branches (beside an existing node; the cursor is not moved)

    def right(self, cid, label, ref, max_w=250):
        """Add an action box whose left edge is ``RX``, centred on *ref*."""
        w, h = fit_act(label, max_w)
        return self._add(cid, ACT, label, RX, self.cy(ref) - h // 2, w, h)

    def left(self, cid, label, ref, max_w=210):
        """Add an action box whose right edge is ``LXR``, centred on *ref*."""
        w, h = fit_act(label, max_w)
        return self._add(cid, ACT, label, LXR - w, self.cy(ref) - h // 2, w, h)

    def side_end(self, cid, ref):
        """Add an end state in the right-hand column, centred on *ref*."""
        return self._add(cid, END, "", RX + 60, self.cy(ref) - 15, 30, 30)

    def note(self, cid, label):
        """Add an italic note at x=30, just below the last spine node.

        Returns *cid*, like the other node-adding methods.
        """
        w, h = fit_note(label)
        return self._add(cid, NOTE, label, 30, self.y - GAP + 4, w, h)

    def raw(self, cid, style, label, x, y, w, h):
        """Add a vertex with explicit style and geometry; the cursor is untouched."""
        return self._add(cid, style, label, x, y, w, h)

    # ---- edges

    def _edge(self, src, tgt, pts, label, kw):
        """Emit one edge cell from *src* to *tgt*, optionally through *pts*."""
        style = EDGE + "".join(f"{k}={v};" for k, v in kw.items())
        # The value attribute is omitted entirely for unlabeled edges.
        val = f' value="{esc(label)}"' if label else ""
        if pts:
            ptxml = "".join(f'<mxPoint x="{x}" y="{y}"/>' for x, y in pts)
            geometry = (
                '<mxGeometry relative="1" as="geometry">'
                f'<Array as="points">{ptxml}</Array></mxGeometry>')
        else:
            geometry = '<mxGeometry relative="1" as="geometry"/>'
        # len(self.cells) grows with every cell, so the id is unique, and the
        # "edge-" prefix keeps it apart from node ids such as "e" or "eEmpty".
        edge_id = f"edge-{len(self.cells)}"
        self.cells.append(
            f'<mxCell id="{edge_id}"{val} style="{style}" edge="1" parent="1" '
            f'source="{src}" target="{tgt}">{geometry}</mxCell>')

    def e(self, src, tgt, label="", **kw):
        """Connect *src* to *tgt*; *kw* entries are appended to the edge style."""
        self._edge(src, tgt, (), label, kw)

    def e_pts(self, src, tgt, pts, label="", **kw):
        """Connect *src* to *tgt* through the ``(x, y)`` waypoints in *pts*."""
        self._edge(src, tgt, pts, label, kw)

    def loop_left(self, src, tgt, label="ya", lane=200):
        """Route an edge out of the left side of *src*, along the vertical
        line x=*lane*, into the left side of *tgt*."""
        self.e_pts(src, tgt, [(lane, self.cy(src)), (lane, self.cy(tgt))], label,
                   exitX=0, exitY=0.5, entryX=0, entryY=0.5)

    def loop_right(self, src, tgt, label="ya", lane=960):
        """Route an edge out of the right side of *src*, along the vertical
        line x=*lane*, into the right side of *tgt*."""
        self.e_pts(src, tgt, [(lane, self.cy(src)), (lane, self.cy(tgt))], label,
                   exitX=1, exitY=0.5, entryX=1, entryY=0.5)

    def xml(self) -> str:
        """Return the whole page as an uncompressed draw.io XML document.

        The page height is the current cursor plus a 40 px bottom margin.
        """
        ph = self.y + 40
        body = "".join(self.cells)
        # Deterministic diagram id derived from the plain-text title.
        slug = re.sub(r"[^0-9A-Za-z]+", "-", self.title).strip("-") or "diagram"
        return (
            '<mxfile host="app.diagrams.net">'
            f'<diagram id="{slug}" name="{esc(self.title)}">'
            f'<mxGraphModel dx="1000" dy="800" grid="1" gridSize="10" guides="1" '
            f'tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" '
            f'pageWidth="{self.page_w}" pageHeight="{ph}" math="0" shadow="0">'
            # Cells "0" and "1" are the root and the default layer that every
            # other cell names as its parent.
            '<root><mxCell id="0"/><mxCell id="1" parent="0"/>'
            f'{body}'
            '</root></mxGraphModel></diagram></mxfile>')
def empty_branch(d: Dia, dref):
    """Attach the shared "empty input" exit to the decision node *dref*.

    To the right of *dref* this adds an action box ("Kembalikan daftar
    kosong") followed by a side end state, wired as
    ``dref --ya--> action --> end``.
    """
    action = d.right("aEmpty", "Kembalikan daftar kosong", dref)
    terminal = d.side_end("eEmpty", dref)
    for source, target, text in ((dref, action, "ya"), (action, terminal, "")):
        d.e(source, target, text)
# ============================================================ PROSES EVALUASI
def proses_evaluasi() -> Dia:
    """Build the "Proses Evaluasi Prompt" activity diagram (page width 1120).

    Flow: start -> a1 -> a2 -> d1. From d1, "tidak" goes through the
    right-hand box bNoChange straight to a8, while "ya" runs a3 -> a4 -> a5 ->
    a6 -> a7 -> a8. Then a8 -> a9 -> d2. From d2, "ya" leads to a10, which
    loops back to d1 along the left lane; "tidak" leads to d3. From d3,
    "tidak" loops back to a9 along the right lane and "ya" reaches the end
    state.

    Returns:
        The populated ``Dia``.
    """
    d = Dia("Proses Evaluasi Prompt", page_w=1120)
    # Spine nodes first: each call stacks the node below the previous one.
    s = d.start()
    a1 = d.act("a1", "[Antarmuka] Pengguna mengisi field prompt")
    a2 = d.act("a2", "[Antarmuka] Hitung badge field wajib kosong secara client-side")
    d1 = d.dec("d1", "Ada field yang berubah?")
    a3 = d.act("a3", "[Antarmuka] Kirim POST /api/evaluate untuk field berubah\n"
               "(satu field per request)")
    a4 = d.act("a4", "[Orkestrator] Validasi dan parse JSON {fields}")
    a5 = d.act("a5", "[Orkestrator] Jalankan deteksi bahasa + 9 detektor\n"
               "pada field terisi")
    a6 = d.act("a6", "[Orkestrator] Gabungkan temuan dan dedup level kata")
    a7 = d.act("a7", "[Antarmuka] Simpan temuan server ke cache field")
    a8 = d.act("a8", "[Antarmuka] Gabungkan cache issue + badge missing;\n"
               "susun prompt akhir dari field lokal")
    a9 = d.act("a9", "[Antarmuka] Tampilkan sorotan, badge, banner catatan,\n"
               "dan prompt akhir")
    d2 = d.dec("d2", "Saran perbaikan diterapkan?")
    a10 = d.act("a10", "[Antarmuka] Ubah isi field, kosongkan cache field,\n"
                "jadwalkan evaluasi ulang")
    d3 = d.dec("d3", "Prompt akhir disalin?")
    e = d.end()
    d.e(s, a1)
    d.e(a1, a2)
    d.e(a2, d1)
    # Right-hand box, vertically centred on d1; it rejoins the spine at a8.
    bNoChange = d.right("bNoChange", "Render ulang dari cache yang masih valid", d1)
    d.e(d1, bNoChange, "tidak")
    d.e(bNoChange, a8)
    d.e(d1, a3, "ya")
    d.e(a3, a4)
    d.e(a4, a5)
    d.e(a5, a6)
    d.e(a6, a7)
    d.e(a7, a8)
    d.e(a8, a9)
    d.e(a9, d2)
    d.e(d2, a10, "ya")
    # Unlabeled return edge from a10 to d1 along the vertical lane x=150.
    d.loop_left(a10, d1, "", lane=150)
    d.e(d2, d3, "tidak")
    # Return edge from d3 to a9 along the vertical lane x=1030.
    d.loop_right(d3, a9, "tidak", lane=1030)
    d.e(d3, e, "ya")
    return d
# ============================================================ USE CASE
def use_case() -> Dia:
    """Build the "Use Case Sistem Prompt Builder" diagram (page width 1220).

    Unlike the activity diagrams, every node is placed with explicit
    coordinates through ``Dia.raw``: a system boundary rectangle, one actor,
    six use-case ellipses (uc1..uc6) and nine detector ellipses (d1..d9)
    stacked in a column. The actor is linked to uc1..uc5; the remaining links
    are dashed edges.

    Returns:
        The populated ``Dia``.
    """
    d = Dia("Use Case Sistem Prompt Builder", page_w=1220)
    # Local styles that are only needed by this diagram.
    boundary = ("rounded=0;whiteSpace=wrap;html=1;fillColor=none;"
                "strokeColor=#000000;fontStyle=1;verticalAlign=top;spacingTop=10;")
    actor = ("shape=umlActor;verticalLabelPosition=bottom;verticalAlign=top;"
             "html=1;outlineConnect=0;fillColor=#ffffff;strokeColor=#000000;")
    usecase = "ellipse;whiteSpace=wrap;html=1;fillColor=#ffffff;strokeColor=#000000;"
    d.raw("sys", boundary, "Aplikasi Prompt Builder", 250, 95, 900, 820)
    d.raw("actor", actor, "Mahasiswa", 70, 390, 90, 150)
    d.raw("uc1", usecase, "Mengisi field\nPrompt Builder", 310, 170, 230, 85)
    d.raw("uc2", usecase, "Mengevaluasi\nprompt", 310, 300, 230, 85)
    d.raw("uc3", usecase, "Meninjau hasil\nevaluasi", 310, 430, 230, 85)
    d.raw("uc4", usecase, "Menerapkan saran\nperbaikan", 310, 560, 230, 85)
    d.raw("uc5", usecase, "Menyalin\nprompt akhir", 310, 690, 230, 85)
    d.raw("uc6", usecase, "Menjalankan\nsembilan detektor", 620, 300, 250, 85)
    detector_names = [
        ("d1", "Deteksi PII"),
        ("d2", "Deteksi Word Quality"),
        ("d3", "Deteksi Konten Berisiko"),
        ("d4", "Deteksi NER"),
        ("d5", "Deteksi Profanity"),
        ("d6", "Deteksi Filler"),
        ("d7", "Deteksi Special Char"),
        ("d8", "Deteksi Syntax"),
        ("d9", "Deteksi Field-Fit"),
    ]
    # NOTE(review): the "<>" edge labels in this function look like truncated
    # UML stereotype labels -- confirm the intended text.
    # Detector ellipses form a column at x=910, one every 82 px from y=125.
    y = 125
    for cid, label in detector_names:
        d.raw(cid, usecase, label, 910, y, 190, 58)
        d.e("uc6", cid, "<>", dashed=1)
        y += 82
    for cid in ("uc1", "uc2", "uc3", "uc4", "uc5"):
        d.e("actor", cid)
    d.e("uc2", "uc6", "<>", dashed=1)
    d.e("uc3", "uc2", "<>", dashed=1)
    d.e("uc4", "uc2", "<>", dashed=1)
    # Nothing advanced the cursor (only raw() was used), so set it by hand;
    # Dia.xml() derives the page height from d.y.
    d.y = 940
    return d
# ============================================================ PII
def pii() -> Dia:
    """Build the "Detektor PII (Data Pribadi)" activity diagram.

    Flow: start -> d1 (empty text?). "ya" takes the shared empty-input exit;
    "tidak" runs a1 -> a2 -> d2. If d2 is "ya" the candidate goes to d3: "ya"
    leads to a3 and then d4, "tidak" leads to the right-hand box aDrop and
    then d5. If d2 is "tidak" the candidate goes directly to d4. From d4, "ya"
    leads to a4 and "tidak" to the right-hand box aLow; both continue to d5.
    From d5, "ya" loops back to a1 along the left lane and "tidak" runs
    a5 -> a6 -> a7 -> end.

    Returns:
        The populated ``Dia``.
    """
    d = Dia("Detektor PII (Data Pribadi)")
    s = d.start()
    d1 = d.dec("d1", "Teks kosong?")
    d.e(s, d1)
    empty_branch(d, d1)
    a1 = d.act("a1", "Cari kandidat lewat pola regex PII Indonesia\n"
               "(NIK, NPWP, BPJS, SIM, plat nomor, email, HP,\n"
               "rekening, kartu kredit, IP, alamat)")
    a2 = d.act("a2", "Ekstrak nilai dari capturing group\n"
               "dan pangkas spasi/tanda baca tepi")
    d2 = d.dec("d2", "Punya validator struktural?")
    d3 = d.dec("d3", "Validasi lolos?")
    a3 = d.act("a3", "Naikkan confidence score (+0,15)")
    # Right-hand boxes are vertically centred on the decision they belong to.
    aDrop = d.right("aDrop", "Buang kandidat", d3)
    d4 = d.dec("d4", "confidence >= ambang minimum?")
    a4 = d.act("a4", "Catat PIIEntity (label, span, konteks)")
    aLow = d.right("aLow", "Abaikan kandidat", d4)
    d5 = d.dec("d5", "Masih ada kandidat?")
    a5 = d.act("a5", "Selesaikan span tumpang-tindih\n"
               "(confidence tertinggi menang)")
    a6 = d.act("a6", "Urutkan menurut posisi")
    a7 = d.act("a7", "Kembalikan daftar PIIEntity")
    e = d.end()
    d.e(d1, a1, "tidak")
    d.e(a1, a2)
    d.e(a2, d2)
    d.e(d2, d3, "ya")
    d.e(d3, a3, "ya")
    d.e(d3, aDrop, "tidak")
    d.e(aDrop, d5)
    d.e(d2, d4, "tidak")
    d.e(a3, d4)
    d.e(d4, a4, "ya")
    d.e(a4, d5)
    d.e(d4, aLow, "tidak")
    d.e(aLow, d5)
    # Loop back to a1 along the default left lane while d5 answers "ya".
    d.loop_left(d5, a1, "ya")
    d.e(d5, a5, "tidak")
    d.e(a5, a6)
    d.e(a6, a7)
    d.e(a7, e)
    return d
# ============================================================ WORD QUALITY
def word_quality() -> Dia:
    """Build the "Detektor Word Quality" activity diagram.

    Flow: start -> d1 (empty text?) with the shared empty-input exit on "ya".
    On "tidak": a1 -> a2 -> d2. A skipped token (d2 "ya") goes through bSkip to
    d6. Otherwise a3 is followed by the decision chain d3 -> d4 -> d5, where
    each "ya" leads to a right-hand marking box (bSlang / bAlay / bTypo) and
    then d6; d5 additionally has a "bersih" edge to the left-hand box bClean,
    which also continues to d6. From d6, "ya" loops back to a2 and "tidak"
    leads to d7. d7 and d8 each have an optional right-hand step (bRe, bML)
    before a8 and the end state.

    Returns:
        The populated ``Dia``.
    """
    d = Dia("Detektor Word Quality")
    s = d.start()
    d1 = d.dec("d1", "Teks kosong?")
    d.e(s, d1)
    empty_branch(d, d1)
    a1 = d.act("a1", "Tokenisasi teks")
    a2 = d.act("a2", "Ambil token berikutnya")
    d2 = d.dec("d2", "Token pendek / non-ASCII / pola dilewati?")
    a3 = d.act("a3", "Kumpulkan konteks kata sebelum & sesudah")
    d3 = d.dec("d3", "SLANG? (kamus + pola regex)")
    d4 = d.dec("d4", "ALAY? (l33tspeak)")
    d5 = d.dec("d5", "TYPO? (SymSpell + skeleton)")
    d6 = d.dec("d6", "Masih ada token?")
    d7 = d.dec("d7", "Jumlah SLANG ≥ 2 pada kalimat?")
    d8 = d.dec("d8", "Layer-2 ML aktif & ada TYPO?")
    a8 = d.act("a8", "Urutkan menurut posisi & kembalikan WordIssue")
    e = d.end()
    d.e(d1, a1, "tidak")
    d.e(a1, a2)
    d.e(a2, d2)
    bSkip = d.right("bSkip", "Lewati token", d2)
    d.e(d2, bSkip, "ya")
    d.e(bSkip, d6)
    d.e(d2, a3, "tidak")
    d.e(a3, d3)
    bSlang = d.right("bSlang", "Tandai SLANG", d3)
    d.e(d3, bSlang, "ya")
    d.e(bSlang, d6)
    d.e(d3, d4, "tidak")
    bAlay = d.right("bAlay", "Tandai ALAY", d4)
    d.e(d4, bAlay, "ya")
    d.e(bAlay, d6)
    d.e(d4, d5, "tidak")
    bTypo = d.right("bTypo", "Tandai TYPO", d5)
    d.e(d5, bTypo, "ya")
    d.e(bTypo, d6)
    # d5 is the only decision with a box on each side: "ya" to the right,
    # "bersih" to the left.
    bClean = d.left("bClean", "Tidak ada temuan", d5)
    d.e(d5, bClean, "bersih")
    d.e(bClean, d6)
    # Token loop: back to a2 along the default left lane.
    d.loop_left(d6, a2, "ya")
    d.e(d6, d7, "tidak")
    bRe = d.right("bRe",
                  "Reklasifikasi TYPO confidence-rendah → SLANG", d7)
    d.e(d7, bRe, "ya")
    d.e(bRe, d8)
    d.e(d7, d8, "tidak")
    bML = d.right("bML",
                  "Fill-mask IndoBERT kontekstual:\n"
                  "(a) gugurkan TYPO bila kata asli masuk akal\n"
                  "(b) rerank kandidat koreksi sesuai konteks", d8)
    d.e(d8, bML, "ya")
    d.e(bML, a8)
    d.e(d8, a8, "tidak")
    d.e(a8, e)
    return d
# ============================================================ KONTEN BERISIKO
def konten_berisiko() -> Dia:
    """Build the "Detektor Konten Berisiko" activity diagram.

    Flow: start -> d1 (empty text?) with the shared empty-input exit on "ya".
    On "tidak": a1 -> a2 -> a3 -> d2. A match (d2 "ya") goes to a4 -> d3,
    where "ya" passes through the right-hand box bSeen and "tidak" through
    a5; both continue to d4. No match (d2 "tidak") goes straight to d4. From
    d4, "ya" loops back to a2 and "tidak" leads to a6 and the end state.

    Returns:
        The populated ``Dia``.
    """
    d = Dia("Detektor Konten Berisiko")
    s = d.start()
    d1 = d.dec("d1", "Teks kosong?")
    d.e(s, d1)
    empty_branch(d, d1)
    a1 = d.act("a1", "Bentuk varian teks: asli, normalisasi slang, "
               "de-obfuscation, de-obfuscation + normalisasi")
    a2 = d.act("a2", "Ambil varian teks berikutnya")
    a3 = d.act("a3", "Cocokkan pola regex Indonesia (INJECTION, "
               "self-harm, konten eksplisit, diskriminasi, "
               "ACADEMIC_DISHONESTY, HARMFUL)")
    d2 = d.dec("d2", "Pola cocok?")
    a4 = d.act("a4", "Petakan offset kembali ke teks asli")
    d3 = d.dec("d3", "Evidence sudah dicatat?")
    a5 = d.act("a5", "Catat RiskyContentFinding (kode, severity, "
               "evidence, saran)")
    d4 = d.dec("d4", "Masih ada varian?")
    a6 = d.act("a6", "Urutkan HIGH → MEDIUM → LOW")
    e = d.end()
    d.e(d1, a1, "tidak")
    d.e(a1, a2)
    d.e(a2, a3)
    d.e(a3, d2)
    d.e(d2, a4, "ya")
    d.e(d2, d4, "tidak")
    d.e(a4, d3)
    bSeen = d.right("bSeen", "Lewati (duplikat)", d3)
    d.e(d3, bSeen, "ya")
    d.e(bSeen, d4)
    d.e(d3, a5, "tidak")
    d.e(a5, d4)
    # Variant loop: back to a2 along the default left lane.
    d.loop_left(d4, a2, "ya")
    d.e(d4, a6, "tidak")
    d.e(a6, e)
    return d
# ============================================================ NER (fork/join)
def ner() -> Dia:
    """Build the "Detektor NER" activity diagram (page width 1020).

    After the shared empty-text check, the flow goes through a fork bar into
    three side-by-side boxes (b1, b2, b3) and a join bar, all placed with
    explicit coordinates. It then continues on the spine with
    a1 -> a2 -> a3 -> a4 -> end.

    Returns:
        The populated ``Dia``.
    """
    d = Dia("Detektor NER", page_w=1020)
    s = d.start()
    d1 = d.dec("d1", "Teks kosong?")
    d.e(s, d1)
    empty_branch(d, d1)
    # Fork bar at the current cursor; raw() does not move the cursor, so it
    # is advanced by hand (bar height 10 plus a 48 px gap).
    fy = d.y
    fork = d.raw("fork", BAR, "", 200, fy, 620, 10)
    d.y += 10 + 48
    # The three branch boxes share the same top edge.
    by = d.y
    b1 = d.raw("b1", ACT,
               "Prediksi entitas via transformer XLM-R (bila model dimuat)",
               150, by, 220, 70)
    b2 = d.raw("b2", ACT,
               "Deteksi rule regex Indonesia "
               "(PT/CV, kementerian, kota, Rp, tanggal, gelar+nama)",
               400, by, 220, 80)
    b3 = d.raw("b3", ACT, "Cocokkan daftar nama orang", 650, by, 220, 70)
    # 80 is the height of the tallest branch box (b2).
    d.y = by + 80 + 48
    jy = d.y
    join = d.raw("join", BAR, "", 200, jy, 620, 10)
    d.y = jy + 10 + GAP
    d.e(d1, fork, "tidak")
    # Exit and entry points are spread along the bars at 20 %, 50 % and 80 %.
    d.e(fork, b1, exitX=0.2, exitY=1)
    d.e(fork, b2, exitX=0.5, exitY=1)
    d.e(fork, b3, exitX=0.8, exitY=1)
    d.e(b1, join, entryX=0.2, entryY=0)
    d.e(b2, join, entryX=0.5, entryY=0)
    d.e(b3, join, entryX=0.8, entryY=0)
    a1 = d.act("a1", "Gabungkan entitas ML + rule + nama")
    a2 = d.act("a2", "Filter entitas tak masuk akal & konteks tidak valid")
    a3 = d.act("a3", "Filter kebutuhan prompt "
               "(buang stopword & token tunggal huruf kecil)")
    a4 = d.act("a4", "Urutkan menurut posisi & kembalikan NEREntity")
    e = d.end()
    d.e(join, a1)
    d.e(a1, a2)
    d.e(a2, a3)
    d.e(a3, a4)
    d.e(a4, e)
    return d
# ============================================================ PROFANITY
def profanity() -> Dia:
    """Build the "Detektor Profanity" activity diagram.

    Flow: start -> d1 (empty text?) with the shared empty-input exit on "ya".
    On "tidak": a1 -> a2 -> d2. d2 "ya" passes through the right-hand box
    bSkip to d4; d2 "tidak" runs a3 -> d3, where "ya" passes through the
    right-hand box bMark and "tidak" goes directly to d4. From d4, "ya" loops
    back to a2 and "tidak" leads to d5. d5 "ya" passes through the right-hand
    box bML before a4; "tidak" goes straight to a4, then the end state.

    Returns:
        The populated ``Dia``.
    """
    d = Dia("Detektor Profanity")
    s = d.start()
    d1 = d.dec("d1", "Teks kosong?")
    d.e(s, d1)
    empty_branch(d, d1)
    a1 = d.act("a1", "Layer 1 — pre-pass: "
               "tangkap kata dieja per huruf (mis. 'a n j i n g')")
    a2 = d.act("a2", "Ambil token berikutnya")
    d2 = d.dec("d2", "Span sudah tertangkap?")
    a3 = d.act("a3", "Klasifikasi token "
               "(normalisasi leet/strip/collapse "
               "+ skeleton konsonan)")
    d3 = d.dec("d3", "Tergolong kasar?")
    d4 = d.dec("d4", "Masih ada token?")
    d5 = d.dec("d5", "Layer 1 kosong & ML aktif & ≥ 3 kata?")
    a4 = d.act("a4", "Urutkan menurut posisi & kembalikan temuan")
    e = d.end()
    d.e(d1, a1, "tidak")
    d.e(a1, a2)
    d.e(a2, d2)
    bSkip = d.right("bSkip", "Lewati token", d2)
    d.e(d2, bSkip, "ya")
    d.e(bSkip, d4)
    d.e(d2, a3, "tidak")
    d.e(a3, d3)
    bMark = d.right("bMark",
                    "Catat ProfanityFinding (HIGH/MEDIUM)", d3)
    d.e(d3, bMark, "ya")
    d.e(bMark, d4)
    d.e(d3, d4, "tidak")
    # Token loop: back to a2 along the default left lane.
    d.loop_left(d4, a2, "ya")
    d.e(d4, d5, "tidak")
    bML = d.right("bML",
                  "Layer 2 — classifier toksisitas Indonesia "
                  "(advice-only, sorot teks penuh)", d5)
    d.e(d5, bML, "ya")
    d.e(bML, a4)
    d.e(d5, a4, "tidak")
    d.e(a4, e)
    return d
# ============================================================ FILLER
def filler() -> Dia:
    """Build the "Detektor Filler" activity diagram.

    Flow: start -> d1 (empty text?) with the shared empty-input exit on "ya".
    On "tidak": a1 -> a2 -> a3 -> a4 -> d2. d2 "ya" passes through the
    right-hand box bSkip and d2 "tidak" through the left-hand box bMark; both
    continue to d3. Three nested loops follow: d3 "ya" returns to a4 (left
    lane), d4 "ya" returns to a3 (right lane) and d5 "ya" returns to a2 (left
    lane at x=120). d5 "tidak" leads to a5 and the end state.

    Returns:
        The populated ``Dia``.
    """
    d = Dia("Detektor Filler")
    s = d.start()
    d1 = d.dec("d1", "Teks kosong?")
    d.e(s, d1)
    empty_branch(d, d1)
    a1 = d.act("a1", "Siapkan teks pindai: teks asli + norm_text\n"
               "Word Quality bila tersedia")
    a2 = d.act("a2", "Terjemahkan l33t -> huruf normal")
    a3 = d.act("a3",
               "Ambil pola regex berikutnya "
               "(GREETING_AI, GREETING_ONLY, THANKS, APOLOGY, EMPTY_OPENER, "
               "EMOTIONAL_FILLER, VAGUE_REFERENCE, VAGUE_PARTICLE, "
               "UNNECESSARY_PREAMBLE, HESITATION)")
    a4 = d.act("a4", "Cari kecocokan berikutnya;\n"
               "petakan offset ke teks asli bila dari norm_text")
    d2 = d.dec("d2", "Span tumpang-tindih dengan temuan lain?")
    d3 = d.dec("d3", "Masih ada kecocokan?")
    d4 = d.dec("d4", "Masih ada pola?")
    d5 = d.dec("d5", "Masih ada teks pindai?")
    a5 = d.act("a5", "Urutkan menurut posisi & kembalikan temuan")
    e = d.end()
    d.e(d1, a1, "tidak")
    d.e(a1, a2)
    d.e(a2, a3)
    d.e(a3, a4)
    d.e(a4, d2)
    bSkip = d.right("bSkip", "Lewati", d2)
    d.e(d2, bSkip, "ya")
    d.e(bSkip, d3)
    bMark = d.left("bMark", "Catat FillerFinding", d2)
    d.e(d2, bMark, "tidak")
    d.e(bMark, d3)
    # Innermost loop (matches): default left lane.
    d.loop_left(d3, a4, "ya")
    d.e(d3, d4, "tidak")
    # Middle loop (patterns): default right lane.
    d.loop_right(d4, a3, "ya")
    d.e(d4, d5, "tidak")
    # Outermost loop (scan texts): a left lane further out (x=120) than the
    # default one used by the innermost loop.
    d.loop_left(d5, a2, "ya", lane=120)
    d.e(d5, a5, "tidak")
    d.e(a5, e)
    return d
# ============================================================ SPECIAL CHAR
def special_char() -> Dia:
    """Build the "Detektor Special Char" activity diagram.

    The diagram is a straight sequence with no decisions: start -> a0 -> the
    nine category steps c1..c9 in list order -> a1 -> end. One italic note
    ("nScan") is placed at the left margin, positioned from the cursor as it
    stands right after a0.

    Returns:
        The populated ``Dia``.
    """
    d = Dia("Detektor Special Char")
    s = d.start()
    a0 = d.act("a0", "Siapkan teks & himpunan span terpakai")
    # (cell id, label) for each scan category, in the order they are chained.
    steps = [
        ("c1", "Zero-width characters → hapus"),
        ("c2", "Kontrol arah teks (BiDi) → hapus"),
        ("c3", "Unicode Tag (teks tersembunyi) → hapus"),
        ("c4", "Homoglif (huruf menyamar) → perbaiki ke Latin"),
        ("c5", "Spasi non-standar → ganti spasi biasa"),
        ("c6", "Karakter kontrol → hapus"),
        ("c7", "Smart quote -> ganti kutip ASCII"),
        ("c8", "Baris kosong berlebih -> jadikan satu baris kosong"),
        ("c9", "Spasi ganda & tanda baca berulang -> rapikan"),
    ]
    # Must be added before the steps: note() derives its y from the cursor.
    d.note("nScan", "Scan kategori berurutan (dedup span)")
    d.e(s, a0)
    # Chain each step to the previous node.
    prev = a0
    for cid, label in steps:
        cur = d.act(cid, label)
        d.e(prev, cur)
        prev = cur
    a1 = d.act("a1", "Kembalikan daftar SpecialCharFinding "
               "beserta teks pengganti (replacement)")
    e = d.end()
    d.e(prev, a1)
    d.e(a1, e)
    return d
# ============================================================ SYNTAX
def syntax() -> Dia:
    """Build the "Detektor Syntax" activity diagram.

    Flow: start -> d1 with the shared empty-input exit on "ya". On "tidak":
    a1 -> a2 -> d2. d2 "tidak" passes through the right-hand box bSkip to d4;
    d2 "ya" runs a3 -> a4 -> a5 -> d3, where "ya" passes through the
    right-hand box bMark and "tidak" goes directly to d4. From d4, "ya" loops
    back to a2 and "tidak" leads to a6 and the end state.

    Returns:
        The populated ``Dia``.
    """
    d = Dia("Detektor Syntax")
    s = d.start()
    d1 = d.dec("d1", "Teks kosong / model tak dimuat?")
    d.e(s, d1)
    empty_branch(d, d1)
    a1 = d.act("a1", "Pisahkan teks menjadi kalimat (dibatasi jumlah)")
    a2 = d.act("a2", "Ambil kalimat berikutnya")
    d2 = d.dec("d2", "Jumlah kata ≥ minimum?")
    a3 = d.act("a3", "Bangun permutasi acak urutan kata")
    a4 = d.act("a4", "Hitung skor PLL kalimat asli + permutasi "
               "via IndoBERT MLM (satu forward pass)")
    a5 = d.act("a5", "Hitung rasio permutasi yang lebih wajar daripada urutan asli")
    d3 = d.dec("d3", "rasio ≥ ambang?")
    d4 = d.dec("d4", "Masih ada kalimat?")
    a6 = d.act("a6", "Kembalikan daftar SyntaxFinding")
    e = d.end()
    d.e(d1, a1, "tidak")
    d.e(a1, a2)
    d.e(a2, d2)
    # Here the side branch is the "tidak" answer; "ya" stays on the spine.
    bSkip = d.right("bSkip", "Lewati kalimat", d2)
    d.e(d2, bSkip, "tidak")
    d.e(bSkip, d4)
    d.e(d2, a3, "ya")
    d.e(a3, a4)
    d.e(a4, a5)
    d.e(a5, d3)
    bMark = d.right("bMark",
                    "Catat UNUSUAL_WORD_ORDER (severity LOW)", d3)
    d.e(d3, bMark, "ya")
    d.e(bMark, d4)
    d.e(d3, d4, "tidak")
    # Sentence loop: back to a2 along the default left lane.
    d.loop_left(d4, a2, "ya")
    d.e(d4, a6, "tidak")
    d.e(a6, e)
    return d
# ============================================================ FIELD-FIT
def field_fit() -> Dia:
    """Build the "Detektor Field-Fit" activity diagram.

    Flow: start -> d1 with the shared empty-input exit on "ya". On "tidak" the
    flow reaches d3: "ya" runs a1 -> a2 -> d4, while "tidak" jumps to a4 along
    a left lane. From d4, "ya" passes through a3 and "tidak" goes directly to
    a4, which leads to the end state. A margin note ("n1") is added just after
    d1.

    Returns:
        The populated ``Dia``.
    """
    d = Dia("Detektor Field-Fit")
    s = d.start()
    d1 = d.dec("d1", "Teks kosong / bahasa bukan id?")
    d.e(s, d1)
    empty_branch(d, d1)
    # note() derives its y from the cursor, so it must be added at this point.
    d.note("n1", "ML embedding (semua field)")
    d3 = d.dec("d3", "Model embedding aktif?")
    a1 = d.act("a1", "Embed isi field (ternormalisasi)")
    a2 = d.act("a2", "Hitung cosine ke centroid prototipe 9 field")
    d4 = d.dec("d4", "Field lain unggul ≥ margin & cos ≥ ambang?")
    a3 = d.act("a3", "Saran pindah ke field termirip")
    a4 = d.act("a4", "Kembalikan daftar FieldFitFinding")
    e = d.end()
    d.e(d1, d3, "tidak")
    d.e(d3, a1, "ya")
    d.e(a1, a2)
    d.e(a2, d4)
    d.e(d4, a3, "ya")
    d.e(a3, a4)
    d.e(d4, a4, "tidak")
    # Forward jump (not a back-loop): d3 "tidak" skips down to a4 along the
    # vertical lane x=205.
    d.loop_left(d3, a4, "tidak", lane=205)
    d.e(a4, e)
    return d
def main():
    """Generate every diagram and write it to ``OUT/<name>.drawio``.

    Each document is parsed with ``ElementTree`` before it is written, so a
    malformed document stops the run instead of producing a broken file.

    Raises:
        xml.etree.ElementTree.ParseError: If a generated document is not
            well-formed XML.
        OSError: If the output directory or a file cannot be written.
    """
    diagrams = {
        "Proses Evaluasi": proses_evaluasi,
        "Use Case": use_case,
        "PII": pii,
        "Word Quality": word_quality,
        "Konten Berisiko": konten_berisiko,
        "NER": ner,
        "Profanity": profanity,
        "Filler": filler,
        "Special Char": special_char,
        "Syntax": syntax,
        "Field-Fit": field_fit,
    }
    # A fresh checkout may not have docs/diagrams yet; writing into a missing
    # directory would raise FileNotFoundError.
    OUT.mkdir(parents=True, exist_ok=True)
    for name, build in diagrams.items():
        document = build().xml()
        # Well-formedness check only; the parsed tree is discarded.
        ET.fromstring(document)
        # Encode once so that the size reported below is the real byte count
        # (labels contain non-ASCII characters).
        payload = document.encode("utf-8")
        (OUT / f"{name}.drawio").write_bytes(payload)
        print(f"OK {name}.drawio ({len(payload)} bytes)")


if __name__ == "__main__":
    main()