Spaces:
Running
Running
| """ | |
| Generator diagram activity draw.io (.drawio) untuk tiap detektor. | |
| Gaya: judul 30px, titik mulai hitam, end-state bercincin, BELAH KETUPAT (rhombus) | |
| untuk keputusan, kotak membulat untuk aksi. Setiap node diukur agar pas (fit) | |
| dengan teks di dalamnya. Kata bahasa Inggris dibungkus <i>...</i> agar miring. | |
| Output: docs/diagrams/<Nama>.drawio (XML mxGraph, divalidasi well-formed). | |
| Jalankan: python scripts/gen_drawio.py | |
| """ | |
| from __future__ import annotations | |
| import math | |
| import re | |
| import xml.etree.ElementTree as ET | |
| from pathlib import Path | |
# Output directory: two levels above this script, then docs/diagrams.
OUT = Path(__file__).resolve().parent.parent.joinpath("docs", "diagrams")

# --- mxGraph style strings, one per kind of shape -------------------------
# Filled black start dot.
START = "ellipse;whiteSpace=wrap;html=1;fillColor=#000000;strokeColor=#000000;"
# End-state shape.
END = "ellipse;html=1;shape=endState;fillColor=#000000;strokeColor=#000000;"
# Rhombus used for decisions.
DEC = "rhombus;whiteSpace=wrap;html=1;fillColor=#ffffff;strokeColor=#000000;"
# Rounded box used for actions.
ACT = (
    "rounded=1;whiteSpace=wrap;html=1;arcSize=20;"
    "fillColor=#ffffff;strokeColor=#000000;"
)
# Solid black bar (used by the NER diagram for fork/join).
BAR = "rounded=0;whiteSpace=wrap;html=1;fillColor=#000000;strokeColor=#000000;"
# Diagram title: bold, 30px, centred.
TITLE = (
    "text;html=1;strokeColor=none;fillColor=none;"
    "align=center;verticalAlign=middle;"
    "whiteSpace=wrap;fontSize=30;fontStyle=1;"
)
# Free-standing note: italic, 12px, left-aligned.
NOTE = (
    "text;html=1;strokeColor=none;fillColor=none;"
    "align=left;verticalAlign=middle;"
    "whiteSpace=wrap;fontSize=12;fontStyle=2;"
)
# Orthogonal connector with a block arrow head.
EDGE = "edgeStyle=orthogonalEdgeStyle;rounded=0;html=1;endArrow=block;"

# --- layout constants -------------------------------------------------------
CX = 460   # x of the spine centre
RX = 720   # left edge of right-hand branch nodes
LXR = 250  # right edge of left-hand branch nodes
GAP = 42   # vertical gap between consecutive spine nodes
CW = 6.7   # average character width at fontSize 12
def strip_tags(text: str) -> str:
    """Return *text* with every ``<...>`` tag removed.

    Used to measure the visible length of a label that may carry markup.
    """
    tag_pattern = re.compile(r'<[^>]+>')
    return tag_pattern.sub('', text)
def esc(s: str) -> str:
    """Escape *s* so it can be embedded in a double-quoted XML attribute.

    Markup characters are turned into entities, so a label such as
    ``<i>regex</i>`` is stored as ``&lt;i&gt;regex&lt;/i&gt;`` and decodes back
    to the original text when the file is parsed.  Line feeds are written as
    the character reference ``&#10;``: a literal newline inside an attribute
    would be normalised to a space by the XML parser, losing the line break
    that the node-sizing helpers account for.

    Args:
        s: Raw label text, possibly containing ``<i>...</i>`` tags and
            newlines.

    Returns:
        The escaped text, safe to place between double quotes in an attribute.
    """
    # "&" must go first, otherwise the entities produced below would be
    # escaped a second time.
    escaped = (
        s.replace("&", "&amp;")
        .replace("<", "&lt;")
        .replace(">", "&gt;")
        .replace('"', "&quot;")
    )
    return escaped.replace("\n", "&#10;")
def fit_act(text, max_w=280, pad=14, lh=17):
    """Compute a (width, height) for an action box that fits *text*.

    Tags are stripped before measuring.  The width follows the longest
    newline-separated segment, clamped between 130 and *max_w*; the height
    counts wrapped lines at *lh* pixels each plus padding, with a minimum
    of 40.

    Args:
        text: Label, possibly containing tags and ``\\n`` line breaks.
        max_w: Upper bound for the box width.
        pad: Padding applied on each side.
        lh: Line height in pixels.

    Returns:
        Tuple ``(width, height)``.
    """
    seg_lengths = [len(seg) for seg in strip_tags(text).split("\n")]
    natural_w = int(max(seg_lengths) * CW) + 2 * pad
    w = min(max(natural_w, 130), max_w)
    # Characters that fit on one line inside the padded box (at least one).
    chars_per_line = max(1, int((w - 2 * pad) / CW))
    line_count = 0
    for length in seg_lengths:
        line_count += max(1, math.ceil(length / chars_per_line))
    return w, max(40, line_count * lh + 2 * pad)
def fit_dec(text, pad=14, lh=17):
    """Compute a (width, height) for a decision rhombus that fits *text*.

    A rhombus needs extra room because only its central area can hold text,
    so fixed margins (140 wide, 96 high) are added around the text area.

    Args:
        text: Label, possibly containing tags and ``\\n`` line breaks.
        pad: Accepted for signature parity with ``fit_act``; not used here.
        lh: Line height in pixels.

    Returns:
        Tuple ``(width, height)`` of ints.
    """
    seg_lengths = [len(seg) for seg in strip_tags(text).split("\n")]
    # Text area width: longest segment, clamped to the range 100..210.
    text_w = min(max(int(max(seg_lengths) * CW), 100), 210)
    chars_per_line = max(1, int(text_w / CW))
    line_count = 0
    for length in seg_lengths:
        line_count += max(1, math.ceil(length / chars_per_line))
    return int(text_w + 140), int(line_count * lh + 96)
def fit_note(text):
    """Return (width, height) for a single-line note; the height is always 22."""
    visible_chars = len(strip_tags(text))
    width = int(visible_chars * 6.6) + 8
    return width, 22
class Dia:
    """Collects the cells of one draw.io diagram and serialises them to XML.

    Spine helpers (``start``, ``act``, ``dec``, ``end``) centre a node on
    ``CX`` at the running ``self.y`` cursor and then advance the cursor.
    Branch helpers place a node beside an existing one, vertically centred on
    it.  Edge helpers connect nodes by cell id.
    """

    def __init__(self, title: str, page_w=1000):
        # Plain-text diagram name (tags stripped).
        self.title = strip_tags(title)
        self.page_w = page_w
        # Serialised <mxCell> fragments, in insertion order.
        self.cells: list[str] = []
        # Cell id -> (x, y, width, height) for every vertex added via _add().
        self.geo: dict[str, tuple[int, int, int, int]] = {}
        # Vertical cursor for the next spine node.
        self.y = 96
        # The title cell keeps the caller's markup and is not tracked in geo.
        self.cells.append(self._vertex("t", TITLE, title, 240, 20, 500, 46))

    @staticmethod
    def _vertex(cid, style, label, x, y, w, h):
        """Serialise one vertex cell with the given geometry."""
        return (
            f'<mxCell id="{cid}" value="{esc(label)}" style="{style}" vertex="1" parent="1">'
            f'<mxGeometry x="{x}" y="{y}" width="{w}" height="{h}" as="geometry"/></mxCell>'
        )

    def _add(self, cid, style, label, x, y, w, h):
        """Record the geometry of a vertex, append its cell and return *cid*."""
        self.geo[cid] = (x, y, w, h)
        self.cells.append(self._vertex(cid, style, label, x, y, w, h))
        return cid

    def cy(self, ref):
        """Return the vertical centre of the already-added vertex *ref*."""
        _, top, _, height = self.geo[ref]
        return top + height // 2

    # ---- spine (centred on CX, y advances automatically) ----
    def _spine(self, cid, style, label, w, h):
        """Place a node on the spine at the cursor, then move the cursor down."""
        self._add(cid, style, label, CX - w // 2, self.y, w, h)
        self.y += h + GAP
        return cid

    def start(self, cid="s"):
        """Add the 30x30 start dot."""
        return self._spine(cid, START, "", 30, 30)

    def act(self, cid, label, max_w=280):
        """Add an action box sized to fit *label*."""
        w, h = fit_act(label, max_w)
        return self._spine(cid, ACT, label, w, h)

    def dec(self, cid, label):
        """Add a decision rhombus sized to fit *label*."""
        w, h = fit_dec(label)
        return self._spine(cid, DEC, label, w, h)

    def end(self, cid="e"):
        """Add the 30x30 end-state node."""
        return self._spine(cid, END, "", 30, 30)

    # ---- branches (do not move the spine cursor) ----
    def right(self, cid, label, ref, max_w=250):
        """Add an action box at x=RX, vertically centred on *ref*."""
        w, h = fit_act(label, max_w)
        top = self.cy(ref) - h // 2
        return self._add(cid, ACT, label, RX, top, w, h)

    def left(self, cid, label, ref, max_w=210):
        """Add an action box whose right edge is at LXR, centred on *ref*."""
        w, h = fit_act(label, max_w)
        top = self.cy(ref) - h // 2
        return self._add(cid, ACT, label, LXR - w, top, w, h)

    def side_end(self, cid, ref):
        """Add an end-state node at x=RX+60, vertically centred on *ref*."""
        return self._add(cid, END, "", RX + 60, self.cy(ref) - 15, 30, 30)

    def note(self, cid, label):
        """Add a text note at x=30, just above the current cursor position."""
        w, h = fit_note(label)
        self._add(cid, NOTE, label, 30, self.y - GAP + 4, w, h)

    def raw(self, cid, style, label, x, y, w, h):
        """Add a vertex with explicit style and geometry."""
        return self._add(cid, style, label, x, y, w, h)

    # ---- edges ----
    def _edge(self, src, tgt, label, style_kw, geometry):
        """Append an edge cell; *geometry* is the serialised <mxGeometry> element."""
        style = EDGE + "".join(f"{key}={value};" for key, value in style_kw.items())
        value_attr = f' value="{esc(label)}"' if label else ""
        self.cells.append(
            f'<mxCell id="e_{src}_{tgt}"{value_attr} style="{style}" edge="1" parent="1" '
            f'source="{src}" target="{tgt}">{geometry}</mxCell>'
        )

    def e(self, src, tgt, label="", **kw):
        """Connect *src* to *tgt*; extra keywords are appended to the edge style."""
        self._edge(src, tgt, label, kw, '<mxGeometry relative="1" as="geometry"/>')

    def e_pts(self, src, tgt, pts, label="", **kw):
        """Connect *src* to *tgt* through the waypoints *pts* (``(x, y)`` pairs)."""
        waypoints = "".join(f'<mxPoint x="{x}" y="{y}"/>' for x, y in pts)
        geometry = (
            '<mxGeometry relative="1" as="geometry">'
            f'<Array as="points">{waypoints}</Array></mxGeometry>'
        )
        self._edge(src, tgt, label, kw, geometry)

    def loop_left(self, src, tgt, label="ya", lane=200):
        """Route an edge out of and into the left sides via the vertical lane x=*lane*."""
        waypoints = [(lane, self.cy(src)), (lane, self.cy(tgt))]
        self.e_pts(src, tgt, waypoints, label,
                   exitX=0, exitY=0.5, entryX=0, entryY=0.5)

    def loop_right(self, src, tgt, label="ya", lane=960):
        """Route an edge out of and into the right sides via the vertical lane x=*lane*."""
        waypoints = [(lane, self.cy(src)), (lane, self.cy(tgt))]
        self.e_pts(src, tgt, waypoints, label,
                   exitX=1, exitY=0.5, entryX=1, entryY=0.5)

    def xml(self) -> str:
        """Serialise the diagram as an mxfile document.

        The page height is the current cursor position plus 40.
        """
        page_h = self.y + 40
        parts = [
            '<mxfile host="app.diagrams.net" type="device">',
            f'<diagram name="{esc(self.title)}" id="d">',
            '<mxGraphModel dx="900" dy="700" grid="1" gridSize="10" guides="1" ',
            'tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" ',
            f'pageWidth="{self.page_w}" pageHeight="{page_h}" background="#ffffff" ',
            'math="0" shadow="0">',
            '<root><mxCell id="0"/><mxCell id="1" parent="0"/>',
            *self.cells,
            '</root>',
            '</mxGraphModel></diagram></mxfile>',
        ]
        return "".join(parts)
def empty_branch(d: Dia, dref):
    """Attach the shared "return an empty list" side branch to decision *dref*.

    Adds an action box and an end-state to the right of *dref*, then connects
    them: *dref* --"ya"--> action --> end.
    """
    action_id = d.right("aEmpty", "Kembalikan daftar kosong", dref)
    end_id = d.side_end("eEmpty", dref)
    d.e(dref, action_id, "ya")
    d.e(action_id, end_id)
| # ============================================================ PROSES EVALUASI | |
def proses_evaluasi():
    """Build the "Proses Evaluasi Prompt" activity diagram and return it.

    Node ids are written as literals; ``start()`` and ``end()`` use their
    default ids ``"s"`` and ``"e"``.
    """
    d = Dia("Proses Evaluasi Prompt", page_w=1120)

    # Spine nodes, stacked top to bottom in call order.
    d.start()
    d.act("a1", "[Antarmuka] Pengguna mengisi field prompt")
    d.act("a2", "[Antarmuka] Hitung badge field wajib kosong secara client-side")
    d.dec("d1", "Ada field yang berubah?")
    d.act("a3", "[Antarmuka] Kirim POST /api/evaluate untuk field berubah\n"
                "(satu field per request)")
    d.act("a4", "[Orkestrator] Validasi dan parse JSON {fields}")
    d.act("a5", "[Orkestrator] Jalankan deteksi bahasa + 9 detektor\n"
                "pada field terisi")
    d.act("a6", "[Orkestrator] Gabungkan temuan dan dedup level kata")
    d.act("a7", "[Antarmuka] Simpan temuan server ke cache field")
    d.act("a8", "[Antarmuka] Gabungkan cache issue + badge missing;\n"
                "susun prompt akhir dari field lokal")
    d.act("a9", "[Antarmuka] Tampilkan sorotan, badge, banner catatan,\n"
                "dan prompt akhir")
    d.dec("d2", "Saran perbaikan diterapkan?")
    d.act("a10", "[Antarmuka] Ubah isi field, kosongkan cache field,\n"
                 "jadwalkan evaluasi ulang")
    d.dec("d3", "Prompt akhir disalin?")
    d.end()

    # Straight run from the start dot to the first decision.
    head = ["s", "a1", "a2", "d1"]
    for src, tgt in zip(head, head[1:]):
        d.e(src, tgt)

    # Nothing changed: re-render from cache and rejoin the spine at a8.
    d.right("bNoChange", "Render ulang dari cache yang masih valid", "d1")
    d.e("d1", "bNoChange", "tidak")
    d.e("bNoChange", "a8")

    # Something changed: evaluate, merge and display.
    d.e("d1", "a3", "ya")
    body = ["a3", "a4", "a5", "a6", "a7", "a8", "a9", "d2"]
    for src, tgt in zip(body, body[1:]):
        d.e(src, tgt)

    d.e("d2", "a10", "ya")
    d.loop_left("a10", "d1", "", lane=150)
    d.e("d2", "d3", "tidak")
    d.loop_right("d3", "a9", "tidak", lane=1030)
    d.e("d3", "e", "ya")
    return d
| # ============================================================ USE CASE | |
def use_case():
    """Build the use-case diagram (actor, system boundary, use cases) and return it.

    All shapes are placed with explicit coordinates through ``Dia.raw``; the
    cursor is set to 940 at the end so the page height covers the boundary box.
    """
    d = Dia("Use Case Sistem Prompt Builder", page_w=1220)

    boundary_style = ("rounded=0;whiteSpace=wrap;html=1;fillColor=none;"
                      "strokeColor=#000000;fontStyle=1;verticalAlign=top;spacingTop=10;")
    actor_style = ("shape=umlActor;verticalLabelPosition=bottom;verticalAlign=top;"
                   "html=1;outlineConnect=0;fillColor=#ffffff;strokeColor=#000000;")
    usecase_style = "ellipse;whiteSpace=wrap;html=1;fillColor=#ffffff;strokeColor=#000000;"

    d.raw("sys", boundary_style, "Aplikasi Prompt Builder", 250, 95, 900, 820)
    d.raw("actor", actor_style, "Mahasiswa", 70, 390, 90, 150)

    # Actor-facing use cases: one column, 130px apart, starting at y=170.
    primary = [
        ("uc1", "Mengisi field\nPrompt Builder"),
        ("uc2", "Mengevaluasi\nprompt"),
        ("uc3", "Meninjau hasil\nevaluasi"),
        ("uc4", "Menerapkan saran\nperbaikan"),
        ("uc5", "Menyalin\nprompt akhir"),
    ]
    for row, (cid, label) in enumerate(primary):
        d.raw(cid, usecase_style, label, 310, 170 + 130 * row, 230, 85)
    d.raw("uc6", usecase_style, "Menjalankan\nsembilan detektor", 620, 300, 250, 85)

    # Detector use cases: one column, 82px apart, each included by uc6.
    detectors = [
        ("d1", "Deteksi PII"),
        ("d2", "Deteksi Word Quality"),
        ("d3", "Deteksi Konten Berisiko"),
        ("d4", "Deteksi NER"),
        ("d5", "Deteksi Profanity"),
        ("d6", "Deteksi Filler"),
        ("d7", "Deteksi Special Char"),
        ("d8", "Deteksi Syntax"),
        ("d9", "Deteksi Field-Fit"),
    ]
    for row, (cid, label) in enumerate(detectors):
        d.raw(cid, usecase_style, label, 910, 125 + 82 * row, 190, 58)
        d.e("uc6", cid, "<<include>>", dashed=1)

    for cid, _ in primary:
        d.e("actor", cid)

    d.e("uc2", "uc6", "<<include>>", dashed=1)
    d.e("uc3", "uc2", "<<extend>>", dashed=1)
    d.e("uc4", "uc2", "<<extend>>", dashed=1)

    d.y = 940
    return d
| # ============================================================ PII | |
def pii():
    """Build the PII detector activity diagram and return it."""
    d = Dia("Detektor <i>PII</i> (Data Pribadi)")

    # Common opening: start dot, empty-text check, early-return side branch.
    d.start()
    d.dec("d1", "Teks kosong?")
    d.e("s", "d1")
    empty_branch(d, "d1")

    # Remaining nodes; side boxes are added right after the decision they hang off.
    d.act("a1", "Cari kandidat lewat pola <i>regex</i> PII Indonesia\n"
                "(NIK, NPWP, BPJS, SIM, plat nomor, email, HP,\n"
                "rekening, kartu kredit, IP, alamat)")
    d.act("a2", "Ekstrak nilai dari <i>capturing group</i>\n"
                "dan pangkas spasi/tanda baca tepi")
    d.dec("d2", "Punya validator struktural?")
    d.dec("d3", "Validasi lolos?")
    d.act("a3", "Naikkan <i>confidence score</i> (+0,15)")
    d.right("aDrop", "Buang kandidat", "d3")
    d.dec("d4", "<i>confidence</i> >= ambang minimum?")
    d.act("a4", "Catat <i>PIIEntity</i> (label, span, konteks)")
    d.right("aLow", "Abaikan kandidat", "d4")
    d.dec("d5", "Masih ada kandidat?")
    d.act("a5", "Selesaikan span tumpang-tindih\n"
                "(confidence tertinggi menang)")
    d.act("a6", "Urutkan menurut posisi")
    d.act("a7", "Kembalikan daftar <i>PIIEntity</i>")
    d.end()

    # Edges up to the loop-back, as (source, target, label) rows.
    per_candidate = [
        ("d1", "a1", "tidak"),
        ("a1", "a2", ""),
        ("a2", "d2", ""),
        ("d2", "d3", "ya"),
        ("d3", "a3", "ya"),
        ("d3", "aDrop", "tidak"),
        ("aDrop", "d5", ""),
        ("d2", "d4", "tidak"),
        ("a3", "d4", ""),
        ("d4", "a4", "ya"),
        ("a4", "d5", ""),
        ("d4", "aLow", "tidak"),
        ("aLow", "d5", ""),
    ]
    for src, tgt, label in per_candidate:
        d.e(src, tgt, label)

    d.loop_left("d5", "a1", "ya")

    wrap_up = [
        ("d5", "a5", "tidak"),
        ("a5", "a6", ""),
        ("a6", "a7", ""),
        ("a7", "e", ""),
    ]
    for src, tgt, label in wrap_up:
        d.e(src, tgt, label)
    return d
| # ============================================================ WORD QUALITY | |
def word_quality():
    """Build the Word Quality detector activity diagram and return it."""
    d = Dia("Detektor <i>Word Quality</i>")

    # Common opening: start dot, empty-text check, early-return side branch.
    d.start()
    d.dec("d1", "Teks kosong?")
    d.e("s", "d1")
    empty_branch(d, "d1")

    # Spine nodes.
    d.act("a1", "Tokenisasi teks")
    d.act("a2", "Ambil <i>token</i> berikutnya")
    d.dec("d2", "<i>Token</i> pendek / non-<i>ASCII</i> / pola dilewati?")
    d.act("a3", "Kumpulkan konteks kata sebelum & sesudah")
    d.dec("d3", "<i>SLANG</i>? (kamus + pola <i>regex</i>)")
    d.dec("d4", "<i>ALAY</i>? (<i>l33tspeak</i>)")
    d.dec("d5", "<i>TYPO</i>? (<i>SymSpell</i> + <i>skeleton</i>)")
    d.dec("d6", "Masih ada <i>token</i>?")
    d.dec("d7", "Jumlah <i>SLANG</i> ≥ 2 pada kalimat?")
    d.dec("d8", "<i>Layer-2 ML</i> aktif & ada <i>TYPO</i>?")
    d.act("a8", "Urutkan menurut posisi & kembalikan <i>WordIssue</i>")
    d.end()

    d.e("d1", "a1", "tidak")
    d.e("a1", "a2")
    d.e("a2", "d2")

    # Skipped tokens go straight to the "more tokens?" check.
    d.right("bSkip", "Lewati <i>token</i>", "d2")
    d.e("d2", "bSkip", "ya")
    d.e("bSkip", "d6")
    d.e("d2", "a3", "tidak")
    d.e("a3", "d3")

    # SLANG and ALAY share one shape: mark on "ya", otherwise try the next check.
    cascade = [
        ("d3", "bSlang", "Tandai <i>SLANG</i>", "d4"),
        ("d4", "bAlay", "Tandai <i>ALAY</i>", "d5"),
    ]
    for decision, mark_id, mark_label, next_check in cascade:
        d.right(mark_id, mark_label, decision)
        d.e(decision, mark_id, "ya")
        d.e(mark_id, "d6")
        d.e(decision, next_check, "tidak")

    # TYPO is the last check: mark on the right, "clean" on the left.
    d.right("bTypo", "Tandai <i>TYPO</i>", "d5")
    d.e("d5", "bTypo", "ya")
    d.e("bTypo", "d6")
    d.left("bClean", "Tidak ada temuan", "d5")
    d.e("d5", "bClean", "bersih")
    d.e("bClean", "d6")

    d.loop_left("d6", "a2", "ya")
    d.e("d6", "d7", "tidak")

    d.right("bRe",
            "Reklasifikasi <i>TYPO</i> <i>confidence</i>-rendah → <i>SLANG</i>", "d7")
    d.e("d7", "bRe", "ya")
    d.e("bRe", "d8")
    d.e("d7", "d8", "tidak")

    d.right("bML",
            "<i>Fill-mask</i> <i>IndoBERT</i> kontekstual:\n"
            "(a) gugurkan <i>TYPO</i> bila kata asli masuk akal\n"
            "(b) <i>rerank</i> kandidat koreksi sesuai konteks", "d8")
    d.e("d8", "bML", "ya")
    d.e("bML", "a8")
    d.e("d8", "a8", "tidak")
    d.e("a8", "e")
    return d
| # ============================================================ KONTEN BERISIKO | |
def konten_berisiko():
    """Build the risky-content detector activity diagram and return it."""
    d = Dia("Detektor Konten Berisiko")

    # Common opening: start dot, empty-text check, early-return side branch.
    d.start()
    d.dec("d1", "Teks kosong?")
    d.e("s", "d1")
    empty_branch(d, "d1")

    # Spine nodes.
    d.act("a1", "Bentuk varian teks: asli, normalisasi <i>slang</i>, "
                "<i>de-obfuscation</i>, <i>de-obfuscation</i> + normalisasi")
    d.act("a2", "Ambil varian teks berikutnya")
    d.act("a3", "Cocokkan pola <i>regex</i> Indonesia (<i>INJECTION</i>, "
                "<i>self-harm</i>, konten eksplisit, diskriminasi, "
                "<i>ACADEMIC_DISHONESTY</i>, <i>HARMFUL</i>)")
    d.dec("d2", "Pola cocok?")
    d.act("a4", "Petakan <i>offset</i> kembali ke teks asli")
    d.dec("d3", "<i>Evidence</i> sudah dicatat?")
    d.act("a5", "Catat <i>RiskyContentFinding</i> (kode, <i>severity</i>, "
                "<i>evidence</i>, saran)")
    d.dec("d4", "Masih ada varian?")
    d.act("a6", "Urutkan <i>HIGH</i> → <i>MEDIUM</i> → <i>LOW</i>")
    d.end()

    # Edges before the duplicate side box, as (source, target, label) rows.
    matching = [
        ("d1", "a1", "tidak"),
        ("a1", "a2", ""),
        ("a2", "a3", ""),
        ("a3", "d2", ""),
        ("d2", "a4", "ya"),
        ("d2", "d4", "tidak"),
        ("a4", "d3", ""),
    ]
    for src, tgt, label in matching:
        d.e(src, tgt, label)

    # Evidence that was already recorded is skipped.
    d.right("bSeen", "Lewati (duplikat)", "d3")
    d.e("d3", "bSeen", "ya")
    d.e("bSeen", "d4")
    d.e("d3", "a5", "tidak")
    d.e("a5", "d4")

    d.loop_left("d4", "a2", "ya")
    d.e("d4", "a6", "tidak")
    d.e("a6", "e")
    return d
| # ============================================================ NER (fork/join) | |
def ner():
    """Build the NER detector activity diagram (with a fork/join) and return it.

    The fork bar, the three parallel boxes and the join bar are placed with
    explicit coordinates, and the spine cursor ``d.y`` is moved by hand around
    them.
    """
    d = Dia("Detektor <i>NER</i>", page_w=1020)

    # Common opening: start dot, empty-text check, early-return side branch.
    d.start()
    d.dec("d1", "Teks kosong?")
    d.e("s", "d1")
    empty_branch(d, "d1")

    # Fork bar at the current cursor; the branches sit 48px below the bar.
    fork_y = d.y
    d.raw("fork", BAR, "", 200, fork_y, 620, 10)
    branch_y = fork_y + 10 + 48
    d.y = branch_y

    d.raw("b1", ACT,
          "Prediksi entitas via <i>transformer XLM-R</i> (bila model dimuat)",
          150, branch_y, 220, 70)
    d.raw("b2", ACT,
          "Deteksi <i>rule</i> <i>regex</i> Indonesia "
          "(PT/CV, kementerian, kota, Rp, tanggal, gelar+nama)",
          400, branch_y, 220, 80)
    d.raw("b3", ACT, "Cocokkan daftar nama orang", 650, branch_y, 220, 70)

    # Join bar 48px below the tallest branch box (height 80).
    join_y = branch_y + 80 + 48
    d.y = join_y
    d.raw("join", BAR, "", 200, join_y, 620, 10)
    d.y = join_y + 10 + GAP

    d.e("d1", "fork", "tidak")

    # Each branch leaves and re-enters the bars at its own horizontal fraction.
    lanes = (("b1", 0.2), ("b2", 0.5), ("b3", 0.8))
    for branch, frac in lanes:
        d.e("fork", branch, exitX=frac, exitY=1)
    for branch, frac in lanes:
        d.e(branch, "join", entryX=frac, entryY=0)

    # Post-join spine.
    d.act("a1", "Gabungkan entitas <i>ML</i> + <i>rule</i> + nama")
    d.act("a2", "Filter entitas tak masuk akal & konteks tidak valid")
    d.act("a3", "Filter kebutuhan <i>prompt</i> "
                "(buang <i>stopword</i> & <i>token</i> tunggal huruf kecil)")
    d.act("a4", "Urutkan menurut posisi & kembalikan <i>NEREntity</i>")
    d.end()

    tail = ["join", "a1", "a2", "a3", "a4", "e"]
    for src, tgt in zip(tail, tail[1:]):
        d.e(src, tgt)
    return d
| # ============================================================ PROFANITY | |
def profanity():
    """Build the Profanity detector activity diagram and return it."""
    d = Dia("Detektor <i>Profanity</i>")

    # Common opening: start dot, empty-text check, early-return side branch.
    d.start()
    d.dec("d1", "Teks kosong?")
    d.e("s", "d1")
    empty_branch(d, "d1")

    # Spine nodes.
    d.act("a1", "<i>Layer 1</i> — <i>pre-pass</i>: "
                "tangkap kata dieja per huruf (mis. 'a n j i n g')")
    d.act("a2", "Ambil <i>token</i> berikutnya")
    d.dec("d2", "<i>Span</i> sudah tertangkap?")
    d.act("a3", "Klasifikasi <i>token</i> "
                "(normalisasi <i>leet</i>/<i>strip</i>/<i>collapse</i> "
                "+ <i>skeleton</i> konsonan)")
    d.dec("d3", "Tergolong kasar?")
    d.dec("d4", "Masih ada <i>token</i>?")
    d.dec("d5", "<i>Layer 1</i> kosong & <i>ML</i> aktif & ≥ 3 kata?")
    d.act("a4", "Urutkan menurut posisi & kembalikan temuan")
    d.end()

    d.e("d1", "a1", "tidak")
    d.e("a1", "a2")
    d.e("a2", "d2")

    # Tokens already covered by the pre-pass are skipped.
    d.right("bSkip", "Lewati <i>token</i>", "d2")
    d.e("d2", "bSkip", "ya")
    d.e("bSkip", "d4")
    d.e("d2", "a3", "tidak")
    d.e("a3", "d3")

    d.right("bMark",
            "Catat <i>ProfanityFinding</i> (<i>HIGH</i>/<i>MEDIUM</i>)", "d3")
    d.e("d3", "bMark", "ya")
    d.e("bMark", "d4")
    d.e("d3", "d4", "tidak")

    d.loop_left("d4", "a2", "ya")
    d.e("d4", "d5", "tidak")

    d.right("bML",
            "<i>Layer 2</i> — <i>classifier</i> toksisitas Indonesia "
            "(<i>advice-only</i>, sorot teks penuh)", "d5")
    d.e("d5", "bML", "ya")
    d.e("bML", "a4")
    d.e("d5", "a4", "tidak")
    d.e("a4", "e")
    return d
| # ============================================================ FILLER | |
def filler():
    """Build the Filler detector activity diagram (three nested loops) and return it."""
    d = Dia("Detektor <i>Filler</i>")

    # Common opening: start dot, empty-text check, early-return side branch.
    d.start()
    d.dec("d1", "Teks kosong?")
    d.e("s", "d1")
    empty_branch(d, "d1")

    # Spine nodes.
    d.act("a1", "Siapkan teks pindai: teks asli + <i>norm_text</i>\n"
                "Word Quality bila tersedia")
    d.act("a2", "Terjemahkan <i>l33t</i> -> huruf normal")
    d.act("a3",
          "Ambil pola <i>regex</i> berikutnya "
          "(<i>GREETING_AI, GREETING_ONLY, THANKS, APOLOGY, EMPTY_OPENER, "
          "EMOTIONAL_FILLER, VAGUE_REFERENCE, VAGUE_PARTICLE, "
          "UNNECESSARY_PREAMBLE, HESITATION</i>)")
    d.act("a4", "Cari kecocokan berikutnya;\n"
                "petakan offset ke teks asli bila dari <i>norm_text</i>")
    d.dec("d2", "<i>Span</i> tumpang-tindih dengan temuan lain?")
    d.dec("d3", "Masih ada kecocokan?")
    d.dec("d4", "Masih ada pola?")
    d.dec("d5", "Masih ada teks pindai?")
    d.act("a5", "Urutkan menurut posisi & kembalikan temuan")
    d.end()

    d.e("d1", "a1", "tidak")
    head = ["a1", "a2", "a3", "a4", "d2"]
    for src, tgt in zip(head, head[1:]):
        d.e(src, tgt)

    # Overlap check: skip on the right, record on the left; both rejoin at d3.
    d.right("bSkip", "Lewati", "d2")
    d.e("d2", "bSkip", "ya")
    d.e("bSkip", "d3")
    d.left("bMark", "Catat <i>FillerFinding</i>", "d2")
    d.e("d2", "bMark", "tidak")
    d.e("bMark", "d3")

    # Loop-backs: next match, next pattern, next scan text.
    d.loop_left("d3", "a4", "ya")
    d.e("d3", "d4", "tidak")
    d.loop_right("d4", "a3", "ya")
    d.e("d4", "d5", "tidak")
    d.loop_left("d5", "a2", "ya", lane=120)
    d.e("d5", "a5", "tidak")
    d.e("a5", "e")
    return d
| # ============================================================ SPECIAL CHAR | |
def special_char():
    """Build the Special Char detector activity diagram (a linear chain) and return it."""
    d = Dia("Detektor <i>Special Char</i>")
    d.start()
    d.act("a0", "Siapkan teks & himpunan <i>span</i> terpakai")

    # One action box per scan category, in scan order.
    categories = [
        ("c1", "<i>Zero-width characters</i> → hapus"),
        ("c2", "Kontrol arah teks (<i>BiDi</i>) → hapus"),
        ("c3", "<i>Unicode Tag</i> (teks tersembunyi) → hapus"),
        ("c4", "Homoglif (huruf menyamar) → perbaiki ke <i>Latin</i>"),
        ("c5", "Spasi non-standar → ganti spasi biasa"),
        ("c6", "Karakter kontrol → hapus"),
        ("c7", "<i>Smart quote</i> -> ganti kutip ASCII"),
        ("c8", "Baris kosong berlebih -> jadikan satu baris kosong"),
        ("c9", "Spasi ganda & tanda baca berulang -> rapikan"),
    ]

    # The note is positioned from the cursor, so it must be added after a0
    # and before the category boxes move the cursor further down.
    d.note("nScan", "Scan kategori berurutan (dedup <i>span</i>)")
    d.e("s", "a0")

    # Add each box, then link it to the one above it.
    previous = "a0"
    for cid, label in categories:
        d.act(cid, label)
        d.e(previous, cid)
        previous = cid

    d.act("a1", "Kembalikan daftar <i>SpecialCharFinding</i> "
                "beserta teks pengganti (<i>replacement</i>)")
    d.end()
    d.e(previous, "a1")
    d.e("a1", "e")
    return d
| # ============================================================ SYNTAX | |
def syntax():
    """Build the Syntax detector activity diagram and return it."""
    d = Dia("Detektor <i>Syntax</i>")

    # Common opening: start dot, guard decision, early-return side branch.
    d.start()
    d.dec("d1", "Teks kosong / model tak dimuat?")
    d.e("s", "d1")
    empty_branch(d, "d1")

    # Spine nodes.
    d.act("a1", "Pisahkan teks menjadi kalimat (dibatasi jumlah)")
    d.act("a2", "Ambil kalimat berikutnya")
    d.dec("d2", "Jumlah kata ≥ minimum?")
    d.act("a3", "Bangun permutasi acak urutan kata")
    d.act("a4", "Hitung skor <i>PLL</i> kalimat asli + permutasi "
                "via <i>IndoBERT MLM</i> (satu <i>forward pass</i>)")
    d.act("a5", "Hitung rasio permutasi yang lebih wajar daripada urutan asli")
    d.dec("d3", "rasio ≥ ambang?")
    d.dec("d4", "Masih ada kalimat?")
    d.act("a6", "Kembalikan daftar <i>SyntaxFinding</i>")
    d.end()

    d.e("d1", "a1", "tidak")
    d.e("a1", "a2")
    d.e("a2", "d2")

    # Sentences below the minimum word count are skipped.
    d.right("bSkip", "Lewati kalimat", "d2")
    d.e("d2", "bSkip", "tidak")
    d.e("bSkip", "d4")
    d.e("d2", "a3", "ya")
    scoring = ["a3", "a4", "a5", "d3"]
    for src, tgt in zip(scoring, scoring[1:]):
        d.e(src, tgt)

    d.right("bMark",
            "Catat <i>UNUSUAL_WORD_ORDER</i> (<i>severity LOW</i>)", "d3")
    d.e("d3", "bMark", "ya")
    d.e("bMark", "d4")
    d.e("d3", "d4", "tidak")

    d.loop_left("d4", "a2", "ya")
    d.e("d4", "a6", "tidak")
    d.e("a6", "e")
    return d
| # ============================================================ FIELD-FIT | |
def field_fit():
    """Build the Field-Fit detector activity diagram and return it."""
    d = Dia("Detektor <i>Field-Fit</i>")

    # Common opening: start dot, guard decision, early-return side branch.
    d.start()
    d.dec("d1", "Teks kosong / bahasa bukan id?")
    d.e("s", "d1")
    empty_branch(d, "d1")

    # The note is positioned from the cursor, so it is added before d3.
    d.note("n1", "<i>ML embedding</i> (semua field)")
    d.dec("d3", "Model <i>embedding</i> aktif?")
    d.act("a1", "<i>Embed</i> isi field (ternormalisasi)")
    d.act("a2", "Hitung <i>cosine</i> ke centroid prototipe 9 field")
    d.dec("d4", "Field lain unggul ≥ <i>margin</i> & <i>cos</i> ≥ ambang?")
    d.act("a3", "Saran pindah ke field termirip")
    d.act("a4", "Kembalikan daftar <i>FieldFitFinding</i>")
    d.end()

    # Edges before the bypass loop, as (source, target, label) rows.
    flow = [
        ("d1", "d3", "tidak"),
        ("d3", "a1", "ya"),
        ("a1", "a2", ""),
        ("a2", "d4", ""),
        ("d4", "a3", "ya"),
        ("a3", "a4", ""),
        ("d4", "a4", "tidak"),
    ]
    for src, tgt, label in flow:
        d.e(src, tgt, label)

    # With the model inactive, jump straight to the return step.
    d.loop_left("d3", "a4", "tidak", lane=205)
    d.e("a4", "e")
    return d
def main():
    """Generate every diagram and write it to ``OUT/<name>.drawio``.

    Each document is parsed with ``ET.fromstring`` before it is written, so a
    diagram that is not well-formed XML stops the run instead of producing a
    broken file.

    Raises:
        xml.etree.ElementTree.ParseError: If a generated document is not
            well-formed XML.
        OSError: If the output directory or a file cannot be written.
    """
    diagrams = {
        "Proses Evaluasi": proses_evaluasi,
        "Use Case": use_case,
        "PII": pii,
        "Word Quality": word_quality,
        "Konten Berisiko": konten_berisiko,
        "NER": ner,
        "Profanity": profanity,
        "Filler": filler,
        "Special Char": special_char,
        "Syntax": syntax,
        "Field-Fit": field_fit,
    }
    # write_text() does not create missing directories; make sure the target
    # exists so a fresh checkout does not fail on the first file.
    OUT.mkdir(parents=True, exist_ok=True)
    for name, fn in diagrams.items():
        xml = fn().xml()
        ET.fromstring(xml)  # well-formedness check; the parsed tree is not used
        (OUT / f"{name}.drawio").write_text(xml, encoding="utf-8")
        print(f"OK {name}.drawio ({len(xml)} bytes)")


if __name__ == "__main__":
    main()