File size: 34,323 Bytes
e63555f 968d291 5463bd9 e63555f cbf7209 1c38b9f cbf7209 e63555f 169b041 e63555f 5463bd9 e63555f 968d291 d914200 e63555f 5463bd9 e63555f 1c38b9f e63555f 5463bd9 e63555f 5463bd9 e63555f 5463bd9 e63555f 5463bd9 e63555f 5463bd9 e63555f 5463bd9 e63555f 5463bd9 e63555f 5463bd9 e63555f 5463bd9 968d291 5463bd9 968d291 31796e6 968d291 5463bd9 968d291 d914200 e63555f cbf7209 e63555f 5463bd9 d914200 e63555f 968d291 1c38b9f e63555f 31796e6 e63555f 31796e6 e63555f 31796e6 e63555f 31796e6 e63555f 968d291 e63555f 169b041 e63555f 31796e6 169b041 31796e6 169b041 31796e6 169b041 e63555f 169b041 e63555f 5463bd9 31796e6 e63555f 5463bd9 31796e6 169b041 31796e6 5463bd9 e63555f 169b041 e63555f 169b041 e63555f 5463bd9 e63555f 169b041 5463bd9 e63555f 169b041 5463bd9 e63555f 169b041 5463bd9 e63555f 169b041 31796e6 e63555f 1c38b9f e63555f 968d291 e63555f 968d291 e63555f 968d291 e63555f 968d291 e63555f 968d291 e63555f 5463bd9 e63555f 968d291 e63555f 968d291 e63555f 968d291 e63555f 968d291 e63555f 169b041 e63555f 169b041 e63555f 169b041 e63555f 5463bd9 e63555f 169b041 e63555f 5463bd9 31796e6 5463bd9 169b041 5463bd9 e63555f 5463bd9 e63555f 5463bd9 e63555f 169b041 5463bd9 169b041 5463bd9 169b041 5463bd9 169b041 5463bd9 e63555f 5463bd9 e63555f 968d291 e63555f 169b041 e63555f 968d291 e63555f 968d291 169b041 e63555f 5463bd9 169b041 5463bd9 169b041 5463bd9 e63555f d914200 5463bd9 169b041 5463bd9 e63555f 5463bd9 e63555f 5463bd9 cbf7209 1c38b9f cbf7209 5463bd9 cbf7209 1c38b9f cbf7209 1c38b9f 169b041 1c38b9f cbf7209 1c38b9f 5463bd9 cbf7209 169b041 cbf7209 169b041 5463bd9 1c38b9f cbf7209 1c38b9f 169b041 1c38b9f 5463bd9 cbf7209 169b041 cbf7209 169b041 5463bd9 1c38b9f cbf7209 5463bd9 cbf7209 5463bd9 e63555f 5463bd9 1c38b9f e63555f 169b041 e63555f 5463bd9 169b041 d914200 169b041 d914200 169b041 e63555f d914200 e63555f 5463bd9 e63555f 169b041 5463bd9 169b041 e63555f 5463bd9 e63555f 968d291 e63555f 968d291 e63555f 968d291 5463bd9 e63555f 5463bd9 e63555f cbf7209 e63555f d914200 e63555f 5463bd9 e63555f 968d291 e63555f 5463bd9 e63555f 169b041 e63555f cbf7209 e63555f 5463bd9 e63555f 5463bd9 e63555f 968d291 e63555f 968d291 e63555f 5463bd9 e63555f 968d291 e63555f 1c38b9f cbf7209 e63555f 169b041 d914200 e63555f d914200 169b041 e63555f d914200 e63555f d914200 e63555f 5463bd9 cbf7209 5463bd9 e63555f 5463bd9 e63555f 968d291 e63555f 5463bd9 e63555f 169b041 e63555f 5463bd9 1c38b9f cbf7209 e63555f d914200 e63555f cbf7209 1c38b9f cbf7209 e63555f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 |
# -*- coding: utf-8 -*-
"""
app.py β Dashboard Kekurangan Sampel IPLM (TANPA HITUNG INDEKS)
FIX FULL:
- Target 68% diambil dari META:
* Kab/Kota: kolom sampel_total
* Provinsi: kolom total _sampel (atau variasinya)
- Normalisasi label diperkuat:
* kab/kota: hapus kata "DAN", seragamkan KAB/KOTA, buang simbol
* provinsi: buang prefix "PROVINSI/PROPINSI", buang simbol
- Jika META tidak match:
* ditandai META_MATCH="TIDAK" + Target NaN (bukan 0), supaya tidak menyesatkan
"""
import os
import re
import tempfile
from pathlib import Path
import gradio as gr
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from huggingface_hub import InferenceClient
from docx import Document
import plotly.express as px
try:
import kaleido # noqa: F401
HAS_KALEIDO = True
except Exception:
HAS_KALEIDO = False
# ============================================================
# 1) KONFIGURASI FILE
# ============================================================
DATA_FILE = "IPLM_clean_manual_131225.xlsx"
META_KAB_FILE = "Data_populasi_Kab_kota.xlsx"
META_PROV_FILE = "Data_populasi_propinsi.xlsx"
TARGET_COVERAGE = 0.68
# ============================================================
# 1b) LLM
# ============================================================
USE_LLM = True
LLM_MODEL_NAME = "meta-llama/Meta-Llama-3-8B-Instruct"
HF_TOKEN = (
os.getenv("HF_SECRET")
or os.getenv("HUGGINGFACEHUB_API_TOKEN")
or os.getenv("HF_API_TOKEN")
)
_HF_CLIENT = None
def get_llm_client():
global _HF_CLIENT
if _HF_CLIENT is not None:
return _HF_CLIENT
try:
if HF_TOKEN:
_HF_CLIENT = InferenceClient(model=LLM_MODEL_NAME, token=HF_TOKEN)
else:
_HF_CLIENT = InferenceClient(model=LLM_MODEL_NAME)
return _HF_CLIENT
except Exception:
_HF_CLIENT = None
return None
# ============================================================
# 2) UTIL
# ============================================================
def _canon(s: str) -> str:
return re.sub(r"[^a-z0-9]+", "", str(s).lower())
def pick_col(df, candidates):
for c in candidates:
if c in df.columns:
return c
can_map = {_canon(c): c for c in df.columns}
for c in candidates:
k = _canon(c)
if k in can_map:
return can_map[k]
return None
def coerce_num(val):
if pd.isna(val):
return np.nan
t = str(val).strip()
if t == "" or t in {"-", "β", "β"}:
return np.nan
t = t.replace("\u00a0", " ").replace("Rp", "").replace("%", "")
t = re.sub(r"[^0-9,.\-]", "", t)
if t.count(".") > 1 and t.count(",") == 1:
t = t.replace(".", "").replace(",", ".")
elif t.count(",") > 1 and t.count(".") == 1:
t = t.replace(",", "")
elif t.count(",") == 1 and t.count(".") == 0:
t = t.replace(",", ".")
else:
t = t.replace(",", "")
try:
return float(t)
except Exception:
return np.nan
def norm_kew(v):
if pd.isna(v):
return None
t = str(v).strip().upper()
if "KAB" in t or "KOTA" in t:
return "KAB/KOTA"
if "PROV" in t:
return "PROVINSI"
if "PUSAT" in t or "NASIONAL" in t:
return "PUSAT"
return t
def _norm_text(x):
if pd.isna(x):
return None
t = str(x).strip().upper()
return " ".join(t.split())
# ---- Normalisasi PROV (untuk join) ----
def norm_prov_label(s):
if pd.isna(s):
return None
t = str(s).upper().strip()
t = " ".join(t.split())
# buang prefix
t = re.sub(r"^\s*(PROVINSI|PROPINSI)\s+", "", t)
# buang tanda baca
t = re.sub(r"[^A-Z0-9 ]+", " ", t)
t = " ".join(t.split())
# key
return re.sub(r"[^A-Z0-9]+", "", t)
# ---- Normalisasi KAB/KOTA (untuk join) ----
def norm_kab_label(s):
"""
FIX UTAMA:
- Samakan variasi "KABUPATEN/KAB./KAB" dan "KOTA ADM./KOTA ADMINISTRASI"
- Hapus kata 'DAN' agar match kasus: "PANGKAJENE DAN KEPULAUAN" vs "PANGKAJENE KEPULAUAN"
- Buang simbol, spasi ganda
"""
if pd.isna(s):
return None
t = str(s).upper().strip()
t = " ".join(t.split())
# seragamkan kab/kota
t = t.replace("KABUPATEN", "KAB")
t = t.replace("KAB.", "KAB")
t = t.replace("KOTA ADMINISTRASI", "KOTA")
t = t.replace("KOTA ADM.", "KOTA")
t = t.replace("KOTA.", "KOTA")
# FIX: buang "DAN" sebagai stopword join
t = re.sub(r"\bDAN\b", " ", t)
# bersihin simbol
t = re.sub(r"[^A-Z0-9 ]+", " ", t)
t = " ".join(t.split())
return re.sub(r"[^A-Z0-9]+", "", t)
# ---- Display bersih (untuk dropdown/UI) ----
def clean_prov_display(s):
if pd.isna(s):
return None
t = str(s).upper().strip()
t = " ".join(t.split())
t = t.replace("PROPINSI", "PROVINSI")
while t.startswith("PROVINSI PROVINSI "):
t = t.replace("PROVINSI PROVINSI ", "PROVINSI ", 1)
t = t.replace("PROVINSI PROVINSI ", "PROVINSI ")
if not t.startswith("PROVINSI "):
t = "PROVINSI " + t
return t
def clean_kab_display(s):
if pd.isna(s):
return None
t = str(s).upper().strip()
t = " ".join(t.split())
t = t.replace("KABUPATEN", "KAB.")
t = t.replace("KAB ", "KAB. ")
t = t.replace("KOTA ADMINISTRASI", "KOTA")
# rapikan variasi "DAN" supaya konsisten tampilan juga
t = re.sub(r"\bDAN\b", " ", t)
t = " ".join(t.split())
return t
def make_pie_plotly(num, den, title):
if not HAS_KALEIDO:
return None
if den is None or pd.isna(den) or den <= 0:
values = [0, 1]
labels = ["Terjangkau", "Belum Terjangkau"]
else:
num = 0 if pd.isna(num) else float(num)
den = float(den)
values = [max(num, 0), max(den - num, 0)]
labels = ["Terjangkau", "Belum Terjangkau"]
fig = px.pie(values=values, names=labels, title=title, hole=0.35)
tmp = tempfile.mktemp(suffix=".png")
try:
fig.write_image(tmp, scale=2)
return tmp
except Exception:
return None
# ============================================================
# 3) LOAD DATA (DM + META)
# ============================================================
DATA_INFO = ""
df_all_raw = None
meta_kab_df = None # kab_key -> target total + opsional sekolah/umum
meta_prov_df = None # prov_key -> target total
prov_col_glob = None
kab_col_glob = None
kew_col_glob = None
jenis_col_glob = None
subjenis_col_glob = None
nama_col_glob = None
extra_info = []
# ---- Load DM ----
try:
fp = Path(DATA_FILE)
if not fp.exists():
raise FileNotFoundError(f"File tidak ditemukan: {DATA_FILE}")
xls = pd.ExcelFile(fp)
frames = [pd.read_excel(fp, sheet_name=s) for s in xls.sheet_names]
df_all_raw = pd.concat(frames, ignore_index=True, sort=False)
prov_col_glob = pick_col(df_all_raw, ["provinsi", "Provinsi", "PROVINSI"])
kab_col_glob = pick_col(df_all_raw, ["kab_kota", "kab/kota", "Kab/Kota", "KAB/KOTA", "kabupaten_kota", "kota"])
kew_col_glob = pick_col(df_all_raw, ["kewenangan", "jenis_kewenangan", "Kewenangan", "KEWENANGAN"])
jenis_col_glob = pick_col(df_all_raw, ["jenis_perpustakaan", "JENIS_PERPUSTAKAAN", "Jenis Perpustakaan"])
subjenis_col_glob = pick_col(df_all_raw, ["sub_jenis_perpus", "Sub Jenis", "SubJenis", "subjenis", "jenjang"])
nama_col_glob = pick_col(df_all_raw, ["nm_perpustakaan", "nama_perpustakaan", "nm_instansi_lembaga", "Nama Perpustakaan"])
if kew_col_glob:
df_all_raw["KEW_NORM"] = df_all_raw[kew_col_glob].apply(norm_kew)
else:
df_all_raw["KEW_NORM"] = None
val_map_jenis = {
"PERPUSTAKAAN SEKOLAH": "sekolah",
"SEKOLAH": "sekolah",
"PERPUSTAKAAN UMUM": "umum",
"UMUM": "umum",
"PERPUSTAKAAN DAERAH": "umum",
"PERPUSTAKAAN KHUSUS": "khusus",
"KHUSUS": "khusus",
"PERPUSTAKAAN PERGURUAN TINGGI": "khusus",
"PERGURUAN TINGGI": "khusus",
}
if jenis_col_glob:
df_all_raw["_dataset"] = df_all_raw[jenis_col_glob].apply(_norm_text).map(val_map_jenis)
else:
df_all_raw["_dataset"] = None
if prov_col_glob and prov_col_glob in df_all_raw.columns:
df_all_raw["prov_clean"] = df_all_raw[prov_col_glob].apply(clean_prov_display)
else:
df_all_raw["prov_clean"] = None
if kab_col_glob and kab_col_glob in df_all_raw.columns:
df_all_raw["kab_clean"] = df_all_raw[kab_col_glob].apply(clean_kab_display)
else:
df_all_raw["kab_clean"] = None
DATA_INFO = f"Data terbaca dari: **{DATA_FILE}** | Jumlah baris: **{len(df_all_raw)}**"
except Exception as e:
df_all_raw = None
DATA_INFO = f"β οΈ Gagal memuat `{DATA_FILE}` | Error: `{e}`"
# ---- Meta Kab/Kota ----
try:
meta_kab_raw = pd.read_excel(META_KAB_FILE)
col_kab = pick_col(meta_kab_raw, ["KABUPATEN_KOTA", "KAB/KOTA", "Kab/Kota", "Kab_Kota", "kab/kota", "kabupaten_kota"])
col_target_total = pick_col(meta_kab_raw, ["sampel_total", "Sampel_total", "SAMPEL_TOTAL"])
col_target_umum = pick_col(meta_kab_raw, ["Sampel_umum_68%", "sampel_umum_68%", "SAMPEL_UMUM_68%"])
col_target_sek = pick_col(meta_kab_raw, ["Sampel_sekolah_68%", "sampel_sekolah_68%", "SAMPEL_SEKOLAH_68%"])
if col_kab and col_target_total:
meta_kab_df = pd.DataFrame({
"Kab_Kota_Label": meta_kab_raw[col_kab].astype(str).str.strip(),
"Target_Total_68": meta_kab_raw[col_target_total].apply(coerce_num),
})
meta_kab_df["Target_Umum_68"] = meta_kab_raw[col_target_umum].apply(coerce_num) if col_target_umum else np.nan
meta_kab_df["Target_Sekolah_68"] = meta_kab_raw[col_target_sek].apply(coerce_num) if col_target_sek else np.nan
meta_kab_df["kab_key"] = meta_kab_df["Kab_Kota_Label"].apply(norm_kab_label)
meta_kab_df = meta_kab_df.groupby("kab_key", as_index=False).agg({
"Kab_Kota_Label": "first",
"Target_Total_68": "first",
"Target_Umum_68": "first",
"Target_Sekolah_68": "first",
})
extra_info.append(f"Meta Kab/Kota terbaca: **{META_KAB_FILE}** (n={len(meta_kab_df)}) | Target=`sampel_total`")
else:
meta_kab_df = None
extra_info.append(f"β οΈ Kolom `KABUPATEN_KOTA` atau `sampel_total` tidak ditemukan di `{META_KAB_FILE}`")
except Exception as e:
meta_kab_df = None
extra_info.append(f"β οΈ Gagal memuat `{META_KAB_FILE}` ({e})")
# ---- Meta Provinsi ----
try:
meta_prov_raw = pd.read_excel(META_PROV_FILE)
col_prov = pick_col(meta_prov_raw, ["Provinsi", "provinsi", "PROVINSI", "NAMA_PROVINSI", "Nama Provinsi", "nm_prov", "nm_provinsi", "prov"])
# banyak variasi spasi/underscore
col_target_total = pick_col(meta_prov_raw, ["total _sampel", "total_sampel", "TOTAL _SAMPEL", "TOTAL_SAMPEL", "total sampel", "TOTAL SAMPEL"])
if col_prov and col_target_total:
meta_prov_df = pd.DataFrame({
"Provinsi_Label": meta_prov_raw[col_prov].astype(str).str.strip(),
"Target_Total_68": meta_prov_raw[col_target_total].apply(coerce_num),
})
meta_prov_df["prov_key"] = meta_prov_df["Provinsi_Label"].apply(norm_prov_label)
meta_prov_df = meta_prov_df.groupby("prov_key", as_index=False).agg({
"Provinsi_Label": "first",
"Target_Total_68": "first",
})
extra_info.append(f"Meta Provinsi terbaca: **{META_PROV_FILE}** ({len(meta_prov_df)} provinsi) | Target=`{col_target_total}`")
else:
meta_prov_df = None
extra_info.append(f"β οΈ Kolom `Provinsi` atau `total _sampel` tidak ditemukan di `{META_PROV_FILE}`")
except Exception as e:
meta_prov_df = None
extra_info.append(f"β οΈ Gagal memuat file populasi provinsi: {e}")
if extra_info:
DATA_INFO = DATA_INFO + "<br>" + "<br>".join(extra_info)
# ============================================================
# 4) DROPDOWN
# ============================================================
def all_prov_choices():
if df_all_raw is None or "prov_clean" not in df_all_raw.columns:
return ["(Semua)"]
s = df_all_raw["prov_clean"].dropna().astype(str).str.strip()
vals = sorted([o for o in s.unique() if o and o != ""])
return ["(Semua)"] + vals
def get_kab_choices_for_prov(prov_value):
if df_all_raw is None or "kab_clean" not in df_all_raw.columns:
return ["(Semua)"]
if prov_value is None or prov_value == "(Semua)":
s = df_all_raw["kab_clean"].dropna().astype(str).str.strip()
else:
m = df_all_raw["prov_clean"].astype(str).str.strip() == str(prov_value).strip()
s = df_all_raw.loc[m, "kab_clean"].dropna().astype(str).str.strip()
vals = sorted([x for x in s.unique() if x and x != ""])
return ["(Semua)"] + vals
def all_kew_choices():
if df_all_raw is None:
return ["(Semua)"]
s = df_all_raw.get("KEW_NORM", pd.Series(dtype=object)).dropna().astype(str).str.strip()
vals = sorted([o for o in s.unique() if o != ""])
return ["(Semua)"] + vals if vals else ["(Semua)"]
prov_choices = all_prov_choices()
kab_choices = get_kab_choices_for_prov(prov_choices[0] if prov_choices else "(Semua)")
kew_choices = all_kew_choices()
default_kew = "KAB/KOTA" if "KAB/KOTA" in kew_choices else (kew_choices[0] if kew_choices else "(Semua)")
# ============================================================
# 5) VERIFIKASI GAP β TARGET DARI META (bukan hitung ulang)
# ============================================================
def compute_gap_verification(df_filtered: pd.DataFrame, kew_value: str) -> pd.DataFrame:
if df_filtered is None or len(df_filtered) == 0:
return pd.DataFrame()
kew_norm = str(kew_value or "").upper()
# =================== KAB/KOTA ===================
if ("KAB" in kew_norm or "KOTA" in kew_norm):
if "kab_clean" not in df_filtered.columns or meta_kab_df is None:
return pd.DataFrame({"Info": ["Kolom kab_clean atau meta kab tidak tersedia."]})
tmp = df_filtered.copy()
tmp = tmp[pd.notna(tmp["kab_clean"])]
if tmp.empty:
return pd.DataFrame()
tmp["kab_key"] = tmp["kab_clean"].apply(norm_kab_label)
g_total = tmp.groupby("kab_key").size().rename("Sampel Total (DM)").reset_index()
tmp_sek = tmp[tmp["_dataset"] == "sekolah"].copy() if "_dataset" in tmp.columns else tmp.copy()
g_sek_total = tmp_sek.groupby("kab_key").size().rename("Sampel Sekolah (DM)").reset_index()
tmp_umum = tmp[tmp["_dataset"] == "umum"].copy() if "_dataset" in tmp.columns else tmp.copy()
g_umum = tmp_umum.groupby("kab_key").size().rename("Sampel Umum (DM)").reset_index()
merged = (
g_total
.merge(g_sek_total, on="kab_key", how="left")
.merge(g_umum, on="kab_key", how="left")
.merge(
meta_kab_df[["kab_key", "Kab_Kota_Label", "Target_Total_68", "Target_Umum_68", "Target_Sekolah_68"]],
on="kab_key", how="left"
)
)
for c in ["Sampel Total (DM)", "Sampel Sekolah (DM)", "Sampel Umum (DM)"]:
merged[c] = merged[c].fillna(0).astype(int)
# marker match meta
merged["META_MATCH"] = np.where(pd.notna(merged["Target_Total_68"]), "YA", "TIDAK")
# target dari meta (ceil biar integer ke atas)
merged["Target Total (68%)"] = np.ceil(pd.to_numeric(merged["Target_Total_68"], errors="coerce"))
merged["Target Sekolah (68%)"] = np.ceil(pd.to_numeric(merged["Target_Sekolah_68"], errors="coerce"))
merged["Target Umum (68%)"] = np.ceil(pd.to_numeric(merged["Target_Umum_68"], errors="coerce"))
# kekurangan: kalau target NaN -> NaN (bukan 0)
def _gap(target_series, sampel_series):
t = pd.to_numeric(target_series, errors="coerce")
s = pd.to_numeric(sampel_series, errors="coerce").fillna(0)
out = t - s
out = out.where(t.notna(), np.nan)
return out.clip(lower=0)
merged["Kekurangan Sampel Total"] = _gap(merged["Target Total (68%)"], merged["Sampel Total (DM)"])
merged["Kekurangan Sampel Sekolah"] = _gap(merged["Target Sekolah (68%)"], merged["Sampel Sekolah (DM)"])
merged["Kekurangan Sampel Umum"] = _gap(merged["Target Umum (68%)"], merged["Sampel Umum (DM)"])
out = pd.DataFrame({
"Kab/Kota": merged["Kab_Kota_Label"].fillna(merged["kab_key"]),
"META_MATCH": merged["META_MATCH"],
"Sampel Total (DM)": merged["Sampel Total (DM)"],
"Target Total (68%) [META:sampel_total]": merged["Target Total (68%)"],
"Kekurangan Sampel Total": merged["Kekurangan Sampel Total"],
"Sampel Sekolah (DM)": merged["Sampel Sekolah (DM)"],
"Target Sekolah (68%) [META]": merged["Target Sekolah (68%)"],
"Kekurangan Sampel Sekolah": merged["Kekurangan Sampel Sekolah"],
"Sampel Umum (DM)": merged["Sampel Umum (DM)"],
"Target Umum (68%) [META]": merged["Target Umum (68%)"],
"Kekurangan Sampel Umum": merged["Kekurangan Sampel Umum"],
})
# cast tampilan angka: biarkan NaN tetap NaN supaya ketahuan mismatch meta
num_cols = [c for c in out.columns if c not in {"Kab/Kota", "META_MATCH"}]
for c in num_cols:
out[c] = pd.to_numeric(out[c], errors="coerce")
return out.sort_values(["META_MATCH", "Kab/Kota"], ascending=[True, True]).reset_index(drop=True)
# =================== PROVINSI ===================
if ("PROV" in kew_norm):
if meta_prov_df is None or "prov_clean" not in df_filtered.columns:
return pd.DataFrame({"Info": ["Meta provinsi atau kolom prov_clean tidak tersedia."]})
tmp = df_filtered.copy()
tmp = tmp[pd.notna(tmp["prov_clean"])]
if tmp.empty:
return pd.DataFrame({"Info": ["Tidak ada data sampel kewenangan provinsi."]})
tmp["prov_key"] = tmp["prov_clean"].apply(norm_prov_label)
g_total = tmp.groupby("prov_key").size().rename("Sampel Total (DM)").reset_index()
merged = g_total.merge(meta_prov_df[["prov_key", "Provinsi_Label", "Target_Total_68"]], on="prov_key", how="left")
merged["Sampel Total (DM)"] = merged["Sampel Total (DM)"].fillna(0).astype(int)
merged["META_MATCH"] = np.where(pd.notna(merged["Target_Total_68"]), "YA", "TIDAK")
merged["Target Total (68%)"] = np.ceil(pd.to_numeric(merged["Target_Total_68"], errors="coerce"))
t = pd.to_numeric(merged["Target Total (68%)"], errors="coerce")
s = pd.to_numeric(merged["Sampel Total (DM)"], errors="coerce").fillna(0)
gap = (t - s).where(t.notna(), np.nan).clip(lower=0)
merged["Kekurangan Sampel Total"] = gap
out = pd.DataFrame({
"Provinsi": merged["Provinsi_Label"].fillna(merged["prov_key"]),
"META_MATCH": merged["META_MATCH"],
"Sampel Total (DM)": merged["Sampel Total (DM)"],
"Target Total (68%) [META:total _sampel]": merged["Target Total (68%)"],
"Kekurangan Sampel Total": merged["Kekurangan Sampel Total"],
})
for c in ["Sampel Total (DM)", "Target Total (68%) [META:total _sampel]", "Kekurangan Sampel Total"]:
out[c] = pd.to_numeric(out[c], errors="coerce")
return out.sort_values(["META_MATCH", "Provinsi"], ascending=[True, True]).reset_index(drop=True)
return pd.DataFrame({"Info": ["Kewenangan tidak dikenali / tidak didukung."]})
# ============================================================
# 6) GRAFIK GAP β pakai Kekurangan Total (abaikan NaN)
# ============================================================
def make_gap_figure(verif_df: pd.DataFrame, kew_value: str) -> go.Figure:
fig = go.Figure()
if verif_df is None or verif_df.empty:
fig.update_layout(title="Kekurangan Sampel (tidak ada data)", xaxis_title="Unit", yaxis_title="Kekurangan (unit)")
return fig
kew_norm = str(kew_value or "").upper()
def _num(s):
return pd.to_numeric(s, errors="coerce").fillna(0).astype(int)
if ("KAB" in kew_norm or "KOTA" in kew_norm) and ("Kab/Kota" in verif_df.columns):
dfp = verif_df.copy()
dfp["gap_total"] = _num(dfp.get("Kekurangan Sampel Total", 0))
dfp = dfp.sort_values("gap_total", ascending=False)
x = dfp["Kab/Kota"].astype(str).tolist()
gap_total = _num(dfp["gap_total"])
fig.add_trace(go.Bar(
x=x, y=gap_total, name="Kekurangan Total",
text=gap_total, textposition="outside",
hovertemplate="%{x}<br>Kekurangan total: %{y} unit<extra></extra>"
))
fig.update_layout(
title=f"Kekurangan Sampel TOTAL (KAB/KOTA) β Target {int(TARGET_COVERAGE*100)}% (META)",
xaxis_title="Kab/Kota", yaxis_title="Kekurangan (unit)",
margin=dict(l=40, r=20, t=60, b=140),
)
fig.update_xaxes(tickangle=-35)
return fig
if ("PROV" in kew_norm) and ("Provinsi" in verif_df.columns):
dfp = verif_df.copy()
dfp["gap_total"] = _num(dfp.get("Kekurangan Sampel Total", 0))
dfp = dfp.sort_values("gap_total", ascending=False)
x = dfp["Provinsi"].astype(str).tolist()
gap_total = _num(dfp["gap_total"])
fig.add_trace(go.Bar(
x=x, y=gap_total, name="Kekurangan Total",
text=gap_total, textposition="outside",
hovertemplate="%{x}<br>Kekurangan total: %{y} unit<extra></extra>"
))
fig.update_layout(
title=f"Kekurangan Sampel TOTAL (PROVINSI) β Target {int(TARGET_COVERAGE*100)}% (META)",
xaxis_title="Provinsi", yaxis_title="Kekurangan (unit)",
margin=dict(l=40, r=20, t=60, b=140),
)
fig.update_xaxes(tickangle=-35)
return fig
fig.update_layout(title="Kekurangan Sampel β format data tidak dikenali", xaxis_title="Unit", yaxis_title="Kekurangan (unit)")
return fig
# ============================================================
# 7) LLM NARASI
# ============================================================
def build_context_gap(verif_df: pd.DataFrame, prov: str, kab: str, kew: str) -> str:
wilayah = kab if kab and kab != "(Semua)" else (prov if prov and prov != "(Semua)" else "NASIONAL")
lines = []
lines.append(f"Wilayah filter: {wilayah}")
lines.append(f"Kewenangan: {kew}")
lines.append(f"Target pengumpulan: {int(TARGET_COVERAGE*100)}% (TARGET diambil dari META).")
lines.append(f"Jumlah unit analisis: {len(verif_df)}")
if "Kekurangan Sampel Total" in verif_df.columns:
total_gap = int(pd.to_numeric(verif_df["Kekurangan Sampel Total"], errors="coerce").fillna(0).sum())
lines.append(f"Total Kekurangan Sampel Total: {total_gap}")
if "META_MATCH" in verif_df.columns:
n_no = int((verif_df["META_MATCH"] == "TIDAK").sum())
if n_no > 0:
lines.append(f"PERINGATAN: ada {n_no} unit yang tidak match ke META (target tidak tersedia).")
keycol = "Kab/Kota" if "Kab/Kota" in verif_df.columns else ("Provinsi" if "Provinsi" in verif_df.columns else verif_df.columns[0])
if "Kekurangan Sampel Total" in verif_df.columns:
t = verif_df.copy()
t["Kekurangan Sampel Total"] = pd.to_numeric(t["Kekurangan Sampel Total"], errors="coerce").fillna(0)
top = t.sort_values("Kekurangan Sampel Total", ascending=False).head(10)
lines.append("\nTop prioritas (gap terbesar):")
for _, r in top.iterrows():
lines.append(f"- {r[keycol]}: gap_total={int(r['Kekurangan Sampel Total'])}")
return "\n".join(lines)
def rule_based_gap_report(verif_df: pd.DataFrame, prov: str, kab: str, kew: str) -> str:
if verif_df is None or verif_df.empty:
return "Tidak ada data verifikasi yang dapat dilaporkan."
wilayah = kab if kab and kab != "(Semua)" else (prov if prov and prov != "(Semua)" else "NASIONAL")
lines = []
lines.append("## Ringkasan Kekurangan Sampel IPLM (Rule-based)\n")
lines.append(f"Wilayah: {wilayah}")
lines.append(f"Kewenangan: {kew}")
lines.append(f"Target pengumpulan: {int(TARGET_COVERAGE*100)}% (TARGET diambil dari META: kab/kota=`sampel_total`, provinsi=`total _sampel`).")
lines.append(f"Jumlah unit analisis: {len(verif_df)}\n")
if "Kekurangan Sampel Total" in verif_df.columns:
total_gap = int(pd.to_numeric(verif_df["Kekurangan Sampel Total"], errors="coerce").fillna(0).sum())
lines.append(f"- Total Kekurangan Sampel Total: **{total_gap}** unit yang perlu dilengkapi menuju target.")
else:
lines.append("Kolom kekurangan sampel total tidak ditemukan.")
if "META_MATCH" in verif_df.columns:
n_no = int((verif_df["META_MATCH"] == "TIDAK").sum())
if n_no > 0:
lines.append(f"- Catatan: **{n_no}** unit belum match ke META, sehingga target tidak tersedia (perlu pembenahan label/meta).")
lines.append("\nArah tindak lanjut: prioritaskan wilayah dengan gap terbesar, dan pastikan mapping unit ke META valid untuk monitoring yang akurat.")
return "\n".join(lines)
def generate_llm_gap_report(verif_df: pd.DataFrame, prov: str, kab: str, kew: str) -> str:
ctx = build_context_gap(verif_df, prov, kab, kew)
client = get_llm_client()
if client is None or not USE_LLM:
return "β οΈ LLM tidak tersedia, memakai laporan rule-based.\n\n" + rule_based_gap_report(verif_df, prov, kab, kew)
system_prompt = (
"Anda adalah analis kebijakan dan manajer program IPLM. "
"Fokus Anda hanya pada gap sampel (kekurangan unit) dan strategi menutup kekurangan tersebut."
)
user_prompt = f"""
DATA RINGKAS GAP SAMPEL IPLM:
{ctx}
TULIS LAPORAN (BAHASA INDONESIA FORMAL) DENGAN STRUKTUR:
1) Ringkasan kondisi pengumpulan data (1 paragraf).
2) Total kekurangan sampel yang masih perlu dikumpulkan menuju target {int(TARGET_COVERAGE*100)}% (1 paragraf).
3) Prioritas wilayah (gap terbesar) dan alasan operasional (1 paragraf).
4) Rencana aksi 30β60 hari (naratif, bukan bullet).
BATASAN:
- Jangan membahas indeks/skor IPLM.
- Tegaskan bahwa target berasal dari META: kab/kota=`sampel_total`, provinsi=`total _sampel`.
- Jika ada unit META_MATCH=TIDAK, sebutkan sebagai isu kualitas data/master reference.
"""
try:
resp = client.chat_completion(
model=LLM_MODEL_NAME,
messages=[{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt}],
max_tokens=900,
temperature=0.2,
top_p=0.9,
)
text = resp.choices[0].message.content.strip()
if not text:
raise ValueError("Respon LLM kosong.")
return text
except Exception as e:
return (
"β οΈ Error saat memanggil LLM, memakai laporan rule-based.\n\n"
f"(Detail teknis: {repr(e)})\n\n"
+ rule_based_gap_report(verif_df, prov, kab, kew)
)
# ============================================================
# 8) WORD REPORT
# ============================================================
def generate_word_report_gap(verif_df: pd.DataFrame, prov: str, kab: str, kew: str, analysis_text: str):
wilayah = kab if kab and kab != "(Semua)" else (prov if prov and prov != "(Semua)" else "NASIONAL")
doc = Document()
doc.add_heading(f"Laporan Kekurangan Sampel IPLM β {wilayah}", level=1)
doc.add_paragraph(f"Kewenangan: {kew}")
doc.add_paragraph(f"Target pengumpulan: {int(TARGET_COVERAGE*100)}% (TARGET diambil dari META).")
doc.add_paragraph(f"Jumlah unit analisis: {len(verif_df)}")
doc.add_heading("Tabel Verifikasi (Target & Kekurangan Sampel)", level=2)
view = verif_df.copy()
if len(view) > 200:
doc.add_paragraph("Catatan: tabel dipotong (200 baris pertama) untuk menjaga ukuran dokumen.")
view = view.head(200)
table = doc.add_table(rows=1, cols=len(view.columns))
hdr = table.rows[0].cells
for i, c in enumerate(view.columns):
hdr[i].text = str(c)
for _, row in view.iterrows():
r = table.add_row().cells
for i, c in enumerate(view.columns):
r[i].text = "" if pd.isna(row[c]) else str(row[c])
doc.add_heading("Ringkasan Visual (Opsional)", level=2)
if not HAS_KALEIDO:
doc.add_paragraph("Grafik pie tidak dibuat karena 'kaleido' tidak tersedia di server.")
else:
pie_made = False
if "Sampel Total (DM)" in verif_df.columns:
samp = pd.to_numeric(verif_df["Sampel Total (DM)"], errors="coerce").fillna(0).sum()
tgt_col = None
for c in verif_df.columns:
if "Target Total (68%)" in c:
tgt_col = c
break
if tgt_col:
tgt = pd.to_numeric(verif_df[tgt_col], errors="coerce").fillna(0).sum()
img = make_pie_plotly(samp, tgt, "Capaian TOTAL (DM) terhadap Target TOTAL (META)")
if img:
doc.add_paragraph("Capaian TOTAL terhadap Target TOTAL (META)")
doc.add_picture(img)
pie_made = True
if not pie_made:
doc.add_paragraph("Tidak ada pasangan kolom sampel-target yang valid untuk dibuat pie chart.")
doc.add_heading("Analisis Naratif (LLM)", level=2)
for p in analysis_text.split("\n"):
if p.strip():
doc.add_paragraph(p)
outpath = tempfile.mktemp(suffix=".docx")
doc.save(outpath)
return outpath
# ============================================================
# 9) CORE RUN
# ============================================================
def run_core(prov_value, kab_value, kew_value):
if df_all_raw is None or df_all_raw.empty:
empty = pd.DataFrame()
return empty, empty, None, None, None, None, "Data DM tidak terbaca.", "Tidak ada analisis."
df = df_all_raw.copy()
if prov_value and prov_value != "(Semua)" and "prov_clean" in df.columns:
df = df[df["prov_clean"].astype(str).str.strip() == str(prov_value).strip()]
if kab_value and kab_value != "(Semua)" and "kab_clean" in df.columns:
df = df[df["kab_clean"].astype(str).str.strip() == str(kab_value).strip()]
if kew_value and kew_value != "(Semua)":
df = df[df["KEW_NORM"] == kew_value]
if len(df) == 0:
empty = pd.DataFrame()
return empty, empty, None, None, None, None, "Tidak ada data untuk kombinasi filter yang dipilih.", "Tidak ada analisis."
verif_df = compute_gap_verification(df, kew_value)
cols = []
for c in ["prov_clean", "kab_clean", nama_col_glob, kew_col_glob, jenis_col_glob, subjenis_col_glob, "_dataset", "KEW_NORM"]:
if c and c in df.columns and c not in cols:
cols.append(c)
detail_df = df[cols].copy() if cols else df.copy()
fig_gap = make_gap_figure(verif_df, kew_value)
tmpdir = tempfile.mkdtemp()
rekap_excel_path = os.path.join(tmpdir, "Rekap_Kekurangan_Sampel_IPLM_Target_META.xlsx")
raw_dm_path = os.path.join(tmpdir, "DM_Subset_Raw.xlsx")
with pd.ExcelWriter(rekap_excel_path, engine="openpyxl") as w:
verif_df.to_excel(w, sheet_name="Verifikasi_Gap_Target_META", index=False)
detail_df.to_excel(w, sheet_name="Detail_Subset_DM", index=False)
df.to_excel(raw_dm_path, index=False)
analysis_text = generate_llm_gap_report(verif_df, prov_value, kab_value, kew_value)
word_path = generate_word_report_gap(verif_df, prov_value, kab_value, kew_value, analysis_text)
# message ringkas + warning mismatch meta
warn = ""
if "META_MATCH" in verif_df.columns:
n_no = int((verif_df["META_MATCH"] == "TIDAK").sum())
if n_no > 0:
warn = f" β οΈ {n_no} unit tidak match ke META (target NaN)."
msg = f"OK. Subset DM: {len(df)} baris | Verifikasi: {len(verif_df)} baris | Target: {int(TARGET_COVERAGE*100)}% (META).{warn}"
return verif_df, detail_df, fig_gap, rekap_excel_path, raw_dm_path, word_path, msg, analysis_text
def on_prov_change(prov_value):
return gr.update(choices=get_kab_choices_for_prov(prov_value), value="(Semua)")
# ============================================================
# 10) UI
# ============================================================
with gr.Blocks() as demo:
gr.Markdown(
f"""
# Dashboard Kekurangan Sampel IPLM β Target {int(TARGET_COVERAGE*100)}% (Tanpa Hitung Indeks)
**Target dari META (bukan hitung ulang):**
- Kab/Kota: `{META_KAB_FILE}` kolom **`sampel_total`**
- Provinsi: `{META_PROV_FILE}` kolom **`total _sampel`** (variasi spasi/underscore didukung)
{DATA_INFO}
"""
)
with gr.Row():
dd_prov = gr.Dropdown(label="Provinsi", choices=prov_choices, value=prov_choices[0])
dd_kab = gr.Dropdown(label="Kab/Kota", choices=kab_choices, value=kab_choices[0])
dd_kew = gr.Dropdown(label="Kewenangan", choices=kew_choices, value=default_kew)
dd_prov.change(fn=on_prov_change, inputs=dd_prov, outputs=dd_kab)
run_btn = gr.Button("Hitung Kekurangan Sampel")
msg_out = gr.Markdown()
gr.Markdown("### Verifikasi (Target & Kekurangan Sampel) β Target dari META")
verif_out = gr.DataFrame(interactive=False)
gr.Markdown("### Grafik Kekurangan Sampel TOTAL (unit)")
gap_plot_out = gr.Plot()
gr.Markdown("### Detail Subset DM (yang terfilter)")
detail_out = gr.DataFrame(interactive=False)
gr.Markdown("### Analisis Naratif (LLM)")
analysis_out = gr.Markdown()
with gr.Row():
rekap_excel_out = gr.File(label="Download Rekap (Verifikasi + Detail) (.xlsx)")
raw_dm_out = gr.File(label="Download Data Mentah Subset DM (.xlsx)")
word_out = gr.File(label="Download Laporan Word (.docx)")
run_btn.click(
fn=run_core,
inputs=[dd_prov, dd_kab, dd_kew],
outputs=[
verif_out,
detail_out,
gap_plot_out,
rekap_excel_out,
raw_dm_out,
word_out,
msg_out,
analysis_out
],
)
demo.launch()
|