Update app.py
Browse files
app.py
CHANGED
|
@@ -1,195 +1,208 @@
|
|
| 1 |
# -*- coding: utf-8 -*-
|
| 2 |
"""
|
| 3 |
-
app.py — IPLM
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
- Provinsi/Kab key join stabil (Kep Seribu beres)
|
| 19 |
-
- Output lengkap:
|
| 20 |
-
1) Indeks Agregat (FINAL)
|
| 21 |
-
2) Agregat (FINAL) per Jenis
|
| 22 |
-
3) Detail (FINAL) per Unit
|
| 23 |
-
4) Agregat (RealScore) per Jenis (Subindeks & Dimensi)
|
| 24 |
-
5) Detail (RealScore) per Unit (Subindeks & Dimensi + Indikator raw)
|
| 25 |
-
6) Coverage Populasi vs Sampel (Target 68%) + BAR chart (dibuat TERBACA via HTML)
|
| 26 |
-
7) Bell curve per Jenis (RealScore) — seperti contoh kamu
|
| 27 |
-
8) Analisis (LLM opsional) + Word report opsional
|
| 28 |
-
|
| 29 |
-
Catatan penting untuk kasus Kep. Seribu:
|
| 30 |
-
- Coverage sekolah (SD+SMP) = 0 biasanya karena:
|
| 31 |
-
(a) kolom SD+SMP di meta kab/kota tidak terdeteksi, ATAU
|
| 32 |
-
(b) baris Kep Seribu tidak ada di meta, ATAU
|
| 33 |
-
(c) key join kab/kota tidak match.
|
| 34 |
-
Kode ini memperkeras normalisasi & deteksi kolom meta.
|
| 35 |
"""
|
| 36 |
|
| 37 |
import os
|
| 38 |
import re
|
| 39 |
import math
|
| 40 |
-
import json
|
| 41 |
import tempfile
|
| 42 |
from pathlib import Path
|
| 43 |
-
from collections import Counter
|
| 44 |
|
|
|
|
| 45 |
import numpy as np
|
| 46 |
import pandas as pd
|
| 47 |
-
import gradio as gr
|
| 48 |
import plotly.graph_objects as go
|
| 49 |
import plotly.express as px
|
|
|
|
| 50 |
from sklearn.preprocessing import PowerTransformer
|
| 51 |
|
| 52 |
-
#
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
|
|
|
|
|
|
|
|
|
| 58 |
|
| 59 |
-
TARGET_FRAC = 0.68
|
| 60 |
|
| 61 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
W_KEPATUHAN = 0.30
|
| 63 |
W_KINERJA = 0.70
|
| 64 |
|
| 65 |
-
|
| 66 |
-
#
|
| 67 |
-
#
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
else:
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
return re.sub(r"[^a-z0-9]+", "", str(s).lower())
|
| 85 |
-
|
| 86 |
-
def clean_spaces(s: str) -> str:
    """Collapse internal whitespace runs in *s* to single spaces and trim the ends."""
    collapsed = re.sub(r"\s+", " ", str(s))
    return collapsed.strip()
|
| 88 |
-
|
| 89 |
-
def pretty_admin_name(s: str, kind: str = "prov") -> str:
    """
    Human-friendly display label for the admin-area dropdowns.
    - PROVINSI JAWA BARAT
    - KOTA SURABAYA / KAB. BANDUNG
    - KAB. ADM. KEPULAUAN SERIBU (still readable)
    """
    label = clean_spaces(str(s)).upper()
    label = label.replace("PROPINSI", "PROVINSI")
    label = re.sub(r"\bKABUPATEN\b", "KAB.", label)
    label = re.sub(r"\bKOTA\s+ADMINISTRASI\b", "KOTA ADM.", label)
    label = re.sub(r"\bKABUPATEN\s+ADMINISTRASI\b", "KAB. ADM.", label)
    label = label.replace("ADMINISTRASI", "ADM.")

    # Tidy spacing around dots ("X ." -> "X.", ".Y" spacing -> ". Y").
    label = re.sub(r"\s+\.", ".", label)
    label = re.sub(r"\.\s+", ". ", label)

    # Provinces get an explicit PROVINSI prefix when missing.
    if kind == "prov" and not label.startswith("PROVINSI "):
        label = "PROVINSI " + label
    return label
|
| 111 |
-
|
| 112 |
-
def norm_key(x) -> str:
    """
    Stable join key for province/regency names, built so that:
    KEP. SERIBU == KEPULAUAN SERIBU == KAB. ADM. KEPULAUAN SERIBU
    Returns "" for missing values.
    """
    if pd.isna(x):
        return ""
    key = clean_spaces(str(x)).upper()

    # General spelling normalisation.
    key = key.replace("PROPINSI", "PROVINSI")
    key = re.sub(r"\bKABUPATEN\b", "KAB.", key)
    key = re.sub(r"\bKOTA\s+ADMINISTRASI\b", "KOTA ADM.", key)
    key = re.sub(r"\bKABUPATEN\s+ADMINISTRASI\b", "KAB. ADM.", key)
    key = key.replace("ADMINISTRASI", "ADM.")
    key = key.replace("KEP.", "KEPULAUAN")
    key = re.sub(r"\bKEP\b", "KEPULAUAN", key)

    # Special case: any mention of Seribu collapses to the canonical name.
    if "SERIBU" in key:
        key = "KAB. ADM. KEPULAUAN SERIBU"

    # Drop every non-alphanumeric character to form the final key.
    return re.sub(r"[^A-Z0-9]", "", key)
|
| 137 |
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
return "KAB/KOTA"
|
| 144 |
-
if any(x in t for x in ["PROV", "PROP", "PROVINSI", "PROPINSI"]):
|
| 145 |
-
return "PROVINSI"
|
| 146 |
-
if "PUSAT" in t or "NASIONAL" in t:
|
| 147 |
-
return "PUSAT"
|
| 148 |
-
return t
|
| 149 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
|
| 151 |
-
# =========================
|
| 152 |
-
# 2) NUM COERCION (AMAN)
|
| 153 |
-
# =========================
|
| 154 |
def coerce_num(val):
    """Best-effort conversion of a messy spreadsheet cell to ``float``.

    Handles:
    - sequences (Series/ndarray/list/tuple): takes the first element,
      empty ones yield NaN;
    - None and dash placeholders ("-", "–", "—"): NaN;
    - currency ("Rp"), percent signs, NBSP decoration;
    - Indonesian thousands/decimal style "1.234.567,89" as well as the
      English "1,234,567.89" style and a bare decimal comma "12,5".

    Returns ``np.nan`` whenever no number can be extracted.
    """
    # Unwrap single-element containers; empty ones carry no value.
    if isinstance(val, (pd.Series, np.ndarray, list, tuple)):
        if len(val) == 0:
            return np.nan
        val = val[0]

    if val is None:
        return np.nan
    if isinstance(val, (int, float, np.integer, np.floating)):
        return float(val)

    t = str(val).strip()
    # Common "no data" placeholders: empty string, hyphen, en/em dash.
    # (Fix: the dash set literal was left unterminated in the source.)
    if t == "" or t in {"-", "–", "—"}:
        return np.nan

    # Strip non-numeric decoration (NBSP, currency marker, percent sign).
    t = t.replace("\u00a0", " ").replace("Rp", "").replace("%", "")
    t = re.sub(r"[^0-9,.\-]", "", t)

    # 1.234.567,89 -> 1234567.89 (dots as thousands, comma as decimal)
    if t.count(".") > 1 and t.count(",") == 1:
        t = t.replace(".", "").replace(",", ".")
    # 12,5 -> 12.5 (lone comma acts as the decimal point)
    elif t.count(",") == 1 and t.count(".") == 0:
        t = t.replace(",", ".")
    # Otherwise commas are thousands separators: drop them.
    else:
        t = t.replace(",", "")

    try:
        return float(t)
    except Exception:
        return np.nan
|
| 185 |
|
| 186 |
-
def
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
if
|
| 191 |
-
return
|
| 192 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
|
| 194 |
def ceil_int(x):
|
| 195 |
try:
|
|
@@ -208,112 +221,46 @@ def sampling_factor(sample, target):
|
|
| 208 |
except Exception:
|
| 209 |
return 1.0
|
| 210 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
|
| 212 |
-
# =========================
|
| 213 |
-
# 3) LOAD MULTISHEET DM
|
| 214 |
-
# =========================
|
| 215 |
-
def load_multisheet_excel(path: str) -> tuple[pd.DataFrame, list]:
    """Read every sheet of an Excel workbook into one concatenated frame.

    Returns ``(combined_dataframe, sheet_names)``.
    Raises FileNotFoundError when *path* does not exist.
    """
    file_path = Path(path)
    if not file_path.exists():
        raise FileNotFoundError(f"File tidak ditemukan: {path}")

    workbook = pd.ExcelFile(file_path)
    sheet_names = list(workbook.sheet_names)

    frames = []
    for sheet in sheet_names:
        frame = pd.read_excel(file_path, sheet_name=sheet)
        # Deduplicate headers so a later concat does not collide columns.
        frame.columns = make_unique_columns(frame.columns)
        frames.append(frame)

    combined = pd.concat(frames, ignore_index=True, sort=False)
    return combined, sheet_names
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
# =========================
|
| 230 |
-
# 4) AUTO DETECT COLUMNS (DM & META)
|
| 231 |
-
# =========================
|
| 232 |
-
def pick_col(df: pd.DataFrame, candidates: list[str]) -> str | None:
    """Find the first dataframe column matching any candidate name.

    Matching is done on canonicalised names (see ``canon``): first by
    exact canonical equality, then by substring containment as a
    fallback. Returns the original column label, or None when nothing
    matches.
    """
    columns = list(df.columns)
    canonical = {canon(col): col for col in columns}

    # Pass 1: exact canonical match.
    for candidate in candidates:
        key = canon(candidate)
        if key in canonical:
            return canonical[key]

    # Pass 2: candidate contained inside a column's canonical name.
    for col in columns:
        col_key = canon(col)
        if any(canon(candidate) in col_key for candidate in candidates):
            return col

    return None
|
| 246 |
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
kew = pick_col(df, ["kewenangan", "kew"])
|
| 251 |
-
jenis = pick_col(df, ["jenis_perpustakaan", "jenis perpustakaan", "jenis"])
|
| 252 |
-
subjenis = pick_col(df, ["sub_jenis_perpus", "subjenis", "sub_jenis", "sub jenis", "sub jenis perpus"])
|
| 253 |
-
nama = pick_col(df, ["nm_perpustakaan", "nama_perpustakaan", "nama perpus", "nama"])
|
| 254 |
-
|
| 255 |
-
missing = [k for k,v in {"prov":prov, "kab":kab, "kew":kew, "jenis":jenis, "nama":nama}.items() if v is None]
|
| 256 |
-
if missing:
|
| 257 |
-
raise KeyError(f"Kolom DM wajib tidak ketemu: {missing}. Cek header Excel DM kamu.")
|
| 258 |
-
return {"prov":prov, "kab":kab, "kew":kew, "jenis":jenis, "subjenis":subjenis, "nama":nama}
|
| 259 |
-
|
| 260 |
-
def detect_meta_kab(df: pd.DataFrame) -> dict:
    """Detect the relevant columns in the regency/city metadata sheet.

    Returns a dict of resolved column names; the population columns may
    be None when no matching header exists. Raises KeyError when the
    mandatory province / regency columns are missing.
    """
    prov = pick_col(df, ["PROVINSI", "provinsi", "Provinsi"])
    kab = pick_col(df, ["KABUPATEN_KOTA", "kabupaten_kota", "KAB/KOTA", "kab/kota", "Kab/Kota", "KABKOTA", "KAB_KOTA"])
    if prov is None or kab is None:
        raise KeyError("Meta Kab/Kota minimal harus punya kolom provinsi & kab/kota.")

    # Wide candidate list so the SD+SMP population column is found even
    # under unusual header spellings.
    pop_sd_smp = pick_col(df, [
        "TOTAL_SD_SMP", "total_sd_smp", "JUMLAH_SD_SMP", "SD_SMP", "TOTAL_SDSMP",
        "SD+SMP", "SD SMP", "TOTAL SD SMP", "JML SD SMP", "JUMLAH SD SMP"
    ])
    pop_kec_desa = pick_col(df, [
        "TOTAL_KEC_DESA", "total_kec_desa", "KEC_DESA", "TOTAL_KECAMATAN_DESA",
        "KECAMATAN+DESA", "KEC+DESA", "KEC DESA", "TOTAL KEC DESA"
    ])

    col_kec = pick_col(df, ["JUMLAH_KECAMATAN", "jumlah_kecamatan", "KECAMATAN", "JML_KEC", "JML KEC"])
    col_desa = pick_col(df, ["JUMLAH_DESA_KEL", "jumlah_desa_kel", "DESA_KEL", "JML_DESA", "JUMLAH_DESA", "JUMLAH_KELURAHAN", "JML DESA", "JML KEL"])

    return {
        "prov": prov,
        "kab": kab,
        "pop_sd_smp": pop_sd_smp,
        "pop_kec_desa": pop_kec_desa,
        "col_kec": col_kec,
        "col_desa": col_desa,
    }
|
| 282 |
-
|
| 283 |
-
def detect_meta_prov(df: pd.DataFrame) -> dict:
    """Detect the province-name and SMA/SMK/SLB-population columns in the
    provincial metadata sheet. Raises KeyError when either is missing."""
    prov_col = pick_col(df, ["PROVINSI", "provinsi", "Provinsi"])
    pop_col = pick_col(df, ["TOTAL_SMA_SMK_SLB", "total_sma_smk_slb", "SMA_SMK_SLB", "TOTAL_SMA_SMK", "TOTAL_SMA", "SMA+SMK+SLB"])
    if prov_col is None or pop_col is None:
        raise KeyError("Meta Provinsi minimal harus punya kolom PROVINSI & TOTAL_SMA_SMK_SLB (atau padanan).")
    return {"prov": prov_col, "pop_sma": pop_col}
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
# =========================
|
| 292 |
-
# 5) INDIKATOR IPLM (KANONIK) + ALIAS
|
| 293 |
-
# =========================
|
| 294 |
# Canonical indicator columns for the "Koleksi" (collection) sub-index:
# printed/digital title and copy counts, yearly additions, and the
# collection-budget commitment.
koleksi_cols = [
    "JudulTercetak","EksemplarTercetak","JudulElektronik","EksemplarElektronik",
    "TambahJudulTercetak","TambahEksemplarTercetak",
    "TambahJudulElektronik","TambahEksemplarElektronik",
    "KomitmenAnggaranKoleksi"
]
|
| 300 |
-
# Canonical indicator columns for the "SDM" (staffing) sub-index.
sdm_cols = [
    "TenagaKualifikasiIlmuPerpustakaan",
    "TenagaFungsionalProfesional",
    "TenagaPKB",
    "AnggaranTenaga"
]
|
| 306 |
# Canonical indicator columns for the "Pelayanan" (services) sub-index:
# literacy-programme participation, visitor counts, and usage of the
# printed/digital collection.
pelayanan_cols = [
    "PesertaBudayaBaca","PemustakaLuringDaring","PemustakaFasilitasTIK",
    "PemanfaatanJudulTercetak","PemanfaatanEksemplarTercetak",
    "PemanfaatanJudulElektronik","PemanfaatanEksemplarElektronik"
]
|
| 311 |
-
pengelolaan_cols = [
|
| 312 |
-
|
| 313 |
-
]
|
| 314 |
-
all_indicators = koleksi_cols + sdm_cols + pelayanan_cols + pengelolaan_cols
|
| 315 |
|
| 316 |
-
|
|
|
|
| 317 |
# koleksi
|
| 318 |
"j_judul_koleksi_tercetak": "JudulTercetak",
|
| 319 |
"j_eksemplar_koleksi_tercetak": "EksemplarTercetak",
|
|
@@ -324,14 +271,11 @@ alias_map_raw = {
|
|
| 324 |
"tambah_judul_koleksi_digital": "TambahJudulElektronik",
|
| 325 |
"tambah_eksemplar_koleksi_digital": "TambahEksemplarElektronik",
|
| 326 |
"j_anggaran_koleksi": "KomitmenAnggaranKoleksi",
|
| 327 |
-
"komitmenanggarankoleksi": "KomitmenAnggaranKoleksi",
|
| 328 |
-
|
| 329 |
# sdm
|
| 330 |
"j_tenaga_ilmu_perpus": "TenagaKualifikasiIlmuPerpustakaan",
|
| 331 |
"j_tenaga_nonilmu_perpus": "TenagaFungsionalProfesional",
|
| 332 |
"j_tenaga_pkb": "TenagaPKB",
|
| 333 |
"j_anggaran_diklat_perpus": "AnggaranTenaga",
|
| 334 |
-
|
| 335 |
# pelayanan
|
| 336 |
"j_peserta_budaya_baca": "PesertaBudayaBaca",
|
| 337 |
"j_pemustaka_luring_daring": "PemustakaLuringDaring",
|
|
@@ -340,7 +284,6 @@ alias_map_raw = {
|
|
| 340 |
"j_eksemplar_koleksi_tercetak_termanfaat": "PemanfaatanEksemplarTercetak",
|
| 341 |
"j_judul_koleksi_digital_termanfaat": "PemanfaatanJudulElektronik",
|
| 342 |
"j_eksemplar_koleksi_digital_termanfaat": "PemanfaatanEksemplarElektronik",
|
| 343 |
-
|
| 344 |
# pengelolaan
|
| 345 |
"j_kegiatan_budaya_baca_peningkatan_literasi": "KegiatanBudayaBaca",
|
| 346 |
"j_kerjasama_pengembangan_perpus": "KegiatanKerjasama",
|
|
@@ -348,809 +291,860 @@ alias_map_raw = {
|
|
| 348 |
"j_kebijakan_prosedur_pelayanan": "Kebijakan",
|
| 349 |
"j_anggaran_peningkatan_pelayanan": "AnggaranLayanan",
|
| 350 |
}
|
| 351 |
-
|
| 352 |
|
| 353 |
-
def
|
| 354 |
-
|
| 355 |
-
rename_map = {}
|
| 356 |
for c in df.columns:
|
| 357 |
-
cc =
|
| 358 |
-
if cc in
|
| 359 |
-
|
| 360 |
else:
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
|
|
|
| 364 |
break
|
| 365 |
-
if
|
| 366 |
-
df = df.rename(columns=rename_map)
|
| 367 |
-
return df
|
| 368 |
|
| 369 |
|
| 370 |
-
# =========================
|
| 371 |
-
#
|
| 372 |
-
# =========================
|
| 373 |
DATA_INFO = ""
|
| 374 |
-
|
|
|
|
|
|
|
|
|
|
| 375 |
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
|
|
|
|
|
|
|
|
|
| 379 |
|
| 380 |
-
|
| 381 |
-
meta_prov = None
|
| 382 |
|
| 383 |
try:
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 417 |
except Exception as e:
|
| 418 |
-
|
|
|
|
| 419 |
|
| 420 |
-
# Meta Kab/Kota
|
| 421 |
-
try:
|
| 422 |
-
if Path(META_KAB_FILE).exists():
|
| 423 |
-
mk = pd.read_excel(META_KAB_FILE)
|
| 424 |
-
mk.columns = make_unique_columns(mk.columns)
|
| 425 |
-
meta_kab_cols = detect_meta_kab(mk)
|
| 426 |
|
| 427 |
-
|
| 428 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 429 |
|
| 430 |
-
|
| 431 |
-
|
| 432 |
|
| 433 |
-
|
| 434 |
-
|
| 435 |
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
mk["POP_SD_SMP"] = mk[meta_kab_cols["pop_sd_smp"]].map(coerce_num).fillna(0)
|
| 439 |
-
else:
|
| 440 |
-
mk["POP_SD_SMP"] = 0
|
| 441 |
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
mk["POP_KEC_DESA"] = mk[meta_kab_cols["pop_kec_desa"]].map(coerce_num).fillna(0)
|
| 445 |
-
else:
|
| 446 |
-
kec = mk[meta_kab_cols["col_kec"]].map(coerce_num).fillna(0) if meta_kab_cols["col_kec"] else pd.Series(0, index=mk.index)
|
| 447 |
-
desa = mk[meta_kab_cols["col_desa"]].map(coerce_num).fillna(0) if meta_kab_cols["col_desa"] else pd.Series(0, index=mk.index)
|
| 448 |
-
mk["POP_KEC_DESA"] = (kec + desa).fillna(0)
|
| 449 |
|
| 450 |
-
|
| 451 |
-
|
| 452 |
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
|
| 456 |
-
DATA_INFO += f"<br><b>DEBUG Kep Seribu meta rows:</b> {len(ser)}"
|
| 457 |
-
else:
|
| 458 |
-
WARNINGS.append("⚠️ Meta Kab/Kota file tidak ditemukan (skip).")
|
| 459 |
-
except Exception as e:
|
| 460 |
-
meta_kab = None
|
| 461 |
-
WARNINGS.append(f"⚠️ Meta Kab/Kota tidak aktif: {repr(e)}")
|
| 462 |
|
| 463 |
-
#
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 469 |
|
| 470 |
-
|
| 471 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 472 |
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 476 |
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
|
|
|
| 480 |
else:
|
| 481 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 482 |
except Exception as e:
|
| 483 |
-
|
| 484 |
-
|
|
|
|
|
|
|
|
|
|
| 485 |
|
| 486 |
-
|
| 487 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 488 |
|
|
|
|
|
|
|
| 489 |
|
| 490 |
-
|
| 491 |
-
|
| 492 |
-
# =========================
|
| 493 |
-
def prepare_global_iplm(df_src: pd.DataFrame) -> pd.DataFrame:
|
| 494 |
-
df = df_src.copy()
|
| 495 |
-
df = rename_indicators(df)
|
| 496 |
|
| 497 |
-
|
| 498 |
-
for c in
|
| 499 |
-
|
|
|
|
| 500 |
|
| 501 |
-
#
|
| 502 |
-
for c in
|
| 503 |
-
x =
|
| 504 |
-
|
| 505 |
y = np.full_like(x, np.nan, dtype=float)
|
| 506 |
-
if
|
| 507 |
pt = PowerTransformer(method="yeo-johnson", standardize=False)
|
| 508 |
-
y[
|
| 509 |
else:
|
| 510 |
-
y[
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
| 514 |
-
def
|
| 515 |
-
|
| 516 |
-
|
| 517 |
-
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
|
| 522 |
-
|
| 523 |
-
|
| 524 |
-
|
| 525 |
-
|
| 526 |
-
|
| 527 |
-
|
| 528 |
-
|
| 529 |
-
|
| 530 |
-
|
| 531 |
-
|
| 532 |
-
df["dim_kepatuhan"] = df[["sub_koleksi","sub_sdm"]].mean(axis=1, skipna=True).fillna(0.0)
|
| 533 |
-
df["dim_kinerja"] = df[["sub_pelayanan","sub_pengelolaan"]].mean(axis=1, skipna=True).fillna(0.0)
|
| 534 |
-
|
| 535 |
-
df["Indeks_Real_0_100"] = 100.0 * (W_KEPATUHAN*df["dim_kepatuhan"] + W_KINERJA*df["dim_kinerja"])
|
| 536 |
-
return df
|
| 537 |
-
|
| 538 |
-
df_iplm = None
|
| 539 |
-
if df_dm_raw is not None and len(df_dm_raw) > 0:
|
| 540 |
-
df_iplm = prepare_global_iplm(df_dm_raw)
|
| 541 |
-
|
| 542 |
-
|
| 543 |
-
# =========================
|
| 544 |
-
# 8) SAMPLING FACTOR (68%)
|
| 545 |
-
# =========================
|
| 546 |
-
def detect_school_menengah(df: pd.DataFrame) -> pd.Series:
|
| 547 |
-
if dm_cols.get("subjenis") and dm_cols["subjenis"] in df.columns:
|
| 548 |
-
t = df[dm_cols["subjenis"]].astype(str).str.upper()
|
| 549 |
-
else:
|
| 550 |
-
t = df[dm_cols["jenis"]].astype(str).str.upper()
|
| 551 |
-
return t.str.contains(r"\bSMA\b|\bSMK\b|\bSLB\b", na=False)
|
| 552 |
|
| 553 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 554 |
out = df.copy()
|
| 555 |
-
out["
|
| 556 |
-
|
| 557 |
-
|
| 558 |
-
|
| 559 |
-
|
| 560 |
-
|
| 561 |
-
|
| 562 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 563 |
for c in ["sekolah","umum","khusus"]:
|
| 564 |
if c not in g.columns:
|
| 565 |
g[c] = 0
|
| 566 |
|
| 567 |
-
merged = g.merge(
|
| 568 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 569 |
|
| 570 |
-
|
| 571 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 572 |
|
| 573 |
-
merged["
|
| 574 |
-
merged["
|
| 575 |
|
| 576 |
-
merged["
|
| 577 |
-
merged["
|
| 578 |
|
| 579 |
-
|
|
|
|
|
|
|
| 580 |
for _, r in merged.iterrows():
|
| 581 |
-
|
| 582 |
-
|
| 583 |
-
|
| 584 |
-
|
| 585 |
-
|
| 586 |
-
|
| 587 |
-
|
| 588 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 589 |
]
|
| 590 |
|
| 591 |
-
# PROVINSI
|
| 592 |
-
if
|
| 593 |
-
|
| 594 |
-
|
| 595 |
-
|
| 596 |
-
|
| 597 |
-
|
| 598 |
-
|
| 599 |
-
mergedp["
|
| 600 |
-
mergedp["
|
| 601 |
-
|
| 602 |
-
|
| 603 |
-
|
| 604 |
-
|
| 605 |
-
|
| 606 |
-
|
| 607 |
-
|
| 608 |
-
|
| 609 |
-
|
| 610 |
-
|
| 611 |
-
|
| 612 |
-
|
| 613 |
-
|
| 614 |
-
|
| 615 |
-
|
| 616 |
-
|
| 617 |
-
|
| 618 |
-
|
| 619 |
-
|
| 620 |
-
|
| 621 |
-
|
| 622 |
-
def
|
| 623 |
-
|
| 624 |
-
|
| 625 |
-
|
| 626 |
-
|
| 627 |
-
|
| 628 |
-
|
| 629 |
-
|
| 630 |
-
|
| 631 |
-
|
| 632 |
-
|
| 633 |
-
|
| 634 |
-
|
| 635 |
-
|
| 636 |
-
|
| 637 |
-
|
| 638 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 639 |
|
| 640 |
-
|
| 641 |
-
|
| 642 |
-
|
| 643 |
-
|
| 644 |
-
|
| 645 |
-
|
| 646 |
-
|
| 647 |
-
|
| 648 |
-
|
| 649 |
-
|
| 650 |
-
|
| 651 |
-
|
| 652 |
-
|
| 653 |
-
|
| 654 |
-
|
| 655 |
-
|
| 656 |
-
|
| 657 |
-
|
| 658 |
-
|
| 659 |
-
|
| 660 |
-
|
| 661 |
-
|
| 662 |
-
|
| 663 |
-
|
| 664 |
-
|
| 665 |
-
|
| 666 |
-
|
| 667 |
-
|
| 668 |
-
|
| 669 |
-
|
| 670 |
-
|
| 671 |
-
|
| 672 |
-
|
| 673 |
-
|
| 674 |
-
|
| 675 |
-
|
| 676 |
-
"Jenis Perpustakaan": LABEL_DATASET[ds],
|
| 677 |
-
"Jumlah Perpustakaan": int(len(d)),
|
| 678 |
-
"Rata2_Indeks_Final_0_100": float(d["Indeks_Final_0_100"].mean(skipna=True)) if len(d) else 0.0,
|
| 679 |
-
"Rata2_SamplingFactor_Total": float(d["SamplingFactor_Total"].mean(skipna=True)) if len(d) else 1.0
|
| 680 |
})
|
| 681 |
-
rows.append({
|
| 682 |
-
"Jenis Perpustakaan": "Rata-rata keseluruhan",
|
| 683 |
-
"Jumlah Perpustakaan": int(len(df)),
|
| 684 |
-
"Rata2_Indeks_Final_0_100": float(df["Indeks_Final_0_100"].mean(skipna=True)) if len(df) else 0.0,
|
| 685 |
-
"Rata2_SamplingFactor_Total": float(df["SamplingFactor_Total"].mean(skipna=True)) if len(df) else 1.0
|
| 686 |
-
})
|
| 687 |
-
return pd.DataFrame(rows).round(3)
|
| 688 |
-
|
| 689 |
-
def detail_final(df):
    """Per-unit detail table for the FINAL index: identity columns plus
    real/final scores, restricted to columns actually present in *df*."""
    wanted = [dm_cols["prov"], dm_cols["kab"], dm_cols["nama"], dm_cols["jenis"]]
    subjenis = dm_cols.get("subjenis")
    if subjenis and subjenis in df.columns:
        wanted.append(subjenis)
    wanted.extend(["KEW_NORM", "_dataset", "Indeks_Real_0_100",
                   "SamplingFactor_Total", "Indeks_Final_0_100"])
    present = [c for c in wanted if c in df.columns]
    return df[present].copy().round(3)
|
| 696 |
-
|
| 697 |
-
def agg_real_by_jenis(df):
    """Mean RealScore sub-indices/dimensions per library type plus an
    overall row.

    Per-type rows fall back to 0.0 when a type has no units; the overall
    row averages the whole frame directly (matching the original
    behaviour, which did not guard the overall means).
    """
    metric_map = {
        "Rata2_sub_koleksi": "sub_koleksi",
        "Rata2_sub_sdm": "sub_sdm",
        "Rata2_sub_pelayanan": "sub_pelayanan",
        "Rata2_sub_pengelolaan": "sub_pengelolaan",
        "Rata2_dim_kepatuhan": "dim_kepatuhan",
        "Rata2_dim_kinerja": "dim_kinerja",
        "Rata2_Indeks_IPLM_0_100": "Indeks_Real_0_100",
    }

    rows = []
    for ds in ["sekolah", "umum", "khusus"]:
        subset = df[df["_dataset"] == ds]
        row = {
            "Jenis Perpustakaan": LABEL_DATASET[ds],
            "Jumlah Perpustakaan": int(len(subset)),
        }
        for out_name, col in metric_map.items():
            row[out_name] = float(subset[col].mean(skipna=True)) if len(subset) else 0.0
        rows.append(row)

    overall = {
        "Jenis Perpustakaan": "Rata-rata keseluruhan",
        "Jumlah Perpustakaan": int(len(df)),
    }
    for out_name, col in metric_map.items():
        overall[out_name] = float(df[col].mean(skipna=True))
    rows.append(overall)

    return pd.DataFrame(rows).round(3)
|
| 724 |
-
|
| 725 |
-
def detail_real(df):
    """Per-unit detail table for RealScore: identity columns, sub-index /
    dimension scores, and whichever raw indicator columns are present."""
    wanted = [dm_cols["prov"], dm_cols["kab"], dm_cols["nama"], dm_cols["jenis"]]
    subjenis = dm_cols.get("subjenis")
    if subjenis and subjenis in df.columns:
        wanted.append(subjenis)
    wanted += ["KEW_NORM", "_dataset", "sub_koleksi", "sub_sdm", "sub_pelayanan",
               "sub_pengelolaan", "dim_kepatuhan", "dim_kinerja", "Indeks_Real_0_100"]

    raw_indicators = [c for c in all_indicators if c in df.columns]
    present = [c for c in wanted + raw_indicators if c in df.columns]
    return df[present].copy().round(3)
|
| 734 |
-
|
| 735 |
-
|
| 736 |
-
# =========================
|
| 737 |
-
# 11) COVERAGE (TERBACA) + BAR
|
| 738 |
-
# =========================
|
| 739 |
-
def df_to_html_big(df: pd.DataFrame, title: str = "") -> str:
    """Render *df* as a large-font HTML table block.

    Every column except "Jenis" is coerced to numeric and formatted with
    dot thousands separators (Indonesian style); NaN becomes 0. Returns
    a placeholder block when the frame is missing or empty.
    """
    if df is None or df.empty:
        return f"<div style='font-size:16px;'><b>{title}</b><br>(Tidak ada data)</div>"

    table = df.copy()
    for column in table.columns:
        if column == "Jenis":
            continue
        table[column] = pd.to_numeric(table[column], errors="coerce")
        if pd.api.types.is_numeric_dtype(table[column]):
            # 1234567 -> "1.234.567"
            table[column] = table[column].fillna(0).map(
                lambda v: f"{int(v):,}".replace(",", ".")
            )

    body = table.to_html(index=False, escape=False)
    return f"""
    <div style="font-size:16px; line-height:1.35;">
      <div style="font-size:18px; font-weight:700; margin-bottom:8px;">{title}</div>
      <div style="overflow-x:auto; border:1px solid #333; border-radius:10px; padding:8px;">
        {body}
      </div>
    </div>
    """
|
| 758 |
-
|
| 759 |
-
def coverage_table_and_bar(df_subset, kew_value):
|
| 760 |
-
kew = str(kew_value).upper()
|
| 761 |
-
tbl = pd.DataFrame()
|
| 762 |
-
fig = go.Figure()
|
| 763 |
-
fig.update_layout(barmode="group", title=f"BAR — Populasi vs Sampel ({kew})")
|
| 764 |
|
| 765 |
-
|
| 766 |
-
return tbl, fig
|
| 767 |
|
| 768 |
-
|
| 769 |
-
|
| 770 |
-
|
|
|
|
| 771 |
|
| 772 |
-
|
| 773 |
-
|
| 774 |
-
|
|
|
|
| 775 |
|
| 776 |
-
|
| 777 |
-
|
| 778 |
|
| 779 |
-
|
| 780 |
-
|
| 781 |
-
|
| 782 |
-
|
| 783 |
-
{"Jenis":"Sekolah (SD+SMP)", "Populasi":pop_sek, "Target_68%":target_sek, "Sampel":samp_sek, "Gap_ke_68%": max(target_sek - samp_sek, 0)},
|
| 784 |
-
{"Jenis":"Umum (Kec+Desa)", "Populasi":pop_um, "Target_68%":target_um, "Sampel":samp_um, "Gap_ke_68%": max(target_um - samp_um, 0)},
|
| 785 |
-
])
|
| 786 |
-
|
| 787 |
-
fig = px.bar(
|
| 788 |
-
tbl.melt(id_vars="Jenis", value_vars=["Populasi","Sampel"]),
|
| 789 |
-
x="Jenis", y="value", color="variable",
|
| 790 |
-
barmode="group",
|
| 791 |
-
title="BAR — Populasi vs Sampel (KAB/KOTA)"
|
| 792 |
)
|
| 793 |
-
|
| 794 |
-
|
| 795 |
-
|
| 796 |
-
|
| 797 |
-
|
| 798 |
-
|
| 799 |
-
|
| 800 |
-
|
| 801 |
-
|
| 802 |
-
|
| 803 |
-
|
| 804 |
-
|
| 805 |
-
|
| 806 |
-
|
| 807 |
-
|
| 808 |
-
x="Jenis", y="value", color="variable",
|
| 809 |
-
barmode="group",
|
| 810 |
-
title="BAR — Populasi vs Sampel (PROVINSI)"
|
| 811 |
-
)
|
| 812 |
-
return tbl, fig
|
| 813 |
|
| 814 |
-
return
|
| 815 |
|
| 816 |
|
| 817 |
-
# =========================
|
| 818 |
-
#
|
| 819 |
-
# =========================
|
| 820 |
-
def
|
| 821 |
fig = go.Figure()
|
| 822 |
-
if
|
| 823 |
-
fig.update_layout(title=
|
| 824 |
return fig
|
| 825 |
|
| 826 |
-
|
| 827 |
-
if len(x) < 5:
|
| 828 |
-
fig.update_layout(title=title)
|
| 829 |
-
return fig
|
| 830 |
|
| 831 |
-
|
| 832 |
-
|
| 833 |
|
| 834 |
-
|
| 835 |
-
|
|
|
|
|
|
|
| 836 |
|
| 837 |
-
|
| 838 |
-
|
| 839 |
-
|
| 840 |
|
| 841 |
-
|
|
|
|
| 842 |
|
| 843 |
-
|
| 844 |
-
|
| 845 |
-
|
| 846 |
-
|
| 847 |
-
|
| 848 |
-
|
| 849 |
-
|
| 850 |
-
|
| 851 |
-
|
| 852 |
-
hovertemplate="%{text}<br>Indeks: %{x:.2f}<extra></extra>" if hover else "Indeks: %{x:.2f}<extra></extra>"
|
| 853 |
-
))
|
| 854 |
|
| 855 |
-
|
| 856 |
-
|
| 857 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 858 |
|
| 859 |
-
fig.update_layout(
|
| 860 |
-
title=title,
|
| 861 |
-
xaxis_title="Indeks IPLM (0–100)",
|
| 862 |
-
yaxis_title="Kepadatan (relatif)",
|
| 863 |
-
height=420,
|
| 864 |
-
margin=dict(l=40, r=20, t=60, b=40)
|
| 865 |
-
)
|
| 866 |
return fig
|
| 867 |
|
| 868 |
|
| 869 |
-
# =========================
|
| 870 |
-
#
|
| 871 |
-
# =========================
|
| 872 |
-
def
|
| 873 |
-
|
| 874 |
-
mean_final = float(df_subset["Indeks_Final_0_100"].mean(skipna=True)) if len(df_subset) else 0.0
|
| 875 |
-
mean_real = float(df_subset["Indeks_Real_0_100"].mean(skipna=True)) if len(df_subset) else 0.0
|
| 876 |
-
mean_sf = float(df_subset["SamplingFactor_Total"].mean(skipna=True)) if len(df_subset) else 1.0
|
| 877 |
-
|
| 878 |
lines = []
|
| 879 |
-
lines.append(f"
|
| 880 |
-
lines.append(f"
|
| 881 |
-
lines.append(f"
|
| 882 |
-
lines.append(f"
|
| 883 |
-
|
| 884 |
-
|
| 885 |
-
|
| 886 |
-
|
| 887 |
-
|
| 888 |
-
|
| 889 |
-
|
| 890 |
-
|
| 891 |
-
|
| 892 |
-
|
| 893 |
-
|
| 894 |
-
|
| 895 |
-
|
| 896 |
-
|
| 897 |
-
|
| 898 |
-
|
| 899 |
-
|
| 900 |
-
client = InferenceClient(model=hf_model, token=token)
|
| 901 |
-
|
| 902 |
-
prompt = (
|
| 903 |
-
"You are a policy analyst for Indonesia National Library (Perpusnas). "
|
| 904 |
-
"Write a concise Indonesian policy analysis (max 180 words) based on this summary:\n"
|
| 905 |
-
f"- Scope: {scope_label}, Authority: {kew}\n"
|
| 906 |
-
f"- Mean Final Index: {mean_final:.2f}\n"
|
| 907 |
-
f"- Mean Real Index: {mean_real:.2f}\n"
|
| 908 |
-
f"- Mean Sampling Factor: {mean_sf:.3f}\n"
|
| 909 |
-
f"- Coverage table: {cov_tbl.to_dict(orient='records') if cov_tbl is not None else []}\n"
|
| 910 |
-
"Include: key interpretation, risk to validity from sampling, and 2 actionable recommendations."
|
| 911 |
-
)
|
| 912 |
-
|
| 913 |
-
resp = client.text_generation(prompt, max_new_tokens=220, temperature=0.2)
|
| 914 |
-
lines.append("\n**Analisis LLM (Perpusnas-ready):**\n" + resp.strip())
|
| 915 |
-
return "\n".join(lines)
|
| 916 |
-
except Exception as e:
|
| 917 |
-
lines.append(f"\n⚠️ LLM call gagal ({repr(e)}). Pakai analisis template.")
|
| 918 |
-
return "\n".join(lines)
|
| 919 |
-
|
| 920 |
-
lines.append("\n**Implikasi kebijakan (template cepat):**")
|
| 921 |
-
lines.append("- SamplingFactor < 1 menandakan keterwakilan belum mencapai target 68% → interpretasi indeks perlu disertai catatan coverage/kualitas data.")
|
| 922 |
-
lines.append("- Prioritaskan percepatan pengisian pada jenis dengan gap terbesar, dan lakukan validasi minimal (kelengkapan indikator kunci) sebelum agregasi.")
|
| 923 |
return "\n".join(lines)
|
| 924 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 925 |
|
| 926 |
-
|
| 927 |
-
|
| 928 |
-
|
| 929 |
-
|
| 930 |
-
|
| 931 |
-
from docx import Document
|
| 932 |
-
from docx.shared import Inches
|
| 933 |
-
except Exception:
|
| 934 |
-
HAS_DOCX = False
|
| 935 |
|
| 936 |
-
|
| 937 |
-
|
| 938 |
-
|
| 939 |
-
|
| 940 |
-
|
| 941 |
-
|
| 942 |
|
| 943 |
-
|
| 944 |
-
|
| 945 |
-
|
| 946 |
-
|
|
|
|
|
|
|
|
|
|
| 947 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 948 |
doc = Document()
|
| 949 |
-
doc.add_heading(f"Laporan
|
| 950 |
doc.add_paragraph(f"Kewenangan: {kew}")
|
| 951 |
-
doc.add_paragraph("
|
| 952 |
-
|
| 953 |
-
|
| 954 |
-
|
| 955 |
-
|
| 956 |
-
|
| 957 |
-
|
| 958 |
-
|
| 959 |
-
|
| 960 |
-
|
| 961 |
-
|
| 962 |
-
|
| 963 |
-
|
| 964 |
-
|
| 965 |
-
|
| 966 |
-
|
| 967 |
-
|
| 968 |
-
|
| 969 |
-
|
| 970 |
-
|
| 971 |
-
|
| 972 |
-
|
| 973 |
-
|
| 974 |
-
|
| 975 |
-
|
| 976 |
-
|
| 977 |
-
|
| 978 |
-
|
| 979 |
-
|
| 980 |
-
|
| 981 |
-
|
| 982 |
-
|
| 983 |
-
|
| 984 |
-
|
| 985 |
-
|
| 986 |
-
|
| 987 |
-
|
| 988 |
-
doc.
|
| 989 |
-
|
| 990 |
-
doc.add_heading("6) Analisis", level=2)
|
| 991 |
-
doc.add_paragraph(narrative)
|
| 992 |
|
| 993 |
outpath = tempfile.mktemp(suffix=".docx")
|
| 994 |
doc.save(outpath)
|
| 995 |
return outpath
|
| 996 |
|
| 997 |
|
| 998 |
-
# =========================
|
| 999 |
-
#
|
| 1000 |
-
# =========================
|
| 1001 |
-
def
|
| 1002 |
-
|
| 1003 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1004 |
|
| 1005 |
-
|
| 1006 |
-
|
| 1007 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1008 |
|
| 1009 |
-
|
| 1010 |
-
kab_value = kab_value or "(Semua)"
|
| 1011 |
-
kew_value = kew_value or "(Semua)"
|
| 1012 |
-
kew_norm = str(kew_value).upper()
|
| 1013 |
|
| 1014 |
-
|
| 1015 |
-
|
|
|
|
| 1016 |
|
| 1017 |
-
|
|
|
|
|
|
|
| 1018 |
|
| 1019 |
-
|
| 1020 |
-
|
| 1021 |
-
|
| 1022 |
-
df = df[df[dm_cols["kab"]] == str(kab_value)]
|
| 1023 |
-
if kew_value != "(Semua)":
|
| 1024 |
-
df = df[df["KEW_NORM"] == kew_norm]
|
| 1025 |
|
| 1026 |
if df.empty:
|
| 1027 |
-
|
| 1028 |
-
|
| 1029 |
-
|
| 1030 |
-
# TABLES
|
| 1031 |
-
t1 = agg_final_overall(df)
|
| 1032 |
-
t2 = agg_final_by_jenis(df)
|
| 1033 |
-
t3 = detail_final(df)
|
| 1034 |
-
t4 = agg_real_by_jenis(df)
|
| 1035 |
-
t5 = detail_real(df)
|
| 1036 |
-
|
| 1037 |
-
# COVERAGE + BAR
|
| 1038 |
-
cov_tbl, bar_fig = coverage_table_and_bar(df, kew_norm)
|
| 1039 |
-
cov_html = df_to_html_big(cov_tbl, "Coverage Populasi vs Sampel (Target 68%)")
|
| 1040 |
-
|
| 1041 |
-
# BELL CURVES
|
| 1042 |
-
bell_all = bell_curve_fig(df, "Indeks_Real_0_100", "Sebaran Indeks RealScore — Semua", dm_cols["nama"])
|
| 1043 |
-
bell_sek = bell_curve_fig(df[df["_dataset"]=="sekolah"], "Indeks_Real_0_100", "Sebaran Indeks RealScore — Perpustakaan Sekolah", dm_cols["nama"])
|
| 1044 |
-
bell_um = bell_curve_fig(df[df["_dataset"]=="umum"], "Indeks_Real_0_100", "Sebaran Indeks RealScore — Perpustakaan Umum", dm_cols["nama"])
|
| 1045 |
-
bell_kh = bell_curve_fig(df[df["_dataset"]=="khusus"], "Indeks_Real_0_100", "Sebaran Indeks RealScore — Perpustakaan Khusus", dm_cols["nama"])
|
| 1046 |
-
|
| 1047 |
-
# NARASI
|
| 1048 |
-
scope_label = kab_value if (kab_value != "(Semua)" and kew_norm != "PROVINSI") else prov_value
|
| 1049 |
-
if scope_label == "(Semua)":
|
| 1050 |
-
scope_label = "NASIONAL"
|
| 1051 |
-
narrative = llm_analysis_text(df, cov_tbl, scope_label, kew_norm, bool(use_llm), str(hf_model or ""))
|
| 1052 |
-
|
| 1053 |
-
# SAVE FILES
|
| 1054 |
-
tmpdir = tempfile.mkdtemp()
|
| 1055 |
-
f_final_agg = os.path.join(tmpdir, "IPLM2025_Agregat_FINAL.xlsx")
|
| 1056 |
-
f_final_det = os.path.join(tmpdir, "IPLM2025_Detail_FINAL.xlsx")
|
| 1057 |
-
f_real_agg = os.path.join(tmpdir, "IPLM2025_Agregat_Real_SubindeksDimensi.xlsx")
|
| 1058 |
-
f_real_det = os.path.join(tmpdir, "IPLM2025_Detail_Real_SubindeksDimensi_Indikator.xlsx")
|
| 1059 |
-
|
| 1060 |
-
t2.to_excel(f_final_agg, index=False)
|
| 1061 |
-
t3.to_excel(f_final_det, index=False)
|
| 1062 |
-
t4.to_excel(f_real_agg, index=False)
|
| 1063 |
-
t5.to_excel(f_real_det, index=False)
|
| 1064 |
-
|
| 1065 |
-
word_path = generate_word_report(
|
| 1066 |
-
scope_label, kew_norm, t1, t2, t4, cov_tbl, bar_fig,
|
| 1067 |
-
bell_all, bell_sek, bell_um, bell_kh,
|
| 1068 |
-
narrative
|
| 1069 |
-
)
|
| 1070 |
|
| 1071 |
-
|
| 1072 |
-
|
| 1073 |
-
f_final_agg, f_final_det, word_path, narrative, msg)
|
| 1074 |
|
|
|
|
|
|
|
| 1075 |
|
| 1076 |
-
#
|
| 1077 |
-
|
| 1078 |
-
# =========================
|
| 1079 |
-
with gr.Blocks() as demo:
|
| 1080 |
-
gr.Markdown(f"""
|
| 1081 |
-
# IPLM 2025 — Real × SamplingFactor 68% (FINAL)
|
| 1082 |
|
| 1083 |
-
|
| 1084 |
-
|
| 1085 |
-
""")
|
| 1086 |
|
| 1087 |
-
|
| 1088 |
-
|
| 1089 |
-
|
| 1090 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1091 |
|
| 1092 |
-
|
| 1093 |
-
|
|
|
|
|
|
|
| 1094 |
|
| 1095 |
-
with
|
| 1096 |
-
|
| 1097 |
-
|
|
|
|
| 1098 |
|
| 1099 |
-
|
| 1100 |
-
|
|
|
|
|
|
|
|
|
|
| 1101 |
|
| 1102 |
-
|
| 1103 |
-
|
|
|
|
|
|
|
| 1104 |
|
| 1105 |
-
|
| 1106 |
-
out_agg_final = gr.DataFrame(interactive=False)
|
| 1107 |
|
| 1108 |
-
|
| 1109 |
-
|
| 1110 |
|
| 1111 |
-
gr.Markdown("## 4) Hasil Agregat (RealScore) — per Jenis (Subindeks & Dimensi)")
|
| 1112 |
-
out_agg_real = gr.DataFrame(interactive=False)
|
| 1113 |
|
| 1114 |
-
|
| 1115 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1116 |
|
| 1117 |
-
|
| 1118 |
-
|
|
|
|
|
|
|
| 1119 |
|
| 1120 |
-
|
| 1121 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1122 |
|
| 1123 |
-
gr.Markdown("##
|
| 1124 |
-
|
| 1125 |
|
| 1126 |
-
gr.Markdown("##
|
| 1127 |
-
|
| 1128 |
|
| 1129 |
-
gr.Markdown("##
|
| 1130 |
-
|
| 1131 |
|
| 1132 |
-
gr.Markdown("##
|
| 1133 |
-
|
| 1134 |
|
| 1135 |
-
gr.Markdown("## Analisis (LLM
|
| 1136 |
-
|
| 1137 |
|
| 1138 |
with gr.Row():
|
| 1139 |
-
|
| 1140 |
-
|
| 1141 |
-
|
| 1142 |
|
| 1143 |
run_btn.click(
|
| 1144 |
-
fn=
|
| 1145 |
-
inputs=[dd_prov, dd_kab, dd_kew
|
| 1146 |
outputs=[
|
| 1147 |
-
|
| 1148 |
-
|
| 1149 |
-
|
| 1150 |
-
|
| 1151 |
-
f1, f2, f3,
|
| 1152 |
-
out_analysis,
|
| 1153 |
-
msg_out
|
| 1154 |
],
|
| 1155 |
)
|
| 1156 |
|
|
|
|
| 1 |
# -*- coding: utf-8 -*-
|
| 2 |
"""
|
| 3 |
+
app.py — IPLM (Hitung Indeks) + Dashboard Kekurangan Sampel (Gap 68%) + Analisis AI (HF)
|
| 4 |
+
======================================================================================
|
| 5 |
+
TUJUAN (sesuai maumu):
|
| 6 |
+
1) Tetap HITUNG IPLM (Yeo-Johnson + normalisasi nasional -> subindeks -> dimensi -> indeks 0–100)
|
| 7 |
+
2) Tetap HITUNG GAP sampel (Target 68%) pakai meta populasi (SD+SMP, Kec+Desa, SMA)
|
| 8 |
+
3) Analisis AI (LLM) + Word report pakai kode gap milikmu (tanpa bahas skor IPLM)
|
| 9 |
+
4) FIX utama: "jumlah sekolah tidak kedetect" -> diperbaiki join meta dengan:
|
| 10 |
+
- KEY gabungan: prov_key + kab_key (lebih presisi)
|
| 11 |
+
- fallback: kab_key saja (kalau meta tidak punya prov)
|
| 12 |
+
- fallback-2: mapping alias kab/kota (opsional) + debug table "Meta_Match_Debug"
|
| 13 |
+
|
| 14 |
+
PASTIKAN file ada di root HF Spaces:
|
| 15 |
+
- IPLM_clean_manual_131225.xlsx
|
| 16 |
+
- Data_populasi_Kab_kota.xlsx
|
| 17 |
+
- Data_populasi_propinsi.xlsx
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
"""
|
| 19 |
|
| 20 |
import os
|
| 21 |
import re
|
| 22 |
import math
|
|
|
|
| 23 |
import tempfile
|
| 24 |
from pathlib import Path
|
|
|
|
| 25 |
|
| 26 |
+
import gradio as gr
|
| 27 |
import numpy as np
|
| 28 |
import pandas as pd
|
|
|
|
| 29 |
import plotly.graph_objects as go
|
| 30 |
import plotly.express as px
|
| 31 |
+
from huggingface_hub import InferenceClient
|
| 32 |
from sklearn.preprocessing import PowerTransformer
|
| 33 |
|
| 34 |
+
# Word report
|
| 35 |
+
from docx import Document
|
| 36 |
+
|
| 37 |
+
# Pie opsional (butuh kaleido)
|
| 38 |
+
try:
|
| 39 |
+
import kaleido # noqa: F401
|
| 40 |
+
HAS_KALEIDO = True
|
| 41 |
+
except Exception:
|
| 42 |
+
HAS_KALEIDO = False
|
| 43 |
|
|
|
|
| 44 |
|
| 45 |
+
# ============================================================
|
| 46 |
+
# 0) FILES (SESUAIKAN)
|
| 47 |
+
# ============================================================
|
| 48 |
+
DATA_FILE = "IPLM_clean_manual_131225.xlsx" # DM sampel masuk (multi-sheet)
|
| 49 |
+
META_KAB_FILE = "Data_populasi_Kab_kota.xlsx" # populasi kab/kota (kec, desa, SD, SMP)
|
| 50 |
+
META_SMA_FILE = "Data_populasi_propinsi.xlsx" # populasi provinsi (SMA)
|
| 51 |
+
|
| 52 |
+
# ============================================================
|
| 53 |
+
# 0a) TARGET CAKUPAN
|
| 54 |
+
# ============================================================
|
| 55 |
+
TARGET_COVERAGE = 0.68
|
| 56 |
+
|
| 57 |
+
# ============================================================
|
| 58 |
+
# 0b) BOBOT IPLM (REAL)
|
| 59 |
+
# ============================================================
|
| 60 |
W_KEPATUHAN = 0.30
|
| 61 |
W_KINERJA = 0.70
|
| 62 |
|
| 63 |
+
# ============================================================
|
| 64 |
+
# 0c) LLM (HF Inference)
|
| 65 |
+
# ============================================================
|
| 66 |
+
USE_LLM = True
|
| 67 |
+
LLM_MODEL_NAME = "meta-llama/Meta-Llama-3-8B-Instruct"
|
| 68 |
+
HF_TOKEN = (
|
| 69 |
+
os.getenv("HF_SECRET")
|
| 70 |
+
or os.getenv("HUGGINGFACEHUB_API_TOKEN")
|
| 71 |
+
or os.getenv("HF_API_TOKEN")
|
| 72 |
+
)
|
| 73 |
+
|
| 74 |
+
_HF_CLIENT = None
|
| 75 |
+
def get_llm_client():
|
| 76 |
+
global _HF_CLIENT
|
| 77 |
+
if _HF_CLIENT is not None:
|
| 78 |
+
return _HF_CLIENT
|
| 79 |
+
try:
|
| 80 |
+
if HF_TOKEN:
|
| 81 |
+
_HF_CLIENT = InferenceClient(model=LLM_MODEL_NAME, token=HF_TOKEN)
|
| 82 |
else:
|
| 83 |
+
_HF_CLIENT = InferenceClient(model=LLM_MODEL_NAME)
|
| 84 |
+
return _HF_CLIENT
|
| 85 |
+
except Exception:
|
| 86 |
+
_HF_CLIENT = None
|
| 87 |
+
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
|
|
|
|
|
|
|
| 89 |
|
| 90 |
+
# ============================================================
|
| 91 |
+
# 1) UTIL
|
| 92 |
+
# ============================================================
|
| 93 |
+
def _canon(s: str) -> str:
|
| 94 |
+
return re.sub(r"[^a-z0-9]+", "", str(s).lower())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
|
| 96 |
+
def pick_col(df, candidates):
|
| 97 |
+
# exact
|
| 98 |
+
for c in candidates:
|
| 99 |
+
if c in df.columns:
|
| 100 |
+
return c
|
| 101 |
+
# canon exact
|
| 102 |
+
can_map = {_canon(c): c for c in df.columns}
|
| 103 |
+
for c in candidates:
|
| 104 |
+
k = _canon(c)
|
| 105 |
+
if k in can_map:
|
| 106 |
+
return can_map[k]
|
| 107 |
+
# contains fallback
|
| 108 |
+
for cc in df.columns:
|
| 109 |
+
ccc = _canon(cc)
|
| 110 |
+
for c in candidates:
|
| 111 |
+
if _canon(c) in ccc:
|
| 112 |
+
return cc
|
| 113 |
+
return None
|
| 114 |
|
|
|
|
|
|
|
|
|
|
| 115 |
def coerce_num(val):
|
| 116 |
+
if pd.isna(val):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
return np.nan
|
|
|
|
|
|
|
|
|
|
| 118 |
t = str(val).strip()
|
| 119 |
+
if t == "" or t in {"-", "–", "—"}:
|
| 120 |
return np.nan
|
|
|
|
| 121 |
t = t.replace("\u00a0", " ").replace("Rp", "").replace("%", "")
|
| 122 |
t = re.sub(r"[^0-9,.\-]", "", t)
|
|
|
|
|
|
|
| 123 |
if t.count(".") > 1 and t.count(",") == 1:
|
| 124 |
t = t.replace(".", "").replace(",", ".")
|
| 125 |
+
elif t.count(",") > 1 and t.count(".") == 1:
|
| 126 |
+
t = t.replace(",", "")
|
| 127 |
elif t.count(",") == 1 and t.count(".") == 0:
|
| 128 |
t = t.replace(",", ".")
|
| 129 |
else:
|
| 130 |
t = t.replace(",", "")
|
|
|
|
| 131 |
try:
|
| 132 |
return float(t)
|
| 133 |
except Exception:
|
| 134 |
return np.nan
|
| 135 |
|
| 136 |
+
def norm_kew(v):
|
| 137 |
+
if pd.isna(v):
|
| 138 |
+
return None
|
| 139 |
+
t = str(v).strip().upper()
|
| 140 |
+
if "KAB" in t or "KOTA" in t:
|
| 141 |
+
return "KAB/KOTA"
|
| 142 |
+
if "PROV" in t:
|
| 143 |
+
return "PROVINSI"
|
| 144 |
+
if "PUSAT" in t or "NASIONAL" in t:
|
| 145 |
+
return "PUSAT"
|
| 146 |
+
return t
|
| 147 |
+
|
| 148 |
+
def _norm_text(x):
|
| 149 |
+
if pd.isna(x):
|
| 150 |
+
return None
|
| 151 |
+
t = str(x).strip().upper()
|
| 152 |
+
return " ".join(t.split())
|
| 153 |
+
|
| 154 |
+
def clean_prov_display(s):
|
| 155 |
+
if pd.isna(s):
|
| 156 |
+
return None
|
| 157 |
+
t = str(s).upper().strip()
|
| 158 |
+
t = " ".join(t.split())
|
| 159 |
+
while t.startswith("PROVINSI PROVINSI "):
|
| 160 |
+
t = t.replace("PROVINSI PROVINSI ", "PROVINSI ", 1)
|
| 161 |
+
t = t.replace("PROVINSI PROVINSI ", "PROVINSI ")
|
| 162 |
+
if not t.startswith("PROVINSI "):
|
| 163 |
+
t = "PROVINSI " + t
|
| 164 |
+
return t
|
| 165 |
+
|
| 166 |
+
def clean_kab_display(s):
|
| 167 |
+
if pd.isna(s):
|
| 168 |
+
return None
|
| 169 |
+
t = str(s).upper().strip()
|
| 170 |
+
t = " ".join(t.split())
|
| 171 |
+
t = t.replace("KABUPATEN", "KAB.")
|
| 172 |
+
t = t.replace("KAB ", "KAB. ")
|
| 173 |
+
t = t.replace("KOTA ADMINISTRASI", "KOTA")
|
| 174 |
+
t = t.replace("KOTA ADM.", "KOTA")
|
| 175 |
+
return t
|
| 176 |
+
|
| 177 |
+
def norm_prov_label(s):
|
| 178 |
+
if pd.isna(s):
|
| 179 |
+
return None
|
| 180 |
+
t = str(s).upper()
|
| 181 |
+
for bad in ["PROVINSI", "PROPINSI"]:
|
| 182 |
+
t = t.replace(bad, "")
|
| 183 |
+
t = " ".join(t.split())
|
| 184 |
+
return re.sub(r"[^A-Z0-9]+", "", t)
|
| 185 |
+
|
| 186 |
+
def norm_kab_label(s):
|
| 187 |
+
"""
|
| 188 |
+
KEY join kab/kota super stabil (termasuk Kep Seribu)
|
| 189 |
+
"""
|
| 190 |
+
if pd.isna(s):
|
| 191 |
+
return None
|
| 192 |
+
t = str(s).upper()
|
| 193 |
+
t = t.replace("KABUPATEN", "KAB")
|
| 194 |
+
t = t.replace("KAB.", "KAB")
|
| 195 |
+
t = t.replace("KAB ", "KAB ")
|
| 196 |
+
t = t.replace("KOTA ADMINISTRASI", "KOTA")
|
| 197 |
+
t = t.replace("KOTA ADM.", "KOTA")
|
| 198 |
+
t = t.replace("KOTA.", "KOTA")
|
| 199 |
+
t = t.replace("KEP.", "KEPULAUAN")
|
| 200 |
+
t = t.replace("KEP ", "KEPULAUAN ")
|
| 201 |
+
# khusus Kepulauan Seribu
|
| 202 |
+
if "SERIBU" in t:
|
| 203 |
+
t = "KAB ADM KEPULAUAN SERIBU"
|
| 204 |
+
t = " ".join(t.split())
|
| 205 |
+
return re.sub(r"[^A-Z0-9]+", "", t)
|
| 206 |
|
| 207 |
def ceil_int(x):
|
| 208 |
try:
|
|
|
|
| 221 |
except Exception:
|
| 222 |
return 1.0
|
| 223 |
|
| 224 |
+
def make_pie_plotly(num, den, title):
|
| 225 |
+
if not HAS_KALEIDO:
|
| 226 |
+
return None
|
| 227 |
+
if den is None or pd.isna(den) or den <= 0:
|
| 228 |
+
values = [0, 1]
|
| 229 |
+
labels = ["Terjangkau", "Belum Terjangkau"]
|
| 230 |
+
else:
|
| 231 |
+
num = 0 if pd.isna(num) else float(num)
|
| 232 |
+
den = float(den)
|
| 233 |
+
values = [max(num, 0), max(den - num, 0)]
|
| 234 |
+
labels = ["Terjangkau", "Belum Terjangkau"]
|
| 235 |
+
fig = px.pie(values=values, names=labels, title=title, hole=0.35)
|
| 236 |
+
tmp = tempfile.mktemp(suffix=".png")
|
| 237 |
+
try:
|
| 238 |
+
fig.write_image(tmp, scale=2)
|
| 239 |
+
return tmp
|
| 240 |
+
except Exception:
|
| 241 |
+
return None
|
| 242 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 243 |
|
| 244 |
+
# ============================================================
|
| 245 |
+
# 2) DEFINISI INDIKATOR IPLM (KANONIK)
|
| 246 |
+
# ============================================================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 247 |
koleksi_cols = [
|
| 248 |
"JudulTercetak","EksemplarTercetak","JudulElektronik","EksemplarElektronik",
|
| 249 |
"TambahJudulTercetak","TambahEksemplarTercetak",
|
| 250 |
"TambahJudulElektronik","TambahEksemplarElektronik",
|
| 251 |
"KomitmenAnggaranKoleksi"
|
| 252 |
]
|
| 253 |
+
sdm_cols = ["TenagaKualifikasiIlmuPerpustakaan","TenagaFungsionalProfesional","TenagaPKB","AnggaranTenaga"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 254 |
pelayanan_cols = [
|
| 255 |
"PesertaBudayaBaca","PemustakaLuringDaring","PemustakaFasilitasTIK",
|
| 256 |
"PemanfaatanJudulTercetak","PemanfaatanEksemplarTercetak",
|
| 257 |
"PemanfaatanJudulElektronik","PemanfaatanEksemplarElektronik"
|
| 258 |
]
|
| 259 |
+
pengelolaan_cols = ["KegiatanBudayaBaca","KegiatanKerjasama","VariasiLayanan","Kebijakan","AnggaranLayanan"]
|
| 260 |
+
ALL_IND = koleksi_cols + sdm_cols + pelayanan_cols + pengelolaan_cols
|
|
|
|
|
|
|
| 261 |
|
| 262 |
+
# alias (kolom DM kamu -> kanonik)
|
| 263 |
+
ALIAS = {
|
| 264 |
# koleksi
|
| 265 |
"j_judul_koleksi_tercetak": "JudulTercetak",
|
| 266 |
"j_eksemplar_koleksi_tercetak": "EksemplarTercetak",
|
|
|
|
| 271 |
"tambah_judul_koleksi_digital": "TambahJudulElektronik",
|
| 272 |
"tambah_eksemplar_koleksi_digital": "TambahEksemplarElektronik",
|
| 273 |
"j_anggaran_koleksi": "KomitmenAnggaranKoleksi",
|
|
|
|
|
|
|
| 274 |
# sdm
|
| 275 |
"j_tenaga_ilmu_perpus": "TenagaKualifikasiIlmuPerpustakaan",
|
| 276 |
"j_tenaga_nonilmu_perpus": "TenagaFungsionalProfesional",
|
| 277 |
"j_tenaga_pkb": "TenagaPKB",
|
| 278 |
"j_anggaran_diklat_perpus": "AnggaranTenaga",
|
|
|
|
| 279 |
# pelayanan
|
| 280 |
"j_peserta_budaya_baca": "PesertaBudayaBaca",
|
| 281 |
"j_pemustaka_luring_daring": "PemustakaLuringDaring",
|
|
|
|
| 284 |
"j_eksemplar_koleksi_tercetak_termanfaat": "PemanfaatanEksemplarTercetak",
|
| 285 |
"j_judul_koleksi_digital_termanfaat": "PemanfaatanJudulElektronik",
|
| 286 |
"j_eksemplar_koleksi_digital_termanfaat": "PemanfaatanEksemplarElektronik",
|
|
|
|
| 287 |
# pengelolaan
|
| 288 |
"j_kegiatan_budaya_baca_peningkatan_literasi": "KegiatanBudayaBaca",
|
| 289 |
"j_kerjasama_pengembangan_perpus": "KegiatanKerjasama",
|
|
|
|
| 291 |
"j_kebijakan_prosedur_pelayanan": "Kebijakan",
|
| 292 |
"j_anggaran_peningkatan_pelayanan": "AnggaranLayanan",
|
| 293 |
}
|
| 294 |
+
ALIAS_CAN = {_canon(k): v for k, v in ALIAS.items()}
|
| 295 |
|
| 296 |
+
def rename_to_canonical(df: pd.DataFrame) -> pd.DataFrame:
|
| 297 |
+
r = {}
|
|
|
|
| 298 |
for c in df.columns:
|
| 299 |
+
cc = _canon(c)
|
| 300 |
+
if cc in ALIAS_CAN:
|
| 301 |
+
r[c] = ALIAS_CAN[cc]
|
| 302 |
else:
|
| 303 |
+
# kalau sudah kanonik
|
| 304 |
+
for t in ALL_IND:
|
| 305 |
+
if cc == _canon(t):
|
| 306 |
+
r[c] = t
|
| 307 |
break
|
| 308 |
+
return df.rename(columns=r) if r else df
|
|
|
|
|
|
|
| 309 |
|
| 310 |
|
| 311 |
+
# ============================================================
|
| 312 |
+
# 3) LOAD DM (multi-sheet) + DETECT COLS
|
| 313 |
+
# ============================================================
|
| 314 |
DATA_INFO = ""
|
| 315 |
+
df_all_raw = None
|
| 316 |
+
|
| 317 |
+
meta_kab_df = None
|
| 318 |
+
meta_sma_df = None
|
| 319 |
|
| 320 |
+
prov_col_glob = None
|
| 321 |
+
kab_col_glob = None
|
| 322 |
+
kew_col_glob = None
|
| 323 |
+
jenis_col_glob = None
|
| 324 |
+
subjenis_col_glob = None
|
| 325 |
+
nama_col_glob = None
|
| 326 |
|
| 327 |
+
extra_info = []
|
|
|
|
| 328 |
|
| 329 |
try:
|
| 330 |
+
fp = Path(DATA_FILE)
|
| 331 |
+
if not fp.exists():
|
| 332 |
+
raise FileNotFoundError(f"File tidak ditemukan: {DATA_FILE}")
|
| 333 |
+
|
| 334 |
+
xls = pd.ExcelFile(fp)
|
| 335 |
+
frames = [pd.read_excel(fp, sheet_name=s) for s in xls.sheet_names]
|
| 336 |
+
df_all_raw = pd.concat(frames, ignore_index=True, sort=False)
|
| 337 |
+
|
| 338 |
+
df_all_raw = rename_to_canonical(df_all_raw)
|
| 339 |
+
|
| 340 |
+
prov_col_glob = pick_col(df_all_raw, ["provinsi", "Provinsi", "PROVINSI"])
|
| 341 |
+
kab_col_glob = pick_col(df_all_raw, ["kab_kota", "kab/kota", "Kab/Kota", "KAB/KOTA", "kabupaten_kota", "kota"])
|
| 342 |
+
kew_col_glob = pick_col(df_all_raw, ["kewenangan", "jenis_kewenangan", "Kewenangan", "KEWENANGAN"])
|
| 343 |
+
jenis_col_glob = pick_col(df_all_raw, ["jenis_perpustakaan", "JENIS_PERPUSTAKAAN", "Jenis Perpustakaan"])
|
| 344 |
+
subjenis_col_glob = pick_col(df_all_raw, ["sub_jenis_perpus", "Sub Jenis", "SubJenis", "subjenis", "jenjang"])
|
| 345 |
+
nama_col_glob = pick_col(df_all_raw, ["nm_perpustakaan", "nama_perpustakaan", "nm_instansi_lembaga", "Nama Perpustakaan"])
|
| 346 |
+
|
| 347 |
+
if prov_col_glob is None or kab_col_glob is None or kew_col_glob is None or jenis_col_glob is None:
|
| 348 |
+
raise KeyError("Kolom minimum DM tidak lengkap (prov/kab/kew/jenis).")
|
| 349 |
+
|
| 350 |
+
df_all_raw["KEW_NORM"] = df_all_raw[kew_col_glob].apply(norm_kew)
|
| 351 |
+
|
| 352 |
+
val_map_jenis = {
|
| 353 |
+
"PERPUSTAKAAN SEKOLAH": "sekolah",
|
| 354 |
+
"SEKOLAH": "sekolah",
|
| 355 |
+
"PERPUSTAKAAN UMUM": "umum",
|
| 356 |
+
"UMUM": "umum",
|
| 357 |
+
"PERPUSTAKAAN DAERAH": "umum",
|
| 358 |
+
"PERPUSTAKAAN KHUSUS": "khusus",
|
| 359 |
+
"KHUSUS": "khusus",
|
| 360 |
+
"PERPUSTAKAAN PERGURUAN TINGGI": "khusus",
|
| 361 |
+
"PERGURUAN TINGGI": "khusus",
|
| 362 |
+
}
|
| 363 |
+
df_all_raw["_dataset"] = df_all_raw[jenis_col_glob].apply(_norm_text).map(val_map_jenis)
|
| 364 |
+
|
| 365 |
+
df_all_raw["prov_clean"] = df_all_raw[prov_col_glob].apply(clean_prov_display)
|
| 366 |
+
df_all_raw["kab_clean"] = df_all_raw[kab_col_glob].apply(clean_kab_display)
|
| 367 |
+
|
| 368 |
+
# KEY gabungan untuk join meta (FIX utama)
|
| 369 |
+
df_all_raw["prov_key"] = df_all_raw["prov_clean"].apply(norm_prov_label)
|
| 370 |
+
df_all_raw["kab_key"] = df_all_raw["kab_clean"].apply(norm_kab_label)
|
| 371 |
+
|
| 372 |
+
DATA_INFO = f"DM terbaca: **{DATA_FILE}** | baris: **{len(df_all_raw)}** | sheet: {len(xls.sheet_names)}"
|
| 373 |
except Exception as e:
|
| 374 |
+
df_all_raw = None
|
| 375 |
+
DATA_INFO = f"⚠️ Gagal memuat `{DATA_FILE}` | Error: `{e}`"
|
| 376 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 377 |
|
| 378 |
+
# ============================================================
|
| 379 |
+
# 4) LOAD META (KAB/KOTA & SMA) — FIX JOIN DUA KEY + FALLBACK
|
| 380 |
+
# ============================================================
|
| 381 |
+
def load_meta_kab_kota(path: str) -> pd.DataFrame | None:
|
| 382 |
+
fp = Path(path)
|
| 383 |
+
if not fp.exists():
|
| 384 |
+
return None
|
| 385 |
+
m = pd.read_excel(fp)
|
| 386 |
|
| 387 |
+
col_prov = pick_col(m, ["PROVINSI","Provinsi","provinsi","nama_provinsi","nm_provinsi"])
|
| 388 |
+
col_kab = pick_col(m, ["KABUPATEN_KOTA","Kabupaten/Kota","Kab/Kota","KAB/KOTA","kabupaten_kota","kab/kota","Kab_Kota"])
|
| 389 |
|
| 390 |
+
col_kec = pick_col(m, ["Jml Kec","JML KEC","Jml Kecamatan","Jumlah Kecamatan","Kecamatan","jumlah_kecamatan","jml_kecamatan"])
|
| 391 |
+
col_des = pick_col(m, ["Jml Desa","JML DESA","Jml Desa/Kel","Jumlah Desa","Desa/Kel","jumlah_desa","jumlah_kelurahan"])
|
| 392 |
|
| 393 |
+
col_sd = pick_col(m, ["SD","Jumlah SD","Total SD","Jml_SD","jml_sd","jumlah_sd","SD_Total"])
|
| 394 |
+
col_smp = pick_col(m, ["SMP","Jumlah SMP","Total SMP","Jml_SMP","jml_smp","jumlah_smp","SMP_Total"])
|
|
|
|
|
|
|
|
|
|
| 395 |
|
| 396 |
+
col_pop_umum = pick_col(m, ["jumlah_populasi_umum","Populasi Umum","pop_umum"])
|
| 397 |
+
col_pop_sekolah = pick_col(m, ["jumlah_populasi_sekolah","Populasi Sekolah","pop_sekolah"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 398 |
|
| 399 |
+
if col_kab is None:
|
| 400 |
+
return None
|
| 401 |
|
| 402 |
+
out = pd.DataFrame()
|
| 403 |
+
out["prov_meta_raw"] = m[col_prov].astype(str).str.strip() if col_prov else None
|
| 404 |
+
out["kab_meta_raw"] = m[col_kab].astype(str).str.strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 405 |
|
| 406 |
+
# tampil & key
|
| 407 |
+
out["prov_meta_clean"] = out["prov_meta_raw"].apply(clean_prov_display) if col_prov else None
|
| 408 |
+
out["kab_meta_clean"] = out["kab_meta_raw"].apply(clean_kab_display)
|
| 409 |
+
|
| 410 |
+
out["prov_key"] = out["prov_meta_clean"].apply(norm_prov_label) if col_prov else ""
|
| 411 |
+
out["kab_key"] = out["kab_meta_clean"].apply(norm_kab_label)
|
| 412 |
+
|
| 413 |
+
out["Kab_Kota_Label"] = out["kab_meta_clean"]
|
| 414 |
+
|
| 415 |
+
out["Jml_Kecamatan"] = m[col_kec].apply(coerce_num) if col_kec else np.nan
|
| 416 |
+
out["Jml_DesaKel"] = m[col_des].apply(coerce_num) if col_des else np.nan
|
| 417 |
+
out["Jml_SD"] = m[col_sd].apply(coerce_num) if col_sd else np.nan
|
| 418 |
+
out["Jml_SMP"] = m[col_smp].apply(coerce_num) if col_smp else np.nan
|
| 419 |
+
|
| 420 |
+
out["Pop_Umum_Meta"] = m[col_pop_umum].apply(coerce_num) if col_pop_umum else np.nan
|
| 421 |
+
out["Pop_Sekolah_Meta"] = m[col_pop_sekolah].apply(coerce_num) if col_pop_sekolah else np.nan
|
| 422 |
+
|
| 423 |
+
# agregat gabungan prov+kab
|
| 424 |
+
grp_pk = out.groupby(["prov_key","kab_key"], as_index=False).agg({
|
| 425 |
+
"Kab_Kota_Label":"first",
|
| 426 |
+
"Jml_Kecamatan":"max",
|
| 427 |
+
"Jml_DesaKel":"max",
|
| 428 |
+
"Jml_SD":"max",
|
| 429 |
+
"Jml_SMP":"max",
|
| 430 |
+
"Pop_Umum_Meta":"max",
|
| 431 |
+
"Pop_Sekolah_Meta":"max",
|
| 432 |
+
})
|
| 433 |
|
| 434 |
+
# fallback kab-only
|
| 435 |
+
grp_k = out.groupby(["kab_key"], as_index=False).agg({
|
| 436 |
+
"Kab_Kota_Label":"first",
|
| 437 |
+
"Jml_Kecamatan":"max",
|
| 438 |
+
"Jml_DesaKel":"max",
|
| 439 |
+
"Jml_SD":"max",
|
| 440 |
+
"Jml_SMP":"max",
|
| 441 |
+
"Pop_Umum_Meta":"max",
|
| 442 |
+
"Pop_Sekolah_Meta":"max",
|
| 443 |
+
}).rename(columns={
|
| 444 |
+
"Kab_Kota_Label":"Kab_Kota_Label_kabonly",
|
| 445 |
+
"Jml_Kecamatan":"Jml_Kecamatan_kabonly",
|
| 446 |
+
"Jml_DesaKel":"Jml_DesaKel_kabonly",
|
| 447 |
+
"Jml_SD":"Jml_SD_kabonly",
|
| 448 |
+
"Jml_SMP":"Jml_SMP_kabonly",
|
| 449 |
+
"Pop_Umum_Meta":"Pop_Umum_Meta_kabonly",
|
| 450 |
+
"Pop_Sekolah_Meta":"Pop_Sekolah_Meta_kabonly",
|
| 451 |
+
})
|
| 452 |
+
|
| 453 |
+
merged = grp_pk.merge(grp_k, on="kab_key", how="left")
|
| 454 |
+
|
| 455 |
+
def fill0(a, b):
|
| 456 |
+
aa = pd.to_numeric(a, errors="coerce")
|
| 457 |
+
bb = pd.to_numeric(b, errors="coerce")
|
| 458 |
+
return aa.where(aa.notna() & (aa > 0), bb)
|
| 459 |
+
|
| 460 |
+
merged["Jml_SD"] = fill0(merged["Jml_SD"], merged["Jml_SD_kabonly"])
|
| 461 |
+
merged["Jml_SMP"] = fill0(merged["Jml_SMP"], merged["Jml_SMP_kabonly"])
|
| 462 |
+
merged["Jml_Kecamatan"] = fill0(merged["Jml_Kecamatan"], merged["Jml_Kecamatan_kabonly"])
|
| 463 |
+
merged["Jml_DesaKel"] = fill0(merged["Jml_DesaKel"], merged["Jml_DesaKel_kabonly"])
|
| 464 |
+
merged["Pop_Umum_Meta"] = fill0(merged["Pop_Umum_Meta"], merged["Pop_Umum_Meta_kabonly"])
|
| 465 |
+
merged["Pop_Sekolah_Meta"] = fill0(merged["Pop_Sekolah_Meta"], merged["Pop_Sekolah_Meta_kabonly"])
|
| 466 |
+
|
| 467 |
+
# label final
|
| 468 |
+
merged["Kab_Kota_Label"] = merged["Kab_Kota_Label"].fillna(merged["Kab_Kota_Label_kabonly"])
|
| 469 |
+
|
| 470 |
+
drop_cols = [c for c in merged.columns if c.endswith("_kabonly")]
|
| 471 |
+
merged = merged.drop(columns=drop_cols)
|
| 472 |
+
|
| 473 |
+
return merged
|
| 474 |
+
|
| 475 |
+
def load_meta_sma(path: str) -> pd.DataFrame | None:
|
| 476 |
+
fp = Path(path)
|
| 477 |
+
if not fp.exists():
|
| 478 |
+
return None
|
| 479 |
+
m = pd.read_excel(fp)
|
| 480 |
+
|
| 481 |
+
col_prov = pick_col(m, ["Provinsi","provinsi","PROVINSI","NAMA_PROVINSI","Nama Provinsi","nm_prov"])
|
| 482 |
+
col_sma = pick_col(m, ["sma ","SMA","Total SMA","TOTAL_SMA","Jml_SMA","Jumlah SMA","jumlah_sma","total_sma","jml_sma"])
|
| 483 |
+
|
| 484 |
+
if col_prov is None or col_sma is None:
|
| 485 |
+
return None
|
| 486 |
|
| 487 |
+
out = pd.DataFrame({
|
| 488 |
+
"Provinsi_Label": m[col_prov].astype(str).str.strip(),
|
| 489 |
+
"Jml_SMA": m[col_sma].apply(coerce_num),
|
| 490 |
+
})
|
| 491 |
+
out["prov_key"] = out["Provinsi_Label"].apply(norm_prov_label)
|
| 492 |
+
out = out.groupby("prov_key", as_index=False).agg({"Provinsi_Label":"first","Jml_SMA":"sum"})
|
| 493 |
+
return out
|
| 494 |
|
| 495 |
+
try:
|
| 496 |
+
meta_kab_df = load_meta_kab_kota(META_KAB_FILE)
|
| 497 |
+
if meta_kab_df is not None:
|
| 498 |
+
extra_info.append(f"Meta Kab/Kota terbaca: **{META_KAB_FILE}** (n={len(meta_kab_df)})")
|
| 499 |
else:
|
| 500 |
+
extra_info.append(f"⚠️ Meta Kab/Kota tidak terbaca / kolom tidak ditemukan.")
|
| 501 |
+
|
| 502 |
+
meta_sma_df = load_meta_sma(META_SMA_FILE)
|
| 503 |
+
if meta_sma_df is not None:
|
| 504 |
+
extra_info.append(f"Meta SMA terbaca: **{META_SMA_FILE}** (n={len(meta_sma_df)})")
|
| 505 |
+
else:
|
| 506 |
+
extra_info.append(f"⚠️ Meta SMA tidak terbaca / kolom tidak ditemukan.")
|
| 507 |
except Exception as e:
|
| 508 |
+
extra_info.append(f"⚠️ Gagal load meta: {e}")
|
| 509 |
+
|
| 510 |
+
if extra_info:
|
| 511 |
+
DATA_INFO = DATA_INFO + "<br>" + "<br>".join(extra_info)
|
| 512 |
+
|
| 513 |
|
| 514 |
+
# ============================================================
|
| 515 |
+
# 5) HITUNG IPLM (REAL) — YJ + minmax nasional
|
| 516 |
+
# ============================================================
|
| 517 |
+
def minmax01(s: pd.Series) -> pd.Series:
|
| 518 |
+
x = pd.to_numeric(s, errors="coerce").astype(float)
|
| 519 |
+
mn = x.min(skipna=True)
|
| 520 |
+
mx = x.max(skipna=True)
|
| 521 |
+
if pd.isna(mn) or pd.isna(mx) or mx == mn:
|
| 522 |
+
return pd.Series(0.0, index=s.index)
|
| 523 |
+
return (x - mn) / (mx - mn)
|
| 524 |
|
| 525 |
+
def mean_row_norm(df: pd.DataFrame, cols_norm: list[str]) -> pd.Series:
|
| 526 |
+
return df[cols_norm].mean(axis=1, skipna=True)
|
| 527 |
|
| 528 |
+
def compute_iplm_real(df: pd.DataFrame) -> pd.DataFrame:
|
| 529 |
+
out = df.copy()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 530 |
|
| 531 |
+
# pastikan indikator numeric
|
| 532 |
+
avail = [c for c in ALL_IND if c in out.columns]
|
| 533 |
+
for c in avail:
|
| 534 |
+
out[c] = out[c].apply(coerce_num)
|
| 535 |
|
| 536 |
+
# transform Yeo-Johnson tiap indikator -> yj_*
|
| 537 |
+
for c in avail:
|
| 538 |
+
x = out[c].to_numpy(dtype=float)
|
| 539 |
+
m = ~np.isnan(x)
|
| 540 |
y = np.full_like(x, np.nan, dtype=float)
|
| 541 |
+
if m.sum() > 1:
|
| 542 |
pt = PowerTransformer(method="yeo-johnson", standardize=False)
|
| 543 |
+
y[m] = pt.fit_transform(x[m].reshape(-1, 1)).ravel()
|
| 544 |
else:
|
| 545 |
+
y[m] = x[m]
|
| 546 |
+
out[f"yj_{c}"] = y
|
| 547 |
+
out[f"norm_{c}"] = minmax01(pd.Series(out[f"yj_{c}"])).to_numpy()
|
| 548 |
+
|
| 549 |
+
def norm_list(cols):
|
| 550 |
+
return [f"norm_{c}" for c in cols if f"norm_{c}" in out.columns]
|
| 551 |
+
|
| 552 |
+
nk = norm_list(koleksi_cols)
|
| 553 |
+
ns = norm_list(sdm_cols)
|
| 554 |
+
npel = norm_list(pelayanan_cols)
|
| 555 |
+
npeng = norm_list(pengelolaan_cols)
|
| 556 |
+
|
| 557 |
+
out["sub_koleksi"] = mean_row_norm(out, nk).fillna(0.0)
|
| 558 |
+
out["sub_sdm"] = mean_row_norm(out, ns).fillna(0.0)
|
| 559 |
+
out["sub_pelayanan"] = mean_row_norm(out, npel).fillna(0.0)
|
| 560 |
+
out["sub_pengelolaan"] = mean_row_norm(out, npeng).fillna(0.0)
|
| 561 |
+
|
| 562 |
+
out["dim_kepatuhan"] = out[["sub_koleksi","sub_sdm"]].mean(axis=1, skipna=True).fillna(0.0)
|
| 563 |
+
out["dim_kinerja"] = out[["sub_pelayanan","sub_pengelolaan"]].mean(axis=1, skipna=True).fillna(0.0)
|
| 564 |
+
|
| 565 |
+
out["Indeks_Real_0_100"] = 100.0 * (W_KEPATUHAN*out["dim_kepatuhan"] + W_KINERJA*out["dim_kinerja"])
|
| 566 |
+
return out
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 567 |
|
| 568 |
+
|
| 569 |
+
# ============================================================
|
| 570 |
+
# 6) SAMPLING FACTOR (68%) UNTUK IPLM FINAL (REAL * factor)
|
| 571 |
+
# ============================================================
|
| 572 |
+
def attach_sampling_factor(df: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Attach the 68%-coverage sampling factor and compute the final index.

    Adds to a copy of *df*:
      - ``Target_Unit_68``: target sample count (68% of the meta population).
      - ``SamplingFactor_68``: penalty factor (defaults to 1.0 when no meta
        population is available for the row).
      - ``Indeks_Final_0_100``: ``Indeks_Real_0_100 * SamplingFactor_68``.

    Returns:
        (out, debug): the augmented DataFrame plus a ``Meta_Match_Debug``
        table listing, per kab/kota, whether the meta row matched and whether
        the SD+SMP population resolved to zero (useful to diagnose regions
        such as Kep. Seribu whose school coverage shows up as 0).

    NOTE(review): relies on module globals ``meta_kab_df``, ``meta_sma_df``,
    ``TARGET_COVERAGE`` and helpers ``ceil_int``/``sampling_factor`` defined
    elsewhere in this file.
    """
    out = df.copy()
    out["Target_Unit_68"] = np.nan
    out["SamplingFactor_68"] = 1.0

    debug_rows = []

    # ----------- KAB/KOTA (school & public libraries) -----------
    if meta_kab_df is not None and len(meta_kab_df) > 0:
        m = out["KEW_NORM"] == "KAB/KOTA"
        kab = out[m].copy()
        if not kab.empty:
            # Sample counts per (province, kab/kota) per dataset type.
            g = (kab.groupby(["prov_key", "kab_key", "_dataset"]).size()
                 .unstack(fill_value=0).reset_index())
            # Guarantee all three dataset columns exist even if absent in data.
            for c in ["sekolah", "umum", "khusus"]:
                if c not in g.columns:
                    g[c] = 0

            # Join on prov_key + kab_key for a precise meta match.
            merged = g.merge(meta_kab_df, on=["prov_key", "kab_key"], how="left", suffixes=("", "_meta"))

            # School population = SD+SMP, falling back to Pop_Sekolah_Meta when zero.
            sd = pd.to_numeric(merged.get("Jml_SD"), errors="coerce").fillna(0)
            smp = pd.to_numeric(merged.get("Jml_SMP"), errors="coerce").fillna(0)
            pop_sek = sd + smp
            pop_sek = pop_sek.where(pop_sek > 0, pd.to_numeric(merged.get("Pop_Sekolah_Meta"), errors="coerce").fillna(0))
            merged["POP_SD_SMP"] = pop_sek

            # Public-library population = districts + villages, fallback Pop_Umum_Meta.
            kec = pd.to_numeric(merged.get("Jml_Kecamatan"), errors="coerce").fillna(0)
            desa = pd.to_numeric(merged.get("Jml_DesaKel"), errors="coerce").fillna(0)
            pop_um = kec + desa
            pop_um = pop_um.where(pop_um > 0, pd.to_numeric(merged.get("Pop_Umum_Meta"), errors="coerce").fillna(0))
            merged["POP_KEC_DESA"] = pop_um

            merged["Target_Sekolah_68"] = merged["POP_SD_SMP"].apply(lambda x: ceil_int(TARGET_COVERAGE * float(x)))
            merged["Target_Umum_68"] = merged["POP_KEC_DESA"].apply(lambda x: ceil_int(TARGET_COVERAGE * float(x)))

            merged["SF_Sekolah"] = merged.apply(lambda r: sampling_factor(r["sekolah"], r["Target_Sekolah_68"]), axis=1)
            merged["SF_Umum"] = merged.apply(lambda r: sampling_factor(r["umum"], r["Target_Umum_68"]), axis=1)

            # Map the factors back onto the individual DM rows.
            sf_map = {}
            tgt_map = {}
            for _, r in merged.iterrows():
                sf_map[(r["prov_key"], r["kab_key"], "sekolah")] = float(r["SF_Sekolah"])
                sf_map[(r["prov_key"], r["kab_key"], "umum")] = float(r["SF_Umum"])
                # "khusus" libraries carry no coverage penalty.
                sf_map[(r["prov_key"], r["kab_key"], "khusus")] = 1.0

                tgt_map[(r["prov_key"], r["kab_key"], "sekolah")] = float(r["Target_Sekolah_68"])
                tgt_map[(r["prov_key"], r["kab_key"], "umum")] = float(r["Target_Umum_68"])
                tgt_map[(r["prov_key"], r["kab_key"], "khusus")] = np.nan

                debug_rows.append({
                    "prov_key": r["prov_key"],
                    "kab_key": r["kab_key"],
                    "Kab_Kota_Label_meta": r.get("Kab_Kota_Label", None),
                    "sampel_sekolah": int(r["sekolah"]),
                    "sd": float(sd.loc[r.name]) if r.name in sd.index else np.nan,
                    "smp": float(smp.loc[r.name]) if r.name in smp.index else np.nan,
                    "pop_sd_smp": float(r["POP_SD_SMP"]),
                    "target_sekolah_68": int(r["Target_Sekolah_68"]),
                    # A missing meta label means the left join found no meta row.
                    "meta_match": "MATCH" if pd.notna(r.get("Kab_Kota_Label", np.nan)) else "NO_META_ROW",
                    "flag_pop0": float(r["POP_SD_SMP"]) <= 0
                })

            idx = out[m].index
            out.loc[idx, "SamplingFactor_68"] = [
                sf_map.get((pk, kk, ds), 1.0)
                for pk, kk, ds in zip(out.loc[idx, "prov_key"], out.loc[idx, "kab_key"], out.loc[idx, "_dataset"])
            ]
            out.loc[idx, "Target_Unit_68"] = [
                tgt_map.get((pk, kk, ds), np.nan)
                for pk, kk, ds in zip(out.loc[idx, "prov_key"], out.loc[idx, "kab_key"], out.loc[idx, "_dataset"])
            ]

    # ----------- PROVINSI (high schools / SMA) -----------
    if meta_sma_df is not None and len(meta_sma_df) > 0:
        mp = out["KEW_NORM"] == "PROVINSI"
        prov = out[mp].copy()
        if not prov.empty:
            # SMA sample = count of "sekolah" dataset rows at province level.
            prov_sek = prov[prov["_dataset"] == "sekolah"].copy()
            g = prov_sek.groupby("prov_key").size().rename("Sampel_SMA_DM").reset_index()
            mergedp = g.merge(meta_sma_df[["prov_key", "Jml_SMA", "Provinsi_Label"]], on="prov_key", how="left")
            mergedp["Jml_SMA"] = pd.to_numeric(mergedp["Jml_SMA"], errors="coerce").fillna(0)
            mergedp["Target_SMA_68"] = mergedp["Jml_SMA"].apply(lambda x: ceil_int(TARGET_COVERAGE * float(x)))
            mergedp["SF_SMA"] = mergedp.apply(lambda r: sampling_factor(r["Sampel_SMA_DM"], r["Target_SMA_68"]), axis=1)

            sfp = mergedp.set_index("prov_key")["SF_SMA"].to_dict()
            tgtp = mergedp.set_index("prov_key")["Target_SMA_68"].to_dict()

            idx = out[(out["KEW_NORM"] == "PROVINSI") & (out["_dataset"] == "sekolah")].index
            out.loc[idx, "SamplingFactor_68"] = [float(sfp.get(pk, 1.0)) for pk in out.loc[idx, "prov_key"]]
            out.loc[idx, "Target_Unit_68"] = [float(tgtp.get(pk, np.nan)) for pk in out.loc[idx, "prov_key"]]

    # FINAL index = real index scaled by the coverage penalty.
    out["Indeks_Final_0_100"] = out["Indeks_Real_0_100"].fillna(0) * out["SamplingFactor_68"].fillna(1.0)

    debug = pd.DataFrame(debug_rows)
    if not debug.empty:
        # Surface NO_META_ROW and zero-population rows first.
        debug = debug.sort_values(["meta_match", "flag_pop0"], ascending=[True, False]).reset_index(drop=True)
    return out, debug
|
| 679 |
+
|
| 680 |
+
|
| 681 |
+
# ============================================================
|
| 682 |
+
# 7) GAP VERIFICATION (TABEL) — pakai meta (FIX join)
|
| 683 |
+
# ============================================================
|
| 684 |
+
def compute_gap_verification(df_filtered: pd.DataFrame, kew_value: str) -> pd.DataFrame:
    """Build the gap-verification table (sample vs 68% target) per region.

    Depending on *kew_value*:
      - KAB/KOTA: one row per kab/kota with school and public-library sample
        counts, meta populations, 68% targets, and the resulting shortfalls.
      - PROVINSI: one row per province with SMA sample counts vs the SMA meta
        population.
      - Anything else: a one-cell "Info" DataFrame explaining why.

    NOTE(review): relies on module globals ``meta_kab_df``, ``meta_sma_df``
    and ``TARGET_COVERAGE`` defined elsewhere in this file.
    """
    if df_filtered is None or len(df_filtered) == 0:
        return pd.DataFrame()

    kew_norm = str(kew_value or "").upper()

    # =================== KAB/KOTA ===================
    if ("KAB" in kew_norm or "KOTA" in kew_norm):
        if meta_kab_df is None:
            return pd.DataFrame({"Info": ["Meta Kab/Kota tidak tersedia."]})

        tmp = df_filtered.copy()
        tmp = tmp[pd.notna(tmp["kab_clean"])]
        if tmp.empty:
            return pd.DataFrame()

        # Sample counts: total, school-only, public-only per (prov, kab).
        g_total = tmp.groupby(["prov_key", "kab_key"]).size().rename("Sampel Total").reset_index()
        g_sek_total = tmp[tmp["_dataset"] == "sekolah"].groupby(["prov_key", "kab_key"]).size().rename("Sampel Sekolah").reset_index()
        g_umum = tmp[tmp["_dataset"] == "umum"].groupby(["prov_key", "kab_key"]).size().rename("Sampel Umum").reset_index()

        merged = (
            g_total
            .merge(g_sek_total, on=["prov_key", "kab_key"], how="left")
            .merge(g_umum, on=["prov_key", "kab_key"], how="left")
            .merge(meta_kab_df, on=["prov_key", "kab_key"], how="left")
        )

        for c in ["Sampel Total", "Sampel Sekolah", "Sampel Umum"]:
            merged[c] = merged[c].fillna(0).astype(int)

        # School population: SD+SMP, falling back to Pop_Sekolah_Meta when zero.
        sd = pd.to_numeric(merged.get("Jml_SD"), errors="coerce").fillna(0)
        smp = pd.to_numeric(merged.get("Jml_SMP"), errors="coerce").fillna(0)
        pop_sek = sd + smp
        pop_sek = pop_sek.where(pop_sek > 0, pd.to_numeric(merged.get("Pop_Sekolah_Meta"), errors="coerce").fillna(0))
        merged["Populasi Sekolah (SD+SMP)"] = pop_sek

        # Public population: districts + villages, fallback Pop_Umum_Meta.
        kec = pd.to_numeric(merged.get("Jml_Kecamatan"), errors="coerce").fillna(0)
        desa = pd.to_numeric(merged.get("Jml_DesaKel"), errors="coerce").fillna(0)
        pop_um = kec + desa
        pop_um = pop_um.where(pop_um > 0, pd.to_numeric(merged.get("Pop_Umum_Meta"), errors="coerce").fillna(0))
        merged["Populasi Admin (Kec+Desa/Kel)"] = pop_um

        merged["Target Sekolah (68%)"] = np.ceil(merged["Populasi Sekolah (SD+SMP)"] * TARGET_COVERAGE)
        merged["Target Umum (68%)"] = np.ceil(merged["Populasi Admin (Kec+Desa/Kel)"] * TARGET_COVERAGE)

        # Shortfall = target - sample, floored at 0 (over-sampling is not a gap).
        merged["Kekurangan Sampel Sekolah"] = (merged["Target Sekolah (68%)"] - merged["Sampel Sekolah"]).fillna(0).astype(int).clip(lower=0)
        merged["Kekurangan Sampel Umum"] = (merged["Target Umum (68%)"] - merged["Sampel Umum"]).fillna(0).astype(int).clip(lower=0)

        out = pd.DataFrame({
            "Kab/Kota": merged["Kab_Kota_Label"].fillna(merged["kab_key"]),
            "Sampel Total": merged["Sampel Total"],

            "Sampel Sekolah": merged["Sampel Sekolah"],
            "Populasi Sekolah (SD+SMP)": merged["Populasi Sekolah (SD+SMP)"],
            "Target Sekolah (68%)": merged["Target Sekolah (68%)"],
            "Kekurangan Sampel Sekolah": merged["Kekurangan Sampel Sekolah"],

            "Sampel Umum": merged["Sampel Umum"],
            "Populasi Admin (Kec+Desa/Kel)": merged["Populasi Admin (Kec+Desa/Kel)"],
            "Target Umum (68%)": merged["Target Umum (68%)"],
            "Kekurangan Sampel Umum": merged["Kekurangan Sampel Umum"],
        })

        return out.sort_values("Kab/Kota").reset_index(drop=True).round(0)

    # =================== PROVINSI ===================
    if ("PROV" in kew_norm):
        if meta_sma_df is None:
            return pd.DataFrame({"Info": ["Meta SMA tidak tersedia."]})

        tmp = df_filtered.copy()
        tmp = tmp[pd.notna(tmp["prov_clean"])]
        if tmp.empty:
            return pd.DataFrame({"Info": ["Tidak ada data sampel kewenangan provinsi."]})

        g_total = tmp.groupby("prov_key").size().rename("Sampel Total (Prov)").reset_index()
        g_sma = tmp[tmp["_dataset"] == "sekolah"].groupby("prov_key").size().rename("Sampel SMA (DM)").reset_index()

        merged = (
            g_total
            .merge(g_sma, on="prov_key", how="left")
            .merge(meta_sma_df[["prov_key", "Provinsi_Label", "Jml_SMA"]], on="prov_key", how="left")
        )
        merged["Sampel SMA (DM)"] = merged["Sampel SMA (DM)"].fillna(0).astype(int)
        merged["Jml_SMA"] = pd.to_numeric(merged["Jml_SMA"], errors="coerce").fillna(0)

        merged["Target SMA (68%)"] = np.ceil(merged["Jml_SMA"] * TARGET_COVERAGE)
        merged["Kekurangan Sampel SMA"] = (merged["Target SMA (68%)"] - merged["Sampel SMA (DM)"]).fillna(0).astype(int).clip(lower=0)

        out = pd.DataFrame({
            "Provinsi": merged["Provinsi_Label"].fillna(merged["prov_key"]),
            "Sampel Total (Prov)": merged["Sampel Total (Prov)"].fillna(0).astype(int),
            "Sampel SMA (DM)": merged["Sampel SMA (DM)"],
            "Populasi SMA (Meta)": merged["Jml_SMA"],
            "Target SMA (68%)": merged["Target SMA (68%)"],
            "Kekurangan Sampel SMA": merged["Kekurangan Sampel SMA"],
        })
        return out.sort_values("Provinsi").reset_index(drop=True).round(0)

    return pd.DataFrame({"Info": ["Kewenangan tidak dikenali / tidak didukung."]})
|
| 786 |
|
| 787 |
|
| 788 |
+
# ============================================================
|
| 789 |
+
# 8) GRAFIK GAP (bar)
|
| 790 |
+
# ============================================================
|
| 791 |
+
def make_gap_figure(verif_df: pd.DataFrame, kew_value: str) -> go.Figure:
    """Render the sample-shortfall bar chart for the current authority level.

    KAB/KOTA: grouped bars (school gap vs public gap) for the 50 regions with
    the largest combined gap. PROVINSI: a single bar series of SMA gaps.
    Falls back to an empty, titled figure when the table is missing or in an
    unrecognized format.
    """
    figure = go.Figure()
    if verif_df is None or verif_df.empty:
        figure.update_layout(title="Kekurangan Sampel (tidak ada data)")
        return figure

    scope = str(kew_value or "").upper()

    def as_int(series):
        # Coerce any column to non-negative-safe integers for plotting.
        return pd.to_numeric(series, errors="coerce").fillna(0).astype(int)

    kab_mode = ("KAB" in scope) or ("KOTA" in scope)
    if kab_mode and ("Kab/Kota" in verif_df.columns):
        data = verif_df.copy()
        data["gap_total"] = as_int(data.get("Kekurangan Sampel Sekolah", 0)) + as_int(data.get("Kekurangan Sampel Umum", 0))
        # Keep only the 50 worst regions so labels stay readable.
        data = data.sort_values("gap_total", ascending=False).head(50)

        labels = data["Kab/Kota"].astype(str).tolist()
        school_gap = as_int(data.get("Kekurangan Sampel Sekolah", 0))
        public_gap = as_int(data.get("Kekurangan Sampel Umum", 0))

        figure.add_trace(go.Bar(x=labels, y=school_gap, name="Gap Sekolah", text=school_gap, textposition="outside"))
        figure.add_trace(go.Bar(x=labels, y=public_gap, name="Gap Umum", text=public_gap, textposition="outside"))

        figure.update_layout(
            title=f"Kekurangan Sampel (Top 50) — Target {int(TARGET_COVERAGE*100)}%",
            barmode="group",
            margin=dict(l=40, r=20, t=60, b=160),
            xaxis_title="Kab/Kota",
            yaxis_title="Kekurangan (unit)"
        )
        figure.update_xaxes(tickangle=-35)
        return figure

    if ("PROV" in scope) and ("Provinsi" in verif_df.columns):
        data = verif_df.copy().sort_values("Kekurangan Sampel SMA", ascending=False)
        labels = data["Provinsi"].astype(str).tolist()
        sma_gap = as_int(data.get("Kekurangan Sampel SMA", 0))
        figure.add_trace(go.Bar(x=labels, y=sma_gap, name="Gap SMA", text=sma_gap, textposition="outside"))
        figure.update_layout(
            title=f"Kekurangan Sampel (PROVINSI) — Target {int(TARGET_COVERAGE*100)}%",
            margin=dict(l=40, r=20, t=60, b=160),
            xaxis_title="Provinsi",
            yaxis_title="Kekurangan (unit)"
        )
        figure.update_xaxes(tickangle=-35)
        return figure

    figure.update_layout(title="Kekurangan Sampel — format data tidak dikenali")
    return figure
|
| 840 |
|
| 841 |
|
| 842 |
+
# ============================================================
|
| 843 |
+
# 9) LLM NARASI (GAP) + WORD
|
| 844 |
+
# ============================================================
|
| 845 |
+
def build_context_gap(verif_df: pd.DataFrame, prov: str, kab: str, kew: str) -> str:
    """Serialize the gap table into a compact plain-text context for the LLM.

    Includes the filter scope, the 68% target, per-column gap totals, and the
    top-10 regions by the first shortfall column.
    """
    if kab and kab != "(Semua)":
        region = kab
    elif prov and prov != "(Semua)":
        region = prov
    else:
        region = "NASIONAL"

    parts = [
        f"Wilayah filter: {region}",
        f"Kewenangan: {kew}",
        f"Target pengumpulan: {int(TARGET_COVERAGE*100)}% dari populasi unit (meta).",
        f"Jumlah unit analisis: {len(verif_df)}",
    ]

    shortfall_cols = [c for c in verif_df.columns if "Kekurangan Sampel" in c]
    for col in shortfall_cols:
        total = int(pd.to_numeric(verif_df[col], errors="coerce").fillna(0).sum())
        parts.append(f"Total {col}: {total}")

    if shortfall_cols:
        col = shortfall_cols[0]
        table = verif_df.copy()
        table[col] = pd.to_numeric(table[col], errors="coerce").fillna(0)
        if "Kab/Kota" in table.columns:
            label_col = "Kab/Kota"
        elif "Provinsi" in table.columns:
            label_col = "Provinsi"
        else:
            label_col = table.columns[0]
        worst = table.sort_values(col, ascending=False).head(10)
        parts.append("\nTop prioritas (gap terbesar):")
        for _, row in worst.iterrows():
            parts.append(f"- {row[label_col]}: {col}={int(row[col])}")

    return "\n".join(parts)
|
| 869 |
|
| 870 |
+
def rule_based_gap_report(verif_df: pd.DataFrame, prov: str, kab: str, kew: str) -> str:
    """Produce a deterministic fallback gap summary when no LLM is available."""
    if verif_df is None or verif_df.empty:
        return "Tidak ada data verifikasi yang dapat dilaporkan."

    if kab and kab != "(Semua)":
        region = kab
    elif prov and prov != "(Semua)":
        region = prov
    else:
        region = "NASIONAL"

    shortfall_cols = [c for c in verif_df.columns if "Kekurangan Sampel" in c]

    report = [
        "## Ringkasan Kekurangan Sampel IPLM (Rule-based)\n",
        f"Wilayah: {region}",
        f"Kewenangan: {kew}",
        f"Target pengumpulan: {int(TARGET_COVERAGE*100)}% dari populasi unit (meta).",
        f"Jumlah unit analisis: {len(verif_df)}\n",
    ]
    for col in shortfall_cols:
        total = int(pd.to_numeric(verif_df[col], errors="coerce").fillna(0).sum())
        report.append(f"- Total {col}: **{total}** unit.")
    report.append("\nFokuskan pengumpulan pada wilayah dengan gap terbesar dan lakukan monitoring rutin hingga gap turun.")
    return "\n".join(report)
|
| 887 |
|
| 888 |
+
def generate_llm_gap_report(verif_df: pd.DataFrame, prov: str, kab: str, kew: str) -> str:
    """Generate the narrative gap report via the LLM, with rule-based fallback.

    Falls back to ``rule_based_gap_report`` when the client is unavailable,
    the feature flag is off, or the API call fails / returns empty text.

    NOTE(review): depends on module globals ``get_llm_client``, ``USE_LLM``,
    ``LLM_MODEL_NAME`` and ``TARGET_COVERAGE`` defined elsewhere in the file.
    """
    ctx = build_context_gap(verif_df, prov, kab, kew)
    client = get_llm_client()
    if client is None or not USE_LLM:
        return "⚠️ LLM tidak tersedia.\n\n" + rule_based_gap_report(verif_df, prov, kab, kew)

    system_prompt = (
        "Anda adalah analis kebijakan dan manajer program IPLM. "
        "Fokus hanya pada gap sampel (kekurangan unit) dan strategi menutup kekurangan."
    )
    user_prompt = f"""
DATA RINGKAS GAP SAMPEL IPLM:

{ctx}

TULIS LAPORAN (BAHASA INDONESIA FORMAL) DENGAN STRUKTUR:
1) Ringkasan kondisi pengumpulan data (1 paragraf).
2) Total kekurangan sampel menuju target {int(TARGET_COVERAGE*100)}% (1 paragraf).
3) Prioritas wilayah (gap terbesar) dan alasan operasional (1 paragraf).
4) Rencana aksi 30–60 hari (naratif, bukan bullet).

BATASAN:
- Jangan membahas indeks/skor IPLM.
- Fokus hanya pada kekurangan sampel, target 68%, strategi.
"""
    try:
        # Low temperature: we want a factual, reproducible report.
        resp = client.chat_completion(
            model=LLM_MODEL_NAME,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            max_tokens=900,
            temperature=0.2,
            top_p=0.9,
        )
        text = resp.choices[0].message.content.strip()
        if not text:
            raise ValueError("Respon LLM kosong.")
        return text
    except Exception as e:
        # Any API failure degrades gracefully to the rule-based summary.
        return f"⚠️ Error LLM: {repr(e)}\n\n" + rule_based_gap_report(verif_df, prov, kab, kew)
|
| 930 |
+
|
| 931 |
+
def generate_word_report_gap(verif_df: pd.DataFrame, prov: str, kab: str, kew: str, analysis_text: str):
    """Export the gap-verification table and narrative analysis to a .docx file.

    Args:
        verif_df: gap-verification table (as produced by
            ``compute_gap_verification``); truncated to 200 rows in the doc.
        prov, kab, kew: current filter selections, used for the title.
        analysis_text: LLM (or rule-based) narrative pasted into the report.

    Returns:
        Path to the generated .docx file.

    NOTE(review): depends on module globals ``Document`` (python-docx),
    ``HAS_KALEIDO``, ``make_pie_plotly`` and ``TARGET_COVERAGE``.
    """
    wilayah = kab if kab and kab != "(Semua)" else (prov if prov and prov != "(Semua)" else "NASIONAL")
    doc = Document()
    doc.add_heading(f"Laporan Kekurangan Sampel IPLM – {wilayah}", level=1)
    doc.add_paragraph(f"Kewenangan: {kew}")
    doc.add_paragraph(f"Target pengumpulan: {int(TARGET_COVERAGE*100)}% dari populasi unit (meta).")
    doc.add_paragraph(f"Jumlah unit analisis: {len(verif_df)}")

    doc.add_heading("Tabel Verifikasi (Target & Kekurangan Sampel)", level=2)
    view = verif_df.copy()
    if len(view) > 200:
        # Cap the table so the document stays a reasonable size.
        doc.add_paragraph("Catatan: tabel dipotong (200 baris pertama).")
        view = view.head(200)

    table = doc.add_table(rows=1, cols=len(view.columns))
    hdr = table.rows[0].cells
    for i, c in enumerate(view.columns):
        hdr[i].text = str(c)

    for _, row in view.iterrows():
        r = table.add_row().cells
        for i, c in enumerate(view.columns):
            r[i].text = str(row[c])

    doc.add_heading("Ringkasan Visual (Opsional)", level=2)
    if not HAS_KALEIDO:
        # Plotly static-image export requires the optional 'kaleido' package.
        doc.add_paragraph("Grafik pie tidak dibuat karena 'kaleido' tidak tersedia.")
    else:
        pie_made = False
        if "Sampel Sekolah" in verif_df.columns and "Target Sekolah (68%)" in verif_df.columns:
            samp = pd.to_numeric(verif_df["Sampel Sekolah"], errors="coerce").fillna(0).sum()
            tgt = pd.to_numeric(verif_df["Target Sekolah (68%)"], errors="coerce").fillna(0).sum()
            img = make_pie_plotly(samp, tgt, "Capaian Sekolah (Total) terhadap Target")
            if img:
                doc.add_picture(img)
                pie_made = True
        if not pie_made:
            doc.add_paragraph("Pie chart tidak tersedia (kolom sampel/target tidak lengkap).")

    doc.add_heading("Analisis Naratif (LLM)", level=2)
    for p in analysis_text.split("\n"):
        if p.strip():
            doc.add_paragraph(p)

    # FIX: tempfile.mktemp() is deprecated and race-prone (the name can be
    # claimed by another process before we write). mkstemp() creates the file
    # atomically; close the fd so docx can reopen/overwrite the path.
    fd, outpath = tempfile.mkstemp(suffix=".docx")
    os.close(fd)
    doc.save(outpath)
    return outpath
|
| 978 |
|
| 979 |
|
| 980 |
+
# ============================================================
|
| 981 |
+
# 10) DROPDOWN
|
| 982 |
+
# ============================================================
|
| 983 |
+
def all_prov_choices():
    """Return province dropdown options: '(Semua)' plus sorted distinct names."""
    if df_all_raw is None or "prov_clean" not in df_all_raw.columns:
        return ["(Semua)"]
    names = df_all_raw["prov_clean"].dropna().astype(str).str.strip()
    options = sorted(v for v in names.unique() if v)
    return ["(Semua)"] + options
|
| 989 |
+
|
| 990 |
+
def get_kab_choices_for_prov(prov_value):
    """Return kab/kota dropdown options, scoped to *prov_value* if given."""
    if df_all_raw is None or "kab_clean" not in df_all_raw.columns:
        return ["(Semua)"]
    if prov_value is None or prov_value == "(Semua)":
        names = df_all_raw["kab_clean"].dropna().astype(str).str.strip()
    else:
        in_prov = df_all_raw["prov_clean"].astype(str).str.strip() == str(prov_value).strip()
        names = df_all_raw.loc[in_prov, "kab_clean"].dropna().astype(str).str.strip()
    options = sorted(v for v in names.unique() if v)
    return ["(Semua)"] + options
|
| 1000 |
+
|
| 1001 |
+
def all_kew_choices():
    """Return authority-level dropdown options from the normalized KEW column."""
    if df_all_raw is None:
        return ["(Semua)"]
    series = df_all_raw.get("KEW_NORM", pd.Series(dtype=object)).dropna().astype(str).str.strip()
    options = sorted(v for v in series.unique() if v)
    if not options:
        return ["(Semua)"]
    return ["(Semua)"] + options
|
| 1007 |
+
|
| 1008 |
+
# Initial dropdown state, computed once at import time from the loaded DM data.
prov_choices = all_prov_choices()
kab_choices = get_kab_choices_for_prov(prov_choices[0] if prov_choices else "(Semua)")
kew_choices = all_kew_choices()
# Prefer KAB/KOTA as the default authority level when it exists in the data.
default_kew = "KAB/KOTA" if "KAB/KOTA" in kew_choices else (kew_choices[0] if kew_choices else "(Semua)")
|
| 1012 |
+
|
| 1013 |
|
| 1014 |
+
# ============================================================
|
| 1015 |
+
# 11) CORE RUN (FILTER + IPLM + GAP + EXPORT)
|
| 1016 |
+
# ============================================================
|
| 1017 |
+
def run_core(prov_value, kab_value, kew_value):
    """Main pipeline behind the Run button: filter -> IPLM -> gap -> exports.

    Returns a 9-tuple matching the Gradio outputs:
        (verif_df, detail_df, dbg_meta, fig_gap,
         rekap_excel_path, raw_dm_path, word_path, msg, analysis_text)
    with empty frames / Nones when the source data is missing or the filter
    yields no rows.

    NOTE(review): depends on module globals ``df_all_raw``, ``nama_col_glob``,
    ``jenis_col_glob``, ``subjenis_col_glob`` and ``TARGET_COVERAGE``.
    """
    if df_all_raw is None or df_all_raw.empty:
        empty = pd.DataFrame()
        return empty, empty, empty, None, None, None, None, "DM tidak terbaca.", "Tidak ada analisis."

    df = df_all_raw.copy()

    # Province filter.
    if prov_value and prov_value != "(Semua)":
        df = df[df["prov_clean"].astype(str).str.strip() == str(prov_value).strip()]

    # Kab/kota filter.
    if kab_value and kab_value != "(Semua)":
        df = df[df["kab_clean"].astype(str).str.strip() == str(kab_value).strip()]

    # Authority-level filter.
    if kew_value and kew_value != "(Semua)":
        df = df[df["KEW_NORM"] == kew_value]

    if df.empty:
        empty = pd.DataFrame()
        return empty, empty, empty, None, None, None, None, "Tidak ada data untuk filter ini.", "Tidak ada analisis."

    # 1) Compute the real (unpenalized) IPLM scores.
    df_iplm = compute_iplm_real(df)

    # 2) Apply the 68% sampling factor to obtain the FINAL index.
    df_iplm, dbg_meta = attach_sampling_factor(df_iplm)

    # 3) Gap table (missing units vs target).
    verif_df = compute_gap_verification(df_iplm, kew_value)

    # 4) Gap bar chart.
    fig_gap = make_gap_figure(verif_df, kew_value)

    # 5) Compact detail view including the indices.
    keep_cols = [
        "prov_clean","kab_clean", nama_col_glob, "KEW_NORM", jenis_col_glob, subjenis_col_glob, "_dataset",
        "sub_koleksi","sub_sdm","sub_pelayanan","sub_pengelolaan",
        "dim_kepatuhan","dim_kinerja",
        "Indeks_Real_0_100","SamplingFactor_68","Indeks_Final_0_100"
    ]
    keep_cols = [c for c in keep_cols if c and c in df_iplm.columns]
    detail_df = df_iplm[keep_cols].copy()

    # 6) Write downloadable artifacts to a fresh temp directory.
    tmpdir = tempfile.mkdtemp()
    rekap_excel_path = os.path.join(tmpdir, "Rekap_IPLM_Gap_Target.xlsx")
    raw_dm_path = os.path.join(tmpdir, "DM_Subset_Raw.xlsx")

    with pd.ExcelWriter(rekap_excel_path, engine="openpyxl") as w:
        verif_df.to_excel(w, sheet_name="Verifikasi_Gap_Target", index=False)
        detail_df.to_excel(w, sheet_name="Detail_IPLM_Subset", index=False)
        dbg_meta.to_excel(w, sheet_name="Meta_Match_Debug", index=False)

    df_iplm.to_excel(raw_dm_path, index=False)

    # 7) Narrative analysis (gap only) and the Word export.
    analysis_text = generate_llm_gap_report(verif_df, prov_value, kab_value, kew_value)
    word_path = generate_word_report_gap(verif_df, prov_value, kab_value, kew_value, analysis_text)

    msg = (
        f"OK | Subset DM: {len(df_iplm)} baris | Verifikasi: {len(verif_df)} baris | "
        f"Debug meta: {len(dbg_meta)} baris | Target: {int(TARGET_COVERAGE*100)}%."
    )

    return verif_df, detail_df, dbg_meta, fig_gap, rekap_excel_path, raw_dm_path, word_path, msg, analysis_text
|
|
|
|
| 1084 |
|
| 1085 |
+
def on_prov_change(prov_value):
    """Refresh the kab/kota dropdown when the province selection changes."""
    choices = get_kab_choices_for_prov(prov_value)
    return gr.update(choices=choices, value="(Semua)")
|
| 1087 |
|
|
|
|
|
|
|
| 1088 |
|
| 1089 |
+
# ============================================================
|
| 1090 |
+
# 12) BUILD UI
|
| 1091 |
+
# ============================================================
|
| 1092 |
+
# ---- Gradio UI: filters, run button, result tables/plots, downloads. ----
with gr.Blocks() as demo:
    # Header with data-source info and join-debugging notes.
    gr.Markdown(
        f"""
# IPLM + Gap Sampel {int(TARGET_COVERAGE*100)}% + LLM Report

**Data:**
- DM: `{DATA_FILE}` (multi-sheet)
- Meta Kab/Kota: `{META_KAB_FILE}`
- Meta Provinsi (SMA): `{META_SMA_FILE}`

{DATA_INFO}

**Catatan FIX jumlah sekolah tidak kedetect:**
- Join meta sekarang pakai `prov_key + kab_key` (lebih presisi) + fallback `kab_key`.
- Lihat sheet/tabel **Meta_Match_Debug** untuk kab/kota yang *NO_META_ROW* atau *pop_sd_smp = 0*.
"""
    )

    # Filter row: province / kab-kota / authority level.
    with gr.Row():
        dd_prov = gr.Dropdown(label="Provinsi", choices=prov_choices, value=prov_choices[0])
        dd_kab = gr.Dropdown(label="Kab/Kota", choices=kab_choices, value=kab_choices[0])
        dd_kew = gr.Dropdown(label="Kewenangan", choices=kew_choices, value=default_kew)

    # Selecting a province rescopes the kab/kota choices.
    dd_prov.change(fn=on_prov_change, inputs=dd_prov, outputs=dd_kab)

    run_btn = gr.Button("Run (Hitung IPLM + Gap Sampel)")
    msg_out = gr.Markdown()

    gr.Markdown("## 1) Verifikasi Gap (Target & Kekurangan Unit)")
    verif_out = gr.DataFrame(interactive=False)

    gr.Markdown("## 2) Grafik Kekurangan Sampel (Top 50 biar kebaca)")
    gap_plot_out = gr.Plot()

    gr.Markdown("## 3) Detail Subset (IPLM Real + Final)")
    detail_out = gr.DataFrame(interactive=False)

    gr.Markdown("## 4) Debug Meta (Kenapa SD/SMP tidak kedetect?)")
    dbg_out = gr.DataFrame(interactive=False)

    gr.Markdown("## 5) Analisis Naratif (LLM) — GAP ONLY")
    analysis_out = gr.Markdown()

    # Download artifacts produced by run_core.
    with gr.Row():
        rekap_excel_out = gr.File(label="Download Rekap (.xlsx)")
        raw_dm_out = gr.File(label="Download DM Subset + IPLM (.xlsx)")
        word_out = gr.File(label="Download Laporan Word (.docx)")

    # Output order must match run_core's 9-tuple return.
    run_btn.click(
        fn=run_core,
        inputs=[dd_prov, dd_kab, dd_kew],
        outputs=[
            verif_out, detail_out, dbg_out,
            gap_plot_out,
            rekap_excel_out, raw_dm_out, word_out,
            msg_out, analysis_out
        ],
    )
|
| 1150 |
|