Update app.py
Browse files
app.py
CHANGED
|
@@ -1,14 +1,20 @@
|
|
| 1 |
# -*- coding: utf-8 -*-
|
| 2 |
"""
|
| 3 |
-
app.py β IPLM 2025 (
|
| 4 |
-
-
|
| 5 |
-
-
|
| 6 |
-
-
|
| 7 |
-
-
|
| 8 |
-
*
|
| 9 |
-
*
|
| 10 |
-
|
| 11 |
-
*
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"""
|
| 13 |
|
| 14 |
import os
|
|
@@ -22,30 +28,104 @@ import pandas as pd
|
|
| 22 |
import plotly.graph_objects as go
|
| 23 |
from sklearn.preprocessing import PowerTransformer
|
| 24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
# ============================================================
|
| 26 |
# 1) KONFIGURASI FILE
|
| 27 |
# ============================================================
|
| 28 |
|
| 29 |
-
DATA_FILE = "
|
| 30 |
-
POP_KAB = "Data_populasi_Kab_kota.xlsx"
|
| 31 |
-
POP_PROV = "Data_populasi_propinsi.xlsx"
|
| 32 |
|
| 33 |
-
|
|
|
|
|
|
|
|
|
|
| 34 |
W_KEPATUHAN = 0.30
|
| 35 |
W_KINERJA = 0.70
|
| 36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
# ============================================================
|
| 38 |
-
# 2)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
# ============================================================
|
| 40 |
|
| 41 |
def _canon(s: str) -> str:
|
| 42 |
return re.sub(r"[^a-z0-9]+", "", str(s).lower())
|
| 43 |
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
t = str(x).strip().upper()
|
| 48 |
-
return " ".join(t.split())
|
| 49 |
|
| 50 |
def pick_col(df, candidates):
|
| 51 |
for c in candidates:
|
|
@@ -58,6 +138,7 @@ def pick_col(df, candidates):
|
|
| 58 |
return can_map[k]
|
| 59 |
return None
|
| 60 |
|
|
|
|
| 61 |
def coerce_num(val):
|
| 62 |
if pd.isna(val):
|
| 63 |
return np.nan
|
|
@@ -79,6 +160,7 @@ def coerce_num(val):
|
|
| 79 |
except Exception:
|
| 80 |
return np.nan
|
| 81 |
|
|
|
|
| 82 |
def minmax_norm(s: pd.Series) -> pd.Series:
|
| 83 |
x = s.astype(float)
|
| 84 |
mn, mx = x.min(skipna=True), x.max(skipna=True)
|
|
@@ -86,6 +168,21 @@ def minmax_norm(s: pd.Series) -> pd.Series:
|
|
| 86 |
return pd.Series(0.0, index=s.index)
|
| 87 |
return (x - mn) / (mx - mn)
|
| 88 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
def norm_kew(v):
|
| 90 |
if pd.isna(v):
|
| 91 |
return None
|
|
@@ -98,7 +195,24 @@ def norm_kew(v):
|
|
| 98 |
return "PUSAT"
|
| 99 |
return t
|
| 100 |
|
| 101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
if pd.isna(s):
|
| 103 |
return None
|
| 104 |
t = str(s).upper()
|
|
@@ -107,113 +221,42 @@ def norm_prov_label(s):
|
|
| 107 |
t = " ".join(t.split())
|
| 108 |
return re.sub(r"[^A-Z0-9]+", "", t)
|
| 109 |
|
| 110 |
-
|
|
|
|
| 111 |
if pd.isna(s):
|
| 112 |
return None
|
| 113 |
t = str(s).upper()
|
| 114 |
-
t = t.replace("KABUPATEN", "KAB")
|
| 115 |
-
t = t.replace("
|
| 116 |
-
t = t.replace("KOTA ADMINISTRASI", "KOTA")
|
| 117 |
-
t = t.replace("KOTA ADM.", "KOTA")
|
| 118 |
-
t = t.replace("KOTA.", "KOTA")
|
| 119 |
t = " ".join(t.split())
|
| 120 |
return re.sub(r"[^A-Z0-9]+", "", t)
|
| 121 |
|
| 122 |
-
def safe_div(num, den):
|
| 123 |
-
if den is None or pd.isna(den) or den <= 0:
|
| 124 |
-
return np.nan
|
| 125 |
-
return float(num) / float(den)
|
| 126 |
-
|
| 127 |
-
def cap_bobot(cov: float) -> float:
|
| 128 |
-
if cov is None or pd.isna(cov) or cov <= 0:
|
| 129 |
-
return 0.0
|
| 130 |
-
return float(min(cov / TARGET_COVERAGE, 1.0))
|
| 131 |
-
|
| 132 |
-
def penalized_mean(row, cols):
|
| 133 |
-
vals = []
|
| 134 |
-
for c in cols:
|
| 135 |
-
k = f"norm_{c}"
|
| 136 |
-
if k in row.index:
|
| 137 |
-
v = row[k]
|
| 138 |
-
if pd.isna(v):
|
| 139 |
-
v = 0.0
|
| 140 |
-
vals.append(float(v))
|
| 141 |
-
return float(np.mean(vals)) if vals else 0.0
|
| 142 |
-
|
| 143 |
-
def slugify(s: str) -> str:
|
| 144 |
-
if s is None:
|
| 145 |
-
return "NA"
|
| 146 |
-
t = str(s).strip()
|
| 147 |
-
return re.sub(r"[^A-Z0-9]+", "", t.upper()) or "NA"
|
| 148 |
-
|
| 149 |
-
# ============================================================
|
| 150 |
-
# 3) INDIKATOR IPLM
|
| 151 |
-
# ============================================================
|
| 152 |
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
]
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
"TenagaFungsionalProfesional",
|
| 162 |
-
"TenagaPKB",
|
| 163 |
-
"AnggaranTenaga"
|
| 164 |
-
]
|
| 165 |
-
pelayanan_cols = [
|
| 166 |
-
"PesertaBudayaBaca","PemustakaLuringDaring","PemustakaFasilitasTIK",
|
| 167 |
-
"PemanfaatanJudulTercetak","PemanfaatanEksemplarTercetak",
|
| 168 |
-
"PemanfaatanJudulElektronik","PemanfaatanEksemplarElektronik"
|
| 169 |
-
]
|
| 170 |
-
pengelolaan_cols = [
|
| 171 |
-
"KegiatanBudayaBaca","KegiatanKerjasama","VariasiLayanan","Kebijakan","AnggaranLayanan"
|
| 172 |
-
]
|
| 173 |
-
all_indicators = koleksi_cols + sdm_cols + pelayanan_cols + pengelolaan_cols
|
| 174 |
|
| 175 |
-
alias_map_raw = {
|
| 176 |
-
"j_judul_koleksi_tercetak": "JudulTercetak",
|
| 177 |
-
"j_eksemplar_koleksi_tercetak": "EksemplarTercetak",
|
| 178 |
-
"j_judul_koleksi_digital": "JudulElektronik",
|
| 179 |
-
"j_eksemplar_koleksi_digital": "EksemplarElektronik",
|
| 180 |
-
"tambah_judul_koleksi_tercetak": "TambahJudulTercetak",
|
| 181 |
-
"tambah_eksemplar_koleksi_tercetak": "TambahEksemplarTercetak",
|
| 182 |
-
"tambah_judul_koleksi_digital": "TambahJudulElektronik",
|
| 183 |
-
"tambah_eksemplar_koleksi_digital": "TambahEksemplarElektronik",
|
| 184 |
-
"j_anggaran_koleksi": "KomitmenAnggaranKoleksi",
|
| 185 |
-
"j_tenaga_ilmu_perpus": "TenagaKualifikasiIlmuPerpustakaan",
|
| 186 |
-
"j_tenaga_nonilmu_perpus": "TenagaFungsionalProfesional",
|
| 187 |
-
"j_tenaga_pkb": "TenagaPKB",
|
| 188 |
-
"j_anggaran_diklat_perpus": "AnggaranTenaga",
|
| 189 |
-
"j_peserta_budaya_baca": "PesertaBudayaBaca",
|
| 190 |
-
"j_pemustaka_luring_daring": "PemustakaLuringDaring",
|
| 191 |
-
"j_pemustaka_fasilitas_tik": "PemustakaFasilitasTIK",
|
| 192 |
-
"j_judul_koleksi_tercetak_termanfaat": "PemanfaatanJudulTercetak",
|
| 193 |
-
"j_eksemplar_koleksi_tercetak_termanfaat": "PemanfaatanEksemplarTercetak",
|
| 194 |
-
"j_judul_koleksi_digital_termanfaat": "PemanfaatanJudulElektronik",
|
| 195 |
-
"j_eksemplar_koleksi_digital_termanfaat": "PemanfaatanEksemplarElektronik",
|
| 196 |
-
"j_kegiatan_budaya_baca_peningkatan_literasi": "KegiatanBudayaBaca",
|
| 197 |
-
"j_kerjasama_pengembangan_perpus": "KegiatanKerjasama",
|
| 198 |
-
"j_variasi_layanan": "VariasiLayanan",
|
| 199 |
-
"j_kebijakan_prosedur_pelayanan": "Kebijakan",
|
| 200 |
-
"j_anggaran_peningkatan_pelayanan": "AnggaranLayanan",
|
| 201 |
-
}
|
| 202 |
-
alias_map = {_canon(k): v for k, v in alias_map_raw.items()}
|
| 203 |
|
| 204 |
# ============================================================
|
| 205 |
-
# 4) LOAD DATA
|
| 206 |
# ============================================================
|
| 207 |
|
| 208 |
DATA_INFO = ""
|
| 209 |
df_all_raw = None
|
| 210 |
-
df_pop_kab = None
|
| 211 |
-
df_pop_prov = None
|
| 212 |
|
| 213 |
prov_col = kab_col = kew_col = jenis_col = nama_col = None
|
| 214 |
|
| 215 |
-
#
|
|
|
|
|
|
|
|
|
|
| 216 |
try:
|
|
|
|
| 217 |
fp = Path(DATA_FILE)
|
| 218 |
if not fp.exists():
|
| 219 |
raise FileNotFoundError(f"File tidak ditemukan: {DATA_FILE}")
|
|
@@ -222,376 +265,431 @@ try:
|
|
| 222 |
frames = [pd.read_excel(fp, sheet_name=s) for s in xls.sheet_names]
|
| 223 |
df_all_raw = pd.concat(frames, ignore_index=True, sort=False)
|
| 224 |
|
| 225 |
-
prov_col
|
| 226 |
-
kab_col
|
| 227 |
-
kew_col
|
| 228 |
-
jenis_col
|
| 229 |
-
nama_col
|
| 230 |
|
| 231 |
df_all_raw["KEW_NORM"] = df_all_raw[kew_col].apply(norm_kew) if kew_col else None
|
| 232 |
|
| 233 |
val_map_jenis = {
|
| 234 |
-
"PERPUSTAKAAN SEKOLAH": "sekolah",
|
| 235 |
-
"
|
| 236 |
-
"PERPUSTAKAAN
|
|
|
|
|
|
|
|
|
|
|
|
|
| 237 |
}
|
| 238 |
-
df_all_raw["_dataset"] = df_all_raw[jenis_col].
|
| 239 |
|
| 240 |
-
|
| 241 |
-
|
|
|
|
|
|
|
|
|
|
| 242 |
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 247 |
|
| 248 |
-
# ---
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
df_pop_kab = pd.DataFrame({
|
| 261 |
-
"Provinsi_Label": pk[c_prov].astype(str).str.strip() if c_prov else None,
|
| 262 |
-
"Kab_Kota_Label": pk[c_kab].astype(str).str.strip(),
|
| 263 |
-
"Pop_Umum": pk[c_pop_umum].apply(coerce_num) if c_pop_umum else np.nan,
|
| 264 |
-
"Pop_Sekolah": pk[c_pop_sekolah].apply(coerce_num) if c_pop_sekolah else np.nan,
|
| 265 |
})
|
| 266 |
-
df_pop_kab["kab_key"] = df_pop_kab["Kab_Kota_Label"].apply(norm_kab_label)
|
| 267 |
-
POP_INFO.append(f"β
Populasi Kab/Kota terbaca: **{POP_KAB}** (n={len(df_pop_kab)})")
|
| 268 |
-
except Exception as e:
|
| 269 |
-
df_pop_kab = None
|
| 270 |
-
POP_INFO.append(f"β οΈ Gagal memuat populasi Kab/Kota: `{e}`")
|
| 271 |
|
| 272 |
-
#
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
"Provinsi_Label":
|
| 282 |
-
"
|
| 283 |
})
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
|
|
|
|
|
|
| 287 |
except Exception as e:
|
| 288 |
-
|
| 289 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 290 |
|
| 291 |
-
if POP_INFO:
|
| 292 |
-
DATA_INFO = DATA_INFO + "<br>" + "<br>".join(POP_INFO)
|
| 293 |
|
| 294 |
# ============================================================
|
| 295 |
-
# 5) PIPELINE NASIONAL: YJ +
|
| 296 |
# ============================================================
|
| 297 |
|
| 298 |
-
def
|
| 299 |
if df_src is None or df_src.empty:
|
| 300 |
return df_src
|
|
|
|
| 301 |
df = df_src.copy()
|
| 302 |
|
|
|
|
| 303 |
rename_map = {}
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
|
|
|
| 308 |
else:
|
| 309 |
-
for tgt in
|
| 310 |
-
if
|
| 311 |
rename_map[col] = tgt
|
| 312 |
break
|
| 313 |
if rename_map:
|
| 314 |
df = df.rename(columns=rename_map)
|
| 315 |
|
| 316 |
available = [c for c in all_indicators if c in df.columns]
|
|
|
|
|
|
|
| 317 |
for c in available:
|
| 318 |
df[c] = df[c].apply(coerce_num)
|
| 319 |
|
|
|
|
| 320 |
for c in available:
|
| 321 |
x = df[c].astype(float).values
|
| 322 |
mask = ~np.isnan(x)
|
| 323 |
-
|
|
|
|
| 324 |
if mask.sum() > 1:
|
| 325 |
pt = PowerTransformer(method="yeo-johnson", standardize=False)
|
| 326 |
-
|
| 327 |
else:
|
| 328 |
-
|
| 329 |
-
|
|
|
|
|
|
|
| 330 |
|
|
|
|
| 331 |
df["sub_koleksi"] = df.apply(lambda r: penalized_mean(r, [c for c in koleksi_cols if c in available]), axis=1)
|
| 332 |
df["sub_sdm"] = df.apply(lambda r: penalized_mean(r, [c for c in sdm_cols if c in available]), axis=1)
|
| 333 |
df["sub_pelayanan"] = df.apply(lambda r: penalized_mean(r, [c for c in pelayanan_cols if c in available]), axis=1)
|
| 334 |
df["sub_pengelolaan"] = df.apply(lambda r: penalized_mean(r, [c for c in pengelolaan_cols if c in available]), axis=1)
|
| 335 |
|
| 336 |
-
df["dim_kepatuhan"] = df[["sub_koleksi","sub_sdm"]].mean(axis=1)
|
| 337 |
-
df["dim_kinerja"] = df[["sub_pelayanan","sub_pengelolaan"]].mean(axis=1)
|
| 338 |
|
| 339 |
-
df["Indeks_Real_0_100"] =
|
| 340 |
-
|
| 341 |
-
|
| 342 |
|
| 343 |
return df
|
| 344 |
|
| 345 |
-
df_all = prepare_global(df_all_raw) if df_all_raw is not None else None
|
| 346 |
-
|
| 347 |
-
# ============================================================
|
| 348 |
-
# 6) COVERAGE -> FINAL + VERIF (NO DECIMALS)
|
| 349 |
-
# ============================================================
|
| 350 |
-
|
| 351 |
-
def compute_final(df_filtered: pd.DataFrame, kew_value: str):
|
| 352 |
-
if df_filtered is None or df_filtered.empty:
|
| 353 |
-
return df_filtered, pd.DataFrame()
|
| 354 |
-
|
| 355 |
-
df = df_filtered.copy()
|
| 356 |
-
kew_norm = str(kew_value or "").upper()
|
| 357 |
-
|
| 358 |
-
df["bobot_coverage"] = 1.0
|
| 359 |
-
df["coverage"] = np.nan
|
| 360 |
-
|
| 361 |
-
if ("KAB" in kew_norm or "KOTA" in kew_norm) and kab_col and df_pop_kab is not None:
|
| 362 |
-
tmp = df.copy()
|
| 363 |
-
tmp["kab_key"] = tmp["KAB_DISP"].apply(norm_kab_label)
|
| 364 |
-
|
| 365 |
-
g = tmp.groupby(["kab_key","_dataset"]).size().rename("n_sampel").reset_index()
|
| 366 |
-
g_piv = g.pivot(index="kab_key", columns="_dataset", values="n_sampel").fillna(0)
|
| 367 |
|
| 368 |
-
|
| 369 |
|
| 370 |
-
rows = []
|
| 371 |
-
for kk in g_piv.index:
|
| 372 |
-
pop_sek = pop.loc[kk, "Pop_Sekolah"] if kk in pop.index else np.nan
|
| 373 |
-
pop_um = pop.loc[kk, "Pop_Umum"] if kk in pop.index else np.nan
|
| 374 |
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
cov_sek = safe_div(n_sek, pop_sek)
|
| 379 |
-
cov_um = safe_div(n_um, pop_um)
|
| 380 |
|
| 381 |
-
|
| 382 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 383 |
|
| 384 |
-
|
| 385 |
-
|
| 386 |
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
"Coverage_Sekolah_%": (cov_sek * 100) if not pd.isna(cov_sek) else np.nan,
|
| 392 |
-
"Bobot_Sekolah_68_%": (bobot_sek * 100) if not pd.isna(bobot_sek) else np.nan,
|
| 393 |
-
"GAP_Ke_68_Sekolah": max(target_sek - n_sek, 0) if not pd.isna(target_sek) else np.nan,
|
| 394 |
-
|
| 395 |
-
"Pop_Umum": pop_um,
|
| 396 |
-
"Sampel_Umum": n_um,
|
| 397 |
-
"Coverage_Umum_%": (cov_um * 100) if not pd.isna(cov_um) else np.nan,
|
| 398 |
-
"Bobot_Umum_68_%": (bobot_um * 100) if not pd.isna(bobot_um) else np.nan,
|
| 399 |
-
"GAP_Ke_68_Umum": max(target_um - n_um, 0) if not pd.isna(target_um) else np.nan,
|
| 400 |
-
})
|
| 401 |
|
| 402 |
-
|
| 403 |
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
for c in int_cols:
|
| 408 |
-
if c in verif_df.columns:
|
| 409 |
-
verif_df[c] = verif_df[c].fillna(0).round(0).astype(int)
|
| 410 |
-
for c in pct_cols:
|
| 411 |
-
if c in verif_df.columns:
|
| 412 |
-
verif_df[c] = verif_df[c].fillna(0).round(0).astype(int)
|
| 413 |
|
| 414 |
-
|
| 415 |
-
|
| 416 |
|
| 417 |
-
|
| 418 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 419 |
|
| 420 |
-
|
|
|
|
| 421 |
|
| 422 |
-
def
|
| 423 |
ds = r.get("_dataset", None)
|
| 424 |
-
kk = r.get("kab_key", None)
|
| 425 |
if ds == "khusus":
|
| 426 |
return 1.0
|
| 427 |
-
if ds == "sekolah":
|
| 428 |
-
return float(bobot_map_sek.get(kk, 0.0))
|
| 429 |
-
if ds == "umum":
|
| 430 |
-
return float(bobot_map_um.get(kk, 0.0))
|
| 431 |
-
return 1.0
|
| 432 |
-
|
| 433 |
-
def row_cov(r):
|
| 434 |
-
ds = r.get("_dataset", None)
|
| 435 |
kk = r.get("kab_key", None)
|
| 436 |
if ds == "sekolah":
|
| 437 |
-
return float(
|
| 438 |
if ds == "umum":
|
| 439 |
-
return float(
|
| 440 |
-
return
|
| 441 |
-
|
| 442 |
-
df["bobot_coverage"] = df.apply(row_weight, axis=1)
|
| 443 |
-
df["coverage"] = df.apply(row_cov, axis=1)
|
| 444 |
-
|
| 445 |
-
elif ("PROV" in kew_norm) and prov_col and df_pop_prov is not None:
|
| 446 |
-
tmp = df.copy()
|
| 447 |
-
tmp["prov_key"] = tmp["PROV_DISP"].apply(norm_prov_label)
|
| 448 |
-
|
| 449 |
-
g = tmp.groupby(["prov_key","_dataset"]).size().rename("n_sampel").reset_index()
|
| 450 |
-
g_piv = g.pivot(index="prov_key", columns="_dataset", values="n_sampel").fillna(0)
|
| 451 |
-
pop = df_pop_prov.set_index("prov_key")
|
| 452 |
-
|
| 453 |
-
rows = []
|
| 454 |
-
for pk in g_piv.index:
|
| 455 |
-
pop_sek = pop.loc[pk, "Pop_Sekolah_Prov"] if pk in pop.index else np.nan
|
| 456 |
-
n_sek = float(g_piv.loc[pk].get("sekolah", 0))
|
| 457 |
-
cov_sek = safe_div(n_sek, pop_sek)
|
| 458 |
-
bobot_sek = cap_bobot(cov_sek)
|
| 459 |
-
target_sek = (TARGET_COVERAGE * pop_sek) if not pd.isna(pop_sek) else np.nan
|
| 460 |
|
| 461 |
-
|
| 462 |
-
"Provinsi": pop.loc[pk, "Provinsi_Label"] if pk in pop.index else pk,
|
| 463 |
-
"Pop_Sekolah": pop_sek,
|
| 464 |
-
"Sampel_Sekolah": n_sek,
|
| 465 |
-
"Coverage_Sekolah_%": (cov_sek * 100) if not pd.isna(cov_sek) else np.nan,
|
| 466 |
-
"Bobot_Sekolah_68_%": (bobot_sek * 100) if not pd.isna(bobot_sek) else np.nan,
|
| 467 |
-
"GAP_Ke_68_Sekolah": max(target_sek - n_sek, 0) if not pd.isna(target_sek) else np.nan,
|
| 468 |
-
})
|
| 469 |
|
| 470 |
-
|
|
|
|
|
|
|
| 471 |
|
| 472 |
-
|
| 473 |
-
pct_cols = ["Coverage_Sekolah_%","Bobot_Sekolah_68_%"]
|
| 474 |
-
for c in int_cols:
|
| 475 |
-
if c in verif_df.columns:
|
| 476 |
-
verif_df[c] = verif_df[c].fillna(0).round(0).astype(int)
|
| 477 |
-
for c in pct_cols:
|
| 478 |
-
if c in verif_df.columns:
|
| 479 |
-
verif_df[c] = verif_df[c].fillna(0).round(0).astype(int)
|
| 480 |
|
| 481 |
-
|
| 482 |
-
|
|
|
|
| 483 |
|
| 484 |
-
|
| 485 |
|
| 486 |
-
def
|
| 487 |
ds = r.get("_dataset", None)
|
| 488 |
if ds == "khusus":
|
| 489 |
return 1.0
|
| 490 |
if ds == "sekolah":
|
| 491 |
-
return float(
|
| 492 |
return 1.0
|
| 493 |
|
| 494 |
-
|
| 495 |
-
if r.get("_dataset", None) != "sekolah":
|
| 496 |
-
return np.nan
|
| 497 |
-
return float(cov_map.get(r.get("prov_key", None), np.nan))
|
| 498 |
|
| 499 |
-
|
| 500 |
-
df["coverage"] = df.apply(row_cov, axis=1)
|
| 501 |
|
| 502 |
-
else:
|
| 503 |
-
verif_df = pd.DataFrame()
|
| 504 |
|
| 505 |
-
|
| 506 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 507 |
|
| 508 |
# ============================================================
|
| 509 |
-
# 7) BELL CURVE (
|
| 510 |
# ============================================================
|
| 511 |
|
| 512 |
-
def make_bell_figure(
|
| 513 |
fig = go.Figure()
|
| 514 |
-
|
| 515 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 516 |
return fig
|
| 517 |
|
| 518 |
-
|
| 519 |
-
if
|
| 520 |
-
fig.
|
| 521 |
-
title=title,
|
| 522 |
-
xaxis_title="Indeks (0β100)",
|
| 523 |
-
yaxis_title="Kepadatan (relatif)",
|
| 524 |
-
annotations=[dict(text="Grafik tidak ditampilkan (data terlalu sedikit).",
|
| 525 |
-
x=0.5, y=0.5, xref="paper", yref="paper",
|
| 526 |
-
showarrow=False, font=dict(size=14))]
|
| 527 |
-
)
|
| 528 |
return fig
|
| 529 |
|
| 530 |
-
|
| 531 |
-
|
| 532 |
-
sigma =
|
| 533 |
-
if sigma <= 1e-9:
|
| 534 |
sigma = 1.0
|
| 535 |
|
| 536 |
-
xs = np.linspace(max(0,
|
| 537 |
pdf = (1.0 / (sigma * np.sqrt(2 * np.pi))) * np.exp(-0.5 * ((xs - mu) / sigma) ** 2)
|
| 538 |
-
pdf = pdf /
|
| 539 |
-
|
| 540 |
-
if name_col and name_col in dfp.columns:
|
| 541 |
-
hover_text = [f"{str(n)}<br>Indeks Final: {v:.2f}" for n, v in zip(dfp[name_col], x_vals)]
|
| 542 |
-
else:
|
| 543 |
-
hover_text = [f"Indeks Final: {v:.2f}" for v in x_vals]
|
| 544 |
|
|
|
|
| 545 |
fig.add_trace(go.Scatter(x=xs, y=pdf, mode="lines", name="Bell curve", hoverinfo="skip"))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 546 |
fig.add_trace(go.Scatter(
|
| 547 |
-
x=
|
| 548 |
-
|
| 549 |
-
|
|
|
|
|
|
|
|
|
|
| 550 |
))
|
| 551 |
|
| 552 |
-
|
| 553 |
-
|
|
|
|
| 554 |
fig.add_trace(go.Scatter(
|
| 555 |
-
x=[q, q],
|
| 556 |
-
|
|
|
|
|
|
|
| 557 |
hovertemplate=f"{label}: {q:.2f}<extra></extra>"
|
| 558 |
))
|
| 559 |
|
| 560 |
-
fig.update_layout(
|
| 561 |
-
title=title,
|
| 562 |
-
xaxis_title="Indeks IPLM FINAL (0β100)",
|
| 563 |
-
yaxis_title="Kepadatan (relatif)",
|
| 564 |
-
yaxis=dict(showticklabels=False, zeroline=True, range=[0, 1.2]),
|
| 565 |
-
margin=dict(l=40, r=20, t=60, b=40),
|
| 566 |
-
hovermode="x"
|
| 567 |
-
)
|
| 568 |
return fig
|
| 569 |
|
|
|
|
| 570 |
# ============================================================
|
| 571 |
-
#
|
| 572 |
-
#
|
| 573 |
# ============================================================
|
| 574 |
|
| 575 |
-
def
|
| 576 |
-
if
|
| 577 |
-
return
|
| 578 |
-
tmp = df.copy()
|
| 579 |
-
# batasi kolom & baris biar prompt tidak meledak
|
| 580 |
-
tmp = tmp.head(max_rows)
|
| 581 |
-
return tmp.to_string(index=False)
|
| 582 |
|
|
|
|
| 583 |
|
| 584 |
-
|
| 585 |
-
"""
|
| 586 |
-
|
| 587 |
-
|
| 588 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 589 |
idx_col = "Indeks_Final_0_100" if (detail_df is not None and "Indeks_Final_0_100" in detail_df.columns) else "Indeks_Real_0_100"
|
| 590 |
if detail_df is None or detail_df.empty or idx_col not in detail_df.columns:
|
| 591 |
return {"idx_col": idx_col, "all": {}, "by_type": {}}
|
| 592 |
|
| 593 |
-
out = {"idx_col": idx_col, "all": {}, "by_type": {}}
|
| 594 |
-
|
| 595 |
def stats_for(s: pd.Series):
|
| 596 |
s = pd.to_numeric(s, errors="coerce").dropna()
|
| 597 |
if len(s) == 0:
|
|
@@ -600,7 +698,6 @@ def summarize_distribution(detail_df: pd.DataFrame):
|
|
| 600 |
return {
|
| 601 |
"n": int(len(s)),
|
| 602 |
"mean": float(s.mean()),
|
| 603 |
-
"std": float(s.std(ddof=1)) if len(s) > 1 else 0.0,
|
| 604 |
"min": float(s.min()),
|
| 605 |
"q1": float(q1),
|
| 606 |
"median": float(q2),
|
|
@@ -608,105 +705,94 @@ def summarize_distribution(detail_df: pd.DataFrame):
|
|
| 608 |
"max": float(s.max()),
|
| 609 |
}
|
| 610 |
|
| 611 |
-
out
|
| 612 |
-
|
| 613 |
if "_dataset" in detail_df.columns:
|
| 614 |
-
for ds in ["sekolah",
|
| 615 |
-
|
| 616 |
-
out["by_type"][ds] = stats_for(dsub[idx_col])
|
| 617 |
-
|
| 618 |
return out
|
| 619 |
|
| 620 |
|
| 621 |
-
def
|
| 622 |
-
agg_df: pd.DataFrame,
|
| 623 |
-
verif_df: pd.DataFrame,
|
| 624 |
-
kab_name: str,
|
| 625 |
-
kew_value: str) -> str:
|
| 626 |
-
"""
|
| 627 |
-
Narasi LLM yang fokus ke:
|
| 628 |
-
- indeks FINAL (sudah penalti 68% kalau ada)
|
| 629 |
-
- distribusi (mean, Q1/median/Q3)
|
| 630 |
-
- gap coverage (kalau ada)
|
| 631 |
-
"""
|
| 632 |
-
wilayah = kab_name
|
| 633 |
-
if kew_value and kew_value != "(Semua)":
|
| 634 |
-
wilayah = f"{kab_name} (kewenangan {kew_value})"
|
| 635 |
-
|
| 636 |
dist = summarize_distribution(detail_df)
|
| 637 |
-
idx_col = dist
|
| 638 |
-
|
| 639 |
-
|
| 640 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 641 |
by_type = dist.get("by_type", {})
|
| 642 |
|
| 643 |
-
|
| 644 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 645 |
return "(tidak tersedia)"
|
| 646 |
-
return
|
| 647 |
-
f"n={d['n']}, mean={d['mean']:.2f}, sd={d['std']:.2f}, "
|
| 648 |
-
f"min={d['min']:.2f}, Q1={d['q1']:.2f}, median={d['median']:.2f}, Q3={d['q3']:.2f}, max={d['max']:.2f}"
|
| 649 |
-
)
|
| 650 |
|
| 651 |
lines = []
|
| 652 |
lines.append(f"Wilayah: {wilayah}")
|
| 653 |
-
lines.append(f"
|
| 654 |
-
lines.append(f"
|
| 655 |
-
|
| 656 |
-
|
| 657 |
-
|
| 658 |
-
|
| 659 |
-
agg_txt = _safe_table_text(agg_df, max_rows=8)
|
| 660 |
-
ver_txt = _safe_table_text(verif_df, max_rows=12)
|
| 661 |
|
| 662 |
-
|
| 663 |
-
if
|
| 664 |
-
|
| 665 |
-
rb = generate_rule_based_analysis(detail_df, agg_df, kab_name, kew_value)
|
| 666 |
-
return (
|
| 667 |
-
"β οΈ LLM tidak tersedia, analisis menggunakan rule-based.\n\n" + rb
|
| 668 |
-
)
|
| 669 |
|
| 670 |
system_prompt = (
|
| 671 |
-
"Anda adalah analis data
|
| 672 |
-
"
|
| 673 |
-
"Anda harus menggunakan pendekatan berbasis data, jelas, dan ringkas."
|
| 674 |
)
|
| 675 |
|
| 676 |
user_prompt = f"""
|
| 677 |
-
DATA RINGKAS IPLM
|
| 678 |
|
| 679 |
-
|
| 680 |
{chr(10).join(lines)}
|
| 681 |
|
| 682 |
-
TABEL AGREGAT (
|
| 683 |
{agg_txt}
|
| 684 |
|
| 685 |
-
|
| 686 |
{ver_txt}
|
| 687 |
|
| 688 |
TUGAS:
|
| 689 |
-
Tulis analisis
|
| 690 |
-
|
| 691 |
-
|
| 692 |
-
|
| 693 |
-
|
| 694 |
-
- Jelaskan implikasi kualitas/representasi data bila coverage belum 68%.
|
| 695 |
-
C. Prioritas intervensi 12β18 bulan (1β2 paragraf) β fokus pada program pembinaan yang realistis.
|
| 696 |
-
D. Rekomendasi kebijakan 3β5 tahun (1β2 paragraf) β penataan tata kelola data, pembinaan, standardisasi.
|
| 697 |
|
| 698 |
GAYA:
|
| 699 |
-
- Jangan
|
| 700 |
-
-
|
| 701 |
-
- Jangan membuat data baru di luar yang tersedia.
|
| 702 |
"""
|
| 703 |
|
| 704 |
try:
|
| 705 |
resp = client.chat_completion(
|
| 706 |
model=LLM_MODEL_NAME,
|
| 707 |
messages=[
|
| 708 |
-
{"role":
|
| 709 |
-
{"role":
|
| 710 |
],
|
| 711 |
max_tokens=1200,
|
| 712 |
temperature=0.25,
|
|
@@ -714,354 +800,269 @@ GAYA:
|
|
| 714 |
)
|
| 715 |
text = resp.choices[0].message.content.strip()
|
| 716 |
if not text:
|
| 717 |
-
raise ValueError("Respon LLM kosong
|
| 718 |
return text
|
| 719 |
-
except Exception
|
| 720 |
-
|
| 721 |
-
return (
|
| 722 |
-
"β οΈ Gagal memanggil LLM untuk data analytics, fallback rule-based.\n\n"
|
| 723 |
-
f"(Detail teknis: {repr(e)})\n\n{rb}"
|
| 724 |
-
)
|
| 725 |
-
|
| 726 |
-
|
| 727 |
-
def generate_word_report_llm_analytics(detail_df, agg_df, verif_df, prov, kab, kew, analytics_text):
|
| 728 |
-
"""
|
| 729 |
-
Word report yang menaruh:
|
| 730 |
-
- Ringkasan indeks FINAL (statistik & kuartil)
|
| 731 |
-
- Tabel agregat ringkas
|
| 732 |
-
- Tabel verifikasi coverage (dibulatkan TANPA koma)
|
| 733 |
-
- Narasi LLM data analytics
|
| 734 |
-
"""
|
| 735 |
-
if kew == "PUSAT":
|
| 736 |
-
return None
|
| 737 |
|
| 738 |
-
wilayah = kab if kab != "(Semua)" else prov
|
| 739 |
-
dist = summarize_distribution(detail_df)
|
| 740 |
-
idx_col = dist.get("idx_col", "Indeks_Final_0_100")
|
| 741 |
-
all_stats = dist.get("all", {})
|
| 742 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 743 |
doc = Document()
|
| 744 |
doc.add_heading(f"Laporan Analisis IPLM (FINAL) β {wilayah}", level=1)
|
|
|
|
| 745 |
doc.add_paragraph(
|
| 746 |
-
"
|
| 747 |
-
"kecukupan sampel 68% (untuk perpustakaan sekolah dan umum
|
|
|
|
| 748 |
)
|
| 749 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 750 |
doc.add_heading("1. Ringkasan Statistik Indeks FINAL", level=2)
|
| 751 |
-
if
|
| 752 |
-
doc.add_paragraph(f"- Indeks
|
| 753 |
-
doc.add_paragraph(f"- Jumlah
|
| 754 |
-
doc.add_paragraph(f"- Rata-rata: {
|
| 755 |
-
doc.add_paragraph(f"- Q1: {
|
| 756 |
-
doc.add_paragraph(f"-
|
| 757 |
-
doc.add_paragraph(f"- Q3: {all_stats.get('q3', 0.0):.2f}")
|
| 758 |
-
doc.add_paragraph(f"- MinimumβMaksimum: {all_stats.get('min', 0.0):.2f} β {all_stats.get('max', 0.0):.2f}")
|
| 759 |
else:
|
| 760 |
-
doc.add_paragraph("Statistik
|
| 761 |
|
| 762 |
-
doc.add_heading("2.
|
| 763 |
if agg_df is not None and not agg_df.empty:
|
| 764 |
-
|
| 765 |
-
hdr = table.rows[0].cells
|
| 766 |
for i, c in enumerate(agg_df.columns):
|
| 767 |
-
|
| 768 |
-
for _,
|
| 769 |
-
|
| 770 |
for i, c in enumerate(agg_df.columns):
|
| 771 |
-
|
| 772 |
else:
|
| 773 |
-
doc.add_paragraph("
|
| 774 |
|
| 775 |
-
doc.add_heading("3. Verifikasi Coverage
|
| 776 |
if verif_df is not None and not verif_df.empty:
|
| 777 |
v = verif_df.copy()
|
| 778 |
-
|
| 779 |
-
# BULATKAN TANPA KOMa: semua numerik -> integer
|
| 780 |
for c in v.columns:
|
| 781 |
if pd.api.types.is_numeric_dtype(v[c]):
|
| 782 |
-
v[c] = pd.to_numeric(v[c], errors="coerce").fillna(0).round(0).astype(int)
|
| 783 |
-
|
| 784 |
-
table = doc.add_table(rows=1, cols=len(v.columns))
|
| 785 |
-
hdr = table.rows[0].cells
|
| 786 |
for i, c in enumerate(v.columns):
|
| 787 |
-
|
| 788 |
-
for _,
|
| 789 |
-
|
| 790 |
for i, c in enumerate(v.columns):
|
| 791 |
-
|
| 792 |
else:
|
| 793 |
-
doc.add_paragraph("Tidak ada tabel verifikasi
|
| 794 |
|
| 795 |
-
doc.add_heading("4. Analisis Naratif Otomatis (LLM
|
| 796 |
-
for
|
| 797 |
-
if
|
| 798 |
-
doc.add_paragraph(
|
| 799 |
|
| 800 |
outpath = tempfile.mktemp(suffix=".docx")
|
| 801 |
doc.save(outpath)
|
| 802 |
return outpath
|
| 803 |
|
|
|
|
| 804 |
# ============================================================
|
| 805 |
-
#
|
| 806 |
# ============================================================
|
| 807 |
|
| 808 |
-
def
|
| 809 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 810 |
rows = []
|
| 811 |
-
|
| 812 |
-
def summarize(sub, jenis_label):
|
| 813 |
-
row = {
|
| 814 |
-
"Jenis": jenis_label,
|
| 815 |
-
"Jumlah": int(len(sub)),
|
| 816 |
-
"Rata2_sub_koleksi": float(sub["sub_koleksi"].mean()) if len(sub) else 0.0,
|
| 817 |
-
"Rata2_sub_sdm": float(sub["sub_sdm"].mean()) if len(sub) else 0.0,
|
| 818 |
-
"Rata2_sub_pelayanan": float(sub["sub_pelayanan"].mean()) if len(sub) else 0.0,
|
| 819 |
-
"Rata2_sub_pengelolaan": float(sub["sub_pengelolaan"].mean()) if len(sub) else 0.0,
|
| 820 |
-
"Rata2_dim_kepatuhan": float(sub["dim_kepatuhan"].mean()) if len(sub) else 0.0,
|
| 821 |
-
"Rata2_dim_kinerja": float(sub["dim_kinerja"].mean()) if len(sub) else 0.0,
|
| 822 |
-
"Rata2_Indeks_Final_0_100": float(sub["Indeks_Final_0_100"].mean()) if len(sub) else 0.0,
|
| 823 |
-
}
|
| 824 |
-
return row
|
| 825 |
-
|
| 826 |
for ds in ["sekolah","umum","khusus"]:
|
| 827 |
-
|
| 828 |
-
|
| 829 |
-
|
| 830 |
-
|
| 831 |
-
|
| 832 |
-
|
| 833 |
-
|
| 834 |
-
|
| 835 |
-
|
| 836 |
-
|
| 837 |
-
|
| 838 |
-
|
| 839 |
-
|
| 840 |
-
|
| 841 |
-
|
| 842 |
-
|
| 843 |
-
|
| 844 |
-
|
| 845 |
-
|
| 846 |
-
|
| 847 |
-
|
| 848 |
-
|
| 849 |
-
|
| 850 |
-
|
| 851 |
-
|
| 852 |
-
go.Figure(), go.Figure(), go.Figure(), go.Figure(),
|
| 853 |
-
"Data DM belum siap.")
|
| 854 |
-
|
| 855 |
-
df = df_all.copy()
|
| 856 |
-
|
| 857 |
-
if "PROV_DISP" in df.columns and prov_value and prov_value != "(Semua)":
|
| 858 |
-
df = df[df["PROV_DISP"] == prov_value]
|
| 859 |
-
if "KAB_DISP" in df.columns and kab_value and kab_value != "(Semua)":
|
| 860 |
-
df = df[df["KAB_DISP"] == kab_value]
|
| 861 |
-
if kew_value and kew_value != "(Semua)":
|
| 862 |
-
df = df[df["KEW_NORM"] == kew_value]
|
| 863 |
|
| 864 |
-
|
| 865 |
-
return (pd.DataFrame(), pd.DataFrame(), pd.DataFrame(),
|
| 866 |
-
None, None, None,
|
| 867 |
-
go.Figure(), go.Figure(), go.Figure(), go.Figure(),
|
| 868 |
-
"Tidak ada data untuk kombinasi filter.")
|
| 869 |
-
|
| 870 |
-
df2, verif_df = compute_final(df, kew_value)
|
| 871 |
-
|
| 872 |
-
# DEDUP kunci (prov,kab,nama,kew,dataset)
|
| 873 |
-
kcols = [c for c in ["PROV_DISP","KAB_DISP","KEW_NORM","_dataset"] if c in df2.columns]
|
| 874 |
-
if nama_col and nama_col in df2.columns:
|
| 875 |
-
kcols.append(nama_col)
|
| 876 |
-
if kcols:
|
| 877 |
-
df2 = df2.drop_duplicates(subset=kcols, keep="first").copy()
|
| 878 |
-
|
| 879 |
-
agg_df = build_agg_ringkas(df2)
|
| 880 |
-
detail_df = build_detail_ringkas(df2, nama_col)
|
| 881 |
-
|
| 882 |
-
# Bell curves (FINAL)
|
| 883 |
-
ncol = nama_col if (nama_col and nama_col in df2.columns) else None
|
| 884 |
-
fig_all = make_bell_figure(df2, "Bell Curve Indeks FINAL β Semua Perpustakaan", name_col=ncol, min_points=5)
|
| 885 |
-
fig_sek = make_bell_figure(df2[df2["_dataset"]=="sekolah"], "Bell Curve Indeks FINAL β Perpustakaan Sekolah", name_col=ncol, min_points=3)
|
| 886 |
-
fig_um = make_bell_figure(df2[df2["_dataset"]=="umum"], "Bell Curve Indeks FINAL β Perpustakaan Umum", name_col=ncol, min_points=3)
|
| 887 |
-
fig_kh = make_bell_figure(df2[df2["_dataset"]=="khusus"], "Bell Curve Indeks FINAL β Perpustakaan Khusus", name_col=ncol, min_points=3)
|
| 888 |
|
|
|
|
| 889 |
tmpdir = tempfile.mkdtemp()
|
| 890 |
-
|
| 891 |
-
|
| 892 |
-
|
| 893 |
-
detail_path = os.path.join(tmpdir, f"IPLM_Detail_RINGKAS_{slug}.xlsx")
|
| 894 |
-
verif_path = os.path.join(tmpdir, f"IPLM_VerifikasiCoverage_{slug}.xlsx")
|
| 895 |
|
| 896 |
agg_df.to_excel(agg_path, index=False)
|
| 897 |
-
|
| 898 |
-
|
|
|
|
|
|
|
|
|
|
| 899 |
|
| 900 |
-
|
| 901 |
-
|
|
|
|
|
|
|
| 902 |
|
|
|
|
| 903 |
|
| 904 |
-
|
| 905 |
-
#
|
|
|
|
| 906 |
# ============================================================
|
| 907 |
|
| 908 |
-
if "run_app" in globals():
|
| 909 |
-
_run_app_base = run_app # simpan fungsi asli
|
| 910 |
def run_app(prov_value, kab_value, kew_value):
|
| 911 |
-
|
| 912 |
-
|
| 913 |
-
agg_df,
|
| 914 |
-
detail_df_view,
|
| 915 |
-
verif_df,
|
| 916 |
-
agg_path,
|
| 917 |
-
detail_path,
|
| 918 |
-
raw_path,
|
| 919 |
-
word_path,
|
| 920 |
-
fig_all,
|
| 921 |
-
fig_sekolah,
|
| 922 |
-
fig_umum,
|
| 923 |
-
fig_khusus,
|
| 924 |
-
msg,
|
| 925 |
-
analysis_text,
|
| 926 |
-
) = _run_app_base(prov_value, kab_value, kew_value)
|
| 927 |
-
|
| 928 |
-
# kalau kosong, langsung return
|
| 929 |
-
if detail_df_view is None or (hasattr(detail_df_view, "empty") and detail_df_view.empty):
|
| 930 |
return (
|
| 931 |
-
|
| 932 |
-
|
| 933 |
-
|
| 934 |
-
|
| 935 |
-
|
| 936 |
-
analysis_text
|
| 937 |
)
|
| 938 |
|
| 939 |
-
|
| 940 |
-
# Ambil ulang subset yang sama dari df_all_ipml (supaya lengkap) dengan filter yang sama
|
| 941 |
-
df = df_all_ipml.copy() if df_all_ipml is not None else None
|
| 942 |
-
if df is None or df.empty:
|
| 943 |
-
return (
|
| 944 |
-
agg_df, detail_df_view, verif_df,
|
| 945 |
-
agg_path, detail_path, raw_path,
|
| 946 |
-
word_path,
|
| 947 |
-
fig_all, fig_sekolah, fig_umum, fig_khusus,
|
| 948 |
-
msg,
|
| 949 |
-
analysis_text
|
| 950 |
-
)
|
| 951 |
|
| 952 |
-
|
| 953 |
-
|
| 954 |
-
|
| 955 |
-
|
|
|
|
| 956 |
if kew_value and kew_value != "(Semua)":
|
| 957 |
df = df[df["KEW_NORM"] == kew_value]
|
| 958 |
|
| 959 |
-
if df
|
|
|
|
| 960 |
return (
|
| 961 |
-
|
| 962 |
-
|
| 963 |
-
|
| 964 |
-
|
| 965 |
-
|
| 966 |
-
analysis_text
|
| 967 |
)
|
| 968 |
|
| 969 |
-
|
| 970 |
-
|
| 971 |
|
| 972 |
-
#
|
| 973 |
-
|
| 974 |
|
| 975 |
-
#
|
| 976 |
-
|
| 977 |
-
detail_df=detail_df_full,
|
| 978 |
-
agg_df=agg_df2 if (agg_df2 is not None and not agg_df2.empty) else agg_df,
|
| 979 |
-
verif_df=verif_df,
|
| 980 |
-
kab_name=kab_name,
|
| 981 |
-
kew_value=kew_value,
|
| 982 |
-
)
|
| 983 |
|
| 984 |
-
#
|
| 985 |
-
|
| 986 |
-
|
| 987 |
-
|
| 988 |
-
|
| 989 |
-
|
| 990 |
-
|
| 991 |
-
)
|
|
|
|
|
|
|
| 992 |
|
| 993 |
-
# Kembalikan output yang sama seperti run_app asli
|
| 994 |
return (
|
| 995 |
agg_df,
|
| 996 |
-
|
| 997 |
-
verif_df,
|
| 998 |
agg_path,
|
| 999 |
detail_path,
|
| 1000 |
raw_path,
|
| 1001 |
-
|
| 1002 |
fig_all,
|
| 1003 |
-
|
| 1004 |
-
|
| 1005 |
-
|
| 1006 |
msg,
|
| 1007 |
-
|
| 1008 |
)
|
| 1009 |
|
| 1010 |
-
# ============================================================
|
| 1011 |
-
# 10) DROPDOWN (NO DUPLICATE)
|
| 1012 |
-
# ============================================================
|
| 1013 |
-
|
| 1014 |
-
def all_prov_choices():
|
| 1015 |
-
if df_all_raw is None or "PROV_DISP" not in df_all_raw.columns:
|
| 1016 |
-
return ["(Semua)"]
|
| 1017 |
-
vals = df_all_raw["PROV_DISP"].dropna()
|
| 1018 |
-
vals = sorted(list(dict.fromkeys([v for v in vals.tolist() if str(v).strip() != ""])))
|
| 1019 |
-
return ["(Semua)"] + vals
|
| 1020 |
-
|
| 1021 |
-
def get_kab_choices_for_prov(prov_value):
|
| 1022 |
-
if df_all_raw is None or "KAB_DISP" not in df_all_raw.columns:
|
| 1023 |
-
return ["(Semua)"]
|
| 1024 |
-
tmp = df_all_raw.copy()
|
| 1025 |
-
if prov_value and prov_value != "(Semua)":
|
| 1026 |
-
tmp = tmp[tmp["PROV_DISP"] == prov_value]
|
| 1027 |
-
vals = tmp["KAB_DISP"].dropna()
|
| 1028 |
-
vals = sorted(list(dict.fromkeys([v for v in vals.tolist() if str(v).strip() != ""])))
|
| 1029 |
-
return ["(Semua)"] + vals
|
| 1030 |
-
|
| 1031 |
-
def all_kew_choices():
|
| 1032 |
-
if df_all_raw is None or "KEW_NORM" not in df_all_raw.columns:
|
| 1033 |
-
return ["(Semua)"]
|
| 1034 |
-
vals = df_all_raw["KEW_NORM"].dropna().astype(str).str.strip()
|
| 1035 |
-
vals = sorted(list(dict.fromkeys([v for v in vals.tolist() if v != ""])))
|
| 1036 |
-
return ["(Semua)"] + (vals if vals else ["KAB/KOTA","PROVINSI"])
|
| 1037 |
-
|
| 1038 |
-
prov_choices = all_prov_choices()
|
| 1039 |
-
kab_choices = get_kab_choices_for_prov(prov_choices[0] if prov_choices else "(Semua)")
|
| 1040 |
-
kew_choices = all_kew_choices()
|
| 1041 |
-
default_kew = "KAB/KOTA" if "KAB/KOTA" in kew_choices else (kew_choices[1] if len(kew_choices) > 1 else "(Semua)")
|
| 1042 |
|
| 1043 |
def on_prov_change(prov_value):
|
| 1044 |
-
|
| 1045 |
-
|
| 1046 |
|
| 1047 |
# ============================================================
|
| 1048 |
-
#
|
| 1049 |
# ============================================================
|
| 1050 |
|
| 1051 |
with gr.Blocks() as demo:
|
| 1052 |
gr.Markdown(
|
| 1053 |
f"""
|
| 1054 |
-
# IPLM 2025 β
|
| 1055 |
-
|
| 1056 |
-
|
|
|
|
|
|
|
|
|
|
| 1057 |
|
| 1058 |
{DATA_INFO}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1059 |
"""
|
| 1060 |
)
|
| 1061 |
|
| 1062 |
with gr.Row():
|
| 1063 |
dd_prov = gr.Dropdown(label="Provinsi", choices=prov_choices, value=prov_choices[0])
|
| 1064 |
-
dd_kab = gr.Dropdown(label="Kab/Kota", choices=kab_choices, value=
|
| 1065 |
dd_kew = gr.Dropdown(label="Kewenangan", choices=kew_choices, value=default_kew)
|
| 1066 |
|
| 1067 |
dd_prov.change(fn=on_prov_change, inputs=dd_prov, outputs=dd_kab)
|
|
@@ -1069,35 +1070,54 @@ Verifikasi ditampilkan dalam integer (tanpa koma) agar bersih.
|
|
| 1069 |
run_btn = gr.Button("Jalankan Perhitungan")
|
| 1070 |
msg_out = gr.Markdown()
|
| 1071 |
|
| 1072 |
-
gr.Markdown("## Agregat (
|
| 1073 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1074 |
|
| 1075 |
-
gr.Markdown("##
|
| 1076 |
-
|
| 1077 |
|
| 1078 |
-
gr.Markdown("##
|
| 1079 |
-
|
| 1080 |
|
| 1081 |
-
gr.Markdown("## Bell Curve Indeks FINAL β
|
| 1082 |
-
|
| 1083 |
|
| 1084 |
-
gr.Markdown("##
|
| 1085 |
-
|
| 1086 |
-
bell_um = gr.Plot()
|
| 1087 |
-
bell_kh = gr.Plot()
|
| 1088 |
|
| 1089 |
with gr.Row():
|
| 1090 |
-
|
| 1091 |
-
|
| 1092 |
-
|
|
|
|
| 1093 |
|
| 1094 |
run_btn.click(
|
| 1095 |
-
fn=
|
| 1096 |
inputs=[dd_prov, dd_kab, dd_kew],
|
| 1097 |
-
outputs=[
|
| 1098 |
-
|
| 1099 |
-
|
| 1100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1101 |
)
|
| 1102 |
|
| 1103 |
demo.launch()
|
|
|
|
| 1 |
# -*- coding: utf-8 -*-
|
| 2 |
"""
|
| 3 |
+
app.py β IPLM 2025 (FINAL)
|
| 4 |
+
- Pipeline nasional: YeoβJohnson (nasional) + MinMax (nasional)
|
| 5 |
+
- Sub-dimensi: Koleksi, SDM, Pelayanan, Pengelolaan
|
| 6 |
+
- Dimensi: Kepatuhan (Koleksi+SDM), Kinerja (Pelayanan+Pengelolaan)
|
| 7 |
+
- Indeks Real (0β100) -> Indeks Final (0β100) dengan sanksi coverage 68%
|
| 8 |
+
* Sekolah & Umum: kena sanksi 68% (68% dianggap 100%)
|
| 9 |
+
* Khusus: tidak kena sanksi (bobot=1) karena populasi pembanding belum baku
|
| 10 |
+
- Sumber populasi:
|
| 11 |
+
* Kab/Kota: Data_populasi_Kab_kota.xlsx
|
| 12 |
+
* Provinsi: Data_populasi_propinsi.xlsx
|
| 13 |
+
- Output:
|
| 14 |
+
* Tabel agregat: sub/dim + Indeks FINAL (integer)
|
| 15 |
+
* Tabel detail: sub/dim + Indeks FINAL (integer)
|
| 16 |
+
* Bell curve: semua + per jenis (pakai Indeks FINAL)
|
| 17 |
+
* Download: agregat/detail/raw + Word (LLM)
|
| 18 |
"""
|
| 19 |
|
| 20 |
import os
|
|
|
|
| 28 |
import plotly.graph_objects as go
|
| 29 |
from sklearn.preprocessing import PowerTransformer
|
| 30 |
|
| 31 |
+
# Word + LLM
|
| 32 |
+
from docx import Document
|
| 33 |
+
from huggingface_hub import InferenceClient
|
| 34 |
+
|
| 35 |
+
|
| 36 |
# ============================================================
|
| 37 |
# 1) KONFIGURASI FILE
|
| 38 |
# ============================================================
|
| 39 |
|
| 40 |
+
DATA_FILE = "DM_001.xlsx"
|
|
|
|
|
|
|
| 41 |
|
| 42 |
+
POP_KAB_FILE = "Data_populasi_Kab_kota.xlsx"
|
| 43 |
+
POP_PROV_FILE = "Data_populasi_propinsi.xlsx"
|
| 44 |
+
|
| 45 |
+
# Bobot indeks
|
| 46 |
W_KEPATUHAN = 0.30
|
| 47 |
W_KINERJA = 0.70
|
| 48 |
|
| 49 |
+
# Target coverage: 68% dianggap 100%
|
| 50 |
+
TARGET_COVERAGE = 0.68
|
| 51 |
+
|
| 52 |
+
# LLM
|
| 53 |
+
USE_LLM = True
|
| 54 |
+
LLM_MODEL_NAME = "meta-llama/Meta-Llama-3-8B-Instruct"
|
| 55 |
+
HF_TOKEN = (
|
| 56 |
+
os.getenv("HF_SECRET")
|
| 57 |
+
or os.getenv("HF_TOKEN")
|
| 58 |
+
or os.getenv("HUGGINGFACEHUB_API_TOKEN")
|
| 59 |
+
or os.getenv("HF_API_TOKEN")
|
| 60 |
+
)
|
| 61 |
+
|
| 62 |
+
_HF_CLIENT = None
|
| 63 |
+
|
| 64 |
+
|
| 65 |
# ============================================================
|
| 66 |
+
# 2) KELOMPOK INDIKATOR (untuk pipeline real-score)
|
| 67 |
+
# ============================================================
|
| 68 |
+
|
| 69 |
+
koleksi_cols = [
|
| 70 |
+
"JudulTercetak","EksemplarTercetak","JudulElektronik","EksemplarElektronik",
|
| 71 |
+
"TambahJudulTercetak","TambahEksemplarTercetak",
|
| 72 |
+
"TambahJudulElektronik","TambahEksemplarElektronik",
|
| 73 |
+
"KomitmenAnggaranKoleksi"
|
| 74 |
+
]
|
| 75 |
+
sdm_cols = [
|
| 76 |
+
"TenagaKualifikasiIlmuPerpustakaan",
|
| 77 |
+
"TenagaFungsionalProfesional",
|
| 78 |
+
"TenagaPKB",
|
| 79 |
+
"AnggaranTenaga"
|
| 80 |
+
]
|
| 81 |
+
pelayanan_cols = [
|
| 82 |
+
"PesertaBudayaBaca","PemustakaLuringDaring","PemustakaFasilitasTIK",
|
| 83 |
+
"PemanfaatanJudulTercetak","PemanfaatanEksemplarTercetak",
|
| 84 |
+
"PemanfaatanJudulElektronik","PemanfaatanEksemplarElektronik"
|
| 85 |
+
]
|
| 86 |
+
pengelolaan_cols = [
|
| 87 |
+
"KegiatanBudayaBaca","KegiatanKerjasama","VariasiLayanan","Kebijakan","AnggaranLayanan"
|
| 88 |
+
]
|
| 89 |
+
all_indicators = koleksi_cols + sdm_cols + pelayanan_cols + pengelolaan_cols
|
| 90 |
+
|
| 91 |
+
# Alias mapping dari DM (raw) -> canonical indikator
|
| 92 |
+
alias_map_raw = {
|
| 93 |
+
"j_judul_koleksi_tercetak": "JudulTercetak",
|
| 94 |
+
"j_eksemplar_koleksi_tercetak": "EksemplarTercetak",
|
| 95 |
+
"j_judul_koleksi_digital": "JudulElektronik",
|
| 96 |
+
"j_eksemplar_koleksi_digital": "EksemplarElektronik",
|
| 97 |
+
"tambah_judul_koleksi_tercetak": "TambahJudulTercetak",
|
| 98 |
+
"tambah_eksemplar_koleksi_tercetak": "TambahEksemplarTercetak",
|
| 99 |
+
"tambah_judul_koleksi_digital": "TambahJudulElektronik",
|
| 100 |
+
"tambah_eksemplar_koleksi_digital": "TambahEksemplarElektronik",
|
| 101 |
+
"j_anggaran_koleksi": "KomitmenAnggaranKoleksi",
|
| 102 |
+
"j_tenaga_ilmu_perpus": "TenagaKualifikasiIlmuPerpustakaan",
|
| 103 |
+
"j_tenaga_nonilmu_perpus": "TenagaFungsionalProfesional",
|
| 104 |
+
"j_tenaga_pkb": "TenagaPKB",
|
| 105 |
+
"j_anggaran_diklat_perpus": "AnggaranTenaga",
|
| 106 |
+
"j_peserta_budaya_baca": "PesertaBudayaBaca",
|
| 107 |
+
"j_pemustaka_luring_daring": "PemustakaLuringDaring",
|
| 108 |
+
"j_pemustaka_fasilitas_tik": "PemustakaFasilitasTIK",
|
| 109 |
+
"j_judul_koleksi_tercetak_termanfaat": "PemanfaatanJudulTercetak",
|
| 110 |
+
"j_eksemplar_koleksi_tercetak_termanfaat": "PemanfaatanEksemplarTercetak",
|
| 111 |
+
"j_judul_koleksi_digital_termanfaat": "PemanfaatanJudulElektronik",
|
| 112 |
+
"j_eksemplar_koleksi_digital_termanfaat": "PemanfaatanEksemplarElektronik",
|
| 113 |
+
"j_kegiatan_budaya_baca_peningkatan_literasi": "KegiatanBudayaBaca",
|
| 114 |
+
"j_kerjasama_pengembangan_perpus": "KegiatanKerjasama",
|
| 115 |
+
"j_variasi_layanan": "VariasiLayanan",
|
| 116 |
+
"j_kebijakan_prosedur_pelayanan": "Kebijakan",
|
| 117 |
+
"j_anggaran_peningkatan_pelayanan": "AnggaranLayanan",
|
| 118 |
+
}
|
| 119 |
+
# ============================================================
|
| 120 |
+
# 3) UTIL
|
| 121 |
# ============================================================
|
| 122 |
|
| 123 |
def _canon(s: str) -> str:
|
| 124 |
return re.sub(r"[^a-z0-9]+", "", str(s).lower())
|
| 125 |
|
| 126 |
+
|
| 127 |
+
alias_map = {_canon(k): v for k, v in alias_map_raw.items()}
|
| 128 |
+
|
|
|
|
|
|
|
| 129 |
|
| 130 |
def pick_col(df, candidates):
|
| 131 |
for c in candidates:
|
|
|
|
| 138 |
return can_map[k]
|
| 139 |
return None
|
| 140 |
|
| 141 |
+
|
| 142 |
def coerce_num(val):
|
| 143 |
if pd.isna(val):
|
| 144 |
return np.nan
|
|
|
|
| 160 |
except Exception:
|
| 161 |
return np.nan
|
| 162 |
|
| 163 |
+
|
| 164 |
def minmax_norm(s: pd.Series) -> pd.Series:
|
| 165 |
x = s.astype(float)
|
| 166 |
mn, mx = x.min(skipna=True), x.max(skipna=True)
|
|
|
|
| 168 |
return pd.Series(0.0, index=s.index)
|
| 169 |
return (x - mn) / (mx - mn)
|
| 170 |
|
| 171 |
+
|
| 172 |
+
def penalized_mean(row, cols):
|
| 173 |
+
vals = []
|
| 174 |
+
for c in cols:
|
| 175 |
+
cn = f"norm_{c}"
|
| 176 |
+
if cn in row.index:
|
| 177 |
+
v = row[cn]
|
| 178 |
+
if pd.isna(v):
|
| 179 |
+
v = 0.0
|
| 180 |
+
vals.append(float(v))
|
| 181 |
+
if not vals:
|
| 182 |
+
return 0.0
|
| 183 |
+
return float(np.mean(vals))
|
| 184 |
+
|
| 185 |
+
|
| 186 |
def norm_kew(v):
|
| 187 |
if pd.isna(v):
|
| 188 |
return None
|
|
|
|
| 195 |
return "PUSAT"
|
| 196 |
return t
|
| 197 |
|
| 198 |
+
|
| 199 |
+
def _norm_text(x):
|
| 200 |
+
if pd.isna(x):
|
| 201 |
+
return None
|
| 202 |
+
t = str(x).strip().upper()
|
| 203 |
+
return " ".join(t.split())
|
| 204 |
+
|
| 205 |
+
|
| 206 |
+
def slugify(s: str) -> str:
|
| 207 |
+
if s is None:
|
| 208 |
+
return "NA"
|
| 209 |
+
t = str(s).strip()
|
| 210 |
+
if t == "":
|
| 211 |
+
return "NA"
|
| 212 |
+
return re.sub(r"[^A-Z0-9]+", "", t.upper())
|
| 213 |
+
|
| 214 |
+
|
| 215 |
+
def norm_prov_key(s):
|
| 216 |
if pd.isna(s):
|
| 217 |
return None
|
| 218 |
t = str(s).upper()
|
|
|
|
| 221 |
t = " ".join(t.split())
|
| 222 |
return re.sub(r"[^A-Z0-9]+", "", t)
|
| 223 |
|
| 224 |
+
|
| 225 |
+
def norm_kab_key(s):
|
| 226 |
if pd.isna(s):
|
| 227 |
return None
|
| 228 |
t = str(s).upper()
|
| 229 |
+
t = t.replace("KABUPATEN", "KAB").replace("KAB.", "KAB")
|
| 230 |
+
t = t.replace("KOTA ADMINISTRASI", "KOTA").replace("KOTA.", "KOTA")
|
|
|
|
|
|
|
|
|
|
| 231 |
t = " ".join(t.split())
|
| 232 |
return re.sub(r"[^A-Z0-9]+", "", t)
|
| 233 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 234 |
|
| 235 |
+
def round_int_df(df: pd.DataFrame) -> pd.DataFrame:
|
| 236 |
+
if df is None or df.empty:
|
| 237 |
+
return df
|
| 238 |
+
out = df.copy()
|
| 239 |
+
for c in out.columns:
|
| 240 |
+
if pd.api.types.is_numeric_dtype(out[c]):
|
| 241 |
+
out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0).round(0).astype(int)
|
| 242 |
+
return out
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 243 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 244 |
|
| 245 |
# ============================================================
|
| 246 |
+
# 4) LOAD DATA + POPULASI
|
| 247 |
# ============================================================
|
| 248 |
|
| 249 |
DATA_INFO = ""
|
| 250 |
df_all_raw = None
|
|
|
|
|
|
|
| 251 |
|
| 252 |
prov_col = kab_col = kew_col = jenis_col = nama_col = None
|
| 253 |
|
| 254 |
+
# Populasi
|
| 255 |
+
pop_kab = None
|
| 256 |
+
pop_prov = None
|
| 257 |
+
|
| 258 |
try:
|
| 259 |
+
# --- DM ---
|
| 260 |
fp = Path(DATA_FILE)
|
| 261 |
if not fp.exists():
|
| 262 |
raise FileNotFoundError(f"File tidak ditemukan: {DATA_FILE}")
|
|
|
|
| 265 |
frames = [pd.read_excel(fp, sheet_name=s) for s in xls.sheet_names]
|
| 266 |
df_all_raw = pd.concat(frames, ignore_index=True, sort=False)
|
| 267 |
|
| 268 |
+
prov_col = pick_col(df_all_raw, ["provinsi", "Provinsi", "PROVINSI"])
|
| 269 |
+
kab_col = pick_col(df_all_raw, ["kab_kota","Kab/Kota","Kab_Kota","KAB/KOTA","kabupaten_kota","kota"])
|
| 270 |
+
kew_col = pick_col(df_all_raw, ["kewenangan","jenis_kewenangan","Kewenangan","KEWENANGAN"])
|
| 271 |
+
jenis_col= pick_col(df_all_raw, ["jenis_perpustakaan","JENIS_PERPUSTAKAAN","Jenis Perpustakaan","jenis perpustakaan"])
|
| 272 |
+
nama_col = pick_col(df_all_raw, ["nama_perpustakaan","nm_perpustakaan","nm_instansi_lembaga","Nama Perpustakaan"])
|
| 273 |
|
| 274 |
df_all_raw["KEW_NORM"] = df_all_raw[kew_col].apply(norm_kew) if kew_col else None
|
| 275 |
|
| 276 |
val_map_jenis = {
|
| 277 |
+
"PERPUSTAKAAN SEKOLAH": "sekolah",
|
| 278 |
+
"SEKOLAH": "sekolah",
|
| 279 |
+
"PERPUSTAKAAN UMUM": "umum",
|
| 280 |
+
"UMUM": "umum",
|
| 281 |
+
"PERPUSTAKAAN DAERAH": "umum",
|
| 282 |
+
"PERPUSTAKAAN KHUSUS": "khusus",
|
| 283 |
+
"KHUSUS": "khusus",
|
| 284 |
}
|
| 285 |
+
df_all_raw["_dataset"] = df_all_raw[jenis_col].apply(_norm_text).map(val_map_jenis) if jenis_col else None
|
| 286 |
|
| 287 |
+
# keys
|
| 288 |
+
if prov_col:
|
| 289 |
+
df_all_raw["prov_key"] = df_all_raw[prov_col].apply(norm_prov_key)
|
| 290 |
+
else:
|
| 291 |
+
df_all_raw["prov_key"] = None
|
| 292 |
|
| 293 |
+
if kab_col:
|
| 294 |
+
df_all_raw["kab_key"] = df_all_raw[kab_col].apply(norm_kab_key)
|
| 295 |
+
else:
|
| 296 |
+
df_all_raw["kab_key"] = None
|
| 297 |
+
|
| 298 |
+
# --- POP KAB/KOTA ---
|
| 299 |
+
pk = pd.read_excel(POP_KAB_FILE)
|
| 300 |
+
# Kolom expected:
|
| 301 |
+
# PROVINSI, KABUPATEN_KOTA, jumlah_populasi_umum, jumlah_populasi_sekolah (dan lain-lain)
|
| 302 |
+
col_pkab = pick_col(pk, ["KABUPATEN_KOTA","Kabupaten_Kota","Kab/Kota","Kabupaten/Kota"])
|
| 303 |
+
col_pprov= pick_col(pk, ["PROVINSI","Provinsi"])
|
| 304 |
+
col_pop_umum = pick_col(pk, ["jumlah_populasi_umum","jumlah populasi umum","pop_umum"])
|
| 305 |
+
col_pop_sek = pick_col(pk, ["jumlah_populasi_sekolah","jumlah populasi sekolah","pop_sekolah"])
|
| 306 |
+
|
| 307 |
+
if col_pkab is None or (col_pop_umum is None and col_pop_sek is None):
|
| 308 |
+
raise ValueError("Kolom populasi kab/kota tidak lengkap pada Data_populasi_Kab_kota.xlsx")
|
| 309 |
+
|
| 310 |
+
pop_kab = pd.DataFrame({
|
| 311 |
+
"kab_key": pk[col_pkab].astype(str).str.strip().apply(norm_kab_key),
|
| 312 |
+
"Kab_Kota_Label": pk[col_pkab].astype(str).str.strip(),
|
| 313 |
+
})
|
| 314 |
+
if col_pprov:
|
| 315 |
+
pop_kab["Provinsi_Label"] = pk[col_pprov].astype(str).str.strip()
|
| 316 |
+
pop_kab["pop_umum"] = pk[col_pop_umum].apply(coerce_num) if col_pop_umum else np.nan
|
| 317 |
+
pop_kab["pop_sekolah"] = pk[col_pop_sek].apply(coerce_num) if col_pop_sek else np.nan
|
| 318 |
+
|
| 319 |
+
# dedup by kab_key (ambil max agar aman)
|
| 320 |
+
pop_kab = pop_kab.groupby("kab_key", as_index=False).agg({
|
| 321 |
+
"Kab_Kota_Label":"first",
|
| 322 |
+
"Provinsi_Label":"first" if "Provinsi_Label" in pop_kab.columns else "first",
|
| 323 |
+
"pop_umum":"max",
|
| 324 |
+
"pop_sekolah":"max",
|
| 325 |
+
})
|
| 326 |
|
| 327 |
+
# --- POP PROV ---
|
| 328 |
+
pp = pd.read_excel(POP_PROV_FILE)
|
| 329 |
+
col_pr = pick_col(pp, ["Provinsi","PROVINSI","provinsi"])
|
| 330 |
+
col_pop_pend = pick_col(pp, ["total_pend","total pend","total_pendidikan","total_pend "])
|
| 331 |
+
col_target_samp = pick_col(pp, ["total _sampel","total_sampel","total sampel"])
|
| 332 |
+
|
| 333 |
+
if col_pr is None:
|
| 334 |
+
raise ValueError("Kolom Provinsi tidak ditemukan pada Data_populasi_propinsi.xlsx")
|
| 335 |
+
|
| 336 |
+
pop_prov = pd.DataFrame({
|
| 337 |
+
"prov_key": pp[col_pr].astype(str).str.strip().apply(norm_prov_key),
|
| 338 |
+
"Provinsi_Label": pp[col_pr].astype(str).str.strip(),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 339 |
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 340 |
|
| 341 |
+
# gunakan total_pend sebagai populasi; kalau tidak ada, fallback dari total_sampel/0.68
|
| 342 |
+
if col_pop_pend:
|
| 343 |
+
pop_prov["pop_sekolah_prov"] = pp[col_pop_pend].apply(coerce_num)
|
| 344 |
+
elif col_target_samp:
|
| 345 |
+
pop_prov["pop_sekolah_prov"] = pp[col_target_samp].apply(coerce_num) / TARGET_COVERAGE
|
| 346 |
+
else:
|
| 347 |
+
pop_prov["pop_sekolah_prov"] = np.nan
|
| 348 |
+
|
| 349 |
+
pop_prov = pop_prov.groupby("prov_key", as_index=False).agg({
|
| 350 |
+
"Provinsi_Label":"first",
|
| 351 |
+
"pop_sekolah_prov":"max",
|
| 352 |
})
|
| 353 |
+
|
| 354 |
+
DATA_INFO = f"β
Data: **{DATA_FILE}** | Baris: **{len(df_all_raw)}**"
|
| 355 |
+
DATA_INFO += f"<br>β
Populasi Kab/Kota: **{POP_KAB_FILE}** (n={len(pop_kab)})"
|
| 356 |
+
DATA_INFO += f"<br>β
Populasi Provinsi: **{POP_PROV_FILE}** (n={len(pop_prov)})"
|
| 357 |
+
|
| 358 |
except Exception as e:
|
| 359 |
+
df_all_raw = None
|
| 360 |
+
pop_kab = None
|
| 361 |
+
pop_prov = None
|
| 362 |
+
DATA_INFO = f"β οΈ Gagal load data: `{repr(e)}`"
|
| 363 |
+
|
| 364 |
+
|
| 365 |
+
def all_prov_choices():
|
| 366 |
+
if df_all_raw is None or prov_col is None:
|
| 367 |
+
return ["(Semua)"]
|
| 368 |
+
s = df_all_raw[prov_col].dropna().astype(str).str.strip()
|
| 369 |
+
vals = sorted([o for o in s.unique() if o != ""])
|
| 370 |
+
return ["(Semua)"] + vals
|
| 371 |
+
|
| 372 |
+
|
| 373 |
+
def get_kab_choices_for_prov(prov_value):
|
| 374 |
+
if df_all_raw is None or kab_col is None:
|
| 375 |
+
return ["(Semua)"]
|
| 376 |
+
if prov_value is None or prov_value == "(Semua)" or prov_col is None:
|
| 377 |
+
s = df_all_raw[kab_col].dropna().astype(str).str.strip()
|
| 378 |
+
else:
|
| 379 |
+
m = df_all_raw[prov_col].astype(str).str.strip() == prov_value
|
| 380 |
+
s = df_all_raw.loc[m, kab_col].dropna().astype(str).str.strip()
|
| 381 |
+
vals = sorted([x for x in s.unique() if x != ""])
|
| 382 |
+
return ["(Semua)"] + vals
|
| 383 |
+
|
| 384 |
+
|
| 385 |
+
def all_kew_choices():
|
| 386 |
+
if df_all_raw is None or "KEW_NORM" not in df_all_raw.columns:
|
| 387 |
+
return ["(Semua)"]
|
| 388 |
+
s = df_all_raw["KEW_NORM"].dropna().astype(str).str.strip()
|
| 389 |
+
vals = sorted([o for o in s.unique() if o != ""])
|
| 390 |
+
return ["(Semua)"] + (vals if vals else [])
|
| 391 |
+
|
| 392 |
+
|
| 393 |
+
prov_choices = all_prov_choices()
|
| 394 |
+
kab_choices = get_kab_choices_for_prov(prov_choices[0] if prov_choices else "(Semua)")
|
| 395 |
+
kew_choices = all_kew_choices()
|
| 396 |
+
default_kew = "KAB/KOTA" if "KAB/KOTA" in kew_choices else (kew_choices[0] if kew_choices else "(Semua)")
|
| 397 |
|
|
|
|
|
|
|
| 398 |
|
| 399 |
# ============================================================
|
| 400 |
+
# 5) PIPELINE NASIONAL: YJ + MinMax + Sub/Dim + Indeks Real
|
| 401 |
# ============================================================
|
| 402 |
|
| 403 |
+
def prepare_global_pipeline(df_src: pd.DataFrame) -> pd.DataFrame:
|
| 404 |
if df_src is None or df_src.empty:
|
| 405 |
return df_src
|
| 406 |
+
|
| 407 |
df = df_src.copy()
|
| 408 |
|
| 409 |
+
# rename ke canonical indikator
|
| 410 |
rename_map = {}
|
| 411 |
+
canonical_targets = set(all_indicators)
|
| 412 |
+
for col in list(df.columns):
|
| 413 |
+
ccol = _canon(col)
|
| 414 |
+
if ccol in alias_map:
|
| 415 |
+
rename_map[col] = alias_map[ccol]
|
| 416 |
else:
|
| 417 |
+
for tgt in canonical_targets:
|
| 418 |
+
if ccol == _canon(tgt):
|
| 419 |
rename_map[col] = tgt
|
| 420 |
break
|
| 421 |
if rename_map:
|
| 422 |
df = df.rename(columns=rename_map)
|
| 423 |
|
| 424 |
available = [c for c in all_indicators if c in df.columns]
|
| 425 |
+
|
| 426 |
+
# numeric
|
| 427 |
for c in available:
|
| 428 |
df[c] = df[c].apply(coerce_num)
|
| 429 |
|
| 430 |
+
# YeoβJohnson (nasional) + MinMax (nasional)
|
| 431 |
for c in available:
|
| 432 |
x = df[c].astype(float).values
|
| 433 |
mask = ~np.isnan(x)
|
| 434 |
+
y = np.full_like(x, np.nan, dtype=float)
|
| 435 |
+
|
| 436 |
if mask.sum() > 1:
|
| 437 |
pt = PowerTransformer(method="yeo-johnson", standardize=False)
|
| 438 |
+
y[mask] = pt.fit_transform(x[mask].reshape(-1, 1)).ravel()
|
| 439 |
else:
|
| 440 |
+
y[mask] = x[mask]
|
| 441 |
+
|
| 442 |
+
df[f"yj_{c}"] = y
|
| 443 |
+
df[f"norm_{c}"] = minmax_norm(pd.Series(y, index=df.index))
|
| 444 |
|
| 445 |
+
# sub-dimensi (0β1) penalized mean
|
| 446 |
df["sub_koleksi"] = df.apply(lambda r: penalized_mean(r, [c for c in koleksi_cols if c in available]), axis=1)
|
| 447 |
df["sub_sdm"] = df.apply(lambda r: penalized_mean(r, [c for c in sdm_cols if c in available]), axis=1)
|
| 448 |
df["sub_pelayanan"] = df.apply(lambda r: penalized_mean(r, [c for c in pelayanan_cols if c in available]), axis=1)
|
| 449 |
df["sub_pengelolaan"] = df.apply(lambda r: penalized_mean(r, [c for c in pengelolaan_cols if c in available]), axis=1)
|
| 450 |
|
| 451 |
+
df["dim_kepatuhan"] = df[["sub_koleksi","sub_sdm"]].mean(axis=1).fillna(0.0)
|
| 452 |
+
df["dim_kinerja"] = df[["sub_pelayanan","sub_pengelolaan"]].mean(axis=1).fillna(0.0)
|
| 453 |
|
| 454 |
+
df["Indeks_Real_0_100"] = (
|
| 455 |
+
100 * (W_KEPATUHAN * df["dim_kepatuhan"] + W_KINERJA * df["dim_kinerja"])
|
| 456 |
+
).fillna(0.0)
|
| 457 |
|
| 458 |
return df
|
| 459 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 460 |
|
| 461 |
+
df_all = prepare_global_pipeline(df_all_raw) if df_all_raw is not None else None
|
| 462 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 463 |
|
| 464 |
+
# ============================================================
|
| 465 |
+
# 6) PENALTI COVERAGE 68% -> INDEKS FINAL
|
| 466 |
+
# ============================================================
|
|
|
|
|
|
|
| 467 |
|
| 468 |
+
def compute_coverage_weight(df_subset: pd.DataFrame, kew_value: str) -> pd.Series:
|
| 469 |
+
"""
|
| 470 |
+
Bobot coverage per baris:
|
| 471 |
+
- KAB/KOTA:
|
| 472 |
+
sekolah: n_sampel sekolah per kab / pop_sekolah kab
|
| 473 |
+
umum : n_sampel umum per kab / pop_umum kab
|
| 474 |
+
khusus: 1.0
|
| 475 |
+
- PROVINSI:
|
| 476 |
+
sekolah: n_sampel sekolah per prov / pop_sekolah_prov
|
| 477 |
+
umum/khusus: 1.0 (untuk umum provinsi tidak didefinisikan di populasi ini)
|
| 478 |
+
"""
|
| 479 |
+
if df_subset is None or df_subset.empty:
|
| 480 |
+
return pd.Series([], dtype=float)
|
| 481 |
|
| 482 |
+
df = df_subset.copy()
|
| 483 |
+
w = pd.Series(1.0, index=df.index, dtype=float)
|
| 484 |
|
| 485 |
+
def cap(cov):
|
| 486 |
+
if pd.isna(cov) or cov <= 0:
|
| 487 |
+
return 0.0
|
| 488 |
+
return float(min(cov / TARGET_COVERAGE, 1.0))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 489 |
|
| 490 |
+
kew_norm = str(kew_value or "").upper()
|
| 491 |
|
| 492 |
+
# KAB/KOTA
|
| 493 |
+
if ("KAB" in kew_norm or "KOTA" in kew_norm) and pop_kab is not None and "kab_key" in df.columns:
|
| 494 |
+
g = df.groupby(["kab_key","_dataset"]).size().unstack(fill_value=0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 495 |
|
| 496 |
+
# join populasi
|
| 497 |
+
j = pop_kab.merge(g.reset_index(), on="kab_key", how="left").fillna(0)
|
| 498 |
|
| 499 |
+
# coverage
|
| 500 |
+
cov_sek = np.where(j["pop_sekolah"].replace(0, np.nan).notna(),
|
| 501 |
+
j.get("sekolah", 0) / j["pop_sekolah"].replace(0, np.nan),
|
| 502 |
+
np.nan)
|
| 503 |
+
cov_um = np.where(j["pop_umum"].replace(0, np.nan).notna(),
|
| 504 |
+
j.get("umum", 0) / j["pop_umum"].replace(0, np.nan),
|
| 505 |
+
np.nan)
|
| 506 |
|
| 507 |
+
map_sek = dict(zip(j["kab_key"], pd.Series(cov_sek).apply(cap).fillna(0.0)))
|
| 508 |
+
map_um = dict(zip(j["kab_key"], pd.Series(cov_um).apply(cap).fillna(0.0)))
|
| 509 |
|
| 510 |
+
def row_w(r):
|
| 511 |
ds = r.get("_dataset", None)
|
|
|
|
| 512 |
if ds == "khusus":
|
| 513 |
return 1.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 514 |
kk = r.get("kab_key", None)
|
| 515 |
if ds == "sekolah":
|
| 516 |
+
return float(map_sek.get(kk, 0.0))
|
| 517 |
if ds == "umum":
|
| 518 |
+
return float(map_um.get(kk, 0.0))
|
| 519 |
+
return 1.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 520 |
|
| 521 |
+
w = df.apply(row_w, axis=1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 522 |
|
| 523 |
+
# PROVINSI
|
| 524 |
+
if ("PROV" in kew_norm) and pop_prov is not None and "prov_key" in df.columns:
|
| 525 |
+
g = df.groupby(["prov_key","_dataset"]).size().unstack(fill_value=0)
|
| 526 |
|
| 527 |
+
j = pop_prov.merge(g.reset_index(), on="prov_key", how="left").fillna(0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 528 |
|
| 529 |
+
cov = np.where(j["pop_sekolah_prov"].replace(0, np.nan).notna(),
|
| 530 |
+
j.get("sekolah", 0) / j["pop_sekolah_prov"].replace(0, np.nan),
|
| 531 |
+
np.nan)
|
| 532 |
|
| 533 |
+
map_cov = dict(zip(j["prov_key"], pd.Series(cov).apply(cap).fillna(0.0)))
|
| 534 |
|
| 535 |
+
def row_w2(r):
|
| 536 |
ds = r.get("_dataset", None)
|
| 537 |
if ds == "khusus":
|
| 538 |
return 1.0
|
| 539 |
if ds == "sekolah":
|
| 540 |
+
return float(map_cov.get(r.get("prov_key", None), 0.0))
|
| 541 |
return 1.0
|
| 542 |
|
| 543 |
+
w = df.apply(row_w2, axis=1)
|
|
|
|
|
|
|
|
|
|
| 544 |
|
| 545 |
+
return pd.Series(w, index=df.index, dtype=float)
|
|
|
|
| 546 |
|
|
|
|
|
|
|
| 547 |
|
| 548 |
+
def apply_penalty_and_final(df_subset: pd.DataFrame, kew_value: str) -> pd.DataFrame:
|
| 549 |
+
if df_subset is None or df_subset.empty:
|
| 550 |
+
return df_subset
|
| 551 |
+
|
| 552 |
+
df = df_subset.copy()
|
| 553 |
+
weights = compute_coverage_weight(df, kew_value)
|
| 554 |
+
|
| 555 |
+
df["DimKepatuhan_Final"] = (df["dim_kepatuhan"].fillna(0.0) * weights).fillna(0.0)
|
| 556 |
+
df["DimKinerja_Final"] = (df["dim_kinerja"].fillna(0.0) * weights).fillna(0.0)
|
| 557 |
+
df["Indeks_Final_0_100"] = (df["Indeks_Real_0_100"].fillna(0.0) * weights).fillna(0.0)
|
| 558 |
+
|
| 559 |
+
return df
|
| 560 |
+
|
| 561 |
|
| 562 |
# ============================================================
|
| 563 |
+
# 7) BELL CURVE (Plotly)
|
| 564 |
# ============================================================
|
| 565 |
|
| 566 |
+
def make_bell_figure(df_plot: pd.DataFrame, title: str, idx_col: str, hover_name: str = None) -> go.Figure:
|
| 567 |
fig = go.Figure()
|
| 568 |
+
fig.update_layout(
|
| 569 |
+
title=title,
|
| 570 |
+
xaxis_title="Indeks (0β100)",
|
| 571 |
+
yaxis_title="Kepadatan (relatif)",
|
| 572 |
+
yaxis=dict(showticklabels=False),
|
| 573 |
+
margin=dict(l=40, r=20, t=60, b=40),
|
| 574 |
+
hovermode="x"
|
| 575 |
+
)
|
| 576 |
+
|
| 577 |
+
if df_plot is None or df_plot.empty or idx_col not in df_plot.columns:
|
| 578 |
+
fig.add_annotation(text="Data tidak tersedia.", x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False)
|
| 579 |
return fig
|
| 580 |
|
| 581 |
+
x = pd.to_numeric(df_plot[idx_col], errors="coerce").dropna().values.astype(float)
|
| 582 |
+
if len(x) < 3:
|
| 583 |
+
fig.add_annotation(text="Data terlalu sedikit untuk bell curve.", x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 584 |
return fig
|
| 585 |
|
| 586 |
+
mu = x.mean()
|
| 587 |
+
sigma = x.std(ddof=1) if len(x) > 1 else 1.0
|
| 588 |
+
if sigma <= 0:
|
|
|
|
| 589 |
sigma = 1.0
|
| 590 |
|
| 591 |
+
xs = np.linspace(max(0, x.min() - 5), min(100, x.max() + 5), 200)
|
| 592 |
pdf = (1.0 / (sigma * np.sqrt(2 * np.pi))) * np.exp(-0.5 * ((xs - mu) / sigma) ** 2)
|
| 593 |
+
pdf = pdf / pdf.max()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 594 |
|
| 595 |
+
# line
|
| 596 |
fig.add_trace(go.Scatter(x=xs, y=pdf, mode="lines", name="Bell curve", hoverinfo="skip"))
|
| 597 |
+
|
| 598 |
+
# points
|
| 599 |
+
if hover_name and hover_name in df_plot.columns:
|
| 600 |
+
hv = [f"{n}<br>Indeks: {v:.0f}" for n, v in zip(df_plot[hover_name].astype(str).fillna(""), df_plot[idx_col].fillna(0))]
|
| 601 |
+
else:
|
| 602 |
+
hv = [f"Indeks: {v:.0f}" for v in df_plot[idx_col].fillna(0)]
|
| 603 |
fig.add_trace(go.Scatter(
|
| 604 |
+
x=df_plot[idx_col].fillna(0),
|
| 605 |
+
y=np.zeros(len(df_plot)),
|
| 606 |
+
mode="markers",
|
| 607 |
+
name="Perpustakaan",
|
| 608 |
+
hovertext=hv,
|
| 609 |
+
hovertemplate="%{hovertext}<extra></extra>"
|
| 610 |
))
|
| 611 |
|
| 612 |
+
# quartiles
|
| 613 |
+
q1, q2, q3 = np.quantile(x, [0.25, 0.5, 0.75])
|
| 614 |
+
for q, label in [(q1, "Q1"), (q2, "Median"), (q3, "Q3")]:
|
| 615 |
fig.add_trace(go.Scatter(
|
| 616 |
+
x=[q, q],
|
| 617 |
+
y=[0, 1.05],
|
| 618 |
+
mode="lines",
|
| 619 |
+
name=label,
|
| 620 |
hovertemplate=f"{label}: {q:.2f}<extra></extra>"
|
| 621 |
))
|
| 622 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 623 |
return fig
|
| 624 |
|
| 625 |
+
|
| 626 |
# ============================================================
|
| 627 |
+
# 8) VERIFIKASI (opsional untuk kontrol mutu)
|
| 628 |
+
# - kita tetap hitung ringkas untuk Word, tapi TIDAK ditampilkan di detail.
|
| 629 |
# ============================================================
|
| 630 |
|
| 631 |
+
def compute_verification_table(df_subset: pd.DataFrame, kew_value: str) -> pd.DataFrame:
    """Build a sample-vs-population coverage table for quality control.

    Depending on the authority level in ``kew_value`` (Kab/Kota vs Provinsi),
    counts sampled libraries per region in ``df_subset`` and compares them
    against the population tables (module globals ``pop_kab`` / ``pop_prov``).

    Returns an empty DataFrame when the input is empty, the population table
    is unavailable, or the authority level matches neither branch.
    """
    if df_subset is None or df_subset.empty:
        return pd.DataFrame()

    def _int_series(frame: pd.DataFrame, name: str) -> pd.Series:
        # BUGFIX: frame.get(name, 0) yields a plain scalar 0 when the column
        # is missing, which crashes on .astype(int). Always return a Series.
        if name in frame.columns:
            return pd.to_numeric(frame[name], errors="coerce").fillna(0).astype(int)
        return pd.Series(0, index=frame.index, dtype=int)

    kew_norm = str(kew_value or "").upper()

    # --- Kab/Kota level -------------------------------------------------
    if ("KAB" in kew_norm or "KOTA" in kew_norm) and pop_kab is not None and "kab_key" in df_subset.columns:
        g = df_subset.groupby(["kab_key", "_dataset"]).size().unstack(fill_value=0).reset_index()
        j = pop_kab.merge(g, on="kab_key", how="left").fillna(0)

        out = pd.DataFrame({
            "Kab/Kota": j["Kab_Kota_Label"],
            "Sampel_Sekolah": _int_series(j, "sekolah"),
            "Pop_Sekolah": _int_series(j, "pop_sekolah"),
            "Sampel_Umum": _int_series(j, "umum"),
            "Pop_Umum": _int_series(j, "pop_umum"),
        })
        # Guard against division by zero: regions with no recorded population
        # get 0% coverage instead of inf/NaN.
        out["Coverage_Sekolah_%"] = np.where(out["Pop_Sekolah"] > 0, (100 * out["Sampel_Sekolah"] / out["Pop_Sekolah"]), 0)
        out["Coverage_Umum_%"] = np.where(out["Pop_Umum"] > 0, (100 * out["Sampel_Umum"] / out["Pop_Umum"]), 0)
        return out.sort_values("Kab/Kota").reset_index(drop=True)

    # --- Provinsi level -------------------------------------------------
    if ("PROV" in kew_norm) and pop_prov is not None and "prov_key" in df_subset.columns:
        g = df_subset.groupby(["prov_key", "_dataset"]).size().unstack(fill_value=0).reset_index()
        j = pop_prov.merge(g, on="prov_key", how="left").fillna(0)

        out = pd.DataFrame({
            "Provinsi": j["Provinsi_Label"],
            "Sampel_Sekolah": _int_series(j, "sekolah"),
            "Pop_Sekolah": _int_series(j, "pop_sekolah_prov"),
        })
        out["Coverage_Sekolah_%"] = np.where(out["Pop_Sekolah"] > 0, (100 * out["Sampel_Sekolah"] / out["Pop_Sekolah"]), 0)
        return out.sort_values("Provinsi").reset_index(drop=True)

    # Authority level not recognized / population table missing.
    return pd.DataFrame()
|
| 667 |
+
|
| 668 |
+
|
| 669 |
+
# ============================================================
|
| 670 |
+
# 9) LLM CLIENT + ANALYSIS + WORD
|
| 671 |
+
# ============================================================
|
| 672 |
+
|
| 673 |
+
def get_llm_client():
    """Return the process-wide HF InferenceClient, creating it lazily.

    The client is cached in the module global ``_HF_CLIENT``. When
    construction fails (e.g. the hub is unreachable) the function returns
    None so callers can fall back to the rule-based analysis.
    """
    global _HF_CLIENT
    if _HF_CLIENT is None:
        try:
            kwargs = {"model": LLM_MODEL_NAME}
            if HF_TOKEN:
                # Authenticate only when a token was configured.
                kwargs["token"] = HF_TOKEN
            _HF_CLIENT = InferenceClient(**kwargs)
        except Exception:
            _HF_CLIENT = None
    return _HF_CLIENT
|
| 686 |
+
|
| 687 |
+
|
| 688 |
+
def summarize_distribution(detail_df: pd.DataFrame) -> dict:
    """Summarize the FINAL index distribution overall and per library type.

    Prefers the ``Indeks_Final_0_100`` column (post-penalty) and falls back
    to ``Indeks_Real_0_100``.

    Returns:
        dict with keys:
        - "idx_col": name of the index column used,
        - "all": stats dict (n/mean/min/q1/median/q3/max) or {} when empty,
        - "by_type": per-dataset ("sekolah"/"umum"/"khusus") stats dicts.
    """
    idx_col = "Indeks_Final_0_100" if (detail_df is not None and "Indeks_Final_0_100" in detail_df.columns) else "Indeks_Real_0_100"
    if detail_df is None or detail_df.empty or idx_col not in detail_df.columns:
        return {"idx_col": idx_col, "all": {}, "by_type": {}}

    def stats_for(s: pd.Series) -> dict:
        # Coerce to numeric first; non-numeric cells become NaN and are dropped.
        s = pd.to_numeric(s, errors="coerce").dropna()
        if len(s) == 0:
            # Empty stats dict signals "no data" to the narrative builders.
            return {}
        q1, q2, q3 = np.quantile(s, [0.25, 0.5, 0.75])
        return {
            "n": int(len(s)),
            "mean": float(s.mean()),
            "min": float(s.min()),
            "q1": float(q1),
            "median": float(q2),
            "q3": float(q3),
            "max": float(s.max()),
        }

    out = {"idx_col": idx_col, "all": stats_for(detail_df[idx_col]), "by_type": {}}
    if "_dataset" in detail_df.columns:
        for ds in ["sekolah", "umum", "khusus"]:
            out["by_type"][ds] = stats_for(detail_df.loc[detail_df["_dataset"] == ds, idx_col])
    return out
|
| 713 |
|
| 714 |
|
| 715 |
+
def generate_rule_based_analytics(detail_df: pd.DataFrame, agg_df: pd.DataFrame, verif_df: pd.DataFrame, wilayah: str, kew: str) -> str:
    """Deterministic fallback narrative used when the LLM is unavailable.

    Produces a short Markdown summary of the FINAL index distribution plus
    fixed policy notes. ``agg_df`` and ``verif_df`` are accepted for
    interface parity with the LLM path but are not used here.
    """
    dist = summarize_distribution(detail_df)
    idx_col = dist["idx_col"]
    stats = dist.get("all", {})

    parts = [
        "## Analisis Otomatis (Fallback Rule-based)\n",
        f"Wilayah: {wilayah} | Kewenangan: {kew}",
        f"Indeks yang digunakan: {idx_col} (FINAL setelah penalti 68% bila berlaku)",
    ]
    if stats:
        parts += [
            f"- Jumlah sampel: {stats['n']}",
            f"- Rata-rata indeks: {stats['mean']:.2f}",
            f"- Q1/Median/Q3: {stats['q1']:.2f} / {stats['median']:.2f} / {stats['q3']:.2f}",
            f"- Minimum–Maksimum: {stats['min']:.2f} – {stats['max']:.2f}",
        ]
    parts.append("\nCatatan kebijakan: nilai indeks FINAL akan terdampak bila coverage sampel belum mencapai 68% pada perpustakaan sekolah/umum.")
    parts.append("Rekomendasi umum: percepat peningkatan cakupan pelaporan, serta fokus pembinaan pada sub-dimensi yang paling tertinggal.")
    return "\n".join(parts)
|
| 731 |
+
|
| 732 |
+
|
| 733 |
+
def generate_llm_analytics(detail_df: pd.DataFrame, agg_df: pd.DataFrame, verif_df: pd.DataFrame, wilayah: str, kew: str) -> str:
    """Generate the narrative analysis via the HF LLM, with rule-based fallback.

    Builds a prompt from the distribution summary, the aggregate table, and the
    coverage-verification table, then calls ``chat_completion``. Any failure
    (no client, LLM disabled, API error, empty response) falls back to
    :func:`generate_rule_based_analytics`.
    """
    dist = summarize_distribution(detail_df)
    idx_col = dist["idx_col"]
    st_all = dist.get("all", {})
    by_type = dist.get("by_type", {})

    client = get_llm_client()
    if (client is None) or (not USE_LLM):
        return generate_rule_based_analytics(detail_df, agg_df, verif_df, wilayah, kew)

    def fmt(st):
        # Compact one-line rendering of a stats dict for the prompt.
        if not st:
            return "(tidak tersedia)"
        return f"n={st['n']}, mean={st['mean']:.2f}, min={st['min']:.2f}, Q1={st['q1']:.2f}, median={st['median']:.2f}, Q3={st['q3']:.2f}, max={st['max']:.2f}"

    lines = []
    lines.append(f"Wilayah: {wilayah}")
    lines.append(f"Kewenangan: {kew}")
    lines.append(f"Indeks: {idx_col} (0–100)")
    lines.append(f"Distribusi keseluruhan: {fmt(st_all)}")
    for ds in ["sekolah", "umum", "khusus"]:
        if ds in by_type:
            lines.append(f"Distribusi {ds}: {fmt(by_type[ds])}")

    # Compact table renderings for the prompt; cap verification rows to keep
    # the prompt size bounded.
    agg_txt = agg_df.to_string(index=False) if agg_df is not None and not agg_df.empty else "(kosong)"
    ver_txt = verif_df.head(12).to_string(index=False) if verif_df is not None and not verif_df.empty else "(kosong)"

    system_prompt = (
        "Anda adalah analis data dan kebijakan perpustakaan. "
        "Tuliskan analisis resmi berbasis data untuk pemangku kepentingan pemerintah daerah."
    )

    # BUGFIX: header typo "INDeks" corrected to "Indeks".
    user_prompt = f"""
DATA RINGKAS IPLM:

STATISTIK DISTRIBUSI (Indeks FINAL):
{chr(10).join(lines)}

TABEL AGREGAT (sub/dim + indeks final):
{agg_txt}

VERIFIKASI COVERAGE (kontrol mutu):
{ver_txt}

TUGAS:
Tulis analisis Bahasa Indonesia formal, struktur:
A) Ringkasan eksekutif (1 paragraf).
B) Diagnostik berbasis data (2–3 paragraf): variasi antar jenis perpustakaan, makna Q1/Median/Q3, implikasi penalti 68%.
C) Prioritas intervensi 12–18 bulan (1–2 paragraf).
D) Rekomendasi kebijakan 3–5 tahun (1–2 paragraf).

GAYA:
- Jangan pakai kata "rendah/sedang/tinggi". Pakai frasa netral: "ruang penguatan", "belum konsisten", dll.
- Jangan membuat angka baru di luar data.
"""

    try:
        resp = client.chat_completion(
            model=LLM_MODEL_NAME,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            max_tokens=1200,
            temperature=0.25,
        )
        text = resp.choices[0].message.content.strip()
        if not text:
            # Treat an empty completion the same as an API failure.
            raise ValueError("Respon LLM kosong")
        return text
    except Exception:
        return generate_rule_based_analytics(detail_df, agg_df, verif_df, wilayah, kew)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 807 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 808 |
|
| 809 |
+
def _append_df_table(doc, frame: pd.DataFrame) -> None:
    """Append *frame* to the docx *doc* as a simple table with a header row."""
    t = doc.add_table(rows=1, cols=len(frame.columns))
    for i, c in enumerate(frame.columns):
        t.rows[0].cells[i].text = str(c)
    for _, r in frame.iterrows():
        cells = t.add_row().cells
        for i, c in enumerate(frame.columns):
            cells[i].text = str(r[c])


def build_word_report(detail_df_full: pd.DataFrame,
                      agg_df: pd.DataFrame,
                      verif_df: pd.DataFrame,
                      wilayah: str,
                      kew: str,
                      analysis_text: str) -> str:
    """Assemble the .docx report (stats, aggregate, coverage, LLM narrative).

    Returns the path of the saved document. ``kew`` is accepted for interface
    parity; the heading only uses ``wilayah``.
    """
    doc = Document()
    doc.add_heading(f"Laporan Analisis IPLM (FINAL) – {wilayah}", level=1)

    doc.add_paragraph(
        "Dokumen ini menyajikan analisis Indeks IPLM FINAL (0–100) setelah penerapan penalti "
        "kecukupan sampel 68% (untuk perpustakaan sekolah dan umum). Perpustakaan khusus tidak "
        "dikenai penalti karena populasi pembanding belum baku secara nasional."
    )

    dist = summarize_distribution(detail_df_full)
    idx_col = dist["idx_col"]
    st = dist.get("all", {})

    doc.add_heading("1. Ringkasan Statistik Indeks FINAL", level=2)
    if st:
        doc.add_paragraph(f"- Indeks: {idx_col}")
        doc.add_paragraph(f"- Jumlah sampel: {st['n']}")
        doc.add_paragraph(f"- Rata-rata: {st['mean']:.2f}")
        doc.add_paragraph(f"- Q1 / Median / Q3: {st['q1']:.2f} / {st['median']:.2f} / {st['q3']:.2f}")
        doc.add_paragraph(f"- Minimum–Maksimum: {st['min']:.2f} – {st['max']:.2f}")
    else:
        doc.add_paragraph("Statistik tidak tersedia.")

    doc.add_heading("2. Agregat per Jenis Perpustakaan", level=2)
    if agg_df is not None and not agg_df.empty:
        _append_df_table(doc, agg_df)
    else:
        doc.add_paragraph("Agregat tidak tersedia.")

    doc.add_heading("3. Verifikasi Coverage (Kontrol Mutu)", level=2)
    if verif_df is not None and not verif_df.empty:
        v = verif_df.copy()
        for c in v.columns:
            if pd.api.types.is_numeric_dtype(v[c]):
                # Render whole numbers only (no decimal separator).
                v[c] = pd.to_numeric(v[c], errors="coerce").fillna(0).round(0).astype(int)
        _append_df_table(doc, v)
    else:
        doc.add_paragraph("Tidak ada tabel verifikasi untuk wilayah ini.")

    doc.add_heading("4. Analisis Naratif Otomatis (LLM)", level=2)
    for p in str(analysis_text).split("\n"):
        if p.strip():
            doc.add_paragraph(p.strip())

    # BUGFIX: tempfile.mktemp() is deprecated and race-prone; reserve the
    # file atomically instead, then let python-docx overwrite it.
    with tempfile.NamedTemporaryFile(suffix=".docx", delete=False) as tmp:
        outpath = tmp.name
    doc.save(outpath)
    return outpath
|
| 874 |
|
| 875 |
+
|
| 876 |
# ============================================================
|
| 877 |
+
# 10) CORE PIPELINE VIEW: AGREGAT + DETAIL + FILES + BELL CURVE
|
| 878 |
# ============================================================
|
| 879 |
|
| 880 |
+
def build_views_and_files(df_filtered_final: pd.DataFrame, wilayah: str, kew: str):
    """Produce the UI tables, Excel export files, and bell-curve figures.

    Returns a 10-tuple:
        (agg_df, detail_view, agg_path, detail_path, raw_path,
         detail_full, fig_all, fig_sek, fig_um, fig_kh)
    """
    if df_filtered_final is None or df_filtered_final.empty:
        # BUGFIX: this branch previously returned only 9 items while the
        # success path returns 10, breaking tuple unpacking in run_app.
        return (pd.DataFrame(), pd.DataFrame(), None, None, None,
                pd.DataFrame(), None, None, None, None)

    # ---- DETAIL FULL: columns for analytics + the Word report -----------
    keep_detail = []
    if prov_col and prov_col in df_filtered_final.columns:
        keep_detail.append(prov_col)
    if kab_col and kab_col in df_filtered_final.columns:
        keep_detail.append(kab_col)
    if nama_col and nama_col in df_filtered_final.columns:
        keep_detail.append(nama_col)

    keep_detail += [
        "_dataset",
        "sub_koleksi", "sub_sdm", "sub_pelayanan", "sub_pengelolaan",
        "dim_kepatuhan", "dim_kinerja",
        "Indeks_Final_0_100",
    ]
    keep_detail = [c for c in keep_detail if c in df_filtered_final.columns]
    detail_full = df_filtered_final[keep_detail].copy()

    # ---- DETAIL VIEW for the UI: integers on a 0-100 scale --------------
    detail_view = detail_full.copy()
    for c in ["sub_koleksi", "sub_sdm", "sub_pelayanan", "sub_pengelolaan", "dim_kepatuhan", "dim_kinerja"]:
        if c in detail_view.columns:
            # Sub/dim scores are stored as 0-1 fractions; display as 0-100.
            detail_view[c] = (detail_view[c].fillna(0.0) * 100).round(0).astype(int)
    if "Indeks_Final_0_100" in detail_view.columns:
        detail_view["Indeks_Final_0_100"] = pd.to_numeric(detail_view["Indeks_Final_0_100"], errors="coerce").fillna(0).round(0).astype(int)

    # ---- AGGREGATE: per-type means (0-100) + final index ----------------
    def _mean100(frame, col):
        # Mean of a 0-1 fraction column, rendered as an int on 0-100.
        return int(round(100 * frame[col].mean(skipna=True))) if col in frame.columns else 0

    rows = []
    label_map = {"sekolah": "Perpustakaan Sekolah", "umum": "Perpustakaan Umum", "khusus": "Perpustakaan Khusus"}
    for ds in ["sekolah", "umum", "khusus"]:
        dsub = df_filtered_final[df_filtered_final["_dataset"] == ds].copy() if "_dataset" in df_filtered_final.columns else df_filtered_final.copy()
        if dsub.empty:
            rows.append({
                "Jenis Perpustakaan": label_map.get(ds, ds),
                "Jumlah Perpustakaan": 0,
                "Sub_Koleksi(0-100)": 0,
                "Sub_SDM(0-100)": 0,
                "Sub_Pelayanan(0-100)": 0,
                "Sub_Pengelolaan(0-100)": 0,
                "Dim_Kepatuhan(0-100)": 0,
                "Dim_Kinerja(0-100)": 0,
                "Indeks_FINAL(0-100)": 0,
            })
        else:
            rows.append({
                "Jenis Perpustakaan": label_map.get(ds, ds),
                "Jumlah Perpustakaan": int(len(dsub)),
                "Sub_Koleksi(0-100)": _mean100(dsub, "sub_koleksi"),
                "Sub_SDM(0-100)": _mean100(dsub, "sub_sdm"),
                "Sub_Pelayanan(0-100)": _mean100(dsub, "sub_pelayanan"),
                "Sub_Pengelolaan(0-100)": _mean100(dsub, "sub_pengelolaan"),
                "Dim_Kepatuhan(0-100)": _mean100(dsub, "dim_kepatuhan"),
                "Dim_Kinerja(0-100)": _mean100(dsub, "dim_kinerja"),
                # Final index is already on the 0-100 scale; plain mean.
                "Indeks_FINAL(0-100)": int(round(dsub["Indeks_Final_0_100"].mean(skipna=True))) if "Indeks_Final_0_100" in dsub.columns else 0,
            })

    agg_df = pd.DataFrame(rows)

    # ---- FILES: Excel exports into a fresh temp dir ---------------------
    tmpdir = tempfile.mkdtemp()
    agg_path = os.path.join(tmpdir, f"IPLM_FINAL_Agregat_{slugify(wilayah)}_{slugify(kew)}.xlsx")
    detail_path = os.path.join(tmpdir, f"IPLM_FINAL_Detail_{slugify(wilayah)}_{slugify(kew)}.xlsx")
    raw_path = os.path.join(tmpdir, f"IPLM_FINAL_Raw_{slugify(wilayah)}_{slugify(kew)}.xlsx")

    agg_df.to_excel(agg_path, index=False)
    detail_view.to_excel(detail_path, index=False)
    df_filtered_final.to_excel(raw_path, index=False)

    # ---- Bell curves over the FINAL index -------------------------------
    hover_name = nama_col if (nama_col and nama_col in detail_full.columns) else None

    fig_all = make_bell_figure(detail_view, "Bell Curve Indeks FINAL – Semua Perpustakaan", "Indeks_Final_0_100", hover_name)
    fig_sek = make_bell_figure(detail_view[detail_view["_dataset"] == "sekolah"], "Bell Curve Indeks FINAL – Perpustakaan Sekolah", "Indeks_Final_0_100", hover_name)
    fig_um = make_bell_figure(detail_view[detail_view["_dataset"] == "umum"], "Bell Curve Indeks FINAL – Perpustakaan Umum", "Indeks_Final_0_100", hover_name)
    fig_kh = make_bell_figure(detail_view[detail_view["_dataset"] == "khusus"], "Bell Curve Indeks FINAL – Perpustakaan Khusus", "Indeks_Final_0_100", hover_name)

    return agg_df, detail_view, agg_path, detail_path, raw_path, detail_full, fig_all, fig_sek, fig_um, fig_kh
|
| 962 |
|
| 963 |
+
|
| 964 |
+
# ============================================================
|
| 965 |
+
# 11) RUN APP
|
| 966 |
# ============================================================
|
| 967 |
|
|
|
|
|
|
|
| 968 |
def run_app(prov_value, kab_value, kew_value):
    """Main pipeline callback: filter -> penalty -> views/files -> LLM -> Word.

    Returns the 13-tuple wired to the Gradio outputs:
    (agg table, detail table, verification table, 4 download paths,
     4 bell-curve figures, status message, analysis markdown).
    """
    def _empty_response(message: str):
        # Shape must match the 13 Gradio output components exactly.
        e = pd.DataFrame()
        return (e, e, e,
                None, None, None, None,
                None, None, None, None,
                message,
                "Analisis belum tersedia.")

    if df_all is None or df_all.empty:
        return _empty_response("Data belum siap.")

    data = df_all.copy()

    # Apply the three dropdown filters; "(Semua)" means no filtering.
    if prov_col and prov_value and prov_value != "(Semua)":
        data = data[data[prov_col].astype(str).str.strip() == prov_value]
    if kab_col and kab_value and kab_value != "(Semua)":
        data = data[data[kab_col].astype(str).str.strip() == kab_value]
    if kew_value and kew_value != "(Semua)":
        data = data[data["KEW_NORM"] == kew_value]

    if data.empty:
        return _empty_response("Tidak ada data untuk filter ini.")

    # Human-readable labels for headings/filenames.
    if kab_value and kab_value != "(Semua)":
        wilayah = kab_value
    elif prov_value and prov_value != "(Semua)":
        wilayah = prov_value
    else:
        wilayah = "NASIONAL"
    kew = kew_value if (kew_value and kew_value != "(Semua)") else "SEMUA"

    # Apply the 68% coverage penalty -> FINAL index.
    df_final = apply_penalty_and_final(data, kew_value)

    # Tables, export files, and bell-curve figures.
    (agg_df, detail_view, agg_path, detail_path, raw_path,
     detail_full, fig_all, fig_sek, fig_um, fig_kh) = build_views_and_files(df_final, wilayah, kew)

    # Coverage verification (quality control, also embedded in the Word file).
    verif_df = compute_verification_table(df_final, kew_value)

    # Narrative analysis (LLM with rule-based fallback) + Word report.
    analysis_text = generate_llm_analytics(detail_full, agg_df, verif_df, wilayah, kew)
    word_path = build_word_report(detail_full, agg_df, verif_df, wilayah, kew, analysis_text)

    msg = f"✅ Berhasil dihitung: {len(detail_view)} perpustakaan | Output dibulatkan (tanpa koma)."

    return (agg_df, detail_view, verif_df,
            agg_path, detail_path, raw_path, word_path,
            fig_all, fig_sek, fig_um, fig_kh,
            msg, analysis_text)
|
| 1034 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1035 |
|
| 1036 |
def on_prov_change(prov_value):
    """Refresh the Kab/Kota dropdown when the Provinsi selection changes."""
    choices = get_kab_choices_for_prov(prov_value)
    # Reset the selection to the "all" sentinel alongside the new choices.
    return gr.update(choices=choices, value="(Semua)")
|
| 1038 |
+
|
| 1039 |
|
| 1040 |
# ============================================================
|
| 1041 |
+
# 12) UI GRADIO
|
| 1042 |
# ============================================================
|
| 1043 |
|
| 1044 |
# ------------------------------------------------------------------
# Gradio UI layout. Relies on module globals prepared above:
# prov_choices / kab_choices / kew_choices / default_kew (dropdown data),
# DATA_FILE / POP_KAB_FILE / POP_PROV_FILE / DATA_INFO (header text),
# run_app and on_prov_change (callbacks).
# ------------------------------------------------------------------
with gr.Blocks() as demo:
    # Header / data-source summary.
    gr.Markdown(
        f"""
# IPLM 2025 (FINAL) – Indeks FINAL + Penalti Coverage 68% + Bell Curve + Word (LLM)

Sumber data:
- **{DATA_FILE}**
- **{POP_KAB_FILE}** (populasi Kab/Kota)
- **{POP_PROV_FILE}** (populasi Provinsi)

{DATA_INFO}

Catatan:
- Penalti coverage 68% diterapkan untuk **Sekolah & Umum**.
- **Khusus tidak dikenai penalti** (bobot=1).
- Output tabel dibulatkan **tanpa angka koma**.
"""
    )

    # Filter controls.
    with gr.Row():
        dd_prov = gr.Dropdown(label="Provinsi", choices=prov_choices, value=prov_choices[0])
        dd_kab = gr.Dropdown(label="Kab/Kota", choices=kab_choices, value=kab_choices[0])
        dd_kew = gr.Dropdown(label="Kewenangan", choices=kew_choices, value=default_kew)

    # Cascading filter: selecting a province repopulates the Kab/Kota list.
    dd_prov.change(fn=on_prov_change, inputs=dd_prov, outputs=dd_kab)

    run_btn = gr.Button("Jalankan Perhitungan")
    msg_out = gr.Markdown()

    # Result tables.
    gr.Markdown("## Agregat (Sub/Dimensi + Indeks FINAL)")
    agg_df_out = gr.DataFrame(interactive=False)

    gr.Markdown("## Detail (Sub/Dimensi + Indeks FINAL)")
    detail_df_out = gr.DataFrame(interactive=False)

    gr.Markdown("## Verifikasi Coverage (Kontrol Mutu)")
    verif_df_out = gr.DataFrame(interactive=False)

    # Bell-curve plots: overall plus one per library type.
    gr.Markdown("## Bell Curve Indeks FINAL – Semua")
    bell_all_out = gr.Plot()

    gr.Markdown("## Bell Curve Indeks FINAL – Sekolah")
    bell_sekolah_out = gr.Plot()

    gr.Markdown("## Bell Curve Indeks FINAL – Umum")
    bell_umum_out = gr.Plot()

    gr.Markdown("## Bell Curve Indeks FINAL – Khusus")
    bell_khusus_out = gr.Plot()

    # LLM narrative.
    gr.Markdown("## Analisis Otomatis (LLM)")
    analysis_out = gr.Markdown()

    # Download links for the generated files.
    with gr.Row():
        agg_file_out = gr.File(label="Download Agregat (.xlsx)")
        detail_file_out = gr.File(label="Download Detail (.xlsx)")
        raw_file_out = gr.File(label="Download Raw (.xlsx)")
        word_file_out = gr.File(label="Download Analisis Word (LLM) (.docx)")

    # Wire the run button; output order must match run_app's return tuple.
    run_btn.click(
        fn=run_app,
        inputs=[dd_prov, dd_kab, dd_kew],
        outputs=[
            agg_df_out,
            detail_df_out,
            verif_df_out,
            agg_file_out,
            detail_file_out,
            raw_file_out,
            word_file_out,
            bell_all_out,
            bell_sekolah_out,
            bell_umum_out,
            bell_khusus_out,
            msg_out,
            analysis_out,
        ],
    )

demo.launch()
|