Update app.py
Browse files
app.py
CHANGED
|
@@ -1,23 +1,27 @@
|
|
| 1 |
# -*- coding: utf-8 -*-
|
| 2 |
"""
|
| 3 |
IPLM 2025 — Final (Target Sampel 33.88% per Jenis) — TANPA Kinerja Relatif / Percentile
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
"""
|
| 22 |
|
| 23 |
import os
|
|
@@ -27,7 +31,6 @@ import json
|
|
| 27 |
import math
|
| 28 |
import tempfile
|
| 29 |
from pathlib import Path
|
| 30 |
-
from datetime import datetime
|
| 31 |
|
| 32 |
import gradio as gr
|
| 33 |
import numpy as np
|
|
@@ -35,7 +38,7 @@ import pandas as pd
|
|
| 35 |
import plotly.graph_objects as go
|
| 36 |
from sklearn.preprocessing import PowerTransformer
|
| 37 |
|
| 38 |
-
# python-docx (
|
| 39 |
DOCX_AVAILABLE = True
|
| 40 |
try:
|
| 41 |
from docx import Document
|
|
@@ -46,7 +49,7 @@ except Exception:
|
|
| 46 |
DOCX_AVAILABLE = False
|
| 47 |
Document = None
|
| 48 |
|
| 49 |
-
# huggingface client (opsional
|
| 50 |
HF_AVAILABLE = True
|
| 51 |
try:
|
| 52 |
from huggingface_hub import InferenceClient
|
|
@@ -78,13 +81,6 @@ HF_TOKEN = (
|
|
| 78 |
or os.getenv("HF_API_TOKEN")
|
| 79 |
)
|
| 80 |
|
| 81 |
-
# Heuristic prediction settings (tunable through environment variables).
def _env_float(name, default):
    """Read a float setting from the environment variable ``name``.

    Returns ``default`` (as a float) when the variable is unset, blank, or not
    parseable as a number, so a malformed setting cannot abort module import.
    """
    raw = os.getenv(name)
    if raw is None or not raw.strip():
        return float(default)
    try:
        return float(raw)
    except ValueError:
        return float(default)


PRED_BASE_DRIFT_DEFAULT = _env_float("PRED_BASE_DRIFT_DEFAULT", 0.0)  # default baseline drift (0 = conservative)
PRED_INTERVAL_MIN = _env_float("PRED_INTERVAL_MIN", 0.75)  # minimum interval half-width
PRED_INTERVAL_MAX = _env_float("PRED_INTERVAL_MAX", 4.00)  # maximum interval half-width
PRED_SCENARIO_DELTA_SUB_MIN = _env_float("PRED_SCENARIO_DELTA_SUB_MIN", 0.03)
PRED_SCENARIO_DELTA_SUB_MAX = _env_float("PRED_SCENARIO_DELTA_SUB_MAX", 0.07)
|
| 87 |
-
|
| 88 |
|
| 89 |
# ============================================================
|
| 90 |
# 2) UTIL
|
|
@@ -209,22 +205,6 @@ def faktor_penyesuaian_total(n_total: float, target_total: float) -> float:
|
|
| 209 |
n_total = 0.0
|
| 210 |
return float(min(float(n_total) / float(target_total), 1.0))
|
| 211 |
|
| 212 |
-
def _to_float(x, default=0.0):
    """Convert ``x`` to ``float``, falling back to ``default``.

    ``default`` is returned (as a float) when ``x`` is ``None``, cannot be
    converted, or converts to NaN. The NaN test runs on the converted value so
    that NaN carried by strings (``"nan"``) or by numeric types that are not
    ``float`` instances is caught as well.
    """
    if x is None:
        return float(default)
    try:
        value = float(x)
    except Exception:  # deliberate best-effort: any conversion failure -> default
        return float(default)
    if math.isnan(value):
        return float(default)
    return value
|
| 221 |
-
|
| 222 |
-
def _clamp(x, lo, hi):
    """Limit ``x`` to the closed interval ``[lo, hi]``.

    ``lo`` is returned whenever ``x`` is unusable: it cannot be converted to
    float, the comparison fails, or it is NaN. NaN is checked explicitly
    because ``min``/``max`` comparisons with NaN are always false and would
    otherwise let NaN come out as ``hi``.
    """
    try:
        value = float(x)
        if math.isnan(value):
            return lo
        return max(lo, min(hi, value))
    except Exception:  # deliberate best-effort fallback to the lower bound
        return lo
|
| 227 |
-
|
| 228 |
|
| 229 |
# ============================================================
|
| 230 |
# 3) INDIKATOR IPLM
|
|
@@ -303,7 +283,6 @@ def prepare_global(df_src: pd.DataFrame) -> pd.DataFrame:
|
|
| 303 |
|
| 304 |
df = df_src.copy()
|
| 305 |
|
| 306 |
-
# rename indicator columns
|
| 307 |
rename_map = {}
|
| 308 |
for col in df.columns:
|
| 309 |
c = _canon(col)
|
|
@@ -317,12 +296,10 @@ def prepare_global(df_src: pd.DataFrame) -> pd.DataFrame:
|
|
| 317 |
if rename_map:
|
| 318 |
df = df.rename(columns=rename_map)
|
| 319 |
|
| 320 |
-
# coerce numeric for available indicators
|
| 321 |
available = [c for c in all_indicators if c in df.columns]
|
| 322 |
for c in available:
|
| 323 |
df[c] = df[c].apply(coerce_num)
|
| 324 |
|
| 325 |
-
# Yeo-Johnson transform then MinMax
|
| 326 |
for c in available:
|
| 327 |
x = pd.to_numeric(df[c], errors="coerce").astype(float).values
|
| 328 |
mask = ~np.isnan(x)
|
|
@@ -334,17 +311,14 @@ def prepare_global(df_src: pd.DataFrame) -> pd.DataFrame:
|
|
| 334 |
transformed[mask] = x[mask]
|
| 335 |
df[f"norm_{c}"] = minmax_norm(pd.Series(transformed, index=df.index))
|
| 336 |
|
| 337 |
-
# sub dimensions
|
| 338 |
df["sub_koleksi"] = df.apply(lambda r: _mean_norm_cols(r, [c for c in koleksi_cols if c in available]), axis=1)
|
| 339 |
df["sub_sdm"] = df.apply(lambda r: _mean_norm_cols(r, [c for c in sdm_cols if c in available]), axis=1)
|
| 340 |
df["sub_pelayanan"] = df.apply(lambda r: _mean_norm_cols(r, [c for c in pelayanan_cols if c in available]), axis=1)
|
| 341 |
df["sub_pengelolaan"] = df.apply(lambda r: _mean_norm_cols(r, [c for c in pengelolaan_cols if c in available]), axis=1)
|
| 342 |
|
| 343 |
-
# dimensions
|
| 344 |
df["dim_kepatuhan"] = df[["sub_koleksi","sub_sdm"]].mean(axis=1)
|
| 345 |
df["dim_kinerja"] = df[["sub_pelayanan","sub_pengelolaan"]].mean(axis=1)
|
| 346 |
|
| 347 |
-
# base index 0–100
|
| 348 |
df["Indeks_Dasar_0_100"] = 100 * (W_KEPATUHAN * df["dim_kepatuhan"] + W_KINERJA * df["dim_kinerja"])
|
| 349 |
|
| 350 |
for c in ["sub_koleksi","sub_sdm","sub_pelayanan","sub_pengelolaan","dim_kepatuhan","dim_kinerja","Indeks_Dasar_0_100"]:
|
|
@@ -463,7 +437,6 @@ def load_default_files(force=False):
|
|
| 463 |
df_raw["prov_key"] = df_raw["PROV_DISP"].apply(norm_prov_label)
|
| 464 |
df_raw["kab_key"] = df_raw["KAB_DISP"].apply(norm_kab_label)
|
| 465 |
|
| 466 |
-
# Dedup row key
|
| 467 |
if nama_col and nama_col in df_raw.columns:
|
| 468 |
kcols = [prov_col, kab_col, kew_col, jenis_col, nama_col]
|
| 469 |
else:
|
|
@@ -711,11 +684,6 @@ def build_agg_wilayah_jenis(df_filtered, faktor_wilayah_jenis, kew_value):
|
|
| 711 |
|
| 712 |
if faktor_wilayah_jenis is None or faktor_wilayah_jenis.empty:
|
| 713 |
agg["faktor_penyesuaian_jenis"] = 1.0
|
| 714 |
-
agg["target_total_33_88_jenis"] = 0
|
| 715 |
-
agg["pop_total_jenis"] = 0
|
| 716 |
-
agg["coverage_jenis_%"] = 0.0
|
| 717 |
-
agg["gap_target33_88_jenis"] = 0
|
| 718 |
-
agg["n_jenis"] = agg["Jumlah"].copy()
|
| 719 |
else:
|
| 720 |
fw = faktor_wilayah_jenis.copy()
|
| 721 |
fw["Jenis"] = fw["Jenis"].astype(str).str.lower().str.strip()
|
|
@@ -737,7 +705,6 @@ def build_agg_wilayah_jenis(df_filtered, faktor_wilayah_jenis, kew_value):
|
|
| 737 |
]:
|
| 738 |
if c in agg.columns:
|
| 739 |
agg[c] = pd.to_numeric(agg[c], errors="coerce").fillna(0.0).round(3)
|
| 740 |
-
|
| 741 |
for c in ["Indeks_Dasar_Agregat_0_100","Indeks_Final_Agregat_0_100"]:
|
| 742 |
if c in agg.columns:
|
| 743 |
agg[c] = pd.to_numeric(agg[c], errors="coerce").fillna(0.0).round(2)
|
|
@@ -750,7 +717,7 @@ def build_agg_wilayah_jenis(df_filtered, faktor_wilayah_jenis, kew_value):
|
|
| 750 |
# 8) AGREGAT WILAYAH (KESELURUHAN) — avg3 dari 3 jenis
|
| 751 |
# ============================================================
|
| 752 |
|
| 753 |
-
def build_agg_wilayah_total_from_jenis(agg_jenis, kew_value):
|
| 754 |
if agg_jenis is None or agg_jenis.empty:
|
| 755 |
return pd.DataFrame()
|
| 756 |
|
|
@@ -774,7 +741,6 @@ def build_agg_wilayah_total_from_jenis(agg_jenis, kew_value):
|
|
| 774 |
|
| 775 |
full = full.merge(a[["group_key", label_name, "Jenis"] + cols_present],
|
| 776 |
on=["group_key", label_name, "Jenis"], how="left")
|
| 777 |
-
|
| 778 |
for c in cols_present:
|
| 779 |
full[c] = pd.to_numeric(full[c], errors="coerce").fillna(0.0)
|
| 780 |
|
|
@@ -975,6 +941,7 @@ def build_verif_jenis(faktor_wilayah_jenis, kew_value):
|
|
| 975 |
"pop_total_jenis", "target_total_33_88_jenis", "n_jenis",
|
| 976 |
"coverage_jenis_%", "faktor_penyesuaian_jenis", "gap_target33_88_jenis"
|
| 977 |
] if c in out.columns]
|
|
|
|
| 978 |
out = out[keep].copy()
|
| 979 |
|
| 980 |
for c in ["pop_total_jenis", "target_total_33_88_jenis", "n_jenis", "gap_target33_88_jenis"]:
|
|
@@ -1144,22 +1111,60 @@ def get_llm_client():
|
|
| 1144 |
_HF_CLIENT = None
|
| 1145 |
return None
|
| 1146 |
|
| 1147 |
-
def
|
| 1148 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1149 |
return {"sekolah": 0, "umum": 0, "khusus": 0, "total": 0}
|
|
|
|
| 1150 |
a = agg_jenis_full.copy()
|
|
|
|
|
|
|
|
|
|
| 1151 |
a["Jenis"] = a["Jenis"].astype(str).str.lower().str.strip()
|
| 1152 |
if "Jumlah" in a.columns:
|
| 1153 |
a["Jumlah"] = pd.to_numeric(a["Jumlah"], errors="coerce").fillna(0).astype(int)
|
| 1154 |
else:
|
| 1155 |
a["Jumlah"] = 0
|
| 1156 |
-
|
|
|
|
|
|
|
|
|
|
| 1157 |
out["total"] = int(out["sekolah"] + out["umum"] + out["khusus"])
|
| 1158 |
return out
|
| 1159 |
|
| 1160 |
def build_interpretasi_table_values(agg_total, wilayah_label, target_ratio):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1161 |
if agg_total is None or agg_total.empty:
|
| 1162 |
-
base = {
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1163 |
else:
|
| 1164 |
a = agg_total.copy()
|
| 1165 |
cols_needed = [
|
|
@@ -1176,6 +1181,7 @@ def build_interpretasi_table_values(agg_total, wilayah_label, target_ratio):
|
|
| 1176 |
a[c] = pd.to_numeric(a[c], errors="coerce").fillna(0.0)
|
| 1177 |
else:
|
| 1178 |
a[c] = 0.0
|
|
|
|
| 1179 |
base = {
|
| 1180 |
"kepatuhan": float(a["Rata2_dim_kepatuhan"].mean()),
|
| 1181 |
"koleksi": float(a["Rata2_sub_koleksi"].mean()),
|
|
@@ -1186,6 +1192,8 @@ def build_interpretasi_table_values(agg_total, wilayah_label, target_ratio):
|
|
| 1186 |
"iplm": float(a["Indeks_Final_Wilayah_0_100"].mean()),
|
| 1187 |
}
|
| 1188 |
|
|
|
|
|
|
|
| 1189 |
base_disp = {
|
| 1190 |
"kepatuhan": round(_to_float(base["kepatuhan"]), 3),
|
| 1191 |
"koleksi": round(_to_float(base["koleksi"]), 3),
|
|
@@ -1213,6 +1221,14 @@ def build_interpretasi_table_values(agg_total, wilayah_label, target_ratio):
|
|
| 1213 |
return header, rows
|
| 1214 |
|
| 1215 |
def llm_fill_interpretasi_rekomendasi(header, rows, wilayah_label, kew_label, jumlah_perpus_by_jenis):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1216 |
client = get_llm_client()
|
| 1217 |
if client is None or (not USE_LLM):
|
| 1218 |
out = []
|
|
@@ -1227,7 +1243,8 @@ def llm_fill_interpretasi_rekomendasi(header, rows, wilayah_label, kew_label, ju
|
|
| 1227 |
"jumlah_perpustakaan_sumber_data": jumlah_perpus_by_jenis,
|
| 1228 |
"catatan_skala": (
|
| 1229 |
"Baris Kepatuhan/Koleksi/Tenaga/Kinerja/Pelayanan/Pengelolaan memakai nilai agregat 'apa adanya' "
|
| 1230 |
-
"(umumnya rentang 0β1
|
|
|
|
| 1231 |
),
|
| 1232 |
"baris": rows
|
| 1233 |
}
|
|
@@ -1236,12 +1253,18 @@ def llm_fill_interpretasi_rekomendasi(header, rows, wilayah_label, kew_label, ju
|
|
| 1236 |
"Anda adalah analis kebijakan perpustakaan di Indonesia.\n"
|
| 1237 |
"Tugas: isi kolom Interpretasi dan Rekomendasi untuk setiap baris tabel.\n"
|
| 1238 |
"ATURAN WAJIB:\n"
|
| 1239 |
-
"1) Jangan mengubah nilai angka.\n"
|
| 1240 |
-
"2) Netral-deskriptif: dilarang memakai label normatif seperti baik/buruk, tinggi/sedang/rendah, memuaskan/kurang.\n"
|
| 1241 |
-
"3) Interpretasi harus nyambung langsung ke angka dan relasinya antardimensi: lebih besar/kecil, selisih, gap, dominan, konsisten.\n"
|
| 1242 |
-
"4)
|
| 1243 |
-
"
|
| 1244 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1245 |
)
|
| 1246 |
|
| 1247 |
user = (
|
|
@@ -1252,14 +1275,17 @@ def llm_fill_interpretasi_rekomendasi(header, rows, wilayah_label, kew_label, ju
|
|
| 1252 |
" ]\n"
|
| 1253 |
"}\n"
|
| 1254 |
"- Urutan dan jumlah baris harus sama.\n"
|
| 1255 |
-
"- 'Rekomendasi' boleh bullet '-' dalam satu string.\n\n"
|
| 1256 |
f"INPUT:\n{json.dumps(payload, ensure_ascii=False)}"
|
| 1257 |
)
|
| 1258 |
|
| 1259 |
try:
|
| 1260 |
resp = client.chat_completion(
|
| 1261 |
model=LLM_MODEL_NAME,
|
| 1262 |
-
messages=[
|
|
|
|
|
|
|
|
|
|
| 1263 |
max_tokens=1100,
|
| 1264 |
temperature=0.2,
|
| 1265 |
top_p=0.9,
|
|
@@ -1274,7 +1300,7 @@ def llm_fill_interpretasi_rekomendasi(header, rows, wilayah_label, kew_label, ju
|
|
| 1274 |
cleaned.append({
|
| 1275 |
"No": str(r.get("No", rows[i]["No"])),
|
| 1276 |
"Dimensi": str(r.get("Dimensi", rows[i]["Dimensi"])),
|
| 1277 |
-
"Nilai": rows[i]["Nilai"],
|
| 1278 |
"Interpretasi": str(r.get("Interpretasi","") or ""),
|
| 1279 |
"Rekomendasi": str(r.get("Rekomendasi","") or ""),
|
| 1280 |
})
|
|
@@ -1285,6 +1311,7 @@ def llm_fill_interpretasi_rekomendasi(header, rows, wilayah_label, kew_label, ju
|
|
| 1285 |
out.append({k: r.get(k) for k in ["No","Dimensi","Nilai"]} | {"Interpretasi":"", "Rekomendasi":""})
|
| 1286 |
return out, f"LLM error: {repr(e)}"
|
| 1287 |
|
|
|
|
| 1288 |
def _set_cell_shading(cell, fill_hex="1F1F1F"):
|
| 1289 |
tcPr = cell._tc.get_or_add_tcPr()
|
| 1290 |
shd = OxmlElement("w:shd")
|
|
@@ -1317,7 +1344,7 @@ def _set_table_borders(table):
|
|
| 1317 |
tblBorders.append(elem)
|
| 1318 |
tblPr.append(tblBorders)
|
| 1319 |
|
| 1320 |
-
def generate_word_table_interpretasi(header, rows_filled, wilayah_label, jumlah_perpus_by_jenis
|
| 1321 |
if (not DOCX_AVAILABLE) or (Document is None):
|
| 1322 |
return None
|
| 1323 |
|
|
@@ -1350,6 +1377,10 @@ def generate_word_table_interpretasi(header, rows_filled, wilayah_label, jumlah_
|
|
| 1350 |
row_cells = table.add_row().cells
|
| 1351 |
row_cells[0].text = str(r.get("No",""))
|
| 1352 |
row_cells[1].text = str(r.get("Dimensi",""))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1353 |
try:
|
| 1354 |
dim = str(r.get("Dimensi","")).strip().lower()
|
| 1355 |
val = _to_float(r.get("Nilai", 0.0), 0.0)
|
|
@@ -1359,6 +1390,7 @@ def generate_word_table_interpretasi(header, rows_filled, wilayah_label, jumlah_
|
|
| 1359 |
row_cells[2].text = f"{val:.3f}"
|
| 1360 |
except Exception:
|
| 1361 |
row_cells[2].text = str(r.get("Nilai",""))
|
|
|
|
| 1362 |
row_cells[3].text = str(r.get("Interpretasi","") or "")
|
| 1363 |
row_cells[4].text = str(r.get("Rekomendasi","") or "")
|
| 1364 |
|
|
@@ -1366,8 +1398,8 @@ def generate_word_table_interpretasi(header, rows_filled, wilayah_label, jumlah_
|
|
| 1366 |
_set_cell_shading(c, "262626")
|
| 1367 |
_set_cell_text_color(c, "FFFFFF")
|
| 1368 |
|
| 1369 |
-
#
|
| 1370 |
-
doc.add_paragraph("")
|
| 1371 |
j = jumlah_perpus_by_jenis or {"sekolah":0,"umum":0,"khusus":0,"total":0}
|
| 1372 |
p = doc.add_paragraph()
|
| 1373 |
p.add_run("Sumber data (jumlah perpustakaan pada tabel agregat wilayah Γ jenis): ").bold = True
|
|
@@ -1378,419 +1410,13 @@ def generate_word_table_interpretasi(header, rows_filled, wilayah_label, jumlah_
|
|
| 1378 |
f"total = {int(j.get('total',0))}."
|
| 1379 |
)
|
| 1380 |
|
| 1381 |
-
# Insight Pack tambahan
|
| 1382 |
-
if insight_pack is not None and isinstance(insight_pack, dict) and insight_pack:
|
| 1383 |
-
doc.add_paragraph("")
|
| 1384 |
-
h = doc.add_paragraph()
|
| 1385 |
-
rr = h.add_run("Insight Pack (DeteksiβDiagnosaβPreskripsiβPrediksi)")
|
| 1386 |
-
rr.bold = True
|
| 1387 |
-
rr.font.size = Pt(14)
|
| 1388 |
-
|
| 1389 |
-
# Deteksi
|
| 1390 |
-
det = insight_pack.get("deteksi", {})
|
| 1391 |
-
if det:
|
| 1392 |
-
doc.add_paragraph("Deteksi", style=None)
|
| 1393 |
-
for k, v in det.items():
|
| 1394 |
-
doc.add_paragraph(f"- {k}: {v}")
|
| 1395 |
-
|
| 1396 |
-
# Diagnosa
|
| 1397 |
-
diag = insight_pack.get("diagnosa", {})
|
| 1398 |
-
if diag:
|
| 1399 |
-
doc.add_paragraph("Diagnosa", style=None)
|
| 1400 |
-
for k, v in diag.items():
|
| 1401 |
-
doc.add_paragraph(f"- {k}: {v}")
|
| 1402 |
-
|
| 1403 |
-
# Preskripsi
|
| 1404 |
-
pres = insight_pack.get("preskripsi", [])
|
| 1405 |
-
if pres:
|
| 1406 |
-
doc.add_paragraph("Preskripsi", style=None)
|
| 1407 |
-
for it in pres:
|
| 1408 |
-
doc.add_paragraph(f"- {it}")
|
| 1409 |
-
|
| 1410 |
-
# Prediksi
|
| 1411 |
-
pred = insight_pack.get("prediksi", {})
|
| 1412 |
-
if pred:
|
| 1413 |
-
doc.add_paragraph("Prediksi (Tahun Depan)", style=None)
|
| 1414 |
-
for k, v in pred.items():
|
| 1415 |
-
doc.add_paragraph(f"- {k}: {v}")
|
| 1416 |
-
|
| 1417 |
outpath = tempfile.mktemp(suffix=".docx")
|
| 1418 |
doc.save(outpath)
|
| 1419 |
return outpath
|
| 1420 |
|
| 1421 |
|
| 1422 |
# ============================================================
|
| 1423 |
-
# 15)
|
| 1424 |
-
# ============================================================
|
| 1425 |
-
|
| 1426 |
-
def _fmt3(x):
    """Format ``x`` with three decimal places.

    Returns the string "NA" when ``x`` cannot be converted to float or is NaN,
    so a missing value never shows up as the literal text "nan".
    """
    try:
        value = float(x)
    except Exception:  # deliberate best-effort: anything unparseable -> "NA"
        return "NA"
    if math.isnan(value):
        return "NA"
    return f"{value:.3f}"
|
| 1431 |
-
|
| 1432 |
-
def _fmt2(x):
    """Format ``x`` with two decimal places.

    Returns the string "NA" when ``x`` cannot be converted to float or is NaN,
    so a missing value never shows up as the literal text "nan".
    """
    try:
        value = float(x)
    except Exception:  # deliberate best-effort: anything unparseable -> "NA"
        return "NA"
    if math.isnan(value):
        return "NA"
    return f"{value:.2f}"
|
| 1437 |
-
|
| 1438 |
-
def compute_coverage_risk(verif_total: pd.DataFrame) -> dict:
    """Summarise sampling-coverage risk over the three library types.

    Only rows whose ``Jenis`` value (lower-cased, trimmed) is "sekolah",
    "umum" or "khusus" are used.

    Args:
        verif_total: verification table. Columns read: ``Jenis``,
            ``coverage_jenis_%``, ``gap_target33_88_jenis`` and
            ``faktor_penyesuaian_jenis``. A missing numeric column is treated
            as if it held its neutral default on every row.

    Returns:
        dict with ``coverage_min`` and ``coverage_mean`` (floats),
        ``gap_target_sum`` (int) and ``faktor_mean`` (float). The neutral
        result (0.0, 0.0, 0, 1.0) is returned when the input is ``None``,
        empty, has no ``Jenis`` column, or has no row for the three types.
    """
    neutral = {
        "coverage_min": 0.0,
        "coverage_mean": 0.0,
        "gap_target_sum": 0,
        "faktor_mean": 1.0,
    }
    if verif_total is None or verif_total.empty or "Jenis" not in verif_total.columns:
        return neutral

    jenis = verif_total["Jenis"].astype(str).str.lower().str.strip()
    rows = verif_total.loc[jenis.isin(["sekolah", "umum", "khusus"])]
    if rows.empty:
        return neutral

    def _numeric(col, default):
        # Build a real Series even when the column is absent; a scalar
        # fallback has no .fillna()/.min() and would raise AttributeError.
        if col not in rows.columns:
            return pd.Series(default, index=rows.index, dtype=float)
        return pd.to_numeric(rows[col], errors="coerce").fillna(default)

    cov = _numeric("coverage_jenis_%", 0.0)
    gap = _numeric("gap_target33_88_jenis", 0).astype(int)
    fct = _numeric("faktor_penyesuaian_jenis", 1.0)

    return {
        "coverage_min": float(cov.min()),
        "coverage_mean": float(cov.mean()),
        "gap_target_sum": int(gap.sum()),
        "faktor_mean": float(fct.mean()),
    }
|
| 1467 |
-
|
| 1468 |
-
def detect_signals_from_agg_total(agg_total: pd.DataFrame) -> dict:
    """Extract dimension-level signals from the regional aggregate table.

    Each signal is the mean of one aggregate column (non-numeric cells and
    missing columns count as 0.0). Three gaps are derived from those means:
    kinerja - kepatuhan, koleksi - sdm, and pelayanan - pengelolaan.
    ``n_total`` is the sum of the ``n_total`` column (0 when absent).

    Returns a dict of zeros when ``agg_total`` is ``None`` or empty.
    """
    if agg_total is None or agg_total.empty:
        return {
            "kepatuhan": 0.0, "kinerja": 0.0,
            "koleksi": 0.0, "sdm": 0.0,
            "pelayanan": 0.0, "pengelolaan": 0.0,
            "gap_dim": 0.0,
            "gap_koleksi_sdm": 0.0,
            "gap_pelayanan_pengelolaan": 0.0,
            "iplm_final": 0.0,
            "n_total": 0,
        }

    frame = agg_total.copy()

    def _avg(column):
        # Mean of a column after numeric coercion; 0.0 if the column is absent.
        if column in frame.columns:
            numeric = pd.to_numeric(frame[column], errors="coerce")
            return float(numeric.fillna(0.0).mean())
        return 0.0

    source_columns = {
        "kepatuhan": "Rata2_dim_kepatuhan",
        "kinerja": "Rata2_dim_kinerja",
        "koleksi": "Rata2_sub_koleksi",
        "sdm": "Rata2_sub_sdm",
        "pelayanan": "Rata2_sub_pelayanan",
        "pengelolaan": "Rata2_sub_pengelolaan",
        "iplm_final": "Indeks_Final_Wilayah_0_100",
    }
    means = {signal: _avg(column) for signal, column in source_columns.items()}

    if "n_total" in frame.columns:
        n_total = int(pd.to_numeric(frame["n_total"], errors="coerce").fillna(0).sum())
    else:
        n_total = 0

    return {
        "kepatuhan": means["kepatuhan"],
        "kinerja": means["kinerja"],
        "koleksi": means["koleksi"],
        "sdm": means["sdm"],
        "pelayanan": means["pelayanan"],
        "pengelolaan": means["pengelolaan"],
        "gap_dim": float(means["kinerja"] - means["kepatuhan"]),
        "gap_koleksi_sdm": float(means["koleksi"] - means["sdm"]),
        "gap_pelayanan_pengelolaan": float(means["pelayanan"] - means["pengelolaan"]),
        "iplm_final": means["iplm_final"],
        "n_total": n_total,
    }
|
| 1516 |
-
|
| 1517 |
-
def detect_dependency_by_jenis(agg_jenis_full: pd.DataFrame) -> dict:
    """Report which library type carries the highest final index.

    Rows are grouped by ``Jenis`` (lower-cased, trimmed); only "sekolah",
    "umum" and "khusus" are considered.

    Args:
        agg_jenis_full: per-type aggregate table. Columns read: ``Jenis``,
            ``Indeks_Final_Agregat_0_100`` and ``Jumlah``.

    Returns:
        dict with
        - ``dominant_jenis_by_final``: the type with the largest mean final
          index among the types that actually have rows, or ``None`` when no
          type has rows or the index column is missing (a 0.0 placeholder is
          never reported as dominant);
        - ``final_by_jenis``: mean final index per type (0.0 if unavailable);
        - ``jumlah_by_jenis``: summed ``Jumlah`` per type (0 if unavailable).
    """
    jenis_order = ["sekolah", "umum", "khusus"]
    final_col = "Indeks_Final_Agregat_0_100"
    final_by = {j: 0.0 for j in jenis_order}
    jumlah_by = {j: 0 for j in jenis_order}
    result = {
        "dominant_jenis_by_final": None,
        "final_by_jenis": final_by,
        "jumlah_by_jenis": jumlah_by,
    }

    if (
        agg_jenis_full is None
        or agg_jenis_full.empty
        or "Jenis" not in agg_jenis_full.columns
    ):
        return result

    jenis = agg_jenis_full["Jenis"].astype(str).str.lower().str.strip()
    has_final = final_col in agg_jenis_full.columns
    has_jumlah = "Jumlah" in agg_jenis_full.columns

    present = []  # types that have at least one row, in canonical order
    for j in jenis_order:
        sub = agg_jenis_full.loc[jenis.eq(j)]
        if sub.empty:
            continue
        present.append(j)
        if has_final:
            final_by[j] = float(
                pd.to_numeric(sub[final_col], errors="coerce").fillna(0.0).mean()
            )
        if has_jumlah:
            jumlah_by[j] = int(
                pd.to_numeric(sub["Jumlah"], errors="coerce").fillna(0).sum()
            )

    if present and has_final:
        # max() keeps the first maximum, so ties resolve in jenis_order.
        result["dominant_jenis_by_final"] = max(present, key=lambda j: final_by[j])
    return result
|
| 1555 |
-
|
| 1556 |
-
def diagnose_bottleneck(signals: dict) -> dict:
    """Name the lowest-scoring sub-dimension or dimension and explain why.

    Six candidates are compared: the four sub-dimensions and the two combined
    dimensions. The smallest value wins; on ties the first candidate in the
    listed order is kept. The explanation also quotes the three gap values
    found in ``signals``.

    Returns a dict with ``bottleneck`` (label), ``bottleneck_value`` (float)
    and ``alasan`` (explanatory sentences joined by spaces).
    """
    labelled_keys = (
        ("Koleksi", "koleksi"),
        ("SDM", "sdm"),
        ("Pelayanan", "pelayanan"),
        ("Pengelolaan", "pengelolaan"),
        ("Kepatuhan (gabungan Koleksi+SDM)", "kepatuhan"),
        ("Kinerja (gabungan Pelayanan+Pengelolaan)", "kinerja"),
    )
    scores = {label: float(signals.get(key, 0.0)) for label, key in labelled_keys}

    if scores:
        lowest = min(scores, key=scores.get)
    else:
        lowest = "NA"
    lowest_value = float(scores.get(lowest, 0.0))

    gap_dim = float(signals.get("gap_dim", 0.0))
    gap_kepatuhan = float(signals.get("gap_koleksi_sdm", 0.0))
    gap_kinerja = float(signals.get("gap_pelayanan_pengelolaan", 0.0))

    sentences = [
        f"Variabel/dimensi dengan nilai paling kecil adalah {lowest} = {_fmt3(lowest_value)}.",
        f"Gap dimensi (Kinerja - Kepatuhan) = {_fmt3(gap_dim)}.",
        f"Gap internal Kepatuhan (Koleksi - SDM) = {_fmt3(gap_kepatuhan)}.",
        f"Gap internal Kinerja (Pelayanan - Pengelolaan) = {_fmt3(gap_kinerja)}.",
    ]

    return {
        "bottleneck": lowest,
        "bottleneck_value": lowest_value,
        "alasan": " ".join(sentences),
    }
|
| 1595 |
-
|
| 1596 |
-
def prescribe_actions(diagnosis: dict, dep_jenis: dict, coverage_risk: dict) -> list:
    """Build a short list of recommended actions.

    The list is assembled from three sources, in this order:
    1. a coverage action when minimum coverage is below 50 or a target gap remains;
    2. actions for the first keyword (sdm, koleksi, pengelolaan, pelayanan)
       found in the lower-cased bottleneck label, or one generic action when
       none matches;
    3. a per-type note when the dominant type is one of the three known types.

    The result is capped at five entries and padded to at least two.
    """
    focus = str(diagnosis.get("bottleneck", "")).lower()
    leading_type = dep_jenis.get("dominant_jenis_by_final", None)
    lowest_coverage = float(coverage_risk.get("coverage_min", 0.0))
    open_gap = int(coverage_risk.get("gap_target_sum", 0))

    plan = []

    # Coverage comes first whenever the sample is thin or a target gap is open.
    if lowest_coverage < 50.0 or open_gap > 0:
        plan.append(
            "Penguatan coverage data: prioritas penambahan entri pada jenis dengan gap target terbesar "
            "(sinkronisasi daftar perpustakaan, verifikasi duplikasi, dan dorongan pelaporan pada unit yang belum masuk)."
        )

    # Keyword order matters: the combined-dimension labels contain several keywords.
    playbook = (
        ("sdm", (
            "Paket penguatan SDM: pemetaan kompetensi + penugasan fungsi minimal (layanan, pengolahan, pengelolaan) pada unit dengan skor terendah.",
            "Klinik teknis 6β8 minggu: pendampingan pencatatan layanan, pengelolaan koleksi, dan pelaporan indikator untuk memperkecil gap SDM terhadap variabel lain.",
        )),
        ("koleksi", (
            "Optimalisasi siklus koleksi: seleksiβpengadaanβpenyianganβpromosi berbasis pemanfaatan (koleksi yang dipakai), agar selisih dengan SDM/Kinerja mengecil.",
            "Paket koleksi minimum per layanan: susun daftar kebutuhan koleksi per segmen sasaran dan integrasikan dengan program layanan (read-aloud/kelas literasi/klub baca).",
        )),
        ("pengelolaan", (
            "Perkuat tata kelola: SOP layanan, kebijakan, dan pencatatan output; pastikan program layanan terhubung dengan dokumen kebijakan dan dukungan anggaran.",
            "Dorong kolaborasi: minimal 1β2 kemitraan aktif yang menghasilkan kegiatan layanan/budaya baca dan tercatat sebagai output pengelolaan.",
        )),
        ("pelayanan", (
            "Aktivasi layanan: program rutin bulanan (kelas literasi, klub baca, layanan digital/keliling) untuk menaikkan pemustaka & pemanfaatan koleksi.",
            "Segmentasi sasaran: pilih 2 segmen prioritas (pelajar, keluarga, komunitas/ASN) dan susun paket layanan minimum sesuai jenis perpustakaan.",
        )),
    )
    for keyword, steps in playbook:
        if keyword in focus:
            plan.extend(steps)
            break
    else:
        plan.append("Konsolidasi program lintas variabel: susun rencana 90 hari (quick wins) pada variabel dengan nilai paling kecil dan rencana 6β12 bulan untuk penyeimbangan dimensi.")

    # Note on which library type currently leads.
    if leading_type in ["sekolah", "umum", "khusus"]:
        plan.append(
            f"Penajaman per jenis: saat ini indeks final relatif lebih dominan pada jenis '{leading_type}'. "
            "Gunakan pola ini untuk mengarahkan replikasi praktik ke jenis lain yang tertinggal."
        )

    # Cap at five entries, pad to at least two.
    plan = plan[:5]
    if len(plan) < 2:
        plan.append("Susun target operasional per variabel (koleksi/SDM/pelayanan/pengelolaan) dan monitoring triwulanan.")

    return plan
|
| 1643 |
-
|
| 1644 |
-
def compute_prediction(signals: dict, coverage_risk: dict, dep_jenis: dict) -> dict:
    """Produce a heuristic next-year forecast of the final index.

    Baseline: the current index plus ``PRED_BASE_DRIFT_DEFAULT``, with a
    symmetric interval whose half-width grows with a risk score. The risk
    score weights low minimum coverage at 0.6 and a low mean adjustment
    factor at 0.4.

    Scenarios: three intervention scenarios add an index gain derived from a
    sub-dimension improvement (smaller when risk is higher). A sub-dimension
    improvement is counted as half that amount on its parent dimension,
    weighted by ``W_KEPATUHAN`` / ``W_KINERJA`` and scaled by the mean
    adjustment factor.

    All forecast points are clamped to 0..100. ``dep_jenis`` is accepted but
    not used by this function.
    """
    current_index = float(signals.get("iplm_final", 0.0))
    lowest_coverage = float(coverage_risk.get("coverage_min", 0.0))
    mean_factor = float(coverage_risk.get("faktor_mean", 1.0))

    # Risk: coverage is on a 0..100 scale, the factor on 0..1.
    coverage_shortfall = 1.0 - _clamp(lowest_coverage, 0, 100) / 100.0
    factor_shortfall = 1.0 - _clamp(mean_factor, 0, 1)
    raw_risk = 0.0
    raw_risk += coverage_shortfall * 0.6
    raw_risk += factor_shortfall * 0.4
    bounded_risk = _clamp(raw_risk, 0, 1)

    # Higher risk -> wider interval.
    interval_half = PRED_INTERVAL_MIN + (PRED_INTERVAL_MAX - PRED_INTERVAL_MIN) * bounded_risk

    point = current_index + float(PRED_BASE_DRIFT_DEFAULT)
    lower = point - interval_half
    upper = point + interval_half

    # Higher risk -> more conservative sub-dimension improvement.
    sub_delta = PRED_SCENARIO_DELTA_SUB_MIN + (
        PRED_SCENARIO_DELTA_SUB_MAX - PRED_SCENARIO_DELTA_SUB_MIN
    ) * (1.0 - bounded_risk)
    sub_delta = _clamp(sub_delta, PRED_SCENARIO_DELTA_SUB_MIN, PRED_SCENARIO_DELTA_SUB_MAX)

    # One improved sub-dimension lifts its parent dimension by half the delta.
    kepatuhan_gain = W_KEPATUHAN * (sub_delta / 2.0)
    kinerja_gain = W_KINERJA * (sub_delta / 2.0)
    gain_sdm = 100.0 * kepatuhan_gain * mean_factor
    gain_kinerja = 100.0 * kinerja_gain * mean_factor
    gain_combo = 100.0 * (kepatuhan_gain + kinerja_gain) * mean_factor

    scenarios = {
        "Skenario_SDMPrioritas": _clamp(point + gain_sdm, 0, 100),
        "Skenario_KinerjaPrioritas": _clamp(point + gain_kinerja, 0, 100),
        "Skenario_KoleksiPlusAktivasi": _clamp(point + gain_combo, 0, 100),
    }

    return {
        "baseline_point": float(_clamp(point, 0, 100)),
        "baseline_P10": float(_clamp(lower, 0, 100)),
        "baseline_P90": float(_clamp(upper, 0, 100)),
        "half_width": float(interval_half),
        "risk_index_0_1": float(bounded_risk),
        "scenario_delta_sub_used": float(sub_delta),
        "scenario_predictions": scenarios,
    }
|
| 1704 |
-
|
| 1705 |
-
def build_insight_pack(wilayah_label: str, agg_total: pd.DataFrame, agg_jenis_full: pd.DataFrame, verif_total: pd.DataFrame) -> dict:
    """
    Build the master insight pack for one filter context (region).

    Chains the detection, diagnosis, prescription and prediction helpers and
    packages their results both as display-ready strings and as raw structures.

    Args:
        wilayah_label: Region label, stored unchanged under "wilayah".
        agg_total: Aggregate table handed to detect_signals_from_agg_total.
        agg_jenis_full: Aggregate table handed to detect_dependency_by_jenis.
        verif_total: Table handed to compute_coverage_risk.

    Returns:
        A dict with the keys "wilayah", "deteksi", "diagnosa", "preskripsi",
        "prediksi" (formatted values) and "raw" (the untouched helper outputs:
        signals, dependency, coverage_risk, diagnosis, prediction_struct).
    """
    signals = detect_signals_from_agg_total(agg_total)
    dep_jenis = detect_dependency_by_jenis(agg_jenis_full)
    coverage_risk = compute_coverage_risk(verif_total)

    diagnosis = diagnose_bottleneck(signals)
    preskripsi = prescribe_actions(diagnosis, dep_jenis, coverage_risk)
    prediksi = compute_prediction(signals, coverage_risk, dep_jenis)

    # Detection summary. Labels are user-facing column names; the range
    # separators were garbled ("0β100") and are restored to an en dash here.
    deteksi = {
        "Nilai IPLM Final (0–100)": _fmt2(signals.get("iplm_final", 0.0)),
        "Kepatuhan (0–1)": _fmt3(signals.get("kepatuhan", 0.0)),
        "Kinerja (0–1)": _fmt3(signals.get("kinerja", 0.0)),
        "Gap Dimensi (Kinerja - Kepatuhan)": _fmt3(signals.get("gap_dim", 0.0)),
        "Gap Internal Kepatuhan (Koleksi - SDM)": _fmt3(signals.get("gap_koleksi_sdm", 0.0)),
        "Gap Internal Kinerja (Pelayanan - Pengelolaan)": _fmt3(signals.get("gap_pelayanan_pengelolaan", 0.0)),
        "Dominasi jenis (berdasar indeks final jenis)": str(dep_jenis.get("dominant_jenis_by_final", "")),
        "Coverage min (%)": _fmt2(coverage_risk.get("coverage_min", 0.0)),
        "Coverage mean (%)": _fmt2(coverage_risk.get("coverage_mean", 0.0)),
        "Gap target total (unit)": str(coverage_risk.get("gap_target_sum", 0)),
        "Faktor penyesuaian mean": _fmt3(coverage_risk.get("faktor_mean", 1.0)),
    }

    diagnosa = {
        "Bottleneck utama": str(diagnosis.get("bottleneck", "")),
        "Alasan berbasis angka": str(diagnosis.get("alasan", "")),
    }

    # Look the scenario mapping up once instead of once per scenario.
    scenarios = prediksi.get("scenario_predictions", {})
    p10_text = _fmt2(prediksi.get("baseline_P10", 0.0))
    p90_text = _fmt2(prediksi.get("baseline_P90", 0.0))

    pred = {
        "Baseline (point)": _fmt2(prediksi.get("baseline_point", 0.0)),
        "Baseline interval (P10–P90)": f"{p10_text} – {p90_text}",
        "Risk index (0–1)": _fmt3(prediksi.get("risk_index_0_1", 0.0)),
        "Scenario SDM": _fmt2(scenarios.get("Skenario_SDMPrioritas", 0.0)),
        "Scenario Kinerja": _fmt2(scenarios.get("Skenario_KinerjaPrioritas", 0.0)),
        "Scenario Koleksi+Aktivasi": _fmt2(scenarios.get("Skenario_KoleksiPlusAktivasi", 0.0)),
        "Delta sub/dim yang dipakai": _fmt3(prediksi.get("scenario_delta_sub_used", 0.0)),
    }

    return {
        "wilayah": wilayah_label,
        "deteksi": deteksi,
        "diagnosa": diagnosa,
        "preskripsi": preskripsi,
        "prediksi": pred,
        "raw": {
            "signals": signals,
            "dependency": dep_jenis,
            "coverage_risk": coverage_risk,
            "diagnosis": diagnosis,
            "prediction_struct": prediksi,
        },
    }
|
| 1761 |
-
|
| 1762 |
-
def insight_pack_to_tables(insight_pack: dict) -> tuple:
    """
    Flatten an insight pack into three single-purpose DataFrames.

    Returns:
        (df_insight, df_pres, df_pred) where
        - df_insight has one row: "Wilayah" plus one "Deteksi__<key>" column per
          detection entry and one "Diagnosa__<key>" column per diagnosis entry;
        - df_pres has one row per prescription ("Wilayah", "Preskripsi"), or a
          single row with an empty prescription when there are none;
        - df_pred has one row: "Wilayah" plus one "Prediksi__<key>" column per
          prediction entry.
    """
    region = insight_pack.get("wilayah", "")
    detection = insight_pack.get("deteksi", {}) or {}
    diagnosis = insight_pack.get("diagnosa", {}) or {}
    # A missing/empty prescription list still yields one (blank) row.
    prescriptions = insight_pack.get("preskripsi", []) or [""]
    prediction = insight_pack.get("prediksi", {}) or {}

    insight_row = {
        "Wilayah": region,
        **{f"Deteksi__{name}": value for name, value in detection.items()},
        **{f"Diagnosa__{name}": value for name, value in diagnosis.items()},
    }
    prediction_row = {
        "Wilayah": region,
        **{f"Prediksi__{name}": value for name, value in prediction.items()},
    }
    prescription_rows = [
        {"Wilayah": region, "Preskripsi": text} for text in prescriptions
    ]

    return (
        pd.DataFrame([insight_row]),
        pd.DataFrame(prescription_rows),
        pd.DataFrame([prediction_row]),
    )
|
| 1790 |
-
|
| 1791 |
-
|
| 1792 |
-
# ============================================================
|
| 1793 |
-
# 16) CORE RUN
|
| 1794 |
# ============================================================
|
| 1795 |
|
| 1796 |
def _empty_outputs(msg="Data belum siap."):
|
|
@@ -1798,12 +1424,11 @@ def _empty_outputs(msg="Data belum siap."):
|
|
| 1798 |
empty_fig = go.Figure()
|
| 1799 |
return (
|
| 1800 |
"", # kpi_md
|
| 1801 |
-
empty, empty, empty, empty, empty,
|
| 1802 |
-
None, None, None, None, None,
|
| 1803 |
-
empty_fig, empty_fig, empty_fig,
|
| 1804 |
-
empty, empty, empty, # insight dfs
|
| 1805 |
msg, # msg
|
| 1806 |
-
"LLM belum tersedia.", #
|
| 1807 |
None # word path
|
| 1808 |
)
|
| 1809 |
|
|
@@ -1826,7 +1451,7 @@ def run_calc(prov_value, kab_value, kew_value, df_all, df_raw, pop_kab, pop_prov
|
|
| 1826 |
kew_norm = kew_value if (kew_value and kew_value != "(Semua)") else "(Semua)"
|
| 1827 |
faktor_wilayah_jenis = build_faktor_wilayah_jenis(df, pop_kab, pop_prov, pop_khusus, kew_norm)
|
| 1828 |
agg_jenis_full = build_agg_wilayah_jenis(df, faktor_wilayah_jenis, kew_norm)
|
| 1829 |
-
agg_total = build_agg_wilayah_total_from_jenis(agg_jenis_full, kew_norm)
|
| 1830 |
|
| 1831 |
summary_jenis = build_summary_per_jenis(agg_jenis_full, agg_total)
|
| 1832 |
verif_total = build_verif_jenis(faktor_wilayah_jenis, kew_norm)
|
|
@@ -1884,12 +1509,7 @@ def run_calc(prov_value, kab_value, kew_value, df_all, df_raw, pop_kab, pop_prov
|
|
| 1884 |
|
| 1885 |
kpi_md = build_kpi_markdown(summary_jenis)
|
| 1886 |
|
| 1887 |
-
#
|
| 1888 |
-
wilayah_txt = kab_value if (kab_value and kab_value != "(Semua)") else (prov_value if (prov_value and prov_value != "(Semua)") else "Nasional/All")
|
| 1889 |
-
insight_pack = build_insight_pack(wilayah_txt, agg_total, agg_jenis_full, verif_total)
|
| 1890 |
-
df_insight, df_pres, df_pred = insight_pack_to_tables(insight_pack)
|
| 1891 |
-
|
| 1892 |
-
# Export xlsx (6 file + 1 insight workbook optional)
|
| 1893 |
tmpdir = tempfile.mkdtemp()
|
| 1894 |
prov_slug = (_canon(prov_value or "SEMUA").upper() or "SEMUA")
|
| 1895 |
kab_slug = (_canon(kab_value or "SEMUA").upper() or "SEMUA")
|
|
@@ -1900,7 +1520,6 @@ def run_calc(prov_value, kab_value, kew_value, df_all, df_raw, pop_kab, pop_prov
|
|
| 1900 |
p_raw = str(Path(tmpdir) / f"IPLM_RAW_DATA_{prov_slug}_{kab_slug}_{kew_slug}.xlsx")
|
| 1901 |
p_detail = str(Path(tmpdir) / f"IPLM_DetailEntitas_FinalMenempelWilayah_{prov_slug}_{kab_slug}_{kew_slug}.xlsx")
|
| 1902 |
p_verif = str(Path(tmpdir) / f"IPLM_KecukupanSampel_33_88_{prov_slug}_{kab_slug}_{kew_slug}.xlsx")
|
| 1903 |
-
p_insight = str(Path(tmpdir) / f"IPLM_InsightPack_{prov_slug}_{kab_slug}_{kew_slug}.xlsx")
|
| 1904 |
|
| 1905 |
summary_jenis.to_excel(p_summary, index=False)
|
| 1906 |
agg_total.to_excel(p_total, index=False)
|
|
@@ -1908,19 +1527,12 @@ def run_calc(prov_value, kab_value, kew_value, df_all, df_raw, pop_kab, pop_prov
|
|
| 1908 |
detail_view.to_excel(p_detail, index=False)
|
| 1909 |
verif_total.to_excel(p_verif, index=False)
|
| 1910 |
|
| 1911 |
-
#
|
| 1912 |
-
|
| 1913 |
-
df_insight.to_excel(xw, sheet_name="INSIGHT_WILAYAH", index=False)
|
| 1914 |
-
df_pres.to_excel(xw, sheet_name="PRESKRIPSI", index=False)
|
| 1915 |
-
df_pred.to_excel(xw, sheet_name="PREDIKSI", index=False)
|
| 1916 |
-
# Optional: include agg tables
|
| 1917 |
-
agg_total.to_excel(xw, sheet_name="AGG_TOTAL", index=False)
|
| 1918 |
-
agg_jenis_full.to_excel(xw, sheet_name="AGG_JENIS", index=False)
|
| 1919 |
-
verif_total.to_excel(xw, sheet_name="VERIF", index=False)
|
| 1920 |
-
|
| 1921 |
-
# ===== Word tabel interpretasi & rekomendasi (+ insight pack) =====
|
| 1922 |
header, rows = build_interpretasi_table_values(agg_total, wilayah_txt, TARGET_RATIO)
|
| 1923 |
-
|
|
|
|
|
|
|
| 1924 |
|
| 1925 |
rows_filled, llm_status = llm_fill_interpretasi_rekomendasi(
|
| 1926 |
header=header,
|
|
@@ -1929,7 +1541,7 @@ def run_calc(prov_value, kab_value, kew_value, df_all, df_raw, pop_kab, pop_prov
|
|
| 1929 |
kew_label=(kew_value or "(Semua)"),
|
| 1930 |
jumlah_perpus_by_jenis=jumlah_perpus
|
| 1931 |
)
|
| 1932 |
-
word_path = generate_word_table_interpretasi(header, rows_filled, wilayah_txt, jumlah_perpus
|
| 1933 |
|
| 1934 |
msg = (
|
| 1935 |
f"Selesai (TARGET {TARGET_RATIO*100:.2f}%): raw={len(raw)} | entitas={len(detail_view)} | "
|
|
@@ -1940,9 +1552,8 @@ def run_calc(prov_value, kab_value, kew_value, df_all, df_raw, pop_kab, pop_prov
|
|
| 1940 |
return (
|
| 1941 |
kpi_md,
|
| 1942 |
summary_jenis, agg_total, agg_jenis_view, detail_view, verif_total,
|
| 1943 |
-
p_summary, p_total, p_raw, p_detail, p_verif,
|
| 1944 |
fig_umum, fig_sekolah, fig_khusus,
|
| 1945 |
-
df_insight, df_pres, df_pred,
|
| 1946 |
msg,
|
| 1947 |
llm_status,
|
| 1948 |
(word_path if word_path else None)
|
|
@@ -1953,7 +1564,7 @@ def run_calc(prov_value, kab_value, kew_value, df_all, df_raw, pop_kab, pop_prov
|
|
| 1953 |
|
| 1954 |
|
| 1955 |
# ============================================================
|
| 1956 |
-
#
|
| 1957 |
# ============================================================
|
| 1958 |
|
| 1959 |
def ui_load(force=False):
|
|
@@ -2012,11 +1623,6 @@ UPDATE LLM + WORD:
|
|
| 2012 |
- Tabel Word "Interpretasi & Rekomendasi" memakai NILAI APA ADANYA (tanpa dikali 100) untuk sub/dim.
|
| 2013 |
- Baris "Nilai IPLM" memakai Indeks_Final_Wilayah_0_100 apa adanya.
|
| 2014 |
- Di bawah tabel Word ditambahkan ringkasan jumlah perpustakaan sumber data (sekolah/umum/khusus/total) dari tabel agregat wilayah Γ jenis.
|
| 2015 |
-
- Ditambah Insight Pack: DeteksiβDiagnosaβPreskripsiβPrediksi (tahun depan) berbasis output pipeline.
|
| 2016 |
-
|
| 2017 |
-
Prediksi:
|
| 2018 |
-
- Baseline + interval (heuristik berbasis risiko coverage)
|
| 2019 |
-
- 3 skenario intervensi (SDM / Kinerja / Koleksi+Aktivasi)
|
| 2020 |
""")
|
| 2021 |
|
| 2022 |
state_df = gr.State(None)
|
|
@@ -2065,11 +1671,6 @@ Prediksi:
|
|
| 2065 |
gr.Markdown("### Perpustakaan Khusus")
|
| 2066 |
bell_khusus = gr.Plot(scale=1)
|
| 2067 |
|
| 2068 |
-
gr.Markdown("## Insight Pack (DeteksiβDiagnosaβPreskripsiβPrediksi)")
|
| 2069 |
-
out_insight = gr.DataFrame(interactive=False)
|
| 2070 |
-
out_pres = gr.DataFrame(interactive=False)
|
| 2071 |
-
out_pred = gr.DataFrame(interactive=False)
|
| 2072 |
-
|
| 2073 |
gr.Markdown("## Status LLM (Isi Interpretasi & Rekomendasi)")
|
| 2074 |
llm_status_out = gr.Markdown()
|
| 2075 |
|
|
@@ -2079,8 +1680,7 @@ Prediksi:
|
|
| 2079 |
dl_raw = gr.DownloadButton(label="Download Data Mentah (.xlsx)")
|
| 2080 |
dl_detail = gr.DownloadButton(label="Download Detail Entitas (.xlsx)")
|
| 2081 |
dl_verif = gr.DownloadButton(label="Download Kecukupan Sampel (.xlsx)")
|
| 2082 |
-
|
| 2083 |
-
dl_word = gr.DownloadButton(label="Download Word: Interpretasi & Insight (.docx)" if DOCX_AVAILABLE else "Download Word (OFF)")
|
| 2084 |
|
| 2085 |
run_btn.click(
|
| 2086 |
fn=run_calc,
|
|
@@ -2088,9 +1688,8 @@ Prediksi:
|
|
| 2088 |
outputs=[
|
| 2089 |
kpi_out,
|
| 2090 |
out_summary, out_agg_total, out_agg_jenis, out_detail, out_verif,
|
| 2091 |
-
dl_summary, dl_total, dl_raw, dl_detail, dl_verif,
|
| 2092 |
bell_umum, bell_sekolah, bell_khusus,
|
| 2093 |
-
out_insight, out_pres, out_pred,
|
| 2094 |
msg_out,
|
| 2095 |
llm_status_out,
|
| 2096 |
dl_word
|
|
|
|
| 1 |
# -*- coding: utf-8 -*-
|
| 2 |
"""
|
| 3 |
IPLM 2025 — Final (Target Sampel 33.88% per Jenis) — TANPA Kinerja Relatif / Percentile
|
| 4 |
+
UPDATE (sesuai instruksi terbaru Anda) — TANPA mengubah pipeline lain:
|
| 5 |
+
|
| 6 |
+
FOKUS PEMBENAHAN (LLM + WORD):
|
| 7 |
+
1) Nilai Kepatuhan, Koleksi, Tenaga, Kinerja, Pelayanan, Pengelolaan:
|
| 8 |
+
- TIDAK dikalikan 100.
|
| 9 |
+
- Ditulis APA ADANYA dari kolom agregat aplikasi:
|
| 10 |
+
Rata2_dim_kepatuhan, Rata2_sub_koleksi, Rata2_sub_sdm, Rata2_dim_kinerja,
|
| 11 |
+
Rata2_sub_pelayanan, Rata2_sub_pengelolaan.
|
| 12 |
+
2) Nilai IPLM ditulis apa adanya: Indeks_Final_Wilayah_0_100.
|
| 13 |
+
3) LLM mengisi Interpretasi & Rekomendasi:
|
| 14 |
+
- Interpretasi: deskriptif, kondisi riil berbasis relasi angka (lebih besar/kecil, gap, dominan, konsistensi),
|
| 15 |
+
plus pemaknaan substantif dimensi (koleksi/sdm/pelayanan/pengelolaan) TANPA label normatif.
|
| 16 |
+
- Rekomendasi: operasional, 2–3 butir ringkas, menaut ke pola angka (gap/ketimpangan/kontribusi).
|
| 17 |
+
4) Di bawah tabel Word: tambah deskripsi jumlah perpustakaan sumber data (dari tabel agregat wilayah × jenis / “gambar 2”):
|
| 18 |
+
sekolah=..., umum=..., khusus=..., total=...
|
| 19 |
+
|
| 20 |
+
Catatan penting:
|
| 21 |
+
- Semua perhitungan dan dashboard tetap.
|
| 22 |
+
- Yang diubah hanya: (a) cara mengambil nilai untuk tabel Word (tanpa *100),
|
| 23 |
+
(b) prompt LLM untuk isi interpretasi/rekomendasi agar nyambung dengan angka,
|
| 24 |
+
(c) tambahan paragraf jumlah perpustakaan di bawah tabel Word.
|
| 25 |
"""
|
| 26 |
|
| 27 |
import os
|
|
|
|
| 31 |
import math
|
| 32 |
import tempfile
|
| 33 |
from pathlib import Path
|
|
|
|
| 34 |
|
| 35 |
import gradio as gr
|
| 36 |
import numpy as np
|
|
|
|
| 38 |
import plotly.graph_objects as go
|
| 39 |
from sklearn.preprocessing import PowerTransformer
|
| 40 |
|
| 41 |
+
# python-docx (wajib kalau mau Word)
|
| 42 |
DOCX_AVAILABLE = True
|
| 43 |
try:
|
| 44 |
from docx import Document
|
|
|
|
| 49 |
DOCX_AVAILABLE = False
|
| 50 |
Document = None
|
| 51 |
|
| 52 |
+
# huggingface client (opsional)
|
| 53 |
HF_AVAILABLE = True
|
| 54 |
try:
|
| 55 |
from huggingface_hub import InferenceClient
|
|
|
|
| 81 |
or os.getenv("HF_API_TOKEN")
|
| 82 |
)
|
| 83 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
|
| 85 |
# ============================================================
|
| 86 |
# 2) UTIL
|
|
|
|
| 205 |
n_total = 0.0
|
| 206 |
return float(min(float(n_total) / float(target_total), 1.0))
|
| 207 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 208 |
|
| 209 |
# ============================================================
|
| 210 |
# 3) INDIKATOR IPLM
|
|
|
|
| 283 |
|
| 284 |
df = df_src.copy()
|
| 285 |
|
|
|
|
| 286 |
rename_map = {}
|
| 287 |
for col in df.columns:
|
| 288 |
c = _canon(col)
|
|
|
|
| 296 |
if rename_map:
|
| 297 |
df = df.rename(columns=rename_map)
|
| 298 |
|
|
|
|
| 299 |
available = [c for c in all_indicators if c in df.columns]
|
| 300 |
for c in available:
|
| 301 |
df[c] = df[c].apply(coerce_num)
|
| 302 |
|
|
|
|
| 303 |
for c in available:
|
| 304 |
x = pd.to_numeric(df[c], errors="coerce").astype(float).values
|
| 305 |
mask = ~np.isnan(x)
|
|
|
|
| 311 |
transformed[mask] = x[mask]
|
| 312 |
df[f"norm_{c}"] = minmax_norm(pd.Series(transformed, index=df.index))
|
| 313 |
|
|
|
|
| 314 |
df["sub_koleksi"] = df.apply(lambda r: _mean_norm_cols(r, [c for c in koleksi_cols if c in available]), axis=1)
|
| 315 |
df["sub_sdm"] = df.apply(lambda r: _mean_norm_cols(r, [c for c in sdm_cols if c in available]), axis=1)
|
| 316 |
df["sub_pelayanan"] = df.apply(lambda r: _mean_norm_cols(r, [c for c in pelayanan_cols if c in available]), axis=1)
|
| 317 |
df["sub_pengelolaan"] = df.apply(lambda r: _mean_norm_cols(r, [c for c in pengelolaan_cols if c in available]), axis=1)
|
| 318 |
|
|
|
|
| 319 |
df["dim_kepatuhan"] = df[["sub_koleksi","sub_sdm"]].mean(axis=1)
|
| 320 |
df["dim_kinerja"] = df[["sub_pelayanan","sub_pengelolaan"]].mean(axis=1)
|
| 321 |
|
|
|
|
| 322 |
df["Indeks_Dasar_0_100"] = 100 * (W_KEPATUHAN * df["dim_kepatuhan"] + W_KINERJA * df["dim_kinerja"])
|
| 323 |
|
| 324 |
for c in ["sub_koleksi","sub_sdm","sub_pelayanan","sub_pengelolaan","dim_kepatuhan","dim_kinerja","Indeks_Dasar_0_100"]:
|
|
|
|
| 437 |
df_raw["prov_key"] = df_raw["PROV_DISP"].apply(norm_prov_label)
|
| 438 |
df_raw["kab_key"] = df_raw["KAB_DISP"].apply(norm_kab_label)
|
| 439 |
|
|
|
|
| 440 |
if nama_col and nama_col in df_raw.columns:
|
| 441 |
kcols = [prov_col, kab_col, kew_col, jenis_col, nama_col]
|
| 442 |
else:
|
|
|
|
| 684 |
|
| 685 |
if faktor_wilayah_jenis is None or faktor_wilayah_jenis.empty:
|
| 686 |
agg["faktor_penyesuaian_jenis"] = 1.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 687 |
else:
|
| 688 |
fw = faktor_wilayah_jenis.copy()
|
| 689 |
fw["Jenis"] = fw["Jenis"].astype(str).str.lower().str.strip()
|
|
|
|
| 705 |
]:
|
| 706 |
if c in agg.columns:
|
| 707 |
agg[c] = pd.to_numeric(agg[c], errors="coerce").fillna(0.0).round(3)
|
|
|
|
| 708 |
for c in ["Indeks_Dasar_Agregat_0_100","Indeks_Final_Agregat_0_100"]:
|
| 709 |
if c in agg.columns:
|
| 710 |
agg[c] = pd.to_numeric(agg[c], errors="coerce").fillna(0.0).round(2)
|
|
|
|
| 717 |
# 8) AGREGAT WILAYAH (KESELURUHAN) — avg3 dari 3 jenis
|
| 718 |
# ============================================================
|
| 719 |
|
| 720 |
+
def build_agg_wilayah_total_from_jenis(agg_jenis, faktor_wilayah_jenis, kew_value):
|
| 721 |
if agg_jenis is None or agg_jenis.empty:
|
| 722 |
return pd.DataFrame()
|
| 723 |
|
|
|
|
| 741 |
|
| 742 |
full = full.merge(a[["group_key", label_name, "Jenis"] + cols_present],
|
| 743 |
on=["group_key", label_name, "Jenis"], how="left")
|
|
|
|
| 744 |
for c in cols_present:
|
| 745 |
full[c] = pd.to_numeric(full[c], errors="coerce").fillna(0.0)
|
| 746 |
|
|
|
|
| 941 |
"pop_total_jenis", "target_total_33_88_jenis", "n_jenis",
|
| 942 |
"coverage_jenis_%", "faktor_penyesuaian_jenis", "gap_target33_88_jenis"
|
| 943 |
] if c in out.columns]
|
| 944 |
+
|
| 945 |
out = out[keep].copy()
|
| 946 |
|
| 947 |
for c in ["pop_total_jenis", "target_total_33_88_jenis", "n_jenis", "gap_target33_88_jenis"]:
|
|
|
|
| 1111 |
_HF_CLIENT = None
|
| 1112 |
return None
|
| 1113 |
|
| 1114 |
+
def _to_float(x, default=0.0):
|
| 1115 |
+
try:
|
| 1116 |
+
if x is None:
|
| 1117 |
+
return float(default)
|
| 1118 |
+
if isinstance(x, float) and math.isnan(x):
|
| 1119 |
+
return float(default)
|
| 1120 |
+
return float(x)
|
| 1121 |
+
except Exception:
|
| 1122 |
+
return float(default)
|
| 1123 |
+
|
| 1124 |
+
def summarize_jumlah_perpus_dari_agg_jenis(agg_jenis_full, wilayah_label, kew_value):
    """
    Count the source-data libraries per library type from the region x type
    aggregate table.

    The "Jenis" column is lower-cased and stripped before matching; the
    "Jumlah" column is coerced to integers (non-numeric values count as 0).
    Every row of the table is summed per type, so a table covering several
    regions yields totals across all of them.

    ``wilayah_label`` and ``kew_value`` are accepted but not used here.

    Returns:
        dict with integer counts under "sekolah", "umum", "khusus" and
        "total"; all zeros when the table is None, empty, or has no "Jenis"
        column.
    """
    library_types = ("sekolah", "umum", "khusus")

    unusable = (
        agg_jenis_full is None
        or agg_jenis_full.empty
        or "Jenis" not in agg_jenis_full.columns
    )
    if unusable:
        return {"sekolah": 0, "umum": 0, "khusus": 0, "total": 0}

    # Work on derived Series so the caller's frame is never modified.
    kinds = agg_jenis_full["Jenis"].astype(str).str.lower().str.strip()
    if "Jumlah" in agg_jenis_full.columns:
        counts = pd.to_numeric(agg_jenis_full["Jumlah"], errors="coerce").fillna(0).astype(int)
    else:
        counts = pd.Series(0, index=agg_jenis_full.index)

    summary = {kind: int(counts[kinds.eq(kind)].sum()) for kind in library_types}
    summary["total"] = int(sum(summary[kind] for kind in library_types))
    return summary
|
| 1148 |
|
| 1149 |
def build_interpretasi_table_values(agg_total, wilayah_label, target_ratio):
|
| 1150 |
+
"""
|
| 1151 |
+
MENGAMBIL NILAI APA ADANYA (tanpa *100) dari hasil aplikasi (agg_total):
|
| 1152 |
+
- Kepatuhan = Rata2_dim_kepatuhan
|
| 1153 |
+
- Koleksi = Rata2_sub_koleksi
|
| 1154 |
+
- Tenaga = Rata2_sub_sdm
|
| 1155 |
+
- Kinerja = Rata2_dim_kinerja
|
| 1156 |
+
- Pelayanan = Rata2_sub_pelayanan
|
| 1157 |
+
- Pengelolaan = Rata2_sub_pengelolaan
|
| 1158 |
+
- Nilai IPLM = Indeks_Final_Wilayah_0_100
|
| 1159 |
+
|
| 1160 |
+
Jika agg_total > 1 baris (mis. nasional), diambil mean kolom-kolom tersebut.
|
| 1161 |
+
"""
|
| 1162 |
if agg_total is None or agg_total.empty:
|
| 1163 |
+
base = {
|
| 1164 |
+
"kepatuhan": 0.0, "koleksi": 0.0, "tenaga": 0.0,
|
| 1165 |
+
"kinerja": 0.0, "pelayanan": 0.0, "pengelolaan": 0.0,
|
| 1166 |
+
"iplm": 0.0
|
| 1167 |
+
}
|
| 1168 |
else:
|
| 1169 |
a = agg_total.copy()
|
| 1170 |
cols_needed = [
|
|
|
|
| 1181 |
a[c] = pd.to_numeric(a[c], errors="coerce").fillna(0.0)
|
| 1182 |
else:
|
| 1183 |
a[c] = 0.0
|
| 1184 |
+
|
| 1185 |
base = {
|
| 1186 |
"kepatuhan": float(a["Rata2_dim_kepatuhan"].mean()),
|
| 1187 |
"koleksi": float(a["Rata2_sub_koleksi"].mean()),
|
|
|
|
| 1192 |
"iplm": float(a["Indeks_Final_Wilayah_0_100"].mean()),
|
| 1193 |
}
|
| 1194 |
|
| 1195 |
+
# pembulatan display (nilai tetap "apa adanya", hanya format)
|
| 1196 |
+
# untuk sub/dim (0–1) biasanya 3 desimal; untuk IPLM (0–100) 2 desimal.
|
| 1197 |
base_disp = {
|
| 1198 |
"kepatuhan": round(_to_float(base["kepatuhan"]), 3),
|
| 1199 |
"koleksi": round(_to_float(base["koleksi"]), 3),
|
|
|
|
| 1221 |
return header, rows
|
| 1222 |
|
| 1223 |
def llm_fill_interpretasi_rekomendasi(header, rows, wilayah_label, kew_label, jumlah_perpus_by_jenis):
|
| 1224 |
+
"""
|
| 1225 |
+
LLM diminta mengisi kolom Interpretasi dan Rekomendasi dengan narasi yang NYAMBUNG ke angka:
|
| 1226 |
+
- Interpretasi: jelaskan apa arti angka untuk kondisi operasional perpustakaan (koleksi/sdm/pelayanan/pengelolaan),
|
| 1227 |
+
memakai relasi angka antardimensi (lebih besar/kecil, selisih, dominan, gap, konsistensi) TANPA label normatif.
|
| 1228 |
+
- Rekomendasi: 2–3 aksi teknis per baris yang langsung meng-address pola angka (misal dimensi lebih kecil → prioritas aktivitas),
|
| 1229 |
+
serta mengaitkan dengan volume data (jumlah perpustakaan per jenis) bila relevan.
|
| 1230 |
+
Output wajib JSON.
|
| 1231 |
+
"""
|
| 1232 |
client = get_llm_client()
|
| 1233 |
if client is None or (not USE_LLM):
|
| 1234 |
out = []
|
|
|
|
| 1243 |
"jumlah_perpustakaan_sumber_data": jumlah_perpus_by_jenis,
|
| 1244 |
"catatan_skala": (
|
| 1245 |
"Baris Kepatuhan/Koleksi/Tenaga/Kinerja/Pelayanan/Pengelolaan memakai nilai agregat 'apa adanya' "
|
| 1246 |
+
"(umumnya rentang 0β1 karena berasal dari sub/dim hasil normalisasi). "
|
| 1247 |
+
"Baris 'Nilai IPLM' memakai Indeks_Final_Wilayah_0_100 (rentang 0β100)."
|
| 1248 |
),
|
| 1249 |
"baris": rows
|
| 1250 |
}
|
|
|
|
| 1253 |
"Anda adalah analis kebijakan perpustakaan di Indonesia.\n"
|
| 1254 |
"Tugas: isi kolom Interpretasi dan Rekomendasi untuk setiap baris tabel.\n"
|
| 1255 |
"ATURAN WAJIB:\n"
|
| 1256 |
+
"1) Jangan mengubah nilai angka. Jangan menghitung ulang skor.\n"
|
| 1257 |
+
"2) Netral-deskriptif: dilarang memakai label normatif seperti baik/buruk, tinggi/sedang/rendah, memuaskan/kurang, optimal/tidak optimal.\n"
|
| 1258 |
+
"3) Interpretasi harus nyambung langsung ke angka dan relasinya antardimensi: gunakan istilah lebih besar/kecil, selisih, gap, dominan, konsisten/tidak konsisten, kontribusi, proporsi.\n"
|
| 1259 |
+
"4) Interpretasi juga harus menjelaskan kondisi riil berbasis dimensi:\n"
|
| 1260 |
+
" - Koleksi: pengembangan, ketersediaan, pemanfaatan koleksi (sebagai fungsi layanan),\n"
|
| 1261 |
+
" - Tenaga: kecukupan/kapasitas SDM dan pengembangan kompetensi,\n"
|
| 1262 |
+
" - Pelayanan: aktivitas layanan dan pemanfaatan layanan,\n"
|
| 1263 |
+
" - Pengelolaan: tata kelola, kebijakan, kolaborasi, dukungan anggaran layanan,\n"
|
| 1264 |
+
" - Kepatuhan = gabungan koleksi+tenaga; Kinerja = gabungan pelayanan+pengelolaan.\n"
|
| 1265 |
+
" Jelaskan tanpa menghakimi; fokus pada apa yang angka itu representasikan.\n"
|
| 1266 |
+
"5) Rekomendasi harus operasional dan spesifik (2β3 butir singkat) untuk tiap baris. Gunakan pola angka untuk menurunkan aksi.\n"
|
| 1267 |
+
"6) Output HARUS JSON valid saja (tanpa teks tambahan), dengan struktur persis.\n"
|
| 1268 |
)
|
| 1269 |
|
| 1270 |
user = (
|
|
|
|
| 1275 |
" ]\n"
|
| 1276 |
"}\n"
|
| 1277 |
"- Urutan dan jumlah baris harus sama.\n"
|
| 1278 |
+
"- 'Rekomendasi' boleh berupa bullet dengan tanda '-' dalam satu string.\n\n"
|
| 1279 |
f"INPUT:\n{json.dumps(payload, ensure_ascii=False)}"
|
| 1280 |
)
|
| 1281 |
|
| 1282 |
try:
|
| 1283 |
resp = client.chat_completion(
|
| 1284 |
model=LLM_MODEL_NAME,
|
| 1285 |
+
messages=[
|
| 1286 |
+
{"role": "system", "content": system},
|
| 1287 |
+
{"role": "user", "content": user},
|
| 1288 |
+
],
|
| 1289 |
max_tokens=1100,
|
| 1290 |
temperature=0.2,
|
| 1291 |
top_p=0.9,
|
|
|
|
| 1300 |
cleaned.append({
|
| 1301 |
"No": str(r.get("No", rows[i]["No"])),
|
| 1302 |
"Dimensi": str(r.get("Dimensi", rows[i]["Dimensi"])),
|
| 1303 |
+
"Nilai": rows[i]["Nilai"], # paksa nilai dari aplikasi
|
| 1304 |
"Interpretasi": str(r.get("Interpretasi","") or ""),
|
| 1305 |
"Rekomendasi": str(r.get("Rekomendasi","") or ""),
|
| 1306 |
})
|
|
|
|
| 1311 |
out.append({k: r.get(k) for k in ["No","Dimensi","Nilai"]} | {"Interpretasi":"", "Rekomendasi":""})
|
| 1312 |
return out, f"LLM error: {repr(e)}"
|
| 1313 |
|
| 1314 |
+
|
| 1315 |
def _set_cell_shading(cell, fill_hex="1F1F1F"):
|
| 1316 |
tcPr = cell._tc.get_or_add_tcPr()
|
| 1317 |
shd = OxmlElement("w:shd")
|
|
|
|
| 1344 |
tblBorders.append(elem)
|
| 1345 |
tblPr.append(tblBorders)
|
| 1346 |
|
| 1347 |
+
def generate_word_table_interpretasi(header, rows_filled, wilayah_label, jumlah_perpus_by_jenis):
|
| 1348 |
if (not DOCX_AVAILABLE) or (Document is None):
|
| 1349 |
return None
|
| 1350 |
|
|
|
|
| 1377 |
row_cells = table.add_row().cells
|
| 1378 |
row_cells[0].text = str(r.get("No",""))
|
| 1379 |
row_cells[1].text = str(r.get("Dimensi",""))
|
| 1380 |
+
|
| 1381 |
+
# format nilai:
|
| 1382 |
+
# - sub/dim biasanya 0–1 → tampilkan 3 desimal
|
| 1383 |
+
# - IPLM 0–100 → tampilkan 2 desimal
|
| 1384 |
try:
|
| 1385 |
dim = str(r.get("Dimensi","")).strip().lower()
|
| 1386 |
val = _to_float(r.get("Nilai", 0.0), 0.0)
|
|
|
|
| 1390 |
row_cells[2].text = f"{val:.3f}"
|
| 1391 |
except Exception:
|
| 1392 |
row_cells[2].text = str(r.get("Nilai",""))
|
| 1393 |
+
|
| 1394 |
row_cells[3].text = str(r.get("Interpretasi","") or "")
|
| 1395 |
row_cells[4].text = str(r.get("Rekomendasi","") or "")
|
| 1396 |
|
|
|
|
| 1398 |
_set_cell_shading(c, "262626")
|
| 1399 |
_set_cell_text_color(c, "FFFFFF")
|
| 1400 |
|
| 1401 |
+
# ===== tambahan: deskripsi jumlah perpustakaan sumber data (gambar 2) =====
|
| 1402 |
+
doc.add_paragraph("") # spacer
|
| 1403 |
j = jumlah_perpus_by_jenis or {"sekolah":0,"umum":0,"khusus":0,"total":0}
|
| 1404 |
p = doc.add_paragraph()
|
| 1405 |
p.add_run("Sumber data (jumlah perpustakaan pada tabel agregat wilayah Γ jenis): ").bold = True
|
|
|
|
| 1410 |
f"total = {int(j.get('total',0))}."
|
| 1411 |
)
|
| 1412 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1413 |
outpath = tempfile.mktemp(suffix=".docx")
|
| 1414 |
doc.save(outpath)
|
| 1415 |
return outpath
|
| 1416 |
|
| 1417 |
|
| 1418 |
# ============================================================
|
| 1419 |
+
# 15) CORE RUN
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1420 |
# ============================================================
|
| 1421 |
|
| 1422 |
def _empty_outputs(msg="Data belum siap."):
|
|
|
|
| 1424 |
empty_fig = go.Figure()
|
| 1425 |
return (
|
| 1426 |
"", # kpi_md
|
| 1427 |
+
empty, empty, empty, empty, empty,
|
| 1428 |
+
None, None, None, None, None,
|
| 1429 |
+
empty_fig, empty_fig, empty_fig,
|
|
|
|
| 1430 |
msg, # msg
|
| 1431 |
+
"LLM belum tersedia.", # status llm
|
| 1432 |
None # word path
|
| 1433 |
)
|
| 1434 |
|
|
|
|
| 1451 |
kew_norm = kew_value if (kew_value and kew_value != "(Semua)") else "(Semua)"
|
| 1452 |
faktor_wilayah_jenis = build_faktor_wilayah_jenis(df, pop_kab, pop_prov, pop_khusus, kew_norm)
|
| 1453 |
agg_jenis_full = build_agg_wilayah_jenis(df, faktor_wilayah_jenis, kew_norm)
|
| 1454 |
+
agg_total = build_agg_wilayah_total_from_jenis(agg_jenis_full, faktor_wilayah_jenis, kew_norm)
|
| 1455 |
|
| 1456 |
summary_jenis = build_summary_per_jenis(agg_jenis_full, agg_total)
|
| 1457 |
verif_total = build_verif_jenis(faktor_wilayah_jenis, kew_norm)
|
|
|
|
| 1509 |
|
| 1510 |
kpi_md = build_kpi_markdown(summary_jenis)
|
| 1511 |
|
| 1512 |
+
# Export xlsx
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1513 |
tmpdir = tempfile.mkdtemp()
|
| 1514 |
prov_slug = (_canon(prov_value or "SEMUA").upper() or "SEMUA")
|
| 1515 |
kab_slug = (_canon(kab_value or "SEMUA").upper() or "SEMUA")
|
|
|
|
| 1520 |
p_raw = str(Path(tmpdir) / f"IPLM_RAW_DATA_{prov_slug}_{kab_slug}_{kew_slug}.xlsx")
|
| 1521 |
p_detail = str(Path(tmpdir) / f"IPLM_DetailEntitas_FinalMenempelWilayah_{prov_slug}_{kab_slug}_{kew_slug}.xlsx")
|
| 1522 |
p_verif = str(Path(tmpdir) / f"IPLM_KecukupanSampel_33_88_{prov_slug}_{kab_slug}_{kew_slug}.xlsx")
|
|
|
|
| 1523 |
|
| 1524 |
summary_jenis.to_excel(p_summary, index=False)
|
| 1525 |
agg_total.to_excel(p_total, index=False)
|
|
|
|
| 1527 |
detail_view.to_excel(p_detail, index=False)
|
| 1528 |
verif_total.to_excel(p_verif, index=False)
|
| 1529 |
|
| 1530 |
+
# ====== Word tabel interpretasi & rekomendasi ======
|
| 1531 |
+
wilayah_txt = kab_value if (kab_value and kab_value != "(Semua)") else (prov_value if (prov_value and prov_value != "(Semua)") else "Nasional/All")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1532 |
header, rows = build_interpretasi_table_values(agg_total, wilayah_txt, TARGET_RATIO)
|
| 1533 |
+
|
| 1534 |
+
# jumlah perpustakaan sumber data (gambar 2)
|
| 1535 |
+
jumlah_perpus = summarize_jumlah_perpus_dari_agg_jenis(agg_jenis_full, wilayah_txt, kew_norm)
|
| 1536 |
|
| 1537 |
rows_filled, llm_status = llm_fill_interpretasi_rekomendasi(
|
| 1538 |
header=header,
|
|
|
|
| 1541 |
kew_label=(kew_value or "(Semua)"),
|
| 1542 |
jumlah_perpus_by_jenis=jumlah_perpus
|
| 1543 |
)
|
| 1544 |
+
word_path = generate_word_table_interpretasi(header, rows_filled, wilayah_txt, jumlah_perpus)
|
| 1545 |
|
| 1546 |
msg = (
|
| 1547 |
f"Selesai (TARGET {TARGET_RATIO*100:.2f}%): raw={len(raw)} | entitas={len(detail_view)} | "
|
|
|
|
| 1552 |
return (
|
| 1553 |
kpi_md,
|
| 1554 |
summary_jenis, agg_total, agg_jenis_view, detail_view, verif_total,
|
| 1555 |
+
p_summary, p_total, p_raw, p_detail, p_verif,
|
| 1556 |
fig_umum, fig_sekolah, fig_khusus,
|
|
|
|
| 1557 |
msg,
|
| 1558 |
llm_status,
|
| 1559 |
(word_path if word_path else None)
|
|
|
|
| 1564 |
|
| 1565 |
|
| 1566 |
# ============================================================
|
| 1567 |
+
# 16) UI (NO UPLOAD)
|
| 1568 |
# ============================================================
|
| 1569 |
|
| 1570 |
def ui_load(force=False):
|
|
|
|
| 1623 |
- Tabel Word "Interpretasi & Rekomendasi" memakai NILAI APA ADANYA (tanpa dikali 100) untuk sub/dim.
|
| 1624 |
- Baris "Nilai IPLM" memakai Indeks_Final_Wilayah_0_100 apa adanya.
|
| 1625 |
- Di bawah tabel Word ditambahkan ringkasan jumlah perpustakaan sumber data (sekolah/umum/khusus/total) dari tabel agregat wilayah Γ jenis.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1626 |
""")
|
| 1627 |
|
| 1628 |
state_df = gr.State(None)
|
|
|
|
| 1671 |
gr.Markdown("### Perpustakaan Khusus")
|
| 1672 |
bell_khusus = gr.Plot(scale=1)
|
| 1673 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1674 |
gr.Markdown("## Status LLM (Isi Interpretasi & Rekomendasi)")
|
| 1675 |
llm_status_out = gr.Markdown()
|
| 1676 |
|
|
|
|
| 1680 |
dl_raw = gr.DownloadButton(label="Download Data Mentah (.xlsx)")
|
| 1681 |
dl_detail = gr.DownloadButton(label="Download Detail Entitas (.xlsx)")
|
| 1682 |
dl_verif = gr.DownloadButton(label="Download Kecukupan Sampel (.xlsx)")
|
| 1683 |
+
dl_word = gr.DownloadButton(label="Download Word: Interpretasi & Rekomendasi (.docx)" if DOCX_AVAILABLE else "Download Word (OFF)")
|
|
|
|
| 1684 |
|
| 1685 |
run_btn.click(
|
| 1686 |
fn=run_calc,
|
|
|
|
| 1688 |
outputs=[
|
| 1689 |
kpi_out,
|
| 1690 |
out_summary, out_agg_total, out_agg_jenis, out_detail, out_verif,
|
| 1691 |
+
dl_summary, dl_total, dl_raw, dl_detail, dl_verif,
|
| 1692 |
bell_umum, bell_sekolah, bell_khusus,
|
|
|
|
| 1693 |
msg_out,
|
| 1694 |
llm_status_out,
|
| 1695 |
dl_word
|