# -*- coding: utf-8 -*-
"""
IPLM 2025 — Final (Target Sampel 33.88% per Jenis) — TANPA Kinerja Relatif / Percentile
UPDATE (sesuai instruksi terbaru Anda) — TANPA mengubah pipeline lain:
FOKUS PEMBENAHAN (LLM + WORD):
1) Nilai Kepatuhan, Koleksi, Tenaga, Kinerja, Pelayanan, Pengelolaan:
- TIDAK dikalikan 100.
- Ditulis APA ADANYA dari kolom agregat aplikasi:
Rata2_dim_kepatuhan, Rata2_sub_koleksi, Rata2_sub_sdm, Rata2_dim_kinerja,
Rata2_sub_pelayanan, Rata2_sub_pengelolaan.
2) Nilai IPLM ditulis apa adanya: Indeks_Final_Wilayah_0_100.
3) LLM mengisi Interpretasi & Rekomendasi:
- Interpretasi: deskriptif, kondisi riil berbasis relasi angka (lebih besar/kecil, gap, dominan, konsistensi),
plus pemaknaan substantif dimensi (koleksi/sdm/pelayanan/pengelolaan) TANPA label normatif.
- Rekomendasi: operasional, 2–3 butir ringkas, menaut ke pola angka (gap/ketimpangan/kontribusi).
4) Di bawah tabel Word: tambah deskripsi jumlah perpustakaan sumber data (dari tabel agregat wilayah × jenis / “gambar 2”):
sekolah=..., umum=..., khusus=..., total=...
Catatan penting:
- Semua perhitungan dan dashboard tetap.
- Yang diubah hanya: (a) cara mengambil nilai untuk tabel Word (tanpa *100),
(b) prompt LLM untuk isi interpretasi/rekomendasi agar nyambung dengan angka,
(c) tambahan paragraf jumlah perpustakaan di bawah tabel Word.
"""
import os
import re
import time
import json
import math
import tempfile
from pathlib import Path
import gradio as gr
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from sklearn.preprocessing import PowerTransformer
# python-docx (wajib kalau mau Word)
DOCX_AVAILABLE = True
try:
from docx import Document
from docx.shared import Pt
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
except Exception:
DOCX_AVAILABLE = False
Document = None
# huggingface client (opsional)
HF_AVAILABLE = True
try:
from huggingface_hub import InferenceClient
except Exception:
HF_AVAILABLE = False
InferenceClient = None
# ============================================================
# 1) KONFIGURASI
# ============================================================
DATA_FILE = os.getenv("DATA_FILE", "DATA CLEAN GABUNGAN SANGGAH-TIDAK SANGGAH - ALL200226.xlsx")
POP_KAB = os.getenv("POP_KAB", "Data_populasi_Kab_kota_fixed.xlsx")
POP_PROV = os.getenv("POP_PROV", "Data_populasi_propinsi.xlsx")
POP_KHUSUS = os.getenv("POP_KHUSUS", "Data_populasi_perp_khusus.xlsx")
W_KEPATUHAN = float(os.getenv("W_KEPATUHAN", "0.30"))
W_KINERJA = float(os.getenv("W_KINERJA", "0.70"))
TARGET_RATIO = float(os.getenv("TARGET_RATIO", "0.3388"))
USE_LLM = True
LLM_MODEL_NAME = os.getenv("LLM_MODEL_NAME", "meta-llama/Meta-Llama-3-8B-Instruct")
HF_TOKEN = (
os.getenv("HF_SECRET")
or os.getenv("HF_TOKEN")
or os.getenv("HUGGINGFACEHUB_API_TOKEN")
or os.getenv("HF_API_TOKEN")
)
# ============================================================
# 2) UTIL
# ============================================================
def _mtime(path_str: str):
p = Path(path_str)
return p.stat().st_mtime if p.exists() else None
def _canon(s: str) -> str:
return re.sub(r"[^a-z0-9]+", "", str(s).lower())
def _disp_text(x):
if pd.isna(x):
return None
t = str(x).strip().upper()
return " ".join(t.split())
def pick_col(df, candidates):
if df is None or df.empty:
return None
for c in candidates:
if c in df.columns:
return c
can_map = {_canon(c): c for c in df.columns}
for c in candidates:
k = _canon(c)
if k in can_map:
return can_map[k]
return None
def coerce_num(val):
if pd.isna(val):
return np.nan
t = str(val).strip()
if t == "" or t in {"-", "–", "—", "NA", "N/A", "null", "NULL"}:
return np.nan
t = t.replace("\u00a0", " ").replace("Rp", "").replace("%", "")
t = re.sub(r"[^0-9,.\-]", "", t)
if t.count(".") > 1 and t.count(",") == 1:
t = t.replace(".", "").replace(",", ".")
elif t.count(",") > 1 and t.count(".") == 1:
t = t.replace(",", "")
elif t.count(",") == 1 and t.count(".") == 0:
t = t.replace(",", ".")
else:
t = t.replace(",", "")
try:
return float(t)
except Exception:
return np.nan
def minmax_norm(s: pd.Series) -> pd.Series:
x = pd.to_numeric(s, errors="coerce").astype(float)
mn, mx = x.min(skipna=True), x.max(skipna=True)
if pd.isna(mn) or pd.isna(mx) or mx == mn:
return pd.Series(0.0, index=s.index)
return (x - mn) / (mx - mn)
def norm_kew(v):
if pd.isna(v):
return None
t = str(v).strip().upper()
if "KAB" in t or "KOTA" in t:
return "KAB/KOTA"
if "PROV" in t:
return "PROVINSI"
if "PUSAT" in t or "NASIONAL" in t:
return "PUSAT"
return t
def norm_prov_disp(s):
if pd.isna(s):
return None
t = str(s).strip().upper()
t = t.replace("\u00a0", " ")
t = " ".join(t.split())
t = t.replace("PROPINSI", "PROVINSI")
while t.startswith("PROVINSI PROVINSI "):
t = t.replace("PROVINSI PROVINSI ", "PROVINSI ", 1)
if t.startswith("PROVINSI "):
name = t[len("PROVINSI "):].strip()
else:
name = t
name = " ".join(name.split())
if not name:
return None
return f"PROVINSI {name}"
def norm_prov_label(s):
if pd.isna(s):
return None
t = str(s).strip().upper().replace("\u00a0", " ")
t = " ".join(t.split())
t = t.replace("PROPINSI", "PROVINSI")
t = t.replace("PROVINSI", "").strip()
return re.sub(r"[^A-Z0-9]+", "", t)
def norm_kab_label(s):
if pd.isna(s):
return None
t = str(s).upper()
t = t.replace("KABUPATEN", "KAB")
t = t.replace("KAB.", "KAB")
t = t.replace("KOTA ADMINISTRASI", "KOTA")
t = t.replace("KOTA ADM.", "KOTA")
t = t.replace("KOTA.", "KOTA")
t = " ".join(t.split())
return re.sub(r"[^A-Z0-9]+", "", t)
def safe_div(num, den):
if den is None or pd.isna(den) or float(den) <= 0:
return np.nan
return float(num) / float(den)
def faktor_penyesuaian_total(n_total: float, target_total: float) -> float:
if target_total is None or pd.isna(target_total) or float(target_total) <= 0:
return 1.0
if n_total is None or pd.isna(n_total) or float(n_total) < 0:
n_total = 0.0
return float(min(float(n_total) / float(target_total), 1.0))
# ============================================================
# 3) INDIKATOR IPLM
# ============================================================
koleksi_cols = [
"JudulTercetak","EksemplarTercetak","JudulElektronik","EksemplarElektronik",
"TambahJudulTercetak","TambahEksemplarTercetak",
"TambahJudulElektronik","TambahEksemplarElektronik",
"KomitmenAnggaranKoleksi"
]
sdm_cols = [
"TenagaKualifikasiIlmuPerpustakaan",
"TenagaFungsionalProfesional",
"TenagaPKB",
"AnggaranTenaga"
]
pelayanan_cols = [
"PesertaBudayaBaca","PemustakaLuringDaring","PemustakaFasilitasTIK",
"PemanfaatanJudulTercetak","PemanfaatanEksemplarTercetak",
"PemanfaatanJudulElektronik","PemanfaatanEksemplarElektronik"
]
pengelolaan_cols = [
"KegiatanBudayaBaca","KegiatanKerjasama","VariasiLayanan","Kebijakan","AnggaranLayanan"
]
all_indicators = koleksi_cols + sdm_cols + pelayanan_cols + pengelolaan_cols
alias_map_raw = {
"j_judul_koleksi_tercetak": "JudulTercetak",
"j_eksemplar_koleksi_tercetak": "EksemplarTercetak",
"j_judul_koleksi_digital": "JudulElektronik",
"j_eksemplar_koleksi_digital": "EksemplarElektronik",
"tambah_judul_koleksi_tercetak": "TambahJudulTercetak",
"tambah_eksemplar_koleksi_tercetak": "TambahEksemplarTercetak",
"tambah_judul_koleksi_digital": "TambahJudulElektronik",
"tambah_eksemplar_koleksi_digital": "TambahEksemplarElektronik",
"j_anggaran_koleksi": "KomitmenAnggaranKoleksi",
"j_tenaga_ilmu_perpus": "TenagaKualifikasiIlmuPerpustakaan",
"j_tenaga_nonilmu_perpus": "TenagaFungsionalProfesional",
"j_tenaga_pkb": "TenagaPKB",
"j_anggaran_diklat_perpus": "AnggaranTenaga",
"j_peserta_budaya_baca": "PesertaBudayaBaca",
"j_pemustaka_luring_daring": "PemustakaLuringDaring",
"j_pemustaka_fasilitas_tik": "PemustakaFasilitasTIK",
"j_judul_koleksi_tercetak_termanfaat": "PemanfaatanJudulTercetak",
"j_eksemplar_koleksi_tercetak_termanfaat": "PemanfaatanEksemplarTercetak",
"j_judul_koleksi_digital_termanfaat": "PemanfaatanJudulElektronik",
"j_eksemplar_koleksi_digital_termanfaat": "PemanfaatanEksemplarElektronik",
"j_kegiatan_budaya_baca_peningkatan_literasi": "KegiatanBudayaBaca",
"j_kerjasama_pengembangan_perpus": "KegiatanKerjasama",
"j_variasi_layanan": "VariasiLayanan",
"j_kebijakan_prosedur_pelayanan": "Kebijakan",
"j_anggaran_peningkatan_pelayanan": "AnggaranLayanan",
}
alias_map = {_canon(k): v for k, v in alias_map_raw.items()}
# ============================================================
# 4) PIPELINE NASIONAL (LEVEL ENTITAS)
# ============================================================
def _mean_norm_cols(row, cols):
vals = []
for c in cols:
k = f"norm_{c}"
if k in row.index:
v = row[k]
if pd.isna(v):
v = 0.0
vals.append(float(v))
return float(np.mean(vals)) if vals else 0.0
def prepare_global(df_src: pd.DataFrame) -> pd.DataFrame:
if df_src is None or df_src.empty:
return df_src
df = df_src.copy()
rename_map = {}
for col in df.columns:
c = _canon(col)
if c in alias_map:
rename_map[col] = alias_map[c]
else:
for tgt in all_indicators:
if c == _canon(tgt):
rename_map[col] = tgt
break
if rename_map:
df = df.rename(columns=rename_map)
available = [c for c in all_indicators if c in df.columns]
for c in available:
df[c] = df[c].apply(coerce_num)
for c in available:
x = pd.to_numeric(df[c], errors="coerce").astype(float).values
mask = ~np.isnan(x)
transformed = np.full_like(x, np.nan, dtype=float)
if mask.sum() > 1:
pt = PowerTransformer(method="yeo-johnson", standardize=False)
transformed[mask] = pt.fit_transform(x[mask].reshape(-1, 1)).ravel()
else:
transformed[mask] = x[mask]
df[f"norm_{c}"] = minmax_norm(pd.Series(transformed, index=df.index))
df["sub_koleksi"] = df.apply(lambda r: _mean_norm_cols(r, [c for c in koleksi_cols if c in available]), axis=1)
df["sub_sdm"] = df.apply(lambda r: _mean_norm_cols(r, [c for c in sdm_cols if c in available]), axis=1)
df["sub_pelayanan"] = df.apply(lambda r: _mean_norm_cols(r, [c for c in pelayanan_cols if c in available]), axis=1)
df["sub_pengelolaan"] = df.apply(lambda r: _mean_norm_cols(r, [c for c in pengelolaan_cols if c in available]), axis=1)
df["dim_kepatuhan"] = df[["sub_koleksi","sub_sdm"]].mean(axis=1)
df["dim_kinerja"] = df[["sub_pelayanan","sub_pengelolaan"]].mean(axis=1)
df["Indeks_Dasar_0_100"] = 100 * (W_KEPATUHAN * df["dim_kepatuhan"] + W_KINERJA * df["dim_kinerja"])
for c in ["sub_koleksi","sub_sdm","sub_pelayanan","sub_pengelolaan","dim_kepatuhan","dim_kinerja","Indeks_Dasar_0_100"]:
df[c] = pd.to_numeric(df[c], errors="coerce").fillna(0.0)
return df
# ============================================================
# 5) CACHE LOADER (NO UPLOAD)
# ============================================================
_CACHE = {"key": None, "df_all": None, "df_raw": None, "pop_kab": None, "pop_prov": None, "pop_khusus": None, "meta": None, "info": None}
def _parse_pop_khusus(path_xlsx: str) -> pd.DataFrame:
df = pd.read_excel(path_xlsx)
if df is None or df.empty:
return pd.DataFrame()
c_mix = pick_col(df, [
"Propinsi/Kab/kota", "Propinsi/Kab/Kota", "Provinsi/Kab/Kota",
"Provinsi/Kab/kota", "Provinsi/Kabupaten/Kota",
"Wilayah", "Nama Wilayah"
])
if c_mix is None:
raise ValueError("POP_KHUSUS: kolom gabungan Provinsi/Kab/Kota tidak ditemukan.")
c_pop = pick_col(df, ["POP_KHUSUS", "Pop_Khusus", "pop_khusus"])
if c_pop is None:
raise ValueError("POP_KHUSUS: kolom 'POP_KHUSUS' tidak ditemukan.")
mix = df[c_mix].astype(str).fillna("").str.strip()
pop_series = df[c_pop].apply(coerce_num)
rows = []
current_prov = None
for m, pval in zip(mix.tolist(), pop_series.tolist()):
mm = _disp_text(m) or ""
if mm == "":
continue
if mm.startswith("PROVINSI "):
prov_name = mm.replace("PROVINSI", "").strip()
current_prov = prov_name
rows.append({
"LEVEL": "PROV",
"Provinsi_Label": f"PROVINSI {prov_name}",
"Kab_Kota_Label": None,
"Pop_Total_Jenis": pval,
})
continue
rows.append({
"LEVEL": "KAB",
"Provinsi_Label": f"PROVINSI {current_prov}" if current_prov else None,
"Kab_Kota_Label": mm,
"Pop_Total_Jenis": pval,
})
pop = pd.DataFrame(rows)
if pop.empty:
return pop
pop["Pop_Total_Jenis"] = pd.to_numeric(pop["Pop_Total_Jenis"], errors="coerce").fillna(0.0)
pop["prov_key"] = pop["Provinsi_Label"].apply(norm_prov_label)
pop["kab_key"] = pop["Kab_Kota_Label"].apply(norm_kab_label) if "Kab_Kota_Label" in pop.columns else None
return pop
def load_default_files(force=False):
key = (
DATA_FILE, POP_KAB, POP_PROV, POP_KHUSUS,
_mtime(DATA_FILE), _mtime(POP_KAB), _mtime(POP_PROV), _mtime(POP_KHUSUS)
)
if (not force) and _CACHE["key"] == key and _CACHE["df_all"] is not None:
return _CACHE["df_all"], _CACHE["df_raw"], _CACHE["pop_kab"], _CACHE["pop_prov"], _CACHE["pop_khusus"], _CACHE["meta"], _CACHE["info"]
for p, label in [(DATA_FILE, "DM"), (POP_KAB, "POP_KAB"), (POP_PROV, "POP_PROV"), (POP_KHUSUS, "POP_KHUSUS")]:
if not Path(p).exists():
info = f"File tidak ditemukan ({label}): {p}"
_CACHE.update({"key": key, "df_all": None, "df_raw": None, "pop_kab": None, "pop_prov": None, "pop_khusus": None, "meta": {}, "info": info})
return None, None, None, None, None, {}, info
fp = Path(DATA_FILE)
xls = pd.ExcelFile(fp)
frames = [pd.read_excel(fp, sheet_name=s) for s in xls.sheet_names]
df_raw = pd.concat(frames, ignore_index=True, sort=False)
prov_col = pick_col(df_raw, ["provinsi", "Provinsi", "PROVINSI"])
kab_col = pick_col(df_raw, ["kab/kota", "Kab/Kota", "Kab_Kota", "KAB/KOTA", "kabupaten_kota", "Kabupaten/Kota", "kabupaten kota", "kota", "kab_kota"])
kew_col = pick_col(df_raw, ["kewenangan", "jenis_kewenangan", "Kewenangan", "KEWENANGAN"])
jenis_col = pick_col(df_raw, ["jenis_perpustakaan", "Jenis Perpustakaan", "JENIS_PERPUSTAKAAN"])
nama_col = pick_col(df_raw, ["nm_perpustakaan","nama_perpustakaan","Nama Perpustakaan","nm_instansi_lembaga","nm_perpus"])
missing = []
if prov_col is None: missing.append("Provinsi")
if kab_col is None: missing.append("Kab/Kota")
if kew_col is None: missing.append("Kewenangan")
if jenis_col is None: missing.append("Jenis Perpustakaan")
if missing:
info = f"Kolom wajib tidak ditemukan di DM: {', '.join(missing)}"
_CACHE.update({"key": key, "df_all": None, "df_raw": None, "pop_kab": None, "pop_prov": None, "pop_khusus": None, "meta": {}, "info": info})
return None, None, None, None, None, {}, info
val_map_jenis = {
"PERPUSTAKAAN SEKOLAH": "sekolah", "SEKOLAH": "sekolah",
"PERPUSTAKAAN UMUM": "umum", "UMUM": "umum", "PERPUSTAKAAN DAERAH": "umum",
"PERPUSTAKAAN KHUSUS": "khusus", "KHUSUS": "khusus",
}
df_raw["KEW_NORM"] = df_raw[kew_col].apply(norm_kew)
df_raw["_dataset"] = df_raw[jenis_col].astype(str).str.strip().str.upper().map(val_map_jenis)
df_raw["PROV_DISP"] = df_raw[prov_col].apply(norm_prov_disp)
df_raw["KAB_DISP"] = df_raw[kab_col].apply(_disp_text)
df_raw["prov_key"] = df_raw["PROV_DISP"].apply(norm_prov_label)
df_raw["kab_key"] = df_raw["KAB_DISP"].apply(norm_kab_label)
if nama_col and nama_col in df_raw.columns:
kcols = [prov_col, kab_col, kew_col, jenis_col, nama_col]
else:
kcols = [prov_col, kab_col, kew_col, jenis_col]
tmp = df_raw[kcols].astype(str).fillna("").apply(lambda s: s.str.strip(), axis=0)
df_raw["_row_key"] = tmp.apply(lambda r: "||".join(r.values.tolist()), axis=1).apply(_canon)
before = len(df_raw)
df_raw = df_raw.drop_duplicates(subset=["_row_key"], keep="first").copy()
after = len(df_raw)
# POP KAB
pk = pd.read_excel(POP_KAB)
c_kab = pick_col(pk, ["KABUPATEN_KOTA","Kab/Kota","Kabupaten/Kota","KAB/KOTA","Kabupaten_Kota","kab_kota","kabupaten_kota"])
c_prov = pick_col(pk, ["PROVINSI","Provinsi","provinsi"])
if c_kab is None:
info = "POP_KAB: wajib ada kolom Kab/Kota."
_CACHE.update({"key": key, "df_all": None, "df_raw": None, "pop_kab": None, "pop_prov": None, "pop_khusus": None, "meta": {}, "info": info})
return None, None, None, None, None, {}, info
pop_kab = pk.copy()
pop_kab["Kab_Kota_Label"] = pk[c_kab].astype(str).str.strip()
pop_kab["Provinsi_Label"] = pk[c_prov].astype(str).str.strip() if c_prov else ""
pop_kab["kab_key"] = pop_kab["Kab_Kota_Label"].apply(norm_kab_label)
pop_kab = pop_kab.groupby("kab_key", as_index=False).first()
# POP PROV
pp = pd.read_excel(POP_PROV)
c_pr = pick_col(pp, ["Provinsi","PROVINSI","provinsi","Propinsi","PROPINSI","propinsi"])
if c_pr is None:
info = "POP_PROV: wajib ada kolom Provinsi."
_CACHE.update({"key": key, "df_all": None, "df_raw": None, "pop_kab": None, "pop_prov": None, "pop_khusus": None, "meta": {}, "info": info})
return None, None, None, None, None, {}, info
pop_prov = pp.copy()
pop_prov["Provinsi_Label"] = pp[c_pr].astype(str).str.strip()
pop_prov["prov_key"] = pop_prov["Provinsi_Label"].apply(norm_prov_label)
pop_prov = pop_prov.groupby("prov_key", as_index=False).first()
# POP KHUSUS
try:
pop_khusus = _parse_pop_khusus(POP_KHUSUS)
except Exception as e:
info = f"POP_KHUSUS gagal dibaca: {repr(e)}"
_CACHE.update({"key": key, "df_all": None, "df_raw": None, "pop_kab": None, "pop_prov": None, "pop_khusus": None, "meta": {}, "info": info})
return None, None, None, None, None, {}, info
df_all = prepare_global(df_raw)
meta = dict(prov_col=prov_col, kab_col=kab_col, kew_col=kew_col, jenis_col=jenis_col, nama_col=nama_col)
info = (
f"Mode NO UPLOAD (cache aktif)\n"
f"DM: {fp.name} | Baris: {before} -> dedup: {after}\n"
f"POP_KAB: {Path(POP_KAB).name} (n={len(pop_kab)})\n"
f"POP_PROV: {Path(POP_PROV).name} (n={len(pop_prov)})\n"
f"POP_KHUSUS: {Path(POP_KHUSUS).name} (n={len(pop_khusus)})\n"
f"TARGET sampel per jenis: {TARGET_RATIO*100:.2f}%\n"
f"mtime: DM={time.ctime(_mtime(DATA_FILE))} | Kab={time.ctime(_mtime(POP_KAB))} | Prov={time.ctime(_mtime(POP_PROV))} | Khusus={time.ctime(_mtime(POP_KHUSUS))}"
)
_CACHE.update({
"key": key,
"df_all": df_all,
"df_raw": df_raw,
"pop_kab": pop_kab,
"pop_prov": pop_prov,
"pop_khusus": pop_khusus,
"meta": meta,
"info": info
})
return df_all, df_raw, pop_kab, pop_prov, pop_khusus, meta, info
# ============================================================
# 6) FAKTOR WILAYAH — PER JENIS (TARGET 33.88%)
# ============================================================
def build_faktor_wilayah_jenis(df_filtered, pop_kab, pop_prov, pop_khusus, kew_value):
if df_filtered is None or df_filtered.empty:
return pd.DataFrame()
kew_norm = str(kew_value or "").upper()
df = df_filtered.copy()
df = df[df["_dataset"].isin(["sekolah", "umum", "khusus"])].copy()
if df.empty:
return pd.DataFrame()
jenis_list = ["sekolah", "umum", "khusus"]
if "PROV" in kew_norm:
key_col, label_col, label_name, mode = "prov_key", "PROV_DISP", "Provinsi", "PROV"
base_pop = pop_prov.copy() if (pop_prov is not None and not pop_prov.empty) else pd.DataFrame()
if not base_pop.empty and "prov_key" not in base_pop.columns:
base_pop["prov_key"] = base_pop["Provinsi_Label"].apply(norm_prov_label)
base_pop = base_pop.set_index("prov_key") if (not base_pop.empty and "prov_key" in base_pop.columns) else pd.DataFrame().set_index(pd.Index([]))
else:
key_col, label_col, label_name, mode = "kab_key", "KAB_DISP", "Kab/Kota", "KAB"
base_pop = pop_kab.copy() if (pop_kab is not None and not pop_kab.empty) else pd.DataFrame()
if not base_pop.empty and "kab_key" not in base_pop.columns:
base_pop["kab_key"] = base_pop["Kab_Kota_Label"].apply(norm_kab_label)
base_pop = base_pop.set_index("kab_key") if (not base_pop.empty and "kab_key" in base_pop.columns) else pd.DataFrame().set_index(pd.Index([]))
base_keys = df[[key_col, label_col]].drop_duplicates().rename(columns={key_col: "group_key", label_col: label_name})
full = base_keys.assign(_tmp=1).merge(pd.DataFrame({"Jenis": jenis_list, "_tmp": 1}), on="_tmp").drop(columns="_tmp")
cnt = (
df.groupby([key_col, label_col, "_dataset"], dropna=False)
.size()
.reset_index(name="n_jenis")
.rename(columns={key_col: "group_key", label_col: label_name, "_dataset": "Jenis"})
)
cnt["Jenis"] = cnt["Jenis"].astype(str).str.lower().str.strip()
base_n = full.merge(cnt, on=["group_key", label_name, "Jenis"], how="left")
base_n["n_jenis"] = pd.to_numeric(base_n["n_jenis"], errors="coerce").fillna(0).astype(int)
base_n["target_total_33_88_jenis"] = 0.0
base_n["pop_total_jenis"] = 0.0
if not base_pop.empty:
if mode == "KAB":
pop_sekolah = pd.to_numeric(base_pop.get("jumlah_populasi_sekolah_base", base_pop.get("jumlah_populasi_sekolah", 0)), errors="coerce").fillna(0.0)
pop_umum = pd.to_numeric(base_pop.get("jumlah_populasi_umum_base", base_pop.get("jumlah_populasi_umum", 0)), errors="coerce").fillna(0.0)
tgt_sekolah = pop_sekolah * float(TARGET_RATIO)
tgt_umum = pop_umum * float(TARGET_RATIO)
else:
sma = pd.to_numeric(base_pop.get("sma ", base_pop.get("sma", 0)), errors="coerce").fillna(0.0)
smk = pd.to_numeric(base_pop.get("smk", 0), errors="coerce").fillna(0.0)
slb = pd.to_numeric(base_pop.get("slb", 0), errors="coerce").fillna(0.0)
pop_sekolah = sma + smk + slb
tgt_sekolah = pop_sekolah * float(TARGET_RATIO)
pop_umum = pd.to_numeric(base_pop.get("perpus_umum_prop", 0), errors="coerce").fillna(0.0)
tgt_umum = pop_umum * float(TARGET_RATIO)
m = base_n["Jenis"].eq("sekolah")
base_n.loc[m, "pop_total_jenis"] = base_n.loc[m, "group_key"].map(pop_sekolah).fillna(0.0).values
base_n.loc[m, "target_total_33_88_jenis"] = base_n.loc[m, "group_key"].map(tgt_sekolah).fillna(0.0).values
m = base_n["Jenis"].eq("umum")
base_n.loc[m, "pop_total_jenis"] = base_n.loc[m, "group_key"].map(pop_umum).fillna(0.0).values
base_n.loc[m, "target_total_33_88_jenis"] = base_n.loc[m, "group_key"].map(tgt_umum).fillna(0.0).values
if pop_khusus is not None and not pop_khusus.empty:
pk = pop_khusus.copy()
pk["Pop_Total_Jenis"] = pd.to_numeric(pk.get("Pop_Total_Jenis", 0), errors="coerce").fillna(0.0)
if mode == "PROV":
pk_prov = pk[pk["LEVEL"].astype(str).str.upper() == "PROV"].copy()
pk_map = pk_prov.groupby("prov_key", as_index=True).agg(pop=("Pop_Total_Jenis", "sum"))
pop_series = pk_map["pop"]
else:
pk_kab = pk[pk["LEVEL"].astype(str).str.upper() == "KAB"].copy()
pk_map = pk_kab.groupby("kab_key", as_index=True).agg(pop=("Pop_Total_Jenis", "sum"))
pop_series = pk_map["pop"]
tgt_series = pop_series * float(TARGET_RATIO)
m = base_n["Jenis"].eq("khusus")
base_n.loc[m, "pop_total_jenis"] = base_n.loc[m, "group_key"].map(pop_series).fillna(0.0).values
base_n.loc[m, "target_total_33_88_jenis"] = base_n.loc[m, "group_key"].map(tgt_series).fillna(0.0).values
base_n["target_total_33_88_jenis"] = pd.to_numeric(base_n["target_total_33_88_jenis"], errors="coerce").fillna(0.0)
base_n["pop_total_jenis"] = pd.to_numeric(base_n["pop_total_jenis"], errors="coerce").fillna(0.0)
m_need_pop = (base_n["pop_total_jenis"] <= 0) & (base_n["target_total_33_88_jenis"] > 0)
base_n.loc[m_need_pop, "pop_total_jenis"] = base_n.loc[m_need_pop, "target_total_33_88_jenis"] / float(TARGET_RATIO)
base_n["faktor_penyesuaian_jenis"] = [
faktor_penyesuaian_total(n, t)
for n, t in zip(
pd.to_numeric(base_n["n_jenis"], errors="coerce").fillna(0).astype(float),
pd.to_numeric(base_n["target_total_33_88_jenis"], errors="coerce").fillna(0).astype(float),
)
]
base_n["coverage_jenis_%"] = [
(safe_div(n, p) * 100.0) if (p is not None and not pd.isna(p) and float(p) > 0) else 0.0
for n, p in zip(
pd.to_numeric(base_n["n_jenis"], errors="coerce").fillna(0).astype(float),
pd.to_numeric(base_n["pop_total_jenis"], errors="coerce").fillna(0).astype(float),
)
]
base_n["gap_target33_88_jenis"] = [
max(float(t) - float(n), 0.0)
for n, t in zip(
pd.to_numeric(base_n["n_jenis"], errors="coerce").fillna(0).astype(float),
pd.to_numeric(base_n["target_total_33_88_jenis"], errors="coerce").fillna(0).astype(float),
)
]
base_n["target_total_33_88_jenis"] = pd.to_numeric(base_n["target_total_33_88_jenis"], errors="coerce").fillna(0).round(0).astype(int)
base_n["pop_total_jenis"] = pd.to_numeric(base_n["pop_total_jenis"], errors="coerce").fillna(0).round(0).astype(int)
base_n["coverage_jenis_%"] = pd.to_numeric(base_n["coverage_jenis_%"], errors="coerce").fillna(0.0).round(2)
base_n["faktor_penyesuaian_jenis"] = pd.to_numeric(base_n["faktor_penyesuaian_jenis"], errors="coerce").fillna(1.0).round(3)
base_n["gap_target33_88_jenis"] = pd.to_numeric(base_n["gap_target33_88_jenis"], errors="coerce").fillna(0).round(0).astype(int)
return base_n
# ============================================================
# 7) AGREGAT WILAYAH × JENIS
# ============================================================
def build_agg_wilayah_jenis(df_filtered, faktor_wilayah_jenis, kew_value):
if df_filtered is None or df_filtered.empty:
return pd.DataFrame()
kew_norm = str(kew_value or "").upper()
df = df_filtered.copy()
if "PROV" in kew_norm:
key_col, label_col, label_name = "prov_key", "PROV_DISP", "Provinsi"
else:
key_col, label_col, label_name = "kab_key", "KAB_DISP", "Kab/Kota"
df = df[df["_dataset"].isin(["sekolah", "umum", "khusus"])].copy()
if df.empty:
return pd.DataFrame()
jenis_list = ["sekolah", "umum", "khusus"]
base_keys = df[[key_col, label_col]].drop_duplicates().rename(columns={key_col: "group_key", label_col: label_name})
full = base_keys.assign(_tmp=1).merge(pd.DataFrame({"Jenis": jenis_list, "_tmp": 1}), on="_tmp").drop(columns="_tmp")
agg_real = df.groupby([key_col, label_col, "_dataset"], dropna=False).agg(
Jumlah=("Indeks_Dasar_0_100", "size"),
Rata2_sub_koleksi=("sub_koleksi", "mean"),
Rata2_sub_sdm=("sub_sdm", "mean"),
Rata2_sub_pelayanan=("sub_pelayanan", "mean"),
Rata2_sub_pengelolaan=("sub_pengelolaan", "mean"),
Rata2_dim_kepatuhan=("dim_kepatuhan", "mean"),
Rata2_dim_kinerja=("dim_kinerja", "mean"),
Indeks_Dasar_Agregat_0_100=("Indeks_Dasar_0_100", "mean"),
).reset_index().rename(columns={key_col: "group_key", label_col: label_name, "_dataset": "Jenis"})
agg_real["Jenis"] = agg_real["Jenis"].astype(str).str.lower().str.strip()
agg = full.merge(agg_real, on=["group_key", label_name, "Jenis"], how="left")
for c in ["Jumlah","Rata2_sub_koleksi","Rata2_sub_sdm","Rata2_sub_pelayanan","Rata2_sub_pengelolaan",
"Rata2_dim_kepatuhan","Rata2_dim_kinerja","Indeks_Dasar_Agregat_0_100"]:
if c in agg.columns:
agg[c] = pd.to_numeric(agg[c], errors="coerce").fillna(0.0)
agg["Jumlah"] = agg["Jumlah"].round(0).astype(int)
if faktor_wilayah_jenis is None or faktor_wilayah_jenis.empty:
agg["faktor_penyesuaian_jenis"] = 1.0
else:
fw = faktor_wilayah_jenis.copy()
fw["Jenis"] = fw["Jenis"].astype(str).str.lower().str.strip()
keep = ["group_key", label_name, "Jenis",
"faktor_penyesuaian_jenis", "target_total_33_88_jenis", "pop_total_jenis",
"coverage_jenis_%", "gap_target33_88_jenis", "n_jenis"]
fw = fw[[c for c in keep if c in fw.columns]].copy()
agg = agg.merge(fw, on=["group_key", label_name, "Jenis"], how="left")
agg["faktor_penyesuaian_jenis"] = pd.to_numeric(agg["faktor_penyesuaian_jenis"], errors="coerce").fillna(1.0)
agg["Indeks_Final_Agregat_0_100"] = (
pd.to_numeric(agg["Indeks_Dasar_Agregat_0_100"], errors="coerce").fillna(0.0)
* pd.to_numeric(agg["faktor_penyesuaian_jenis"], errors="coerce").fillna(1.0)
)
for c in [
"Rata2_sub_koleksi","Rata2_sub_sdm","Rata2_sub_pelayanan","Rata2_sub_pengelolaan",
"Rata2_dim_kepatuhan","Rata2_dim_kinerja"
]:
if c in agg.columns:
agg[c] = pd.to_numeric(agg[c], errors="coerce").fillna(0.0).round(3)
for c in ["Indeks_Dasar_Agregat_0_100","Indeks_Final_Agregat_0_100"]:
if c in agg.columns:
agg[c] = pd.to_numeric(agg[c], errors="coerce").fillna(0.0).round(2)
agg["faktor_penyesuaian_jenis"] = pd.to_numeric(agg["faktor_penyesuaian_jenis"], errors="coerce").fillna(1.0).round(3)
return agg
# ============================================================
# 8) AGREGAT WILAYAH (KESELURUHAN) — avg3 dari 3 jenis
# ============================================================
def build_agg_wilayah_total_from_jenis(agg_jenis, faktor_wilayah_jenis, kew_value):
if agg_jenis is None or agg_jenis.empty:
return pd.DataFrame()
kew_norm = str(kew_value or "").upper()
label_name = "Provinsi" if "PROV" in kew_norm else "Kab/Kota"
jenis_list = ["sekolah", "umum", "khusus"]
a = agg_jenis.copy()
a["Jenis"] = a["Jenis"].astype(str).str.lower().str.strip()
base_keys = a[["group_key", label_name]].drop_duplicates()
full = base_keys.assign(_tmp=1).merge(pd.DataFrame({"Jenis": jenis_list, "_tmp": 1}), on="_tmp").drop(columns="_tmp")
cols_present = [c for c in [
"Jumlah",
"Rata2_sub_koleksi","Rata2_sub_sdm","Rata2_sub_pelayanan","Rata2_sub_pengelolaan",
"Rata2_dim_kepatuhan","Rata2_dim_kinerja",
"Indeks_Dasar_Agregat_0_100",
"Indeks_Final_Agregat_0_100",
] if c in a.columns]
full = full.merge(a[["group_key", label_name, "Jenis"] + cols_present],
on=["group_key", label_name, "Jenis"], how="left")
for c in cols_present:
full[c] = pd.to_numeric(full[c], errors="coerce").fillna(0.0)
out = full.groupby(["group_key", label_name], as_index=False).agg(
n_total=("Jumlah", "sum"),
Rata2_sub_koleksi=("Rata2_sub_koleksi", "mean"),
Rata2_sub_sdm=("Rata2_sub_sdm", "mean"),
Rata2_sub_pelayanan=("Rata2_sub_pelayanan", "mean"),
Rata2_sub_pengelolaan=("Rata2_sub_pengelolaan", "mean"),
Rata2_dim_kepatuhan=("Rata2_dim_kepatuhan", "mean"),
Rata2_dim_kinerja=("Rata2_dim_kinerja", "mean"),
Indeks_Dasar_Agregat_0_100=("Indeks_Dasar_Agregat_0_100", "mean"),
Indeks_Final_Wilayah_0_100=("Indeks_Final_Agregat_0_100", "mean"),
)
for c in ["Rata2_sub_koleksi","Rata2_sub_sdm","Rata2_sub_pelayanan","Rata2_sub_pengelolaan","Rata2_dim_kepatuhan","Rata2_dim_kinerja"]:
out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0.0).round(3)
for c in ["Indeks_Dasar_Agregat_0_100","Indeks_Final_Wilayah_0_100"]:
out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0.0).round(2)
out["n_total"] = pd.to_numeric(out["n_total"], errors="coerce").fillna(0).round(0).astype(int)
return out
# ============================================================
# 9) SUMMARY (PER JENIS) + KESELURUHAN
# ============================================================
def build_summary_per_jenis(agg_jenis, agg_total):
jenis_list = ["sekolah", "umum", "khusus"]
def _row_default(jenis):
return {
"Jenis": jenis,
"Jumlah_Wilayah": 0,
"Total_Perpus": 0,
"Pop_Total_Jenis": 0,
"Target33_88_Total_Jenis": 0,
"Terkumpul_Jenis": 0,
"Coverage_Target33_88_Jenis_%": 0.0,
"Indeks_Dasar_0_100": 0.0,
"Indeks_Final_Disesuaikan_0_100": 0.0,
"Penyesuaian_Poin": 0.0,
}
rows_by_jenis = {j: _row_default(j) for j in jenis_list}
if agg_jenis is not None and not agg_jenis.empty:
a = agg_jenis.copy()
a["Jenis"] = a["Jenis"].astype(str).str.lower().str.strip()
for c in ["Jumlah","Indeks_Dasar_Agregat_0_100","Indeks_Final_Agregat_0_100","pop_total_jenis","target_total_33_88_jenis"]:
if c in a.columns:
a[c] = pd.to_numeric(a[c], errors="coerce").fillna(0)
for jenis in jenis_list:
sub = a[a["Jenis"] == jenis].copy()
if sub.empty:
continue
jumlah_wilayah = int(sub.shape[0])
terkumpul = int(pd.to_numeric(sub.get("Jumlah", 0), errors="coerce").fillna(0).sum())
pop_total = int(pd.to_numeric(sub.get("pop_total_jenis", 0), errors="coerce").fillna(0).sum())
target3388 = int(pd.to_numeric(sub.get("target_total_33_88_jenis", 0), errors="coerce").fillna(0).sum())
coverage = (terkumpul / target3388 * 100.0) if target3388 > 0 else 0.0
dasar = float(pd.to_numeric(sub.get("Indeks_Dasar_Agregat_0_100", 0), errors="coerce").fillna(0).mean())
final = float(pd.to_numeric(sub.get("Indeks_Final_Agregat_0_100", 0), errors="coerce").fillna(0).mean())
rows_by_jenis[jenis] = {
"Jenis": jenis,
"Jumlah_Wilayah": jumlah_wilayah,
"Total_Perpus": terkumpul,
"Pop_Total_Jenis": pop_total,
"Target33_88_Total_Jenis": target3388,
"Terkumpul_Jenis": terkumpul,
"Coverage_Target33_88_Jenis_%": float(coverage),
"Indeks_Dasar_0_100": float(dasar),
"Indeks_Final_Disesuaikan_0_100": float(final),
"Penyesuaian_Poin": float(final - dasar),
}
rows = [rows_by_jenis[j] for j in jenis_list]
dasar_all = (rows_by_jenis["sekolah"]["Indeks_Dasar_0_100"]
+ rows_by_jenis["umum"]["Indeks_Dasar_0_100"]
+ rows_by_jenis["khusus"]["Indeks_Dasar_0_100"]) / 3.0
final_all = (rows_by_jenis["sekolah"]["Indeks_Final_Disesuaikan_0_100"]
+ rows_by_jenis["umum"]["Indeks_Final_Disesuaikan_0_100"]
+ rows_by_jenis["khusus"]["Indeks_Final_Disesuaikan_0_100"]) / 3.0
pop_all = int(rows_by_jenis["sekolah"]["Pop_Total_Jenis"]
+ rows_by_jenis["umum"]["Pop_Total_Jenis"]
+ rows_by_jenis["khusus"]["Pop_Total_Jenis"])
target_all = int(rows_by_jenis["sekolah"]["Target33_88_Total_Jenis"]
+ rows_by_jenis["umum"]["Target33_88_Total_Jenis"]
+ rows_by_jenis["khusus"]["Target33_88_Total_Jenis"])
terkumpul_all = int(rows_by_jenis["sekolah"]["Terkumpul_Jenis"]
+ rows_by_jenis["umum"]["Terkumpul_Jenis"]
+ rows_by_jenis["khusus"]["Terkumpul_Jenis"])
coverage_all = (terkumpul_all / target_all * 100.0) if target_all > 0 else 0.0
jumlah_wilayah_all = int(agg_total.shape[0]) if (agg_total is not None and not agg_total.empty) else int(
max(rows_by_jenis["sekolah"]["Jumlah_Wilayah"],
rows_by_jenis["umum"]["Jumlah_Wilayah"],
rows_by_jenis["khusus"]["Jumlah_Wilayah"])
)
rows.append({
"Jenis": "keseluruhan",
"Jumlah_Wilayah": jumlah_wilayah_all,
"Total_Perpus": terkumpul_all,
"Pop_Total_Jenis": pop_all,
"Target33_88_Total_Jenis": target_all,
"Terkumpul_Jenis": terkumpul_all,
"Coverage_Target33_88_Jenis_%": float(coverage_all),
"Indeks_Dasar_0_100": float(dasar_all),
"Indeks_Final_Disesuaikan_0_100": float(final_all),
"Penyesuaian_Poin": float(final_all - dasar_all),
})
out = pd.DataFrame(rows)
for c in ["Jumlah_Wilayah","Total_Perpus","Pop_Total_Jenis","Target33_88_Total_Jenis","Terkumpul_Jenis"]:
out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0).round(0).astype(int)
for c in ["Coverage_Target33_88_Jenis_%","Indeks_Dasar_0_100","Indeks_Final_Disesuaikan_0_100","Penyesuaian_Poin"]:
out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0.0).round(2)
return out
# ============================================================
# 10) DETAIL ENTITAS (Final menempel dari wilayah)
# ============================================================
def attach_final_to_detail(df_filtered, agg_total, meta, kew_value):
if df_filtered is None or df_filtered.empty:
return pd.DataFrame()
kew_norm = str(kew_value or "").upper()
df = df_filtered.copy()
if "PROV" in kew_norm:
key_col = "prov_key"
label_cols = ("PROV_DISP", "KAB_DISP")
else:
key_col = "kab_key"
label_cols = ("PROV_DISP", "KAB_DISP")
if agg_total is None or agg_total.empty:
df["Indeks_Final_0_100"] = df["Indeks_Dasar_0_100"]
else:
m = agg_total[["group_key", "Indeks_Final_Wilayah_0_100"]].copy()
df = df.merge(m, left_on=key_col, right_on="group_key", how="left")
df["Indeks_Final_0_100"] = df["Indeks_Final_Wilayah_0_100"].fillna(df["Indeks_Dasar_0_100"])
df = df.drop(columns=[c for c in ["group_key","Indeks_Final_Wilayah_0_100"] if c in df.columns])
base_cols = [label_cols[0], label_cols[1], "KEW_NORM", "_dataset"]
if meta.get("nama_col") and meta["nama_col"] in df.columns:
df["nm_perpustakaan"] = df[meta["nama_col"]].astype(str)
base_cols.insert(2, "nm_perpustakaan")
keep = base_cols + [
"sub_koleksi","sub_sdm","sub_pelayanan","sub_pengelolaan",
"dim_kepatuhan","dim_kinerja",
"Indeks_Dasar_0_100",
"Indeks_Final_0_100",
]
keep = [c for c in keep if c in df.columns]
out = df[keep].copy()
out = out.rename(columns={label_cols[0]:"Provinsi", label_cols[1]:"Kab/Kota", "_dataset":"Jenis"})
for c in ["sub_koleksi","sub_sdm","sub_pelayanan","sub_pengelolaan","dim_kepatuhan","dim_kinerja"]:
if c in out.columns:
out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0.0).round(3)
for c in ["Indeks_Dasar_0_100","Indeks_Final_0_100"]:
if c in out.columns:
out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0.0).round(2)
return out
# ============================================================
# 11) VERIF (kecukupan sampel)
# ============================================================
def build_verif_jenis(faktor_wilayah_jenis, kew_value):
if faktor_wilayah_jenis is None or faktor_wilayah_jenis.empty:
return pd.DataFrame()
kew_norm = str(kew_value or "").upper()
label_col = "Provinsi" if "PROV" in kew_norm else "Kab/Kota"
out = faktor_wilayah_jenis.copy()
keep = [c for c in [
label_col, "Jenis",
"pop_total_jenis", "target_total_33_88_jenis", "n_jenis",
"coverage_jenis_%", "faktor_penyesuaian_jenis", "gap_target33_88_jenis"
] if c in out.columns]
out = out[keep].copy()
for c in ["pop_total_jenis", "target_total_33_88_jenis", "n_jenis", "gap_target33_88_jenis"]:
if c in out.columns:
out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0).round(0).astype(int)
if "coverage_jenis_%" in out.columns:
out["coverage_jenis_%"] = pd.to_numeric(out["coverage_jenis_%"], errors="coerce").fillna(0.0).round(2)
if "faktor_penyesuaian_jenis" in out.columns:
out["faktor_penyesuaian_jenis"] = pd.to_numeric(out["faktor_penyesuaian_jenis"], errors="coerce").fillna(1.0).round(3)
return out
# ============================================================
# 12) BELL CURVE — Indeks Dasar per Entitas (per Jenis) + Hover
# ============================================================
def _make_bell_curve_entitas(dfp, title, xcol="Indeks_Dasar_0_100", label_col="nm_perpustakaan", hover_cols=None, min_points=2):
fig = go.Figure()
fig.update_layout(
title=title,
xaxis_title="Skor (0–100)",
yaxis_title="Kepadatan",
hovermode="closest",
margin=dict(l=40, r=20, t=60, b=40),
legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0),
)
if dfp is None or dfp.empty or xcol not in dfp.columns:
fig.add_annotation(text="Tidak ada data untuk ditampilkan.", x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False)
fig.update_xaxes(range=[0, 100])
fig.update_yaxes(rangemode="tozero")
return fig
d = dfp.dropna(subset=[xcol]).copy()
if d.empty:
fig.add_annotation(text="Tidak ada data untuk ditampilkan.", x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False)
fig.update_xaxes(range=[0, 100])
fig.update_yaxes(rangemode="tozero")
return fig
x = pd.to_numeric(d[xcol], errors="coerce").astype(float)
d = d.loc[x.notna()].copy()
x = x.loc[x.notna()].values
if len(x) < 1:
fig.add_annotation(text="Tidak ada data untuk ditampilkan.", x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False)
fig.update_xaxes(range=[0, 100])
fig.update_yaxes(rangemode="tozero")
return fig
hover_cols = hover_cols or []
def _val(row, col):
if col not in row.index:
return ""
v = row[col]
return "" if pd.isna(v) else str(v)
hover_text = []
for _, row in d.iterrows():
lines = []
nm = _val(row, label_col) if (label_col and label_col in d.columns) else ""
if nm:
lines.append(f"{nm}")
lines.append(f"{xcol}: {float(pd.to_numeric(row[xcol], errors='coerce')):.2f}")
for hc in hover_cols:
vv = _val(row, hc)
if vv:
lines.append(f"{hc}: {vv}")
hover_text.append("
".join(lines))
if len(x) < min_points:
x_single = float(x[0])
fig.add_trace(go.Scatter(
x=[x_single], y=[0],
mode="markers", showlegend=False,
hovertext=[hover_text[0]] if hover_text else None,
hoverinfo="text"
))
fig.add_vline(x=x_single, line_width=1, line_dash="dash", annotation_text=f"Nilai: {x_single:.1f}", annotation_position="top")
fig.update_xaxes(range=[0, 100])
fig.update_yaxes(rangemode="tozero")
return fig
mu = float(np.mean(x))
sigma = float(np.std(x, ddof=1)) if len(x) > 1 else 1.0
sigma = max(sigma, 1e-3)
xmin = max(0.0, float(np.min(x)) - 5.0)
xmax = min(100.0, float(np.max(x)) + 5.0)
xs = np.linspace(xmin, xmax, 250)
pdf = (1.0 / (sigma * np.sqrt(2 * np.pi))) * np.exp(-0.5 * ((xs - mu) / sigma) ** 2)
fig.add_trace(go.Scatter(x=xs, y=pdf, mode="lines", name="Kurva Normal (fit)"))
fig.add_trace(go.Scatter(
x=x, y=np.zeros_like(x),
mode="markers", showlegend=False,
hovertext=hover_text if hover_text else None,
hoverinfo="text"
))
q1, q2, q3 = np.percentile(x, [25, 50, 75])
for xv, lab in [(q1, "Q1"), (q2, "Q2 (Median)"), (q3, "Q3"), (mu, "Mean")]:
fig.add_vline(x=float(xv), line_width=1, line_dash="dash", annotation_text=f"{lab}: {xv:.1f}", annotation_position="top")
fig.update_xaxes(range=[0, 100])
fig.update_yaxes(rangemode="tozero")
return fig
# ============================================================
# 13) KPI DASHBOARD (2 kartu: final + dasar)
# ============================================================
def _safe_first(df, col, default=0.0, where=None):
if df is None or df.empty or col not in df.columns:
return default
sub = df
if where is not None:
sub = df.loc[where]
if sub is None or sub.empty:
return default
return float(pd.to_numeric(sub[col], errors="coerce").fillna(default).iloc[0])
def build_kpi_markdown(summary_jenis):
if summary_jenis is None or summary_jenis.empty:
return ""
final_all = _safe_first(summary_jenis, "Indeks_Final_Disesuaikan_0_100", 0.0, where=summary_jenis["Jenis"].astype(str).str.lower().eq("keseluruhan"))
dasar_all = _safe_first(summary_jenis, "Indeks_Dasar_0_100", 0.0, where=summary_jenis["Jenis"].astype(str).str.lower().eq("keseluruhan"))
def fmt(x, nd=2):
return "NA" if pd.isna(x) else f"{x:.{nd}f}"
return f"""