AI_IPLM / app.py
ilafi's picture
Update app.py
5d267b2 verified
# -*- coding: utf-8 -*-
"""
IPLM 2025 — FINAL (NO UPLOAD) — FULL REWRITE (NO RINGKAS)
✅ Jenis tampil: sekolah, umum, khusus (khusus ditampilkan sebagai jenis)
✅ Indeks dasar per entitas: Yeo-Johnson + MinMax nasional per indikator
✅ Penyesuaian 68% berbasis TOTAL pengumpulan wilayah:
faktor_penyesuaian = min(n_total_terkumpul / target_total_68, 1.0)
✅ AGREGAT WILAYAH (KESELURUHAN) — FIX UTAMA (RUMUS BARU):
Semua kolom “keseluruhan” wilayah WAJIB diambil dari rata-rata 3 jenis
(sekolah + umum + khusus) ÷ 3 (missing=0, tetap ÷3)
-> termasuk Indeks_Dasar_Agregat_0_100 dan Indeks_Final_Wilayah_0_100
✅ Agregat Wilayah × Jenis:
Indeks_Final_Agregat_0_100 = Indeks_Dasar_Agregat_0_100 × faktor_penyesuaian_wilayah
(faktor wilayah sama untuk semua jenis)
✅ Ringkasan (Jenis + Keseluruhan) selalu 4 baris: sekolah, umum, khusus, keseluruhan
✅ Keseluruhan ringkasan = (final_sekolah+final_umum+final_khusus)/3 (missing=0, tetap ÷3)
✅ Detail entitas: Indeks_Final_0_100 menempel dari Agregat Wilayah (Keseluruhan) (bukan per-row)
✅ Bell curve per JENIS berbasis indeks per entitas (row-level)
✅ LLM analysis + Word
✅ Download (tanpa upload box)
✅ Download Data Mentah (.xlsx) = RAW hasil filter (bukan agregat)
FIX DISPLAY:
✅ “null/NaN” untuk target/pop/coverage jenis -> dibuat 0 agar tidak tampil null
✅ Verifikasi 68% (tanpa koma) -> semua angka dibulatkan jadi integer
PERMINTAAN UPDATE (HANYA INI):
1) TABEL faktor_wilayah:
- target_total_68 -> bilangan bulat
- pop_total -> bilangan bulat
- coverage_total_% -> decimal 2 digit
2) TABEL "Agregat Wilayah × Jenis" (UI) hanya sampai kolom Indeks_Dasar_Agregat_0_100
(kolom setelah itu tidak ditampilkan)
"""
import os
import re
import time
import tempfile
from pathlib import Path
import gradio as gr
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from sklearn.preprocessing import PowerTransformer
from docx import Document
from huggingface_hub import InferenceClient
# ============================================================
# 1) KONFIGURASI
# ============================================================
DATA_FILE = os.getenv("DATA_FILE", "DATA CLEAN GABUNGAN SANGGAH-TIDAK SANGGAH.xlsx")
POP_KAB = os.getenv("POP_KAB", "Data_populasi_Kab_kota_fixed.xlsx")
POP_PROV = os.getenv("POP_PROV", "Data_populasi_propinsi.xlsx")
POP_KHUSUS = os.getenv("POP_KHUSUS", "Data_populasi_perp_khusus.xlsx")
W_KEPATUHAN = float(os.getenv("W_KEPATUHAN", "0.30"))
W_KINERJA = float(os.getenv("W_KINERJA", "0.70"))
FALLBACK_TARGET_RATIO = 0.68
USE_LLM = True
LLM_MODEL_NAME = os.getenv("LLM_MODEL_NAME", "meta-llama/Meta-Llama-3-8B-Instruct")
HF_TOKEN = (
os.getenv("HF_SECRET")
or os.getenv("HF_TOKEN")
or os.getenv("HUGGINGFACEHUB_API_TOKEN")
or os.getenv("HF_API_TOKEN")
)
# ============================================================
# 2) UTIL
# ============================================================
def _mtime(path_str: str):
p = Path(path_str)
return p.stat().st_mtime if p.exists() else None
def _canon(s: str) -> str:
return re.sub(r"[^a-z0-9]+", "", str(s).lower())
def _disp_text(x):
if pd.isna(x):
return None
t = str(x).strip().upper()
return " ".join(t.split())
def pick_col(df, candidates):
if df is None or df.empty:
return None
for c in candidates:
if c in df.columns:
return c
can_map = {_canon(c): c for c in df.columns}
for c in candidates:
k = _canon(c)
if k in can_map:
return can_map[k]
return None
def coerce_num(val):
if pd.isna(val):
return np.nan
t = str(val).strip()
if t == "" or t in {"-", "–", "—", "NA", "N/A", "null", "NULL"}:
return np.nan
t = t.replace("\u00a0", " ").replace("Rp", "").replace("%", "")
t = re.sub(r"[^0-9,.\-]", "", t)
if t.count(".") > 1 and t.count(",") == 1:
t = t.replace(".", "").replace(",", ".")
elif t.count(",") > 1 and t.count(".") == 1:
t = t.replace(",", "")
elif t.count(",") == 1 and t.count(".") == 0:
t = t.replace(",", ".")
else:
t = t.replace(",", "")
try:
return float(t)
except Exception:
return np.nan
def minmax_norm(s: pd.Series) -> pd.Series:
x = pd.to_numeric(s, errors="coerce").astype(float)
mn, mx = x.min(skipna=True), x.max(skipna=True)
if pd.isna(mn) or pd.isna(mx) or mx == mn:
return pd.Series(0.0, index=s.index)
return (x - mn) / (mx - mn)
def norm_kew(v):
if pd.isna(v):
return None
t = str(v).strip().upper()
if "KAB" in t or "KOTA" in t:
return "KAB/KOTA"
if "PROV" in t:
return "PROVINSI"
if "PUSAT" in t or "NASIONAL" in t:
return "PUSAT"
return t
def norm_prov_disp(s):
if pd.isna(s):
return None
t = str(s).strip().upper()
t = t.replace("\u00a0", " ")
t = " ".join(t.split())
t = t.replace("PROPINSI", "PROVINSI")
while t.startswith("PROVINSI PROVINSI "):
t = t.replace("PROVINSI PROVINSI ", "PROVINSI ", 1)
if t.startswith("PROVINSI "):
name = t[len("PROVINSI "):].strip()
else:
name = t
name = " ".join(name.split())
if not name:
return None
return f"PROVINSI {name}"
def norm_prov_label(s):
if pd.isna(s):
return None
t = str(s).strip().upper().replace("\u00a0", " ")
t = " ".join(t.split())
t = t.replace("PROPINSI", "PROVINSI")
t = t.replace("PROVINSI", "").strip()
return re.sub(r"[^A-Z0-9]+", "", t)
def norm_kab_label(s):
if pd.isna(s):
return None
t = str(s).upper()
t = t.replace("KABUPATEN", "KAB")
t = t.replace("KAB.", "KAB")
t = t.replace("KOTA ADMINISTRASI", "KOTA")
t = t.replace("KOTA ADM.", "KOTA")
t = t.replace("KOTA.", "KOTA")
t = " ".join(t.split())
return re.sub(r"[^A-Z0-9]+", "", t)
def safe_div(num, den):
if den is None or pd.isna(den) or float(den) <= 0:
return np.nan
return float(num) / float(den)
def faktor_penyesuaian_total(n_total: float, target_total: float) -> float:
if target_total is None or pd.isna(target_total) or float(target_total) <= 0:
return 1.0
if n_total is None or pd.isna(n_total) or float(n_total) < 0:
n_total = 0.0
return float(min(float(n_total) / float(target_total), 1.0))
# ============================================================
# 3) INDIKATOR IPLM
# ============================================================
koleksi_cols = [
"JudulTercetak","EksemplarTercetak","JudulElektronik","EksemplarElektronik",
"TambahJudulTercetak","TambahEksemplarTercetak",
"TambahJudulElektronik","TambahEksemplarElektronik",
"KomitmenAnggaranKoleksi"
]
sdm_cols = [
"TenagaKualifikasiIlmuPerpustakaan",
"TenagaFungsionalProfesional",
"TenagaPKB",
"AnggaranTenaga"
]
pelayanan_cols = [
"PesertaBudayaBaca","PemustakaLuringDaring","PemustakaFasilitasTIK",
"PemanfaatanJudulTercetak","PemanfaatanEksemplarTercetak",
"PemanfaatanJudulElektronik","PemanfaatanEksemplarElektronik"
]
pengelolaan_cols = [
"KegiatanBudayaBaca","KegiatanKerjasama","VariasiLayanan","Kebijakan","AnggaranLayanan"
]
all_indicators = koleksi_cols + sdm_cols + pelayanan_cols + pengelolaan_cols
alias_map_raw = {
"j_judul_koleksi_tercetak": "JudulTercetak",
"j_eksemplar_koleksi_tercetak": "EksemplarTercetak",
"j_judul_koleksi_digital": "JudulElektronik",
"j_eksemplar_koleksi_digital": "EksemplarElektronik",
"tambah_judul_koleksi_tercetak": "TambahJudulTercetak",
"tambah_eksemplar_koleksi_tercetak": "TambahEksemplarTercetak",
"tambah_judul_koleksi_digital": "TambahJudulElektronik",
"tambah_eksemplar_koleksi_digital": "TambahEksemplarElektronik",
"j_anggaran_koleksi": "KomitmenAnggaranKoleksi",
"j_tenaga_ilmu_perpus": "TenagaKualifikasiIlmuPerpustakaan",
"j_tenaga_nonilmu_perpus": "TenagaFungsionalProfesional",
"j_tenaga_pkb": "TenagaPKB",
"j_anggaran_diklat_perpus": "AnggaranTenaga",
"j_peserta_budaya_baca": "PesertaBudayaBaca",
"j_pemustaka_luring_daring": "PemustakaLuringDaring",
"j_pemustaka_fasilitas_tik": "PemustakaFasilitasTIK",
"j_judul_koleksi_tercetak_termanfaat": "PemanfaatanJudulTercetak",
"j_eksemplar_koleksi_tercetak_termanfaat": "PemanfaatanEksemplarTercetak",
"j_judul_koleksi_digital_termanfaat": "PemanfaatanJudulElektronik",
"j_eksemplar_koleksi_digital_termanfaat": "PemanfaatanEksemplarElektronik",
"j_kegiatan_budaya_baca_peningkatan_literasi": "KegiatanBudayaBaca",
"j_kerjasama_pengembangan_perpus": "KegiatanKerjasama",
"j_variasi_layanan": "VariasiLayanan",
"j_kebijakan_prosedur_pelayanan": "Kebijakan",
"j_anggaran_peningkatan_pelayanan": "AnggaranLayanan",
}
alias_map = {_canon(k): v for k, v in alias_map_raw.items()}
# ============================================================
# 4) PIPELINE NASIONAL (ENTITAS)
# ============================================================
def _mean_norm_cols(row, cols):
vals = []
for c in cols:
k = f"norm_{c}"
if k in row.index:
v = row[k]
if pd.isna(v):
v = 0.0
vals.append(float(v))
return float(np.mean(vals)) if vals else 0.0
def prepare_global(df_src: pd.DataFrame) -> pd.DataFrame:
if df_src is None or df_src.empty:
return df_src
df = df_src.copy()
rename_map = {}
for col in df.columns:
c = _canon(col)
if c in alias_map:
rename_map[col] = alias_map[c]
else:
for tgt in all_indicators:
if c == _canon(tgt):
rename_map[col] = tgt
break
if rename_map:
df = df.rename(columns=rename_map)
available = [c for c in all_indicators if c in df.columns]
for c in available:
df[c] = df[c].apply(coerce_num)
for c in available:
x = pd.to_numeric(df[c], errors="coerce").astype(float).values
mask = ~np.isnan(x)
transformed = np.full_like(x, np.nan, dtype=float)
if mask.sum() > 1:
pt = PowerTransformer(method="yeo-johnson", standardize=False)
transformed[mask] = pt.fit_transform(x[mask].reshape(-1, 1)).ravel()
else:
transformed[mask] = x[mask]
df[f"norm_{c}"] = minmax_norm(pd.Series(transformed, index=df.index))
df["sub_koleksi"] = df.apply(lambda r: _mean_norm_cols(r, [c for c in koleksi_cols if c in available]), axis=1)
df["sub_sdm"] = df.apply(lambda r: _mean_norm_cols(r, [c for c in sdm_cols if c in available]), axis=1)
df["sub_pelayanan"] = df.apply(lambda r: _mean_norm_cols(r, [c for c in pelayanan_cols if c in available]), axis=1)
df["sub_pengelolaan"] = df.apply(lambda r: _mean_norm_cols(r, [c for c in pengelolaan_cols if c in available]), axis=1)
df["dim_kepatuhan"] = df[["sub_koleksi","sub_sdm"]].mean(axis=1)
df["dim_kinerja"] = df[["sub_pelayanan","sub_pengelolaan"]].mean(axis=1)
df["Indeks_Dasar_0_100"] = 100 * (W_KEPATUHAN * df["dim_kepatuhan"] + W_KINERJA * df["dim_kinerja"])
for c in ["sub_koleksi","sub_sdm","sub_pelayanan","sub_pengelolaan","dim_kepatuhan","dim_kinerja","Indeks_Dasar_0_100"]:
df[c] = pd.to_numeric(df[c], errors="coerce").fillna(0.0)
return df
# ============================================================
# 5) CACHE LOADER (NO UPLOAD)
# ============================================================
_CACHE = {
"key": None,
"df_all": None,
"df_raw": None,
"pop_kab": None,
"pop_prov": None,
"pop_khusus": None,
"meta": None,
"info": None
}
def _parse_pop_khusus(path_xlsx: str) -> pd.DataFrame:
df = pd.read_excel(path_xlsx)
if df is None or df.empty:
return pd.DataFrame()
# file kamu: Propinsi/Kab/kota | POP_KHUSUS | SAMPEL_KHUSUS_68%
c_mix = pick_col(df, [
"Propinsi/Kab/kota", "Propinsi/Kab/Kota", "Provinsi/Kab/Kota",
"Provinsi/Kab/kota", "Provinsi/Kabupaten/Kota",
"Wilayah", "Nama Wilayah"
])
if c_mix is None:
raise ValueError("POP_KHUSUS: kolom gabungan Provinsi/Kab/Kota tidak ditemukan.")
c_pop = pick_col(df, ["POP_KHUSUS", "Pop_Khusus", "pop_khusus"])
c_target = pick_col(df, ["SAMPEL_KHUSUS_68%", "Sampel_Khusus_68%", "sampel_khusus_68%"])
if c_pop is None or c_target is None:
raise ValueError("POP_KHUSUS: kolom 'POP_KHUSUS' dan/atau 'SAMPEL_KHUSUS_68%' tidak ditemukan.")
mix = df[c_mix].astype(str).fillna("").str.strip()
pop_series = df[c_pop].apply(coerce_num)
tgt_series = df[c_target].apply(coerce_num)
rows = []
current_prov = None
for m, pval, tval in zip(mix.tolist(), pop_series.tolist(), tgt_series.tolist()):
mm = _disp_text(m) or ""
if mm == "":
continue
# === PROV row: dianggap TOTAL PROVINSI (punya nilai!) ===
if mm.startswith("PROVINSI "):
prov_name = mm.replace("PROVINSI", "").strip()
current_prov = prov_name
rows.append({
"LEVEL": "PROV",
"Provinsi_Label": f"PROVINSI {prov_name}",
"Kab_Kota_Label": None,
"Pop_Total_Jenis": pval,
"Target68_Total_Jenis": tval,
})
continue
# === KAB/KOTA row ===
rows.append({
"LEVEL": "KAB",
"Provinsi_Label": f"PROVINSI {current_prov}" if current_prov else None,
"Kab_Kota_Label": mm,
"Pop_Total_Jenis": pval,
"Target68_Total_Jenis": tval,
})
pop = pd.DataFrame(rows)
if pop.empty:
return pop
pop["Pop_Total_Jenis"] = pd.to_numeric(pop["Pop_Total_Jenis"], errors="coerce")
pop["Target68_Total_Jenis"] = pd.to_numeric(pop["Target68_Total_Jenis"], errors="coerce")
# fallback aman (jaga-jaga)
m_need_pop = pop["Pop_Total_Jenis"].isna() & pop["Target68_Total_Jenis"].notna() & (pop["Target68_Total_Jenis"] > 0)
pop.loc[m_need_pop, "Pop_Total_Jenis"] = pop.loc[m_need_pop, "Target68_Total_Jenis"] / float(FALLBACK_TARGET_RATIO)
m_need_target = pop["Target68_Total_Jenis"].isna() & pop["Pop_Total_Jenis"].notna() & (pop["Pop_Total_Jenis"] > 0)
pop.loc[m_need_target, "Target68_Total_Jenis"] = pop.loc[m_need_target, "Pop_Total_Jenis"] * float(FALLBACK_TARGET_RATIO)
# keys
pop["prov_key"] = pop["Provinsi_Label"].apply(norm_prov_label)
pop["kab_key"] = pop["Kab_Kota_Label"].apply(norm_kab_label) if "Kab_Kota_Label" in pop.columns else None
return pop
def load_default_files(force=False):
key = (
DATA_FILE, POP_KAB, POP_PROV, POP_KHUSUS,
_mtime(DATA_FILE), _mtime(POP_KAB), _mtime(POP_PROV), _mtime(POP_KHUSUS)
)
if (not force) and _CACHE["key"] == key and _CACHE["df_all"] is not None:
return _CACHE["df_all"], _CACHE["df_raw"], _CACHE["pop_kab"], _CACHE["pop_prov"], _CACHE["pop_khusus"], _CACHE["meta"], _CACHE["info"]
for p, label in [(DATA_FILE, "DM"), (POP_KAB, "POP_KAB"), (POP_PROV, "POP_PROV"), (POP_KHUSUS, "POP_KHUSUS")]:
if not Path(p).exists():
info = f"❌ File {label} tidak ditemukan: `{p}`"
_CACHE.update({"key": key, "df_all": None, "df_raw": None, "pop_kab": None, "pop_prov": None, "pop_khusus": None, "meta": {}, "info": info})
return None, None, None, None, None, {}, info
fp = Path(DATA_FILE)
xls = pd.ExcelFile(fp)
frames = [pd.read_excel(fp, sheet_name=s) for s in xls.sheet_names]
df_raw = pd.concat(frames, ignore_index=True, sort=False)
prov_col = pick_col(df_raw, ["provinsi", "Provinsi", "PROVINSI"])
kab_col = pick_col(df_raw, ["kab_kota", "Kab/Kota", "Kab_Kota", "KAB/KOTA", "kabupaten_kota", "Kabupaten/Kota", "kabupaten kota", "kota"])
kew_col = pick_col(df_raw, ["kewenangan", "jenis_kewenangan", "Kewenangan", "KEWENANGAN"])
jenis_col = pick_col(df_raw, ["jenis_perpustakaan", "Jenis Perpustakaan", "JENIS_PERPUSTAKAAN"])
nama_col = pick_col(df_raw, ["nm_perpustakaan","nama_perpustakaan","Nama Perpustakaan","nm_instansi_lembaga","nm_perpus"])
missing = []
if prov_col is None: missing.append("Provinsi")
if kab_col is None: missing.append("Kab/Kota")
if kew_col is None: missing.append("Kewenangan")
if jenis_col is None: missing.append("Jenis Perpustakaan")
if missing:
info = f"❌ Kolom wajib tidak ditemukan di DM: {', '.join(missing)}"
_CACHE.update({"key": key, "df_all": None, "df_raw": None, "pop_kab": None, "pop_prov": None, "pop_khusus": None, "meta": {}, "info": info})
return None, None, None, None, None, {}, info
val_map_jenis = {
"PERPUSTAKAAN SEKOLAH": "sekolah", "SEKOLAH": "sekolah",
"PERPUSTAKAAN UMUM": "umum", "UMUM": "umum", "PERPUSTAKAAN DAERAH": "umum",
"PERPUSTAKAAN KHUSUS": "khusus", "KHUSUS": "khusus",
}
df_raw["KEW_NORM"] = df_raw[kew_col].apply(norm_kew)
df_raw["_dataset"] = df_raw[jenis_col].astype(str).str.strip().str.upper().map(val_map_jenis)
df_raw["PROV_DISP"] = df_raw[prov_col].apply(norm_prov_disp)
df_raw["KAB_DISP"] = df_raw[kab_col].apply(_disp_text)
df_raw["prov_key"] = df_raw["PROV_DISP"].apply(norm_prov_label)
df_raw["kab_key"] = df_raw["KAB_DISP"].apply(norm_kab_label)
if nama_col and nama_col in df_raw.columns:
kcols = [prov_col, kab_col, kew_col, jenis_col, nama_col]
else:
kcols = [prov_col, kab_col, kew_col, jenis_col]
tmp = df_raw[kcols].astype(str).fillna("").apply(lambda s: s.str.strip(), axis=0)
df_raw["_row_key"] = tmp.apply(lambda r: "||".join(r.values.tolist()), axis=1).apply(_canon)
before = len(df_raw)
df_raw = df_raw.drop_duplicates(subset=["_row_key"], keep="first").copy()
after = len(df_raw)
# =========================
# POP KAB
# =========================
pk = pd.read_excel(POP_KAB)
c_kab = pick_col(pk, ["KABUPATEN_KOTA","Kab/Kota","Kabupaten/Kota","KAB/KOTA","Kabupaten_Kota","kab_kota","kabupaten_kota"])
c_prov = pick_col(pk, ["PROVINSI","Provinsi","provinsi"])
# NOTE: kita tetap load semua kolom, karena BLOK 6 akan pakai nama kolom asli
if c_kab is None:
info = "❌ POP_KAB: wajib ada kolom Kab/Kota."
_CACHE.update({"key": key, "df_all": None, "df_raw": None, "pop_kab": None, "pop_prov": None, "pop_khusus": None, "meta": {}, "info": info})
return None, None, None, None, None, {}, info
pop_kab = pk.copy()
pop_kab["Kab_Kota_Label"] = pk[c_kab].astype(str).str.strip()
pop_kab["Provinsi_Label"] = pk[c_prov].astype(str).str.strip() if c_prov else ""
pop_kab["kab_key"] = pop_kab["Kab_Kota_Label"].apply(norm_kab_label)
pop_kab = pop_kab.groupby("kab_key", as_index=False).first()
# =========================
# POP PROV
# =========================
pp = pd.read_excel(POP_PROV)
c_pr = pick_col(pp, ["Provinsi","PROVINSI","provinsi","Propinsi","PROPINSI","propinsi"])
if c_pr is None:
info = "❌ POP_PROV: wajib ada kolom Provinsi."
_CACHE.update({"key": key, "df_all": None, "df_raw": None, "pop_kab": None, "pop_prov": None, "pop_khusus": None, "meta": {}, "info": info})
return None, None, None, None, None, {}, info
pop_prov = pp.copy()
pop_prov["Provinsi_Label"] = pp[c_pr].astype(str).str.strip()
pop_prov["prov_key"] = pop_prov["Provinsi_Label"].apply(norm_prov_label)
pop_prov = pop_prov.groupby("prov_key", as_index=False).first()
# =========================
# POP KHUSUS
# =========================
try:
pop_khusus = _parse_pop_khusus(POP_KHUSUS)
except Exception as e:
info = f"❌ POP_KHUSUS gagal dibaca: {repr(e)}"
_CACHE.update({"key": key, "df_all": None, "df_raw": None, "pop_kab": None, "pop_prov": None, "pop_khusus": None, "meta": {}, "info": info})
return None, None, None, None, None, {}, info
df_all = prepare_global(df_raw)
meta = dict(prov_col=prov_col, kab_col=kab_col, kew_col=kew_col, jenis_col=jenis_col, nama_col=nama_col)
info = (
f"✅ Mode NO UPLOAD (cache aktif)<br>"
f"✅ DM: <b>{fp.name}</b> | Baris: {before} → dedup: {after}<br>"
f"✅ POP_KAB: <b>{Path(POP_KAB).name}</b> (n={len(pop_kab)})<br>"
f"✅ POP_PROV: <b>{Path(POP_PROV).name}</b> (n={len(pop_prov)})<br>"
f"✅ POP_KHUSUS: <b>{Path(POP_KHUSUS).name}</b> (n={len(pop_khusus)}) — (PROV row ikut dihitung)<br>"
f"🕒 mtime: DM={time.ctime(_mtime(DATA_FILE))} | Kab={time.ctime(_mtime(POP_KAB))} | Prov={time.ctime(_mtime(POP_PROV))} | Khusus={time.ctime(_mtime(POP_KHUSUS))}"
)
_CACHE.update({
"key": key,
"df_all": df_all,
"df_raw": df_raw,
"pop_kab": pop_kab,
"pop_prov": pop_prov,
"pop_khusus": pop_khusus,
"meta": meta,
"info": info
})
return df_all, df_raw, pop_kab, pop_prov, pop_khusus, meta, info
# =========================
# DM gabungan semua sheet
# =========================
fp = Path(DATA_FILE)
xls = pd.ExcelFile(fp)
frames = [pd.read_excel(fp, sheet_name=s) for s in xls.sheet_names]
df_raw = pd.concat(frames, ignore_index=True, sort=False)
prov_col = pick_col(df_raw, ["provinsi", "Provinsi", "PROVINSI"])
kab_col = pick_col(df_raw, ["kab_kota", "Kab/Kota", "Kab_Kota", "KAB/KOTA", "kabupaten_kota", "Kabupaten/Kota", "kabupaten kota", "kota"])
kew_col = pick_col(df_raw, ["kewenangan", "jenis_kewenangan", "Kewenangan", "KEWENANGAN"])
jenis_col = pick_col(df_raw, ["jenis_perpustakaan", "Jenis Perpustakaan", "JENIS_PERPUSTAKAAN"])
nama_col = pick_col(df_raw, ["nm_perpustakaan","nama_perpustakaan","Nama Perpustakaan","nm_instansi_lembaga","nm_perpus"])
missing = []
if prov_col is None:
missing.append("Provinsi")
if kab_col is None:
missing.append("Kab/Kota")
if kew_col is None:
missing.append("Kewenangan")
if jenis_col is None:
missing.append("Jenis Perpustakaan")
if missing:
info = f"❌ Kolom wajib tidak ditemukan di DM: {', '.join(missing)}"
_CACHE.update({
"key": key, "df_all": None, "df_raw": None,
"pop_kab": None, "pop_prov": None, "pop_khusus": None,
"meta": {}, "info": info
})
return None, None, None, None, None, {}, info
val_map_jenis = {
"PERPUSTAKAAN SEKOLAH": "sekolah", "SEKOLAH": "sekolah",
"PERPUSTAKAAN UMUM": "umum", "UMUM": "umum", "PERPUSTAKAAN DAERAH": "umum",
"PERPUSTAKAAN KHUSUS": "khusus", "KHUSUS": "khusus",
}
df_raw["KEW_NORM"] = df_raw[kew_col].apply(norm_kew)
df_raw["_dataset"] = df_raw[jenis_col].astype(str).str.strip().str.upper().map(val_map_jenis)
df_raw["PROV_DISP"] = df_raw[prov_col].apply(norm_prov_disp)
df_raw["KAB_DISP"] = df_raw[kab_col].apply(_disp_text)
df_raw["prov_key"] = df_raw["PROV_DISP"].apply(norm_prov_label)
df_raw["kab_key"] = df_raw["KAB_DISP"].apply(norm_kab_label)
if nama_col and nama_col in df_raw.columns:
kcols = [prov_col, kab_col, kew_col, jenis_col, nama_col]
else:
kcols = [prov_col, kab_col, kew_col, jenis_col]
tmp = df_raw[kcols].astype(str).fillna("").apply(lambda s: s.str.strip(), axis=0)
df_raw["_row_key"] = tmp.apply(lambda r: "||".join(r.values.tolist()), axis=1).apply(_canon)
before = len(df_raw)
df_raw = df_raw.drop_duplicates(subset=["_row_key"], keep="first").copy()
after = len(df_raw)
# =========================
# POP KAB (KEEP PER-JENIS)
# =========================
pk = pd.read_excel(POP_KAB)
c_kab = pick_col(pk, ["KABUPATEN_KOTA","Kab/Kota","Kabupaten/Kota","KAB/KOTA","Kabupaten_Kota","kab_kota","kabupaten_kota"])
c_prov = pick_col(pk, ["PROVINSI","Provinsi","provinsi"])
c_target_total = pick_col(pk, [
"sampel_total","Sampel_total","Sampel Total","TOTAL_SAMPEL","total_sampel",
"target_total_68","Target_Total_68","target_68","TARGET_68"
])
# REAL kolom file user:
c_pop_umum = pick_col(pk, ["jumlah_populasi_umum","Jumlah_populasi_umum","JUMLAH_POPULASI_UMUM","POP_UMUM","pop_umum"])
c_target_umum = pick_col(pk, ["Sampel_umum_68%","Sampel_umum_68","SAMPEL_UMUM_68%","SAMPEL_UMUM_68","TARGET_UMUM_68"])
c_pop_sekolah = pick_col(pk, ["jumlah_populasi_sekolah","Jumlah_populasi_sekolah","JUMLAH_POPULASI_SEKOLAH","POP_SEKOLAH","pop_sekolah"])
c_target_sekolah = pick_col(pk, ["Sampel_sekolah_68%","Sampel_sekolah_68","SAMPEL_SEKOLAH_68%","SAMPEL_SEKOLAH_68","TARGET_SEKOLAH_68"])
if c_kab is None or c_target_total is None:
info = "❌ POP_KAB: wajib ada kolom Kab/Kota dan sampel_total (target 68%)."
_CACHE.update({
"key": key, "df_all": None, "df_raw": None,
"pop_kab": None, "pop_prov": None, "pop_khusus": None,
"meta": {}, "info": info
})
return None, None, None, None, None, {}, info
pop_kab = pd.DataFrame({
"Provinsi_Label": pk[c_prov].astype(str).str.strip() if c_prov else "",
"Kab_Kota_Label": pk[c_kab].astype(str).str.strip(),
"Target68_Total": pk[c_target_total].apply(coerce_num),
"Pop_Umum": pk[c_pop_umum].apply(coerce_num) if c_pop_umum else np.nan,
"Target68_Umum": pk[c_target_umum].apply(coerce_num) if c_target_umum else np.nan,
"Pop_Sekolah": pk[c_pop_sekolah].apply(coerce_num) if c_pop_sekolah else np.nan,
"Target68_Sekolah": pk[c_target_sekolah].apply(coerce_num) if c_target_sekolah else np.nan,
})
# fallback target per jenis dari pop
m = pop_kab["Target68_Umum"].isna() & pop_kab["Pop_Umum"].notna() & (pop_kab["Pop_Umum"] > 0)
pop_kab.loc[m, "Target68_Umum"] = pop_kab.loc[m, "Pop_Umum"] * float(FALLBACK_TARGET_RATIO)
m = pop_kab["Target68_Sekolah"].isna() & pop_kab["Pop_Sekolah"].notna() & (pop_kab["Pop_Sekolah"] > 0)
pop_kab.loc[m, "Target68_Sekolah"] = pop_kab.loc[m, "Pop_Sekolah"] * float(FALLBACK_TARGET_RATIO)
pop_kab["Pop_Total"] = (
pd.to_numeric(pop_kab["Pop_Umum"], errors="coerce").fillna(0.0)
+ pd.to_numeric(pop_kab["Pop_Sekolah"], errors="coerce").fillna(0.0)
)
m_need_pop = (pop_kab["Pop_Total"] <= 0) & pop_kab["Target68_Total"].notna() & (pop_kab["Target68_Total"] > 0)
pop_kab.loc[m_need_pop, "Pop_Total"] = pop_kab.loc[m_need_pop, "Target68_Total"] / float(FALLBACK_TARGET_RATIO)
pop_kab["kab_key"] = pop_kab["Kab_Kota_Label"].apply(norm_kab_label)
pop_kab = pop_kab.groupby("kab_key", as_index=False).agg({
"Kab_Kota_Label": "first",
"Provinsi_Label": "first",
"Target68_Total": "max",
"Pop_Total": "max",
"Pop_Umum": "max",
"Target68_Umum": "max",
"Pop_Sekolah": "max",
"Target68_Sekolah": "max",
})
# =========================
# POP PROV (KEEP PER-JENIS)
# =========================
pp = pd.read_excel(POP_PROV)
c_pr = pick_col(pp, ["Provinsi","PROVINSI","provinsi","Propinsi","PROPINSI","propinsi"])
c_target_total = pick_col(pp, ["total _sampel","total_sampel","TOTAL_SAMPEL","Total Sampel","target_total_68","Target_Total_68"])
# REAL kolom file user:
c_pop_sekolah = pick_col(pp, ["total_pend","TOTAL_PEND","total_penduduk","Total Penduduk"])
c_pop_umum = pick_col(pp, ["perpus_umum_prop","PERPUS_UMUM_PROP","Perpus_umum_prop"])
if c_pr is None or c_target_total is None:
info = "❌ POP_PROV: wajib ada kolom Provinsi dan total _sampel (target 68%)."
_CACHE.update({
"key": key, "df_all": None, "df_raw": None,
"pop_kab": None, "pop_prov": None, "pop_khusus": None,
"meta": {}, "info": info
})
return None, None, None, None, None, {}, info
pop_prov = pd.DataFrame({
"Provinsi_Label": pp[c_pr].astype(str).str.strip(),
"Target68_Total_Prov": pp[c_target_total].apply(coerce_num),
"Pop_Sekolah_Prov": pp[c_pop_sekolah].apply(coerce_num) if c_pop_sekolah else np.nan,
"Target68_Sekolah_Prov": pp[c_target_total].apply(coerce_num), # sesuai file user
"Pop_Umum_Prov": pp[c_pop_umum].apply(coerce_num) if c_pop_umum else np.nan,
"Target68_Umum_Prov": np.nan,
})
m = pop_prov["Target68_Umum_Prov"].isna() & pop_prov["Pop_Umum_Prov"].notna() & (pop_prov["Pop_Umum_Prov"] > 0)
pop_prov.loc[m, "Target68_Umum_Prov"] = pop_prov.loc[m, "Pop_Umum_Prov"] * float(FALLBACK_TARGET_RATIO)
pop_prov["Pop_Total_Prov"] = (
pd.to_numeric(pop_prov["Pop_Sekolah_Prov"], errors="coerce").fillna(0.0)
+ pd.to_numeric(pop_prov["Pop_Umum_Prov"], errors="coerce").fillna(0.0)
)
m_need_pop = (pop_prov["Pop_Total_Prov"] <= 0) & pop_prov["Target68_Total_Prov"].notna() & (pop_prov["Target68_Total_Prov"] > 0)
pop_prov.loc[m_need_pop, "Pop_Total_Prov"] = pop_prov.loc[m_need_pop, "Target68_Total_Prov"] / float(FALLBACK_TARGET_RATIO)
pop_prov["prov_key"] = pop_prov["Provinsi_Label"].apply(norm_prov_label)
pop_prov = pop_prov.groupby("prov_key", as_index=False).agg({
"Provinsi_Label": "first",
"Target68_Total_Prov": "max",
"Pop_Total_Prov": "max",
"Pop_Sekolah_Prov": "max",
"Target68_Sekolah_Prov": "max",
"Pop_Umum_Prov": "max",
"Target68_Umum_Prov": "max",
})
# =========================
# POP KHUSUS
# =========================
try:
pop_khusus = _parse_pop_khusus(POP_KHUSUS)
except Exception as e:
info = f"❌ POP_KHUSUS gagal dibaca: {repr(e)}"
_CACHE.update({
"key": key, "df_all": None, "df_raw": None,
"pop_kab": None, "pop_prov": None, "pop_khusus": None,
"meta": {}, "info": info
})
return None, None, None, None, None, {}, info
df_all = prepare_global(df_raw)
meta = dict(prov_col=prov_col, kab_col=kab_col, kew_col=kew_col, jenis_col=jenis_col, nama_col=nama_col)
info = (
f"✅ Mode NO UPLOAD (cache aktif)<br>"
f"✅ DM: <b>{fp.name}</b> | Baris: {before} → dedup: {after}<br>"
f"✅ POP_KAB: <b>{Path(POP_KAB).name}</b> (n={len(pop_kab)}) — keep per-jenis (umum+sekolah)<br>"
f"✅ POP_PROV: <b>{Path(POP_PROV).name}</b> (n={len(pop_prov)}) — keep per-jenis (umum+sekolah)<br>"
f"✅ POP_KHUSUS: <b>{Path(POP_KHUSUS).name}</b> (n={len(pop_khusus)}) — khusus per kab + prov_key<br>"
f"🕒 mtime: DM={time.ctime(_mtime(DATA_FILE))} | Kab={time.ctime(_mtime(POP_KAB))} | Prov={time.ctime(_mtime(POP_PROV))} | Khusus={time.ctime(_mtime(POP_KHUSUS))}"
)
_CACHE.update({
"key": key,
"df_all": df_all,
"df_raw": df_raw,
"pop_kab": pop_kab,
"pop_prov": pop_prov,
"pop_khusus": pop_khusus,
"meta": meta,
"info": info
})
return df_all, df_raw, pop_kab, pop_prov, pop_khusus, meta, info
# ============================================================
# 6) FAKTOR WILAYAH — PER JENIS
# ============================================================
def build_faktor_wilayah_jenis(
df_filtered: pd.DataFrame,
pop_kab: pd.DataFrame,
pop_prov: pd.DataFrame,
pop_khusus: pd.DataFrame,
kew_value: str
):
if df_filtered is None or df_filtered.empty:
return pd.DataFrame()
kew_norm = str(kew_value or "").upper()
df = df_filtered.copy()
df = df[df["_dataset"].isin(["sekolah", "umum", "khusus"])].copy()
if df.empty:
return pd.DataFrame()
jenis_list = ["sekolah", "umum", "khusus"]
# tentukan level
if "PROV" in kew_norm:
key_col, label_col, label_name, mode = "prov_key", "PROV_DISP", "Provinsi", "PROV"
base_pop = pop_prov.copy() if (pop_prov is not None and not pop_prov.empty) else pd.DataFrame()
if not base_pop.empty and "prov_key" not in base_pop.columns:
base_pop["prov_key"] = base_pop["Provinsi_Label"].apply(norm_prov_label) if "Provinsi_Label" in base_pop.columns else base_pop.iloc[:, 0].apply(norm_prov_label)
base_pop = base_pop.set_index("prov_key") if (not base_pop.empty and "prov_key" in base_pop.columns) else pd.DataFrame().set_index(pd.Index([]))
else:
key_col, label_col, label_name, mode = "kab_key", "KAB_DISP", "Kab/Kota", "KAB"
base_pop = pop_kab.copy() if (pop_kab is not None and not pop_kab.empty) else pd.DataFrame()
if not base_pop.empty and "kab_key" not in base_pop.columns:
base_pop["kab_key"] = base_pop["Kab_Kota_Label"].apply(norm_kab_label) if "Kab_Kota_Label" in base_pop.columns else base_pop.iloc[:, 0].apply(norm_kab_label)
base_pop = base_pop.set_index("kab_key") if (not base_pop.empty and "kab_key" in base_pop.columns) else pd.DataFrame().set_index(pd.Index([]))
# =========================================================
# ✅ GRID WAJIB: semua wilayah × 3 jenis (meski n=0)
# =========================================================
base_keys = df[[key_col, label_col]].drop_duplicates().rename(columns={key_col: "group_key", label_col: label_name})
full = base_keys.assign(_tmp=1).merge(
pd.DataFrame({"Jenis": jenis_list, "_tmp": 1}),
on="_tmp"
).drop(columns="_tmp")
# hitung n per jenis dari DM (boleh 0)
cnt = (
df.groupby([key_col, label_col, "_dataset"], dropna=False)
.size()
.reset_index(name="n_jenis")
.rename(columns={key_col: "group_key", label_col: label_name, "_dataset": "Jenis"})
)
cnt["Jenis"] = cnt["Jenis"].astype(str).str.lower().str.strip()
base_n = full.merge(cnt, on=["group_key", label_name, "Jenis"], how="left")
base_n["n_jenis"] = pd.to_numeric(base_n["n_jenis"], errors="coerce").fillna(0).astype(int)
base_n["target_total_68_jenis"] = 0.0
base_n["pop_total_jenis"] = 0.0
# =========================
# SEKOLAH + UMUM dari POP_KAB / POP_PROV
# =========================
if not base_pop.empty:
if mode == "KAB":
pop_sekolah = pd.to_numeric(base_pop.get("jumlah_populasi_sekolah", 0), errors="coerce").fillna(0.0)
tgt_sekolah = pd.to_numeric(base_pop.get("Sampel_sekolah_68%", 0), errors="coerce").fillna(0.0)
pop_umum = pd.to_numeric(base_pop.get("jumlah_populasi_umum", 0), errors="coerce").fillna(0.0)
tgt_umum = pd.to_numeric(base_pop.get("Sampel_umum_68%", 0), errors="coerce").fillna(0.0)
else:
sma = pd.to_numeric(base_pop.get("sma ", base_pop.get("sma", 0)), errors="coerce").fillna(0.0)
smk = pd.to_numeric(base_pop.get("smk", 0), errors="coerce").fillna(0.0)
slb = pd.to_numeric(base_pop.get("slb", 0), errors="coerce").fillna(0.0)
ssma = pd.to_numeric(base_pop.get("sampel sma", 0), errors="coerce").fillna(0.0)
ssmk = pd.to_numeric(base_pop.get("sampel smk", 0), errors="coerce").fillna(0.0)
sslb = pd.to_numeric(base_pop.get("sampel slb", 0), errors="coerce").fillna(0.0)
pop_sekolah = sma + smk + slb
tgt_sekolah = ssma + ssmk + sslb
pop_umum = pd.to_numeric(base_pop.get("perpus_umum_prop", 0), errors="coerce").fillna(0.0)
tgt_umum = pop_umum * float(FALLBACK_TARGET_RATIO)
m = base_n["Jenis"].eq("sekolah")
base_n.loc[m, "pop_total_jenis"] = base_n.loc[m, "group_key"].map(pop_sekolah).fillna(0.0).values
base_n.loc[m, "target_total_68_jenis"] = base_n.loc[m, "group_key"].map(tgt_sekolah).fillna(0.0).values
m = base_n["Jenis"].eq("umum")
base_n.loc[m, "pop_total_jenis"] = base_n.loc[m, "group_key"].map(pop_umum).fillna(0.0).values
base_n.loc[m, "target_total_68_jenis"] = base_n.loc[m, "group_key"].map(tgt_umum).fillna(0.0).values
# =========================
# KHUSUS dari POP_KHUSUS
# =========================
if pop_khusus is not None and not pop_khusus.empty:
pk = pop_khusus.copy()
pk["Pop_Total_Jenis"] = pd.to_numeric(pk.get("Pop_Total_Jenis", 0), errors="coerce").fillna(0.0)
pk["Target68_Total_Jenis"] = pd.to_numeric(pk.get("Target68_Total_Jenis", 0), errors="coerce").fillna(0.0)
if mode == "PROV":
pk_prov = pk[pk["LEVEL"].astype(str).str.upper() == "PROV"].copy()
pk_map = pk_prov.groupby("prov_key", as_index=True).agg(
pop=("Pop_Total_Jenis", "sum"),
target=("Target68_Total_Jenis", "sum"),
)
pop_series = pk_map["pop"]
tgt_series = pk_map["target"]
else:
pk_kab = pk[pk["LEVEL"].astype(str).str.upper() == "KAB"].copy()
pk_map = pk_kab.groupby("kab_key", as_index=True).agg(
pop=("Pop_Total_Jenis", "sum"),
target=("Target68_Total_Jenis", "sum"),
)
pop_series = pk_map["pop"]
tgt_series = pk_map["target"]
m = base_n["Jenis"].eq("khusus")
base_n.loc[m, "pop_total_jenis"] = base_n.loc[m, "group_key"].map(pop_series).fillna(0.0).values
base_n.loc[m, "target_total_68_jenis"] = base_n.loc[m, "group_key"].map(tgt_series).fillna(0.0).values
# fallback pop dari target (jaga-jaga)
base_n["target_total_68_jenis"] = pd.to_numeric(base_n["target_total_68_jenis"], errors="coerce").fillna(0.0)
base_n["pop_total_jenis"] = pd.to_numeric(base_n["pop_total_jenis"], errors="coerce").fillna(0.0)
m_need_pop = (base_n["pop_total_jenis"] <= 0) & (base_n["target_total_68_jenis"] > 0)
base_n.loc[m_need_pop, "pop_total_jenis"] = base_n.loc[m_need_pop, "target_total_68_jenis"] / float(FALLBACK_TARGET_RATIO)
# faktor / coverage / gap
base_n["faktor_penyesuaian_jenis"] = [
faktor_penyesuaian_total(n, t)
for n, t in zip(
pd.to_numeric(base_n["n_jenis"], errors="coerce").fillna(0).astype(float),
pd.to_numeric(base_n["target_total_68_jenis"], errors="coerce").fillna(0).astype(float),
)
]
base_n["coverage_jenis_%"] = [
(safe_div(n, p) * 100.0) if (p is not None and not pd.isna(p) and float(p) > 0) else 0.0
for n, p in zip(
pd.to_numeric(base_n["n_jenis"], errors="coerce").fillna(0).astype(float),
pd.to_numeric(base_n["pop_total_jenis"], errors="coerce").fillna(0).astype(float),
)
]
base_n["gap_target68_jenis"] = [
max(float(t) - float(n), 0.0)
for n, t in zip(
pd.to_numeric(base_n["n_jenis"], errors="coerce").fillna(0).astype(float),
pd.to_numeric(base_n["target_total_68_jenis"], errors="coerce").fillna(0).astype(float),
)
]
# display
base_n["target_total_68_jenis"] = pd.to_numeric(base_n["target_total_68_jenis"], errors="coerce").fillna(0).round(0).astype(int)
base_n["pop_total_jenis"] = pd.to_numeric(base_n["pop_total_jenis"], errors="coerce").fillna(0).round(0).astype(int)
base_n["coverage_jenis_%"] = pd.to_numeric(base_n["coverage_jenis_%"], errors="coerce").fillna(0.0).round(2)
base_n["faktor_penyesuaian_jenis"] = pd.to_numeric(base_n["faktor_penyesuaian_jenis"], errors="coerce").fillna(1.0).round(3)
base_n["gap_target68_jenis"] = pd.to_numeric(base_n["gap_target68_jenis"], errors="coerce").fillna(0).round(0).astype(int)
return base_n
# ============================================================
# 7) AGREGAT WILAYAH × JENIS (PATCH: faktor 68% PER JENIS)
# ============================================================
def build_agg_wilayah_jenis(df_filtered: pd.DataFrame, faktor_wilayah_jenis: pd.DataFrame, kew_value: str):
if df_filtered is None or df_filtered.empty:
return pd.DataFrame()
kew_norm = str(kew_value or "").upper()
df = df_filtered.copy()
if "PROV" in kew_norm:
key_col, label_col, label_name = "prov_key", "PROV_DISP", "Provinsi"
else:
key_col, label_col, label_name = "kab_key", "KAB_DISP", "Kab/Kota"
df = df[df["_dataset"].isin(["sekolah", "umum", "khusus"])].copy()
if df.empty:
return pd.DataFrame()
jenis_list = ["sekolah", "umum", "khusus"]
# =========================================================
# ✅ GRID WAJIB: semua wilayah × 3 jenis (meski agregat kosong)
# =========================================================
base_keys = df[[key_col, label_col]].drop_duplicates().rename(columns={key_col: "group_key", label_col: label_name})
full = base_keys.assign(_tmp=1).merge(
pd.DataFrame({"Jenis": jenis_list, "_tmp": 1}),
on="_tmp"
).drop(columns="_tmp")
# agregat dari data yang ada
agg_real = df.groupby([key_col, label_col, "_dataset"], dropna=False).agg(
Jumlah=("Indeks_Dasar_0_100", "size"),
Rata2_sub_koleksi=("sub_koleksi", "mean"),
Rata2_sub_sdm=("sub_sdm", "mean"),
Rata2_sub_pelayanan=("sub_pelayanan", "mean"),
Rata2_sub_pengelolaan=("sub_pengelolaan", "mean"),
Rata2_dim_kepatuhan=("dim_kepatuhan", "mean"),
Rata2_dim_kinerja=("dim_kinerja", "mean"),
Indeks_Dasar_Agregat_0_100=("Indeks_Dasar_0_100", "mean"),
).reset_index().rename(columns={key_col: "group_key", label_col: label_name, "_dataset": "Jenis"})
agg_real["Jenis"] = agg_real["Jenis"].astype(str).str.lower().str.strip()
# tempel ke grid + fill 0
agg = full.merge(agg_real, on=["group_key", label_name, "Jenis"], how="left")
for c in ["Jumlah","Rata2_sub_koleksi","Rata2_sub_sdm","Rata2_sub_pelayanan","Rata2_sub_pengelolaan",
"Rata2_dim_kepatuhan","Rata2_dim_kinerja","Indeks_Dasar_Agregat_0_100"]:
if c in agg.columns:
agg[c] = pd.to_numeric(agg[c], errors="coerce").fillna(0.0)
agg["Jumlah"] = agg["Jumlah"].round(0).astype(int)
# merge faktor PER JENIS (sekarang pasti match, karena grid ada)
if faktor_wilayah_jenis is None or faktor_wilayah_jenis.empty:
agg["faktor_penyesuaian_jenis"] = 1.0
agg["target_total_68_jenis"] = 0
agg["pop_total_jenis"] = 0
agg["coverage_jenis_%"] = 0.0
agg["gap_target68_jenis"] = 0
else:
fw = faktor_wilayah_jenis.copy()
fw["Jenis"] = fw["Jenis"].astype(str).str.lower().str.strip()
keep = ["group_key", label_name, "Jenis",
"faktor_penyesuaian_jenis", "target_total_68_jenis", "pop_total_jenis",
"coverage_jenis_%", "gap_target68_jenis"]
fw = fw[[c for c in keep if c in fw.columns]].copy()
agg = agg.merge(fw, on=["group_key", label_name, "Jenis"], how="left")
agg["faktor_penyesuaian_jenis"] = pd.to_numeric(agg["faktor_penyesuaian_jenis"], errors="coerce").fillna(1.0)
for c in ["target_total_68_jenis","pop_total_jenis","gap_target68_jenis"]:
if c in agg.columns:
agg[c] = pd.to_numeric(agg[c], errors="coerce").fillna(0).round(0).astype(int)
if "coverage_jenis_%" in agg.columns:
agg["coverage_jenis_%"] = pd.to_numeric(agg["coverage_jenis_%"], errors="coerce").fillna(0.0).round(2)
# Indeks FINAL PER JENIS
agg["Indeks_Final_Agregat_0_100"] = (
pd.to_numeric(agg["Indeks_Dasar_Agregat_0_100"], errors="coerce").fillna(0.0)
* pd.to_numeric(agg["faktor_penyesuaian_jenis"], errors="coerce").fillna(1.0)
)
# rounding tampilan
for c in [
"Rata2_sub_koleksi","Rata2_sub_sdm","Rata2_sub_pelayanan","Rata2_sub_pengelolaan",
"Rata2_dim_kepatuhan","Rata2_dim_kinerja"
]:
if c in agg.columns:
agg[c] = pd.to_numeric(agg[c], errors="coerce").fillna(0.0).round(3)
for c in ["Indeks_Dasar_Agregat_0_100","Indeks_Final_Agregat_0_100"]:
if c in agg.columns:
agg[c] = pd.to_numeric(agg[c], errors="coerce").fillna(0.0).round(2)
agg["faktor_penyesuaian_jenis"] = pd.to_numeric(agg["faktor_penyesuaian_jenis"], errors="coerce").fillna(1.0).round(3)
return agg
# ============================================================
# 8) AGREGAT WILAYAH (KESELURUHAN) — FIX: avg3 dari 3 jenis
# + tampilkan Pop/Target/Terkumpul per jenis & total
# ============================================================
def build_agg_wilayah_total_from_jenis(agg_jenis: pd.DataFrame, faktor_wilayah_jenis: pd.DataFrame, kew_value: str):
if agg_jenis is None or agg_jenis.empty:
return pd.DataFrame()
kew_norm = str(kew_value or "").upper()
label_name = "Provinsi" if "PROV" in kew_norm else "Kab/Kota"
jenis_list = ["sekolah", "umum", "khusus"]
a = agg_jenis.copy()
a["Jenis"] = a["Jenis"].astype(str).str.lower().str.strip()
base_keys = a[["group_key", label_name]].drop_duplicates()
full = base_keys.assign(_tmp=1).merge(
pd.DataFrame({"Jenis": jenis_list, "_tmp": 1}),
on="_tmp"
).drop(columns="_tmp")
cols_need = [
"Jumlah",
"Rata2_sub_koleksi","Rata2_sub_sdm","Rata2_sub_pelayanan","Rata2_sub_pengelolaan",
"Rata2_dim_kepatuhan","Rata2_dim_kinerja",
"Indeks_Dasar_Agregat_0_100",
"Indeks_Final_Agregat_0_100",
]
cols_present = [c for c in cols_need if c in a.columns]
full = full.merge(
a[["group_key", label_name, "Jenis"] + cols_present],
on=["group_key", label_name, "Jenis"],
how="left"
)
# missing=0 (avg3 tetap ÷3)
for c in cols_present:
full[c] = pd.to_numeric(full[c], errors="coerce").fillna(0.0)
out = full.groupby(["group_key", label_name], as_index=False).agg(
n_total=("Jumlah", "sum"),
Rata2_sub_koleksi=("Rata2_sub_koleksi", "mean"),
Rata2_sub_sdm=("Rata2_sub_sdm", "mean"),
Rata2_sub_pelayanan=("Rata2_sub_pelayanan", "mean"),
Rata2_sub_pengelolaan=("Rata2_sub_pengelolaan", "mean"),
Rata2_dim_kepatuhan=("Rata2_dim_kepatuhan", "mean"),
Rata2_dim_kinerja=("Rata2_dim_kinerja", "mean"),
Indeks_Dasar_Agregat_0_100=("Indeks_Dasar_Agregat_0_100", "mean"),
Indeks_Final_Wilayah_0_100=("Indeks_Final_Agregat_0_100", "mean"),
)
# tempel Pop/Target/Terkumpul per jenis & total
if faktor_wilayah_jenis is not None and not faktor_wilayah_jenis.empty:
fw = faktor_wilayah_jenis.copy()
fw["Jenis"] = fw["Jenis"].astype(str).str.lower().str.strip()
piv = fw.pivot_table(
index=["group_key", label_name],
columns="Jenis",
values=["pop_total_jenis", "target_total_68_jenis", "n_jenis", "gap_target68_jenis", "faktor_penyesuaian_jenis"],
aggfunc="first"
)
piv.columns = [f"{v}_{k}" for v, k in piv.columns]
piv = piv.reset_index()
out = out.merge(piv, on=["group_key", label_name], how="left")
# NaN -> 0 / 1
for j in ["sekolah", "umum", "khusus"]:
for basecol in ["pop_total_jenis", "target_total_68_jenis", "n_jenis", "gap_target68_jenis"]:
c = f"{basecol}_{j}"
if c in out.columns:
out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0).round(0).astype(int)
cfac = f"faktor_penyesuaian_jenis_{j}"
if cfac in out.columns:
out[cfac] = pd.to_numeric(out[cfac], errors="coerce").fillna(1.0).round(3)
# TOTAL (sum 3 jenis)
out["pop_total_all"] = (
out.get("pop_total_jenis_sekolah", 0)
+ out.get("pop_total_jenis_umum", 0)
+ out.get("pop_total_jenis_khusus", 0)
).astype(int)
out["target_total_68_all"] = (
out.get("target_total_68_jenis_sekolah", 0)
+ out.get("target_total_68_jenis_umum", 0)
+ out.get("target_total_68_jenis_khusus", 0)
).astype(int)
out["terkumpul_all"] = (
out.get("n_jenis_sekolah", 0)
+ out.get("n_jenis_umum", 0)
+ out.get("n_jenis_khusus", 0)
).astype(int)
out["coverage_target68_all_%"] = np.where(
pd.to_numeric(out["target_total_68_all"], errors="coerce").fillna(0).values > 0,
(pd.to_numeric(out["terkumpul_all"], errors="coerce").fillna(0).values / pd.to_numeric(out["target_total_68_all"], errors="coerce").fillna(0).values) * 100.0,
0.0
)
out["coverage_target68_all_%"] = pd.to_numeric(out["coverage_target68_all_%"], errors="coerce").fillna(0.0).round(2)
# rounding index
for c in [
"Rata2_sub_koleksi","Rata2_sub_sdm","Rata2_sub_pelayanan","Rata2_sub_pengelolaan",
"Rata2_dim_kepatuhan","Rata2_dim_kinerja"
]:
if c in out.columns:
out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0.0).round(3)
for c in ["Indeks_Dasar_Agregat_0_100","Indeks_Final_Wilayah_0_100"]:
if c in out.columns:
out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0.0).round(2)
out["n_total"] = pd.to_numeric(out["n_total"], errors="coerce").fillna(0).round(0).astype(int)
return out
# ============================================================
# 9) SUMMARY (PER JENIS) + KESELURUHAN
# ============================================================
def build_summary_per_jenis(agg_jenis: pd.DataFrame, agg_total: pd.DataFrame):
jenis_list = ["sekolah", "umum", "khusus"]
def _row_default(jenis):
return {
"Jenis": jenis,
"Jumlah_Wilayah": 0,
"Total_Perpus": 0,
"Pop_Total_Jenis": 0,
"Target68_Total_Jenis": 0,
"Terkumpul_Jenis": 0,
"Coverage_Target68_Jenis_%": 0.0,
"Indeks_Dasar_0_100": 0.0,
"Indeks_Final_Disesuaikan_0_100": 0.0,
"Penyesuaian_Poin": 0.0,
}
rows_by_jenis = {j: _row_default(j) for j in jenis_list}
if agg_jenis is not None and not agg_jenis.empty:
a = agg_jenis.copy()
a["Jenis"] = a["Jenis"].astype(str).str.lower().str.strip()
for c in ["Jumlah","Indeks_Dasar_Agregat_0_100","Indeks_Final_Agregat_0_100","pop_total_jenis","target_total_68_jenis"]:
if c in a.columns:
a[c] = pd.to_numeric(a[c], errors="coerce").fillna(0)
for jenis in jenis_list:
sub = a[a["Jenis"] == jenis].copy()
if sub.empty:
continue
jumlah_wilayah = int(sub.shape[0])
terkumpul = int(pd.to_numeric(sub.get("Jumlah", 0), errors="coerce").fillna(0).sum())
pop_total = int(pd.to_numeric(sub.get("pop_total_jenis", 0), errors="coerce").fillna(0).sum())
target68 = int(pd.to_numeric(sub.get("target_total_68_jenis", 0), errors="coerce").fillna(0).sum())
coverage = (terkumpul / target68 * 100.0) if target68 > 0 else 0.0
dasar = float(pd.to_numeric(sub.get("Indeks_Dasar_Agregat_0_100", 0), errors="coerce").fillna(0).mean())
final = float(pd.to_numeric(sub.get("Indeks_Final_Agregat_0_100", 0), errors="coerce").fillna(0).mean())
rows_by_jenis[jenis] = {
"Jenis": jenis,
"Jumlah_Wilayah": jumlah_wilayah,
"Total_Perpus": terkumpul,
"Pop_Total_Jenis": pop_total,
"Target68_Total_Jenis": target68,
"Terkumpul_Jenis": terkumpul,
"Coverage_Target68_Jenis_%": float(coverage),
"Indeks_Dasar_0_100": float(dasar),
"Indeks_Final_Disesuaikan_0_100": float(final),
"Penyesuaian_Poin": float(final - dasar),
}
rows = [rows_by_jenis[j] for j in jenis_list]
dasar_all = (rows_by_jenis["sekolah"]["Indeks_Dasar_0_100"]
+ rows_by_jenis["umum"]["Indeks_Dasar_0_100"]
+ rows_by_jenis["khusus"]["Indeks_Dasar_0_100"]) / 3.0
final_all = (rows_by_jenis["sekolah"]["Indeks_Final_Disesuaikan_0_100"]
+ rows_by_jenis["umum"]["Indeks_Final_Disesuaikan_0_100"]
+ rows_by_jenis["khusus"]["Indeks_Final_Disesuaikan_0_100"]) / 3.0
pop_all = int(rows_by_jenis["sekolah"]["Pop_Total_Jenis"]
+ rows_by_jenis["umum"]["Pop_Total_Jenis"]
+ rows_by_jenis["khusus"]["Pop_Total_Jenis"])
target_all = int(rows_by_jenis["sekolah"]["Target68_Total_Jenis"]
+ rows_by_jenis["umum"]["Target68_Total_Jenis"]
+ rows_by_jenis["khusus"]["Target68_Total_Jenis"])
terkumpul_all = int(rows_by_jenis["sekolah"]["Terkumpul_Jenis"]
+ rows_by_jenis["umum"]["Terkumpul_Jenis"]
+ rows_by_jenis["khusus"]["Terkumpul_Jenis"])
coverage_all = (terkumpul_all / target_all * 100.0) if target_all > 0 else 0.0
jumlah_wilayah_all = int(agg_total.shape[0]) if (agg_total is not None and not agg_total.empty) else int(
max(rows_by_jenis["sekolah"]["Jumlah_Wilayah"],
rows_by_jenis["umum"]["Jumlah_Wilayah"],
rows_by_jenis["khusus"]["Jumlah_Wilayah"])
)
rows.append({
"Jenis": "keseluruhan",
"Jumlah_Wilayah": jumlah_wilayah_all,
"Total_Perpus": terkumpul_all,
"Pop_Total_Jenis": pop_all,
"Target68_Total_Jenis": target_all,
"Terkumpul_Jenis": terkumpul_all,
"Coverage_Target68_Jenis_%": float(coverage_all),
"Indeks_Dasar_0_100": float(dasar_all),
"Indeks_Final_Disesuaikan_0_100": float(final_all),
"Penyesuaian_Poin": float(final_all - dasar_all),
})
out = pd.DataFrame(rows)
for c in ["Jumlah_Wilayah","Total_Perpus","Pop_Total_Jenis","Target68_Total_Jenis","Terkumpul_Jenis"]:
if c in out.columns:
out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0).round(0).astype(int)
for c in ["Coverage_Target68_Jenis_%","Indeks_Dasar_0_100","Indeks_Final_Disesuaikan_0_100","Penyesuaian_Poin"]:
if c in out.columns:
out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0.0).round(2)
return out
# ============================================================
# 10) DETAIL ENTITAS: Final menempel dari agg_total (wilayah)
# ============================================================
def attach_final_to_detail(df_filtered: pd.DataFrame, agg_total: pd.DataFrame, meta: dict, kew_value: str):
if df_filtered is None or df_filtered.empty:
return pd.DataFrame()
kew_norm = str(kew_value or "").upper()
df = df_filtered.copy()
if "KAB" in kew_norm or "KOTA" in kew_norm:
key_col = "kab_key"
label_cols = ("PROV_DISP", "KAB_DISP")
elif "PROV" in kew_norm:
key_col = "prov_key"
label_cols = ("PROV_DISP", "KAB_DISP")
else:
key_col = "kab_key"
label_cols = ("PROV_DISP", "KAB_DISP")
if agg_total is None or agg_total.empty:
df["Indeks_Final_0_100"] = df["Indeks_Dasar_0_100"]
else:
m = agg_total[["group_key", "Indeks_Final_Wilayah_0_100"]].copy()
df = df.merge(m, left_on=key_col, right_on="group_key", how="left")
df["Indeks_Final_0_100"] = df["Indeks_Final_Wilayah_0_100"].fillna(df["Indeks_Dasar_0_100"])
df = df.drop(columns=[c for c in ["group_key","Indeks_Final_Wilayah_0_100"] if c in df.columns])
base_cols = [label_cols[0], label_cols[1], "KEW_NORM", "_dataset"]
if meta.get("nama_col") and meta["nama_col"] in df.columns:
df["nm_perpustakaan"] = df[meta["nama_col"]].astype(str)
base_cols.insert(2, "nm_perpustakaan")
keep = base_cols + [
"sub_koleksi","sub_sdm","sub_pelayanan","sub_pengelolaan",
"dim_kepatuhan","dim_kinerja",
"Indeks_Dasar_0_100",
"Indeks_Final_0_100",
]
keep = [c for c in keep if c in df.columns]
out = df[keep].copy()
out = out.rename(columns={label_cols[0]:"Provinsi", label_cols[1]:"Kab/Kota", "_dataset":"Jenis"})
for c in ["sub_koleksi","sub_sdm","sub_pelayanan","sub_pengelolaan","dim_kepatuhan","dim_kinerja"]:
if c in out.columns:
out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0.0).round(3)
for c in ["Indeks_Dasar_0_100","Indeks_Final_0_100"]:
if c in out.columns:
out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0.0).round(2)
return out
# ============================================================
# 11) VERIFIKASI PER JENIS (OPSIONAL, TANPA KOMA)
# ============================================================
def build_verif_jenis(faktor_wilayah_jenis: pd.DataFrame, kew_value: str):
if faktor_wilayah_jenis is None or faktor_wilayah_jenis.empty:
return pd.DataFrame()
kew_norm = str(kew_value or "").upper()
label_col = "Provinsi" if "PROV" in kew_norm else "Kab/Kota"
out = faktor_wilayah_jenis.copy()
keep = [c for c in [
label_col, "Jenis",
"pop_total_jenis", "target_total_68_jenis", "n_jenis",
"coverage_jenis_%", "faktor_penyesuaian_jenis", "gap_target68_jenis"
] if c in out.columns]
out = out[keep].copy()
# tanpa koma untuk integer columns
for c in ["pop_total_jenis", "target_total_68_jenis", "n_jenis", "gap_target68_jenis"]:
if c in out.columns:
out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0).round(0).astype(int)
# coverage 2 desimal tetap boleh (request awal kamu coverage decimal 2)
if "coverage_jenis_%" in out.columns:
out["coverage_jenis_%"] = pd.to_numeric(out["coverage_jenis_%"], errors="coerce").fillna(0.0).round(2)
# faktor 3 desimal
if "faktor_penyesuaian_jenis" in out.columns:
out["faktor_penyesuaian_jenis"] = pd.to_numeric(out["faktor_penyesuaian_jenis"], errors="coerce").fillna(1.0).round(3)
return out
# ============================================================
# 12) BELL CURVE
# ============================================================
def _make_bell_curve(dfp: pd.DataFrame, xcol: str, title: str, label_col: str | None = None, hover_cols: list[str] | None = None, min_points: int = 2):
fig = go.Figure()
fig.update_layout(
title=title,
xaxis_title="Indeks (0–100)",
yaxis_title="Kepadatan",
hovermode="x unified",
margin=dict(l=40, r=20, t=60, b=40),
legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0),
)
if dfp is None or dfp.empty or xcol not in dfp.columns:
fig.add_annotation(text="Tidak ada data untuk ditampilkan.", x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False)
fig.update_xaxes(range=[0, 100])
fig.update_yaxes(rangemode="tozero")
return fig
d = dfp.dropna(subset=[xcol]).copy()
if len(d) < 1:
fig.add_annotation(text="Tidak ada data untuk ditampilkan.", x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False)
fig.update_xaxes(range=[0, 100])
fig.update_yaxes(rangemode="tozero")
return fig
if len(d) < min_points:
x_single = float(pd.to_numeric(d[xcol], errors="coerce").iloc[0])
hovertext = None
if label_col and label_col in d.columns:
hovertext = [f"{d[label_col].iloc[0]}<br>{xcol}: {x_single:.2f}"]
fig.add_trace(go.Scatter(
x=[x_single], y=[0], mode="markers", name="Data", marker=dict(size=10),
hovertext=hovertext,
hovertemplate="%{hovertext}<extra></extra>" if hovertext is not None else "Indeks: %{x:.2f}<extra></extra>",
showlegend=False,
))
fig.add_vline(x=x_single, line_width=1, line_dash="dash", annotation_text=f"Nilai: {x_single:.1f}", annotation_position="top")
fig.add_annotation(text="Data hanya 1 titik (kurva normal tidak dibuat).", x=0.5, y=0.08, xref="paper", yref="paper", showarrow=False)
fig.update_xaxes(range=[0, 100])
fig.update_yaxes(rangemode="tozero")
return fig
x = pd.to_numeric(d[xcol], errors="coerce").astype(float).values
x = x[np.isfinite(x)]
if len(x) < 2:
fig.add_annotation(text="Data tidak cukup untuk kurva.", x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False)
fig.update_xaxes(range=[0, 100])
fig.update_yaxes(rangemode="tozero")
return fig
mu = float(np.mean(x))
sigma = float(np.std(x, ddof=1)) if len(x) > 1 else 0.0
if not np.isfinite(sigma) or sigma <= 1e-6:
sigma = max(float(np.std(x, ddof=0)), 1e-3)
xmin = max(0.0, float(np.min(x)) - 5.0)
xmax = min(100.0, float(np.max(x)) + 5.0)
if xmax - xmin < 1e-6:
xmin = max(0.0, mu - 1.0)
xmax = min(100.0, mu + 1.0)
xs = np.linspace(xmin, xmax, 250)
pdf = (1.0 / (sigma * np.sqrt(2 * np.pi))) * np.exp(-0.5 * ((xs - mu) / sigma) ** 2)
fig.add_trace(go.Scatter(
x=xs, y=pdf, mode="lines", name="Kurva Normal",
hovertemplate="x=%{x:.2f}<br>pdf=%{y:.4f}<extra></extra>"
))
hovertext = None
if label_col and label_col in d.columns:
hcols = hover_cols or []
parts = []
for _, r in d.iterrows():
try:
xv = float(pd.to_numeric(r.get(xcol, np.nan), errors="coerce"))
except Exception:
xv = np.nan
s = f"{r[label_col]}"
s += f"<br>{xcol}: {xv:.2f}" if np.isfinite(xv) else f"<br>{xcol}: NA"
for c in hcols:
if c in d.columns and pd.notna(r.get(c, np.nan)):
v = r[c]
if isinstance(v, (int, np.integer)):
s += f"<br>{c}: {int(v)}"
elif isinstance(v, (float, np.floating)):
s += f"<br>{c}: {float(v):.3f}"
else:
s += f"<br>{c}: {v}"
parts.append(s)
hovertext = parts
fig.add_trace(go.Scatter(
x=x, y=np.zeros_like(x), mode="markers", name="Data", marker=dict(size=8),
hovertext=hovertext,
hovertemplate="%{hovertext}<extra></extra>" if hovertext is not None else "Indeks: %{x:.2f}<extra></extra>",
showlegend=False
))
q1, q2, q3 = np.percentile(x, [25, 50, 75])
for xv, lab in [(q1, "Q1"), (q2, "Q2 (Median)"), (q3, "Q3"), (mu, "Mean")]:
fig.add_vline(x=float(xv), line_width=1, line_dash="dash", annotation_text=f"{lab}: {xv:.1f}", annotation_position="top")
fig.update_xaxes(range=[0, 100])
fig.update_yaxes(rangemode="tozero")
return fig
# ============================================================
# 13) KPI DASHBOARD (FINAL: hanya Final & Dasar)
# ============================================================
def compute_dashboard_kpis(summary_jenis: pd.DataFrame):
def _get(j, col):
sub = summary_jenis[summary_jenis["Jenis"].astype(str).str.lower() == j]
if sub.empty:
return 0.0
return float(pd.to_numeric(sub[col], errors="coerce").fillna(0).iloc[0])
final_all = _get("keseluruhan", "Indeks_Final_Disesuaikan_0_100")
dasar_all = _get("keseluruhan", "Indeks_Dasar_0_100")
return {"final_all": final_all, "dasar_all": dasar_all}
def build_kpi_markdown(summary_jenis: pd.DataFrame, agg_total: pd.DataFrame = None, agg_jenis: pd.DataFrame = None, faktor_wilayah_jenis=None) -> str:
if summary_jenis is None or summary_jenis.empty:
return ""
k = compute_dashboard_kpis(summary_jenis)
def fmt(x, nd=2):
return "NA" if pd.isna(x) else f"{x:.{nd}f}"
return f"""
<div style="display:flex; gap:12px; flex-wrap:wrap;">
<div style="border:1px solid #333; border-radius:10px; padding:10px 12px; min-width:260px;">
<div style="opacity:0.8;">Indeks IPLM FINAL (Disesuaikan)</div>
<div style="font-size:26px; font-weight:700;">{fmt(k["final_all"],2)}</div>
<div style="opacity:0.7;">Sumber: Ringkasan baris “keseluruhan”</div>
</div>
<div style="border:1px solid #333; border-radius:10px; padding:10px 12px; min-width:260px;">
<div style="opacity:0.8;">Indeks Dasar (Tanpa Penyesuaian)</div>
<div style="font-size:26px; font-weight:700;">{fmt(k["dasar_all"],2)}</div>
<div style="opacity:0.7;">Sumber: Ringkasan baris “keseluruhan”</div>
</div>
</div>
""".strip()
# ============================================================
# 14) LLM + WORD
# ============================================================
_HF_CLIENT = None
def get_llm_client():
global _HF_CLIENT
if _HF_CLIENT is not None:
return _HF_CLIENT
try:
_HF_CLIENT = InferenceClient(model=LLM_MODEL_NAME, token=HF_TOKEN) if HF_TOKEN else InferenceClient(model=LLM_MODEL_NAME)
return _HF_CLIENT
except Exception:
_HF_CLIENT = None
return None
def build_context(summary_jenis: pd.DataFrame, agg_total: pd.DataFrame, verif_total: pd.DataFrame, wilayah: str, kew: str) -> str:
lines = []
lines.append(f"Wilayah filter: {wilayah}")
lines.append(f"Kewenangan: {kew}")
if summary_jenis is not None and not summary_jenis.empty:
lines.append("\nRingkasan (jenis + keseluruhan):")
for _, r in summary_jenis.iterrows():
lines.append(
f"- {r['Jenis']}: pop={int(r.get('Pop_Total_Jenis',0))}, target68={int(r.get('Target68_Total_Jenis',0))}, "
f"terkumpul={int(r.get('Terkumpul_Jenis',0))}, coverage={float(r.get('Coverage_Target68_Jenis_%',0)):.2f}%, "
f"dasar={float(r.get('Indeks_Dasar_0_100',0)):.2f}, final={float(r.get('Indeks_Final_Disesuaikan_0_100',0)):.2f}"
)
if agg_total is not None and not agg_total.empty and "Indeks_Final_Wilayah_0_100" in agg_total.columns:
label_col = "Kab/Kota" if "Kab/Kota" in agg_total.columns else ("Provinsi" if "Provinsi" in agg_total.columns else None)
lines.append("\nTop 5 wilayah (Final tertinggi):")
top = agg_total.sort_values("Indeks_Final_Wilayah_0_100", ascending=False).head(5)
for _, r in top.iterrows():
wl = r.get(label_col, "(wilayah)") if label_col else "(wilayah)"
lines.append(f"- {wl}: Final={float(r['Indeks_Final_Wilayah_0_100']):.2f}")
return "\n".join(lines)
def generate_llm_analysis(summary_jenis, agg_total, verif_total, wilayah, kew):
ctx = build_context(summary_jenis, agg_total, verif_total, wilayah, kew)
client = get_llm_client()
if client is None or not USE_LLM:
return "Analisis otomatis (LLM) tidak digunakan / tidak tersedia."
system_prompt = "Anda adalah analis kebijakan perpustakaan di Indonesia. Tulis analisis ringkas berbasis data."
user_prompt = f"""
DATA IPLM (RINGKAS):
{ctx}
Buat analisis 3 paragraf:
1) Gambaran umum.
2) Per jenis (sekolah/umum/khusus) + keseluruhan.
3) Rekomendasi singkat.
Catatan khusus : IPLM adalah Indeks Pengembangan Literasi Masyarakat
"""
try:
resp = client.chat_completion(
model=LLM_MODEL_NAME,
messages=[{"role":"system","content":system_prompt},{"role":"user","content":user_prompt}],
max_tokens=700,
temperature=0.25,
top_p=0.9,
)
text = resp.choices[0].message.content.strip()
return text if text else "LLM mengembalikan respon kosong."
except Exception as e:
return f"⚠️ Error LLM: {repr(e)}"
def generate_word_report(wilayah, summary_jenis, agg_total, agg_jenis, analysis_text):
doc = Document()
doc.add_heading(f"Laporan IPLM — {wilayah}", level=1)
doc.add_heading("Ringkasan (Jenis + Keseluruhan)", level=2)
show = summary_jenis.copy() if summary_jenis is not None else pd.DataFrame()
if not show.empty:
preferred = [
"Jenis","Jumlah_Wilayah","Total_Perpus",
"Pop_Total_Jenis","Target68_Total_Jenis","Terkumpul_Jenis","Coverage_Target68_Jenis_%",
"Indeks_Dasar_0_100","Indeks_Final_Disesuaikan_0_100","Penyesuaian_Poin"
]
show = show[[c for c in preferred if c in show.columns]]
table = doc.add_table(rows=1, cols=len(show.columns))
hdr = table.rows[0].cells
for i, c in enumerate(show.columns):
hdr[i].text = str(c)
for _, row in show.iterrows():
cells = table.add_row().cells
for i, c in enumerate(show.columns):
v = row[c]
if pd.isna(v):
cells[i].text = ""
elif isinstance(v, (float, np.floating)):
cells[i].text = f"{float(v):.2f}"
elif isinstance(v, (int, np.integer)):
cells[i].text = str(int(v))
else:
cells[i].text = str(v)
doc.add_heading("Analisis (opsional)", level=2)
for p in (analysis_text or "").split("\n"):
if p.strip():
doc.add_paragraph(p.strip())
outpath = tempfile.mktemp(suffix=".docx")
doc.save(outpath)
return outpath
# ============================================================
# 15) CORE RUN
# ============================================================
def _empty_outputs(msg="⚠️ Data belum siap."):
empty = pd.DataFrame()
empty_fig = go.Figure()
return (
"", # kpi_md
empty, empty, empty, empty, empty,
None, None, None, None, None,
empty_fig, empty_fig, empty_fig,
msg, "Analisis belum tersedia."
)
def run_calc(prov_value, kab_value, kew_value, df_all, df_raw, pop_kab, pop_prov, pop_khusus, meta):
try:
if df_all is None or df_all.empty or df_raw is None or df_raw.empty:
return _empty_outputs("⚠️ Data belum ter-load. Pastikan file tersedia di repo/server.")
# FILTER (df_all)
df = df_all.copy()
if prov_value and prov_value != "(Semua)":
df = df[df["PROV_DISP"] == prov_value]
if kab_value and kab_value != "(Semua)":
df = df[df["KAB_DISP"] == kab_value]
if kew_value and kew_value != "(Semua)":
df = df[df["KEW_NORM"] == kew_value]
if df.empty:
return _empty_outputs("Tidak ada data untuk filter ini.")
# pipeline
faktor_wilayah_jenis = build_faktor_wilayah_jenis(df, pop_kab, pop_prov, pop_khusus, kew_value or "(Semua)")
agg_jenis_full = build_agg_wilayah_jenis(df, faktor_wilayah_jenis, kew_value or "(Semua)")
agg_total = build_agg_wilayah_total_from_jenis(agg_jenis_full, faktor_wilayah_jenis, kew_value or "(Semua)")
# SUMMARY (ini yang tampil Pop/Target/Terkumpul/Coverage)
summary_jenis = build_summary_per_jenis(agg_jenis_full, agg_total)
verif_total = build_verif_jenis(faktor_wilayah_jenis, kew_value or "(Semua)")
detail_view = attach_final_to_detail(df, agg_total, meta, kew_value or "(Semua)")
# view agg_jenis (UI cuma sampai indeks dasar)
if agg_jenis_full is None or agg_jenis_full.empty:
agg_jenis_view = agg_jenis_full
else:
kew_norm = str(kew_value or "").upper()
label_name = "Kab/Kota" if ("KAB" in kew_norm or "KOTA" in kew_norm) else ("Provinsi" if "PROV" in kew_norm else "Kab/Kota")
cols_upto = [
"group_key",
label_name,
"Jenis",
"Jumlah",
"Rata2_sub_koleksi","Rata2_sub_sdm","Rata2_sub_pelayanan","Rata2_sub_pengelolaan",
"Rata2_dim_kepatuhan","Rata2_dim_kinerja",
"Indeks_Dasar_Agregat_0_100",
]
cols_upto = [c for c in cols_upto if c in agg_jenis_full.columns]
agg_jenis_view = agg_jenis_full[cols_upto].copy()
# FILTER RAW DOWNLOAD (df_raw)
raw = df_raw.copy()
if prov_value and prov_value != "(Semua)":
raw = raw[raw["PROV_DISP"] == prov_value]
if kab_value and kab_value != "(Semua)":
raw = raw[raw["KAB_DISP"] == kab_value]
if kew_value and kew_value != "(Semua)":
raw = raw[raw["KEW_NORM"] == kew_value]
# bell curve per jenis (entitas)
if detail_view is None or detail_view.empty:
fig_sekolah = _make_bell_curve(pd.DataFrame(), "Indeks_Dasar_0_100", "Bell Curve — Jenis: Sekolah", min_points=2)
fig_umum = _make_bell_curve(pd.DataFrame(), "Indeks_Dasar_0_100", "Bell Curve — Jenis: Umum", min_points=2)
fig_khusus = _make_bell_curve(pd.DataFrame(), "Indeks_Dasar_0_100", "Bell Curve — Jenis: Khusus", min_points=2)
else:
xcol_ent = "Indeks_Dasar_0_100" if "Indeks_Dasar_0_100" in detail_view.columns else "Indeks_Final_0_100"
label_col_e = "nm_perpustakaan" if "nm_perpustakaan" in detail_view.columns else None
hover_cols_e = [c for c in ["Provinsi", "Kab/Kota", "KEW_NORM", "Jenis", "Indeks_Dasar_0_100", "Indeks_Final_0_100"] if c in detail_view.columns]
def _fig_jenis_ent(jenis_key: str, judul: str):
d = detail_view[detail_view["Jenis"].astype(str).str.lower() == jenis_key].copy()
return _make_bell_curve(d, xcol=xcol_ent, title=judul, label_col=label_col_e, hover_cols=hover_cols_e, min_points=2)
fig_sekolah = _fig_jenis_ent("sekolah", "Bell Curve — Jenis: Sekolah (Indeks per Entitas)")
fig_umum = _fig_jenis_ent("umum", "Bell Curve — Jenis: Umum (Indeks per Entitas)")
fig_khusus = _fig_jenis_ent("khusus", "Bell Curve — Jenis: Khusus (Indeks per Entitas)")
# KPI (dashboard cuma final & dasar)
kpi_md = build_kpi_markdown(summary_jenis, agg_total, agg_jenis_full, faktor_wilayah_jenis=faktor_wilayah_jenis)
tmpdir = tempfile.mkdtemp()
prov_slug = (_canon(prov_value or "SEMUA").upper() or "SEMUA")
kab_slug = (_canon(kab_value or "SEMUA").upper() or "SEMUA")
kew_slug = (_canon(kew_value or "SEMUA").upper() or "SEMUA")
p_summary = str(Path(tmpdir) / f"IPLM_RingkasanJenisKeseluruhan_{prov_slug}_{kab_slug}_{kew_slug}.xlsx")
p_total = str(Path(tmpdir) / f"IPLM_AgregatWilayah_Keseluruhan_{prov_slug}_{kab_slug}_{kew_slug}.xlsx")
p_jenis = str(Path(tmpdir) / f"IPLM_RAW_DATA_{prov_slug}_{kab_slug}_{kew_slug}.xlsx")
p_detail = str(Path(tmpdir) / f"IPLM_DetailEntitas_FinalMenempelWilayah_{prov_slug}_{kab_slug}_{kew_slug}.xlsx")
p_verif = str(Path(tmpdir) / f"IPLM_KecukupanSampel68_{prov_slug}_{kab_slug}_{kew_slug}.xlsx")
summary_jenis.to_excel(p_summary, index=False)
agg_total.to_excel(p_total, index=False)
raw.to_excel(p_jenis, index=False)
detail_view.to_excel(p_detail, index=False)
verif_total.to_excel(p_verif, index=False)
wilayah_txt = kab_value if (kab_value and kab_value != "(Semua)") else (prov_value if (prov_value and prov_value != "(Semua)") else "Nasional/All")
analysis_text = generate_llm_analysis(summary_jenis, agg_total, verif_total, wilayah_txt, kew_value or "(Semua)")
word_path = generate_word_report(wilayah_txt, summary_jenis, agg_total, agg_jenis_full, analysis_text)
msg = (
f"✅ Selesai: raw={len(raw)} | entitas={len(detail_view)} | wilayah(keseluruhan)={len(agg_total)} | "
f"jenis(agregat)={len(agg_jenis_full)} | Pop/Target sekolah+umum+khusus sudah diambil dari Excel populasi"
)
return (
kpi_md,
summary_jenis, agg_total, agg_jenis_view, detail_view, verif_total,
p_summary, p_total, p_jenis, p_detail, word_path,
fig_umum, fig_sekolah, fig_khusus,
msg, analysis_text
)
except Exception as e:
return _empty_outputs(f"⚠️ Runtime error: {repr(e)}")
# ============================================================
# 16) UI (NO UPLOAD)
# ============================================================
def ui_load(force=False):
df_all, df_raw, pop_kab, pop_prov, pop_khusus, meta, info = load_default_files(force=force)
if df_all is None or (isinstance(df_all, pd.DataFrame) and df_all.empty):
return (
None, None, None, None, None, {}, info,
gr.update(choices=["(Semua)"], value="(Semua)"),
gr.update(choices=["(Semua)"], value="(Semua)"),
gr.update(choices=["(Semua)"], value="(Semua)"),
)
prov_vals = df_all["PROV_DISP"].dropna().astype(str).tolist()
prov_vals = [v for v in prov_vals if v and v.strip()]
prov_choices = ["(Semua)"] + sorted(set(prov_vals))
kab_choices = ["(Semua)"] + sorted([x for x in df_all["KAB_DISP"].dropna().unique().tolist() if x])
kew_choices = ["(Semua)"] + sorted([x for x in df_all["KEW_NORM"].dropna().unique().tolist() if x])
default_kew = "PROVINSI" if "PROVINSI" in kew_choices else ("KAB/KOTA" if "KAB/KOTA" in kew_choices else "(Semua)")
return (
df_all, df_raw, pop_kab, pop_prov, pop_khusus, meta, info,
gr.update(choices=prov_choices, value="(Semua)"),
gr.update(choices=kab_choices, value="(Semua)"),
gr.update(choices=kew_choices, value=default_kew),
)
def on_prov_change(prov_value):
df_all, _, _, _, _, _, _ = load_default_files(force=False)
if df_all is None or df_all.empty:
return gr.update(choices=["(Semua)"], value="(Semua)")
if prov_value is None or prov_value == "(Semua)":
vals = df_all["KAB_DISP"].dropna().unique().tolist()
else:
vals = df_all.loc[df_all["PROV_DISP"] == prov_value, "KAB_DISP"].dropna().unique().tolist()
vals = sorted([v for v in vals if v])
return gr.update(choices=["(Semua)"] + vals, value="(Semua)")
with gr.Blocks() as demo:
gr.Markdown(f"""
# IPLM 2025 — Final (Pop/Target68 per Jenis dari Excel Populasi)
**Mode NO UPLOAD (cache aktif).** File dibaca dari repo/server:
- `DATA_FILE` = **{DATA_FILE}**
- `POP_KAB` = **{POP_KAB}**
- `POP_PROV` = **{POP_PROV}**
- `POP_KHUSUS` = **{POP_KHUSUS}**
**FIX UTAMA:**
- Ringkasan tampil Pop/Target68/Terkumpul/Coverage untuk **sekolah, umum, khusus, keseluruhan**
- Pop/Target sekolah+umum dari Excel POP_KAB/POP_PROV (kolom asli)
- Pop/Target khusus dari POP_KHUSUS (Propinsi/Kab/kota | POP_KHUSUS | SAMPEL_KHUSUS_68%)
""")
state_df = gr.State(None)
state_raw = gr.State(None)
state_pop_kab = gr.State(None)
state_pop_prov = gr.State(None)
state_pop_khusus = gr.State(None)
state_meta = gr.State({})
info_box = gr.Markdown()
with gr.Row():
dd_prov = gr.Dropdown(label="Provinsi", choices=["(Semua)"], value="(Semua)")
dd_kab = gr.Dropdown(label="Kab/Kota", choices=["(Semua)"], value="(Semua)")
dd_kew = gr.Dropdown(label="Kewenangan", choices=["(Semua)"], value="(Semua)")
dd_prov.change(fn=on_prov_change, inputs=[dd_prov], outputs=dd_kab)
run_btn = gr.Button("Jalankan Perhitungan")
msg_out = gr.Markdown()
kpi_out = gr.Markdown()
gr.Markdown("## Ringkasan (Jenis + Keseluruhan) — Pop/Target68/Terkumpul/Coverage + Penyesuaian")
out_summary = gr.DataFrame(interactive=False)
gr.Markdown("## Agregat Wilayah (Keseluruhan) — FIX: avg3 dari 3 jenis")
out_agg_total = gr.DataFrame(interactive=False)
gr.Markdown("## Agregat Wilayah × Jenis (Sekolah, Umum, Khusus) — (ditampilkan sampai Indeks_Dasar_Agregat_0_100)")
out_agg_jenis = gr.DataFrame(interactive=False)
gr.Markdown("## Detail Entitas (Final menempel dari wilayah)")
out_detail = gr.DataFrame(interactive=False)
gr.Markdown("## Kecukupan Sampel 68% (tanpa angka koma)")
out_verif = gr.DataFrame(interactive=False)
gr.Markdown("## Bell Curve — per Jenis Perpustakaan (Indeks per Entitas)")
gr.Markdown("### Perpustakaan Umum")
bell_umum = gr.Plot(scale=1)
gr.Markdown("### Perpustakaan Sekolah")
bell_sekolah = gr.Plot(scale=1)
gr.Markdown("### Perpustakaan Khusus")
bell_khusus = gr.Plot(scale=1)
gr.Markdown("## Analisis Otomatis (opsional)")
analysis_out = gr.Markdown()
with gr.Row():
dl_summary = gr.DownloadButton(label="Download Ringkasan (.xlsx)")
dl_total = gr.DownloadButton(label="Download Agregat Wilayah (.xlsx)")
dl_jenis = gr.DownloadButton(label="Download Data Mentah (.xlsx)")
dl_detail = gr.DownloadButton(label="Download Detail Entitas (.xlsx)")
dl_word = gr.DownloadButton(label="Download Laporan Word (.docx)")
run_btn.click(
fn=run_calc,
inputs=[dd_prov, dd_kab, dd_kew, state_df, state_raw, state_pop_kab, state_pop_prov, state_pop_khusus, state_meta],
outputs=[
kpi_out,
out_summary, out_agg_total, out_agg_jenis, out_detail, out_verif,
dl_summary, dl_total, dl_jenis, dl_detail, dl_word,
bell_umum, bell_sekolah, bell_khusus,
msg_out, analysis_out
]
)
demo.load(
fn=lambda: ui_load(force=False),
inputs=[],
outputs=[state_df, state_raw, state_pop_kab, state_pop_prov, state_pop_khusus, state_meta, info_box, dd_prov, dd_kab, dd_kew]
)
demo.launch()