Spaces:
Sleeping
Sleeping
| # -*- coding: utf-8 -*- | |
| """ | |
| IPLM 2025 β Final (Target Sampel 33.88% per Jenis) β TANPA Kinerja Relatif / Percentile | |
| UPDATE (sesuai instruksi terbaru Anda) β TANPA mengubah pipeline lain: | |
| FOKUS PEMBENAHAN (LLM + WORD): | |
| 1) Nilai Kepatuhan, Koleksi, Tenaga, Kinerja, Pelayanan, Pengelolaan: | |
| - TIDAK dikalikan 100. | |
| - Ditulis APA ADANYA dari kolom agregat aplikasi: | |
| Rata2_dim_kepatuhan, Rata2_sub_koleksi, Rata2_sub_sdm, Rata2_dim_kinerja, | |
| Rata2_sub_pelayanan, Rata2_sub_pengelolaan. | |
| 2) Nilai IPLM ditulis apa adanya: Indeks_Final_Wilayah_0_100. | |
| 3) LLM mengisi Interpretasi & Rekomendasi: | |
| - Interpretasi: deskriptif, kondisi riil berbasis relasi angka (lebih besar/kecil, gap, dominan, konsistensi), | |
| plus pemaknaan substantif dimensi (koleksi/sdm/pelayanan/pengelolaan) TANPA label normatif. | |
| - Rekomendasi: operasional, 2β3 butir ringkas, menaut ke pola angka (gap/ketimpangan/kontribusi). | |
| 4) Di bawah tabel Word: tambah deskripsi jumlah perpustakaan sumber data (dari tabel agregat wilayah Γ jenis / βgambar 2β): | |
| sekolah=..., umum=..., khusus=..., total=... | |
| Catatan penting: | |
| - Semua perhitungan dan dashboard tetap. | |
| - Yang diubah hanya: (a) cara mengambil nilai untuk tabel Word (tanpa *100), | |
| (b) prompt LLM untuk isi interpretasi/rekomendasi agar nyambung dengan angka, | |
| (c) tambahan paragraf jumlah perpustakaan di bawah tabel Word. | |
| """ | |
| import os | |
| import re | |
| import time | |
| import json | |
| import math | |
| import tempfile | |
| from pathlib import Path | |
| import gradio as gr | |
| import numpy as np | |
| import pandas as pd | |
| import plotly.graph_objects as go | |
| from sklearn.preprocessing import PowerTransformer | |
| # python-docx (wajib kalau mau Word) | |
| DOCX_AVAILABLE = True | |
| try: | |
| from docx import Document | |
| from docx.shared import Pt | |
| from docx.oxml import OxmlElement | |
| from docx.oxml.ns import qn | |
| except Exception: | |
| DOCX_AVAILABLE = False | |
| Document = None | |
| # huggingface client (opsional) | |
| HF_AVAILABLE = True | |
| try: | |
| from huggingface_hub import InferenceClient | |
| except Exception: | |
| HF_AVAILABLE = False | |
| InferenceClient = None | |
| # ============================================================ | |
| # 1) KONFIGURASI | |
| # ============================================================ | |
| DATA_FILE = os.getenv("DATA_FILE", "DATA CLEAN GABUNGAN SANGGAH-TIDAK SANGGAH-ALL200226.xlsx") | |
| POP_KAB = os.getenv("POP_KAB", "Data_populasi_Kab_kota_fixed.xlsx") | |
| POP_PROV = os.getenv("POP_PROV", "Data_populasi_propinsi.xlsx") | |
| POP_KHUSUS = os.getenv("POP_KHUSUS", "Data_populasi_perp_khusus.xlsx") | |
| W_KEPATUHAN = float(os.getenv("W_KEPATUHAN", "0.30")) | |
| W_KINERJA = float(os.getenv("W_KINERJA", "0.70")) | |
| TARGET_RATIO = float(os.getenv("TARGET_RATIO", "0.3388")) | |
| USE_LLM = True | |
| LLM_MODEL_NAME = os.getenv("LLM_MODEL_NAME", "meta-llama/Meta-Llama-3-8B-Instruct") | |
| HF_TOKEN = ( | |
| os.getenv("HF_SECRET") | |
| or os.getenv("HF_TOKEN") | |
| or os.getenv("HUGGINGFACEHUB_API_TOKEN") | |
| or os.getenv("HF_API_TOKEN") | |
| ) | |
| # ============================================================ | |
| # 2) UTIL | |
| # ============================================================ | |
| def _mtime(path_str: str): | |
| p = Path(path_str) | |
| return p.stat().st_mtime if p.exists() else None | |
| def _canon(s: str) -> str: | |
| return re.sub(r"[^a-z0-9]+", "", str(s).lower()) | |
| def _disp_text(x): | |
| if pd.isna(x): | |
| return None | |
| t = str(x).strip().upper() | |
| return " ".join(t.split()) | |
| def pick_col(df, candidates): | |
| if df is None or df.empty: | |
| return None | |
| for c in candidates: | |
| if c in df.columns: | |
| return c | |
| can_map = {_canon(c): c for c in df.columns} | |
| for c in candidates: | |
| k = _canon(c) | |
| if k in can_map: | |
| return can_map[k] | |
| return None | |
| def coerce_num(val): | |
| if pd.isna(val): | |
| return np.nan | |
| t = str(val).strip() | |
| if t == "" or t in {"-", "β", "β", "NA", "N/A", "null", "NULL"}: | |
| return np.nan | |
| t = t.replace("\u00a0", " ").replace("Rp", "").replace("%", "") | |
| t = re.sub(r"[^0-9,.\-]", "", t) | |
| if t.count(".") > 1 and t.count(",") == 1: | |
| t = t.replace(".", "").replace(",", ".") | |
| elif t.count(",") > 1 and t.count(".") == 1: | |
| t = t.replace(",", "") | |
| elif t.count(",") == 1 and t.count(".") == 0: | |
| t = t.replace(",", ".") | |
| else: | |
| t = t.replace(",", "") | |
| try: | |
| return float(t) | |
| except Exception: | |
| return np.nan | |
| def minmax_norm(s: pd.Series) -> pd.Series: | |
| x = pd.to_numeric(s, errors="coerce").astype(float) | |
| mn, mx = x.min(skipna=True), x.max(skipna=True) | |
| if pd.isna(mn) or pd.isna(mx) or mx == mn: | |
| return pd.Series(0.0, index=s.index) | |
| return (x - mn) / (mx - mn) | |
| def norm_kew(v): | |
| if pd.isna(v): | |
| return None | |
| t = str(v).strip().upper() | |
| if "KAB" in t or "KOTA" in t: | |
| return "KAB/KOTA" | |
| if "PROV" in t: | |
| return "PROVINSI" | |
| if "PUSAT" in t or "NASIONAL" in t: | |
| return "PUSAT" | |
| return t | |
| def norm_prov_disp(s): | |
| if pd.isna(s): | |
| return None | |
| t = str(s).strip().upper() | |
| t = t.replace("\u00a0", " ") | |
| t = " ".join(t.split()) | |
| t = t.replace("PROPINSI", "PROVINSI") | |
| while t.startswith("PROVINSI PROVINSI "): | |
| t = t.replace("PROVINSI PROVINSI ", "PROVINSI ", 1) | |
| if t.startswith("PROVINSI "): | |
| name = t[len("PROVINSI "):].strip() | |
| else: | |
| name = t | |
| name = " ".join(name.split()) | |
| if not name: | |
| return None | |
| return f"PROVINSI {name}" | |
| def norm_prov_label(s): | |
| if pd.isna(s): | |
| return None | |
| t = str(s).strip().upper().replace("\u00a0", " ") | |
| t = " ".join(t.split()) | |
| t = t.replace("PROPINSI", "PROVINSI") | |
| t = t.replace("PROVINSI", "").strip() | |
| return re.sub(r"[^A-Z0-9]+", "", t) | |
| def norm_kab_label(s): | |
| if pd.isna(s): | |
| return None | |
| t = str(s).upper() | |
| t = t.replace("KABUPATEN", "KAB") | |
| t = t.replace("KAB.", "KAB") | |
| t = t.replace("KOTA ADMINISTRASI", "KOTA") | |
| t = t.replace("KOTA ADM.", "KOTA") | |
| t = t.replace("KOTA.", "KOTA") | |
| t = " ".join(t.split()) | |
| return re.sub(r"[^A-Z0-9]+", "", t) | |
| def safe_div(num, den): | |
| if den is None or pd.isna(den) or float(den) <= 0: | |
| return np.nan | |
| return float(num) / float(den) | |
| def faktor_penyesuaian_total(n_total: float, target_total: float) -> float: | |
| if target_total is None or pd.isna(target_total) or float(target_total) <= 0: | |
| return 1.0 | |
| if n_total is None or pd.isna(n_total) or float(n_total) < 0: | |
| n_total = 0.0 | |
| return float(min(float(n_total) / float(target_total), 1.0)) | |
| # ============================================================ | |
| # 3) INDIKATOR IPLM | |
| # ============================================================ | |
| koleksi_cols = [ | |
| "JudulTercetak","EksemplarTercetak","JudulElektronik","EksemplarElektronik", | |
| "TambahJudulTercetak","TambahEksemplarTercetak", | |
| "TambahJudulElektronik","TambahEksemplarElektronik", | |
| "KomitmenAnggaranKoleksi" | |
| ] | |
| sdm_cols = [ | |
| "TenagaKualifikasiIlmuPerpustakaan", | |
| "TenagaFungsionalProfesional", | |
| "TenagaPKB", | |
| "AnggaranTenaga" | |
| ] | |
| pelayanan_cols = [ | |
| "PesertaBudayaBaca","PemustakaLuringDaring","PemustakaFasilitasTIK", | |
| "PemanfaatanJudulTercetak","PemanfaatanEksemplarTercetak", | |
| "PemanfaatanJudulElektronik","PemanfaatanEksemplarElektronik" | |
| ] | |
| pengelolaan_cols = [ | |
| "KegiatanBudayaBaca","KegiatanKerjasama","VariasiLayanan","Kebijakan","AnggaranLayanan" | |
| ] | |
| all_indicators = koleksi_cols + sdm_cols + pelayanan_cols + pengelolaan_cols | |
| alias_map_raw = { | |
| "j_judul_koleksi_tercetak": "JudulTercetak", | |
| "j_eksemplar_koleksi_tercetak": "EksemplarTercetak", | |
| "j_judul_koleksi_digital": "JudulElektronik", | |
| "j_eksemplar_koleksi_digital": "EksemplarElektronik", | |
| "tambah_judul_koleksi_tercetak": "TambahJudulTercetak", | |
| "tambah_eksemplar_koleksi_tercetak": "TambahEksemplarTercetak", | |
| "tambah_judul_koleksi_digital": "TambahJudulElektronik", | |
| "tambah_eksemplar_koleksi_digital": "TambahEksemplarElektronik", | |
| "j_anggaran_koleksi": "KomitmenAnggaranKoleksi", | |
| "j_tenaga_ilmu_perpus": "TenagaKualifikasiIlmuPerpustakaan", | |
| "j_tenaga_nonilmu_perpus": "TenagaFungsionalProfesional", | |
| "j_tenaga_pkb": "TenagaPKB", | |
| "j_anggaran_diklat_perpus": "AnggaranTenaga", | |
| "j_peserta_budaya_baca": "PesertaBudayaBaca", | |
| "j_pemustaka_luring_daring": "PemustakaLuringDaring", | |
| "j_pemustaka_fasilitas_tik": "PemustakaFasilitasTIK", | |
| "j_judul_koleksi_tercetak_termanfaat": "PemanfaatanJudulTercetak", | |
| "j_eksemplar_koleksi_tercetak_termanfaat": "PemanfaatanEksemplarTercetak", | |
| "j_judul_koleksi_digital_termanfaat": "PemanfaatanJudulElektronik", | |
| "j_eksemplar_koleksi_digital_termanfaat": "PemanfaatanEksemplarElektronik", | |
| "j_kegiatan_budaya_baca_peningkatan_literasi": "KegiatanBudayaBaca", | |
| "j_kerjasama_pengembangan_perpus": "KegiatanKerjasama", | |
| "j_variasi_layanan": "VariasiLayanan", | |
| "j_kebijakan_prosedur_pelayanan": "Kebijakan", | |
| "j_anggaran_peningkatan_pelayanan": "AnggaranLayanan", | |
| } | |
| alias_map = {_canon(k): v for k, v in alias_map_raw.items()} | |
| # ============================================================ | |
| # 4) PIPELINE NASIONAL (LEVEL ENTITAS) | |
| # ============================================================ | |
| def _mean_norm_cols(row, cols): | |
| vals = [] | |
| for c in cols: | |
| k = f"norm_{c}" | |
| if k in row.index: | |
| v = row[k] | |
| if pd.isna(v): | |
| v = 0.0 | |
| vals.append(float(v)) | |
| return float(np.mean(vals)) if vals else 0.0 | |
| def prepare_global(df_src: pd.DataFrame) -> pd.DataFrame: | |
| if df_src is None or df_src.empty: | |
| return df_src | |
| df = df_src.copy() | |
| rename_map = {} | |
| for col in df.columns: | |
| c = _canon(col) | |
| if c in alias_map: | |
| rename_map[col] = alias_map[c] | |
| else: | |
| for tgt in all_indicators: | |
| if c == _canon(tgt): | |
| rename_map[col] = tgt | |
| break | |
| if rename_map: | |
| df = df.rename(columns=rename_map) | |
| available = [c for c in all_indicators if c in df.columns] | |
| for c in available: | |
| df[c] = df[c].apply(coerce_num) | |
| for c in available: | |
| x = pd.to_numeric(df[c], errors="coerce").astype(float).values | |
| mask = ~np.isnan(x) | |
| transformed = np.full_like(x, np.nan, dtype=float) | |
| if mask.sum() > 1: | |
| pt = PowerTransformer(method="yeo-johnson", standardize=False) | |
| transformed[mask] = pt.fit_transform(x[mask].reshape(-1, 1)).ravel() | |
| else: | |
| transformed[mask] = x[mask] | |
| df[f"norm_{c}"] = minmax_norm(pd.Series(transformed, index=df.index)) | |
| df["sub_koleksi"] = df.apply(lambda r: _mean_norm_cols(r, [c for c in koleksi_cols if c in available]), axis=1) | |
| df["sub_sdm"] = df.apply(lambda r: _mean_norm_cols(r, [c for c in sdm_cols if c in available]), axis=1) | |
| df["sub_pelayanan"] = df.apply(lambda r: _mean_norm_cols(r, [c for c in pelayanan_cols if c in available]), axis=1) | |
| df["sub_pengelolaan"] = df.apply(lambda r: _mean_norm_cols(r, [c for c in pengelolaan_cols if c in available]), axis=1) | |
| df["dim_kepatuhan"] = df[["sub_koleksi","sub_sdm"]].mean(axis=1) | |
| df["dim_kinerja"] = df[["sub_pelayanan","sub_pengelolaan"]].mean(axis=1) | |
| df["Indeks_Dasar_0_100"] = 100 * (W_KEPATUHAN * df["dim_kepatuhan"] + W_KINERJA * df["dim_kinerja"]) | |
| for c in ["sub_koleksi","sub_sdm","sub_pelayanan","sub_pengelolaan","dim_kepatuhan","dim_kinerja","Indeks_Dasar_0_100"]: | |
| df[c] = pd.to_numeric(df[c], errors="coerce").fillna(0.0) | |
| return df | |
| # ============================================================ | |
| # 5) CACHE LOADER (NO UPLOAD) | |
| # ============================================================ | |
| _CACHE = {"key": None, "df_all": None, "df_raw": None, "pop_kab": None, "pop_prov": None, "pop_khusus": None, "meta": None, "info": None} | |
| def _parse_pop_khusus(path_xlsx: str) -> pd.DataFrame: | |
| df = pd.read_excel(path_xlsx) | |
| if df is None or df.empty: | |
| return pd.DataFrame() | |
| c_mix = pick_col(df, [ | |
| "Propinsi/Kab/kota", "Propinsi/Kab/Kota", "Provinsi/Kab/Kota", | |
| "Provinsi/Kab/kota", "Provinsi/Kabupaten/Kota", | |
| "Wilayah", "Nama Wilayah" | |
| ]) | |
| if c_mix is None: | |
| raise ValueError("POP_KHUSUS: kolom gabungan Provinsi/Kab/Kota tidak ditemukan.") | |
| c_pop = pick_col(df, ["POP_KHUSUS", "Pop_Khusus", "pop_khusus"]) | |
| if c_pop is None: | |
| raise ValueError("POP_KHUSUS: kolom 'POP_KHUSUS' tidak ditemukan.") | |
| mix = df[c_mix].astype(str).fillna("").str.strip() | |
| pop_series = df[c_pop].apply(coerce_num) | |
| rows = [] | |
| current_prov = None | |
| for m, pval in zip(mix.tolist(), pop_series.tolist()): | |
| mm = _disp_text(m) or "" | |
| if mm == "": | |
| continue | |
| if mm.startswith("PROVINSI "): | |
| prov_name = mm.replace("PROVINSI", "").strip() | |
| current_prov = prov_name | |
| rows.append({ | |
| "LEVEL": "PROV", | |
| "Provinsi_Label": f"PROVINSI {prov_name}", | |
| "Kab_Kota_Label": None, | |
| "Pop_Total_Jenis": pval, | |
| }) | |
| continue | |
| rows.append({ | |
| "LEVEL": "KAB", | |
| "Provinsi_Label": f"PROVINSI {current_prov}" if current_prov else None, | |
| "Kab_Kota_Label": mm, | |
| "Pop_Total_Jenis": pval, | |
| }) | |
| pop = pd.DataFrame(rows) | |
| if pop.empty: | |
| return pop | |
| pop["Pop_Total_Jenis"] = pd.to_numeric(pop["Pop_Total_Jenis"], errors="coerce").fillna(0.0) | |
| pop["prov_key"] = pop["Provinsi_Label"].apply(norm_prov_label) | |
| pop["kab_key"] = pop["Kab_Kota_Label"].apply(norm_kab_label) if "Kab_Kota_Label" in pop.columns else None | |
| return pop | |
| def load_default_files(force=False): | |
| key = ( | |
| DATA_FILE, POP_KAB, POP_PROV, POP_KHUSUS, | |
| _mtime(DATA_FILE), _mtime(POP_KAB), _mtime(POP_PROV), _mtime(POP_KHUSUS) | |
| ) | |
| if (not force) and _CACHE["key"] == key and _CACHE["df_all"] is not None: | |
| return _CACHE["df_all"], _CACHE["df_raw"], _CACHE["pop_kab"], _CACHE["pop_prov"], _CACHE["pop_khusus"], _CACHE["meta"], _CACHE["info"] | |
| for p, label in [(DATA_FILE, "DM"), (POP_KAB, "POP_KAB"), (POP_PROV, "POP_PROV"), (POP_KHUSUS, "POP_KHUSUS")]: | |
| if not Path(p).exists(): | |
| info = f"File tidak ditemukan ({label}): {p}" | |
| _CACHE.update({"key": key, "df_all": None, "df_raw": None, "pop_kab": None, "pop_prov": None, "pop_khusus": None, "meta": {}, "info": info}) | |
| return None, None, None, None, None, {}, info | |
| fp = Path(DATA_FILE) | |
| xls = pd.ExcelFile(fp) | |
| frames = [pd.read_excel(fp, sheet_name=s) for s in xls.sheet_names] | |
| df_raw = pd.concat(frames, ignore_index=True, sort=False) | |
| prov_col = pick_col(df_raw, ["provinsi", "Provinsi", "PROVINSI"]) | |
| kab_col = pick_col(df_raw, ["kab/kota", "Kab/Kota", "Kab_Kota", "KAB/KOTA", "kabupaten_kota", "Kabupaten/Kota", "kabupaten kota", "kota", "kab_kota"]) | |
| kew_col = pick_col(df_raw, ["kewenangan", "jenis_kewenangan", "Kewenangan", "KEWENANGAN"]) | |
| jenis_col = pick_col(df_raw, ["jenis_perpustakaan", "Jenis Perpustakaan", "JENIS_PERPUSTAKAAN"]) | |
| nama_col = pick_col(df_raw, ["nm_perpustakaan","nama_perpustakaan","Nama Perpustakaan","nm_instansi_lembaga","nm_perpus"]) | |
| missing = [] | |
| if prov_col is None: missing.append("Provinsi") | |
| if kab_col is None: missing.append("Kab/Kota") | |
| if kew_col is None: missing.append("Kewenangan") | |
| if jenis_col is None: missing.append("Jenis Perpustakaan") | |
| if missing: | |
| info = f"Kolom wajib tidak ditemukan di DM: {', '.join(missing)}" | |
| _CACHE.update({"key": key, "df_all": None, "df_raw": None, "pop_kab": None, "pop_prov": None, "pop_khusus": None, "meta": {}, "info": info}) | |
| return None, None, None, None, None, {}, info | |
| val_map_jenis = { | |
| "PERPUSTAKAAN SEKOLAH": "sekolah", "SEKOLAH": "sekolah", | |
| "PERPUSTAKAAN UMUM": "umum", "UMUM": "umum", "PERPUSTAKAAN DAERAH": "umum", | |
| "PERPUSTAKAAN KHUSUS": "khusus", "KHUSUS": "khusus", | |
| } | |
| df_raw["KEW_NORM"] = df_raw[kew_col].apply(norm_kew) | |
| df_raw["_dataset"] = df_raw[jenis_col].astype(str).str.strip().str.upper().map(val_map_jenis) | |
| df_raw["PROV_DISP"] = df_raw[prov_col].apply(norm_prov_disp) | |
| df_raw["KAB_DISP"] = df_raw[kab_col].apply(_disp_text) | |
| df_raw["prov_key"] = df_raw["PROV_DISP"].apply(norm_prov_label) | |
| df_raw["kab_key"] = df_raw["KAB_DISP"].apply(norm_kab_label) | |
| if nama_col and nama_col in df_raw.columns: | |
| kcols = [prov_col, kab_col, kew_col, jenis_col, nama_col] | |
| else: | |
| kcols = [prov_col, kab_col, kew_col, jenis_col] | |
| tmp = df_raw[kcols].astype(str).fillna("").apply(lambda s: s.str.strip(), axis=0) | |
| df_raw["_row_key"] = tmp.apply(lambda r: "||".join(r.values.tolist()), axis=1).apply(_canon) | |
| before = len(df_raw) | |
| df_raw = df_raw.drop_duplicates(subset=["_row_key"], keep="first").copy() | |
| after = len(df_raw) | |
| # POP KAB | |
| pk = pd.read_excel(POP_KAB) | |
| c_kab = pick_col(pk, ["KABUPATEN_KOTA","Kab/Kota","Kabupaten/Kota","KAB/KOTA","Kabupaten_Kota","kab_kota","kabupaten_kota"]) | |
| c_prov = pick_col(pk, ["PROVINSI","Provinsi","provinsi"]) | |
| if c_kab is None: | |
| info = "POP_KAB: wajib ada kolom Kab/Kota." | |
| _CACHE.update({"key": key, "df_all": None, "df_raw": None, "pop_kab": None, "pop_prov": None, "pop_khusus": None, "meta": {}, "info": info}) | |
| return None, None, None, None, None, {}, info | |
| pop_kab = pk.copy() | |
| pop_kab["Kab_Kota_Label"] = pk[c_kab].astype(str).str.strip() | |
| pop_kab["Provinsi_Label"] = pk[c_prov].astype(str).str.strip() if c_prov else "" | |
| pop_kab["kab_key"] = pop_kab["Kab_Kota_Label"].apply(norm_kab_label) | |
| pop_kab = pop_kab.groupby("kab_key", as_index=False).first() | |
| # POP PROV | |
| pp = pd.read_excel(POP_PROV) | |
| c_pr = pick_col(pp, ["Provinsi","PROVINSI","provinsi","Propinsi","PROPINSI","propinsi"]) | |
| if c_pr is None: | |
| info = "POP_PROV: wajib ada kolom Provinsi." | |
| _CACHE.update({"key": key, "df_all": None, "df_raw": None, "pop_kab": None, "pop_prov": None, "pop_khusus": None, "meta": {}, "info": info}) | |
| return None, None, None, None, None, {}, info | |
| pop_prov = pp.copy() | |
| pop_prov["Provinsi_Label"] = pp[c_pr].astype(str).str.strip() | |
| pop_prov["prov_key"] = pop_prov["Provinsi_Label"].apply(norm_prov_label) | |
| pop_prov = pop_prov.groupby("prov_key", as_index=False).first() | |
| # POP KHUSUS | |
| try: | |
| pop_khusus = _parse_pop_khusus(POP_KHUSUS) | |
| except Exception as e: | |
| info = f"POP_KHUSUS gagal dibaca: {repr(e)}" | |
| _CACHE.update({"key": key, "df_all": None, "df_raw": None, "pop_kab": None, "pop_prov": None, "pop_khusus": None, "meta": {}, "info": info}) | |
| return None, None, None, None, None, {}, info | |
| df_all = prepare_global(df_raw) | |
| meta = dict(prov_col=prov_col, kab_col=kab_col, kew_col=kew_col, jenis_col=jenis_col, nama_col=nama_col) | |
| info = ( | |
| f"Mode NO UPLOAD (cache aktif)\n" | |
| f"DM: {fp.name} | Baris: {before} -> dedup: {after}\n" | |
| f"POP_KAB: {Path(POP_KAB).name} (n={len(pop_kab)})\n" | |
| f"POP_PROV: {Path(POP_PROV).name} (n={len(pop_prov)})\n" | |
| f"POP_KHUSUS: {Path(POP_KHUSUS).name} (n={len(pop_khusus)})\n" | |
| f"TARGET sampel per jenis: {TARGET_RATIO*100:.2f}%\n" | |
| f"mtime: DM={time.ctime(_mtime(DATA_FILE))} | Kab={time.ctime(_mtime(POP_KAB))} | Prov={time.ctime(_mtime(POP_PROV))} | Khusus={time.ctime(_mtime(POP_KHUSUS))}" | |
| ) | |
| _CACHE.update({ | |
| "key": key, | |
| "df_all": df_all, | |
| "df_raw": df_raw, | |
| "pop_kab": pop_kab, | |
| "pop_prov": pop_prov, | |
| "pop_khusus": pop_khusus, | |
| "meta": meta, | |
| "info": info | |
| }) | |
| return df_all, df_raw, pop_kab, pop_prov, pop_khusus, meta, info | |
| # ============================================================ | |
| # 6) FAKTOR WILAYAH β PER JENIS (TARGET 33.88%) | |
| # ============================================================ | |
| def build_faktor_wilayah_jenis(df_filtered, pop_kab, pop_prov, pop_khusus, kew_value): | |
| if df_filtered is None or df_filtered.empty: | |
| return pd.DataFrame() | |
| kew_norm = str(kew_value or "").upper() | |
| df = df_filtered.copy() | |
| df = df[df["_dataset"].isin(["sekolah", "umum", "khusus"])].copy() | |
| if df.empty: | |
| return pd.DataFrame() | |
| jenis_list = ["sekolah", "umum", "khusus"] | |
| if "PROV" in kew_norm: | |
| key_col, label_col, label_name, mode = "prov_key", "PROV_DISP", "Provinsi", "PROV" | |
| base_pop = pop_prov.copy() if (pop_prov is not None and not pop_prov.empty) else pd.DataFrame() | |
| if not base_pop.empty and "prov_key" not in base_pop.columns: | |
| base_pop["prov_key"] = base_pop["Provinsi_Label"].apply(norm_prov_label) | |
| base_pop = base_pop.set_index("prov_key") if (not base_pop.empty and "prov_key" in base_pop.columns) else pd.DataFrame().set_index(pd.Index([])) | |
| else: | |
| key_col, label_col, label_name, mode = "kab_key", "KAB_DISP", "Kab/Kota", "KAB" | |
| base_pop = pop_kab.copy() if (pop_kab is not None and not pop_kab.empty) else pd.DataFrame() | |
| if not base_pop.empty and "kab_key" not in base_pop.columns: | |
| base_pop["kab_key"] = base_pop["Kab_Kota_Label"].apply(norm_kab_label) | |
| base_pop = base_pop.set_index("kab_key") if (not base_pop.empty and "kab_key" in base_pop.columns) else pd.DataFrame().set_index(pd.Index([])) | |
| base_keys = df[[key_col, label_col]].drop_duplicates().rename(columns={key_col: "group_key", label_col: label_name}) | |
| full = base_keys.assign(_tmp=1).merge(pd.DataFrame({"Jenis": jenis_list, "_tmp": 1}), on="_tmp").drop(columns="_tmp") | |
| cnt = ( | |
| df.groupby([key_col, label_col, "_dataset"], dropna=False) | |
| .size() | |
| .reset_index(name="n_jenis") | |
| .rename(columns={key_col: "group_key", label_col: label_name, "_dataset": "Jenis"}) | |
| ) | |
| cnt["Jenis"] = cnt["Jenis"].astype(str).str.lower().str.strip() | |
| base_n = full.merge(cnt, on=["group_key", label_name, "Jenis"], how="left") | |
| base_n["n_jenis"] = pd.to_numeric(base_n["n_jenis"], errors="coerce").fillna(0).astype(int) | |
| base_n["target_total_33_88_jenis"] = 0.0 | |
| base_n["pop_total_jenis"] = 0.0 | |
| if not base_pop.empty: | |
| if mode == "KAB": | |
| pop_sekolah = pd.to_numeric(base_pop.get("jumlah_populasi_sekolah_base", base_pop.get("jumlah_populasi_sekolah", 0)), errors="coerce").fillna(0.0) | |
| pop_umum = pd.to_numeric(base_pop.get("jumlah_populasi_umum_base", base_pop.get("jumlah_populasi_umum", 0)), errors="coerce").fillna(0.0) | |
| tgt_sekolah = pop_sekolah * float(TARGET_RATIO) | |
| tgt_umum = pop_umum * float(TARGET_RATIO) | |
| else: | |
| sma = pd.to_numeric(base_pop.get("sma ", base_pop.get("sma", 0)), errors="coerce").fillna(0.0) | |
| smk = pd.to_numeric(base_pop.get("smk", 0), errors="coerce").fillna(0.0) | |
| slb = pd.to_numeric(base_pop.get("slb", 0), errors="coerce").fillna(0.0) | |
| pop_sekolah = sma + smk + slb | |
| tgt_sekolah = pop_sekolah * float(TARGET_RATIO) | |
| pop_umum = pd.to_numeric(base_pop.get("perpus_umum_prop", 0), errors="coerce").fillna(0.0) | |
| tgt_umum = pop_umum * float(TARGET_RATIO) | |
| m = base_n["Jenis"].eq("sekolah") | |
| base_n.loc[m, "pop_total_jenis"] = base_n.loc[m, "group_key"].map(pop_sekolah).fillna(0.0).values | |
| base_n.loc[m, "target_total_33_88_jenis"] = base_n.loc[m, "group_key"].map(tgt_sekolah).fillna(0.0).values | |
| m = base_n["Jenis"].eq("umum") | |
| base_n.loc[m, "pop_total_jenis"] = base_n.loc[m, "group_key"].map(pop_umum).fillna(0.0).values | |
| base_n.loc[m, "target_total_33_88_jenis"] = base_n.loc[m, "group_key"].map(tgt_umum).fillna(0.0).values | |
| if pop_khusus is not None and not pop_khusus.empty: | |
| pk = pop_khusus.copy() | |
| pk["Pop_Total_Jenis"] = pd.to_numeric(pk.get("Pop_Total_Jenis", 0), errors="coerce").fillna(0.0) | |
| if mode == "PROV": | |
| pk_prov = pk[pk["LEVEL"].astype(str).str.upper() == "PROV"].copy() | |
| pk_map = pk_prov.groupby("prov_key", as_index=True).agg(pop=("Pop_Total_Jenis", "sum")) | |
| pop_series = pk_map["pop"] | |
| else: | |
| pk_kab = pk[pk["LEVEL"].astype(str).str.upper() == "KAB"].copy() | |
| pk_map = pk_kab.groupby("kab_key", as_index=True).agg(pop=("Pop_Total_Jenis", "sum")) | |
| pop_series = pk_map["pop"] | |
| tgt_series = pop_series * float(TARGET_RATIO) | |
| m = base_n["Jenis"].eq("khusus") | |
| base_n.loc[m, "pop_total_jenis"] = base_n.loc[m, "group_key"].map(pop_series).fillna(0.0).values | |
| base_n.loc[m, "target_total_33_88_jenis"] = base_n.loc[m, "group_key"].map(tgt_series).fillna(0.0).values | |
| base_n["target_total_33_88_jenis"] = pd.to_numeric(base_n["target_total_33_88_jenis"], errors="coerce").fillna(0.0) | |
| base_n["pop_total_jenis"] = pd.to_numeric(base_n["pop_total_jenis"], errors="coerce").fillna(0.0) | |
| m_need_pop = (base_n["pop_total_jenis"] <= 0) & (base_n["target_total_33_88_jenis"] > 0) | |
| base_n.loc[m_need_pop, "pop_total_jenis"] = base_n.loc[m_need_pop, "target_total_33_88_jenis"] / float(TARGET_RATIO) | |
| base_n["faktor_penyesuaian_jenis"] = [ | |
| faktor_penyesuaian_total(n, t) | |
| for n, t in zip( | |
| pd.to_numeric(base_n["n_jenis"], errors="coerce").fillna(0).astype(float), | |
| pd.to_numeric(base_n["target_total_33_88_jenis"], errors="coerce").fillna(0).astype(float), | |
| ) | |
| ] | |
| base_n["coverage_jenis_%"] = [ | |
| (safe_div(n, p) * 100.0) if (p is not None and not pd.isna(p) and float(p) > 0) else 0.0 | |
| for n, p in zip( | |
| pd.to_numeric(base_n["n_jenis"], errors="coerce").fillna(0).astype(float), | |
| pd.to_numeric(base_n["pop_total_jenis"], errors="coerce").fillna(0).astype(float), | |
| ) | |
| ] | |
| base_n["gap_target33_88_jenis"] = [ | |
| max(float(t) - float(n), 0.0) | |
| for n, t in zip( | |
| pd.to_numeric(base_n["n_jenis"], errors="coerce").fillna(0).astype(float), | |
| pd.to_numeric(base_n["target_total_33_88_jenis"], errors="coerce").fillna(0).astype(float), | |
| ) | |
| ] | |
| base_n["target_total_33_88_jenis"] = pd.to_numeric(base_n["target_total_33_88_jenis"], errors="coerce").fillna(0).round(0).astype(int) | |
| base_n["pop_total_jenis"] = pd.to_numeric(base_n["pop_total_jenis"], errors="coerce").fillna(0).round(0).astype(int) | |
| base_n["coverage_jenis_%"] = pd.to_numeric(base_n["coverage_jenis_%"], errors="coerce").fillna(0.0).round(2) | |
| base_n["faktor_penyesuaian_jenis"] = pd.to_numeric(base_n["faktor_penyesuaian_jenis"], errors="coerce").fillna(1.0).round(3) | |
| base_n["gap_target33_88_jenis"] = pd.to_numeric(base_n["gap_target33_88_jenis"], errors="coerce").fillna(0).round(0).astype(int) | |
| return base_n | |
| # ============================================================ | |
| # 7) AGREGAT WILAYAH Γ JENIS | |
| # ============================================================ | |
| def build_agg_wilayah_jenis(df_filtered, faktor_wilayah_jenis, kew_value): | |
| if df_filtered is None or df_filtered.empty: | |
| return pd.DataFrame() | |
| kew_norm = str(kew_value or "").upper() | |
| df = df_filtered.copy() | |
| if "PROV" in kew_norm: | |
| key_col, label_col, label_name = "prov_key", "PROV_DISP", "Provinsi" | |
| else: | |
| key_col, label_col, label_name = "kab_key", "KAB_DISP", "Kab/Kota" | |
| df = df[df["_dataset"].isin(["sekolah", "umum", "khusus"])].copy() | |
| if df.empty: | |
| return pd.DataFrame() | |
| jenis_list = ["sekolah", "umum", "khusus"] | |
| base_keys = df[[key_col, label_col]].drop_duplicates().rename(columns={key_col: "group_key", label_col: label_name}) | |
| full = base_keys.assign(_tmp=1).merge(pd.DataFrame({"Jenis": jenis_list, "_tmp": 1}), on="_tmp").drop(columns="_tmp") | |
| agg_real = df.groupby([key_col, label_col, "_dataset"], dropna=False).agg( | |
| Jumlah=("Indeks_Dasar_0_100", "size"), | |
| Rata2_sub_koleksi=("sub_koleksi", "mean"), | |
| Rata2_sub_sdm=("sub_sdm", "mean"), | |
| Rata2_sub_pelayanan=("sub_pelayanan", "mean"), | |
| Rata2_sub_pengelolaan=("sub_pengelolaan", "mean"), | |
| Rata2_dim_kepatuhan=("dim_kepatuhan", "mean"), | |
| Rata2_dim_kinerja=("dim_kinerja", "mean"), | |
| Indeks_Dasar_Agregat_0_100=("Indeks_Dasar_0_100", "mean"), | |
| ).reset_index().rename(columns={key_col: "group_key", label_col: label_name, "_dataset": "Jenis"}) | |
| agg_real["Jenis"] = agg_real["Jenis"].astype(str).str.lower().str.strip() | |
| agg = full.merge(agg_real, on=["group_key", label_name, "Jenis"], how="left") | |
| for c in ["Jumlah","Rata2_sub_koleksi","Rata2_sub_sdm","Rata2_sub_pelayanan","Rata2_sub_pengelolaan", | |
| "Rata2_dim_kepatuhan","Rata2_dim_kinerja","Indeks_Dasar_Agregat_0_100"]: | |
| if c in agg.columns: | |
| agg[c] = pd.to_numeric(agg[c], errors="coerce").fillna(0.0) | |
| agg["Jumlah"] = agg["Jumlah"].round(0).astype(int) | |
| if faktor_wilayah_jenis is None or faktor_wilayah_jenis.empty: | |
| agg["faktor_penyesuaian_jenis"] = 1.0 | |
| else: | |
| fw = faktor_wilayah_jenis.copy() | |
| fw["Jenis"] = fw["Jenis"].astype(str).str.lower().str.strip() | |
| keep = ["group_key", label_name, "Jenis", | |
| "faktor_penyesuaian_jenis", "target_total_33_88_jenis", "pop_total_jenis", | |
| "coverage_jenis_%", "gap_target33_88_jenis", "n_jenis"] | |
| fw = fw[[c for c in keep if c in fw.columns]].copy() | |
| agg = agg.merge(fw, on=["group_key", label_name, "Jenis"], how="left") | |
| agg["faktor_penyesuaian_jenis"] = pd.to_numeric(agg["faktor_penyesuaian_jenis"], errors="coerce").fillna(1.0) | |
| agg["Indeks_Final_Agregat_0_100"] = ( | |
| pd.to_numeric(agg["Indeks_Dasar_Agregat_0_100"], errors="coerce").fillna(0.0) | |
| * pd.to_numeric(agg["faktor_penyesuaian_jenis"], errors="coerce").fillna(1.0) | |
| ) | |
| for c in [ | |
| "Rata2_sub_koleksi","Rata2_sub_sdm","Rata2_sub_pelayanan","Rata2_sub_pengelolaan", | |
| "Rata2_dim_kepatuhan","Rata2_dim_kinerja" | |
| ]: | |
| if c in agg.columns: | |
| agg[c] = pd.to_numeric(agg[c], errors="coerce").fillna(0.0).round(3) | |
| for c in ["Indeks_Dasar_Agregat_0_100","Indeks_Final_Agregat_0_100"]: | |
| if c in agg.columns: | |
| agg[c] = pd.to_numeric(agg[c], errors="coerce").fillna(0.0).round(2) | |
| agg["faktor_penyesuaian_jenis"] = pd.to_numeric(agg["faktor_penyesuaian_jenis"], errors="coerce").fillna(1.0).round(3) | |
| return agg | |
| # ============================================================ | |
| # 8) AGREGAT WILAYAH (KESELURUHAN) β avg3 dari 3 jenis | |
| # ============================================================ | |
| def build_agg_wilayah_total_from_jenis(agg_jenis, faktor_wilayah_jenis, kew_value): | |
| if agg_jenis is None or agg_jenis.empty: | |
| return pd.DataFrame() | |
| kew_norm = str(kew_value or "").upper() | |
| label_name = "Provinsi" if "PROV" in kew_norm else "Kab/Kota" | |
| jenis_list = ["sekolah", "umum", "khusus"] | |
| a = agg_jenis.copy() | |
| a["Jenis"] = a["Jenis"].astype(str).str.lower().str.strip() | |
| base_keys = a[["group_key", label_name]].drop_duplicates() | |
| full = base_keys.assign(_tmp=1).merge(pd.DataFrame({"Jenis": jenis_list, "_tmp": 1}), on="_tmp").drop(columns="_tmp") | |
| cols_present = [c for c in [ | |
| "Jumlah", | |
| "Rata2_sub_koleksi","Rata2_sub_sdm","Rata2_sub_pelayanan","Rata2_sub_pengelolaan", | |
| "Rata2_dim_kepatuhan","Rata2_dim_kinerja", | |
| "Indeks_Dasar_Agregat_0_100", | |
| "Indeks_Final_Agregat_0_100", | |
| ] if c in a.columns] | |
| full = full.merge(a[["group_key", label_name, "Jenis"] + cols_present], | |
| on=["group_key", label_name, "Jenis"], how="left") | |
| for c in cols_present: | |
| full[c] = pd.to_numeric(full[c], errors="coerce").fillna(0.0) | |
| out = full.groupby(["group_key", label_name], as_index=False).agg( | |
| n_total=("Jumlah", "sum"), | |
| Rata2_sub_koleksi=("Rata2_sub_koleksi", "mean"), | |
| Rata2_sub_sdm=("Rata2_sub_sdm", "mean"), | |
| Rata2_sub_pelayanan=("Rata2_sub_pelayanan", "mean"), | |
| Rata2_sub_pengelolaan=("Rata2_sub_pengelolaan", "mean"), | |
| Rata2_dim_kepatuhan=("Rata2_dim_kepatuhan", "mean"), | |
| Rata2_dim_kinerja=("Rata2_dim_kinerja", "mean"), | |
| Indeks_Dasar_Agregat_0_100=("Indeks_Dasar_Agregat_0_100", "mean"), | |
| Indeks_Final_Wilayah_0_100=("Indeks_Final_Agregat_0_100", "mean"), | |
| ) | |
| for c in ["Rata2_sub_koleksi","Rata2_sub_sdm","Rata2_sub_pelayanan","Rata2_sub_pengelolaan","Rata2_dim_kepatuhan","Rata2_dim_kinerja"]: | |
| out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0.0).round(3) | |
| for c in ["Indeks_Dasar_Agregat_0_100","Indeks_Final_Wilayah_0_100"]: | |
| out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0.0).round(2) | |
| out["n_total"] = pd.to_numeric(out["n_total"], errors="coerce").fillna(0).round(0).astype(int) | |
| return out | |
| # ============================================================ | |
| # 9) SUMMARY (PER JENIS) + KESELURUHAN | |
| # ============================================================ | |
| def build_summary_per_jenis(agg_jenis, agg_total): | |
| jenis_list = ["sekolah", "umum", "khusus"] | |
| def _row_default(jenis): | |
| return { | |
| "Jenis": jenis, | |
| "Jumlah_Wilayah": 0, | |
| "Total_Perpus": 0, | |
| "Pop_Total_Jenis": 0, | |
| "Target33_88_Total_Jenis": 0, | |
| "Terkumpul_Jenis": 0, | |
| "Coverage_Target33_88_Jenis_%": 0.0, | |
| "Indeks_Dasar_0_100": 0.0, | |
| "Indeks_Final_Disesuaikan_0_100": 0.0, | |
| "Penyesuaian_Poin": 0.0, | |
| } | |
| rows_by_jenis = {j: _row_default(j) for j in jenis_list} | |
| if agg_jenis is not None and not agg_jenis.empty: | |
| a = agg_jenis.copy() | |
| a["Jenis"] = a["Jenis"].astype(str).str.lower().str.strip() | |
| for c in ["Jumlah","Indeks_Dasar_Agregat_0_100","Indeks_Final_Agregat_0_100","pop_total_jenis","target_total_33_88_jenis"]: | |
| if c in a.columns: | |
| a[c] = pd.to_numeric(a[c], errors="coerce").fillna(0) | |
| for jenis in jenis_list: | |
| sub = a[a["Jenis"] == jenis].copy() | |
| if sub.empty: | |
| continue | |
| jumlah_wilayah = int(sub.shape[0]) | |
| terkumpul = int(pd.to_numeric(sub.get("Jumlah", 0), errors="coerce").fillna(0).sum()) | |
| pop_total = int(pd.to_numeric(sub.get("pop_total_jenis", 0), errors="coerce").fillna(0).sum()) | |
| target3388 = int(pd.to_numeric(sub.get("target_total_33_88_jenis", 0), errors="coerce").fillna(0).sum()) | |
| coverage = (terkumpul / target3388 * 100.0) if target3388 > 0 else 0.0 | |
| dasar = float(pd.to_numeric(sub.get("Indeks_Dasar_Agregat_0_100", 0), errors="coerce").fillna(0).mean()) | |
| final = float(pd.to_numeric(sub.get("Indeks_Final_Agregat_0_100", 0), errors="coerce").fillna(0).mean()) | |
| rows_by_jenis[jenis] = { | |
| "Jenis": jenis, | |
| "Jumlah_Wilayah": jumlah_wilayah, | |
| "Total_Perpus": terkumpul, | |
| "Pop_Total_Jenis": pop_total, | |
| "Target33_88_Total_Jenis": target3388, | |
| "Terkumpul_Jenis": terkumpul, | |
| "Coverage_Target33_88_Jenis_%": float(coverage), | |
| "Indeks_Dasar_0_100": float(dasar), | |
| "Indeks_Final_Disesuaikan_0_100": float(final), | |
| "Penyesuaian_Poin": float(final - dasar), | |
| } | |
| rows = [rows_by_jenis[j] for j in jenis_list] | |
| dasar_all = (rows_by_jenis["sekolah"]["Indeks_Dasar_0_100"] | |
| + rows_by_jenis["umum"]["Indeks_Dasar_0_100"] | |
| + rows_by_jenis["khusus"]["Indeks_Dasar_0_100"]) / 3.0 | |
| final_all = (rows_by_jenis["sekolah"]["Indeks_Final_Disesuaikan_0_100"] | |
| + rows_by_jenis["umum"]["Indeks_Final_Disesuaikan_0_100"] | |
| + rows_by_jenis["khusus"]["Indeks_Final_Disesuaikan_0_100"]) / 3.0 | |
| pop_all = int(rows_by_jenis["sekolah"]["Pop_Total_Jenis"] | |
| + rows_by_jenis["umum"]["Pop_Total_Jenis"] | |
| + rows_by_jenis["khusus"]["Pop_Total_Jenis"]) | |
| target_all = int(rows_by_jenis["sekolah"]["Target33_88_Total_Jenis"] | |
| + rows_by_jenis["umum"]["Target33_88_Total_Jenis"] | |
| + rows_by_jenis["khusus"]["Target33_88_Total_Jenis"]) | |
| terkumpul_all = int(rows_by_jenis["sekolah"]["Terkumpul_Jenis"] | |
| + rows_by_jenis["umum"]["Terkumpul_Jenis"] | |
| + rows_by_jenis["khusus"]["Terkumpul_Jenis"]) | |
| coverage_all = (terkumpul_all / target_all * 100.0) if target_all > 0 else 0.0 | |
| jumlah_wilayah_all = int(agg_total.shape[0]) if (agg_total is not None and not agg_total.empty) else int( | |
| max(rows_by_jenis["sekolah"]["Jumlah_Wilayah"], | |
| rows_by_jenis["umum"]["Jumlah_Wilayah"], | |
| rows_by_jenis["khusus"]["Jumlah_Wilayah"]) | |
| ) | |
| rows.append({ | |
| "Jenis": "keseluruhan", | |
| "Jumlah_Wilayah": jumlah_wilayah_all, | |
| "Total_Perpus": terkumpul_all, | |
| "Pop_Total_Jenis": pop_all, | |
| "Target33_88_Total_Jenis": target_all, | |
| "Terkumpul_Jenis": terkumpul_all, | |
| "Coverage_Target33_88_Jenis_%": float(coverage_all), | |
| "Indeks_Dasar_0_100": float(dasar_all), | |
| "Indeks_Final_Disesuaikan_0_100": float(final_all), | |
| "Penyesuaian_Poin": float(final_all - dasar_all), | |
| }) | |
| out = pd.DataFrame(rows) | |
| for c in ["Jumlah_Wilayah","Total_Perpus","Pop_Total_Jenis","Target33_88_Total_Jenis","Terkumpul_Jenis"]: | |
| out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0).round(0).astype(int) | |
| for c in ["Coverage_Target33_88_Jenis_%","Indeks_Dasar_0_100","Indeks_Final_Disesuaikan_0_100","Penyesuaian_Poin"]: | |
| out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0.0).round(2) | |
| return out | |
| # ============================================================ | |
| # 10) DETAIL ENTITAS (Final menempel dari wilayah) | |
| # ============================================================ | |
| def attach_final_to_detail(df_filtered, agg_total, meta, kew_value): | |
| if df_filtered is None or df_filtered.empty: | |
| return pd.DataFrame() | |
| kew_norm = str(kew_value or "").upper() | |
| df = df_filtered.copy() | |
| if "PROV" in kew_norm: | |
| key_col = "prov_key" | |
| label_cols = ("PROV_DISP", "KAB_DISP") | |
| else: | |
| key_col = "kab_key" | |
| label_cols = ("PROV_DISP", "KAB_DISP") | |
| if agg_total is None or agg_total.empty: | |
| df["Indeks_Final_0_100"] = df["Indeks_Dasar_0_100"] | |
| else: | |
| m = agg_total[["group_key", "Indeks_Final_Wilayah_0_100"]].copy() | |
| df = df.merge(m, left_on=key_col, right_on="group_key", how="left") | |
| df["Indeks_Final_0_100"] = df["Indeks_Final_Wilayah_0_100"].fillna(df["Indeks_Dasar_0_100"]) | |
| df = df.drop(columns=[c for c in ["group_key","Indeks_Final_Wilayah_0_100"] if c in df.columns]) | |
| base_cols = [label_cols[0], label_cols[1], "KEW_NORM", "_dataset"] | |
| if meta.get("nama_col") and meta["nama_col"] in df.columns: | |
| df["nm_perpustakaan"] = df[meta["nama_col"]].astype(str) | |
| base_cols.insert(2, "nm_perpustakaan") | |
| keep = base_cols + [ | |
| "sub_koleksi","sub_sdm","sub_pelayanan","sub_pengelolaan", | |
| "dim_kepatuhan","dim_kinerja", | |
| "Indeks_Dasar_0_100", | |
| "Indeks_Final_0_100", | |
| ] | |
| keep = [c for c in keep if c in df.columns] | |
| out = df[keep].copy() | |
| out = out.rename(columns={label_cols[0]:"Provinsi", label_cols[1]:"Kab/Kota", "_dataset":"Jenis"}) | |
| for c in ["sub_koleksi","sub_sdm","sub_pelayanan","sub_pengelolaan","dim_kepatuhan","dim_kinerja"]: | |
| if c in out.columns: | |
| out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0.0).round(3) | |
| for c in ["Indeks_Dasar_0_100","Indeks_Final_0_100"]: | |
| if c in out.columns: | |
| out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0.0).round(2) | |
| return out | |
| # ============================================================ | |
| # 11) VERIF (kecukupan sampel) | |
| # ============================================================ | |
| def build_verif_jenis(faktor_wilayah_jenis, kew_value): | |
| if faktor_wilayah_jenis is None or faktor_wilayah_jenis.empty: | |
| return pd.DataFrame() | |
| kew_norm = str(kew_value or "").upper() | |
| label_col = "Provinsi" if "PROV" in kew_norm else "Kab/Kota" | |
| out = faktor_wilayah_jenis.copy() | |
| keep = [c for c in [ | |
| label_col, "Jenis", | |
| "pop_total_jenis", "target_total_33_88_jenis", "n_jenis", | |
| "coverage_jenis_%", "faktor_penyesuaian_jenis", "gap_target33_88_jenis" | |
| ] if c in out.columns] | |
| out = out[keep].copy() | |
| for c in ["pop_total_jenis", "target_total_33_88_jenis", "n_jenis", "gap_target33_88_jenis"]: | |
| if c in out.columns: | |
| out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0).round(0).astype(int) | |
| if "coverage_jenis_%" in out.columns: | |
| out["coverage_jenis_%"] = pd.to_numeric(out["coverage_jenis_%"], errors="coerce").fillna(0.0).round(2) | |
| if "faktor_penyesuaian_jenis" in out.columns: | |
| out["faktor_penyesuaian_jenis"] = pd.to_numeric(out["faktor_penyesuaian_jenis"], errors="coerce").fillna(1.0).round(3) | |
| return out | |
| # ============================================================ | |
| # 12) BELL CURVE β Indeks Dasar per Entitas (per Jenis) + Hover | |
| # ============================================================ | |
| def _make_bell_curve_entitas(dfp, title, xcol="Indeks_Dasar_0_100", label_col="nm_perpustakaan", hover_cols=None, min_points=2): | |
| fig = go.Figure() | |
| fig.update_layout( | |
| title=title, | |
| xaxis_title="Skor (0β100)", | |
| yaxis_title="Kepadatan", | |
| hovermode="closest", | |
| margin=dict(l=40, r=20, t=60, b=40), | |
| legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0), | |
| ) | |
| if dfp is None or dfp.empty or xcol not in dfp.columns: | |
| fig.add_annotation(text="Tidak ada data untuk ditampilkan.", x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False) | |
| fig.update_xaxes(range=[0, 100]) | |
| fig.update_yaxes(rangemode="tozero") | |
| return fig | |
| d = dfp.dropna(subset=[xcol]).copy() | |
| if d.empty: | |
| fig.add_annotation(text="Tidak ada data untuk ditampilkan.", x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False) | |
| fig.update_xaxes(range=[0, 100]) | |
| fig.update_yaxes(rangemode="tozero") | |
| return fig | |
| x = pd.to_numeric(d[xcol], errors="coerce").astype(float) | |
| d = d.loc[x.notna()].copy() | |
| x = x.loc[x.notna()].values | |
| if len(x) < 1: | |
| fig.add_annotation(text="Tidak ada data untuk ditampilkan.", x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False) | |
| fig.update_xaxes(range=[0, 100]) | |
| fig.update_yaxes(rangemode="tozero") | |
| return fig | |
| hover_cols = hover_cols or [] | |
| def _val(row, col): | |
| if col not in row.index: | |
| return "" | |
| v = row[col] | |
| return "" if pd.isna(v) else str(v) | |
| hover_text = [] | |
| for _, row in d.iterrows(): | |
| lines = [] | |
| nm = _val(row, label_col) if (label_col and label_col in d.columns) else "" | |
| if nm: | |
| lines.append(f"<b>{nm}</b>") | |
| lines.append(f"{xcol}: {float(pd.to_numeric(row[xcol], errors='coerce')):.2f}") | |
| for hc in hover_cols: | |
| vv = _val(row, hc) | |
| if vv: | |
| lines.append(f"{hc}: {vv}") | |
| hover_text.append("<br>".join(lines)) | |
| if len(x) < min_points: | |
| x_single = float(x[0]) | |
| fig.add_trace(go.Scatter( | |
| x=[x_single], y=[0], | |
| mode="markers", showlegend=False, | |
| hovertext=[hover_text[0]] if hover_text else None, | |
| hoverinfo="text" | |
| )) | |
| fig.add_vline(x=x_single, line_width=1, line_dash="dash", annotation_text=f"Nilai: {x_single:.1f}", annotation_position="top") | |
| fig.update_xaxes(range=[0, 100]) | |
| fig.update_yaxes(rangemode="tozero") | |
| return fig | |
| mu = float(np.mean(x)) | |
| sigma = float(np.std(x, ddof=1)) if len(x) > 1 else 1.0 | |
| sigma = max(sigma, 1e-3) | |
| xmin = max(0.0, float(np.min(x)) - 5.0) | |
| xmax = min(100.0, float(np.max(x)) + 5.0) | |
| xs = np.linspace(xmin, xmax, 250) | |
| pdf = (1.0 / (sigma * np.sqrt(2 * np.pi))) * np.exp(-0.5 * ((xs - mu) / sigma) ** 2) | |
| fig.add_trace(go.Scatter(x=xs, y=pdf, mode="lines", name="Kurva Normal (fit)")) | |
| fig.add_trace(go.Scatter( | |
| x=x, y=np.zeros_like(x), | |
| mode="markers", showlegend=False, | |
| hovertext=hover_text if hover_text else None, | |
| hoverinfo="text" | |
| )) | |
| q1, q2, q3 = np.percentile(x, [25, 50, 75]) | |
| for xv, lab in [(q1, "Q1"), (q2, "Q2 (Median)"), (q3, "Q3"), (mu, "Mean")]: | |
| fig.add_vline(x=float(xv), line_width=1, line_dash="dash", annotation_text=f"{lab}: {xv:.1f}", annotation_position="top") | |
| fig.update_xaxes(range=[0, 100]) | |
| fig.update_yaxes(rangemode="tozero") | |
| return fig | |
| # ============================================================ | |
| # 13) KPI DASHBOARD (2 kartu: final + dasar) | |
| # ============================================================ | |
| def _safe_first(df, col, default=0.0, where=None): | |
| if df is None or df.empty or col not in df.columns: | |
| return default | |
| sub = df | |
| if where is not None: | |
| sub = df.loc[where] | |
| if sub is None or sub.empty: | |
| return default | |
| return float(pd.to_numeric(sub[col], errors="coerce").fillna(default).iloc[0]) | |
| def build_kpi_markdown(summary_jenis): | |
| if summary_jenis is None or summary_jenis.empty: | |
| return "" | |
| final_all = _safe_first(summary_jenis, "Indeks_Final_Disesuaikan_0_100", 0.0, where=summary_jenis["Jenis"].astype(str).str.lower().eq("keseluruhan")) | |
| dasar_all = _safe_first(summary_jenis, "Indeks_Dasar_0_100", 0.0, where=summary_jenis["Jenis"].astype(str).str.lower().eq("keseluruhan")) | |
| def fmt(x, nd=2): | |
| return "NA" if pd.isna(x) else f"{x:.{nd}f}" | |
| return f""" | |
| <div style="display:flex; gap:12px; flex-wrap:wrap;"> | |
| <div style="border:1px solid #333; border-radius:10px; padding:10px 12px; min-width:260px;"> | |
| <div style="opacity:0.8;">Indeks IPLM FINAL (Disesuaikan 33.88%)</div> | |
| <div style="font-size:26px; font-weight:700;">{fmt(final_all,2)}</div> | |
| <div style="opacity:0.7;">Skor absolut (untuk akuntabilitas)</div> | |
| </div> | |
| <div style="border:1px solid #333; border-radius:10px; padding:10px 12px; min-width:260px;"> | |
| <div style="opacity:0.8;">Indeks Dasar (Tanpa Penyesuaian)</div> | |
| <div style="font-size:26px; font-weight:700;">{fmt(dasar_all,2)}</div> | |
| <div style="opacity:0.7;">Sebelum faktor kecukupan sampel</div> | |
| </div> | |
| </div> | |
| """.strip() | |
| # ============================================================ | |
| # 14) LLM: Isi Interpretasi & Rekomendasi (TABEL) + WORD | |
| # ============================================================ | |
| _HF_CLIENT = None | |
| def get_llm_client(): | |
| global _HF_CLIENT | |
| if _HF_CLIENT is not None: | |
| return _HF_CLIENT | |
| if (not HF_AVAILABLE) or (InferenceClient is None): | |
| _HF_CLIENT = None | |
| return None | |
| try: | |
| _HF_CLIENT = InferenceClient(model=LLM_MODEL_NAME, token=HF_TOKEN) if HF_TOKEN else InferenceClient(model=LLM_MODEL_NAME) | |
| return _HF_CLIENT | |
| except Exception: | |
| _HF_CLIENT = None | |
| return None | |
| def _to_float(x, default=0.0): | |
| try: | |
| if x is None: | |
| return float(default) | |
| if isinstance(x, float) and math.isnan(x): | |
| return float(default) | |
| return float(x) | |
| except Exception: | |
| return float(default) | |
| def summarize_jumlah_perpus_dari_agg_jenis(agg_jenis_full, wilayah_label, kew_value): | |
| """ | |
| Ambil jumlah perpustakaan sumber data dari tabel agregat wilayah Γ jenis (gambar 2). | |
| Untuk filter 1 wilayah (kab/prov), agg_jenis_full biasanya 3 baris (sekolah/umum/khusus). | |
| Untuk nasional/semua wilayah, ini akan menjumlahkan seluruh wilayah per jenis. | |
| """ | |
| if agg_jenis_full is None or agg_jenis_full.empty: | |
| return {"sekolah": 0, "umum": 0, "khusus": 0, "total": 0} | |
| a = agg_jenis_full.copy() | |
| if "Jenis" not in a.columns: | |
| return {"sekolah": 0, "umum": 0, "khusus": 0, "total": 0} | |
| a["Jenis"] = a["Jenis"].astype(str).str.lower().str.strip() | |
| if "Jumlah" in a.columns: | |
| a["Jumlah"] = pd.to_numeric(a["Jumlah"], errors="coerce").fillna(0).astype(int) | |
| else: | |
| a["Jumlah"] = 0 | |
| out = {} | |
| for j in ["sekolah", "umum", "khusus"]: | |
| out[j] = int(a.loc[a["Jenis"].eq(j), "Jumlah"].sum()) | |
| out["total"] = int(out["sekolah"] + out["umum"] + out["khusus"]) | |
| return out | |
| def build_interpretasi_table_values(agg_total, wilayah_label, target_ratio): | |
| """ | |
| MENGAMBIL NILAI APA ADANYA (tanpa *100) dari hasil aplikasi (agg_total): | |
| - Kepatuhan = Rata2_dim_kepatuhan | |
| - Koleksi = Rata2_sub_koleksi | |
| - Tenaga = Rata2_sub_sdm | |
| - Kinerja = Rata2_dim_kinerja | |
| - Pelayanan = Rata2_sub_pelayanan | |
| - Pengelolaan = Rata2_sub_pengelolaan | |
| - Nilai IPLM = Indeks_Final_Wilayah_0_100 | |
| Jika agg_total > 1 baris (mis. nasional), diambil mean kolom-kolom tersebut. | |
| """ | |
| if agg_total is None or agg_total.empty: | |
| base = { | |
| "kepatuhan": 0.0, "koleksi": 0.0, "tenaga": 0.0, | |
| "kinerja": 0.0, "pelayanan": 0.0, "pengelolaan": 0.0, | |
| "iplm": 0.0 | |
| } | |
| else: | |
| a = agg_total.copy() | |
| cols_needed = [ | |
| "Rata2_dim_kepatuhan", | |
| "Rata2_sub_koleksi", | |
| "Rata2_sub_sdm", | |
| "Rata2_dim_kinerja", | |
| "Rata2_sub_pelayanan", | |
| "Rata2_sub_pengelolaan", | |
| "Indeks_Final_Wilayah_0_100", | |
| ] | |
| for c in cols_needed: | |
| if c in a.columns: | |
| a[c] = pd.to_numeric(a[c], errors="coerce").fillna(0.0) | |
| else: | |
| a[c] = 0.0 | |
| base = { | |
| "kepatuhan": float(a["Rata2_dim_kepatuhan"].mean()), | |
| "koleksi": float(a["Rata2_sub_koleksi"].mean()), | |
| "tenaga": float(a["Rata2_sub_sdm"].mean()), | |
| "kinerja": float(a["Rata2_dim_kinerja"].mean()), | |
| "pelayanan": float(a["Rata2_sub_pelayanan"].mean()), | |
| "pengelolaan": float(a["Rata2_sub_pengelolaan"].mean()), | |
| "iplm": float(a["Indeks_Final_Wilayah_0_100"].mean()), | |
| } | |
| # pembulatan display (nilai tetap "apa adanya", hanya format) | |
| # untuk sub/dim (0β1) biasanya 3 desimal; untuk IPLM (0β100) 2 desimal. | |
| base_disp = { | |
| "kepatuhan": round(_to_float(base["kepatuhan"]), 3), | |
| "koleksi": round(_to_float(base["koleksi"]), 3), | |
| "tenaga": round(_to_float(base["tenaga"]), 3), | |
| "kinerja": round(_to_float(base["kinerja"]), 3), | |
| "pelayanan": round(_to_float(base["pelayanan"]), 3), | |
| "pengelolaan": round(_to_float(base["pengelolaan"]), 3), | |
| "iplm": round(_to_float(base["iplm"]), 2), | |
| } | |
| rows = [ | |
| {"No":"1", "Dimensi":"Kepatuhan", "Nilai":base_disp["kepatuhan"], "SumberKolom":"Rata2_dim_kepatuhan"}, | |
| {"No":"1.1", "Dimensi":"Variabel Koleksi", "Nilai":base_disp["koleksi"], "SumberKolom":"Rata2_sub_koleksi"}, | |
| {"No":"1.2", "Dimensi":"Variabel Tenaga Perpustakaan", "Nilai":base_disp["tenaga"], "SumberKolom":"Rata2_sub_sdm"}, | |
| {"No":"2", "Dimensi":"Kinerja", "Nilai":base_disp["kinerja"], "SumberKolom":"Rata2_dim_kinerja"}, | |
| {"No":"2.1", "Dimensi":"Variabel Pelayanan", "Nilai":base_disp["pelayanan"], "SumberKolom":"Rata2_sub_pelayanan"}, | |
| {"No":"2.2", "Dimensi":"Variabel Penyelenggaraan/Pengelolaan", "Nilai":base_disp["pengelolaan"], "SumberKolom":"Rata2_sub_pengelolaan"}, | |
| {"No":"4", "Dimensi":"Nilai IPLM", "Nilai":base_disp["iplm"], "SumberKolom":"Indeks_Final_Wilayah_0_100"}, | |
| ] | |
| header = { | |
| "judul": f"Interpretasi dan Rekomendasi IPLM β {wilayah_label}", | |
| "target_sampel": f"{target_ratio*100:.2f}%" | |
| } | |
| return header, rows | |
| def llm_fill_interpretasi_rekomendasi(header, rows, wilayah_label, kew_label, jumlah_perpus_by_jenis): | |
| """ | |
| LLM diminta mengisi kolom Interpretasi dan Rekomendasi dengan narasi yang NYAMBUNG ke angka: | |
| - Interpretasi: jelaskan apa arti angka untuk kondisi operasional perpustakaan (koleksi/sdm/pelayanan/pengelolaan), | |
| memakai relasi angka antardimensi (lebih besar/kecil, selisih, dominan, gap, konsistensi) TANPA label normatif. | |
| - Rekomendasi: 2β3 aksi teknis per baris yang langsung meng-address pola angka (misal dimensi lebih kecil β prioritas aktivitas), | |
| serta mengaitkan dengan volume data (jumlah perpustakaan per jenis) bila relevan. | |
| Output wajib JSON. | |
| """ | |
| client = get_llm_client() | |
| if client is None or (not USE_LLM): | |
| out = [] | |
| for r in rows: | |
| out.append({k: r.get(k) for k in ["No","Dimensi","Nilai"]} | {"Interpretasi":"", "Rekomendasi":""}) | |
| return out, "LLM tidak digunakan / tidak tersedia." | |
| payload = { | |
| "wilayah": wilayah_label, | |
| "kewenangan": kew_label, | |
| "target_sampel_per_jenis": header["target_sampel"], | |
| "jumlah_perpustakaan_sumber_data": jumlah_perpus_by_jenis, | |
| "catatan_skala": ( | |
| "Baris Kepatuhan/Koleksi/Tenaga/Kinerja/Pelayanan/Pengelolaan memakai nilai agregat 'apa adanya' " | |
| "(umumnya rentang 0β1 karena berasal dari sub/dim hasil normalisasi). " | |
| "Baris 'Nilai IPLM' memakai Indeks_Final_Wilayah_0_100 (rentang 0β100)." | |
| ), | |
| "baris": rows | |
| } | |
| system = ( | |
| "Anda adalah analis kebijakan perpustakaan di Indonesia.\n" | |
| "Tugas: isi kolom Interpretasi dan Rekomendasi untuk setiap baris tabel.\n" | |
| "ATURAN WAJIB:\n" | |
| "1) Jangan mengubah nilai angka. Jangan menghitung ulang skor.\n" | |
| "2) Netral-deskriptif: dilarang memakai label normatif seperti baik/buruk, tinggi/sedang/rendah, memuaskan/kurang, optimal/tidak optimal.\n" | |
| "3) Interpretasi harus nyambung langsung ke angka dan relasinya antardimensi: gunakan istilah lebih besar/kecil, selisih, gap, dominan, konsisten/tidak konsisten, kontribusi, proporsi.\n" | |
| "4) Interpretasi juga harus menjelaskan kondisi riil berbasis dimensi:\n" | |
| " - Koleksi: pengembangan, ketersediaan, pemanfaatan koleksi (sebagai fungsi layanan),\n" | |
| " - Tenaga: kecukupan/kapasitas SDM dan pengembangan kompetensi,\n" | |
| " - Pelayanan: aktivitas layanan dan pemanfaatan layanan,\n" | |
| " - Pengelolaan: tata kelola, kebijakan, kolaborasi, dukungan anggaran layanan,\n" | |
| " - Kepatuhan = gabungan koleksi+tenaga; Kinerja = gabungan pelayanan+pengelolaan.\n" | |
| " Jelaskan tanpa menghakimi; fokus pada apa yang angka itu representasikan.\n" | |
| "5) Rekomendasi harus operasional dan spesifik (2β3 butir singkat) untuk tiap baris. Gunakan pola angka untuk menurunkan aksi.\n" | |
| "6) Output HARUS JSON valid saja (tanpa teks tambahan), dengan struktur persis.\n" | |
| ) | |
| user = ( | |
| "Kembalikan JSON:\n" | |
| "{\n" | |
| ' "rows": [\n' | |
| ' {"No":"...","Dimensi":"...","Nilai":..., "Interpretasi":"...","Rekomendasi":"..."}\n' | |
| " ]\n" | |
| "}\n" | |
| "- Urutan dan jumlah baris harus sama.\n" | |
| "- 'Rekomendasi' boleh berupa bullet dengan tanda '-' dalam satu string.\n\n" | |
| f"INPUT:\n{json.dumps(payload, ensure_ascii=False)}" | |
| ) | |
| try: | |
| resp = client.chat_completion( | |
| model=LLM_MODEL_NAME, | |
| messages=[ | |
| {"role": "system", "content": system}, | |
| {"role": "user", "content": user}, | |
| ], | |
| max_tokens=1100, | |
| temperature=0.2, | |
| top_p=0.9, | |
| ) | |
| text = resp.choices[0].message.content.strip() | |
| data = json.loads(text) | |
| rows_out = data.get("rows", []) | |
| if not isinstance(rows_out, list) or len(rows_out) != len(rows): | |
| raise ValueError("Format JSON rows tidak sesuai.") | |
| cleaned = [] | |
| for i, r in enumerate(rows_out): | |
| cleaned.append({ | |
| "No": str(r.get("No", rows[i]["No"])), | |
| "Dimensi": str(r.get("Dimensi", rows[i]["Dimensi"])), | |
| "Nilai": rows[i]["Nilai"], # paksa nilai dari aplikasi | |
| "Interpretasi": str(r.get("Interpretasi","") or ""), | |
| "Rekomendasi": str(r.get("Rekomendasi","") or ""), | |
| }) | |
| return cleaned, "LLM mengisi Interpretasi & Rekomendasi." | |
| except Exception as e: | |
| out = [] | |
| for r in rows: | |
| out.append({k: r.get(k) for k in ["No","Dimensi","Nilai"]} | {"Interpretasi":"", "Rekomendasi":""}) | |
| return out, f"LLM error: {repr(e)}" | |
| def _set_cell_shading(cell, fill_hex="1F1F1F"): | |
| tcPr = cell._tc.get_or_add_tcPr() | |
| shd = OxmlElement("w:shd") | |
| shd.set(qn("w:val"), "clear") | |
| shd.set(qn("w:color"), "auto") | |
| shd.set(qn("w:fill"), fill_hex) | |
| tcPr.append(shd) | |
| def _set_cell_text_color(cell, rgb_hex="FFFFFF"): | |
| for p in cell.paragraphs: | |
| for run in p.runs: | |
| rPr = run._r.get_or_add_rPr() | |
| color = OxmlElement("w:color") | |
| color.set(qn("w:val"), rgb_hex) | |
| rPr.append(color) | |
| def _set_table_borders(table): | |
| tbl = table._tbl | |
| tblPr = tbl.tblPr | |
| if tblPr is None: | |
| tblPr = OxmlElement("w:tblPr") | |
| tbl.append(tblPr) | |
| tblBorders = OxmlElement("w:tblBorders") | |
| for edge in ("top", "left", "bottom", "right", "insideH", "insideV"): | |
| elem = OxmlElement(f"w:{edge}") | |
| elem.set(qn("w:val"), "single") | |
| elem.set(qn("w:sz"), "8") | |
| elem.set(qn("w:space"), "0") | |
| elem.set(qn("w:color"), "FFFFFF") | |
| tblBorders.append(elem) | |
| tblPr.append(tblBorders) | |
| def generate_word_table_interpretasi(header, rows_filled, wilayah_label, jumlah_perpus_by_jenis): | |
| if (not DOCX_AVAILABLE) or (Document is None): | |
| return None | |
| doc = Document() | |
| # Title | |
| title = doc.add_paragraph() | |
| run = title.add_run(header["judul"]) | |
| run.bold = True | |
| run.font.size = Pt(18) | |
| doc.add_paragraph(f"Target sampel per jenis: {header['target_sampel']}") | |
| # Table | |
| cols = ["No", "Dimensi", "Nilai", "Interpretasi", "Rekomendasi"] | |
| table = doc.add_table(rows=1, cols=len(cols)) | |
| table.autofit = True | |
| _set_table_borders(table) | |
| hdr_cells = table.rows[0].cells | |
| for i, c in enumerate(cols): | |
| hdr_cells[i].text = c | |
| _set_cell_shading(hdr_cells[i], "1A1A1A") | |
| _set_cell_text_color(hdr_cells[i], "FFFFFF") | |
| for p in hdr_cells[i].paragraphs: | |
| for rr in p.runs: | |
| rr.bold = True | |
| for r in rows_filled: | |
| row_cells = table.add_row().cells | |
| row_cells[0].text = str(r.get("No","")) | |
| row_cells[1].text = str(r.get("Dimensi","")) | |
| # format nilai: | |
| # - sub/dim biasanya 0β1 β tampilkan 3 desimal | |
| # - IPLM 0β100 β tampilkan 2 desimal | |
| try: | |
| dim = str(r.get("Dimensi","")).strip().lower() | |
| val = _to_float(r.get("Nilai", 0.0), 0.0) | |
| if dim == "nilai iplm": | |
| row_cells[2].text = f"{val:.2f}" | |
| else: | |
| row_cells[2].text = f"{val:.3f}" | |
| except Exception: | |
| row_cells[2].text = str(r.get("Nilai","")) | |
| row_cells[3].text = str(r.get("Interpretasi","") or "") | |
| row_cells[4].text = str(r.get("Rekomendasi","") or "") | |
| for c in row_cells: | |
| _set_cell_shading(c, "262626") | |
| _set_cell_text_color(c, "FFFFFF") | |
| # ===== tambahan: deskripsi jumlah perpustakaan sumber data (gambar 2) ===== | |
| doc.add_paragraph("") # spacer | |
| j = jumlah_perpus_by_jenis or {"sekolah":0,"umum":0,"khusus":0,"total":0} | |
| p = doc.add_paragraph() | |
| p.add_run("Sumber data (jumlah perpustakaan pada tabel agregat wilayah Γ jenis): ").bold = True | |
| doc.add_paragraph( | |
| f"Perpustakaan sekolah = {int(j.get('sekolah',0))}, " | |
| f"perpustakaan umum = {int(j.get('umum',0))}, " | |
| f"perpustakaan khusus = {int(j.get('khusus',0))}, " | |
| f"total = {int(j.get('total',0))}." | |
| ) | |
| outpath = tempfile.mktemp(suffix=".docx") | |
| doc.save(outpath) | |
| return outpath | |
| # ============================================================ | |
| # 15) CORE RUN | |
| # ============================================================ | |
| def _empty_outputs(msg="Data belum siap."): | |
| empty = pd.DataFrame() | |
| empty_fig = go.Figure() | |
| return ( | |
| "", # kpi_md | |
| empty, empty, empty, empty, empty, | |
| None, None, None, None, None, | |
| empty_fig, empty_fig, empty_fig, | |
| msg, # msg | |
| "LLM belum tersedia.", # status llm | |
| None # word path | |
| ) | |
| def run_calc(prov_value, kab_value, kew_value, df_all, df_raw, pop_kab, pop_prov, pop_khusus, meta): | |
| try: | |
| if df_all is None or df_all.empty or df_raw is None or df_raw.empty: | |
| return _empty_outputs("Data belum ter-load. Pastikan file tersedia.") | |
| # Filter | |
| df = df_all.copy() | |
| if prov_value and prov_value != "(Semua)": | |
| df = df[df["PROV_DISP"] == prov_value] | |
| if kab_value and kab_value != "(Semua)": | |
| df = df[df["KAB_DISP"] == kab_value] | |
| if kew_value and kew_value != "(Semua)": | |
| df = df[df["KEW_NORM"] == kew_value] | |
| if df.empty: | |
| return _empty_outputs("Tidak ada data untuk filter ini.") | |
| kew_norm = kew_value if (kew_value and kew_value != "(Semua)") else "(Semua)" | |
| faktor_wilayah_jenis = build_faktor_wilayah_jenis(df, pop_kab, pop_prov, pop_khusus, kew_norm) | |
| agg_jenis_full = build_agg_wilayah_jenis(df, faktor_wilayah_jenis, kew_norm) | |
| agg_total = build_agg_wilayah_total_from_jenis(agg_jenis_full, faktor_wilayah_jenis, kew_norm) | |
| summary_jenis = build_summary_per_jenis(agg_jenis_full, agg_total) | |
| verif_total = build_verif_jenis(faktor_wilayah_jenis, kew_norm) | |
| detail_view = attach_final_to_detail(df, agg_total, meta, kew_norm) | |
| # agg_jenis view (UI hanya sampai indeks dasar) | |
| if agg_jenis_full is None or agg_jenis_full.empty: | |
| agg_jenis_view = agg_jenis_full | |
| else: | |
| kew_norm2 = str(kew_norm).upper() | |
| label_name = "Kab/Kota" if ("KAB" in kew_norm2 or "KOTA" in kew_norm2) else ("Provinsi" if "PROV" in kew_norm2 else "Kab/Kota") | |
| cols_upto = [ | |
| "group_key", | |
| label_name, | |
| "Jenis", | |
| "Jumlah", | |
| "Rata2_sub_koleksi","Rata2_sub_sdm","Rata2_sub_pelayanan","Rata2_sub_pengelolaan", | |
| "Rata2_dim_kepatuhan","Rata2_dim_kinerja", | |
| "Indeks_Dasar_Agregat_0_100", | |
| ] | |
| cols_upto = [c for c in cols_upto if c in agg_jenis_full.columns] | |
| agg_jenis_view = agg_jenis_full[cols_upto].copy() | |
| # RAW download (hasil filter) | |
| raw = df_raw.copy() | |
| if prov_value and prov_value != "(Semua)": | |
| raw = raw[raw["PROV_DISP"] == prov_value] | |
| if kab_value and kab_value != "(Semua)": | |
| raw = raw[raw["KAB_DISP"] == kab_value] | |
| if kew_value and kew_value != "(Semua)": | |
| raw = raw[raw["KEW_NORM"] == kew_value] | |
| # Bell curve per jenis | |
| if detail_view is None or detail_view.empty: | |
| fig_umum = _make_bell_curve_entitas(pd.DataFrame(), "Bell Curve β Jenis: Umum") | |
| fig_sekolah = _make_bell_curve_entitas(pd.DataFrame(), "Bell Curve β Jenis: Sekolah") | |
| fig_khusus = _make_bell_curve_entitas(pd.DataFrame(), "Bell Curve β Jenis: Khusus") | |
| else: | |
| hover_cols = [hc for hc in ["Provinsi", "Kab/Kota", "Jenis"] if hc in detail_view.columns] | |
| def _fig(j): | |
| d = detail_view[detail_view["Jenis"].astype(str).str.lower() == j].copy() | |
| return _make_bell_curve_entitas( | |
| d, | |
| title=f"Bell Curve β Jenis: {j.title()} (Skor: Indeks_Dasar_0_100)", | |
| xcol="Indeks_Dasar_0_100", | |
| label_col=("nm_perpustakaan" if "nm_perpustakaan" in d.columns else "nm_perpustakaan"), | |
| hover_cols=hover_cols, | |
| min_points=2 | |
| ) | |
| fig_sekolah = _fig("sekolah") | |
| fig_umum = _fig("umum") | |
| fig_khusus = _fig("khusus") | |
| kpi_md = build_kpi_markdown(summary_jenis) | |
| # Export xlsx | |
| tmpdir = tempfile.mkdtemp() | |
| prov_slug = (_canon(prov_value or "SEMUA").upper() or "SEMUA") | |
| kab_slug = (_canon(kab_value or "SEMUA").upper() or "SEMUA") | |
| kew_slug = (_canon(kew_value or "SEMUA").upper() or "SEMUA") | |
| p_summary = str(Path(tmpdir) / f"IPLM_RingkasanJenisKeseluruhan_33_88_{prov_slug}_{kab_slug}_{kew_slug}.xlsx") | |
| p_total = str(Path(tmpdir) / f"IPLM_AgregatWilayah_Keseluruhan_33_88_{prov_slug}_{kab_slug}_{kew_slug}.xlsx") | |
| p_raw = str(Path(tmpdir) / f"IPLM_RAW_DATA_{prov_slug}_{kab_slug}_{kew_slug}.xlsx") | |
| p_detail = str(Path(tmpdir) / f"IPLM_DetailEntitas_FinalMenempelWilayah_{prov_slug}_{kab_slug}_{kew_slug}.xlsx") | |
| p_verif = str(Path(tmpdir) / f"IPLM_KecukupanSampel_33_88_{prov_slug}_{kab_slug}_{kew_slug}.xlsx") | |
| summary_jenis.to_excel(p_summary, index=False) | |
| agg_total.to_excel(p_total, index=False) | |
| raw.to_excel(p_raw, index=False) | |
| detail_view.to_excel(p_detail, index=False) | |
| verif_total.to_excel(p_verif, index=False) | |
| # ====== Word tabel interpretasi & rekomendasi ====== | |
| wilayah_txt = kab_value if (kab_value and kab_value != "(Semua)") else (prov_value if (prov_value and prov_value != "(Semua)") else "Nasional/All") | |
| header, rows = build_interpretasi_table_values(agg_total, wilayah_txt, TARGET_RATIO) | |
| # jumlah perpustakaan sumber data (gambar 2) | |
| jumlah_perpus = summarize_jumlah_perpus_dari_agg_jenis(agg_jenis_full, wilayah_txt, kew_norm) | |
| rows_filled, llm_status = llm_fill_interpretasi_rekomendasi( | |
| header=header, | |
| rows=rows, | |
| wilayah_label=wilayah_txt, | |
| kew_label=(kew_value or "(Semua)"), | |
| jumlah_perpus_by_jenis=jumlah_perpus | |
| ) | |
| word_path = generate_word_table_interpretasi(header, rows_filled, wilayah_txt, jumlah_perpus) | |
| msg = ( | |
| f"Selesai (TARGET {TARGET_RATIO*100:.2f}%): raw={len(raw)} | entitas={len(detail_view)} | " | |
| f"wilayah(keseluruhan)={len(agg_total)} | jenis(agregat)={len(agg_jenis_full)}" | |
| + ("" if DOCX_AVAILABLE else " | python-docx tidak tersedia (Word OFF)") | |
| ) | |
| return ( | |
| kpi_md, | |
| summary_jenis, agg_total, agg_jenis_view, detail_view, verif_total, | |
| p_summary, p_total, p_raw, p_detail, p_verif, | |
| fig_umum, fig_sekolah, fig_khusus, | |
| msg, | |
| llm_status, | |
| (word_path if word_path else None) | |
| ) | |
| except Exception as e: | |
| return _empty_outputs(f"Runtime error: {repr(e)}") | |
| # ============================================================ | |
| # 16) UI (NO UPLOAD) | |
| # ============================================================ | |
| def ui_load(force=False): | |
| df_all, df_raw, pop_kab, pop_prov, pop_khusus, meta, info = load_default_files(force=force) | |
| if df_all is None or (isinstance(df_all, pd.DataFrame) and df_all.empty): | |
| return ( | |
| None, None, None, None, None, {}, info, | |
| gr.update(choices=["(Semua)"], value="(Semua)"), | |
| gr.update(choices=["(Semua)"], value="(Semua)"), | |
| gr.update(choices=["(Semua)"], value="(Semua)"), | |
| ) | |
| prov_vals = df_all["PROV_DISP"].dropna().astype(str).tolist() | |
| prov_vals = [v for v in prov_vals if v and v.strip()] | |
| prov_choices = ["(Semua)"] + sorted(set(prov_vals)) | |
| kab_choices = ["(Semua)"] + sorted([x for x in df_all["KAB_DISP"].dropna().unique().tolist() if x]) | |
| kew_choices = ["(Semua)"] + sorted([x for x in df_all["KEW_NORM"].dropna().unique().tolist() if x]) | |
| default_kew = "KAB/KOTA" if "KAB/KOTA" in kew_choices else ("PROVINSI" if "PROVINSI" in kew_choices else "(Semua)") | |
| return ( | |
| df_all, df_raw, pop_kab, pop_prov, pop_khusus, meta, info, | |
| gr.update(choices=prov_choices, value="(Semua)"), | |
| gr.update(choices=kab_choices, value="(Semua)"), | |
| gr.update(choices=kew_choices, value=default_kew), | |
| ) | |
| def on_prov_change(prov_value): | |
| df_all, _, _, _, _, _, _ = load_default_files(force=False) | |
| if df_all is None or df_all.empty: | |
| return gr.update(choices=["(Semua)"], value="(Semua)") | |
| if prov_value is None or prov_value == "(Semua)": | |
| vals = df_all["KAB_DISP"].dropna().unique().tolist() | |
| else: | |
| vals = df_all.loc[df_all["PROV_DISP"] == prov_value, "KAB_DISP"].dropna().unique().tolist() | |
| vals = sorted([v for v in vals if v]) | |
| return gr.update(choices=["(Semua)"] + vals, value="(Semua)") | |
| with gr.Blocks() as demo: | |
| gr.Markdown(f""" | |
| # IPLM 2025 β Final (Target Sampel 33.88% per Jenis) β TANPA Kinerja Relatif / Percentile | |
| Mode NO UPLOAD (cache aktif). File dibaca dari repo/server: | |
| - DATA_FILE = {DATA_FILE} | |
| - POP_KAB = {POP_KAB} | |
| - POP_PROV = {POP_PROV} | |
| - POP_KHUSUS = {POP_KHUSUS} | |
| TARGET RATIO (per jenis): {TARGET_RATIO*100:.2f}% | |
| Dashboard KPI: | |
| - Indeks IPLM FINAL (disesuaikan 33.88%) | |
| - Indeks Dasar (tanpa penyesuaian) | |
| UPDATE LLM + WORD: | |
| - Tabel Word "Interpretasi & Rekomendasi" memakai NILAI APA ADANYA (tanpa dikali 100) untuk sub/dim. | |
| - Baris "Nilai IPLM" memakai Indeks_Final_Wilayah_0_100 apa adanya. | |
| - Di bawah tabel Word ditambahkan ringkasan jumlah perpustakaan sumber data (sekolah/umum/khusus/total) dari tabel agregat wilayah Γ jenis. | |
| """) | |
| state_df = gr.State(None) | |
| state_raw = gr.State(None) | |
| state_pop_kab = gr.State(None) | |
| state_pop_prov = gr.State(None) | |
| state_pop_khusus = gr.State(None) | |
| state_meta = gr.State({}) | |
| info_box = gr.Markdown() | |
| with gr.Row(): | |
| dd_prov = gr.Dropdown(label="Provinsi", choices=["(Semua)"], value="(Semua)") | |
| dd_kab = gr.Dropdown(label="Kab/Kota", choices=["(Semua)"], value="(Semua)") | |
| dd_kew = gr.Dropdown(label="Kewenangan", choices=["(Semua)"], value="(Semua)") | |
| dd_prov.change(fn=on_prov_change, inputs=[dd_prov], outputs=dd_kab) | |
| run_btn = gr.Button("Jalankan Perhitungan") | |
| msg_out = gr.Markdown() | |
| kpi_out = gr.Markdown() | |
| gr.Markdown("## Ringkasan (Jenis + Keseluruhan)") | |
| out_summary = gr.DataFrame(interactive=False) | |
| gr.Markdown("## Agregat Wilayah (Keseluruhan) β FIX avg3") | |
| out_agg_total = gr.DataFrame(interactive=False) | |
| gr.Markdown("## Agregat Wilayah x Jenis β (ditampilkan sampai Indeks Dasar)") | |
| out_agg_jenis = gr.DataFrame(interactive=False) | |
| gr.Markdown("## Detail Entitas (Final menempel dari wilayah)") | |
| out_detail = gr.DataFrame(interactive=False) | |
| gr.Markdown("## Kecukupan Sampel 33.88%") | |
| out_verif = gr.DataFrame(interactive=False) | |
| gr.Markdown("## Bell Curve β Indeks Dasar per Entitas (per Jenis) + Nama Perpustakaan") | |
| gr.Markdown("### Perpustakaan Umum") | |
| bell_umum = gr.Plot(scale=1) | |
| gr.Markdown("### Perpustakaan Sekolah") | |
| bell_sekolah = gr.Plot(scale=1) | |
| gr.Markdown("### Perpustakaan Khusus") | |
| bell_khusus = gr.Plot(scale=1) | |
| gr.Markdown("## Status LLM (Isi Interpretasi & Rekomendasi)") | |
| llm_status_out = gr.Markdown() | |
| with gr.Row(): | |
| dl_summary = gr.DownloadButton(label="Download Ringkasan (.xlsx)") | |
| dl_total = gr.DownloadButton(label="Download Agregat Wilayah (.xlsx)") | |
| dl_raw = gr.DownloadButton(label="Download Data Mentah (.xlsx)") | |
| dl_detail = gr.DownloadButton(label="Download Detail Entitas (.xlsx)") | |
| dl_verif = gr.DownloadButton(label="Download Kecukupan Sampel (.xlsx)") | |
| dl_word = gr.DownloadButton(label="Download Word: Interpretasi & Rekomendasi (.docx)" if DOCX_AVAILABLE else "Download Word (OFF)") | |
| run_btn.click( | |
| fn=run_calc, | |
| inputs=[dd_prov, dd_kab, dd_kew, state_df, state_raw, state_pop_kab, state_pop_prov, state_pop_khusus, state_meta], | |
| outputs=[ | |
| kpi_out, | |
| out_summary, out_agg_total, out_agg_jenis, out_detail, out_verif, | |
| dl_summary, dl_total, dl_raw, dl_detail, dl_verif, | |
| bell_umum, bell_sekolah, bell_khusus, | |
| msg_out, | |
| llm_status_out, | |
| dl_word | |
| ] | |
| ) | |
| demo.load( | |
| fn=lambda: ui_load(force=False), | |
| inputs=[], | |
| outputs=[state_df, state_raw, state_pop_kab, state_pop_prov, state_pop_khusus, state_meta, info_box, dd_prov, dd_kab, dd_kew] | |
| ) | |
| demo.launch() |