# NOTE: removed non-Python header (Hugging Face blob-view page residue: filename/author/commit/size lines)
# -*- coding: utf-8 -*-
"""
app.py — Dashboard Kekurangan Sampel IPLM (TANPA HITUNG INDEKS)
- Target pengumpulan = 68% (bisa diubah TARGET_COVERAGE)
- KAB/KOTA:
* Sekolah: target = 68% dari (SD + SMP)
* Umum: target = 68% dari (Kecamatan + Desa/Kelurahan)
- PROVINSI:
* SMA: target = 68% dari (Total SMA)
Output utama:
- Tabel verifikasi: target & kekurangan (berapa unit lagi)
- Grafik GAP: kekurangan unit (bukan persen)
- Download:
1) Rekap (Verifikasi + Detail ringkas) .xlsx
2) Data mentah subset DM sesuai filter .xlsx
3) Laporan Word (.docx) + narasi LLM (kekurangan sampel & rencana aksi)
"""
import os
import re
import tempfile
from pathlib import Path
import gradio as gr
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from huggingface_hub import InferenceClient
# Word report
from docx import Document
# Pie opsional (butuh kaleido)
import plotly.express as px
# Detect whether static image export is possible: plotly's fig.write_image()
# requires the 'kaleido' engine; pies in the Word report are skipped without it.
try:
    import kaleido  # noqa: F401
    HAS_KALEIDO = True
except Exception:
    HAS_KALEIDO = False
# ============================================================
# 1) KONFIGURASI FILE
# ============================================================
# Input workbooks expected next to app.py (relative paths).
DATA_FILE = "IPLM_clean_Manual.xlsx"  # incoming DM sample data (multi-sheet)
META_KAB_FILE = "jumlahdesa_fixed (1).xlsx"  # kecamatan & desa/kelurahan counts per kab/kota
META_SDSMP_FILE = "SD-SMP-kab.xlsx"  # SD & SMP counts per kab/kota
META_SMA_FILE = "SMA.xlsx"  # SMA counts per province
# ============================================================
# 1a) TARGET CAKUPAN (KEBIJAKAN)
# ============================================================
# Policy constant: fraction of the unit population that must be sampled.
TARGET_COVERAGE = 0.68
# ============================================================
# 1b) KONFIGURASI LLM (HF Inference)
# ============================================================
USE_LLM = True  # master switch; the rule-based report is used when off or unavailable
LLM_MODEL_NAME = "meta-llama/Meta-Llama-3-8B-Instruct"
# Token resolution order: HF_SECRET, then HUGGINGFACEHUB_API_TOKEN, then HF_API_TOKEN.
HF_TOKEN = (
    os.getenv("HF_SECRET")
    or os.getenv("HUGGINGFACEHUB_API_TOKEN")
    or os.getenv("HF_API_TOKEN")
)
# Lazily-created singleton InferenceClient (see get_llm_client).
_HF_CLIENT = None
def get_llm_client():
    """Return a memoized HF InferenceClient, or None when construction fails.

    A failed attempt leaves the cache as None, so the next call retries.
    """
    global _HF_CLIENT
    if _HF_CLIENT is None:
        try:
            # pass the token only when one was resolved from the environment
            kwargs = {"token": HF_TOKEN} if HF_TOKEN else {}
            _HF_CLIENT = InferenceClient(model=LLM_MODEL_NAME, **kwargs)
        except Exception:
            _HF_CLIENT = None
            return None
    return _HF_CLIENT
# ============================================================
# 2) UTIL
# ============================================================
def _canon(s: str) -> str:
return re.sub(r"[^a-z0-9]+", "", str(s).lower())
def pick_col(df, candidates):
    """Return the first column of *df* that matches any candidate name.

    Exact matches win; otherwise names are compared case/punctuation
    insensitively via _canon. Returns None when nothing matches.
    """
    cols = df.columns
    for cand in candidates:
        if cand in cols:
            return cand
    by_canon = {_canon(col): col for col in cols}
    for cand in candidates:
        hit = by_canon.get(_canon(cand))
        if hit is not None:
            return hit
    return None
def coerce_num(val):
    """Parse a messy numeric cell (Indonesian or English formatting) into a float.

    Handles NaN, empty/dash placeholders, "Rp"/"%" decorations, and both
    thousands/decimal separator conventions:
      - "1.234,56"  -> 1234.56   (Indonesian style; old code returned 1.23456)
      - "1,234.56"  -> 1234.56   (English style)
      - "1.234.567" -> 1234567.0 (dots as thousands separators; old code -> NaN)
    Returns np.nan when the value cannot be parsed.
    """
    if pd.isna(val):
        return np.nan
    t = str(val).strip()
    if t == "" or t in {"-", "–", "—"}:
        return np.nan
    # strip currency/percent decorations and any remaining non-numeric noise
    t = t.replace("\u00a0", " ").replace("Rp", "").replace("%", "")
    t = re.sub(r"[^0-9,.\-]", "", t)
    has_dot, has_comma = "." in t, "," in t
    if has_dot and has_comma:
        # the rightmost separator is the decimal point; the other marks thousands
        if t.rfind(",") > t.rfind("."):
            t = t.replace(".", "").replace(",", ".")
        else:
            t = t.replace(",", "")
    elif has_comma:
        # a single comma is a decimal separator; repeated commas are thousands
        t = t.replace(",", ".") if t.count(",") == 1 else t.replace(",", "")
    elif t.count(".") > 1:
        # repeated dots are thousands separators (e.g. "1.234.567")
        t = t.replace(".", "")
    try:
        return float(t)
    except Exception:
        return np.nan
def norm_kew(v):
    """Normalize a raw 'kewenangan' value to KAB/KOTA, PROVINSI, or PUSAT.

    Unrecognized values are returned stripped and uppercased; NaN -> None.
    """
    if pd.isna(v):
        return None
    label = str(v).strip().upper()
    for needles, canonical in (
        (("KAB", "KOTA"), "KAB/KOTA"),
        (("PROV",), "PROVINSI"),
        (("PUSAT", "NASIONAL"), "PUSAT"),
    ):
        if any(n in label for n in needles):
            return canonical
    return label
def _norm_text(x):
if pd.isna(x):
return None
t = str(x).strip().upper()
return " ".join(t.split())
def norm_prov_label(s):
    """Key-normalize a province name for joining across sources.

    Drops 'PROVINSI'/'PROPINSI' words and every non-alphanumeric character.
    """
    if pd.isna(s):
        return None
    label = str(s).upper().replace("PROVINSI", "").replace("PROPINSI", "")
    return re.sub(r"[^A-Z0-9]+", "", " ".join(label.split()))
def norm_kab_label(s):
    """Key-normalize a kab/kota name for joining across sources."""
    if pd.isna(s):
        return None
    label = str(s).upper()
    # unify the many spellings of the KAB/KOTA prefix (replacement order matters)
    for old, new in (
        ("KABUPATEN", "KAB"),
        ("KAB.", "KAB"),
        ("KOTA ADMINISTRASI", "KOTA"),
        ("KOTA ADM.", "KOTA"),
        ("KOTA.", "KOTA"),
    ):
        label = label.replace(old, new)
    return re.sub(r"[^A-Z0-9]+", "", " ".join(label.split()))
# Display cleanup: avoid doubled "PROVINSI PROVINSI" labels in dropdowns.
def clean_prov_display(s):
    """Tidy a province label for display; collapse duplicated 'PROVINSI' prefixes."""
    if pd.isna(s):
        return None
    label = " ".join(str(s).upper().split())
    dup = "PROVINSI PROVINSI "
    # peel duplicated prefixes one at a time, then fix any mid-string duplicates
    while label.startswith(dup):
        label = label.replace(dup, "PROVINSI ", 1)
    return label.replace(dup, "PROVINSI ")
def clean_kab_display(s):
    """Tidy a kab/kota label for display (uniform 'KAB.' prefix, plain 'KOTA')."""
    if pd.isna(s):
        return None
    label = " ".join(str(s).upper().split())
    # normalize the regency prefix, then strip the administrative-city suffix
    label = label.replace("KABUPATEN", "KAB.").replace("KAB ", "KAB. ")
    return label.replace("KOTA ADMINISTRASI", "KOTA")
def make_pie_plotly(num, den, title):
    """Render a 'Terjangkau vs Belum Terjangkau' pie to a temp PNG; return its path.

    Returns None when kaleido is unavailable or image export fails.
    A missing/zero/negative denominator renders as fully 'Belum Terjangkau'.
    """
    if not HAS_KALEIDO:
        return None
    labels = ["Terjangkau", "Belum Terjangkau"]
    if den is None or pd.isna(den) or den <= 0:
        values = [0, 1]
    else:
        num = 0 if pd.isna(num) else float(num)
        den = float(den)
        values = [max(num, 0), max(den - num, 0)]
    fig = px.pie(values=values, names=labels, title=title, hole=0.35)
    # mkstemp instead of the deprecated, race-prone tempfile.mktemp
    fd, tmp = tempfile.mkstemp(suffix=".png")
    os.close(fd)
    try:
        fig.write_image(tmp, scale=2)
        return tmp
    except Exception:
        return None
# ============================================================
# 3) LOAD DATA (DM + META)
# ============================================================
# --- Module-level state populated at import time from the Excel sources ---
DATA_INFO = ""       # markdown status string shown in the UI header
df_all_raw = None    # concatenated DM rows from every sheet of DATA_FILE
meta_kab_df = None  # kab_key -> (Jml_Kecamatan, Jml_DesaKel, Jml_SD, Jml_SMP)
meta_sma_df = None  # prov_key -> (Jml_SMA)
# Resolved DM column names (None when not found in the workbook):
prov_col_glob = None
kab_col_glob = None
kew_col_glob = None
jenis_col_glob = None
subjenis_col_glob = None
nama_col_glob = None
extra_info = []      # per-source load status lines appended to DATA_INFO below
# ---- Load DM (all sheets stacked into one frame) ----
try:
    fp = Path(DATA_FILE)
    if not fp.exists():
        raise FileNotFoundError(f"File tidak ditemukan: {DATA_FILE}")
    xls = pd.ExcelFile(fp)
    frames = [pd.read_excel(fp, sheet_name=s) for s in xls.sheet_names]
    df_all_raw = pd.concat(frames, ignore_index=True, sort=False)
    # Resolve column names leniently (case/punctuation-insensitive via pick_col).
    prov_col_glob = pick_col(df_all_raw, ["provinsi", "Provinsi", "PROVINSI"])
    kab_col_glob = pick_col(df_all_raw, ["kab_kota", "Kab_Kota", "Kab/Kota", "KAB/KOTA", "kabupaten_kota", "kota"])
    kew_col_glob = pick_col(df_all_raw, ["kewenangan", "jenis_kewenangan", "Kewenangan", "KEWENANGAN"])
    jenis_col_glob = pick_col(df_all_raw, ["jenis_perpustakaan", "JENIS_PERPUSTAKAAN", "Jenis Perpustakaan", "jenis perpustakaan"])
    subjenis_col_glob = pick_col(df_all_raw, ["sub_jenis_perpus", "Sub Jenis", "SubJenis", "subjenis", "jenjang"])
    nama_col_glob = pick_col(df_all_raw, ["nama_perpustakaan", "nm_perpustakaan", "nm_instansi_lembaga", "Nama Perpustakaan"])
    # Normalized kewenangan column (KAB/KOTA, PROVINSI, PUSAT, or raw upper).
    if kew_col_glob:
        df_all_raw["KEW_NORM"] = df_all_raw[kew_col_glob].apply(norm_kew)
    else:
        df_all_raw["KEW_NORM"] = None
    # Map library-type labels onto dataset buckets (sekolah/umum/khusus).
    val_map_jenis = {
        "PERPUSTAKAAN SEKOLAH": "sekolah",
        "SEKOLAH": "sekolah",
        "PERPUSTAKAAN UMUM": "umum",
        "UMUM": "umum",
        "PERPUSTAKAAN DAERAH": "umum",
        "PERPUSTAKAAN KHUSUS": "khusus",
        "KHUSUS": "khusus",
    }
    if jenis_col_glob:
        df_all_raw["_dataset"] = df_all_raw[jenis_col_glob].apply(_norm_text).map(val_map_jenis)
    else:
        df_all_raw["_dataset"] = None
    # === cleaned display columns used by dropdowns and filtering ===
    if prov_col_glob and prov_col_glob in df_all_raw.columns:
        df_all_raw["prov_clean"] = df_all_raw[prov_col_glob].apply(clean_prov_display)
    else:
        df_all_raw["prov_clean"] = None
    if kab_col_glob and kab_col_glob in df_all_raw.columns:
        df_all_raw["kab_clean"] = df_all_raw[kab_col_glob].apply(clean_kab_display)
    else:
        df_all_raw["kab_clean"] = None
    DATA_INFO = f"Data terbaca dari: **{DATA_FILE}** | Jumlah baris: **{len(df_all_raw)}**"
except Exception as e:
    # Any failure leaves df_all_raw None; the UI shows the error in the header.
    df_all_raw = None
    DATA_INFO = f"⚠️ Gagal memuat `{DATA_FILE}` | Error: `{e}`"
# ---- Meta Kab (kecamatan / desa-kelurahan counts per kab/kota) ----
try:
    meta_kab_raw = pd.read_excel(META_KAB_FILE)
    col_kab = pick_col(meta_kab_raw, ["Kab/Kota", "Kab_Kota", "kab/kota", "kabupaten_kota"])
    col_kec = pick_col(meta_kab_raw, ["Kecamatan", "jml_kecamatan", "jumlah_kecamatan"])
    col_des = pick_col(meta_kab_raw, ["Desa/Kel", "Desa Kelurahan", "Desa", "Desa_kel"])
    if col_kab and col_kec and col_des:
        meta_kab_df = pd.DataFrame({
            "Kab_Kota_Label": meta_kab_raw[col_kab].astype(str).str.strip(),
            "Jml_Kecamatan": meta_kab_raw[col_kec].apply(coerce_num),
            "Jml_DesaKel": meta_kab_raw[col_des].apply(coerce_num),
        })
        # join key shared with the DM data and the SD/SMP meta
        meta_kab_df["kab_key"] = meta_kab_df["Kab_Kota_Label"].apply(norm_kab_label)
        extra_info.append(f"Meta Kab/Kota (Kec/Desa) terbaca: **{META_KAB_FILE}** (n={len(meta_kab_df)})")
    else:
        meta_kab_df = None
        extra_info.append(f"⚠️ Kolom kunci meta kab tidak lengkap di `{META_KAB_FILE}`")
except Exception as e:
    meta_kab_df = None
    extra_info.append(f"⚠️ Gagal memuat `{META_KAB_FILE}` ({e})")
# ---- Meta SD/SMP (school counts per kab/kota, merged into meta_kab_df) ----
try:
    sd_smp_raw = pd.read_excel(META_SDSMP_FILE)
    col_kab2 = pick_col(sd_smp_raw, [
        "Kabupaten/Kota_Kabupaten/Kota", "Kabupaten/Kota",
        "Kab/Kota", "Kab_Kota", "kab/kota", "kabupaten_kota"
    ])
    col_sd = pick_col(sd_smp_raw, ["SD", "Jumlah SD", "Total SD", "SD_Total", "jml_sd", "Jml_SD"])
    col_smp = pick_col(sd_smp_raw, ["SMP", "Jumlah SMP", "Total SMP", "SMP_Total", "jml_smp", "Jml_SMP"])
    if col_kab2 and (col_sd or col_smp):
        df_sd_smp = pd.DataFrame({
            "Kab_Kota_Label_SD": sd_smp_raw[col_kab2].astype(str).str.strip(),
        })
        # a missing SD or SMP column degrades to a constant 0 contribution
        df_sd_smp["Jml_SD"] = sd_smp_raw[col_sd].apply(coerce_num) if col_sd else 0.0
        df_sd_smp["Jml_SMP"] = sd_smp_raw[col_smp].apply(coerce_num) if col_smp else 0.0
        df_sd_smp["kab_key"] = df_sd_smp["Kab_Kota_Label_SD"].apply(norm_kab_label)
        # collapse duplicate kab rows into summed counts
        df_sd_smp_grp = df_sd_smp.groupby("kab_key", as_index=False).agg({
            "Jml_SD": "sum",
            "Jml_SMP": "sum",
        })
        if meta_kab_df is not None:
            meta_kab_df = meta_kab_df.merge(df_sd_smp_grp, on="kab_key", how="left")
        else:
            meta_kab_df = df_sd_smp_grp.copy()
            # NOTE(review): positional assignment — relies on both groupbys
            # producing the same sorted kab_key order so labels align; confirm
            # if sort=False is ever introduced.
            meta_kab_df["Kab_Kota_Label"] = df_sd_smp.groupby("kab_key")["Kab_Kota_Label_SD"].first().values
        extra_info.append(f"Meta SD/SMP terbaca: **{META_SDSMP_FILE}** (n={len(df_sd_smp_grp)})")
    else:
        extra_info.append(f"⚠️ Kolom kunci SD/SMP tidak lengkap di `{META_SDSMP_FILE}`")
except Exception as e:
    extra_info.append(f"⚠️ Gagal memuat `{META_SDSMP_FILE}` ({e})")
# ---- Meta SMA (per-province SMA counts) ----
try:
    meta_sma_raw = pd.read_excel(META_SMA_FILE)
    col_prov_sma = pick_col(meta_sma_raw, [
        "Provinsi", "provinsi", "PROVINSI", "NAMA_PROVINSI", "Nama Provinsi",
        "nm_prov", "nm_provinsi", "prov"
    ])
    col_sma = pick_col(meta_sma_raw, [
        "Total SMA", "TOTAL_SMA", "TOTAL", "total",
        "Jml_SMA", "Jumlah SMA", "SMA", "SMA_Total",
        "jumlah_sma", "total_sma", "jml_sma"
    ])
    if col_prov_sma is None:
        raise ValueError("Kolom provinsi tidak ditemukan di file SMA.")
    if col_sma is None:
        raise ValueError("Kolom jumlah SMA tidak ditemukan di file SMA.")
    meta_sma_df = pd.DataFrame({
        "Provinsi_Label": meta_sma_raw[col_prov_sma].astype(str).str.strip(),
        "Jml_SMA": meta_sma_raw[col_sma].apply(coerce_num),
    })
    meta_sma_df["prov_key"] = meta_sma_df["Provinsi_Label"].apply(norm_prov_label)
    # one row per province key; duplicate province rows are summed
    meta_sma_df = meta_sma_df.groupby(["prov_key"], as_index=False).agg({
        "Provinsi_Label": "first",
        "Jml_SMA": "sum"
    })
    extra_info.append(f"Meta SMA terbaca: **{META_SMA_FILE}** ({len(meta_sma_df)} provinsi)")
except Exception as e:
    meta_sma_df = None
    extra_info.append(f"⚠️ Gagal memuat file SMA: {e}")
# append per-source load notes under the main header status line
if extra_info:
    DATA_INFO = DATA_INFO + "<br>" + "<br>".join(extra_info)
# ============================================================
# 4) DROPDOWN
# ============================================================
def all_prov_choices():
    """Province dropdown options: '(Semua)' plus sorted distinct prov_clean values."""
    if df_all_raw is None or "prov_clean" not in df_all_raw.columns:
        return ["(Semua)"]
    labels = df_all_raw["prov_clean"].dropna().astype(str).str.strip()
    return ["(Semua)"] + sorted(v for v in labels.unique() if v)
def get_kab_choices_for_prov(prov_value):
    """Kab/kota dropdown options, optionally restricted to one province."""
    if df_all_raw is None or "kab_clean" not in df_all_raw.columns:
        return ["(Semua)"]
    if prov_value in (None, "(Semua)"):
        subset = df_all_raw["kab_clean"]
    else:
        mask = df_all_raw["prov_clean"].astype(str).str.strip() == str(prov_value).strip()
        subset = df_all_raw.loc[mask, "kab_clean"]
    labels = subset.dropna().astype(str).str.strip()
    return ["(Semua)"] + sorted(v for v in labels.unique() if v)
def all_kew_choices():
    """Kewenangan dropdown options from the normalized KEW_NORM column."""
    if df_all_raw is None:
        return ["(Semua)"]
    kew = df_all_raw.get("KEW_NORM", pd.Series(dtype=object)).dropna().astype(str).str.strip()
    options = sorted(v for v in kew.unique() if v != "")
    return ["(Semua)"] + options if options else ["(Semua)"]
# Initial dropdown contents, computed once at startup.
prov_choices = all_prov_choices()
kab_choices = get_kab_choices_for_prov(prov_choices[0] if prov_choices else "(Semua)")
kew_choices = all_kew_choices()
# Default to KAB/KOTA when present, otherwise the first available option.
default_kew = "KAB/KOTA" if "KAB/KOTA" in kew_choices else (kew_choices[0] if kew_choices else "(Semua)")
# ============================================================
# 5) VERIFIKASI GAP (TARGET 68%) — OUTPUT: KEKURANGAN UNIT
# ============================================================
def compute_gap_verification(df_filtered: pd.DataFrame, kew_value: str) -> pd.DataFrame:
    """Build the verification table: sample counts, 68% targets, and shortfalls.

    KAB/KOTA rows: school target = TARGET_COVERAGE * (SD + SMP) and public
    target = TARGET_COVERAGE * (Kecamatan + Desa/Kelurahan), per kab/kota.
    PROVINSI rows: SMA target = TARGET_COVERAGE * total SMA, per province.
    Targets are rounded up (np.ceil) and shortfalls clamped at zero.

    Returns an empty DataFrame when there is nothing to analyze, or a
    single-column 'Info' frame explaining why analysis was not possible.
    The output column names are a contract used by the chart, the report,
    and the Excel export — do not rename them casually.
    """
    if df_filtered is None or len(df_filtered) == 0:
        return pd.DataFrame()
    kew_norm = str(kew_value or "").upper()
    # =================== KAB/KOTA ===================
    if ("KAB" in kew_norm or "KOTA" in kew_norm):
        if "kab_clean" not in df_filtered.columns or meta_kab_df is None:
            return pd.DataFrame({"Info": ["Kolom kab_clean atau meta kab tidak tersedia."]})
        tmp = df_filtered.copy()
        tmp = tmp[pd.notna(tmp["kab_clean"])]
        if tmp.empty:
            return pd.DataFrame()
        tmp["kab_key"] = tmp["kab_clean"].apply(norm_kab_label)
        # per-kab sample counts: all rows, school rows, public-library rows
        g_total = tmp.groupby("kab_key").size().rename("Sampel Total").reset_index()
        tmp_sek = tmp[tmp["_dataset"] == "sekolah"].copy() if "_dataset" in tmp.columns else tmp.copy()
        g_sek_total = tmp_sek.groupby("kab_key").size().rename("Sampel Sekolah").reset_index()
        tmp_umum = tmp[tmp["_dataset"] == "umum"].copy() if "_dataset" in tmp.columns else tmp.copy()
        g_umum = tmp_umum.groupby("kab_key").size().rename("Sampel Umum").reset_index()
        # only merge the meta columns that actually exist (meta files are optional)
        use_cols = ["kab_key", "Kab_Kota_Label", "Jml_Kecamatan", "Jml_DesaKel", "Jml_SD", "Jml_SMP"]
        use_cols = [c for c in use_cols if c in meta_kab_df.columns]
        merged = (
            g_total
            .merge(g_sek_total, on="kab_key", how="left")
            .merge(g_umum, on="kab_key", how="left")
            .merge(meta_kab_df[use_cols], on="kab_key", how="left")
        )
        for c in ["Sampel Total", "Sampel Sekolah", "Sampel Umum"]:
            if c in merged.columns:
                merged[c] = merged[c].fillna(0).astype(int)
        # populations from meta; NaN propagates when the meta columns are absent
        merged["Populasi Sekolah (SD+SMP)"] = merged[["Jml_SD", "Jml_SMP"]].sum(axis=1, skipna=True)
        merged["Populasi Admin (Kec+Desa/Kel)"] = merged.get("Jml_Kecamatan", np.nan) + merged.get("Jml_DesaKel", np.nan)
        # round targets UP so coverage is at least TARGET_COVERAGE
        merged["Target Sekolah (68%)"] = np.ceil(merged["Populasi Sekolah (SD+SMP)"] * TARGET_COVERAGE)
        merged["Target Umum (68%)"] = np.ceil(merged["Populasi Admin (Kec+Desa/Kel)"] * TARGET_COVERAGE)
        # shortfall = target - collected, clamped at 0 (NaN target -> 0)
        merged["Kekurangan Sampel Sekolah"] = merged.apply(
            lambda r: max(int(r["Target Sekolah (68%)"] - r["Sampel Sekolah"]) if pd.notna(r["Target Sekolah (68%)"]) else 0, 0),
            axis=1
        )
        merged["Kekurangan Sampel Umum"] = merged.apply(
            lambda r: max(int(r["Target Umum (68%)"] - r["Sampel Umum"]) if pd.notna(r["Target Umum (68%)"]) else 0, 0),
            axis=1
        )
        out = pd.DataFrame({
            "Kab/Kota": merged.get("Kab_Kota_Label", merged["kab_key"]),
            "Sampel Total": merged["Sampel Total"],
            "Sampel Sekolah": merged["Sampel Sekolah"],
            "Populasi Sekolah (SD+SMP)": merged["Populasi Sekolah (SD+SMP)"],
            "Target Sekolah (68%)": merged["Target Sekolah (68%)"],
            "Kekurangan Sampel Sekolah": merged["Kekurangan Sampel Sekolah"],
            "Sampel Umum": merged["Sampel Umum"],
            "Populasi Admin (Kec+Desa/Kel)": merged["Populasi Admin (Kec+Desa/Kel)"],
            "Target Umum (68%)": merged["Target Umum (68%)"],
            "Kekurangan Sampel Umum": merged["Kekurangan Sampel Umum"],
        })
        return out.sort_values("Kab/Kota").reset_index(drop=True).round(0)
    # =================== PROVINSI ===================
    if ("PROV" in kew_norm):
        if meta_sma_df is None or "prov_clean" not in df_filtered.columns:
            return pd.DataFrame({"Info": ["Meta SMA atau kolom prov_clean tidak tersedia."]})
        tmp = df_filtered.copy()
        tmp = tmp[pd.notna(tmp["prov_clean"])]
        if tmp.empty:
            return pd.DataFrame({"Info": ["Tidak ada data sampel kewenangan provinsi."]})
        tmp["prov_key"] = tmp["prov_clean"].apply(norm_prov_label)
        # start from the sampled provinces so other provinces don't leak in
        g_total = tmp.groupby("prov_key").size().rename("Sampel Total (Prov)").reset_index()
        tmp_sek = tmp[tmp["_dataset"] == "sekolah"].copy() if "_dataset" in tmp.columns else tmp.copy()
        g_sma = tmp_sek.groupby("prov_key").size().rename("Sampel SMA (DM)").reset_index()
        merged = (
            g_total
            .merge(g_sma, on="prov_key", how="left")
            .merge(meta_sma_df[["prov_key", "Provinsi_Label", "Jml_SMA"]], on="prov_key", how="left")
        )
        merged["Sampel SMA (DM)"] = merged["Sampel SMA (DM)"].fillna(0).astype(int)
        merged["Populasi SMA (Meta)"] = merged["Jml_SMA"]
        merged["Target SMA (68%)"] = np.ceil(merged["Populasi SMA (Meta)"] * TARGET_COVERAGE)
        merged["Kekurangan Sampel SMA"] = merged.apply(
            lambda r: max(int(r["Target SMA (68%)"] - r["Sampel SMA (DM)"]) if pd.notna(r["Target SMA (68%)"]) else 0, 0),
            axis=1
        )
        out = pd.DataFrame({
            "Provinsi": merged["Provinsi_Label"].fillna(merged["prov_key"]),
            "Sampel Total (Prov)": merged["Sampel Total (Prov)"].fillna(0).astype(int),
            "Sampel SMA (DM)": merged["Sampel SMA (DM)"],
            "Populasi SMA (Meta)": merged["Populasi SMA (Meta)"],
            "Target SMA (68%)": merged["Target SMA (68%)"],
            "Kekurangan Sampel SMA": merged["Kekurangan Sampel SMA"],
        })
        return out.sort_values("Provinsi").reset_index(drop=True).round(0)
    return pd.DataFrame({"Info": ["Kewenangan tidak dikenali / tidak didukung."]})
# ============================================================
# 6) GRAFIK GAP (KEKURANGAN UNIT) — BUKAN PERSEN
# ============================================================
def make_gap_figure(verif_df: pd.DataFrame, kew_value: str) -> go.Figure:
    """Bar chart of the remaining sample shortfall, in units (not percentages).

    KAB/KOTA: grouped bars (school vs public-library shortfall) per kab/kota,
    sorted by combined shortfall. PROVINSI: one bar per province for the SMA
    shortfall. Any other table shape yields a labeled empty figure.
    """
    fig = go.Figure()
    if verif_df is None or verif_df.empty:
        fig.update_layout(
            title="Kekurangan Sampel (tidak ada data)",
            xaxis_title="Unit",
            yaxis_title="Kekurangan (unit)",
        )
        return fig
    kew_norm = str(kew_value or "").upper()
    def _num(s):
        # tolerant numeric coercion for plotting (missing values -> 0)
        return pd.to_numeric(s, errors="coerce").fillna(0).astype(int)
    if ("KAB" in kew_norm or "KOTA" in kew_norm) and ("Kab/Kota" in verif_df.columns):
        dfp = verif_df.copy()
        # order bars by total shortfall so the worst units appear first
        dfp["gap_total"] = _num(dfp.get("Kekurangan Sampel Sekolah", 0)) + _num(dfp.get("Kekurangan Sampel Umum", 0))
        dfp = dfp.sort_values("gap_total", ascending=False)
        x = dfp["Kab/Kota"].astype(str).tolist()
        gap_sek = _num(dfp.get("Kekurangan Sampel Sekolah", 0))
        gap_umum = _num(dfp.get("Kekurangan Sampel Umum", 0))
        fig.add_trace(go.Bar(
            x=x, y=gap_sek, name="Kekurangan Sekolah (SD+SMP)",
            text=gap_sek, textposition="outside",
            hovertemplate="%{x}<br>Kekurangan sekolah: %{y} unit<extra></extra>"
        ))
        fig.add_trace(go.Bar(
            x=x, y=gap_umum, name="Kekurangan Umum (Kec+Desa/Kel)",
            text=gap_umum, textposition="outside",
            hovertemplate="%{x}<br>Kekurangan umum: %{y} unit<extra></extra>"
        ))
        fig.update_layout(
            title=f"Kekurangan Sampel yang Harus Dikumpulkan (KAB/KOTA) — Target {int(TARGET_COVERAGE*100)}%",
            barmode="group",
            xaxis_title="Kab/Kota",
            yaxis_title="Kekurangan (unit)",
            margin=dict(l=40, r=20, t=60, b=140),
        )
        fig.update_xaxes(tickangle=-35)
        return fig
    if ("PROV" in kew_norm) and ("Provinsi" in verif_df.columns):
        dfp = verif_df.copy()
        dfp["gap_total"] = _num(dfp.get("Kekurangan Sampel SMA", 0))
        dfp = dfp.sort_values("gap_total", ascending=False)
        x = dfp["Provinsi"].astype(str).tolist()
        gap_sma = _num(dfp.get("Kekurangan Sampel SMA", 0))
        fig.add_trace(go.Bar(
            x=x, y=gap_sma, name="Kekurangan SMA",
            text=gap_sma, textposition="outside",
            hovertemplate="%{x}<br>Kekurangan SMA: %{y} unit<extra></extra>"
        ))
        fig.update_layout(
            title=f"Kekurangan Sampel yang Harus Dikumpulkan (PROVINSI) — Target {int(TARGET_COVERAGE*100)}%",
            xaxis_title="Provinsi",
            yaxis_title="Kekurangan (unit)",
            margin=dict(l=40, r=20, t=60, b=140),
        )
        fig.update_xaxes(tickangle=-35)
        return fig
    # unrecognized table shape — emit a labeled empty figure
    fig.update_layout(
        title="Kekurangan Sampel — format data tidak dikenali",
        xaxis_title="Unit",
        yaxis_title="Kekurangan (unit)",
    )
    return fig
# ============================================================
# 7) LLM NARASI (GAP)
# ============================================================
def build_context_gap(verif_df: pd.DataFrame, prov: str, kab: str, kew: str) -> str:
    """Build the compact plain-text gap summary fed into the LLM prompt."""
    if kab and kab != "(Semua)":
        wilayah = kab
    elif prov and prov != "(Semua)":
        wilayah = prov
    else:
        wilayah = "NASIONAL"
    parts = [
        f"Wilayah filter: {wilayah}",
        f"Kewenangan: {kew}",
        f"Target pengumpulan: {int(TARGET_COVERAGE*100)}% dari populasi unit (meta).",
        f"Jumlah unit analisis: {len(verif_df)}",
    ]
    gap_cols = [c for c in verif_df.columns if "Kekurangan Sampel" in c]
    for gc in gap_cols:
        total = int(pd.to_numeric(verif_df[gc], errors="coerce").fillna(0).sum())
        parts.append(f"Total {gc}: {total}")
    if gap_cols:
        gc = gap_cols[0]
        ranked = verif_df.copy()
        ranked[gc] = pd.to_numeric(ranked[gc], errors="coerce").fillna(0)
        if "Kab/Kota" in ranked.columns:
            keycol = "Kab/Kota"
        elif "Provinsi" in ranked.columns:
            keycol = "Provinsi"
        else:
            keycol = ranked.columns[0]
        parts.append("\nTop prioritas (gap terbesar):")
        # list the ten worst units by the first shortfall column
        for _, row in ranked.sort_values(gc, ascending=False).head(10).iterrows():
            parts.append(f"- {row[keycol]}: {gc}={int(row[gc])}")
    return "\n".join(parts)
def rule_based_gap_report(verif_df: pd.DataFrame, prov: str, kab: str, kew: str) -> str:
    """Fallback (non-LLM) markdown summary of total gaps plus follow-up advice."""
    if verif_df is None or verif_df.empty:
        return "Tidak ada data verifikasi yang dapat dilaporkan."
    if kab and kab != "(Semua)":
        wilayah = kab
    elif prov and prov != "(Semua)":
        wilayah = prov
    else:
        wilayah = "NASIONAL"
    out = [
        "## Ringkasan Kekurangan Sampel IPLM (Rule-based)\n",
        f"Wilayah: {wilayah}",
        f"Kewenangan: {kew}",
        f"Target pengumpulan: {int(TARGET_COVERAGE*100)}% dari populasi unit (meta).",
        f"Jumlah unit analisis: {len(verif_df)}\n",
    ]
    gap_cols = [c for c in verif_df.columns if "Kekurangan Sampel" in c]
    if not gap_cols:
        out.append("Kolom kekurangan sampel tidak ditemukan.")
        return "\n".join(out)
    for gc in gap_cols:
        total = int(pd.to_numeric(verif_df[gc], errors="coerce").fillna(0).sum())
        out.append(f"- Total {gc}: **{total}** unit yang perlu dilengkapi untuk mencapai target.")
    out.append(
        "\nArah tindak lanjut: fokuskan mobilisasi pengumpulan data pada unit dengan gap terbesar, "
        "pastikan daftar target unit tersedia, dan lakukan monitoring harian hingga gap menurun."
    )
    return "\n".join(out)
def generate_llm_gap_report(verif_df: pd.DataFrame, prov: str, kab: str, kew: str) -> str:
    """Generate the narrative gap analysis via the HF chat endpoint.

    Falls back to rule_based_gap_report when the client is unavailable,
    USE_LLM is off, or the remote call fails or returns empty text.
    """
    ctx = build_context_gap(verif_df, prov, kab, kew)
    client = get_llm_client()
    if client is None or not USE_LLM:
        return "⚠️ LLM tidak tersedia, memakai laporan rule-based.\n\n" + rule_based_gap_report(verif_df, prov, kab, kew)
    system_prompt = (
        "Anda adalah analis kebijakan dan manajer program IPLM. "
        "Fokus Anda hanya pada gap sampel (kekurangan unit) dan strategi menutup kekurangan tersebut."
    )
    user_prompt = f"""
DATA RINGKAS GAP SAMPEL IPLM:
{ctx}
TULIS LAPORAN (BAHASA INDONESIA FORMAL) DENGAN STRUKTUR:
1) Ringkasan kondisi pengumpulan data (1 paragraf).
2) Total kekurangan sampel yang masih perlu dikumpulkan menuju target {int(TARGET_COVERAGE*100)}% (1 paragraf).
3) Prioritas wilayah (gap terbesar) dan alasan operasional (1 paragraf).
4) Rencana aksi 30–60 hari (naratif, bukan bullet).
BATASAN:
- Jangan membahas indeks/skor IPLM.
- Fokus hanya pada kekurangan sampel, target 68%, dan strategi pelengkapannya.
"""
    try:
        # low temperature for a stable, factual report
        resp = client.chat_completion(
            model=LLM_MODEL_NAME,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            max_tokens=900,
            temperature=0.2,
            top_p=0.9,
        )
        text = resp.choices[0].message.content.strip()
        if not text:
            raise ValueError("Respon LLM kosong.")
        return text
    except Exception as e:
        # any failure degrades gracefully to the rule-based narrative
        return (
            "⚠️ Error saat memanggil LLM, memakai laporan rule-based.\n\n"
            f"(Detail teknis: {repr(e)})\n\n"
            + rule_based_gap_report(verif_df, prov, kab, kew)
        )
# ============================================================
# 8) WORD REPORT
# ============================================================
def generate_word_report_gap(verif_df: pd.DataFrame, prov: str, kab: str, kew: str, analysis_text: str):
    """Assemble the downloadable Word (.docx) gap report and return its path.

    Sections: heading + metadata, the verification table (capped at 200 rows),
    optional pie charts (only when kaleido is available), and the narrative
    analysis split into paragraphs.
    """
    wilayah = kab if kab and kab != "(Semua)" else (prov if prov and prov != "(Semua)" else "NASIONAL")
    doc = Document()
    doc.add_heading(f"Laporan Kekurangan Sampel IPLM – {wilayah}", level=1)
    doc.add_paragraph(f"Kewenangan: {kew}")
    doc.add_paragraph(f"Target pengumpulan: {int(TARGET_COVERAGE*100)}% dari populasi unit (meta).")
    doc.add_paragraph(f"Jumlah unit analisis: {len(verif_df)}")
    doc.add_heading("Tabel Verifikasi (Target & Kekurangan Sampel)", level=2)
    view = verif_df.copy()
    if len(view) > 200:
        # keep the document small; the full table is in the Excel download
        doc.add_paragraph("Catatan: tabel dipotong (200 baris pertama) untuk menjaga ukuran dokumen.")
        view = view.head(200)
    table = doc.add_table(rows=1, cols=len(view.columns))
    hdr = table.rows[0].cells
    for i, c in enumerate(view.columns):
        hdr[i].text = str(c)
    for _, row in view.iterrows():
        r = table.add_row().cells
        for i, c in enumerate(view.columns):
            r[i].text = str(row[c])
    doc.add_heading("Ringkasan Visual (Opsional)", level=2)
    if not HAS_KALEIDO:
        doc.add_paragraph("Grafik pie tidak dibuat karena 'kaleido' tidak tersedia di server.")
    else:
        # draw at most one pie: prefer Sekolah, then Umum, then SMA columns
        pie_made = False
        if "Sampel Sekolah" in verif_df.columns and "Target Sekolah (68%)" in verif_df.columns:
            samp = pd.to_numeric(verif_df["Sampel Sekolah"], errors="coerce").fillna(0).sum()
            tgt = pd.to_numeric(verif_df["Target Sekolah (68%)"], errors="coerce").fillna(0).sum()
            img = make_pie_plotly(samp, tgt, "Capaian Sekolah (Total) terhadap Target")
            if img:
                doc.add_paragraph("Capaian Sekolah (Total) terhadap Target")
                doc.add_picture(img)
                pie_made = True
        if (not pie_made) and ("Sampel Umum" in verif_df.columns and "Target Umum (68%)" in verif_df.columns):
            samp = pd.to_numeric(verif_df["Sampel Umum"], errors="coerce").fillna(0).sum()
            tgt = pd.to_numeric(verif_df["Target Umum (68%)"], errors="coerce").fillna(0).sum()
            img = make_pie_plotly(samp, tgt, "Capaian Umum (Total) terhadap Target")
            if img:
                doc.add_paragraph("Capaian Umum (Total) terhadap Target")
                doc.add_picture(img)
                pie_made = True
        if (not pie_made) and ("Sampel SMA (DM)" in verif_df.columns and "Target SMA (68%)" in verif_df.columns):
            samp = pd.to_numeric(verif_df["Sampel SMA (DM)"], errors="coerce").fillna(0).sum()
            tgt = pd.to_numeric(verif_df["Target SMA (68%)"], errors="coerce").fillna(0).sum()
            img = make_pie_plotly(samp, tgt, "Capaian SMA (Total) terhadap Target")
            if img:
                doc.add_paragraph("Capaian SMA (Total) terhadap Target")
                doc.add_picture(img)
                pie_made = True
        if not pie_made:
            doc.add_paragraph("Tidak ada pasangan kolom sampel-target yang valid untuk dibuat pie chart.")
    doc.add_heading("Analisis Naratif (LLM)", level=2)
    for p in analysis_text.split("\n"):
        if p.strip():
            doc.add_paragraph(p)
    # mkstemp instead of the deprecated, race-prone tempfile.mktemp
    fd, outpath = tempfile.mkstemp(suffix=".docx")
    os.close(fd)
    doc.save(outpath)
    return outpath
# ============================================================
# 9) CORE RUN (FILTER + EXPORT)
# ============================================================
def run_core(prov_value, kab_value, kew_value):
    """Main button callback: filter DM rows, compute gaps, build all outputs.

    Returns an 8-tuple matching the Gradio outputs list, in order:
    (verification table, detail table, gap figure, rekap xlsx path,
     raw-subset xlsx path, docx path, status message, narrative text).
    """
    if df_all_raw is None or df_all_raw.empty:
        empty = pd.DataFrame()
        return (
            empty, empty, None,
            None, None, None,
            "Data DM tidak terbaca.",
            "Tidak ada analisis."
        )
    df = df_all_raw.copy()
    # filter by province (cleaned display label)
    if prov_value and prov_value != "(Semua)" and "prov_clean" in df.columns:
        df = df[df["prov_clean"].astype(str).str.strip() == str(prov_value).strip()]
    # filter by kab/kota (cleaned display label)
    if kab_value and kab_value != "(Semua)" and "kab_clean" in df.columns:
        df = df[df["kab_clean"].astype(str).str.strip() == str(kab_value).strip()]
    # filter by normalized kewenangan
    if kew_value and kew_value != "(Semua)":
        df = df[df["KEW_NORM"] == kew_value]
    if len(df) == 0:
        empty = pd.DataFrame()
        return (
            empty, empty, None,
            None, None, None,
            "Tidak ada data untuk kombinasi filter yang dipilih.",
            "Tidak ada analisis."
        )
    verif_df = compute_gap_verification(df, kew_value)
    # compact detail view of the filtered DM subset for the UI
    cols = []
    for c in ["prov_clean", "kab_clean", nama_col_glob, kew_col_glob, jenis_col_glob, subjenis_col_glob, "_dataset", "KEW_NORM"]:
        if c and c in df.columns and c not in cols:
            cols.append(c)
    detail_df = df[cols].copy() if cols else df.copy()
    # shortfall bar chart
    fig_gap = make_gap_figure(verif_df, kew_value)
    # write the downloadable artifacts to a fresh temp directory
    tmpdir = tempfile.mkdtemp()
    rekap_excel_path = os.path.join(tmpdir, "Rekap_Kekurangan_Sampel_IPLM_Target.xlsx")
    raw_dm_path = os.path.join(tmpdir, "DM_Subset_Raw.xlsx")
    with pd.ExcelWriter(rekap_excel_path, engine="openpyxl") as w:
        verif_df.to_excel(w, sheet_name="Verifikasi_Gap_Target", index=False)
        detail_df.to_excel(w, sheet_name="Detail_Subset_DM", index=False)
    df.to_excel(raw_dm_path, index=False)
    # narrative (LLM with rule-based fallback) and the Word report
    analysis_text = generate_llm_gap_report(verif_df, prov_value, kab_value, kew_value)
    word_path = generate_word_report_gap(verif_df, prov_value, kab_value, kew_value, analysis_text)
    msg = (
        f"OK. Subset DM: {len(df)} baris | Verifikasi: {len(verif_df)} baris | "
        f"Target: {int(TARGET_COVERAGE*100)}%."
    )
    return (
        verif_df,
        detail_df,
        fig_gap,
        rekap_excel_path,
        raw_dm_path,
        word_path,
        msg,
        analysis_text
    )
def on_prov_change(prov_value):
    """Refresh the kab/kota dropdown when the province selection changes."""
    choices = get_kab_choices_for_prov(prov_value)
    return gr.update(choices=choices, value="(Semua)")
# ============================================================
# 10) BUILD UI
# ============================================================
with gr.Blocks() as demo:
    # header: title, file inventory, and the import-time load-status text
    gr.Markdown(
        f"""
# Dashboard Kekurangan Sampel IPLM — Target {int(TARGET_COVERAGE*100)}% (Tanpa Hitung Indeks)
Aplikasi ini menghitung **berapa unit lagi yang harus dikumpulkan** agar memenuhi target minimal representasi.
**File:**
- `{DATA_FILE}` (DM)
- `{META_KAB_FILE}` (Kecamatan + Desa/Kel)
- `{META_SDSMP_FILE}` (SD + SMP)
- `{META_SMA_FILE}` (SMA)
{DATA_INFO}
"""
    )
    with gr.Row():
        dd_prov = gr.Dropdown(label="Provinsi", choices=prov_choices, value=prov_choices[0])
        dd_kab = gr.Dropdown(label="Kab/Kota", choices=kab_choices, value=kab_choices[0])
        dd_kew = gr.Dropdown(label="Kewenangan", choices=kew_choices, value=default_kew)
    # cascade: changing the province refreshes the kab/kota options
    dd_prov.change(fn=on_prov_change, inputs=dd_prov, outputs=dd_kab)
    run_btn = gr.Button("Hitung Kekurangan Sampel")
    msg_out = gr.Markdown()
    gr.Markdown("### Verifikasi (Target & Kekurangan Sampel)")
    verif_out = gr.DataFrame(interactive=False)
    gr.Markdown("### Grafik Kekurangan Sampel (berapa unit lagi yang harus dikumpulkan)")
    gap_plot_out = gr.Plot()
    gr.Markdown("### Detail Subset DM (yang terfilter)")
    detail_out = gr.DataFrame(interactive=False)
    gr.Markdown("### Analisis Naratif (LLM)")
    analysis_out = gr.Markdown()
    with gr.Row():
        rekap_excel_out = gr.File(label="Download Rekap (Verifikasi + Detail) (.xlsx)")
        raw_dm_out = gr.File(label="Download Data Mentah Subset DM (.xlsx)")
        word_out = gr.File(label="Download Laporan Word (.docx)")
    # the outputs order must match run_core's return tuple exactly
    run_btn.click(
        fn=run_core,
        inputs=[dd_prov, dd_kab, dd_kew],
        outputs=[
            verif_out,
            detail_out,
            gap_plot_out,
            rekap_excel_out,
            raw_dm_out,
            word_out,
            msg_out,
            analysis_out
        ],
    )
demo.launch()