# -*- coding: utf-8 -*- """ app.py — Dashboard Kekurangan Sampel IPLM (TANPA HITUNG INDEKS) FIX FULL: - Target 68% diambil dari META: * Kab/Kota: kolom sampel_total * Provinsi: kolom total _sampel (atau variasinya) - Normalisasi label diperkuat: * kab/kota: hapus kata "DAN", seragamkan KAB/KOTA, buang simbol * provinsi: buang prefix "PROVINSI/PROPINSI", buang simbol - Jika META tidak match: * ditandai META_MATCH="TIDAK" + Target NaN (bukan 0), supaya tidak menyesatkan """ import os import re import tempfile from pathlib import Path import gradio as gr import numpy as np import pandas as pd import plotly.graph_objects as go from huggingface_hub import InferenceClient from docx import Document import plotly.express as px try: import kaleido # noqa: F401 HAS_KALEIDO = True except Exception: HAS_KALEIDO = False # ============================================================ # 1) KONFIGURASI FILE # ============================================================ DATA_FILE = "IPLM_clean_manual_131225.xlsx" META_KAB_FILE = "Data_populasi_Kab_kota.xlsx" META_PROV_FILE = "Data_populasi_propinsi.xlsx" TARGET_COVERAGE = 0.68 # ============================================================ # 1b) LLM # ============================================================ USE_LLM = True LLM_MODEL_NAME = "meta-llama/Meta-Llama-3-8B-Instruct" HF_TOKEN = ( os.getenv("HF_SECRET") or os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_API_TOKEN") ) _HF_CLIENT = None def get_llm_client(): global _HF_CLIENT if _HF_CLIENT is not None: return _HF_CLIENT try: if HF_TOKEN: _HF_CLIENT = InferenceClient(model=LLM_MODEL_NAME, token=HF_TOKEN) else: _HF_CLIENT = InferenceClient(model=LLM_MODEL_NAME) return _HF_CLIENT except Exception: _HF_CLIENT = None return None # ============================================================ # 2) UTIL # ============================================================ def _canon(s: str) -> str: return re.sub(r"[^a-z0-9]+", "", str(s).lower()) def pick_col(df, candidates): for c in candidates: if c in df.columns: return c can_map = {_canon(c): c for c in df.columns} for c in candidates: k = _canon(c) if k in can_map: return can_map[k] return None def coerce_num(val): if pd.isna(val): return np.nan t = str(val).strip() if t == "" or t in {"-", "–", "—"}: return np.nan t = t.replace("\u00a0", " ").replace("Rp", "").replace("%", "") t = re.sub(r"[^0-9,.\-]", "", t) if t.count(".") > 1 and t.count(",") == 1: t = t.replace(".", "").replace(",", ".") elif t.count(",") > 1 and t.count(".") == 1: t = t.replace(",", "") elif t.count(",") == 1 and t.count(".") == 0: t = t.replace(",", ".") else: t = t.replace(",", "") try: return float(t) except Exception: return np.nan def norm_kew(v): if pd.isna(v): return None t = str(v).strip().upper() if "KAB" in t or "KOTA" in t: return "KAB/KOTA" if "PROV" in t: return "PROVINSI" if "PUSAT" in t or "NASIONAL" in t: return "PUSAT" return t def _norm_text(x): if pd.isna(x): return None t = str(x).strip().upper() return " ".join(t.split()) # ---- Normalisasi PROV (untuk join) ---- def norm_prov_label(s): if pd.isna(s): return None t = str(s).upper().strip() t = " ".join(t.split()) # buang prefix t = re.sub(r"^\s*(PROVINSI|PROPINSI)\s+", "", t) # buang tanda baca t = re.sub(r"[^A-Z0-9 ]+", " ", t) t = " ".join(t.split()) # key return re.sub(r"[^A-Z0-9]+", "", t) # ---- Normalisasi KAB/KOTA (untuk join) ---- def norm_kab_label(s): """ FIX UTAMA: - Samakan variasi "KABUPATEN/KAB./KAB" dan "KOTA ADM./KOTA ADMINISTRASI" - Hapus kata 'DAN' agar match kasus: "PANGKAJENE DAN KEPULAUAN" vs "PANGKAJENE KEPULAUAN" - Buang simbol, spasi ganda """ if pd.isna(s): return None t = str(s).upper().strip() t = " ".join(t.split()) # seragamkan kab/kota t = t.replace("KABUPATEN", "KAB") t = t.replace("KAB.", "KAB") t = t.replace("KOTA ADMINISTRASI", "KOTA") t = t.replace("KOTA ADM.", "KOTA") t = t.replace("KOTA.", "KOTA") # FIX: buang "DAN" sebagai stopword join t = re.sub(r"\bDAN\b", " ", t) # bersihin simbol t = re.sub(r"[^A-Z0-9 ]+", " ", t) t = " ".join(t.split()) return re.sub(r"[^A-Z0-9]+", "", t) # ---- Display bersih (untuk dropdown/UI) ---- def clean_prov_display(s): if pd.isna(s): return None t = str(s).upper().strip() t = " ".join(t.split()) t = t.replace("PROPINSI", "PROVINSI") while t.startswith("PROVINSI PROVINSI "): t = t.replace("PROVINSI PROVINSI ", "PROVINSI ", 1) t = t.replace("PROVINSI PROVINSI ", "PROVINSI ") if not t.startswith("PROVINSI "): t = "PROVINSI " + t return t def clean_kab_display(s): if pd.isna(s): return None t = str(s).upper().strip() t = " ".join(t.split()) t = t.replace("KABUPATEN", "KAB.") t = t.replace("KAB ", "KAB. ") t = t.replace("KOTA ADMINISTRASI", "KOTA") # rapikan variasi "DAN" supaya konsisten tampilan juga t = re.sub(r"\bDAN\b", " ", t) t = " ".join(t.split()) return t def make_pie_plotly(num, den, title): if not HAS_KALEIDO: return None if den is None or pd.isna(den) or den <= 0: values = [0, 1] labels = ["Terjangkau", "Belum Terjangkau"] else: num = 0 if pd.isna(num) else float(num) den = float(den) values = [max(num, 0), max(den - num, 0)] labels = ["Terjangkau", "Belum Terjangkau"] fig = px.pie(values=values, names=labels, title=title, hole=0.35) tmp = tempfile.mktemp(suffix=".png") try: fig.write_image(tmp, scale=2) return tmp except Exception: return None # ============================================================ # 3) LOAD DATA (DM + META) # ============================================================ DATA_INFO = "" df_all_raw = None meta_kab_df = None # kab_key -> target total + opsional sekolah/umum meta_prov_df = None # prov_key -> target total prov_col_glob = None kab_col_glob = None kew_col_glob = None jenis_col_glob = None subjenis_col_glob = None nama_col_glob = None extra_info = [] # ---- Load DM ---- try: fp = Path(DATA_FILE) if not fp.exists(): raise FileNotFoundError(f"File tidak ditemukan: {DATA_FILE}") xls = pd.ExcelFile(fp) frames = [pd.read_excel(fp, sheet_name=s) for s in xls.sheet_names] df_all_raw = pd.concat(frames, ignore_index=True, sort=False) prov_col_glob = pick_col(df_all_raw, ["provinsi", "Provinsi", "PROVINSI"]) kab_col_glob = pick_col(df_all_raw, ["kab_kota", "kab/kota", "Kab/Kota", "KAB/KOTA", "kabupaten_kota", "kota"]) kew_col_glob = pick_col(df_all_raw, ["kewenangan", "jenis_kewenangan", "Kewenangan", "KEWENANGAN"]) jenis_col_glob = pick_col(df_all_raw, ["jenis_perpustakaan", "JENIS_PERPUSTAKAAN", "Jenis Perpustakaan"]) subjenis_col_glob = pick_col(df_all_raw, ["sub_jenis_perpus", "Sub Jenis", "SubJenis", "subjenis", "jenjang"]) nama_col_glob = pick_col(df_all_raw, ["nm_perpustakaan", "nama_perpustakaan", "nm_instansi_lembaga", "Nama Perpustakaan"]) if kew_col_glob: df_all_raw["KEW_NORM"] = df_all_raw[kew_col_glob].apply(norm_kew) else: df_all_raw["KEW_NORM"] = None val_map_jenis = { "PERPUSTAKAAN SEKOLAH": "sekolah", "SEKOLAH": "sekolah", "PERPUSTAKAAN UMUM": "umum", "UMUM": "umum", "PERPUSTAKAAN DAERAH": "umum", "PERPUSTAKAAN KHUSUS": "khusus", "KHUSUS": "khusus", "PERPUSTAKAAN PERGURUAN TINGGI": "khusus", "PERGURUAN TINGGI": "khusus", } if jenis_col_glob: df_all_raw["_dataset"] = df_all_raw[jenis_col_glob].apply(_norm_text).map(val_map_jenis) else: df_all_raw["_dataset"] = None if prov_col_glob and prov_col_glob in df_all_raw.columns: df_all_raw["prov_clean"] = df_all_raw[prov_col_glob].apply(clean_prov_display) else: df_all_raw["prov_clean"] = None if kab_col_glob and kab_col_glob in df_all_raw.columns: df_all_raw["kab_clean"] = df_all_raw[kab_col_glob].apply(clean_kab_display) else: df_all_raw["kab_clean"] = None DATA_INFO = f"Data terbaca dari: **{DATA_FILE}** | Jumlah baris: **{len(df_all_raw)}**" except Exception as e: df_all_raw = None DATA_INFO = f"⚠️ Gagal memuat `{DATA_FILE}` | Error: `{e}`" # ---- Meta Kab/Kota ---- try: meta_kab_raw = pd.read_excel(META_KAB_FILE) col_kab = pick_col(meta_kab_raw, ["KABUPATEN_KOTA", "KAB/KOTA", "Kab/Kota", "Kab_Kota", "kab/kota", "kabupaten_kota"]) col_target_total = pick_col(meta_kab_raw, ["sampel_total", "Sampel_total", "SAMPEL_TOTAL"]) col_target_umum = pick_col(meta_kab_raw, ["Sampel_umum_68%", "sampel_umum_68%", "SAMPEL_UMUM_68%"]) col_target_sek = pick_col(meta_kab_raw, ["Sampel_sekolah_68%", "sampel_sekolah_68%", "SAMPEL_SEKOLAH_68%"]) if col_kab and col_target_total: meta_kab_df = pd.DataFrame({ "Kab_Kota_Label": meta_kab_raw[col_kab].astype(str).str.strip(), "Target_Total_68": meta_kab_raw[col_target_total].apply(coerce_num), }) meta_kab_df["Target_Umum_68"] = meta_kab_raw[col_target_umum].apply(coerce_num) if col_target_umum else np.nan meta_kab_df["Target_Sekolah_68"] = meta_kab_raw[col_target_sek].apply(coerce_num) if col_target_sek else np.nan meta_kab_df["kab_key"] = meta_kab_df["Kab_Kota_Label"].apply(norm_kab_label) meta_kab_df = meta_kab_df.groupby("kab_key", as_index=False).agg({ "Kab_Kota_Label": "first", "Target_Total_68": "first", "Target_Umum_68": "first", "Target_Sekolah_68": "first", }) extra_info.append(f"Meta Kab/Kota terbaca: **{META_KAB_FILE}** (n={len(meta_kab_df)}) | Target=`sampel_total`") else: meta_kab_df = None extra_info.append(f"⚠️ Kolom `KABUPATEN_KOTA` atau `sampel_total` tidak ditemukan di `{META_KAB_FILE}`") except Exception as e: meta_kab_df = None extra_info.append(f"⚠️ Gagal memuat `{META_KAB_FILE}` ({e})") # ---- Meta Provinsi ---- try: meta_prov_raw = pd.read_excel(META_PROV_FILE) col_prov = pick_col(meta_prov_raw, ["Provinsi", "provinsi", "PROVINSI", "NAMA_PROVINSI", "Nama Provinsi", "nm_prov", "nm_provinsi", "prov"]) # banyak variasi spasi/underscore col_target_total = pick_col(meta_prov_raw, ["total _sampel", "total_sampel", "TOTAL _SAMPEL", "TOTAL_SAMPEL", "total sampel", "TOTAL SAMPEL"]) if col_prov and col_target_total: meta_prov_df = pd.DataFrame({ "Provinsi_Label": meta_prov_raw[col_prov].astype(str).str.strip(), "Target_Total_68": meta_prov_raw[col_target_total].apply(coerce_num), }) meta_prov_df["prov_key"] = meta_prov_df["Provinsi_Label"].apply(norm_prov_label) meta_prov_df = meta_prov_df.groupby("prov_key", as_index=False).agg({ "Provinsi_Label": "first", "Target_Total_68": "first", }) extra_info.append(f"Meta Provinsi terbaca: **{META_PROV_FILE}** ({len(meta_prov_df)} provinsi) | Target=`{col_target_total}`") else: meta_prov_df = None extra_info.append(f"⚠️ Kolom `Provinsi` atau `total _sampel` tidak ditemukan di `{META_PROV_FILE}`") except Exception as e: meta_prov_df = None extra_info.append(f"⚠️ Gagal memuat file populasi provinsi: {e}") if extra_info: DATA_INFO = DATA_INFO + "
" + "
".join(extra_info) # ============================================================ # 4) DROPDOWN # ============================================================ def all_prov_choices(): if df_all_raw is None or "prov_clean" not in df_all_raw.columns: return ["(Semua)"] s = df_all_raw["prov_clean"].dropna().astype(str).str.strip() vals = sorted([o for o in s.unique() if o and o != ""]) return ["(Semua)"] + vals def get_kab_choices_for_prov(prov_value): if df_all_raw is None or "kab_clean" not in df_all_raw.columns: return ["(Semua)"] if prov_value is None or prov_value == "(Semua)": s = df_all_raw["kab_clean"].dropna().astype(str).str.strip() else: m = df_all_raw["prov_clean"].astype(str).str.strip() == str(prov_value).strip() s = df_all_raw.loc[m, "kab_clean"].dropna().astype(str).str.strip() vals = sorted([x for x in s.unique() if x and x != ""]) return ["(Semua)"] + vals def all_kew_choices(): if df_all_raw is None: return ["(Semua)"] s = df_all_raw.get("KEW_NORM", pd.Series(dtype=object)).dropna().astype(str).str.strip() vals = sorted([o for o in s.unique() if o != ""]) return ["(Semua)"] + vals if vals else ["(Semua)"] prov_choices = all_prov_choices() kab_choices = get_kab_choices_for_prov(prov_choices[0] if prov_choices else "(Semua)") kew_choices = all_kew_choices() default_kew = "KAB/KOTA" if "KAB/KOTA" in kew_choices else (kew_choices[0] if kew_choices else "(Semua)") # ============================================================ # 5) VERIFIKASI GAP — TARGET DARI META (bukan hitung ulang) # ============================================================ def compute_gap_verification(df_filtered: pd.DataFrame, kew_value: str) -> pd.DataFrame: if df_filtered is None or len(df_filtered) == 0: return pd.DataFrame() kew_norm = str(kew_value or "").upper() # =================== KAB/KOTA =================== if ("KAB" in kew_norm or "KOTA" in kew_norm): if "kab_clean" not in df_filtered.columns or meta_kab_df is None: return pd.DataFrame({"Info": ["Kolom kab_clean atau meta kab tidak tersedia."]}) tmp = df_filtered.copy() tmp = tmp[pd.notna(tmp["kab_clean"])] if tmp.empty: return pd.DataFrame() tmp["kab_key"] = tmp["kab_clean"].apply(norm_kab_label) g_total = tmp.groupby("kab_key").size().rename("Sampel Total (DM)").reset_index() tmp_sek = tmp[tmp["_dataset"] == "sekolah"].copy() if "_dataset" in tmp.columns else tmp.copy() g_sek_total = tmp_sek.groupby("kab_key").size().rename("Sampel Sekolah (DM)").reset_index() tmp_umum = tmp[tmp["_dataset"] == "umum"].copy() if "_dataset" in tmp.columns else tmp.copy() g_umum = tmp_umum.groupby("kab_key").size().rename("Sampel Umum (DM)").reset_index() merged = ( g_total .merge(g_sek_total, on="kab_key", how="left") .merge(g_umum, on="kab_key", how="left") .merge( meta_kab_df[["kab_key", "Kab_Kota_Label", "Target_Total_68", "Target_Umum_68", "Target_Sekolah_68"]], on="kab_key", how="left" ) ) for c in ["Sampel Total (DM)", "Sampel Sekolah (DM)", "Sampel Umum (DM)"]: merged[c] = merged[c].fillna(0).astype(int) # marker match meta merged["META_MATCH"] = np.where(pd.notna(merged["Target_Total_68"]), "YA", "TIDAK") # target dari meta (ceil biar integer ke atas) merged["Target Total (68%)"] = np.ceil(pd.to_numeric(merged["Target_Total_68"], errors="coerce")) merged["Target Sekolah (68%)"] = np.ceil(pd.to_numeric(merged["Target_Sekolah_68"], errors="coerce")) merged["Target Umum (68%)"] = np.ceil(pd.to_numeric(merged["Target_Umum_68"], errors="coerce")) # kekurangan: kalau target NaN -> NaN (bukan 0) def _gap(target_series, sampel_series): t = pd.to_numeric(target_series, errors="coerce") s = pd.to_numeric(sampel_series, errors="coerce").fillna(0) out = t - s out = out.where(t.notna(), np.nan) return out.clip(lower=0) merged["Kekurangan Sampel Total"] = _gap(merged["Target Total (68%)"], merged["Sampel Total (DM)"]) merged["Kekurangan Sampel Sekolah"] = _gap(merged["Target Sekolah (68%)"], merged["Sampel Sekolah (DM)"]) merged["Kekurangan Sampel Umum"] = _gap(merged["Target Umum (68%)"], merged["Sampel Umum (DM)"]) out = pd.DataFrame({ "Kab/Kota": merged["Kab_Kota_Label"].fillna(merged["kab_key"]), "META_MATCH": merged["META_MATCH"], "Sampel Total (DM)": merged["Sampel Total (DM)"], "Target Total (68%) [META:sampel_total]": merged["Target Total (68%)"], "Kekurangan Sampel Total": merged["Kekurangan Sampel Total"], "Sampel Sekolah (DM)": merged["Sampel Sekolah (DM)"], "Target Sekolah (68%) [META]": merged["Target Sekolah (68%)"], "Kekurangan Sampel Sekolah": merged["Kekurangan Sampel Sekolah"], "Sampel Umum (DM)": merged["Sampel Umum (DM)"], "Target Umum (68%) [META]": merged["Target Umum (68%)"], "Kekurangan Sampel Umum": merged["Kekurangan Sampel Umum"], }) # cast tampilan angka: biarkan NaN tetap NaN supaya ketahuan mismatch meta num_cols = [c for c in out.columns if c not in {"Kab/Kota", "META_MATCH"}] for c in num_cols: out[c] = pd.to_numeric(out[c], errors="coerce") return out.sort_values(["META_MATCH", "Kab/Kota"], ascending=[True, True]).reset_index(drop=True) # =================== PROVINSI =================== if ("PROV" in kew_norm): if meta_prov_df is None or "prov_clean" not in df_filtered.columns: return pd.DataFrame({"Info": ["Meta provinsi atau kolom prov_clean tidak tersedia."]}) tmp = df_filtered.copy() tmp = tmp[pd.notna(tmp["prov_clean"])] if tmp.empty: return pd.DataFrame({"Info": ["Tidak ada data sampel kewenangan provinsi."]}) tmp["prov_key"] = tmp["prov_clean"].apply(norm_prov_label) g_total = tmp.groupby("prov_key").size().rename("Sampel Total (DM)").reset_index() merged = g_total.merge(meta_prov_df[["prov_key", "Provinsi_Label", "Target_Total_68"]], on="prov_key", how="left") merged["Sampel Total (DM)"] = merged["Sampel Total (DM)"].fillna(0).astype(int) merged["META_MATCH"] = np.where(pd.notna(merged["Target_Total_68"]), "YA", "TIDAK") merged["Target Total (68%)"] = np.ceil(pd.to_numeric(merged["Target_Total_68"], errors="coerce")) t = pd.to_numeric(merged["Target Total (68%)"], errors="coerce") s = pd.to_numeric(merged["Sampel Total (DM)"], errors="coerce").fillna(0) gap = (t - s).where(t.notna(), np.nan).clip(lower=0) merged["Kekurangan Sampel Total"] = gap out = pd.DataFrame({ "Provinsi": merged["Provinsi_Label"].fillna(merged["prov_key"]), "META_MATCH": merged["META_MATCH"], "Sampel Total (DM)": merged["Sampel Total (DM)"], "Target Total (68%) [META:total _sampel]": merged["Target Total (68%)"], "Kekurangan Sampel Total": merged["Kekurangan Sampel Total"], }) for c in ["Sampel Total (DM)", "Target Total (68%) [META:total _sampel]", "Kekurangan Sampel Total"]: out[c] = pd.to_numeric(out[c], errors="coerce") return out.sort_values(["META_MATCH", "Provinsi"], ascending=[True, True]).reset_index(drop=True) return pd.DataFrame({"Info": ["Kewenangan tidak dikenali / tidak didukung."]}) # ============================================================ # 6) GRAFIK GAP — pakai Kekurangan Total (abaikan NaN) # ============================================================ def make_gap_figure(verif_df: pd.DataFrame, kew_value: str) -> go.Figure: fig = go.Figure() if verif_df is None or verif_df.empty: fig.update_layout(title="Kekurangan Sampel (tidak ada data)", xaxis_title="Unit", yaxis_title="Kekurangan (unit)") return fig kew_norm = str(kew_value or "").upper() def _num(s): return pd.to_numeric(s, errors="coerce").fillna(0).astype(int) if ("KAB" in kew_norm or "KOTA" in kew_norm) and ("Kab/Kota" in verif_df.columns): dfp = verif_df.copy() dfp["gap_total"] = _num(dfp.get("Kekurangan Sampel Total", 0)) dfp = dfp.sort_values("gap_total", ascending=False) x = dfp["Kab/Kota"].astype(str).tolist() gap_total = _num(dfp["gap_total"]) fig.add_trace(go.Bar( x=x, y=gap_total, name="Kekurangan Total", text=gap_total, textposition="outside", hovertemplate="%{x}
Kekurangan total: %{y} unit" )) fig.update_layout( title=f"Kekurangan Sampel TOTAL (KAB/KOTA) — Target {int(TARGET_COVERAGE*100)}% (META)", xaxis_title="Kab/Kota", yaxis_title="Kekurangan (unit)", margin=dict(l=40, r=20, t=60, b=140), ) fig.update_xaxes(tickangle=-35) return fig if ("PROV" in kew_norm) and ("Provinsi" in verif_df.columns): dfp = verif_df.copy() dfp["gap_total"] = _num(dfp.get("Kekurangan Sampel Total", 0)) dfp = dfp.sort_values("gap_total", ascending=False) x = dfp["Provinsi"].astype(str).tolist() gap_total = _num(dfp["gap_total"]) fig.add_trace(go.Bar( x=x, y=gap_total, name="Kekurangan Total", text=gap_total, textposition="outside", hovertemplate="%{x}
Kekurangan total: %{y} unit" )) fig.update_layout( title=f"Kekurangan Sampel TOTAL (PROVINSI) — Target {int(TARGET_COVERAGE*100)}% (META)", xaxis_title="Provinsi", yaxis_title="Kekurangan (unit)", margin=dict(l=40, r=20, t=60, b=140), ) fig.update_xaxes(tickangle=-35) return fig fig.update_layout(title="Kekurangan Sampel — format data tidak dikenali", xaxis_title="Unit", yaxis_title="Kekurangan (unit)") return fig # ============================================================ # 7) LLM NARASI # ============================================================ def build_context_gap(verif_df: pd.DataFrame, prov: str, kab: str, kew: str) -> str: wilayah = kab if kab and kab != "(Semua)" else (prov if prov and prov != "(Semua)" else "NASIONAL") lines = [] lines.append(f"Wilayah filter: {wilayah}") lines.append(f"Kewenangan: {kew}") lines.append(f"Target pengumpulan: {int(TARGET_COVERAGE*100)}% (TARGET diambil dari META).") lines.append(f"Jumlah unit analisis: {len(verif_df)}") if "Kekurangan Sampel Total" in verif_df.columns: total_gap = int(pd.to_numeric(verif_df["Kekurangan Sampel Total"], errors="coerce").fillna(0).sum()) lines.append(f"Total Kekurangan Sampel Total: {total_gap}") if "META_MATCH" in verif_df.columns: n_no = int((verif_df["META_MATCH"] == "TIDAK").sum()) if n_no > 0: lines.append(f"PERINGATAN: ada {n_no} unit yang tidak match ke META (target tidak tersedia).") keycol = "Kab/Kota" if "Kab/Kota" in verif_df.columns else ("Provinsi" if "Provinsi" in verif_df.columns else verif_df.columns[0]) if "Kekurangan Sampel Total" in verif_df.columns: t = verif_df.copy() t["Kekurangan Sampel Total"] = pd.to_numeric(t["Kekurangan Sampel Total"], errors="coerce").fillna(0) top = t.sort_values("Kekurangan Sampel Total", ascending=False).head(10) lines.append("\nTop prioritas (gap terbesar):") for _, r in top.iterrows(): lines.append(f"- {r[keycol]}: gap_total={int(r['Kekurangan Sampel Total'])}") return "\n".join(lines) def rule_based_gap_report(verif_df: pd.DataFrame, prov: str, kab: str, kew: str) -> str: if verif_df is None or verif_df.empty: return "Tidak ada data verifikasi yang dapat dilaporkan." wilayah = kab if kab and kab != "(Semua)" else (prov if prov and prov != "(Semua)" else "NASIONAL") lines = [] lines.append("## Ringkasan Kekurangan Sampel IPLM (Rule-based)\n") lines.append(f"Wilayah: {wilayah}") lines.append(f"Kewenangan: {kew}") lines.append(f"Target pengumpulan: {int(TARGET_COVERAGE*100)}% (TARGET diambil dari META: kab/kota=`sampel_total`, provinsi=`total _sampel`).") lines.append(f"Jumlah unit analisis: {len(verif_df)}\n") if "Kekurangan Sampel Total" in verif_df.columns: total_gap = int(pd.to_numeric(verif_df["Kekurangan Sampel Total"], errors="coerce").fillna(0).sum()) lines.append(f"- Total Kekurangan Sampel Total: **{total_gap}** unit yang perlu dilengkapi menuju target.") else: lines.append("Kolom kekurangan sampel total tidak ditemukan.") if "META_MATCH" in verif_df.columns: n_no = int((verif_df["META_MATCH"] == "TIDAK").sum()) if n_no > 0: lines.append(f"- Catatan: **{n_no}** unit belum match ke META, sehingga target tidak tersedia (perlu pembenahan label/meta).") lines.append("\nArah tindak lanjut: prioritaskan wilayah dengan gap terbesar, dan pastikan mapping unit ke META valid untuk monitoring yang akurat.") return "\n".join(lines) def generate_llm_gap_report(verif_df: pd.DataFrame, prov: str, kab: str, kew: str) -> str: ctx = build_context_gap(verif_df, prov, kab, kew) client = get_llm_client() if client is None or not USE_LLM: return "⚠️ LLM tidak tersedia, memakai laporan rule-based.\n\n" + rule_based_gap_report(verif_df, prov, kab, kew) system_prompt = ( "Anda adalah analis kebijakan dan manajer program IPLM. " "Fokus Anda hanya pada gap sampel (kekurangan unit) dan strategi menutup kekurangan tersebut." ) user_prompt = f""" DATA RINGKAS GAP SAMPEL IPLM: {ctx} TULIS LAPORAN (BAHASA INDONESIA FORMAL) DENGAN STRUKTUR: 1) Ringkasan kondisi pengumpulan data (1 paragraf). 2) Total kekurangan sampel yang masih perlu dikumpulkan menuju target {int(TARGET_COVERAGE*100)}% (1 paragraf). 3) Prioritas wilayah (gap terbesar) dan alasan operasional (1 paragraf). 4) Rencana aksi 30–60 hari (naratif, bukan bullet). BATASAN: - Jangan membahas indeks/skor IPLM. - Tegaskan bahwa target berasal dari META: kab/kota=`sampel_total`, provinsi=`total _sampel`. - Jika ada unit META_MATCH=TIDAK, sebutkan sebagai isu kualitas data/master reference. """ try: resp = client.chat_completion( model=LLM_MODEL_NAME, messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}], max_tokens=900, temperature=0.2, top_p=0.9, ) text = resp.choices[0].message.content.strip() if not text: raise ValueError("Respon LLM kosong.") return text except Exception as e: return ( "⚠️ Error saat memanggil LLM, memakai laporan rule-based.\n\n" f"(Detail teknis: {repr(e)})\n\n" + rule_based_gap_report(verif_df, prov, kab, kew) ) # ============================================================ # 8) WORD REPORT # ============================================================ def generate_word_report_gap(verif_df: pd.DataFrame, prov: str, kab: str, kew: str, analysis_text: str): wilayah = kab if kab and kab != "(Semua)" else (prov if prov and prov != "(Semua)" else "NASIONAL") doc = Document() doc.add_heading(f"Laporan Kekurangan Sampel IPLM – {wilayah}", level=1) doc.add_paragraph(f"Kewenangan: {kew}") doc.add_paragraph(f"Target pengumpulan: {int(TARGET_COVERAGE*100)}% (TARGET diambil dari META).") doc.add_paragraph(f"Jumlah unit analisis: {len(verif_df)}") doc.add_heading("Tabel Verifikasi (Target & Kekurangan Sampel)", level=2) view = verif_df.copy() if len(view) > 200: doc.add_paragraph("Catatan: tabel dipotong (200 baris pertama) untuk menjaga ukuran dokumen.") view = view.head(200) table = doc.add_table(rows=1, cols=len(view.columns)) hdr = table.rows[0].cells for i, c in enumerate(view.columns): hdr[i].text = str(c) for _, row in view.iterrows(): r = table.add_row().cells for i, c in enumerate(view.columns): r[i].text = "" if pd.isna(row[c]) else str(row[c]) doc.add_heading("Ringkasan Visual (Opsional)", level=2) if not HAS_KALEIDO: doc.add_paragraph("Grafik pie tidak dibuat karena 'kaleido' tidak tersedia di server.") else: pie_made = False if "Sampel Total (DM)" in verif_df.columns: samp = pd.to_numeric(verif_df["Sampel Total (DM)"], errors="coerce").fillna(0).sum() tgt_col = None for c in verif_df.columns: if "Target Total (68%)" in c: tgt_col = c break if tgt_col: tgt = pd.to_numeric(verif_df[tgt_col], errors="coerce").fillna(0).sum() img = make_pie_plotly(samp, tgt, "Capaian TOTAL (DM) terhadap Target TOTAL (META)") if img: doc.add_paragraph("Capaian TOTAL terhadap Target TOTAL (META)") doc.add_picture(img) pie_made = True if not pie_made: doc.add_paragraph("Tidak ada pasangan kolom sampel-target yang valid untuk dibuat pie chart.") doc.add_heading("Analisis Naratif (LLM)", level=2) for p in analysis_text.split("\n"): if p.strip(): doc.add_paragraph(p) outpath = tempfile.mktemp(suffix=".docx") doc.save(outpath) return outpath # ============================================================ # 9) CORE RUN # ============================================================ def run_core(prov_value, kab_value, kew_value): if df_all_raw is None or df_all_raw.empty: empty = pd.DataFrame() return empty, empty, None, None, None, None, "Data DM tidak terbaca.", "Tidak ada analisis." df = df_all_raw.copy() if prov_value and prov_value != "(Semua)" and "prov_clean" in df.columns: df = df[df["prov_clean"].astype(str).str.strip() == str(prov_value).strip()] if kab_value and kab_value != "(Semua)" and "kab_clean" in df.columns: df = df[df["kab_clean"].astype(str).str.strip() == str(kab_value).strip()] if kew_value and kew_value != "(Semua)": df = df[df["KEW_NORM"] == kew_value] if len(df) == 0: empty = pd.DataFrame() return empty, empty, None, None, None, None, "Tidak ada data untuk kombinasi filter yang dipilih.", "Tidak ada analisis." verif_df = compute_gap_verification(df, kew_value) cols = [] for c in ["prov_clean", "kab_clean", nama_col_glob, kew_col_glob, jenis_col_glob, subjenis_col_glob, "_dataset", "KEW_NORM"]: if c and c in df.columns and c not in cols: cols.append(c) detail_df = df[cols].copy() if cols else df.copy() fig_gap = make_gap_figure(verif_df, kew_value) tmpdir = tempfile.mkdtemp() rekap_excel_path = os.path.join(tmpdir, "Rekap_Kekurangan_Sampel_IPLM_Target_META.xlsx") raw_dm_path = os.path.join(tmpdir, "DM_Subset_Raw.xlsx") with pd.ExcelWriter(rekap_excel_path, engine="openpyxl") as w: verif_df.to_excel(w, sheet_name="Verifikasi_Gap_Target_META", index=False) detail_df.to_excel(w, sheet_name="Detail_Subset_DM", index=False) df.to_excel(raw_dm_path, index=False) analysis_text = generate_llm_gap_report(verif_df, prov_value, kab_value, kew_value) word_path = generate_word_report_gap(verif_df, prov_value, kab_value, kew_value, analysis_text) # message ringkas + warning mismatch meta warn = "" if "META_MATCH" in verif_df.columns: n_no = int((verif_df["META_MATCH"] == "TIDAK").sum()) if n_no > 0: warn = f" ⚠️ {n_no} unit tidak match ke META (target NaN)." msg = f"OK. Subset DM: {len(df)} baris | Verifikasi: {len(verif_df)} baris | Target: {int(TARGET_COVERAGE*100)}% (META).{warn}" return verif_df, detail_df, fig_gap, rekap_excel_path, raw_dm_path, word_path, msg, analysis_text def on_prov_change(prov_value): return gr.update(choices=get_kab_choices_for_prov(prov_value), value="(Semua)") # ============================================================ # 10) UI # ============================================================ with gr.Blocks() as demo: gr.Markdown( f""" # Dashboard Kekurangan Sampel IPLM — Target {int(TARGET_COVERAGE*100)}% (Tanpa Hitung Indeks) **Target dari META (bukan hitung ulang):** - Kab/Kota: `{META_KAB_FILE}` kolom **`sampel_total`** - Provinsi: `{META_PROV_FILE}` kolom **`total _sampel`** (variasi spasi/underscore didukung) {DATA_INFO} """ ) with gr.Row(): dd_prov = gr.Dropdown(label="Provinsi", choices=prov_choices, value=prov_choices[0]) dd_kab = gr.Dropdown(label="Kab/Kota", choices=kab_choices, value=kab_choices[0]) dd_kew = gr.Dropdown(label="Kewenangan", choices=kew_choices, value=default_kew) dd_prov.change(fn=on_prov_change, inputs=dd_prov, outputs=dd_kab) run_btn = gr.Button("Hitung Kekurangan Sampel") msg_out = gr.Markdown() gr.Markdown("### Verifikasi (Target & Kekurangan Sampel) — Target dari META") verif_out = gr.DataFrame(interactive=False) gr.Markdown("### Grafik Kekurangan Sampel TOTAL (unit)") gap_plot_out = gr.Plot() gr.Markdown("### Detail Subset DM (yang terfilter)") detail_out = gr.DataFrame(interactive=False) gr.Markdown("### Analisis Naratif (LLM)") analysis_out = gr.Markdown() with gr.Row(): rekap_excel_out = gr.File(label="Download Rekap (Verifikasi + Detail) (.xlsx)") raw_dm_out = gr.File(label="Download Data Mentah Subset DM (.xlsx)") word_out = gr.File(label="Download Laporan Word (.docx)") run_btn.click( fn=run_core, inputs=[dd_prov, dd_kab, dd_kew], outputs=[ verif_out, detail_out, gap_plot_out, rekap_excel_out, raw_dm_out, word_out, msg_out, analysis_out ], ) demo.launch()