# TKM2025 / app.py
# NOTE: Hugging Face Spaces page artifacts ("irhamni's picture", "Update app.py",
# "467c9a6 verified") removed — they were not valid Python and broke the file.
# -*- coding: utf-8 -*-
"""
TKM Dashboard — NEW (MSI pooled) + Exclusion Kelembagaan (STRICT) + Penyesuaian Sampel ala IPLM
+ NORMALISASI MIN–MAX DATA (GLOBAL) dilakukan di LEVEL RESPONDEN sebelum agregasi wilayah
+ Subindeks Pra/Saat/Pasca yang ditampilkan adalah HASIL NORMALISASI (0–100) yang DIPAKAI untuk perhitungan Indeks TKM 0–100
+ Download Excel + Download Laporan Word (.docx) pakai Hugging Face (HF Token) [opsional]
PERBAIKAN WAJIB (sesuai komplain user):
1) Output laporan Word HARUS .DOCX (bukan .txt)
- Jika python-docx tidak tersedia, app HARUS error jelas (bukan fallback txt).
2) Laporan Word (LLM) HARUS memuat tabel 5 kolom:
No | Dimensi | Nilai | Interpretasi | Rekomendasi
dengan 4 baris:
1) Pra Membaca
2) Saat Membaca
3) Pasca Membaca
4) Nilai TKM
3) RULE KATEGORI INDEKS FINAL (0–100) DIPAKSA:
- < 50 : sangat rendah
- 51 – < 65 : rendah
- 66 – < 80 : sedang
- 81 – < 90 : tinggi
- >= 91 : sangat tinggi
(Implementasi tanpa gap: <50, <66, <81, <91, else)
4) Tampilan kolom n_responden Kab/Kota:
- Jika n_responden >= 400 -> KOSONG
- Jika n_responden < 400 -> tampil angka
(nilai numeric tetap dipakai internal)
5) Tampilan kolom n_responden Provinsi: selalu kosong (tanpa mengubah perhitungan)
"""
from pathlib import Path
from typing import Dict, List, Tuple, Optional, Any
from datetime import datetime
import os
import json
import re
import numpy as np
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt
import gradio as gr
import requests
# =========================
# DOCX WAJIB
# =========================
try:
    # python-docx is mandatory for the .docx report feature; probe availability
    # here and record it so report generation can fail loudly (no txt fallback).
    from docx import Document
    from docx.shared import Pt
    from docx.enum.text import WD_ALIGN_PARAGRAPH
    DOCX_AVAILABLE = True
except Exception:
    DOCX_AVAILABLE = False
import openpyxl  # noqa: F401  — required as pandas' Excel engine for read/write
np.random.seed(42)  # fixed seed for reproducible runs
# =========================
# KONFIGURASI UTAMA
# =========================
DATA_PATH = "DATA_TKM_28_JANUARI_2026.xlsx"  # adjust to the data file name in the working folder
WEIGHTS = {"pra": 0.15, "saat": 0.50, "pasca": 0.35}  # composite weights for the three reading phases
LIKERT_MIN, LIKERT_MAX = 1, 4  # questionnaire response scale (used by the MSI mapping)
MIN_FRAC_AVAILABLE_PER_SUBINDEX = 0.50  # min share of answered items for a respondent subindex
MIN_RESPONDEN_SLOVIN = 400  # Slovin target n; Kab/Kota below this get a scaling factor
# =========================
# LLM (HUGGING FACE) — opsional
# =========================
HF_TOKEN = os.getenv("HF_TOKEN", "").strip()  # empty token disables LLM narrative generation
HF_MODEL = os.getenv("HF_MODEL", "meta-llama/Meta-Llama-3-8B-Instruct")
HF_ENDPOINT_URL = os.getenv("HF_ENDPOINT_URL", "").strip()  # dedicated endpoint takes priority when set
HF_TIMEOUT = int(os.getenv("HF_TIMEOUT", "90"))
HF_MAX_NEW_TOKENS = int(os.getenv("HF_MAX_NEW_TOKENS", "900"))
HF_TEMPERATURE = float(os.getenv("HF_TEMPERATURE", "0.2"))
HF_TOP_P = float(os.getenv("HF_TOP_P", "0.9"))
# =========================
# EXCLUSION LIST (KELEMBAGAAN) — sesuai dashboard
# =========================
EXCLUDE_PROVINSI_DROP: List[str] = []  # provinces whose rows are dropped entirely (currently none)
# Provinces kept visible in tables but whose province-level aggregate is suppressed.
EXCLUDE_PROVINSI_NO_AGG = [
    "Bali",
    "Papua Barat Daya",
    "Papua Pegunungan",
    "Papua Selatan",
    "Papua Tengah",
]
# triple: (Provinsi, Jenis: kab/kota, Nama) — type-aware so e.g. only KAB Tasikmalaya matches
EXCLUDE_KABKOTA_BY_PROV_TYPE: List[Tuple[str, str, str]] = [
    # 1) Regions with downgraded institutional status
    ("Bali", "kab", "Bangli"),
    ("Bali", "kab", "Jembrana"),
    ("Sumatera Utara", "kab", "Humbang Hasundutan"),
    ("Lampung", "kab", "Pesisir Barat"),
    ("Papua", "kab", "Biak Numfor"),
    ("Papua Tengah", "kab", "Nabire"),
    ("Jawa Barat", "kab", "Tasikmalaya"),  # KAB Tasikmalaya only, never KOTA
    # 2) No institutional body yet
    ("Kalimantan Timur", "kab", "Mahakam Ulu"),
    ("Papua Barat", "kab", "Sorong Selatan"),
    ("Papua Barat Daya", "kab", "Tambrauw"),
    ("Maluku Utara", "kab", "Halmahera Timur"),
    ("Papua", "kab", "Mamberamo Raya"),
    ("Papua Tengah", "kab", "Puncak"),
    # 3) Nomenclature not listed
    ("Maluku", "kab", "Seram Bagian Barat"),
    ("Sumatera Utara", "kab", "Samosir"),
    ("Kalimantan Tengah", "kab", "Katingan"),
    ("Sulawesi Selatan", "kab", "Wajo"),
    ("Papua Selatan", "kab", "Asmat"),
]
# =========================
# AUTO-DETECT KOLOM WILAYAH
# =========================
def _norm(s: str) -> str:
return (
str(s).strip().lower()
.replace(" ", "")
.replace("-", "")
.replace("/", "")
.replace("\\", "")
.replace(".", "")
.replace(",", "")
)
def detect_region_cols(df: pd.DataFrame) -> Tuple[str, str]:
    """Auto-detect the province and kab/kota columns by fuzzy header matching.

    Returns (prov_col, kab_col) as the ORIGINAL header names.
    Raises ValueError with a diagnostic listing when either is missing.
    """
    norm_map = {_norm(c): c for c in df.columns}
    prov_candidates = ["provinsiasal", "provinsi asal", "provinsi", "province", "namaprovinsi", "prov"]
    kab_candidates = [
        "kabkota", "kab_kota", "kabkotaasal", "kabupatenkota", "kabupatenkotaasal",
        "kab/kota", "kabupaten/kota", "kabupaten", "kota", "kab"
    ]

    def _first_match(candidates):
        # First candidate (in priority order) whose normalized form is a header.
        for cand in candidates:
            hit = norm_map.get(_norm(cand))
            if hit is not None:
                return hit
        return None

    prov_col = _first_match(prov_candidates)
    kab_col = _first_match(kab_candidates)
    if prov_col is None or kab_col is None:
        raise ValueError(
            "Kolom wilayah tidak terdeteksi.\n"
            f"Kolom tersedia (contoh 40): {list(df.columns)[:40]}\n"
            f"Prov candidates: {prov_candidates}\n"
            f"Kab candidates : {kab_candidates}"
        )
    return prov_col, kab_col
# =========================
# NORMALISASI NAMA WILAYAH
# =========================
def norm_region_name(x: str) -> str:
    """Canonical region name: lowercase, punctuation→space, collapsed spaces, no 'provinsi' prefix."""
    if pd.isna(x):
        return ""
    cleaned = str(x).strip().lower()
    cleaned = re.sub(r"[^\w\s]", " ", cleaned)
    cleaned = re.sub(r"\s+", " ", cleaned).strip()
    for prefix in ("provinsi ", "propinsi "):
        if cleaned.startswith(prefix):
            cleaned = cleaned[len(prefix):].strip()
    return cleaned
def split_kabkota_type_name(x: str) -> Tuple[str, str]:
    """Split a Kab/Kota label into (type, name).

    Returns ("kab" | "kota" | "", normalized_name); NaN input yields ("", "").
    Punctuation is replaced with spaces and whitespace collapsed BEFORE the
    prefix checks, so "Kab. X" arrives here as "kab x" — the old
    `startswith("kab. ")` / `startswith("kota. ")` branches could never match
    and have been removed (no behavior change).
    """
    if pd.isna(x):
        return ("", "")
    s = re.sub(r"[^\w\s]", " ", str(x).strip().lower())
    s = re.sub(r"\s+", " ", s).strip()
    if s.startswith("kabupaten "):
        return ("kab", s[len("kabupaten "):].strip())
    if s.startswith("kab "):
        return ("kab", s[len("kab "):].strip())
    if s.startswith("kota "):
        return ("kota", s[len("kota "):].strip())
    # No recognizable type prefix: caller treats the row as untyped.
    return ("", s)
def apply_exclusions_strict(df: pd.DataFrame, prov_col: str, kab_col: str):
    """Drop excluded rows (kelembagaan rules) BEFORE any index computation.

    Removes rows whose province is in EXCLUDE_PROVINSI_DROP and rows matching
    a (province, kab/kota type, name) triple in EXCLUDE_KABKOTA_BY_PROV_TYPE.
    Matching is type-aware, so e.g. only KAB Tasikmalaya is dropped, never KOTA.

    Returns (clean_df, stats_info dict, audit frame of the removed rows).
    """
    d = df.copy()
    # Temporary normalized helper columns, used only for matching; dropped before return.
    d["_prov_norm"] = d[prov_col].apply(norm_region_name)
    tmp = d[kab_col].apply(split_kabkota_type_name)
    d["_kab_type"] = tmp.apply(lambda t: t[0])
    d["_kab_name"] = tmp.apply(lambda t: t[1])
    d["_kab_name_norm"] = d["_kab_name"].apply(norm_region_name)
    ex_prov_drop = set(norm_region_name(p) for p in EXCLUDE_PROVINSI_DROP)
    mask_prov_drop = d["_prov_norm"].isin(ex_prov_drop)
    # Normalize all exclusion triples once for O(1) membership tests.
    ex_triples = set(
        (norm_region_name(p), str(t).strip().lower(), norm_region_name(n))
        for p, t, n in EXCLUDE_KABKOTA_BY_PROV_TYPE
    )
    mask_triple = pd.Series(False, index=d.index)
    # Only rows whose label carried an explicit kab/kota type can match a triple.
    m_has_type = d["_kab_type"].isin(["kab", "kota"])
    mask_triple.loc[m_has_type] = [
        (pv, kt, kn) in ex_triples
        for pv, kt, kn in zip(
            d.loc[m_has_type, "_prov_norm"],
            d.loc[m_has_type, "_kab_type"].str.lower(),
            d.loc[m_has_type, "_kab_name_norm"],
        )
    ]
    # TASIK safety net: never drop "kota tasikmalaya" even if matching slips.
    raw_kab = d[kab_col].astype(str).str.lower()
    is_tasik = (d["_prov_norm"] == norm_region_name("Jawa Barat")) & (d["_kab_name_norm"] == norm_region_name("Tasikmalaya"))
    tasik_is_kota = raw_kab.str.contains(r"^\s*kota\b", regex=True)
    mask_triple = mask_triple & ~(is_tasik & tasik_is_kota)
    mask_exclude = mask_prov_drop | mask_triple
    # Audit counters surfaced in the UI/log output.
    stats_info = {
        "baris_awal": int(len(d)),
        "terhapus_total": int(mask_exclude.sum()),
        "terhapus_prov_drop": int(mask_prov_drop.sum()),
        "terhapus_kabkota_drop": int((~mask_prov_drop & mask_triple).sum()),
        "diolah": int((~mask_exclude).sum()),
    }
    audit_hapus = d.loc[mask_exclude, [prov_col, kab_col]].copy()
    out = d.loc[~mask_exclude].copy().reset_index(drop=True)
    out.drop(columns=["_prov_norm", "_kab_type", "_kab_name", "_kab_name_norm"], inplace=True, errors="ignore")
    return out, stats_info, audit_hapus
# =========================
# UTIL
# =========================
def mean_if_enough(row: pd.Series, min_frac: float) -> float:
    """Mean of the row's non-NaN values, or NaN when the row is empty or coverage < min_frac."""
    total = len(row)
    if total == 0:
        return np.nan
    valid = row.dropna()
    if len(valid) / total < min_frac:
        return np.nan
    return float(valid.mean())
def detect_item_groups(columns: List[str]) -> Dict[str, List[str]]:
    """Group questionnaire item columns by leading letter: B→pra, C→saat, D→pasca."""
    names = [str(c).strip() for c in columns]
    phase_prefix = {"pra": "B", "saat": "C", "pasca": "D"}
    return {
        phase: [name for name in names if name.upper().startswith(letter)]
        for phase, letter in phase_prefix.items()
    }
def faktor_penyesuaian(n: int, target: int = MIN_RESPONDEN_SLOVIN) -> float:
    """Sample-adjustment factor: min(n/target, 1); NaN for unparsable n or target <= 0."""
    try:
        count = int(n)
    except Exception:
        return np.nan
    if target <= 0:
        return np.nan
    clipped = max(count, 0)
    return float(min(clipped / target, 1.0))
def status_penyesuaian(n: int) -> str:
    """Human-readable sampling status for a Kab/Kota respondent count ('' when n is unusable)."""
    factor = faktor_penyesuaian(n)
    if not np.isfinite(factor):
        return ""
    if factor >= 1.0:
        return f"n≥{MIN_RESPONDEN_SLOVIN} (tanpa penyesuaian)"
    return f"n<{MIN_RESPONDEN_SLOVIN} (disesuaikan ×{factor:.2f})"
# =========================
# KATEGORI INDEKS FINAL (0–100) — RULE USER
# =========================
def kategori_indeks_final(score_0_100: float) -> str:
    """Map a final 0–100 index to its mandated category label.

    RULE USER:
      - < 50 : sangat rendah
      - 51 – < 65 : rendah
      - 66 – < 80 : sedang
      - 81 – < 90 : tinggi
      - >= 91 : sangat tinggi
    Implemented gap-free as <50, <66, <81, <91, else.
    Non-numeric / NaN input yields "".
    """
    value = pd.to_numeric(score_0_100, errors="coerce")
    if not np.isfinite(value):
        return ""
    thresholds = (
        (50, "sangat rendah"),
        (66, "rendah"),
        (81, "sedang"),
        (91, "tinggi"),
    )
    for upper, label in thresholds:
        if value < upper:
            return label
    return "sangat tinggi"
# =========================
# MSI — pooled per fase
# =========================
def compute_msi_mapping_from_values(values: pd.Series,
                                    min_cat: int = LIKERT_MIN,
                                    max_cat: int = LIKERT_MAX) -> Dict[int, float]:
    """Method of Successive Intervals: map Likert categories to interval-scale values.

    Builds cumulative proportions over the pooled responses, converts the
    category boundaries to normal quantiles, and scores each category as the
    midpoint of its boundary z-values.

    Returns {category: msi_value}; all-NaN input yields NaN for every category.

    FIX: the original only clamped the first and last boundary away from 0/1.
    A category with zero observations at either tail makes an INTERIOR
    boundary exactly 0 or 1, so norm.ppf returned ±inf and the adjacent
    category scores became infinite. All boundaries are now clipped into
    [eps, 1-eps]; results for well-populated data are unchanged.
    """
    s = pd.to_numeric(values, errors="coerce").dropna().astype(int)
    cats = list(range(min_cat, max_cat + 1))
    if s.empty:
        return {cat: np.nan for cat in cats}
    N = len(s)
    counts = np.array([(s == cat).sum() for cat in cats], dtype=float)
    cum_p = np.cumsum(counts / N)
    boundaries = np.concatenate([[0.0], cum_p])
    eps = 0.5 / N  # continuity-style correction keeps quantiles finite
    boundaries = np.clip(boundaries, eps, 1 - eps)
    z = stats.norm.ppf(boundaries)
    msi_vals = [(z[i] + z[i + 1]) / 2.0 for i in range(len(cats))]
    return {cat: float(val) for cat, val in zip(cats, msi_vals)}
def apply_msi_pooled_phase(df_phase: pd.DataFrame, cols: List[str]) -> Tuple[pd.DataFrame, Dict[int, float]]:
    """Apply ONE pooled MSI mapping (built from all items of a phase) to each item column."""
    out = df_phase.copy()
    pooled_values = pd.Series(out[cols].values.ravel())
    mapping = compute_msi_mapping_from_values(pooled_values)
    for col in cols:
        out[col] = pd.to_numeric(out[col], errors="coerce").map(mapping)
    return out, mapping
# =========================
# MIN–MAX DATA NORMALIZATION (GLOBAL) — per responden
# =========================
def minmax_0_100_global(x: pd.Series) -> Tuple[pd.Series, float, float]:
    """Rescale a series to 0–100 with its own global min/max.

    Returns (scaled_series, min, max); an all-NaN or constant series yields
    an all-NaN result (division by zero range is not attempted).
    """
    values = pd.to_numeric(x, errors="coerce")
    lo = float(values.min(skipna=True))
    hi = float(values.max(skipna=True))
    degenerate = (not np.isfinite(lo)) or (not np.isfinite(hi)) or hi <= lo
    if degenerate:
        return pd.Series(np.nan, index=values.index), lo, hi
    scaled = ((values - lo) / (hi - lo)).clip(0, 1)
    return (scaled * 100).round(2), lo, hi
# =========================
# AGREGASI
# =========================
def weighted_mean(values: pd.Series, weights: pd.Series) -> float:
    """Weighted average ignoring NaNs and non-positive weights; NaN when nothing is usable."""
    numeric_vals = pd.to_numeric(values, errors="coerce")
    numeric_wts = pd.to_numeric(weights, errors="coerce")
    usable = numeric_vals.notna() & numeric_wts.notna() & (numeric_wts > 0)
    if not usable.any():
        return np.nan
    return float(np.average(numeric_vals[usable], weights=numeric_wts[usable]))
def aggregate_kabkot(df: pd.DataFrame, prov_col: str, kab_col: str) -> pd.DataFrame:
    """Per-(provinsi, kab/kota) means of the respondent index and subindices plus counts."""
    spec = {
        "indeks_mean": ("index_0_100", "mean"),
        "n_responden": (kab_col, "size"),
        "subidx_pra_0_100_mean": ("subidx_pra_0_100", "mean"),
        "subidx_saat_0_100_mean": ("subidx_saat_0_100", "mean"),
        "subidx_pasca_0_100_mean": ("subidx_pasca_0_100", "mean"),
    }
    grouped = df.groupby([prov_col, kab_col], dropna=False).agg(**spec)
    return grouped.reset_index()
def aggregate_prov_from_kab(kab_df: pd.DataFrame, prov_col: str) -> pd.DataFrame:
    """Province roll-up: n-weighted mean of final Kab/Kota TKM, total n, and a sampling status."""
    records = []
    for prov, grp in kab_df.groupby(prov_col, dropna=False):
        counts = pd.to_numeric(grp["n_responden"], errors="coerce").fillna(0)
        if (counts < MIN_RESPONDEN_SLOVIN).any():
            status = f"⚠️ Ada Kab/Kota n<{MIN_RESPONDEN_SLOVIN} (nilai Kab/Kota disesuaikan)"
        else:
            status = f"✅ Semua Kab/Kota n≥{MIN_RESPONDEN_SLOVIN} (tanpa penyesuaian)"
        records.append({
            prov_col: prov,
            "tkm_final_mean": weighted_mean(grp["Indeks_TKM_0_100_final"], grp["n_responden"]),
            "n_responden": int(counts.sum()),
            "prov_status": status,
        })
    return pd.DataFrame(records)
# =========================
# DEMOG DETECTION (opsional)
# =========================
# Optional demographic columns: canonical key -> accepted header spellings,
# matched case-insensitively against the loaded data's column names.
DEMOG_CANDIDATES = {
    "jenis_kelamin": ["jenis kelamin", "jenis_kelamin", "jk", "gender"],
    "usia": ["usia", "umur", "kelompok umur", "kelompok_umur", "age"],
    "pendidikan": ["pendidikan", "pendidikan terakhir", "pendidikan_terakhir", "tingkat pendidikan", "tingkat_pendidikan"],
}
def detect_demog_columns(df: pd.DataFrame) -> Dict[str, str]:
    """Find optional demographic columns by case-insensitive header match.

    Returns {canonical_key: actual_column_name} for every key that matched.
    """
    by_lower = {str(c).strip().lower(): c for c in df.columns}
    found: Dict[str, str] = {}
    for key, candidates in DEMOG_CANDIDATES.items():
        for cand in candidates:
            actual = by_lower.get(cand.lower())
            if actual is not None:
                found[key] = actual
                break
    return found
# =========================
# LOAD + HITUNG SEKALI
# =========================
# Load the survey once at import time and compute respondent-level indices.
DATA_FILE = Path(DATA_PATH)
if not DATA_FILE.exists():
    raise FileNotFoundError(f"File data tidak ditemukan: {DATA_PATH}")
df_raw = pd.read_excel(DATA_FILE)
PROV_COL, KABKOT_COL = detect_region_cols(df_raw)
print("✅ Kolom wilayah terdeteksi:", {"PROV_COL": PROV_COL, "KABKOT_COL": KABKOT_COL})
# astype(str) turns NaN into the literal "nan", so the fillna is a no-op here;
# the replace({"nan": ""}) is what actually blanks missing region names.
df_raw[PROV_COL] = df_raw[PROV_COL].astype(str).fillna("").replace({"nan": ""}).str.strip()
df_raw[KABKOT_COL] = df_raw[KABKOT_COL].astype(str).fillna("").replace({"nan": ""}).str.strip()
df_clean, excl_stats, audit_hapus = apply_exclusions_strict(df_raw, PROV_COL, KABKOT_COL)
print("🧹 EXCLUSION (STRICT):", excl_stats)
GROUP_COLS = detect_item_groups(df_clean.columns.tolist())
print("✅ Item detected:", {k: len(v) for k, v in GROUP_COLS.items()})
df_idx = df_clean.copy()
# Per-phase MSI category mappings, kept for the export metadata sheet.
phase_maps: Dict[str, Dict[int, float]] = {}
for phase in ["pra", "saat", "pasca"]:
    cols = GROUP_COLS.get(phase, [])
    if not cols:
        # Phase has no detected items: subindex is undefined for all respondents.
        df_idx[f"subidx_{phase}_msi"] = np.nan
        phase_maps[phase] = {1: np.nan, 2: np.nan, 3: np.nan, 4: np.nan}
        continue
    tmp = df_idx[cols].copy()
    tmp_msi, mapping = apply_msi_pooled_phase(tmp, cols)
    phase_maps[phase] = mapping
    # Respondent subindex = mean of their MSI item scores, but only when at
    # least MIN_FRAC_AVAILABLE_PER_SUBINDEX of the phase items were answered.
    df_idx[f"subidx_{phase}_msi"] = tmp_msi.apply(
        lambda r: mean_if_enough(r, MIN_FRAC_AVAILABLE_PER_SUBINDEX),
        axis=1
    )
# =========================
# NORMALISASI MIN–MAX GLOBAL PER FASE (LEVEL RESPONDEN)
# =========================
# One global min/max per phase (over ALL respondents) so regions stay comparable.
df_idx["subidx_pra_0_100"], MIN_PRA, MAX_PRA = minmax_0_100_global(df_idx["subidx_pra_msi"])
df_idx["subidx_saat_0_100"], MIN_SAAT, MAX_SAAT = minmax_0_100_global(df_idx["subidx_saat_msi"])
df_idx["subidx_pasca_0_100"],MIN_PAS, MAX_PAS = minmax_0_100_global(df_idx["subidx_pasca_msi"])
print("\n📌 MIN–MAX GLOBAL (MSI->0–100)")
print(f"• PRA : min={MIN_PRA:.6f} max={MAX_PRA:.6f}")
print(f"• SAAT : min={MIN_SAAT:.6f} max={MAX_SAAT:.6f}")
print(f"• PASCA: min={MIN_PAS:.6f} max={MAX_PAS:.6f}")
# =========================
# INDEKS 0–100 (LEVEL RESPONDEN)
# =========================
# Weighted composite of the three normalized subindices; dividing by the weight
# sum keeps the result on 0–100 even if WEIGHTS does not sum to exactly 1.
w_sum = float(sum(WEIGHTS.values()))
df_idx["index_0_100"] = (
    WEIGHTS["pra"] * df_idx["subidx_pra_0_100"] +
    WEIGHTS["saat"] * df_idx["subidx_saat_0_100"] +
    WEIGHTS["pasca"]* df_idx["subidx_pasca_0_100"]
) / w_sum
df_idx["index_0_100"] = pd.to_numeric(df_idx["index_0_100"], errors="coerce").round(2)
DEMOG_COLS = detect_demog_columns(df_idx)
print("ℹ️ Demog cols:", DEMOG_COLS)
# =========================
# PLOT HELPERS
# =========================
def empty_figure(msg: str):
    """Placeholder matplotlib figure showing only a centered message."""
    figure, axis = plt.subplots(figsize=(8, 3))
    axis.axis("off")
    axis.text(0.5, 0.5, msg, ha="center", va="center")
    figure.tight_layout()
    return figure
def plot_demog_bar(df_dem: pd.DataFrame, cat_col: str, title: str):
    """Bar chart of mean TKM index (0–100) per demographic category, with value labels."""
    labels = df_dem[cat_col].astype(str).tolist()
    heights = df_dem["mean_index_0_100"].values
    positions = np.arange(len(labels))
    fig, ax = plt.subplots(figsize=(10, 4))
    ax.bar(positions, heights)
    ax.set_xticks(positions)
    ax.set_xticklabels(labels, rotation=25, ha="right")
    ax.set_ylim(0, 100)
    ax.set_ylabel("Indeks TKM (0–100)")
    ax.set_title(title)
    ax.grid(True, linestyle="--", alpha=0.25)
    # Annotate each bar with its value; skip NaN bars entirely.
    for pos, height in enumerate(heights):
        if not pd.isna(height):
            ax.text(pos, height + 1.2, f"{height:.1f}", ha="center")
    fig.tight_layout()
    return fig
# =========================
# UI HELPERS
# =========================
def get_prov_choices():
    """Dropdown options: '(Semua)' followed by the sorted non-blank province names."""
    names = df_idx[PROV_COL].dropna().unique().tolist()
    provinces = sorted(name for name in names if str(name).strip() != "")
    return ["(Semua)"] + provinces
def update_kab_dropdown(provinsi: str):
    """Refresh the Kab/Kota dropdown for the chosen province ('(Semua)' = no filter)."""
    frame = df_idx if provinsi == "(Semua)" else df_idx[df_idx[PROV_COL] == provinsi]
    names = frame[KABKOT_COL].dropna().unique().tolist()
    kabs = sorted(name for name in names if str(name).strip() != "")
    return gr.update(choices=["(Semua)"] + kabs, value="(Semua)")
def build_hero_md(provinsi: str,
                  prov_row: Optional[pd.Series],
                  n_adjusted_kab: int,
                  n_total_kab: int,
                  n_total_resp: int) -> str:
    """Markdown 'hero' summary for the dashboard header.

    Three shapes: national methodology overview when provinsi == "(Semua)";
    a warning when the province row is missing; otherwise the province score
    with its category and sampling notes.
    """
    if provinsi == "(Semua)":
        # National view: methodology recap plus the global min–max used per phase.
        return (
            "## Ringkasan Eksekutif\n"
            f"- Exclusion (STRICT) sebelum MSI: terhapus **{excl_stats['terhapus_total']}** dari **{excl_stats['baris_awal']}**, diolah **{excl_stats['diolah']}**.\n"
            f"- Normalisasi: **min–max data global per fase** pada level responden → 0–100.\n"
            f"- Indeks TKM 0–100 (responden): komposit berbobot dari **Pra/Saat/Pasca**.\n"
            f"- Penyesuaian sampel Kab/Kota: target **{MIN_RESPONDEN_SLOVIN}**; n<{MIN_RESPONDEN_SLOVIN} → dikali faktor.\n"
            f"- Bali: provinsi tampil, agregat provinsi **tidak dihitung**.\n\n"
            f"**Min–max global per fase:**\n"
            f"- Pra : min={MIN_PRA:.6f}, max={MAX_PRA:.6f}\n"
            f"- Saat : min={MIN_SAAT:.6f}, max={MAX_SAAT:.6f}\n"
            f"- Pasca: min={MIN_PAS:.6f}, max={MAX_PAS:.6f}\n"
        )
    if prov_row is None:
        return f"## Ringkasan Eksekutif — {provinsi}\n⚠️ Skor provinsi tidak tersedia pada filter ini."
    # "Status" is the display-frame column; "prov_status" is the internal fallback key.
    prov_status = str(prov_row.get("Status", prov_row.get("prov_status", ""))).strip()
    idx_val = prov_row.get("Indeks_TKM_0_100", np.nan)
    if not np.isfinite(pd.to_numeric(idx_val, errors="coerce")):
        # Province visible but without an aggregate (e.g. EXCLUDE_PROVINSI_NO_AGG members).
        return (
            f"## Ringkasan Eksekutif — {provinsi}\n"
            f"### **—**\n"
            f"- Kab/Kota disesuaikan (n<{MIN_RESPONDEN_SLOVIN}): **{n_adjusted_kab}** dari **{n_total_kab}**\n"
            f"- Total responden pada filter: **{n_total_resp}**\n\n"
            f"{prov_status}"
        ).strip()
    kat = kategori_indeks_final(float(idx_val))
    return (
        f"## Ringkasan Eksekutif — {provinsi}\n"
        f"### **{float(idx_val):.2f}** ({kat})\n\n"
        f"- Kab/Kota disesuaikan (n<{MIN_RESPONDEN_SLOVIN}): **{n_adjusted_kab}** dari **{n_total_kab}**\n"
        f"- Total responden pada filter: **{n_total_resp}**\n\n"
        f"{prov_status}"
    ).strip()
# =========================
# CORE PIPELINE PER FILTER
# =========================
def compute_outputs(provinsi: str, kabkota: str):
    """Run the full aggregation pipeline for one filter selection.

    Returns a fixed 13-tuple consumed positionally by the Gradio UI:
    (hero_md, info_md, kab_valid_df, prov_df, kab_adjusted_df, verif_df,
     records, detail_df, detail_plot, update, update, figure, text).
    """
    sub = df_idx.copy()
    if provinsi != "(Semua)":
        sub = sub[sub[PROV_COL] == provinsi]
    if kabkota != "(Semua)":
        sub = sub[sub[KABKOT_COL] == kabkota]
    if sub.empty:
        # Empty-filter shape must mirror the success return exactly (13 slots).
        empty = pd.DataFrame()
        hero = "## Ringkasan Eksekutif\n⚠️ Tidak ada data pada filter ini."
        info = "⚠️ Tidak ada data."
        return (
            hero, info,
            empty, empty, empty, empty,
            [],
            pd.DataFrame(columns=["Komponen", "Nilai"]),
            empty_figure("Tidak ada data."),
            gr.update(visible=False),
            gr.update(visible=False),
            empty_figure("Tidak ada data."),
            "Tidak ada data."
        )
    # -------------------------
    # AGREGASI KAB/KOTA
    # -------------------------
    kab = aggregate_kabkot(sub, PROV_COL, KABKOT_COL).copy()
    kab["_faktor_internal"] = kab["n_responden"].apply(faktor_penyesuaian)
    kab["status_sampel"] = kab["n_responden"].apply(status_penyesuaian)
    kab["_raw_internal"] = pd.to_numeric(kab["indeks_mean"], errors="coerce")
    # Final Kab/Kota score = raw mean × sampling factor (penalizes n < target).
    kab["Indeks_TKM_0_100_final"] = (kab["_raw_internal"] * kab["_faktor_internal"]).round(2)
    kab["Kategori_Indeks_TKM_FINAL"] = kab["Indeks_TKM_0_100_final"].apply(kategori_indeks_final)
    # Display rule: n_responden shown blank when n>=400, numeric when n<400;
    # the numeric value is preserved in _n_responden_num for internal use.
    kab["_n_responden_num"] = pd.to_numeric(kab["n_responden"], errors="coerce").fillna(0).astype(int)
    kab["n_responden"] = np.where(
        kab["_n_responden_num"] >= MIN_RESPONDEN_SLOVIN,
        "",
        kab["_n_responden_num"].astype(int)
    )
    out_kab_all = kab.rename(columns={PROV_COL: "Provinsi", KABKOT_COL: "Wilayah"})[
        [
            "Provinsi",
            "Wilayah",
            "subidx_pra_0_100_mean",
            "subidx_saat_0_100_mean",
            "subidx_pasca_0_100_mean",
            "Indeks_TKM_0_100_final",
            "Kategori_Indeks_TKM_FINAL",
            "n_responden",
            "status_sampel",
        ]
    ].rename(columns={
        "Indeks_TKM_0_100_final": "Indeks_TKM_0_100",
        "subidx_pra_0_100_mean": "SubIndeks_Pra_0_100",
        "subidx_saat_0_100_mean": "SubIndeks_Saat_0_100",
        "subidx_pasca_0_100_mean": "SubIndeks_Pasca_0_100",
    }).sort_values(["Provinsi", "Wilayah"])
    # Boolean masks from `kab` align with the sorted frame by index label.
    out_kab_valid = out_kab_all[kab["_n_responden_num"] >= MIN_RESPONDEN_SLOVIN]
    out_kab_adjusted = out_kab_all[kab["_n_responden_num"] < MIN_RESPONDEN_SLOVIN]
    # -------------------------
    # AGREGASI PROVINSI (INTERNAL NUMERIC)
    # -------------------------
    # Recomputed from scratch because `kab` already blanked n_responden for display.
    kab_for_prov = aggregate_kabkot(sub, PROV_COL, KABKOT_COL).copy()
    kab_for_prov["_faktor_internal"] = kab_for_prov["n_responden"].apply(faktor_penyesuaian)
    kab_for_prov["_raw_internal"] = pd.to_numeric(kab_for_prov["indeks_mean"], errors="coerce")
    kab_for_prov["Indeks_TKM_0_100_final"] = (kab_for_prov["_raw_internal"] * kab_for_prov["_faktor_internal"]).round(2)
    prov_from_kab = aggregate_prov_from_kab(kab_for_prov, PROV_COL).rename(columns={PROV_COL: "Provinsi"})
    prov_from_kab["Indeks_TKM_0_100"] = pd.to_numeric(prov_from_kab["tkm_final_mean"], errors="coerce").round(2)
    prov_from_kab["Kategori_Indeks_TKM_FINAL"] = prov_from_kab["Indeks_TKM_0_100"].apply(kategori_indeks_final)
    prov_from_kab["Status"] = prov_from_kab["prov_status"]
    # Excluded provinces (Bali etc.): keep the row visible, blank the aggregate.
    no_agg = set(norm_region_name(p) for p in EXCLUDE_PROVINSI_NO_AGG)
    mask_no_agg = prov_from_kab["Provinsi"].map(norm_region_name).isin(no_agg)
    prov_from_kab.loc[mask_no_agg, "Indeks_TKM_0_100"] = np.nan
    prov_from_kab.loc[mask_no_agg, "Kategori_Indeks_TKM_FINAL"] = ""
    prov_from_kab.loc[mask_no_agg, "Status"] = "🚫 EXCLUDED (kelembagaan): provinsi ditampilkan tetapi agregat tidak dihitung"
    # Display rule: province n_responden is always blank (computation unchanged).
    prov_from_kab["n_responden"] = ""
    out_prov = prov_from_kab[["Provinsi", "Indeks_TKM_0_100", "Kategori_Indeks_TKM_FINAL", "n_responden", "Status"]]
    # -------------------------
    # INFO & HERO
    # -------------------------
    n_total_kab = len(kab_for_prov)
    n_adjusted_kab = int((pd.to_numeric(kab_for_prov["n_responden"], errors="coerce").fillna(0) < MIN_RESPONDEN_SLOVIN).sum())
    hero_md = build_hero_md(
        provinsi,
        out_prov.iloc[0] if provinsi != "(Semua)" and not out_prov.empty else None,
        n_adjusted_kab,
        n_total_kab,
        len(sub),
    )
    info = (
        f"Responden pada filter: **{len(sub)}** | "
        f"Kab/Kota total: **{n_total_kab}** | "
        f"Kab/Kota disesuaikan: **{n_adjusted_kab}**"
    )
    # Raw filtered rows, passed to the demographic module via gr.State.
    records = sub.to_dict(orient="records")
    # Sampling verification table (display).
    verif = kab.rename(columns={PROV_COL: "Provinsi", KABKOT_COL: "Kab/Kota"})[
        ["Provinsi", "Kab/Kota", "n_responden", "status_sampel"]
    ].copy()
    # Placeholders for UI slots not used by this view.
    detail_df = pd.DataFrame()
    detail_plot = empty_figure("")
    return (
        hero_md,
        info,
        out_kab_valid,
        out_prov,
        out_kab_adjusted,
        verif,
        records,
        detail_df,
        detail_plot,
        gr.update(visible=False),
        gr.update(visible=False),
        empty_figure(""),
        "",
    )
# =========================
# DEMOG MODULE
# =========================
def demog_summary_one(records: List[dict], key: str) -> Optional[pd.DataFrame]:
    """Mean index per category of one demographic dimension; None when unavailable."""
    if not records or key not in DEMOG_COLS:
        return None
    col = DEMOG_COLS[key]
    frame = pd.DataFrame(records)
    if col not in frame.columns:
        return None
    frame["index_0_100"] = pd.to_numeric(frame["index_0_100"], errors="coerce")
    summary = (
        frame.groupby(col, dropna=False)["index_0_100"]
        .mean()
        .reset_index()
        .rename(columns={"index_0_100": "mean_index_0_100"})
    )
    return summary.sort_values("mean_index_0_100", ascending=False)
def demog_rule_text(g: pd.DataFrame, col: str) -> str:
    """One-sentence takeaway comparing the first (highest) and last (lowest) rows.

    Assumes `g` is already sorted descending by mean_index_0_100 (as produced
    by demog_summary_one); this function does not re-sort.
    """
    ranked = g.copy()
    ranked["mean_index_0_100"] = pd.to_numeric(ranked["mean_index_0_100"], errors="coerce")
    ranked = ranked.dropna(subset=["mean_index_0_100"])
    if ranked.empty:
        return "Tidak cukup data untuk analisis demografi."
    best = ranked.iloc[0]
    worst = ranked.iloc[-1]
    return (
        f"Kelompok **{best[col]}** cenderung lebih tinggi (≈{best['mean_index_0_100']:.1f}) "
        f"dibanding **{worst[col]}** (≈{worst['mean_index_0_100']:.1f}). "
        "Gunakan ini untuk segmentasi program."
    )
def demog_run(records: List[dict], dimensi: str):
    """Produce (figure, narrative) for one demographic dimension of the filtered records."""
    if not records:
        return empty_figure("Tidak ada data pada filter ini."), "Tidak ada data."
    key_map = {
        "Jenis Kelamin": "jenis_kelamin",
        "Kelompok Usia": "usia",
        "Tingkat Pendidikan": "pendidikan",
    }
    # Inverse mapping: canonical key back to its display label.
    label_map = {canonical: label for label, canonical in key_map.items()}
    key = key_map.get(dimensi, "jenis_kelamin")
    summary = demog_summary_one(records, key)
    if summary is None or summary.empty or key not in DEMOG_COLS:
        return empty_figure("Kolom demografi tidak ditemukan di data."), "Analisis demografi belum dapat dilakukan."
    col = DEMOG_COLS[key]
    fig = plot_demog_bar(summary, col, f"Rerata Indeks TKM (0–100) menurut {label_map.get(key, col)}")
    return fig, demog_rule_text(summary, col)
# =========================
# EXPORT HELPERS
# =========================
def _safe_sheet_name(name: str) -> str:
bad = ['\\', '/', '*', '[', ']', ':', '?']
out = name
for b in bad:
out = out.replace(b, " ")
out = " ".join(out.split()).strip()
return out[:31] if out else "Sheet1"
def _now_tag() -> str:
return datetime.now().strftime("%Y%m%d_%H%M%S")
def _hf_generate(prompt: str) -> Optional[str]:
    """Best-effort text generation via Hugging Face; returns None on ANY failure.

    Tries a dedicated inference endpoint first (HF_ENDPOINT_URL, if set), then
    the public serverless inference API for HF_MODEL. Callers treat None as
    "LLM unavailable" and fall back to deterministic content.
    """
    if not HF_TOKEN:
        # No token configured: LLM features are disabled.
        return None
    headers = {
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json",
        "Accept": "application/json",
    }
    try:
        if HF_ENDPOINT_URL:
            # Dedicated endpoint uses the TGI-style /generate route and returns a dict.
            url = HF_ENDPOINT_URL.rstrip("/") + "/generate"
            payload = {
                "inputs": prompt,
                "parameters": {
                    "max_new_tokens": HF_MAX_NEW_TOKENS,
                    "temperature": HF_TEMPERATURE,
                    "top_p": HF_TOP_P,
                    "return_full_text": False,
                }
            }
            r = requests.post(url, headers=headers, json=payload, timeout=HF_TIMEOUT)
            if r.status_code != 200:
                return None
            data = r.json()
            txt = (data.get("generated_text") or data.get("text") or "").strip()
            return txt if txt else None
        # Serverless inference API: responds with a list of generation dicts.
        url = f"https://api-inference.huggingface.co/models/{HF_MODEL}"
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": HF_MAX_NEW_TOKENS,
                "temperature": HF_TEMPERATURE,
                "top_p": HF_TOP_P,
                "return_full_text": False,
            },
            "options": {"wait_for_model": True}
        }
        r = requests.post(url, headers=headers, json=payload, timeout=HF_TIMEOUT)
        if r.status_code != 200:
            return None
        data = r.json()
        if isinstance(data, dict) and data.get("error"):
            return None
        if isinstance(data, list) and len(data) > 0 and isinstance(data[0], dict):
            txt = (data[0].get("generated_text") or "").strip()
            return txt if txt else None
        return None
    except Exception:
        # Deliberate best-effort: any network/JSON error degrades to the fallback path.
        return None
def _df_to_docx_table(doc: "Document", df: pd.DataFrame, title: str, max_rows: int = 60):
    """Render a DataFrame as a bold-headed 'Table Grid' under a level-2 heading.

    Only the first max_rows rows are written; floats print as two decimals,
    NaN as empty cells.
    """
    doc.add_heading(title, level=2)
    if df is None or df.empty:
        doc.add_paragraph("Tidak ada data pada tabel ini.")
        return
    body = df.head(max_rows) if len(df) > max_rows else df.copy()
    table = doc.add_table(rows=1, cols=len(body.columns))
    table.style = "Table Grid"
    for header_cell, col in zip(table.rows[0].cells, body.columns):
        header_run = header_cell.paragraphs[0].add_run(str(col))
        header_run.bold = True
    for _, row in body.iterrows():
        cells = table.add_row().cells
        for j, col in enumerate(body.columns):
            value = row[col]
            if isinstance(value, float) and np.isfinite(value):
                cells[j].text = f"{value:.2f}"
            else:
                cells[j].text = "" if pd.isna(value) else str(value)
def _extract_focus_scores(provinsi: str, kabkota: str) -> Dict[str, Any]:
    """Collect the four report-table values (Pra, Saat, Pasca, TKM) for a filter.

    - Specific kabkota: values from that single Kab/Kota row.
    - Specific provinsi, kabkota "(Semua)": province TKM plus subindices as
      n-weighted means over its Kab/Kota; excluded provinces return NaN.
    - provinsi "(Semua)": national focus is intentionally NaN (no pooled score).
    """
    sub = df_idx.copy()
    if provinsi != "(Semua)":
        sub = sub[sub[PROV_COL] == provinsi]
    if kabkota != "(Semua)":
        sub = sub[sub[KABKOT_COL] == kabkota]
    if sub.empty:
        return {"label": "", "pra": np.nan, "saat": np.nan, "pasca": np.nan, "tkm": np.nan}
    kab_num = aggregate_kabkot(sub, PROV_COL, KABKOT_COL).copy()
    kab_num["_n"] = pd.to_numeric(kab_num["n_responden"], errors="coerce").fillna(0).astype(int)
    kab_num["_f"] = kab_num["_n"].apply(faktor_penyesuaian)
    # Same sampling adjustment as the dashboard tables: raw mean × factor.
    kab_num["_tkm_final"] = (pd.to_numeric(kab_num["indeks_mean"], errors="coerce") * kab_num["_f"]).round(2)
    if kabkota != "(Semua)":
        # The kabkota filter leaves exactly one aggregated row.
        row = kab_num.iloc[0]
        return {
            # NOTE(review): no separator between province and kab/kota in the
            # label — looks unintended; confirm the desired display format.
            "label": f"{provinsi}{kabkota}",
            "pra": float(pd.to_numeric(row["subidx_pra_0_100_mean"], errors="coerce")),
            "saat": float(pd.to_numeric(row["subidx_saat_0_100_mean"], errors="coerce")),
            "pasca": float(pd.to_numeric(row["subidx_pasca_0_100_mean"], errors="coerce")),
            "tkm": float(pd.to_numeric(row["_tkm_final"], errors="coerce")),
        }
    if provinsi != "(Semua)":
        # Excluded provinces have no published aggregate: report NaN values.
        if norm_region_name(provinsi) in set(norm_region_name(p) for p in EXCLUDE_PROVINSI_NO_AGG):
            return {"label": provinsi, "pra": np.nan, "saat": np.nan, "pasca": np.nan, "tkm": np.nan}
        tkm_prov = weighted_mean(kab_num["_tkm_final"], kab_num["_n"])
        pra_prov = weighted_mean(kab_num["subidx_pra_0_100_mean"], kab_num["_n"])
        saat_prov = weighted_mean(kab_num["subidx_saat_0_100_mean"], kab_num["_n"])
        pasca_prov = weighted_mean(kab_num["subidx_pasca_0_100_mean"], kab_num["_n"])
        return {"label": provinsi, "pra": pra_prov, "saat": saat_prov, "pasca": pasca_prov, "tkm": tkm_prov}
    return {"label": "NASIONAL (filter semua)", "pra": np.nan, "saat": np.nan, "pasca": np.nan, "tkm": np.nan}
def _build_llm_table_prompt(focus: Dict[str, Any]) -> str:
    """Build the LLM prompt for the 4-row dimension table (strict JSON output).

    Serializes the focus scores, their rule-based categories, the category
    rule itself, weights, and a method summary so the model cannot invent
    numbers; the instruction block pins the exact JSON schema expected by
    the report generator.
    """
    def f2(x):
        # Coerce to a plain float, or None so json.dumps emits null.
        try:
            v = float(pd.to_numeric(x, errors="coerce"))
            return v if np.isfinite(v) else None
        except Exception:
            return None
    payload = {
        "label": focus.get("label", ""),
        "nilai": {
            "pra": f2(focus.get("pra")),
            "saat": f2(focus.get("saat")),
            "pasca": f2(focus.get("pasca")),
            "tkm": f2(focus.get("tkm")),
        },
        # Categories are computed HERE with the mandated rule, not by the LLM.
        "kategori": {
            "pra": kategori_indeks_final(focus.get("pra")),
            "saat": kategori_indeks_final(focus.get("saat")),
            "pasca": kategori_indeks_final(focus.get("pasca")),
            "tkm": kategori_indeks_final(focus.get("tkm")),
        },
        "rule_kategori": {
            "sangat_rendah": "<50",
            "rendah": "51-<65",
            "sedang": "66-<80",
            "tinggi": "81-<90",
            "sangat_tinggi": ">=91"
        },
        "bobot": WEIGHTS,
        "metode": "MSI pooled per fase -> normalisasi min-max global per fase -> komposit berbobot -> agregasi -> penyesuaian sampel (kab/kota n<400)"
    }
    instruksi = """
Anda adalah analis kebijakan literasi yang tajam dan teknokratis.
Isi kolom Interpretasi dan Rekomendasi untuk tabel 4 baris:
1) Pra Membaca
2) Saat Membaca
3) Pasca Membaca
4) Nilai TKM
ATURAN:
- Jangan mengarang angka.
- Interpretasi harus mengunci makna angka terhadap kesiapan (pra), kualitas pelaksanaan (saat), penguatan/retensi (pasca), dan implikasinya ke TKM komposit.
- Rekomendasi harus operasional, spesifik, dan menunjukkan prioritas bottleneck.
- Output WAJIB JSON valid sesuai skema.
- Jika nilai null, isi interpretasi: "tidak cukup data pada filter ini" dan rekomendasi: "perbaiki kelengkapan data/representasi responden".
SKEMA OUTPUT:
{
"rows": [
{"no": 1, "dimensi": "Pra Membaca", "nilai": <number_or_null>, "kategori": "<string>", "interpretasi": "<string>", "rekomendasi": "<string>"},
{"no": 2, "dimensi": "Saat Membaca", "nilai": <number_or_null>, "kategori": "<string>", "interpretasi": "<string>", "rekomendasi": "<string>"},
{"no": 3, "dimensi": "Pasca Membaca", "nilai": <number_or_null>, "kategori": "<string>", "interpretasi": "<string>", "rekomendasi": "<string>"},
{"no": 4, "dimensi": "Nilai TKM", "nilai": <number_or_null>, "kategori": "<string>", "interpretasi": "<string>", "rekomendasi": "<string>"}
]
}
""".strip()
    return instruksi + "\n\nINPUT_JSON:\n" + json.dumps(payload, ensure_ascii=False)
def _fallback_rows(focus: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Deterministic dimension-table rows, used when the LLM is unavailable or invalid."""
    def build_row(no: int, dimensi: str, raw_value) -> Dict[str, Any]:
        value = pd.to_numeric(raw_value, errors="coerce")
        if not np.isfinite(value):
            return {
                "no": no, "dimensi": dimensi, "nilai": None, "kategori": "",
                "interpretasi": "tidak cukup data pada filter ini",
                "rekomendasi": "perbaiki kelengkapan data/representasi responden"
            }
        value = float(value)
        return {
            "no": no, "dimensi": dimensi, "nilai": round(value, 2), "kategori": kategori_indeks_final(value),
            "interpretasi": (
                f"Nilai {value:.2f} berada pada kategori '{kategori_indeks_final(value)}'. "
                "Baca ini sebagai posisi dimensi terhadap dimensi lain; kesenjangan antar-dimensi adalah sumber utama pelemahan nilai komposit."
            ),
            "rekomendasi": (
                "Tetapkan paket intervensi minimum (SOP singkat + pendampingan fasilitator + monitoring indikator inti) "
                "dan fokuskan pada dimensi yang paling tertinggal."
            )
        }
    dimensions = [
        ("Pra Membaca", "pra"),
        ("Saat Membaca", "saat"),
        ("Pasca Membaca", "pasca"),
        ("Nilai TKM", "tkm"),
    ]
    return [build_row(i + 1, label, focus.get(key)) for i, (label, key) in enumerate(dimensions)]
def _add_dimensi_table_to_docx(doc: "Document", rows: List[Dict[str, Any]]):
    """Append the mandatory 5-column dimension table to the Word document.

    Columns: No | Dimensi | Nilai | Interpretasi | Rekomendasi. Each entry in
    *rows* is a dict following the LLM/fallback row schema; missing or
    non-finite "nilai" renders as an empty cell.
    """
    doc.add_heading("Tabel Analisis Dimensi (Pra/Saat/Pasca) dan Nilai TKM", level=1)
    header_labels = ["No", "Dimensi", "Nilai", "Interpretasi", "Rekomendasi"]
    table = doc.add_table(rows=1, cols=len(header_labels))
    table.style = "Table Grid"
    # Bold header row: write each label as a bold run in the first paragraph.
    for cell, label in zip(table.rows[0].cells, header_labels):
        bold_run = cell.paragraphs[0].add_run(str(label))
        bold_run.bold = True

    def _fmt_nilai(value) -> str:
        # Empty string when the value is absent or cannot be read as a finite number.
        if value is None:
            return ""
        try:
            num = float(pd.to_numeric(value, errors="coerce"))
        except Exception:
            return ""
        return f"{num:.2f}" if np.isfinite(num) else ""

    for row in rows:
        cells = table.add_row().cells
        cells[0].text = str(row.get("no", ""))
        cells[1].text = str(row.get("dimensi", ""))
        cells[2].text = _fmt_nilai(row.get("nilai", None))
        cells[3].text = str(row.get("interpretasi", "")).strip()
        cells[4].text = str(row.get("rekomendasi", "")).strip()
def export_excel(provinsi: str, kabkota: str):
    """Build the multi-sheet .xlsx export for the current filter and return its path.

    Sheets: META (run parameters), RINGKASAN (dashboard markdown), the four
    result tables (valid/adjusted kab-kota, provinsi, verifikasi sampel) and
    three demographic summaries. Missing tables are written as empty sheets.
    """
    hero_md, info_md, out_kab_valid, out_prov, out_kab_adjusted, out_verif, records, *_ = compute_outputs(provinsi, kabkota)

    # Demographic summaries are optional — only computed when the column exists.
    demog = {
        col: (demog_summary_one(records, col) if col in DEMOG_COLS else None)
        for col in ("jenis_kelamin", "usia", "pendidikan")
    }

    raw_name = f"OUTPUT_TKM_{_now_tag()}_{provinsi.replace(' ', '_')}_{kabkota.replace(' ', '_')}.xlsx"
    safe_name = raw_name.replace("/", "_").replace("\\", "_")  # strip path separators from filter names
    out_path = str(Path.cwd() / safe_name)

    # META sheet: the parameters/bounds that produced this run, for traceability.
    meta_rows = [
        ("DATA_PATH", DATA_PATH),
        ("MIN_RESPONDEN_SLOVIN", MIN_RESPONDEN_SLOVIN),
        ("WEIGHTS_pra", WEIGHTS["pra"]),
        ("WEIGHTS_saat", WEIGHTS["saat"]),
        ("WEIGHTS_pasca", WEIGHTS["pasca"]),
        ("NORMALISASI", "min-max global per fase (MSI->0-100) lalu komposit berbobot = index_0_100"),
        ("MIN_PRA_subidx_pra_msi", MIN_PRA),
        ("MAX_PRA_subidx_pra_msi", MAX_PRA),
        ("MIN_SAAT_subidx_saat_msi", MIN_SAAT),
        ("MAX_SAAT_subidx_saat_msi", MAX_SAAT),
        ("MIN_PASCA_subidx_pasca_msi", MIN_PAS),
        ("MAX_PASCA_subidx_pasca_msi", MAX_PAS),
        ("Kategori_Indeks_FINAL_Rule", "<50 sangat rendah; 51–<65 rendah; 66–<80 sedang; 81–<90 tinggi; >=91 sangat tinggi"),
    ]
    meta = pd.DataFrame(meta_rows, columns=["Parameter", "Value"])

    with pd.ExcelWriter(out_path, engine="openpyxl") as writer:
        meta.to_excel(writer, sheet_name=_safe_sheet_name("META"), index=False)
        pd.DataFrame({"Hero_MD": [hero_md], "Info_MD": [info_md]}).to_excel(
            writer, sheet_name=_safe_sheet_name("RINGKASAN"), index=False
        )
        # Result tables; None -> empty sheet so the workbook layout is stable.
        for sheet, frame in [
            ("KabKota_Valid", out_kab_valid),
            ("KabKota_Adjusted", out_kab_adjusted),
            ("Provinsi", out_prov),
            ("Verifikasi_Sampel", out_verif),
        ]:
            (frame if frame is not None else pd.DataFrame()).to_excel(
                writer, sheet_name=_safe_sheet_name(sheet), index=False
            )
        for sheet, frame in [
            ("Demog_JenisKelamin", demog["jenis_kelamin"]),
            ("Demog_Usia", demog["usia"]),
            ("Demog_Pendidikan", demog["pendidikan"]),
        ]:
            (frame if isinstance(frame, pd.DataFrame) else pd.DataFrame()).to_excel(
                writer, sheet_name=_safe_sheet_name(sheet), index=False
            )
    return out_path
def export_word_report(provinsi: str, kabkota: str):
    """Generate the .docx report for the selected Provinsi/Kab-Kota filter.

    The document contains: the dashboard summary, the mandatory 5-column
    dimension table (Pra/Saat/Pasca Membaca + Nilai TKM; LLM-generated with a
    deterministic fallback), the lampiran result tables, and the demographic
    appendices.

    Returns:
        str: path of the saved .docx file.

    Raises:
        RuntimeError: if python-docx is not installed — a .docx output is
            mandatory, there is deliberately no .txt fallback.
    """
    # ====== WAJIB DOCX: kalau tidak ada, ERROR (bukan .txt)
    if not DOCX_AVAILABLE:
        raise RuntimeError(
            "python-docx TIDAK TERPASANG di environment. "
            "Pasang: pip install python-docx (atau tambahkan ke requirements.txt HF Spaces) agar output .docx bisa dibuat."
        )
    hero_md, info_md, out_kab_valid, out_prov, out_kab_adjusted, out_verif, records, detail_df, *_ = compute_outputs(provinsi, kabkota)
    demog_jk = demog_summary_one(records, "jenis_kelamin") if "jenis_kelamin" in DEMOG_COLS else None
    demog_usia = demog_summary_one(records, "usia") if "usia" in DEMOG_COLS else None
    demog_pend = demog_summary_one(records, "pendidikan") if "pendidikan" in DEMOG_COLS else None
    # ====== TABEL DIMENSI (Pra/Saat/Pasca/TKM) + LLM
    focus = _extract_focus_scores(provinsi, kabkota)
    prompt = _build_llm_table_prompt(focus)
    llm_text = _hf_generate(prompt)
    rows: List[Dict[str, Any]] = []
    if llm_text:
        try:
            # Grab the outermost {...} span in case the model wrapped the JSON in prose.
            m = re.search(r"\{[\s\S]*\}", llm_text.strip())
            js = json.loads(m.group(0) if m else llm_text)
            rows = js.get("rows", [])
        except Exception:
            rows = []
    # FIX: the LLM can return syntactically valid JSON with the wrong shape
    # ("rows" is not a list, or list entries are not objects). Previously such
    # output reached _add_dimensi_table_to_docx and crashed on r.get(...) of a
    # non-dict. Keep only dict entries and fall back to the deterministic rows
    # when nothing usable remains.
    if not isinstance(rows, list):
        rows = []
    rows = [r for r in rows if isinstance(r, dict)]
    if not rows:
        rows = _fallback_rows(focus)
    # ====== BUILD DOC
    doc = Document()
    style = doc.styles["Normal"]
    style.font.name = "Calibri"
    style.font.size = Pt(11)
    title = doc.add_paragraph("LAPORAN HASIL INDEKS TKM (MSI) — Normalisasi Min–Max Data (Global per Fase) + Penyesuaian Sampel")
    title.runs[0].bold = True
    title.alignment = WD_ALIGN_PARAGRAPH.CENTER
    subp = doc.add_paragraph(f"Tanggal: {datetime.now().strftime('%d %B %Y')} | Filter: Provinsi={provinsi}, Kab/Kota={kabkota}")
    subp.alignment = WD_ALIGN_PARAGRAPH.CENTER
    doc.add_paragraph("")
    doc.add_heading("Ringkasan Dashboard", level=1)
    # Markdown heading/bold markers are stripped for plain Word paragraphs.
    doc.add_paragraph(hero_md.replace("## ", "").replace("# ", ""))
    doc.add_paragraph(info_md.replace("**", ""))
    doc.add_paragraph("")
    _add_dimensi_table_to_docx(doc, rows)
    doc.add_page_break()
    doc.add_heading("Lampiran Tabel", level=1)
    _df_to_docx_table(doc, out_prov, "Tabel Provinsi (dari Indeks_final Kab/Kota)", max_rows=60)
    doc.add_paragraph("")
    _df_to_docx_table(doc, out_kab_valid, f"Tabel Kab/Kota n≥{MIN_RESPONDEN_SLOVIN} (tanpa penyesuaian)", max_rows=80)
    doc.add_paragraph("")
    _df_to_docx_table(doc, out_kab_adjusted, f"Tabel Kab/Kota n<{MIN_RESPONDEN_SLOVIN} (disesuaikan)", max_rows=80)
    doc.add_paragraph("")
    _df_to_docx_table(doc, out_verif, "Verifikasi Kondisi Sampel", max_rows=120)
    doc.add_paragraph("")
    doc.add_heading("Lampiran Demografi", level=1)
    _df_to_docx_table(doc, demog_jk if isinstance(demog_jk, pd.DataFrame) else pd.DataFrame(), "Demografi — Jenis Kelamin (mean index_0_100)", max_rows=60)
    doc.add_paragraph("")
    _df_to_docx_table(doc, demog_usia if isinstance(demog_usia, pd.DataFrame) else pd.DataFrame(), "Demografi — Kelompok Usia (mean index_0_100)", max_rows=60)
    doc.add_paragraph("")
    _df_to_docx_table(doc, demog_pend if isinstance(demog_pend, pd.DataFrame) else pd.DataFrame(), "Demografi — Tingkat Pendidikan (mean index_0_100)", max_rows=60)
    fn = f"LAPORAN_TKM_{_now_tag()}_{provinsi.replace(' ', '_')}_{kabkota.replace(' ', '_')}.docx"
    fn = fn.replace("/", "_").replace("\\", "_")  # strip path separators from filter names
    out_path = str(Path.cwd() / fn)
    doc.save(out_path)
    return out_path
# =========================
# GRADIO UI
# =========================
# Top-level UI wiring. Component creation order defines the page layout and the
# event outputs are positional, so the structure below is order-sensitive and
# intentionally left unchanged — only annotated.
with gr.Blocks(title="Dashboard TKM (MSI) — MinMax Global per Fase + Adjusted by Sample") as demo:
    # Static header: final-category rule, exclusion statistics and the global
    # per-phase min–max bounds (all evaluated once at module import).
    gr.Markdown(
        f"""
# Dashboard Indeks TKM (MSI) — NORMALISASI MIN–MAX DATA (GLOBAL PER FASE)
**Kategori Indeks FINAL (0–100) — RULE:**
- < 50 : sangat rendah
- 51 – < 65 : rendah
- 66 – < 80 : sedang
- 81 – < 90 : tinggi
- >= 91 : sangat tinggi
**Exclusion (STRICT)**
- Baris awal: {excl_stats['baris_awal']}
- Terhapus: {excl_stats['terhapus_total']} (prov-drop={excl_stats['terhapus_prov_drop']}, kab/kota-drop={excl_stats['terhapus_kabkota_drop']})
- Diolah: {excl_stats['diolah']}
**Normalisasi min–max global per fase (MSI -> 0–100)**
- Pra : min={MIN_PRA:.6f} | max={MAX_PRA:.6f}
- Saat : min={MIN_SAAT:.6f} | max={MAX_SAAT:.6f}
- Pasca: min={MIN_PAS:.6f} | max={MAX_PAS:.6f}
"""
    )
    # Filter controls: Kab/Kota choices are refreshed when Provinsi changes.
    with gr.Row():
        dd_prov = gr.Dropdown(label="Provinsi", choices=get_prov_choices(), value="(Semua)")
        dd_kab = gr.Dropdown(label="Kab/Kota", choices=["(Semua)"], value="(Semua)")
    with gr.Row():
        run_btn = gr.Button("Jalankan", variant="primary")
    info_md = gr.Markdown("")
    hero_md = gr.Markdown("## Ringkasan Eksekutif\nPilih Provinsi lalu klik Jalankan.")
    # Respondent-level records for the current filter, reused by the
    # demographic plot without recomputing the whole dashboard.
    state_records = gr.State([])
    # Export buttons: both re-run compute_outputs for the selected filter.
    with gr.Row():
        btn_xlsx = gr.Button("⬇️ Download Excel", variant="secondary")
        file_xlsx = gr.File(label="File Excel", interactive=False)
        btn_docx = gr.Button("⬇️ Download Laporan Word (LLM)", variant="secondary")
        file_docx = gr.File(label="File Word", interactive=False)
    with gr.Accordion(f"🟢 Kab/Kota n≥{MIN_RESPONDEN_SLOVIN} (tanpa penyesuaian)", open=True):
        out_kab_valid = gr.DataFrame(interactive=False)
    with gr.Accordion("🔵 Skor Provinsi (dari Indeks_final kab/kota)", open=False):
        out_prov = gr.DataFrame(interactive=False)
    with gr.Accordion("🟠 Analisis Demografi (mean index_0_100)", open=False):
        dimensi = gr.Radio(
            label="Pilih dimensi demografi",
            choices=["Jenis Kelamin", "Kelompok Usia", "Tingkat Pendidikan"],
            value="Jenis Kelamin",
        )
        demog_btn = gr.Button("Tampilkan", variant="secondary")
        demog_plot = gr.Plot()
        demog_text = gr.Markdown("")
    with gr.Accordion(f"🟡 Kab/Kota n<{MIN_RESPONDEN_SLOVIN} (DISESUAIKAN)", open=False):
        out_kab_adjusted = gr.DataFrame(interactive=False)
    with gr.Accordion("⚪ Verifikasi Kondisi Sampel (faktor penyesuaian)", open=False):
        out_verif = gr.DataFrame(interactive=False)
    # placeholders (kompatibilitas outputs)
    # Hidden components that absorb extra return values of compute_outputs.
    detail_df = gr.DataFrame(interactive=False, visible=False)
    detail_plot = gr.Plot(visible=False)
    dd_prov.change(fn=update_kab_dropdown, inputs=dd_prov, outputs=dd_kab)
    # NOTE(review): detail_df/detail_plot appear twice in the outputs lists —
    # presumably to match the arity of compute_outputs' return tuple; confirm
    # against compute_outputs before changing.
    run_btn.click(
        fn=compute_outputs,
        inputs=[dd_prov, dd_kab],
        outputs=[
            hero_md, info_md,
            out_kab_valid, out_prov, out_kab_adjusted, out_verif,
            state_records,
            detail_df, detail_plot,
            detail_df, detail_plot,
            demog_plot, demog_text
        ],
    )
    # Changing Kab/Kota recomputes immediately (same handler as the run button).
    dd_kab.change(
        fn=compute_outputs,
        inputs=[dd_prov, dd_kab],
        outputs=[
            hero_md, info_md,
            out_kab_valid, out_prov, out_kab_adjusted, out_verif,
            state_records,
            detail_df, detail_plot,
            detail_df, detail_plot,
            demog_plot, demog_text
        ],
    )
    demog_btn.click(
        fn=demog_run,
        inputs=[state_records, dimensi],
        outputs=[demog_plot, demog_text],
    )
    btn_xlsx.click(fn=export_excel, inputs=[dd_prov, dd_kab], outputs=[file_xlsx])
    btn_docx.click(fn=export_word_report, inputs=[dd_prov, dd_kab], outputs=[file_docx])
# =========================
# ENTRYPOINT
# =========================
# Start the Gradio server with default settings when run as a script.
if __name__ == "__main__":
    demo.launch()