| |
| """ |
| TKM Dashboard — NEW (MSI pooled) + Exclusion Kelembagaan (STRICT) + Penyesuaian Sampel ala IPLM |
| + NORMALISASI MIN–MAX DATA (GLOBAL) dilakukan di LEVEL RESPONDEN sebelum agregasi wilayah |
| + Subindeks Pra/Saat/Pasca yang ditampilkan adalah HASIL NORMALISASI (0–100) yang DIPAKAI untuk perhitungan Indeks TKM 0–100 |
| + Download Excel + Download Laporan Word (.docx) pakai Hugging Face (HF Token) [opsional] |
| |
| PERBAIKAN WAJIB (sesuai komplain user): |
| 1) Output laporan Word HARUS .DOCX (bukan .txt) |
| - Jika python-docx tidak tersedia, app HARUS error jelas (bukan fallback txt). |
| 2) Laporan Word (LLM) HARUS memuat tabel 5 kolom: |
| No | Dimensi | Nilai | Interpretasi | Rekomendasi |
| dengan 4 baris: |
| 1) Pra Membaca |
| 2) Saat Membaca |
| 3) Pasca Membaca |
| 4) Nilai TKM |
| 3) RULE KATEGORI INDEKS FINAL (0–100) DIPAKSA: |
| - < 50 : sangat rendah |
| - 51 – < 65 : rendah |
| - 66 – < 80 : sedang |
| - 81 – < 90 : tinggi |
| - >= 91 : sangat tinggi |
| (Implementasi tanpa gap: <50, <66, <81, <91, else) |
| 4) Tampilan kolom n_responden Kab/Kota: |
| - Jika n_responden >= 400 -> KOSONG |
| - Jika n_responden < 400 -> tampil angka |
| (nilai numeric tetap dipakai internal) |
| 5) Tampilan kolom n_responden Provinsi: selalu kosong (tanpa mengubah perhitungan) |
| """ |
|
|
| from pathlib import Path |
| from typing import Dict, List, Tuple, Optional, Any |
| from datetime import datetime |
| import os |
| import json |
| import re |
|
|
| import numpy as np |
| import pandas as pd |
| from scipy import stats |
| import matplotlib.pyplot as plt |
| import gradio as gr |
| import requests |
|
|
| |
| |
| |
# python-docx is mandatory for the Word (.docx) report output; we only record
# availability here and let the report builder raise a clear error when the
# package is missing (no silent .txt fallback — see module docstring, item 1).
try:
    from docx import Document
    from docx.shared import Pt
    from docx.enum.text import WD_ALIGN_PARAGRAPH
    DOCX_AVAILABLE = True
except Exception:
    DOCX_AVAILABLE = False
|
|
# openpyxl backs pandas' .xlsx reading; imported explicitly so a missing
# dependency fails fast at startup rather than deep inside read_excel.
import openpyxl

# Fixed seed so any stochastic behavior is reproducible across runs.
np.random.seed(42)
|
|
| |
| |
| |
# Respondent-level survey workbook loaded once at startup.
DATA_PATH = "DATA_TKM_28_JANUARI_2026.xlsx"

# Composite weights for the three reading phases (pra/saat/pasca); sum to 1.
WEIGHTS = {"pra": 0.15, "saat": 0.50, "pasca": 0.35}
# Likert response scale bounds used by the MSI category mapping.
LIKERT_MIN, LIKERT_MAX = 1, 4
# Minimum fraction of answered items required to compute a phase subindex.
MIN_FRAC_AVAILABLE_PER_SUBINDEX = 0.50
# Target sample size per Kab/Kota (Slovin); n below this triggers adjustment.
MIN_RESPONDEN_SLOVIN = 400

# Hugging Face inference configuration (all overridable via environment).
HF_TOKEN = os.getenv("HF_TOKEN", "").strip()  # empty token disables LLM calls
HF_MODEL = os.getenv("HF_MODEL", "meta-llama/Meta-Llama-3-8B-Instruct")
HF_ENDPOINT_URL = os.getenv("HF_ENDPOINT_URL", "").strip()  # dedicated endpoint (optional)
HF_TIMEOUT = int(os.getenv("HF_TIMEOUT", "90"))
HF_MAX_NEW_TOKENS = int(os.getenv("HF_MAX_NEW_TOKENS", "900"))
HF_TEMPERATURE = float(os.getenv("HF_TEMPERATURE", "0.2"))
HF_TOP_P = float(os.getenv("HF_TOP_P", "0.9"))
|
|
| |
| |
| |
# Provinces whose raw rows are dropped entirely before any processing.
EXCLUDE_PROVINSI_DROP: List[str] = []

# Provinces that remain visible in tables but whose provincial aggregate is
# NOT computed (institutional exclusion; see compute_outputs).
EXCLUDE_PROVINSI_NO_AGG = [
    "Bali",
    "Papua Barat Daya",
    "Papua Pegunungan",
    "Papua Selatan",
    "Papua Tengah",
]

# (provinsi, tipe "kab"/"kota", nama) triples removed at respondent level.
# Matching is done on normalized names; see apply_exclusions_strict().
EXCLUDE_KABKOTA_BY_PROV_TYPE: List[Tuple[str, str, str]] = [
    ("Bali", "kab", "Bangli"),
    ("Bali", "kab", "Jembrana"),
    ("Sumatera Utara", "kab", "Humbang Hasundutan"),
    ("Lampung", "kab", "Pesisir Barat"),
    ("Papua", "kab", "Biak Numfor"),
    ("Papua Tengah", "kab", "Nabire"),
    ("Jawa Barat", "kab", "Tasikmalaya"),

    ("Kalimantan Timur", "kab", "Mahakam Ulu"),
    ("Papua Barat", "kab", "Sorong Selatan"),
    ("Papua Barat Daya", "kab", "Tambrauw"),
    ("Maluku Utara", "kab", "Halmahera Timur"),
    ("Papua", "kab", "Mamberamo Raya"),
    ("Papua Tengah", "kab", "Puncak"),

    ("Maluku", "kab", "Seram Bagian Barat"),
    ("Sumatera Utara", "kab", "Samosir"),
    ("Kalimantan Tengah", "kab", "Katingan"),
    ("Sulawesi Selatan", "kab", "Wajo"),
    ("Papua Selatan", "kab", "Asmat"),
]
|
|
| |
| |
| |
| def _norm(s: str) -> str: |
| return ( |
| str(s).strip().lower() |
| .replace(" ", "") |
| .replace("-", "") |
| .replace("/", "") |
| .replace("\\", "") |
| .replace(".", "") |
| .replace(",", "") |
| ) |
|
|
def detect_region_cols(df: pd.DataFrame) -> Tuple[str, str]:
    """Detect the province and regency/city columns via fuzzy name matching.

    Returns (prov_col, kab_col) as the original column labels.
    Raises ValueError (with diagnostics) when either cannot be found.
    """
    norm_to_col = {_norm(c): c for c in df.columns}

    prov_candidates = ["provinsiasal", "provinsi asal", "provinsi", "province", "namaprovinsi", "prov"]
    kab_candidates = [
        "kabkota", "kab_kota", "kabkotaasal", "kabupatenkota", "kabupatenkotaasal",
        "kab/kota", "kabupaten/kota", "kabupaten", "kota", "kab"
    ]

    def _first_match(candidates: List[str]) -> Optional[str]:
        # Return the original column label of the first candidate present.
        for cand in candidates:
            key = _norm(cand)
            if key in norm_to_col:
                return norm_to_col[key]
        return None

    prov_col = _first_match(prov_candidates)
    kab_col = _first_match(kab_candidates)

    if prov_col is None or kab_col is None:
        raise ValueError(
            "Kolom wilayah tidak terdeteksi.\n"
            f"Kolom tersedia (contoh 40): {list(df.columns)[:40]}\n"
            f"Prov candidates: {prov_candidates}\n"
            f"Kab candidates : {kab_candidates}"
        )
    return prov_col, kab_col
|
|
| |
| |
| |
def norm_region_name(x: str) -> str:
    """Normalize a region name: lowercase, punctuation→space, collapse whitespace,
    and drop a leading 'provinsi'/'propinsi' prefix."""
    if pd.isna(x):
        return ""
    name = re.sub(r"[^\w\s]", " ", str(x).strip().lower())
    name = re.sub(r"\s+", " ", name).strip()
    for prefix in ("provinsi ", "propinsi "):
        if name.startswith(prefix):
            name = name[len(prefix):].strip()
    return name
|
|
def split_kabkota_type_name(x: str) -> Tuple[str, str]:
    """Split a raw Kab/Kota cell into (type, name).

    type is "kab", "kota", or "" when no recognizable prefix is present.
    Punctuation is converted to whitespace before matching, so "Kab. Bandung",
    "KAB Bandung" and "Kabupaten Bandung" all yield ("kab", "bandung").
    """
    if pd.isna(x):
        return ("", "")
    s = re.sub(r"[^\w\s]", " ", str(x).strip().lower())
    s = re.sub(r"\s+", " ", s).strip()

    # NOTE: punctuation has already been replaced by spaces above, so prefixes
    # containing a literal dot ("kab. ", "kota. ") can never appear here — the
    # old branches testing them were unreachable dead code and were removed.
    # Only the space-separated prefix forms need checking.
    for prefix, kind in (("kabupaten ", "kab"), ("kab ", "kab"), ("kota ", "kota")):
        if s.startswith(prefix):
            return (kind, s[len(prefix):].strip())
    return ("", s)
|
|
def apply_exclusions_strict(df: pd.DataFrame, prov_col: str, kab_col: str):
    """Remove excluded rows (whole provinces + specific Kab/Kota triples).

    Returns:
        (out, stats_info, audit_hapus):
            out         — filtered copy of ``df`` with helper columns removed,
            stats_info  — dict of row counts (initial/removed/kept),
            audit_hapus — (prov, kab) values of the removed rows, for auditing.
    """
    d = df.copy()
    # Normalized province name used for every membership test below.
    d["_prov_norm"] = d[prov_col].apply(norm_region_name)

    # Split the raw Kab/Kota cell into type ("kab"/"kota"/"") and bare name.
    tmp = d[kab_col].apply(split_kabkota_type_name)
    d["_kab_type"] = tmp.apply(lambda t: t[0])
    d["_kab_name"] = tmp.apply(lambda t: t[1])
    d["_kab_name_norm"] = d["_kab_name"].apply(norm_region_name)

    # Rows belonging to fully-dropped provinces.
    ex_prov_drop = set(norm_region_name(p) for p in EXCLUDE_PROVINSI_DROP)
    mask_prov_drop = d["_prov_norm"].isin(ex_prov_drop)

    # Normalized (prov, type, name) triples that must be removed.
    ex_triples = set(
        (norm_region_name(p), str(t).strip().lower(), norm_region_name(n))
        for p, t, n in EXCLUDE_KABKOTA_BY_PROV_TYPE
    )

    # Only rows with an explicit kab/kota prefix can match an exclusion triple.
    mask_triple = pd.Series(False, index=d.index)
    m_has_type = d["_kab_type"].isin(["kab", "kota"])
    mask_triple.loc[m_has_type] = [
        (pv, kt, kn) in ex_triples
        for pv, kt, kn in zip(
            d.loc[m_has_type, "_prov_norm"],
            d.loc[m_has_type, "_kab_type"].str.lower(),
            d.loc[m_has_type, "_kab_name_norm"],
        )
    ]

    # Special case: only KABUPATEN Tasikmalaya (Jawa Barat) is excluded —
    # rows whose raw cell starts with "kota" must be kept.
    raw_kab = d[kab_col].astype(str).str.lower()
    is_tasik = (d["_prov_norm"] == norm_region_name("Jawa Barat")) & (d["_kab_name_norm"] == norm_region_name("Tasikmalaya"))
    tasik_is_kota = raw_kab.str.contains(r"^\s*kota\b", regex=True)
    mask_triple = mask_triple & ~(is_tasik & tasik_is_kota)

    mask_exclude = mask_prov_drop | mask_triple

    # Row-count bookkeeping reported to the console/UI.
    stats_info = {
        "baris_awal": int(len(d)),
        "terhapus_total": int(mask_exclude.sum()),
        "terhapus_prov_drop": int(mask_prov_drop.sum()),
        "terhapus_kabkota_drop": int((~mask_prov_drop & mask_triple).sum()),
        "diolah": int((~mask_exclude).sum()),
    }

    # Audit trail of exactly which rows were removed.
    audit_hapus = d.loc[mask_exclude, [prov_col, kab_col]].copy()

    out = d.loc[~mask_exclude].copy().reset_index(drop=True)
    out.drop(columns=["_prov_norm", "_kab_type", "_kab_name", "_kab_name_norm"], inplace=True, errors="ignore")
    return out, stats_info, audit_hapus
|
|
| |
| |
| |
def mean_if_enough(row: pd.Series, min_frac: float) -> float:
    """Mean of the row's non-missing values, or NaN when coverage < min_frac."""
    total = len(row)
    if total == 0:
        return np.nan
    observed = row.dropna()
    if len(observed) / total < min_frac:
        return np.nan
    return float(observed.mean())
|
|
def detect_item_groups(columns: List[str]) -> Dict[str, List[str]]:
    """Group questionnaire columns by phase prefix: B*=pra, C*=saat, D*=pasca."""
    names = [str(c).strip() for c in columns]
    prefix_by_phase = {"pra": "B", "saat": "C", "pasca": "D"}
    return {
        phase: [name for name in names if name.upper().startswith(prefix)]
        for phase, prefix in prefix_by_phase.items()
    }
|
|
def faktor_penyesuaian(n: int, target: int = MIN_RESPONDEN_SLOVIN) -> float:
    """Sample-adjustment factor: min(n/target, 1.0); NaN for invalid input."""
    try:
        count = int(n)
    except Exception:
        return np.nan
    if target <= 0:
        return np.nan
    ratio = max(count, 0) / target
    return float(min(ratio, 1.0))
|
|
def status_penyesuaian(n: int) -> str:
    """Human-readable sample-adjustment status for a Kab/Kota respondent count."""
    factor = faktor_penyesuaian(n)
    if not np.isfinite(factor):
        return ""
    if factor >= 1.0:
        return f"n≥{MIN_RESPONDEN_SLOVIN} (tanpa penyesuaian)"
    return f"n<{MIN_RESPONDEN_SLOVIN} (disesuaikan ×{factor:.2f})"
|
|
| |
| |
| |
def kategori_indeks_final(score_0_100: float) -> str:
    """Categorize a 0–100 index per the user's rule.

    RULE USER:
      - < 50 : sangat rendah
      - 51 – < 65 : rendah
      - 66 – < 80 : sedang
      - 81 – < 90 : tinggi
      - >= 91 : sangat tinggi

    Implemented without gaps: <50, <66, <81, <91, else.
    Non-numeric / NaN input yields "".
    """
    value = pd.to_numeric(score_0_100, errors="coerce")
    if not np.isfinite(value):
        return ""
    thresholds = (
        (50, "sangat rendah"),
        (66, "rendah"),
        (81, "sedang"),
        (91, "tinggi"),
    )
    for upper, label in thresholds:
        if value < upper:
            return label
    return "sangat tinggi"
|
|
| |
| |
| |
def compute_msi_mapping_from_values(values: pd.Series,
                                    min_cat: int = LIKERT_MIN,
                                    max_cat: int = LIKERT_MAX) -> Dict[int, float]:
    """Method of Successive Intervals: map each Likert category to the midpoint
    of its standard-normal interval, based on pooled category frequencies.

    Returns {category: scale value}; all-NaN mapping when no valid data.
    """
    clean = pd.to_numeric(values, errors="coerce").dropna().astype(int)
    categories = list(range(min_cat, max_cat + 1))
    if clean.empty:
        return {cat: np.nan for cat in categories}

    n_obs = len(clean)
    freq = np.array([(clean == cat).sum() for cat in categories], dtype=float)
    cum_prob = np.cumsum(freq / n_obs)
    bounds = np.concatenate([[0.0], cum_prob])

    # Clamp the extreme cumulative probabilities away from 0/1 so the normal
    # quantiles stay finite.
    eps = 0.5 / n_obs
    bounds[0] = max(bounds[0], eps)
    bounds[-1] = min(bounds[-1], 1 - eps)

    z = stats.norm.ppf(bounds)
    midpoints = (z[:-1] + z[1:]) / 2.0
    return {cat: float(val) for cat, val in zip(categories, midpoints)}
|
|
def apply_msi_pooled_phase(df_phase: pd.DataFrame, cols: List[str]) -> Tuple[pd.DataFrame, Dict[int, float]]:
    """Fit ONE pooled MSI mapping over all columns of a phase, then apply it
    to every column. Returns (mapped copy, mapping)."""
    result = df_phase.copy()
    pooled_values = pd.Series(result[cols].values.ravel())
    mapping = compute_msi_mapping_from_values(pooled_values)
    for col in cols:
        result[col] = pd.to_numeric(result[col], errors="coerce").map(mapping)
    return result, mapping
|
|
| |
| |
| |
def minmax_0_100_global(x: pd.Series) -> Tuple[pd.Series, float, float]:
    """Min–max scale a series to 0–100 using its own global min/max.

    Returns (scaled, min, max); scaled is all-NaN when the range is
    degenerate (non-finite bounds or max <= min).
    """
    values = pd.to_numeric(x, errors="coerce")
    lo = float(values.min(skipna=True))
    hi = float(values.max(skipna=True))
    degenerate = (not np.isfinite(lo)) or (not np.isfinite(hi)) or hi <= lo
    if degenerate:
        return pd.Series(np.nan, index=values.index), lo, hi
    scaled = ((values - lo) / (hi - lo)).clip(0, 1)
    return (scaled * 100).round(2), lo, hi
|
|
| |
| |
| |
def weighted_mean(values: pd.Series, weights: pd.Series) -> float:
    """Weighted average, ignoring rows with missing values or non-positive weights."""
    v = pd.to_numeric(values, errors="coerce")
    w = pd.to_numeric(weights, errors="coerce")
    usable = v.notna() & w.notna() & (w > 0)
    if not usable.any():
        return np.nan
    return float(np.average(v[usable], weights=w[usable]))
|
|
def aggregate_kabkot(df: pd.DataFrame, prov_col: str, kab_col: str) -> pd.DataFrame:
    """Aggregate respondent rows to one row per (provinsi, kab/kota):
    mean composite index, respondent count, and mean of each phase subindex."""
    agg_spec = dict(
        indeks_mean=("index_0_100", "mean"),
        n_responden=(kab_col, "size"),
        subidx_pra_0_100_mean=("subidx_pra_0_100", "mean"),
        subidx_saat_0_100_mean=("subidx_saat_0_100", "mean"),
        subidx_pasca_0_100_mean=("subidx_pasca_0_100", "mean"),
    )
    grouped = df.groupby([prov_col, kab_col], dropna=False).agg(**agg_spec)
    return grouped.reset_index()
|
|
def aggregate_prov_from_kab(kab_df: pd.DataFrame, prov_col: str) -> pd.DataFrame:
    """Roll Kab/Kota-level final scores up to provinces (n-weighted mean),
    with a status string flagging whether any Kab/Kota was sample-adjusted."""
    records = []
    for prov, grp in kab_df.groupby(prov_col, dropna=False):
        counts = pd.to_numeric(grp["n_responden"], errors="coerce").fillna(0)
        if (counts < MIN_RESPONDEN_SLOVIN).any():
            status = f"⚠️ Ada Kab/Kota n<{MIN_RESPONDEN_SLOVIN} (nilai Kab/Kota disesuaikan)"
        else:
            status = f"✅ Semua Kab/Kota n≥{MIN_RESPONDEN_SLOVIN} (tanpa penyesuaian)"
        records.append({
            prov_col: prov,
            "tkm_final_mean": weighted_mean(grp["Indeks_TKM_0_100_final"], grp["n_responden"]),
            "n_responden": int(counts.sum()),
            "prov_status": status,
        })
    return pd.DataFrame(records)
|
|
| |
| |
| |
# Candidate column names (case-insensitive exact match) for each demographic
# dimension; detect_demog_columns() picks the first one present in the data.
DEMOG_CANDIDATES = {
    "jenis_kelamin": ["jenis kelamin", "jenis_kelamin", "jk", "gender"],
    "usia": ["usia", "umur", "kelompok umur", "kelompok_umur", "age"],
    "pendidikan": ["pendidikan", "pendidikan terakhir", "pendidikan_terakhir", "tingkat pendidikan", "tingkat_pendidikan"],
}
|
|
def detect_demog_columns(df: pd.DataFrame) -> Dict[str, str]:
    """Map demographic keys to actual dataframe columns (case-insensitive match).

    Returns only the keys whose candidate column exists in ``df``.
    """
    lower_to_col = {str(c).strip().lower(): c for c in df.columns}
    found: Dict[str, str] = {}
    for key, candidates in DEMOG_CANDIDATES.items():
        for cand in candidates:
            lowered = cand.lower()
            if lowered in lower_to_col:
                found[key] = lower_to_col[lowered]
                break
    return found
|
|
| |
| |
| |
# ---------------------------------------------------------------------------
# Startup pipeline: load the workbook, apply exclusions, compute respondent-
# level MSI subindices, normalize each phase to 0–100 (global min–max), and
# form the weighted composite index. Runs once at import time.
# ---------------------------------------------------------------------------
DATA_FILE = Path(DATA_PATH)
if not DATA_FILE.exists():
    raise FileNotFoundError(f"File data tidak ditemukan: {DATA_PATH}")

df_raw = pd.read_excel(DATA_FILE)

PROV_COL, KABKOT_COL = detect_region_cols(df_raw)
print("✅ Kolom wilayah terdeteksi:", {"PROV_COL": PROV_COL, "KABKOT_COL": KABKOT_COL})

# Normalize region cells to clean strings ("nan" artifacts become empty).
df_raw[PROV_COL] = df_raw[PROV_COL].astype(str).fillna("").replace({"nan": ""}).str.strip()
df_raw[KABKOT_COL] = df_raw[KABKOT_COL].astype(str).fillna("").replace({"nan": ""}).str.strip()

# STRICT institutional exclusions must run BEFORE the MSI fit.
df_clean, excl_stats, audit_hapus = apply_exclusions_strict(df_raw, PROV_COL, KABKOT_COL)
print("🧹 EXCLUSION (STRICT):", excl_stats)

# Item columns per phase: B* = pra, C* = saat, D* = pasca.
GROUP_COLS = detect_item_groups(df_clean.columns.tolist())
print("✅ Item detected:", {k: len(v) for k, v in GROUP_COLS.items()})

df_idx = df_clean.copy()

# Fit one pooled MSI mapping per phase, then average each respondent's mapped
# items into a phase subindex (NaN when answered fraction < threshold).
phase_maps: Dict[str, Dict[int, float]] = {}
for phase in ["pra", "saat", "pasca"]:
    cols = GROUP_COLS.get(phase, [])
    if not cols:
        # No items for this phase: subindex undefined, mapping all-NaN.
        df_idx[f"subidx_{phase}_msi"] = np.nan
        phase_maps[phase] = {1: np.nan, 2: np.nan, 3: np.nan, 4: np.nan}
        continue

    tmp = df_idx[cols].copy()
    tmp_msi, mapping = apply_msi_pooled_phase(tmp, cols)
    phase_maps[phase] = mapping

    df_idx[f"subidx_{phase}_msi"] = tmp_msi.apply(
        lambda r: mean_if_enough(r, MIN_FRAC_AVAILABLE_PER_SUBINDEX),
        axis=1
    )

# Global min–max normalization per phase (respondent level) -> 0–100.
df_idx["subidx_pra_0_100"], MIN_PRA, MAX_PRA = minmax_0_100_global(df_idx["subidx_pra_msi"])
df_idx["subidx_saat_0_100"], MIN_SAAT, MAX_SAAT = minmax_0_100_global(df_idx["subidx_saat_msi"])
df_idx["subidx_pasca_0_100"],MIN_PAS, MAX_PAS = minmax_0_100_global(df_idx["subidx_pasca_msi"])

print("\n📌 MIN–MAX GLOBAL (MSI->0–100)")
print(f"• PRA : min={MIN_PRA:.6f} max={MAX_PRA:.6f}")
print(f"• SAAT : min={MIN_SAAT:.6f} max={MAX_SAAT:.6f}")
print(f"• PASCA: min={MIN_PAS:.6f} max={MAX_PAS:.6f}")

# Weighted composite of the three normalized subindices; divided by the
# weight sum for safety (WEIGHTS already sums to 1).
w_sum = float(sum(WEIGHTS.values()))
df_idx["index_0_100"] = (
    WEIGHTS["pra"] * df_idx["subidx_pra_0_100"] +
    WEIGHTS["saat"] * df_idx["subidx_saat_0_100"] +
    WEIGHTS["pasca"]* df_idx["subidx_pasca_0_100"]
) / w_sum
df_idx["index_0_100"] = pd.to_numeric(df_idx["index_0_100"], errors="coerce").round(2)

DEMOG_COLS = detect_demog_columns(df_idx)
print("ℹ️ Demog cols:", DEMOG_COLS)
|
|
| |
| |
| |
def empty_figure(msg: str):
    """Placeholder figure: a blank canvas showing only a centered message."""
    fig, axes = plt.subplots(figsize=(8, 3))
    axes.axis("off")
    axes.text(0.5, 0.5, msg, ha="center", va="center")
    fig.tight_layout()
    return fig
|
|
def plot_demog_bar(df_dem: pd.DataFrame, cat_col: str, title: str):
    """Bar chart of mean TKM index (0–100) per demographic category."""
    labels = df_dem[cat_col].astype(str).tolist()
    heights = df_dem["mean_index_0_100"].values
    positions = np.arange(len(labels))

    fig, ax = plt.subplots(figsize=(10, 4))
    ax.bar(positions, heights)
    ax.set_xticks(positions)
    ax.set_xticklabels(labels, rotation=25, ha="right")
    ax.set_ylim(0, 100)
    ax.set_ylabel("Indeks TKM (0–100)")
    ax.set_title(title)
    ax.grid(True, linestyle="--", alpha=0.25)

    # Annotate each bar with its value (skip missing).
    for pos, height in enumerate(heights):
        if pd.isna(height):
            continue
        ax.text(pos, height + 1.2, f"{height:.1f}", ha="center")
    fig.tight_layout()
    return fig
|
|
| |
| |
| |
def get_prov_choices():
    """Dropdown choices: '(Semua)' followed by all non-empty province names, sorted."""
    names = df_idx[PROV_COL].dropna().unique().tolist()
    provinces = sorted(name for name in names if str(name).strip() != "")
    return ["(Semua)"] + provinces
|
|
def update_kab_dropdown(provinsi: str):
    """Refresh the Kab/Kota dropdown after the province filter changes."""
    source = df_idx if provinsi == "(Semua)" else df_idx[df_idx[PROV_COL] == provinsi]
    names = source[KABKOT_COL].dropna().unique().tolist()
    kabs = sorted(name for name in names if str(name).strip() != "")
    return gr.update(choices=["(Semua)"] + kabs, value="(Semua)")
|
|
def build_hero_md(provinsi: str,
                  prov_row: Optional[pd.Series],
                  n_adjusted_kab: int,
                  n_total_kab: int,
                  n_total_resp: int) -> str:
    """Build the executive-summary markdown shown at the top of the dashboard.

    Three cases: national overview when provinsi == "(Semua)", a warning when
    no provincial score row is available, otherwise the province's score with
    its category and sample-adjustment counts.
    """
    # National view: methodology summary + global min–max bounds.
    if provinsi == "(Semua)":
        return (
            "## Ringkasan Eksekutif\n"
            f"- Exclusion (STRICT) sebelum MSI: terhapus **{excl_stats['terhapus_total']}** dari **{excl_stats['baris_awal']}**, diolah **{excl_stats['diolah']}**.\n"
            f"- Normalisasi: **min–max data global per fase** pada level responden → 0–100.\n"
            f"- Indeks TKM 0–100 (responden): komposit berbobot dari **Pra/Saat/Pasca**.\n"
            f"- Penyesuaian sampel Kab/Kota: target **{MIN_RESPONDEN_SLOVIN}**; n<{MIN_RESPONDEN_SLOVIN} → dikali faktor.\n"
            f"- Bali: provinsi tampil, agregat provinsi **tidak dihitung**.\n\n"
            f"**Min–max global per fase:**\n"
            f"- Pra : min={MIN_PRA:.6f}, max={MAX_PRA:.6f}\n"
            f"- Saat : min={MIN_SAAT:.6f}, max={MAX_SAAT:.6f}\n"
            f"- Pasca: min={MIN_PAS:.6f}, max={MAX_PAS:.6f}\n"
        )

    # Specific province but no aggregate row found for it.
    if prov_row is None:
        return f"## Ringkasan Eksekutif — {provinsi}\n⚠️ Skor provinsi tidak tersedia pada filter ini."

    # "Status" is the display column name; "prov_status" the pre-rename one.
    prov_status = str(prov_row.get("Status", prov_row.get("prov_status", ""))).strip()
    idx_val = prov_row.get("Indeks_TKM_0_100", np.nan)

    # Excluded/no-aggregate provinces carry NaN: show an em dash, no category.
    if not np.isfinite(pd.to_numeric(idx_val, errors="coerce")):
        return (
            f"## Ringkasan Eksekutif — {provinsi}\n"
            f"### **—**\n"
            f"- Kab/Kota disesuaikan (n<{MIN_RESPONDEN_SLOVIN}): **{n_adjusted_kab}** dari **{n_total_kab}**\n"
            f"- Total responden pada filter: **{n_total_resp}**\n\n"
            f"{prov_status}"
        ).strip()

    kat = kategori_indeks_final(float(idx_val))
    return (
        f"## Ringkasan Eksekutif — {provinsi}\n"
        f"### **{float(idx_val):.2f}** ({kat})\n\n"
        f"- Kab/Kota disesuaikan (n<{MIN_RESPONDEN_SLOVIN}): **{n_adjusted_kab}** dari **{n_total_kab}**\n"
        f"- Total responden pada filter: **{n_total_resp}**\n\n"
        f"{prov_status}"
    ).strip()
|
|
| |
| |
| |
def compute_outputs(provinsi: str, kabkota: str):
    """Main dashboard callback: filter respondents, aggregate, build all outputs.

    Returns a 13-tuple feeding the Gradio UI: hero markdown, info markdown,
    valid Kab/Kota table, province table, adjusted Kab/Kota table, sample
    verification table, respondent records (state for the demog tab), detail
    dataframe, detail plot, two visibility updates, demog figure, demog text.
    """
    # Apply the two dropdown filters at respondent level.
    sub = df_idx.copy()
    if provinsi != "(Semua)":
        sub = sub[sub[PROV_COL] == provinsi]
    if kabkota != "(Semua)":
        sub = sub[sub[KABKOT_COL] == kabkota]

    # Empty filter: return placeholder versions of every output slot.
    if sub.empty:
        empty = pd.DataFrame()
        hero = "## Ringkasan Eksekutif\n⚠️ Tidak ada data pada filter ini."
        info = "⚠️ Tidak ada data."
        return (
            hero, info,
            empty, empty, empty, empty,
            [],
            pd.DataFrame(columns=["Komponen", "Nilai"]),
            empty_figure("Tidak ada data."),
            gr.update(visible=False),
            gr.update(visible=False),
            empty_figure("Tidak ada data."),
            "Tidak ada data."
        )

    # ---- Kab/Kota aggregation + sample-size adjustment -------------------
    kab = aggregate_kabkot(sub, PROV_COL, KABKOT_COL).copy()

    # Adjustment factor min(n/400, 1) and human-readable status per Kab/Kota.
    kab["_faktor_internal"] = kab["n_responden"].apply(faktor_penyesuaian)
    kab["status_sampel"] = kab["n_responden"].apply(status_penyesuaian)

    # Final score = raw mean index × adjustment factor.
    kab["_raw_internal"] = pd.to_numeric(kab["indeks_mean"], errors="coerce")
    kab["Indeks_TKM_0_100_final"] = (kab["_raw_internal"] * kab["_faktor_internal"]).round(2)

    kab["Kategori_Indeks_TKM_FINAL"] = kab["Indeks_TKM_0_100_final"].apply(kategori_indeks_final)

    # Display rule (module docstring item 4): blank n_responden when n >= 400;
    # the numeric copy is kept internally in _n_responden_num.
    kab["_n_responden_num"] = pd.to_numeric(kab["n_responden"], errors="coerce").fillna(0).astype(int)
    kab["n_responden"] = np.where(
        kab["_n_responden_num"] >= MIN_RESPONDEN_SLOVIN,
        "",
        kab["_n_responden_num"].astype(int)
    )

    # Display table with the public column names.
    out_kab_all = kab.rename(columns={PROV_COL: "Provinsi", KABKOT_COL: "Wilayah"})[
        [
            "Provinsi",
            "Wilayah",
            "subidx_pra_0_100_mean",
            "subidx_saat_0_100_mean",
            "subidx_pasca_0_100_mean",
            "Indeks_TKM_0_100_final",
            "Kategori_Indeks_TKM_FINAL",
            "n_responden",
            "status_sampel",
        ]
    ].rename(columns={
        "Indeks_TKM_0_100_final": "Indeks_TKM_0_100",
        "subidx_pra_0_100_mean": "SubIndeks_Pra_0_100",
        "subidx_saat_0_100_mean": "SubIndeks_Saat_0_100",
        "subidx_pasca_0_100_mean": "SubIndeks_Pasca_0_100",
    }).sort_values(["Provinsi", "Wilayah"])

    # Split into "meets target n" vs "sample-adjusted" views.
    out_kab_valid = out_kab_all[kab["_n_responden_num"] >= MIN_RESPONDEN_SLOVIN]
    out_kab_adjusted = out_kab_all[kab["_n_responden_num"] < MIN_RESPONDEN_SLOVIN]

    # ---- Province aggregation (recomputed on numeric n, not the blanked
    # display column above) ------------------------------------------------
    kab_for_prov = aggregate_kabkot(sub, PROV_COL, KABKOT_COL).copy()
    kab_for_prov["_faktor_internal"] = kab_for_prov["n_responden"].apply(faktor_penyesuaian)
    kab_for_prov["_raw_internal"] = pd.to_numeric(kab_for_prov["indeks_mean"], errors="coerce")
    kab_for_prov["Indeks_TKM_0_100_final"] = (kab_for_prov["_raw_internal"] * kab_for_prov["_faktor_internal"]).round(2)

    prov_from_kab = aggregate_prov_from_kab(kab_for_prov, PROV_COL).rename(columns={PROV_COL: "Provinsi"})
    prov_from_kab["Indeks_TKM_0_100"] = pd.to_numeric(prov_from_kab["tkm_final_mean"], errors="coerce").round(2)
    prov_from_kab["Kategori_Indeks_TKM_FINAL"] = prov_from_kab["Indeks_TKM_0_100"].apply(kategori_indeks_final)
    prov_from_kab["Status"] = prov_from_kab["prov_status"]

    # Institutional exclusions: province rows stay visible but carry no score.
    no_agg = set(norm_region_name(p) for p in EXCLUDE_PROVINSI_NO_AGG)
    mask_no_agg = prov_from_kab["Provinsi"].map(norm_region_name).isin(no_agg)
    prov_from_kab.loc[mask_no_agg, "Indeks_TKM_0_100"] = np.nan
    prov_from_kab.loc[mask_no_agg, "Kategori_Indeks_TKM_FINAL"] = ""
    prov_from_kab.loc[mask_no_agg, "Status"] = "🚫 EXCLUDED (kelembagaan): provinsi ditampilkan tetapi agregat tidak dihitung"

    # Display rule (module docstring item 5): province n_responden always blank.
    prov_from_kab["n_responden"] = ""

    out_prov = prov_from_kab[["Provinsi", "Indeks_TKM_0_100", "Kategori_Indeks_TKM_FINAL", "n_responden", "Status"]]

    # ---- Summary counts + hero markdown ----------------------------------
    n_total_kab = len(kab_for_prov)
    n_adjusted_kab = int((pd.to_numeric(kab_for_prov["n_responden"], errors="coerce").fillna(0) < MIN_RESPONDEN_SLOVIN).sum())

    hero_md = build_hero_md(
        provinsi,
        out_prov.iloc[0] if provinsi != "(Semua)" and not out_prov.empty else None,
        n_adjusted_kab,
        n_total_kab,
        len(sub),
    )

    info = (
        f"Responden pada filter: **{len(sub)}** | "
        f"Kab/Kota total: **{n_total_kab}** | "
        f"Kab/Kota disesuaikan: **{n_adjusted_kab}**"
    )

    # Respondent-level records kept as Gradio state for the demographics tab.
    records = sub.to_dict(orient="records")

    # Sample verification table (n shown per the Kab/Kota display rule).
    verif = kab.rename(columns={PROV_COL: "Provinsi", KABKOT_COL: "Kab/Kota"})[
        ["Provinsi", "Kab/Kota", "n_responden", "status_sampel"]
    ].copy()

    # Detail panel placeholders (filled elsewhere / hidden by default).
    detail_df = pd.DataFrame()
    detail_plot = empty_figure("")

    return (
        hero_md,
        info,
        out_kab_valid,
        out_prov,
        out_kab_adjusted,
        verif,
        records,
        detail_df,
        detail_plot,
        gr.update(visible=False),
        gr.update(visible=False),
        empty_figure(""),
        "",
    )
|
|
| |
| |
| |
def demog_summary_one(records: List[dict], key: str) -> Optional[pd.DataFrame]:
    """Mean TKM index per category of one demographic column.

    Returns a dataframe sorted descending by mean, or None when the
    demographic column is not available.
    """
    if not records or key not in DEMOG_COLS:
        return None
    col = DEMOG_COLS[key]
    frame = pd.DataFrame(records)
    if col not in frame.columns:
        return None

    frame["index_0_100"] = pd.to_numeric(frame["index_0_100"], errors="coerce")
    summary = (
        frame.groupby(col, dropna=False)["index_0_100"]
        .mean()
        .reset_index()
        .rename(columns={"index_0_100": "mean_index_0_100"})
        .sort_values("mean_index_0_100", ascending=False)
    )
    return summary
|
|
def demog_rule_text(g: pd.DataFrame, col: str) -> str:
    """One-sentence comparison of the highest- vs lowest-scoring group.

    Assumes ``g`` is already sorted descending by mean (as produced by
    demog_summary_one): first row = top group, last row = bottom group.
    """
    ranked = g.copy()
    ranked["mean_index_0_100"] = pd.to_numeric(ranked["mean_index_0_100"], errors="coerce")
    ranked = ranked.dropna(subset=["mean_index_0_100"])
    if ranked.empty:
        return "Tidak cukup data untuk analisis demografi."

    top = ranked.iloc[0]
    bot = ranked.iloc[-1]
    return (
        f"Kelompok **{top[col]}** cenderung lebih tinggi (≈{top['mean_index_0_100']:.1f}) "
        f"dibanding **{bot[col]}** (≈{bot['mean_index_0_100']:.1f}). "
        "Gunakan ini untuk segmentasi program."
    )
|
|
def demog_run(records: List[dict], dimensi: str):
    """Build the demographic bar chart + rule-based narrative for the UI."""
    if not records:
        return empty_figure("Tidak ada data pada filter ini."), "Tidak ada data."

    key_by_label = {
        "Jenis Kelamin": "jenis_kelamin",
        "Kelompok Usia": "usia",
        "Tingkat Pendidikan": "pendidikan",
    }
    # Inverse mapping for chart titles.
    label_by_key = {v: k for k, v in key_by_label.items()}

    key = key_by_label.get(dimensi, "jenis_kelamin")
    summary = demog_summary_one(records, key)
    if summary is None or summary.empty or key not in DEMOG_COLS:
        return empty_figure("Kolom demografi tidak ditemukan di data."), "Analisis demografi belum dapat dilakukan."

    col = DEMOG_COLS[key]
    fig = plot_demog_bar(summary, col, f"Rerata Indeks TKM (0–100) menurut {label_by_key.get(key, col)}")
    return fig, demog_rule_text(summary, col)
|
|
| |
| |
| |
| def _safe_sheet_name(name: str) -> str: |
| bad = ['\\', '/', '*', '[', ']', ':', '?'] |
| out = name |
| for b in bad: |
| out = out.replace(b, " ") |
| out = " ".join(out.split()).strip() |
| return out[:31] if out else "Sheet1" |
|
|
| def _now_tag() -> str: |
| return datetime.now().strftime("%Y%m%d_%H%M%S") |
|
|
def _hf_generate(prompt: str) -> Optional[str]:
    """Call the Hugging Face text-generation API; return generated text or None.

    Best-effort by design: missing token, non-200 responses, API errors, and
    any exception all return None so the caller can fall back to the
    deterministic table (_fallback_rows). Uses a dedicated endpoint when
    HF_ENDPOINT_URL is set, otherwise the serverless Inference API.
    """
    if not HF_TOKEN:
        return None

    headers = {
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json",
        "Accept": "application/json",
    }

    try:
        # --- Dedicated inference endpoint path -----------------------------
        if HF_ENDPOINT_URL:
            url = HF_ENDPOINT_URL.rstrip("/") + "/generate"
            payload = {
                "inputs": prompt,
                "parameters": {
                    "max_new_tokens": HF_MAX_NEW_TOKENS,
                    "temperature": HF_TEMPERATURE,
                    "top_p": HF_TOP_P,
                    "return_full_text": False,
                }
            }
            r = requests.post(url, headers=headers, json=payload, timeout=HF_TIMEOUT)
            if r.status_code != 200:
                return None
            data = r.json()
            # NOTE(review): assumes the endpoint returns a dict with
            # "generated_text"/"text"; some TGI deployments return a list —
            # confirm against the deployed endpoint.
            txt = (data.get("generated_text") or data.get("text") or "").strip()
            return txt if txt else None

        # --- Serverless Inference API path ---------------------------------
        url = f"https://api-inference.huggingface.co/models/{HF_MODEL}"
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": HF_MAX_NEW_TOKENS,
                "temperature": HF_TEMPERATURE,
                "top_p": HF_TOP_P,
                "return_full_text": False,
            },
            # Block until the model is loaded instead of failing with 503.
            "options": {"wait_for_model": True}
        }
        r = requests.post(url, headers=headers, json=payload, timeout=HF_TIMEOUT)
        if r.status_code != 200:
            return None

        data = r.json()
        # API-level error reported inside a 200 body.
        if isinstance(data, dict) and data.get("error"):
            return None
        # Normal shape: [{"generated_text": "..."}].
        if isinstance(data, list) and len(data) > 0 and isinstance(data[0], dict):
            txt = (data[0].get("generated_text") or "").strip()
            return txt if txt else None
        return None

    except Exception:
        # Deliberate broad catch: network/JSON failures degrade to fallback.
        return None
|
|
def _df_to_docx_table(doc: "Document", df: pd.DataFrame, title: str, max_rows: int = 60):
    """Append a DataFrame as a grid table (capped at max_rows) under a heading."""
    doc.add_heading(title, level=2)
    if df is None or df.empty:
        doc.add_paragraph("Tidak ada data pada tabel ini.")
        return

    data = df.head(max_rows) if len(df) > max_rows else df.copy()

    table = doc.add_table(rows=1, cols=len(data.columns))
    table.style = "Table Grid"

    # Header row, bolded.
    header_cells = table.rows[0].cells
    for idx, col in enumerate(data.columns):
        bold_run = header_cells[idx].paragraphs[0].add_run(str(col))
        bold_run.bold = True

    # Body rows: finite floats get 2 decimals; NaN becomes an empty cell.
    for _, record in data.iterrows():
        cells = table.add_row().cells
        for idx, col in enumerate(data.columns):
            value = record[col]
            if isinstance(value, float) and np.isfinite(value):
                cells[idx].text = f"{value:.2f}"
            else:
                cells[idx].text = "" if pd.isna(value) else str(value)
|
|
def _extract_focus_scores(provinsi: str, kabkota: str) -> Dict[str, Any]:
    """
    Ambil 4 nilai untuk tabel dimensi: Pra, Saat, Pasca, TKM.
    - Jika kabkota spesifik: ambil baris kabkota.
    - Jika provinsi spesifik & kabkota semua: hitung prov (tkm) + subindeks prov = weighted mean subindeks kabkota by n.
    - Jika provinsi=(Semua): return NaN.

    Returns a dict {"label", "pra", "saat", "pasca", "tkm"} (values may be NaN).
    """
    # Apply the same two-level filter used by the dashboard.
    sub = df_idx.copy()
    if provinsi != "(Semua)":
        sub = sub[sub[PROV_COL] == provinsi]
    if kabkota != "(Semua)":
        sub = sub[sub[KABKOT_COL] == kabkota]

    if sub.empty:
        return {"label": "", "pra": np.nan, "saat": np.nan, "pasca": np.nan, "tkm": np.nan}

    # Kab/Kota aggregation with the sample-adjustment factor applied.
    kab_num = aggregate_kabkot(sub, PROV_COL, KABKOT_COL).copy()
    kab_num["_n"] = pd.to_numeric(kab_num["n_responden"], errors="coerce").fillna(0).astype(int)
    kab_num["_f"] = kab_num["_n"].apply(faktor_penyesuaian)
    kab_num["_tkm_final"] = (pd.to_numeric(kab_num["indeks_mean"], errors="coerce") * kab_num["_f"]).round(2)

    # Specific Kab/Kota: single aggregated row after filtering.
    if kabkota != "(Semua)":
        row = kab_num.iloc[0]
        return {
            "label": f"{provinsi} — {kabkota}",
            "pra": float(pd.to_numeric(row["subidx_pra_0_100_mean"], errors="coerce")),
            "saat": float(pd.to_numeric(row["subidx_saat_0_100_mean"], errors="coerce")),
            "pasca": float(pd.to_numeric(row["subidx_pasca_0_100_mean"], errors="coerce")),
            "tkm": float(pd.to_numeric(row["_tkm_final"], errors="coerce")),
        }

    # Specific province: n-weighted means over its Kab/Kota rows, unless the
    # province is institutionally excluded from aggregation.
    if provinsi != "(Semua)":
        if norm_region_name(provinsi) in set(norm_region_name(p) for p in EXCLUDE_PROVINSI_NO_AGG):
            return {"label": provinsi, "pra": np.nan, "saat": np.nan, "pasca": np.nan, "tkm": np.nan}

        tkm_prov = weighted_mean(kab_num["_tkm_final"], kab_num["_n"])
        pra_prov = weighted_mean(kab_num["subidx_pra_0_100_mean"], kab_num["_n"])
        saat_prov = weighted_mean(kab_num["subidx_saat_0_100_mean"], kab_num["_n"])
        pasca_prov = weighted_mean(kab_num["subidx_pasca_0_100_mean"], kab_num["_n"])
        return {"label": provinsi, "pra": pra_prov, "saat": saat_prov, "pasca": pasca_prov, "tkm": tkm_prov}

    # National (no filter): no focus scores are produced.
    return {"label": "NASIONAL (filter semua)", "pra": np.nan, "saat": np.nan, "pasca": np.nan, "tkm": np.nan}
|
|
def _build_llm_table_prompt(focus: Dict[str, Any]) -> str:
    """Build the LLM prompt asking for the 4-row dimension table as strict JSON.

    The numbers and categories are precomputed here (the model must not invent
    values); the model only fills interpretation and recommendation text.
    """
    def f2(x):
        # Coerce to a finite float, or None (serialized as JSON null).
        try:
            v = float(pd.to_numeric(x, errors="coerce"))
            return v if np.isfinite(v) else None
        except Exception:
            return None

    payload = {
        "label": focus.get("label", ""),
        "nilai": {
            "pra": f2(focus.get("pra")),
            "saat": f2(focus.get("saat")),
            "pasca": f2(focus.get("pasca")),
            "tkm": f2(focus.get("tkm")),
        },
        "kategori": {
            "pra": kategori_indeks_final(focus.get("pra")),
            "saat": kategori_indeks_final(focus.get("saat")),
            "pasca": kategori_indeks_final(focus.get("pasca")),
            "tkm": kategori_indeks_final(focus.get("tkm")),
        },
        "rule_kategori": {
            "sangat_rendah": "<50",
            "rendah": "51-<65",
            "sedang": "66-<80",
            "tinggi": "81-<90",
            "sangat_tinggi": ">=91"
        },
        "bobot": WEIGHTS,
        "metode": "MSI pooled per fase -> normalisasi min-max global per fase -> komposit berbobot -> agregasi -> penyesuaian sampel (kab/kota n<400)"
    }

    # Instruction text is runtime prompt content and must stay verbatim.
    instruksi = """
Anda adalah analis kebijakan literasi yang tajam dan teknokratis.
Isi kolom Interpretasi dan Rekomendasi untuk tabel 4 baris:
1) Pra Membaca
2) Saat Membaca
3) Pasca Membaca
4) Nilai TKM

ATURAN:
- Jangan mengarang angka.
- Interpretasi harus mengunci makna angka terhadap kesiapan (pra), kualitas pelaksanaan (saat), penguatan/retensi (pasca), dan implikasinya ke TKM komposit.
- Rekomendasi harus operasional, spesifik, dan menunjukkan prioritas bottleneck.
- Output WAJIB JSON valid sesuai skema.
- Jika nilai null, isi interpretasi: "tidak cukup data pada filter ini" dan rekomendasi: "perbaiki kelengkapan data/representasi responden".

SKEMA OUTPUT:
{
"rows": [
{"no": 1, "dimensi": "Pra Membaca", "nilai": <number_or_null>, "kategori": "<string>", "interpretasi": "<string>", "rekomendasi": "<string>"},
{"no": 2, "dimensi": "Saat Membaca", "nilai": <number_or_null>, "kategori": "<string>", "interpretasi": "<string>", "rekomendasi": "<string>"},
{"no": 3, "dimensi": "Pasca Membaca", "nilai": <number_or_null>, "kategori": "<string>", "interpretasi": "<string>", "rekomendasi": "<string>"},
{"no": 4, "dimensi": "Nilai TKM", "nilai": <number_or_null>, "kategori": "<string>", "interpretasi": "<string>", "rekomendasi": "<string>"}
]
}
""".strip()

    return instruksi + "\n\nINPUT_JSON:\n" + json.dumps(payload, ensure_ascii=False)
|
|
def _fallback_rows(focus: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Deterministic table rows used when the LLM response is missing or
    unparseable, so the Word report always has the mandatory 4-row table."""

    def _row(no, dimensi, raw):
        score = pd.to_numeric(raw, errors="coerce")
        if not np.isfinite(score):
            # No usable number for this filter: emit the agreed placeholder.
            return {
                "no": no, "dimensi": dimensi, "nilai": None, "kategori": "",
                "interpretasi": "tidak cukup data pada filter ini",
                "rekomendasi": "perbaiki kelengkapan data/representasi responden"
            }
        score = float(score)
        kategori = kategori_indeks_final(score)
        return {
            "no": no, "dimensi": dimensi, "nilai": round(score, 2), "kategori": kategori,
            "interpretasi": (
                f"Nilai {score:.2f} berada pada kategori '{kategori}'. "
                "Baca ini sebagai posisi dimensi terhadap dimensi lain; kesenjangan antar-dimensi adalah sumber utama pelemahan nilai komposit."
            ),
            "rekomendasi": (
                "Tetapkan paket intervensi minimum (SOP singkat + pendampingan fasilitator + monitoring indikator inti) "
                "dan fokuskan pada dimensi yang paling tertinggal."
            )
        }

    return [
        _row(1, "Pra Membaca", focus.get("pra")),
        _row(2, "Saat Membaca", focus.get("saat")),
        _row(3, "Pasca Membaca", focus.get("pasca")),
        _row(4, "Nilai TKM", focus.get("tkm")),
    ]
|
|
def _add_dimensi_table_to_docx(doc: "Document", rows: List[Dict[str, Any]]):
    """Append the mandatory 5-column table (No | Dimensi | Nilai |
    Interpretasi | Rekomendasi) to the Word document."""
    doc.add_heading("Tabel Analisis Dimensi (Pra/Saat/Pasca) dan Nilai TKM", level=1)

    headers = ["No", "Dimensi", "Nilai", "Interpretasi", "Rekomendasi"]
    table = doc.add_table(rows=1, cols=len(headers))
    table.style = "Table Grid"

    # Bold header row.
    header_cells = table.rows[0].cells
    for idx, label in enumerate(headers):
        bold_run = header_cells[idx].paragraphs[0].add_run(str(label))
        bold_run.bold = True

    def _fmt_nilai(raw):
        # Render the score as "xx.xx"; blank for None / non-numeric / non-finite.
        if raw is None:
            return ""
        try:
            num = float(pd.to_numeric(raw, errors="coerce"))
        except Exception:
            return ""
        return f"{num:.2f}" if np.isfinite(num) else ""

    for row in rows:
        cells = table.add_row().cells
        cells[0].text = str(row.get("no", ""))
        cells[1].text = str(row.get("dimensi", ""))
        cells[2].text = _fmt_nilai(row.get("nilai", None))
        cells[3].text = str(row.get("interpretasi", "")).strip()
        cells[4].text = str(row.get("rekomendasi", "")).strip()
|
|
def export_excel(provinsi: str, kabkota: str):
    """Recompute all dashboard outputs for the active filter and write them
    to a single .xlsx workbook in the current working directory.

    Returns the path of the saved workbook.
    """
    hero_md, info_md, out_kab_valid, out_prov, out_kab_adjusted, out_verif, records, *_ = compute_outputs(provinsi, kabkota)

    # Demographic sheets are optional: only built when the column exists.
    demog_jk = demog_summary_one(records, "jenis_kelamin") if "jenis_kelamin" in DEMOG_COLS else None
    demog_usia = demog_summary_one(records, "usia") if "usia" in DEMOG_COLS else None
    demog_pend = demog_summary_one(records, "pendidikan") if "pendidikan" in DEMOG_COLS else None

    # Sanitize the filename: spaces and path separators become underscores.
    filename = (
        f"OUTPUT_TKM_{_now_tag()}_{provinsi.replace(' ', '_')}_{kabkota.replace(' ', '_')}.xlsx"
        .replace("/", "_")
        .replace("\\", "_")
    )
    out_path = str(Path.cwd() / filename)

    # META sheet: run parameters and the global min/max used for normalization.
    meta_pairs = [
        ("DATA_PATH", DATA_PATH),
        ("MIN_RESPONDEN_SLOVIN", MIN_RESPONDEN_SLOVIN),
        ("WEIGHTS_pra", WEIGHTS["pra"]),
        ("WEIGHTS_saat", WEIGHTS["saat"]),
        ("WEIGHTS_pasca", WEIGHTS["pasca"]),
        ("NORMALISASI", "min-max global per fase (MSI->0-100) lalu komposit berbobot = index_0_100"),
        ("MIN_PRA_subidx_pra_msi", MIN_PRA),
        ("MAX_PRA_subidx_pra_msi", MAX_PRA),
        ("MIN_SAAT_subidx_saat_msi", MIN_SAAT),
        ("MAX_SAAT_subidx_saat_msi", MAX_SAAT),
        ("MIN_PASCA_subidx_pasca_msi", MIN_PAS),
        ("MAX_PASCA_subidx_pasca_msi", MAX_PAS),
        ("Kategori_Indeks_FINAL_Rule",
         "<50 sangat rendah; 51–<65 rendah; 66–<80 sedang; 81–<90 tinggi; >=91 sangat tinggi"),
    ]
    meta = pd.DataFrame(meta_pairs, columns=["Parameter", "Value"])

    def _nz(df):
        # None -> empty sheet instead of a to_excel failure.
        return df if df is not None else pd.DataFrame()

    def _nz_df(obj):
        # Demographic summaries may be any non-DataFrame sentinel -> empty sheet.
        return obj if isinstance(obj, pd.DataFrame) else pd.DataFrame()

    with pd.ExcelWriter(out_path, engine="openpyxl") as writer:
        meta.to_excel(writer, sheet_name=_safe_sheet_name("META"), index=False)
        pd.DataFrame({"Hero_MD": [hero_md], "Info_MD": [info_md]}).to_excel(
            writer, sheet_name=_safe_sheet_name("RINGKASAN"), index=False
        )

        _nz(out_kab_valid).to_excel(writer, sheet_name=_safe_sheet_name("KabKota_Valid"), index=False)
        _nz(out_kab_adjusted).to_excel(writer, sheet_name=_safe_sheet_name("KabKota_Adjusted"), index=False)
        _nz(out_prov).to_excel(writer, sheet_name=_safe_sheet_name("Provinsi"), index=False)
        _nz(out_verif).to_excel(writer, sheet_name=_safe_sheet_name("Verifikasi_Sampel"), index=False)

        _nz_df(demog_jk).to_excel(writer, sheet_name=_safe_sheet_name("Demog_JenisKelamin"), index=False)
        _nz_df(demog_usia).to_excel(writer, sheet_name=_safe_sheet_name("Demog_Usia"), index=False)
        _nz_df(demog_pend).to_excel(writer, sheet_name=_safe_sheet_name("Demog_Pendidikan"), index=False)

    return out_path
|
|
def export_word_report(provinsi: str, kabkota: str):
    """Build and save the Word (.docx) report for the current filter.

    Contents: executive summary, the mandatory 5-column dimension table
    (rows supplied by the HF LLM, or a deterministic fallback), appendix
    tables, and demographic appendices.

    Raises:
        RuntimeError: when python-docx is not installed — by requirement the
            output MUST be .docx, never a .txt fallback.

    Returns:
        str: path of the saved .docx file.
    """
    # Fail loudly (requirement #1): no silent fallback to plain text.
    if not DOCX_AVAILABLE:
        raise RuntimeError(
            "python-docx TIDAK TERPASANG di environment. "
            "Pasang: pip install python-docx (atau tambahkan ke requirements.txt HF Spaces) agar output .docx bisa dibuat."
        )

    # Recompute every dashboard artefact for the selected filter.
    # NOTE(review): assumes compute_outputs returns at least 8 positional
    # values in this exact order — confirm against its definition.
    hero_md, info_md, out_kab_valid, out_prov, out_kab_adjusted, out_verif, records, detail_df, *_ = compute_outputs(provinsi, kabkota)

    # Demographic summaries are optional: only built when the column exists.
    demog_jk = demog_summary_one(records, "jenis_kelamin") if "jenis_kelamin" in DEMOG_COLS else None
    demog_usia = demog_summary_one(records, "usia") if "usia" in DEMOG_COLS else None
    demog_pend = demog_summary_one(records, "pendidikan") if "pendidikan" in DEMOG_COLS else None

    # Ask the LLM (via Hugging Face) for interpretasi/rekomendasi rows.
    focus = _extract_focus_scores(provinsi, kabkota)
    prompt = _build_llm_table_prompt(focus)
    llm_text = _hf_generate(prompt)

    # Parse the LLM answer: take the outermost {...} span and decode as JSON.
    rows: List[Dict[str, Any]] = []
    if llm_text:
        try:
            m = re.search(r"\{[\s\S]*\}", llm_text.strip())
            js = json.loads(m.group(0) if m else llm_text)
            rows = js.get("rows", [])
        except Exception:
            # Any parse failure falls through to the deterministic fallback.
            rows = []

    # Fallback keeps the report complete even with no/invalid LLM response.
    if not rows:
        rows = _fallback_rows(focus)

    # Assemble the document (order of paragraphs/tables is the report layout).
    doc = Document()
    style = doc.styles["Normal"]
    style.font.name = "Calibri"
    style.font.size = Pt(11)

    title = doc.add_paragraph("LAPORAN HASIL INDEKS TKM (MSI) — Normalisasi Min–Max Data (Global per Fase) + Penyesuaian Sampel")
    title.runs[0].bold = True
    title.alignment = WD_ALIGN_PARAGRAPH.CENTER

    subp = doc.add_paragraph(f"Tanggal: {datetime.now().strftime('%d %B %Y')} | Filter: Provinsi={provinsi}, Kab/Kota={kabkota}")
    subp.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Executive summary: reuse the dashboard markdown, stripping md markers.
    doc.add_paragraph("")
    doc.add_heading("Ringkasan Dashboard", level=1)
    doc.add_paragraph(hero_md.replace("## ", "").replace("# ", ""))
    doc.add_paragraph(info_md.replace("**", ""))

    # Mandatory 5-column dimension table (requirement #2).
    doc.add_paragraph("")
    _add_dimensi_table_to_docx(doc, rows)

    # Appendix tables, capped via max_rows to keep the document manageable.
    doc.add_page_break()
    doc.add_heading("Lampiran Tabel", level=1)
    _df_to_docx_table(doc, out_prov, "Tabel Provinsi (dari Indeks_final Kab/Kota)", max_rows=60)
    doc.add_paragraph("")
    _df_to_docx_table(doc, out_kab_valid, f"Tabel Kab/Kota n≥{MIN_RESPONDEN_SLOVIN} (tanpa penyesuaian)", max_rows=80)
    doc.add_paragraph("")
    _df_to_docx_table(doc, out_kab_adjusted, f"Tabel Kab/Kota n<{MIN_RESPONDEN_SLOVIN} (disesuaikan)", max_rows=80)
    doc.add_paragraph("")
    _df_to_docx_table(doc, out_verif, "Verifikasi Kondisi Sampel", max_rows=120)

    # Demographic appendices (empty table when the summary is unavailable).
    doc.add_paragraph("")
    doc.add_heading("Lampiran Demografi", level=1)
    _df_to_docx_table(doc, demog_jk if isinstance(demog_jk, pd.DataFrame) else pd.DataFrame(), "Demografi — Jenis Kelamin (mean index_0_100)", max_rows=60)
    doc.add_paragraph("")
    _df_to_docx_table(doc, demog_usia if isinstance(demog_usia, pd.DataFrame) else pd.DataFrame(), "Demografi — Kelompok Usia (mean index_0_100)", max_rows=60)
    doc.add_paragraph("")
    _df_to_docx_table(doc, demog_pend if isinstance(demog_pend, pd.DataFrame) else pd.DataFrame(), "Demografi — Tingkat Pendidikan (mean index_0_100)", max_rows=60)

    # Sanitize the filename: spaces and path separators become underscores.
    fn = f"LAPORAN_TKM_{_now_tag()}_{provinsi.replace(' ', '_')}_{kabkota.replace(' ', '_')}.docx"
    fn = fn.replace("/", "_").replace("\\", "_")
    out_path = str(Path.cwd() / fn)
    doc.save(out_path)
    return out_path
|
|
| |
| |
| |
# ---------------------------------------------------------------------------
# Gradio UI wiring
# ---------------------------------------------------------------------------
with gr.Blocks(title="Dashboard TKM (MSI) — MinMax Global per Fase + Adjusted by Sample") as demo:
    # Static header: final-index category rule, exclusion stats, and the
    # global min/max per phase used for normalization.
    gr.Markdown(
        f"""
# Dashboard Indeks TKM (MSI) — NORMALISASI MIN–MAX DATA (GLOBAL PER FASE)

**Kategori Indeks FINAL (0–100) — RULE:**
- < 50 : sangat rendah
- 51 – < 65 : rendah
- 66 – < 80 : sedang
- 81 – < 90 : tinggi
- >= 91 : sangat tinggi

**Exclusion (STRICT)**
- Baris awal: {excl_stats['baris_awal']}
- Terhapus: {excl_stats['terhapus_total']} (prov-drop={excl_stats['terhapus_prov_drop']}, kab/kota-drop={excl_stats['terhapus_kabkota_drop']})
- Diolah: {excl_stats['diolah']}

**Normalisasi min–max global per fase (MSI -> 0–100)**
- Pra : min={MIN_PRA:.6f} | max={MAX_PRA:.6f}
- Saat : min={MIN_SAAT:.6f} | max={MAX_SAAT:.6f}
- Pasca: min={MIN_PAS:.6f} | max={MAX_PAS:.6f}
"""
    )

    # Filter controls: Kab/Kota choices are refreshed when Provinsi changes.
    with gr.Row():
        dd_prov = gr.Dropdown(label="Provinsi", choices=get_prov_choices(), value="(Semua)")
        dd_kab = gr.Dropdown(label="Kab/Kota", choices=["(Semua)"], value="(Semua)")

    with gr.Row():
        run_btn = gr.Button("Jalankan", variant="primary")
        info_md = gr.Markdown("")

    hero_md = gr.Markdown("## Ringkasan Eksekutif\nPilih Provinsi lalu klik Jalankan.")
    # Filtered respondent records, shared with the demographic callbacks.
    state_records = gr.State([])

    # Download buttons: Excel workbook and the LLM-backed .docx report.
    with gr.Row():
        btn_xlsx = gr.Button("⬇️ Download Excel", variant="secondary")
        file_xlsx = gr.File(label="File Excel", interactive=False)
        btn_docx = gr.Button("⬇️ Download Laporan Word (LLM)", variant="secondary")
        file_docx = gr.File(label="File Word", interactive=False)

    with gr.Accordion(f"🟢 Kab/Kota n≥{MIN_RESPONDEN_SLOVIN} (tanpa penyesuaian)", open=True):
        out_kab_valid = gr.DataFrame(interactive=False)

    with gr.Accordion("🔵 Skor Provinsi (dari Indeks_final kab/kota)", open=False):
        out_prov = gr.DataFrame(interactive=False)

    with gr.Accordion("🟠 Analisis Demografi (mean index_0_100)", open=False):
        dimensi = gr.Radio(
            label="Pilih dimensi demografi",
            choices=["Jenis Kelamin", "Kelompok Usia", "Tingkat Pendidikan"],
            value="Jenis Kelamin",
        )
        demog_btn = gr.Button("Tampilkan", variant="secondary")
        demog_plot = gr.Plot()
        demog_text = gr.Markdown("")

    with gr.Accordion(f"🟡 Kab/Kota n<{MIN_RESPONDEN_SLOVIN} (DISESUAIKAN)", open=False):
        out_kab_adjusted = gr.DataFrame(interactive=False)

    with gr.Accordion("⚪ Verifikasi Kondisi Sampel (faktor penyesuaian)", open=False):
        out_verif = gr.DataFrame(interactive=False)

    # Hidden components kept only to absorb extra compute_outputs() values.
    detail_df = gr.DataFrame(interactive=False, visible=False)
    detail_plot = gr.Plot(visible=False)

    dd_prov.change(fn=update_kab_dropdown, inputs=dd_prov, outputs=dd_kab)

    # NOTE(review): `detail_df` and `detail_plot` appear twice in this outputs
    # list — presumably compute_outputs returns 13 values and the duplicates
    # are needed for positional matching; confirm against compute_outputs.
    run_btn.click(
        fn=compute_outputs,
        inputs=[dd_prov, dd_kab],
        outputs=[
            hero_md, info_md,
            out_kab_valid, out_prov, out_kab_adjusted, out_verif,
            state_records,
            detail_df, detail_plot,
            detail_df, detail_plot,
            demog_plot, demog_text
        ],
    )

    # Same pipeline re-runs automatically whenever the Kab/Kota filter changes.
    dd_kab.change(
        fn=compute_outputs,
        inputs=[dd_prov, dd_kab],
        outputs=[
            hero_md, info_md,
            out_kab_valid, out_prov, out_kab_adjusted, out_verif,
            state_records,
            detail_df, detail_plot,
            detail_df, detail_plot,
            demog_plot, demog_text
        ],
    )

    # Demographic chart is computed from the cached records, no recompute.
    demog_btn.click(
        fn=demog_run,
        inputs=[state_records, dimensi],
        outputs=[demog_plot, demog_text],
    )

    btn_xlsx.click(fn=export_excel, inputs=[dd_prov, dd_kab], outputs=[file_xlsx])
    btn_docx.click(fn=export_word_report, inputs=[dd_prov, dd_kab], outputs=[file_docx])
|
| |
| |
| |
if __name__ == "__main__":
    # Launch the Gradio app only when run as a script (not when imported,
    # e.g. by a HF Spaces runner that serves `demo` itself).
    demo.launch()