irhamni committed on
Commit
ee45588
·
verified ·
1 Parent(s): 868b61f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +277 -254
app.py CHANGED
@@ -1,48 +1,83 @@
1
  # -*- coding: utf-8 -*-
2
  """
3
- IPLM 2025 — FINAL (NO UPLOAD) — FULL REWRITE (NO RINGKAS)
4
-
5
- ✅ Jenis tampil: sekolah, umum, khusus (khusus ditampilkan sebagai jenis)
6
- ✅ Indeks dasar per entitas: Yeo-Johnson + MinMax nasional per indikator
7
-
8
- ✅ UPDATE UTAMA (REQUEST):
9
- Penyesuaian berbasis target sampel **33.88%** (bukan 68%) untuk setiap jenis perpustakaan:
10
- TARGET_RATIO = 0.3388
11
- faktor_penyesuaian = min(n_terkumpul / target_33_88, 1.0)
12
-
13
- ✅ AGREGAT WILAYAH (KESELURUHAN) — FIX UTAMA:
14
- Semua kolom “keseluruhan” wilayah WAJIB diambil dari rata-rata 3 jenis
15
- (sekolah + umum + khusus) ÷ 3 (missing=0, tetap ÷3)
16
- -> termasuk Indeks_Dasar_Agregat_0_100 dan Indeks_Final_Wilayah_0_100
17
-
18
- ✅ Agregat Wilayah × Jenis:
19
- Indeks_Final_Agregat_0_100 = Indeks_Dasar_Agregat_0_100 × faktor_penyesuaian_jenis
20
- (faktor per jenis berbasis target 33.88%)
21
-
22
- ✅ Ringkasan (Jenis + Keseluruhan) selalu 4 baris: sekolah, umum, khusus, keseluruhan
23
- ✅ Keseluruhan ringkasan = (final_sekolah+final_umum+final_khusus)/3 (missing=0, tetap ÷3)
24
-
25
- ✅ Detail entitas: Indeks_Final_0_100 menempel dari Agregat Wilayah (Keseluruhan) (bukan per-row)
26
- ✅ Bell curve per JENIS berbasis skor kinerja per entitas (row-level)
27
-
28
- ✅ METODE PENILAIAN KINERJA (REKOMENDASI UTAMA):
29
- - Tetap tampilkan skor absolut: Indeks_Final_... (disesuaikan target 33.88%)
30
- - Tambahkan skor kinerja relatif yang stabil & audit-friendly:
31
- 1) Score_Kinerja_Percentile_0_100 (0–100) ← utama
32
- 2) Score_Kinerja_RobustZ_0_100 (0–100; 50+10*z_robust) ← opsional, tahan outlier
33
-
34
- ✅ Download (tanpa upload box)
35
- ✅ Download Data Mentah (.xlsx) = RAW hasil filter (bukan agregat)
36
-
37
- FIX DISPLAY:
38
- ✅ “null/NaN” untuk target/pop/coverage jenis -> dibuat 0 agar tidak tampil null
39
- ✅ Verifikasi target 33.88% (tanpa koma untuk integer) -> target/pop/gap dibulatkan integer
40
- ✅ TABEL "Agregat Wilayah × Jenis" (UI) hanya sampai kolom Indeks_Dasar_Agregat_0_100
41
-
42
- 🔥 FIX PENTING (BUG YANG KAMU KENA):
43
- ✅ Dashboard harus menampilkan Score_Kinerja_WilayahTotal_Percentile_0_100 yang dihitung GLOBAL (nasional),
44
- bukan percentile dari data yang sudah terfilter (yang bisa jadi 100 kalau cuma 1 wilayah).
45
- -> Implementasi: hitung agg_total_global (sesuai mode kewenangan) lalu merge ke agg_total filter.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  """
47
 
48
  import os
@@ -86,7 +121,7 @@ POP_KHUSUS = os.getenv("POP_KHUSUS", "Data_populasi_perp_khusus.xlsx")
86
  W_KEPATUHAN = float(os.getenv("W_KEPATUHAN", "0.30"))
87
  W_KINERJA = float(os.getenv("W_KINERJA", "0.70"))
88
 
89
- # βœ… target sampel 33.88%
90
  TARGET_RATIO = float(os.getenv("TARGET_RATIO", "0.3388"))
91
 
92
  # kinerja relatif
@@ -143,6 +178,7 @@ def coerce_num(val):
143
  t = t.replace("\u00a0", " ").replace("Rp", "").replace("%", "")
144
  t = re.sub(r"[^0-9,.\-]", "", t)
145
 
 
146
  if t.count(".") > 1 and t.count(",") == 1:
147
  t = t.replace(".", "").replace(",", ".")
148
  elif t.count(",") > 1 and t.count(".") == 1:
@@ -221,6 +257,10 @@ def safe_div(num, den):
221
  return float(num) / float(den)
222
 
223
  def faktor_penyesuaian_total(n_total: float, target_total: float) -> float:
 
 
 
 
224
  if target_total is None or pd.isna(target_total) or float(target_total) <= 0:
225
  return 1.0
226
  if n_total is None or pd.isna(n_total) or float(n_total) < 0:
@@ -234,10 +274,9 @@ def add_kinerja_scores(
234
  prefix: str = "Score_Kinerja"
235
  ) -> pd.DataFrame:
236
  """
237
- Tambah:
238
- - {prefix}_Percentile_0_100
239
- - {prefix}_RobustZ_0_100 (50+10*z_robust, clip 0..100)
240
- Grouping untuk fairness: misal per Jenis.
241
  """
242
  if df is None or df.empty or score_col not in df.columns:
243
  return df
@@ -253,6 +292,7 @@ def add_kinerja_scores(
253
  )
254
  else:
255
  out[f"{prefix}_Percentile_0_100"] = out[score_col].rank(pct=True, method="average") * 100.0
 
256
  out[f"{prefix}_Percentile_0_100"] = (
257
  pd.to_numeric(out[f"{prefix}_Percentile_0_100"], errors="coerce")
258
  .fillna(0.0).clip(0, 100).round(2)
@@ -265,8 +305,10 @@ def add_kinerja_scores(
265
  v = v.replace([np.inf, -np.inf], np.nan)
266
  if v.dropna().shape[0] < 2:
267
  return pd.Series(50.0, index=v.index)
 
268
  med = float(np.nanmedian(v.values))
269
  mad = float(np.nanmedian(np.abs(v.values - med)))
 
270
  if (not np.isfinite(mad)) or mad <= 1e-12:
271
  sd = float(np.nanstd(v.values, ddof=1))
272
  if (not np.isfinite(sd)) or sd <= 1e-12:
@@ -274,6 +316,7 @@ def add_kinerja_scores(
274
  z = (v - med) / sd
275
  else:
276
  z = (v - med) / (1.4826 * mad)
 
277
  score = 50.0 + 10.0 * z
278
  return score.clip(0, 100).fillna(50.0)
279
 
@@ -316,6 +359,7 @@ pengelolaan_cols = [
316
  ]
317
  all_indicators = koleksi_cols + sdm_cols + pelayanan_cols + pengelolaan_cols
318
 
 
319
  alias_map_raw = {
320
  "j_judul_koleksi_tercetak": "JudulTercetak",
321
  "j_eksemplar_koleksi_tercetak": "EksemplarTercetak",
@@ -347,7 +391,7 @@ alias_map = {_canon(k): v for k, v in alias_map_raw.items()}
347
 
348
 
349
  # ============================================================
350
- # 4) PIPELINE NASIONAL (ENTITAS)
351
  # ============================================================
352
 
353
  def _mean_norm_cols(row, cols):
@@ -362,8 +406,17 @@ def _mean_norm_cols(row, cols):
362
  return float(np.mean(vals)) if vals else 0.0
363
 
364
  def prepare_global(df_src: pd.DataFrame) -> pd.DataFrame:
 
 
 
 
 
 
 
 
365
  if df_src is None or df_src.empty:
366
  return df_src
 
367
  df = df_src.copy()
368
 
369
  # rename indikator
@@ -428,6 +481,15 @@ _CACHE = {
428
  }
429
 
430
  def _parse_pop_khusus(path_xlsx: str) -> pd.DataFrame:
 
 
 
 
 
 
 
 
 
431
  df = pd.read_excel(path_xlsx)
432
  if df is None or df.empty:
433
  return pd.DataFrame()
@@ -483,6 +545,14 @@ def _parse_pop_khusus(path_xlsx: str) -> pd.DataFrame:
483
  return pop
484
 
485
  def load_default_files(force=False):
 
 
 
 
 
 
 
 
486
  key = (
487
  DATA_FILE, POP_KAB, POP_PROV, POP_KHUSUS,
488
  _mtime(DATA_FILE), _mtime(POP_KAB), _mtime(POP_PROV), _mtime(POP_KHUSUS)
@@ -518,6 +588,7 @@ def load_default_files(force=False):
518
  _CACHE.update({"key": key, "df_all": None, "df_raw": None, "pop_kab": None, "pop_prov": None, "pop_khusus": None, "meta": {}, "info": info})
519
  return None, None, None, None, None, {}, info
520
 
 
521
  val_map_jenis = {
522
  "PERPUSTAKAAN SEKOLAH": "sekolah", "SEKOLAH": "sekolah",
523
  "PERPUSTAKAAN UMUM": "umum", "UMUM": "umum", "PERPUSTAKAAN DAERAH": "umum",
@@ -531,7 +602,7 @@ def load_default_files(force=False):
531
  df_raw["prov_key"] = df_raw["PROV_DISP"].apply(norm_prov_label)
532
  df_raw["kab_key"] = df_raw["KAB_DISP"].apply(norm_kab_label)
533
 
534
- # Dedup lebih aman
535
  if nama_col and nama_col in df_raw.columns:
536
  kcols = [prov_col, kab_col, kew_col, jenis_col, nama_col]
537
  else:
@@ -616,6 +687,12 @@ def build_faktor_wilayah_jenis(
616
  pop_khusus: pd.DataFrame,
617
  kew_value: str
618
  ):
 
 
 
 
 
 
619
  if df_filtered is None or df_filtered.empty:
620
  return pd.DataFrame()
621
 
@@ -627,7 +704,7 @@ def build_faktor_wilayah_jenis(
627
 
628
  jenis_list = ["sekolah", "umum", "khusus"]
629
 
630
- # tentukan level
631
  if "PROV" in kew_norm:
632
  key_col, label_col, label_name, mode = "prov_key", "PROV_DISP", "Provinsi", "PROV"
633
  base_pop = pop_prov.copy() if (pop_prov is not None and not pop_prov.empty) else pd.DataFrame()
@@ -648,6 +725,7 @@ def build_faktor_wilayah_jenis(
648
  on="_tmp"
649
  ).drop(columns="_tmp")
650
 
 
651
  cnt = (
652
  df.groupby([key_col, label_col, "_dataset"], dropna=False)
653
  .size()
@@ -662,7 +740,7 @@ def build_faktor_wilayah_jenis(
662
  base_n["target_total_33_88_jenis"] = 0.0
663
  base_n["pop_total_jenis"] = 0.0
664
 
665
- # SEKOLAH + UMUM dari POP_KAB / POP_PROV
666
  if not base_pop.empty:
667
  if mode == "KAB":
668
  pop_sekolah = pd.to_numeric(base_pop.get("jumlah_populasi_sekolah", 0), errors="coerce").fillna(0.0)
@@ -672,6 +750,8 @@ def build_faktor_wilayah_jenis(
672
  tgt_umum = pop_umum * float(TARGET_RATIO)
673
  else:
674
  sma = pd.to_numeric(base_pop.get("sma ", base_pop.get("sma", 0)), errors="coerce").fillna(0.0)
 
 
675
  smk = pd.to_numeric(base_pop.get("smk", 0), errors="coerce").fillna(0.0)
676
  slb = pd.to_numeric(base_pop.get("slb", 0), errors="coerce").fillna(0.0)
677
 
@@ -716,6 +796,7 @@ def build_faktor_wilayah_jenis(
716
  m_need_pop = (base_n["pop_total_jenis"] <= 0) & (base_n["target_total_33_88_jenis"] > 0)
717
  base_n.loc[m_need_pop, "pop_total_jenis"] = base_n.loc[m_need_pop, "target_total_33_88_jenis"] / float(TARGET_RATIO)
718
 
 
719
  base_n["faktor_penyesuaian_jenis"] = [
720
  faktor_penyesuaian_total(n, t)
721
  for n, t in zip(
@@ -740,12 +821,12 @@ def build_faktor_wilayah_jenis(
740
  )
741
  ]
742
 
743
- # display
744
  base_n["target_total_33_88_jenis"] = pd.to_numeric(base_n["target_total_33_88_jenis"], errors="coerce").fillna(0).round(0).astype(int)
745
- base_n["pop_total_jenis"] = pd.to_numeric(base_n["pop_total_jenis"], errors="coerce").fillna(0).round(0).astype(int)
746
- base_n["coverage_jenis_%"] = pd.to_numeric(base_n["coverage_jenis_%"], errors="coerce").fillna(0.0).round(2)
747
  base_n["faktor_penyesuaian_jenis"] = pd.to_numeric(base_n["faktor_penyesuaian_jenis"], errors="coerce").fillna(1.0).round(3)
748
- base_n["gap_target33_88_jenis"] = pd.to_numeric(base_n["gap_target33_88_jenis"], errors="coerce").fillna(0).round(0).astype(int)
749
 
750
  return base_n
751
 
@@ -755,6 +836,16 @@ def build_faktor_wilayah_jenis(
755
  # ============================================================
756
 
757
  def build_agg_wilayah_jenis(df_filtered: pd.DataFrame, faktor_wilayah_jenis: pd.DataFrame, kew_value: str):
 
 
 
 
 
 
 
 
 
 
758
  if df_filtered is None or df_filtered.empty:
759
  return pd.DataFrame()
760
 
@@ -814,14 +905,13 @@ def build_agg_wilayah_jenis(df_filtered: pd.DataFrame, faktor_wilayah_jenis: pd.
814
 
815
  keep = ["group_key", label_name, "Jenis",
816
  "faktor_penyesuaian_jenis", "target_total_33_88_jenis", "pop_total_jenis",
817
- "coverage_jenis_%", "gap_target33_88_jenis"]
818
  fw = fw[[c for c in keep if c in fw.columns]].copy()
819
 
820
  agg = agg.merge(fw, on=["group_key", label_name, "Jenis"], how="left")
821
-
822
  agg["faktor_penyesuaian_jenis"] = pd.to_numeric(agg["faktor_penyesuaian_jenis"], errors="coerce").fillna(1.0)
823
 
824
- for c in ["target_total_33_88_jenis","pop_total_jenis","gap_target33_88_jenis"]:
825
  if c in agg.columns:
826
  agg[c] = pd.to_numeric(agg[c], errors="coerce").fillna(0).round(0).astype(int)
827
 
@@ -834,7 +924,7 @@ def build_agg_wilayah_jenis(df_filtered: pd.DataFrame, faktor_wilayah_jenis: pd.
834
  * pd.to_numeric(agg["faktor_penyesuaian_jenis"], errors="coerce").fillna(1.0)
835
  )
836
 
837
- # Kinerja relatif per jenis (dibandingkan sesama jenis)
838
  agg = add_kinerja_scores(
839
  agg,
840
  score_col="Indeks_Final_Agregat_0_100",
@@ -855,7 +945,6 @@ def build_agg_wilayah_jenis(df_filtered: pd.DataFrame, faktor_wilayah_jenis: pd.
855
  agg[c] = pd.to_numeric(agg[c], errors="coerce").fillna(0.0).round(2)
856
 
857
  agg["faktor_penyesuaian_jenis"] = pd.to_numeric(agg["faktor_penyesuaian_jenis"], errors="coerce").fillna(1.0).round(3)
858
-
859
  return agg
860
 
861
 
@@ -864,6 +953,11 @@ def build_agg_wilayah_jenis(df_filtered: pd.DataFrame, faktor_wilayah_jenis: pd.
864
  # ============================================================
865
 
866
  def build_agg_wilayah_total_from_jenis(agg_jenis: pd.DataFrame, faktor_wilayah_jenis: pd.DataFrame, kew_value: str):
 
 
 
 
 
867
  if agg_jenis is None or agg_jenis.empty:
868
  return pd.DataFrame()
869
 
@@ -910,7 +1004,7 @@ def build_agg_wilayah_total_from_jenis(agg_jenis: pd.DataFrame, faktor_wilayah_j
910
  Indeks_Final_Wilayah_0_100=("Indeks_Final_Agregat_0_100", "mean"),
911
  )
912
 
913
- # Tempel info Pop/Target/N per jenis + total
914
  if faktor_wilayah_jenis is not None and not faktor_wilayah_jenis.empty:
915
  fw = faktor_wilayah_jenis.copy()
916
  fw["Jenis"] = fw["Jenis"].astype(str).str.lower().str.strip()
@@ -959,15 +1053,8 @@ def build_agg_wilayah_total_from_jenis(agg_jenis: pd.DataFrame, faktor_wilayah_j
959
  )
960
  out["coverage_target33_88_all_%"] = pd.to_numeric(out["coverage_target33_88_all_%"], errors="coerce").fillna(0.0).round(2)
961
 
962
- # Kinerja relatif keseluruhan (dibandingkan semua wilayah pada tabel ini)
963
- out = add_kinerja_scores(
964
- out,
965
- score_col="Indeks_Final_Wilayah_0_100",
966
- group_cols=None,
967
- prefix="Score_Kinerja_WilayahTotal"
968
- )
969
-
970
- # rounding index
971
  for c in [
972
  "Rata2_sub_koleksi","Rata2_sub_sdm","Rata2_sub_pelayanan","Rata2_sub_pengelolaan",
973
  "Rata2_dim_kepatuhan","Rata2_dim_kinerja"
@@ -980,7 +1067,6 @@ def build_agg_wilayah_total_from_jenis(agg_jenis: pd.DataFrame, faktor_wilayah_j
980
  out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0.0).round(2)
981
 
982
  out["n_total"] = pd.to_numeric(out["n_total"], errors="coerce").fillna(0).round(0).astype(int)
983
-
984
  return out
985
 
986
 
@@ -990,13 +1076,20 @@ def build_agg_wilayah_total_from_jenis(agg_jenis: pd.DataFrame, faktor_wilayah_j
990
 
991
  _GLOBAL_SCORE_CACHE = {}
992
 
993
- def compute_global_score_table(df_all, pop_kab, pop_prov, pop_khusus, kew_value: str):
994
  """
995
- Buat agg_total GLOBAL (nasional) sesuai mode kewenangan (KAB/KOTA vs PROVINSI),
996
- lalu percentilenya dihitung di sini dan dibawa ke hasil filter via merge.
 
 
 
 
 
 
997
  """
 
998
  cache_key = (
999
- str(kew_value or "").upper(),
1000
  _mtime(DATA_FILE), _mtime(POP_KAB), _mtime(POP_PROV), _mtime(POP_KHUSUS),
1001
  float(TARGET_RATIO), float(W_KEPATUHAN), float(W_KINERJA),
1002
  bool(USE_PERCENTILE), bool(USE_ROBUST_Z)
@@ -1005,21 +1098,49 @@ def compute_global_score_table(df_all, pop_kab, pop_prov, pop_khusus, kew_value:
1005
  return _GLOBAL_SCORE_CACHE[cache_key]
1006
 
1007
  if df_all is None or df_all.empty:
1008
- return pd.DataFrame()
 
1009
 
1010
- faktor_wilayah_jenis = build_faktor_wilayah_jenis(df_all, pop_kab, pop_prov, pop_khusus, kew_value)
1011
- agg_jenis_full = build_agg_wilayah_jenis(df_all, faktor_wilayah_jenis, kew_value)
1012
- agg_total_global = build_agg_wilayah_total_from_jenis(agg_jenis_full, faktor_wilayah_jenis, kew_value)
 
 
 
1013
 
1014
- # keep minimal columns for merging
1015
- keep = ["group_key"]
1016
- for c in ["Score_Kinerja_WilayahTotal_Percentile_0_100", "Score_Kinerja_WilayahTotal_RobustZ_0_100"]:
1017
- if c in agg_total_global.columns:
1018
- keep.append(c)
1019
 
1020
- out = agg_total_global[keep].copy() if (agg_total_global is not None and not agg_total_global.empty) else pd.DataFrame()
1021
- _GLOBAL_SCORE_CACHE[cache_key] = out
1022
- return out
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1023
 
1024
 
1025
  # ============================================================
@@ -1198,7 +1319,7 @@ def attach_final_to_detail(df_filtered: pd.DataFrame, agg_total: pd.DataFrame, m
1198
 
1199
 
1200
  # ============================================================
1201
- # 11) VERIFIKASI PER JENIS (TARGET 33.88%, TANPA KOMA UNTUK INTEGER)
1202
  # ============================================================
1203
 
1204
  def build_verif_jenis(faktor_wilayah_jenis: pd.DataFrame, kew_value: str):
@@ -1231,7 +1352,7 @@ def build_verif_jenis(faktor_wilayah_jenis: pd.DataFrame, kew_value: str):
1231
 
1232
 
1233
  # ============================================================
1234
- # 12) BELL CURVE
1235
  # ============================================================
1236
 
1237
  def _make_bell_curve(dfp: pd.DataFrame, xcol: str, title: str, label_col: str | None = None, hover_cols: list | None = None, min_points: int = 2):
@@ -1260,77 +1381,25 @@ def _make_bell_curve(dfp: pd.DataFrame, xcol: str, title: str, label_col: str |
1260
 
1261
  if len(d) < min_points:
1262
  x_single = float(pd.to_numeric(d[xcol], errors="coerce").iloc[0])
1263
- hovertext = None
1264
- if label_col and label_col in d.columns:
1265
- hovertext = [f"{d[label_col].iloc[0]}<br>{xcol}: {x_single:.2f}"]
1266
- fig.add_trace(go.Scatter(
1267
- x=[x_single], y=[0], mode="markers", name="Data", marker=dict(size=10),
1268
- hovertext=hovertext,
1269
- hovertemplate="%{hovertext}<extra></extra>" if hovertext is not None else "Skor: %{x:.2f}<extra></extra>",
1270
- showlegend=False,
1271
- ))
1272
  fig.add_vline(x=x_single, line_width=1, line_dash="dash", annotation_text=f"Nilai: {x_single:.1f}", annotation_position="top")
1273
- fig.add_annotation(text="Data hanya 1 titik (kurva normal tidak dibuat).", x=0.5, y=0.08, xref="paper", yref="paper", showarrow=False)
1274
  fig.update_xaxes(range=[0, 100])
1275
  fig.update_yaxes(rangemode="tozero")
1276
  return fig
1277
 
1278
  x = pd.to_numeric(d[xcol], errors="coerce").astype(float).values
1279
  x = x[np.isfinite(x)]
1280
- if len(x) < 2:
1281
- fig.add_annotation(text="Data tidak cukup untuk kurva.", x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False)
1282
- fig.update_xaxes(range=[0, 100])
1283
- fig.update_yaxes(rangemode="tozero")
1284
- return fig
1285
-
1286
  mu = float(np.mean(x))
1287
- sigma = float(np.std(x, ddof=1)) if len(x) > 1 else 0.0
1288
- if not np.isfinite(sigma) or sigma <= 1e-6:
1289
- sigma = max(float(np.std(x, ddof=0)), 1e-3)
1290
 
1291
  xmin = max(0.0, float(np.min(x)) - 5.0)
1292
  xmax = min(100.0, float(np.max(x)) + 5.0)
1293
- if xmax - xmin < 1e-6:
1294
- xmin = max(0.0, mu - 1.0)
1295
- xmax = min(100.0, mu + 1.0)
1296
-
1297
  xs = np.linspace(xmin, xmax, 250)
1298
  pdf = (1.0 / (sigma * np.sqrt(2 * np.pi))) * np.exp(-0.5 * ((xs - mu) / sigma) ** 2)
1299
 
1300
- fig.add_trace(go.Scatter(
1301
- x=xs, y=pdf, mode="lines", name="Kurva Normal (fit)",
1302
- hovertemplate="x=%{x:.2f}<br>pdf=%{y:.4f}<extra></extra>"
1303
- ))
1304
-
1305
- hovertext = None
1306
- if label_col and label_col in d.columns:
1307
- hcols = hover_cols or []
1308
- parts = []
1309
- for _, r in d.iterrows():
1310
- try:
1311
- xv = float(pd.to_numeric(r.get(xcol, np.nan), errors="coerce"))
1312
- except Exception:
1313
- xv = np.nan
1314
- s = f"{r[label_col]}"
1315
- s += f"<br>{xcol}: {xv:.2f}" if np.isfinite(xv) else f"<br>{xcol}: NA"
1316
- for c in hcols:
1317
- if c in d.columns and pd.notna(r.get(c, np.nan)):
1318
- v = r[c]
1319
- if isinstance(v, (int, np.integer)):
1320
- s += f"<br>{c}: {int(v)}"
1321
- elif isinstance(v, (float, np.floating)):
1322
- s += f"<br>{c}: {float(v):.3f}"
1323
- else:
1324
- s += f"<br>{c}: {v}"
1325
- parts.append(s)
1326
- hovertext = parts
1327
-
1328
- fig.add_trace(go.Scatter(
1329
- x=x, y=np.zeros_like(x), mode="markers", name="Data", marker=dict(size=8),
1330
- hovertext=hovertext,
1331
- hovertemplate="%{hovertext}<extra></extra>" if hovertext is not None else "Skor: %{x:.2f}<extra></extra>",
1332
- showlegend=False
1333
- ))
1334
 
1335
  q1, q2, q3 = np.percentile(x, [25, 50, 75])
1336
  for xv, lab in [(q1, "Q1"), (q2, "Q2 (Median)"), (q3, "Q3"), (mu, "Mean")]:
@@ -1342,7 +1411,7 @@ def _make_bell_curve(dfp: pd.DataFrame, xcol: str, title: str, label_col: str |
1342
 
1343
 
1344
  # ============================================================
1345
- # 13) KPI DASHBOARD (FINAL: skor absolut + percentile GLOBAL)
1346
  # ============================================================
1347
 
1348
  def _safe_first(df, col, default=0.0, where=None):
@@ -1355,23 +1424,21 @@ def _safe_first(df, col, default=0.0, where=None):
1355
  return default
1356
  return float(pd.to_numeric(sub[col], errors="coerce").fillna(default).iloc[0])
1357
 
1358
- def _selected_percentile_from_agg_total(agg_total: pd.DataFrame, kew_value: str):
1359
- if agg_total is None or agg_total.empty:
1360
- return 0.0
1361
- # setelah difilter biasanya hanya 1 wilayah -> ambil baris pertama
1362
- if "Score_Kinerja_WilayahTotal_Percentile_0_100" not in agg_total.columns:
1363
- return 0.0
1364
- return float(pd.to_numeric(agg_total["Score_Kinerja_WilayahTotal_Percentile_0_100"], errors="coerce").fillna(0.0).iloc[0])
1365
-
1366
  def compute_dashboard_kpis(summary_jenis: pd.DataFrame, agg_total: pd.DataFrame):
1367
  final_all = _safe_first(summary_jenis, "Indeks_Final_Disesuaikan_0_100", 0.0, where=summary_jenis["Jenis"].astype(str).str.lower().eq("keseluruhan"))
1368
  dasar_all = _safe_first(summary_jenis, "Indeks_Dasar_0_100", 0.0, where=summary_jenis["Jenis"].astype(str).str.lower().eq("keseluruhan"))
1369
- pctl_sel = _selected_percentile_from_agg_total(agg_total, "")
 
 
 
 
 
1370
  return {"final_all": final_all, "dasar_all": dasar_all, "pctl_sel": pctl_sel}
1371
 
1372
  def build_kpi_markdown(summary_jenis: pd.DataFrame, agg_total: pd.DataFrame) -> str:
1373
  if summary_jenis is None or summary_jenis.empty:
1374
  return ""
 
1375
  k = compute_dashboard_kpis(summary_jenis, agg_total)
1376
 
1377
  def fmt(x, nd=2):
@@ -1401,7 +1468,7 @@ def build_kpi_markdown(summary_jenis: pd.DataFrame, agg_total: pd.DataFrame) ->
1401
 
1402
 
1403
  # ============================================================
1404
- # 14) LLM + WORD (OPSIONAL, TIDAK MEMBLOK UI)
1405
  # ============================================================
1406
 
1407
  _HF_CLIENT = None
@@ -1420,54 +1487,19 @@ def get_llm_client():
1420
  _HF_CLIENT = None
1421
  return None
1422
 
1423
- def build_context(summary_jenis: pd.DataFrame, agg_total: pd.DataFrame, verif_total: pd.DataFrame, wilayah: str, kew: str) -> str:
1424
- lines = []
1425
- lines.append(f"Wilayah filter: {wilayah}")
1426
- lines.append(f"Kewenangan: {kew}")
1427
- lines.append(f"Target sampel per jenis: {TARGET_RATIO*100:.2f}%")
1428
-
1429
- if summary_jenis is not None and not summary_jenis.empty:
1430
- lines.append("\nRingkasan (jenis + keseluruhan):")
1431
- for _, r in summary_jenis.iterrows():
1432
- lines.append(
1433
- f"- {r['Jenis']}: pop={int(r.get('Pop_Total_Jenis',0))}, target33_88={int(r.get('Target33_88_Total_Jenis',0))}, "
1434
- f"terkumpul={int(r.get('Terkumpul_Jenis',0))}, coverage={float(r.get('Coverage_Target33_88_Jenis_%',0)):.2f}%, "
1435
- f"dasar={float(r.get('Indeks_Dasar_0_100',0)):.2f}, final={float(r.get('Indeks_Final_Disesuaikan_0_100',0)):.2f}"
1436
- )
1437
-
1438
- if agg_total is not None and not agg_total.empty and "Indeks_Final_Wilayah_0_100" in agg_total.columns:
1439
- label_col = "Kab/Kota" if "Kab/Kota" in agg_total.columns else ("Provinsi" if "Provinsi" in agg_total.columns else None)
1440
- lines.append("\nWilayah terpilih:")
1441
- r = agg_total.iloc[0]
1442
- wl = r.get(label_col, "(wilayah)") if label_col else "(wilayah)"
1443
- pctl = r.get("Score_Kinerja_WilayahTotal_Percentile_0_100", 0.0)
1444
- lines.append(f"- {wl}: Final={float(r['Indeks_Final_Wilayah_0_100']):.2f} | Percentile(Global)={float(pctl):.2f}")
1445
-
1446
- return "\n".join(lines)
1447
-
1448
  def generate_llm_analysis(summary_jenis, agg_total, verif_total, wilayah, kew):
1449
- ctx = build_context(summary_jenis, agg_total, verif_total, wilayah, kew)
1450
  client = get_llm_client()
1451
  if client is None or (not USE_LLM):
1452
  return "Analisis otomatis (LLM) tidak digunakan / tidak tersedia."
1453
-
1454
- system_prompt = "Anda adalah analis kebijakan perpustakaan di Indonesia. Tulis analisis ringkas berbasis data."
1455
- user_prompt = f"""
1456
- DATA IPLM (RINGKAS):
1457
-
1458
- {ctx}
1459
-
1460
- Buat analisis 3 paragraf:
1461
- 1) Gambaran umum (skor absolut).
1462
- 2) Kinerja relatif (percentile global) + per jenis.
1463
- 3) Rekomendasi singkat.
1464
- Catatan: target sampel yang digunakan adalah {TARGET_RATIO*100:.2f}% (bukan 68%).
1465
- """
1466
  try:
1467
  resp = client.chat_completion(
1468
  model=LLM_MODEL_NAME,
1469
- messages=[{"role":"system","content":system_prompt},{"role":"user","content":user_prompt}],
1470
- max_tokens=700,
 
 
 
1471
  temperature=0.25,
1472
  top_p=0.9,
1473
  )
@@ -1478,30 +1510,17 @@ Catatan: target sampel yang digunakan adalah {TARGET_RATIO*100:.2f}% (bukan 68%)
1478
 
1479
  def generate_word_report(wilayah, summary_jenis, analysis_text):
1480
  if (not DOCX_AVAILABLE) or (Document is None):
1481
- # fallback: tidak bikin docx
1482
  return None
1483
-
1484
  doc = Document()
1485
  doc.add_heading(f"Laporan IPLM β€” {wilayah}", level=1)
1486
  doc.add_paragraph(f"Target sampel per jenis: {TARGET_RATIO*100:.2f}%")
1487
  doc.add_paragraph("Catatan: Percentile kinerja wilayah yang ditampilkan adalah percentile GLOBAL (nasional), bukan dari hasil filter.")
1488
-
1489
  doc.add_heading("Ringkasan (Jenis + Keseluruhan)", level=2)
1490
-
1491
- show = summary_jenis.copy() if summary_jenis is not None else pd.DataFrame()
1492
- if not show.empty:
1493
- preferred = [
1494
- "Jenis","Jumlah_Wilayah","Total_Perpus",
1495
- "Pop_Total_Jenis","Target33_88_Total_Jenis","Terkumpul_Jenis","Coverage_Target33_88_Jenis_%",
1496
- "Indeks_Dasar_0_100","Indeks_Final_Disesuaikan_0_100","Penyesuaian_Poin"
1497
- ]
1498
- show = show[[c for c in preferred if c in show.columns]]
1499
-
1500
  table = doc.add_table(rows=1, cols=len(show.columns))
1501
- hdr = table.rows[0].cells
1502
  for i, c in enumerate(show.columns):
1503
- hdr[i].text = str(c)
1504
-
1505
  for _, row in show.iterrows():
1506
  cells = table.add_row().cells
1507
  for i, c in enumerate(show.columns):
@@ -1514,12 +1533,10 @@ def generate_word_report(wilayah, summary_jenis, analysis_text):
1514
  cells[i].text = str(int(v))
1515
  else:
1516
  cells[i].text = str(v)
1517
-
1518
  doc.add_heading("Analisis (opsional)", level=2)
1519
  for p in (analysis_text or "").split("\n"):
1520
  if p.strip():
1521
  doc.add_paragraph(p.strip())
1522
-
1523
  outpath = tempfile.mktemp(suffix=".docx")
1524
  doc.save(outpath)
1525
  return outpath
@@ -1546,7 +1563,7 @@ def run_calc(prov_value, kab_value, kew_value, df_all, df_raw, pop_kab, pop_prov
1546
  return _empty_outputs("⚠️ Data belum ter-load. Pastikan file tersedia di repo/server.")
1547
 
1548
  # =========================================================
1549
- # 1) FILTER df_all (entitas)
1550
  # =========================================================
1551
  df = df_all.copy()
1552
  if prov_value and prov_value != "(Semua)":
@@ -1560,32 +1577,43 @@ def run_calc(prov_value, kab_value, kew_value, df_all, df_raw, pop_kab, pop_prov
1560
  return _empty_outputs("Tidak ada data untuk filter ini.")
1561
 
1562
  # =========================================================
1563
- # 2) PIPELINE FILTER
1564
  # =========================================================
1565
- faktor_wilayah_jenis = build_faktor_wilayah_jenis(df, pop_kab, pop_prov, pop_khusus, kew_value or "(Semua)")
1566
- agg_jenis_full = build_agg_wilayah_jenis(df, faktor_wilayah_jenis, kew_value or "(Semua)")
1567
- agg_total = build_agg_wilayah_total_from_jenis(agg_jenis_full, faktor_wilayah_jenis, kew_value or "(Semua)")
 
1568
 
1569
  # =========================================================
1570
- # 3) FIX PERCENTILE: merge GLOBAL score table
1571
- # (ini yang bikin Pangkal Pinang jadi 99-an, bukan 100)
1572
  # =========================================================
1573
- global_scores = compute_global_score_table(df_all, pop_kab, pop_prov, pop_khusus, kew_value or "(Semua)")
1574
- if global_scores is not None and (not global_scores.empty) and (agg_total is not None) and (not agg_total.empty):
1575
- agg_total = agg_total.merge(global_scores, on="group_key", how="left")
1576
 
 
 
 
 
 
 
 
 
 
 
 
 
1577
  summary_jenis = build_summary_per_jenis(agg_jenis_full, agg_total)
1578
- verif_total = build_verif_jenis(faktor_wilayah_jenis, kew_value or "(Semua)")
1579
- detail_view = attach_final_to_detail(df, agg_total, meta, kew_value or "(Semua)")
1580
 
1581
  # =========================================================
1582
- # 4) agg_jenis view (UI hanya sampai indeks dasar)
1583
  # =========================================================
1584
  if agg_jenis_full is None or agg_jenis_full.empty:
1585
  agg_jenis_view = agg_jenis_full
1586
  else:
1587
- kew_norm = str(kew_value or "").upper()
1588
- label_name = "Kab/Kota" if ("KAB" in kew_norm or "KOTA" in kew_norm) else ("Provinsi" if "PROV" in kew_norm else "Kab/Kota")
1589
  cols_upto = [
1590
  "group_key",
1591
  label_name,
@@ -1599,7 +1627,7 @@ def run_calc(prov_value, kab_value, kew_value, df_all, df_raw, pop_kab, pop_prov
1599
  agg_jenis_view = agg_jenis_full[cols_upto].copy()
1600
 
1601
  # =========================================================
1602
- # 5) FILTER RAW DOWNLOAD
1603
  # =========================================================
1604
  raw = df_raw.copy()
1605
  if prov_value and prov_value != "(Semua)":
@@ -1610,32 +1638,28 @@ def run_calc(prov_value, kab_value, kew_value, df_all, df_raw, pop_kab, pop_prov
1610
  raw = raw[raw["KEW_NORM"] == kew_value]
1611
 
1612
  # =========================================================
1613
- # 6) Bell curve per jenis (entitas)
1614
  # =========================================================
1615
  if detail_view is None or detail_view.empty:
1616
- fig_sekolah = _make_bell_curve(pd.DataFrame(), "Score_Kinerja_Entitas_Percentile_0_100", "Bell Curve β€” Jenis: Sekolah", min_points=2)
1617
  fig_umum = _make_bell_curve(pd.DataFrame(), "Score_Kinerja_Entitas_Percentile_0_100", "Bell Curve β€” Jenis: Umum", min_points=2)
 
1618
  fig_khusus = _make_bell_curve(pd.DataFrame(), "Score_Kinerja_Entitas_Percentile_0_100", "Bell Curve β€” Jenis: Khusus", min_points=2)
1619
  else:
1620
  xcol_ent = "Score_Kinerja_Entitas_Percentile_0_100" if "Score_Kinerja_Entitas_Percentile_0_100" in detail_view.columns else "Indeks_Dasar_0_100"
1621
- label_col_e = "nm_perpustakaan" if "nm_perpustakaan" in detail_view.columns else None
1622
- hover_cols_e = [c for c in ["Provinsi", "Kab/Kota", "KEW_NORM", "Jenis", "Indeks_Dasar_0_100", "Indeks_Final_0_100", xcol_ent] if c in detail_view.columns]
1623
-
1624
- def _fig_jenis_ent(jenis_key: str, judul: str):
1625
- d = detail_view[detail_view["Jenis"].astype(str).str.lower() == jenis_key].copy()
1626
- return _make_bell_curve(d, xcol=xcol_ent, title=judul, label_col=label_col_e, hover_cols=hover_cols_e, min_points=2)
1627
-
1628
- fig_sekolah = _fig_jenis_ent("sekolah", f"Bell Curve β€” Jenis: Sekolah (Skor: {xcol_ent})")
1629
- fig_umum = _fig_jenis_ent("umum", f"Bell Curve β€” Jenis: Umum (Skor: {xcol_ent})")
1630
- fig_khusus = _fig_jenis_ent("khusus", f"Bell Curve β€” Jenis: Khusus (Skor: {xcol_ent})")
1631
 
1632
  # =========================================================
1633
- # 7) KPI (Sekarang sudah GLOBAL percentile)
1634
  # =========================================================
1635
  kpi_md = build_kpi_markdown(summary_jenis, agg_total)
1636
 
1637
  # =========================================================
1638
- # 8) Export
1639
  # =========================================================
1640
  tmpdir = tempfile.mkdtemp()
1641
  prov_slug = (_canon(prov_value or "SEMUA").upper() or "SEMUA")
@@ -1656,7 +1680,6 @@ def run_calc(prov_value, kab_value, kew_value, df_all, df_raw, pop_kab, pop_prov
1656
 
1657
  wilayah_txt = kab_value if (kab_value and kab_value != "(Semua)") else (prov_value if (prov_value and prov_value != "(Semua)") else "Nasional/All")
1658
  analysis_text = generate_llm_analysis(summary_jenis, agg_total, verif_total, wilayah_txt, kew_value or "(Semua)")
1659
-
1660
  word_path = generate_word_report(wilayah_txt, summary_jenis, analysis_text)
1661
 
1662
  msg = (
@@ -1732,12 +1755,12 @@ with gr.Blocks() as demo:
1732
  βœ… Dashboard KPI menampilkan juga:
1733
  - `Score_Kinerja_WilayahTotal_Percentile_0_100` (**GLOBAL nasional**; bukan hasil filter)
1734
 
1735
- **Kinerja Relatif (untuk evaluasi kinerja):**
1736
- - `Score_Kinerja_*_Percentile_0_100` (utama, stabil tanpa asumsi normal)
1737
- - `Score_Kinerja_*_RobustZ_0_100` (opsional, tahan outlier)
1738
-
1739
  **Skor Absolut (untuk akuntabilitas):**
1740
  - `Indeks_Final_*` (sudah disesuaikan target 33.88%)
 
 
 
 
1741
  """)
1742
 
1743
  state_df = gr.State(None)
@@ -1764,7 +1787,7 @@ with gr.Blocks() as demo:
1764
  gr.Markdown("## Ringkasan (Jenis + Keseluruhan) β€” Pop/Target33.88/Terkumpul/Coverage + Penyesuaian")
1765
  out_summary = gr.DataFrame(interactive=False)
1766
 
1767
- gr.Markdown("## Agregat Wilayah (Keseluruhan) β€” FIX: avg3 dari 3 jenis + Skor Kinerja Relatif (GLOBAL Percentile)")
1768
  out_agg_total = gr.DataFrame(interactive=False)
1769
 
1770
  gr.Markdown("## Agregat Wilayah Γ— Jenis β€” (ditampilkan sampai Indeks_Dasar_Agregat_0_100)")
@@ -1776,7 +1799,7 @@ with gr.Blocks() as demo:
1776
  gr.Markdown("## Kecukupan Sampel 33.88% (tanpa angka koma untuk integer)")
1777
  out_verif = gr.DataFrame(interactive=False)
1778
 
1779
- gr.Markdown("## Bell Curve β€” per Jenis (berbasis Score_Kinerja_Entitas_Percentile_0_100 jika tersedia)")
1780
  gr.Markdown("### Perpustakaan Umum")
1781
  bell_umum = gr.Plot(scale=1)
1782
 
 
1
  # -*- coding: utf-8 -*-
2
  """
3
+ IPLM 2025 — Final (Target Sampel 33.88% per Jenis) + Kinerja Relatif (Percentile)
4
+
5
+ ───────────────────────────────────────────────────────────────────────────────
6
+ DOKUMENTASI / KONSEP (DIPERTAHANKAN + DIPERJELAS)
7
+
8
+ A. Skor ABSOLUT (untuk akuntabilitas)
9
+ ------------------------------------
10
+ 1) Indeks_Dasar_0_100
11
+ - Dihitung pada LEVEL ENTITAS (baris perpustakaan) dari indikator:
12
+ Yeo-Johnson transform (per indikator) → MinMax global (0–1) → sub-indeks → dimensi → indeks.
13
+ - Rumus:
14
+ dim_kepatuhan = mean(sub_koleksi, sub_sdm)
15
+ dim_kinerja = mean(sub_pelayanan, sub_pengelolaan)
16
+ Indeks_Dasar_0_100 = 100 * (W_KEPATUHAN*dim_kepatuhan + W_KINERJA*dim_kinerja)
17
+
18
+ 2) Penyesuaian kecukupan sampel berbasis TARGET 33.88% (per JENIS)
19
+ - TARGET_RATIO = 0.3388
20
+ - Untuk setiap wilayah × jenis:
21
+ pop_total_jenis = populasi perpustakaan jenis tsb (dari tabel POP)
22
+ target_total_33_88_jenis = pop_total_jenis * TARGET_RATIO
23
+ n_jenis = jumlah entitas (baris) terkumpul pada wilayah × jenis
24
+ faktor_penyesuaian_jenis = min(n_jenis / target_total_33_88_jenis, 1.0)
25
+ - Indeks_Final_Agregat_0_100 (wilayah×jenis):
26
+ Indeks_Final_Agregat_0_100 = Indeks_Dasar_Agregat_0_100 * faktor_penyesuaian_jenis
27
+
28
+ 3) AGREGAT WILAYAH (KESELURUHAN) = rata-rata 3 jenis (FIX)
29
+ - Keseluruhan wajib avg3:
30
+ Indeks_Dasar_Agregat_0_100(keseluruhan) = (dasar_sekolah + dasar_umum + dasar_khusus) / 3
31
+ Indeks_Final_Wilayah_0_100(keseluruhan) = (final_sekolah + final_umum + final_khusus) / 3
32
+ - Missing jenis dianggap 0 tetapi tetap dibagi 3 (sesuai requirement).
33
+
34
+ B. Skor KINERJA RELATIF (untuk benchmarking, bukan pengganti skor absolut)
35
+ ---------------------------------------------------------------------------
36
+ Kolom utama: Score_Kinerja_WilayahTotal_Percentile_0_100
37
+ Definisi: posisi relatif suatu wilayah dibanding wilayah lain secara NASIONAL.
38
+
39
+ Karakteristik utama percentile:
40
+ • Skala 0–100
41
+ • Tidak bergantung pada asumsi distribusi normal
42
+ • Stabil terhadap nilai ekstrem (karena berbasis peringkat)
43
+ • Mudah diinterpretasikan sebagai posisi peringkat
44
+
45
+ RUMUS / IMPLEMENTASI (yang benar dan sesuai FIX bug):
46
+ 1) Tentukan "universe" perhitungan GLOBAL sesuai mode kewenangan:
47
+ - Jika kewenangan = "KAB/KOTA": universe = semua kab/kota (nasional) yang KEW_NORM == "KAB/KOTA"
48
+ - Jika kewenangan = "PROVINSI": universe = semua provinsi (nasional) yang KEW_NORM == "PROVINSI"
49
+ - Jika "(Semua)": default mengikuti pilihan (atau semua yang relevan) → pada UI kita pakai nilai dropdown.
50
+
51
+ 2) Hitung dulu agg_total_global untuk universe tersebut (tanpa filter prov/kab):
52
+ - Dari df_all (nasional) → faktor_wilayah_jenis → agg_jenis_global → agg_total_global
53
+
54
+ 3) Hitung percentile GLOBAL dari Indeks_Final_Wilayah_0_100 pada agg_total_global:
55
+ - Secara konsep:
56
+ Percentile(w) = 100 * (rank_w / N)
57
+ - Implementasi pandas yang audit-friendly:
58
+ rank(pct=True, method="average") * 100
59
+
60
+ 4) Tempelkan nilai percentile global itu ke hasil filter (agg_total yang biasanya hanya 1 baris):
61
+ - WAJIB pakai mapping by group_key (bukan merge yang bikin kolom _x/_y)
62
+ - Kenapa? agar tidak terjadi:
63
+ • percentile jadi 100 karena dihitung dari 1 baris filter
64
+ • atau KPI membaca kolom yang salah akibat suffix merge
65
+
66
+ C. Bug yang kamu laporkan (0.00 / 100 semua)
67
+ --------------------------------------------
68
+ Kasus 1: "100 semua" untuk 1 wilayah yang difilter → terjadi jika percentile dihitung dari data filter.
69
+ Solusi: percentile selalu dihitung di agg_total_global lalu ditempel.
70
+
71
+ Kasus 2: KPI jadi 0.00 (padahal harus 99-an) → terjadi jika merge menghasilkan kolom
72
+ Score_Kinerja_WilayahTotal_Percentile_0_100_x/_y sehingga kolom yang dibaca kosong/NaN.
73
+ Solusi: mapping dengan dict (tidak ada suffix), dan pastikan KPI membaca kolom final.
74
+
75
+ ───────────────────────────────────────────────────────────────────────────────
76
+ KODE DI BAWAH INI SUDAH FIX:
77
+ ✅ Score_Kinerja_WilayahTotal_Percentile_0_100 dihitung GLOBAL (nasional) sesuai kewenangan
78
+ ✅ Ditempel pakai MAP (no _x/_y)
79
+ ✅ KPI selalu baca kolom final yang benar
80
+ ✅ Tetap mempertahankan semua fitur: ringkasan, agregat, verif, detail, bell curve, export
81
  """
82
 
83
  import os
 
121
  W_KEPATUHAN = float(os.getenv("W_KEPATUHAN", "0.30"))
122
  W_KINERJA = float(os.getenv("W_KINERJA", "0.70"))
123
 
124
+ # ✅ target sampel 33.88% per jenis
125
  TARGET_RATIO = float(os.getenv("TARGET_RATIO", "0.3388"))
126
 
127
  # kinerja relatif
 
178
  t = t.replace("\u00a0", " ").replace("Rp", "").replace("%", "")
179
  t = re.sub(r"[^0-9,.\-]", "", t)
180
 
181
+ # smart decimal
182
  if t.count(".") > 1 and t.count(",") == 1:
183
  t = t.replace(".", "").replace(",", ".")
184
  elif t.count(",") > 1 and t.count(".") == 1:
 
257
  return float(num) / float(den)
258
 
259
  def faktor_penyesuaian_total(n_total: float, target_total: float) -> float:
260
+ """
261
+ faktor = min(n / target, 1.0)
262
+ - Jika target <= 0 β†’ default 1.0 (tidak menghukum)
263
+ """
264
  if target_total is None or pd.isna(target_total) or float(target_total) <= 0:
265
  return 1.0
266
  if n_total is None or pd.isna(n_total) or float(n_total) < 0:
 
274
  prefix: str = "Score_Kinerja"
275
  ) -> pd.DataFrame:
276
  """
277
+ Tambah kolom:
278
+ - {prefix}_Percentile_0_100 = rank(pct=True)*100
279
+ - {prefix}_RobustZ_0_100 = 50 + 10*z_robust (MAD-based), clip 0..100
 
280
  """
281
  if df is None or df.empty or score_col not in df.columns:
282
  return df
 
292
  )
293
  else:
294
  out[f"{prefix}_Percentile_0_100"] = out[score_col].rank(pct=True, method="average") * 100.0
295
+
296
  out[f"{prefix}_Percentile_0_100"] = (
297
  pd.to_numeric(out[f"{prefix}_Percentile_0_100"], errors="coerce")
298
  .fillna(0.0).clip(0, 100).round(2)
 
305
  v = v.replace([np.inf, -np.inf], np.nan)
306
  if v.dropna().shape[0] < 2:
307
  return pd.Series(50.0, index=v.index)
308
+
309
  med = float(np.nanmedian(v.values))
310
  mad = float(np.nanmedian(np.abs(v.values - med)))
311
+
312
  if (not np.isfinite(mad)) or mad <= 1e-12:
313
  sd = float(np.nanstd(v.values, ddof=1))
314
  if (not np.isfinite(sd)) or sd <= 1e-12:
 
316
  z = (v - med) / sd
317
  else:
318
  z = (v - med) / (1.4826 * mad)
319
+
320
  score = 50.0 + 10.0 * z
321
  return score.clip(0, 100).fillna(50.0)
322
 
 
359
  ]
360
  all_indicators = koleksi_cols + sdm_cols + pelayanan_cols + pengelolaan_cols
361
 
362
+ # alias kolom DM → nama baku indikator
363
  alias_map_raw = {
364
  "j_judul_koleksi_tercetak": "JudulTercetak",
365
  "j_eksemplar_koleksi_tercetak": "EksemplarTercetak",
 
391
 
392
 
393
  # ============================================================
394
+ # 4) PIPELINE NASIONAL (LEVEL ENTITAS)
395
  # ============================================================
396
 
397
  def _mean_norm_cols(row, cols):
 
406
  return float(np.mean(vals)) if vals else 0.0
407
 
408
  def prepare_global(df_src: pd.DataFrame) -> pd.DataFrame:
409
+ """
410
+ Transform + normalisasi indikator pada level entitas:
411
+ - rename kolom indikator (alias)
412
+ - coerce numeric
413
+ - Yeo-Johnson per indikator (standardize=False)
414
+ - MinMax global 0-1
415
+ - hitung sub_*, dim_*, Indeks_Dasar_0_100
416
+ """
417
  if df_src is None or df_src.empty:
418
  return df_src
419
+
420
  df = df_src.copy()
421
 
422
  # rename indikator
 
481
  }
482
 
483
  def _parse_pop_khusus(path_xlsx: str) -> pd.DataFrame:
484
+ """
485
+ POP_KHUSUS memiliki format campuran:
486
+ - Baris 'PROVINSI X' β†’ dianggap level PROV
487
+ - Baris berikutnya β†’ dianggap KAB/KOTA di bawah prov tersebut
488
+ Output distandarkan:
489
+ LEVEL: PROV / KAB
490
+ prov_key / kab_key
491
+ Pop_Total_Jenis
492
+ """
493
  df = pd.read_excel(path_xlsx)
494
  if df is None or df.empty:
495
  return pd.DataFrame()
 
545
  return pop
546
 
547
  def load_default_files(force=False):
548
+ """
549
+ Load 4 file:
550
+ - DM (DATA_FILE) bisa multi-sheet β†’ concat
551
+ - POP_KAB, POP_PROV, POP_KHUSUS
552
+ + Standarisasi kolom wilayah & jenis
553
+ + Dedup baris DM
554
+ + prepare_global() (YJ+MinMax+Indeks_Dasar)
555
+ """
556
  key = (
557
  DATA_FILE, POP_KAB, POP_PROV, POP_KHUSUS,
558
  _mtime(DATA_FILE), _mtime(POP_KAB), _mtime(POP_PROV), _mtime(POP_KHUSUS)
 
588
  _CACHE.update({"key": key, "df_all": None, "df_raw": None, "pop_kab": None, "pop_prov": None, "pop_khusus": None, "meta": {}, "info": info})
589
  return None, None, None, None, None, {}, info
590
 
591
+ # mapping jenis β†’ baku (sekolah/umum/khusus)
592
  val_map_jenis = {
593
  "PERPUSTAKAAN SEKOLAH": "sekolah", "SEKOLAH": "sekolah",
594
  "PERPUSTAKAAN UMUM": "umum", "UMUM": "umum", "PERPUSTAKAAN DAERAH": "umum",
 
602
  df_raw["prov_key"] = df_raw["PROV_DISP"].apply(norm_prov_label)
603
  df_raw["kab_key"] = df_raw["KAB_DISP"].apply(norm_kab_label)
604
 
605
+ # Dedup aman berdasarkan (prov,kab,kew,jenis,nama_perpus)
606
  if nama_col and nama_col in df_raw.columns:
607
  kcols = [prov_col, kab_col, kew_col, jenis_col, nama_col]
608
  else:
 
687
  pop_khusus: pd.DataFrame,
688
  kew_value: str
689
  ):
690
+ """
691
+ Output tabel:
692
+ group_key + (Kab/Kota atau Provinsi) + Jenis
693
+ n_jenis, pop_total_jenis, target_total_33_88_jenis,
694
+ coverage_jenis_%, faktor_penyesuaian_jenis, gap_target33_88_jenis
695
+ """
696
  if df_filtered is None or df_filtered.empty:
697
  return pd.DataFrame()
698
 
 
704
 
705
  jenis_list = ["sekolah", "umum", "khusus"]
706
 
707
+ # tentukan level berdasarkan kewenangan
708
  if "PROV" in kew_norm:
709
  key_col, label_col, label_name, mode = "prov_key", "PROV_DISP", "Provinsi", "PROV"
710
  base_pop = pop_prov.copy() if (pop_prov is not None and not pop_prov.empty) else pd.DataFrame()
 
725
  on="_tmp"
726
  ).drop(columns="_tmp")
727
 
728
+ # count entitas per wilayahΓ—jenis
729
  cnt = (
730
  df.groupby([key_col, label_col, "_dataset"], dropna=False)
731
  .size()
 
740
  base_n["target_total_33_88_jenis"] = 0.0
741
  base_n["pop_total_jenis"] = 0.0
742
 
743
+ # SEKOLAH + UMUM dari POP_KAB/POP_PROV
744
  if not base_pop.empty:
745
  if mode == "KAB":
746
  pop_sekolah = pd.to_numeric(base_pop.get("jumlah_populasi_sekolah", 0), errors="coerce").fillna(0.0)
 
750
  tgt_umum = pop_umum * float(TARGET_RATIO)
751
  else:
752
  sma = pd.to_numeric(base_pop.get("sma ", base_pop.get("sma", 0)), errors="coerce").fillna(0.0)
753
+ smk = pd.to_numeric(base_pop.get("smk", 0)),
754
+ slb = pd.to_numeric(base_pop.get("slb", 0)),
755
  smk = pd.to_numeric(base_pop.get("smk", 0), errors="coerce").fillna(0.0)
756
  slb = pd.to_numeric(base_pop.get("slb", 0), errors="coerce").fillna(0.0)
757
 
 
796
  m_need_pop = (base_n["pop_total_jenis"] <= 0) & (base_n["target_total_33_88_jenis"] > 0)
797
  base_n.loc[m_need_pop, "pop_total_jenis"] = base_n.loc[m_need_pop, "target_total_33_88_jenis"] / float(TARGET_RATIO)
798
 
799
+ # faktor penyesuaian
800
  base_n["faktor_penyesuaian_jenis"] = [
801
  faktor_penyesuaian_total(n, t)
802
  for n, t in zip(
 
821
  )
822
  ]
823
 
824
+ # display formatting
825
  base_n["target_total_33_88_jenis"] = pd.to_numeric(base_n["target_total_33_88_jenis"], errors="coerce").fillna(0).round(0).astype(int)
826
+ base_n["pop_total_jenis"] = pd.to_numeric(base_n["pop_total_jenis"], errors="coerce").fillna(0).round(0).astype(int)
827
+ base_n["coverage_jenis_%"] = pd.to_numeric(base_n["coverage_jenis_%"], errors="coerce").fillna(0.0).round(2)
828
  base_n["faktor_penyesuaian_jenis"] = pd.to_numeric(base_n["faktor_penyesuaian_jenis"], errors="coerce").fillna(1.0).round(3)
829
+ base_n["gap_target33_88_jenis"] = pd.to_numeric(base_n["gap_target33_88_jenis"], errors="coerce").fillna(0).round(0).astype(int)
830
 
831
  return base_n
832
 
 
836
  # ============================================================
837
 
838
  def build_agg_wilayah_jenis(df_filtered: pd.DataFrame, faktor_wilayah_jenis: pd.DataFrame, kew_value: str):
839
+ """
840
+ Agregasi:
841
+ wilayah Γ— jenis:
842
+ - Jumlah (n entitas)
843
+ - rata-rata sub/dim
844
+ - Indeks_Dasar_Agregat_0_100 = mean(Indeks_Dasar_0_100)
845
+ - Indeks_Final_Agregat_0_100 = Indeks_Dasar_Agregat_0_100 * faktor_penyesuaian_jenis
846
+ + score kinerja relatif per jenis:
847
+ Score_Kinerja_WilayahJenis_Percentile_0_100
848
+ """
849
  if df_filtered is None or df_filtered.empty:
850
  return pd.DataFrame()
851
 
 
905
 
906
  keep = ["group_key", label_name, "Jenis",
907
  "faktor_penyesuaian_jenis", "target_total_33_88_jenis", "pop_total_jenis",
908
+ "coverage_jenis_%", "gap_target33_88_jenis", "n_jenis"]
909
  fw = fw[[c for c in keep if c in fw.columns]].copy()
910
 
911
  agg = agg.merge(fw, on=["group_key", label_name, "Jenis"], how="left")
 
912
  agg["faktor_penyesuaian_jenis"] = pd.to_numeric(agg["faktor_penyesuaian_jenis"], errors="coerce").fillna(1.0)
913
 
914
+ for c in ["target_total_33_88_jenis","pop_total_jenis","gap_target33_88_jenis","n_jenis"]:
915
  if c in agg.columns:
916
  agg[c] = pd.to_numeric(agg[c], errors="coerce").fillna(0).round(0).astype(int)
917
 
 
924
  * pd.to_numeric(agg["faktor_penyesuaian_jenis"], errors="coerce").fillna(1.0)
925
  )
926
 
927
+ # Kinerja relatif per jenis
928
  agg = add_kinerja_scores(
929
  agg,
930
  score_col="Indeks_Final_Agregat_0_100",
 
945
  agg[c] = pd.to_numeric(agg[c], errors="coerce").fillna(0.0).round(2)
946
 
947
  agg["faktor_penyesuaian_jenis"] = pd.to_numeric(agg["faktor_penyesuaian_jenis"], errors="coerce").fillna(1.0).round(3)
 
948
  return agg
949
 
950
 
 
953
  # ============================================================
954
 
955
  def build_agg_wilayah_total_from_jenis(agg_jenis: pd.DataFrame, faktor_wilayah_jenis: pd.DataFrame, kew_value: str):
956
+ """
957
+ Membentuk tabel wilayah keseluruhan dari agg_jenis, dengan FIX avg3:
958
+ Indeks_Dasar_Agregat_0_100 (keseluruhan) = mean(dasar_3jenis) [missing=0, tetap /3]
959
+ Indeks_Final_Wilayah_0_100 (keseluruhan) = mean(final_3jenis) [missing=0, tetap /3]
960
+ """
961
  if agg_jenis is None or agg_jenis.empty:
962
  return pd.DataFrame()
963
 
 
1004
  Indeks_Final_Wilayah_0_100=("Indeks_Final_Agregat_0_100", "mean"),
1005
  )
1006
 
1007
+ # Tempel info Pop/Target/N per jenis + total (opsional)
1008
  if faktor_wilayah_jenis is not None and not faktor_wilayah_jenis.empty:
1009
  fw = faktor_wilayah_jenis.copy()
1010
  fw["Jenis"] = fw["Jenis"].astype(str).str.lower().str.strip()
 
1053
  )
1054
  out["coverage_target33_88_all_%"] = pd.to_numeric(out["coverage_target33_88_all_%"], errors="coerce").fillna(0.0).round(2)
1055
 
1056
+ # NOTE: percentile global untuk wilayah keseluruhan tidak dihitung di sini.
1057
+ # Ia dihitung oleh fungsi global (compute_global_wilayah_scores) lalu ditempel.
 
 
 
 
 
 
 
1058
  for c in [
1059
  "Rata2_sub_koleksi","Rata2_sub_sdm","Rata2_sub_pelayanan","Rata2_sub_pengelolaan",
1060
  "Rata2_dim_kepatuhan","Rata2_dim_kinerja"
 
1067
  out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0.0).round(2)
1068
 
1069
  out["n_total"] = pd.to_numeric(out["n_total"], errors="coerce").fillna(0).round(0).astype(int)
 
1070
  return out
1071
 
1072
 
 
1076
 
1077
  _GLOBAL_SCORE_CACHE = {}
1078
 
1079
+ def compute_global_wilayah_scores(df_all, pop_kab, pop_prov, pop_khusus, kew_value: str):
1080
  """
1081
+ FIX UTAMA:
1082
+ - Hitung agg_total GLOBAL (nasional) sesuai mode kewenangan (KAB/KOTA vs PROVINSI)
1083
+ - Lalu hitung Score_Kinerja_WilayahTotal_Percentile_0_100 pada agg_total_global
1084
+ - Return mapping dict: group_key -> percentile (dan robustZ jika dipakai)
1085
+
1086
+ Kenapa mapping dict?
1087
+ - Menghindari merge suffix _x/_y
1088
+ - Mencegah KPI membaca kolom yang salah (0.00)
1089
  """
1090
+ kew_norm = str(kew_value or "").upper()
1091
  cache_key = (
1092
+ kew_norm,
1093
  _mtime(DATA_FILE), _mtime(POP_KAB), _mtime(POP_PROV), _mtime(POP_KHUSUS),
1094
  float(TARGET_RATIO), float(W_KEPATUHAN), float(W_KINERJA),
1095
  bool(USE_PERCENTILE), bool(USE_ROBUST_Z)
 
1098
  return _GLOBAL_SCORE_CACHE[cache_key]
1099
 
1100
  if df_all is None or df_all.empty:
1101
+ _GLOBAL_SCORE_CACHE[cache_key] = ({}, {})
1102
+ return {}, {}
1103
 
1104
+ # Universe global sesuai kewenangan
1105
+ if kew_norm in {"KAB/KOTA", "PROVINSI"}:
1106
+ df_univ = df_all[df_all["KEW_NORM"] == kew_norm].copy()
1107
+ else:
1108
+ # fallback: pakai semua (tapi tetap nanti label mengikuti agg_total yang dipakai)
1109
+ df_univ = df_all.copy()
1110
 
1111
+ faktor = build_faktor_wilayah_jenis(df_univ, pop_kab, pop_prov, pop_khusus, kew_norm)
1112
+ agg_jenis = build_agg_wilayah_jenis(df_univ, faktor, kew_norm)
1113
+ agg_total = build_agg_wilayah_total_from_jenis(agg_jenis, faktor, kew_norm)
 
 
1114
 
1115
+ # Hitung score relatif global pada agg_total_global
1116
+ agg_total = add_kinerja_scores(
1117
+ agg_total,
1118
+ score_col="Indeks_Final_Wilayah_0_100",
1119
+ group_cols=None,
1120
+ prefix="Score_Kinerja_WilayahTotal"
1121
+ )
1122
+
1123
+ pctl_map = {}
1124
+ rz_map = {}
1125
+
1126
+ if "group_key" in agg_total.columns and "Score_Kinerja_WilayahTotal_Percentile_0_100" in agg_total.columns:
1127
+ pctl_map = (
1128
+ agg_total[["group_key", "Score_Kinerja_WilayahTotal_Percentile_0_100"]]
1129
+ .dropna(subset=["group_key"])
1130
+ .set_index("group_key")["Score_Kinerja_WilayahTotal_Percentile_0_100"]
1131
+ .to_dict()
1132
+ )
1133
+
1134
+ if "group_key" in agg_total.columns and "Score_Kinerja_WilayahTotal_RobustZ_0_100" in agg_total.columns:
1135
+ rz_map = (
1136
+ agg_total[["group_key", "Score_Kinerja_WilayahTotal_RobustZ_0_100"]]
1137
+ .dropna(subset=["group_key"])
1138
+ .set_index("group_key")["Score_Kinerja_WilayahTotal_RobustZ_0_100"]
1139
+ .to_dict()
1140
+ )
1141
+
1142
+ _GLOBAL_SCORE_CACHE[cache_key] = (pctl_map, rz_map)
1143
+ return pctl_map, rz_map
1144
 
1145
 
1146
  # ============================================================
 
1319
 
1320
 
1321
  # ============================================================
1322
+ # 11) VERIFIKASI PER JENIS (TARGET 33.88%)
1323
  # ============================================================
1324
 
1325
  def build_verif_jenis(faktor_wilayah_jenis: pd.DataFrame, kew_value: str):
 
1352
 
1353
 
1354
  # ============================================================
1355
+ # 12) BELL CURVE (sama seperti versi kamu, disederhanakan aman)
1356
  # ============================================================
1357
 
1358
  def _make_bell_curve(dfp: pd.DataFrame, xcol: str, title: str, label_col: str | None = None, hover_cols: list | None = None, min_points: int = 2):
 
1381
 
1382
  if len(d) < min_points:
1383
  x_single = float(pd.to_numeric(d[xcol], errors="coerce").iloc[0])
1384
+ fig.add_trace(go.Scatter(x=[x_single], y=[0], mode="markers", showlegend=False))
 
 
 
 
 
 
 
 
1385
  fig.add_vline(x=x_single, line_width=1, line_dash="dash", annotation_text=f"Nilai: {x_single:.1f}", annotation_position="top")
 
1386
  fig.update_xaxes(range=[0, 100])
1387
  fig.update_yaxes(rangemode="tozero")
1388
  return fig
1389
 
1390
  x = pd.to_numeric(d[xcol], errors="coerce").astype(float).values
1391
  x = x[np.isfinite(x)]
 
 
 
 
 
 
1392
  mu = float(np.mean(x))
1393
+ sigma = float(np.std(x, ddof=1)) if len(x) > 1 else 1.0
1394
+ sigma = max(sigma, 1e-3)
 
1395
 
1396
  xmin = max(0.0, float(np.min(x)) - 5.0)
1397
  xmax = min(100.0, float(np.max(x)) + 5.0)
 
 
 
 
1398
  xs = np.linspace(xmin, xmax, 250)
1399
  pdf = (1.0 / (sigma * np.sqrt(2 * np.pi))) * np.exp(-0.5 * ((xs - mu) / sigma) ** 2)
1400
 
1401
+ fig.add_trace(go.Scatter(x=xs, y=pdf, mode="lines", name="Kurva Normal (fit)"))
1402
+ fig.add_trace(go.Scatter(x=x, y=np.zeros_like(x), mode="markers", showlegend=False))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1403
 
1404
  q1, q2, q3 = np.percentile(x, [25, 50, 75])
1405
  for xv, lab in [(q1, "Q1"), (q2, "Q2 (Median)"), (q3, "Q3"), (mu, "Mean")]:
 
1411
 
1412
 
1413
  # ============================================================
1414
+ # 13) KPI DASHBOARD (skor absolut + percentile GLOBAL)
1415
  # ============================================================
1416
 
1417
  def _safe_first(df, col, default=0.0, where=None):
 
1424
  return default
1425
  return float(pd.to_numeric(sub[col], errors="coerce").fillna(default).iloc[0])
1426
 
 
 
 
 
 
 
 
 
1427
  def compute_dashboard_kpis(summary_jenis: pd.DataFrame, agg_total: pd.DataFrame):
1428
  final_all = _safe_first(summary_jenis, "Indeks_Final_Disesuaikan_0_100", 0.0, where=summary_jenis["Jenis"].astype(str).str.lower().eq("keseluruhan"))
1429
  dasar_all = _safe_first(summary_jenis, "Indeks_Dasar_0_100", 0.0, where=summary_jenis["Jenis"].astype(str).str.lower().eq("keseluruhan"))
1430
+
1431
+ # KPI percentile wilayah terpilih: di agg_total (sudah ditempel global)
1432
+ pctl_sel = 0.0
1433
+ if agg_total is not None and not agg_total.empty and "Score_Kinerja_WilayahTotal_Percentile_0_100" in agg_total.columns:
1434
+ pctl_sel = float(pd.to_numeric(agg_total["Score_Kinerja_WilayahTotal_Percentile_0_100"], errors="coerce").fillna(0.0).iloc[0])
1435
+
1436
  return {"final_all": final_all, "dasar_all": dasar_all, "pctl_sel": pctl_sel}
1437
 
1438
  def build_kpi_markdown(summary_jenis: pd.DataFrame, agg_total: pd.DataFrame) -> str:
1439
  if summary_jenis is None or summary_jenis.empty:
1440
  return ""
1441
+
1442
  k = compute_dashboard_kpis(summary_jenis, agg_total)
1443
 
1444
  def fmt(x, nd=2):
 
1468
 
1469
 
1470
  # ============================================================
1471
+ # 14) LLM + WORD (OPSIONAL)
1472
  # ============================================================
1473
 
1474
  _HF_CLIENT = None
 
1487
  _HF_CLIENT = None
1488
  return None
1489
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1490
  def generate_llm_analysis(summary_jenis, agg_total, verif_total, wilayah, kew):
 
1491
  client = get_llm_client()
1492
  if client is None or (not USE_LLM):
1493
  return "Analisis otomatis (LLM) tidak digunakan / tidak tersedia."
1494
+ ctx = f"Wilayah={wilayah} | Kewenangan={kew} | Target={TARGET_RATIO*100:.2f}%"
 
 
 
 
 
 
 
 
 
 
 
 
1495
  try:
1496
  resp = client.chat_completion(
1497
  model=LLM_MODEL_NAME,
1498
+ messages=[
1499
+ {"role":"system","content":"Anda adalah analis kebijakan perpustakaan di Indonesia. Tulis analisis ringkas berbasis data."},
1500
+ {"role":"user","content":f"{ctx}\nBuat analisis 3 paragraf: skor absolut, kinerja relatif percentile, rekomendasi singkat."}
1501
+ ],
1502
+ max_tokens=500,
1503
  temperature=0.25,
1504
  top_p=0.9,
1505
  )
 
1510
 
1511
  def generate_word_report(wilayah, summary_jenis, analysis_text):
1512
  if (not DOCX_AVAILABLE) or (Document is None):
 
1513
  return None
 
1514
  doc = Document()
1515
  doc.add_heading(f"Laporan IPLM β€” {wilayah}", level=1)
1516
  doc.add_paragraph(f"Target sampel per jenis: {TARGET_RATIO*100:.2f}%")
1517
  doc.add_paragraph("Catatan: Percentile kinerja wilayah yang ditampilkan adalah percentile GLOBAL (nasional), bukan dari hasil filter.")
 
1518
  doc.add_heading("Ringkasan (Jenis + Keseluruhan)", level=2)
1519
+ if summary_jenis is not None and not summary_jenis.empty:
1520
+ show = summary_jenis.copy()
 
 
 
 
 
 
 
 
1521
  table = doc.add_table(rows=1, cols=len(show.columns))
 
1522
  for i, c in enumerate(show.columns):
1523
+ table.rows[0].cells[i].text = str(c)
 
1524
  for _, row in show.iterrows():
1525
  cells = table.add_row().cells
1526
  for i, c in enumerate(show.columns):
 
1533
  cells[i].text = str(int(v))
1534
  else:
1535
  cells[i].text = str(v)
 
1536
  doc.add_heading("Analisis (opsional)", level=2)
1537
  for p in (analysis_text or "").split("\n"):
1538
  if p.strip():
1539
  doc.add_paragraph(p.strip())
 
1540
  outpath = tempfile.mktemp(suffix=".docx")
1541
  doc.save(outpath)
1542
  return outpath
 
1563
  return _empty_outputs("⚠️ Data belum ter-load. Pastikan file tersedia di repo/server.")
1564
 
1565
  # =========================================================
1566
+ # 1) FILTER df_all (entitas) sesuai dropdown
1567
  # =========================================================
1568
  df = df_all.copy()
1569
  if prov_value and prov_value != "(Semua)":
 
1577
  return _empty_outputs("Tidak ada data untuk filter ini.")
1578
 
1579
  # =========================================================
1580
+ # 2) PIPELINE FILTER → faktor → agg_jenis → agg_total
1581
  # =========================================================
1582
+ kew_norm = kew_value if (kew_value and kew_value != "(Semua)") else "(Semua)"
1583
+ faktor_wilayah_jenis = build_faktor_wilayah_jenis(df, pop_kab, pop_prov, pop_khusus, kew_norm)
1584
+ agg_jenis_full = build_agg_wilayah_jenis(df, faktor_wilayah_jenis, kew_norm)
1585
+ agg_total = build_agg_wilayah_total_from_jenis(agg_jenis_full, faktor_wilayah_jenis, kew_norm)
1586
 
1587
  # =========================================================
1588
+ # 3) FIX PERCENTILE: hitung GLOBAL dulu, lalu TEMPEL via MAP
1589
+ # (NO MERGE β†’ no _x/_y, KPI tidak akan 0.00)
1590
  # =========================================================
1591
+ pctl_map, rz_map = compute_global_wilayah_scores(df_all, pop_kab, pop_prov, pop_khusus, kew_norm)
 
 
1592
 
1593
+ if agg_total is not None and not agg_total.empty and "group_key" in agg_total.columns:
1594
+ agg_total["Score_Kinerja_WilayahTotal_Percentile_0_100"] = (
1595
+ agg_total["group_key"].map(pctl_map).fillna(0.0).astype(float).round(2)
1596
+ )
1597
+ if USE_ROBUST_Z:
1598
+ agg_total["Score_Kinerja_WilayahTotal_RobustZ_0_100"] = (
1599
+ agg_total["group_key"].map(rz_map).fillna(50.0).astype(float).round(2)
1600
+ )
1601
+
1602
+ # =========================================================
1603
+ # 4) OUTPUT TABLES
1604
+ # =========================================================
1605
  summary_jenis = build_summary_per_jenis(agg_jenis_full, agg_total)
1606
+ verif_total = build_verif_jenis(faktor_wilayah_jenis, kew_norm)
1607
+ detail_view = attach_final_to_detail(df, agg_total, meta, kew_norm)
1608
 
1609
  # =========================================================
1610
+ # 5) agg_jenis view (UI hanya sampai indeks dasar)
1611
  # =========================================================
1612
  if agg_jenis_full is None or agg_jenis_full.empty:
1613
  agg_jenis_view = agg_jenis_full
1614
  else:
1615
+ kew_norm2 = str(kew_norm).upper()
1616
+ label_name = "Kab/Kota" if ("KAB" in kew_norm2 or "KOTA" in kew_norm2) else ("Provinsi" if "PROV" in kew_norm2 else "Kab/Kota")
1617
  cols_upto = [
1618
  "group_key",
1619
  label_name,
 
1627
  agg_jenis_view = agg_jenis_full[cols_upto].copy()
1628
 
1629
  # =========================================================
1630
+ # 6) FILTER RAW DOWNLOAD (harus raw hasil filter)
1631
  # =========================================================
1632
  raw = df_raw.copy()
1633
  if prov_value and prov_value != "(Semua)":
 
1638
  raw = raw[raw["KEW_NORM"] == kew_value]
1639
 
1640
  # =========================================================
1641
+ # 7) Bell curve per jenis (entitas)
1642
  # =========================================================
1643
  if detail_view is None or detail_view.empty:
 
1644
  fig_umum = _make_bell_curve(pd.DataFrame(), "Score_Kinerja_Entitas_Percentile_0_100", "Bell Curve β€” Jenis: Umum", min_points=2)
1645
+ fig_sekolah = _make_bell_curve(pd.DataFrame(), "Score_Kinerja_Entitas_Percentile_0_100", "Bell Curve β€” Jenis: Sekolah", min_points=2)
1646
  fig_khusus = _make_bell_curve(pd.DataFrame(), "Score_Kinerja_Entitas_Percentile_0_100", "Bell Curve β€” Jenis: Khusus", min_points=2)
1647
  else:
1648
  xcol_ent = "Score_Kinerja_Entitas_Percentile_0_100" if "Score_Kinerja_Entitas_Percentile_0_100" in detail_view.columns else "Indeks_Dasar_0_100"
1649
+ def _fig(j):
1650
+ d = detail_view[detail_view["Jenis"].astype(str).str.lower() == j].copy()
1651
+ return _make_bell_curve(d, xcol_ent, f"Bell Curve β€” Jenis: {j.title()} (Skor: {xcol_ent})", min_points=2)
1652
+ fig_sekolah = _fig("sekolah")
1653
+ fig_umum = _fig("umum")
1654
+ fig_khusus = _fig("khusus")
 
 
 
 
1655
 
1656
  # =========================================================
1657
+ # 8) KPI (percentile sudah GLOBAL)
1658
  # =========================================================
1659
  kpi_md = build_kpi_markdown(summary_jenis, agg_total)
1660
 
1661
  # =========================================================
1662
+ # 9) Export (xlsx + opsional docx)
1663
  # =========================================================
1664
  tmpdir = tempfile.mkdtemp()
1665
  prov_slug = (_canon(prov_value or "SEMUA").upper() or "SEMUA")
 
1680
 
1681
  wilayah_txt = kab_value if (kab_value and kab_value != "(Semua)") else (prov_value if (prov_value and prov_value != "(Semua)") else "Nasional/All")
1682
  analysis_text = generate_llm_analysis(summary_jenis, agg_total, verif_total, wilayah_txt, kew_value or "(Semua)")
 
1683
  word_path = generate_word_report(wilayah_txt, summary_jenis, analysis_text)
1684
 
1685
  msg = (
 
1755
  βœ… Dashboard KPI menampilkan juga:
1756
  - `Score_Kinerja_WilayahTotal_Percentile_0_100` (**GLOBAL nasional**; bukan hasil filter)
1757
 
 
 
 
 
1758
  **Skor Absolut (untuk akuntabilitas):**
1759
  - `Indeks_Final_*` (sudah disesuaikan target 33.88%)
1760
+
1761
+ **Skor Kinerja Relatif (untuk benchmarking):**
1762
+ - `Score_Kinerja_*_Percentile_0_100` (utama, stabil tanpa asumsi normal)
1763
+ - `Score_Kinerja_*_RobustZ_0_100` (opsional, tahan outlier)
1764
  """)
1765
 
1766
  state_df = gr.State(None)
 
1787
  gr.Markdown("## Ringkasan (Jenis + Keseluruhan) β€” Pop/Target33.88/Terkumpul/Coverage + Penyesuaian")
1788
  out_summary = gr.DataFrame(interactive=False)
1789
 
1790
+ gr.Markdown("## Agregat Wilayah (Keseluruhan) β€” FIX avg3 + Score Kinerja Relatif (GLOBAL)")
1791
  out_agg_total = gr.DataFrame(interactive=False)
1792
 
1793
  gr.Markdown("## Agregat Wilayah Γ— Jenis β€” (ditampilkan sampai Indeks_Dasar_Agregat_0_100)")
 
1799
  gr.Markdown("## Kecukupan Sampel 33.88% (tanpa angka koma untuk integer)")
1800
  out_verif = gr.DataFrame(interactive=False)
1801
 
1802
+ gr.Markdown("## Bell Curve β€” per Jenis")
1803
  gr.Markdown("### Perpustakaan Umum")
1804
  bell_umum = gr.Plot(scale=1)
1805