irhamni committed on
Commit
fc5de36
·
verified ·
1 Parent(s): b96215f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +155 -89
app.py CHANGED
@@ -23,18 +23,20 @@ IPLM 2025 — FINAL (NO UPLOAD) — FULL REWRITE (NO RINGKAS)
23
  ✅ Keseluruhan ringkasan = (final_sekolah+final_umum+final_khusus)/3 (missing=0, tetap ÷3)
24
 
25
  ✅ Detail entitas: Indeks_Final_0_100 menempel dari Agregat Wilayah (Keseluruhan) (bukan per-row)
26
- ✅ Bell curve per JENIS berbasis indeks per entitas (row-level)
27
- ✅ LLM analysis + Word
 
 
 
 
 
 
28
  ✅ Download (tanpa upload box)
29
  ✅ Download Data Mentah (.xlsx) = RAW hasil filter (bukan agregat)
30
 
31
  FIX DISPLAY:
32
  ✅ “null/NaN” untuk target/pop/coverage jenis -> dibuat 0 agar tidak tampil null
33
  ✅ Verifikasi target 33.88% (tanpa koma untuk integer) -> target/pop/gap dibulatkan integer
34
- ✅ TABEL faktor_wilayah:
35
- - target_total_33_88 -> bilangan bulat
36
- - pop_total -> bilangan bulat
37
- - coverage_total_% -> decimal 2 digit
38
  ✅ TABEL "Agregat Wilayah × Jenis" (UI) hanya sampai kolom Indeks_Dasar_Agregat_0_100
39
  """
40
 
@@ -66,9 +68,13 @@ POP_KHUSUS = os.getenv("POP_KHUSUS", "Data_populasi_perp_khusus.xlsx")
66
  W_KEPATUHAN = float(os.getenv("W_KEPATUHAN", "0.30"))
67
  W_KINERJA = float(os.getenv("W_KINERJA", "0.70"))
68
 
69
- # ✅ UPDATE: target sampel 33.88% (bukan 68%)
70
  TARGET_RATIO = float(os.getenv("TARGET_RATIO", "0.3388"))
71
 
 
 
 
 
72
  USE_LLM = True
73
  LLM_MODEL_NAME = os.getenv("LLM_MODEL_NAME", "meta-llama/Meta-Llama-3-8B-Instruct")
74
  HF_TOKEN = (
@@ -202,6 +208,69 @@ def faktor_penyesuaian_total(n_total: float, target_total: float) -> float:
202
  n_total = 0.0
203
  return float(min(float(n_total) / float(target_total), 1.0))
204
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
 
206
  # ============================================================
207
  # 3) INDIKATOR IPLM
@@ -279,6 +348,7 @@ def prepare_global(df_src: pd.DataFrame) -> pd.DataFrame:
279
  return df_src
280
  df = df_src.copy()
281
 
 
282
  rename_map = {}
283
  for col in df.columns:
284
  c = _canon(col)
@@ -296,6 +366,7 @@ def prepare_global(df_src: pd.DataFrame) -> pd.DataFrame:
296
  for c in available:
297
  df[c] = df[c].apply(coerce_num)
298
 
 
299
  for c in available:
300
  x = pd.to_numeric(df[c], errors="coerce").astype(float).values
301
  mask = ~np.isnan(x)
@@ -343,7 +414,6 @@ def _parse_pop_khusus(path_xlsx: str) -> pd.DataFrame:
343
  if df is None or df.empty:
344
  return pd.DataFrame()
345
 
346
- # file kamu: Propinsi/Kab/kota | POP_KHUSUS | SAMPEL_KHUSUS_68% (kolom target boleh ada, tapi kita akan hitung ulang 33.88%)
347
  c_mix = pick_col(df, [
348
  "Propinsi/Kab/kota", "Propinsi/Kab/Kota", "Provinsi/Kab/Kota",
349
  "Provinsi/Kab/kota", "Provinsi/Kabupaten/Kota",
@@ -367,11 +437,9 @@ def _parse_pop_khusus(path_xlsx: str) -> pd.DataFrame:
367
  if mm == "":
368
  continue
369
 
370
- # === PROV row: dianggap TOTAL PROVINSI (punya nilai!) ===
371
  if mm.startswith("PROVINSI "):
372
  prov_name = mm.replace("PROVINSI", "").strip()
373
  current_prov = prov_name
374
-
375
  rows.append({
376
  "LEVEL": "PROV",
377
  "Provinsi_Label": f"PROVINSI {prov_name}",
@@ -380,7 +448,6 @@ def _parse_pop_khusus(path_xlsx: str) -> pd.DataFrame:
380
  })
381
  continue
382
 
383
- # === KAB/KOTA row ===
384
  rows.append({
385
  "LEVEL": "KAB",
386
  "Provinsi_Label": f"PROVINSI {current_prov}" if current_prov else None,
@@ -392,18 +459,11 @@ def _parse_pop_khusus(path_xlsx: str) -> pd.DataFrame:
392
  if pop.empty:
393
  return pop
394
 
395
- pop["Pop_Total_Jenis"] = pd.to_numeric(pop["Pop_Total_Jenis"], errors="coerce")
396
-
397
- # fallback aman: kalau pop kosong, tetap 0
398
- pop["Pop_Total_Jenis"] = pop["Pop_Total_Jenis"].fillna(0.0)
399
-
400
- # keys
401
  pop["prov_key"] = pop["Provinsi_Label"].apply(norm_prov_label)
402
  pop["kab_key"] = pop["Kab_Kota_Label"].apply(norm_kab_label) if "Kab_Kota_Label" in pop.columns else None
403
-
404
  return pop
405
 
406
-
407
  def load_default_files(force=False):
408
  key = (
409
  DATA_FILE, POP_KAB, POP_PROV, POP_KHUSUS,
@@ -464,14 +524,10 @@ def load_default_files(force=False):
464
  df_raw = df_raw.drop_duplicates(subset=["_row_key"], keep="first").copy()
465
  after = len(df_raw)
466
 
467
- # =========================
468
  # POP KAB
469
- # =========================
470
  pk = pd.read_excel(POP_KAB)
471
-
472
  c_kab = pick_col(pk, ["KABUPATEN_KOTA","Kab/Kota","Kabupaten/Kota","KAB/KOTA","Kabupaten_Kota","kab_kota","kabupaten_kota"])
473
  c_prov = pick_col(pk, ["PROVINSI","Provinsi","provinsi"])
474
-
475
  if c_kab is None:
476
  info = "❌ POP_KAB: wajib ada kolom Kab/Kota."
477
  _CACHE.update({"key": key, "df_all": None, "df_raw": None, "pop_kab": None, "pop_prov": None, "pop_khusus": None, "meta": {}, "info": info})
@@ -483,11 +539,8 @@ def load_default_files(force=False):
483
  pop_kab["kab_key"] = pop_kab["Kab_Kota_Label"].apply(norm_kab_label)
484
  pop_kab = pop_kab.groupby("kab_key", as_index=False).first()
485
 
486
- # =========================
487
  # POP PROV
488
- # =========================
489
  pp = pd.read_excel(POP_PROV)
490
-
491
  c_pr = pick_col(pp, ["Provinsi","PROVINSI","provinsi","Propinsi","PROPINSI","propinsi"])
492
  if c_pr is None:
493
  info = "❌ POP_PROV: wajib ada kolom Provinsi."
@@ -499,9 +552,7 @@ def load_default_files(force=False):
499
  pop_prov["prov_key"] = pop_prov["Provinsi_Label"].apply(norm_prov_label)
500
  pop_prov = pop_prov.groupby("prov_key", as_index=False).first()
501
 
502
- # =========================
503
  # POP KHUSUS
504
- # =========================
505
  try:
506
  pop_khusus = _parse_pop_khusus(POP_KHUSUS)
507
  except Exception as e:
@@ -510,7 +561,6 @@ def load_default_files(force=False):
510
  return None, None, None, None, None, {}, info
511
 
512
  df_all = prepare_global(df_raw)
513
-
514
  meta = dict(prov_col=prov_col, kab_col=kab_col, kew_col=kew_col, jenis_col=jenis_col, nama_col=nama_col)
515
 
516
  info = (
@@ -572,16 +622,13 @@ def build_faktor_wilayah_jenis(
572
  base_pop["kab_key"] = base_pop["Kab_Kota_Label"].apply(norm_kab_label) if "Kab_Kota_Label" in base_pop.columns else base_pop.iloc[:, 0].apply(norm_kab_label)
573
  base_pop = base_pop.set_index("kab_key") if (not base_pop.empty and "kab_key" in base_pop.columns) else pd.DataFrame().set_index(pd.Index([]))
574
 
575
- # =========================================================
576
- # ✅ GRID WAJIB: semua wilayah × 3 jenis (meski n=0)
577
- # =========================================================
578
  base_keys = df[[key_col, label_col]].drop_duplicates().rename(columns={key_col: "group_key", label_col: label_name})
579
  full = base_keys.assign(_tmp=1).merge(
580
  pd.DataFrame({"Jenis": jenis_list, "_tmp": 1}),
581
  on="_tmp"
582
  ).drop(columns="_tmp")
583
 
584
- # hitung n per jenis dari DM (boleh 0)
585
  cnt = (
586
  df.groupby([key_col, label_col, "_dataset"], dropna=False)
587
  .size()
@@ -593,14 +640,10 @@ def build_faktor_wilayah_jenis(
593
  base_n = full.merge(cnt, on=["group_key", label_name, "Jenis"], how="left")
594
  base_n["n_jenis"] = pd.to_numeric(base_n["n_jenis"], errors="coerce").fillna(0).astype(int)
595
 
596
- # kolom output faktor (target 33.88%)
597
  base_n["target_total_33_88_jenis"] = 0.0
598
  base_n["pop_total_jenis"] = 0.0
599
 
600
- # =========================
601
  # SEKOLAH + UMUM dari POP_KAB / POP_PROV
602
- # Target dihitung ulang: pop * TARGET_RATIO
603
- # =========================
604
  if not base_pop.empty:
605
  if mode == "KAB":
606
  pop_sekolah = pd.to_numeric(base_pop.get("jumlah_populasi_sekolah", 0), errors="coerce").fillna(0.0)
@@ -627,10 +670,7 @@ def build_faktor_wilayah_jenis(
627
  base_n.loc[m, "pop_total_jenis"] = base_n.loc[m, "group_key"].map(pop_umum).fillna(0.0).values
628
  base_n.loc[m, "target_total_33_88_jenis"] = base_n.loc[m, "group_key"].map(tgt_umum).fillna(0.0).values
629
 
630
- # =========================
631
  # KHUSUS dari POP_KHUSUS
632
- # Target dihitung ulang: pop * TARGET_RATIO
633
- # =========================
634
  if pop_khusus is not None and not pop_khusus.empty:
635
  pk = pop_khusus.copy()
636
  pk["Pop_Total_Jenis"] = pd.to_numeric(pk.get("Pop_Total_Jenis", 0), errors="coerce").fillna(0.0)
@@ -650,14 +690,12 @@ def build_faktor_wilayah_jenis(
650
  base_n.loc[m, "pop_total_jenis"] = base_n.loc[m, "group_key"].map(pop_series).fillna(0.0).values
651
  base_n.loc[m, "target_total_33_88_jenis"] = base_n.loc[m, "group_key"].map(tgt_series).fillna(0.0).values
652
 
653
- # fallback pop dari target (jaga-jaga)
654
  base_n["target_total_33_88_jenis"] = pd.to_numeric(base_n["target_total_33_88_jenis"], errors="coerce").fillna(0.0)
655
  base_n["pop_total_jenis"] = pd.to_numeric(base_n["pop_total_jenis"], errors="coerce").fillna(0.0)
656
 
657
  m_need_pop = (base_n["pop_total_jenis"] <= 0) & (base_n["target_total_33_88_jenis"] > 0)
658
  base_n.loc[m_need_pop, "pop_total_jenis"] = base_n.loc[m_need_pop, "target_total_33_88_jenis"] / float(TARGET_RATIO)
659
 
660
- # faktor / coverage / gap
661
  base_n["faktor_penyesuaian_jenis"] = [
662
  faktor_penyesuaian_total(n, t)
663
  for n, t in zip(
@@ -714,14 +752,14 @@ def build_agg_wilayah_jenis(df_filtered: pd.DataFrame, faktor_wilayah_jenis: pd.
714
 
715
  jenis_list = ["sekolah", "umum", "khusus"]
716
 
717
- # GRID: semua wilayah × 3 jenis
718
  base_keys = df[[key_col, label_col]].drop_duplicates().rename(columns={key_col: "group_key", label_col: label_name})
719
  full = base_keys.assign(_tmp=1).merge(
720
  pd.DataFrame({"Jenis": jenis_list, "_tmp": 1}),
721
  on="_tmp"
722
  ).drop(columns="_tmp")
723
 
724
- # agregat dari data yang ada
725
  agg_real = df.groupby([key_col, label_col, "_dataset"], dropna=False).agg(
726
  Jumlah=("Indeks_Dasar_0_100", "size"),
727
  Rata2_sub_koleksi=("sub_koleksi", "mean"),
@@ -735,7 +773,6 @@ def build_agg_wilayah_jenis(df_filtered: pd.DataFrame, faktor_wilayah_jenis: pd.
735
 
736
  agg_real["Jenis"] = agg_real["Jenis"].astype(str).str.lower().str.strip()
737
 
738
- # tempel ke grid + fill 0
739
  agg = full.merge(agg_real, on=["group_key", label_name, "Jenis"], how="left")
740
  for c in ["Jumlah","Rata2_sub_koleksi","Rata2_sub_sdm","Rata2_sub_pelayanan","Rata2_sub_pengelolaan",
741
  "Rata2_dim_kepatuhan","Rata2_dim_kinerja","Indeks_Dasar_Agregat_0_100"]:
@@ -744,7 +781,7 @@ def build_agg_wilayah_jenis(df_filtered: pd.DataFrame, faktor_wilayah_jenis: pd.
744
 
745
  agg["Jumlah"] = agg["Jumlah"].round(0).astype(int)
746
 
747
- # merge faktor PER JENIS
748
  if faktor_wilayah_jenis is None or faktor_wilayah_jenis.empty:
749
  agg["faktor_penyesuaian_jenis"] = 1.0
750
  agg["target_total_33_88_jenis"] = 0
@@ -771,13 +808,21 @@ def build_agg_wilayah_jenis(df_filtered: pd.DataFrame, faktor_wilayah_jenis: pd.
771
  if "coverage_jenis_%" in agg.columns:
772
  agg["coverage_jenis_%"] = pd.to_numeric(agg["coverage_jenis_%"], errors="coerce").fillna(0.0).round(2)
773
 
774
- # Indeks FINAL PER JENIS
775
  agg["Indeks_Final_Agregat_0_100"] = (
776
  pd.to_numeric(agg["Indeks_Dasar_Agregat_0_100"], errors="coerce").fillna(0.0)
777
  * pd.to_numeric(agg["faktor_penyesuaian_jenis"], errors="coerce").fillna(1.0)
778
  )
779
 
780
- # rounding tampilan
 
 
 
 
 
 
 
 
781
  for c in [
782
  "Rata2_sub_koleksi","Rata2_sub_sdm","Rata2_sub_pelayanan","Rata2_sub_pengelolaan",
783
  "Rata2_dim_kepatuhan","Rata2_dim_kinerja"
@@ -804,14 +849,12 @@ def build_agg_wilayah_total_from_jenis(agg_jenis: pd.DataFrame, faktor_wilayah_j
804
 
805
  kew_norm = str(kew_value or "").upper()
806
  label_name = "Provinsi" if "PROV" in kew_norm else "Kab/Kota"
807
-
808
  jenis_list = ["sekolah", "umum", "khusus"]
809
 
810
  a = agg_jenis.copy()
811
  a["Jenis"] = a["Jenis"].astype(str).str.lower().str.strip()
812
 
813
  base_keys = a[["group_key", label_name]].drop_duplicates()
814
-
815
  full = base_keys.assign(_tmp=1).merge(
816
  pd.DataFrame({"Jenis": jenis_list, "_tmp": 1}),
817
  on="_tmp"
@@ -832,7 +875,6 @@ def build_agg_wilayah_total_from_jenis(agg_jenis: pd.DataFrame, faktor_wilayah_j
832
  how="left"
833
  )
834
 
835
- # missing=0 (avg3 tetap ÷3)
836
  for c in cols_present:
837
  full[c] = pd.to_numeric(full[c], errors="coerce").fillna(0.0)
838
 
@@ -848,7 +890,7 @@ def build_agg_wilayah_total_from_jenis(agg_jenis: pd.DataFrame, faktor_wilayah_j
848
  Indeks_Final_Wilayah_0_100=("Indeks_Final_Agregat_0_100", "mean"),
849
  )
850
 
851
- # tempel Pop/Target/Terkumpul per jenis & total
852
  if faktor_wilayah_jenis is not None and not faktor_wilayah_jenis.empty:
853
  fw = faktor_wilayah_jenis.copy()
854
  fw["Jenis"] = fw["Jenis"].astype(str).str.lower().str.strip()
@@ -859,24 +901,19 @@ def build_agg_wilayah_total_from_jenis(agg_jenis: pd.DataFrame, faktor_wilayah_j
859
  values=["pop_total_jenis", "target_total_33_88_jenis", "n_jenis", "gap_target33_88_jenis", "faktor_penyesuaian_jenis"],
860
  aggfunc="first"
861
  )
862
-
863
  piv.columns = [f"{v}_{k}" for v, k in piv.columns]
864
  piv = piv.reset_index()
865
-
866
  out = out.merge(piv, on=["group_key", label_name], how="left")
867
 
868
- # NaN -> 0 / 1
869
  for j in ["sekolah", "umum", "khusus"]:
870
  for basecol in ["pop_total_jenis", "target_total_33_88_jenis", "n_jenis", "gap_target33_88_jenis"]:
871
  c = f"{basecol}_{j}"
872
  if c in out.columns:
873
  out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0).round(0).astype(int)
874
-
875
  cfac = f"faktor_penyesuaian_jenis_{j}"
876
  if cfac in out.columns:
877
  out[cfac] = pd.to_numeric(out[cfac], errors="coerce").fillna(1.0).round(3)
878
 
879
- # TOTAL (sum 3 jenis)
880
  out["pop_total_all"] = (
881
  out.get("pop_total_jenis_sekolah", 0)
882
  + out.get("pop_total_jenis_umum", 0)
@@ -902,6 +939,14 @@ def build_agg_wilayah_total_from_jenis(agg_jenis: pd.DataFrame, faktor_wilayah_j
902
  )
903
  out["coverage_target33_88_all_%"] = pd.to_numeric(out["coverage_target33_88_all_%"], errors="coerce").fillna(0.0).round(2)
904
 
 
 
 
 
 
 
 
 
905
  # rounding index
906
  for c in [
907
  "Rata2_sub_koleksi","Rata2_sub_sdm","Rata2_sub_pelayanan","Rata2_sub_pengelolaan",
@@ -1035,6 +1080,7 @@ def build_summary_per_jenis(agg_jenis: pd.DataFrame, agg_total: pd.DataFrame):
1035
 
1036
  # ============================================================
1037
  # 10) DETAIL ENTITAS: Final menempel dari agg_total (wilayah)
 
1038
  # ============================================================
1039
 
1040
  def attach_final_to_detail(df_filtered: pd.DataFrame, agg_total: pd.DataFrame, meta: dict, kew_value: str):
@@ -1078,6 +1124,14 @@ def attach_final_to_detail(df_filtered: pd.DataFrame, agg_total: pd.DataFrame, m
1078
  out = df[keep].copy()
1079
  out = out.rename(columns={label_cols[0]:"Provinsi", label_cols[1]:"Kab/Kota", "_dataset":"Jenis"})
1080
 
 
 
 
 
 
 
 
 
1081
  for c in ["sub_koleksi","sub_sdm","sub_pelayanan","sub_pengelolaan","dim_kepatuhan","dim_kinerja"]:
1082
  if c in out.columns:
1083
  out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0.0).round(3)
@@ -1129,7 +1183,7 @@ def _make_bell_curve(dfp: pd.DataFrame, xcol: str, title: str, label_col: str |
1129
  fig = go.Figure()
1130
  fig.update_layout(
1131
  title=title,
1132
- xaxis_title="Indeks (0–100)",
1133
  yaxis_title="Kepadatan",
1134
  hovermode="x unified",
1135
  margin=dict(l=40, r=20, t=60, b=40),
@@ -1157,7 +1211,7 @@ def _make_bell_curve(dfp: pd.DataFrame, xcol: str, title: str, label_col: str |
1157
  fig.add_trace(go.Scatter(
1158
  x=[x_single], y=[0], mode="markers", name="Data", marker=dict(size=10),
1159
  hovertext=hovertext,
1160
- hovertemplate="%{hovertext}<extra></extra>" if hovertext is not None else "Indeks: %{x:.2f}<extra></extra>",
1161
  showlegend=False,
1162
  ))
1163
  fig.add_vline(x=x_single, line_width=1, line_dash="dash", annotation_text=f"Nilai: {x_single:.1f}", annotation_position="top")
@@ -1189,7 +1243,7 @@ def _make_bell_curve(dfp: pd.DataFrame, xcol: str, title: str, label_col: str |
1189
  pdf = (1.0 / (sigma * np.sqrt(2 * np.pi))) * np.exp(-0.5 * ((xs - mu) / sigma) ** 2)
1190
 
1191
  fig.add_trace(go.Scatter(
1192
- x=xs, y=pdf, mode="lines", name="Kurva Normal",
1193
  hovertemplate="x=%{x:.2f}<br>pdf=%{y:.4f}<extra></extra>"
1194
  ))
1195
 
@@ -1219,7 +1273,7 @@ def _make_bell_curve(dfp: pd.DataFrame, xcol: str, title: str, label_col: str |
1219
  fig.add_trace(go.Scatter(
1220
  x=x, y=np.zeros_like(x), mode="markers", name="Data", marker=dict(size=8),
1221
  hovertext=hovertext,
1222
- hovertemplate="%{hovertext}<extra></extra>" if hovertext is not None else "Indeks: %{x:.2f}<extra></extra>",
1223
  showlegend=False
1224
  ))
1225
 
@@ -1233,7 +1287,7 @@ def _make_bell_curve(dfp: pd.DataFrame, xcol: str, title: str, label_col: str |
1233
 
1234
 
1235
  # ============================================================
1236
- # 13) KPI DASHBOARD (FINAL: hanya Final & Dasar)
1237
  # ============================================================
1238
 
1239
  def compute_dashboard_kpis(summary_jenis: pd.DataFrame):
@@ -1245,7 +1299,6 @@ def compute_dashboard_kpis(summary_jenis: pd.DataFrame):
1245
 
1246
  final_all = _get("keseluruhan", "Indeks_Final_Disesuaikan_0_100")
1247
  dasar_all = _get("keseluruhan", "Indeks_Dasar_0_100")
1248
-
1249
  return {"final_all": final_all, "dasar_all": dasar_all}
1250
 
1251
  def build_kpi_markdown(summary_jenis: pd.DataFrame) -> str:
@@ -1259,15 +1312,15 @@ def build_kpi_markdown(summary_jenis: pd.DataFrame) -> str:
1259
  return f"""
1260
  <div style="display:flex; gap:12px; flex-wrap:wrap;">
1261
  <div style="border:1px solid #333; border-radius:10px; padding:10px 12px; min-width:260px;">
1262
- <div style="opacity:0.8;">Indeks IPLM FINAL (Disesuaikan)</div>
1263
  <div style="font-size:26px; font-weight:700;">{fmt(k["final_all"],2)}</div>
1264
- <div style="opacity:0.7;">Sumber: Ringkasan baris β€œkeseluruhan”</div>
1265
  </div>
1266
 
1267
  <div style="border:1px solid #333; border-radius:10px; padding:10px 12px; min-width:260px;">
1268
  <div style="opacity:0.8;">Indeks Dasar (Tanpa Penyesuaian)</div>
1269
  <div style="font-size:26px; font-weight:700;">{fmt(k["dasar_all"],2)}</div>
1270
- <div style="opacity:0.7;">Sumber: Ringkasan baris β€œkeseluruhan”</div>
1271
  </div>
1272
  </div>
1273
  """.strip()
@@ -1313,6 +1366,15 @@ def build_context(summary_jenis: pd.DataFrame, agg_total: pd.DataFrame, verif_to
1313
  wl = r.get(label_col, "(wilayah)") if label_col else "(wilayah)"
1314
  lines.append(f"- {wl}: Final={float(r['Indeks_Final_Wilayah_0_100']):.2f}")
1315
 
 
 
 
 
 
 
 
 
 
1316
  return "\n".join(lines)
1317
 
1318
  def generate_llm_analysis(summary_jenis, agg_total, verif_total, wilayah, kew):
@@ -1328,8 +1390,8 @@ DATA IPLM (RINGKAS):
1328
  {ctx}
1329
 
1330
  Buat analisis 3 paragraf:
1331
- 1) Gambaran umum.
1332
- 2) Per jenis (sekolah/umum/khusus) + keseluruhan.
1333
  3) Rekomendasi singkat.
1334
  Catatan: target sampel yang digunakan adalah {TARGET_RATIO*100:.2f}% (bukan 68%).
1335
  """
@@ -1350,6 +1412,7 @@ def generate_word_report(wilayah, summary_jenis, analysis_text):
1350
  doc = Document()
1351
  doc.add_heading(f"Laporan IPLM β€” {wilayah}", level=1)
1352
  doc.add_paragraph(f"Target sampel per jenis: {TARGET_RATIO*100:.2f}%")
 
1353
 
1354
  doc.add_heading("Ringkasan (Jenis + Keseluruhan)", level=2)
1355
 
@@ -1410,7 +1473,7 @@ def run_calc(prov_value, kab_value, kew_value, df_all, df_raw, pop_kab, pop_prov
1410
  if df_all is None or df_all.empty or df_raw is None or df_raw.empty:
1411
  return _empty_outputs("⚠️ Data belum ter-load. Pastikan file tersedia di repo/server.")
1412
 
1413
- # FILTER (df_all)
1414
  df = df_all.copy()
1415
  if prov_value and prov_value != "(Semua)":
1416
  df = df[df["PROV_DISP"] == prov_value]
@@ -1431,7 +1494,7 @@ def run_calc(prov_value, kab_value, kew_value, df_all, df_raw, pop_kab, pop_prov
1431
  verif_total = build_verif_jenis(faktor_wilayah_jenis, kew_value or "(Semua)")
1432
  detail_view = attach_final_to_detail(df, agg_total, meta, kew_value or "(Semua)")
1433
 
1434
- # view agg_jenis (UI cuma sampai indeks dasar)
1435
  if agg_jenis_full is None or agg_jenis_full.empty:
1436
  agg_jenis_view = agg_jenis_full
1437
  else:
@@ -1449,7 +1512,7 @@ def run_calc(prov_value, kab_value, kew_value, df_all, df_raw, pop_kab, pop_prov
1449
  cols_upto = [c for c in cols_upto if c in agg_jenis_full.columns]
1450
  agg_jenis_view = agg_jenis_full[cols_upto].copy()
1451
 
1452
- # FILTER RAW DOWNLOAD (df_raw)
1453
  raw = df_raw.copy()
1454
  if prov_value and prov_value != "(Semua)":
1455
  raw = raw[raw["PROV_DISP"] == prov_value]
@@ -1458,27 +1521,28 @@ def run_calc(prov_value, kab_value, kew_value, df_all, df_raw, pop_kab, pop_prov
1458
  if kew_value and kew_value != "(Semua)":
1459
  raw = raw[raw["KEW_NORM"] == kew_value]
1460
 
1461
- # bell curve per jenis (entitas)
1462
  if detail_view is None or detail_view.empty:
1463
- fig_sekolah = _make_bell_curve(pd.DataFrame(), "Indeks_Dasar_0_100", "Bell Curve β€” Jenis: Sekolah", min_points=2)
1464
- fig_umum = _make_bell_curve(pd.DataFrame(), "Indeks_Dasar_0_100", "Bell Curve β€” Jenis: Umum", min_points=2)
1465
- fig_khusus = _make_bell_curve(pd.DataFrame(), "Indeks_Dasar_0_100", "Bell Curve β€” Jenis: Khusus", min_points=2)
1466
  else:
1467
- xcol_ent = "Indeks_Dasar_0_100" if "Indeks_Dasar_0_100" in detail_view.columns else "Indeks_Final_0_100"
1468
  label_col_e = "nm_perpustakaan" if "nm_perpustakaan" in detail_view.columns else None
1469
- hover_cols_e = [c for c in ["Provinsi", "Kab/Kota", "KEW_NORM", "Jenis", "Indeks_Dasar_0_100", "Indeks_Final_0_100"] if c in detail_view.columns]
1470
 
1471
  def _fig_jenis_ent(jenis_key: str, judul: str):
1472
  d = detail_view[detail_view["Jenis"].astype(str).str.lower() == jenis_key].copy()
1473
  return _make_bell_curve(d, xcol=xcol_ent, title=judul, label_col=label_col_e, hover_cols=hover_cols_e, min_points=2)
1474
 
1475
- fig_sekolah = _fig_jenis_ent("sekolah", "Bell Curve β€” Jenis: Sekolah (Indeks per Entitas)")
1476
- fig_umum = _fig_jenis_ent("umum", "Bell Curve β€” Jenis: Umum (Indeks per Entitas)")
1477
- fig_khusus = _fig_jenis_ent("khusus", "Bell Curve β€” Jenis: Khusus (Indeks per Entitas)")
1478
 
1479
  # KPI
1480
  kpi_md = build_kpi_markdown(summary_jenis)
1481
 
 
1482
  tmpdir = tempfile.mkdtemp()
1483
  prov_slug = (_canon(prov_value or "SEMUA").upper() or "SEMUA")
1484
  kab_slug = (_canon(kab_value or "SEMUA").upper() or "SEMUA")
@@ -1560,7 +1624,7 @@ def on_prov_change(prov_value):
1560
 
1561
  with gr.Blocks() as demo:
1562
  gr.Markdown(f"""
1563
- # IPLM 2025 β€” Final (Target Sampel **33.88%** per Jenis)
1564
  **Mode NO UPLOAD (cache aktif).** File dibaca dari repo/server:
1565
  - `DATA_FILE` = **{DATA_FILE}**
1566
  - `POP_KAB` = **{POP_KAB}**
@@ -1569,10 +1633,12 @@ with gr.Blocks() as demo:
1569
 
1570
  **TARGET RATIO (per jenis): {TARGET_RATIO*100:.2f}%**
1571
 
1572
- **FIX UTAMA:**
1573
- - Ringkasan tampil Pop/Target33.88/Terkumpul/Coverage untuk **sekolah, umum, khusus, keseluruhan**
1574
- - Target dihitung ulang dari Pop: **target = pop Γ— {TARGET_RATIO:.4f}**
1575
- - Pop khusus dari POP_KHUSUS (Propinsi/Kab/kota | POP_KHUSUS | ...)
 
 
1576
  """)
1577
 
1578
  state_df = gr.State(None)
@@ -1599,19 +1665,19 @@ with gr.Blocks() as demo:
1599
  gr.Markdown("## Ringkasan (Jenis + Keseluruhan) β€” Pop/Target33.88/Terkumpul/Coverage + Penyesuaian")
1600
  out_summary = gr.DataFrame(interactive=False)
1601
 
1602
- gr.Markdown("## Agregat Wilayah (Keseluruhan) β€” FIX: avg3 dari 3 jenis")
1603
  out_agg_total = gr.DataFrame(interactive=False)
1604
 
1605
- gr.Markdown("## Agregat Wilayah Γ— Jenis (Sekolah, Umum, Khusus) β€” (ditampilkan sampai Indeks_Dasar_Agregat_0_100)")
1606
  out_agg_jenis = gr.DataFrame(interactive=False)
1607
 
1608
- gr.Markdown("## Detail Entitas (Final menempel dari wilayah)")
1609
  out_detail = gr.DataFrame(interactive=False)
1610
 
1611
  gr.Markdown("## Kecukupan Sampel 33.88% (tanpa angka koma untuk integer)")
1612
  out_verif = gr.DataFrame(interactive=False)
1613
 
1614
- gr.Markdown("## Bell Curve β€” per Jenis Perpustakaan (Indeks per Entitas)")
1615
  gr.Markdown("### Perpustakaan Umum")
1616
  bell_umum = gr.Plot(scale=1)
1617
 
 
23
  ✅ Keseluruhan ringkasan = (final_sekolah+final_umum+final_khusus)/3 (missing=0, tetap ÷3)
24
 
25
  ✅ Detail entitas: Indeks_Final_0_100 menempel dari Agregat Wilayah (Keseluruhan) (bukan per-row)
26
+ ✅ Bell curve per JENIS berbasis skor kinerja per entitas (row-level)
27
+
28
+ ✅ METODE PENILAIAN KINERJA (REKOMENDASI UTAMA):
29
+ - Tetap tampilkan skor absolut: Indeks_Final_... (disesuaikan target 33.88%)
30
+ - Tambahkan skor kinerja relatif yang stabil & audit-friendly:
31
+ 1) Score_Kinerja_Percentile_0_100 (0–100) ← utama
32
+ 2) Score_Kinerja_RobustZ_0_100 (0–100; 50+10*z_robust) ← opsional, tahan outlier
33
+
34
  ✅ Download (tanpa upload box)
35
  ✅ Download Data Mentah (.xlsx) = RAW hasil filter (bukan agregat)
36
 
37
  FIX DISPLAY:
38
  ✅ “null/NaN” untuk target/pop/coverage jenis -> dibuat 0 agar tidak tampil null
39
  ✅ Verifikasi target 33.88% (tanpa koma untuk integer) -> target/pop/gap dibulatkan integer
 
 
 
 
40
  ✅ TABEL "Agregat Wilayah × Jenis" (UI) hanya sampai kolom Indeks_Dasar_Agregat_0_100
41
  """
42
 
 
68
  W_KEPATUHAN = float(os.getenv("W_KEPATUHAN", "0.30"))
69
  W_KINERJA = float(os.getenv("W_KINERJA", "0.70"))
70
 
71
+ # ✅ target sampel 33.88%
72
  TARGET_RATIO = float(os.getenv("TARGET_RATIO", "0.3388"))
73
 
74
+ # Kinerja relatif
75
+ USE_PERCENTILE = True
76
+ USE_ROBUST_Z = True
77
+
78
  USE_LLM = True
79
  LLM_MODEL_NAME = os.getenv("LLM_MODEL_NAME", "meta-llama/Meta-Llama-3-8B-Instruct")
80
  HF_TOKEN = (
 
208
  n_total = 0.0
209
  return float(min(float(n_total) / float(target_total), 1.0))
210
 
211
+ def _clip01(x):
212
+ if pd.isna(x):
213
+ return 0.0
214
+ return float(min(max(float(x), 0.0), 1.0))
215
+
216
+ def add_kinerja_scores(
217
+ df: pd.DataFrame,
218
+ score_col: str,
219
+ group_cols: list[str] | None,
220
+ prefix: str = "Score_Kinerja"
221
+ ) -> pd.DataFrame:
222
+ """
223
+ Tambah:
224
+ - {prefix}_Percentile_0_100
225
+ - {prefix}_RobustZ_0_100 (50+10*z_robust, clip 0..100)
226
+ Grouping untuk fairness: misal per Jenis.
227
+ """
228
+ if df is None or df.empty or score_col not in df.columns:
229
+ return df
230
+
231
+ out = df.copy()
232
+ x = pd.to_numeric(out[score_col], errors="coerce").astype(float)
233
+
234
+ # Percentile 0–100
235
+ if USE_PERCENTILE:
236
+ if group_cols:
237
+ out[f"{prefix}_Percentile_0_100"] = (
238
+ out.groupby(group_cols, dropna=False)[score_col]
239
+ .rank(pct=True, method="average") * 100.0
240
+ )
241
+ else:
242
+ out[f"{prefix}_Percentile_0_100"] = out[score_col].rank(pct=True, method="average") * 100.0
243
+ out[f"{prefix}_Percentile_0_100"] = pd.to_numeric(out[f"{prefix}_Percentile_0_100"], errors="coerce").fillna(0.0).clip(0, 100).round(2)
244
+
245
+ # Robust Z to 0–100
246
+ if USE_ROBUST_Z:
247
+ def _robustz_to_0_100(s: pd.Series) -> pd.Series:
248
+ v = pd.to_numeric(s, errors="coerce").astype(float)
249
+ v = v.replace([np.inf, -np.inf], np.nan)
250
+ if v.dropna().shape[0] < 2:
251
+ return pd.Series(50.0, index=v.index) # netral
252
+ med = float(np.nanmedian(v.values))
253
+ mad = float(np.nanmedian(np.abs(v.values - med)))
254
+ if not np.isfinite(mad) or mad <= 1e-12:
255
+ sd = float(np.nanstd(v.values, ddof=1))
256
+ if not np.isfinite(sd) or sd <= 1e-12:
257
+ return pd.Series(50.0, index=v.index)
258
+ z = (v - med) / sd
259
+ else:
260
+ z = (v - med) / (1.4826 * mad)
261
+ score = 50.0 + 10.0 * z
262
+ score = score.clip(0, 100).fillna(50.0)
263
+ return score
264
+
265
+ if group_cols:
266
+ out[f"{prefix}_RobustZ_0_100"] = out.groupby(group_cols, dropna=False)[score_col].transform(_robustz_to_0_100)
267
+ else:
268
+ out[f"{prefix}_RobustZ_0_100"] = _robustz_to_0_100(out[score_col])
269
+
270
+ out[f"{prefix}_RobustZ_0_100"] = pd.to_numeric(out[f"{prefix}_RobustZ_0_100"], errors="coerce").fillna(50.0).clip(0, 100).round(2)
271
+
272
+ return out
273
+
274
 
275
  # ============================================================
276
  # 3) INDIKATOR IPLM
 
348
  return df_src
349
  df = df_src.copy()
350
 
351
+ # rename indikator
352
  rename_map = {}
353
  for col in df.columns:
354
  c = _canon(col)
 
366
  for c in available:
367
  df[c] = df[c].apply(coerce_num)
368
 
369
+ # YJ per indikator + MinMax global
370
  for c in available:
371
  x = pd.to_numeric(df[c], errors="coerce").astype(float).values
372
  mask = ~np.isnan(x)
 
414
  if df is None or df.empty:
415
  return pd.DataFrame()
416
 
 
417
  c_mix = pick_col(df, [
418
  "Propinsi/Kab/kota", "Propinsi/Kab/Kota", "Provinsi/Kab/Kota",
419
  "Provinsi/Kab/kota", "Provinsi/Kabupaten/Kota",
 
437
  if mm == "":
438
  continue
439
 
 
440
  if mm.startswith("PROVINSI "):
441
  prov_name = mm.replace("PROVINSI", "").strip()
442
  current_prov = prov_name
 
443
  rows.append({
444
  "LEVEL": "PROV",
445
  "Provinsi_Label": f"PROVINSI {prov_name}",
 
448
  })
449
  continue
450
 
 
451
  rows.append({
452
  "LEVEL": "KAB",
453
  "Provinsi_Label": f"PROVINSI {current_prov}" if current_prov else None,
 
459
  if pop.empty:
460
  return pop
461
 
462
+ pop["Pop_Total_Jenis"] = pd.to_numeric(pop["Pop_Total_Jenis"], errors="coerce").fillna(0.0)
 
 
 
 
 
463
  pop["prov_key"] = pop["Provinsi_Label"].apply(norm_prov_label)
464
  pop["kab_key"] = pop["Kab_Kota_Label"].apply(norm_kab_label) if "Kab_Kota_Label" in pop.columns else None
 
465
  return pop
466
 
 
467
  def load_default_files(force=False):
468
  key = (
469
  DATA_FILE, POP_KAB, POP_PROV, POP_KHUSUS,
 
524
  df_raw = df_raw.drop_duplicates(subset=["_row_key"], keep="first").copy()
525
  after = len(df_raw)
526
 
 
527
  # POP KAB
 
528
  pk = pd.read_excel(POP_KAB)
 
529
  c_kab = pick_col(pk, ["KABUPATEN_KOTA","Kab/Kota","Kabupaten/Kota","KAB/KOTA","Kabupaten_Kota","kab_kota","kabupaten_kota"])
530
  c_prov = pick_col(pk, ["PROVINSI","Provinsi","provinsi"])
 
531
  if c_kab is None:
532
  info = "❌ POP_KAB: wajib ada kolom Kab/Kota."
533
  _CACHE.update({"key": key, "df_all": None, "df_raw": None, "pop_kab": None, "pop_prov": None, "pop_khusus": None, "meta": {}, "info": info})
 
539
  pop_kab["kab_key"] = pop_kab["Kab_Kota_Label"].apply(norm_kab_label)
540
  pop_kab = pop_kab.groupby("kab_key", as_index=False).first()
541
 
 
542
  # POP PROV
 
543
  pp = pd.read_excel(POP_PROV)
 
544
  c_pr = pick_col(pp, ["Provinsi","PROVINSI","provinsi","Propinsi","PROPINSI","propinsi"])
545
  if c_pr is None:
546
  info = "❌ POP_PROV: wajib ada kolom Provinsi."
 
552
  pop_prov["prov_key"] = pop_prov["Provinsi_Label"].apply(norm_prov_label)
553
  pop_prov = pop_prov.groupby("prov_key", as_index=False).first()
554
 
 
555
  # POP KHUSUS
 
556
  try:
557
  pop_khusus = _parse_pop_khusus(POP_KHUSUS)
558
  except Exception as e:
 
561
  return None, None, None, None, None, {}, info
562
 
563
  df_all = prepare_global(df_raw)
 
564
  meta = dict(prov_col=prov_col, kab_col=kab_col, kew_col=kew_col, jenis_col=jenis_col, nama_col=nama_col)
565
 
566
  info = (
 
622
  base_pop["kab_key"] = base_pop["Kab_Kota_Label"].apply(norm_kab_label) if "Kab_Kota_Label" in base_pop.columns else base_pop.iloc[:, 0].apply(norm_kab_label)
623
  base_pop = base_pop.set_index("kab_key") if (not base_pop.empty and "kab_key" in base_pop.columns) else pd.DataFrame().set_index(pd.Index([]))
624
 
625
+ # GRID: semua wilayah × 3 jenis
 
 
626
  base_keys = df[[key_col, label_col]].drop_duplicates().rename(columns={key_col: "group_key", label_col: label_name})
627
  full = base_keys.assign(_tmp=1).merge(
628
  pd.DataFrame({"Jenis": jenis_list, "_tmp": 1}),
629
  on="_tmp"
630
  ).drop(columns="_tmp")
631
 
 
632
  cnt = (
633
  df.groupby([key_col, label_col, "_dataset"], dropna=False)
634
  .size()
 
640
  base_n = full.merge(cnt, on=["group_key", label_name, "Jenis"], how="left")
641
  base_n["n_jenis"] = pd.to_numeric(base_n["n_jenis"], errors="coerce").fillna(0).astype(int)
642
 
 
643
  base_n["target_total_33_88_jenis"] = 0.0
644
  base_n["pop_total_jenis"] = 0.0
645
 
 
646
  # SEKOLAH + UMUM dari POP_KAB / POP_PROV
 
 
647
  if not base_pop.empty:
648
  if mode == "KAB":
649
  pop_sekolah = pd.to_numeric(base_pop.get("jumlah_populasi_sekolah", 0), errors="coerce").fillna(0.0)
 
670
  base_n.loc[m, "pop_total_jenis"] = base_n.loc[m, "group_key"].map(pop_umum).fillna(0.0).values
671
  base_n.loc[m, "target_total_33_88_jenis"] = base_n.loc[m, "group_key"].map(tgt_umum).fillna(0.0).values
672
 
 
673
  # KHUSUS dari POP_KHUSUS
 
 
674
  if pop_khusus is not None and not pop_khusus.empty:
675
  pk = pop_khusus.copy()
676
  pk["Pop_Total_Jenis"] = pd.to_numeric(pk.get("Pop_Total_Jenis", 0), errors="coerce").fillna(0.0)
 
690
  base_n.loc[m, "pop_total_jenis"] = base_n.loc[m, "group_key"].map(pop_series).fillna(0.0).values
691
  base_n.loc[m, "target_total_33_88_jenis"] = base_n.loc[m, "group_key"].map(tgt_series).fillna(0.0).values
692
 
 
693
  base_n["target_total_33_88_jenis"] = pd.to_numeric(base_n["target_total_33_88_jenis"], errors="coerce").fillna(0.0)
694
  base_n["pop_total_jenis"] = pd.to_numeric(base_n["pop_total_jenis"], errors="coerce").fillna(0.0)
695
 
696
  m_need_pop = (base_n["pop_total_jenis"] <= 0) & (base_n["target_total_33_88_jenis"] > 0)
697
  base_n.loc[m_need_pop, "pop_total_jenis"] = base_n.loc[m_need_pop, "target_total_33_88_jenis"] / float(TARGET_RATIO)
698
 
 
699
  base_n["faktor_penyesuaian_jenis"] = [
700
  faktor_penyesuaian_total(n, t)
701
  for n, t in zip(
 
752
 
753
  jenis_list = ["sekolah", "umum", "khusus"]
754
 
755
+ # GRID semua wilayah × 3 jenis
756
  base_keys = df[[key_col, label_col]].drop_duplicates().rename(columns={key_col: "group_key", label_col: label_name})
757
  full = base_keys.assign(_tmp=1).merge(
758
  pd.DataFrame({"Jenis": jenis_list, "_tmp": 1}),
759
  on="_tmp"
760
  ).drop(columns="_tmp")
761
 
762
+ # agregat real
763
  agg_real = df.groupby([key_col, label_col, "_dataset"], dropna=False).agg(
764
  Jumlah=("Indeks_Dasar_0_100", "size"),
765
  Rata2_sub_koleksi=("sub_koleksi", "mean"),
 
773
 
774
  agg_real["Jenis"] = agg_real["Jenis"].astype(str).str.lower().str.strip()
775
 
 
776
  agg = full.merge(agg_real, on=["group_key", label_name, "Jenis"], how="left")
777
  for c in ["Jumlah","Rata2_sub_koleksi","Rata2_sub_sdm","Rata2_sub_pelayanan","Rata2_sub_pengelolaan",
778
  "Rata2_dim_kepatuhan","Rata2_dim_kinerja","Indeks_Dasar_Agregat_0_100"]:
 
781
 
782
  agg["Jumlah"] = agg["Jumlah"].round(0).astype(int)
783
 
784
+ # merge faktor jenis
785
  if faktor_wilayah_jenis is None or faktor_wilayah_jenis.empty:
786
  agg["faktor_penyesuaian_jenis"] = 1.0
787
  agg["target_total_33_88_jenis"] = 0
 
808
  if "coverage_jenis_%" in agg.columns:
809
  agg["coverage_jenis_%"] = pd.to_numeric(agg["coverage_jenis_%"], errors="coerce").fillna(0.0).round(2)
810
 
811
+ # Indeks FINAL per jenis
812
  agg["Indeks_Final_Agregat_0_100"] = (
813
  pd.to_numeric(agg["Indeks_Dasar_Agregat_0_100"], errors="coerce").fillna(0.0)
814
  * pd.to_numeric(agg["faktor_penyesuaian_jenis"], errors="coerce").fillna(1.0)
815
  )
816
 
817
+ # Kinerja relatif per jenis (dibandingkan sesama jenis)
818
+ agg = add_kinerja_scores(
819
+ agg,
820
+ score_col="Indeks_Final_Agregat_0_100",
821
+ group_cols=["Jenis"],
822
+ prefix="Score_Kinerja_WilayahJenis"
823
+ )
824
+
825
+ # rounding
826
  for c in [
827
  "Rata2_sub_koleksi","Rata2_sub_sdm","Rata2_sub_pelayanan","Rata2_sub_pengelolaan",
828
  "Rata2_dim_kepatuhan","Rata2_dim_kinerja"
 
849
 
850
  kew_norm = str(kew_value or "").upper()
851
  label_name = "Provinsi" if "PROV" in kew_norm else "Kab/Kota"
 
852
  jenis_list = ["sekolah", "umum", "khusus"]
853
 
854
  a = agg_jenis.copy()
855
  a["Jenis"] = a["Jenis"].astype(str).str.lower().str.strip()
856
 
857
  base_keys = a[["group_key", label_name]].drop_duplicates()
 
858
  full = base_keys.assign(_tmp=1).merge(
859
  pd.DataFrame({"Jenis": jenis_list, "_tmp": 1}),
860
  on="_tmp"
 
875
  how="left"
876
  )
877
 
 
878
  for c in cols_present:
879
  full[c] = pd.to_numeric(full[c], errors="coerce").fillna(0.0)
880
 
 
890
  Indeks_Final_Wilayah_0_100=("Indeks_Final_Agregat_0_100", "mean"),
891
  )
892
 
893
+ # Tempel info Pop/Target/N per jenis + total
894
  if faktor_wilayah_jenis is not None and not faktor_wilayah_jenis.empty:
895
  fw = faktor_wilayah_jenis.copy()
896
  fw["Jenis"] = fw["Jenis"].astype(str).str.lower().str.strip()
 
901
  values=["pop_total_jenis", "target_total_33_88_jenis", "n_jenis", "gap_target33_88_jenis", "faktor_penyesuaian_jenis"],
902
  aggfunc="first"
903
  )
 
904
  piv.columns = [f"{v}_{k}" for v, k in piv.columns]
905
  piv = piv.reset_index()
 
906
  out = out.merge(piv, on=["group_key", label_name], how="left")
907
 
 
908
  for j in ["sekolah", "umum", "khusus"]:
909
  for basecol in ["pop_total_jenis", "target_total_33_88_jenis", "n_jenis", "gap_target33_88_jenis"]:
910
  c = f"{basecol}_{j}"
911
  if c in out.columns:
912
  out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0).round(0).astype(int)
 
913
  cfac = f"faktor_penyesuaian_jenis_{j}"
914
  if cfac in out.columns:
915
  out[cfac] = pd.to_numeric(out[cfac], errors="coerce").fillna(1.0).round(3)
916
 
 
917
  out["pop_total_all"] = (
918
  out.get("pop_total_jenis_sekolah", 0)
919
  + out.get("pop_total_jenis_umum", 0)
 
939
  )
940
  out["coverage_target33_88_all_%"] = pd.to_numeric(out["coverage_target33_88_all_%"], errors="coerce").fillna(0.0).round(2)
941
 
942
+ # Tambah skor kinerja relatif untuk keseluruhan wilayah (dibandingkan seluruh wilayah)
943
+ out = add_kinerja_scores(
944
+ out,
945
+ score_col="Indeks_Final_Wilayah_0_100",
946
+ group_cols=None,
947
+ prefix="Score_Kinerja_WilayahTotal"
948
+ )
949
+
950
  # rounding index
951
  for c in [
952
  "Rata2_sub_koleksi","Rata2_sub_sdm","Rata2_sub_pelayanan","Rata2_sub_pengelolaan",
 
1080
 
1081
  # ============================================================
1082
  # 10) DETAIL ENTITAS: Final menempel dari agg_total (wilayah)
1083
+ # + skor kinerja relatif per jenis (entitas-level)
1084
  # ============================================================
1085
 
1086
  def attach_final_to_detail(df_filtered: pd.DataFrame, agg_total: pd.DataFrame, meta: dict, kew_value: str):
 
1124
  out = df[keep].copy()
1125
  out = out.rename(columns={label_cols[0]:"Provinsi", label_cols[1]:"Kab/Kota", "_dataset":"Jenis"})
1126
 
1127
+ # skor kinerja relatif per entitas (dibandingkan sesama jenis)
1128
+ out = add_kinerja_scores(
1129
+ out,
1130
+ score_col="Indeks_Dasar_0_100",
1131
+ group_cols=["Jenis"],
1132
+ prefix="Score_Kinerja_Entitas"
1133
+ )
1134
+
1135
  for c in ["sub_koleksi","sub_sdm","sub_pelayanan","sub_pengelolaan","dim_kepatuhan","dim_kinerja"]:
1136
  if c in out.columns:
1137
  out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0.0).round(3)
 
1183
  fig = go.Figure()
1184
  fig.update_layout(
1185
  title=title,
1186
+ xaxis_title="Skor (0–100)",
1187
  yaxis_title="Kepadatan",
1188
  hovermode="x unified",
1189
  margin=dict(l=40, r=20, t=60, b=40),
 
1211
  fig.add_trace(go.Scatter(
1212
  x=[x_single], y=[0], mode="markers", name="Data", marker=dict(size=10),
1213
  hovertext=hovertext,
1214
+ hovertemplate="%{hovertext}<extra></extra>" if hovertext is not None else "Skor: %{x:.2f}<extra></extra>",
1215
  showlegend=False,
1216
  ))
1217
  fig.add_vline(x=x_single, line_width=1, line_dash="dash", annotation_text=f"Nilai: {x_single:.1f}", annotation_position="top")
 
1243
  pdf = (1.0 / (sigma * np.sqrt(2 * np.pi))) * np.exp(-0.5 * ((xs - mu) / sigma) ** 2)
1244
 
1245
  fig.add_trace(go.Scatter(
1246
+ x=xs, y=pdf, mode="lines", name="Kurva Normal (fit)",
1247
  hovertemplate="x=%{x:.2f}<br>pdf=%{y:.4f}<extra></extra>"
1248
  ))
1249
 
 
1273
  fig.add_trace(go.Scatter(
1274
  x=x, y=np.zeros_like(x), mode="markers", name="Data", marker=dict(size=8),
1275
  hovertext=hovertext,
1276
+ hovertemplate="%{hovertext}<extra></extra>" if hovertext is not None else "Skor: %{x:.2f}<extra></extra>",
1277
  showlegend=False
1278
  ))
1279
 
 
1287
 
1288
 
1289
  # ============================================================
1290
+ # 13) KPI DASHBOARD (FINAL: skor absolut)
1291
  # ============================================================
1292
 
1293
  def compute_dashboard_kpis(summary_jenis: pd.DataFrame):
 
1299
 
1300
  final_all = _get("keseluruhan", "Indeks_Final_Disesuaikan_0_100")
1301
  dasar_all = _get("keseluruhan", "Indeks_Dasar_0_100")
 
1302
  return {"final_all": final_all, "dasar_all": dasar_all}
1303
 
1304
  def build_kpi_markdown(summary_jenis: pd.DataFrame) -> str:
 
1312
  return f"""
1313
  <div style="display:flex; gap:12px; flex-wrap:wrap;">
1314
  <div style="border:1px solid #333; border-radius:10px; padding:10px 12px; min-width:260px;">
1315
+ <div style="opacity:0.8;">Indeks IPLM FINAL (Disesuaikan 33.88%)</div>
1316
  <div style="font-size:26px; font-weight:700;">{fmt(k["final_all"],2)}</div>
1317
+ <div style="opacity:0.7;">Skor absolut (untuk akuntabilitas)</div>
1318
  </div>
1319
 
1320
  <div style="border:1px solid #333; border-radius:10px; padding:10px 12px; min-width:260px;">
1321
  <div style="opacity:0.8;">Indeks Dasar (Tanpa Penyesuaian)</div>
1322
  <div style="font-size:26px; font-weight:700;">{fmt(k["dasar_all"],2)}</div>
1323
+ <div style="opacity:0.7;">Sebelum faktor kecukupan sampel</div>
1324
  </div>
1325
  </div>
1326
  """.strip()
 
1366
  wl = r.get(label_col, "(wilayah)") if label_col else "(wilayah)"
1367
  lines.append(f"- {wl}: Final={float(r['Indeks_Final_Wilayah_0_100']):.2f}")
1368
 
1369
+ # kinerja relatif (percentile) jika ada
1370
+ if agg_total is not None and not agg_total.empty and "Score_Kinerja_WilayahTotal_Percentile_0_100" in agg_total.columns:
1371
+ label_col = "Kab/Kota" if "Kab/Kota" in agg_total.columns else ("Provinsi" if "Provinsi" in agg_total.columns else None)
1372
+ lines.append("\nTop 5 wilayah (Percentile kinerja tertinggi):")
1373
+ top = agg_total.sort_values("Score_Kinerja_WilayahTotal_Percentile_0_100", ascending=False).head(5)
1374
+ for _, r in top.iterrows():
1375
+ wl = r.get(label_col, "(wilayah)") if label_col else "(wilayah)"
1376
+ lines.append(f"- {wl}: Pctl={float(r['Score_Kinerja_WilayahTotal_Percentile_0_100']):.2f}")
1377
+
1378
  return "\n".join(lines)
1379
 
1380
  def generate_llm_analysis(summary_jenis, agg_total, verif_total, wilayah, kew):
 
1390
  {ctx}
1391
 
1392
  Buat analisis 3 paragraf:
1393
+ 1) Gambaran umum (skor absolut).
1394
+ 2) Kinerja relatif (percentile) + per jenis.
1395
  3) Rekomendasi singkat.
1396
  Catatan: target sampel yang digunakan adalah {TARGET_RATIO*100:.2f}% (bukan 68%).
1397
  """
 
1412
  doc = Document()
1413
  doc.add_heading(f"Laporan IPLM β€” {wilayah}", level=1)
1414
  doc.add_paragraph(f"Target sampel per jenis: {TARGET_RATIO*100:.2f}%")
1415
+ doc.add_paragraph("Catatan: Skor kinerja relatif menggunakan Percentile (0–100) yang stabil terhadap bentuk distribusi.")
1416
 
1417
  doc.add_heading("Ringkasan (Jenis + Keseluruhan)", level=2)
1418
 
 
1473
  if df_all is None or df_all.empty or df_raw is None or df_raw.empty:
1474
  return _empty_outputs("⚠️ Data belum ter-load. Pastikan file tersedia di repo/server.")
1475
 
1476
+ # FILTER df_all
1477
  df = df_all.copy()
1478
  if prov_value and prov_value != "(Semua)":
1479
  df = df[df["PROV_DISP"] == prov_value]
 
1494
  verif_total = build_verif_jenis(faktor_wilayah_jenis, kew_value or "(Semua)")
1495
  detail_view = attach_final_to_detail(df, agg_total, meta, kew_value or "(Semua)")
1496
 
1497
+ # agg_jenis view (UI hanya sampai indeks dasar)
1498
  if agg_jenis_full is None or agg_jenis_full.empty:
1499
  agg_jenis_view = agg_jenis_full
1500
  else:
 
1512
  cols_upto = [c for c in cols_upto if c in agg_jenis_full.columns]
1513
  agg_jenis_view = agg_jenis_full[cols_upto].copy()
1514
 
1515
+ # FILTER RAW DOWNLOAD
1516
  raw = df_raw.copy()
1517
  if prov_value and prov_value != "(Semua)":
1518
  raw = raw[raw["PROV_DISP"] == prov_value]
 
1521
  if kew_value and kew_value != "(Semua)":
1522
  raw = raw[raw["KEW_NORM"] == kew_value]
1523
 
1524
+ # bell curve per jenis (entitas) -> gunakan Percentile (utama) kalau ada
1525
  if detail_view is None or detail_view.empty:
1526
+ fig_sekolah = _make_bell_curve(pd.DataFrame(), "Score_Kinerja_Entitas_Percentile_0_100", "Bell Curve β€” Jenis: Sekolah", min_points=2)
1527
+ fig_umum = _make_bell_curve(pd.DataFrame(), "Score_Kinerja_Entitas_Percentile_0_100", "Bell Curve β€” Jenis: Umum", min_points=2)
1528
+ fig_khusus = _make_bell_curve(pd.DataFrame(), "Score_Kinerja_Entitas_Percentile_0_100", "Bell Curve β€” Jenis: Khusus", min_points=2)
1529
  else:
1530
+ xcol_ent = "Score_Kinerja_Entitas_Percentile_0_100" if "Score_Kinerja_Entitas_Percentile_0_100" in detail_view.columns else "Indeks_Dasar_0_100"
1531
  label_col_e = "nm_perpustakaan" if "nm_perpustakaan" in detail_view.columns else None
1532
+ hover_cols_e = [c for c in ["Provinsi", "Kab/Kota", "KEW_NORM", "Jenis", "Indeks_Dasar_0_100", "Indeks_Final_0_100", xcol_ent] if c in detail_view.columns]
1533
 
1534
  def _fig_jenis_ent(jenis_key: str, judul: str):
1535
  d = detail_view[detail_view["Jenis"].astype(str).str.lower() == jenis_key].copy()
1536
  return _make_bell_curve(d, xcol=xcol_ent, title=judul, label_col=label_col_e, hover_cols=hover_cols_e, min_points=2)
1537
 
1538
+ fig_sekolah = _fig_jenis_ent("sekolah", f"Bell Curve β€” Jenis: Sekolah (Skor: {xcol_ent})")
1539
+ fig_umum = _fig_jenis_ent("umum", f"Bell Curve β€” Jenis: Umum (Skor: {xcol_ent})")
1540
+ fig_khusus = _fig_jenis_ent("khusus", f"Bell Curve β€” Jenis: Khusus (Skor: {xcol_ent})")
1541
 
1542
  # KPI
1543
  kpi_md = build_kpi_markdown(summary_jenis)
1544
 
1545
+ # export
1546
  tmpdir = tempfile.mkdtemp()
1547
  prov_slug = (_canon(prov_value or "SEMUA").upper() or "SEMUA")
1548
  kab_slug = (_canon(kab_value or "SEMUA").upper() or "SEMUA")
 
1624
 
1625
  with gr.Blocks() as demo:
1626
  gr.Markdown(f"""
1627
+ # IPLM 2025 β€” Final (Target Sampel **33.88%** per Jenis) + Penilaian Kinerja Relatif (Percentile)
1628
  **Mode NO UPLOAD (cache aktif).** File dibaca dari repo/server:
1629
  - `DATA_FILE` = **{DATA_FILE}**
1630
  - `POP_KAB` = **{POP_KAB}**
 
1633
 
1634
  **TARGET RATIO (per jenis): {TARGET_RATIO*100:.2f}%**
1635
 
1636
+ **Kinerja Relatif (untuk evaluasi kinerja):**
1637
+ - `Score_Kinerja_*_Percentile_0_100` (utama, stabil tanpa asumsi normal)
1638
+ - `Score_Kinerja_*_RobustZ_0_100` (opsional, tahan outlier)
1639
+
1640
+ **Skor Absolut (untuk akuntabilitas):**
1641
+ - `Indeks_Final_*` (sudah disesuaikan target 33.88%)
1642
  """)
1643
 
1644
  state_df = gr.State(None)
 
1665
  gr.Markdown("## Ringkasan (Jenis + Keseluruhan) β€” Pop/Target33.88/Terkumpul/Coverage + Penyesuaian")
1666
  out_summary = gr.DataFrame(interactive=False)
1667
 
1668
+ gr.Markdown("## Agregat Wilayah (Keseluruhan) β€” FIX: avg3 dari 3 jenis + Skor Kinerja Relatif")
1669
  out_agg_total = gr.DataFrame(interactive=False)
1670
 
1671
+ gr.Markdown("## Agregat Wilayah Γ— Jenis β€” (ditampilkan sampai Indeks_Dasar_Agregat_0_100)")
1672
  out_agg_jenis = gr.DataFrame(interactive=False)
1673
 
1674
+ gr.Markdown("## Detail Entitas (Final menempel dari wilayah + Skor Kinerja Relatif per Jenis)")
1675
  out_detail = gr.DataFrame(interactive=False)
1676
 
1677
  gr.Markdown("## Kecukupan Sampel 33.88% (tanpa angka koma untuk integer)")
1678
  out_verif = gr.DataFrame(interactive=False)
1679
 
1680
+ gr.Markdown("## Bell Curve β€” per Jenis (berbasis Score_Kinerja_Entitas_Percentile_0_100 jika tersedia)")
1681
  gr.Markdown("### Perpustakaan Umum")
1682
  bell_umum = gr.Plot(scale=1)
1683