irhamni committed on
Commit
5b10245
·
verified ·
1 Parent(s): 5ff0f52

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +423 -157
app.py CHANGED
@@ -1,21 +1,44 @@
1
  # -*- coding: utf-8 -*-
2
  """
3
- IPLM 2025 — Final (Target Sampel 33.88% per Jenis)
4
-
5
- PERUBAHAN SESUAI PERMINTAAN:
6
- 1) KPI Dashboard: HANYA 2 kartu
7
- - Indeks IPLM FINAL (Disesuaikan 33.88%)
8
- - Indeks Dasar (Tanpa Penyesuaian)
9
- ✅ Kartu "Coverage terhadap Target 33.88% (Keseluruhan)" DIHAPUS.
10
-
11
- 2) Bell Curve: DIKEMBALIKAN KE SEMULA
12
- - Menampilkan distribusi **Indeks_Dasar_0_100** pada LEVEL ENTITAS (perpustakaan)
13
- - Dipisah per Jenis: Sekolah / Umum / Khusus
14
- - Titik entitas menampilkan label **nama perpustakaan** (hover) per jenis.
15
-
16
- Catatan:
17
- - Skor tetap berbasis ABSOLUT.
18
- - Penyesuaian target 33.88% tetap dipakai untuk indeks final agregat wilayah.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  """
20
 
21
  import os
@@ -30,7 +53,7 @@ import pandas as pd
30
  import plotly.graph_objects as go
31
  from sklearn.preprocessing import PowerTransformer
32
 
33
- # python-docx opsional
34
  DOCX_AVAILABLE = True
35
  try:
36
  from docx import Document
@@ -59,8 +82,10 @@ POP_KHUSUS = os.getenv("POP_KHUSUS", "Data_populasi_perp_khusus.xlsx")
59
  W_KEPATUHAN = float(os.getenv("W_KEPATUHAN", "0.30"))
60
  W_KINERJA = float(os.getenv("W_KINERJA", "0.70"))
61
 
 
62
  TARGET_RATIO = float(os.getenv("TARGET_RATIO", "0.3388"))
63
 
 
64
  USE_LLM = True
65
  LLM_MODEL_NAME = os.getenv("LLM_MODEL_NAME", "meta-llama/Meta-Llama-3-8B-Instruct")
66
  HF_TOKEN = (
@@ -110,6 +135,7 @@ def coerce_num(val):
110
  t = t.replace("\u00a0", " ").replace("Rp", "").replace("%", "")
111
  t = re.sub(r"[^0-9,.\-]", "", t)
112
 
 
113
  if t.count(".") > 1 and t.count(",") == 1:
114
  t = t.replace(".", "").replace(",", ".")
115
  elif t.count(",") > 1 and t.count(".") == 1:
@@ -131,17 +157,6 @@ def minmax_norm(s: pd.Series) -> pd.Series:
131
  return pd.Series(0.0, index=s.index)
132
  return (x - mn) / (mx - mn)
133
 
134
- def _mean_norm_cols(row, cols):
135
- vals = []
136
- for c in cols:
137
- k = f"norm_{c}"
138
- if k in row.index:
139
- v = row[k]
140
- if pd.isna(v):
141
- v = 0.0
142
- vals.append(float(v))
143
- return float(np.mean(vals)) if vals else 0.0
144
-
145
  def norm_kew(v):
146
  if pd.isna(v):
147
  return None
@@ -199,21 +214,16 @@ def safe_div(num, den):
199
  return float(num) / float(den)
200
 
201
  def faktor_penyesuaian_total(n_total: float, target_total: float) -> float:
 
 
 
 
202
  if target_total is None or pd.isna(target_total) or float(target_total) <= 0:
203
  return 1.0
204
  if n_total is None or pd.isna(n_total) or float(n_total) < 0:
205
  n_total = 0.0
206
  return float(min(float(n_total) / float(target_total), 1.0))
207
 
208
- def _first_nonempty(*vals, default=""):
209
- for v in vals:
210
- if v is None:
211
- continue
212
- s = str(v).strip()
213
- if s != "" and s.lower() != "nan":
214
- return s
215
- return default
216
-
217
 
218
  # ============================================================
219
  # 3) INDIKATOR IPLM
@@ -241,6 +251,7 @@ pengelolaan_cols = [
241
  ]
242
  all_indicators = koleksi_cols + sdm_cols + pelayanan_cols + pengelolaan_cols
243
 
 
244
  alias_map_raw = {
245
  "j_judul_koleksi_tercetak": "JudulTercetak",
246
  "j_eksemplar_koleksi_tercetak": "EksemplarTercetak",
@@ -275,12 +286,32 @@ alias_map = {_canon(k): v for k, v in alias_map_raw.items()}
275
  # 4) PIPELINE NASIONAL (LEVEL ENTITAS)
276
  # ============================================================
277
 
 
 
 
 
 
 
 
 
 
 
 
278
  def prepare_global(df_src: pd.DataFrame) -> pd.DataFrame:
 
 
 
 
 
 
 
 
279
  if df_src is None or df_src.empty:
280
  return df_src
281
 
282
  df = df_src.copy()
283
 
 
284
  rename_map = {}
285
  for col in df.columns:
286
  c = _canon(col)
@@ -298,6 +329,7 @@ def prepare_global(df_src: pd.DataFrame) -> pd.DataFrame:
298
  for c in available:
299
  df[c] = df[c].apply(coerce_num)
300
 
 
301
  for c in available:
302
  x = pd.to_numeric(df[c], errors="coerce").astype(float).values
303
  mask = ~np.isnan(x)
@@ -329,9 +361,27 @@ def prepare_global(df_src: pd.DataFrame) -> pd.DataFrame:
329
  # 5) CACHE LOADER (NO UPLOAD)
330
  # ============================================================
331
 
332
- _CACHE = {"key": None, "df_all": None, "df_raw": None, "pop_kab": None, "pop_prov": None, "pop_khusus": None, "meta": None, "info": None}
 
 
 
 
 
 
 
 
 
333
 
334
  def _parse_pop_khusus(path_xlsx: str) -> pd.DataFrame:
 
 
 
 
 
 
 
 
 
335
  df = pd.read_excel(path_xlsx)
336
  if df is None or df.empty:
337
  return pd.DataFrame()
@@ -358,12 +408,24 @@ def _parse_pop_khusus(path_xlsx: str) -> pd.DataFrame:
358
  mm = _disp_text(m) or ""
359
  if mm == "":
360
  continue
 
361
  if mm.startswith("PROVINSI "):
362
  prov_name = mm.replace("PROVINSI", "").strip()
363
  current_prov = prov_name
364
- rows.append({"LEVEL": "PROV", "Provinsi_Label": f"PROVINSI {prov_name}", "Kab_Kota_Label": None, "Pop_Total_Jenis": pval})
 
 
 
 
 
365
  continue
366
- rows.append({"LEVEL": "KAB", "Provinsi_Label": f"PROVINSI {current_prov}" if current_prov else None, "Kab_Kota_Label": mm, "Pop_Total_Jenis": pval})
 
 
 
 
 
 
367
 
368
  pop = pd.DataFrame(rows)
369
  if pop.empty:
@@ -375,7 +437,19 @@ def _parse_pop_khusus(path_xlsx: str) -> pd.DataFrame:
375
  return pop
376
 
377
  def load_default_files(force=False):
378
- key = (DATA_FILE, POP_KAB, POP_PROV, POP_KHUSUS, _mtime(DATA_FILE), _mtime(POP_KAB), _mtime(POP_PROV), _mtime(POP_KHUSUS))
 
 
 
 
 
 
 
 
 
 
 
 
379
  if (not force) and _CACHE["key"] == key and _CACHE["df_all"] is not None:
380
  return _CACHE["df_all"], _CACHE["df_raw"], _CACHE["pop_kab"], _CACHE["pop_prov"], _CACHE["pop_khusus"], _CACHE["meta"], _CACHE["info"]
381
 
@@ -391,7 +465,7 @@ def load_default_files(force=False):
391
  df_raw = pd.concat(frames, ignore_index=True, sort=False)
392
 
393
  prov_col = pick_col(df_raw, ["provinsi", "Provinsi", "PROVINSI"])
394
- kab_col = pick_col(df_raw, ["kab/kota", "kab_kota", "Kab/Kota", "Kab_Kota", "KAB/KOTA", "kabupaten_kota", "Kabupaten/Kota", "kabupaten kota", "kota"])
395
  kew_col = pick_col(df_raw, ["kewenangan", "jenis_kewenangan", "Kewenangan", "KEWENANGAN"])
396
  jenis_col = pick_col(df_raw, ["jenis_perpustakaan", "Jenis Perpustakaan", "JENIS_PERPUSTAKAAN"])
397
  nama_col = pick_col(df_raw, ["nm_perpustakaan","nama_perpustakaan","Nama Perpustakaan","nm_instansi_lembaga","nm_perpus"])
@@ -406,6 +480,7 @@ def load_default_files(force=False):
406
  _CACHE.update({"key": key, "df_all": None, "df_raw": None, "pop_kab": None, "pop_prov": None, "pop_khusus": None, "meta": {}, "info": info})
407
  return None, None, None, None, None, {}, info
408
 
 
409
  val_map_jenis = {
410
  "PERPUSTAKAAN SEKOLAH": "sekolah", "SEKOLAH": "sekolah",
411
  "PERPUSTAKAAN UMUM": "umum", "UMUM": "umum", "PERPUSTAKAAN DAERAH": "umum",
@@ -419,7 +494,7 @@ def load_default_files(force=False):
419
  df_raw["prov_key"] = df_raw["PROV_DISP"].apply(norm_prov_label)
420
  df_raw["kab_key"] = df_raw["KAB_DISP"].apply(norm_kab_label)
421
 
422
- # Dedup berdasarkan (prov,kab,kew,jenis,nama)
423
  if nama_col and nama_col in df_raw.columns:
424
  kcols = [prov_col, kab_col, kew_col, jenis_col, nama_col]
425
  else:
@@ -480,7 +555,16 @@ def load_default_files(force=False):
480
  f"πŸ•’ mtime: DM={time.ctime(_mtime(DATA_FILE))} | Kab={time.ctime(_mtime(POP_KAB))} | Prov={time.ctime(_mtime(POP_PROV))} | Khusus={time.ctime(_mtime(POP_KHUSUS))}"
481
  )
482
 
483
- _CACHE.update({"key": key, "df_all": df_all, "df_raw": df_raw, "pop_kab": pop_kab, "pop_prov": pop_prov, "pop_khusus": pop_khusus, "meta": meta, "info": info})
 
 
 
 
 
 
 
 
 
484
  return df_all, df_raw, pop_kab, pop_prov, pop_khusus, meta, info
485
 
486
 
@@ -488,19 +572,19 @@ def load_default_files(force=False):
488
  # 6) FAKTOR WILAYAH — PER JENIS (TARGET 33.88%)
489
  # ============================================================
490
 
491
- def _get_series_from_cols(base_pop: pd.DataFrame, col_candidates: list, index_name: str):
492
- for c in col_candidates:
493
- if c in base_pop.columns:
494
- return pd.to_numeric(base_pop[c], errors="coerce").fillna(0.0)
495
- can_map = {_canon(c): c for c in base_pop.columns}
496
- for c in col_candidates:
497
- k = _canon(c)
498
- if k in can_map:
499
- cc = can_map[k]
500
- return pd.to_numeric(base_pop[cc], errors="coerce").fillna(0.0)
501
- return pd.Series(0.0, index=base_pop.index, name=f"{index_name}_zeros")
502
-
503
- def build_faktor_wilayah_jenis(df_filtered: pd.DataFrame, pop_kab: pd.DataFrame, pop_prov: pd.DataFrame, pop_khusus: pd.DataFrame, kew_value: str):
504
  if df_filtered is None or df_filtered.empty:
505
  return pd.DataFrame()
506
 
@@ -512,31 +596,32 @@ def build_faktor_wilayah_jenis(df_filtered: pd.DataFrame, pop_kab: pd.DataFrame,
512
 
513
  jenis_list = ["sekolah", "umum", "khusus"]
514
 
 
515
  if "PROV" in kew_norm:
516
  key_col, label_col, label_name, mode = "prov_key", "PROV_DISP", "Provinsi", "PROV"
517
  base_pop = pop_prov.copy() if (pop_prov is not None and not pop_prov.empty) else pd.DataFrame()
518
  if not base_pop.empty and "prov_key" not in base_pop.columns:
519
- if "Provinsi_Label" in base_pop.columns:
520
- base_pop["prov_key"] = base_pop["Provinsi_Label"].apply(norm_prov_label)
521
- else:
522
- base_pop["prov_key"] = base_pop.iloc[:, 0].apply(norm_prov_label)
523
  base_pop = base_pop.set_index("prov_key") if (not base_pop.empty and "prov_key" in base_pop.columns) else pd.DataFrame().set_index(pd.Index([]))
524
  else:
525
  key_col, label_col, label_name, mode = "kab_key", "KAB_DISP", "Kab/Kota", "KAB"
526
  base_pop = pop_kab.copy() if (pop_kab is not None and not pop_kab.empty) else pd.DataFrame()
527
  if not base_pop.empty and "kab_key" not in base_pop.columns:
528
- if "Kab_Kota_Label" in base_pop.columns:
529
- base_pop["kab_key"] = base_pop["Kab_Kota_Label"].apply(norm_kab_label)
530
- else:
531
- base_pop["kab_key"] = base_pop.iloc[:, 0].apply(norm_kab_label)
532
  base_pop = base_pop.set_index("kab_key") if (not base_pop.empty and "kab_key" in base_pop.columns) else pd.DataFrame().set_index(pd.Index([]))
533
 
 
534
  base_keys = df[[key_col, label_col]].drop_duplicates().rename(columns={key_col: "group_key", label_col: label_name})
535
- full = base_keys.assign(_tmp=1).merge(pd.DataFrame({"Jenis": jenis_list, "_tmp": 1}), on="_tmp").drop(columns="_tmp")
 
 
 
536
 
 
537
  cnt = (
538
  df.groupby([key_col, label_col, "_dataset"], dropna=False)
539
- .size().reset_index(name="n_jenis")
 
540
  .rename(columns={key_col: "group_key", label_col: label_name, "_dataset": "Jenis"})
541
  )
542
  cnt["Jenis"] = cnt["Jenis"].astype(str).str.lower().str.strip()
@@ -547,21 +632,25 @@ def build_faktor_wilayah_jenis(df_filtered: pd.DataFrame, pop_kab: pd.DataFrame,
547
  base_n["target_total_33_88_jenis"] = 0.0
548
  base_n["pop_total_jenis"] = 0.0
549
 
550
- # sekolah + umum dari POP_KAB/POP_PROV
551
  if not base_pop.empty:
552
  if mode == "KAB":
553
- pop_sekolah = _get_series_from_cols(base_pop, ["jumlah_populasi_sekolah", "pop_sekolah", "sekolah"], "pop_sekolah")
554
- pop_umum = _get_series_from_cols(base_pop, ["jumlah_populasi_umum", "pop_umum", "umum"], "pop_umum")
 
555
  tgt_sekolah = pop_sekolah * float(TARGET_RATIO)
556
  tgt_umum = pop_umum * float(TARGET_RATIO)
557
  else:
558
- sma = _get_series_from_cols(base_pop, ["sma", "SMA"], "sma")
559
- smk = _get_series_from_cols(base_pop, ["smk", "SMK"], "smk")
560
- slb = _get_series_from_cols(base_pop, ["slb", "SLB"], "slb")
561
- pop_sekolah = (sma + smk + slb)
 
 
562
  tgt_sekolah = pop_sekolah * float(TARGET_RATIO)
563
- pop_umum = _get_series_from_cols(base_pop, ["perpus_umum_prop", "perpus_umum", "umum"], "pop_umum")
564
- tgt_umum = pop_umum * float(TARGET_RATIO)
 
565
 
566
  m = base_n["Jenis"].eq("sekolah")
567
  base_n.loc[m, "pop_total_jenis"] = base_n.loc[m, "group_key"].map(pop_sekolah).fillna(0.0).values
@@ -571,7 +660,7 @@ def build_faktor_wilayah_jenis(df_filtered: pd.DataFrame, pop_kab: pd.DataFrame,
571
  base_n.loc[m, "pop_total_jenis"] = base_n.loc[m, "group_key"].map(pop_umum).fillna(0.0).values
572
  base_n.loc[m, "target_total_33_88_jenis"] = base_n.loc[m, "group_key"].map(tgt_umum).fillna(0.0).values
573
 
574
- # khusus dari POP_KHUSUS
575
  if pop_khusus is not None and not pop_khusus.empty:
576
  pk = pop_khusus.copy()
577
  pk["Pop_Total_Jenis"] = pd.to_numeric(pk.get("Pop_Total_Jenis", 0), errors="coerce").fillna(0.0)
@@ -594,9 +683,11 @@ def build_faktor_wilayah_jenis(df_filtered: pd.DataFrame, pop_kab: pd.DataFrame,
594
  base_n["target_total_33_88_jenis"] = pd.to_numeric(base_n["target_total_33_88_jenis"], errors="coerce").fillna(0.0)
595
  base_n["pop_total_jenis"] = pd.to_numeric(base_n["pop_total_jenis"], errors="coerce").fillna(0.0)
596
 
 
597
  m_need_pop = (base_n["pop_total_jenis"] <= 0) & (base_n["target_total_33_88_jenis"] > 0)
598
  base_n.loc[m_need_pop, "pop_total_jenis"] = base_n.loc[m_need_pop, "target_total_33_88_jenis"] / float(TARGET_RATIO)
599
 
 
600
  base_n["faktor_penyesuaian_jenis"] = [
601
  faktor_penyesuaian_total(n, t)
602
  for n, t in zip(
@@ -621,6 +712,7 @@ def build_faktor_wilayah_jenis(df_filtered: pd.DataFrame, pop_kab: pd.DataFrame,
621
  )
622
  ]
623
 
 
624
  base_n["target_total_33_88_jenis"] = pd.to_numeric(base_n["target_total_33_88_jenis"], errors="coerce").fillna(0).round(0).astype(int)
625
  base_n["pop_total_jenis"] = pd.to_numeric(base_n["pop_total_jenis"], errors="coerce").fillna(0).round(0).astype(int)
626
  base_n["coverage_jenis_%"] = pd.to_numeric(base_n["coverage_jenis_%"], errors="coerce").fillna(0.0).round(2)
@@ -635,6 +727,14 @@ def build_faktor_wilayah_jenis(df_filtered: pd.DataFrame, pop_kab: pd.DataFrame,
635
  # ============================================================
636
 
637
  def build_agg_wilayah_jenis(df_filtered: pd.DataFrame, faktor_wilayah_jenis: pd.DataFrame, kew_value: str):
 
 
 
 
 
 
 
 
638
  if df_filtered is None or df_filtered.empty:
639
  return pd.DataFrame()
640
 
@@ -652,9 +752,14 @@ def build_agg_wilayah_jenis(df_filtered: pd.DataFrame, faktor_wilayah_jenis: pd.
652
 
653
  jenis_list = ["sekolah", "umum", "khusus"]
654
 
 
655
  base_keys = df[[key_col, label_col]].drop_duplicates().rename(columns={key_col: "group_key", label_col: label_name})
656
- full = base_keys.assign(_tmp=1).merge(pd.DataFrame({"Jenis": jenis_list, "_tmp": 1}), on="_tmp").drop(columns="_tmp")
 
 
 
657
 
 
658
  agg_real = df.groupby([key_col, label_col, "_dataset"], dropna=False).agg(
659
  Jumlah=("Indeks_Dasar_0_100", "size"),
660
  Rata2_sub_koleksi=("sub_koleksi", "mean"),
@@ -676,15 +781,18 @@ def build_agg_wilayah_jenis(df_filtered: pd.DataFrame, faktor_wilayah_jenis: pd.
676
 
677
  agg["Jumlah"] = agg["Jumlah"].round(0).astype(int)
678
 
 
679
  if faktor_wilayah_jenis is None or faktor_wilayah_jenis.empty:
680
  agg["faktor_penyesuaian_jenis"] = 1.0
681
  agg["target_total_33_88_jenis"] = 0
682
  agg["pop_total_jenis"] = 0
683
  agg["coverage_jenis_%"] = 0.0
684
  agg["gap_target33_88_jenis"] = 0
 
685
  else:
686
  fw = faktor_wilayah_jenis.copy()
687
  fw["Jenis"] = fw["Jenis"].astype(str).str.lower().str.strip()
 
688
  keep = ["group_key", label_name, "Jenis",
689
  "faktor_penyesuaian_jenis", "target_total_33_88_jenis", "pop_total_jenis",
690
  "coverage_jenis_%", "gap_target33_88_jenis", "n_jenis"]
@@ -696,23 +804,28 @@ def build_agg_wilayah_jenis(df_filtered: pd.DataFrame, faktor_wilayah_jenis: pd.
696
  for c in ["target_total_33_88_jenis","pop_total_jenis","gap_target33_88_jenis","n_jenis"]:
697
  if c in agg.columns:
698
  agg[c] = pd.to_numeric(agg[c], errors="coerce").fillna(0).round(0).astype(int)
 
699
  if "coverage_jenis_%" in agg.columns:
700
  agg["coverage_jenis_%"] = pd.to_numeric(agg["coverage_jenis_%"], errors="coerce").fillna(0.0).round(2)
701
 
 
702
  agg["Indeks_Final_Agregat_0_100"] = (
703
  pd.to_numeric(agg["Indeks_Dasar_Agregat_0_100"], errors="coerce").fillna(0.0)
704
  * pd.to_numeric(agg["faktor_penyesuaian_jenis"], errors="coerce").fillna(1.0)
705
  )
706
 
 
707
  for c in [
708
  "Rata2_sub_koleksi","Rata2_sub_sdm","Rata2_sub_pelayanan","Rata2_sub_pengelolaan",
709
  "Rata2_dim_kepatuhan","Rata2_dim_kinerja"
710
  ]:
711
  if c in agg.columns:
712
  agg[c] = pd.to_numeric(agg[c], errors="coerce").fillna(0.0).round(3)
 
713
  for c in ["Indeks_Dasar_Agregat_0_100","Indeks_Final_Agregat_0_100"]:
714
  if c in agg.columns:
715
  agg[c] = pd.to_numeric(agg[c], errors="coerce").fillna(0.0).round(2)
 
716
  agg["faktor_penyesuaian_jenis"] = pd.to_numeric(agg["faktor_penyesuaian_jenis"], errors="coerce").fillna(1.0).round(3)
717
  return agg
718
 
@@ -722,6 +835,11 @@ def build_agg_wilayah_jenis(df_filtered: pd.DataFrame, faktor_wilayah_jenis: pd.
722
  # ============================================================
723
 
724
  def build_agg_wilayah_total_from_jenis(agg_jenis: pd.DataFrame, faktor_wilayah_jenis: pd.DataFrame, kew_value: str):
 
 
 
 
 
725
  if agg_jenis is None or agg_jenis.empty:
726
  return pd.DataFrame()
727
 
@@ -733,7 +851,10 @@ def build_agg_wilayah_total_from_jenis(agg_jenis: pd.DataFrame, faktor_wilayah_j
733
  a["Jenis"] = a["Jenis"].astype(str).str.lower().str.strip()
734
 
735
  base_keys = a[["group_key", label_name]].drop_duplicates()
736
- full = base_keys.assign(_tmp=1).merge(pd.DataFrame({"Jenis": jenis_list, "_tmp": 1}), on="_tmp").drop(columns="_tmp")
 
 
 
737
 
738
  cols_need = [
739
  "Jumlah",
@@ -744,7 +865,12 @@ def build_agg_wilayah_total_from_jenis(agg_jenis: pd.DataFrame, faktor_wilayah_j
744
  ]
745
  cols_present = [c for c in cols_need if c in a.columns]
746
 
747
- full = full.merge(a[["group_key", label_name, "Jenis"] + cols_present], on=["group_key", label_name, "Jenis"], how="left")
 
 
 
 
 
748
  for c in cols_present:
749
  full[c] = pd.to_numeric(full[c], errors="coerce").fillna(0.0)
750
 
@@ -760,12 +886,62 @@ def build_agg_wilayah_total_from_jenis(agg_jenis: pd.DataFrame, faktor_wilayah_j
760
  Indeks_Final_Wilayah_0_100=("Indeks_Final_Agregat_0_100", "mean"),
761
  )
762
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
763
  for c in [
764
  "Rata2_sub_koleksi","Rata2_sub_sdm","Rata2_sub_pelayanan","Rata2_sub_pengelolaan",
765
  "Rata2_dim_kepatuhan","Rata2_dim_kinerja"
766
  ]:
767
  if c in out.columns:
768
  out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0.0).round(3)
 
769
  for c in ["Indeks_Dasar_Agregat_0_100","Indeks_Final_Wilayah_0_100"]:
770
  if c in out.columns:
771
  out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0.0).round(2)
@@ -789,7 +965,7 @@ def build_summary_per_jenis(agg_jenis: pd.DataFrame, agg_total: pd.DataFrame):
789
  "Pop_Total_Jenis": 0,
790
  "Target33_88_Total_Jenis": 0,
791
  "Terkumpul_Jenis": 0,
792
- "Coverage_Target33_88_Jenis_%": 0.0, # tetap ada di tabel ringkasan (kalau Anda mau hapus juga, bilang)
793
  "Indeks_Dasar_0_100": 0.0,
794
  "Indeks_Final_Disesuaikan_0_100": 0.0,
795
  "Penyesuaian_Poin": 0.0,
@@ -880,42 +1056,62 @@ def build_summary_per_jenis(agg_jenis: pd.DataFrame, agg_total: pd.DataFrame):
880
  for c in ["Jumlah_Wilayah","Total_Perpus","Pop_Total_Jenis","Target33_88_Total_Jenis","Terkumpul_Jenis"]:
881
  if c in out.columns:
882
  out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0).round(0).astype(int)
 
883
  for c in ["Coverage_Target33_88_Jenis_%","Indeks_Dasar_0_100","Indeks_Final_Disesuaikan_0_100","Penyesuaian_Poin"]:
884
  if c in out.columns:
885
  out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0.0).round(2)
 
886
  return out
887
 
888
 
889
  # ============================================================
890
- # 10) DETAIL ENTITAS (untuk tabel + bell curve)
891
  # ============================================================
892
 
893
- def build_detail_entitas(df_filtered: pd.DataFrame, meta: dict):
894
  if df_filtered is None or df_filtered.empty:
895
  return pd.DataFrame()
896
 
 
897
  df = df_filtered.copy()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
898
  if meta.get("nama_col") and meta["nama_col"] in df.columns:
899
  df["nm_perpustakaan"] = df[meta["nama_col"]].astype(str)
900
- else:
901
- df["nm_perpustakaan"] = ""
902
 
903
- keep = [
904
- "nm_perpustakaan", "PROV_DISP", "KAB_DISP", "KEW_NORM", "_dataset",
905
  "sub_koleksi","sub_sdm","sub_pelayanan","sub_pengelolaan",
906
  "dim_kepatuhan","dim_kinerja",
907
  "Indeks_Dasar_0_100",
 
908
  ]
909
  keep = [c for c in keep if c in df.columns]
910
 
911
  out = df[keep].copy()
912
- out = out.rename(columns={"PROV_DISP":"Provinsi", "KAB_DISP":"Kab/Kota", "_dataset":"Jenis"})
913
 
914
  for c in ["sub_koleksi","sub_sdm","sub_pelayanan","sub_pengelolaan","dim_kepatuhan","dim_kinerja"]:
915
  if c in out.columns:
916
  out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0.0).round(3)
917
- if "Indeks_Dasar_0_100" in out.columns:
918
- out["Indeks_Dasar_0_100"] = pd.to_numeric(out["Indeks_Dasar_0_100"], errors="coerce").fillna(0.0).round(2)
 
919
 
920
  return out
921
 
@@ -943,8 +1139,10 @@ def build_verif_jenis(faktor_wilayah_jenis: pd.DataFrame, kew_value: str):
943
  for c in ["pop_total_jenis", "target_total_33_88_jenis", "n_jenis", "gap_target33_88_jenis"]:
944
  if c in out.columns:
945
  out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0).round(0).astype(int)
 
946
  if "coverage_jenis_%" in out.columns:
947
  out["coverage_jenis_%"] = pd.to_numeric(out["coverage_jenis_%"], errors="coerce").fillna(0.0).round(2)
 
948
  if "faktor_penyesuaian_jenis" in out.columns:
949
  out["faktor_penyesuaian_jenis"] = pd.to_numeric(out["faktor_penyesuaian_jenis"], errors="coerce").fillna(1.0).round(3)
950
 
@@ -952,42 +1150,83 @@ def build_verif_jenis(faktor_wilayah_jenis: pd.DataFrame, kew_value: str):
952
 
953
 
954
  # ============================================================
955
- # 12) BELL CURVE — Indeks Dasar per Entitas + label nama perpus
956
  # ============================================================
957
 
958
- def _make_bell_curve_entitas(detail_df: pd.DataFrame, jenis: str, title: str):
 
 
 
 
 
 
 
959
  fig = go.Figure()
960
  fig.update_layout(
961
  title=title,
962
- xaxis_title="Indeks Dasar (0–100)",
963
  yaxis_title="Kepadatan",
964
  hovermode="closest",
965
  margin=dict(l=40, r=20, t=60, b=40),
966
  legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0),
967
  )
968
 
969
- fig.update_xaxes(range=[0, 100])
970
- fig.update_yaxes(rangemode="tozero")
971
-
972
- if detail_df is None or detail_df.empty:
973
  fig.add_annotation(text="Tidak ada data untuk ditampilkan.", x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False)
 
 
974
  return fig
975
 
976
- d = detail_df.copy()
977
- d["Jenis"] = d["Jenis"].astype(str).str.lower().str.strip()
978
- d = d[d["Jenis"] == jenis].copy()
979
- if d.empty or "Indeks_Dasar_0_100" not in d.columns:
980
  fig.add_annotation(text="Tidak ada data untuk ditampilkan.", x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False)
 
 
981
  return fig
982
 
983
- x = pd.to_numeric(d["Indeks_Dasar_0_100"], errors="coerce").astype(float).values
984
- mask = np.isfinite(x)
985
- d = d.loc[mask].copy()
986
- x = x[mask]
987
- if len(x) == 0:
988
  fig.add_annotation(text="Tidak ada data untuk ditampilkan.", x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False)
 
 
989
  return fig
990
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
991
  mu = float(np.mean(x))
992
  sigma = float(np.std(x, ddof=1)) if len(x) > 1 else 1.0
993
  sigma = max(sigma, 1e-3)
@@ -997,44 +1236,33 @@ def _make_bell_curve_entitas(detail_df: pd.DataFrame, jenis: str, title: str):
997
  xs = np.linspace(xmin, xmax, 250)
998
  pdf = (1.0 / (sigma * np.sqrt(2 * np.pi))) * np.exp(-0.5 * ((xs - mu) / sigma) ** 2)
999
 
1000
- # kurva normal fit
1001
  fig.add_trace(go.Scatter(x=xs, y=pdf, mode="lines", name="Kurva Normal (fit)"))
1002
-
1003
- # titik entitas (y=0) dengan hover label nama perpus
1004
- hover_text = []
1005
- for _, r in d.iterrows():
1006
- nm = _first_nonempty(r.get("nm_perpustakaan"), default="-")
1007
- pv = _first_nonempty(r.get("Provinsi"), default="-")
1008
- kb = _first_nonempty(r.get("Kab/Kota"), default="-")
1009
- sc = r.get("Indeks_Dasar_0_100")
1010
- hover_text.append(f"<b>{nm}</b><br>{pv}<br>{kb}<br>Indeks Dasar: {float(sc):.2f}")
1011
-
1012
  fig.add_trace(go.Scatter(
1013
- x=x,
1014
- y=np.zeros_like(x),
1015
- mode="markers",
1016
- name="Entitas",
1017
- hovertext=hover_text,
1018
- hoverinfo="text",
1019
- showlegend=False
1020
  ))
1021
 
1022
- # garis Q1/Q2/Q3/Mean
1023
  q1, q2, q3 = np.percentile(x, [25, 50, 75])
1024
  for xv, lab in [(q1, "Q1"), (q2, "Q2 (Median)"), (q3, "Q3"), (mu, "Mean")]:
1025
  fig.add_vline(x=float(xv), line_width=1, line_dash="dash", annotation_text=f"{lab}: {xv:.1f}", annotation_position="top")
1026
 
 
 
1027
  return fig
1028
 
1029
 
1030
  # ============================================================
1031
- # 13) KPI DASHBOARD — HANYA 2 KARTU
1032
  # ============================================================
1033
 
1034
  def _safe_first(df, col, default=0.0, where=None):
1035
  if df is None or df.empty or col not in df.columns:
1036
  return default
1037
- sub = df if where is None else df.loc[where]
 
 
1038
  if sub is None or sub.empty:
1039
  return default
1040
  return float(pd.to_numeric(sub[col], errors="coerce").fillna(default).iloc[0])
@@ -1053,11 +1281,10 @@ def build_kpi_markdown(summary_jenis: pd.DataFrame) -> str:
1053
  def fmt(x, nd=2):
1054
  return "NA" if pd.isna(x) else f"{x:.{nd}f}"
1055
 
1056
- # ✅ HANYA 2 KARTU
1057
  return f"""
1058
  <div style="display:flex; gap:12px; flex-wrap:wrap;">
1059
  <div style="border:1px solid #333; border-radius:10px; padding:10px 12px; min-width:260px;">
1060
- <div style="opacity:0.8;">Indeks IPLM FINAL (Disesuaikan {TARGET_RATIO*100:.2f}%)</div>
1061
  <div style="font-size:26px; font-weight:700;">{fmt(k["final_all"],2)}</div>
1062
  <div style="opacity:0.7;">Skor absolut (untuk akuntabilitas)</div>
1063
  </div>
@@ -1091,7 +1318,7 @@ def get_llm_client():
1091
  _HF_CLIENT = None
1092
  return None
1093
 
1094
- def generate_llm_analysis(summary_jenis, wilayah, kew):
1095
  client = get_llm_client()
1096
  if client is None or (not USE_LLM):
1097
  return "Analisis otomatis (LLM) tidak digunakan / tidak tersedia."
@@ -1100,10 +1327,10 @@ def generate_llm_analysis(summary_jenis, wilayah, kew):
1100
  resp = client.chat_completion(
1101
  model=LLM_MODEL_NAME,
1102
  messages=[
1103
- {"role":"system","content":"Anda adalah analis kebijakan perpustakaan di Indonesia. Tulis analisis ringkas berbasis data."},
1104
- {"role":"user","content":f"{ctx}\nBuat analisis 3 paragraf: (1) skor dasar vs final, (2) penyesuaian 33.88% per jenis, (3) rekomendasi singkat."}
1105
  ],
1106
- max_tokens=520,
1107
  temperature=0.25,
1108
  top_p=0.9,
1109
  )
@@ -1118,7 +1345,6 @@ def generate_word_report(wilayah, summary_jenis, analysis_text):
1118
  doc = Document()
1119
  doc.add_heading(f"Laporan IPLM β€” {wilayah}", level=1)
1120
  doc.add_paragraph(f"Target sampel per jenis: {TARGET_RATIO*100:.2f}%")
1121
-
1122
  doc.add_heading("Ringkasan (Jenis + Keseluruhan)", level=2)
1123
  if summary_jenis is not None and not summary_jenis.empty:
1124
  show = summary_jenis.copy()
@@ -1137,12 +1363,10 @@ def generate_word_report(wilayah, summary_jenis, analysis_text):
1137
  cells[i].text = str(int(v))
1138
  else:
1139
  cells[i].text = str(v)
1140
-
1141
  doc.add_heading("Analisis (opsional)", level=2)
1142
  for p in (analysis_text or "").split("\n"):
1143
  if p.strip():
1144
  doc.add_paragraph(p.strip())
1145
-
1146
  outpath = tempfile.mktemp(suffix=".docx")
1147
  doc.save(outpath)
1148
  return outpath
@@ -1168,6 +1392,9 @@ def run_calc(prov_value, kab_value, kew_value, df_all, df_raw, pop_kab, pop_prov
1168
  if df_all is None or df_all.empty or df_raw is None or df_raw.empty:
1169
  return _empty_outputs("⚠️ Data belum ter-load. Pastikan file tersedia di repo/server.")
1170
 
 
 
 
1171
  df = df_all.copy()
1172
  if prov_value and prov_value != "(Semua)":
1173
  df = df[df["PROV_DISP"] == prov_value]
@@ -1179,19 +1406,24 @@ def run_calc(prov_value, kab_value, kew_value, df_all, df_raw, pop_kab, pop_prov
1179
  if df.empty:
1180
  return _empty_outputs("Tidak ada data untuk filter ini.")
1181
 
 
 
 
1182
  kew_norm = kew_value if (kew_value and kew_value != "(Semua)") else "(Semua)"
1183
-
1184
  faktor_wilayah_jenis = build_faktor_wilayah_jenis(df, pop_kab, pop_prov, pop_khusus, kew_norm)
1185
  agg_jenis_full = build_agg_wilayah_jenis(df, faktor_wilayah_jenis, kew_norm)
1186
  agg_total = build_agg_wilayah_total_from_jenis(agg_jenis_full, faktor_wilayah_jenis, kew_norm)
1187
 
 
 
 
1188
  summary_jenis = build_summary_per_jenis(agg_jenis_full, agg_total)
1189
  verif_total = build_verif_jenis(faktor_wilayah_jenis, kew_norm)
 
1190
 
1191
- # ✅ detail entitas khusus untuk bell curve + tabel detail (indeks dasar)
1192
- detail_view = build_detail_entitas(df, meta)
1193
-
1194
- # UI: agg_jenis hanya sampai indeks dasar
1195
  if agg_jenis_full is None or agg_jenis_full.empty:
1196
  agg_jenis_view = agg_jenis_full
1197
  else:
@@ -1209,7 +1441,9 @@ def run_calc(prov_value, kab_value, kew_value, df_all, df_raw, pop_kab, pop_prov
1209
  cols_upto = [c for c in cols_upto if c in agg_jenis_full.columns]
1210
  agg_jenis_view = agg_jenis_full[cols_upto].copy()
1211
 
1212
- # RAW download (hasil filter)
 
 
1213
  raw = df_raw.copy()
1214
  if prov_value and prov_value != "(Semua)":
1215
  raw = raw[raw["PROV_DISP"] == prov_value]
@@ -1218,15 +1452,43 @@ def run_calc(prov_value, kab_value, kew_value, df_all, df_raw, pop_kab, pop_prov
1218
  if kew_value and kew_value != "(Semua)":
1219
  raw = raw[raw["KEW_NORM"] == kew_value]
1220
 
1221
- # ✅ Bell curve: Indeks Dasar per entitas + hover nama perpus
1222
- fig_sekolah = _make_bell_curve_entitas(detail_view, "sekolah", "Bell Curve β€” Jenis: Sekolah (Indeks Dasar per Entitas)")
1223
- fig_umum = _make_bell_curve_entitas(detail_view, "umum", "Bell Curve β€” Jenis: Umum (Indeks Dasar per Entitas)")
1224
- fig_khusus = _make_bell_curve_entitas(detail_view, "khusus", "Bell Curve β€” Jenis: Khusus (Indeks Dasar per Entitas)")
1225
-
1226
- # ✅ KPI hanya 2 kartu
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1227
  kpi_md = build_kpi_markdown(summary_jenis)
1228
 
1229
- # Export
 
 
1230
  tmpdir = tempfile.mkdtemp()
1231
  prov_slug = (_canon(prov_value or "SEMUA").upper() or "SEMUA")
1232
  kab_slug = (_canon(kab_value or "SEMUA").upper() or "SEMUA")
@@ -1235,7 +1497,7 @@ def run_calc(prov_value, kab_value, kew_value, df_all, df_raw, pop_kab, pop_prov
1235
  p_summary = str(Path(tmpdir) / f"IPLM_RingkasanJenisKeseluruhan_33_88_{prov_slug}_{kab_slug}_{kew_slug}.xlsx")
1236
  p_total = str(Path(tmpdir) / f"IPLM_AgregatWilayah_Keseluruhan_33_88_{prov_slug}_{kab_slug}_{kew_slug}.xlsx")
1237
  p_raw = str(Path(tmpdir) / f"IPLM_RAW_DATA_{prov_slug}_{kab_slug}_{kew_slug}.xlsx")
1238
- p_detail = str(Path(tmpdir) / f"IPLM_DetailEntitas_IndeksDasar_{prov_slug}_{kab_slug}_{kew_slug}.xlsx")
1239
  p_verif = str(Path(tmpdir) / f"IPLM_KecukupanSampel_33_88_{prov_slug}_{kab_slug}_{kew_slug}.xlsx")
1240
 
1241
  summary_jenis.to_excel(p_summary, index=False)
@@ -1245,7 +1507,7 @@ def run_calc(prov_value, kab_value, kew_value, df_all, df_raw, pop_kab, pop_prov
1245
  verif_total.to_excel(p_verif, index=False)
1246
 
1247
  wilayah_txt = kab_value if (kab_value and kab_value != "(Semua)") else (prov_value if (prov_value and prov_value != "(Semua)") else "Nasional/All")
1248
- analysis_text = generate_llm_analysis(summary_jenis, wilayah_txt, kew_value or "(Semua)")
1249
  word_path = generate_word_report(wilayah_txt, summary_jenis, analysis_text)
1250
 
1251
  msg = (
@@ -1309,7 +1571,7 @@ def on_prov_change(prov_value):
1309
 
1310
  with gr.Blocks() as demo:
1311
  gr.Markdown(f"""
1312
- # IPLM 2025 β€” Final (Target Sampel **33.88%** per Jenis)
1313
  **Mode NO UPLOAD (cache aktif).** File dibaca dari repo/server:
1314
  - `DATA_FILE` = **{DATA_FILE}**
1315
  - `POP_KAB` = **{POP_KAB}**
@@ -1318,8 +1580,12 @@ with gr.Blocks() as demo:
1318
 
1319
  **TARGET RATIO (per jenis): {TARGET_RATIO*100:.2f}%**
1320
 
1321
- βœ… Dashboard KPI: **HANYA Indeks Dasar & Indeks Final** (Coverage card dihapus).
1322
- βœ… Bell curve: **Indeks Dasar per entitas** + hover **nama perpustakaan** per jenis.
 
 
 
 
1323
  """)
1324
 
1325
  state_df = gr.State(None)
@@ -1346,19 +1612,19 @@ with gr.Blocks() as demo:
1346
  gr.Markdown("## Ringkasan (Jenis + Keseluruhan) β€” Pop/Target33.88/Terkumpul/Coverage + Penyesuaian")
1347
  out_summary = gr.DataFrame(interactive=False)
1348
 
1349
- gr.Markdown("## Agregat Wilayah (Keseluruhan) β€” FIX avg3 (Skor Absolut)")
1350
  out_agg_total = gr.DataFrame(interactive=False)
1351
 
1352
  gr.Markdown("## Agregat Wilayah Γ— Jenis β€” (ditampilkan sampai Indeks_Dasar_Agregat_0_100)")
1353
  out_agg_jenis = gr.DataFrame(interactive=False)
1354
 
1355
- gr.Markdown("## Detail Entitas (Indeks Dasar per Perpustakaan)")
1356
  out_detail = gr.DataFrame(interactive=False)
1357
 
1358
  gr.Markdown("## Kecukupan Sampel 33.88% (tanpa angka koma untuk integer)")
1359
  out_verif = gr.DataFrame(interactive=False)
1360
 
1361
- gr.Markdown("## Bell Curve β€” per Jenis (Indeks Dasar per Entitas + nama perpustakaan)")
1362
  gr.Markdown("### Perpustakaan Umum")
1363
  bell_umum = gr.Plot(scale=1)
1364
 
 
1
  # -*- coding: utf-8 -*-
2
  """
3
+ IPLM 2025 — Final (Target Sampel 33.88% per Jenis) — TANPA Kinerja Relatif / Percentile
4
+
5
+ ───────────────────────────────────────────────────────────────────────────────
6
+ KONSEP / DOKUMENTASI
7
+
8
+ A. Skor ABSOLUT (untuk akuntabilitas)
9
+ ------------------------------------
10
+ 1) Indeks_Dasar_0_100
11
+ - Dihitung pada LEVEL ENTITAS (baris perpustakaan) dari indikator:
12
+ Yeo-Johnson transform (per indikator) → MinMax global (0–1) → sub-indeks → dimensi → indeks.
13
+ - Rumus:
14
+ dim_kepatuhan = mean(sub_koleksi, sub_sdm)
15
+ dim_kinerja = mean(sub_pelayanan, sub_pengelolaan)
16
+ Indeks_Dasar_0_100 = 100 * (W_KEPATUHAN*dim_kepatuhan + W_KINERJA*dim_kinerja)
17
+
18
+ 2) Penyesuaian kecukupan sampel berbasis TARGET 33.88% (per JENIS)
19
+ - TARGET_RATIO = 0.3388
20
+ - Untuk setiap wilayah × jenis:
21
+ pop_total_jenis = populasi perpustakaan jenis tsb (dari tabel POP)
22
+ target_total_33_88_jenis = pop_total_jenis * TARGET_RATIO
23
+ n_jenis = jumlah entitas (baris) terkumpul pada wilayah × jenis
24
+ faktor_penyesuaian_jenis = min(n_jenis / target_total_33_88_jenis, 1.0)
25
+ - Indeks_Final_Agregat_0_100 (wilayah×jenis):
26
+ Indeks_Final_Agregat_0_100 = Indeks_Dasar_Agregat_0_100 * faktor_penyesuaian_jenis
27
+
28
+ 3) AGREGAT WILAYAH (KESELURUHAN) = rata-rata 3 jenis (FIX)
29
+ - Keseluruhan wajib avg3:
30
+ Indeks_Dasar_Agregat_0_100(keseluruhan) = (dasar_sekolah + dasar_umum + dasar_khusus) / 3
31
+ Indeks_Final_Wilayah_0_100(keseluruhan) = (final_sekolah + final_umum + final_khusus) / 3
32
+ - Missing jenis dianggap 0 tetapi tetap dibagi 3 (sesuai requirement).
33
+
34
+ B. UI (Permintaan)
35
+ ------------------
36
+ ✅ Dashboard KPI: hanya 2 kartu (Indeks Final & Indeks Dasar)
37
+ ❌ Tidak ada KPI Coverage di dashboard
38
+ ✅ Bell curve: kembali menampilkan Indeks_Dasar_0_100 per entitas per jenis
39
+ ✅ Hover bell curve menampilkan nama perpustakaan (nm_perpustakaan) per jenis
40
+
41
+ ───────────────────────────────────────────────────────────────────────────────
42
  """
43
 
44
  import os
 
53
  import plotly.graph_objects as go
54
  from sklearn.preprocessing import PowerTransformer
55
 
56
+ # python-docx opsional (di HF Space kadang belum ter-install)
57
  DOCX_AVAILABLE = True
58
  try:
59
  from docx import Document
 
82
  W_KEPATUHAN = float(os.getenv("W_KEPATUHAN", "0.30"))
83
  W_KINERJA = float(os.getenv("W_KINERJA", "0.70"))
84
 
85
+ # ✅ target sampel 33.88% per jenis
86
  TARGET_RATIO = float(os.getenv("TARGET_RATIO", "0.3388"))
87
 
88
+ # LLM opsional
89
  USE_LLM = True
90
  LLM_MODEL_NAME = os.getenv("LLM_MODEL_NAME", "meta-llama/Meta-Llama-3-8B-Instruct")
91
  HF_TOKEN = (
 
135
  t = t.replace("\u00a0", " ").replace("Rp", "").replace("%", "")
136
  t = re.sub(r"[^0-9,.\-]", "", t)
137
 
138
+ # smart decimal
139
  if t.count(".") > 1 and t.count(",") == 1:
140
  t = t.replace(".", "").replace(",", ".")
141
  elif t.count(",") > 1 and t.count(".") == 1:
 
157
  return pd.Series(0.0, index=s.index)
158
  return (x - mn) / (mx - mn)
159
 
 
 
 
 
 
 
 
 
 
 
 
160
  def norm_kew(v):
161
  if pd.isna(v):
162
  return None
 
214
  return float(num) / float(den)
215
 
216
def faktor_penyesuaian_total(n_total: float, target_total: float) -> float:
    """Return the sample-adequacy factor ``min(n_total / target_total, 1.0)``.

    Args:
        n_total: Number of collected entities. ``None``, NaN or a negative
            value is treated as 0.
        target_total: Target number of entities. ``None``, NaN or a value
            that is not strictly positive yields a factor of 1.0, i.e. no
            penalty is applied when no target is known.

    Returns:
        A float capped at 1.0.
    """
    target_missing = target_total is None or pd.isna(target_total)
    if target_missing or float(target_total) <= 0:
        return 1.0

    count_missing = n_total is None or pd.isna(n_total)
    if count_missing or float(n_total) < 0:
        count = 0.0
    else:
        count = float(n_total)

    ratio = count / float(target_total)
    # Cap at 1.0: exceeding the target never boosts the score.
    return float(min(ratio, 1.0))
226
 
 
 
 
 
 
 
 
 
 
227
 
228
  # ============================================================
229
  # 3) INDIKATOR IPLM
 
251
  ]
252
  all_indicators = koleksi_cols + sdm_cols + pelayanan_cols + pengelolaan_cols
253
 
254
+ # alias kolom DM → nama baku indikator
255
  alias_map_raw = {
256
  "j_judul_koleksi_tercetak": "JudulTercetak",
257
  "j_eksemplar_koleksi_tercetak": "EksemplarTercetak",
 
286
  # 4) PIPELINE NASIONAL (LEVEL ENTITAS)
287
  # ============================================================
288
 
289
def _mean_norm_cols(row, cols):
    """Average the ``norm_<col>`` entries of one row for the given columns.

    Only names actually present in ``row.index`` take part in the mean;
    a NaN entry counts as 0.0. Returns 0.0 when none of the requested
    ``norm_<col>`` names exist in the row.
    """
    present = [name for name in (f"norm_{c}" for c in cols) if name in row.index]
    if not present:
        return 0.0

    cleaned = []
    for name in present:
        value = row[name]
        cleaned.append(0.0 if pd.isna(value) else float(value))
    return float(np.mean(cleaned))
299
+
300
  def prepare_global(df_src: pd.DataFrame) -> pd.DataFrame:
301
+ """
302
+ Transform + normalisasi indikator pada level entitas:
303
+ - rename kolom indikator (alias)
304
+ - coerce numeric
305
+ - Yeo-Johnson per indikator (standardize=False)
306
+ - MinMax global 0-1
307
+ - hitung sub_*, dim_*, Indeks_Dasar_0_100
308
+ """
309
  if df_src is None or df_src.empty:
310
  return df_src
311
 
312
  df = df_src.copy()
313
 
314
+ # rename indikator
315
  rename_map = {}
316
  for col in df.columns:
317
  c = _canon(col)
 
329
  for c in available:
330
  df[c] = df[c].apply(coerce_num)
331
 
332
+ # YJ per indikator + MinMax global
333
  for c in available:
334
  x = pd.to_numeric(df[c], errors="coerce").astype(float).values
335
  mask = ~np.isnan(x)
 
361
  # 5) CACHE LOADER (NO UPLOAD)
362
  # ============================================================
363
 
364
+ _CACHE = {
365
+ "key": None,
366
+ "df_all": None,
367
+ "df_raw": None,
368
+ "pop_kab": None,
369
+ "pop_prov": None,
370
+ "pop_khusus": None,
371
+ "meta": None,
372
+ "info": None
373
+ }
374
 
375
  def _parse_pop_khusus(path_xlsx: str) -> pd.DataFrame:
376
+ """
377
+ POP_KHUSUS format campuran:
378
+ - Baris 'PROVINSI X' β†’ level PROV
379
+ - Baris berikutnya β†’ KAB/KOTA dibawah prov tsb
380
+ Output standar:
381
+ LEVEL: PROV / KAB
382
+ prov_key / kab_key
383
+ Pop_Total_Jenis
384
+ """
385
  df = pd.read_excel(path_xlsx)
386
  if df is None or df.empty:
387
  return pd.DataFrame()
 
408
  mm = _disp_text(m) or ""
409
  if mm == "":
410
  continue
411
+
412
  if mm.startswith("PROVINSI "):
413
  prov_name = mm.replace("PROVINSI", "").strip()
414
  current_prov = prov_name
415
+ rows.append({
416
+ "LEVEL": "PROV",
417
+ "Provinsi_Label": f"PROVINSI {prov_name}",
418
+ "Kab_Kota_Label": None,
419
+ "Pop_Total_Jenis": pval,
420
+ })
421
  continue
422
+
423
+ rows.append({
424
+ "LEVEL": "KAB",
425
+ "Provinsi_Label": f"PROVINSI {current_prov}" if current_prov else None,
426
+ "Kab_Kota_Label": mm,
427
+ "Pop_Total_Jenis": pval,
428
+ })
429
 
430
  pop = pd.DataFrame(rows)
431
  if pop.empty:
 
437
  return pop
438
 
439
  def load_default_files(force=False):
440
+ """
441
+ Load 4 file:
442
+ - DM (DATA_FILE) multi-sheet → concat
443
+ - POP_KAB, POP_PROV, POP_KHUSUS
444
+ + Standarisasi kolom wilayah & jenis
445
+ + Dedup baris DM
446
+ + prepare_global() (YJ+MinMax+Indeks_Dasar)
447
+ """
448
+ key = (
449
+ DATA_FILE, POP_KAB, POP_PROV, POP_KHUSUS,
450
+ _mtime(DATA_FILE), _mtime(POP_KAB), _mtime(POP_PROV), _mtime(POP_KHUSUS)
451
+ )
452
+
453
  if (not force) and _CACHE["key"] == key and _CACHE["df_all"] is not None:
454
  return _CACHE["df_all"], _CACHE["df_raw"], _CACHE["pop_kab"], _CACHE["pop_prov"], _CACHE["pop_khusus"], _CACHE["meta"], _CACHE["info"]
455
 
 
465
  df_raw = pd.concat(frames, ignore_index=True, sort=False)
466
 
467
  prov_col = pick_col(df_raw, ["provinsi", "Provinsi", "PROVINSI"])
468
+ kab_col = pick_col(df_raw, ["kab/kota", "Kab/Kota", "Kab_Kota", "KAB/KOTA", "kabupaten_kota", "Kabupaten/Kota", "kabupaten kota", "kota", "kab_kota"])
469
  kew_col = pick_col(df_raw, ["kewenangan", "jenis_kewenangan", "Kewenangan", "KEWENANGAN"])
470
  jenis_col = pick_col(df_raw, ["jenis_perpustakaan", "Jenis Perpustakaan", "JENIS_PERPUSTAKAAN"])
471
  nama_col = pick_col(df_raw, ["nm_perpustakaan","nama_perpustakaan","Nama Perpustakaan","nm_instansi_lembaga","nm_perpus"])
 
480
  _CACHE.update({"key": key, "df_all": None, "df_raw": None, "pop_kab": None, "pop_prov": None, "pop_khusus": None, "meta": {}, "info": info})
481
  return None, None, None, None, None, {}, info
482
 
483
+ # mapping jenis → baku (sekolah/umum/khusus)
484
  val_map_jenis = {
485
  "PERPUSTAKAAN SEKOLAH": "sekolah", "SEKOLAH": "sekolah",
486
  "PERPUSTAKAAN UMUM": "umum", "UMUM": "umum", "PERPUSTAKAAN DAERAH": "umum",
 
494
  df_raw["prov_key"] = df_raw["PROV_DISP"].apply(norm_prov_label)
495
  df_raw["kab_key"] = df_raw["KAB_DISP"].apply(norm_kab_label)
496
 
497
+ # Dedup aman berdasarkan (prov,kab,kew,jenis,nama_perpus)
498
  if nama_col and nama_col in df_raw.columns:
499
  kcols = [prov_col, kab_col, kew_col, jenis_col, nama_col]
500
  else:
 
555
  f"πŸ•’ mtime: DM={time.ctime(_mtime(DATA_FILE))} | Kab={time.ctime(_mtime(POP_KAB))} | Prov={time.ctime(_mtime(POP_PROV))} | Khusus={time.ctime(_mtime(POP_KHUSUS))}"
556
  )
557
 
558
+ _CACHE.update({
559
+ "key": key,
560
+ "df_all": df_all,
561
+ "df_raw": df_raw,
562
+ "pop_kab": pop_kab,
563
+ "pop_prov": pop_prov,
564
+ "pop_khusus": pop_khusus,
565
+ "meta": meta,
566
+ "info": info
567
+ })
568
  return df_all, df_raw, pop_kab, pop_prov, pop_khusus, meta, info
569
 
570
 
 
572
  # 6) FAKTOR WILAYAH β€” PER JENIS (TARGET 33.88%)
573
  # ============================================================
574
 
575
+ def build_faktor_wilayah_jenis(
576
+ df_filtered: pd.DataFrame,
577
+ pop_kab: pd.DataFrame,
578
+ pop_prov: pd.DataFrame,
579
+ pop_khusus: pd.DataFrame,
580
+ kew_value: str
581
+ ):
582
+ """
583
+ Output tabel:
584
+ group_key + (Kab/Kota atau Provinsi) + Jenis
585
+ n_jenis, pop_total_jenis, target_total_33_88_jenis,
586
+ coverage_jenis_%, faktor_penyesuaian_jenis, gap_target33_88_jenis
587
+ """
588
  if df_filtered is None or df_filtered.empty:
589
  return pd.DataFrame()
590
 
 
596
 
597
  jenis_list = ["sekolah", "umum", "khusus"]
598
 
599
+ # tentukan level berdasarkan kewenangan
600
  if "PROV" in kew_norm:
601
  key_col, label_col, label_name, mode = "prov_key", "PROV_DISP", "Provinsi", "PROV"
602
  base_pop = pop_prov.copy() if (pop_prov is not None and not pop_prov.empty) else pd.DataFrame()
603
  if not base_pop.empty and "prov_key" not in base_pop.columns:
604
+ base_pop["prov_key"] = base_pop["Provinsi_Label"].apply(norm_prov_label) if "Provinsi_Label" in base_pop.columns else base_pop.iloc[:, 0].apply(norm_prov_label)
 
 
 
605
  base_pop = base_pop.set_index("prov_key") if (not base_pop.empty and "prov_key" in base_pop.columns) else pd.DataFrame().set_index(pd.Index([]))
606
  else:
607
  key_col, label_col, label_name, mode = "kab_key", "KAB_DISP", "Kab/Kota", "KAB"
608
  base_pop = pop_kab.copy() if (pop_kab is not None and not pop_kab.empty) else pd.DataFrame()
609
  if not base_pop.empty and "kab_key" not in base_pop.columns:
610
+ base_pop["kab_key"] = base_pop["Kab_Kota_Label"].apply(norm_kab_label) if "Kab_Kota_Label" in base_pop.columns else base_pop.iloc[:, 0].apply(norm_kab_label)
 
 
 
611
  base_pop = base_pop.set_index("kab_key") if (not base_pop.empty and "kab_key" in base_pop.columns) else pd.DataFrame().set_index(pd.Index([]))
612
 
613
+ # GRID: semua wilayah × 3 jenis (berdasarkan yang muncul di data filter)
614
  base_keys = df[[key_col, label_col]].drop_duplicates().rename(columns={key_col: "group_key", label_col: label_name})
615
+ full = base_keys.assign(_tmp=1).merge(
616
+ pd.DataFrame({"Jenis": jenis_list, "_tmp": 1}),
617
+ on="_tmp"
618
+ ).drop(columns="_tmp")
619
 
620
+ # count entitas per wilayah×jenis
621
  cnt = (
622
  df.groupby([key_col, label_col, "_dataset"], dropna=False)
623
+ .size()
624
+ .reset_index(name="n_jenis")
625
  .rename(columns={key_col: "group_key", label_col: label_name, "_dataset": "Jenis"})
626
  )
627
  cnt["Jenis"] = cnt["Jenis"].astype(str).str.lower().str.strip()
 
632
  base_n["target_total_33_88_jenis"] = 0.0
633
  base_n["pop_total_jenis"] = 0.0
634
 
635
+ # SEKOLAH + UMUM dari POP_KAB/POP_PROV
636
  if not base_pop.empty:
637
  if mode == "KAB":
638
+ pop_sekolah = pd.to_numeric(base_pop.get("jumlah_populasi_sekolah", 0), errors="coerce").fillna(0.0)
639
+ pop_umum = pd.to_numeric(base_pop.get("jumlah_populasi_umum", 0), errors="coerce").fillna(0.0)
640
+
641
  tgt_sekolah = pop_sekolah * float(TARGET_RATIO)
642
  tgt_umum = pop_umum * float(TARGET_RATIO)
643
  else:
644
+ # PROV: sekolah = sma + smk + slb (sesuai pola file Anda)
645
+ sma = pd.to_numeric(base_pop.get("sma ", base_pop.get("sma", 0)), errors="coerce").fillna(0.0)
646
+ smk = pd.to_numeric(base_pop.get("smk", 0), errors="coerce").fillna(0.0)
647
+ slb = pd.to_numeric(base_pop.get("slb", 0), errors="coerce").fillna(0.0)
648
+
649
+ pop_sekolah = sma + smk + slb
650
  tgt_sekolah = pop_sekolah * float(TARGET_RATIO)
651
+
652
+ pop_umum = pd.to_numeric(base_pop.get("perpus_umum_prop", 0), errors="coerce").fillna(0.0)
653
+ tgt_umum = pop_umum * float(TARGET_RATIO)
654
 
655
  m = base_n["Jenis"].eq("sekolah")
656
  base_n.loc[m, "pop_total_jenis"] = base_n.loc[m, "group_key"].map(pop_sekolah).fillna(0.0).values
 
660
  base_n.loc[m, "pop_total_jenis"] = base_n.loc[m, "group_key"].map(pop_umum).fillna(0.0).values
661
  base_n.loc[m, "target_total_33_88_jenis"] = base_n.loc[m, "group_key"].map(tgt_umum).fillna(0.0).values
662
 
663
+ # KHUSUS dari POP_KHUSUS
664
  if pop_khusus is not None and not pop_khusus.empty:
665
  pk = pop_khusus.copy()
666
  pk["Pop_Total_Jenis"] = pd.to_numeric(pk.get("Pop_Total_Jenis", 0), errors="coerce").fillna(0.0)
 
683
  base_n["target_total_33_88_jenis"] = pd.to_numeric(base_n["target_total_33_88_jenis"], errors="coerce").fillna(0.0)
684
  base_n["pop_total_jenis"] = pd.to_numeric(base_n["pop_total_jenis"], errors="coerce").fillna(0.0)
685
 
686
+ # fallback pop jika 0 tapi target ada
687
  m_need_pop = (base_n["pop_total_jenis"] <= 0) & (base_n["target_total_33_88_jenis"] > 0)
688
  base_n.loc[m_need_pop, "pop_total_jenis"] = base_n.loc[m_need_pop, "target_total_33_88_jenis"] / float(TARGET_RATIO)
689
 
690
+ # faktor penyesuaian
691
  base_n["faktor_penyesuaian_jenis"] = [
692
  faktor_penyesuaian_total(n, t)
693
  for n, t in zip(
 
712
  )
713
  ]
714
 
715
+ # display formatting
716
  base_n["target_total_33_88_jenis"] = pd.to_numeric(base_n["target_total_33_88_jenis"], errors="coerce").fillna(0).round(0).astype(int)
717
  base_n["pop_total_jenis"] = pd.to_numeric(base_n["pop_total_jenis"], errors="coerce").fillna(0).round(0).astype(int)
718
  base_n["coverage_jenis_%"] = pd.to_numeric(base_n["coverage_jenis_%"], errors="coerce").fillna(0.0).round(2)
 
727
  # ============================================================
728
 
729
  def build_agg_wilayah_jenis(df_filtered: pd.DataFrame, faktor_wilayah_jenis: pd.DataFrame, kew_value: str):
730
+ """
731
+ Agregasi:
732
+ wilayah × jenis:
733
+ - Jumlah (n entitas)
734
+ - rata-rata sub/dim
735
+ - Indeks_Dasar_Agregat_0_100 = mean(Indeks_Dasar_0_100)
736
+ - Indeks_Final_Agregat_0_100 = Indeks_Dasar_Agregat_0_100 * faktor_penyesuaian_jenis
737
+ """
738
  if df_filtered is None or df_filtered.empty:
739
  return pd.DataFrame()
740
 
 
752
 
753
  jenis_list = ["sekolah", "umum", "khusus"]
754
 
755
+ # GRID semua wilayah × 3 jenis
756
  base_keys = df[[key_col, label_col]].drop_duplicates().rename(columns={key_col: "group_key", label_col: label_name})
757
+ full = base_keys.assign(_tmp=1).merge(
758
+ pd.DataFrame({"Jenis": jenis_list, "_tmp": 1}),
759
+ on="_tmp"
760
+ ).drop(columns="_tmp")
761
 
762
+ # agregat real
763
  agg_real = df.groupby([key_col, label_col, "_dataset"], dropna=False).agg(
764
  Jumlah=("Indeks_Dasar_0_100", "size"),
765
  Rata2_sub_koleksi=("sub_koleksi", "mean"),
 
781
 
782
  agg["Jumlah"] = agg["Jumlah"].round(0).astype(int)
783
 
784
+ # merge faktor jenis
785
  if faktor_wilayah_jenis is None or faktor_wilayah_jenis.empty:
786
  agg["faktor_penyesuaian_jenis"] = 1.0
787
  agg["target_total_33_88_jenis"] = 0
788
  agg["pop_total_jenis"] = 0
789
  agg["coverage_jenis_%"] = 0.0
790
  agg["gap_target33_88_jenis"] = 0
791
+ agg["n_jenis"] = 0
792
  else:
793
  fw = faktor_wilayah_jenis.copy()
794
  fw["Jenis"] = fw["Jenis"].astype(str).str.lower().str.strip()
795
+
796
  keep = ["group_key", label_name, "Jenis",
797
  "faktor_penyesuaian_jenis", "target_total_33_88_jenis", "pop_total_jenis",
798
  "coverage_jenis_%", "gap_target33_88_jenis", "n_jenis"]
 
804
  for c in ["target_total_33_88_jenis","pop_total_jenis","gap_target33_88_jenis","n_jenis"]:
805
  if c in agg.columns:
806
  agg[c] = pd.to_numeric(agg[c], errors="coerce").fillna(0).round(0).astype(int)
807
+
808
  if "coverage_jenis_%" in agg.columns:
809
  agg["coverage_jenis_%"] = pd.to_numeric(agg["coverage_jenis_%"], errors="coerce").fillna(0.0).round(2)
810
 
811
+ # Indeks FINAL per jenis
812
  agg["Indeks_Final_Agregat_0_100"] = (
813
  pd.to_numeric(agg["Indeks_Dasar_Agregat_0_100"], errors="coerce").fillna(0.0)
814
  * pd.to_numeric(agg["faktor_penyesuaian_jenis"], errors="coerce").fillna(1.0)
815
  )
816
 
817
+ # rounding
818
  for c in [
819
  "Rata2_sub_koleksi","Rata2_sub_sdm","Rata2_sub_pelayanan","Rata2_sub_pengelolaan",
820
  "Rata2_dim_kepatuhan","Rata2_dim_kinerja"
821
  ]:
822
  if c in agg.columns:
823
  agg[c] = pd.to_numeric(agg[c], errors="coerce").fillna(0.0).round(3)
824
+
825
  for c in ["Indeks_Dasar_Agregat_0_100","Indeks_Final_Agregat_0_100"]:
826
  if c in agg.columns:
827
  agg[c] = pd.to_numeric(agg[c], errors="coerce").fillna(0.0).round(2)
828
+
829
  agg["faktor_penyesuaian_jenis"] = pd.to_numeric(agg["faktor_penyesuaian_jenis"], errors="coerce").fillna(1.0).round(3)
830
  return agg
831
 
 
835
  # ============================================================
836
 
837
  def build_agg_wilayah_total_from_jenis(agg_jenis: pd.DataFrame, faktor_wilayah_jenis: pd.DataFrame, kew_value: str):
838
+ """
839
+ Membentuk tabel wilayah keseluruhan dari agg_jenis, dengan FIX avg3:
840
+ Indeks_Dasar_Agregat_0_100 (keseluruhan) = mean(dasar_3jenis) [missing=0, tetap /3]
841
+ Indeks_Final_Wilayah_0_100 (keseluruhan) = mean(final_3jenis) [missing=0, tetap /3]
842
+ """
843
  if agg_jenis is None or agg_jenis.empty:
844
  return pd.DataFrame()
845
 
 
851
  a["Jenis"] = a["Jenis"].astype(str).str.lower().str.strip()
852
 
853
  base_keys = a[["group_key", label_name]].drop_duplicates()
854
+ full = base_keys.assign(_tmp=1).merge(
855
+ pd.DataFrame({"Jenis": jenis_list, "_tmp": 1}),
856
+ on="_tmp"
857
+ ).drop(columns="_tmp")
858
 
859
  cols_need = [
860
  "Jumlah",
 
865
  ]
866
  cols_present = [c for c in cols_need if c in a.columns]
867
 
868
+ full = full.merge(
869
+ a[["group_key", label_name, "Jenis"] + cols_present],
870
+ on=["group_key", label_name, "Jenis"],
871
+ how="left"
872
+ )
873
+
874
  for c in cols_present:
875
  full[c] = pd.to_numeric(full[c], errors="coerce").fillna(0.0)
876
 
 
886
  Indeks_Final_Wilayah_0_100=("Indeks_Final_Agregat_0_100", "mean"),
887
  )
888
 
889
+ # Tempel info Pop/Target/N per jenis + total (tetap ada untuk verif/ekspor, meski dashboard coverage dihapus)
890
+ if faktor_wilayah_jenis is not None and not faktor_wilayah_jenis.empty:
891
+ fw = faktor_wilayah_jenis.copy()
892
+ fw["Jenis"] = fw["Jenis"].astype(str).str.lower().str.strip()
893
+
894
+ piv = fw.pivot_table(
895
+ index=["group_key", label_name],
896
+ columns="Jenis",
897
+ values=["pop_total_jenis", "target_total_33_88_jenis", "n_jenis", "gap_target33_88_jenis", "faktor_penyesuaian_jenis"],
898
+ aggfunc="first"
899
+ )
900
+ piv.columns = [f"{v}_{k}" for v, k in piv.columns]
901
+ piv = piv.reset_index()
902
+ out = out.merge(piv, on=["group_key", label_name], how="left")
903
+
904
+ for j in ["sekolah", "umum", "khusus"]:
905
+ for basecol in ["pop_total_jenis", "target_total_33_88_jenis", "n_jenis", "gap_target33_88_jenis"]:
906
+ c = f"{basecol}_{j}"
907
+ if c in out.columns:
908
+ out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0).round(0).astype(int)
909
+ cfac = f"faktor_penyesuaian_jenis_{j}"
910
+ if cfac in out.columns:
911
+ out[cfac] = pd.to_numeric(out[cfac], errors="coerce").fillna(1.0).round(3)
912
+
913
+ out["pop_total_all"] = (
914
+ out.get("pop_total_jenis_sekolah", 0)
915
+ + out.get("pop_total_jenis_umum", 0)
916
+ + out.get("pop_total_jenis_khusus", 0)
917
+ ).astype(int)
918
+
919
+ out["target_total_33_88_all"] = (
920
+ out.get("target_total_33_88_jenis_sekolah", 0)
921
+ + out.get("target_total_33_88_jenis_umum", 0)
922
+ + out.get("target_total_33_88_jenis_khusus", 0)
923
+ ).astype(int)
924
+
925
+ out["terkumpul_all"] = (
926
+ out.get("n_jenis_sekolah", 0)
927
+ + out.get("n_jenis_umum", 0)
928
+ + out.get("n_jenis_khusus", 0)
929
+ ).astype(int)
930
+
931
+ out["coverage_target33_88_all_%"] = np.where(
932
+ pd.to_numeric(out["target_total_33_88_all"], errors="coerce").fillna(0).values > 0,
933
+ (pd.to_numeric(out["terkumpul_all"], errors="coerce").fillna(0).values / pd.to_numeric(out["target_total_33_88_all"], errors="coerce").fillna(0).values) * 100.0,
934
+ 0.0
935
+ )
936
+ out["coverage_target33_88_all_%"] = pd.to_numeric(out["coverage_target33_88_all_%"], errors="coerce").fillna(0.0).round(2)
937
+
938
  for c in [
939
  "Rata2_sub_koleksi","Rata2_sub_sdm","Rata2_sub_pelayanan","Rata2_sub_pengelolaan",
940
  "Rata2_dim_kepatuhan","Rata2_dim_kinerja"
941
  ]:
942
  if c in out.columns:
943
  out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0.0).round(3)
944
+
945
  for c in ["Indeks_Dasar_Agregat_0_100","Indeks_Final_Wilayah_0_100"]:
946
  if c in out.columns:
947
  out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0.0).round(2)
 
965
  "Pop_Total_Jenis": 0,
966
  "Target33_88_Total_Jenis": 0,
967
  "Terkumpul_Jenis": 0,
968
+ "Coverage_Target33_88_Jenis_%": 0.0,
969
  "Indeks_Dasar_0_100": 0.0,
970
  "Indeks_Final_Disesuaikan_0_100": 0.0,
971
  "Penyesuaian_Poin": 0.0,
 
1056
  for c in ["Jumlah_Wilayah","Total_Perpus","Pop_Total_Jenis","Target33_88_Total_Jenis","Terkumpul_Jenis"]:
1057
  if c in out.columns:
1058
  out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0).round(0).astype(int)
1059
+
1060
  for c in ["Coverage_Target33_88_Jenis_%","Indeks_Dasar_0_100","Indeks_Final_Disesuaikan_0_100","Penyesuaian_Poin"]:
1061
  if c in out.columns:
1062
  out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0.0).round(2)
1063
+
1064
  return out
1065
 
1066
 
1067
  # ============================================================
1068
+ # 10) DETAIL ENTITAS: Final menempel dari agg_total (wilayah)
1069
  # ============================================================
1070
 
1071
def attach_final_to_detail(df_filtered: pd.DataFrame, agg_total: pd.DataFrame, meta: dict, kew_value: str):
    """Build the per-entity detail table with the regional final index attached.

    Each entity row receives ``Indeks_Final_0_100`` taken from the
    ``Indeks_Final_Wilayah_0_100`` of its region in ``agg_total``; rows whose
    region has no match (or all rows when ``agg_total`` is empty/``None``)
    fall back to their own ``Indeks_Dasar_0_100``.

    Args:
        df_filtered: Entity-level rows. Returns an empty frame when this is
            ``None`` or empty.
        agg_total: Regional aggregate with ``group_key`` and
            ``Indeks_Final_Wilayah_0_100`` columns, or ``None``/empty.
        meta: Mapping that may carry ``"nama_col"``, the name of the library
            name column in ``df_filtered``. ``None`` is treated as ``{}``.
        kew_value: Authority filter; when its upper-cased text contains
            ``"PROV"`` rows are matched on ``prov_key``, otherwise ``kab_key``.

    Returns:
        A new DataFrame restricted to the label, sub-index, dimension and
        index columns that exist, with labels renamed to
        ``Provinsi`` / ``Kab/Kota`` / ``Jenis`` and numbers rounded
        (3 decimals for sub/dim columns, 2 for the indices).
    """
    if df_filtered is None or df_filtered.empty:
        return pd.DataFrame()

    meta = meta or {}
    df = df_filtered.copy()

    # Only the join key depends on the authority level; display labels are
    # the same for both levels.
    key_col = "prov_key" if "PROV" in str(kew_value or "").upper() else "kab_key"
    prov_label, kab_label = "PROV_DISP", "KAB_DISP"

    if agg_total is None or agg_total.empty:
        df["Indeks_Final_0_100"] = df["Indeks_Dasar_0_100"]
    else:
        # One row per region key: a key that appears twice in agg_total
        # would otherwise fan out and duplicate entity rows in the merge.
        region_final = (
            agg_total[["group_key", "Indeks_Final_Wilayah_0_100"]]
            .drop_duplicates(subset="group_key", keep="first")
        )
        df = df.merge(region_final, left_on=key_col, right_on="group_key", how="left")
        df["Indeks_Final_0_100"] = df["Indeks_Final_Wilayah_0_100"].fillna(df["Indeks_Dasar_0_100"])
        df = df.drop(columns=[c for c in ["group_key", "Indeks_Final_Wilayah_0_100"] if c in df.columns])

    base_cols = [prov_label, kab_label, "KEW_NORM", "_dataset"]
    nama_col = meta.get("nama_col")
    if nama_col and nama_col in df.columns:
        df["nm_perpustakaan"] = df[nama_col].astype(str)
        base_cols.insert(2, "nm_perpustakaan")

    sub_dim_cols = [
        "sub_koleksi", "sub_sdm", "sub_pelayanan", "sub_pengelolaan",
        "dim_kepatuhan", "dim_kinerja",
    ]
    index_cols = ["Indeks_Dasar_0_100", "Indeks_Final_0_100"]
    keep = [c for c in base_cols + sub_dim_cols + index_cols if c in df.columns]

    out = df[keep].copy()
    out = out.rename(columns={prov_label: "Provinsi", kab_label: "Kab/Kota", "_dataset": "Jenis"})

    for cols, ndigits in ((sub_dim_cols, 3), (index_cols, 2)):
        for c in cols:
            if c in out.columns:
                out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0.0).round(ndigits)

    return out
1117
 
 
1139
  for c in ["pop_total_jenis", "target_total_33_88_jenis", "n_jenis", "gap_target33_88_jenis"]:
1140
  if c in out.columns:
1141
  out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0).round(0).astype(int)
1142
+
1143
  if "coverage_jenis_%" in out.columns:
1144
  out["coverage_jenis_%"] = pd.to_numeric(out["coverage_jenis_%"], errors="coerce").fillna(0.0).round(2)
1145
+
1146
  if "faktor_penyesuaian_jenis" in out.columns:
1147
  out["faktor_penyesuaian_jenis"] = pd.to_numeric(out["faktor_penyesuaian_jenis"], errors="coerce").fillna(1.0).round(3)
1148
 
 
1150
 
1151
 
1152
  # ============================================================
1153
+ # 12) BELL CURVE — Indeks Dasar per Entitas (per Jenis) + Hover Nama Perpus
1154
  # ============================================================
1155
 
1156
+ def _make_bell_curve_entitas(
1157
+ dfp: pd.DataFrame,
1158
+ title: str,
1159
+ xcol: str = "Indeks_Dasar_0_100",
1160
+ label_col: str = "nm_perpustakaan",
1161
+ hover_cols: list | None = None,
1162
+ min_points: int = 2
1163
+ ):
1164
  fig = go.Figure()
1165
  fig.update_layout(
1166
  title=title,
1167
+ xaxis_title="Skor (0–100)",
1168
  yaxis_title="Kepadatan",
1169
  hovermode="closest",
1170
  margin=dict(l=40, r=20, t=60, b=40),
1171
  legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0),
1172
  )
1173
 
1174
+ if dfp is None or dfp.empty or xcol not in dfp.columns:
 
 
 
1175
  fig.add_annotation(text="Tidak ada data untuk ditampilkan.", x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False)
1176
+ fig.update_xaxes(range=[0, 100])
1177
+ fig.update_yaxes(rangemode="tozero")
1178
  return fig
1179
 
1180
+ d = dfp.dropna(subset=[xcol]).copy()
1181
+ if d.empty:
 
 
1182
  fig.add_annotation(text="Tidak ada data untuk ditampilkan.", x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False)
1183
+ fig.update_xaxes(range=[0, 100])
1184
+ fig.update_yaxes(rangemode="tozero")
1185
  return fig
1186
 
1187
+ x = pd.to_numeric(d[xcol], errors="coerce").astype(float)
1188
+ d = d.loc[x.notna()].copy()
1189
+ x = x.loc[x.notna()].values
1190
+ if len(x) < 1:
 
1191
  fig.add_annotation(text="Tidak ada data untuk ditampilkan.", x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False)
1192
+ fig.update_xaxes(range=[0, 100])
1193
+ fig.update_yaxes(rangemode="tozero")
1194
  return fig
1195
 
1196
+ hover_cols = hover_cols or []
1197
+ def _val(row, col):
1198
+ if col not in row.index:
1199
+ return ""
1200
+ v = row[col]
1201
+ return "" if pd.isna(v) else str(v)
1202
+
1203
+ hover_text = []
1204
+ for _, row in d.iterrows():
1205
+ lines = []
1206
+ nm = _val(row, label_col) if (label_col and label_col in d.columns) else ""
1207
+ if nm:
1208
+ lines.append(f"<b>{nm}</b>")
1209
+ lines.append(f"{xcol}: {float(pd.to_numeric(row[xcol], errors='coerce')):.2f}")
1210
+ for hc in hover_cols:
1211
+ vv = _val(row, hc)
1212
+ if vv:
1213
+ lines.append(f"{hc}: {vv}")
1214
+ hover_text.append("<br>".join(lines))
1215
+
1216
+ if len(x) < min_points:
1217
+ x_single = float(x[0])
1218
+ fig.add_trace(go.Scatter(
1219
+ x=[x_single], y=[0],
1220
+ mode="markers", showlegend=False,
1221
+ hovertext=[hover_text[0]] if hover_text else None,
1222
+ hoverinfo="text"
1223
+ ))
1224
+ fig.add_vline(x=x_single, line_width=1, line_dash="dash", annotation_text=f"Nilai: {x_single:.1f}", annotation_position="top")
1225
+ fig.update_xaxes(range=[0, 100])
1226
+ fig.update_yaxes(rangemode="tozero")
1227
+ return fig
1228
+
1229
+ # fit normal curve (untuk visual)
1230
  mu = float(np.mean(x))
1231
  sigma = float(np.std(x, ddof=1)) if len(x) > 1 else 1.0
1232
  sigma = max(sigma, 1e-3)
 
1236
  xs = np.linspace(xmin, xmax, 250)
1237
  pdf = (1.0 / (sigma * np.sqrt(2 * np.pi))) * np.exp(-0.5 * ((xs - mu) / sigma) ** 2)
1238
 
 
1239
  fig.add_trace(go.Scatter(x=xs, y=pdf, mode="lines", name="Kurva Normal (fit)"))
 
 
 
 
 
 
 
 
 
 
1240
  fig.add_trace(go.Scatter(
1241
+ x=x, y=np.zeros_like(x),
1242
+ mode="markers", showlegend=False,
1243
+ hovertext=hover_text if hover_text else None,
1244
+ hoverinfo="text"
 
 
 
1245
  ))
1246
 
 
1247
  q1, q2, q3 = np.percentile(x, [25, 50, 75])
1248
  for xv, lab in [(q1, "Q1"), (q2, "Q2 (Median)"), (q3, "Q3"), (mu, "Mean")]:
1249
  fig.add_vline(x=float(xv), line_width=1, line_dash="dash", annotation_text=f"{lab}: {xv:.1f}", annotation_position="top")
1250
 
1251
+ fig.update_xaxes(range=[0, 100])
1252
+ fig.update_yaxes(rangemode="tozero")
1253
  return fig
1254
 
1255
 
1256
  # ============================================================
1257
+ # 13) KPI DASHBOARD (HANYA 2 KARTU: FINAL + DASAR)
1258
  # ============================================================
1259
 
1260
def _safe_first(df, col, default=0.0, where=None):
    """Return the first value of ``df[col]`` as a float, or ``default``.

    Args:
        df: Source DataFrame; ``None`` or empty yields ``default``.
        col: Column to read; a missing column yields ``default``.
        default: Value returned when nothing usable is found. May be
            ``None`` or NaN.
        where: Optional row selector passed to ``df.loc`` before taking the
            first row.

    Returns:
        ``float`` of the first selected value after numeric coercion, or
        ``default`` when the frame/selection is empty or that value is not
        numeric.
    """
    if df is None or df.empty or col not in df.columns:
        return default

    sub = df if where is None else df.loc[where]
    if sub.empty:
        return default

    first = pd.to_numeric(sub[col], errors="coerce").iloc[0]
    # Test the scalar rather than calling fillna(default): fillna(None)
    # raises, which made default=None unusable.
    return default if pd.isna(first) else float(first)
 
1281
  def fmt(x, nd=2):
1282
  return "NA" if pd.isna(x) else f"{x:.{nd}f}"
1283
 
 
1284
  return f"""
1285
  <div style="display:flex; gap:12px; flex-wrap:wrap;">
1286
  <div style="border:1px solid #333; border-radius:10px; padding:10px 12px; min-width:260px;">
1287
+ <div style="opacity:0.8;">Indeks IPLM FINAL (Disesuaikan 33.88%)</div>
1288
  <div style="font-size:26px; font-weight:700;">{fmt(k["final_all"],2)}</div>
1289
  <div style="opacity:0.7;">Skor absolut (untuk akuntabilitas)</div>
1290
  </div>
 
1318
  _HF_CLIENT = None
1319
  return None
1320
 
1321
+ def generate_llm_analysis(summary_jenis, agg_total, verif_total, wilayah, kew):
1322
  client = get_llm_client()
1323
  if client is None or (not USE_LLM):
1324
  return "Analisis otomatis (LLM) tidak digunakan / tidak tersedia."
 
1327
  resp = client.chat_completion(
1328
  model=LLM_MODEL_NAME,
1329
  messages=[
1330
+ {"role":"system","content":"Anda adalah analis kebijakan perpustakaan di Indonesia. Tulis analisis ringkas berbasis data (tanpa percentile/benchmarking)."},
1331
+ {"role":"user","content":f"{ctx}\nBuat analisis 3 paragraf: (1) indeks dasar, (2) penyesuaian 33.88% dan implikasinya, (3) rekomendasi singkat."}
1332
  ],
1333
+ max_tokens=500,
1334
  temperature=0.25,
1335
  top_p=0.9,
1336
  )
 
1345
  doc = Document()
1346
  doc.add_heading(f"Laporan IPLM β€” {wilayah}", level=1)
1347
  doc.add_paragraph(f"Target sampel per jenis: {TARGET_RATIO*100:.2f}%")
 
1348
  doc.add_heading("Ringkasan (Jenis + Keseluruhan)", level=2)
1349
  if summary_jenis is not None and not summary_jenis.empty:
1350
  show = summary_jenis.copy()
 
1363
  cells[i].text = str(int(v))
1364
  else:
1365
  cells[i].text = str(v)
 
1366
  doc.add_heading("Analisis (opsional)", level=2)
1367
  for p in (analysis_text or "").split("\n"):
1368
  if p.strip():
1369
  doc.add_paragraph(p.strip())
 
1370
  outpath = tempfile.mktemp(suffix=".docx")
1371
  doc.save(outpath)
1372
  return outpath
 
1392
  if df_all is None or df_all.empty or df_raw is None or df_raw.empty:
1393
  return _empty_outputs("⚠️ Data belum ter-load. Pastikan file tersedia di repo/server.")
1394
 
1395
+ # =========================================================
1396
+ # 1) FILTER df_all (entitas) sesuai dropdown
1397
+ # =========================================================
1398
  df = df_all.copy()
1399
  if prov_value and prov_value != "(Semua)":
1400
  df = df[df["PROV_DISP"] == prov_value]
 
1406
  if df.empty:
1407
  return _empty_outputs("Tidak ada data untuk filter ini.")
1408
 
1409
+ # =========================================================
1410
+ # 2) PIPELINE FILTER → faktor → agg_jenis → agg_total
1411
+ # =========================================================
1412
  kew_norm = kew_value if (kew_value and kew_value != "(Semua)") else "(Semua)"
 
1413
  faktor_wilayah_jenis = build_faktor_wilayah_jenis(df, pop_kab, pop_prov, pop_khusus, kew_norm)
1414
  agg_jenis_full = build_agg_wilayah_jenis(df, faktor_wilayah_jenis, kew_norm)
1415
  agg_total = build_agg_wilayah_total_from_jenis(agg_jenis_full, faktor_wilayah_jenis, kew_norm)
1416
 
1417
+ # =========================================================
1418
+ # 3) OUTPUT TABLES
1419
+ # =========================================================
1420
  summary_jenis = build_summary_per_jenis(agg_jenis_full, agg_total)
1421
  verif_total = build_verif_jenis(faktor_wilayah_jenis, kew_norm)
1422
+ detail_view = attach_final_to_detail(df, agg_total, meta, kew_norm)
1423
 
1424
+ # =========================================================
1425
+ # 4) agg_jenis view (UI hanya sampai indeks dasar)
1426
+ # =========================================================
 
1427
  if agg_jenis_full is None or agg_jenis_full.empty:
1428
  agg_jenis_view = agg_jenis_full
1429
  else:
 
1441
  cols_upto = [c for c in cols_upto if c in agg_jenis_full.columns]
1442
  agg_jenis_view = agg_jenis_full[cols_upto].copy()
1443
 
1444
+ # =========================================================
1445
+ # 5) FILTER RAW DOWNLOAD (harus raw hasil filter)
1446
+ # =========================================================
1447
  raw = df_raw.copy()
1448
  if prov_value and prov_value != "(Semua)":
1449
  raw = raw[raw["PROV_DISP"] == prov_value]
 
1452
  if kew_value and kew_value != "(Semua)":
1453
  raw = raw[raw["KEW_NORM"] == kew_value]
1454
 
1455
+ # =========================================================
1456
+ # 6) Bell curve — kembali ke Indeks_Dasar_0_100 per entitas per jenis
1457
+ # + hover nama perpustakaan
1458
+ # =========================================================
1459
+ if detail_view is None or detail_view.empty:
1460
+ fig_umum = _make_bell_curve_entitas(pd.DataFrame(), "Bell Curve β€” Jenis: Umum")
1461
+ fig_sekolah = _make_bell_curve_entitas(pd.DataFrame(), "Bell Curve β€” Jenis: Sekolah")
1462
+ fig_khusus = _make_bell_curve_entitas(pd.DataFrame(), "Bell Curve β€” Jenis: Khusus")
1463
+ else:
1464
+ hover_cols = []
1465
+ for hc in ["Provinsi", "Kab/Kota", "Jenis"]:
1466
+ if hc in detail_view.columns:
1467
+ hover_cols.append(hc)
1468
+
1469
+ def _fig(j):
1470
+ d = detail_view[detail_view["Jenis"].astype(str).str.lower() == j].copy()
1471
+ return _make_bell_curve_entitas(
1472
+ d,
1473
+ title=f"Bell Curve β€” Jenis: {j.title()} (Skor: Indeks_Dasar_0_100)",
1474
+ xcol="Indeks_Dasar_0_100",
1475
+ label_col=("nm_perpustakaan" if "nm_perpustakaan" in d.columns else "nm_perpustakaan"),
1476
+ hover_cols=hover_cols,
1477
+ min_points=2
1478
+ )
1479
+
1480
+ fig_sekolah = _fig("sekolah")
1481
+ fig_umum = _fig("umum")
1482
+ fig_khusus = _fig("khusus")
1483
+
1484
+ # =========================================================
1485
+ # 7) KPI (HANYA FINAL + DASAR)
1486
+ # =========================================================
1487
  kpi_md = build_kpi_markdown(summary_jenis)
1488
 
1489
+ # =========================================================
1490
+ # 8) Export (xlsx + opsional docx)
1491
+ # =========================================================
1492
  tmpdir = tempfile.mkdtemp()
1493
  prov_slug = (_canon(prov_value or "SEMUA").upper() or "SEMUA")
1494
  kab_slug = (_canon(kab_value or "SEMUA").upper() or "SEMUA")
 
1497
  p_summary = str(Path(tmpdir) / f"IPLM_RingkasanJenisKeseluruhan_33_88_{prov_slug}_{kab_slug}_{kew_slug}.xlsx")
1498
  p_total = str(Path(tmpdir) / f"IPLM_AgregatWilayah_Keseluruhan_33_88_{prov_slug}_{kab_slug}_{kew_slug}.xlsx")
1499
  p_raw = str(Path(tmpdir) / f"IPLM_RAW_DATA_{prov_slug}_{kab_slug}_{kew_slug}.xlsx")
1500
+ p_detail = str(Path(tmpdir) / f"IPLM_DetailEntitas_FinalMenempelWilayah_{prov_slug}_{kab_slug}_{kew_slug}.xlsx")
1501
  p_verif = str(Path(tmpdir) / f"IPLM_KecukupanSampel_33_88_{prov_slug}_{kab_slug}_{kew_slug}.xlsx")
1502
 
1503
  summary_jenis.to_excel(p_summary, index=False)
 
1507
  verif_total.to_excel(p_verif, index=False)
1508
 
1509
  wilayah_txt = kab_value if (kab_value and kab_value != "(Semua)") else (prov_value if (prov_value and prov_value != "(Semua)") else "Nasional/All")
1510
+ analysis_text = generate_llm_analysis(summary_jenis, agg_total, verif_total, wilayah_txt, kew_value or "(Semua)")
1511
  word_path = generate_word_report(wilayah_txt, summary_jenis, analysis_text)
1512
 
1513
  msg = (
 
1571
 
1572
  with gr.Blocks() as demo:
1573
  gr.Markdown(f"""
1574
+ # IPLM 2025 β€” Final (Target Sampel **33.88%** per Jenis) β€” TANPA Kinerja Relatif / Percentile
1575
  **Mode NO UPLOAD (cache aktif).** File dibaca dari repo/server:
1576
  - `DATA_FILE` = **{DATA_FILE}**
1577
  - `POP_KAB` = **{POP_KAB}**
 
1580
 
1581
  **TARGET RATIO (per jenis): {TARGET_RATIO*100:.2f}%**
1582
 
1583
+ βœ… Dashboard KPI hanya menampilkan:
1584
+ - Indeks IPLM FINAL (disesuaikan 33.88%)
1585
+ - Indeks Dasar (tanpa penyesuaian)
1586
+
1587
+ βœ… Bell Curve kembali menampilkan:
1588
+ - Indeks_Dasar_0_100 per entitas (per jenis), hover menampilkan nama perpustakaan.
1589
  """)
1590
 
1591
  state_df = gr.State(None)
 
1612
  gr.Markdown("## Ringkasan (Jenis + Keseluruhan) β€” Pop/Target33.88/Terkumpul/Coverage + Penyesuaian")
1613
  out_summary = gr.DataFrame(interactive=False)
1614
 
1615
+ gr.Markdown("## Agregat Wilayah (Keseluruhan) β€” FIX avg3")
1616
  out_agg_total = gr.DataFrame(interactive=False)
1617
 
1618
  gr.Markdown("## Agregat Wilayah Γ— Jenis β€” (ditampilkan sampai Indeks_Dasar_Agregat_0_100)")
1619
  out_agg_jenis = gr.DataFrame(interactive=False)
1620
 
1621
+ gr.Markdown("## Detail Entitas (Final menempel dari wilayah)")
1622
  out_detail = gr.DataFrame(interactive=False)
1623
 
1624
  gr.Markdown("## Kecukupan Sampel 33.88% (tanpa angka koma untuk integer)")
1625
  out_verif = gr.DataFrame(interactive=False)
1626
 
1627
+ gr.Markdown("## Bell Curve β€” Indeks Dasar per Entitas (per Jenis) + Nama Perpustakaan")
1628
  gr.Markdown("### Perpustakaan Umum")
1629
  bell_umum = gr.Plot(scale=1)
1630