irhamni committed on
Commit
dfcd67d
·
verified ·
1 Parent(s): f3ee6d5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +166 -131
app.py CHANGED
@@ -1,19 +1,37 @@
1
  # -*- coding: utf-8 -*-
2
  """
3
  app.py — IPLM 2025 (STABLE, COPY-PASTE, HF Spaces)
4
- ✅ IPLM Real: Yeo-Johnson per indikator + MinMax nasional (sekali)
5
- ✅ FINAL: Indeks_Final_0_100 = Indeks_Real_0_100 × SamplingFactor_Total (Target 68%)
6
- βœ… Dropdown prov/kab/kew jalan & label rapi (tidak jadi PROVINSIACEH)
7
- βœ… Dedup provinsi by key (hilang dualisme)
8
- βœ… Output lengkap:
9
- 1) Indeks Agregat (FINAL)
10
- 2) Agregat (FINAL) per Jenis
11
- 3) Detail (FINAL) per Unit
12
- 4) Agregat (RealScore) per Jenis (Subindeks & Dimensi)
13
- 5) Detail (RealScore) per Unit (Subindeks & Dimensi + indikator raw)
14
- 6) Coverage Populasi vs Sampel (Target 68%) + BAR chart
15
- 7) Bell curve per Jenis (RealScore) — seperti contoh kamu
16
- 8) Analisis LLM (opsional) + Word report (tabel+grafik embedded)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  """
18
 
19
  import os
@@ -32,7 +50,7 @@ import plotly.express as px
32
  from sklearn.preprocessing import PowerTransformer
33
 
34
  # =========================
35
- # 0) FILES (SESUIKAN)
36
  # =========================
37
  DATA_FILE = "IPLM_clean_manual_131225.xlsx"
38
  META_KAB_FILE = "Data_populasi_Kab_kota.xlsx"
@@ -44,8 +62,9 @@ TARGET_FRAC = 0.68
44
  W_KEPATUHAN = 0.30
45
  W_KINERJA = 0.70
46
 
 
47
  # =========================
48
- # 1) UTIL: sanitasi kolom & teks
49
  # =========================
50
  def make_unique_columns(cols):
51
  """Hindari kolom duplikat agar df['X'] tidak menjadi DataFrame."""
@@ -69,59 +88,65 @@ def clean_spaces(s: str) -> str:
69
 
70
  def pretty_admin_name(s: str, kind: str = "prov") -> str:
71
  """
72
- Buat label dropdown rapi tapi tetap manusiawi:
73
  - PROVINSI JAWA BARAT
74
  - KOTA SURABAYA / KAB. BANDUNG
 
75
  """
76
  t = clean_spaces(str(s)).upper()
77
- # rapikan beberapa variasi umum
78
  t = t.replace("PROPINSI", "PROVINSI")
79
  t = re.sub(r"\bKABUPATEN\b", "KAB.", t)
80
- t = re.sub(r"\bKOTA\s+ADM\.\b", "KOTA ADM.", t)
 
 
 
 
 
81
 
82
  if kind == "prov":
 
83
  if not t.startswith("PROVINSI "):
84
- # beberapa data sudah "DKI JAKARTA" tanpa prefiks
85
  t = "PROVINSI " + t
86
  return t
87
 
88
  def norm_key(x) -> str:
89
  """
90
- Key join prov/kab yang STABIL & KONSISTEN
91
- Tujuan:
92
- - Menghilangkan dualisme penamaan
93
- - Menyamakan Kepulauan Seribu
94
- - Aman untuk join DM ↔ meta populasi
95
  """
96
  if pd.isna(x):
97
  return ""
98
-
99
  t = clean_spaces(str(x)).upper()
100
 
101
- # =========================
102
- # NORMALISASI UMUM
103
- # =========================
104
  t = t.replace("PROPINSI", "PROVINSI")
105
- t = t.replace("KABUPATEN", "KAB.")
106
- t = t.replace("KOTA ADMINISTRASI", "KOTA ADM.")
107
- t = t.replace("KABUPATEN ADMINISTRASI", "KAB. ADM.")
108
  t = t.replace("ADMINISTRASI", "ADM.")
109
-
110
- # variasi KEPULAUAN
111
  t = t.replace("KEP.", "KEPULAUAN")
112
- t = t.replace("KEP ", "KEPULAUAN ")
113
 
114
- # =========================
115
- # KHUSUS: KEPULAUAN SERIBU
116
- # =========================
117
  if "SERIBU" in t:
118
  t = "KAB. ADM. KEPULAUAN SERIBU"
119
 
120
- # =========================
121
- # FINAL KEY (JOIN ONLY)
122
- # =========================
123
  return re.sub(r"[^A-Z0-9]", "", t)
124
 
 
 
 
 
 
 
 
 
 
 
 
 
125
 
126
  # =========================
127
  # 2) NUM COERCION (AMAN)
@@ -183,17 +208,6 @@ def sampling_factor(sample, target):
183
  except Exception:
184
  return 1.0
185
 
186
- def norm_kew(v):
187
- if pd.isna(v):
188
- return ""
189
- t = clean_spaces(v).upper()
190
- if any(x in t for x in ["KAB", "KOTA", "KABUPATEN", "KAB/KOTA"]):
191
- return "KAB/KOTA"
192
- if any(x in t for x in ["PROV", "PROP", "PROVINSI", "PROPINSI"]):
193
- return "PROVINSI"
194
- if "PUSAT" in t or "NASIONAL" in t:
195
- return "PUSAT"
196
- return t
197
 
198
  # =========================
199
  # 3) LOAD MULTISHEET DM
@@ -211,6 +225,7 @@ def load_multisheet_excel(path: str) -> tuple[pd.DataFrame, list]:
211
  out = pd.concat(frames, ignore_index=True, sort=False)
212
  return out, list(xls.sheet_names)
213
 
 
214
  # =========================
215
  # 4) AUTO DETECT COLUMNS (DM & META)
216
  # =========================
@@ -237,44 +252,42 @@ def detect_dm_cols(df: pd.DataFrame) -> dict:
237
  subjenis = pick_col(df, ["sub_jenis_perpus", "subjenis", "sub_jenis", "sub jenis", "sub jenis perpus"])
238
  nama = pick_col(df, ["nm_perpustakaan", "nama_perpustakaan", "nama perpus", "nama"])
239
 
240
- # wajib minimal
241
- missing = [k for k,v in {
242
- "prov":prov, "kab":kab, "kew":kew, "jenis":jenis, "nama":nama
243
- }.items() if v is None]
244
  if missing:
245
  raise KeyError(f"Kolom DM wajib tidak ketemu: {missing}. Cek header Excel DM kamu.")
246
-
247
  return {"prov":prov, "kab":kab, "kew":kew, "jenis":jenis, "subjenis":subjenis, "nama":nama}
248
 
249
  def detect_meta_kab(df: pd.DataFrame) -> dict:
250
  prov = pick_col(df, ["PROVINSI", "provinsi", "Provinsi"])
251
- kab = pick_col(df, ["KABUPATEN_KOTA", "kabupaten_kota", "KAB/KOTA", "kab/kota", "Kab/Kota"])
252
- # Pop sekolah (SD+SMP) bisa beda nama
253
- pop_sd_smp = pick_col(df, ["TOTAL_SD_SMP", "total_sd_smp", "JUMLAH_SD_SMP", "SD_SMP", "TOTAL_SDSMP"])
254
- # Pop umum (Kec+Desa/Kel) bisa disusun dari 2 kolom juga
255
- pop_kec_desa = pick_col(df, ["TOTAL_KEC_DESA", "total_kec_desa", "KEC_DESA", "TOTAL_KECAMATAN_DESA"])
256
- col_kec = pick_col(df, ["JUMLAH_KECAMATAN", "jumlah_kecamatan", "KECAMATAN", "JML_KEC"])
257
- col_desa = pick_col(df, ["JUMLAH_DESA_KEL", "jumlah_desa_kel", "DESA_KEL", "JML_DESA", "JUMLAH_DESA", "JUMLAH_KELURAHAN"])
 
 
 
 
 
 
 
 
258
 
259
  if prov is None or kab is None:
260
  raise KeyError("Meta Kab/Kota minimal harus punya kolom provinsi & kab/kota.")
261
 
262
- return {
263
- "prov": prov,
264
- "kab": kab,
265
- "pop_sd_smp": pop_sd_smp,
266
- "pop_kec_desa": pop_kec_desa,
267
- "col_kec": col_kec,
268
- "col_desa": col_desa
269
- }
270
 
271
  def detect_meta_prov(df: pd.DataFrame) -> dict:
272
  prov = pick_col(df, ["PROVINSI", "provinsi", "Provinsi"])
273
- pop_sma = pick_col(df, ["TOTAL_SMA_SMK_SLB", "total_sma_smk_slb", "SMA_SMK_SLB", "TOTAL_SMA_SMK", "TOTAL_SMA"])
274
  if prov is None or pop_sma is None:
275
  raise KeyError("Meta Provinsi minimal harus punya kolom PROVINSI & TOTAL_SMA_SMK_SLB (atau padanan).")
276
  return {"prov": prov, "pop_sma": pop_sma}
277
 
 
278
  # =========================
279
  # 5) INDIKATOR IPLM (KANONIK) + ALIAS
280
  # =========================
@@ -353,8 +366,9 @@ def rename_indicators(df: pd.DataFrame) -> pd.DataFrame:
353
  df = df.rename(columns=rename_map)
354
  return df
355
 
 
356
  # =========================
357
- # 6) BUILD DATA (DM + META)
358
  # =========================
359
  DATA_INFO = ""
360
  WARNINGS = []
@@ -365,19 +379,18 @@ dm_sheets = []
365
 
366
  meta_kab = None
367
  meta_prov = None
368
- meta_kab_cols = None
369
- meta_prov_cols = None
370
 
371
  try:
372
  df_dm_raw, dm_sheets = load_multisheet_excel(DATA_FILE)
373
  dm_cols = detect_dm_cols(df_dm_raw)
374
 
375
- # bersihkan label display
376
  df_dm_raw[dm_cols["prov"]] = df_dm_raw[dm_cols["prov"]].astype(str).map(lambda x: pretty_admin_name(x, "prov"))
377
  df_dm_raw[dm_cols["kab"]] = df_dm_raw[dm_cols["kab"]].astype(str).map(lambda x: pretty_admin_name(x, "kab"))
378
 
379
  df_dm_raw["KEW_NORM"] = df_dm_raw[dm_cols["kew"]].map(norm_kew)
380
 
 
381
  df_dm_raw["prov_key"] = df_dm_raw[dm_cols["prov"]].map(norm_key)
382
  df_dm_raw["kab_key"] = df_dm_raw[dm_cols["kab"]].map(norm_key)
383
 
@@ -396,9 +409,10 @@ try:
396
  df_dm_raw["_dataset"] = df_dm_raw[dm_cols["jenis"]].map(map_dataset)
397
 
398
  DATA_INFO = (
399
- f"DM: **{DATA_FILE}** | Baris: **{len(df_dm_raw)}** | Kolom: **{len(df_dm_raw.columns)}** | Sheets: **{len(dm_sheets)}**<br>"
400
- f"Deteksi kolom: prov=`{dm_cols['prov']}`, kab=`{dm_cols['kab']}`, kew=`{dm_cols['kew']}`, jenis=`{dm_cols['jenis']}`, "
401
- f"nama=`{dm_cols['nama']}`" + (f", subjenis=`{dm_cols['subjenis']}`" if dm_cols.get("subjenis") else "")
 
402
  )
403
  except Exception as e:
404
  WARNINGS.append(f"⚠️ Gagal memuat DM: {repr(e)}")
@@ -419,23 +433,27 @@ try:
419
  mk["prov_key"] = mk[prov_c].map(norm_key)
420
  mk["kab_key"] = mk[kab_c].map(norm_key)
421
 
422
- # populasi sekolah sd+smp
423
  if meta_kab_cols["pop_sd_smp"]:
424
  mk["POP_SD_SMP"] = mk[meta_kab_cols["pop_sd_smp"]].map(coerce_num).fillna(0)
425
  else:
426
  mk["POP_SD_SMP"] = 0
427
 
428
- # populasi umum kec+desa
429
  if meta_kab_cols["pop_kec_desa"]:
430
  mk["POP_KEC_DESA"] = mk[meta_kab_cols["pop_kec_desa"]].map(coerce_num).fillna(0)
431
  else:
432
- kec = mk[meta_kab_cols["col_kec"]].map(coerce_num).fillna(0) if meta_kab_cols["col_kec"] else 0
433
- desa = mk[meta_kab_cols["col_desa"]].map(coerce_num).fillna(0) if meta_kab_cols["col_desa"] else 0
434
- mk["POP_KEC_DESA"] = (kec + desa) if not isinstance(kec, int) else 0
435
 
436
  meta_kab = (mk.groupby(["prov_key","kab_key"], as_index=False)
437
  .agg({prov_c:"first", kab_c:"first", "POP_SD_SMP":"sum", "POP_KEC_DESA":"sum"}))
438
- DATA_INFO += f"<br>Meta Kab/Kota: **{META_KAB_FILE}** (n={len(meta_kab)})"
 
 
 
 
439
  else:
440
  WARNINGS.append("⚠️ Meta Kab/Kota file tidak ditemukan (skip).")
441
  except Exception as e:
@@ -458,7 +476,7 @@ try:
458
 
459
  meta_prov = (mp.groupby("prov_key", as_index=False)
460
  .agg({prov_c:"first", "POP_SMA_SMK_SLB":"sum"}))
461
- DATA_INFO += f"<br>Meta Provinsi: **{META_PROV_FILE}** (n={len(meta_prov)})"
462
  else:
463
  WARNINGS.append("⚠️ Meta Provinsi file tidak ditemukan (skip).")
464
  except Exception as e:
@@ -468,6 +486,7 @@ except Exception as e:
468
  if WARNINGS:
469
  DATA_INFO += "<br>" + "<br>".join(WARNINGS)
470
 
 
471
  # =========================
472
  # 7) IPLM REAL (NASIONAL)
473
  # =========================
@@ -476,11 +495,10 @@ def prepare_global_iplm(df_src: pd.DataFrame) -> pd.DataFrame:
476
  df = rename_indicators(df)
477
 
478
  available = [c for c in all_indicators if c in df.columns]
479
- # coerce numeric aman
480
  for c in available:
481
  df[c] = df[c].map(coerce_num)
482
 
483
- # transform + minmax per indikator
484
  for c in available:
485
  x = df[c].astype(float).to_numpy()
486
  mask = ~np.isnan(x)
@@ -506,9 +524,9 @@ def prepare_global_iplm(df_src: pd.DataFrame) -> pd.DataFrame:
506
  p_cols = [c for c in pelayanan_cols if c in available]
507
  g_cols = [c for c in pengelolaan_cols if c in available]
508
 
509
- df["sub_koleksi"] = df.apply(lambda r: mean_norm(r, k_cols), axis=1)
510
- df["sub_sdm"] = df.apply(lambda r: mean_norm(r, s_cols), axis=1)
511
- df["sub_pelayanan"] = df.apply(lambda r: mean_norm(r, p_cols), axis=1)
512
  df["sub_pengelolaan"] = df.apply(lambda r: mean_norm(r, g_cols), axis=1)
513
 
514
  df["dim_kepatuhan"] = df[["sub_koleksi","sub_sdm"]].mean(axis=1, skipna=True).fillna(0.0)
@@ -521,11 +539,11 @@ df_iplm = None
521
  if df_dm_raw is not None and len(df_dm_raw) > 0:
522
  df_iplm = prepare_global_iplm(df_dm_raw)
523
 
 
524
  # =========================
525
  # 8) SAMPLING FACTOR (68%)
526
  # =========================
527
  def detect_school_menengah(df: pd.DataFrame) -> pd.Series:
528
- # SMA/SMK/SLB dari subjenis atau jenis
529
  if dm_cols.get("subjenis") and dm_cols["subjenis"] in df.columns:
530
  t = df[dm_cols["subjenis"]].astype(str).str.upper()
531
  else:
@@ -536,7 +554,7 @@ def apply_sampling_factor(df: pd.DataFrame) -> pd.DataFrame:
536
  out = df.copy()
537
  out["SamplingFactor_Total"] = 1.0
538
 
539
- # KAB/KOTA: sekolah=SD+SMP (POP_SD_SMP); umum=KEC+DESA (POP_KEC_DESA)
540
  if meta_kab is not None and len(meta_kab) > 0:
541
  kab_part = out[out["KEW_NORM"] == "KAB/KOTA"].copy()
542
  if not kab_part.empty:
@@ -548,6 +566,7 @@ def apply_sampling_factor(df: pd.DataFrame) -> pd.DataFrame:
548
 
549
  merged = g.merge(meta_kab[["prov_key","kab_key","POP_SD_SMP","POP_KEC_DESA"]],
550
  on=["prov_key","kab_key"], how="left")
 
551
  merged["POP_SD_SMP"] = pd.to_numeric(merged["POP_SD_SMP"], errors="coerce").fillna(0)
552
  merged["POP_KEC_DESA"] = pd.to_numeric(merged["POP_KEC_DESA"], errors="coerce").fillna(0)
553
 
@@ -590,11 +609,11 @@ def apply_sampling_factor(df: pd.DataFrame) -> pd.DataFrame:
590
  if df_iplm is not None and len(df_iplm) > 0:
591
  df_iplm = apply_sampling_factor(df_iplm)
592
 
 
593
  # =========================
594
  # 9) CHOICES (DEDUP RAPI)
595
  # =========================
596
  def build_prov_choice_map(df: pd.DataFrame) -> dict:
597
- # prov_key -> label yang paling sering muncul (biar stabil)
598
  tmp = df[[dm_cols["prov"], "prov_key"]].dropna()
599
  tmp = tmp[tmp["prov_key"] != ""]
600
  by = tmp.groupby("prov_key")[dm_cols["prov"]].agg(lambda s: Counter(s).most_common(1)[0][0])
@@ -618,7 +637,7 @@ def kew_choices(df: pd.DataFrame):
618
  vals = [v for v in vals if v]
619
  return ["(Semua)"] + vals
620
 
621
- PROV_CHOICES, PROV_KEYMAP = (["(Semua)"], {}) if df_dm_raw is None else prov_choices(df_dm_raw)
622
  KEW_CHOICES = ["(Semua)"] if df_dm_raw is None else kew_choices(df_dm_raw)
623
  DEFAULT_KEW = "KAB/KOTA" if "KAB/KOTA" in KEW_CHOICES else (KEW_CHOICES[0] if KEW_CHOICES else "(Semua)")
624
  KAB_CHOICES = ["(Semua)"] if df_dm_raw is None else kab_choices_for_prov(df_dm_raw, "(Semua)")
@@ -635,8 +654,9 @@ def on_kew_change(kew_value, prov_value):
635
  ch = kab_choices_for_prov(df_dm_raw, prov_value)
636
  return gr.update(choices=ch, value="(Semua)", interactive=True)
637
 
 
638
  # =========================
639
- # 10) BUILD TABLES (FINAL & REAL)
640
  # =========================
641
  LABEL_DATASET = {"sekolah":"Perpustakaan Sekolah","umum":"Perpustakaan Umum","khusus":"Perpustakaan Khusus"}
642
 
@@ -703,19 +723,39 @@ def agg_real_by_jenis(df):
703
  return pd.DataFrame(rows).round(3)
704
 
705
  def detail_real(df):
706
- # tampilkan dimensi + subindeks + indikator raw yang tersedia (tanpa norm_ biar tidak kebanyakan)
707
  base = [dm_cols["prov"], dm_cols["kab"], dm_cols["nama"], dm_cols["jenis"]]
708
  if dm_cols.get("subjenis") and dm_cols["subjenis"] in df.columns:
709
  base.append(dm_cols["subjenis"])
710
  base += ["KEW_NORM","_dataset","sub_koleksi","sub_sdm","sub_pelayanan","sub_pengelolaan","dim_kepatuhan","dim_kinerja","Indeks_Real_0_100"]
 
711
  available_ind = [c for c in all_indicators if c in df.columns]
712
- cols = base + available_ind
713
- cols = [c for c in cols if c in df.columns]
714
  return df[cols].copy().round(3)
715
 
 
716
  # =========================
717
- # 11) COVERAGE + BAR (pop vs sampel)
718
  # =========================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
719
  def coverage_table_and_bar(df_subset, kew_value):
720
  kew = str(kew_value).upper()
721
  tbl = pd.DataFrame()
@@ -732,6 +772,7 @@ def coverage_table_and_bar(df_subset, kew_value):
732
  keys = df_subset[["prov_key","kab_key"]].dropna().drop_duplicates()
733
  merged = keys.merge(meta_kab[["prov_key","kab_key","POP_SD_SMP","POP_KEC_DESA"]],
734
  on=["prov_key","kab_key"], how="left")
 
735
  pop_sek = int(pd.to_numeric(merged["POP_SD_SMP"], errors="coerce").fillna(0).sum())
736
  pop_um = int(pd.to_numeric(merged["POP_KEC_DESA"], errors="coerce").fillna(0).sum())
737
 
@@ -772,6 +813,7 @@ def coverage_table_and_bar(df_subset, kew_value):
772
 
773
  return tbl, fig
774
 
 
775
  # =========================
776
  # 12) BELL CURVE (per jenis)
777
  # =========================
@@ -796,23 +838,20 @@ def bell_curve_fig(df, score_col: str, title: str, name_col: str | None = None):
796
  q2 = float(x.quantile(0.50))
797
  q3 = float(x.quantile(0.75))
798
 
799
- # bell curve line
800
  fig.add_trace(go.Scatter(x=xs, y=pdf, mode="lines", name="Bell curve"))
801
 
802
- # rug points
803
  y0 = np.zeros(len(x))
804
  hover = None
805
  if name_col and name_col in df.columns:
806
- dd = df.loc[x.index, name_col].astype(str).tolist()
807
- hover = dd
808
 
809
  fig.add_trace(go.Scatter(
810
  x=x, y=y0, mode="markers", name="Perpustakaan",
811
  marker=dict(size=6),
812
- text=hover, hovertemplate="%{text}<br>Indeks: %{x:.2f}<extra></extra>" if hover else "Indeks: %{x:.2f}<extra></extra>"
 
813
  ))
814
 
815
- # quantile lines
816
  fig.add_vline(x=q1, line_width=2, line_dash="solid", annotation_text=f"Q1<br>{q1:.1f}", annotation_position="top")
817
  fig.add_vline(x=q2, line_width=2, line_dash="solid", annotation_text=f"Q2 (Median)<br>{q2:.1f}", annotation_position="top")
818
  fig.add_vline(x=q3, line_width=2, line_dash="solid", annotation_text=f"Q3<br>{q3:.1f}", annotation_position="top")
@@ -826,12 +865,12 @@ def bell_curve_fig(df, score_col: str, title: str, name_col: str | None = None):
826
  )
827
  return fig
828
 
 
829
  # =========================
830
- # 13) LLM ANALYSIS (opsional) + fallback template
831
  # =========================
832
  def llm_analysis_text(df_subset: pd.DataFrame, cov_tbl: pd.DataFrame, scope_label: str, kew: str,
833
  use_llm: bool, hf_model: str):
834
- # fallback narrative (selalu ada)
835
  mean_final = float(df_subset["Indeks_Final_0_100"].mean(skipna=True)) if len(df_subset) else 0.0
836
  mean_real = float(df_subset["Indeks_Real_0_100"].mean(skipna=True)) if len(df_subset) else 0.0
837
  mean_sf = float(df_subset["SamplingFactor_Total"].mean(skipna=True)) if len(df_subset) else 1.0
@@ -843,7 +882,6 @@ def llm_analysis_text(df_subset: pd.DataFrame, cov_tbl: pd.DataFrame, scope_labe
843
  lines.append(f"- Rata-rata **SamplingFactor (target 68%)**: {mean_sf:.3f}")
844
 
845
  if cov_tbl is not None and not cov_tbl.empty:
846
- # cari gap terbesar
847
  cov_tbl2 = cov_tbl.copy()
848
  cov_tbl2["Gap_ke_68%"] = pd.to_numeric(cov_tbl2["Gap_ke_68%"], errors="coerce").fillna(0)
849
  top = cov_tbl2.sort_values("Gap_ke_68%", ascending=False).head(1)
@@ -851,7 +889,6 @@ def llm_analysis_text(df_subset: pd.DataFrame, cov_tbl: pd.DataFrame, scope_labe
851
  r = top.iloc[0].to_dict()
852
  lines.append(f"- Kesenjangan keterwakilan terbesar: **{r.get('Jenis')}** (Gap ke 68% = **{int(r.get('Gap_ke_68%',0))}** unit).")
853
 
854
- # kalau user ingin pakai HF Inference (optional)
855
  if use_llm:
856
  try:
857
  from huggingface_hub import InferenceClient
@@ -880,14 +917,14 @@ def llm_analysis_text(df_subset: pd.DataFrame, cov_tbl: pd.DataFrame, scope_labe
880
  lines.append(f"\n⚠️ LLM call gagal ({repr(e)}). Pakai analisis template.")
881
  return "\n".join(lines)
882
 
883
- # template rekomendasi singkat
884
  lines.append("\n**Implikasi kebijakan (template cepat):**")
885
- lines.append("- SamplingFactor < 1 menandakan keterwakilan belum mencapai target 68% β†’ interpretasi indeks perlu disertai catatan kualitas/coverage data.")
886
  lines.append("- Prioritaskan percepatan pengisian pada jenis dengan gap terbesar, dan lakukan validasi minimal (kelengkapan indikator kunci) sebelum agregasi.")
887
  return "\n".join(lines)
888
 
 
889
  # =========================
890
- # 14) WORD REPORT (docx)
891
  # =========================
892
  HAS_DOCX = True
893
  try:
@@ -934,13 +971,11 @@ def generate_word_report(scope_label, kew, agg_overall, agg_final, agg_real, cov
934
  doc.add_heading("5) Grafik", level=2)
935
  tmpdir = tempfile.mkdtemp()
936
 
937
- # bar
938
  p = os.path.join(tmpdir, "bar.png")
939
  if bar_fig is not None and try_plotly_png(bar_fig, p) and Path(p).exists():
940
  doc.add_paragraph("Grafik BAR β€” Populasi vs Sampel")
941
  doc.add_picture(p, width=Inches(6.5))
942
 
943
- # bell curves
944
  for title, fig in [
945
  ("Sebaran Indeks (RealScore) β€” Semua", bell_all),
946
  ("Sebaran Indeks (RealScore) β€” Perpustakaan Sekolah", bell_sek),
@@ -959,6 +994,7 @@ def generate_word_report(scope_label, kew, agg_overall, agg_final, agg_real, cov
959
  doc.save(outpath)
960
  return outpath
961
 
 
962
  # =========================
963
  # 15) RUN CORE (FILTER + OUTPUT)
964
  # =========================
@@ -967,7 +1003,7 @@ def run_app(prov_value, kab_value, kew_value, use_llm, hf_model):
967
  empty_fig = go.Figure()
968
 
969
  if df_iplm is None or df_iplm.empty:
970
- return (empty, empty, empty, empty, empty, empty, empty_fig, empty_fig, empty_fig, empty_fig, empty,
971
  None, None, None, "⚠️ Data belum siap (DM gagal dimuat / kosong).")
972
 
973
  prov_value = prov_value or "(Semua)"
@@ -975,7 +1011,6 @@ def run_app(prov_value, kab_value, kew_value, use_llm, hf_model):
975
  kew_value = kew_value or "(Semua)"
976
  kew_norm = str(kew_value).upper()
977
 
978
- # PROVINSI: kab disabled
979
  if kew_norm == "PROVINSI":
980
  kab_value = "(Semua)"
981
 
@@ -989,10 +1024,10 @@ def run_app(prov_value, kab_value, kew_value, use_llm, hf_model):
989
  df = df[df["KEW_NORM"] == kew_norm]
990
 
991
  if df.empty:
992
- return (empty, empty, empty, empty, empty, empty, empty_fig, empty_fig, empty_fig, empty_fig, empty,
993
  None, None, None, "Tidak ada data untuk filter ini.")
994
 
995
- # OUTPUT TABLES
996
  t1 = agg_final_overall(df)
997
  t2 = agg_final_by_jenis(df)
998
  t3 = detail_final(df)
@@ -1001,14 +1036,15 @@ def run_app(prov_value, kab_value, kew_value, use_llm, hf_model):
1001
 
1002
  # COVERAGE + BAR
1003
  cov_tbl, bar_fig = coverage_table_and_bar(df, kew_norm)
 
1004
 
1005
- # BELL CURVE (RealScore) per jenis
1006
  bell_all = bell_curve_fig(df, "Indeks_Real_0_100", "Sebaran Indeks RealScore β€” Semua", dm_cols["nama"])
1007
  bell_sek = bell_curve_fig(df[df["_dataset"]=="sekolah"], "Indeks_Real_0_100", "Sebaran Indeks RealScore β€” Perpustakaan Sekolah", dm_cols["nama"])
1008
  bell_um = bell_curve_fig(df[df["_dataset"]=="umum"], "Indeks_Real_0_100", "Sebaran Indeks RealScore β€” Perpustakaan Umum", dm_cols["nama"])
1009
  bell_kh = bell_curve_fig(df[df["_dataset"]=="khusus"], "Indeks_Real_0_100", "Sebaran Indeks RealScore β€” Perpustakaan Khusus", dm_cols["nama"])
1010
 
1011
- # NARASI (LLM optional)
1012
  scope_label = kab_value if (kab_value != "(Semua)" and kew_norm != "PROVINSI") else prov_value
1013
  if scope_label == "(Semua)":
1014
  scope_label = "NASIONAL"
@@ -1016,8 +1052,6 @@ def run_app(prov_value, kab_value, kew_value, use_llm, hf_model):
1016
 
1017
  # SAVE FILES
1018
  tmpdir = tempfile.mkdtemp()
1019
-
1020
- # excel outputs
1021
  f_final_agg = os.path.join(tmpdir, "IPLM2025_Agregat_FINAL.xlsx")
1022
  f_final_det = os.path.join(tmpdir, "IPLM2025_Detail_FINAL.xlsx")
1023
  f_real_agg = os.path.join(tmpdir, "IPLM2025_Agregat_Real_SubindeksDimensi.xlsx")
@@ -1028,7 +1062,6 @@ def run_app(prov_value, kab_value, kew_value, use_llm, hf_model):
1028
  t4.to_excel(f_real_agg, index=False)
1029
  t5.to_excel(f_real_det, index=False)
1030
 
1031
- # word report
1032
  word_path = generate_word_report(
1033
  scope_label, kew_norm, t1, t2, t4, cov_tbl, bar_fig,
1034
  bell_all, bell_sek, bell_um, bell_kh,
@@ -1036,8 +1069,9 @@ def run_app(prov_value, kab_value, kew_value, use_llm, hf_model):
1036
  )
1037
 
1038
  msg = f"βœ… OK | n={len(df)} | Mean Final={float(df['Indeks_Final_0_100'].mean()):.2f} | Mean SamplingFactor={float(df['SamplingFactor_Total'].mean()):.3f}"
1039
- return (t1, t2, t3, t4, t5, cov_tbl, bar_fig, bell_all, bell_sek, bell_um, bell_kh,
1040
- f_final_agg, f_final_det, word_path, msg)
 
1041
 
1042
  # =========================
1043
  # 16) UI
@@ -1046,8 +1080,7 @@ with gr.Blocks() as demo:
1046
  gr.Markdown(f"""
1047
  # IPLM 2025 β€” Real Γ— SamplingFactor 68% (FINAL)
1048
 
1049
- **Final**: `Indeks_Final_0_100 = Indeks_Real_0_100 Γ— SamplingFactor_Total`
1050
-
1051
  {DATA_INFO}
1052
  """)
1053
 
@@ -1082,7 +1115,7 @@ with gr.Blocks() as demo:
1082
  out_det_real = gr.DataFrame(interactive=False)
1083
 
1084
  gr.Markdown("## 6) Coverage Populasi vs Sampel (Target 68%)")
1085
- out_cov_tbl = gr.DataFrame(interactive=False)
1086
 
1087
  gr.Markdown("## Grafik BAR β€” Populasi vs Sampel")
1088
  out_bar = gr.Plot()
@@ -1105,16 +1138,18 @@ with gr.Blocks() as demo:
1105
  with gr.Row():
1106
  f1 = gr.File(label="Download Agregat FINAL (.xlsx)")
1107
  f2 = gr.File(label="Download Detail FINAL (.xlsx)")
1108
- f3 = gr.File(label="Download Laporan Word (.docx)")
1109
 
1110
  run_btn.click(
1111
  fn=run_app,
1112
  inputs=[dd_prov, dd_kab, dd_kew, use_llm, hf_model],
1113
  outputs=[
1114
  out_agg_overall, out_agg_final, out_det_final,
1115
- out_agg_real, out_det_real, out_cov_tbl,
 
1116
  out_bar, out_bell_all, out_bell_sek, out_bell_um, out_bell_kh,
1117
  f1, f2, f3,
 
1118
  msg_out
1119
  ],
1120
  )
 
1
  # -*- coding: utf-8 -*-
2
  """
3
  app.py — IPLM 2025 (STABLE, COPY-PASTE, HF Spaces)
4
+
5
+ ✅ IPLM Real:
6
+ - Rename indikator (alias -> kanonik)
7
+ - Yeo-Johnson per indikator + MinMax nasional (sekali)
8
+ - Subindeks (koleksi/sdm/pelayanan/pengelolaan)
9
+ - Dimensi (kepatuhan/kinerja)
10
+ - Indeks_Real_0_100
11
+
12
+ ✅ FINAL:
13
+ Indeks_Final_0_100 = Indeks_Real_0_100 × SamplingFactor_Total (Target 68%)
14
+
15
+ ✅ UI:
16
+ - Dropdown Provinsi / Kab-Kota / Kewenangan (Kab/Kota disable kalau PROVINSI)
17
+ - Label rapi (tidak jadi PROVINSIACEH)
18
+ - Provinsi/Kab key join stabil (Kep Seribu beres)
19
+ - Output lengkap:
20
+ 1) Indeks Agregat (FINAL)
21
+ 2) Agregat (FINAL) per Jenis
22
+ 3) Detail (FINAL) per Unit
23
+ 4) Agregat (RealScore) per Jenis (Subindeks & Dimensi)
24
+ 5) Detail (RealScore) per Unit (Subindeks & Dimensi + Indikator raw)
25
+ 6) Coverage Populasi vs Sampel (Target 68%) + BAR chart (dibuat TERBACA via HTML)
26
+ 7) Bell curve per Jenis (RealScore) — seperti contoh kamu
27
+ 8) Analisis (LLM opsional) + Word report opsional
28
+
29
+ Catatan penting untuk kasus Kep. Seribu:
30
+ - Coverage sekolah (SD+SMP) = 0 biasanya karena:
31
+ (a) kolom SD+SMP di meta kab/kota tidak terdeteksi, ATAU
32
+ (b) baris Kep Seribu tidak ada di meta, ATAU
33
+ (c) key join kab/kota tidak match.
34
+ Kode ini memperkeras normalisasi & deteksi kolom meta.
35
  """
36
 
37
  import os
 
50
  from sklearn.preprocessing import PowerTransformer
51
 
52
  # =========================
53
+ # 0) FILES (SESUAIKAN)
54
  # =========================
55
  DATA_FILE = "IPLM_clean_manual_131225.xlsx"
56
  META_KAB_FILE = "Data_populasi_Kab_kota.xlsx"
 
62
  W_KEPATUHAN = 0.30
63
  W_KINERJA = 0.70
64
 
65
+
66
  # =========================
67
+ # 1) UTIL — string & kolom
68
  # =========================
69
  def make_unique_columns(cols):
70
  """Hindari kolom duplikat agar df['X'] tidak menjadi DataFrame."""
 
88
 
89
  def pretty_admin_name(s: str, kind: str = "prov") -> str:
90
  """
91
+ Display label manusiawi untuk dropdown.
92
  - PROVINSI JAWA BARAT
93
  - KOTA SURABAYA / KAB. BANDUNG
94
+ - KAB. ADM. KEPULAUAN SERIBU (tetap kebaca)
95
  """
96
  t = clean_spaces(str(s)).upper()
 
97
  t = t.replace("PROPINSI", "PROVINSI")
98
  t = re.sub(r"\bKABUPATEN\b", "KAB.", t)
99
+ t = re.sub(r"\bKOTA\s+ADMINISTRASI\b", "KOTA ADM.", t)
100
+ t = re.sub(r"\bKABUPATEN\s+ADMINISTRASI\b", "KAB. ADM.", t)
101
+ t = t.replace("ADMINISTRASI", "ADM.")
102
+ # rapikan spasi titik
103
+ t = re.sub(r"\s+\.", ".", t)
104
+ t = re.sub(r"\.\s+", ". ", t)
105
 
106
  if kind == "prov":
107
+ # jika belum ada prefiks PROVINSI, tambahkan
108
  if not t.startswith("PROVINSI "):
 
109
  t = "PROVINSI " + t
110
  return t
111
 
112
  def norm_key(x) -> str:
113
  """
114
+ Key join prov/kab:
115
+ distabilkan supaya:
116
+ KEP. SERIBU == KEPULAUAN SERIBU == KAB. ADM. KEPULAUAN SERIBU
 
 
117
  """
118
  if pd.isna(x):
119
  return ""
 
120
  t = clean_spaces(str(x)).upper()
121
 
122
+ # normalisasi umum
 
 
123
  t = t.replace("PROPINSI", "PROVINSI")
124
+ t = re.sub(r"\bKABUPATEN\b", "KAB.", t)
125
+ t = re.sub(r"\bKOTA\s+ADMINISTRASI\b", "KOTA ADM.", t)
126
+ t = re.sub(r"\bKABUPATEN\s+ADMINISTRASI\b", "KAB. ADM.", t)
127
  t = t.replace("ADMINISTRASI", "ADM.")
 
 
128
  t = t.replace("KEP.", "KEPULAUAN")
129
+ t = re.sub(r"\bKEP\b", "KEPULAUAN", t)
130
 
131
+ # khusus Kepulauan Seribu
 
 
132
  if "SERIBU" in t:
133
  t = "KAB. ADM. KEPULAUAN SERIBU"
134
 
135
+ # buang non alnum utk key
 
 
136
  return re.sub(r"[^A-Z0-9]", "", t)
137
 
138
def norm_kew(v):
    """
    Normalize a raw authority-level ("kewenangan") cell to a canonical label.

    Args:
        v: Raw cell value (any type; it is stringified).

    Returns:
        "" for a missing value; "KAB/KOTA", "PROVINSI" or "PUSAT" when the
        upper-cased text contains the matching keyword; otherwise the
        cleaned, upper-cased text itself.
    """
    if pd.isna(v):
        return ""

    # Stringify first, as the other normalizers do, so a non-string cell
    # (e.g. a number) is never passed to clean_spaces.
    t = clean_spaces(str(v)).upper()

    # Order matters: the kab/kota check runs before the province check.
    # "KAB" also covers KABUPATEN and KAB/KOTA; "PROV"/"PROP" cover
    # PROVINSI/PROPINSI.
    if "KAB" in t or "KOTA" in t:
        return "KAB/KOTA"
    if "PROV" in t or "PROP" in t:
        return "PROVINSI"
    if "PUSAT" in t or "NASIONAL" in t:
        return "PUSAT"
    return t
149
+
150
 
151
  # =========================
152
  # 2) NUM COERCION (AMAN)
 
208
  except Exception:
209
  return 1.0
210
 
 
 
 
 
 
 
 
 
 
 
 
211
 
212
  # =========================
213
  # 3) LOAD MULTISHEET DM
 
225
  out = pd.concat(frames, ignore_index=True, sort=False)
226
  return out, list(xls.sheet_names)
227
 
228
+
229
  # =========================
230
  # 4) AUTO DETECT COLUMNS (DM & META)
231
  # =========================
 
252
  subjenis = pick_col(df, ["sub_jenis_perpus", "subjenis", "sub_jenis", "sub jenis", "sub jenis perpus"])
253
  nama = pick_col(df, ["nm_perpustakaan", "nama_perpustakaan", "nama perpus", "nama"])
254
 
255
+ missing = [k for k,v in {"prov":prov, "kab":kab, "kew":kew, "jenis":jenis, "nama":nama}.items() if v is None]
 
 
 
256
  if missing:
257
  raise KeyError(f"Kolom DM wajib tidak ketemu: {missing}. Cek header Excel DM kamu.")
 
258
  return {"prov":prov, "kab":kab, "kew":kew, "jenis":jenis, "subjenis":subjenis, "nama":nama}
259
 
260
  def detect_meta_kab(df: pd.DataFrame) -> dict:
261
  prov = pick_col(df, ["PROVINSI", "provinsi", "Provinsi"])
262
+ kab = pick_col(df, ["KABUPATEN_KOTA", "kabupaten_kota", "KAB/KOTA", "kab/kota", "Kab/Kota", "KABKOTA", "KAB_KOTA"])
263
+
264
+ # πŸ”₯ kandidat lebih luas (biar SD+SMP ketemu)
265
+ pop_sd_smp = pick_col(df, [
266
+ "TOTAL_SD_SMP", "total_sd_smp", "JUMLAH_SD_SMP", "SD_SMP", "TOTAL_SDSMP",
267
+ "SD+SMP", "SD SMP", "TOTAL SD SMP", "JML SD SMP", "JUMLAH SD SMP"
268
+ ])
269
+
270
+ pop_kec_desa = pick_col(df, [
271
+ "TOTAL_KEC_DESA", "total_kec_desa", "KEC_DESA", "TOTAL_KECAMATAN_DESA",
272
+ "KECAMATAN+DESA", "KEC+DESA", "KEC DESA", "TOTAL KEC DESA"
273
+ ])
274
+
275
+ col_kec = pick_col(df, ["JUMLAH_KECAMATAN", "jumlah_kecamatan", "KECAMATAN", "JML_KEC", "JML KEC"])
276
+ col_desa = pick_col(df, ["JUMLAH_DESA_KEL", "jumlah_desa_kel", "DESA_KEL", "JML_DESA", "JUMLAH_DESA", "JUMLAH_KELURAHAN", "JML DESA", "JML KEL"])
277
 
278
  if prov is None or kab is None:
279
  raise KeyError("Meta Kab/Kota minimal harus punya kolom provinsi & kab/kota.")
280
 
281
+ return {"prov": prov, "kab": kab, "pop_sd_smp": pop_sd_smp, "pop_kec_desa": pop_kec_desa, "col_kec": col_kec, "col_desa": col_desa}
 
 
 
 
 
 
 
282
 
283
  def detect_meta_prov(df: pd.DataFrame) -> dict:
284
  prov = pick_col(df, ["PROVINSI", "provinsi", "Provinsi"])
285
+ pop_sma = pick_col(df, ["TOTAL_SMA_SMK_SLB", "total_sma_smk_slb", "SMA_SMK_SLB", "TOTAL_SMA_SMK", "TOTAL_SMA", "SMA+SMK+SLB"])
286
  if prov is None or pop_sma is None:
287
  raise KeyError("Meta Provinsi minimal harus punya kolom PROVINSI & TOTAL_SMA_SMK_SLB (atau padanan).")
288
  return {"prov": prov, "pop_sma": pop_sma}
289
 
290
+
291
  # =========================
292
  # 5) INDIKATOR IPLM (KANONIK) + ALIAS
293
  # =========================
 
366
  df = df.rename(columns=rename_map)
367
  return df
368
 
369
+
370
  # =========================
371
+ # 6) LOAD DATA (DM + META)
372
  # =========================
373
  DATA_INFO = ""
374
  WARNINGS = []
 
379
 
380
  meta_kab = None
381
  meta_prov = None
 
 
382
 
383
  try:
384
  df_dm_raw, dm_sheets = load_multisheet_excel(DATA_FILE)
385
  dm_cols = detect_dm_cols(df_dm_raw)
386
 
387
+ # display label rapi
388
  df_dm_raw[dm_cols["prov"]] = df_dm_raw[dm_cols["prov"]].astype(str).map(lambda x: pretty_admin_name(x, "prov"))
389
  df_dm_raw[dm_cols["kab"]] = df_dm_raw[dm_cols["kab"]].astype(str).map(lambda x: pretty_admin_name(x, "kab"))
390
 
391
  df_dm_raw["KEW_NORM"] = df_dm_raw[dm_cols["kew"]].map(norm_kew)
392
 
393
+ # key join stabil
394
  df_dm_raw["prov_key"] = df_dm_raw[dm_cols["prov"]].map(norm_key)
395
  df_dm_raw["kab_key"] = df_dm_raw[dm_cols["kab"]].map(norm_key)
396
 
 
409
  df_dm_raw["_dataset"] = df_dm_raw[dm_cols["jenis"]].map(map_dataset)
410
 
411
  DATA_INFO = (
412
+ f"DM: <b>{DATA_FILE}</b> | Baris: <b>{len(df_dm_raw)}</b> | Kolom: <b>{len(df_dm_raw.columns)}</b> | Sheets: <b>{len(dm_sheets)}</b><br>"
413
+ f"Deteksi kolom: prov=<code>{dm_cols['prov']}</code>, kab=<code>{dm_cols['kab']}</code>, kew=<code>{dm_cols['kew']}</code>, "
414
+ f"jenis=<code>{dm_cols['jenis']}</code>, nama=<code>{dm_cols['nama']}</code>"
415
+ + (f", subjenis=<code>{dm_cols['subjenis']}</code>" if dm_cols.get("subjenis") else "")
416
  )
417
  except Exception as e:
418
  WARNINGS.append(f"⚠️ Gagal memuat DM: {repr(e)}")
 
433
  mk["prov_key"] = mk[prov_c].map(norm_key)
434
  mk["kab_key"] = mk[kab_c].map(norm_key)
435
 
436
+ # POP_SD_SMP
437
  if meta_kab_cols["pop_sd_smp"]:
438
  mk["POP_SD_SMP"] = mk[meta_kab_cols["pop_sd_smp"]].map(coerce_num).fillna(0)
439
  else:
440
  mk["POP_SD_SMP"] = 0
441
 
442
+ # POP_KEC_DESA
443
  if meta_kab_cols["pop_kec_desa"]:
444
  mk["POP_KEC_DESA"] = mk[meta_kab_cols["pop_kec_desa"]].map(coerce_num).fillna(0)
445
  else:
446
+ kec = mk[meta_kab_cols["col_kec"]].map(coerce_num).fillna(0) if meta_kab_cols["col_kec"] else pd.Series(0, index=mk.index)
447
+ desa = mk[meta_kab_cols["col_desa"]].map(coerce_num).fillna(0) if meta_kab_cols["col_desa"] else pd.Series(0, index=mk.index)
448
+ mk["POP_KEC_DESA"] = (kec + desa).fillna(0)
449
 
450
  meta_kab = (mk.groupby(["prov_key","kab_key"], as_index=False)
451
  .agg({prov_c:"first", kab_c:"first", "POP_SD_SMP":"sum", "POP_KEC_DESA":"sum"}))
452
+
453
+ # DEBUG SERIBU (biar kamu langsung lihat ada/tidak)
454
+ ser = meta_kab[meta_kab["kab_key"].str.contains("SERIBU", na=False)]
455
+ DATA_INFO += f"<br>Meta Kab/Kota: <b>{META_KAB_FILE}</b> (n={len(meta_kab)})"
456
+ DATA_INFO += f"<br><b>DEBUG Kep Seribu meta rows:</b> {len(ser)}"
457
  else:
458
  WARNINGS.append("⚠️ Meta Kab/Kota file tidak ditemukan (skip).")
459
  except Exception as e:
 
476
 
477
  meta_prov = (mp.groupby("prov_key", as_index=False)
478
  .agg({prov_c:"first", "POP_SMA_SMK_SLB":"sum"}))
479
+ DATA_INFO += f"<br>Meta Provinsi: <b>{META_PROV_FILE}</b> (n={len(meta_prov)})"
480
  else:
481
  WARNINGS.append("⚠️ Meta Provinsi file tidak ditemukan (skip).")
482
  except Exception as e:
 
486
  if WARNINGS:
487
  DATA_INFO += "<br>" + "<br>".join(WARNINGS)
488
 
489
+
490
  # =========================
491
  # 7) IPLM REAL (NASIONAL)
492
  # =========================
 
495
  df = rename_indicators(df)
496
 
497
  available = [c for c in all_indicators if c in df.columns]
 
498
  for c in available:
499
  df[c] = df[c].map(coerce_num)
500
 
501
+ # YJ + minmax
502
  for c in available:
503
  x = df[c].astype(float).to_numpy()
504
  mask = ~np.isnan(x)
 
524
  p_cols = [c for c in pelayanan_cols if c in available]
525
  g_cols = [c for c in pengelolaan_cols if c in available]
526
 
527
+ df["sub_koleksi"] = df.apply(lambda r: mean_norm(r, k_cols), axis=1)
528
+ df["sub_sdm"] = df.apply(lambda r: mean_norm(r, s_cols), axis=1)
529
+ df["sub_pelayanan"] = df.apply(lambda r: mean_norm(r, p_cols), axis=1)
530
  df["sub_pengelolaan"] = df.apply(lambda r: mean_norm(r, g_cols), axis=1)
531
 
532
  df["dim_kepatuhan"] = df[["sub_koleksi","sub_sdm"]].mean(axis=1, skipna=True).fillna(0.0)
 
539
  if df_dm_raw is not None and len(df_dm_raw) > 0:
540
  df_iplm = prepare_global_iplm(df_dm_raw)
541
 
542
+
543
  # =========================
544
  # 8) SAMPLING FACTOR (68%)
545
  # =========================
546
  def detect_school_menengah(df: pd.DataFrame) -> pd.Series:
 
547
  if dm_cols.get("subjenis") and dm_cols["subjenis"] in df.columns:
548
  t = df[dm_cols["subjenis"]].astype(str).str.upper()
549
  else:
 
554
  out = df.copy()
555
  out["SamplingFactor_Total"] = 1.0
556
 
557
+ # KAB/KOTA: sekolah=SD+SMP; umum=KEC+DESA
558
  if meta_kab is not None and len(meta_kab) > 0:
559
  kab_part = out[out["KEW_NORM"] == "KAB/KOTA"].copy()
560
  if not kab_part.empty:
 
566
 
567
  merged = g.merge(meta_kab[["prov_key","kab_key","POP_SD_SMP","POP_KEC_DESA"]],
568
  on=["prov_key","kab_key"], how="left")
569
+
570
  merged["POP_SD_SMP"] = pd.to_numeric(merged["POP_SD_SMP"], errors="coerce").fillna(0)
571
  merged["POP_KEC_DESA"] = pd.to_numeric(merged["POP_KEC_DESA"], errors="coerce").fillna(0)
572
 
 
609
  if df_iplm is not None and len(df_iplm) > 0:
610
  df_iplm = apply_sampling_factor(df_iplm)
611
 
612
+
613
  # =========================
614
  # 9) CHOICES (DEDUP RAPi)
615
  # =========================
616
  def build_prov_choice_map(df: pd.DataFrame) -> dict:
 
617
  tmp = df[[dm_cols["prov"], "prov_key"]].dropna()
618
  tmp = tmp[tmp["prov_key"] != ""]
619
  by = tmp.groupby("prov_key")[dm_cols["prov"]].agg(lambda s: Counter(s).most_common(1)[0][0])
 
637
  vals = [v for v in vals if v]
638
  return ["(Semua)"] + vals
639
 
640
+ PROV_CHOICES, _ = (["(Semua)"], {}) if df_dm_raw is None else prov_choices(df_dm_raw)
641
  KEW_CHOICES = ["(Semua)"] if df_dm_raw is None else kew_choices(df_dm_raw)
642
  DEFAULT_KEW = "KAB/KOTA" if "KAB/KOTA" in KEW_CHOICES else (KEW_CHOICES[0] if KEW_CHOICES else "(Semua)")
643
  KAB_CHOICES = ["(Semua)"] if df_dm_raw is None else kab_choices_for_prov(df_dm_raw, "(Semua)")
 
654
  ch = kab_choices_for_prov(df_dm_raw, prov_value)
655
  return gr.update(choices=ch, value="(Semua)", interactive=True)
656
 
657
+
658
  # =========================
659
+ # 10) TABLE BUILDERS (FINAL & REAL)
660
  # =========================
661
  LABEL_DATASET = {"sekolah":"Perpustakaan Sekolah","umum":"Perpustakaan Umum","khusus":"Perpustakaan Khusus"}
662
 
 
723
  return pd.DataFrame(rows).round(3)
724
 
725
def detail_real(df):
    """Build the per-unit RealScore detail table.

    The output keeps, in this order: the identity columns named in the
    module-level ``dm_cols`` mapping (province, kab/kota, unit name, type and,
    when configured and present, sub-type), the normalised authority and
    dataset columns, the sub-index / dimension / real-index columns, and
    finally every entry of ``all_indicators`` that exists in ``df``.
    Columns missing from ``df`` are silently dropped from the selection.

    Args:
        df: Scored DataFrame to extract the detail view from.

    Returns:
        A copy of the selected columns, rounded to 3 decimals.
    """
    identity_cols = [dm_cols[key] for key in ("prov", "kab", "nama", "jenis")]

    subjenis_col = dm_cols.get("subjenis")
    if subjenis_col and subjenis_col in df.columns:
        identity_cols.append(subjenis_col)

    score_cols = [
        "KEW_NORM",
        "_dataset",
        "sub_koleksi",
        "sub_sdm",
        "sub_pelayanan",
        "sub_pengelolaan",
        "dim_kepatuhan",
        "dim_kinerja",
        "Indeks_Real_0_100",
    ]
    indicator_cols = [name for name in all_indicators if name in df.columns]

    wanted = identity_cols + score_cols + indicator_cols
    present = [name for name in wanted if name in df.columns]
    return df[present].copy().round(3)
734
 
735
+
736
  # =========================
737
+ # 11) COVERAGE (TERBACA) + BAR
738
  # =========================
739
def df_to_html_big(df: pd.DataFrame, title: str = "") -> str:
    """Render a small table as large-font HTML with dot-separated thousands.

    Every column except ``"Jenis"`` is parsed with ``pd.to_numeric``. Columns
    that contain numbers are shown as integers with ``.`` as the thousands
    separator; missing or non-finite cells become ``0``. Columns whose values
    are all non-numeric text are left untouched (previously they were coerced
    to NaN and shown as ``0``).

    Args:
        df: Table to render; ``None`` or an empty frame yields a placeholder.
        title: Heading placed above the table. Inserted as-is (not escaped).

    Returns:
        An HTML fragment (string).
    """
    if df is None or df.empty:
        return f"<div style='font-size:16px;'><b>{title}</b><br>(Tidak ada data)</div>"

    def _thousands(value) -> str:
        # Indonesian-style grouping: 1234567 -> "1.234.567".
        return f"{int(value):,}".replace(",", ".")

    d = df.copy()
    for c in d.columns:
        if c == "Jenis":
            continue
        numeric = pd.to_numeric(d[c], errors="coerce")
        # A column that has values but none of them parse as numbers is text:
        # keep it verbatim instead of flattening every cell to "0".
        if d[c].notna().any() and not numeric.notna().any():
            continue
        # int(inf) raises OverflowError, so treat +/-inf like a missing value.
        numeric = numeric.where(numeric.abs() != float("inf"))
        d[c] = numeric.fillna(0).map(_thousands)

    # escape=False is kept from the original so cell content is emitted as-is.
    html = d.to_html(index=False, escape=False)
    return f"""
    <div style="font-size:16px; line-height:1.35;">
    <div style="font-size:18px; font-weight:700; margin-bottom:8px;">{title}</div>
    <div style="overflow-x:auto; border:1px solid #333; border-radius:10px; padding:8px;">
    {html}
    </div>
    </div>
    """
758
+
759
  def coverage_table_and_bar(df_subset, kew_value):
760
  kew = str(kew_value).upper()
761
  tbl = pd.DataFrame()
 
772
  keys = df_subset[["prov_key","kab_key"]].dropna().drop_duplicates()
773
  merged = keys.merge(meta_kab[["prov_key","kab_key","POP_SD_SMP","POP_KEC_DESA"]],
774
  on=["prov_key","kab_key"], how="left")
775
+
776
  pop_sek = int(pd.to_numeric(merged["POP_SD_SMP"], errors="coerce").fillna(0).sum())
777
  pop_um = int(pd.to_numeric(merged["POP_KEC_DESA"], errors="coerce").fillna(0).sum())
778
 
 
813
 
814
  return tbl, fig
815
 
816
+
817
  # =========================
818
  # 12) BELL CURVE (per jenis)
819
  # =========================
 
838
  q2 = float(x.quantile(0.50))
839
  q3 = float(x.quantile(0.75))
840
 
 
841
  fig.add_trace(go.Scatter(x=xs, y=pdf, mode="lines", name="Bell curve"))
842
 
 
843
  y0 = np.zeros(len(x))
844
  hover = None
845
  if name_col and name_col in df.columns:
846
+ hover = df.loc[x.index, name_col].astype(str).tolist()
 
847
 
848
  fig.add_trace(go.Scatter(
849
  x=x, y=y0, mode="markers", name="Perpustakaan",
850
  marker=dict(size=6),
851
+ text=hover,
852
+ hovertemplate="%{text}<br>Indeks: %{x:.2f}<extra></extra>" if hover else "Indeks: %{x:.2f}<extra></extra>"
853
  ))
854
 
 
855
  fig.add_vline(x=q1, line_width=2, line_dash="solid", annotation_text=f"Q1<br>{q1:.1f}", annotation_position="top")
856
  fig.add_vline(x=q2, line_width=2, line_dash="solid", annotation_text=f"Q2 (Median)<br>{q2:.1f}", annotation_position="top")
857
  fig.add_vline(x=q3, line_width=2, line_dash="solid", annotation_text=f"Q3<br>{q3:.1f}", annotation_position="top")
 
865
  )
866
  return fig
867
 
868
+
869
  # =========================
870
+ # 13) ANALISIS (LLM opsional)
871
  # =========================
872
  def llm_analysis_text(df_subset: pd.DataFrame, cov_tbl: pd.DataFrame, scope_label: str, kew: str,
873
  use_llm: bool, hf_model: str):
 
874
  mean_final = float(df_subset["Indeks_Final_0_100"].mean(skipna=True)) if len(df_subset) else 0.0
875
  mean_real = float(df_subset["Indeks_Real_0_100"].mean(skipna=True)) if len(df_subset) else 0.0
876
  mean_sf = float(df_subset["SamplingFactor_Total"].mean(skipna=True)) if len(df_subset) else 1.0
 
882
  lines.append(f"- Rata-rata **SamplingFactor (target 68%)**: {mean_sf:.3f}")
883
 
884
  if cov_tbl is not None and not cov_tbl.empty:
 
885
  cov_tbl2 = cov_tbl.copy()
886
  cov_tbl2["Gap_ke_68%"] = pd.to_numeric(cov_tbl2["Gap_ke_68%"], errors="coerce").fillna(0)
887
  top = cov_tbl2.sort_values("Gap_ke_68%", ascending=False).head(1)
 
889
  r = top.iloc[0].to_dict()
890
  lines.append(f"- Kesenjangan keterwakilan terbesar: **{r.get('Jenis')}** (Gap ke 68% = **{int(r.get('Gap_ke_68%',0))}** unit).")
891
 
 
892
  if use_llm:
893
  try:
894
  from huggingface_hub import InferenceClient
 
917
  lines.append(f"\n⚠️ LLM call gagal ({repr(e)}). Pakai analisis template.")
918
  return "\n".join(lines)
919
 
 
920
  lines.append("\n**Implikasi kebijakan (template cepat):**")
921
+ lines.append("- SamplingFactor < 1 menandakan keterwakilan belum mencapai target 68% β†’ interpretasi indeks perlu disertai catatan coverage/kualitas data.")
922
  lines.append("- Prioritaskan percepatan pengisian pada jenis dengan gap terbesar, dan lakukan validasi minimal (kelengkapan indikator kunci) sebelum agregasi.")
923
  return "\n".join(lines)
924
 
925
+
926
  # =========================
927
+ # 14) WORD REPORT (opsional)
928
  # =========================
929
  HAS_DOCX = True
930
  try:
 
971
  doc.add_heading("5) Grafik", level=2)
972
  tmpdir = tempfile.mkdtemp()
973
 
 
974
  p = os.path.join(tmpdir, "bar.png")
975
  if bar_fig is not None and try_plotly_png(bar_fig, p) and Path(p).exists():
976
  doc.add_paragraph("Grafik BAR β€” Populasi vs Sampel")
977
  doc.add_picture(p, width=Inches(6.5))
978
 
 
979
  for title, fig in [
980
  ("Sebaran Indeks (RealScore) β€” Semua", bell_all),
981
  ("Sebaran Indeks (RealScore) β€” Perpustakaan Sekolah", bell_sek),
 
994
  doc.save(outpath)
995
  return outpath
996
 
997
+
998
  # =========================
999
  # 15) RUN CORE (FILTER + OUTPUT)
1000
  # =========================
 
1003
  empty_fig = go.Figure()
1004
 
1005
  if df_iplm is None or df_iplm.empty:
1006
+ return (empty, empty, empty, empty, empty, "", empty_fig, empty_fig, empty_fig, empty_fig, empty_fig,
1007
  None, None, None, "⚠️ Data belum siap (DM gagal dimuat / kosong).")
1008
 
1009
  prov_value = prov_value or "(Semua)"
 
1011
  kew_value = kew_value or "(Semua)"
1012
  kew_norm = str(kew_value).upper()
1013
 
 
1014
  if kew_norm == "PROVINSI":
1015
  kab_value = "(Semua)"
1016
 
 
1024
  df = df[df["KEW_NORM"] == kew_norm]
1025
 
1026
  if df.empty:
1027
+ return (empty, empty, empty, empty, empty, "", empty_fig, empty_fig, empty_fig, empty_fig, empty_fig,
1028
  None, None, None, "Tidak ada data untuk filter ini.")
1029
 
1030
+ # TABLES
1031
  t1 = agg_final_overall(df)
1032
  t2 = agg_final_by_jenis(df)
1033
  t3 = detail_final(df)
 
1036
 
1037
  # COVERAGE + BAR
1038
  cov_tbl, bar_fig = coverage_table_and_bar(df, kew_norm)
1039
+ cov_html = df_to_html_big(cov_tbl, "Coverage Populasi vs Sampel (Target 68%)")
1040
 
1041
+ # BELL CURVES
1042
  bell_all = bell_curve_fig(df, "Indeks_Real_0_100", "Sebaran Indeks RealScore β€” Semua", dm_cols["nama"])
1043
  bell_sek = bell_curve_fig(df[df["_dataset"]=="sekolah"], "Indeks_Real_0_100", "Sebaran Indeks RealScore β€” Perpustakaan Sekolah", dm_cols["nama"])
1044
  bell_um = bell_curve_fig(df[df["_dataset"]=="umum"], "Indeks_Real_0_100", "Sebaran Indeks RealScore β€” Perpustakaan Umum", dm_cols["nama"])
1045
  bell_kh = bell_curve_fig(df[df["_dataset"]=="khusus"], "Indeks_Real_0_100", "Sebaran Indeks RealScore β€” Perpustakaan Khusus", dm_cols["nama"])
1046
 
1047
+ # NARASI
1048
  scope_label = kab_value if (kab_value != "(Semua)" and kew_norm != "PROVINSI") else prov_value
1049
  if scope_label == "(Semua)":
1050
  scope_label = "NASIONAL"
 
1052
 
1053
  # SAVE FILES
1054
  tmpdir = tempfile.mkdtemp()
 
 
1055
  f_final_agg = os.path.join(tmpdir, "IPLM2025_Agregat_FINAL.xlsx")
1056
  f_final_det = os.path.join(tmpdir, "IPLM2025_Detail_FINAL.xlsx")
1057
  f_real_agg = os.path.join(tmpdir, "IPLM2025_Agregat_Real_SubindeksDimensi.xlsx")
 
1062
  t4.to_excel(f_real_agg, index=False)
1063
  t5.to_excel(f_real_det, index=False)
1064
 
 
1065
  word_path = generate_word_report(
1066
  scope_label, kew_norm, t1, t2, t4, cov_tbl, bar_fig,
1067
  bell_all, bell_sek, bell_um, bell_kh,
 
1069
  )
1070
 
1071
  msg = f"βœ… OK | n={len(df)} | Mean Final={float(df['Indeks_Final_0_100'].mean()):.2f} | Mean SamplingFactor={float(df['SamplingFactor_Total'].mean()):.3f}"
1072
+ return (t1, t2, t3, t4, t5, cov_html, bar_fig, bell_all, bell_sek, bell_um, bell_kh,
1073
+ f_final_agg, f_final_det, word_path, narrative, msg)
1074
+
1075
 
1076
  # =========================
1077
  # 16) UI
 
1080
  gr.Markdown(f"""
1081
  # IPLM 2025 β€” Real Γ— SamplingFactor 68% (FINAL)
1082
 
1083
+ <b>Final</b>: <code>Indeks_Final_0_100 = Indeks_Real_0_100 Γ— SamplingFactor_Total</code><br><br>
 
1084
  {DATA_INFO}
1085
  """)
1086
 
 
1115
  out_det_real = gr.DataFrame(interactive=False)
1116
 
1117
  gr.Markdown("## 6) Coverage Populasi vs Sampel (Target 68%)")
1118
+ out_cov_html = gr.HTML() # ✅ biar kebaca
1119
 
1120
  gr.Markdown("## Grafik BAR β€” Populasi vs Sampel")
1121
  out_bar = gr.Plot()
 
1138
  with gr.Row():
1139
  f1 = gr.File(label="Download Agregat FINAL (.xlsx)")
1140
  f2 = gr.File(label="Download Detail FINAL (.xlsx)")
1141
+ f3 = gr.File(label="Download Laporan Word (.docx) (opsional)")
1142
 
1143
  run_btn.click(
1144
  fn=run_app,
1145
  inputs=[dd_prov, dd_kab, dd_kew, use_llm, hf_model],
1146
  outputs=[
1147
  out_agg_overall, out_agg_final, out_det_final,
1148
+ out_agg_real, out_det_real,
1149
+ out_cov_html,
1150
  out_bar, out_bell_all, out_bell_sek, out_bell_um, out_bell_kh,
1151
  f1, f2, f3,
1152
+ out_analysis,
1153
  msg_out
1154
  ],
1155
  )