irhamni committed on
Commit
42ebafd
·
verified ·
1 Parent(s): 72de2c1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +169 -172
app.py CHANGED
@@ -5,8 +5,9 @@ IPLM 2025 — FINAL (NO UPLOAD) — FULL REWRITE (NO RINGKAS)
5
  ✅ Jenis tampil: sekolah, umum, khusus (khusus ditampilkan sebagai jenis)
6
  ✅ Indeks dasar per entitas: Yeo-Johnson + MinMax nasional per indikator
7
  ✅ Agregasi wilayah (mean) → BARU penyesuaian 68% berbasis TOTAL pengumpulan wilayah:
8
- faktor_penyesuaian = (jumlah_sampel / jumlah_sampel_68%) × 100 (dibatasi maks 100)
9
- Indeks_Final_Wilayah = Indeks_Dasar_Agregat × (faktor_penyesuaian/100)
 
10
  ✅ Detail entitas: Indeks_Final_0_100 menempel dari Indeks_Final_Wilayah (bukan per-row)
11
  ✅ Bell curve per JENIS berbasis indeks per entitas (row-level)
12
  ✅ LLM analysis + Word
@@ -18,9 +19,9 @@ PERBAIKAN UTAMA (menghilangkan NULL di pop_total/coverage):
18
  ✅ POP_KHUSUS: deteksi kolom target/pop; jika hanya ada target -> Pop_Total_Jenis = target/0.68
19
  ✅ Fallback juga diterapkan di tabel agregat (kalau join gagal) agar pop_total/coverage tidak NULL
20
 
21
- CATATAN:
22
- - Kalau masih ada NULL, itu biasanya karena KAB/KOTA di DM tidak match dengan POP (ejaan ekstrem).
23
- Tetapi dengan norm_kab_label() + fallback agregat, kolom pop_total & coverage tetap terisi estimasi.
24
  """
25
 
26
  import os
@@ -142,7 +143,6 @@ def norm_kew(v):
142
 
143
  # =========================
144
  # !!! PERBAIKAN MASALAH (DROPDOWN PROVINSI DOUBLE PREFIX)
145
- # Hanya menambah norm_prov_disp() dan mengganti norm_prov_label() agar aman jika "PROVINSI PROVINSI ..."
146
  # =========================
147
  def norm_prov_disp(s):
148
  """
@@ -158,14 +158,11 @@ def norm_prov_disp(s):
158
  t = t.replace("\u00a0", " ")
159
  t = " ".join(t.split())
160
 
161
- # Samakan ejaan
162
  t = t.replace("PROPINSI", "PROVINSI")
163
 
164
- # Hapus prefix berulang
165
  while t.startswith("PROVINSI PROVINSI "):
166
  t = t.replace("PROVINSI PROVINSI ", "PROVINSI ", 1)
167
 
168
- # Pastikan format "PROVINSI <NAMA>"
169
  if t.startswith("PROVINSI "):
170
  name = t[len("PROVINSI "):].strip()
171
  else:
@@ -183,7 +180,6 @@ def norm_prov_label(s):
183
  t = str(s).strip().upper().replace("\u00a0", " ")
184
  t = " ".join(t.split())
185
  t = t.replace("PROPINSI", "PROVINSI")
186
- # buang semua kemunculan kata PROVINSI (aman kalau dobel)
187
  t = t.replace("PROVINSI", "").strip()
188
  return re.sub(r"[^A-Z0-9]+", "", t)
189
 
@@ -207,20 +203,23 @@ def safe_div(num, den):
207
  return float(num) / float(den)
208
 
209
  # =========================
210
- # ✅ PERUBAHAN SESUAI PERMINTAAN:
211
- # faktor_penyesuaian = jumlah_sampel / jumlah_sampel_68% × 100 (cap 100)
 
212
  # =========================
213
- def faktor_penyesuaian_total(n_total: float, target_total: float) -> float:
214
  """
215
- faktor_penyesuaian(%) = min( (n_total_terkumpul / target_total_68) * 100 , 100 )
216
- Jika target invalid/missing/0 -> faktor = 100 (tanpa penyesuaian)
 
 
217
  """
218
- if target_total is None or pd.isna(target_total) or float(target_total) <= 0:
219
  return 100.0
220
  if n_total is None or pd.isna(n_total) or float(n_total) < 0:
221
  n_total = 0.0
222
- faktor_pct = (float(n_total) / float(target_total)) * 100.0
223
- return float(min(faktor_pct, 100.0))
224
 
225
 
226
  # ============================================================
@@ -367,10 +366,7 @@ def _parse_pop_khusus(path_xlsx: str) -> pd.DataFrame:
367
  - Kolom gabungan: "Propinsi/Kab/kota"
368
  berisi baris "PROVINSI XXX" lalu daftar "KAB. ..." / "KOTA ..."
369
  - Minimal 1 kolom angka.
370
- Bisa berupa:
371
- A) target 68% (sampel/target_total_68)
372
- B) populasi (populasi/penduduk)
373
- C) dua kolom sekaligus (lebih ideal)
374
 
375
  Output: kab_key, Kab_Kota_Label, Provinsi_Label,
376
  Target68_Total_Jenis, Pop_Total_Jenis
@@ -379,7 +375,6 @@ def _parse_pop_khusus(path_xlsx: str) -> pd.DataFrame:
379
  if df is None or df.empty:
380
  return pd.DataFrame()
381
 
382
- # Kolom gabungan prov/kab
383
  c_mix = pick_col(df, [
384
  "Propinsi/Kab/kota", "Propinsi/Kab/Kota", "Propinsi/Kab/kota ",
385
  "Provinsi/Kab/Kota", "Provinsi/Kab/kota", "Provinsi/Kabupaten/Kota",
@@ -394,7 +389,6 @@ def _parse_pop_khusus(path_xlsx: str) -> pd.DataFrame:
394
  if c_mix is None:
395
  raise ValueError("POP_KHUSUS: kolom gabungan Provinsi/Kab/Kota tidak ditemukan.")
396
 
397
- # Deteksi kolom target & kolom populasi (kalau ada)
398
  c_target = pick_col(df, [
399
  "target_total_68","Target_Total_68","TARGET_68","target_68",
400
  "sampel_total","Sampel_total","TOTAL_SAMPEL","total_sampel",
@@ -407,7 +401,6 @@ def _parse_pop_khusus(path_xlsx: str) -> pd.DataFrame:
407
  "pop_total","Pop_Total"
408
  ])
409
 
410
- # Minimal ada salah satu kolom angka
411
  if c_target is None and c_pop is None:
412
  numeric_cols = [c for c in df.columns if c != c_mix]
413
  if not numeric_cols:
@@ -428,10 +421,9 @@ def _parse_pop_khusus(path_xlsx: str) -> pd.DataFrame:
428
  current_prov = mm.replace("PROVINSI", "").strip()
429
  continue
430
 
431
- kab_label = mm
432
  rows.append({
433
  "Provinsi_Label": current_prov or "",
434
- "Kab_Kota_Label": kab_label,
435
  "Target68_Total_Jenis": tval,
436
  "Pop_Total_Jenis": pval
437
  })
@@ -446,11 +438,9 @@ def _parse_pop_khusus(path_xlsx: str) -> pd.DataFrame:
446
  pop["Target68_Total_Jenis"] = pd.to_numeric(pop["Target68_Total_Jenis"], errors="coerce")
447
  pop["Pop_Total_Jenis"] = pd.to_numeric(pop["Pop_Total_Jenis"], errors="coerce")
448
 
449
- # fallback jika hanya ada target: pop ≈ target/0.68
450
  m_need_pop = pop["Pop_Total_Jenis"].isna() & pop["Target68_Total_Jenis"].notna() & (pop["Target68_Total_Jenis"] > 0)
451
  pop.loc[m_need_pop, "Pop_Total_Jenis"] = pop.loc[m_need_pop, "Target68_Total_Jenis"] / float(FALLBACK_TARGET_RATIO)
452
 
453
- # fallback jika hanya ada pop: target ≈ 0.68*pop
454
  m_need_target = pop["Target68_Total_Jenis"].isna() & pop["Pop_Total_Jenis"].notna() & (pop["Pop_Total_Jenis"] > 0)
455
  pop.loc[m_need_target, "Target68_Total_Jenis"] = pop.loc[m_need_target, "Pop_Total_Jenis"] * float(FALLBACK_TARGET_RATIO)
456
 
@@ -479,7 +469,6 @@ def load_default_files(force=False):
479
  _CACHE.update({"key": key, "df_all": None, "pop_kab": None, "pop_prov": None, "pop_khusus": None, "meta": {}, "info": info})
480
  return None, None, None, None, {}, info
481
 
482
- # DM multi-sheet
483
  fp = Path(DATA_FILE)
484
  xls = pd.ExcelFile(fp)
485
  frames = [pd.read_excel(fp, sheet_name=s) for s in xls.sheet_names]
@@ -501,7 +490,6 @@ def load_default_files(force=False):
501
  _CACHE.update({"key": key, "df_all": None, "pop_kab": None, "pop_prov": None, "pop_khusus": None, "meta": {}, "info": info})
502
  return None, None, None, None, {}, info
503
 
504
- # mapping jenis -> sekolah / umum / khusus
505
  val_map_jenis = {
506
  "PERPUSTAKAAN SEKOLAH": "sekolah", "SEKOLAH": "sekolah",
507
  "PERPUSTAKAAN UMUM": "umum", "UMUM": "umum", "PERPUSTAKAAN DAERAH": "umum",
@@ -511,9 +499,7 @@ def load_default_files(force=False):
511
  df_raw["KEW_NORM"] = df_raw[kew_col].apply(norm_kew)
512
  df_raw["_dataset"] = df_raw[jenis_col].astype(str).str.strip().str.upper().map(val_map_jenis)
513
 
514
- # !!! PERBAIKAN MASALAH: PROV_DISP pakai norm_prov_disp agar tidak muncul "PROVINSI PROVINSI ..."
515
  df_raw["PROV_DISP"] = df_raw[prov_col].apply(norm_prov_disp)
516
-
517
  df_raw["KAB_DISP"] = df_raw[kab_col].apply(_disp_text)
518
 
519
  df_raw["prov_key"] = df_raw["PROV_DISP"].apply(norm_prov_label)
@@ -544,7 +530,6 @@ def load_default_files(force=False):
544
  "target_total_68","Target_Total_68","target_68","TARGET_68"
545
  ])
546
 
547
- # Deteksi populasi diperluas
548
  c_pop_total = pick_col(pk, [
549
  "total_populasi","Total Populasi","POPULASI","populasi",
550
  "jumlah_penduduk","Jumlah Penduduk","PENDUDUK","penduduk",
@@ -568,7 +553,6 @@ def load_default_files(force=False):
568
  pop_kab["Pop_Total"] = pd.to_numeric(pop_kab["Pop_Total"], errors="coerce")
569
  pop_kab["Target68_Total"] = pd.to_numeric(pop_kab["Target68_Total"], errors="coerce")
570
 
571
- # fallback Pop_Total = Target/0.68
572
  mask_need_pop = pop_kab["Pop_Total"].isna() & pop_kab["Target68_Total"].notna() & (pop_kab["Target68_Total"] > 0)
573
  pop_kab.loc[mask_need_pop, "Pop_Total"] = pop_kab.loc[mask_need_pop, "Target68_Total"] / float(FALLBACK_TARGET_RATIO)
574
 
@@ -614,7 +598,6 @@ def load_default_files(force=False):
614
  pop_prov["Pop_Total_Prov"] = pd.to_numeric(pop_prov["Pop_Total_Prov"], errors="coerce")
615
  pop_prov["Target68_Total_Prov"] = pd.to_numeric(pop_prov["Target68_Total_Prov"], errors="coerce")
616
 
617
- # fallback Pop_Total_Prov = Target/0.68
618
  mask_need_pop = pop_prov["Pop_Total_Prov"].isna() & pop_prov["Target68_Total_Prov"].notna() & (pop_prov["Target68_Total_Prov"] > 0)
619
  pop_prov.loc[mask_need_pop, "Pop_Total_Prov"] = pop_prov.loc[mask_need_pop, "Target68_Total_Prov"] / float(FALLBACK_TARGET_RATIO)
620
 
@@ -635,7 +618,6 @@ def load_default_files(force=False):
635
  _CACHE.update({"key": key, "df_all": None, "pop_kab": None, "pop_prov": None, "pop_khusus": None, "meta": {}, "info": info})
636
  return None, None, None, None, {}, info
637
 
638
- # pipeline nasional (entitas)
639
  df_all = prepare_global(df_raw)
640
 
641
  meta = dict(prov_col=prov_col, kab_col=kab_col, kew_col=kew_col, jenis_col=jenis_col, nama_col=nama_col)
@@ -643,9 +625,9 @@ def load_default_files(force=False):
643
  info = (
644
  f"βœ… Mode NO UPLOAD (cache aktif)<br>"
645
  f"βœ… DM: <b>{fp.name}</b> | Baris: {before} β†’ dedup: {after}<br>"
646
- f"βœ… POP_KAB: <b>{Path(POP_KAB).name}</b> (n={len(pop_kab)}) β€” target 68% via <code>sampel_total</code> (Pop_Total auto fallback jika kosong)<br>"
647
- f"βœ… POP_PROV: <b>{Path(POP_PROV).name}</b> (n={len(pop_prov)}) β€” target 68% via <code>total _sampel</code> (Pop_Total auto fallback jika kosong)<br>"
648
- f"βœ… POP_KHUSUS: <b>{Path(POP_KHUSUS).name}</b> (n={len(pop_khusus)}) β€” format gabungan Provinsi/Kab/Kota (Target/Pop auto fallback)<br>"
649
  f"πŸ•’ mtime: DM={time.ctime(_mtime(DATA_FILE))} | Kab={time.ctime(_mtime(POP_KAB))} | Prov={time.ctime(_mtime(POP_PROV))} | Khusus={time.ctime(_mtime(POP_KHUSUS))}"
650
  )
651
 
@@ -660,6 +642,11 @@ def load_default_files(force=False):
660
  def build_agg_wilayah_total(df_filtered: pd.DataFrame, pop_kab: pd.DataFrame, pop_prov: pd.DataFrame, kew_value: str):
661
  """
662
  Output: satu baris per wilayah
 
 
 
 
 
663
  """
664
  if df_filtered is None or df_filtered.empty:
665
  return pd.DataFrame()
@@ -705,17 +692,17 @@ def build_agg_wilayah_total(df_filtered: pd.DataFrame, pop_kab: pd.DataFrame, po
705
 
706
  agg = agg.rename(columns={key_col: "group_key", label_col: label_name})
707
 
708
- # join target_total_68 & pop_total
709
  target_vals, pop_vals, label_fix = [], [], []
710
  for _, r in agg.iterrows():
711
  gk = r["group_key"]
712
  if gk in pop.index:
713
- target_total = pop.loc[gk, target_field] if target_field in pop.columns else np.nan
714
  pop_total = pop.loc[gk, pop_field] if pop_field in pop.columns else np.nan
715
  nm = pop.loc[gk, name_field] if name_field in pop.columns else r[label_name]
716
  else:
717
- target_total, pop_total, nm = np.nan, np.nan, r[label_name]
718
- target_vals.append(target_total)
719
  pop_vals.append(pop_total)
720
  label_fix.append(nm)
721
 
@@ -723,24 +710,29 @@ def build_agg_wilayah_total(df_filtered: pd.DataFrame, pop_kab: pd.DataFrame, po
723
  agg["target_total_68"] = pd.to_numeric(pd.Series(target_vals), errors="coerce")
724
  agg["pop_total"] = pd.to_numeric(pd.Series(pop_vals), errors="coerce")
725
 
726
- # fallback jika pop_total masih NaN tapi target_total_68 ada
727
  m = agg["pop_total"].isna() & agg["target_total_68"].notna() & (agg["target_total_68"] > 0)
728
  agg.loc[m, "pop_total"] = agg.loc[m, "target_total_68"] / float(FALLBACK_TARGET_RATIO)
729
 
730
- # ✅ faktor_penyesuaian dalam persen (0–100)
731
  agg["faktor_penyesuaian"] = [
732
- faktor_penyesuaian_total(n, p)
733
- for n, p in zip(
734
- pd.to_numeric(agg["n_total"], errors="coerce").fillna(0).astype(float).tolist(),
735
- pd.to_numeric(agg["pop_total"], errors="coerce").tolist())
 
736
  ]
737
 
 
738
  agg["coverage_total_%"] = [
739
  (safe_div(n, p) * 100) if (p is not None and not pd.isna(p) and float(p) > 0) else np.nan
740
- for n, p in zip(pd.to_numeric(agg["n_total"], errors="coerce").fillna(0).astype(float).tolist(), agg["pop_total"].tolist())
 
 
 
741
  ]
742
 
743
- # ✅ indeks final = indeks dasar × (faktor/100)
744
  agg["Indeks_Final_Wilayah_0_100"] = agg["Indeks_Dasar_Agregat_0_100"] * (agg["faktor_penyesuaian"] / 100.0)
745
 
746
  # rounding
@@ -755,8 +747,8 @@ def build_agg_wilayah_total(df_filtered: pd.DataFrame, pop_kab: pd.DataFrame, po
755
  if c in agg.columns:
756
  agg[c] = pd.to_numeric(agg[c], errors="coerce").fillna(0.0).apply(lambda x: round(float(x), 2))
757
 
758
- # faktor persen
759
- agg["faktor_penyesuaian"] = pd.to_numeric(agg["faktor_penyesuaian"], errors="coerce").fillna(100.0).apply(lambda x: round(float(x), 1))
760
  agg["coverage_total_%"] = pd.to_numeric(agg["coverage_total_%"], errors="coerce")
761
 
762
  return agg
@@ -768,7 +760,7 @@ def build_agg_wilayah_total(df_filtered: pd.DataFrame, pop_kab: pd.DataFrame, po
768
 
769
  def build_agg_wilayah_jenis(df_filtered: pd.DataFrame, agg_total: pd.DataFrame, pop_khusus: pd.DataFrame, kew_value: str):
770
  """
771
- Final per jenis = Indeks_Dasar_Jenis * (faktor_penyesuaian_wilayah/100) (TERMASUK KHUSUS)
772
  Referensi khusus (target/pop) ditampilkan bila tersedia dari POP_KHUSUS.
773
  """
774
  if df_filtered is None or df_filtered.empty:
@@ -807,7 +799,7 @@ def build_agg_wilayah_jenis(df_filtered: pd.DataFrame, agg_total: pd.DataFrame,
807
 
808
  agg = agg.rename(columns={key_col: "group_key", label_col: label_name, "_dataset": "Jenis"})
809
 
810
- # join faktor wilayah (PERSEN)
811
  if agg_total is None or agg_total.empty:
812
  agg["faktor_penyesuaian_wilayah"] = 100.0
813
  else:
@@ -815,7 +807,7 @@ def build_agg_wilayah_jenis(df_filtered: pd.DataFrame, agg_total: pd.DataFrame,
815
  agg = agg.merge(m, on="group_key", how="left")
816
  agg["faktor_penyesuaian_wilayah"] = pd.to_numeric(agg["faktor_penyesuaian_wilayah"], errors="coerce").fillna(100.0)
817
 
818
- # Final (TERMASUK KHUSUS)
819
  agg["faktor_penyesuaian"] = agg["faktor_penyesuaian_wilayah"]
820
  agg["Indeks_Final_Agregat_0_100"] = agg["Indeks_Dasar_Agregat_0_100"] * (agg["faktor_penyesuaian"] / 100.0)
821
 
@@ -836,14 +828,12 @@ def build_agg_wilayah_jenis(df_filtered: pd.DataFrame, agg_total: pd.DataFrame,
836
  agg.at[i, "target_total_68_jenis"] = t
837
  agg.at[i, "pop_total_jenis"] = p
838
 
839
- # fallback jika pop_total_jenis NaN tapi target_total_68_jenis ada
840
  agg["target_total_68_jenis"] = pd.to_numeric(agg["target_total_68_jenis"], errors="coerce")
841
  agg["pop_total_jenis"] = pd.to_numeric(agg["pop_total_jenis"], errors="coerce")
842
 
843
  m = agg["pop_total_jenis"].isna() & agg["target_total_68_jenis"].notna() & (agg["target_total_68_jenis"] > 0)
844
  agg.loc[m, "pop_total_jenis"] = agg.loc[m, "target_total_68_jenis"] / float(FALLBACK_TARGET_RATIO)
845
 
846
- # refresh coverage_jenis jika pop_total_jenis ada
847
  m2 = agg["pop_total_jenis"].notna() & (agg["pop_total_jenis"] > 0)
848
  agg.loc[m2, "coverage_jenis"] = (agg.loc[m2, "Jumlah"].astype(float) / agg.loc[m2, "pop_total_jenis"].astype(float)) * 100.0
849
 
@@ -859,9 +849,10 @@ def build_agg_wilayah_jenis(df_filtered: pd.DataFrame, agg_total: pd.DataFrame,
859
  if c in agg.columns:
860
  agg[c] = pd.to_numeric(agg[c], errors="coerce").fillna(0.0).apply(lambda x: round(float(x), 2))
861
 
 
862
  for c in ["faktor_penyesuaian_wilayah","faktor_penyesuaian"]:
863
  if c in agg.columns:
864
- agg[c] = pd.to_numeric(agg[c], errors="coerce").fillna(100.0).apply(lambda x: round(float(x), 1))
865
 
866
  agg["coverage_jenis"] = pd.to_numeric(agg["coverage_jenis"], errors="coerce")
867
 
@@ -994,10 +985,7 @@ def build_verif_total(agg_total: pd.DataFrame):
994
  "Target_68_Total": df.get("target_total_68", np.nan),
995
  "Sampel_Total_Terkumpul": df.get("n_total", 0),
996
  "Coverage_Total_%": df.get("coverage_total_%", np.nan),
997
-
998
- # βœ… faktor sudah persen, JANGAN dikali 100 lagi
999
  "Faktor_Penyesuaian_(Sampel/Target68)_persen": pd.to_numeric(df.get("faktor_penyesuaian", 100.0), errors="coerce").fillna(100.0),
1000
-
1001
  "GAP_Ke_Target68_Total": [
1002
  max(t - n, 0) if (t is not None and not pd.isna(t)) else 0
1003
  for n, t in zip(pd.to_numeric(df["n_total"], errors="coerce").fillna(0).astype(float).tolist(),
@@ -1173,8 +1161,8 @@ def build_context(summary_jenis: pd.DataFrame, agg_total: pd.DataFrame, verif_to
1173
  lines.append(f"Wilayah filter: {wilayah}")
1174
  lines.append(f"Kewenangan: {kew}")
1175
  lines.append("Metode: Indeks dasar dihitung per entitas (YJ+minmax nasional), lalu diagregasi per wilayah. Setelah itu dilakukan penyesuaian berbasis kecukupan sampel minimum 68% pada level wilayah.")
1176
- # βœ… UPDATE rumus (persen)
1177
- lines.append("Rumus penyesuaian: faktor(%) = min((total_terkumpul / target_total_68) Γ— 100, 100); Indeks_Final = Indeks_Dasar_Agregat Γ— (faktor/100).")
1178
  lines.append("Jenis yang ditampilkan: sekolah, umum, khusus (SEMUA jenis menggunakan faktor wilayah).")
1179
 
1180
  if summary_jenis is not None and not summary_jenis.empty:
@@ -1193,7 +1181,7 @@ def build_context(summary_jenis: pd.DataFrame, agg_total: pd.DataFrame, verif_to
1193
  for _, r in top.iterrows():
1194
  wl = r.get(label_col, "(wilayah)") if label_col else "(wilayah)"
1195
  lines.append(
1196
- f"- {wl}: Final={float(r['Indeks_Final_Wilayah_0_100']):.2f} | Faktor(%)={float(r.get('faktor_penyesuaian', 100.0)):.1f} | total={int(r.get('n_total', 0))}"
1197
  )
1198
 
1199
  if verif_total is not None and not verif_total.empty:
@@ -1219,7 +1207,7 @@ def generate_llm_analysis(summary_jenis, agg_total, verif_total, wilayah, kew):
1219
  "Tugas Anda menyusun analisis berbasis data IPLM secara formal, tajam, dan operasional."
1220
  )
1221
  user_prompt = f"""
1222
- DATA RINGKAS IPLM (PENYESUAIAN BERBASIS KECUKUPAN SAMPEL 68% DI LEVEL WILAYAH):
1223
 
1224
  {ctx}
1225
 
@@ -1256,12 +1244,12 @@ def generate_word_report(agg_jenis, wilayah, kew, analysis_text):
1256
  doc.add_paragraph(f"Kewenangan: {kew}")
1257
  doc.add_paragraph(
1258
  "Metode: Indeks dasar dihitung per entitas (YJ+minmax nasional), diagregasi per wilayah, "
1259
- "lalu dilakukan penyesuaian berbasis kecukupan sampel minimum 68% pada level wilayah."
1260
  )
1261
- # βœ… UPDATE rumus (persen)
1262
  doc.add_paragraph(
1263
- "Rumus penyesuaian: faktor(%) = min((total_terkumpul / target_total_68) Γ— 100, 100). "
1264
- "Indeks_Final = Indeks_Dasar_Agregat Γ— (faktor/100)."
 
1265
  )
1266
  doc.add_paragraph(
1267
  "Jenis yang ditampilkan: sekolah, umum, khusus. "
@@ -1325,8 +1313,7 @@ def generate_word_report(agg_jenis, wilayah, kew, analysis_text):
1325
  elif "Rata2_" in str(c):
1326
  cells[i].text = f"{float(v):.3f}"
1327
  elif "faktor" in str(c).lower():
1328
- # faktor sekarang persen
1329
- cells[i].text = f"{float(v):.1f}"
1330
  elif "coverage" in str(c).lower():
1331
  cells[i].text = f"{float(v):.2f}"
1332
  else:
@@ -1370,16 +1357,7 @@ def build_kpi_markdown(agg_total: pd.DataFrame) -> str:
1370
  dasar = float(pd.to_numeric(agg_total["Indeks_Dasar_Agregat_0_100"], errors="coerce").fillna(0).mean()) if "Indeks_Dasar_Agregat_0_100" in agg_total.columns else np.nan
1371
  final = float(pd.to_numeric(agg_total["Indeks_Final_Wilayah_0_100"], errors="coerce").fillna(0).mean()) if "Indeks_Final_Wilayah_0_100" in agg_total.columns else np.nan
1372
 
1373
- n_sum = pd.to_numeric(agg_total.get("n_total", pd.Series(dtype=float)), errors="coerce").fillna(0).sum()
1374
- t_sum = pd.to_numeric(agg_total.get("target_total_68", pd.Series(dtype=float)), errors="coerce").fillna(0)
1375
- t_sum = t_sum[t_sum > 0].sum() if hasattr(t_sum, "sum") else float(t_sum)
1376
-
1377
- # βœ… cakupan_pct langsung persen (0–100) cap 100
1378
- cakupan_pct = min((float(n_sum) / float(t_sum)) * 100.0, 100.0) if (t_sum and t_sum > 0) else 100.0
1379
-
1380
- # faktor_penyesuaian sudah persen
1381
- faktor_mean = float(pd.to_numeric(agg_total.get("faktor_penyesuaian", 100.0), errors="coerce").fillna(100.0).mean())
1382
-
1383
  dampak = (final - dasar) if (pd.notna(final) and pd.notna(dasar)) else np.nan
1384
 
1385
  def fmt(x, nd=2):
@@ -1401,14 +1379,14 @@ def build_kpi_markdown(agg_total: pd.DataFrame) -> str:
1401
 
1402
  <div style="border:1px solid #333; border-radius:10px; padding:10px 12px; min-width:220px;">
1403
  <div style="opacity:0.8;">Cakupan Sampel (berdasarkan target 68%)</div>
1404
- <div style="font-size:26px; font-weight:700;">{fmt(cakupan_pct,0)}%</div>
1405
- <div style="opacity:0.7;">Rumus: min((total/target_68)Γ—100, 100)</div>
1406
  </div>
1407
 
1408
  <div style="border:1px solid #333; border-radius:10px; padding:10px 12px; min-width:220px;">
1409
  <div style="opacity:0.8;">Penyesuaian Nilai (rata-rata)</div>
1410
  <div style="font-size:26px; font-weight:700;">{fmt(dampak,2)} poin</div>
1411
- <div style="opacity:0.7;">Faktor penyesuaian (mean): {fmt(faktor_mean,1)}%</div>
1412
  </div>
1413
  </div>
1414
  """.strip()
@@ -1479,10 +1457,9 @@ def run_calc(prov_value, kab_value, kew_value, df_all, pop_kab, pop_prov, pop_kh
1479
  # Word report
1480
  word_path = generate_word_report(agg_jenis, wilayah_txt, kew_value or "(Semua)", analysis_text)
1481
 
1482
- # βœ… update msg: rumus persen
1483
  msg = (
1484
  f"βœ… Selesai: entitas={len(detail_view)} | wilayah(keseluruhan)={len(agg_total)} | "
1485
- f"jenis={len(agg_jenis)} | penyesuaian=(total_terkumpul/target_total_68)*100 (cap 100)"
1486
  )
1487
 
1488
  return (
@@ -1511,124 +1488,144 @@ def ui_load(force=False):
1511
  gr.update(choices=["(Semua)"], value="(Semua)"),
1512
  )
1513
 
1514
- # !!! PERBAIKAN MASALAH: prov_choices dibuat set() agar tidak ada duplikat akibat variasi format
1515
  prov_vals = df_all["PROV_DISP"].dropna().astype(str).tolist()
1516
  prov_vals = [v for v in prov_vals if v and v.strip()]
1517
  prov_choices = ["(Semua)"] + sorted(set(prov_vals))
1518
 
1519
  kab_choices = ["(Semua)"] + sorted([x for x in df_all["KAB_DISP"].dropna().unique().tolist() if x])
1520
- kew_choices = ["(Semua)"] + sorted([x for x in df_all["KEW_NORM"].dropna().unique().tolist() if x])
1521
- default_kew = "PROVINSI" if "PROVINSI" in kew_choices else ("KAB/KOTA" if "KAB/KOTA" in kew_choices else "(Semua)")
1522
 
1523
  return (
1524
  df_all, pop_kab, pop_prov, pop_khusus, meta, info,
1525
  gr.update(choices=prov_choices, value="(Semua)"),
1526
- gr.update(choices=kab_choices, value="(Semua)"),
1527
- gr.update(choices=kew_choices, value=default_kew),
1528
  )
1529
 
1530
- def on_prov_change(prov_value):
1531
- df_all, _, _, _, _, _ = load_default_files(force=False)
1532
  if df_all is None or df_all.empty:
1533
  return gr.update(choices=["(Semua)"], value="(Semua)")
1534
- if prov_value is None or prov_value == "(Semua)":
1535
- vals = df_all["KAB_DISP"].dropna().unique().tolist()
1536
- else:
1537
- vals = df_all.loc[df_all["PROV_DISP"] == prov_value, "KAB_DISP"].dropna().unique().tolist()
1538
- vals = sorted([v for v in vals if v])
1539
- return gr.update(choices=["(Semua)"] + vals, value="(Semua)")
1540
-
1541
-
1542
- with gr.Blocks() as demo:
1543
- gr.Markdown(f"""
1544
- # IPLM 2025 β€” Final (Penyesuaian Berbasis Kecukupan Sampel 68%)
1545
- **Mode NO UPLOAD (cache aktif).** File dibaca dari repo/server:
1546
- - `DATA_FILE` = **{DATA_FILE}**
1547
- - `POP_KAB` = **{POP_KAB}** (target 68%: kolom **sampel_total** / alias)
1548
- - `POP_PROV` = **{POP_PROV}** (target 68%: kolom **total _sampel** / alias)
1549
- - `POP_KHUSUS` = **{POP_KHUSUS}** (referensi jenis khusus)
1550
-
1551
- **Aturan penting:**
1552
- - Hitung indeks dasar per entitas (YJ+minmax nasional) β†’ agregasi wilayah β†’ **baru penyesuaian 68% berbasis total wilayah**.
1553
- - Penyesuaian 68% memakai total pengumpulan wilayah: `faktor(%) = min((n_total_terkumpul / target_total_68) Γ— 100, 100)`.
1554
- - Indeks_Final = Indeks_Dasar_Agregat Γ— (faktor/100).
1555
- - Jenis tampil: **sekolah, umum, khusus** (SEMUA jenis menggunakan faktor wilayah).
1556
- - Referensi target/pop khusus ditampilkan bila tersedia dari POP_KHUSUS.
1557
- - **Perbaikan NULL**: jika Pop_Total kosong/tidak ada -> fallback `Pop_Total = Target68 / 0.68` sehingga coverage tidak null.
1558
- """)
1559
-
1560
- state_df = gr.State(None)
1561
- state_pop_kab = gr.State(None)
1562
- state_pop_prov = gr.State(None)
1563
- state_pop_khusus = gr.State(None)
1564
- state_meta = gr.State({})
1565
-
1566
- info_box = gr.Markdown()
1567
 
1568
  with gr.Row():
1569
  dd_prov = gr.Dropdown(label="Provinsi", choices=["(Semua)"], value="(Semua)")
1570
  dd_kab = gr.Dropdown(label="Kab/Kota", choices=["(Semua)"], value="(Semua)")
1571
  dd_kew = gr.Dropdown(label="Kewenangan", choices=["(Semua)"], value="(Semua)")
1572
 
1573
- dd_prov.change(fn=on_prov_change, inputs=[dd_prov], outputs=dd_kab)
 
 
1574
 
1575
- run_btn = gr.Button("Jalankan Perhitungan")
1576
- msg_out = gr.Markdown()
1577
 
1578
- # KPI
1579
- kpi_out = gr.Markdown()
 
1580
 
1581
- gr.Markdown("## Ringkasan (Jenis + Keseluruhan) β€” sub-dimensi, dimensi, indeks final disesuaikan")
1582
- out_summary = gr.DataFrame(interactive=False)
1583
 
1584
- gr.Markdown("## Agregat Wilayah (Keseluruhan) β€” Final disesuaikan (faktor wilayah)")
1585
- out_agg_total = gr.DataFrame(interactive=False)
1586
 
1587
- gr.Markdown("## Agregat Wilayah Γ— Jenis (Sekolah, Umum, Khusus) β€” Final memakai faktor wilayah + referensi khusus")
1588
- out_agg_jenis = gr.DataFrame(interactive=False)
1589
 
1590
- gr.Markdown("## Detail Entitas (Indeks final menempel dari wilayah; tidak ada penyesuaian per entitas)")
1591
- out_detail = gr.DataFrame(interactive=False)
1592
 
1593
- gr.Markdown("## Kecukupan Sampel 68% (tanpa angka koma)")
1594
- out_verif = gr.DataFrame(interactive=False)
1595
 
1596
- gr.Markdown("## Bell Curve β€” per Jenis Perpustakaan (Indeks per Entitas)")
1597
- gr.Markdown("### Perpustakaan Umum")
1598
- bell_umum = gr.Plot(scale=1)
1599
 
1600
- gr.Markdown("### Perpustakaan Sekolah")
1601
- bell_sekolah = gr.Plot(scale=1)
1602
 
1603
- gr.Markdown("### Perpustakaan Khusus")
1604
- bell_khusus = gr.Plot(scale=1)
 
1605
 
1606
- gr.Markdown("## Analisis Otomatis (LLM)")
1607
- analysis_out = gr.Markdown()
 
 
 
 
 
1608
 
1609
- with gr.Row():
1610
- dl_summary = gr.DownloadButton(label="Download Ringkasan (.xlsx)")
1611
- dl_total = gr.DownloadButton(label="Download Agregat Wilayah (.xlsx)")
1612
- dl_jenis = gr.DownloadButton(label="Download Agregat Jenis (.xlsx)")
1613
- dl_detail = gr.DownloadButton(label="Download Detail Entitas (.xlsx)")
1614
- dl_word = gr.DownloadButton(label="Download Laporan Word (.docx)")
1615
-
1616
- run_btn.click(
1617
- fn=run_calc,
1618
- inputs=[dd_prov, dd_kab, dd_kew, state_df, state_pop_kab, state_pop_prov, state_pop_khusus, state_meta],
1619
  outputs=[
1620
- kpi_out,
1621
- out_summary, out_agg_total, out_agg_jenis, out_detail, out_verif,
1622
- dl_summary, dl_total, dl_jenis, dl_detail, dl_word,
1623
- bell_umum, bell_sekolah, bell_khusus,
1624
- msg_out, analysis_out
1625
- ]
1626
  )
1627
 
1628
- demo.load(
1629
- fn=lambda: ui_load(force=False),
 
1630
  inputs=[],
1631
- outputs=[state_df, state_pop_kab, state_pop_prov, state_pop_khusus, state_meta, info_box, dd_prov, dd_kab, dd_kew]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1632
  )
1633
 
1634
- demo.launch()
 
5
  ✅ Jenis tampil: sekolah, umum, khusus (khusus ditampilkan sebagai jenis)
6
  ✅ Indeks dasar per entitas: Yeo-Johnson + MinMax nasional per indikator
7
  ✅ Agregasi wilayah (mean) → BARU penyesuaian 68% berbasis TOTAL pengumpulan wilayah:
8
+ faktor_penyesuaian_% = min((n_total_terkumpul / target_total_68) * 100, 100)
9
+ Indeks_Final_Wilayah = Indeks_Dasar_Agregat * (faktor_penyesuaian_% / 100)
10
+ => Jika n_total_terkumpul >= target_total_68 (artinya sudah >= 68% target), faktor = 100% (TIDAK kena penyesuaian)
11
  ✅ Detail entitas: Indeks_Final_0_100 menempel dari Indeks_Final_Wilayah (bukan per-row)
12
  ✅ Bell curve per JENIS berbasis indeks per entitas (row-level)
13
  ✅ LLM analysis + Word
 
19
  ✅ POP_KHUSUS: deteksi kolom target/pop; jika hanya ada target -> Pop_Total_Jenis = target/0.68
20
  ✅ Fallback juga diterapkan di tabel agregat (kalau join gagal) agar pop_total/coverage tidak NULL
21
 
22
+ CATATAN PENTING:
23
+ - Penyesuaian TIDAK dihitung dari pop_total, tapi dari TARGET 68% (target_total_68).
24
+ - Coverage_total_% tetap dihitung dari pop_total untuk info cakupan populasi (opsional), tapi TIDAK memengaruhi faktor penyesuaian.
25
  """
26
 
27
  import os
 
143
 
144
  # =========================
145
  # !!! PERBAIKAN MASALAH (DROPDOWN PROVINSI DOUBLE PREFIX)
 
146
  # =========================
147
  def norm_prov_disp(s):
148
  """
 
158
  t = t.replace("\u00a0", " ")
159
  t = " ".join(t.split())
160
 
 
161
  t = t.replace("PROPINSI", "PROVINSI")
162
 
 
163
  while t.startswith("PROVINSI PROVINSI "):
164
  t = t.replace("PROVINSI PROVINSI ", "PROVINSI ", 1)
165
 
 
166
  if t.startswith("PROVINSI "):
167
  name = t[len("PROVINSI "):].strip()
168
  else:
 
180
  t = str(s).strip().upper().replace("\u00a0", " ")
181
  t = " ".join(t.split())
182
  t = t.replace("PROPINSI", "PROVINSI")
 
183
  t = t.replace("PROVINSI", "").strip()
184
  return re.sub(r"[^A-Z0-9]+", "", t)
185
 
 
203
  return float(num) / float(den)
204
 
205
  # =========================
206
+ # !!! PERUBAHAN UTAMA SESUAI PERMINTAAN:
207
+ # Faktor penyesuaian pakai TARGET 68% (target_total_68), BUKAN pop_total.
208
+ # Jika sampel >= target_total_68 -> faktor = 100% (TIDAK kena penyesuaian).
209
  # =========================
210
+ def faktor_penyesuaian_total_pct(n_total: float, target_total_68: float) -> float:
211
  """
212
+ faktor_penyesuaian_% = min((n_total / target_total_68) * 100, 100)
213
+
214
+ - Jika n_total >= target_total_68 -> 100% (tidak disesuaikan)
215
+ - Jika target invalid -> 100% (anggap tidak disesuaikan)
216
  """
217
+ if target_total_68 is None or pd.isna(target_total_68) or float(target_total_68) <= 0:
218
  return 100.0
219
  if n_total is None or pd.isna(n_total) or float(n_total) < 0:
220
  n_total = 0.0
221
+ pct = (float(n_total) / float(target_total_68)) * 100.0
222
+ return float(min(pct, 100.0))
223
 
224
 
225
  # ============================================================
 
366
  - Kolom gabungan: "Propinsi/Kab/kota"
367
  berisi baris "PROVINSI XXX" lalu daftar "KAB. ..." / "KOTA ..."
368
  - Minimal 1 kolom angka.
369
+ Bisa berupa target 68% atau populasi, atau keduanya.
 
 
 
370
 
371
  Output: kab_key, Kab_Kota_Label, Provinsi_Label,
372
  Target68_Total_Jenis, Pop_Total_Jenis
 
375
  if df is None or df.empty:
376
  return pd.DataFrame()
377
 
 
378
  c_mix = pick_col(df, [
379
  "Propinsi/Kab/kota", "Propinsi/Kab/Kota", "Propinsi/Kab/kota ",
380
  "Provinsi/Kab/Kota", "Provinsi/Kab/kota", "Provinsi/Kabupaten/Kota",
 
389
  if c_mix is None:
390
  raise ValueError("POP_KHUSUS: kolom gabungan Provinsi/Kab/Kota tidak ditemukan.")
391
 
 
392
  c_target = pick_col(df, [
393
  "target_total_68","Target_Total_68","TARGET_68","target_68",
394
  "sampel_total","Sampel_total","TOTAL_SAMPEL","total_sampel",
 
401
  "pop_total","Pop_Total"
402
  ])
403
 
 
404
  if c_target is None and c_pop is None:
405
  numeric_cols = [c for c in df.columns if c != c_mix]
406
  if not numeric_cols:
 
421
  current_prov = mm.replace("PROVINSI", "").strip()
422
  continue
423
 
 
424
  rows.append({
425
  "Provinsi_Label": current_prov or "",
426
+ "Kab_Kota_Label": mm,
427
  "Target68_Total_Jenis": tval,
428
  "Pop_Total_Jenis": pval
429
  })
 
438
  pop["Target68_Total_Jenis"] = pd.to_numeric(pop["Target68_Total_Jenis"], errors="coerce")
439
  pop["Pop_Total_Jenis"] = pd.to_numeric(pop["Pop_Total_Jenis"], errors="coerce")
440
 
 
441
  m_need_pop = pop["Pop_Total_Jenis"].isna() & pop["Target68_Total_Jenis"].notna() & (pop["Target68_Total_Jenis"] > 0)
442
  pop.loc[m_need_pop, "Pop_Total_Jenis"] = pop.loc[m_need_pop, "Target68_Total_Jenis"] / float(FALLBACK_TARGET_RATIO)
443
 
 
444
  m_need_target = pop["Target68_Total_Jenis"].isna() & pop["Pop_Total_Jenis"].notna() & (pop["Pop_Total_Jenis"] > 0)
445
  pop.loc[m_need_target, "Target68_Total_Jenis"] = pop.loc[m_need_target, "Pop_Total_Jenis"] * float(FALLBACK_TARGET_RATIO)
446
 
 
469
  _CACHE.update({"key": key, "df_all": None, "pop_kab": None, "pop_prov": None, "pop_khusus": None, "meta": {}, "info": info})
470
  return None, None, None, None, {}, info
471
 
 
472
  fp = Path(DATA_FILE)
473
  xls = pd.ExcelFile(fp)
474
  frames = [pd.read_excel(fp, sheet_name=s) for s in xls.sheet_names]
 
490
  _CACHE.update({"key": key, "df_all": None, "pop_kab": None, "pop_prov": None, "pop_khusus": None, "meta": {}, "info": info})
491
  return None, None, None, None, {}, info
492
 
 
493
  val_map_jenis = {
494
  "PERPUSTAKAAN SEKOLAH": "sekolah", "SEKOLAH": "sekolah",
495
  "PERPUSTAKAAN UMUM": "umum", "UMUM": "umum", "PERPUSTAKAAN DAERAH": "umum",
 
499
  df_raw["KEW_NORM"] = df_raw[kew_col].apply(norm_kew)
500
  df_raw["_dataset"] = df_raw[jenis_col].astype(str).str.strip().str.upper().map(val_map_jenis)
501
 
 
502
  df_raw["PROV_DISP"] = df_raw[prov_col].apply(norm_prov_disp)
 
503
  df_raw["KAB_DISP"] = df_raw[kab_col].apply(_disp_text)
504
 
505
  df_raw["prov_key"] = df_raw["PROV_DISP"].apply(norm_prov_label)
 
530
  "target_total_68","Target_Total_68","target_68","TARGET_68"
531
  ])
532
 
 
533
  c_pop_total = pick_col(pk, [
534
  "total_populasi","Total Populasi","POPULASI","populasi",
535
  "jumlah_penduduk","Jumlah Penduduk","PENDUDUK","penduduk",
 
553
  pop_kab["Pop_Total"] = pd.to_numeric(pop_kab["Pop_Total"], errors="coerce")
554
  pop_kab["Target68_Total"] = pd.to_numeric(pop_kab["Target68_Total"], errors="coerce")
555
 
 
556
  mask_need_pop = pop_kab["Pop_Total"].isna() & pop_kab["Target68_Total"].notna() & (pop_kab["Target68_Total"] > 0)
557
  pop_kab.loc[mask_need_pop, "Pop_Total"] = pop_kab.loc[mask_need_pop, "Target68_Total"] / float(FALLBACK_TARGET_RATIO)
558
 
 
598
  pop_prov["Pop_Total_Prov"] = pd.to_numeric(pop_prov["Pop_Total_Prov"], errors="coerce")
599
  pop_prov["Target68_Total_Prov"] = pd.to_numeric(pop_prov["Target68_Total_Prov"], errors="coerce")
600
 
 
601
  mask_need_pop = pop_prov["Pop_Total_Prov"].isna() & pop_prov["Target68_Total_Prov"].notna() & (pop_prov["Target68_Total_Prov"] > 0)
602
  pop_prov.loc[mask_need_pop, "Pop_Total_Prov"] = pop_prov.loc[mask_need_pop, "Target68_Total_Prov"] / float(FALLBACK_TARGET_RATIO)
603
 
 
618
  _CACHE.update({"key": key, "df_all": None, "pop_kab": None, "pop_prov": None, "pop_khusus": None, "meta": {}, "info": info})
619
  return None, None, None, None, {}, info
620
 
 
621
  df_all = prepare_global(df_raw)
622
 
623
  meta = dict(prov_col=prov_col, kab_col=kab_col, kew_col=kew_col, jenis_col=jenis_col, nama_col=nama_col)
 
625
  info = (
626
  f"βœ… Mode NO UPLOAD (cache aktif)<br>"
627
  f"βœ… DM: <b>{fp.name}</b> | Baris: {before} β†’ dedup: {after}<br>"
628
+ f"βœ… POP_KAB: <b>{Path(POP_KAB).name}</b> (n={len(pop_kab)}) β€” target 68% via <code>sampel_total</code><br>"
629
+ f"βœ… POP_PROV: <b>{Path(POP_PROV).name}</b> (n={len(pop_prov)}) β€” target 68% via <code>total _sampel</code><br>"
630
+ f"βœ… POP_KHUSUS: <b>{Path(POP_KHUSUS).name}</b> (n={len(pop_khusus)}) β€” format gabungan Provinsi/Kab/Kota<br>"
631
  f"πŸ•’ mtime: DM={time.ctime(_mtime(DATA_FILE))} | Kab={time.ctime(_mtime(POP_KAB))} | Prov={time.ctime(_mtime(POP_PROV))} | Khusus={time.ctime(_mtime(POP_KHUSUS))}"
632
  )
633
 
 
642
  def build_agg_wilayah_total(df_filtered: pd.DataFrame, pop_kab: pd.DataFrame, pop_prov: pd.DataFrame, kew_value: str):
643
  """
644
  Output: satu baris per wilayah
645
+
646
+ Penyesuaian:
647
+ - faktor_penyesuaian_% dihitung dari (n_total / target_total_68)*100, cap 100
648
+ - Indeks_Final = Indeks_Dasar_Agregat * (faktor_penyesuaian_%/100)
649
+ - Jika n_total >= target_total_68 -> faktor_penyesuaian_% = 100 (tidak disesuaikan)
650
  """
651
  if df_filtered is None or df_filtered.empty:
652
  return pd.DataFrame()
 
692
 
693
  agg = agg.rename(columns={key_col: "group_key", label_col: label_name})
694
 
695
+ # join target_total_68 & pop_total (pop_total hanya untuk info coverage, bukan untuk faktor)
696
  target_vals, pop_vals, label_fix = [], [], []
697
  for _, r in agg.iterrows():
698
  gk = r["group_key"]
699
  if gk in pop.index:
700
+ target_total_68 = pop.loc[gk, target_field] if target_field in pop.columns else np.nan
701
  pop_total = pop.loc[gk, pop_field] if pop_field in pop.columns else np.nan
702
  nm = pop.loc[gk, name_field] if name_field in pop.columns else r[label_name]
703
  else:
704
+ target_total_68, pop_total, nm = np.nan, np.nan, r[label_name]
705
+ target_vals.append(target_total_68)
706
  pop_vals.append(pop_total)
707
  label_fix.append(nm)
708
 
 
710
  agg["target_total_68"] = pd.to_numeric(pd.Series(target_vals), errors="coerce")
711
  agg["pop_total"] = pd.to_numeric(pd.Series(pop_vals), errors="coerce")
712
 
713
+ # fallback pop_total jika kosong, agar coverage_total_% tidak null (INFO SAJA)
714
  m = agg["pop_total"].isna() & agg["target_total_68"].notna() & (agg["target_total_68"] > 0)
715
  agg.loc[m, "pop_total"] = agg.loc[m, "target_total_68"] / float(FALLBACK_TARGET_RATIO)
716
 
717
+ # === PENYESUAIAN: PAKAI TARGET 68% (bukan pop_total) ===
718
  agg["faktor_penyesuaian"] = [
719
+ faktor_penyesuaian_total_pct(n, t) # hasil persen (0..100)
720
+ for n, t in zip(
721
+ pd.to_numeric(agg["n_total"], errors="coerce").fillna(0).astype(float).tolist(),
722
+ pd.to_numeric(agg["target_total_68"], errors="coerce").tolist()
723
+ )
724
  ]
725
 
726
+ # coverage_total_% (INFO) tetap dari pop_total
727
  agg["coverage_total_%"] = [
728
  (safe_div(n, p) * 100) if (p is not None and not pd.isna(p) and float(p) > 0) else np.nan
729
+ for n, p in zip(
730
+ pd.to_numeric(agg["n_total"], errors="coerce").fillna(0).astype(float).tolist(),
731
+ agg["pop_total"].tolist()
732
+ )
733
  ]
734
 
735
+ # INDEKS FINAL: faktor persen / 100
736
  agg["Indeks_Final_Wilayah_0_100"] = agg["Indeks_Dasar_Agregat_0_100"] * (agg["faktor_penyesuaian"] / 100.0)
737
 
738
  # rounding
 
747
  if c in agg.columns:
748
  agg[c] = pd.to_numeric(agg[c], errors="coerce").fillna(0.0).apply(lambda x: round(float(x), 2))
749
 
750
+ # faktor_penyesuaian sudah persen; simpan integer/pembulatan sesuai kebutuhan tampilan
751
+ agg["faktor_penyesuaian"] = pd.to_numeric(agg["faktor_penyesuaian"], errors="coerce").fillna(100.0).apply(lambda x: round(float(x), 0))
752
  agg["coverage_total_%"] = pd.to_numeric(agg["coverage_total_%"], errors="coerce")
753
 
754
  return agg
 
760
 
761
  def build_agg_wilayah_jenis(df_filtered: pd.DataFrame, agg_total: pd.DataFrame, pop_khusus: pd.DataFrame, kew_value: str):
762
  """
763
+ Final per jenis = Indeks_Dasar_Jenis * (faktor_penyesuaian_wilayah% / 100)
764
  Referensi khusus (target/pop) ditampilkan bila tersedia dari POP_KHUSUS.
765
  """
766
  if df_filtered is None or df_filtered.empty:
 
799
 
800
  agg = agg.rename(columns={key_col: "group_key", label_col: label_name, "_dataset": "Jenis"})
801
 
802
+ # join faktor wilayah (persen)
803
  if agg_total is None or agg_total.empty:
804
  agg["faktor_penyesuaian_wilayah"] = 100.0
805
  else:
 
807
  agg = agg.merge(m, on="group_key", how="left")
808
  agg["faktor_penyesuaian_wilayah"] = pd.to_numeric(agg["faktor_penyesuaian_wilayah"], errors="coerce").fillna(100.0)
809
 
810
+ # Final (TERMASUK KHUSUS) β€” faktor persen / 100
811
  agg["faktor_penyesuaian"] = agg["faktor_penyesuaian_wilayah"]
812
  agg["Indeks_Final_Agregat_0_100"] = agg["Indeks_Dasar_Agregat_0_100"] * (agg["faktor_penyesuaian"] / 100.0)
813
 
 
828
  agg.at[i, "target_total_68_jenis"] = t
829
  agg.at[i, "pop_total_jenis"] = p
830
 
 
831
  agg["target_total_68_jenis"] = pd.to_numeric(agg["target_total_68_jenis"], errors="coerce")
832
  agg["pop_total_jenis"] = pd.to_numeric(agg["pop_total_jenis"], errors="coerce")
833
 
834
  m = agg["pop_total_jenis"].isna() & agg["target_total_68_jenis"].notna() & (agg["target_total_68_jenis"] > 0)
835
  agg.loc[m, "pop_total_jenis"] = agg.loc[m, "target_total_68_jenis"] / float(FALLBACK_TARGET_RATIO)
836
 
 
837
  m2 = agg["pop_total_jenis"].notna() & (agg["pop_total_jenis"] > 0)
838
  agg.loc[m2, "coverage_jenis"] = (agg.loc[m2, "Jumlah"].astype(float) / agg.loc[m2, "pop_total_jenis"].astype(float)) * 100.0
839
 
 
849
  if c in agg.columns:
850
  agg[c] = pd.to_numeric(agg[c], errors="coerce").fillna(0.0).apply(lambda x: round(float(x), 2))
851
 
852
+ # faktor persen dibulatkan 0 desimal
853
  for c in ["faktor_penyesuaian_wilayah","faktor_penyesuaian"]:
854
  if c in agg.columns:
855
+ agg[c] = pd.to_numeric(agg[c], errors="coerce").fillna(100.0).apply(lambda x: round(float(x), 0))
856
 
857
  agg["coverage_jenis"] = pd.to_numeric(agg["coverage_jenis"], errors="coerce")
858
 
 
985
  "Target_68_Total": df.get("target_total_68", np.nan),
986
  "Sampel_Total_Terkumpul": df.get("n_total", 0),
987
  "Coverage_Total_%": df.get("coverage_total_%", np.nan),
 
 
988
  "Faktor_Penyesuaian_(Sampel/Target68)_persen": pd.to_numeric(df.get("faktor_penyesuaian", 100.0), errors="coerce").fillna(100.0),
 
989
  "GAP_Ke_Target68_Total": [
990
  max(t - n, 0) if (t is not None and not pd.isna(t)) else 0
991
  for n, t in zip(pd.to_numeric(df["n_total"], errors="coerce").fillna(0).astype(float).tolist(),
 
1161
  lines.append(f"Wilayah filter: {wilayah}")
1162
  lines.append(f"Kewenangan: {kew}")
1163
  lines.append("Metode: Indeks dasar dihitung per entitas (YJ+minmax nasional), lalu diagregasi per wilayah. Setelah itu dilakukan penyesuaian berbasis kecukupan sampel minimum 68% pada level wilayah.")
1164
+ lines.append("Rumus penyesuaian: faktor(%) = min((total_terkumpul/target_total_68)*100, 100). Indeks_Final = Indeks_Dasar_Agregat Γ— (faktor/100).")
1165
+ lines.append("Catatan: Jika total_terkumpul >= target_total_68 -> faktor=100% (tidak ada penyesuaian).")
1166
  lines.append("Jenis yang ditampilkan: sekolah, umum, khusus (SEMUA jenis menggunakan faktor wilayah).")
1167
 
1168
  if summary_jenis is not None and not summary_jenis.empty:
 
1181
  for _, r in top.iterrows():
1182
  wl = r.get(label_col, "(wilayah)") if label_col else "(wilayah)"
1183
  lines.append(
1184
+ f"- {wl}: Final={float(r['Indeks_Final_Wilayah_0_100']):.2f} | Faktor%={float(r.get('faktor_penyesuaian', 100.0)):.0f} | total={int(r.get('n_total', 0))} | target68={float(r.get('target_total_68', 0)):.2f}"
1185
  )
1186
 
1187
  if verif_total is not None and not verif_total.empty:
 
1207
  "Tugas Anda menyusun analisis berbasis data IPLM secara formal, tajam, dan operasional."
1208
  )
1209
  user_prompt = f"""
1210
+ DATA RINGKAS IPLM (PENYESUAIAN BERBASIS KECUKUPAN SAMPEL TARGET 68% DI LEVEL WILAYAH):
1211
 
1212
  {ctx}
1213
 
 
1244
  doc.add_paragraph(f"Kewenangan: {kew}")
1245
  doc.add_paragraph(
1246
  "Metode: Indeks dasar dihitung per entitas (YJ+minmax nasional), diagregasi per wilayah, "
1247
+ "lalu dilakukan penyesuaian berbasis kecukupan sampel target 68% pada level wilayah."
1248
  )
 
1249
  doc.add_paragraph(
1250
+ "Rumus penyesuaian: faktor(%) = min((total_terkumpul/target_total_68)*100, 100). "
1251
+ "Indeks_Final = Indeks_Dasar_Agregat Γ— (faktor/100). "
1252
+ "Jika total_terkumpul >= target_total_68 maka faktor=100% (tidak ada penyesuaian)."
1253
  )
1254
  doc.add_paragraph(
1255
  "Jenis yang ditampilkan: sekolah, umum, khusus. "
 
1313
  elif "Rata2_" in str(c):
1314
  cells[i].text = f"{float(v):.3f}"
1315
  elif "faktor" in str(c).lower():
1316
+ cells[i].text = f"{float(v):.0f}" # persen
 
1317
  elif "coverage" in str(c).lower():
1318
  cells[i].text = f"{float(v):.2f}"
1319
  else:
 
1357
  dasar = float(pd.to_numeric(agg_total["Indeks_Dasar_Agregat_0_100"], errors="coerce").fillna(0).mean()) if "Indeks_Dasar_Agregat_0_100" in agg_total.columns else np.nan
1358
  final = float(pd.to_numeric(agg_total["Indeks_Final_Wilayah_0_100"], errors="coerce").fillna(0).mean()) if "Indeks_Final_Wilayah_0_100" in agg_total.columns else np.nan
1359
 
1360
+ faktor_mean_pct = float(pd.to_numeric(agg_total.get("faktor_penyesuaian", 100.0), errors="coerce").fillna(100.0).mean())
 
 
 
 
 
 
 
 
 
1361
  dampak = (final - dasar) if (pd.notna(final) and pd.notna(dasar)) else np.nan
1362
 
1363
  def fmt(x, nd=2):
 
1379
 
1380
  <div style="border:1px solid #333; border-radius:10px; padding:10px 12px; min-width:220px;">
1381
  <div style="opacity:0.8;">Cakupan Sampel (berdasarkan target 68%)</div>
1382
+ <div style="font-size:26px; font-weight:700;">{fmt(faktor_mean_pct,0)}%</div>
1383
+ <div style="opacity:0.7;">Rumus: min((total/target_68)*100, 100)</div>
1384
  </div>
1385
 
1386
  <div style="border:1px solid #333; border-radius:10px; padding:10px 12px; min-width:220px;">
1387
  <div style="opacity:0.8;">Penyesuaian Nilai (rata-rata)</div>
1388
  <div style="font-size:26px; font-weight:700;">{fmt(dampak,2)} poin</div>
1389
+ <div style="opacity:0.7;">Faktor penyesuaian (mean): {fmt(faktor_mean_pct,0)}%</div>
1390
  </div>
1391
  </div>
1392
  """.strip()
 
1457
  # Word report
1458
  word_path = generate_word_report(agg_jenis, wilayah_txt, kew_value or "(Semua)", analysis_text)
1459
 
 
1460
  msg = (
1461
  f"βœ… Selesai: entitas={len(detail_view)} | wilayah(keseluruhan)={len(agg_total)} | "
1462
+ f"jenis={len(agg_jenis)} | penyesuaian berbasis target_total_68 (cap 100%)"
1463
  )
1464
 
1465
  return (
 
1488
  gr.update(choices=["(Semua)"], value="(Semua)"),
1489
  )
1490
 
 
1491
  prov_vals = df_all["PROV_DISP"].dropna().astype(str).tolist()
1492
  prov_vals = [v for v in prov_vals if v and v.strip()]
1493
  prov_choices = ["(Semua)"] + sorted(set(prov_vals))
1494
 
1495
  kab_choices = ["(Semua)"] + sorted([x for x in df_all["KAB_DISP"].dropna().unique().tolist() if x])
1496
+ kew_choices = ["(Semua)"] + sorted([x for x in df_all["KEW_NORM"].dropna().unique().tolist() if x])
 
1497
 
1498
  return (
1499
  df_all, pop_kab, pop_prov, pop_khusus, meta, info,
1500
  gr.update(choices=prov_choices, value="(Semua)"),
1501
+ gr.update(choices=["(Semua)"], value="(Semua)"), # kab akan mengikuti prov
1502
+ gr.update(choices=kew_choices, value="(Semua)"),
1503
  )
1504
 
1505
+
1506
def ui_update_kab_choices(prov_value, df_all):
    """
    Build the Kab/Kota dropdown update for the currently selected province.

    - No data loaded -> only "(Semua)".
    - No province or "(Semua)" selected -> every Kab/Kota in df_all.
    - Otherwise -> only Kab/Kota whose PROV_DISP equals the selected province.

    The selected value is always reset to "(Semua)".
    """
    semua = "(Semua)"

    if df_all is None or df_all.empty:
        return gr.update(choices=[semua], value=semua)

    pilih_semua = (not prov_value) or prov_value == semua
    cakupan = df_all if pilih_semua else df_all[df_all["PROV_DISP"] == prov_value]

    # Drop missing and empty labels, then sort for a stable dropdown order.
    nama_kab = [k for k in cakupan["KAB_DISP"].dropna().unique().tolist() if k]
    nama_kab.sort()

    return gr.update(choices=[semua] + nama_kab, value=semua)
1519
+
1520
+
1521
def ui_run(prov_value, kab_value, kew_value, df_all, pop_kab, pop_prov, pop_khusus, meta):
    """Forward the filter selections and cached data to run_calc and return its result unchanged."""
    hasil = run_calc(
        prov_value, kab_value, kew_value,
        df_all, pop_kab, pop_prov, pop_khusus, meta,
    )
    return hasil
1523
+
1524
+
1525
# Gradio UI: layout, cached state, event wiring and launch.
with gr.Blocks(title="IPLM 2025 — FINAL (NO UPLOAD)") as demo:
    gr.Markdown("## IPLM 2025 — FINAL (NO UPLOAD)\n"
                "- Penyesuaian berbasis **target_total_68** (target minimum 68%) pada level wilayah.\n"
                "- Jika **total sampel >= target_total_68** → **faktor=100%** (tidak disesuaikan).\n"
                "- Cakupan (coverage) dihitung dari **pop_total** hanya untuk informasi.")

    st_info = gr.HTML()

    # State holders for the loaded data, filled by ui_load.
    st_df_all = gr.State()
    st_pop_kab = gr.State()
    st_pop_prov = gr.State()
    st_pop_khusus = gr.State()
    st_meta = gr.State()

    with gr.Row():
        dd_prov = gr.Dropdown(label="Provinsi", choices=["(Semua)"], value="(Semua)")
        dd_kab = gr.Dropdown(label="Kab/Kota", choices=["(Semua)"], value="(Semua)")
        dd_kew = gr.Dropdown(label="Kewenangan", choices=["(Semua)"], value="(Semua)")

    with gr.Row():
        btn_reload = gr.Button("Reload (paksa baca ulang file)")
        btn_run = gr.Button("Run Audit / Hitung")

    kpi_md = gr.HTML()

    with gr.Tabs():
        with gr.Tab("Ringkasan (Jenis + Keseluruhan)"):
            tbl_summary = gr.Dataframe(interactive=False)

        with gr.Tab("Agregat Wilayah (Keseluruhan)"):
            tbl_total = gr.Dataframe(interactive=False)

        with gr.Tab("Agregat Wilayah × Jenis"):
            tbl_jenis = gr.Dataframe(interactive=False)

        with gr.Tab("Detail Entitas (Final menempel Wilayah)"):
            tbl_detail = gr.Dataframe(interactive=False)

        with gr.Tab("Verifikasi Kecukupan Sampel (Target 68%)"):
            tbl_verif = gr.Dataframe(interactive=False)

        with gr.Tab("Bell Curve (Umum)"):
            fig_umum = gr.Plot()

        with gr.Tab("Bell Curve (Sekolah)"):
            fig_sekolah = gr.Plot()

        with gr.Tab("Bell Curve (Khusus)"):
            fig_khusus = gr.Plot()

        with gr.Tab("Analisis Naratif"):
            txt_status = gr.Markdown()
            txt_analysis = gr.Textbox(lines=22, label="Analisis (LLM)")

        with gr.Tab("Download"):
            f_summary = gr.File(label="Ringkasan (Jenis + Keseluruhan) — Excel")
            f_total = gr.File(label="Agregat Wilayah (Keseluruhan) — Excel")
            f_jenis = gr.File(label="Agregat Wilayah × Jenis — Excel")
            f_detail = gr.File(label="Detail Entitas (Final menempel Wilayah) — Excel")
            f_word = gr.File(label="Laporan Word (Agregat Wilayah × Jenis + Analisis)")
            # NOTE(review): f_verif is created here but is not listed in any
            # event's outputs below, so nothing in this block ever fills it.
            # Confirm whether run_calc should also return the verification file.
            f_verif = gr.File(label="Verifikasi Target 68% — Excel")

    # Both the initial load and the reload button write to the same targets,
    # so the list is shared to keep the two in sync.
    load_outputs = [
        st_df_all, st_pop_kab, st_pop_prov, st_pop_khusus, st_meta, st_info,
        dd_prov, dd_kab, dd_kew,
    ]

    # Initial load when the page opens.
    def _load0():
        return ui_load(force=False)

    demo.load(
        _load0,
        inputs=[],
        outputs=load_outputs,
    )

    # Reload button: same outputs, but calls ui_load with force=True.
    btn_reload.click(
        fn=lambda: ui_load(force=True),
        inputs=[],
        outputs=load_outputs,
    )

    # Province selection narrows the Kab/Kota dropdown.
    dd_prov.change(
        fn=ui_update_kab_choices,
        inputs=[dd_prov, st_df_all],
        outputs=[dd_kab]
    )

    # Run the calculation for the current filter selection.
    btn_run.click(
        fn=ui_run,
        inputs=[dd_prov, dd_kab, dd_kew, st_df_all, st_pop_kab, st_pop_prov, st_pop_khusus, st_meta],
        outputs=[
            kpi_md,
            tbl_summary, tbl_total, tbl_jenis, tbl_detail, tbl_verif,
            f_summary, f_total, f_jenis, f_detail, f_word,
            fig_umum, fig_sekolah, fig_khusus,
            txt_status, txt_analysis
        ]
    )

# NOTE(review): `concurrency_count` appears to be a Gradio 3.x queue() argument;
# confirm that the pinned Gradio version still accepts it before upgrading.
demo.queue(concurrency_count=1).launch(share=True)