irhamni committed on
Commit
968d291
·
verified ·
1 Parent(s): d271b9f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +108 -104
app.py CHANGED
@@ -1,27 +1,19 @@
1
  # -*- coding: utf-8 -*-
2
  """
3
- app.py — Dashboard Kekurangan Sampel IPLM (TANPA HITUNG INDEKS) + Grafik GAP (Bukan Persen)
4
-
5
- Fokus:
6
- - Target pengumpulan = 68% dari populasi unit (meta), BUKAN 100%
7
- - Output utama: "Kekurangan sampel" = berapa unit lagi yang harus dikumpulkan
8
-
9
- Pembanding:
10
  - KAB/KOTA:
11
  * Sekolah: target = 68% dari (SD + SMP)
12
  * Umum: target = 68% dari (Kecamatan + Desa/Kelurahan)
13
  - PROVINSI:
14
  * SMA: target = 68% dari (Total SMA)
15
-
16
- Fitur:
17
- - Filter: Provinsi, Kab/Kota, Kewenangan
18
- - Tabel Verifikasi (target 68% + kekurangan)
19
- - Detail subset DM (ringkas)
20
- - Grafik GAP (kekurangan unit) per wilayah
21
  - Download:
22
  1) Rekap (Verifikasi + Detail ringkas) .xlsx
23
  2) Data mentah subset DM sesuai filter .xlsx
24
- 3) Laporan Word (.docx) + narasi LLM soal kekurangan sampel
25
  """
26
 
27
  import os
@@ -37,7 +29,6 @@ from huggingface_hub import InferenceClient
37
 
38
  # Word report
39
  from docx import Document
40
- from docx.shared import Inches
41
 
42
  # Pie opsional (butuh kaleido)
43
  import plotly.express as px
@@ -51,18 +42,18 @@ except Exception:
51
  # ============================================================
52
  # 1) KONFIGURASI FILE
53
  # ============================================================
54
- DATA_FILE = "IPLM_clean_Manual.xlsx" # data sampel masuk (multi-sheet)
55
  META_KAB_FILE = "jumlahdesa_fixed (1).xlsx" # kecamatan & desa/kel per kab/kota
56
  META_SDSMP_FILE = "SD-SMP-kab.xlsx" # jumlah SD & SMP per kab/kota
57
  META_SMA_FILE = "SMA.xlsx" # jumlah SMA per provinsi
58
 
59
  # ============================================================
60
- # 1a) TARGET CAKUPAN SAMPEL (KEBIJAKAN)
61
  # ============================================================
62
- TARGET_COVERAGE = 0.68 # 68% dari populasi
63
 
64
  # ============================================================
65
- # 1b) KONFIGURASI LLM (Hugging Face Inference)
66
  # ============================================================
67
  USE_LLM = True
68
  LLM_MODEL_NAME = "meta-llama/Meta-Llama-3-8B-Instruct"
@@ -166,20 +157,33 @@ def norm_kab_label(s):
166
  t = " ".join(t.split())
167
  return re.sub(r"[^A-Z0-9]+", "", t)
168
 
169
- def _infer_jenjang_sd_smp(x):
170
- if pd.isna(x):
171
- return "OTHER"
172
- t = str(x).upper()
173
- if " SD " in f" {t} " or " SD/" in t or " MI " in f" {t} ":
174
- return "SD"
175
- if " SMP " in f" {t} " or " SMP/" in t or " MTS " in f" {t} ":
176
- return "SMP"
177
- return "OTHER"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
 
179
  def make_pie_plotly(num, den, title):
180
  if not HAS_KALEIDO:
181
  return None
182
-
183
  if den is None or pd.isna(den) or den <= 0:
184
  values = [0, 1]
185
  labels = ["Terjangkau", "Belum Terjangkau"]
@@ -188,7 +192,6 @@ def make_pie_plotly(num, den, title):
188
  den = float(den)
189
  values = [max(num, 0), max(den - num, 0)]
190
  labels = ["Terjangkau", "Belum Terjangkau"]
191
-
192
  fig = px.pie(values=values, names=labels, title=title, hole=0.35)
193
  tmp = tempfile.mktemp(suffix=".png")
194
  try:
@@ -214,6 +217,8 @@ jenis_col_glob = None
214
  subjenis_col_glob = None
215
  nama_col_glob = None
216
 
 
 
217
  # ---- Load DM ----
218
  try:
219
  fp = Path(DATA_FILE)
@@ -231,11 +236,13 @@ try:
231
  subjenis_col_glob = pick_col(df_all_raw, ["sub_jenis_perpus", "Sub Jenis", "SubJenis", "subjenis", "jenjang"])
232
  nama_col_glob = pick_col(df_all_raw, ["nama_perpustakaan", "nm_perpustakaan", "nm_instansi_lembaga", "Nama Perpustakaan"])
233
 
 
234
  if kew_col_glob:
235
  df_all_raw["KEW_NORM"] = df_all_raw[kew_col_glob].apply(norm_kew)
236
  else:
237
  df_all_raw["KEW_NORM"] = None
238
 
 
239
  val_map_jenis = {
240
  "PERPUSTAKAAN SEKOLAH": "sekolah",
241
  "SEKOLAH": "sekolah",
@@ -250,13 +257,22 @@ try:
250
  else:
251
  df_all_raw["_dataset"] = None
252
 
 
 
 
 
 
 
 
 
 
 
 
253
  DATA_INFO = f"Data terbaca dari: **{DATA_FILE}** | Jumlah baris: **{len(df_all_raw)}**"
254
  except Exception as e:
255
  df_all_raw = None
256
  DATA_INFO = f"⚠️ Gagal memuat `{DATA_FILE}` | Error: `{e}`"
257
 
258
- extra_info = []
259
-
260
  # ---- Meta Kab (Kec/Desa) ----
261
  try:
262
  meta_kab_raw = pd.read_excel(META_KAB_FILE)
@@ -354,21 +370,21 @@ if extra_info:
354
  # 4) DROPDOWN
355
  # ============================================================
356
  def all_prov_choices():
357
- if df_all_raw is None or prov_col_glob is None:
358
  return ["(Semua)"]
359
- s = df_all_raw[prov_col_glob].dropna().astype(str).str.strip()
360
- vals = sorted([o for o in s.unique() if o != ""])
361
  return ["(Semua)"] + vals
362
 
363
  def get_kab_choices_for_prov(prov_value):
364
- if df_all_raw is None or kab_col_glob is None:
365
  return ["(Semua)"]
366
- if prov_value is None or prov_value == "(Semua)" or prov_col_glob is None:
367
- s = df_all_raw[kab_col_glob].dropna().astype(str).str.strip()
368
  else:
369
- m = df_all_raw[prov_col_glob].astype(str).str.strip() == prov_value
370
- s = df_all_raw.loc[m, kab_col_glob].dropna().astype(str).str.strip()
371
- vals = sorted([x for x in s.unique() if x != ""])
372
  return ["(Semua)"] + vals
373
 
374
  def all_kew_choices():
@@ -385,7 +401,7 @@ default_kew = "KAB/KOTA" if "KAB/KOTA" in kew_choices else (kew_choices[0] if k
385
 
386
 
387
  # ============================================================
388
- # 5) VERIFIKASI GAP (TARGET 68%)
389
  # ============================================================
390
  def compute_gap_verification(df_filtered: pd.DataFrame, kew_value: str) -> pd.DataFrame:
391
  if df_filtered is None or len(df_filtered) == 0:
@@ -393,31 +409,23 @@ def compute_gap_verification(df_filtered: pd.DataFrame, kew_value: str) -> pd.Da
393
 
394
  kew_norm = str(kew_value or "").upper()
395
 
396
- # ================ KAB/KOTA ================
397
  if ("KAB" in kew_norm or "KOTA" in kew_norm):
398
- if kab_col_glob is None or meta_kab_df is None:
399
- return pd.DataFrame({"Info": ["Kolom kab/kota atau meta kab tidak tersedia."]})
400
 
401
  tmp = df_filtered.copy()
402
- tmp = tmp[pd.notna(tmp[kab_col_glob])]
403
  if tmp.empty:
404
  return pd.DataFrame()
405
 
406
- tmp["kab_key"] = tmp[kab_col_glob].apply(norm_kab_label)
407
 
408
- # total sampel per kab
409
  g_total = tmp.groupby("kab_key").size().rename("Sampel Total").reset_index()
410
 
411
- # sekolah & jenjang (opsional)
412
- if subjenis_col_glob and subjenis_col_glob in tmp.columns:
413
- tmp["jenjang"] = tmp[subjenis_col_glob].apply(_infer_jenjang_sd_smp)
414
- else:
415
- tmp["jenjang"] = "OTHER"
416
-
417
  tmp_sek = tmp[tmp["_dataset"] == "sekolah"].copy() if "_dataset" in tmp.columns else tmp.copy()
418
  g_sek_total = tmp_sek.groupby("kab_key").size().rename("Sampel Sekolah").reset_index()
419
 
420
- # umum
421
  tmp_umum = tmp[tmp["_dataset"] == "umum"].copy() if "_dataset" in tmp.columns else tmp.copy()
422
  g_umum = tmp_umum.groupby("kab_key").size().rename("Sampel Umum").reset_index()
423
 
@@ -438,11 +446,9 @@ def compute_gap_verification(df_filtered: pd.DataFrame, kew_value: str) -> pd.Da
438
  merged["Populasi Sekolah (SD+SMP)"] = merged[["Jml_SD", "Jml_SMP"]].sum(axis=1, skipna=True)
439
  merged["Populasi Admin (Kec+Desa/Kel)"] = merged.get("Jml_Kecamatan", np.nan) + merged.get("Jml_DesaKel", np.nan)
440
 
441
- # TARGET 68%
442
  merged["Target Sekolah (68%)"] = np.ceil(merged["Populasi Sekolah (SD+SMP)"] * TARGET_COVERAGE)
443
  merged["Target Umum (68%)"] = np.ceil(merged["Populasi Admin (Kec+Desa/Kel)"] * TARGET_COVERAGE)
444
 
445
- # GAP: berapa yang harus dikumpulkan lagi
446
  merged["Kekurangan Sampel Sekolah"] = merged.apply(
447
  lambda r: max(int(r["Target Sekolah (68%)"] - r["Sampel Sekolah"]) if pd.notna(r["Target Sekolah (68%)"]) else 0, 0),
448
  axis=1
@@ -469,21 +475,19 @@ def compute_gap_verification(df_filtered: pd.DataFrame, kew_value: str) -> pd.Da
469
 
470
  return out.sort_values("Kab/Kota").reset_index(drop=True).round(0)
471
 
472
- # ================ PROVINSI ================
473
  if ("PROV" in kew_norm):
474
- if meta_sma_df is None:
475
- return pd.DataFrame({"Info": ["Meta SMA tidak tersedia."]})
476
- if prov_col_glob is None:
477
- return pd.DataFrame({"Info": ["Kolom provinsi tidak ditemukan di DM."]})
478
 
479
  tmp = df_filtered.copy()
480
- tmp = tmp[pd.notna(tmp[prov_col_glob])]
481
  if tmp.empty:
482
  return pd.DataFrame({"Info": ["Tidak ada data sampel kewenangan provinsi."]})
483
 
484
- tmp["prov_key"] = tmp[prov_col_glob].apply(norm_prov_label)
485
 
486
- # start dari sampel (biar tidak bocor prov lain)
487
  g_total = tmp.groupby("prov_key").size().rename("Sampel Total (Prov)").reset_index()
488
 
489
  tmp_sek = tmp[tmp["_dataset"] == "sekolah"].copy() if "_dataset" in tmp.columns else tmp.copy()
@@ -496,7 +500,6 @@ def compute_gap_verification(df_filtered: pd.DataFrame, kew_value: str) -> pd.Da
496
  )
497
 
498
  merged["Sampel SMA (DM)"] = merged["Sampel SMA (DM)"].fillna(0).astype(int)
499
-
500
  merged["Populasi SMA (Meta)"] = merged["Jml_SMA"]
501
  merged["Target SMA (68%)"] = np.ceil(merged["Populasi SMA (Meta)"] * TARGET_COVERAGE)
502
 
@@ -521,7 +524,7 @@ def compute_gap_verification(df_filtered: pd.DataFrame, kew_value: str) -> pd.Da
521
 
522
 
523
  # ============================================================
524
- # 6) GRAFIK GAP (KURANGAN YANG HARUS DIKUMPULIN)
525
  # ============================================================
526
  def make_gap_figure(verif_df: pd.DataFrame, kew_value: str) -> go.Figure:
527
  fig = go.Figure()
@@ -539,7 +542,6 @@ def make_gap_figure(verif_df: pd.DataFrame, kew_value: str) -> go.Figure:
539
  def _num(s):
540
  return pd.to_numeric(s, errors="coerce").fillna(0).astype(int)
541
 
542
- # sort by total gap biar enak dilihat
543
  if ("KAB" in kew_norm or "KOTA" in kew_norm) and ("Kab/Kota" in verif_df.columns):
544
  dfp = verif_df.copy()
545
  dfp["gap_total"] = _num(dfp.get("Kekurangan Sampel Sekolah", 0)) + _num(dfp.get("Kekurangan Sampel Umum", 0))
@@ -561,7 +563,7 @@ def make_gap_figure(verif_df: pd.DataFrame, kew_value: str) -> go.Figure:
561
  ))
562
 
563
  fig.update_layout(
564
- title="Kekurangan Sampel yang Harus Dikumpulkan (KAB/KOTA) β€” Target 68%",
565
  barmode="group",
566
  xaxis_title="Kab/Kota",
567
  yaxis_title="Kekurangan (unit)",
@@ -585,7 +587,7 @@ def make_gap_figure(verif_df: pd.DataFrame, kew_value: str) -> go.Figure:
585
  ))
586
 
587
  fig.update_layout(
588
- title="Kekurangan Sampel yang Harus Dikumpulkan (PROVINSI) β€” SMA Target 68%",
589
  xaxis_title="Provinsi",
590
  yaxis_title="Kekurangan (unit)",
591
  margin=dict(l=40, r=20, t=60, b=140),
@@ -602,7 +604,7 @@ def make_gap_figure(verif_df: pd.DataFrame, kew_value: str) -> go.Figure:
602
 
603
 
604
  # ============================================================
605
- # 7) LLM REPORT (GAP)
606
  # ============================================================
607
  def build_context_gap(verif_df: pd.DataFrame, prov: str, kab: str, kew: str) -> str:
608
  wilayah = kab if kab and kab != "(Semua)" else (prov if prov and prov != "(Semua)" else "NASIONAL")
@@ -623,7 +625,6 @@ def build_context_gap(verif_df: pd.DataFrame, prov: str, kab: str, kew: str) ->
623
  t[gc] = pd.to_numeric(t[gc], errors="coerce").fillna(0)
624
  keycol = "Kab/Kota" if "Kab/Kota" in t.columns else ("Provinsi" if "Provinsi" in t.columns else t.columns[0])
625
  top = t.sort_values(gc, ascending=False).head(10)
626
-
627
  lines.append("\nTop prioritas (gap terbesar):")
628
  for _, r in top.iterrows():
629
  lines.append(f"- {r[keycol]}: {gc}={int(r[gc])}")
@@ -649,11 +650,11 @@ def rule_based_gap_report(verif_df: pd.DataFrame, prov: str, kab: str, kew: str)
649
 
650
  for gc in gap_cols:
651
  total_gap = int(pd.to_numeric(verif_df[gc], errors="coerce").fillna(0).sum())
652
- lines.append(f"- Total {gc}: **{total_gap}** unit yang perlu dilengkapi untuk mencapai target 68%.")
653
 
654
  lines.append(
655
- "\nRekomendasi operasional: prioritaskan pengumpulan data pada wilayah dengan gap terbesar, "
656
- "dan pastikan konsistensi penamaan provinsi/kab-kota agar pencocokan dengan meta tidak gagal."
657
  )
658
  return "\n".join(lines)
659
 
@@ -666,8 +667,7 @@ def generate_llm_gap_report(verif_df: pd.DataFrame, prov: str, kab: str, kew: st
666
 
667
  system_prompt = (
668
  "Anda adalah analis kebijakan dan manajer program IPLM. "
669
- "Tugas Anda menyusun narasi singkat dan tegas tentang kekurangan sampel data IPLM "
670
- "serta strategi pengumpulan data untuk menutup gap menuju target."
671
  )
672
 
673
  user_prompt = f"""
@@ -677,13 +677,13 @@ DATA RINGKAS GAP SAMPEL IPLM:
677
 
678
  TULIS LAPORAN (BAHASA INDONESIA FORMAL) DENGAN STRUKTUR:
679
  1) Ringkasan kondisi pengumpulan data (1 paragraf).
680
- 2) Angka total kekurangan sampel yang masih perlu dikumpulkan untuk mencapai target 68% (1 paragraf).
681
- 3) Prioritas wilayah (top gap) dan alasan operasionalnya (1 paragraf).
682
- 4) Rencana aksi 30–60 hari (paragraf naratif, bukan bullet).
683
 
684
  BATASAN:
685
- - Jangan bahas indeks / skor IPLM sama sekali.
686
- - Fokus murni pada target 68%, kekurangan sampel, dan strategi pelengkapannya.
687
  """
688
 
689
  try:
@@ -721,7 +721,8 @@ def generate_word_report_gap(verif_df: pd.DataFrame, prov: str, kab: str, kew: s
721
  doc.add_paragraph(f"Target pengumpulan: {int(TARGET_COVERAGE*100)}% dari populasi unit (meta).")
722
  doc.add_paragraph(f"Jumlah unit analisis: {len(verif_df)}")
723
 
724
- doc.add_heading("Tabel Verifikasi (Target 68% & Kekurangan Sampel)", level=2)
 
725
  view = verif_df.copy()
726
  if len(view) > 200:
727
  doc.add_paragraph("Catatan: tabel dipotong (200 baris pertama) untuk menjaga ukuran dokumen.")
@@ -741,30 +742,33 @@ def generate_word_report_gap(verif_df: pd.DataFrame, prov: str, kab: str, kew: s
741
  if not HAS_KALEIDO:
742
  doc.add_paragraph("Grafik pie tidak dibuat karena 'kaleido' tidak tersedia di server.")
743
  else:
 
744
  pie_made = False
745
-
746
  if "Sampel Sekolah" in verif_df.columns and "Target Sekolah (68%)" in verif_df.columns:
747
  samp = pd.to_numeric(verif_df["Sampel Sekolah"], errors="coerce").fillna(0).sum()
748
  tgt = pd.to_numeric(verif_df["Target Sekolah (68%)"], errors="coerce").fillna(0).sum()
749
- img = make_pie_plotly(samp, tgt, "Capaian Sekolah (Total) terhadap Target 68%")
750
  if img:
751
- doc.add_picture(img, width=Inches(5))
 
752
  pie_made = True
753
 
754
  if (not pie_made) and ("Sampel Umum" in verif_df.columns and "Target Umum (68%)" in verif_df.columns):
755
  samp = pd.to_numeric(verif_df["Sampel Umum"], errors="coerce").fillna(0).sum()
756
  tgt = pd.to_numeric(verif_df["Target Umum (68%)"], errors="coerce").fillna(0).sum()
757
- img = make_pie_plotly(samp, tgt, "Capaian Umum (Total) terhadap Target 68%")
758
  if img:
759
- doc.add_picture(img, width=Inches(5))
 
760
  pie_made = True
761
 
762
  if (not pie_made) and ("Sampel SMA (DM)" in verif_df.columns and "Target SMA (68%)" in verif_df.columns):
763
  samp = pd.to_numeric(verif_df["Sampel SMA (DM)"], errors="coerce").fillna(0).sum()
764
  tgt = pd.to_numeric(verif_df["Target SMA (68%)"], errors="coerce").fillna(0).sum()
765
- img = make_pie_plotly(samp, tgt, "Capaian SMA (Total) terhadap Target 68%")
766
  if img:
767
- doc.add_picture(img, width=Inches(5))
 
768
  pie_made = True
769
 
770
  if not pie_made:
@@ -795,15 +799,15 @@ def run_core(prov_value, kab_value, kew_value):
795
 
796
  df = df_all_raw.copy()
797
 
798
- # filter prov
799
- if prov_col_glob and prov_value and prov_value != "(Semua)":
800
- df = df[df[prov_col_glob].astype(str).str.strip() == prov_value]
801
 
802
- # filter kab
803
- if kab_col_glob and kab_value and kab_value != "(Semua)":
804
- df = df[df[kab_col_glob].astype(str).str.strip() == kab_value]
805
 
806
- # filter kew
807
  if kew_value and kew_value != "(Semua)":
808
  df = df[df["KEW_NORM"] == kew_value]
809
 
@@ -820,7 +824,7 @@ def run_core(prov_value, kab_value, kew_value):
820
 
821
  # detail subset DM untuk UI (ringkas)
822
  cols = []
823
- for c in [prov_col_glob, kab_col_glob, nama_col_glob, kew_col_glob, jenis_col_glob, subjenis_col_glob, "_dataset", "KEW_NORM"]:
824
  if c and c in df.columns and c not in cols:
825
  cols.append(c)
826
  detail_df = df[cols].copy() if cols else df.copy()
@@ -830,11 +834,11 @@ def run_core(prov_value, kab_value, kew_value):
830
 
831
  # simpan file download
832
  tmpdir = tempfile.mkdtemp()
833
- rekap_excel_path = os.path.join(tmpdir, "Rekap_Kekurangan_Sampel_IPLM_Target68.xlsx")
834
  raw_dm_path = os.path.join(tmpdir, "DM_Subset_Raw.xlsx")
835
 
836
  with pd.ExcelWriter(rekap_excel_path, engine="openpyxl") as w:
837
- verif_df.to_excel(w, sheet_name="Verifikasi_Gap_Target68", index=False)
838
  detail_df.to_excel(w, sheet_name="Detail_Subset_DM", index=False)
839
 
840
  df.to_excel(raw_dm_path, index=False)
@@ -863,14 +867,14 @@ def on_prov_change(prov_value):
863
 
864
 
865
  # ============================================================
866
- # 10) UI GRADIO
867
  # ============================================================
868
  with gr.Blocks() as demo:
869
  gr.Markdown(
870
  f"""
871
- # Dashboard Kekurangan Sampel IPLM (Tanpa Hitung Indeks) β€” Target {int(TARGET_COVERAGE*100)}%
872
 
873
- Aplikasi ini mengecek **berapa unit lagi yang harus dikumpulkan** agar memenuhi target minimal representasi.
874
 
875
  **File:**
876
  - `{DATA_FILE}` (DM)
@@ -892,7 +896,7 @@ Aplikasi ini mengecek **berapa unit lagi yang harus dikumpulkan** agar memenuhi
892
  run_btn = gr.Button("Hitung Kekurangan Sampel")
893
  msg_out = gr.Markdown()
894
 
895
- gr.Markdown("### Verifikasi (Target 68% & Kekurangan Sampel)")
896
  verif_out = gr.DataFrame(interactive=False)
897
 
898
  gr.Markdown("### Grafik Kekurangan Sampel (berapa unit lagi yang harus dikumpulkan)")
 
1
  # -*- coding: utf-8 -*-
2
  """
3
+ app.py — Dashboard Kekurangan Sampel IPLM (TANPA HITUNG INDEKS)
4
+ - Target pengumpulan = 68% (bisa diubah TARGET_COVERAGE)
 
 
 
 
 
5
  - KAB/KOTA:
6
  * Sekolah: target = 68% dari (SD + SMP)
7
  * Umum: target = 68% dari (Kecamatan + Desa/Kelurahan)
8
  - PROVINSI:
9
  * SMA: target = 68% dari (Total SMA)
10
+ Output utama:
11
+ - Tabel verifikasi: target & kekurangan (berapa unit lagi)
12
+ - Grafik GAP: kekurangan unit (bukan persen)
 
 
 
13
  - Download:
14
  1) Rekap (Verifikasi + Detail ringkas) .xlsx
15
  2) Data mentah subset DM sesuai filter .xlsx
16
+ 3) Laporan Word (.docx) + narasi LLM (kekurangan sampel & rencana aksi)
17
  """
18
 
19
  import os
 
29
 
30
  # Word report
31
  from docx import Document
 
32
 
33
  # Pie opsional (butuh kaleido)
34
  import plotly.express as px
 
42
  # ============================================================
43
  # 1) KONFIGURASI FILE
44
  # ============================================================
45
+ DATA_FILE = "IPLM_clean_Manual.xlsx" # DM sampel masuk (multi-sheet)
46
  META_KAB_FILE = "jumlahdesa_fixed (1).xlsx" # kecamatan & desa/kel per kab/kota
47
  META_SDSMP_FILE = "SD-SMP-kab.xlsx" # jumlah SD & SMP per kab/kota
48
  META_SMA_FILE = "SMA.xlsx" # jumlah SMA per provinsi
49
 
50
  # ============================================================
51
+ # 1a) TARGET CAKUPAN (KEBIJAKAN)
52
  # ============================================================
53
+ TARGET_COVERAGE = 0.68
54
 
55
  # ============================================================
56
+ # 1b) KONFIGURASI LLM (HF Inference)
57
  # ============================================================
58
  USE_LLM = True
59
  LLM_MODEL_NAME = "meta-llama/Meta-Llama-3-8B-Instruct"
 
157
  t = " ".join(t.split())
158
  return re.sub(r"[^A-Z0-9]+", "", t)
159
 
160
+ # === FIX UTAMA: bersihin display prov/kab biar gak dobel "PROVINSI PROVINSI" ===
161
def clean_prov_display(s):
    """Return a tidy, upper-cased province label for dropdowns and filters.

    The label is upper-cased, surrounding whitespace is removed and internal
    whitespace runs are collapsed to a single space.  Any run of repeated
    "PROVINSI" words (e.g. "PROVINSI PROVINSI JAWA BARAT") is collapsed to a
    single "PROVINSI", wherever it appears in the label.

    Args:
        s: Raw province value; may be a string, ``None`` or NaN.

    Returns:
        The cleaned label, or ``None`` when ``s`` is missing (``pd.isna``).
    """
    if pd.isna(s):
        return None

    # split()/join both trims the ends and collapses internal whitespace.
    label = " ".join(str(s).upper().split())

    # Collapse "PROVINSI PROVINSI [PROVINSI ...]" in one pass.  The word
    # boundaries keep words that merely start with PROVINSI untouched.
    return re.sub(r"\bPROVINSI(?: PROVINSI\b)+", "PROVINSI", label)
171
+
172
def clean_kab_display(s):
    """Return a tidy, upper-cased kabupaten/kota label for dropdowns and filters.

    The label is upper-cased and whitespace-normalised, then:

    * "KABUPATEN", "KAB" and "KAB." (with or without a following space) are
      all rewritten to the single form "KAB. ";
    * "KOTA ADMINISTRASI" is shortened to "KOTA".

    Args:
        s: Raw kab/kota value; may be a string, ``None`` or NaN.

    Returns:
        The cleaned label, or ``None`` when ``s`` is missing (``pd.isna``).
    """
    if pd.isna(s):
        return None

    # split()/join both trims the ends and collapses internal whitespace.
    label = " ".join(str(s).upper().split())

    # One word-bounded pattern replaces the chained str.replace calls: the
    # leading \b stops words that merely end in "KAB" from being rewritten,
    # and "KAB\b" stops names that only start with "KAB" from matching.
    # Swallowing the optional dot and spaces lets "KAB.BOGOR" get its space.
    label = re.sub(r"\b(?:KABUPATEN|KAB\b)\.?\s*", "KAB. ", label)

    label = label.replace("KOTA ADMINISTRASI", "KOTA")

    # The substitution can leave a trailing space when the abbreviation is
    # the last word of the label.
    return label.strip()
183
 
184
  def make_pie_plotly(num, den, title):
185
  if not HAS_KALEIDO:
186
  return None
 
187
  if den is None or pd.isna(den) or den <= 0:
188
  values = [0, 1]
189
  labels = ["Terjangkau", "Belum Terjangkau"]
 
192
  den = float(den)
193
  values = [max(num, 0), max(den - num, 0)]
194
  labels = ["Terjangkau", "Belum Terjangkau"]
 
195
  fig = px.pie(values=values, names=labels, title=title, hole=0.35)
196
  tmp = tempfile.mktemp(suffix=".png")
197
  try:
 
217
  subjenis_col_glob = None
218
  nama_col_glob = None
219
 
220
+ extra_info = []
221
+
222
  # ---- Load DM ----
223
  try:
224
  fp = Path(DATA_FILE)
 
236
  subjenis_col_glob = pick_col(df_all_raw, ["sub_jenis_perpus", "Sub Jenis", "SubJenis", "subjenis", "jenjang"])
237
  nama_col_glob = pick_col(df_all_raw, ["nama_perpustakaan", "nm_perpustakaan", "nm_instansi_lembaga", "Nama Perpustakaan"])
238
 
239
+ # kewenangan normal
240
  if kew_col_glob:
241
  df_all_raw["KEW_NORM"] = df_all_raw[kew_col_glob].apply(norm_kew)
242
  else:
243
  df_all_raw["KEW_NORM"] = None
244
 
245
+ # mapping jenis perpustakaan -> dataset (sekolah/umum/khusus)
246
  val_map_jenis = {
247
  "PERPUSTAKAAN SEKOLAH": "sekolah",
248
  "SEKOLAH": "sekolah",
 
257
  else:
258
  df_all_raw["_dataset"] = None
259
 
260
+ # === kolom clean untuk dropdown & filter ===
261
+ if prov_col_glob and prov_col_glob in df_all_raw.columns:
262
+ df_all_raw["prov_clean"] = df_all_raw[prov_col_glob].apply(clean_prov_display)
263
+ else:
264
+ df_all_raw["prov_clean"] = None
265
+
266
+ if kab_col_glob and kab_col_glob in df_all_raw.columns:
267
+ df_all_raw["kab_clean"] = df_all_raw[kab_col_glob].apply(clean_kab_display)
268
+ else:
269
+ df_all_raw["kab_clean"] = None
270
+
271
  DATA_INFO = f"Data terbaca dari: **{DATA_FILE}** | Jumlah baris: **{len(df_all_raw)}**"
272
  except Exception as e:
273
  df_all_raw = None
274
  DATA_INFO = f"⚠️ Gagal memuat `{DATA_FILE}` | Error: `{e}`"
275
 
 
 
276
  # ---- Meta Kab (Kec/Desa) ----
277
  try:
278
  meta_kab_raw = pd.read_excel(META_KAB_FILE)
 
370
  # 4) DROPDOWN
371
  # ============================================================
372
def all_prov_choices():
    """Build the province dropdown options.

    Returns:
        A list that always starts with "(Semua)", followed by the sorted,
        distinct, non-empty values of the ``prov_clean`` column of
        ``df_all_raw``.  Only "(Semua)" is returned when the data frame is
        not loaded or has no ``prov_clean`` column.
    """
    choices = ["(Semua)"]
    if df_all_raw is not None and "prov_clean" in df_all_raw.columns:
        labels = df_all_raw["prov_clean"].dropna().astype(str).str.strip()
        # Empty strings are falsy, so a plain truth test filters them out.
        choices.extend(sorted(label for label in labels.unique() if label))
    return choices
378
 
379
def get_kab_choices_for_prov(prov_value):
    """Build the kab/kota dropdown options, optionally limited to a province.

    Args:
        prov_value: Selected province label.  ``None`` or "(Semua)" means
            no province restriction.

    Returns:
        A list that always starts with "(Semua)", followed by the sorted,
        distinct, non-empty values of the ``kab_clean`` column of
        ``df_all_raw``.  When a province is given, only rows whose
        ``prov_clean`` equals it (compared as stripped strings) are used.
        Only "(Semua)" is returned when the data frame is not loaded or
        has no ``kab_clean`` column.
    """
    if df_all_raw is None or "kab_clean" not in df_all_raw.columns:
        return ["(Semua)"]

    kab_series = df_all_raw["kab_clean"]
    if prov_value is not None and prov_value != "(Semua)":
        wanted = str(prov_value).strip()
        in_prov = df_all_raw["prov_clean"].astype(str).str.strip() == wanted
        kab_series = kab_series[in_prov]

    names = kab_series.dropna().astype(str).str.strip().unique()
    # Empty strings are falsy, so a plain truth test filters them out.
    return ["(Semua)"] + sorted(name for name in names if name)
389
 
390
  def all_kew_choices():
 
401
 
402
 
403
  # ============================================================
404
+ # 5) VERIFIKASI GAP (TARGET 68%) — OUTPUT: KEKURANGAN UNIT
405
  # ============================================================
406
  def compute_gap_verification(df_filtered: pd.DataFrame, kew_value: str) -> pd.DataFrame:
407
  if df_filtered is None or len(df_filtered) == 0:
 
409
 
410
  kew_norm = str(kew_value or "").upper()
411
 
412
+ # =================== KAB/KOTA ===================
413
  if ("KAB" in kew_norm or "KOTA" in kew_norm):
414
+ if "kab_clean" not in df_filtered.columns or meta_kab_df is None:
415
+ return pd.DataFrame({"Info": ["Kolom kab_clean atau meta kab tidak tersedia."]})
416
 
417
  tmp = df_filtered.copy()
418
+ tmp = tmp[pd.notna(tmp["kab_clean"])]
419
  if tmp.empty:
420
  return pd.DataFrame()
421
 
422
+ tmp["kab_key"] = tmp["kab_clean"].apply(norm_kab_label)
423
 
 
424
  g_total = tmp.groupby("kab_key").size().rename("Sampel Total").reset_index()
425
 
 
 
 
 
 
 
426
  tmp_sek = tmp[tmp["_dataset"] == "sekolah"].copy() if "_dataset" in tmp.columns else tmp.copy()
427
  g_sek_total = tmp_sek.groupby("kab_key").size().rename("Sampel Sekolah").reset_index()
428
 
 
429
  tmp_umum = tmp[tmp["_dataset"] == "umum"].copy() if "_dataset" in tmp.columns else tmp.copy()
430
  g_umum = tmp_umum.groupby("kab_key").size().rename("Sampel Umum").reset_index()
431
 
 
446
  merged["Populasi Sekolah (SD+SMP)"] = merged[["Jml_SD", "Jml_SMP"]].sum(axis=1, skipna=True)
447
  merged["Populasi Admin (Kec+Desa/Kel)"] = merged.get("Jml_Kecamatan", np.nan) + merged.get("Jml_DesaKel", np.nan)
448
 
 
449
  merged["Target Sekolah (68%)"] = np.ceil(merged["Populasi Sekolah (SD+SMP)"] * TARGET_COVERAGE)
450
  merged["Target Umum (68%)"] = np.ceil(merged["Populasi Admin (Kec+Desa/Kel)"] * TARGET_COVERAGE)
451
 
 
452
  merged["Kekurangan Sampel Sekolah"] = merged.apply(
453
  lambda r: max(int(r["Target Sekolah (68%)"] - r["Sampel Sekolah"]) if pd.notna(r["Target Sekolah (68%)"]) else 0, 0),
454
  axis=1
 
475
 
476
  return out.sort_values("Kab/Kota").reset_index(drop=True).round(0)
477
 
478
+ # =================== PROVINSI ===================
479
  if ("PROV" in kew_norm):
480
+ if meta_sma_df is None or "prov_clean" not in df_filtered.columns:
481
+ return pd.DataFrame({"Info": ["Meta SMA atau kolom prov_clean tidak tersedia."]})
 
 
482
 
483
  tmp = df_filtered.copy()
484
+ tmp = tmp[pd.notna(tmp["prov_clean"])]
485
  if tmp.empty:
486
  return pd.DataFrame({"Info": ["Tidak ada data sampel kewenangan provinsi."]})
487
 
488
+ tmp["prov_key"] = tmp["prov_clean"].apply(norm_prov_label)
489
 
490
+ # start dari sampel (tidak bocor prov lain)
491
  g_total = tmp.groupby("prov_key").size().rename("Sampel Total (Prov)").reset_index()
492
 
493
  tmp_sek = tmp[tmp["_dataset"] == "sekolah"].copy() if "_dataset" in tmp.columns else tmp.copy()
 
500
  )
501
 
502
  merged["Sampel SMA (DM)"] = merged["Sampel SMA (DM)"].fillna(0).astype(int)
 
503
  merged["Populasi SMA (Meta)"] = merged["Jml_SMA"]
504
  merged["Target SMA (68%)"] = np.ceil(merged["Populasi SMA (Meta)"] * TARGET_COVERAGE)
505
 
 
524
 
525
 
526
  # ============================================================
527
+ # 6) GRAFIK GAP (KEKURANGAN UNIT) — BUKAN PERSEN
528
  # ============================================================
529
  def make_gap_figure(verif_df: pd.DataFrame, kew_value: str) -> go.Figure:
530
  fig = go.Figure()
 
542
  def _num(s):
543
  return pd.to_numeric(s, errors="coerce").fillna(0).astype(int)
544
 
 
545
  if ("KAB" in kew_norm or "KOTA" in kew_norm) and ("Kab/Kota" in verif_df.columns):
546
  dfp = verif_df.copy()
547
  dfp["gap_total"] = _num(dfp.get("Kekurangan Sampel Sekolah", 0)) + _num(dfp.get("Kekurangan Sampel Umum", 0))
 
563
  ))
564
 
565
  fig.update_layout(
566
+ title=f"Kekurangan Sampel yang Harus Dikumpulkan (KAB/KOTA) β€” Target {int(TARGET_COVERAGE*100)}%",
567
  barmode="group",
568
  xaxis_title="Kab/Kota",
569
  yaxis_title="Kekurangan (unit)",
 
587
  ))
588
 
589
  fig.update_layout(
590
+ title=f"Kekurangan Sampel yang Harus Dikumpulkan (PROVINSI) β€” Target {int(TARGET_COVERAGE*100)}%",
591
  xaxis_title="Provinsi",
592
  yaxis_title="Kekurangan (unit)",
593
  margin=dict(l=40, r=20, t=60, b=140),
 
604
 
605
 
606
  # ============================================================
607
+ # 7) LLM NARASI (GAP)
608
  # ============================================================
609
  def build_context_gap(verif_df: pd.DataFrame, prov: str, kab: str, kew: str) -> str:
610
  wilayah = kab if kab and kab != "(Semua)" else (prov if prov and prov != "(Semua)" else "NASIONAL")
 
625
  t[gc] = pd.to_numeric(t[gc], errors="coerce").fillna(0)
626
  keycol = "Kab/Kota" if "Kab/Kota" in t.columns else ("Provinsi" if "Provinsi" in t.columns else t.columns[0])
627
  top = t.sort_values(gc, ascending=False).head(10)
 
628
  lines.append("\nTop prioritas (gap terbesar):")
629
  for _, r in top.iterrows():
630
  lines.append(f"- {r[keycol]}: {gc}={int(r[gc])}")
 
650
 
651
  for gc in gap_cols:
652
  total_gap = int(pd.to_numeric(verif_df[gc], errors="coerce").fillna(0).sum())
653
+ lines.append(f"- Total {gc}: **{total_gap}** unit yang perlu dilengkapi untuk mencapai target.")
654
 
655
  lines.append(
656
+ "\nArah tindak lanjut: fokuskan mobilisasi pengumpulan data pada unit dengan gap terbesar, "
657
+ "pastikan daftar target unit tersedia, dan lakukan monitoring harian hingga gap menurun."
658
  )
659
  return "\n".join(lines)
660
 
 
667
 
668
  system_prompt = (
669
  "Anda adalah analis kebijakan dan manajer program IPLM. "
670
+ "Fokus Anda hanya pada gap sampel (kekurangan unit) dan strategi menutup kekurangan tersebut."
 
671
  )
672
 
673
  user_prompt = f"""
 
677
 
678
  TULIS LAPORAN (BAHASA INDONESIA FORMAL) DENGAN STRUKTUR:
679
  1) Ringkasan kondisi pengumpulan data (1 paragraf).
680
+ 2) Total kekurangan sampel yang masih perlu dikumpulkan menuju target {int(TARGET_COVERAGE*100)}% (1 paragraf).
681
+ 3) Prioritas wilayah (gap terbesar) dan alasan operasional (1 paragraf).
682
+ 4) Rencana aksi 30–60 hari (naratif, bukan bullet).
683
 
684
  BATASAN:
685
+ - Jangan membahas indeks/skor IPLM.
686
+ - Fokus hanya pada kekurangan sampel, target 68%, dan strategi pelengkapannya.
687
  """
688
 
689
  try:
 
721
  doc.add_paragraph(f"Target pengumpulan: {int(TARGET_COVERAGE*100)}% dari populasi unit (meta).")
722
  doc.add_paragraph(f"Jumlah unit analisis: {len(verif_df)}")
723
 
724
+ doc.add_heading("Tabel Verifikasi (Target & Kekurangan Sampel)", level=2)
725
+
726
  view = verif_df.copy()
727
  if len(view) > 200:
728
  doc.add_paragraph("Catatan: tabel dipotong (200 baris pertama) untuk menjaga ukuran dokumen.")
 
742
  if not HAS_KALEIDO:
743
  doc.add_paragraph("Grafik pie tidak dibuat karena 'kaleido' tidak tersedia di server.")
744
  else:
745
+ # buat pie total capai vs target kalau ada pasangan kolom sampel-target
746
  pie_made = False
 
747
  if "Sampel Sekolah" in verif_df.columns and "Target Sekolah (68%)" in verif_df.columns:
748
  samp = pd.to_numeric(verif_df["Sampel Sekolah"], errors="coerce").fillna(0).sum()
749
  tgt = pd.to_numeric(verif_df["Target Sekolah (68%)"], errors="coerce").fillna(0).sum()
750
+ img = make_pie_plotly(samp, tgt, "Capaian Sekolah (Total) terhadap Target")
751
  if img:
752
+ doc.add_paragraph("Capaian Sekolah (Total) terhadap Target")
753
+ doc.add_picture(img)
754
  pie_made = True
755
 
756
  if (not pie_made) and ("Sampel Umum" in verif_df.columns and "Target Umum (68%)" in verif_df.columns):
757
  samp = pd.to_numeric(verif_df["Sampel Umum"], errors="coerce").fillna(0).sum()
758
  tgt = pd.to_numeric(verif_df["Target Umum (68%)"], errors="coerce").fillna(0).sum()
759
+ img = make_pie_plotly(samp, tgt, "Capaian Umum (Total) terhadap Target")
760
  if img:
761
+ doc.add_paragraph("Capaian Umum (Total) terhadap Target")
762
+ doc.add_picture(img)
763
  pie_made = True
764
 
765
  if (not pie_made) and ("Sampel SMA (DM)" in verif_df.columns and "Target SMA (68%)" in verif_df.columns):
766
  samp = pd.to_numeric(verif_df["Sampel SMA (DM)"], errors="coerce").fillna(0).sum()
767
  tgt = pd.to_numeric(verif_df["Target SMA (68%)"], errors="coerce").fillna(0).sum()
768
+ img = make_pie_plotly(samp, tgt, "Capaian SMA (Total) terhadap Target")
769
  if img:
770
+ doc.add_paragraph("Capaian SMA (Total) terhadap Target")
771
+ doc.add_picture(img)
772
  pie_made = True
773
 
774
  if not pie_made:
 
799
 
800
  df = df_all_raw.copy()
801
 
802
+ # filter prov (pakai prov_clean)
803
+ if prov_value and prov_value != "(Semua)" and "prov_clean" in df.columns:
804
+ df = df[df["prov_clean"].astype(str).str.strip() == str(prov_value).strip()]
805
 
806
+ # filter kab/kota (pakai kab_clean)
807
+ if kab_value and kab_value != "(Semua)" and "kab_clean" in df.columns:
808
+ df = df[df["kab_clean"].astype(str).str.strip() == str(kab_value).strip()]
809
 
810
+ # filter kewenangan
811
  if kew_value and kew_value != "(Semua)":
812
  df = df[df["KEW_NORM"] == kew_value]
813
 
 
824
 
825
  # detail subset DM untuk UI (ringkas)
826
  cols = []
827
+ for c in ["prov_clean", "kab_clean", nama_col_glob, kew_col_glob, jenis_col_glob, subjenis_col_glob, "_dataset", "KEW_NORM"]:
828
  if c and c in df.columns and c not in cols:
829
  cols.append(c)
830
  detail_df = df[cols].copy() if cols else df.copy()
 
834
 
835
  # simpan file download
836
  tmpdir = tempfile.mkdtemp()
837
+ rekap_excel_path = os.path.join(tmpdir, "Rekap_Kekurangan_Sampel_IPLM_Target.xlsx")
838
  raw_dm_path = os.path.join(tmpdir, "DM_Subset_Raw.xlsx")
839
 
840
  with pd.ExcelWriter(rekap_excel_path, engine="openpyxl") as w:
841
+ verif_df.to_excel(w, sheet_name="Verifikasi_Gap_Target", index=False)
842
  detail_df.to_excel(w, sheet_name="Detail_Subset_DM", index=False)
843
 
844
  df.to_excel(raw_dm_path, index=False)
 
867
 
868
 
869
  # ============================================================
870
+ # 10) BUILD UI
871
  # ============================================================
872
  with gr.Blocks() as demo:
873
  gr.Markdown(
874
  f"""
875
+ # Dashboard Kekurangan Sampel IPLM β€” Target {int(TARGET_COVERAGE*100)}% (Tanpa Hitung Indeks)
876
 
877
+ Aplikasi ini menghitung **berapa unit lagi yang harus dikumpulkan** agar memenuhi target minimal representasi.
878
 
879
  **File:**
880
  - `{DATA_FILE}` (DM)
 
896
  run_btn = gr.Button("Hitung Kekurangan Sampel")
897
  msg_out = gr.Markdown()
898
 
899
+ gr.Markdown("### Verifikasi (Target & Kekurangan Sampel)")
900
  verif_out = gr.DataFrame(interactive=False)
901
 
902
  gr.Markdown("### Grafik Kekurangan Sampel (berapa unit lagi yang harus dikumpulkan)")