irhamni committed on
Commit
cbf7209
·
verified ·
1 Parent(s): 5bc4401

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +203 -77
app.py CHANGED
@@ -1,24 +1,26 @@
1
  # -*- coding: utf-8 -*-
2
  """
3
- app.py — Dashboard Kekurangan Sampel IPLM (TANPA HITUNG INDEKS)
4
 
5
  Fokus:
6
- - Mengecek "kekurangan sampel" pengumpulan data IPLM per wilayah
7
- - Bandingkan sampel yang sudah masuk (DM) vs populasi target (META):
8
- - Kab/Kota: SD+SMP (meta SD/SMP) dan Kec+Desa/Kel (meta jumlah desa)
9
- - Provinsi: SMA (meta SMA provinsi)
 
 
 
 
10
 
11
  Fitur:
12
  - Filter: Provinsi, Kab/Kota, Kewenangan
13
  - Tabel Verifikasi Coverage & Kekurangan Sampel
14
  - Tabel Detail Subset DM (ringkas)
 
15
  - Download:
16
  1) Rekap Excel (verifikasi + detail ringkas)
17
  2) Data mentah subset DM (RAW) sesuai filter user
18
  3) Laporan Word (narasi LLM + tabel verifikasi + pie ringkasan opsional)
19
-
20
- Catatan:
21
- - Tidak ada perhitungan Indeks IPLM sama sekali.
22
  """
23
 
24
  import os
@@ -32,6 +34,9 @@ import numpy as np
32
  import pandas as pd
33
  from huggingface_hub import InferenceClient
34
 
 
 
 
35
  # Word report
36
  from docx import Document
37
  from docx.shared import Inches
@@ -53,6 +58,10 @@ META_KAB_FILE = "jumlahdesa_fixed (1).xlsx" # kecamatan & desa/kel per kab/k
53
  META_SDSMP_FILE = "SD-SMP-kab.xlsx" # jumlah SD & SMP per kab/kota
54
  META_SMA_FILE = "SMA.xlsx" # jumlah SMA per provinsi
55
 
 
 
 
 
56
 
57
  # ============================================================
58
  # 1b) KONFIGURASI LLM (Hugging Face Inference)
@@ -61,7 +70,7 @@ USE_LLM = True
61
  LLM_MODEL_NAME = "meta-llama/Meta-Llama-3-8B-Instruct"
62
 
63
  HF_TOKEN = (
64
- os.getenv("HF_SECRET")
65
  or os.getenv("HUGGINGFACEHUB_API_TOKEN")
66
  or os.getenv("HF_API_TOKEN")
67
  )
@@ -189,7 +198,7 @@ def make_pie_plotly(num, den, title):
189
  values = [max(num, 0), max(den - num, 0)]
190
  labels = ["Terjangkau", "Belum Terjangkau"]
191
 
192
- fig = px.pie(values=values, names=labels, title=title, hole=0.3)
193
  tmp = tempfile.mktemp(suffix=".png")
194
  try:
195
  fig.write_image(tmp, scale=2)
@@ -205,7 +214,7 @@ DATA_INFO = ""
205
  df_all_raw = None
206
 
207
  meta_kab_df = None # kab_key -> (Jml_Kecamatan, Jml_DesaKel, Jml_SD, Jml_SMP)
208
- meta_sma_df = None # prov_key -> Jml_SMA
209
 
210
  prov_col_glob = None
211
  kab_col_glob = None
@@ -230,7 +239,6 @@ try:
230
  subjenis_col_glob = pick_col(df_all_raw, ["sub_jenis_perpus", "Sub Jenis", "SubJenis", "subjenis", "jenjang"])
231
  nama_col_glob = pick_col(df_all_raw, ["nama_perpustakaan", "nm_perpustakaan", "nm_instansi_lembaga", "Nama Perpustakaan"])
232
 
233
- # kewenangan norm
234
  if kew_col_glob:
235
  df_all_raw["KEW_NORM"] = df_all_raw[kew_col_glob].apply(norm_kew)
236
  else:
@@ -386,7 +394,7 @@ default_kew = "KAB/KOTA" if "KAB/KOTA" in kew_choices else (kew_choices[0] if k
386
 
387
 
388
  # ============================================================
389
- # 5) INTI: HITUNG COVERAGE & GAP
390
  # ============================================================
391
  def compute_gap_verification(df_filtered: pd.DataFrame, kew_value: str) -> pd.DataFrame:
392
  if df_filtered is None or len(df_filtered) == 0:
@@ -416,7 +424,7 @@ def compute_gap_verification(df_filtered: pd.DataFrame, kew_value: str) -> pd.Da
416
  tmp["jenjang"] = "OTHER"
417
 
418
  tmp_sek = tmp[tmp["_dataset"] == "sekolah"].copy() if "_dataset" in tmp.columns else tmp.copy()
419
- g_sek_total = tmp_sek.groupby("kab_key").size().rename("Sampel_Sekolah_Total").reset_index()
420
  g_sd = tmp_sek[tmp_sek["jenjang"] == "SD"].groupby("kab_key").size().rename("Sampel_SD").reset_index()
421
  g_smp = tmp_sek[tmp_sek["jenjang"] == "SMP"].groupby("kab_key").size().rename("Sampel_SMP").reset_index()
422
 
@@ -436,43 +444,50 @@ def compute_gap_verification(df_filtered: pd.DataFrame, kew_value: str) -> pd.Da
436
  .merge(meta_kab_df[use_cols], on="kab_key", how="left")
437
  )
438
 
439
- for c in ["Sampel_Total", "Sampel_Sekolah_Total", "Sampel_SD", "Sampel_SMP", "Sampel_Umum"]:
440
  if c in merged.columns:
441
  merged[c] = merged[c].fillna(0).astype(int)
442
 
443
  merged["Pop_SD_SMP"] = merged[["Jml_SD", "Jml_SMP"]].sum(axis=1, skipna=True)
444
  merged["Pop_Kec_DesaKel"] = merged.get("Jml_Kecamatan", np.nan) + merged.get("Jml_DesaKel", np.nan)
445
 
446
- merged["Coverage_Sekolah_%"] = merged.apply(
447
- lambda r: safe_pct(r["Sampel_Sekolah_Total"], r.get("Pop_SD_SMP", np.nan)), axis=1
 
 
 
 
 
448
  )
449
- merged["Coverage_Umum_%"] = merged.apply(
450
- lambda r: safe_pct(r["Sampel_Umum"], r.get("Pop_Kec_DesaKel", np.nan)), axis=1
451
  )
452
 
453
- # GAP (kekurangan sampel) -> asumsi target = 100% populasi
454
- merged["Gap_Sekolah"] = merged.apply(
455
- lambda r: max(int(math.ceil(r["Pop_SD_SMP"] - r["Sampel_Sekolah_Total"]))
456
- if pd.notna(r["Pop_SD_SMP"]) else 0, 0),
457
  axis=1
458
  )
459
- merged["Gap_Umum"] = merged.apply(
460
- lambda r: max(int(math.ceil(r["Pop_Kec_DesaKel"] - r["Sampel_Umum"]))
461
- if pd.notna(r["Pop_Kec_DesaKel"]) else 0, 0),
462
  axis=1
463
  )
464
 
465
  out = pd.DataFrame({
466
  "Kab/Kota": merged.get("Kab_Kota_Label", merged["kab_key"]),
467
  "Sampel Total": merged["Sampel_Total"],
468
- "Sampel Sekolah (Total)": merged["Sampel_Sekolah_Total"],
 
469
  "Populasi Sekolah (SD+SMP)": merged["Pop_SD_SMP"],
470
- "Coverage Sekolah (%)": merged["Coverage_Sekolah_%"],
471
- "Kekurangan Sampel Sekolah": merged["Gap_Sekolah"],
 
 
472
  "Sampel Umum": merged["Sampel_Umum"],
473
  "Populasi Admin (Kec+Desa/Kel)": merged["Pop_Kec_DesaKel"],
474
- "Coverage Umum (%)": merged["Coverage_Umum_%"],
475
- "Kekurangan Sampel Umum": merged["Gap_Umum"],
 
476
  })
477
 
478
  return out.sort_values("Kab/Kota").reset_index(drop=True).round(3)
@@ -491,7 +506,7 @@ def compute_gap_verification(df_filtered: pd.DataFrame, kew_value: str) -> pd.Da
491
 
492
  tmp["prov_key"] = tmp[prov_col_glob].apply(norm_prov_label)
493
 
494
- # IMPORTANT: start dari sampel (biar tidak munculin provinsi lain dari meta)
495
  g_total = tmp.groupby("prov_key").size().rename("Sampel_Total").reset_index()
496
 
497
  tmp_sek = tmp[tmp["_dataset"] == "sekolah"].copy() if "_dataset" in tmp.columns else tmp.copy()
@@ -505,22 +520,27 @@ def compute_gap_verification(df_filtered: pd.DataFrame, kew_value: str) -> pd.Da
505
 
506
  merged["Sampel_SMA"] = merged["Sampel_SMA"].fillna(0).astype(int)
507
 
508
- merged["Coverage_SMA_%"] = merged.apply(
509
- lambda r: safe_pct(r["Sampel_SMA"], r.get("Jml_SMA", np.nan)), axis=1
 
 
 
510
  )
511
- merged["Kekurangan Sampel SMA"] = merged.apply(
512
- lambda r: max(int(math.ceil(r["Jml_SMA"] - r["Sampel_SMA"]))
513
- if pd.notna(r["Jml_SMA"]) else 0, 0),
514
  axis=1
515
  )
516
 
517
  out = pd.DataFrame({
518
  "Provinsi": merged["Provinsi_Label"].fillna(merged["prov_key"]),
519
  "Sampel Total (Prov)": merged["Sampel_Total"].fillna(0).astype(int),
520
- "Sampel SMA (di DM)": merged["Sampel_SMA"],
 
521
  "Populasi SMA (Meta)": merged["Jml_SMA"],
522
- "Coverage SMA (%)": merged["Coverage_SMA_%"],
523
- "Kekurangan Sampel SMA": merged["Kekurangan Sampel SMA"],
 
524
  })
525
 
526
  return out.sort_values("Provinsi").reset_index(drop=True).round(3)
@@ -529,21 +549,101 @@ def compute_gap_verification(df_filtered: pd.DataFrame, kew_value: str) -> pd.Da
529
 
530
 
531
  # ============================================================
532
- # 6) LLM REPORT (GAP)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
533
  # ============================================================
534
  def build_context_gap(verif_df: pd.DataFrame, prov: str, kab: str, kew: str) -> str:
535
  wilayah = kab if kab and kab != "(Semua)" else (prov if prov and prov != "(Semua)" else "NASIONAL")
536
  lines = []
537
  lines.append(f"Wilayah filter: {wilayah}")
538
  lines.append(f"Kewenangan: {kew}")
 
539
  lines.append(f"Jumlah baris verifikasi: {len(verif_df)}")
540
 
541
- gap_cols = [c for c in verif_df.columns if "Kekurangan" in c]
542
  for gc in gap_cols:
543
  total_gap = int(pd.to_numeric(verif_df[gc], errors="coerce").fillna(0).sum())
544
  lines.append(f"Total {gc}: {total_gap}")
545
 
546
- # top prioritas (ambil kolom gap pertama)
547
  if gap_cols:
548
  gc = gap_cols[0]
549
  t = verif_df.copy()
@@ -566,16 +666,17 @@ def rule_based_gap_report(verif_df: pd.DataFrame, prov: str, kab: str, kew: str)
566
  lines.append("## Ringkasan Kekurangan Sampel IPLM (Rule-based)\n")
567
  lines.append(f"Wilayah: {wilayah}")
568
  lines.append(f"Kewenangan: {kew}")
 
569
  lines.append(f"Jumlah unit analisis: {len(verif_df)}\n")
570
 
571
- gap_cols = [c for c in verif_df.columns if "Kekurangan" in c]
572
  if not gap_cols:
573
  lines.append("Kolom kekurangan sampel tidak ditemukan.")
574
  return "\n".join(lines)
575
 
576
  for gc in gap_cols:
577
  total_gap = int(pd.to_numeric(verif_df[gc], errors="coerce").fillna(0).sum())
578
- lines.append(f"- Total {gc}: **{total_gap}** unit yang perlu dilengkapi.")
579
 
580
  lines.append(
581
  "\nRekomendasi operasional: prioritaskan pengumpulan data pada wilayah dengan gap terbesar, "
@@ -593,7 +694,7 @@ def generate_llm_gap_report(verif_df: pd.DataFrame, prov: str, kab: str, kew: st
593
  system_prompt = (
594
  "Anda adalah analis kebijakan dan manajer program IPLM. "
595
  "Tugas Anda menyusun narasi singkat dan tegas tentang kekurangan sampel data IPLM "
596
- "serta strategi pengumpulan data untuk menutup gap."
597
  )
598
 
599
  user_prompt = f"""
@@ -603,13 +704,13 @@ DATA RINGKAS GAP SAMPEL IPLM:
603
 
604
  TULIS LAPORAN (BAHASA INDONESIA FORMAL) DENGAN STRUKTUR:
605
  1) Ringkasan kondisi pengumpulan data (1 paragraf).
606
- 2) Angka total kekurangan sampel yang masih perlu dikumpulkan (1 paragraf).
607
  3) Prioritas wilayah (top gap) dan alasan operasionalnya (1 paragraf).
608
  4) Rencana aksi 30–60 hari (paragraf naratif, bukan bullet).
609
 
610
  BATASAN:
611
  - Jangan bahas indeks / skor IPLM sama sekali.
612
- - Fokus murni pada coverage, kekurangan sampel, dan strategi pelengkapannya.
613
  """
614
 
615
  try:
@@ -636,7 +737,7 @@ BATASAN:
636
 
637
 
638
  # ============================================================
639
- # 7) WORD REPORT
640
  # ============================================================
641
  def generate_word_report_gap(verif_df: pd.DataFrame, prov: str, kab: str, kew: str, analysis_text: str):
642
  wilayah = kab if kab and kab != "(Semua)" else (prov if prov and prov != "(Semua)" else "NASIONAL")
@@ -644,6 +745,7 @@ def generate_word_report_gap(verif_df: pd.DataFrame, prov: str, kab: str, kew: s
644
  doc = Document()
645
  doc.add_heading(f"Laporan Kekurangan Sampel IPLM – {wilayah}", level=1)
646
  doc.add_paragraph(f"Kewenangan: {kew}")
 
647
  doc.add_paragraph(f"Jumlah unit analisis: {len(verif_df)}")
648
 
649
  doc.add_heading("Tabel Verifikasi Coverage & Kekurangan Sampel", level=2)
@@ -667,26 +769,36 @@ def generate_word_report_gap(verif_df: pd.DataFrame, prov: str, kab: str, kew: s
667
  doc.add_paragraph("Grafik pie tidak dibuat karena 'kaleido' tidak tersedia di server.")
668
  else:
669
  pie_made = False
670
- # Ringkas sekolah kab/kota
671
- if "Sampel Sekolah (Total)" in verif_df.columns and "Populasi Sekolah (SD+SMP)" in verif_df.columns:
672
- samp = pd.to_numeric(verif_df["Sampel Sekolah (Total)"], errors="coerce").fillna(0).sum()
673
- pop = pd.to_numeric(verif_df["Populasi Sekolah (SD+SMP)"], errors="coerce").fillna(0).sum()
674
- img = make_pie_plotly(samp, pop, "Coverage Perpustakaan Sekolah (Total)")
 
 
 
 
 
 
 
 
 
 
675
  if img:
676
  doc.add_picture(img, width=Inches(5))
677
  pie_made = True
678
 
679
- # Ringkas SMA provinsi
680
- if (not pie_made) and ("Sampel SMA (di DM)" in verif_df.columns and "Populasi SMA (Meta)" in verif_df.columns):
681
- samp = pd.to_numeric(verif_df["Sampel SMA (di DM)"], errors="coerce").fillna(0).sum()
682
- pop = pd.to_numeric(verif_df["Populasi SMA (Meta)"], errors="coerce").fillna(0).sum()
683
- img = make_pie_plotly(samp, pop, "Coverage Perpustakaan SMA (Total)")
684
  if img:
685
  doc.add_picture(img, width=Inches(5))
686
  pie_made = True
687
 
688
  if not pie_made:
689
- doc.add_paragraph("Tidak ada pasangan kolom sampel-populasi yang valid untuk dibuat pie chart.")
690
 
691
  doc.add_heading("Analisis Naratif (LLM)", level=2)
692
  for p in analysis_text.split("\n"):
@@ -699,13 +811,13 @@ def generate_word_report_gap(verif_df: pd.DataFrame, prov: str, kab: str, kew: s
699
 
700
 
701
  # ============================================================
702
- # 8) CORE RUN (FILTER + EXPORT)
703
  # ============================================================
704
  def run_core(prov_value, kab_value, kew_value):
705
  if df_all_raw is None or df_all_raw.empty:
706
  empty = pd.DataFrame()
707
  return (
708
- empty, empty,
709
  None, None, None,
710
  "Data DM tidak terbaca.",
711
  "Tidak ada analisis."
@@ -728,13 +840,12 @@ def run_core(prov_value, kab_value, kew_value):
728
  if len(df) == 0:
729
  empty = pd.DataFrame()
730
  return (
731
- empty, empty,
732
  None, None, None,
733
  "Tidak ada data untuk kombinasi filter yang dipilih.",
734
  "Tidak ada analisis."
735
  )
736
 
737
- # hitung verifikasi gap
738
  verif_df = compute_gap_verification(df, kew_value)
739
 
740
  # detail subset untuk UI (ringkas)
@@ -744,29 +855,32 @@ def run_core(prov_value, kab_value, kew_value):
744
  cols.append(c)
745
  detail_df = df[cols].copy() if cols else df.copy()
746
 
 
 
 
747
  # simpan file download
748
  tmpdir = tempfile.mkdtemp()
749
- rekap_excel_path = os.path.join(tmpdir, "Rekap_Kekurangan_Sampel_IPLM.xlsx")
750
  raw_dm_path = os.path.join(tmpdir, "DM_Subset_Raw.xlsx")
751
 
752
- # 1) rekap excel (verif + detail ringkas)
753
  with pd.ExcelWriter(rekap_excel_path, engine="openpyxl") as w:
754
- verif_df.to_excel(w, sheet_name="Verifikasi_Gap", index=False)
755
  detail_df.to_excel(w, sheet_name="Detail_Subset_DM", index=False)
756
 
757
- # 2) raw dm subset (SEMUA kolom DM hasil filter user)
758
  df.to_excel(raw_dm_path, index=False)
759
 
760
- # 3) analisis LLM
761
  analysis_text = generate_llm_gap_report(verif_df, prov_value, kab_value, kew_value)
762
-
763
- # 4) word report
764
  word_path = generate_word_report_gap(verif_df, prov_value, kab_value, kew_value, analysis_text)
765
 
766
- msg = f"OK. Subset DM: {len(df)} baris | Verifikasi: {len(verif_df)} baris."
 
 
 
 
767
  return (
768
  verif_df,
769
  detail_df,
 
770
  rekap_excel_path,
771
  raw_dm_path,
772
  word_path,
@@ -779,15 +893,15 @@ def on_prov_change(prov_value):
779
 
780
 
781
  # ============================================================
782
- # 9) UI GRADIO
783
  # ============================================================
784
  with gr.Blocks() as demo:
785
  gr.Markdown(
786
  f"""
787
- # Dashboard Kekurangan Sampel IPLM (Tanpa Hitung Indeks)
788
 
789
  Aplikasi ini hanya mengecek **kekurangan sampel** berdasarkan:
790
- - **DM (sampel masuk)** vs **Meta populasi (SD/SMP, SMA, Kec/DesaKel)**
791
 
792
  **File:**
793
  - `{DATA_FILE}` (DM)
@@ -809,9 +923,12 @@ Aplikasi ini hanya mengecek **kekurangan sampel** berdasarkan:
809
  run_btn = gr.Button("Hitung Kekurangan Sampel")
810
  msg_out = gr.Markdown()
811
 
812
- gr.Markdown("### Verifikasi Coverage & Kekurangan Sampel")
813
  verif_out = gr.DataFrame(interactive=False)
814
 
 
 
 
815
  gr.Markdown("### Detail Subset DM (yang terfilter)")
816
  detail_out = gr.DataFrame(interactive=False)
817
 
@@ -826,7 +943,16 @@ Aplikasi ini hanya mengecek **kekurangan sampel** berdasarkan:
826
  run_btn.click(
827
  fn=run_core,
828
  inputs=[dd_prov, dd_kab, dd_kew],
829
- outputs=[verif_out, detail_out, rekap_excel_out, raw_dm_out, word_out, msg_out, analysis_out],
 
 
 
 
 
 
 
 
 
830
  )
831
 
832
  demo.launch()
 
1
  # -*- coding: utf-8 -*-
2
  """
3
+ app.py — Dashboard Kekurangan Sampel IPLM (TANPA HITUNG INDEKS) + Grafik Progress
4
 
5
  Fokus:
6
+ - Cek "kekurangan sampel" pengumpulan data IPLM per wilayah
7
+ - Target pengumpulan BUKAN 100% populasi, tetapi 68% dari populasi (TARGET_COVERAGE=0.68)
8
+ - Bandingkan sampel (DM) vs target 68% populasi (META):
9
+ - KAB/KOTA:
10
+ * Sekolah: target = 68% dari (SD+SMP)
11
+ * Umum: target = 68% dari (Kecamatan + Desa/Kelurahan)
12
+ - PROVINSI:
13
+ * SMA: target = 68% dari (Total SMA)
14
 
15
  Fitur:
16
  - Filter: Provinsi, Kab/Kota, Kewenangan
17
  - Tabel Verifikasi Coverage & Kekurangan Sampel
18
  - Tabel Detail Subset DM (ringkas)
19
+ - Grafik progress (coverage terhadap target 68%) per unit wilayah
20
  - Download:
21
  1) Rekap Excel (verifikasi + detail ringkas)
22
  2) Data mentah subset DM (RAW) sesuai filter user
23
  3) Laporan Word (narasi LLM + tabel verifikasi + pie ringkasan opsional)
 
 
 
24
  """
25
 
26
  import os
 
34
  import pandas as pd
35
  from huggingface_hub import InferenceClient
36
 
37
+ # Plot
38
+ import plotly.graph_objects as go
39
+
40
  # Word report
41
  from docx import Document
42
  from docx.shared import Inches
 
58
  META_SDSMP_FILE = "SD-SMP-kab.xlsx" # jumlah SD & SMP per kab/kota
59
  META_SMA_FILE = "SMA.xlsx" # jumlah SMA per provinsi
60
 
61
+ # ============================================================
62
+ # 1a) TARGET CAKUPAN SAMPEL (KEBIJAKAN)
63
+ # ============================================================
64
+ TARGET_COVERAGE = 0.68 # 68% dari populasi
65
 
66
  # ============================================================
67
  # 1b) KONFIGURASI LLM (Hugging Face Inference)
 
70
  LLM_MODEL_NAME = "meta-llama/Meta-Llama-3-8B-Instruct"
71
 
72
  HF_TOKEN = (
73
+ os.getenv("HF_TOKEN")
74
  or os.getenv("HUGGINGFACEHUB_API_TOKEN")
75
  or os.getenv("HF_API_TOKEN")
76
  )
 
198
  values = [max(num, 0), max(den - num, 0)]
199
  labels = ["Terjangkau", "Belum Terjangkau"]
200
 
201
+ fig = px.pie(values=values, names=labels, title=title, hole=0.35)
202
  tmp = tempfile.mktemp(suffix=".png")
203
  try:
204
  fig.write_image(tmp, scale=2)
 
214
  df_all_raw = None
215
 
216
  meta_kab_df = None # kab_key -> (Jml_Kecamatan, Jml_DesaKel, Jml_SD, Jml_SMP)
217
+ meta_sma_df = None # prov_key -> (Jml_SMA)
218
 
219
  prov_col_glob = None
220
  kab_col_glob = None
 
239
  subjenis_col_glob = pick_col(df_all_raw, ["sub_jenis_perpus", "Sub Jenis", "SubJenis", "subjenis", "jenjang"])
240
  nama_col_glob = pick_col(df_all_raw, ["nama_perpustakaan", "nm_perpustakaan", "nm_instansi_lembaga", "Nama Perpustakaan"])
241
 
 
242
  if kew_col_glob:
243
  df_all_raw["KEW_NORM"] = df_all_raw[kew_col_glob].apply(norm_kew)
244
  else:
 
394
 
395
 
396
  # ============================================================
397
+ # 5) INTI: HITUNG COVERAGE & GAP (TARGET 68%)
398
  # ============================================================
399
  def compute_gap_verification(df_filtered: pd.DataFrame, kew_value: str) -> pd.DataFrame:
400
  if df_filtered is None or len(df_filtered) == 0:
 
424
  tmp["jenjang"] = "OTHER"
425
 
426
  tmp_sek = tmp[tmp["_dataset"] == "sekolah"].copy() if "_dataset" in tmp.columns else tmp.copy()
427
+ g_sek_total = tmp_sek.groupby("kab_key").size().rename("Sampel_Sekolah").reset_index()
428
  g_sd = tmp_sek[tmp_sek["jenjang"] == "SD"].groupby("kab_key").size().rename("Sampel_SD").reset_index()
429
  g_smp = tmp_sek[tmp_sek["jenjang"] == "SMP"].groupby("kab_key").size().rename("Sampel_SMP").reset_index()
430
 
 
444
  .merge(meta_kab_df[use_cols], on="kab_key", how="left")
445
  )
446
 
447
+ for c in ["Sampel_Total", "Sampel_Sekolah", "Sampel_SD", "Sampel_SMP", "Sampel_Umum"]:
448
  if c in merged.columns:
449
  merged[c] = merged[c].fillna(0).astype(int)
450
 
451
  merged["Pop_SD_SMP"] = merged[["Jml_SD", "Jml_SMP"]].sum(axis=1, skipna=True)
452
  merged["Pop_Kec_DesaKel"] = merged.get("Jml_Kecamatan", np.nan) + merged.get("Jml_DesaKel", np.nan)
453
 
454
+ # TARGET 68%
455
+ merged["Target_Sekolah_68"] = np.ceil(merged["Pop_SD_SMP"] * TARGET_COVERAGE)
456
+ merged["Target_Umum_68"] = np.ceil(merged["Pop_Kec_DesaKel"] * TARGET_COVERAGE)
457
+
458
+ # Coverage terhadap target (100% = target terpenuhi)
459
+ merged["Progress_Sekolah_%"] = merged.apply(
460
+ lambda r: safe_pct(r["Sampel_Sekolah"], r.get("Target_Sekolah_68", np.nan)), axis=1
461
  )
462
+ merged["Progress_Umum_%"] = merged.apply(
463
+ lambda r: safe_pct(r["Sampel_Umum"], r.get("Target_Umum_68", np.nan)), axis=1
464
  )
465
 
466
+ # Kekurangan terhadap target 68%
467
+ merged["Kekurangan_Sekolah"] = merged.apply(
468
+ lambda r: max(int(r["Target_Sekolah_68"] - r["Sampel_Sekolah"]) if pd.notna(r["Target_Sekolah_68"]) else 0, 0),
 
469
  axis=1
470
  )
471
+ merged["Kekurangan_Umum"] = merged.apply(
472
+ lambda r: max(int(r["Target_Umum_68"] - r["Sampel_Umum"]) if pd.notna(r["Target_Umum_68"]) else 0, 0),
 
473
  axis=1
474
  )
475
 
476
  out = pd.DataFrame({
477
  "Kab/Kota": merged.get("Kab_Kota_Label", merged["kab_key"]),
478
  "Sampel Total": merged["Sampel_Total"],
479
+
480
+ "Sampel Sekolah": merged["Sampel_Sekolah"],
481
  "Populasi Sekolah (SD+SMP)": merged["Pop_SD_SMP"],
482
+ "Target Sekolah (68%)": merged["Target_Sekolah_68"],
483
+ "Progress Sekolah (% dari target)": merged["Progress_Sekolah_%"],
484
+ "Kekurangan Sampel Sekolah": merged["Kekurangan_Sekolah"],
485
+
486
  "Sampel Umum": merged["Sampel_Umum"],
487
  "Populasi Admin (Kec+Desa/Kel)": merged["Pop_Kec_DesaKel"],
488
+ "Target Umum (68%)": merged["Target_Umum_68"],
489
+ "Progress Umum (% dari target)": merged["Progress_Umum_%"],
490
+ "Kekurangan Sampel Umum": merged["Kekurangan_Umum"],
491
  })
492
 
493
  return out.sort_values("Kab/Kota").reset_index(drop=True).round(3)
 
506
 
507
  tmp["prov_key"] = tmp[prov_col_glob].apply(norm_prov_label)
508
 
509
+ # START dari sampel (biar tidak munculin provinsi lain)
510
  g_total = tmp.groupby("prov_key").size().rename("Sampel_Total").reset_index()
511
 
512
  tmp_sek = tmp[tmp["_dataset"] == "sekolah"].copy() if "_dataset" in tmp.columns else tmp.copy()
 
520
 
521
  merged["Sampel_SMA"] = merged["Sampel_SMA"].fillna(0).astype(int)
522
 
523
+ # TARGET 68%
524
+ merged["Target_SMA_68"] = np.ceil(merged["Jml_SMA"] * TARGET_COVERAGE)
525
+
526
+ merged["Progress_SMA_%"] = merged.apply(
527
+ lambda r: safe_pct(r["Sampel_SMA"], r.get("Target_SMA_68", np.nan)), axis=1
528
  )
529
+
530
+ merged["Kekurangan_SMA"] = merged.apply(
531
+ lambda r: max(int(r["Target_SMA_68"] - r["Sampel_SMA"]) if pd.notna(r["Target_SMA_68"]) else 0, 0),
532
  axis=1
533
  )
534
 
535
  out = pd.DataFrame({
536
  "Provinsi": merged["Provinsi_Label"].fillna(merged["prov_key"]),
537
  "Sampel Total (Prov)": merged["Sampel_Total"].fillna(0).astype(int),
538
+
539
+ "Sampel SMA (DM)": merged["Sampel_SMA"],
540
  "Populasi SMA (Meta)": merged["Jml_SMA"],
541
+ "Target SMA (68%)": merged["Target_SMA_68"],
542
+ "Progress SMA (% dari target)": merged["Progress_SMA_%"],
543
+ "Kekurangan Sampel SMA": merged["Kekurangan_SMA"],
544
  })
545
 
546
  return out.sort_values("Provinsi").reset_index(drop=True).round(3)
 
549
 
550
 
551
  # ============================================================
552
+ # 6) GRAFIK PROGRESS (Plotly)
553
+ # ============================================================
554
def make_progress_figure(verif_df: pd.DataFrame, kew_value: str) -> go.Figure:
    """Build a bar chart of collection progress relative to the target.

    One bar is drawn per row of ``verif_df`` (per Kab/Kota or per Provinsi),
    using the "Progress ... (% dari target)" columns. A dashed horizontal
    line marks 100%, i.e. the target being met.

    Args:
        verif_df: Verification table. The function reads the label column
            ("Kab/Kota" or "Provinsi") and the matching progress columns;
            a missing progress column is treated as all-NaN.
        kew_value: Selected authority level. It is upper-cased and matched
            by substring ("KAB"/"KOTA" or "PROV").

    Returns:
        A ``go.Figure``. When ``verif_df`` is None/empty, or when neither the
        Kab/Kota nor the Provinsi layout matches, the figure has no traces and
        only carries an explanatory title.
    """
    # Bars are clipped to this range so that one extreme unit does not
    # flatten every other bar; the hover text still reports the real value.
    display_cap = 120
    hover = "%{x}<br>%{customdata:.1f}%<extra></extra>"

    fig = go.Figure()

    if verif_df is None or verif_df.empty:
        fig.update_layout(
            title="Progress Pengumpulan (tidak ada data)",
            xaxis_title="Unit",
            yaxis_title="% dari target 68%",
        )
        return fig

    def _add_progress_bar(labels, column, trace_name):
        """Add one bar trace for ``column`` (capped bars, uncapped hover)."""
        if column in verif_df.columns:
            source = verif_df[column]
        else:
            source = pd.Series([np.nan] * len(verif_df))
        actual = pd.to_numeric(source, errors="coerce")
        fig.add_trace(go.Bar(
            x=labels,
            y=actual.clip(lower=0, upper=display_cap),
            customdata=actual,  # true percentage, shown in the tooltip
            name=trace_name,
            hovertemplate=hover,
        ))

    kew_norm = str(kew_value or "").upper()

    if ("KAB" in kew_norm or "KOTA" in kew_norm) and "Kab/Kota" in verif_df.columns:
        unit_col = "Kab/Kota"
        traces = [
            ("Progress Sekolah (% dari target)", "Sekolah (SD+SMP) — % dari target"),
            ("Progress Umum (% dari target)", "Umum (Kec+Desa/Kel) — % dari target"),
        ]
        layout = {
            "title": "Progress Pengumpulan Data (KAB/KOTA) — terhadap Target 68%",
            "barmode": "group",
        }
    elif "PROV" in kew_norm and "Provinsi" in verif_df.columns:
        unit_col = "Provinsi"
        traces = [
            ("Progress SMA (% dari target)", "SMA — % dari target"),
        ]
        layout = {
            "title": "Progress Pengumpulan Data (PROVINSI) — SMA terhadap Target 68%",
        }
    else:
        # Neither table layout matches the selected authority level.
        fig.update_layout(
            title="Progress Pengumpulan — format data tidak dikenali",
            xaxis_title="Unit",
            yaxis_title="% dari target 68%",
        )
        return fig

    labels = verif_df[unit_col].astype(str).tolist()
    for column, trace_name in traces:
        _add_progress_bar(labels, column, trace_name)

    # Reference line: 100% means the target has been reached.
    fig.add_hline(y=100, line_dash="dash")

    fig.update_layout(
        xaxis_title=unit_col,
        yaxis_title="% dari target (100% = target tercapai)",
        margin=dict(l=40, r=20, t=60, b=120),
        **layout,
    )
    fig.update_xaxes(tickangle=-35)

    return fig
628
+
629
+
630
+ # ============================================================
631
+ # 7) LLM REPORT (GAP)
632
  # ============================================================
633
  def build_context_gap(verif_df: pd.DataFrame, prov: str, kab: str, kew: str) -> str:
634
  wilayah = kab if kab and kab != "(Semua)" else (prov if prov and prov != "(Semua)" else "NASIONAL")
635
  lines = []
636
  lines.append(f"Wilayah filter: {wilayah}")
637
  lines.append(f"Kewenangan: {kew}")
638
+ lines.append(f"Target pengumpulan: {int(TARGET_COVERAGE*100)}% dari populasi unit (meta).")
639
  lines.append(f"Jumlah baris verifikasi: {len(verif_df)}")
640
 
641
+ gap_cols = [c for c in verif_df.columns if "Kekurangan Sampel" in c]
642
  for gc in gap_cols:
643
  total_gap = int(pd.to_numeric(verif_df[gc], errors="coerce").fillna(0).sum())
644
  lines.append(f"Total {gc}: {total_gap}")
645
 
646
+ # top prioritas
647
  if gap_cols:
648
  gc = gap_cols[0]
649
  t = verif_df.copy()
 
666
  lines.append("## Ringkasan Kekurangan Sampel IPLM (Rule-based)\n")
667
  lines.append(f"Wilayah: {wilayah}")
668
  lines.append(f"Kewenangan: {kew}")
669
+ lines.append(f"Target pengumpulan: {int(TARGET_COVERAGE*100)}% dari populasi unit (meta).")
670
  lines.append(f"Jumlah unit analisis: {len(verif_df)}\n")
671
 
672
+ gap_cols = [c for c in verif_df.columns if "Kekurangan Sampel" in c]
673
  if not gap_cols:
674
  lines.append("Kolom kekurangan sampel tidak ditemukan.")
675
  return "\n".join(lines)
676
 
677
  for gc in gap_cols:
678
  total_gap = int(pd.to_numeric(verif_df[gc], errors="coerce").fillna(0).sum())
679
+ lines.append(f"- Total {gc}: **{total_gap}** unit yang perlu dilengkapi untuk mencapai target 68%.")
680
 
681
  lines.append(
682
  "\nRekomendasi operasional: prioritaskan pengumpulan data pada wilayah dengan gap terbesar, "
 
694
  system_prompt = (
695
  "Anda adalah analis kebijakan dan manajer program IPLM. "
696
  "Tugas Anda menyusun narasi singkat dan tegas tentang kekurangan sampel data IPLM "
697
+ "serta strategi pengumpulan data untuk menutup gap menuju target."
698
  )
699
 
700
  user_prompt = f"""
 
704
 
705
  TULIS LAPORAN (BAHASA INDONESIA FORMAL) DENGAN STRUKTUR:
706
  1) Ringkasan kondisi pengumpulan data (1 paragraf).
707
+ 2) Angka total kekurangan sampel yang masih perlu dikumpulkan untuk mencapai target 68% (1 paragraf).
708
  3) Prioritas wilayah (top gap) dan alasan operasionalnya (1 paragraf).
709
  4) Rencana aksi 30–60 hari (paragraf naratif, bukan bullet).
710
 
711
  BATASAN:
712
  - Jangan bahas indeks / skor IPLM sama sekali.
713
+ - Fokus murni pada progress terhadap target 68%, kekurangan sampel, dan strategi pelengkapannya.
714
  """
715
 
716
  try:
 
737
 
738
 
739
  # ============================================================
740
+ # 8) WORD REPORT
741
  # ============================================================
742
  def generate_word_report_gap(verif_df: pd.DataFrame, prov: str, kab: str, kew: str, analysis_text: str):
743
  wilayah = kab if kab and kab != "(Semua)" else (prov if prov and prov != "(Semua)" else "NASIONAL")
 
745
  doc = Document()
746
  doc.add_heading(f"Laporan Kekurangan Sampel IPLM – {wilayah}", level=1)
747
  doc.add_paragraph(f"Kewenangan: {kew}")
748
+ doc.add_paragraph(f"Target pengumpulan: {int(TARGET_COVERAGE*100)}% dari populasi unit (meta).")
749
  doc.add_paragraph(f"Jumlah unit analisis: {len(verif_df)}")
750
 
751
  doc.add_heading("Tabel Verifikasi Coverage & Kekurangan Sampel", level=2)
 
769
  doc.add_paragraph("Grafik pie tidak dibuat karena 'kaleido' tidak tersedia di server.")
770
  else:
771
  pie_made = False
772
+
773
+ # KAB/KOTA: sekolah
774
+ if "Sampel Sekolah" in verif_df.columns and "Target Sekolah (68%)" in verif_df.columns:
775
+ samp = pd.to_numeric(verif_df["Sampel Sekolah"], errors="coerce").fillna(0).sum()
776
+ tgt = pd.to_numeric(verif_df["Target Sekolah (68%)"], errors="coerce").fillna(0).sum()
777
+ img = make_pie_plotly(samp, tgt, "Progress Sekolah (Total) terhadap Target 68%")
778
+ if img:
779
+ doc.add_picture(img, width=Inches(5))
780
+ pie_made = True
781
+
782
+ # KAB/KOTA: umum
783
+ if (not pie_made) and ("Sampel Umum" in verif_df.columns and "Target Umum (68%)" in verif_df.columns):
784
+ samp = pd.to_numeric(verif_df["Sampel Umum"], errors="coerce").fillna(0).sum()
785
+ tgt = pd.to_numeric(verif_df["Target Umum (68%)"], errors="coerce").fillna(0).sum()
786
+ img = make_pie_plotly(samp, tgt, "Progress Umum (Total) terhadap Target 68%")
787
  if img:
788
  doc.add_picture(img, width=Inches(5))
789
  pie_made = True
790
 
791
+ # PROVINSI: SMA
792
+ if (not pie_made) and ("Sampel SMA (DM)" in verif_df.columns and "Target SMA (68%)" in verif_df.columns):
793
+ samp = pd.to_numeric(verif_df["Sampel SMA (DM)"], errors="coerce").fillna(0).sum()
794
+ tgt = pd.to_numeric(verif_df["Target SMA (68%)"], errors="coerce").fillna(0).sum()
795
+ img = make_pie_plotly(samp, tgt, "Progress SMA (Total) terhadap Target 68%")
796
  if img:
797
  doc.add_picture(img, width=Inches(5))
798
  pie_made = True
799
 
800
  if not pie_made:
801
+ doc.add_paragraph("Tidak ada pasangan kolom sampel-target yang valid untuk dibuat pie chart.")
802
 
803
  doc.add_heading("Analisis Naratif (LLM)", level=2)
804
  for p in analysis_text.split("\n"):
 
811
 
812
 
813
  # ============================================================
814
+ # 9) CORE RUN (FILTER + EXPORT)
815
  # ============================================================
816
  def run_core(prov_value, kab_value, kew_value):
817
  if df_all_raw is None or df_all_raw.empty:
818
  empty = pd.DataFrame()
819
  return (
820
+ empty, empty, None,
821
  None, None, None,
822
  "Data DM tidak terbaca.",
823
  "Tidak ada analisis."
 
840
  if len(df) == 0:
841
  empty = pd.DataFrame()
842
  return (
843
+ empty, empty, None,
844
  None, None, None,
845
  "Tidak ada data untuk kombinasi filter yang dipilih.",
846
  "Tidak ada analisis."
847
  )
848
 
 
849
  verif_df = compute_gap_verification(df, kew_value)
850
 
851
  # detail subset untuk UI (ringkas)
 
855
  cols.append(c)
856
  detail_df = df[cols].copy() if cols else df.copy()
857
 
858
+ # grafik progress
859
+ fig_progress = make_progress_figure(verif_df, kew_value)
860
+
861
  # simpan file download
862
  tmpdir = tempfile.mkdtemp()
863
+ rekap_excel_path = os.path.join(tmpdir, "Rekap_Kekurangan_Sampel_IPLM_Target68.xlsx")
864
  raw_dm_path = os.path.join(tmpdir, "DM_Subset_Raw.xlsx")
865
 
 
866
  with pd.ExcelWriter(rekap_excel_path, engine="openpyxl") as w:
867
+ verif_df.to_excel(w, sheet_name="Verifikasi_Gap_Target68", index=False)
868
  detail_df.to_excel(w, sheet_name="Detail_Subset_DM", index=False)
869
 
 
870
  df.to_excel(raw_dm_path, index=False)
871
 
 
872
  analysis_text = generate_llm_gap_report(verif_df, prov_value, kab_value, kew_value)
 
 
873
  word_path = generate_word_report_gap(verif_df, prov_value, kab_value, kew_value, analysis_text)
874
 
875
+ msg = (
876
+ f"OK. Subset DM: {len(df)} baris | Verifikasi: {len(verif_df)} baris | "
877
+ f"Target: {int(TARGET_COVERAGE*100)}%."
878
+ )
879
+
880
  return (
881
  verif_df,
882
  detail_df,
883
+ fig_progress,
884
  rekap_excel_path,
885
  raw_dm_path,
886
  word_path,
 
893
 
894
 
895
  # ============================================================
896
+ # 10) UI GRADIO
897
  # ============================================================
898
  with gr.Blocks() as demo:
899
  gr.Markdown(
900
  f"""
901
+ # Dashboard Kekurangan Sampel IPLM (Tanpa Hitung Indeks) β€” Target {int(TARGET_COVERAGE*100)}%
902
 
903
  Aplikasi ini hanya mengecek **kekurangan sampel** berdasarkan:
904
+ - **DM (sampel masuk)** vs **Target 68% dari populasi unit (META)**
905
 
906
  **File:**
907
  - `{DATA_FILE}` (DM)
 
923
  run_btn = gr.Button("Hitung Kekurangan Sampel")
924
  msg_out = gr.Markdown()
925
 
926
+ gr.Markdown("### Verifikasi Coverage & Kekurangan Sampel (Target 68%)")
927
  verif_out = gr.DataFrame(interactive=False)
928
 
929
+ gr.Markdown("### Grafik Progress (% dari target β€” 100% = target tercapai)")
930
+ progress_out = gr.Plot()
931
+
932
  gr.Markdown("### Detail Subset DM (yang terfilter)")
933
  detail_out = gr.DataFrame(interactive=False)
934
 
 
943
  run_btn.click(
944
  fn=run_core,
945
  inputs=[dd_prov, dd_kab, dd_kew],
946
+ outputs=[
947
+ verif_out,
948
+ detail_out,
949
+ progress_out,
950
+ rekap_excel_out,
951
+ raw_dm_out,
952
+ word_out,
953
+ msg_out,
954
+ analysis_out
955
+ ],
956
  )
957
 
958
  demo.launch()