irhamni committed on
Commit
1c38b9f
·
verified ·
1 Parent(s): effb700

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +120 -151
app.py CHANGED
@@ -1,47 +1,45 @@
1
  # -*- coding: utf-8 -*-
2
  """
3
- app.py — Dashboard Kekurangan Sampel IPLM (TANPA HITUNG INDEKS) + Grafik Progress
4
 
5
  Fokus:
6
- - Cek "kekurangan sampel" pengumpulan data IPLM per wilayah
7
- - Target pengumpulan BUKAN 100% populasi, tetapi 68% dari populasi (TARGET_COVERAGE=0.68)
8
- - Bandingkan sampel (DM) vs target 68% populasi (META):
9
- - KAB/KOTA:
10
- * Sekolah: target = 68% dari (SD+SMP)
11
- * Umum: target = 68% dari (Kecamatan + Desa/Kelurahan)
12
- - PROVINSI:
13
- * SMA: target = 68% dari (Total SMA)
 
14
 
15
  Fitur:
16
  - Filter: Provinsi, Kab/Kota, Kewenangan
17
- - Tabel Verifikasi Coverage & Kekurangan Sampel
18
- - Tabel Detail Subset DM (ringkas)
19
- - Grafik progress (coverage terhadap target 68%) per unit wilayah
20
  - Download:
21
- 1) Rekap Excel (verifikasi + detail ringkas)
22
- 2) Data mentah subset DM (RAW) sesuai filter user
23
- 3) Laporan Word (narasi LLM + tabel verifikasi + pie ringkasan opsional)
24
  """
25
 
26
  import os
27
  import re
28
- import math
29
  import tempfile
30
  from pathlib import Path
31
 
32
  import gradio as gr
33
  import numpy as np
34
  import pandas as pd
35
- from huggingface_hub import InferenceClient
36
-
37
- # Plot
38
  import plotly.graph_objects as go
 
39
 
40
  # Word report
41
  from docx import Document
42
  from docx.shared import Inches
43
 
44
- # Pie chart opsional (butuh kaleido)
45
  import plotly.express as px
46
  try:
47
  import kaleido # noqa: F401
@@ -53,7 +51,7 @@ except Exception:
53
  # ============================================================
54
  # 1) KONFIGURASI FILE
55
  # ============================================================
56
- DATA_FILE = "DM_001.xlsx" # data sampel masuk (multi-sheet)
57
  META_KAB_FILE = "jumlahdesa_fixed (1).xlsx" # kecamatan & desa/kel per kab/kota
58
  META_SDSMP_FILE = "SD-SMP-kab.xlsx" # jumlah SD & SMP per kab/kota
59
  META_SMA_FILE = "SMA.xlsx" # jumlah SMA per provinsi
@@ -61,7 +59,7 @@ META_SMA_FILE = "SMA.xlsx" # jumlah SMA per provinsi
61
  # ============================================================
62
  # 1a) TARGET CAKUPAN SAMPEL (KEBIJAKAN)
63
  # ============================================================
64
- TARGET_COVERAGE = 0.68 # 68% dari populasi
65
 
66
  # ============================================================
67
  # 1b) KONFIGURASI LLM (Hugging Face Inference)
@@ -70,7 +68,7 @@ USE_LLM = True
70
  LLM_MODEL_NAME = "meta-llama/Meta-Llama-3-8B-Instruct"
71
 
72
  HF_TOKEN = (
73
- os.getenv("HF_TOKEN")
74
  or os.getenv("HUGGINGFACEHUB_API_TOKEN")
75
  or os.getenv("HF_API_TOKEN")
76
  )
@@ -168,13 +166,6 @@ def norm_kab_label(s):
168
  t = " ".join(t.split())
169
  return re.sub(r"[^A-Z0-9]+", "", t)
170
 
171
def safe_pct(num, den):
    """Return ``num`` expressed as a percentage of ``den``.

    Args:
        num: Numerator; ``None`` or NaN is treated as 0.
        den: Denominator; ``None``, NaN, zero or a negative value makes the
            ratio undefined.

    Returns:
        ``100 * num / den`` as a float, or ``np.nan`` when the denominator
        is unusable.
    """
    # An absent denominator cannot produce a percentage.
    if den is None or pd.isna(den):
        return np.nan
    # Zero or negative denominators are rejected the same way.
    if den <= 0:
        return np.nan

    numerator = 0 if (num is None or pd.isna(num)) else num
    return 100.0 * float(numerator) / float(den)
177
-
178
  def _infer_jenjang_sd_smp(x):
179
  if pd.isna(x):
180
  return "OTHER"
@@ -223,6 +214,7 @@ jenis_col_glob = None
223
  subjenis_col_glob = None
224
  nama_col_glob = None
225
 
 
226
  try:
227
  fp = Path(DATA_FILE)
228
  if not fp.exists():
@@ -244,7 +236,6 @@ try:
244
  else:
245
  df_all_raw["KEW_NORM"] = None
246
 
247
- # jenis perpustakaan -> dataset {sekolah/umum/khusus}
248
  val_map_jenis = {
249
  "PERPUSTAKAAN SEKOLAH": "sekolah",
250
  "SEKOLAH": "sekolah",
@@ -266,7 +257,7 @@ except Exception as e:
266
 
267
  extra_info = []
268
 
269
- # --- META kab: kec + desa/kel ---
270
  try:
271
  meta_kab_raw = pd.read_excel(META_KAB_FILE)
272
  col_kab = pick_col(meta_kab_raw, ["Kab/Kota", "Kab_Kota", "kab/kota", "kabupaten_kota"])
@@ -288,7 +279,7 @@ except Exception as e:
288
  meta_kab_df = None
289
  extra_info.append(f"⚠️ Gagal memuat `{META_KAB_FILE}` ({e})")
290
 
291
- # --- META SD/SMP per kab/kota ---
292
  try:
293
  sd_smp_raw = pd.read_excel(META_SDSMP_FILE)
294
  col_kab2 = pick_col(sd_smp_raw, [
@@ -323,7 +314,7 @@ try:
323
  except Exception as e:
324
  extra_info.append(f"⚠️ Gagal memuat `{META_SDSMP_FILE}` ({e})")
325
 
326
- # --- META SMA per provinsi ---
327
  try:
328
  meta_sma_raw = pd.read_excel(META_SMA_FILE)
329
  col_prov_sma = pick_col(meta_sma_raw, [
@@ -360,7 +351,7 @@ if extra_info:
360
 
361
 
362
  # ============================================================
363
- # 4) PILIHAN DROPDOWN
364
  # ============================================================
365
  def all_prov_choices():
366
  if df_all_raw is None or prov_col_glob is None:
@@ -394,7 +385,7 @@ default_kew = "KAB/KOTA" if "KAB/KOTA" in kew_choices else (kew_choices[0] if k
394
 
395
 
396
  # ============================================================
397
- # 5) INTI: HITUNG COVERAGE & GAP (TARGET 68%)
398
  # ============================================================
399
  def compute_gap_verification(df_filtered: pd.DataFrame, kew_value: str) -> pd.DataFrame:
400
  if df_filtered is None or len(df_filtered) == 0:
@@ -402,7 +393,7 @@ def compute_gap_verification(df_filtered: pd.DataFrame, kew_value: str) -> pd.Da
402
 
403
  kew_norm = str(kew_value or "").upper()
404
 
405
- # ================= KAB/KOTA =================
406
  if ("KAB" in kew_norm or "KOTA" in kew_norm):
407
  if kab_col_glob is None or meta_kab_df is None:
408
  return pd.DataFrame({"Info": ["Kolom kab/kota atau meta kab tidak tersedia."]})
@@ -415,22 +406,20 @@ def compute_gap_verification(df_filtered: pd.DataFrame, kew_value: str) -> pd.Da
415
  tmp["kab_key"] = tmp[kab_col_glob].apply(norm_kab_label)
416
 
417
  # total sampel per kab
418
- g_total = tmp.groupby("kab_key").size().rename("Sampel_Total").reset_index()
419
 
420
- # sekolah & jenjang
421
  if subjenis_col_glob and subjenis_col_glob in tmp.columns:
422
  tmp["jenjang"] = tmp[subjenis_col_glob].apply(_infer_jenjang_sd_smp)
423
  else:
424
  tmp["jenjang"] = "OTHER"
425
 
426
  tmp_sek = tmp[tmp["_dataset"] == "sekolah"].copy() if "_dataset" in tmp.columns else tmp.copy()
427
- g_sek_total = tmp_sek.groupby("kab_key").size().rename("Sampel_Sekolah").reset_index()
428
- g_sd = tmp_sek[tmp_sek["jenjang"] == "SD"].groupby("kab_key").size().rename("Sampel_SD").reset_index()
429
- g_smp = tmp_sek[tmp_sek["jenjang"] == "SMP"].groupby("kab_key").size().rename("Sampel_SMP").reset_index()
430
 
431
  # umum
432
  tmp_umum = tmp[tmp["_dataset"] == "umum"].copy() if "_dataset" in tmp.columns else tmp.copy()
433
- g_umum = tmp_umum.groupby("kab_key").size().rename("Sampel_Umum").reset_index()
434
 
435
  use_cols = ["kab_key", "Kab_Kota_Label", "Jml_Kecamatan", "Jml_DesaKel", "Jml_SD", "Jml_SMP"]
436
  use_cols = [c for c in use_cols if c in meta_kab_df.columns]
@@ -438,61 +427,49 @@ def compute_gap_verification(df_filtered: pd.DataFrame, kew_value: str) -> pd.Da
438
  merged = (
439
  g_total
440
  .merge(g_sek_total, on="kab_key", how="left")
441
- .merge(g_sd, on="kab_key", how="left")
442
- .merge(g_smp, on="kab_key", how="left")
443
  .merge(g_umum, on="kab_key", how="left")
444
  .merge(meta_kab_df[use_cols], on="kab_key", how="left")
445
  )
446
 
447
- for c in ["Sampel_Total", "Sampel_Sekolah", "Sampel_SD", "Sampel_SMP", "Sampel_Umum"]:
448
  if c in merged.columns:
449
  merged[c] = merged[c].fillna(0).astype(int)
450
 
451
- merged["Pop_SD_SMP"] = merged[["Jml_SD", "Jml_SMP"]].sum(axis=1, skipna=True)
452
- merged["Pop_Kec_DesaKel"] = merged.get("Jml_Kecamatan", np.nan) + merged.get("Jml_DesaKel", np.nan)
453
 
454
  # TARGET 68%
455
- merged["Target_Sekolah_68"] = np.ceil(merged["Pop_SD_SMP"] * TARGET_COVERAGE)
456
- merged["Target_Umum_68"] = np.ceil(merged["Pop_Kec_DesaKel"] * TARGET_COVERAGE)
457
-
458
- # Coverage terhadap target (100% = target terpenuhi)
459
- merged["Progress_Sekolah_%"] = merged.apply(
460
- lambda r: safe_pct(r["Sampel_Sekolah"], r.get("Target_Sekolah_68", np.nan)), axis=1
461
- )
462
- merged["Progress_Umum_%"] = merged.apply(
463
- lambda r: safe_pct(r["Sampel_Umum"], r.get("Target_Umum_68", np.nan)), axis=1
464
- )
465
 
466
- # Kekurangan terhadap target 68%
467
- merged["Kekurangan_Sekolah"] = merged.apply(
468
- lambda r: max(int(r["Target_Sekolah_68"] - r["Sampel_Sekolah"]) if pd.notna(r["Target_Sekolah_68"]) else 0, 0),
469
  axis=1
470
  )
471
- merged["Kekurangan_Umum"] = merged.apply(
472
- lambda r: max(int(r["Target_Umum_68"] - r["Sampel_Umum"]) if pd.notna(r["Target_Umum_68"]) else 0, 0),
473
  axis=1
474
  )
475
 
476
  out = pd.DataFrame({
477
  "Kab/Kota": merged.get("Kab_Kota_Label", merged["kab_key"]),
478
- "Sampel Total": merged["Sampel_Total"],
479
-
480
- "Sampel Sekolah": merged["Sampel_Sekolah"],
481
- "Populasi Sekolah (SD+SMP)": merged["Pop_SD_SMP"],
482
- "Target Sekolah (68%)": merged["Target_Sekolah_68"],
483
- "Progress Sekolah (% dari target)": merged["Progress_Sekolah_%"],
484
- "Kekurangan Sampel Sekolah": merged["Kekurangan_Sekolah"],
485
-
486
- "Sampel Umum": merged["Sampel_Umum"],
487
- "Populasi Admin (Kec+Desa/Kel)": merged["Pop_Kec_DesaKel"],
488
- "Target Umum (68%)": merged["Target_Umum_68"],
489
- "Progress Umum (% dari target)": merged["Progress_Umum_%"],
490
- "Kekurangan Sampel Umum": merged["Kekurangan_Umum"],
491
  })
492
 
493
- return out.sort_values("Kab/Kota").reset_index(drop=True).round(3)
494
 
495
- # ================= PROVINSI =================
496
  if ("PROV" in kew_norm):
497
  if meta_sma_df is None:
498
  return pd.DataFrame({"Info": ["Meta SMA tidak tersedia."]})
@@ -506,11 +483,11 @@ def compute_gap_verification(df_filtered: pd.DataFrame, kew_value: str) -> pd.Da
506
 
507
  tmp["prov_key"] = tmp[prov_col_glob].apply(norm_prov_label)
508
 
509
- # START dari sampel (biar tidak munculin provinsi lain)
510
- g_total = tmp.groupby("prov_key").size().rename("Sampel_Total").reset_index()
511
 
512
  tmp_sek = tmp[tmp["_dataset"] == "sekolah"].copy() if "_dataset" in tmp.columns else tmp.copy()
513
- g_sma = tmp_sek.groupby("prov_key").size().rename("Sampel_SMA").reset_index()
514
 
515
  merged = (
516
  g_total
@@ -518,111 +495,108 @@ def compute_gap_verification(df_filtered: pd.DataFrame, kew_value: str) -> pd.Da
518
  .merge(meta_sma_df[["prov_key", "Provinsi_Label", "Jml_SMA"]], on="prov_key", how="left")
519
  )
520
 
521
- merged["Sampel_SMA"] = merged["Sampel_SMA"].fillna(0).astype(int)
522
 
523
- # TARGET 68%
524
- merged["Target_SMA_68"] = np.ceil(merged["Jml_SMA"] * TARGET_COVERAGE)
525
-
526
- merged["Progress_SMA_%"] = merged.apply(
527
- lambda r: safe_pct(r["Sampel_SMA"], r.get("Target_SMA_68", np.nan)), axis=1
528
- )
529
 
530
- merged["Kekurangan_SMA"] = merged.apply(
531
- lambda r: max(int(r["Target_SMA_68"] - r["Sampel_SMA"]) if pd.notna(r["Target_SMA_68"]) else 0, 0),
532
  axis=1
533
  )
534
 
535
  out = pd.DataFrame({
536
  "Provinsi": merged["Provinsi_Label"].fillna(merged["prov_key"]),
537
- "Sampel Total (Prov)": merged["Sampel_Total"].fillna(0).astype(int),
538
 
539
- "Sampel SMA (DM)": merged["Sampel_SMA"],
540
- "Populasi SMA (Meta)": merged["Jml_SMA"],
541
- "Target SMA (68%)": merged["Target_SMA_68"],
542
- "Progress SMA (% dari target)": merged["Progress_SMA_%"],
543
- "Kekurangan Sampel SMA": merged["Kekurangan_SMA"],
544
  })
545
 
546
- return out.sort_values("Provinsi").reset_index(drop=True).round(3)
547
 
548
  return pd.DataFrame({"Info": ["Kewenangan tidak dikenali / tidak didukung."]})
549
 
550
 
551
  # ============================================================
552
- # 6) GRAFIK PROGRESS (Plotly)
553
  # ============================================================
554
def make_progress_figure(verif_df: pd.DataFrame, kew_value: str) -> go.Figure:
    """Build a bar chart of collection progress relative to the target.

    Args:
        verif_df: Verification table. A "Kab/Kota" column selects the
            KAB/KOTA layout and a "Provinsi" column selects the PROVINSI
            layout; progress values are read from the
            "Progress ... (% dari target)" columns.
        kew_value: Authority filter value, matched case-insensitively on the
            substrings "KAB"/"KOTA" or "PROV".

    Returns:
        A Plotly figure. When the table is missing/empty or its layout is
        not recognised, the figure carries only a title and axis labels.
    """
    chart = go.Figure()

    no_data = verif_df is None or verif_df.empty
    if no_data:
        chart.update_layout(
            title="Progress Pengumpulan (tidak ada data)",
            xaxis_title="Unit",
            yaxis_title="% dari target 68%",
        )
        return chart

    authority = str(kew_value or "").upper()
    hover = "%{x}<br>%{y:.1f}%<extra></extra>"
    y_label = "% dari target (100% = target tercapai)"

    def _display_pct(column):
        # Values are clipped to 0..120 for display so bars far above 100%
        # do not dominate the chart; a missing column becomes all-NaN.
        placeholder = pd.Series([np.nan] * len(verif_df))
        raw = verif_df.get(column, placeholder)
        numeric = pd.to_numeric(raw, errors="coerce")
        return numeric.clip(lower=0, upper=120)

    is_kab = "KAB" in authority or "KOTA" in authority
    if is_kab and "Kab/Kota" in verif_df.columns:
        labels = verif_df["Kab/Kota"].astype(str).tolist()
        series = [
            ("Progress Sekolah (% dari target)", "Sekolah (SD+SMP) — % dari target"),
            ("Progress Umum (% dari target)", "Umum (Kec+Desa/Kel) — % dari target"),
        ]
        # Resolve both series before adding traces, matching the original order.
        resolved = [(_display_pct(col), legend) for col, legend in series]
        for values, legend in resolved:
            chart.add_trace(go.Bar(
                x=labels, y=values, name=legend,
                hovertemplate=hover
            ))

        # Dashed reference line marking "target reached".
        chart.add_hline(y=100, line_dash="dash")

        chart.update_layout(
            title="Progress Pengumpulan Data (KAB/KOTA) — terhadap Target 68%",
            barmode="group",
            xaxis_title="Kab/Kota",
            yaxis_title=y_label,
            margin=dict(l=40, r=20, t=60, b=120),
        )
        chart.update_xaxes(tickangle=-35)
        return chart

    if "PROV" in authority and "Provinsi" in verif_df.columns:
        labels = verif_df["Provinsi"].astype(str).tolist()
        sma_pct = _display_pct("Progress SMA (% dari target)")

        chart.add_trace(go.Bar(
            x=labels, y=sma_pct, name="SMA — % dari target",
            hovertemplate=hover
        ))

        # Dashed reference line marking "target reached".
        chart.add_hline(y=100, line_dash="dash")

        chart.update_layout(
            title="Progress Pengumpulan Data (PROVINSI) — SMA terhadap Target 68%",
            xaxis_title="Provinsi",
            yaxis_title=y_label,
            margin=dict(l=40, r=20, t=60, b=120),
        )
        chart.update_xaxes(tickangle=-35)
        return chart

    # Neither layout matched: return a labelled but empty figure.
    chart.update_layout(
        title="Progress Pengumpulan — format data tidak dikenali",
        xaxis_title="Unit",
        yaxis_title="% dari target 68%",
    )
    return chart
628
 
@@ -636,14 +610,13 @@ def build_context_gap(verif_df: pd.DataFrame, prov: str, kab: str, kew: str) ->
636
  lines.append(f"Wilayah filter: {wilayah}")
637
  lines.append(f"Kewenangan: {kew}")
638
  lines.append(f"Target pengumpulan: {int(TARGET_COVERAGE*100)}% dari populasi unit (meta).")
639
- lines.append(f"Jumlah baris verifikasi: {len(verif_df)}")
640
 
641
  gap_cols = [c for c in verif_df.columns if "Kekurangan Sampel" in c]
642
  for gc in gap_cols:
643
  total_gap = int(pd.to_numeric(verif_df[gc], errors="coerce").fillna(0).sum())
644
  lines.append(f"Total {gc}: {total_gap}")
645
 
646
- # top prioritas
647
  if gap_cols:
648
  gc = gap_cols[0]
649
  t = verif_df.copy()
@@ -710,7 +683,7 @@ TULIS LAPORAN (BAHASA INDONESIA FORMAL) DENGAN STRUKTUR:
710
 
711
  BATASAN:
712
  - Jangan bahas indeks / skor IPLM sama sekali.
713
- - Fokus murni pada progress terhadap target 68%, kekurangan sampel, dan strategi pelengkapannya.
714
  """
715
 
716
  try:
@@ -748,7 +721,7 @@ def generate_word_report_gap(verif_df: pd.DataFrame, prov: str, kab: str, kew: s
748
  doc.add_paragraph(f"Target pengumpulan: {int(TARGET_COVERAGE*100)}% dari populasi unit (meta).")
749
  doc.add_paragraph(f"Jumlah unit analisis: {len(verif_df)}")
750
 
751
- doc.add_heading("Tabel Verifikasi Coverage & Kekurangan Sampel", level=2)
752
  view = verif_df.copy()
753
  if len(view) > 200:
754
  doc.add_paragraph("Catatan: tabel dipotong (200 baris pertama) untuk menjaga ukuran dokumen.")
@@ -770,29 +743,26 @@ def generate_word_report_gap(verif_df: pd.DataFrame, prov: str, kab: str, kew: s
770
  else:
771
  pie_made = False
772
 
773
- # KAB/KOTA: sekolah
774
  if "Sampel Sekolah" in verif_df.columns and "Target Sekolah (68%)" in verif_df.columns:
775
  samp = pd.to_numeric(verif_df["Sampel Sekolah"], errors="coerce").fillna(0).sum()
776
  tgt = pd.to_numeric(verif_df["Target Sekolah (68%)"], errors="coerce").fillna(0).sum()
777
- img = make_pie_plotly(samp, tgt, "Progress Sekolah (Total) terhadap Target 68%")
778
  if img:
779
  doc.add_picture(img, width=Inches(5))
780
  pie_made = True
781
 
782
- # KAB/KOTA: umum
783
  if (not pie_made) and ("Sampel Umum" in verif_df.columns and "Target Umum (68%)" in verif_df.columns):
784
  samp = pd.to_numeric(verif_df["Sampel Umum"], errors="coerce").fillna(0).sum()
785
  tgt = pd.to_numeric(verif_df["Target Umum (68%)"], errors="coerce").fillna(0).sum()
786
- img = make_pie_plotly(samp, tgt, "Progress Umum (Total) terhadap Target 68%")
787
  if img:
788
  doc.add_picture(img, width=Inches(5))
789
  pie_made = True
790
 
791
- # PROVINSI: SMA
792
  if (not pie_made) and ("Sampel SMA (DM)" in verif_df.columns and "Target SMA (68%)" in verif_df.columns):
793
  samp = pd.to_numeric(verif_df["Sampel SMA (DM)"], errors="coerce").fillna(0).sum()
794
  tgt = pd.to_numeric(verif_df["Target SMA (68%)"], errors="coerce").fillna(0).sum()
795
- img = make_pie_plotly(samp, tgt, "Progress SMA (Total) terhadap Target 68%")
796
  if img:
797
  doc.add_picture(img, width=Inches(5))
798
  pie_made = True
@@ -848,15 +818,15 @@ def run_core(prov_value, kab_value, kew_value):
848
 
849
  verif_df = compute_gap_verification(df, kew_value)
850
 
851
- # detail subset untuk UI (ringkas)
852
  cols = []
853
  for c in [prov_col_glob, kab_col_glob, nama_col_glob, kew_col_glob, jenis_col_glob, subjenis_col_glob, "_dataset", "KEW_NORM"]:
854
  if c and c in df.columns and c not in cols:
855
  cols.append(c)
856
  detail_df = df[cols].copy() if cols else df.copy()
857
 
858
- # grafik progress
859
- fig_progress = make_progress_figure(verif_df, kew_value)
860
 
861
  # simpan file download
862
  tmpdir = tempfile.mkdtemp()
@@ -880,7 +850,7 @@ def run_core(prov_value, kab_value, kew_value):
880
  return (
881
  verif_df,
882
  detail_df,
883
- fig_progress,
884
  rekap_excel_path,
885
  raw_dm_path,
886
  word_path,
@@ -900,8 +870,7 @@ with gr.Blocks() as demo:
900
  f"""
901
  # Dashboard Kekurangan Sampel IPLM (Tanpa Hitung Indeks) — Target {int(TARGET_COVERAGE*100)}%
902
 
903
- Aplikasi ini hanya mengecek **kekurangan sampel** berdasarkan:
904
- - **DM (sampel masuk)** vs **Target 68% dari populasi unit (META)**
905
 
906
  **File:**
907
  - `{DATA_FILE}` (DM)
@@ -923,11 +892,11 @@ Aplikasi ini hanya mengecek **kekurangan sampel** berdasarkan:
923
  run_btn = gr.Button("Hitung Kekurangan Sampel")
924
  msg_out = gr.Markdown()
925
 
926
- gr.Markdown("### Verifikasi Coverage & Kekurangan Sampel (Target 68%)")
927
  verif_out = gr.DataFrame(interactive=False)
928
 
929
- gr.Markdown("### Grafik Progress (% dari target 100% = target tercapai)")
930
- progress_out = gr.Plot()
931
 
932
  gr.Markdown("### Detail Subset DM (yang terfilter)")
933
  detail_out = gr.DataFrame(interactive=False)
@@ -946,7 +915,7 @@ Aplikasi ini hanya mengecek **kekurangan sampel** berdasarkan:
946
  outputs=[
947
  verif_out,
948
  detail_out,
949
- progress_out,
950
  rekap_excel_out,
951
  raw_dm_out,
952
  word_out,
 
1
  # -*- coding: utf-8 -*-
2
  """
3
+ app.py — Dashboard Kekurangan Sampel IPLM (TANPA HITUNG INDEKS) + Grafik GAP (Bukan Persen)
4
 
5
  Fokus:
6
+ - Target pengumpulan = 68% dari populasi unit (meta), BUKAN 100%
7
+ - Output utama: "Kekurangan sampel" = berapa unit lagi yang harus dikumpulkan
8
+
9
+ Pembanding:
10
+ - KAB/KOTA:
11
+ * Sekolah: target = 68% dari (SD + SMP)
12
+ * Umum: target = 68% dari (Kecamatan + Desa/Kelurahan)
13
+ - PROVINSI:
14
+ * SMA: target = 68% dari (Total SMA)
15
 
16
  Fitur:
17
  - Filter: Provinsi, Kab/Kota, Kewenangan
18
+ - Tabel Verifikasi (target 68% + kekurangan)
19
+ - Detail subset DM (ringkas)
20
+ - Grafik GAP (kekurangan unit) per wilayah
21
  - Download:
22
+ 1) Rekap (Verifikasi + Detail ringkas) .xlsx
23
+ 2) Data mentah subset DM sesuai filter .xlsx
24
+ 3) Laporan Word (.docx) + narasi LLM soal kekurangan sampel
25
  """
26
 
27
  import os
28
  import re
 
29
  import tempfile
30
  from pathlib import Path
31
 
32
  import gradio as gr
33
  import numpy as np
34
  import pandas as pd
 
 
 
35
  import plotly.graph_objects as go
36
+ from huggingface_hub import InferenceClient
37
 
38
  # Word report
39
  from docx import Document
40
  from docx.shared import Inches
41
 
42
+ # Pie opsional (butuh kaleido)
43
  import plotly.express as px
44
  try:
45
  import kaleido # noqa: F401
 
51
  # ============================================================
52
  # 1) KONFIGURASI FILE
53
  # ============================================================
54
+ DATA_FILE = "IPLM_clean_Manual.xlsx" # data sampel masuk (multi-sheet)
55
  META_KAB_FILE = "jumlahdesa_fixed (1).xlsx" # kecamatan & desa/kel per kab/kota
56
  META_SDSMP_FILE = "SD-SMP-kab.xlsx" # jumlah SD & SMP per kab/kota
57
  META_SMA_FILE = "SMA.xlsx" # jumlah SMA per provinsi
 
59
  # ============================================================
60
  # 1a) TARGET CAKUPAN SAMPEL (KEBIJAKAN)
61
  # ============================================================
62
+ TARGET_COVERAGE = 0.68 # 68% dari populasi
63
 
64
  # ============================================================
65
  # 1b) KONFIGURASI LLM (Hugging Face Inference)
 
68
  LLM_MODEL_NAME = "meta-llama/Meta-Llama-3-8B-Instruct"
69
 
70
  HF_TOKEN = (
71
+ os.getenv("HF_SECRET")
72
  or os.getenv("HUGGINGFACEHUB_API_TOKEN")
73
  or os.getenv("HF_API_TOKEN")
74
  )
 
166
  t = " ".join(t.split())
167
  return re.sub(r"[^A-Z0-9]+", "", t)
168
 
 
 
 
 
 
 
 
169
  def _infer_jenjang_sd_smp(x):
170
  if pd.isna(x):
171
  return "OTHER"
 
214
  subjenis_col_glob = None
215
  nama_col_glob = None
216
 
217
+ # ---- Load DM ----
218
  try:
219
  fp = Path(DATA_FILE)
220
  if not fp.exists():
 
236
  else:
237
  df_all_raw["KEW_NORM"] = None
238
 
 
239
  val_map_jenis = {
240
  "PERPUSTAKAAN SEKOLAH": "sekolah",
241
  "SEKOLAH": "sekolah",
 
257
 
258
  extra_info = []
259
 
260
+ # ---- Meta Kab (Kec/Desa) ----
261
  try:
262
  meta_kab_raw = pd.read_excel(META_KAB_FILE)
263
  col_kab = pick_col(meta_kab_raw, ["Kab/Kota", "Kab_Kota", "kab/kota", "kabupaten_kota"])
 
279
  meta_kab_df = None
280
  extra_info.append(f"⚠️ Gagal memuat `{META_KAB_FILE}` ({e})")
281
 
282
+ # ---- Meta SD/SMP ----
283
  try:
284
  sd_smp_raw = pd.read_excel(META_SDSMP_FILE)
285
  col_kab2 = pick_col(sd_smp_raw, [
 
314
  except Exception as e:
315
  extra_info.append(f"⚠️ Gagal memuat `{META_SDSMP_FILE}` ({e})")
316
 
317
+ # ---- Meta SMA ----
318
  try:
319
  meta_sma_raw = pd.read_excel(META_SMA_FILE)
320
  col_prov_sma = pick_col(meta_sma_raw, [
 
351
 
352
 
353
  # ============================================================
354
+ # 4) DROPDOWN
355
  # ============================================================
356
  def all_prov_choices():
357
  if df_all_raw is None or prov_col_glob is None:
 
385
 
386
 
387
  # ============================================================
388
+ # 5) VERIFIKASI GAP (TARGET 68%)
389
  # ============================================================
390
  def compute_gap_verification(df_filtered: pd.DataFrame, kew_value: str) -> pd.DataFrame:
391
  if df_filtered is None or len(df_filtered) == 0:
 
393
 
394
  kew_norm = str(kew_value or "").upper()
395
 
396
+ # ================ KAB/KOTA ================
397
  if ("KAB" in kew_norm or "KOTA" in kew_norm):
398
  if kab_col_glob is None or meta_kab_df is None:
399
  return pd.DataFrame({"Info": ["Kolom kab/kota atau meta kab tidak tersedia."]})
 
406
  tmp["kab_key"] = tmp[kab_col_glob].apply(norm_kab_label)
407
 
408
  # total sampel per kab
409
+ g_total = tmp.groupby("kab_key").size().rename("Sampel Total").reset_index()
410
 
411
+ # sekolah & jenjang (opsional)
412
  if subjenis_col_glob and subjenis_col_glob in tmp.columns:
413
  tmp["jenjang"] = tmp[subjenis_col_glob].apply(_infer_jenjang_sd_smp)
414
  else:
415
  tmp["jenjang"] = "OTHER"
416
 
417
  tmp_sek = tmp[tmp["_dataset"] == "sekolah"].copy() if "_dataset" in tmp.columns else tmp.copy()
418
+ g_sek_total = tmp_sek.groupby("kab_key").size().rename("Sampel Sekolah").reset_index()
 
 
419
 
420
  # umum
421
  tmp_umum = tmp[tmp["_dataset"] == "umum"].copy() if "_dataset" in tmp.columns else tmp.copy()
422
+ g_umum = tmp_umum.groupby("kab_key").size().rename("Sampel Umum").reset_index()
423
 
424
  use_cols = ["kab_key", "Kab_Kota_Label", "Jml_Kecamatan", "Jml_DesaKel", "Jml_SD", "Jml_SMP"]
425
  use_cols = [c for c in use_cols if c in meta_kab_df.columns]
 
427
  merged = (
428
  g_total
429
  .merge(g_sek_total, on="kab_key", how="left")
 
 
430
  .merge(g_umum, on="kab_key", how="left")
431
  .merge(meta_kab_df[use_cols], on="kab_key", how="left")
432
  )
433
 
434
+ for c in ["Sampel Total", "Sampel Sekolah", "Sampel Umum"]:
435
  if c in merged.columns:
436
  merged[c] = merged[c].fillna(0).astype(int)
437
 
438
+ merged["Populasi Sekolah (SD+SMP)"] = merged[["Jml_SD", "Jml_SMP"]].sum(axis=1, skipna=True)
439
+ merged["Populasi Admin (Kec+Desa/Kel)"] = merged.get("Jml_Kecamatan", np.nan) + merged.get("Jml_DesaKel", np.nan)
440
 
441
  # TARGET 68%
442
+ merged["Target Sekolah (68%)"] = np.ceil(merged["Populasi Sekolah (SD+SMP)"] * TARGET_COVERAGE)
443
+ merged["Target Umum (68%)"] = np.ceil(merged["Populasi Admin (Kec+Desa/Kel)"] * TARGET_COVERAGE)
 
 
 
 
 
 
 
 
444
 
445
+ # GAP: berapa yang harus dikumpulkan lagi
446
+ merged["Kekurangan Sampel Sekolah"] = merged.apply(
447
+ lambda r: max(int(r["Target Sekolah (68%)"] - r["Sampel Sekolah"]) if pd.notna(r["Target Sekolah (68%)"]) else 0, 0),
448
  axis=1
449
  )
450
+ merged["Kekurangan Sampel Umum"] = merged.apply(
451
+ lambda r: max(int(r["Target Umum (68%)"] - r["Sampel Umum"]) if pd.notna(r["Target Umum (68%)"]) else 0, 0),
452
  axis=1
453
  )
454
 
455
  out = pd.DataFrame({
456
  "Kab/Kota": merged.get("Kab_Kota_Label", merged["kab_key"]),
457
+ "Sampel Total": merged["Sampel Total"],
458
+
459
+ "Sampel Sekolah": merged["Sampel Sekolah"],
460
+ "Populasi Sekolah (SD+SMP)": merged["Populasi Sekolah (SD+SMP)"],
461
+ "Target Sekolah (68%)": merged["Target Sekolah (68%)"],
462
+ "Kekurangan Sampel Sekolah": merged["Kekurangan Sampel Sekolah"],
463
+
464
+ "Sampel Umum": merged["Sampel Umum"],
465
+ "Populasi Admin (Kec+Desa/Kel)": merged["Populasi Admin (Kec+Desa/Kel)"],
466
+ "Target Umum (68%)": merged["Target Umum (68%)"],
467
+ "Kekurangan Sampel Umum": merged["Kekurangan Sampel Umum"],
 
 
468
  })
469
 
470
+ return out.sort_values("Kab/Kota").reset_index(drop=True).round(0)
471
 
472
+ # ================ PROVINSI ================
473
  if ("PROV" in kew_norm):
474
  if meta_sma_df is None:
475
  return pd.DataFrame({"Info": ["Meta SMA tidak tersedia."]})
 
483
 
484
  tmp["prov_key"] = tmp[prov_col_glob].apply(norm_prov_label)
485
 
486
+ # start dari sampel (biar tidak bocor prov lain)
487
+ g_total = tmp.groupby("prov_key").size().rename("Sampel Total (Prov)").reset_index()
488
 
489
  tmp_sek = tmp[tmp["_dataset"] == "sekolah"].copy() if "_dataset" in tmp.columns else tmp.copy()
490
+ g_sma = tmp_sek.groupby("prov_key").size().rename("Sampel SMA (DM)").reset_index()
491
 
492
  merged = (
493
  g_total
 
495
  .merge(meta_sma_df[["prov_key", "Provinsi_Label", "Jml_SMA"]], on="prov_key", how="left")
496
  )
497
 
498
+ merged["Sampel SMA (DM)"] = merged["Sampel SMA (DM)"].fillna(0).astype(int)
499
 
500
+ merged["Populasi SMA (Meta)"] = merged["Jml_SMA"]
501
+ merged["Target SMA (68%)"] = np.ceil(merged["Populasi SMA (Meta)"] * TARGET_COVERAGE)
 
 
 
 
502
 
503
+ merged["Kekurangan Sampel SMA"] = merged.apply(
504
+ lambda r: max(int(r["Target SMA (68%)"] - r["Sampel SMA (DM)"]) if pd.notna(r["Target SMA (68%)"]) else 0, 0),
505
  axis=1
506
  )
507
 
508
  out = pd.DataFrame({
509
  "Provinsi": merged["Provinsi_Label"].fillna(merged["prov_key"]),
510
+ "Sampel Total (Prov)": merged["Sampel Total (Prov)"].fillna(0).astype(int),
511
 
512
+ "Sampel SMA (DM)": merged["Sampel SMA (DM)"],
513
+ "Populasi SMA (Meta)": merged["Populasi SMA (Meta)"],
514
+ "Target SMA (68%)": merged["Target SMA (68%)"],
515
+ "Kekurangan Sampel SMA": merged["Kekurangan Sampel SMA"],
 
516
  })
517
 
518
+ return out.sort_values("Provinsi").reset_index(drop=True).round(0)
519
 
520
  return pd.DataFrame({"Info": ["Kewenangan tidak dikenali / tidak didukung."]})
521
 
522
 
523
  # ============================================================
524
+ # 6) GRAFIK GAP (KEKURANGAN YANG HARUS DIKUMPULKAN)
525
  # ============================================================
526
def make_gap_figure(verif_df: pd.DataFrame, kew_value: str) -> go.Figure:
    """Build a bar chart of the remaining sample shortfall per region.

    Args:
        verif_df: Verification table. A "Kab/Kota" column selects the
            KAB/KOTA layout and a "Provinsi" column selects the PROVINSI
            layout; shortfalls are read from the "Kekurangan Sampel ..."
            columns.
        kew_value: Authority filter value, matched case-insensitively on the
            substrings "KAB"/"KOTA" or "PROV".

    Returns:
        A Plotly figure with regions ordered by total shortfall (largest
        first). When the table is missing/empty or its layout is not
        recognised, the figure carries only a title and axis labels.
    """
    fig = go.Figure()

    if verif_df is None or verif_df.empty:
        fig.update_layout(
            title="Kekurangan Sampel (tidak ada data)",
            xaxis_title="Unit",
            yaxis_title="Kekurangan (unit)",
        )
        return fig

    kew_norm = str(kew_value or "").upper()

    def _gap(frame, column):
        # A missing column counts as zero shortfall for every row. Returning
        # an index-aligned Series (not a scalar) keeps .fillna/.astype and
        # the later addition valid; pd.to_numeric on a scalar returns a
        # scalar, which has no .fillna.
        if column not in frame.columns:
            return pd.Series(0, index=frame.index, dtype=int)
        return pd.to_numeric(frame[column], errors="coerce").fillna(0).astype(int)

    # Sort by total shortfall so the regions needing most work come first.
    if ("KAB" in kew_norm or "KOTA" in kew_norm) and ("Kab/Kota" in verif_df.columns):
        dfp = verif_df.copy()
        dfp["gap_total"] = (
            _gap(dfp, "Kekurangan Sampel Sekolah")
            + _gap(dfp, "Kekurangan Sampel Umum")
        )
        dfp = dfp.sort_values("gap_total", ascending=False)

        x = dfp["Kab/Kota"].astype(str).tolist()
        gap_sek = _gap(dfp, "Kekurangan Sampel Sekolah")
        gap_umum = _gap(dfp, "Kekurangan Sampel Umum")

        fig.add_trace(go.Bar(
            x=x, y=gap_sek, name="Kekurangan Sekolah (SD+SMP)",
            text=gap_sek, textposition="outside",
            hovertemplate="%{x}<br>Kekurangan sekolah: %{y} unit<extra></extra>"
        ))
        fig.add_trace(go.Bar(
            x=x, y=gap_umum, name="Kekurangan Umum (Kec+Desa/Kel)",
            text=gap_umum, textposition="outside",
            hovertemplate="%{x}<br>Kekurangan umum: %{y} unit<extra></extra>"
        ))

        fig.update_layout(
            title="Kekurangan Sampel yang Harus Dikumpulkan (KAB/KOTA) — Target 68%",
            barmode="group",
            xaxis_title="Kab/Kota",
            yaxis_title="Kekurangan (unit)",
            margin=dict(l=40, r=20, t=60, b=140),
        )
        fig.update_xaxes(tickangle=-35)
        return fig

    if ("PROV" in kew_norm) and ("Provinsi" in verif_df.columns):
        dfp = verif_df.copy()
        dfp["gap_total"] = _gap(dfp, "Kekurangan Sampel SMA")
        dfp = dfp.sort_values("gap_total", ascending=False)

        x = dfp["Provinsi"].astype(str).tolist()
        gap_sma = _gap(dfp, "Kekurangan Sampel SMA")

        fig.add_trace(go.Bar(
            x=x, y=gap_sma, name="Kekurangan SMA",
            text=gap_sma, textposition="outside",
            hovertemplate="%{x}<br>Kekurangan SMA: %{y} unit<extra></extra>"
        ))

        fig.update_layout(
            title="Kekurangan Sampel yang Harus Dikumpulkan (PROVINSI) — SMA Target 68%",
            xaxis_title="Provinsi",
            yaxis_title="Kekurangan (unit)",
            margin=dict(l=40, r=20, t=60, b=140),
        )
        fig.update_xaxes(tickangle=-35)
        return fig

    # Neither layout matched: return a labelled but empty figure.
    fig.update_layout(
        title="Kekurangan Sampel — format data tidak dikenali",
        xaxis_title="Unit",
        yaxis_title="Kekurangan (unit)",
    )
    return fig
602
 
 
610
  lines.append(f"Wilayah filter: {wilayah}")
611
  lines.append(f"Kewenangan: {kew}")
612
  lines.append(f"Target pengumpulan: {int(TARGET_COVERAGE*100)}% dari populasi unit (meta).")
613
+ lines.append(f"Jumlah unit analisis: {len(verif_df)}")
614
 
615
  gap_cols = [c for c in verif_df.columns if "Kekurangan Sampel" in c]
616
  for gc in gap_cols:
617
  total_gap = int(pd.to_numeric(verif_df[gc], errors="coerce").fillna(0).sum())
618
  lines.append(f"Total {gc}: {total_gap}")
619
 
 
620
  if gap_cols:
621
  gc = gap_cols[0]
622
  t = verif_df.copy()
 
683
 
684
  BATASAN:
685
  - Jangan bahas indeks / skor IPLM sama sekali.
686
+ - Fokus murni pada target 68%, kekurangan sampel, dan strategi pelengkapannya.
687
  """
688
 
689
  try:
 
721
  doc.add_paragraph(f"Target pengumpulan: {int(TARGET_COVERAGE*100)}% dari populasi unit (meta).")
722
  doc.add_paragraph(f"Jumlah unit analisis: {len(verif_df)}")
723
 
724
+ doc.add_heading("Tabel Verifikasi (Target 68% & Kekurangan Sampel)", level=2)
725
  view = verif_df.copy()
726
  if len(view) > 200:
727
  doc.add_paragraph("Catatan: tabel dipotong (200 baris pertama) untuk menjaga ukuran dokumen.")
 
743
  else:
744
  pie_made = False
745
 
 
746
  if "Sampel Sekolah" in verif_df.columns and "Target Sekolah (68%)" in verif_df.columns:
747
  samp = pd.to_numeric(verif_df["Sampel Sekolah"], errors="coerce").fillna(0).sum()
748
  tgt = pd.to_numeric(verif_df["Target Sekolah (68%)"], errors="coerce").fillna(0).sum()
749
+ img = make_pie_plotly(samp, tgt, "Capaian Sekolah (Total) terhadap Target 68%")
750
  if img:
751
  doc.add_picture(img, width=Inches(5))
752
  pie_made = True
753
 
 
754
  if (not pie_made) and ("Sampel Umum" in verif_df.columns and "Target Umum (68%)" in verif_df.columns):
755
  samp = pd.to_numeric(verif_df["Sampel Umum"], errors="coerce").fillna(0).sum()
756
  tgt = pd.to_numeric(verif_df["Target Umum (68%)"], errors="coerce").fillna(0).sum()
757
+ img = make_pie_plotly(samp, tgt, "Capaian Umum (Total) terhadap Target 68%")
758
  if img:
759
  doc.add_picture(img, width=Inches(5))
760
  pie_made = True
761
 
 
762
  if (not pie_made) and ("Sampel SMA (DM)" in verif_df.columns and "Target SMA (68%)" in verif_df.columns):
763
  samp = pd.to_numeric(verif_df["Sampel SMA (DM)"], errors="coerce").fillna(0).sum()
764
  tgt = pd.to_numeric(verif_df["Target SMA (68%)"], errors="coerce").fillna(0).sum()
765
+ img = make_pie_plotly(samp, tgt, "Capaian SMA (Total) terhadap Target 68%")
766
  if img:
767
  doc.add_picture(img, width=Inches(5))
768
  pie_made = True
 
818
 
819
  verif_df = compute_gap_verification(df, kew_value)
820
 
821
+ # detail subset DM untuk UI (ringkas)
822
  cols = []
823
  for c in [prov_col_glob, kab_col_glob, nama_col_glob, kew_col_glob, jenis_col_glob, subjenis_col_glob, "_dataset", "KEW_NORM"]:
824
  if c and c in df.columns and c not in cols:
825
  cols.append(c)
826
  detail_df = df[cols].copy() if cols else df.copy()
827
 
828
+ # grafik GAP
829
+ fig_gap = make_gap_figure(verif_df, kew_value)
830
 
831
  # simpan file download
832
  tmpdir = tempfile.mkdtemp()
 
850
  return (
851
  verif_df,
852
  detail_df,
853
+ fig_gap,
854
  rekap_excel_path,
855
  raw_dm_path,
856
  word_path,
 
870
  f"""
871
  # Dashboard Kekurangan Sampel IPLM (Tanpa Hitung Indeks) — Target {int(TARGET_COVERAGE*100)}%
872
 
873
+ Aplikasi ini mengecek **berapa unit lagi yang harus dikumpulkan** agar memenuhi target minimal representasi.
 
874
 
875
  **File:**
876
  - `{DATA_FILE}` (DM)
 
892
  run_btn = gr.Button("Hitung Kekurangan Sampel")
893
  msg_out = gr.Markdown()
894
 
895
+ gr.Markdown("### Verifikasi (Target 68% & Kekurangan Sampel)")
896
  verif_out = gr.DataFrame(interactive=False)
897
 
898
+ gr.Markdown("### Grafik Kekurangan Sampel (berapa unit lagi yang harus dikumpulkan)")
899
+ gap_plot_out = gr.Plot()
900
 
901
  gr.Markdown("### Detail Subset DM (yang terfilter)")
902
  detail_out = gr.DataFrame(interactive=False)
 
915
  outputs=[
916
  verif_out,
917
  detail_out,
918
+ gap_plot_out,
919
  rekap_excel_out,
920
  raw_dm_out,
921
  word_out,