irhamni committed on
Commit
bd6de49
·
verified ·
1 Parent(s): 8c977d5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +266 -290
app.py CHANGED
@@ -1,17 +1,17 @@
1
  # -*- coding: utf-8 -*-
2
  """
3
- app.py — IPLM 2025 (FULL)
4
  - Pipeline nasional: Yeo-Johnson + MinMax (sekali nasional)
5
- - RealScore + (FinalScore = RealScore * bobot_coverage_68)
6
- - Bobot coverage:
7
- * 68% = bobot 1.0
8
- * <68% = coverage/0.68
9
- * 0% = 0.0
10
  - Populasi resmi:
11
- * KAB/KOTA: Data_populasi_Kab_kota.xlsx
12
- * PROVINSI: Data_populasi_propinsi.xlsx
13
- - Verifikasi: coverage, gap menuju 68%, bobot
14
- - Export: agregat/detail/raw + Word report
 
 
 
15
  """
16
 
17
  import os
@@ -30,11 +30,11 @@ from sklearn.preprocessing import PowerTransformer
30
  # 1) KONFIGURASI FILE
31
  # ============================================================
32
 
33
- DATA_FILE = "IPLM_clean_manual_131225.xlsx"
34
- POP_KAB = "Data_populasi_Kab_kota.xlsx"
35
- POP_PROV = "Data_populasi_propinsi.xlsx"
36
 
37
- TARGET_COVERAGE = 0.68 # 68% = 100%
38
  W_KEPATUHAN = 0.30
39
  W_KINERJA = 0.70
40
 
@@ -61,10 +61,7 @@ def get_llm_client():
61
  _HF_CLIENT = None
62
  return None
63
  try:
64
- if HF_TOKEN:
65
- _HF_CLIENT = InferenceClient(model=LLM_MODEL_NAME, token=HF_TOKEN)
66
- else:
67
- _HF_CLIENT = InferenceClient(model=LLM_MODEL_NAME)
68
  return _HF_CLIENT
69
  except Exception:
70
  _HF_CLIENT = None
@@ -77,6 +74,14 @@ def get_llm_client():
77
  def _canon(s: str) -> str:
78
  return re.sub(r"[^a-z0-9]+", "", str(s).lower())
79
 
 
 
 
 
 
 
 
 
80
  def coerce_num(val):
81
  if pd.isna(val):
82
  return np.nan
@@ -138,7 +143,7 @@ def norm_prov_label(s):
138
  if pd.isna(s):
139
  return None
140
  t = str(s).upper()
141
- for bad in ["PROVINSI", "PROPINSI", "PROVINS "]:
142
  t = t.replace(bad, "")
143
  t = " ".join(t.split())
144
  return re.sub(r"[^A-Z0-9]+", "", t)
@@ -177,7 +182,6 @@ def penalized_mean(row, cols):
177
  return float(np.mean(vals))
178
 
179
  def cap_bobot(cov: float) -> float:
180
- # 68% = 1.0 ; kurang -> proporsional; 0 -> 0
181
  if cov is None or pd.isna(cov) or cov <= 0:
182
  return 0.0
183
  return float(min(cov / TARGET_COVERAGE, 1.0))
@@ -188,7 +192,7 @@ def safe_div(num, den):
188
  return float(num) / float(den)
189
 
190
  # ============================================================
191
- # 3) DEFINISI INDIKATOR IPLM (REAL)
192
  # ============================================================
193
 
194
  koleksi_cols = [
@@ -213,7 +217,6 @@ pengelolaan_cols = [
213
  ]
214
  all_indicators = koleksi_cols + sdm_cols + pelayanan_cols + pengelolaan_cols
215
 
216
- # DM alias -> kanonik
217
  alias_map_raw = {
218
  "j_judul_koleksi_tercetak": "JudulTercetak",
219
  "j_eksemplar_koleksi_tercetak": "EksemplarTercetak",
@@ -244,7 +247,7 @@ alias_map_raw = {
244
  alias_map = {_canon(k): v for k, v in alias_map_raw.items()}
245
 
246
  # ============================================================
247
- # 4) LOAD DM + POPULASI (KAB & PROV)
248
  # ============================================================
249
 
250
  DATA_INFO = ""
@@ -268,7 +271,7 @@ try:
268
  kab_col = pick_col(df_all_raw, ["kab_kota", "Kab_Kota", "Kab/Kota", "KAB/KOTA", "kabupaten_kota", "kota"])
269
  kew_col = pick_col(df_all_raw, ["kewenangan", "jenis_kewenangan", "Kewenangan", "KEWENANGAN"])
270
  jenis_col = pick_col(df_all_raw, ["jenis_perpustakaan", "JENIS_PERPUSTAKAAN", "Jenis Perpustakaan", "jenis perpustakaan"])
271
- nama_col = pick_col(df_all_raw, ["nama_perpustakaan", "nm_perpustakaan", "nm_instansi_lembaga", "Nama Perpustakaan"])
272
 
273
  df_all_raw["KEW_NORM"] = df_all_raw[kew_col].apply(norm_kew) if kew_col else None
274
 
@@ -283,13 +286,24 @@ try:
283
  }
284
  df_all_raw["_dataset"] = df_all_raw[jenis_col].apply(_norm_text).map(val_map_jenis) if jenis_col else None
285
 
 
 
 
 
 
 
 
 
 
 
286
  DATA_INFO = f"βœ… DM terbaca: **{DATA_FILE}** | Baris: **{len(df_all_raw)}**"
287
  except Exception as e:
288
  df_all_raw = None
289
  DATA_INFO = f"⚠️ Gagal memuat DM: `{e}`"
290
 
291
- # ---- POPULASI KAB/KOTA ----
292
  POP_INFO = []
 
 
293
  try:
294
  pk = pd.read_excel(POP_KAB)
295
  c_prov = pick_col(pk, ["PROVINSI", "Provinsi"])
@@ -302,7 +316,7 @@ try:
302
  c_pop_sekolah = pick_col(pk, ["jumlah_populasi_sekolah"])
303
 
304
  if c_kab is None:
305
- raise ValueError("Kolom Kab/Kota tidak ditemukan di file populasi kab/kota.")
306
 
307
  df_pop_kab = pd.DataFrame({
308
  "Provinsi_Label": pk[c_prov].astype(str).str.strip() if c_prov else None,
@@ -316,7 +330,6 @@ try:
316
  })
317
  df_pop_kab["kab_key"] = df_pop_kab["Kab_Kota_Label"].apply(norm_kab_label)
318
 
319
- # fallback populasi bila kolom total tidak ada / kosong
320
  if df_pop_kab["Pop_Umum"].isna().all():
321
  df_pop_kab["Pop_Umum"] = df_pop_kab[["Jml_Kecamatan","Jml_DesaKel"]].sum(axis=1, skipna=True)
322
  if df_pop_kab["Pop_Sekolah"].isna().all():
@@ -327,16 +340,17 @@ except Exception as e:
327
  df_pop_kab = None
328
  POP_INFO.append(f"⚠️ Gagal memuat populasi Kab/Kota: `{e}`")
329
 
330
- # ---- POPULASI PROVINSI ----
331
  try:
332
  pp = pd.read_excel(POP_PROV)
333
  c_prov = pick_col(pp, ["Provinsi", "PROVINSI"])
334
  c_total_pend = pick_col(pp, ["total_pend", "TOTAL_PEND", "total pend"])
335
- c_sma = pick_col(pp, ["sma", "sma "]) # ada spasi di file
 
336
  if c_prov is None:
337
- raise ValueError("Kolom Provinsi tidak ditemukan di file populasi provinsi.")
338
  if c_total_pend is None and c_sma is None:
339
- raise ValueError("Kolom total_pend / sma tidak ditemukan di file populasi provinsi.")
340
 
341
  df_pop_prov = pd.DataFrame({
342
  "Provinsi_Label": pp[c_prov].astype(str).str.strip(),
@@ -357,7 +371,7 @@ if POP_INFO:
357
  DATA_INFO = DATA_INFO + "<br>" + "<br>".join(POP_INFO)
358
 
359
  # ============================================================
360
- # 5) PIPELINE NASIONAL: REALSCORE (YJ + MINMAX)
361
  # ============================================================
362
 
363
  def prepare_global_iplm(df_src: pd.DataFrame) -> pd.DataFrame:
@@ -381,7 +395,6 @@ def prepare_global_iplm(df_src: pd.DataFrame) -> pd.DataFrame:
381
  if rename_map:
382
  df = df.rename(columns=rename_map)
383
 
384
- # numeric coercion
385
  available = [c for c in all_indicators if c in df.columns]
386
  for c in available:
387
  df[c] = df[c].apply(coerce_num)
@@ -410,7 +423,6 @@ def prepare_global_iplm(df_src: pd.DataFrame) -> pd.DataFrame:
410
 
411
  df["Indeks_Real_0_100"] = 100 * (W_KEPATUHAN * df["dim_kepatuhan"] + W_KINERJA * df["dim_kinerja"])
412
 
413
- # paksa tidak NaN
414
  for c in ["sub_koleksi","sub_sdm","sub_pelayanan","sub_pengelolaan","dim_kepatuhan","dim_kinerja","Indeks_Real_0_100"]:
415
  df[c] = df[c].fillna(0.0)
416
 
@@ -419,15 +431,10 @@ def prepare_global_iplm(df_src: pd.DataFrame) -> pd.DataFrame:
419
  df_all_ipml = prepare_global_iplm(df_all_raw) if df_all_raw is not None else None
420
 
421
  # ============================================================
422
- # 6) HITUNG COVERAGE + BOBOT (68%) + FINAL SCORE
423
  # ============================================================
424
 
425
  def compute_coverage_and_weight(df_filtered: pd.DataFrame, kew_value: str):
426
- """
427
- Return:
428
- - df_out: df_filtered + bobot_coverage + Indeks_Final
429
- - verif_df: tabel verifikasi coverage, gap menuju 68%
430
- """
431
  if df_filtered is None or df_filtered.empty:
432
  return df_filtered, pd.DataFrame()
433
 
@@ -436,14 +443,12 @@ def compute_coverage_and_weight(df_filtered: pd.DataFrame, kew_value: str):
436
 
437
  df["bobot_coverage"] = 1.0
438
  df["coverage"] = np.nan
439
- df["gap_to_68"] = np.nan
440
 
441
- # --- KAB/KOTA ---
442
  if ("KAB" in kew_norm or "KOTA" in kew_norm) and kab_col and df_pop_kab is not None:
443
  tmp = df.copy()
444
- tmp["kab_key"] = tmp[kab_col].apply(norm_kab_label)
445
 
446
- # sampel per kab per dataset
447
  g = tmp.groupby(["kab_key","_dataset"]).size().rename("n_sampel").reset_index()
448
  g_piv = g.pivot(index="kab_key", columns="_dataset", values="n_sampel").fillna(0)
449
 
@@ -472,24 +477,27 @@ def compute_coverage_and_weight(df_filtered: pd.DataFrame, kew_value: str):
472
 
473
  rows.append({
474
  "Kab/Kota": kab_label,
475
- "Pop Sekolah (SD+SMP)": pop_sek,
476
- "Sampel Sekolah": n_sek,
477
- "Coverage Sekolah": cov_sek,
478
- "Bobot Sekolah (68%)": bobot_sek,
479
- "GAP ke 68% (Sekolah)": gap_sek,
480
-
481
- "Pop Umum (Kec+Desa/Kel)": pop_um,
482
- "Sampel Umum": n_um,
483
- "Coverage Umum": cov_um,
484
- "Bobot Umum (68%)": bobot_um,
485
- "GAP ke 68% (Umum)": gap_um,
486
  })
487
 
488
  verif_df = pd.DataFrame(rows)
489
 
490
- # map bobot per baris perpustakaan
491
- bobot_map_sek = {norm_kab_label(r["Kab/Kota"]): r["Bobot Sekolah (68%)"] for _, r in verif_df.iterrows()}
492
- bobot_map_um = {norm_kab_label(r["Kab/Kota"]): r["Bobot Umum (68%)"] for _, r in verif_df.iterrows()}
 
 
 
493
 
494
  def row_weight(r):
495
  ds = r.get("_dataset", None)
@@ -502,27 +510,22 @@ def compute_coverage_and_weight(df_filtered: pd.DataFrame, kew_value: str):
502
  return float(bobot_map_um.get(kk, 0.0))
503
  return 1.0
504
 
505
- df["kab_key"] = df[kab_col].apply(norm_kab_label)
506
- df["bobot_coverage"] = df.apply(row_weight, axis=1)
507
-
508
- # coverage per row (opsional untuk detail)
509
  def row_cov(r):
510
  ds = r.get("_dataset", None)
511
  kk = r.get("kab_key", None)
512
  if ds == "sekolah":
513
- # cari coverage sekolah dari verif_df
514
- v = verif_df.loc[verif_df["Kab/Kota"].apply(norm_kab_label)==kk, "Coverage Sekolah"]
515
- return float(v.iloc[0]) if len(v) else np.nan
516
  if ds == "umum":
517
- v = verif_df.loc[verif_df["Kab/Kota"].apply(norm_kab_label)==kk, "Coverage Umum"]
518
- return float(v.iloc[0]) if len(v) else np.nan
519
  return np.nan
 
 
520
  df["coverage"] = df.apply(row_cov, axis=1)
521
 
522
- # --- PROVINSI ---
523
  elif ("PROV" in kew_norm) and prov_col and df_pop_prov is not None:
524
  tmp = df.copy()
525
- tmp["prov_key"] = tmp[prov_col].apply(norm_prov_label)
526
 
527
  g = tmp.groupby(["prov_key","_dataset"]).size().rename("n_sampel").reset_index()
528
  g_piv = g.pivot(index="prov_key", columns="_dataset", values="n_sampel").fillna(0)
@@ -536,7 +539,6 @@ def compute_coverage_and_weight(df_filtered: pd.DataFrame, kew_value: str):
536
 
537
  cov_sek = safe_div(n_sek, pop_sek)
538
  bobot_sek = cap_bobot(cov_sek)
539
-
540
  target_sek = (TARGET_COVERAGE * pop_sek) if not pd.isna(pop_sek) else np.nan
541
  gap_sek = max(target_sek - n_sek, 0) if not pd.isna(target_sek) else np.nan
542
 
@@ -544,16 +546,19 @@ def compute_coverage_and_weight(df_filtered: pd.DataFrame, kew_value: str):
544
 
545
  rows.append({
546
  "Provinsi": prov_label,
547
- "Pop Sekolah (Total Pend)": pop_sek,
548
- "Sampel Sekolah": n_sek,
549
- "Coverage Sekolah": cov_sek,
550
- "Bobot Sekolah (68%)": bobot_sek,
551
- "GAP ke 68% (Sekolah)": gap_sek,
552
  })
553
 
554
  verif_df = pd.DataFrame(rows)
555
 
556
- bobot_map = {norm_prov_label(r["Provinsi"]): r["Bobot Sekolah (68%)"] for _, r in verif_df.iterrows()}
 
 
 
557
 
558
  def row_weight(r):
559
  ds = r.get("_dataset", None)
@@ -563,26 +568,22 @@ def compute_coverage_and_weight(df_filtered: pd.DataFrame, kew_value: str):
563
  return float(bobot_map.get(r.get("prov_key", None), 0.0))
564
  return 1.0
565
 
566
- df["prov_key"] = df[prov_col].apply(norm_prov_label)
567
- df["bobot_coverage"] = df.apply(row_weight, axis=1)
568
-
569
  def row_cov(r):
570
  if r.get("_dataset", None) != "sekolah":
571
  return np.nan
572
- v = verif_df.loc[verif_df["Provinsi"].apply(norm_prov_label)==r.get("prov_key", None), "Coverage Sekolah"]
573
- return float(v.iloc[0]) if len(v) else np.nan
 
574
  df["coverage"] = df.apply(row_cov, axis=1)
575
 
576
  else:
577
  verif_df = pd.DataFrame()
578
 
579
- # Final score
580
  df["Indeks_Final_0_100"] = (df["Indeks_Real_0_100"].fillna(0.0) * df["bobot_coverage"].fillna(0.0)).fillna(0.0)
581
-
582
  return df, verif_df
583
 
584
  # ============================================================
585
- # 7) BELL CURVE (REAL & FINAL)
586
  # ============================================================
587
 
588
  def make_bell_figure(df_all: pd.DataFrame, title: str, index_col: str, name_col: str = None, min_points: int = 5) -> go.Figure:
@@ -633,123 +634,62 @@ def make_bell_figure(df_all: pd.DataFrame, title: str, index_col: str, name_col:
633
  return fig
634
 
635
  # ============================================================
636
- # 8) EXPORT EXCEL + WORD REPORT
637
  # ============================================================
638
 
639
- from docx import Document
640
- from docx.shared import Inches
641
-
642
- # kaleido for plotly image export (optional)
643
- try:
644
- import kaleido # noqa: F401
645
- HAS_KALEIDO = True
646
- except Exception:
647
- HAS_KALEIDO = False
648
-
649
- def make_pie_plotly(num, den, title):
650
- if not HAS_KALEIDO:
651
- return None
652
- if den is None or pd.isna(den) or den <= 0:
653
- values = [0, 1]
654
- labels = ["Terjangkau", "Belum Terjangkau"]
655
- else:
656
- values = [float(num), max(float(den) - float(num), 0.0)]
657
- labels = ["Terjangkau", "Belum Terjangkau"]
658
- fig = px.pie(values=values, names=labels, title=title, hole=0.3)
659
- tmp = tempfile.mktemp(suffix=".png")
660
- try:
661
- fig.write_image(tmp, scale=2)
662
- return tmp
663
- except Exception:
664
- return None
665
-
666
- def build_analysis_rule(detail_df, agg_df, verif_df, wilayah, kew):
667
- mean_real = float(detail_df["Indeks_Real_0_100"].mean()) if "Indeks_Real_0_100" in detail_df.columns else np.nan
668
- mean_final = float(detail_df["Indeks_Final_0_100"].mean()) if "Indeks_Final_0_100" in detail_df.columns else np.nan
669
  lines = []
670
  lines.append("## Analisis Otomatis (Rule-based)")
671
  lines.append(f"- Wilayah: {wilayah} | Kewenangan: {kew}")
672
- lines.append(f"- Jumlah unit sampel: {len(detail_df)}")
673
  if not pd.isna(mean_real):
674
  lines.append(f"- Rata-rata Indeks Real: {mean_real:.2f}")
675
  if not pd.isna(mean_final):
676
- lines.append(f"- Rata-rata Indeks Final (setelah penalti 68%): {mean_final:.2f}")
677
-
678
  if verif_df is not None and not verif_df.empty:
679
- lines.append("")
680
- lines.append("### Catatan Coverage (68% = bobot 1)")
681
- # ambil ringkas: rata-rata coverage & gap
682
- cand_cov = [c for c in verif_df.columns if "Coverage" in c]
683
- if cand_cov:
684
- for c in cand_cov:
685
- v = verif_df[c].dropna()
686
- if len(v):
687
- lines.append(f"- Rata-rata {c}: {(100*v.mean()):.2f}%")
688
- cand_gap = [c for c in verif_df.columns if "GAP" in c]
689
- if cand_gap:
690
- for c in cand_gap:
691
- v = verif_df[c].dropna()
692
- if len(v):
693
- lines.append(f"- Total {c}: {v.sum():.0f} unit")
694
-
695
  lines.append("")
696
- lines.append("### Rekomendasi Program (ringkas)")
697
- lines.append(
698
- "Fokus penguatan diarahkan pada konsolidasi cakupan sampel agar mendekati standar 68% sehingga pembobotan tidak menurunkan skor final, "
699
- "serta perbaikan indikator layanan dan pengelolaan yang mendorong pemanfaatan. "
700
- "Prioritas implementasi dapat dilakukan melalui penguatan pembinaan berbasis wilayah dengan target unit yang masih memiliki GAP tinggi."
701
- )
702
  return "\n".join(lines)
703
 
704
- def build_analysis_llm(detail_df, agg_df, verif_df, wilayah, kew):
705
- # fallback rule-based jika LLM gagal
706
- rb = build_analysis_rule(detail_df, agg_df, verif_df, wilayah, kew)
707
  if not USE_LLM:
708
  return rb
709
  client = get_llm_client()
710
  if client is None:
711
  return "⚠️ LLM tidak tersedia, memakai rule-based.\n\n" + rb
712
 
713
- # context singkat
714
- mean_real = float(detail_df["Indeks_Real_0_100"].mean()) if "Indeks_Real_0_100" in detail_df.columns else np.nan
715
- mean_final = float(detail_df["Indeks_Final_0_100"].mean()) if "Indeks_Final_0_100" in detail_df.columns else np.nan
716
-
717
- ctx = [
718
- f"Wilayah: {wilayah}",
719
- f"Kewenangan: {kew}",
720
- f"Jumlah unit sampel: {len(detail_df)}",
721
- f"Rata-rata Indeks Real: {mean_real:.2f}" if not pd.isna(mean_real) else "",
722
- f"Rata-rata Indeks Final (penalti 68%): {mean_final:.2f}" if not pd.isna(mean_final) else "",
723
- ]
724
  if verif_df is not None and not verif_df.empty:
725
- # ambil 5 baris gap terbesar bila ada
726
  gap_cols = [c for c in verif_df.columns if "GAP" in c]
727
  if gap_cols:
728
  g0 = gap_cols[0]
729
- vv = verif_df[[c for c in verif_df.columns if c in gap_cols or c in ["Kab/Kota","Provinsi"]]].copy()
730
- vv = vv.sort_values(g0, ascending=False).head(5)
731
- ctx.append("Contoh GAP terbesar (top 5):")
732
  ctx.append(vv.to_string(index=False))
733
 
734
- system_prompt = (
735
- "Anda adalah analis kebijakan perpustakaan dan literasi di Indonesia. "
736
- "Tugas Anda menyusun analisis ringkas, komunikatif, dan berbasis data."
737
- )
738
  user_prompt = f"""
739
- DATA RINGKAS:
740
- {chr(10).join([x for x in ctx if x])}
741
-
742
- TULISKAN:
743
- 1) Gambaran umum (1 paragraf).
744
- 2) Dampak penalti coverage 68% terhadap skor final (1 paragraf).
745
- 3) Rekomendasi prioritas 12–24 bulan (2 paragraf), fokus menutup GAP unit.
746
- Gunakan bahasa Indonesia formal, kalimat efektif, tanpa label "rendah/sedang/tinggi".
747
  """
748
  try:
749
  resp = client.chat_completion(
750
  model=LLM_MODEL_NAME,
751
  messages=[{"role":"system","content":system_prompt},{"role":"user","content":user_prompt}],
752
- max_tokens=900,
753
  temperature=0.25,
754
  top_p=0.9,
755
  )
@@ -758,18 +698,46 @@ Gunakan bahasa Indonesia formal, kalimat efektif, tanpa label "rendah/sedang/tin
758
  except Exception as e:
759
  return f"⚠️ Gagal memanggil LLM ({repr(e)}), memakai rule-based.\n\n{rb}"
760
 
761
- def generate_word_report(detail_df, agg_df, verif_df, wilayah, kew, analysis_text):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
762
  doc = Document()
763
  doc.add_heading(f"Laporan IPLM – {wilayah}", level=1)
764
 
765
- doc.add_heading("Ringkasan Indeks", level=2)
766
- doc.add_paragraph(f"- Jumlah unit sampel: {len(detail_df)}")
767
- if "Indeks_Real_0_100" in detail_df.columns:
768
- doc.add_paragraph(f"- Rata-rata Indeks Real: {detail_df['Indeks_Real_0_100'].mean():.2f}")
769
- if "Indeks_Final_0_100" in detail_df.columns:
770
- doc.add_paragraph(f"- Rata-rata Indeks Final (penalti 68%): {detail_df['Indeks_Final_0_100'].mean():.2f}")
771
 
772
- doc.add_heading("Agregat per Jenis Perpustakaan", level=2)
773
  if agg_df is not None and not agg_df.empty:
774
  table = doc.add_table(rows=1, cols=len(agg_df.columns))
775
  hdr = table.rows[0].cells
@@ -780,44 +748,21 @@ def generate_word_report(detail_df, agg_df, verif_df, wilayah, kew, analysis_tex
780
  for i, c in enumerate(agg_df.columns):
781
  r[i].text = str(row[c])
782
 
783
- doc.add_heading("Coverage / Cakupan (68% = bobot 1)", level=2)
784
  if verif_df is None or verif_df.empty:
785
  doc.add_paragraph("Tidak ada tabel verifikasi coverage untuk filter ini.")
786
  else:
787
- # Pie chart ringkas: total sekolah / total populasi sekolah (kalau tersedia)
788
  if HAS_KALEIDO:
789
- if "Kab/Kota" in verif_df.columns:
790
- # total sekolah
791
- if "Pop Sekolah (SD+SMP)" in verif_df.columns and "Sampel Sekolah" in verif_df.columns:
792
- img = make_pie_plotly(
793
- verif_df["Sampel Sekolah"].sum(),
794
- verif_df["Pop Sekolah (SD+SMP)"].sum(),
795
- "Coverage Sekolah (Total)"
796
- )
797
- if img:
798
- doc.add_picture(img, width=Inches(4))
799
- if "Pop Umum (Kec+Desa/Kel)" in verif_df.columns and "Sampel Umum" in verif_df.columns:
800
- img = make_pie_plotly(
801
- verif_df["Sampel Umum"].sum(),
802
- verif_df["Pop Umum (Kec+Desa/Kel)"].sum(),
803
- "Coverage Umum (Total)"
804
- )
805
- if img:
806
- doc.add_picture(img, width=Inches(4))
807
- elif "Provinsi" in verif_df.columns:
808
- if "Pop Sekolah (Total Pend)" in verif_df.columns and "Sampel Sekolah" in verif_df.columns:
809
- img = make_pie_plotly(
810
- verif_df["Sampel Sekolah"].sum(),
811
- verif_df["Pop Sekolah (Total Pend)"].sum(),
812
- "Coverage Sekolah Provinsi (Total)"
813
- )
814
- if img:
815
- doc.add_picture(img, width=Inches(4))
816
  else:
817
  doc.add_paragraph("Pie chart tidak dibuat karena 'kaleido' tidak tersedia.")
818
 
819
- # tabel verifikasi
820
- doc.add_paragraph("Tabel Verifikasi Coverage:")
821
  vtab = doc.add_table(rows=1, cols=len(verif_df.columns))
822
  vh = vtab.rows[0].cells
823
  for i, c in enumerate(verif_df.columns):
@@ -827,7 +772,7 @@ def generate_word_report(detail_df, agg_df, verif_df, wilayah, kew, analysis_tex
827
  for i, c in enumerate(verif_df.columns):
828
  rr[i].text = str(row[c])
829
 
830
- doc.add_heading("Analisis Naratif Otomatis", level=2)
831
  for p in analysis_text.split("\n"):
832
  if p.strip():
833
  doc.add_paragraph(p)
@@ -837,79 +782,110 @@ def generate_word_report(detail_df, agg_df, verif_df, wilayah, kew, analysis_tex
837
  return out
838
 
839
  # ============================================================
840
- # 9) CORE PIPELINE PER FILTER: AGG + DETAIL + EXPORT + BELL
841
  # ============================================================
842
 
843
- def build_agg(detail_df: pd.DataFrame) -> pd.DataFrame:
844
- expected_ds = ["sekolah", "umum", "khusus"]
 
 
 
 
 
 
845
  label_map = {"sekolah":"Perpustakaan Sekolah","umum":"Perpustakaan Umum","khusus":"Perpustakaan Khusus"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
846
 
847
- rows = []
848
- for ds in expected_ds:
849
- d = detail_df[detail_df["_dataset"] == ds].copy() if "_dataset" in detail_df.columns else pd.DataFrame()
850
- if d.empty:
851
- rows.append({"Jenis": label_map.get(ds, ds), "Jumlah": 0, "Mean_Real": 0.0, "Mean_Final": 0.0})
852
- else:
853
- rows.append({
854
- "Jenis": label_map.get(ds, ds),
855
- "Jumlah": int(len(d)),
856
- "Mean_Real": float(d["Indeks_Real_0_100"].mean()) if "Indeks_Real_0_100" in d.columns else 0.0,
857
- "Mean_Final": float(d["Indeks_Final_0_100"].mean()) if "Indeks_Final_0_100" in d.columns else 0.0,
858
- })
859
- # total
860
- rows.append({
861
- "Jenis":"Rata-rata keseluruhan",
862
- "Jumlah": int(len(detail_df)),
863
- "Mean_Real": float(detail_df["Indeks_Real_0_100"].mean()) if "Indeks_Real_0_100" in detail_df.columns else 0.0,
864
- "Mean_Final": float(detail_df["Indeks_Final_0_100"].mean()) if "Indeks_Final_0_100" in detail_df.columns else 0.0,
865
- })
866
- return pd.DataFrame(rows).round(3)
867
 
868
  def run_pipeline_filtered(prov_value, kab_value, kew_value):
869
  if df_all_ipml is None or df_all_ipml.empty:
870
  return (pd.DataFrame(), pd.DataFrame(), pd.DataFrame(),
871
  None, None, None, None,
872
- None, None, None, None,
873
  "Data DM belum siap / gagal diproses.", "Tidak ada analisis.")
874
 
875
  df = df_all_ipml.copy()
876
 
877
- # filter
878
- if prov_col and prov_value and prov_value != "(Semua)":
879
- df = df[df[prov_col].astype(str).str.strip() == prov_value]
880
- if kab_col and kab_value and kab_value != "(Semua)":
881
- df = df[df[kab_col].astype(str).str.strip() == kab_value]
882
  if kew_value and kew_value != "(Semua)":
883
  df = df[df["KEW_NORM"] == kew_value]
884
 
885
  if df.empty:
886
  return (pd.DataFrame(), pd.DataFrame(), pd.DataFrame(),
887
  None, None, None, None,
888
- None, None, None, None,
889
  "Tidak ada data untuk kombinasi filter.", "Tidak ada analisis.")
890
 
891
  wilayah = kab_value if kab_value and kab_value != "(Semua)" else (prov_value if prov_value and prov_value != "(Semua)" else "NASIONAL")
892
  kew = kew_value if kew_value and kew_value != "(Semua)" else "SEMUA"
893
 
894
- # coverage + bobot + final
895
  df2, verif_df = compute_coverage_and_weight(df, kew_value)
896
 
897
- # detail view
898
- base_cols = []
899
- if prov_col and prov_col in df2.columns: base_cols.append(prov_col)
900
- if kab_col and kab_col in df2.columns: base_cols.append(kab_col)
901
- if nama_col and nama_col in df2.columns: base_cols.append(nama_col)
902
- base_cols += ["KEW_NORM","_dataset","dim_kepatuhan","dim_kinerja","Indeks_Real_0_100","bobot_coverage","Indeks_Final_0_100","coverage"]
903
- detail_cols = [c for c in base_cols if c in df2.columns]
904
- detail_df = df2[detail_cols].copy().round(4)
 
 
 
 
 
 
 
 
 
905
 
906
- # agg
907
- agg_df = build_agg(df2)
 
 
 
 
 
 
 
 
 
 
908
 
909
- # export excel
910
  tmpdir = tempfile.mkdtemp()
911
  slug = slugify(wilayah) + "_" + slugify(kew)
912
-
913
  agg_path = os.path.join(tmpdir, f"IPLM_Agregat_{slug}.xlsx")
914
  detail_path = os.path.join(tmpdir, f"IPLM_Detail_{slug}.xlsx")
915
  raw_path = os.path.join(tmpdir, f"IPLM_Raw_{slug}.xlsx")
@@ -918,23 +894,20 @@ def run_pipeline_filtered(prov_value, kab_value, kew_value):
918
  detail_df.to_excel(detail_path, index=False)
919
  df2.to_excel(raw_path, index=False)
920
 
921
- # bells
922
- name_for_hover = nama_col if (nama_col and nama_col in df2.columns) else None
923
-
924
- fig_real_all = make_bell_figure(df2, "Bell Curve β€” Indeks REAL (Semua)", "Indeks_Real_0_100", name_col=name_for_hover)
925
- fig_final_all = make_bell_figure(df2, "Bell Curve β€” Indeks FINAL (Penalti 68%) (Semua)", "Indeks_Final_0_100", name_col=name_for_hover)
926
 
927
- fig_final_sek = make_bell_figure(df2[df2["_dataset"]=="sekolah"], "FINAL β€” Sekolah", "Indeks_Final_0_100", name_col=name_for_hover, min_points=3)
928
- fig_final_um = make_bell_figure(df2[df2["_dataset"]=="umum"], "FINAL β€” Umum", "Indeks_Final_0_100", name_col=name_for_hover, min_points=3)
929
- fig_final_kh = make_bell_figure(df2[df2["_dataset"]=="khusus"], "FINAL β€” Khusus", "Indeks_Final_0_100", name_col=name_for_hover, min_points=3)
930
 
931
- # analisis
932
- analysis_text = build_analysis_llm(detail_df=df2, agg_df=agg_df, verif_df=verif_df, wilayah=wilayah, kew=kew_value)
 
933
 
934
- # word report
935
- word_path = generate_word_report(detail_df=df2, agg_df=agg_df, verif_df=verif_df, wilayah=wilayah, kew=kew_value, analysis_text=analysis_text)
936
-
937
- msg = f"βœ… Selesai. Unit: {len(df2)} | Wilayah: {wilayah} | Kew: {kew_value} | Mean Final: {df2['Indeks_Final_0_100'].mean():.2f}"
938
 
939
  return (agg_df, detail_df, verif_df,
940
  agg_path, detail_path, raw_path, word_path,
@@ -942,32 +915,31 @@ def run_pipeline_filtered(prov_value, kab_value, kew_value):
942
  msg, analysis_text)
943
 
944
  # ============================================================
945
- # 10) UI GRADIO
946
  # ============================================================
947
 
948
  def all_prov_choices():
949
- if df_all_raw is None or prov_col is None:
950
  return ["(Semua)"]
951
- s = df_all_raw[prov_col].dropna().astype(str).str.strip()
952
- vals = sorted([o for o in s.unique() if o != ""])
953
  return ["(Semua)"] + vals
954
 
955
  def get_kab_choices_for_prov(prov_value):
956
- if df_all_raw is None or kab_col is None:
957
  return ["(Semua)"]
958
- if prov_value is None or prov_value == "(Semua)" or prov_col is None:
959
- s = df_all_raw[kab_col].dropna().astype(str).str.strip()
960
- else:
961
- m = df_all_raw[prov_col].astype(str).str.strip() == prov_value
962
- s = df_all_raw.loc[m, kab_col].dropna().astype(str).str.strip()
963
- vals = sorted([x for x in s.unique() if x != ""])
964
  return ["(Semua)"] + vals
965
 
966
  def all_kew_choices():
967
- if df_all_raw is None:
968
  return ["(Semua)"]
969
- s = df_all_raw["KEW_NORM"].dropna().astype(str).str.strip()
970
- vals = sorted([o for o in s.unique() if o != ""])
971
  return ["(Semua)"] + (vals if vals else ["KAB/KOTA","PROVINSI"])
972
 
973
  prov_choices = all_prov_choices()
@@ -979,16 +951,20 @@ def on_prov_change(prov_value):
979
  new_choices = get_kab_choices_for_prov(prov_value)
980
  return gr.update(choices=new_choices, value="(Semua)")
981
 
 
 
 
 
982
  with gr.Blocks() as demo:
983
  gr.Markdown(
984
  f"""
985
- # IPLM 2025 β€” FULL (RealScore + Penalti Coverage 68% + Verifikasi + Export + Word + Analisis)
986
  **Aturan penalti**: 68% coverage dianggap 100% (bobot=1). Jika kurang, bobot = coverage/0.68.
987
 
988
  **Sumber data**:
989
  - DM: `{DATA_FILE}`
990
- - Populasi Kab/Kota: `{POP_KAB}`
991
- - Populasi Provinsi: `{POP_PROV}`
992
 
993
  {DATA_INFO}
994
  """
@@ -1004,13 +980,13 @@ with gr.Blocks() as demo:
1004
  run_btn = gr.Button("Jalankan Perhitungan")
1005
  msg_out = gr.Markdown()
1006
 
1007
- gr.Markdown("## Agregat (Mean Real & Mean Final) per Jenis")
1008
  agg_df_out = gr.DataFrame(interactive=False)
1009
 
1010
- gr.Markdown("## Detail (Real, Coverage, Bobot, Final)")
1011
  detail_df_out = gr.DataFrame(interactive=False)
1012
 
1013
- gr.Markdown("## Verifikasi Coverage + GAP menuju 68%")
1014
  verif_df_out = gr.DataFrame(interactive=False)
1015
 
1016
  gr.Markdown("## Bell Curve β€” REAL (Semua)")
@@ -1031,8 +1007,8 @@ with gr.Blocks() as demo:
1031
  with gr.Row():
1032
  agg_file_out = gr.File(label="Download Agregat (.xlsx)")
1033
  detail_file_out = gr.File(label="Download Detail (.xlsx)")
1034
- raw_file_out = gr.File(label="Download Raw Subset (.xlsx)")
1035
- word_file_out = gr.File(label="Download Laporan Word (.docx)")
1036
 
1037
  run_btn.click(
1038
  fn=run_pipeline_filtered,
 
1
  # -*- coding: utf-8 -*-
2
  """
3
+ app.py — IPLM 2025 (FULL, FIX DUPLICATE + FULL INDICATORS)
4
  - Pipeline nasional: Yeo-Johnson + MinMax (sekali nasional)
5
+ - FinalScore = RealScore * bobot_coverage_68 (internal)
6
+ - 68% coverage = bobot 1.0 ; <68% bobot = coverage/0.68
 
 
 
7
  - Populasi resmi:
8
+ * Kab/Kota: Data_populasi_Kab_kota.xlsx
9
+ * Provinsi: Data_populasi_propinsi.xlsx
10
+ - FIX:
11
+ * Dropdown prov/kab tidak dobel (PROV_DISP/KAB_DISP)
12
+ * Dedup record (prov,kab,nama,kew,dataset)
13
+ * Detail: tampilkan semua indikator, sembunyikan bobot_coverage & coverage
14
+ * Agregat: tampilkan semua indikator, tanpa Mean_Real/Mean_Final
15
  """
16
 
17
  import os
 
30
  # 1) KONFIGURASI FILE
31
  # ============================================================
32
 
33
+ DATA_FILE = "IPLM_clean_manual_131225.xlsx" # sesuaikan jika nama file DM kamu berbeda
34
+ POP_KAB = "Data_populasi_Kab_kota.xlsx"
35
+ POP_PROV = "Data_populasi_propinsi.xlsx"
36
 
37
+ TARGET_COVERAGE = 0.68
38
  W_KEPATUHAN = 0.30
39
  W_KINERJA = 0.70
40
 
 
61
  _HF_CLIENT = None
62
  return None
63
  try:
64
+ _HF_CLIENT = InferenceClient(model=LLM_MODEL_NAME, token=HF_TOKEN) if HF_TOKEN else InferenceClient(model=LLM_MODEL_NAME)
 
 
 
65
  return _HF_CLIENT
66
  except Exception:
67
  _HF_CLIENT = None
 
74
  def _canon(s: str) -> str:
75
  return re.sub(r"[^a-z0-9]+", "", str(s).lower())
76
 
77
+ def _disp_text(x):
78
+ """Uppercase + rapihin spasi (biar dropdown tidak dobel)."""
79
+ if pd.isna(x):
80
+ return None
81
+ t = str(x).strip().upper()
82
+ t = " ".join(t.split())
83
+ return t
84
+
85
  def coerce_num(val):
86
  if pd.isna(val):
87
  return np.nan
 
143
  if pd.isna(s):
144
  return None
145
  t = str(s).upper()
146
+ for bad in ["PROVINSI", "PROPINSI"]:
147
  t = t.replace(bad, "")
148
  t = " ".join(t.split())
149
  return re.sub(r"[^A-Z0-9]+", "", t)
 
182
  return float(np.mean(vals))
183
 
184
  def cap_bobot(cov: float) -> float:
 
185
  if cov is None or pd.isna(cov) or cov <= 0:
186
  return 0.0
187
  return float(min(cov / TARGET_COVERAGE, 1.0))
 
192
  return float(num) / float(den)
193
 
194
  # ============================================================
195
+ # 3) INDIKATOR IPLM
196
  # ============================================================
197
 
198
  koleksi_cols = [
 
217
  ]
218
  all_indicators = koleksi_cols + sdm_cols + pelayanan_cols + pengelolaan_cols
219
 
 
220
  alias_map_raw = {
221
  "j_judul_koleksi_tercetak": "JudulTercetak",
222
  "j_eksemplar_koleksi_tercetak": "EksemplarTercetak",
 
247
  alias_map = {_canon(k): v for k, v in alias_map_raw.items()}
248
 
249
  # ============================================================
250
+ # 4) LOAD DM + POPULASI
251
  # ============================================================
252
 
253
  DATA_INFO = ""
 
271
  kab_col = pick_col(df_all_raw, ["kab_kota", "Kab_Kota", "Kab/Kota", "KAB/KOTA", "kabupaten_kota", "kota"])
272
  kew_col = pick_col(df_all_raw, ["kewenangan", "jenis_kewenangan", "Kewenangan", "KEWENANGAN"])
273
  jenis_col = pick_col(df_all_raw, ["jenis_perpustakaan", "JENIS_PERPUSTAKAAN", "Jenis Perpustakaan", "jenis perpustakaan"])
274
+ nama_col = pick_col(df_all_raw, ["nm_perpustakaan","nama_perpustakaan", "nm_instansi_lembaga", "Nama Perpustakaan"])
275
 
276
  df_all_raw["KEW_NORM"] = df_all_raw[kew_col].apply(norm_kew) if kew_col else None
277
 
 
286
  }
287
  df_all_raw["_dataset"] = df_all_raw[jenis_col].apply(_norm_text).map(val_map_jenis) if jenis_col else None
288
 
289
+ # kolom tampilan konsisten (buat dropdown + filter)
290
+ if prov_col:
291
+ df_all_raw["PROV_DISP"] = df_all_raw[prov_col].apply(_disp_text)
292
+ else:
293
+ df_all_raw["PROV_DISP"] = None
294
+ if kab_col:
295
+ df_all_raw["KAB_DISP"] = df_all_raw[kab_col].apply(_disp_text)
296
+ else:
297
+ df_all_raw["KAB_DISP"] = None
298
+
299
  DATA_INFO = f"βœ… DM terbaca: **{DATA_FILE}** | Baris: **{len(df_all_raw)}**"
300
  except Exception as e:
301
  df_all_raw = None
302
  DATA_INFO = f"⚠️ Gagal memuat DM: `{e}`"
303
 
 
304
  POP_INFO = []
305
+
306
+ # ---- POP KAB ----
307
  try:
308
  pk = pd.read_excel(POP_KAB)
309
  c_prov = pick_col(pk, ["PROVINSI", "Provinsi"])
 
316
  c_pop_sekolah = pick_col(pk, ["jumlah_populasi_sekolah"])
317
 
318
  if c_kab is None:
319
+ raise ValueError("Kolom Kab/Kota tidak ditemukan di populasi kab/kota.")
320
 
321
  df_pop_kab = pd.DataFrame({
322
  "Provinsi_Label": pk[c_prov].astype(str).str.strip() if c_prov else None,
 
330
  })
331
  df_pop_kab["kab_key"] = df_pop_kab["Kab_Kota_Label"].apply(norm_kab_label)
332
 
 
333
  if df_pop_kab["Pop_Umum"].isna().all():
334
  df_pop_kab["Pop_Umum"] = df_pop_kab[["Jml_Kecamatan","Jml_DesaKel"]].sum(axis=1, skipna=True)
335
  if df_pop_kab["Pop_Sekolah"].isna().all():
 
340
  df_pop_kab = None
341
  POP_INFO.append(f"⚠️ Gagal memuat populasi Kab/Kota: `{e}`")
342
 
343
+ # ---- POP PROV ----
344
  try:
345
  pp = pd.read_excel(POP_PROV)
346
  c_prov = pick_col(pp, ["Provinsi", "PROVINSI"])
347
  c_total_pend = pick_col(pp, ["total_pend", "TOTAL_PEND", "total pend"])
348
+ c_sma = pick_col(pp, ["sma", "sma "])
349
+
350
  if c_prov is None:
351
+ raise ValueError("Kolom Provinsi tidak ditemukan di populasi provinsi.")
352
  if c_total_pend is None and c_sma is None:
353
+ raise ValueError("Kolom total_pend/sma tidak ditemukan di populasi provinsi.")
354
 
355
  df_pop_prov = pd.DataFrame({
356
  "Provinsi_Label": pp[c_prov].astype(str).str.strip(),
 
371
  DATA_INFO = DATA_INFO + "<br>" + "<br>".join(POP_INFO)
372
 
373
  # ============================================================
374
+ # 5) PIPELINE NASIONAL: REALSCORE
375
  # ============================================================
376
 
377
  def prepare_global_iplm(df_src: pd.DataFrame) -> pd.DataFrame:
 
395
  if rename_map:
396
  df = df.rename(columns=rename_map)
397
 
 
398
  available = [c for c in all_indicators if c in df.columns]
399
  for c in available:
400
  df[c] = df[c].apply(coerce_num)
 
423
 
424
  df["Indeks_Real_0_100"] = 100 * (W_KEPATUHAN * df["dim_kepatuhan"] + W_KINERJA * df["dim_kinerja"])
425
 
 
426
  for c in ["sub_koleksi","sub_sdm","sub_pelayanan","sub_pengelolaan","dim_kepatuhan","dim_kinerja","Indeks_Real_0_100"]:
427
  df[c] = df[c].fillna(0.0)
428
 
 
431
  df_all_ipml = prepare_global_iplm(df_all_raw) if df_all_raw is not None else None
432
 
433
  # ============================================================
434
+ # 6) COVERAGE + BOBOT + FINAL (INTERNAL)
435
  # ============================================================
436
 
437
  def compute_coverage_and_weight(df_filtered: pd.DataFrame, kew_value: str):
 
 
 
 
 
438
  if df_filtered is None or df_filtered.empty:
439
  return df_filtered, pd.DataFrame()
440
 
 
443
 
444
  df["bobot_coverage"] = 1.0
445
  df["coverage"] = np.nan
 
446
 
447
+ # KAB/KOTA
448
  if ("KAB" in kew_norm or "KOTA" in kew_norm) and kab_col and df_pop_kab is not None:
449
  tmp = df.copy()
450
+ tmp["kab_key"] = tmp["KAB_DISP"].apply(norm_kab_label) if "KAB_DISP" in tmp.columns else tmp[kab_col].apply(norm_kab_label)
451
 
 
452
  g = tmp.groupby(["kab_key","_dataset"]).size().rename("n_sampel").reset_index()
453
  g_piv = g.pivot(index="kab_key", columns="_dataset", values="n_sampel").fillna(0)
454
 
 
477
 
478
  rows.append({
479
  "Kab/Kota": kab_label,
480
+ "Pop_Sekolah": pop_sek,
481
+ "Sampel_Sekolah": n_sek,
482
+ "Coverage_Sekolah": cov_sek,
483
+ "Bobot_Sekolah_68": bobot_sek,
484
+ "GAP_Ke_68_Sekolah": gap_sek,
485
+
486
+ "Pop_Umum": pop_um,
487
+ "Sampel_Umum": n_um,
488
+ "Coverage_Umum": cov_um,
489
+ "Bobot_Umum_68": bobot_um,
490
+ "GAP_Ke_68_Umum": gap_um,
491
  })
492
 
493
  verif_df = pd.DataFrame(rows)
494
 
495
+ bobot_map_sek = {norm_kab_label(r["Kab/Kota"]): r["Bobot_Sekolah_68"] for _, r in verif_df.iterrows()}
496
+ bobot_map_um = {norm_kab_label(r["Kab/Kota"]): r["Bobot_Umum_68"] for _, r in verif_df.iterrows()}
497
+ cov_map_sek = {norm_kab_label(r["Kab/Kota"]): r["Coverage_Sekolah"] for _, r in verif_df.iterrows()}
498
+ cov_map_um = {norm_kab_label(r["Kab/Kota"]): r["Coverage_Umum"] for _, r in verif_df.iterrows()}
499
+
500
+ df["kab_key"] = df["KAB_DISP"].apply(norm_kab_label) if "KAB_DISP" in df.columns else df[kab_col].apply(norm_kab_label)
501
 
502
  def row_weight(r):
503
  ds = r.get("_dataset", None)
 
510
  return float(bobot_map_um.get(kk, 0.0))
511
  return 1.0
512
 
 
 
 
 
513
  def row_cov(r):
514
  ds = r.get("_dataset", None)
515
  kk = r.get("kab_key", None)
516
  if ds == "sekolah":
517
+ return float(cov_map_sek.get(kk, np.nan))
 
 
518
  if ds == "umum":
519
+ return float(cov_map_um.get(kk, np.nan))
 
520
  return np.nan
521
+
522
+ df["bobot_coverage"] = df.apply(row_weight, axis=1)
523
  df["coverage"] = df.apply(row_cov, axis=1)
524
 
525
+ # PROVINSI
526
  elif ("PROV" in kew_norm) and prov_col and df_pop_prov is not None:
527
  tmp = df.copy()
528
+ tmp["prov_key"] = tmp["PROV_DISP"].apply(norm_prov_label) if "PROV_DISP" in tmp.columns else tmp[prov_col].apply(norm_prov_label)
529
 
530
  g = tmp.groupby(["prov_key","_dataset"]).size().rename("n_sampel").reset_index()
531
  g_piv = g.pivot(index="prov_key", columns="_dataset", values="n_sampel").fillna(0)
 
539
 
540
  cov_sek = safe_div(n_sek, pop_sek)
541
  bobot_sek = cap_bobot(cov_sek)
 
542
  target_sek = (TARGET_COVERAGE * pop_sek) if not pd.isna(pop_sek) else np.nan
543
  gap_sek = max(target_sek - n_sek, 0) if not pd.isna(target_sek) else np.nan
544
 
 
546
 
547
  rows.append({
548
  "Provinsi": prov_label,
549
+ "Pop_Sekolah": pop_sek,
550
+ "Sampel_Sekolah": n_sek,
551
+ "Coverage_Sekolah": cov_sek,
552
+ "Bobot_Sekolah_68": bobot_sek,
553
+ "GAP_Ke_68_Sekolah": gap_sek,
554
  })
555
 
556
  verif_df = pd.DataFrame(rows)
557
 
558
+ bobot_map = {norm_prov_label(r["Provinsi"]): r["Bobot_Sekolah_68"] for _, r in verif_df.iterrows()}
559
+ cov_map = {norm_prov_label(r["Provinsi"]): r["Coverage_Sekolah"] for _, r in verif_df.iterrows()}
560
+
561
+ df["prov_key"] = df["PROV_DISP"].apply(norm_prov_label) if "PROV_DISP" in df.columns else df[prov_col].apply(norm_prov_label)
562
 
563
  def row_weight(r):
564
  ds = r.get("_dataset", None)
 
568
  return float(bobot_map.get(r.get("prov_key", None), 0.0))
569
  return 1.0
570
 
 
 
 
571
  def row_cov(r):
572
  if r.get("_dataset", None) != "sekolah":
573
  return np.nan
574
+ return float(cov_map.get(r.get("prov_key", None), np.nan))
575
+
576
+ df["bobot_coverage"] = df.apply(row_weight, axis=1)
577
  df["coverage"] = df.apply(row_cov, axis=1)
578
 
579
  else:
580
  verif_df = pd.DataFrame()
581
 
 
582
  df["Indeks_Final_0_100"] = (df["Indeks_Real_0_100"].fillna(0.0) * df["bobot_coverage"].fillna(0.0)).fillna(0.0)
 
583
  return df, verif_df
584
 
585
  # ============================================================
586
+ # 7) BELL CURVE
587
  # ============================================================
588
 
589
  def make_bell_figure(df_all: pd.DataFrame, title: str, index_col: str, name_col: str = None, min_points: int = 5) -> go.Figure:
 
634
  return fig
635
 
636
  # ============================================================
637
+ # 8) ANALISIS (RULE / LLM)
638
  # ============================================================
639
 
640
def build_analysis_rule(df2, agg_df, verif_df, wilayah, kew):
    """Build the rule-based Markdown summary for the current filter.

    Args:
        df2: Per-unit frame; the ``Indeks_Real_0_100`` and
            ``Indeks_Final_0_100`` columns are averaged when present.
        agg_df: Accepted for signature parity with the LLM variant; not read.
        verif_df: Coverage verification table, or ``None``/empty. Only the
            first column whose name contains ``"GAP"`` is totalled.
        wilayah: Region label shown in the summary.
        kew: Authority-level label shown in the summary.

    Returns:
        The summary as newline-joined Markdown text.
    """
    def _column_mean(col):
        # NaN marks "column absent" so the matching bullet is skipped below.
        return float(df2[col].mean()) if col in df2.columns else np.nan

    avg_real = _column_mean("Indeks_Real_0_100")
    avg_final = _column_mean("Indeks_Final_0_100")

    report = [
        "## Analisis Otomatis (Rule-based)",
        f"- Wilayah: {wilayah} | Kewenangan: {kew}",
        f"- Jumlah unit sampel (setelah dedup): {len(df2)}",
    ]
    if not pd.isna(avg_real):
        report.append(f"- Rata-rata Indeks Real: {avg_real:.2f}")
    if not pd.isna(avg_final):
        report.append(f"- Rata-rata Indeks Final (penalti 68%): {avg_final:.2f}")

    has_verif = verif_df is not None and not verif_df.empty
    first_gap = None
    if has_verif:
        first_gap = next((c for c in verif_df.columns if "GAP" in c), None)
    if first_gap is not None:
        total_gap = verif_df[first_gap].dropna().sum()
        report.append(f"- Total GAP (contoh kolom {first_gap}): {total_gap:.0f} unit")

    report.append("")
    report.append("Rekomendasi: fokus menutup GAP unit menuju 68% pada wilayah dengan kekurangan terbesar, sehingga pembobotan tidak menurunkan skor final.")
    return "\n".join(report)
659
 
660
+ def build_analysis_llm(df2, agg_df, verif_df, wilayah, kew):
661
+ rb = build_analysis_rule(df2, agg_df, verif_df, wilayah, kew)
 
662
  if not USE_LLM:
663
  return rb
664
  client = get_llm_client()
665
  if client is None:
666
  return "⚠️ LLM tidak tersedia, memakai rule-based.\n\n" + rb
667
 
668
+ mean_real = float(df2["Indeks_Real_0_100"].mean())
669
+ mean_final = float(df2["Indeks_Final_0_100"].mean())
670
+
671
+ ctx = [f"Wilayah: {wilayah}", f"Kew: {kew}", f"Unit: {len(df2)}", f"Mean Real: {mean_real:.2f}", f"Mean Final: {mean_final:.2f}"]
 
 
 
 
 
 
 
672
  if verif_df is not None and not verif_df.empty:
 
673
  gap_cols = [c for c in verif_df.columns if "GAP" in c]
674
  if gap_cols:
675
  g0 = gap_cols[0]
676
+ vv = verif_df.sort_values(g0, ascending=False).head(5)
677
+ ctx.append("Top 5 GAP:")
 
678
  ctx.append(vv.to_string(index=False))
679
 
680
+ system_prompt = "Anda adalah analis kebijakan perpustakaan dan literasi di Indonesia."
 
 
 
681
  user_prompt = f"""
682
+ DATA:
683
+ {chr(10).join(ctx)}
684
+
685
+ Tulis analisis ringkas (3–5 paragraf) tentang dampak penalti coverage 68% dan rekomendasi prioritas menutup GAP.
686
+ Bahasa Indonesia formal, tanpa label 'rendah/sedang/tinggi'.
 
 
 
687
  """
688
  try:
689
  resp = client.chat_completion(
690
  model=LLM_MODEL_NAME,
691
  messages=[{"role":"system","content":system_prompt},{"role":"user","content":user_prompt}],
692
+ max_tokens=700,
693
  temperature=0.25,
694
  top_p=0.9,
695
  )
 
698
  except Exception as e:
699
  return f"⚠️ Gagal memanggil LLM ({repr(e)}), memakai rule-based.\n\n{rb}"
700
 
701
+ # ============================================================
702
+ # 9) WORD REPORT (opsional pie)
703
+ # ============================================================
704
+
705
+ from docx import Document
706
+ from docx.shared import Inches
707
+
708
+ try:
709
+ import kaleido # noqa
710
+ HAS_KALEIDO = True
711
+ except Exception:
712
+ HAS_KALEIDO = False
713
+
714
def make_pie_plotly(num, den, title):
    """Render a two-slice coverage donut ("Terjangkau" vs "Belum Terjangkau") to a PNG.

    Args:
        num: Number of sampled units (the covered slice). ``None``/NaN is
            treated as 0 so a missing count cannot crash report generation.
        den: Population size. When missing or non-positive the chart shows
            everything as not covered.
        title: Chart title.

    Returns:
        Path of the written PNG file, or ``None`` when ``HAS_KALEIDO`` is
        false or the image export fails.
    """
    if not HAS_KALEIDO:
        return None

    labels = ["Terjangkau", "Belum Terjangkau"]
    if den is None or pd.isna(den) or den <= 0:
        values = [0, 1]
    else:
        reached = 0.0 if num is None or pd.isna(num) else float(num)
        values = [reached, max(float(den) - reached, 0.0)]
    fig = px.pie(values=values, names=labels, title=title, hole=0.3)

    # mkstemp creates the file atomically; mktemp only returns a name and is
    # deprecated because another process can claim that name first.
    fd, path = tempfile.mkstemp(suffix=".png")
    os.close(fd)
    try:
        fig.write_image(path, scale=2)
    except Exception:
        # Best effort: export problems must not abort the Word report,
        # but do not leave an empty placeholder file behind.
        try:
            os.remove(path)
        except OSError:
            pass
        return None
    return path
730
+
731
+ def generate_word_report(df2, agg_df, verif_df, wilayah, kew, analysis_text):
732
  doc = Document()
733
  doc.add_heading(f"Laporan IPLM – {wilayah}", level=1)
734
 
735
+ doc.add_heading("Ringkasan", level=2)
736
+ doc.add_paragraph(f"- Unit (setelah dedup): {len(df2)}")
737
+ doc.add_paragraph(f"- Rata-rata Indeks Real: {df2['Indeks_Real_0_100'].mean():.2f}")
738
+ doc.add_paragraph(f"- Rata-rata Indeks Final: {df2['Indeks_Final_0_100'].mean():.2f}")
 
 
739
 
740
+ doc.add_heading("Agregat per Jenis", level=2)
741
  if agg_df is not None and not agg_df.empty:
742
  table = doc.add_table(rows=1, cols=len(agg_df.columns))
743
  hdr = table.rows[0].cells
 
748
  for i, c in enumerate(agg_df.columns):
749
  r[i].text = str(row[c])
750
 
751
+ doc.add_heading("Verifikasi Coverage & GAP (68%)", level=2)
752
  if verif_df is None or verif_df.empty:
753
  doc.add_paragraph("Tidak ada tabel verifikasi coverage untuk filter ini.")
754
  else:
 
755
  if HAS_KALEIDO:
756
+ # ringkas total sekolah & umum bila ada
757
+ if "Pop_Sekolah" in verif_df.columns and "Sampel_Sekolah" in verif_df.columns:
758
+ img = make_pie_plotly(verif_df["Sampel_Sekolah"].sum(), verif_df["Pop_Sekolah"].sum(), "Coverage Sekolah (Total)")
759
+ if img: doc.add_picture(img, width=Inches(4))
760
+ if "Pop_Umum" in verif_df.columns and "Sampel_Umum" in verif_df.columns:
761
+ img = make_pie_plotly(verif_df["Sampel_Umum"].sum(), verif_df["Pop_Umum"].sum(), "Coverage Umum (Total)")
762
+ if img: doc.add_picture(img, width=Inches(4))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
763
  else:
764
  doc.add_paragraph("Pie chart tidak dibuat karena 'kaleido' tidak tersedia.")
765
 
 
 
766
  vtab = doc.add_table(rows=1, cols=len(verif_df.columns))
767
  vh = vtab.rows[0].cells
768
  for i, c in enumerate(verif_df.columns):
 
772
  for i, c in enumerate(verif_df.columns):
773
  rr[i].text = str(row[c])
774
 
775
+ doc.add_heading("Analisis Naratif", level=2)
776
  for p in analysis_text.split("\n"):
777
  if p.strip():
778
  doc.add_paragraph(p)
 
782
  return out
783
 
784
  # ============================================================
785
+ # 10) AGREGAT (TANPA Mean_Real/Mean_Final) + FULL INDIKATOR
786
  # ============================================================
787
 
788
def build_agg_full(df2: pd.DataFrame) -> pd.DataFrame:
    """Aggregate per-unit scores into one row per library type plus a total row.

    Each output row holds ``Jenis``, ``Jumlah`` and ``Rata2_<col>`` means for
    every raw indicator present in ``df2``, for the sub-dimension/dimension
    columns that exist, and for the Real and Final indices. A group with no
    rows reports 0.0 for every mean. Values are rounded to 4 decimals.
    """
    jenis_names = {"sekolah":"Perpustakaan Sekolah","umum":"Perpustakaan Umum","khusus":"Perpustakaan Khusus"}
    derived_cols = ("sub_koleksi","sub_sdm","sub_pelayanan","sub_pengelolaan","dim_kepatuhan","dim_kinerja")
    index_cols = ("Indeks_Real_0_100", "Indeks_Final_0_100")
    indicator_cols = [col for col in all_indicators if col in df2.columns]

    def _avg(frame, col):
        # Empty groups are reported as 0.0 rather than NaN.
        if not len(frame):
            return 0.0
        return float(frame[col].mean(skipna=True))

    def _summary_row(frame, label):
        record = {"Jenis": label, "Jumlah": int(len(frame))}
        record.update({f"Rata2_{col}": _avg(frame, col) for col in indicator_cols})
        record.update({f"Rata2_{col}": _avg(frame, col) for col in derived_cols if col in frame.columns})
        for col in index_cols:
            record[f"Rata2_{col}"] = _avg(frame, col) if col in frame.columns else 0.0
        return record

    has_dataset = "_dataset" in df2.columns
    records = []
    for kind in ("sekolah", "umum", "khusus"):
        # Without a dataset column every per-type group is empty.
        subset = df2[df2["_dataset"] == kind].copy() if has_dataset else df2.iloc[0:0]
        records.append(_summary_row(subset, jenis_names.get(kind, kind)))

    records.append(_summary_row(df2, "Rata-rata keseluruhan"))
    return pd.DataFrame(records).round(4)
821
 
822
+ # ============================================================
823
+ # 11) PIPELINE FILTERED: DEDUP + DETAIL FULL INDIKATOR
824
+ # ============================================================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
825
 
826
  def run_pipeline_filtered(prov_value, kab_value, kew_value):
827
  if df_all_ipml is None or df_all_ipml.empty:
828
  return (pd.DataFrame(), pd.DataFrame(), pd.DataFrame(),
829
  None, None, None, None,
830
+ None, None, None, None, None,
831
  "Data DM belum siap / gagal diproses.", "Tidak ada analisis.")
832
 
833
  df = df_all_ipml.copy()
834
 
835
+ # FILTER pakai PROV_DISP/KAB_DISP agar stabil & tidak dobel
836
+ if "PROV_DISP" in df.columns and prov_value and prov_value != "(Semua)":
837
+ df = df[df["PROV_DISP"] == prov_value]
838
+ if "KAB_DISP" in df.columns and kab_value and kab_value != "(Semua)":
839
+ df = df[df["KAB_DISP"] == kab_value]
840
  if kew_value and kew_value != "(Semua)":
841
  df = df[df["KEW_NORM"] == kew_value]
842
 
843
  if df.empty:
844
  return (pd.DataFrame(), pd.DataFrame(), pd.DataFrame(),
845
  None, None, None, None,
846
+ None, None, None, None, None,
847
  "Tidak ada data untuk kombinasi filter.", "Tidak ada analisis.")
848
 
849
  wilayah = kab_value if kab_value and kab_value != "(Semua)" else (prov_value if prov_value and prov_value != "(Semua)" else "NASIONAL")
850
  kew = kew_value if kew_value and kew_value != "(Semua)" else "SEMUA"
851
 
852
+ # Coverage + bobot + final
853
  df2, verif_df = compute_coverage_and_weight(df, kew_value)
854
 
855
+ # DEDUP: prov,kab,nama,kew,dataset
856
+ # (ini yang bikin tidak dobel di detail & agregat)
857
+ kcols = []
858
+ for c in ["PROV_DISP","KAB_DISP","KEW_NORM","_dataset"]:
859
+ if c in df2.columns:
860
+ kcols.append(c)
861
+ if nama_col and nama_col in df2.columns:
862
+ kcols.append(nama_col)
863
+
864
+ if kcols:
865
+ df2 = df2.drop_duplicates(subset=kcols, keep="first").copy()
866
+
867
+ # AGREGAT (FULL INDIKATOR) — tanpa Mean_*
868
+ agg_df = build_agg_full(df2)
869
+
870
+ # DETAIL (FULL INDIKATOR) — sembunyikan bobot_coverage & coverage
871
+ available_ind = [c for c in all_indicators if c in df2.columns]
872
 
873
+ base_cols = ["PROV_DISP","KAB_DISP"]
874
+ base_cols = [c for c in base_cols if c in df2.columns]
875
+ if nama_col and nama_col in df2.columns:
876
+ base_cols.append(nama_col)
877
+
878
+ base_cols += ["KEW_NORM","_dataset",
879
+ "sub_koleksi","sub_sdm","sub_pelayanan","sub_pengelolaan",
880
+ "dim_kepatuhan","dim_kinerja",
881
+ "Indeks_Real_0_100","Indeks_Final_0_100"]
882
+
883
+ detail_cols = [c for c in base_cols if c in df2.columns] + available_ind
884
+ detail_df = df2[detail_cols].copy().round(4)
885
 
886
+ # EXPORT
887
  tmpdir = tempfile.mkdtemp()
888
  slug = slugify(wilayah) + "_" + slugify(kew)
 
889
  agg_path = os.path.join(tmpdir, f"IPLM_Agregat_{slug}.xlsx")
890
  detail_path = os.path.join(tmpdir, f"IPLM_Detail_{slug}.xlsx")
891
  raw_path = os.path.join(tmpdir, f"IPLM_Raw_{slug}.xlsx")
 
894
  detail_df.to_excel(detail_path, index=False)
895
  df2.to_excel(raw_path, index=False)
896
 
897
+ # BELL
898
+ hover_name = nama_col if (nama_col and nama_col in df2.columns) else None
899
+ fig_real_all = make_bell_figure(df2, "Bell Curve β€” Indeks REAL (Semua)", "Indeks_Real_0_100", name_col=hover_name)
900
+ fig_final_all = make_bell_figure(df2, "Bell Curve β€” Indeks FINAL (Semua)", "Indeks_Final_0_100", name_col=hover_name)
 
901
 
902
+ fig_final_sek = make_bell_figure(df2[df2["_dataset"]=="sekolah"], "FINAL β€” Sekolah", "Indeks_Final_0_100", name_col=hover_name, min_points=3)
903
+ fig_final_um = make_bell_figure(df2[df2["_dataset"]=="umum"], "FINAL β€” Umum", "Indeks_Final_0_100", name_col=hover_name, min_points=3)
904
+ fig_final_kh = make_bell_figure(df2[df2["_dataset"]=="khusus"], "FINAL β€” Khusus", "Indeks_Final_0_100", name_col=hover_name, min_points=3)
905
 
906
+ # Analisis + Word
907
+ analysis_text = build_analysis_llm(df2=df2, agg_df=agg_df, verif_df=verif_df, wilayah=wilayah, kew=kew_value)
908
+ word_path = generate_word_report(df2, agg_df, verif_df, wilayah, kew_value, analysis_text)
909
 
910
+ msg = f"βœ… Selesai. Unit (dedup): {len(df2)} | Wilayah: {wilayah} | Kew: {kew_value} | Mean Final: {df2['Indeks_Final_0_100'].mean():.2f}"
 
 
 
911
 
912
  return (agg_df, detail_df, verif_df,
913
  agg_path, detail_path, raw_path, word_path,
 
915
  msg, analysis_text)
916
 
917
  # ============================================================
918
+ # 12) DROPDOWN CHOICES (NO DUPLICATE)
919
  # ============================================================
920
 
921
  def all_prov_choices():
922
+ if df_all_raw is None or "PROV_DISP" not in df_all_raw.columns:
923
  return ["(Semua)"]
924
+ vals = df_all_raw["PROV_DISP"].dropna()
925
+ vals = sorted(list(dict.fromkeys([v for v in vals.tolist() if str(v).strip() != ""])))
926
  return ["(Semua)"] + vals
927
 
928
  def get_kab_choices_for_prov(prov_value):
929
+ if df_all_raw is None or "KAB_DISP" not in df_all_raw.columns:
930
  return ["(Semua)"]
931
+ tmp = df_all_raw.copy()
932
+ if prov_value and prov_value != "(Semua)" and "PROV_DISP" in tmp.columns:
933
+ tmp = tmp[tmp["PROV_DISP"] == prov_value]
934
+ vals = tmp["KAB_DISP"].dropna()
935
+ vals = sorted(list(dict.fromkeys([v for v in vals.tolist() if str(v).strip() != ""])))
 
936
  return ["(Semua)"] + vals
937
 
938
  def all_kew_choices():
939
+ if df_all_raw is None or "KEW_NORM" not in df_all_raw.columns:
940
  return ["(Semua)"]
941
+ vals = df_all_raw["KEW_NORM"].dropna().astype(str).str.strip()
942
+ vals = sorted(list(dict.fromkeys([v for v in vals.tolist() if v != ""])))
943
  return ["(Semua)"] + (vals if vals else ["KAB/KOTA","PROVINSI"])
944
 
945
  prov_choices = all_prov_choices()
 
951
  new_choices = get_kab_choices_for_prov(prov_value)
952
  return gr.update(choices=new_choices, value="(Semua)")
953
 
954
+ # ============================================================
955
+ # 13) UI
956
+ # ============================================================
957
+
958
  with gr.Blocks() as demo:
959
  gr.Markdown(
960
  f"""
961
+ # IPLM 2025 β€” FULL (DEDUP + FULL INDICATORS)
962
  **Aturan penalti**: 68% coverage dianggap 100% (bobot=1). Jika kurang, bobot = coverage/0.68.
963
 
964
  **Sumber data**:
965
  - DM: `{DATA_FILE}`
966
+ - Pop Kab/Kota: `{POP_KAB}`
967
+ - Pop Provinsi: `{POP_PROV}`
968
 
969
  {DATA_INFO}
970
  """
 
980
  run_btn = gr.Button("Jalankan Perhitungan")
981
  msg_out = gr.Markdown()
982
 
983
+ gr.Markdown("## Agregat (FULL indikator, tanpa kolom Mean_*)")
984
  agg_df_out = gr.DataFrame(interactive=False)
985
 
986
+ gr.Markdown("## Detail (FULL indikator) β€” tanpa bobot_coverage & coverage")
987
  detail_df_out = gr.DataFrame(interactive=False)
988
 
989
+ gr.Markdown("## Verifikasi Coverage & GAP menuju 68%")
990
  verif_df_out = gr.DataFrame(interactive=False)
991
 
992
  gr.Markdown("## Bell Curve β€” REAL (Semua)")
 
1007
  with gr.Row():
1008
  agg_file_out = gr.File(label="Download Agregat (.xlsx)")
1009
  detail_file_out = gr.File(label="Download Detail (.xlsx)")
1010
+ raw_file_out = gr.File(label="Download Raw (.xlsx)")
1011
+ word_file_out = gr.File(label="Download Word (.docx)")
1012
 
1013
  run_btn.click(
1014
  fn=run_pipeline_filtered,