irhamni commited on
Commit
be96efe
·
verified ·
1 Parent(s): 8c55148

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +394 -408
app.py CHANGED
@@ -1,27 +1,15 @@
1
  # -*- coding: utf-8 -*-
2
  """
3
  IPLM 2025 — FINAL (NO UPLOAD)
4
- Penalti Coverage 68% DITERAPKAN SETELAH AGREGAT (bukan per entitas perpustakaan)
5
- + Bell Curve (berbasis agregat wilayah)
6
- + Analisis LLM (Word)
7
- + Download (tanpa upload box)
8
-
9
- PERMINTAAN PERBAIKAN:
10
- 1) Hilangkan tombol "Reload Data" dari tampilan UI.
11
- 2) Tabel "Ringkasan (per Jenis)" harus berisi: sub-dimensi, dimensi, dan nilai indeks pasca-penalty (Final agregat).
12
- 3) Pastikan individu perpustakaan tidak terkena penalti (penalti hanya di level agregat wilayah×jenis).
13
- 4) Penalti = rasio (n_sampel / target_68%) dengan batas maksimum 1.0.
14
- - jika n_sampel >= 0.68*pop => bobot = 1
15
- - jika n_sampel < 0.68*pop => bobot = n_sampel/(0.68*pop)
16
- - perpustakaan khusus: bobot = 1 (tanpa penalti)
17
- - jika populasi tidak valid/missing/0: bobot = 1 (tanpa penalti)
18
-
19
- Konsep:
20
- 1) Hitung Indeks_Real per perpustakaan: YJ + minmax nasional + sub/dim + bobot dim
21
- 2) Agregasi wilayah×jenis: mean(sub/dim/Indeks_Real)
22
- 3) Hitung target_68 dan bobot_coverage per wilayah×jenis (khusus bobot=1)
23
- 4) Indeks_Final_Agregat = Indeks_Real_Agregat * bobot_coverage
24
- 5) Detail entitas menampilkan Indeks_Final_0_100 = Indeks_Final_Agregat sesuai group (bukan penalti per-row)
25
  """
26
 
27
  import os
@@ -48,9 +36,8 @@ DATA_FILE = os.getenv("DATA_FILE", "IPLM_clean_manual_131225.xlsx")
48
  POP_KAB = os.getenv("POP_KAB", "Data_populasi_Kab_kota.xlsx")
49
  POP_PROV = os.getenv("POP_PROV", "Data_populasi_propinsi.xlsx")
50
 
51
- TARGET_COVERAGE = float(os.getenv("TARGET_COVERAGE", "0.68"))
52
- W_KEPATUHAN = float(os.getenv("W_KEPATUHAN", "0.30"))
53
- W_KINERJA = float(os.getenv("W_KINERJA", "0.70"))
54
 
55
  USE_LLM = True
56
  LLM_MODEL_NAME = os.getenv("LLM_MODEL_NAME", "meta-llama/Meta-Llama-3-8B-Instruct")
@@ -152,29 +139,20 @@ def norm_kab_label(s):
152
  return re.sub(r"[^A-Z0-9]+", "", t)
153
 
154
  def safe_div(num, den):
155
- if den is None or pd.isna(den) or den <= 0:
156
  return np.nan
157
  return float(num) / float(den)
158
 
159
- def cap_bobot_from_counts(n_sampel: float, pop: float) -> float:
160
  """
161
- Bobot coverage berdasarkan JUMLAH sampel terhadap target 68% populasi.
162
- bobot = min( n_sampel / (0.68*pop), 1.0 )
163
  """
164
- if pop is None or pd.isna(pop) or pop <= 0:
165
- return np.nan
166
- target_n = TARGET_COVERAGE * float(pop)
167
- if target_n <= 0:
168
- return np.nan
169
- if n_sampel is None or pd.isna(n_sampel) or n_sampel < 0:
170
- n_sampel = 0.0
171
- return float(min(float(n_sampel) / target_n, 1.0))
172
-
173
- def _bobot_or_one(b):
174
- # jika pop missing/0/NaN -> bobot=1 (tanpa penalti)
175
- if b is None or pd.isna(b) or b <= 0:
176
  return 1.0
177
- return float(b)
 
 
178
 
179
 
180
  # ============================================================
@@ -237,7 +215,7 @@ alias_map = {_canon(k): v for k, v in alias_map_raw.items()}
237
  # 4) PIPELINE NASIONAL (ENTITAS): YJ + MINMAX + SUBDIM/DIM/INDEKS REAL
238
  # ============================================================
239
 
240
- def penalized_mean(row, cols):
241
  vals = []
242
  for c in cols:
243
  k = f"norm_{c}"
@@ -284,10 +262,10 @@ def prepare_global(df_src: pd.DataFrame) -> pd.DataFrame:
284
  df[f"norm_{c}"] = minmax_norm(pd.Series(transformed, index=df.index))
285
 
286
  # subdim & dim (entitas)
287
- df["sub_koleksi"] = df.apply(lambda r: penalized_mean(r, [c for c in koleksi_cols if c in available]), axis=1)
288
- df["sub_sdm"] = df.apply(lambda r: penalized_mean(r, [c for c in sdm_cols if c in available]), axis=1)
289
- df["sub_pelayanan"] = df.apply(lambda r: penalized_mean(r, [c for c in pelayanan_cols if c in available]), axis=1)
290
- df["sub_pengelolaan"] = df.apply(lambda r: penalized_mean(r, [c for c in pengelolaan_cols if c in available]), axis=1)
291
 
292
  df["dim_kepatuhan"] = df[["sub_koleksi","sub_sdm"]].mean(axis=1)
293
  df["dim_kinerja"] = df[["sub_pelayanan","sub_pengelolaan"]].mean(axis=1)
@@ -340,7 +318,7 @@ def load_default_files(force=False):
340
  _CACHE.update({"key": key, "df_all": None, "pop_kab": None, "pop_prov": None, "meta": {}, "info": info})
341
  return None, None, None, {}, info
342
 
343
- # mapping jenis
344
  val_map_jenis = {
345
  "PERPUSTAKAAN SEKOLAH": "sekolah", "SEKOLAH": "sekolah",
346
  "PERPUSTAKAAN UMUM": "umum", "UMUM": "umum", "PERPUSTAKAAN DAERAH": "umum",
@@ -368,52 +346,54 @@ def load_default_files(force=False):
368
  df_raw = df_raw.drop_duplicates(subset=["_row_key"], keep="first").copy()
369
  after = len(df_raw)
370
 
371
- # POP KAB
372
  pk = pd.read_excel(POP_KAB)
373
  c_kab = pick_col(pk, ["KABUPATEN_KOTA","Kab/Kota","Kabupaten/Kota","KAB/KOTA","Kabupaten_Kota"])
374
  c_prov = pick_col(pk, ["PROVINSI","Provinsi"])
375
- c_pop_umum = pick_col(pk, ["Pop_Umum","pop_umum","jumlah_populasi_umum","POP_UMUM"])
376
- c_pop_sekolah = pick_col(pk, ["Pop_Sekolah","pop_sekolah","jumlah_populasi_sekolah","POP_SEKOLAH"])
377
-
378
- if c_kab is None:
379
- info = "❌ Populasi Kab/Kota: kolom Kab/Kota tidak ditemukan."
380
  _CACHE.update({"key": key, "df_all": None, "pop_kab": None, "pop_prov": None, "meta": {}, "info": info})
381
  return None, None, None, {}, info
382
 
383
  pop_kab = pd.DataFrame({
384
  "Provinsi_Label": pk[c_prov].astype(str).str.strip() if c_prov else "",
385
  "Kab_Kota_Label": pk[c_kab].astype(str).str.strip(),
386
- "Pop_Umum": pk[c_pop_umum].apply(coerce_num) if c_pop_umum else np.nan,
387
- "Pop_Sekolah": pk[c_pop_sekolah].apply(coerce_num) if c_pop_sekolah else np.nan,
388
  })
389
  pop_kab["kab_key"] = pop_kab["Kab_Kota_Label"].apply(norm_kab_label)
390
  pop_kab = pop_kab.groupby("kab_key", as_index=False).agg({
391
  "Kab_Kota_Label":"first",
392
  "Provinsi_Label":"first",
393
- "Pop_Umum":"max",
394
- "Pop_Sekolah":"max",
395
  })
396
 
397
- # POP PROV
398
  pp = pd.read_excel(POP_PROV)
399
  c_pr = pick_col(pp, ["Provinsi","PROVINSI","provinsi"])
400
- c_total = pick_col(pp, ["Pop_Sekolah_Prov","pop_sekolah_prov","TOTAL_SMA","total_sma","SMA","Total SMA","total_pend","TOTAL_PEND"])
401
- if c_pr is None or c_total is None:
402
- info = "❌ Populasi Provinsi: kolom Provinsi / total populasi sekolah tidak ditemukan."
 
403
  _CACHE.update({"key": key, "df_all": None, "pop_kab": None, "pop_prov": None, "meta": {}, "info": info})
404
  return None, None, None, {}, info
405
 
406
  pop_prov = pd.DataFrame({
407
  "Provinsi_Label": pp[c_pr].astype(str).str.strip(),
408
- "Pop_Sekolah_Prov": pp[c_total].apply(coerce_num),
 
409
  })
410
  pop_prov["prov_key"] = pop_prov["Provinsi_Label"].apply(norm_prov_label)
411
  pop_prov = pop_prov.groupby("prov_key", as_index=False).agg({
412
  "Provinsi_Label":"first",
413
- "Pop_Sekolah_Prov":"sum",
 
414
  })
415
 
416
- # pipeline nasional
417
  df_all = prepare_global(df_raw)
418
 
419
  meta = dict(prov_col=prov_col, kab_col=kab_col, kew_col=kew_col, jenis_col=jenis_col, nama_col=nama_col)
@@ -421,8 +401,8 @@ def load_default_files(force=False):
421
  info = (
422
  f"✅ Mode NO UPLOAD (cache aktif)<br>"
423
  f"✅ DM: <b>{fp.name}</b> | Baris: {before} → dedup: {after}<br>"
424
- f"✅ Pop Kab/Kota: <b>{Path(POP_KAB).name}</b> (n={len(pop_kab)})<br>"
425
- f"✅ Pop Provinsi: <b>{Path(POP_PROV).name}</b> (n={len(pop_prov)})<br>"
426
  f"🕒 mtime: DM={time.ctime(_mtime(DATA_FILE))} | Kab={time.ctime(_mtime(POP_KAB))} | Prov={time.ctime(_mtime(POP_PROV))}"
427
  )
428
 
@@ -431,151 +411,125 @@ def load_default_files(force=False):
431
 
432
 
433
  # ============================================================
434
- # 6) COVERAGE + BOBOT (LEVEL AGREGAT WILAYAH×JENIS)
435
  # ============================================================
436
 
437
- def build_verif_and_weights(df_filtered: pd.DataFrame, pop_kab: pd.DataFrame, pop_prov: pd.DataFrame, kew_value: str):
438
  """
439
  Output:
440
- - weights_df: group_key, Jenis, bobot_coverage, coverage, target_68_n
441
- - verif_df: tabel verifikasi (dibulatkan tanpa koma)
442
  """
443
  if df_filtered is None or df_filtered.empty:
444
- return pd.DataFrame(), pd.DataFrame()
445
 
446
  kew_norm = str(kew_value or "").upper()
447
  df = df_filtered.copy()
448
 
449
- # group_key tergantung kewenangan filter
450
  if "KAB" in kew_norm or "KOTA" in kew_norm:
451
- level = "kab"
452
  key_col = "kab_key"
453
- name_col = "Kab/Kota"
 
 
 
 
 
454
  elif "PROV" in kew_norm:
455
- level = "prov"
456
  key_col = "prov_key"
457
- name_col = "Provinsi"
 
 
 
 
 
458
  else:
459
- # default
460
- level = "kab"
461
  key_col = "kab_key"
462
- name_col = "Kab/Kota"
463
-
464
- # hitung sampel per group×jenis
465
- g = df.groupby([key_col, "_dataset"]).size().rename("n_sampel").reset_index()
466
- g_piv = g.pivot(index=key_col, columns="_dataset", values="n_sampel").fillna(0)
467
-
468
- rows = []
469
- weights_rows = []
470
-
471
- if level == "kab":
472
  pop = pop_kab.set_index("kab_key") if (pop_kab is not None and not pop_kab.empty) else pd.DataFrame().set_index(pd.Index([]))
 
 
 
473
 
474
- for kk in g_piv.index:
475
- # sampel
476
- n_sek = float(g_piv.loc[kk].get("sekolah", 0))
477
- n_um = float(g_piv.loc[kk].get("umum", 0))
478
- n_kh = float(g_piv.loc[kk].get("khusus", 0))
479
-
480
- pop_sek = pop.loc[kk, "Pop_Sekolah"] if kk in pop.index else np.nan
481
- pop_um = pop.loc[kk, "Pop_Umum"] if kk in pop.index else np.nan
482
-
483
- cov_sek = safe_div(n_sek, pop_sek)
484
- cov_um = safe_div(n_um, pop_um)
485
-
486
- # bobot berdasarkan JUMLAH sampel vs target_68%
487
- b_sek = _bobot_or_one(cap_bobot_from_counts(n_sek, pop_sek))
488
- b_um = _bobot_or_one(cap_bobot_from_counts(n_um, pop_um))
489
- b_kh = 1.0 # khusus tanpa penalti
490
-
491
- target_sek = (TARGET_COVERAGE * pop_sek) if not pd.isna(pop_sek) else np.nan
492
- target_um = (TARGET_COVERAGE * pop_um) if not pd.isna(pop_um) else np.nan
493
-
494
- weights_rows += [
495
- {"group_key": kk, "Jenis": "sekolah", "bobot_coverage": b_sek, "coverage": cov_sek, "target_68_n": target_sek},
496
- {"group_key": kk, "Jenis": "umum", "bobot_coverage": b_um, "coverage": cov_um, "target_68_n": target_um},
497
- {"group_key": kk, "Jenis": "khusus", "bobot_coverage": 1.0, "coverage": np.nan, "target_68_n": np.nan},
498
- ]
499
-
500
- kab_name = pop.loc[kk, "Kab_Kota_Label"] if kk in pop.index else kk
501
-
502
- rows.append({
503
- name_col: kab_name,
504
- "Pop_Sekolah": pop_sek,
505
- "Target_68_Sekolah": target_sek,
506
- "Sampel_Sekolah": n_sek,
507
- "Coverage_Sekolah_%": (cov_sek * 100) if not pd.isna(cov_sek) else np.nan,
508
- "Bobot_Sekolah_(Sampel/Target68)": (b_sek * 100),
509
- "GAP_Ke_Target68_Sekolah": max(target_sek - n_sek, 0) if not pd.isna(target_sek) else np.nan,
510
-
511
- "Pop_Umum": pop_um,
512
- "Target_68_Umum": target_um,
513
- "Sampel_Umum": n_um,
514
- "Coverage_Umum_%": (cov_um * 100) if not pd.isna(cov_um) else np.nan,
515
- "Bobot_Umum_(Sampel/Target68)": (b_um * 100),
516
- "GAP_Ke_Target68_Umum": max(target_um - n_um, 0) if not pd.isna(target_um) else np.nan,
517
-
518
- "Catatan": (
519
- ("Pop_Sekolah_tidak_valid; " if (pd.isna(pop_sek) or pop_sek <= 0) else "")
520
- + ("Pop_Umum_tidak_valid; " if (pd.isna(pop_um) or pop_um <= 0) else "")
521
- )
522
- })
523
-
524
- else:
525
- pop = pop_prov.set_index("prov_key") if (pop_prov is not None and not pop_prov.empty) else pd.DataFrame().set_index(pd.Index([]))
526
 
527
- for pk in g_piv.index:
528
- n_sek = float(g_piv.loc[pk].get("sekolah", 0))
529
- pop_sek = pop.loc[pk, "Pop_Sekolah_Prov"] if pk in pop.index else np.nan
530
- cov_sek = safe_div(n_sek, pop_sek)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
531
 
532
- b_sek = _bobot_or_one(cap_bobot_from_counts(n_sek, pop_sek))
 
 
 
 
533
 
534
- target_sek = (TARGET_COVERAGE * pop_sek) if not pd.isna(pop_sek) else np.nan
535
- prov_name = pop.loc[pk, "Provinsi_Label"] if pk in pop.index else pk
536
 
537
- weights_rows += [
538
- {"group_key": pk, "Jenis": "sekolah", "bobot_coverage": b_sek, "coverage": cov_sek, "target_68_n": target_sek},
539
- {"group_key": pk, "Jenis": "khusus", "bobot_coverage": 1.0, "coverage": np.nan, "target_68_n": np.nan},
540
- {"group_key": pk, "Jenis": "umum", "bobot_coverage": 1.0, "coverage": np.nan, "target_68_n": np.nan},
541
- ]
 
 
542
 
543
- rows.append({
544
- name_col: prov_name,
545
- "Pop_Sekolah": pop_sek,
546
- "Target_68_Sekolah": target_sek,
547
- "Sampel_Sekolah": n_sek,
548
- "Coverage_Sekolah_%": (cov_sek * 100) if not pd.isna(cov_sek) else np.nan,
549
- "Bobot_Sekolah_(Sampel/Target68)": (b_sek * 100),
550
- "GAP_Ke_Target68_Sekolah": max(target_sek - n_sek, 0) if not pd.isna(target_sek) else np.nan,
551
- "Catatan": ("Pop_Sekolah_tidak_valid; " if (pd.isna(pop_sek) or pop_sek <= 0) else "")
552
- })
553
 
554
- verif_df = pd.DataFrame(rows)
555
- weights_df = pd.DataFrame(weights_rows)
 
556
 
557
- # pembulatan verif tanpa koma
558
- if not verif_df.empty:
559
- for c in verif_df.columns:
560
- if c in ["Catatan", name_col]:
561
- continue
562
- if c.endswith("%") or c.endswith("_%"):
563
- verif_df[c] = verif_df[c].fillna(0).round(0).astype(int)
564
- else:
565
- verif_df[c] = pd.to_numeric(verif_df[c], errors="coerce").fillna(0).round(0).astype(int)
566
 
567
- return weights_df, verif_df
568
 
569
 
570
  # ============================================================
571
- # 7) AGREGAT WILAYAH×JENIS + FINAL (penalti setelah agregat)
572
  # ============================================================
573
 
574
- def build_agg_wilayah_jenis(df_filtered: pd.DataFrame, weights_df: pd.DataFrame, kew_value: str):
575
  """
576
- Output:
577
- - agg_df: satu baris per wilayah×jenis
578
- berisi mean sub/dim, mean Indeks_Real, bobot_coverage, Indeks_Final_Agregat
579
  """
580
  if df_filtered is None or df_filtered.empty:
581
  return pd.DataFrame()
@@ -596,7 +550,11 @@ def build_agg_wilayah_jenis(df_filtered: pd.DataFrame, weights_df: pd.DataFrame,
596
  label_col = "KAB_DISP"
597
  label_name = "Kab/Kota"
598
 
599
- # agregat di level wilayah×jenis
 
 
 
 
600
  agg = df.groupby([key_col, label_col, "_dataset"], dropna=False).agg(
601
  Jumlah=("Indeks_Real_0_100", "size"),
602
  Rata2_sub_koleksi=("sub_koleksi", "mean"),
@@ -608,23 +566,17 @@ def build_agg_wilayah_jenis(df_filtered: pd.DataFrame, weights_df: pd.DataFrame,
608
  Indeks_Real_Agregat_0_100=("Indeks_Real_0_100", "mean"),
609
  ).reset_index()
610
 
611
- agg = agg.rename(columns={label_col: label_name, "_dataset": "Jenis", key_col: "group_key"})
612
 
613
- # join bobot
614
- if weights_df is None or weights_df.empty:
615
- agg["bobot_coverage"] = 1.0
616
- agg["coverage"] = np.nan
617
- agg["target_68_n"] = np.nan
618
  else:
619
- agg = agg.merge(weights_df, on=["group_key", "Jenis"], how="left")
620
- agg["bobot_coverage"] = agg["bobot_coverage"].fillna(1.0)
621
- if "coverage" not in agg.columns:
622
- agg["coverage"] = np.nan
623
- if "target_68_n" not in agg.columns:
624
- agg["target_68_n"] = np.nan
625
-
626
- # FINAL diterapkan di agregat (bukan per entitas)
627
- agg["Indeks_Final_Agregat_0_100"] = agg["Indeks_Real_Agregat_0_100"] * agg["bobot_coverage"]
628
 
629
  # rounding
630
  for c in [
@@ -634,26 +586,80 @@ def build_agg_wilayah_jenis(df_filtered: pd.DataFrame, weights_df: pd.DataFrame,
634
  if c in agg.columns:
635
  agg[c] = agg[c].apply(lambda x: round(float(x), 3) if pd.notna(x) else 0.0)
636
 
637
- for c in ["Indeks_Real_Agregat_0_100","Indeks_Final_Agregat_0_100","bobot_coverage","coverage","target_68_n"]:
638
- if c in agg.columns:
639
- agg[c] = pd.to_numeric(agg[c], errors="coerce")
640
-
641
- # indeks dua desimal
642
- for c in ["Indeks_Real_Agregat_0_100", "Indeks_Final_Agregat_0_100"]:
643
  if c in agg.columns:
644
  agg[c] = agg[c].apply(lambda x: round(float(x), 2) if pd.notna(x) else 0.0)
645
 
646
- # bobot 3 desimal
647
- if "bobot_coverage" in agg.columns:
648
- agg["bobot_coverage"] = agg["bobot_coverage"].apply(lambda x: round(float(x), 3) if pd.notna(x) else 1.0)
649
-
650
  return agg
651
 
652
 
653
- def attach_final_to_detail(df_filtered: pd.DataFrame, agg_df: pd.DataFrame, meta: dict, kew_value: str):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
654
  """
655
- Detail tetap entitas, tapi Indeks_Final_0_100 = final agregat group (wilayah×jenis).
656
- (jadi individu tidak pernah dihitung penalti sendiri)
657
  """
658
  if df_filtered is None or df_filtered.empty:
659
  return pd.DataFrame()
@@ -671,14 +677,13 @@ def attach_final_to_detail(df_filtered: pd.DataFrame, agg_df: pd.DataFrame, meta
671
  key_col = "kab_key"
672
  label_cols = ("PROV_DISP", "KAB_DISP")
673
 
674
- if agg_df is None or agg_df.empty:
675
  df["Indeks_Final_0_100"] = df["Indeks_Real_0_100"]
676
  else:
677
- m = agg_df[["group_key","Jenis","Indeks_Final_Agregat_0_100"]].copy()
678
- m = m.rename(columns={"Jenis":"_dataset"})
679
- df = df.merge(m, left_on=[key_col,"_dataset"], right_on=["group_key","_dataset"], how="left")
680
- df["Indeks_Final_0_100"] = df["Indeks_Final_Agregat_0_100"].fillna(df["Indeks_Real_0_100"])
681
- df = df.drop(columns=[c for c in ["group_key","Indeks_Final_Agregat_0_100"] if c in df.columns])
682
 
683
  base_cols = [label_cols[0], label_cols[1], "KEW_NORM", "_dataset"]
684
  if meta.get("nama_col") and meta["nama_col"] in df.columns:
@@ -696,7 +701,6 @@ def attach_final_to_detail(df_filtered: pd.DataFrame, agg_df: pd.DataFrame, meta
696
  out = df[keep].copy()
697
  out = out.rename(columns={label_cols[0]:"Provinsi", label_cols[1]:"Kab/Kota", "_dataset":"Jenis"})
698
 
699
- # rounding
700
  for c in ["sub_koleksi","sub_sdm","sub_pelayanan","sub_pengelolaan","dim_kepatuhan","dim_kinerja"]:
701
  if c in out.columns:
702
  out[c] = out[c].apply(lambda x: round(float(x), 3) if pd.notna(x) else 0.0)
@@ -707,78 +711,69 @@ def attach_final_to_detail(df_filtered: pd.DataFrame, agg_df: pd.DataFrame, meta
707
  return out
708
 
709
 
710
- def build_summary_per_jenis_from_agg(agg_df: pd.DataFrame):
 
 
 
 
711
  """
712
- RINGKASAN (PER JENIS) harus berisi sub-dimensi, dimensi, dan indeks pasca-penalty.
713
- Ringkasan berbasis agregat wilayah (bukan entitas).
714
  """
715
- if agg_df is None or agg_df.empty:
716
  return pd.DataFrame()
717
 
718
- grp = agg_df.groupby("Jenis", dropna=False).agg(
719
- Jumlah_Wilayah=("Jenis","size"),
720
- Total_Perpus=("Jumlah","sum"),
721
-
722
- Rata2_sub_koleksi=("Rata2_sub_koleksi","mean"),
723
- Rata2_sub_sdm=("Rata2_sub_sdm","mean"),
724
- Rata2_sub_pelayanan=("Rata2_sub_pelayanan","mean"),
725
- Rata2_sub_pengelolaan=("Rata2_sub_pengelolaan","mean"),
726
-
727
- Rata2_dim_kepatuhan=("Rata2_dim_kepatuhan","mean"),
728
- Rata2_dim_kinerja=("Rata2_dim_kinerja","mean"),
729
-
730
- Indeks_Pasca_Penalti_0_100=("Indeks_Final_Agregat_0_100","mean"),
731
- ).reset_index()
732
-
733
- # keseluruhan
734
- overall = {
735
- "Jenis": "Rata-rata keseluruhan",
736
- "Jumlah_Wilayah": int(agg_df.shape[0]),
737
- "Total_Perpus": int(agg_df["Jumlah"].sum()),
738
-
739
- "Rata2_sub_koleksi": float(agg_df["Rata2_sub_koleksi"].mean()),
740
- "Rata2_sub_sdm": float(agg_df["Rata2_sub_sdm"].mean()),
741
- "Rata2_sub_pelayanan": float(agg_df["Rata2_sub_pelayanan"].mean()),
742
- "Rata2_sub_pengelolaan": float(agg_df["Rata2_sub_pengelolaan"].mean()),
743
-
744
- "Rata2_dim_kepatuhan": float(agg_df["Rata2_dim_kepatuhan"].mean()),
745
- "Rata2_dim_kinerja": float(agg_df["Rata2_dim_kinerja"].mean()),
746
-
747
- "Indeks_Pasca_Penalti_0_100": float(agg_df["Indeks_Final_Agregat_0_100"].mean()),
748
- }
749
- grp = pd.concat([grp, pd.DataFrame([overall])], ignore_index=True)
750
 
751
- # rounding
752
- for c in [
753
- "Rata2_sub_koleksi","Rata2_sub_sdm","Rata2_sub_pelayanan","Rata2_sub_pengelolaan",
754
- "Rata2_dim_kepatuhan","Rata2_dim_kinerja"
755
- ]:
756
- if c in grp.columns:
757
- grp[c] = grp[c].apply(lambda x: round(float(x), 3) if pd.notna(x) else 0.0)
758
- if "Indeks_Pasca_Penalti_0_100" in grp.columns:
759
- grp["Indeks_Pasca_Penalti_0_100"] = grp["Indeks_Pasca_Penalti_0_100"].apply(lambda x: round(float(x), 2) if pd.notna(x) else 0.0)
760
 
761
- return grp
762
 
763
 
764
  # ============================================================
765
- # 8) BELL CURVE (BERBASIS AGREGAT WILAYAH)
766
  # ============================================================
767
 
768
- def make_bell_figure_from_agg(agg_df: pd.DataFrame, title: str, min_points: int = 5, label_field: str = "Wilayah"):
769
  fig = go.Figure()
770
- fig.update_layout(title=title, xaxis_title="Indeks FINAL Agregat (0–100)", yaxis_title="Kepadatan (relatif)")
771
 
772
- if agg_df is None or agg_df.empty or "Indeks_Final_Agregat_0_100" not in agg_df.columns:
773
  return fig
774
 
775
- dfp = agg_df.dropna(subset=["Indeks_Final_Agregat_0_100"]).copy()
776
  if len(dfp) < min_points:
777
  fig.add_annotation(text="Grafik tidak ditampilkan (data terlalu sedikit).", x=0.5, y=0.5,
778
  xref="paper", yref="paper", showarrow=False)
779
  return fig
780
 
781
- x = dfp["Indeks_Final_Agregat_0_100"].astype(float).values
782
  mu = float(np.mean(x))
783
  sigma = float(np.std(x, ddof=1)) if len(x) > 1 else 1.0
784
  sigma = max(sigma, 1e-6)
@@ -787,15 +782,19 @@ def make_bell_figure_from_agg(agg_df: pd.DataFrame, title: str, min_points: int
787
  pdf = (1.0 / (sigma * np.sqrt(2 * np.pi))) * np.exp(-0.5 * ((xs - mu) / sigma) ** 2)
788
  pdf = pdf / max(pdf.max(), 1e-9)
789
 
790
- # label hover
791
- if label_field in dfp.columns:
792
- hover = [f"{w}<br>Final: {v:.2f}<br>Real: {r:.2f}<br>Bobot: {b:.3f}"
793
- for w, v, r, b in zip(
794
- dfp[label_field].astype(str).tolist(),
795
- dfp["Indeks_Final_Agregat_0_100"].astype(float).tolist(),
796
- dfp["Indeks_Real_Agregat_0_100"].astype(float).tolist() if "Indeks_Real_Agregat_0_100" in dfp.columns else [np.nan]*len(dfp),
797
- dfp["bobot_coverage"].astype(float).tolist() if "bobot_coverage" in dfp.columns else [1.0]*len(dfp),
798
- )]
 
 
 
 
799
  else:
800
  hover = [f"Final: {v:.2f}" for v in x]
801
 
@@ -820,7 +819,7 @@ def make_bell_figure_from_agg(agg_df: pd.DataFrame, title: str, min_points: int
820
 
821
 
822
  # ============================================================
823
- # 9) LLM + WORD
824
  # ============================================================
825
 
826
  _HF_CLIENT = None
@@ -836,49 +835,47 @@ def get_llm_client():
836
  _HF_CLIENT = None
837
  return None
838
 
839
- def build_context_from_agg(summary_jenis: pd.DataFrame, agg_wilayah: pd.DataFrame, verif_df: pd.DataFrame, wilayah: str, kew: str) -> str:
840
  lines = []
841
  lines.append(f"Wilayah filter: {wilayah}")
842
  lines.append(f"Kewenangan: {kew}")
843
- lines.append("Catatan metode: Penalti coverage 68% diterapkan setelah indeks agregat wilayah×jenis dihitung; individu tidak dipenalti.")
844
- lines.append("Definisi bobot coverage: bobot = min(n_sampel / (0.68*populasi), 1.0). Khusus = 1. Populasi invalid = 1.")
 
845
 
846
  if summary_jenis is not None and not summary_jenis.empty:
847
- lines.append("\nRingkasan (per jenis) berbasis agregat wilayah:")
848
  for _, r in summary_jenis.iterrows():
849
- if str(r.get("Jenis","")) == "Rata-rata keseluruhan":
850
- continue
851
  lines.append(
852
  f"- {r['Jenis']}: wilayah={int(r['Jumlah_Wilayah'])}, total_perpus={int(r['Total_Perpus'])}, "
853
  f"dim_kepatuhan={float(r['Rata2_dim_kepatuhan']):.3f}, dim_kinerja={float(r['Rata2_dim_kinerja']):.3f}, "
854
- f"final_pasca_penalti={float(r['Indeks_Pasca_Penalti_0_100']):.2f}"
855
  )
856
 
857
- if agg_wilayah is not None and not agg_wilayah.empty:
858
- lines.append("\nTop 5 wilayah (Final agregat tertinggi):")
859
- top = agg_wilayah.sort_values("Indeks_Final_Agregat_0_100", ascending=False).head(5)
 
860
  for _, r in top.iterrows():
861
- wl = r.get("Kab/Kota", r.get("Provinsi","(wilayah)"))
862
  lines.append(
863
- f"- {wl} ({r['Jenis']}): Final={float(r['Indeks_Final_Agregat_0_100']):.2f} "
864
- f"| Bobot={float(r.get('bobot_coverage', 1.0)):.3f} | Jumlah={int(r.get('Jumlah', 0))}"
865
  )
866
 
867
- lines.append("\nTop 5 wilayah (GAP menuju target 68% terbesar):")
868
- if verif_df is not None and not verif_df.empty:
869
- gap_cols = [c for c in verif_df.columns if c.startswith("GAP_Ke_Target68")]
870
- if gap_cols:
871
- tmp = verif_df.copy()
872
- tmp["GAP_MAX"] = tmp[gap_cols].max(axis=1)
873
- tmp = tmp.sort_values("GAP_MAX", ascending=False).head(5)
874
- for _, r in tmp.iterrows():
875
- nm = r.get("Kab/Kota", r.get("Provinsi",""))
876
- lines.append(f"- {nm}: GAP maks={int(r['GAP_MAX'])}")
877
 
878
  return "\n".join(lines)
879
 
880
- def generate_llm_analysis(summary_jenis: pd.DataFrame, agg_wilayah: pd.DataFrame, verif_df: pd.DataFrame, wilayah: str, kew: str) -> str:
881
- ctx = build_context_from_agg(summary_jenis, agg_wilayah, verif_df, wilayah, kew)
882
  client = get_llm_client()
883
  if client is None or not USE_LLM:
884
  return "Analisis otomatis (LLM) tidak tersedia. Pastikan token HuggingFace tersedia dan model bisa diakses."
@@ -888,20 +885,20 @@ def generate_llm_analysis(summary_jenis: pd.DataFrame, agg_wilayah: pd.DataFrame
888
  "Tugas Anda menyusun analisis berbasis data IPLM secara formal, tajam, dan operasional."
889
  )
890
  user_prompt = f"""
891
- DATA RINGKAS IPLM (PENALTI COVERAGE SETELAH AGREGAT):
892
 
893
  {ctx}
894
 
895
  TULISKAN ANALISIS BAHASA INDONESIA FORMAL, STRUKTUR:
896
- 1) Gambaran umum hasil agregat (1 paragraf).
897
- 2) Analisis per jenis perpustakaan (sub-dimensi/dimensi dan indeks pasca-penalti) (2 paragraf).
898
- 3) Analisis coverage (target 68%) dan implikasi pada indeks final agregat (1 paragraf).
899
- 4) Rekomendasi program 3–5 tahun (2 paragraf, konkret, bisa dieksekusi).
900
 
901
  ATURAN:
902
- - Jangan pakai label menilai eksplisit seperti "rendah/sedang/tinggi".
903
- - Gunakan frasa netral: "masih memiliki ruang penguatan", "memerlukan konsolidasi", dst.
904
- - Fokus pada Indeks FINAL AGREGAT (pasca penalti), bukan individu.
905
  """
906
  try:
907
  resp = client.chat_completion(
@@ -916,15 +913,15 @@ ATURAN:
916
  except Exception as e:
917
  return f"⚠️ Error saat memanggil LLM: {repr(e)}"
918
 
919
- def generate_word_report(detail_df: pd.DataFrame, summary_jenis: pd.DataFrame, agg_wilayah: pd.DataFrame, verif_df: pd.DataFrame,
920
- wilayah: str, kew: str, analysis_text: str) -> str:
921
  doc = Document()
922
  doc.add_heading(f"Laporan IPLM — {wilayah}", level=1)
923
  doc.add_paragraph(f"Kewenangan: {kew}")
924
- doc.add_paragraph("Metode: Penalti coverage 68% diterapkan setelah indeks agregat wilayah×jenis dihitung (bukan per entitas perpustakaan).")
925
- doc.add_paragraph("Bobot coverage: bobot = min(n_sampel / (0.68*populasi), 1.0). Perpustakaan khusus = 1. Populasi invalid/missing = 1.")
 
926
 
927
- doc.add_heading("Ringkasan (per jenis) — sub-dimensi, dimensi, indeks pasca penalti", level=2)
928
  if summary_jenis is not None and not summary_jenis.empty:
929
  table = doc.add_table(rows=1, cols=len(summary_jenis.columns))
930
  hdr = table.rows[0].cells
@@ -935,13 +932,11 @@ def generate_word_report(detail_df: pd.DataFrame, summary_jenis: pd.DataFrame, a
935
  for i, c in enumerate(summary_jenis.columns):
936
  cells[i].text = str(row[c])
937
  else:
938
- doc.add_paragraph("Ringkasan agregat tidak tersedia.")
939
-
940
- doc.add_heading("Agregat Wilayah × Jenis (Final setelah penalti)", level=2)
941
- if agg_wilayah is not None and not agg_wilayah.empty:
942
- show = agg_wilayah.copy()
943
- show = show.sort_values("Indeks_Final_Agregat_0_100", ascending=False).head(200)
944
 
 
 
 
945
  table = doc.add_table(rows=1, cols=len(show.columns))
946
  hdr = table.rows[0].cells
947
  for i, c in enumerate(show.columns):
@@ -953,22 +948,36 @@ def generate_word_report(detail_df: pd.DataFrame, summary_jenis: pd.DataFrame, a
953
  else:
954
  doc.add_paragraph("Agregat wilayah tidak tersedia.")
955
 
956
- doc.add_heading("Verifikasi Coverage & GAP menuju target 68% (tanpa angka koma)", level=2)
957
- if verif_df is not None and not verif_df.empty:
958
- table = doc.add_table(rows=1, cols=len(verif_df.columns))
 
959
  hdr = table.rows[0].cells
960
- for i, c in enumerate(verif_df.columns):
961
  hdr[i].text = str(c)
962
- for _, row in verif_df.iterrows():
963
  cells = table.add_row().cells
964
- for i, c in enumerate(verif_df.columns):
965
  cells[i].text = str(row[c])
966
  else:
967
- doc.add_paragraph("Tidak ada tabel verifikasi untuk filter ini.")
968
 
969
- doc.add_heading("Detail Entitas (Indeks Final menempel pada agregat wilayah×jenis)", level=2)
 
 
 
 
 
 
 
 
 
 
 
 
 
970
  if detail_df is not None and not detail_df.empty:
971
- show = detail_df.copy().head(200)
972
  table = doc.add_table(rows=1, cols=len(show.columns))
973
  hdr = table.rows[0].cells
974
  for i, c in enumerate(show.columns):
@@ -991,16 +1000,16 @@ def generate_word_report(detail_df: pd.DataFrame, summary_jenis: pd.DataFrame, a
991
 
992
 
993
  # ============================================================
994
- # 10) CORE RUN
995
  # ============================================================
996
 
997
  def _empty_outputs(msg="⚠️ Data belum siap."):
998
  empty = pd.DataFrame()
999
  empty_fig = go.Figure()
1000
  return (
1001
- empty, empty, empty, empty,
1002
  None, None, None, None,
1003
- empty_fig, empty_fig, empty_fig, empty_fig,
1004
  msg, "Analisis belum tersedia."
1005
  )
1006
 
@@ -1022,45 +1031,23 @@ def run_calc(prov_value, kab_value, kew_value, df_all, pop_kab, pop_prov, meta):
1022
  if df.empty:
1023
  return _empty_outputs("Tidak ada data untuk filter ini.")
1024
 
1025
- # coverage & weights (AGREGAT)
1026
- weights_df, verif_df = build_verif_and_weights(df, pop_kab, pop_prov, kew_value or "(Semua)")
1027
 
1028
- # agregat wilayah×jenis + final (penalti setelah agregat)
1029
- agg_wilayah = build_agg_wilayah_jenis(df, weights_df, kew_value or "(Semua)")
1030
 
1031
- # ringkasan per jenis (sub/dim + indeks pasca penalti)
1032
- summary_jenis = build_summary_per_jenis_from_agg(agg_wilayah)
1033
 
1034
- # detail entitas: final menempel pada agregat group
1035
- detail_view = attach_final_to_detail(df, agg_wilayah, meta, kew_value or "(Semua)")
1036
 
1037
- # bell curve berbasis agregat wilayah
1038
- label_field = "Kab/Kota" if "Kab/Kota" in agg_wilayah.columns else ("Provinsi" if "Provinsi" in agg_wilayah.columns else "Wilayah")
1039
 
1040
- fig_all = make_bell_figure_from_agg(
1041
- agg_wilayah.assign(Wilayah=agg_wilayah.get(label_field, "")),
1042
- "Bell Curve Final Agregat — Semua Jenis",
1043
- min_points=5,
1044
- label_field="Wilayah"
1045
- )
1046
- fig_sek = make_bell_figure_from_agg(
1047
- agg_wilayah[agg_wilayah["Jenis"]=="sekolah"].assign(Wilayah=agg_wilayah.get(label_field, "")),
1048
- "Bell Curve Final Agregat — Sekolah",
1049
- min_points=3,
1050
- label_field="Wilayah"
1051
- )
1052
- fig_um = make_bell_figure_from_agg(
1053
- agg_wilayah[agg_wilayah["Jenis"]=="umum"].assign(Wilayah=agg_wilayah.get(label_field, "")),
1054
- "Bell Curve Final Agregat — Umum",
1055
- min_points=3,
1056
- label_field="Wilayah"
1057
- )
1058
- fig_kh = make_bell_figure_from_agg(
1059
- agg_wilayah[agg_wilayah["Jenis"]=="khusus"].assign(Wilayah=agg_wilayah.get(label_field, "")),
1060
- "Bell Curve Final Agregat — Khusus",
1061
- min_points=3,
1062
- label_field="Wilayah"
1063
- )
1064
 
1065
  # output files
1066
  tmpdir = tempfile.mkdtemp()
@@ -1068,29 +1055,31 @@ def run_calc(prov_value, kab_value, kew_value, df_all, pop_kab, pop_prov, meta):
1068
  kab_slug = (_canon(kab_value or "SEMUA").upper() or "SEMUA")
1069
  kew_slug = (_canon(kew_value or "SEMUA").upper() or "SEMUA")
1070
 
1071
- summary_path = str(Path(tmpdir) / f"IPLM_RingkasanJenis_{prov_slug}_{kab_slug}_{kew_slug}.xlsx")
1072
- wilayah_path = str(Path(tmpdir) / f"IPLM_AgregatWilayahJenis_{prov_slug}_{kab_slug}_{kew_slug}.xlsx")
1073
- detail_path = str(Path(tmpdir) / f"IPLM_DetailEntitas_{prov_slug}_{kab_slug}_{kew_slug}.xlsx")
1074
- verif_path = str(Path(tmpdir) / f"IPLM_VerifikasiCoverage_{prov_slug}_{kab_slug}_{kew_slug}.xlsx")
 
1075
 
1076
- summary_jenis.to_excel(summary_path, index=False)
1077
- agg_wilayah.to_excel(wilayah_path, index=False)
1078
- detail_view.to_excel(detail_path, index=False)
1079
- verif_df.to_excel(verif_path, index=False)
 
1080
 
1081
  wilayah_txt = kab_value if (kab_value and kab_value != "(Semua)") else (prov_value if (prov_value and prov_value != "(Semua)") else "Nasional/All")
1082
- analysis_text = generate_llm_analysis(summary_jenis, agg_wilayah, verif_df, wilayah_txt, kew_value or "(Semua)")
1083
- word_path = generate_word_report(detail_view, summary_jenis, agg_wilayah, verif_df, wilayah_txt, kew_value or "(Semua)", analysis_text)
1084
 
1085
  msg = (
1086
- f"✅ Selesai: entitas={len(detail_view)} | agregat_wilayah×jenis={len(agg_wilayah)} | "
1087
- f"penalti diterapkan setelah agregat (individu tidak dipenalti)"
1088
  )
1089
 
1090
  return (
1091
- summary_jenis, agg_wilayah, detail_view, verif_df,
1092
- summary_path, wilayah_path, detail_path, word_path,
1093
- fig_all, fig_sek, fig_um, fig_kh,
1094
  msg, analysis_text
1095
  )
1096
 
@@ -1099,7 +1088,7 @@ def run_calc(prov_value, kab_value, kew_value, df_all, pop_kab, pop_prov, meta):
1099
 
1100
 
1101
  # ============================================================
1102
- # 11) UI (NO UPLOAD) — TANPA TOMBOL RELOAD
1103
  # ============================================================
1104
 
1105
  def ui_load(force=False):
@@ -1138,16 +1127,16 @@ def on_prov_change(prov_value):
1138
 
1139
  with gr.Blocks() as demo:
1140
  gr.Markdown(f"""
1141
- # IPLM 2025 — Final (Penalti Coverage 68% Setelah Agregat)
1142
  **Mode NO UPLOAD (cache aktif).** File dibaca dari repo/server:
1143
  - `DATA_FILE` = **{DATA_FILE}**
1144
- - `POP_KAB` = **{POP_KAB}**
1145
- - `POP_PROV` = **{POP_PROV}**
1146
 
1147
- **Metode penalti (SESUI PERMINTAAN):**
1148
- - Hitung indeks real per entitas → agregasi wilayah×jenisterapkan bobot coverage pada AGREGAT.
1149
- - Bobot coverage = `min(n_sampel / (0.68*populasi), 1.0)`; jika populasi tidak valid → bobot=1.
1150
- - Perpustakaan **khusus** tidak dipenalti (bobot=1).
1151
  """)
1152
 
1153
  state_df = gr.State(None)
@@ -1167,33 +1156,30 @@ with gr.Blocks() as demo:
1167
  run_btn = gr.Button("Jalankan Perhitungan")
1168
  msg_out = gr.Markdown()
1169
 
1170
- gr.Markdown("## Ringkasan (per Jenis) — sub-dimensi, dimensi, indeks pasca penalti (berbasis agregat wilayah)")
1171
  out_summary = gr.DataFrame(interactive=False)
1172
 
1173
- gr.Markdown("## Agregat Wilayah × Jenis (Final setelah penalti)")
1174
- out_agg_wilayah = gr.DataFrame(interactive=False)
 
 
 
1175
 
1176
- gr.Markdown("## Detail Entitas (Indeks Final menempel pada agregat wilayah×jenis; individu tidak dipenalti)")
1177
  out_detail = gr.DataFrame(interactive=False)
1178
 
1179
- gr.Markdown("## Verifikasi Coverage & GAP menuju target 68% (tanpa angka koma)")
1180
  out_verif = gr.DataFrame(interactive=False)
1181
 
1182
- gr.Markdown("## Bell Curve Final Agregat Semua Jenis")
1183
- bell_all = gr.Plot()
1184
- gr.Markdown("## Bell Curve Final Agregat — Sekolah")
1185
- bell_sek = gr.Plot()
1186
- gr.Markdown("## Bell Curve Final Agregat — Umum")
1187
- bell_um = gr.Plot()
1188
- gr.Markdown("## Bell Curve Final Agregat — Khusus")
1189
- bell_kh = gr.Plot()
1190
 
1191
  gr.Markdown("## Analisis Otomatis (LLM)")
1192
  analysis_out = gr.Markdown()
1193
 
1194
  with gr.Row():
1195
- dl_summary = gr.DownloadButton(label="Download Ringkasan Jenis (.xlsx)")
1196
- dl_wilayah = gr.DownloadButton(label="Download Agregat Wilayah×Jenis (.xlsx)")
1197
  dl_detail = gr.DownloadButton(label="Download Detail Entitas (.xlsx)")
1198
  dl_word = gr.DownloadButton(label="Download Laporan Word (.docx)")
1199
 
@@ -1201,9 +1187,9 @@ with gr.Blocks() as demo:
1201
  fn=run_calc,
1202
  inputs=[dd_prov, dd_kab, dd_kew, state_df, state_pop_kab, state_pop_prov, state_meta],
1203
  outputs=[
1204
- out_summary, out_agg_wilayah, out_detail, out_verif,
1205
- dl_summary, dl_wilayah, dl_detail, dl_word,
1206
- bell_all, bell_sek, bell_um, bell_kh,
1207
  msg_out, analysis_out
1208
  ]
1209
  )
 
1
  # -*- coding: utf-8 -*-
2
  """
3
  IPLM 2025 — FINAL (NO UPLOAD)
4
+ Khusus digabung ke "Keseluruhan" (tidak tampil sebagai jenis terpisah)
5
+ Sanksi 68% berbasis TOTAL pengumpulan data wilayah:
6
+ bobot_sanksi = min(n_total_terkumpul / target_total_68, 1.0)
7
+ Indeks_Final = Indeks_Real_Agregat * bobot_sanksi
8
+ ✅ Sanksi diterapkan SETELAH agregat (bukan per entitas)
9
+ Detail entitas: Indeks_Final_0_100 menempel dari Indeks_Final_Wilayah (bukan penalti per-row)
10
+ Bell curve berbasis Indeks_Final_Wilayah (agregat wilayah)
11
+ LLM analysis + Word
12
+ Download (tanpa upload box)
 
 
 
 
 
 
 
 
 
 
 
 
13
  """
14
 
15
  import os
 
36
  POP_KAB = os.getenv("POP_KAB", "Data_populasi_Kab_kota.xlsx")
37
  POP_PROV = os.getenv("POP_PROV", "Data_populasi_propinsi.xlsx")
38
 
39
+ W_KEPATUHAN = float(os.getenv("W_KEPATUHAN", "0.30"))
40
+ W_KINERJA = float(os.getenv("W_KINERJA", "0.70"))
 
41
 
42
  USE_LLM = True
43
  LLM_MODEL_NAME = os.getenv("LLM_MODEL_NAME", "meta-llama/Meta-Llama-3-8B-Instruct")
 
139
  return re.sub(r"[^A-Z0-9]+", "", t)
140
 
141
  def safe_div(num, den):
142
+ if den is None or pd.isna(den) or float(den) <= 0:
143
  return np.nan
144
  return float(num) / float(den)
145
 
146
def bobot_sanksi_total(n_total: float, target_total: float) -> float:
    """
    Compute the sanction weight for a region from its total collected sample.

    weight = min(n_total / target_total, 1.0)

    The weight is 1.0 (no sanction) when the target is unusable: missing, NaN,
    zero, negative, infinite or not convertible to a number. A collected count
    that is missing, NaN, negative or not convertible is treated as 0.

    Parameters:
        n_total: number of entities actually collected for the region.
        target_total: the 68% target for the region.

    Returns:
        float: a weight in the closed interval [0.0, 1.0].
    """
    try:
        target = float(target_total)
    except (TypeError, ValueError):
        return 1.0
    # A non-finite target is not a real target; dividing by +inf would
    # otherwise silently produce a full sanction (weight 0).
    if not np.isfinite(target) or target <= 0:
        return 1.0

    try:
        collected = float(n_total)
    except (TypeError, ValueError):
        collected = 0.0
    if np.isnan(collected) or collected < 0:
        collected = 0.0

    return float(min(collected / target, 1.0))
156
 
157
 
158
  # ============================================================
 
215
  # 4) PIPELINE NASIONAL (ENTITAS): YJ + MINMAX + SUBDIM/DIM/INDEKS REAL
216
  # ============================================================
217
 
218
+ def _mean_norm_cols(row, cols):
219
  vals = []
220
  for c in cols:
221
  k = f"norm_{c}"
 
262
  df[f"norm_{c}"] = minmax_norm(pd.Series(transformed, index=df.index))
263
 
264
  # subdim & dim (entitas)
265
+ df["sub_koleksi"] = df.apply(lambda r: _mean_norm_cols(r, [c for c in koleksi_cols if c in available]), axis=1)
266
+ df["sub_sdm"] = df.apply(lambda r: _mean_norm_cols(r, [c for c in sdm_cols if c in available]), axis=1)
267
+ df["sub_pelayanan"] = df.apply(lambda r: _mean_norm_cols(r, [c for c in pelayanan_cols if c in available]), axis=1)
268
+ df["sub_pengelolaan"] = df.apply(lambda r: _mean_norm_cols(r, [c for c in pengelolaan_cols if c in available]), axis=1)
269
 
270
  df["dim_kepatuhan"] = df[["sub_koleksi","sub_sdm"]].mean(axis=1)
271
  df["dim_kinerja"] = df[["sub_pelayanan","sub_pengelolaan"]].mean(axis=1)
 
318
  _CACHE.update({"key": key, "df_all": None, "pop_kab": None, "pop_prov": None, "meta": {}, "info": info})
319
  return None, None, None, {}, info
320
 
321
+ # mapping jenis -> sekolah / umum / khusus
322
  val_map_jenis = {
323
  "PERPUSTAKAAN SEKOLAH": "sekolah", "SEKOLAH": "sekolah",
324
  "PERPUSTAKAAN UMUM": "umum", "UMUM": "umum", "PERPUSTAKAAN DAERAH": "umum",
 
346
  df_raw = df_raw.drop_duplicates(subset=["_row_key"], keep="first").copy()
347
  after = len(df_raw)
348
 
349
+ # POP KAB: target total 68% pakai kolom "sampel_total"
350
  pk = pd.read_excel(POP_KAB)
351
  c_kab = pick_col(pk, ["KABUPATEN_KOTA","Kab/Kota","Kabupaten/Kota","KAB/KOTA","Kabupaten_Kota"])
352
  c_prov = pick_col(pk, ["PROVINSI","Provinsi"])
353
+ c_target_total = pick_col(pk, ["sampel_total","Sampel_total","Sampel Total","TOTAL_SAMPEL","total_sampel"])
354
+ c_pop_total = pick_col(pk, ["total_populasi","Total Populasi","POPULASI","populasi"]) # opsional utk coverage %
355
+ if c_kab is None or c_target_total is None:
356
+ info = "❌ POP_KAB: wajib ada kolom Kab/Kota dan sampel_total (target 68%)."
 
357
  _CACHE.update({"key": key, "df_all": None, "pop_kab": None, "pop_prov": None, "meta": {}, "info": info})
358
  return None, None, None, {}, info
359
 
360
  pop_kab = pd.DataFrame({
361
  "Provinsi_Label": pk[c_prov].astype(str).str.strip() if c_prov else "",
362
  "Kab_Kota_Label": pk[c_kab].astype(str).str.strip(),
363
+ "Target68_Total": pk[c_target_total].apply(coerce_num),
364
+ "Pop_Total": pk[c_pop_total].apply(coerce_num) if c_pop_total else np.nan,
365
  })
366
  pop_kab["kab_key"] = pop_kab["Kab_Kota_Label"].apply(norm_kab_label)
367
  pop_kab = pop_kab.groupby("kab_key", as_index=False).agg({
368
  "Kab_Kota_Label":"first",
369
  "Provinsi_Label":"first",
370
+ "Target68_Total":"max",
371
+ "Pop_Total":"max",
372
  })
373
 
374
+ # POP PROV: target total 68% pakai kolom "total _sampel"
375
  pp = pd.read_excel(POP_PROV)
376
  c_pr = pick_col(pp, ["Provinsi","PROVINSI","provinsi"])
377
+ c_target_total = pick_col(pp, ["total _sampel","total_sampel","TOTAL_SAMPEL","Total Sampel"])
378
+ c_pop_total = pick_col(pp, ["total_populasi","Total Populasi","TOTAL_PEND","total_pend","populasi"]) # opsional
379
+ if c_pr is None or c_target_total is None:
380
+ info = "❌ POP_PROV: wajib ada kolom Provinsi dan total _sampel (target 68%)."
381
  _CACHE.update({"key": key, "df_all": None, "pop_kab": None, "pop_prov": None, "meta": {}, "info": info})
382
  return None, None, None, {}, info
383
 
384
  pop_prov = pd.DataFrame({
385
  "Provinsi_Label": pp[c_pr].astype(str).str.strip(),
386
+ "Target68_Total_Prov": pp[c_target_total].apply(coerce_num),
387
+ "Pop_Total_Prov": pp[c_pop_total].apply(coerce_num) if c_pop_total else np.nan,
388
  })
389
  pop_prov["prov_key"] = pop_prov["Provinsi_Label"].apply(norm_prov_label)
390
  pop_prov = pop_prov.groupby("prov_key", as_index=False).agg({
391
  "Provinsi_Label":"first",
392
+ "Target68_Total_Prov":"max",
393
+ "Pop_Total_Prov":"max",
394
  })
395
 
396
+ # pipeline nasional (entitas)
397
  df_all = prepare_global(df_raw)
398
 
399
  meta = dict(prov_col=prov_col, kab_col=kab_col, kew_col=kew_col, jenis_col=jenis_col, nama_col=nama_col)
 
401
  info = (
402
  f"✅ Mode NO UPLOAD (cache aktif)<br>"
403
  f"✅ DM: <b>{fp.name}</b> | Baris: {before} → dedup: {after}<br>"
404
+ f"✅ POP_KAB: <b>{Path(POP_KAB).name}</b> (n={len(pop_kab)}) — target 68% via <code>sampel_total</code><br>"
405
+ f"✅ POP_PROV: <b>{Path(POP_PROV).name}</b> (n={len(pop_prov)}) — target 68% via <code>total _sampel</code><br>"
406
  f"🕒 mtime: DM={time.ctime(_mtime(DATA_FILE))} | Kab={time.ctime(_mtime(POP_KAB))} | Prov={time.ctime(_mtime(POP_PROV))}"
407
  )
408
 
 
411
 
412
 
413
  # ============================================================
414
+ # 6) AGREGAT WILAYAH (KESELURUHAN) + SANKSI TOTAL
415
  # ============================================================
416
 
417
def build_agg_wilayah_total(df_filtered: pd.DataFrame, pop_kab: pd.DataFrame, pop_prov: pd.DataFrame, kew_value: str):
    """
    Aggregate entity-level scores into one row per region and apply the
    total-based sanction weight to the aggregated index.

    The grouping level follows ``kew_value``: a value containing "KAB" or
    "KOTA" groups by ``kab_key``; otherwise a value containing "PROV" groups by
    ``prov_key``; anything else falls back to ``kab_key``.

    Parameters:
        df_filtered: entity rows; must contain the grouping key/label columns,
            the ``sub_*`` / ``dim_*`` columns and ``Indeks_Real_0_100``.
        pop_kab: kab/kota reference table (``kab_key``, ``Kab_Kota_Label``,
            ``Target68_Total``, ``Pop_Total``); may be None or empty.
        pop_prov: province reference table (``prov_key``, ``Provinsi_Label``,
            ``Target68_Total_Prov``, ``Pop_Total_Prov``); may be None or empty.
        kew_value: authority filter selected by the user.

    Returns:
        DataFrame with one row per region: ``group_key``, the region label,
        ``n_total``, mean sub-dimension/dimension values,
        ``Indeks_Real_Agregat_0_100``, ``target_total_68``, ``pop_total``,
        ``bobot_sanksi``, ``coverage_total_%`` and
        ``Indeks_Final_Wilayah_0_100``. Empty DataFrame when there is no input.
    """
    if df_filtered is None or df_filtered.empty:
        return pd.DataFrame()

    kew_norm = str(kew_value or "").upper()
    # Province level applies only when PROV is requested and KAB/KOTA is not;
    # every other value (including "(Semua)") uses the kab/kota level.
    by_province = "PROV" in kew_norm and "KAB" not in kew_norm and "KOTA" not in kew_norm
    if by_province:
        key_col, label_col, label_name = "prov_key", "PROV_DISP", "Provinsi"
        pop_src = pop_prov
        target_field, pop_field, name_field = "Target68_Total_Prov", "Pop_Total_Prov", "Provinsi_Label"
    else:
        key_col, label_col, label_name = "kab_key", "KAB_DISP", "Kab/Kota"
        pop_src = pop_kab
        target_field, pop_field, name_field = "Target68_Total", "Pop_Total", "Kab_Kota_Label"

    if pop_src is not None and not pop_src.empty and key_col in pop_src.columns:
        pop = pop_src.set_index(key_col)
        # Series.map needs a unique index; keep the first row for a repeated key.
        pop = pop[~pop.index.duplicated(keep="first")]
    else:
        pop = pd.DataFrame()

    agg = df_filtered.groupby([key_col, label_col], dropna=False).agg(
        n_total=("Indeks_Real_0_100", "size"),
        Rata2_sub_koleksi=("sub_koleksi", "mean"),
        Rata2_sub_sdm=("sub_sdm", "mean"),
        Rata2_sub_pelayanan=("sub_pelayanan", "mean"),
        Rata2_sub_pengelolaan=("sub_pengelolaan", "mean"),
        Rata2_dim_kepatuhan=("dim_kepatuhan", "mean"),
        Rata2_dim_kinerja=("dim_kinerja", "mean"),
        Indeks_Real_Agregat_0_100=("Indeks_Real_0_100", "mean"),
    ).reset_index()
    agg = agg.rename(columns={key_col: "group_key", label_col: label_name})

    keys = agg["group_key"]
    in_pop = keys.isin(pop.index)

    def _lookup(field):
        # Value of `field` from the reference table per region; NaN when the
        # region or the column is absent.
        if field not in pop.columns:
            return pd.Series(np.nan, index=agg.index)
        return keys.map(pop[field])

    # Prefer the reference table's spelling of the region name when the region
    # is present there; otherwise keep the label that came with the data.
    if name_field in pop.columns:
        agg[label_name] = keys.map(pop[name_field]).where(in_pop, agg[label_name])

    agg["target_total_68"] = pd.to_numeric(_lookup(target_field), errors="coerce")
    agg["pop_total"] = pd.to_numeric(_lookup(pop_field), errors="coerce")

    n_total = agg["n_total"].astype(float)

    # Sanction weight based on the TOTAL number of collected entities.
    agg["bobot_sanksi"] = [
        bobot_sanksi_total(n, t) for n, t in zip(n_total, agg["target_total_68"])
    ]

    # Optional coverage percentage; safe_div already yields NaN for a missing
    # or non-positive population.
    agg["coverage_total_%"] = [
        safe_div(n, p) * 100 for n, p in zip(n_total, agg["pop_total"])
    ]

    # Final regional index is computed from unrounded values.
    agg["Indeks_Final_Wilayah_0_100"] = agg["Indeks_Real_Agregat_0_100"] * agg["bobot_sanksi"]

    def _rounded(col, ndigits, fallback):
        # Python round() per cell; missing values become `fallback`.
        return agg[col].apply(lambda v: round(float(v), ndigits) if pd.notna(v) else fallback)

    for c in [
        "Rata2_sub_koleksi", "Rata2_sub_sdm", "Rata2_sub_pelayanan", "Rata2_sub_pengelolaan",
        "Rata2_dim_kepatuhan", "Rata2_dim_kinerja",
    ]:
        agg[c] = _rounded(c, 3, 0.0)

    for c in [
        "Indeks_Real_Agregat_0_100", "Indeks_Final_Wilayah_0_100", "bobot_sanksi",
        "target_total_68", "coverage_total_%", "pop_total",
    ]:
        agg[c] = pd.to_numeric(agg[c], errors="coerce")

    for c in ["Indeks_Real_Agregat_0_100", "Indeks_Final_Wilayah_0_100"]:
        agg[c] = _rounded(c, 2, 0.0)

    # A missing weight means "no sanction", hence the 1.0 fallback.
    agg["bobot_sanksi"] = _rounded("bobot_sanksi", 3, 1.0)

    return agg
523
 
524
 
525
  # ============================================================
526
+ # 7) AGREGAT WILAYAH × JENIS (HANYA sekolah & umum) + FINAL pakai bobot wilayah
527
  # ============================================================
528
 
529
+ def build_agg_wilayah_jenis(df_filtered: pd.DataFrame, agg_total: pd.DataFrame, kew_value: str):
530
  """
531
+ Jenis yang tampil hanya: sekolah, umum (khusus digabung ke keseluruhan).
532
+ Final per jenis = Indeks_Real_Jenis * bobot_sanksi_wilayah
 
533
  """
534
  if df_filtered is None or df_filtered.empty:
535
  return pd.DataFrame()
 
550
  label_col = "KAB_DISP"
551
  label_name = "Kab/Kota"
552
 
553
+ # tampilkan hanya sekolah & umum
554
+ df = df[df["_dataset"].isin(["sekolah", "umum"])].copy()
555
+ if df.empty:
556
+ return pd.DataFrame()
557
+
558
  agg = df.groupby([key_col, label_col, "_dataset"], dropna=False).agg(
559
  Jumlah=("Indeks_Real_0_100", "size"),
560
  Rata2_sub_koleksi=("sub_koleksi", "mean"),
 
566
  Indeks_Real_Agregat_0_100=("Indeks_Real_0_100", "mean"),
567
  ).reset_index()
568
 
569
+ agg = agg.rename(columns={key_col: "group_key", label_col: label_name, "_dataset": "Jenis"})
570
 
571
+ # join bobot wilayah
572
+ if agg_total is None or agg_total.empty:
573
+ agg["bobot_sanksi"] = 1.0
574
+ agg["Indeks_Final_Agregat_0_100"] = agg["Indeks_Real_Agregat_0_100"]
 
575
  else:
576
+ m = agg_total[["group_key", "bobot_sanksi"]].copy()
577
+ agg = agg.merge(m, on="group_key", how="left")
578
+ agg["bobot_sanksi"] = agg["bobot_sanksi"].fillna(1.0)
579
+ agg["Indeks_Final_Agregat_0_100"] = agg["Indeks_Real_Agregat_0_100"] * agg["bobot_sanksi"]
 
 
 
 
 
580
 
581
  # rounding
582
  for c in [
 
586
  if c in agg.columns:
587
  agg[c] = agg[c].apply(lambda x: round(float(x), 3) if pd.notna(x) else 0.0)
588
 
589
+ for c in ["Indeks_Real_Agregat_0_100","Indeks_Final_Agregat_0_100","bobot_sanksi"]:
 
 
 
 
 
590
  if c in agg.columns:
591
  agg[c] = agg[c].apply(lambda x: round(float(x), 2) if pd.notna(x) else 0.0)
592
 
 
 
 
 
593
  return agg
594
 
595
 
596
+ # ============================================================
597
+ # 8) SUMMARY (PER JENIS) + KESELURUHAN
598
+ # ============================================================
599
+
600
def build_summary_per_jenis(agg_jenis: pd.DataFrame, agg_total: pd.DataFrame):
    """
    Build the summary table: one row per library type plus an overall row.

    Rows are emitted in the order "sekolah", "umum", "keseluruhan". The type
    rows average the region-by-type aggregates in ``agg_jenis``; the overall
    row averages the per-region aggregates in ``agg_total``. A type with no
    rows in ``agg_jenis`` is omitted.

    Parameters:
        agg_jenis: region-by-type aggregate with columns ``Jenis``, ``Jumlah``,
            the ``Rata2_*`` columns and ``Indeks_Final_Agregat_0_100``; may be
            None or empty.
        agg_total: per-region aggregate with ``n_total``, the ``Rata2_*``
            columns and ``Indeks_Final_Wilayah_0_100``; may be None or empty.

    Returns:
        DataFrame with columns ``Jenis``, ``Jumlah_Wilayah``, ``Total_Perpus``,
        the six ``Rata2_*`` means (rounded to 3 decimals) and
        ``Indeks_Pasca_Sanksi_0_100`` (rounded to 2 decimals). Empty when both
        inputs are empty.
    """
    mean_cols = [
        "Rata2_sub_koleksi", "Rata2_sub_sdm", "Rata2_sub_pelayanan", "Rata2_sub_pengelolaan",
        "Rata2_dim_kepatuhan", "Rata2_dim_kinerja",
    ]

    def _row(label, frame, count_col, final_col):
        # One summary row: region count, library count, column means, final index.
        row = {
            "Jenis": label,
            "Jumlah_Wilayah": int(frame.shape[0]),
            "Total_Perpus": int(frame[count_col].sum()),
        }
        for c in mean_cols:
            row[c] = float(frame[c].mean())
        row["Indeks_Pasca_Sanksi_0_100"] = float(frame[final_col].mean())
        return row

    rows = []

    if agg_jenis is not None and not agg_jenis.empty:
        for jenis in ["sekolah", "umum"]:
            sub = agg_jenis[agg_jenis["Jenis"] == jenis]
            if sub.empty:
                continue
            rows.append(_row(jenis, sub, "Jumlah", "Indeks_Final_Agregat_0_100"))

    # The overall row is taken from agg_total.
    if agg_total is not None and not agg_total.empty:
        rows.append(_row("keseluruhan", agg_total, "n_total", "Indeks_Final_Wilayah_0_100"))

    out = pd.DataFrame(rows)
    if out.empty:
        return out

    for c in mean_cols:
        out[c] = out[c].apply(lambda x: round(float(x), 3) if pd.notna(x) else 0.0)
    out["Indeks_Pasca_Sanksi_0_100"] = out["Indeks_Pasca_Sanksi_0_100"].apply(
        lambda x: round(float(x), 2) if pd.notna(x) else 0.0
    )

    return out
653
+
654
+
655
+ # ============================================================
656
+ # 9) DETAIL ENTITAS: Final menempel dari agg_total (wilayah)
657
+ # ============================================================
658
+
659
+ def attach_final_to_detail(df_filtered: pd.DataFrame, agg_total: pd.DataFrame, meta: dict, kew_value: str):
660
  """
661
+ Indeks_Final_0_100 = Indeks_Final_Wilayah_0_100 (menempel per wilayah),
662
+ sehingga individu TIDAK dihitung penalti sendiri.
663
  """
664
  if df_filtered is None or df_filtered.empty:
665
  return pd.DataFrame()
 
677
  key_col = "kab_key"
678
  label_cols = ("PROV_DISP", "KAB_DISP")
679
 
680
+ if agg_total is None or agg_total.empty:
681
  df["Indeks_Final_0_100"] = df["Indeks_Real_0_100"]
682
  else:
683
+ m = agg_total[["group_key", "Indeks_Final_Wilayah_0_100"]].copy()
684
+ df = df.merge(m, left_on=key_col, right_on="group_key", how="left")
685
+ df["Indeks_Final_0_100"] = df["Indeks_Final_Wilayah_0_100"].fillna(df["Indeks_Real_0_100"])
686
+ df = df.drop(columns=[c for c in ["group_key","Indeks_Final_Wilayah_0_100"] if c in df.columns])
 
687
 
688
  base_cols = [label_cols[0], label_cols[1], "KEW_NORM", "_dataset"]
689
  if meta.get("nama_col") and meta["nama_col"] in df.columns:
 
701
  out = df[keep].copy()
702
  out = out.rename(columns={label_cols[0]:"Provinsi", label_cols[1]:"Kab/Kota", "_dataset":"Jenis"})
703
 
 
704
  for c in ["sub_koleksi","sub_sdm","sub_pelayanan","sub_pengelolaan","dim_kepatuhan","dim_kinerja"]:
705
  if c in out.columns:
706
  out[c] = out[c].apply(lambda x: round(float(x), 3) if pd.notna(x) else 0.0)
 
711
  return out
712
 
713
 
714
+ # ============================================================
715
+ # 10) VERIFIKASI TOTAL (tanpa koma)
716
+ # ============================================================
717
+
718
def build_verif_total(agg_total: pd.DataFrame, kew_value: str):
    """
    Build the verification table for the total-based 68% target.

    For every region it reports the population, the 68% target, the number of
    collected entities, the coverage percentage, the sanction weight expressed
    as a percentage, the remaining gap to the target and a note when the target
    is unusable. All numeric columns are rounded to whole numbers, with missing
    values shown as 0.

    Parameters:
        agg_total: per-region aggregate (see ``build_agg_wilayah_total``); may
            be None or empty. Missing columns are replaced by neutral defaults
            instead of raising.
        kew_value: authority filter; accepted for signature compatibility and
            not used here.

    Returns:
        DataFrame with the region label column ("Kab/Kota", "Provinsi" or
        "Wilayah"), ``Pop_Total``, ``Target_68_Total``,
        ``Sampel_Total_Terkumpul``, ``Coverage_Total_%``,
        ``Bobot_Sanksi_(Sampel/Target68)``, ``GAP_Ke_Target68_Total`` and
        ``Catatan``. Empty DataFrame when there is no input.
    """
    if agg_total is None or agg_total.empty:
        return pd.DataFrame()

    df = agg_total.copy()
    # The label column name depends on the aggregation level.
    label_col = next((c for c in ("Kab/Kota", "Provinsi") if c in df.columns), "Wilayah")

    def _col(name, default):
        # Column as a Series aligned to df; a constant Series when absent.
        if name in df.columns:
            return df[name]
        return pd.Series(default, index=df.index)

    n_total = pd.to_numeric(_col("n_total", 0), errors="coerce")
    target = pd.to_numeric(_col("target_total_68", np.nan), errors="coerce")

    out = pd.DataFrame({
        label_col: _col(label_col, "").astype(str),
        "Pop_Total": _col("pop_total", np.nan),
        "Target_68_Total": target,
        "Sampel_Total_Terkumpul": n_total,
        "Coverage_Total_%": _col("coverage_total_%", np.nan),
        "Bobot_Sanksi_(Sampel/Target68)": pd.to_numeric(_col("bobot_sanksi", 1.0), errors="coerce") * 100,
        # Remaining entities needed to reach the target; never negative.
        "GAP_Ke_Target68_Total": (target - n_total).clip(lower=0),
        "Catatan": [
            "Target68_Total_tidak_valid" if (pd.isna(t) or float(t) <= 0) else ""
            for t in target
        ],
    })

    # Whole-number display: every numeric column is rounded the same way.
    for c in out.columns:
        if c in (label_col, "Catatan"):
            continue
        out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0).round(0).astype(int)

    return out
757
 
758
 
759
  # ============================================================
760
+ # 11) BELL CURVE (BERBASIS FINAL WILAYAH)
761
  # ============================================================
762
 
763
+ def make_bell_figure_from_total(agg_total: pd.DataFrame, title: str, min_points: int = 5):
764
  fig = go.Figure()
765
+ fig.update_layout(title=title, xaxis_title="Indeks FINAL Wilayah (0–100)", yaxis_title="Kepadatan (relatif)")
766
 
767
+ if agg_total is None or agg_total.empty or "Indeks_Final_Wilayah_0_100" not in agg_total.columns:
768
  return fig
769
 
770
+ dfp = agg_total.dropna(subset=["Indeks_Final_Wilayah_0_100"]).copy()
771
  if len(dfp) < min_points:
772
  fig.add_annotation(text="Grafik tidak ditampilkan (data terlalu sedikit).", x=0.5, y=0.5,
773
  xref="paper", yref="paper", showarrow=False)
774
  return fig
775
 
776
+ x = dfp["Indeks_Final_Wilayah_0_100"].astype(float).values
777
  mu = float(np.mean(x))
778
  sigma = float(np.std(x, ddof=1)) if len(x) > 1 else 1.0
779
  sigma = max(sigma, 1e-6)
 
782
  pdf = (1.0 / (sigma * np.sqrt(2 * np.pi))) * np.exp(-0.5 * ((xs - mu) / sigma) ** 2)
783
  pdf = pdf / max(pdf.max(), 1e-9)
784
 
785
+ label_col = "Kab/Kota" if "Kab/Kota" in dfp.columns else ("Provinsi" if "Provinsi" in dfp.columns else None)
786
+ if label_col:
787
+ hover = [
788
+ f"{w}<br>Final: {v:.2f}<br>Real: {r:.2f}<br>Bobot: {b:.3f}<br>n_total: {int(n)}<br>target_68: {t if pd.notna(t) else 'NA'}"
789
+ for w, v, r, b, n, t in zip(
790
+ dfp[label_col].astype(str).tolist(),
791
+ dfp["Indeks_Final_Wilayah_0_100"].astype(float).tolist(),
792
+ dfp["Indeks_Real_Agregat_0_100"].astype(float).tolist(),
793
+ dfp["bobot_sanksi"].astype(float).tolist(),
794
+ dfp["n_total"].astype(float).tolist(),
795
+ dfp["target_total_68"].tolist(),
796
+ )
797
+ ]
798
  else:
799
  hover = [f"Final: {v:.2f}" for v in x]
800
 
 
819
 
820
 
821
  # ============================================================
822
+ # 12) LLM + WORD
823
  # ============================================================
824
 
825
  _HF_CLIENT = None
 
835
  _HF_CLIENT = None
836
  return None
837
 
838
def build_context(summary_jenis: pd.DataFrame, agg_total: pd.DataFrame, verif_total: pd.DataFrame, wilayah: str, kew: str) -> str:
    """
    Assemble the plain-text data context handed to the LLM prompt.

    The text contains the filter, a fixed description of the method, the
    per-type summary, the five regions with the highest final index and the
    five regions with the largest gap to the 68% target. Sections whose input
    is None, empty or lacks the required column are left out.

    Parameters:
        summary_jenis: output of the per-type summary builder.
        agg_total: per-region aggregate with ``Indeks_Final_Wilayah_0_100``.
        verif_total: verification table with ``GAP_Ke_Target68_Total``.
        wilayah: human-readable name of the selected region filter.
        kew: selected authority filter.

    Returns:
        str: the context, one item per line.
    """
    def _to_int(value) -> int:
        # Missing counts (NaN/None) are reported as 0 instead of making int() raise.
        return 0 if pd.isna(value) else int(value)

    lines = [
        f"Wilayah filter: {wilayah}",
        f"Kewenangan: {kew}",
        "Metode: Indeks real dihitung per entitas (YJ+minmax nasional), lalu diagregasi per wilayah. Setelah itu diberlakukan sanksi 68% berbasis TOTAL pengumpulan wilayah.",
        "Rumus sanksi: bobot = min(total_terkumpul / target_total_68, 1.0); Indeks_Final = Indeks_Real_Agregat × bobot.",
        "Catatan: Perpustakaan khusus digabung ke indeks keseluruhan (tidak tampil sebagai jenis terpisah).",
    ]

    if summary_jenis is not None and not summary_jenis.empty:
        lines.append("\nRingkasan (jenis + keseluruhan):")
        for _, r in summary_jenis.iterrows():
            lines.append(
                f"- {r['Jenis']}: wilayah={_to_int(r['Jumlah_Wilayah'])}, total_perpus={_to_int(r['Total_Perpus'])}, "
                f"dim_kepatuhan={float(r['Rata2_dim_kepatuhan']):.3f}, dim_kinerja={float(r['Rata2_dim_kinerja']):.3f}, "
                f"final_pasca_sanksi={float(r['Indeks_Pasca_Sanksi_0_100']):.2f}"
            )

    final_col = "Indeks_Final_Wilayah_0_100"
    # Ranking is impossible without the final-index column, so skip the section.
    if agg_total is not None and not agg_total.empty and final_col in agg_total.columns:
        label_col = "Kab/Kota" if "Kab/Kota" in agg_total.columns else ("Provinsi" if "Provinsi" in agg_total.columns else None)
        lines.append("\nTop 5 wilayah (Final tertinggi):")
        top = agg_total.sort_values(final_col, ascending=False).head(5)
        for _, r in top.iterrows():
            wl = r.get(label_col, "(wilayah)") if label_col else "(wilayah)"
            lines.append(
                f"- {wl}: Final={float(r[final_col]):.2f} | Bobot={float(r.get('bobot_sanksi', 1.0)):.3f} | total={_to_int(r.get('n_total', 0))}"
            )

    if verif_total is not None and not verif_total.empty:
        gap_col = "GAP_Ke_Target68_Total"
        if gap_col in verif_total.columns:
            lines.append("\nTop 5 wilayah (GAP ke target 68% terbesar):")
            tmp = verif_total.sort_values(gap_col, ascending=False).head(5)
            name_col = "Kab/Kota" if "Kab/Kota" in tmp.columns else ("Provinsi" if "Provinsi" in tmp.columns else None)
            for _, r in tmp.iterrows():
                nm = r.get(name_col, "") if name_col else ""
                lines.append(f"- {nm}: GAP={_to_int(r[gap_col])}")

    return "\n".join(lines)
876
 
877
+ def generate_llm_analysis(summary_jenis, agg_total, verif_total, wilayah, kew):
878
+ ctx = build_context(summary_jenis, agg_total, verif_total, wilayah, kew)
879
  client = get_llm_client()
880
  if client is None or not USE_LLM:
881
  return "Analisis otomatis (LLM) tidak tersedia. Pastikan token HuggingFace tersedia dan model bisa diakses."
 
885
  "Tugas Anda menyusun analisis berbasis data IPLM secara formal, tajam, dan operasional."
886
  )
887
  user_prompt = f"""
888
+ DATA RINGKAS IPLM (SANKSI 68% BERBASIS TOTAL WILAYAH):
889
 
890
  {ctx}
891
 
892
  TULISKAN ANALISIS BAHASA INDONESIA FORMAL, STRUKTUR:
893
+ 1) Gambaran umum hasil wilayah (1 paragraf).
894
+ 2) Analisis jenis sekolah & umum serta indeks keseluruhan (sub-dimensi/dimensi dan indeks pasca-sanksi) (2 paragraf).
895
+ 3) Analisis dampak sanksi 68% (berdasarkan total pengumpulan) terhadap indeks final wilayah (1 paragraf).
896
+ 4) Rekomendasi program 3–5 tahun (2 paragraf, konkret dan dapat dieksekusi).
897
 
898
  ATURAN:
899
+ - Jangan memakai label eksplisit "rendah/sedang/tinggi".
900
+ - Pakai frasa netral: "memerlukan penguatan", "memerlukan konsolidasi", dsb.
901
+ - Fokus pada Indeks FINAL WILAYAH (pasca sanksi), bukan individu.
902
  """
903
  try:
904
  resp = client.chat_completion(
 
913
  except Exception as e:
914
  return f"⚠️ Error saat memanggil LLM: {repr(e)}"
915
 
916
+ def generate_word_report(detail_df, summary_jenis, agg_total, agg_jenis, verif_total, wilayah, kew, analysis_text):
 
917
  doc = Document()
918
  doc.add_heading(f"Laporan IPLM — {wilayah}", level=1)
919
  doc.add_paragraph(f"Kewenangan: {kew}")
920
+ doc.add_paragraph("Metode: Indeks real dihitung per entitas (YJ+minmax nasional), diagregasi per wilayah, lalu dikenakan sanksi 68% berbasis TOTAL pengumpulan wilayah.")
921
+ doc.add_paragraph("Rumus sanksi: bobot = min(total_terkumpul / target_total_68, 1.0). Indeks_Final = Indeks_Real_Agregat × bobot.")
922
+ doc.add_paragraph("Catatan: Perpustakaan khusus digabung ke indeks keseluruhan (tidak tampil sebagai jenis terpisah).")
923
 
924
+ doc.add_heading("Ringkasan (Jenis + Keseluruhan) — sub-dimensi, dimensi, indeks pasca sanksi", level=2)
925
  if summary_jenis is not None and not summary_jenis.empty:
926
  table = doc.add_table(rows=1, cols=len(summary_jenis.columns))
927
  hdr = table.rows[0].cells
 
932
  for i, c in enumerate(summary_jenis.columns):
933
  cells[i].text = str(row[c])
934
  else:
935
+ doc.add_paragraph("Ringkasan tidak tersedia.")
 
 
 
 
 
936
 
937
+ doc.add_heading("Agregat Wilayah (Keseluruhan) — Final setelah sanksi", level=2)
938
+ if agg_total is not None and not agg_total.empty:
939
+ show = agg_total.sort_values("Indeks_Final_Wilayah_0_100", ascending=False).head(300)
940
  table = doc.add_table(rows=1, cols=len(show.columns))
941
  hdr = table.rows[0].cells
942
  for i, c in enumerate(show.columns):
 
948
  else:
949
  doc.add_paragraph("Agregat wilayah tidak tersedia.")
950
 
951
+ doc.add_heading("Agregat Wilayah × Jenis (Sekolah & Umum) Final memakai bobot wilayah", level=2)
952
+ if agg_jenis is not None and not agg_jenis.empty:
953
+ show = agg_jenis.sort_values("Indeks_Final_Agregat_0_100", ascending=False).head(300)
954
+ table = doc.add_table(rows=1, cols=len(show.columns))
955
  hdr = table.rows[0].cells
956
+ for i, c in enumerate(show.columns):
957
  hdr[i].text = str(c)
958
+ for _, row in show.iterrows():
959
  cells = table.add_row().cells
960
+ for i, c in enumerate(show.columns):
961
  cells[i].text = str(row[c])
962
  else:
963
+ doc.add_paragraph("Agregat jenis tidak tersedia (atau tidak ada sekolah/umum di filter).")
964
 
965
+ doc.add_heading("Verifikasi Total (Target 68% berbasis kolom Excel, tanpa koma)", level=2)
966
+ if verif_total is not None and not verif_total.empty:
967
+ table = doc.add_table(rows=1, cols=len(verif_total.columns))
968
+ hdr = table.rows[0].cells
969
+ for i, c in enumerate(verif_total.columns):
970
+ hdr[i].text = str(c)
971
+ for _, row in verif_total.iterrows():
972
+ cells = table.add_row().cells
973
+ for i, c in enumerate(verif_total.columns):
974
+ cells[i].text = str(row[c])
975
+ else:
976
+ doc.add_paragraph("Verifikasi tidak tersedia.")
977
+
978
+ doc.add_heading("Detail Entitas (Final menempel dari wilayah)", level=2)
979
  if detail_df is not None and not detail_df.empty:
980
+ show = detail_df.head(250)
981
  table = doc.add_table(rows=1, cols=len(show.columns))
982
  hdr = table.rows[0].cells
983
  for i, c in enumerate(show.columns):
 
1000
 
1001
 
1002
  # ============================================================
1003
+ # 13) CORE RUN
1004
  # ============================================================
1005
 
1006
  def _empty_outputs(msg="⚠️ Data belum siap."):
1007
  empty = pd.DataFrame()
1008
  empty_fig = go.Figure()
1009
  return (
1010
+ empty, empty, empty, empty, empty,
1011
  None, None, None, None,
1012
+ empty_fig,
1013
  msg, "Analisis belum tersedia."
1014
  )
1015
 
 
1031
  if df.empty:
1032
  return _empty_outputs("Tidak ada data untuk filter ini.")
1033
 
1034
+ # 1) agregat wilayah keseluruhan (termasuk khusus)
1035
+ agg_total = build_agg_wilayah_total(df, pop_kab, pop_prov, kew_value or "(Semua)")
1036
 
1037
+ # 2) agregat wilayah×jenis (hanya sekolah & umum) + final pakai bobot wilayah
1038
+ agg_jenis = build_agg_wilayah_jenis(df, agg_total, kew_value or "(Semua)")
1039
 
1040
+ # 3) ringkasan jenis + keseluruhan (khusus tergabung)
1041
+ summary_jenis = build_summary_per_jenis(agg_jenis, agg_total)
1042
 
1043
+ # 4) verifikasi total (tanpa koma)
1044
+ verif_total = build_verif_total(agg_total, kew_value or "(Semua)")
1045
 
1046
+ # 5) detail entitas: final menempel dari wilayah
1047
+ detail_view = attach_final_to_detail(df, agg_total, meta, kew_value or "(Semua)")
1048
 
1049
+ # 6) bell curve (final wilayah)
1050
+ fig_total = make_bell_figure_from_total(agg_total, "Bell Curve — Indeks FINAL Wilayah (Keseluruhan, pasca sanksi)", min_points=5)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1051
 
1052
  # output files
1053
  tmpdir = tempfile.mkdtemp()
 
1055
  kab_slug = (_canon(kab_value or "SEMUA").upper() or "SEMUA")
1056
  kew_slug = (_canon(kew_value or "SEMUA").upper() or "SEMUA")
1057
 
1058
+ p_summary = str(Path(tmpdir) / f"IPLM_RingkasanJenisKeseluruhan_{prov_slug}_{kab_slug}_{kew_slug}.xlsx")
1059
+ p_total = str(Path(tmpdir) / f"IPLM_AgregatWilayah_Keseluruhan_{prov_slug}_{kab_slug}_{kew_slug}.xlsx")
1060
+ p_jenis = str(Path(tmpdir) / f"IPLM_AgregatWilayah_Jenis_SekUm_{prov_slug}_{kab_slug}_{kew_slug}.xlsx")
1061
+ p_detail = str(Path(tmpdir) / f"IPLM_DetailEntitas_FinalMenempelWilayah_{prov_slug}_{kab_slug}_{kew_slug}.xlsx")
1062
+ p_verif = str(Path(tmpdir) / f"IPLM_VerifikasiTotal68_{prov_slug}_{kab_slug}_{kew_slug}.xlsx")
1063
 
1064
+ summary_jenis.to_excel(p_summary, index=False)
1065
+ agg_total.to_excel(p_total, index=False)
1066
+ agg_jenis.to_excel(p_jenis, index=False)
1067
+ detail_view.to_excel(p_detail, index=False)
1068
+ verif_total.to_excel(p_verif, index=False)
1069
 
1070
  wilayah_txt = kab_value if (kab_value and kab_value != "(Semua)") else (prov_value if (prov_value and prov_value != "(Semua)") else "Nasional/All")
1071
+ analysis_text = generate_llm_analysis(summary_jenis, agg_total, verif_total, wilayah_txt, kew_value or "(Semua)")
1072
+ word_path = generate_word_report(detail_view, summary_jenis, agg_total, agg_jenis, verif_total, wilayah_txt, kew_value or "(Semua)", analysis_text)
1073
 
1074
  msg = (
1075
+ f"✅ Selesai: entitas={len(detail_view)} | wilayah(keseluruhan)={len(agg_total)} | "
1076
+ f"jenis(sekolah+umum)={len(agg_jenis)} | sanksi=total_terkumpul/target_total_68 (cap 1.0)"
1077
  )
1078
 
1079
  return (
1080
+ summary_jenis, agg_total, agg_jenis, detail_view, verif_total,
1081
+ p_summary, p_total, p_detail, word_path,
1082
+ fig_total,
1083
  msg, analysis_text
1084
  )
1085
 
 
1088
 
1089
 
1090
  # ============================================================
1091
+ # 14) UI (NO UPLOAD) — TANPA TOMBOL RELOAD
1092
  # ============================================================
1093
 
1094
  def ui_load(force=False):
 
1127
 
1128
  with gr.Blocks() as demo:
1129
  gr.Markdown(f"""
1130
+ # IPLM 2025 — Final (Sanksi 68% Berbasis TOTAL Pengumpulan Wilayah)
1131
  **Mode NO UPLOAD (cache aktif).** File dibaca dari repo/server:
1132
  - `DATA_FILE` = **{DATA_FILE}**
1133
+ - `POP_KAB` = **{POP_KAB}** (target 68%: kolom **sampel_total**)
1134
+ - `POP_PROV` = **{POP_PROV}** (target 68%: kolom **total _sampel**)
1135
 
1136
+ **Aturan penting:**
1137
+ - Hitung indeks real per entitas (YJ+minmax nasional) → agregasi wilayah (keseluruhan) **baru sanksi**.
1138
+ - Sanksi 68% memakai total pengumpulan wilayah: `bobot = min(n_total_terkumpul / target_total_68, 1.0)`.
1139
+ - Perpustakaan **khusus digabung ke indeks keseluruhan** (tidak tampil sebagai jenis terpisah).
1140
  """)
1141
 
1142
  state_df = gr.State(None)
 
1156
  run_btn = gr.Button("Jalankan Perhitungan")
1157
  msg_out = gr.Markdown()
1158
 
1159
+ gr.Markdown("## Ringkasan (Jenis + Keseluruhan) — sub-dimensi, dimensi, indeks pasca sanksi")
1160
  out_summary = gr.DataFrame(interactive=False)
1161
 
1162
+ gr.Markdown("## Agregat Wilayah (Keseluruhan, termasuk khusus) — Final setelah sanksi")
1163
+ out_agg_total = gr.DataFrame(interactive=False)
1164
+
1165
+ gr.Markdown("## Agregat Wilayah × Jenis (Sekolah & Umum) — Final memakai bobot wilayah")
1166
+ out_agg_jenis = gr.DataFrame(interactive=False)
1167
 
1168
+ gr.Markdown("## Detail Entitas (Final menempel dari wilayah; individu tidak dipenalti per-row)")
1169
  out_detail = gr.DataFrame(interactive=False)
1170
 
1171
+ gr.Markdown("## Verifikasi Total 68% (tanpa angka koma)")
1172
  out_verif = gr.DataFrame(interactive=False)
1173
 
1174
+ gr.Markdown("## Bell Curve Indeks FINAL Wilayah (Keseluruhan)")
1175
+ bell_total = gr.Plot()
 
 
 
 
 
 
1176
 
1177
  gr.Markdown("## Analisis Otomatis (LLM)")
1178
  analysis_out = gr.Markdown()
1179
 
1180
  with gr.Row():
1181
+ dl_summary = gr.DownloadButton(label="Download Ringkasan (.xlsx)")
1182
+ dl_total = gr.DownloadButton(label="Download Agregat Wilayah Keseluruhan (.xlsx)")
1183
  dl_detail = gr.DownloadButton(label="Download Detail Entitas (.xlsx)")
1184
  dl_word = gr.DownloadButton(label="Download Laporan Word (.docx)")
1185
 
 
1187
  fn=run_calc,
1188
  inputs=[dd_prov, dd_kab, dd_kew, state_df, state_pop_kab, state_pop_prov, state_meta],
1189
  outputs=[
1190
+ out_summary, out_agg_total, out_agg_jenis, out_detail, out_verif,
1191
+ dl_summary, dl_total, dl_detail, dl_word,
1192
+ bell_total,
1193
  msg_out, analysis_out
1194
  ]
1195
  )