irhamni committed on
Commit
dbd8d99
·
verified ·
1 Parent(s): 77ee2d9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +691 -1022
app.py CHANGED
@@ -1,5 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import re
 
3
  import tempfile
4
  from pathlib import Path
5
 
@@ -14,10 +28,12 @@ from sklearn.preprocessing import PowerTransformer
14
  # 1. KONFIGURASI FILE & PARAMETER
15
  # ============================================================
16
 
17
- DATA_FILE = "DM.xlsx" # data utama perpustakaan
18
- META_KAB_FILE = "jumlahdesa_fixed.xlsx" # kecamatan & desa/kel per kab/kota
19
- META_SDSMP_FILE = "jumlah_SD_SMP.xlsx" # jumlah SD & SMP per kab/kota
20
- META_SMA_FILE = "Data_SMA_propinsi_update.xlsx" # jumlah SMA per provinsi
 
 
21
 
22
  # Kelompok indikator IPLM
23
  koleksi_cols = [
@@ -42,16 +58,15 @@ pengelolaan_cols = [
42
  ]
43
  all_indicators = koleksi_cols + sdm_cols + pelayanan_cols + pengelolaan_cols
44
 
45
- # Bobot indeks IPLM
46
  w_kepatuhan = 0.30
47
  w_kinerja = 0.70
48
 
49
  # Bobot untuk Confidence
50
  W_DATA = 0.7
51
  W_SAMPLE = 0.3
52
- SAMPLE_THRESHOLD = 10 # ambang jumlah perpus per kab/kota
53
 
54
- # Target normatif per jenis perpustakaan
55
  TARGETS = {
56
  "sekolah": {
57
  "JudulTercetak": 1000,
@@ -80,29 +95,19 @@ TARGETS = {
80
  # ============================================================
81
 
82
  USE_LLM = True
83
-
84
- # Pilih salah satu model yang kompatibel
85
  LLM_MODEL_NAME = "meta-llama/Meta-Llama-3-8B-Instruct"
86
- # LLM_MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"
87
 
88
  HF_TOKEN = (
89
  os.getenv("HF_TOKEN")
90
  or os.getenv("HUGGINGFACEHUB_API_TOKEN")
91
  or os.getenv("HF_API_TOKEN")
92
  )
93
-
94
  _HF_CLIENT = None
95
 
96
-
97
  def get_llm_client():
98
- """
99
- Inisialisasi InferenceClient sekali, lalu dipakai ulang.
100
- Kalau gagal (misal token salah / model tidak support), kembalikan None.
101
- """
102
  global _HF_CLIENT
103
  if _HF_CLIENT is not None:
104
  return _HF_CLIENT
105
-
106
  try:
107
  if HF_TOKEN:
108
  _HF_CLIENT = InferenceClient(model=LLM_MODEL_NAME, token=HF_TOKEN)
@@ -113,14 +118,23 @@ def get_llm_client():
113
  _HF_CLIENT = None
114
  return None
115
 
116
-
117
  # ============================================================
118
- # 2. FUNGSI UTIL
119
  # ============================================================
120
 
121
  def _canon(s: str) -> str:
122
  return re.sub(r"[^a-z0-9]+", "", str(s).lower())
123
 
 
 
 
 
 
 
 
 
 
 
124
 
125
  def coerce_num(val):
126
  if pd.isna(val):
@@ -143,7 +157,6 @@ def coerce_num(val):
143
  except Exception:
144
  return np.nan
145
 
146
-
147
  def minmax_norm(s: pd.Series) -> pd.Series:
148
  x = s.astype(float)
149
  mn, mx = x.min(skipna=True), x.max(skipna=True)
@@ -151,24 +164,6 @@ def minmax_norm(s: pd.Series) -> pd.Series:
151
  return pd.Series(0.0, index=s.index)
152
  return (x - mn) / (mx - mn)
153
 
154
-
155
- def pick_col(df, candidates):
156
- """
157
- Pilih kolom dari daftar kandidat dengan:
158
- 1) Cocok nama persis dulu
159
- 2) Kalau tidak ada, pakai versi canonical (_canon)
160
- """
161
- for c in candidates:
162
- if c in df.columns:
163
- return c
164
- can_map = {_canon(c): c for c in df.columns}
165
- for c in candidates:
166
- k = _canon(c)
167
- if k in can_map:
168
- return can_map[k]
169
- return None
170
-
171
-
172
  def norm_kew(v):
173
  if pd.isna(v):
174
  return None
@@ -181,34 +176,12 @@ def norm_kew(v):
181
  return "PUSAT"
182
  return t
183
 
184
-
185
  def _norm_text(x):
186
  if pd.isna(x):
187
  return None
188
  t = str(x).strip().upper()
189
  return " ".join(t.split())
190
 
191
-
192
- def penalized_mean(row, cols):
193
- vals = []
194
- for c in cols:
195
- colname = f"norm_{c}"
196
- if colname in row.index:
197
- v = row[colname]
198
- if pd.isna(v):
199
- v = 0.0
200
- vals.append(v)
201
- if not vals:
202
- return np.nan
203
- return float(np.sum(vals) / len(vals))
204
-
205
-
206
- def skor_normatif(value, target):
207
- if pd.isna(value):
208
- return 0.0
209
- return min(float(value) / target, 1.0)
210
-
211
-
212
  def slugify(s: str) -> str:
213
  if s is None:
214
  return "NA"
@@ -217,60 +190,72 @@ def slugify(s: str) -> str:
217
  return "NA"
218
  return _canon(t).upper()
219
 
220
-
221
- def norm_prov_label(s):
222
- """
223
- Normalisasi nama provinsi agar konsisten di semua file:
224
- - Hilangkan kata 'PROVINSI' / 'PROPINSI'
225
- - Hilangkan spasi ganda & non-alnum
226
- - Uppercase
227
- """
228
  if pd.isna(s):
229
  return None
230
- t = str(s).upper()
231
- for bad in ["PROVINSI", "PROPINSI"]:
232
- t = t.replace(bad, "")
233
  t = " ".join(t.split())
 
234
  return re.sub(r"[^A-Z0-9]+", "", t)
235
 
236
-
237
- def norm_kab_label(s):
238
- """
239
- Normalisasi nama Kab/Kota tapi tetap membedakan:
240
- - 'Kabupaten Bandung' -> 'KABBANDUNG'
241
- - 'Kota Bandung' -> 'KOTABANDUNG'
242
- Dipakai untuk:
243
- - DM.xlsx
244
- - jumlahdesa_fixed.xlsx
245
- - jumlah_SD_SMP.xlsx
246
- """
247
  if pd.isna(s):
248
  return None
249
-
250
  t = str(s).upper()
251
- t = t.replace("KABUPATEN", "KAB")
252
- t = t.replace("KAB.", "KAB")
253
- t = t.replace("KAB ", "KAB ")
254
-
255
- t = t.replace("KOTA ADMINISTRASI", "KOTA")
256
- t = t.replace("KOTA ADM.", "KOTA")
257
- t = t.replace("KOTA.", "KOTA")
258
-
259
  t = " ".join(t.split())
260
  return re.sub(r"[^A-Z0-9]+", "", t)
261
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
262
 
263
  # ============================================================
264
- # 3. LOAD DATA DM.xlsx + META
265
  # ============================================================
266
 
267
  DATA_INFO = ""
268
  df_all_raw = None
269
- meta_kab_df = None
270
- meta_sma_df = None
 
271
 
272
  prov_col_glob = kab_col_glob = kew_col_glob = jenis_col_glob = nama_col_glob = None
273
 
 
274
  try:
275
  fp = Path(DATA_FILE)
276
  if not fp.exists():
@@ -291,6 +276,7 @@ try:
291
  else:
292
  df_all_raw["KEW_NORM"] = None
293
 
 
294
  val_map_jenis = {
295
  "PERPUSTAKAAN SEKOLAH": "sekolah",
296
  "SEKOLAH": "sekolah",
@@ -305,70 +291,46 @@ try:
305
  else:
306
  df_all_raw["_dataset"] = None
307
 
308
- def all_prov_choices():
309
- if prov_col_glob is None:
310
- return ["(Semua)"]
311
- s = df_all_raw[prov_col_glob].dropna().astype(str).str.strip()
312
- vals = sorted([o for o in s.unique() if o != ""])
313
- return ["(Semua)"] + vals
314
-
315
- def get_kab_choices_for_prov(prov_value):
316
- if kab_col_glob is None:
317
- return ["(Semua)"]
318
- if prov_value is None or prov_value == "(Semua)" or prov_col_glob is None:
319
- s = df_all_raw[kab_col_glob].dropna().astype(str).str.strip()
320
- else:
321
- m = df_all_raw[prov_col_glob].astype(str).str.strip() == prov_value
322
- s = df_all_raw.loc[m, kab_col_glob].dropna().astype(str).str.strip()
323
- vals = sorted([x for x in s.unique() if x != ""])
324
- return ["(Semua)"] + vals
325
-
326
- def all_kew_choices():
327
- s = df_all_raw["KEW_NORM"].dropna().astype(str).str.strip()
328
- vals = sorted([o for o in s.unique() if o != ""])
329
- if not vals:
330
- return ["(Semua)"]
331
- return ["(Semua)"] + vals
332
-
333
- prov_choices = all_prov_choices()
334
- kab_choices = get_kab_choices_for_prov(prov_choices[0] if prov_choices else "(Semua)")
335
- kew_choices = all_kew_choices()
336
- default_kew = "KAB/KOTA" if "KAB/KOTA" in kew_choices else kew_choices[0]
337
-
338
- DATA_INFO = f"Data terbaca dari: **{DATA_FILE}** | Jumlah baris: **{len(df_all_raw)}**"
339
  except Exception as e:
340
  df_all_raw = None
341
- prov_choices = kab_choices = kew_choices = ["(Semua)"]
342
- default_kew = "(Semua)"
343
  DATA_INFO = f"⚠️ Gagal memuat data dari file: `{DATA_FILE}`\n\nError: `{e}`"
344
 
345
- # 3b. META KECAMATAN/DESA + SD/SMP + SMA
346
  extra_info = []
347
-
348
- # --- jumlah kecamatan & desa/kel per kab/kota ---
349
  try:
350
  meta_kab_raw = pd.read_excel(META_KAB_FILE)
351
  col_kab = pick_col(meta_kab_raw, ["Kab/Kota", "Kab_Kota", "kab/kota", "kabupaten_kota"])
352
- col_kec = pick_col(meta_kab_raw, ["Kecamatan", "jml_kecamatan", "jumlah_kecamatan"])
353
- col_des = pick_col(meta_kab_raw, ["Desa/Kel", "Desa Kelurahan", "Desa", "Desa_kel"])
354
-
355
- if col_kab and col_kec and col_des:
356
- meta_kab_df = pd.DataFrame({
357
- "Kab_Kota_Label": meta_kab_raw[col_kab].astype(str).str.strip(),
358
- "Jml_Kecamatan": meta_kab_raw[col_kec].apply(coerce_num),
359
- "Jml_DesaKel": meta_kab_raw[col_des].apply(coerce_num),
360
- })
361
- meta_kab_df["kab_key"] = meta_kab_df["Kab_Kota_Label"].apply(norm_kab_label)
362
- extra_info.append(f"Verifikasi Kab/Kota (Kec/Desa) dari **{META_KAB_FILE}** (n={len(meta_kab_df)})")
363
- else:
364
- meta_kab_df = None
365
- extra_info.append(f"Verifikasi Kab/Kota: kolom kunci tidak lengkap di `{META_KAB_FILE}`")
366
- except Exception as e:
367
- meta_kab_df = None
368
- extra_info.append(f"⚠️ Gagal memuat `{META_KAB_FILE}` ({e})")
 
 
369
 
370
- # --- jumlah SD & SMP per kab/kota ---
371
- try:
372
  sd_smp_raw = pd.read_excel(META_SDSMP_FILE)
373
  col_kab2 = pick_col(sd_smp_raw, [
374
  "Kabupaten/Kota_Kabupaten/Kota", "Kabupaten/Kota",
@@ -377,113 +339,100 @@ try:
377
  col_sd = pick_col(sd_smp_raw, ["SD", "Jumlah SD", "Total SD", "SD_Total", "jml_sd", "Jml_SD"])
378
  col_smp = pick_col(sd_smp_raw, ["SMP", "Jumlah SMP", "Total SMP", "SMP_Total", "jml_smp", "Jml_SMP"])
379
 
380
- if col_kab2 and (col_sd or col_smp):
381
- df_sd_smp = pd.DataFrame({
382
- "Kab_Kota_Label_SD": sd_smp_raw[col_kab2].astype(str).str.strip(),
383
- })
384
- df_sd_smp["Jml_SD"] = sd_smp_raw[col_sd].apply(coerce_num) if col_sd else 0.0
385
- df_sd_smp["Jml_SMP"] = sd_smp_raw[col_smp].apply(coerce_num) if col_smp else 0.0
386
 
387
- df_sd_smp["kab_key"] = df_sd_smp["Kab_Kota_Label_SD"].apply(norm_kab_label)
 
 
 
 
 
 
 
 
388
 
389
- df_sd_smp_grp = df_sd_smp.groupby("kab_key", as_index=False).agg({
390
- "Jml_SD": "sum",
391
- "Jml_SMP": "sum",
392
- })
393
 
394
- if meta_kab_df is not None:
395
- meta_kab_df = meta_kab_df.merge(
396
- df_sd_smp_grp,
397
- on="kab_key",
398
- how="left"
399
- )
400
- else:
401
- meta_kab_df = df_sd_smp_grp.copy()
402
- meta_kab_df["Kab_Kota_Label"] = df_sd_smp.groupby("kab_key")["Kab_Kota_Label_SD"].first().values
403
 
404
- extra_info.append(
405
- f"Data SD/SMP per Kab/Kota dari **{META_SDSMP_FILE}** ditambahkan (n={len(df_sd_smp_grp)})"
406
- )
407
- else:
408
- extra_info.append(f"Data SD/SMP: kolom kunci tidak lengkap di `{META_SDSMP_FILE}`")
409
  except Exception as e:
410
- extra_info.append(f"⚠️ Gagal memuat `{META_SDSMP_FILE}` ({e})")
 
411
 
412
- # --- jumlah SMA per provinsi ---
413
  try:
414
- meta_sma_raw = pd.read_excel(META_SMA_FILE)
415
-
416
- col_prov_sma = pick_col(meta_sma_raw, [
417
- "Provinsi", "provinsi", "PROVINSI", "NAMA_PROVINSI", "Nama Provinsi",
418
- "nm_prov", "nm_provinsi", "prov"
419
- ])
420
- # Fokus pada kolom TOTAL / Jml_SMA / SMA / Total SMA / SMA_Total
421
- col_sma = pick_col(meta_sma_raw, [
422
- "Total SMA", "TOTAL_SMA", "TOTAL", "total",
423
- "Jml_SMA", "Jumlah SMA", "SMA", "SMA_Total",
424
- "jumlah_sma", "total_sma", "jml_sma"
425
- ])
426
-
427
- if col_prov_sma is None:
428
- raise ValueError("Kolom provinsi tidak ditemukan dalam file SMA.")
429
- if col_sma is None:
430
- raise ValueError("Kolom total jumlah SMA tidak ditemukan.")
431
-
432
- meta_sma_df = pd.DataFrame({
433
- "Provinsi_Label": meta_sma_raw[col_prov_sma].astype(str).str.strip(),
434
- "Jml_SMA": meta_sma_raw[col_sma].apply(coerce_num),
435
- })
436
- # Normalisasi nama provinsi agar konsisten dengan DM
437
- meta_sma_df["prov_key"] = meta_sma_df["Provinsi_Label"].apply(norm_prov_label)
438
- # Jika ada duplikat (misal variasi penulisan), agregasi ke total per prov_key
439
- meta_sma_df = meta_sma_df.groupby(["prov_key", "Provinsi_Label"], as_index=False).agg(
440
- {"Jml_SMA": "sum"}
441
  )
 
 
442
 
443
- extra_info.append(f"Verifikasi SMA per Provinsi berhasil dimuat ({len(meta_sma_df)} provinsi).")
444
  except Exception as e:
445
- meta_sma_df = None
446
- extra_info.append(f"⚠️ Gagal memuat file SMA: {e}")
447
 
448
  if extra_info:
449
  DATA_INFO = DATA_INFO + "<br>" + "<br>".join(extra_info)
450
 
451
-
452
  # ============================================================
453
- # 4. BELL CURVE
454
  # ============================================================
455
 
456
  def make_bell_figure(df_all: pd.DataFrame,
457
  title: str,
458
- index_col: str = "Indeks_Real_0_100",
459
  name_col: str = None,
460
  min_points: int = 5) -> go.Figure:
461
-
462
  fig = go.Figure()
463
 
464
- if index_col not in df_all.columns:
465
- fig.update_layout(
466
- title=title,
467
- xaxis_title="Indeks (0–100)",
468
- yaxis_title="Kepadatan (relatif)",
469
- )
470
  return fig
471
 
472
  df_plot = df_all.copy()
473
  df_plot = df_plot[pd.notna(df_plot[index_col])]
474
-
475
  if df_plot.empty or len(df_plot) < min_points:
476
  fig.update_layout(
477
  title=title,
478
  xaxis_title="Indeks (0–100)",
479
  yaxis_title="Kepadatan (relatif)",
480
- annotations=[
481
- dict(
482
- text="Grafik tidak ditampilkan (data terlalu sedikit).",
483
- x=0.5, y=0.5, xref="paper", yref="paper",
484
- showarrow=False, font=dict(size=14)
485
- )
486
- ]
487
  )
488
  return fig
489
 
@@ -494,40 +443,24 @@ def make_bell_figure(df_all: pd.DataFrame,
494
  xs = np.linspace(max(0, x_vals.min() - 5), min(100, x_vals.max() + 5), 200)
495
  pdf = (1.0 / (sigma * np.sqrt(2 * np.pi))) * np.exp(-0.5 * ((xs - mu) / sigma) ** 2)
496
  pdf = pdf / pdf.max()
497
- y_max = 1.0
498
 
499
  if name_col and name_col in df_plot.columns:
500
- hover_text = [
501
- f"{str(n)}<br>Indeks: {v:.2f}"
502
- for n, v in zip(df_plot[name_col], x_vals)
503
- ]
504
  else:
505
  hover_text = [f"Indeks: {v:.2f}" for v in x_vals]
506
 
 
507
  fig.add_trace(go.Scatter(
508
- x=xs,
509
- y=pdf,
510
- mode="lines",
511
- name="Bell curve",
512
- hoverinfo="skip"
513
- ))
514
-
515
- fig.add_trace(go.Scatter(
516
- x=x_vals,
517
- y=np.zeros_like(x_vals),
518
- mode="markers",
519
- name="Perpustakaan",
520
- hovertext=hover_text,
521
- hovertemplate="%{hovertext}<extra></extra>"
522
  ))
523
 
524
  q1, q2, q3 = np.quantile(x_vals, [0.25, 0.5, 0.75])
525
  for q, label in [(q1, "Q1"), (q2, "Q2 (Median)"), (q3, "Q3")]:
526
  fig.add_trace(go.Scatter(
527
- x=[q, q],
528
- y=[0, y_max * 1.05],
529
- mode="lines",
530
- name=label,
531
  hovertemplate=f"{label}: {q:.2f}<extra></extra>"
532
  ))
533
 
@@ -535,52 +468,141 @@ def make_bell_figure(df_all: pd.DataFrame,
535
  title=title,
536
  xaxis_title="Indeks IPLM (0–100)",
537
  yaxis_title="Kepadatan (relatif)",
538
- yaxis=dict(showticklabels=False, zeroline=True, range=[0, y_max * 1.2]),
539
  margin=dict(l=40, r=20, t=60, b=40),
540
  hovermode="x"
541
  )
542
-
543
  return fig
544
 
545
-
546
  # ============================================================
547
- # 5. PIPELINE REALSCORE + NORMATIF
548
  # ============================================================
549
 
550
- def run_pipeline_core(df_subset: pd.DataFrame, kab_name=None, kew_name=None):
551
- df = df_subset.copy()
552
- df_raw = df_subset.copy()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
553
 
554
- canonical_targets = set(all_indicators)
555
- alias_map_raw = {
556
- "j_judul_koleksi_tercetak": "JudulTercetak",
557
- "j_eksemplar_koleksi_tercetak": "EksemplarTercetak",
558
- "j_judul_koleksi_digital": "JudulElektronik",
559
- "j_eksemplar_koleksi_digital": "EksemplarElektronik",
560
- "tambah_judul_koleksi_tercetak": "TambahJudulTercetak",
561
- "tambah_eksemplar_koleksi_tercetak": "TambahEksemplarTercetak",
562
- "tambah_judul_koleksi_digital": "TambahJudulElektronik",
563
- "tambah_eksemplar_koleksi_digital": "TambahEksemplarElektronik",
564
- "j_anggaran_koleksi": "KomitmenAnggaranKoleksi",
565
- "j_tenaga_ilmu_perpus": "TenagaKualifikasiIlmuPerpustakaan",
566
- "j_tenaga_nonilmu_perpus": "TenagaFungsionalProfesional",
567
- "j_tenaga_pkb": "TenagaPKB",
568
- "j_anggaran_diklat_perpus": "AnggaranTenaga",
569
- "j_peserta_budaya_baca": "PesertaBudayaBaca",
570
- "j_pemustaka_luring_daring": "PemustakaLuringDaring",
571
- "j_pemustaka_fasilitas_tik": "PemustakaFasilitasTIK",
572
- "j_judul_koleksi_tercetak_termanfaat": "PemanfaatanJudulTercetak",
573
- "j_eksemplar_koleksi_tercetak_termanfaat": "PemanfaatanEksemplarTercetak",
574
- "j_judul_koleksi_digital_termanfaat": "PemanfaatanJudulElektronik",
575
- "j_eksemplar_koleksi_digital_termanfaat": "PemanfaatanEksemplarElektronik",
576
- "j_kegiatan_budaya_baca_peningkatan_literasi": "KegiatanBudayaBaca",
577
- "j_kerjasama_pengembangan_perpus": "KegiatanKerjasama",
578
- "j_variasi_layanan": "VariasiLayanan",
579
- "j_kebijakan_prosedur_pelayanan": "Kebijakan",
580
- "j_anggaran_peningkatan_pelayanan": "AnggaranLayanan"
581
- }
582
- alias_map = {_canon(k): v for k, v in alias_map_raw.items()}
583
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
584
  rename_map = {}
585
  for col in list(df.columns):
586
  ccol = _canon(col)
@@ -594,14 +616,13 @@ def run_pipeline_core(df_subset: pd.DataFrame, kab_name=None, kew_name=None):
594
  if rename_map:
595
  df = df.rename(columns=rename_map)
596
 
 
597
  available_indicators = [c for c in all_indicators if c in df.columns]
598
  for c in available_indicators:
599
  df[c] = df[c].apply(coerce_num)
600
 
601
- # Yeo–Johnson + MinMax
602
- yj_cols = []
603
  for c in available_indicators:
604
- yj_col = f"yj_{c}"
605
  x = df[c].astype(float).values
606
  mask = ~np.isnan(x)
607
  transformed = np.full_like(x, np.nan, dtype=float)
@@ -610,111 +631,122 @@ def run_pipeline_core(df_subset: pd.DataFrame, kab_name=None, kew_name=None):
610
  transformed[mask] = pt.fit_transform(x[mask].reshape(-1, 1)).ravel()
611
  else:
612
  transformed[mask] = x[mask]
613
- df[yj_col] = transformed
614
- yj_cols.append(yj_col)
615
-
616
- for yj_col in yj_cols:
617
- base = yj_col[3:]
618
- df[f"norm_{base}"] = minmax_norm(df[yj_col])
619
 
620
- # Sub-indeks real
621
- df["sub_koleksi"] = df.apply(lambda r: penalized_mean(r, [c for c in koleksi_cols if c in available_indicators]), axis=1)
622
- df["sub_sdm"] = df.apply(lambda r: penalized_mean(r, [c for c in sdm_cols if c in available_indicators]), axis=1)
623
- df["sub_pelayanan"] = df.apply(lambda r: penalized_mean(r, [c for c in pelayanan_cols if c in available_indicators]), axis=1)
624
- df["sub_pengelolaan"] = df.apply(lambda r: penalized_mean(r, [c for c in pengelolaan_cols if c in available_indicators]), axis=1)
625
 
626
  df["dim_kepatuhan"] = df[["sub_koleksi", "sub_sdm"]].mean(axis=1)
627
  df["dim_kinerja"] = df[["sub_pelayanan", "sub_pengelolaan"]].mean(axis=1)
628
 
629
- df["Indeks_Real_0_100"] = 100 * (w_kepatuhan * df["dim_kepatuhan"] + w_kinerja * df["dim_kinerja"])
630
 
631
- # Confidence
632
  df["n_ind_filled"] = df[available_indicators].notna().sum(axis=1)
633
  df["n_ind_total"] = len(available_indicators)
 
634
 
635
- df["Confidence_Data"] = np.where(
636
- df["n_ind_total"] > 0,
637
- df["n_ind_filled"] / df["n_ind_total"],
638
- np.nan
639
- )
640
-
641
- if kab_col_glob and kab_col_glob in df.columns:
642
- df["_Kab_norm"] = df[kab_col_glob].astype(str).str.upper().str.strip()
643
- freq_kab = df["_Kab_norm"].value_counts()
644
- df["Jml_Perpus_Kab"] = df["_Kab_norm"].map(freq_kab)
645
- df["Confidence_Sample"] = (df["Jml_Perpus_Kab"] / SAMPLE_THRESHOLD).clip(0, 1)
646
- else:
647
- df["Jml_Perpus_Kab"] = np.nan
648
- df["Confidence_Sample"] = 1.0
649
-
650
- df["Confidence_IPLM"] = (
651
- W_DATA * df["Confidence_Data"].fillna(0) +
652
- W_SAMPLE * df["Confidence_Sample"].fillna(0)
653
- )
654
-
655
- df["Indeks_Real_AdjData"] = df["Indeks_Real_0_100"] * df["Confidence_Data"].fillna(0)
656
- df["Indeks_Real_AdjConf"] = df["Indeks_Real_0_100"] * df["Confidence_IPLM"].fillna(0)
657
-
658
- # Indeks normatif
659
  df["Indeks_Normatif_0_100"] = np.nan
660
- df["sub_koleksi_n"] = np.nan
661
- df["sub_sdm_n"] = np.nan
662
- df["sub_pelayanan_n"] = np.nan
663
- df["sub_pengelolaan_n"] = np.nan
664
- df["dim_kepatuhan_n"] = np.nan
665
- df["dim_kinerja_n"] = np.nan
666
-
667
  for i, row in df.iterrows():
668
  jenis = row.get("_dataset", None)
669
  if jenis not in TARGETS:
670
  continue
671
  t = TARGETS[jenis]
672
-
673
  skor_ind = {}
674
  for ind, target in t.items():
675
  if ind in df.columns:
676
  skor_ind[ind] = skor_normatif(row[ind], target)
677
 
678
- sub_koleksi_n = np.mean([
679
- skor_ind.get("JudulTercetak", 0),
680
- skor_ind.get("EksemplarTercetak", 0)
681
- ])
682
  sub_sdm_n = skor_ind.get("TenagaKualifikasiIlmuPerpustakaan", 0)
683
- sub_pelayanan_n = np.mean([
684
- skor_ind.get("PemustakaLuringDaring", 0),
685
- skor_ind.get("KegiatanBudayaBaca", 0)
686
- ])
687
  sub_pengelolaan_n = skor_ind.get("VariasiLayanan", 0)
688
 
689
  dim_kepatuhan_n = np.mean([sub_koleksi_n, sub_sdm_n])
690
- dim_kinerja_n = np.mean([sub_pelayanan_n, sub_pengelolaan_n])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
691
 
692
- indeks_normatif = 100 * (w_kepatuhan * dim_kepatuhan_n + w_kinerja * dim_kinerja_n)
 
 
693
 
694
- df.at[i, "sub_koleksi_n"] = sub_koleksi_n
695
- df.at[i, "sub_sdm_n"] = sub_sdm_n
696
- df.at[i, "sub_pelayanan_n"] = sub_pelayanan_n
697
- df.at[i, "sub_pengelolaan_n"] = sub_pengelolaan_n
698
- df.at[i, "dim_kepatuhan_n"] = dim_kepatuhan_n
699
- df.at[i, "dim_kinerja_n"] = dim_kinerja_n
700
- df.at[i, "Indeks_Normatif_0_100"] = indeks_normatif
701
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
702
  df["Indeks_Normatif_AdjConf"] = df["Indeks_Normatif_0_100"] * df["Confidence_IPLM"].fillna(0)
703
 
704
- # DETAIL untuk tampilan (lengkap, nanti di-view akan di-hide kolom tertentu)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
705
  detail_cols = []
706
- if prov_col_glob and prov_col_glob in df.columns:
707
- detail_cols.append(prov_col_glob)
708
- if kab_col_glob and kab_col_glob in df.columns:
709
- detail_cols.append(kab_col_glob)
710
- if nama_col_glob and nama_col_glob in df.columns:
711
- detail_cols.append(nama_col_glob)
712
 
713
  detail_cols += [
714
- "_dataset",
715
- "sub_koleksi", "sub_sdm", "sub_pelayanan", "sub_pengelolaan",
716
- "dim_kepatuhan", "dim_kinerja",
717
  "Indeks_Real_0_100",
 
 
718
  "Indeks_Real_AdjData",
719
  "Indeks_Real_AdjConf",
720
  "Indeks_Normatif_0_100",
@@ -724,16 +756,11 @@ def run_pipeline_core(df_subset: pd.DataFrame, kab_name=None, kew_name=None):
724
  "Confidence_IPLM",
725
  ]
726
  detail_cols = [c for c in detail_cols if c in df.columns]
727
-
728
  detail_df = df[detail_cols].copy().round(3)
729
 
730
- # AGREGAT per jenis
731
  expected_ds = ["sekolah", "umum", "khusus"]
732
- label_map = {
733
- "sekolah": "Perpustakaan Sekolah",
734
- "umum": "Perpustakaan Umum",
735
- "khusus": "Perpustakaan Khusus"
736
- }
737
 
738
  rows = []
739
  for ds in expected_ds:
@@ -744,257 +771,165 @@ def run_pipeline_core(df_subset: pd.DataFrame, kab_name=None, kew_name=None):
744
  "Jumlah Perpustakaan": 0,
745
  "Rata2_DimKepatuhan": 0.0,
746
  "Rata2_DimKinerja": 0.0,
747
- "Rata2_Indeks_IPLM_0_100": 0.0,
 
 
748
  })
749
  else:
750
  rows.append({
751
  "Jenis Perpustakaan": label_map.get(ds, ds),
752
- "Jumlah Perpustakaan": len(dsub),
753
- "Rata2_DimKepatuhan": dsub["dim_kepatuhan"].mean(skipna=True),
754
- "Rata2_DimKinerja": dsub["dim_kinerja"].mean(skipna=True),
755
- "Rata2_Indeks_IPLM_0_100": dsub["Indeks_Real_0_100"].mean(skipna=True),
 
 
756
  })
757
 
758
- if rows:
759
- base_rows = rows[:len(expected_ds)]
760
- total_jumlah = int(sum(r["Jumlah Perpustakaan"] for r in base_rows))
761
- mean_dim_kep = float(np.mean([r["Rata2_DimKepatuhan"] for r in base_rows]))
762
- mean_dim_kinerja = float(np.mean([r["Rata2_DimKinerja"] for r in base_rows]))
763
- mean_indeks = float(np.mean([r["Rata2_Indeks_IPLM_0_100"] for r in base_rows]))
764
-
765
- rows.append({
766
- "Jenis Perpustakaan": "Rata-rata keseluruhan",
767
- "Jumlah Perpustakaan": total_jumlah,
768
- "Rata2_DimKepatuhan": mean_dim_kep,
769
- "Rata2_DimKinerja": mean_dim_kinerja,
770
- "Rata2_Indeks_IPLM_0_100": mean_indeks,
771
- })
 
 
772
 
773
  agg_view = pd.DataFrame(rows).round(3)
774
 
775
- # Simpan Excel (AGG, DETAIL, RAW)
776
  kab_slug = slugify(kab_name) if kab_name else "SEMUA_KAB"
777
  kew_slug = slugify(kew_name) if kew_name else "SEMUA_KEW"
778
  tmpdir = tempfile.mkdtemp()
779
 
780
- agg_path = os.path.join(tmpdir, f"IPLM_RealscoreNormatif_Agregat_{kab_slug}_{kew_slug}.xlsx")
781
- detail_path = os.path.join(tmpdir, f"IPLM_RealscoreNormatif_Detail_{kab_slug}_{kew_slug}.xlsx")
782
- raw_path = os.path.join(tmpdir, f"IPLM_RealscoreNormatif_Raw_{kab_slug}_{kew_slug}.xlsx")
783
 
784
  agg_view.to_excel(agg_path, index=False)
785
  df.to_excel(detail_path, index=False)
786
  df_raw.to_excel(raw_path, index=False)
787
 
788
- # Bell curve
789
  name_col = nama_col_glob if (nama_col_glob and nama_col_glob in detail_df.columns) else None
790
 
791
- fig_all = make_bell_figure(detail_df, "Sebaran Indeks RealScore – Semua Perpustakaan",
792
- index_col="Indeks_Real_0_100", name_col=name_col)
793
-
794
- fig_sekolah = make_bell_figure(
795
- detail_df[detail_df["_dataset"] == "sekolah"],
796
- "Sebaran Indeks RealScore – Perpustakaan Sekolah",
797
- index_col="Indeks_Real_0_100", name_col=name_col, min_points=3
798
- )
799
-
800
- fig_umum = make_bell_figure(
801
- detail_df[detail_df["_dataset"] == "umum"],
802
- "Sebaran Indeks RealScore – Perpustakaan Umum",
803
- index_col="Indeks_Real_0_100", name_col=name_col, min_points=3
804
- )
805
-
806
- fig_khusus = make_bell_figure(
807
- detail_df[detail_df["_dataset"] == "khusus"],
808
- "Sebaran Indeks RealScore – Perpustakaan Khusus",
809
- index_col="Indeks_Real_0_100", name_col=name_col, min_points=3
810
- )
811
-
812
- return (
813
- agg_view,
814
- detail_df,
815
- agg_path,
816
- detail_path,
817
- raw_path,
818
- fig_all,
819
- fig_sekolah,
820
- fig_umum,
821
- fig_khusus,
822
- )
823
 
 
824
 
825
  # ============================================================
826
- # 6. VERIFIKASI SAMPEL
827
  # ============================================================
828
 
829
- def compute_verification(df_filtered: pd.DataFrame, kew_value):
830
- if df_filtered is None or len(df_filtered) == 0:
831
  return pd.DataFrame()
832
 
833
  kew_norm = str(kew_value or "").upper()
834
 
835
- # ---------- Kewenangan KAB/KOTA ----------
836
- if ("KAB" in kew_norm or "KOTA" in kew_norm) and (kab_col_glob is not None) and (meta_kab_df is not None):
837
- tmp = df_filtered.copy()
838
- tmp = tmp[pd.notna(tmp[kab_col_glob])]
839
- if tmp.empty:
840
- return pd.DataFrame()
841
-
842
- tmp["kab_key"] = tmp[kab_col_glob].apply(norm_kab_label)
843
-
844
- # total perpus
845
- g_total = tmp.groupby("kab_key").size().rename("jml_perpus_sampel_total").reset_index()
846
-
847
- # klasifikasi jenjang sekolah (kalau ada)
848
- if "sub_jenis_perpus" in tmp.columns:
849
- def jenjang(x):
850
- if pd.isna(x):
851
- return "OTHER"
852
- t = str(x).upper()
853
- if " SD " in f" {t} " or " SD/" in t or " MI " in f" {t} ":
854
- return "SD"
855
- if " SMP " in f" {t} " or " SMP/" in t or " MTS " in f" {t} ":
856
- return "SMP"
857
- return "OTHER"
858
- tmp["jenjang_sekolah"] = tmp["sub_jenis_perpus"].apply(jenjang)
859
- else:
860
- tmp["jenjang_sekolah"] = "OTHER"
861
-
862
- if "_dataset" in tmp.columns:
863
- mask_sek = tmp["_dataset"] == "sekolah"
864
- else:
865
- mask_sek = True
866
-
867
- tmp_sek = tmp[mask_sek].copy()
868
- tmp_sd = tmp_sek[tmp_sek["jenjang_sekolah"] == "SD"].copy()
869
- tmp_smp = tmp_sek[tmp_sek["jenjang_sekolah"] == "SMP"].copy()
870
-
871
- g_sd = tmp_sd.groupby("kab_key").size().rename("jml_perpus_sd_sampel").reset_index()
872
- g_smp = tmp_smp.groupby("kab_key").size().rename("jml_perpus_smp_sampel").reset_index()
873
- g_sekolah = tmp_sek.groupby("kab_key").size().rename("jml_perpus_sekolah_total").reset_index()
874
-
875
- if "_dataset" in tmp.columns:
876
- tmp_umum = tmp[tmp["_dataset"] == "umum"].copy()
877
- else:
878
- tmp_umum = tmp.copy()
879
- g_umum = tmp_umum.groupby("kab_key").size().rename("jml_perpus_umum_sampel").reset_index()
880
-
881
- use_cols = ["kab_key", "Kab_Kota_Label", "Jml_Kecamatan", "Jml_DesaKel", "Jml_SD", "Jml_SMP"]
882
- use_cols = [c for c in use_cols if (meta_kab_df is not None and c in meta_kab_df.columns)]
883
-
884
- merged = (
885
- g_total
886
- .merge(g_sd, on="kab_key", how="left")
887
- .merge(g_smp, on="kab_key", how="left")
888
- .merge(g_sekolah, on="kab_key", how="left")
889
- .merge(g_umum, on="kab_key", how="left")
890
- .merge(meta_kab_df[use_cols], on="kab_key", how="left")
891
  )
892
 
893
- for c in ["jml_perpus_sampel_total", "jml_perpus_sd_sampel",
894
- "jml_perpus_smp_sampel", "jml_perpus_sekolah_total",
895
- "jml_perpus_umum_sampel"]:
896
- if c in merged.columns:
897
- merged[c] = merged[c].fillna(0).astype(int)
898
-
899
- def safe_pct(num, den):
900
- if pd.isna(den) or den <= 0:
901
- return np.nan
902
- return 100.0 * float(num) / float(den)
903
 
904
- # sekolah SD+SMP
905
- if "Jml_SD" in merged.columns or "Jml_SMP" in merged.columns:
906
- merged["total_sd_smp"] = merged[["Jml_SD", "Jml_SMP"]].sum(axis=1, skipna=True)
907
- else:
908
- merged["total_sd_smp"] = np.nan
909
-
910
- merged["cov_sekolah_total_%"] = merged.apply(
911
- lambda r: safe_pct(r["jml_perpus_sekolah_total"], r.get("total_sd_smp", np.nan)),
912
- axis=1
913
- )
914
 
915
- # umum vs kombinasi (Kecamatan + Desa/Kel)
916
- merged["total_kec_desakel"] = merged.get("Jml_Kecamatan", np.nan) + merged.get("Jml_DesaKel", np.nan)
917
- merged["cov_umum_vs_kec_desakel_%"] = merged.apply(
918
- lambda r: safe_pct(r["jml_perpus_umum_sampel"], r.get("total_kec_desakel", np.nan)),
919
- axis=1
920
- )
921
 
922
- out = pd.DataFrame({
923
- "Kab/Kota": merged["Kab_Kota_Label"],
924
- "Perpus Sampel (Total)": merged["jml_perpus_sampel_total"],
925
- "Perpus Sampel – SD": merged["jml_perpus_sd_sampel"],
926
- "Perpus Sampel – SMP": merged["jml_perpus_smp_sampel"],
927
- "Perpus Sampel – Sekolah (Total SD+SMP)": merged["jml_perpus_sekolah_total"],
928
- "Sekolah (SD+SMP)": merged.get("total_sd_smp", np.nan),
929
- "Coverage Perpus Sekolah vs Sekolah (%)": merged["cov_sekolah_total_%"],
930
- "Perpus Sampel – Umum": merged["jml_perpus_umum_sampel"],
931
- "Jumlah Kecamatan": merged.get("Jml_Kecamatan", np.nan),
932
- "Jumlah Desa/Kel": merged.get("Jml_DesaKel", np.nan),
933
- "Coverage Perpus Umum vs Kec+Desa/Kel (%)": merged["cov_umum_vs_kec_desakel_%"],
934
  })
935
 
936
- return out.sort_values("Kab/Kota").reset_index(drop=True).round(3)
937
-
938
- # ---------- Kewenangan PROVINSI ----------
939
- if ("PROV" in kew_norm) and (meta_sma_df is not None):
940
- tmp = df_filtered.copy()
941
-
942
- if prov_col_glob is None:
943
- possible = [c for c in tmp.columns if "prov" in c.lower()]
944
- if possible:
945
- prov_use = possible[0]
946
- else:
947
- return pd.DataFrame({"Info": ["Kolom provinsi tidak ditemukan di DM.xlsx"]})
948
- else:
949
- prov_use = prov_col_glob
950
-
951
- tmp = tmp[pd.notna(tmp[prov_use])]
952
- if tmp.empty:
953
- return pd.DataFrame({"Info": ["Tidak ada data perpustakaan pada kewenangan provinsi."]})
954
-
955
- # Normalisasi provinsi di DM agar konsisten dengan meta_sma_df
956
- tmp["prov_key"] = tmp[prov_use].apply(norm_prov_label)
957
-
958
- g_total = tmp.groupby("prov_key").size().rename("Jumlah_Perpus_Sampel").reset_index()
959
-
960
- if "_dataset" in tmp.columns:
961
- tmp_sek = tmp[tmp["_dataset"] == "sekolah"].copy()
962
  else:
963
- tmp_sek = tmp.copy()
964
- g_sek = tmp_sek.groupby("prov_key").size().rename("Jml_Perpus_SMA_Sampel").reset_index()
965
-
966
- merged = g_total.merge(g_sek, on="prov_key", how="left") \
967
- .merge(meta_sma_df[["prov_key", "Provinsi_Label", "Jml_SMA"]],
968
- on="prov_key", how="left")
969
 
970
- merged["Jml_Perpus_SMA_Sampel"] = merged["Jml_Perpus_SMA_Sampel"].fillna(0).astype(int)
 
971
 
972
- def cov_sma(row):
973
- tot = row.get("Jml_SMA", np.nan)
974
- if pd.isna(tot) or tot <= 0:
975
- return np.nan
976
- return 100.0 * row["Jml_Perpus_SMA_Sampel"] / tot
 
977
 
978
- merged["Coverage_Perpus_SMA_vs_SMA_%"] = merged.apply(cov_sma, axis=1)
 
979
 
980
- cols_out = [
981
- "Provinsi_Label",
982
- "Jumlah_Perpus_Sampel",
983
- "Jml_Perpus_SMA_Sampel",
984
- "Jml_SMA",
985
- "Coverage_Perpus_SMA_vs_SMA_%",
986
- ]
987
- exists = [c for c in cols_out if c in merged.columns]
988
- if not exists:
989
- return pd.DataFrame()
990
 
991
- return merged[exists].sort_values("Provinsi_Label").reset_index(drop=True).round(3)
 
 
992
 
993
  return pd.DataFrame()
994
 
995
-
996
  # ============================================================
997
- # 7. KONTEKS RINGKAS UNTUK LLM (RAG MINI)
998
  # ============================================================
999
 
1000
  def build_context_for_llm(detail_df: pd.DataFrame,
@@ -1010,118 +945,52 @@ def build_context_for_llm(detail_df: pd.DataFrame,
1010
  lines.append(f"Wilayah: {wilayah}")
1011
  lines.append(f"Jumlah perpustakaan sampel: {len(detail_df)}")
1012
 
1013
- # Rata-rata indeks: utamakan baris "Rata-rata keseluruhan" di agg_df
1014
- mean_ind = np.nan
1015
  if agg_df is not None and not agg_df.empty and "Jenis Perpustakaan" in agg_df.columns:
1016
  mask_total = agg_df["Jenis Perpustakaan"].astype(str).str.lower().str.startswith("rata-rata")
1017
  if mask_total.any():
1018
  try:
1019
- mean_ind = float(
1020
- agg_df.loc[mask_total, "Rata2_Indeks_IPLM_0_100"].iloc[0]
1021
- )
1022
  except Exception:
1023
- mean_ind = np.nan
 
 
1024
 
1025
- # Fallback ke rata-rata detail bila agregat tidak tersedia
1026
- if (np.isnan(mean_ind) or mean_ind == 0) and "Indeks_Real_0_100" in detail_df.columns:
1027
- mean_ind = detail_df["Indeks_Real_0_100"].mean(skipna=True)
1028
 
1029
- if not np.isnan(mean_ind):
1030
- lines.append(f"Rata-rata Indeks IPLM 0-100: {mean_ind:.2f}")
1031
-
1032
- # Dimensi kepatuhan & kinerja
1033
- mean_kep = np.nan
1034
- mean_kin = np.nan
1035
  if "dim_kepatuhan" in detail_df.columns:
1036
- mean_kep = detail_df["dim_kepatuhan"].mean(skipna=True)
1037
- lines.append(f"Rata-rata dimensi kepatuhan (0-1): {mean_kep:.3f}")
1038
  if "dim_kinerja" in detail_df.columns:
1039
- mean_kin = detail_df["dim_kinerja"].mean(skipna=True)
1040
- lines.append(f"Rata-rata dimensi kinerja (0-1): {mean_kin:.3f}")
1041
-
1042
- # Confidence
1043
- if "Confidence_IPLM" in detail_df.columns:
1044
- mean_conf = detail_df["Confidence_IPLM"].mean(skipna=True)
1045
- if not np.isnan(mean_conf):
1046
- lines.append(f"Rata-rata Confidence_IPLM (0-1): {mean_conf:.2f}")
1047
 
1048
- # Ringkasan per jenis perpustakaan
1049
  if agg_df is not None and not agg_df.empty and "Jenis Perpustakaan" in agg_df.columns:
1050
  lines.append("\nRingkasan per jenis perpustakaan:")
1051
  for _, r in agg_df.iterrows():
1052
- jp = str(r.get("Jenis Perpustakaan", "") or "")
1053
  if jp.lower().startswith("rata-rata"):
1054
  continue
1055
- n = r.get("Jumlah Perpustakaan", np.nan)
1056
- idx = r.get("Rata2_Indeks_IPLM_0_100", np.nan)
 
1057
  if pd.isna(idx):
1058
  continue
1059
- lines.append(f"- {jp}: jumlah sampel={int(n)}, rata-rata indeks={idx:.2f}")
1060
-
1061
- # Contoh perpustakaan dengan indeks yang bervariasi (top-3 dan bottom-3)
1062
- if "Indeks_Real_0_100" in detail_df.columns:
1063
- df_valid = detail_df.dropna(subset=["Indeks_Real_0_100"]).copy()
1064
-
1065
- if "Confidence_IPLM" in df_valid.columns:
1066
- df_valid = df_valid.sort_values("Confidence_IPLM", ascending=False)
1067
-
1068
- col_nama = nama_col_glob if (nama_col_glob and nama_col_glob in df_valid.columns) else None
1069
- if not df_valid.empty and col_nama:
1070
- top3 = df_valid.sort_values("Indeks_Real_0_100", ascending=False).head(3)
1071
- bottom3 = df_valid.sort_values("Indeks_Real_0_100", ascending=True).head(3)
1072
-
1073
- lines.append("\nContoh perpustakaan dengan indeks relatif lebih tinggi:")
1074
- for _, r in top3.iterrows():
1075
- lines.append(
1076
- f"- {str(r[col_nama])}: indeks={r['Indeks_Real_0_100']:.2f}, "
1077
- f"kepatuhan={r['dim_kepatuhan']:.3f}, kinerja={r['dim_kinerja']:.3f}"
1078
- )
1079
-
1080
- lines.append("\nContoh perpustakaan dengan indeks yang masih perlu penguatan:")
1081
- for _, r in bottom3.iterrows():
1082
- lines.append(
1083
- f"- {str(r[col_nama])}: indeks={r['Indeks_Real_0_100']:.2f}, "
1084
- f"kepatuhan={r['dim_kepatuhan']:.3f}, kinerja={r['dim_kinerja']:.3f}"
1085
- )
1086
-
1087
- # Ringkasan coverage (kalau ada verif_df)
1088
  if verif_df is not None and not verif_df.empty:
1089
- try:
1090
- if "Coverage Perpus Sekolah vs Sekolah (%)" in verif_df.columns:
1091
- cov_sek = verif_df["Coverage Perpus Sekolah vs Sekolah (%)"]
1092
- if len(cov_sek.dropna()) > 0:
1093
- avg_cov_sek = cov_sek.mean()
1094
- lines.append(
1095
- f"Rata-rata coverage perpustakaan sekolah terhadap SD+SMP: {avg_cov_sek:.2f}%"
1096
- )
1097
- if "Coverage Perpus Umum vs Kec+Desa/Kel (%)" in verif_df.columns:
1098
- cov_umum = verif_df["Coverage Perpus Umum vs Kec+Desa/Kel (%)"]
1099
- if len(cov_umum.dropna()) > 0:
1100
- avg_cov_umum = cov_umum.mean()
1101
- lines.append(
1102
- f"Rata-rata coverage perpustakaan umum terhadap kecamatan+desa/kelurahan: {avg_cov_umum:.2f}%"
1103
- )
1104
- except Exception:
1105
- pass
1106
 
1107
  return "\n".join(lines)
1108
 
1109
-
1110
- # ============================================================
1111
- # 7a. RULE-BASED ANALYSIS (FALLBACK)
1112
- # ============================================================
1113
-
1114
- def classify_level(x):
1115
- # dipertahankan hanya sebagai placeholder; tidak dipakai untuk teks penilaian
1116
- if pd.isna(x):
1117
- return "tidak tersedia"
1118
- if x < 40:
1119
- return "-"
1120
- if x < 60:
1121
- return "-"
1122
- return "-"
1123
-
1124
-
1125
  def generate_rule_based_analysis(detail_df: pd.DataFrame,
1126
  agg_df: pd.DataFrame,
1127
  kab_name: str,
@@ -1133,420 +1002,246 @@ def generate_rule_based_analysis(detail_df: pd.DataFrame,
1133
  if kew_value and kew_value != "(Semua)":
1134
  wilayah = f"{kab_name} (kewenangan {kew_value})"
1135
 
1136
- # Rata-rata indeks: utamakan baris "Rata-rata keseluruhan" di agg_df
1137
- if agg_df is not None and not agg_df.empty and "Jenis Perpustakaan" in agg_df.columns:
1138
- mask_total = agg_df["Jenis Perpustakaan"].astype(str).str.lower().str.startswith("rata-rata")
1139
- if mask_total.any():
1140
- try:
1141
- mean_ind = float(
1142
- agg_df.loc[mask_total, "Rata2_Indeks_IPLM_0_100"].iloc[0]
1143
- )
1144
- except Exception:
1145
- mean_ind = detail_df.get("Indeks_Real_0_100", pd.Series(dtype=float)).mean(skipna=True)
1146
- else:
1147
- mean_ind = detail_df.get("Indeks_Real_0_100", pd.Series(dtype=float)).mean(skipna=True)
1148
- else:
1149
- mean_ind = detail_df.get("Indeks_Real_0_100", pd.Series(dtype=float)).mean(skipna=True)
1150
-
1151
  mean_kep = detail_df.get("dim_kepatuhan", pd.Series(dtype=float)).mean(skipna=True)
1152
  mean_kin = detail_df.get("dim_kinerja", pd.Series(dtype=float)).mean(skipna=True)
1153
- mean_conf = detail_df.get("Confidence_IPLM", pd.Series(dtype=float)).mean(skipna=True)
1154
 
1155
  lines = []
1156
  lines.append("## Analisis Otomatis & Rekomendasi Kebijakan (Rule-based)\n")
1157
  lines.append("### Gambaran Umum Wilayah")
1158
  lines.append(f"- Wilayah: {wilayah}")
1159
  lines.append(f"- Jumlah perpustakaan dalam sampel: {len(detail_df)}")
1160
- lines.append(f"- Rata-rata Indeks IPLM 2025: {mean_ind:.2f}")
 
1161
  lines.append(f"- Rata-rata dimensi kepatuhan (0–1): {mean_kep:.3f}")
1162
  lines.append(f"- Rata-rata dimensi kinerja (0–1): {mean_kin:.3f}")
1163
- if not pd.isna(mean_conf):
1164
- lines.append(f"- Rata-rata Confidence_IPLM: {mean_conf:.2f}")
1165
 
1166
- lines.append("\n### Capaian per Jenis Perpustakaan")
1167
- if agg_df is not None and not agg_df.empty:
1168
- for _, r in agg_df.iterrows():
1169
- jp = str(r.get("Jenis Perpustakaan", "") or "")
1170
- if not jp or jp.lower().startswith("rata-rata"):
1171
- continue
1172
- idx = r.get("Rata2_Indeks_IPLM_0_100", np.nan)
1173
- n = int(r.get("Jumlah Perpustakaan", 0))
1174
- if pd.isna(idx):
1175
- continue
1176
- lines.append(f"- {jp}: rata-rata indeks {idx:.2f} dengan {n} perpustakaan.")
1177
- else:
1178
- lines.append("- Data agregat per jenis perpustakaan tidak tersedia.")
1179
-
1180
- lines.append("\n### Arah Kebijakan dan Rekomendasi Program")
1181
  lines.append(
1182
- "Prioritas utama adalah penguatan layanan dasar perpustakaan serta peningkatan "
1183
- "ketersediaan SDM dan koleksi. Pola capaian pada dimensi kepatuhan menunjukkan bahwa "
1184
- "aspek koleksi, kebijakan layanan, dan kualifikasi pustakawan masih memiliki ruang penguatan "
1185
- "dan perlu dibenahi secara terencana. Sementara itu, capaian dimensi kinerja mengindikasikan "
1186
- "bahwa intensitas pemanfaatan dan kegiatan literasi perlu diperluas agar perpustakaan "
1187
- "lebih konsisten berfungsi sebagai pusat belajar masyarakat."
1188
  )
1189
  lines.append(
1190
- "Program-program yang dapat diprioritaskan antara lain: peningkatan alokasi anggaran "
1191
- "untuk pengembangan koleksi mutakhir, penguatan kapasitas pustakawan melalui pelatihan "
1192
- "berkelanjutan, perluasan kegiatan budaya baca yang menyasar komunitas rentan, serta "
1193
- "kolaborasi lintas sektor dengan satuan pendidikan, organisasi masyarakat, dan pelaku "
1194
- "usaha lokal. Seluruh intervensi perlu disertai mekanisme monitoring dan evaluasi "
1195
- "berbasis data IPLM agar perbaikan yang dilakukan dapat terpantau dari waktu ke waktu."
1196
- )
1197
-
1198
- lines.append(
1199
- "\n> Catatan: analisis ini disusun secara otomatis berbasis data IPLM. "
1200
- "Untuk penetapan kebijakan, tetap diperlukan verifikasi lapangan dan kajian kualitatif tambahan."
1201
  )
1202
 
1203
  return "\n".join(lines)
1204
 
1205
-
1206
- # ============================================================
1207
- # 7b. ANALISIS BERBASIS LLM (DENGAN FALLBACK RULE-BASED)
1208
- # ============================================================
1209
-
1210
  def generate_llm_analysis(detail_df: pd.DataFrame,
1211
  agg_df: pd.DataFrame,
1212
  verif_df: pd.DataFrame,
1213
  kab_name: str,
1214
  kew_value: str) -> str:
1215
- """
1216
- Analisis otomatis:
1217
- - Jika pemanggilan LLM gagal -> fallback ke rule-based dengan pesan error ringkas.
1218
- """
1219
-
1220
  context = build_context_for_llm(detail_df, agg_df, verif_df, kab_name, kew_value)
1221
-
1222
  client = get_llm_client()
1223
  if client is None or not USE_LLM:
1224
- rb = generate_rule_based_analysis(detail_df, agg_df, kab_name, kew_value)
1225
- return (
1226
- "⚠️ Terjadi kendala saat menginisialisasi model LLM, sehingga analisis otomatis "
1227
- "saat ini menggunakan pendekatan **rule-based**.\n\n"
1228
- + rb
1229
- )
1230
 
1231
  system_prompt = (
1232
- "Anda adalah analis kebijakan perpustakaan dan literasi yang berpengalaman di Indonesia. "
1233
- "Tugas Anda adalah membaca ringkasan data Indeks Pembangunan Literasi Masyarakat (IPLM) "
1234
- "dan menyusun analisis kebijakan yang tajam, tetapi tetap komunikatif dan mudah dipahami "
1235
- "oleh pemangku kepentingan pemerintah daerah."
1236
  )
1237
 
1238
  user_prompt = f"""
1239
- DATA RINGKAS IPLM UNTUK WILAYAH BERIKUT:
1240
 
1241
  {context}
1242
 
1243
  TULISKAN ANALISIS DALAM BAHASA INDONESIA FORMAL, DENGAN STRUKTUR:
1244
-
1245
- 1. Gambaran umum kondisi perpustakaan di wilayah tersebut (1 paragraf).
1246
- 2. Analisis capaian indeks: soroti kekuatan dan area yang masih memerlukan penguatan, terutama perbedaan antar jenis perpustakaan dan sebutkan jenis perpustakaaannya (2 paragraf).
1247
- 3. Analisis risiko dan kesenjangan layanan, termasuk jika coverage perpustakaan terhadap satuan pendidikan atau wilayah administratif masih terbatas (1–2 paragraf).
1248
- 4. Rekomendasi program dan kebijakan prioritas yang konkret untuk 3–5 tahun ke depan. Susun dalam bentuk paragraf naratif, bukan bullet list (2 paragraf).
1249
-
1250
- PANDUAN GAYA:
1251
- - Jangan hanya mengulang angka apa adanya, tetapi jelaskan maknanya.
1252
- - Jangan menggunakan istilah penilaian eksplisit seperti "rendah", "sedang", atau "tinggi" untuk menyebut nilai indeks.
1253
- Gunakan frasa netral seperti "masih memiliki ruang penguatan", "belum sesuai harapan", atau "perlu konsolidasi".
1254
- - Gunakan istilah kebijakan publik dan manajemen program perpustakaan ketika relevan.
1255
- - Hindari kalimat terlalu panjang; gunakan kalimat efektif dan jelas.
1256
  """
1257
 
1258
  try:
1259
- messages = [
1260
- {"role": "system", "content": system_prompt},
1261
- {"role": "user", "content": user_prompt},
1262
- ]
1263
-
1264
  resp = client.chat_completion(
1265
  model=LLM_MODEL_NAME,
1266
- messages=messages,
1267
- max_tokens=1000,
1268
  temperature=0.25,
1269
  top_p=0.9,
1270
  )
1271
-
1272
  text = resp.choices[0].message.content.strip()
1273
  if not text:
1274
  raise ValueError("Respon LLM kosong.")
1275
-
1276
  return text
1277
-
1278
  except Exception as e:
1279
- rb = generate_rule_based_analysis(detail_df, agg_df, kab_name, kew_value)
1280
- return (
1281
- "⚠️ Terjadi error saat memanggil model LLM, sehingga analisis berikut "
1282
- "dibuat menggunakan pendekatan **rule-based**.\n\n"
1283
- f"(Detail teknis: {repr(e)})\n\n"
1284
- f"{rb}"
1285
- )
1286
-
1287
 
1288
  # ============================================================
1289
- # 8. WORD REPORT (Plotly Pie + Indeks + Agregat + LLM Narrative)
1290
  # ============================================================
1291
 
1292
- from docx import Document
1293
- from docx.shared import Inches
1294
- import plotly.express as px
 
 
 
1295
 
1296
- # Cek apakah kaleido tersedia
1297
  try:
1298
  import kaleido # noqa: F401
1299
- HAS_KALEIDO = True
1300
  except Exception:
1301
  HAS_KALEIDO = False
1302
 
1303
-
1304
- def make_pie_plotly(num, den, title):
1305
- """
1306
- Generate pie chart PNG menggunakan Plotly.
1307
- Jika kaleido tidak tersedia / gagal, return None (tanpa error).
1308
- """
1309
- if not HAS_KALEIDO:
1310
- return None
1311
-
1312
- if den is None or den <= 0:
1313
- values = [0, 1]
1314
- labels = ["Terjangkau", "Belum Terjangkau"]
1315
- else:
1316
- values = [num, max(den - num, 0)]
1317
- labels = ["Terjangkau", "Belum Terjangkau"]
1318
-
1319
- fig = px.pie(
1320
- values=values,
1321
- names=labels,
1322
- title=title,
1323
- hole=0.3
1324
- )
1325
-
1326
- tmp = tempfile.mktemp(suffix=".png")
1327
- try:
1328
- fig.write_image(tmp, scale=2)
1329
- return tmp
1330
- except Exception:
1331
- return None
1332
-
1333
-
1334
  def generate_word_report_all(detail_df, agg_df, verif_df, prov, kab, kew, analysis_text):
1335
- """
1336
- Membuat laporan lengkap untuk wilayah yang dipilih:
1337
- - Ringkasan indeks
1338
- - Tabel agregat
1339
- - (opsional) Pie chart coverage
1340
- - Narasi otomatis (LLM/rule-based)
1341
- """
1342
- # Tidak berlaku untuk PUSAT
1343
- if kew == "PUSAT":
1344
  return None
1345
 
1346
- wilayah = kab if kab != "(Semua)" else prov
1347
-
1348
  doc = Document()
1349
  doc.add_heading(f"Laporan IPLM – {wilayah}", level=1)
1350
 
1351
- # =====================
1352
- # 1. Ringkasan Indeks
1353
- # =====================
1354
  doc.add_heading("Ringkasan Indeks", level=2)
1355
-
1356
- # Rata-rata Indeks: pakai agregat "Rata-rata keseluruhan" agar konsisten
1357
- if agg_df is not None and not agg_df.empty and "Jenis Perpustakaan" in agg_df.columns:
1358
- mask_total = agg_df["Jenis Perpustakaan"].astype(str).str.lower().str.startswith("rata-rata")
1359
- if mask_total.any():
1360
- try:
1361
- mean_ind = float(
1362
- agg_df.loc[mask_total, "Rata2_Indeks_IPLM_0_100"].iloc[0]
1363
- )
1364
- except Exception:
1365
- mean_ind = detail_df["Indeks_Real_0_100"].mean(skipna=True)
1366
- else:
1367
- mean_ind = detail_df["Indeks_Real_0_100"].mean(skipna=True)
1368
- else:
1369
- mean_ind = detail_df["Indeks_Real_0_100"].mean(skipna=True)
1370
-
1371
- mean_kep = detail_df["dim_kepatuhan"].mean(skipna=True)
1372
- mean_kin = detail_df["dim_kinerja"].mean(skipna=True)
1373
- mean_conf = detail_df["Confidence_IPLM"].mean(skipna=True)
1374
 
1375
  doc.add_paragraph(f"- Jumlah perpustakaan: {len(detail_df)}")
1376
- doc.add_paragraph(f"- Rata-rata Indeks IPLM: {mean_ind:.2f}")
1377
- doc.add_paragraph(f"- Rata-rata Dimensi Kepatuhan (0–1): {mean_kep:.3f}")
1378
- doc.add_paragraph(f"- Rata-rata Dimensi Kinerja (0–1): {mean_kin:.3f}")
1379
- doc.add_paragraph(f"- Rata-rata Confidence IPLM: {mean_conf:.2f}")
1380
-
1381
- # =====================
1382
- # 2. Tabel Agregat
1383
- # =====================
1384
- doc.add_heading("Ringkasan Agregat per Jenis Perpustakaan", level=2)
1385
-
1386
- table = doc.add_table(rows=1, cols=len(agg_df.columns))
1387
- hdr = table.rows[0].cells
1388
- for i, c in enumerate(agg_df.columns):
1389
- hdr[i].text = str(c)
1390
-
1391
- for _, row in agg_df.iterrows():
1392
- r = table.add_row().cells
1393
  for i, c in enumerate(agg_df.columns):
1394
- r[i].text = str(row[c])
 
 
 
 
 
 
1395
 
1396
- # =====================
1397
- # 3. PIE CHART COVERAGE (opsional)
1398
- # =====================
1399
- doc.add_heading("Coverage / Cakupan Pembinaan", level=2)
 
 
 
 
 
 
 
 
 
 
 
 
1400
 
1401
- if not HAS_KALEIDO:
1402
- doc.add_paragraph(
1403
- "Grafik pie coverage tidak dibuat karena modul 'kaleido' "
1404
- "tidak tersedia di server. Hanya ringkasan teks yang ditampilkan."
1405
- )
1406
- elif verif_df is not None and not verif_df.empty:
1407
-
1408
- if kew == "KAB/KOTA":
1409
- for _, r in verif_df.iterrows():
1410
- nama = r["Kab/Kota"]
1411
-
1412
- # Sekolah SD+SMP
1413
- if "Sekolah (SD+SMP)" in verif_df.columns:
1414
- img_path = make_pie_plotly(
1415
- r["Perpus Sampel – Sekolah (Total SD+SMP)"],
1416
- r["Sekolah (SD+SMP)"],
1417
- f"Coverage Perpustakaan Sekolah – {nama}"
1418
- )
1419
- if img_path:
1420
- doc.add_paragraph(f"Coverage Perpustakaan Sekolah – {nama}")
1421
- doc.add_picture(img_path, width=Inches(4))
1422
-
1423
- # Umum
1424
- if "Jumlah Kecamatan" in verif_df.columns and "Jumlah Desa/Kel" in verif_df.columns:
1425
- denom = r["Jumlah Kecamatan"] + r["Jumlah Desa/Kel"]
1426
- img_path = make_pie_plotly(
1427
- r["Perpus Sampel – Umum"],
1428
- denom,
1429
- f"Coverage Perpustakaan Umum – {nama}"
1430
- )
1431
- if img_path:
1432
- doc.add_paragraph(f"Coverage Perpustakaan Umum – {nama}")
1433
- doc.add_picture(img_path, width=Inches(4))
1434
-
1435
- elif kew == "PROVINSI":
1436
- for _, r in verif_df.iterrows():
1437
- nama = r["Provinsi_Label"]
1438
- img_path = make_pie_plotly(
1439
- r["Jml_Perpus_SMA_Sampel"],
1440
- r["Jml_SMA"],
1441
- f"Coverage Perpustakaan SMA – {nama}"
1442
- )
1443
- if img_path:
1444
- doc.add_paragraph(f"Coverage Perpustakaan SMA – {nama}")
1445
- doc.add_picture(img_path, width=Inches(4))
1446
-
1447
- # =====================
1448
- # 4. Narasi LLM / Rule-based
1449
- # =====================
1450
  doc.add_heading("Analisis Naratif Otomatis", level=2)
1451
  for paragraph in analysis_text.split("\n"):
1452
  if paragraph.strip():
1453
  doc.add_paragraph(paragraph)
1454
 
1455
- # =====================
1456
- # Simpan
1457
- # =====================
1458
  outpath = tempfile.mktemp(suffix=".docx")
1459
  doc.save(outpath)
1460
  return outpath
1461
 
1462
-
1463
  # ============================================================
1464
- # 8. FUNGSI GRADIO
1465
  # ============================================================
1466
 
1467
- def run_app(prov_value, kab_value, kew_value):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1468
  if df_all_raw is None:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1469
  empty = pd.DataFrame()
1470
- return (
1471
- empty, empty, empty, # agg_df, detail_df, verif_df
1472
- None, None, None, # agg_path, detail_path, raw_path
1473
- None, # word_path
1474
- None, None, None, None, # fig_all, fig_sekolah, fig_umum, fig_khusus
1475
- "Data belum berhasil dimuat. Periksa kembali nama file di DATA_FILE.",
1476
- "Belum ada analisis otomatis yang dapat ditampilkan."
1477
- )
1478
 
1479
- df = df_all_raw.copy()
1480
 
1481
- # Filter provinsi
1482
  if prov_col_glob and prov_value and prov_value != "(Semua)":
1483
  df = df[df[prov_col_glob].astype(str).str.strip() == prov_value]
1484
 
1485
- # Filter kab/kota
1486
  if kab_col_glob and kab_value and kab_value != "(Semua)":
1487
  df = df[df[kab_col_glob].astype(str).str.strip() == kab_value]
1488
 
1489
- # Filter kewenangan
1490
  if kew_value and kew_value != "(Semua)":
1491
  df = df[df["KEW_NORM"] == kew_value]
1492
 
1493
- if len(df) == 0:
1494
  empty = pd.DataFrame()
1495
- return (
1496
- empty, empty, empty, # agg_df, detail_df, verif_df
1497
- None, None, None, # agg_path, detail_path, raw_path
1498
- None, # word_path
1499
- None, None, None, None, # fig_all, fig_sekolah, fig_umum, fig_khusus
1500
- "Tidak ada data untuk kombinasi filter yang dipilih.",
1501
- "Belum ada analisis otomatis yang dapat ditampilkan."
1502
- )
1503
 
1504
  kab_name = kab_value if kab_value and kab_value != "(Semua)" else "SEMUA KAB/KOTA"
1505
  kew_name = kew_value if kew_value and kew_value != "(Semua)" else "SEMUA KEWENANGAN"
1506
 
1507
- (
1508
- agg_df,
1509
- detail_df,
1510
- agg_path,
1511
- detail_path,
1512
- raw_path,
1513
- fig_all,
1514
- fig_sekolah,
1515
- fig_umum,
1516
- fig_khusus,
1517
- ) = run_pipeline_core(df, kab_name=kab_name, kew_name=kew_name)
1518
 
1519
- # Verifikasi sampel
1520
  verif_df = compute_verification(df, kew_value)
1521
 
1522
- # Pesan ringkas di UI (menggunakan detail_df lengkap)
1523
- mean_conf = None
1524
- if "Confidence_IPLM" in detail_df.columns:
1525
- mean_conf = detail_df["Confidence_IPLM"].mean(skipna=True)
1526
-
1527
- msg = f"Berhasil dihitung untuk {len(detail_df)} baris perpustakaan."
1528
- if mean_conf is not None and not np.isnan(mean_conf):
1529
- msg += f" | Rata-rata Confidence_IPLM: {mean_conf:.2f}"
1530
- if not verif_df.empty:
1531
  msg += " | Verifikasi sampel tersedia."
1532
 
1533
- # Analisis otomatis (LLM / rule-based) pakai detail_df lengkap
1534
- analysis_text = generate_llm_analysis(
1535
- detail_df=detail_df,
1536
- agg_df=agg_df,
1537
- verif_df=verif_df,
1538
- kab_name=kab_name,
1539
- kew_value=kew_value,
1540
- )
1541
 
1542
- # Laporan Word (pakai detail_df lengkap)
1543
- word_path = generate_word_report_all(
1544
- detail_df, agg_df, verif_df,
1545
- prov_value, kab_value, kew_value,
1546
- analysis_text
1547
- )
1548
 
1549
- # === VIEW UNTUK UI: sembunyikan indeks normatif & confidence ===
1550
  cols_hide = [
1551
  "Indeks_Normatif_0_100",
1552
  "Indeks_Normatif_AdjConf",
@@ -1560,100 +1255,74 @@ def run_app(prov_value, kab_value, kew_value):
1560
 
1561
  return (
1562
  agg_df,
1563
- detail_df_view, # yang tampil di UI sudah tanpa kolom normatif & confidence
1564
  verif_df,
1565
  agg_path,
1566
  detail_path,
1567
  raw_path,
1568
  word_path,
1569
  fig_all,
1570
- fig_sekolah,
1571
- fig_umum,
1572
- fig_khusus,
1573
  msg,
1574
- analysis_text,
1575
  )
1576
 
1577
-
1578
- def on_prov_change(prov_value):
1579
- if df_all_raw is None or kab_col_glob is None:
1580
- return gr.update(choices=["(Semua)"], value="(Semua)")
1581
- if prov_value is None or prov_value == "(Semua)" or prov_col_glob is None:
1582
- s = df_all_raw[kab_col_glob].dropna().astype(str).str.strip()
1583
- else:
1584
- m = df_all_raw[prov_col_glob].astype(str).str.strip() == prov_value
1585
- s = df_all_raw.loc[m, kab_col_glob].dropna().astype(str).str.strip()
1586
- vals = sorted([x for x in s.unique() if x != ""])
1587
- new_choices = ["(Semua)"] + vals
1588
- return gr.update(choices=new_choices, value="(Semua)")
1589
-
1590
-
1591
- # ============================================================
1592
- # 9. BUILD UI GRADIO
1593
- # ============================================================
1594
-
1595
  with gr.Blocks() as demo:
1596
  gr.Markdown(
1597
  f"""
1598
- # IPLM 2025 – RealScore + Normatif + Verifikasi Sampel + Analisis Otomatis (LLM + Rule-based)
1599
-
1600
- Dataset diambil langsung dari file di repository (tanpa upload):
1601
 
1602
- - **`{DATA_FILE}`** – Data perpustakaan (semua jenis, multi-sheet).
1603
- - **`{META_KAB_FILE}`** – Jumlah kecamatan & desa/kel per kab/kota.
1604
- - **`{META_SDSMP_FILE}`** – Jumlah SD & SMP per kab/kota.
1605
- - **`{META_SMA_FILE}`** – Jumlah SMA per provinsi.
 
1606
 
1607
- {DATA_INFO}
1608
- """
1609
  )
1610
 
1611
  with gr.Row():
1612
- dd_prov = gr.Dropdown(label="Provinsi", choices=prov_choices, value=prov_choices[0])
1613
- dd_kab = gr.Dropdown(label="Kab/Kota", choices=kab_choices, value=kab_choices[0])
1614
  dd_kew = gr.Dropdown(label="Kewenangan", choices=kew_choices, value=default_kew)
1615
 
1616
- dd_prov.change(
1617
- fn=on_prov_change,
1618
- inputs=dd_prov,
1619
- outputs=dd_kab,
1620
- )
1621
 
1622
  run_btn = gr.Button("Jalankan Perhitungan")
1623
  msg_out = gr.Markdown()
1624
 
1625
- gr.Markdown("### Hasil Agregat (RealScore) per Jenis Perpustakaan")
1626
  agg_df_out = gr.DataFrame(interactive=False)
1627
 
1628
- gr.Markdown("### Detail Indeks (Real) per Perpustakaan")
1629
  detail_df_out = gr.DataFrame(interactive=False)
1630
 
1631
- gr.Markdown("### Verifikasi Kondisi Sampel di Lapangan")
1632
- verif_df_out = gr.DataFrame(
1633
- label="Perbandingan jumlah sampel dengan populasi unit (SD/SMP/SMA, Kecamatan, Desa/Kel)",
1634
- interactive=False
1635
- )
1636
 
1637
- gr.Markdown("### Sebaran Indeks – Semua Perpustakaan (RealScore)")
1638
  bell_all_out = gr.Plot()
1639
 
1640
- gr.Markdown("### Sebaran Indeks – Perpustakaan Sekolah")
1641
  bell_sekolah_out = gr.Plot()
1642
 
1643
- gr.Markdown("### Sebaran Indeks – Perpustakaan Umum")
1644
  bell_umum_out = gr.Plot()
1645
 
1646
- gr.Markdown("### Sebaran Indeks – Perpustakaan Khusus")
1647
  bell_khusus_out = gr.Plot()
1648
 
1649
  gr.Markdown("### Analisis Otomatis & Rekomendasi Kebijakan")
1650
  analysis_out = gr.Markdown()
1651
 
1652
  with gr.Row():
1653
- agg_file_out = gr.File(label="Download File Agregat (.xlsx)")
1654
- detail_file_out = gr.File(label="Download File Detail (.xlsx)")
1655
- raw_file_out = gr.File(label="Download Data Mentah (.xlsx)")
1656
- word_file_out = gr.File(label="Download Laporan Word (.docx)")
1657
 
1658
  run_btn.click(
1659
  fn=run_app,
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ app.py — IPLM 2025 (Merged)
4
+ - Pipeline nasional (transformasi & normalisasi sekali secara nasional)
5
+ - RealScore + Normatif + Confidence
6
+ - Penalti sampling 68% (RULE FINAL):
7
+ * KAB/KOTA: sekolah penalti vs SD+SMP; umum penalti vs (kec+desa/kel); khusus tidak penalti
8
+ * PROVINSI: sekolah menengah penalti vs (SMA+SMK+SLB); umum & khusus tidak penalti
9
+ - Verifikasi sampel (Kab/Kota & Provinsi) -> target 68%, kurang menuju 68%, sampling factor
10
+ - Analisis Otomatis (LLM + rule-based fallback)
11
+ - Download Excel (agregat, detail, raw subset) + Word report (jika python-docx tersedia)
12
+ """
13
+
14
  import os
15
  import re
16
+ import math
17
  import tempfile
18
  from pathlib import Path
19
 
 
28
  # 1. KONFIGURASI FILE & PARAMETER
29
  # ============================================================
30
 
31
+ DATA_FILE = "DM_001.xlsx"
32
+ META_KAB_FILE = "jumlahdesa_fixed.xlsx" # kecamatan & desa/kel per kab/kota
33
+ META_SDSMP_FILE = "jumlah_SD_SMP.xlsx" # jumlah SD & SMP per kab/kota
34
+ META_MENENGAH_FILE= "SMA (2).xlsx" # kab/kota: PROVINSI + (SMA, SMK, SLB) <-- sesuai file Anda terbaru
35
+
36
+ TARGET_FRAC = 0.68
37
 
38
  # Kelompok indikator IPLM
39
  koleksi_cols = [
 
58
  ]
59
  all_indicators = koleksi_cols + sdm_cols + pelayanan_cols + pengelolaan_cols
60
 
61
+ # Bobot indeks IPLM (rumus awal)
62
  w_kepatuhan = 0.30
63
  w_kinerja = 0.70
64
 
65
  # Bobot untuk Confidence
66
  W_DATA = 0.7
67
  W_SAMPLE = 0.3
 
68
 
69
+ # Target normatif per jenis perpustakaan (tetap dihitung, tapi disembunyikan di UI)
70
  TARGETS = {
71
  "sekolah": {
72
  "JudulTercetak": 1000,
 
95
  # ============================================================
96
 
97
  USE_LLM = True
 
 
98
  LLM_MODEL_NAME = "meta-llama/Meta-Llama-3-8B-Instruct"
 
99
 
100
  HF_TOKEN = (
101
  os.getenv("HF_TOKEN")
102
  or os.getenv("HUGGINGFACEHUB_API_TOKEN")
103
  or os.getenv("HF_API_TOKEN")
104
  )
 
105
  _HF_CLIENT = None
106
 
 
107
  def get_llm_client():
 
 
 
 
108
  global _HF_CLIENT
109
  if _HF_CLIENT is not None:
110
  return _HF_CLIENT
 
111
  try:
112
  if HF_TOKEN:
113
  _HF_CLIENT = InferenceClient(model=LLM_MODEL_NAME, token=HF_TOKEN)
 
118
  _HF_CLIENT = None
119
  return None
120
 
 
121
  # ============================================================
122
+ # 2. UTIL
123
  # ============================================================
124
 
125
  def _canon(s: str) -> str:
126
  return re.sub(r"[^a-z0-9]+", "", str(s).lower())
127
 
128
def pick_col(df, candidates):
    """Return the name of the first column in *df* matching one of *candidates*.

    Exact column names are tried first, in candidate order. If none is
    present, candidates and columns are compared after ``_canon``
    normalisation and the actual column name is returned. Returns ``None``
    when nothing matches.
    """
    columns = df.columns

    # Pass 1: exact name match.
    for wanted in candidates:
        if wanted in columns:
            return wanted

    # Pass 2: match on the canonical form; a later column overwrites an
    # earlier one that shares the same canonical form.
    by_canon = {}
    for actual in columns:
        by_canon[_canon(actual)] = actual

    for wanted in candidates:
        try:
            return by_canon[_canon(wanted)]
        except KeyError:
            continue
    return None
138
 
139
  def coerce_num(val):
140
  if pd.isna(val):
 
157
  except Exception:
158
  return np.nan
159
 
 
160
  def minmax_norm(s: pd.Series) -> pd.Series:
161
  x = s.astype(float)
162
  mn, mx = x.min(skipna=True), x.max(skipna=True)
 
164
  return pd.Series(0.0, index=s.index)
165
  return (x - mn) / (mx - mn)
166
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  def norm_kew(v):
168
  if pd.isna(v):
169
  return None
 
176
  return "PUSAT"
177
  return t
178
 
 
179
  def _norm_text(x):
180
  if pd.isna(x):
181
  return None
182
  t = str(x).strip().upper()
183
  return " ".join(t.split())
184
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
  def slugify(s: str) -> str:
186
  if s is None:
187
  return "NA"
 
190
  return "NA"
191
  return _canon(t).upper()
192
 
193
def norm_prov_key(s):
    """Build a join key from a province label.

    The label is upper-cased, stripped, whitespace-collapsed, relieved of a
    leading ``PROVINSI`` word, and finally reduced to ``A-Z0-9`` characters.
    Missing values yield ``None``.
    """
    if pd.isna(s):
        return None
    collapsed = " ".join(str(s).upper().strip().split())
    without_prefix = re.sub(r"^PROVINSI\s+", "", collapsed)
    return re.sub(r"[^A-Z0-9]+", "", without_prefix)
200
 
201
def norm_kab_key(s):
    """Build a join key from a kabupaten/kota label.

    The label is upper-cased, whitespace-collapsed, has its administrative
    prefixes unified (``KABUPATEN``/``KAB.`` -> ``KAB``,
    ``KOTA ADMINISTRASI``/``KOTA.`` -> ``KOTA``), and is finally reduced to
    ``A-Z0-9`` characters. Missing values yield ``None``.
    """
    if pd.isna(s):
        return None
    # Collapse whitespace BEFORE the replacements: "KOTA ADMINISTRASI" is a
    # two-word pattern, so a doubled/tab/non-breaking space between the words
    # would otherwise escape the replace and produce a different key.
    t = " ".join(str(s).upper().split())
    t = t.replace("KABUPATEN", "KAB").replace("KAB.", "KAB")
    t = t.replace("KOTA ADMINISTRASI", "KOTA").replace("KOTA.", "KOTA")
    return re.sub(r"[^A-Z0-9]+", "", t)
209
 
210
def safe_mean_row(row, cols):
    """Return the mean of the non-missing values of *row* over *cols*.

    Columns absent from ``row.index`` and missing values are ignored.
    Returns ``np.nan`` when no usable value remains.
    """
    present = [
        float(row[name])
        for name in cols
        if name in row.index and pd.notna(row[name])
    ]
    if not present:
        return np.nan
    return float(np.mean(present))
218
+
219
def penalized_mean_norm(row, cols):
    """Return the mean of the ``norm_<col>`` values of *row*, missing counted as 0.

    Only ``norm_<col>`` entries that exist in ``row.index`` take part; a
    missing value among them contributes 0.0 (the penalty). Returns
    ``np.nan`` when none of the ``norm_<col>`` entries exist.
    """
    scores = []
    for indicator in cols:
        key = f"norm_{indicator}"
        if key not in row.index:
            continue
        raw = row[key]
        scores.append(0.0 if pd.isna(raw) else float(raw))
    return float(np.sum(scores) / len(scores)) if scores else np.nan
232
+
233
def skor_normatif(value, target):
    """Return the normative score ``value / target`` capped at 1.0.

    A missing *value* scores 0.0. A missing or non-positive *target* also
    scores 0.0: no meaningful ratio exists, and without this guard a zero
    target raises ``ZeroDivisionError`` and a NaN target leaks NaN into the
    index.
    """
    if pd.isna(value):
        return 0.0
    if pd.isna(target) or float(target) <= 0:
        return 0.0
    return min(float(value) / float(target), 1.0)
237
+
238
def samp_factor(sample, target):
    """Return the sampling factor ``sample / target`` capped at 1.0.

    Reaching the target is the optimum (1.0); exceeding it stays at 1.0.
    A missing or non-positive *target* means no penalty (1.0). A missing or
    negative *sample* is treated as 0.
    """
    no_target = pd.isna(target) or target <= 0
    if no_target:
        return 1.0
    observed = 0 if (pd.isna(sample) or sample < 0) else sample
    return float(min(observed / target, 1.0))
245
 
246
  # ============================================================
247
+ # 3) LOAD DATA DM (multi-sheet) + META
248
  # ============================================================
249
 
250
  DATA_INFO = ""
251
  df_all_raw = None
252
+
253
+ meta_kab_df = None # kab_key + kec/desa + SD/SMP + targets
254
+ meta_menengah_prov = None # prov_key + SMA/SMK/SLB totals + target menengah
255
 
256
  prov_col_glob = kab_col_glob = kew_col_glob = jenis_col_glob = nama_col_glob = None
257
 
258
+ # --- Load DM ---
259
  try:
260
  fp = Path(DATA_FILE)
261
  if not fp.exists():
 
276
  else:
277
  df_all_raw["KEW_NORM"] = None
278
 
279
+ # dataset mapping
280
  val_map_jenis = {
281
  "PERPUSTAKAAN SEKOLAH": "sekolah",
282
  "SEKOLAH": "sekolah",
 
291
  else:
292
  df_all_raw["_dataset"] = None
293
 
294
+ # keys
295
+ if kab_col_glob:
296
+ df_all_raw["kab_key"] = df_all_raw[kab_col_glob].apply(norm_kab_key)
297
+ else:
298
+ df_all_raw["kab_key"] = None
299
+ if prov_col_glob:
300
+ df_all_raw["prov_key"] = df_all_raw[prov_col_glob].apply(norm_prov_key)
301
+ else:
302
+ df_all_raw["prov_key"] = None
303
+
304
+ DATA_INFO = f"Data terbaca dari: **{DATA_FILE}** | Jumlah baris: **{len(df_all_raw)}** | Sheets: **{len(xls.sheet_names)}**"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
305
  except Exception as e:
306
  df_all_raw = None
 
 
307
  DATA_INFO = f"⚠️ Gagal memuat data dari file: `{DATA_FILE}`\n\nError: `{e}`"
308
 
309
+ # --- META Kab: kec/desa + SD/SMP -> targets 68% ---
310
  extra_info = []
 
 
311
  try:
312
  meta_kab_raw = pd.read_excel(META_KAB_FILE)
313
  col_kab = pick_col(meta_kab_raw, ["Kab/Kota", "Kab_Kota", "kab/kota", "kabupaten_kota"])
314
+ col_kec = pick_col(meta_kab_raw, ["Kecamatan", "jml_kecamatan", "jumlah_kecamatan", "Jml_Kecamatan", "Jumlah Kecamatan"])
315
+ col_des = pick_col(meta_kab_raw, ["Desa/Kel", "Desa Kelurahan", "Desa_kel", "Jml_DesaKel", "Jumlah Desa/Kel", "Jumlah Desa Kelurahan"])
316
+
317
+ if col_kab is None:
318
+ raise ValueError("Kolom Kab/Kota tidak ditemukan di META_KAB_FILE.")
319
+
320
+ meta_desa = pd.DataFrame({
321
+ "Kab_Kota_Label": meta_kab_raw[col_kab].astype(str).str.strip(),
322
+ "Jml_Kecamatan": pd.to_numeric(meta_kab_raw[col_kec], errors="coerce") if col_kec else 0,
323
+ "Jml_DesaKel": pd.to_numeric(meta_kab_raw[col_des], errors="coerce") if col_des else 0,
324
+ })
325
+ meta_desa["Jml_Kecamatan"] = meta_desa["Jml_Kecamatan"].fillna(0)
326
+ meta_desa["Jml_DesaKel"] = meta_desa["Jml_DesaKel"].fillna(0)
327
+ meta_desa["kab_key"] = meta_desa["Kab_Kota_Label"].apply(norm_kab_key)
328
+ meta_desa = meta_desa.groupby("kab_key", as_index=False).agg({
329
+ "Kab_Kota_Label":"first",
330
+ "Jml_Kecamatan":"sum",
331
+ "Jml_DesaKel":"sum"
332
+ })
333
 
 
 
334
  sd_smp_raw = pd.read_excel(META_SDSMP_FILE)
335
  col_kab2 = pick_col(sd_smp_raw, [
336
  "Kabupaten/Kota_Kabupaten/Kota", "Kabupaten/Kota",
 
339
  col_sd = pick_col(sd_smp_raw, ["SD", "Jumlah SD", "Total SD", "SD_Total", "jml_sd", "Jml_SD"])
340
  col_smp = pick_col(sd_smp_raw, ["SMP", "Jumlah SMP", "Total SMP", "SMP_Total", "jml_smp", "Jml_SMP"])
341
 
342
+ if col_kab2 is None:
343
+ raise ValueError("Kolom Kab/Kota tidak ditemukan di META_SDSMP_FILE.")
344
+ if (col_sd is None) and (col_smp is None):
345
+ raise ValueError("Kolom SD/SMP tidak ditemukan di META_SDSMP_FILE.")
 
 
346
 
347
+ meta_sdsmp = pd.DataFrame({
348
+ "Kab_Kota_Label_SD": sd_smp_raw[col_kab2].astype(str).str.strip(),
349
+ "Jml_SD": pd.to_numeric(sd_smp_raw[col_sd], errors="coerce") if col_sd else 0,
350
+ "Jml_SMP": pd.to_numeric(sd_smp_raw[col_smp], errors="coerce") if col_smp else 0,
351
+ })
352
+ meta_sdsmp["Jml_SD"] = meta_sdsmp["Jml_SD"].fillna(0)
353
+ meta_sdsmp["Jml_SMP"] = meta_sdsmp["Jml_SMP"].fillna(0)
354
+ meta_sdsmp["kab_key"] = meta_sdsmp["Kab_Kota_Label_SD"].apply(norm_kab_key)
355
+ meta_sdsmp = meta_sdsmp.groupby("kab_key", as_index=False).agg({"Jml_SD":"sum","Jml_SMP":"sum"})
356
 
357
+ meta_kab_df = meta_desa.merge(meta_sdsmp, on="kab_key", how="left")
358
+ meta_kab_df["Jml_SD"] = meta_kab_df["Jml_SD"].fillna(0)
359
+ meta_kab_df["Jml_SMP"] = meta_kab_df["Jml_SMP"].fillna(0)
 
360
 
361
+ meta_kab_df["Pop_Sekolah_SD_SMP"] = (meta_kab_df["Jml_SD"] + meta_kab_df["Jml_SMP"]).astype(float)
362
+ meta_kab_df["Pop_Admin_Kec_Desa"] = (meta_kab_df["Jml_Kecamatan"] + meta_kab_df["Jml_DesaKel"]).astype(float)
 
 
 
 
 
 
 
363
 
364
+ meta_kab_df["Target_Sekolah_68"] = np.ceil(TARGET_FRAC * meta_kab_df["Pop_Sekolah_SD_SMP"]).astype("Int64")
365
+ meta_kab_df["Target_Umum_68"] = np.ceil(TARGET_FRAC * meta_kab_df["Pop_Admin_Kec_Desa"]).astype("Int64")
366
+
367
+ extra_info.append(f"Meta Kab/Kota siap (kec/desa + SD/SMP + target 68%): n={len(meta_kab_df)}")
 
368
  except Exception as e:
369
+ meta_kab_df = None
370
+ extra_info.append(f"⚠️ Gagal memuat meta Kab/Kota/SD-SMP: {e}")
371
 
372
# --- Province-level metadata for secondary schools (SMA + SMK + SLB), read from META_MENENGAH_FILE ---
# Best-effort load: on any failure meta_menengah_prov is reset to None and the
# error text is appended to extra_info instead of stopping the app.
try:
    fp2 = Path(META_MENENGAH_FILE)
    if not fp2.exists():
        raise FileNotFoundError(f"File menengah tidak ditemukan: {META_MENENGAH_FILE}")

    men = pd.read_excel(fp2)
    # Locate the province column and the per-school-type count columns by trying known header spellings.
    c_prov = pick_col(men, ["PROVINSI","Provinsi","provinsi"])
    c_sma = pick_col(men, ["SMA","Jumlah SMA","Total SMA","Jml_SMA","jml_sma"])
    c_smk = pick_col(men, ["SMK","Jumlah SMK","Total SMK","Jml_SMK","jml_smk"])
    c_slb = pick_col(men, ["SLB","Jumlah SLB","Total SLB","Jml_SLB","jml_slb"])

    # The province column is mandatory; at least one of the three count columns must exist.
    if c_prov is None or (c_sma is None and c_smk is None and c_slb is None):
        raise ValueError("Kolom PROVINSI atau SMA/SMK/SLB tidak terdeteksi pada META_MENENGAH_FILE.")

    tmp = men.copy()
    tmp["prov_key"] = tmp[c_prov].apply(norm_prov_key)
    # A count column that was not found contributes 0; unparseable cells are coerced to NaN and then to 0.
    tmp["SMA"] = pd.to_numeric(tmp[c_sma], errors="coerce").fillna(0) if c_sma else 0
    tmp["SMK"] = pd.to_numeric(tmp[c_smk], errors="coerce").fillna(0) if c_smk else 0
    tmp["SLB"] = pd.to_numeric(tmp[c_slb], errors="coerce").fillna(0) if c_slb else 0

    # Sum the input rows up to one row per normalized province key.
    meta_menengah_prov = tmp.groupby("prov_key", as_index=False).agg(
        Total_SMA=("SMA","sum"),
        Total_SMK=("SMK","sum"),
        Total_SLB=("SLB","sum")
    )
    meta_menengah_prov["Total_Menengah"] = meta_menengah_prov["Total_SMA"] + meta_menengah_prov["Total_SMK"] + meta_menengah_prov["Total_SLB"]
    # Target sample size = TARGET_FRAC of the population, rounded up (TARGET_FRAC is defined elsewhere; the status messages call it the 68% target).
    meta_menengah_prov["Target_Menengah_68"] = np.ceil(TARGET_FRAC * meta_menengah_prov["Total_Menengah"]).astype("Int64")

    extra_info.append(f"Meta Prov Menengah (SMA+SMK+SLB + target 68%): n={len(meta_menengah_prov)}")
except Exception as e:
    meta_menengah_prov = None
    extra_info.append(f"⚠️ Gagal memuat meta menengah prov (SMA+SMK+SLB): {e}")
405
 
406
  if extra_info:
407
  DATA_INFO = DATA_INFO + "<br>" + "<br>".join(extra_info)
408
 
 
409
  # ============================================================
410
+ # 4) BELL CURVE (pakai indeks yang dipilih)
411
  # ============================================================
412
 
413
  def make_bell_figure(df_all: pd.DataFrame,
414
  title: str,
415
+ index_col: str,
416
  name_col: str = None,
417
  min_points: int = 5) -> go.Figure:
 
418
  fig = go.Figure()
419
 
420
+ if df_all is None or df_all.empty or index_col not in df_all.columns:
421
+ fig.update_layout(title=title, xaxis_title="Indeks (0–100)", yaxis_title="Kepadatan (relatif)")
 
 
 
 
422
  return fig
423
 
424
  df_plot = df_all.copy()
425
  df_plot = df_plot[pd.notna(df_plot[index_col])]
 
426
  if df_plot.empty or len(df_plot) < min_points:
427
  fig.update_layout(
428
  title=title,
429
  xaxis_title="Indeks (0–100)",
430
  yaxis_title="Kepadatan (relatif)",
431
+ annotations=[dict(
432
+ text="Grafik tidak ditampilkan (data terlalu sedikit).",
433
+ x=0.5, y=0.5, xref="paper", yref="paper",
434
+ showarrow=False, font=dict(size=14)
435
+ )]
 
 
436
  )
437
  return fig
438
 
 
443
  xs = np.linspace(max(0, x_vals.min() - 5), min(100, x_vals.max() + 5), 200)
444
  pdf = (1.0 / (sigma * np.sqrt(2 * np.pi))) * np.exp(-0.5 * ((xs - mu) / sigma) ** 2)
445
  pdf = pdf / pdf.max()
 
446
 
447
  if name_col and name_col in df_plot.columns:
448
+ hover_text = [f"{str(n)}<br>Indeks: {v:.2f}" for n, v in zip(df_plot[name_col], x_vals)]
 
 
 
449
  else:
450
  hover_text = [f"Indeks: {v:.2f}" for v in x_vals]
451
 
452
+ fig.add_trace(go.Scatter(x=xs, y=pdf, mode="lines", name="Bell curve", hoverinfo="skip"))
453
  fig.add_trace(go.Scatter(
454
+ x=x_vals, y=np.zeros_like(x_vals),
455
+ mode="markers", name="Perpustakaan",
456
+ hovertext=hover_text, hovertemplate="%{hovertext}<extra></extra>"
 
 
 
 
 
 
 
 
 
 
 
457
  ))
458
 
459
  q1, q2, q3 = np.quantile(x_vals, [0.25, 0.5, 0.75])
460
  for q, label in [(q1, "Q1"), (q2, "Q2 (Median)"), (q3, "Q3")]:
461
  fig.add_trace(go.Scatter(
462
+ x=[q, q], y=[0, 1.05],
463
+ mode="lines", name=label,
 
 
464
  hovertemplate=f"{label}: {q:.2f}<extra></extra>"
465
  ))
466
 
 
468
  title=title,
469
  xaxis_title="Indeks IPLM (0–100)",
470
  yaxis_title="Kepadatan (relatif)",
471
+ yaxis=dict(showticklabels=False, zeroline=True, range=[0, 1.2]),
472
  margin=dict(l=40, r=20, t=60, b=40),
473
  hovermode="x"
474
  )
 
475
  return fig
476
 
 
477
  # ============================================================
478
+ # 5) PIPELINE GLOBAL (NASIONAL): Real + Normatif + Confidence + Penalti 68%
479
  # ============================================================
480
 
481
# Maps raw source column names to the canonical indicator names used by the
# pipeline. alias_map below holds the same mapping with each key passed
# through _canon() so lookups can be done on canonicalized column names.
alias_map_raw = {
    "j_judul_koleksi_tercetak": "JudulTercetak",
    "j_eksemplar_koleksi_tercetak": "EksemplarTercetak",
    "j_judul_koleksi_digital": "JudulElektronik",
    "j_eksemplar_koleksi_digital": "EksemplarElektronik",
    "tambah_judul_koleksi_tercetak": "TambahJudulTercetak",
    "tambah_eksemplar_koleksi_tercetak": "TambahEksemplarTercetak",
    "tambah_judul_koleksi_digital": "TambahJudulElektronik",
    "tambah_eksemplar_koleksi_digital": "TambahEksemplarElektronik",
    "j_anggaran_koleksi": "KomitmenAnggaranKoleksi",
    "j_tenaga_ilmu_perpus": "TenagaKualifikasiIlmuPerpustakaan",
    "j_tenaga_nonilmu_perpus": "TenagaFungsionalProfesional",
    "j_tenaga_pkb": "TenagaPKB",
    "j_anggaran_diklat_perpus": "AnggaranTenaga",
    "j_peserta_budaya_baca": "PesertaBudayaBaca",
    "j_pemustaka_luring_daring": "PemustakaLuringDaring",
    "j_pemustaka_fasilitas_tik": "PemustakaFasilitasTIK",
    "j_judul_koleksi_tercetak_termanfaat": "PemanfaatanJudulTercetak",
    "j_eksemplar_koleksi_tercetak_termanfaat": "PemanfaatanEksemplarTercetak",
    "j_judul_koleksi_digital_termanfaat": "PemanfaatanJudulElektronik",
    "j_eksemplar_koleksi_digital_termanfaat": "PemanfaatanEksemplarElektronik",
    "j_kegiatan_budaya_baca_peningkatan_literasi": "KegiatanBudayaBaca",
    "j_kerjasama_pengembangan_perpus": "KegiatanKerjasama",
    "j_variasi_layanan": "VariasiLayanan",
    "j_kebijakan_prosedur_pelayanan": "Kebijakan",
    "j_anggaran_peningkatan_pelayanan": "AnggaranLayanan",
}
alias_map = {_canon(k): v for k, v in alias_map_raw.items()}
509
 
510
def compute_sampling_verification(df_all: pd.DataFrame):
    """
    Build the sampling-verification tables used for the 68% target penalty.

    Args:
        df_all: Library-level data. The code reads the columns ``KEW_NORM``,
            ``_dataset``, ``kab_key`` and ``prov_key``.

    Returns:
        A ``(ver_kab, ver_prov)`` tuple of DataFrames.

        - ``ver_kab``: one row per ``kab_key`` with sample counts per dataset,
          the school and public targets, one sampling factor per dataset
          (``khusus`` is fixed at 1.0 because it has no target), the
          shortfalls, and a target-weighted total factor.
        - ``ver_prov``: one row per ``prov_key`` with the count of
          SMA/SMK/SLB libraries, the secondary-school target, the sampling
          factor and the shortfall.

        Either table is an empty DataFrame when its metadata, key column or
        usable rows are missing.
    """
    ver_kab = pd.DataFrame()
    ver_prov = pd.DataFrame()

    if df_all is None or df_all.empty:
        return ver_kab, ver_prov

    def _factors(frame, sample_col, target_col):
        # Row-wise DataFrame.apply(axis=1) hands back a DataFrame on an empty
        # frame, which cannot be assigned to a single column. A plain list of
        # samp_factor() results behaves the same for every frame size.
        return [samp_factor(s, t) for s, t in zip(frame[sample_col], frame[target_col])]

    def _weighted_factors(frame):
        # Total factor weighted by the school and public targets ("khusus" is excluded).
        weighted = []
        for f_sek, f_um, t_sek, t_um in zip(
            frame["SamplingFactor_Sekolah"],
            frame["SamplingFactor_Umum"],
            frame["Target_Sekolah_68"],
            frame["Target_Umum_68"],
        ):
            ts = float(t_sek or 0)
            tu = float(t_um or 0)
            denom = ts + tu
            weighted.append(1.0 if denom <= 0 else float((f_sek * ts + f_um * tu) / denom))
        return weighted

    # ---------- KAB/KOTA ----------
    if meta_kab_df is not None and "kab_key" in df_all.columns:
        # groupby() silently drops rows whose key is null. Dropping them up
        # front lets the emptiness check below see the case where no usable
        # row is left (for example when kab_key is None for every row).
        kab = df_all[df_all["KEW_NORM"] == "KAB/KOTA"].dropna(subset=["kab_key", "_dataset"])
        if not kab.empty:
            g = kab.groupby(["kab_key", "_dataset"]).size().unstack(fill_value=0).reset_index()
            for col in ["sekolah", "umum", "khusus"]:
                if col not in g.columns:
                    g[col] = 0
            g = g.rename(columns={
                "sekolah": "Sampel_Sekolah_DM",
                "umum": "Sampel_Umum_DM",
                "khusus": "Sampel_Khusus_DM",
            })

            ver_kab = g.merge(
                meta_kab_df[["kab_key", "Kab_Kota_Label", "Pop_Sekolah_SD_SMP", "Pop_Admin_Kec_Desa", "Target_Sekolah_68", "Target_Umum_68"]],
                on="kab_key", how="left"
            )

            # Regions missing from the metadata get target 0, which samp_factor() maps to factor 1.0.
            for c in ["Target_Sekolah_68", "Target_Umum_68", "Sampel_Sekolah_DM", "Sampel_Umum_DM", "Sampel_Khusus_DM"]:
                ver_kab[c] = pd.to_numeric(ver_kab[c], errors="coerce").fillna(0)

            ver_kab["SamplingFactor_Sekolah"] = _factors(ver_kab, "Sampel_Sekolah_DM", "Target_Sekolah_68")
            ver_kab["SamplingFactor_Umum"] = _factors(ver_kab, "Sampel_Umum_DM", "Target_Umum_68")
            ver_kab["SamplingFactor_Khusus"] = 1.0

            ver_kab["Kurang_Sekolah_Menuju_68"] = np.maximum(ver_kab["Target_Sekolah_68"] - ver_kab["Sampel_Sekolah_DM"], 0).astype(int)
            ver_kab["Kurang_Umum_Menuju_68"] = np.maximum(ver_kab["Target_Umum_68"] - ver_kab["Sampel_Umum_DM"], 0).astype(int)

            ver_kab["SamplingFactor_Total_Kab"] = _weighted_factors(ver_kab)

    # ---------- PROVINCE: school libraries only (secondary level) ----------
    if meta_menengah_prov is not None and "prov_key" in df_all.columns:
        prov = df_all[df_all["KEW_NORM"] == "PROVINSI"].dropna(subset=["prov_key"]).copy()
        if not prov.empty:
            # Detect secondary schools from the sub-type column when it exists,
            # otherwise fall back to every row of the "sekolah" dataset.
            subjenis_col = pick_col(prov, ["sub_jenis_perpus", "SUB_JENIS_PERPUS", "Sub Jenis Perpustakaan", "sub jenis perpus"])
            if subjenis_col:
                # Non-capturing group: a capturing group makes str.contains emit a UserWarning.
                patt = r"\b(?:SMA|SMK|SLB)\b"
                prov["_is_menengah"] = prov[subjenis_col].astype(str).str.upper().str.contains(patt, na=False, regex=True)
            else:
                prov["_is_menengah"] = (prov["_dataset"] == "sekolah")

            samp = prov.groupby("prov_key")["_is_menengah"].sum().reset_index().rename(columns={"_is_menengah": "Sampel_Menengah_DM"})
            samp["Sampel_Menengah_DM"] = pd.to_numeric(samp["Sampel_Menengah_DM"], errors="coerce").fillna(0)

            ver_prov = samp.merge(
                meta_menengah_prov[["prov_key", "Total_SMA", "Total_SMK", "Total_SLB", "Total_Menengah", "Target_Menengah_68"]],
                on="prov_key", how="left"
            )
            for c in ["Total_Menengah", "Target_Menengah_68", "Sampel_Menengah_DM"]:
                ver_prov[c] = pd.to_numeric(ver_prov[c], errors="coerce").fillna(0)

            ver_prov["SamplingFactor_Prov_Sekolah"] = _factors(ver_prov, "Sampel_Menengah_DM", "Target_Menengah_68")
            ver_prov["Kurang_Menengah_Menuju_68"] = np.maximum(ver_prov["Target_Menengah_68"] - ver_prov["Sampel_Menengah_DM"], 0).astype(int)

    return ver_kab, ver_prov
588
+
589
+ def prepare_global_ipml(df_src: pd.DataFrame):
590
+ """
591
+ Menghitung:
592
+ - Indeks_Real_0_100 (YJ+MinMax nasional)
593
+ - Indeks_Normatif_0_100 (berdasarkan target per jenis)
594
+ - Confidence_Data
595
+ - SamplingFactor_Total (aturan penalti 68% FINAL)
596
+ - Indeks_Final_0_100 = Indeks_Real_0_100 * SamplingFactor_Total
597
+ - Confidence_IPLM = gabungan data+sample (sample = SamplingFactor_Total)
598
+ """
599
+ if df_src is None or df_src.empty:
600
+ return df_src, pd.DataFrame(), pd.DataFrame()
601
+
602
+ df = df_src.copy()
603
+
604
+ # 1) Rename indikator -> kanonik
605
+ canonical_targets = set(all_indicators)
606
  rename_map = {}
607
  for col in list(df.columns):
608
  ccol = _canon(col)
 
616
  if rename_map:
617
  df = df.rename(columns=rename_map)
618
 
619
+ # 2) Coerce numeric
620
  available_indicators = [c for c in all_indicators if c in df.columns]
621
  for c in available_indicators:
622
  df[c] = df[c].apply(coerce_num)
623
 
624
+ # 3) YJ nasional + MinMax nasional
 
625
  for c in available_indicators:
 
626
  x = df[c].astype(float).values
627
  mask = ~np.isnan(x)
628
  transformed = np.full_like(x, np.nan, dtype=float)
 
631
  transformed[mask] = pt.fit_transform(x[mask].reshape(-1, 1)).ravel()
632
  else:
633
  transformed[mask] = x[mask]
634
+ df[f"yj_{c}"] = transformed
635
+ df[f"norm_{c}"] = minmax_norm(df[f"yj_{c}"])
 
 
 
 
636
 
637
+ # 4) Sub-indeks real (mean norm_*, missing=0)
638
+ df["sub_koleksi"] = df.apply(lambda r: penalized_mean_norm(r, [c for c in koleksi_cols if c in available_indicators]), axis=1)
639
+ df["sub_sdm"] = df.apply(lambda r: penalized_mean_norm(r, [c for c in sdm_cols if c in available_indicators]), axis=1)
640
+ df["sub_pelayanan"] = df.apply(lambda r: penalized_mean_norm(r, [c for c in pelayanan_cols if c in available_indicators]), axis=1)
641
+ df["sub_pengelolaan"] = df.apply(lambda r: penalized_mean_norm(r, [c for c in pengelolaan_cols if c in available_indicators]), axis=1)
642
 
643
  df["dim_kepatuhan"] = df[["sub_koleksi", "sub_sdm"]].mean(axis=1)
644
  df["dim_kinerja"] = df[["sub_pelayanan", "sub_pengelolaan"]].mean(axis=1)
645
 
646
+ df["Indeks_Real_0_100"] = 100.0 * (w_kepatuhan*df["dim_kepatuhan"] + w_kinerja*df["dim_kinerja"])
647
 
648
+ # 5) Confidence data
649
  df["n_ind_filled"] = df[available_indicators].notna().sum(axis=1)
650
  df["n_ind_total"] = len(available_indicators)
651
+ df["Confidence_Data"] = np.where(df["n_ind_total"] > 0, df["n_ind_filled"]/df["n_ind_total"], np.nan)
652
 
653
+ # 6) Normatif (tetap dihitung)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
654
  df["Indeks_Normatif_0_100"] = np.nan
 
 
 
 
 
 
 
655
  for i, row in df.iterrows():
656
  jenis = row.get("_dataset", None)
657
  if jenis not in TARGETS:
658
  continue
659
  t = TARGETS[jenis]
 
660
  skor_ind = {}
661
  for ind, target in t.items():
662
  if ind in df.columns:
663
  skor_ind[ind] = skor_normatif(row[ind], target)
664
 
665
+ sub_koleksi_n = np.mean([skor_ind.get("JudulTercetak", 0), skor_ind.get("EksemplarTercetak", 0)])
 
 
 
666
  sub_sdm_n = skor_ind.get("TenagaKualifikasiIlmuPerpustakaan", 0)
667
+ sub_pelayanan_n = np.mean([skor_ind.get("PemustakaLuringDaring", 0), skor_ind.get("KegiatanBudayaBaca", 0)])
 
 
 
668
  sub_pengelolaan_n = skor_ind.get("VariasiLayanan", 0)
669
 
670
  dim_kepatuhan_n = np.mean([sub_koleksi_n, sub_sdm_n])
671
+ dim_kinerja_n = np.mean([sub_pelayanan_n, sub_pengelolaan_n])
672
+
673
+ df.at[i, "Indeks_Normatif_0_100"] = 100.0 * (w_kepatuhan*dim_kepatuhan_n + w_kinerja*dim_kinerja_n)
674
+
675
+ # 7) Verifikasi sampling 68% + penalti factor sesuai RULE
676
+ ver_kab, ver_prov = compute_sampling_verification(df)
677
+
678
+ df["SamplingFactor_Total"] = 1.0
679
+
680
+ # map kab factors (sekolah & umum saja)
681
+ if not ver_kab.empty:
682
+ kab_f_sekolah = ver_kab.set_index("kab_key")["SamplingFactor_Sekolah"].to_dict()
683
+ kab_f_umum = ver_kab.set_index("kab_key")["SamplingFactor_Umum"].to_dict()
684
+
685
+ mask_kab = (df["KEW_NORM"] == "KAB/KOTA")
686
+ mask_kab_sekolah = mask_kab & (df["_dataset"] == "sekolah")
687
+ mask_kab_umum = mask_kab & (df["_dataset"] == "umum")
688
+ mask_kab_khusus = mask_kab & (df["_dataset"] == "khusus")
689
 
690
+ df.loc[mask_kab_sekolah, "SamplingFactor_Total"] = df.loc[mask_kab_sekolah, "kab_key"].map(kab_f_sekolah).fillna(1.0)
691
+ df.loc[mask_kab_umum, "SamplingFactor_Total"] = df.loc[mask_kab_umum, "kab_key"].map(kab_f_umum).fillna(1.0)
692
+ df.loc[mask_kab_khusus, "SamplingFactor_Total"] = 1.0
693
 
694
+ # map prov factor (hanya sekolah menengah)
695
+ if not ver_prov.empty:
696
+ prov_f_school = ver_prov.set_index("prov_key")["SamplingFactor_Prov_Sekolah"].to_dict()
697
+ mask_prov = (df["KEW_NORM"] == "PROVINSI")
698
+ mask_prov_school = mask_prov & (df["_dataset"] == "sekolah")
699
+ df.loc[mask_prov_school, "SamplingFactor_Total"] = df.loc[mask_prov_school, "prov_key"].map(prov_f_school).fillna(1.0)
700
+ # umum/khusus tetap 1.0 (default)
701
 
702
+ # 8) Confidence sample = sampling factor (lebih logis, target-based)
703
+ df["Confidence_Sample"] = df["SamplingFactor_Total"].clip(0,1)
704
+
705
+ df["Confidence_IPLM"] = (
706
+ W_DATA * df["Confidence_Data"].fillna(0) +
707
+ W_SAMPLE * df["Confidence_Sample"].fillna(0)
708
+ )
709
+
710
+ # 9) Final index (penalti)
711
+ df["Indeks_Final_0_100"] = df["Indeks_Real_0_100"] * df["SamplingFactor_Total"]
712
+
713
+ # tambahan (opsional)
714
+ df["Indeks_Real_AdjData"] = df["Indeks_Real_0_100"] * df["Confidence_Data"].fillna(0)
715
+ df["Indeks_Real_AdjConf"] = df["Indeks_Real_0_100"] * df["Confidence_IPLM"].fillna(0)
716
  df["Indeks_Normatif_AdjConf"] = df["Indeks_Normatif_0_100"] * df["Confidence_IPLM"].fillna(0)
717
 
718
+ return df, ver_kab, ver_prov
719
+
720
# Run the national pipeline once. The three results default to
# (None, empty DataFrame, empty DataFrame) and are only replaced when the raw
# data was loaded and is not empty.
df_all_ipml, ver_kab_global, ver_prov_global = (None, pd.DataFrame(), pd.DataFrame())
if df_all_raw is not None and not df_all_raw.empty:
    df_all_ipml, ver_kab_global, ver_prov_global = prepare_global_ipml(df_all_raw)
724
+
725
+ # ============================================================
726
+ # 6) AGREGAT + DOWNLOAD + BELL CURVE
727
+ # ============================================================
728
+
729
+ def run_pipeline_core(df_subset: pd.DataFrame, kab_name=None, kew_name=None):
730
+ if df_subset is None or df_subset.empty:
731
+ empty = pd.DataFrame()
732
+ return (empty, empty, None, None, None, None, None, None, None)
733
+
734
+ df = df_subset.copy()
735
+ df_raw = df_subset.copy()
736
+
737
+ # DETAIL untuk tampilan (tetap lengkap; nanti view disembunyikan di run_app)
738
  detail_cols = []
739
+ if prov_col_glob and prov_col_glob in df.columns: detail_cols.append(prov_col_glob)
740
+ if kab_col_glob and kab_col_glob in df.columns: detail_cols.append(kab_col_glob)
741
+ if nama_col_glob and nama_col_glob in df.columns: detail_cols.append(nama_col_glob)
 
 
 
742
 
743
  detail_cols += [
744
+ "_dataset", "KEW_NORM",
745
+ "sub_koleksi","sub_sdm","sub_pelayanan","sub_pengelolaan",
746
+ "dim_kepatuhan","dim_kinerja",
747
  "Indeks_Real_0_100",
748
+ "SamplingFactor_Total",
749
+ "Indeks_Final_0_100",
750
  "Indeks_Real_AdjData",
751
  "Indeks_Real_AdjConf",
752
  "Indeks_Normatif_0_100",
 
756
  "Confidence_IPLM",
757
  ]
758
  detail_cols = [c for c in detail_cols if c in df.columns]
 
759
  detail_df = df[detail_cols].copy().round(3)
760
 
761
+ # AGREGAT per jenis: gunakan Indeks_Final_0_100 sebagai utama
762
  expected_ds = ["sekolah", "umum", "khusus"]
763
+ label_map = {"sekolah":"Perpustakaan Sekolah","umum":"Perpustakaan Umum","khusus":"Perpustakaan Khusus"}
 
 
 
 
764
 
765
  rows = []
766
  for ds in expected_ds:
 
771
  "Jumlah Perpustakaan": 0,
772
  "Rata2_DimKepatuhan": 0.0,
773
  "Rata2_DimKinerja": 0.0,
774
+ "Rata2_Indeks_Asli_0_100": 0.0,
775
+ "Rata2_SamplingFactor": 1.0,
776
+ "Rata2_Indeks_Final_0_100": 0.0,
777
  })
778
  else:
779
  rows.append({
780
  "Jenis Perpustakaan": label_map.get(ds, ds),
781
+ "Jumlah Perpustakaan": int(len(dsub)),
782
+ "Rata2_DimKepatuhan": float(dsub["dim_kepatuhan"].mean(skipna=True)),
783
+ "Rata2_DimKinerja": float(dsub["dim_kinerja"].mean(skipna=True)),
784
+ "Rata2_Indeks_Asli_0_100": float(dsub["Indeks_Real_0_100"].mean(skipna=True)),
785
+ "Rata2_SamplingFactor": float(dsub["SamplingFactor_Total"].mean(skipna=True)),
786
+ "Rata2_Indeks_Final_0_100": float(dsub["Indeks_Final_0_100"].mean(skipna=True)),
787
  })
788
 
789
+ total_jumlah = int(sum(r["Jumlah Perpustakaan"] for r in rows))
790
+ mean_dim_kep = float(np.mean([r["Rata2_DimKepatuhan"] for r in rows])) if rows else 0.0
791
+ mean_dim_kin = float(np.mean([r["Rata2_DimKinerja"] for r in rows])) if rows else 0.0
792
+ mean_asli = float(np.mean([r["Rata2_Indeks_Asli_0_100"] for r in rows])) if rows else 0.0
793
+ mean_sf = float(np.mean([r["Rata2_SamplingFactor"] for r in rows])) if rows else 1.0
794
+ mean_final = float(np.mean([r["Rata2_Indeks_Final_0_100"] for r in rows])) if rows else 0.0
795
+
796
+ rows.append({
797
+ "Jenis Perpustakaan": "Rata-rata keseluruhan",
798
+ "Jumlah Perpustakaan": total_jumlah,
799
+ "Rata2_DimKepatuhan": mean_dim_kep,
800
+ "Rata2_DimKinerja": mean_dim_kin,
801
+ "Rata2_Indeks_Asli_0_100": mean_asli,
802
+ "Rata2_SamplingFactor": mean_sf,
803
+ "Rata2_Indeks_Final_0_100": mean_final,
804
+ })
805
 
806
  agg_view = pd.DataFrame(rows).round(3)
807
 
808
+ # Simpan Excel
809
  kab_slug = slugify(kab_name) if kab_name else "SEMUA_KAB"
810
  kew_slug = slugify(kew_name) if kew_name else "SEMUA_KEW"
811
  tmpdir = tempfile.mkdtemp()
812
 
813
+ agg_path = os.path.join(tmpdir, f"IPLM_Agregat_{kab_slug}_{kew_slug}.xlsx")
814
+ detail_path = os.path.join(tmpdir, f"IPLM_Detail_{kab_slug}_{kew_slug}.xlsx")
815
+ raw_path = os.path.join(tmpdir, f"IPLM_Raw_{kab_slug}_{kew_slug}.xlsx")
816
 
817
  agg_view.to_excel(agg_path, index=False)
818
  df.to_excel(detail_path, index=False)
819
  df_raw.to_excel(raw_path, index=False)
820
 
821
+ # Bell curve pakai Final index
822
  name_col = nama_col_glob if (nama_col_glob and nama_col_glob in detail_df.columns) else None
823
 
824
+ fig_all = make_bell_figure(detail_df, "Sebaran Indeks (Final, setelah penalti 68%) – Semua", "Indeks_Final_0_100", name_col=name_col)
825
+ fig_sek = make_bell_figure(detail_df[detail_df["_dataset"]=="sekolah"], "Sebaran Indeks Final – Sekolah", "Indeks_Final_0_100", name_col=name_col, min_points=3)
826
+ fig_um = make_bell_figure(detail_df[detail_df["_dataset"]=="umum"], "Sebaran Indeks Final – Umum", "Indeks_Final_0_100", name_col=name_col, min_points=3)
827
+ fig_kh = make_bell_figure(detail_df[detail_df["_dataset"]=="khusus"], "Sebaran Indeks Final – Khusus", "Indeks_Final_0_100", name_col=name_col, min_points=3)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
828
 
829
+ return agg_view, detail_df, agg_path, detail_path, raw_path, fig_all, fig_sek, fig_um, fig_kh
830
 
831
  # ============================================================
832
+ # 7) VERIFIKASI SAMPEL (untuk filter yang dipilih)
833
  # ============================================================
834
 
835
def compute_verification(df_filtered: pd.DataFrame, kew_value: str):
    """
    Build the sample-verification table for the currently selected filter.

    Args:
        df_filtered: Library-level rows after filtering. The code reads the
            columns ``KEW_NORM``, ``_dataset``, ``kab_key`` and ``prov_key``.
        kew_value: Selected authority level. It is matched case-insensitively:
            a value containing "KAB" or "KOTA" selects the kab/kota table, a
            value containing "PROV" selects the province table.

    Returns:
        pd.DataFrame: The verification table rounded to 3 decimals. When the
        required metadata, key column or rows are missing, a one-column
        ``Info`` frame carrying the message is returned instead. An empty
        DataFrame is returned for empty input or an unrecognized authority
        level.
    """
    if df_filtered is None or df_filtered.empty:
        return pd.DataFrame()

    kew_norm = str(kew_value or "").upper()

    def _factors(frame, sample_col, target_col):
        # Row-wise DataFrame.apply(axis=1) hands back a DataFrame on an empty
        # frame, which cannot be assigned to a single column. A plain list of
        # samp_factor() results behaves the same for every frame size.
        return [samp_factor(s, t) for s, t in zip(frame[sample_col], frame[target_col])]

    # --- KAB/KOTA ---
    if "KAB" in kew_norm or "KOTA" in kew_norm:
        if meta_kab_df is None:
            return pd.DataFrame({"Info": ["Meta kab/kota tidak tersedia."]})
        if "kab_key" not in df_filtered.columns:
            return pd.DataFrame({"Info": ["kab_key tidak tersedia di data."]})

        # groupby() silently drops rows whose key is null. Dropping them up
        # front means "no usable rows" is reported through the Info message
        # instead of failing further down.
        kab = df_filtered[df_filtered["KEW_NORM"] == "KAB/KOTA"].dropna(subset=["kab_key", "_dataset"])
        if kab.empty:
            return pd.DataFrame({"Info": ["Tidak ada data untuk KAB/KOTA pada filter ini."]})

        g = kab.groupby(["kab_key", "_dataset"]).size().unstack(fill_value=0).reset_index()
        for col in ["sekolah", "umum", "khusus"]:
            if col not in g.columns:
                g[col] = 0
        g = g.rename(columns={"sekolah": "Sampel_Sekolah_DM", "umum": "Sampel_Umum_DM", "khusus": "Sampel_Khusus_DM"})

        out = g.merge(
            meta_kab_df[["kab_key", "Kab_Kota_Label", "Pop_Sekolah_SD_SMP", "Pop_Admin_Kec_Desa", "Target_Sekolah_68", "Target_Umum_68"]],
            on="kab_key", how="left"
        )

        # Regions missing from the metadata get target 0, which samp_factor() maps to factor 1.0.
        for c in ["Target_Sekolah_68", "Target_Umum_68", "Sampel_Sekolah_DM", "Sampel_Umum_DM", "Sampel_Khusus_DM"]:
            out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0)

        out["SamplingFactor_Sekolah"] = _factors(out, "Sampel_Sekolah_DM", "Target_Sekolah_68")
        out["SamplingFactor_Umum"] = _factors(out, "Sampel_Umum_DM", "Target_Umum_68")
        out["SamplingFactor_Khusus"] = 1.0  # "khusus" libraries have no target

        out["Kurang_Sekolah_Menuju_68"] = np.maximum(out["Target_Sekolah_68"] - out["Sampel_Sekolah_DM"], 0).astype(int)
        out["Kurang_Umum_Menuju_68"] = np.maximum(out["Target_Umum_68"] - out["Sampel_Umum_DM"], 0).astype(int)

        out = out.rename(columns={
            "Kab_Kota_Label": "Kab/Kota",
            "Pop_Sekolah_SD_SMP": "Populasi SD+SMP",
            "Pop_Admin_Kec_Desa": "Populasi Kec+Desa/Kel",
        })

        cols = [
            "Kab/Kota",
            "Sampel_Sekolah_DM", "Target_Sekolah_68", "SamplingFactor_Sekolah", "Kurang_Sekolah_Menuju_68",
            "Sampel_Umum_DM", "Target_Umum_68", "SamplingFactor_Umum", "Kurang_Umum_Menuju_68",
            "Sampel_Khusus_DM", "SamplingFactor_Khusus",
            "Populasi SD+SMP", "Populasi Kec+Desa/Kel",
        ]
        return out[[c for c in cols if c in out.columns]].sort_values("Kab/Kota").reset_index(drop=True).round(3)

    # --- PROVINCE ---
    if "PROV" in kew_norm:
        if meta_menengah_prov is None:
            return pd.DataFrame({"Info": ["Meta prov menengah (SMA+SMK+SLB) tidak tersedia."]})
        if "prov_key" not in df_filtered.columns:
            return pd.DataFrame({"Info": ["prov_key tidak tersedia di data."]})

        prov = df_filtered[df_filtered["KEW_NORM"] == "PROVINSI"].dropna(subset=["prov_key"]).copy()
        if prov.empty:
            return pd.DataFrame({"Info": ["Tidak ada data untuk PROVINSI pada filter ini."]})

        # Detect secondary schools from the sub-type column when it exists,
        # otherwise fall back to every row of the "sekolah" dataset.
        subjenis_col = pick_col(prov, ["sub_jenis_perpus", "SUB_JENIS_PERPUS", "Sub Jenis Perpustakaan", "sub jenis perpus"])
        if subjenis_col:
            # Non-capturing group: a capturing group makes str.contains emit a UserWarning.
            patt = r"\b(?:SMA|SMK|SLB)\b"
            prov["_is_menengah"] = prov[subjenis_col].astype(str).str.upper().str.contains(patt, na=False, regex=True)
        else:
            prov["_is_menengah"] = (prov["_dataset"] == "sekolah")

        samp = prov.groupby("prov_key")["_is_menengah"].sum().reset_index().rename(columns={"_is_menengah": "Sampel_Menengah_DM"})
        samp["Sampel_Menengah_DM"] = pd.to_numeric(samp["Sampel_Menengah_DM"], errors="coerce").fillna(0)

        out = samp.merge(
            meta_menengah_prov[["prov_key", "Total_SMA", "Total_SMK", "Total_SLB", "Total_Menengah", "Target_Menengah_68"]],
            on="prov_key", how="left"
        )
        for c in ["Total_Menengah", "Target_Menengah_68", "Sampel_Menengah_DM"]:
            out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0)

        out["SamplingFactor_Prov_Sekolah"] = _factors(out, "Sampel_Menengah_DM", "Target_Menengah_68")
        out["Kurang_Menengah_Menuju_68"] = np.maximum(out["Target_Menengah_68"] - out["Sampel_Menengah_DM"], 0).astype(int)

        # Attach a readable province label when the source data has one.
        if prov_col_glob and prov_col_glob in df_filtered.columns:
            prov_labels = df_filtered.dropna(subset=["prov_key", prov_col_glob]).groupby("prov_key")[prov_col_glob].first().reset_index()
            out = out.merge(prov_labels, on="prov_key", how="left")
            out = out.rename(columns={prov_col_glob: "Provinsi"})

        cols = ["Provinsi", "Sampel_Menengah_DM", "Total_Menengah", "Target_Menengah_68", "SamplingFactor_Prov_Sekolah", "Kurang_Menengah_Menuju_68", "Total_SMA", "Total_SMK", "Total_SLB"]
        cols = [c for c in cols if c in out.columns]
        return out[cols].sort_values(cols[0]).reset_index(drop=True).round(3)

    return pd.DataFrame()
930
 
 
931
  # ============================================================
932
+ # 8) ANALISIS (LLM + fallback)
933
  # ============================================================
934
 
935
  def build_context_for_llm(detail_df: pd.DataFrame,
 
945
  lines.append(f"Wilayah: {wilayah}")
946
  lines.append(f"Jumlah perpustakaan sampel: {len(detail_df)}")
947
 
948
+ mean_final = np.nan
 
949
  if agg_df is not None and not agg_df.empty and "Jenis Perpustakaan" in agg_df.columns:
950
  mask_total = agg_df["Jenis Perpustakaan"].astype(str).str.lower().str.startswith("rata-rata")
951
  if mask_total.any():
952
  try:
953
+ mean_final = float(agg_df.loc[mask_total, "Rata2_Indeks_Final_0_100"].iloc[0])
 
 
954
  except Exception:
955
+ mean_final = np.nan
956
+ if np.isnan(mean_final) and "Indeks_Final_0_100" in detail_df.columns:
957
+ mean_final = detail_df["Indeks_Final_0_100"].mean(skipna=True)
958
 
959
+ if not np.isnan(mean_final):
960
+ lines.append(f"Rata-rata Indeks IPLM Final (0-100): {mean_final:.2f}")
 
961
 
 
 
 
 
 
 
962
  if "dim_kepatuhan" in detail_df.columns:
963
+ lines.append(f"Rata-rata dimensi kepatuhan (0–1): {detail_df['dim_kepatuhan'].mean(skipna=True):.3f}")
 
964
  if "dim_kinerja" in detail_df.columns:
965
+ lines.append(f"Rata-rata dimensi kinerja (0–1): {detail_df['dim_kinerja'].mean(skipna=True):.3f}")
966
+ if "SamplingFactor_Total" in detail_df.columns:
967
+ lines.append(f"Rata-rata SamplingFactor_Total (0–1): {detail_df['SamplingFactor_Total'].mean(skipna=True):.3f}")
 
 
 
 
 
968
 
 
969
  if agg_df is not None and not agg_df.empty and "Jenis Perpustakaan" in agg_df.columns:
970
  lines.append("\nRingkasan per jenis perpustakaan:")
971
  for _, r in agg_df.iterrows():
972
+ jp = str(r.get("Jenis Perpustakaan","") or "")
973
  if jp.lower().startswith("rata-rata"):
974
  continue
975
+ n = r.get("Jumlah Perpustakaan", 0)
976
+ idx = r.get("Rata2_Indeks_Final_0_100", np.nan)
977
+ sf = r.get("Rata2_SamplingFactor", np.nan)
978
  if pd.isna(idx):
979
  continue
980
+ lines.append(f"- {jp}: n={int(n)}, indeks final={idx:.2f}, sampling factor rata-rata={float(sf):.3f}")
981
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
982
  if verif_df is not None and not verif_df.empty:
983
+ lines.append("\nRingkasan verifikasi sampel (target 68%):")
984
+ # ambil rata-rata kekurangan jika ada
985
+ for c in ["Kurang_Sekolah_Menuju_68","Kurang_Umum_Menuju_68","Kurang_Menengah_Menuju_68"]:
986
+ if c in verif_df.columns:
987
+ try:
988
+ lines.append(f"- Rata-rata {c}: {float(pd.to_numeric(verif_df[c], errors='coerce').fillna(0).mean()):.2f}")
989
+ except Exception:
990
+ pass
 
 
 
 
 
 
 
 
 
991
 
992
  return "\n".join(lines)
993
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
994
  def generate_rule_based_analysis(detail_df: pd.DataFrame,
995
  agg_df: pd.DataFrame,
996
  kab_name: str,
 
1002
  if kew_value and kew_value != "(Semua)":
1003
  wilayah = f"{kab_name} (kewenangan {kew_value})"
1004
 
1005
+ mean_final = detail_df["Indeks_Final_0_100"].mean(skipna=True) if "Indeks_Final_0_100" in detail_df.columns else np.nan
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1006
  mean_kep = detail_df.get("dim_kepatuhan", pd.Series(dtype=float)).mean(skipna=True)
1007
  mean_kin = detail_df.get("dim_kinerja", pd.Series(dtype=float)).mean(skipna=True)
1008
+ mean_sf = detail_df.get("SamplingFactor_Total", pd.Series(dtype=float)).mean(skipna=True)
1009
 
1010
  lines = []
1011
  lines.append("## Analisis Otomatis & Rekomendasi Kebijakan (Rule-based)\n")
1012
  lines.append("### Gambaran Umum Wilayah")
1013
  lines.append(f"- Wilayah: {wilayah}")
1014
  lines.append(f"- Jumlah perpustakaan dalam sampel: {len(detail_df)}")
1015
+ if not pd.isna(mean_final):
1016
+ lines.append(f"- Rata-rata Indeks IPLM Final (setelah penalti 68%): {mean_final:.2f}")
1017
  lines.append(f"- Rata-rata dimensi kepatuhan (0–1): {mean_kep:.3f}")
1018
  lines.append(f"- Rata-rata dimensi kinerja (0–1): {mean_kin:.3f}")
1019
+ if not pd.isna(mean_sf):
1020
+ lines.append(f"- Rata-rata SamplingFactor_Total (0–1): {mean_sf:.3f}")
1021
 
1022
+ lines.append("\n### Implikasi & Arah Kebijakan")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1023
  lines.append(
1024
+ "Hasil indeks final merefleksikan capaian kinerja layanan perpustakaan yang telah dikoreksi oleh tingkat "
1025
+ "kecukupan sampel terhadap target 68%. Artinya, konsistensi data dan cakupan pembinaan berpengaruh langsung "
1026
+ "terhadap interpretasi capaian wilayah. Penguatan pengisian data dan perluasan unit yang terjangkau perlu "
1027
+ "dikelola sebagai bagian integral dari perbaikan layanan."
 
 
1028
  )
1029
  lines.append(
1030
+ "Program prioritas dapat diarahkan pada: peningkatan kelengkapan dan kualitas data indikator, "
1031
+ "penguatan ketersediaan koleksi dan SDM, perluasan kegiatan literasi dan pemanfaatan layanan, serta "
1032
+ "konsolidasi kolaborasi lintas sektor untuk memastikan jangkauan unit (sekolah/administratif) mendekati target."
 
 
 
 
 
 
 
 
1033
  )
1034
 
1035
  return "\n".join(lines)
1036
 
 
 
 
 
 
1037
  def generate_llm_analysis(detail_df: pd.DataFrame,
1038
  agg_df: pd.DataFrame,
1039
  verif_df: pd.DataFrame,
1040
  kab_name: str,
1041
  kew_value: str) -> str:
 
 
 
 
 
1042
  context = build_context_for_llm(detail_df, agg_df, verif_df, kab_name, kew_value)
 
1043
  client = get_llm_client()
1044
  if client is None or not USE_LLM:
1045
+ return "⚠️ LLM tidak tersedia. Berikut analisis rule-based.\n\n" + generate_rule_based_analysis(detail_df, agg_df, kab_name, kew_value)
 
 
 
 
 
1046
 
1047
  system_prompt = (
1048
+ "Anda adalah analis kebijakan perpustakaan dan literasi di Indonesia. "
1049
+ "Anda membaca ringkasan IPLM (indeks final sudah mempertimbangkan penalti sampling 68%) "
1050
+ "dan menyusun analisis kebijakan yang tajam namun komunikatif."
 
1051
  )
1052
 
1053
  user_prompt = f"""
1054
+ DATA RINGKAS IPLM:
1055
 
1056
  {context}
1057
 
1058
  TULISKAN ANALISIS DALAM BAHASA INDONESIA FORMAL, DENGAN STRUKTUR:
1059
+ 1) Gambaran umum (1 paragraf).
1060
+ 2) Analisis capaian indeks final dan dimensi (2 paragraf).
1061
+ 3) Analisis kesenjangan cakupan berbasis target 68% (1 paragraf).
1062
+ 4) Rekomendasi program prioritas 3–5 tahun (2 paragraf naratif).
1063
+
1064
+ GAYA:
1065
+ - Jangan gunakan label penilaian eksplisit "rendah/sedang/tinggi".
1066
+ - Gunakan frasa netral: "memerlukan penguatan", "perlu konsolidasi", "belum sesuai harapan".
 
 
 
 
1067
  """
1068
 
1069
  try:
 
 
 
 
 
1070
  resp = client.chat_completion(
1071
  model=LLM_MODEL_NAME,
1072
+ messages=[{"role":"system","content":system_prompt},{"role":"user","content":user_prompt}],
1073
+ max_tokens=900,
1074
  temperature=0.25,
1075
  top_p=0.9,
1076
  )
 
1077
  text = resp.choices[0].message.content.strip()
1078
  if not text:
1079
  raise ValueError("Respon LLM kosong.")
 
1080
  return text
 
1081
  except Exception as e:
1082
+ return "⚠️ Error LLM, fallback rule-based.\n\n" + generate_rule_based_analysis(detail_df, agg_df, kab_name, kew_value) + f"\n\n(Detail teknis: {repr(e)})"
 
 
 
 
 
 
 
1083
 
1084
  # ============================================================
1085
+ # 9) WORD REPORT (opsional)
1086
  # ============================================================
1087
 
1088
# Optional dependency: python-docx is only needed for the Word report.
try:
    from docx import Document
    from docx.shared import Inches
except Exception:
    HAS_DOCX = False
else:
    HAS_DOCX = True

# Optional dependency: kaleido is probed only to record whether it is importable.
try:
    import kaleido  # noqa: F401
except Exception:
    HAS_KALEIDO = False
else:
    HAS_KALEIDO = True
1100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1101
  def generate_word_report_all(detail_df, agg_df, verif_df, prov, kab, kew, analysis_text):
1102
+ if not HAS_DOCX:
 
 
 
 
 
 
 
 
1103
  return None
1104
 
1105
+ wilayah = kab if kab and kab != "(Semua)" else (prov if prov and prov != "(Semua)" else "NASIONAL")
 
1106
  doc = Document()
1107
  doc.add_heading(f"Laporan IPLM – {wilayah}", level=1)
1108
 
 
 
 
1109
  doc.add_heading("Ringkasan Indeks", level=2)
1110
+ mean_final = float(detail_df["Indeks_Final_0_100"].mean(skipna=True)) if "Indeks_Final_0_100" in detail_df.columns else np.nan
1111
+ mean_kep = float(detail_df["dim_kepatuhan"].mean(skipna=True)) if "dim_kepatuhan" in detail_df.columns else np.nan
1112
+ mean_kin = float(detail_df["dim_kinerja"].mean(skipna=True)) if "dim_kinerja" in detail_df.columns else np.nan
1113
+ mean_sf = float(detail_df["SamplingFactor_Total"].mean(skipna=True)) if "SamplingFactor_Total" in detail_df.columns else np.nan
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1114
 
1115
  doc.add_paragraph(f"- Jumlah perpustakaan: {len(detail_df)}")
1116
+ if not np.isnan(mean_final): doc.add_paragraph(f"- Rata-rata Indeks IPLM Final (setelah penalti 68%): {mean_final:.2f}")
1117
+ if not np.isnan(mean_kep): doc.add_paragraph(f"- Rata-rata Dimensi Kepatuhan (0–1): {mean_kep:.3f}")
1118
+ if not np.isnan(mean_kin): doc.add_paragraph(f"- Rata-rata Dimensi Kinerja (0–1): {mean_kin:.3f}")
1119
+ if not np.isnan(mean_sf): doc.add_paragraph(f"- Rata-rata SamplingFactor_Total (0–1): {mean_sf:.3f}")
1120
+
1121
+ doc.add_heading("Agregat per Jenis Perpustakaan", level=2)
1122
+ if agg_df is not None and not agg_df.empty:
1123
+ table = doc.add_table(rows=1, cols=len(agg_df.columns))
1124
+ hdr = table.rows[0].cells
 
 
 
 
 
 
 
 
1125
  for i, c in enumerate(agg_df.columns):
1126
+ hdr[i].text = str(c)
1127
+ for _, row in agg_df.iterrows():
1128
+ r = table.add_row().cells
1129
+ for i, c in enumerate(agg_df.columns):
1130
+ r[i].text = str(row[c])
1131
+ else:
1132
+ doc.add_paragraph("Agregat tidak tersedia.")
1133
 
1134
+ doc.add_heading("Verifikasi Sampel (Target 68%)", level=2)
1135
+ if verif_df is not None and not verif_df.empty:
1136
+ # limit baris agar docx tidak kelewat besar
1137
+ preview = verif_df.head(50).copy()
1138
+ table = doc.add_table(rows=1, cols=len(preview.columns))
1139
+ hdr = table.rows[0].cells
1140
+ for i, c in enumerate(preview.columns):
1141
+ hdr[i].text = str(c)
1142
+ for _, row in preview.iterrows():
1143
+ r = table.add_row().cells
1144
+ for i, c in enumerate(preview.columns):
1145
+ r[i].text = str(row[c])
1146
+ if len(verif_df) > 50:
1147
+ doc.add_paragraph("Catatan: tabel dipotong sampai 50 baris pada laporan Word.")
1148
+ else:
1149
+ doc.add_paragraph("Verifikasi sampel tidak tersedia untuk filter ini.")
1150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1151
  doc.add_heading("Analisis Naratif Otomatis", level=2)
1152
  for paragraph in analysis_text.split("\n"):
1153
  if paragraph.strip():
1154
  doc.add_paragraph(paragraph)
1155
 
 
 
 
1156
  outpath = tempfile.mktemp(suffix=".docx")
1157
  doc.save(outpath)
1158
  return outpath
1159
 
 
1160
  # ============================================================
1161
+ # 10) GRADIO UI
1162
  # ============================================================
1163
 
1164
def all_prov_choices():
    """Return the province dropdown options: "(Semua)" followed by sorted names.

    Falls back to just ["(Semua)"] when the raw data or the province column
    name is not available.
    """
    options = ["(Semua)"]
    if df_all_raw is None or prov_col_glob is None:
        return options
    cleaned = df_all_raw[prov_col_glob].dropna().astype(str).str.strip()
    # Drop blanks left over after stripping, then sort the distinct names.
    options.extend(sorted(name for name in cleaned.unique() if name != ""))
    return options
1170
+
1171
def get_kab_choices_for_prov(prov_value):
    """Return the kab/kota dropdown options for a province selection.

    The list is "(Semua)" followed by the sorted distinct kab/kota names. All
    names are used when no specific province is selected or the province
    column is unknown; otherwise only rows of the given province are used.
    """
    if df_all_raw is None or kab_col_glob is None:
        return ["(Semua)"]

    kab_series = df_all_raw[kab_col_glob]
    no_prov_filter = prov_value is None or prov_value == "(Semua)" or prov_col_glob is None
    if not no_prov_filter:
        in_prov = df_all_raw[prov_col_glob].astype(str).str.strip() == prov_value
        kab_series = kab_series[in_prov]

    cleaned = kab_series.dropna().astype(str).str.strip()
    names = [name for name in cleaned.unique() if name != ""]
    names.sort()
    return ["(Semua)"] + names
1181
+
1182
def all_kew_choices():
    """Return the authority dropdown options: "(Semua)" plus sorted KEW_NORM values.

    Only ["(Semua)"] is returned when the raw data is missing or has no
    non-blank KEW_NORM values.
    """
    if df_all_raw is None:
        return ["(Semua)"]
    cleaned = df_all_raw["KEW_NORM"].dropna().astype(str).str.strip()
    distinct = sorted(value for value in cleaned.unique() if value != "")
    # With no values this is simply ["(Semua)"], same as the explicit fallback.
    return ["(Semua)", *distinct]
1188
+
1189
# Initial dropdown contents, computed once at import time.
prov_choices = all_prov_choices()
_initial_prov = prov_choices[0] if prov_choices else "(Semua)"
kab_choices = get_kab_choices_for_prov(_initial_prov)
kew_choices = all_kew_choices()

# Prefer "KAB/KOTA" as the default authority, else the first available option.
if "KAB/KOTA" in kew_choices:
    default_kew = "KAB/KOTA"
elif kew_choices:
    default_kew = kew_choices[0]
else:
    default_kew = "(Semua)"
1193
+
1194
def on_prov_change(prov_value):
    """Refresh the kab/kota dropdown for the chosen province and reset it to "(Semua)"."""
    return gr.update(choices=get_kab_choices_for_prov(prov_value), value="(Semua)")
1197
+
1198
+ def run_app(prov_value, kab_value, kew_value):
1199
+ if df_all_ipml is None or df_all_ipml.empty:
1200
  empty = pd.DataFrame()
1201
+ return (empty, empty, empty, None, None, None, None, None, None, None, None,
1202
+ "Data belum berhasil dimuat atau pipeline nasional belum terbentuk.",
1203
+ "Belum ada analisis otomatis.")
 
 
 
 
 
1204
 
1205
+ df = df_all_ipml.copy()
1206
 
1207
+ # filter prov
1208
  if prov_col_glob and prov_value and prov_value != "(Semua)":
1209
  df = df[df[prov_col_glob].astype(str).str.strip() == prov_value]
1210
 
1211
+ # filter kab
1212
  if kab_col_glob and kab_value and kab_value != "(Semua)":
1213
  df = df[df[kab_col_glob].astype(str).str.strip() == kab_value]
1214
 
1215
+ # filter kew
1216
  if kew_value and kew_value != "(Semua)":
1217
  df = df[df["KEW_NORM"] == kew_value]
1218
 
1219
+ if df.empty:
1220
  empty = pd.DataFrame()
1221
+ return (empty, empty, empty, None, None, None, None, None, None, None, None,
1222
+ "Tidak ada data untuk kombinasi filter yang dipilih.",
1223
+ "Belum ada analisis otomatis.")
 
 
 
 
 
1224
 
1225
  kab_name = kab_value if kab_value and kab_value != "(Semua)" else "SEMUA KAB/KOTA"
1226
  kew_name = kew_value if kew_value and kew_value != "(Semua)" else "SEMUA KEWENANGAN"
1227
 
1228
+ agg_df, detail_df, agg_path, detail_path, raw_path, fig_all, fig_sek, fig_um, fig_kh = run_pipeline_core(df, kab_name=kab_name, kew_name=kew_name)
 
 
 
 
 
 
 
 
 
 
1229
 
 
1230
  verif_df = compute_verification(df, kew_value)
1231
 
1232
+ msg = f"Berhasil dihitung untuk {len(detail_df)} baris."
1233
+ if "Indeks_Final_0_100" in detail_df.columns:
1234
+ msg += f" | Rata-rata Indeks Final: {detail_df['Indeks_Final_0_100'].mean(skipna=True):.2f}"
1235
+ if "SamplingFactor_Total" in detail_df.columns:
1236
+ msg += f" | Rata-rata SamplingFactor: {detail_df['SamplingFactor_Total'].mean(skipna=True):.3f}"
1237
+ if verif_df is not None and not verif_df.empty:
 
 
 
1238
  msg += " | Verifikasi sampel tersedia."
1239
 
1240
+ analysis_text = generate_llm_analysis(detail_df, agg_df, verif_df, kab_name, kew_value)
 
 
 
 
 
 
 
1241
 
1242
+ word_path = generate_word_report_all(detail_df, agg_df, verif_df, prov_value, kab_value, kew_value, analysis_text)
 
 
 
 
 
1243
 
1244
+ # === VIEW UNTUK UI: sembunyikan normatif & confidence & adj (sesuai kebiasaan Anda) ===
1245
  cols_hide = [
1246
  "Indeks_Normatif_0_100",
1247
  "Indeks_Normatif_AdjConf",
 
1255
 
1256
  return (
1257
  agg_df,
1258
+ detail_df_view,
1259
  verif_df,
1260
  agg_path,
1261
  detail_path,
1262
  raw_path,
1263
  word_path,
1264
  fig_all,
1265
+ fig_sek,
1266
+ fig_um,
1267
+ fig_kh,
1268
  msg,
1269
+ analysis_text
1270
  )
1271
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1272
  with gr.Blocks() as demo:
1273
  gr.Markdown(
1274
  f"""
1275
+ # IPLM 2025 β€” RealScore (nasional) + Penalti Sampling 68% + Normatif + Analisis Otomatis
 
 
1276
 
1277
+ Sumber file (dibaca dari repository):
1278
+ - **`{DATA_FILE}`** β€” Data perpustakaan (multi-sheet OK)
1279
+ - **`{META_KAB_FILE}`** β€” Kecamatan & Desa/Kel per Kab/Kota
1280
+ - **`{META_SDSMP_FILE}`** β€” SD & SMP per Kab/Kota
1281
+ - **`{META_MENENGAH_FILE}`** β€” SMA+SMK+SLB per Kab/Kota (diagregasi jadi target provinsi)
1282
 
1283
+ {DATA_INFO}
1284
+ """
1285
  )
1286
 
1287
  with gr.Row():
1288
+ dd_prov = gr.Dropdown(label="Provinsi", choices=prov_choices, value=prov_choices[0] if prov_choices else "(Semua)")
1289
+ dd_kab = gr.Dropdown(label="Kab/Kota", choices=kab_choices, value="(Semua)")
1290
  dd_kew = gr.Dropdown(label="Kewenangan", choices=kew_choices, value=default_kew)
1291
 
1292
+ dd_prov.change(fn=on_prov_change, inputs=dd_prov, outputs=dd_kab)
 
 
 
 
1293
 
1294
  run_btn = gr.Button("Jalankan Perhitungan")
1295
  msg_out = gr.Markdown()
1296
 
1297
+ gr.Markdown("### Hasil Agregat (Indeks Final = RealScore Γ— Penalti 68%) per Jenis Perpustakaan")
1298
  agg_df_out = gr.DataFrame(interactive=False)
1299
 
1300
+ gr.Markdown("### Detail Indeks per Perpustakaan (tampilan disederhanakan)")
1301
  detail_df_out = gr.DataFrame(interactive=False)
1302
 
1303
+ gr.Markdown("### Verifikasi Sampel (Target 68% + Kekurangan + Sampling Factor)")
1304
+ verif_df_out = gr.DataFrame(interactive=False)
 
 
 
1305
 
1306
+ gr.Markdown("### Sebaran Indeks Final – Semua")
1307
  bell_all_out = gr.Plot()
1308
 
1309
+ gr.Markdown("### Sebaran Indeks Final – Sekolah")
1310
  bell_sekolah_out = gr.Plot()
1311
 
1312
+ gr.Markdown("### Sebaran Indeks Final – Umum")
1313
  bell_umum_out = gr.Plot()
1314
 
1315
+ gr.Markdown("### Sebaran Indeks Final – Khusus")
1316
  bell_khusus_out = gr.Plot()
1317
 
1318
  gr.Markdown("### Analisis Otomatis & Rekomendasi Kebijakan")
1319
  analysis_out = gr.Markdown()
1320
 
1321
  with gr.Row():
1322
+ agg_file_out = gr.File(label="Download Agregat (.xlsx)")
1323
+ detail_file_out = gr.File(label="Download Detail (.xlsx)")
1324
+ raw_file_out = gr.File(label="Download Raw Subset (.xlsx)")
1325
+ word_file_out = gr.File(label="Download Laporan Word (.docx)")
1326
 
1327
  run_btn.click(
1328
  fn=run_app,