irhamni committed on
Commit
b6e2cae
·
verified ·
1 Parent(s): 63f5584

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +342 -0
app.py CHANGED
@@ -567,6 +567,241 @@ def make_bell_figure(df_in: pd.DataFrame, title: str, index_col="Indeks_Final_0_
567
  )
568
  return fig
569
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
570
  # ============================================================
571
  # 8) OUTPUT TABEL: AGREGAT RINGKAS + DETAIL RINGKAS
572
  # ============================================================
@@ -666,6 +901,113 @@ def run_pipeline_filtered(prov_value, kab_value, kew_value):
666
  msg = f"✅ Selesai. Unit (dedup): {len(df2)} | Wilayah: {wilayah} | Kew: {kew_value} | Mean Final: {df2['Indeks_Final_0_100'].mean():.2f}"
667
  return agg_df, detail_df, verif_df, agg_path, detail_path, verif_path, fig_all, fig_sek, fig_um, fig_kh, msg
668
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
669
  # ============================================================
670
  # 10) DROPDOWN (NO DUPLICATE)
671
  # ============================================================
 
567
  )
568
  return fig
569
 
570
+ # ============================================================
571
+ # 7c. LLM DATA ANALYTICS (NARASI LEBIH DATA-DRIVEN) + WORD DOCX
572
+ # (TAMBAHAN SAJA — TIDAK MENGUBAH PIPELINE YANG ADA)
573
+ # ============================================================
574
+
575
def _safe_table_text(df: pd.DataFrame, max_rows: int = 12, max_cols=None) -> str:
    """Render a compact plain-text preview of a DataFrame for an LLM prompt.

    Args:
        df: Table to render; may be ``None``.
        max_rows: Maximum number of leading rows to include.
        max_cols: Optional maximum number of leading columns to include.
            ``None`` (the default) keeps every column.

    Returns:
        The text produced by ``DataFrame.to_string(index=False)`` for the
        truncated table, or the literal ``"(kosong)"`` when ``df`` is
        ``None`` or empty.
    """
    if df is None or df.empty:
        return "(kosong)"
    # Rendering never mutates the frame, so truncating is enough; copying the
    # whole table first would only cost memory.
    preview = df.head(max_rows)
    if max_cols is not None:
        preview = preview.iloc[:, :max_cols]
    return preview.to_string(index=False)
582
+
583
+
584
def _distribution_stats(values) -> dict:
    """Return count, mean, std, min, quartiles and max of the finite numbers in *values*."""
    s = pd.to_numeric(values, errors="coerce").dropna()
    # +/-inf passes dropna() but would turn mean/std/quantiles into inf or NaN,
    # so keep only finite observations.
    s = s[np.isfinite(s.astype(float))]
    if s.empty:
        return {}
    q1, q2, q3 = np.quantile(s.to_numpy(dtype=float), [0.25, 0.5, 0.75])
    return {
        "n": int(len(s)),
        "mean": float(s.mean()),
        # Sample standard deviation; defined as 0.0 for a single observation.
        "std": float(s.std(ddof=1)) if len(s) > 1 else 0.0,
        "min": float(s.min()),
        "q1": float(q1),
        "median": float(q2),
        "q3": float(q3),
        "max": float(s.max()),
    }


def summarize_distribution(detail_df: pd.DataFrame):
    """Summarize the index distribution of ``detail_df`` for the LLM prompt.

    The column ``Indeks_Final_0_100`` is used when present; otherwise the
    function falls back to ``Indeks_Real_0_100``.

    Args:
        detail_df: Detail table; may be ``None`` or empty.

    Returns:
        A dict with three keys:

        * ``"idx_col"``: name of the index column that was selected.
        * ``"all"``: statistics over every row (``n``, ``mean``, ``std``,
          ``min``, ``q1``, ``median``, ``q3``, ``max``), or ``{}`` when no
          usable value exists.
        * ``"by_type"``: the same statistics per ``_dataset`` value
          (``sekolah``, ``umum``, ``khusus``); left empty when the
          ``_dataset`` column is absent.
    """
    has_final = detail_df is not None and "Indeks_Final_0_100" in detail_df.columns
    idx_col = "Indeks_Final_0_100" if has_final else "Indeks_Real_0_100"
    out = {"idx_col": idx_col, "all": {}, "by_type": {}}

    if detail_df is None or detail_df.empty or idx_col not in detail_df.columns:
        return out

    out["all"] = _distribution_stats(detail_df[idx_col])

    if "_dataset" in detail_df.columns:
        for ds in ("sekolah", "umum", "khusus"):
            subset = detail_df.loc[detail_df["_dataset"] == ds, idx_col]
            out["by_type"][ds] = _distribution_stats(subset)

    return out
619
+
620
+
621
def generate_llm_data_analytics(detail_df: pd.DataFrame,
                                agg_df: pd.DataFrame,
                                verif_df: pd.DataFrame,
                                kab_name: str,
                                kew_value: str) -> str:
    """Produce a data-driven narrative analysis, using the LLM when available.

    The prompt is built from the distribution summary of ``detail_df``
    (mean, Q1/median/Q3, per library type) plus truncated text renderings of
    the aggregate and coverage-verification tables.

    Args:
        detail_df: Detail table passed to ``summarize_distribution``.
        agg_df: Aggregate table; at most 8 rows are placed in the prompt.
        verif_df: Coverage/gap verification table; at most 12 rows are used.
        kab_name: Region name shown in the prompt.
        kew_value: Authority filter; appended to the region label unless it
            is empty or ``"(Semua)"``.

    Returns:
        The LLM's text. When the LLM is disabled, no client is available, the
        call fails, or the response is empty, the result of
        ``generate_rule_based_analysis`` prefixed with a warning line.
    """
    # Check the feature flag first so no client is created when LLM use is off,
    # and skip all prompt construction when there is nothing to call.
    client = get_llm_client() if USE_LLM else None
    if client is None:
        rb = generate_rule_based_analysis(detail_df, agg_df, kab_name, kew_value)
        return "⚠️ LLM tidak tersedia, analisis menggunakan rule-based.\n\n" + rb

    wilayah = kab_name
    if kew_value and kew_value != "(Semua)":
        wilayah = f"{kab_name} (kewenangan {kew_value})"

    dist = summarize_distribution(detail_df)
    idx_col = dist.get("idx_col", "Indeks_Final_0_100")
    all_stats = dist.get("all", {})
    by_type = dist.get("by_type", {})

    def fmt_stats(d):
        # One-line rendering of a stats dict; empty dict means "no data".
        if not d:
            return "(tidak tersedia)"
        return (
            f"n={d['n']}, mean={d['mean']:.2f}, sd={d['std']:.2f}, "
            f"min={d['min']:.2f}, Q1={d['q1']:.2f}, median={d['median']:.2f}, Q3={d['q3']:.2f}, max={d['max']:.2f}"
        )

    lines = [
        f"Wilayah: {wilayah}",
        f"Indeks yang dianalisis: {idx_col} (0–100)",
        f"Distribusi keseluruhan: {fmt_stats(all_stats)}",
    ]
    lines.extend(f"Distribusi {ds}: {fmt_stats(st)}" for ds, st in by_type.items())
    stats_txt = "\n".join(lines)

    # Keep the tables short so the prompt stays compact.
    agg_txt = _safe_table_text(agg_df, max_rows=8)
    ver_txt = _safe_table_text(verif_df, max_rows=12)

    system_prompt = (
        "Anda adalah analis data & kebijakan perpustakaan. "
        "Anda menulis analisis resmi untuk pemangku kepentingan pemerintah daerah. "
        "Anda harus menggunakan pendekatan berbasis data, jelas, dan ringkas."
    )

    user_prompt = f"""
DATA RINGKAS IPLM (FINAL) UNTUK ANALISIS:

RINGKASAN STATISTIK (indeks final & distribusi):
{stats_txt}

TABEL AGREGAT (ringkas):
{agg_txt}

TABEL VERIFIKASI COVERAGE & GAP (ringkas):
{ver_txt}

TUGAS:
Tulis analisis dalam Bahasa Indonesia formal, struktur:

A. Ringkasan eksekutif (1 paragraf) — fokus pada indeks FINAL setelah penalti 68%.
B. Diagnostik berbasis data (2–3 paragraf):
- Jelaskan distribusi (Q1/Median/Q3), variasi antar jenis perpustakaan.
- Jelaskan implikasi kualitas/representasi data bila coverage belum 68%.
C. Prioritas intervensi 12–18 bulan (1–2 paragraf) — fokus pada program pembinaan yang realistis.
D. Rekomendasi kebijakan 3–5 tahun (1–2 paragraf) — penataan tata kelola data, pembinaan, standardisasi.

GAYA:
- Jangan menyebut "rendah/sedang/tinggi". Gunakan frasa netral: "ruang penguatan", "belum konsisten", dll.
- Hindari kalimat terlalu panjang.
- Jangan membuat data baru di luar yang tersedia.
"""

    # Deliberately broad: any failure of the remote call (network, auth,
    # malformed response) must degrade to the rule-based text, not crash the app.
    try:
        resp = client.chat_completion(
            model=LLM_MODEL_NAME,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            max_tokens=1200,
            temperature=0.25,
            top_p=0.9,
        )
        # The content may be None; normalise it so that case is reported as
        # an empty response rather than an AttributeError.
        text = (resp.choices[0].message.content or "").strip()
        if not text:
            raise ValueError("Respon LLM kosong.")
        return text
    except Exception as e:
        rb = generate_rule_based_analysis(detail_df, agg_df, kab_name, kew_value)
        return (
            "⚠️ Gagal memanggil LLM untuk data analytics, fallback rule-based.\n\n"
            f"(Detail teknis: {repr(e)})\n\n{rb}"
        )
725
+
726
+
727
def _add_dataframe_table(doc, df) -> None:
    """Append *df* to *doc* as a table: one header row, then one row per record."""
    columns = list(df.columns)
    table = doc.add_table(rows=1, cols=len(columns))
    for cell, col in zip(table.rows[0].cells, columns):
        cell.text = str(col)
    # itertuples keeps each column's own dtype, so integer columns are not
    # upcast to floats (and rendered as "5.0") the way iterrows can do.
    for record in df.itertuples(index=False, name=None):
        for cell, value in zip(table.add_row().cells, record):
            cell.text = str(value)


def generate_word_report_llm_analytics(detail_df, agg_df, verif_df, prov, kab, kew, analytics_text):
    """Build the Word (.docx) report around the LLM data-analytics narrative.

    The document contains, in order: summary statistics of the index
    (mean and quartiles), the aggregate table, the coverage-verification
    table with every numeric column rounded to whole numbers, and the
    narrative text split into one paragraph per non-blank line.

    Args:
        detail_df: Detail table passed to ``summarize_distribution``.
        agg_df: Aggregate table; may be ``None`` or empty.
        verif_df: Coverage verification table; may be ``None`` or empty.
        prov: Province name, used in the title when no regency is selected.
        kab: Regency/city name, or ``"(Semua)"`` / empty for all.
        kew: Authority filter; ``"PUSAT"`` disables the report.
        analytics_text: Narrative text to embed.

    Returns:
        Path of the saved ``.docx`` file, or ``None`` when ``kew`` is
        ``"PUSAT"``.
    """
    if kew == "PUSAT":
        return None

    # Treat a missing/empty kab like "(Semua)", matching how run_app checks it.
    wilayah = kab if kab and kab != "(Semua)" else prov
    dist = summarize_distribution(detail_df)
    idx_col = dist.get("idx_col", "Indeks_Final_0_100")
    all_stats = dist.get("all", {})

    doc = Document()
    doc.add_heading(f"Laporan Analisis IPLM (FINAL) – {wilayah}", level=1)
    doc.add_paragraph(
        "Laporan ini menyajikan analisis Indeks IPLM FINAL (0–100) setelah penerapan penalti "
        "kecukupan sampel 68% (untuk perpustakaan sekolah dan umum, sesuai konfigurasi aplikasi)."
    )

    doc.add_heading("1. Ringkasan Statistik Indeks FINAL", level=2)
    if all_stats:
        doc.add_paragraph(f"- Indeks yang digunakan: {idx_col}")
        doc.add_paragraph(f"- Jumlah perpustakaan: {int(all_stats.get('n', 0))}")
        doc.add_paragraph(f"- Rata-rata: {all_stats.get('mean', 0.0):.2f}")
        doc.add_paragraph(f"- Q1: {all_stats.get('q1', 0.0):.2f}")
        doc.add_paragraph(f"- Median: {all_stats.get('median', 0.0):.2f}")
        doc.add_paragraph(f"- Q3: {all_stats.get('q3', 0.0):.2f}")
        doc.add_paragraph(
            f"- Minimum–Maksimum: {all_stats.get('min', 0.0):.2f} – {all_stats.get('max', 0.0):.2f}"
        )
    else:
        doc.add_paragraph("Statistik distribusi tidak tersedia (data indeks tidak ditemukan).")

    doc.add_heading("2. Ringkasan Agregat per Jenis Perpustakaan", level=2)
    if agg_df is not None and not agg_df.empty:
        _add_dataframe_table(doc, agg_df)
    else:
        doc.add_paragraph("Tabel agregat tidak tersedia.")

    doc.add_heading("3. Verifikasi Coverage & GAP menuju 68% (Kontrol Mutu)", level=2)
    if verif_df is not None and not verif_df.empty:
        v = verif_df.copy()
        # Round without decimals: every numeric column becomes an integer
        # (missing values are shown as 0).
        for c in v.columns:
            if pd.api.types.is_numeric_dtype(v[c]):
                v[c] = pd.to_numeric(v[c], errors="coerce").fillna(0).round(0).astype(int)
        _add_dataframe_table(doc, v)
    else:
        doc.add_paragraph("Tidak ada tabel verifikasi coverage untuk wilayah ini.")

    doc.add_heading("4. Analisis Naratif Otomatis (LLM Data Analytics)", level=2)
    for paragraph in str(analytics_text).split("\n"):
        if paragraph.strip():
            doc.add_paragraph(paragraph.strip())

    # mktemp() only returns a name and is race-prone/deprecated; create the
    # file atomically instead and keep it (delete=False) for the caller.
    with tempfile.NamedTemporaryFile(suffix=".docx", delete=False) as tmp:
        outpath = tmp.name
    doc.save(outpath)
    return outpath
803
+
804
+
805
  # ============================================================
806
  # 8) OUTPUT TABEL: AGREGAT RINGKAS + DETAIL RINGKAS
807
  # ============================================================
 
901
  msg = f"✅ Selesai. Unit (dedup): {len(df2)} | Wilayah: {wilayah} | Kew: {kew_value} | Mean Final: {df2['Indeks_Final_0_100'].mean():.2f}"
902
  return agg_df, detail_df, verif_df, agg_path, detail_path, verif_path, fig_all, fig_sek, fig_um, fig_kh, msg
903
 
904
+
905
+ # ============================================================
906
+ # 9b. WRAPPER: PAKAI LLM DATA ANALYTICS + WORD (tanpa ubah run_app lama)
907
+ # ============================================================
908
+
909
_run_app_base = run_app  # keep a handle on the original implementation


def run_app(prov_value, kab_value, kew_value):
    """Run the original ``run_app`` and swap in the LLM data-analytics outputs.

    The base function is executed first. If it yields no detail rows, or the
    full dataset (``df_all_ipml``) is unavailable or empty after applying the
    same province / regency / authority filters, the base result is returned
    unchanged. Otherwise the pipeline is re-run on the filtered full dataset
    to obtain the complete detail table, and the narrative text and the Word
    report path are replaced by the data-analytics versions.

    Args:
        prov_value: Selected province, or ``"(Semua)"`` / empty for all.
        kab_value: Selected regency/city, or ``"(Semua)"`` / empty for all.
        kew_value: Selected authority, or ``"(Semua)"`` / empty for all.

    Returns:
        A 13-element tuple in the same order as the base ``run_app``:
        aggregate table, detail view, verification table, three file paths,
        Word report path, four figures, status message, analysis text.
    """
    base_result = tuple(_run_app_base(prov_value, kab_value, kew_value))
    (
        agg_df,
        detail_df_view,
        verif_df,
        agg_path,
        detail_path,
        raw_path,
        word_path,
        fig_all,
        fig_sekolah,
        fig_umum,
        fig_khusus,
        msg,
        analysis_text,
    ) = base_result

    # Nothing to analyse: hand back the base outputs untouched.
    if detail_df_view is None or getattr(detail_df_view, "empty", False):
        return base_result
    if df_all_ipml is None or df_all_ipml.empty:
        return base_result

    # The detail *view* lacks columns such as _dataset and the final index,
    # so re-select the same subset from the full dataset.
    df = df_all_ipml
    if prov_col_glob and prov_value and prov_value != "(Semua)":
        df = df[df[prov_col_glob].astype(str).str.strip() == prov_value]
    if kab_col_glob and kab_value and kab_value != "(Semua)":
        df = df[df[kab_col_glob].astype(str).str.strip() == kab_value]
    if kew_value and kew_value != "(Semua)":
        df = df[df["KEW_NORM"] == kew_value]

    if df.empty:
        return base_result

    # Copy only the filtered subset (not the whole dataset) so the pipeline
    # works on its own frame and never on the shared global one.
    df = df.copy()

    kab_name = kab_value if kab_value and kab_value != "(Semua)" else "SEMUA KAB/KOTA"
    kew_name = kew_value if kew_value and kew_value != "(Semua)" else "SEMUA KEWENANGAN"

    # Rebuild the complete detail table with the core pipeline for consistency.
    (agg_df2, detail_df_full, *_rest) = run_pipeline_core(df, kab_name=kab_name, kew_name=kew_name)

    # Prefer the freshly computed aggregate; fall back to the base one.
    agg_for_report = agg_df2 if (agg_df2 is not None and not agg_df2.empty) else agg_df

    analytics_text = generate_llm_data_analytics(
        detail_df=detail_df_full,
        agg_df=agg_for_report,
        verif_df=verif_df,
        kab_name=kab_name,
        kew_value=kew_value,
    )

    word_path2 = generate_word_report_llm_analytics(
        detail_df_full,
        agg_for_report,
        verif_df,
        prov_value, kab_value, kew_value,
        analytics_text,
    )

    # Same output shape as the base run_app; only the Word path (when a new
    # report was produced) and the analysis text are replaced.
    return (
        agg_df,
        detail_df_view,
        verif_df,
        agg_path,
        detail_path,
        raw_path,
        (word_path2 or word_path),
        fig_all,
        fig_sekolah,
        fig_umum,
        fig_khusus,
        msg,
        analytics_text,
    )
1010
+
1011
  # ============================================================
1012
  # 10) DROPDOWN (NO DUPLICATE)
1013
  # ============================================================