SHELLAPANDIANGANHUNGING committed on
Commit
6a95279
·
verified ·
1 Parent(s): a6045bc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +494 -310
app.py CHANGED
@@ -475,9 +475,38 @@ if 'temuan_kode_distrik' in df_local.columns:
475
  else:
476
  st.error("Column 'temuan_kode_distrik' not found in the data. Cannot determine PG/UM areas.")
477
  st.stop()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
478
 
479
- # =================== 2. Treemap: Distribusi Temuan per Area (nama_lokasi_full) - PERBAIKAN ===================
480
- st.markdown("<h3 class='section-title'>OBJECTIVE 2 - Active vs Inactive Locations: Who Leads?</h3>", unsafe_allow_html=True)
 
 
 
 
481
 
482
  # Hitung temuan per bulan per lokasi
483
  findings_by_location_month = df_local.groupby(['created_month', 'nama_lokasi_full']).size().reset_index(name='findings_count')
@@ -490,51 +519,34 @@ merged_loc = merged_loc.fillna({'findings_count': 0, 'unique_creators': 0})
490
  # Filter untuk menghindari pembagian dengan nol
491
  merged_loc = merged_loc[merged_loc['unique_creators'] > 0]
492
  # Hitung rasio (ignore NaN)
493
- # Pembagian oleh 0 akan menghasilkan inf, jadi kita ganti inf dengan NaN
494
  merged_loc['ratio'] = merged_loc['findings_count'] / merged_loc['unique_creators']
495
  merged_loc['ratio'] = merged_loc['ratio'].replace([np.inf, -np.inf], np.nan)
496
 
497
  # Rata-rata bulanan per lokasi
498
- # Group by nama_lokasi_full dan ambil mean dari rasio
499
- # mean() akan mengabaikan NaN secara default
500
  avg_ratio_per_location = merged_loc.groupby('nama_lokasi_full')['ratio'].mean().reset_index(name='avg_monthly_ratio')
501
 
502
  # Filter hasil akhir untuk menghindari NaN
503
  avg_ratio_per_location = avg_ratio_per_location.dropna(subset=['avg_monthly_ratio'])
504
 
505
- # Plot Treemap
506
  if not avg_ratio_per_location.empty:
507
- # Tambahkan kolom untuk warna berdasarkan kriteria
508
- def categorize_risk(r):
509
- if r > 1.3:
510
- return 'High Activity (> 1.3)' # Warna Hijau
511
- elif r > 1.0:
512
- return 'Medium Activity (1.0 - 1.3)' # Warna Kuning
513
- else:
514
- return 'Low Activity (<= 1.0)' # Warna Merah
515
-
516
- avg_ratio_per_location['Activity_Category'] = avg_ratio_per_location['avg_monthly_ratio'].apply(categorize_risk)
517
-
518
- # Peta warna
519
- color_map = {
520
- 'High Activity (> 1.3)': '#4CAF50', # Hijau
521
- 'Medium Activity (1.0 - 1.3)': '#FFB300', # Kuning
522
- 'Low Activity (<= 1.0)': '#D32F2F' # Merah
523
- }
524
-
525
- # Gunakan treemap plot dengan ukuran mencerminkan rata-rata rasio dan warna berdasarkan kategori aktivitas
526
  fig_treemap = px.treemap(
527
  avg_ratio_per_location,
528
  path=['nama_lokasi_full'], # Path untuk hierarki (hanya satu level di sini)
529
  values='avg_monthly_ratio', # Nilai yang menentukan ukuran area
530
  title='Avg Monthly Finding by Location',
531
  labels={'avg_monthly_ratio': 'Avg Monthly Finding/Person Ratio', 'nama_lokasi_full': 'Location'},
532
- color='Activity_Category', # Warna berdasarkan kategori aktivitas
533
- color_discrete_map=color_map
 
 
 
 
534
  )
535
  # Format hover
536
  fig_treemap.update_traces(
537
- hovertemplate="<b>%{label}</b><br>Avg Ratio: %{value:.2f}<br>Activity Level: %{color}<extra></extra>"
538
  )
539
  fig_treemap.update_layout(height=600)
540
  st.plotly_chart(fig_treemap, use_container_width=True)
@@ -548,14 +560,14 @@ if not avg_ratio_per_location.empty:
548
  st.markdown("### Insight")
549
  insight_text = (
550
  f"<div class='ai-insight'>"
551
- f"The treemap visualizes the average finding-to-person ratio per location, indicating reporting activity levels. "
552
- f"Locations with <span style='color:#4CAF50; font-weight:bold;'>green</span> color have a high ratio reporting"
553
- f"Those with <span style='color:#FFB300; font-weight:bold;'>yellow</span> color have a medium ratio, indicating area with moderate reporting. "
554
- f"Locations with <span style='color:#D32F2F; font-weight:bold;'>red</span> color have a low ratio indicate lower activity levels or potentially under-reporting. "
555
  f"<strong>{top_location['nama_lokasi_full']}</strong> shows the highest activity level "
556
- f"(<strong>{top_location['avg_monthly_ratio']:.2f}</strong>, color: {top_location['Activity_Category']}). "
557
  f"<strong>{low_location['nama_lokasi_full']}</strong> shows the lowest activity level "
558
- f"(<strong>{low_location['avg_monthly_ratio']:.2f}</strong>, color: {low_location['Activity_Category']}). "
559
  f"Areas with high activity (green) warrant investigation into the underlying causes of frequent findings. "
560
  f"Areas with low activity (red) should be reviewed to ensure reporting completeness and identify any hidden risks."
561
  f"</div>"
@@ -563,303 +575,475 @@ if not avg_ratio_per_location.empty:
563
  st.markdown(insight_text, unsafe_allow_html=True)
564
  else:
565
  st.warning("No data available for location ratio calculation or all ratios are NaN.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
566
 
567
- import plotly.express as px
568
- import numpy as np
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
569
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
570
  import plotly.express as px
571
  import numpy as np
572
- # =================== 3. Reporter & Executor Analysis (3a, 3b, 3c, 3d) ===================
573
- st.markdown("<h3 class='section-title'>OBJECTIVE 3 - Frequency & Response Time: Who Reports Well? Who Executes Well?</h3>", unsafe_allow_html=True)
574
 
575
- # 3a & 3b: Reporter Frequency & Executor Lead Time by nama (Average Monthly Rate per Division)
576
- col_3a, col_3b = st.columns(2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
577
 
578
- with col_3a:
579
- st.markdown("<h5>3a. Average Finding by Division (Reporter)</h5>", unsafe_allow_html=True)
580
- if 'nama' in df_local.columns:
581
- # Hitung temuan per bulan per nama
582
- findings_by_nama_month = df_local.groupby(['created_month', 'nama']).size().reset_index(name='findings_count')
583
- # Hitung jumlah orang unik per bulan per nama
584
- creators_by_nama_month = df_local.groupby(['created_month', 'nama'])['creator_nid'].nunique().reset_index(name='unique_creators')
585
- # Gabung
586
- merged_rep = findings_by_nama_month.merge(creators_by_nama_month, on=['created_month', 'nama'], how='outer')
587
- # Isi NaN dengan 0 untuk kolom yang mungkin hilang dari merge
588
- merged_rep = merged_rep.fillna({'findings_count': 0, 'unique_creators': 0})
589
- # Filter untuk menghindari pembagian dengan nol
590
- merged_rep = merged_rep[merged_rep['unique_creators'] > 0]
591
- # Hitung rasio (ignore NaN)
592
- merged_rep['ratio'] = merged_rep['findings_count'] / merged_rep['unique_creators']
593
- merged_rep['ratio'] = merged_rep['ratio'].replace([np.inf, -np.inf], np.nan)
594
 
595
- # Rata-rata bulanan per nama
596
- avg_ratio_per_nama = merged_rep.groupby('nama')['ratio'].mean().reset_index(name='avg_monthly_ratio')
597
 
598
- # Filter hasil akhir untuk menghindari NaN
599
- avg_ratio_per_nama = avg_ratio_per_nama.dropna(subset=['avg_monthly_ratio'])
600
- if not avg_ratio_per_nama.empty:
601
- # Tambahkan kolom untuk warna KE DATAFRAME
602
- # Urutkan untuk menentukan 5 teratas
603
- avg_ratio_per_nama_sorted = avg_ratio_per_nama.sort_values('avg_monthly_ratio', ascending=True)
604
- top_5_indices = avg_ratio_per_nama_sorted.tail(5).index
605
- # Buat warna default, lalu ubah untuk top 5
606
- avg_ratio_per_nama_sorted['color'] = '#1f77b4' # Warna default plotly
607
- avg_ratio_per_nama_sorted.loc[avg_ratio_per_nama_sorted.index.isin(top_5_indices), 'color'] = '#4CAF50' # Warna hijau untuk top 5
608
-
609
- # Pilihan sorting
610
- sort_option_3a = st.selectbox("Sort 3a by:", ["Lowest First", "Highest First"], key='sort_3a')
611
- if sort_option_3a == "Highest First":
612
- avg_ratio_per_nama_sorted = avg_ratio_per_nama_sorted.sort_values('avg_monthly_ratio', ascending=False)
613
- # Jika "Lowest First", sudah diurutkan ascending di atas
614
-
615
- fig_rep_nama = px.bar(
616
- avg_ratio_per_nama_sorted,
617
- x='avg_monthly_ratio',
618
- y='nama',
619
- orientation='h',
620
- title='Avg Monthly Finding by Division',
621
- labels={'avg_monthly_ratio': 'Avg Monthly Finding/Person Ratio', 'nama': 'Division'},
622
- color='color', # Gunakan nama kolom yang ditambahkan
623
- color_discrete_map={c: c for c in avg_ratio_per_nama_sorted['color'].unique()}, # Peta warna
624
- text=avg_ratio_per_nama_sorted['avg_monthly_ratio'].apply(lambda x: f'{x:.2f}') # Format 2 angka desimal
625
- )
626
- # Hapus legend untuk warna karena tidak informatif
627
- fig_rep_nama.update_layout(yaxis={'categoryorder': 'total ascending'}, height=500, showlegend=False)
628
- fig_rep_nama.update_traces(textposition='auto') # Posisi teks otomatis
629
- st.plotly_chart(fig_rep_nama, use_container_width=True)
630
-
631
- # AI Insight for 3a
632
- top_nama = avg_ratio_per_nama_sorted.iloc[-1] if not avg_ratio_per_nama_sorted.empty else None
633
- low_nama = avg_ratio_per_nama_sorted.iloc[0] if not avg_ratio_per_nama_sorted.empty else None
634
- if top_nama is not None and low_nama is not None:
635
- st.markdown("### Insight")
636
- insight_text = (
637
- f"<div class='ai-insight'>"
638
- f"The division <strong>{top_nama['nama']}</strong> has the highest average finding-to-person ratio "
639
- f"(<strong>{top_nama['avg_monthly_ratio']:.2f}</strong>), indicating potentially high reporting activity or exposure. "
640
- f"Conversely, <strong>{low_nama['nama']}</strong> has the lowest ratio "
641
- f"(<strong>{low_nama['avg_monthly_ratio']:.2f}</strong>), suggesting lower activity or potentially under-reporting. "
642
- f"Monitor high-ratio divisions for potential systemic issues and verify reporting completeness in low-ratio ones."
643
- f"</div>"
644
- )
645
- st.markdown(insight_text, unsafe_allow_html=True)
646
- else:
647
- st.warning("No data or all ratios are NaN for reporter analysis by division.")
648
- else:
649
- st.warning("Column 'nama' not available for reporter analysis (3a).")
650
 
651
- with col_3b:
652
- st.markdown("<h5>3b. Average by Division (Executor)</h5>", unsafe_allow_html=True)
653
- if 'nama' in df_local.columns and 'days_to_close' in df_local.columns:
654
- # Hitung rata-rata lead time per nama per bulan
655
- leadtime_by_nama_month = df_local.groupby(['created_month', 'nama'])['days_to_close'].mean().reset_index(name='avg_leadtime')
656
- # Rata-rata bulanan keseluruhan per nama
657
- avg_leadtime_nama = leadtime_by_nama_month.groupby('nama')['avg_leadtime'].mean().reset_index(name='avg_monthly_leadtime')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
658
 
659
- # Filter hasil akhir untuk menghindari NaN
660
- avg_leadtime_nama = avg_leadtime_nama.dropna(subset=['avg_monthly_leadtime'])
661
- if not avg_leadtime_nama.empty:
662
- # Tambahkan kolom untuk warna KE DATAFRAME
663
- # Urutkan untuk menentukan 5 teratas
664
- avg_leadtime_nama_sorted = avg_leadtime_nama.sort_values('avg_monthly_leadtime', ascending=True)
665
- top_5_indices = avg_leadtime_nama_sorted.tail(5).index
666
- # Buat warna default, lalu ubah untuk top 5
667
- avg_leadtime_nama_sorted['color'] = '#1f77b4' # Warna default plotly
668
- avg_leadtime_nama_sorted.loc[avg_leadtime_nama_sorted.index.isin(top_5_indices), 'color'] = '#D32F2F' # Warna merah untuk top 5
669
-
670
- # Pilihan sorting
671
- sort_option_3b = st.selectbox("Sort 3b by:", ["Fastest First", "Slowest First"], key='sort_3b')
672
- if sort_option_3b == "Slowest First":
673
- avg_leadtime_nama_sorted = avg_leadtime_nama_sorted.sort_values('avg_monthly_leadtime', ascending=False)
674
- # Jika "Fastest First", sudah diurutkan ascending di atas
675
-
676
- fig_exec_nama = px.bar(
677
- avg_leadtime_nama_sorted,
678
- x='avg_monthly_leadtime',
679
- y='nama',
680
- orientation='h',
681
- title='Avg Monthly Lead Time by Division',
682
- labels={'avg_monthly_leadtime': 'Avg Lead Time (Days)', 'nama': 'Division'},
683
- color='color', # Gunakan nama kolom yang ditambahkan
684
- color_discrete_map={c: c for c in avg_leadtime_nama_sorted['color'].unique()}, # Peta warna
685
- text=avg_leadtime_nama_sorted['avg_monthly_leadtime'].apply(lambda x: f'{x:.2f}') # Format 2 angka desimal
686
- )
687
- # Hapus legend untuk warna karena tidak informatif
688
- fig_exec_nama.update_layout(yaxis={'categoryorder': 'total ascending'}, height=500, showlegend=False)
689
- fig_exec_nama.update_traces(textposition='auto') # Posisi teks otomatis
690
- st.plotly_chart(fig_exec_nama, use_container_width=True)
691
-
692
- # AI Insight for 3b
693
- top_nama = avg_leadtime_nama_sorted.iloc[-1] if not avg_leadtime_nama_sorted.empty else None
694
- low_nama = avg_leadtime_nama_sorted.iloc[0] if not avg_leadtime_nama_sorted.empty else None
695
- if top_nama is not None and low_nama is not None:
696
- st.markdown("### Insight")
697
- insight_text = (
698
- f"<div class='ai-insight'>"
699
- f"The division <strong>{top_nama['nama']}</strong> has the highest average lead time "
700
- f"(<strong>{top_nama['avg_monthly_leadtime']:.2f} days</strong>), indicating slower resolution. "
701
- f"<strong>{low_nama['nama']}</strong> has the fastest average resolution "
702
- f"(<strong>{low_nama['avg_monthly_leadtime']:.2f} days</strong>). "
703
- f"Focus on improving SLA compliance in divisions with longer lead times."
704
- f"</div>"
705
- )
706
- st.markdown(insight_text, unsafe_allow_html=True)
707
- else:
708
- st.warning("No data or all lead times are NaN for executor analysis by division.")
 
 
709
  else:
710
- st.warning("Columns 'nama' or 'days_to_close' not available for executor analysis (3b).")
 
 
 
 
 
 
 
 
 
 
 
 
 
711
 
712
- # 3c & 3d: Reporter Frequency & Executor Lead Time by creator_name and nama_pic (Average Monthly Rate per Person)
713
- col_3c, col_3d = st.columns(2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
714
 
715
  with col_3c:
716
  st.markdown("<h5>3c. Average Finding Rate per Reporter (Name)</h5>", unsafe_allow_html=True)
717
- if 'creator_name' in df_local.columns:
718
- # Hitung temuan per bulan per creator_name
719
- findings_by_creator_month = df_local.groupby(['created_month', 'creator_name']).size().reset_index(name='findings_count')
720
- # Hitung jumlah bulan aktif per creator_name
721
- active_months_by_creator = findings_by_creator_month.groupby('creator_name')['created_month'].nunique().reset_index(name='active_months')
722
- # Gabung untuk mendapatkan total temuan per creator
723
- total_findings_by_creator = findings_by_creator_month.groupby('creator_name')['findings_count'].sum().reset_index()
724
- # Gabung semua
725
- merged_rep_creator = total_findings_by_creator.merge(active_months_by_creator, on='creator_name', how='outer')
726
- # Isi NaN dengan 0
727
- merged_rep_creator = merged_rep_creator.fillna({'findings_count': 0, 'active_months': 0})
728
- # Filter untuk menghindari pembagian dengan nol (jika seseorang tidak aktif sepanjang periode)
729
- merged_rep_creator = merged_rep_creator[merged_rep_creator['active_months'] > 0]
730
- # Hitung rata-rata bulanan (ignore NaN)
731
- merged_rep_creator['avg_monthly_rate'] = merged_rep_creator['findings_count'] / merged_rep_creator['active_months']
732
- merged_rep_creator['avg_monthly_rate'] = merged_rep_creator['avg_monthly_rate'].replace([np.inf, -np.inf], np.nan)
733
-
734
- # Filter hasil akhir untuk menghindari NaN
735
- avg_rate_per_creator = merged_rep_creator.dropna(subset=['avg_monthly_rate'])
736
- if not avg_rate_per_creator.empty:
737
- # Tambahkan kolom untuk warna KE DATAFRAME
738
- # Urutkan untuk menentukan 5 teratas
739
- avg_rate_per_creator_sorted = avg_rate_per_creator.sort_values('avg_monthly_rate', ascending=True)
740
- top_5_indices = avg_rate_per_creator_sorted.tail(5).index
741
- # Buat warna default, lalu ubah untuk top 5
742
- avg_rate_per_creator_sorted['color'] = '#1f77b4' # Warna default plotly
743
- avg_rate_per_creator_sorted.loc[avg_rate_per_creator_sorted.index.isin(top_5_indices), 'color'] = '#4CAF50' # Warna hijau untuk top 5
744
-
745
- # Pilihan sorting
746
- sort_option_3c = st.selectbox("Sort 3c by:", ["Lowest First", "Highest First"], key='sort_3c')
747
- if sort_option_3c == "Highest First":
748
- avg_rate_per_creator_sorted = avg_rate_per_creator_sorted.sort_values('avg_monthly_rate', ascending=False)
749
- # Jika "Lowest First", sudah diurutkan ascending di atas
750
-
751
- # Ambil top 10 untuk visualisasi
752
- top10_creators = avg_rate_per_creator_sorted.tail(1000) # Ambil 10 terakhir setelah sorting
753
- fig_rep_creator = px.bar(
754
- top10_creators,
755
- x='avg_monthly_rate',
756
- y='creator_name',
757
- orientation='h',
758
- title='Avg Monthly Finding by Creator Name',
759
- labels={'avg_monthly_rate': 'Avg Monthly Finding Rate', 'creator_name': 'Creator Name'},
760
- color='color', # Gunakan nama kolom yang ditambahkan
761
- color_discrete_map={c: c for c in top10_creators['color'].unique()}, # Peta warna
762
- text=top10_creators['avg_monthly_rate'].apply(lambda x: f'{x:.2f}') # Format 2 angka desimal
763
- )
764
- # Hapus legend untuk warna karena tidak informatif
765
- fig_rep_creator.update_layout(yaxis={'categoryorder': 'total ascending'}, height=500, showlegend=False)
766
- fig_rep_creator.update_traces(textposition='auto') # Posisi teks otomatis
767
- st.plotly_chart(fig_rep_creator, use_container_width=True)
768
-
769
- # AI Insight for 3c
770
- top_creator = avg_rate_per_creator_sorted.iloc[-1] if not avg_rate_per_creator_sorted.empty else None
771
- low_creator = avg_rate_per_creator_sorted.iloc[0] if not avg_rate_per_creator_sorted.empty else None
772
- if top_creator is not None and low_creator is not None:
773
- st.markdown("### Insight")
774
- insight_text = (
775
- f"<div class='ai-insight'>"
776
- f"The reporter <strong>{top_creator['creator_name']}</strong> has the highest average monthly finding rate "
777
- f"(<strong>{top_creator['avg_monthly_rate']:.2f}</strong>), indicating active engagement. "
778
- f"<strong>{low_creator['creator_name']}</strong> has the lowest rate "
779
- f"(<strong>{low_creator['avg_monthly_rate']:.2f}</strong>), which might indicate lower activity or under-reporting. "
780
- f"Recognize high performers and investigate low performers."
781
- f"</div>"
782
- )
783
- st.markdown(insight_text, unsafe_allow_html=True)
784
  else:
785
- st.warning("No data or all rates are NaN for reporter analysis by creator_name.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
786
  else:
787
- st.warning("Column 'creator_name' not available for reporter analysis (3c).")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
788
 
789
  with col_3d:
790
  st.markdown("<h5>3d. Average Lead Time by Executor (Name)</h5>", unsafe_allow_html=True)
791
- if 'nama_pic' in df_local.columns and 'days_to_close' in df_local.columns:
792
- # Hitung rata-rata lead time per executor per bulan
793
- leadtime_by_executor_month = df_local.groupby(['created_month', 'nama_pic'])['days_to_close'].mean().reset_index(name='avg_leadtime')
794
- # Hitung jumlah bulan aktif per executor
795
- active_months_by_executor = leadtime_by_executor_month.groupby('nama_pic')['created_month'].nunique().reset_index(name='active_months')
796
- # Hitung total lead time per executor
797
- total_leadtime_by_executor = leadtime_by_executor_month.groupby('nama_pic')['avg_leadtime'].sum().reset_index()
798
- # Gabung semua
799
- merged_exec_pic = total_leadtime_by_executor.merge(active_months_by_executor, on='nama_pic', how='outer')
800
- # Isi NaN dengan 0
801
- merged_exec_pic = merged_exec_pic.fillna({'avg_leadtime': 0, 'active_months': 0})
802
- # Filter untuk menghindari pembagian dengan nol
803
- merged_exec_pic = merged_exec_pic[merged_exec_pic['active_months'] > 0]
804
- # Hitung rata-rata bulanan (ignore NaN)
805
- merged_exec_pic['avg_monthly_leadtime'] = merged_exec_pic['avg_leadtime'] / merged_exec_pic['active_months']
806
- merged_exec_pic['avg_monthly_leadtime'] = merged_exec_pic['avg_monthly_leadtime'].replace([np.inf, -np.inf], np.nan)
807
-
808
- # Filter hasil akhir untuk menghindari NaN
809
- avg_leadtime_per_executor = merged_exec_pic.dropna(subset=['avg_monthly_leadtime'])
810
- if not avg_leadtime_per_executor.empty:
811
- # Tambahkan kolom untuk warna KE DATAFRAME
812
- # Urutkan untuk menentukan 5 teratas
813
- avg_leadtime_per_executor_sorted = avg_leadtime_per_executor.sort_values('avg_monthly_leadtime', ascending=True)
814
- top_5_indices = avg_leadtime_per_executor_sorted.tail(5).index
815
- # Buat warna default, lalu ubah untuk top 5
816
- avg_leadtime_per_executor_sorted['color'] = '#1f77b4' # Warna default plotly
817
- avg_leadtime_per_executor_sorted.loc[avg_leadtime_per_executor_sorted.index.isin(top_5_indices), 'color'] = '#D32F2F' # Warna merah untuk top 5
818
-
819
- # Pilihan sorting
820
- sort_option_3d = st.selectbox("Sort 3d by:", ["Fastest First", "Slowest First"], key='sort_3d')
821
- if sort_option_3d == "Slowest First":
822
- avg_leadtime_per_executor_sorted = avg_leadtime_per_executor_sorted.sort_values('avg_monthly_leadtime', ascending=False)
823
- # Jika "Fastest First", sudah diurutkan ascending di atas
824
-
825
- # Ambil top 10 untuk visualisasi
826
- top10_executors = avg_leadtime_per_executor_sorted.nlargest(1000, 'avg_monthly_leadtime') # Ambil 10 terlama
827
- fig_exec_pic = px.bar(
828
- top10_executors,
829
- x='avg_monthly_leadtime',
830
- y='nama_pic',
831
- orientation='h',
832
- title='Avg Monthly Lead Time by Executor (Name)',
833
- labels={'avg_monthly_leadtime': 'Avg Monthly Lead Time (Days)', 'nama_pic': 'Executor Name'},
834
- color='color', # Gunakan nama kolom yang ditambahkan
835
- color_discrete_map={c: c for c in top10_executors['color'].unique()}, # Peta warna
836
- text=top10_executors['avg_monthly_leadtime'].apply(lambda x: f'{x:.2f}') # Format 2 angka desimal
837
- )
838
- # Hapus legend untuk warna karena tidak informatif
839
- fig_exec_pic.update_layout(yaxis={'categoryorder': 'total ascending'}, height=500, showlegend=False)
840
- fig_exec_pic.update_traces(textposition='auto') # Posisi teks otomatis
841
- st.plotly_chart(fig_exec_pic, use_container_width=True)
842
-
843
- # AI Insight for 3d
844
- top_executor = avg_leadtime_per_executor_sorted.iloc[-1] if not avg_leadtime_per_executor_sorted.empty else None
845
- low_executor = avg_leadtime_per_executor_sorted.iloc[0] if not avg_leadtime_per_executor_sorted.empty else None
846
- if top_executor is not None and low_executor is not None:
847
- st.markdown("### Insight")
848
- insight_text = (
849
- f"<div class='ai-insight'>"
850
- f"The executor <strong>{top_executor['nama_pic']}</strong> has the highest average monthly lead time "
851
- f"(<strong>{top_executor['avg_monthly_leadtime']:.2f} days</strong>), indicating slower resolution. "
852
- f"<strong>{low_executor['nama_pic']}</strong> resolves tasks fastest on average "
853
- f"(<strong>{low_executor['avg_monthly_leadtime']:.2f} days</strong>). "
854
- f"Focus on improving SLA compliance for executors with longer lead times."
855
- f"</div>"
856
- )
857
- st.markdown(insight_text, unsafe_allow_html=True)
858
- else:
859
- st.warning("No data or all lead times are NaN for executor analysis by nama_pic.")
860
  else:
861
- st.warning("Columns 'nama_pic' or 'days_to_close' not available for executor analysis (3d).")
862
- ####OBJECTIVE 4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
863
  try:
864
  from wordcloud import WordCloud
865
  import matplotlib.pyplot as plt
 
475
  else:
476
  st.error("Column 'temuan_kode_distrik' not found in the data. Cannot determine PG/UM areas.")
477
  st.stop()
478
+ # =================== OBJECTIVE 2 — Active vs Inactive Locations (Treemap with Color Gradient) ===================
479
+ st.markdown(
480
+ """
481
+ <style>
482
+ .section-title {
483
+ text-align: center;
484
+ font-size: 1.5rem;
485
+ font-weight: 600;
486
+ color: #2c3e50;
487
+ margin-bottom: 1.2rem;
488
+ }
489
+ .ai-insight {
490
+ background-color: #f8f9fa;
491
+ padding: 12px;
492
+ border-left: 4px solid #27ae60;
493
+ border-radius: 0 4px 4px 0;
494
+ font-size: 0.95rem;
495
+ line-height: 1.5;
496
+ margin-top: 1rem;
497
+ }
498
+ </style>
499
+ <h3 class='section-title'>OBJECTIVE 2 — Active vs Inactive Locations: Who Leads?</h3>
500
+ """,
501
+ unsafe_allow_html=True
502
+ )
503
 
504
+ df_local = df_filtered.copy()
505
+ if df_local.empty:
506
+ st.warning("No data available after filtering.")
507
+ st.stop()
508
+
509
+ df_local['created_month'] = df_local['created_at'].dt.to_period('M')
510
 
511
  # Hitung temuan per bulan per lokasi
512
  findings_by_location_month = df_local.groupby(['created_month', 'nama_lokasi_full']).size().reset_index(name='findings_count')
 
519
  # Filter untuk menghindari pembagian dengan nol
520
  merged_loc = merged_loc[merged_loc['unique_creators'] > 0]
521
  # Hitung rasio (ignore NaN)
 
522
  merged_loc['ratio'] = merged_loc['findings_count'] / merged_loc['unique_creators']
523
  merged_loc['ratio'] = merged_loc['ratio'].replace([np.inf, -np.inf], np.nan)
524
 
525
  # Rata-rata bulanan per lokasi
 
 
526
  avg_ratio_per_location = merged_loc.groupby('nama_lokasi_full')['ratio'].mean().reset_index(name='avg_monthly_ratio')
527
 
528
  # Filter hasil akhir untuk menghindari NaN
529
  avg_ratio_per_location = avg_ratio_per_location.dropna(subset=['avg_monthly_ratio'])
530
 
531
+ # Plot Treemap dengan gradasi warna
532
  if not avg_ratio_per_location.empty:
533
+ # Gunakan color_continuous_scale untuk gradasi warna: merah → kuning → hijau
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
534
  fig_treemap = px.treemap(
535
  avg_ratio_per_location,
536
  path=['nama_lokasi_full'], # Path untuk hierarki (hanya satu level di sini)
537
  values='avg_monthly_ratio', # Nilai yang menentukan ukuran area
538
  title='Avg Monthly Finding by Location',
539
  labels={'avg_monthly_ratio': 'Avg Monthly Finding/Person Ratio', 'nama_lokasi_full': 'Location'},
540
+ color='avg_monthly_ratio', # Warna berdasarkan nilai rasio (bukan kategori)
541
+ color_continuous_scale=[
542
+ [0.0, '#D32F2F'], # Merah untuk rendah
543
+ [0.5, '#FFB300'], # Kuning untuk sedang
544
+ [1.0, '#4CAF50'] # Hijau untuk tinggi
545
+ ]
546
  )
547
  # Format hover
548
  fig_treemap.update_traces(
549
+ hovertemplate="<b>%{label}</b><br>Avg Ratio: %{value:.2f}<extra></extra>"
550
  )
551
  fig_treemap.update_layout(height=600)
552
  st.plotly_chart(fig_treemap, use_container_width=True)
 
560
  st.markdown("### Insight")
561
  insight_text = (
562
  f"<div class='ai-insight'>"
563
+ f"The treemap visualizes the average finding-to-person ratio per location using a <strong>color gradient</strong>, indicating reporting activity levels. "
564
+ f"Locations with <span style='color:#4CAF50; font-weight:bold;'>green</span> color have a high ratio, indicating high reporting activity or exposure. "
565
+ f"Those with <span style='color:#FFB300; font-weight:bold;'>yellow</span> color have a medium ratio, indicating moderate reporting. "
566
+ f"Locations with <span style='color:#D32F2F; font-weight:bold;'>red</span> color have a low ratio, indicating lower activity levels or potentially under-reporting. "
567
  f"<strong>{top_location['nama_lokasi_full']}</strong> shows the highest activity level "
568
+ f"(<strong>{top_location['avg_monthly_ratio']:.2f}</strong>). "
569
  f"<strong>{low_location['nama_lokasi_full']}</strong> shows the lowest activity level "
570
+ f"(<strong>{low_location['avg_monthly_ratio']:.2f}</strong>). "
571
  f"Areas with high activity (green) warrant investigation into the underlying causes of frequent findings. "
572
  f"Areas with low activity (red) should be reviewed to ensure reporting completeness and identify any hidden risks."
573
  f"</div>"
 
575
  st.markdown(insight_text, unsafe_allow_html=True)
576
  else:
577
  st.warning("No data available for location ratio calculation or all ratios are NaN.")
578
+ # =================== OBJECTIVE 3 — Active vs Inactive Divisions (Treemap with Color Gradient) ===================
579
+ st.markdown(
580
+ """
581
+ <style>
582
+ .section-title {
583
+ text-align: center;
584
+ font-size: 1.5rem;
585
+ font-weight: 600;
586
+ color: #2c3e50;
587
+ margin-bottom: 1.2rem;
588
+ }
589
+ .ai-insight {
590
+ background-color: #f8f9fa;
591
+ padding: 12px;
592
+ border-left: 4px solid #27ae60;
593
+ border-radius: 0 4px 4px 0;
594
+ font-size: 0.95rem;
595
+ line-height: 1.5;
596
+ margin-top: 1rem;
597
+ }
598
+ </style>
599
+ <h3 class='section-title'>OBJECTIVE 3 — Active vs Inactive Division: Who Leads?</h3>
600
+ """,
601
+ unsafe_allow_html=True
602
+ )
603
 
604
+ df_local = df_filtered.copy()
605
+ if df_local.empty:
606
+ st.warning("No data available after filtering.")
607
+ st.stop()
608
+
609
+ df_local['created_month'] = df_local['created_at'].dt.to_period('M')
610
+
611
+ # Count findings per month per division (column 'nama')
612
+ findings_by_location_month = df_local.groupby(['created_month', 'nama']).size().reset_index(name='findings_count')
613
+ # Count unique creators (creator_nid) per month per division
614
+ creators_by_location_month = df_local.groupby(['created_month', 'nama'])['creator_nid'].nunique().reset_index(name='unique_creators')
615
+ # Gabung
616
+ merged_loc = findings_by_location_month.merge(creators_by_location_month, on=['created_month', 'nama'], how='outer')
617
+ # Isi NaN dengan 0 untuk kolom yang mungkin hilang dari merge
618
+ merged_loc = merged_loc.fillna({'findings_count': 0, 'unique_creators': 0})
619
+ # Filter untuk menghindari pembagian dengan nol
620
+ merged_loc = merged_loc[merged_loc['unique_creators'] > 0]
621
+ # Hitung rasio (ignore NaN)
622
+ merged_loc['ratio'] = merged_loc['findings_count'] / merged_loc['unique_creators']
623
+ merged_loc['ratio'] = merged_loc['ratio'].replace([np.inf, -np.inf], np.nan)
624
+
625
+ # Monthly average of the finding-to-creator ratio per division
626
+ avg_ratio_per_location = merged_loc.groupby('nama')['ratio'].mean().reset_index(name='avg_monthly_ratio')
627
+
628
+ # Filter hasil akhir untuk menghindari NaN
629
+ avg_ratio_per_location = avg_ratio_per_location.dropna(subset=['avg_monthly_ratio'])
630
 
631
+ # Plot Treemap dengan gradasi warna
632
+ if not avg_ratio_per_location.empty:
633
+ # Gunakan color_continuous_scale untuk gradasi warna: merah → kuning → hijau
634
+ fig_treemap = px.treemap(
635
+ avg_ratio_per_location,
636
+ path=['nama'], # Path untuk hierarki (hanya satu level di sini)
637
+ values='avg_monthly_ratio', # Nilai yang menentukan ukuran area
638
+ title='Avg Monthly Finding by Division',
639
+ labels={'avg_monthly_ratio': 'Avg Monthly Finding/Person Ratio', 'nama': 'Location'},
640
+ color='avg_monthly_ratio', # Warna berdasarkan nilai rasio (bukan kategori)
641
+ color_continuous_scale=[
642
+ [0.0, '#D32F2F'], # Merah untuk rendah
643
+ [0.5, '#FFB300'], # Kuning untuk sedang
644
+ [1.0, '#4CAF50'] # Hijau untuk tinggi
645
+ ]
646
+ )
647
+ # Format hover
648
+ fig_treemap.update_traces(
649
+ hovertemplate="<b>%{label}</b><br>Avg Ratio: %{value:.2f}<extra></extra>"
650
+ )
651
+ fig_treemap.update_layout(height=600)
652
+ st.plotly_chart(fig_treemap, use_container_width=True)
653
+
654
+ # AI Insight for the division treemap (business-focused)
655
+ if not avg_ratio_per_location.empty:
656
+ # Find the divisions with the highest and lowest average ratio
657
+ top_location = avg_ratio_per_location.loc[avg_ratio_per_location['avg_monthly_ratio'].idxmax()]
658
+ low_location = avg_ratio_per_location.loc[avg_ratio_per_location['avg_monthly_ratio'].idxmin()]
659
+
660
+ st.markdown("### Insight")
661
+ insight_text = (
662
+ f"<div class='ai-insight'>"
663
+ f"The treemap visualizes the average finding-to-person ratio per location using a <strong>color gradient</strong>, indicating reporting activity levels. "
664
+ f"Locations with <span style='color:#4CAF50; font-weight:bold;'>green</span> color have a high ratio, indicating high reporting activity or exposure. "
665
+ f"Those with <span style='color:#FFB300; font-weight:bold;'>yellow</span> color have a medium ratio, indicating moderate reporting. "
666
+ f"Locations with <span style='color:#D32F2F; font-weight:bold;'>red</span> color have a low ratio, indicating lower activity levels or potentially under-reporting. "
667
+ f"<strong>{top_location['nama']}</strong> shows the highest activity level "
668
+ f"(<strong>{top_location['avg_monthly_ratio']:.2f}</strong>). "
669
+ f"<strong>{low_location['nama']}</strong> shows the lowest activity level "
670
+ f"(<strong>{low_location['avg_monthly_ratio']:.2f}</strong>). "
671
+ f"Areas with high activity (green) warrant investigation into the underlying causes of frequent findings. "
672
+ f"Areas with low activity (red) should be reviewed to ensure reporting completeness and identify any hidden risks."
673
+ f"</div>"
674
+ )
675
+ st.markdown(insight_text, unsafe_allow_html=True)
676
+ else:
677
+ st.warning("No data available for location ratio calculation or all ratios are NaN.")
678
+
679
+ import streamlit as st
680
  import plotly.express as px
681
  import numpy as np
682
+ import pandas as pd
 
683
 
684
+ # =================== OBJECTIVE 3 - Frequency & Response Time ===================
685
# Section header for Objective 3 plus the CSS classes used by the charts and
# insight boxes rendered below it.
objective_3_header_html = """
<style>
.section-title {
text-align: center;
font-size: 1.5rem;
font-weight: 600;
color: #2c3e50;
margin-bottom: 1.2rem;
}
.ai-insight {
background-color: #f8f9fa;
padding: 12px;
border-left: 4px solid #27ae60;
border-radius: 0 4px 4px 0;
font-size: 0.95rem;
line-height: 1.5;
margin-top: 1rem;
}
</style>
<h3 class='section-title'>OBJECTIVE 3 — Frequency & Response Time: Who Reports Well? Who Executes Well?</h3>
"""
st.markdown(objective_3_header_html, unsafe_allow_html=True)

# Work on a private copy of the filtered data; stop the script when nothing is left.
df_local = df_filtered.copy()
if df_local.empty:
    st.warning("No data available after filtering.")
    st.stop()

# Monthly period of each finding's creation timestamp, used as the grouping key below.
df_local['created_month'] = df_local['created_at'].dt.to_period('M')
 
716
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
717
 
718
+ # ─── Helper: Hitung rasio per nama (Reporter) ────────────────────────────────
719
def compute_reporter_ratio_by_nama(df):
    """Return the average monthly findings-per-creator ratio for each ``nama``.

    For every (``created_month``, ``nama``) pair the number of rows is divided
    by the number of distinct ``creator_nid`` values; the monthly ratios are
    then averaged per ``nama``.

    Args:
        df: DataFrame with ``created_month``, ``nama`` and ``creator_nid``
            columns.

    Returns:
        DataFrame with columns ``nama`` and ``avg_monthly_ratio``; an empty
        DataFrame when the ``nama`` column is missing.
    """
    if 'nama' not in df.columns:
        return pd.DataFrame()

    keys = ['created_month', 'nama']
    per_group = df.groupby(keys)
    counts = per_group.size().reset_index(name='findings_count')
    reporters = per_group['creator_nid'].nunique().reset_index(name='unique_creators')

    monthly = counts.merge(reporters, on=keys, how='outer').fillna(
        {'findings_count': 0, 'unique_creators': 0}
    )
    # Months without any identifiable creator cannot yield a ratio.
    monthly = monthly.loc[monthly['unique_creators'] > 0].copy()
    monthly['ratio'] = (
        monthly['findings_count'] / monthly['unique_creators']
    ).replace([np.inf, -np.inf], np.nan)

    return (
        monthly.groupby('nama')['ratio']
        .mean()
        .reset_index(name='avg_monthly_ratio')
        .dropna(subset=['avg_monthly_ratio'])
    )
733
+
734
+
735
+ # ─── Helper: Hitung lead time per nama (Executor) ───────────────────────────
736
def compute_executor_leadtime_by_nama(df):
    """Return the average of the monthly mean ``days_to_close`` for each ``nama``.

    Args:
        df: DataFrame with ``created_month``, ``nama`` and ``days_to_close``
            columns.

    Returns:
        DataFrame with columns ``nama`` and ``avg_monthly_leadtime``; an empty
        DataFrame when ``nama`` or ``days_to_close`` is missing.
    """
    if not {'nama', 'days_to_close'}.issubset(df.columns):
        return pd.DataFrame()

    # Step 1: mean lead time per (month, nama).
    monthly = (
        df.groupby(['created_month', 'nama'])['days_to_close']
        .mean()
        .reset_index(name='avg_leadtime')
    )
    # Step 2: mean of those monthly means per nama (NaN months are ignored by mean()).
    overall = (
        monthly.groupby('nama')['avg_leadtime']
        .mean()
        .reset_index(name='avg_monthly_leadtime')
    )
    return overall.dropna(subset=['avg_monthly_leadtime'])
744
 
745
+
746
+ # ─── Helper: Hitung rasio per creator_name ──────────────────────────────────
747
def compute_reporter_rate_by_creator(df):
    """Return each creator's findings per active month.

    A month is "active" for a creator when at least one row exists for that
    (``created_month``, ``creator_name``) pair.

    Args:
        df: DataFrame with ``created_month`` and ``creator_name`` columns.

    Returns:
        DataFrame with columns ``creator_name``, ``findings_count`` (total),
        ``active_months`` and ``avg_monthly_rate``; an empty DataFrame when
        ``creator_name`` is missing.
    """
    if 'creator_name' not in df.columns:
        return pd.DataFrame()

    monthly = (
        df.groupby(['created_month', 'creator_name'])
        .size()
        .reset_index(name='findings_count')
    )
    # Total findings and number of distinct months, one row per creator.
    summary = (
        monthly.groupby('creator_name')
        .agg(
            findings_count=('findings_count', 'sum'),
            active_months=('created_month', 'nunique'),
        )
        .reset_index()
        .fillna({'findings_count': 0, 'active_months': 0})
    )
    summary = summary.loc[summary['active_months'] > 0].copy()
    summary['avg_monthly_rate'] = (
        summary['findings_count'] / summary['active_months']
    ).replace([np.inf, -np.inf], np.nan)
    return summary.dropna(subset=['avg_monthly_rate'])
761
+
762
+
763
+ # ─── Helper: Hitung lead time per nama_pic ──────────────────────────────────
764
def compute_executor_leadtime_by_pic(df):
    """Return the average of the monthly mean ``days_to_close`` per ``nama_pic``.

    Months in which an executor has no usable ``days_to_close`` value (the
    monthly mean is NaN) are excluded from both the sum and the month count.
    Previously such months were summed as 0 but still counted, which biased
    the average downwards and gave executors without any closed finding a
    lead time of 0 days.

    Args:
        df: DataFrame with ``created_month``, ``nama_pic`` and
            ``days_to_close`` columns.

    Returns:
        DataFrame with columns ``nama_pic``, ``avg_leadtime`` (sum of the
        monthly means), ``active_months`` (months with a measurable lead time)
        and ``avg_monthly_leadtime``; an empty DataFrame when ``nama_pic`` or
        ``days_to_close`` is missing.
    """
    if 'nama_pic' not in df.columns or 'days_to_close' not in df.columns:
        return pd.DataFrame()

    monthly = (
        df.groupby(['created_month', 'nama_pic'])['days_to_close']
        .mean()
        .reset_index(name='avg_leadtime')
    )
    # A month with only missing lead times carries no information; keeping it
    # would inflate the denominator without adding to the numerator.
    monthly = monthly.dropna(subset=['avg_leadtime'])

    summary = (
        monthly.groupby('nama_pic')
        .agg(
            avg_leadtime=('avg_leadtime', 'sum'),
            active_months=('created_month', 'nunique'),
        )
        .reset_index()
    )
    summary = summary.loc[summary['active_months'] > 0].copy()
    summary['avg_monthly_leadtime'] = (
        summary['avg_leadtime'] / summary['active_months']
    ).replace([np.inf, -np.inf], np.nan)
    return summary.dropna(subset=['avg_monthly_leadtime'])
778
+
779
+
780
+ # ─── Data untuk 3a & 3c ──────────────────────────────────────────────────────
781
# Division-level inputs (by 'nama'): reporter ratio for chart 3a, lead time for chart 3b.
avg_ratio_per_nama = compute_reporter_ratio_by_nama(df_local)
avg_leadtime_nama = compute_executor_leadtime_by_nama(df_local)

# Person-level inputs: reporter rate for chart 3c, executor lead time for chart 3d.
avg_rate_per_creator = compute_reporter_rate_by_creator(df_local)
avg_leadtime_per_executor = compute_executor_leadtime_by_pic(df_local)
787
+
788
+
789
+ # ─── Layout: 2 Baris 3a & 3c di baris pertama, 3b & 3d di baris kedua ─────
790
+ # Baris 1: 3a & 3c
791
def _render_ranked_bar(data, *, chart_id, label_col, value_col, highlight_color,
                       title, labels, empty_message, insight_template):
    """Render a Top-10/Bottom-10 horizontal bar chart with an insight box.

    Shared by charts 3a-3d so that ordering, highlighting and the insight text
    follow the same rules everywhere.

    Args:
        data: DataFrame holding ``label_col`` and ``value_col``.
        chart_id: Short id such as ``"3a"``; used in the selectbox label
            (``"Show 3a:"``) and widget key (``"sort_3a"``).
        label_col: Column plotted on the y axis.
        value_col: Numeric column plotted on the x axis and used for ranking.
        highlight_color: Colour applied to the five largest displayed bars.
        title: Chart title.
        labels: Axis label mapping passed to ``px.bar``.
        empty_message: Warning shown when ``data`` is empty.
        insight_template: ``str.format`` template using ``top_name``,
            ``top_value``, ``low_name`` and ``low_value``.

    Returns:
        The Plotly figure, or ``None`` when ``data`` is empty.
    """
    if data.empty:
        st.warning(empty_message)
        return None

    view = st.selectbox(f"Show {chart_id}:", ["Top 10", "Bottom 10"], key=f'sort_{chart_id}')

    # Full ranking, largest value first.
    ranked = data.sort_values(value_col, ascending=False)
    subset = ranked.head(10) if view == "Top 10" else ranked.tail(10)
    # reset_index returns a new frame, so adding the colour column is safe.
    shown = subset.reset_index(drop=True)

    # Highlight the five largest displayed values; this is done before the
    # reversal below so "first five" really means "largest five".
    shown['color'] = '#1f77b4'
    shown.loc[shown.index[:5], 'color'] = highlight_color

    # A horizontal bar chart draws the first category at the bottom of the
    # axis, so reverse the rows to place the largest bar at the top.
    shown = shown.iloc[::-1]

    fig = px.bar(
        shown,
        x=value_col,
        y=label_col,
        orientation='h',
        title=title,
        labels=labels,
        color='color',
        color_discrete_map={c: c for c in shown['color'].unique()},
        text=shown[value_col].apply(lambda x: f'{x:.2f}'),
    )
    fig.update_layout(
        yaxis={
            'categoryorder': 'array',
            'categoryarray': shown[label_col].tolist(),
        },
        height=500,
        showlegend=False,
    )
    fig.update_traces(textposition='auto')
    st.plotly_chart(fig, use_container_width=True)

    # The insight always cites the overall extremes, so "highest" and "lowest"
    # stay true whichever view (Top 10 / Bottom 10) is selected.
    highest = ranked.iloc[0]
    lowest = ranked.iloc[-1]
    st.markdown(
        insight_template.format(
            top_name=highest[label_col],
            top_value=highest[value_col],
            low_name=lowest[label_col],
            low_value=lowest[value_col],
        ),
        unsafe_allow_html=True,
    )
    return fig


# Row 1: reporter charts (3a by division, 3c by reporter name).
col_3a, col_3c = st.columns(2)

with col_3a:
    st.markdown("<h5>3a. Average Finding by Division (Reporter)</h5>", unsafe_allow_html=True)
    fig_rep_nama = _render_ranked_bar(
        avg_ratio_per_nama,
        chart_id='3a',
        label_col='nama',
        value_col='avg_monthly_ratio',
        highlight_color='#4CAF50',
        title='Avg Monthly Finding by Division',
        labels={'avg_monthly_ratio': 'Avg Monthly Finding/Person Ratio', 'nama': 'Division'},
        empty_message="No data for reporter analysis by division.",
        insight_template=(
            "<div class='ai-insight'>"
            "The division <strong>{top_name}</strong> has the highest average finding-to-person ratio "
            "(<strong>{top_value:.2f}</strong>). "
            "<strong>{low_name}</strong> has the lowest ratio "
            "(<strong>{low_value:.2f}</strong>). "
            "Monitor high-ratio divisions for potential systemic issues and verify reporting completeness in low-ratio ones."
            "</div>"
        ),
    )

with col_3c:
    st.markdown("<h5>3c. Average Finding Rate per Reporter (Name)</h5>", unsafe_allow_html=True)
    fig_rep_creator = _render_ranked_bar(
        avg_rate_per_creator,
        chart_id='3c',
        label_col='creator_name',
        value_col='avg_monthly_rate',
        highlight_color='#4CAF50',
        title='Avg Monthly Finding by Creator Name',
        labels={'avg_monthly_rate': 'Avg Monthly Finding Rate', 'creator_name': 'Creator Name'},
        empty_message="No data for reporter analysis by creator_name.",
        insight_template=(
            "<div class='ai-insight'>"
            "The reporter <strong>{top_name}</strong> has the highest average monthly finding rate "
            "(<strong>{top_value:.2f}</strong>). "
            "<strong>{low_name}</strong> has the lowest rate "
            "(<strong>{low_value:.2f}</strong>). "
            "Recognize high performers and investigate low performers."
            "</div>"
        ),
    )


# Row 2: executor charts (3b by division, 3d by executor name).
col_3b, col_3d = st.columns(2)

with col_3b:
    st.markdown("<h5>3b. Average Lead Time by Division (Executor)</h5>", unsafe_allow_html=True)
    fig_exec_nama = _render_ranked_bar(
        avg_leadtime_nama,
        chart_id='3b',
        label_col='nama',
        value_col='avg_monthly_leadtime',
        highlight_color='#D32F2F',
        title='Avg Monthly Lead Time by Division',
        labels={'avg_monthly_leadtime': 'Avg Lead Time (Days)', 'nama': 'Division'},
        empty_message="No data for executor analysis by division.",
        insight_template=(
            "<div class='ai-insight'>"
            "The division <strong>{top_name}</strong> has the highest average lead time "
            "(<strong>{top_value:.2f} days</strong>). "
            "<strong>{low_name}</strong> has the fastest average resolution "
            "(<strong>{low_value:.2f} days</strong>). "
            "Focus on improving SLA compliance in divisions with longer lead times."
            "</div>"
        ),
    )

with col_3d:
    st.markdown("<h5>3d. Average Lead Time by Executor (Name)</h5>", unsafe_allow_html=True)
    fig_exec_pic = _render_ranked_bar(
        avg_leadtime_per_executor,
        chart_id='3d',
        label_col='nama_pic',
        value_col='avg_monthly_leadtime',
        highlight_color='#D32F2F',
        title='Avg Monthly Lead Time by Executor (Name)',
        labels={'avg_monthly_leadtime': 'Avg Monthly Lead Time (Days)', 'nama_pic': 'Executor Name'},
        empty_message="No data for executor analysis by nama_pic.",
        insight_template=(
            "<div class='ai-insight'>"
            "The executor <strong>{top_name}</strong> has the highest average monthly lead time "
            "(<strong>{top_value:.2f} days</strong>). "
            "<strong>{low_name}</strong> resolves tasks fastest on average "
            "(<strong>{low_value:.2f} days</strong>). "
            "Focus on improving SLA compliance for executors with longer lead times."
            "</div>"
        ),
    )
1044
+
1045
+
1046
+ ####OBJECTIVE 4
1047
  try:
1048
  from wordcloud import WordCloud
1049
  import matplotlib.pyplot as plt