Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import plotly.express as px | |
| import plotly.graph_objects as go | |
| import numpy as np | |
| from datetime import datetime, timedelta | |
| from typing import List | |
| import os | |
| import sklearn | |
| import kaleido | |
| from transformers import pipeline | |
| import transformers | |
| print("Transformers version:", transformers.__version__) | |
| from transformers import pipeline | |
| print("✅ pipeline imported successfully") | |
| # =================== PAGE CONFIG =================== | |
| st.set_page_config( | |
| page_title="Proactive Safety Intelligence & Analytics Dashboard", | |
| page_icon="", | |
| layout="wide", | |
| initial_sidebar_state="expanded" | |
| ) | |
| # =================== CUSTOM CSS (Updated for PLN Colors) =================== | |
| st.markdown("""<style> | |
| .main-header { | |
| background-color: white; | |
| padding: 25px; | |
| border-radius: 12px; | |
| margin-bottom: 25px; | |
| box-shadow: 0 4px 12px rgba(0,0,0,0.06); | |
| border: 1px solid #e0e0e0; | |
| } | |
| h1, h2, h3, h4, h5, .stMarkdown h1, .stMarkdown h2, .stMarkdown h3 { | |
| text-align: center; | |
| font-weight: 700; | |
| color: #003DA5; /* Dark Blue - PLN Color */ | |
| } | |
| .metric-card { | |
| background: white; | |
| padding: 16px; | |
| border-radius: 10px; | |
| box-shadow: 0 3px 10px rgba(0,0,0,0.05); | |
| text-align: center; | |
| border: 1px solid #f0f0f0; | |
| } | |
| .ai-insight { | |
| background: #f0f4ff; /* Light Blue */ | |
| padding: 14px 18px; | |
| border-left: 4px solid #003DA5; /* PLN Blue */ | |
| margin: 10px 0; | |
| border-radius: 0 6px 6px 0; | |
| font-size: 0.95em; | |
| } | |
| .ai-recommendation { | |
| background: #e8f5e9; | |
| padding: 14px 18px; | |
| border-left: 4px solid #4caf50; | |
| margin: 10px 0; | |
| border-radius: 0 6px 6px 0; | |
| font-size: 0.95em; | |
| } | |
| .risk-very-high { color: #c62828; font-weight: bold; } | |
| .risk-high { color: #d32f2f; } | |
| .risk-moderate { color: #f57c00; } | |
| .risk-slight { color: #388e3c; } | |
| .trend-worsening { color: #d32f2f; } | |
| .trend-improvement { color: #388e3c; } | |
| .trend-stable { color: #616161; } | |
| .chart-container { | |
| border: 1px solid #e0e0e0; | |
| border-radius: 8px; | |
| padding: 15px; | |
| margin: 10px 0; | |
| background-color: white; | |
| box-shadow: 0 2px 6px rgba(0,0,0,0.03); | |
| } | |
| .section-title { | |
| color: #003DA5; /* PLN Blue */ | |
| font-weight: 700; | |
| font-size: 1.5em; | |
| text-align: left; | |
| margin-top: 20px; | |
| margin-bottom: 10px; | |
| } | |
| .ai-section { | |
| background: #ffffff; | |
| padding: 20px; | |
| border-radius: 8px; | |
| margin: 10px 0; | |
| box-shadow: 0 2px 6px rgba(0,0,0,0.03); | |
| } | |
| /* PLN Styled Selectbox and Multiselect */ | |
| .stSelectbox > label, .stMultiselect > label { | |
| color: #003DA5; /* PLN Blue */ | |
| font-weight: bold; | |
| } | |
| .stSelectbox > div > div, .stMultiselect > div > div { | |
| border: 2px solid #003DA5; /* PLN Blue Border */ | |
| border-radius: 8px; | |
| } | |
| .st-bq { | |
| background-color: #f0f4ff; /* Light Blue Background */ | |
| } | |
| .stButton > button { | |
| background-color: #003DA5; /* PLN Blue Button */ | |
| color: white; | |
| border: none; | |
| border-radius: 8px; | |
| padding: 8px 16px; | |
| font-weight: bold; | |
| } | |
| .stButton > button:hover { | |
| background-color: #0050A0; /* Darker PLN Blue on Hover */ | |
| } | |
| /* Filter Container Styling */ | |
| .filter-container { | |
| background-color: #f9f9f9; | |
| border-radius: 10px; | |
| padding: 15px; | |
| margin-bottom: 15px; | |
| box-shadow: 0 2px 4px rgba(0,0,0,0.05); | |
| } | |
| .filter-title { | |
| color: #003DA5; | |
| font-weight: bold; | |
| font-size: 1.1em; | |
| margin-bottom: 10px; | |
| text-align: center; | |
| } | |
| </style>""", unsafe_allow_html=True) | |
| # =================== DATA LOADING (FROM data.xlsx) =================== | |
| # refresh every 5 min | |
| def load_data(): | |
| file_path = "data.xlsx" | |
| if not os.path.exists(file_path): | |
| st.error(f"❌ File **`{file_path}`** not found. Please ensure it's in the same directory as this script.") | |
| return pd.DataFrame() | |
| try: | |
| # Load Excel file | |
| df = pd.read_excel(file_path, sheet_name='Sheet1', engine='openpyxl') | |
| # Check for required columns | |
| required_cols = ['created_at'] | |
| missing = [c for c in required_cols if c not in df.columns] | |
| if missing: | |
| st.error(f"❌ Missing required columns: {missing}. Available: {list(df.columns)}") | |
| return pd.DataFrame() | |
| # Parse datetime | |
| df['created_at'] = pd.to_datetime(df['created_at'], errors='coerce') | |
| if df['created_at'].isna().all(): | |
| st.error("❌ `created_at` column could not be parsed as datetime.") | |
| return pd.DataFrame() | |
| # Optional: close_at | |
| if 'close_at' in df.columns: | |
| df['close_at'] = pd.to_datetime(df['close_at'], errors='coerce') | |
| df['days_to_close'] = (df['close_at'] - df['created_at']).dt.total_seconds() / (24 * 3600) | |
| df['days_to_close'] = df['days_to_close'].apply(lambda x: x if x >= 0 else np.nan) | |
| else: | |
| df['days_to_close'] = np.nan | |
| # Derived columns | |
| df['created_month'] = df['created_at'].dt.to_period('M') | |
| df['created_date'] = df['created_at'].dt.date | |
| df['created_week'] = df['created_at'].dt.to_period('W') | |
| # Keep only valid rows | |
| df = df.dropna(subset=['created_at']).copy() | |
| # Log shape | |
| st.sidebar.success(f"Loaded {len(df):,} Audit Findings from `data.xlsx`") | |
| return df | |
| except Exception as e: | |
| st.exception(f"Error loading data.xlsx: {e}") | |
| return pd.DataFrame() | |
| df = load_data() | |
| if df.empty: | |
| st.stop() | |
| # =================== SIDEBAR FILTERS (Perbaikan) =================== | |
| st.sidebar.markdown('<div class="filter-container">', unsafe_allow_html=True) | |
| st.sidebar.markdown('<h4 class="filter-title">Filter Dashboard</h4>', unsafe_allow_html=True) | |
| # Inisialisasi df_filtered | |
| df_filtered = df.copy() | |
| # Flag to track if filters were applied | |
| filters_applied = False | |
| # 1. Date Range Filter | |
| min_date = df['created_at'].min().date() | |
| max_date = df['created_at'].max().date() | |
| date_range = st.sidebar.date_input( | |
| "Date Range", | |
| value=(min_date, max_date), | |
| min_value=min_date, | |
| max_value=max_date | |
| ) | |
| # 2. Filter by Vendor (nama_perusahaan) - Default to All | |
| if 'nama_perusahaan' in df.columns: | |
| unique_vendors = sorted(df['nama_perusahaan'].dropna().astype(str).unique()) | |
| all_vendors_option = "All Vendors" | |
| vendor_options = [all_vendors_option] + list(unique_vendors) | |
| selected_vendor = st.sidebar.selectbox("Vendor", vendor_options, index=0) # Default to "All" | |
| if selected_vendor != all_vendors_option: | |
| df_filtered = df_filtered[df_filtered['nama_perusahaan'].astype(str) == selected_vendor] | |
| filters_applied = True | |
| # 3. Filter by Area/Unit Type (temuan_nama_distrik or creator_nama_distrik) - Renamed | |
| area_col = None | |
| if 'temuan_nama_distrik' in df_filtered.columns: | |
| area_col = 'temuan_nama_distrik' | |
| elif 'creator_nama_distrik' in df_filtered.columns: | |
| area_col = 'creator_nama_distrik' | |
| if area_col: | |
| # Define mapping for display names | |
| area_mapping = { | |
| 'UMRO': 'Unit Maintenance', | |
| 'UP GRESIK': 'Unit Pembangkit' | |
| } | |
| unique_areas_raw = sorted(df_filtered[area_col].dropna().astype(str).unique()) | |
| # Map raw values to display names, keep unmapped values as is | |
| unique_areas_display = [area_mapping.get(area, area) for area in unique_areas_raw] | |
| # Prepend "All" option | |
| all_areas_option = "All Units" | |
| area_options = [all_areas_option] + unique_areas_display | |
| selected_area_display = st.sidebar.selectbox("Unit Type", area_options, index=0) # Default to "All" | |
| if selected_area_display != all_areas_option: | |
| # Reverse map the selected display name back to the raw value for filtering | |
| selected_area_raw = next((raw for raw, disp in area_mapping.items() if disp == selected_area_display), selected_area_display) | |
| df_filtered = df_filtered[df_filtered[area_col].astype(str) == selected_area_raw] | |
| filters_applied = True | |
| # 4. Status filter - Changed to selectbox (dropdown) | |
| status_filter_applied = False | |
| if 'temuan_status' in df_filtered.columns: | |
| all_status = sorted(df_filtered['temuan_status'].dropna().astype(str).unique()) | |
| # Prepend "All" option | |
| all_status_option = "All Status" | |
| status_options = [all_status_option] + list(all_status) | |
| selected_status = st.sidebar.selectbox( | |
| "Status", | |
| status_options, | |
| index=0 # Default to "All" | |
| ) | |
| if selected_status != all_status_option: | |
| df_filtered = df_filtered[df_filtered['temuan_status'].astype(str) == selected_status] | |
| status_filter_applied = True | |
| filters_applied = True | |
| # Apply date filter *after* other filters | |
| if len(date_range) == 2: | |
| df_filtered = df_filtered[ | |
| (df_filtered['created_at'].dt.date >= date_range[0]) & | |
| (df_filtered['created_at'].dt.date <= date_range[1]) | |
| ] | |
| if date_range[0] != min_date or date_range[1] != max_date: | |
| filters_applied = True | |
| # Submit Button | |
| submit_clicked = st.sidebar.button("Apply Filters") | |
| # Apply filters logic when button is clicked | |
| if submit_clicked: | |
| # The filtering based on selections already happened above | |
| # Here we just update the summary based on the current state of df_filtered | |
| active_filters = [] | |
| if 'selected_vendor' in locals() and selected_vendor != all_vendors_option: | |
| active_filters.append(f"Vendor: {selected_vendor}") | |
| if 'selected_area_display' in locals() and selected_area_display != all_areas_option: | |
| active_filters.append(f"Unit: {selected_area_display}") | |
| if 'selected_status' in locals() and selected_status != all_status_option: | |
| active_filters.append(f"Status: {selected_status}") | |
| if len(date_range) == 2 and (date_range[0] != min_date or date_range[1] != max_date): | |
| active_filters.append(f"Date: {date_range[0]} to {date_range[1]}") | |
| if active_filters: | |
| st.sidebar.success("**Active Filters:**") | |
| for f in active_filters: | |
| st.sidebar.markdown(f"- {f}") | |
| st.sidebar.info(f"Showing {len(df_filtered)} records based on filters.") | |
| else: | |
| st.sidebar.info("No specific filters applied (showing all records).") | |
| else: | |
| # Show default message when not submitted yet | |
| st.sidebar.info("Set filters and click 'Apply Filters'.") | |
| st.sidebar.markdown('</div>', unsafe_allow_html=True) | |
| # =================== HEADER =================== | |
| import streamlit as st | |
| import os | |
| import streamlit as st | |
| import os | |
| # ====================== CSS UNTUK HEADER =========================== | |
| # CSS biar header terlihat seperti 1 kotak besar | |
| st.markdown(""" | |
| <style> | |
| .header-box { | |
| padding: 15px 25px; | |
| background-color: white; | |
| border: 1px solid #e0e0e0; | |
| border-radius: 12px; | |
| margin-bottom: 25px; | |
| display: flex; | |
| align-items: center; | |
| justify-content: space-between; | |
| } | |
| .header-title { | |
| text-align: center; | |
| flex: 1; | |
| } | |
| .logo-container { | |
| display: flex; | |
| align-items: center; | |
| justify-content: flex-end; | |
| width: 150px; /* 🔥 Diperbesar dari 100px */ | |
| height: auto; | |
| } | |
| </style> | |
| """, unsafe_allow_html=True) | |
| # ==== HEADER LAYOUT (Columns) ==== | |
| with st.container(): | |
| st.markdown('<div class="header-box">', unsafe_allow_html=True) | |
| col1, col2, col3 = st.columns([1, 5, 1]) # tengah lebih besar | |
| # LEFT (kosong) | |
| with col1: | |
| st.write("") | |
| # CENTER — TITLE | |
| with col2: | |
| st.markdown(""" | |
| <div class="header-title"> | |
| <h1 style="margin-bottom:6px;">Proactive Safety Intelligence & Analytics Dashboard</h1> | |
| <p style="color:#546e7a; font-size:1.05em; margin-top:-8px;"> | |
| </p> | |
| </div> | |
| """, unsafe_allow_html=True) | |
| # RIGHT — LOGO | |
| with col3: | |
| try: | |
| st.image("pln.png", width=120) # 🔥 Diperbesar dari 80px | |
| except: | |
| st.write("") # Jika logo tidak ditemukan, kosongkan | |
| st.markdown('</div>', unsafe_allow_html=True) | |
| # =================== OBJECTIVE 1 - Company Reporting Activity (Polar Bar Chart) =================== | |
| st.markdown( | |
| """ | |
| <style> | |
| .section-title { | |
| text-align: center; | |
| font-size: 1.5rem; | |
| font-weight: 600; | |
| color: #2c3e50; | |
| margin-bottom: 1.2rem; | |
| } | |
| .ai-insight { | |
| background-color: #f8f9fa; | |
| padding: 12px; | |
| border-left: 4px solid #27ae60; | |
| border-radius: 0 4px 4px 0; | |
| font-size: 0.95rem; | |
| line-height: 1.5; | |
| margin-top: 1rem; | |
| } | |
| </style> | |
| <h3 class='section-title'>OBJECTIVE 1 — Company Reporting Activity: Who Reports the Most?</h3> | |
| """, | |
| unsafe_allow_html=True | |
| ) | |
| df_local = df_filtered.copy() | |
| df_local['created_month'] = df_local['created_at'].dt.to_period('M') | |
| if 'temuan_kode_distrik' in df_local.columns: | |
| df_local['Area_Type'] = df_local['temuan_kode_distrik'].apply( | |
| lambda x: 'PG' if 'PG' in str(x).upper() | |
| else 'UM' if 'UM' in str(x).upper() | |
| else 'Other' | |
| ) | |
| df_pg = df_local[df_local['Area_Type'] == 'PG'].copy() | |
| df_um = df_local[df_local['Area_Type'] == 'UM'].copy() | |
| # --- Hitung rasio per perusahaan --- | |
| def calculate_avg_ratio_per_company(df_area): | |
| if df_area.empty: | |
| return pd.DataFrame() | |
| findings = df_area.groupby(['created_month', 'nama_perusahaan']).size().reset_index(name='findings_count') | |
| creators = df_area.groupby(['created_month', 'nama_perusahaan'])['creator_nid'].nunique().reset_index(name='unique_creators') | |
| merged = findings.merge(creators, on=['created_month', 'nama_perusahaan'], how='outer') | |
| merged = merged.fillna({'findings_count': 0, 'unique_creators': 0}) | |
| merged = merged[merged['unique_creators'] > 0] | |
| merged['ratio'] = merged['findings_count'] / merged['unique_creators'] | |
| merged['ratio'] = merged['ratio'].replace([np.inf, -np.inf], np.nan) | |
| avg_ratio = merged.groupby('nama_perusahaan')['ratio'].mean().reset_index(name='avg_monthly_ratio') | |
| return avg_ratio.dropna(subset=['avg_monthly_ratio']) | |
| avg_ratio_pg = calculate_avg_ratio_per_company(df_pg) | |
| avg_ratio_um = calculate_avg_ratio_per_company(df_um) | |
| # 🔑 PALET & COLOR MAPPING GLOBAL — KONSISTEN ANTAR CHART | |
| PLN_COLOR = "#FFD700" | |
| PASTEL_BLUES = [ | |
| "#A8DADC", "#E2ECE9", "#CCE4E7", "#B5D9D9", | |
| "#98C8D1", "#7FB9C1", "#6BA9B3", "#5A9CB5", "#4A8FA7", "#3A8399" | |
| ] # diperpanjang sedikit untuk antisipasi banyak kontraktor | |
| # 🔥 Dapatkan daftar *semua* perusahaan non-PLN unik (dari seluruh data), urut alfabetis → assign warna deterministik | |
| all_companies = pd.concat([avg_ratio_pg, avg_ratio_um])['nama_perusahaan'].dropna().unique() | |
| non_pln_companies = sorted([c for c in all_companies if 'PLN' not in str(c).upper()]) | |
| # Mapping: company → color (PLN khusus, lainnya dari palet biru berurut) | |
| COMPANY_COLOR_MAP = {} | |
| for i, company in enumerate(non_pln_companies): | |
| COMPANY_COLOR_MAP[company] = PASTEL_BLUES[i % len(PASTEL_BLUES)] | |
| # Pastikan PLN juga masuk mapping (jika muncul) | |
| for company in all_companies: | |
| if 'PLN' in str(company).upper(): | |
| COMPANY_COLOR_MAP[company] = PLN_COLOR | |
| # 🔥 Fungsi helper: dapatkan warna berdasarkan nama perusahaan (konsisten!) | |
| def get_color(company_name): | |
| return COMPANY_COLOR_MAP.get(company_name, "#CCCCCC") # fallback abu-abu | |
| # 🔥 Fungsi chart — pakai mapping global | |
| def create_polar_bar_chart(df, area_name): | |
| if df.empty: | |
| return None | |
| # Urutkan untuk stabilitas visual (misal: ascending ratio) | |
| df = df.sort_values('avg_monthly_ratio', ascending=True).reset_index(drop=True) | |
| companies = df['nama_perusahaan'].tolist() | |
| ratios = df['avg_monthly_ratio'].tolist() | |
| colors = [get_color(comp) for comp in companies] | |
| # Hitung proporsi temuan (untuk lebar bar) | |
| total_findings = df_local[df_local['Area_Type'] == area_name].groupby('nama_perusahaan').size() | |
| angles = [total_findings.get(comp, 0) / total_findings.sum() * 360 if total_findings.sum() > 0 else 0 for comp in companies] | |
| # Hitung posisi tengah | |
| mid_angles = [] | |
| current = 0 | |
| for a in angles: | |
| mid_angles.append(current + a / 2) | |
| current += a | |
| fig = go.Figure() | |
| # Satu trace saja — lebih clean, legend bisa diatur manual jika perlu | |
| fig.add_trace(go.Barpolar( | |
| r=ratios, | |
| theta=mid_angles, | |
| width=angles, | |
| marker_color=colors, | |
| marker_line_color="white", | |
| marker_line_width=1.2, | |
| opacity=0.9, | |
| hovertemplate="<b>%{text}</b><br>Avg Ratio: %{r:.2f}<extra></extra>", | |
| text=companies, | |
| showlegend=False # Kita akan buat legend manual yang rapi | |
| )) | |
| # 🔥 LEGEND MANUAL — hanya tampilkan setiap perusahaan sekali, dengan warna konsisten | |
| # Tambahkan satu scatter "dummy" per perusahaan untuk legend | |
| for company in sorted(set(companies)): | |
| color = get_color(company) | |
| fig.add_trace(go.Scatterpolar( | |
| r=[None], theta=[None], # invisible | |
| mode='markers', | |
| marker=dict(color=color, size=10), | |
| name=company, | |
| showlegend=True | |
| )) | |
| fig.update_layout( | |
| title=f'{area_name} Area', | |
| polar=dict( | |
| radialaxis=dict( | |
| visible=True, | |
| tickfont=dict(size=9, color="gray"), | |
| gridcolor='lightgray', | |
| title=dict(text='Avg Finding/Person', font=dict(size=10, color="gray")) | |
| ), | |
| angularaxis=dict( | |
| visible=True, | |
| direction='clockwise', | |
| tickfont=dict(size=9, color="white"), | |
| showline=False, | |
| gridcolor="lightgray" | |
| ), | |
| ), | |
| showlegend=True, | |
| legend=dict( | |
| orientation="v", | |
| yanchor="top", | |
| y=1, | |
| xanchor="right", | |
| x=1.02, | |
| font=dict(size=10) | |
| ), | |
| height=450, | |
| margin=dict(t=40, b=20, l=20, r=40) | |
| ) | |
| return fig | |
| # Plot | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| st.markdown("<h5>Unit Pembangkit: Monthly Finding by Company</h5>", unsafe_allow_html=True) | |
| fig_pg = create_polar_bar_chart(avg_ratio_pg, 'PG') | |
| if fig_pg: | |
| st.plotly_chart(fig_pg, use_container_width=True) | |
| if not avg_ratio_pg.empty: | |
| top = avg_ratio_pg.loc[avg_ratio_pg['avg_monthly_ratio'].idxmax()] | |
| low = avg_ratio_pg.loc[avg_ratio_pg['avg_monthly_ratio'].idxmin()] | |
| st.markdown("### Insight") | |
| st.markdown( | |
| f"<div class='ai-insight'>" | |
| f"In PG Area, <strong>{top['nama_perusahaan']}</strong> has the highest ratio (<strong>{top['avg_monthly_ratio']:.2f}</strong>), " | |
| f"while <strong>{low['nama_perusahaan']}</strong> has the lowest (<strong>{low['avg_monthly_ratio']:.2f}</strong>). " | |
| f"Consider cross-learning between them to standardize reporting culture." | |
| f"</div>", | |
| unsafe_allow_html=True | |
| ) | |
| else: | |
| st.warning("No data for PG area.") | |
| with col2: | |
| st.markdown("<h5>Unit Maintenance: Monthly Finding by Company</h5>", unsafe_allow_html=True) | |
| fig_um = create_polar_bar_chart(avg_ratio_um, 'UM') | |
| if fig_um: | |
| st.plotly_chart(fig_um, use_container_width=True) | |
| if not avg_ratio_um.empty: | |
| top = avg_ratio_um.loc[avg_ratio_um['avg_monthly_ratio'].idxmax()] | |
| low = avg_ratio_um.loc[avg_ratio_um['avg_monthly_ratio'].idxmin()] | |
| st.markdown("### Insight") | |
| st.markdown( | |
| f"<div class='ai-insight'>" | |
| f"Across all companies, the finding-per-person ratio is similar in the UM Area " | |
| f"</div>", | |
| unsafe_allow_html=True | |
| ) | |
| else: | |
| st.warning("No data for UM area.") | |
| else: | |
| st.error("Column 'temuan_kode_distrik' not found.") | |
| st.stop() | |
| # =================== OBJECTIVE 2 — Active vs Inactive Locations (Treemap with Color Gradient) =================== | |
| st.markdown( | |
| """ | |
| <style> | |
| .section-title { | |
| text-align: center; | |
| font-size: 1.5rem; | |
| font-weight: 600; | |
| color: #2c3e50; | |
| margin-bottom: 1.2rem; | |
| } | |
| .ai-insight { | |
| background-color: #f8f9fa; | |
| padding: 12px; | |
| border-left: 4px solid #27ae60; | |
| border-radius: 0 4px 4px 0; | |
| font-size: 0.95rem; | |
| line-height: 1.5; | |
| margin-top: 1rem; | |
| } | |
| </style> | |
| <h3 class='section-title'>OBJECTIVE 2 — Active vs Inactive Locations: Who Leads?</h3> | |
| """, | |
| unsafe_allow_html=True | |
| ) | |
| df_local = df_filtered.copy() | |
| if df_local.empty: | |
| st.warning("No data available after filtering.") | |
| st.stop() | |
| df_local['created_month'] = df_local['created_at'].dt.to_period('M') | |
| # Hitung temuan per bulan per lokasi | |
| findings_by_location_month = df_local.groupby(['created_month', 'nama_lokasi_full']).size().reset_index(name='findings_count') | |
| # Hitung jumlah orang unik per bulan per lokasi | |
| creators_by_location_month = df_local.groupby(['created_month', 'nama_lokasi_full'])['creator_nid'].nunique().reset_index(name='unique_creators') | |
| # Gabung | |
| merged_loc = findings_by_location_month.merge(creators_by_location_month, on=['created_month', 'nama_lokasi_full'], how='outer') | |
| # Isi NaN dengan 0 | |
| merged_loc = merged_loc.fillna({'findings_count': 0, 'unique_creators': 0}) | |
| # Hindari pembagian dengan nol | |
| merged_loc = merged_loc[merged_loc['unique_creators'] > 0] | |
| # Hitung rasio | |
| merged_loc['ratio'] = merged_loc['findings_count'] / merged_loc['unique_creators'] | |
| merged_loc['ratio'] = merged_loc['ratio'].replace([np.inf, -np.inf], np.nan) | |
| # Rata-rata bulanan per lokasi | |
| avg_ratio_per_location = merged_loc.groupby('nama_lokasi_full')['ratio'].mean().reset_index(name='avg_monthly_ratio') | |
| avg_ratio_per_location = avg_ratio_per_location.dropna(subset=['avg_monthly_ratio']) | |
| if avg_ratio_per_location.empty: | |
| st.write("Data kosong.") | |
| else: | |
| # Treemap — hanya dibuat & ditampilkan sekali | |
| fig_treemap = px.treemap( | |
| avg_ratio_per_location, | |
| path=['nama_lokasi_full'], | |
| values='avg_monthly_ratio', | |
| title='Avg Monthly Finding/Person Ratio by Location', | |
| labels={ | |
| 'avg_monthly_ratio': 'Avg Monthly Finding/Person Ratio', | |
| 'nama_lokasi_full': 'Location' | |
| }, | |
| color='avg_monthly_ratio', | |
| color_continuous_scale=[ | |
| [0.0, '#D32F2F'], # Red (low activity) | |
| [0.5, '#FFB300'], # Amber/orange-yellow (medium) | |
| [1.0, '#4CAF50'] # Green (high activity) | |
| ], | |
| hover_data={'avg_monthly_ratio': ':.2f'} # pastikan format hover sudah 2 desimal | |
| ) | |
| # Perbaiki tata letak & hover — lakukan *sebelum* menampilkan | |
| fig_treemap.update_traces( | |
| hovertemplate="<b>%{label}</b><br>Avg Ratio: %{value:.2f}<extra></extra>" | |
| ) | |
| fig_treemap.update_layout( | |
| title=dict(text='Avg Monthly Finding/Person Ratio by Location', x=0.5, xanchor='center'), | |
| height=600, | |
| margin=dict(t=50, l=25, r=25, b=25) | |
| ) | |
| # Tampilkan hanya sekali! | |
| st.plotly_chart(fig_treemap, use_container_width=True) | |
| # === AI Insight === | |
| top_location = avg_ratio_per_location.loc[avg_ratio_per_location['avg_monthly_ratio'].idxmax()] | |
| low_location = avg_ratio_per_location.loc[avg_ratio_per_location['avg_monthly_ratio'].idxmin()] | |
| st.markdown("### Insight") | |
| insight_text = ( | |
| f"<div class='ai-insight'>" | |
| f"<strong>{top_location['nama_lokasi_full']}</strong> shows the highest activity level " | |
| f"(<strong>{top_location['avg_monthly_ratio']:.2f}</strong> findings per person/month). " | |
| f"In contrast, <strong>{low_location['nama_lokasi_full']}</strong> has the lowest activity level " | |
| f"(<strong>{low_location['avg_monthly_ratio']:.2f}</strong>).<br><br>" | |
| f"🟢 <strong>High-ratio (green)</strong> locations indicate either high reporting diligence or high exposure to risks — both warrant deeper root-cause analysis.<br>" | |
| f"🟡 <strong>Medium-ratio (yellow)</strong> areas need monitoring to ensure trends do not deteriorate.<br>" | |
| f"🔴 <strong>Low-ratio (red)</strong> locations may reflect under-reporting, lack of engagement, or low hazard visibility — recommend follow-up audits to verify data completeness." | |
| f"</div>" | |
| ) | |
| st.markdown(insight_text, unsafe_allow_html=True) | |
| # =================== OBJECTIVE 3 - Frequency & Response Time =================== | |
| st.markdown( | |
| """ | |
| <style> | |
| .section-title { | |
| text-align: center; | |
| font-size: 1.5rem; | |
| font-weight: 600; | |
| color: #2c3e50; | |
| margin-bottom: 1.2rem; | |
| } | |
| .ai-insight { | |
| background-color: #f8f9fa; | |
| padding: 12px; | |
| border-left: 4px solid #27ae60; | |
| border-radius: 0 4px 4px 0; | |
| font-size: 0.95rem; | |
| line-height: 1.5; | |
| margin-top: 1rem; | |
| } | |
| </style> | |
| <h3 class='section-title'>OBJECTIVE 3 — Frequency & Response Time: Who Reports Well? Who Executes Well?</h3> | |
| """, | |
| unsafe_allow_html=True | |
| ) | |
| df_local = df_filtered.copy() | |
| if df_local.empty: | |
| st.warning("No data available after filtering.") | |
| st.stop() | |
| df_local['created_month'] = df_local['created_at'].dt.to_period('M') | |
| # ─── Helper: Hitung rasio per division (reporter) ───────────────────────────── | |
| def compute_reporter_ratio_by_nama(df): | |
| if 'nama' not in df.columns: | |
| return pd.DataFrame() | |
| findings_by_nama_month = df.groupby(['created_month', 'nama']).size().reset_index(name='findings_count') | |
| creators_by_nama_month = df.groupby(['created_month', 'nama'])['creator_nid'].nunique().reset_index(name='unique_creators') | |
| merged_rep = findings_by_nama_month.merge(creators_by_nama_month, on=['created_month', 'nama'], how='outer') | |
| merged_rep = merged_rep.fillna({'findings_count': 0, 'unique_creators': 0}) | |
| merged_rep = merged_rep[merged_rep['unique_creators'] > 0] | |
| merged_rep['ratio'] = merged_rep['findings_count'] / merged_rep['unique_creators'] | |
| merged_rep['ratio'] = merged_rep['ratio'].replace([np.inf, -np.inf], np.nan) | |
| avg_ratio_per_nama = merged_rep.groupby('nama')['ratio'].mean().reset_index(name='avg_monthly_ratio') | |
| avg_ratio_per_nama = avg_ratio_per_nama.dropna(subset=['avg_monthly_ratio']) | |
| return avg_ratio_per_nama | |
| # ─── Helper: Hitung rata-rata temuan per reporter (individu) ───────────────── | |
| def compute_reporter_rate_by_creator(df): | |
| if 'creator_name' not in df.columns: | |
| return pd.DataFrame() | |
| findings_by_creator_month = df.groupby(['created_month', 'creator_name']).size().reset_index(name='findings_count') | |
| active_months_by_creator = findings_by_creator_month.groupby('creator_name')['created_month'].nunique().reset_index(name='active_months') | |
| total_findings_by_creator = findings_by_creator_month.groupby('creator_name')['findings_count'].sum().reset_index() | |
| merged_rep_creator = total_findings_by_creator.merge(active_months_by_creator, on='creator_name', how='outer') | |
| merged_rep_creator = merged_rep_creator.fillna({'findings_count': 0, 'active_months': 0}) | |
| merged_rep_creator = merged_rep_creator[merged_rep_creator['active_months'] > 0] | |
| merged_rep_creator['avg_monthly_rate'] = merged_rep_creator['findings_count'] / merged_rep_creator['active_months'] | |
| merged_rep_creator['avg_monthly_rate'] = merged_rep_creator['avg_monthly_rate'].replace([np.inf, -np.inf], np.nan) | |
| avg_rate_per_creator = merged_rep_creator.dropna(subset=['avg_monthly_rate']) | |
| return avg_rate_per_creator | |
| # ─── Helper: Hitung lead time per division (executor) ─────────────────────── | |
| def compute_executor_leadtime_by_nama(df): | |
| if 'nama' not in df.columns or 'days_to_close' not in df.columns: | |
| return pd.DataFrame() | |
| # Filter hanya data dengan lead time valid | |
| df_valid = df[df['days_to_close'].notna() & (df['days_to_close'] >= 0)] | |
| leadtime_by_nama_month = df_valid.groupby(['created_month', 'nama'])['days_to_close'].mean().reset_index(name='avg_leadtime') | |
| avg_leadtime_nama = leadtime_by_nama_month.groupby('nama')['avg_leadtime'].mean().reset_index(name='avg_monthly_leadtime') | |
| avg_leadtime_nama = avg_leadtime_nama.dropna(subset=['avg_monthly_leadtime']) | |
| return avg_leadtime_nama | |
| # ─── Helper: Hitung lead time per individu executor (deteksi kolom otomatis) ─ | |
| def compute_executor_leadtime_by_individual(df, name_col='creator_name'): | |
| if name_col not in df.columns or 'days_to_close' not in df.columns: | |
| return pd.DataFrame() | |
| df_valid = df[df['days_to_close'].notna() & (df['days_to_close'] >= 0)] | |
| leadtime_by_indiv_month = df_valid.groupby(['created_month', name_col])['days_to_close'].mean().reset_index(name='avg_leadtime') | |
| avg_leadtime_indiv = leadtime_by_indiv_month.groupby(name_col)['avg_leadtime'].mean().reset_index(name='avg_monthly_leadtime') | |
| avg_leadtime_indiv = avg_leadtime_indiv.dropna(subset=['avg_monthly_leadtime']) | |
| return avg_leadtime_indiv | |
| # ─── Deteksi kolom executor individu ──────────────────────────────────────── | |
| EXECUTOR_INDIV_COL = None | |
| candidate_executor_cols = ['pic', 'pic_name', 'responsible', 'responsible_name', 'assigned_to', 'closed_by', 'executor_name', 'executor'] | |
| for col in candidate_executor_cols: | |
| if col in df_local.columns: | |
| EXECUTOR_INDIV_COL = col | |
| break | |
| if EXECUTOR_INDIV_COL is None: | |
| # Fallback — gunakan creator_name (dengan warning transparan) | |
| EXECUTOR_INDIV_COL = 'nama_pic' | |
| # st.warning( | |
| # "⚠️ No dedicated executor column (e.g., 'pic', 'responsible') found. " | |
| # "Using 'creator_name' as proxy for executor — insights may conflate reporters & executors. " | |
| # "Consider adding an executor identifier column for accuracy." | |
| # ) | |
| # ─── Hitung semua metrik ───────────────────────────────────────────────────── | |
| avg_ratio_per_nama = compute_reporter_ratio_by_nama(df_local) # 3a | |
| avg_rate_per_creator = compute_reporter_rate_by_creator(df_local) # 3c | |
| avg_leadtime_nama = compute_executor_leadtime_by_nama(df_local) # 3b | |
| avg_leadtime_per_indiv = compute_executor_leadtime_by_individual(df_local, name_col=EXECUTOR_INDIV_COL) # 3d | |
| # ─── Helper: Warna berdasarkan ranking global ─────────────────────────────── | |
| def add_color_by_global_rank(df, value_col, top_n=5, worst_n=5, high_is_good=True): | |
| df = df.copy() | |
| df['color'] = '#1f77b4' # default biru | |
| if len(df) == 0: | |
| return df | |
| if high_is_good: | |
| # Nilai tinggi = baik → top N → hijau | |
| top_names = df.nlargest(top_n, value_col)['nama' if 'nama' in df.columns else df.columns[0]] | |
| df.loc[df[df.columns[0]].isin(top_names), 'color'] = '#4CAF50' | |
| else: | |
| # Nilai tinggi = buruk (e.g., lead time) → worst N (tertinggi) → merah | |
| worst_names = df.nlargest(worst_n, value_col)['nama' if 'nama' in df.columns else df.columns[0]] | |
| df.loc[df[df.columns[0]].isin(worst_names), 'color'] = '#D32F2F' | |
| return df | |
| # ─── Layout: 2 baris × 2 kolom ─────────────────────────────────────────────── | |
| col_3a, col_3c = st.columns(2) | |
| # ─── 3a: Reporter by Division (Rasio Temuan/Orang) ─────────────────────────── | |
| with col_3a: | |
| st.markdown("<h5 style='text-align:center;'>3a. Finding/Person Ratio by Division (Reporter)</h5>", unsafe_allow_html=True) | |
| if avg_ratio_per_nama.empty: | |
| st.warning("No data for division-level reporter analysis.") | |
| else: | |
| sort_opt = st.selectbox("Show:", ["Top 10", "Bottom 10"], key='sort_3a') | |
| full_sorted = avg_ratio_per_nama.sort_values('avg_monthly_ratio', ascending=False) | |
| subset = full_sorted.head(10) if sort_opt == "Top 10" else full_sorted.tail(10).sort_values('avg_monthly_ratio', ascending=True) | |
| # Tambahkan warna: top 5 → hijau | |
| colored = add_color_by_global_rank(avg_ratio_per_nama, 'avg_monthly_ratio', top_n=5, high_is_good=True) | |
| subset = subset.merge(colored[['nama', 'color']], on='nama', how='left').fillna({'color': '#1f77b4'}) | |
| # Reverse untuk visual (tertinggi di atas) | |
| if sort_opt == "Top 10": | |
| subset = subset.iloc[::-1] | |
| fig = px.bar( | |
| subset, x='avg_monthly_ratio', y='nama', orientation='h', | |
| title=f'{sort_opt} Divisions', | |
| labels={'avg_monthly_ratio': 'Monthly Ratio', 'nama': 'Division'}, | |
| color='color', color_discrete_map={c: c for c in subset['color'].unique()}, | |
| text=subset['avg_monthly_ratio'].apply(lambda x: f'{x:.2f}') | |
| ) | |
| fig.update_layout(height=450, showlegend=False, yaxis={'categoryorder': 'array', 'categoryarray': subset['nama'].tolist()}) | |
| fig.update_traces(textposition='auto') | |
| st.plotly_chart(fig, use_container_width=True) | |
| # 🔍 Insight (dari full data) | |
| if len(full_sorted) >= 2: | |
| min_r, max_r, mean_r = full_sorted['avg_monthly_ratio'].min(), full_sorted['avg_monthly_ratio'].max(), full_sorted['avg_monthly_ratio'].mean() | |
| best, worst = full_sorted.iloc[0]['nama'], full_sorted.iloc[-1]['nama'] | |
| st.markdown( | |
| f"<div class='ai-insight'>" | |
| f"<strong>Insight:</strong> Division reporting ratio ranges from {min_r:.2f} to {max_r:.2f} (avg: {mean_r:.2f}). " | |
| f"<strong>{best}</strong> leads." | |
| # <strong>{worst}</strong> lags. | |
| f"<strong>Recommendation:</strong> Benchmark processes from {best}; assess capacity/tooling gaps in {worst}." | |
| f"</div>", | |
| unsafe_allow_html=True | |
| ) | |
| # ─── 3c: Reporter by Individual ────────────────────────────────────────────── | |
| with col_3c: | |
| st.markdown("<h5 style='text-align:center;'>3c. Monthly Findings per Reporter (Individual)</h5>", unsafe_allow_html=True) | |
| if avg_rate_per_creator.empty: | |
| st.warning("No data for individual reporter analysis.") | |
| else: | |
| sort_opt = st.selectbox("Show:", ["Top 10", "Bottom 10"], key='sort_3c') | |
| full_sorted = avg_rate_per_creator.sort_values('avg_monthly_rate', ascending=False) | |
| subset = full_sorted.head(10) if sort_opt == "Top 10" else full_sorted.tail(10).sort_values('avg_monthly_rate', ascending=True) | |
| colored = add_color_by_global_rank(avg_rate_per_creator, 'avg_monthly_rate', top_n=5, high_is_good=True) | |
| subset = subset.merge(colored[['creator_name', 'color']], on='creator_name', how='left').fillna({'color': '#1f77b4'}) | |
| if sort_opt == "Top 10": | |
| subset = subset.iloc[::-1] | |
| fig = px.bar( | |
| subset, x='avg_monthly_rate', y='creator_name', orientation='h', | |
| title=f'{sort_opt} Reporters', | |
| labels={'avg_monthly_rate': 'Monthly Findings', 'creator_name': 'Reporter'}, | |
| color='color', color_discrete_map={c: c for c in subset['color'].unique()}, | |
| text=subset['avg_monthly_rate'].apply(lambda x: f'{x:.2f}') | |
| ) | |
| fig.update_layout(height=450, showlegend=False, yaxis={'categoryorder': 'array', 'categoryarray': subset['creator_name'].tolist()}) | |
| fig.update_traces(textposition='auto') | |
| st.plotly_chart(fig, use_container_width=True) | |
| if len(full_sorted) >= 2: | |
| min_r, max_r, mean_r = full_sorted['avg_monthly_rate'].min(), full_sorted['avg_monthly_rate'].max(), full_sorted['avg_monthly_rate'].mean() | |
| top_reporter = full_sorted.iloc[0]['creator_name'] | |
| st.markdown( | |
| f"<div class='ai-insight'>" | |
| f"<strong>Insight:</strong> Individual reporting ranges from {min_r:.2f} to {max_r:.2f} findings/month (avg: {mean_r:.2f}). " | |
| f"<strong>{top_reporter}</strong> is the most active contributor. " | |
| f"<strong>Recommendation:</strong> Recognize top reporters; inspection causes of low activity (<0.5/month) via 1:1 review." | |
| f"</div>", | |
| unsafe_allow_html=True | |
| ) | |
| # ─── Baris 2: Executor ─────────────────────────────────────────────────────── | |
| col_3b, col_3d = st.columns(2) | |
| # ─── 3b: Executor by Division (Lead Time) ──────────────────────────────────── | |
| with col_3b: | |
| st.markdown("<h5 style='text-align:center;'>3b. Monthly Lead Time by Division (Executor)</h5>", unsafe_allow_html=True) | |
| if avg_leadtime_nama.empty: | |
| st.warning("No data for division-level executor analysis.") | |
| else: | |
| sort_opt = st.selectbox( | |
| "Show:", | |
| ["Top 10", "Bottom 10"], | |
| key='sort_3b' | |
| ) | |
| # Sort penuh sekali — ascending: tercepat → terlambat | |
| full_sorted = avg_leadtime_nama.sort_values('avg_monthly_leadtime', ascending=True) | |
| # Ambil subset sesuai pilihan | |
| if sort_opt == "Top 10": | |
| # 10 tercepat: ascending (kecil → besar), tetap diurut ascending → tercepat di atas | |
| subset = full_sorted.head(10).sort_values('avg_monthly_leadtime', ascending=False) | |
| else: # "Bottom 10 Slowest" | |
| # 10 terlambat: descending (besar → kecil), agar terlambat di atas | |
| subset = full_sorted.tail(10).sort_values('avg_monthly_leadtime', ascending=False) | |
| # Warna: 5 terlambat secara global (bukan di subset!) → warna merah | |
| colored = add_color_by_global_rank( | |
| avg_leadtime_nama, | |
| 'avg_monthly_leadtime', | |
| worst_n=5, | |
| high_is_good=False # lebih tinggi = buruk | |
| ) | |
| subset = subset.merge(colored[['nama', 'color']], on='nama', how='left').fillna({'color': '#1f77b4'}) | |
| fig = px.bar( | |
| subset, | |
| x='avg_monthly_leadtime', | |
| y='nama', | |
| orientation='h', | |
| title=sort_opt, | |
| labels={'avg_monthly_leadtime': 'Monthly Lead Time (Days)', 'nama': 'Division'}, | |
| color='color', | |
| color_discrete_map={c: c for c in subset['color'].unique()}, | |
| text=subset['avg_monthly_leadtime'].apply(lambda x: f'{x:.1f}') | |
| ) | |
| fig.update_layout( | |
| height=450, | |
| showlegend=False, | |
| yaxis={'categoryorder': 'array', 'categoryarray': subset['nama'].tolist()} | |
| ) | |
| fig.update_traces(textposition='auto') | |
| st.plotly_chart(fig, use_container_width=True) | |
| if len(full_sorted) >= 2: | |
| min_lt = full_sorted['avg_monthly_leadtime'].min() | |
| max_lt = full_sorted['avg_monthly_leadtime'].max() | |
| mean_lt = full_sorted['avg_monthly_leadtime'].mean() | |
| fastest = full_sorted.iloc[0]['nama'] | |
| slowest = full_sorted.iloc[-1]['nama'] | |
| st.markdown( | |
| f"<div class='ai-insight'>" | |
| f"<strong>Insight:</strong> Resolution time ranges from {min_lt:.1f} to {max_lt:.1f} days (avg: {mean_lt:.1f}). " | |
| f"<strong>{slowest}</strong> has highest risk of SLA breach. " | |
| f"<strong>Recommendation:</strong> Initiate RCA for {slowest}; replicate workflow from {fastest}. Set SLA threshold at 7 days." | |
| f"</div>", | |
| unsafe_allow_html=True | |
| ) | |
| # ─── 3d: Executor by Individual ────────────────────────────────────────────── | |
| with col_3d: | |
| st.markdown( | |
| f"<h5 style='text-align:center;'>3d. Monthly Lead Time per Executor (Individual)</h5>", | |
| unsafe_allow_html=True | |
| ) | |
| if avg_leadtime_per_indiv.empty: | |
| st.warning(f"No data for individual executor analysis (column: '{EXECUTOR_INDIV_COL}').") | |
| else: | |
| sort_opt = st.selectbox( | |
| "Show:", | |
| ["Top 10", "Bottom 10"], | |
| key='sort_3d' | |
| ) | |
| full_sorted = avg_leadtime_per_indiv.sort_values('avg_monthly_leadtime', ascending=True) | |
| if sort_opt == "Top 10": | |
| subset = full_sorted.head(10).subset = full_sorted.head(10).sort_values('avg_monthly_leadtime', ascending=False) | |
| else: # "Bottom 10 Slowest" | |
| subset = full_sorted.tail(10).sort_values('avg_monthly_leadtime', ascending=False) | |
| # Warna berdasarkan ranking global | |
| colored = add_color_by_global_rank( | |
| avg_leadtime_per_indiv, | |
| 'avg_monthly_leadtime', | |
| worst_n=5, | |
| high_is_good=False | |
| ) | |
| id_col = EXECUTOR_INDIV_COL | |
| subset = subset.merge(colored[[id_col, 'color']], on=id_col, how='left').fillna({'color': '#1f77b4'}) | |
| fig = px.bar( | |
| subset, | |
| x='avg_monthly_leadtime', | |
| y=id_col, | |
| orientation='h', | |
| title=sort_opt, | |
| labels={'avg_monthly_leadtime': 'Monthly Lead Time (Days)', id_col: 'Executor'}, | |
| color='color', | |
| color_discrete_map={c: c for c in subset['color'].unique()}, | |
| text=subset['avg_monthly_leadtime'].apply(lambda x: f'{x:.1f}') | |
| ) | |
| fig.update_layout( | |
| height=450, | |
| showlegend=False, | |
| yaxis={'categoryorder': 'array', 'categoryarray': subset[id_col].tolist()} | |
| ) | |
| fig.update_traces(textposition='auto') | |
| st.plotly_chart(fig, use_container_width=True) | |
| if len(full_sorted) >= 2: | |
| min_lt = full_sorted['avg_monthly_leadtime'].min() | |
| max_lt = full_sorted['avg_monthly_leadtime'].max() | |
| mean_lt = full_sorted['avg_monthly_leadtime'].mean() | |
| slowest_exec = full_sorted.iloc[-1][id_col] | |
| fastest_exec = full_sorted.iloc[0][id_col] | |
| st.markdown( | |
| f"<div class='ai-insight'>" | |
| f"<strong>Insight:</strong> Executor performance ranges from {min_lt:.1f} to {max_lt:.1f} days (avg: {mean_lt:.1f}). " | |
| f"<strong>{slowest_exec}</strong> requires support to meet SLA. " | |
| f"<strong>Recommendation:</strong> Assign mentor to executors >7 days; document & share best practices from top performers (e.g., {fastest_exec})." | |
| f"</div>", | |
| unsafe_allow_html=True | |
| ) | |
| #Objective 4 | |
| try: | |
| from wordcloud import WordCloud | |
| import matplotlib.pyplot as plt | |
| import plotly.express as px | |
| from collections import Counter | |
| WORDCLOUD_AVAILABLE = True | |
| except ImportError: | |
| WORDCLOUD_AVAILABLE = False | |
| st.markdown("<h3 class='section-title'>OBJECTIVE 4 - Unsafe Issues: Which One is the Most Often?</h3>", | |
| unsafe_allow_html=True) | |
| # 🔹 Fungsi untuk membuat judul seragam | |
| def create_consistent_title(title_text): | |
| return f"<div style='font-family: Arial; font-size: 16px; font-weight: bold; color: #2c3e50; text-align: center; margin-bottom: 8px;'>{title_text}</div>" | |
| if WORDCLOUD_AVAILABLE: | |
| # 🔥 Data untuk pie chart: semua kategori | |
| df_all_kategori = df_local.copy() | |
| # 🔥 Data untuk wordcloud: hanya Non-Positive | |
| df_filtered_kategori = df_local[df_local['temuan_kategori'] != 'Positive'] \ | |
| if 'temuan_kategori' in df_local.columns else df_local | |
| # 2 Kolom | |
| col1, col2 = st.columns(2) | |
| # === PIE CHART: Semua temuan_kategori (Dengan Custom Text Position untuk Unsafe Action & Near Miss) === | |
| with col1: | |
| st.markdown(create_consistent_title("Distribution of All Issue Categories"), unsafe_allow_html=True) | |
| if 'temuan_kategori' in df_all_kategori.columns: | |
| category_counts = df_all_kategori['temuan_kategori'].value_counts() | |
| if not category_counts.empty: | |
| # Mapping warna | |
| color_map = { | |
| 'Positive': '#2E7D32', # Hijau | |
| 'Unsafe Condition': '#EF5350', # Merah Muda | |
| 'Unsafe Action': '#F48FB1', # Pink | |
| 'Near Miss': '#BDBDBD' # Abu-abu | |
| } | |
| colors = [color_map.get(cat, '#9E9E9E') for cat in category_counts.index] | |
| # Buat pie chart tanpa legend | |
| fig_pie = px.pie( | |
| names=category_counts.index, | |
| values=category_counts.values, | |
| color_discrete_sequence=colors | |
| ) | |
| # ⚙️ Custom: tarik keluar slice untuk Unsafe Action & Near Miss | |
| pull_values = [] | |
| for cat in category_counts.index: | |
| if cat in ['Unsafe Action', 'Near Miss']: | |
| pull_values.append(0.1) | |
| else: | |
| pull_values.append(0.0) | |
| fig_pie.update_traces( | |
| textposition='outside', | |
| textinfo='percent+label', | |
| pull=pull_values, | |
| marker=dict(line=dict(color='#FFFFFF', width=1)) | |
| ) | |
| fig_pie.update_layout( | |
| showlegend=False, | |
| height=450, | |
| margin=dict(t=20, b=20, l=40, r=40) | |
| ) | |
| st.plotly_chart(fig_pie, use_container_width=True) | |
| # ✅ CUSTOM LEGEND | |
| st.markdown("<div style='text-align: center; font-size: 14px; margin-top: -10px;'>", unsafe_allow_html=True) | |
| legend_items = [] | |
| for cat in category_counts.index: | |
| color = color_map.get(cat, '#9E9E9E') | |
| item = ( | |
| f"<span style='display: inline-block; width: 16px; height: 4px; background-color: {color}; " | |
| f"margin: 0 8px; border-radius: 1px;'></span>{cat}" | |
| ) | |
| legend_items.append(item) | |
| legend_html = " | ".join(legend_items) | |
| st.markdown(f"<div>{legend_html}</div>", unsafe_allow_html=True) | |
| st.markdown("</div>", unsafe_allow_html=True) | |
| else: | |
| st.warning("No data available for pie chart.") | |
| else: | |
| st.warning("Column 'temuan_kategori' not available.") | |
| # === WORDCLOUD: Hanya Non-Positive (dari keyword_kategori) === | |
| with col2: | |
| st.markdown(create_consistent_title("Unsafe Issues"), unsafe_allow_html=True) | |
| st.markdown( | |
| """ | |
| <p style='text-align: center; font-size: 1rem; color: #555; margin-top: -0.5rem;'> | |
| Categorization uses NLP — Natural Language Processing from random text | |
| </p> | |
| """, | |
| unsafe_allow_html=True | |
| ) | |
| if df_filtered_kategori.empty: | |
| st.warning("No data available after filtering out 'Positive' category.") | |
| else: | |
| if 'keyword_kategori' in df_filtered_kategori.columns: | |
| # Gabungkan & bersihkan teks | |
| import re | |
| text_series = df_filtered_kategori['keyword_kategori'].dropna().astype(str) | |
| text = ' '.join(text_series) | |
| text = re.sub(r'[^a-zA-Z\s]', ' ', text).strip() | |
| if text: | |
| # Generate wordcloud | |
| wordcloud = WordCloud( | |
| width=1600, | |
| height=800, | |
| background_color='white', | |
| colormap='viridis', | |
| max_words=1000, | |
| random_state=42 | |
| ).generate(text) | |
| # Tampilkan | |
| fig, ax = plt.subplots(figsize=(3, 2), dpi=200) | |
| ax.imshow(wordcloud, interpolation='bilinear') | |
| ax.axis('off') | |
| plt.tight_layout() | |
| st.pyplot(fig, use_container_width=True) | |
| # 🔍 🔹 Insight dinamis: top 3 keywords | |
| words_clean = [w.lower() for w in text.split() if len(w) > 2] | |
| top3 = [word.capitalize() for word, _ in Counter(words_clean).most_common(3)] | |
| if top3: | |
| insight = f"The most frequent unsafe issue themes are: <strong>{', '.join(top3)}</strong>." | |
| else: | |
| insight = "Recurring unsafe issue patterns are detectable in textual findings." | |
| st.markdown( | |
| f""" | |
| <div style='text-align: center; font-size: 14px; color: #2c3e50; margin-top: 12px; line-height: 1.5;'> | |
| <strong>Insight:</strong> {insight} | |
| </div> | |
| """, | |
| unsafe_allow_html=True | |
| ) | |
| else: | |
| st.warning("No valid text remaining after cleaning.") | |
| else: | |
| st.warning("Column 'keyword_kategori' not available.") | |
| else: | |
| st.info("WordCloud library not installed. Install `wordcloud` and `matplotlib` to enable this feature.") | |
| # =================== 5. Matrix (Tetap Dipertahankan) =================== | |
| # =================== 5. Matrix (Tetap Dipertahankan) =================== | |
| st.markdown("<h3 class='section-title'>OBJECTIVE 5 - Findings vs Lead Time: Which Companies Move Slow?</h3>", unsafe_allow_html=True) | |
| import math | |
| import plotly.express as px | |
| import pandas as pd | |
| try: | |
| df_local_matrix = df.copy() | |
| # ============================ | |
| # 0. Filter: ONLY 1 COMPANY & 1 PROFILE (if applicable) | |
| # ============================ | |
| # (Skipped for general dashboard view) | |
| # ============================ | |
| # 1. Exclude Positive findings | |
| # ============================ | |
| if 'temuan_kategori' in df_local_matrix.columns: | |
| df_local_matrix = df_local_matrix[df_local_matrix["temuan_kategori"] != "Positive"] | |
| # ============================ | |
| # 2. Ensure datetime columns | |
| # ============================ | |
| df_local_matrix['created_at'] = pd.to_datetime(df_local_matrix['created_at'], errors='coerce') | |
| df_local_matrix['close_at'] = pd.to_datetime(df_local_matrix['close_at'], errors='coerce') | |
| # ============================ | |
| # 3. Compute LEAD TIME | |
| # ============================ | |
| df_local_matrix['lead_time_days'] = (df_local_matrix['close_at'] - df_local_matrix['created_at']).dt.days | |
| df_local_matrix['lead_time_days'] = df_local_matrix['lead_time_days'].fillna(0) | |
| # ============================ | |
| # 4. Average Monthly Finding Count per Operator | |
| # ============================ | |
| if 'nama' not in df_local_matrix.columns: | |
| st.error("❌ Kolom 'nama' (operator) tidak ditemukan.") | |
| # st.stop() # Stop bisa dihilangkan agar script tetap jalan | |
| else: | |
| # Buat kolom bulan (YYYY-MM) | |
| df_local_matrix = df_local_matrix.assign(month=df_local_matrix['created_at'].dt.to_period('M').astype(str)) | |
| # Hitung jumlah temuan per operator per bulan | |
| monthly_counts = ( | |
| df_local_matrix | |
| .groupby(['nama', 'month'])['kode_temuan'] | |
| .nunique() | |
| .reset_index(name='monthly_count') | |
| ) | |
| # Hitung rata-rata bulanan per operator | |
| operator_avg = ( | |
| monthly_counts | |
| .groupby('nama')['monthly_count'] | |
| .mean() # <-- RATA-RATA per bulan (bukan total!) | |
| .reset_index(name='Finding Count') | |
| ) | |
| # ============================ | |
| # 5. Average Lead Time per Operator | |
| # ============================ | |
| operator_lead = ( | |
| df_local_matrix.groupby('nama')['lead_time_days'] | |
| .mean() | |
| .reset_index(name='Average Lead Time') | |
| ) | |
| # ============================ | |
| # 6. Merge Risk Matrix | |
| # ============================ | |
| risk_matrix = operator_avg.merge(operator_lead, on='nama', how='left') | |
| risk_matrix = risk_matrix.rename(columns={'nama': 'Operator Name'}) | |
| # Handle operator tanpa lead time (e.g., belum closed) | |
| risk_matrix['Average Lead Time'] = risk_matrix['Average Lead Time'].fillna(0) | |
| # ============================ | |
| # 7. Quadrant Logic (unchanged) | |
| # ============================ | |
| X_LIMIT = 20 | |
| Y_LIMIT = 3 | |
| def assign_quadrant(row): | |
| if row['Finding Count'] >= X_LIMIT and row['Average Lead Time'] >= Y_LIMIT: | |
| return "Quadrant I – High Leadtime & High Count" | |
| elif row['Finding Count'] < X_LIMIT and row['Average Lead Time'] >= Y_LIMIT: | |
| return "Quadrant II – High Leadtime but Low Count" | |
| elif row['Finding Count'] >= X_LIMIT and row['Average Lead Time'] < Y_LIMIT: | |
| return "Quadrant III – Low Leadtime but High Count" | |
| else: | |
| return "Quadrant IV – Low Leadtime & Low Count" | |
| risk_matrix['quadrant'] = risk_matrix.apply(assign_quadrant, axis=1) | |
| quadrant_count = risk_matrix['quadrant'].value_counts() | |
| # ============================ | |
| # 8. Scatter Plot (format visual tetap sam persis) | |
| # ============================ | |
| max_x = risk_matrix['Finding Count'].max() + 1 | |
| max_y = risk_matrix['Average Lead Time'].max() + 5 | |
| fig = px.scatter( | |
| risk_matrix, | |
| x='Finding Count', | |
| y='Average Lead Time', | |
| hover_name="Operator Name", | |
| size=[12] * len(risk_matrix), | |
| size_max=15, | |
| title="Audit Findings Risk Matrix: Avg Monthly Count vs Lead Time" | |
| ) | |
| # Background quadrant (same as original) | |
| fig.add_shape(type="rect", x0=X_LIMIT, x1=max_x, y0=Y_LIMIT, y1=max_y, | |
| fillcolor="rgba(255,0,0,0.25)", line_width=0) # Q1 | |
| fig.add_shape(type="rect", x0=0, x1=X_LIMIT, y0=Y_LIMIT, y1=max_y, | |
| fillcolor="rgba(255,150,50,0.25)", line_width=0) # Q2 | |
| fig.add_shape(type="rect", x0=X_LIMIT, x1=max_x, y0=0, y1=Y_LIMIT, | |
| fillcolor="rgba(255,200,200,0.25)", line_width=0) # Q3 | |
| fig.add_shape(type="rect", x0=0, x1=X_LIMIT, y0=0, y1=Y_LIMIT, | |
| fillcolor="rgba(0,120,255,0.15)", line_width=0) # Q4 | |
| fig.add_vline(x=X_LIMIT, line_dash="dash", line_color="black") | |
| fig.add_hline(y=Y_LIMIT, line_dash="dash", line_color="black") | |
| # Quadrant count annotations (same positions & style) | |
| fig.add_annotation(x=X_LIMIT + (max_x - X_LIMIT)/2, | |
| y=Y_LIMIT + (max_y - Y_LIMIT)/2, | |
| text=f"<b>{quadrant_count.get('Quadrant I – High Leadtime & High Count',0)}</b>", | |
| showarrow=False, font=dict(size=22, color="darkred")) | |
| fig.add_annotation(x=X_LIMIT/2, | |
| y=Y_LIMIT + (max_y - Y_LIMIT)/2, | |
| text=f"<b>{quadrant_count.get('Quadrant II – High Leadtime but Low Count',0)}</b>", | |
| showarrow=False, font=dict(size=22, color="orange")) | |
| fig.add_annotation(x=X_LIMIT + (max_x - X_LIMIT)/2, | |
| y=Y_LIMIT/2, | |
| text=f"<b>{quadrant_count.get('Quadrant III – Low Leadtime but High Count',0)}</b>", | |
| showarrow=False, font=dict(size=22, color="red")) | |
| fig.add_annotation(x=X_LIMIT/2, | |
| y=Y_LIMIT/2, | |
| text=f"<b>{quadrant_count.get('Quadrant IV – Low Leadtime & Low Count',0)}</b>", | |
| showarrow=False, font=dict(size=22, color="green")) | |
| st.plotly_chart(fig, use_container_width=True) | |
| # ============================ | |
| # 9. Summary Table | |
| # ============================ | |
| st.subheader("Summary (Avg Monthly Count vs Avg Lead Time)") | |
| st.dataframe( | |
| risk_matrix.sort_values("Finding Count", ascending=False), | |
| use_container_width=True | |
| ) | |
| except Exception as e: | |
| st.error(f"⚠️ Error Risk Matrix: {e}") | |
| # st.exception(e) # Uncomment for debugging | |
| # st.exception(e) # Uncomment for debugging | |
| import streamlit as st | |
| import plotly.graph_objects as go | |
| import numpy as np | |
| import pandas as pd | |
| # =================== OBJECTIVE 6 - Predictive Dashboard & Early Warning Signals =================== | |
| st.markdown("<h3 class='section-title'>OBJECTIVE 6 — Prediction for Early Warning Signals: Which Ones Have Less Future Inspections?</h3>", unsafe_allow_html=True) | |
| # ✅ Enhanced CSS + Minimal Sortable JS | |
| st.markdown(""" | |
| <style> | |
| @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap'); | |
| .predictive-panel { | |
| margin-bottom: 28px; | |
| background: white; | |
| border-radius: 12px; | |
| box-shadow: 0 4px 16px rgba(0,0,0,0.05); | |
| overflow: hidden; | |
| border: 1px solid #edf2f7; | |
| } | |
| .predictive-header { | |
| background: #E3F2FD; | |
| color: #003DA5; | |
| font-weight: 700; | |
| font-size: 1.1em; | |
| padding: 14px 20px; | |
| border-left: 4px solid #003DA5; | |
| } | |
| .predictive-table-wrapper { | |
| padding: 0 20px 20px; | |
| } | |
| .predictive-table-wrapper table { | |
| width: 100%; | |
| border-collapse: collapse; | |
| font-family: 'Inter', sans-serif; | |
| font-size: 0.94em; | |
| margin-top: 12px; | |
| } | |
| .predictive-table-wrapper th, | |
| .predictive-table-wrapper td { | |
| text-align: center !important; | |
| padding: 12px 10px; | |
| border: 1px solid #e5e7eb; | |
| vertical-align: middle; | |
| } | |
| .predictive-table-wrapper th { | |
| background-color: #f8fafc; | |
| font-weight: 600; | |
| color: #003DA5; | |
| cursor: pointer; | |
| user-select: none; | |
| position: relative; | |
| } | |
| .predictive-table-wrapper th:hover { | |
| background-color: #edf2f7; | |
| } | |
| .predictive-table-wrapper th::after { | |
| content: " ⇵"; | |
| opacity: 0.4; | |
| margin-left: 4px; | |
| } | |
| .predictive-table-wrapper th.asc::after { | |
| content: " ↑"; | |
| opacity: 1; | |
| color: #003DA5; | |
| } | |
| .predictive-table-wrapper th.desc::after { | |
| content: " ↓"; | |
| opacity: 1; | |
| color: #c62828; | |
| } | |
| .predictive-table-wrapper tr:nth-child(even) { | |
| background-color: #fafcff; | |
| } | |
| .predictive-table-wrapper tr:hover { | |
| background-color: #f0f7ff !important; | |
| } | |
| .predictive-note { | |
| font-size: 0.86em; | |
| color: #64748b; | |
| margin-top: 10px; | |
| padding: 0 20px; | |
| line-height: 1.5; | |
| } | |
| .spark { | |
| font-family: 'Courier New', monospace; | |
| font-weight: bold; | |
| } | |
| .status-active { color: #2e7d32; font-weight: bold; } | |
| .status-neutral { color: #f57c00; } | |
| .status-inactive { color: #c62828; font-weight: bold; } | |
| .trend-rising { color: #c62828; font-weight: 600; } | |
| .trend-stable { color: #388e3c; } | |
| .trend-declining { color: #d32f2f; } | |
| .footer-insight { | |
| background: #003DA5; | |
| color: white; | |
| padding: 14px 24px; | |
| border-radius: 10px; | |
| font-weight: 600; | |
| margin-top: 20px; | |
| text-align: center; | |
| font-size: 1.08em; | |
| box-shadow: 0 3px 10px rgba(0,61,165,0.15); | |
| } | |
| .warning-box { | |
| background: #fff8e1; | |
| border-left: 4px solid #ffc107; | |
| padding: 12px 16px; | |
| font-size: 0.9em; | |
| color: #5d4037; | |
| margin: 10px 20px 0; | |
| border-radius: 0 6px 6px 0; | |
| } | |
| </style> | |
| <script> | |
| function makeSortable(tableId) { | |
| const table = document.getElementById(tableId); | |
| if (!table) return; | |
| let headers = table.querySelectorAll("th"); | |
| headers.forEach((header, i) => { | |
| header.onclick = () => { | |
| headers.forEach(h => h.classList.remove('asc', 'desc')); | |
| let rows = Array.from(table.querySelectorAll("tr:nth-child(n+2)")); | |
| let isNumeric = !isNaN(rows[0]?.cells[i]?.textContent.replace(/[^0-9.-]/g, '')); | |
| rows.sort((a, b) => { | |
| let aVal = a.cells[i].textContent.trim(); | |
| let bVal = b.cells[i].textContent.trim(); | |
| if (isNumeric) { | |
| aVal = parseFloat(aVal.replace(/[^0-9.-]/g, '')) || 0; | |
| bVal = parseFloat(bVal.replace(/[^0-9.-]/g, '')) || 0; | |
| } | |
| return header.classList.contains('asc') ? bVal - aVal : aVal - bVal; | |
| }); | |
| header.classList.toggle('asc'); | |
| header.classList.toggle('desc'); | |
| rows.forEach(row => table.querySelector('tbody').appendChild(row)); | |
| }; | |
| }); | |
| } | |
| setTimeout(() => { | |
| makeSortable('tbl-creators'); | |
| makeSortable('tbl-locations'); | |
| makeSortable('tbl-divisions'); | |
| makeSortable('tbl-categories'); | |
| }, 800); | |
| </script> | |
| """, unsafe_allow_html=True) | |
| #bloob line | |
| # 🔹 Helper: Sparkline ala chart saham — hijau kalau naik, merah kalau turun | |
| def ascii_sparkline_pln(data): | |
| if not data or len(data) == 0: | |
| return "<span class='spark' style='color:#999;'>—</span>" | |
| try: | |
| data = [float(x) for x in data] | |
| n = len(data) | |
| if n == 1: | |
| return "<span class='spark' style='color:#666;'>•</span>" | |
| # Hitung slope (linear trend) | |
| xs = list(range(n)) | |
| x_mean = sum(xs) / n | |
| y_mean = sum(data) / n | |
| numerator = sum((x - x_mean) * (y - y_mean) for x, y in zip(xs, data)) | |
| denominator = sum((x - x_mean) ** 2 for x in xs) | |
| slope = numerator / (denominator + 1e-12) | |
| # Tentukan warna & label berdasarkan slope | |
| if slope > 0.001: | |
| color = "#2E7D32" # hijau (naik) | |
| label = "↑" | |
| elif slope < -0.001: | |
| color = "#C62828" # merah (turun) | |
| label = "↓" | |
| else: | |
| color = "#546E7A" # abu-abu (datar) | |
| label = "→" | |
| # Normalisasi koordinat Y untuk SVG | |
| height = 16 | |
| margin = 2 | |
| usable_h = height - 2 * margin | |
| min_y, max_y = min(data), max(data) | |
| if max_y == min_y: | |
| y_coords = [margin + usable_h / 2] * n | |
| else: | |
| y_coords = [ | |
| margin + usable_h * (1 - (v - min_y) / (max_y - min_y + 1e-9)) | |
| for v in data | |
| ] | |
| # Buat polyline | |
| points = " ".join(f"{i * 8},{y:.1f}" for i, y in enumerate(y_coords)) | |
| width = max(24, (n - 1) * 8 + 4) | |
| # SVG inline | |
| svg = ( | |
| f'<svg class="sparkline" width="{width + 6}" height="{height}" ' | |
| f'viewBox="-2 0 {width + 6} {height}" style="vertical-align:middle; ' | |
| f'font-family:sans-serif;" xmlns="http://www.w3.org/2000/svg">' | |
| f'<polyline points="{points}" fill="none" stroke="{color}" ' | |
| f'stroke-width="1.3" stroke-linecap="round" stroke-linejoin="round"/>' | |
| f'<text x="{(n-1)*8 + 1}" y="14" font-size="9" fill="{color}" ' | |
| f'dominant-baseline="middle">{label}</text>' | |
| f'</svg>' | |
| ) | |
| return svg | |
| except Exception: | |
| return "<span class='spark' style='color:#999;'>⚠</span>" | |
| # ——————— 1. Creators: ONLY Coverage < 90% AND Slope < 0 ——————— | |
| def predict_creators(df): | |
| # ❌ Tidak ada filter Non-Positive | |
| if 'creator_name' not in df.columns or df.empty: | |
| return pd.DataFrame() | |
| start_month = df['created_at'].min().to_period('M') | |
| end_month = df['created_at'].max().to_period('M') | |
| all_months = pd.period_range(start=start_month, end=end_month, freq='M') | |
| df_monthly = ( | |
| df.groupby(['creator_name', df['created_at'].dt.to_period('M')]) | |
| .size() | |
| .unstack(fill_value=0) | |
| .reindex(columns=all_months, fill_value=0) | |
| .stack() | |
| .reset_index(name='count') | |
| ) | |
| df_monthly.columns = ['Creator', 'Month', 'Count'] | |
| results = [] | |
| for creator, group in df_monthly.groupby('Creator'): | |
| ts = group.set_index('Month')['Count'] | |
| total = len(all_months) | |
| active = (ts > 0).sum() | |
| coverage = active / total if total > 0 else 0 | |
| avg_rate = ts.mean() | |
| if len(ts) >= 2: | |
| try: | |
| slope = np.polyfit(np.arange(len(ts)), ts.values, 1)[0] | |
| # ✅ FILTER: Coverage < 90% AND Slope < 0 | |
| if slope < 0 and coverage < 0.9: | |
| reason = f"Slope = {slope:.3f}, Coverage = {coverage*100:.1f}%. Avg: {avg_rate:.2f}/mo." | |
| results.append({ | |
| 'Creator': creator, | |
| 'Reports/Month': round(avg_rate, 2), | |
| 'Monthly Consistency (%)': round(coverage * 100, 1), | |
| 'Trend Slope': round(slope, 3), | |
| 'Trend': ascii_sparkline_pln(ts.values.tolist()), | |
| 'Reason': reason | |
| }) | |
| except: | |
| continue | |
| df_res = pd.DataFrame(results) | |
| # ✅ Ambil 10 creator dengan slope paling negatif (paling turun) | |
| return df_res.sort_values('Trend Slope', ascending=True).head(10) if not df_res.empty else df_res | |
| # ——————— 2. Locations: ONLY Coverage < 90% AND Slope < 0 ——————— | |
| def predict_locations(df): | |
| # ❌ Tidak ada filter Non-Positive | |
| if 'nama_lokasi_full' not in df.columns or df.empty: | |
| return pd.DataFrame() | |
| start_month = df['created_at'].min().to_period('M') | |
| end_month = df['created_at'].max().to_period('M') | |
| all_months = pd.period_range(start=start_month, end=end_month, freq='M') | |
| df_monthly = ( | |
| df.groupby(['nama_lokasi_full', df['created_at'].dt.to_period('M')]) | |
| .size() | |
| .unstack(fill_value=0) | |
| .reindex(columns=all_months, fill_value=0) | |
| .stack() | |
| .reset_index(name='count') | |
| ) | |
| df_monthly.columns = ['Location', 'Month', 'Count'] | |
| results = [] | |
| for lokasi, group in df_monthly.groupby('Location'): | |
| ts = group.set_index('Month')['Count'] | |
| total = len(all_months) | |
| active = (ts > 0).sum() | |
| coverage = active / total if total > 0 else 0 | |
| avg_rate = ts.mean() | |
| if len(ts) >= 2: | |
| try: | |
| slope = np.polyfit(np.arange(len(ts)), ts.values, 1)[0] | |
| # ✅ FILTER: Coverage < 90% AND Slope < 0 | |
| if slope < 0 and coverage < 0.9: | |
| reason = f"Slope = {slope:.3f}, Coverage = {coverage*100:.1f}%. Avg: {avg_rate:.2f}/mo." | |
| results.append({ | |
| 'Location': lokasi, | |
| 'Reports/Month': round(avg_rate, 2), | |
| 'Monthly Consistency (%)': round(coverage * 100, 1), | |
| 'Trend Slope': round(slope, 3), | |
| 'Trend': ascii_sparkline_pln(ts.values.tolist()), | |
| 'Reason': reason | |
| }) | |
| except: | |
| continue | |
| df_res = pd.DataFrame(results) | |
| # ✅ Ambil 10 lokasi dengan slope paling negatif (paling turun) | |
| return df_res.sort_values('Trend Slope', ascending=True).head(10) if not df_res.empty else df_res | |
| # ——————— 3. Divisions: ONLY Coverage < 90% AND Slope < 0 ——————— | |
| def predict_divisions(df): | |
| # ❌ Tidak ada filter Non-Positive | |
| if 'nama' not in df.columns or df.empty: | |
| return pd.DataFrame() | |
| start_month = df['created_at'].min().to_period('M') | |
| end_month = df['created_at'].max().to_period('M') | |
| all_months = pd.period_range(start=start_month, end=end_month, freq='M') | |
| df_monthly = ( | |
| df.groupby(['nama', df['created_at'].dt.to_period('M')]) | |
| .size() | |
| .unstack(fill_value=0) | |
| .reindex(columns=all_months, fill_value=0) | |
| .stack() | |
| .reset_index(name='count') | |
| ) | |
| df_monthly.columns = ['Division', 'Month', 'Count'] | |
| results = [] | |
| for div, group in df_monthly.groupby('Division'): | |
| ts = group.set_index('Month')['Count'] | |
| total = len(all_months) | |
| active = (ts > 0).sum() | |
| coverage = active / total if total > 0 else 0 | |
| avg_rate = ts.mean() | |
| if len(ts) >= 2: | |
| try: | |
| slope = np.polyfit(np.arange(len(ts)), ts.values, 1)[0] | |
| # ✅ FILTER: Coverage < 90% AND Slope < 0 | |
| if slope < 0 and coverage < 0.9: | |
| reason = f"Slope = {slope:.3f}, Coverage = {coverage*100:.1f}%. Avg: {avg_rate:.2f}/mo." | |
| results.append({ | |
| 'Division': div, | |
| 'Reports/Month': round(avg_rate, 2), | |
| 'Monthly Consistency (%)': round(coverage * 100, 1), | |
| 'Trend Slope': round(slope, 3), | |
| 'Trend': ascii_sparkline_pln(ts.values.tolist()), | |
| 'Reason': reason | |
| }) | |
| except: | |
| continue | |
| df_res = pd.DataFrame(results) | |
| # ✅ Ambil 10 divisi dengan slope paling negatif (paling turun) | |
| return df_res.sort_values('Trend Slope', ascending=True).head(10) if not df_res.empty else df_res | |
| # ——————— 4. Categories: ONLY Non-Positive + Coverage=100% & Trend Slope > 0 ——————— | |
| def predict_categories(df): | |
| # 🔥 Filter: Hanya yang bukan 'Positive' | |
| df = df[df['temuan_kategori'] != 'Positive'].copy() # ✅ Filter non-Positive | |
| if 'kategori' not in df.columns or df.empty: | |
| return pd.DataFrame() | |
| start_month = df['created_at'].min().to_period('M') | |
| end_month = df['created_at'].max().to_period('M') | |
| all_months = pd.period_range(start=start_month, end=end_month, freq='M') | |
| n_months = len(all_months) | |
| results = [] | |
| for cat, group in df.groupby('kategori'): | |
| ts_data = ( | |
| group.groupby(group['created_at'].dt.to_period('M')) | |
| .size() | |
| .reindex(all_months, fill_value=0) | |
| ) | |
| total_reports = ts_data.sum() | |
| avg_per_month = total_reports / n_months if n_months > 0 else 0 | |
| active_months = (ts_data > 0).sum() | |
| coverage = active_months / n_months if n_months > 0 else 0 | |
| slope = 0.0 | |
| if len(ts_data) >= 2: | |
| try: | |
| slope = np.polyfit(np.arange(len(ts_data)), ts_data.values, 1)[0] | |
| except: | |
| pass | |
| results.append({ | |
| 'Category': cat, | |
| 'Avg/Month': round(avg_per_month, 2), | |
| 'Monthly Consistency (%)': round(coverage * 100, 1), | |
| 'Trend Slope': round(slope, 3), | |
| 'Trend': ascii_sparkline_pln(ts_data.values.tolist()) | |
| }) | |
| df_res = pd.DataFrame(results) | |
| # ✅ FILTER: Coverage = 100% AND Trend Slope > 0 | |
| if not df_res.empty: | |
| df_res = df_res | |
| # [ | |
| # (df_res['Coverage (%)'] == 100.0) & | |
| # (df_res['Trend Slope'] > 0) | |
| # ].copy() | |
| df_res['Status'] = df_res['Trend Slope'].apply( | |
| lambda s: "<span class='trend-rising'>High-Risk Rising</span>" if s > 0.2 else | |
| "<span class='trend-stable'>Emerging Rising</span>" | |
| ) | |
| df_res = df_res.sort_values('Trend Slope', ascending=False) | |
| return df_res.reset_index(drop=True).head(10) if not df_res.empty else df_res | |
| # ——————— RUN ——————— | |
| df_creator = predict_creators(df_filtered) | |
| df_location = predict_locations(df_filtered) | |
| df_division = predict_divisions(df_filtered) | |
| df_category = predict_categories(df_filtered) | |
| # 🎯 PANEL 1: Creators (FILTERED: Coverage < 90% & Slope < 0) | |
| st.markdown("<div class='predictive-panel'>", unsafe_allow_html=True) | |
| st.markdown("<div class='predictive-header'>1. Which Reporters Are Predicted to Have Less Future Inspections? (Top 10 Most Declining)</div>", unsafe_allow_html=True) | |
| if not df_creator.empty: | |
| cols = ['Creator', 'Reports/Month', 'Monthly Consistency (%)', 'Trend Slope', 'Trend'] | |
| # 🔥 Rename hanya untuk DISPLAY, bukan data asli | |
| df_display = df_creator[cols].rename(columns={ | |
| "Reason": "Reason Forecast" | |
| }) | |
| html = df_display.to_html(escape=False, index=False, table_id="tbl-creators") | |
| st.markdown(f"<div class='predictive-table-wrapper'>{html}</div>", unsafe_allow_html=True) | |
| # st.markdown( | |
| # "<div class='predictive-note'>" | |
| # "<strong>Criteria:</strong> Coverage < 90% AND negative slope. " | |
| # "High-risk: steep negative slope + low baseline activity." | |
| # "</div>", | |
| # unsafe_allow_html=True | |
| # ) | |
| # else: | |
| # st.markdown( | |
| # "<div class='predictive-table-wrapper'>" | |
| # "<p style='text-align:center; color:#666; padding:24px; font-style:italic;'>" | |
| # "No creators meet criteria: Coverage < 90% and negative trend." | |
| # "</p>" | |
| # "<div class='warning-box'>" | |
| # "💡 Note: Creators with Coverage ≥ 90% are excluded — they are considered stable reporters." | |
| # "</div>" | |
| # "</div>", | |
| # unsafe_allow_html=True | |
| # ) | |
| st.markdown("</div>", unsafe_allow_html=True) | |
| # 🎯 PANEL 2: Locations (FILTERED: Coverage < 90% & Slope < 0) | |
| st.markdown("<div class='predictive-panel'>", unsafe_allow_html=True) | |
| st.markdown("<div class='predictive-header'>2. Which Locations Are Predicted to Have Less Future Inspections? (Top 10 Most Declining)</div>", unsafe_allow_html=True) | |
| if not df_location.empty: | |
| cols = ['Location', 'Reports/Month', 'Monthly Consistency (%)', 'Trend Slope', 'Trend'] | |
| # # 🔥 Rename hanya untuk DISPLAY, bukan data asli | |
| df_display = df_location[cols].rename(columns={ | |
| "Reason": "Reason Forecast" | |
| }) | |
| html = df_display.to_html(escape=False, index=False, table_id="tbl-locations") | |
| st.markdown(f"<div class='predictive-table-wrapper'>{html}</div>", unsafe_allow_html=True) | |
| # st.markdown( | |
| # "<div class='predictive-note'>" | |
| # "<strong>Criteria:</strong> Coverage < 90% AND negative slope. " | |
| # "High-risk: steep negative slope + low baseline activity." | |
| # "</div>", | |
| # unsafe_allow_html=True | |
| # ) | |
| # else: | |
| # st.markdown( | |
| # "<div class='predictive-table-wrapper'>" | |
| # "<p style='text-align:center; color:#666; padding:24px; font-style:italic;'>" | |
| # "No locations meet criteria: Coverage < 90% and negative trend." | |
| # "</p>" | |
| # "<div class='warning-box'>" | |
| # "💡 Note: Locations with Coverage ≥ 90% are excluded — they are considered stable reporting zones." | |
| # "</div>" | |
| # "</div>", | |
| # unsafe_allow_html=True | |
| # ) | |
| st.markdown("</div>", unsafe_allow_html=True) | |
| # 🎯 PANEL 3: Divisions (FILTERED: Coverage < 90% & Slope < 0) | |
| st.markdown("<div class='predictive-panel'>", unsafe_allow_html=True) | |
| st.markdown("<div class='predictive-header'>3. Which Divisions Are Predicted to Have Less Future Inspections? (Top 10 Most Declining)</div>", unsafe_allow_html=True) | |
| if not df_division.empty: | |
| cols = ['Division', 'Reports/Month', 'Monthly Consistency (%)', 'Trend Slope', 'Trend'] | |
| # # 🔥 Rename hanya untuk DISPLAY, bukan data asli | |
| df_display = df_division[cols].rename(columns={ | |
| "Reason": "Reason Forecast" | |
| }) | |
| html = df_display.to_html(escape=False, index=False, table_id="tbl-divisions") | |
| st.markdown(f"<div class='predictive-table-wrapper'>{html}</div>", unsafe_allow_html=True) | |
| # st.markdown( | |
| # "<div class='predictive-note'>" | |
| # "<strong>Criteria:</strong> Coverage < 90% AND negative slope. " | |
| # "High-risk: steep negative slope + low baseline activity." | |
| # "</div>", | |
| # unsafe_allow_html=True | |
| # ) | |
| # else: | |
| # st.markdown( | |
| # "<div class='predictive-table-wrapper'>" | |
| # "<p style='text-align:center; color:#666; padding:24px; font-style:italic;'>" | |
| # "No divisions meet criteria: Coverage < 90% and negative trend." | |
| # "</p>" | |
| # "<div class='warning-box'>" | |
| # "💡 Note: Divisions with Coverage ≥ 90% are excluded — they are considered stable reporting zones." | |
| # "</div>" | |
| # "</div>", | |
| # unsafe_allow_html=True | |
| # ) | |
| st.markdown("</div>", unsafe_allow_html=True) | |
| # 🎯 PANEL 4: Categories (FILTERED: Non-Positive + Coverage=100% & Rising) | |
| # st.markdown("<div class='predictive-panel'>", unsafe_allow_html=True) | |
| st.markdown( | |
| "<div class='predictive-header'>" | |
| "4. Which Issue Categories Are Likely to Appear in the Next 3 Months (Increasing Trend Only)" | |
| "<span style='font-size:0.75em; font-weight:400; color:#003DA5;'>" | |
| " (* Categorization uses NLP — Natural Language Processing from random text)" | |
| "</span>" | |
| "</div>", | |
| unsafe_allow_html=True | |
| ) | |
| # if not df_category.empty: | |
| # cols = ['Category', 'Avg/Month', 'Coverage (%)', 'Trend Slope', 'Status', 'Trend'] | |
| # # 🔵 Rename ONLY for display | |
| # df_display = df_category[cols].rename(columns={ | |
| # "Status": "Status Issue for Next Month" | |
| # }) | |
| # html = df_display.to_html(escape=False, index=False, table_id="tbl-categories") | |
| # st.markdown(f"<div class='predictive-table-wrapper'>{html}</div>", unsafe_allow_html=True) | |
| # st.markdown( | |
| # "<div class='predictive-note'>" | |
| # "<strong>Filtered:</strong> Reported every month (100% coverage) with increasing trend. " | |
| # "<strong>Avg/Month</strong> = total ÷ months. " | |
| # "<span class='trend-rising'>High-Risk Rising</span> = slope > 0.2." | |
| # "</div>", | |
| # unsafe_allow_html=True | |
| # ) | |
| # else: | |
| # st.markdown( | |
| # "<div class='predictive-table-wrapper'>" | |
| # "<p style='text-align:center; color:#c62828; padding:24px; font-weight:500;'>" | |
| # "⚠️ No rising categories with 100% monthly coverage." | |
| # "</p>" | |
| # "<p style='text-align:center; color:#666; font-size:0.9em;'>" | |
| # "Consider relaxing coverage filter if data is sparse." | |
| # "</p></div>", | |
| # unsafe_allow_html=True | |
| # ) | |
| # st.markdown("</div>", unsafe_allow_html=True) | |
| # =================== WHITEBOARD STYLE CHART FOR PANEL 4 =================== | |
| # st.markdown("<h4 style='text-align: center; color: #2c3e50;'>Whiteboard Insight: Trend vs Frequency</h4>", unsafe_allow_html=True) | |
| # Buat chart scatter dengan gaya whiteboard | |
| if not df_category.empty: | |
| # Ambil data untuk scatter | |
| df_plot = df_category.copy() | |
| df_plot['Size'] = df_plot['Avg/Month'] # Ukuran lingkaran = frekuensi (Avg/Month) | |
| df_plot['Y'] = df_plot['Trend Slope'] # Y = Trend Slope | |
| # Buat scatter plot | |
| fig = go.Figure() | |
| # Tambahkan scatter | |
| fig.add_trace(go.Scatter( | |
| x=df_plot['Category'], | |
| y=df_plot['Y'], | |
| mode='markers', | |
| marker=dict( | |
| size=df_plot['Size'] * 1.5, # Skala ukuran agar tidak terlalu besar | |
| color='#003DA5', | |
| line=dict(width=2, color='white'), | |
| opacity=0.8 | |
| ), | |
| text=df_plot['Category'], | |
| textposition="top center", | |
| textfont=dict(size=10, color="#003DA5", family="Arial"), | |
| hovertemplate="<b>%{text}</b><br>Trend Slope: %{y:.3f}<br>Avg/Month: %{marker.size:.1f}<extra></extra>", | |
| )) | |
| # Layout | |
| fig.update_layout( | |
| # title=dict(text="<b>Issue Category Trend vs Frequency (Non-Positive)</b>", x=0.5, y=0.95), | |
| xaxis=dict( | |
| title="Category", | |
| tickangle=45, | |
| showgrid=True, | |
| gridcolor="#e0e0e0", | |
| gridwidth=1, | |
| tickfont=dict(size=10, color="#003DA5") | |
| ), | |
| yaxis=dict( | |
| title="Trend Slope", | |
| showgrid=True, | |
| gridcolor="#e0e0e0", | |
| gridwidth=1, | |
| tickfont=dict(size=10, color="#003DA5") | |
| ), | |
| plot_bgcolor="white", | |
| paper_bgcolor="white", | |
| height=500, | |
| margin=dict(t=60, b=60, l=60, r=60), | |
| font=dict(family="Arial", size=12, color="#003DA5"), | |
| showlegend=False | |
| ) | |
| # Tambahkan teks manual seperti whiteboard | |
| fig.add_annotation( | |
| xref="paper", yref="paper", | |
| x=0.05, y=0.95, | |
| showarrow=False, | |
| font=dict(size=12, color="#003DA5"), | |
| textangle=-90, | |
| align="center", | |
| visible=False | |
| ) | |
| fig.add_annotation( | |
| xref="paper", yref="paper", | |
| x=0.5, y=0.05, | |
| # text="", | |
| showarrow=False, | |
| font=dict(size=12, color="#003DA5"), | |
| align="center", | |
| visible=False | |
| ) | |
| fig.add_annotation( | |
| xref="paper", yref="paper", | |
| x=0.95, y=0.95, | |
| text="<b>Number of Findings</b><br>• 10<br>•• 20<br>••• 30<br>•••• 40", | |
| showarrow=False, | |
| font=dict(size=12, color="#003DA5"), | |
| align="left" | |
| ) | |
| fig.add_annotation( | |
| xref="paper", yref="paper", | |
| x=0.05, y=0.1, | |
| text="<b>Semakin tinggi = semakin sering ditemukan deviasi</b><br><b>Semakin besar = semakin banyak ditemukan deviasi</b><br><b></b>", | |
| showarrow=False, | |
| font=dict(size=12, color="#003DA5"), | |
| align="left" | |
| ) | |
| st.plotly_chart(fig, use_container_width=True) | |
| # Insight | |
| # st.markdown("### 💡 Insight") | |
| # insight_text = ( | |
| # f"<div class='ai-insight'>" | |
| # f"The scatter plot visualizes issue categories with 100% coverage and positive trend. " | |
| # f"Categories higher on the Y-axis indicate stronger upward trends, while larger circles indicate higher average monthly frequency. " | |
| # f"This helps identify which non-positive issues are both increasing in trend and frequently reported — these should be prioritized for intervention." | |
| # f"</div>" | |
| # ) | |
| # st.markdown(insight_text, unsafe_allow_html=True) | |
| else: | |
| st.info("No data available for non-positive issue categories with 100% coverage and positive trend.") | |
| import streamlit as st | |
| import pandas as pd | |
| from huggingface_hub import InferenceClient | |
| # ========================== | |
| # LLM FUNCTION (HuggingFace) | |
| # ========================== | |
| def llm_generate_recommendation(insights_text): | |
| client = InferenceClient(model="meta-llama/Meta-Llama-3-8B-Instruct") | |
| prompt = f""" | |
| You are an expert Safety & Reliability Agentic AI. | |
| Based on the following structured INSIGHT SUMMARY, create: | |
| 1. Recommended Action (max 2 sentences) | |
| 2. Risk Mitigation Strategy (max 2 sentences) | |
| The insights: | |
| {insights_text} | |
| Now generate concise, high-impact: | |
| - "recommendation" | |
| - "mitigation" | |
| Return output in EXACT JSON format: | |
| {{ | |
| "recommendation": "...", | |
| "mitigation": "..." | |
| }} | |
| """ | |
| output = client.text_generation(prompt, max_new_tokens=500, temperature=0.4) | |
| return output | |
| # ============================================== | |
| # === INSIGHT COMPUTATION FUNCTION (your code) | |
| # ============================================== | |
| def extract_agentic_insights_v5(df: pd.DataFrame): | |
| dev = { | |
| "lowest_ratio_9_locs": [], | |
| "obj3a_lowest_div": None, | |
| "obj3b_slowest_executor": None, | |
| "obj3c_lowest_reporter": None, | |
| "obj3d_slowest_div": None, | |
| "obj4_unsafe_condition_pct": 0.0, | |
| "obj4_unsafe_action_pct": 0.0, | |
| "obj4_near_miss_pct": 0.0, | |
| "obj5_q1_divs": [], | |
| "obj5_q2_divs": [], | |
| "obj6_top2_categories": [], | |
| } | |
| # === 1. 9 locations with lowest finding-to-reporter ratio === | |
| if {'nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan'}.issubset(df.columns): | |
| calc = df[['nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan']].copy() | |
| calc['created_at'] = pd.to_datetime(calc['created_at'], errors='coerce') | |
| calc = calc.dropna(subset=['created_at', 'nama_lokasi_full', 'creator_nid']) | |
| calc['bulan'] = calc['created_at'].dt.to_period('M') | |
| monthly = calc.groupby(['nama_lokasi_full', 'bulan']).agg( | |
| findings=('kode_temuan', 'size'), | |
| reporters=('creator_nid', 'nunique') | |
| ).reset_index() | |
| monthly = monthly[monthly['reporters'] > 0] | |
| monthly['ratio'] = monthly['findings'] / monthly['reporters'] | |
| loc_avg = monthly.groupby('nama_lokasi_full')['ratio'].mean() | |
| lowest_9 = loc_avg.nsmallest(9) | |
| dev["lowest_ratio_9_locs"] = [(loc, round(ratio, 3)) for loc, ratio in lowest_9.items()] | |
| # === 2a Lowest-ratio division === | |
| if {'nama', 'creator_nid', 'created_at', 'kode_temuan'}.issubset(df.columns): | |
| calc = df[['nama', 'creator_nid', 'created_at', 'kode_temuan']].copy() | |
| calc['bulan'] = pd.to_datetime(calc['created_at']).dt.to_period('M') | |
| agg = calc.groupby(['nama', 'bulan']).agg( | |
| findings=('kode_temuan', 'size'), | |
| reporters=('creator_nid', 'nunique') | |
| ) | |
| agg = agg[agg['reporters'] > 0].reset_index() | |
| agg['ratio'] = agg['findings'] / agg['reporters'] | |
| div_ratio = agg.groupby('nama')['ratio'].mean() | |
| if not div_ratio.empty: | |
| dev["obj3a_lowest_div"] = (div_ratio.idxmin(), round(div_ratio.min(), 2)) | |
| # === 2b Slowest executor | |
| if 'days_to_close' in df.columns: | |
| valid = df[df['days_to_close'].notna() & (df['days_to_close'] >= 0)] | |
| exec_col = 'nama_pic' if 'nama_pic' in valid.columns else 'creator_name' | |
| if exec_col in valid.columns: | |
| lead = valid.groupby(exec_col)['days_to_close'].mean() | |
| if not lead.empty: | |
| dev["obj3b_slowest_executor"] = (lead.idxmax(), round(lead.max(), 1)) | |
| # === 2c Lowest reporter freq | |
| if {'creator_name', 'created_at'}.issubset(df.columns): | |
| calc = df[['creator_name', 'created_at']].copy() | |
| calc['bulan'] = pd.to_datetime(calc['created_at']).dt.to_period('M') | |
| monthly = calc.groupby(['creator_name', 'bulan']).size().reset_index(name='count') | |
| avg = monthly.groupby('creator_name')['count'].mean() | |
| avg = avg[avg > 0] | |
| if not avg.empty: | |
| dev["obj3c_lowest_reporter"] = (avg.idxmin(), round(avg.min(), 2)) | |
| # === 2d Slowest division resolution | |
| if 'days_to_close' in df.columns and 'nama' in df.columns: | |
| valid = df[df['days_to_close'].notna() & (df['days_to_close'] >= 0)] | |
| if not valid.empty: | |
| lead = valid.groupby('nama')['days_to_close'].mean() | |
| if not lead.empty: | |
| dev["obj3d_slowest_div"] = (lead.idxmax(), round(lead.max(), 1)) | |
| # === 3. Non-Positive findings composition | |
| if 'temuan_kategori' in df.columns: | |
| cnt = df['temuan_kategori'].value_counts(normalize=True) * 100 | |
| dev["obj4_unsafe_condition_pct"] = round(cnt.get("Unsafe Condition", 0), 1) | |
| dev["obj4_unsafe_action_pct"] = round(cnt.get("Unsafe Action", 0), 1) | |
| dev["obj4_near_miss_pct"] = round(cnt.get("Near Miss", 0), 1) | |
| # === 4. Quadrants | |
| X_LIMIT, Y_LIMIT = 20, 3 | |
| if {'nama', 'created_at', 'days_to_close', 'kode_temuan'}.issubset(df.columns): | |
| calc = df.copy() | |
| calc['created_at'] = pd.to_datetime(calc['created_at'], errors='coerce') | |
| calc = calc.assign(month=calc['created_at'].dt.to_period('M').astype(str)) | |
| monthly_counts = calc.groupby(['nama', 'month'])['kode_temuan'].nunique().reset_index() | |
| avg_count = monthly_counts.groupby('nama')['kode_temuan'].mean().reset_index(name='Finding Count') | |
| leadtime = calc.groupby('nama')['days_to_close'].mean().reset_index(name='Avg Lead Time') | |
| mat = avg_count.merge(leadtime, on='nama', how='left').fillna(0) | |
| for _, r in mat.iterrows(): | |
| if r['Finding Count'] >= X_LIMIT and r['Avg Lead Time'] >= Y_LIMIT: | |
| dev["obj5_q1_divs"].append(r['nama']) | |
| elif r['Finding Count'] < X_LIMIT and r['Avg Lead Time'] >= Y_LIMIT: | |
| dev["obj5_q2_divs"].append(r['nama']) | |
| # === 5. Top categories | |
| if {'kategori', 'temuan_kategori', 'created_at'}.issubset(df.columns): | |
| nonpos = df[df['temuan_kategori'] != 'Positive'] | |
| if not nonpos.empty: | |
| start = nonpos['created_at'].min().to_period('M') | |
| end = nonpos['created_at'].max().to_period('M') | |
| n_months = len(pd.period_range(start=start, end=end, freq='M')) | |
| cat_avg = (nonpos.groupby('kategori').size() / n_months).sort_values(ascending=False).head(2) | |
| dev["obj6_top2_categories"] = [(cat, round(val, 1)) for cat, val in cat_avg.items()] | |
| return dev | |
| # ========================== | |
| # ===== MAIN APP =========== | |
| # ========================== | |
| # app.py | |
| import streamlit as st | |
| import pandas as pd | |
| import json | |
| from typing import List, Dict | |
| # Transformers pipeline for lightweight local LLM (text2text) | |
| from transformers import pipeline | |
| import math | |
| # ===================================================================== | |
| # OBJECTIVE 7 — INSIGHT & RECOMMENDATION (LLM FIRST, RULE-BASED IF FAIL) | |
| # ===================================================================== | |
| st.markdown("<h3 class='section-title'>OBJECTIVE 7 — Risk Mitigation: What are the Insights and Recommendations?</h3>", unsafe_allow_html=True) | |
| # ============================================================ | |
| # 1. LLM LOADER | |
| # ============================================================ | |
| import torch | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
| def load_llm_model(): | |
| try: | |
| tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-2b-it") | |
| model = AutoModelForCausalLM.from_pretrained( | |
| "google/gemma-2-2b-it", | |
| torch_dtype=torch.float16, | |
| device_map="auto" | |
| ) | |
| return tokenizer, model | |
| except: | |
| return None, None | |
| def llm_generate_recommendation(insight_summary_text): | |
| """ | |
| Try to generate JSON recommendation using LLM. | |
| If LLM fails → return None, and rule-based will be used. | |
| """ | |
| tok, mdl = load_llm_model() | |
| if tok is None or mdl is None: | |
| return None | |
| try: | |
| prompt = f""" | |
| You are an expert Industrial Safety Analyst AI. | |
| Below is an INSIGHT SUMMARY from a Safety Reporting System: | |
| --- | |
| {insight_summary_text} | |
| --- | |
| Generate 5 Recommended Actions and Risk Mitigation in clean JSON: | |
| {{ | |
| "recommendations": [ | |
| {{"point":"1","rec":"...","mit":"..."}}, | |
| {{"point":"2","rec":"...","mit":"..."}}, | |
| {{"point":"3","rec":"...","mit":"..."}} | |
| ] | |
| }} | |
| """ | |
| inputs = tok(prompt, return_tensors="pt").to(mdl.device) | |
| out = mdl.generate( | |
| **inputs, | |
| max_new_tokens=380, | |
| temperature=0.25, | |
| do_sample=True | |
| ) | |
| text = tok.decode(out[0], skip_special_tokens=True) | |
| import re, json | |
| json_match = re.search(r"\{[\s\S]*\}", text) | |
| if not json_match: | |
| return None | |
| return json.loads(json_match.group(0)) | |
| except: | |
| return None | |
| # ============================================================ | |
| # 2. RULE-BASED ENGINE (YOUR ORIGINAL SCRIPT) | |
| # ============================================================ | |
| def extract_agentic_insights_v5(df: pd.DataFrame): | |
| dev = { | |
| "lowest_ratio_9_locs": [], | |
| "obj3a_lowest_div": None, | |
| "obj3b_slowest_executor": None, | |
| "obj3c_lowest_reporter": None, | |
| "obj3d_slowest_div": None, | |
| "obj4_unsafe_condition_pct": 0.0, | |
| "obj4_unsafe_action_pct": 0.0, | |
| "obj4_near_miss_pct": 0.0, | |
| "obj5_q1_divs": [], | |
| "obj5_q2_divs": [], | |
| "obj6_top2_categories": [], | |
| } | |
| # === 1. 9 lowest locations === | |
| if {'nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan'}.issubset(df.columns): | |
| calc = df[['nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan']].copy() | |
| calc['created_at'] = pd.to_datetime(calc['created_at'], errors='coerce') | |
| calc = calc.dropna(subset=['created_at', 'nama_lokasi_full', 'creator_nid']) | |
| calc['bulan'] = calc['created_at'].dt.to_period('M') | |
| monthly = calc.groupby(['nama_lokasi_full', 'bulan']).agg( | |
| findings=('kode_temuan', 'size'), | |
| reporters=('creator_nid', 'nunique') | |
| ).reset_index() | |
| monthly = monthly[monthly['reporters'] > 0] | |
| monthly['ratio'] = monthly['findings'] / monthly['reporters'] | |
| loc_avg = monthly.groupby('nama_lokasi_full')['ratio'].mean() | |
| lowest_9 = loc_avg.nsmallest(9) | |
| dev["lowest_ratio_9_locs"] = [(loc, round(ratio, 3)) for loc, ratio in lowest_9.items()] | |
| # === 2a: Lowest division ratio === | |
| if {'nama', 'creator_nid', 'created_at', 'kode_temuan'}.issubset(df.columns): | |
| calc = df[['nama', 'creator_nid', 'created_at', 'kode_temuan']].copy() | |
| calc['bulan'] = pd.to_datetime(calc['created_at']).dt.to_period('M') | |
| agg = calc.groupby(['nama', 'bulan']).agg( | |
| findings=('kode_temuan', 'size'), | |
| reporters=('creator_nid', 'nunique') | |
| ) | |
| agg = agg[agg['reporters'] > 0].reset_index() | |
| agg['ratio'] = agg['findings'] / agg['reporters'] | |
| div_ratio = agg.groupby('nama')['ratio'].mean() | |
| if not div_ratio.empty: | |
| dev["obj3a_lowest_div"] = (div_ratio.idxmin(), round(div_ratio.min(), 2)) | |
| # === 2b: Slowest executor === | |
| if 'days_to_close' in df.columns: | |
| valid = df[df['days_to_close'].notna() & (df['days_to_close'] >= 0)] | |
| exec_col = 'nama_pic' if 'nama_pic' in valid.columns else 'creator_name' | |
| if exec_col in valid.columns: | |
| lead = valid.groupby(exec_col)['days_to_close'].mean() | |
| if not lead.empty: | |
| dev["obj3b_slowest_executor"] = (lead.idxmax(), round(lead.max(), 1)) | |
| # === 2c: Lowest reporter === | |
| if {'creator_name', 'created_at'}.issubset(df.columns): | |
| calc = df[['creator_name', 'created_at']].copy() | |
| calc['bulan'] = pd.to_datetime(calc['created_at']).dt.to_period('M') | |
| monthly = calc.groupby(['creator_name', 'bulan']).size().reset_index(name='count') | |
| avg = monthly.groupby('creator_name')['count'].mean() | |
| avg = avg[avg > 0] | |
| if not avg.empty: | |
| dev["obj3c_lowest_reporter"] = (avg.idxmin(), round(avg.min(), 2)) | |
| # === 2d: Slowest division === | |
| if 'days_to_close' in df.columns and 'nama' in df.columns: | |
| valid = df[df['days_to_close'].notna() & (df['days_to_close'] >= 0)] | |
| if not valid.empty: | |
| lead = valid.groupby('nama')['days_to_close'].mean() | |
| if not lead.empty: | |
| dev["obj3d_slowest_div"] = (lead.idxmax(), round(lead.max(), 1)) | |
| # === 3. Non-positive === | |
| if 'temuan_kategori' in df.columns: | |
| cnt = df['temuan_kategori'].value_counts(normalize=True) * 100 | |
| dev["obj4_unsafe_condition_pct"] = round(cnt.get("Unsafe Condition", 0), 1) | |
| dev["obj4_unsafe_action_pct"] = round(cnt.get("Unsafe Action", 0), 1) | |
| dev["obj4_near_miss_pct"] = round(cnt.get("Near Miss", 0), 1) | |
| # === 4. Risk Quadrant === | |
| X_LIMIT, Y_LIMIT = 20, 3 | |
| if {'nama', 'created_at', 'days_to_close', 'kode_temuan'}.issubset(df.columns): | |
| calc = df.copy() | |
| calc['created_at'] = pd.to_datetime(calc['created_at'], errors='coerce') | |
| calc = calc.assign(month=calc['created_at'].dt.to_period('M').astype(str)) | |
| monthly_counts = calc.groupby(['nama', 'month'])['kode_temuan'].nunique().reset_index() | |
| avg_count = monthly_counts.groupby('nama')['kode_temuan'].mean().reset_index(name='Finding Count') | |
| leadtime = calc.groupby('nama')['days_to_close'].mean().reset_index(name='Avg Lead Time') | |
| mat = avg_count.merge(leadtime, on='nama', how='left').fillna(0) | |
| for _, r in mat.iterrows(): | |
| if r['Finding Count'] >= X_LIMIT and r['Avg Lead Time'] >= Y_LIMIT: | |
| dev["obj5_q1_divs"].append(r['nama']) | |
| elif r['Finding Count'] < X_LIMIT and r['Avg Lead Time'] >= Y_LIMIT: | |
| dev["obj5_q2_divs"].append(r['nama']) | |
| # === 5. Top categories === | |
| if {'kategori', 'temuan_kategori', 'created_at'}.issubset(df.columns): | |
| nonpos = df[df['temuan_kategori'] != 'Positive'] | |
| if not nonpos.empty: | |
| start = nonpos['created_at'].min().to_period('M') | |
| end = nonpos['created_at'].max().to_period('M') | |
| n_months = len(pd.period_range(start=start, end=end, freq='M')) | |
| cat_avg = (nonpos.groupby('kategori').size() / n_months).sort_values(ascending=False).head(2) | |
| dev["obj6_top2_categories"] = [(cat, round(v, 1)) for cat, v in cat_avg.items()] | |
| return dev | |
| # ============================================================ | |
| # 3. RUN INSIGHT ENGINE | |
| # ============================================================ | |
| dev = extract_agentic_insights_v5(df_filtered) | |
| # Build Insight Summary Text | |
| insight_lines = [] | |
| if dev["lowest_ratio_9_locs"]: | |
| loc_list = ", ".join([f"<strong>{loc}</strong> ({ratio})" for loc, ratio in dev["lowest_ratio_9_locs"]]) | |
| insight_lines.append(f"1. Nine locations with the lowest finding-to-reporter ratio: {loc_list}.") | |
| parts = [] | |
| if dev["obj3a_lowest_div"]: | |
| parts.append(f"division <strong>{dev['obj3a_lowest_div'][0]}</strong> (ratio: {dev['obj3a_lowest_div'][1]})") | |
| if dev["obj3c_lowest_reporter"]: | |
| parts.append(f"reporter <strong>{dev['obj3c_lowest_reporter'][0]}</strong> ({dev['obj3c_lowest_reporter'][1]} findings/month)") | |
| if dev["obj3d_slowest_div"]: | |
| parts.append(f"division <strong>{dev['obj3d_slowest_div'][0]}</strong> ({dev['obj3d_slowest_div'][1]} days)") | |
| if dev["obj3b_slowest_executor"]: | |
| parts.append(f"executor <strong>{dev['obj3b_slowest_executor'][0]}</strong> ({dev['obj3b_slowest_executor'][1]} days)") | |
| if parts: | |
| insight_lines.append("2. Uneven operational capacity: " + "; ".join(parts)) | |
| uc, ua, nm = dev["obj4_unsafe_condition_pct"], dev["obj4_unsafe_action_pct"], dev["obj4_near_miss_pct"] | |
| if uc + ua + nm > 0: | |
| insight_lines.append(f"3. Non-Positive composition: Unsafe Condition ({uc}%), Unsafe Action ({ua}%), Near Miss ({nm}%).") | |
| if dev["obj5_q1_divs"] or dev["obj5_q2_divs"]: | |
| q1 = ", ".join([f"<strong>{x}</strong>" for x in dev["obj5_q1_divs"][:5]]) | |
| q2 = ", ".join([f"<strong>{x}</strong>" for x in dev["obj5_q2_divs"][:5]]) | |
| insight_lines.append(f"4. High-risk divisions (QI): {q1 or '—'}. Hidden-risk (QII): {q2 or '—'}.") | |
| if dev["obj6_top2_categories"]: | |
| c1, c2 = dev["obj6_top2_categories"] | |
| insight_lines.append(f"5. Top recurring non-Positive categories: <strong>{c1[0]}</strong> ({c1[1]}/mo) & <strong>{c2[0]}</strong> ({c2[1]}/mo).") | |
| insight_text = "<br><br>".join(insight_lines) | |
| # ============================================================ | |
| # 4. TRY LLM FIRST → ELSE FALLBACK RULE-BASED | |
| # ============================================================ | |
| llm_json = llm_generate_recommendation(insight_text) | |
| if llm_json is not None and "recommendations" in llm_json: | |
| recs = llm_json["recommendations"] | |
| else: | |
| # FALLBACK RULE-BASED | |
| recs = [] | |
| if dev["lowest_ratio_9_locs"]: | |
| recs.append({"point":"1","rec":"Launch spot-inspection sprint at across the 9 lowest-ratio locations.","mit":"Enable 3-min QR checklist + automated WhatsApp reminders."}) | |
| if parts: | |
| recs.append({"point":"2","rec":"Real-time monitoring of finding/reporter ratios and resolution lead times per division/individual.","mit":"Trigger coaching alerts to Area PICs & Division"}) | |
| if uc + ua + nm > 0: | |
| recs.append({"point":"3","rec":"Enforce photo-based validation for Unsafe Condition/Action/Near Miss submissions to ensure accurate categorization.","mit":"System blocks submission if photo evidence or category justification is missing."}) | |
| if dev["obj5_q1_divs"] or dev["obj5_q2_divs"]: | |
| recs.append({"point":"4","rec":"Assign dedicated safety crews to Quadrant I divisions; enforce One Finding, One Day closure policy for Quadrant II.","mit":"Auto-generate executive escalation reports to VP Operations if any division remains in QI/QII for ≥2 consecutive months."}) | |
| if dev["obj6_top2_categories"]: | |
| c1, c2 = dev["obj6_top2_categories"] | |
| recs.append({"point":"5","rec":f"Create RCA task force for {c1[0]} & {c2[0]}.","mit":"Update tender specs with required mitigations."}) | |
| # ============================================================ | |
| # 5. RENDERING (NO CHANGES) | |
| # ============================================================ | |
| # Insight Summary Card | |
| st.markdown( | |
| f""" | |
| <div class="card" style="background-color:#f8f9fa;border-left:4px solid #003DA5;padding:16px;margin-bottom:20px;border-radius:4px;box-shadow:0 2px 4px rgba(0,0,0,0.05);"> | |
| <h4 style="margin-top:0;color:#FF6B6B;">Summary</h4> | |
| <p style="margin-bottom:0;line-height:1.6;font-size:0.98em;">{insight_text}</p> | |
| </div> | |
| """, | |
| unsafe_allow_html=True | |
| ) | |
| # Recommendation Table | |
| if recs: | |
| rows = "".join([ | |
| f"<tr><td style='text-align:center;font-weight:bold;width:5%;'>{r['point']}</td>" | |
| f"<td style='padding:8px;'>{r['rec']}</td>" | |
| f"<td style='padding:8px;'>{r['mit']}</td></tr>" | |
| for r in recs | |
| ]) | |
| st.markdown( | |
| f""" | |
| <div class="card" style="background-color:#e8f5e9;border-left:4px solid #4CAF50;padding:16px;margin-bottom:20px;border-radius:4px;box-shadow:0 2px 4px rgba(0,0,0,0.05);"> | |
| <h4 style="margin-top:0;color:#2E7D32;">Recommended Actions & Agentic Risk Mitigation</h4> | |
| <table style="width:100%;border-collapse:collapse;font-size:0.95em;margin-top:12px;"> | |
| <thead> | |
| <tr style="background-color:#e8f5ee;"> | |
| <th style="padding:10px;text-align:center;border:1px solid #ccc;">#</th> | |
| <th style="padding:10px;text-align:left;border:1px solid #ccc;">Recommended Action</th> | |
| <th style="padding:10px;text-align:left;border:1px solid #ccc;">Risk Mitigation Strategy</th> | |
| </tr> | |
| </thead> | |
| <tbody>{rows}</tbody> | |
| </table> | |
| </div> | |
| """, | |
| unsafe_allow_html=True | |
| ) | |
| else: | |
| st.info("No actionable insights generated.") | |