pln / app.py
SHELLAPANDIANGANHUNGING's picture
Update app.py
56deb9e verified
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
from datetime import datetime, timedelta
from typing import List
import os
import sklearn
import kaleido
from transformers import pipeline
import transformers
print("Transformers version:", transformers.__version__)
from transformers import pipeline
print("✅ pipeline imported successfully")
# =================== PAGE CONFIG ===================
st.set_page_config(
page_title="Proactive Safety Intelligence & Analytics Dashboard",
page_icon="",
layout="wide",
initial_sidebar_state="expanded"
)
# =================== CUSTOM CSS (Updated for PLN Colors) ===================
st.markdown("""<style>
.main-header {
background-color: white;
padding: 25px;
border-radius: 12px;
margin-bottom: 25px;
box-shadow: 0 4px 12px rgba(0,0,0,0.06);
border: 1px solid #e0e0e0;
}
h1, h2, h3, h4, h5, .stMarkdown h1, .stMarkdown h2, .stMarkdown h3 {
text-align: center;
font-weight: 700;
color: #003DA5; /* Dark Blue - PLN Color */
}
.metric-card {
background: white;
padding: 16px;
border-radius: 10px;
box-shadow: 0 3px 10px rgba(0,0,0,0.05);
text-align: center;
border: 1px solid #f0f0f0;
}
.ai-insight {
background: #f0f4ff; /* Light Blue */
padding: 14px 18px;
border-left: 4px solid #003DA5; /* PLN Blue */
margin: 10px 0;
border-radius: 0 6px 6px 0;
font-size: 0.95em;
}
.ai-recommendation {
background: #e8f5e9;
padding: 14px 18px;
border-left: 4px solid #4caf50;
margin: 10px 0;
border-radius: 0 6px 6px 0;
font-size: 0.95em;
}
.risk-very-high { color: #c62828; font-weight: bold; }
.risk-high { color: #d32f2f; }
.risk-moderate { color: #f57c00; }
.risk-slight { color: #388e3c; }
.trend-worsening { color: #d32f2f; }
.trend-improvement { color: #388e3c; }
.trend-stable { color: #616161; }
.chart-container {
border: 1px solid #e0e0e0;
border-radius: 8px;
padding: 15px;
margin: 10px 0;
background-color: white;
box-shadow: 0 2px 6px rgba(0,0,0,0.03);
}
.section-title {
color: #003DA5; /* PLN Blue */
font-weight: 700;
font-size: 1.5em;
text-align: left;
margin-top: 20px;
margin-bottom: 10px;
}
.ai-section {
background: #ffffff;
padding: 20px;
border-radius: 8px;
margin: 10px 0;
box-shadow: 0 2px 6px rgba(0,0,0,0.03);
}
/* PLN Styled Selectbox and Multiselect */
.stSelectbox > label, .stMultiselect > label {
color: #003DA5; /* PLN Blue */
font-weight: bold;
}
.stSelectbox > div > div, .stMultiselect > div > div {
border: 2px solid #003DA5; /* PLN Blue Border */
border-radius: 8px;
}
.st-bq {
background-color: #f0f4ff; /* Light Blue Background */
}
.stButton > button {
background-color: #003DA5; /* PLN Blue Button */
color: white;
border: none;
border-radius: 8px;
padding: 8px 16px;
font-weight: bold;
}
.stButton > button:hover {
background-color: #0050A0; /* Darker PLN Blue on Hover */
}
/* Filter Container Styling */
.filter-container {
background-color: #f9f9f9;
border-radius: 10px;
padding: 15px;
margin-bottom: 15px;
box-shadow: 0 2px 4px rgba(0,0,0,0.05);
}
.filter-title {
color: #003DA5;
font-weight: bold;
font-size: 1.1em;
margin-bottom: 10px;
text-align: center;
}
</style>""", unsafe_allow_html=True)
# =================== DATA LOADING (FROM data.xlsx) ===================
@st.cache_data(ttl=300) # refresh every 5 min
def load_data():
file_path = "data.xlsx"
if not os.path.exists(file_path):
st.error(f"❌ File **`{file_path}`** not found. Please ensure it's in the same directory as this script.")
return pd.DataFrame()
try:
# Load Excel file
df = pd.read_excel(file_path, sheet_name='Sheet1', engine='openpyxl')
# Check for required columns
required_cols = ['created_at']
missing = [c for c in required_cols if c not in df.columns]
if missing:
st.error(f"❌ Missing required columns: {missing}. Available: {list(df.columns)}")
return pd.DataFrame()
# Parse datetime
df['created_at'] = pd.to_datetime(df['created_at'], errors='coerce')
if df['created_at'].isna().all():
st.error("❌ `created_at` column could not be parsed as datetime.")
return pd.DataFrame()
# Optional: close_at
if 'close_at' in df.columns:
df['close_at'] = pd.to_datetime(df['close_at'], errors='coerce')
df['days_to_close'] = (df['close_at'] - df['created_at']).dt.total_seconds() / (24 * 3600)
df['days_to_close'] = df['days_to_close'].apply(lambda x: x if x >= 0 else np.nan)
else:
df['days_to_close'] = np.nan
# Derived columns
df['created_month'] = df['created_at'].dt.to_period('M')
df['created_date'] = df['created_at'].dt.date
df['created_week'] = df['created_at'].dt.to_period('W')
# Keep only valid rows
df = df.dropna(subset=['created_at']).copy()
# Log shape
st.sidebar.success(f"Loaded {len(df):,} Audit Findings from `data.xlsx`")
return df
except Exception as e:
st.exception(f"Error loading data.xlsx: {e}")
return pd.DataFrame()
df = load_data()
if df.empty:
st.stop()
# =================== SIDEBAR FILTERS (Perbaikan) ===================
st.sidebar.markdown('<div class="filter-container">', unsafe_allow_html=True)
st.sidebar.markdown('<h4 class="filter-title">Filter Dashboard</h4>', unsafe_allow_html=True)
# Inisialisasi df_filtered
df_filtered = df.copy()
# Flag to track if filters were applied
filters_applied = False
# 1. Date Range Filter
min_date = df['created_at'].min().date()
max_date = df['created_at'].max().date()
date_range = st.sidebar.date_input(
"Date Range",
value=(min_date, max_date),
min_value=min_date,
max_value=max_date
)
# 2. Filter by Vendor (nama_perusahaan) - Default to All
if 'nama_perusahaan' in df.columns:
unique_vendors = sorted(df['nama_perusahaan'].dropna().astype(str).unique())
all_vendors_option = "All Vendors"
vendor_options = [all_vendors_option] + list(unique_vendors)
selected_vendor = st.sidebar.selectbox("Vendor", vendor_options, index=0) # Default to "All"
if selected_vendor != all_vendors_option:
df_filtered = df_filtered[df_filtered['nama_perusahaan'].astype(str) == selected_vendor]
filters_applied = True
# 3. Filter by Area/Unit Type (temuan_nama_distrik or creator_nama_distrik) - Renamed
area_col = None
if 'temuan_nama_distrik' in df_filtered.columns:
area_col = 'temuan_nama_distrik'
elif 'creator_nama_distrik' in df_filtered.columns:
area_col = 'creator_nama_distrik'
if area_col:
# Define mapping for display names
area_mapping = {
'UMRO': 'Unit Maintenance',
'UP GRESIK': 'Unit Pembangkit'
}
unique_areas_raw = sorted(df_filtered[area_col].dropna().astype(str).unique())
# Map raw values to display names, keep unmapped values as is
unique_areas_display = [area_mapping.get(area, area) for area in unique_areas_raw]
# Prepend "All" option
all_areas_option = "All Units"
area_options = [all_areas_option] + unique_areas_display
selected_area_display = st.sidebar.selectbox("Unit Type", area_options, index=0) # Default to "All"
if selected_area_display != all_areas_option:
# Reverse map the selected display name back to the raw value for filtering
selected_area_raw = next((raw for raw, disp in area_mapping.items() if disp == selected_area_display), selected_area_display)
df_filtered = df_filtered[df_filtered[area_col].astype(str) == selected_area_raw]
filters_applied = True
# 4. Status filter - Changed to selectbox (dropdown)
status_filter_applied = False
if 'temuan_status' in df_filtered.columns:
all_status = sorted(df_filtered['temuan_status'].dropna().astype(str).unique())
# Prepend "All" option
all_status_option = "All Status"
status_options = [all_status_option] + list(all_status)
selected_status = st.sidebar.selectbox(
"Status",
status_options,
index=0 # Default to "All"
)
if selected_status != all_status_option:
df_filtered = df_filtered[df_filtered['temuan_status'].astype(str) == selected_status]
status_filter_applied = True
filters_applied = True
# Apply date filter *after* other filters
if len(date_range) == 2:
df_filtered = df_filtered[
(df_filtered['created_at'].dt.date >= date_range[0]) &
(df_filtered['created_at'].dt.date <= date_range[1])
]
if date_range[0] != min_date or date_range[1] != max_date:
filters_applied = True
# Submit Button
submit_clicked = st.sidebar.button("Apply Filters")
# Apply filters logic when button is clicked
if submit_clicked:
# The filtering based on selections already happened above
# Here we just update the summary based on the current state of df_filtered
active_filters = []
if 'selected_vendor' in locals() and selected_vendor != all_vendors_option:
active_filters.append(f"Vendor: {selected_vendor}")
if 'selected_area_display' in locals() and selected_area_display != all_areas_option:
active_filters.append(f"Unit: {selected_area_display}")
if 'selected_status' in locals() and selected_status != all_status_option:
active_filters.append(f"Status: {selected_status}")
if len(date_range) == 2 and (date_range[0] != min_date or date_range[1] != max_date):
active_filters.append(f"Date: {date_range[0]} to {date_range[1]}")
if active_filters:
st.sidebar.success("**Active Filters:**")
for f in active_filters:
st.sidebar.markdown(f"- {f}")
st.sidebar.info(f"Showing {len(df_filtered)} records based on filters.")
else:
st.sidebar.info("No specific filters applied (showing all records).")
else:
# Show default message when not submitted yet
st.sidebar.info("Set filters and click 'Apply Filters'.")
st.sidebar.markdown('</div>', unsafe_allow_html=True)
# =================== HEADER ===================
import streamlit as st
import os
import streamlit as st
import os
# ====================== CSS UNTUK HEADER ===========================
# CSS biar header terlihat seperti 1 kotak besar
st.markdown("""
<style>
.header-box {
padding: 15px 25px;
background-color: white;
border: 1px solid #e0e0e0;
border-radius: 12px;
margin-bottom: 25px;
display: flex;
align-items: center;
justify-content: space-between;
}
.header-title {
text-align: center;
flex: 1;
}
.logo-container {
display: flex;
align-items: center;
justify-content: flex-end;
width: 150px; /* 🔥 Diperbesar dari 100px */
height: auto;
}
</style>
""", unsafe_allow_html=True)
# ==== HEADER LAYOUT (Columns) ====
with st.container():
st.markdown('<div class="header-box">', unsafe_allow_html=True)
col1, col2, col3 = st.columns([1, 5, 1]) # tengah lebih besar
# LEFT (kosong)
with col1:
st.write("")
# CENTER — TITLE
with col2:
st.markdown("""
<div class="header-title">
<h1 style="margin-bottom:6px;">Proactive Safety Intelligence & Analytics Dashboard</h1>
<p style="color:#546e7a; font-size:1.05em; margin-top:-8px;">
</p>
</div>
""", unsafe_allow_html=True)
# RIGHT — LOGO
with col3:
try:
st.image("pln.png", width=120) # 🔥 Diperbesar dari 80px
except:
st.write("") # Jika logo tidak ditemukan, kosongkan
st.markdown('</div>', unsafe_allow_html=True)
# =================== OBJECTIVE 1 - Company Reporting Activity (Polar Bar Chart) ===================
st.markdown(
"""
<style>
.section-title {
text-align: center;
font-size: 1.5rem;
font-weight: 600;
color: #2c3e50;
margin-bottom: 1.2rem;
}
.ai-insight {
background-color: #f8f9fa;
padding: 12px;
border-left: 4px solid #27ae60;
border-radius: 0 4px 4px 0;
font-size: 0.95rem;
line-height: 1.5;
margin-top: 1rem;
}
</style>
<h3 class='section-title'>OBJECTIVE 1 — Company Reporting Activity: Who Reports the Most?</h3>
""",
unsafe_allow_html=True
)
df_local = df_filtered.copy()
df_local['created_month'] = df_local['created_at'].dt.to_period('M')
if 'temuan_kode_distrik' in df_local.columns:
df_local['Area_Type'] = df_local['temuan_kode_distrik'].apply(
lambda x: 'PG' if 'PG' in str(x).upper()
else 'UM' if 'UM' in str(x).upper()
else 'Other'
)
df_pg = df_local[df_local['Area_Type'] == 'PG'].copy()
df_um = df_local[df_local['Area_Type'] == 'UM'].copy()
# --- Hitung rasio per perusahaan ---
def calculate_avg_ratio_per_company(df_area):
if df_area.empty:
return pd.DataFrame()
findings = df_area.groupby(['created_month', 'nama_perusahaan']).size().reset_index(name='findings_count')
creators = df_area.groupby(['created_month', 'nama_perusahaan'])['creator_nid'].nunique().reset_index(name='unique_creators')
merged = findings.merge(creators, on=['created_month', 'nama_perusahaan'], how='outer')
merged = merged.fillna({'findings_count': 0, 'unique_creators': 0})
merged = merged[merged['unique_creators'] > 0]
merged['ratio'] = merged['findings_count'] / merged['unique_creators']
merged['ratio'] = merged['ratio'].replace([np.inf, -np.inf], np.nan)
avg_ratio = merged.groupby('nama_perusahaan')['ratio'].mean().reset_index(name='avg_monthly_ratio')
return avg_ratio.dropna(subset=['avg_monthly_ratio'])
avg_ratio_pg = calculate_avg_ratio_per_company(df_pg)
avg_ratio_um = calculate_avg_ratio_per_company(df_um)
# 🔑 PALET & COLOR MAPPING GLOBAL — KONSISTEN ANTAR CHART
PLN_COLOR = "#FFD700"
PASTEL_BLUES = [
"#A8DADC", "#E2ECE9", "#CCE4E7", "#B5D9D9",
"#98C8D1", "#7FB9C1", "#6BA9B3", "#5A9CB5", "#4A8FA7", "#3A8399"
] # diperpanjang sedikit untuk antisipasi banyak kontraktor
# 🔥 Dapatkan daftar *semua* perusahaan non-PLN unik (dari seluruh data), urut alfabetis → assign warna deterministik
all_companies = pd.concat([avg_ratio_pg, avg_ratio_um])['nama_perusahaan'].dropna().unique()
non_pln_companies = sorted([c for c in all_companies if 'PLN' not in str(c).upper()])
# Mapping: company → color (PLN khusus, lainnya dari palet biru berurut)
COMPANY_COLOR_MAP = {}
for i, company in enumerate(non_pln_companies):
COMPANY_COLOR_MAP[company] = PASTEL_BLUES[i % len(PASTEL_BLUES)]
# Pastikan PLN juga masuk mapping (jika muncul)
for company in all_companies:
if 'PLN' in str(company).upper():
COMPANY_COLOR_MAP[company] = PLN_COLOR
# 🔥 Fungsi helper: dapatkan warna berdasarkan nama perusahaan (konsisten!)
def get_color(company_name):
return COMPANY_COLOR_MAP.get(company_name, "#CCCCCC") # fallback abu-abu
# 🔥 Fungsi chart — pakai mapping global
def create_polar_bar_chart(df, area_name):
if df.empty:
return None
# Urutkan untuk stabilitas visual (misal: ascending ratio)
df = df.sort_values('avg_monthly_ratio', ascending=True).reset_index(drop=True)
companies = df['nama_perusahaan'].tolist()
ratios = df['avg_monthly_ratio'].tolist()
colors = [get_color(comp) for comp in companies]
# Hitung proporsi temuan (untuk lebar bar)
total_findings = df_local[df_local['Area_Type'] == area_name].groupby('nama_perusahaan').size()
angles = [total_findings.get(comp, 0) / total_findings.sum() * 360 if total_findings.sum() > 0 else 0 for comp in companies]
# Hitung posisi tengah
mid_angles = []
current = 0
for a in angles:
mid_angles.append(current + a / 2)
current += a
fig = go.Figure()
# Satu trace saja — lebih clean, legend bisa diatur manual jika perlu
fig.add_trace(go.Barpolar(
r=ratios,
theta=mid_angles,
width=angles,
marker_color=colors,
marker_line_color="white",
marker_line_width=1.2,
opacity=0.9,
hovertemplate="<b>%{text}</b><br>Avg Ratio: %{r:.2f}<extra></extra>",
text=companies,
showlegend=False # Kita akan buat legend manual yang rapi
))
# 🔥 LEGEND MANUAL — hanya tampilkan setiap perusahaan sekali, dengan warna konsisten
# Tambahkan satu scatter "dummy" per perusahaan untuk legend
for company in sorted(set(companies)):
color = get_color(company)
fig.add_trace(go.Scatterpolar(
r=[None], theta=[None], # invisible
mode='markers',
marker=dict(color=color, size=10),
name=company,
showlegend=True
))
fig.update_layout(
title=f'{area_name} Area',
polar=dict(
radialaxis=dict(
visible=True,
tickfont=dict(size=9, color="gray"),
gridcolor='lightgray',
title=dict(text='Avg Finding/Person', font=dict(size=10, color="gray"))
),
angularaxis=dict(
visible=True,
direction='clockwise',
tickfont=dict(size=9, color="white"),
showline=False,
gridcolor="lightgray"
),
),
showlegend=True,
legend=dict(
orientation="v",
yanchor="top",
y=1,
xanchor="right",
x=1.02,
font=dict(size=10)
),
height=450,
margin=dict(t=40, b=20, l=20, r=40)
)
return fig
# Plot
col1, col2 = st.columns(2)
with col1:
st.markdown("<h5>Unit Pembangkit: Monthly Finding by Company</h5>", unsafe_allow_html=True)
fig_pg = create_polar_bar_chart(avg_ratio_pg, 'PG')
if fig_pg:
st.plotly_chart(fig_pg, use_container_width=True)
if not avg_ratio_pg.empty:
top = avg_ratio_pg.loc[avg_ratio_pg['avg_monthly_ratio'].idxmax()]
low = avg_ratio_pg.loc[avg_ratio_pg['avg_monthly_ratio'].idxmin()]
st.markdown("### Insight")
st.markdown(
f"<div class='ai-insight'>"
f"In PG Area, <strong>{top['nama_perusahaan']}</strong> has the highest ratio (<strong>{top['avg_monthly_ratio']:.2f}</strong>), "
f"while <strong>{low['nama_perusahaan']}</strong> has the lowest (<strong>{low['avg_monthly_ratio']:.2f}</strong>). "
f"Consider cross-learning between them to standardize reporting culture."
f"</div>",
unsafe_allow_html=True
)
else:
st.warning("No data for PG area.")
with col2:
st.markdown("<h5>Unit Maintenance: Monthly Finding by Company</h5>", unsafe_allow_html=True)
fig_um = create_polar_bar_chart(avg_ratio_um, 'UM')
if fig_um:
st.plotly_chart(fig_um, use_container_width=True)
if not avg_ratio_um.empty:
top = avg_ratio_um.loc[avg_ratio_um['avg_monthly_ratio'].idxmax()]
low = avg_ratio_um.loc[avg_ratio_um['avg_monthly_ratio'].idxmin()]
st.markdown("### Insight")
st.markdown(
f"<div class='ai-insight'>"
f"Across all companies, the finding-per-person ratio is similar in the UM Area "
f"</div>",
unsafe_allow_html=True
)
else:
st.warning("No data for UM area.")
else:
st.error("Column 'temuan_kode_distrik' not found.")
st.stop()
# =================== OBJECTIVE 2 — Active vs Inactive Locations (Treemap with Color Gradient) ===================
st.markdown(
"""
<style>
.section-title {
text-align: center;
font-size: 1.5rem;
font-weight: 600;
color: #2c3e50;
margin-bottom: 1.2rem;
}
.ai-insight {
background-color: #f8f9fa;
padding: 12px;
border-left: 4px solid #27ae60;
border-radius: 0 4px 4px 0;
font-size: 0.95rem;
line-height: 1.5;
margin-top: 1rem;
}
</style>
<h3 class='section-title'>OBJECTIVE 2 — Active vs Inactive Locations: Who Leads?</h3>
""",
unsafe_allow_html=True
)
df_local = df_filtered.copy()
if df_local.empty:
st.warning("No data available after filtering.")
st.stop()
df_local['created_month'] = df_local['created_at'].dt.to_period('M')
# Hitung temuan per bulan per lokasi
findings_by_location_month = df_local.groupby(['created_month', 'nama_lokasi_full']).size().reset_index(name='findings_count')
# Hitung jumlah orang unik per bulan per lokasi
creators_by_location_month = df_local.groupby(['created_month', 'nama_lokasi_full'])['creator_nid'].nunique().reset_index(name='unique_creators')
# Gabung
merged_loc = findings_by_location_month.merge(creators_by_location_month, on=['created_month', 'nama_lokasi_full'], how='outer')
# Isi NaN dengan 0
merged_loc = merged_loc.fillna({'findings_count': 0, 'unique_creators': 0})
# Hindari pembagian dengan nol
merged_loc = merged_loc[merged_loc['unique_creators'] > 0]
# Hitung rasio
merged_loc['ratio'] = merged_loc['findings_count'] / merged_loc['unique_creators']
merged_loc['ratio'] = merged_loc['ratio'].replace([np.inf, -np.inf], np.nan)
# Rata-rata bulanan per lokasi
avg_ratio_per_location = merged_loc.groupby('nama_lokasi_full')['ratio'].mean().reset_index(name='avg_monthly_ratio')
avg_ratio_per_location = avg_ratio_per_location.dropna(subset=['avg_monthly_ratio'])
if avg_ratio_per_location.empty:
st.write("Data kosong.")
else:
# Treemap — hanya dibuat & ditampilkan sekali
fig_treemap = px.treemap(
avg_ratio_per_location,
path=['nama_lokasi_full'],
values='avg_monthly_ratio',
title='Avg Monthly Finding/Person Ratio by Location',
labels={
'avg_monthly_ratio': 'Avg Monthly Finding/Person Ratio',
'nama_lokasi_full': 'Location'
},
color='avg_monthly_ratio',
color_continuous_scale=[
[0.0, '#D32F2F'], # Red (low activity)
[0.5, '#FFB300'], # Amber/orange-yellow (medium)
[1.0, '#4CAF50'] # Green (high activity)
],
hover_data={'avg_monthly_ratio': ':.2f'} # pastikan format hover sudah 2 desimal
)
# Perbaiki tata letak & hover — lakukan *sebelum* menampilkan
fig_treemap.update_traces(
hovertemplate="<b>%{label}</b><br>Avg Ratio: %{value:.2f}<extra></extra>"
)
fig_treemap.update_layout(
title=dict(text='Avg Monthly Finding/Person Ratio by Location', x=0.5, xanchor='center'),
height=600,
margin=dict(t=50, l=25, r=25, b=25)
)
# Tampilkan hanya sekali!
st.plotly_chart(fig_treemap, use_container_width=True)
# === AI Insight ===
top_location = avg_ratio_per_location.loc[avg_ratio_per_location['avg_monthly_ratio'].idxmax()]
low_location = avg_ratio_per_location.loc[avg_ratio_per_location['avg_monthly_ratio'].idxmin()]
st.markdown("### Insight")
insight_text = (
f"<div class='ai-insight'>"
f"<strong>{top_location['nama_lokasi_full']}</strong> shows the highest activity level "
f"(<strong>{top_location['avg_monthly_ratio']:.2f}</strong> findings per person/month). "
f"In contrast, <strong>{low_location['nama_lokasi_full']}</strong> has the lowest activity level "
f"(<strong>{low_location['avg_monthly_ratio']:.2f}</strong>).<br><br>"
f"🟢 <strong>High-ratio (green)</strong> locations indicate either high reporting diligence or high exposure to risks — both warrant deeper root-cause analysis.<br>"
f"🟡 <strong>Medium-ratio (yellow)</strong> areas need monitoring to ensure trends do not deteriorate.<br>"
f"🔴 <strong>Low-ratio (red)</strong> locations may reflect under-reporting, lack of engagement, or low hazard visibility — recommend follow-up audits to verify data completeness."
f"</div>"
)
st.markdown(insight_text, unsafe_allow_html=True)
# =================== OBJECTIVE 3 - Frequency & Response Time ===================
st.markdown(
"""
<style>
.section-title {
text-align: center;
font-size: 1.5rem;
font-weight: 600;
color: #2c3e50;
margin-bottom: 1.2rem;
}
.ai-insight {
background-color: #f8f9fa;
padding: 12px;
border-left: 4px solid #27ae60;
border-radius: 0 4px 4px 0;
font-size: 0.95rem;
line-height: 1.5;
margin-top: 1rem;
}
</style>
<h3 class='section-title'>OBJECTIVE 3 — Frequency & Response Time: Who Reports Well? Who Executes Well?</h3>
""",
unsafe_allow_html=True
)
df_local = df_filtered.copy()
if df_local.empty:
st.warning("No data available after filtering.")
st.stop()
df_local['created_month'] = df_local['created_at'].dt.to_period('M')
# ─── Helper: Hitung rasio per division (reporter) ─────────────────────────────
def compute_reporter_ratio_by_nama(df):
if 'nama' not in df.columns:
return pd.DataFrame()
findings_by_nama_month = df.groupby(['created_month', 'nama']).size().reset_index(name='findings_count')
creators_by_nama_month = df.groupby(['created_month', 'nama'])['creator_nid'].nunique().reset_index(name='unique_creators')
merged_rep = findings_by_nama_month.merge(creators_by_nama_month, on=['created_month', 'nama'], how='outer')
merged_rep = merged_rep.fillna({'findings_count': 0, 'unique_creators': 0})
merged_rep = merged_rep[merged_rep['unique_creators'] > 0]
merged_rep['ratio'] = merged_rep['findings_count'] / merged_rep['unique_creators']
merged_rep['ratio'] = merged_rep['ratio'].replace([np.inf, -np.inf], np.nan)
avg_ratio_per_nama = merged_rep.groupby('nama')['ratio'].mean().reset_index(name='avg_monthly_ratio')
avg_ratio_per_nama = avg_ratio_per_nama.dropna(subset=['avg_monthly_ratio'])
return avg_ratio_per_nama
# ─── Helper: Hitung rata-rata temuan per reporter (individu) ─────────────────
def compute_reporter_rate_by_creator(df):
if 'creator_name' not in df.columns:
return pd.DataFrame()
findings_by_creator_month = df.groupby(['created_month', 'creator_name']).size().reset_index(name='findings_count')
active_months_by_creator = findings_by_creator_month.groupby('creator_name')['created_month'].nunique().reset_index(name='active_months')
total_findings_by_creator = findings_by_creator_month.groupby('creator_name')['findings_count'].sum().reset_index()
merged_rep_creator = total_findings_by_creator.merge(active_months_by_creator, on='creator_name', how='outer')
merged_rep_creator = merged_rep_creator.fillna({'findings_count': 0, 'active_months': 0})
merged_rep_creator = merged_rep_creator[merged_rep_creator['active_months'] > 0]
merged_rep_creator['avg_monthly_rate'] = merged_rep_creator['findings_count'] / merged_rep_creator['active_months']
merged_rep_creator['avg_monthly_rate'] = merged_rep_creator['avg_monthly_rate'].replace([np.inf, -np.inf], np.nan)
avg_rate_per_creator = merged_rep_creator.dropna(subset=['avg_monthly_rate'])
return avg_rate_per_creator
# ─── Helper: Hitung lead time per division (executor) ───────────────────────
def compute_executor_leadtime_by_nama(df):
if 'nama' not in df.columns or 'days_to_close' not in df.columns:
return pd.DataFrame()
# Filter hanya data dengan lead time valid
df_valid = df[df['days_to_close'].notna() & (df['days_to_close'] >= 0)]
leadtime_by_nama_month = df_valid.groupby(['created_month', 'nama'])['days_to_close'].mean().reset_index(name='avg_leadtime')
avg_leadtime_nama = leadtime_by_nama_month.groupby('nama')['avg_leadtime'].mean().reset_index(name='avg_monthly_leadtime')
avg_leadtime_nama = avg_leadtime_nama.dropna(subset=['avg_monthly_leadtime'])
return avg_leadtime_nama
# ─── Helper: Hitung lead time per individu executor (deteksi kolom otomatis) ─
def compute_executor_leadtime_by_individual(df, name_col='creator_name'):
if name_col not in df.columns or 'days_to_close' not in df.columns:
return pd.DataFrame()
df_valid = df[df['days_to_close'].notna() & (df['days_to_close'] >= 0)]
leadtime_by_indiv_month = df_valid.groupby(['created_month', name_col])['days_to_close'].mean().reset_index(name='avg_leadtime')
avg_leadtime_indiv = leadtime_by_indiv_month.groupby(name_col)['avg_leadtime'].mean().reset_index(name='avg_monthly_leadtime')
avg_leadtime_indiv = avg_leadtime_indiv.dropna(subset=['avg_monthly_leadtime'])
return avg_leadtime_indiv
# ─── Deteksi kolom executor individu ────────────────────────────────────────
EXECUTOR_INDIV_COL = None
candidate_executor_cols = ['pic', 'pic_name', 'responsible', 'responsible_name', 'assigned_to', 'closed_by', 'executor_name', 'executor']
for col in candidate_executor_cols:
if col in df_local.columns:
EXECUTOR_INDIV_COL = col
break
if EXECUTOR_INDIV_COL is None:
# Fallback — gunakan creator_name (dengan warning transparan)
EXECUTOR_INDIV_COL = 'nama_pic'
# st.warning(
# "⚠️ No dedicated executor column (e.g., 'pic', 'responsible') found. "
# "Using 'creator_name' as proxy for executor — insights may conflate reporters & executors. "
# "Consider adding an executor identifier column for accuracy."
# )
# ─── Hitung semua metrik ─────────────────────────────────────────────────────
avg_ratio_per_nama = compute_reporter_ratio_by_nama(df_local) # 3a
avg_rate_per_creator = compute_reporter_rate_by_creator(df_local) # 3c
avg_leadtime_nama = compute_executor_leadtime_by_nama(df_local) # 3b
avg_leadtime_per_indiv = compute_executor_leadtime_by_individual(df_local, name_col=EXECUTOR_INDIV_COL) # 3d
# ─── Helper: Warna berdasarkan ranking global ───────────────────────────────
def add_color_by_global_rank(df, value_col, top_n=5, worst_n=5, high_is_good=True):
df = df.copy()
df['color'] = '#1f77b4' # default biru
if len(df) == 0:
return df
if high_is_good:
# Nilai tinggi = baik → top N → hijau
top_names = df.nlargest(top_n, value_col)['nama' if 'nama' in df.columns else df.columns[0]]
df.loc[df[df.columns[0]].isin(top_names), 'color'] = '#4CAF50'
else:
# Nilai tinggi = buruk (e.g., lead time) → worst N (tertinggi) → merah
worst_names = df.nlargest(worst_n, value_col)['nama' if 'nama' in df.columns else df.columns[0]]
df.loc[df[df.columns[0]].isin(worst_names), 'color'] = '#D32F2F'
return df
# ─── Layout: 2 baris × 2 kolom ───────────────────────────────────────────────
col_3a, col_3c = st.columns(2)
# ─── 3a: Reporter by Division (Rasio Temuan/Orang) ───────────────────────────
with col_3a:
st.markdown("<h5 style='text-align:center;'>3a. Finding/Person Ratio by Division (Reporter)</h5>", unsafe_allow_html=True)
if avg_ratio_per_nama.empty:
st.warning("No data for division-level reporter analysis.")
else:
sort_opt = st.selectbox("Show:", ["Top 10", "Bottom 10"], key='sort_3a')
full_sorted = avg_ratio_per_nama.sort_values('avg_monthly_ratio', ascending=False)
subset = full_sorted.head(10) if sort_opt == "Top 10" else full_sorted.tail(10).sort_values('avg_monthly_ratio', ascending=True)
# Tambahkan warna: top 5 → hijau
colored = add_color_by_global_rank(avg_ratio_per_nama, 'avg_monthly_ratio', top_n=5, high_is_good=True)
subset = subset.merge(colored[['nama', 'color']], on='nama', how='left').fillna({'color': '#1f77b4'})
# Reverse untuk visual (tertinggi di atas)
if sort_opt == "Top 10":
subset = subset.iloc[::-1]
fig = px.bar(
subset, x='avg_monthly_ratio', y='nama', orientation='h',
title=f'{sort_opt} Divisions',
labels={'avg_monthly_ratio': 'Monthly Ratio', 'nama': 'Division'},
color='color', color_discrete_map={c: c for c in subset['color'].unique()},
text=subset['avg_monthly_ratio'].apply(lambda x: f'{x:.2f}')
)
fig.update_layout(height=450, showlegend=False, yaxis={'categoryorder': 'array', 'categoryarray': subset['nama'].tolist()})
fig.update_traces(textposition='auto')
st.plotly_chart(fig, use_container_width=True)
# 🔍 Insight (dari full data)
if len(full_sorted) >= 2:
min_r, max_r, mean_r = full_sorted['avg_monthly_ratio'].min(), full_sorted['avg_monthly_ratio'].max(), full_sorted['avg_monthly_ratio'].mean()
best, worst = full_sorted.iloc[0]['nama'], full_sorted.iloc[-1]['nama']
st.markdown(
f"<div class='ai-insight'>"
f"<strong>Insight:</strong> Division reporting ratio ranges from {min_r:.2f} to {max_r:.2f} (avg: {mean_r:.2f}). "
f"<strong>{best}</strong> leads."
# <strong>{worst}</strong> lags.
f"<strong>Recommendation:</strong> Benchmark processes from {best}; assess capacity/tooling gaps in {worst}."
f"</div>",
unsafe_allow_html=True
)
# ─── 3c: Reporter by Individual ──────────────────────────────────────────────
with col_3c:
st.markdown("<h5 style='text-align:center;'>3c. Monthly Findings per Reporter (Individual)</h5>", unsafe_allow_html=True)
if avg_rate_per_creator.empty:
st.warning("No data for individual reporter analysis.")
else:
sort_opt = st.selectbox("Show:", ["Top 10", "Bottom 10"], key='sort_3c')
full_sorted = avg_rate_per_creator.sort_values('avg_monthly_rate', ascending=False)
subset = full_sorted.head(10) if sort_opt == "Top 10" else full_sorted.tail(10).sort_values('avg_monthly_rate', ascending=True)
colored = add_color_by_global_rank(avg_rate_per_creator, 'avg_monthly_rate', top_n=5, high_is_good=True)
subset = subset.merge(colored[['creator_name', 'color']], on='creator_name', how='left').fillna({'color': '#1f77b4'})
if sort_opt == "Top 10":
subset = subset.iloc[::-1]
fig = px.bar(
subset, x='avg_monthly_rate', y='creator_name', orientation='h',
title=f'{sort_opt} Reporters',
labels={'avg_monthly_rate': 'Monthly Findings', 'creator_name': 'Reporter'},
color='color', color_discrete_map={c: c for c in subset['color'].unique()},
text=subset['avg_monthly_rate'].apply(lambda x: f'{x:.2f}')
)
fig.update_layout(height=450, showlegend=False, yaxis={'categoryorder': 'array', 'categoryarray': subset['creator_name'].tolist()})
fig.update_traces(textposition='auto')
st.plotly_chart(fig, use_container_width=True)
if len(full_sorted) >= 2:
min_r, max_r, mean_r = full_sorted['avg_monthly_rate'].min(), full_sorted['avg_monthly_rate'].max(), full_sorted['avg_monthly_rate'].mean()
top_reporter = full_sorted.iloc[0]['creator_name']
st.markdown(
f"<div class='ai-insight'>"
f"<strong>Insight:</strong> Individual reporting ranges from {min_r:.2f} to {max_r:.2f} findings/month (avg: {mean_r:.2f}). "
f"<strong>{top_reporter}</strong> is the most active contributor. "
f"<strong>Recommendation:</strong> Recognize top reporters; inspection causes of low activity (<0.5/month) via 1:1 review."
f"</div>",
unsafe_allow_html=True
)
# ─── Baris 2: Executor ───────────────────────────────────────────────────────
col_3b, col_3d = st.columns(2)
# ─── 3b: Executor by Division (Lead Time) ────────────────────────────────────
with col_3b:
st.markdown("<h5 style='text-align:center;'>3b. Monthly Lead Time by Division (Executor)</h5>", unsafe_allow_html=True)
if avg_leadtime_nama.empty:
st.warning("No data for division-level executor analysis.")
else:
sort_opt = st.selectbox(
"Show:",
["Top 10", "Bottom 10"],
key='sort_3b'
)
# Sort penuh sekali — ascending: tercepat → terlambat
full_sorted = avg_leadtime_nama.sort_values('avg_monthly_leadtime', ascending=True)
# Ambil subset sesuai pilihan
if sort_opt == "Top 10":
# 10 tercepat: ascending (kecil → besar), tetap diurut ascending → tercepat di atas
subset = full_sorted.head(10).sort_values('avg_monthly_leadtime', ascending=False)
else: # "Bottom 10 Slowest"
# 10 terlambat: descending (besar → kecil), agar terlambat di atas
subset = full_sorted.tail(10).sort_values('avg_monthly_leadtime', ascending=False)
# Warna: 5 terlambat secara global (bukan di subset!) → warna merah
colored = add_color_by_global_rank(
avg_leadtime_nama,
'avg_monthly_leadtime',
worst_n=5,
high_is_good=False # lebih tinggi = buruk
)
subset = subset.merge(colored[['nama', 'color']], on='nama', how='left').fillna({'color': '#1f77b4'})
fig = px.bar(
subset,
x='avg_monthly_leadtime',
y='nama',
orientation='h',
title=sort_opt,
labels={'avg_monthly_leadtime': 'Monthly Lead Time (Days)', 'nama': 'Division'},
color='color',
color_discrete_map={c: c for c in subset['color'].unique()},
text=subset['avg_monthly_leadtime'].apply(lambda x: f'{x:.1f}')
)
fig.update_layout(
height=450,
showlegend=False,
yaxis={'categoryorder': 'array', 'categoryarray': subset['nama'].tolist()}
)
fig.update_traces(textposition='auto')
st.plotly_chart(fig, use_container_width=True)
if len(full_sorted) >= 2:
min_lt = full_sorted['avg_monthly_leadtime'].min()
max_lt = full_sorted['avg_monthly_leadtime'].max()
mean_lt = full_sorted['avg_monthly_leadtime'].mean()
fastest = full_sorted.iloc[0]['nama']
slowest = full_sorted.iloc[-1]['nama']
st.markdown(
f"<div class='ai-insight'>"
f"<strong>Insight:</strong> Resolution time ranges from {min_lt:.1f} to {max_lt:.1f} days (avg: {mean_lt:.1f}). "
f"<strong>{slowest}</strong> has highest risk of SLA breach. "
f"<strong>Recommendation:</strong> Initiate RCA for {slowest}; replicate workflow from {fastest}. Set SLA threshold at 7 days."
f"</div>",
unsafe_allow_html=True
)
# ─── 3d: Executor by Individual ──────────────────────────────────────────────
with col_3d:
st.markdown(
f"<h5 style='text-align:center;'>3d. Monthly Lead Time per Executor (Individual)</h5>",
unsafe_allow_html=True
)
if avg_leadtime_per_indiv.empty:
st.warning(f"No data for individual executor analysis (column: '{EXECUTOR_INDIV_COL}').")
else:
sort_opt = st.selectbox(
"Show:",
["Top 10", "Bottom 10"],
key='sort_3d'
)
full_sorted = avg_leadtime_per_indiv.sort_values('avg_monthly_leadtime', ascending=True)
if sort_opt == "Top 10":
subset = full_sorted.head(10).subset = full_sorted.head(10).sort_values('avg_monthly_leadtime', ascending=False)
else: # "Bottom 10 Slowest"
subset = full_sorted.tail(10).sort_values('avg_monthly_leadtime', ascending=False)
# Warna berdasarkan ranking global
colored = add_color_by_global_rank(
avg_leadtime_per_indiv,
'avg_monthly_leadtime',
worst_n=5,
high_is_good=False
)
id_col = EXECUTOR_INDIV_COL
subset = subset.merge(colored[[id_col, 'color']], on=id_col, how='left').fillna({'color': '#1f77b4'})
fig = px.bar(
subset,
x='avg_monthly_leadtime',
y=id_col,
orientation='h',
title=sort_opt,
labels={'avg_monthly_leadtime': 'Monthly Lead Time (Days)', id_col: 'Executor'},
color='color',
color_discrete_map={c: c for c in subset['color'].unique()},
text=subset['avg_monthly_leadtime'].apply(lambda x: f'{x:.1f}')
)
fig.update_layout(
height=450,
showlegend=False,
yaxis={'categoryorder': 'array', 'categoryarray': subset[id_col].tolist()}
)
fig.update_traces(textposition='auto')
st.plotly_chart(fig, use_container_width=True)
if len(full_sorted) >= 2:
min_lt = full_sorted['avg_monthly_leadtime'].min()
max_lt = full_sorted['avg_monthly_leadtime'].max()
mean_lt = full_sorted['avg_monthly_leadtime'].mean()
slowest_exec = full_sorted.iloc[-1][id_col]
fastest_exec = full_sorted.iloc[0][id_col]
st.markdown(
f"<div class='ai-insight'>"
f"<strong>Insight:</strong> Executor performance ranges from {min_lt:.1f} to {max_lt:.1f} days (avg: {mean_lt:.1f}). "
f"<strong>{slowest_exec}</strong> requires support to meet SLA. "
f"<strong>Recommendation:</strong> Assign mentor to executors >7 days; document & share best practices from top performers (e.g., {fastest_exec})."
f"</div>",
unsafe_allow_html=True
)
#Objective 4
try:
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import plotly.express as px
from collections import Counter
WORDCLOUD_AVAILABLE = True
except ImportError:
WORDCLOUD_AVAILABLE = False
st.markdown("<h3 class='section-title'>OBJECTIVE 4 - Unsafe Issues: Which One is the Most Often?</h3>",
unsafe_allow_html=True)
# 🔹 Fungsi untuk membuat judul seragam
def create_consistent_title(title_text):
return f"<div style='font-family: Arial; font-size: 16px; font-weight: bold; color: #2c3e50; text-align: center; margin-bottom: 8px;'>{title_text}</div>"
if WORDCLOUD_AVAILABLE:
# 🔥 Data untuk pie chart: semua kategori
df_all_kategori = df_local.copy()
# 🔥 Data untuk wordcloud: hanya Non-Positive
df_filtered_kategori = df_local[df_local['temuan_kategori'] != 'Positive'] \
if 'temuan_kategori' in df_local.columns else df_local
# 2 Kolom
col1, col2 = st.columns(2)
# === PIE CHART: Semua temuan_kategori (Dengan Custom Text Position untuk Unsafe Action & Near Miss) ===
with col1:
st.markdown(create_consistent_title("Distribution of All Issue Categories"), unsafe_allow_html=True)
if 'temuan_kategori' in df_all_kategori.columns:
category_counts = df_all_kategori['temuan_kategori'].value_counts()
if not category_counts.empty:
# Mapping warna
color_map = {
'Positive': '#2E7D32', # Hijau
'Unsafe Condition': '#EF5350', # Merah Muda
'Unsafe Action': '#F48FB1', # Pink
'Near Miss': '#BDBDBD' # Abu-abu
}
colors = [color_map.get(cat, '#9E9E9E') for cat in category_counts.index]
# Buat pie chart tanpa legend
fig_pie = px.pie(
names=category_counts.index,
values=category_counts.values,
color_discrete_sequence=colors
)
# ⚙️ Custom: tarik keluar slice untuk Unsafe Action & Near Miss
pull_values = []
for cat in category_counts.index:
if cat in ['Unsafe Action', 'Near Miss']:
pull_values.append(0.1)
else:
pull_values.append(0.0)
fig_pie.update_traces(
textposition='outside',
textinfo='percent+label',
pull=pull_values,
marker=dict(line=dict(color='#FFFFFF', width=1))
)
fig_pie.update_layout(
showlegend=False,
height=450,
margin=dict(t=20, b=20, l=40, r=40)
)
st.plotly_chart(fig_pie, use_container_width=True)
# ✅ CUSTOM LEGEND
st.markdown("<div style='text-align: center; font-size: 14px; margin-top: -10px;'>", unsafe_allow_html=True)
legend_items = []
for cat in category_counts.index:
color = color_map.get(cat, '#9E9E9E')
item = (
f"<span style='display: inline-block; width: 16px; height: 4px; background-color: {color}; "
f"margin: 0 8px; border-radius: 1px;'></span>{cat}"
)
legend_items.append(item)
legend_html = " | ".join(legend_items)
st.markdown(f"<div>{legend_html}</div>", unsafe_allow_html=True)
st.markdown("</div>", unsafe_allow_html=True)
else:
st.warning("No data available for pie chart.")
else:
st.warning("Column 'temuan_kategori' not available.")
# === WORDCLOUD: Hanya Non-Positive (dari keyword_kategori) ===
with col2:
st.markdown(create_consistent_title("Unsafe Issues"), unsafe_allow_html=True)
st.markdown(
"""
<p style='text-align: center; font-size: 1rem; color: #555; margin-top: -0.5rem;'>
Categorization uses NLP — Natural Language Processing from random text
</p>
""",
unsafe_allow_html=True
)
if df_filtered_kategori.empty:
st.warning("No data available after filtering out 'Positive' category.")
else:
if 'keyword_kategori' in df_filtered_kategori.columns:
# Gabungkan & bersihkan teks
import re
text_series = df_filtered_kategori['keyword_kategori'].dropna().astype(str)
text = ' '.join(text_series)
text = re.sub(r'[^a-zA-Z\s]', ' ', text).strip()
if text:
# Generate wordcloud
wordcloud = WordCloud(
width=1600,
height=800,
background_color='white',
colormap='viridis',
max_words=1000,
random_state=42
).generate(text)
# Tampilkan
fig, ax = plt.subplots(figsize=(3, 2), dpi=200)
ax.imshow(wordcloud, interpolation='bilinear')
ax.axis('off')
plt.tight_layout()
st.pyplot(fig, use_container_width=True)
# 🔍 🔹 Insight dinamis: top 3 keywords
words_clean = [w.lower() for w in text.split() if len(w) > 2]
top3 = [word.capitalize() for word, _ in Counter(words_clean).most_common(3)]
if top3:
insight = f"The most frequent unsafe issue themes are: <strong>{', '.join(top3)}</strong>."
else:
insight = "Recurring unsafe issue patterns are detectable in textual findings."
st.markdown(
f"""
<div style='text-align: center; font-size: 14px; color: #2c3e50; margin-top: 12px; line-height: 1.5;'>
<strong>Insight:</strong> {insight}
</div>
""",
unsafe_allow_html=True
)
else:
st.warning("No valid text remaining after cleaning.")
else:
st.warning("Column 'keyword_kategori' not available.")
else:
st.info("WordCloud library not installed. Install `wordcloud` and `matplotlib` to enable this feature.")
# =================== 5. Matrix (Tetap Dipertahankan) ===================
# =================== 5. Matrix (Tetap Dipertahankan) ===================
st.markdown("<h3 class='section-title'>OBJECTIVE 5 - Findings vs Lead Time: Which Companies Move Slow?</h3>", unsafe_allow_html=True)
import math
import plotly.express as px
import pandas as pd
try:
df_local_matrix = df.copy()
# ============================
# 0. Filter: ONLY 1 COMPANY & 1 PROFILE (if applicable)
# ============================
# (Skipped for general dashboard view)
# ============================
# 1. Exclude Positive findings
# ============================
if 'temuan_kategori' in df_local_matrix.columns:
df_local_matrix = df_local_matrix[df_local_matrix["temuan_kategori"] != "Positive"]
# ============================
# 2. Ensure datetime columns
# ============================
df_local_matrix['created_at'] = pd.to_datetime(df_local_matrix['created_at'], errors='coerce')
df_local_matrix['close_at'] = pd.to_datetime(df_local_matrix['close_at'], errors='coerce')
# ============================
# 3. Compute LEAD TIME
# ============================
df_local_matrix['lead_time_days'] = (df_local_matrix['close_at'] - df_local_matrix['created_at']).dt.days
df_local_matrix['lead_time_days'] = df_local_matrix['lead_time_days'].fillna(0)
# ============================
# 4. Average Monthly Finding Count per Operator
# ============================
if 'nama' not in df_local_matrix.columns:
st.error("❌ Kolom 'nama' (operator) tidak ditemukan.")
# st.stop() # Stop bisa dihilangkan agar script tetap jalan
else:
# Buat kolom bulan (YYYY-MM)
df_local_matrix = df_local_matrix.assign(month=df_local_matrix['created_at'].dt.to_period('M').astype(str))
# Hitung jumlah temuan per operator per bulan
monthly_counts = (
df_local_matrix
.groupby(['nama', 'month'])['kode_temuan']
.nunique()
.reset_index(name='monthly_count')
)
# Hitung rata-rata bulanan per operator
operator_avg = (
monthly_counts
.groupby('nama')['monthly_count']
.mean() # <-- RATA-RATA per bulan (bukan total!)
.reset_index(name='Finding Count')
)
# ============================
# 5. Average Lead Time per Operator
# ============================
operator_lead = (
df_local_matrix.groupby('nama')['lead_time_days']
.mean()
.reset_index(name='Average Lead Time')
)
# ============================
# 6. Merge Risk Matrix
# ============================
risk_matrix = operator_avg.merge(operator_lead, on='nama', how='left')
risk_matrix = risk_matrix.rename(columns={'nama': 'Operator Name'})
# Handle operator tanpa lead time (e.g., belum closed)
risk_matrix['Average Lead Time'] = risk_matrix['Average Lead Time'].fillna(0)
# ============================
# 7. Quadrant Logic (unchanged)
# ============================
X_LIMIT = 20
Y_LIMIT = 3
def assign_quadrant(row):
if row['Finding Count'] >= X_LIMIT and row['Average Lead Time'] >= Y_LIMIT:
return "Quadrant I – High Leadtime & High Count"
elif row['Finding Count'] < X_LIMIT and row['Average Lead Time'] >= Y_LIMIT:
return "Quadrant II – High Leadtime but Low Count"
elif row['Finding Count'] >= X_LIMIT and row['Average Lead Time'] < Y_LIMIT:
return "Quadrant III – Low Leadtime but High Count"
else:
return "Quadrant IV – Low Leadtime & Low Count"
risk_matrix['quadrant'] = risk_matrix.apply(assign_quadrant, axis=1)
quadrant_count = risk_matrix['quadrant'].value_counts()
# ============================
# 8. Scatter Plot (format visual tetap sam persis)
# ============================
max_x = risk_matrix['Finding Count'].max() + 1
max_y = risk_matrix['Average Lead Time'].max() + 5
fig = px.scatter(
risk_matrix,
x='Finding Count',
y='Average Lead Time',
hover_name="Operator Name",
size=[12] * len(risk_matrix),
size_max=15,
title="Audit Findings Risk Matrix: Avg Monthly Count vs Lead Time"
)
# Background quadrant (same as original)
fig.add_shape(type="rect", x0=X_LIMIT, x1=max_x, y0=Y_LIMIT, y1=max_y,
fillcolor="rgba(255,0,0,0.25)", line_width=0) # Q1
fig.add_shape(type="rect", x0=0, x1=X_LIMIT, y0=Y_LIMIT, y1=max_y,
fillcolor="rgba(255,150,50,0.25)", line_width=0) # Q2
fig.add_shape(type="rect", x0=X_LIMIT, x1=max_x, y0=0, y1=Y_LIMIT,
fillcolor="rgba(255,200,200,0.25)", line_width=0) # Q3
fig.add_shape(type="rect", x0=0, x1=X_LIMIT, y0=0, y1=Y_LIMIT,
fillcolor="rgba(0,120,255,0.15)", line_width=0) # Q4
fig.add_vline(x=X_LIMIT, line_dash="dash", line_color="black")
fig.add_hline(y=Y_LIMIT, line_dash="dash", line_color="black")
# Quadrant count annotations (same positions & style)
fig.add_annotation(x=X_LIMIT + (max_x - X_LIMIT)/2,
y=Y_LIMIT + (max_y - Y_LIMIT)/2,
text=f"<b>{quadrant_count.get('Quadrant I – High Leadtime & High Count',0)}</b>",
showarrow=False, font=dict(size=22, color="darkred"))
fig.add_annotation(x=X_LIMIT/2,
y=Y_LIMIT + (max_y - Y_LIMIT)/2,
text=f"<b>{quadrant_count.get('Quadrant II – High Leadtime but Low Count',0)}</b>",
showarrow=False, font=dict(size=22, color="orange"))
fig.add_annotation(x=X_LIMIT + (max_x - X_LIMIT)/2,
y=Y_LIMIT/2,
text=f"<b>{quadrant_count.get('Quadrant III – Low Leadtime but High Count',0)}</b>",
showarrow=False, font=dict(size=22, color="red"))
fig.add_annotation(x=X_LIMIT/2,
y=Y_LIMIT/2,
text=f"<b>{quadrant_count.get('Quadrant IV – Low Leadtime & Low Count',0)}</b>",
showarrow=False, font=dict(size=22, color="green"))
st.plotly_chart(fig, use_container_width=True)
# ============================
# 9. Summary Table
# ============================
st.subheader("Summary (Avg Monthly Count vs Avg Lead Time)")
st.dataframe(
risk_matrix.sort_values("Finding Count", ascending=False),
use_container_width=True
)
except Exception as e:
st.error(f"⚠️ Error Risk Matrix: {e}")
# st.exception(e) # Uncomment for debugging
# st.exception(e) # Uncomment for debugging
import streamlit as st
import plotly.graph_objects as go
import numpy as np
import pandas as pd
# =================== OBJECTIVE 6 - Predictive Dashboard & Early Warning Signals ===================
st.markdown("<h3 class='section-title'>OBJECTIVE 6 — Prediction for Early Warning Signals: Which Ones Have Less Future Inspections?</h3>", unsafe_allow_html=True)
# ✅ Enhanced CSS + Minimal Sortable JS
st.markdown("""
<style>
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
.predictive-panel {
margin-bottom: 28px;
background: white;
border-radius: 12px;
box-shadow: 0 4px 16px rgba(0,0,0,0.05);
overflow: hidden;
border: 1px solid #edf2f7;
}
.predictive-header {
background: #E3F2FD;
color: #003DA5;
font-weight: 700;
font-size: 1.1em;
padding: 14px 20px;
border-left: 4px solid #003DA5;
}
.predictive-table-wrapper {
padding: 0 20px 20px;
}
.predictive-table-wrapper table {
width: 100%;
border-collapse: collapse;
font-family: 'Inter', sans-serif;
font-size: 0.94em;
margin-top: 12px;
}
.predictive-table-wrapper th,
.predictive-table-wrapper td {
text-align: center !important;
padding: 12px 10px;
border: 1px solid #e5e7eb;
vertical-align: middle;
}
.predictive-table-wrapper th {
background-color: #f8fafc;
font-weight: 600;
color: #003DA5;
cursor: pointer;
user-select: none;
position: relative;
}
.predictive-table-wrapper th:hover {
background-color: #edf2f7;
}
.predictive-table-wrapper th::after {
content: " ⇵";
opacity: 0.4;
margin-left: 4px;
}
.predictive-table-wrapper th.asc::after {
content: " ↑";
opacity: 1;
color: #003DA5;
}
.predictive-table-wrapper th.desc::after {
content: " ↓";
opacity: 1;
color: #c62828;
}
.predictive-table-wrapper tr:nth-child(even) {
background-color: #fafcff;
}
.predictive-table-wrapper tr:hover {
background-color: #f0f7ff !important;
}
.predictive-note {
font-size: 0.86em;
color: #64748b;
margin-top: 10px;
padding: 0 20px;
line-height: 1.5;
}
.spark {
font-family: 'Courier New', monospace;
font-weight: bold;
}
.status-active { color: #2e7d32; font-weight: bold; }
.status-neutral { color: #f57c00; }
.status-inactive { color: #c62828; font-weight: bold; }
.trend-rising { color: #c62828; font-weight: 600; }
.trend-stable { color: #388e3c; }
.trend-declining { color: #d32f2f; }
.footer-insight {
background: #003DA5;
color: white;
padding: 14px 24px;
border-radius: 10px;
font-weight: 600;
margin-top: 20px;
text-align: center;
font-size: 1.08em;
box-shadow: 0 3px 10px rgba(0,61,165,0.15);
}
.warning-box {
background: #fff8e1;
border-left: 4px solid #ffc107;
padding: 12px 16px;
font-size: 0.9em;
color: #5d4037;
margin: 10px 20px 0;
border-radius: 0 6px 6px 0;
}
</style>
<script>
function makeSortable(tableId) {
const table = document.getElementById(tableId);
if (!table) return;
let headers = table.querySelectorAll("th");
headers.forEach((header, i) => {
header.onclick = () => {
headers.forEach(h => h.classList.remove('asc', 'desc'));
let rows = Array.from(table.querySelectorAll("tr:nth-child(n+2)"));
let isNumeric = !isNaN(rows[0]?.cells[i]?.textContent.replace(/[^0-9.-]/g, ''));
rows.sort((a, b) => {
let aVal = a.cells[i].textContent.trim();
let bVal = b.cells[i].textContent.trim();
if (isNumeric) {
aVal = parseFloat(aVal.replace(/[^0-9.-]/g, '')) || 0;
bVal = parseFloat(bVal.replace(/[^0-9.-]/g, '')) || 0;
}
return header.classList.contains('asc') ? bVal - aVal : aVal - bVal;
});
header.classList.toggle('asc');
header.classList.toggle('desc');
rows.forEach(row => table.querySelector('tbody').appendChild(row));
};
});
}
setTimeout(() => {
makeSortable('tbl-creators');
makeSortable('tbl-locations');
makeSortable('tbl-divisions');
makeSortable('tbl-categories');
}, 800);
</script>
""", unsafe_allow_html=True)
#bloob line
# 🔹 Helper: Sparkline ala chart saham — hijau kalau naik, merah kalau turun
def ascii_sparkline_pln(data):
if not data or len(data) == 0:
return "<span class='spark' style='color:#999;'>—</span>"
try:
data = [float(x) for x in data]
n = len(data)
if n == 1:
return "<span class='spark' style='color:#666;'>•</span>"
# Hitung slope (linear trend)
xs = list(range(n))
x_mean = sum(xs) / n
y_mean = sum(data) / n
numerator = sum((x - x_mean) * (y - y_mean) for x, y in zip(xs, data))
denominator = sum((x - x_mean) ** 2 for x in xs)
slope = numerator / (denominator + 1e-12)
# Tentukan warna & label berdasarkan slope
if slope > 0.001:
color = "#2E7D32" # hijau (naik)
label = "↑"
elif slope < -0.001:
color = "#C62828" # merah (turun)
label = "↓"
else:
color = "#546E7A" # abu-abu (datar)
label = "→"
# Normalisasi koordinat Y untuk SVG
height = 16
margin = 2
usable_h = height - 2 * margin
min_y, max_y = min(data), max(data)
if max_y == min_y:
y_coords = [margin + usable_h / 2] * n
else:
y_coords = [
margin + usable_h * (1 - (v - min_y) / (max_y - min_y + 1e-9))
for v in data
]
# Buat polyline
points = " ".join(f"{i * 8},{y:.1f}" for i, y in enumerate(y_coords))
width = max(24, (n - 1) * 8 + 4)
# SVG inline
svg = (
f'<svg class="sparkline" width="{width + 6}" height="{height}" '
f'viewBox="-2 0 {width + 6} {height}" style="vertical-align:middle; '
f'font-family:sans-serif;" xmlns="http://www.w3.org/2000/svg">'
f'<polyline points="{points}" fill="none" stroke="{color}" '
f'stroke-width="1.3" stroke-linecap="round" stroke-linejoin="round"/>'
f'<text x="{(n-1)*8 + 1}" y="14" font-size="9" fill="{color}" '
f'dominant-baseline="middle">{label}</text>'
f'</svg>'
)
return svg
except Exception:
return "<span class='spark' style='color:#999;'>⚠</span>"
# ——————— 1. Creators: ONLY Coverage < 90% AND Slope < 0 ———————
def predict_creators(df):
# ❌ Tidak ada filter Non-Positive
if 'creator_name' not in df.columns or df.empty:
return pd.DataFrame()
start_month = df['created_at'].min().to_period('M')
end_month = df['created_at'].max().to_period('M')
all_months = pd.period_range(start=start_month, end=end_month, freq='M')
df_monthly = (
df.groupby(['creator_name', df['created_at'].dt.to_period('M')])
.size()
.unstack(fill_value=0)
.reindex(columns=all_months, fill_value=0)
.stack()
.reset_index(name='count')
)
df_monthly.columns = ['Creator', 'Month', 'Count']
results = []
for creator, group in df_monthly.groupby('Creator'):
ts = group.set_index('Month')['Count']
total = len(all_months)
active = (ts > 0).sum()
coverage = active / total if total > 0 else 0
avg_rate = ts.mean()
if len(ts) >= 2:
try:
slope = np.polyfit(np.arange(len(ts)), ts.values, 1)[0]
# ✅ FILTER: Coverage < 90% AND Slope < 0
if slope < 0 and coverage < 0.9:
reason = f"Slope = {slope:.3f}, Coverage = {coverage*100:.1f}%. Avg: {avg_rate:.2f}/mo."
results.append({
'Creator': creator,
'Reports/Month': round(avg_rate, 2),
'Monthly Consistency (%)': round(coverage * 100, 1),
'Trend Slope': round(slope, 3),
'Trend': ascii_sparkline_pln(ts.values.tolist()),
'Reason': reason
})
except:
continue
df_res = pd.DataFrame(results)
# ✅ Ambil 10 creator dengan slope paling negatif (paling turun)
return df_res.sort_values('Trend Slope', ascending=True).head(10) if not df_res.empty else df_res
# ——————— 2. Locations: ONLY Coverage < 90% AND Slope < 0 ———————
def predict_locations(df):
# ❌ Tidak ada filter Non-Positive
if 'nama_lokasi_full' not in df.columns or df.empty:
return pd.DataFrame()
start_month = df['created_at'].min().to_period('M')
end_month = df['created_at'].max().to_period('M')
all_months = pd.period_range(start=start_month, end=end_month, freq='M')
df_monthly = (
df.groupby(['nama_lokasi_full', df['created_at'].dt.to_period('M')])
.size()
.unstack(fill_value=0)
.reindex(columns=all_months, fill_value=0)
.stack()
.reset_index(name='count')
)
df_monthly.columns = ['Location', 'Month', 'Count']
results = []
for lokasi, group in df_monthly.groupby('Location'):
ts = group.set_index('Month')['Count']
total = len(all_months)
active = (ts > 0).sum()
coverage = active / total if total > 0 else 0
avg_rate = ts.mean()
if len(ts) >= 2:
try:
slope = np.polyfit(np.arange(len(ts)), ts.values, 1)[0]
# ✅ FILTER: Coverage < 90% AND Slope < 0
if slope < 0 and coverage < 0.9:
reason = f"Slope = {slope:.3f}, Coverage = {coverage*100:.1f}%. Avg: {avg_rate:.2f}/mo."
results.append({
'Location': lokasi,
'Reports/Month': round(avg_rate, 2),
'Monthly Consistency (%)': round(coverage * 100, 1),
'Trend Slope': round(slope, 3),
'Trend': ascii_sparkline_pln(ts.values.tolist()),
'Reason': reason
})
except:
continue
df_res = pd.DataFrame(results)
# ✅ Ambil 10 lokasi dengan slope paling negatif (paling turun)
return df_res.sort_values('Trend Slope', ascending=True).head(10) if not df_res.empty else df_res
# ——————— 3. Divisions: ONLY Coverage < 90% AND Slope < 0 ———————
def predict_divisions(df):
# ❌ Tidak ada filter Non-Positive
if 'nama' not in df.columns or df.empty:
return pd.DataFrame()
start_month = df['created_at'].min().to_period('M')
end_month = df['created_at'].max().to_period('M')
all_months = pd.period_range(start=start_month, end=end_month, freq='M')
df_monthly = (
df.groupby(['nama', df['created_at'].dt.to_period('M')])
.size()
.unstack(fill_value=0)
.reindex(columns=all_months, fill_value=0)
.stack()
.reset_index(name='count')
)
df_monthly.columns = ['Division', 'Month', 'Count']
results = []
for div, group in df_monthly.groupby('Division'):
ts = group.set_index('Month')['Count']
total = len(all_months)
active = (ts > 0).sum()
coverage = active / total if total > 0 else 0
avg_rate = ts.mean()
if len(ts) >= 2:
try:
slope = np.polyfit(np.arange(len(ts)), ts.values, 1)[0]
# ✅ FILTER: Coverage < 90% AND Slope < 0
if slope < 0 and coverage < 0.9:
reason = f"Slope = {slope:.3f}, Coverage = {coverage*100:.1f}%. Avg: {avg_rate:.2f}/mo."
results.append({
'Division': div,
'Reports/Month': round(avg_rate, 2),
'Monthly Consistency (%)': round(coverage * 100, 1),
'Trend Slope': round(slope, 3),
'Trend': ascii_sparkline_pln(ts.values.tolist()),
'Reason': reason
})
except:
continue
df_res = pd.DataFrame(results)
# ✅ Ambil 10 divisi dengan slope paling negatif (paling turun)
return df_res.sort_values('Trend Slope', ascending=True).head(10) if not df_res.empty else df_res
# ——————— 4. Categories: ONLY Non-Positive + Coverage=100% & Trend Slope > 0 ———————
def predict_categories(df):
# 🔥 Filter: Hanya yang bukan 'Positive'
df = df[df['temuan_kategori'] != 'Positive'].copy() # ✅ Filter non-Positive
if 'kategori' not in df.columns or df.empty:
return pd.DataFrame()
start_month = df['created_at'].min().to_period('M')
end_month = df['created_at'].max().to_period('M')
all_months = pd.period_range(start=start_month, end=end_month, freq='M')
n_months = len(all_months)
results = []
for cat, group in df.groupby('kategori'):
ts_data = (
group.groupby(group['created_at'].dt.to_period('M'))
.size()
.reindex(all_months, fill_value=0)
)
total_reports = ts_data.sum()
avg_per_month = total_reports / n_months if n_months > 0 else 0
active_months = (ts_data > 0).sum()
coverage = active_months / n_months if n_months > 0 else 0
slope = 0.0
if len(ts_data) >= 2:
try:
slope = np.polyfit(np.arange(len(ts_data)), ts_data.values, 1)[0]
except:
pass
results.append({
'Category': cat,
'Avg/Month': round(avg_per_month, 2),
'Monthly Consistency (%)': round(coverage * 100, 1),
'Trend Slope': round(slope, 3),
'Trend': ascii_sparkline_pln(ts_data.values.tolist())
})
df_res = pd.DataFrame(results)
# ✅ FILTER: Coverage = 100% AND Trend Slope > 0
if not df_res.empty:
df_res = df_res
# [
# (df_res['Coverage (%)'] == 100.0) &
# (df_res['Trend Slope'] > 0)
# ].copy()
df_res['Status'] = df_res['Trend Slope'].apply(
lambda s: "<span class='trend-rising'>High-Risk Rising</span>" if s > 0.2 else
"<span class='trend-stable'>Emerging Rising</span>"
)
df_res = df_res.sort_values('Trend Slope', ascending=False)
return df_res.reset_index(drop=True).head(10) if not df_res.empty else df_res
# ——————— RUN ———————
df_creator = predict_creators(df_filtered)
df_location = predict_locations(df_filtered)
df_division = predict_divisions(df_filtered)
df_category = predict_categories(df_filtered)
# 🎯 PANEL 1: Creators (FILTERED: Coverage < 90% & Slope < 0)
st.markdown("<div class='predictive-panel'>", unsafe_allow_html=True)
st.markdown("<div class='predictive-header'>1. Which Reporters Are Predicted to Have Less Future Inspections? (Top 10 Most Declining)</div>", unsafe_allow_html=True)
if not df_creator.empty:
cols = ['Creator', 'Reports/Month', 'Monthly Consistency (%)', 'Trend Slope', 'Trend']
# 🔥 Rename hanya untuk DISPLAY, bukan data asli
df_display = df_creator[cols].rename(columns={
"Reason": "Reason Forecast"
})
html = df_display.to_html(escape=False, index=False, table_id="tbl-creators")
st.markdown(f"<div class='predictive-table-wrapper'>{html}</div>", unsafe_allow_html=True)
# st.markdown(
# "<div class='predictive-note'>"
# "<strong>Criteria:</strong> Coverage < 90% AND negative slope. "
# "High-risk: steep negative slope + low baseline activity."
# "</div>",
# unsafe_allow_html=True
# )
# else:
# st.markdown(
# "<div class='predictive-table-wrapper'>"
# "<p style='text-align:center; color:#666; padding:24px; font-style:italic;'>"
# "No creators meet criteria: Coverage < 90% and negative trend."
# "</p>"
# "<div class='warning-box'>"
# "💡 Note: Creators with Coverage ≥ 90% are excluded — they are considered stable reporters."
# "</div>"
# "</div>",
# unsafe_allow_html=True
# )
st.markdown("</div>", unsafe_allow_html=True)
# 🎯 PANEL 2: Locations (FILTERED: Coverage < 90% & Slope < 0)
st.markdown("<div class='predictive-panel'>", unsafe_allow_html=True)
st.markdown("<div class='predictive-header'>2. Which Locations Are Predicted to Have Less Future Inspections? (Top 10 Most Declining)</div>", unsafe_allow_html=True)
if not df_location.empty:
cols = ['Location', 'Reports/Month', 'Monthly Consistency (%)', 'Trend Slope', 'Trend']
# # 🔥 Rename hanya untuk DISPLAY, bukan data asli
df_display = df_location[cols].rename(columns={
"Reason": "Reason Forecast"
})
html = df_display.to_html(escape=False, index=False, table_id="tbl-locations")
st.markdown(f"<div class='predictive-table-wrapper'>{html}</div>", unsafe_allow_html=True)
# st.markdown(
# "<div class='predictive-note'>"
# "<strong>Criteria:</strong> Coverage < 90% AND negative slope. "
# "High-risk: steep negative slope + low baseline activity."
# "</div>",
# unsafe_allow_html=True
# )
# else:
# st.markdown(
# "<div class='predictive-table-wrapper'>"
# "<p style='text-align:center; color:#666; padding:24px; font-style:italic;'>"
# "No locations meet criteria: Coverage < 90% and negative trend."
# "</p>"
# "<div class='warning-box'>"
# "💡 Note: Locations with Coverage ≥ 90% are excluded — they are considered stable reporting zones."
# "</div>"
# "</div>",
# unsafe_allow_html=True
# )
st.markdown("</div>", unsafe_allow_html=True)
# 🎯 PANEL 3: Divisions (FILTERED: Coverage < 90% & Slope < 0)
st.markdown("<div class='predictive-panel'>", unsafe_allow_html=True)
st.markdown("<div class='predictive-header'>3. Which Divisions Are Predicted to Have Less Future Inspections? (Top 10 Most Declining)</div>", unsafe_allow_html=True)
if not df_division.empty:
cols = ['Division', 'Reports/Month', 'Monthly Consistency (%)', 'Trend Slope', 'Trend']
# # 🔥 Rename hanya untuk DISPLAY, bukan data asli
df_display = df_division[cols].rename(columns={
"Reason": "Reason Forecast"
})
html = df_display.to_html(escape=False, index=False, table_id="tbl-divisions")
st.markdown(f"<div class='predictive-table-wrapper'>{html}</div>", unsafe_allow_html=True)
# st.markdown(
# "<div class='predictive-note'>"
# "<strong>Criteria:</strong> Coverage < 90% AND negative slope. "
# "High-risk: steep negative slope + low baseline activity."
# "</div>",
# unsafe_allow_html=True
# )
# else:
# st.markdown(
# "<div class='predictive-table-wrapper'>"
# "<p style='text-align:center; color:#666; padding:24px; font-style:italic;'>"
# "No divisions meet criteria: Coverage < 90% and negative trend."
# "</p>"
# "<div class='warning-box'>"
# "💡 Note: Divisions with Coverage ≥ 90% are excluded — they are considered stable reporting zones."
# "</div>"
# "</div>",
# unsafe_allow_html=True
# )
st.markdown("</div>", unsafe_allow_html=True)
# 🎯 PANEL 4: Categories (FILTERED: Non-Positive + Coverage=100% & Rising)
# st.markdown("<div class='predictive-panel'>", unsafe_allow_html=True)
st.markdown(
"<div class='predictive-header'>"
"4. Which Issue Categories Are Likely to Appear in the Next 3 Months (Increasing Trend Only)"
"<span style='font-size:0.75em; font-weight:400; color:#003DA5;'>"
" &nbsp;&nbsp;(* Categorization uses NLP — Natural Language Processing from random text)"
"</span>"
"</div>",
unsafe_allow_html=True
)
# if not df_category.empty:
# cols = ['Category', 'Avg/Month', 'Coverage (%)', 'Trend Slope', 'Status', 'Trend']
# # 🔵 Rename ONLY for display
# df_display = df_category[cols].rename(columns={
# "Status": "Status Issue for Next Month"
# })
# html = df_display.to_html(escape=False, index=False, table_id="tbl-categories")
# st.markdown(f"<div class='predictive-table-wrapper'>{html}</div>", unsafe_allow_html=True)
# st.markdown(
# "<div class='predictive-note'>"
# "<strong>Filtered:</strong> Reported every month (100% coverage) with increasing trend. "
# "<strong>Avg/Month</strong> = total ÷ months. "
# "<span class='trend-rising'>High-Risk Rising</span> = slope > 0.2."
# "</div>",
# unsafe_allow_html=True
# )
# else:
# st.markdown(
# "<div class='predictive-table-wrapper'>"
# "<p style='text-align:center; color:#c62828; padding:24px; font-weight:500;'>"
# "⚠️ No rising categories with 100% monthly coverage."
# "</p>"
# "<p style='text-align:center; color:#666; font-size:0.9em;'>"
# "Consider relaxing coverage filter if data is sparse."
# "</p></div>",
# unsafe_allow_html=True
# )
# st.markdown("</div>", unsafe_allow_html=True)
# =================== WHITEBOARD STYLE CHART FOR PANEL 4 ===================
# st.markdown("<h4 style='text-align: center; color: #2c3e50;'>Whiteboard Insight: Trend vs Frequency</h4>", unsafe_allow_html=True)
# Buat chart scatter dengan gaya whiteboard
if not df_category.empty:
# Ambil data untuk scatter
df_plot = df_category.copy()
df_plot['Size'] = df_plot['Avg/Month'] # Ukuran lingkaran = frekuensi (Avg/Month)
df_plot['Y'] = df_plot['Trend Slope'] # Y = Trend Slope
# Buat scatter plot
fig = go.Figure()
# Tambahkan scatter
fig.add_trace(go.Scatter(
x=df_plot['Category'],
y=df_plot['Y'],
mode='markers',
marker=dict(
size=df_plot['Size'] * 1.5, # Skala ukuran agar tidak terlalu besar
color='#003DA5',
line=dict(width=2, color='white'),
opacity=0.8
),
text=df_plot['Category'],
textposition="top center",
textfont=dict(size=10, color="#003DA5", family="Arial"),
hovertemplate="<b>%{text}</b><br>Trend Slope: %{y:.3f}<br>Avg/Month: %{marker.size:.1f}<extra></extra>",
))
# Layout
fig.update_layout(
# title=dict(text="<b>Issue Category Trend vs Frequency (Non-Positive)</b>", x=0.5, y=0.95),
xaxis=dict(
title="Category",
tickangle=45,
showgrid=True,
gridcolor="#e0e0e0",
gridwidth=1,
tickfont=dict(size=10, color="#003DA5")
),
yaxis=dict(
title="Trend Slope",
showgrid=True,
gridcolor="#e0e0e0",
gridwidth=1,
tickfont=dict(size=10, color="#003DA5")
),
plot_bgcolor="white",
paper_bgcolor="white",
height=500,
margin=dict(t=60, b=60, l=60, r=60),
font=dict(family="Arial", size=12, color="#003DA5"),
showlegend=False
)
# Tambahkan teks manual seperti whiteboard
fig.add_annotation(
xref="paper", yref="paper",
x=0.05, y=0.95,
showarrow=False,
font=dict(size=12, color="#003DA5"),
textangle=-90,
align="center",
visible=False
)
fig.add_annotation(
xref="paper", yref="paper",
x=0.5, y=0.05,
# text="",
showarrow=False,
font=dict(size=12, color="#003DA5"),
align="center",
visible=False
)
fig.add_annotation(
xref="paper", yref="paper",
x=0.95, y=0.95,
text="<b>Number of Findings</b><br>• 10<br>•• 20<br>••• 30<br>•••• 40",
showarrow=False,
font=dict(size=12, color="#003DA5"),
align="left"
)
fig.add_annotation(
xref="paper", yref="paper",
x=0.05, y=0.1,
text="<b>Semakin tinggi = semakin sering ditemukan deviasi</b><br><b>Semakin besar = semakin banyak ditemukan deviasi</b><br><b></b>",
showarrow=False,
font=dict(size=12, color="#003DA5"),
align="left"
)
st.plotly_chart(fig, use_container_width=True)
# Insight
# st.markdown("### 💡 Insight")
# insight_text = (
# f"<div class='ai-insight'>"
# f"The scatter plot visualizes issue categories with 100% coverage and positive trend. "
# f"Categories higher on the Y-axis indicate stronger upward trends, while larger circles indicate higher average monthly frequency. "
# f"This helps identify which non-positive issues are both increasing in trend and frequently reported — these should be prioritized for intervention."
# f"</div>"
# )
# st.markdown(insight_text, unsafe_allow_html=True)
else:
st.info("No data available for non-positive issue categories with 100% coverage and positive trend.")
import streamlit as st
import pandas as pd
from huggingface_hub import InferenceClient
# ==========================
# LLM FUNCTION (HuggingFace)
# ==========================
def llm_generate_recommendation(insights_text):
client = InferenceClient(model="meta-llama/Meta-Llama-3-8B-Instruct")
prompt = f"""
You are an expert Safety & Reliability Agentic AI.
Based on the following structured INSIGHT SUMMARY, create:
1. Recommended Action (max 2 sentences)
2. Risk Mitigation Strategy (max 2 sentences)
The insights:
{insights_text}
Now generate concise, high-impact:
- "recommendation"
- "mitigation"
Return output in EXACT JSON format:
{{
"recommendation": "...",
"mitigation": "..."
}}
"""
output = client.text_generation(prompt, max_new_tokens=500, temperature=0.4)
return output
# ==============================================
# === INSIGHT COMPUTATION FUNCTION (your code)
# ==============================================
def extract_agentic_insights_v5(df: pd.DataFrame):
dev = {
"lowest_ratio_9_locs": [],
"obj3a_lowest_div": None,
"obj3b_slowest_executor": None,
"obj3c_lowest_reporter": None,
"obj3d_slowest_div": None,
"obj4_unsafe_condition_pct": 0.0,
"obj4_unsafe_action_pct": 0.0,
"obj4_near_miss_pct": 0.0,
"obj5_q1_divs": [],
"obj5_q2_divs": [],
"obj6_top2_categories": [],
}
# === 1. 9 locations with lowest finding-to-reporter ratio ===
if {'nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan'}.issubset(df.columns):
calc = df[['nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan']].copy()
calc['created_at'] = pd.to_datetime(calc['created_at'], errors='coerce')
calc = calc.dropna(subset=['created_at', 'nama_lokasi_full', 'creator_nid'])
calc['bulan'] = calc['created_at'].dt.to_period('M')
monthly = calc.groupby(['nama_lokasi_full', 'bulan']).agg(
findings=('kode_temuan', 'size'),
reporters=('creator_nid', 'nunique')
).reset_index()
monthly = monthly[monthly['reporters'] > 0]
monthly['ratio'] = monthly['findings'] / monthly['reporters']
loc_avg = monthly.groupby('nama_lokasi_full')['ratio'].mean()
lowest_9 = loc_avg.nsmallest(9)
dev["lowest_ratio_9_locs"] = [(loc, round(ratio, 3)) for loc, ratio in lowest_9.items()]
# === 2a Lowest-ratio division ===
if {'nama', 'creator_nid', 'created_at', 'kode_temuan'}.issubset(df.columns):
calc = df[['nama', 'creator_nid', 'created_at', 'kode_temuan']].copy()
calc['bulan'] = pd.to_datetime(calc['created_at']).dt.to_period('M')
agg = calc.groupby(['nama', 'bulan']).agg(
findings=('kode_temuan', 'size'),
reporters=('creator_nid', 'nunique')
)
agg = agg[agg['reporters'] > 0].reset_index()
agg['ratio'] = agg['findings'] / agg['reporters']
div_ratio = agg.groupby('nama')['ratio'].mean()
if not div_ratio.empty:
dev["obj3a_lowest_div"] = (div_ratio.idxmin(), round(div_ratio.min(), 2))
# === 2b Slowest executor
if 'days_to_close' in df.columns:
valid = df[df['days_to_close'].notna() & (df['days_to_close'] >= 0)]
exec_col = 'nama_pic' if 'nama_pic' in valid.columns else 'creator_name'
if exec_col in valid.columns:
lead = valid.groupby(exec_col)['days_to_close'].mean()
if not lead.empty:
dev["obj3b_slowest_executor"] = (lead.idxmax(), round(lead.max(), 1))
# === 2c Lowest reporter freq
if {'creator_name', 'created_at'}.issubset(df.columns):
calc = df[['creator_name', 'created_at']].copy()
calc['bulan'] = pd.to_datetime(calc['created_at']).dt.to_period('M')
monthly = calc.groupby(['creator_name', 'bulan']).size().reset_index(name='count')
avg = monthly.groupby('creator_name')['count'].mean()
avg = avg[avg > 0]
if not avg.empty:
dev["obj3c_lowest_reporter"] = (avg.idxmin(), round(avg.min(), 2))
# === 2d Slowest division resolution
if 'days_to_close' in df.columns and 'nama' in df.columns:
valid = df[df['days_to_close'].notna() & (df['days_to_close'] >= 0)]
if not valid.empty:
lead = valid.groupby('nama')['days_to_close'].mean()
if not lead.empty:
dev["obj3d_slowest_div"] = (lead.idxmax(), round(lead.max(), 1))
# === 3. Non-Positive findings composition
if 'temuan_kategori' in df.columns:
cnt = df['temuan_kategori'].value_counts(normalize=True) * 100
dev["obj4_unsafe_condition_pct"] = round(cnt.get("Unsafe Condition", 0), 1)
dev["obj4_unsafe_action_pct"] = round(cnt.get("Unsafe Action", 0), 1)
dev["obj4_near_miss_pct"] = round(cnt.get("Near Miss", 0), 1)
# === 4. Quadrants
X_LIMIT, Y_LIMIT = 20, 3
if {'nama', 'created_at', 'days_to_close', 'kode_temuan'}.issubset(df.columns):
calc = df.copy()
calc['created_at'] = pd.to_datetime(calc['created_at'], errors='coerce')
calc = calc.assign(month=calc['created_at'].dt.to_period('M').astype(str))
monthly_counts = calc.groupby(['nama', 'month'])['kode_temuan'].nunique().reset_index()
avg_count = monthly_counts.groupby('nama')['kode_temuan'].mean().reset_index(name='Finding Count')
leadtime = calc.groupby('nama')['days_to_close'].mean().reset_index(name='Avg Lead Time')
mat = avg_count.merge(leadtime, on='nama', how='left').fillna(0)
for _, r in mat.iterrows():
if r['Finding Count'] >= X_LIMIT and r['Avg Lead Time'] >= Y_LIMIT:
dev["obj5_q1_divs"].append(r['nama'])
elif r['Finding Count'] < X_LIMIT and r['Avg Lead Time'] >= Y_LIMIT:
dev["obj5_q2_divs"].append(r['nama'])
# === 5. Top categories
if {'kategori', 'temuan_kategori', 'created_at'}.issubset(df.columns):
nonpos = df[df['temuan_kategori'] != 'Positive']
if not nonpos.empty:
start = nonpos['created_at'].min().to_period('M')
end = nonpos['created_at'].max().to_period('M')
n_months = len(pd.period_range(start=start, end=end, freq='M'))
cat_avg = (nonpos.groupby('kategori').size() / n_months).sort_values(ascending=False).head(2)
dev["obj6_top2_categories"] = [(cat, round(val, 1)) for cat, val in cat_avg.items()]
return dev
# ==========================
# ===== MAIN APP ===========
# ==========================
# app.py
import streamlit as st
import pandas as pd
import json
from typing import List, Dict
# Transformers pipeline for lightweight local LLM (text2text)
from transformers import pipeline
import math
# =====================================================================
# OBJECTIVE 7 — INSIGHT & RECOMMENDATION (LLM FIRST, RULE-BASED IF FAIL)
# =====================================================================
st.markdown("<h3 class='section-title'>OBJECTIVE 7 — Risk Mitigation: What are the Insights and Recommendations?</h3>", unsafe_allow_html=True)
# ============================================================
# 1. LLM LOADER
# ============================================================
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
@st.cache_resource
def load_llm_model():
try:
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-2b-it")
model = AutoModelForCausalLM.from_pretrained(
"google/gemma-2-2b-it",
torch_dtype=torch.float16,
device_map="auto"
)
return tokenizer, model
except:
return None, None
def llm_generate_recommendation(insight_summary_text):
"""
Try to generate JSON recommendation using LLM.
If LLM fails → return None, and rule-based will be used.
"""
tok, mdl = load_llm_model()
if tok is None or mdl is None:
return None
try:
prompt = f"""
You are an expert Industrial Safety Analyst AI.
Below is an INSIGHT SUMMARY from a Safety Reporting System:
---
{insight_summary_text}
---
Generate 5 Recommended Actions and Risk Mitigation in clean JSON:
{{
"recommendations": [
{{"point":"1","rec":"...","mit":"..."}},
{{"point":"2","rec":"...","mit":"..."}},
{{"point":"3","rec":"...","mit":"..."}}
]
}}
"""
inputs = tok(prompt, return_tensors="pt").to(mdl.device)
out = mdl.generate(
**inputs,
max_new_tokens=380,
temperature=0.25,
do_sample=True
)
text = tok.decode(out[0], skip_special_tokens=True)
import re, json
json_match = re.search(r"\{[\s\S]*\}", text)
if not json_match:
return None
return json.loads(json_match.group(0))
except:
return None
# ============================================================
# 2. RULE-BASED ENGINE (YOUR ORIGINAL SCRIPT)
# ============================================================
def extract_agentic_insights_v5(df: pd.DataFrame):
dev = {
"lowest_ratio_9_locs": [],
"obj3a_lowest_div": None,
"obj3b_slowest_executor": None,
"obj3c_lowest_reporter": None,
"obj3d_slowest_div": None,
"obj4_unsafe_condition_pct": 0.0,
"obj4_unsafe_action_pct": 0.0,
"obj4_near_miss_pct": 0.0,
"obj5_q1_divs": [],
"obj5_q2_divs": [],
"obj6_top2_categories": [],
}
# === 1. 9 lowest locations ===
if {'nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan'}.issubset(df.columns):
calc = df[['nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan']].copy()
calc['created_at'] = pd.to_datetime(calc['created_at'], errors='coerce')
calc = calc.dropna(subset=['created_at', 'nama_lokasi_full', 'creator_nid'])
calc['bulan'] = calc['created_at'].dt.to_period('M')
monthly = calc.groupby(['nama_lokasi_full', 'bulan']).agg(
findings=('kode_temuan', 'size'),
reporters=('creator_nid', 'nunique')
).reset_index()
monthly = monthly[monthly['reporters'] > 0]
monthly['ratio'] = monthly['findings'] / monthly['reporters']
loc_avg = monthly.groupby('nama_lokasi_full')['ratio'].mean()
lowest_9 = loc_avg.nsmallest(9)
dev["lowest_ratio_9_locs"] = [(loc, round(ratio, 3)) for loc, ratio in lowest_9.items()]
# === 2a: Lowest division ratio ===
if {'nama', 'creator_nid', 'created_at', 'kode_temuan'}.issubset(df.columns):
calc = df[['nama', 'creator_nid', 'created_at', 'kode_temuan']].copy()
calc['bulan'] = pd.to_datetime(calc['created_at']).dt.to_period('M')
agg = calc.groupby(['nama', 'bulan']).agg(
findings=('kode_temuan', 'size'),
reporters=('creator_nid', 'nunique')
)
agg = agg[agg['reporters'] > 0].reset_index()
agg['ratio'] = agg['findings'] / agg['reporters']
div_ratio = agg.groupby('nama')['ratio'].mean()
if not div_ratio.empty:
dev["obj3a_lowest_div"] = (div_ratio.idxmin(), round(div_ratio.min(), 2))
# === 2b: Slowest executor ===
if 'days_to_close' in df.columns:
valid = df[df['days_to_close'].notna() & (df['days_to_close'] >= 0)]
exec_col = 'nama_pic' if 'nama_pic' in valid.columns else 'creator_name'
if exec_col in valid.columns:
lead = valid.groupby(exec_col)['days_to_close'].mean()
if not lead.empty:
dev["obj3b_slowest_executor"] = (lead.idxmax(), round(lead.max(), 1))
# === 2c: Lowest reporter ===
if {'creator_name', 'created_at'}.issubset(df.columns):
calc = df[['creator_name', 'created_at']].copy()
calc['bulan'] = pd.to_datetime(calc['created_at']).dt.to_period('M')
monthly = calc.groupby(['creator_name', 'bulan']).size().reset_index(name='count')
avg = monthly.groupby('creator_name')['count'].mean()
avg = avg[avg > 0]
if not avg.empty:
dev["obj3c_lowest_reporter"] = (avg.idxmin(), round(avg.min(), 2))
# === 2d: Slowest division ===
if 'days_to_close' in df.columns and 'nama' in df.columns:
valid = df[df['days_to_close'].notna() & (df['days_to_close'] >= 0)]
if not valid.empty:
lead = valid.groupby('nama')['days_to_close'].mean()
if not lead.empty:
dev["obj3d_slowest_div"] = (lead.idxmax(), round(lead.max(), 1))
# === 3. Non-positive ===
if 'temuan_kategori' in df.columns:
cnt = df['temuan_kategori'].value_counts(normalize=True) * 100
dev["obj4_unsafe_condition_pct"] = round(cnt.get("Unsafe Condition", 0), 1)
dev["obj4_unsafe_action_pct"] = round(cnt.get("Unsafe Action", 0), 1)
dev["obj4_near_miss_pct"] = round(cnt.get("Near Miss", 0), 1)
# === 4. Risk Quadrant ===
X_LIMIT, Y_LIMIT = 20, 3
if {'nama', 'created_at', 'days_to_close', 'kode_temuan'}.issubset(df.columns):
calc = df.copy()
calc['created_at'] = pd.to_datetime(calc['created_at'], errors='coerce')
calc = calc.assign(month=calc['created_at'].dt.to_period('M').astype(str))
monthly_counts = calc.groupby(['nama', 'month'])['kode_temuan'].nunique().reset_index()
avg_count = monthly_counts.groupby('nama')['kode_temuan'].mean().reset_index(name='Finding Count')
leadtime = calc.groupby('nama')['days_to_close'].mean().reset_index(name='Avg Lead Time')
mat = avg_count.merge(leadtime, on='nama', how='left').fillna(0)
for _, r in mat.iterrows():
if r['Finding Count'] >= X_LIMIT and r['Avg Lead Time'] >= Y_LIMIT:
dev["obj5_q1_divs"].append(r['nama'])
elif r['Finding Count'] < X_LIMIT and r['Avg Lead Time'] >= Y_LIMIT:
dev["obj5_q2_divs"].append(r['nama'])
# === 5. Top categories ===
if {'kategori', 'temuan_kategori', 'created_at'}.issubset(df.columns):
nonpos = df[df['temuan_kategori'] != 'Positive']
if not nonpos.empty:
start = nonpos['created_at'].min().to_period('M')
end = nonpos['created_at'].max().to_period('M')
n_months = len(pd.period_range(start=start, end=end, freq='M'))
cat_avg = (nonpos.groupby('kategori').size() / n_months).sort_values(ascending=False).head(2)
dev["obj6_top2_categories"] = [(cat, round(v, 1)) for cat, v in cat_avg.items()]
return dev
# ============================================================
# 3. RUN INSIGHT ENGINE
# ============================================================
dev = extract_agentic_insights_v5(df_filtered)
# Build Insight Summary Text
insight_lines = []
if dev["lowest_ratio_9_locs"]:
loc_list = ", ".join([f"<strong>{loc}</strong> ({ratio})" for loc, ratio in dev["lowest_ratio_9_locs"]])
insight_lines.append(f"1. Nine locations with the lowest finding-to-reporter ratio: {loc_list}.")
parts = []
if dev["obj3a_lowest_div"]:
parts.append(f"division <strong>{dev['obj3a_lowest_div'][0]}</strong> (ratio: {dev['obj3a_lowest_div'][1]})")
if dev["obj3c_lowest_reporter"]:
parts.append(f"reporter <strong>{dev['obj3c_lowest_reporter'][0]}</strong> ({dev['obj3c_lowest_reporter'][1]} findings/month)")
if dev["obj3d_slowest_div"]:
parts.append(f"division <strong>{dev['obj3d_slowest_div'][0]}</strong> ({dev['obj3d_slowest_div'][1]} days)")
if dev["obj3b_slowest_executor"]:
parts.append(f"executor <strong>{dev['obj3b_slowest_executor'][0]}</strong> ({dev['obj3b_slowest_executor'][1]} days)")
if parts:
insight_lines.append("2. Uneven operational capacity: " + "; ".join(parts))
uc, ua, nm = dev["obj4_unsafe_condition_pct"], dev["obj4_unsafe_action_pct"], dev["obj4_near_miss_pct"]
if uc + ua + nm > 0:
insight_lines.append(f"3. Non-Positive composition: Unsafe Condition ({uc}%), Unsafe Action ({ua}%), Near Miss ({nm}%).")
if dev["obj5_q1_divs"] or dev["obj5_q2_divs"]:
q1 = ", ".join([f"<strong>{x}</strong>" for x in dev["obj5_q1_divs"][:5]])
q2 = ", ".join([f"<strong>{x}</strong>" for x in dev["obj5_q2_divs"][:5]])
insight_lines.append(f"4. High-risk divisions (QI): {q1 or '—'}. Hidden-risk (QII): {q2 or '—'}.")
if dev["obj6_top2_categories"]:
c1, c2 = dev["obj6_top2_categories"]
insight_lines.append(f"5. Top recurring non-Positive categories: <strong>{c1[0]}</strong> ({c1[1]}/mo) & <strong>{c2[0]}</strong> ({c2[1]}/mo).")
insight_text = "<br><br>".join(insight_lines)
# ============================================================
# 4. TRY LLM FIRST → ELSE FALLBACK RULE-BASED
# ============================================================
llm_json = llm_generate_recommendation(insight_text)
if llm_json is not None and "recommendations" in llm_json:
recs = llm_json["recommendations"]
else:
# FALLBACK RULE-BASED
recs = []
if dev["lowest_ratio_9_locs"]:
recs.append({"point":"1","rec":"Launch spot-inspection sprint at across the 9 lowest-ratio locations.","mit":"Enable 3-min QR checklist + automated WhatsApp reminders."})
if parts:
recs.append({"point":"2","rec":"Real-time monitoring of finding/reporter ratios and resolution lead times per division/individual.","mit":"Trigger coaching alerts to Area PICs & Division"})
if uc + ua + nm > 0:
recs.append({"point":"3","rec":"Enforce photo-based validation for Unsafe Condition/Action/Near Miss submissions to ensure accurate categorization.","mit":"System blocks submission if photo evidence or category justification is missing."})
if dev["obj5_q1_divs"] or dev["obj5_q2_divs"]:
recs.append({"point":"4","rec":"Assign dedicated safety crews to Quadrant I divisions; enforce One Finding, One Day closure policy for Quadrant II.","mit":"Auto-generate executive escalation reports to VP Operations if any division remains in QI/QII for ≥2 consecutive months."})
if dev["obj6_top2_categories"]:
c1, c2 = dev["obj6_top2_categories"]
recs.append({"point":"5","rec":f"Create RCA task force for {c1[0]} & {c2[0]}.","mit":"Update tender specs with required mitigations."})
# ============================================================
# 5. RENDERING (NO CHANGES)
# ============================================================
# Insight Summary Card
st.markdown(
f"""
<div class="card" style="background-color:#f8f9fa;border-left:4px solid #003DA5;padding:16px;margin-bottom:20px;border-radius:4px;box-shadow:0 2px 4px rgba(0,0,0,0.05);">
<h4 style="margin-top:0;color:#FF6B6B;">Summary</h4>
<p style="margin-bottom:0;line-height:1.6;font-size:0.98em;">{insight_text}</p>
</div>
""",
unsafe_allow_html=True
)
# Recommendation Table
if recs:
rows = "".join([
f"<tr><td style='text-align:center;font-weight:bold;width:5%;'>{r['point']}</td>"
f"<td style='padding:8px;'>{r['rec']}</td>"
f"<td style='padding:8px;'>{r['mit']}</td></tr>"
for r in recs
])
st.markdown(
f"""
<div class="card" style="background-color:#e8f5e9;border-left:4px solid #4CAF50;padding:16px;margin-bottom:20px;border-radius:4px;box-shadow:0 2px 4px rgba(0,0,0,0.05);">
<h4 style="margin-top:0;color:#2E7D32;">Recommended Actions & Agentic Risk Mitigation</h4>
<table style="width:100%;border-collapse:collapse;font-size:0.95em;margin-top:12px;">
<thead>
<tr style="background-color:#e8f5ee;">
<th style="padding:10px;text-align:center;border:1px solid #ccc;">#</th>
<th style="padding:10px;text-align:left;border:1px solid #ccc;">Recommended Action</th>
<th style="padding:10px;text-align:left;border:1px solid #ccc;">Risk Mitigation Strategy</th>
</tr>
</thead>
<tbody>{rows}</tbody>
</table>
</div>
""",
unsafe_allow_html=True
)
else:
st.info("No actionable insights generated.")