Spaces:

bukittechnology
/

pln

Sleeping

App Files Files Community

SHELLAPANDIANGANHUNGING committed on Dec 10, 2025

Commit

a95c654

verified ·

1 Parent(s): f0f3fbe

Update app.py

Browse files

Files changed (1) hide show

app.py +115 -102

app.py CHANGED Viewed

@@ -904,7 +904,7 @@ with col_3b:
         # Ambil subset sesuai pilihan
         if sort_opt == "Top 10":
             # 10 tercepat: ascending (kecil → besar), tetap diurut ascending → tercepat di atas
-            subset = full_sorted.head(10).sort_values('avg_monthly_leadtime', ascending=True)
         else:  # "Bottom 10 Slowest"
             # 10 terlambat: descending (besar → kecil), agar terlambat di atas
             subset = full_sorted.tail(10).sort_values('avg_monthly_leadtime', ascending=False)
@@ -971,7 +971,7 @@ with col_3d:
         full_sorted = avg_leadtime_per_indiv.sort_values('avg_monthly_leadtime', ascending=True)
         if sort_opt == "Top 10":
-            subset = full_sorted.head(10).subset = full_sorted.head(10).sort_values('avg_monthly_leadtime', ascending=True)
         else:  # "Bottom 10 Slowest"
             subset = full_sorted.tail(10).sort_values('avg_monthly_leadtime', ascending=False)
@@ -1549,7 +1549,7 @@ def predict_creators(df):
                     results.append({
                         'Creator': creator,
                         'Reports/Month': round(avg_rate, 2),
-                        'Coverage (%)': round(coverage * 100, 1),
                         'Trend Slope': round(slope, 3),
                         'Trend': ascii_sparkline_pln(ts.values.tolist()),
                         'Reason': reason
@@ -1597,7 +1597,7 @@ def predict_locations(df):
                     results.append({
                         'Location': lokasi,
                         'Reports/Month': round(avg_rate, 2),
-                        'Coverage (%)': round(coverage * 100, 1),
                         'Trend Slope': round(slope, 3),
                         'Trend': ascii_sparkline_pln(ts.values.tolist()),
                         'Reason': reason
@@ -1645,7 +1645,7 @@ def predict_divisions(df):
                     results.append({
                         'Division': div,
                         'Reports/Month': round(avg_rate, 2),
-                        'Coverage (%)': round(coverage * 100, 1),
                         'Trend Slope': round(slope, 3),
                         'Trend': ascii_sparkline_pln(ts.values.tolist()),
                         'Reason': reason
@@ -1691,7 +1691,7 @@ def predict_categories(df):
         results.append({
             'Category': cat,
             'Avg/Month': round(avg_per_month, 2),
-            'Coverage (%)': round(coverage * 100, 1),
             'Trend Slope': round(slope, 3),
             'Trend': ascii_sparkline_pln(ts_data.values.tolist())
         })
@@ -1724,7 +1724,7 @@ df_category = predict_categories(df_filtered)
 st.markdown("<div class='predictive-panel'>", unsafe_allow_html=True)
 st.markdown("<div class='predictive-header'>1. Which Reporters Are Predicted to Have Less Future Inspections? (Top 10 Most Declining)</div>", unsafe_allow_html=True)
 if not df_creator.empty:
-    cols = ['Creator', 'Reports/Month', 'Coverage (%)', 'Trend Slope', 'Trend']
     # 🔥 Rename hanya untuk DISPLAY, bukan data asli
     df_display = df_creator[cols].rename(columns={
@@ -1760,7 +1760,7 @@ st.markdown("</div>", unsafe_allow_html=True)
 st.markdown("<div class='predictive-panel'>", unsafe_allow_html=True)
 st.markdown("<div class='predictive-header'>2. Which Locations Are Predicted to Have Less Future Inspections? (Top 10 Most Declining)</div>", unsafe_allow_html=True)
 if not df_location.empty:
-    cols = ['Location', 'Reports/Month', 'Coverage (%)', 'Trend Slope', 'Trend']
     # # 🔥 Rename hanya untuk DISPLAY, bukan data asli
     df_display = df_location[cols].rename(columns={
@@ -1796,7 +1796,7 @@ st.markdown("</div>", unsafe_allow_html=True)
 st.markdown("<div class='predictive-panel'>", unsafe_allow_html=True)
 st.markdown("<div class='predictive-header'>3. Which Divisions Are Predicted to Have Less Future Inspections? (Top 10 Most Declining)</div>", unsafe_allow_html=True)
 if not df_division.empty:
-    cols = ['Division', 'Reports/Month', 'Coverage (%)', 'Trend Slope', 'Trend']
     # # 🔥 Rename hanya untuk DISPLAY, bukan data asli
     df_display = df_division[cols].rename(columns={
@@ -1984,9 +1984,11 @@ else:
 st.markdown("<h3 class='section-title'>OBJECTIVE 7 - Insight and Recommendation</h3>", unsafe_allow_html=True)
 # ============================================================== #
-# Helper 1: Hitung Average Monthly Finding-to-Reporter Ratio per Lokasi
 # ============================================================== #
 def compute_avg_monthly_ratio_per_location(df: pd.DataFrame) -> pd.DataFrame:
     required = ['nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan']
     missing = [col for col in required if col not in df.columns]
@@ -2024,9 +2026,6 @@ def compute_avg_monthly_ratio_per_location(df: pd.DataFrame) -> pd.DataFrame:
     return loc_summary
-# ============================================================== #
-# Helper 2: Interpretasi Aktivitas Pelaporan secara Adil
-# ============================================================== #
 def interpret_location_safely(df: pd.DataFrame, location_name: str) -> dict:
     loc_df = df[df['nama_lokasi_full'] == location_name].copy()
     if loc_df.empty:
@@ -2080,9 +2079,6 @@ def interpret_location_safely(df: pd.DataFrame, location_name: str) -> dict:
         "positive_rate": perc_positive
     }
-# ============================================================== #
-# Helper 3: Deteksi Isu Tidak Aman dari Teks
-# ============================================================== #
 def detect_unsafe_terms(df: pd.DataFrame):
     text_cols = ['hasil_keyword_dan_kondisi', 'judul_dan_kondisi', 'kondisi', 'judul']
     text_col = None
@@ -2097,127 +2093,144 @@ def detect_unsafe_terms(df: pd.DataFrame):
     unsafe_terms = [
         'terbuka', 'tidak terkunci', 'tanpa izin', 'tanpa pelindung', 'tanpa alat',
         'korsleting', 'overload', 'grounding', 'exposed', 'unlocked', 'no ppe',
-        'jatuh', 'slip',  'kebakaran', 'fire', 'fall', 'unauthorized',
         'tidak kompeten', 'untrained', 'prosedur dilanggar', 'bypass'
     ]
     found = [term for term in unsafe_terms if term in all_text]
     return list(set(found))
-# ============================================================== #
-# Main: Generate Risk Mitigation Insights
-# ============================================================== #
-def compute_risk_mitigation_insights(df: pd.DataFrame) -> List[dict]:
-    insights = []
     if df.empty:
-        return insights
-    # Insight 1: Top 3 Locations by Volume — Interpreted with Safety Maturity
     if {'nama_lokasi_full', 'temuan_kategori', 'creator_nid', 'created_at'}.issubset(df.columns):
         top_locs = df['nama_lokasi_full'].value_counts().head(3).index.tolist()
         for loc in top_locs:
             interp = interpret_location_safely(df, loc)
-            insight = f"Location {loc}: {interp['interpretation']}"
-            signal = interp['risk_signal']
-            if signal == "Slight Risk":
-                recommendation = (
-                    "Recognize this location as a safety exemplar. Share their positive findings in internal safety communications. "
-                    "Facilitate cross-location learning sessions to replicate practices."
-                )
-            elif signal == "Moderate Risk":
-                recommendation = (
-                    "Conduct a workshop on positive intervention techniques. Train teams to identify and report good practices. "
-                    "Set a target to increase the positive finding rate to above 60 percent within three months."
-                )
-            elif signal == "High Risk":
-                recommendation = (
-                    "Assign two additional auditors to rotate into this location for one month. "
-                    "Administer an anonymous psychological safety survey to assess reporting barriers."
-                )
-            elif signal == "Very High Risk":
-                recommendation = (
-                    "Escalate to area management. Implement daily safety huddles, scheduled supervisor walkarounds, "
-                    "and weekly tracking of unsafe finding closure rates."
-                )
-            else:
-                recommendation = (
-                    "Validate physical inspection coverage. Ensure field presence aligns with digital reporting records."
-                )
-            insights.append({"insight": insight, "recommendation": recommendation})
-    # Insight 2: Organizational Agentic Safety Maturity
     if 'temuan_kategori' in df.columns:
         total = len(df)
         n_positive = (df['temuan_kategori'] == 'Positive').sum()
         positive_rate = n_positive / total if total > 0 else 0
-        insight = (
-            f"Organization-wide, {positive_rate:.1%} of findings are categorized as Positive, "
-            f"indicating proactive safety behaviors. The remaining {100 - positive_rate * 100:.1f} percent are reactive, "
-            f"responding to existing hazards."
         )
-        if positive_rate < 0.4:
-            recommendation = (
-                "Launch an Agentic Safety Program: incentivize near-miss reporting and safety suggestions, "
-                "train designated Safety Coaches per division, and adopt percentage of Positive findings as a leading KPI, "
-                "with a six-month target of 50 percent."
-            )
-        else:
-            recommendation = (
-                "Sustain current momentum. Formalize recognition for divisions with consistently high positive reporting rates."
-            )
-        insights.append({"insight": insight, "recommendation": recommendation})
-    # Insight 3: Emerging Unsafe Issues from Text Analysis
     unsafe_terms = detect_unsafe_terms(df)
     if unsafe_terms:
         top_terms = ', '.join(sorted(unsafe_terms)[:5])
-        insight = f"Text analysis of findings reveals recurring unsafe conditions related to: {top_terms}."
-        recommendation = (
-            "Initiate a targeted two-week Risk Blitz focusing on these conditions. "
-            "Update inspection checklists to include these items as critical control points. "
-            "Require photo documentation for verification of corrective actions."
-        )
-        insights.append({"insight": insight, "recommendation": recommendation})
-    # Insight 4: Low-Activity Locations with Potential Silent Risks
     if 'nama_lokasi_full' in df.columns:
         loc_counts = df['nama_lokasi_full'].value_counts()
         low_activity_locs = loc_counts[loc_counts <= 2].index.tolist()
         for loc in low_activity_locs[:3]:
             interp = interpret_location_safely(df, loc)
             if 0 < interp['positive_rate'] < 0.5:
-                insight = (
                     f"Location {loc} reports low volume ({loc_counts[loc]} findings) with a positive rate of "
                     f"{interp['positive_rate']:.0%}, suggesting possible under-reporting or unobserved hazards."
                 )
-                recommendation = (
-                    "Conduct an unannounced observational audit by an independent team to assess true field conditions."
-                )
-                insights.append({"insight": insight, "recommendation": recommendation})
-    return insights
 # ============================================================== #
-# Execute and Display
 # ============================================================== #
 try:
-    risk_insights = compute_risk_mitigation_insights(df_filtered)
 except Exception as e:
-    st.error(f"Error during insight generation: {str(e)}")
-    risk_insights = []
-if risk_insights:
-    for i, item in enumerate(risk_insights, 1):
-        st.markdown(f"<div class='ai-insight'><strong>Insight {i}:</strong> {item['insight']}</div>", unsafe_allow_html=True)
-        st.markdown(f"<div class='ai-recommendation'><strong>Recommendation {i}:</strong> {item['recommendation']}</div>", unsafe_allow_html=True)
-else:
-    st.markdown(
-        "<div class='ai-insight'>No risk mitigation insights were generated. "
-        "Please ensure the dataset contains the following columns: "
-        "nama_lokasi_full, temuan_kategori, creator_nid, and created_at.</div>",
-        unsafe_allow_html=True
-    )

         # Pick the subset that matches the selected sort option.
         if sort_opt == "Top 10":
             # First 10 rows of full_sorted, re-sorted by avg_monthly_leadtime in
             # descending order (ascending=False).
             # NOTE(review): presumably these are the 10 fastest entries; confirm that
             # descending is the intended display order for this option.
+            subset = full_sorted.head(10).sort_values('avg_monthly_leadtime', ascending=False)
         else:  # "Bottom 10 Slowest"
             # Last 10 rows of full_sorted, re-sorted descending so the largest lead time comes first.
             subset = full_sorted.tail(10).sort_values('avg_monthly_leadtime', ascending=False)
         # Order every individual by average monthly lead time, smallest first.
         full_sorted = avg_leadtime_per_indiv.sort_values('avg_monthly_leadtime', ascending=True)
         if sort_opt == "Top 10":
             # First 10 rows of the ascending order, re-sorted descending for display.
             subset = full_sorted.head(10).sort_values('avg_monthly_leadtime', ascending=False)
         else:  # "Bottom 10 Slowest"
             # Last 10 rows of the ascending order, re-sorted descending for display.
             subset = full_sorted.tail(10).sort_values('avg_monthly_leadtime', ascending=False)
                     results.append({
                         'Creator': creator,
                         'Reports/Month': round(avg_rate, 2),
+                        'Monthly Consistency (%)': round(coverage * 100, 1),
                         'Trend Slope': round(slope, 3),
                         'Trend': ascii_sparkline_pln(ts.values.tolist()),
                         'Reason': reason
                     results.append({
                         'Location': lokasi,
                         'Reports/Month': round(avg_rate, 2),
+                        'Monthly Consistency (%)': round(coverage * 100, 1),
                         'Trend Slope': round(slope, 3),
                         'Trend': ascii_sparkline_pln(ts.values.tolist()),
                         'Reason': reason
                     results.append({
                         'Division': div,
                         'Reports/Month': round(avg_rate, 2),
+                        'Monthly Consistency (%)': round(coverage * 100, 1),
                         'Trend Slope': round(slope, 3),
                         'Trend': ascii_sparkline_pln(ts.values.tolist()),
                         'Reason': reason
         results.append({
             'Category': cat,
             'Avg/Month': round(avg_per_month, 2),
+            'Monthly Consistency (%)': round(coverage * 100, 1),
             'Trend Slope': round(slope, 3),
             'Trend': ascii_sparkline_pln(ts_data.values.tolist())
         })
 st.markdown("<div class='predictive-panel'>", unsafe_allow_html=True)
 st.markdown("<div class='predictive-header'>1. Which Reporters Are Predicted to Have Less Future Inspections? (Top 10 Most Declining)</div>", unsafe_allow_html=True)
 if not df_creator.empty:
+    cols = ['Creator', 'Reports/Month', 'Monthly Consistency (%)', 'Trend Slope', 'Trend']
     # 🔥 Rename hanya untuk DISPLAY, bukan data asli
     df_display = df_creator[cols].rename(columns={
 st.markdown("<div class='predictive-panel'>", unsafe_allow_html=True)
 st.markdown("<div class='predictive-header'>2. Which Locations Are Predicted to Have Less Future Inspections? (Top 10 Most Declining)</div>", unsafe_allow_html=True)
 if not df_location.empty:
+    cols = ['Location', 'Reports/Month', 'Monthly Consistency (%)', 'Trend Slope', 'Trend']
     # # 🔥 Rename hanya untuk DISPLAY, bukan data asli
     df_display = df_location[cols].rename(columns={
 st.markdown("<div class='predictive-panel'>", unsafe_allow_html=True)
 st.markdown("<div class='predictive-header'>3. Which Divisions Are Predicted to Have Less Future Inspections? (Top 10 Most Declining)</div>", unsafe_allow_html=True)
 if not df_division.empty:
+    cols = ['Division', 'Reports/Month', 'Monthly Consistency (%)', 'Trend Slope', 'Trend']
     # # 🔥 Rename hanya untuk DISPLAY, bukan data asli
     df_display = df_division[cols].rename(columns={
 st.markdown("<h3 class='section-title'>OBJECTIVE 7 - Insight and Recommendation</h3>", unsafe_allow_html=True)
 # ============================================================== #
+# Fungsi Insight & Rekomendasi (sama seperti sebelumnya, tanpa perubahan logika)
 # ============================================================== #
 def compute_avg_monthly_ratio_per_location(df: pd.DataFrame) -> pd.DataFrame:
     required = ['nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan']
     missing = [col for col in required if col not in df.columns]
     return loc_summary
 def interpret_location_safely(df: pd.DataFrame, location_name: str) -> dict:
     loc_df = df[df['nama_lokasi_full'] == location_name].copy()
     if loc_df.empty:
         "positive_rate": perc_positive
     }
 def detect_unsafe_terms(df: pd.DataFrame):
     text_cols = ['hasil_keyword_dan_kondisi', 'judul_dan_kondisi', 'kondisi', 'judul']
     text_col = None
     unsafe_terms = [
         'terbuka', 'tidak terkunci', 'tanpa izin', 'tanpa pelindung', 'tanpa alat',
         'korsleting', 'overload', 'grounding', 'exposed', 'unlocked', 'no ppe',
+        'jatuh', 'slip', 'trip', 'kebakaran', 'fire', 'fall', 'unauthorized',
         'tidak kompeten', 'untrained', 'prosedur dilanggar', 'bypass'
     ]
     found = [term for term in unsafe_terms if term in all_text]
     return list(set(found))
def generate_insight_and_recommendation(df: pd.DataFrame):
    """Build the insight, recommendation and risk-mitigation texts for a findings table.

    Args:
        df: Findings data. Insights 1, 2 and 4 are only produced when the
            columns they read are present in ``df``.

    Returns:
        A 3-tuple ``(insight_text, recommendation_text, mitigation_text)`` of
        strings. The same shape is returned for an empty ``df`` so callers can
        always unpack three values.
    """
    if df.empty:
        # Keep the arity identical to the normal path (three strings).
        return (
            "Insufficient data for insight generation.",
            "Ensure dataset is populated and filtered appropriately.",
            "No risk mitigation strategy can be derived without data.",
        )

    insights_parts = []

    # --- Insight 1: Top Active Locations (Interpreted) ---
    if {'nama_lokasi_full', 'temuan_kategori', 'creator_nid', 'created_at'}.issubset(df.columns):
        top_locs = df['nama_lokasi_full'].value_counts().head(3).index.tolist()
        for loc in top_locs:
            interp = interpret_location_safely(df, loc)
            insights_parts.append(f"Location {loc}: {interp['interpretation']}")

    # --- Insight 2: Organizational Safety Maturity ---
    if 'temuan_kategori' in df.columns:
        total = len(df)
        n_positive = (df['temuan_kategori'] == 'Positive').sum()
        positive_rate = n_positive / total if total > 0 else 0
        insights_parts.append(
            f"Organization-wide, {positive_rate:.1%} of findings are Positive (proactive), "
            f"while {100 - positive_rate * 100:.1f}% are reactive responses to existing hazards."
        )

    # --- Insight 3: Emerging Unsafe Conditions ---
    unsafe_terms = detect_unsafe_terms(df)
    if unsafe_terms:
        # Report at most five terms, in alphabetical order.
        top_terms = ', '.join(sorted(unsafe_terms)[:5])
        insights_parts.append(f"Text analysis identifies recurring unsafe conditions related to: {top_terms}.")

    # --- Insight 4: Low-Activity Locations ---
    if 'nama_lokasi_full' in df.columns:
        loc_counts = df['nama_lokasi_full'].value_counts()
        low_activity_locs = loc_counts[loc_counts <= 2].index.tolist()
        for loc in low_activity_locs[:3]:
            interp = interpret_location_safely(df, loc)
            # NOTE(review): positive_rate is compared with 0.5 and formatted with '%',
            # i.e. treated as a 0-1 fraction; confirm interpret_location_safely uses that scale.
            if 0 < interp['positive_rate'] < 0.5:
                insights_parts.append(
                    f"Location {loc} reports low volume ({loc_counts[loc]} findings) with a positive rate of "
                    f"{interp['positive_rate']:.0%}, suggesting possible under-reporting or unobserved hazards."
                )

    # --- Recommendations and risk mitigation strategy (fixed texts, paired by theme) ---
    rec_parts = [
        # Culture & capability
        "Strengthen agentic safety behaviors by launching an Agentic Safety Program, including incentives for near-miss reporting, "
        "training of Safety Coaches per division, and adoption of the percentage of Positive findings as a leading performance indicator.",
        # Data-driven intervention
        "Conduct targeted Risk Blitz campaigns for high-frequency unsafe conditions identified through text analysis, "
        "supported by updated checklists and photo-based verification of corrective actions.",
        # Coverage & equity
        "Improve inspection coverage equity through mandatory auditor rotation, geotagged field validation, and deployment of micro-checklists for frontline personnel.",
        # Psychological safety
        "Assess and improve psychological safety in high-risk locations using anonymous surveys and leadership listening sessions, "
        "particularly where reporting relies on very few individuals.",
    ]
    mitigation_parts = [
        # Culture & capability
        "Shift from compliance-driven audits to capability-building engagements. Measure success by reduction in repeat unsafe findings and increase in proactive interventions.",
        # Data-driven intervention
        "Integrate text analytics into monthly safety reviews to detect emerging risks earlier. Automate alerts when unsafe keywords exceed baseline thresholds.",
        # Coverage & equity
        "Monitor the Gini coefficient of reporter distribution across locations monthly. Set an organizational target of below 0.5 to ensure balanced surveillance.",
        # Psychological safety
        "Decouple reporting volume from individual performance evaluation. Reward quality, learning, and prevention impact instead.",
    ]

    # Combine each group into a single paragraph.
    insight_text = " ".join(insights_parts) if insights_parts else "No significant patterns detected in current data."
    recommendation_text = " ".join(rec_parts)
    mitigation_text = " ".join(mitigation_parts)
    return insight_text, recommendation_text, mitigation_text
 # ============================================================== #
+# Execution & display — ONE CARD PER SECTION
 # ============================================================== #
 # The call result is unpacked into exactly three names. Any exception raised by the
 # call or by the unpacking is caught below and replaced with fallback texts, so the
 # two cards are always rendered.
 try:
+    insight, recommendation, risk_mitigation = generate_insight_and_recommendation(df_filtered)
 except Exception as e:
+    insight = "Error during insight generation."
+    recommendation = f"Review data pipeline: {str(e)}"
+    risk_mitigation = "Ensure required columns are present and datetime formats are consistent."
+# Card Insight
 # NOTE(review): the text is interpolated into raw HTML (unsafe_allow_html=True) without
 # escaping — confirm the interpolated values cannot contain markup.
+st.markdown(
+    f"""
+    <div class="card" style="
+        background-color: #f8f9fa;
+        border-left: 4px solid #2196f3;
+        padding: 16px;
+        margin-bottom: 20px;
+        border-radius: 4px;
+        box-shadow: 0 2px 4px rgba(0,0,0,0.05);
+    ">
+        <h4 style="margin-top: 0; color: #1976d2;">Insight Summary</h4>
+        <p style="margin-bottom: 0;">{insight}</p>
+    </div>
+    """,
+    unsafe_allow_html=True
+)
+# Card Recommendation + Risk Mitigation
 # Both texts are shown in a single card, each in its own paragraph.
+st.markdown(
+    f"""
+    <div class="card" style="
+        background-color: #f0f7ff;
+        border-left: 4px solid #4caf50;
+        padding: 16px;
+        margin-bottom: 20px;
+        border-radius: 4px;
+        box-shadow: 0 2px 4px rgba(0,0,0,0.05);
+    ">
+        <h4 style="margin-top: 0; color: #2e7d32;">Recommended Actions and Risk Mitigation Strategy</h4>
+        <p><strong>Recommended Actions:</strong> {recommendation}</p>
+        <p><strong>Risk Mitigation Strategy:</strong> {risk_mitigation}</p>
+    </div>
+    """,
+    unsafe_allow_html=True
+)