Spaces:

bukittechnology
/

pln

Sleeping

App Files Files Community

SHELLAPANDIANGANHUNGING committed on Dec 10, 2025

Commit

cf1132c

verified ·

1 Parent(s): 09e2f01

Update app.py

Browse files

Files changed (1) hide show

app.py +87 -97

app.py CHANGED Viewed

@@ -1986,7 +1986,7 @@ st.markdown("<h3 class='section-title'>OBJECTIVE 7 - Insight and Recommendation<
 # ============================================================== #
-# 1. Helper: Hitung Average Monthly Ratio per Lokasi (untuk chart & insight)
 # ============================================================== #
 def compute_avg_monthly_ratio_per_location(df: pd.DataFrame) -> pd.DataFrame:
     required = ['nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan']
@@ -2026,16 +2026,15 @@ def compute_avg_monthly_ratio_per_location(df: pd.DataFrame) -> pd.DataFrame:
     return loc_summary
 # ============================================================== #
-# 2. Helper: Interpretasi Aktivitas (Just Culture Perspective)
 # ============================================================== #
 def interpret_location_safely(df: pd.DataFrame, location_name: str) -> dict:
     loc_df = df[df['nama_lokasi_full'] == location_name].copy()
     if loc_df.empty:
         return {
-            "interpretation": "No findings reported — verify coverage.",
             "risk_signal": "Slight Risk",
-            "positive_rate": 0.0,
-            "reporter_diversity": 0
         }
     total = len(loc_df)
@@ -2045,51 +2044,54 @@ def interpret_location_safely(df: pd.DataFrame, location_name: str) -> dict:
     unique_reporters = loc_df['creator_nid'].nunique()
     months_active = loc_df['created_at'].dt.to_period('M').nunique() if 'created_at' in loc_df.columns else 1
-    reporter_diversity = min(unique_reporters / max(months_active, 1), 3.0)  # cap at 3
-    # Risk stratifikasi berdasarkan kombinasi
     if total == 0:
-        signal, interp = "Slight Risk", "No findings — confirm if due to safety or silence."
     elif perc_positive >= 0.6:
         signal = "Slight Risk"
         interp = (
-            f"High engagement: {total} findings, {perc_positive:.0%} Positive, by {unique_reporters} reporter(s). "
-            f"A model of proactive safety culture."
         )
     elif perc_positive >= 0.3:
         signal = "Moderate Risk"
         interp = (
-            f"Active monitoring: {n_unsafe} unsafe vs {n_positive} positive. "
-            f"Opportunity to shift balance toward prevention."
         )
-    else:  # <30% positive
         if unique_reporters == 1:
             signal = "High Risk"
             interp = (
-                f"High unsafe volume, low positivity ({perc_positive:.0%}), only 1 reporter. "
-                f"Risk of fatigue, bias, or fear suppressing broader input."
             )
         else:
             signal = "Very High Risk"
             interp = (
-                f"Dominantly unsafe ({n_unsafe}/{total}), reported by {unique_reporters} people — "
-                f"indicates systemic hazards requiring urgent intervention."
             )
     return {
         "interpretation": interp,
         "risk_signal": signal,
-        "positive_rate": perc_positive,
-        "reporter_diversity": reporter_diversity
     }
 # ============================================================== #
-# 3. Helper: Deteksi Emerging Risk dari Text (Wordcloud Unsafe Issue)
 # ============================================================== #
 def detect_unsafe_terms(df: pd.DataFrame):
     text_cols = ['hasil_keyword_dan_kondisi', 'judul_dan_kondisi', 'kondisi', 'judul']
-    text_col = next((col for col in text_cols if col in df.columns and df[col].notna().any()), None)
-    if not text_col:
         return []
     all_text = ' '.join(df[text_col].dropna().astype(str).str.lower())
@@ -2103,132 +2105,120 @@ def detect_unsafe_terms(df: pd.DataFrame):
     return list(set(found))
 # ============================================================== #
-# 4. Main Insight Generator
 # ============================================================== #
 def compute_risk_mitigation_insights(df: pd.DataFrame) -> List[dict]:
     insights = []
     if df.empty:
         return insights
-    # --- Insight 1: Top 3 Locations by Activity — Interpreted Safely ---
     if {'nama_lokasi_full', 'temuan_kategori', 'creator_nid', 'created_at'}.issubset(df.columns):
         top_locs = df['nama_lokasi_full'].value_counts().head(3).index.tolist()
         for loc in top_locs:
             interp = interpret_location_safely(df, loc)
-            insight = f"📍 <strong>{loc}</strong>: {interp['interpretation']}"
-            signal = interp['risk_signal']
-            # Rekomendasi berbasis signal & maturity
             if signal == "Slight Risk":
-                rec = (
-                    f"🏆 Highlight as safety exemplar. Share their positive findings in PLN safety newsletters. "
-                    f"Encourage cross-location learning visits."
                 )
             elif signal == "Moderate Risk":
-                rec = (
-                    f"🔄 Run a 'Positive Intervention Workshop': train teams to spot & report good practices. "
-                    f"Aim to increase positive rate from {interp['positive_rate']:.0%} to ≥60% in 3 months."
                 )
             elif signal == "High Risk":
-                rec = (
-                    f"👥 Rotate 2 additional auditors into this location for 1 month. "
-                    f"Conduct anonymous psychological safety survey — fear suppresses reporting diversity."
                 )
             elif signal == "Very High Risk":
-                rec = (
-                    f"🚨 Escalate to Area Manager. Implement: (1) Daily safety huddles, (2) Supervisor walkarounds, "
-                    f"(3) Weekly unsafe finding closure tracking."
                 )
             else:
-                rec = "🔍 Validate inspection coverage — ensure physical presence matches digital records."
-            insights.append({"insight": insight, "recommendation": rec})
-    # --- Insight 2: Coverage Equity & Ratio Analysis ---
-    try:
-        ratio_df = compute_avg_monthly_ratio_per_location(df)
-        if not ratio_df.empty:
-            # Lokasi dengan rasio tinggi TAPI positive rate rendah → waspada
-            high_ratio_locs = ratio_df.nlargest(3, 'avg_monthly_ratio')['nama_lokasi_full'].tolist()
-            for loc in high_ratio_locs:
-                interp = interpret_location_safely(df, loc)
-                if interp['risk_signal'] in ["High Risk", "Very High Risk"]:
-                    insight = (
-                        f"📊 Location '{loc}' has high finding-to-reporter ratio ({ratio_df.loc[ratio_df['nama_lokasi_full']==loc, 'avg_monthly_ratio'].iloc[0]:.2f}) "
-                        f"but low positive culture ({interp['positive_rate']:.0%}) — efficiency ≠ safety."
-                    )
-                    rec = (
-                        f"Prioritize coaching over counting. Shift KPI from 'findings submitted' to "
-                        f"'% positive findings' and 'unsafe items closed within 72h'."
-                    )
-                    insights.append({"insight": insight, "recommendation": rec})
-    except Exception as e:
-        pass  # silent fail if ratio can't be computed
-    # --- Insight 3: Agentic Safety Maturity ---
     if 'temuan_kategori' in df.columns:
         total = len(df)
         n_positive = (df['temuan_kategori'] == 'Positive').sum()
         positive_rate = n_positive / total if total > 0 else 0
         insight = (
-            f"Across all locations, only {positive_rate:.1%} of findings are *Positive* (proactive), "
-            f"while {100 - positive_rate*100:.1f}% are *reactive* (hazards already present)."
         )
         if positive_rate < 0.4:
-            rec = (
-                f"Launch <strong>Agentic Safety Program</strong>:<br>"
-                f"• Reward near-miss reports & safety suggestions (not just violations)<br>"
-                f"• Train 'Safety Coaches' in each division<br>"
-                f"• Track <em>% Positive Findings</em> as leading KPI (target: ≥50% in 6 months)"
             )
         else:
-            rec = (
-                f"Maintain momentum — scale successful practices. Recognize top 3 'Safety Coaches' quarterly."
             )
-        insights.append({"insight": insight, "recommendation": rec})
-    # --- Insight 4: Emerging Unsafe Issues (Wordcloud Alert) ---
     unsafe_terms = detect_unsafe_terms(df)
     if unsafe_terms:
-        top_terms = ', '.join(unsafe_terms[:5])
-        insight = f"⚠️ <strong>Unsafe Issue</strong> cloud shows recurring hazards: *{top_terms}*."
-        rec = (
-            f"• Run 14-day <strong>Risk Blitz</strong> on these items<br>"
-            f"• Update checklists to include these as 'Critical Control Points'<br>"
-            f"• Require photo evidence for closure"
         )
-        insights.append({"insight": insight, "recommendation": rec})
-    # --- Insight 5: Silent Zones (Low Activity + Low Positivity) ---
-    if {'nama_lokasi_full', 'temuan_kategori'}.issubset(df.columns):
-        loc_activity = df.groupby('nama_lokasi_full').size()
-        low_activity_locs = loc_activity[loc_activity <= 2].index.tolist()  # ≤2 findings
         for loc in low_activity_locs[:3]:
             interp = interpret_location_safely(df, loc)
-            if interp['positive_rate'] < 0.5 and loc_activity[loc] > 0:
-                insight = f"🔇 Location '{loc}' has low reporting ({loc_activity[loc]} findings) and low positivity ({interp['positive_rate']:.0%}) — possible silent failure."
-                rec = "Conduct unannounced 'silent audit' by external team to validate true safety status."
-                insights.append({"insight": insight, "recommendation": rec})
     return insights
 # ============================================================== #
-# 5. Tampilkan Hasil
 # ============================================================== #
 try:
     risk_insights = compute_risk_mitigation_insights(df_filtered)
 except Exception as e:
-    st.error(f"Error in insight generation: {str(e)}")
     risk_insights = []
 if risk_insights:
-    for i, ir in enumerate(risk_insights, 1):
-        st.markdown(f"<div class='ai-insight'><strong>Insight {i}:</strong> {ir['insight']}</div>", unsafe_allow_html=True)
-        st.markdown(f"<div class='ai-recommendation'><strong>Action {i}:</strong> {ir['recommendation']}</div>", unsafe_allow_html=True)
 else:
     st.markdown(
-        "<div class='ai-insight'>No insights generated. Ensure your data includes: "
-        "<code>nama_lokasi_full</code>, <code>temuan_kategori</code>, <code>creator_nid</code>, <code>created_at</code>.</div>",
         unsafe_allow_html=True
     )

 # ============================================================== #
+# Helper 1: Hitung Average Monthly Finding-to-Reporter Ratio per Lokasi
 # ============================================================== #
 def compute_avg_monthly_ratio_per_location(df: pd.DataFrame) -> pd.DataFrame:
     required = ['nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan']
     return loc_summary
 # ============================================================== #
+# Helper 2: Interpretasi Aktivitas Pelaporan secara Adil
 # ============================================================== #
 def interpret_location_safely(df: pd.DataFrame, location_name: str) -> dict:
     loc_df = df[df['nama_lokasi_full'] == location_name].copy()
     if loc_df.empty:
         return {
+            "interpretation": "No findings reported. Validation of coverage or actual safety status is required.",
             "risk_signal": "Slight Risk",
+            "positive_rate": 0.0
         }
     total = len(loc_df)
     unique_reporters = loc_df['creator_nid'].nunique()
     months_active = loc_df['created_at'].dt.to_period('M').nunique() if 'created_at' in loc_df.columns else 1
     if total == 0:
+        signal = "Slight Risk"
+        interp = "No findings reported. Validation of coverage or actual safety status is required."
     elif perc_positive >= 0.6:
         signal = "Slight Risk"
         interp = (
+            f"High reporting engagement with {total} findings and {perc_positive:.0%} positive category, "
+            f"contributed by {unique_reporters} unique reporter(s) over {months_active} month(s). "
+            f"This indicates a proactive safety culture."
         )
     elif perc_positive >= 0.3:
         signal = "Moderate Risk"
         interp = (
+            f"Balanced reporting with {n_unsafe} unsafe findings versus {n_positive} positive. "
+            f"Active monitoring is present, with opportunity to increase preventive behaviors."
         )
+    else:
         if unique_reporters == 1:
             signal = "High Risk"
             interp = (
+                f"High volume of unsafe findings with low positivity ({perc_positive:.0%}) "
+                f"and reliance on only one reporter. This may indicate observer fatigue, bias, "
+                f"or psychological barriers to broader reporting."
             )
         else:
             signal = "Very High Risk"
             interp = (
+                f"Predominantly unsafe findings ({n_unsafe} out of {total}) reported by multiple individuals, "
+                f"suggesting genuine and systemic safety hazards requiring urgent management attention."
             )
     return {
         "interpretation": interp,
         "risk_signal": signal,
+        "positive_rate": perc_positive
     }
 # ============================================================== #
+# Helper 3: Deteksi Isu Tidak Aman dari Teks
 # ============================================================== #
 def detect_unsafe_terms(df: pd.DataFrame):
     text_cols = ['hasil_keyword_dan_kondisi', 'judul_dan_kondisi', 'kondisi', 'judul']
+    text_col = None
+    for col in text_cols:
+        if col in df.columns and df[col].notna().any():
+            text_col = col
+            break
+    if text_col is None:
         return []
     all_text = ' '.join(df[text_col].dropna().astype(str).str.lower())
     return list(set(found))
 # ============================================================== #
+# Main: Generate Risk Mitigation Insights
 # ============================================================== #
 def compute_risk_mitigation_insights(df: pd.DataFrame) -> List[dict]:
     """Build insight/recommendation pairs from a findings DataFrame.

     Four independent analyses are attempted; each one is skipped when the
     columns it needs are absent:

     1. the three locations with the most findings, interpreted through
        ``interpret_location_safely``;
     2. the overall share of findings whose ``temuan_kategori`` is
        ``'Positive'``;
     3. recurring unsafe terms returned by ``detect_unsafe_terms``;
     4. up to three locations with at most two findings and a positive
        rate below 50 percent.

     Args:
         df: Findings data. Columns read here are ``nama_lokasi_full`` and
             ``temuan_kategori``; the helpers read further columns.

     Returns:
         A list of dicts, each with the keys ``"insight"`` and
         ``"recommendation"``. The list is empty when ``df`` is empty or no
         analysis applies. Values taken from the data (location names,
         detected terms) are HTML-escaped because the strings are meant to
         be rendered as HTML.
     """
     # Function-scope import keeps the block self-contained.
     import html

     insights = []
     if df.empty:
         return insights

     available = set(df.columns)

     # Recommendation text per risk signal returned by interpret_location_safely.
     recommendation_by_signal = {
         "Slight Risk": (
             "Recognize this location as a safety exemplar. Share their positive findings in internal safety communications. "
             "Facilitate cross-location learning sessions to replicate practices."
         ),
         "Moderate Risk": (
             "Conduct a workshop on positive intervention techniques. Train teams to identify and report good practices. "
             "Set a target to increase the positive finding rate to above 60 percent within three months."
         ),
         "High Risk": (
             "Assign two additional auditors to rotate into this location for one month. "
             "Administer an anonymous psychological safety survey to assess reporting barriers."
         ),
         "Very High Risk": (
             "Escalate to area management. Implement daily safety huddles, scheduled supervisor walkarounds, "
             "and weekly tracking of unsafe finding closure rates."
         ),
     }
     fallback_recommendation = (
         "Validate physical inspection coverage. Ensure field presence aligns with digital reporting records."
     )

     # Insight 1: Top 3 Locations by Volume — Interpreted with Safety Maturity
     if {'nama_lokasi_full', 'temuan_kategori', 'creator_nid', 'created_at'} <= available:
         top_locs = df['nama_lokasi_full'].value_counts().head(3).index.tolist()
         for loc in top_locs:
             interp = interpret_location_safely(df, loc)
             insights.append({
                 "insight": f"Location {html.escape(str(loc))}: {interp['interpretation']}",
                 "recommendation": recommendation_by_signal.get(
                     interp['risk_signal'], fallback_recommendation
                 ),
             })

     # Insight 2: Organizational Agentic Safety Maturity
     if 'temuan_kategori' in available:
         total = len(df)
         n_positive = (df['temuan_kategori'] == 'Positive').sum()
         positive_rate = n_positive / total if total > 0 else 0
         insight = (
             f"Organization-wide, {positive_rate:.1%} of findings are categorized as Positive, "
             f"indicating proactive safety behaviors. The remaining {100 - positive_rate * 100:.1f} percent are reactive, "
             f"responding to existing hazards."
         )
         if positive_rate < 0.4:
             recommendation = (
                 "Launch an Agentic Safety Program: incentivize near-miss reporting and safety suggestions, "
                 "train designated Safety Coaches per division, and adopt percentage of Positive findings as a leading KPI, "
                 "with a six-month target of 50 percent."
             )
         else:
             recommendation = (
                 "Sustain current momentum. Formalize recognition for divisions with consistently high positive reporting rates."
             )
         insights.append({"insight": insight, "recommendation": recommendation})

     # Insight 3: Emerging Unsafe Issues from Text Analysis
     unsafe_terms = detect_unsafe_terms(df)
     if unsafe_terms:
         # Sorted so the five terms shown are stable between runs.
         top_terms = ', '.join(html.escape(str(term)) for term in sorted(unsafe_terms)[:5])
         insight = f"Text analysis of findings reveals recurring unsafe conditions related to: {top_terms}."
         recommendation = (
             "Initiate a targeted two-week Risk Blitz focusing on these conditions. "
             "Update inspection checklists to include these items as critical control points. "
             "Require photo documentation for verification of corrective actions."
         )
         insights.append({"insight": insight, "recommendation": recommendation})

     # Insight 4: Low-Activity Locations with Potential Silent Risks
     # interpret_location_safely indexes 'creator_nid' directly and presumably
     # 'temuan_kategori' as well (TODO confirm); guarding here avoids a KeyError
     # that would otherwise discard every insight built above.
     if {'nama_lokasi_full', 'temuan_kategori', 'creator_nid'} <= available:
         loc_counts = df['nama_lokasi_full'].value_counts()
         low_activity = loc_counts[loc_counts <= 2].head(3)
         for loc, n_findings in low_activity.items():
             interp = interpret_location_safely(df, loc)
             # With at most two findings a share can only be 0, 0.5 or 1
             # (assuming positive_rate is a share of findings), so a strict
             # "0 < rate < 0.5" test would never match; 0% must be included.
             if interp['positive_rate'] < 0.5:
                 insight = (
                     f"Location {html.escape(str(loc))} reports low volume ({n_findings} findings) with a positive rate of "
                     f"{interp['positive_rate']:.0%}, suggesting possible under-reporting or unobserved hazards."
                 )
                 recommendation = (
                     "Conduct an unannounced observational audit by an independent team to assess true field conditions."
                 )
                 insights.append({"insight": insight, "recommendation": recommendation})

     return insights
 # ============================================================== #
+# Execute and Display
 # ============================================================== #
 # Generate the insights; any failure is reported in the UI and treated as
 # "no insights" so the rest of the page still renders.
 try:
     risk_insights = compute_risk_mitigation_insights(df_filtered)
 except Exception as exc:
     st.error(f"Error during insight generation: {exc!s}")
     risk_insights = []

 if not risk_insights:
     # Nothing was produced: tell the user which columns the analysis needs.
     st.markdown(
         "<div class='ai-insight'>No risk mitigation insights were generated. "
         "Please ensure the dataset contains the following columns: "
         "nama_lokasi_full, temuan_kategori, creator_nid, and created_at.</div>",
         unsafe_allow_html=True,
     )
 else:
     # Render each insight followed by its recommendation, numbered from 1.
     for idx, entry in enumerate(risk_insights, start=1):
         st.markdown(
             f"<div class='ai-insight'><strong>Insight {idx}:</strong> {entry['insight']}</div>",
             unsafe_allow_html=True,
         )
         st.markdown(
             f"<div class='ai-recommendation'><strong>Recommendation {idx}:</strong> {entry['recommendation']}</div>",
             unsafe_allow_html=True,
         )