Spaces:

bukittechnology
/

pln

Sleeping

App Files Community

SHELLAPANDIANGANHUNGING committed on Dec 10, 2025

Commit

8b5e6d5

verified ·

1 Parent(s): a95c654

Update app.py

Browse files

Files changed (1) hide show

app.py +186 -208

app.py CHANGED Viewed

@@ -1984,221 +1984,200 @@ else:
 st.markdown("<h3 class='section-title'>OBJECTIVE 7 - Insight and Recommendation</h3>", unsafe_allow_html=True)
-# ============================================================== #
-# Fungsi Insight & Rekomendasi (sama seperti sebelumnya, tanpa perubahan logika)
-# ============================================================== #
-def compute_avg_monthly_ratio_per_location(df: pd.DataFrame) -> pd.DataFrame:
-    required = ['nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan']
-    missing = [col for col in required if col not in df.columns]
-    if missing:
-        raise ValueError(f"Missing columns for ratio: {missing}")
-    df_calc = df[required].copy()
-    df_calc['created_at'] = pd.to_datetime(df_calc['created_at'], errors='coerce')
-    df_calc = df_calc.dropna(subset=['created_at', 'nama_lokasi_full', 'creator_nid'])
-    if df_calc.empty:
-        return pd.DataFrame(columns=[
-            'nama_lokasi_full', 'avg_monthly_ratio', 'total_months_active',
-            'total_findings', 'avg_unique_reporters_per_month'
-        ])
-    df_calc['bulan'] = df_calc['created_at'].dt.to_period('M')
-    monthly_agg = df_calc.groupby(['nama_lokasi_full', 'bulan']).agg(
-        findings_count=('kode_temuan', 'size'),
-        unique_reporters=('creator_nid', 'nunique')
-    ).reset_index()
-    monthly_agg = monthly_agg[monthly_agg['unique_reporters'] > 0]
-    monthly_agg['monthly_ratio'] = monthly_agg['findings_count'] / monthly_agg['unique_reporters']
-    loc_summary = monthly_agg.groupby('nama_lokasi_full').agg(
-        avg_monthly_ratio=('monthly_ratio', 'mean'),
-        total_months_active=('bulan', 'nunique'),
-        total_findings=('findings_count', 'sum'),
-        avg_unique_reporters_per_month=('unique_reporters', 'mean')
-    ).reset_index()
-    loc_summary['avg_monthly_ratio'] = loc_summary['avg_monthly_ratio'].round(2)
-    loc_summary['avg_unique_reporters_per_month'] = loc_summary['avg_unique_reporters_per_month'].round(1)
-    return loc_summary
-def interpret_location_safely(df: pd.DataFrame, location_name: str) -> dict:
-    loc_df = df[df['nama_lokasi_full'] == location_name].copy()
-    if loc_df.empty:
-        return {
-            "interpretation": "No findings reported. Validation of coverage or actual safety status is required.",
-            "risk_signal": "Slight Risk",
-            "positive_rate": 0.0
-        }
-    total = len(loc_df)
-    n_positive = (loc_df['temuan_kategori'] == 'Positive').sum()
-    n_unsafe = total - n_positive
-    perc_positive = n_positive / total if total > 0 else 0
-    unique_reporters = loc_df['creator_nid'].nunique()
-    months_active = loc_df['created_at'].dt.to_period('M').nunique() if 'created_at' in loc_df.columns else 1
-    if total == 0:
-        signal = "Slight Risk"
-        interp = "No findings reported. Validation of coverage or actual safety status is required."
-    elif perc_positive >= 0.6:
-        signal = "Slight Risk"
-        interp = (
-            f"High reporting engagement with {total} findings and {perc_positive:.0%} positive category, "
-            f"contributed by {unique_reporters} unique reporter(s) over {months_active} month(s). "
-            f"This indicates a proactive safety culture."
-        )
-    elif perc_positive >= 0.3:
-        signal = "Moderate Risk"
-        interp = (
-            f"Balanced reporting with {n_unsafe} unsafe findings versus {n_positive} positive. "
-            f"Active monitoring is present, with opportunity to increase preventive behaviors."
-        )
-    else:
-        if unique_reporters == 1:
-            signal = "High Risk"
-            interp = (
-                f"High volume of unsafe findings with low positivity ({perc_positive:.0%}) "
-                f"and reliance on only one reporter. This may indicate observer fatigue, bias, "
-                f"or psychological barriers to broader reporting."
-            )
-        else:
-            signal = "Very High Risk"
-            interp = (
-                f"Predominantly unsafe findings ({n_unsafe} out of {total}) reported by multiple individuals, "
-                f"suggesting genuine and systemic safety hazards requiring urgent management attention."
-            )
-    return {
-        "interpretation": interp,
-        "risk_signal": signal,
-        "positive_rate": perc_positive
     }
-def detect_unsafe_terms(df: pd.DataFrame):
-    text_cols = ['hasil_keyword_dan_kondisi', 'judul_dan_kondisi', 'kondisi', 'judul']
-    text_col = None
-    for col in text_cols:
-        if col in df.columns and df[col].notna().any():
-            text_col = col
-            break
-    if text_col is None:
-        return []
-    all_text = ' '.join(df[text_col].dropna().astype(str).str.lower())
-    unsafe_terms = [
-        'terbuka', 'tidak terkunci', 'tanpa izin', 'tanpa pelindung', 'tanpa alat',
-        'korsleting', 'overload', 'grounding', 'exposed', 'unlocked', 'no ppe',
-        'jatuh', 'slip', 'trip', 'kebakaran', 'fire', 'fall', 'unauthorized',
-        'tidak kompeten', 'untrained', 'prosedur dilanggar', 'bypass'
-    ]
-    found = [term for term in unsafe_terms if term in all_text]
-    return list(set(found))
-def generate_insight_and_recommendation(df: pd.DataFrame):
-    if df.empty:
-        return "Insufficient data for insight generation.", "Ensure dataset is populated and filtered appropriately."
-    insights_parts = []
-    recommendations_parts = []
-    # --- Insight 1: Top Active Locations (Interpreted) ---
-    if {'nama_lokasi_full', 'temuan_kategori', 'creator_nid', 'created_at'}.issubset(df.columns):
-        top_locs = df['nama_lokasi_full'].value_counts().head(3).index.tolist()
-        for loc in top_locs:
-            interp = interpret_location_safely(df, loc)
-            insights_parts.append(f"Location {loc}: {interp['interpretation']}")
-    # --- Insight 2: Organizational Safety Maturity ---
-    if 'temuan_kategori' in df.columns:
-        total = len(df)
-        n_positive = (df['temuan_kategori'] == 'Positive').sum()
-        positive_rate = n_positive / total if total > 0 else 0
-        insights_parts.append(
-            f"Organization-wide, {positive_rate:.1%} of findings are Positive (proactive), "
-            f"while {100 - positive_rate * 100:.1f}% are reactive responses to existing hazards."
         )
-    # --- Insight 3: Emerging Unsafe Conditions ---
-    unsafe_terms = detect_unsafe_terms(df)
-    if unsafe_terms:
-        top_terms = ', '.join(sorted(unsafe_terms)[:5])
-        insights_parts.append(f"Text analysis identifies recurring unsafe conditions related to: {top_terms}.")
-    # --- Insight 4: Low-Activity Locations ---
-    if 'nama_lokasi_full' in df.columns:
-        loc_counts = df['nama_lokasi_full'].value_counts()
-        low_activity_locs = loc_counts[loc_counts <= 2].index.tolist()
-        for loc in low_activity_locs[:3]:
-            interp = interpret_location_safely(df, loc)
-            if 0 < interp['positive_rate'] < 0.5:
-                insights_parts.append(
-                    f"Location {loc} reports low volume ({loc_counts[loc]} findings) with a positive rate of "
-                    f"{interp['positive_rate']:.0%}, suggesting possible under-reporting or unobserved hazards."
-                )
-    # --- Build Recommendation + Risk Mitigation Strategy ---
-    rec_parts = []
-    mitigation_parts = []
-    # Recommendation: Culture & Capability
-    rec_parts.append(
-        "Strengthen agentic safety behaviors by launching an Agentic Safety Program, including incentives for near-miss reporting, "
-        "training of Safety Coaches per division, and adoption of the percentage of Positive findings as a leading performance indicator."
-    )
-    mitigation_parts.append(
-        "Shift from compliance-driven audits to capability-building engagements. Measure success by reduction in repeat unsafe findings and increase in proactive interventions."
-    )
-    # Recommendation: Data-Driven Intervention
-    rec_parts.append(
-        "Conduct targeted Risk Blitz campaigns for high-frequency unsafe conditions identified through text analysis, "
-        "supported by updated checklists and photo-based verification of corrective actions."
-    )
-    mitigation_parts.append(
-        "Integrate text analytics into monthly safety reviews to detect emerging risks earlier. Automate alerts when unsafe keywords exceed baseline thresholds."
-    )
-    # Recommendation: Coverage & Equity
-    rec_parts.append(
-        "Improve inspection coverage equity through mandatory auditor rotation, geotagged field validation, and deployment of micro-checklists for frontline personnel."
-    )
-    mitigation_parts.append(
-        "Monitor the Gini coefficient of reporter distribution across locations monthly. Set an organizational target of below 0.5 to ensure balanced surveillance."
     )
-    # Recommendation: Psychological Safety
-    rec_parts.append(
-        "Assess and improve psychological safety in high-risk locations using anonymous surveys and leadership listening sessions, "
-        "particularly where reporting relies on very few individuals."
-    )
-    mitigation_parts.append(
-        "Decouple reporting volume from individual performance evaluation. Reward quality, learning, and prevention impact instead."
     )
-    # Combine all
-    insight_text = " ".join(insights_parts) if insights_parts else "No significant patterns detected in current data."
-    recommendation_text = " ".join(rec_parts)
-    mitigation_text = " ".join(mitigation_parts)
-    return insight_text, recommendation_text, mitigation_text
-# ============================================================== #
-# Eksekusi & Tampilan — SATU CARD PER BAGIAN
-# ============================================================== #
-try:
-    insight, recommendation, risk_mitigation = generate_insight_and_recommendation(df_filtered)
-except Exception as e:
-    insight = "Error during insight generation."
-    recommendation = f"Review data pipeline: {str(e)}"
-    risk_mitigation = "Ensure required columns are present and datetime formats are consistent."
-# Card Insight
 st.markdown(
     f"""
     <div class="card" style="
@@ -2210,13 +2189,12 @@ st.markdown(
         box-shadow: 0 2px 4px rgba(0,0,0,0.05);
     ">
         <h4 style="margin-top: 0; color: #1976d2;">Insight Summary</h4>
-        <p style="margin-bottom: 0;">{insight}</p>
     </div>
     """,
     unsafe_allow_html=True
 )
-# Card Recommendation + Risk Mitigation
 st.markdown(
     f"""
     <div class="card" style="
@@ -2228,8 +2206,8 @@ st.markdown(
         box-shadow: 0 2px 4px rgba(0,0,0,0.05);
     ">
         <h4 style="margin-top: 0; color: #2e7d32;">Recommended Actions and Risk Mitigation Strategy</h4>
-        <p><strong>Recommended Actions:</strong> {recommendation}</p>
-        <p><strong>Risk Mitigation Strategy:</strong> {risk_mitigation}</p>
     </div>
     """,
     unsafe_allow_html=True

 st.markdown("<h3 class='section-title'>OBJECTIVE 7 - Insight and Recommendation</h3>", unsafe_allow_html=True)
def extract_critical_deviations(df: pd.DataFrame):
    """Collect the headline deviations of Objectives 2-6 from the findings table.

    Each section is optional: it only runs when every column it reads is
    present in ``df``; otherwise its entry keeps the empty default below.
    ``created_at`` is always parsed with ``errors='coerce'`` so string-typed
    or malformed timestamps become NaT instead of raising.

    Args:
        df: Findings data. Columns read when present: ``nama_lokasi_full``,
            ``nama`` (grouped as the division), ``creator_nid``,
            ``creator_name``, ``nama_pic``, ``created_at``, ``kode_temuan``,
            ``days_to_close``, ``temuan_kategori``, ``kategori``.

    Returns:
        dict with the keys:
            obj2_locations_ratio_1: up to 9 location names whose mean monthly
                findings/reporters ratio lies in [0.95, 1.05].
            obj3a_lowest_div_ratio: ``(division, ratio)`` or None.
            obj3b_lowest_reporter: ``(reporter, avg findings per month)`` or None.
            obj3c_slowest_div_leadtime: ``(division, avg days)`` or None.
            obj3d_slowest_executor: ``(executor, avg days)`` or None.
            obj4_unsafe_share: ``{category: percent}`` for the unsafe categories.
            obj5_quadrant_I / obj5_quadrant_II: lists of division names.
            obj6_top2_bubbles: up to two ``(kategori, avg per month)`` tuples.
    """
    dev = {
        "obj2_locations_ratio_1": [],
        "obj3a_lowest_div_ratio": None,
        "obj3b_lowest_reporter": None,
        "obj3c_slowest_div_leadtime": None,
        "obj3d_slowest_executor": None,
        "obj4_unsafe_share": {},
        "obj5_quadrant_I": [],
        "obj5_quadrant_II": [],
        "obj6_top2_bubbles": []
    }

    # === OBJ 2: up to 9 locations with finding ratio ~1.0 (range 0.95-1.05) ===
    if {'nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan'}.issubset(df.columns):
        df_calc = df[['nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan']].copy()
        df_calc['created_at'] = pd.to_datetime(df_calc['created_at'], errors='coerce')
        df_calc = df_calc.dropna(subset=['created_at', 'nama_lokasi_full', 'creator_nid'])
        df_calc['bulan'] = df_calc['created_at'].dt.to_period('M')
        monthly_agg = df_calc.groupby(['nama_lokasi_full', 'bulan']).agg(
            findings=('kode_temuan', 'size'),
            reporters=('creator_nid', 'nunique')
        ).reset_index()
        monthly_agg = monthly_agg[monthly_agg['reporters'] > 0]
        monthly_agg['ratio'] = monthly_agg['findings'] / monthly_agg['reporters']
        loc_avg = monthly_agg.groupby('nama_lokasi_full')['ratio'].mean().reset_index()
        # Keep locations with 0.95 <= ratio <= 1.05
        near_1 = loc_avg[(loc_avg['ratio'] >= 0.95) & (loc_avg['ratio'] <= 1.05)]
        dev["obj2_locations_ratio_1"] = near_1.nlargest(9, 'ratio')['nama_lokasi_full'].tolist()

    # === OBJ 3a: division with the lowest findings-per-reporter ratio ===
    if {'nama', 'creator_nid', 'created_at', 'kode_temuan'}.issubset(df.columns):
        df_ratio = df[['nama', 'creator_nid', 'created_at', 'kode_temuan']].copy()
        # Coerce so one bad timestamp does not abort the whole extraction;
        # rows that become NaT fall out of the groupby (NaN keys are dropped).
        df_ratio['bulan'] = pd.to_datetime(df_ratio['created_at'], errors='coerce').dt.to_period('M')
        agg = df_ratio.groupby(['nama', 'bulan']).agg(
            findings=('kode_temuan', 'size'),
            reporters=('creator_nid', 'nunique')
        )
        agg = agg[agg['reporters'] > 0].reset_index()
        agg['ratio'] = agg['findings'] / agg['reporters']
        div_ratio = agg.groupby('nama')['ratio'].mean()
        if not div_ratio.empty:
            lowest = div_ratio.idxmin()
            dev["obj3a_lowest_div_ratio"] = (lowest, round(div_ratio.min(), 2))

    # === OBJ 3b: reporter with the lowest (>0) monthly frequency ===
    if {'creator_name', 'created_at'}.issubset(df.columns):
        df_rep = df[['creator_name', 'created_at']].copy()
        df_rep['bulan'] = pd.to_datetime(df_rep['created_at'], errors='coerce').dt.to_period('M')
        rep_monthly = df_rep.groupby(['creator_name', 'bulan']).size().reset_index(name='count')
        rep_avg = rep_monthly.groupby('creator_name')['count'].mean()
        if not rep_avg.empty and rep_avg.min() > 0:
            lowest = rep_avg.idxmin()
            dev["obj3b_lowest_reporter"] = (lowest, round(rep_avg.min(), 2))

    # === OBJ 3c & 3d: longest lead time (division & individual) ===
    if 'days_to_close' in df.columns:
        valid_df = df[df['days_to_close'].notna() & (df['days_to_close'] >= 0)]
        # 3c: division
        if 'nama' in valid_df.columns:
            div_lead = valid_df.groupby('nama')['days_to_close'].mean()
            if not div_lead.empty:
                slowest = div_lead.idxmax()
                dev["obj3c_slowest_div_leadtime"] = (slowest, round(div_lead.max(), 1))
        # 3d: executor (priority: nama_pic, then creator_name)
        executor_col = 'nama_pic' if 'nama_pic' in valid_df.columns else 'creator_name'
        if executor_col in valid_df.columns:
            exec_lead = valid_df.groupby(executor_col)['days_to_close'].mean()
            if not exec_lead.empty:
                slowest = exec_lead.idxmax()
                dev["obj3d_slowest_executor"] = (slowest, round(exec_lead.max(), 1))

    # === OBJ 4: pie chart - share of each unsafe category (percent) ===
    if 'temuan_kategori' in df.columns:
        cat_counts = df['temuan_kategori'].value_counts(normalize=True) * 100
        unsafe_cats = ['Unsafe Condition', 'Unsafe Action', 'Near Miss']
        for cat in unsafe_cats:
            if cat in cat_counts.index:
                dev["obj4_unsafe_share"][cat] = round(cat_counts[cat], 1)

    # === OBJ 5: risk matrix quadrants ===
    # Thresholds are stated by the original author to mirror Objective 5
    # (X_LIMIT=20, Y_LIMIT=3) - NOTE(review): confirm against that chart.
    X_LIMIT, Y_LIMIT = 20, 3
    # Guard every column this section reads, not only nama/days_to_close.
    if {'nama', 'days_to_close', 'created_at', 'kode_temuan'}.issubset(df.columns):
        df_risk = df.copy()
        df_risk['created_at'] = pd.to_datetime(df_risk['created_at'], errors='coerce')
        df_risk = df_risk.assign(month=df_risk['created_at'].dt.to_period('M').astype(str))
        # Monthly average of distinct findings per division
        monthly_counts = df_risk.groupby(['nama', 'month'])['kode_temuan'].nunique().reset_index()
        avg_count = monthly_counts.groupby('nama')['kode_temuan'].mean().reset_index(name='Finding Count')
        leadtime = df_risk.groupby('nama')['days_to_close'].mean().reset_index(name='Average Lead Time')
        risk_mat = avg_count.merge(leadtime, on='nama', how='left').fillna(0)
        risk_mat['Average Lead Time'] = risk_mat['Average Lead Time'].clip(lower=0)
        # NaNs were filled with 0 above, so the negated mask is exactly "count < X_LIMIT".
        high_volume = risk_mat['Finding Count'] >= X_LIMIT
        slow = risk_mat['Average Lead Time'] >= Y_LIMIT
        dev["obj5_quadrant_I"] = risk_mat.loc[high_volume & slow, 'nama'].tolist()
        dev["obj5_quadrant_II"] = risk_mat.loc[~high_volume & slow, 'nama'].tolist()

    # === OBJ 6: whiteboard - the 2 largest bubbles (highest avg per month) ===
    if {'kategori', 'temuan_kategori', 'created_at'}.issubset(df.columns):
        df_nonpos = df[df['temuan_kategori'] != 'Positive']
        # The month span comes from ALL rows (not only non-positive ones);
        # parse and drop NaT first so min()/max() are real Timestamps.
        created = pd.to_datetime(df['created_at'], errors='coerce').dropna()
        if not df_nonpos.empty and not created.empty:
            start_month = created.min().to_period('M')
            end_month = created.max().to_period('M')
            n_months = len(pd.period_range(start=start_month, end=end_month, freq='M'))
            cat_avg = (
                df_nonpos.groupby('kategori').size() / n_months
            ).sort_values(ascending=False).head(2)
            dev["obj6_top2_bubbles"] = [(cat, round(val, 2)) for cat, val in cat_avg.items()]

    return dev
# Run the extraction and turn its result into the three text blobs rendered
# by the cards below: insight_text, recommendation_text, mitigation_text.
from html import escape as _escape_html


def _html_text(value):
    """Return ``value`` as text that is safe to embed in the HTML cards.

    Names come straight from the dataset and the cards are rendered with
    ``unsafe_allow_html=True``, so markup characters must be escaped.
    """
    return _escape_html(str(value))


deviations = extract_critical_deviations(df_filtered)

# Build insights from the deviations actually found
insight_parts = []
rec_parts = []

# Objective 2
if deviations["obj2_locations_ratio_1"]:
    ratio_locs = deviations["obj2_locations_ratio_1"]
    locs = ", ".join(_html_text(loc) for loc in ratio_locs[:5])  # show at most 5 in the text
    # Report the real count; the extraction returns *up to* nine locations.
    subject = "location shows" if len(ratio_locs) == 1 else "locations show"
    tail = ", and others" if len(ratio_locs) > 5 else ""
    insight_parts.append(
        f"{len(ratio_locs)} {subject} near-optimal finding-to-reporter ratio (~1.0), indicating balanced workload: "
        f"{locs}{tail}."
    )

# Objective 3
if deviations["obj3a_lowest_div_ratio"]:
    div, ratio = deviations["obj3a_lowest_div_ratio"]
    insight_parts.append(f"Division {_html_text(div)} has the lowest reporting ratio ({ratio}), suggesting potential under-utilization or resource gaps.")
if deviations["obj3b_lowest_reporter"]:
    name, rate = deviations["obj3b_lowest_reporter"]
    insight_parts.append(f"Reporter {_html_text(name)} averages only {rate} finding(s) per month — the lowest among active staff.")
if deviations["obj3c_slowest_div_leadtime"]:
    div, lt = deviations["obj3c_slowest_div_leadtime"]
    insight_parts.append(f"Division {_html_text(div)} takes longest to resolve findings (avg {lt} days), risking SLA breach.")
if deviations["obj3d_slowest_executor"]:
    name, lt = deviations["obj3d_slowest_executor"]
    insight_parts.append(f"Executor {_html_text(name)} has the longest lead time ({lt} days), requiring workflow review.")

# Objective 4
if deviations["obj4_unsafe_share"]:
    unsafe_list = [f"{cat} ({pct}%)" for cat, pct in deviations["obj4_unsafe_share"].items()]
    unsafe_str = "; ".join(unsafe_list)
    insight_parts.append(f"Unsafe issues dominate: {unsafe_str} of all findings.")

# Objective 5
if deviations["obj5_quadrant_I"]:
    q1 = ", ".join(_html_text(d) for d in deviations["obj5_quadrant_I"][:3])
    insight_parts.append(f"High-risk divisions (high volume + slow resolution): {q1}.")
if deviations["obj5_quadrant_II"]:
    q2 = ", ".join(_html_text(d) for d in deviations["obj5_quadrant_II"][:3])
    insight_parts.append(f"Hidden-risk divisions (low volume but very slow): {q2} — may indicate capacity or priority issues.")

# Objective 6
top_bubbles = deviations["obj6_top2_bubbles"]
if len(top_bubbles) >= 2:
    bub1, bub2 = top_bubbles[:2]
    insight_parts.append(
        f"The two most frequently recurring unsafe issues are {_html_text(bub1[0])} ({bub1[1]}/month) "
        f"and {_html_text(bub2[0])} ({bub2[1]}/month), indicating systemic root causes."
    )
elif top_bubbles:
    # Only one non-positive category exists; unpacking two would raise ValueError.
    bub1 = top_bubbles[0]
    insight_parts.append(
        f"The most frequently recurring unsafe issue is {_html_text(bub1[0])} ({bub1[1]}/month), "
        f"indicating a systemic root cause."
    )

# Combine insight
insight_text = " ".join(insight_parts) if insight_parts else "No significant deviations detected based on current filters."

# Recommendations & risk mitigation
rec_parts.append(
    "Prioritize capacity assessment and coaching for divisions and individuals with lowest activity or longest resolution times."
)
rec_parts.append(
    "Initiate root-cause analysis on top two high-frequency unsafe categories to prevent recurrence."
)
rec_parts.append(
    "Review workload distribution for locations with ratio ≈1.0 — they represent a benchmark for sustainable inspection load."
)
mitigation_parts = [
    "Establish SLA thresholds: max 7 days lead time, min 0.5 findings/reporter/month for active status.",
    "Deploy predictive alerts when a division enters Quadrant I or II in the risk matrix.",
    "Integrate category-level trend monitoring into monthly safety meetings to catch emerging risks early."
]
recommendation_text = " ".join(rec_parts)
mitigation_text = " ".join(mitigation_parts)
+# Tampilkan — SATU CARD INSIGHT, SATU CARD REKOMENDASI + MITIGASI
 st.markdown(
     f"""
     <div class="card" style="
         box-shadow: 0 2px 4px rgba(0,0,0,0.05);
     ">
         <h4 style="margin-top: 0; color: #1976d2;">Insight Summary</h4>
+        <p style="margin-bottom: 0;">{insight_text}</p>
     </div>
     """,
     unsafe_allow_html=True
 )
 st.markdown(
     f"""
     <div class="card" style="
         box-shadow: 0 2px 4px rgba(0,0,0,0.05);
     ">
         <h4 style="margin-top: 0; color: #2e7d32;">Recommended Actions and Risk Mitigation Strategy</h4>
+        <p><strong>Recommended Actions:</strong> {recommendation_text}</p>
+        <p><strong>Risk Mitigation Strategy:</strong> {mitigation_text}</p>
     </div>
     """,
     unsafe_allow_html=True