SHELLAPANDIANGANHUNGING commited on
Commit
cf1132c
·
verified ·
1 Parent(s): 09e2f01

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -97
app.py CHANGED
@@ -1986,7 +1986,7 @@ st.markdown("<h3 class='section-title'>OBJECTIVE 7 - Insight and Recommendation<
1986
 
1987
 
1988
  # ============================================================== #
1989
- # 1. Helper: Hitung Average Monthly Ratio per Lokasi (untuk chart & insight)
1990
  # ============================================================== #
1991
  def compute_avg_monthly_ratio_per_location(df: pd.DataFrame) -> pd.DataFrame:
1992
  required = ['nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan']
@@ -2026,16 +2026,15 @@ def compute_avg_monthly_ratio_per_location(df: pd.DataFrame) -> pd.DataFrame:
2026
  return loc_summary
2027
 
2028
  # ============================================================== #
2029
- # 2. Helper: Interpretasi Aktivitas (Just Culture Perspective)
2030
  # ============================================================== #
2031
  def interpret_location_safely(df: pd.DataFrame, location_name: str) -> dict:
2032
  loc_df = df[df['nama_lokasi_full'] == location_name].copy()
2033
  if loc_df.empty:
2034
  return {
2035
- "interpretation": "No findings reported verify coverage.",
2036
  "risk_signal": "Slight Risk",
2037
- "positive_rate": 0.0,
2038
- "reporter_diversity": 0
2039
  }
2040
 
2041
  total = len(loc_df)
@@ -2045,51 +2044,54 @@ def interpret_location_safely(df: pd.DataFrame, location_name: str) -> dict:
2045
  unique_reporters = loc_df['creator_nid'].nunique()
2046
  months_active = loc_df['created_at'].dt.to_period('M').nunique() if 'created_at' in loc_df.columns else 1
2047
 
2048
- reporter_diversity = min(unique_reporters / max(months_active, 1), 3.0) # cap at 3
2049
-
2050
- # Risk stratifikasi berdasarkan kombinasi
2051
  if total == 0:
2052
- signal, interp = "Slight Risk", "No findings — confirm if due to safety or silence."
 
2053
  elif perc_positive >= 0.6:
2054
  signal = "Slight Risk"
2055
  interp = (
2056
- f"High engagement: {total} findings, {perc_positive:.0%} Positive, by {unique_reporters} reporter(s). "
2057
- f"A model of proactive safety culture."
 
2058
  )
2059
  elif perc_positive >= 0.3:
2060
  signal = "Moderate Risk"
2061
  interp = (
2062
- f"Active monitoring: {n_unsafe} unsafe vs {n_positive} positive. "
2063
- f"Opportunity to shift balance toward prevention."
2064
  )
2065
- else: # <30% positive
2066
  if unique_reporters == 1:
2067
  signal = "High Risk"
2068
  interp = (
2069
- f"High unsafe volume, low positivity ({perc_positive:.0%}), only 1 reporter. "
2070
- f"Risk of fatigue, bias, or fear suppressing broader input."
 
2071
  )
2072
  else:
2073
  signal = "Very High Risk"
2074
  interp = (
2075
- f"Dominantly unsafe ({n_unsafe}/{total}), reported by {unique_reporters} people "
2076
- f"indicates systemic hazards requiring urgent intervention."
2077
  )
2078
 
2079
  return {
2080
  "interpretation": interp,
2081
  "risk_signal": signal,
2082
- "positive_rate": perc_positive,
2083
- "reporter_diversity": reporter_diversity
2084
  }
2085
 
2086
  # ============================================================== #
2087
- # 3. Helper: Deteksi Emerging Risk dari Text (Wordcloud Unsafe Issue)
2088
  # ============================================================== #
2089
  def detect_unsafe_terms(df: pd.DataFrame):
2090
  text_cols = ['hasil_keyword_dan_kondisi', 'judul_dan_kondisi', 'kondisi', 'judul']
2091
- text_col = next((col for col in text_cols if col in df.columns and df[col].notna().any()), None)
2092
- if not text_col:
 
 
 
 
2093
  return []
2094
 
2095
  all_text = ' '.join(df[text_col].dropna().astype(str).str.lower())
@@ -2103,132 +2105,120 @@ def detect_unsafe_terms(df: pd.DataFrame):
2103
  return list(set(found))
2104
 
2105
  # ============================================================== #
2106
- # 4. Main Insight Generator
2107
  # ============================================================== #
2108
  def compute_risk_mitigation_insights(df: pd.DataFrame) -> List[dict]:
2109
  insights = []
2110
  if df.empty:
2111
  return insights
2112
 
2113
- # --- Insight 1: Top 3 Locations by Activity — Interpreted Safely ---
2114
  if {'nama_lokasi_full', 'temuan_kategori', 'creator_nid', 'created_at'}.issubset(df.columns):
2115
  top_locs = df['nama_lokasi_full'].value_counts().head(3).index.tolist()
2116
  for loc in top_locs:
2117
  interp = interpret_location_safely(df, loc)
2118
- insight = f"📍 <strong>{loc}</strong>: {interp['interpretation']}"
2119
- signal = interp['risk_signal']
2120
 
2121
- # Rekomendasi berbasis signal & maturity
2122
  if signal == "Slight Risk":
2123
- rec = (
2124
- f"🏆 Highlight as safety exemplar. Share their positive findings in PLN safety newsletters. "
2125
- f"Encourage cross-location learning visits."
2126
  )
2127
  elif signal == "Moderate Risk":
2128
- rec = (
2129
- f"🔄 Run a 'Positive Intervention Workshop': train teams to spot & report good practices. "
2130
- f"Aim to increase positive rate from {interp['positive_rate']:.0%} to ≥60% in 3 months."
2131
  )
2132
  elif signal == "High Risk":
2133
- rec = (
2134
- f"👥 Rotate 2 additional auditors into this location for 1 month. "
2135
- f"Conduct anonymous psychological safety survey fear suppresses reporting diversity."
2136
  )
2137
  elif signal == "Very High Risk":
2138
- rec = (
2139
- f"🚨 Escalate to Area Manager. Implement: (1) Daily safety huddles, (2) Supervisor walkarounds, "
2140
- f"(3) Weekly unsafe finding closure tracking."
2141
  )
2142
  else:
2143
- rec = "🔍 Validate inspection coverage — ensure physical presence matches digital records."
 
 
2144
 
2145
- insights.append({"insight": insight, "recommendation": rec})
2146
 
2147
- # --- Insight 2: Coverage Equity & Ratio Analysis ---
2148
- try:
2149
- ratio_df = compute_avg_monthly_ratio_per_location(df)
2150
- if not ratio_df.empty:
2151
- # Lokasi dengan rasio tinggi TAPI positive rate rendah → waspada
2152
- high_ratio_locs = ratio_df.nlargest(3, 'avg_monthly_ratio')['nama_lokasi_full'].tolist()
2153
- for loc in high_ratio_locs:
2154
- interp = interpret_location_safely(df, loc)
2155
- if interp['risk_signal'] in ["High Risk", "Very High Risk"]:
2156
- insight = (
2157
- f"📊 Location '{loc}' has high finding-to-reporter ratio ({ratio_df.loc[ratio_df['nama_lokasi_full']==loc, 'avg_monthly_ratio'].iloc[0]:.2f}) "
2158
- f"but low positive culture ({interp['positive_rate']:.0%}) — efficiency ≠ safety."
2159
- )
2160
- rec = (
2161
- f"Prioritize coaching over counting. Shift KPI from 'findings submitted' to "
2162
- f"'% positive findings' and 'unsafe items closed within 72h'."
2163
- )
2164
- insights.append({"insight": insight, "recommendation": rec})
2165
- except Exception as e:
2166
- pass # silent fail if ratio can't be computed
2167
-
2168
- # --- Insight 3: Agentic Safety Maturity ---
2169
  if 'temuan_kategori' in df.columns:
2170
  total = len(df)
2171
  n_positive = (df['temuan_kategori'] == 'Positive').sum()
2172
  positive_rate = n_positive / total if total > 0 else 0
2173
 
2174
  insight = (
2175
- f"Across all locations, only {positive_rate:.1%} of findings are *Positive* (proactive), "
2176
- f"while {100 - positive_rate*100:.1f}% are *reactive* (hazards already present)."
 
2177
  )
 
2178
  if positive_rate < 0.4:
2179
- rec = (
2180
- f"Launch <strong>Agentic Safety Program</strong>:<br>"
2181
- f" Reward near-miss reports & safety suggestions (not just violations)<br>"
2182
- f" Train 'Safety Coaches' in each division<br>"
2183
- f"• Track <em>% Positive Findings</em> as leading KPI (target: ≥50% in 6 months)"
2184
  )
2185
  else:
2186
- rec = (
2187
- f"Maintain momentum scale successful practices. Recognize top 3 'Safety Coaches' quarterly."
2188
  )
2189
- insights.append({"insight": insight, "recommendation": rec})
2190
 
2191
- # --- Insight 4: Emerging Unsafe Issues (Wordcloud Alert) ---
 
 
2192
  unsafe_terms = detect_unsafe_terms(df)
2193
  if unsafe_terms:
2194
- top_terms = ', '.join(unsafe_terms[:5])
2195
- insight = f"⚠️ <strong>Unsafe Issue</strong> cloud shows recurring hazards: *{top_terms}*."
2196
- rec = (
2197
- f" Run 14-day <strong>Risk Blitz</strong> on these items<br>"
2198
- f"Update checklists to include these as 'Critical Control Points'<br>"
2199
- f"Require photo evidence for closure"
2200
  )
2201
- insights.append({"insight": insight, "recommendation": rec})
2202
 
2203
- # --- Insight 5: Silent Zones (Low Activity + Low Positivity) ---
2204
- if {'nama_lokasi_full', 'temuan_kategori'}.issubset(df.columns):
2205
- loc_activity = df.groupby('nama_lokasi_full').size()
2206
- low_activity_locs = loc_activity[loc_activity <= 2].index.tolist() # ≤2 findings
2207
  for loc in low_activity_locs[:3]:
2208
  interp = interpret_location_safely(df, loc)
2209
- if interp['positive_rate'] < 0.5 and loc_activity[loc] > 0:
2210
- insight = f"🔇 Location '{loc}' has low reporting ({loc_activity[loc]} findings) and low positivity ({interp['positive_rate']:.0%}) — possible silent failure."
2211
- rec = "Conduct unannounced 'silent audit' by external team to validate true safety status."
2212
- insights.append({"insight": insight, "recommendation": rec})
 
 
 
 
 
2213
 
2214
  return insights
2215
 
2216
  # ============================================================== #
2217
- # 5. Tampilkan Hasil
2218
  # ============================================================== #
2219
  try:
2220
  risk_insights = compute_risk_mitigation_insights(df_filtered)
2221
  except Exception as e:
2222
- st.error(f"Error in insight generation: {str(e)}")
2223
  risk_insights = []
2224
 
2225
  if risk_insights:
2226
- for i, ir in enumerate(risk_insights, 1):
2227
- st.markdown(f"<div class='ai-insight'><strong>Insight {i}:</strong> {ir['insight']}</div>", unsafe_allow_html=True)
2228
- st.markdown(f"<div class='ai-recommendation'><strong>Action {i}:</strong> {ir['recommendation']}</div>", unsafe_allow_html=True)
2229
  else:
2230
  st.markdown(
2231
- "<div class='ai-insight'>No insights generated. Ensure your data includes: "
2232
- "<code>nama_lokasi_full</code>, <code>temuan_kategori</code>, <code>creator_nid</code>, <code>created_at</code>.</div>",
 
2233
  unsafe_allow_html=True
2234
  )
 
1986
 
1987
 
1988
  # ============================================================== #
1989
+ # Helper 1: Hitung Average Monthly Finding-to-Reporter Ratio per Lokasi
1990
  # ============================================================== #
1991
  def compute_avg_monthly_ratio_per_location(df: pd.DataFrame) -> pd.DataFrame:
1992
  required = ['nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan']
 
2026
  return loc_summary
2027
 
2028
  # ============================================================== #
2029
+ # Helper 2: Interpretasi Aktivitas Pelaporan secara Adil
2030
  # ============================================================== #
2031
  def interpret_location_safely(df: pd.DataFrame, location_name: str) -> dict:
2032
  loc_df = df[df['nama_lokasi_full'] == location_name].copy()
2033
  if loc_df.empty:
2034
  return {
2035
+ "interpretation": "No findings reported. Validation of coverage or actual safety status is required.",
2036
  "risk_signal": "Slight Risk",
2037
+ "positive_rate": 0.0
 
2038
  }
2039
 
2040
  total = len(loc_df)
 
2044
  unique_reporters = loc_df['creator_nid'].nunique()
2045
  months_active = loc_df['created_at'].dt.to_period('M').nunique() if 'created_at' in loc_df.columns else 1
2046
 
 
 
 
2047
  if total == 0:
2048
+ signal = "Slight Risk"
2049
+ interp = "No findings reported. Validation of coverage or actual safety status is required."
2050
  elif perc_positive >= 0.6:
2051
  signal = "Slight Risk"
2052
  interp = (
2053
+ f"High reporting engagement with {total} findings and {perc_positive:.0%} positive category, "
2054
+ f"contributed by {unique_reporters} unique reporter(s) over {months_active} month(s). "
2055
+ f"This indicates a proactive safety culture."
2056
  )
2057
  elif perc_positive >= 0.3:
2058
  signal = "Moderate Risk"
2059
  interp = (
2060
+ f"Balanced reporting with {n_unsafe} unsafe findings versus {n_positive} positive. "
2061
+ f"Active monitoring is present, with opportunity to increase preventive behaviors."
2062
  )
2063
+ else:
2064
  if unique_reporters == 1:
2065
  signal = "High Risk"
2066
  interp = (
2067
+ f"High volume of unsafe findings with low positivity ({perc_positive:.0%}) "
2068
+ f"and reliance on only one reporter. This may indicate observer fatigue, bias, "
2069
+ f"or psychological barriers to broader reporting."
2070
  )
2071
  else:
2072
  signal = "Very High Risk"
2073
  interp = (
2074
+ f"Predominantly unsafe findings ({n_unsafe} out of {total}) reported by multiple individuals, "
2075
+ f"suggesting genuine and systemic safety hazards requiring urgent management attention."
2076
  )
2077
 
2078
  return {
2079
  "interpretation": interp,
2080
  "risk_signal": signal,
2081
+ "positive_rate": perc_positive
 
2082
  }
2083
 
2084
  # ============================================================== #
2085
+ # Helper 3: Deteksi Isu Tidak Aman dari Teks
2086
  # ============================================================== #
2087
  def detect_unsafe_terms(df: pd.DataFrame):
2088
  text_cols = ['hasil_keyword_dan_kondisi', 'judul_dan_kondisi', 'kondisi', 'judul']
2089
+ text_col = None
2090
+ for col in text_cols:
2091
+ if col in df.columns and df[col].notna().any():
2092
+ text_col = col
2093
+ break
2094
+ if text_col is None:
2095
  return []
2096
 
2097
  all_text = ' '.join(df[text_col].dropna().astype(str).str.lower())
 
2105
  return list(set(found))
2106
 
2107
  # ============================================================== #
2108
+ # Main: Generate Risk Mitigation Insights
2109
  # ============================================================== #
2110
  def compute_risk_mitigation_insights(df: pd.DataFrame) -> List[dict]:
2111
  insights = []
2112
  if df.empty:
2113
  return insights
2114
 
2115
+ # Insight 1: Top 3 Locations by Volume — Interpreted with Safety Maturity
2116
  if {'nama_lokasi_full', 'temuan_kategori', 'creator_nid', 'created_at'}.issubset(df.columns):
2117
  top_locs = df['nama_lokasi_full'].value_counts().head(3).index.tolist()
2118
  for loc in top_locs:
2119
  interp = interpret_location_safely(df, loc)
2120
+ insight = f"Location {loc}: {interp['interpretation']}"
 
2121
 
2122
+ signal = interp['risk_signal']
2123
  if signal == "Slight Risk":
2124
+ recommendation = (
2125
+ "Recognize this location as a safety exemplar. Share their positive findings in internal safety communications. "
2126
+ "Facilitate cross-location learning sessions to replicate practices."
2127
  )
2128
  elif signal == "Moderate Risk":
2129
+ recommendation = (
2130
+ "Conduct a workshop on positive intervention techniques. Train teams to identify and report good practices. "
2131
+ "Set a target to increase the positive finding rate to above 60 percent within three months."
2132
  )
2133
  elif signal == "High Risk":
2134
+ recommendation = (
2135
+ "Assign two additional auditors to rotate into this location for one month. "
2136
+ "Administer an anonymous psychological safety survey to assess reporting barriers."
2137
  )
2138
  elif signal == "Very High Risk":
2139
+ recommendation = (
2140
+ "Escalate to area management. Implement daily safety huddles, scheduled supervisor walkarounds, "
2141
+ "and weekly tracking of unsafe finding closure rates."
2142
  )
2143
  else:
2144
+ recommendation = (
2145
+ "Validate physical inspection coverage. Ensure field presence aligns with digital reporting records."
2146
+ )
2147
 
2148
+ insights.append({"insight": insight, "recommendation": recommendation})
2149
 
2150
+ # Insight 2: Organizational Agentic Safety Maturity
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2151
  if 'temuan_kategori' in df.columns:
2152
  total = len(df)
2153
  n_positive = (df['temuan_kategori'] == 'Positive').sum()
2154
  positive_rate = n_positive / total if total > 0 else 0
2155
 
2156
  insight = (
2157
+ f"Organization-wide, {positive_rate:.1%} of findings are categorized as Positive, "
2158
+ f"indicating proactive safety behaviors. The remaining {100 - positive_rate * 100:.1f} percent are reactive, "
2159
+ f"responding to existing hazards."
2160
  )
2161
+
2162
  if positive_rate < 0.4:
2163
+ recommendation = (
2164
+ "Launch an Agentic Safety Program: incentivize near-miss reporting and safety suggestions, "
2165
+ "train designated Safety Coaches per division, and adopt percentage of Positive findings as a leading KPI, "
2166
+ "with a six-month target of 50 percent."
 
2167
  )
2168
  else:
2169
+ recommendation = (
2170
+ "Sustain current momentum. Formalize recognition for divisions with consistently high positive reporting rates."
2171
  )
 
2172
 
2173
+ insights.append({"insight": insight, "recommendation": recommendation})
2174
+
2175
+ # Insight 3: Emerging Unsafe Issues from Text Analysis
2176
  unsafe_terms = detect_unsafe_terms(df)
2177
  if unsafe_terms:
2178
+ top_terms = ', '.join(sorted(unsafe_terms)[:5])
2179
+ insight = f"Text analysis of findings reveals recurring unsafe conditions related to: {top_terms}."
2180
+ recommendation = (
2181
+ "Initiate a targeted two-week Risk Blitz focusing on these conditions. "
2182
+ "Update inspection checklists to include these items as critical control points. "
2183
+ "Require photo documentation for verification of corrective actions."
2184
  )
2185
+ insights.append({"insight": insight, "recommendation": recommendation})
2186
 
2187
+ # Insight 4: Low-Activity Locations with Potential Silent Risks
2188
+ if 'nama_lokasi_full' in df.columns:
2189
+ loc_counts = df['nama_lokasi_full'].value_counts()
2190
+ low_activity_locs = loc_counts[loc_counts <= 2].index.tolist()
2191
  for loc in low_activity_locs[:3]:
2192
  interp = interpret_location_safely(df, loc)
2193
+ if 0 < interp['positive_rate'] < 0.5:
2194
+ insight = (
2195
+ f"Location {loc} reports low volume ({loc_counts[loc]} findings) with a positive rate of "
2196
+ f"{interp['positive_rate']:.0%}, suggesting possible under-reporting or unobserved hazards."
2197
+ )
2198
+ recommendation = (
2199
+ "Conduct an unannounced observational audit by an independent team to assess true field conditions."
2200
+ )
2201
+ insights.append({"insight": insight, "recommendation": recommendation})
2202
 
2203
  return insights
2204
 
2205
  # ============================================================== #
2206
+ # Execute and Display
2207
  # ============================================================== #
2208
  try:
2209
  risk_insights = compute_risk_mitigation_insights(df_filtered)
2210
  except Exception as e:
2211
+ st.error(f"Error during insight generation: {str(e)}")
2212
  risk_insights = []
2213
 
2214
  if risk_insights:
2215
+ for i, item in enumerate(risk_insights, 1):
2216
+ st.markdown(f"<div class='ai-insight'><strong>Insight {i}:</strong> {item['insight']}</div>", unsafe_allow_html=True)
2217
+ st.markdown(f"<div class='ai-recommendation'><strong>Recommendation {i}:</strong> {item['recommendation']}</div>", unsafe_allow_html=True)
2218
  else:
2219
  st.markdown(
2220
+ "<div class='ai-insight'>No risk mitigation insights were generated. "
2221
+ "Please ensure the dataset contains the following columns: "
2222
+ "nama_lokasi_full, temuan_kategori, creator_nid, and created_at.</div>",
2223
  unsafe_allow_html=True
2224
  )