Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1986,7 +1986,7 @@ st.markdown("<h3 class='section-title'>OBJECTIVE 7 - Insight and Recommendation<
|
|
| 1986 |
|
| 1987 |
|
| 1988 |
# ============================================================== #
|
| 1989 |
-
# 1
|
| 1990 |
# ============================================================== #
|
| 1991 |
def compute_avg_monthly_ratio_per_location(df: pd.DataFrame) -> pd.DataFrame:
|
| 1992 |
required = ['nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan']
|
|
@@ -2026,16 +2026,15 @@ def compute_avg_monthly_ratio_per_location(df: pd.DataFrame) -> pd.DataFrame:
|
|
| 2026 |
return loc_summary
|
| 2027 |
|
| 2028 |
# ============================================================== #
|
| 2029 |
-
# 2
|
| 2030 |
# ============================================================== #
|
| 2031 |
def interpret_location_safely(df: pd.DataFrame, location_name: str) -> dict:
|
| 2032 |
loc_df = df[df['nama_lokasi_full'] == location_name].copy()
|
| 2033 |
if loc_df.empty:
|
| 2034 |
return {
|
| 2035 |
-
"interpretation": "No findings reported
|
| 2036 |
"risk_signal": "Slight Risk",
|
| 2037 |
-
"positive_rate": 0.0
|
| 2038 |
-
"reporter_diversity": 0
|
| 2039 |
}
|
| 2040 |
|
| 2041 |
total = len(loc_df)
|
|
@@ -2045,51 +2044,54 @@ def interpret_location_safely(df: pd.DataFrame, location_name: str) -> dict:
|
|
| 2045 |
unique_reporters = loc_df['creator_nid'].nunique()
|
| 2046 |
months_active = loc_df['created_at'].dt.to_period('M').nunique() if 'created_at' in loc_df.columns else 1
|
| 2047 |
|
| 2048 |
-
reporter_diversity = min(unique_reporters / max(months_active, 1), 3.0) # cap at 3
|
| 2049 |
-
|
| 2050 |
-
# Risk stratifikasi berdasarkan kombinasi
|
| 2051 |
if total == 0:
|
| 2052 |
-
signal
|
|
|
|
| 2053 |
elif perc_positive >= 0.6:
|
| 2054 |
signal = "Slight Risk"
|
| 2055 |
interp = (
|
| 2056 |
-
f"High engagement
|
| 2057 |
-
f"
|
|
|
|
| 2058 |
)
|
| 2059 |
elif perc_positive >= 0.3:
|
| 2060 |
signal = "Moderate Risk"
|
| 2061 |
interp = (
|
| 2062 |
-
f"
|
| 2063 |
-
f"
|
| 2064 |
)
|
| 2065 |
-
else:
|
| 2066 |
if unique_reporters == 1:
|
| 2067 |
signal = "High Risk"
|
| 2068 |
interp = (
|
| 2069 |
-
f"High unsafe
|
| 2070 |
-
f"
|
|
|
|
| 2071 |
)
|
| 2072 |
else:
|
| 2073 |
signal = "Very High Risk"
|
| 2074 |
interp = (
|
| 2075 |
-
f"
|
| 2076 |
-
f"
|
| 2077 |
)
|
| 2078 |
|
| 2079 |
return {
|
| 2080 |
"interpretation": interp,
|
| 2081 |
"risk_signal": signal,
|
| 2082 |
-
"positive_rate": perc_positive
|
| 2083 |
-
"reporter_diversity": reporter_diversity
|
| 2084 |
}
|
| 2085 |
|
| 2086 |
# ============================================================== #
|
| 2087 |
-
# 3
|
| 2088 |
# ============================================================== #
|
| 2089 |
def detect_unsafe_terms(df: pd.DataFrame):
|
| 2090 |
text_cols = ['hasil_keyword_dan_kondisi', 'judul_dan_kondisi', 'kondisi', 'judul']
|
| 2091 |
-
text_col =
|
| 2092 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2093 |
return []
|
| 2094 |
|
| 2095 |
all_text = ' '.join(df[text_col].dropna().astype(str).str.lower())
|
|
@@ -2103,132 +2105,120 @@ def detect_unsafe_terms(df: pd.DataFrame):
|
|
| 2103 |
return list(set(found))
|
| 2104 |
|
| 2105 |
# ============================================================== #
|
| 2106 |
-
#
|
| 2107 |
# ============================================================== #
|
| 2108 |
def compute_risk_mitigation_insights(df: pd.DataFrame) -> List[dict]:
|
| 2109 |
insights = []
|
| 2110 |
if df.empty:
|
| 2111 |
return insights
|
| 2112 |
|
| 2113 |
-
#
|
| 2114 |
if {'nama_lokasi_full', 'temuan_kategori', 'creator_nid', 'created_at'}.issubset(df.columns):
|
| 2115 |
top_locs = df['nama_lokasi_full'].value_counts().head(3).index.tolist()
|
| 2116 |
for loc in top_locs:
|
| 2117 |
interp = interpret_location_safely(df, loc)
|
| 2118 |
-
insight = f"
|
| 2119 |
-
signal = interp['risk_signal']
|
| 2120 |
|
| 2121 |
-
|
| 2122 |
if signal == "Slight Risk":
|
| 2123 |
-
|
| 2124 |
-
|
| 2125 |
-
|
| 2126 |
)
|
| 2127 |
elif signal == "Moderate Risk":
|
| 2128 |
-
|
| 2129 |
-
|
| 2130 |
-
|
| 2131 |
)
|
| 2132 |
elif signal == "High Risk":
|
| 2133 |
-
|
| 2134 |
-
|
| 2135 |
-
|
| 2136 |
)
|
| 2137 |
elif signal == "Very High Risk":
|
| 2138 |
-
|
| 2139 |
-
|
| 2140 |
-
|
| 2141 |
)
|
| 2142 |
else:
|
| 2143 |
-
|
|
|
|
|
|
|
| 2144 |
|
| 2145 |
-
insights.append({"insight": insight, "recommendation":
|
| 2146 |
|
| 2147 |
-
#
|
| 2148 |
-
try:
|
| 2149 |
-
ratio_df = compute_avg_monthly_ratio_per_location(df)
|
| 2150 |
-
if not ratio_df.empty:
|
| 2151 |
-
# Lokasi dengan rasio tinggi TAPI positive rate rendah → waspada
|
| 2152 |
-
high_ratio_locs = ratio_df.nlargest(3, 'avg_monthly_ratio')['nama_lokasi_full'].tolist()
|
| 2153 |
-
for loc in high_ratio_locs:
|
| 2154 |
-
interp = interpret_location_safely(df, loc)
|
| 2155 |
-
if interp['risk_signal'] in ["High Risk", "Very High Risk"]:
|
| 2156 |
-
insight = (
|
| 2157 |
-
f"📊 Location '{loc}' has high finding-to-reporter ratio ({ratio_df.loc[ratio_df['nama_lokasi_full']==loc, 'avg_monthly_ratio'].iloc[0]:.2f}) "
|
| 2158 |
-
f"but low positive culture ({interp['positive_rate']:.0%}) — efficiency ≠ safety."
|
| 2159 |
-
)
|
| 2160 |
-
rec = (
|
| 2161 |
-
f"Prioritize coaching over counting. Shift KPI from 'findings submitted' to "
|
| 2162 |
-
f"'% positive findings' and 'unsafe items closed within 72h'."
|
| 2163 |
-
)
|
| 2164 |
-
insights.append({"insight": insight, "recommendation": rec})
|
| 2165 |
-
except Exception as e:
|
| 2166 |
-
pass # silent fail if ratio can't be computed
|
| 2167 |
-
|
| 2168 |
-
# --- Insight 3: Agentic Safety Maturity ---
|
| 2169 |
if 'temuan_kategori' in df.columns:
|
| 2170 |
total = len(df)
|
| 2171 |
n_positive = (df['temuan_kategori'] == 'Positive').sum()
|
| 2172 |
positive_rate = n_positive / total if total > 0 else 0
|
| 2173 |
|
| 2174 |
insight = (
|
| 2175 |
-
f"
|
| 2176 |
-
f"
|
|
|
|
| 2177 |
)
|
|
|
|
| 2178 |
if positive_rate < 0.4:
|
| 2179 |
-
|
| 2180 |
-
|
| 2181 |
-
|
| 2182 |
-
|
| 2183 |
-
f"• Track <em>% Positive Findings</em> as leading KPI (target: ≥50% in 6 months)"
|
| 2184 |
)
|
| 2185 |
else:
|
| 2186 |
-
|
| 2187 |
-
|
| 2188 |
)
|
| 2189 |
-
insights.append({"insight": insight, "recommendation": rec})
|
| 2190 |
|
| 2191 |
-
|
|
|
|
|
|
|
| 2192 |
unsafe_terms = detect_unsafe_terms(df)
|
| 2193 |
if unsafe_terms:
|
| 2194 |
-
top_terms = ', '.join(unsafe_terms[:5])
|
| 2195 |
-
insight = f"
|
| 2196 |
-
|
| 2197 |
-
|
| 2198 |
-
|
| 2199 |
-
|
| 2200 |
)
|
| 2201 |
-
insights.append({"insight": insight, "recommendation":
|
| 2202 |
|
| 2203 |
-
#
|
| 2204 |
-
if
|
| 2205 |
-
|
| 2206 |
-
low_activity_locs =
|
| 2207 |
for loc in low_activity_locs[:3]:
|
| 2208 |
interp = interpret_location_safely(df, loc)
|
| 2209 |
-
if interp['positive_rate'] < 0.5
|
| 2210 |
-
insight =
|
| 2211 |
-
|
| 2212 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2213 |
|
| 2214 |
return insights
|
| 2215 |
|
| 2216 |
# ============================================================== #
|
| 2217 |
-
#
|
| 2218 |
# ============================================================== #
|
| 2219 |
try:
|
| 2220 |
risk_insights = compute_risk_mitigation_insights(df_filtered)
|
| 2221 |
except Exception as e:
|
| 2222 |
-
st.error(f"Error
|
| 2223 |
risk_insights = []
|
| 2224 |
|
| 2225 |
if risk_insights:
|
| 2226 |
-
for i,
|
| 2227 |
-
st.markdown(f"<div class='ai-insight'><strong>Insight {i}:</strong> {
|
| 2228 |
-
st.markdown(f"<div class='ai-recommendation'><strong>
|
| 2229 |
else:
|
| 2230 |
st.markdown(
|
| 2231 |
-
"<div class='ai-insight'>No insights generated.
|
| 2232 |
-
"
|
|
|
|
| 2233 |
unsafe_allow_html=True
|
| 2234 |
)
|
|
|
|
| 1986 |
|
| 1987 |
|
| 1988 |
# ============================================================== #
|
| 1989 |
+
# Helper 1: Hitung Average Monthly Finding-to-Reporter Ratio per Lokasi
|
| 1990 |
# ============================================================== #
|
| 1991 |
def compute_avg_monthly_ratio_per_location(df: pd.DataFrame) -> pd.DataFrame:
|
| 1992 |
required = ['nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan']
|
|
|
|
| 2026 |
return loc_summary
|
| 2027 |
|
| 2028 |
# ============================================================== #
|
| 2029 |
+
# Helper 2: Interpretasi Aktivitas Pelaporan secara Adil
|
| 2030 |
# ============================================================== #
|
| 2031 |
def interpret_location_safely(df: pd.DataFrame, location_name: str) -> dict:
|
| 2032 |
loc_df = df[df['nama_lokasi_full'] == location_name].copy()
|
| 2033 |
if loc_df.empty:
|
| 2034 |
return {
|
| 2035 |
+
"interpretation": "No findings reported. Validation of coverage or actual safety status is required.",
|
| 2036 |
"risk_signal": "Slight Risk",
|
| 2037 |
+
"positive_rate": 0.0
|
|
|
|
| 2038 |
}
|
| 2039 |
|
| 2040 |
total = len(loc_df)
|
|
|
|
| 2044 |
unique_reporters = loc_df['creator_nid'].nunique()
|
| 2045 |
months_active = loc_df['created_at'].dt.to_period('M').nunique() if 'created_at' in loc_df.columns else 1
|
| 2046 |
|
|
|
|
|
|
|
|
|
|
| 2047 |
if total == 0:
|
| 2048 |
+
signal = "Slight Risk"
|
| 2049 |
+
interp = "No findings reported. Validation of coverage or actual safety status is required."
|
| 2050 |
elif perc_positive >= 0.6:
|
| 2051 |
signal = "Slight Risk"
|
| 2052 |
interp = (
|
| 2053 |
+
f"High reporting engagement with {total} findings and {perc_positive:.0%} positive category, "
|
| 2054 |
+
f"contributed by {unique_reporters} unique reporter(s) over {months_active} month(s). "
|
| 2055 |
+
f"This indicates a proactive safety culture."
|
| 2056 |
)
|
| 2057 |
elif perc_positive >= 0.3:
|
| 2058 |
signal = "Moderate Risk"
|
| 2059 |
interp = (
|
| 2060 |
+
f"Balanced reporting with {n_unsafe} unsafe findings versus {n_positive} positive. "
|
| 2061 |
+
f"Active monitoring is present, with opportunity to increase preventive behaviors."
|
| 2062 |
)
|
| 2063 |
+
else:
|
| 2064 |
if unique_reporters == 1:
|
| 2065 |
signal = "High Risk"
|
| 2066 |
interp = (
|
| 2067 |
+
f"High volume of unsafe findings with low positivity ({perc_positive:.0%}) "
|
| 2068 |
+
f"and reliance on only one reporter. This may indicate observer fatigue, bias, "
|
| 2069 |
+
f"or psychological barriers to broader reporting."
|
| 2070 |
)
|
| 2071 |
else:
|
| 2072 |
signal = "Very High Risk"
|
| 2073 |
interp = (
|
| 2074 |
+
f"Predominantly unsafe findings ({n_unsafe} out of {total}) reported by multiple individuals, "
|
| 2075 |
+
f"suggesting genuine and systemic safety hazards requiring urgent management attention."
|
| 2076 |
)
|
| 2077 |
|
| 2078 |
return {
|
| 2079 |
"interpretation": interp,
|
| 2080 |
"risk_signal": signal,
|
| 2081 |
+
"positive_rate": perc_positive
|
|
|
|
| 2082 |
}
|
| 2083 |
|
| 2084 |
# ============================================================== #
|
| 2085 |
+
# Helper 3: Deteksi Isu Tidak Aman dari Teks
|
| 2086 |
# ============================================================== #
|
| 2087 |
def detect_unsafe_terms(df: pd.DataFrame):
|
| 2088 |
text_cols = ['hasil_keyword_dan_kondisi', 'judul_dan_kondisi', 'kondisi', 'judul']
|
| 2089 |
+
text_col = None
|
| 2090 |
+
for col in text_cols:
|
| 2091 |
+
if col in df.columns and df[col].notna().any():
|
| 2092 |
+
text_col = col
|
| 2093 |
+
break
|
| 2094 |
+
if text_col is None:
|
| 2095 |
return []
|
| 2096 |
|
| 2097 |
all_text = ' '.join(df[text_col].dropna().astype(str).str.lower())
|
|
|
|
| 2105 |
return list(set(found))
|
| 2106 |
|
| 2107 |
# ============================================================== #
|
| 2108 |
+
# Main: Generate Risk Mitigation Insights
|
| 2109 |
# ============================================================== #
|
| 2110 |
def compute_risk_mitigation_insights(df: pd.DataFrame) -> List[dict]:
|
| 2111 |
insights = []
|
| 2112 |
if df.empty:
|
| 2113 |
return insights
|
| 2114 |
|
| 2115 |
+
# Insight 1: Top 3 Locations by Volume — Interpreted with Safety Maturity
|
| 2116 |
if {'nama_lokasi_full', 'temuan_kategori', 'creator_nid', 'created_at'}.issubset(df.columns):
|
| 2117 |
top_locs = df['nama_lokasi_full'].value_counts().head(3).index.tolist()
|
| 2118 |
for loc in top_locs:
|
| 2119 |
interp = interpret_location_safely(df, loc)
|
| 2120 |
+
insight = f"Location {loc}: {interp['interpretation']}"
|
|
|
|
| 2121 |
|
| 2122 |
+
signal = interp['risk_signal']
|
| 2123 |
if signal == "Slight Risk":
|
| 2124 |
+
recommendation = (
|
| 2125 |
+
"Recognize this location as a safety exemplar. Share their positive findings in internal safety communications. "
|
| 2126 |
+
"Facilitate cross-location learning sessions to replicate practices."
|
| 2127 |
)
|
| 2128 |
elif signal == "Moderate Risk":
|
| 2129 |
+
recommendation = (
|
| 2130 |
+
"Conduct a workshop on positive intervention techniques. Train teams to identify and report good practices. "
|
| 2131 |
+
"Set a target to increase the positive finding rate to above 60 percent within three months."
|
| 2132 |
)
|
| 2133 |
elif signal == "High Risk":
|
| 2134 |
+
recommendation = (
|
| 2135 |
+
"Assign two additional auditors to rotate into this location for one month. "
|
| 2136 |
+
"Administer an anonymous psychological safety survey to assess reporting barriers."
|
| 2137 |
)
|
| 2138 |
elif signal == "Very High Risk":
|
| 2139 |
+
recommendation = (
|
| 2140 |
+
"Escalate to area management. Implement daily safety huddles, scheduled supervisor walkarounds, "
|
| 2141 |
+
"and weekly tracking of unsafe finding closure rates."
|
| 2142 |
)
|
| 2143 |
else:
|
| 2144 |
+
recommendation = (
|
| 2145 |
+
"Validate physical inspection coverage. Ensure field presence aligns with digital reporting records."
|
| 2146 |
+
)
|
| 2147 |
|
| 2148 |
+
insights.append({"insight": insight, "recommendation": recommendation})
|
| 2149 |
|
| 2150 |
+
# Insight 2: Organizational Agentic Safety Maturity
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2151 |
if 'temuan_kategori' in df.columns:
|
| 2152 |
total = len(df)
|
| 2153 |
n_positive = (df['temuan_kategori'] == 'Positive').sum()
|
| 2154 |
positive_rate = n_positive / total if total > 0 else 0
|
| 2155 |
|
| 2156 |
insight = (
|
| 2157 |
+
f"Organization-wide, {positive_rate:.1%} of findings are categorized as Positive, "
|
| 2158 |
+
f"indicating proactive safety behaviors. The remaining {100 - positive_rate * 100:.1f} percent are reactive, "
|
| 2159 |
+
f"responding to existing hazards."
|
| 2160 |
)
|
| 2161 |
+
|
| 2162 |
if positive_rate < 0.4:
|
| 2163 |
+
recommendation = (
|
| 2164 |
+
"Launch an Agentic Safety Program: incentivize near-miss reporting and safety suggestions, "
|
| 2165 |
+
"train designated Safety Coaches per division, and adopt percentage of Positive findings as a leading KPI, "
|
| 2166 |
+
"with a six-month target of 50 percent."
|
|
|
|
| 2167 |
)
|
| 2168 |
else:
|
| 2169 |
+
recommendation = (
|
| 2170 |
+
"Sustain current momentum. Formalize recognition for divisions with consistently high positive reporting rates."
|
| 2171 |
)
|
|
|
|
| 2172 |
|
| 2173 |
+
insights.append({"insight": insight, "recommendation": recommendation})
|
| 2174 |
+
|
| 2175 |
+
# Insight 3: Emerging Unsafe Issues from Text Analysis
|
| 2176 |
unsafe_terms = detect_unsafe_terms(df)
|
| 2177 |
if unsafe_terms:
|
| 2178 |
+
top_terms = ', '.join(sorted(unsafe_terms)[:5])
|
| 2179 |
+
insight = f"Text analysis of findings reveals recurring unsafe conditions related to: {top_terms}."
|
| 2180 |
+
recommendation = (
|
| 2181 |
+
"Initiate a targeted two-week Risk Blitz focusing on these conditions. "
|
| 2182 |
+
"Update inspection checklists to include these items as critical control points. "
|
| 2183 |
+
"Require photo documentation for verification of corrective actions."
|
| 2184 |
)
|
| 2185 |
+
insights.append({"insight": insight, "recommendation": recommendation})
|
| 2186 |
|
| 2187 |
+
# Insight 4: Low-Activity Locations with Potential Silent Risks
|
| 2188 |
+
if 'nama_lokasi_full' in df.columns:
|
| 2189 |
+
loc_counts = df['nama_lokasi_full'].value_counts()
|
| 2190 |
+
low_activity_locs = loc_counts[loc_counts <= 2].index.tolist()
|
| 2191 |
for loc in low_activity_locs[:3]:
|
| 2192 |
interp = interpret_location_safely(df, loc)
|
| 2193 |
+
if 0 < interp['positive_rate'] < 0.5:
|
| 2194 |
+
insight = (
|
| 2195 |
+
f"Location {loc} reports low volume ({loc_counts[loc]} findings) with a positive rate of "
|
| 2196 |
+
f"{interp['positive_rate']:.0%}, suggesting possible under-reporting or unobserved hazards."
|
| 2197 |
+
)
|
| 2198 |
+
recommendation = (
|
| 2199 |
+
"Conduct an unannounced observational audit by an independent team to assess true field conditions."
|
| 2200 |
+
)
|
| 2201 |
+
insights.append({"insight": insight, "recommendation": recommendation})
|
| 2202 |
|
| 2203 |
return insights
|
| 2204 |
|
| 2205 |
# ============================================================== #
|
| 2206 |
+
# Execute and Display
|
| 2207 |
# ============================================================== #
|
| 2208 |
# Generate the risk-mitigation insights for the filtered data and render them.
# Insight text embeds values taken from the dataset (location names, detected
# terms) and is rendered with unsafe_allow_html=True, so it is HTML-escaped
# first to keep stray markup in the data from being interpreted by the browser.
from html import escape as _escape_html

try:
    risk_insights = compute_risk_mitigation_insights(df_filtered)
except Exception as e:
    # Top-level boundary: report the failure in the UI and carry on with an
    # empty result so the rest of the page still renders.
    st.error(f"Error during insight generation: {e}")
    risk_insights = []

if risk_insights:
    for i, item in enumerate(risk_insights, 1):
        insight_text = _escape_html(str(item['insight']))
        recommendation_text = _escape_html(str(item['recommendation']))
        st.markdown(
            f"<div class='ai-insight'><strong>Insight {i}:</strong> {insight_text}</div>",
            unsafe_allow_html=True,
        )
        st.markdown(
            f"<div class='ai-recommendation'><strong>Recommendation {i}:</strong> {recommendation_text}</div>",
            unsafe_allow_html=True,
        )
else:
    st.markdown(
        "<div class='ai-insight'>No risk mitigation insights were generated. "
        "Please ensure the dataset contains the following columns: "
        "nama_lokasi_full, temuan_kategori, creator_nid, and created_at.</div>",
        unsafe_allow_html=True
    )
|