Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1975,7 +1975,7 @@ else:
|
|
| 1975 |
st.info("No data available for non-positive issue categories with 100% coverage and positive trend.")
|
| 1976 |
|
| 1977 |
st.markdown("<h3 class='section-title'>OBJECTIVE 7 - Insight and Recommendation</h3>", unsafe_allow_html=True)
|
| 1978 |
-
|
| 1979 |
|
| 1980 |
def compute_risk_mitigation_insights(df: pd.DataFrame) -> List[dict]:
|
| 1981 |
"""
|
|
@@ -1985,52 +1985,70 @@ def compute_risk_mitigation_insights(df: pd.DataFrame) -> List[dict]:
|
|
| 1985 |
- Agentic Safety Behaviors (proactive vs reactive)
|
| 1986 |
- Wordcloud-based Emerging Risk Detection
|
| 1987 |
- Actionable coverage-balancing strategies
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1988 |
"""
|
| 1989 |
insights = []
|
| 1990 |
|
| 1991 |
if df.empty:
|
| 1992 |
return insights
|
| 1993 |
|
| 1994 |
-
# --- Helper: Detect risk terms from
|
| 1995 |
def detect_emerging_risks(df):
|
| 1996 |
-
#
|
|
|
|
| 1997 |
text_col = None
|
| 1998 |
-
for col in
|
| 1999 |
if col in df.columns and df[col].notna().any():
|
| 2000 |
text_col = col
|
| 2001 |
break
|
| 2002 |
if text_col is None:
|
| 2003 |
return [], []
|
| 2004 |
|
| 2005 |
-
# Combine
|
| 2006 |
-
|
| 2007 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2008 |
risk_keywords = [
|
|
|
|
| 2009 |
'terbuka', 'tidak terkunci', 'tanpa izin', 'tanpa alat', 'tanpa pelindung',
|
| 2010 |
-
'overload', '
|
| 2011 |
-
'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2012 |
]
|
| 2013 |
found_risks = [kw for kw in risk_keywords if kw in all_text]
|
| 2014 |
return risk_keywords, found_risks
|
| 2015 |
|
| 2016 |
-
# --- 1. Coverage Equity by Location
|
| 2017 |
if 'nama_lokasi_full' in df.columns and 'creator_nid' in df.columns:
|
|
|
|
| 2018 |
loc_activity = df.groupby('nama_lokasi_full').agg(
|
| 2019 |
-
findings_count=('
|
| 2020 |
unique_reporters=('creator_nid', 'nunique')
|
| 2021 |
).reset_index()
|
| 2022 |
-
total_locations = loc_activity.shape[0]
|
| 2023 |
-
low_coverage_locs = loc_activity[loc_activity['unique_reporters'] <= 1]
|
| 2024 |
-
high_volume_locs = loc_activity[loc_activity['findings_count'] > loc_activity['findings_count'].quantile(0.75)]
|
| 2025 |
|
| 2026 |
-
|
|
|
|
| 2027 |
risky_high_low = loc_activity[
|
| 2028 |
(loc_activity['findings_count'] > loc_activity['findings_count'].median()) &
|
| 2029 |
(loc_activity['unique_reporters'] <= 2)
|
| 2030 |
]
|
| 2031 |
|
| 2032 |
if not risky_high_low.empty:
|
| 2033 |
-
loc_list = risky_high_low['nama_lokasi_full'].tolist()
|
| 2034 |
loc_names = ', '.join(loc_list)
|
| 2035 |
insight = (
|
| 2036 |
f"Locations {loc_names} show high finding volume but rely on ≤2 reporters, indicating potential blind spots "
|
|
@@ -2042,7 +2060,7 @@ def compute_risk_mitigation_insights(df: pd.DataFrame) -> List[dict]:
|
|
| 2042 |
)
|
| 2043 |
insights.append({"insight": insight, "recommendation": recommendation})
|
| 2044 |
|
| 2045 |
-
if
|
| 2046 |
insight = (
|
| 2047 |
f"Over 30% of locations ({len(low_coverage_locs)}/{total_locations}) are covered by only 1 reporter, "
|
| 2048 |
f"increasing the risk of unreported hazards due to observer fatigue or familiarity bias."
|
|
@@ -2053,63 +2071,71 @@ def compute_risk_mitigation_insights(df: pd.DataFrame) -> List[dict]:
|
|
| 2053 |
)
|
| 2054 |
insights.append({"insight": insight, "recommendation": recommendation})
|
| 2055 |
|
| 2056 |
-
# --- 2. Divisional Load & Frequency Risk
|
| 2057 |
-
if 'nama' in df.columns and 'created_at' in df.columns:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2058 |
div_summary = df.groupby('nama').agg(
|
| 2059 |
-
total_findings=('
|
| 2060 |
unique_people=('creator_nid', 'nunique'),
|
| 2061 |
first_report=('created_at', 'min'),
|
| 2062 |
last_report=('created_at', 'max')
|
| 2063 |
)
|
| 2064 |
-
div_summary['reporting_span_days'] = (div_summary['last_report'] - div_summary['first_report']).dt.days + 1
|
| 2065 |
-
div_summary['avg_freq_per_person'] = div_summary['total_findings'] / div_summary['unique_people']
|
| 2066 |
-
div_summary['findings_per_day'] = div_summary['total_findings'] / div_summary['reporting_span_days']
|
| 2067 |
-
|
| 2068 |
-
# Define thresholds (adjust as needed)
|
| 2069 |
-
HIGH_LOAD_THRESHOLD = 8 # avg > 8 findings/person
|
| 2070 |
-
LOW_ACTIVITY_THRESHOLD = 0.2 # < 0.2 findings/day
|
| 2071 |
|
| 2072 |
-
|
| 2073 |
-
|
| 2074 |
-
|
| 2075 |
-
|
| 2076 |
-
|
| 2077 |
-
|
| 2078 |
-
|
| 2079 |
-
|
| 2080 |
-
|
| 2081 |
-
|
| 2082 |
-
|
| 2083 |
-
|
| 2084 |
-
|
| 2085 |
-
|
| 2086 |
-
|
| 2087 |
-
|
| 2088 |
-
|
| 2089 |
-
|
| 2090 |
-
|
| 2091 |
-
|
| 2092 |
-
|
| 2093 |
-
|
| 2094 |
-
|
| 2095 |
-
|
| 2096 |
-
|
| 2097 |
-
|
| 2098 |
-
|
| 2099 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2100 |
|
| 2101 |
-
# --- 3. Agentic Safety Mitigation (Proactive vs Reactive
|
| 2102 |
if 'temuan_kategori' in df.columns:
|
| 2103 |
-
# Assume: 'Positive' = proactive (e.g., good housekeeping, initiative)
|
| 2104 |
-
# Others (e.g., 'Unsafe Condition', 'Unsafe Act') = reactive
|
| 2105 |
total = len(df)
|
| 2106 |
proactive = (df['temuan_kategori'] == 'Positive').sum()
|
| 2107 |
-
reactive = total - proactive
|
| 2108 |
proactive_rate = proactive / total if total > 0 else 0
|
| 2109 |
|
| 2110 |
insight = (
|
| 2111 |
f"Only {proactive_rate:.1%} of findings reflect *proactive* safety behaviors (e.g., positive interventions, improvements). "
|
| 2112 |
-
f"The remaining {100 - proactive_rate*100:.1f}% are *reactive* (hazards already present)."
|
| 2113 |
)
|
| 2114 |
recommendation = (
|
| 2115 |
f"Shift incentives from 'finding count' to 'prevention impact'. "
|
|
@@ -2118,18 +2144,17 @@ def compute_risk_mitigation_insights(df: pd.DataFrame) -> List[dict]:
|
|
| 2118 |
)
|
| 2119 |
insights.append({"insight": insight, "recommendation": recommendation})
|
| 2120 |
|
| 2121 |
-
# --- 4. Emerging Risk Detection via Wordcloud (
|
| 2122 |
all_risk_terms, detected_terms = detect_emerging_risks(df)
|
| 2123 |
if detected_terms:
|
| 2124 |
-
|
| 2125 |
-
|
| 2126 |
-
high_sev_terms = ['exposed', 'fire hazard', 'fall', 'short circuit', 'unauthorized']
|
| 2127 |
detected_high = [t for t in detected_terms if t in high_sev_terms]
|
| 2128 |
if detected_high:
|
| 2129 |
-
terms_str = ', '.join(detected_high)
|
| 2130 |
insight = (
|
| 2131 |
f"Wordcloud analysis indicates emerging high-severity risks: *{terms_str}*. "
|
| 2132 |
-
f"These signal active hazards
|
| 2133 |
)
|
| 2134 |
recommendation = (
|
| 2135 |
f"Launch a 14-day *Targeted Risk Blitz* on locations reporting these terms. "
|
|
@@ -2138,43 +2163,51 @@ def compute_risk_mitigation_insights(df: pd.DataFrame) -> List[dict]:
|
|
| 2138 |
)
|
| 2139 |
insights.append({"insight": insight, "recommendation": recommendation})
|
| 2140 |
|
| 2141 |
-
# Cloud
|
| 2142 |
-
if
|
| 2143 |
insight = (
|
| 2144 |
f"Despite mitigation efforts, the risk 'cloud' persists — likely due to: "
|
| 2145 |
-
f"(1)
|
| 2146 |
f"(2) Incomplete closure verification, or "
|
| 2147 |
-
f"(3) Findings
|
| 2148 |
)
|
| 2149 |
recommendation = (
|
| 2150 |
-
f"Adopt *closed-loop verification*: require geo-tagged before/after photos +
|
| 2151 |
-
f"Map recurring findings to contractor IDs —
|
| 2152 |
-
f"Use AI to
|
| 2153 |
)
|
| 2154 |
insights.append({"insight": insight, "recommendation": recommendation})
|
| 2155 |
|
| 2156 |
-
# --- 5. Coverage Balancing Strategy
|
| 2157 |
-
# Based on location & reporter distribution
|
| 2158 |
if 'nama_lokasi_full' in df.columns and 'creator_nid' in df.columns:
|
| 2159 |
reporters_per_location = df.groupby('nama_lokasi_full')['creator_nid'].nunique()
|
| 2160 |
-
|
| 2161 |
-
|
| 2162 |
-
|
| 2163 |
-
|
| 2164 |
-
|
| 2165 |
-
|
| 2166 |
-
|
| 2167 |
-
|
| 2168 |
-
|
| 2169 |
-
|
| 2170 |
-
|
| 2171 |
-
|
| 2172 |
-
|
|
|
|
|
|
|
|
|
|
| 2173 |
|
| 2174 |
return insights
|
| 2175 |
|
| 2176 |
-
#
|
| 2177 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2178 |
|
| 2179 |
if risk_insights:
|
| 2180 |
for i, ir in enumerate(risk_insights, 1):
|
|
@@ -2182,7 +2215,8 @@ if risk_insights:
|
|
| 2182 |
st.markdown(f"<div class='ai-recommendation'><strong>Action {i}:</strong> {ir['recommendation']}</div>", unsafe_allow_html=True)
|
| 2183 |
else:
|
| 2184 |
st.markdown(
|
| 2185 |
-
"<div class='ai-insight'>No risk-mitigation insights generated. Ensure
|
| 2186 |
-
"<code>nama_lokasi_full</code>, <code>nama</code
|
|
|
|
| 2187 |
unsafe_allow_html=True
|
| 2188 |
)
|
|
|
|
| 1975 |
st.info("No data available for non-positive issue categories with 100% coverage and positive trend.")
|
| 1976 |
|
| 1977 |
st.markdown("<h3 class='section-title'>OBJECTIVE 7 - Insight and Recommendation</h3>", unsafe_allow_html=True)
|
| 1978 |
+
st.markdown("<h3 class='section-title'>OBJECTIVE 7 – Risk Mitigation Insights & Actions</h3>", unsafe_allow_html=True)
|
| 1979 |
|
| 1980 |
def compute_risk_mitigation_insights(df: pd.DataFrame) -> List[dict]:
|
| 1981 |
"""
|
|
|
|
| 1985 |
- Agentic Safety Behaviors (proactive vs reactive)
|
| 1986 |
- Wordcloud-based Emerging Risk Detection
|
| 1987 |
- Actionable coverage-balancing strategies
|
| 1988 |
+
|
| 1989 |
+
✅ Uses ONLY columns confirmed in your schema.
|
| 1990 |
+
✅ Replaces 'temuan_id' with 'kode_temuan' or row count logic.
|
| 1991 |
+
✅ Handles missing text fields gracefully.
|
| 1992 |
"""
|
| 1993 |
insights = []
|
| 1994 |
|
| 1995 |
if df.empty:
|
| 1996 |
return insights
|
| 1997 |
|
| 1998 |
+
# --- Helper: Detect risk terms from free-text fields ---
def detect_emerging_risks(df):
    """Scan the richest available free-text column for known risk phrases.

    Parameters
    ----------
    df : pd.DataFrame
        Findings data. May or may not contain any of the candidate
        text columns; all-null columns are treated as absent.

    Returns
    -------
    tuple[list[str], list[str]]
        ``(risk_keywords, found_risks)`` — the full lexicon searched and
        the subset of phrases actually present in the text. Both lists
        are duplicate-free. Returns ``([], [])`` when no usable text
        column exists or the chosen column holds no non-null values.
    """
    import re  # local import: keeps this helper self-contained

    # Prioritize the richest free-text field, in order of preference.
    text_cols = ['hasil_keyword_dan_kondisi', 'judul_dan_kondisi', 'kondisi',
                 'judul', 'keterangan_lokasi', 'note']
    text_col = next(
        (c for c in text_cols if c in df.columns and df[c].notna().any()),
        None,
    )
    if text_col is None:
        return [], []

    # Combine all non-null text into one lowercase haystack.
    texts = df[text_col].dropna().astype(str)
    if texts.empty:
        return [], []
    all_text = ' '.join(texts.str.lower())

    # Domain-specific risk lexicon (Indonesian + English).
    # NOTE(fix): the original listed 'overload' in BOTH language sections,
    # which duplicated it in the returned lists and inflated the lexicon
    # size used by downstream persistence checks.
    raw_keywords = [
        # Bahasa Indonesia
        'terbuka', 'tidak terkunci', 'tanpa izin', 'tanpa alat', 'tanpa pelindung',
        'overload', 'korsleting', 'grounding buruk', 'kabel terkelupas', 'tanpa grounding',
        'jatuh', 'terpeleset', 'tergelincir', 'kebakaran', 'panas berlebih',
        'tidak kompeten', 'tanpa pelatihan', 'tidak tersertifikasi',
        'tidak sesuai prosedur', 'prosedur tidak diikuti',
        # English (for bilingual reports)
        'exposed', 'unlocked', 'unauthorized', 'no ppe', 'no permit',
        'short circuit', 'poor grounding', 'stripped cable',
        'fall hazard', 'slip', 'trip', 'fire hazard', 'overheating',
        'untrained', 'incompetent', 'not certified',
        'procedure violated', 'bypassed'
    ]
    risk_keywords = list(dict.fromkeys(raw_keywords))  # order-preserving dedupe

    # NOTE(fix): match on word boundaries so short terms do not fire on
    # substrings — the original plain `kw in all_text` made 'trip' match
    # inside 'stripped', for example.
    found_risks = [
        kw for kw in risk_keywords
        if re.search(r'\b' + re.escape(kw) + r'\b', all_text)
    ]
    return risk_keywords, found_risks
|
| 2034 |
|
| 2035 |
+
# --- 1. Coverage Equity by Location ---
|
| 2036 |
if 'nama_lokasi_full' in df.columns and 'creator_nid' in df.columns:
|
| 2037 |
+
# Group by location
|
| 2038 |
loc_activity = df.groupby('nama_lokasi_full').agg(
|
| 2039 |
+
findings_count=('kode_temuan', 'size'), # ✅ SAFE: uses row count
|
| 2040 |
unique_reporters=('creator_nid', 'nunique')
|
| 2041 |
).reset_index()
|
|
|
|
|
|
|
|
|
|
| 2042 |
|
| 2043 |
+
total_locations = len(loc_activity)
|
| 2044 |
+
low_coverage_locs = loc_activity[loc_activity['unique_reporters'] <= 1]
|
| 2045 |
risky_high_low = loc_activity[
|
| 2046 |
(loc_activity['findings_count'] > loc_activity['findings_count'].median()) &
|
| 2047 |
(loc_activity['unique_reporters'] <= 2)
|
| 2048 |
]
|
| 2049 |
|
| 2050 |
if not risky_high_low.empty:
|
| 2051 |
+
loc_list = risky_high_low['nama_lokasi_full'].head(3).tolist()
|
| 2052 |
loc_names = ', '.join(loc_list)
|
| 2053 |
insight = (
|
| 2054 |
f"Locations {loc_names} show high finding volume but rely on ≤2 reporters, indicating potential blind spots "
|
|
|
|
| 2060 |
)
|
| 2061 |
insights.append({"insight": insight, "recommendation": recommendation})
|
| 2062 |
|
| 2063 |
+
if len(low_coverage_locs) > total_locations * 0.3 and total_locations > 3:
|
| 2064 |
insight = (
|
| 2065 |
f"Over 30% of locations ({len(low_coverage_locs)}/{total_locations}) are covered by only 1 reporter, "
|
| 2066 |
f"increasing the risk of unreported hazards due to observer fatigue or familiarity bias."
|
|
|
|
| 2071 |
)
|
| 2072 |
insights.append({"insight": insight, "recommendation": recommendation})
|
| 2073 |
|
| 2074 |
+
# --- 2. Divisional Load & Frequency Risk ---
|
| 2075 |
+
if 'nama' in df.columns and 'created_at' in df.columns:
|
| 2076 |
+
# Ensure 'created_at' is datetime
|
| 2077 |
+
if not pd.api.types.is_datetime64_any_dtype(df['created_at']):
|
| 2078 |
+
df = df.copy()
|
| 2079 |
+
df['created_at'] = pd.to_datetime(df['created_at'], errors='coerce')
|
| 2080 |
+
|
| 2081 |
div_summary = df.groupby('nama').agg(
|
| 2082 |
+
total_findings=('kode_temuan', 'size'), # ✅
|
| 2083 |
unique_people=('creator_nid', 'nunique'),
|
| 2084 |
first_report=('created_at', 'min'),
|
| 2085 |
last_report=('created_at', 'max')
|
| 2086 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2087 |
|
| 2088 |
+
# Drop groups with NaT (invalid dates)
|
| 2089 |
+
div_summary = div_summary.dropna(subset=['first_report', 'last_report'])
|
| 2090 |
+
if not div_summary.empty:
|
| 2091 |
+
div_summary['reporting_span_days'] = (
|
| 2092 |
+
(div_summary['last_report'] - div_summary['first_report']).dt.days + 1
|
| 2093 |
+
).clip(lower=1) # avoid zero division
|
| 2094 |
+
div_summary['avg_freq_per_person'] = div_summary['total_findings'] / div_summary['unique_people'].replace(0, 1)
|
| 2095 |
+
div_summary['findings_per_day'] = div_summary['total_findings'] / div_summary['reporting_span_days']
|
| 2096 |
+
|
| 2097 |
+
HIGH_LOAD_THRESHOLD = 8.0
|
| 2098 |
+
LOW_ACTIVITY_THRESHOLD = 0.2
|
| 2099 |
+
|
| 2100 |
+
high_load_div = div_summary[div_summary['avg_freq_per_person'] >= HIGH_LOAD_THRESHOLD]
|
| 2101 |
+
low_activity_div = div_summary[div_summary['findings_per_day'] <= LOW_ACTIVITY_THRESHOLD]
|
| 2102 |
+
|
| 2103 |
+
if not high_load_div.empty:
|
| 2104 |
+
top_div = high_load_div['avg_freq_per_person'].idxmax()
|
| 2105 |
+
rate = high_load_div.loc[top_div, 'avg_freq_per_person']
|
| 2106 |
+
insight = (
|
| 2107 |
+
f"Division '{top_div}' has an elevated reporting load ({rate:.1f} findings/person), "
|
| 2108 |
+
f"which may lead to fatigue, rushed inspections, or selective reporting."
|
| 2109 |
+
)
|
| 2110 |
+
recommendation = (
|
| 2111 |
+
f"Augment the division’s safety team with cross-trained support staff. "
|
| 2112 |
+
f"Introduce AI-assisted checklist validation to reduce cognitive load. "
|
| 2113 |
+
f"Monitor for declining finding quality (e.g., vague descriptions)."
|
| 2114 |
+
)
|
| 2115 |
+
insights.append({"insight": insight, "recommendation": recommendation})
|
| 2116 |
+
|
| 2117 |
+
if not low_activity_div.empty:
|
| 2118 |
+
low_divs = low_activity_div.index.tolist()[:3]
|
| 2119 |
+
div_names = ', '.join(low_divs)
|
| 2120 |
+
insight = (
|
| 2121 |
+
f"Divisions {div_names} show persistently low reporting frequency (<0.2 findings/day), "
|
| 2122 |
+
f"suggesting either excellent safety performance or significant under-reporting."
|
| 2123 |
+
)
|
| 2124 |
+
recommendation = (
|
| 2125 |
+
f"Conduct a *silent audit* (observation-only, no prior notice) in these divisions to validate safety status. "
|
| 2126 |
+
f"Review training records and psychological safety survey scores — fear of blame suppresses reporting."
|
| 2127 |
+
)
|
| 2128 |
+
insights.append({"insight": insight, "recommendation": recommendation})
|
| 2129 |
|
| 2130 |
+
# --- 3. Agentic Safety Mitigation (Proactive vs Reactive) ---
|
| 2131 |
if 'temuan_kategori' in df.columns:
|
|
|
|
|
|
|
| 2132 |
total = len(df)
|
| 2133 |
proactive = (df['temuan_kategori'] == 'Positive').sum()
|
|
|
|
| 2134 |
proactive_rate = proactive / total if total > 0 else 0
|
| 2135 |
|
| 2136 |
insight = (
|
| 2137 |
f"Only {proactive_rate:.1%} of findings reflect *proactive* safety behaviors (e.g., positive interventions, improvements). "
|
| 2138 |
+
f"The remaining {100 - proactive_rate * 100:.1f}% are *reactive* (hazards already present)."
|
| 2139 |
)
|
| 2140 |
recommendation = (
|
| 2141 |
f"Shift incentives from 'finding count' to 'prevention impact'. "
|
|
|
|
| 2144 |
)
|
| 2145 |
insights.append({"insight": insight, "recommendation": recommendation})
|
| 2146 |
|
| 2147 |
+
# --- 4. Emerging Risk Detection via Wordcloud (Risk Cloud) ---
|
| 2148 |
all_risk_terms, detected_terms = detect_emerging_risks(df)
|
| 2149 |
if detected_terms:
|
| 2150 |
+
high_sev_terms = ['exposed', 'fire hazard', 'fall', 'short circuit', 'unauthorized',
|
| 2151 |
+
'korsleting', 'kebakaran', 'jatuh', 'tanpa izin', 'kabel terkelupas']
|
|
|
|
| 2152 |
detected_high = [t for t in detected_terms if t in high_sev_terms]
|
| 2153 |
if detected_high:
|
| 2154 |
+
terms_str = ', '.join(detected_high[:4])
|
| 2155 |
insight = (
|
| 2156 |
f"Wordcloud analysis indicates emerging high-severity risks: *{terms_str}*. "
|
| 2157 |
+
f"These signal active hazards not yet fully mitigated."
|
| 2158 |
)
|
| 2159 |
recommendation = (
|
| 2160 |
f"Launch a 14-day *Targeted Risk Blitz* on locations reporting these terms. "
|
|
|
|
| 2163 |
)
|
| 2164 |
insights.append({"insight": insight, "recommendation": recommendation})
|
| 2165 |
|
| 2166 |
+
# Cloud persists? → Why?
|
| 2167 |
+
if len(detected_terms) >= 4 and (len(all_risk_terms) - len(detected_terms)) > 5:
|
| 2168 |
insight = (
|
| 2169 |
f"Despite mitigation efforts, the risk 'cloud' persists — likely due to: "
|
| 2170 |
+
f"(1) Contractor turnover re-introducing old hazards, "
|
| 2171 |
f"(2) Incomplete closure verification, or "
|
| 2172 |
+
f"(3) Findings migrating across locations after local fixes."
|
| 2173 |
)
|
| 2174 |
recommendation = (
|
| 2175 |
+
f"Adopt *closed-loop verification*: require geo-tagged before/after photos + PIC sign-off. "
|
| 2176 |
+
f"Map recurring findings to contractor IDs — enforce SLA penalties for repeat failures. "
|
| 2177 |
+
f"Use AI clustering to detect systemic patterns (e.g., 'grounding failure' across 3 locations in 2 weeks)."
|
| 2178 |
)
|
| 2179 |
insights.append({"insight": insight, "recommendation": recommendation})
|
| 2180 |
|
| 2181 |
+
# --- 5. Coverage Balancing Strategy ---
|
|
|
|
| 2182 |
if 'nama_lokasi_full' in df.columns and 'creator_nid' in df.columns:
|
| 2183 |
reporters_per_location = df.groupby('nama_lokasi_full')['creator_nid'].nunique()
|
| 2184 |
+
if len(reporters_per_location) > 1:
|
| 2185 |
+
mean_r = reporters_per_location.mean()
|
| 2186 |
+
std_r = reporters_per_location.std()
|
| 2187 |
+
coverage_gini = std_r / mean_r if mean_r > 0 else 0
|
| 2188 |
+
|
| 2189 |
+
if coverage_gini > 0.6:
|
| 2190 |
+
insight = (
|
| 2191 |
+
f"Coverage inequality (Gini ≈ {coverage_gini:.2f}) is high — a few locations dominate reporting effort. "
|
| 2192 |
+
f"This creates surveillance deserts in low-coverage zones."
|
| 2193 |
+
)
|
| 2194 |
+
recommendation = (
|
| 2195 |
+
f"1. Assign *minimum 2 unique reporters per high-risk location* monthly. "
|
| 2196 |
+
f"2. Use route optimization (e.g., VRP algorithm) to balance travel + inspection load. "
|
| 2197 |
+
f"3. Deploy mobile micro-checklists for non-auditors (e.g., operators) to increase eyes-on-ground."
|
| 2198 |
+
)
|
| 2199 |
+
insights.append({"insight": insight, "recommendation": recommendation})
|
| 2200 |
|
| 2201 |
return insights
|
| 2202 |
|
| 2203 |
+
# ==============================
|
| 2204 |
+
# Execute & Display
|
| 2205 |
+
# ==============================
|
| 2206 |
+
try:
|
| 2207 |
+
risk_insights = compute_risk_mitigation_insights(df_filtered)
|
| 2208 |
+
except Exception as e:
|
| 2209 |
+
st.error(f"Error in risk insight generation: {str(e)}")
|
| 2210 |
+
risk_insights = []
|
| 2211 |
|
| 2212 |
if risk_insights:
|
| 2213 |
for i, ir in enumerate(risk_insights, 1):
|
|
|
|
| 2215 |
st.markdown(f"<div class='ai-recommendation'><strong>Action {i}:</strong> {ir['recommendation']}</div>", unsafe_allow_html=True)
|
| 2216 |
else:
|
| 2217 |
st.markdown(
|
| 2218 |
+
"<div class='ai-insight'>No risk-mitigation insights generated. Ensure your data includes: "
|
| 2219 |
+
"<code>nama_lokasi_full</code>, <code>nama</code>, <code>creator_nid</code>, "
|
| 2220 |
+
"<code>temuan_kategori</code>, and at least one text field (e.g., <code>kondisi</code> or <code>judul</code>).</div>",
|
| 2221 |
unsafe_allow_html=True
|
| 2222 |
)
|