Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -2133,71 +2133,433 @@ def extract_agentic_insights_v5(df: pd.DataFrame):
|
|
| 2133 |
# ===== MAIN APP ===========
|
| 2134 |
# ==========================
|
| 2135 |
|
| 2136 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2137 |
|
| 2138 |
-
|
|
|
|
|
|
|
| 2139 |
|
| 2140 |
-
|
| 2141 |
-
summary_parts = []
|
| 2142 |
|
| 2143 |
-
|
| 2144 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2145 |
|
| 2146 |
-
|
| 2147 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2148 |
|
| 2149 |
-
|
| 2150 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2151 |
|
| 2152 |
-
|
| 2153 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2154 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2155 |
if dev["obj3d_slowest_div"]:
|
| 2156 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2157 |
|
| 2158 |
uc, ua, nm = dev["obj4_unsafe_condition_pct"], dev["obj4_unsafe_action_pct"], dev["obj4_near_miss_pct"]
|
| 2159 |
-
|
|
|
|
| 2160 |
|
| 2161 |
-
|
| 2162 |
-
|
|
|
|
|
|
|
| 2163 |
|
| 2164 |
if dev["obj6_top2_categories"]:
|
| 2165 |
-
|
| 2166 |
-
|
| 2167 |
-
insight_summary_text = "\n".join(summary_parts)
|
| 2168 |
|
| 2169 |
-
|
| 2170 |
-
|
| 2171 |
|
| 2172 |
-
#
|
| 2173 |
-
|
| 2174 |
-
|
| 2175 |
-
llm_output = json.loads(llm_json)
|
| 2176 |
-
recommendation = llm_output["recommendation"]
|
| 2177 |
-
mitigation = llm_output["mitigation"]
|
| 2178 |
-
except:
|
| 2179 |
-
recommendation = "LLM output not valid JSON."
|
| 2180 |
-
mitigation = "-"
|
| 2181 |
-
|
| 2182 |
-
# Render
|
| 2183 |
st.markdown(
|
| 2184 |
f"""
|
| 2185 |
-
<div
|
| 2186 |
-
|
| 2187 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2188 |
</div>
|
| 2189 |
""",
|
| 2190 |
-
unsafe_allow_html=True
|
| 2191 |
)
|
| 2192 |
|
| 2193 |
-
|
| 2194 |
-
|
| 2195 |
-
|
| 2196 |
-
|
| 2197 |
-
|
| 2198 |
-
|
| 2199 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2200 |
</div>
|
| 2201 |
-
"""
|
| 2202 |
-
unsafe_allow_html=True
|
| 2203 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2133 |
# ===== MAIN APP ===========
|
| 2134 |
# ==========================
|
| 2135 |
|
| 2136 |
+
# app.py
|
| 2137 |
+
import streamlit as st
|
| 2138 |
+
import pandas as pd
|
| 2139 |
+
import json
|
| 2140 |
+
from typing import List, Dict
|
| 2141 |
|
| 2142 |
+
# Transformers pipeline for lightweight local LLM (text2text)
|
| 2143 |
+
from transformers import pipeline
|
| 2144 |
+
import math
|
| 2145 |
|
| 2146 |
+
# Configure the page: wide layout and the browser-tab title.
# NOTE(review): Streamlit has historically required set_page_config to run
# before any other st.* command; this call sits well inside app.py, so confirm
# nothing earlier in the file renders first.
st.set_page_config(layout="wide", page_title="Objective 7 — Insight & Recommendation (LLM)")
|
|
|
|
| 2147 |
|
| 2148 |
+
# --------------------
|
| 2149 |
+
# Utility: load small local model (flan-t5-small)
|
| 2150 |
+
# --------------------
|
| 2151 |
+
@st.cache_resource
def load_local_model(model_name="google/flan-t5-small"):
    """Build the local text2text-generation pipeline (cached by Streamlit).

    Args:
        model_name: Model identifier handed to ``transformers.pipeline``.

    Returns:
        The ``text2text-generation`` pipeline for ``model_name``.

    Raises:
        ImportError: if ``torch`` is not installed. Errors raised by
            ``pipeline`` itself also propagate; the caller decides how to
            fall back.
    """
    # Function-scope import: torch is only needed here, to probe for a GPU.
    import torch

    # The previous check, hasattr(torch, "cuda"), only tests that the
    # submodule exists, not that a GPU is usable, so device_map="auto" was
    # requested on every machine. Ask torch directly and pick an explicit
    # device instead: 0 = first CUDA device, -1 = CPU.
    device = 0 if torch.cuda.is_available() else -1
    return pipeline("text2text-generation", model=model_name, device=device)
|
| 2155 |
|
| 2156 |
+
# Load the pipeline once per script run. Any failure leaves llm_pipe as None
# and flips the app into rule-based mode, with a warning shown to the user.
try:
    llm_pipe = load_local_model()
except Exception:
    llm_pipe, local_llm_available = None, False
    st.warning("Local LLM not available or failed to load (will fallback to rule-based recommendations).")
else:
    local_llm_available = True
|
| 2164 |
|
| 2165 |
+
# --------------------
|
| 2166 |
+
# Insert your extract_agentic_insights_v5 function (kept faithful to your original)
|
| 2167 |
+
# --------------------
|
| 2168 |
+
def extract_agentic_insights_v5(df: pd.DataFrame):
    """Summarise reporting and resolution deviations in a findings table.

    Every metric is optional: it is computed only when the columns it reads
    are present in ``df``; otherwise its default value is returned.
    ``created_at`` is parsed once with ``errors='coerce'``, so string
    timestamps (for example straight from ``read_csv``) are accepted and
    unparseable values are treated as missing instead of raising.

    Args:
        df: Findings table; rows are counted as findings. Columns read when
            present: ``nama_lokasi_full``, ``nama``, ``creator_nid``,
            ``creator_name``, ``nama_pic``, ``created_at``, ``kode_temuan``,
            ``days_to_close``, ``temuan_kategori``, ``kategori``.
            The frame is not modified.

    Returns:
        dict with the keys:
            lowest_ratio_9_locs: up to nine ``(location, ratio)`` tuples with
                the smallest mean monthly findings-per-reporter ratio.
            obj3a_lowest_div: ``(nama, ratio)`` with the smallest such ratio
                per ``nama``, or None.
            obj3b_slowest_executor: ``(name, days)`` with the largest mean
                ``days_to_close`` per ``nama_pic`` (``creator_name`` when
                ``nama_pic`` is absent), or None.
            obj3c_lowest_reporter: ``(creator_name, count)`` with the smallest
                mean monthly row count, or None.
            obj3d_slowest_div: ``(nama, days)`` with the largest mean
                ``days_to_close``, or None.
            obj4_unsafe_condition_pct / obj4_unsafe_action_pct /
                obj4_near_miss_pct: share (percent) of each value among all
                ``temuan_kategori`` values, "Positive" rows included.
            obj5_q1_divs / obj5_q2_divs: ``nama`` values whose mean lead time
                is >= 3 days and whose mean monthly finding count is >= 20
                (q1) or < 20 (q2).
            obj6_top2_categories: up to two ``(kategori, per_month)`` tuples
                for rows whose ``temuan_kategori`` is not "Positive".
    """
    dev = {
        "lowest_ratio_9_locs": [],
        "obj3a_lowest_div": None,
        "obj3b_slowest_executor": None,
        "obj3c_lowest_reporter": None,
        "obj3d_slowest_div": None,
        "obj4_unsafe_condition_pct": 0.0,
        "obj4_unsafe_action_pct": 0.0,
        "obj4_near_miss_pct": 0.0,
        "obj5_q1_divs": [],
        "obj5_q2_divs": [],
        "obj6_top2_categories": [],
    }

    # Work on a copy and parse timestamps exactly once. Previously some
    # sections parsed strictly (raising on bad values) and section 5 did not
    # parse at all, which crashed on string dates.
    data = df.copy()
    has = set(data.columns).issuperset
    if 'created_at' in data.columns:
        data['created_at'] = pd.to_datetime(data['created_at'], errors='coerce')

    # === 1. 9 locations with lowest finding-to-reporter ratio ===
    if has({'nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan'}):
        calc = data[['nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan']].dropna(
            subset=['created_at', 'nama_lokasi_full', 'creator_nid']
        )
        calc = calc.assign(bulan=calc['created_at'].dt.to_period('M'))
        monthly = calc.groupby(['nama_lokasi_full', 'bulan']).agg(
            findings=('kode_temuan', 'size'),
            reporters=('creator_nid', 'nunique'),
        ).reset_index()
        monthly = monthly[monthly['reporters'] > 0]
        ratio = monthly['findings'] / monthly['reporters']
        loc_avg = ratio.groupby(monthly['nama_lokasi_full']).mean()
        dev["lowest_ratio_9_locs"] = [
            (loc, round(val, 3)) for loc, val in loc_avg.nsmallest(9).items()
        ]

    # === 2a: Division — lowest finding-to-reporter ratio (Obj 3a) ===
    if has({'nama', 'creator_nid', 'created_at', 'kode_temuan'}):
        calc = data[['nama', 'creator_nid', 'kode_temuan']].assign(
            bulan=data['created_at'].dt.to_period('M')
        )
        agg = calc.groupby(['nama', 'bulan']).agg(
            findings=('kode_temuan', 'size'),
            reporters=('creator_nid', 'nunique'),
        )
        # Months where every reporter id is missing would divide by zero.
        agg = agg[agg['reporters'] > 0].reset_index()
        agg['ratio'] = agg['findings'] / agg['reporters']
        div_ratio = agg.groupby('nama')['ratio'].mean()
        if not div_ratio.empty:
            dev["obj3a_lowest_div"] = (div_ratio.idxmin(), round(div_ratio.min(), 2))

    # === 2b: Executor — longest average resolution time (Obj 3b) ===
    if 'days_to_close' in data.columns:
        valid = data[data['days_to_close'].notna() & (data['days_to_close'] >= 0)]
        exec_col = 'nama_pic' if 'nama_pic' in valid.columns else 'creator_name'
        if exec_col in valid.columns:
            lead = valid.groupby(exec_col)['days_to_close'].mean()
            if not lead.empty:
                dev["obj3b_slowest_executor"] = (lead.idxmax(), round(lead.max(), 1))

    # === 2c: Reporter — lowest reporting frequency (Obj 3c) ===
    if has({'creator_name', 'created_at'}):
        calc = data[['creator_name']].assign(bulan=data['created_at'].dt.to_period('M'))
        monthly = calc.groupby(['creator_name', 'bulan']).size().reset_index(name='count')
        avg = monthly.groupby('creator_name')['count'].mean()
        avg = avg[avg > 0]
        if not avg.empty:
            dev["obj3c_lowest_reporter"] = (avg.idxmin(), round(avg.min(), 2))

    # === 2d: Division — longest average resolution time (Obj 3d) ===
    if has({'days_to_close', 'nama'}):
        valid = data[data['days_to_close'].notna() & (data['days_to_close'] >= 0)]
        if not valid.empty:
            lead = valid.groupby('nama')['days_to_close'].mean()
            if not lead.empty:
                dev["obj3d_slowest_div"] = (lead.idxmax(), round(lead.max(), 1))

    # === 3. Composition of non-Positive findings ===
    # NOTE(review): shares are normalised over ALL rows, "Positive" included —
    # confirm that is the intended denominator.
    if 'temuan_kategori' in data.columns:
        cnt = data['temuan_kategori'].value_counts(normalize=True) * 100
        dev["obj4_unsafe_condition_pct"] = round(cnt.get("Unsafe Condition", 0), 1)
        dev["obj4_unsafe_action_pct"] = round(cnt.get("Unsafe Action", 0), 1)
        dev["obj4_near_miss_pct"] = round(cnt.get("Near Miss", 0), 1)

    # === 4. Risk Quadrants (X=20 findings/month, Y=3 days avg lead time) ===
    X_LIMIT, Y_LIMIT = 20, 3
    if has({'nama', 'created_at', 'days_to_close', 'kode_temuan'}):
        calc = data.assign(month=data['created_at'].dt.to_period('M').astype(str))
        monthly_counts = calc.groupby(['nama', 'month'])['kode_temuan'].nunique().reset_index()
        avg_count = monthly_counts.groupby('nama')['kode_temuan'].mean().reset_index(name='Finding Count')
        leadtime = calc.groupby('nama')['days_to_close'].mean().reset_index(name='Avg Lead Time')
        mat = avg_count.merge(leadtime, on='nama', how='left').fillna(0)
        slow = mat['Avg Lead Time'] >= Y_LIMIT
        busy = mat['Finding Count'] >= X_LIMIT
        dev["obj5_q1_divs"] = mat.loc[slow & busy, 'nama'].tolist()
        dev["obj5_q2_divs"] = mat.loc[slow & ~busy, 'nama'].tolist()

    # === 5. Top 2 non-Positive categories (avg per month) ===
    if has({'kategori', 'temuan_kategori', 'created_at'}):
        nonpos = data[data['temuan_kategori'] != 'Positive']
        if not nonpos.empty:
            # The month span comes from the parseable dates only; with none
            # available, fall back to a single month rather than failing.
            dates = nonpos['created_at'].dropna()
            if dates.empty:
                n_months = 1
            else:
                span = pd.period_range(
                    start=dates.min().to_period('M'),
                    end=dates.max().to_period('M'),
                    freq='M',
                )
                n_months = max(len(span), 1)
            cat_avg = (nonpos.groupby('kategori').size() / n_months).sort_values(ascending=False).head(2)
            dev["obj6_top2_categories"] = [(cat, round(val, 1)) for cat, val in cat_avg.items()]

    return dev
|
| 2284 |
+
|
| 2285 |
+
# --------------------
|
| 2286 |
+
# Sample fallback data (if user didn't provide)
|
| 2287 |
+
# --------------------
|
| 2288 |
+
def sample_dataframe():
    """Return the ten-row demo findings table used when no CSV is supplied.

    ``created_at`` holds ten consecutive daily timestamps ending at the
    current time, rendered as strings; every other column is fixed.
    """
    letters = "ABCDEFGHIJ"
    columns = {
        "nama_lokasi_full": [f"Loc {ch}" for ch in letters],
        "creator_nid": list(range(1, 11)),
        "created_at": pd.date_range(end=pd.Timestamp("now"), periods=10).astype(str),
        "kode_temuan": list(range(10)),
        # Two consecutive rows per division: Div1, Div1, Div2, Div2, ...
        "nama": [f"Div{n}" for n in range(1, 6) for _ in range(2)],
        "days_to_close": [2, 5, 10, 1, 20, 3, 4, 6, 2, 8],
        "creator_name": list(letters),
        "temuan_kategori": [
            "Unsafe Condition", "Positive", "Unsafe Action", "Near Miss", "Positive",
            "Unsafe Condition", "Unsafe Action", "Positive", "Near Miss", "Unsafe Condition",
        ],
        "kategori": [
            "Electrical", "Mechanical", "Electrical", "Civil", "Mechanical",
            "Electrical", "Civil", "Mechanical", "Civil", "Electrical",
        ],
    }
    return pd.DataFrame(columns)
|
| 2301 |
+
|
| 2302 |
+
# --------------------
|
| 2303 |
+
# LLM prompt & call (local using flan T5 small via pipeline)
|
| 2304 |
+
# --------------------
|
| 2305 |
+
def _parse_llm_recs(gen: str, n_items: int):
    """Convert raw model text into at most ``n_items`` rec dicts, or None."""
    # Preferred path: the outermost [...] span is a JSON array of objects.
    # str.index/rindex and json.loads all signal failure with ValueError.
    try:
        payload = json.loads(gen[gen.index('['):gen.rindex(']') + 1])
    except ValueError:
        payload = None

    if isinstance(payload, list):
        items = []
        for entry in payload:
            if len(items) >= n_items:
                break
            if not isinstance(entry, dict):
                continue  # only objects can carry point/rec/mit
            entry.setdefault('point', str(len(items) + 1))
            items.append(entry)
        # Valid JSON without usable objects: report "nothing" so the caller
        # can fall back, rather than scraping the JSON text line by line.
        return items or None

    # Heuristic path: one item per non-empty line, "rec | mit" when a pipe
    # separates at least two fields, otherwise the line itself (capped).
    items = []
    for raw in gen.splitlines():
        if len(items) >= n_items:
            break
        fields = [p.strip() for p in raw.split('|') if p.strip()]
        if not fields:
            continue
        if len(fields) >= 2:
            rec, mit = fields[0], fields[1]
        else:
            rec, mit = fields[0][:200], ""
        items.append({"point": str(len(items) + 1), "rec": rec, "mit": mit})
    return items or None


def generate_recs_with_local_llm(insight_text: str, dev: dict, n_items: int = 5) -> List[Dict]:
    """Ask the local text2text pipeline for recommendations.

    Args:
        insight_text: Plain-text insight summary placed in the prompt.
        dev: Structured analytics dict, serialised into the prompt as JSON.
        n_items: Number of items requested; also the maximum returned.

    Returns:
        A list of dicts shaped like ``{"point": "1", "rec": "...", "mit": "..."}``,
        or None when the model is unavailable, generation fails, or nothing
        usable can be parsed. Callers should treat any falsy result as
        "use the fallback".
    """
    if not local_llm_available or llm_pipe is None:
        return None

    # Build very clear prompt asking for JSON list.
    # default=str keeps prompt building from raising on values json cannot
    # encode natively (e.g. numpy integers coming out of pandas).
    prompt = (
        "You are an expert Safety Analytics advisor for a power utility. "
        "Based on the structured analytics below, produce EXACTLY "
        f"{n_items} items. For each item return a JSON object with keys: point, rec, mit. "
        "rec = Recommended Action (concise, 1-2 sentences). "
        "mit = Risk Mitigation Strategy (concise, 1-2 sentences). "
        "Return only a JSON array (no extra text).\n\n"
        "Structured analytics (python dict):\n"
        f"{json.dumps(dev, ensure_ascii=False, indent=2, default=str)}\n\n"
        "Insight summary (short):\n"
        f"{insight_text}\n\n"
        "Output example:\n"
        # The last example index follows n_items instead of a fixed "5".
        f'[{{"point":"1","rec":"...","mit":"..."}}, ..., {{"point":"{n_items}","rec":"...","mit":"..."}}]\n'
    )

    # Generation is the UI boundary for arbitrary model/runtime failures, so
    # a broad catch is deliberate here; the error is surfaced to the user.
    try:
        gen = llm_pipe(prompt, max_new_tokens=400, do_sample=False)[0]["generated_text"]
    except Exception as e:
        st.error(f"Local LLM generation failed: {e}")
        return None

    return _parse_llm_recs(gen, n_items)
|
| 2370 |
+
|
| 2371 |
+
# --------------------
|
| 2372 |
+
# Rule-based fallback generator (if LLM unavailable)
|
| 2373 |
+
# --------------------
|
| 2374 |
+
def rule_based_recs(dev: dict) -> List[Dict]:
    """Build five fixed recommendations from the analytics dict, without an LLM.

    Points "1".."5" are tied to the five insight groups; a point whose
    trigger data is absent is replaced by a generic filler row carrying the
    same number, so the result always has exactly five rows with unique
    labels, ordered 1..5.

    Args:
        dev: Analytics dict with the keys produced by
            ``extract_agentic_insights_v5``.

    Returns:
        List of five ``{"point", "rec", "mit"}`` dicts.

    Raises:
        KeyError: if ``dev`` lacks one of the expected keys.
    """
    recs = []
    if dev["lowest_ratio_9_locs"]:
        recs.append({
            "point": "1",
            "rec": "Launch Agency Activation Sprint across the identified low-ratio locations: weekly micro-inspection by Area PIC.",
            "mit": "Enable QR-based checklists with automatic reminders; monitor ratio weekly and target improvement within 45 days.",
        })

    if any([dev["obj3a_lowest_div"], dev["obj3c_lowest_reporter"], dev["obj3d_slowest_div"], dev["obj3b_slowest_executor"]]):
        recs.append({
            "point": "2",
            "rec": "Deploy Agentic Capacity Dashboard to monitor reporting & resolution KPIs per division/individual.",
            "mit": "Auto-trigger coaching alerts when deviation >20% from baseline; weekly manager reports.",
        })

    uc, ua, nm = dev["obj4_unsafe_condition_pct"], dev["obj4_unsafe_action_pct"], dev["obj4_near_miss_pct"]
    if uc + ua + nm > 0:
        recs.append({
            "point": "3",
            "rec": "Enforce photo-based validation for Unsafe categories to improve classification fidelity.",
            "mit": "Block submission without evidence and require mandatory justification for manual overrides.",
        })

    if dev["obj5_q1_divs"] or dev["obj5_q2_divs"]:
        recs.append({
            "point": "4",
            "rec": "Assign Rapid Response Teams for Q1 divisions and apply One-Finding-One-Day policy for Q2.",
            "mit": "Auto-escalate to senior ops if division remains Q1/Q2 for >=2 months.",
        })

    # The list may hold a single category; unpacking it into exactly two
    # names used to raise ValueError, so join whatever is there (at most 2).
    top_cats = [str(cat[0]) for cat in dev["obj6_top2_categories"][:2]]
    if top_cats:
        recs.append({
            "point": "5",
            "rec": f"Form RCA Task Force for {' and '.join(top_cats)} with cross-functional owners.",
            "mit": "Update SOP and tender templates to include mitigations based on historical findings.",
        })

    # Fill every unused point number with the generic row so that labels stay
    # unique, then restore 1..5 order.
    used = {r["point"] for r in recs}
    for label in (str(n) for n in range(1, 6)):
        if label not in used:
            recs.append({"point": label, "rec": "Operational review and monitoring.", "mit": "Periodic review & KPIs."})
    recs.sort(key=lambda r: int(r["point"]))
    return recs
|
| 2415 |
+
|
| 2416 |
+
# --------------------
|
| 2417 |
+
# Streamlit UI
|
| 2418 |
+
# --------------------
|
| 2419 |
+
st.markdown("<h3 class='section-title'>OBJECTIVE 7 — Insight and Recommendation</h3>", unsafe_allow_html=True)

col1, col2 = st.columns([2, 1])

# Left column: data source. Whatever happens, `df` ends up bound — to the
# uploaded CSV when it parses, otherwise to the built-in sample.
with col1:
    st.subheader("Upload data (or use sample)")
    uploaded = st.file_uploader("Upload CSV (must contain relevant columns)", type=["csv"])
    if uploaded is None:
        st.info("No file uploaded — using sample dataset.")
        df = sample_dataframe()
    else:
        # Broad catch is deliberate at this UI boundary: any parse failure
        # is shown to the user and the sample data is used instead.
        try:
            df = pd.read_csv(uploaded)
        except Exception as e:
            st.error(f"Failed to read CSV: {e}")
            df = sample_dataframe()
            st.info("Using sample dataframe due to read error.")
        else:
            st.success("CSV loaded.")

    st.markdown("### Preview data (first 5 rows)")
    st.dataframe(df.head(), use_container_width=True)

# Right column: model status and manual reload.
with col2:
    st.subheader("LLM / Mode")
    st.write("Local LLM (flan-t5-small) will be used if available.")
    st.write(f"Local LLM available: {local_llm_available}")
    # Previously the rerun was requested from an on_click callback, where
    # Streamlit ignores it, and the cached pipeline was never invalidated,
    # so the button reloaded nothing. Clear the cache, then rerun from the
    # script body. st.rerun replaced st.experimental_rerun in newer
    # releases; use whichever this installation provides.
    if st.button("Reload model"):
        load_local_model.clear()
        rerun = getattr(st, "rerun", None) or st.experimental_rerun
        rerun()
|
| 2446 |
+
|
| 2447 |
+
# --------------------
|
| 2448 |
+
# Compute insights
|
| 2449 |
+
# --------------------
|
| 2450 |
+
import html  # stdlib; escapes CSV-derived text before it is embedded in HTML

dev = extract_agentic_insights_v5(df)


def _strong(value) -> str:
    """Wrap a data value in <strong>, escaped so it cannot inject markup."""
    return f"<strong>{html.escape(str(value))}</strong>"


def _plain(line: str) -> str:
    """Strip the presentation tags added here and undo the HTML escaping."""
    for tag in ("<strong>", "</strong>", "<em>", "</em>"):
        line = line.replace(tag, "")
    return html.unescape(line)


# Build the numbered insight lines; a line is emitted only when its data exists.
insight_lines = []
if dev["lowest_ratio_9_locs"]:
    loc_list = ", ".join(f"{_strong(loc)} ({ratio})" for loc, ratio in dev["lowest_ratio_9_locs"])
    insight_lines.append(f"1. Nine locations with the <em>lowest</em> finding-to-reporter ratio: {loc_list}.")

parts = []
if dev["obj3a_lowest_div"]:
    parts.append(f"division {_strong(dev['obj3a_lowest_div'][0])} (ratio: {dev['obj3a_lowest_div'][1]})")
if dev["obj3c_lowest_reporter"]:
    parts.append(f"reporter {_strong(dev['obj3c_lowest_reporter'][0])} ({dev['obj3c_lowest_reporter'][1]} findings/month)")
if dev["obj3d_slowest_div"]:
    parts.append(f"division {_strong(dev['obj3d_slowest_div'][0])} (avg. resolution: {dev['obj3d_slowest_div'][1]} days)")
if dev["obj3b_slowest_executor"]:
    parts.append(f"executor {_strong(dev['obj3b_slowest_executor'][0])} (avg. resolution: {dev['obj3b_slowest_executor'][1]} days)")

if parts:
    insight_lines.append(
        f"2. Agentic AI highlights operational imbalance: {'; '.join(parts)}. These patterns indicate uneven engagement and resolution capability."
    )

uc, ua, nm = dev["obj4_unsafe_condition_pct"], dev["obj4_unsafe_action_pct"], dev["obj4_near_miss_pct"]
if uc + ua + nm > 0:
    insight_lines.append(f"3. Non-Positive composition: Unsafe Condition ({uc}%), Unsafe Action ({ua}%), Near Miss ({nm}%).")

if dev["obj5_q1_divs"] or dev["obj5_q2_divs"]:
    q1 = ", ".join(_strong(d) for d in dev["obj5_q1_divs"]) or "—"
    q2 = ", ".join(_strong(d) for d in dev["obj5_q2_divs"]) or "—"
    insight_lines.append(f"4. Quadrant I high-risk divisions: {q1}. Quadrant II hidden-risk divisions: {q2}.")

# The list may hold one category only; unpacking it into exactly two names
# used to raise ValueError, so format however many entries are present.
if dev["obj6_top2_categories"]:
    top_cats = " and ".join(f"{_strong(cat)} ({rate}/month)" for cat, rate in dev["obj6_top2_categories"])
    insight_lines.append(f"5. Top recurring categories: {top_cats}.")

insight_text_html = "<br>".join(insight_lines)
insight_text_plain = "\n".join(_plain(s) for s in insight_lines)

# --------------------
# Render Insight card
# --------------------
# The markup is kept flush-left inside the string: Markdown can treat lines
# indented by four or more spaces as a code block.
st.markdown(
    f"""
<div class="card" style="
background-color: #f8f9fa;
border-left: 4px solid #003DA5;
padding: 16px;
margin-bottom: 20px;
border-radius: 4px;
box-shadow: 0 2px 4px rgba(0,0,0,0.05);
">
<h4 style="margin-top: 0; color: #FF6B6B;">Insight Summary</h4>
<p style="margin-bottom: 0; line-height: 1.6; font-size: 0.98em;">{insight_text_html or 'No insights (missing columns).'}</p>
</div>
""",
    unsafe_allow_html=True,
)
|
| 2508 |
|
| 2509 |
+
# --------------------
|
| 2510 |
+
# Generate Recs via LLM (or fallback)
|
| 2511 |
+
# --------------------
|
| 2512 |
+
import html  # stdlib; escapes generated text before it is embedded in HTML

st.markdown("## Recommendations & Risk Mitigation (generated)")
with st.spinner("Generating recommendations..."):
    recs = generate_recs_with_local_llm(insight_text_plain, dev, n_items=5) if local_llm_available else None

# None or an empty list both mean "nothing usable came back from the model".
if not recs:
    st.warning("LLM not available or failed to parse — using fallback rule-based recommendations.")
    recs = rule_based_recs(dev)


def _cell(rec, key) -> str:
    """Return rec[key] as HTML-escaped text ('' when missing or rec is not a dict)."""
    return html.escape(str(rec.get(key, ""))) if isinstance(rec, dict) else ""


# --------------------
# Render Recommendation table (same style)
# --------------------
# Model output is untrusted text: escape every cell, because the table is
# rendered with unsafe_allow_html=True.
rows_html = "".join(
    "<tr>"
    f"<td style='text-align:center; font-weight:bold; width:5%;'>{_cell(r, 'point')}</td>"
    f"<td style='padding:8px;'>{_cell(r, 'rec')}</td>"
    f"<td style='padding:8px;'>{_cell(r, 'mit')}</td>"
    "</tr>"
    for r in (recs or [])[:5]
)

# Markup kept flush-left inside the string: Markdown can treat lines indented
# by four or more spaces as a code block.
table_html = f"""
<div class="card" style="
background-color: #e8f5e9;
border-left: 4px solid #4CAF50;
padding: 16px;
margin-bottom: 20px;
border-radius: 4px;
box-shadow: 0 2px 4px rgba(0,0,0,0.05);
">
<h4 style="margin-top: 0; color: #2E7D32;">Recommended Actions & Agentic Risk Mitigation</h4>
<table style="width:100%; border-collapse:collapse; font-size:0.95em; margin-top:12px;">
<thead>
<tr style="background-color:#e8f5ee;">
<th style="padding:10px; text-align:center; border:1px solid #ccc;">#</th>
<th style="padding:10px; text-align:left; border:1px solid #ccc;">Recommended Action</th>
<th style="padding:10px; text-align:left; border:1px solid #ccc;">Risk Mitigation Strategy</th>
</tr>
</thead>
<tbody>
{rows_html}
</tbody>
</table>
</div>
"""

if recs:
    st.markdown(table_html, unsafe_allow_html=True)
else:
    st.info("No recommendations available.")

# --------------------
# End
# --------------------
st.caption("Objective 7 — Streamlit app. LLM (local) used when available; fallback rule-based otherwise.")
|