Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -2142,28 +2142,86 @@ from typing import List, Dict
|
|
| 2142 |
# Transformers pipeline for lightweight local LLM (text2text)
|
| 2143 |
from transformers import pipeline
|
| 2144 |
import math
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2145 |
|
| 2146 |
-
st.markdown("<h3 class='section-title'>Objective 7 — Insight & Recommendation (LLM)</h3>", unsafe_allow_html=True)
|
| 2147 |
-
# --------------------
|
| 2148 |
-
# Utility: load small local model (flan-t5-small)
|
| 2149 |
-
# --------------------
|
| 2150 |
@st.cache_resource
|
| 2151 |
-
def
|
| 2152 |
-
|
| 2153 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2154 |
|
| 2155 |
-
# Load the local text2text pipeline once at import time.
# `load_local_model` is defined earlier in the file (not fully visible here).
try:
    llm_pipe = load_local_model()
except Exception:
    # Best-effort: any load failure simply disables the LLM path.
    llm_pipe = None

# A loader that returns None without raising must also count as "unavailable",
# otherwise downstream code would believe an LLM exists when it does not.
local_llm_available = llm_pipe is not None
if not local_llm_available:
    st.warning("Local LLM not available or failed to load (will fallback to rule-based recommendations).")
|
| 2163 |
|
| 2164 |
-
|
| 2165 |
-
|
| 2166 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2167 |
def extract_agentic_insights_v5(df: pd.DataFrame):
|
| 2168 |
dev = {
|
| 2169 |
"lowest_ratio_9_locs": [],
|
|
@@ -2179,7 +2237,7 @@ def extract_agentic_insights_v5(df: pd.DataFrame):
|
|
| 2179 |
"obj6_top2_categories": [],
|
| 2180 |
}
|
| 2181 |
|
| 2182 |
-
# === 1. 9
|
| 2183 |
if {'nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan'}.issubset(df.columns):
|
| 2184 |
calc = df[['nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan']].copy()
|
| 2185 |
calc['created_at'] = pd.to_datetime(calc['created_at'], errors='coerce')
|
|
@@ -2195,7 +2253,7 @@ def extract_agentic_insights_v5(df: pd.DataFrame):
|
|
| 2195 |
lowest_9 = loc_avg.nsmallest(9)
|
| 2196 |
dev["lowest_ratio_9_locs"] = [(loc, round(ratio, 3)) for loc, ratio in lowest_9.items()]
|
| 2197 |
|
| 2198 |
-
# === 2a:
|
| 2199 |
if {'nama', 'creator_nid', 'created_at', 'kode_temuan'}.issubset(df.columns):
|
| 2200 |
calc = df[['nama', 'creator_nid', 'created_at', 'kode_temuan']].copy()
|
| 2201 |
calc['bulan'] = pd.to_datetime(calc['created_at']).dt.to_period('M')
|
|
@@ -2207,22 +2265,18 @@ def extract_agentic_insights_v5(df: pd.DataFrame):
|
|
| 2207 |
agg['ratio'] = agg['findings'] / agg['reporters']
|
| 2208 |
div_ratio = agg.groupby('nama')['ratio'].mean()
|
| 2209 |
if not div_ratio.empty:
|
| 2210 |
-
|
| 2211 |
-
val = round(div_ratio.min(), 2)
|
| 2212 |
-
dev["obj3a_lowest_div"] = (name, val)
|
| 2213 |
|
| 2214 |
-
# === 2b:
|
| 2215 |
if 'days_to_close' in df.columns:
|
| 2216 |
valid = df[df['days_to_close'].notna() & (df['days_to_close'] >= 0)]
|
| 2217 |
exec_col = 'nama_pic' if 'nama_pic' in valid.columns else 'creator_name'
|
| 2218 |
if exec_col in valid.columns:
|
| 2219 |
lead = valid.groupby(exec_col)['days_to_close'].mean()
|
| 2220 |
if not lead.empty:
|
| 2221 |
-
|
| 2222 |
-
val = round(lead.max(), 1)
|
| 2223 |
-
dev["obj3b_slowest_executor"] = (name, val)
|
| 2224 |
|
| 2225 |
-
# === 2c:
|
| 2226 |
if {'creator_name', 'created_at'}.issubset(df.columns):
|
| 2227 |
calc = df[['creator_name', 'created_at']].copy()
|
| 2228 |
calc['bulan'] = pd.to_datetime(calc['created_at']).dt.to_period('M')
|
|
@@ -2230,28 +2284,24 @@ def extract_agentic_insights_v5(df: pd.DataFrame):
|
|
| 2230 |
avg = monthly.groupby('creator_name')['count'].mean()
|
| 2231 |
avg = avg[avg > 0]
|
| 2232 |
if not avg.empty:
|
| 2233 |
-
|
| 2234 |
-
val = round(avg.min(), 2)
|
| 2235 |
-
dev["obj3c_lowest_reporter"] = (name, val)
|
| 2236 |
|
| 2237 |
-
# === 2d:
|
| 2238 |
if 'days_to_close' in df.columns and 'nama' in df.columns:
|
| 2239 |
valid = df[df['days_to_close'].notna() & (df['days_to_close'] >= 0)]
|
| 2240 |
if not valid.empty:
|
| 2241 |
lead = valid.groupby('nama')['days_to_close'].mean()
|
| 2242 |
if not lead.empty:
|
| 2243 |
-
|
| 2244 |
-
val = round(lead.max(), 1)
|
| 2245 |
-
dev["obj3d_slowest_div"] = (name, val)
|
| 2246 |
|
| 2247 |
-
# === 3.
|
| 2248 |
if 'temuan_kategori' in df.columns:
|
| 2249 |
cnt = df['temuan_kategori'].value_counts(normalize=True) * 100
|
| 2250 |
dev["obj4_unsafe_condition_pct"] = round(cnt.get("Unsafe Condition", 0), 1)
|
| 2251 |
dev["obj4_unsafe_action_pct"] = round(cnt.get("Unsafe Action", 0), 1)
|
| 2252 |
dev["obj4_near_miss_pct"] = round(cnt.get("Near Miss", 0), 1)
|
| 2253 |
|
| 2254 |
-
# === 4. Risk
|
| 2255 |
X_LIMIT, Y_LIMIT = 20, 3
|
| 2256 |
if {'nama', 'created_at', 'days_to_close', 'kode_temuan'}.issubset(df.columns):
|
| 2257 |
calc = df.copy()
|
|
@@ -2261,198 +2311,37 @@ def extract_agentic_insights_v5(df: pd.DataFrame):
|
|
| 2261 |
avg_count = monthly_counts.groupby('nama')['kode_temuan'].mean().reset_index(name='Finding Count')
|
| 2262 |
leadtime = calc.groupby('nama')['days_to_close'].mean().reset_index(name='Avg Lead Time')
|
| 2263 |
mat = avg_count.merge(leadtime, on='nama', how='left').fillna(0)
|
|
|
|
| 2264 |
for _, r in mat.iterrows():
|
| 2265 |
if r['Finding Count'] >= X_LIMIT and r['Avg Lead Time'] >= Y_LIMIT:
|
| 2266 |
dev["obj5_q1_divs"].append(r['nama'])
|
| 2267 |
elif r['Finding Count'] < X_LIMIT and r['Avg Lead Time'] >= Y_LIMIT:
|
| 2268 |
dev["obj5_q2_divs"].append(r['nama'])
|
| 2269 |
|
| 2270 |
-
# === 5. Top
|
| 2271 |
if {'kategori', 'temuan_kategori', 'created_at'}.issubset(df.columns):
|
| 2272 |
nonpos = df[df['temuan_kategori'] != 'Positive']
|
| 2273 |
if not nonpos.empty:
|
| 2274 |
start = nonpos['created_at'].min().to_period('M')
|
| 2275 |
end = nonpos['created_at'].max().to_period('M')
|
| 2276 |
n_months = len(pd.period_range(start=start, end=end, freq='M'))
|
| 2277 |
-
if n_months == 0:
|
| 2278 |
-
n_months = 1
|
| 2279 |
cat_avg = (nonpos.groupby('kategori').size() / n_months).sort_values(ascending=False).head(2)
|
| 2280 |
-
dev["obj6_top2_categories"] = [(cat, round(
|
| 2281 |
|
| 2282 |
return dev
|
| 2283 |
|
| 2284 |
-
# --------------------
|
| 2285 |
-
# Sample fallback data (if user didn't provide)
|
| 2286 |
-
# --------------------
|
| 2287 |
-
def sample_dataframe():
    """Return a 10-row demo DataFrame used when no CSV is supplied.

    The frame carries the nine columns the insight extractor looks for
    (location, reporter id/name, timestamp, finding code, division,
    days-to-close, finding category and technical category).

    ``created_at`` is kept as real datetimes (the last ten days ending
    "now") instead of strings: the extractor in this file calls
    ``df['created_at'].min().to_period('M')`` directly on the column, which
    fails on ``str`` values.
    """
    sample = {
        "nama_lokasi_full": ["Loc A", "Loc B", "Loc C", "Loc D", "Loc E", "Loc F", "Loc G", "Loc H", "Loc I", "Loc J"],
        "creator_nid": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        "created_at": pd.date_range(end=pd.Timestamp("now"), periods=10),
        "kode_temuan": list(range(10)),
        "nama": ["Div1", "Div1", "Div2", "Div2", "Div3", "Div3", "Div4", "Div4", "Div5", "Div5"],
        "days_to_close": [2, 5, 10, 1, 20, 3, 4, 6, 2, 8],
        "creator_name": ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J"],
        "temuan_kategori": [
            "Unsafe Condition", "Positive", "Unsafe Action", "Near Miss", "Positive",
            "Unsafe Condition", "Unsafe Action", "Positive", "Near Miss", "Unsafe Condition",
        ],
        "kategori": [
            "Electrical", "Mechanical", "Electrical", "Civil", "Mechanical",
            "Electrical", "Civil", "Mechanical", "Civil", "Electrical",
        ],
    }
    return pd.DataFrame(sample)
|
| 2300 |
|
| 2301 |
-
#
|
| 2302 |
-
#
|
| 2303 |
-
#
|
| 2304 |
-
def generate_recs_with_local_llm(insight_text: str, dev: dict, n_items: int = 5) -> List[Dict]:
    """Ask the local text2text pipeline for recommendations.

    Args:
        insight_text: Short plain-text insight summary placed in the prompt.
        dev: Structured analytics dict; serialized as JSON into the prompt.
        n_items: Number of items requested from the model and the maximum
            number returned.

    Returns:
        A list of dicts shaped like ``{"point": "1", "rec": "...", "mit": "..."}``,
        or ``None`` when the LLM is unavailable, generation fails, or nothing
        usable can be extracted (callers treat a falsy result as "use the
        rule-based fallback").
    """
    # Function-scope import: the file's import header is outside this view.
    import json

    if not local_llm_available or llm_pipe is None:
        return None

    prompt = (
        "You are an expert Safety Analytics advisor for a power utility. "
        "Based on the structured analytics below, produce EXACTLY "
        f"{n_items} items. For each item return a JSON object with keys: point, rec, mit. "
        "rec = Recommended Action (concise, 1-2 sentences). "
        "mit = Risk Mitigation Strategy (concise, 1-2 sentences). "
        "Return only a JSON array (no extra text).\n\n"
        "Structured analytics (python dict):\n"
        # default=str: `dev` is prompt context only, so stringifying values
        # json cannot encode natively is preferable to crashing the page.
        f"{json.dumps(dev, ensure_ascii=False, indent=2, default=str)}\n\n"
        "Insight summary (short):\n"
        f"{insight_text}\n\n"
        "Output example:\n"
        '[{"point":"1","rec":"...","mit":"..."}, ..., {"point":"5","rec":"...","mit":"..."}]\n'
    )

    try:
        gen = llm_pipe(prompt, max_new_tokens=400, do_sample=False)[0]["generated_text"]
    except Exception as e:
        # UI boundary: report and let the caller fall back.
        st.error(f"Local LLM generation failed: {e}")
        return None

    # Preferred path: the outermost [...] span of the output is a JSON array.
    try:
        parsed = json.loads(gen[gen.index('['):gen.rindex(']') + 1])
    except ValueError:  # no brackets found, or invalid JSON
        parsed = None

    if isinstance(parsed, list) and all(isinstance(item, dict) for item in parsed):
        for idx, item in enumerate(parsed, start=1):
            # Number any item the model left unnumbered.
            item.setdefault('point', str(idx))
        return parsed[:n_items]

    # Heuristic path: one recommendation per non-empty line.
    items = []
    for ln in (raw.strip() for raw in gen.splitlines()):
        if not ln:
            continue
        if len(items) >= n_items:
            break
        if '|' in ln:
            # "rec | mit" form; lines with fewer than two fields are ignored.
            parts = [p.strip() for p in ln.split('|') if p.strip()]
            if len(parts) < 2:
                continue
            rec, mit = parts[0], parts[1]
        else:
            # Free text: keep a short capture as the recommendation.
            rec, mit = ln[:200], ""
        items.append({"point": str(len(items) + 1), "rec": rec, "mit": mit})

    return items or None
|
| 2369 |
-
|
| 2370 |
-
# --------------------
|
| 2371 |
-
# Rule-based fallback generator (if LLM unavailable)
|
| 2372 |
-
# --------------------
|
| 2373 |
-
def rule_based_recs(dev: dict) -> List[Dict]:
    """Build exactly five recommendations from the analytics dict, without an LLM.

    Each rule emits a fixed point number ("1".."5") when its trigger in
    ``dev`` is present. Point numbers whose rule did not fire are filled with
    a generic item, so the result always carries points "1".."5" in order
    with no duplicates.

    Args:
        dev: Analytics dict holding the keys read below.

    Returns:
        A list of five ``{"point", "rec", "mit"}`` dicts ordered by point.
    """
    recs = []

    if dev["lowest_ratio_9_locs"]:
        recs.append({
            "point": "1",
            "rec": "Launch Agency Activation Sprint across the identified low-ratio locations: weekly micro-inspection by Area PIC.",
            "mit": "Enable QR-based checklists with automatic reminders; monitor ratio weekly and target improvement within 45 days.",
        })

    if any([dev["obj3a_lowest_div"], dev["obj3c_lowest_reporter"], dev["obj3d_slowest_div"], dev["obj3b_slowest_executor"]]):
        recs.append({
            "point": "2",
            "rec": "Deploy Agentic Capacity Dashboard to monitor reporting & resolution KPIs per division/individual.",
            "mit": "Auto-trigger coaching alerts when deviation >20% from baseline; weekly manager reports.",
        })

    uc, ua, nm = dev["obj4_unsafe_condition_pct"], dev["obj4_unsafe_action_pct"], dev["obj4_near_miss_pct"]
    if uc + ua + nm > 0:
        recs.append({
            "point": "3",
            "rec": "Enforce photo-based validation for Unsafe categories to improve classification fidelity.",
            "mit": "Block submission without evidence and require mandatory justification for manual overrides.",
        })

    if dev["obj5_q1_divs"] or dev["obj5_q2_divs"]:
        recs.append({
            "point": "4",
            "rec": "Assign Rapid Response Teams for Q1 divisions and apply One-Finding-One-Day policy for Q2.",
            "mit": "Auto-escalate to senior ops if division remains Q1/Q2 for >=2 months.",
        })

    top_cats = dev["obj6_top2_categories"]
    if top_cats:
        # The list may hold a single (category, avg) pair when the data has
        # only one category, so join whatever is there instead of unpacking two.
        cat_names = " and ".join(str(cat[0]) for cat in top_cats[:2])
        recs.append({
            "point": "5",
            "rec": f"Form RCA Task Force for {cat_names} with cross-functional owners.",
            "mit": "Update SOP and tender templates to include mitigations based on historical findings.",
        })

    # Fill the point numbers no rule produced, so numbering never collides.
    used_points = {item["point"] for item in recs}
    for number in range(1, 6):
        point = str(number)
        if point not in used_points:
            recs.append({"point": point, "rec": "Operational review and monitoring.", "mit": "Periodic review & KPIs."})

    recs.sort(key=lambda item: int(item["point"]))
    return recs
|
| 2414 |
-
|
| 2415 |
-
# --------------------
|
| 2416 |
-
# Streamlit UI
|
| 2417 |
-
# --------------------
|
| 2418 |
-
# Objective 7 page: data input, LLM status panel, then insight extraction.
st.markdown("<h3 class='section-title'>OBJECTIVE 7 — Insight and Recommendation</h3>", unsafe_allow_html=True)

col1, col2 = st.columns([2, 1])

with col1:
    st.subheader("Upload data (or use sample)")
    uploaded = st.file_uploader("Upload CSV (must contain relevant columns)", type=["csv"])
    if uploaded is not None:
        try:
            df = pd.read_csv(uploaded)
            st.success("CSV loaded.")
        except Exception as e:
            # UI boundary: show the error and continue with demo data.
            st.error(f"Failed to read CSV: {e}")
            df = sample_dataframe()
            st.info("Using sample dataframe due to read error.")
    else:
        st.info("No file uploaded — using sample dataset.")
        df = sample_dataframe()

    # The extractor calls .min().to_period('M') directly on this column, which
    # needs real datetimes; CSV input arrives as strings. Unparseable values
    # become NaT and are ignored by min()/max().
    if "created_at" in df.columns:
        df["created_at"] = pd.to_datetime(df["created_at"], errors="coerce")

    # NOTE(review): indentation was lost in this view; the preview is assumed
    # to belong inside the left column — confirm against the original layout.
    st.markdown("### Preview data (first 5 rows)")
    st.dataframe(df.head(), use_container_width=True)

with col2:
    st.subheader("LLM / Mode")
    st.write("Local LLM (flan-t5-small) will be used if available.")
    st.write(f"Local LLM available: {local_llm_available}")
    # A button click already reruns the script; asking for a rerun from inside
    # the callback does not reload anything. Clearing the loader's cache makes
    # the rerun load the model again.
    # NOTE(review): assumes load_local_model is the @st.cache_resource function
    # defined above — confirm.
    st.button("Reload model", on_click=load_local_model.clear)

# --------------------
# Compute insights
# --------------------
dev = extract_agentic_insights_v5(df)
|
| 2450 |
-
|
| 2451 |
-
# Build insight_text exactly similar to your format
|
| 2452 |
insight_lines = []
|
| 2453 |
if dev["lowest_ratio_9_locs"]:
|
| 2454 |
loc_list = ", ".join([f"<strong>{loc}</strong> ({ratio})" for loc, ratio in dev["lowest_ratio_9_locs"]])
|
| 2455 |
-
insight_lines.append(f"1. Nine locations with the
|
| 2456 |
|
| 2457 |
parts = []
|
| 2458 |
if dev["obj3a_lowest_div"]:
|
|
@@ -2460,104 +2349,98 @@ if dev["obj3a_lowest_div"]:
|
|
| 2460 |
if dev["obj3c_lowest_reporter"]:
|
| 2461 |
parts.append(f"reporter <strong>{dev['obj3c_lowest_reporter'][0]}</strong> ({dev['obj3c_lowest_reporter'][1]} findings/month)")
|
| 2462 |
if dev["obj3d_slowest_div"]:
|
| 2463 |
-
parts.append(f"division <strong>{dev['obj3d_slowest_div'][0]}</strong> (
|
| 2464 |
if dev["obj3b_slowest_executor"]:
|
| 2465 |
-
parts.append(f"executor <strong>{dev['obj3b_slowest_executor'][0]}</strong> (
|
| 2466 |
|
| 2467 |
if parts:
|
| 2468 |
-
insight_lines.append(
|
| 2469 |
-
f"2. Agentic AI highlights operational imbalance: {'; '.join(parts)}. These patterns indicate uneven engagement and resolution capability."
|
| 2470 |
-
)
|
| 2471 |
|
| 2472 |
uc, ua, nm = dev["obj4_unsafe_condition_pct"], dev["obj4_unsafe_action_pct"], dev["obj4_near_miss_pct"]
|
| 2473 |
if uc + ua + nm > 0:
|
| 2474 |
insight_lines.append(f"3. Non-Positive composition: Unsafe Condition ({uc}%), Unsafe Action ({ua}%), Near Miss ({nm}%).")
|
| 2475 |
|
| 2476 |
if dev["obj5_q1_divs"] or dev["obj5_q2_divs"]:
|
| 2477 |
-
q1 = ", ".join([f"<strong>{
|
| 2478 |
-
q2 = ", ".join([f"<strong>{
|
| 2479 |
-
insight_lines.append(f"4.
|
| 2480 |
|
| 2481 |
if dev["obj6_top2_categories"]:
|
| 2482 |
c1, c2 = dev["obj6_top2_categories"]
|
| 2483 |
-
insight_lines.append(f"5. Top recurring categories: <strong>{c1[0]}</strong> ({c1[1]}/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2484 |
|
| 2485 |
-
|
| 2486 |
-
|
| 2487 |
|
| 2488 |
-
|
| 2489 |
-
|
| 2490 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2491 |
st.markdown(
|
| 2492 |
f"""
|
| 2493 |
-
<div class="card" style="
|
| 2494 |
-
|
| 2495 |
-
|
| 2496 |
-
padding: 16px;
|
| 2497 |
-
margin-bottom: 20px;
|
| 2498 |
-
border-radius: 4px;
|
| 2499 |
-
box-shadow: 0 2px 4px rgba(0,0,0,0.05);
|
| 2500 |
-
">
|
| 2501 |
-
<h4 style="margin-top: 0; color: #FF6B6B;">Insight Summary</h4>
|
| 2502 |
-
<p style="margin-bottom: 0; line-height: 1.6; font-size: 0.98em;">{insight_text_html if insight_text_html else 'No insights (missing columns).'}</p>
|
| 2503 |
</div>
|
| 2504 |
""",
|
| 2505 |
unsafe_allow_html=True
|
| 2506 |
)
|
| 2507 |
|
| 2508 |
-
#
|
| 2509 |
-
# Generate Recs via LLM (or fallback)
|
| 2510 |
-
# --------------------
|
| 2511 |
-
st.markdown("## Recommendations & Risk Mitigation (generated)")
|
| 2512 |
-
with st.spinner("Generating recommendations..."):
|
| 2513 |
-
recs = generate_recs_with_local_llm(insight_text_plain, dev, n_items=5) if local_llm_available else None
|
| 2514 |
-
|
| 2515 |
-
if not recs:
|
| 2516 |
-
st.warning("LLM not available or failed to parse — using fallback rule-based recommendations.")
|
| 2517 |
-
recs = rule_based_recs(dev)
|
| 2518 |
-
|
| 2519 |
-
# --------------------
|
| 2520 |
-
# Render Recommendation table (same style)
|
| 2521 |
-
# --------------------
|
| 2522 |
if recs:
|
| 2523 |
-
|
| 2524 |
-
|
| 2525 |
-
|
| 2526 |
-
|
| 2527 |
-
|
| 2528 |
-
|
| 2529 |
-
f"<td style='padding:8px;'>{r.get('mit','')}</td>"
|
| 2530 |
-
f"</tr>"
|
| 2531 |
-
)
|
| 2532 |
|
| 2533 |
-
|
| 2534 |
-
|
| 2535 |
-
background-color: #
|
| 2536 |
-
|
| 2537 |
-
|
| 2538 |
-
|
| 2539 |
-
|
| 2540 |
-
|
| 2541 |
-
|
| 2542 |
-
|
| 2543 |
-
|
| 2544 |
-
|
| 2545 |
-
<
|
| 2546 |
-
|
| 2547 |
-
|
| 2548 |
-
|
| 2549 |
-
|
| 2550 |
-
|
| 2551 |
-
<tbody>
|
| 2552 |
-
{rows_html}
|
| 2553 |
-
</tbody>
|
| 2554 |
-
</table>
|
| 2555 |
-
</div>
|
| 2556 |
-
"""
|
| 2557 |
-
st.markdown(table_html, unsafe_allow_html=True)
|
| 2558 |
else:
|
| 2559 |
-
st.info("No
|
| 2560 |
-
|
| 2561 |
-
# --------------------
|
| 2562 |
-
# End
|
| 2563 |
-
# --------------------
|
|
|
|
| 2142 |
# Transformers pipeline for lightweight local LLM (text2text)
|
| 2143 |
from transformers import pipeline
|
| 2144 |
import math
|
| 2145 |
+
# =====================================================================
|
| 2146 |
+
# OBJECTIVE 7 — INSIGHT & RECOMMENDATION (LLM FIRST, RULE-BASED IF FAIL)
|
| 2147 |
+
# =====================================================================
|
| 2148 |
+
|
| 2149 |
+
st.markdown("<h3 class='section-title'>OBJECTIVE 7 — Insight and Recommendation</h3>", unsafe_allow_html=True)
|
| 2150 |
+
|
| 2151 |
+
|
| 2152 |
+
# ============================================================
|
| 2153 |
+
# 1. LLM LOADER
|
| 2154 |
+
# ============================================================
|
| 2155 |
+
import torch
|
| 2156 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 2157 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2158 |
@st.cache_resource
def load_llm_model():
    """Load the Gemma tokenizer and causal-LM model once per process.

    Returns:
        ``(tokenizer, model)`` on success, ``(None, None)`` when loading fails
        for any ordinary reason (callers then use the rule-based fallback).

    NOTE(review): because the function is wrapped in ``st.cache_resource``,
    the ``(None, None)`` failure result is presumably cached as well, so a
    transient failure would persist until the cache is cleared — confirm
    whether that is intended.
    """
    # Function-scope import: the file's import header is outside this view.
    import logging

    try:
        tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-2b-it")
        model = AutoModelForCausalLM.from_pretrained(
            "google/gemma-2-2b-it",
            torch_dtype=torch.float16,
            device_map="auto",
        )
    except Exception:
        # `except Exception` (not a bare except) so that BaseException-derived
        # signals such as KeyboardInterrupt are not swallowed; the cause is
        # logged instead of disappearing silently.
        logging.getLogger(__name__).exception("Failed to load google/gemma-2-2b-it")
        return None, None
    return tokenizer, model
|
| 2170 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2171 |
|
| 2172 |
+
def llm_generate_recommendation(insight_summary_text):
    """Try to generate recommendations as JSON with the local LLM.

    Args:
        insight_summary_text: Insight summary embedded verbatim in the prompt.

    Returns:
        The first JSON object (``dict``) found in the model's *generated*
        text, expected to look like ``{"recommendations": [...]}``; ``None``
        when the model is unavailable, generation fails, or no JSON object can
        be decoded. Callers fall back to the rule-based engine on ``None``.
    """
    # Function-scope import: the file's import header is outside this view.
    import json

    tok, mdl = load_llm_model()
    if tok is None or mdl is None:
        return None

    # The template lists all five points so the example agrees with the
    # "Generate 5" instruction above it.
    prompt = f"""
You are an expert Industrial Safety Analyst AI.
Below is an INSIGHT SUMMARY from a Safety Reporting System:

---
{insight_summary_text}
---

Generate 5 Recommended Actions and Risk Mitigation in clean JSON:

{{
"recommendations": [
{{"point":"1","rec":"...","mit":"..."}},
{{"point":"2","rec":"...","mit":"..."}},
{{"point":"3","rec":"...","mit":"..."}},
{{"point":"4","rec":"...","mit":"..."}},
{{"point":"5","rec":"...","mit":"..."}}
]
}}
"""

    try:
        inputs = tok(prompt, return_tensors="pt").to(mdl.device)
        out = mdl.generate(
            **inputs,
            max_new_tokens=380,
            temperature=0.25,
            do_sample=True,
        )
        # A causal LM returns prompt tokens followed by the new tokens. Decode
        # only the new part; otherwise the JSON template inside the prompt is
        # what the parser finds first.
        prompt_len = inputs["input_ids"].shape[-1]
        text = tok.decode(out[0][prompt_len:], skip_special_tokens=True)
    except Exception:
        # Best-effort: any tokenization/generation failure means "no LLM result".
        return None

    # Scan for the first position where a complete JSON object decodes.
    # raw_decode tolerates trailing text (e.g. closing code fences).
    decoder = json.JSONDecoder()
    pos = text.find("{")
    while pos != -1:
        try:
            candidate, _ = decoder.raw_decode(text, pos)
        except ValueError:
            candidate = None
        if isinstance(candidate, dict):
            return candidate
        pos = text.find("{", pos + 1)
    return None
|
| 2219 |
+
|
| 2220 |
+
|
| 2221 |
+
# ============================================================
|
| 2222 |
+
# 2. RULE-BASED ENGINE (YOUR ORIGINAL SCRIPT)
|
| 2223 |
+
# ============================================================
|
| 2224 |
+
|
| 2225 |
def extract_agentic_insights_v5(df: pd.DataFrame):
|
| 2226 |
dev = {
|
| 2227 |
"lowest_ratio_9_locs": [],
|
|
|
|
| 2237 |
"obj6_top2_categories": [],
|
| 2238 |
}
|
| 2239 |
|
| 2240 |
+
# === 1. 9 lowest locations ===
|
| 2241 |
if {'nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan'}.issubset(df.columns):
|
| 2242 |
calc = df[['nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan']].copy()
|
| 2243 |
calc['created_at'] = pd.to_datetime(calc['created_at'], errors='coerce')
|
|
|
|
| 2253 |
lowest_9 = loc_avg.nsmallest(9)
|
| 2254 |
dev["lowest_ratio_9_locs"] = [(loc, round(ratio, 3)) for loc, ratio in lowest_9.items()]
|
| 2255 |
|
| 2256 |
+
# === 2a: Lowest division ratio ===
|
| 2257 |
if {'nama', 'creator_nid', 'created_at', 'kode_temuan'}.issubset(df.columns):
|
| 2258 |
calc = df[['nama', 'creator_nid', 'created_at', 'kode_temuan']].copy()
|
| 2259 |
calc['bulan'] = pd.to_datetime(calc['created_at']).dt.to_period('M')
|
|
|
|
| 2265 |
agg['ratio'] = agg['findings'] / agg['reporters']
|
| 2266 |
div_ratio = agg.groupby('nama')['ratio'].mean()
|
| 2267 |
if not div_ratio.empty:
|
| 2268 |
+
dev["obj3a_lowest_div"] = (div_ratio.idxmin(), round(div_ratio.min(), 2))
|
|
|
|
|
|
|
| 2269 |
|
| 2270 |
+
# === 2b: Slowest executor ===
|
| 2271 |
if 'days_to_close' in df.columns:
|
| 2272 |
valid = df[df['days_to_close'].notna() & (df['days_to_close'] >= 0)]
|
| 2273 |
exec_col = 'nama_pic' if 'nama_pic' in valid.columns else 'creator_name'
|
| 2274 |
if exec_col in valid.columns:
|
| 2275 |
lead = valid.groupby(exec_col)['days_to_close'].mean()
|
| 2276 |
if not lead.empty:
|
| 2277 |
+
dev["obj3b_slowest_executor"] = (lead.idxmax(), round(lead.max(), 1))
|
|
|
|
|
|
|
| 2278 |
|
| 2279 |
+
# === 2c: Lowest reporter ===
|
| 2280 |
if {'creator_name', 'created_at'}.issubset(df.columns):
|
| 2281 |
calc = df[['creator_name', 'created_at']].copy()
|
| 2282 |
calc['bulan'] = pd.to_datetime(calc['created_at']).dt.to_period('M')
|
|
|
|
| 2284 |
avg = monthly.groupby('creator_name')['count'].mean()
|
| 2285 |
avg = avg[avg > 0]
|
| 2286 |
if not avg.empty:
|
| 2287 |
+
dev["obj3c_lowest_reporter"] = (avg.idxmin(), round(avg.min(), 2))
|
|
|
|
|
|
|
| 2288 |
|
| 2289 |
+
# === 2d: Slowest division ===
|
| 2290 |
if 'days_to_close' in df.columns and 'nama' in df.columns:
|
| 2291 |
valid = df[df['days_to_close'].notna() & (df['days_to_close'] >= 0)]
|
| 2292 |
if not valid.empty:
|
| 2293 |
lead = valid.groupby('nama')['days_to_close'].mean()
|
| 2294 |
if not lead.empty:
|
| 2295 |
+
dev["obj3d_slowest_div"] = (lead.idxmax(), round(lead.max(), 1))
|
|
|
|
|
|
|
| 2296 |
|
| 2297 |
+
# === 3. Non-positive ===
|
| 2298 |
if 'temuan_kategori' in df.columns:
|
| 2299 |
cnt = df['temuan_kategori'].value_counts(normalize=True) * 100
|
| 2300 |
dev["obj4_unsafe_condition_pct"] = round(cnt.get("Unsafe Condition", 0), 1)
|
| 2301 |
dev["obj4_unsafe_action_pct"] = round(cnt.get("Unsafe Action", 0), 1)
|
| 2302 |
dev["obj4_near_miss_pct"] = round(cnt.get("Near Miss", 0), 1)
|
| 2303 |
|
| 2304 |
+
# === 4. Risk Quadrant ===
|
| 2305 |
X_LIMIT, Y_LIMIT = 20, 3
|
| 2306 |
if {'nama', 'created_at', 'days_to_close', 'kode_temuan'}.issubset(df.columns):
|
| 2307 |
calc = df.copy()
|
|
|
|
| 2311 |
avg_count = monthly_counts.groupby('nama')['kode_temuan'].mean().reset_index(name='Finding Count')
|
| 2312 |
leadtime = calc.groupby('nama')['days_to_close'].mean().reset_index(name='Avg Lead Time')
|
| 2313 |
mat = avg_count.merge(leadtime, on='nama', how='left').fillna(0)
|
| 2314 |
+
|
| 2315 |
for _, r in mat.iterrows():
|
| 2316 |
if r['Finding Count'] >= X_LIMIT and r['Avg Lead Time'] >= Y_LIMIT:
|
| 2317 |
dev["obj5_q1_divs"].append(r['nama'])
|
| 2318 |
elif r['Finding Count'] < X_LIMIT and r['Avg Lead Time'] >= Y_LIMIT:
|
| 2319 |
dev["obj5_q2_divs"].append(r['nama'])
|
| 2320 |
|
| 2321 |
+
# === 5. Top categories ===
|
| 2322 |
if {'kategori', 'temuan_kategori', 'created_at'}.issubset(df.columns):
|
| 2323 |
nonpos = df[df['temuan_kategori'] != 'Positive']
|
| 2324 |
if not nonpos.empty:
|
| 2325 |
start = nonpos['created_at'].min().to_period('M')
|
| 2326 |
end = nonpos['created_at'].max().to_period('M')
|
| 2327 |
n_months = len(pd.period_range(start=start, end=end, freq='M'))
|
|
|
|
|
|
|
| 2328 |
cat_avg = (nonpos.groupby('kategori').size() / n_months).sort_values(ascending=False).head(2)
|
| 2329 |
+
dev["obj6_top2_categories"] = [(cat, round(v, 1)) for cat, v in cat_avg.items()]
|
| 2330 |
|
| 2331 |
return dev
|
| 2332 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2333 |
|
| 2334 |
+
# ============================================================
|
| 2335 |
+
# 3. RUN INSIGHT ENGINE
|
| 2336 |
+
# ============================================================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2337 |
|
| 2338 |
+
dev = extract_agentic_insights_v5(df_filtered)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2339 |
|
| 2340 |
+
# Build Insight Summary Text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2341 |
insight_lines = []
|
| 2342 |
if dev["lowest_ratio_9_locs"]:
|
| 2343 |
loc_list = ", ".join([f"<strong>{loc}</strong> ({ratio})" for loc, ratio in dev["lowest_ratio_9_locs"]])
|
| 2344 |
+
insight_lines.append(f"1. Nine locations with the lowest finding-to-reporter ratio: {loc_list}.")
|
| 2345 |
|
| 2346 |
parts = []
|
| 2347 |
if dev["obj3a_lowest_div"]:
|
|
|
|
| 2349 |
if dev["obj3c_lowest_reporter"]:
|
| 2350 |
parts.append(f"reporter <strong>{dev['obj3c_lowest_reporter'][0]}</strong> ({dev['obj3c_lowest_reporter'][1]} findings/month)")
|
| 2351 |
if dev["obj3d_slowest_div"]:
|
| 2352 |
+
parts.append(f"division <strong>{dev['obj3d_slowest_div'][0]}</strong> ({dev['obj3d_slowest_div'][1]} days)")
|
| 2353 |
if dev["obj3b_slowest_executor"]:
|
| 2354 |
+
parts.append(f"executor <strong>{dev['obj3b_slowest_executor'][0]}</strong> ({dev['obj3b_slowest_executor'][1]} days)")
|
| 2355 |
|
| 2356 |
if parts:
|
| 2357 |
+
insight_lines.append("2. Agentic AI detection flags uneven operational capacity: " + "; ".join(parts))
|
|
|
|
|
|
|
| 2358 |
|
| 2359 |
uc, ua, nm = dev["obj4_unsafe_condition_pct"], dev["obj4_unsafe_action_pct"], dev["obj4_near_miss_pct"]
|
| 2360 |
if uc + ua + nm > 0:
|
| 2361 |
insight_lines.append(f"3. Non-Positive composition: Unsafe Condition ({uc}%), Unsafe Action ({ua}%), Near Miss ({nm}%).")
|
| 2362 |
|
| 2363 |
if dev["obj5_q1_divs"] or dev["obj5_q2_divs"]:
|
| 2364 |
+
q1 = ", ".join([f"<strong>{x}</strong>" for x in dev["obj5_q1_divs"][:5]])
|
| 2365 |
+
q2 = ", ".join([f"<strong>{x}</strong>" for x in dev["obj5_q2_divs"][:5]])
|
| 2366 |
+
insight_lines.append(f"4. High-risk divisions (QI): {q1 or '—'}. Hidden-risk (QII): {q2 or '—'}.")
|
| 2367 |
|
| 2368 |
# Insight 5: top recurring non-Positive categories. The list can hold a single
# (category, avg-per-month) pair when the data has only one category, so format
# whatever is present instead of unpacking exactly two.
if dev["obj6_top2_categories"]:
    top_cat_text = " & ".join(
        f"<strong>{cat}</strong> ({avg}/mo)" for cat, avg in dev["obj6_top2_categories"][:2]
    )
    insight_lines.append(f"5. Top recurring non-Positive categories: {top_cat_text}.")
|
| 2371 |
+
|
| 2372 |
+
insight_text = "<br>".join(insight_lines)
|
| 2373 |
+
|
| 2374 |
+
|
| 2375 |
+
# ============================================================
|
| 2376 |
+
# 4. TRY LLM FIRST → ELSE FALLBACK RULE-BASED
|
| 2377 |
+
# ============================================================
|
| 2378 |
+
|
| 2379 |
+
# The summary is built as HTML for display; give the LLM a plain-text version
# (line breaks instead of <br>, all other tags removed).
import re

insight_text_plain = re.sub(r"<[^>]+>", "", insight_text.replace("<br>", "\n"))
llm_json = llm_generate_recommendation(insight_text_plain)

# Accept the LLM payload only when it has the shape the renderer needs; each
# kept item is normalized to string-valued point/rec/mit keys.
recs = []
raw_recs = llm_json.get("recommendations") if isinstance(llm_json, dict) else None
if isinstance(raw_recs, list):
    for idx, item in enumerate(raw_recs, start=1):
        if not isinstance(item, dict) or not item.get("rec"):
            continue  # skip malformed entries rather than crash while rendering
        recs.append({
            "point": str(item.get("point", idx)),
            "rec": str(item["rec"]),
            "mit": str(item.get("mit", "")),
        })

if not recs:
    # FALLBACK RULE-BASED
    if dev["lowest_ratio_9_locs"]:
        recs.append({"point":"1","rec":"Launch spot-inspection sprint at low-ratio locations.","mit":"Enable 3-min QR checklist + auto-reminder."})

    if parts:
        recs.append({"point":"2","rec":"Activate capacity dashboard.","mit":"Trigger coaching alerts if deviation >20%."})

    if uc + ua + nm > 0:
        recs.append({"point":"3","rec":"Enforce photo-based validation.","mit":"Block submission without evidence."})

    if dev["obj5_q1_divs"] or dev["obj5_q2_divs"]:
        recs.append({"point":"4","rec":"Assign safety crews to QI divisions.","mit":"Auto escalate if stuck 2 months."})

    if dev["obj6_top2_categories"]:
        # May hold one pair only; join instead of unpacking exactly two.
        rca_targets = " & ".join(str(cat[0]) for cat in dev["obj6_top2_categories"][:2])
        recs.append({"point":"5","rec":f"Create RCA task force for {rca_targets}.","mit":"Update tender specs with required mitigations."})
|
| 2401 |
+
|
| 2402 |
+
|
| 2403 |
+
# ============================================================
|
| 2404 |
+
# 5. RENDERING (NO CHANGES)
|
| 2405 |
+
# ============================================================
|
| 2406 |
+
|
| 2407 |
+
# Insight Summary Card
|
| 2408 |
# Insight summary card. When no insight line could be built, show a short
# placeholder instead of an empty card. The HTML is kept flush-left inside the
# string so Markdown does not treat indented lines as a code block.
st.markdown(
    f"""
<div class="card" style="background-color:#f8f9fa;border-left:4px solid #003DA5;padding:16px;margin-bottom:20px;border-radius:4px;box-shadow:0 2px 4px rgba(0,0,0,0.05);">
<h4 style="margin-top:0;color:#FF6B6B;">Insight Summary</h4>
<p style="margin-bottom:0;line-height:1.6;font-size:0.98em;">{insight_text or 'No insights (missing columns).'}</p>
</div>
""",
    unsafe_allow_html=True
)
|
| 2417 |
|
| 2418 |
+
# Recommendation Table
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2419 |
# Recommendation table. Cell text may come from the LLM, so it is HTML-escaped
# and read with .get(): an incomplete or oddly formatted item must neither
# raise nor inject markup into the page.
import html

rec_rows = [
    f"<tr><td style='text-align:center;font-weight:bold;width:5%;'>{html.escape(str(r.get('point', idx)))}</td>"
    f"<td style='padding:8px;'>{html.escape(str(r.get('rec', '')))}</td>"
    f"<td style='padding:8px;'>{html.escape(str(r.get('mit', '')))}</td></tr>"
    for idx, r in enumerate(recs or [], start=1)
    if isinstance(r, dict)
]

if rec_rows:
    rows = "".join(rec_rows)

    # HTML kept flush-left inside the string so Markdown does not treat
    # indented lines as a code block.
    st.markdown(
        f"""
<div class="card" style="background-color:#e8f5e9;border-left:4px solid #4CAF50;padding:16px;margin-bottom:20px;border-radius:4px;box-shadow:0 2px 4px rgba(0,0,0,0.05);">
<h4 style="margin-top:0;color:#2E7D32;">Recommended Actions & Agentic Risk Mitigation</h4>
<table style="width:100%;border-collapse:collapse;font-size:0.95em;margin-top:12px;">
<thead>
<tr style="background-color:#e8f5ee;">
<th style="padding:10px;text-align:center;border:1px solid #ccc;">#</th>
<th style="padding:10px;text-align:left;border:1px solid #ccc;">Recommended Action</th>
<th style="padding:10px;text-align:left;border:1px solid #ccc;">Risk Mitigation Strategy</th>
</tr>
</thead>
<tbody>{rows}</tbody>
</table>
</div>
""",
        unsafe_allow_html=True
    )
else:
    st.info("No actionable insights generated.")
|
|
|
|
|
|
|
|
|
|
|
|