SHELLAPANDIANGANHUNGING committed on
Commit
a8a21ed
·
verified ·
1 Parent(s): d2696db

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +167 -284
app.py CHANGED
@@ -2142,28 +2142,86 @@ from typing import List, Dict
2142
  # Transformers pipeline for lightweight local LLM (text2text)
2143
  from transformers import pipeline
2144
  import math
 
 
 
 
 
 
 
 
 
 
 
 
2145
 
2146
- st.markdown("<h3 class='section-title'>Objective 7 — Insight & Recommendation (LLM)</h3>", unsafe_allow_html=True)
2147
- # --------------------
2148
- # Utility: load small local model (flan-t5-small)
2149
- # --------------------
2150
  @st.cache_resource
2151
- def load_local_model(model_name="google/flan-t5-small"):
2152
- # text2text pipeline works well for instruction-style prompts
2153
- return pipeline("text2text-generation", model=model_name, device_map="auto" if hasattr(__import__('torch'),'cuda') else None)
 
 
 
 
 
 
 
 
2154
 
2155
- # Try to load model once
2156
- try:
2157
- llm_pipe = load_local_model()
2158
- local_llm_available = True
2159
- except Exception as e:
2160
- llm_pipe = None
2161
- local_llm_available = False
2162
- st.warning("Local LLM not available or failed to load (will fallback to rule-based recommendations).")
2163
 
2164
- # --------------------
2165
- # Insert your extract_agentic_insights_v5 function (kept faithful to your original)
2166
- # --------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2167
  def extract_agentic_insights_v5(df: pd.DataFrame):
2168
  dev = {
2169
  "lowest_ratio_9_locs": [],
@@ -2179,7 +2237,7 @@ def extract_agentic_insights_v5(df: pd.DataFrame):
2179
  "obj6_top2_categories": [],
2180
  }
2181
 
2182
- # === 1. 9 locations with lowest finding-to-reporter ratio ===
2183
  if {'nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan'}.issubset(df.columns):
2184
  calc = df[['nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan']].copy()
2185
  calc['created_at'] = pd.to_datetime(calc['created_at'], errors='coerce')
@@ -2195,7 +2253,7 @@ def extract_agentic_insights_v5(df: pd.DataFrame):
2195
  lowest_9 = loc_avg.nsmallest(9)
2196
  dev["lowest_ratio_9_locs"] = [(loc, round(ratio, 3)) for loc, ratio in lowest_9.items()]
2197
 
2198
- # === 2a: Division lowest finding-to-reporter ratio (Obj 3a) ===
2199
  if {'nama', 'creator_nid', 'created_at', 'kode_temuan'}.issubset(df.columns):
2200
  calc = df[['nama', 'creator_nid', 'created_at', 'kode_temuan']].copy()
2201
  calc['bulan'] = pd.to_datetime(calc['created_at']).dt.to_period('M')
@@ -2207,22 +2265,18 @@ def extract_agentic_insights_v5(df: pd.DataFrame):
2207
  agg['ratio'] = agg['findings'] / agg['reporters']
2208
  div_ratio = agg.groupby('nama')['ratio'].mean()
2209
  if not div_ratio.empty:
2210
- name = div_ratio.idxmin()
2211
- val = round(div_ratio.min(), 2)
2212
- dev["obj3a_lowest_div"] = (name, val)
2213
 
2214
- # === 2b: Executor longest average resolution time (Obj 3b) ===
2215
  if 'days_to_close' in df.columns:
2216
  valid = df[df['days_to_close'].notna() & (df['days_to_close'] >= 0)]
2217
  exec_col = 'nama_pic' if 'nama_pic' in valid.columns else 'creator_name'
2218
  if exec_col in valid.columns:
2219
  lead = valid.groupby(exec_col)['days_to_close'].mean()
2220
  if not lead.empty:
2221
- name = lead.idxmax()
2222
- val = round(lead.max(), 1)
2223
- dev["obj3b_slowest_executor"] = (name, val)
2224
 
2225
- # === 2c: Reporter lowest reporting frequency (Obj 3c) ===
2226
  if {'creator_name', 'created_at'}.issubset(df.columns):
2227
  calc = df[['creator_name', 'created_at']].copy()
2228
  calc['bulan'] = pd.to_datetime(calc['created_at']).dt.to_period('M')
@@ -2230,28 +2284,24 @@ def extract_agentic_insights_v5(df: pd.DataFrame):
2230
  avg = monthly.groupby('creator_name')['count'].mean()
2231
  avg = avg[avg > 0]
2232
  if not avg.empty:
2233
- name = avg.idxmin()
2234
- val = round(avg.min(), 2)
2235
- dev["obj3c_lowest_reporter"] = (name, val)
2236
 
2237
- # === 2d: Division longest average resolution time (Obj 3d) ===
2238
  if 'days_to_close' in df.columns and 'nama' in df.columns:
2239
  valid = df[df['days_to_close'].notna() & (df['days_to_close'] >= 0)]
2240
  if not valid.empty:
2241
  lead = valid.groupby('nama')['days_to_close'].mean()
2242
  if not lead.empty:
2243
- name = lead.idxmax()
2244
- val = round(lead.max(), 1)
2245
- dev["obj3d_slowest_div"] = (name, val)
2246
 
2247
- # === 3. Composition of non-Positive findings ===
2248
  if 'temuan_kategori' in df.columns:
2249
  cnt = df['temuan_kategori'].value_counts(normalize=True) * 100
2250
  dev["obj4_unsafe_condition_pct"] = round(cnt.get("Unsafe Condition", 0), 1)
2251
  dev["obj4_unsafe_action_pct"] = round(cnt.get("Unsafe Action", 0), 1)
2252
  dev["obj4_near_miss_pct"] = round(cnt.get("Near Miss", 0), 1)
2253
 
2254
- # === 4. Risk Quadrants (X=20 findings/month, Y=3 days avg lead time) ===
2255
  X_LIMIT, Y_LIMIT = 20, 3
2256
  if {'nama', 'created_at', 'days_to_close', 'kode_temuan'}.issubset(df.columns):
2257
  calc = df.copy()
@@ -2261,198 +2311,37 @@ def extract_agentic_insights_v5(df: pd.DataFrame):
2261
  avg_count = monthly_counts.groupby('nama')['kode_temuan'].mean().reset_index(name='Finding Count')
2262
  leadtime = calc.groupby('nama')['days_to_close'].mean().reset_index(name='Avg Lead Time')
2263
  mat = avg_count.merge(leadtime, on='nama', how='left').fillna(0)
 
2264
  for _, r in mat.iterrows():
2265
  if r['Finding Count'] >= X_LIMIT and r['Avg Lead Time'] >= Y_LIMIT:
2266
  dev["obj5_q1_divs"].append(r['nama'])
2267
  elif r['Finding Count'] < X_LIMIT and r['Avg Lead Time'] >= Y_LIMIT:
2268
  dev["obj5_q2_divs"].append(r['nama'])
2269
 
2270
- # === 5. Top 2 non-Positive categories (avg per month) ===
2271
  if {'kategori', 'temuan_kategori', 'created_at'}.issubset(df.columns):
2272
  nonpos = df[df['temuan_kategori'] != 'Positive']
2273
  if not nonpos.empty:
2274
  start = nonpos['created_at'].min().to_period('M')
2275
  end = nonpos['created_at'].max().to_period('M')
2276
  n_months = len(pd.period_range(start=start, end=end, freq='M'))
2277
- if n_months == 0:
2278
- n_months = 1
2279
  cat_avg = (nonpos.groupby('kategori').size() / n_months).sort_values(ascending=False).head(2)
2280
- dev["obj6_top2_categories"] = [(cat, round(val, 1)) for cat, val in cat_avg.items()]
2281
 
2282
  return dev
2283
 
2284
- # --------------------
2285
- # Sample fallback data (if user didn't provide)
2286
- # --------------------
2287
- def sample_dataframe():
2288
- sample = {
2289
- "nama_lokasi_full": ["Loc A","Loc B","Loc C","Loc D","Loc E","Loc F","Loc G","Loc H","Loc I","Loc J"],
2290
- "creator_nid": [1,2,3,4,5,6,7,8,9,10],
2291
- "created_at": pd.date_range(end=pd.Timestamp("now"), periods=10).astype(str),
2292
- "kode_temuan": list(range(10)),
2293
- "nama": ["Div1","Div1","Div2","Div2","Div3","Div3","Div4","Div4","Div5","Div5"],
2294
- "days_to_close": [2,5,10,1,20,3,4,6,2,8],
2295
- "creator_name": ["A","B","C","D","E","F","G","H","I","J"],
2296
- "temuan_kategori": ["Unsafe Condition","Positive","Unsafe Action","Near Miss","Positive","Unsafe Condition","Unsafe Action","Positive","Near Miss","Unsafe Condition"],
2297
- "kategori": ["Electrical","Mechanical","Electrical","Civil","Mechanical","Electrical","Civil","Mechanical","Civil","Electrical"]
2298
- }
2299
- return pd.DataFrame(sample)
2300
 
2301
- # --------------------
2302
- # LLM prompt & call (local using flan T5 small via pipeline)
2303
- # --------------------
2304
- def generate_recs_with_local_llm(insight_text: str, dev: dict, n_items: int = 5) -> List[Dict]:
2305
- """
2306
- Return list of objects: [{"point":"1","rec":"...","mit":"..."} ...]
2307
- Uses local text2text pipeline.
2308
- """
2309
- if not local_llm_available or llm_pipe is None:
2310
- return None
2311
 
2312
- # Build very clear prompt asking for JSON list
2313
- prompt = (
2314
- "You are an expert Safety Analytics advisor for a power utility. "
2315
- "Based on the structured analytics below, produce EXACTLY "
2316
- f"{n_items} items. For each item return a JSON object with keys: point, rec, mit. "
2317
- "rec = Recommended Action (concise, 1-2 sentences). "
2318
- "mit = Risk Mitigation Strategy (concise, 1-2 sentences). "
2319
- "Return only a JSON array (no extra text).\n\n"
2320
- "Structured analytics (python dict):\n"
2321
- f"{json.dumps(dev, ensure_ascii=False, indent=2)}\n\n"
2322
- "Insight summary (short):\n"
2323
- f"{insight_text}\n\n"
2324
- "Output example:\n"
2325
- '[{"point":"1","rec":"...","mit":"..."}, ..., {"point":"5","rec":"...","mit":"..."}]\n'
2326
- )
2327
 
2328
- # use pipeline to generate
2329
- try:
2330
- gen = llm_pipe(prompt, max_new_tokens=400, do_sample=False)[0]["generated_text"]
2331
- except Exception as e:
2332
- st.error(f"Local LLM generation failed: {e}")
2333
- return None
2334
-
2335
- # try parse JSON array inside output text
2336
- try:
2337
- start = gen.index('[')
2338
- end = gen.rindex(']') + 1
2339
- arr_text = gen[start:end]
2340
- obj = json.loads(arr_text)
2341
- # Normalize: ensure point fields exist; if not, assign numbers
2342
- for idx, item in enumerate(obj):
2343
- if 'point' not in item:
2344
- item['point'] = str(idx+1)
2345
- return obj
2346
- except Exception:
2347
- # fallback: try to parse lines heuristically
2348
- lines = [ln.strip() for ln in gen.splitlines() if ln.strip()]
2349
- items = []
2350
- num = 1
2351
- for ln in lines:
2352
- if len(items) >= n_items:
2353
- break
2354
- # naive split by '|' or '-' if present
2355
- if '|' in ln:
2356
- parts = [p.strip() for p in ln.split('|') if p.strip()]
2357
- if len(parts) >= 2:
2358
- rec = parts[0]
2359
- mit = parts[1] if len(parts) > 1 else ""
2360
- items.append({"point": str(num), "rec": rec, "mit": mit})
2361
- num += 1
2362
- else:
2363
- # fallback short capture
2364
- items.append({"point": str(num), "rec": ln[:200], "mit": ""})
2365
- num += 1
2366
- if items:
2367
- return items
2368
- return None
2369
-
2370
- # --------------------
2371
- # Rule-based fallback generator (if LLM unavailable)
2372
- # --------------------
2373
- def rule_based_recs(dev: dict) -> List[Dict]:
2374
- recs = []
2375
- if dev["lowest_ratio_9_locs"]:
2376
- recs.append({
2377
- "point": "1",
2378
- "rec": "Launch Agency Activation Sprint across the identified low-ratio locations: weekly micro-inspection by Area PIC.",
2379
- "mit": "Enable QR-based checklists with automatic reminders; monitor ratio weekly and target improvement within 45 days."
2380
- })
2381
- parts_exist = any([dev["obj3a_lowest_div"], dev["obj3c_lowest_reporter"], dev["obj3d_slowest_div"], dev["obj3b_slowest_executor"]])
2382
- if parts_exist:
2383
- recs.append({
2384
- "point": "2",
2385
- "rec": "Deploy Agentic Capacity Dashboard to monitor reporting & resolution KPIs per division/individual.",
2386
- "mit": "Auto-trigger coaching alerts when deviation >20% from baseline; weekly manager reports."
2387
- })
2388
- uc, ua, nm = dev["obj4_unsafe_condition_pct"], dev["obj4_unsafe_action_pct"], dev["obj4_near_miss_pct"]
2389
- if uc + ua + nm > 0:
2390
- recs.append({
2391
- "point": "3",
2392
- "rec": "Enforce photo-based validation for Unsafe categories to improve classification fidelity.",
2393
- "mit": "Block submission without evidence and require mandatory justification for manual overrides."
2394
- })
2395
- if dev["obj5_q1_divs"] or dev["obj5_q2_divs"]:
2396
- recs.append({
2397
- "point": "4",
2398
- "rec": "Assign Rapid Response Teams for Q1 divisions and apply One-Finding-One-Day policy for Q2.",
2399
- "mit": "Auto-escalate to senior ops if division remains Q1/Q2 for >=2 months."
2400
- })
2401
- if dev["obj6_top2_categories"]:
2402
- c1, c2 = dev["obj6_top2_categories"]
2403
- recs.append({
2404
- "point": "5",
2405
- "rec": f"Form RCA Task Force for {c1[0]} and {c2[0]} with cross-functional owners.",
2406
- "mit": "Update SOP and tender templates to include mitigations based on historical findings."
2407
- })
2408
- # Ensure at least 5 items
2409
- i = 1
2410
- while len(recs) < 5:
2411
- recs.append({"point": str(len(recs)+1), "rec": "Operational review and monitoring.", "mit": "Periodic review & KPIs."})
2412
- i += 1
2413
- return recs[:5]
2414
-
2415
- # --------------------
2416
- # Streamlit UI
2417
- # --------------------
2418
- st.markdown("<h3 class='section-title'>OBJECTIVE 7 — Insight and Recommendation</h3>", unsafe_allow_html=True)
2419
-
2420
- col1, col2 = st.columns([2,1])
2421
-
2422
- with col1:
2423
- st.subheader("Upload data (or use sample)")
2424
- uploaded = st.file_uploader("Upload CSV (must contain relevant columns)", type=["csv"])
2425
- if uploaded is not None:
2426
- try:
2427
- df = pd.read_csv(uploaded)
2428
- st.success("CSV loaded.")
2429
- except Exception as e:
2430
- st.error(f"Failed to read CSV: {e}")
2431
- df = sample_dataframe()
2432
- st.info("Using sample dataframe due to read error.")
2433
- else:
2434
- st.info("No file uploaded — using sample dataset.")
2435
- df = sample_dataframe()
2436
-
2437
- st.markdown("### Preview data (first 5 rows)")
2438
- st.dataframe(df.head(), use_container_width=True)
2439
-
2440
- with col2:
2441
- st.subheader("LLM / Mode")
2442
- st.write("Local LLM (flan-t5-small) will be used if available.")
2443
- st.write(f"Local LLM available: {local_llm_available}")
2444
- st.button("Reload model", on_click=lambda: st.experimental_rerun())
2445
-
2446
- # --------------------
2447
- # Compute insights
2448
- # --------------------
2449
- dev = extract_agentic_insights_v5(df)
2450
-
2451
- # Build insight_text exactly similar to your format
2452
  insight_lines = []
2453
  if dev["lowest_ratio_9_locs"]:
2454
  loc_list = ", ".join([f"<strong>{loc}</strong> ({ratio})" for loc, ratio in dev["lowest_ratio_9_locs"]])
2455
- insight_lines.append(f"1. Nine locations with the <em>lowest</em> finding-to-reporter ratio: {loc_list}.")
2456
 
2457
  parts = []
2458
  if dev["obj3a_lowest_div"]:
@@ -2460,104 +2349,98 @@ if dev["obj3a_lowest_div"]:
2460
  if dev["obj3c_lowest_reporter"]:
2461
  parts.append(f"reporter <strong>{dev['obj3c_lowest_reporter'][0]}</strong> ({dev['obj3c_lowest_reporter'][1]} findings/month)")
2462
  if dev["obj3d_slowest_div"]:
2463
- parts.append(f"division <strong>{dev['obj3d_slowest_div'][0]}</strong> (avg. resolution: {dev['obj3d_slowest_div'][1]} days)")
2464
  if dev["obj3b_slowest_executor"]:
2465
- parts.append(f"executor <strong>{dev['obj3b_slowest_executor'][0]}</strong> (avg. resolution: {dev['obj3b_slowest_executor'][1]} days)")
2466
 
2467
  if parts:
2468
- insight_lines.append(
2469
- f"2. Agentic AI highlights operational imbalance: {'; '.join(parts)}. These patterns indicate uneven engagement and resolution capability."
2470
- )
2471
 
2472
  uc, ua, nm = dev["obj4_unsafe_condition_pct"], dev["obj4_unsafe_action_pct"], dev["obj4_near_miss_pct"]
2473
  if uc + ua + nm > 0:
2474
  insight_lines.append(f"3. Non-Positive composition: Unsafe Condition ({uc}%), Unsafe Action ({ua}%), Near Miss ({nm}%).")
2475
 
2476
  if dev["obj5_q1_divs"] or dev["obj5_q2_divs"]:
2477
- q1 = ", ".join([f"<strong>{d}</strong>" for d in dev["obj5_q1_divs"]]) or "—"
2478
- q2 = ", ".join([f"<strong>{d}</strong>" for d in dev["obj5_q2_divs"]]) or "—"
2479
- insight_lines.append(f"4. Quadrant I high-risk divisions: {q1}. Quadrant II hidden-risk divisions: {q2}.")
2480
 
2481
  if dev["obj6_top2_categories"]:
2482
  c1, c2 = dev["obj6_top2_categories"]
2483
- insight_lines.append(f"5. Top recurring categories: <strong>{c1[0]}</strong> ({c1[1]}/month) and <strong>{c2[0]}</strong> ({c2[1]}/month).")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2484
 
2485
- insight_text_html = "<br>".join(insight_lines)
2486
- insight_text_plain = "\n".join([s.replace("<strong>", "").replace("</strong>", "").replace("<em>", "").replace("</em>", "") for s in insight_lines])
2487
 
2488
- # --------------------
2489
- # Render Insight card
2490
- # --------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
2491
  st.markdown(
2492
  f"""
2493
- <div class="card" style="
2494
- background-color: #f8f9fa;
2495
- border-left: 4px solid #003DA5;
2496
- padding: 16px;
2497
- margin-bottom: 20px;
2498
- border-radius: 4px;
2499
- box-shadow: 0 2px 4px rgba(0,0,0,0.05);
2500
- ">
2501
- <h4 style="margin-top: 0; color: #FF6B6B;">Insight Summary</h4>
2502
- <p style="margin-bottom: 0; line-height: 1.6; font-size: 0.98em;">{insight_text_html if insight_text_html else 'No insights (missing columns).'}</p>
2503
  </div>
2504
  """,
2505
  unsafe_allow_html=True
2506
  )
2507
 
2508
- # --------------------
2509
- # Generate Recs via LLM (or fallback)
2510
- # --------------------
2511
- st.markdown("## Recommendations & Risk Mitigation (generated)")
2512
- with st.spinner("Generating recommendations..."):
2513
- recs = generate_recs_with_local_llm(insight_text_plain, dev, n_items=5) if local_llm_available else None
2514
-
2515
- if not recs:
2516
- st.warning("LLM not available or failed to parse — using fallback rule-based recommendations.")
2517
- recs = rule_based_recs(dev)
2518
-
2519
- # --------------------
2520
- # Render Recommendation table (same style)
2521
- # --------------------
2522
  if recs:
2523
- rows_html = ""
2524
- for r in recs[:5]:
2525
- rows_html += (
2526
- f"<tr>"
2527
- f"<td style='text-align:center; font-weight:bold; width:5%;'>{r.get('point','')}</td>"
2528
- f"<td style='padding:8px;'>{r.get('rec','')}</td>"
2529
- f"<td style='padding:8px;'>{r.get('mit','')}</td>"
2530
- f"</tr>"
2531
- )
2532
 
2533
- table_html = f"""
2534
- <div class="card" style="
2535
- background-color: #e8f5e9;
2536
- border-left: 4px solid #4CAF50;
2537
- padding: 16px;
2538
- margin-bottom: 20px;
2539
- border-radius: 4px;
2540
- box-shadow: 0 2px 4px rgba(0,0,0,0.05);
2541
- ">
2542
- <h4 style="margin-top: 0; color: #2E7D32;">Recommended Actions & Agentic Risk Mitigation</h4>
2543
- <table style="width:100%; border-collapse:collapse; font-size:0.95em; margin-top:12px;">
2544
- <thead>
2545
- <tr style="background-color:#e8f5ee;">
2546
- <th style="padding:10px; text-align:center; border:1px solid #ccc;">#</th>
2547
- <th style="padding:10px; text-align:left; border:1px solid #ccc;">Recommended Action</th>
2548
- <th style="padding:10px; text-align:left; border:1px solid #ccc;">Risk Mitigation Strategy</th>
2549
- </tr>
2550
- </thead>
2551
- <tbody>
2552
- {rows_html}
2553
- </tbody>
2554
- </table>
2555
- </div>
2556
- """
2557
- st.markdown(table_html, unsafe_allow_html=True)
2558
  else:
2559
- st.info("No recommendations available.")
2560
-
2561
- # --------------------
2562
- # End
2563
- # --------------------
 
2142
  # Transformers pipeline for lightweight local LLM (text2text)
2143
  from transformers import pipeline
2144
  import math
2145
+ # =====================================================================
2146
+ # OBJECTIVE 7 — INSIGHT & RECOMMENDATION (LLM FIRST, RULE-BASED IF FAIL)
2147
+ # =====================================================================
2148
+
2149
+ st.markdown("<h3 class='section-title'>OBJECTIVE 7 — Insight and Recommendation</h3>", unsafe_allow_html=True)
2150
+
2151
+
2152
+ # ============================================================
2153
+ # 1. LLM LOADER
2154
+ # ============================================================
2155
+ import torch
2156
+ from transformers import AutoTokenizer, AutoModelForCausalLM
2157
 
 
 
 
 
2158
  @st.cache_resource
2159
def load_llm_model():
    """Load the tokenizer and causal-LM weights for ``google/gemma-2-2b-it``.

    Returns:
        ``(tokenizer, model)`` on success, or ``(None, None)`` when loading
        fails, so the caller can fall back to rule-based recommendations.
    """
    import logging

    model_id = "google/gemma-2-2b-it"
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.float16,
            device_map="auto",
        )
    except Exception:
        # ``except Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still propagate, and the reason for
        # the failure is logged instead of being silently discarded.
        # NOTE(review): the ``@st.cache_resource`` decorator above this
        # function presumably caches this failure result too -- confirm
        # whether a retry on the next rerun is wanted.
        logging.getLogger(__name__).exception(
            "Could not load %s; falling back to rule-based output", model_id
        )
        return None, None
    return tokenizer, model
2170
 
 
 
 
 
 
 
 
 
2171
 
2172
def _first_json_object(text):
    """Return the first JSON object embedded in *text*, or ``None`` if none parses."""
    import json

    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            candidate, _ = decoder.raw_decode(text, start)
        except ValueError:
            candidate = None
        if isinstance(candidate, dict):
            return candidate
        # Not valid JSON from this brace: try the next one.
        start = text.find("{", start + 1)
    return None


def llm_generate_recommendation(insight_summary_text):
    """Ask the local LLM for recommendations and parse its JSON reply.

    Args:
        insight_summary_text: Insight summary inserted verbatim into the prompt.

    Returns:
        The parsed JSON object (requested shape:
        ``{"recommendations": [{"point": ..., "rec": ..., "mit": ...}, ...]}``),
        or ``None`` when the model is unavailable, generation fails, or the
        reply contains no parseable JSON object. ``None`` tells the caller to
        use the rule-based recommendations instead.
    """
    import logging

    tok, mdl = load_llm_model()
    if tok is None or mdl is None:
        return None

    # The template lists five entries so it agrees with the "Generate 5" instruction.
    example_items = ",\n".join(
        f'    {{"point":"{i}","rec":"...","mit":"..."}}' for i in range(1, 6)
    )
    prompt = (
        "You are an expert Industrial Safety Analyst AI.\n"
        "Below is an INSIGHT SUMMARY from a Safety Reporting System:\n"
        "\n"
        "---\n"
        f"{insight_summary_text}\n"
        "---\n"
        "\n"
        "Generate 5 Recommended Actions and Risk Mitigation in clean JSON:\n"
        "\n"
        "{\n"
        '  "recommendations": [\n'
        f"{example_items}\n"
        "  ]\n"
        "}\n"
    )

    try:
        inputs = tok(prompt, return_tensors="pt").to(mdl.device)
        out = mdl.generate(
            **inputs,
            max_new_tokens=380,
            temperature=0.25,
            do_sample=True,
        )
        # generate() on a causal LM returns prompt + continuation. Decode only
        # the continuation; otherwise the JSON template inside the prompt is
        # what gets matched and parsed instead of the model's answer.
        prompt_len = inputs["input_ids"].shape[-1]
        reply = tok.decode(out[0][prompt_len:], skip_special_tokens=True)
    except Exception:
        # Best-effort by design: any generation failure means "use the rules".
        logging.getLogger(__name__).exception("LLM recommendation generation failed")
        return None

    return _first_json_object(reply)
2219
+
2220
+
2221
+ # ============================================================
2222
+ # 2. RULE-BASED ENGINE (YOUR ORIGINAL SCRIPT)
2223
+ # ============================================================
2224
+
2225
  def extract_agentic_insights_v5(df: pd.DataFrame):
2226
  dev = {
2227
  "lowest_ratio_9_locs": [],
 
2237
  "obj6_top2_categories": [],
2238
  }
2239
 
2240
+ # === 1. 9 lowest locations ===
2241
  if {'nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan'}.issubset(df.columns):
2242
  calc = df[['nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan']].copy()
2243
  calc['created_at'] = pd.to_datetime(calc['created_at'], errors='coerce')
 
2253
  lowest_9 = loc_avg.nsmallest(9)
2254
  dev["lowest_ratio_9_locs"] = [(loc, round(ratio, 3)) for loc, ratio in lowest_9.items()]
2255
 
2256
+ # === 2a: Lowest division ratio ===
2257
  if {'nama', 'creator_nid', 'created_at', 'kode_temuan'}.issubset(df.columns):
2258
  calc = df[['nama', 'creator_nid', 'created_at', 'kode_temuan']].copy()
2259
  calc['bulan'] = pd.to_datetime(calc['created_at']).dt.to_period('M')
 
2265
  agg['ratio'] = agg['findings'] / agg['reporters']
2266
  div_ratio = agg.groupby('nama')['ratio'].mean()
2267
  if not div_ratio.empty:
2268
+ dev["obj3a_lowest_div"] = (div_ratio.idxmin(), round(div_ratio.min(), 2))
 
 
2269
 
2270
+ # === 2b: Slowest executor ===
2271
  if 'days_to_close' in df.columns:
2272
  valid = df[df['days_to_close'].notna() & (df['days_to_close'] >= 0)]
2273
  exec_col = 'nama_pic' if 'nama_pic' in valid.columns else 'creator_name'
2274
  if exec_col in valid.columns:
2275
  lead = valid.groupby(exec_col)['days_to_close'].mean()
2276
  if not lead.empty:
2277
+ dev["obj3b_slowest_executor"] = (lead.idxmax(), round(lead.max(), 1))
 
 
2278
 
2279
+ # === 2c: Lowest reporter ===
2280
  if {'creator_name', 'created_at'}.issubset(df.columns):
2281
  calc = df[['creator_name', 'created_at']].copy()
2282
  calc['bulan'] = pd.to_datetime(calc['created_at']).dt.to_period('M')
 
2284
  avg = monthly.groupby('creator_name')['count'].mean()
2285
  avg = avg[avg > 0]
2286
  if not avg.empty:
2287
+ dev["obj3c_lowest_reporter"] = (avg.idxmin(), round(avg.min(), 2))
 
 
2288
 
2289
+ # === 2d: Slowest division ===
2290
  if 'days_to_close' in df.columns and 'nama' in df.columns:
2291
  valid = df[df['days_to_close'].notna() & (df['days_to_close'] >= 0)]
2292
  if not valid.empty:
2293
  lead = valid.groupby('nama')['days_to_close'].mean()
2294
  if not lead.empty:
2295
+ dev["obj3d_slowest_div"] = (lead.idxmax(), round(lead.max(), 1))
 
 
2296
 
2297
+ # === 3. Non-positive ===
2298
  if 'temuan_kategori' in df.columns:
2299
  cnt = df['temuan_kategori'].value_counts(normalize=True) * 100
2300
  dev["obj4_unsafe_condition_pct"] = round(cnt.get("Unsafe Condition", 0), 1)
2301
  dev["obj4_unsafe_action_pct"] = round(cnt.get("Unsafe Action", 0), 1)
2302
  dev["obj4_near_miss_pct"] = round(cnt.get("Near Miss", 0), 1)
2303
 
2304
+ # === 4. Risk Quadrant ===
2305
  X_LIMIT, Y_LIMIT = 20, 3
2306
  if {'nama', 'created_at', 'days_to_close', 'kode_temuan'}.issubset(df.columns):
2307
  calc = df.copy()
 
2311
  avg_count = monthly_counts.groupby('nama')['kode_temuan'].mean().reset_index(name='Finding Count')
2312
  leadtime = calc.groupby('nama')['days_to_close'].mean().reset_index(name='Avg Lead Time')
2313
  mat = avg_count.merge(leadtime, on='nama', how='left').fillna(0)
2314
+
2315
  for _, r in mat.iterrows():
2316
  if r['Finding Count'] >= X_LIMIT and r['Avg Lead Time'] >= Y_LIMIT:
2317
  dev["obj5_q1_divs"].append(r['nama'])
2318
  elif r['Finding Count'] < X_LIMIT and r['Avg Lead Time'] >= Y_LIMIT:
2319
  dev["obj5_q2_divs"].append(r['nama'])
2320
 
2321
+ # === 5. Top categories ===
2322
  if {'kategori', 'temuan_kategori', 'created_at'}.issubset(df.columns):
2323
  nonpos = df[df['temuan_kategori'] != 'Positive']
2324
  if not nonpos.empty:
2325
  start = nonpos['created_at'].min().to_period('M')
2326
  end = nonpos['created_at'].max().to_period('M')
2327
  n_months = len(pd.period_range(start=start, end=end, freq='M'))
 
 
2328
  cat_avg = (nonpos.groupby('kategori').size() / n_months).sort_values(ascending=False).head(2)
2329
+ dev["obj6_top2_categories"] = [(cat, round(v, 1)) for cat, v in cat_avg.items()]
2330
 
2331
  return dev
2332
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2333
 
2334
+ # ============================================================
2335
+ # 3. RUN INSIGHT ENGINE
2336
+ # ============================================================
 
 
 
 
 
 
 
2337
 
2338
+ dev = extract_agentic_insights_v5(df_filtered)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2339
 
2340
+ # Build Insight Summary Text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2341
  insight_lines = []
2342
  if dev["lowest_ratio_9_locs"]:
2343
  loc_list = ", ".join([f"<strong>{loc}</strong> ({ratio})" for loc, ratio in dev["lowest_ratio_9_locs"]])
2344
+ insight_lines.append(f"1. Nine locations with the lowest finding-to-reporter ratio: {loc_list}.")
2345
 
2346
  parts = []
2347
  if dev["obj3a_lowest_div"]:
 
2349
  if dev["obj3c_lowest_reporter"]:
2350
  parts.append(f"reporter <strong>{dev['obj3c_lowest_reporter'][0]}</strong> ({dev['obj3c_lowest_reporter'][1]} findings/month)")
2351
  if dev["obj3d_slowest_div"]:
2352
+ parts.append(f"division <strong>{dev['obj3d_slowest_div'][0]}</strong> ({dev['obj3d_slowest_div'][1]} days)")
2353
  if dev["obj3b_slowest_executor"]:
2354
+ parts.append(f"executor <strong>{dev['obj3b_slowest_executor'][0]}</strong> ({dev['obj3b_slowest_executor'][1]} days)")
2355
 
2356
  if parts:
2357
+ insight_lines.append("2. Agentic AI detection flags uneven operational capacity: " + "; ".join(parts))
 
 
2358
 
2359
  uc, ua, nm = dev["obj4_unsafe_condition_pct"], dev["obj4_unsafe_action_pct"], dev["obj4_near_miss_pct"]
2360
  if uc + ua + nm > 0:
2361
  insight_lines.append(f"3. Non-Positive composition: Unsafe Condition ({uc}%), Unsafe Action ({ua}%), Near Miss ({nm}%).")
2362
 
2363
  if dev["obj5_q1_divs"] or dev["obj5_q2_divs"]:
2364
+ q1 = ", ".join([f"<strong>{x}</strong>" for x in dev["obj5_q1_divs"][:5]])
2365
+ q2 = ", ".join([f"<strong>{x}</strong>" for x in dev["obj5_q2_divs"][:5]])
2366
+ insight_lines.append(f"4. High-risk divisions (QI): {q1 or '—'}. Hidden-risk (QII): {q2 or '—'}.")
2367
 
# Item 5: top recurring non-Positive categories. The list comes from
# ``.head(2)`` and can therefore hold a single entry when the data contains
# only one category, so iterate instead of unpacking exactly two values.
if dev["obj6_top2_categories"]:
    top_categories = " & ".join(
        f"<strong>{cat}</strong> ({avg}/mo)"
        for cat, avg in dev["obj6_top2_categories"]
    )
    insight_lines.append(f"5. Top recurring non-Positive categories: {top_categories}.")

# The summary is rendered as HTML, hence <br> as the line separator.
insight_text = "<br>".join(insight_lines)
2373
+
2374
+
2375
+ # ============================================================
2376
+ # 4. TRY LLM FIRST → ELSE FALLBACK RULE-BASED
2377
+ # ============================================================
2378
+
2379
import re

# ``insight_text`` carries <strong>/<br> markup meant for the HTML card; hand
# the model plain text so the markup does not pollute the prompt.
insight_plain = re.sub(r"<[^>]+>", "", insight_text.replace("<br>", "\n"))
llm_json = llm_generate_recommendation(insight_plain)

# Accept the LLM answer only when it has the requested shape: a non-empty
# list under "recommendations". Anything else falls back to the rules below.
llm_recs = llm_json.get("recommendations") if isinstance(llm_json, dict) else None

if isinstance(llm_recs, list) and llm_recs:
    recs = llm_recs
else:
    # FALLBACK RULE-BASED: one recommendation per insight that was produced.
    recs = []
    if dev["lowest_ratio_9_locs"]:
        recs.append({
            "point": "1",
            "rec": "Launch spot-inspection sprint at low-ratio locations.",
            "mit": "Enable 3-min QR checklist + auto-reminder.",
        })

    if parts:
        recs.append({
            "point": "2",
            "rec": "Activate capacity dashboard.",
            "mit": "Trigger coaching alerts if deviation >20%.",
        })

    if uc + ua + nm > 0:
        recs.append({
            "point": "3",
            "rec": "Enforce photo-based validation.",
            "mit": "Block submission without evidence.",
        })

    if dev["obj5_q1_divs"] or dev["obj5_q2_divs"]:
        recs.append({
            "point": "4",
            "rec": "Assign safety crews to QI divisions.",
            "mit": "Auto escalate if stuck 2 months.",
        })

    if dev["obj6_top2_categories"]:
        # The list may hold one or two (category, avg) pairs; joining avoids
        # the ValueError a fixed two-value unpack raises on a single pair.
        category_names = " & ".join(str(cat) for cat, _ in dev["obj6_top2_categories"])
        recs.append({
            "point": "5",
            "rec": f"Create RCA task force for {category_names}.",
            "mit": "Update tender specs with required mitigations.",
        })
2401
+
2402
+
2403
+ # ============================================================
2404
+ # 5. RENDERING (NO CHANGES)
2405
+ # ============================================================
2406
+
2407
import html

# Insight Summary Card. HTML lines are kept flush-left inside the string so
# Markdown does not treat indented lines as a code block.
st.markdown(
    f"""
<div class="card" style="background-color:#f8f9fa;border-left:4px solid #003DA5;padding:16px;margin-bottom:20px;border-radius:4px;box-shadow:0 2px 4px rgba(0,0,0,0.05);">
<h4 style="margin-top:0;color:#FF6B6B;">Insight Summary</h4>
<p style="margin-bottom:0;line-height:1.6;font-size:0.98em;">{insight_text or 'No insights (missing columns).'}</p>
</div>
""",
    unsafe_allow_html=True
)

# Recommendation Table.
# ``recs`` may come straight from the LLM, so keep only dict items, tolerate
# missing keys, and HTML-escape every cell: the table is rendered with
# unsafe_allow_html=True and model output must not be able to inject markup.
table_recs = [r for r in recs if isinstance(r, dict)] if isinstance(recs, list) else []

if table_recs:
    rows = "".join(
        "<tr>"
        f"<td style='text-align:center;font-weight:bold;width:5%;'>{html.escape(str(r.get('point', idx)))}</td>"
        f"<td style='padding:8px;'>{html.escape(str(r.get('rec', '')))}</td>"
        f"<td style='padding:8px;'>{html.escape(str(r.get('mit', '')))}</td>"
        "</tr>"
        for idx, r in enumerate(table_recs, start=1)
    )

    st.markdown(
        f"""
<div class="card" style="background-color:#e8f5e9;border-left:4px solid #4CAF50;padding:16px;margin-bottom:20px;border-radius:4px;box-shadow:0 2px 4px rgba(0,0,0,0.05);">
<h4 style="margin-top:0;color:#2E7D32;">Recommended Actions & Agentic Risk Mitigation</h4>
<table style="width:100%;border-collapse:collapse;font-size:0.95em;margin-top:12px;">
<thead>
<tr style="background-color:#e8f5ee;">
<th style="padding:10px;text-align:center;border:1px solid #ccc;">#</th>
<th style="padding:10px;text-align:left;border:1px solid #ccc;">Recommended Action</th>
<th style="padding:10px;text-align:left;border:1px solid #ccc;">Risk Mitigation Strategy</th>
</tr>
</thead>
<tbody>{rows}</tbody>
</table>
</div>
""",
        unsafe_allow_html=True
    )
else:
    st.info("No actionable insights generated.")