SHELLAPANDIANGANHUNGING committed on
Commit
0cf89fe
·
verified ·
1 Parent(s): 2d8c7f9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -49
app.py CHANGED
@@ -1987,45 +1987,46 @@ else:
1987
  st.info("No data available for non-positive issue categories with 100% coverage and positive trend.")
1988
  # =================== OBJECTIVE 7 — Insight and Recommendation (Agentic AI LLM Style — Final) ===================
1989
  # =================== OBJECTIVE 7 — Insight and Recommendation (Final — Agentic AI, No markdown bold) ===================
 
1990
  import streamlit as st
1991
  import pandas as pd
1992
  import re
1993
  import os
1994
 
 
 
 
 
1995
  # ==============================
1996
- # 1. IMPORT & INSTALL CHECK
1997
  # ==============================
1998
  try:
1999
  from transformers import pipeline
2000
  except ImportError:
2001
  st.error("❌ `transformers` not installed. Run: `pip install transformers torch accelerate sentencepiece einops`")
2002
- st.stop()
2003
-
2004
- # ==============================
2005
- # 2. LOAD LLM (Phi-3-mini — ringan & stabil)
2006
- # ==============================
2007
- @st.cache_resource
2008
- def load_llm():
2009
- try:
2010
- st.info("🧠 Loading Phi-3-mini-4k-instruct (optimized for safety recommendations)...")
2011
- pipe = pipeline(
2012
- "text-generation",
2013
- model="microsoft/Phi-3-mini-4k-instruct",
2014
- device_map="auto",
2015
- torch_dtype="auto",
2016
- trust_remote_code=True,
2017
- max_new_tokens=256
2018
- )
2019
- st.success(" Phi-3-mini loaded!")
2020
- return pipe
2021
- except Exception as e:
2022
- st.error(f"❌ Failed to load model: {e}")
2023
- st.stop()
2024
-
2025
- pipe = load_llm()
2026
 
2027
  # ==============================
2028
- # 3. INSIGHT EXTRACTION (sama seperti kode Anda)
2029
  # ==============================
2030
  def extract_agentic_insights_v5(df: pd.DataFrame):
2031
  dev = {
@@ -2042,7 +2043,7 @@ def extract_agentic_insights_v5(df: pd.DataFrame):
2042
  "obj6_top2_categories": [],
2043
  }
2044
 
2045
- # === 1. 9 locations with lowest finding-to-reporter ratio ===
2046
  if {'nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan'}.issubset(df.columns):
2047
  calc = df[['nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan']].copy()
2048
  calc['created_at'] = pd.to_datetime(calc['created_at'], errors='coerce')
@@ -2058,7 +2059,7 @@ def extract_agentic_insights_v5(df: pd.DataFrame):
2058
  lowest_9 = loc_avg.nsmallest(9)
2059
  dev["lowest_ratio_9_locs"] = [(loc, round(ratio, 2)) for loc, ratio in lowest_9.items()]
2060
 
2061
- # === 2a: Division — lowest ratio ===
2062
  if {'nama', 'creator_nid', 'created_at', 'kode_temuan'}.issubset(df.columns):
2063
  calc = df[['nama', 'creator_nid', 'created_at', 'kode_temuan']].copy()
2064
  calc['bulan'] = pd.to_datetime(calc['created_at']).dt.to_period('M')
@@ -2074,7 +2075,7 @@ def extract_agentic_insights_v5(df: pd.DataFrame):
2074
  val = round(div_ratio.min(), 2)
2075
  dev["obj3a_lowest_div"] = (name, val)
2076
 
2077
- # === 2b: Executor — slowest resolution ===
2078
  if 'days_to_close' in df.columns:
2079
  valid = df[df['days_to_close'].notna() & (df['days_to_close'] >= 0)]
2080
  exec_col = 'nama_pic' if 'nama_pic' in valid.columns else 'creator_name'
@@ -2085,7 +2086,7 @@ def extract_agentic_insights_v5(df: pd.DataFrame):
2085
  val = round(lead.max(), 2)
2086
  dev["obj3b_slowest_executor"] = (name, val)
2087
 
2088
- # === 2c: Reporter — lowest frequency ===
2089
  if {'creator_name', 'created_at'}.issubset(df.columns):
2090
  calc = df[['creator_name', 'created_at']].copy()
2091
  calc['bulan'] = pd.to_datetime(calc['created_at']).dt.to_period('M')
@@ -2097,7 +2098,7 @@ def extract_agentic_insights_v5(df: pd.DataFrame):
2097
  val = round(avg.min(), 2)
2098
  dev["obj3c_lowest_reporter"] = (name, val)
2099
 
2100
- # === 2d: Division — slowest resolution ===
2101
  if 'days_to_close' in df.columns and 'nama' in df.columns:
2102
  valid = df[df['days_to_close'].notna() & (df['days_to_close'] >= 0)]
2103
  if not valid.empty:
@@ -2107,14 +2108,14 @@ def extract_agentic_insights_v5(df: pd.DataFrame):
2107
  val = round(lead.max(), 2)
2108
  dev["obj3d_slowest_div"] = (name, val)
2109
 
2110
- # === 3. Non-Positive composition ===
2111
  if 'temuan_kategori' in df.columns:
2112
  cnt = df['temuan_kategori'].value_counts(normalize=True) * 100
2113
  dev["obj4_unsafe_condition_pct"] = round(cnt.get("Unsafe Condition", 0), 2)
2114
  dev["obj4_unsafe_action_pct"] = round(cnt.get("Unsafe Action", 0), 2)
2115
  dev["obj4_near_miss_pct"] = round(cnt.get("Near Miss", 0), 2)
2116
 
2117
- # === 4. Risk Quadrants ===
2118
  X_LIMIT, Y_LIMIT = 20, 3
2119
  if {'nama', 'created_at', 'days_to_close', 'kode_temuan'}.issubset(df.columns):
2120
  calc = df.copy()
@@ -2130,7 +2131,7 @@ def extract_agentic_insights_v5(df: pd.DataFrame):
2130
  elif r['Finding Count'] < X_LIMIT and r['Avg Lead Time'] >= Y_LIMIT:
2131
  dev["obj5_q2_divs"].append(r['nama'])
2132
 
2133
- # === 5. Top 2 non-Positive categories ===
2134
  if {'kategori', 'temuan_kategori', 'created_at'}.issubset(df.columns):
2135
  nonpos = df[df['temuan_kategori'] != 'Positive']
2136
  if not nonpos.empty:
@@ -2143,16 +2144,18 @@ def extract_agentic_insights_v5(df: pd.DataFrame):
2143
  return dev
2144
 
2145
  # ==============================
2146
- # 4. LLM UTILS (aman & cepat)
2147
  # ==============================
2148
  def generate_llm_text(insight: str, mode: str = "rec") -> str:
2149
- """Generate rec or mit text using Phi-3-mini."""
 
 
 
2150
  suffix = "Recommend a single high-leverage action." if mode == "rec" else "Propose one automated/systemic risk control."
2151
  messages = [
2152
  {"role": "system", "content": "You are PLN's Lead Safety AI. Output ONLY a short, professional sentence. Be directive. No markdown, no emoticons."},
2153
  {"role": "user", "content": f"Insight: {insight}\n\n{suffix}"}
2154
  ]
2155
-
2156
  try:
2157
  out = pipe(
2158
  messages,
@@ -2161,13 +2164,11 @@ def generate_llm_text(insight: str, mode: str = "rec") -> str:
2161
  return_full_text=False
2162
  )
2163
  text = out[0]["generated_text"].strip()
2164
- # Clean
2165
  text = re.sub(r"^(Recommendation|Mitigation|Action|Control):\s*", "", text, flags=re.IGNORECASE)
2166
  text = re.sub(r"[\n\"`*]", " ", text).strip(". ")
2167
- return text[:250] # Batas panjang
2168
  except Exception as e:
2169
- st.warning(f"LLM fallback for {mode}: {e}")
2170
- # Fallback — tetap profesional & sesuai gaya Anda
2171
  fallbacks = {
2172
  ("1", "rec"): "Launch Agency Activation Sprint: ≥1 spot inspection/week per low-ratio location.",
2173
  ("1", "mit"): "Deploy QR-code checklists + automated reminders; target ratio ≥0.5 in 45 days.",
@@ -2180,22 +2181,23 @@ def generate_llm_text(insight: str, mode: str = "rec") -> str:
2180
  ("5", "rec"): "Form cross-functional RCA Task Force (Civil, Electrical, HSE, Contractors) for top recurring categories.",
2181
  ("5", "mit"): "Update tender templates: all bids must include mitigations for these historical findings.",
2182
  }
2183
- return fallbacks.get((str(len(insight_list) + 1), mode), "Review insight and implement targeted action.")
 
2184
 
2185
  # ==============================
2186
- # 5. MAIN EXECUTION
2187
  # ==============================
2188
  st.markdown("<h3 class='section-title'>OBJECTIVE 7 — Insight and Recommendation</h3>", unsafe_allow_html=True)
2189
 
2190
- # Pastikan df_filtered ada
2191
  if 'df_filtered' not in st.session_state:
2192
- st.error("⚠️ `df_filtered` not found in session state. Please load data first.")
2193
  st.stop()
2194
 
2195
  df_filtered = st.session_state.df_filtered
2196
  dev = extract_agentic_insights_v5(df_filtered)
2197
 
2198
- # === BUILD INSIGHT LINES (2 desimal, clean) ===
2199
  insight_lines = []
2200
 
2201
  if dev["lowest_ratio_9_locs"]:
@@ -2260,13 +2262,18 @@ st.markdown(
2260
  unsafe_allow_html=True
2261
  )
2262
 
 
2263
  if insight_lines:
2264
- # Generate rec & mit
2265
  rec_list, mit_list = [], []
2266
  with st.spinner("🧠 Generating Recommendation & Risk Mitigation with Phi-3-mini..."):
2267
  for i, ins in enumerate(insight_lines, 1):
2268
- # Ekstrak teks bersih untuk LLM
2269
- clean_ins = re.sub(r"<[^>]+>", "", ins).replace("1. ", "").replace("2. ", "").replace("3. ", "").replace("4. ", "").replace("5. ", "").strip()
 
 
 
 
 
2270
  rec = generate_llm_text(clean_ins, "rec")
2271
  mit = generate_llm_text(clean_ins, "mit")
2272
  rec_list.append(f"{i}. {rec}")
 
1987
  st.info("No data available for non-positive issue categories with 100% coverage and positive trend.")
1988
  # =================== OBJECTIVE 7 — Insight and Recommendation (Agentic AI LLM Style — Final) ===================
1989
  # =================== OBJECTIVE 7 — Insight and Recommendation (Final — Agentic AI, No markdown bold) ===================
1990
+ # =================== OBJECTIVE 7 — Insight and Recommendation (FINAL — 3 Cards + Phi-3-mini) ===================
1991
  import streamlit as st
1992
  import pandas as pd
1993
  import re
1994
  import os
1995
 
1996
+ # ✅ SIMPAN df_filtered KE SESSION STATE (harus dilakukan SEBELUM Objective 7)
1997
+ # Letakkan ini tepat setelah filtering di sidebar (setelah `submit_clicked = ...`)
1998
+ st.session_state.df_filtered = df_filtered # <-- BARIS INI WAJIB ADA!
1999
+
2000
  # ==============================
2001
+ # 1. IMPORT & LLM LOADING (cached)
2002
  # ==============================
2003
  try:
2004
  from transformers import pipeline
2005
  except ImportError:
2006
  st.error("❌ `transformers` not installed. Run: `pip install transformers torch accelerate sentencepiece einops`")
2007
+ pipe = None
2008
+ else:
2009
+ @st.cache_resource
2010
+ def load_llm():
2011
+ try:
2012
+ st.info("🧠 Loading Phi-3-mini-4k-instruct (optimized for safety recommendations)...")
2013
+ pipe = pipeline(
2014
+ "text-generation",
2015
+ model="microsoft/Phi-3-mini-4k-instruct",
2016
+ device_map="auto",
2017
+ torch_dtype="auto",
2018
+ trust_remote_code=True,
2019
+ max_new_tokens=256
2020
+ )
2021
+ st.success("✅ Phi-3-mini loaded!")
2022
+ return pipe
2023
+ except Exception as e:
2024
+ st.error(f" Failed to load model: {e}")
2025
+ return None
2026
+ pipe = load_llm()
 
 
 
 
2027
 
2028
  # ==============================
2029
+ # 2. INSIGHT EXTRACTION (sama seperti kode Anda — diperbaiki ke 2 desimal)
2030
  # ==============================
2031
  def extract_agentic_insights_v5(df: pd.DataFrame):
2032
  dev = {
 
2043
  "obj6_top2_categories": [],
2044
  }
2045
 
2046
+ # 1. 9 locations with lowest finding-to-reporter ratio
2047
  if {'nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan'}.issubset(df.columns):
2048
  calc = df[['nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan']].copy()
2049
  calc['created_at'] = pd.to_datetime(calc['created_at'], errors='coerce')
 
2059
  lowest_9 = loc_avg.nsmallest(9)
2060
  dev["lowest_ratio_9_locs"] = [(loc, round(ratio, 2)) for loc, ratio in lowest_9.items()]
2061
 
2062
+ # 2a: Division — lowest ratio
2063
  if {'nama', 'creator_nid', 'created_at', 'kode_temuan'}.issubset(df.columns):
2064
  calc = df[['nama', 'creator_nid', 'created_at', 'kode_temuan']].copy()
2065
  calc['bulan'] = pd.to_datetime(calc['created_at']).dt.to_period('M')
 
2075
  val = round(div_ratio.min(), 2)
2076
  dev["obj3a_lowest_div"] = (name, val)
2077
 
2078
+ # 2b: Executor — slowest resolution
2079
  if 'days_to_close' in df.columns:
2080
  valid = df[df['days_to_close'].notna() & (df['days_to_close'] >= 0)]
2081
  exec_col = 'nama_pic' if 'nama_pic' in valid.columns else 'creator_name'
 
2086
  val = round(lead.max(), 2)
2087
  dev["obj3b_slowest_executor"] = (name, val)
2088
 
2089
+ # 2c: Reporter — lowest frequency
2090
  if {'creator_name', 'created_at'}.issubset(df.columns):
2091
  calc = df[['creator_name', 'created_at']].copy()
2092
  calc['bulan'] = pd.to_datetime(calc['created_at']).dt.to_period('M')
 
2098
  val = round(avg.min(), 2)
2099
  dev["obj3c_lowest_reporter"] = (name, val)
2100
 
2101
+ # 2d: Division — slowest resolution
2102
  if 'days_to_close' in df.columns and 'nama' in df.columns:
2103
  valid = df[df['days_to_close'].notna() & (df['days_to_close'] >= 0)]
2104
  if not valid.empty:
 
2108
  val = round(lead.max(), 2)
2109
  dev["obj3d_slowest_div"] = (name, val)
2110
 
2111
+ # 3. Non-Positive composition
2112
  if 'temuan_kategori' in df.columns:
2113
  cnt = df['temuan_kategori'].value_counts(normalize=True) * 100
2114
  dev["obj4_unsafe_condition_pct"] = round(cnt.get("Unsafe Condition", 0), 2)
2115
  dev["obj4_unsafe_action_pct"] = round(cnt.get("Unsafe Action", 0), 2)
2116
  dev["obj4_near_miss_pct"] = round(cnt.get("Near Miss", 0), 2)
2117
 
2118
+ # 4. Risk Quadrants
2119
  X_LIMIT, Y_LIMIT = 20, 3
2120
  if {'nama', 'created_at', 'days_to_close', 'kode_temuan'}.issubset(df.columns):
2121
  calc = df.copy()
 
2131
  elif r['Finding Count'] < X_LIMIT and r['Avg Lead Time'] >= Y_LIMIT:
2132
  dev["obj5_q2_divs"].append(r['nama'])
2133
 
2134
+ # 5. Top 2 non-Positive categories
2135
  if {'kategori', 'temuan_kategori', 'created_at'}.issubset(df.columns):
2136
  nonpos = df[df['temuan_kategori'] != 'Positive']
2137
  if not nonpos.empty:
 
2144
  return dev
2145
 
2146
  # ==============================
2147
+ # 3. LLM UTILS (aman, fallback-ready)
2148
  # ==============================
2149
  def generate_llm_text(insight: str, mode: str = "rec") -> str:
2150
+ if pipe is None:
2151
+ mode_map = {"rec": "Recommend action", "mit": "Mitigation strategy"}
2152
+ return f"[LLM disabled] {mode_map[mode]} for: {insight[:50]}..."
2153
+
2154
  suffix = "Recommend a single high-leverage action." if mode == "rec" else "Propose one automated/systemic risk control."
2155
  messages = [
2156
  {"role": "system", "content": "You are PLN's Lead Safety AI. Output ONLY a short, professional sentence. Be directive. No markdown, no emoticons."},
2157
  {"role": "user", "content": f"Insight: {insight}\n\n{suffix}"}
2158
  ]
 
2159
  try:
2160
  out = pipe(
2161
  messages,
 
2164
  return_full_text=False
2165
  )
2166
  text = out[0]["generated_text"].strip()
 
2167
  text = re.sub(r"^(Recommendation|Mitigation|Action|Control):\s*", "", text, flags=re.IGNORECASE)
2168
  text = re.sub(r"[\n\"`*]", " ", text).strip(". ")
2169
+ return text[:250]
2170
  except Exception as e:
2171
+ # Fallback aman (tetap sesuai gaya Anda)
 
2172
  fallbacks = {
2173
  ("1", "rec"): "Launch Agency Activation Sprint: ≥1 spot inspection/week per low-ratio location.",
2174
  ("1", "mit"): "Deploy QR-code checklists + automated reminders; target ratio ≥0.5 in 45 days.",
 
2181
  ("5", "rec"): "Form cross-functional RCA Task Force (Civil, Electrical, HSE, Contractors) for top recurring categories.",
2182
  ("5", "mit"): "Update tender templates: all bids must include mitigations for these historical findings.",
2183
  }
2184
+ idx = str(len(insight_list) + 1) if 'insight_list' in locals() else "1"
2185
+ return fallbacks.get((idx, mode), f"Review insight and implement targeted action for: {insight[:30]}...")
2186
 
2187
  # ==============================
2188
+ # 4. RUN & RENDER
2189
  # ==============================
2190
  st.markdown("<h3 class='section-title'>OBJECTIVE 7 — Insight and Recommendation</h3>", unsafe_allow_html=True)
2191
 
2192
+ # Ambil df_filtered dari session state
2193
  if 'df_filtered' not in st.session_state:
2194
+ st.error("⚠️ `df_filtered` not found in session state. Please apply filters first.")
2195
  st.stop()
2196
 
2197
  df_filtered = st.session_state.df_filtered
2198
  dev = extract_agentic_insights_v5(df_filtered)
2199
 
2200
+ # === BUILD INSIGHT LINES ===
2201
  insight_lines = []
2202
 
2203
  if dev["lowest_ratio_9_locs"]:
 
2262
  unsafe_allow_html=True
2263
  )
2264
 
2265
+ # Card 2 & 3: Recommendation + Mitigation (only if insights exist)
2266
  if insight_lines:
 
2267
  rec_list, mit_list = [], []
2268
  with st.spinner("🧠 Generating Recommendation & Risk Mitigation with Phi-3-mini..."):
2269
  for i, ins in enumerate(insight_lines, 1):
2270
+ clean_ins = re.sub(r"<[^>]+>", "", ins)
2271
+ # Hapus nomor urut depan (misal "1. ", "2. ")
2272
+ for prefix in ["1. ", "2. ", "3. ", "4. ", "5. "]:
2273
+ if clean_ins.startswith(prefix):
2274
+ clean_ins = clean_ins[len(prefix):]
2275
+ break
2276
+ clean_ins = clean_ins.strip()
2277
  rec = generate_llm_text(clean_ins, "rec")
2278
  mit = generate_llm_text(clean_ins, "mit")
2279
  rec_list.append(f"{i}. {rec}")