Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1987,45 +1987,46 @@ else:
|
|
| 1987 |
st.info("No data available for non-positive issue categories with 100% coverage and positive trend.")
|
| 1988 |
# =================== OBJECTIVE 7 — Insight and Recommendation (Agentic AI LLM Style — Final) ===================
|
| 1989 |
# =================== OBJECTIVE 7 — Insight and Recommendation (Final — Agentic AI, No markdown bold) ===================
|
|
|
|
| 1990 |
import streamlit as st
|
| 1991 |
import pandas as pd
|
| 1992 |
import re
|
| 1993 |
import os
|
| 1994 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1995 |
# ==============================
|
| 1996 |
-
# 1. IMPORT &
|
| 1997 |
# ==============================
|
| 1998 |
try:
|
| 1999 |
from transformers import pipeline
|
| 2000 |
except ImportError:
|
| 2001 |
st.error("❌ `transformers` not installed. Run: `pip install transformers torch accelerate sentencepiece einops`")
|
| 2002 |
-
|
| 2003 |
-
|
| 2004 |
-
|
| 2005 |
-
|
| 2006 |
-
|
| 2007 |
-
|
| 2008 |
-
|
| 2009 |
-
|
| 2010 |
-
|
| 2011 |
-
|
| 2012 |
-
|
| 2013 |
-
|
| 2014 |
-
|
| 2015 |
-
|
| 2016 |
-
|
| 2017 |
-
|
| 2018 |
-
|
| 2019 |
-
|
| 2020 |
-
|
| 2021 |
-
|
| 2022 |
-
st.error(f"❌ Failed to load model: {e}")
|
| 2023 |
-
st.stop()
|
| 2024 |
-
|
| 2025 |
-
pipe = load_llm()
|
| 2026 |
|
| 2027 |
# ==============================
|
| 2028 |
-
#
|
| 2029 |
# ==============================
|
| 2030 |
def extract_agentic_insights_v5(df: pd.DataFrame):
|
| 2031 |
dev = {
|
|
@@ -2042,7 +2043,7 @@ def extract_agentic_insights_v5(df: pd.DataFrame):
|
|
| 2042 |
"obj6_top2_categories": [],
|
| 2043 |
}
|
| 2044 |
|
| 2045 |
-
#
|
| 2046 |
if {'nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan'}.issubset(df.columns):
|
| 2047 |
calc = df[['nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan']].copy()
|
| 2048 |
calc['created_at'] = pd.to_datetime(calc['created_at'], errors='coerce')
|
|
@@ -2058,7 +2059,7 @@ def extract_agentic_insights_v5(df: pd.DataFrame):
|
|
| 2058 |
lowest_9 = loc_avg.nsmallest(9)
|
| 2059 |
dev["lowest_ratio_9_locs"] = [(loc, round(ratio, 2)) for loc, ratio in lowest_9.items()]
|
| 2060 |
|
| 2061 |
-
#
|
| 2062 |
if {'nama', 'creator_nid', 'created_at', 'kode_temuan'}.issubset(df.columns):
|
| 2063 |
calc = df[['nama', 'creator_nid', 'created_at', 'kode_temuan']].copy()
|
| 2064 |
calc['bulan'] = pd.to_datetime(calc['created_at']).dt.to_period('M')
|
|
@@ -2074,7 +2075,7 @@ def extract_agentic_insights_v5(df: pd.DataFrame):
|
|
| 2074 |
val = round(div_ratio.min(), 2)
|
| 2075 |
dev["obj3a_lowest_div"] = (name, val)
|
| 2076 |
|
| 2077 |
-
#
|
| 2078 |
if 'days_to_close' in df.columns:
|
| 2079 |
valid = df[df['days_to_close'].notna() & (df['days_to_close'] >= 0)]
|
| 2080 |
exec_col = 'nama_pic' if 'nama_pic' in valid.columns else 'creator_name'
|
|
@@ -2085,7 +2086,7 @@ def extract_agentic_insights_v5(df: pd.DataFrame):
|
|
| 2085 |
val = round(lead.max(), 2)
|
| 2086 |
dev["obj3b_slowest_executor"] = (name, val)
|
| 2087 |
|
| 2088 |
-
#
|
| 2089 |
if {'creator_name', 'created_at'}.issubset(df.columns):
|
| 2090 |
calc = df[['creator_name', 'created_at']].copy()
|
| 2091 |
calc['bulan'] = pd.to_datetime(calc['created_at']).dt.to_period('M')
|
|
@@ -2097,7 +2098,7 @@ def extract_agentic_insights_v5(df: pd.DataFrame):
|
|
| 2097 |
val = round(avg.min(), 2)
|
| 2098 |
dev["obj3c_lowest_reporter"] = (name, val)
|
| 2099 |
|
| 2100 |
-
#
|
| 2101 |
if 'days_to_close' in df.columns and 'nama' in df.columns:
|
| 2102 |
valid = df[df['days_to_close'].notna() & (df['days_to_close'] >= 0)]
|
| 2103 |
if not valid.empty:
|
|
@@ -2107,14 +2108,14 @@ def extract_agentic_insights_v5(df: pd.DataFrame):
|
|
| 2107 |
val = round(lead.max(), 2)
|
| 2108 |
dev["obj3d_slowest_div"] = (name, val)
|
| 2109 |
|
| 2110 |
-
#
|
| 2111 |
if 'temuan_kategori' in df.columns:
|
| 2112 |
cnt = df['temuan_kategori'].value_counts(normalize=True) * 100
|
| 2113 |
dev["obj4_unsafe_condition_pct"] = round(cnt.get("Unsafe Condition", 0), 2)
|
| 2114 |
dev["obj4_unsafe_action_pct"] = round(cnt.get("Unsafe Action", 0), 2)
|
| 2115 |
dev["obj4_near_miss_pct"] = round(cnt.get("Near Miss", 0), 2)
|
| 2116 |
|
| 2117 |
-
#
|
| 2118 |
X_LIMIT, Y_LIMIT = 20, 3
|
| 2119 |
if {'nama', 'created_at', 'days_to_close', 'kode_temuan'}.issubset(df.columns):
|
| 2120 |
calc = df.copy()
|
|
@@ -2130,7 +2131,7 @@ def extract_agentic_insights_v5(df: pd.DataFrame):
|
|
| 2130 |
elif r['Finding Count'] < X_LIMIT and r['Avg Lead Time'] >= Y_LIMIT:
|
| 2131 |
dev["obj5_q2_divs"].append(r['nama'])
|
| 2132 |
|
| 2133 |
-
#
|
| 2134 |
if {'kategori', 'temuan_kategori', 'created_at'}.issubset(df.columns):
|
| 2135 |
nonpos = df[df['temuan_kategori'] != 'Positive']
|
| 2136 |
if not nonpos.empty:
|
|
@@ -2143,16 +2144,18 @@ def extract_agentic_insights_v5(df: pd.DataFrame):
|
|
| 2143 |
return dev
|
| 2144 |
|
| 2145 |
# ==============================
|
| 2146 |
-
#
|
| 2147 |
# ==============================
|
| 2148 |
def generate_llm_text(insight: str, mode: str = "rec") -> str:
|
| 2149 |
-
|
|
|
|
|
|
|
|
|
|
| 2150 |
suffix = "Recommend a single high-leverage action." if mode == "rec" else "Propose one automated/systemic risk control."
|
| 2151 |
messages = [
|
| 2152 |
{"role": "system", "content": "You are PLN's Lead Safety AI. Output ONLY a short, professional sentence. Be directive. No markdown, no emoticons."},
|
| 2153 |
{"role": "user", "content": f"Insight: {insight}\n\n{suffix}"}
|
| 2154 |
]
|
| 2155 |
-
|
| 2156 |
try:
|
| 2157 |
out = pipe(
|
| 2158 |
messages,
|
|
@@ -2161,13 +2164,11 @@ def generate_llm_text(insight: str, mode: str = "rec") -> str:
|
|
| 2161 |
return_full_text=False
|
| 2162 |
)
|
| 2163 |
text = out[0]["generated_text"].strip()
|
| 2164 |
-
# Clean
|
| 2165 |
text = re.sub(r"^(Recommendation|Mitigation|Action|Control):\s*", "", text, flags=re.IGNORECASE)
|
| 2166 |
text = re.sub(r"[\n\"`*]", " ", text).strip(". ")
|
| 2167 |
-
return text[:250]
|
| 2168 |
except Exception as e:
|
| 2169 |
-
|
| 2170 |
-
# Fallback — tetap profesional & sesuai gaya Anda
|
| 2171 |
fallbacks = {
|
| 2172 |
("1", "rec"): "Launch Agency Activation Sprint: ≥1 spot inspection/week per low-ratio location.",
|
| 2173 |
("1", "mit"): "Deploy QR-code checklists + automated reminders; target ratio ≥0.5 in 45 days.",
|
|
@@ -2180,22 +2181,23 @@ def generate_llm_text(insight: str, mode: str = "rec") -> str:
|
|
| 2180 |
("5", "rec"): "Form cross-functional RCA Task Force (Civil, Electrical, HSE, Contractors) for top recurring categories.",
|
| 2181 |
("5", "mit"): "Update tender templates: all bids must include mitigations for these historical findings.",
|
| 2182 |
}
|
| 2183 |
-
|
|
|
|
| 2184 |
|
| 2185 |
# ==============================
|
| 2186 |
-
#
|
| 2187 |
# ==============================
|
| 2188 |
st.markdown("<h3 class='section-title'>OBJECTIVE 7 — Insight and Recommendation</h3>", unsafe_allow_html=True)
|
| 2189 |
|
| 2190 |
-
#
|
| 2191 |
if 'df_filtered' not in st.session_state:
|
| 2192 |
-
st.error("⚠️ `df_filtered` not found in session state. Please
|
| 2193 |
st.stop()
|
| 2194 |
|
| 2195 |
df_filtered = st.session_state.df_filtered
|
| 2196 |
dev = extract_agentic_insights_v5(df_filtered)
|
| 2197 |
|
| 2198 |
-
# === BUILD INSIGHT LINES
|
| 2199 |
insight_lines = []
|
| 2200 |
|
| 2201 |
if dev["lowest_ratio_9_locs"]:
|
|
@@ -2260,13 +2262,18 @@ st.markdown(
|
|
| 2260 |
unsafe_allow_html=True
|
| 2261 |
)
|
| 2262 |
|
|
|
|
| 2263 |
if insight_lines:
|
| 2264 |
-
# Generate rec & mit
|
| 2265 |
rec_list, mit_list = [], []
|
| 2266 |
with st.spinner("🧠 Generating Recommendation & Risk Mitigation with Phi-3-mini..."):
|
| 2267 |
for i, ins in enumerate(insight_lines, 1):
|
| 2268 |
-
|
| 2269 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2270 |
rec = generate_llm_text(clean_ins, "rec")
|
| 2271 |
mit = generate_llm_text(clean_ins, "mit")
|
| 2272 |
rec_list.append(f"{i}. {rec}")
|
|
|
|
| 1987 |
st.info("No data available for non-positive issue categories with 100% coverage and positive trend.")
|
| 1988 |
# =================== OBJECTIVE 7 — Insight and Recommendation (Agentic AI LLM Style — Final) ===================
|
| 1989 |
# =================== OBJECTIVE 7 — Insight and Recommendation (Final — Agentic AI, No markdown bold) ===================
|
| 1990 |
+
# =================== OBJECTIVE 7 — Insight and Recommendation (FINAL — 3 Cards + Phi-3-mini) ===================
|
| 1991 |
import streamlit as st
|
| 1992 |
import pandas as pd
|
| 1993 |
import re
|
| 1994 |
import os
|
| 1995 |
|
| 1996 |
+
# ✅ SAVE df_filtered TO SESSION STATE (must be done BEFORE Objective 7)
|
| 1997 |
+
# Place this right after the filtering in the sidebar (after `submit_clicked = ...`)
|
| 1998 |
+
st.session_state.df_filtered = df_filtered # <-- BARIS INI WAJIB ADA!
|
| 1999 |
+
|
| 2000 |
# ==============================
|
| 2001 |
+
# 1. IMPORT & LLM LOADING (cached)
|
| 2002 |
# ==============================
|
| 2003 |
try:
    # Imported lazily so the rest of the app still renders when the optional
    # `transformers` dependency is missing.
    from transformers import pipeline
except ImportError:
    st.error("❌ `transformers` not installed. Run: `pip install transformers torch accelerate sentencepiece einops`")
    pipe = None
else:
    @st.cache_resource
    def _build_llm_pipeline():
        """Construct the Phi-3-mini text-generation pipeline.

        Exceptions are deliberately allowed to propagate: `st.cache_resource`
        stores return values only, so a failed load is not cached and the next
        rerun tries again. Catching the error here and returning None would
        cache the failure until the cache is cleared.
        """
        # NOTE(review): trust_remote_code=True allows the model repository to
        # run its own Python code at load time; confirm it is still required
        # and consider pinning a model revision.
        return pipeline(
            "text-generation",
            model="microsoft/Phi-3-mini-4k-instruct",
            device_map="auto",
            torch_dtype="auto",
            trust_remote_code=True,
            max_new_tokens=256,
        )

    def load_llm():
        """Return the cached text-generation pipeline, or None if loading fails.

        Status and error messages are shown in the Streamlit UI. A failure is
        reported but not cached, so a later rerun retries the load.
        """
        try:
            st.info("🧠 Loading Phi-3-mini-4k-instruct (optimized for safety recommendations)...")
            llm = _build_llm_pipeline()
            st.success("✅ Phi-3-mini loaded!")
            return llm
        except Exception as e:
            # UI boundary: report the problem and let callers fall back to the
            # "LLM disabled" path instead of crashing the page.
            st.error(f"❌ Failed to load model: {e}")
            return None

    # `load_llm` used to be the cached callable itself; keep `load_llm.clear()`
    # working for any code that resets the cached model.
    load_llm.clear = _build_llm_pipeline.clear

    pipe = load_llm()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2027 |
|
| 2028 |
# ==============================
|
| 2029 |
+
# 2. INSIGHT EXTRACTION (same as your code — corrected to 2 decimal places)
|
| 2030 |
# ==============================
|
| 2031 |
def extract_agentic_insights_v5(df: pd.DataFrame):
|
| 2032 |
dev = {
|
|
|
|
| 2043 |
"obj6_top2_categories": [],
|
| 2044 |
}
|
| 2045 |
|
| 2046 |
+
# 1. 9 locations with lowest finding-to-reporter ratio
|
| 2047 |
if {'nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan'}.issubset(df.columns):
|
| 2048 |
calc = df[['nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan']].copy()
|
| 2049 |
calc['created_at'] = pd.to_datetime(calc['created_at'], errors='coerce')
|
|
|
|
| 2059 |
lowest_9 = loc_avg.nsmallest(9)
|
| 2060 |
dev["lowest_ratio_9_locs"] = [(loc, round(ratio, 2)) for loc, ratio in lowest_9.items()]
|
| 2061 |
|
| 2062 |
+
# 2a: Division — lowest ratio
|
| 2063 |
if {'nama', 'creator_nid', 'created_at', 'kode_temuan'}.issubset(df.columns):
|
| 2064 |
calc = df[['nama', 'creator_nid', 'created_at', 'kode_temuan']].copy()
|
| 2065 |
calc['bulan'] = pd.to_datetime(calc['created_at']).dt.to_period('M')
|
|
|
|
| 2075 |
val = round(div_ratio.min(), 2)
|
| 2076 |
dev["obj3a_lowest_div"] = (name, val)
|
| 2077 |
|
| 2078 |
+
# 2b: Executor — slowest resolution
|
| 2079 |
if 'days_to_close' in df.columns:
|
| 2080 |
valid = df[df['days_to_close'].notna() & (df['days_to_close'] >= 0)]
|
| 2081 |
exec_col = 'nama_pic' if 'nama_pic' in valid.columns else 'creator_name'
|
|
|
|
| 2086 |
val = round(lead.max(), 2)
|
| 2087 |
dev["obj3b_slowest_executor"] = (name, val)
|
| 2088 |
|
| 2089 |
+
# 2c: Reporter — lowest frequency
|
| 2090 |
if {'creator_name', 'created_at'}.issubset(df.columns):
|
| 2091 |
calc = df[['creator_name', 'created_at']].copy()
|
| 2092 |
calc['bulan'] = pd.to_datetime(calc['created_at']).dt.to_period('M')
|
|
|
|
| 2098 |
val = round(avg.min(), 2)
|
| 2099 |
dev["obj3c_lowest_reporter"] = (name, val)
|
| 2100 |
|
| 2101 |
+
# 2d: Division — slowest resolution
|
| 2102 |
if 'days_to_close' in df.columns and 'nama' in df.columns:
|
| 2103 |
valid = df[df['days_to_close'].notna() & (df['days_to_close'] >= 0)]
|
| 2104 |
if not valid.empty:
|
|
|
|
| 2108 |
val = round(lead.max(), 2)
|
| 2109 |
dev["obj3d_slowest_div"] = (name, val)
|
| 2110 |
|
| 2111 |
+
# 3. Non-Positive composition
|
| 2112 |
if 'temuan_kategori' in df.columns:
|
| 2113 |
cnt = df['temuan_kategori'].value_counts(normalize=True) * 100
|
| 2114 |
dev["obj4_unsafe_condition_pct"] = round(cnt.get("Unsafe Condition", 0), 2)
|
| 2115 |
dev["obj4_unsafe_action_pct"] = round(cnt.get("Unsafe Action", 0), 2)
|
| 2116 |
dev["obj4_near_miss_pct"] = round(cnt.get("Near Miss", 0), 2)
|
| 2117 |
|
| 2118 |
+
# 4. Risk Quadrants
|
| 2119 |
X_LIMIT, Y_LIMIT = 20, 3
|
| 2120 |
if {'nama', 'created_at', 'days_to_close', 'kode_temuan'}.issubset(df.columns):
|
| 2121 |
calc = df.copy()
|
|
|
|
| 2131 |
elif r['Finding Count'] < X_LIMIT and r['Avg Lead Time'] >= Y_LIMIT:
|
| 2132 |
dev["obj5_q2_divs"].append(r['nama'])
|
| 2133 |
|
| 2134 |
+
# 5. Top 2 non-Positive categories
|
| 2135 |
if {'kategori', 'temuan_kategori', 'created_at'}.issubset(df.columns):
|
| 2136 |
nonpos = df[df['temuan_kategori'] != 'Positive']
|
| 2137 |
if not nonpos.empty:
|
|
|
|
| 2144 |
return dev
|
| 2145 |
|
| 2146 |
# ==============================
|
| 2147 |
+
# 3. LLM UTILS (safe, fallback-ready)
|
| 2148 |
# ==============================
|
| 2149 |
def generate_llm_text(insight: str, mode: str = "rec") -> str:
|
| 2150 |
+
if pipe is None:
|
| 2151 |
+
mode_map = {"rec": "Recommend action", "mit": "Mitigation strategy"}
|
| 2152 |
+
return f"[LLM disabled] {mode_map[mode]} for: {insight[:50]}..."
|
| 2153 |
+
|
| 2154 |
suffix = "Recommend a single high-leverage action." if mode == "rec" else "Propose one automated/systemic risk control."
|
| 2155 |
messages = [
|
| 2156 |
{"role": "system", "content": "You are PLN's Lead Safety AI. Output ONLY a short, professional sentence. Be directive. No markdown, no emoticons."},
|
| 2157 |
{"role": "user", "content": f"Insight: {insight}\n\n{suffix}"}
|
| 2158 |
]
|
|
|
|
| 2159 |
try:
|
| 2160 |
out = pipe(
|
| 2161 |
messages,
|
|
|
|
| 2164 |
return_full_text=False
|
| 2165 |
)
|
| 2166 |
text = out[0]["generated_text"].strip()
|
|
|
|
| 2167 |
text = re.sub(r"^(Recommendation|Mitigation|Action|Control):\s*", "", text, flags=re.IGNORECASE)
|
| 2168 |
text = re.sub(r"[\n\"`*]", " ", text).strip(". ")
|
| 2169 |
+
return text[:250]
|
| 2170 |
except Exception as e:
|
| 2171 |
+
# Fallback aman (tetap sesuai gaya Anda)
|
|
|
|
| 2172 |
fallbacks = {
|
| 2173 |
("1", "rec"): "Launch Agency Activation Sprint: ≥1 spot inspection/week per low-ratio location.",
|
| 2174 |
("1", "mit"): "Deploy QR-code checklists + automated reminders; target ratio ≥0.5 in 45 days.",
|
|
|
|
| 2181 |
("5", "rec"): "Form cross-functional RCA Task Force (Civil, Electrical, HSE, Contractors) for top recurring categories.",
|
| 2182 |
("5", "mit"): "Update tender templates: all bids must include mitigations for these historical findings.",
|
| 2183 |
}
|
| 2184 |
+
idx = str(len(insight_list) + 1) if 'insight_list' in locals() else "1"
|
| 2185 |
+
return fallbacks.get((idx, mode), f"Review insight and implement targeted action for: {insight[:30]}...")
|
| 2186 |
|
| 2187 |
# ==============================
|
| 2188 |
+
# 4. RUN & RENDER
|
| 2189 |
# ==============================
|
| 2190 |
st.markdown("<h3 class='section-title'>OBJECTIVE 7 — Insight and Recommendation</h3>", unsafe_allow_html=True)
|
| 2191 |
|
| 2192 |
+
# Retrieve df_filtered from session state
|
| 2193 |
if 'df_filtered' not in st.session_state:
|
| 2194 |
+
st.error("⚠️ `df_filtered` not found in session state. Please apply filters first.")
|
| 2195 |
st.stop()
|
| 2196 |
|
| 2197 |
df_filtered = st.session_state.df_filtered
|
| 2198 |
dev = extract_agentic_insights_v5(df_filtered)
|
| 2199 |
|
| 2200 |
+
# === BUILD INSIGHT LINES ===
|
| 2201 |
insight_lines = []
|
| 2202 |
|
| 2203 |
if dev["lowest_ratio_9_locs"]:
|
|
|
|
| 2262 |
unsafe_allow_html=True
|
| 2263 |
)
|
| 2264 |
|
| 2265 |
+
# Card 2 & 3: Recommendation + Mitigation (only if insights exist)
|
| 2266 |
if insight_lines:
|
|
|
|
| 2267 |
rec_list, mit_list = [], []
|
| 2268 |
with st.spinner("🧠 Generating Recommendation & Risk Mitigation with Phi-3-mini..."):
|
| 2269 |
for i, ins in enumerate(insight_lines, 1):
|
| 2270 |
+
clean_ins = re.sub(r"<[^>]+>", "", ins)
|
| 2271 |
+
# Hapus nomor urut depan (misal "1. ", "2. ")
|
| 2272 |
+
for prefix in ["1. ", "2. ", "3. ", "4. ", "5. "]:
|
| 2273 |
+
if clean_ins.startswith(prefix):
|
| 2274 |
+
clean_ins = clean_ins[len(prefix):]
|
| 2275 |
+
break
|
| 2276 |
+
clean_ins = clean_ins.strip()
|
| 2277 |
rec = generate_llm_text(clean_ins, "rec")
|
| 2278 |
mit = generate_llm_text(clean_ins, "mit")
|
| 2279 |
rec_list.append(f"{i}. {rec}")
|