Spaces:

bukittechnology
/

pln

Sleeping

App Files Files Community

SHELLAPANDIANGANHUNGING committed on Dec 11, 2025

Commit

0cf89fe

verified ·

1 Parent(s): 2d8c7f9

Update app.py

Browse files

Files changed (1) hide show

app.py +56 -49

app.py CHANGED Viewed

@@ -1987,45 +1987,46 @@ else:
     st.info("No data available for non-positive issue categories with 100% coverage and positive trend.")
 # =================== OBJECTIVE 7 — Insight and Recommendation (Agentic AI LLM Style — Final) ===================
 # =================== OBJECTIVE 7 — Insight and Recommendation (Final — Agentic AI, No markdown bold) ===================
 import streamlit as st
 import pandas as pd
 import re
 import os
 # ==============================
-# 1. IMPORT & INSTALL CHECK
 # ==============================
 try:
     from transformers import pipeline
 except ImportError:
     st.error("❌ `transformers` not installed. Run: `pip install transformers torch accelerate sentencepiece einops`")
-    st.stop()
-# ==============================
-# 2. LOAD LLM (Phi-3-mini — ringan & stabil)
-# ==============================
-@st.cache_resource
-def load_llm():
-    try:
-        st.info("🧠 Loading Phi-3-mini-4k-instruct (optimized for safety recommendations)...")
-        pipe = pipeline(
-            "text-generation",
-            model="microsoft/Phi-3-mini-4k-instruct",
-            device_map="auto",
-            torch_dtype="auto",
-            trust_remote_code=True,
-            max_new_tokens=256
-        )
-        st.success("✅ Phi-3-mini loaded!")
-        return pipe
-    except Exception as e:
-        st.error(f"❌ Failed to load model: {e}")
-        st.stop()
-pipe = load_llm()
 # ==============================
-# 3. INSIGHT EXTRACTION (sama seperti kode Anda)
 # ==============================
 def extract_agentic_insights_v5(df: pd.DataFrame):
     dev = {
@@ -2042,7 +2043,7 @@ def extract_agentic_insights_v5(df: pd.DataFrame):
         "obj6_top2_categories": [],
     }
-    # === 1. 9 locations with lowest finding-to-reporter ratio ===
     if {'nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan'}.issubset(df.columns):
         calc = df[['nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan']].copy()
         calc['created_at'] = pd.to_datetime(calc['created_at'], errors='coerce')
@@ -2058,7 +2059,7 @@ def extract_agentic_insights_v5(df: pd.DataFrame):
         lowest_9 = loc_avg.nsmallest(9)
         dev["lowest_ratio_9_locs"] = [(loc, round(ratio, 2)) for loc, ratio in lowest_9.items()]
-    # === 2a: Division — lowest ratio ===
     if {'nama', 'creator_nid', 'created_at', 'kode_temuan'}.issubset(df.columns):
         calc = df[['nama', 'creator_nid', 'created_at', 'kode_temuan']].copy()
         calc['bulan'] = pd.to_datetime(calc['created_at']).dt.to_period('M')
@@ -2074,7 +2075,7 @@ def extract_agentic_insights_v5(df: pd.DataFrame):
             val = round(div_ratio.min(), 2)
             dev["obj3a_lowest_div"] = (name, val)
-    # === 2b: Executor — slowest resolution ===
     if 'days_to_close' in df.columns:
         valid = df[df['days_to_close'].notna() & (df['days_to_close'] >= 0)]
         exec_col = 'nama_pic' if 'nama_pic' in valid.columns else 'creator_name'
@@ -2085,7 +2086,7 @@ def extract_agentic_insights_v5(df: pd.DataFrame):
                 val = round(lead.max(), 2)
                 dev["obj3b_slowest_executor"] = (name, val)
-    # === 2c: Reporter — lowest frequency ===
     if {'creator_name', 'created_at'}.issubset(df.columns):
         calc = df[['creator_name', 'created_at']].copy()
         calc['bulan'] = pd.to_datetime(calc['created_at']).dt.to_period('M')
@@ -2097,7 +2098,7 @@ def extract_agentic_insights_v5(df: pd.DataFrame):
             val = round(avg.min(), 2)
             dev["obj3c_lowest_reporter"] = (name, val)
-    # === 2d: Division — slowest resolution ===
     if 'days_to_close' in df.columns and 'nama' in df.columns:
         valid = df[df['days_to_close'].notna() & (df['days_to_close'] >= 0)]
         if not valid.empty:
@@ -2107,14 +2108,14 @@ def extract_agentic_insights_v5(df: pd.DataFrame):
                 val = round(lead.max(), 2)
                 dev["obj3d_slowest_div"] = (name, val)
-    # === 3. Non-Positive composition ===
     if 'temuan_kategori' in df.columns:
         cnt = df['temuan_kategori'].value_counts(normalize=True) * 100
         dev["obj4_unsafe_condition_pct"] = round(cnt.get("Unsafe Condition", 0), 2)
         dev["obj4_unsafe_action_pct"] = round(cnt.get("Unsafe Action", 0), 2)
         dev["obj4_near_miss_pct"] = round(cnt.get("Near Miss", 0), 2)
-    # === 4. Risk Quadrants ===
     X_LIMIT, Y_LIMIT = 20, 3
     if {'nama', 'created_at', 'days_to_close', 'kode_temuan'}.issubset(df.columns):
         calc = df.copy()
@@ -2130,7 +2131,7 @@ def extract_agentic_insights_v5(df: pd.DataFrame):
             elif r['Finding Count'] < X_LIMIT and r['Avg Lead Time'] >= Y_LIMIT:
                 dev["obj5_q2_divs"].append(r['nama'])
-    # === 5. Top 2 non-Positive categories ===
     if {'kategori', 'temuan_kategori', 'created_at'}.issubset(df.columns):
         nonpos = df[df['temuan_kategori'] != 'Positive']
         if not nonpos.empty:
@@ -2143,16 +2144,18 @@ def extract_agentic_insights_v5(df: pd.DataFrame):
     return dev
 # ==============================
-# 4. LLM UTILS (aman & cepat)
 # ==============================
 def generate_llm_text(insight: str, mode: str = "rec") -> str:
-    """Generate rec or mit text using Phi-3-mini."""
     suffix = "Recommend a single high-leverage action." if mode == "rec" else "Propose one automated/systemic risk control."
     messages = [
         {"role": "system", "content": "You are PLN's Lead Safety AI. Output ONLY a short, professional sentence. Be directive. No markdown, no emoticons."},
         {"role": "user", "content": f"Insight: {insight}\n\n{suffix}"}
     ]
     try:
         out = pipe(
             messages,
@@ -2161,13 +2164,11 @@ def generate_llm_text(insight: str, mode: str = "rec") -> str:
             return_full_text=False
         )
         text = out[0]["generated_text"].strip()
-        # Clean
         text = re.sub(r"^(Recommendation|Mitigation|Action|Control):\s*", "", text, flags=re.IGNORECASE)
         text = re.sub(r"[\n\"`*]", " ", text).strip(". ")
-        return text[:250]  # Batas panjang
     except Exception as e:
-        st.warning(f"LLM fallback for {mode}: {e}")
-        # Fallback — tetap profesional & sesuai gaya Anda
         fallbacks = {
             ("1", "rec"): "Launch Agency Activation Sprint: ≥1 spot inspection/week per low-ratio location.",
             ("1", "mit"): "Deploy QR-code checklists + automated reminders; target ratio ≥0.5 in 45 days.",
@@ -2180,22 +2181,23 @@ def generate_llm_text(insight: str, mode: str = "rec") -> str:
             ("5", "rec"): "Form cross-functional RCA Task Force (Civil, Electrical, HSE, Contractors) for top recurring categories.",
             ("5", "mit"): "Update tender templates: all bids must include mitigations for these historical findings.",
         }
-        return fallbacks.get((str(len(insight_list) + 1), mode), "Review insight and implement targeted action.")
 # ==============================
-# 5. MAIN EXECUTION
 # ==============================
 st.markdown("<h3 class='section-title'>OBJECTIVE 7 — Insight and Recommendation</h3>", unsafe_allow_html=True)
-# ✅ Pastikan df_filtered ada
 if 'df_filtered' not in st.session_state:
-    st.error("⚠️ `df_filtered` not found in session state. Please load data first.")
     st.stop()
 df_filtered = st.session_state.df_filtered
 dev = extract_agentic_insights_v5(df_filtered)
-# === BUILD INSIGHT LINES (2 desimal, clean) ===
 insight_lines = []
 if dev["lowest_ratio_9_locs"]:
@@ -2260,13 +2262,18 @@ st.markdown(
     unsafe_allow_html=True
 )
 if insight_lines:
-    # Generate rec & mit
     rec_list, mit_list = [], []
     with st.spinner("🧠 Generating Recommendation & Risk Mitigation with Phi-3-mini..."):
         for i, ins in enumerate(insight_lines, 1):
-            # Ekstrak teks bersih untuk LLM
-            clean_ins = re.sub(r"<[^>]+>", "", ins).replace("1. ", "").replace("2. ", "").replace("3. ", "").replace("4. ", "").replace("5. ", "").strip()
             rec = generate_llm_text(clean_ins, "rec")
             mit = generate_llm_text(clean_ins, "mit")
             rec_list.append(f"{i}. {rec}")

     st.info("No data available for non-positive issue categories with 100% coverage and positive trend.")
 # =================== OBJECTIVE 7 — Insight and Recommendation (Agentic AI LLM Style — Final) ===================
 # =================== OBJECTIVE 7 — Insight and Recommendation (Final — Agentic AI, No markdown bold) ===================
+# =================== OBJECTIVE 7 — Insight and Recommendation (FINAL — 3 Cards + Phi-3-mini) ===================
 import streamlit as st
 import pandas as pd
 import re
 import os
+# ✅ SAVE df_filtered TO SESSION STATE (must be done BEFORE Objective 7)
+# Place this right after the sidebar filtering (after `submit_clicked = ...`)
+st.session_state.df_filtered = df_filtered  # <-- BARIS INI WAJIB ADA!
 # ==============================
+# 1. IMPORT & LLM LOADING (cached)
 # ==============================
 try:
     from transformers import pipeline
 except ImportError:
     st.error("❌ `transformers` not installed. Run: `pip install transformers torch accelerate sentencepiece einops`")
+    pipe = None
+else:
+    @st.cache_resource
+    def load_llm():
+        try:
+            st.info("🧠 Loading Phi-3-mini-4k-instruct (optimized for safety recommendations)...")
+            pipe = pipeline(
+                "text-generation",
+                model="microsoft/Phi-3-mini-4k-instruct",
+                device_map="auto",
+                torch_dtype="auto",
+                trust_remote_code=True,
+                max_new_tokens=256
+            )
+            st.success("✅ Phi-3-mini loaded!")
+            return pipe
+        except Exception as e:
+            st.error(f"❌ Failed to load model: {e}")
+            return None
+    pipe = load_llm()
 # ==============================
+# 2. INSIGHT EXTRACTION (same as your code — corrected to 2 decimal places)
 # ==============================
 def extract_agentic_insights_v5(df: pd.DataFrame):
     dev = {
         "obj6_top2_categories": [],
     }
+    # 1. 9 locations with lowest finding-to-reporter ratio
     if {'nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan'}.issubset(df.columns):
         calc = df[['nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan']].copy()
         calc['created_at'] = pd.to_datetime(calc['created_at'], errors='coerce')
         lowest_9 = loc_avg.nsmallest(9)
         dev["lowest_ratio_9_locs"] = [(loc, round(ratio, 2)) for loc, ratio in lowest_9.items()]
+    # 2a: Division — lowest ratio
     if {'nama', 'creator_nid', 'created_at', 'kode_temuan'}.issubset(df.columns):
         calc = df[['nama', 'creator_nid', 'created_at', 'kode_temuan']].copy()
         calc['bulan'] = pd.to_datetime(calc['created_at']).dt.to_period('M')
             val = round(div_ratio.min(), 2)
             dev["obj3a_lowest_div"] = (name, val)
+    # 2b: Executor — slowest resolution
     if 'days_to_close' in df.columns:
         valid = df[df['days_to_close'].notna() & (df['days_to_close'] >= 0)]
         exec_col = 'nama_pic' if 'nama_pic' in valid.columns else 'creator_name'
                 val = round(lead.max(), 2)
                 dev["obj3b_slowest_executor"] = (name, val)
+    # 2c: Reporter — lowest frequency
     if {'creator_name', 'created_at'}.issubset(df.columns):
         calc = df[['creator_name', 'created_at']].copy()
         calc['bulan'] = pd.to_datetime(calc['created_at']).dt.to_period('M')
             val = round(avg.min(), 2)
             dev["obj3c_lowest_reporter"] = (name, val)
+    # 2d: Division — slowest resolution
     if 'days_to_close' in df.columns and 'nama' in df.columns:
         valid = df[df['days_to_close'].notna() & (df['days_to_close'] >= 0)]
         if not valid.empty:
                 val = round(lead.max(), 2)
                 dev["obj3d_slowest_div"] = (name, val)
+    # 3. Non-Positive composition
     if 'temuan_kategori' in df.columns:
         cnt = df['temuan_kategori'].value_counts(normalize=True) * 100
         dev["obj4_unsafe_condition_pct"] = round(cnt.get("Unsafe Condition", 0), 2)
         dev["obj4_unsafe_action_pct"] = round(cnt.get("Unsafe Action", 0), 2)
         dev["obj4_near_miss_pct"] = round(cnt.get("Near Miss", 0), 2)
+    # 4. Risk Quadrants
     X_LIMIT, Y_LIMIT = 20, 3
     if {'nama', 'created_at', 'days_to_close', 'kode_temuan'}.issubset(df.columns):
         calc = df.copy()
             elif r['Finding Count'] < X_LIMIT and r['Avg Lead Time'] >= Y_LIMIT:
                 dev["obj5_q2_divs"].append(r['nama'])
+    # 5. Top 2 non-Positive categories
     if {'kategori', 'temuan_kategori', 'created_at'}.issubset(df.columns):
         nonpos = df[df['temuan_kategori'] != 'Positive']
         if not nonpos.empty:
     return dev
 # ==============================
+# 3. LLM UTILS (safe, fallback-ready)
 # ==============================
 def generate_llm_text(insight: str, mode: str = "rec") -> str:
+    if pipe is None:
+        mode_map = {"rec": "Recommend action", "mit": "Mitigation strategy"}
+        return f"[LLM disabled] {mode_map[mode]} for: {insight[:50]}..."
     suffix = "Recommend a single high-leverage action." if mode == "rec" else "Propose one automated/systemic risk control."
     messages = [
         {"role": "system", "content": "You are PLN's Lead Safety AI. Output ONLY a short, professional sentence. Be directive. No markdown, no emoticons."},
         {"role": "user", "content": f"Insight: {insight}\n\n{suffix}"}
     ]
     try:
         out = pipe(
             messages,
             return_full_text=False
         )
         text = out[0]["generated_text"].strip()
         text = re.sub(r"^(Recommendation|Mitigation|Action|Control):\s*", "", text, flags=re.IGNORECASE)
         text = re.sub(r"[\n\"`*]", " ", text).strip(". ")
+        return text[:250]
     except Exception as e:
+        # Fallback aman (tetap sesuai gaya Anda)
         fallbacks = {
             ("1", "rec"): "Launch Agency Activation Sprint: ≥1 spot inspection/week per low-ratio location.",
             ("1", "mit"): "Deploy QR-code checklists + automated reminders; target ratio ≥0.5 in 45 days.",
             ("5", "rec"): "Form cross-functional RCA Task Force (Civil, Electrical, HSE, Contractors) for top recurring categories.",
             ("5", "mit"): "Update tender templates: all bids must include mitigations for these historical findings.",
         }
+        idx = str(len(insight_list) + 1) if 'insight_list' in locals() else "1"
+        return fallbacks.get((idx, mode), f"Review insight and implement targeted action for: {insight[:30]}...")
 # ==============================
+# 4. RUN & RENDER
 # ==============================
 st.markdown("<h3 class='section-title'>OBJECTIVE 7 — Insight and Recommendation</h3>", unsafe_allow_html=True)
+# Fetch df_filtered from session state
 if 'df_filtered' not in st.session_state:
+    st.error("⚠️ `df_filtered` not found in session state. Please apply filters first.")
     st.stop()
 df_filtered = st.session_state.df_filtered
 dev = extract_agentic_insights_v5(df_filtered)
+# === BUILD INSIGHT LINES ===
 insight_lines = []
 if dev["lowest_ratio_9_locs"]:
     unsafe_allow_html=True
 )
+# Card 2 & 3: Recommendation + Mitigation (only if insights exist)
 if insight_lines:
     rec_list, mit_list = [], []
     with st.spinner("🧠 Generating Recommendation & Risk Mitigation with Phi-3-mini..."):
         for i, ins in enumerate(insight_lines, 1):
+            clean_ins = re.sub(r"<[^>]+>", "", ins)
+            # Hapus nomor urut depan (misal "1. ", "2. ")
+            for prefix in ["1. ", "2. ", "3. ", "4. ", "5. "]:
+                if clean_ins.startswith(prefix):
+                    clean_ins = clean_ins[len(prefix):]
+                    break
+            clean_ins = clean_ins.strip()
             rec = generate_llm_text(clean_ins, "rec")
             mit = generate_llm_text(clean_ins, "mit")
             rec_list.append(f"{i}. {rec}")