SHELLAPANDIANGANHUNGING committed on
Commit
748ee04
·
verified ·
1 Parent(s): 817e9ab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +135 -136
app.py CHANGED
@@ -1985,33 +1985,43 @@ if not df_category.empty:
1985
  # st.markdown(insight_text, unsafe_allow_html=True)
1986
  else:
1987
  st.info("No data available for non-positive issue categories with 100% coverage and positive trend.")
1988
-
1989
- # =================== OBJECTIVE 7 — Insight and Recommendation ===================
1990
  import streamlit as st
1991
  import pandas as pd
1992
- import requests
1993
-
1994
- # =========================
1995
- # UNIVERSAL LLM CALL
1996
- # =========================
1997
- def llm_call(prompt: str):
1998
- """Universal call untuk LLM (HF Docker / LM Studio / OpenAI)."""
1999
- url = "http://localhost:1234/v1/chat/completions" # UBAH jika perlu
2000
- payload = {
2001
- "model": "Qwen2.5-7B-Instruct", # UBAH sesuai model
2002
- "messages": [{"role": "user", "content": prompt}],
2003
- "temperature": 0.3,
2004
- "max_tokens": 700
2005
- }
2006
- r = requests.post(url, json=payload)
2007
- r.raise_for_status()
2008
- return r.json()["choices"][0]["message"]["content"]
 
 
 
 
 
 
 
 
 
2009
 
 
 
2010
 
2011
- # =========================
2012
- # EXTRACT INSIGHTS
2013
- # =========================
2014
- def extract_agentic_insights(df: pd.DataFrame):
 
2015
  dev = {
2016
  "lowest_ratio_9_locs": [],
2017
  "obj3a_lowest_div": None,
@@ -2026,179 +2036,168 @@ def extract_agentic_insights(df: pd.DataFrame):
2026
  "obj6_top2_categories": [],
2027
  }
2028
 
2029
- # === 1. 9 lowest location ratios ===
2030
  if {'nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan'}.issubset(df.columns):
2031
- calc = df.copy()
2032
  calc['created_at'] = pd.to_datetime(calc['created_at'], errors='coerce')
2033
- calc = calc.dropna(subset=['created_at', 'creator_nid'])
2034
  calc['bulan'] = calc['created_at'].dt.to_period('M')
2035
-
2036
  monthly = calc.groupby(['nama_lokasi_full', 'bulan']).agg(
2037
  findings=('kode_temuan', 'size'),
2038
  reporters=('creator_nid', 'nunique')
2039
  ).reset_index()
2040
-
2041
  monthly = monthly[monthly['reporters'] > 0]
2042
  monthly['ratio'] = monthly['findings'] / monthly['reporters']
2043
-
2044
  loc_avg = monthly.groupby('nama_lokasi_full')['ratio'].mean()
2045
- lowest9 = loc_avg.nsmallest(9)
2046
-
2047
- dev["lowest_ratio_9_locs"] = [(k, round(v, 3)) for k, v in lowest9.items()]
2048
 
2049
- # === 2. Divisions & reporters ===
2050
  if {'nama', 'creator_nid', 'created_at', 'kode_temuan'}.issubset(df.columns):
2051
- calc = df.copy()
2052
  calc['bulan'] = pd.to_datetime(calc['created_at']).dt.to_period('M')
2053
-
2054
  agg = calc.groupby(['nama', 'bulan']).agg(
2055
  findings=('kode_temuan', 'size'),
2056
  reporters=('creator_nid', 'nunique')
2057
- ).reset_index()
2058
-
2059
- agg = agg[agg['reporters'] > 0]
2060
  agg['ratio'] = agg['findings'] / agg['reporters']
2061
-
2062
  div_ratio = agg.groupby('nama')['ratio'].mean()
2063
  if not div_ratio.empty:
2064
  dev["obj3a_lowest_div"] = (div_ratio.idxmin(), round(div_ratio.min(), 2))
2065
 
2066
- # Slowest executor
2067
  if 'days_to_close' in df.columns:
2068
- valid = df[df['days_to_close'] >= 0]
2069
- exec_col = 'nama_pic' if 'nama_pic' in df.columns else 'creator_name'
2070
-
2071
  if exec_col in valid.columns:
2072
  lead = valid.groupby(exec_col)['days_to_close'].mean()
2073
- dev["obj3b_slowest_executor"] = (lead.idxmax(), round(lead.max(), 1))
 
2074
 
2075
- # Lowest reporter
2076
  if {'creator_name', 'created_at'}.issubset(df.columns):
2077
- calc = df.copy()
2078
  calc['bulan'] = pd.to_datetime(calc['created_at']).dt.to_period('M')
2079
- monthly = calc.groupby(['creator_name', 'bulan']).size()
2080
- avg = monthly.groupby('creator_name').mean()
2081
- dev["obj3c_lowest_reporter"] = (avg.idxmin(), round(avg.min(), 2))
2082
-
2083
- # Slowest division (lead time)
2084
- if {'nama', 'days_to_close'}.issubset(df.columns):
2085
- lead = df.groupby('nama')['days_to_close'].mean()
2086
- dev["obj3d_slowest_div"] = (lead.idxmax(), round(lead.max(), 1))
2087
-
2088
- # === 3. Non positive %
 
 
 
 
 
2089
  if 'temuan_kategori' in df.columns:
2090
  cnt = df['temuan_kategori'].value_counts(normalize=True) * 100
2091
  dev["obj4_unsafe_condition_pct"] = round(cnt.get("Unsafe Condition", 0), 1)
2092
  dev["obj4_unsafe_action_pct"] = round(cnt.get("Unsafe Action", 0), 1)
2093
  dev["obj4_near_miss_pct"] = round(cnt.get("Near Miss", 0), 1)
2094
 
2095
- # === 4. Quadrants ===
 
2096
  if {'nama', 'created_at', 'days_to_close', 'kode_temuan'}.issubset(df.columns):
2097
  calc = df.copy()
2098
- calc['created_at'] = pd.to_datetime(calc['created_at'])
2099
- calc['month'] = calc['created_at'].dt.to_period('M').astype(str)
2100
-
2101
- monthly_count = calc.groupby(['nama', 'month'])['kode_temuan'].size().reset_index(name='count')
2102
- avg_count = monthly_count.groupby('nama')['count'].mean().reset_index(name='Finding Count')
2103
- avg_lead = calc.groupby('nama')['days_to_close'].mean().reset_index(name='Avg Lead Time')
2104
-
2105
- m = avg_count.merge(avg_lead, on='nama')
2106
- X, Y = 20, 3
2107
-
2108
- for _, r in m.iterrows():
2109
- if r['Finding Count'] >= X and r['Avg Lead Time'] >= Y:
2110
  dev["obj5_q1_divs"].append(r['nama'])
2111
- elif r['Finding Count'] < X and r['Avg Lead Time'] >= Y:
2112
  dev["obj5_q2_divs"].append(r['nama'])
2113
 
2114
- # === 5. top 2 category ===
2115
  if {'kategori', 'temuan_kategori', 'created_at'}.issubset(df.columns):
2116
- nonpos = df[df['temuan_kategori'] != "Positive"]
2117
- start = nonpos['created_at'].min().to_period('M')
2118
- end = nonpos['created_at'].max().to_period('M')
2119
- n_months = len(pd.period_range(start, end, freq='M'))
2120
- cat_avg = nonpos.groupby('kategori').size() / n_months
2121
- cat_avg = cat_avg.sort_values(ascending=False).head(2)
2122
- dev["obj6_top2_categories"] = [(k, round(v, 1)) for k, v in cat_avg.items()]
2123
 
2124
  return dev
2125
 
2126
 
2127
- # =========================
2128
- # RENDER + LLM GENERATION
2129
- # =========================
2130
 
2131
- dev = extract_agentic_insights(df_filtered)
2132
 
2133
- # ======== BUILD TEXT FOR LLM ========
2134
- prompt = f"""
2135
- You are an advanced Safety Analytics LLM.
2136
 
2137
- Given the following structured insights from real safety operational data:
 
2138
 
2139
- {dev}
 
2140
 
2141
- Your tasks:
 
2142
 
2143
- 1. Write a concise **Insight Summary** (max 6 bullets). Use corporate tone and highlight anomalies.
2144
- 2. Generate **5 Recommended Actions**, each 1–2 sentences.
2145
- 3. Generate **5 Risk Mitigation Strategies**, each paired to each recommendation.
2146
 
2147
- Return output in this JSON structure ONLY:
 
2148
 
2149
- {{
2150
- "insight_summary": "...",
2151
- "recommendations": ["...", "...", "...", "...", "..."],
2152
- "mitigations": ["...", "...", "...", "...", "..."]
2153
- }}
2154
- """
2155
 
2156
- llm_output = llm_call(prompt)
 
2157
 
2158
- import json
2159
- out = json.loads(llm_output)
 
 
 
2160
 
2161
- # ----------------------------
2162
- # STREAMLIT RENDERING
2163
- # ----------------------------
2164
- st.markdown("<h3 class='section-title'>OBJECTIVE 7 — Insight and Recommendation</h3>", unsafe_allow_html=True)
2165
 
2166
- # Insight card
 
 
 
 
 
 
 
 
 
 
 
 
 
2167
  st.markdown(
2168
  f"""
2169
- <div style="background:#f8f9fa; border-left:4px solid #003DA5; padding:16px; border-radius:4px;">
2170
- <h4 style="margin:0;color:#003DA5;">Insight Summary (LLM Generated)</h4>
2171
- <p style="line-height:1.6;">{out['insight_summary'].replace("\n", "<br>")}</p>
2172
- </div>""",unsafe_allow_html=True)
2173
-
2174
- # Recommendations + Mitigations table
2175
- rows = ""
2176
- for i in range(5):
2177
- rows += f"""
2178
- <tr>
2179
- <td style='text-align:center; font-weight:bold;'>{i+1}</td>
2180
- <td style='padding:8px;'>{out['recommendations'][i]}</td>
2181
- <td style='padding:8px;'>{out['mitigations'][i]}</td>
2182
- </tr>
2183
- """
2184
 
2185
  st.markdown(
2186
  f"""
2187
- <div style="background:#e8f5e9; border-left:4px solid #4CAF50; padding:16px; border-radius:4px;">
2188
- <h4 style="margin:0;color:#2E7D32;">Recommended Actions & Agentic Risk Mitigation (LLM)</h4>
2189
- <table style="width:100%; border-collapse:collapse; margin-top:12px;">
2190
- <thead>
2191
- <tr style="background:#d4efdf;">
2192
- <th>#</th>
2193
- <th>Recommended Action</th>
2194
- <th>Risk Mitigation</th>
2195
- </tr>
2196
- </thead>
2197
- <tbody>
2198
- {rows}
2199
- </tbody>
2200
- </table>
2201
  </div>
2202
  """,
2203
- unsafe_allow_html=True
2204
  )
 
1985
  # st.markdown(insight_text, unsafe_allow_html=True)
1986
  else:
1987
  st.info("No data available for non-positive issue categories with 100% coverage and positive trend.")
 
 
1988
  import streamlit as st
1989
  import pandas as pd
1990
+ from huggingface_hub import InferenceClient
1991
+
1992
+ # ==========================
1993
+ # LLM FUNCTION (HuggingFace)
1994
+ # ==========================
1995
+ def llm_generate_recommendation(insights_text):
1996
+ client = InferenceClient(model="meta-llama/Meta-Llama-3-8B-Instruct")
1997
+
1998
+ prompt = f"""
1999
+ You are an expert Safety & Reliability Agentic AI.
2000
+ Based on the following structured INSIGHT SUMMARY, create:
2001
+ 1. Recommended Action (max 2 sentences)
2002
+ 2. Risk Mitigation Strategy (max 2 sentences)
2003
+
2004
+ The insights:
2005
+ {insights_text}
2006
+
2007
+ Now generate concise, high-impact:
2008
+ - "recommendation"
2009
+ - "mitigation"
2010
+ Return output in EXACT JSON format:
2011
+ {{
2012
+ "recommendation": "...",
2013
+ "mitigation": "..."
2014
+ }}
2015
+ """
2016
 
2017
+ output = client.text_generation(prompt, max_new_tokens=256, temperature=0.3)
2018
+ return output
2019
 
2020
+
2021
+ # ==============================================
2022
+ # === INSIGHT COMPUTATION FUNCTION (your code)
2023
+ # ==============================================
2024
+ def extract_agentic_insights_v5(df: pd.DataFrame):
2025
  dev = {
2026
  "lowest_ratio_9_locs": [],
2027
  "obj3a_lowest_div": None,
 
2036
  "obj6_top2_categories": [],
2037
  }
2038
 
2039
+ # === 1. 9 locations with lowest finding-to-reporter ratio ===
2040
  if {'nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan'}.issubset(df.columns):
2041
+ calc = df[['nama_lokasi_full', 'creator_nid', 'created_at', 'kode_temuan']].copy()
2042
  calc['created_at'] = pd.to_datetime(calc['created_at'], errors='coerce')
2043
+ calc = calc.dropna(subset=['created_at', 'nama_lokasi_full', 'creator_nid'])
2044
  calc['bulan'] = calc['created_at'].dt.to_period('M')
 
2045
  monthly = calc.groupby(['nama_lokasi_full', 'bulan']).agg(
2046
  findings=('kode_temuan', 'size'),
2047
  reporters=('creator_nid', 'nunique')
2048
  ).reset_index()
 
2049
  monthly = monthly[monthly['reporters'] > 0]
2050
  monthly['ratio'] = monthly['findings'] / monthly['reporters']
 
2051
  loc_avg = monthly.groupby('nama_lokasi_full')['ratio'].mean()
2052
+ lowest_9 = loc_avg.nsmallest(9)
2053
+ dev["lowest_ratio_9_locs"] = [(loc, round(ratio, 3)) for loc, ratio in lowest_9.items()]
 
2054
 
2055
+ # === 2a Lowest-ratio division ===
2056
  if {'nama', 'creator_nid', 'created_at', 'kode_temuan'}.issubset(df.columns):
2057
+ calc = df[['nama', 'creator_nid', 'created_at', 'kode_temuan']].copy()
2058
  calc['bulan'] = pd.to_datetime(calc['created_at']).dt.to_period('M')
 
2059
  agg = calc.groupby(['nama', 'bulan']).agg(
2060
  findings=('kode_temuan', 'size'),
2061
  reporters=('creator_nid', 'nunique')
2062
+ )
2063
+ agg = agg[agg['reporters'] > 0].reset_index()
 
2064
  agg['ratio'] = agg['findings'] / agg['reporters']
 
2065
  div_ratio = agg.groupby('nama')['ratio'].mean()
2066
  if not div_ratio.empty:
2067
  dev["obj3a_lowest_div"] = (div_ratio.idxmin(), round(div_ratio.min(), 2))
2068
 
2069
+ # === 2b Slowest executor
2070
  if 'days_to_close' in df.columns:
2071
+ valid = df[df['days_to_close'].notna() & (df['days_to_close'] >= 0)]
2072
+ exec_col = 'nama_pic' if 'nama_pic' in valid.columns else 'creator_name'
 
2073
  if exec_col in valid.columns:
2074
  lead = valid.groupby(exec_col)['days_to_close'].mean()
2075
+ if not lead.empty:
2076
+ dev["obj3b_slowest_executor"] = (lead.idxmax(), round(lead.max(), 1))
2077
 
2078
+ # === 2c Lowest reporter freq
2079
  if {'creator_name', 'created_at'}.issubset(df.columns):
2080
+ calc = df[['creator_name', 'created_at']].copy()
2081
  calc['bulan'] = pd.to_datetime(calc['created_at']).dt.to_period('M')
2082
+ monthly = calc.groupby(['creator_name', 'bulan']).size().reset_index(name='count')
2083
+ avg = monthly.groupby('creator_name')['count'].mean()
2084
+ avg = avg[avg > 0]
2085
+ if not avg.empty:
2086
+ dev["obj3c_lowest_reporter"] = (avg.idxmin(), round(avg.min(), 2))
2087
+
2088
+ # === 2d Slowest division resolution
2089
+ if 'days_to_close' in df.columns and 'nama' in df.columns:
2090
+ valid = df[df['days_to_close'].notna() & (df['days_to_close'] >= 0)]
2091
+ if not valid.empty:
2092
+ lead = valid.groupby('nama')['days_to_close'].mean()
2093
+ if not lead.empty:
2094
+ dev["obj3d_slowest_div"] = (lead.idxmax(), round(lead.max(), 1))
2095
+
2096
+ # === 3. Non-Positive findings composition
2097
  if 'temuan_kategori' in df.columns:
2098
  cnt = df['temuan_kategori'].value_counts(normalize=True) * 100
2099
  dev["obj4_unsafe_condition_pct"] = round(cnt.get("Unsafe Condition", 0), 1)
2100
  dev["obj4_unsafe_action_pct"] = round(cnt.get("Unsafe Action", 0), 1)
2101
  dev["obj4_near_miss_pct"] = round(cnt.get("Near Miss", 0), 1)
2102
 
2103
+ # === 4. Quadrants
2104
+ X_LIMIT, Y_LIMIT = 20, 3
2105
  if {'nama', 'created_at', 'days_to_close', 'kode_temuan'}.issubset(df.columns):
2106
  calc = df.copy()
2107
+ calc['created_at'] = pd.to_datetime(calc['created_at'], errors='coerce')
2108
+ calc = calc.assign(month=calc['created_at'].dt.to_period('M').astype(str))
2109
+ monthly_counts = calc.groupby(['nama', 'month'])['kode_temuan'].nunique().reset_index()
2110
+ avg_count = monthly_counts.groupby('nama')['kode_temuan'].mean().reset_index(name='Finding Count')
2111
+ leadtime = calc.groupby('nama')['days_to_close'].mean().reset_index(name='Avg Lead Time')
2112
+ mat = avg_count.merge(leadtime, on='nama', how='left').fillna(0)
2113
+ for _, r in mat.iterrows():
2114
+ if r['Finding Count'] >= X_LIMIT and r['Avg Lead Time'] >= Y_LIMIT:
 
 
 
 
2115
  dev["obj5_q1_divs"].append(r['nama'])
2116
+ elif r['Finding Count'] < X_LIMIT and r['Avg Lead Time'] >= Y_LIMIT:
2117
  dev["obj5_q2_divs"].append(r['nama'])
2118
 
2119
+ # === 5. Top categories
2120
  if {'kategori', 'temuan_kategori', 'created_at'}.issubset(df.columns):
2121
+ nonpos = df[df['temuan_kategori'] != 'Positive']
2122
+ if not nonpos.empty:
2123
+ start = nonpos['created_at'].min().to_period('M')
2124
+ end = nonpos['created_at'].max().to_period('M')
2125
+ n_months = len(pd.period_range(start=start, end=end, freq='M'))
2126
+ cat_avg = (nonpos.groupby('kategori').size() / n_months).sort_values(ascending=False).head(2)
2127
+ dev["obj6_top2_categories"] = [(cat, round(val, 1)) for cat, val in cat_avg.items()]
2128
 
2129
  return dev
2130
 
2131
 
2132
+ # ==========================
2133
+ # ===== MAIN APP ===========
2134
+ # ==========================
2135
 
2136
+ st.markdown("<h3 class='section-title'>OBJECTIVE 7 — Insight and Recommendation (LLM powered)</h3>", unsafe_allow_html=True)
2137
 
2138
+ dev = extract_agentic_insights_v5(df_filtered)
 
 
2139
 
2140
+ # Build INSIGHT SUMMARY as text for LLM
2141
+ summary_parts = []
2142
 
2143
+ if dev["lowest_ratio_9_locs"]:
2144
+ summary_parts.append(f"Lowest-ratio locations: {dev['lowest_ratio_9_locs']}")
2145
 
2146
+ if dev["obj3a_lowest_div"]:
2147
+ summary_parts.append(f"Lowest performing division: {dev['obj3a_lowest_div']}")
2148
 
2149
+ if dev["obj3b_slowest_executor"]:
2150
+ summary_parts.append(f"Slowest executor: {dev['obj3b_slowest_executor']}")
 
2151
 
2152
+ if dev["obj3c_lowest_reporter"]:
2153
+ summary_parts.append(f"Least active reporter: {dev['obj3c_lowest_reporter']}")
2154
 
2155
+ if dev["obj3d_slowest_div"]:
2156
+ summary_parts.append(f"Slowest division resolution: {dev['obj3d_slowest_div']}")
 
 
 
 
2157
 
2158
+ uc, ua, nm = dev["obj4_unsafe_condition_pct"], dev["obj4_unsafe_action_pct"], dev["obj4_near_miss_pct"]
2159
+ summary_parts.append(f"Non-Positive: UnsafeCondition={uc}%, UnsafeAction={ua}%, NearMiss={nm}%")
2160
 
2161
+ summary_parts.append(f"Quadrant I: {dev['obj5_q1_divs']}")
2162
+ summary_parts.append(f"Quadrant II: {dev['obj5_q2_divs']}")
2163
+
2164
+ if dev["obj6_top2_categories"]:
2165
+ summary_parts.append(f"Top non-positive categories: {dev['obj6_top2_categories']}")
2166
 
2167
+ insight_summary_text = "\n".join(summary_parts)
 
 
 
2168
 
2169
+ # Call LLM to generate recommendation + mitigation
2170
+ llm_json = llm_generate_recommendation(insight_summary_text)
2171
+
2172
+ # Try to parse JSON
2173
+ import json
2174
+ try:
2175
+ llm_output = json.loads(llm_json)
2176
+ recommendation = llm_output["recommendation"]
2177
+ mitigation = llm_output["mitigation"]
2178
+ except:
2179
+ recommendation = "LLM output not valid JSON."
2180
+ mitigation = "-"
2181
+
2182
+ # Render
2183
  st.markdown(
2184
  f"""
2185
+ <div style="padding:15px; background:#eef3ff; border-left:4px solid #003DA5;">
2186
+ <h4>Insight Summary</h4>
2187
+ <pre style="white-space: pre-wrap;">{insight_summary_text}</pre>
2188
+ </div>
2189
+ """,
2190
+ unsafe_allow_html=True,
2191
+ )
 
 
 
 
 
 
 
 
2192
 
2193
  st.markdown(
2194
  f"""
2195
+ <div style="padding:15px; background:#e8f5e9; border-left:4px solid #4CAF50; margin-top:20px;">
2196
+ <h4>LLM Recommended Action</h4>
2197
+ <p>{recommendation}</p>
2198
+ <h4>LLM Risk Mitigation</h4>
2199
+ <p>{mitigation}</p>
 
 
 
 
 
 
 
 
 
2200
  </div>
2201
  """,
2202
+ unsafe_allow_html=True,
2203
  )