SHELLAPANDIANGANHUNGING committed on
Commit
06018d0
·
verified ·
1 Parent(s): a4b0e9c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +800 -83
app.py CHANGED
@@ -1,104 +1,821 @@
 
1
  import streamlit as st
2
  import pandas as pd
3
  import plotly.express as px
 
 
4
 
5
- # =============== STREAMLIT CONFIG ==================
6
  st.set_page_config(
7
- page_title="Fatigue Camera Dashboard",
 
8
  layout="wide",
9
- page_icon="😴"
10
  )
11
 
12
- # =============== APP HEADER =========================
13
- st.title("πŸ˜΄πŸ“Š Fatigue & Distraction Camera Safety Dashboard")
14
- st.write("Upload data dari fatigue monitoring system untuk analisis safety.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- # =============== FILE UPLOADER ======================
17
- uploaded_file = st.file_uploader("πŸ“‚ Upload File (Excel)", type=["xlsx", "xls"])
18
 
19
- if uploaded_file:
20
- df = pd.read_excel(uploaded_file)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
- st.subheader("πŸ“„ Data Preview")
23
- st.dataframe(df, use_container_width=True)
24
-
25
- # =============== CLEANING OTOMATIS ===============
26
- df.columns = df.columns.str.lower().str.replace(" ", "_").str.strip()
27
-
28
- # Deteksi kolom penting
29
- col_time = next((c for c in df.columns if "time" in c or "timestamp" in c or "date" in c), None)
30
- col_operator = next((c for c in df.columns if "operator" in c or "driver" in c or "employee" in c), None)
31
- col_fatigue = next((c for c in df.columns if "fatigue" in c), None)
32
- col_distraction = next((c for c in df.columns if "distract" in c), None)
33
-
34
- st.write("🧠 Kolom terdeteksi:")
35
- st.code(f"""
36
- Time Column: {col_time}
37
- Operator Column: {col_operator}
38
- Fatigue Column: {col_fatigue}
39
- Distraction Column: {col_distraction}
40
- """)
41
-
42
- # =============== KPI SUMMARY =====================
43
- st.divider()
44
- st.subheader("πŸ“Œ Safety Event Summary")
45
-
46
- cols = st.columns(3)
47
-
48
- if col_fatigue:
49
- total_fatigue = df[col_fatigue].sum()
50
- cols[0].metric("Total Fatigue Alerts", total_fatigue)
51
-
52
- if col_distraction:
53
- total_distraction = df[col_distraction].sum()
54
- cols[1].metric("Total Distraction Alerts", total_distraction)
55
-
56
- if col_operator:
57
- operator_count = df[col_operator].nunique()
58
- cols[2].metric("Operators Monitored", operator_count)
59
-
60
- # =============== TREND CHART =====================
61
- if col_time and col_fatigue:
62
- df[col_time] = pd.to_datetime(df[col_time], errors="coerce")
63
-
64
- fig = px.line(
65
- df.sort_values(col_time),
66
- x=col_time,
67
- y=col_fatigue,
68
- title="πŸ“ˆ Trend Fatigue Events Over Time"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  )
70
- st.plotly_chart(fig, use_container_width=True)
 
 
 
 
 
 
 
 
 
 
71
 
72
- # =============== OPERATOR RANKING =================
73
- if col_operator and col_fatigue:
74
- st.subheader("πŸ† Operator Ranking (Based on Fatigue Events)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
- rank_df = df.groupby(col_operator)[col_fatigue].sum().sort_values(ascending=False)
77
- st.bar_chart(rank_df)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
- # =============== AUTO INSIGHT =====================
80
- st.divider()
81
- st.subheader("🧩 AI Insight (Auto Summary)")
 
82
 
83
- insight = []
 
 
 
 
84
 
85
- if col_time and col_fatigue:
86
- peak_hour = df.groupby(df[col_time].dt.hour)[col_fatigue].sum().idxmax()
87
- insight.append(f"⏰ **Jam paling rawan:** {peak_hour}:00")
 
 
 
 
88
 
89
- if col_operator:
90
- worst_operator = rank_df.index[0]
91
- insight.append(f"⚠️ Operator dengan alert tertinggi: **{worst_operator}** β†’ perlu coaching / fatigue management.")
 
 
 
 
92
 
93
- if col_distraction and df[col_distraction].sum() > df[col_fatigue].sum():
94
- insight.append("🚨 Distraction lebih sering daripada fatigue β€” kemungkinan faktor lingkungan atau policy.")
 
95
 
96
- for i in insight:
97
- st.write(i)
98
 
99
- # ================= HUGGING FACE FIX ==================
100
- if __name__ == "__main__":
101
- import streamlit.web.cli as stcli
102
- import sys
103
- sys.argv = ["streamlit", "run", "app.py", "--server.enableCORS=false", "--server.enableXsrfProtection=false"]
104
- sys.exit(stcli.main())
 
1
import html
from datetime import datetime, timedelta

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
7
 
8
# =================== CONFIG =====================
# Page-level settings for the dashboard (title, icon, wide layout, open sidebar).
st.set_page_config(
    layout="wide",
    initial_sidebar_state="expanded",
    page_title="MineVision AI - Advanced Fatigue Analytics",
    page_icon="⛏️",
)

# Stylesheet injected once per run; the class names are referenced by the
# HTML snippets rendered further down (header, chat bubbles, insight boxes).
_CUSTOM_CSS = """
<style>
.main-header {
background-color: #003366;
padding: 20px;
border-radius: 10px;
color: white;
text-align: center;
margin-bottom: 20px;
}
.metric-card {
background-color: #f0f8ff;
padding: 15px;
border-radius: 8px;
border-left: 5px solid #003366;
}
.insight-box {
background-color: #f9f9f9;
padding: 15px;
border-radius: 8px;
border-left: 5px solid #ff6b6b;
margin: 10px 0;
}
.footer {
text-align: center;
padding: 20px;
color: gray;
font-size: 0.9em;
}
.risk-matrix {
border-collapse: collapse;
width: 100%;
margin: 20px 0;
}
.risk-matrix th, .risk-matrix td {
border: 1px solid #ddd;
padding: 8px;
text-align: left;
}
.risk-matrix th {
background-color: #f2f2f2;
}
.critical { background-color: #ffcccc; }
.high { background-color: #ffebcc; }
.medium { background-color: #ffffcc; }
.low { background-color: #e6ffe6; }
.chat-container {
background-color: white;
padding: 15px;
border-radius: 10px;
height: 400px;
overflow-y: auto;
margin-top: 20px;
border: 1px solid #ccc;
}
.user-message {
background-color: #e3f2fd;
color: black;
padding: 10px;
border-radius: 10px;
margin: 10px 0;
text-align: right;
border: 1px solid #bbdefb;
}
.ai-message {
background-color: #f5f5f5;
color: black;
padding: 10px;
border-radius: 10px;
margin: 10px 0;
text-align: left;
border: 1px solid #e0e0e0;
}
.chat-box {
background-color: white;
border: 1px solid #ccc;
border-radius: 8px;
padding: 10px;
margin-top: 10px;
width: 100%;
}
.user-question {
background-color: white;
border: 1px solid #ccc;
border-radius: 8px;
padding: 10px;
margin-bottom: 10px;
}
.ai-answer {
background-color: #f0f0f0;
border: 1px solid #ccc;
border-radius: 8px;
padding: 10px;
margin-bottom: 10px;
}
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

# Header banner, styled by the .main-header rule above.
_HEADER_HTML = '<div class="main-header"><h1>Safety Analysis and AI - Advanced Fatigue Analysis</h1><p>Proactive Safety Intelligence for Mining Operations</p></div>'
st.markdown(_HEADER_HTML, unsafe_allow_html=True)
117
 
118
# =================== LOAD DATA ======================
# NOTE: the data must be loaded BEFORE the chat section runs. Streamlit
# re-executes the script top to bottom on every interaction, so a chat handler
# placed above the load would reference `df` / `col_*` before they exist.
@st.cache_data
def load_data():
    """Load the fatigue workbook and derive the time-based analysis columns.

    Reads every sheet of ``manual fatique.xlsx``, concatenates them, normalises
    the column names (stripped, lower-case, spaces replaced by underscores),
    detects the operator / shift / asset / fleet-type / speed columns by
    substring match, and adds ``start``, ``end``, ``duration_sec``, ``hour``,
    ``date``, ``day_of_week``, ``week``, ``month`` and ``year``.

    Returns:
        tuple: ``(df, col_operator, col_shift, col_asset, col_fleet_type,
        col_speed)``. Each ``col_*`` is a column name or ``None`` when no
        matching column was found. On any failure an error is shown and an
        empty DataFrame with five ``None`` values is returned.
    """
    empty_result = (pd.DataFrame(), None, None, None, None, None)
    try:
        sheets = pd.read_excel('manual fatique.xlsx', sheet_name=None, engine="openpyxl")

        # sheet_name=None yields a dict of DataFrames (one per sheet).
        df = pd.concat(sheets.values(), ignore_index=True) if isinstance(sheets, dict) else sheets

        df.columns = df.columns.astype(str).str.strip().str.lower().str.replace(" ", "_")

        # Auto-detect the important columns (first match wins).
        col_operator = next((c for c in df.columns if "operator" in c or "driver" in c), None)
        col_shift = next((c for c in df.columns if "shift" in c), None)
        # NOTE(review): "fleet" also matches a "parent_fleet" column, so the
        # asset column depends on column order — confirm against the workbook.
        col_asset = next((c for c in df.columns if "asset" in c or "vehicle" in c or "fleet" in c), None)
        col_fleet_type = next((c for c in df.columns if "parent_fleet" in c), None)
        col_speed = next((c for c in df.columns if "speed" in c or "km/h" in c), None)

        # Timestamp columns are recognised by containing both "gmt" and "wita";
        # the first is treated as the event start and the second as its end.
        time_cols = [c for c in df.columns if "gmt" in c and "wita" in c]
        if len(time_cols) >= 2:
            df["start"] = pd.to_datetime(df[time_cols[0]], errors="coerce")
            df["end"] = pd.to_datetime(df[time_cols[1]], errors="coerce")
        elif len(time_cols) == 1:
            # Only a start time is available: use start + 1 minute as a placeholder end.
            df["start"] = pd.to_datetime(df[time_cols[0]], errors="coerce")
            df["end"] = df["start"] + pd.Timedelta(minutes=1)
        else:
            # Without this branch the derived columns below raised KeyError,
            # which the broad handler turned into "no data at all".
            st.warning("No GMT/WITA timestamp column found; time-based charts and filters will be empty.")
            df["start"] = pd.Series(pd.NaT, index=df.index, dtype="datetime64[ns]")
            df["end"] = pd.Series(pd.NaT, index=df.index, dtype="datetime64[ns]")

        df["duration_sec"] = (df["end"] - df["start"]).dt.total_seconds()
        df["hour"] = df["start"].dt.hour
        df["date"] = df["start"].dt.date
        df["day_of_week"] = df["start"].dt.day_name()
        df["week"] = df["start"].dt.isocalendar().week
        df["month"] = df["start"].dt.month
        df["year"] = df["start"].dt.year

        if col_shift:
            # Nullable Int64 keeps missing shifts as <NA> while removing decimals.
            df[col_shift] = pd.to_numeric(df[col_shift], errors='coerce').round().astype('Int64')

        return df, col_operator, col_shift, col_asset, col_fleet_type, col_speed
    except FileNotFoundError:
        st.error("File 'manual fatique.xlsx' not found. Please check the file path.")
        return empty_result
    except Exception as e:
        # Deliberate best-effort boundary: report the problem and stop the app cleanly.
        st.error(f"Error loading data: {e}")
        return empty_result


def _most_frequent(frame, column):
    """Return ``(value, count, percentage_of_rows)`` for the most frequent
    value of *column*, or ``None`` when the column is missing or has no values."""
    if not column or column not in frame.columns or frame.empty:
        return None
    counts = frame[column].value_counts()
    if counts.empty:  # column exists but holds only missing values
        return None
    count = int(counts.iloc[0])
    return counts.idxmax(), count, count / len(frame) * 100


def _answer_question(question, frame, col_operator, col_shift, col_fleet_type, col_speed):
    """Build the keyword-based chat answer for *question* from *frame*.

    The question is matched case-insensitively against a fixed list of
    Indonesian/English keywords; the first matching rule produces the answer.
    Unmatched questions get a fallback message summarising the available data.
    """
    q = question.lower()
    asks_most = any(word in q for word in ("banyak", "most", "highest"))
    asks_often = "sering" in q
    total_alerts = len(frame)

    if "operator" in q and (asks_often or asks_most):
        top = _most_frequent(frame, col_operator)
        if top is None:
            return "Tidak ada data operator yang tersedia."
        top_operator, count, percentage = top
        return f"Operator dengan jumlah kejadian ngantuk paling banyak adalah **{top_operator}** dengan **{count}** kejadian ({percentage:.1f}% dari total {total_alerts} kejadian)."

    if "shift" in q and asks_most:
        top = _most_frequent(frame, col_shift)
        if top is None:
            return "Tidak ada data shift yang tersedia."
        top_shift, count, percentage = top
        return f"Shift dengan jumlah kejadian ngantuk paling banyak adalah **Shift {top_shift}** dengan **{count}** kejadian ({percentage:.1f}% dari total {total_alerts} kejadian)."

    if "jam" in q and (asks_most or asks_often):
        top = _most_frequent(frame, "hour")
        if top is None:
            return "Tidak ada data jam yang tersedia."
        top_hour, count, percentage = top
        # The hour column is float when some timestamps are missing; show it as an integer.
        return f"Jam dengan jumlah kejadian ngantuk paling banyak adalah pukul **{int(top_hour)}:00** dengan **{count}** kejadian ({percentage:.1f}% dari total {total_alerts} kejadian)."

    if "fleet" in q and asks_most:
        top = _most_frequent(frame, col_fleet_type)
        if top is None:
            return "Tidak ada data fleet type yang tersedia."
        top_fleet, count, percentage = top
        return f"Fleet type dengan jumlah kejadian ngantuk paling banyak adalah **{top_fleet}** dengan **{count}** kejadian ({percentage:.1f}% dari total {total_alerts} kejadian)."

    if "total" in q and "alert" in q:
        return f"Total kejadian fatigue alert adalah **{total_alerts}**."

    if "average" in q and ("duration" in q or "lama" in q):
        if "duration_sec" in frame.columns and frame["duration_sec"].notna().any():
            avg_duration = frame["duration_sec"].mean()
            return f"Rata-rata durasi kejadian fatigue adalah **{avg_duration:.2f} detik**."
        return "Tidak ada data durasi yang tersedia."

    if "risk" in q and ("category" in q or "level" in q):
        # 'risk_category' only exists if it was computed before this point.
        if 'risk_category' in frame.columns and not frame.empty:
            lines = ["Kategori risiko kelelahan:"]
            for category, count in frame['risk_category'].value_counts().items():
                lines.append(f"- {category}: {count} kejadian ({count / total_alerts * 100:.1f}% dari total)")
            return "\n".join(lines) + "\n"
        return "Tidak ada data kategori risiko yang tersedia."

    if "speed" in q and ("high" in q or "fast" in q):
        speeds = pd.to_numeric(frame[col_speed], errors="coerce") if col_speed else None
        if speeds is None or not speeds.notna().any():
            return "Tidak ada data kecepatan yang tersedia."
        high_speed_threshold = speeds.quantile(0.75)
        high_speed_count = int((speeds >= high_speed_threshold).sum())
        percentage = high_speed_count / total_alerts * 100
        return f"Jumlah kejadian fatigue pada kecepatan tinggi (>= {high_speed_threshold:.0f} km/h) adalah **{high_speed_count}** kejadian ({percentage:.1f}% dari total {total_alerts} kejadian)."

    if "critical" in q and "hour" in q:
        if "hour" not in frame.columns:
            return "Tidak ada data jam yang tersedia."
        critical_count = int(frame['hour'].isin([2, 3, 4, 5]).sum())
        percentage = critical_count / total_alerts * 100 if total_alerts > 0 else 0
        return f"Jumlah kejadian fatigue pada jam kritis (2-5 AM) adalah **{critical_count}** kejadian ({percentage:.1f}% dari total {total_alerts} kejadian)."

    if "madar" in q:
        if not col_operator or frame.empty:
            return "Tidak ada data operator yang tersedia."
        # astype(str) keeps the lookup working when the operator column is not text.
        madar_count = int(frame[col_operator].astype(str).str.contains('Madar', case=False, na=False).sum())
        if madar_count == 0:
            return "Operator 'Madar' tidak ditemukan dalam data."
        percentage = madar_count / total_alerts * 100
        return f"Operator **Madar** tercatat memiliki **{madar_count}** kejadian ngantuk ({percentage:.1f}% dari total {total_alerts} kejadian)."

    # Fallback: explain what can be asked and summarise the loaded data.
    context_info = []
    if col_operator:
        context_info.append(f"Operator: {frame[col_operator].nunique() if not frame.empty else 0} unik")
    if col_shift:
        context_info.append(f"Shift: {sorted(frame[col_shift].dropna().unique()) if not frame.empty else []}")
    if "hour" in frame.columns:
        hours = frame["hour"].dropna()
        # Series.min/max skip NaN; the builtin min()/max() are order-dependent with NaN.
        first_hour = int(hours.min()) if not hours.empty else 0
        last_hour = int(hours.max()) if not hours.empty else 23
        context_info.append(f"Jam: {first_hour}-{last_hour}")
    if col_fleet_type:
        context_info.append(f"Fleet: {frame[col_fleet_type].nunique() if not frame.empty else 0} jenis")
    if "duration_sec" in frame.columns:
        context_info.append(f"Durasi: rata-rata {frame['duration_sec'].mean():.2f} detik")
    if col_speed:
        context_info.append(f"Kecepatan: hingga {frame[col_speed].max() if not frame.empty and not frame[col_speed].isna().all() else 0} km/h")

    context_str = ", ".join(context_info)
    return f"Pertanyaan Anda tidak dapat diproses. Silakan tanyakan tentang operator, shift, jam, fleet type, total alert, durasi, kategori risiko, kecepatan tinggi, atau jam kritis. Data saat ini mencakup: {context_str}."


df, col_operator, col_shift, col_asset, col_fleet_type, col_speed = load_data()

if df.empty:
    st.stop()

# =================== CHAT AI SECTION =====================
st.subheader("MineVision AI Assistant")

# The conversation lives in session state so it survives script reruns.
if 'chat_history' not in st.session_state:
    st.session_state.chat_history = []

# Render the whole history as ONE HTML element: an opening <div> emitted in its
# own st.markdown call cannot wrap elements produced by later calls.
chat_bubbles = []
for message in st.session_state.chat_history:
    if message['role'] == 'user':
        css_class, speaker = "user-message", "You"
    else:
        css_class, speaker = "ai-message", "MineVision AI"
    # Escape user/data text before embedding it in raw HTML; <br> keeps line
    # breaks without introducing blank lines that would end the HTML block.
    safe_text = html.escape(str(message["content"])).replace("\n", "<br>")
    chat_bubbles.append(f'<div class="{css_class}">{speaker}: {safe_text}</div>')
st.markdown(f'<div class="chat-container">{"".join(chat_bubbles)}</div>', unsafe_allow_html=True)

# Input for user question
user_input = st.text_input("Ask a question about the fatigue data...", key="chat_input")

if st.button("Send", key="send_button") and user_input:
    st.session_state.chat_history.append({"role": "user", "content": user_input})
    response = _answer_question(user_input, df, col_operator, col_shift, col_fleet_type, col_speed)
    st.session_state.chat_history.append({"role": "assistant", "content": response})
    # Rerun so the history box above shows the new exchange.
    st.rerun()

st.success("Data Loaded Successfully")
314
+
315
# =================== FILTERS (Sidebar) =====================
st.sidebar.header("Filters")


def _sidebar_multiselect(frame, column, label):
    """Render a sidebar multiselect for *column* and return
    ``(filtered_frame, options, chosen)``.

    The frame is only filtered when the user picked a strict subset of the
    options. An empty selection or the default "everything selected" state
    means "All" and leaves the frame untouched, so rows with a missing value
    in *column* are not silently dropped.
    """
    values = frame[column].dropna().unique()
    try:
        options = sorted(values)
    except TypeError:
        # Mixed value types (e.g. text and numbers) cannot be ordered directly.
        options = sorted(values, key=str)
    chosen = st.sidebar.multiselect(label, options=options, default=options)
    if chosen and len(chosen) < len(options):
        frame = frame[frame[column].isin(chosen)]
    return frame, options, chosen


# Year Filter
if 'year' in df.columns:
    df, all_years, selected_years = _sidebar_multiselect(df, 'year', "Select Year (Leave blank for All)")

# Month Filter
if 'month' in df.columns:
    df, all_months, selected_months = _sidebar_multiselect(df, 'month', "Select Month (Leave blank for All)")

# Week Filter
if 'week' in df.columns:
    df, all_weeks, selected_weeks = _sidebar_multiselect(df, 'week', "Select Week (Leave blank for All)")

# Date Range Filter: the full range (or an incomplete selection) means "All".
date_filtered = False
if 'date' in df.columns:
    known_dates = df['date'].dropna()  # rows with unparseable timestamps have no date
    if not known_dates.empty:
        min_date = known_dates.min()
        max_date = known_dates.max()
        date_range = (min_date, max_date)

        date_range_input = st.sidebar.date_input(
            "Select Date Range (Leave blank for All)",
            value=date_range,
            min_value=min_date,
            max_value=max_date
        )

        # While the user is still picking the end date the widget returns a
        # one-element tuple; wait for both ends instead of indexing past it.
        has_both_ends = isinstance(date_range_input, (tuple, list)) and len(date_range_input) == 2
        if has_both_ends and tuple(date_range_input) != (min_date, max_date):
            date_range = tuple(date_range_input)
            date_filtered = True
            # Compare as timestamps so missing dates evaluate to False instead of raising.
            stamps = pd.to_datetime(df['date'], errors="coerce")
            df = df[(stamps >= pd.Timestamp(date_range[0])) & (stamps <= pd.Timestamp(date_range[1]))]

# Operator Filter (the multiselect widget is searchable)
if col_operator:
    df, all_operators, selected_operators = _sidebar_multiselect(
        df, col_operator,
        f"Select {col_operator.replace('_', ' ').title()} (Leave blank for All)",
    )

# Shift Filter
if col_shift:
    df, all_shifts, selected_shifts = _sidebar_multiselect(
        df, col_shift,
        f"Select {col_shift.replace('_', ' ').title()} (Leave blank for All)",
    )

# Hour Range Filter
all_hours = sorted(df['hour'].dropna().unique())
if all_hours:
    full_hour_range = (int(all_hours[0]), int(all_hours[-1]))
    if full_hour_range[0] < full_hour_range[1]:
        hour_range = st.sidebar.slider(
            "Select Hour Range (Leave at full range for All)",
            min_value=full_hour_range[0],
            max_value=full_hour_range[1],
            value=full_hour_range,
            step=1
        )
    else:
        # A single distinct hour leaves nothing to select; a slider needs min < max.
        st.sidebar.text(f"Only hour {full_hour_range[0]} available")
        hour_range = full_hour_range
    if hour_range != full_hour_range:
        df = df[(df['hour'] >= hour_range[0]) & (df['hour'] <= hour_range[1])]
else:
    # Handle case where there are no hours
    st.sidebar.text("No hour data available")
    hour_range = (0, 23)
418
+
419
+
420
# =================== FATIGUE RISK CATEGORIZATION =====================
st.subheader("Fatigue Risk Categorization")

# Display order, bar colour and explanatory caption for each risk level.
_RISK_LEVELS = ['Critical', 'High', 'Medium', 'Low']
_RISK_COLORS = {'Critical': 'red', 'High': 'orange', 'Medium': 'yellow', 'Low': 'green'}
_RISK_CAPTIONS = {
    'Critical': "High fatigue + high-speed haul road",
    'High': "Moderate fatigue + decline haul road",
    'Medium': "High fatigue + low-risk task",
    'Low': "Low fatigue + non-hazard task",
}

if col_speed and "hour" in df.columns:
    # Work on a copy: df may be a filtered slice of the loaded frame.
    df = df.copy()

    # Speed quartiles are computed once (not once per row).
    speed = pd.to_numeric(df[col_speed], errors="coerce")
    q25, q50, q75 = (speed.quantile(q) for q in (0.25, 0.5, 0.75))
    in_critical_hours = df['hour'].isin([2, 3, 4, 5])

    # Rules, later assignments taking precedence:
    #   default                         -> Medium
    #   outside 2-5 AM and speed <= q25 -> Low
    #   within 2-5 AM and speed > q50   -> High
    #   within 2-5 AM and speed > q75   -> Critical
    # Rows with a missing speed fail every comparison and stay Medium.
    risk_category = pd.Series('Medium', index=df.index, dtype=object)
    risk_category[~in_critical_hours & (speed <= q25)] = 'Low'
    risk_category[in_critical_hours & (speed > q50)] = 'High'
    risk_category[in_critical_hours & (speed > q75)] = 'Critical'
    df['risk_category'] = risk_category

    # Count alerts by risk category; absent levels count as 0 so the chart
    # and the annotation positions stay numeric.
    risk_counts = df['risk_category'].value_counts().reindex(_RISK_LEVELS, fill_value=0)

    fig_risk = px.bar(
        x=risk_counts.index,
        y=risk_counts.values,
        title="Fatigue Risk Categories Distribution",
        labels={'x': 'Risk Category', 'y': 'Number of Alerts'},
        color=risk_counts.index,
        color_discrete_map=_RISK_COLORS
    )
    fig_risk.update_layout(
        xaxis_title="Risk Category",
        yaxis_title="Number of Alerts",
        height=400,
        legend_title_text="Risk Level",
        legend=dict(
            orientation="v",
            yanchor="top",
            y=1,
            xanchor="left",
            x=1.02
        )
    )
    # Caption above each bar explaining what the level means.
    for level, count in risk_counts.items():
        fig_risk.add_annotation(
            x=level,
            y=count + 1,
            text=_RISK_CAPTIONS[level],
            showarrow=False,
            font=dict(size=10),
            bgcolor=_RISK_COLORS[level],
            opacity=0.8
        )

    st.plotly_chart(fig_risk, width="stretch")
505
+
506
+
507
# =================== KPI METRICS =====================
st.subheader("Executive Safety Dashboard")

# Headline figures for the currently filtered data; "-" / "N/A" mark
# columns that were not detected in the workbook.
if col_operator:
    operator_total = df[col_operator].nunique()
else:
    operator_total = "-"

if col_asset:
    equipment_total = df[col_asset].nunique()
else:
    equipment_total = "-"

if "duration_sec" in df.columns:
    average_duration = round(df["duration_sec"].mean(), 2)
else:
    average_duration = "N/A"

col1, col2, col3, col4 = st.columns(4)
col1.metric("Total Alerts", f"{len(df):,}")
col2.metric("Operators", operator_total)
col3.metric("Qty Equipment", equipment_total)
col4.metric("Avg Duration (sec)", average_duration)
516
+
517
+
518
# =================== TREND ANALYTICS =====================
st.subheader("Fatigue Trend Analysis")

# Alerts per hour of day.
hourly_alerts = df.groupby("hour").size().reset_index(name="alerts")
fig_hour = px.bar(hourly_alerts, x="hour", y="alerts", title="Fatigue Alerts by Hour")
st.plotly_chart(fig_hour, width="stretch")

if col_shift:
    # Alerts per shift; the axis is categorical so shift numbers are not
    # drawn as a continuous scale with decimal ticks.
    shift_alerts = df.groupby(col_shift).size().reset_index(name="alerts")
    fig_shift = px.bar(
        shift_alerts,
        x=col_shift,
        y="alerts",
        title="Fatigue Distribution by Shift",
    )
    fig_shift.update_xaxes(type='category')
    st.plotly_chart(fig_shift, width="stretch")

    # Shift x hour heatmap of alert counts.
    heat_df = df.groupby([col_shift, "hour"]).size().reset_index(name="alerts")
    fig_heat = px.density_heatmap(
        heat_df,
        x="hour",
        y=col_shift,
        z="alerts",
        title="Heatmap Fatigue by Shift & Hour",
        color_continuous_scale="reds",
    )
    fig_heat.update_yaxes(type='category')
    st.plotly_chart(fig_heat, width="stretch")

if col_operator:
    # Operators ordered by alert count, highest first (value_counts order).
    operator_counts = (
        df[col_operator]
        .value_counts()
        .rename_axis("operator")
        .reset_index(name="alerts")
    )
    fig_operator = px.bar(
        operator_counts,
        x="operator",
        y="alerts",
        title="Top Fatigue Alerts by Operator",
    )
    st.plotly_chart(fig_operator, width="stretch")
564
+
565
+
566
+ # =================== NEW CHARTS (Based on Mining Fatigue Factors) =====================
567
+ st.subheader("Advanced Mining Fatigue Analytics")
568
+
569
+ # 1. Day of Week Analysis (Workload Pattern)
570
+ if 'day_of_week' in df.columns:
571
+ day_counts = df['day_of_week'].value_counts().reindex(['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'])
572
+ fig_day = px.bar(
573
+ day_counts,
574
+ x=day_counts.index, y=day_counts.values,
575
+ title="Fatigue Alerts by Day of Week (Workload Pattern)"
576
+ )
577
+ st.plotly_chart(fig_day, width="stretch")
578
+
579
+ # 2. Fleet Type Analysis (Task & Workload)
580
+ if col_fleet_type:
581
+ fleet_counts = df[col_fleet_type].value_counts().reset_index()
582
+ fleet_counts.columns = [col_fleet_type, "alerts"]
583
+ fig_fleet = px.bar(
584
+ fleet_counts,
585
+ x=col_fleet_type, y="alerts",
586
+ title="Fatigue Alerts by Fleet Type (Task Complexity)"
587
+ )
588
+ st.plotly_chart(fig_fleet, width="stretch")
589
+
590
+ # 3. Speed vs Hour Analysis (Environmental Factors & Workload)
591
+ if col_speed and "hour" in df.columns:
592
+ # Remove rows with NaN speed values for this analysis
593
+ speed_df = df.dropna(subset=[col_speed])
594
+ if not speed_df.empty:
595
+ fig_speed_hour = px.scatter(
596
+ speed_df,
597
+ x="hour", y=col_speed,
598
+ title="Speed vs Hour of Day (Fatigue Events) - Environmental Factor",
599
+ hover_data=[col_operator, col_asset]
600
  )
601
+ st.plotly_chart(fig_speed_hour, width="stretch")
602
+
603
+ # 4. Duration vs Hour Analysis (Physiological Response)
604
+ if "duration_sec" in df.columns and "hour" in df.columns:
605
+ fig_duration_hour = px.scatter(
606
+ df,
607
+ x="hour", y="duration_sec",
608
+ title="Fatigue Event Duration vs Hour of Day (Physiological Response)",
609
+ hover_data=[col_operator, col_asset]
610
+ )
611
+ st.plotly_chart(fig_duration_hour, width="stretch")
612
 
613
+ # 5. Operator vs Shift Analysis (Shift Pattern Risk)
614
+ if col_operator and col_shift:
615
+ op_shift_counts = df.groupby([col_operator, col_shift]).size().reset_index(name="alerts")
616
+ fig_op_shift = px.bar(
617
+ op_shift_counts,
618
+ x=col_operator, y="alerts", color=col_shift,
619
+ title="Operator Fatigue Distribution by Shift (Shift Pattern Risk)"
620
+ )
621
+ st.plotly_chart(fig_op_shift, width="stretch")
622
+
623
# 6. Weekly Trend Analysis (Recovery Pattern) - With Color by Shift
# Line chart of alert counts per week, one line per shift.
if "week" in df.columns and col_shift:
    # Build the legend labels on a derived frame so the shared df does not
    # gain an extra "shift_legend" column as a side effect of plotting.
    weekly_df = df.assign(
        shift_legend=df[col_shift].apply(lambda x: f"Shift {x}")
    )

    # Group by week and shift
    weekly_shift_trend = (
        weekly_df.groupby(["week", "shift_legend"])
        .size()
        .reset_index(name="alerts")
    )

    fig_weekly = px.line(
        weekly_shift_trend,
        x="week",
        y="alerts",
        color="shift_legend",
        title="Weekly Fatigue Trend by Shift (Recovery Pattern)",
        markers=True,
    )

    # Custom colours are only assigned when there are at least two shifts.
    # The mapping itself is always defined, so the trace loop below can never
    # hit an undefined name when the data holds a single shift.
    color_map = {}
    unique_shifts = sorted(weekly_shift_trend["shift_legend"].unique())
    if len(unique_shifts) >= 2:
        for i, shift in enumerate(unique_shifts):
            if i == 0:
                color_map[shift] = "blue"
            elif i == 1:
                color_map[shift] = "red"
            else:
                # Third and later shifts get hues spaced 60 degrees apart.
                color_map[shift] = f"hsl({i*60}, 70%, 50%)"

    fig_weekly.update_traces(marker=dict(size=8))
    fig_weekly.update_layout(
        legend_title_text="Shift",
        # Horizontal legend placed just above the plot, right-aligned.
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1,
        ),
    )

    # Apply custom colors (traces without a mapping keep Plotly's defaults).
    for trace in fig_weekly.data:
        if trace.name in color_map:
            trace.line.color = color_map[trace.name]
            trace.marker.color = color_map[trace.name]

    st.plotly_chart(fig_weekly, width="stretch")
669
+
670
# 7. Speed Distribution Analysis (Task Complexity)
# Histogram (20 bins) of the speed column, using only rows that have a
# speed value. Nothing is drawn when no such rows remain.
if col_speed:
    speed_df_clean = df.dropna(subset=[col_speed])
    has_speed_rows = not speed_df_clean.empty
    if has_speed_rows:
        histogram_options = {
            "x": col_speed,
            "title": "Speed Distribution (Task Complexity Indicator)",
            "nbins": 20,
        }
        fig_speed_dist = px.histogram(speed_df_clean, **histogram_options)
        st.plotly_chart(fig_speed_dist, width="stretch")
681
+
682
+
683
# =================== INSIGHTS BY ADVANCED ANALYTICS =====================
st.subheader("Insights by Advanced Analytics")

# 1. Critical Hour Analysis (2-5 AM)
# Share of all alerts whose "hour" value is 2, 3, 4 or 5.
# Guarded on the "hour" column like the other hour-based sections of this
# script; without the column the lookup below would raise KeyError.
if "hour" in df.columns:
    critical_hours = [2, 3, 4, 5]
    critical_alerts = df[df["hour"].isin(critical_hours)]
    # An empty frame reports 0% instead of dividing by zero.
    critical_pct = (len(critical_alerts) / len(df)) * 100 if len(df) > 0 else 0

    st.markdown("Critical Hour Risk (2-5 AM)")

    # Background colour escalates with the share of critical-hour alerts:
    # > 50%, > 25%, > 10%, otherwise the lowest tier.
    if critical_pct > 50:
        bg_color = "#ffcccc"
    elif critical_pct > 25:
        bg_color = "#ffebcc"
    elif critical_pct > 10:
        bg_color = "#ffffcc"
    else:
        bg_color = "#e6ffe6"
    st.markdown(
        f'<div style="background-color: {bg_color}; padding: 10px; border-radius: 5px;">Critical Hour Alerts: {len(critical_alerts)} ({critical_pct:.1f}% of total alerts)</div>',
        unsafe_allow_html=True,
    )

    if critical_pct > 10:  # More than 10% of alerts happen in critical hours
        st.warning(f"High risk: {critical_pct:.1f}% of fatigue alerts occur during critical hours (2-5 AM). This is a known circadian dip period.")
    else:
        st.info(f"{critical_pct:.1f}% of alerts occur during critical hours. This is within acceptable range.")
699
+
700
# 2. High-Speed Fatigue Analysis (Environmental Risk)
# Counts alerts whose speed is at or above the 75th percentile of the
# recorded speeds and reports them as a share of all alerts.
if col_speed:
    speed_values = df[col_speed].dropna()
    if speed_values.empty:
        # No usable readings: the quantile would be NaN and the heading
        # would read "nan km/h", so say so explicitly instead.
        st.info("No speed readings available for high-speed fatigue analysis.")
    else:
        high_speed_threshold = speed_values.quantile(0.75)  # Top 25% of speeds
        high_speed_fatigue = df[df[col_speed] >= high_speed_threshold]
        # Denominator is every alert row, including rows without a speed.
        high_speed_pct = (len(high_speed_fatigue) / len(df)) * 100

        # The filter is inclusive, so the label says ">=" rather than ">".
        st.markdown(f"High-Speed Fatigue Risk (Speed >= {high_speed_threshold:.0f} km/h)")
        st.metric("High-Speed Fatigue Events", f"{len(high_speed_fatigue)}", f"{high_speed_pct:.1f}% of total alerts")

        # NOTE(review): the threshold is the data's own 75th percentile, so
        # roughly a quarter of the non-null readings meet it by construction
        # and the > 20% warning fires for most datasets. An absolute speed
        # limit may be what is intended - TODO confirm with the domain owner.
        if high_speed_pct > 20:  # More than 20% of alerts happen at high speed
            st.warning(f"High risk: {high_speed_pct:.1f}% of fatigue alerts occur at high speeds. This increases accident severity potential.")
        else:
            st.info(f"{high_speed_pct:.1f}% of alerts occur at high speeds. This is within acceptable range.")
712
+
713
# 3. Shift Pattern Analysis
# One metric per shift value showing its alert count and share of all
# alerts, with a warning when a single shift holds more than half of them.
if col_shift:
    # value_counts() skips missing shift values, so the loop only runs when
    # df has at least one row and the division below is safe.
    shift_counts = df[col_shift].value_counts()

    st.markdown("Shift Pattern Risk")
    for shift_val, shift_total in shift_counts.items():
        shift_pct = (shift_total / len(df)) * 100
        st.metric(f"Shift {shift_val} Alerts", f"{shift_total}", f"{shift_pct:.1f}% of total alerts")
        if shift_pct > 50:  # One shift has more than 50% of alerts
            st.warning(f"Shift {shift_val} has disproportionately high alerts ({shift_pct:.1f}%). Review shift scheduling and workload.")
        else:
            st.info(f"Shift {shift_val} alert distribution is acceptable ({shift_pct:.1f}%).")
726
+
727
# 4. Operator Risk Profiling
# Lists the five operators with the most alerts; each gets a metric plus a
# warning (share above 5% of all alerts) or an informational note.
if col_operator:
    operator_alerts = df[col_operator].value_counts()
    top_risk_operators = operator_alerts.iloc[:5]  # Top 5 operators by alerts

    st.markdown("High-Risk Operator Identification")
    for op_name, count in top_risk_operators.items():
        op_pct = (count / len(df)) * 100
        share_label = f"{op_pct:.1f}% of total alerts"
        st.metric(f"Operator: {op_name}", f"{count} alerts", share_label)

        if op_pct <= 5:
            st.info(f"Operator {op_name} fatigue risk is within acceptable range ({op_pct:.1f}%).")
        else:
            # More than 5% of all alerts belong to this operator
            st.warning(f"Operator {op_name} has high fatigue risk ({op_pct:.1f}% of alerts). Consider coaching or rest plan.")
740
+
741
+
742
# =================== FATIGUE RISK MATRIX =====================
# Moved to sidebar
# Static reference table rendered as raw HTML. Each row's CSS class is the
# lower-cased risk tier; the matching styles are presumably injected
# elsewhere in the app - TODO confirm.
with st.sidebar:
    st.subheader("Fatigue Risk Matrix")

    matrix_columns = [
        "Likelihood (Fatigue Level)",
        "Severity (Hazard Impact)",
        "Risk Tier",
    ]
    risk_matrix_data = [
        ["High fatigue + high-speed haul road", "Potential fatality", "Critical"],
        ["Moderate fatigue + decline haul road", "Serious injury", "High"],
        ["High fatigue + low-risk task", "Minor injury", "Medium"],
        ["Low fatigue + non-hazard task", "No injury", "Low"],
    ]
    risk_df = pd.DataFrame(risk_matrix_data, columns=matrix_columns)

    # Display risk matrix as a styled table
    header_cells = "".join(f"<th>{title}</th>" for title in matrix_columns)
    body_rows = "".join(
        f'<tr class="{tier.lower()}"><td>{likelihood}</td><td>{severity}</td><td>{tier}</td></tr>'
        for likelihood, severity, tier in risk_df.itertuples(index=False, name=None)
    )
    html_string = (
        f'<table class="risk-matrix"><thead><tr>{header_cells}</tr></thead><tbody>'
        f"{body_rows}"
        "</tbody></table>"
    )

    st.markdown(html_string, unsafe_allow_html=True)
764
+
765
+
766
# =================== AI INSIGHT ENGINE =====================
st.subheader("Automated Insight Summary")

# Insights are collected as Markdown snippets and rendered as one bullet
# list at the end of this section.
insights = []
critical_hours = [2, 3, 4, 5]

# Peak hour
if "hour" in df.columns and not df.empty:
    hour_counts = df["hour"].value_counts()
    # value_counts() drops missing values; an all-missing column leaves
    # nothing to rank and idxmax() would raise on the empty result.
    if not hour_counts.empty:
        peak_hour = hour_counts.idxmax()
        # A float-typed hour (e.g. a column that also holds NaN) would
        # otherwise be rendered as "3.0:00".
        if isinstance(peak_hour, float) and peak_hour.is_integer():
            peak_hour = int(peak_hour)
        if peak_hour in critical_hours:
            insights.append(f"⚠️ Most fatigue risk occurs at **{peak_hour}:00** — during critical circadian low period (2-5 AM). Consider enhanced monitoring.")
        else:
            insights.append(f"Most fatigue risk occurs at **{peak_hour}:00** — likely due to circadian drop.")

# Risk shift
if col_shift and not df.empty:
    shift_counts = df[col_shift].value_counts()
    if not shift_counts.empty:
        worst_shift = shift_counts.idxmax()
        insights.append(f"👷 Highest fatigue recorded in **Shift {worst_shift}** — review scheduling & workload.")

# Worst operator
if col_operator and not df.empty:
    operator_counts = df[col_operator].value_counts()
    if not operator_counts.empty:
        worst_operator = operator_counts.idxmax()
        insights.append(f"⚠️ Operator at highest risk: **{worst_operator}** — suggested coaching or rest plan.")

# Duration risk: flag when the mean event duration exceeds 10 seconds.
if "duration_sec" in df.columns and not df.empty:
    avg_duration = df["duration_sec"].mean()
    if not pd.isna(avg_duration) and avg_duration > 10:
        insights.append("⏳ Long fatigue event duration suggests slow response — improve alerting training.")

# Critical hour insight: flag when more than 15% of alerts fall in 2-5 AM.
if "hour" in df.columns and not df.empty:
    critical_alerts = df[df["hour"].isin(critical_hours)]
    critical_pct = (len(critical_alerts) / len(df)) * 100
    if critical_pct > 15:
        insights.append(f"🌙 **CRITICAL HOUR RISK**: {critical_pct:.1f}% of alerts occur during circadian low (2-5 AM). Consider enhanced monitoring during this period.")

# High-speed insight: alerts at or above the 75th percentile of recorded
# speeds, as a share of all alerts.
if col_speed and not df.empty:
    speed_values = df[col_speed].dropna()
    if not speed_values.empty:
        high_speed_fatigue = df[df[col_speed] >= speed_values.quantile(0.75)]
        high_speed_pct = (len(high_speed_fatigue) / len(df)) * 100
        if high_speed_pct > 20:
            insights.append(f"🚀 **HIGH-SPEED RISK**: {high_speed_pct:.1f}% of fatigue events occur at high speeds, increasing accident severity potential.")

# Output insights as a Markdown bullet list
for insight in insights:
    st.markdown(f"- {insight}")
816
 
 
 
817
 
818
# ================= FOOTER ===========================
# Horizontal rule followed by the branded footer line. The "footer" CSS
# class is presumably styled by CSS injected elsewhere - TODO confirm.
st.markdown("---")
st.markdown('<div class="footer">MineVision AI - Transforming Mining Safety with Intelligent Analytics | Contact: sales@minevision-ai.com</div>', unsafe_allow_html=True)