berangerthomas commited on
Commit
49363c2
·
1 Parent(s): d1fd9e1

Alert page added. Renamed directories.

Browse files
app.py CHANGED
@@ -30,10 +30,11 @@ add_logo()
30
 
31
 
32
  # Pages definition
33
- home = st.Page("pages/home.py", title="🏠 Home")
34
- upload = st.Page("pages/upload.py", title="📥 Upload")
35
- analyze = st.Page("pages/analyze.py", title=" 📊 Analyze")
36
- about = st.Page("pages/about.py", title="📄 About")
 
37
 
38
- pg = st.navigation([home, upload, analyze, about])
39
  pg.run()
 
30
 
31
 
32
  # Pages definition
33
+ home = st.Page("sections/home.py", title="🏠 Home")
34
+ upload = st.Page("sections/upload.py", title="📥 Upload")
35
+ analyze = st.Page("sections/analyze.py", title=" 📊 Analyze")
36
+ alerts = st.Page("sections/alerts.py", title=" 📊 Alerts")
37
+ about = st.Page("sections/about.py", title="📄 About")
38
 
39
+ pg = st.navigation([home, upload, analyze, alerts, about])
40
  pg.run()
{pages → sections}/about.py RENAMED
File without changes
sections/alerts.py ADDED
@@ -0,0 +1,360 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import altair as alt
import pandas as pd
import streamlit as st

# Alerts page (Streamlit module-level script): scans the parsed log DataFrame
# kept in st.session_state for critical errors, abnormal activity peaks and
# consecutive-error sequences, then surfaces recommendations.

st.title("📊 Alerts and Anomalies")

if "parsed_df" not in st.session_state or st.session_state.parsed_df is None:
    st.warning(
        "No log data is available. Please first upload and parse a log file in the Upload section."
    )
else:
    df = st.session_state.parsed_df

    # ------------------------------------------------------------------ #
    # Overview metrics
    # ------------------------------------------------------------------ #
    st.subheader("Overview of logs")
    col1, col2, col3 = st.columns(3)

    with col1:
        total_entries = len(df)
        st.metric("Total number of entries", total_entries)

    with col2:
        # Locate a severity-like column; fall back to scanning raw text.
        level_col = None
        for col in ("level", "severity", "log_level", "type", "status"):
            if col in df.columns:
                level_col = col
                break

        if level_col:
            # Fix: dropped the stray "I" entry from the severity list — it
            # matched a literal level "I" (not INFO) and skewed the count.
            # astype(str) guards against non-string severity dtypes.
            error_levels = ["ERROR", "CRITICAL", "FATAL", "FAIL"]
            error_count = df[
                df[level_col].astype(str).str.upper().isin(error_levels)
            ].shape[0]
            error_percent = (
                (error_count / total_entries) * 100 if total_entries > 0 else 0
            )
            st.metric("Error entries", f"{error_count} ({error_percent:.1f}%)")
        else:
            # No severity column: grep the first textual column instead.
            object_cols = df.select_dtypes(include=["object"]).columns
            text_col = object_cols[0] if len(object_cols) > 0 else None
            if text_col:
                error_count = df[
                    df[text_col].str.contains(
                        "ERROR|CRITICAL|FATAL|FAIL|EXCEPTION", case=False, na=False
                    )
                ].shape[0]
                error_percent = (
                    (error_count / total_entries) * 100 if total_entries > 0 else 0
                )
                st.metric("Error entries", f"{error_count} ({error_percent:.1f}%)")
            else:
                st.metric("Error entries", "Not detectable")

    with col3:
        # Prefer an actual datetime-typed column, then fall back to
        # conventional column names.
        timestamp_col = None
        datetime_cols = [
            col for col in df.columns if pd.api.types.is_datetime64_any_dtype(df[col])
        ]
        if datetime_cols:
            timestamp_col = datetime_cols[0]
        else:
            for col in ("timestamp", "date", "time", "datetime"):
                if col in df.columns:
                    timestamp_col = col
                    break

        if timestamp_col:
            time_range = f"{df[timestamp_col].min()} to {df[timestamp_col].max()}"
            st.markdown(
                f"**Time range**<br><small>{time_range}</small>",
                unsafe_allow_html=True,
            )
        else:
            st.metric("Time range", "Not detectable")

    # ------------------------------------------------------------------ #
    # Critical-error detection
    # ------------------------------------------------------------------ #
    st.subheader("Detected critical errors")

    def detect_errors(dataframe):
        """Return the rows of *dataframe* whose textual columns contain an
        error-like keyword (error, critical, fatal, fail, exception, crash,
        timeout). Rows matched in several columns are deduplicated."""
        pattern = "|".join(
            ["error", "critical", "fatal", "fail", "exception", "crash", "timeout"]
        )
        error_df = pd.DataFrame()
        for col in dataframe.select_dtypes(include=["object"]).columns:
            mask = dataframe[col].str.contains(pattern, case=False, na=False)
            if error_df.empty:
                error_df = dataframe[mask].copy()
            else:
                error_df = pd.concat([error_df, dataframe[mask]]).drop_duplicates()
        return error_df

    error_logs = detect_errors(df)

    if not error_logs.empty:
        st.write(f"**{len(error_logs)} critical errors detected**")
        st.dataframe(error_logs)

        # Extract the most common error "types" (50-char context snippets
        # starting at the matched keyword).
        if len(error_logs) > 5:
            st.subheader("Frequent error types")
            error_types = {}
            for col in error_logs.select_dtypes(include=["object"]).columns:
                for pattern in ("error", "exception", "fail"):
                    pattern_errors = error_logs[
                        error_logs[col].str.contains(pattern, case=False, na=False)
                    ]
                    for _, row in pattern_errors.iterrows():
                        text = str(row[col])
                        idx = text.lower().find(pattern)
                        if idx == -1:
                            continue
                        # Slicing clamps at the string end, so no bounds
                        # check is needed.
                        context = text[idx : idx + 50].strip()
                        error_types[context] = error_types.get(context, 0) + 1

            sorted_errors = sorted(
                error_types.items(), key=lambda x: x[1], reverse=True
            )[:10]
            error_df = pd.DataFrame(
                sorted_errors, columns=["Error type", "Occurrences"]
            )
            st.dataframe(error_df)

        # Hourly distribution of the detected errors.
        if timestamp_col:
            st.subheader("Temporal distribution of errors")

            if not pd.api.types.is_datetime64_any_dtype(error_logs[timestamp_col]):
                # Fix: errors="coerce" replaces the old bare `except: pass`;
                # unparseable values become NaT instead of silently aborting.
                error_logs[timestamp_col] = pd.to_datetime(
                    error_logs[timestamp_col], errors="coerce"
                )

            if pd.api.types.is_datetime64_any_dtype(error_logs[timestamp_col]):
                # Fix: freq alias "1H" is deprecated in modern pandas; "1h".
                error_count = (
                    error_logs.groupby(pd.Grouper(key=timestamp_col, freq="1h"))
                    .size()
                    .reset_index()
                )
                error_count.columns = [timestamp_col, "count"]

                chart = (
                    alt.Chart(error_count)
                    .mark_line()
                    .encode(
                        x=alt.X(f"{timestamp_col}:T", title="Time"),
                        y=alt.Y("count:Q", title="Number of errors"),
                        tooltip=[f"{timestamp_col}:T", "count:Q"],
                    )
                    .properties(width=700, height=300, title="Errors per hour")
                )
                st.altair_chart(chart, use_container_width=True)
    else:
        st.success("No critical errors detected in the logs.")

    # ------------------------------------------------------------------ #
    # Anomaly detection (activity volume over time)
    # ------------------------------------------------------------------ #
    st.subheader("Anomaly detection")

    # Fix: the old gate also ran a throwaway full-column pd.to_datetime
    # *outside* the try block (uncaught-crash risk, wasted work); with
    # errors="coerce" that branch is always datetime-typed, so the whole
    # condition reduces to having a timestamp column at all.
    if timestamp_col is not None:
        try:
            if not pd.api.types.is_datetime64_any_dtype(df[timestamp_col]):
                df[timestamp_col] = pd.to_datetime(df[timestamp_col], errors="coerce")

            # Entry counts per 5-minute bucket (fix: "5Min" -> "5min").
            time_df = (
                df.groupby(pd.Grouper(key=timestamp_col, freq="5min"))
                .size()
                .reset_index()
            )
            time_df.columns = [timestamp_col, "count"]

            # Rolling mean +/- 2 sigma defines the "normal" activity band.
            time_df["moving_avg"] = (
                time_df["count"].rolling(window=5, min_periods=1).mean()
            )
            time_df["std"] = (
                time_df["count"].rolling(window=5, min_periods=1).std().fillna(0)
            )
            time_df["upper_bound"] = time_df["moving_avg"] + 2 * time_df["std"]
            time_df["lower_bound"] = (time_df["moving_avg"] - 2 * time_df["std"]).clip(
                lower=0
            )

            # A bucket outside the band is flagged as anomalous.
            time_df["is_anomaly"] = (time_df["count"] > time_df["upper_bound"]) | (
                time_df["count"] < time_df["lower_bound"]
            )

            anomaly_points = time_df[time_df["is_anomaly"]]

            if not anomaly_points.empty:
                st.write(
                    f"**{len(anomaly_points)} periods with abnormal activity detected**"
                )

                base = alt.Chart(time_df).encode(
                    x=alt.X(f"{timestamp_col}:T", title="Time")
                )
                line = base.mark_line().encode(
                    y=alt.Y("count:Q", title="Number of log entries")
                )
                bands = base.mark_area(opacity=0.2).encode(
                    y="lower_bound:Q",
                    y2="upper_bound:Q",
                    tooltip=[
                        f"{timestamp_col}:T",
                        "count:Q",
                        "moving_avg:Q",
                        "lower_bound:Q",
                        "upper_bound:Q",
                    ],
                )
                points = (
                    base.mark_circle(size=100, color="red")
                    .encode(y="count:Q")
                    # Altair expression, not a Python comparison — `== True`
                    # is intentional here.
                    .transform_filter(alt.datum.is_anomaly == True)  # noqa: E712
                )

                chart = (line + bands + points).properties(
                    width=700,
                    height=300,
                    title="Anomaly detection (unusual activity)",
                )
                st.altair_chart(chart, use_container_width=True)

                # Table of anomalous periods. .copy() avoids renaming the
                # columns of a view (SettingWithCopy warning).
                st.write("Periods with abnormal activity:")
                anomaly_df = anomaly_points[
                    [timestamp_col, "count", "moving_avg", "upper_bound", "lower_bound"]
                ].copy()
                anomaly_df.columns = [
                    "Period",
                    "Number of entries",
                    "Moving average",
                    "Upper limit",
                    "Lower limit",
                ]
                st.dataframe(anomaly_df)
            else:
                st.success("No temporal anomalies detected.")

        except Exception as e:
            st.error(f"Unable to analyze the temporal distribution of logs: {e}")

    # ------------------------------------------------------------------ #
    # Consecutive-error sequences
    # ------------------------------------------------------------------ #
    if timestamp_col and level_col:
        st.subheader("Unusual event sequences")
        try:
            # Walk the log in time order, collecting runs of error rows.
            df_sorted = df.sort_values(by=timestamp_col)
            consecutive_errors = []
            current_sequence = []
            for i, row in df_sorted.iterrows():
                if str(row[level_col]).upper() in ("ERROR", "CRITICAL", "FATAL"):
                    current_sequence.append(i)
                else:
                    if len(current_sequence) >= 3:  # at least 3 consecutive errors
                        consecutive_errors.append(current_sequence)
                    current_sequence = []

            if len(current_sequence) >= 3:  # flush the trailing sequence
                consecutive_errors.append(current_sequence)

            if consecutive_errors:
                st.write(
                    f"**{len(consecutive_errors)} sequences of 3+ consecutive errors detected**"
                )
                # Show at most 5 sequences to keep the page readable.
                for i, sequence in enumerate(consecutive_errors[:5]):
                    with st.expander(
                        f"Sequence {i + 1}: {len(sequence)} consecutive errors"
                    ):
                        st.dataframe(df.loc[sequence])
            else:
                st.success("No sequences of consecutive errors detected.")

        except Exception as e:
            st.error(f"Unable to analyze event sequences: {e}")

    # ------------------------------------------------------------------ #
    # Recommendations
    # ------------------------------------------------------------------ #
    st.subheader("Recommendations")

    if not error_logs.empty:
        st.warning(
            "⚠️ Critical errors have been detected. Review the entries in red for more details."
        )

    if "error_types" in locals() and error_types:
        top_error = sorted_errors[0][0]
        st.info(
            f"💡 The most frequent error is '{top_error}'. Focus your analysis on this type of error."
        )

    if "anomaly_points" in locals() and not anomaly_points.empty:
        # Fix: idxmax() returns an index *label*; indexing with .iloc (which
        # is positional) picked the wrong row or raised IndexError. Use .loc.
        peak_time = anomaly_points.loc[anomaly_points["count"].idxmax(), timestamp_col]
        st.warning(
            f"⚠️ A significant activity peak was detected around {peak_time}. Review this period."
        )

    if "consecutive_errors" in locals() and consecutive_errors:
        st.warning(
            "⚠️ Sequences of consecutive errors have been detected, which may indicate systemic issues."
        )

    if error_logs.empty and ("anomaly_points" not in locals() or anomaly_points.empty):
        st.success("✅ No major issues detected in the analyzed logs.")
{pages → sections}/analyze.py RENAMED
File without changes
{pages → sections}/home.py RENAMED
@@ -36,7 +36,7 @@ with col1:
36
 
37
  st.subheader("🔍 Advanced Parsing")
38
  st.write("""
39
- Automatically detect log formats or configure custom parsing rules.
40
  Extract timestamp, log level, and message content with ease.
41
  """)
42
 
 
36
 
37
  st.subheader("🔍 Advanced Parsing")
38
  st.write("""
39
+ Configure custom parsing rules to extract data from your logs.
40
  Extract timestamp, log level, and message content with ease.
41
  """)
42
 
{pages → sections}/upload.py RENAMED
@@ -56,10 +56,10 @@ if uploaded_file is not None:
56
  st.session_state.parsed_df = parser.parse_file()
57
 
58
  # Display a success message and the dataframe
59
- st.success("Log file successfully analyzed!")
60
  # st.dataframe(st.session_state.parsed_df)
61
  except Exception as e:
62
- st.error(f"Error analyzing the file: {e}")
63
  finally:
64
  # Clean up the temporary file
65
  os.unlink(tmp_path)
@@ -110,5 +110,5 @@ if uploaded_file is not None:
110
 
111
  # Display the dataframe if available
112
  if st.session_state.parsed_df is not None:
113
- st.subheader("Analyzed log data")
114
  st.dataframe(st.session_state.parsed_df)
 
56
  st.session_state.parsed_df = parser.parse_file()
57
 
58
  # Display a success message and the dataframe
59
+ st.success("Log file successfully parsed!")
60
  # st.dataframe(st.session_state.parsed_df)
61
  except Exception as e:
62
+ st.error(f"Error parsing the file: {e}")
63
  finally:
64
  # Clean up the temporary file
65
  os.unlink(tmp_path)
 
110
 
111
  # Display the dataframe if available
112
  if st.session_state.parsed_df is not None:
113
+ st.subheader("Parsed log data")
114
  st.dataframe(st.session_state.parsed_df)