berangerthomas committed on
Commit
24f4026
·
1 Parent(s): b78ea6e

Faster load

Browse files
Files changed (1) hide show
  1. sections/upload.py +59 -110
sections/upload.py CHANGED
@@ -1,133 +1,82 @@
1
- #####################################################
2
- #### Imports ####
3
- #####################################################
4
  import os
5
  import tempfile
6
  from datetime import datetime
7
 
 
8
  import streamlit as st
9
 
10
- from config.log_definitions import log_definitions
11
-
12
- # from utils.log2pandas import LogParser
13
- from utils.log2polars import LogParser
14
  from utils.pandas2sql import Pandas2SQL
15
 
16
- #####################################################
17
- #### Interface Setup ####
18
- #####################################################
19
-
20
  st.title("ShadowLog - Log File Analyzer")
21
- st.write("Upload a log file to analyze")
 
22
 
23
- # File upload widget
24
  uploaded_file = st.file_uploader("Choose a log file")
25
 
26
- # Get available log types from log_definitions
27
- log_types = list(log_definitions.keys())
28
-
29
- # Set default log type if not already in session state
30
- # Initialize log_type in session state if not present
31
- if "log_type" not in st.session_state:
32
- st.session_state.log_type = log_types[0] # Start with first log type as default
33
- st.session_state.user_selected = False # Track if user manually selected a log type
34
-
35
- # Check if a new file was uploaded and update suggested log type
36
- if uploaded_file is not None and not st.session_state.get("user_selected", False):
37
- filename = uploaded_file.name.lower()
38
- for log_type in log_types:
39
- if log_type.lower() in filename:
40
- st.session_state.log_type = log_type
41
- break
42
-
43
- # Display the selectbox
44
- selected_log_type = st.selectbox(
45
- "Select log type", log_types, index=log_types.index(st.session_state.log_type)
46
- )
47
-
48
- # Update session state and mark as user-selected when changed
49
- if selected_log_type != st.session_state.log_type:
50
- st.session_state.log_type = selected_log_type
51
- st.session_state.user_selected = True
52
-
53
- # Store the parsed dataframe in the session state
54
  if "parsed_df" not in st.session_state:
55
  st.session_state.parsed_df = None
56
 
57
  if uploaded_file is not None:
58
- # Create two columns for the buttons
59
- col1, col2 = st.columns(2)
60
-
61
- with col1:
62
- # Button to parse the log file
63
- if st.button("Parse the log file"):
64
- with st.spinner("Analyzing the file..."):
65
- # Create a temporary file
66
- with tempfile.NamedTemporaryFile(
67
- delete=False, suffix=".log"
68
- ) as tmp_file:
69
- tmp_file.write(uploaded_file.getvalue())
70
- tmp_path = tmp_file.name
71
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  try:
73
- # Parse the log file
74
- parser = LogParser(tmp_path, st.session_state.log_type)
75
- st.session_state.parsed_df = parser.parse_file()
76
-
77
- # Display a success message and the dataframe
78
- st.success("Log file successfully parsed!")
79
- # st.dataframe(st.session_state.parsed_df)
 
 
 
 
 
 
 
 
 
 
 
80
  except Exception as e:
81
- st.error(f"Error parsing the file: {e}")
82
  finally:
83
- # Clean up the temporary file
84
- os.unlink(tmp_path)
85
-
86
- with col2:
87
- # Button to convert to SQLite and download
88
- if st.button("Convert to SQLite"):
89
- if st.session_state.parsed_df is not None:
90
- with st.spinner("Converting to SQLite..."):
91
- try:
92
- # Create a temporary SQLite file
93
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
94
- sqlite_path = os.path.join(
95
- tempfile.gettempdir(), f"log_data_{timestamp}.sqlite"
96
- )
97
-
98
- # Create the SQL converter
99
- sql_converter = Pandas2SQL(sqlite_path)
100
-
101
- # Convert the dataframe to SQLite
102
- sql_converter.create_table(
103
- st.session_state.parsed_df, st.session_state.log_type
104
- )
105
-
106
- # Read the SQLite file for download
107
- with open(sqlite_path, "rb") as file:
108
- sqlite_data = file.read()
109
-
110
- # Success message and immediate download
111
- st.success("SQLite file created successfully!")
112
-
113
- # Download button
114
- st.download_button(
115
- label="Download SQLite file",
116
- data=sqlite_data,
117
- file_name=f"log_file_{st.session_state.log_type}_{timestamp}.sqlite",
118
- mime="application/octet-stream",
119
- key="auto_download",
120
- )
121
- except Exception as e:
122
- st.error(f"Error converting to SQLite: {e}")
123
- finally:
124
- # Clean up the temporary file
125
- if os.path.exists(sqlite_path):
126
- os.unlink(sqlite_path)
127
- else:
128
- st.warning("Please parse the log file first.")
129
 
130
- # Display the dataframe if available
131
  if st.session_state.parsed_df is not None:
132
  st.subheader("Parsed log data")
133
  st.dataframe(st.session_state.parsed_df)
 
 
 
 
1
  import os
2
  import tempfile
3
  from datetime import datetime
4
 
5
+ import polars as pl
6
  import streamlit as st
7
 
 
 
 
 
8
  from utils.pandas2sql import Pandas2SQL
9
 
 
 
 
 
10
# Page header: this page ingests a semicolon-separated log export
# (the column names below suggest firewall traffic logs — TODO confirm).
st.title("ShadowLog - Log File Analyzer")
st.write("Upload a log file to analyze with the following format :")
st.write("date;ipsrc;ipdst;protocole;portsrc;portdst;rule;action;interface;unknown;fw")

# Streamlit file picker; yields an UploadedFile (file-like object) or None.
uploaded_file = st.file_uploader("Choose a log file")
16
# Keep the parsed dataframe cached across Streamlit reruns; start empty.
st.session_state.setdefault("parsed_df", None)
if uploaded_file is not None:
    with st.spinner("Parsing and filtering the file..."):
        try:
            # Explicit column names for the headerless CSV. Without
            # new_columns, polars auto-names the columns "column_1" ..
            # "column_11", so the name-keyed dtypes below would not apply
            # and the filter on "date" would raise ColumnNotFoundError.
            column_names = [
                "date", "ipsrc", "ipdst", "protocole", "portsrc",
                "portdst", "rule", "action", "interface", "unknown", "fw",
            ]
            # Parse, keep only the analysis window, and drop unused columns.
            st.session_state.parsed_df = (
                pl.read_csv(
                    uploaded_file,
                    separator=";",
                    has_header=False,
                    new_columns=column_names,
                    infer_schema_length=10000,
                    dtypes={
                        "date": pl.Datetime,
                        "ipsrc": pl.Utf8,
                        "ipdst": pl.Utf8,
                        "protocole": pl.Utf8,
                        "portsrc": pl.Int64,
                        "portdst": pl.Int64,
                        "rule": pl.Int64,
                        "action": pl.Utf8,
                        "interface": pl.Utf8,
                        "unknown": pl.Utf8,
                        "fw": pl.Int64,
                    },
                )
                # Keep rows in [2024-11-01, 2025-03-01) only.
                .filter(
                    (pl.col("date") >= pl.datetime(2024, 11, 1))
                    & (pl.col("date") < pl.datetime(2025, 3, 1))
                )
                # These columns are not used by the analysis pages.
                .drop(["portsrc", "unknown", "fw"])
            )
            st.success("File parsed and filtered successfully!")
        except Exception as e:
            # Surface the failure in the UI instead of crashing the page.
            st.error(f"Error parsing the file: {e}")
52
if st.session_state.parsed_df is not None:
    if st.button("Convert to SQLite"):
        with st.spinner("Converting to SQLite..."):
            # This revision removed the log-type selector, so
            # st.session_state.log_type is never set; reading the attribute
            # directly raised and every conversion failed. Fall back to a
            # fixed label when the key is absent.
            log_type = st.session_state.get("log_type", "firewall")
            # Pre-initialize so the finally-clause is safe even if an
            # exception fires before sqlite_path is assigned.
            sqlite_path = None
            try:
                # Timestamped temp file so concurrent conversions don't clash.
                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                sqlite_path = os.path.join(
                    tempfile.gettempdir(), f"log_data_{timestamp}.sqlite"
                )
                sql_converter = Pandas2SQL(sqlite_path)
                # Pandas2SQL expects a pandas DataFrame, hence to_pandas().
                sql_converter.create_table(
                    st.session_state.parsed_df.to_pandas(),
                    log_type,
                )
                # Read the finished database back so it can be offered
                # as a download, then delete the temp file below.
                with open(sqlite_path, "rb") as file:
                    sqlite_data = file.read()
                st.success("SQLite file created successfully!")
                st.download_button(
                    label="Download SQLite file",
                    data=sqlite_data,
                    file_name=f"log_file_{log_type}_{timestamp}.sqlite",
                    mime="application/octet-stream",
                )
            except Exception as e:
                st.error(f"Error converting to SQLite: {e}")
            finally:
                # Always clean up the temporary SQLite file.
                if sqlite_path and os.path.exists(sqlite_path):
                    os.unlink(sqlite_path)
 
80
# Show the parsed data (if any) at the bottom of the page.
parsed = st.session_state.parsed_df
if parsed is not None:
    st.subheader("Parsed log data")
    st.dataframe(parsed)