Spaces:
Sleeping
Sleeping
Commit
·
31fea3a
1
Parent(s):
c42cb7b
English translation
Browse files
- app.py +14 -14
- config/log_definitions.py +35 -35
- utils/log2pandas.py +0 -39
app.py
CHANGED
|
@@ -5,32 +5,32 @@ import streamlit as st
|
|
| 5 |
from config.log_definitions import log_definitions
|
| 6 |
from utils.log2pandas import LogParser
|
| 7 |
|
| 8 |
-
st.title("
|
| 9 |
|
| 10 |
-
#
|
| 11 |
-
uploaded_file = st.file_uploader("
|
| 12 |
|
| 13 |
-
#
|
| 14 |
|
| 15 |
-
#
|
| 16 |
log_types = list(log_definitions.keys())
|
| 17 |
|
| 18 |
-
log_type = st.selectbox("
|
| 19 |
|
| 20 |
-
#
|
| 21 |
-
if st.button("
|
| 22 |
if uploaded_file is not None:
|
| 23 |
-
#
|
| 24 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".log") as tmp_file:
|
| 25 |
tmp_file.write(uploaded_file.getbuffer())
|
| 26 |
tmp_file_path = tmp_file.name
|
| 27 |
|
| 28 |
-
#
|
| 29 |
parser = LogParser(tmp_file_path, log_type)
|
| 30 |
-
#
|
| 31 |
parsed_df = parser.parse_file()
|
| 32 |
-
#
|
| 33 |
-
st.write("DataFrame
|
| 34 |
st.dataframe(parsed_df)
|
| 35 |
else:
|
| 36 |
-
st.error("
|
|
|
|
| 5 |
from config.log_definitions import log_definitions
|
| 6 |
from utils.log2pandas import LogParser
|
| 7 |
|
| 8 |
+
st.title("Log Analyzer")
|
| 9 |
|
| 10 |
+
# Upload area by drag and drop
|
| 11 |
+
uploaded_file = st.file_uploader("Drop your log file here")
|
| 12 |
|
| 13 |
+
# Dropdown menu to choose the log type
|
| 14 |
|
| 15 |
+
# Extract log types from the configuration file
|
| 16 |
log_types = list(log_definitions.keys())
|
| 17 |
|
| 18 |
+
log_type = st.selectbox("Select log type", options=log_types)
|
| 19 |
|
| 20 |
+
# Analyze button
|
| 21 |
+
if st.button("Analyze"):
|
| 22 |
if uploaded_file is not None:
|
| 23 |
+
# Temporarily save the uploaded file
|
| 24 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".log") as tmp_file:
|
| 25 |
tmp_file.write(uploaded_file.getbuffer())
|
| 26 |
tmp_file_path = tmp_file.name
|
| 27 |
|
| 28 |
+
# Create an instance of LogParser with the temporary path and log type
|
| 29 |
parser = LogParser(tmp_file_path, log_type)
|
| 30 |
+
# Parse the file and get the DataFrame
|
| 31 |
parsed_df = parser.parse_file()
|
| 32 |
+
# Display the first rows of the resulting DataFrame
|
| 33 |
+
st.write("Resulting DataFrame:")
|
| 34 |
st.dataframe(parsed_df)
|
| 35 |
else:
|
| 36 |
+
st.error("Please upload a log file.")
|
config/log_definitions.py
CHANGED
|
@@ -1,24 +1,5 @@
|
|
| 1 |
log_definitions = {
|
| 2 |
-
"
|
| 3 |
-
"sep": " ",
|
| 4 |
-
"fields": [
|
| 5 |
-
{"name": "current_time", "pos": slice(0, 5), "type": "datetime"},
|
| 6 |
-
{"name": "transfer_time", "pos": 5, "type": int},
|
| 7 |
-
{"name": "remote_host", "pos": 6, "type": str},
|
| 8 |
-
{"name": "file_size", "pos": 7, "type": int},
|
| 9 |
-
{"name": "filename", "pos": 8, "type": str},
|
| 10 |
-
{"name": "transfer_type", "pos": 9, "type": str},
|
| 11 |
-
{"name": "special_flag", "pos": 10, "type": str},
|
| 12 |
-
{"name": "direction", "pos": 11, "type": "direction"},
|
| 13 |
-
{"name": "access_mode", "pos": 12, "type": str},
|
| 14 |
-
{"name": "username", "pos": 13, "type": str},
|
| 15 |
-
{"name": "service_name", "pos": 14, "type": str},
|
| 16 |
-
{"name": "auth_method", "pos": 15, "type": int},
|
| 17 |
-
{"name": "auth_user_id", "pos": 16, "type": str},
|
| 18 |
-
{"name": "status", "pos": 17, "type": str},
|
| 19 |
-
],
|
| 20 |
-
},
|
| 21 |
-
"apache_access_log": {
|
| 22 |
"sep": " ",
|
| 23 |
"fields": [
|
| 24 |
{"name": "datetime", "pos": slice(0, 5), "type": "datetime"},
|
|
@@ -26,18 +7,7 @@ log_definitions = {
|
|
| 26 |
{"name": "message", "pos": slice(6, None), "type": str},
|
| 27 |
],
|
| 28 |
},
|
| 29 |
-
"
|
| 30 |
-
"sep": " ",
|
| 31 |
-
"fields": [
|
| 32 |
-
{"name": "month", "pos": 0, "type": str},
|
| 33 |
-
{"name": "day", "pos": 1, "type": int},
|
| 34 |
-
{"name": "time", "pos": 2, "type": "datetime"},
|
| 35 |
-
{"name": "host", "pos": 3, "type": str},
|
| 36 |
-
{"name": "kernel", "pos": 4, "type": str},
|
| 37 |
-
{"name": "message", "pos": slice(5, None), "type": str},
|
| 38 |
-
],
|
| 39 |
-
},
|
| 40 |
-
"auth_log": {
|
| 41 |
"sep": " ",
|
| 42 |
"fields": [
|
| 43 |
{"name": "month", "pos": 0, "type": str},
|
|
@@ -48,7 +18,7 @@ log_definitions = {
|
|
| 48 |
{"name": "message", "pos": slice(5, None), "type": str},
|
| 49 |
],
|
| 50 |
},
|
| 51 |
-
"
|
| 52 |
"sep": " ",
|
| 53 |
"fields": [
|
| 54 |
{"name": "date", "pos": 0, "type": "datetime"},
|
|
@@ -58,7 +28,18 @@ log_definitions = {
|
|
| 58 |
{"name": "record_type", "pos": 4, "type": str},
|
| 59 |
],
|
| 60 |
},
|
| 61 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
"sep": " ",
|
| 63 |
"fields": [
|
| 64 |
{"name": "datetime", "pos": slice(0, 3), "type": "datetime"},
|
|
@@ -68,7 +49,7 @@ log_definitions = {
|
|
| 68 |
{"name": "Content", "pos": slice(6, None), "type": str},
|
| 69 |
],
|
| 70 |
},
|
| 71 |
-
"
|
| 72 |
"sep": " ",
|
| 73 |
"fields": [
|
| 74 |
{"name": "datetime", "pos": slice(0, 3), "type": "datetime"},
|
|
@@ -78,4 +59,23 @@ log_definitions = {
|
|
| 78 |
{"name": "Content", "pos": slice(6, None), "type": str},
|
| 79 |
],
|
| 80 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
}
|
|
|
|
| 1 |
log_definitions = {
|
| 2 |
+
"apache_access": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
"sep": " ",
|
| 4 |
"fields": [
|
| 5 |
{"name": "datetime", "pos": slice(0, 5), "type": "datetime"},
|
|
|
|
| 7 |
{"name": "message", "pos": slice(6, None), "type": str},
|
| 8 |
],
|
| 9 |
},
|
| 10 |
+
"auth": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
"sep": " ",
|
| 12 |
"fields": [
|
| 13 |
{"name": "month", "pos": 0, "type": str},
|
|
|
|
| 18 |
{"name": "message", "pos": slice(5, None), "type": str},
|
| 19 |
],
|
| 20 |
},
|
| 21 |
+
"dns": {
|
| 22 |
"sep": " ",
|
| 23 |
"fields": [
|
| 24 |
{"name": "date", "pos": 0, "type": "datetime"},
|
|
|
|
| 28 |
{"name": "record_type", "pos": 4, "type": str},
|
| 29 |
],
|
| 30 |
},
|
| 31 |
+
"firewall": {
|
| 32 |
+
"sep": " ",
|
| 33 |
+
"fields": [
|
| 34 |
+
{"name": "month", "pos": 0, "type": str},
|
| 35 |
+
{"name": "day", "pos": 1, "type": int},
|
| 36 |
+
{"name": "time", "pos": 2, "type": "datetime"},
|
| 37 |
+
{"name": "host", "pos": 3, "type": str},
|
| 38 |
+
{"name": "kernel", "pos": 4, "type": str},
|
| 39 |
+
{"name": "message", "pos": slice(5, None), "type": str},
|
| 40 |
+
],
|
| 41 |
+
},
|
| 42 |
+
"linux": {
|
| 43 |
"sep": " ",
|
| 44 |
"fields": [
|
| 45 |
{"name": "datetime", "pos": slice(0, 3), "type": "datetime"},
|
|
|
|
| 49 |
{"name": "Content", "pos": slice(6, None), "type": str},
|
| 50 |
],
|
| 51 |
},
|
| 52 |
+
"ssh": {
|
| 53 |
"sep": " ",
|
| 54 |
"fields": [
|
| 55 |
{"name": "datetime", "pos": slice(0, 3), "type": "datetime"},
|
|
|
|
| 59 |
{"name": "Content", "pos": slice(6, None), "type": str},
|
| 60 |
],
|
| 61 |
},
|
| 62 |
+
"xferlog": {
|
| 63 |
+
"sep": " ",
|
| 64 |
+
"fields": [
|
| 65 |
+
{"name": "current_time", "pos": slice(0, 5), "type": "datetime"},
|
| 66 |
+
{"name": "transfer_time", "pos": 5, "type": int},
|
| 67 |
+
{"name": "remote_host", "pos": 6, "type": str},
|
| 68 |
+
{"name": "file_size", "pos": 7, "type": int},
|
| 69 |
+
{"name": "filename", "pos": 8, "type": str},
|
| 70 |
+
{"name": "transfer_type", "pos": 9, "type": str},
|
| 71 |
+
{"name": "special_flag", "pos": 10, "type": str},
|
| 72 |
+
{"name": "direction", "pos": 11, "type": "direction"},
|
| 73 |
+
{"name": "access_mode", "pos": 12, "type": str},
|
| 74 |
+
{"name": "username", "pos": 13, "type": str},
|
| 75 |
+
{"name": "service_name", "pos": 14, "type": str},
|
| 76 |
+
{"name": "auth_method", "pos": 15, "type": int},
|
| 77 |
+
{"name": "auth_user_id", "pos": 16, "type": str},
|
| 78 |
+
{"name": "status", "pos": 17, "type": str},
|
| 79 |
+
],
|
| 80 |
+
},
|
| 81 |
}
|
utils/log2pandas.py
CHANGED
|
@@ -84,45 +84,6 @@ class LogParser:
|
|
| 84 |
entry[field["name"]] = value
|
| 85 |
|
| 86 |
return entry
|
| 87 |
-
"""Parse une ligne du fichier log en utilisant la définition fournie."""
|
| 88 |
-
tokens = line.strip().split()
|
| 89 |
-
# On ignore la ligne si elle ne contient pas assez de tokens
|
| 90 |
-
if len(tokens) < len(self.log_definition["fields"]):
|
| 91 |
-
return None
|
| 92 |
-
|
| 93 |
-
entry = {}
|
| 94 |
-
for field in self.log_definition["fields"]:
|
| 95 |
-
pos = field["pos"]
|
| 96 |
-
|
| 97 |
-
# Extraction de la valeur selon la position indiquée
|
| 98 |
-
if isinstance(pos, slice):
|
| 99 |
-
value = " ".join(tokens[pos])
|
| 100 |
-
else:
|
| 101 |
-
try:
|
| 102 |
-
value = tokens[pos]
|
| 103 |
-
except IndexError:
|
| 104 |
-
value = None
|
| 105 |
-
|
| 106 |
-
# Conversion du type
|
| 107 |
-
if "type" in field:
|
| 108 |
-
typ = field["type"]
|
| 109 |
-
if typ == "datetime":
|
| 110 |
-
try:
|
| 111 |
-
# Format typique utilisé dans nos logs
|
| 112 |
-
value = datetime.strptime(value, "%a %b %d %H:%M:%S %Y")
|
| 113 |
-
except Exception:
|
| 114 |
-
value = None
|
| 115 |
-
elif typ == "direction":
|
| 116 |
-
value = "download" if value == "o" else "upload"
|
| 117 |
-
else:
|
| 118 |
-
try:
|
| 119 |
-
value = typ(value)
|
| 120 |
-
except Exception:
|
| 121 |
-
pass
|
| 122 |
-
|
| 123 |
-
entry[field["name"]] = value
|
| 124 |
-
|
| 125 |
-
return entry
|
| 126 |
|
| 127 |
def parse_file(self):
|
| 128 |
"""Parcourt tout le fichier log et renvoie un DataFrame pandas contenant les entrées parse."""
|
|
|
|
| 84 |
entry[field["name"]] = value
|
| 85 |
|
| 86 |
return entry
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
|
| 88 |
def parse_file(self):
|
| 89 |
"""Parcourt tout le fichier log et renvoie un DataFrame pandas contenant les entrées parse."""
|