Spaces:
Sleeping
Sleeping
Commit ·
d3d8436
1
Parent(s): 2a74583
Add special characters workaround
Browse files
- config/log_definitions.py +3 -7
- utils/log2pandas.py +45 -0
config/log_definitions.py
CHANGED
|
@@ -21,13 +21,9 @@ log_definitions = {
|
|
| 21 |
"apache_access_log": {
|
| 22 |
"sep": " ",
|
| 23 |
"fields": [
|
| 24 |
-
{"name": "
|
| 25 |
-
{"name": "
|
| 26 |
-
{"name": "
|
| 27 |
-
{"name": "datetime", "pos": slice(3, 5), "type": "datetime"},
|
| 28 |
-
{"name": "request", "pos": 5, "type": str},
|
| 29 |
-
{"name": "status", "pos": 6, "type": int},
|
| 30 |
-
{"name": "bytes_sent", "pos": 7, "type": int},
|
| 31 |
],
|
| 32 |
},
|
| 33 |
"firewall_log": {
|
|
|
|
| 21 |
"apache_access_log": {
|
| 22 |
"sep": " ",
|
| 23 |
"fields": [
|
| 24 |
+
{"name": "datetime", "pos": slice(0, 5), "type": "datetime"},
|
| 25 |
+
{"name": "status", "pos": 5, "type": int},
|
| 26 |
+
{"name": "message", "pos": slice(6, None), "type": str},
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
],
|
| 28 |
},
|
| 29 |
"firewall_log": {
|
utils/log2pandas.py
CHANGED
|
@@ -1,5 +1,7 @@
|
|
| 1 |
from datetime import datetime
|
|
|
|
| 2 |
import pandas as pd
|
|
|
|
| 3 |
from config.log_definitions import log_definitions
|
| 4 |
|
| 5 |
|
|
@@ -21,6 +23,49 @@ class LogParser:
|
|
| 21 |
if len(tokens) < len(self.log_definition["fields"]):
|
| 22 |
return None
|
| 23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
entry = {}
|
| 25 |
for field in self.log_definition["fields"]:
|
| 26 |
pos = field["pos"]
|
|
|
|
| 1 |
from datetime import datetime
|
| 2 |
+
|
| 3 |
import pandas as pd
|
| 4 |
+
|
| 5 |
from config.log_definitions import log_definitions
|
| 6 |
|
| 7 |
|
|
|
|
| 23 |
if len(tokens) < len(self.log_definition["fields"]):
|
| 24 |
return None
|
| 25 |
|
| 26 |
+
entry = {}
|
| 27 |
+
for field in self.log_definition["fields"]:
|
| 28 |
+
pos = field["pos"]
|
| 29 |
+
|
| 30 |
+
# Extraction de la valeur selon la position indiquée
|
| 31 |
+
if isinstance(pos, slice):
|
| 32 |
+
value = " ".join(tokens[pos])
|
| 33 |
+
else:
|
| 34 |
+
try:
|
| 35 |
+
value = tokens[pos]
|
| 36 |
+
except IndexError:
|
| 37 |
+
value = None
|
| 38 |
+
|
| 39 |
+
# Nettoyage des caractères qui entourent la valeur (crochets, parenthèses, etc.)
|
| 40 |
+
if value:
|
| 41 |
+
value = value.strip("[](){}<>")
|
| 42 |
+
|
| 43 |
+
# Conversion du type
|
| 44 |
+
if "type" in field:
|
| 45 |
+
typ = field["type"]
|
| 46 |
+
if typ == "datetime":
|
| 47 |
+
try:
|
| 48 |
+
# Format typique utilisé dans nos logs
|
| 49 |
+
value = datetime.strptime(value, "%a %b %d %H:%M:%S %Y")
|
| 50 |
+
except Exception:
|
| 51 |
+
value = None
|
| 52 |
+
elif typ == "direction":
|
| 53 |
+
value = "download" if value == "o" else "upload"
|
| 54 |
+
else:
|
| 55 |
+
try:
|
| 56 |
+
value = typ(value)
|
| 57 |
+
except Exception:
|
| 58 |
+
pass
|
| 59 |
+
|
| 60 |
+
entry[field["name"]] = value
|
| 61 |
+
|
| 62 |
+
return entry
|
| 63 |
+
"""Parse une ligne du fichier log en utilisant la définition fournie."""
|
| 64 |
+
tokens = line.strip().split()
|
| 65 |
+
# On ignore la ligne si elle ne contient pas assez de tokens
|
| 66 |
+
if len(tokens) < len(self.log_definition["fields"]):
|
| 67 |
+
return None
|
| 68 |
+
|
| 69 |
entry = {}
|
| 70 |
for field in self.log_definition["fields"]:
|
| 71 |
pos = field["pos"]
|