berangerthomas committed on
Commit
d3d8436
·
1 Parent(s): 2a74583

Add special characters workaround

Browse files
Files changed (2) hide show
  1. config/log_definitions.py +3 -7
  2. utils/log2pandas.py +45 -0
config/log_definitions.py CHANGED
@@ -21,13 +21,9 @@ log_definitions = {
21
  "apache_access_log": {
22
  "sep": " ",
23
  "fields": [
24
- {"name": "remote_host", "pos": 0, "type": str},
25
- {"name": "logname", "pos": 1, "type": str},
26
- {"name": "user", "pos": 2, "type": str},
27
- {"name": "datetime", "pos": slice(3, 5), "type": "datetime"},
28
- {"name": "request", "pos": 5, "type": str},
29
- {"name": "status", "pos": 6, "type": int},
30
- {"name": "bytes_sent", "pos": 7, "type": int},
31
  ],
32
  },
33
  "firewall_log": {
 
21
  "apache_access_log": {
22
  "sep": " ",
23
  "fields": [
24
+ {"name": "datetime", "pos": slice(0, 5), "type": "datetime"},
25
+ {"name": "status", "pos": 5, "type": int},
26
+ {"name": "message", "pos": slice(6, None), "type": str},
 
 
 
 
27
  ],
28
  },
29
  "firewall_log": {
utils/log2pandas.py CHANGED
@@ -1,5 +1,7 @@
1
  from datetime import datetime
 
2
  import pandas as pd
 
3
  from config.log_definitions import log_definitions
4
 
5
 
@@ -21,6 +23,49 @@ class LogParser:
21
  if len(tokens) < len(self.log_definition["fields"]):
22
  return None
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  entry = {}
25
  for field in self.log_definition["fields"]:
26
  pos = field["pos"]
 
1
  from datetime import datetime
2
+
3
  import pandas as pd
4
+
5
  from config.log_definitions import log_definitions
6
 
7
 
 
23
  if len(tokens) < len(self.log_definition["fields"]):
24
  return None
25
 
26
+ entry = {}
27
+ for field in self.log_definition["fields"]:
28
+ pos = field["pos"]
29
+
30
+ # Extraction de la valeur selon la position indiquée
31
+ if isinstance(pos, slice):
32
+ value = " ".join(tokens[pos])
33
+ else:
34
+ try:
35
+ value = tokens[pos]
36
+ except IndexError:
37
+ value = None
38
+
39
+ # Nettoyage des caractères qui entourent la valeur (crochets, parenthèses, etc.)
40
+ if value:
41
+ value = value.strip("[](){}<>")
42
+
43
+ # Conversion du type
44
+ if "type" in field:
45
+ typ = field["type"]
46
+ if typ == "datetime":
47
+ try:
48
+ # Format typique utilisé dans nos logs
49
+ value = datetime.strptime(value, "%a %b %d %H:%M:%S %Y")
50
+ except Exception:
51
+ value = None
52
+ elif typ == "direction":
53
+ value = "download" if value == "o" else "upload"
54
+ else:
55
+ try:
56
+ value = typ(value)
57
+ except Exception:
58
+ pass
59
+
60
+ entry[field["name"]] = value
61
+
62
+ return entry
63
+ """Parse une ligne du fichier log en utilisant la définition fournie."""
64
+ tokens = line.strip().split()
65
+ # On ignore la ligne si elle ne contient pas assez de tokens
66
+ if len(tokens) < len(self.log_definition["fields"]):
67
+ return None
68
+
69
  entry = {}
70
  for field in self.log_definition["fields"]:
71
  pos = field["pos"]