danielostrow commited on
Commit
4abea42
·
verified ·
1 Parent(s): beca259

Add parsers for syslog, Windows firewall, CSV, and Graylog formats

Browse files
Files changed (1) hide show
  1. c2sentinel.py +127 -26
c2sentinel.py CHANGED
@@ -1074,30 +1074,108 @@ class LogParser:
1074
 
1075
  @staticmethod
1076
  def parse_syslog(log_line: str) -> Optional[Dict]:
1077
- """Parse common syslog/netflow formats."""
1078
- patterns = [
1079
- r'(\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}).*?(\d+\.\d+\.\d+\.\d+):(\d+)\s*->\s*(\d+\.\d+\.\d+\.\d+):(\d+)',
1080
- r'src=(\d+\.\d+\.\d+\.\d+).*?dst=(\d+\.\d+\.\d+\.\d+).*?sport=(\d+).*?dport=(\d+)',
1081
- ]
1082
-
1083
- for pattern in patterns:
1084
- match = re.search(pattern, log_line)
1085
- if match:
1086
- groups = match.groups()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1087
  try:
1088
- if len(groups) == 5:
1089
- return {
1090
- 'timestamp': groups[0],
1091
- 'src_ip': groups[1],
1092
- 'src_port': int(groups[2]),
1093
- 'dst_ip': groups[3],
1094
- 'dst_port': int(groups[4]),
1095
- 'protocol': 'tcp',
1096
- 'bytes_sent': 0,
1097
- 'bytes_recv': 0
1098
- }
1099
  except:
1100
  pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1101
  return None
1102
 
1103
  @staticmethod
@@ -1719,39 +1797,62 @@ class C2Sentinel:
1719
  threshold: float = 0.5
1720
  ) -> List[Dict]:
1721
  """Analyze raw log lines for C2 activity."""
 
1722
  connections = []
1723
 
1724
- # First try to parse as a complete JSON array
1725
  full_content = ''.join(log_lines)
1726
  try:
1727
  data = json.loads(full_content)
 
 
 
 
 
1728
  if isinstance(data, list):
1729
  for item in data:
1730
  if isinstance(item, dict):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1731
  conn = {
1732
- 'timestamp': item.get('timestamp', item.get('@timestamp', 0)),
1733
  'src_ip': item.get('src_ip', item.get('source_ip', '')),
1734
  'dst_ip': item.get('dst_ip', item.get('dest_ip', '')),
1735
  'src_port': int(item.get('src_port', item.get('source_port', 0))),
1736
  'dst_port': int(item.get('dst_port', item.get('dest_port', 0))),
1737
  'protocol': item.get('protocol', 'tcp'),
1738
- 'bytes_sent': int(item.get('bytes_sent', item.get('bytes_out', 0))),
1739
- 'bytes_recv': int(item.get('bytes_recv', item.get('bytes_in', 0))),
1740
  'duration': float(item.get('duration', 0))
1741
  }
1742
  if conn.get('dst_ip'):
1743
  connections.append(conn)
1744
- except (json.JSONDecodeError, TypeError):
1745
  pass
1746
 
1747
  # Fall back to line-by-line parsing
1748
  if not connections:
 
1749
  for line in log_lines:
1750
  conn = self.log_parser.parse_json(line)
1751
  if not conn:
1752
  conn = self.log_parser.parse_zeek_conn(line)
1753
  if not conn:
1754
  conn = self.log_parser.parse_syslog(line)
 
 
1755
  if conn:
1756
  connections.append(conn)
1757
 
 
1074
 
1075
  @staticmethod
1076
  def parse_syslog(log_line: str) -> Optional[Dict]:
1077
+ """Parse common syslog/firewall formats."""
1078
+ from datetime import datetime
1079
+
1080
+ # Linux iptables format: SRC=x.x.x.x DST=x.x.x.x SPT=xxx DPT=xxx LEN=xxx
1081
+ iptables_match = re.search(
1082
+ r'(\w{3}\s+\d+\s+\d+:\d+:\d+).*?SRC=(\d+\.\d+\.\d+\.\d+).*?DST=(\d+\.\d+\.\d+\.\d+).*?SPT=(\d+).*?DPT=(\d+)(?:.*?LEN=(\d+))?',
1083
+ log_line, re.IGNORECASE
1084
+ )
1085
+ if iptables_match:
1086
+ try:
1087
+ ts_str = iptables_match.group(1)
1088
+ # Parse timestamp like "Jan 18 10:00:00"
1089
+ dt = datetime.strptime(f"2026 {ts_str}", "%Y %b %d %H:%M:%S")
1090
+ return {
1091
+ 'timestamp': dt.timestamp(),
1092
+ 'src_ip': iptables_match.group(2),
1093
+ 'dst_ip': iptables_match.group(3),
1094
+ 'src_port': int(iptables_match.group(4)),
1095
+ 'dst_port': int(iptables_match.group(5)),
1096
+ 'protocol': 'tcp',
1097
+ 'bytes_sent': int(iptables_match.group(6) or 0),
1098
+ 'bytes_recv': 0
1099
+ }
1100
+ except:
1101
+ pass
1102
+
1103
+ # Windows Firewall format: TimeGenerated=xxx SourceAddress=xxx DestAddress=xxx DestPort=xxx
1104
+ win_match = re.search(
1105
+ r'TimeGenerated=(\S+).*?(?:SourceAddress|SourceIP)=(\d+\.\d+\.\d+\.\d+).*?(?:DestAddress|DestinationIP)=(\d+\.\d+\.\d+\.\d+).*?(?:DestPort|DestinationPort)=(\d+)',
1106
+ log_line, re.IGNORECASE
1107
+ )
1108
+ if win_match:
1109
+ try:
1110
+ ts_str = win_match.group(1)
1111
+ dt = datetime.fromisoformat(ts_str.replace('Z', '+00:00'))
1112
+ return {
1113
+ 'timestamp': dt.timestamp(),
1114
+ 'src_ip': win_match.group(2),
1115
+ 'dst_ip': win_match.group(3),
1116
+ 'src_port': 0,
1117
+ 'dst_port': int(win_match.group(4)),
1118
+ 'protocol': 'tcp',
1119
+ 'bytes_sent': 0,
1120
+ 'bytes_recv': 0
1121
+ }
1122
+ except:
1123
+ pass
1124
+
1125
+ # Generic key=value format
1126
+ kv_match = re.findall(r'(\w+)=(\S+)', log_line)
1127
+ if kv_match:
1128
+ kv = dict(kv_match)
1129
+ dst_ip = kv.get('dst') or kv.get('DST') or kv.get('DestAddress') or kv.get('dest_ip')
1130
+ dst_port = kv.get('dport') or kv.get('DPT') or kv.get('DestPort') or kv.get('dest_port')
1131
+ if dst_ip and dst_port:
1132
  try:
1133
+ return {
1134
+ 'timestamp': 0,
1135
+ 'src_ip': kv.get('src') or kv.get('SRC') or kv.get('SourceAddress') or '',
1136
+ 'dst_ip': dst_ip,
1137
+ 'src_port': int(kv.get('sport') or kv.get('SPT') or kv.get('SourcePort') or 0),
1138
+ 'dst_port': int(dst_port),
1139
+ 'protocol': kv.get('proto') or kv.get('Protocol') or 'tcp',
1140
+ 'bytes_sent': int(kv.get('bytes') or kv.get('LEN') or 0),
1141
+ 'bytes_recv': 0
1142
+ }
 
1143
  except:
1144
  pass
1145
+
1146
+ return None
1147
+
1148
+ @staticmethod
1149
+ def parse_csv(log_line: str, headers: List[str] = None) -> Optional[Dict]:
1150
+ """Parse CSV log format."""
1151
+ from datetime import datetime
1152
+
1153
+ if not headers or log_line.startswith('timestamp'):
1154
+ return None # Skip header row
1155
+
1156
+ try:
1157
+ parts = log_line.strip().split(',')
1158
+ if len(parts) >= 5:
1159
+ # Try to map by position if we have standard columns
1160
+ ts_str = parts[0].strip()
1161
+ try:
1162
+ dt = datetime.fromisoformat(ts_str.replace('Z', '+00:00'))
1163
+ ts = dt.timestamp()
1164
+ except:
1165
+ ts = 0
1166
+
1167
+ return {
1168
+ 'timestamp': ts,
1169
+ 'src_ip': parts[1].strip() if len(parts) > 1 else '',
1170
+ 'src_port': int(parts[2].strip()) if len(parts) > 2 and parts[2].strip().isdigit() else 0,
1171
+ 'dst_ip': parts[3].strip() if len(parts) > 3 else '',
1172
+ 'dst_port': int(parts[4].strip()) if len(parts) > 4 and parts[4].strip().isdigit() else 0,
1173
+ 'protocol': parts[5].strip() if len(parts) > 5 else 'tcp',
1174
+ 'bytes_sent': int(parts[6].strip()) if len(parts) > 6 and parts[6].strip().isdigit() else 0,
1175
+ 'bytes_recv': int(parts[7].strip()) if len(parts) > 7 and parts[7].strip().isdigit() else 0
1176
+ }
1177
+ except:
1178
+ pass
1179
  return None
1180
 
1181
  @staticmethod
 
1797
  threshold: float = 0.5
1798
  ) -> List[Dict]:
1799
  """Analyze raw log lines for C2 activity."""
1800
+ from datetime import datetime
1801
  connections = []
1802
 
1803
+ # First try to parse as complete JSON (array or object with messages)
1804
  full_content = ''.join(log_lines)
1805
  try:
1806
  data = json.loads(full_content)
1807
+
1808
+ # Handle Graylog-style nested JSON: {"messages": [...]}
1809
+ if isinstance(data, dict) and 'messages' in data:
1810
+ data = data['messages']
1811
+
1812
  if isinstance(data, list):
1813
  for item in data:
1814
  if isinstance(item, dict):
1815
+ # Parse timestamp
1816
+ ts = item.get('timestamp', item.get('@timestamp', 0))
1817
+ if isinstance(ts, str):
1818
+ try:
1819
+ dt = datetime.fromisoformat(ts.replace('Z', '+00:00'))
1820
+ ts = dt.timestamp()
1821
+ except:
1822
+ ts = 0
1823
+
1824
+ # Handle 'bytes' field (combined) vs separate sent/recv
1825
+ bytes_val = int(item.get('bytes', 0))
1826
+ bytes_sent = int(item.get('bytes_sent', item.get('bytes_out', bytes_val)))
1827
+ bytes_recv = int(item.get('bytes_recv', item.get('bytes_in', 0)))
1828
+
1829
  conn = {
1830
+ 'timestamp': ts,
1831
  'src_ip': item.get('src_ip', item.get('source_ip', '')),
1832
  'dst_ip': item.get('dst_ip', item.get('dest_ip', '')),
1833
  'src_port': int(item.get('src_port', item.get('source_port', 0))),
1834
  'dst_port': int(item.get('dst_port', item.get('dest_port', 0))),
1835
  'protocol': item.get('protocol', 'tcp'),
1836
+ 'bytes_sent': bytes_sent,
1837
+ 'bytes_recv': bytes_recv,
1838
  'duration': float(item.get('duration', 0))
1839
  }
1840
  if conn.get('dst_ip'):
1841
  connections.append(conn)
1842
+ except (json.JSONDecodeError, TypeError, ValueError):
1843
  pass
1844
 
1845
  # Fall back to line-by-line parsing
1846
  if not connections:
1847
+ has_csv_header = log_lines and log_lines[0].strip().startswith('timestamp,')
1848
  for line in log_lines:
1849
  conn = self.log_parser.parse_json(line)
1850
  if not conn:
1851
  conn = self.log_parser.parse_zeek_conn(line)
1852
  if not conn:
1853
  conn = self.log_parser.parse_syslog(line)
1854
+ if not conn and has_csv_header:
1855
+ conn = self.log_parser.parse_csv(line, headers=['timestamp'])
1856
  if conn:
1857
  connections.append(conn)
1858