Add parsers for syslog, Windows firewall, CSV, and Graylog formats
Browse files- c2sentinel.py +127 -26
c2sentinel.py
CHANGED
|
@@ -1074,30 +1074,108 @@ class LogParser:
|
|
| 1074 |
|
| 1075 |
@staticmethod
|
| 1076 |
def parse_syslog(log_line: str) -> Optional[Dict]:
|
| 1077 |
-
"""Parse common syslog/
|
| 1078 |
-
|
| 1079 |
-
|
| 1080 |
-
|
| 1081 |
-
|
| 1082 |
-
|
| 1083 |
-
|
| 1084 |
-
|
| 1085 |
-
|
| 1086 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1087 |
try:
|
| 1088 |
-
|
| 1089 |
-
|
| 1090 |
-
|
| 1091 |
-
|
| 1092 |
-
|
| 1093 |
-
|
| 1094 |
-
|
| 1095 |
-
|
| 1096 |
-
|
| 1097 |
-
|
| 1098 |
-
}
|
| 1099 |
except:
|
| 1100 |
pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1101 |
return None
|
| 1102 |
|
| 1103 |
@staticmethod
|
|
@@ -1719,39 +1797,62 @@ class C2Sentinel:
|
|
| 1719 |
threshold: float = 0.5
|
| 1720 |
) -> List[Dict]:
|
| 1721 |
"""Analyze raw log lines for C2 activity."""
|
|
|
|
| 1722 |
connections = []
|
| 1723 |
|
| 1724 |
-
# First try to parse as
|
| 1725 |
full_content = ''.join(log_lines)
|
| 1726 |
try:
|
| 1727 |
data = json.loads(full_content)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1728 |
if isinstance(data, list):
|
| 1729 |
for item in data:
|
| 1730 |
if isinstance(item, dict):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1731 |
conn = {
|
| 1732 |
-
'timestamp':
|
| 1733 |
'src_ip': item.get('src_ip', item.get('source_ip', '')),
|
| 1734 |
'dst_ip': item.get('dst_ip', item.get('dest_ip', '')),
|
| 1735 |
'src_port': int(item.get('src_port', item.get('source_port', 0))),
|
| 1736 |
'dst_port': int(item.get('dst_port', item.get('dest_port', 0))),
|
| 1737 |
'protocol': item.get('protocol', 'tcp'),
|
| 1738 |
-
'bytes_sent':
|
| 1739 |
-
'bytes_recv':
|
| 1740 |
'duration': float(item.get('duration', 0))
|
| 1741 |
}
|
| 1742 |
if conn.get('dst_ip'):
|
| 1743 |
connections.append(conn)
|
| 1744 |
-
except (json.JSONDecodeError, TypeError):
|
| 1745 |
pass
|
| 1746 |
|
| 1747 |
# Fall back to line-by-line parsing
|
| 1748 |
if not connections:
|
|
|
|
| 1749 |
for line in log_lines:
|
| 1750 |
conn = self.log_parser.parse_json(line)
|
| 1751 |
if not conn:
|
| 1752 |
conn = self.log_parser.parse_zeek_conn(line)
|
| 1753 |
if not conn:
|
| 1754 |
conn = self.log_parser.parse_syslog(line)
|
|
|
|
|
|
|
| 1755 |
if conn:
|
| 1756 |
connections.append(conn)
|
| 1757 |
|
|
|
|
@staticmethod
def parse_syslog(log_line: str) -> Optional[Dict]:
    """Parse a single syslog/firewall log line into a connection dict.

    Formats are tried in priority order:
      1. Linux iptables/netfilter: ``... SRC=a.b.c.d DST=... SPT=n DPT=n LEN=n``
      2. Windows Firewall: ``TimeGenerated=... SourceAddress=... DestPort=n``
      3. Generic ``key=value`` pairs (lowercase/uppercase dst/dport variants)

    Returns a dict with keys ``timestamp``, ``src_ip``, ``dst_ip``,
    ``src_port``, ``dst_port``, ``protocol``, ``bytes_sent``, ``bytes_recv``,
    or None when the line matches none of the formats.
    """
    from datetime import datetime

    # Linux iptables format: SRC=x.x.x.x DST=x.x.x.x SPT=xxx DPT=xxx LEN=xxx
    iptables_match = re.search(
        r'(\w{3}\s+\d+\s+\d+:\d+:\d+).*?SRC=(\d+\.\d+\.\d+\.\d+).*?DST=(\d+\.\d+\.\d+\.\d+).*?SPT=(\d+).*?DPT=(\d+)(?:.*?LEN=(\d+))?',
        log_line, re.IGNORECASE
    )
    if iptables_match:
        try:
            ts_str = iptables_match.group(1)
            # Classic syslog timestamps ("Jan 18 10:00:00") omit the year.
            # Use the current year instead of a hard-coded one.
            # NOTE(review): late-December entries parsed in early January
            # will land one year ahead — acceptable for this heuristic.
            year = datetime.now().year
            dt = datetime.strptime(f"{year} {ts_str}", "%Y %b %d %H:%M:%S")
            return {
                # dt is naive, so timestamp() interprets it in local time —
                # consistent for deltas between lines from the same host.
                'timestamp': dt.timestamp(),
                'src_ip': iptables_match.group(2),
                'dst_ip': iptables_match.group(3),
                'src_port': int(iptables_match.group(4)),
                'dst_port': int(iptables_match.group(5)),
                # iptables lines carry no L4-protocol capture here; assume tcp.
                'protocol': 'tcp',
                # LEN (packet length) is the only size field available.
                'bytes_sent': int(iptables_match.group(6) or 0),
                'bytes_recv': 0
            }
        except (ValueError, OverflowError, OSError):
            # Malformed timestamp or out-of-range value: try other formats.
            pass

    # Windows Firewall format: TimeGenerated=xxx SourceAddress=xxx DestAddress=xxx DestPort=xxx
    win_match = re.search(
        r'TimeGenerated=(\S+).*?(?:SourceAddress|SourceIP)=(\d+\.\d+\.\d+\.\d+).*?(?:DestAddress|DestinationIP)=(\d+\.\d+\.\d+\.\d+).*?(?:DestPort|DestinationPort)=(\d+)',
        log_line, re.IGNORECASE
    )
    if win_match:
        try:
            ts_str = win_match.group(1)
            # fromisoformat (pre-3.11) does not accept a trailing 'Z'.
            dt = datetime.fromisoformat(ts_str.replace('Z', '+00:00'))
            return {
                'timestamp': dt.timestamp(),
                'src_ip': win_match.group(2),
                'dst_ip': win_match.group(3),
                'src_port': 0,  # not present in this format
                'dst_port': int(win_match.group(4)),
                'protocol': 'tcp',
                'bytes_sent': 0,
                'bytes_recv': 0
            }
        except (ValueError, OverflowError, OSError):
            pass

    # Generic key=value format — catch-all for other firewall exports.
    kv_pairs = re.findall(r'(\w+)=(\S+)', log_line)
    if kv_pairs:
        kv = dict(kv_pairs)
        dst_ip = kv.get('dst') or kv.get('DST') or kv.get('DestAddress') or kv.get('dest_ip')
        dst_port = kv.get('dport') or kv.get('DPT') or kv.get('DestPort') or kv.get('dest_port')
        if dst_ip and dst_port:
            try:
                return {
                    'timestamp': 0,  # no reliable timestamp in generic lines
                    'src_ip': kv.get('src') or kv.get('SRC') or kv.get('SourceAddress') or '',
                    'dst_ip': dst_ip,
                    'src_port': int(kv.get('sport') or kv.get('SPT') or kv.get('SourcePort') or 0),
                    'dst_port': int(dst_port),
                    'protocol': kv.get('proto') or kv.get('Protocol') or 'tcp',
                    'bytes_sent': int(kv.get('bytes') or kv.get('LEN') or 0),
                    'bytes_recv': 0
                }
            except ValueError:
                # Non-numeric port/byte values: treat the line as unparseable.
                pass

    return None
@staticmethod
def parse_csv(log_line: str, headers: Optional[List[str]] = None) -> Optional[Dict]:
    """Parse one CSV log line into a connection dict.

    Expected column order: timestamp, src_ip, src_port, dst_ip, dst_port,
    then optionally protocol, bytes_sent, bytes_recv. A non-empty
    ``headers`` list must be supplied by the caller; the header row itself
    (any line starting with "timestamp") is skipped.

    Returns the connection dict, or None for header rows, short rows, or
    unparseable input.
    """
    import csv
    from datetime import datetime

    if not headers or log_line.startswith('timestamp'):
        return None  # Skip header row (or refuse to guess without headers)

    try:
        # csv.reader handles quoted fields ("a,b" stays one column);
        # a naive split(',') would split inside quotes.
        parts = [p.strip() for p in next(csv.reader([log_line]))]
    except (csv.Error, StopIteration):
        return None

    # Need at least timestamp, src_ip, src_port, dst_ip, dst_port.
    if len(parts) < 5:
        return None

    try:
        # fromisoformat (pre-3.11) does not accept a trailing 'Z'.
        ts = datetime.fromisoformat(parts[0].replace('Z', '+00:00')).timestamp()
    except (ValueError, OverflowError, OSError):
        ts = 0  # keep the row even when the timestamp is unparseable

    def _int_col(i: int) -> int:
        # Column i as a non-negative int; 0 when absent or non-numeric.
        return int(parts[i]) if len(parts) > i and parts[i].isdigit() else 0

    return {
        'timestamp': ts,
        'src_ip': parts[1],
        'src_port': _int_col(2),
        'dst_ip': parts[3],
        'dst_port': _int_col(4),
        'protocol': parts[5] if len(parts) > 5 else 'tcp',
        'bytes_sent': _int_col(6),
        'bytes_recv': _int_col(7)
    }
| 1180 |
|
| 1181 |
@staticmethod
|
|
|
|
| 1797 |
threshold: float = 0.5
|
| 1798 |
) -> List[Dict]:
|
| 1799 |
"""Analyze raw log lines for C2 activity."""
|
| 1800 |
+
from datetime import datetime
|
| 1801 |
connections = []
|
| 1802 |
|
| 1803 |
+
# First try to parse as complete JSON (array or object with messages)
|
| 1804 |
full_content = ''.join(log_lines)
|
| 1805 |
try:
|
| 1806 |
data = json.loads(full_content)
|
| 1807 |
+
|
| 1808 |
+
# Handle Graylog-style nested JSON: {"messages": [...]}
|
| 1809 |
+
if isinstance(data, dict) and 'messages' in data:
|
| 1810 |
+
data = data['messages']
|
| 1811 |
+
|
| 1812 |
if isinstance(data, list):
|
| 1813 |
for item in data:
|
| 1814 |
if isinstance(item, dict):
|
| 1815 |
+
# Parse timestamp
|
| 1816 |
+
ts = item.get('timestamp', item.get('@timestamp', 0))
|
| 1817 |
+
if isinstance(ts, str):
|
| 1818 |
+
try:
|
| 1819 |
+
dt = datetime.fromisoformat(ts.replace('Z', '+00:00'))
|
| 1820 |
+
ts = dt.timestamp()
|
| 1821 |
+
except:
|
| 1822 |
+
ts = 0
|
| 1823 |
+
|
| 1824 |
+
# Handle 'bytes' field (combined) vs separate sent/recv
|
| 1825 |
+
bytes_val = int(item.get('bytes', 0))
|
| 1826 |
+
bytes_sent = int(item.get('bytes_sent', item.get('bytes_out', bytes_val)))
|
| 1827 |
+
bytes_recv = int(item.get('bytes_recv', item.get('bytes_in', 0)))
|
| 1828 |
+
|
| 1829 |
conn = {
|
| 1830 |
+
'timestamp': ts,
|
| 1831 |
'src_ip': item.get('src_ip', item.get('source_ip', '')),
|
| 1832 |
'dst_ip': item.get('dst_ip', item.get('dest_ip', '')),
|
| 1833 |
'src_port': int(item.get('src_port', item.get('source_port', 0))),
|
| 1834 |
'dst_port': int(item.get('dst_port', item.get('dest_port', 0))),
|
| 1835 |
'protocol': item.get('protocol', 'tcp'),
|
| 1836 |
+
'bytes_sent': bytes_sent,
|
| 1837 |
+
'bytes_recv': bytes_recv,
|
| 1838 |
'duration': float(item.get('duration', 0))
|
| 1839 |
}
|
| 1840 |
if conn.get('dst_ip'):
|
| 1841 |
connections.append(conn)
|
| 1842 |
+
except (json.JSONDecodeError, TypeError, ValueError):
|
| 1843 |
pass
|
| 1844 |
|
| 1845 |
# Fall back to line-by-line parsing
|
| 1846 |
if not connections:
|
| 1847 |
+
has_csv_header = log_lines and log_lines[0].strip().startswith('timestamp,')
|
| 1848 |
for line in log_lines:
|
| 1849 |
conn = self.log_parser.parse_json(line)
|
| 1850 |
if not conn:
|
| 1851 |
conn = self.log_parser.parse_zeek_conn(line)
|
| 1852 |
if not conn:
|
| 1853 |
conn = self.log_parser.parse_syslog(line)
|
| 1854 |
+
if not conn and has_csv_header:
|
| 1855 |
+
conn = self.log_parser.parse_csv(line, headers=['timestamp'])
|
| 1856 |
if conn:
|
| 1857 |
connections.append(conn)
|
| 1858 |
|