danielostrow committed on
Commit
beca259
·
verified ·
1 Parent(s): 4ad28eb

Fix log parsing: JSON arrays, Zeek format, and beacon detection priority

Browse files
Files changed (1) hide show
  1. c2sentinel.py +95 -16
c2sentinel.py CHANGED
@@ -1051,14 +1051,18 @@ class LogParser:
1051
  def parse_zeek_conn(log_line: str) -> Optional[Dict]:
1052
  """Parse Zeek/Bro conn.log format."""
1053
  try:
 
 
 
1054
  parts = log_line.strip().split('\t')
1055
- if len(parts) >= 15:
 
1056
  return {
1057
  'timestamp': float(parts[0]),
1058
  'src_ip': parts[2],
1059
- 'src_port': int(parts[3]),
1060
  'dst_ip': parts[4],
1061
- 'dst_port': int(parts[5]),
1062
  'protocol': parts[6],
1063
  'duration': float(parts[8]) if parts[8] != '-' else 0,
1064
  'bytes_sent': int(parts[9]) if parts[9] != '-' else 0,
@@ -1518,6 +1522,7 @@ class C2Sentinel:
1518
  # PHASE 4: Behavioral refinement
1519
  # ================================================================
1520
 
 
1521
  dst_ips = set(conn.get('dst_ip', '') for conn in connections)
1522
  bytes_recv = [conn.get('bytes_recv', 0) for conn in connections]
1523
  bytes_sent = [conn.get('bytes_sent', 0) for conn in connections]
@@ -1567,15 +1572,63 @@ class C2Sentinel:
1567
  c2_prob = min(1.0, c2_prob * 1.5)
1568
  result.risk_factors.append("APT-style slow beacon pattern")
1569
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1570
  # ================================================================
1571
- # PHASE 5: Apply legitimate pattern discount
1572
  # ================================================================
1573
 
1574
  if matched_pattern and pattern_confidence > 0.5:
1575
- # Reduce probability based on pattern match
1576
- discount = 1.0 - (pattern_confidence * 0.7) # Up to 70% reduction
1577
- c2_prob *= discount
1578
- result.mitigating_factors.append(f"Legitimate pattern match reduces probability by {(1-discount)*100:.0f}%")
 
 
 
 
 
 
 
 
 
 
 
1579
 
1580
  # ================================================================
1581
  # PHASE 6: Apply context inference (always check whitelist/blacklist)
@@ -1667,14 +1720,40 @@ class C2Sentinel:
1667
  ) -> List[Dict]:
1668
  """Analyze raw log lines for C2 activity."""
1669
  connections = []
1670
- for line in log_lines:
1671
- conn = self.log_parser.parse_json(line)
1672
- if not conn:
1673
- conn = self.log_parser.parse_zeek_conn(line)
1674
- if not conn:
1675
- conn = self.log_parser.parse_syslog(line)
1676
- if conn:
1677
- connections.append(conn)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1678
 
1679
  if not connections:
1680
  return []
 
1051
  def parse_zeek_conn(log_line: str) -> Optional[Dict]:
1052
  """Parse Zeek/Bro conn.log format."""
1053
  try:
1054
+ # Skip header lines
1055
+ if log_line.startswith('#'):
1056
+ return None
1057
  parts = log_line.strip().split('\t')
1058
+ # Minimum fields: ts, uid, orig_h, orig_p, resp_h, resp_p, proto, service, duration, orig_bytes, resp_bytes
1059
+ if len(parts) >= 11:
1060
  return {
1061
  'timestamp': float(parts[0]),
1062
  'src_ip': parts[2],
1063
+ 'src_port': int(parts[3]) if parts[3] != '-' else 0,
1064
  'dst_ip': parts[4],
1065
+ 'dst_port': int(parts[5]) if parts[5] != '-' else 0,
1066
  'protocol': parts[6],
1067
  'duration': float(parts[8]) if parts[8] != '-' else 0,
1068
  'bytes_sent': int(parts[9]) if parts[9] != '-' else 0,
 
1522
  # PHASE 4: Behavioral refinement
1523
  # ================================================================
1524
 
1525
+ beacon_indicators = 0 # Initialize here so it's always defined
1526
  dst_ips = set(conn.get('dst_ip', '') for conn in connections)
1527
  bytes_recv = [conn.get('bytes_recv', 0) for conn in connections]
1528
  bytes_sent = [conn.get('bytes_sent', 0) for conn in connections]
 
1572
  c2_prob = min(1.0, c2_prob * 1.5)
1573
  result.risk_factors.append("APT-style slow beacon pattern")
1574
 
1575
+ # ============================================================
1576
+ # CRITICAL: Explicit beacon pattern override
1577
+ # When at least 4 of the 5 classic beacon indicators are present, force detection
1578
+ # ============================================================
1579
+ beacon_indicators = 0
1580
+
1581
+ # Indicator 1: Very regular timing (CV < 0.15)
1582
+ if interval_cv < 0.15:
1583
+ beacon_indicators += 1
1584
+
1585
+ # Indicator 2: Very consistent sizes (both sent and recv CV < 0.15)
1586
+ if recv_cv < 0.15 and sent_cv < 0.15:
1587
+ beacon_indicators += 1
1588
+
1589
+ # Indicator 3: Small packet sizes (typical heartbeat)
1590
+ mean_sent = np.mean(bytes_sent) if bytes_sent else 0
1591
+ mean_recv = np.mean(bytes_recv) if bytes_recv else 0
1592
+ if mean_sent < 500 and mean_recv < 500:
1593
+ beacon_indicators += 1
1594
+
1595
+ # Indicator 4: Regular interval in beacon range (5s - 300s)
1596
+ if 5 <= mean_interval <= 300:
1597
+ beacon_indicators += 1
1598
+
1599
+ # Indicator 5: Sufficient sample size
1600
+ if len(connections) >= 8:
1601
+ beacon_indicators += 1
1602
+
1603
+ # If 4+ of 5 indicators present, this is almost certainly C2
1604
+ if beacon_indicators >= 4:
1605
+ c2_prob = max(c2_prob, 0.85) # Force high probability
1606
+ result.risk_factors.append(f"Classic C2 beacon pattern detected ({beacon_indicators}/5 indicators)")
1607
+ result.detection_method = DetectionMethod.BEHAVIORAL.value
1608
+ elif beacon_indicators >= 3:
1609
+ c2_prob = max(c2_prob, 0.65) # Likely C2
1610
+ result.risk_factors.append(f"Probable C2 beacon pattern ({beacon_indicators}/5 indicators)")
1611
+
1612
  # ================================================================
1613
+ # PHASE 5: Apply legitimate pattern discount (but respect strong beacon signals)
1614
  # ================================================================
1615
 
1616
  if matched_pattern and pattern_confidence > 0.5:
1617
+ # If strong beacon indicators present, skip or reduce the legitimate pattern discount
1618
+ # This prevents false negatives when C2 mimics legitimate patterns
1619
+ if beacon_indicators >= 4:
1620
+ # Strong C2 signal - don't discount
1621
+ result.mitigating_factors.append(f"Matches {matched_pattern.name} pattern but beacon indicators override")
1622
+ elif beacon_indicators >= 3:
1623
+ # Moderate C2 signal - apply reduced discount
1624
+ discount = 1.0 - (pattern_confidence * 0.3) # Max 30% reduction
1625
+ c2_prob *= discount
1626
+ result.mitigating_factors.append(f"Legitimate pattern match reduces probability by {(1-discount)*100:.0f}%")
1627
+ else:
1628
+ # Weak C2 signal - apply full discount
1629
+ discount = 1.0 - (pattern_confidence * 0.7) # Up to 70% reduction
1630
+ c2_prob *= discount
1631
+ result.mitigating_factors.append(f"Legitimate pattern match reduces probability by {(1-discount)*100:.0f}%")
1632
 
1633
  # ================================================================
1634
  # PHASE 6: Apply context inference (always check whitelist/blacklist)
 
1720
  ) -> List[Dict]:
1721
  """Analyze raw log lines for C2 activity."""
1722
  connections = []
1723
+
1724
+ # First try to parse as a complete JSON array
1725
+ full_content = ''.join(log_lines)
1726
+ try:
1727
+ data = json.loads(full_content)
1728
+ if isinstance(data, list):
1729
+ for item in data:
1730
+ if isinstance(item, dict):
1731
+ conn = {
1732
+ 'timestamp': item.get('timestamp', item.get('@timestamp', 0)),
1733
+ 'src_ip': item.get('src_ip', item.get('source_ip', '')),
1734
+ 'dst_ip': item.get('dst_ip', item.get('dest_ip', '')),
1735
+ 'src_port': int(item.get('src_port', item.get('source_port', 0))),
1736
+ 'dst_port': int(item.get('dst_port', item.get('dest_port', 0))),
1737
+ 'protocol': item.get('protocol', 'tcp'),
1738
+ 'bytes_sent': int(item.get('bytes_sent', item.get('bytes_out', 0))),
1739
+ 'bytes_recv': int(item.get('bytes_recv', item.get('bytes_in', 0))),
1740
+ 'duration': float(item.get('duration', 0))
1741
+ }
1742
+ if conn.get('dst_ip'):
1743
+ connections.append(conn)
1744
+ except (json.JSONDecodeError, TypeError):
1745
+ pass
1746
+
1747
+ # Fall back to line-by-line parsing
1748
+ if not connections:
1749
+ for line in log_lines:
1750
+ conn = self.log_parser.parse_json(line)
1751
+ if not conn:
1752
+ conn = self.log_parser.parse_zeek_conn(line)
1753
+ if not conn:
1754
+ conn = self.log_parser.parse_syslog(line)
1755
+ if conn:
1756
+ connections.append(conn)
1757
 
1758
  if not connections:
1759
  return []