JoeiBanana commited on
Commit
dbbc211
Β·
verified Β·
1 Parent(s): 3453f3d

Upload batch 1/8

Browse files
Files changed (1) hide show
  1. backend/daemon.py +1525 -0
backend/daemon.py ADDED
@@ -0,0 +1,1525 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ daemon.py β€” Autonomous Wazuh-LLM Incident Response Daemon
4
+
5
+ Runs in a continuous loop, watching /var/ossec/logs/alerts/alerts.json for
6
+ new Wazuh alerts (rule level 7-12). Also detects SSH brute-force patterns:
7
+ 5+ failed SSH logins from the same source IP within a 5-minute window.
8
+
9
+ For every triggered alert the daemon automatically:
10
+ 1. Classify β€” Ollama wazuh-llama -> incident_type, severity, IOCs
11
+ 2. Show cmds β€” deterministic table -> which Cisco show commands to run
12
+ 3. GET state β€” RESTCONF GET calls -> current device state (JSON)
13
+ 4. Fix LLM β€” domain LoRA adapter -> CLI fix commands
14
+ 5. Apply fix β€” RESTCONF PATCH/PUT -> push changes to device
15
+ 6. Log result β€” managed_incidents.jsonl + managed_incidents.log
16
+
17
+ Usage:
18
+ python daemon.py
19
+ python daemon.py --alerts /var/ossec/logs/alerts/alerts.json
20
+ python daemon.py --poll 3 --ssh-threshold 5 --ssh-window 300 --min-level 7
21
+ python daemon.py --dry-run # classify + plan, but don't PATCH devices
22
+ """
23
+
24
+ import argparse
25
+ import base64
26
+ import gc
27
+ import json
28
+ import logging
29
+ import os
30
+ import re
31
+ import signal
32
+ import sys
33
+ import time
34
+ from collections import defaultdict
35
+ from datetime import datetime, timezone, timedelta
36
+ from pathlib import Path
37
+
38
+ import requests
39
+ import urllib3
40
+
41
+ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
42
+
43
+ # Heavy ML imports β€” loaded lazily when first alert fires
44
+ _torch = None
45
+ _PeftModel = None
46
+ _AutoModelForCausalLM = None
47
+ _AutoTokenizer = None
48
+
49
+ def _load_ml():
50
+ global _torch, _PeftModel, _AutoModelForCausalLM, _AutoTokenizer
51
+ if _torch is not None:
52
+ return
53
+ import torch
54
+ from peft import PeftModel
55
+ from transformers import AutoModelForCausalLM, AutoTokenizer
56
+ _torch = torch
57
+ _PeftModel = PeftModel
58
+ _AutoModelForCausalLM = AutoModelForCausalLM
59
+ _AutoTokenizer = AutoTokenizer
60
+
61
+
62
+ # ─────────────────────────────────────────────────────────────────────────────
63
+ # PATHS & CONSTANTS
64
+ # ─────────────────────────────────────────────────────────────────────────────
65
+
66
+ WORK_DIR = Path(__file__).parent # project-root/backend/
67
+ PROJECT_ROOT = WORK_DIR.parent # project-root/
68
+ BASE_MODEL = str(PROJECT_ROOT / "models" / "base" / "Hermes-3-Llama-3.1-8B")
69
+
70
+ DEFAULT_ALERTS_FILE = Path("/var/ossec/logs/alerts/alerts.json")
71
+ SHOW_OUTPUTS_DIR = PROJECT_ROOT / "show_outputs"
72
+ MANAGED_LOG_JSONL = PROJECT_ROOT / "managed_incidents.jsonl"
73
+ MANAGED_LOG_READABLE = PROJECT_ROOT / "managed_incidents.log"
74
+
75
+ OLLAMA_URL = "http://localhost:11434/api/generate"
76
+ WAZUH_MODEL = "wazuh-llama"
77
+ ROUTER_USER = "admin"
78
+ ROUTER_PASS = "cisco123!"
79
+ RESTCONF_PORT = 443
80
+
81
+ RESTCONF_GET_TIMEOUT = 15 # seconds per GET call
82
+ RESTCONF_PATCH_TIMEOUT = 20 # seconds per PATCH call
83
+
84
+
85
+ # ─────────────────────────────────────────────────────────────────────────────
86
+ # LOGGING
87
+ # ─────────────────────────────────────────────────────────────────────────────
88
+
89
+ logging.basicConfig(
90
+ level=logging.INFO,
91
+ format="%(asctime)s [%(levelname)s] %(message)s",
92
+ handlers=[
93
+ logging.StreamHandler(sys.stdout),
94
+ logging.FileHandler(str(PROJECT_ROOT / "daemon.log"), encoding="utf-8"),
95
+ ],
96
+ )
97
+ log = logging.getLogger(__name__)
98
+
99
+
100
+ # ─────────────────────────────────────────────────────────────────────────────
101
+ # DEVICE MAPS (same as pipeline.py)
102
+ # ─────────────────────────────────────────────────────────────────────────────
103
+
104
+ SOURCE_IP_DEVICE_MAP: dict[str, str] = {
105
+ "1.1.1.1": "R1",
106
+ "2.2.2.2": "R2",
107
+ "3.3.3.3": "R3",
108
+ "4.4.4.4": "R4",
109
+ "5.5.5.5": "R5",
110
+ "10.10.10.10": "R1",
111
+ "10.10.10.1": "cisco-router-02",
112
+ "192.168.100.2": "SW1",
113
+ "192.168.100.3": "SW2",
114
+ "192.168.100.21": "ACCESS-SW1",
115
+ "192.168.100.22": "ACCESS-SW2",
116
+ "192.168.40.20": "dhcp-test-client",
117
+ "192.168.40.21": "dhcp-test-client",
118
+ "192.168.40.22": "dhcp-test-client",
119
+ "198.51.100.23": "external-ssh-scanner",
120
+ "203.0.113.45": "external-port-scan",
121
+ "185.199.108.12": "malicious-server",
122
+ "192.0.2.111": "rdp-bruteforce-source",
123
+ }
124
+
125
+ DEVICE_MGMT_IP: dict[str, str] = {
126
+ "R1": "10.10.10.10",
127
+ "R2": "2.2.2.2",
128
+ "R3": "3.3.3.3",
129
+ "R4": "4.4.4.4",
130
+ "SW1": "192.168.100.2",
131
+ "SW2": "192.168.100.3",
132
+ "ACCESS-SW1": "192.168.100.21",
133
+ "ACCESS-SW2": "192.168.100.22",
134
+ "cisco-router-02": "10.10.10.1",
135
+ }
136
+
137
+ UNMANAGED = {
138
+ "unknown", "external-ssh-scanner", "external-port-scan",
139
+ "malicious-server", "rdp-bruteforce-source", "dhcp-test-client",
140
+ }
141
+
142
+ _I = PROJECT_ROOT / "models" / "incidents"
143
+ LORA_PATHS: dict[tuple, Path] = {
144
+ ("bgp", 1): _I / "bgp" / "lora_llm_bgp1",
145
+ ("bgp", 2): _I / "bgp" / "lora_llm_bgp2",
146
+ ("bgp", 3): _I / "bgp" / "lora_llm_bgp3",
147
+ ("ospf", 1): _I / "ospf" / "ospf1",
148
+ ("ospf", 2): _I / "ospf" / "ospf2",
149
+ ("ospf", 3): _I / "ospf" / "ospf3",
150
+ ("ospf", 4): _I / "ospf" / "ospf4",
151
+ ("sec", 1): _I / "security" / "lora_llm_sec1",
152
+ ("sec", 2): _I / "security" / "lora_llm_sec2",
153
+ ("sec", 3): _I / "security" / "lora_llm_sec3",
154
+ ("switch", 1): _I / "switch" / "lora_llm_switch1",
155
+ ("switch", 2): _I / "switch" / "lora_llm_switch2",
156
+ ("switch", 3): _I / "switch" / "lora_llm_switch3",
157
+ ("service", 1): _I / "service" / "lora_llm_service1",
158
+ ("service", 2): _I / "service" / "lora_llm_service2",
159
+ ("sys", 1): _I / "sys" / "lora_llm_sys1",
160
+ ("sys", 2): _I / "sys" / "lora_llm_sys2",
161
+ ("sys", 3): _I / "sys" / "lora_llm_sys3",
162
+ }
163
+
164
+ DOMAIN_INSTRUCTIONS: dict[str, str] = {
165
+ "bgp": (
166
+ "Analyze multi-device BGP incidents and output ONLY CLI FIX COMMANDS.\n"
167
+ "Do not provide explanation. Always modify the device in the Wazuh alert.\n"
168
+ "Rules:\n"
169
+ "- Keepalive MUST be >= 30 seconds.\n"
170
+ "- NEVER use keepalive value 10.\n"
171
+ "- Keepalive MUST be <= hold/3.\n"
172
+ "- For hold timer expiration, timers MUST exactly match peer timers."
173
+ ),
174
+ "ospf": (
175
+ "Analyze a multi-device OSPF adjacency issue and generate ONLY CLI FIX COMMANDS.\n"
176
+ "Do not provide explanation. Do not add comments. Only output the commands."
177
+ ),
178
+ "sec": (
179
+ "Analyze security policy incidents and output ONLY CLI FIX COMMANDS.\n"
180
+ "Do not provide explanation.\n"
181
+ "INCIDENT-SPECIFIC FIX RULES (MANDATORY):\n"
182
+ "1. acl_blocking_legitimate_traffic: MUST add a PERMIT; MUST NOT keep DENY for the flow.\n"
183
+ "2. acl_misconfiguration: Correct protocol, wildcard, or direction.\n"
184
+ "3. excessive_deny_entries: Refine/replace broad DENYs. NEVER 'permit ip any any'.\n"
185
+ "4. DNS = UDP/53 (TCP/53 only for zone transfers).\n"
186
+ "5. NEVER invert policy logic."
187
+ ),
188
+ "switch": (
189
+ "Analyze switching/L2 incidents and output ONLY CLI FIX COMMANDS.\n"
190
+ "Do not provide explanation. Output only valid Cisco IOS/IOS-XE commands."
191
+ ),
192
+ "service": (
193
+ "Analyze network service incidents (DHCP, DNS, NTP) and output ONLY CLI FIX COMMANDS.\n"
194
+ "Do not provide explanation. Output only the commands."
195
+ ),
196
+ "sys": (
197
+ "Given a system-health incident and device outputs, generate ONLY VALID "
198
+ "Cisco IOS/IOS-XE CONFIGURATION commands that immediately mitigate the incident.\n"
199
+ "GLOBAL RULES:\n"
200
+ "1. Output ONLY complete, syntactically correct Cisco IOS/IOS-XE config commands.\n"
201
+ "2. Do NOT output show, debug, clear, reload, write memory, or comments.\n"
202
+ "3. HIGH CPU: disable console logging, reduce buffered logging, "
203
+ "disable unused HTTP/HTTPS services.\n"
204
+ "4. HIGH MEMORY: REQUIRED - no ip http server / no ip http secure-server.\n"
205
+ "5. PROCESS CRASH: FIRST command MUST be: "
206
+ "exception crashinfo file bootflash:crashinfo"
207
+ ),
208
+ }
209
+
210
+
211
+ # ─────────────────────────────────────────────────────────────────────────────
212
+ # SHOW COMMAND RULES (same as pipeline.py)
213
+ # ─────────────────────────────────────────────────────────────────────────────
214
+
215
+ _SHOW_RULES: list[tuple[tuple[str, ...], list[str]]] = [
216
+ # OSPF
217
+ (("ospf_neighbor_down", "ospf_neighbour_down", "neighbor_down",
218
+ "neighbour_down", "adj_down", "adjacency_down", "full_to_down", "full to down"),
219
+ ["show ip ospf neighbor", "show ip ospf interface",
220
+ "show ip ospf database", "show ip route"]),
221
+ (("ospf_stuck_init", "stuck_init", "ospf_init", "ospf_2way", "ospf_2-way"),
222
+ ["show ip ospf neighbor", "show ip ospf interface", "show ip ospf database"]),
223
+ (("ospf_exstart", "exstart_exchange_stuck", "exstart", "exchange_stuck", "ospf_exchange"),
224
+ ["show ip ospf neighbor", "show ip ospf interface", "show ip ospf database"]),
225
+ (("ospf_mtu_mismatch", "mtu_mismatch", "mtu mismatch"),
226
+ ["show ip ospf neighbor", "show ip ospf interface", "show interfaces"]),
227
+ (("ospf_area_mismatch", "area_mismatch", "area mismatch", "wrong_area", "ospf_area"),
228
+ ["show ip ospf neighbor", "show ip ospf interface", "show ip ospf database"]),
229
+ (("ospf_auth_mismatch", "auth_mismatch", "authentication_mismatch", "ospf_authentication"),
230
+ ["show ip ospf neighbor", "show ip ospf interface", "show running-config"]),
231
+ (("ospf_hello_dead_mismatch", "hello_dead_mismatch",
232
+ "hello_mismatch", "dead_mismatch", "ospf_hello", "ospf_dead"),
233
+ ["show ip ospf neighbor", "show ip ospf interface"]),
234
+ (("ospf_network_type_mismatch", "network_type_mismatch", "ospf_network_type"),
235
+ ["show ip ospf neighbor", "show ip ospf interface"]),
236
+ (("ospf_lsa_flood", "lsa_flood", "lsa flood", "ospf_flood"),
237
+ ["show ip ospf", "show ip ospf neighbor", "show interfaces"]),
238
+ (("ospf_lsdb_inconsistency", "lsdb_inconsistency", "lsdb inconsistency"),
239
+ ["show ip ospf database", "show ip ospf neighbor"]),
240
+ (("ospf_redistribution_issue", "redistribution_issue", "ospf_redistrib",
241
+ "ospf redistrib", "ospf_route_redistribution"),
242
+ ["show ip route", "show running-config", "show ip ospf"]),
243
+ (("ospf",),
244
+ ["show ip ospf neighbor", "show ip ospf interface",
245
+ "show ip ospf database", "show ip route"]),
246
+ # BGP
247
+ (("bgp_session_flap", "session_flap", "bgp_flap", "bgp flap"),
248
+ ["show ip bgp summary", "show ip bgp neighbors", "show logging", "show interfaces"]),
249
+ (("bgp_hold_timer_expiration", "hold_timer_expiration", "hold_timer_expired",
250
+ "hold timer expir", "hold_timer", "hold timer"),
251
+ ["show ip bgp summary", "show ip bgp neighbors", "show logging"]),
252
+ (("bgp_neighborship_reset", "neighborship_reset", "bgp_reset", "bgp reset",
253
+ "bgp_neighbor_reset"),
254
+ ["show ip bgp summary", "show ip bgp neighbors", "show logging"]),
255
+ (("bgp_prefix_limit_exceeded", "prefix_limit_exceeded", "prefix_limit", "prefix limit"),
256
+ ["show ip bgp summary", "show ip bgp neighbors", "show logging"]),
257
+ (("bgp_route_leak_suspected", "route_leak_suspected", "route_leak",
258
+ "route leak", "bgp_community"),
259
+ ["show ip bgp summary", "show ip bgp neighbors",
260
+ "show running-config", "show logging"]),
261
+ (("bgp_path_selection_incorrect", "path_selection_incorrect", "path_selection",
262
+ "path selection", "bgp_bestpath", "bgp bestpath"),
263
+ ["show ip bgp summary", "show ip bgp neighbors", "show ip bgp", "show logging"]),
264
+ (("bgp_missing_routes_in_rib", "missing_routes_in_rib", "missing_routes",
265
+ "missing routes", "bgp_rib", "routes_missing"),
266
+ ["show ip bgp", "show ip route", "show ip bgp summary", "show logging"]),
267
+ (("bgp_next_hop_self_issue", "next_hop_self_issue", "next_hop_self",
268
+ "next-hop-self", "bgp_next_hop"),
269
+ ["show ip bgp neighbors", "show ip bgp", "show ip route", "show running-config"]),
270
+ (("bgp_afi_safi_mismatch", "afi_safi_mismatch", "afi_safi", "afi-safi",
271
+ "bgp_ipv6", "bgp_address_family"),
272
+ ["show ip bgp summary", "show ip bgp neighbors", "show running-config"]),
273
+ (("bgp",),
274
+ ["show ip bgp summary", "show ip bgp neighbors", "show logging"]),
275
+ # Security
276
+ (("acl_blocking_legitimate_traffic", "acl_blocking_legit",
277
+ "acl_blocking", "blocking_legitimate", "block_legit"),
278
+ ["show access-lists", "show ip access-lists",
279
+ "show interfaces", "show ip interface brief"]),
280
+ (("acl_misconfiguration", "acl_misconfig", "acl_wrong",
281
+ "access_list_misconfiguration"),
282
+ ["show access-lists", "show ip access-lists", "show running-config"]),
283
+ (("excessive_deny_entries", "excessive_deny", "excessive_denies",
284
+ "too_many_denies", "deny_entries"),
285
+ ["show access-lists", "show ip access-lists"]),
286
+ (("acl_block", "acl_drop", "access_list", "access-list", "access_control_list"),
287
+ ["show access-lists", "show ip access-lists", "show interfaces"]),
288
+ (("firewall_conn_exhaust", "conn_exhaust", "connection_exhaustion",
289
+ "connection_table_full", "conn_table"),
290
+ ["show interfaces", "show ip nat statistics", "show ip nat translations"]),
291
+ (("nat_translation_failure", "nat_failure", "nat_translation", "nat_issue", "nat_error"),
292
+ ["show ip nat statistics", "show ip nat translations"]),
293
+ (("ip_spoofing", "spoofing", "ip_spoof"),
294
+ ["show interfaces", "show ip arp", "show logging"]),
295
+ (("ssh_brute_force", "ssh_bruteforce", "brute_force", "bruteforce", "brute force"),
296
+ ["show logging", "show ip access-lists"]),
297
+ (("failed_login", "login_failure", "login_failures", "authentication_failure"),
298
+ ["show logging"]),
299
+ (("snmp_bruteforce", "snmp_brute_force", "snmp_attack", "snmp brute"),
300
+ ["show logging", "show running-config"]),
301
+ (("port_scan", "port scan", "port_scanning", "network_scan"),
302
+ ["show logging", "show ip access-lists", "show interfaces"]),
303
+ (("security", "firewall", "intrusion", "malware", "attack"),
304
+ ["show access-lists", "show logging", "show interfaces"]),
305
+ # Switching / L2
306
+ (("mac_flapping", "mac_flap", "mac flapping", "mac flap"),
307
+ ["show interfaces status", "show mac address-table",
308
+ "show spanning-tree", "show logging"]),
309
+ (("stp_topology_change", "stp_change", "spanning_tree_topology",
310
+ "topology_change", "spanning tree topology"),
311
+ ["show spanning-tree", "show interfaces status", "show logging"]),
312
+ (("port_errdisable", "errdisable", "err_disable", "err-disable", "port err-disable"),
313
+ ["show interfaces status", "show logging"]),
314
+ (("native_vlan_mismatch", "native_vlan", "native vlan mismatch"),
315
+ ["show interfaces", "show vlan"]),
316
+ (("trunk_negotiation_failure", "trunk_negotiation", "trunk_mismatch"),
317
+ ["show interfaces", "show vlan"]),
318
+ (("vlan_mismatch",),
319
+ ["show interfaces status", "show vlan", "show interfaces"]),
320
+ (("lldp_cdp_inconsistency", "lldp_inconsistency", "cdp_inconsistency", "lldp cdp"),
321
+ ["show cdp neighbors", "show lldp neighbors"]),
322
+ (("storm_control",), ["show interfaces", "show logging"]),
323
+ (("broadcast_storm", "broadcast storm", "multicast_storm"),
324
+ ["show interfaces", "show logging"]),
325
+ (("switch", "vlan", "trunk", "spanning", "layer2", "layer 2", "l2",
326
+ "mac_address", "mac address"),
327
+ ["show interfaces status", "show spanning-tree", "show vlan"]),
328
+ # Services
329
+ (("dhcp_conflict",),
330
+ ["show ip dhcp conflict", "show ip dhcp binding", "show ip arp"]),
331
+ (("dhcp_starvation", "dhcp starvation", "dhcp_pool_exhausted"),
332
+ ["show ip dhcp pool", "show ip dhcp binding", "show ip dhcp server statistics"]),
333
+ (("ip_conflict", "ip conflict", "duplicate_ip", "duplicate ip"),
334
+ ["show ip arp", "show ip dhcp conflict"]),
335
+ (("dhcp",),
336
+ ["show ip dhcp pool", "show ip dhcp conflict", "show ip dhcp binding"]),
337
+ (("dns_issue", "dns_issues", "dns_failure", "dns failure", "dns_resolution"),
338
+ ["show ip route", "show running-config"]),
339
+ (("ntp_unsync", "ntp_sync_issue", "ntp_issue", "ntp issue", "ntp_drift", "clock_skew"),
340
+ ["show ntp status", "show ntp associations"]),
341
+ (("arp_spoofing", "arp_spoof", "arp spoof", "gratuitous_arp"),
342
+ ["show ip arp", "show mac address-table", "show logging"]),
343
+ (("dns", "domain_name", "name_server", "resolver"),
344
+ ["show ip route", "show running-config"]),
345
+ (("ntp", "time_sync", "clock"), ["show ntp status", "show ntp associations"]),
346
+ (("arp",), ["show ip arp", "show mac address-table"]),
347
+ # System health
348
+ (("high_cpu", "cpu_high", "cpu_utilization", "cpu spike", "high cpu"),
349
+ ["show processes cpu", "show processes cpu history",
350
+ "show logging", "show interfaces"]),
351
+ (("high_memory", "memory_high", "memory_exhaustion",
352
+ "memory_leak", "out_of_memory", "oom", "high memory"),
353
+ ["show processes memory", "show version", "show logging"]),
354
+ (("process_crash", "process crash", "crashinfo", "crash"),
355
+ ["show version", "show logging", "show processes memory"]),
356
+ (("traceback",), ["show logging", "show version"]),
357
+ (("interface_flap", "interface flap", "link_flap", "link flap", "int_flap"),
358
+ ["show interfaces", "show logging", "show ip route"]),
359
+ (("duplex_mismatch", "duplex mismatch", "speed_mismatch", "speed mismatch"),
360
+ ["show interfaces", "show running-config"]),
361
+ (("reload", "reboot", "scheduled_reload"), ["show version", "show logging"]),
362
+ (("environment", "temperature", "fan_fail", "power_supply"),
363
+ ["show environment", "show version"]),
364
+ (("cpu", "memory", "system", "hardware", "health"),
365
+ ["show processes cpu", "show processes memory", "show logging", "show version"]),
366
+ ]
367
+
368
+ _DEFAULT_SHOW_CMDS = ["show interfaces", "show ip route", "show logging", "show version"]
369
+
370
+
371
+ def _get_show_commands(incident_type: str) -> list[str]:
372
+ key = incident_type.strip().lower()
373
+ for keywords, cmds in _SHOW_RULES:
374
+ if any(kw in key for kw in keywords):
375
+ return list(cmds)
376
+ return list(_DEFAULT_SHOW_CMDS)
377
+
378
+
379
+ # ─────────────────────────────────────────────────────────────────────────────
380
+ # RESTCONF GET RULES (same as pipeline.py)
381
+ # ─────────────────────────────────────────────────────────────────────────────
382
+
383
+ _RESTCONF_RULES: list[tuple[str, str]] = [
384
+ ("show ip ospf neighbor detail",
385
+ "/restconf/data/Cisco-IOS-XE-ospf-oper:ospf-oper-data/ospf-state"),
386
+ ("show ip ospf neighbor",
387
+ "/restconf/data/Cisco-IOS-XE-ospf-oper:ospf-oper-data/ospf-state"),
388
+ ("show ip ospf interface",
389
+ "/restconf/data/Cisco-IOS-XE-ospf-oper:ospf-oper-data/ospf-state"),
390
+ ("show ip ospf database",
391
+ "/restconf/data/Cisco-IOS-XE-ospf-oper:ospf-oper-data/ospf-state"),
392
+ ("show ip ospf",
393
+ "/restconf/data/Cisco-IOS-XE-ospf-oper:ospf-oper-data/ospf-state"),
394
+ ("show ip bgp summary",
395
+ "/restconf/data/Cisco-IOS-XE-bgp-oper:bgp-state-data/bgp-route-vrfs"),
396
+ ("show ip bgp neighbors",
397
+ "/restconf/data/Cisco-IOS-XE-bgp-oper:bgp-state-data/neighbors"),
398
+ ("show ip bgp",
399
+ "/restconf/data/Cisco-IOS-XE-bgp-oper:bgp-state-data"),
400
+ ("show interfaces status",
401
+ "/restconf/data/Cisco-IOS-XE-interfaces-oper:interfaces"),
402
+ ("show interfaces",
403
+ "/restconf/data/Cisco-IOS-XE-interfaces-oper:interfaces"),
404
+ ("show ip interface brief",
405
+ "/restconf/data/Cisco-IOS-XE-interfaces-oper:interfaces"),
406
+ ("show ip route",
407
+ "/restconf/data/Cisco-IOS-XE-ip-route-oper:ip-route-data"),
408
+ ("show ip access-lists",
409
+ "/restconf/data/Cisco-IOS-XE-acl-oper:access-lists"),
410
+ ("show access-lists",
411
+ "/restconf/data/Cisco-IOS-XE-acl-oper:access-lists"),
412
+ ("show port-security",
413
+ "/restconf/data/Cisco-IOS-XE-port-security-oper:port-security"),
414
+ ("show vlan",
415
+ "/restconf/data/Cisco-IOS-XE-vlan-oper:vlans"),
416
+ ("show spanning-tree",
417
+ "/restconf/data/Cisco-IOS-XE-spanning-tree-oper:stp-details"),
418
+ ("show mac address-table",
419
+ "/restconf/data/Cisco-IOS-XE-matm-oper:matm-oper-data"),
420
+ ("show etherchannel",
421
+ "/restconf/data/Cisco-IOS-XE-lacp-oper:lacp"),
422
+ ("show ip dhcp binding",
423
+ "/restconf/data/Cisco-IOS-XE-dhcp-oper:dhcp-oper-data/client-bindings"),
424
+ ("show ip dhcp conflict",
425
+ "/restconf/data/Cisco-IOS-XE-dhcp-oper:dhcp-oper-data"),
426
+ ("show ip dhcp pool",
427
+ "/restconf/data/Cisco-IOS-XE-dhcp-oper:dhcp-oper-data"),
428
+ ("show ip dhcp server statistics",
429
+ "/restconf/data/Cisco-IOS-XE-dhcp-oper:dhcp-oper-data"),
430
+ ("show ip dhcp",
431
+ "/restconf/data/Cisco-IOS-XE-dhcp-oper:dhcp-oper-data"),
432
+ ("show ip arp",
433
+ "/restconf/data/Cisco-IOS-XE-arp-oper:arp-data"),
434
+ ("show cdp neighbors",
435
+ "/restconf/data/Cisco-IOS-XE-cdp-oper:cdp-neighbor-details"),
436
+ ("show lldp neighbors",
437
+ "/restconf/data/Cisco-IOS-XE-lldp-oper:lldp-entries"),
438
+ ("show ntp associations",
439
+ "/restconf/data/Cisco-IOS-XE-ntp-oper:ntp-oper-data"),
440
+ ("show ntp status",
441
+ "/restconf/data/Cisco-IOS-XE-ntp-oper:ntp-oper-data"),
442
+ ("show processes cpu history",
443
+ "/restconf/data/Cisco-IOS-XE-process-cpu-oper:cpu-usage"),
444
+ ("show processes cpu",
445
+ "/restconf/data/Cisco-IOS-XE-process-cpu-oper:cpu-usage"),
446
+ ("show processes memory",
447
+ "/restconf/data/Cisco-IOS-XE-process-memory-oper:memory-usage-processes"),
448
+ ("show logging",
449
+ "/restconf/data/Cisco-IOS-XE-native:native/logging"),
450
+ ("show ip nat translations",
451
+ "/restconf/data/Cisco-IOS-XE-nat-oper:nat-data"),
452
+ ("show ip nat statistics",
453
+ "/restconf/data/Cisco-IOS-XE-nat-oper:nat-data"),
454
+ ("show running-config",
455
+ "/restconf/data/Cisco-IOS-XE-native:native"),
456
+ ("show version",
457
+ "/restconf/data/Cisco-IOS-XE-native:native/version"),
458
+ ("show environment",
459
+ "/restconf/data/Cisco-IOS-XE-environment-oper:environment-sensors"),
460
+ ]
461
+
462
+
463
+ def _resolve_restconf_path(cmd: str) -> str | None:
464
+ c = cmd.strip().lower()
465
+ for prefix, path in _RESTCONF_RULES:
466
+ if c == prefix or c.startswith(prefix + " ") or c.startswith(prefix):
467
+ return path
468
+ return None
469
+
470
+
471
+ # ─────────────────────────────────────────────────────────────────────────────
472
+ # RESTCONF AUTH
473
+ # ─────────────────────────────────────────────────────────────────────────────
474
+
475
+ def _auth_header() -> str:
476
+ return "Basic " + base64.b64encode(
477
+ f"{ROUTER_USER}:{ROUTER_PASS}".encode()
478
+ ).decode()
479
+
480
+
481
+ def _restconf_headers() -> dict:
482
+ return {
483
+ "Accept": "application/yang-data+json",
484
+ "Content-Type": "application/yang-data+json",
485
+ "Authorization": _auth_header(),
486
+ }
487
+
488
+
489
+ # ─────────────────────────────────────────────────────────────────────────────
490
+ # RESTCONF EXECUTOR
491
+ # ─────────────────────────────────────────────────────────────────────────────
492
+
493
+ def restconf_get(mgmt_ip: str, yang_path: str) -> dict:
494
+ """
495
+ Execute a RESTCONF GET.
496
+ Returns {"ok": bool, "status": int|None, "data": dict|None, "error": str|None}
497
+ """
498
+ url = f"https://{mgmt_ip}:{RESTCONF_PORT}{yang_path}"
499
+ try:
500
+ resp = requests.get(
501
+ url, headers=_restconf_headers(), verify=False,
502
+ timeout=RESTCONF_GET_TIMEOUT,
503
+ )
504
+ try:
505
+ data = resp.json()
506
+ except Exception:
507
+ data = {"raw": resp.text[:2000]}
508
+ return {"ok": resp.ok, "status": resp.status_code, "url": url, "data": data}
509
+ except requests.RequestException as exc:
510
+ return {"ok": False, "status": None, "url": url, "error": str(exc)}
511
+
512
+
513
+ def restconf_patch(url: str, body: dict) -> dict:
514
+ """
515
+ Execute a RESTCONF PATCH.
516
+ Returns {"ok": bool, "status": int|None, "error": str|None}
517
+ """
518
+ if body is None:
519
+ return {"ok": False, "status": None,
520
+ "error": "No YANG body β€” apply manually via CLI"}
521
+ try:
522
+ resp = requests.patch(
523
+ url, headers=_restconf_headers(), json=body, verify=False,
524
+ timeout=RESTCONF_PATCH_TIMEOUT,
525
+ )
526
+ return {"ok": resp.ok, "status": resp.status_code,
527
+ "response": resp.text[:500] if not resp.ok else "OK"}
528
+ except requests.RequestException as exc:
529
+ return {"ok": False, "status": None, "error": str(exc)}
530
+
531
+
532
+ def collect_device_state(device: str, show_cmds: list[str]) -> dict[str, dict]:
533
+ """
534
+ Run RESTCONF GETs for all show commands.
535
+ Returns {show_command: response_dict}
536
+ """
537
+ mgmt_ip = DEVICE_MGMT_IP.get(device, device)
538
+ results: dict[str, dict] = {}
539
+ seen_paths: set[str] = set()
540
+
541
+ for cmd in show_cmds:
542
+ path = _resolve_restconf_path(cmd)
543
+ if path is None:
544
+ log.warning(" No RESTCONF path for: %s", cmd)
545
+ results[cmd] = {"ok": False, "error": "No RESTCONF path found"}
546
+ continue
547
+ if path in seen_paths:
548
+ log.debug(" Skipping duplicate path: %s", path)
549
+ continue
550
+ seen_paths.add(path)
551
+ log.info(" GET %s -> %s", cmd, path)
552
+ result = restconf_get(mgmt_ip, path)
553
+ results[cmd] = result
554
+ log.info(" -> HTTP %s ok=%s", result.get("status"), result.get("ok"))
555
+
556
+ return results
557
+
558
+
559
+ # ─────────────────────────────────────────────────────────────────────────────
560
+ # WAZUH CLASSIFIER (same as pipeline.py)
561
+ # ─────────────────────────────────────────────────────────────────────────────
562
+
563
+ _WAZUH_SYSTEM = (
564
+ "You are a cybersecurity analyst specializing in Wazuh and Cisco network alerts. "
565
+ "Analyze the log and return ONLY valid JSON with these fields: "
566
+ "incident_type (e.g. ospf_neighbor_down, bgp_session_flap, acl_block, "
567
+ "dhcp_starvation, high_cpu, mac_flapping, brute_force, port_scan, etc.), "
568
+ "classification (benign/suspicious/malicious), "
569
+ "severity (low/medium/high/critical), "
570
+ "source_ip (string or null), destination_ip (string or null), "
571
+ "iocs (list of strings), "
572
+ "recommended_actions (list of strings), "
573
+ "explanation (1-2 sentences). "
574
+ "If data is missing infer from context or set null."
575
+ )
576
+
577
+ _IP_RE = re.compile(r"([0-9]+(?:\.[0-9]+){3})")
578
+ _CISCO_SRC = re.compile(r"\[Source:\s*([0-9]+(?:\.[0-9]+){3})\]")
579
+
580
+
581
+ def _parse_json_safe(text: str) -> dict | None:
582
+ text = text.strip()
583
+ try:
584
+ return json.loads(text)
585
+ except Exception:
586
+ pass
587
+ s, e = text.find("{"), text.rfind("}")
588
+ if s != -1 and e != -1:
589
+ try:
590
+ return json.loads(text[s:e + 1])
591
+ except Exception:
592
+ pass
593
+ return None
594
+
595
+
596
+ def call_wazuh_llm(log_text: str) -> dict | None:
597
+ payload = {
598
+ "model": WAZUH_MODEL,
599
+ "prompt": f"<|system|>\n{_WAZUH_SYSTEM}\n\n<|user|>\nLog: {log_text}\n<|assistant|>\n",
600
+ "options": {"num_predict": 400, "stop": ["<|user|>", "<|system|>"]},
601
+ }
602
+ try:
603
+ with requests.post(OLLAMA_URL, json=payload, stream=True, timeout=60) as resp:
604
+ if resp.status_code != 200:
605
+ return None
606
+ output = ""
607
+ for raw in resp.iter_lines():
608
+ if not raw:
609
+ continue
610
+ try:
611
+ chunk = json.loads(raw.decode("utf-8"))
612
+ except Exception:
613
+ output += raw.decode("utf-8", errors="ignore")
614
+ continue
615
+ output += chunk.get("response", "")
616
+ if chunk.get("done"):
617
+ break
618
+ return _parse_json_safe(output)
619
+ except requests.RequestException:
620
+ return None
621
+
622
+
623
+ def _fallback_classify(alert_raw: dict) -> dict:
624
+ desc = alert_raw.get("rule", {}).get("description", "unknown")
625
+ return {
626
+ "incident_type": desc.lower().replace(" ", "_"),
627
+ "classification": "suspicious",
628
+ "severity": "high",
629
+ "explanation": desc,
630
+ "source_ip": None,
631
+ "destination_ip": None,
632
+ "iocs": [],
633
+ "recommended_actions": [],
634
+ }
635
+
636
+
637
+ def _extract_device(alert_raw: dict) -> str:
638
+ """Extract device name from a raw Wazuh alert dict."""
639
+ full_log = alert_raw.get("full_log") or alert_raw.get("decoded") or ""
640
+ m = _CISCO_SRC.search(full_log)
641
+ if m:
642
+ return SOURCE_IP_DEVICE_MAP.get(m.group(1), "unknown")
643
+ m2 = _IP_RE.search(full_log)
644
+ if m2:
645
+ return SOURCE_IP_DEVICE_MAP.get(m2.group(1), "unknown")
646
+ agent_ip = alert_raw.get("agent", {}).get("ip", "")
647
+ return SOURCE_IP_DEVICE_MAP.get(agent_ip, "unknown")
648
+
649
+
650
+ # ─────────────────────────────────────────────────────────────────────────────
651
+ # DOMAIN ROUTING (same as pipeline.py)
652
+ # ─────────────────────────────────────────────────────────────────────────────
653
+
654
+ def classify_domain(incident_type: str, description: str = "") -> tuple[str, int]:
655
+ combined = ((incident_type or "") + " " + (description or "")).lower()
656
+
657
+ if "bgp" in combined:
658
+ if any(k in combined for k in ("session_flap", "session flap", "hold_timer",
659
+ "hold timer", "neighborship", "keepalive")):
660
+ return "bgp", 1
661
+ if any(k in combined for k in ("routing_policy", "prefix", "community")):
662
+ return "bgp", 2
663
+ return "bgp", 3
664
+
665
+ if "ospf" in combined:
666
+ if any(k in combined for k in ("neighbor_down", "neighbor down", "adjacency",
667
+ "adj_down", "full to down")):
668
+ return "ospf", 1
669
+ if any(k in combined for k in ("stuck", "exstart", "init", "exchange", "2way")):
670
+ return "ospf", 2
671
+ if any(k in combined for k in ("area", "auth", "mismatch", "hello", "dead")):
672
+ return "ospf", 3
673
+ if any(k in combined for k in ("lsa", "lsdb", "flood", "redistrib")):
674
+ return "ospf", 4
675
+ return "ospf", 1
676
+
677
+ if any(k in combined for k in ("acl", "access_list", "firewall", "security",
678
+ "brute_force", "brute force", "port_scan",
679
+ "malware", "intrusion", "login_failure", "deny")):
680
+ if any(k in combined for k in ("blocking", "legitimate", "legit")):
681
+ return "sec", 1
682
+ if any(k in combined for k in ("misconfiguration", "misconfig", "wrong")):
683
+ return "sec", 2
684
+ if any(k in combined for k in ("excessive", "too_many", "deny_entries")):
685
+ return "sec", 3
686
+ return "sec", 1
687
+
688
+ if any(k in combined for k in ("mac_flap", "stp", "spanning", "vlan", "trunk",
689
+ "err_disable", "switch", "l2", "layer2")):
690
+ if any(k in combined for k in ("mac_flap", "stp", "spanning", "err_disable")):
691
+ return "switch", 1
692
+ if "port" in combined or "duplex" in combined:
693
+ return "switch", 2
694
+ if "vlan" in combined or "trunk" in combined:
695
+ return "switch", 3
696
+ return "switch", 1
697
+
698
+ if any(k in combined for k in ("dhcp", "dns", "ntp", "service", "ip_conflict")):
699
+ if any(k in combined for k in ("dhcp", "ip_conflict", "starvation")):
700
+ return "service", 1
701
+ return "service", 2
702
+
703
+ if any(k in combined for k in ("cpu", "memory", "crash", "process",
704
+ "interface_flap", "duplex", "mismatch",
705
+ "system", "reload", "reboot", "traceback")):
706
+ if any(k in combined for k in ("cpu", "memory", "crash", "process", "traceback")):
707
+ return "sys", 1
708
+ if any(k in combined for k in ("interface_flap", "duplex", "mismatch", "flap")):
709
+ return "sys", 2
710
+ return "sys", 3
711
+
712
+ return "sys", 1
713
+
714
+
715
+ # ─────────────────────────────────────────────────────────────────────────────
716
+ # CLI FIX -> RESTCONF PATCH (same as run_fix.py)
717
+ # ─────────────────────────────────────────────────────────────────────────────
718
+
719
+ _CTX_PAT = [
720
+ re.compile(r"^interface\s+(\S+)", re.I),
721
+ re.compile(r"^router\s+(ospf|bgp|eigrp|isis|rip)\s+(\S+)", re.I),
722
+ re.compile(r"^ip\s+access-list\s+(extended|standard)\s+(\S+)", re.I),
723
+ re.compile(r"^ip\s+dhcp\s+pool\s+(\S+)", re.I),
724
+ ]
725
+
726
+
727
+ def _parse_cli_blocks(fix_text: str) -> list[dict]:
728
+ blocks: list[dict] = []
729
+ ctx = "global"
730
+ lines: list[str] = []
731
+ for raw in fix_text.splitlines():
732
+ s = raw.strip()
733
+ if not s:
734
+ continue
735
+ if s == "!":
736
+ if lines:
737
+ blocks.append({"context": ctx, "lines": list(lines)})
738
+ lines = []
739
+ ctx = "global"
740
+ continue
741
+ if not raw.startswith((" ", "\t")) and any(p.match(s) for p in _CTX_PAT):
742
+ if lines:
743
+ blocks.append({"context": ctx, "lines": list(lines)})
744
+ lines = []
745
+ ctx = s
746
+ continue
747
+ lines.append(s)
748
+ if lines:
749
+ blocks.append({"context": ctx, "lines": list(lines)})
750
+ return blocks
751
+
752
+
753
+ def _url_enc(slot: str) -> str:
754
+ return slot.replace("/", "%2F").replace(".", "%2E")
755
+
756
+
757
+ def _iface_parts(iface: str) -> tuple[str, str]:
758
+ m = re.match(r"([A-Za-z]+)([\d/\.]+)", iface.strip())
759
+ return (m.group(1), m.group(2)) if m else (iface, "0")
760
+
761
+
762
+ def _yang_interface(itype: str, islot: str, lines: list[str]) -> dict:
763
+ inner: dict = {"name": islot}
764
+ for line in lines:
765
+ l = line.strip().lower()
766
+ if l in ("no shutdown", "no shut"):
767
+ inner["shutdown"] = False
768
+ elif l in ("shutdown", "shut"):
769
+ inner["shutdown"] = True
770
+ m = re.match(r"(?:no\s+)?ip\s+ospf\s+hello-interval\s+(\d+)", l)
771
+ if m:
772
+ inner.setdefault("ip", {}).setdefault("Cisco-IOS-XE-ospf:ospf", {})["hello-interval"] = int(m.group(1))
773
+ m = re.match(r"(?:no\s+)?ip\s+ospf\s+dead-interval\s+(\d+)", l)
774
+ if m:
775
+ inner.setdefault("ip", {}).setdefault("Cisco-IOS-XE-ospf:ospf", {})["dead-interval"] = int(m.group(1))
776
+ m = re.match(r"ip\s+ospf\s+(\d+)\s+area\s+(\S+)", l)
777
+ if m:
778
+ ospf = inner.setdefault("ip", {}).setdefault("Cisco-IOS-XE-ospf:ospf", {})
779
+ ospf["process-id"] = int(m.group(1)); ospf["area"] = m.group(2)
780
+ m = re.match(r"ip\s+ospf\s+network\s+(\S+)", l)
781
+ if m:
782
+ inner.setdefault("ip", {}).setdefault("Cisco-IOS-XE-ospf:ospf", {})["network"] = {"network-type": m.group(1)}
783
+ if re.match(r"ip\s+ospf\s+mtu-ignore", l):
784
+ inner.setdefault("ip", {}).setdefault("Cisco-IOS-XE-ospf:ospf", {})["mtu-ignore"] = True
785
+ m = re.match(r"switchport\s+mode\s+(\S+)", l)
786
+ if m:
787
+ (inner.setdefault("Cisco-IOS-XE-switch:switchport-conf", {})
788
+ .setdefault("switchport", {}).setdefault("mode", {}))[m.group(1)] = {}
789
+ m = re.match(r"switchport\s+access\s+vlan\s+(\d+)", l)
790
+ if m:
791
+ (inner.setdefault("Cisco-IOS-XE-switch:switchport-conf", {})
792
+ .setdefault("switchport", {}).setdefault("access", {}))["vlan"] = {"vlan": int(m.group(1))}
793
+ m = re.match(r"switchport\s+trunk\s+native\s+vlan\s+(\d+)", l)
794
+ if m:
795
+ (inner.setdefault("Cisco-IOS-XE-switch:switchport-conf", {})
796
+ .setdefault("switchport", {}).setdefault("trunk", {}))["native"] = {"vlan": {"vlan-id": int(m.group(1))}}
797
+ if re.match(r"spanning-tree\s+portfast", l):
798
+ inner.setdefault("Cisco-IOS-XE-spanning-tree:spanning-tree", {})["portfast"] = {}
799
+ m = re.match(r"duplex\s+(\S+)", l)
800
+ if m:
801
+ inner["duplex"] = {"Cisco-IOS-XE-ethernet:duplex-enum": m.group(1)}
802
+ m = re.match(r"speed\s+(\d+)", l)
803
+ if m:
804
+ inner["speed"] = {"Cisco-IOS-XE-ethernet:value": int(m.group(1))}
805
+ m = re.match(r"ip\s+address\s+(\S+)\s+(\S+)", l)
806
+ if m:
807
+ inner.setdefault("ip", {}).setdefault("address", {})["primary"] = {
808
+ "address": m.group(1), "mask": m.group(2)}
809
+ return {f"Cisco-IOS-XE-native:{itype}": [inner]}
810
+
811
+
812
+ def _yang_ospf(pid: str, lines: list[str]) -> dict:
813
+ inner: dict = {"id": int(pid) if pid.isdigit() else pid}
814
+ for line in lines:
815
+ l = line.strip().lower()
816
+ m = re.match(r"area\s+(\S+)\s+authentication(\s+message-digest)?", l)
817
+ if m:
818
+ inner.setdefault("area", []).append({
819
+ "area-id": m.group(1),
820
+ "authentication": {"message-digest": {}} if m.group(2) else {}
821
+ })
822
+ m = re.match(r"network\s+(\S+)\s+(\S+)\s+area\s+(\S+)", l)
823
+ if m:
824
+ inner.setdefault("network", []).append(
825
+ {"ip": m.group(1), "mask": m.group(2), "area": m.group(3)})
826
+ m = re.match(r"router-id\s+(\S+)", l)
827
+ if m:
828
+ inner["router-id"] = m.group(1)
829
+ return {"Cisco-IOS-XE-native:ospf": [inner]}
830
+
831
+
832
+ def _yang_bgp(asn: str, lines: list[str]) -> dict:
833
+ inner: dict = {"id": int(asn) if asn.isdigit() else asn}
834
+ neighbors: dict[str, dict] = {}
835
+ for line in lines:
836
+ l = line.strip().lower()
837
+ m = re.match(r"neighbor\s+(\S+)\s+timers\s+(\d+)\s+(\d+)", l)
838
+ if m:
839
+ neighbors.setdefault(m.group(1), {"id": m.group(1)})["timers"] = {
840
+ "keepalive": int(m.group(2)), "holdtime": int(m.group(3))}
841
+ m = re.match(r"neighbor\s+(\S+)\s+maximum-prefix\s+(\d+)", l)
842
+ if m:
843
+ neighbors.setdefault(m.group(1), {"id": m.group(1)})["maximum-prefix"] = int(m.group(2))
844
+ m = re.match(r"neighbor\s+(\S+)\s+next-hop-self", l)
845
+ if m:
846
+ neighbors.setdefault(m.group(1), {"id": m.group(1)})["next-hop-self"] = {}
847
+ m = re.match(r"neighbor\s+(\S+)\s+remote-as\s+(\d+)", l)
848
+ if m:
849
+ neighbors.setdefault(m.group(1), {"id": m.group(1)})["remote-as"] = int(m.group(2))
850
+ if neighbors:
851
+ inner["neighbor"] = list(neighbors.values())
852
+ return {"Cisco-IOS-XE-native:bgp": [inner]}
853
+
854
+
855
+ def _yang_acl(acl_type: str, acl_name: str, lines: list[str]) -> dict:
856
+ entries = []
857
+ seq = 10
858
+ for line in lines:
859
+ m = re.match(r"(permit|deny)\s+(.+)", line.strip(), re.I)
860
+ if m:
861
+ entries.append({"sequence": seq, "action": m.group(1).lower(), "rule": m.group(2)})
862
+ seq += 10
863
+ return {f"Cisco-IOS-XE-native:{acl_type}": [{"name": acl_name, "access-list-seq-rule": entries}]}
864
+
865
+
866
+ def _yang_global(lines: list[str]) -> list[dict]:
867
+ ops = []
868
+ for line in lines:
869
+ l = line.strip().lower()
870
+ m = re.match(r"ntp\s+server\s+(\S+)", l)
871
+ if m:
872
+ ops.append({"suffix": "/ntp", "body": {
873
+ "Cisco-IOS-XE-native:ntp": {"server": {"server-list": [{"ip-address": m.group(1)}]}}}})
874
+ m = re.match(r"ip\s+route\s+(\S+)\s+(\S+)\s+(\S+)", l)
875
+ if m:
876
+ ops.append({"suffix": "/ip/route", "body": {
877
+ "Cisco-IOS-XE-native:route": {"ip-route-interface-forwarding-list": [{
878
+ "prefix": m.group(1), "mask": m.group(2), "fwd-list": [{"fwd": m.group(3)}]}]}}})
879
+ if re.match(r"no\s+ip\s+http\s+secure-server", l):
880
+ ops.append({"suffix": "/ip/http", "body": {"Cisco-IOS-XE-native:http": {"secure-server": False}}})
881
+ elif re.match(r"no\s+ip\s+http\s+server", l):
882
+ ops.append({"suffix": "/ip/http", "body": {"Cisco-IOS-XE-native:http": {"server": False}}})
883
+ m = re.match(r"logging\s+buffered\s+(\d+)", l)
884
+ if m:
885
+ ops.append({"suffix": "/logging", "body": {
886
+ "Cisco-IOS-XE-native:logging": {"buffered": {"size": int(m.group(1))}}}})
887
+ if re.match(r"no\s+logging\s+console", l):
888
+ ops.append({"suffix": "/logging", "body": {"Cisco-IOS-XE-native:logging": {"console": False}}})
889
+ m = re.match(r"exception\s+crashinfo\s+(?:file\s+)?(.+)", l)
890
+ if m:
891
+ ops.append({"suffix": "/exception", "body": {
892
+ "Cisco-IOS-XE-native:exception": {"crashinfo": {"filepath": m.group(1).strip()}}}})
893
+ return ops
894
+
895
+
896
+ def build_restconf_patch_ops(device: str, fix_text: str) -> list[dict]:
897
+ """Convert CLI fix commands to RESTCONF PATCH operations with YANG bodies."""
898
+ mgmt_ip = DEVICE_MGMT_IP.get(device, device)
899
+ base_url = f"https://{mgmt_ip}:{RESTCONF_PORT}/restconf/data/Cisco-IOS-XE-native:native"
900
+ ops = []
901
+ for block in _parse_cli_blocks(fix_text):
902
+ ctx = block["context"]
903
+ lines = block["lines"]
904
+ if not lines:
905
+ continue
906
+ cl = ctx.strip().lower()
907
+
908
+ m = re.match(r"interface\s+(\S+)", cl)
909
+ if m:
910
+ itype, islot = _iface_parts(m.group(1))
911
+ ops.append({"method": "PATCH",
912
+ "url": f"{base_url}/interface/{itype}={_url_enc(islot)}",
913
+ "body": _yang_interface(itype, islot, lines),
914
+ "cli_context": ctx, "cli_commands": lines})
915
+ continue
916
+ m = re.match(r"router\s+ospf\s+(\S+)", cl)
917
+ if m:
918
+ ops.append({"method": "PATCH",
919
+ "url": f"{base_url}/router/ospf={m.group(1)}",
920
+ "body": _yang_ospf(m.group(1), lines),
921
+ "cli_context": ctx, "cli_commands": lines})
922
+ continue
923
+ m = re.match(r"router\s+bgp\s+(\S+)", cl)
924
+ if m:
925
+ ops.append({"method": "PATCH",
926
+ "url": f"{base_url}/router/bgp={m.group(1)}",
927
+ "body": _yang_bgp(m.group(1), lines),
928
+ "cli_context": ctx, "cli_commands": lines})
929
+ continue
930
+ m = re.match(r"ip\s+access-list\s+(extended|standard)\s+(\S+)", cl)
931
+ if m:
932
+ ops.append({"method": "PATCH",
933
+ "url": f"{base_url}/ip/access-list/{m.group(1)}={m.group(2)}",
934
+ "body": _yang_acl(m.group(1), m.group(2), lines),
935
+ "cli_context": ctx, "cli_commands": lines})
936
+ continue
937
+ if cl == "global":
938
+ for gop in _yang_global(lines):
939
+ ops.append({"method": "PATCH",
940
+ "url": base_url + gop["suffix"],
941
+ "body": gop["body"],
942
+ "cli_context": "global", "cli_commands": lines})
943
+ continue
944
+ ops.append({"method": "PATCH", "url": base_url, "body": None,
945
+ "cli_context": ctx, "cli_commands": lines,
946
+ "note": "Context not mapped - apply manually"})
947
+ return ops
948
+
949
+
950
+ # ─────────────────────────────────────────────────────────────────────────────
951
+ # LoRA MODEL RUNNER (same pattern as run_fix.py FixRunner)
952
+ # ─────────────────────────────────────────────────────────────────────────────
953
+
954
+ class FixRunner:
955
+ def __init__(self, base_path: str):
956
+ self._base_path = base_path
957
+ self._tokenizer = None
958
+ self._base_model = None
959
+ self._active = None
960
+ self._model = None
961
+
962
+ def _ensure_base(self):
963
+ _load_ml()
964
+ if self._tokenizer is None:
965
+ log.info("Loading base tokenizer...")
966
+ self._tokenizer = _AutoTokenizer.from_pretrained(self._base_path)
967
+ if self._base_model is None:
968
+ log.info("Loading base model (fp16)...")
969
+ self._base_model = _AutoModelForCausalLM.from_pretrained(
970
+ self._base_path, torch_dtype=_torch.float16, device_map="auto"
971
+ )
972
+
973
+ def _swap(self, lora_path: Path):
974
+ key = str(lora_path)
975
+ if self._active == key and self._model is not None:
976
+ return
977
+ if self._model is not None:
978
+ del self._model; self._model = None
979
+ if self._base_model is not None:
980
+ del self._base_model; self._base_model = None
981
+ gc.collect()
982
+ _torch.cuda.empty_cache()
983
+ self._ensure_base()
984
+ log.info("Loading LoRA adapter: %s", lora_path.name)
985
+ self._model = _PeftModel.from_pretrained(self._base_model, key)
986
+ self._model.eval()
987
+ self._active = key
988
+
989
+ def run(self, lora_path: Path, prompt: str, max_new_tokens: int = 300) -> str:
990
+ self._swap(lora_path)
991
+ inputs = self._tokenizer(prompt, return_tensors="pt").to("cuda")
992
+ with _torch.no_grad():
993
+ out = self._model.generate(
994
+ **inputs, max_new_tokens=max_new_tokens,
995
+ temperature=0.2, do_sample=False, repetition_penalty=1.05,
996
+ )
997
+ return self._tokenizer.decode(out[0], skip_special_tokens=True)
998
+
999
+
1000
+ def build_fix_prompt(domain: str, alert_info: dict, device_state: dict[str, dict]) -> str:
1001
+ instruction = DOMAIN_INSTRUCTIONS.get(domain, DOMAIN_INSTRUCTIONS["sys"])
1002
+ device = alert_info.get("device", "DEVICE")
1003
+ lines = [
1004
+ "### Instruction:", instruction, "",
1005
+ "### Wazuh alert:", json.dumps(alert_info, indent=2), "",
1006
+ "### Device information:", f"\n#### DEVICE {device} ####",
1007
+ ]
1008
+ for cmd, result in device_state.items():
1009
+ data = result.get("data") or result.get("error", "No data")
1010
+ lines.append(f"\n# {cmd}\n{json.dumps(data, indent=2) if isinstance(data, dict) else str(data)}")
1011
+ lines.append("\n### Response (CLI FIX COMMANDS ONLY):\n")
1012
+ return "\n".join(lines)
1013
+
1014
+
1015
+ # ─────────────────────────────────────────────────────────────────────────────
1016
+ # SSH BRUTE FORCE DETECTOR
1017
+ # ─────────────────────────────────────────────────────────────────────────────
1018
+
1019
+ _SSH_KEYWORDS = (
1020
+ "sshd", "ssh", "failed password", "invalid user",
1021
+ "authentication failure", "failed login",
1022
+ )
1023
+ _SSH_RULE_GROUPS = {"authentication_failed", "sshd", "ssh_auth"}
1024
+
1025
+
1026
+ class SSHBruteForceDetector:
1027
+ """
1028
+ Track SSH login failures per source IP.
1029
+ Fires when the same IP accumulates >= threshold failures within window_secs.
1030
+ """
1031
+ def __init__(self, threshold: int = 5, window_secs: int = 300):
1032
+ self.threshold = threshold
1033
+ self.window_secs = window_secs
1034
+ self._attempts: dict[str, list[float]] = defaultdict(list)
1035
+
1036
+ def _is_ssh_failure(self, alert_raw: dict) -> bool:
1037
+ rule = alert_raw.get("rule", {})
1038
+ groups = set(rule.get("groups", []))
1039
+ desc = rule.get("description", "").lower()
1040
+ full = (alert_raw.get("full_log") or "").lower()
1041
+ return (groups & _SSH_RULE_GROUPS or
1042
+ any(kw in desc for kw in _SSH_KEYWORDS) or
1043
+ any(kw in full for kw in _SSH_KEYWORDS))
1044
+
1045
+ def _src_ip(self, alert_raw: dict) -> str | None:
1046
+ data = alert_raw.get("data", {}) or {}
1047
+ ip = data.get("srcip") or data.get("src_ip")
1048
+ if ip:
1049
+ return ip
1050
+ full = alert_raw.get("full_log") or ""
1051
+ m = _CISCO_SRC.search(full) or _IP_RE.search(full)
1052
+ return m.group(1) if m else None
1053
+
1054
+ def feed(self, alert_raw: dict) -> tuple[bool, str | None, int]:
1055
+ """
1056
+ Feed one alert.
1057
+ Returns (triggered, src_ip, count_so_far).
1058
+ """
1059
+ if not self._is_ssh_failure(alert_raw):
1060
+ return False, None, 0
1061
+ ip = self._src_ip(alert_raw)
1062
+ if not ip:
1063
+ return False, None, 0
1064
+
1065
+ now = time.monotonic()
1066
+ cutoff = now - self.window_secs
1067
+ self._attempts[ip] = [t for t in self._attempts[ip] if t >= cutoff]
1068
+ self._attempts[ip].append(now)
1069
+ count = len(self._attempts[ip])
1070
+
1071
+ if count >= self.threshold:
1072
+ self._attempts[ip] = [] # reset after detection
1073
+ return True, ip, count
1074
+ return False, ip, count
1075
+
1076
+
1077
+ # ─────────────────────────────────────────────────────────────────────────────
1078
+ # INCIDENT LOGGER
1079
+ # ─────────────────────────────────────────────────────────────────────────────
1080
+
1081
+ def _now_iso() -> str:
1082
+ return datetime.now(timezone.utc).isoformat(timespec="seconds")
1083
+
1084
+
1085
+ def log_managed_incident(record: dict):
1086
+ """Append to managed_incidents.jsonl and write a human-readable summary line."""
1087
+ MANAGED_LOG_JSONL.parent.mkdir(parents=True, exist_ok=True)
1088
+
1089
+ with MANAGED_LOG_JSONL.open("a", encoding="utf-8") as fh:
1090
+ fh.write(json.dumps(record, ensure_ascii=False) + "\n")
1091
+
1092
+ # Human-readable single-line summary
1093
+ patches_ok = sum(1 for p in record.get("patch_results", []) if p.get("ok"))
1094
+ patches_tot = len(record.get("patch_results", []))
1095
+ status = record.get("status", "unknown")
1096
+ line = (
1097
+ f"[{record['managed_at']}] #{record['alert_index']:04d} "
1098
+ f"device={record['device']:<14s} incident={record['incident_type']:<30s} "
1099
+ f"domain={record['domain']}-{record['sub_id']} "
1100
+ f"patches={patches_ok}/{patches_tot} status={status}\n"
1101
+ f" trigger : {record.get('trigger', '?')}\n"
1102
+ f" fix : {record.get('fix_commands_summary', '(see jsonl)')}\n"
1103
+ f" duration : {record.get('duration_seconds', 0):.1f}s\n"
1104
+ f" folder : {record.get('alert_folder', '-')}\n"
1105
+ + "-" * 80 + "\n"
1106
+ )
1107
+ with MANAGED_LOG_READABLE.open("a", encoding="utf-8") as fh:
1108
+ fh.write(line)
1109
+
1110
+ log.info("Incident logged -> %s [%s]", record["incident_type"], status)
1111
+
1112
+
1113
+ # ─────────────────────────────────────────────────────────────────────────────
1114
+ # CORE ALERT PROCESSOR
1115
+ # ─────────────────────────────────────────────────────────────────────────────
1116
+
1117
+ _safe_re = re.compile(r"[^\w\-]")
1118
+
1119
+
1120
+ def _safe(s: str) -> str:
1121
+ return _safe_re.sub("_", s)
1122
+
1123
+
1124
+ def process_alert(
1125
+ alert_raw: dict,
1126
+ alert_index: int,
1127
+ runner: FixRunner,
1128
+ trigger: str = "level_alert",
1129
+ dry_run: bool = False,
1130
+ ) -> dict:
1131
+ """
1132
+ Full autonomous pipeline for one alert.
1133
+ Returns the managed_incident record dict.
1134
+ """
1135
+ t_start = time.monotonic()
1136
+ ts = _now_iso()
1137
+ log.info("\n" + "=" * 65)
1138
+ log.info(" Processing alert #%d trigger=%s", alert_index, trigger)
1139
+ log.info("=" * 65)
1140
+
1141
+ # ── Step 1: Extract device ───────────────────────────────────────────────
1142
+ device = _extract_device(alert_raw)
1143
+ rule = alert_raw.get("rule", {})
1144
+ log.info("[1] Device: %s Rule: %s", device, rule.get("description", "?"))
1145
+
1146
+ # ── Step 2: Classify with Ollama ─────────────────────────────────────────
1147
+ log.info("[2] Calling Ollama classifier...")
1148
+ full_log = alert_raw.get("full_log") or alert_raw.get("decoded") or ""
1149
+ wazuh_result = call_wazuh_llm(full_log)
1150
+ if not wazuh_result:
1151
+ log.warning("[2] Ollama unavailable β€” using rule fallback.")
1152
+ wazuh_result = _fallback_classify(alert_raw)
1153
+
1154
+ incident_type = wazuh_result.get("incident_type", "unknown")
1155
+ severity = wazuh_result.get("severity", "high")
1156
+ explanation = wazuh_result.get("explanation", "")
1157
+ log.info("[2] incident_type=%s severity=%s", incident_type, severity)
1158
+
1159
+ # Build the alert info dict passed to domain LLM
1160
+ alert_info = {
1161
+ **wazuh_result,
1162
+ "device": device,
1163
+ "rule": rule.get("description", ""),
1164
+ "timestamp": alert_raw.get("timestamp", ts),
1165
+ "agent": alert_raw.get("agent", {}),
1166
+ "trigger": trigger,
1167
+ }
1168
+
1169
+ # Skip unmanaged devices early
1170
+ if device in UNMANAGED:
1171
+ log.warning("[2] Device '%s' is unmanaged β€” skipping RESTCONF/LLM.", device)
1172
+ rec = {
1173
+ "managed_at": ts, "alert_index": alert_index, "trigger": trigger,
1174
+ "device": device, "incident_type": incident_type, "severity": severity,
1175
+ "domain": "n/a", "sub_id": 0,
1176
+ "show_commands": [], "restconf_get_results": {},
1177
+ "fix_commands": "SKIPPED (unmanaged device)", "fix_commands_summary": "skipped",
1178
+ "patch_results": [], "status": "skipped_unmanaged",
1179
+ "duration_seconds": round(time.monotonic() - t_start, 1),
1180
+ "alert_folder": None, "wazuh_classification": wazuh_result,
1181
+ }
1182
+ log_managed_incident(rec)
1183
+ return rec
1184
+
1185
+ # ── Step 3: Show commands ────────────────────────────────────────────────
1186
+ show_cmds = _get_show_commands(incident_type)
1187
+ log.info("[3] Show commands (%d): %s", len(show_cmds), show_cmds)
1188
+
1189
+ # ── Step 4: Create alert folder ──────────────────────────────────────────
1190
+ folder_name = f"alert_{alert_index:03d}_{_safe(device)}_{_safe(incident_type)}"
1191
+ alert_folder = SHOW_OUTPUTS_DIR / folder_name
1192
+ alert_folder.mkdir(parents=True, exist_ok=True)
1193
+
1194
+ # ── Step 5: Execute RESTCONF GETs ────────────────────────────────────────
1195
+ log.info("[5] Executing RESTCONF GETs for %s ...", device)
1196
+ device_state = collect_device_state(device, show_cmds)
1197
+
1198
+ # Save GET results to folder for audit
1199
+ (alert_folder / "alert_info.json").write_text(
1200
+ json.dumps(alert_info, indent=2, ensure_ascii=False), encoding="utf-8")
1201
+ (alert_folder / "restconf_get_results.json").write_text(
1202
+ json.dumps(device_state, indent=2, ensure_ascii=False), encoding="utf-8")
1203
+
1204
+ # ── Step 6: Domain routing + LoRA fix ───────────────────────────────────
1205
+ domain, sub_id = classify_domain(incident_type, explanation)
1206
+ lora_path = LORA_PATHS.get((domain, sub_id))
1207
+ lora_rel = os.path.relpath(lora_path, PROJECT_ROOT) if lora_path else "n/a"
1208
+ log.info("[6] Domain: %s-%d LoRA: %s", domain, sub_id, lora_rel)
1209
+
1210
+ fix_commands = ""
1211
+ if not lora_path or not lora_path.exists():
1212
+ log.error("[6] LoRA adapter not found: %s", lora_path)
1213
+ fix_commands = f"[ERROR] LoRA adapter not found: {lora_path}"
1214
+ else:
1215
+ prompt = build_fix_prompt(domain, alert_info, device_state)
1216
+ raw_out = runner.run(lora_path, prompt)
1217
+ parts = re.split(r"### Response \(CLI FIX COMMANDS ONLY\):", raw_out, flags=re.I)
1218
+ fix_commands = parts[-1].strip() if len(parts) > 1 else raw_out.strip()
1219
+
1220
+ log.info("[6] Fix commands:\n%s", fix_commands)
1221
+ (alert_folder / "fix_commands.txt").write_text(fix_commands, encoding="utf-8")
1222
+
1223
+ # ── Step 7: Build RESTCONF PATCH operations ──────────────────────────────
1224
+ log.info("[7] Converting CLI fix to RESTCONF PATCH ops ...")
1225
+ patch_ops = build_restconf_patch_ops(device, fix_commands)
1226
+ (alert_folder / "restconf_fix_commands.json").write_text(
1227
+ json.dumps(patch_ops, indent=2, ensure_ascii=False), encoding="utf-8")
1228
+
1229
+ # ── Step 8: Apply RESTCONF PATCHes ───────────────────────────────────────
1230
+ patch_results = []
1231
+ if dry_run:
1232
+ log.info("[8] DRY RUN β€” skipping %d PATCH operation(s).", len(patch_ops))
1233
+ for op in patch_ops:
1234
+ patch_results.append({"url": op["url"], "dry_run": True, "ok": None})
1235
+ else:
1236
+ log.info("[8] Applying %d RESTCONF PATCH operation(s) ...", len(patch_ops))
1237
+ for op in patch_ops:
1238
+ result = restconf_patch(op["url"], op.get("body"))
1239
+ patch_results.append({"url": op["url"], **result})
1240
+ flag = "OK" if result.get("ok") else f"FAIL({result.get('status')})"
1241
+ log.info(" PATCH %s -> %s", op["url"], flag)
1242
+
1243
+ (alert_folder / "patch_results.json").write_text(
1244
+ json.dumps(patch_results, indent=2, ensure_ascii=False), encoding="utf-8")
1245
+
1246
+ # ── Step 9: Log managed incident ─────────────────────────────────────────
1247
+ ok_count = sum(1 for r in patch_results if r.get("ok") is True)
1248
+ tot_count = len(patch_results)
1249
+ if dry_run:
1250
+ status = "dry_run"
1251
+ elif tot_count == 0:
1252
+ status = "no_patches"
1253
+ elif ok_count == tot_count:
1254
+ status = "fixed"
1255
+ elif ok_count > 0:
1256
+ status = "partial"
1257
+ else:
1258
+ status = "failed"
1259
+
1260
+ summary_lines = fix_commands.splitlines()
1261
+ fix_summary = " | ".join(summary_lines[:3]) + ("..." if len(summary_lines) > 3 else "")
1262
+
1263
+ record = {
1264
+ "managed_at": ts,
1265
+ "alert_index": alert_index,
1266
+ "trigger": trigger,
1267
+ "device": device,
1268
+ "incident_type": incident_type,
1269
+ "severity": severity,
1270
+ "domain": domain,
1271
+ "sub_id": sub_id,
1272
+ "lora_used": lora_rel,
1273
+ "show_commands": show_cmds,
1274
+ "restconf_get_results": {
1275
+ cmd: {"ok": r.get("ok"), "status": r.get("status")}
1276
+ for cmd, r in device_state.items()
1277
+ },
1278
+ "fix_commands": fix_commands,
1279
+ "fix_commands_summary": fix_summary,
1280
+ "restconf_patch_count": tot_count,
1281
+ "patch_results": patch_results,
1282
+ "status": status,
1283
+ "duration_seconds": round(time.monotonic() - t_start, 1),
1284
+ "alert_folder": str(alert_folder),
1285
+ "wazuh_classification": wazuh_result,
1286
+ "raw_alert_rule": rule.get("description", ""),
1287
+ "raw_alert_level": rule.get("level"),
1288
+ }
1289
+
1290
+ log_managed_incident(record)
1291
+
1292
+ log.info(
1293
+ "[DONE] Alert #%d: %s/%s -> %s patches=%d/%d %.1fs",
1294
+ alert_index, device, incident_type, status,
1295
+ ok_count, tot_count, record["duration_seconds"],
1296
+ )
1297
+ return record
1298
+
1299
+
1300
+ # ─────────────────────────────────────────────────────────────────────────────
1301
+ # ALERT FILE WATCHER (tail -f style)
1302
+ # ─────────────────────────────────────────────────────────────────────────────
1303
+
1304
+ class AlertTailer:
1305
+ """
1306
+ Efficiently tails a growing JSON-lines file.
1307
+ Seeks to EOF on first open (skips historical alerts), then yields
1308
+ newly appended lines as they arrive.
1309
+ """
1310
+ def __init__(self, path: Path, min_level: int, max_level: int):
1311
+ self.path = path
1312
+ self.min_level = min_level
1313
+ self.max_level = max_level
1314
+ self._fh = None
1315
+ self._pos = 0
1316
+
1317
+ def _open(self):
1318
+ if self._fh is None:
1319
+ if not self.path.exists():
1320
+ return False
1321
+ self._fh = self.path.open("r", encoding="utf-8", errors="replace")
1322
+ self._fh.seek(0, 2) # seek to end β€” only process NEW alerts
1323
+ self._pos = self._fh.tell()
1324
+ log.info("Watching: %s (from position %d)", self.path, self._pos)
1325
+ return True
1326
+
1327
+ def _reopen_if_rotated(self):
1328
+ """Detect log rotation: if file shrank, re-open from the beginning."""
1329
+ try:
1330
+ current_size = self.path.stat().st_size
1331
+ except FileNotFoundError:
1332
+ return
1333
+ if current_size < self._pos:
1334
+ log.info("Log rotation detected β€” re-opening %s", self.path)
1335
+ self._fh.close()
1336
+ self._fh = None
1337
+ self._pos = 0
1338
+ self._open()
1339
+
1340
+ def poll(self) -> list[dict]:
1341
+ """Return any new valid alerts since last poll."""
1342
+ if not self._open():
1343
+ return []
1344
+ self._reopen_if_rotated()
1345
+
1346
+ new_alerts = []
1347
+ while True:
1348
+ line = self._fh.readline()
1349
+ if not line:
1350
+ break
1351
+ self._pos = self._fh.tell()
1352
+ line = line.strip()
1353
+ if not line:
1354
+ continue
1355
+ try:
1356
+ data = json.loads(line)
1357
+ except json.JSONDecodeError:
1358
+ continue
1359
+ level = data.get("rule", {}).get("level", 0)
1360
+ if self.min_level <= level <= self.max_level:
1361
+ new_alerts.append(data)
1362
+ return new_alerts
1363
+
1364
+ def close(self):
1365
+ if self._fh:
1366
+ self._fh.close()
1367
+ self._fh = None
1368
+
1369
+
1370
+ # ─────────────────────────────────────────────────────────────────────────────
1371
+ # DAEMON
1372
+ # ─────────────────────────────────────────────────────────────────────────────
1373
+
1374
+ class IncidentDaemon:
1375
+ def __init__(self, alerts_file: Path, poll_secs: int,
1376
+ min_level: int, max_level: int,
1377
+ ssh_threshold: int, ssh_window: int,
1378
+ dry_run: bool):
1379
+ self.tailer = AlertTailer(alerts_file, min_level, max_level)
1380
+ self.ssh_detector = SSHBruteForceDetector(ssh_threshold, ssh_window)
1381
+ self.runner = FixRunner(BASE_MODEL)
1382
+ self.poll_secs = poll_secs
1383
+ self.dry_run = dry_run
1384
+ self._running = True
1385
+ self._alert_index = 0
1386
+ self._processed = 0
1387
+ SHOW_OUTPUTS_DIR.mkdir(parents=True, exist_ok=True)
1388
+
1389
+ def _next_index(self) -> int:
1390
+ self._alert_index += 1
1391
+ return self._alert_index
1392
+
1393
+ def _handle(self, alert_raw: dict, trigger: str):
1394
+ idx = self._next_index()
1395
+ try:
1396
+ process_alert(
1397
+ alert_raw=alert_raw,
1398
+ alert_index=idx,
1399
+ runner=self.runner,
1400
+ trigger=trigger,
1401
+ dry_run=self.dry_run,
1402
+ )
1403
+ self._processed += 1
1404
+ except Exception as exc:
1405
+ log.exception("Unhandled error processing alert #%d: %s", idx, exc)
1406
+
1407
+ def run(self):
1408
+ log.info("=" * 65)
1409
+ log.info(" Incident Response Daemon started")
1410
+ log.info(" Alerts file : %s", self.tailer.path)
1411
+ log.info(" Poll every : %ds", self.poll_secs)
1412
+ log.info(" Level range : %d-%d", self.tailer.min_level, self.tailer.max_level)
1413
+ log.info(" SSH trigger : %d failures / %ds window",
1414
+ self.ssh_detector.threshold, self.ssh_detector.window_secs)
1415
+ log.info(" Dry run : %s", self.dry_run)
1416
+ log.info(" Output log : %s", MANAGED_LOG_JSONL)
1417
+ log.info("=" * 65)
1418
+
1419
+ while self._running:
1420
+ new_alerts = self.tailer.poll()
1421
+
1422
+ for alert_raw in new_alerts:
1423
+ level = alert_raw.get("rule", {}).get("level", 0)
1424
+
1425
+ # SSH brute-force pattern check (runs for every alert regardless of level)
1426
+ triggered, src_ip, count = self.ssh_detector.feed(alert_raw)
1427
+ if triggered:
1428
+ log.warning(
1429
+ "SSH brute-force pattern detected: %d failures from %s within %ds",
1430
+ count, src_ip, self.ssh_detector.window_secs,
1431
+ )
1432
+ # Synthesise a brute-force alert record
1433
+ ssh_alert = dict(alert_raw)
1434
+ ssh_alert.setdefault("rule", {})["description"] = (
1435
+ f"SSH brute-force: {count} failures from {src_ip}"
1436
+ )
1437
+ self._handle(ssh_alert, trigger=f"ssh_brute_force:{src_ip}:{count}")
1438
+
1439
+ # Standard level-based alert
1440
+ elif self.tailer.min_level <= level <= self.tailer.max_level:
1441
+ log.info(
1442
+ "New alert: level=%d rule=%s",
1443
+ level, alert_raw.get("rule", {}).get("description", "?"),
1444
+ )
1445
+ self._handle(alert_raw, trigger=f"level{level}_alert")
1446
+
1447
+ if not new_alerts:
1448
+ time.sleep(self.poll_secs)
1449
+
1450
+ def stop(self):
1451
+ log.info("Daemon shutting down (processed %d alerts).", self._processed)
1452
+ self._running = False
1453
+ self.tailer.close()
1454
+
1455
+
1456
+ # ─────────────────────────────────────────────────────────────────────────────
1457
+ # ENTRY POINT
1458
+ # ─────────────────────────────────────────────────────────────────────────────
1459
+
1460
+ def main():
1461
+ parser = argparse.ArgumentParser(
1462
+ description="Autonomous Wazuh-LLM Incident Response Daemon"
1463
+ )
1464
+ parser.add_argument(
1465
+ "--alerts",
1466
+ default=str(DEFAULT_ALERTS_FILE),
1467
+ help=f"Wazuh alerts JSON-lines file (default: {DEFAULT_ALERTS_FILE})"
1468
+ )
1469
+ parser.add_argument(
1470
+ "--poll", type=int, default=3,
1471
+ help="File poll interval in seconds (default: 3)"
1472
+ )
1473
+ parser.add_argument(
1474
+ "--min-level", type=int, default=7,
1475
+ help="Minimum Wazuh rule level to process (default: 7)"
1476
+ )
1477
+ parser.add_argument(
1478
+ "--max-level", type=int, default=12,
1479
+ help="Maximum Wazuh rule level to process (default: 12)"
1480
+ )
1481
+ parser.add_argument(
1482
+ "--ssh-threshold", type=int, default=5,
1483
+ help="SSH failures from same IP to trigger brute-force alert (default: 5)"
1484
+ )
1485
+ parser.add_argument(
1486
+ "--ssh-window", type=int, default=300,
1487
+ help="SSH brute-force detection window in seconds (default: 300)"
1488
+ )
1489
+ parser.add_argument(
1490
+ "--dry-run", action="store_true",
1491
+ help="Classify and generate fix commands but do NOT apply RESTCONF PATCHes"
1492
+ )
1493
+ args = parser.parse_args()
1494
+
1495
+ alerts_path = Path(args.alerts)
1496
+ if not alerts_path.parent.exists():
1497
+ log.warning(
1498
+ "Alerts directory does not exist: %s\n"
1499
+ " Daemon will wait until the file appears.",
1500
+ alerts_path.parent
1501
+ )
1502
+
1503
+ daemon = IncidentDaemon(
1504
+ alerts_file = alerts_path,
1505
+ poll_secs = args.poll,
1506
+ min_level = args.min_level,
1507
+ max_level = args.max_level,
1508
+ ssh_threshold = args.ssh_threshold,
1509
+ ssh_window = args.ssh_window,
1510
+ dry_run = args.dry_run,
1511
+ )
1512
+
1513
+ def _signal_handler(sig, _frame):
1514
+ print(f"\nSignal {sig} received β€” stopping daemon...")
1515
+ daemon.stop()
1516
+ sys.exit(0)
1517
+
1518
+ signal.signal(signal.SIGINT, _signal_handler)
1519
+ signal.signal(signal.SIGTERM, _signal_handler)
1520
+
1521
+ daemon.run()
1522
+
1523
+
1524
+ if __name__ == "__main__":
1525
+ main()