diff --git "a/data/demo_examples.json" "b/data/demo_examples.json" new file mode 100644--- /dev/null +++ "b/data/demo_examples.json" @@ -0,0 +1,3514 @@ +{ + "n": 48, + "source": "placeholder", + "baseline_model": "unsloth/Qwen2.5-3B-Instruct", + "trained_adapter": null, + "examples": [ + { + "alert": { + "alert_id": "A-EVAL-i-91030", + "category": "benign_noise", + "severity": "low", + "summary": "benign_noise: 2 event(s); first=auth.login_success", + "host": "host-001", + "user": "carol" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T11:58:00Z", + "source": "identity", + "event_type": "auth.login_success", + "fields": { + "user": "carol", + "src_ip": "10.0.0.187" + } + }, + { + "log_id": "L1-1", + "timestamp": "2026-04-25T22:09:00Z", + "source": "network", + "event_type": "net.dns_query", + "fields": { + "domain": "internal.corp" + } + } + ], + "ground_truth": "dismiss", + "triggering_log_id": "L1-1", + "stage": "stage2_multi", + "seed": 91030, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": 1.0, + "reward_breakdown": { + "correct_action": 1.0 + }, + "correct": true + }, + "trained": { + "raw_text": "Action: dismiss\nCitedLog: L1-1\nRationale: Indicators are consistent with normal user activity.", + "action": "dismiss", + "cited_log_id": "L1-1", + "rationale": "Indicators are consistent with normal user activity.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-i-91008", + "category": "brute_force", + "severity": "medium", + "summary": "brute_force: 8 event(s); first=net.outbound_connection", + "host": "host-001", + "user": "user-001" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T11:11:00Z", + "source": "network", + "event_type": "net.outbound_connection", + "fields": { + "dst_ip": "10.0.101.112", + "bytes_out": 73324 + } + }, + { + "log_id": "L1-1", + "timestamp": "2026-04-25T14:47:00Z", + "source": "identity", + "event_type": "auth.login_failure", + "fields": { + "user": "alice", + "src_ip": "203.0.113.76" + } + }, + { + "log_id": "L1-2", + "timestamp": "2026-04-25T14:47:05Z", + "source": "identity", + "event_type": "auth.login_failure", + "fields": { + "user": "alice", + "src_ip": "203.0.113.196" + } + }, + { + "log_id": "L1-3", + "timestamp": "2026-04-25T14:47:10Z", + "source": "identity", + "event_type": "auth.login_failure", + "fields": { + "user": "alice", + "src_ip": "203.0.113.242" + } + }, + { + "log_id": "L1-4", + "timestamp": "2026-04-25T14:47:15Z", + "source": "identity", + "event_type": "auth.login_failure", + "fields": { + "user": "alice", + "src_ip": "203.0.113.243" + } + }, + { + "log_id": "L1-5", + "timestamp": "2026-04-25T14:47:20Z", + "source": "identity", + "event_type": "auth.login_failure", + "fields": { + "user": "alice", + "src_ip": "203.0.113.100" + } + }, + { + "log_id": "L1-6", + "timestamp": "2026-04-25T14:47:25Z", + "source": "identity", + "event_type": "auth.login_failure", + "fields": { + "user": "alice", + "src_ip": "203.0.113.155" + } + }, + { + "log_id": "L1-7", + "timestamp": "2026-04-25T14:47:30Z", + "source": "identity", + "event_type": "auth.login_failure", + "fields": { + "user": "alice", + "src_ip": "203.0.113.43" + } + } + ], + "ground_truth": "monitor", + "triggering_log_id": "L1-1", + "stage": "stage2_multi", + "seed": 91008, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": -1.0, + "reward_breakdown": { + "missed_malicious": -1.0 + }, + "correct": false + }, + "trained": { + "raw_text": "Action: monitor\nCitedLog: L1-1\nRationale: Behavior is anomalous but reversible; raising visibility.", + "action": "monitor", + "cited_log_id": "L1-1", + "rationale": "Behavior is anomalous but reversible; raising visibility.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-d-92026", + "category": "benign_noise", + "severity": "low", + "summary": "benign_noise: 3 event(s); first=net.dns_query", + "host": "host-001", + "user": "user-001" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T03:29:00Z", + "source": "network", + "event_type": "net.dns_query", + "fields": { + "domain": "internal.corp" + } + }, + { + "log_id": "L1-1", + "timestamp": "2026-04-25T03:29:02Z", + "source": "identity", + "event_type": "auth.login_success", + "fields": { + "user": "dave", + "src_ip": "10.0.0.46" + } + }, + { + "log_id": "L1-2", + "timestamp": "2026-04-25T22:52:00Z", + "source": "network", + "event_type": "net.beacon", + "fields": { + "dst_ip": "10.0.204.226", + "interval_s": 90 + } + } + ], + "ground_truth": "monitor", + "triggering_log_id": "L1-2", + "stage": "stage3_mixed", + "seed": 92026, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": -1.0, + "reward_breakdown": { + "missed_malicious": -1.0 + }, + "correct": false + }, + "trained": { + "raw_text": "Action: monitor\nCitedLog: L1-2\nRationale: Behavior is anomalous but reversible; raising visibility.", + "action": "monitor", + "cited_log_id": "L1-2", + "rationale": "Behavior is anomalous but reversible; raising visibility.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-d-92043", + "category": "phishing", + "severity": "high", + "summary": "phishing: 3 event(s); first=email.link_clicked", + "host": "host-001", + "user": "bob" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T09:59:00Z", + "source": "email", + "event_type": "email.link_clicked", + "fields": { + "url": "https://login-update.cn/secure", + "user": "bob" + } + }, + { + "log_id": "L1-1", + "timestamp": "2026-04-25T18:48:00Z", + "source": "network", + "event_type": "net.outbound_connection", + "fields": { + "dst_ip": "10.0.170.227", + "bytes_out": 65651 + } + }, + { + "log_id": "L1-2", + "timestamp": "2026-04-25T18:48:02Z", + "source": "identity", + "event_type": "auth.login_success", + "fields": { + "user": "carol", + "src_ip": "10.0.0.170" + } + } + ], + "ground_truth": "block_ip", + "triggering_log_id": "L1-0", + "stage": "stage3_mixed", + "seed": 92043, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": -0.9, + "reward_breakdown": { + "missed_malicious": -1.0, + "correct_citation_bonus": 0.1 + }, + "correct": false + }, + "trained": { + "raw_text": "Action: block_ip\nCitedLog: L1-0\nRationale: External IP is engaged in active brute-force; blocking.", + "action": "block_ip", + "cited_log_id": "L1-0", + "rationale": "External IP is engaged in active brute-force; blocking.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-i-91037", + "category": "benign_noise", + "severity": "medium", + "summary": "benign_noise: 2 event(s); first=net.outbound_connection", + "host": "host-001", + "user": "user-001" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T02:54:00Z", + "source": "network", + "event_type": "net.outbound_connection", + "fields": { + "dst_ip": "10.0.137.186", + "bytes_out": 37324 + } + }, + { + "log_id": "L1-1", + "timestamp": "2026-04-25T04:23:00Z", + "source": "network", + "event_type": "net.port_scan_hit", + "fields": { + "src_ip": "203.0.113.96", + "port": 5985 + } + } + ], + "ground_truth": "monitor", + "triggering_log_id": "L1-1", + "stage": "stage2_multi", + "seed": 91037, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": -1.0, + "reward_breakdown": { + "missed_malicious": -1.0 + }, + "correct": false + }, + "trained": { + "raw_text": "Action: monitor\nCitedLog: L1-1\nRationale: Behavior is anomalous but reversible; raising visibility.", + "action": "monitor", + "cited_log_id": "L1-1", + "rationale": "Behavior is anomalous but reversible; raising visibility.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-d-92013", + "category": "benign_noise", + "severity": "low", + "summary": "benign_noise: 3 event(s); first=net.dns_query", + "host": "host-001", + "user": "user-001" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T06:31:00Z", + "source": "network", + "event_type": "net.dns_query", + "fields": { + "domain": "google.com" + } + }, + { + "log_id": "L1-1", + "timestamp": "2026-04-25T06:31:02Z", + "source": "identity", + "event_type": "auth.login_success", + "fields": { + "user": "bob", + "src_ip": "10.0.0.194" + } + }, + { + "log_id": "L1-2", + "timestamp": "2026-04-25T13:13:00Z", + "source": "identity", + "event_type": "auth.login_success", + "fields": { + "user": "bob", + "src_ip": "10.0.0.146" + } + } + ], + "ground_truth": "dismiss", + "triggering_log_id": "L1-2", + "stage": "stage3_mixed", + "seed": 92013, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": 1.0, + "reward_breakdown": { + "correct_action": 1.0 + }, + "correct": true + }, + "trained": { + "raw_text": "Action: dismiss\nCitedLog: L1-2\nRationale: Indicators are consistent with normal user activity.", + "action": "dismiss", + "cited_log_id": "L1-2", + "rationale": "Indicators are consistent with normal user activity.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-d-92049", + "category": "malware_execution", + "severity": "high", + "summary": "malware_execution: 4 event(s); first=auth.login_success", + "host": "host-001", + "user": "dave" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T00:12:00Z", + "source": "identity", + "event_type": "auth.login_success", + "fields": { + "user": "dave", + "src_ip": "10.0.0.52" + } + }, + { + "log_id": "L1-1", + "timestamp": "2026-04-25T00:12:02Z", + "source": "network", + "event_type": "net.outbound_connection", + "fields": { + "dst_ip": "10.0.202.40", + "bytes_out": 80520 + } + }, + { + "log_id": "L1-2", + "timestamp": "2026-04-25T19:55:00Z", + "source": "endpoint", + "event_type": "proc.lolbin_use", + "fields": { + "process": "powershell.exe", + "parent_process": "excel.exe", + "cmd": "-enc AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + } + }, + { + "log_id": "L1-3", + "timestamp": "2026-04-25T19:55:05Z", + "source": "endpoint", + "event_type": "file.write", + "fields": { + "path": "C:\\Users\\Public\\svc.exe" + } + } + ], + "ground_truth": "quarantine_host", + "triggering_log_id": "L1-2", + "stage": "stage3_mixed", + "seed": 92049, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": -1.0, + "reward_breakdown": { + "missed_malicious": -1.0 + }, + "correct": false + }, + "trained": { + "raw_text": "Action: quarantine_host\nCitedLog: L1-2\nRationale: Host shows malware execution indicators; isolating.", + "action": "quarantine_host", + "cited_log_id": "L1-2", + "rationale": "Host shows malware execution indicators; isolating.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-i-91048", + "category": "phishing", + "severity": "high", + "summary": "phishing: 2 event(s); first=auth.login_success", + "host": "host-001", + "user": "bob" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T05:25:00Z", + "source": "identity", + "event_type": "auth.login_success", + "fields": { + "user": "bob", + "src_ip": "10.0.0.225" + } + }, + { + "log_id": "L1-1", + "timestamp": "2026-04-25T06:09:00Z", + "source": "email", + "event_type": "email.link_clicked", + "fields": { + "url": "https://login-update.ru/secure", + "user": "bob" + } + } + ], + "ground_truth": "block_ip", + "triggering_log_id": "L1-1", + "stage": "stage2_multi", + "seed": 91048, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": -1.0, + "reward_breakdown": { + "missed_malicious": -1.0 + }, + "correct": false + }, + "trained": { + "raw_text": "Action: block_ip\nCitedLog: L1-1\nRationale: External IP is engaged in active brute-force; blocking.", + "action": "block_ip", + "cited_log_id": "L1-1", + "rationale": "External IP is engaged in active brute-force; blocking.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-d-92021", + "category": "malware_execution", + "severity": "critical", + "summary": "malware_execution: 5 event(s); first=proc.lolbin_use", + "host": "host-001", + "user": "user-001" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T03:53:00Z", + "source": "endpoint", + "event_type": "proc.lolbin_use", + "fields": { + "process": "powershell.exe", + "parent_process": "outlook.exe", + "cmd": "-enc BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB" + } + }, + { + "log_id": "L1-1", + "timestamp": "2026-04-25T03:53:15Z", + "source": "network", + "event_type": "net.beacon", + "fields": { + "dst_ip": "203.0.113.5", + "interval_s": 30 + } + }, + { + "log_id": "L1-2", + "timestamp": "2026-04-25T03:53:30Z", + "source": "identity", + "event_type": "auth.privilege_grant", + "fields": { + "user": "bob", + "role": "admin" + } + }, + { + "log_id": "L1-3", + "timestamp": "2026-04-25T08:51:00Z", + "source": "network", + "event_type": "net.dns_query", + "fields": { + "domain": "internal.corp" + } + }, + { + "log_id": "L1-4", + "timestamp": "2026-04-25T08:51:02Z", + "source": "network", + "event_type": "net.dns_query", + "fields": { + "domain": "google.com" + } + } + ], + "ground_truth": "escalate", + "triggering_log_id": "L1-0", + "stage": "stage3_mixed", + "seed": 92021, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": -0.9, + "reward_breakdown": { + "missed_malicious": -1.0, + "correct_citation_bonus": 0.1 + }, + "correct": false + }, + "trained": { + "raw_text": "Action: escalate\nCitedLog: L1-0\nRationale: Confirmed exfiltration scale exceeds tier-1 thresholds.", + "action": "escalate", + "cited_log_id": "L1-0", + "rationale": "Confirmed exfiltration scale exceeds tier-1 thresholds.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-c-90035", + "category": "benign_noise", + "severity": "low", + "summary": "benign_noise: 1 event(s); first=net.beacon", + "host": "host-001", + "user": "user-001" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T01:08:00Z", + "source": "network", + "event_type": "net.beacon", + "fields": { + "dst_ip": "10.0.98.8", + "interval_s": 60 + } + } + ], + "ground_truth": "monitor", + "triggering_log_id": "L1-0", + "stage": "stage1_basic", + "seed": 90035, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": -0.9, + "reward_breakdown": { + "missed_malicious": -1.0, + "correct_citation_bonus": 0.1 + }, + "correct": false + }, + "trained": { + "raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.", + "action": "monitor", + "cited_log_id": "L1-0", + "rationale": "Behavior is anomalous but reversible; raising visibility.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-l-93032", + "category": "benign_noise", + "severity": "low", + "summary": "benign_noise: 4 event(s); first=auth.login_success", + "host": "host-001", + "user": "dave" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T17:30:00Z", + "source": "identity", + "event_type": "auth.login_success", + "fields": { + "user": "dave", + "src_ip": "10.0.0.56" + } + }, + { + "log_id": "L1-1", + "timestamp": "2026-04-25T17:30:02Z", + "source": "network", + "event_type": "net.outbound_connection", + "fields": { + "dst_ip": "10.0.30.76", + "bytes_out": 65394 + } + }, + { + "log_id": "L1-2", + "timestamp": "2026-04-25T17:30:04Z", + "source": "network", + "event_type": "net.outbound_connection", + "fields": { + "dst_ip": "10.0.171.122", + "bytes_out": 2642 + } + }, + { + "log_id": "L1-3", + "timestamp": "2026-04-25T18:00:00Z", + "source": "identity", + "event_type": "auth.login_success", + "fields": { + "user": "dave", + "src_ip": "10.0.0.72" + } + } + ], + "ground_truth": "dismiss", + "triggering_log_id": "L1-3", + "stage": "stage4_adversarial", + "seed": 93032, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": 1.0, + "reward_breakdown": { + "correct_action": 1.0 + }, + "correct": true + }, + "trained": { + "raw_text": "Action: dismiss\nCitedLog: L1-3\nRationale: Indicators are consistent with normal user activity.", + "action": "dismiss", + "cited_log_id": "L1-3", + "rationale": "Indicators are consistent with normal user activity.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-l-93030", + "category": "benign_noise", + "severity": "low", + "summary": "benign_noise: 4 event(s); first=net.beacon", + "host": "host-001", + "user": "user-001" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T18:13:00Z", + "source": "network", + "event_type": "net.beacon", + "fields": { + "dst_ip": "10.0.122.44", + "interval_s": 60 + } + }, + { + "log_id": "L1-1", + "timestamp": "2026-04-25T20:00:00Z", + "source": "identity", + "event_type": "auth.login_success", + "fields": { + "user": "bob", + "src_ip": "10.0.0.88" + } + }, + { + "log_id": "L1-2", + "timestamp": "2026-04-25T20:00:02Z", + "source": "network", + "event_type": "net.dns_query", + "fields": { + "domain": "github.com" + } + }, + { + "log_id": "L1-3", + "timestamp": "2026-04-25T20:00:04Z", + "source": "network", + "event_type": "net.dns_query", + "fields": { + "domain": "github.com" + } + } + ], + "ground_truth": "monitor", + "triggering_log_id": "L1-0", + "stage": "stage4_adversarial", + "seed": 93030, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": -0.9, + "reward_breakdown": { + "missed_malicious": -1.0, + "correct_citation_bonus": 0.1 + }, + "correct": false + }, + "trained": { + "raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.", + "action": "monitor", + "cited_log_id": "L1-0", + "rationale": "Behavior is anomalous but reversible; raising visibility.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-i-91001", + "category": "benign_noise", + "severity": "low", + "summary": "benign_noise: 2 event(s); first=net.dns_query", + "host": "host-001", + "user": "user-001" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T03:58:00Z", + "source": "network", + "event_type": "net.dns_query", + "fields": { + "domain": "github.com" + } + }, + { + "log_id": "L1-1", + "timestamp": "2026-04-25T06:16:00Z", + "source": "identity", + "event_type": "auth.login_success", + "fields": { + "user": "bob", + "src_ip": "10.0.0.120" + } + } + ], + "ground_truth": "dismiss", + "triggering_log_id": "L1-1", + "stage": "stage2_multi", + "seed": 91001, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": 1.0, + "reward_breakdown": { + "correct_action": 1.0 + }, + "correct": true + }, + "trained": { + "raw_text": "Action: dismiss\nCitedLog: L1-1\nRationale: Indicators are consistent with normal user activity.", + "action": "dismiss", + "cited_log_id": "L1-1", + "rationale": "Indicators are consistent with normal user activity.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-l-93038", + "category": "benign_noise", + "severity": "low", + "summary": "benign_noise: 4 event(s); first=net.dns_query", + "host": "host-001", + "user": "user-001" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T18:58:00Z", + "source": "network", + "event_type": "net.dns_query", + "fields": { + "domain": "internal.corp" + } + }, + { + "log_id": "L1-1", + "timestamp": "2026-04-25T18:58:02Z", + "source": "identity", + "event_type": "auth.login_success", + "fields": { + "user": "bob", + "src_ip": "10.0.0.171" + } + }, + { + "log_id": "L1-2", + "timestamp": "2026-04-25T18:58:04Z", + "source": "network", + "event_type": "net.outbound_connection", + "fields": { + "dst_ip": "10.0.47.203", + "bytes_out": 14699 + } + }, + { + "log_id": "L1-3", + "timestamp": "2026-04-25T20:59:00Z", + "source": "network", + "event_type": "net.beacon", + "fields": { + "dst_ip": "10.0.36.159", + "interval_s": 90 + } + } + ], + "ground_truth": "monitor", + "triggering_log_id": "L1-3", + "stage": "stage4_adversarial", + "seed": 93038, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": -1.0, + "reward_breakdown": { + "missed_malicious": -1.0 + }, + "correct": false + }, + "trained": { + "raw_text": "Action: monitor\nCitedLog: L1-3\nRationale: Behavior is anomalous but reversible; raising visibility.", + "action": "monitor", + "cited_log_id": "L1-3", + "rationale": "Behavior is anomalous but reversible; raising visibility.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-i-91046", + "category": "malware_execution", + "severity": "high", + "summary": "malware_execution: 3 event(s); first=proc.lolbin_use", + "host": "host-001", + "user": "user-001" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T02:24:00Z", + "source": "endpoint", + "event_type": "proc.lolbin_use", + "fields": { + "process": "powershell.exe", + "parent_process": "chrome.exe", + "cmd": "-enc AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + } + }, + { + "log_id": "L1-1", + "timestamp": "2026-04-25T02:24:05Z", + "source": "endpoint", + "event_type": "file.write", + "fields": { + "path": "C:\\Users\\Public\\svc.exe" + } + }, + { + "log_id": "L1-2", + "timestamp": "2026-04-25T09:39:00Z", + "source": "network", + "event_type": "net.dns_query", + "fields": { + "domain": "google.com" + } + } + ], + "ground_truth": "quarantine_host", + "triggering_log_id": "L1-0", + "stage": "stage2_multi", + "seed": 91046, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": -0.9, + "reward_breakdown": { + "missed_malicious": -1.0, + "correct_citation_bonus": 0.1 + }, + "correct": false + }, + "trained": { + "raw_text": "Action: quarantine_host\nCitedLog: L1-0\nRationale: Host shows malware execution indicators; isolating.", + "action": "quarantine_host", + "cited_log_id": "L1-0", + "rationale": "Host shows malware execution indicators; isolating.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-d-92047", + "category": "malware_execution", + "severity": "high", + "summary": "malware_execution: 4 event(s); first=auth.login_success", + "host": "host-001", + "user": "carol" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T12:36:00Z", + "source": "identity", + "event_type": "auth.login_success", + "fields": { + "user": "carol", + "src_ip": "10.0.0.130" + } + }, + { + "log_id": "L1-1", + "timestamp": "2026-04-25T12:36:02Z", + "source": "network", + "event_type": "net.outbound_connection", + "fields": { + "dst_ip": "10.0.164.13", + "bytes_out": 94453 + } + }, + { + "log_id": "L1-2", + "timestamp": "2026-04-25T16:44:00Z", + "source": "endpoint", + "event_type": "proc.lolbin_use", + "fields": { + "process": "powershell.exe", + "parent_process": "excel.exe", + "cmd": "-enc AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + } + }, + { + "log_id": "L1-3", + "timestamp": "2026-04-25T16:44:05Z", + "source": "endpoint", + "event_type": "file.write", + "fields": { + "path": "C:\\Users\\Public\\svc.exe" + } + } + ], + "ground_truth": "quarantine_host", + "triggering_log_id": "L1-2", + "stage": "stage3_mixed", + "seed": 92047, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": -1.0, + "reward_breakdown": { + "missed_malicious": -1.0 + }, + "correct": false + }, + "trained": { + "raw_text": "Action: quarantine_host\nCitedLog: L1-2\nRationale: Host shows malware execution indicators; isolating.", + "action": "quarantine_host", + "cited_log_id": "L1-2", + "rationale": "Host shows malware execution indicators; isolating.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-c-90038", + "category": "benign_noise", + "severity": "medium", + "summary": "benign_noise: 1 event(s); first=net.port_scan_hit", + "host": "host-001", + "user": "user-001" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T05:08:00Z", + "source": "network", + "event_type": "net.port_scan_hit", + "fields": { + "src_ip": "203.0.113.140", + "port": 22 + } + } + ], + "ground_truth": "monitor", + "triggering_log_id": "L1-0", + "stage": "stage1_basic", + "seed": 90038, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": -0.9, + "reward_breakdown": { + "missed_malicious": -1.0, + "correct_citation_bonus": 0.1 + }, + "correct": false + }, + "trained": { + "raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.", + "action": "monitor", + "cited_log_id": "L1-0", + "rationale": "Behavior is anomalous but reversible; raising visibility.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-i-91000", + "category": "benign_noise", + "severity": "low", + "summary": "benign_noise: 2 event(s); first=net.beacon", + "host": "host-001", + "user": "user-001" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T21:23:00Z", + "source": "network", + "event_type": "net.beacon", + "fields": { + "dst_ip": "10.0.61.71", + "interval_s": 60 + } + }, + { + "log_id": "L1-1", + "timestamp": "2026-04-25T22:28:00Z", + "source": "identity", + "event_type": "auth.login_success", + "fields": { + "user": "alice", + "src_ip": "10.0.0.76" + } + } + ], + "ground_truth": "monitor", + "triggering_log_id": "L1-0", + "stage": "stage2_multi", + "seed": 91000, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": -0.9, + "reward_breakdown": { + "missed_malicious": -1.0, + "correct_citation_bonus": 0.1 + }, + "correct": false + }, + "trained": { + "raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.", + "action": "monitor", + "cited_log_id": "L1-0", + "rationale": "Behavior is anomalous but reversible; raising visibility.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-c-90011", + "category": "malware_execution", + "severity": "high", + "summary": "malware_execution: 2 event(s); first=proc.lolbin_use", + "host": "host-001", + "user": "user-001" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T19:21:00Z", + "source": "endpoint", + "event_type": "proc.lolbin_use", + "fields": { + "process": "powershell.exe", + "parent_process": "winword.exe", + "cmd": "-enc AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + } + }, + { + "log_id": "L1-1", + "timestamp": "2026-04-25T19:21:05Z", + "source": "endpoint", + "event_type": "file.write", + "fields": { + "path": "C:\\Users\\Public\\payload.exe" + } + } + ], + "ground_truth": "quarantine_host", + "triggering_log_id": "L1-0", + "stage": "stage1_basic", + "seed": 90011, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": -0.9, + "reward_breakdown": { + "missed_malicious": -1.0, + "correct_citation_bonus": 0.1 + }, + "correct": false + }, + "trained": { + "raw_text": "Action: quarantine_host\nCitedLog: L1-0\nRationale: Host shows malware execution indicators; isolating.", + "action": "quarantine_host", + "cited_log_id": "L1-0", + "rationale": "Host shows malware execution indicators; isolating.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-c-90024", + "category": "phishing", + "severity": "high", + "summary": "phishing: 1 event(s); first=email.link_clicked", + "host": "host-001", + "user": "alice" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T03:27:00Z", + "source": "email", + "event_type": "email.link_clicked", + "fields": { + "url": "https://login-update.top/secure", + "user": "alice" + } + } + ], + "ground_truth": "block_ip", + "triggering_log_id": "L1-0", + "stage": "stage1_basic", + "seed": 90024, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": -0.9, + "reward_breakdown": { + "missed_malicious": -1.0, + "correct_citation_bonus": 0.1 + }, + "correct": false + }, + "trained": { + "raw_text": "Action: block_ip\nCitedLog: L1-0\nRationale: External IP is engaged in active brute-force; blocking.", + "action": "block_ip", + "cited_log_id": "L1-0", + "rationale": "External IP is engaged in active brute-force; blocking.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-l-93004", + "category": "benign_noise", + "severity": "low", + "summary": "benign_noise: 4 event(s); first=auth.login_success", + "host": "host-001", + "user": "dave" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T04:55:00Z", + "source": "identity", + "event_type": "auth.login_success", + "fields": { + "user": "dave", + "src_ip": "10.0.0.144" + } + }, + { + "log_id": "L1-1", + "timestamp": "2026-04-25T19:38:00Z", + "source": "network", + "event_type": "net.dns_query", + "fields": { + "domain": "github.com" + } + }, + { + "log_id": "L1-2", + "timestamp": "2026-04-25T19:38:02Z", + "source": "network", + "event_type": "net.dns_query", + "fields": { + "domain": "github.com" + } + }, + { + "log_id": "L1-3", + "timestamp": "2026-04-25T19:38:04Z", + "source": "network", + "event_type": "net.dns_query", + "fields": { + "domain": "internal.corp" + } + } + ], + "ground_truth": "dismiss", + "triggering_log_id": "L1-3", + "stage": "stage4_adversarial", + "seed": 93004, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": 1.0, + "reward_breakdown": { + "correct_action": 1.0 + }, + "correct": true + }, + "trained": { + "raw_text": "Action: dismiss\nCitedLog: L1-3\nRationale: Indicators are consistent with normal user activity.", + "action": "dismiss", + "cited_log_id": "L1-3", + "rationale": "Indicators are consistent with normal user activity.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-c-90048", + "category": "phishing", + "severity": "high", + "summary": "phishing: 1 event(s); first=email.link_clicked", + "host": "host-001", + "user": "alice" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T02:43:00Z", + "source": "email", + "event_type": "email.link_clicked", + "fields": { + "url": "https://login-update.click/secure", + "user": "alice" + } + } + ], + "ground_truth": "block_ip", + "triggering_log_id": "L1-0", + "stage": "stage1_basic", + "seed": 90048, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": -0.9, + "reward_breakdown": { + "missed_malicious": -1.0, + "correct_citation_bonus": 0.1 + }, + "correct": false + }, + "trained": { + "raw_text": "Action: block_ip\nCitedLog: L1-0\nRationale: External IP is engaged in active brute-force; blocking.", + "action": "block_ip", + "cited_log_id": "L1-0", + "rationale": "External IP is engaged in active brute-force; blocking.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-i-91040", + "category": "malware_execution", + "severity": "high", + "summary": "malware_execution: 2 event(s); first=net.outbound_connection", + "host": "host-001", + "user": "user-001" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T08:09:00Z", + "source": "network", + "event_type": "net.outbound_connection", + "fields": { + "dst_ip": "10.0.168.133", + "bytes_out": 96660 + } + }, + { + "log_id": "L1-1", + "timestamp": "2026-04-25T18:31:00Z", + "source": "edr", + "event_type": "edr.behavior_match", + "fields": { + "severity": "high", + "rule_id": "EDR-9974", + "host": "host-015" + } + } + ], + "ground_truth": "quarantine_host", + "triggering_log_id": "L1-1", + "stage": "stage2_multi", + "seed": 91040, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": -1.0, + "reward_breakdown": { + "missed_malicious": -1.0 + }, + "correct": false + }, + "trained": { + "raw_text": "Action: quarantine_host\nCitedLog: L1-1\nRationale: Host shows malware execution indicators; isolating.", + "action": "quarantine_host", + "cited_log_id": "L1-1", + "rationale": "Host shows malware execution indicators; isolating.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-c-90028", + "category": "benign_noise", + "severity": "low", + "summary": "benign_noise: 1 event(s); first=net.beacon", + "host": "host-001", + "user": "user-001" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T21:07:00Z", + "source": "network", + "event_type": "net.beacon", + "fields": { + "dst_ip": "10.0.168.29", + "interval_s": 60 + } + } + ], + "ground_truth": "monitor", + "triggering_log_id": "L1-0", + "stage": "stage1_basic", + "seed": 90028, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": -0.9, + "reward_breakdown": { + "missed_malicious": -1.0, + "correct_citation_bonus": 0.1 + }, + "correct": false + }, + "trained": { + "raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.", + "action": "monitor", + "cited_log_id": "L1-0", + "rationale": "Behavior is anomalous but reversible; raising visibility.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-d-92000", + "category": "benign_noise", + "severity": "low", + "summary": "benign_noise: 3 event(s); first=net.outbound_connection", + "host": "host-001", + "user": "user-001" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T04:39:00Z", + "source": "network", + "event_type": "net.outbound_connection", + "fields": { + "dst_ip": "10.0.81.214", + "bytes_out": 86290 + } + }, + { + "log_id": "L1-1", + "timestamp": "2026-04-25T04:39:02Z", + "source": "network", + "event_type": "net.outbound_connection", + "fields": { + "dst_ip": "10.0.138.227", + "bytes_out": 53711 + } + }, + { + "log_id": "L1-2", + "timestamp": "2026-04-25T16:45:00Z", + "source": "network", + "event_type": "net.beacon", + "fields": { + "dst_ip": "10.0.244.186", + "interval_s": 90 + } + } + ], + "ground_truth": "monitor", + "triggering_log_id": "L1-2", + "stage": "stage3_mixed", + "seed": 92000, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": -1.0, + "reward_breakdown": { + "missed_malicious": -1.0 + }, + "correct": false + }, + "trained": { + "raw_text": "Action: monitor\nCitedLog: L1-2\nRationale: Behavior is anomalous but reversible; raising visibility.", + "action": "monitor", + "cited_log_id": "L1-2", + "rationale": "Behavior is anomalous but reversible; raising visibility.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-c-90042", + "category": "benign_noise", + "severity": "low", + "summary": "benign_noise: 1 event(s); first=auth.login_success", + "host": "host-001", + "user": "carol" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T12:10:00Z", + "source": "identity", + "event_type": "auth.login_success", + "fields": { + "user": "carol", + "src_ip": "10.0.0.187" + } + } + ], + "ground_truth": "dismiss", + "triggering_log_id": "L1-0", + "stage": "stage1_basic", + "seed": 90042, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + }, + "trained": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Indicators are consistent with normal user activity.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Indicators are consistent with normal user activity.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-l-93005", + "category": "malware_execution", + "severity": "high", + "summary": "malware_execution: 4 event(s); first=net.dns_query", + "host": "host-001", + "user": "user-001" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T09:11:00Z", + "source": "network", + "event_type": "net.dns_query", + "fields": { + "domain": "google.com" + } + }, + { + "log_id": "L1-1", + "timestamp": "2026-04-25T09:11:02Z", + "source": "identity", + "event_type": "auth.login_success", + "fields": { + "user": "bob", + "src_ip": "10.0.0.196" + } + }, + { + "log_id": "L1-2", + "timestamp": "2026-04-25T09:11:04Z", + "source": "network", + "event_type": "net.outbound_connection", + "fields": { + "dst_ip": "10.0.153.118", + "bytes_out": 9827 + } + }, + { + "log_id": "L1-3", + "timestamp": "2026-04-25T17:44:00Z", + "source": "edr", + "event_type": "edr.behavior_match", + "fields": { + "severity": "high", + "rule_id": "EDR-5070", + "host": "host-033" + } + } + ], + "ground_truth": "quarantine_host", + "triggering_log_id": "L1-3", + "stage": "stage4_adversarial", + "seed": 93005, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": -1.0, + "reward_breakdown": { + "missed_malicious": -1.0 + }, + "correct": false + }, + "trained": { + "raw_text": "Action: quarantine_host\nCitedLog: L1-3\nRationale: Host shows malware execution indicators; isolating.", + "action": "quarantine_host", + "cited_log_id": "L1-3", + "rationale": "Host shows malware execution indicators; isolating.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-c-90031", + "category": "brute_force", + "severity": "medium", + "summary": "brute_force: 8 event(s); first=auth.login_failure", + "host": "host-001", + "user": "root" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T04:02:00Z", + "source": "identity", + "event_type": "auth.login_failure", + "fields": { + "user": "root", + "src_ip": "203.0.113.204" + } + }, + { + "log_id": "L1-1", + "timestamp": "2026-04-25T04:02:05Z", + "source": "identity", + "event_type": "auth.login_failure", + "fields": { + "user": "root", + "src_ip": "203.0.113.157" + } + }, + { + "log_id": "L1-2", + "timestamp": "2026-04-25T04:02:10Z", + "source": "identity", + "event_type": "auth.login_failure", + "fields": { + "user": "root", + "src_ip": "203.0.113.181" + } + }, + { + "log_id": "L1-3", + "timestamp": "2026-04-25T04:02:15Z", + "source": "identity", + "event_type": "auth.login_failure", + "fields": { + "user": "root", + "src_ip": "203.0.113.76" + } + }, + { + "log_id": "L1-4", + "timestamp": "2026-04-25T04:02:20Z", + "source": "identity", + "event_type": "auth.login_failure", + "fields": { + "user": "root", + "src_ip": "203.0.113.90" + } + }, + { + "log_id": "L1-5", + "timestamp": "2026-04-25T04:02:25Z", + "source": "identity", + "event_type": "auth.login_failure", + "fields": { + "user": "root", + "src_ip": "203.0.113.42" + } + }, + { + "log_id": "L1-6", + "timestamp": "2026-04-25T04:02:30Z", + "source": "identity", + "event_type": "auth.login_failure", + "fields": { + "user": "root", + "src_ip": "203.0.113.9" + } + }, + { + "log_id": "L1-7", + "timestamp": "2026-04-25T04:02:35Z", + "source": "identity", + "event_type": "auth.login_failure", + "fields": { + "user": "root", + "src_ip": "203.0.113.115" + } + } + ], + "ground_truth": "monitor", + "triggering_log_id": "L1-0", + "stage": "stage1_basic", + "seed": 90031, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": -0.9, + "reward_breakdown": { + "missed_malicious": -1.0, + "correct_citation_bonus": 0.1 + }, + "correct": false + }, + "trained": { + "raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.", + "action": "monitor", + "cited_log_id": "L1-0", + "rationale": "Behavior is anomalous but reversible; raising visibility.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-l-93002", + "category": "benign_noise", + "severity": "low", + "summary": "benign_noise: 4 event(s); first=net.beacon", + "host": "host-001", + "user": "user-001" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T04:30:00Z", + "source": "network", + "event_type": "net.beacon", + "fields": { + "dst_ip": "10.0.244.83", + "interval_s": 60 + } + }, + { + "log_id": "L1-1", + "timestamp": "2026-04-25T08:04:00Z", + "source": "network", + "event_type": "net.dns_query", + "fields": { + "domain": "internal.corp" + } + }, + { + "log_id": "L1-2", + "timestamp": "2026-04-25T08:04:02Z", + "source": "network", + "event_type": "net.dns_query", + "fields": { + "domain": "internal.corp" + } + }, + { + "log_id": "L1-3", + "timestamp": "2026-04-25T08:04:04Z", + "source": "identity", + "event_type": "auth.login_success", + "fields": { + "user": "alice", + "src_ip": "10.0.0.243" + } + } + ], + "ground_truth": "monitor", + "triggering_log_id": "L1-0", + "stage": "stage4_adversarial", + "seed": 93002, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": -0.9, + "reward_breakdown": { + "missed_malicious": -1.0, + "correct_citation_bonus": 0.1 + }, + "correct": false + }, + "trained": { + "raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.", + "action": "monitor", + "cited_log_id": "L1-0", + "rationale": "Behavior is anomalous but reversible; raising visibility.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-c-90014", + "category": "malware_execution", + "severity": "high", + "summary": "malware_execution: 2 event(s); first=proc.lolbin_use", + "host": "host-001", + "user": "user-001" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T01:39:00Z", + "source": "endpoint", + "event_type": "proc.lolbin_use", + "fields": { + "process": "powershell.exe", + "parent_process": "chrome.exe", + "cmd": "-enc AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + } + }, + { + "log_id": "L1-1", + "timestamp": "2026-04-25T01:39:05Z", + "source": "endpoint", + "event_type": "file.write", + "fields": { + "path": "C:\\Users\\Public\\tmp.exe" + } + } + ], + "ground_truth": "quarantine_host", + "triggering_log_id": "L1-0", + "stage": "stage1_basic", + "seed": 90014, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": -0.9, + "reward_breakdown": { + "missed_malicious": -1.0, + "correct_citation_bonus": 0.1 + }, + "correct": false + }, + "trained": { + "raw_text": "Action: quarantine_host\nCitedLog: L1-0\nRationale: Host shows malware execution indicators; isolating.", + "action": "quarantine_host", + "cited_log_id": "L1-0", + "rationale": "Host shows malware execution indicators; isolating.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-l-93010", + "category": "brute_force", + "severity": "medium", + "summary": "brute_force: 10 event(s); first=net.dns_query", + "host": "host-001", + "user": "user-001" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T05:01:00Z", + "source": "network", + "event_type": "net.dns_query", + "fields": { + "domain": "github.com" + } + }, + { + "log_id": "L1-1", + "timestamp": "2026-04-25T05:01:02Z", + "source": "network", + "event_type": "net.outbound_connection", + "fields": { + "dst_ip": "10.0.244.112", + "bytes_out": 61917 + } + }, + { + "log_id": "L1-2", + "timestamp": "2026-04-25T05:01:04Z", + "source": "identity", + "event_type": "auth.login_success", + "fields": { + "user": "alice", + "src_ip": "10.0.0.15" + } + }, + { + "log_id": "L1-3", + "timestamp": "2026-04-25T20:05:00Z", + "source": "identity", + "event_type": "auth.login_failure", + "fields": { + "user": "admin", + "src_ip": "203.0.113.215" + } + }, + { + "log_id": "L1-4", + "timestamp": "2026-04-25T20:05:05Z", + "source": "identity", + "event_type": "auth.login_failure", + "fields": { + "user": "admin", + "src_ip": "203.0.113.55" + } + }, + { + "log_id": "L1-5", + "timestamp": "2026-04-25T20:05:10Z", + "source": "identity", + "event_type": "auth.login_failure", + "fields": { + "user": "admin", + "src_ip": "203.0.113.156" + } + }, + { + "log_id": "L1-6", + "timestamp": "2026-04-25T20:05:15Z", + "source": "identity", + "event_type": "auth.login_failure", + "fields": { + "user": "admin", + "src_ip": "203.0.113.182" + } + }, + { + "log_id": "L1-7", + "timestamp": "2026-04-25T20:05:20Z", + "source": "identity", + "event_type": "auth.login_failure", + "fields": { + "user": "admin", + "src_ip": "203.0.113.15" + } + }, + { + "log_id": "L1-8", + "timestamp": "2026-04-25T20:05:25Z", + "source": "identity", + "event_type": "auth.login_failure", + "fields": { + "user": "admin", + "src_ip": "203.0.113.226" + } + }, + { + "log_id": "L1-9", + "timestamp": "2026-04-25T20:05:30Z", + "source": "identity", + "event_type": "auth.login_failure", + "fields": { + "user": "admin", + "src_ip": "203.0.113.237" + } + } + ], + "ground_truth": "monitor", + "triggering_log_id": "L1-3", + "stage": "stage4_adversarial", + "seed": 93010, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": -1.0, + "reward_breakdown": { + "missed_malicious": -1.0 + }, + "correct": false + }, + "trained": { + "raw_text": "Action: monitor\nCitedLog: L1-3\nRationale: Behavior is anomalous but reversible; raising visibility.", + "action": "monitor", + "cited_log_id": "L1-3", + "rationale": "Behavior is anomalous but reversible; raising visibility.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-d-92032", + "category": "malware_execution", + "severity": "high", + "summary": "malware_execution: 3 event(s); first=net.dns_query", + "host": "host-001", + "user": "user-001" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T04:00:00Z", + "source": "network", + "event_type": "net.dns_query", + "fields": { + "domain": "internal.corp" + } + }, + { + "log_id": "L1-1", + "timestamp": "2026-04-25T04:00:02Z", + "source": "identity", + "event_type": "auth.login_success", + "fields": { + "user": "dave", + "src_ip": "10.0.0.103" + } + }, + { + "log_id": "L1-2", + "timestamp": "2026-04-25T15:15:00Z", + "source": "edr", + "event_type": "edr.behavior_match", + "fields": { + "severity": "high", + "rule_id": "EDR-7716", + "host": "host-030" + } + } + ], + "ground_truth": "quarantine_host", + "triggering_log_id": "L1-2", + "stage": "stage3_mixed", + "seed": 92032, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": -1.0, + "reward_breakdown": { + "missed_malicious": -1.0 + }, + "correct": false + }, + "trained": { + "raw_text": "Action: quarantine_host\nCitedLog: L1-2\nRationale: Host shows malware execution indicators; isolating.", + "action": "quarantine_host", + "cited_log_id": "L1-2", + "rationale": "Host shows malware execution indicators; isolating.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-l-93013", + "category": "malware_execution", + "severity": "high", + "summary": "malware_execution: 5 event(s); first=auth.login_success", + "host": "host-001", + "user": "carol" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T03:22:00Z", + "source": "identity", + "event_type": "auth.login_success", + "fields": { + "user": "carol", + "src_ip": "10.0.0.18" + } + }, + { + "log_id": "L1-1", + "timestamp": "2026-04-25T03:22:02Z", + "source": "network", + "event_type": "net.outbound_connection", + "fields": { + "dst_ip": "10.0.22.98", + "bytes_out": 65184 + } + }, + { + "log_id": "L1-2", + "timestamp": "2026-04-25T03:22:04Z", + "source": "network", + "event_type": "net.dns_query", + "fields": { + "domain": "github.com" + } + }, + { + "log_id": "L1-3", + "timestamp": "2026-04-25T18:32:00Z", + "source": "endpoint", + "event_type": "proc.lolbin_use", + "fields": { + "process": "powershell.exe", + "parent_process": "chrome.exe", + "cmd": "-enc AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + } + }, + { + "log_id": "L1-4", + "timestamp": "2026-04-25T18:32:05Z", + "source": "endpoint", + "event_type": "file.write", + "fields": { + "path": "C:\\Users\\Public\\payload.exe" + } + } + ], + "ground_truth": "quarantine_host", + "triggering_log_id": "L1-3", + "stage": "stage4_adversarial", + "seed": 93013, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": -1.0, + "reward_breakdown": { + "missed_malicious": -1.0 + }, + "correct": false + }, + "trained": { + "raw_text": "Action: quarantine_host\nCitedLog: L1-3\nRationale: Host shows malware execution indicators; isolating.", + "action": "quarantine_host", + "cited_log_id": "L1-3", + "rationale": "Host shows malware execution indicators; isolating.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-d-92016", + "category": "phishing", + "severity": "high", + "summary": "phishing: 3 event(s); first=net.dns_query", + "host": "host-001", + "user": "user-001" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T01:35:00Z", + "source": "network", + "event_type": "net.dns_query", + "fields": { + "domain": "internal.corp" + } + }, + { + "log_id": "L1-1", + "timestamp": "2026-04-25T01:35:02Z", + "source": "network", + "event_type": "net.dns_query", + "fields": { + "domain": "google.com" + } + }, + { + "log_id": "L1-2", + "timestamp": "2026-04-25T07:28:00Z", + "source": "email", + "event_type": "email.link_clicked", + "fields": { + "url": "https://login-update.top/secure", + "user": "bob" + } + } + ], + "ground_truth": "block_ip", + "triggering_log_id": "L1-2", + "stage": "stage3_mixed", + "seed": 92016, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": -1.0, + "reward_breakdown": { + "missed_malicious": -1.0 + }, + "correct": false + }, + "trained": { + "raw_text": "Action: block_ip\nCitedLog: L1-2\nRationale: External IP is engaged in active brute-force; blocking.", + "action": "block_ip", + "cited_log_id": "L1-2", + "rationale": "External IP is engaged in active brute-force; blocking.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-d-92010", + "category": "brute_force", + "severity": "medium", + "summary": "brute_force: 8 event(s); first=auth.login_success", + "host": "host-001", + "user": "dave" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T13:35:00Z", + "source": "identity", + "event_type": "auth.login_success", + "fields": { + "user": "dave", + "src_ip": "10.0.0.212" + } + }, + { + "log_id": "L1-1", + "timestamp": "2026-04-25T13:35:02Z", + "source": "identity", + "event_type": "auth.login_success", + "fields": { + "user": "carol", + "src_ip": "10.0.0.165" + } + }, + { + "log_id": "L1-2", + "timestamp": "2026-04-25T14:59:00Z", + "source": "identity", + "event_type": "auth.login_failure", + "fields": { + "user": "root", + "src_ip": "203.0.113.7" + } + }, + { + "log_id": "L1-3", + "timestamp": "2026-04-25T14:59:05Z", + "source": "identity", + "event_type": "auth.login_failure", + "fields": { + "user": "root", + "src_ip": "203.0.113.168" + } + }, + { + "log_id": "L1-4", + "timestamp": "2026-04-25T14:59:10Z", + "source": "identity", + "event_type": "auth.login_failure", + "fields": { + "user": "root", + "src_ip": "203.0.113.196" + } + }, + { + "log_id": "L1-5", + "timestamp": "2026-04-25T14:59:15Z", + "source": "identity", + "event_type": "auth.login_failure", + "fields": { + "user": "root", + "src_ip": "203.0.113.201" + } + }, + { + "log_id": "L1-6", + "timestamp": "2026-04-25T14:59:20Z", + "source": "identity", + "event_type": "auth.login_failure", + "fields": { + "user": "root", + "src_ip": "203.0.113.134" + } + }, + { + "log_id": "L1-7", + "timestamp": "2026-04-25T14:59:25Z", + "source": "identity", + "event_type": "auth.login_failure", + "fields": { + "user": "root", + "src_ip": "203.0.113.155" + } + } + ], + "ground_truth": "monitor", + "triggering_log_id": "L1-2", + "stage": "stage3_mixed", + "seed": 92010, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": -1.0, + "reward_breakdown": { + "missed_malicious": -1.0 + }, + "correct": false + }, + "trained": { + "raw_text": "Action: monitor\nCitedLog: L1-2\nRationale: Behavior is anomalous but reversible; raising visibility.", + "action": "monitor", + "cited_log_id": "L1-2", + "rationale": "Behavior is anomalous but reversible; raising visibility.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-l-93041", + "category": "malware_execution", + "severity": "critical", + "summary": "malware_execution: 6 event(s); first=proc.lolbin_use", + "host": "host-001", + "user": "user-001" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T13:22:00Z", + "source": "endpoint", + "event_type": "proc.lolbin_use", + "fields": { + "process": "powershell.exe", + "parent_process": "outlook.exe", + "cmd": "-enc BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB" + } + }, + { + "log_id": "L1-1", + "timestamp": "2026-04-25T13:22:15Z", + "source": "network", + "event_type": "net.beacon", + "fields": { + "dst_ip": "203.0.113.5", + "interval_s": 30 + } + }, + { + "log_id": "L1-2", + "timestamp": "2026-04-25T13:22:30Z", + "source": "identity", + "event_type": "auth.privilege_grant", + "fields": { + "user": "bob", + "role": "admin" + } + }, + { + "log_id": "L1-3", + "timestamp": "2026-04-25T14:00:00Z", + "source": "network", + "event_type": "net.outbound_connection", + "fields": { + "dst_ip": "10.0.1.241", + "bytes_out": 11167 + } + }, + { + "log_id": "L1-4", + "timestamp": "2026-04-25T14:00:02Z", + "source": "identity", + "event_type": "auth.login_success", + "fields": { + "user": "dave", + "src_ip": "10.0.0.122" + } + }, + { + "log_id": "L1-5", + "timestamp": "2026-04-25T14:00:04Z", + "source": "identity", + "event_type": "auth.login_success", + "fields": { + "user": "carol", + "src_ip": "10.0.0.10" + } + } + ], + "ground_truth": "escalate", + "triggering_log_id": "L1-0", + "stage": "stage4_adversarial", + "seed": 93041, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": -0.9, + "reward_breakdown": { + "missed_malicious": -1.0, + "correct_citation_bonus": 0.1 + }, + "correct": false + }, + "trained": { + "raw_text": "Action: escalate\nCitedLog: L1-0\nRationale: Confirmed exfiltration scale exceeds tier-1 thresholds.", + "action": "escalate", + "cited_log_id": "L1-0", + "rationale": "Confirmed exfiltration scale exceeds tier-1 thresholds.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-c-90016", + "category": "benign_noise", + "severity": "medium", + "summary": "benign_noise: 1 event(s); first=net.port_scan_hit", + "host": "host-001", + "user": "user-001" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T04:36:00Z", + "source": "network", + "event_type": "net.port_scan_hit", + "fields": { + "src_ip": "203.0.113.245", + "port": 3389 + } + } + ], + "ground_truth": "monitor", + "triggering_log_id": "L1-0", + "stage": "stage1_basic", + "seed": 90016, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": -0.9, + "reward_breakdown": { + "missed_malicious": -1.0, + "correct_citation_bonus": 0.1 + }, + "correct": false + }, + "trained": { + "raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.", + "action": "monitor", + "cited_log_id": "L1-0", + "rationale": "Behavior is anomalous but reversible; raising visibility.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-d-92011", + "category": "benign_noise", + "severity": "low", + "summary": "benign_noise: 3 event(s); first=net.dns_query", + "host": "host-001", + "user": "user-001" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T05:46:00Z", + "source": "network", + "event_type": "net.dns_query", + "fields": { + "domain": "google.com" + } + }, + { + "log_id": "L1-1", + "timestamp": "2026-04-25T05:46:02Z", + "source": "identity", + "event_type": "auth.login_success", + "fields": { + "user": "carol", + "src_ip": "10.0.0.188" + } + }, + { + "log_id": "L1-2", + "timestamp": "2026-04-25T07:31:00Z", + "source": "network", + "event_type": "net.beacon", + "fields": { + "dst_ip": "10.0.214.180", + "interval_s": 60 + } + } + ], + "ground_truth": "monitor", + "triggering_log_id": "L1-2", + "stage": "stage3_mixed", + "seed": 92011, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": -1.0, + "reward_breakdown": { + "missed_malicious": -1.0 + }, + "correct": false + }, + "trained": { + "raw_text": "Action: monitor\nCitedLog: L1-2\nRationale: Behavior is anomalous but reversible; raising visibility.", + "action": "monitor", + "cited_log_id": "L1-2", + "rationale": "Behavior is anomalous but reversible; raising visibility.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-i-91021", + "category": "phishing", + "severity": "high", + "summary": "phishing: 2 event(s); first=email.link_clicked", + "host": "host-001", + "user": "bob" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T11:07:00Z", + "source": "email", + "event_type": "email.link_clicked", + "fields": { + "url": "https://login-update.cn/secure", + "user": "bob" + } + }, + { + "log_id": "L1-1", + "timestamp": "2026-04-25T22:48:00Z", + "source": "identity", + "event_type": "auth.login_success", + "fields": { + "user": "alice", + "src_ip": "10.0.0.138" + } + } + ], + "ground_truth": "block_ip", + "triggering_log_id": "L1-0", + "stage": "stage2_multi", + "seed": 91021, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": -0.9, + "reward_breakdown": { + "missed_malicious": -1.0, + "correct_citation_bonus": 0.1 + }, + "correct": false + }, + "trained": { + "raw_text": "Action: block_ip\nCitedLog: L1-0\nRationale: External IP is engaged in active brute-force; blocking.", + "action": "block_ip", + "cited_log_id": "L1-0", + "rationale": "External IP is engaged in active brute-force; blocking.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-l-93046", + "category": "c2_beacon", + "severity": "high", + "summary": "c2_beacon: 4 event(s); first=net.beacon", + "host": "host-001", + "user": "user-001" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T11:55:00Z", + "source": "network", + "event_type": "net.beacon", + "fields": { + "dst_ip": "203.0.113.5", + "interval_s": 90 + } + }, + { + "log_id": "L1-1", + "timestamp": "2026-04-25T18:06:00Z", + "source": "identity", + "event_type": "auth.login_success", + "fields": { + "user": "carol", + "src_ip": "10.0.0.243" + } + }, + { + "log_id": "L1-2", + "timestamp": "2026-04-25T18:06:02Z", + "source": "network", + "event_type": "net.dns_query", + "fields": { + "domain": "google.com" + } + }, + { + "log_id": "L1-3", + "timestamp": "2026-04-25T18:06:04Z", + "source": "network", + "event_type": "net.dns_query", + "fields": { + "domain": "internal.corp" + } + } + ], + "ground_truth": "block_ip", + "triggering_log_id": "L1-0", + "stage": "stage4_adversarial", + "seed": 93046, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": -0.9, + "reward_breakdown": { + "missed_malicious": -1.0, + "correct_citation_bonus": 0.1 + }, + "correct": false + }, + "trained": { + "raw_text": "Action: block_ip\nCitedLog: L1-0\nRationale: External IP is engaged in active brute-force; blocking.", + "action": "block_ip", + "cited_log_id": "L1-0", + "rationale": "External IP is engaged in active brute-force; blocking.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-i-91028", + "category": "benign_noise", + "severity": "medium", + "summary": "benign_noise: 2 event(s); first=auth.login_success", + "host": "host-001", + "user": "carol" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T00:59:00Z", + "source": "identity", + "event_type": "auth.login_success", + "fields": { + "user": "carol", + "src_ip": "10.0.0.208" + } + }, + { + "log_id": "L1-1", + "timestamp": "2026-04-25T13:57:00Z", + "source": "network", + "event_type": "net.port_scan_hit", + "fields": { + "src_ip": "203.0.113.15", + "port": 22 + } + } + ], + "ground_truth": "monitor", + "triggering_log_id": "L1-1", + "stage": "stage2_multi", + "seed": 91028, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": -1.0, + "reward_breakdown": { + "missed_malicious": -1.0 + }, + "correct": false + }, + "trained": { + "raw_text": "Action: monitor\nCitedLog: L1-1\nRationale: Behavior is anomalous but reversible; raising visibility.", + "action": "monitor", + "cited_log_id": "L1-1", + "rationale": "Behavior is anomalous but reversible; raising visibility.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-i-91042", + "category": "benign_noise", + "severity": "low", + "summary": "benign_noise: 2 event(s); first=auth.login_success", + "host": "host-001", + "user": "dave" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T05:30:00Z", + "source": "identity", + "event_type": "auth.login_success", + "fields": { + "user": "dave", + "src_ip": "10.0.0.51" + } + }, + { + "log_id": "L1-1", + "timestamp": "2026-04-25T10:03:00Z", + "source": "network", + "event_type": "net.outbound_connection", + "fields": { + "dst_ip": "10.0.41.43", + "bytes_out": 90697 + } + } + ], + "ground_truth": "dismiss", + "triggering_log_id": "L1-1", + "stage": "stage2_multi", + "seed": 91042, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": 1.0, + "reward_breakdown": { + "correct_action": 1.0 + }, + "correct": true + }, + "trained": { + "raw_text": "Action: dismiss\nCitedLog: L1-1\nRationale: Indicators are consistent with normal user activity.", + "action": "dismiss", + "cited_log_id": "L1-1", + "rationale": "Indicators are consistent with normal user activity.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-l-93029", + "category": "c2_beacon", + "severity": "high", + "summary": "c2_beacon: 4 event(s); first=net.dns_query", + "host": "host-001", + "user": "user-001" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T13:00:00Z", + "source": "network", + "event_type": "net.dns_query", + "fields": { + "domain": "google.com" + } + }, + { + "log_id": "L1-1", + "timestamp": "2026-04-25T13:00:02Z", + "source": "identity", + "event_type": "auth.login_success", + "fields": { + "user": "alice", + "src_ip": "10.0.0.234" + } + }, + { + "log_id": "L1-2", + "timestamp": "2026-04-25T13:00:04Z", + "source": "network", + "event_type": "net.dns_query", + "fields": { + "domain": "internal.corp" + } + }, + { + "log_id": "L1-3", + "timestamp": "2026-04-25T22:27:00Z", + "source": "network", + "event_type": "net.beacon", + "fields": { + "dst_ip": "185.220.101.7", + "interval_s": 90 + } + } + ], + "ground_truth": "block_ip", + "triggering_log_id": "L1-3", + "stage": "stage4_adversarial", + "seed": 93029, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": -1.0, + "reward_breakdown": { + "missed_malicious": -1.0 + }, + "correct": false + }, + "trained": { + "raw_text": "Action: block_ip\nCitedLog: L1-3\nRationale: External IP is engaged in active brute-force; blocking.", + "action": "block_ip", + "cited_log_id": "L1-3", + "rationale": "External IP is engaged in active brute-force; blocking.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-c-90029", + "category": "phishing", + "severity": "high", + "summary": "phishing: 1 event(s); first=email.link_clicked", + "host": "host-001", + "user": "alice" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T01:21:00Z", + "source": "email", + "event_type": "email.link_clicked", + "fields": { + "url": "https://login-update.click/secure", + "user": "alice" + } + } + ], + "ground_truth": "block_ip", + "triggering_log_id": "L1-0", + "stage": "stage1_basic", + "seed": 90029, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": -0.9, + "reward_breakdown": { + "missed_malicious": -1.0, + "correct_citation_bonus": 0.1 + }, + "correct": false + }, + "trained": { + "raw_text": "Action: block_ip\nCitedLog: L1-0\nRationale: External IP is engaged in active brute-force; blocking.", + "action": "block_ip", + "cited_log_id": "L1-0", + "rationale": "External IP is engaged in active brute-force; blocking.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-d-92023", + "category": "phishing", + "severity": "high", + "summary": "phishing: 3 event(s); first=net.dns_query", + "host": "host-001", + "user": "user-001" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T08:46:00Z", + "source": "network", + "event_type": "net.dns_query", + "fields": { + "domain": "github.com" + } + }, + { + "log_id": "L1-1", + "timestamp": "2026-04-25T08:46:02Z", + "source": "identity", + "event_type": "auth.login_success", + "fields": { + "user": "dave", + "src_ip": "10.0.0.224" + } + }, + { + "log_id": "L1-2", + "timestamp": "2026-04-25T09:39:00Z", + "source": "email", + "event_type": "email.link_clicked", + "fields": { + "url": "https://login-update.click/secure", + "user": "alice" + } + } + ], + "ground_truth": "block_ip", + "triggering_log_id": "L1-2", + "stage": "stage3_mixed", + "seed": 92023, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": -1.0, + "reward_breakdown": { + "missed_malicious": -1.0 + }, + "correct": false + }, + "trained": { + "raw_text": "Action: block_ip\nCitedLog: L1-2\nRationale: External IP is engaged in active brute-force; blocking.", + "action": "block_ip", + "cited_log_id": "L1-2", + "rationale": "External IP is engaged in active brute-force; blocking.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-l-93039", + "category": "phishing", + "severity": "high", + "summary": "phishing: 4 event(s); first=net.outbound_connection", + "host": "host-001", + "user": "user-001" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T10:31:00Z", + "source": "network", + "event_type": "net.outbound_connection", + "fields": { + "dst_ip": "10.0.184.117", + "bytes_out": 54691 + } + }, + { + "log_id": "L1-1", + "timestamp": "2026-04-25T10:31:02Z", + "source": "network", + "event_type": "net.dns_query", + "fields": { + "domain": "github.com" + } + }, + { + "log_id": "L1-2", + "timestamp": "2026-04-25T10:31:04Z", + "source": "network", + "event_type": "net.outbound_connection", + "fields": { + "dst_ip": "10.0.108.52", + "bytes_out": 60603 + } + }, + { + "log_id": "L1-3", + "timestamp": "2026-04-25T11:04:00Z", + "source": "email", + "event_type": "email.link_clicked", + "fields": { + "url": "https://login-update.ru/secure", + "user": "carol" + } + } + ], + "ground_truth": "block_ip", + "triggering_log_id": "L1-3", + "stage": "stage4_adversarial", + "seed": 93039, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": -1.0, + "reward_breakdown": { + "missed_malicious": -1.0 + }, + "correct": false + }, + "trained": { + "raw_text": "Action: block_ip\nCitedLog: L1-3\nRationale: External IP is engaged in active brute-force; blocking.", + "action": "block_ip", + "cited_log_id": "L1-3", + "rationale": "External IP is engaged in active brute-force; blocking.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-i-91017", + "category": "benign_noise", + "severity": "low", + "summary": "benign_noise: 2 event(s); first=auth.login_success", + "host": "host-001", + "user": "dave" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T04:36:00Z", + "source": "identity", + "event_type": "auth.login_success", + "fields": { + "user": "dave", + "src_ip": "10.0.0.4" + } + }, + { + "log_id": "L1-1", + "timestamp": "2026-04-25T07:24:00Z", + "source": "network", + "event_type": "net.beacon", + "fields": { + "dst_ip": "10.0.216.132", + "interval_s": 30 + } + } + ], + "ground_truth": "monitor", + "triggering_log_id": "L1-1", + "stage": "stage2_multi", + "seed": 91017, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": -1.0, + "reward_breakdown": { + "missed_malicious": -1.0 + }, + "correct": false + }, + "trained": { + "raw_text": "Action: monitor\nCitedLog: L1-1\nRationale: Behavior is anomalous but reversible; raising visibility.", + "action": "monitor", + "cited_log_id": "L1-1", + "rationale": "Behavior is anomalous but reversible; raising visibility.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + }, + { + "alert": { + "alert_id": "A-EVAL-c-90010", + "category": "benign_noise", + "severity": "medium", + "summary": "benign_noise: 1 event(s); first=net.port_scan_hit", + "host": "host-001", + "user": "user-001" + }, + "events": [ + { + "log_id": "L1-0", + "timestamp": "2026-04-25T21:48:00Z", + "source": "network", + "event_type": "net.port_scan_hit", + "fields": { + "src_ip": "203.0.113.146", + "port": 445 + } + } + ], + "ground_truth": "monitor", + "triggering_log_id": "L1-0", + "stage": "stage1_basic", + "seed": 90010, + "baseline": { + "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", + "action": "dismiss", + "cited_log_id": "L1-0", + "rationale": "Looks like routine activity, no clear malicious indicator.", + "format_ok": true, + "reward": -0.9, + "reward_breakdown": { + "missed_malicious": -1.0, + "correct_citation_bonus": 0.1 + }, + "correct": false + }, + "trained": { + "raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.", + "action": "monitor", + "cited_log_id": "L1-0", + "rationale": "Behavior is anomalous but reversible; raising visibility.", + "format_ok": true, + "reward": 1.1, + "reward_breakdown": { + "correct_action": 1.0, + "correct_citation_bonus": 0.1 + }, + "correct": true + } + } + ] +} \ No newline at end of file