{ "n": 48, "source": "placeholder", "baseline_model": "unsloth/Qwen2.5-3B-Instruct", "trained_adapter": null, "examples": [ { "alert": { "alert_id": "A-EVAL-i-91030", "category": "benign_noise", "severity": "low", "summary": "benign_noise: 2 event(s); first=auth.login_success", "host": "host-001", "user": "carol" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T11:58:00Z", "source": "identity", "event_type": "auth.login_success", "fields": { "user": "carol", "src_ip": "10.0.0.187" } }, { "log_id": "L1-1", "timestamp": "2026-04-25T22:09:00Z", "source": "network", "event_type": "net.dns_query", "fields": { "domain": "internal.corp" } } ], "ground_truth": "dismiss", "triggering_log_id": "L1-1", "stage": "stage2_multi", "seed": 91030, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": 1.0, "reward_breakdown": { "correct_action": 1.0 }, "correct": true }, "trained": { "raw_text": "Action: dismiss\nCitedLog: L1-1\nRationale: Indicators are consistent with normal user activity.", "action": "dismiss", "cited_log_id": "L1-1", "rationale": "Indicators are consistent with normal user activity.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-i-91008", "category": "brute_force", "severity": "medium", "summary": "brute_force: 8 event(s); first=net.outbound_connection", "host": "host-001", "user": "user-001" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T11:11:00Z", "source": "network", "event_type": "net.outbound_connection", "fields": { "dst_ip": "10.0.101.112", "bytes_out": 73324 } }, { "log_id": "L1-1", "timestamp": "2026-04-25T14:47:00Z", "source": "identity", "event_type": "auth.login_failure", "fields": { "user": "alice", "src_ip": "203.0.113.76" } }, { "log_id": "L1-2", "timestamp": "2026-04-25T14:47:05Z", "source": "identity", "event_type": "auth.login_failure", "fields": { "user": "alice", "src_ip": "203.0.113.196" } }, { "log_id": "L1-3", "timestamp": "2026-04-25T14:47:10Z", "source": "identity", "event_type": "auth.login_failure", "fields": { "user": "alice", "src_ip": "203.0.113.242" } }, { "log_id": "L1-4", "timestamp": "2026-04-25T14:47:15Z", "source": "identity", "event_type": "auth.login_failure", "fields": { "user": "alice", "src_ip": "203.0.113.243" } }, { "log_id": "L1-5", "timestamp": "2026-04-25T14:47:20Z", "source": "identity", "event_type": "auth.login_failure", "fields": { "user": "alice", "src_ip": "203.0.113.100" } }, { "log_id": "L1-6", "timestamp": "2026-04-25T14:47:25Z", "source": "identity", "event_type": "auth.login_failure", "fields": { "user": "alice", "src_ip": "203.0.113.155" } }, { "log_id": "L1-7", "timestamp": "2026-04-25T14:47:30Z", "source": "identity", "event_type": "auth.login_failure", "fields": { "user": "alice", "src_ip": "203.0.113.43" } } ], "ground_truth": "monitor", "triggering_log_id": "L1-1", "stage": "stage2_multi", "seed": 91008, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": -1.0, "reward_breakdown": { "missed_malicious": -1.0 }, "correct": false }, "trained": { "raw_text": "Action: monitor\nCitedLog: L1-1\nRationale: Behavior is anomalous but reversible; raising visibility.", "action": "monitor", "cited_log_id": "L1-1", "rationale": "Behavior is anomalous but reversible; raising visibility.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-d-92026", "category": "benign_noise", "severity": "low", "summary": "benign_noise: 3 event(s); first=net.dns_query", "host": "host-001", "user": "user-001" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T03:29:00Z", "source": "network", "event_type": "net.dns_query", "fields": { "domain": "internal.corp" } }, { "log_id": "L1-1", "timestamp": "2026-04-25T03:29:02Z", "source": "identity", "event_type": "auth.login_success", "fields": { "user": "dave", "src_ip": "10.0.0.46" } }, { "log_id": "L1-2", "timestamp": "2026-04-25T22:52:00Z", "source": "network", "event_type": "net.beacon", "fields": { "dst_ip": "10.0.204.226", "interval_s": 90 } } ], "ground_truth": "monitor", "triggering_log_id": "L1-2", "stage": "stage3_mixed", "seed": 92026, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": -1.0, "reward_breakdown": { "missed_malicious": -1.0 }, "correct": false }, "trained": { "raw_text": "Action: monitor\nCitedLog: L1-2\nRationale: Behavior is anomalous but reversible; raising visibility.", "action": "monitor", "cited_log_id": "L1-2", "rationale": "Behavior is anomalous but reversible; raising visibility.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-d-92043", "category": "phishing", "severity": "high", "summary": "phishing: 3 event(s); first=email.link_clicked", "host": "host-001", "user": "bob" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T09:59:00Z", "source": "email", "event_type": "email.link_clicked", "fields": { "url": "https://login-update.cn/secure", "user": "bob" } }, { "log_id": "L1-1", "timestamp": "2026-04-25T18:48:00Z", "source": "network", "event_type": "net.outbound_connection", "fields": { "dst_ip": "10.0.170.227", "bytes_out": 65651 } }, { "log_id": "L1-2", "timestamp": "2026-04-25T18:48:02Z", "source": "identity", "event_type": "auth.login_success", "fields": { "user": "carol", "src_ip": "10.0.0.170" } } ], "ground_truth": "block_ip", "triggering_log_id": "L1-0", "stage": "stage3_mixed", "seed": 92043, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": -0.9, "reward_breakdown": { "missed_malicious": -1.0, "correct_citation_bonus": 0.1 }, "correct": false }, "trained": { "raw_text": "Action: block_ip\nCitedLog: L1-0\nRationale: External IP is engaged in active brute-force; blocking.", "action": "block_ip", "cited_log_id": "L1-0", "rationale": "External IP is engaged in active brute-force; blocking.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-i-91037", "category": "benign_noise", "severity": "medium", "summary": "benign_noise: 2 event(s); first=net.outbound_connection", "host": "host-001", "user": "user-001" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T02:54:00Z", "source": "network", "event_type": "net.outbound_connection", "fields": { "dst_ip": "10.0.137.186", "bytes_out": 37324 } }, { "log_id": "L1-1", "timestamp": "2026-04-25T04:23:00Z", "source": "network", "event_type": "net.port_scan_hit", "fields": { "src_ip": "203.0.113.96", "port": 5985 } } ], "ground_truth": "monitor", "triggering_log_id": "L1-1", "stage": "stage2_multi", "seed": 91037, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": -1.0, "reward_breakdown": { "missed_malicious": -1.0 }, "correct": false }, "trained": { "raw_text": "Action: monitor\nCitedLog: L1-1\nRationale: Behavior is anomalous but reversible; raising visibility.", "action": "monitor", "cited_log_id": "L1-1", "rationale": "Behavior is anomalous but reversible; raising visibility.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-d-92013", "category": "benign_noise", "severity": "low", "summary": "benign_noise: 3 event(s); first=net.dns_query", "host": "host-001", "user": "user-001" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T06:31:00Z", "source": "network", "event_type": "net.dns_query", "fields": { "domain": "google.com" } }, { "log_id": "L1-1", "timestamp": "2026-04-25T06:31:02Z", "source": "identity", "event_type": "auth.login_success", "fields": { "user": "bob", "src_ip": "10.0.0.194" } }, { "log_id": "L1-2", "timestamp": "2026-04-25T13:13:00Z", "source": "identity", "event_type": "auth.login_success", "fields": { "user": "bob", "src_ip": "10.0.0.146" } } ], "ground_truth": "dismiss", "triggering_log_id": "L1-2", "stage": "stage3_mixed", "seed": 92013, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": 1.0, "reward_breakdown": { "correct_action": 1.0 }, "correct": true }, "trained": { "raw_text": "Action: dismiss\nCitedLog: L1-2\nRationale: Indicators are consistent with normal user activity.", "action": "dismiss", "cited_log_id": "L1-2", "rationale": "Indicators are consistent with normal user activity.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-d-92049", "category": "malware_execution", "severity": "high", "summary": "malware_execution: 4 event(s); first=auth.login_success", "host": "host-001", "user": "dave" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T00:12:00Z", "source": "identity", "event_type": "auth.login_success", "fields": { "user": "dave", "src_ip": "10.0.0.52" } }, { "log_id": "L1-1", "timestamp": "2026-04-25T00:12:02Z", "source": "network", "event_type": "net.outbound_connection", "fields": { "dst_ip": "10.0.202.40", "bytes_out": 80520 } }, { "log_id": "L1-2", "timestamp": "2026-04-25T19:55:00Z", "source": "endpoint", "event_type": "proc.lolbin_use", "fields": { "process": "powershell.exe", "parent_process": "excel.exe", "cmd": "-enc AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" } }, { "log_id": "L1-3", "timestamp": "2026-04-25T19:55:05Z", "source": "endpoint", "event_type": "file.write", "fields": { "path": "C:\\Users\\Public\\svc.exe" } } ], "ground_truth": "quarantine_host", "triggering_log_id": "L1-2", "stage": "stage3_mixed", "seed": 92049, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": -1.0, "reward_breakdown": { "missed_malicious": -1.0 }, "correct": false }, "trained": { "raw_text": "Action: quarantine_host\nCitedLog: L1-2\nRationale: Host shows malware execution indicators; isolating.", "action": "quarantine_host", "cited_log_id": "L1-2", "rationale": "Host shows malware execution indicators; isolating.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-i-91048", "category": "phishing", "severity": "high", "summary": "phishing: 2 event(s); first=auth.login_success", "host": "host-001", "user": "bob" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T05:25:00Z", "source": "identity", "event_type": "auth.login_success", "fields": { "user": "bob", "src_ip": "10.0.0.225" } }, { "log_id": "L1-1", "timestamp": "2026-04-25T06:09:00Z", "source": "email", "event_type": "email.link_clicked", "fields": { "url": "https://login-update.ru/secure", "user": "bob" } } ], "ground_truth": "block_ip", "triggering_log_id": "L1-1", "stage": "stage2_multi", "seed": 91048, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": -1.0, "reward_breakdown": { "missed_malicious": -1.0 }, "correct": false }, "trained": { "raw_text": "Action: block_ip\nCitedLog: L1-1\nRationale: External IP is engaged in active brute-force; blocking.", "action": "block_ip", "cited_log_id": "L1-1", "rationale": "External IP is engaged in active brute-force; blocking.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-d-92021", "category": "malware_execution", "severity": "critical", "summary": "malware_execution: 5 event(s); first=proc.lolbin_use", "host": "host-001", "user": "user-001" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T03:53:00Z", "source": "endpoint", "event_type": "proc.lolbin_use", "fields": { "process": "powershell.exe", "parent_process": "outlook.exe", "cmd": "-enc BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB" } }, { "log_id": "L1-1", "timestamp": "2026-04-25T03:53:15Z", "source": "network", "event_type": "net.beacon", "fields": { "dst_ip": "203.0.113.5", "interval_s": 30 } }, { "log_id": "L1-2", "timestamp": "2026-04-25T03:53:30Z", "source": "identity", "event_type": "auth.privilege_grant", "fields": { "user": "bob", "role": "admin" } }, { "log_id": "L1-3", "timestamp": "2026-04-25T08:51:00Z", "source": "network", "event_type": "net.dns_query", "fields": { "domain": "internal.corp" } }, { "log_id": "L1-4", "timestamp": "2026-04-25T08:51:02Z", "source": "network", "event_type": "net.dns_query", "fields": { "domain": "google.com" } } ], "ground_truth": "escalate", "triggering_log_id": "L1-0", "stage": "stage3_mixed", "seed": 92021, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": -0.9, "reward_breakdown": { "missed_malicious": -1.0, "correct_citation_bonus": 0.1 }, "correct": false }, "trained": { "raw_text": "Action: escalate\nCitedLog: L1-0\nRationale: Confirmed exfiltration scale exceeds tier-1 thresholds.", "action": "escalate", "cited_log_id": "L1-0", "rationale": "Confirmed exfiltration scale exceeds tier-1 thresholds.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-c-90035", "category": "benign_noise", "severity": "low", "summary": "benign_noise: 1 event(s); first=net.beacon", "host": "host-001", "user": "user-001" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T01:08:00Z", "source": "network", "event_type": "net.beacon", "fields": { "dst_ip": "10.0.98.8", "interval_s": 60 } } ], "ground_truth": "monitor", "triggering_log_id": "L1-0", "stage": "stage1_basic", "seed": 90035, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": -0.9, "reward_breakdown": { "missed_malicious": -1.0, "correct_citation_bonus": 0.1 }, "correct": false }, "trained": { "raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.", "action": "monitor", "cited_log_id": "L1-0", "rationale": "Behavior is anomalous but reversible; raising visibility.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-l-93032", "category": "benign_noise", "severity": "low", "summary": "benign_noise: 4 event(s); first=auth.login_success", "host": "host-001", "user": "dave" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T17:30:00Z", "source": "identity", "event_type": "auth.login_success", "fields": { "user": "dave", "src_ip": "10.0.0.56" } }, { "log_id": "L1-1", "timestamp": "2026-04-25T17:30:02Z", "source": "network", "event_type": "net.outbound_connection", "fields": { "dst_ip": "10.0.30.76", "bytes_out": 65394 } }, { "log_id": "L1-2", "timestamp": "2026-04-25T17:30:04Z", "source": "network", "event_type": "net.outbound_connection", "fields": { "dst_ip": "10.0.171.122", "bytes_out": 2642 } }, { "log_id": "L1-3", "timestamp": "2026-04-25T18:00:00Z", "source": "identity", "event_type": "auth.login_success", "fields": { "user": "dave", "src_ip": "10.0.0.72" } } ], "ground_truth": "dismiss", "triggering_log_id": "L1-3", "stage": "stage4_adversarial", "seed": 93032, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": 1.0, "reward_breakdown": { "correct_action": 1.0 }, "correct": true }, "trained": { "raw_text": "Action: dismiss\nCitedLog: L1-3\nRationale: Indicators are consistent with normal user activity.", "action": "dismiss", "cited_log_id": "L1-3", "rationale": "Indicators are consistent with normal user activity.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-l-93030", "category": "benign_noise", "severity": "low", "summary": "benign_noise: 4 event(s); first=net.beacon", "host": "host-001", "user": "user-001" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T18:13:00Z", "source": "network", "event_type": "net.beacon", "fields": { "dst_ip": "10.0.122.44", "interval_s": 60 } }, { "log_id": "L1-1", "timestamp": "2026-04-25T20:00:00Z", "source": "identity", "event_type": "auth.login_success", "fields": { "user": "bob", "src_ip": "10.0.0.88" } }, { "log_id": "L1-2", "timestamp": "2026-04-25T20:00:02Z", "source": "network", "event_type": "net.dns_query", "fields": { "domain": "github.com" } }, { "log_id": "L1-3", "timestamp": "2026-04-25T20:00:04Z", "source": "network", "event_type": "net.dns_query", "fields": { "domain": "github.com" } } ], "ground_truth": "monitor", "triggering_log_id": "L1-0", "stage": "stage4_adversarial", "seed": 93030, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": -0.9, "reward_breakdown": { "missed_malicious": -1.0, "correct_citation_bonus": 0.1 }, "correct": false }, "trained": { "raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.", "action": "monitor", "cited_log_id": "L1-0", "rationale": "Behavior is anomalous but reversible; raising visibility.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-i-91001", "category": "benign_noise", "severity": "low", "summary": "benign_noise: 2 event(s); first=net.dns_query", "host": "host-001", "user": "user-001" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T03:58:00Z", "source": "network", "event_type": "net.dns_query", "fields": { "domain": "github.com" } }, { "log_id": "L1-1", "timestamp": "2026-04-25T06:16:00Z", "source": "identity", "event_type": "auth.login_success", "fields": { "user": "bob", "src_ip": "10.0.0.120" } } ], "ground_truth": "dismiss", "triggering_log_id": "L1-1", "stage": "stage2_multi", "seed": 91001, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": 1.0, "reward_breakdown": { "correct_action": 1.0 }, "correct": true }, "trained": { "raw_text": "Action: dismiss\nCitedLog: L1-1\nRationale: Indicators are consistent with normal user activity.", "action": "dismiss", "cited_log_id": "L1-1", "rationale": "Indicators are consistent with normal user activity.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-l-93038", "category": "benign_noise", "severity": "low", "summary": "benign_noise: 4 event(s); first=net.dns_query", "host": "host-001", "user": "user-001" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T18:58:00Z", "source": "network", "event_type": "net.dns_query", "fields": { "domain": "internal.corp" } }, { "log_id": "L1-1", "timestamp": "2026-04-25T18:58:02Z", "source": "identity", "event_type": "auth.login_success", "fields": { "user": "bob", "src_ip": "10.0.0.171" } }, { "log_id": "L1-2", "timestamp": "2026-04-25T18:58:04Z", "source": "network", "event_type": "net.outbound_connection", "fields": { "dst_ip": "10.0.47.203", "bytes_out": 14699 } }, { "log_id": "L1-3", "timestamp": "2026-04-25T20:59:00Z", "source": "network", "event_type": "net.beacon", "fields": { "dst_ip": "10.0.36.159", "interval_s": 90 } } ], "ground_truth": "monitor", "triggering_log_id": "L1-3", "stage": "stage4_adversarial", "seed": 93038, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": -1.0, "reward_breakdown": { "missed_malicious": -1.0 }, "correct": false }, "trained": { "raw_text": "Action: monitor\nCitedLog: L1-3\nRationale: Behavior is anomalous but reversible; raising visibility.", "action": "monitor", "cited_log_id": "L1-3", "rationale": "Behavior is anomalous but reversible; raising visibility.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-i-91046", "category": "malware_execution", "severity": "high", "summary": "malware_execution: 3 event(s); first=proc.lolbin_use", "host": "host-001", "user": "user-001" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T02:24:00Z", "source": "endpoint", "event_type": "proc.lolbin_use", "fields": { "process": "powershell.exe", "parent_process": "chrome.exe", "cmd": "-enc AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" } }, { "log_id": "L1-1", "timestamp": "2026-04-25T02:24:05Z", "source": "endpoint", "event_type": "file.write", "fields": { "path": "C:\\Users\\Public\\svc.exe" } }, { "log_id": "L1-2", "timestamp": "2026-04-25T09:39:00Z", "source": "network", "event_type": "net.dns_query", "fields": { "domain": "google.com" } } ], "ground_truth": "quarantine_host", "triggering_log_id": "L1-0", "stage": "stage2_multi", "seed": 91046, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": -0.9, "reward_breakdown": { "missed_malicious": -1.0, "correct_citation_bonus": 0.1 }, "correct": false }, "trained": { "raw_text": "Action: quarantine_host\nCitedLog: L1-0\nRationale: Host shows malware execution indicators; isolating.", "action": "quarantine_host", "cited_log_id": "L1-0", "rationale": "Host shows malware execution indicators; isolating.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-d-92047", "category": "malware_execution", "severity": "high", "summary": "malware_execution: 4 event(s); first=auth.login_success", "host": "host-001", "user": "carol" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T12:36:00Z", "source": "identity", "event_type": "auth.login_success", "fields": { "user": "carol", "src_ip": "10.0.0.130" } }, { "log_id": "L1-1", "timestamp": "2026-04-25T12:36:02Z", "source": "network", "event_type": "net.outbound_connection", "fields": { "dst_ip": "10.0.164.13", "bytes_out": 94453 } }, { "log_id": "L1-2", "timestamp": "2026-04-25T16:44:00Z", "source": "endpoint", "event_type": "proc.lolbin_use", "fields": { "process": "powershell.exe", "parent_process": "excel.exe", "cmd": "-enc AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" } }, { "log_id": "L1-3", "timestamp": "2026-04-25T16:44:05Z", "source": "endpoint", "event_type": "file.write", "fields": { "path": "C:\\Users\\Public\\svc.exe" } } ], "ground_truth": "quarantine_host", "triggering_log_id": "L1-2", "stage": "stage3_mixed", "seed": 92047, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": -1.0, "reward_breakdown": { "missed_malicious": -1.0 }, "correct": false }, "trained": { "raw_text": "Action: quarantine_host\nCitedLog: L1-2\nRationale: Host shows malware execution indicators; isolating.", "action": "quarantine_host", "cited_log_id": "L1-2", "rationale": "Host shows malware execution indicators; isolating.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-c-90038", "category": "benign_noise", "severity": "medium", "summary": "benign_noise: 1 event(s); first=net.port_scan_hit", "host": "host-001", "user": "user-001" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T05:08:00Z", "source": "network", "event_type": "net.port_scan_hit", "fields": { "src_ip": "203.0.113.140", "port": 22 } } ], "ground_truth": "monitor", "triggering_log_id": "L1-0", "stage": "stage1_basic", "seed": 90038, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": -0.9, "reward_breakdown": { "missed_malicious": -1.0, "correct_citation_bonus": 0.1 }, "correct": false }, "trained": { "raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.", "action": "monitor", "cited_log_id": "L1-0", "rationale": "Behavior is anomalous but reversible; raising visibility.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-i-91000", "category": "benign_noise", "severity": "low", "summary": "benign_noise: 2 event(s); first=net.beacon", "host": "host-001", "user": "user-001" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T21:23:00Z", "source": "network", "event_type": "net.beacon", "fields": { "dst_ip": "10.0.61.71", "interval_s": 60 } }, { "log_id": "L1-1", "timestamp": "2026-04-25T22:28:00Z", "source": "identity", "event_type": "auth.login_success", "fields": { "user": "alice", "src_ip": "10.0.0.76" } } ], "ground_truth": "monitor", "triggering_log_id": "L1-0", "stage": "stage2_multi", "seed": 91000, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": -0.9, "reward_breakdown": { "missed_malicious": -1.0, "correct_citation_bonus": 0.1 }, "correct": false }, "trained": { "raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.", "action": "monitor", "cited_log_id": "L1-0", "rationale": "Behavior is anomalous but reversible; raising visibility.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-c-90011", "category": "malware_execution", "severity": "high", "summary": "malware_execution: 2 event(s); first=proc.lolbin_use", "host": "host-001", "user": "user-001" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T19:21:00Z", "source": "endpoint", "event_type": "proc.lolbin_use", "fields": { "process": "powershell.exe", "parent_process": "winword.exe", "cmd": "-enc AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" } }, { "log_id": "L1-1", "timestamp": "2026-04-25T19:21:05Z", "source": "endpoint", "event_type": "file.write", "fields": { "path": "C:\\Users\\Public\\payload.exe" } } ], "ground_truth": "quarantine_host", "triggering_log_id": "L1-0", "stage": "stage1_basic", "seed": 90011, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": -0.9, "reward_breakdown": { "missed_malicious": -1.0, "correct_citation_bonus": 0.1 }, "correct": false }, "trained": { "raw_text": "Action: quarantine_host\nCitedLog: L1-0\nRationale: Host shows malware execution indicators; isolating.", "action": "quarantine_host", "cited_log_id": "L1-0", "rationale": "Host shows malware execution indicators; isolating.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-c-90024", "category": "phishing", "severity": "high", "summary": "phishing: 1 event(s); first=email.link_clicked", "host": "host-001", "user": "alice" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T03:27:00Z", "source": "email", "event_type": "email.link_clicked", "fields": { "url": "https://login-update.top/secure", "user": "alice" } } ], "ground_truth": "block_ip", "triggering_log_id": "L1-0", "stage": "stage1_basic", "seed": 90024, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": -0.9, "reward_breakdown": { "missed_malicious": -1.0, "correct_citation_bonus": 0.1 }, "correct": false }, "trained": { "raw_text": "Action: block_ip\nCitedLog: L1-0\nRationale: External IP is engaged in active brute-force; blocking.", "action": "block_ip", "cited_log_id": "L1-0", "rationale": "External IP is engaged in active brute-force; blocking.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-l-93004", "category": "benign_noise", "severity": "low", "summary": "benign_noise: 4 event(s); first=auth.login_success", "host": "host-001", "user": "dave" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T04:55:00Z", "source": "identity", "event_type": "auth.login_success", "fields": { "user": "dave", "src_ip": "10.0.0.144" } }, { "log_id": "L1-1", "timestamp": "2026-04-25T19:38:00Z", "source": "network", "event_type": "net.dns_query", "fields": { "domain": "github.com" } }, { "log_id": "L1-2", "timestamp": "2026-04-25T19:38:02Z", "source": "network", "event_type": "net.dns_query", "fields": { "domain": "github.com" } }, { "log_id": "L1-3", "timestamp": "2026-04-25T19:38:04Z", "source": "network", "event_type": "net.dns_query", "fields": { "domain": "internal.corp" } } ], "ground_truth": "dismiss", "triggering_log_id": "L1-3", "stage": "stage4_adversarial", "seed": 93004, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": 1.0, "reward_breakdown": { "correct_action": 1.0 }, "correct": true }, "trained": { "raw_text": "Action: dismiss\nCitedLog: L1-3\nRationale: Indicators are consistent with normal user activity.", "action": "dismiss", "cited_log_id": "L1-3", "rationale": "Indicators are consistent with normal user activity.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-c-90048", "category": "phishing", "severity": "high", "summary": "phishing: 1 event(s); first=email.link_clicked", "host": "host-001", "user": "alice" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T02:43:00Z", "source": "email", "event_type": "email.link_clicked", "fields": { "url": "https://login-update.click/secure", "user": "alice" } } ], "ground_truth": "block_ip", "triggering_log_id": "L1-0", "stage": "stage1_basic", "seed": 90048, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": -0.9, "reward_breakdown": { "missed_malicious": -1.0, "correct_citation_bonus": 0.1 }, "correct": false }, "trained": { "raw_text": "Action: block_ip\nCitedLog: L1-0\nRationale: External IP is engaged in active brute-force; blocking.", "action": "block_ip", "cited_log_id": "L1-0", "rationale": "External IP is engaged in active brute-force; blocking.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-i-91040", "category": "malware_execution", "severity": "high", "summary": "malware_execution: 2 event(s); first=net.outbound_connection", "host": "host-001", "user": "user-001" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T08:09:00Z", "source": "network", "event_type": "net.outbound_connection", "fields": { "dst_ip": "10.0.168.133", "bytes_out": 96660 } }, { "log_id": "L1-1", "timestamp": "2026-04-25T18:31:00Z", "source": "edr", "event_type": "edr.behavior_match", "fields": { "severity": "high", "rule_id": "EDR-9974", "host": "host-015" } } ], "ground_truth": "quarantine_host", "triggering_log_id": "L1-1", "stage": "stage2_multi", "seed": 91040, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": -1.0, "reward_breakdown": { "missed_malicious": -1.0 }, "correct": false }, "trained": { "raw_text": "Action: quarantine_host\nCitedLog: L1-1\nRationale: Host shows malware execution indicators; isolating.", "action": "quarantine_host", "cited_log_id": "L1-1", "rationale": "Host shows malware execution indicators; isolating.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-c-90028", "category": "benign_noise", "severity": "low", "summary": "benign_noise: 1 event(s); first=net.beacon", "host": "host-001", "user": "user-001" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T21:07:00Z", "source": "network", "event_type": "net.beacon", "fields": { "dst_ip": "10.0.168.29", "interval_s": 60 } } ], "ground_truth": "monitor", "triggering_log_id": "L1-0", "stage": "stage1_basic", "seed": 90028, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": -0.9, "reward_breakdown": { "missed_malicious": -1.0, "correct_citation_bonus": 0.1 }, "correct": false }, "trained": { "raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.", "action": "monitor", "cited_log_id": "L1-0", "rationale": "Behavior is anomalous but reversible; raising visibility.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-d-92000", "category": "benign_noise", "severity": "low", "summary": "benign_noise: 3 event(s); first=net.outbound_connection", "host": "host-001", "user": "user-001" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T04:39:00Z", "source": "network", "event_type": "net.outbound_connection", "fields": { "dst_ip": "10.0.81.214", "bytes_out": 86290 } }, { "log_id": "L1-1", "timestamp": "2026-04-25T04:39:02Z", "source": "network", "event_type": "net.outbound_connection", "fields": { "dst_ip": "10.0.138.227", "bytes_out": 53711 } }, { "log_id": "L1-2", "timestamp": "2026-04-25T16:45:00Z", "source": "network", "event_type": "net.beacon", "fields": { "dst_ip": "10.0.244.186", "interval_s": 90 } } ], "ground_truth": "monitor", "triggering_log_id": "L1-2", "stage": "stage3_mixed", "seed": 92000, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": -1.0, "reward_breakdown": { "missed_malicious": -1.0 }, "correct": false }, "trained": { "raw_text": "Action: monitor\nCitedLog: L1-2\nRationale: Behavior is anomalous but reversible; raising visibility.", "action": "monitor", "cited_log_id": "L1-2", "rationale": "Behavior is anomalous but reversible; raising visibility.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-c-90042", "category": "benign_noise", "severity": "low", "summary": "benign_noise: 1 event(s); first=auth.login_success", "host": "host-001", "user": "carol" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T12:10:00Z", "source": "identity", "event_type": "auth.login_success", "fields": { "user": "carol", "src_ip": "10.0.0.187" } } ], "ground_truth": "dismiss", "triggering_log_id": "L1-0", "stage": "stage1_basic", "seed": 90042, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true }, "trained": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Indicators are consistent with normal user activity.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Indicators are consistent with normal user activity.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-l-93005", "category": "malware_execution", "severity": "high", "summary": "malware_execution: 4 event(s); first=net.dns_query", "host": "host-001", "user": "user-001" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T09:11:00Z", "source": "network", "event_type": "net.dns_query", "fields": { "domain": "google.com" } }, { "log_id": "L1-1", "timestamp": "2026-04-25T09:11:02Z", "source": "identity", "event_type": "auth.login_success", "fields": { "user": "bob", "src_ip": "10.0.0.196" } }, { "log_id": "L1-2", "timestamp": "2026-04-25T09:11:04Z", "source": "network", "event_type": "net.outbound_connection", "fields": { "dst_ip": "10.0.153.118", "bytes_out": 9827 } }, { "log_id": "L1-3", "timestamp": "2026-04-25T17:44:00Z", "source": "edr", "event_type": "edr.behavior_match", "fields": { "severity": "high", "rule_id": "EDR-5070", "host": "host-033" } } ], "ground_truth": "quarantine_host", "triggering_log_id": "L1-3", "stage": "stage4_adversarial", "seed": 93005, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": -1.0, "reward_breakdown": { "missed_malicious": -1.0 }, "correct": false }, "trained": { "raw_text": "Action: quarantine_host\nCitedLog: L1-3\nRationale: Host shows malware execution indicators; isolating.", "action": "quarantine_host", "cited_log_id": "L1-3", "rationale": "Host shows malware execution indicators; isolating.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-c-90031", "category": "brute_force", "severity": "medium", "summary": "brute_force: 8 event(s); first=auth.login_failure", "host": "host-001", "user": "root" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T04:02:00Z", "source": "identity", "event_type": "auth.login_failure", "fields": { "user": "root", "src_ip": "203.0.113.204" } }, { "log_id": "L1-1", "timestamp": "2026-04-25T04:02:05Z", "source": "identity", "event_type": "auth.login_failure", "fields": { "user": "root", "src_ip": "203.0.113.157" } }, { "log_id": "L1-2", "timestamp": "2026-04-25T04:02:10Z", "source": "identity", "event_type": "auth.login_failure", "fields": { "user": "root", "src_ip": "203.0.113.181" } }, { "log_id": "L1-3", "timestamp": "2026-04-25T04:02:15Z", "source": "identity", "event_type": "auth.login_failure", "fields": { "user": "root", "src_ip": "203.0.113.76" } }, { "log_id": "L1-4", "timestamp": "2026-04-25T04:02:20Z", "source": "identity", "event_type": "auth.login_failure", "fields": { "user": "root", "src_ip": "203.0.113.90" } }, { "log_id": "L1-5", "timestamp": "2026-04-25T04:02:25Z", "source": "identity", "event_type": "auth.login_failure", "fields": { "user": "root", "src_ip": "203.0.113.42" } }, { "log_id": "L1-6", "timestamp": "2026-04-25T04:02:30Z", "source": "identity", "event_type": "auth.login_failure", "fields": { "user": "root", "src_ip": "203.0.113.9" } }, { "log_id": "L1-7", "timestamp": "2026-04-25T04:02:35Z", "source": "identity", "event_type": "auth.login_failure", "fields": { "user": "root", "src_ip": "203.0.113.115" } } ], "ground_truth": "monitor", "triggering_log_id": "L1-0", "stage": "stage1_basic", "seed": 90031, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": -0.9, "reward_breakdown": { "missed_malicious": -1.0, "correct_citation_bonus": 0.1 }, "correct": false }, "trained": { "raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.", "action": "monitor", "cited_log_id": "L1-0", "rationale": "Behavior is anomalous but reversible; raising visibility.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-l-93002", "category": "benign_noise", "severity": "low", "summary": "benign_noise: 4 event(s); first=net.beacon", "host": "host-001", "user": "user-001" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T04:30:00Z", "source": "network", "event_type": "net.beacon", "fields": { "dst_ip": "10.0.244.83", "interval_s": 60 } }, { "log_id": "L1-1", "timestamp": "2026-04-25T08:04:00Z", "source": "network", "event_type": "net.dns_query", "fields": { "domain": "internal.corp" } }, { "log_id": "L1-2", "timestamp": "2026-04-25T08:04:02Z", "source": "network", "event_type": "net.dns_query", "fields": { "domain": "internal.corp" } }, { "log_id": "L1-3", "timestamp": "2026-04-25T08:04:04Z", "source": "identity", "event_type": "auth.login_success", "fields": { "user": "alice", "src_ip": "10.0.0.243" } } ], "ground_truth": "monitor", "triggering_log_id": "L1-0", "stage": "stage4_adversarial", "seed": 93002, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": -0.9, "reward_breakdown": { "missed_malicious": -1.0, "correct_citation_bonus": 0.1 }, "correct": false }, "trained": { "raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.", "action": "monitor", "cited_log_id": "L1-0", "rationale": "Behavior is anomalous but reversible; raising visibility.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-c-90014", "category": "malware_execution", "severity": "high", "summary": "malware_execution: 2 event(s); first=proc.lolbin_use", "host": "host-001", "user": "user-001" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T01:39:00Z", "source": "endpoint", "event_type": "proc.lolbin_use", "fields": { "process": "powershell.exe", "parent_process": "chrome.exe", "cmd": "-enc AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" } }, { "log_id": "L1-1", "timestamp": "2026-04-25T01:39:05Z", "source": "endpoint", "event_type": "file.write", "fields": { "path": "C:\\Users\\Public\\tmp.exe" } } ], "ground_truth": "quarantine_host", "triggering_log_id": "L1-0", "stage": "stage1_basic", "seed": 90014, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": -0.9, "reward_breakdown": { "missed_malicious": -1.0, "correct_citation_bonus": 0.1 }, "correct": false }, "trained": { "raw_text": "Action: quarantine_host\nCitedLog: L1-0\nRationale: Host shows malware execution indicators; isolating.", "action": "quarantine_host", "cited_log_id": "L1-0", "rationale": "Host shows malware execution indicators; isolating.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-l-93010", "category": "brute_force", "severity": "medium", "summary": "brute_force: 10 event(s); first=net.dns_query", "host": "host-001", "user": "user-001" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T05:01:00Z", "source": "network", "event_type": "net.dns_query", "fields": { "domain": "github.com" } }, { "log_id": "L1-1", "timestamp": "2026-04-25T05:01:02Z", "source": "network", "event_type": "net.outbound_connection", "fields": { "dst_ip": "10.0.244.112", "bytes_out": 61917 } }, { "log_id": "L1-2", "timestamp": "2026-04-25T05:01:04Z", "source": "identity", "event_type": "auth.login_success", "fields": { "user": "alice", "src_ip": "10.0.0.15" } }, { "log_id": "L1-3", "timestamp": "2026-04-25T20:05:00Z", "source": "identity", "event_type": "auth.login_failure", "fields": { "user": "admin", "src_ip": "203.0.113.215" } }, { "log_id": "L1-4", "timestamp": "2026-04-25T20:05:05Z", "source": "identity", "event_type": "auth.login_failure", "fields": { "user": "admin", "src_ip": "203.0.113.55" } }, { "log_id": "L1-5", "timestamp": "2026-04-25T20:05:10Z", "source": "identity", "event_type": "auth.login_failure", "fields": { "user": "admin", "src_ip": "203.0.113.156" } }, { "log_id": "L1-6", "timestamp": "2026-04-25T20:05:15Z", "source": "identity", "event_type": "auth.login_failure", "fields": { "user": "admin", "src_ip": "203.0.113.182" } }, { "log_id": "L1-7", "timestamp": "2026-04-25T20:05:20Z", "source": "identity", "event_type": "auth.login_failure", "fields": { "user": "admin", "src_ip": "203.0.113.15" } }, { "log_id": "L1-8", "timestamp": "2026-04-25T20:05:25Z", "source": "identity", "event_type": "auth.login_failure", "fields": { "user": "admin", "src_ip": "203.0.113.226" } }, { "log_id": "L1-9", "timestamp": "2026-04-25T20:05:30Z", "source": "identity", "event_type": "auth.login_failure", "fields": { "user": "admin", "src_ip": "203.0.113.237" } } ], "ground_truth": "monitor", "triggering_log_id": "L1-3", "stage": "stage4_adversarial", "seed": 93010, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": -1.0, "reward_breakdown": { "missed_malicious": -1.0 }, "correct": false }, "trained": { "raw_text": "Action: monitor\nCitedLog: L1-3\nRationale: Behavior is anomalous but reversible; raising visibility.", "action": "monitor", "cited_log_id": "L1-3", "rationale": "Behavior is anomalous but reversible; raising visibility.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-d-92032", "category": "malware_execution", "severity": "high", "summary": "malware_execution: 3 event(s); first=net.dns_query", "host": "host-001", "user": "user-001" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T04:00:00Z", "source": "network", "event_type": "net.dns_query", "fields": { "domain": "internal.corp" } }, { "log_id": "L1-1", "timestamp": "2026-04-25T04:00:02Z", "source": "identity", "event_type": "auth.login_success", "fields": { "user": "dave", "src_ip": "10.0.0.103" } }, { "log_id": "L1-2", "timestamp": "2026-04-25T15:15:00Z", "source": "edr", "event_type": "edr.behavior_match", "fields": { "severity": "high", "rule_id": "EDR-7716", "host": "host-030" } } ], "ground_truth": "quarantine_host", "triggering_log_id": "L1-2", "stage": "stage3_mixed", "seed": 92032, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": -1.0, "reward_breakdown": { "missed_malicious": -1.0 }, "correct": false }, "trained": { "raw_text": "Action: quarantine_host\nCitedLog: L1-2\nRationale: Host shows malware execution indicators; isolating.", "action": "quarantine_host", "cited_log_id": "L1-2", "rationale": "Host shows malware execution indicators; isolating.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-l-93013", "category": "malware_execution", "severity": "high", "summary": "malware_execution: 5 event(s); first=auth.login_success", "host": "host-001", "user": "carol" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T03:22:00Z", "source": "identity", "event_type": "auth.login_success", "fields": { "user": "carol", "src_ip": "10.0.0.18" } }, { "log_id": "L1-1", "timestamp": "2026-04-25T03:22:02Z", "source": "network", "event_type": "net.outbound_connection", "fields": { "dst_ip": "10.0.22.98", "bytes_out": 65184 } }, { "log_id": "L1-2", "timestamp": "2026-04-25T03:22:04Z", "source": "network", "event_type": "net.dns_query", "fields": { "domain": "github.com" } }, { "log_id": "L1-3", "timestamp": "2026-04-25T18:32:00Z", "source": "endpoint", "event_type": "proc.lolbin_use", "fields": { "process": "powershell.exe", "parent_process": "chrome.exe", "cmd": "-enc AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" } }, { "log_id": "L1-4", "timestamp": "2026-04-25T18:32:05Z", "source": "endpoint", "event_type": "file.write", "fields": { "path": "C:\\Users\\Public\\payload.exe" } } ], "ground_truth": "quarantine_host", "triggering_log_id": "L1-3", "stage": "stage4_adversarial", "seed": 93013, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": -1.0, "reward_breakdown": { "missed_malicious": -1.0 }, "correct": false }, "trained": { "raw_text": "Action: quarantine_host\nCitedLog: L1-3\nRationale: Host shows malware execution indicators; isolating.", "action": "quarantine_host", "cited_log_id": "L1-3", "rationale": "Host shows malware execution indicators; isolating.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-d-92016", "category": "phishing", "severity": "high", "summary": "phishing: 3 event(s); first=net.dns_query", "host": "host-001", "user": "user-001" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T01:35:00Z", "source": "network", "event_type": "net.dns_query", "fields": { "domain": "internal.corp" } }, { "log_id": "L1-1", "timestamp": "2026-04-25T01:35:02Z", "source": "network", "event_type": "net.dns_query", "fields": { "domain": "google.com" } }, { "log_id": "L1-2", "timestamp": "2026-04-25T07:28:00Z", "source": "email", "event_type": "email.link_clicked", "fields": { "url": "https://login-update.top/secure", "user": "bob" } } ], "ground_truth": "block_ip", "triggering_log_id": "L1-2", "stage": "stage3_mixed", "seed": 92016, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": -1.0, "reward_breakdown": { "missed_malicious": -1.0 }, "correct": false }, "trained": { "raw_text": "Action: block_ip\nCitedLog: L1-2\nRationale: External IP is engaged in active brute-force; blocking.", "action": "block_ip", "cited_log_id": "L1-2", "rationale": "External IP is engaged in active brute-force; blocking.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-d-92010", "category": "brute_force", "severity": "medium", "summary": "brute_force: 8 event(s); first=auth.login_success", "host": "host-001", "user": "dave" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T13:35:00Z", "source": "identity", "event_type": "auth.login_success", "fields": { "user": "dave", "src_ip": "10.0.0.212" } }, { "log_id": "L1-1", "timestamp": "2026-04-25T13:35:02Z", "source": "identity", "event_type": "auth.login_success", "fields": { "user": "carol", "src_ip": "10.0.0.165" } }, { "log_id": "L1-2", "timestamp": "2026-04-25T14:59:00Z", "source": "identity", "event_type": "auth.login_failure", "fields": { "user": "root", "src_ip": "203.0.113.7" } }, { "log_id": "L1-3", "timestamp": "2026-04-25T14:59:05Z", "source": "identity", "event_type": "auth.login_failure", "fields": { "user": "root", "src_ip": "203.0.113.168" } }, { "log_id": "L1-4", "timestamp": "2026-04-25T14:59:10Z", "source": "identity", "event_type": "auth.login_failure", "fields": { "user": "root", "src_ip": "203.0.113.196" } }, { "log_id": "L1-5", "timestamp": "2026-04-25T14:59:15Z", "source": "identity", "event_type": "auth.login_failure", "fields": { "user": "root", "src_ip": "203.0.113.201" } }, { "log_id": "L1-6", "timestamp": "2026-04-25T14:59:20Z", "source": "identity", "event_type": "auth.login_failure", "fields": { "user": "root", "src_ip": "203.0.113.134" } }, { "log_id": "L1-7", "timestamp": "2026-04-25T14:59:25Z", "source": "identity", "event_type": "auth.login_failure", "fields": { "user": "root", "src_ip": "203.0.113.155" } } ], "ground_truth": "monitor", "triggering_log_id": "L1-2", "stage": "stage3_mixed", "seed": 92010, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": -1.0, "reward_breakdown": { "missed_malicious": -1.0 }, "correct": false }, "trained": { "raw_text": "Action: monitor\nCitedLog: L1-2\nRationale: Behavior is anomalous but reversible; raising visibility.", "action": "monitor", "cited_log_id": "L1-2", "rationale": "Behavior is anomalous but reversible; raising visibility.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-l-93041", "category": "malware_execution", "severity": "critical", "summary": "malware_execution: 6 event(s); first=proc.lolbin_use", "host": "host-001", "user": "user-001" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T13:22:00Z", "source": "endpoint", "event_type": "proc.lolbin_use", "fields": { "process": "powershell.exe", "parent_process": "outlook.exe", "cmd": "-enc BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB" } }, { "log_id": "L1-1", "timestamp": "2026-04-25T13:22:15Z", "source": "network", "event_type": "net.beacon", "fields": { "dst_ip": "203.0.113.5", "interval_s": 30 } }, { "log_id": "L1-2", "timestamp": "2026-04-25T13:22:30Z", "source": "identity", "event_type": "auth.privilege_grant", "fields": { "user": "bob", "role": "admin" } }, { "log_id": "L1-3", "timestamp": "2026-04-25T14:00:00Z", "source": "network", "event_type": "net.outbound_connection", "fields": { "dst_ip": "10.0.1.241", "bytes_out": 11167 } }, { "log_id": "L1-4", "timestamp": "2026-04-25T14:00:02Z", "source": "identity", "event_type": "auth.login_success", "fields": { "user": "dave", "src_ip": "10.0.0.122" } }, { "log_id": "L1-5", "timestamp": "2026-04-25T14:00:04Z", "source": "identity", "event_type": "auth.login_success", "fields": { "user": "carol", "src_ip": "10.0.0.10" } } ], "ground_truth": "escalate", "triggering_log_id": "L1-0", "stage": "stage4_adversarial", "seed": 93041, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": -0.9, "reward_breakdown": { "missed_malicious": -1.0, "correct_citation_bonus": 0.1 }, "correct": false }, "trained": { "raw_text": "Action: escalate\nCitedLog: L1-0\nRationale: Confirmed exfiltration scale exceeds tier-1 thresholds.", "action": "escalate", "cited_log_id": "L1-0", "rationale": "Confirmed exfiltration scale exceeds tier-1 thresholds.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-c-90016", "category": "benign_noise", "severity": "medium", "summary": "benign_noise: 1 event(s); first=net.port_scan_hit", "host": "host-001", "user": "user-001" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T04:36:00Z", "source": "network", "event_type": "net.port_scan_hit", "fields": { "src_ip": "203.0.113.245", "port": 3389 } } ], "ground_truth": "monitor", "triggering_log_id": "L1-0", "stage": "stage1_basic", "seed": 90016, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": -0.9, "reward_breakdown": { "missed_malicious": -1.0, "correct_citation_bonus": 0.1 }, "correct": false }, "trained": { "raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.", "action": "monitor", "cited_log_id": "L1-0", "rationale": "Behavior is anomalous but reversible; raising visibility.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-d-92011", "category": "benign_noise", "severity": "low", "summary": "benign_noise: 3 event(s); first=net.dns_query", "host": "host-001", "user": "user-001" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T05:46:00Z", "source": "network", "event_type": "net.dns_query", "fields": { "domain": "google.com" } }, { "log_id": "L1-1", "timestamp": "2026-04-25T05:46:02Z", "source": "identity", "event_type": "auth.login_success", "fields": { "user": "carol", "src_ip": "10.0.0.188" } }, { "log_id": "L1-2", "timestamp": "2026-04-25T07:31:00Z", "source": "network", "event_type": "net.beacon", "fields": { "dst_ip": "10.0.214.180", "interval_s": 60 } } ], "ground_truth": "monitor", "triggering_log_id": "L1-2", "stage": "stage3_mixed", "seed": 92011, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": -1.0, "reward_breakdown": { "missed_malicious": -1.0 }, "correct": false }, "trained": { "raw_text": "Action: monitor\nCitedLog: L1-2\nRationale: Behavior is anomalous but reversible; raising visibility.", "action": "monitor", "cited_log_id": "L1-2", "rationale": "Behavior is anomalous but reversible; raising visibility.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-i-91021", "category": "phishing", "severity": "high", "summary": "phishing: 2 event(s); first=email.link_clicked", "host": "host-001", "user": "bob" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T11:07:00Z", "source": "email", "event_type": "email.link_clicked", "fields": { "url": "https://login-update.cn/secure", "user": "bob" } }, { "log_id": "L1-1", "timestamp": "2026-04-25T22:48:00Z", "source": "identity", "event_type": "auth.login_success", "fields": { "user": "alice", "src_ip": "10.0.0.138" } } ], "ground_truth": "block_ip", "triggering_log_id": "L1-0", "stage": "stage2_multi", "seed": 91021, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": -0.9, "reward_breakdown": { "missed_malicious": -1.0, "correct_citation_bonus": 0.1 }, "correct": false }, "trained": { "raw_text": "Action: block_ip\nCitedLog: L1-0\nRationale: External IP is engaged in active brute-force; blocking.", "action": "block_ip", "cited_log_id": "L1-0", "rationale": "External IP is engaged in active brute-force; blocking.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-l-93046", "category": "c2_beacon", "severity": "high", "summary": "c2_beacon: 4 event(s); first=net.beacon", "host": "host-001", "user": "user-001" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T11:55:00Z", "source": "network", "event_type": "net.beacon", "fields": { "dst_ip": "203.0.113.5", "interval_s": 90 } }, { "log_id": "L1-1", "timestamp": "2026-04-25T18:06:00Z", "source": "identity", "event_type": "auth.login_success", "fields": { "user": "carol", "src_ip": "10.0.0.243" } }, { "log_id": "L1-2", "timestamp": "2026-04-25T18:06:02Z", "source": "network", "event_type": "net.dns_query", "fields": { "domain": "google.com" } }, { "log_id": "L1-3", "timestamp": "2026-04-25T18:06:04Z", "source": "network", "event_type": "net.dns_query", "fields": { "domain": "internal.corp" } } ], "ground_truth": "block_ip", "triggering_log_id": "L1-0", "stage": "stage4_adversarial", "seed": 93046, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": -0.9, "reward_breakdown": { "missed_malicious": -1.0, "correct_citation_bonus": 0.1 }, "correct": false }, "trained": { "raw_text": "Action: block_ip\nCitedLog: L1-0\nRationale: External IP is engaged in active brute-force; blocking.", "action": "block_ip", "cited_log_id": "L1-0", "rationale": "External IP is engaged in active brute-force; blocking.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-i-91028", "category": "benign_noise", "severity": "medium", "summary": "benign_noise: 2 event(s); first=auth.login_success", "host": "host-001", "user": "carol" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T00:59:00Z", "source": "identity", "event_type": "auth.login_success", "fields": { "user": "carol", "src_ip": "10.0.0.208" } }, { "log_id": "L1-1", "timestamp": "2026-04-25T13:57:00Z", "source": "network", "event_type": "net.port_scan_hit", "fields": { "src_ip": "203.0.113.15", "port": 22 } } ], "ground_truth": "monitor", "triggering_log_id": "L1-1", "stage": "stage2_multi", "seed": 91028, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": -1.0, "reward_breakdown": { "missed_malicious": -1.0 }, "correct": false }, "trained": { "raw_text": "Action: monitor\nCitedLog: L1-1\nRationale: Behavior is anomalous but reversible; raising visibility.", "action": "monitor", "cited_log_id": "L1-1", "rationale": "Behavior is anomalous but reversible; raising visibility.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-i-91042", "category": "benign_noise", "severity": "low", "summary": "benign_noise: 2 event(s); first=auth.login_success", "host": "host-001", "user": "dave" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T05:30:00Z", "source": "identity", "event_type": "auth.login_success", "fields": { "user": "dave", "src_ip": "10.0.0.51" } }, { "log_id": "L1-1", "timestamp": "2026-04-25T10:03:00Z", "source": "network", "event_type": "net.outbound_connection", "fields": { "dst_ip": "10.0.41.43", "bytes_out": 90697 } } ], "ground_truth": "dismiss", "triggering_log_id": "L1-1", "stage": "stage2_multi", "seed": 91042, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": 1.0, "reward_breakdown": { "correct_action": 1.0 }, "correct": true }, "trained": { "raw_text": "Action: dismiss\nCitedLog: L1-1\nRationale: Indicators are consistent with normal user activity.", "action": "dismiss", "cited_log_id": "L1-1", "rationale": "Indicators are consistent with normal user activity.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-l-93029", "category": "c2_beacon", "severity": "high", "summary": "c2_beacon: 4 event(s); first=net.dns_query", "host": "host-001", "user": "user-001" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T13:00:00Z", "source": "network", "event_type": "net.dns_query", "fields": { "domain": "google.com" } }, { "log_id": "L1-1", "timestamp": "2026-04-25T13:00:02Z", "source": "identity", "event_type": "auth.login_success", "fields": { "user": "alice", "src_ip": "10.0.0.234" } }, { "log_id": "L1-2", "timestamp": "2026-04-25T13:00:04Z", "source": "network", "event_type": "net.dns_query", "fields": { "domain": "internal.corp" } }, { "log_id": "L1-3", "timestamp": "2026-04-25T22:27:00Z", "source": "network", "event_type": "net.beacon", "fields": { "dst_ip": "185.220.101.7", "interval_s": 90 } } ], "ground_truth": "block_ip", "triggering_log_id": "L1-3", "stage": "stage4_adversarial", "seed": 93029, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": -1.0, "reward_breakdown": { "missed_malicious": -1.0 }, "correct": false }, "trained": { "raw_text": "Action: block_ip\nCitedLog: L1-3\nRationale: External IP is engaged in active brute-force; blocking.", "action": "block_ip", "cited_log_id": "L1-3", "rationale": "External IP is engaged in active brute-force; blocking.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-c-90029", "category": "phishing", "severity": "high", "summary": "phishing: 1 event(s); first=email.link_clicked", "host": "host-001", "user": "alice" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T01:21:00Z", "source": "email", "event_type": "email.link_clicked", "fields": { "url": "https://login-update.click/secure", "user": "alice" } } ], "ground_truth": "block_ip", "triggering_log_id": "L1-0", "stage": "stage1_basic", "seed": 90029, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": -0.9, "reward_breakdown": { "missed_malicious": -1.0, "correct_citation_bonus": 0.1 }, "correct": false }, "trained": { "raw_text": "Action: block_ip\nCitedLog: L1-0\nRationale: External IP is engaged in active brute-force; blocking.", "action": "block_ip", "cited_log_id": "L1-0", "rationale": "External IP is engaged in active brute-force; blocking.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-d-92023", "category": "phishing", "severity": "high", "summary": "phishing: 3 event(s); first=net.dns_query", "host": "host-001", "user": "user-001" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T08:46:00Z", "source": "network", "event_type": "net.dns_query", "fields": { "domain": "github.com" } }, { "log_id": "L1-1", "timestamp": "2026-04-25T08:46:02Z", "source": "identity", "event_type": "auth.login_success", "fields": { "user": "dave", "src_ip": "10.0.0.224" } }, { "log_id": "L1-2", "timestamp": "2026-04-25T09:39:00Z", "source": "email", "event_type": "email.link_clicked", "fields": { "url": "https://login-update.click/secure", "user": "alice" } } ], "ground_truth": "block_ip", "triggering_log_id": "L1-2", "stage": "stage3_mixed", "seed": 92023, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": -1.0, "reward_breakdown": { "missed_malicious": -1.0 }, "correct": false }, "trained": { "raw_text": "Action: block_ip\nCitedLog: L1-2\nRationale: External IP is engaged in active brute-force; blocking.", "action": "block_ip", "cited_log_id": "L1-2", "rationale": "External IP is engaged in active brute-force; blocking.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-l-93039", "category": "phishing", "severity": "high", "summary": "phishing: 4 event(s); first=net.outbound_connection", "host": "host-001", "user": "user-001" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T10:31:00Z", "source": "network", "event_type": "net.outbound_connection", "fields": { "dst_ip": "10.0.184.117", "bytes_out": 54691 } }, { "log_id": "L1-1", "timestamp": "2026-04-25T10:31:02Z", "source": "network", "event_type": "net.dns_query", "fields": { "domain": "github.com" } }, { "log_id": "L1-2", "timestamp": "2026-04-25T10:31:04Z", "source": "network", "event_type": "net.outbound_connection", "fields": { "dst_ip": "10.0.108.52", "bytes_out": 60603 } }, { "log_id": "L1-3", "timestamp": "2026-04-25T11:04:00Z", "source": "email", "event_type": "email.link_clicked", "fields": { "url": "https://login-update.ru/secure", "user": "carol" } } ], "ground_truth": "block_ip", "triggering_log_id": "L1-3", "stage": "stage4_adversarial", "seed": 93039, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": -1.0, "reward_breakdown": { "missed_malicious": -1.0 }, "correct": false }, "trained": { "raw_text": "Action: block_ip\nCitedLog: L1-3\nRationale: External IP is engaged in active brute-force; blocking.", "action": "block_ip", "cited_log_id": "L1-3", "rationale": "External IP is engaged in active brute-force; blocking.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-i-91017", "category": "benign_noise", "severity": "low", "summary": "benign_noise: 2 event(s); first=auth.login_success", "host": "host-001", "user": "dave" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T04:36:00Z", "source": "identity", "event_type": "auth.login_success", "fields": { "user": "dave", "src_ip": "10.0.0.4" } }, { "log_id": "L1-1", "timestamp": "2026-04-25T07:24:00Z", "source": "network", "event_type": "net.beacon", "fields": { "dst_ip": "10.0.216.132", "interval_s": 30 } } ], "ground_truth": "monitor", "triggering_log_id": "L1-1", "stage": "stage2_multi", "seed": 91017, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": -1.0, "reward_breakdown": { "missed_malicious": -1.0 }, "correct": false }, "trained": { "raw_text": "Action: monitor\nCitedLog: L1-1\nRationale: Behavior is anomalous but reversible; raising visibility.", "action": "monitor", "cited_log_id": "L1-1", "rationale": "Behavior is anomalous but reversible; raising visibility.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } }, { "alert": { "alert_id": "A-EVAL-c-90010", "category": "benign_noise", "severity": "medium", "summary": "benign_noise: 1 event(s); first=net.port_scan_hit", "host": "host-001", "user": "user-001" }, "events": [ { "log_id": "L1-0", "timestamp": "2026-04-25T21:48:00Z", "source": "network", "event_type": "net.port_scan_hit", "fields": { "src_ip": "203.0.113.146", "port": 445 } } ], "ground_truth": "monitor", "triggering_log_id": "L1-0", "stage": "stage1_basic", "seed": 90010, "baseline": { "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", "action": "dismiss", "cited_log_id": "L1-0", "rationale": "Looks like routine activity, no clear malicious indicator.", "format_ok": true, "reward": -0.9, "reward_breakdown": { "missed_malicious": -1.0, "correct_citation_bonus": 0.1 }, "correct": false }, "trained": { "raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.", "action": "monitor", "cited_log_id": "L1-0", "rationale": "Behavior is anomalous but reversible; raising visibility.", "format_ok": true, "reward": 1.1, "reward_breakdown": { "correct_action": 1.0, "correct_citation_bonus": 0.1 }, "correct": true } } ] }