| { |
| "n": 48, |
| "source": "placeholder", |
| "baseline_model": "unsloth/Qwen2.5-3B-Instruct", |
| "trained_adapter": null, |
| "examples": [ |
| { |
| "alert": { |
| "alert_id": "A-EVAL-i-91030", |
| "category": "benign_noise", |
| "severity": "low", |
| "summary": "benign_noise: 2 event(s); first=auth.login_success", |
| "host": "host-001", |
| "user": "carol" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T11:58:00Z", |
| "source": "identity", |
| "event_type": "auth.login_success", |
| "fields": { |
| "user": "carol", |
| "src_ip": "10.0.0.187" |
| } |
| }, |
| { |
| "log_id": "L1-1", |
| "timestamp": "2026-04-25T22:09:00Z", |
| "source": "network", |
| "event_type": "net.dns_query", |
| "fields": { |
| "domain": "internal.corp" |
| } |
| } |
| ], |
| "ground_truth": "dismiss", |
| "triggering_log_id": "L1-1", |
| "stage": "stage2_multi", |
| "seed": 91030, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": 1.0, |
| "reward_breakdown": { |
| "correct_action": 1.0 |
| }, |
| "correct": true |
| }, |
| "trained": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-1\nRationale: Indicators are consistent with normal user activity.", |
| "action": "dismiss", |
| "cited_log_id": "L1-1", |
| "rationale": "Indicators are consistent with normal user activity.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-i-91008", |
| "category": "brute_force", |
| "severity": "medium", |
| "summary": "brute_force: 8 event(s); first=net.outbound_connection", |
| "host": "host-001", |
| "user": "user-001" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T11:11:00Z", |
| "source": "network", |
| "event_type": "net.outbound_connection", |
| "fields": { |
| "dst_ip": "10.0.101.112", |
| "bytes_out": 73324 |
| } |
| }, |
| { |
| "log_id": "L1-1", |
| "timestamp": "2026-04-25T14:47:00Z", |
| "source": "identity", |
| "event_type": "auth.login_failure", |
| "fields": { |
| "user": "alice", |
| "src_ip": "203.0.113.76" |
| } |
| }, |
| { |
| "log_id": "L1-2", |
| "timestamp": "2026-04-25T14:47:05Z", |
| "source": "identity", |
| "event_type": "auth.login_failure", |
| "fields": { |
| "user": "alice", |
| "src_ip": "203.0.113.196" |
| } |
| }, |
| { |
| "log_id": "L1-3", |
| "timestamp": "2026-04-25T14:47:10Z", |
| "source": "identity", |
| "event_type": "auth.login_failure", |
| "fields": { |
| "user": "alice", |
| "src_ip": "203.0.113.242" |
| } |
| }, |
| { |
| "log_id": "L1-4", |
| "timestamp": "2026-04-25T14:47:15Z", |
| "source": "identity", |
| "event_type": "auth.login_failure", |
| "fields": { |
| "user": "alice", |
| "src_ip": "203.0.113.243" |
| } |
| }, |
| { |
| "log_id": "L1-5", |
| "timestamp": "2026-04-25T14:47:20Z", |
| "source": "identity", |
| "event_type": "auth.login_failure", |
| "fields": { |
| "user": "alice", |
| "src_ip": "203.0.113.100" |
| } |
| }, |
| { |
| "log_id": "L1-6", |
| "timestamp": "2026-04-25T14:47:25Z", |
| "source": "identity", |
| "event_type": "auth.login_failure", |
| "fields": { |
| "user": "alice", |
| "src_ip": "203.0.113.155" |
| } |
| }, |
| { |
| "log_id": "L1-7", |
| "timestamp": "2026-04-25T14:47:30Z", |
| "source": "identity", |
| "event_type": "auth.login_failure", |
| "fields": { |
| "user": "alice", |
| "src_ip": "203.0.113.43" |
| } |
| } |
| ], |
| "ground_truth": "monitor", |
| "triggering_log_id": "L1-1", |
| "stage": "stage2_multi", |
| "seed": 91008, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": -1.0, |
| "reward_breakdown": { |
| "missed_malicious": -1.0 |
| }, |
| "correct": false |
| }, |
| "trained": { |
| "raw_text": "Action: monitor\nCitedLog: L1-1\nRationale: Behavior is anomalous but reversible; raising visibility.", |
| "action": "monitor", |
| "cited_log_id": "L1-1", |
| "rationale": "Behavior is anomalous but reversible; raising visibility.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-d-92026", |
| "category": "benign_noise", |
| "severity": "low", |
| "summary": "benign_noise: 3 event(s); first=net.dns_query", |
| "host": "host-001", |
| "user": "user-001" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T03:29:00Z", |
| "source": "network", |
| "event_type": "net.dns_query", |
| "fields": { |
| "domain": "internal.corp" |
| } |
| }, |
| { |
| "log_id": "L1-1", |
| "timestamp": "2026-04-25T03:29:02Z", |
| "source": "identity", |
| "event_type": "auth.login_success", |
| "fields": { |
| "user": "dave", |
| "src_ip": "10.0.0.46" |
| } |
| }, |
| { |
| "log_id": "L1-2", |
| "timestamp": "2026-04-25T22:52:00Z", |
| "source": "network", |
| "event_type": "net.beacon", |
| "fields": { |
| "dst_ip": "10.0.204.226", |
| "interval_s": 90 |
| } |
| } |
| ], |
| "ground_truth": "monitor", |
| "triggering_log_id": "L1-2", |
| "stage": "stage3_mixed", |
| "seed": 92026, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": -1.0, |
| "reward_breakdown": { |
| "missed_malicious": -1.0 |
| }, |
| "correct": false |
| }, |
| "trained": { |
| "raw_text": "Action: monitor\nCitedLog: L1-2\nRationale: Behavior is anomalous but reversible; raising visibility.", |
| "action": "monitor", |
| "cited_log_id": "L1-2", |
| "rationale": "Behavior is anomalous but reversible; raising visibility.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-d-92043", |
| "category": "phishing", |
| "severity": "high", |
| "summary": "phishing: 3 event(s); first=email.link_clicked", |
| "host": "host-001", |
| "user": "bob" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T09:59:00Z", |
| "source": "email", |
| "event_type": "email.link_clicked", |
| "fields": { |
| "url": "https://login-update.cn/secure", |
| "user": "bob" |
| } |
| }, |
| { |
| "log_id": "L1-1", |
| "timestamp": "2026-04-25T18:48:00Z", |
| "source": "network", |
| "event_type": "net.outbound_connection", |
| "fields": { |
| "dst_ip": "10.0.170.227", |
| "bytes_out": 65651 |
| } |
| }, |
| { |
| "log_id": "L1-2", |
| "timestamp": "2026-04-25T18:48:02Z", |
| "source": "identity", |
| "event_type": "auth.login_success", |
| "fields": { |
| "user": "carol", |
| "src_ip": "10.0.0.170" |
| } |
| } |
| ], |
| "ground_truth": "block_ip", |
| "triggering_log_id": "L1-0", |
| "stage": "stage3_mixed", |
| "seed": 92043, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": -0.9, |
| "reward_breakdown": { |
| "missed_malicious": -1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": false |
| }, |
| "trained": { |
| "raw_text": "Action: block_ip\nCitedLog: L1-0\nRationale: External IP is engaged in active brute-force; blocking.", |
| "action": "block_ip", |
| "cited_log_id": "L1-0", |
| "rationale": "External IP is engaged in active brute-force; blocking.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-i-91037", |
| "category": "benign_noise", |
| "severity": "medium", |
| "summary": "benign_noise: 2 event(s); first=net.outbound_connection", |
| "host": "host-001", |
| "user": "user-001" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T02:54:00Z", |
| "source": "network", |
| "event_type": "net.outbound_connection", |
| "fields": { |
| "dst_ip": "10.0.137.186", |
| "bytes_out": 37324 |
| } |
| }, |
| { |
| "log_id": "L1-1", |
| "timestamp": "2026-04-25T04:23:00Z", |
| "source": "network", |
| "event_type": "net.port_scan_hit", |
| "fields": { |
| "src_ip": "203.0.113.96", |
| "port": 5985 |
| } |
| } |
| ], |
| "ground_truth": "monitor", |
| "triggering_log_id": "L1-1", |
| "stage": "stage2_multi", |
| "seed": 91037, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": -1.0, |
| "reward_breakdown": { |
| "missed_malicious": -1.0 |
| }, |
| "correct": false |
| }, |
| "trained": { |
| "raw_text": "Action: monitor\nCitedLog: L1-1\nRationale: Behavior is anomalous but reversible; raising visibility.", |
| "action": "monitor", |
| "cited_log_id": "L1-1", |
| "rationale": "Behavior is anomalous but reversible; raising visibility.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-d-92013", |
| "category": "benign_noise", |
| "severity": "low", |
| "summary": "benign_noise: 3 event(s); first=net.dns_query", |
| "host": "host-001", |
| "user": "user-001" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T06:31:00Z", |
| "source": "network", |
| "event_type": "net.dns_query", |
| "fields": { |
| "domain": "google.com" |
| } |
| }, |
| { |
| "log_id": "L1-1", |
| "timestamp": "2026-04-25T06:31:02Z", |
| "source": "identity", |
| "event_type": "auth.login_success", |
| "fields": { |
| "user": "bob", |
| "src_ip": "10.0.0.194" |
| } |
| }, |
| { |
| "log_id": "L1-2", |
| "timestamp": "2026-04-25T13:13:00Z", |
| "source": "identity", |
| "event_type": "auth.login_success", |
| "fields": { |
| "user": "bob", |
| "src_ip": "10.0.0.146" |
| } |
| } |
| ], |
| "ground_truth": "dismiss", |
| "triggering_log_id": "L1-2", |
| "stage": "stage3_mixed", |
| "seed": 92013, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": 1.0, |
| "reward_breakdown": { |
| "correct_action": 1.0 |
| }, |
| "correct": true |
| }, |
| "trained": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-2\nRationale: Indicators are consistent with normal user activity.", |
| "action": "dismiss", |
| "cited_log_id": "L1-2", |
| "rationale": "Indicators are consistent with normal user activity.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-d-92049", |
| "category": "malware_execution", |
| "severity": "high", |
| "summary": "malware_execution: 4 event(s); first=auth.login_success", |
| "host": "host-001", |
| "user": "dave" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T00:12:00Z", |
| "source": "identity", |
| "event_type": "auth.login_success", |
| "fields": { |
| "user": "dave", |
| "src_ip": "10.0.0.52" |
| } |
| }, |
| { |
| "log_id": "L1-1", |
| "timestamp": "2026-04-25T00:12:02Z", |
| "source": "network", |
| "event_type": "net.outbound_connection", |
| "fields": { |
| "dst_ip": "10.0.202.40", |
| "bytes_out": 80520 |
| } |
| }, |
| { |
| "log_id": "L1-2", |
| "timestamp": "2026-04-25T19:55:00Z", |
| "source": "endpoint", |
| "event_type": "proc.lolbin_use", |
| "fields": { |
| "process": "powershell.exe", |
| "parent_process": "excel.exe", |
| "cmd": "-enc AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" |
| } |
| }, |
| { |
| "log_id": "L1-3", |
| "timestamp": "2026-04-25T19:55:05Z", |
| "source": "endpoint", |
| "event_type": "file.write", |
| "fields": { |
| "path": "C:\\Users\\Public\\svc.exe" |
| } |
| } |
| ], |
| "ground_truth": "quarantine_host", |
| "triggering_log_id": "L1-2", |
| "stage": "stage3_mixed", |
| "seed": 92049, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": -1.0, |
| "reward_breakdown": { |
| "missed_malicious": -1.0 |
| }, |
| "correct": false |
| }, |
| "trained": { |
| "raw_text": "Action: quarantine_host\nCitedLog: L1-2\nRationale: Host shows malware execution indicators; isolating.", |
| "action": "quarantine_host", |
| "cited_log_id": "L1-2", |
| "rationale": "Host shows malware execution indicators; isolating.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-i-91048", |
| "category": "phishing", |
| "severity": "high", |
| "summary": "phishing: 2 event(s); first=auth.login_success", |
| "host": "host-001", |
| "user": "bob" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T05:25:00Z", |
| "source": "identity", |
| "event_type": "auth.login_success", |
| "fields": { |
| "user": "bob", |
| "src_ip": "10.0.0.225" |
| } |
| }, |
| { |
| "log_id": "L1-1", |
| "timestamp": "2026-04-25T06:09:00Z", |
| "source": "email", |
| "event_type": "email.link_clicked", |
| "fields": { |
| "url": "https://login-update.ru/secure", |
| "user": "bob" |
| } |
| } |
| ], |
| "ground_truth": "block_ip", |
| "triggering_log_id": "L1-1", |
| "stage": "stage2_multi", |
| "seed": 91048, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": -1.0, |
| "reward_breakdown": { |
| "missed_malicious": -1.0 |
| }, |
| "correct": false |
| }, |
| "trained": { |
| "raw_text": "Action: block_ip\nCitedLog: L1-1\nRationale: External IP is engaged in active brute-force; blocking.", |
| "action": "block_ip", |
| "cited_log_id": "L1-1", |
| "rationale": "External IP is engaged in active brute-force; blocking.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-d-92021", |
| "category": "malware_execution", |
| "severity": "critical", |
| "summary": "malware_execution: 5 event(s); first=proc.lolbin_use", |
| "host": "host-001", |
| "user": "user-001" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T03:53:00Z", |
| "source": "endpoint", |
| "event_type": "proc.lolbin_use", |
| "fields": { |
| "process": "powershell.exe", |
| "parent_process": "outlook.exe", |
| "cmd": "-enc BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB" |
| } |
| }, |
| { |
| "log_id": "L1-1", |
| "timestamp": "2026-04-25T03:53:15Z", |
| "source": "network", |
| "event_type": "net.beacon", |
| "fields": { |
| "dst_ip": "203.0.113.5", |
| "interval_s": 30 |
| } |
| }, |
| { |
| "log_id": "L1-2", |
| "timestamp": "2026-04-25T03:53:30Z", |
| "source": "identity", |
| "event_type": "auth.privilege_grant", |
| "fields": { |
| "user": "bob", |
| "role": "admin" |
| } |
| }, |
| { |
| "log_id": "L1-3", |
| "timestamp": "2026-04-25T08:51:00Z", |
| "source": "network", |
| "event_type": "net.dns_query", |
| "fields": { |
| "domain": "internal.corp" |
| } |
| }, |
| { |
| "log_id": "L1-4", |
| "timestamp": "2026-04-25T08:51:02Z", |
| "source": "network", |
| "event_type": "net.dns_query", |
| "fields": { |
| "domain": "google.com" |
| } |
| } |
| ], |
| "ground_truth": "escalate", |
| "triggering_log_id": "L1-0", |
| "stage": "stage3_mixed", |
| "seed": 92021, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": -0.9, |
| "reward_breakdown": { |
| "missed_malicious": -1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": false |
| }, |
| "trained": { |
| "raw_text": "Action: escalate\nCitedLog: L1-0\nRationale: Confirmed exfiltration scale exceeds tier-1 thresholds.", |
| "action": "escalate", |
| "cited_log_id": "L1-0", |
| "rationale": "Confirmed exfiltration scale exceeds tier-1 thresholds.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-c-90035", |
| "category": "benign_noise", |
| "severity": "low", |
| "summary": "benign_noise: 1 event(s); first=net.beacon", |
| "host": "host-001", |
| "user": "user-001" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T01:08:00Z", |
| "source": "network", |
| "event_type": "net.beacon", |
| "fields": { |
| "dst_ip": "10.0.98.8", |
| "interval_s": 60 |
| } |
| } |
| ], |
| "ground_truth": "monitor", |
| "triggering_log_id": "L1-0", |
| "stage": "stage1_basic", |
| "seed": 90035, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": -0.9, |
| "reward_breakdown": { |
| "missed_malicious": -1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": false |
| }, |
| "trained": { |
| "raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.", |
| "action": "monitor", |
| "cited_log_id": "L1-0", |
| "rationale": "Behavior is anomalous but reversible; raising visibility.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-l-93032", |
| "category": "benign_noise", |
| "severity": "low", |
| "summary": "benign_noise: 4 event(s); first=auth.login_success", |
| "host": "host-001", |
| "user": "dave" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T17:30:00Z", |
| "source": "identity", |
| "event_type": "auth.login_success", |
| "fields": { |
| "user": "dave", |
| "src_ip": "10.0.0.56" |
| } |
| }, |
| { |
| "log_id": "L1-1", |
| "timestamp": "2026-04-25T17:30:02Z", |
| "source": "network", |
| "event_type": "net.outbound_connection", |
| "fields": { |
| "dst_ip": "10.0.30.76", |
| "bytes_out": 65394 |
| } |
| }, |
| { |
| "log_id": "L1-2", |
| "timestamp": "2026-04-25T17:30:04Z", |
| "source": "network", |
| "event_type": "net.outbound_connection", |
| "fields": { |
| "dst_ip": "10.0.171.122", |
| "bytes_out": 2642 |
| } |
| }, |
| { |
| "log_id": "L1-3", |
| "timestamp": "2026-04-25T18:00:00Z", |
| "source": "identity", |
| "event_type": "auth.login_success", |
| "fields": { |
| "user": "dave", |
| "src_ip": "10.0.0.72" |
| } |
| } |
| ], |
| "ground_truth": "dismiss", |
| "triggering_log_id": "L1-3", |
| "stage": "stage4_adversarial", |
| "seed": 93032, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": 1.0, |
| "reward_breakdown": { |
| "correct_action": 1.0 |
| }, |
| "correct": true |
| }, |
| "trained": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-3\nRationale: Indicators are consistent with normal user activity.", |
| "action": "dismiss", |
| "cited_log_id": "L1-3", |
| "rationale": "Indicators are consistent with normal user activity.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-l-93030", |
| "category": "benign_noise", |
| "severity": "low", |
| "summary": "benign_noise: 4 event(s); first=net.beacon", |
| "host": "host-001", |
| "user": "user-001" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T18:13:00Z", |
| "source": "network", |
| "event_type": "net.beacon", |
| "fields": { |
| "dst_ip": "10.0.122.44", |
| "interval_s": 60 |
| } |
| }, |
| { |
| "log_id": "L1-1", |
| "timestamp": "2026-04-25T20:00:00Z", |
| "source": "identity", |
| "event_type": "auth.login_success", |
| "fields": { |
| "user": "bob", |
| "src_ip": "10.0.0.88" |
| } |
| }, |
| { |
| "log_id": "L1-2", |
| "timestamp": "2026-04-25T20:00:02Z", |
| "source": "network", |
| "event_type": "net.dns_query", |
| "fields": { |
| "domain": "github.com" |
| } |
| }, |
| { |
| "log_id": "L1-3", |
| "timestamp": "2026-04-25T20:00:04Z", |
| "source": "network", |
| "event_type": "net.dns_query", |
| "fields": { |
| "domain": "github.com" |
| } |
| } |
| ], |
| "ground_truth": "monitor", |
| "triggering_log_id": "L1-0", |
| "stage": "stage4_adversarial", |
| "seed": 93030, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": -0.9, |
| "reward_breakdown": { |
| "missed_malicious": -1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": false |
| }, |
| "trained": { |
| "raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.", |
| "action": "monitor", |
| "cited_log_id": "L1-0", |
| "rationale": "Behavior is anomalous but reversible; raising visibility.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-i-91001", |
| "category": "benign_noise", |
| "severity": "low", |
| "summary": "benign_noise: 2 event(s); first=net.dns_query", |
| "host": "host-001", |
| "user": "user-001" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T03:58:00Z", |
| "source": "network", |
| "event_type": "net.dns_query", |
| "fields": { |
| "domain": "github.com" |
| } |
| }, |
| { |
| "log_id": "L1-1", |
| "timestamp": "2026-04-25T06:16:00Z", |
| "source": "identity", |
| "event_type": "auth.login_success", |
| "fields": { |
| "user": "bob", |
| "src_ip": "10.0.0.120" |
| } |
| } |
| ], |
| "ground_truth": "dismiss", |
| "triggering_log_id": "L1-1", |
| "stage": "stage2_multi", |
| "seed": 91001, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": 1.0, |
| "reward_breakdown": { |
| "correct_action": 1.0 |
| }, |
| "correct": true |
| }, |
| "trained": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-1\nRationale: Indicators are consistent with normal user activity.", |
| "action": "dismiss", |
| "cited_log_id": "L1-1", |
| "rationale": "Indicators are consistent with normal user activity.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-l-93038", |
| "category": "benign_noise", |
| "severity": "low", |
| "summary": "benign_noise: 4 event(s); first=net.dns_query", |
| "host": "host-001", |
| "user": "user-001" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T18:58:00Z", |
| "source": "network", |
| "event_type": "net.dns_query", |
| "fields": { |
| "domain": "internal.corp" |
| } |
| }, |
| { |
| "log_id": "L1-1", |
| "timestamp": "2026-04-25T18:58:02Z", |
| "source": "identity", |
| "event_type": "auth.login_success", |
| "fields": { |
| "user": "bob", |
| "src_ip": "10.0.0.171" |
| } |
| }, |
| { |
| "log_id": "L1-2", |
| "timestamp": "2026-04-25T18:58:04Z", |
| "source": "network", |
| "event_type": "net.outbound_connection", |
| "fields": { |
| "dst_ip": "10.0.47.203", |
| "bytes_out": 14699 |
| } |
| }, |
| { |
| "log_id": "L1-3", |
| "timestamp": "2026-04-25T20:59:00Z", |
| "source": "network", |
| "event_type": "net.beacon", |
| "fields": { |
| "dst_ip": "10.0.36.159", |
| "interval_s": 90 |
| } |
| } |
| ], |
| "ground_truth": "monitor", |
| "triggering_log_id": "L1-3", |
| "stage": "stage4_adversarial", |
| "seed": 93038, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": -1.0, |
| "reward_breakdown": { |
| "missed_malicious": -1.0 |
| }, |
| "correct": false |
| }, |
| "trained": { |
| "raw_text": "Action: monitor\nCitedLog: L1-3\nRationale: Behavior is anomalous but reversible; raising visibility.", |
| "action": "monitor", |
| "cited_log_id": "L1-3", |
| "rationale": "Behavior is anomalous but reversible; raising visibility.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-i-91046", |
| "category": "malware_execution", |
| "severity": "high", |
| "summary": "malware_execution: 3 event(s); first=proc.lolbin_use", |
| "host": "host-001", |
| "user": "user-001" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T02:24:00Z", |
| "source": "endpoint", |
| "event_type": "proc.lolbin_use", |
| "fields": { |
| "process": "powershell.exe", |
| "parent_process": "chrome.exe", |
| "cmd": "-enc AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" |
| } |
| }, |
| { |
| "log_id": "L1-1", |
| "timestamp": "2026-04-25T02:24:05Z", |
| "source": "endpoint", |
| "event_type": "file.write", |
| "fields": { |
| "path": "C:\\Users\\Public\\svc.exe" |
| } |
| }, |
| { |
| "log_id": "L1-2", |
| "timestamp": "2026-04-25T09:39:00Z", |
| "source": "network", |
| "event_type": "net.dns_query", |
| "fields": { |
| "domain": "google.com" |
| } |
| } |
| ], |
| "ground_truth": "quarantine_host", |
| "triggering_log_id": "L1-0", |
| "stage": "stage2_multi", |
| "seed": 91046, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": -0.9, |
| "reward_breakdown": { |
| "missed_malicious": -1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": false |
| }, |
| "trained": { |
| "raw_text": "Action: quarantine_host\nCitedLog: L1-0\nRationale: Host shows malware execution indicators; isolating.", |
| "action": "quarantine_host", |
| "cited_log_id": "L1-0", |
| "rationale": "Host shows malware execution indicators; isolating.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-d-92047", |
| "category": "malware_execution", |
| "severity": "high", |
| "summary": "malware_execution: 4 event(s); first=auth.login_success", |
| "host": "host-001", |
| "user": "carol" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T12:36:00Z", |
| "source": "identity", |
| "event_type": "auth.login_success", |
| "fields": { |
| "user": "carol", |
| "src_ip": "10.0.0.130" |
| } |
| }, |
| { |
| "log_id": "L1-1", |
| "timestamp": "2026-04-25T12:36:02Z", |
| "source": "network", |
| "event_type": "net.outbound_connection", |
| "fields": { |
| "dst_ip": "10.0.164.13", |
| "bytes_out": 94453 |
| } |
| }, |
| { |
| "log_id": "L1-2", |
| "timestamp": "2026-04-25T16:44:00Z", |
| "source": "endpoint", |
| "event_type": "proc.lolbin_use", |
| "fields": { |
| "process": "powershell.exe", |
| "parent_process": "excel.exe", |
| "cmd": "-enc AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" |
| } |
| }, |
| { |
| "log_id": "L1-3", |
| "timestamp": "2026-04-25T16:44:05Z", |
| "source": "endpoint", |
| "event_type": "file.write", |
| "fields": { |
| "path": "C:\\Users\\Public\\svc.exe" |
| } |
| } |
| ], |
| "ground_truth": "quarantine_host", |
| "triggering_log_id": "L1-2", |
| "stage": "stage3_mixed", |
| "seed": 92047, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": -1.0, |
| "reward_breakdown": { |
| "missed_malicious": -1.0 |
| }, |
| "correct": false |
| }, |
| "trained": { |
| "raw_text": "Action: quarantine_host\nCitedLog: L1-2\nRationale: Host shows malware execution indicators; isolating.", |
| "action": "quarantine_host", |
| "cited_log_id": "L1-2", |
| "rationale": "Host shows malware execution indicators; isolating.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-c-90038", |
| "category": "benign_noise", |
| "severity": "medium", |
| "summary": "benign_noise: 1 event(s); first=net.port_scan_hit", |
| "host": "host-001", |
| "user": "user-001" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T05:08:00Z", |
| "source": "network", |
| "event_type": "net.port_scan_hit", |
| "fields": { |
| "src_ip": "203.0.113.140", |
| "port": 22 |
| } |
| } |
| ], |
| "ground_truth": "monitor", |
| "triggering_log_id": "L1-0", |
| "stage": "stage1_basic", |
| "seed": 90038, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": -0.9, |
| "reward_breakdown": { |
| "missed_malicious": -1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": false |
| }, |
| "trained": { |
| "raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.", |
| "action": "monitor", |
| "cited_log_id": "L1-0", |
| "rationale": "Behavior is anomalous but reversible; raising visibility.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-i-91000", |
| "category": "benign_noise", |
| "severity": "low", |
| "summary": "benign_noise: 2 event(s); first=net.beacon", |
| "host": "host-001", |
| "user": "user-001" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T21:23:00Z", |
| "source": "network", |
| "event_type": "net.beacon", |
| "fields": { |
| "dst_ip": "10.0.61.71", |
| "interval_s": 60 |
| } |
| }, |
| { |
| "log_id": "L1-1", |
| "timestamp": "2026-04-25T22:28:00Z", |
| "source": "identity", |
| "event_type": "auth.login_success", |
| "fields": { |
| "user": "alice", |
| "src_ip": "10.0.0.76" |
| } |
| } |
| ], |
| "ground_truth": "monitor", |
| "triggering_log_id": "L1-0", |
| "stage": "stage2_multi", |
| "seed": 91000, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": -0.9, |
| "reward_breakdown": { |
| "missed_malicious": -1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": false |
| }, |
| "trained": { |
| "raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.", |
| "action": "monitor", |
| "cited_log_id": "L1-0", |
| "rationale": "Behavior is anomalous but reversible; raising visibility.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-c-90011", |
| "category": "malware_execution", |
| "severity": "high", |
| "summary": "malware_execution: 2 event(s); first=proc.lolbin_use", |
| "host": "host-001", |
| "user": "user-001" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T19:21:00Z", |
| "source": "endpoint", |
| "event_type": "proc.lolbin_use", |
| "fields": { |
| "process": "powershell.exe", |
| "parent_process": "winword.exe", |
| "cmd": "-enc AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" |
| } |
| }, |
| { |
| "log_id": "L1-1", |
| "timestamp": "2026-04-25T19:21:05Z", |
| "source": "endpoint", |
| "event_type": "file.write", |
| "fields": { |
| "path": "C:\\Users\\Public\\payload.exe" |
| } |
| } |
| ], |
| "ground_truth": "quarantine_host", |
| "triggering_log_id": "L1-0", |
| "stage": "stage1_basic", |
| "seed": 90011, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": -0.9, |
| "reward_breakdown": { |
| "missed_malicious": -1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": false |
| }, |
| "trained": { |
| "raw_text": "Action: quarantine_host\nCitedLog: L1-0\nRationale: Host shows malware execution indicators; isolating.", |
| "action": "quarantine_host", |
| "cited_log_id": "L1-0", |
| "rationale": "Host shows malware execution indicators; isolating.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-c-90024", |
| "category": "phishing", |
| "severity": "high", |
| "summary": "phishing: 1 event(s); first=email.link_clicked", |
| "host": "host-001", |
| "user": "alice" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T03:27:00Z", |
| "source": "email", |
| "event_type": "email.link_clicked", |
| "fields": { |
| "url": "https://login-update.top/secure", |
| "user": "alice" |
| } |
| } |
| ], |
| "ground_truth": "block_ip", |
| "triggering_log_id": "L1-0", |
| "stage": "stage1_basic", |
| "seed": 90024, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": -0.9, |
| "reward_breakdown": { |
| "missed_malicious": -1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": false |
| }, |
| "trained": { |
| "raw_text": "Action: block_ip\nCitedLog: L1-0\nRationale: External IP is engaged in active brute-force; blocking.", |
| "action": "block_ip", |
| "cited_log_id": "L1-0", |
| "rationale": "External IP is engaged in active brute-force; blocking.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-l-93004", |
| "category": "benign_noise", |
| "severity": "low", |
| "summary": "benign_noise: 4 event(s); first=auth.login_success", |
| "host": "host-001", |
| "user": "dave" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T04:55:00Z", |
| "source": "identity", |
| "event_type": "auth.login_success", |
| "fields": { |
| "user": "dave", |
| "src_ip": "10.0.0.144" |
| } |
| }, |
| { |
| "log_id": "L1-1", |
| "timestamp": "2026-04-25T19:38:00Z", |
| "source": "network", |
| "event_type": "net.dns_query", |
| "fields": { |
| "domain": "github.com" |
| } |
| }, |
| { |
| "log_id": "L1-2", |
| "timestamp": "2026-04-25T19:38:02Z", |
| "source": "network", |
| "event_type": "net.dns_query", |
| "fields": { |
| "domain": "github.com" |
| } |
| }, |
| { |
| "log_id": "L1-3", |
| "timestamp": "2026-04-25T19:38:04Z", |
| "source": "network", |
| "event_type": "net.dns_query", |
| "fields": { |
| "domain": "internal.corp" |
| } |
| } |
| ], |
| "ground_truth": "dismiss", |
| "triggering_log_id": "L1-3", |
| "stage": "stage4_adversarial", |
| "seed": 93004, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": 1.0, |
| "reward_breakdown": { |
| "correct_action": 1.0 |
| }, |
| "correct": true |
| }, |
| "trained": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-3\nRationale: Indicators are consistent with normal user activity.", |
| "action": "dismiss", |
| "cited_log_id": "L1-3", |
| "rationale": "Indicators are consistent with normal user activity.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-c-90048", |
| "category": "phishing", |
| "severity": "high", |
| "summary": "phishing: 1 event(s); first=email.link_clicked", |
| "host": "host-001", |
| "user": "alice" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T02:43:00Z", |
| "source": "email", |
| "event_type": "email.link_clicked", |
| "fields": { |
| "url": "https://login-update.click/secure", |
| "user": "alice" |
| } |
| } |
| ], |
| "ground_truth": "block_ip", |
| "triggering_log_id": "L1-0", |
| "stage": "stage1_basic", |
| "seed": 90048, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": -0.9, |
| "reward_breakdown": { |
| "missed_malicious": -1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": false |
| }, |
| "trained": { |
| "raw_text": "Action: block_ip\nCitedLog: L1-0\nRationale: External IP is engaged in active brute-force; blocking.", |
| "action": "block_ip", |
| "cited_log_id": "L1-0", |
| "rationale": "External IP is engaged in active brute-force; blocking.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-i-91040", |
| "category": "malware_execution", |
| "severity": "high", |
| "summary": "malware_execution: 2 event(s); first=net.outbound_connection", |
| "host": "host-001", |
| "user": "user-001" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T08:09:00Z", |
| "source": "network", |
| "event_type": "net.outbound_connection", |
| "fields": { |
| "dst_ip": "10.0.168.133", |
| "bytes_out": 96660 |
| } |
| }, |
| { |
| "log_id": "L1-1", |
| "timestamp": "2026-04-25T18:31:00Z", |
| "source": "edr", |
| "event_type": "edr.behavior_match", |
| "fields": { |
| "severity": "high", |
| "rule_id": "EDR-9974", |
| "host": "host-015" |
| } |
| } |
| ], |
| "ground_truth": "quarantine_host", |
| "triggering_log_id": "L1-1", |
| "stage": "stage2_multi", |
| "seed": 91040, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": -1.0, |
| "reward_breakdown": { |
| "missed_malicious": -1.0 |
| }, |
| "correct": false |
| }, |
| "trained": { |
| "raw_text": "Action: quarantine_host\nCitedLog: L1-1\nRationale: Host shows malware execution indicators; isolating.", |
| "action": "quarantine_host", |
| "cited_log_id": "L1-1", |
| "rationale": "Host shows malware execution indicators; isolating.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-c-90028", |
| "category": "benign_noise", |
| "severity": "low", |
| "summary": "benign_noise: 1 event(s); first=net.beacon", |
| "host": "host-001", |
| "user": "user-001" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T21:07:00Z", |
| "source": "network", |
| "event_type": "net.beacon", |
| "fields": { |
| "dst_ip": "10.0.168.29", |
| "interval_s": 60 |
| } |
| } |
| ], |
| "ground_truth": "monitor", |
| "triggering_log_id": "L1-0", |
| "stage": "stage1_basic", |
| "seed": 90028, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": -0.9, |
| "reward_breakdown": { |
| "missed_malicious": -1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": false |
| }, |
| "trained": { |
| "raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.", |
| "action": "monitor", |
| "cited_log_id": "L1-0", |
| "rationale": "Behavior is anomalous but reversible; raising visibility.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-d-92000", |
| "category": "benign_noise", |
| "severity": "low", |
| "summary": "benign_noise: 3 event(s); first=net.outbound_connection", |
| "host": "host-001", |
| "user": "user-001" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T04:39:00Z", |
| "source": "network", |
| "event_type": "net.outbound_connection", |
| "fields": { |
| "dst_ip": "10.0.81.214", |
| "bytes_out": 86290 |
| } |
| }, |
| { |
| "log_id": "L1-1", |
| "timestamp": "2026-04-25T04:39:02Z", |
| "source": "network", |
| "event_type": "net.outbound_connection", |
| "fields": { |
| "dst_ip": "10.0.138.227", |
| "bytes_out": 53711 |
| } |
| }, |
| { |
| "log_id": "L1-2", |
| "timestamp": "2026-04-25T16:45:00Z", |
| "source": "network", |
| "event_type": "net.beacon", |
| "fields": { |
| "dst_ip": "10.0.244.186", |
| "interval_s": 90 |
| } |
| } |
| ], |
| "ground_truth": "monitor", |
| "triggering_log_id": "L1-2", |
| "stage": "stage3_mixed", |
| "seed": 92000, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": -1.0, |
| "reward_breakdown": { |
| "missed_malicious": -1.0 |
| }, |
| "correct": false |
| }, |
| "trained": { |
| "raw_text": "Action: monitor\nCitedLog: L1-2\nRationale: Behavior is anomalous but reversible; raising visibility.", |
| "action": "monitor", |
| "cited_log_id": "L1-2", |
| "rationale": "Behavior is anomalous but reversible; raising visibility.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-c-90042", |
| "category": "benign_noise", |
| "severity": "low", |
| "summary": "benign_noise: 1 event(s); first=auth.login_success", |
| "host": "host-001", |
| "user": "carol" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T12:10:00Z", |
| "source": "identity", |
| "event_type": "auth.login_success", |
| "fields": { |
| "user": "carol", |
| "src_ip": "10.0.0.187" |
| } |
| } |
| ], |
| "ground_truth": "dismiss", |
| "triggering_log_id": "L1-0", |
| "stage": "stage1_basic", |
| "seed": 90042, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| }, |
| "trained": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Indicators are consistent with normal user activity.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Indicators are consistent with normal user activity.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-l-93005", |
| "category": "malware_execution", |
| "severity": "high", |
| "summary": "malware_execution: 4 event(s); first=net.dns_query", |
| "host": "host-001", |
| "user": "user-001" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T09:11:00Z", |
| "source": "network", |
| "event_type": "net.dns_query", |
| "fields": { |
| "domain": "google.com" |
| } |
| }, |
| { |
| "log_id": "L1-1", |
| "timestamp": "2026-04-25T09:11:02Z", |
| "source": "identity", |
| "event_type": "auth.login_success", |
| "fields": { |
| "user": "bob", |
| "src_ip": "10.0.0.196" |
| } |
| }, |
| { |
| "log_id": "L1-2", |
| "timestamp": "2026-04-25T09:11:04Z", |
| "source": "network", |
| "event_type": "net.outbound_connection", |
| "fields": { |
| "dst_ip": "10.0.153.118", |
| "bytes_out": 9827 |
| } |
| }, |
| { |
| "log_id": "L1-3", |
| "timestamp": "2026-04-25T17:44:00Z", |
| "source": "edr", |
| "event_type": "edr.behavior_match", |
| "fields": { |
| "severity": "high", |
| "rule_id": "EDR-5070", |
| "host": "host-033" |
| } |
| } |
| ], |
| "ground_truth": "quarantine_host", |
| "triggering_log_id": "L1-3", |
| "stage": "stage4_adversarial", |
| "seed": 93005, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": -1.0, |
| "reward_breakdown": { |
| "missed_malicious": -1.0 |
| }, |
| "correct": false |
| }, |
| "trained": { |
| "raw_text": "Action: quarantine_host\nCitedLog: L1-3\nRationale: Host shows malware execution indicators; isolating.", |
| "action": "quarantine_host", |
| "cited_log_id": "L1-3", |
| "rationale": "Host shows malware execution indicators; isolating.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-c-90031", |
| "category": "brute_force", |
| "severity": "medium", |
| "summary": "brute_force: 8 event(s); first=auth.login_failure", |
| "host": "host-001", |
| "user": "root" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T04:02:00Z", |
| "source": "identity", |
| "event_type": "auth.login_failure", |
| "fields": { |
| "user": "root", |
| "src_ip": "203.0.113.204" |
| } |
| }, |
| { |
| "log_id": "L1-1", |
| "timestamp": "2026-04-25T04:02:05Z", |
| "source": "identity", |
| "event_type": "auth.login_failure", |
| "fields": { |
| "user": "root", |
| "src_ip": "203.0.113.157" |
| } |
| }, |
| { |
| "log_id": "L1-2", |
| "timestamp": "2026-04-25T04:02:10Z", |
| "source": "identity", |
| "event_type": "auth.login_failure", |
| "fields": { |
| "user": "root", |
| "src_ip": "203.0.113.181" |
| } |
| }, |
| { |
| "log_id": "L1-3", |
| "timestamp": "2026-04-25T04:02:15Z", |
| "source": "identity", |
| "event_type": "auth.login_failure", |
| "fields": { |
| "user": "root", |
| "src_ip": "203.0.113.76" |
| } |
| }, |
| { |
| "log_id": "L1-4", |
| "timestamp": "2026-04-25T04:02:20Z", |
| "source": "identity", |
| "event_type": "auth.login_failure", |
| "fields": { |
| "user": "root", |
| "src_ip": "203.0.113.90" |
| } |
| }, |
| { |
| "log_id": "L1-5", |
| "timestamp": "2026-04-25T04:02:25Z", |
| "source": "identity", |
| "event_type": "auth.login_failure", |
| "fields": { |
| "user": "root", |
| "src_ip": "203.0.113.42" |
| } |
| }, |
| { |
| "log_id": "L1-6", |
| "timestamp": "2026-04-25T04:02:30Z", |
| "source": "identity", |
| "event_type": "auth.login_failure", |
| "fields": { |
| "user": "root", |
| "src_ip": "203.0.113.9" |
| } |
| }, |
| { |
| "log_id": "L1-7", |
| "timestamp": "2026-04-25T04:02:35Z", |
| "source": "identity", |
| "event_type": "auth.login_failure", |
| "fields": { |
| "user": "root", |
| "src_ip": "203.0.113.115" |
| } |
| } |
| ], |
| "ground_truth": "monitor", |
| "triggering_log_id": "L1-0", |
| "stage": "stage1_basic", |
| "seed": 90031, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": -0.9, |
| "reward_breakdown": { |
| "missed_malicious": -1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": false |
| }, |
| "trained": { |
| "raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.", |
| "action": "monitor", |
| "cited_log_id": "L1-0", |
| "rationale": "Behavior is anomalous but reversible; raising visibility.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-l-93002", |
| "category": "benign_noise", |
| "severity": "low", |
| "summary": "benign_noise: 4 event(s); first=net.beacon", |
| "host": "host-001", |
| "user": "user-001" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T04:30:00Z", |
| "source": "network", |
| "event_type": "net.beacon", |
| "fields": { |
| "dst_ip": "10.0.244.83", |
| "interval_s": 60 |
| } |
| }, |
| { |
| "log_id": "L1-1", |
| "timestamp": "2026-04-25T08:04:00Z", |
| "source": "network", |
| "event_type": "net.dns_query", |
| "fields": { |
| "domain": "internal.corp" |
| } |
| }, |
| { |
| "log_id": "L1-2", |
| "timestamp": "2026-04-25T08:04:02Z", |
| "source": "network", |
| "event_type": "net.dns_query", |
| "fields": { |
| "domain": "internal.corp" |
| } |
| }, |
| { |
| "log_id": "L1-3", |
| "timestamp": "2026-04-25T08:04:04Z", |
| "source": "identity", |
| "event_type": "auth.login_success", |
| "fields": { |
| "user": "alice", |
| "src_ip": "10.0.0.243" |
| } |
| } |
| ], |
| "ground_truth": "monitor", |
| "triggering_log_id": "L1-0", |
| "stage": "stage4_adversarial", |
| "seed": 93002, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": -0.9, |
| "reward_breakdown": { |
| "missed_malicious": -1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": false |
| }, |
| "trained": { |
| "raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.", |
| "action": "monitor", |
| "cited_log_id": "L1-0", |
| "rationale": "Behavior is anomalous but reversible; raising visibility.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-c-90014", |
| "category": "malware_execution", |
| "severity": "high", |
| "summary": "malware_execution: 2 event(s); first=proc.lolbin_use", |
| "host": "host-001", |
| "user": "user-001" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T01:39:00Z", |
| "source": "endpoint", |
| "event_type": "proc.lolbin_use", |
| "fields": { |
| "process": "powershell.exe", |
| "parent_process": "chrome.exe", |
| "cmd": "-enc AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" |
| } |
| }, |
| { |
| "log_id": "L1-1", |
| "timestamp": "2026-04-25T01:39:05Z", |
| "source": "endpoint", |
| "event_type": "file.write", |
| "fields": { |
| "path": "C:\\Users\\Public\\tmp.exe" |
| } |
| } |
| ], |
| "ground_truth": "quarantine_host", |
| "triggering_log_id": "L1-0", |
| "stage": "stage1_basic", |
| "seed": 90014, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": -0.9, |
| "reward_breakdown": { |
| "missed_malicious": -1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": false |
| }, |
| "trained": { |
| "raw_text": "Action: quarantine_host\nCitedLog: L1-0\nRationale: Host shows malware execution indicators; isolating.", |
| "action": "quarantine_host", |
| "cited_log_id": "L1-0", |
| "rationale": "Host shows malware execution indicators; isolating.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-l-93010", |
| "category": "brute_force", |
| "severity": "medium", |
| "summary": "brute_force: 10 event(s); first=net.dns_query", |
| "host": "host-001", |
| "user": "user-001" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T05:01:00Z", |
| "source": "network", |
| "event_type": "net.dns_query", |
| "fields": { |
| "domain": "github.com" |
| } |
| }, |
| { |
| "log_id": "L1-1", |
| "timestamp": "2026-04-25T05:01:02Z", |
| "source": "network", |
| "event_type": "net.outbound_connection", |
| "fields": { |
| "dst_ip": "10.0.244.112", |
| "bytes_out": 61917 |
| } |
| }, |
| { |
| "log_id": "L1-2", |
| "timestamp": "2026-04-25T05:01:04Z", |
| "source": "identity", |
| "event_type": "auth.login_success", |
| "fields": { |
| "user": "alice", |
| "src_ip": "10.0.0.15" |
| } |
| }, |
| { |
| "log_id": "L1-3", |
| "timestamp": "2026-04-25T20:05:00Z", |
| "source": "identity", |
| "event_type": "auth.login_failure", |
| "fields": { |
| "user": "admin", |
| "src_ip": "203.0.113.215" |
| } |
| }, |
| { |
| "log_id": "L1-4", |
| "timestamp": "2026-04-25T20:05:05Z", |
| "source": "identity", |
| "event_type": "auth.login_failure", |
| "fields": { |
| "user": "admin", |
| "src_ip": "203.0.113.55" |
| } |
| }, |
| { |
| "log_id": "L1-5", |
| "timestamp": "2026-04-25T20:05:10Z", |
| "source": "identity", |
| "event_type": "auth.login_failure", |
| "fields": { |
| "user": "admin", |
| "src_ip": "203.0.113.156" |
| } |
| }, |
| { |
| "log_id": "L1-6", |
| "timestamp": "2026-04-25T20:05:15Z", |
| "source": "identity", |
| "event_type": "auth.login_failure", |
| "fields": { |
| "user": "admin", |
| "src_ip": "203.0.113.182" |
| } |
| }, |
| { |
| "log_id": "L1-7", |
| "timestamp": "2026-04-25T20:05:20Z", |
| "source": "identity", |
| "event_type": "auth.login_failure", |
| "fields": { |
| "user": "admin", |
| "src_ip": "203.0.113.15" |
| } |
| }, |
| { |
| "log_id": "L1-8", |
| "timestamp": "2026-04-25T20:05:25Z", |
| "source": "identity", |
| "event_type": "auth.login_failure", |
| "fields": { |
| "user": "admin", |
| "src_ip": "203.0.113.226" |
| } |
| }, |
| { |
| "log_id": "L1-9", |
| "timestamp": "2026-04-25T20:05:30Z", |
| "source": "identity", |
| "event_type": "auth.login_failure", |
| "fields": { |
| "user": "admin", |
| "src_ip": "203.0.113.237" |
| } |
| } |
| ], |
| "ground_truth": "monitor", |
| "triggering_log_id": "L1-3", |
| "stage": "stage4_adversarial", |
| "seed": 93010, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": -1.0, |
| "reward_breakdown": { |
| "missed_malicious": -1.0 |
| }, |
| "correct": false |
| }, |
| "trained": { |
| "raw_text": "Action: monitor\nCitedLog: L1-3\nRationale: Behavior is anomalous but reversible; raising visibility.", |
| "action": "monitor", |
| "cited_log_id": "L1-3", |
| "rationale": "Behavior is anomalous but reversible; raising visibility.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-d-92032", |
| "category": "malware_execution", |
| "severity": "high", |
| "summary": "malware_execution: 3 event(s); first=net.dns_query", |
| "host": "host-001", |
| "user": "user-001" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T04:00:00Z", |
| "source": "network", |
| "event_type": "net.dns_query", |
| "fields": { |
| "domain": "internal.corp" |
| } |
| }, |
| { |
| "log_id": "L1-1", |
| "timestamp": "2026-04-25T04:00:02Z", |
| "source": "identity", |
| "event_type": "auth.login_success", |
| "fields": { |
| "user": "dave", |
| "src_ip": "10.0.0.103" |
| } |
| }, |
| { |
| "log_id": "L1-2", |
| "timestamp": "2026-04-25T15:15:00Z", |
| "source": "edr", |
| "event_type": "edr.behavior_match", |
| "fields": { |
| "severity": "high", |
| "rule_id": "EDR-7716", |
| "host": "host-030" |
| } |
| } |
| ], |
| "ground_truth": "quarantine_host", |
| "triggering_log_id": "L1-2", |
| "stage": "stage3_mixed", |
| "seed": 92032, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": -1.0, |
| "reward_breakdown": { |
| "missed_malicious": -1.0 |
| }, |
| "correct": false |
| }, |
| "trained": { |
| "raw_text": "Action: quarantine_host\nCitedLog: L1-2\nRationale: Host shows malware execution indicators; isolating.", |
| "action": "quarantine_host", |
| "cited_log_id": "L1-2", |
| "rationale": "Host shows malware execution indicators; isolating.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-l-93013", |
| "category": "malware_execution", |
| "severity": "high", |
| "summary": "malware_execution: 5 event(s); first=auth.login_success", |
| "host": "host-001", |
| "user": "carol" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T03:22:00Z", |
| "source": "identity", |
| "event_type": "auth.login_success", |
| "fields": { |
| "user": "carol", |
| "src_ip": "10.0.0.18" |
| } |
| }, |
| { |
| "log_id": "L1-1", |
| "timestamp": "2026-04-25T03:22:02Z", |
| "source": "network", |
| "event_type": "net.outbound_connection", |
| "fields": { |
| "dst_ip": "10.0.22.98", |
| "bytes_out": 65184 |
| } |
| }, |
| { |
| "log_id": "L1-2", |
| "timestamp": "2026-04-25T03:22:04Z", |
| "source": "network", |
| "event_type": "net.dns_query", |
| "fields": { |
| "domain": "github.com" |
| } |
| }, |
| { |
| "log_id": "L1-3", |
| "timestamp": "2026-04-25T18:32:00Z", |
| "source": "endpoint", |
| "event_type": "proc.lolbin_use", |
| "fields": { |
| "process": "powershell.exe", |
| "parent_process": "chrome.exe", |
| "cmd": "-enc AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" |
| } |
| }, |
| { |
| "log_id": "L1-4", |
| "timestamp": "2026-04-25T18:32:05Z", |
| "source": "endpoint", |
| "event_type": "file.write", |
| "fields": { |
| "path": "C:\\Users\\Public\\payload.exe" |
| } |
| } |
| ], |
| "ground_truth": "quarantine_host", |
| "triggering_log_id": "L1-3", |
| "stage": "stage4_adversarial", |
| "seed": 93013, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": -1.0, |
| "reward_breakdown": { |
| "missed_malicious": -1.0 |
| }, |
| "correct": false |
| }, |
| "trained": { |
| "raw_text": "Action: quarantine_host\nCitedLog: L1-3\nRationale: Host shows malware execution indicators; isolating.", |
| "action": "quarantine_host", |
| "cited_log_id": "L1-3", |
| "rationale": "Host shows malware execution indicators; isolating.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-d-92016", |
| "category": "phishing", |
| "severity": "high", |
| "summary": "phishing: 3 event(s); first=net.dns_query", |
| "host": "host-001", |
| "user": "user-001" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T01:35:00Z", |
| "source": "network", |
| "event_type": "net.dns_query", |
| "fields": { |
| "domain": "internal.corp" |
| } |
| }, |
| { |
| "log_id": "L1-1", |
| "timestamp": "2026-04-25T01:35:02Z", |
| "source": "network", |
| "event_type": "net.dns_query", |
| "fields": { |
| "domain": "google.com" |
| } |
| }, |
| { |
| "log_id": "L1-2", |
| "timestamp": "2026-04-25T07:28:00Z", |
| "source": "email", |
| "event_type": "email.link_clicked", |
| "fields": { |
| "url": "https://login-update.top/secure", |
| "user": "bob" |
| } |
| } |
| ], |
| "ground_truth": "block_ip", |
| "triggering_log_id": "L1-2", |
| "stage": "stage3_mixed", |
| "seed": 92016, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": -1.0, |
| "reward_breakdown": { |
| "missed_malicious": -1.0 |
| }, |
| "correct": false |
| }, |
| "trained": { |
| "raw_text": "Action: block_ip\nCitedLog: L1-2\nRationale: External IP is engaged in active brute-force; blocking.", |
| "action": "block_ip", |
| "cited_log_id": "L1-2", |
| "rationale": "External IP is engaged in active brute-force; blocking.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-d-92010", |
| "category": "brute_force", |
| "severity": "medium", |
| "summary": "brute_force: 8 event(s); first=auth.login_success", |
| "host": "host-001", |
| "user": "dave" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T13:35:00Z", |
| "source": "identity", |
| "event_type": "auth.login_success", |
| "fields": { |
| "user": "dave", |
| "src_ip": "10.0.0.212" |
| } |
| }, |
| { |
| "log_id": "L1-1", |
| "timestamp": "2026-04-25T13:35:02Z", |
| "source": "identity", |
| "event_type": "auth.login_success", |
| "fields": { |
| "user": "carol", |
| "src_ip": "10.0.0.165" |
| } |
| }, |
| { |
| "log_id": "L1-2", |
| "timestamp": "2026-04-25T14:59:00Z", |
| "source": "identity", |
| "event_type": "auth.login_failure", |
| "fields": { |
| "user": "root", |
| "src_ip": "203.0.113.7" |
| } |
| }, |
| { |
| "log_id": "L1-3", |
| "timestamp": "2026-04-25T14:59:05Z", |
| "source": "identity", |
| "event_type": "auth.login_failure", |
| "fields": { |
| "user": "root", |
| "src_ip": "203.0.113.168" |
| } |
| }, |
| { |
| "log_id": "L1-4", |
| "timestamp": "2026-04-25T14:59:10Z", |
| "source": "identity", |
| "event_type": "auth.login_failure", |
| "fields": { |
| "user": "root", |
| "src_ip": "203.0.113.196" |
| } |
| }, |
| { |
| "log_id": "L1-5", |
| "timestamp": "2026-04-25T14:59:15Z", |
| "source": "identity", |
| "event_type": "auth.login_failure", |
| "fields": { |
| "user": "root", |
| "src_ip": "203.0.113.201" |
| } |
| }, |
| { |
| "log_id": "L1-6", |
| "timestamp": "2026-04-25T14:59:20Z", |
| "source": "identity", |
| "event_type": "auth.login_failure", |
| "fields": { |
| "user": "root", |
| "src_ip": "203.0.113.134" |
| } |
| }, |
| { |
| "log_id": "L1-7", |
| "timestamp": "2026-04-25T14:59:25Z", |
| "source": "identity", |
| "event_type": "auth.login_failure", |
| "fields": { |
| "user": "root", |
| "src_ip": "203.0.113.155" |
| } |
| } |
| ], |
| "ground_truth": "monitor", |
| "triggering_log_id": "L1-2", |
| "stage": "stage3_mixed", |
| "seed": 92010, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": -1.0, |
| "reward_breakdown": { |
| "missed_malicious": -1.0 |
| }, |
| "correct": false |
| }, |
| "trained": { |
| "raw_text": "Action: monitor\nCitedLog: L1-2\nRationale: Behavior is anomalous but reversible; raising visibility.", |
| "action": "monitor", |
| "cited_log_id": "L1-2", |
| "rationale": "Behavior is anomalous but reversible; raising visibility.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-l-93041", |
| "category": "malware_execution", |
| "severity": "critical", |
| "summary": "malware_execution: 6 event(s); first=proc.lolbin_use", |
| "host": "host-001", |
| "user": "user-001" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T13:22:00Z", |
| "source": "endpoint", |
| "event_type": "proc.lolbin_use", |
| "fields": { |
| "process": "powershell.exe", |
| "parent_process": "outlook.exe", |
| "cmd": "-enc BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB" |
| } |
| }, |
| { |
| "log_id": "L1-1", |
| "timestamp": "2026-04-25T13:22:15Z", |
| "source": "network", |
| "event_type": "net.beacon", |
| "fields": { |
| "dst_ip": "203.0.113.5", |
| "interval_s": 30 |
| } |
| }, |
| { |
| "log_id": "L1-2", |
| "timestamp": "2026-04-25T13:22:30Z", |
| "source": "identity", |
| "event_type": "auth.privilege_grant", |
| "fields": { |
| "user": "bob", |
| "role": "admin" |
| } |
| }, |
| { |
| "log_id": "L1-3", |
| "timestamp": "2026-04-25T14:00:00Z", |
| "source": "network", |
| "event_type": "net.outbound_connection", |
| "fields": { |
| "dst_ip": "10.0.1.241", |
| "bytes_out": 11167 |
| } |
| }, |
| { |
| "log_id": "L1-4", |
| "timestamp": "2026-04-25T14:00:02Z", |
| "source": "identity", |
| "event_type": "auth.login_success", |
| "fields": { |
| "user": "dave", |
| "src_ip": "10.0.0.122" |
| } |
| }, |
| { |
| "log_id": "L1-5", |
| "timestamp": "2026-04-25T14:00:04Z", |
| "source": "identity", |
| "event_type": "auth.login_success", |
| "fields": { |
| "user": "carol", |
| "src_ip": "10.0.0.10" |
| } |
| } |
| ], |
| "ground_truth": "escalate", |
| "triggering_log_id": "L1-0", |
| "stage": "stage4_adversarial", |
| "seed": 93041, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": -0.9, |
| "reward_breakdown": { |
| "missed_malicious": -1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": false |
| }, |
| "trained": { |
| "raw_text": "Action: escalate\nCitedLog: L1-0\nRationale: Confirmed exfiltration scale exceeds tier-1 thresholds.", |
| "action": "escalate", |
| "cited_log_id": "L1-0", |
| "rationale": "Confirmed exfiltration scale exceeds tier-1 thresholds.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-c-90016", |
| "category": "benign_noise", |
| "severity": "medium", |
| "summary": "benign_noise: 1 event(s); first=net.port_scan_hit", |
| "host": "host-001", |
| "user": "user-001" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T04:36:00Z", |
| "source": "network", |
| "event_type": "net.port_scan_hit", |
| "fields": { |
| "src_ip": "203.0.113.245", |
| "port": 3389 |
| } |
| } |
| ], |
| "ground_truth": "monitor", |
| "triggering_log_id": "L1-0", |
| "stage": "stage1_basic", |
| "seed": 90016, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": -0.9, |
| "reward_breakdown": { |
| "missed_malicious": -1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": false |
| }, |
| "trained": { |
| "raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.", |
| "action": "monitor", |
| "cited_log_id": "L1-0", |
| "rationale": "Behavior is anomalous but reversible; raising visibility.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-d-92011", |
| "category": "benign_noise", |
| "severity": "low", |
| "summary": "benign_noise: 3 event(s); first=net.dns_query", |
| "host": "host-001", |
| "user": "user-001" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T05:46:00Z", |
| "source": "network", |
| "event_type": "net.dns_query", |
| "fields": { |
| "domain": "google.com" |
| } |
| }, |
| { |
| "log_id": "L1-1", |
| "timestamp": "2026-04-25T05:46:02Z", |
| "source": "identity", |
| "event_type": "auth.login_success", |
| "fields": { |
| "user": "carol", |
| "src_ip": "10.0.0.188" |
| } |
| }, |
| { |
| "log_id": "L1-2", |
| "timestamp": "2026-04-25T07:31:00Z", |
| "source": "network", |
| "event_type": "net.beacon", |
| "fields": { |
| "dst_ip": "10.0.214.180", |
| "interval_s": 60 |
| } |
| } |
| ], |
| "ground_truth": "monitor", |
| "triggering_log_id": "L1-2", |
| "stage": "stage3_mixed", |
| "seed": 92011, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": -1.0, |
| "reward_breakdown": { |
| "missed_malicious": -1.0 |
| }, |
| "correct": false |
| }, |
| "trained": { |
| "raw_text": "Action: monitor\nCitedLog: L1-2\nRationale: Behavior is anomalous but reversible; raising visibility.", |
| "action": "monitor", |
| "cited_log_id": "L1-2", |
| "rationale": "Behavior is anomalous but reversible; raising visibility.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-i-91021", |
| "category": "phishing", |
| "severity": "high", |
| "summary": "phishing: 2 event(s); first=email.link_clicked", |
| "host": "host-001", |
| "user": "bob" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T11:07:00Z", |
| "source": "email", |
| "event_type": "email.link_clicked", |
| "fields": { |
| "url": "https://login-update.cn/secure", |
| "user": "bob" |
| } |
| }, |
| { |
| "log_id": "L1-1", |
| "timestamp": "2026-04-25T22:48:00Z", |
| "source": "identity", |
| "event_type": "auth.login_success", |
| "fields": { |
| "user": "alice", |
| "src_ip": "10.0.0.138" |
| } |
| } |
| ], |
| "ground_truth": "block_ip", |
| "triggering_log_id": "L1-0", |
| "stage": "stage2_multi", |
| "seed": 91021, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": -0.9, |
| "reward_breakdown": { |
| "missed_malicious": -1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": false |
| }, |
| "trained": { |
| "raw_text": "Action: block_ip\nCitedLog: L1-0\nRationale: External IP is engaged in active brute-force; blocking.", |
| "action": "block_ip", |
| "cited_log_id": "L1-0", |
| "rationale": "External IP is engaged in active brute-force; blocking.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-l-93046", |
| "category": "c2_beacon", |
| "severity": "high", |
| "summary": "c2_beacon: 4 event(s); first=net.beacon", |
| "host": "host-001", |
| "user": "user-001" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T11:55:00Z", |
| "source": "network", |
| "event_type": "net.beacon", |
| "fields": { |
| "dst_ip": "203.0.113.5", |
| "interval_s": 90 |
| } |
| }, |
| { |
| "log_id": "L1-1", |
| "timestamp": "2026-04-25T18:06:00Z", |
| "source": "identity", |
| "event_type": "auth.login_success", |
| "fields": { |
| "user": "carol", |
| "src_ip": "10.0.0.243" |
| } |
| }, |
| { |
| "log_id": "L1-2", |
| "timestamp": "2026-04-25T18:06:02Z", |
| "source": "network", |
| "event_type": "net.dns_query", |
| "fields": { |
| "domain": "google.com" |
| } |
| }, |
| { |
| "log_id": "L1-3", |
| "timestamp": "2026-04-25T18:06:04Z", |
| "source": "network", |
| "event_type": "net.dns_query", |
| "fields": { |
| "domain": "internal.corp" |
| } |
| } |
| ], |
| "ground_truth": "block_ip", |
| "triggering_log_id": "L1-0", |
| "stage": "stage4_adversarial", |
| "seed": 93046, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": -0.9, |
| "reward_breakdown": { |
| "missed_malicious": -1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": false |
| }, |
| "trained": { |
| "raw_text": "Action: block_ip\nCitedLog: L1-0\nRationale: External IP is engaged in active brute-force; blocking.", |
| "action": "block_ip", |
| "cited_log_id": "L1-0", |
| "rationale": "External IP is engaged in active brute-force; blocking.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-i-91028", |
| "category": "benign_noise", |
| "severity": "medium", |
| "summary": "benign_noise: 2 event(s); first=auth.login_success", |
| "host": "host-001", |
| "user": "carol" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T00:59:00Z", |
| "source": "identity", |
| "event_type": "auth.login_success", |
| "fields": { |
| "user": "carol", |
| "src_ip": "10.0.0.208" |
| } |
| }, |
| { |
| "log_id": "L1-1", |
| "timestamp": "2026-04-25T13:57:00Z", |
| "source": "network", |
| "event_type": "net.port_scan_hit", |
| "fields": { |
| "src_ip": "203.0.113.15", |
| "port": 22 |
| } |
| } |
| ], |
| "ground_truth": "monitor", |
| "triggering_log_id": "L1-1", |
| "stage": "stage2_multi", |
| "seed": 91028, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": -1.0, |
| "reward_breakdown": { |
| "missed_malicious": -1.0 |
| }, |
| "correct": false |
| }, |
| "trained": { |
| "raw_text": "Action: monitor\nCitedLog: L1-1\nRationale: Behavior is anomalous but reversible; raising visibility.", |
| "action": "monitor", |
| "cited_log_id": "L1-1", |
| "rationale": "Behavior is anomalous but reversible; raising visibility.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-i-91042", |
| "category": "benign_noise", |
| "severity": "low", |
| "summary": "benign_noise: 2 event(s); first=auth.login_success", |
| "host": "host-001", |
| "user": "dave" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T05:30:00Z", |
| "source": "identity", |
| "event_type": "auth.login_success", |
| "fields": { |
| "user": "dave", |
| "src_ip": "10.0.0.51" |
| } |
| }, |
| { |
| "log_id": "L1-1", |
| "timestamp": "2026-04-25T10:03:00Z", |
| "source": "network", |
| "event_type": "net.outbound_connection", |
| "fields": { |
| "dst_ip": "10.0.41.43", |
| "bytes_out": 90697 |
| } |
| } |
| ], |
| "ground_truth": "dismiss", |
| "triggering_log_id": "L1-1", |
| "stage": "stage2_multi", |
| "seed": 91042, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": 1.0, |
| "reward_breakdown": { |
| "correct_action": 1.0 |
| }, |
| "correct": true |
| }, |
| "trained": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-1\nRationale: Indicators are consistent with normal user activity.", |
| "action": "dismiss", |
| "cited_log_id": "L1-1", |
| "rationale": "Indicators are consistent with normal user activity.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-l-93029", |
| "category": "c2_beacon", |
| "severity": "high", |
| "summary": "c2_beacon: 4 event(s); first=net.dns_query", |
| "host": "host-001", |
| "user": "user-001" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T13:00:00Z", |
| "source": "network", |
| "event_type": "net.dns_query", |
| "fields": { |
| "domain": "google.com" |
| } |
| }, |
| { |
| "log_id": "L1-1", |
| "timestamp": "2026-04-25T13:00:02Z", |
| "source": "identity", |
| "event_type": "auth.login_success", |
| "fields": { |
| "user": "alice", |
| "src_ip": "10.0.0.234" |
| } |
| }, |
| { |
| "log_id": "L1-2", |
| "timestamp": "2026-04-25T13:00:04Z", |
| "source": "network", |
| "event_type": "net.dns_query", |
| "fields": { |
| "domain": "internal.corp" |
| } |
| }, |
| { |
| "log_id": "L1-3", |
| "timestamp": "2026-04-25T22:27:00Z", |
| "source": "network", |
| "event_type": "net.beacon", |
| "fields": { |
| "dst_ip": "185.220.101.7", |
| "interval_s": 90 |
| } |
| } |
| ], |
| "ground_truth": "block_ip", |
| "triggering_log_id": "L1-3", |
| "stage": "stage4_adversarial", |
| "seed": 93029, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": -1.0, |
| "reward_breakdown": { |
| "missed_malicious": -1.0 |
| }, |
| "correct": false |
| }, |
| "trained": { |
| "raw_text": "Action: block_ip\nCitedLog: L1-3\nRationale: External IP is engaged in active brute-force; blocking.", |
| "action": "block_ip", |
| "cited_log_id": "L1-3", |
| "rationale": "External IP is engaged in active brute-force; blocking.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-c-90029", |
| "category": "phishing", |
| "severity": "high", |
| "summary": "phishing: 1 event(s); first=email.link_clicked", |
| "host": "host-001", |
| "user": "alice" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T01:21:00Z", |
| "source": "email", |
| "event_type": "email.link_clicked", |
| "fields": { |
| "url": "https://login-update.click/secure", |
| "user": "alice" |
| } |
| } |
| ], |
| "ground_truth": "block_ip", |
| "triggering_log_id": "L1-0", |
| "stage": "stage1_basic", |
| "seed": 90029, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": -0.9, |
| "reward_breakdown": { |
| "missed_malicious": -1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": false |
| }, |
| "trained": { |
| "raw_text": "Action: block_ip\nCitedLog: L1-0\nRationale: External IP is engaged in active brute-force; blocking.", |
| "action": "block_ip", |
| "cited_log_id": "L1-0", |
| "rationale": "External IP is engaged in active brute-force; blocking.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-d-92023", |
| "category": "phishing", |
| "severity": "high", |
| "summary": "phishing: 3 event(s); first=net.dns_query", |
| "host": "host-001", |
| "user": "user-001" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T08:46:00Z", |
| "source": "network", |
| "event_type": "net.dns_query", |
| "fields": { |
| "domain": "github.com" |
| } |
| }, |
| { |
| "log_id": "L1-1", |
| "timestamp": "2026-04-25T08:46:02Z", |
| "source": "identity", |
| "event_type": "auth.login_success", |
| "fields": { |
| "user": "dave", |
| "src_ip": "10.0.0.224" |
| } |
| }, |
| { |
| "log_id": "L1-2", |
| "timestamp": "2026-04-25T09:39:00Z", |
| "source": "email", |
| "event_type": "email.link_clicked", |
| "fields": { |
| "url": "https://login-update.click/secure", |
| "user": "alice" |
| } |
| } |
| ], |
| "ground_truth": "block_ip", |
| "triggering_log_id": "L1-2", |
| "stage": "stage3_mixed", |
| "seed": 92023, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": -1.0, |
| "reward_breakdown": { |
| "missed_malicious": -1.0 |
| }, |
| "correct": false |
| }, |
| "trained": { |
| "raw_text": "Action: block_ip\nCitedLog: L1-2\nRationale: External IP is engaged in active brute-force; blocking.", |
| "action": "block_ip", |
| "cited_log_id": "L1-2", |
| "rationale": "External IP is engaged in active brute-force; blocking.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-l-93039", |
| "category": "phishing", |
| "severity": "high", |
| "summary": "phishing: 4 event(s); first=net.outbound_connection", |
| "host": "host-001", |
| "user": "user-001" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T10:31:00Z", |
| "source": "network", |
| "event_type": "net.outbound_connection", |
| "fields": { |
| "dst_ip": "10.0.184.117", |
| "bytes_out": 54691 |
| } |
| }, |
| { |
| "log_id": "L1-1", |
| "timestamp": "2026-04-25T10:31:02Z", |
| "source": "network", |
| "event_type": "net.dns_query", |
| "fields": { |
| "domain": "github.com" |
| } |
| }, |
| { |
| "log_id": "L1-2", |
| "timestamp": "2026-04-25T10:31:04Z", |
| "source": "network", |
| "event_type": "net.outbound_connection", |
| "fields": { |
| "dst_ip": "10.0.108.52", |
| "bytes_out": 60603 |
| } |
| }, |
| { |
| "log_id": "L1-3", |
| "timestamp": "2026-04-25T11:04:00Z", |
| "source": "email", |
| "event_type": "email.link_clicked", |
| "fields": { |
| "url": "https://login-update.ru/secure", |
| "user": "carol" |
| } |
| } |
| ], |
| "ground_truth": "block_ip", |
| "triggering_log_id": "L1-3", |
| "stage": "stage4_adversarial", |
| "seed": 93039, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": -1.0, |
| "reward_breakdown": { |
| "missed_malicious": -1.0 |
| }, |
| "correct": false |
| }, |
| "trained": { |
| "raw_text": "Action: block_ip\nCitedLog: L1-3\nRationale: External IP is engaged in active brute-force; blocking.", |
| "action": "block_ip", |
| "cited_log_id": "L1-3", |
| "rationale": "External IP is engaged in active brute-force; blocking.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-i-91017", |
| "category": "benign_noise", |
| "severity": "low", |
| "summary": "benign_noise: 2 event(s); first=auth.login_success", |
| "host": "host-001", |
| "user": "dave" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T04:36:00Z", |
| "source": "identity", |
| "event_type": "auth.login_success", |
| "fields": { |
| "user": "dave", |
| "src_ip": "10.0.0.4" |
| } |
| }, |
| { |
| "log_id": "L1-1", |
| "timestamp": "2026-04-25T07:24:00Z", |
| "source": "network", |
| "event_type": "net.beacon", |
| "fields": { |
| "dst_ip": "10.0.216.132", |
| "interval_s": 30 |
| } |
| } |
| ], |
| "ground_truth": "monitor", |
| "triggering_log_id": "L1-1", |
| "stage": "stage2_multi", |
| "seed": 91017, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": -1.0, |
| "reward_breakdown": { |
| "missed_malicious": -1.0 |
| }, |
| "correct": false |
| }, |
| "trained": { |
| "raw_text": "Action: monitor\nCitedLog: L1-1\nRationale: Behavior is anomalous but reversible; raising visibility.", |
| "action": "monitor", |
| "cited_log_id": "L1-1", |
| "rationale": "Behavior is anomalous but reversible; raising visibility.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| }, |
| { |
| "alert": { |
| "alert_id": "A-EVAL-c-90010", |
| "category": "benign_noise", |
| "severity": "medium", |
| "summary": "benign_noise: 1 event(s); first=net.port_scan_hit", |
| "host": "host-001", |
| "user": "user-001" |
| }, |
| "events": [ |
| { |
| "log_id": "L1-0", |
| "timestamp": "2026-04-25T21:48:00Z", |
| "source": "network", |
| "event_type": "net.port_scan_hit", |
| "fields": { |
| "src_ip": "203.0.113.146", |
| "port": 445 |
| } |
| } |
| ], |
| "ground_truth": "monitor", |
| "triggering_log_id": "L1-0", |
| "stage": "stage1_basic", |
| "seed": 90010, |
| "baseline": { |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", |
| "action": "dismiss", |
| "cited_log_id": "L1-0", |
| "rationale": "Looks like routine activity, no clear malicious indicator.", |
| "format_ok": true, |
| "reward": -0.9, |
| "reward_breakdown": { |
| "missed_malicious": -1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": false |
| }, |
| "trained": { |
| "raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.", |
| "action": "monitor", |
| "cited_log_id": "L1-0", |
| "rationale": "Behavior is anomalous but reversible; raising visibility.", |
| "format_ok": true, |
| "reward": 1.1, |
| "reward_breakdown": { |
| "correct_action": 1.0, |
| "correct_citation_bonus": 0.1 |
| }, |
| "correct": true |
| } |
| } |
| ] |
| } |