Spaces:
Running
Running
| { | |
| "n": 48, | |
| "source": "placeholder", | |
| "baseline_model": "unsloth/Qwen2.5-3B-Instruct", | |
| "trained_adapter": null, | |
| "examples": [ | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-i-91030", | |
| "category": "benign_noise", | |
| "severity": "low", | |
| "summary": "benign_noise: 2 event(s); first=auth.login_success", | |
| "host": "host-001", | |
| "user": "carol" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T11:58:00Z", | |
| "source": "identity", | |
| "event_type": "auth.login_success", | |
| "fields": { | |
| "user": "carol", | |
| "src_ip": "10.0.0.187" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-1", | |
| "timestamp": "2026-04-25T22:09:00Z", | |
| "source": "network", | |
| "event_type": "net.dns_query", | |
| "fields": { | |
| "domain": "internal.corp" | |
| } | |
| } | |
| ], | |
| "ground_truth": "dismiss", | |
| "triggering_log_id": "L1-1", | |
| "stage": "stage2_multi", | |
| "seed": 91030, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": 1.0, | |
| "reward_breakdown": { | |
| "correct_action": 1.0 | |
| }, | |
| "correct": true | |
| }, | |
| "trained": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-1\nRationale: Indicators are consistent with normal user activity.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-1", | |
| "rationale": "Indicators are consistent with normal user activity.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-i-91008", | |
| "category": "brute_force", | |
| "severity": "medium", | |
| "summary": "brute_force: 8 event(s); first=net.outbound_connection", | |
| "host": "host-001", | |
| "user": "user-001" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T11:11:00Z", | |
| "source": "network", | |
| "event_type": "net.outbound_connection", | |
| "fields": { | |
| "dst_ip": "10.0.101.112", | |
| "bytes_out": 73324 | |
| } | |
| }, | |
| { | |
| "log_id": "L1-1", | |
| "timestamp": "2026-04-25T14:47:00Z", | |
| "source": "identity", | |
| "event_type": "auth.login_failure", | |
| "fields": { | |
| "user": "alice", | |
| "src_ip": "203.0.113.76" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-2", | |
| "timestamp": "2026-04-25T14:47:05Z", | |
| "source": "identity", | |
| "event_type": "auth.login_failure", | |
| "fields": { | |
| "user": "alice", | |
| "src_ip": "203.0.113.196" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-3", | |
| "timestamp": "2026-04-25T14:47:10Z", | |
| "source": "identity", | |
| "event_type": "auth.login_failure", | |
| "fields": { | |
| "user": "alice", | |
| "src_ip": "203.0.113.242" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-4", | |
| "timestamp": "2026-04-25T14:47:15Z", | |
| "source": "identity", | |
| "event_type": "auth.login_failure", | |
| "fields": { | |
| "user": "alice", | |
| "src_ip": "203.0.113.243" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-5", | |
| "timestamp": "2026-04-25T14:47:20Z", | |
| "source": "identity", | |
| "event_type": "auth.login_failure", | |
| "fields": { | |
| "user": "alice", | |
| "src_ip": "203.0.113.100" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-6", | |
| "timestamp": "2026-04-25T14:47:25Z", | |
| "source": "identity", | |
| "event_type": "auth.login_failure", | |
| "fields": { | |
| "user": "alice", | |
| "src_ip": "203.0.113.155" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-7", | |
| "timestamp": "2026-04-25T14:47:30Z", | |
| "source": "identity", | |
| "event_type": "auth.login_failure", | |
| "fields": { | |
| "user": "alice", | |
| "src_ip": "203.0.113.43" | |
| } | |
| } | |
| ], | |
| "ground_truth": "monitor", | |
| "triggering_log_id": "L1-1", | |
| "stage": "stage2_multi", | |
| "seed": 91008, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": -1.0, | |
| "reward_breakdown": { | |
| "missed_malicious": -1.0 | |
| }, | |
| "correct": false | |
| }, | |
| "trained": { | |
| "raw_text": "Action: monitor\nCitedLog: L1-1\nRationale: Behavior is anomalous but reversible; raising visibility.", | |
| "action": "monitor", | |
| "cited_log_id": "L1-1", | |
| "rationale": "Behavior is anomalous but reversible; raising visibility.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-d-92026", | |
| "category": "benign_noise", | |
| "severity": "low", | |
| "summary": "benign_noise: 3 event(s); first=net.dns_query", | |
| "host": "host-001", | |
| "user": "user-001" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T03:29:00Z", | |
| "source": "network", | |
| "event_type": "net.dns_query", | |
| "fields": { | |
| "domain": "internal.corp" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-1", | |
| "timestamp": "2026-04-25T03:29:02Z", | |
| "source": "identity", | |
| "event_type": "auth.login_success", | |
| "fields": { | |
| "user": "dave", | |
| "src_ip": "10.0.0.46" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-2", | |
| "timestamp": "2026-04-25T22:52:00Z", | |
| "source": "network", | |
| "event_type": "net.beacon", | |
| "fields": { | |
| "dst_ip": "10.0.204.226", | |
| "interval_s": 90 | |
| } | |
| } | |
| ], | |
| "ground_truth": "monitor", | |
| "triggering_log_id": "L1-2", | |
| "stage": "stage3_mixed", | |
| "seed": 92026, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": -1.0, | |
| "reward_breakdown": { | |
| "missed_malicious": -1.0 | |
| }, | |
| "correct": false | |
| }, | |
| "trained": { | |
| "raw_text": "Action: monitor\nCitedLog: L1-2\nRationale: Behavior is anomalous but reversible; raising visibility.", | |
| "action": "monitor", | |
| "cited_log_id": "L1-2", | |
| "rationale": "Behavior is anomalous but reversible; raising visibility.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-d-92043", | |
| "category": "phishing", | |
| "severity": "high", | |
| "summary": "phishing: 3 event(s); first=email.link_clicked", | |
| "host": "host-001", | |
| "user": "bob" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T09:59:00Z", | |
| "source": "email", | |
| "event_type": "email.link_clicked", | |
| "fields": { | |
| "url": "https://login-update.cn/secure", | |
| "user": "bob" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-1", | |
| "timestamp": "2026-04-25T18:48:00Z", | |
| "source": "network", | |
| "event_type": "net.outbound_connection", | |
| "fields": { | |
| "dst_ip": "10.0.170.227", | |
| "bytes_out": 65651 | |
| } | |
| }, | |
| { | |
| "log_id": "L1-2", | |
| "timestamp": "2026-04-25T18:48:02Z", | |
| "source": "identity", | |
| "event_type": "auth.login_success", | |
| "fields": { | |
| "user": "carol", | |
| "src_ip": "10.0.0.170" | |
| } | |
| } | |
| ], | |
| "ground_truth": "block_ip", | |
| "triggering_log_id": "L1-0", | |
| "stage": "stage3_mixed", | |
| "seed": 92043, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": -0.9, | |
| "reward_breakdown": { | |
| "missed_malicious": -1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": false | |
| }, | |
| "trained": { | |
| "raw_text": "Action: block_ip\nCitedLog: L1-0\nRationale: External IP is engaged in active brute-force; blocking.", | |
| "action": "block_ip", | |
| "cited_log_id": "L1-0", | |
| "rationale": "External IP is engaged in active brute-force; blocking.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-i-91037", | |
| "category": "benign_noise", | |
| "severity": "medium", | |
| "summary": "benign_noise: 2 event(s); first=net.outbound_connection", | |
| "host": "host-001", | |
| "user": "user-001" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T02:54:00Z", | |
| "source": "network", | |
| "event_type": "net.outbound_connection", | |
| "fields": { | |
| "dst_ip": "10.0.137.186", | |
| "bytes_out": 37324 | |
| } | |
| }, | |
| { | |
| "log_id": "L1-1", | |
| "timestamp": "2026-04-25T04:23:00Z", | |
| "source": "network", | |
| "event_type": "net.port_scan_hit", | |
| "fields": { | |
| "src_ip": "203.0.113.96", | |
| "port": 5985 | |
| } | |
| } | |
| ], | |
| "ground_truth": "monitor", | |
| "triggering_log_id": "L1-1", | |
| "stage": "stage2_multi", | |
| "seed": 91037, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": -1.0, | |
| "reward_breakdown": { | |
| "missed_malicious": -1.0 | |
| }, | |
| "correct": false | |
| }, | |
| "trained": { | |
| "raw_text": "Action: monitor\nCitedLog: L1-1\nRationale: Behavior is anomalous but reversible; raising visibility.", | |
| "action": "monitor", | |
| "cited_log_id": "L1-1", | |
| "rationale": "Behavior is anomalous but reversible; raising visibility.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-d-92013", | |
| "category": "benign_noise", | |
| "severity": "low", | |
| "summary": "benign_noise: 3 event(s); first=net.dns_query", | |
| "host": "host-001", | |
| "user": "user-001" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T06:31:00Z", | |
| "source": "network", | |
| "event_type": "net.dns_query", | |
| "fields": { | |
| "domain": "google.com" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-1", | |
| "timestamp": "2026-04-25T06:31:02Z", | |
| "source": "identity", | |
| "event_type": "auth.login_success", | |
| "fields": { | |
| "user": "bob", | |
| "src_ip": "10.0.0.194" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-2", | |
| "timestamp": "2026-04-25T13:13:00Z", | |
| "source": "identity", | |
| "event_type": "auth.login_success", | |
| "fields": { | |
| "user": "bob", | |
| "src_ip": "10.0.0.146" | |
| } | |
| } | |
| ], | |
| "ground_truth": "dismiss", | |
| "triggering_log_id": "L1-2", | |
| "stage": "stage3_mixed", | |
| "seed": 92013, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": 1.0, | |
| "reward_breakdown": { | |
| "correct_action": 1.0 | |
| }, | |
| "correct": true | |
| }, | |
| "trained": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-2\nRationale: Indicators are consistent with normal user activity.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-2", | |
| "rationale": "Indicators are consistent with normal user activity.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-d-92049", | |
| "category": "malware_execution", | |
| "severity": "high", | |
| "summary": "malware_execution: 4 event(s); first=auth.login_success", | |
| "host": "host-001", | |
| "user": "dave" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T00:12:00Z", | |
| "source": "identity", | |
| "event_type": "auth.login_success", | |
| "fields": { | |
| "user": "dave", | |
| "src_ip": "10.0.0.52" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-1", | |
| "timestamp": "2026-04-25T00:12:02Z", | |
| "source": "network", | |
| "event_type": "net.outbound_connection", | |
| "fields": { | |
| "dst_ip": "10.0.202.40", | |
| "bytes_out": 80520 | |
| } | |
| }, | |
| { | |
| "log_id": "L1-2", | |
| "timestamp": "2026-04-25T19:55:00Z", | |
| "source": "endpoint", | |
| "event_type": "proc.lolbin_use", | |
| "fields": { | |
| "process": "powershell.exe", | |
| "parent_process": "excel.exe", | |
| "cmd": "-enc AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-3", | |
| "timestamp": "2026-04-25T19:55:05Z", | |
| "source": "endpoint", | |
| "event_type": "file.write", | |
| "fields": { | |
| "path": "C:\\Users\\Public\\svc.exe" | |
| } | |
| } | |
| ], | |
| "ground_truth": "quarantine_host", | |
| "triggering_log_id": "L1-2", | |
| "stage": "stage3_mixed", | |
| "seed": 92049, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": -1.0, | |
| "reward_breakdown": { | |
| "missed_malicious": -1.0 | |
| }, | |
| "correct": false | |
| }, | |
| "trained": { | |
| "raw_text": "Action: quarantine_host\nCitedLog: L1-2\nRationale: Host shows malware execution indicators; isolating.", | |
| "action": "quarantine_host", | |
| "cited_log_id": "L1-2", | |
| "rationale": "Host shows malware execution indicators; isolating.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-i-91048", | |
| "category": "phishing", | |
| "severity": "high", | |
| "summary": "phishing: 2 event(s); first=auth.login_success", | |
| "host": "host-001", | |
| "user": "bob" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T05:25:00Z", | |
| "source": "identity", | |
| "event_type": "auth.login_success", | |
| "fields": { | |
| "user": "bob", | |
| "src_ip": "10.0.0.225" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-1", | |
| "timestamp": "2026-04-25T06:09:00Z", | |
| "source": "email", | |
| "event_type": "email.link_clicked", | |
| "fields": { | |
| "url": "https://login-update.ru/secure", | |
| "user": "bob" | |
| } | |
| } | |
| ], | |
| "ground_truth": "block_ip", | |
| "triggering_log_id": "L1-1", | |
| "stage": "stage2_multi", | |
| "seed": 91048, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": -1.0, | |
| "reward_breakdown": { | |
| "missed_malicious": -1.0 | |
| }, | |
| "correct": false | |
| }, | |
| "trained": { | |
| "raw_text": "Action: block_ip\nCitedLog: L1-1\nRationale: External IP is engaged in active brute-force; blocking.", | |
| "action": "block_ip", | |
| "cited_log_id": "L1-1", | |
| "rationale": "External IP is engaged in active brute-force; blocking.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-d-92021", | |
| "category": "malware_execution", | |
| "severity": "critical", | |
| "summary": "malware_execution: 5 event(s); first=proc.lolbin_use", | |
| "host": "host-001", | |
| "user": "user-001" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T03:53:00Z", | |
| "source": "endpoint", | |
| "event_type": "proc.lolbin_use", | |
| "fields": { | |
| "process": "powershell.exe", | |
| "parent_process": "outlook.exe", | |
| "cmd": "-enc BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-1", | |
| "timestamp": "2026-04-25T03:53:15Z", | |
| "source": "network", | |
| "event_type": "net.beacon", | |
| "fields": { | |
| "dst_ip": "203.0.113.5", | |
| "interval_s": 30 | |
| } | |
| }, | |
| { | |
| "log_id": "L1-2", | |
| "timestamp": "2026-04-25T03:53:30Z", | |
| "source": "identity", | |
| "event_type": "auth.privilege_grant", | |
| "fields": { | |
| "user": "bob", | |
| "role": "admin" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-3", | |
| "timestamp": "2026-04-25T08:51:00Z", | |
| "source": "network", | |
| "event_type": "net.dns_query", | |
| "fields": { | |
| "domain": "internal.corp" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-4", | |
| "timestamp": "2026-04-25T08:51:02Z", | |
| "source": "network", | |
| "event_type": "net.dns_query", | |
| "fields": { | |
| "domain": "google.com" | |
| } | |
| } | |
| ], | |
| "ground_truth": "escalate", | |
| "triggering_log_id": "L1-0", | |
| "stage": "stage3_mixed", | |
| "seed": 92021, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": -0.9, | |
| "reward_breakdown": { | |
| "missed_malicious": -1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": false | |
| }, | |
| "trained": { | |
| "raw_text": "Action: escalate\nCitedLog: L1-0\nRationale: Confirmed exfiltration scale exceeds tier-1 thresholds.", | |
| "action": "escalate", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Confirmed exfiltration scale exceeds tier-1 thresholds.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-c-90035", | |
| "category": "benign_noise", | |
| "severity": "low", | |
| "summary": "benign_noise: 1 event(s); first=net.beacon", | |
| "host": "host-001", | |
| "user": "user-001" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T01:08:00Z", | |
| "source": "network", | |
| "event_type": "net.beacon", | |
| "fields": { | |
| "dst_ip": "10.0.98.8", | |
| "interval_s": 60 | |
| } | |
| } | |
| ], | |
| "ground_truth": "monitor", | |
| "triggering_log_id": "L1-0", | |
| "stage": "stage1_basic", | |
| "seed": 90035, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": -0.9, | |
| "reward_breakdown": { | |
| "missed_malicious": -1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": false | |
| }, | |
| "trained": { | |
| "raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.", | |
| "action": "monitor", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Behavior is anomalous but reversible; raising visibility.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-l-93032", | |
| "category": "benign_noise", | |
| "severity": "low", | |
| "summary": "benign_noise: 4 event(s); first=auth.login_success", | |
| "host": "host-001", | |
| "user": "dave" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T17:30:00Z", | |
| "source": "identity", | |
| "event_type": "auth.login_success", | |
| "fields": { | |
| "user": "dave", | |
| "src_ip": "10.0.0.56" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-1", | |
| "timestamp": "2026-04-25T17:30:02Z", | |
| "source": "network", | |
| "event_type": "net.outbound_connection", | |
| "fields": { | |
| "dst_ip": "10.0.30.76", | |
| "bytes_out": 65394 | |
| } | |
| }, | |
| { | |
| "log_id": "L1-2", | |
| "timestamp": "2026-04-25T17:30:04Z", | |
| "source": "network", | |
| "event_type": "net.outbound_connection", | |
| "fields": { | |
| "dst_ip": "10.0.171.122", | |
| "bytes_out": 2642 | |
| } | |
| }, | |
| { | |
| "log_id": "L1-3", | |
| "timestamp": "2026-04-25T18:00:00Z", | |
| "source": "identity", | |
| "event_type": "auth.login_success", | |
| "fields": { | |
| "user": "dave", | |
| "src_ip": "10.0.0.72" | |
| } | |
| } | |
| ], | |
| "ground_truth": "dismiss", | |
| "triggering_log_id": "L1-3", | |
| "stage": "stage4_adversarial", | |
| "seed": 93032, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": 1.0, | |
| "reward_breakdown": { | |
| "correct_action": 1.0 | |
| }, | |
| "correct": true | |
| }, | |
| "trained": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-3\nRationale: Indicators are consistent with normal user activity.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-3", | |
| "rationale": "Indicators are consistent with normal user activity.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-l-93030", | |
| "category": "benign_noise", | |
| "severity": "low", | |
| "summary": "benign_noise: 4 event(s); first=net.beacon", | |
| "host": "host-001", | |
| "user": "user-001" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T18:13:00Z", | |
| "source": "network", | |
| "event_type": "net.beacon", | |
| "fields": { | |
| "dst_ip": "10.0.122.44", | |
| "interval_s": 60 | |
| } | |
| }, | |
| { | |
| "log_id": "L1-1", | |
| "timestamp": "2026-04-25T20:00:00Z", | |
| "source": "identity", | |
| "event_type": "auth.login_success", | |
| "fields": { | |
| "user": "bob", | |
| "src_ip": "10.0.0.88" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-2", | |
| "timestamp": "2026-04-25T20:00:02Z", | |
| "source": "network", | |
| "event_type": "net.dns_query", | |
| "fields": { | |
| "domain": "github.com" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-3", | |
| "timestamp": "2026-04-25T20:00:04Z", | |
| "source": "network", | |
| "event_type": "net.dns_query", | |
| "fields": { | |
| "domain": "github.com" | |
| } | |
| } | |
| ], | |
| "ground_truth": "monitor", | |
| "triggering_log_id": "L1-0", | |
| "stage": "stage4_adversarial", | |
| "seed": 93030, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": -0.9, | |
| "reward_breakdown": { | |
| "missed_malicious": -1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": false | |
| }, | |
| "trained": { | |
| "raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.", | |
| "action": "monitor", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Behavior is anomalous but reversible; raising visibility.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-i-91001", | |
| "category": "benign_noise", | |
| "severity": "low", | |
| "summary": "benign_noise: 2 event(s); first=net.dns_query", | |
| "host": "host-001", | |
| "user": "user-001" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T03:58:00Z", | |
| "source": "network", | |
| "event_type": "net.dns_query", | |
| "fields": { | |
| "domain": "github.com" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-1", | |
| "timestamp": "2026-04-25T06:16:00Z", | |
| "source": "identity", | |
| "event_type": "auth.login_success", | |
| "fields": { | |
| "user": "bob", | |
| "src_ip": "10.0.0.120" | |
| } | |
| } | |
| ], | |
| "ground_truth": "dismiss", | |
| "triggering_log_id": "L1-1", | |
| "stage": "stage2_multi", | |
| "seed": 91001, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": 1.0, | |
| "reward_breakdown": { | |
| "correct_action": 1.0 | |
| }, | |
| "correct": true | |
| }, | |
| "trained": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-1\nRationale: Indicators are consistent with normal user activity.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-1", | |
| "rationale": "Indicators are consistent with normal user activity.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-l-93038", | |
| "category": "benign_noise", | |
| "severity": "low", | |
| "summary": "benign_noise: 4 event(s); first=net.dns_query", | |
| "host": "host-001", | |
| "user": "user-001" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T18:58:00Z", | |
| "source": "network", | |
| "event_type": "net.dns_query", | |
| "fields": { | |
| "domain": "internal.corp" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-1", | |
| "timestamp": "2026-04-25T18:58:02Z", | |
| "source": "identity", | |
| "event_type": "auth.login_success", | |
| "fields": { | |
| "user": "bob", | |
| "src_ip": "10.0.0.171" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-2", | |
| "timestamp": "2026-04-25T18:58:04Z", | |
| "source": "network", | |
| "event_type": "net.outbound_connection", | |
| "fields": { | |
| "dst_ip": "10.0.47.203", | |
| "bytes_out": 14699 | |
| } | |
| }, | |
| { | |
| "log_id": "L1-3", | |
| "timestamp": "2026-04-25T20:59:00Z", | |
| "source": "network", | |
| "event_type": "net.beacon", | |
| "fields": { | |
| "dst_ip": "10.0.36.159", | |
| "interval_s": 90 | |
| } | |
| } | |
| ], | |
| "ground_truth": "monitor", | |
| "triggering_log_id": "L1-3", | |
| "stage": "stage4_adversarial", | |
| "seed": 93038, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": -1.0, | |
| "reward_breakdown": { | |
| "missed_malicious": -1.0 | |
| }, | |
| "correct": false | |
| }, | |
| "trained": { | |
| "raw_text": "Action: monitor\nCitedLog: L1-3\nRationale: Behavior is anomalous but reversible; raising visibility.", | |
| "action": "monitor", | |
| "cited_log_id": "L1-3", | |
| "rationale": "Behavior is anomalous but reversible; raising visibility.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-i-91046", | |
| "category": "malware_execution", | |
| "severity": "high", | |
| "summary": "malware_execution: 3 event(s); first=proc.lolbin_use", | |
| "host": "host-001", | |
| "user": "user-001" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T02:24:00Z", | |
| "source": "endpoint", | |
| "event_type": "proc.lolbin_use", | |
| "fields": { | |
| "process": "powershell.exe", | |
| "parent_process": "chrome.exe", | |
| "cmd": "-enc AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-1", | |
| "timestamp": "2026-04-25T02:24:05Z", | |
| "source": "endpoint", | |
| "event_type": "file.write", | |
| "fields": { | |
| "path": "C:\\Users\\Public\\svc.exe" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-2", | |
| "timestamp": "2026-04-25T09:39:00Z", | |
| "source": "network", | |
| "event_type": "net.dns_query", | |
| "fields": { | |
| "domain": "google.com" | |
| } | |
| } | |
| ], | |
| "ground_truth": "quarantine_host", | |
| "triggering_log_id": "L1-0", | |
| "stage": "stage2_multi", | |
| "seed": 91046, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": -0.9, | |
| "reward_breakdown": { | |
| "missed_malicious": -1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": false | |
| }, | |
| "trained": { | |
| "raw_text": "Action: quarantine_host\nCitedLog: L1-0\nRationale: Host shows malware execution indicators; isolating.", | |
| "action": "quarantine_host", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Host shows malware execution indicators; isolating.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-d-92047", | |
| "category": "malware_execution", | |
| "severity": "high", | |
| "summary": "malware_execution: 4 event(s); first=auth.login_success", | |
| "host": "host-001", | |
| "user": "carol" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T12:36:00Z", | |
| "source": "identity", | |
| "event_type": "auth.login_success", | |
| "fields": { | |
| "user": "carol", | |
| "src_ip": "10.0.0.130" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-1", | |
| "timestamp": "2026-04-25T12:36:02Z", | |
| "source": "network", | |
| "event_type": "net.outbound_connection", | |
| "fields": { | |
| "dst_ip": "10.0.164.13", | |
| "bytes_out": 94453 | |
| } | |
| }, | |
| { | |
| "log_id": "L1-2", | |
| "timestamp": "2026-04-25T16:44:00Z", | |
| "source": "endpoint", | |
| "event_type": "proc.lolbin_use", | |
| "fields": { | |
| "process": "powershell.exe", | |
| "parent_process": "excel.exe", | |
| "cmd": "-enc AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-3", | |
| "timestamp": "2026-04-25T16:44:05Z", | |
| "source": "endpoint", | |
| "event_type": "file.write", | |
| "fields": { | |
| "path": "C:\\Users\\Public\\svc.exe" | |
| } | |
| } | |
| ], | |
| "ground_truth": "quarantine_host", | |
| "triggering_log_id": "L1-2", | |
| "stage": "stage3_mixed", | |
| "seed": 92047, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": -1.0, | |
| "reward_breakdown": { | |
| "missed_malicious": -1.0 | |
| }, | |
| "correct": false | |
| }, | |
| "trained": { | |
| "raw_text": "Action: quarantine_host\nCitedLog: L1-2\nRationale: Host shows malware execution indicators; isolating.", | |
| "action": "quarantine_host", | |
| "cited_log_id": "L1-2", | |
| "rationale": "Host shows malware execution indicators; isolating.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-c-90038", | |
| "category": "benign_noise", | |
| "severity": "medium", | |
| "summary": "benign_noise: 1 event(s); first=net.port_scan_hit", | |
| "host": "host-001", | |
| "user": "user-001" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T05:08:00Z", | |
| "source": "network", | |
| "event_type": "net.port_scan_hit", | |
| "fields": { | |
| "src_ip": "203.0.113.140", | |
| "port": 22 | |
| } | |
| } | |
| ], | |
| "ground_truth": "monitor", | |
| "triggering_log_id": "L1-0", | |
| "stage": "stage1_basic", | |
| "seed": 90038, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": -0.9, | |
| "reward_breakdown": { | |
| "missed_malicious": -1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": false | |
| }, | |
| "trained": { | |
| "raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.", | |
| "action": "monitor", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Behavior is anomalous but reversible; raising visibility.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-i-91000", | |
| "category": "benign_noise", | |
| "severity": "low", | |
| "summary": "benign_noise: 2 event(s); first=net.beacon", | |
| "host": "host-001", | |
| "user": "user-001" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T21:23:00Z", | |
| "source": "network", | |
| "event_type": "net.beacon", | |
| "fields": { | |
| "dst_ip": "10.0.61.71", | |
| "interval_s": 60 | |
| } | |
| }, | |
| { | |
| "log_id": "L1-1", | |
| "timestamp": "2026-04-25T22:28:00Z", | |
| "source": "identity", | |
| "event_type": "auth.login_success", | |
| "fields": { | |
| "user": "alice", | |
| "src_ip": "10.0.0.76" | |
| } | |
| } | |
| ], | |
| "ground_truth": "monitor", | |
| "triggering_log_id": "L1-0", | |
| "stage": "stage2_multi", | |
| "seed": 91000, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": -0.9, | |
| "reward_breakdown": { | |
| "missed_malicious": -1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": false | |
| }, | |
| "trained": { | |
| "raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.", | |
| "action": "monitor", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Behavior is anomalous but reversible; raising visibility.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-c-90011", | |
| "category": "malware_execution", | |
| "severity": "high", | |
| "summary": "malware_execution: 2 event(s); first=proc.lolbin_use", | |
| "host": "host-001", | |
| "user": "user-001" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T19:21:00Z", | |
| "source": "endpoint", | |
| "event_type": "proc.lolbin_use", | |
| "fields": { | |
| "process": "powershell.exe", | |
| "parent_process": "winword.exe", | |
| "cmd": "-enc AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-1", | |
| "timestamp": "2026-04-25T19:21:05Z", | |
| "source": "endpoint", | |
| "event_type": "file.write", | |
| "fields": { | |
| "path": "C:\\Users\\Public\\payload.exe" | |
| } | |
| } | |
| ], | |
| "ground_truth": "quarantine_host", | |
| "triggering_log_id": "L1-0", | |
| "stage": "stage1_basic", | |
| "seed": 90011, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": -0.9, | |
| "reward_breakdown": { | |
| "missed_malicious": -1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": false | |
| }, | |
| "trained": { | |
| "raw_text": "Action: quarantine_host\nCitedLog: L1-0\nRationale: Host shows malware execution indicators; isolating.", | |
| "action": "quarantine_host", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Host shows malware execution indicators; isolating.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-c-90024", | |
| "category": "phishing", | |
| "severity": "high", | |
| "summary": "phishing: 1 event(s); first=email.link_clicked", | |
| "host": "host-001", | |
| "user": "alice" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T03:27:00Z", | |
| "source": "email", | |
| "event_type": "email.link_clicked", | |
| "fields": { | |
| "url": "https://login-update.top/secure", | |
| "user": "alice" | |
| } | |
| } | |
| ], | |
| "ground_truth": "block_ip", | |
| "triggering_log_id": "L1-0", | |
| "stage": "stage1_basic", | |
| "seed": 90024, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": -0.9, | |
| "reward_breakdown": { | |
| "missed_malicious": -1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": false | |
| }, | |
| "trained": { | |
| "raw_text": "Action: block_ip\nCitedLog: L1-0\nRationale: External IP is engaged in active brute-force; blocking.", | |
| "action": "block_ip", | |
| "cited_log_id": "L1-0", | |
| "rationale": "External IP is engaged in active brute-force; blocking.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-l-93004", | |
| "category": "benign_noise", | |
| "severity": "low", | |
| "summary": "benign_noise: 4 event(s); first=auth.login_success", | |
| "host": "host-001", | |
| "user": "dave" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T04:55:00Z", | |
| "source": "identity", | |
| "event_type": "auth.login_success", | |
| "fields": { | |
| "user": "dave", | |
| "src_ip": "10.0.0.144" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-1", | |
| "timestamp": "2026-04-25T19:38:00Z", | |
| "source": "network", | |
| "event_type": "net.dns_query", | |
| "fields": { | |
| "domain": "github.com" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-2", | |
| "timestamp": "2026-04-25T19:38:02Z", | |
| "source": "network", | |
| "event_type": "net.dns_query", | |
| "fields": { | |
| "domain": "github.com" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-3", | |
| "timestamp": "2026-04-25T19:38:04Z", | |
| "source": "network", | |
| "event_type": "net.dns_query", | |
| "fields": { | |
| "domain": "internal.corp" | |
| } | |
| } | |
| ], | |
| "ground_truth": "dismiss", | |
| "triggering_log_id": "L1-3", | |
| "stage": "stage4_adversarial", | |
| "seed": 93004, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": 1.0, | |
| "reward_breakdown": { | |
| "correct_action": 1.0 | |
| }, | |
| "correct": true | |
| }, | |
| "trained": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-3\nRationale: Indicators are consistent with normal user activity.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-3", | |
| "rationale": "Indicators are consistent with normal user activity.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-c-90048", | |
| "category": "phishing", | |
| "severity": "high", | |
| "summary": "phishing: 1 event(s); first=email.link_clicked", | |
| "host": "host-001", | |
| "user": "alice" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T02:43:00Z", | |
| "source": "email", | |
| "event_type": "email.link_clicked", | |
| "fields": { | |
| "url": "https://login-update.click/secure", | |
| "user": "alice" | |
| } | |
| } | |
| ], | |
| "ground_truth": "block_ip", | |
| "triggering_log_id": "L1-0", | |
| "stage": "stage1_basic", | |
| "seed": 90048, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": -0.9, | |
| "reward_breakdown": { | |
| "missed_malicious": -1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": false | |
| }, | |
| "trained": { | |
| "raw_text": "Action: block_ip\nCitedLog: L1-0\nRationale: External IP is engaged in active brute-force; blocking.", | |
| "action": "block_ip", | |
| "cited_log_id": "L1-0", | |
| "rationale": "External IP is engaged in active brute-force; blocking.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-i-91040", | |
| "category": "malware_execution", | |
| "severity": "high", | |
| "summary": "malware_execution: 2 event(s); first=net.outbound_connection", | |
| "host": "host-001", | |
| "user": "user-001" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T08:09:00Z", | |
| "source": "network", | |
| "event_type": "net.outbound_connection", | |
| "fields": { | |
| "dst_ip": "10.0.168.133", | |
| "bytes_out": 96660 | |
| } | |
| }, | |
| { | |
| "log_id": "L1-1", | |
| "timestamp": "2026-04-25T18:31:00Z", | |
| "source": "edr", | |
| "event_type": "edr.behavior_match", | |
| "fields": { | |
| "severity": "high", | |
| "rule_id": "EDR-9974", | |
| "host": "host-015" | |
| } | |
| } | |
| ], | |
| "ground_truth": "quarantine_host", | |
| "triggering_log_id": "L1-1", | |
| "stage": "stage2_multi", | |
| "seed": 91040, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": -1.0, | |
| "reward_breakdown": { | |
| "missed_malicious": -1.0 | |
| }, | |
| "correct": false | |
| }, | |
| "trained": { | |
| "raw_text": "Action: quarantine_host\nCitedLog: L1-1\nRationale: Host shows malware execution indicators; isolating.", | |
| "action": "quarantine_host", | |
| "cited_log_id": "L1-1", | |
| "rationale": "Host shows malware execution indicators; isolating.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-c-90028", | |
| "category": "benign_noise", | |
| "severity": "low", | |
| "summary": "benign_noise: 1 event(s); first=net.beacon", | |
| "host": "host-001", | |
| "user": "user-001" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T21:07:00Z", | |
| "source": "network", | |
| "event_type": "net.beacon", | |
| "fields": { | |
| "dst_ip": "10.0.168.29", | |
| "interval_s": 60 | |
| } | |
| } | |
| ], | |
| "ground_truth": "monitor", | |
| "triggering_log_id": "L1-0", | |
| "stage": "stage1_basic", | |
| "seed": 90028, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": -0.9, | |
| "reward_breakdown": { | |
| "missed_malicious": -1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": false | |
| }, | |
| "trained": { | |
| "raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.", | |
| "action": "monitor", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Behavior is anomalous but reversible; raising visibility.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-d-92000", | |
| "category": "benign_noise", | |
| "severity": "low", | |
| "summary": "benign_noise: 3 event(s); first=net.outbound_connection", | |
| "host": "host-001", | |
| "user": "user-001" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T04:39:00Z", | |
| "source": "network", | |
| "event_type": "net.outbound_connection", | |
| "fields": { | |
| "dst_ip": "10.0.81.214", | |
| "bytes_out": 86290 | |
| } | |
| }, | |
| { | |
| "log_id": "L1-1", | |
| "timestamp": "2026-04-25T04:39:02Z", | |
| "source": "network", | |
| "event_type": "net.outbound_connection", | |
| "fields": { | |
| "dst_ip": "10.0.138.227", | |
| "bytes_out": 53711 | |
| } | |
| }, | |
| { | |
| "log_id": "L1-2", | |
| "timestamp": "2026-04-25T16:45:00Z", | |
| "source": "network", | |
| "event_type": "net.beacon", | |
| "fields": { | |
| "dst_ip": "10.0.244.186", | |
| "interval_s": 90 | |
| } | |
| } | |
| ], | |
| "ground_truth": "monitor", | |
| "triggering_log_id": "L1-2", | |
| "stage": "stage3_mixed", | |
| "seed": 92000, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": -1.0, | |
| "reward_breakdown": { | |
| "missed_malicious": -1.0 | |
| }, | |
| "correct": false | |
| }, | |
| "trained": { | |
| "raw_text": "Action: monitor\nCitedLog: L1-2\nRationale: Behavior is anomalous but reversible; raising visibility.", | |
| "action": "monitor", | |
| "cited_log_id": "L1-2", | |
| "rationale": "Behavior is anomalous but reversible; raising visibility.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-c-90042", | |
| "category": "benign_noise", | |
| "severity": "low", | |
| "summary": "benign_noise: 1 event(s); first=auth.login_success", | |
| "host": "host-001", | |
| "user": "carol" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T12:10:00Z", | |
| "source": "identity", | |
| "event_type": "auth.login_success", | |
| "fields": { | |
| "user": "carol", | |
| "src_ip": "10.0.0.187" | |
| } | |
| } | |
| ], | |
| "ground_truth": "dismiss", | |
| "triggering_log_id": "L1-0", | |
| "stage": "stage1_basic", | |
| "seed": 90042, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| }, | |
| "trained": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Indicators are consistent with normal user activity.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Indicators are consistent with normal user activity.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-l-93005", | |
| "category": "malware_execution", | |
| "severity": "high", | |
| "summary": "malware_execution: 4 event(s); first=net.dns_query", | |
| "host": "host-001", | |
| "user": "user-001" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T09:11:00Z", | |
| "source": "network", | |
| "event_type": "net.dns_query", | |
| "fields": { | |
| "domain": "google.com" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-1", | |
| "timestamp": "2026-04-25T09:11:02Z", | |
| "source": "identity", | |
| "event_type": "auth.login_success", | |
| "fields": { | |
| "user": "bob", | |
| "src_ip": "10.0.0.196" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-2", | |
| "timestamp": "2026-04-25T09:11:04Z", | |
| "source": "network", | |
| "event_type": "net.outbound_connection", | |
| "fields": { | |
| "dst_ip": "10.0.153.118", | |
| "bytes_out": 9827 | |
| } | |
| }, | |
| { | |
| "log_id": "L1-3", | |
| "timestamp": "2026-04-25T17:44:00Z", | |
| "source": "edr", | |
| "event_type": "edr.behavior_match", | |
| "fields": { | |
| "severity": "high", | |
| "rule_id": "EDR-5070", | |
| "host": "host-033" | |
| } | |
| } | |
| ], | |
| "ground_truth": "quarantine_host", | |
| "triggering_log_id": "L1-3", | |
| "stage": "stage4_adversarial", | |
| "seed": 93005, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": -1.0, | |
| "reward_breakdown": { | |
| "missed_malicious": -1.0 | |
| }, | |
| "correct": false | |
| }, | |
| "trained": { | |
| "raw_text": "Action: quarantine_host\nCitedLog: L1-3\nRationale: Host shows malware execution indicators; isolating.", | |
| "action": "quarantine_host", | |
| "cited_log_id": "L1-3", | |
| "rationale": "Host shows malware execution indicators; isolating.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-c-90031", | |
| "category": "brute_force", | |
| "severity": "medium", | |
| "summary": "brute_force: 8 event(s); first=auth.login_failure", | |
| "host": "host-001", | |
| "user": "root" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T04:02:00Z", | |
| "source": "identity", | |
| "event_type": "auth.login_failure", | |
| "fields": { | |
| "user": "root", | |
| "src_ip": "203.0.113.204" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-1", | |
| "timestamp": "2026-04-25T04:02:05Z", | |
| "source": "identity", | |
| "event_type": "auth.login_failure", | |
| "fields": { | |
| "user": "root", | |
| "src_ip": "203.0.113.157" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-2", | |
| "timestamp": "2026-04-25T04:02:10Z", | |
| "source": "identity", | |
| "event_type": "auth.login_failure", | |
| "fields": { | |
| "user": "root", | |
| "src_ip": "203.0.113.181" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-3", | |
| "timestamp": "2026-04-25T04:02:15Z", | |
| "source": "identity", | |
| "event_type": "auth.login_failure", | |
| "fields": { | |
| "user": "root", | |
| "src_ip": "203.0.113.76" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-4", | |
| "timestamp": "2026-04-25T04:02:20Z", | |
| "source": "identity", | |
| "event_type": "auth.login_failure", | |
| "fields": { | |
| "user": "root", | |
| "src_ip": "203.0.113.90" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-5", | |
| "timestamp": "2026-04-25T04:02:25Z", | |
| "source": "identity", | |
| "event_type": "auth.login_failure", | |
| "fields": { | |
| "user": "root", | |
| "src_ip": "203.0.113.42" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-6", | |
| "timestamp": "2026-04-25T04:02:30Z", | |
| "source": "identity", | |
| "event_type": "auth.login_failure", | |
| "fields": { | |
| "user": "root", | |
| "src_ip": "203.0.113.9" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-7", | |
| "timestamp": "2026-04-25T04:02:35Z", | |
| "source": "identity", | |
| "event_type": "auth.login_failure", | |
| "fields": { | |
| "user": "root", | |
| "src_ip": "203.0.113.115" | |
| } | |
| } | |
| ], | |
| "ground_truth": "monitor", | |
| "triggering_log_id": "L1-0", | |
| "stage": "stage1_basic", | |
| "seed": 90031, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": -0.9, | |
| "reward_breakdown": { | |
| "missed_malicious": -1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": false | |
| }, | |
| "trained": { | |
| "raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.", | |
| "action": "monitor", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Behavior is anomalous but reversible; raising visibility.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-l-93002", | |
| "category": "benign_noise", | |
| "severity": "low", | |
| "summary": "benign_noise: 4 event(s); first=net.beacon", | |
| "host": "host-001", | |
| "user": "user-001" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T04:30:00Z", | |
| "source": "network", | |
| "event_type": "net.beacon", | |
| "fields": { | |
| "dst_ip": "10.0.244.83", | |
| "interval_s": 60 | |
| } | |
| }, | |
| { | |
| "log_id": "L1-1", | |
| "timestamp": "2026-04-25T08:04:00Z", | |
| "source": "network", | |
| "event_type": "net.dns_query", | |
| "fields": { | |
| "domain": "internal.corp" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-2", | |
| "timestamp": "2026-04-25T08:04:02Z", | |
| "source": "network", | |
| "event_type": "net.dns_query", | |
| "fields": { | |
| "domain": "internal.corp" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-3", | |
| "timestamp": "2026-04-25T08:04:04Z", | |
| "source": "identity", | |
| "event_type": "auth.login_success", | |
| "fields": { | |
| "user": "alice", | |
| "src_ip": "10.0.0.243" | |
| } | |
| } | |
| ], | |
| "ground_truth": "monitor", | |
| "triggering_log_id": "L1-0", | |
| "stage": "stage4_adversarial", | |
| "seed": 93002, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": -0.9, | |
| "reward_breakdown": { | |
| "missed_malicious": -1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": false | |
| }, | |
| "trained": { | |
| "raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.", | |
| "action": "monitor", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Behavior is anomalous but reversible; raising visibility.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-c-90014", | |
| "category": "malware_execution", | |
| "severity": "high", | |
| "summary": "malware_execution: 2 event(s); first=proc.lolbin_use", | |
| "host": "host-001", | |
| "user": "user-001" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T01:39:00Z", | |
| "source": "endpoint", | |
| "event_type": "proc.lolbin_use", | |
| "fields": { | |
| "process": "powershell.exe", | |
| "parent_process": "chrome.exe", | |
| "cmd": "-enc AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-1", | |
| "timestamp": "2026-04-25T01:39:05Z", | |
| "source": "endpoint", | |
| "event_type": "file.write", | |
| "fields": { | |
| "path": "C:\\Users\\Public\\tmp.exe" | |
| } | |
| } | |
| ], | |
| "ground_truth": "quarantine_host", | |
| "triggering_log_id": "L1-0", | |
| "stage": "stage1_basic", | |
| "seed": 90014, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": -0.9, | |
| "reward_breakdown": { | |
| "missed_malicious": -1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": false | |
| }, | |
| "trained": { | |
| "raw_text": "Action: quarantine_host\nCitedLog: L1-0\nRationale: Host shows malware execution indicators; isolating.", | |
| "action": "quarantine_host", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Host shows malware execution indicators; isolating.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-l-93010", | |
| "category": "brute_force", | |
| "severity": "medium", | |
| "summary": "brute_force: 10 event(s); first=net.dns_query", | |
| "host": "host-001", | |
| "user": "user-001" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T05:01:00Z", | |
| "source": "network", | |
| "event_type": "net.dns_query", | |
| "fields": { | |
| "domain": "github.com" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-1", | |
| "timestamp": "2026-04-25T05:01:02Z", | |
| "source": "network", | |
| "event_type": "net.outbound_connection", | |
| "fields": { | |
| "dst_ip": "10.0.244.112", | |
| "bytes_out": 61917 | |
| } | |
| }, | |
| { | |
| "log_id": "L1-2", | |
| "timestamp": "2026-04-25T05:01:04Z", | |
| "source": "identity", | |
| "event_type": "auth.login_success", | |
| "fields": { | |
| "user": "alice", | |
| "src_ip": "10.0.0.15" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-3", | |
| "timestamp": "2026-04-25T20:05:00Z", | |
| "source": "identity", | |
| "event_type": "auth.login_failure", | |
| "fields": { | |
| "user": "admin", | |
| "src_ip": "203.0.113.215" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-4", | |
| "timestamp": "2026-04-25T20:05:05Z", | |
| "source": "identity", | |
| "event_type": "auth.login_failure", | |
| "fields": { | |
| "user": "admin", | |
| "src_ip": "203.0.113.55" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-5", | |
| "timestamp": "2026-04-25T20:05:10Z", | |
| "source": "identity", | |
| "event_type": "auth.login_failure", | |
| "fields": { | |
| "user": "admin", | |
| "src_ip": "203.0.113.156" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-6", | |
| "timestamp": "2026-04-25T20:05:15Z", | |
| "source": "identity", | |
| "event_type": "auth.login_failure", | |
| "fields": { | |
| "user": "admin", | |
| "src_ip": "203.0.113.182" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-7", | |
| "timestamp": "2026-04-25T20:05:20Z", | |
| "source": "identity", | |
| "event_type": "auth.login_failure", | |
| "fields": { | |
| "user": "admin", | |
| "src_ip": "203.0.113.15" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-8", | |
| "timestamp": "2026-04-25T20:05:25Z", | |
| "source": "identity", | |
| "event_type": "auth.login_failure", | |
| "fields": { | |
| "user": "admin", | |
| "src_ip": "203.0.113.226" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-9", | |
| "timestamp": "2026-04-25T20:05:30Z", | |
| "source": "identity", | |
| "event_type": "auth.login_failure", | |
| "fields": { | |
| "user": "admin", | |
| "src_ip": "203.0.113.237" | |
| } | |
| } | |
| ], | |
| "ground_truth": "monitor", | |
| "triggering_log_id": "L1-3", | |
| "stage": "stage4_adversarial", | |
| "seed": 93010, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": -1.0, | |
| "reward_breakdown": { | |
| "missed_malicious": -1.0 | |
| }, | |
| "correct": false | |
| }, | |
| "trained": { | |
| "raw_text": "Action: monitor\nCitedLog: L1-3\nRationale: Behavior is anomalous but reversible; raising visibility.", | |
| "action": "monitor", | |
| "cited_log_id": "L1-3", | |
| "rationale": "Behavior is anomalous but reversible; raising visibility.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-d-92032", | |
| "category": "malware_execution", | |
| "severity": "high", | |
| "summary": "malware_execution: 3 event(s); first=net.dns_query", | |
| "host": "host-001", | |
| "user": "user-001" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T04:00:00Z", | |
| "source": "network", | |
| "event_type": "net.dns_query", | |
| "fields": { | |
| "domain": "internal.corp" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-1", | |
| "timestamp": "2026-04-25T04:00:02Z", | |
| "source": "identity", | |
| "event_type": "auth.login_success", | |
| "fields": { | |
| "user": "dave", | |
| "src_ip": "10.0.0.103" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-2", | |
| "timestamp": "2026-04-25T15:15:00Z", | |
| "source": "edr", | |
| "event_type": "edr.behavior_match", | |
| "fields": { | |
| "severity": "high", | |
| "rule_id": "EDR-7716", | |
| "host": "host-030" | |
| } | |
| } | |
| ], | |
| "ground_truth": "quarantine_host", | |
| "triggering_log_id": "L1-2", | |
| "stage": "stage3_mixed", | |
| "seed": 92032, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": -1.0, | |
| "reward_breakdown": { | |
| "missed_malicious": -1.0 | |
| }, | |
| "correct": false | |
| }, | |
| "trained": { | |
| "raw_text": "Action: quarantine_host\nCitedLog: L1-2\nRationale: Host shows malware execution indicators; isolating.", | |
| "action": "quarantine_host", | |
| "cited_log_id": "L1-2", | |
| "rationale": "Host shows malware execution indicators; isolating.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-l-93013", | |
| "category": "malware_execution", | |
| "severity": "high", | |
| "summary": "malware_execution: 5 event(s); first=auth.login_success", | |
| "host": "host-001", | |
| "user": "carol" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T03:22:00Z", | |
| "source": "identity", | |
| "event_type": "auth.login_success", | |
| "fields": { | |
| "user": "carol", | |
| "src_ip": "10.0.0.18" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-1", | |
| "timestamp": "2026-04-25T03:22:02Z", | |
| "source": "network", | |
| "event_type": "net.outbound_connection", | |
| "fields": { | |
| "dst_ip": "10.0.22.98", | |
| "bytes_out": 65184 | |
| } | |
| }, | |
| { | |
| "log_id": "L1-2", | |
| "timestamp": "2026-04-25T03:22:04Z", | |
| "source": "network", | |
| "event_type": "net.dns_query", | |
| "fields": { | |
| "domain": "github.com" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-3", | |
| "timestamp": "2026-04-25T18:32:00Z", | |
| "source": "endpoint", | |
| "event_type": "proc.lolbin_use", | |
| "fields": { | |
| "process": "powershell.exe", | |
| "parent_process": "chrome.exe", | |
| "cmd": "-enc AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-4", | |
| "timestamp": "2026-04-25T18:32:05Z", | |
| "source": "endpoint", | |
| "event_type": "file.write", | |
| "fields": { | |
| "path": "C:\\Users\\Public\\payload.exe" | |
| } | |
| } | |
| ], | |
| "ground_truth": "quarantine_host", | |
| "triggering_log_id": "L1-3", | |
| "stage": "stage4_adversarial", | |
| "seed": 93013, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": -1.0, | |
| "reward_breakdown": { | |
| "missed_malicious": -1.0 | |
| }, | |
| "correct": false | |
| }, | |
| "trained": { | |
| "raw_text": "Action: quarantine_host\nCitedLog: L1-3\nRationale: Host shows malware execution indicators; isolating.", | |
| "action": "quarantine_host", | |
| "cited_log_id": "L1-3", | |
| "rationale": "Host shows malware execution indicators; isolating.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-d-92016", | |
| "category": "phishing", | |
| "severity": "high", | |
| "summary": "phishing: 3 event(s); first=net.dns_query", | |
| "host": "host-001", | |
| "user": "user-001" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T01:35:00Z", | |
| "source": "network", | |
| "event_type": "net.dns_query", | |
| "fields": { | |
| "domain": "internal.corp" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-1", | |
| "timestamp": "2026-04-25T01:35:02Z", | |
| "source": "network", | |
| "event_type": "net.dns_query", | |
| "fields": { | |
| "domain": "google.com" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-2", | |
| "timestamp": "2026-04-25T07:28:00Z", | |
| "source": "email", | |
| "event_type": "email.link_clicked", | |
| "fields": { | |
| "url": "https://login-update.top/secure", | |
| "user": "bob" | |
| } | |
| } | |
| ], | |
| "ground_truth": "block_ip", | |
| "triggering_log_id": "L1-2", | |
| "stage": "stage3_mixed", | |
| "seed": 92016, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": -1.0, | |
| "reward_breakdown": { | |
| "missed_malicious": -1.0 | |
| }, | |
| "correct": false | |
| }, | |
| "trained": { | |
| "raw_text": "Action: block_ip\nCitedLog: L1-2\nRationale: External IP is engaged in active brute-force; blocking.", | |
| "action": "block_ip", | |
| "cited_log_id": "L1-2", | |
| "rationale": "External IP is engaged in active brute-force; blocking.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-d-92010", | |
| "category": "brute_force", | |
| "severity": "medium", | |
| "summary": "brute_force: 8 event(s); first=auth.login_success", | |
| "host": "host-001", | |
| "user": "dave" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T13:35:00Z", | |
| "source": "identity", | |
| "event_type": "auth.login_success", | |
| "fields": { | |
| "user": "dave", | |
| "src_ip": "10.0.0.212" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-1", | |
| "timestamp": "2026-04-25T13:35:02Z", | |
| "source": "identity", | |
| "event_type": "auth.login_success", | |
| "fields": { | |
| "user": "carol", | |
| "src_ip": "10.0.0.165" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-2", | |
| "timestamp": "2026-04-25T14:59:00Z", | |
| "source": "identity", | |
| "event_type": "auth.login_failure", | |
| "fields": { | |
| "user": "root", | |
| "src_ip": "203.0.113.7" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-3", | |
| "timestamp": "2026-04-25T14:59:05Z", | |
| "source": "identity", | |
| "event_type": "auth.login_failure", | |
| "fields": { | |
| "user": "root", | |
| "src_ip": "203.0.113.168" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-4", | |
| "timestamp": "2026-04-25T14:59:10Z", | |
| "source": "identity", | |
| "event_type": "auth.login_failure", | |
| "fields": { | |
| "user": "root", | |
| "src_ip": "203.0.113.196" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-5", | |
| "timestamp": "2026-04-25T14:59:15Z", | |
| "source": "identity", | |
| "event_type": "auth.login_failure", | |
| "fields": { | |
| "user": "root", | |
| "src_ip": "203.0.113.201" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-6", | |
| "timestamp": "2026-04-25T14:59:20Z", | |
| "source": "identity", | |
| "event_type": "auth.login_failure", | |
| "fields": { | |
| "user": "root", | |
| "src_ip": "203.0.113.134" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-7", | |
| "timestamp": "2026-04-25T14:59:25Z", | |
| "source": "identity", | |
| "event_type": "auth.login_failure", | |
| "fields": { | |
| "user": "root", | |
| "src_ip": "203.0.113.155" | |
| } | |
| } | |
| ], | |
| "ground_truth": "monitor", | |
| "triggering_log_id": "L1-2", | |
| "stage": "stage3_mixed", | |
| "seed": 92010, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": -1.0, | |
| "reward_breakdown": { | |
| "missed_malicious": -1.0 | |
| }, | |
| "correct": false | |
| }, | |
| "trained": { | |
| "raw_text": "Action: monitor\nCitedLog: L1-2\nRationale: Behavior is anomalous but reversible; raising visibility.", | |
| "action": "monitor", | |
| "cited_log_id": "L1-2", | |
| "rationale": "Behavior is anomalous but reversible; raising visibility.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-l-93041", | |
| "category": "malware_execution", | |
| "severity": "critical", | |
| "summary": "malware_execution: 6 event(s); first=proc.lolbin_use", | |
| "host": "host-001", | |
| "user": "user-001" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T13:22:00Z", | |
| "source": "endpoint", | |
| "event_type": "proc.lolbin_use", | |
| "fields": { | |
| "process": "powershell.exe", | |
| "parent_process": "outlook.exe", | |
| "cmd": "-enc BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-1", | |
| "timestamp": "2026-04-25T13:22:15Z", | |
| "source": "network", | |
| "event_type": "net.beacon", | |
| "fields": { | |
| "dst_ip": "203.0.113.5", | |
| "interval_s": 30 | |
| } | |
| }, | |
| { | |
| "log_id": "L1-2", | |
| "timestamp": "2026-04-25T13:22:30Z", | |
| "source": "identity", | |
| "event_type": "auth.privilege_grant", | |
| "fields": { | |
| "user": "bob", | |
| "role": "admin" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-3", | |
| "timestamp": "2026-04-25T14:00:00Z", | |
| "source": "network", | |
| "event_type": "net.outbound_connection", | |
| "fields": { | |
| "dst_ip": "10.0.1.241", | |
| "bytes_out": 11167 | |
| } | |
| }, | |
| { | |
| "log_id": "L1-4", | |
| "timestamp": "2026-04-25T14:00:02Z", | |
| "source": "identity", | |
| "event_type": "auth.login_success", | |
| "fields": { | |
| "user": "dave", | |
| "src_ip": "10.0.0.122" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-5", | |
| "timestamp": "2026-04-25T14:00:04Z", | |
| "source": "identity", | |
| "event_type": "auth.login_success", | |
| "fields": { | |
| "user": "carol", | |
| "src_ip": "10.0.0.10" | |
| } | |
| } | |
| ], | |
| "ground_truth": "escalate", | |
| "triggering_log_id": "L1-0", | |
| "stage": "stage4_adversarial", | |
| "seed": 93041, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": -0.9, | |
| "reward_breakdown": { | |
| "missed_malicious": -1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": false | |
| }, | |
| "trained": { | |
| "raw_text": "Action: escalate\nCitedLog: L1-0\nRationale: Confirmed exfiltration scale exceeds tier-1 thresholds.", | |
| "action": "escalate", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Confirmed exfiltration scale exceeds tier-1 thresholds.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-c-90016", | |
| "category": "benign_noise", | |
| "severity": "medium", | |
| "summary": "benign_noise: 1 event(s); first=net.port_scan_hit", | |
| "host": "host-001", | |
| "user": "user-001" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T04:36:00Z", | |
| "source": "network", | |
| "event_type": "net.port_scan_hit", | |
| "fields": { | |
| "src_ip": "203.0.113.245", | |
| "port": 3389 | |
| } | |
| } | |
| ], | |
| "ground_truth": "monitor", | |
| "triggering_log_id": "L1-0", | |
| "stage": "stage1_basic", | |
| "seed": 90016, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": -0.9, | |
| "reward_breakdown": { | |
| "missed_malicious": -1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": false | |
| }, | |
| "trained": { | |
| "raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.", | |
| "action": "monitor", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Behavior is anomalous but reversible; raising visibility.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-d-92011", | |
| "category": "benign_noise", | |
| "severity": "low", | |
| "summary": "benign_noise: 3 event(s); first=net.dns_query", | |
| "host": "host-001", | |
| "user": "user-001" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T05:46:00Z", | |
| "source": "network", | |
| "event_type": "net.dns_query", | |
| "fields": { | |
| "domain": "google.com" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-1", | |
| "timestamp": "2026-04-25T05:46:02Z", | |
| "source": "identity", | |
| "event_type": "auth.login_success", | |
| "fields": { | |
| "user": "carol", | |
| "src_ip": "10.0.0.188" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-2", | |
| "timestamp": "2026-04-25T07:31:00Z", | |
| "source": "network", | |
| "event_type": "net.beacon", | |
| "fields": { | |
| "dst_ip": "10.0.214.180", | |
| "interval_s": 60 | |
| } | |
| } | |
| ], | |
| "ground_truth": "monitor", | |
| "triggering_log_id": "L1-2", | |
| "stage": "stage3_mixed", | |
| "seed": 92011, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": -1.0, | |
| "reward_breakdown": { | |
| "missed_malicious": -1.0 | |
| }, | |
| "correct": false | |
| }, | |
| "trained": { | |
| "raw_text": "Action: monitor\nCitedLog: L1-2\nRationale: Behavior is anomalous but reversible; raising visibility.", | |
| "action": "monitor", | |
| "cited_log_id": "L1-2", | |
| "rationale": "Behavior is anomalous but reversible; raising visibility.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-i-91021", | |
| "category": "phishing", | |
| "severity": "high", | |
| "summary": "phishing: 2 event(s); first=email.link_clicked", | |
| "host": "host-001", | |
| "user": "bob" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T11:07:00Z", | |
| "source": "email", | |
| "event_type": "email.link_clicked", | |
| "fields": { | |
| "url": "https://login-update.cn/secure", | |
| "user": "bob" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-1", | |
| "timestamp": "2026-04-25T22:48:00Z", | |
| "source": "identity", | |
| "event_type": "auth.login_success", | |
| "fields": { | |
| "user": "alice", | |
| "src_ip": "10.0.0.138" | |
| } | |
| } | |
| ], | |
| "ground_truth": "block_ip", | |
| "triggering_log_id": "L1-0", | |
| "stage": "stage2_multi", | |
| "seed": 91021, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": -0.9, | |
| "reward_breakdown": { | |
| "missed_malicious": -1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": false | |
| }, | |
| "trained": { | |
| "raw_text": "Action: block_ip\nCitedLog: L1-0\nRationale: External IP is engaged in active brute-force; blocking.", | |
| "action": "block_ip", | |
| "cited_log_id": "L1-0", | |
| "rationale": "External IP is engaged in active brute-force; blocking.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-l-93046", | |
| "category": "c2_beacon", | |
| "severity": "high", | |
| "summary": "c2_beacon: 4 event(s); first=net.beacon", | |
| "host": "host-001", | |
| "user": "user-001" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T11:55:00Z", | |
| "source": "network", | |
| "event_type": "net.beacon", | |
| "fields": { | |
| "dst_ip": "203.0.113.5", | |
| "interval_s": 90 | |
| } | |
| }, | |
| { | |
| "log_id": "L1-1", | |
| "timestamp": "2026-04-25T18:06:00Z", | |
| "source": "identity", | |
| "event_type": "auth.login_success", | |
| "fields": { | |
| "user": "carol", | |
| "src_ip": "10.0.0.243" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-2", | |
| "timestamp": "2026-04-25T18:06:02Z", | |
| "source": "network", | |
| "event_type": "net.dns_query", | |
| "fields": { | |
| "domain": "google.com" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-3", | |
| "timestamp": "2026-04-25T18:06:04Z", | |
| "source": "network", | |
| "event_type": "net.dns_query", | |
| "fields": { | |
| "domain": "internal.corp" | |
| } | |
| } | |
| ], | |
| "ground_truth": "block_ip", | |
| "triggering_log_id": "L1-0", | |
| "stage": "stage4_adversarial", | |
| "seed": 93046, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": -0.9, | |
| "reward_breakdown": { | |
| "missed_malicious": -1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": false | |
| }, | |
| "trained": { | |
| "raw_text": "Action: block_ip\nCitedLog: L1-0\nRationale: External IP is engaged in active brute-force; blocking.", | |
| "action": "block_ip", | |
| "cited_log_id": "L1-0", | |
| "rationale": "External IP is engaged in active brute-force; blocking.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-i-91028", | |
| "category": "benign_noise", | |
| "severity": "medium", | |
| "summary": "benign_noise: 2 event(s); first=auth.login_success", | |
| "host": "host-001", | |
| "user": "carol" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T00:59:00Z", | |
| "source": "identity", | |
| "event_type": "auth.login_success", | |
| "fields": { | |
| "user": "carol", | |
| "src_ip": "10.0.0.208" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-1", | |
| "timestamp": "2026-04-25T13:57:00Z", | |
| "source": "network", | |
| "event_type": "net.port_scan_hit", | |
| "fields": { | |
| "src_ip": "203.0.113.15", | |
| "port": 22 | |
| } | |
| } | |
| ], | |
| "ground_truth": "monitor", | |
| "triggering_log_id": "L1-1", | |
| "stage": "stage2_multi", | |
| "seed": 91028, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": -1.0, | |
| "reward_breakdown": { | |
| "missed_malicious": -1.0 | |
| }, | |
| "correct": false | |
| }, | |
| "trained": { | |
| "raw_text": "Action: monitor\nCitedLog: L1-1\nRationale: Behavior is anomalous but reversible; raising visibility.", | |
| "action": "monitor", | |
| "cited_log_id": "L1-1", | |
| "rationale": "Behavior is anomalous but reversible; raising visibility.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-i-91042", | |
| "category": "benign_noise", | |
| "severity": "low", | |
| "summary": "benign_noise: 2 event(s); first=auth.login_success", | |
| "host": "host-001", | |
| "user": "dave" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T05:30:00Z", | |
| "source": "identity", | |
| "event_type": "auth.login_success", | |
| "fields": { | |
| "user": "dave", | |
| "src_ip": "10.0.0.51" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-1", | |
| "timestamp": "2026-04-25T10:03:00Z", | |
| "source": "network", | |
| "event_type": "net.outbound_connection", | |
| "fields": { | |
| "dst_ip": "10.0.41.43", | |
| "bytes_out": 90697 | |
| } | |
| } | |
| ], | |
| "ground_truth": "dismiss", | |
| "triggering_log_id": "L1-1", | |
| "stage": "stage2_multi", | |
| "seed": 91042, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": 1.0, | |
| "reward_breakdown": { | |
| "correct_action": 1.0 | |
| }, | |
| "correct": true | |
| }, | |
| "trained": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-1\nRationale: Indicators are consistent with normal user activity.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-1", | |
| "rationale": "Indicators are consistent with normal user activity.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-l-93029", | |
| "category": "c2_beacon", | |
| "severity": "high", | |
| "summary": "c2_beacon: 4 event(s); first=net.dns_query", | |
| "host": "host-001", | |
| "user": "user-001" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T13:00:00Z", | |
| "source": "network", | |
| "event_type": "net.dns_query", | |
| "fields": { | |
| "domain": "google.com" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-1", | |
| "timestamp": "2026-04-25T13:00:02Z", | |
| "source": "identity", | |
| "event_type": "auth.login_success", | |
| "fields": { | |
| "user": "alice", | |
| "src_ip": "10.0.0.234" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-2", | |
| "timestamp": "2026-04-25T13:00:04Z", | |
| "source": "network", | |
| "event_type": "net.dns_query", | |
| "fields": { | |
| "domain": "internal.corp" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-3", | |
| "timestamp": "2026-04-25T22:27:00Z", | |
| "source": "network", | |
| "event_type": "net.beacon", | |
| "fields": { | |
| "dst_ip": "185.220.101.7", | |
| "interval_s": 90 | |
| } | |
| } | |
| ], | |
| "ground_truth": "block_ip", | |
| "triggering_log_id": "L1-3", | |
| "stage": "stage4_adversarial", | |
| "seed": 93029, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": -1.0, | |
| "reward_breakdown": { | |
| "missed_malicious": -1.0 | |
| }, | |
| "correct": false | |
| }, | |
| "trained": { | |
| "raw_text": "Action: block_ip\nCitedLog: L1-3\nRationale: External IP is engaged in active brute-force; blocking.", | |
| "action": "block_ip", | |
| "cited_log_id": "L1-3", | |
| "rationale": "External IP is engaged in active brute-force; blocking.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-c-90029", | |
| "category": "phishing", | |
| "severity": "high", | |
| "summary": "phishing: 1 event(s); first=email.link_clicked", | |
| "host": "host-001", | |
| "user": "alice" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T01:21:00Z", | |
| "source": "email", | |
| "event_type": "email.link_clicked", | |
| "fields": { | |
| "url": "https://login-update.click/secure", | |
| "user": "alice" | |
| } | |
| } | |
| ], | |
| "ground_truth": "block_ip", | |
| "triggering_log_id": "L1-0", | |
| "stage": "stage1_basic", | |
| "seed": 90029, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": -0.9, | |
| "reward_breakdown": { | |
| "missed_malicious": -1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": false | |
| }, | |
| "trained": { | |
| "raw_text": "Action: block_ip\nCitedLog: L1-0\nRationale: External IP is engaged in active brute-force; blocking.", | |
| "action": "block_ip", | |
| "cited_log_id": "L1-0", | |
| "rationale": "External IP is engaged in active brute-force; blocking.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-d-92023", | |
| "category": "phishing", | |
| "severity": "high", | |
| "summary": "phishing: 3 event(s); first=net.dns_query", | |
| "host": "host-001", | |
| "user": "user-001" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T08:46:00Z", | |
| "source": "network", | |
| "event_type": "net.dns_query", | |
| "fields": { | |
| "domain": "github.com" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-1", | |
| "timestamp": "2026-04-25T08:46:02Z", | |
| "source": "identity", | |
| "event_type": "auth.login_success", | |
| "fields": { | |
| "user": "dave", | |
| "src_ip": "10.0.0.224" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-2", | |
| "timestamp": "2026-04-25T09:39:00Z", | |
| "source": "email", | |
| "event_type": "email.link_clicked", | |
| "fields": { | |
| "url": "https://login-update.click/secure", | |
| "user": "alice" | |
| } | |
| } | |
| ], | |
| "ground_truth": "block_ip", | |
| "triggering_log_id": "L1-2", | |
| "stage": "stage3_mixed", | |
| "seed": 92023, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": -1.0, | |
| "reward_breakdown": { | |
| "missed_malicious": -1.0 | |
| }, | |
| "correct": false | |
| }, | |
| "trained": { | |
| "raw_text": "Action: block_ip\nCitedLog: L1-2\nRationale: External IP is engaged in active brute-force; blocking.", | |
| "action": "block_ip", | |
| "cited_log_id": "L1-2", | |
| "rationale": "External IP is engaged in active brute-force; blocking.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-l-93039", | |
| "category": "phishing", | |
| "severity": "high", | |
| "summary": "phishing: 4 event(s); first=net.outbound_connection", | |
| "host": "host-001", | |
| "user": "user-001" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T10:31:00Z", | |
| "source": "network", | |
| "event_type": "net.outbound_connection", | |
| "fields": { | |
| "dst_ip": "10.0.184.117", | |
| "bytes_out": 54691 | |
| } | |
| }, | |
| { | |
| "log_id": "L1-1", | |
| "timestamp": "2026-04-25T10:31:02Z", | |
| "source": "network", | |
| "event_type": "net.dns_query", | |
| "fields": { | |
| "domain": "github.com" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-2", | |
| "timestamp": "2026-04-25T10:31:04Z", | |
| "source": "network", | |
| "event_type": "net.outbound_connection", | |
| "fields": { | |
| "dst_ip": "10.0.108.52", | |
| "bytes_out": 60603 | |
| } | |
| }, | |
| { | |
| "log_id": "L1-3", | |
| "timestamp": "2026-04-25T11:04:00Z", | |
| "source": "email", | |
| "event_type": "email.link_clicked", | |
| "fields": { | |
| "url": "https://login-update.ru/secure", | |
| "user": "carol" | |
| } | |
| } | |
| ], | |
| "ground_truth": "block_ip", | |
| "triggering_log_id": "L1-3", | |
| "stage": "stage4_adversarial", | |
| "seed": 93039, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": -1.0, | |
| "reward_breakdown": { | |
| "missed_malicious": -1.0 | |
| }, | |
| "correct": false | |
| }, | |
| "trained": { | |
| "raw_text": "Action: block_ip\nCitedLog: L1-3\nRationale: External IP is engaged in active brute-force; blocking.", | |
| "action": "block_ip", | |
| "cited_log_id": "L1-3", | |
| "rationale": "External IP is engaged in active brute-force; blocking.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-i-91017", | |
| "category": "benign_noise", | |
| "severity": "low", | |
| "summary": "benign_noise: 2 event(s); first=auth.login_success", | |
| "host": "host-001", | |
| "user": "dave" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T04:36:00Z", | |
| "source": "identity", | |
| "event_type": "auth.login_success", | |
| "fields": { | |
| "user": "dave", | |
| "src_ip": "10.0.0.4" | |
| } | |
| }, | |
| { | |
| "log_id": "L1-1", | |
| "timestamp": "2026-04-25T07:24:00Z", | |
| "source": "network", | |
| "event_type": "net.beacon", | |
| "fields": { | |
| "dst_ip": "10.0.216.132", | |
| "interval_s": 30 | |
| } | |
| } | |
| ], | |
| "ground_truth": "monitor", | |
| "triggering_log_id": "L1-1", | |
| "stage": "stage2_multi", | |
| "seed": 91017, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": -1.0, | |
| "reward_breakdown": { | |
| "missed_malicious": -1.0 | |
| }, | |
| "correct": false | |
| }, | |
| "trained": { | |
| "raw_text": "Action: monitor\nCitedLog: L1-1\nRationale: Behavior is anomalous but reversible; raising visibility.", | |
| "action": "monitor", | |
| "cited_log_id": "L1-1", | |
| "rationale": "Behavior is anomalous but reversible; raising visibility.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| }, | |
| { | |
| "alert": { | |
| "alert_id": "A-EVAL-c-90010", | |
| "category": "benign_noise", | |
| "severity": "medium", | |
| "summary": "benign_noise: 1 event(s); first=net.port_scan_hit", | |
| "host": "host-001", | |
| "user": "user-001" | |
| }, | |
| "events": [ | |
| { | |
| "log_id": "L1-0", | |
| "timestamp": "2026-04-25T21:48:00Z", | |
| "source": "network", | |
| "event_type": "net.port_scan_hit", | |
| "fields": { | |
| "src_ip": "203.0.113.146", | |
| "port": 445 | |
| } | |
| } | |
| ], | |
| "ground_truth": "monitor", | |
| "triggering_log_id": "L1-0", | |
| "stage": "stage1_basic", | |
| "seed": 90010, | |
| "baseline": { | |
| "raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.", | |
| "action": "dismiss", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Looks like routine activity, no clear malicious indicator.", | |
| "format_ok": true, | |
| "reward": -0.9, | |
| "reward_breakdown": { | |
| "missed_malicious": -1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": false | |
| }, | |
| "trained": { | |
| "raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.", | |
| "action": "monitor", | |
| "cited_log_id": "L1-0", | |
| "rationale": "Behavior is anomalous but reversible; raising visibility.", | |
| "format_ok": true, | |
| "reward": 1.1, | |
| "reward_breakdown": { | |
| "correct_action": 1.0, | |
| "correct_citation_bonus": 0.1 | |
| }, | |
| "correct": true | |
| } | |
| } | |
| ] | |
| } |