opensoc-env / data /demo_examples.json
shivam2k3's picture
OpenSOC v1
bb6a031
{
"n": 48,
"source": "placeholder",
"baseline_model": "unsloth/Qwen2.5-3B-Instruct",
"trained_adapter": null,
"examples": [
{
"alert": {
"alert_id": "A-EVAL-i-91030",
"category": "benign_noise",
"severity": "low",
"summary": "benign_noise: 2 event(s); first=auth.login_success",
"host": "host-001",
"user": "carol"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T11:58:00Z",
"source": "identity",
"event_type": "auth.login_success",
"fields": {
"user": "carol",
"src_ip": "10.0.0.187"
}
},
{
"log_id": "L1-1",
"timestamp": "2026-04-25T22:09:00Z",
"source": "network",
"event_type": "net.dns_query",
"fields": {
"domain": "internal.corp"
}
}
],
"ground_truth": "dismiss",
"triggering_log_id": "L1-1",
"stage": "stage2_multi",
"seed": 91030,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": 1.0,
"reward_breakdown": {
"correct_action": 1.0
},
"correct": true
},
"trained": {
"raw_text": "Action: dismiss\nCitedLog: L1-1\nRationale: Indicators are consistent with normal user activity.",
"action": "dismiss",
"cited_log_id": "L1-1",
"rationale": "Indicators are consistent with normal user activity.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-i-91008",
"category": "brute_force",
"severity": "medium",
"summary": "brute_force: 8 event(s); first=net.outbound_connection",
"host": "host-001",
"user": "user-001"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T11:11:00Z",
"source": "network",
"event_type": "net.outbound_connection",
"fields": {
"dst_ip": "10.0.101.112",
"bytes_out": 73324
}
},
{
"log_id": "L1-1",
"timestamp": "2026-04-25T14:47:00Z",
"source": "identity",
"event_type": "auth.login_failure",
"fields": {
"user": "alice",
"src_ip": "203.0.113.76"
}
},
{
"log_id": "L1-2",
"timestamp": "2026-04-25T14:47:05Z",
"source": "identity",
"event_type": "auth.login_failure",
"fields": {
"user": "alice",
"src_ip": "203.0.113.196"
}
},
{
"log_id": "L1-3",
"timestamp": "2026-04-25T14:47:10Z",
"source": "identity",
"event_type": "auth.login_failure",
"fields": {
"user": "alice",
"src_ip": "203.0.113.242"
}
},
{
"log_id": "L1-4",
"timestamp": "2026-04-25T14:47:15Z",
"source": "identity",
"event_type": "auth.login_failure",
"fields": {
"user": "alice",
"src_ip": "203.0.113.243"
}
},
{
"log_id": "L1-5",
"timestamp": "2026-04-25T14:47:20Z",
"source": "identity",
"event_type": "auth.login_failure",
"fields": {
"user": "alice",
"src_ip": "203.0.113.100"
}
},
{
"log_id": "L1-6",
"timestamp": "2026-04-25T14:47:25Z",
"source": "identity",
"event_type": "auth.login_failure",
"fields": {
"user": "alice",
"src_ip": "203.0.113.155"
}
},
{
"log_id": "L1-7",
"timestamp": "2026-04-25T14:47:30Z",
"source": "identity",
"event_type": "auth.login_failure",
"fields": {
"user": "alice",
"src_ip": "203.0.113.43"
}
}
],
"ground_truth": "monitor",
"triggering_log_id": "L1-1",
"stage": "stage2_multi",
"seed": 91008,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": -1.0,
"reward_breakdown": {
"missed_malicious": -1.0
},
"correct": false
},
"trained": {
"raw_text": "Action: monitor\nCitedLog: L1-1\nRationale: Behavior is anomalous but reversible; raising visibility.",
"action": "monitor",
"cited_log_id": "L1-1",
"rationale": "Behavior is anomalous but reversible; raising visibility.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-d-92026",
"category": "benign_noise",
"severity": "low",
"summary": "benign_noise: 3 event(s); first=net.dns_query",
"host": "host-001",
"user": "user-001"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T03:29:00Z",
"source": "network",
"event_type": "net.dns_query",
"fields": {
"domain": "internal.corp"
}
},
{
"log_id": "L1-1",
"timestamp": "2026-04-25T03:29:02Z",
"source": "identity",
"event_type": "auth.login_success",
"fields": {
"user": "dave",
"src_ip": "10.0.0.46"
}
},
{
"log_id": "L1-2",
"timestamp": "2026-04-25T22:52:00Z",
"source": "network",
"event_type": "net.beacon",
"fields": {
"dst_ip": "10.0.204.226",
"interval_s": 90
}
}
],
"ground_truth": "monitor",
"triggering_log_id": "L1-2",
"stage": "stage3_mixed",
"seed": 92026,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": -1.0,
"reward_breakdown": {
"missed_malicious": -1.0
},
"correct": false
},
"trained": {
"raw_text": "Action: monitor\nCitedLog: L1-2\nRationale: Behavior is anomalous but reversible; raising visibility.",
"action": "monitor",
"cited_log_id": "L1-2",
"rationale": "Behavior is anomalous but reversible; raising visibility.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-d-92043",
"category": "phishing",
"severity": "high",
"summary": "phishing: 3 event(s); first=email.link_clicked",
"host": "host-001",
"user": "bob"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T09:59:00Z",
"source": "email",
"event_type": "email.link_clicked",
"fields": {
"url": "https://login-update.cn/secure",
"user": "bob"
}
},
{
"log_id": "L1-1",
"timestamp": "2026-04-25T18:48:00Z",
"source": "network",
"event_type": "net.outbound_connection",
"fields": {
"dst_ip": "10.0.170.227",
"bytes_out": 65651
}
},
{
"log_id": "L1-2",
"timestamp": "2026-04-25T18:48:02Z",
"source": "identity",
"event_type": "auth.login_success",
"fields": {
"user": "carol",
"src_ip": "10.0.0.170"
}
}
],
"ground_truth": "block_ip",
"triggering_log_id": "L1-0",
"stage": "stage3_mixed",
"seed": 92043,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": -0.9,
"reward_breakdown": {
"missed_malicious": -1.0,
"correct_citation_bonus": 0.1
},
"correct": false
},
"trained": {
"raw_text": "Action: block_ip\nCitedLog: L1-0\nRationale: External IP is engaged in active brute-force; blocking.",
"action": "block_ip",
"cited_log_id": "L1-0",
"rationale": "External IP is engaged in active brute-force; blocking.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-i-91037",
"category": "benign_noise",
"severity": "medium",
"summary": "benign_noise: 2 event(s); first=net.outbound_connection",
"host": "host-001",
"user": "user-001"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T02:54:00Z",
"source": "network",
"event_type": "net.outbound_connection",
"fields": {
"dst_ip": "10.0.137.186",
"bytes_out": 37324
}
},
{
"log_id": "L1-1",
"timestamp": "2026-04-25T04:23:00Z",
"source": "network",
"event_type": "net.port_scan_hit",
"fields": {
"src_ip": "203.0.113.96",
"port": 5985
}
}
],
"ground_truth": "monitor",
"triggering_log_id": "L1-1",
"stage": "stage2_multi",
"seed": 91037,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": -1.0,
"reward_breakdown": {
"missed_malicious": -1.0
},
"correct": false
},
"trained": {
"raw_text": "Action: monitor\nCitedLog: L1-1\nRationale: Behavior is anomalous but reversible; raising visibility.",
"action": "monitor",
"cited_log_id": "L1-1",
"rationale": "Behavior is anomalous but reversible; raising visibility.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-d-92013",
"category": "benign_noise",
"severity": "low",
"summary": "benign_noise: 3 event(s); first=net.dns_query",
"host": "host-001",
"user": "user-001"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T06:31:00Z",
"source": "network",
"event_type": "net.dns_query",
"fields": {
"domain": "google.com"
}
},
{
"log_id": "L1-1",
"timestamp": "2026-04-25T06:31:02Z",
"source": "identity",
"event_type": "auth.login_success",
"fields": {
"user": "bob",
"src_ip": "10.0.0.194"
}
},
{
"log_id": "L1-2",
"timestamp": "2026-04-25T13:13:00Z",
"source": "identity",
"event_type": "auth.login_success",
"fields": {
"user": "bob",
"src_ip": "10.0.0.146"
}
}
],
"ground_truth": "dismiss",
"triggering_log_id": "L1-2",
"stage": "stage3_mixed",
"seed": 92013,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": 1.0,
"reward_breakdown": {
"correct_action": 1.0
},
"correct": true
},
"trained": {
"raw_text": "Action: dismiss\nCitedLog: L1-2\nRationale: Indicators are consistent with normal user activity.",
"action": "dismiss",
"cited_log_id": "L1-2",
"rationale": "Indicators are consistent with normal user activity.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-d-92049",
"category": "malware_execution",
"severity": "high",
"summary": "malware_execution: 4 event(s); first=auth.login_success",
"host": "host-001",
"user": "dave"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T00:12:00Z",
"source": "identity",
"event_type": "auth.login_success",
"fields": {
"user": "dave",
"src_ip": "10.0.0.52"
}
},
{
"log_id": "L1-1",
"timestamp": "2026-04-25T00:12:02Z",
"source": "network",
"event_type": "net.outbound_connection",
"fields": {
"dst_ip": "10.0.202.40",
"bytes_out": 80520
}
},
{
"log_id": "L1-2",
"timestamp": "2026-04-25T19:55:00Z",
"source": "endpoint",
"event_type": "proc.lolbin_use",
"fields": {
"process": "powershell.exe",
"parent_process": "excel.exe",
"cmd": "-enc AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
}
},
{
"log_id": "L1-3",
"timestamp": "2026-04-25T19:55:05Z",
"source": "endpoint",
"event_type": "file.write",
"fields": {
"path": "C:\\Users\\Public\\svc.exe"
}
}
],
"ground_truth": "quarantine_host",
"triggering_log_id": "L1-2",
"stage": "stage3_mixed",
"seed": 92049,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": -1.0,
"reward_breakdown": {
"missed_malicious": -1.0
},
"correct": false
},
"trained": {
"raw_text": "Action: quarantine_host\nCitedLog: L1-2\nRationale: Host shows malware execution indicators; isolating.",
"action": "quarantine_host",
"cited_log_id": "L1-2",
"rationale": "Host shows malware execution indicators; isolating.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-i-91048",
"category": "phishing",
"severity": "high",
"summary": "phishing: 2 event(s); first=auth.login_success",
"host": "host-001",
"user": "bob"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T05:25:00Z",
"source": "identity",
"event_type": "auth.login_success",
"fields": {
"user": "bob",
"src_ip": "10.0.0.225"
}
},
{
"log_id": "L1-1",
"timestamp": "2026-04-25T06:09:00Z",
"source": "email",
"event_type": "email.link_clicked",
"fields": {
"url": "https://login-update.ru/secure",
"user": "bob"
}
}
],
"ground_truth": "block_ip",
"triggering_log_id": "L1-1",
"stage": "stage2_multi",
"seed": 91048,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": -1.0,
"reward_breakdown": {
"missed_malicious": -1.0
},
"correct": false
},
"trained": {
"raw_text": "Action: block_ip\nCitedLog: L1-1\nRationale: External IP is engaged in active brute-force; blocking.",
"action": "block_ip",
"cited_log_id": "L1-1",
"rationale": "External IP is engaged in active brute-force; blocking.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-d-92021",
"category": "malware_execution",
"severity": "critical",
"summary": "malware_execution: 5 event(s); first=proc.lolbin_use",
"host": "host-001",
"user": "user-001"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T03:53:00Z",
"source": "endpoint",
"event_type": "proc.lolbin_use",
"fields": {
"process": "powershell.exe",
"parent_process": "outlook.exe",
"cmd": "-enc BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB"
}
},
{
"log_id": "L1-1",
"timestamp": "2026-04-25T03:53:15Z",
"source": "network",
"event_type": "net.beacon",
"fields": {
"dst_ip": "203.0.113.5",
"interval_s": 30
}
},
{
"log_id": "L1-2",
"timestamp": "2026-04-25T03:53:30Z",
"source": "identity",
"event_type": "auth.privilege_grant",
"fields": {
"user": "bob",
"role": "admin"
}
},
{
"log_id": "L1-3",
"timestamp": "2026-04-25T08:51:00Z",
"source": "network",
"event_type": "net.dns_query",
"fields": {
"domain": "internal.corp"
}
},
{
"log_id": "L1-4",
"timestamp": "2026-04-25T08:51:02Z",
"source": "network",
"event_type": "net.dns_query",
"fields": {
"domain": "google.com"
}
}
],
"ground_truth": "escalate",
"triggering_log_id": "L1-0",
"stage": "stage3_mixed",
"seed": 92021,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": -0.9,
"reward_breakdown": {
"missed_malicious": -1.0,
"correct_citation_bonus": 0.1
},
"correct": false
},
"trained": {
"raw_text": "Action: escalate\nCitedLog: L1-0\nRationale: Confirmed exfiltration scale exceeds tier-1 thresholds.",
"action": "escalate",
"cited_log_id": "L1-0",
"rationale": "Confirmed exfiltration scale exceeds tier-1 thresholds.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-c-90035",
"category": "benign_noise",
"severity": "low",
"summary": "benign_noise: 1 event(s); first=net.beacon",
"host": "host-001",
"user": "user-001"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T01:08:00Z",
"source": "network",
"event_type": "net.beacon",
"fields": {
"dst_ip": "10.0.98.8",
"interval_s": 60
}
}
],
"ground_truth": "monitor",
"triggering_log_id": "L1-0",
"stage": "stage1_basic",
"seed": 90035,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": -0.9,
"reward_breakdown": {
"missed_malicious": -1.0,
"correct_citation_bonus": 0.1
},
"correct": false
},
"trained": {
"raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.",
"action": "monitor",
"cited_log_id": "L1-0",
"rationale": "Behavior is anomalous but reversible; raising visibility.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-l-93032",
"category": "benign_noise",
"severity": "low",
"summary": "benign_noise: 4 event(s); first=auth.login_success",
"host": "host-001",
"user": "dave"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T17:30:00Z",
"source": "identity",
"event_type": "auth.login_success",
"fields": {
"user": "dave",
"src_ip": "10.0.0.56"
}
},
{
"log_id": "L1-1",
"timestamp": "2026-04-25T17:30:02Z",
"source": "network",
"event_type": "net.outbound_connection",
"fields": {
"dst_ip": "10.0.30.76",
"bytes_out": 65394
}
},
{
"log_id": "L1-2",
"timestamp": "2026-04-25T17:30:04Z",
"source": "network",
"event_type": "net.outbound_connection",
"fields": {
"dst_ip": "10.0.171.122",
"bytes_out": 2642
}
},
{
"log_id": "L1-3",
"timestamp": "2026-04-25T18:00:00Z",
"source": "identity",
"event_type": "auth.login_success",
"fields": {
"user": "dave",
"src_ip": "10.0.0.72"
}
}
],
"ground_truth": "dismiss",
"triggering_log_id": "L1-3",
"stage": "stage4_adversarial",
"seed": 93032,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": 1.0,
"reward_breakdown": {
"correct_action": 1.0
},
"correct": true
},
"trained": {
"raw_text": "Action: dismiss\nCitedLog: L1-3\nRationale: Indicators are consistent with normal user activity.",
"action": "dismiss",
"cited_log_id": "L1-3",
"rationale": "Indicators are consistent with normal user activity.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-l-93030",
"category": "benign_noise",
"severity": "low",
"summary": "benign_noise: 4 event(s); first=net.beacon",
"host": "host-001",
"user": "user-001"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T18:13:00Z",
"source": "network",
"event_type": "net.beacon",
"fields": {
"dst_ip": "10.0.122.44",
"interval_s": 60
}
},
{
"log_id": "L1-1",
"timestamp": "2026-04-25T20:00:00Z",
"source": "identity",
"event_type": "auth.login_success",
"fields": {
"user": "bob",
"src_ip": "10.0.0.88"
}
},
{
"log_id": "L1-2",
"timestamp": "2026-04-25T20:00:02Z",
"source": "network",
"event_type": "net.dns_query",
"fields": {
"domain": "github.com"
}
},
{
"log_id": "L1-3",
"timestamp": "2026-04-25T20:00:04Z",
"source": "network",
"event_type": "net.dns_query",
"fields": {
"domain": "github.com"
}
}
],
"ground_truth": "monitor",
"triggering_log_id": "L1-0",
"stage": "stage4_adversarial",
"seed": 93030,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": -0.9,
"reward_breakdown": {
"missed_malicious": -1.0,
"correct_citation_bonus": 0.1
},
"correct": false
},
"trained": {
"raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.",
"action": "monitor",
"cited_log_id": "L1-0",
"rationale": "Behavior is anomalous but reversible; raising visibility.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-i-91001",
"category": "benign_noise",
"severity": "low",
"summary": "benign_noise: 2 event(s); first=net.dns_query",
"host": "host-001",
"user": "user-001"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T03:58:00Z",
"source": "network",
"event_type": "net.dns_query",
"fields": {
"domain": "github.com"
}
},
{
"log_id": "L1-1",
"timestamp": "2026-04-25T06:16:00Z",
"source": "identity",
"event_type": "auth.login_success",
"fields": {
"user": "bob",
"src_ip": "10.0.0.120"
}
}
],
"ground_truth": "dismiss",
"triggering_log_id": "L1-1",
"stage": "stage2_multi",
"seed": 91001,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": 1.0,
"reward_breakdown": {
"correct_action": 1.0
},
"correct": true
},
"trained": {
"raw_text": "Action: dismiss\nCitedLog: L1-1\nRationale: Indicators are consistent with normal user activity.",
"action": "dismiss",
"cited_log_id": "L1-1",
"rationale": "Indicators are consistent with normal user activity.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-l-93038",
"category": "benign_noise",
"severity": "low",
"summary": "benign_noise: 4 event(s); first=net.dns_query",
"host": "host-001",
"user": "user-001"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T18:58:00Z",
"source": "network",
"event_type": "net.dns_query",
"fields": {
"domain": "internal.corp"
}
},
{
"log_id": "L1-1",
"timestamp": "2026-04-25T18:58:02Z",
"source": "identity",
"event_type": "auth.login_success",
"fields": {
"user": "bob",
"src_ip": "10.0.0.171"
}
},
{
"log_id": "L1-2",
"timestamp": "2026-04-25T18:58:04Z",
"source": "network",
"event_type": "net.outbound_connection",
"fields": {
"dst_ip": "10.0.47.203",
"bytes_out": 14699
}
},
{
"log_id": "L1-3",
"timestamp": "2026-04-25T20:59:00Z",
"source": "network",
"event_type": "net.beacon",
"fields": {
"dst_ip": "10.0.36.159",
"interval_s": 90
}
}
],
"ground_truth": "monitor",
"triggering_log_id": "L1-3",
"stage": "stage4_adversarial",
"seed": 93038,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": -1.0,
"reward_breakdown": {
"missed_malicious": -1.0
},
"correct": false
},
"trained": {
"raw_text": "Action: monitor\nCitedLog: L1-3\nRationale: Behavior is anomalous but reversible; raising visibility.",
"action": "monitor",
"cited_log_id": "L1-3",
"rationale": "Behavior is anomalous but reversible; raising visibility.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-i-91046",
"category": "malware_execution",
"severity": "high",
"summary": "malware_execution: 3 event(s); first=proc.lolbin_use",
"host": "host-001",
"user": "user-001"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T02:24:00Z",
"source": "endpoint",
"event_type": "proc.lolbin_use",
"fields": {
"process": "powershell.exe",
"parent_process": "chrome.exe",
"cmd": "-enc AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
}
},
{
"log_id": "L1-1",
"timestamp": "2026-04-25T02:24:05Z",
"source": "endpoint",
"event_type": "file.write",
"fields": {
"path": "C:\\Users\\Public\\svc.exe"
}
},
{
"log_id": "L1-2",
"timestamp": "2026-04-25T09:39:00Z",
"source": "network",
"event_type": "net.dns_query",
"fields": {
"domain": "google.com"
}
}
],
"ground_truth": "quarantine_host",
"triggering_log_id": "L1-0",
"stage": "stage2_multi",
"seed": 91046,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": -0.9,
"reward_breakdown": {
"missed_malicious": -1.0,
"correct_citation_bonus": 0.1
},
"correct": false
},
"trained": {
"raw_text": "Action: quarantine_host\nCitedLog: L1-0\nRationale: Host shows malware execution indicators; isolating.",
"action": "quarantine_host",
"cited_log_id": "L1-0",
"rationale": "Host shows malware execution indicators; isolating.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-d-92047",
"category": "malware_execution",
"severity": "high",
"summary": "malware_execution: 4 event(s); first=auth.login_success",
"host": "host-001",
"user": "carol"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T12:36:00Z",
"source": "identity",
"event_type": "auth.login_success",
"fields": {
"user": "carol",
"src_ip": "10.0.0.130"
}
},
{
"log_id": "L1-1",
"timestamp": "2026-04-25T12:36:02Z",
"source": "network",
"event_type": "net.outbound_connection",
"fields": {
"dst_ip": "10.0.164.13",
"bytes_out": 94453
}
},
{
"log_id": "L1-2",
"timestamp": "2026-04-25T16:44:00Z",
"source": "endpoint",
"event_type": "proc.lolbin_use",
"fields": {
"process": "powershell.exe",
"parent_process": "excel.exe",
"cmd": "-enc AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
}
},
{
"log_id": "L1-3",
"timestamp": "2026-04-25T16:44:05Z",
"source": "endpoint",
"event_type": "file.write",
"fields": {
"path": "C:\\Users\\Public\\svc.exe"
}
}
],
"ground_truth": "quarantine_host",
"triggering_log_id": "L1-2",
"stage": "stage3_mixed",
"seed": 92047,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": -1.0,
"reward_breakdown": {
"missed_malicious": -1.0
},
"correct": false
},
"trained": {
"raw_text": "Action: quarantine_host\nCitedLog: L1-2\nRationale: Host shows malware execution indicators; isolating.",
"action": "quarantine_host",
"cited_log_id": "L1-2",
"rationale": "Host shows malware execution indicators; isolating.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-c-90038",
"category": "benign_noise",
"severity": "medium",
"summary": "benign_noise: 1 event(s); first=net.port_scan_hit",
"host": "host-001",
"user": "user-001"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T05:08:00Z",
"source": "network",
"event_type": "net.port_scan_hit",
"fields": {
"src_ip": "203.0.113.140",
"port": 22
}
}
],
"ground_truth": "monitor",
"triggering_log_id": "L1-0",
"stage": "stage1_basic",
"seed": 90038,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": -0.9,
"reward_breakdown": {
"missed_malicious": -1.0,
"correct_citation_bonus": 0.1
},
"correct": false
},
"trained": {
"raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.",
"action": "monitor",
"cited_log_id": "L1-0",
"rationale": "Behavior is anomalous but reversible; raising visibility.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-i-91000",
"category": "benign_noise",
"severity": "low",
"summary": "benign_noise: 2 event(s); first=net.beacon",
"host": "host-001",
"user": "user-001"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T21:23:00Z",
"source": "network",
"event_type": "net.beacon",
"fields": {
"dst_ip": "10.0.61.71",
"interval_s": 60
}
},
{
"log_id": "L1-1",
"timestamp": "2026-04-25T22:28:00Z",
"source": "identity",
"event_type": "auth.login_success",
"fields": {
"user": "alice",
"src_ip": "10.0.0.76"
}
}
],
"ground_truth": "monitor",
"triggering_log_id": "L1-0",
"stage": "stage2_multi",
"seed": 91000,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": -0.9,
"reward_breakdown": {
"missed_malicious": -1.0,
"correct_citation_bonus": 0.1
},
"correct": false
},
"trained": {
"raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.",
"action": "monitor",
"cited_log_id": "L1-0",
"rationale": "Behavior is anomalous but reversible; raising visibility.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-c-90011",
"category": "malware_execution",
"severity": "high",
"summary": "malware_execution: 2 event(s); first=proc.lolbin_use",
"host": "host-001",
"user": "user-001"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T19:21:00Z",
"source": "endpoint",
"event_type": "proc.lolbin_use",
"fields": {
"process": "powershell.exe",
"parent_process": "winword.exe",
"cmd": "-enc AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
}
},
{
"log_id": "L1-1",
"timestamp": "2026-04-25T19:21:05Z",
"source": "endpoint",
"event_type": "file.write",
"fields": {
"path": "C:\\Users\\Public\\payload.exe"
}
}
],
"ground_truth": "quarantine_host",
"triggering_log_id": "L1-0",
"stage": "stage1_basic",
"seed": 90011,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": -0.9,
"reward_breakdown": {
"missed_malicious": -1.0,
"correct_citation_bonus": 0.1
},
"correct": false
},
"trained": {
"raw_text": "Action: quarantine_host\nCitedLog: L1-0\nRationale: Host shows malware execution indicators; isolating.",
"action": "quarantine_host",
"cited_log_id": "L1-0",
"rationale": "Host shows malware execution indicators; isolating.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-c-90024",
"category": "phishing",
"severity": "high",
"summary": "phishing: 1 event(s); first=email.link_clicked",
"host": "host-001",
"user": "alice"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T03:27:00Z",
"source": "email",
"event_type": "email.link_clicked",
"fields": {
"url": "https://login-update.top/secure",
"user": "alice"
}
}
],
"ground_truth": "block_ip",
"triggering_log_id": "L1-0",
"stage": "stage1_basic",
"seed": 90024,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": -0.9,
"reward_breakdown": {
"missed_malicious": -1.0,
"correct_citation_bonus": 0.1
},
"correct": false
},
"trained": {
"raw_text": "Action: block_ip\nCitedLog: L1-0\nRationale: External IP is engaged in active brute-force; blocking.",
"action": "block_ip",
"cited_log_id": "L1-0",
"rationale": "External IP is engaged in active brute-force; blocking.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-l-93004",
"category": "benign_noise",
"severity": "low",
"summary": "benign_noise: 4 event(s); first=auth.login_success",
"host": "host-001",
"user": "dave"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T04:55:00Z",
"source": "identity",
"event_type": "auth.login_success",
"fields": {
"user": "dave",
"src_ip": "10.0.0.144"
}
},
{
"log_id": "L1-1",
"timestamp": "2026-04-25T19:38:00Z",
"source": "network",
"event_type": "net.dns_query",
"fields": {
"domain": "github.com"
}
},
{
"log_id": "L1-2",
"timestamp": "2026-04-25T19:38:02Z",
"source": "network",
"event_type": "net.dns_query",
"fields": {
"domain": "github.com"
}
},
{
"log_id": "L1-3",
"timestamp": "2026-04-25T19:38:04Z",
"source": "network",
"event_type": "net.dns_query",
"fields": {
"domain": "internal.corp"
}
}
],
"ground_truth": "dismiss",
"triggering_log_id": "L1-3",
"stage": "stage4_adversarial",
"seed": 93004,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": 1.0,
"reward_breakdown": {
"correct_action": 1.0
},
"correct": true
},
"trained": {
"raw_text": "Action: dismiss\nCitedLog: L1-3\nRationale: Indicators are consistent with normal user activity.",
"action": "dismiss",
"cited_log_id": "L1-3",
"rationale": "Indicators are consistent with normal user activity.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-c-90048",
"category": "phishing",
"severity": "high",
"summary": "phishing: 1 event(s); first=email.link_clicked",
"host": "host-001",
"user": "alice"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T02:43:00Z",
"source": "email",
"event_type": "email.link_clicked",
"fields": {
"url": "https://login-update.click/secure",
"user": "alice"
}
}
],
"ground_truth": "block_ip",
"triggering_log_id": "L1-0",
"stage": "stage1_basic",
"seed": 90048,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": -0.9,
"reward_breakdown": {
"missed_malicious": -1.0,
"correct_citation_bonus": 0.1
},
"correct": false
},
"trained": {
"raw_text": "Action: block_ip\nCitedLog: L1-0\nRationale: External IP is engaged in active brute-force; blocking.",
"action": "block_ip",
"cited_log_id": "L1-0",
"rationale": "External IP is engaged in active brute-force; blocking.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-i-91040",
"category": "malware_execution",
"severity": "high",
"summary": "malware_execution: 2 event(s); first=net.outbound_connection",
"host": "host-001",
"user": "user-001"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T08:09:00Z",
"source": "network",
"event_type": "net.outbound_connection",
"fields": {
"dst_ip": "10.0.168.133",
"bytes_out": 96660
}
},
{
"log_id": "L1-1",
"timestamp": "2026-04-25T18:31:00Z",
"source": "edr",
"event_type": "edr.behavior_match",
"fields": {
"severity": "high",
"rule_id": "EDR-9974",
"host": "host-015"
}
}
],
"ground_truth": "quarantine_host",
"triggering_log_id": "L1-1",
"stage": "stage2_multi",
"seed": 91040,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": -1.0,
"reward_breakdown": {
"missed_malicious": -1.0
},
"correct": false
},
"trained": {
"raw_text": "Action: quarantine_host\nCitedLog: L1-1\nRationale: Host shows malware execution indicators; isolating.",
"action": "quarantine_host",
"cited_log_id": "L1-1",
"rationale": "Host shows malware execution indicators; isolating.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-c-90028",
"category": "benign_noise",
"severity": "low",
"summary": "benign_noise: 1 event(s); first=net.beacon",
"host": "host-001",
"user": "user-001"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T21:07:00Z",
"source": "network",
"event_type": "net.beacon",
"fields": {
"dst_ip": "10.0.168.29",
"interval_s": 60
}
}
],
"ground_truth": "monitor",
"triggering_log_id": "L1-0",
"stage": "stage1_basic",
"seed": 90028,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": -0.9,
"reward_breakdown": {
"missed_malicious": -1.0,
"correct_citation_bonus": 0.1
},
"correct": false
},
"trained": {
"raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.",
"action": "monitor",
"cited_log_id": "L1-0",
"rationale": "Behavior is anomalous but reversible; raising visibility.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-d-92000",
"category": "benign_noise",
"severity": "low",
"summary": "benign_noise: 3 event(s); first=net.outbound_connection",
"host": "host-001",
"user": "user-001"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T04:39:00Z",
"source": "network",
"event_type": "net.outbound_connection",
"fields": {
"dst_ip": "10.0.81.214",
"bytes_out": 86290
}
},
{
"log_id": "L1-1",
"timestamp": "2026-04-25T04:39:02Z",
"source": "network",
"event_type": "net.outbound_connection",
"fields": {
"dst_ip": "10.0.138.227",
"bytes_out": 53711
}
},
{
"log_id": "L1-2",
"timestamp": "2026-04-25T16:45:00Z",
"source": "network",
"event_type": "net.beacon",
"fields": {
"dst_ip": "10.0.244.186",
"interval_s": 90
}
}
],
"ground_truth": "monitor",
"triggering_log_id": "L1-2",
"stage": "stage3_mixed",
"seed": 92000,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": -1.0,
"reward_breakdown": {
"missed_malicious": -1.0
},
"correct": false
},
"trained": {
"raw_text": "Action: monitor\nCitedLog: L1-2\nRationale: Behavior is anomalous but reversible; raising visibility.",
"action": "monitor",
"cited_log_id": "L1-2",
"rationale": "Behavior is anomalous but reversible; raising visibility.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-c-90042",
"category": "benign_noise",
"severity": "low",
"summary": "benign_noise: 1 event(s); first=auth.login_success",
"host": "host-001",
"user": "carol"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T12:10:00Z",
"source": "identity",
"event_type": "auth.login_success",
"fields": {
"user": "carol",
"src_ip": "10.0.0.187"
}
}
],
"ground_truth": "dismiss",
"triggering_log_id": "L1-0",
"stage": "stage1_basic",
"seed": 90042,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
},
"trained": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Indicators are consistent with normal user activity.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Indicators are consistent with normal user activity.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-l-93005",
"category": "malware_execution",
"severity": "high",
"summary": "malware_execution: 4 event(s); first=net.dns_query",
"host": "host-001",
"user": "user-001"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T09:11:00Z",
"source": "network",
"event_type": "net.dns_query",
"fields": {
"domain": "google.com"
}
},
{
"log_id": "L1-1",
"timestamp": "2026-04-25T09:11:02Z",
"source": "identity",
"event_type": "auth.login_success",
"fields": {
"user": "bob",
"src_ip": "10.0.0.196"
}
},
{
"log_id": "L1-2",
"timestamp": "2026-04-25T09:11:04Z",
"source": "network",
"event_type": "net.outbound_connection",
"fields": {
"dst_ip": "10.0.153.118",
"bytes_out": 9827
}
},
{
"log_id": "L1-3",
"timestamp": "2026-04-25T17:44:00Z",
"source": "edr",
"event_type": "edr.behavior_match",
"fields": {
"severity": "high",
"rule_id": "EDR-5070",
"host": "host-033"
}
}
],
"ground_truth": "quarantine_host",
"triggering_log_id": "L1-3",
"stage": "stage4_adversarial",
"seed": 93005,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": -1.0,
"reward_breakdown": {
"missed_malicious": -1.0
},
"correct": false
},
"trained": {
"raw_text": "Action: quarantine_host\nCitedLog: L1-3\nRationale: Host shows malware execution indicators; isolating.",
"action": "quarantine_host",
"cited_log_id": "L1-3",
"rationale": "Host shows malware execution indicators; isolating.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-c-90031",
"category": "brute_force",
"severity": "medium",
"summary": "brute_force: 8 event(s); first=auth.login_failure",
"host": "host-001",
"user": "root"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T04:02:00Z",
"source": "identity",
"event_type": "auth.login_failure",
"fields": {
"user": "root",
"src_ip": "203.0.113.204"
}
},
{
"log_id": "L1-1",
"timestamp": "2026-04-25T04:02:05Z",
"source": "identity",
"event_type": "auth.login_failure",
"fields": {
"user": "root",
"src_ip": "203.0.113.157"
}
},
{
"log_id": "L1-2",
"timestamp": "2026-04-25T04:02:10Z",
"source": "identity",
"event_type": "auth.login_failure",
"fields": {
"user": "root",
"src_ip": "203.0.113.181"
}
},
{
"log_id": "L1-3",
"timestamp": "2026-04-25T04:02:15Z",
"source": "identity",
"event_type": "auth.login_failure",
"fields": {
"user": "root",
"src_ip": "203.0.113.76"
}
},
{
"log_id": "L1-4",
"timestamp": "2026-04-25T04:02:20Z",
"source": "identity",
"event_type": "auth.login_failure",
"fields": {
"user": "root",
"src_ip": "203.0.113.90"
}
},
{
"log_id": "L1-5",
"timestamp": "2026-04-25T04:02:25Z",
"source": "identity",
"event_type": "auth.login_failure",
"fields": {
"user": "root",
"src_ip": "203.0.113.42"
}
},
{
"log_id": "L1-6",
"timestamp": "2026-04-25T04:02:30Z",
"source": "identity",
"event_type": "auth.login_failure",
"fields": {
"user": "root",
"src_ip": "203.0.113.9"
}
},
{
"log_id": "L1-7",
"timestamp": "2026-04-25T04:02:35Z",
"source": "identity",
"event_type": "auth.login_failure",
"fields": {
"user": "root",
"src_ip": "203.0.113.115"
}
}
],
"ground_truth": "monitor",
"triggering_log_id": "L1-0",
"stage": "stage1_basic",
"seed": 90031,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": -0.9,
"reward_breakdown": {
"missed_malicious": -1.0,
"correct_citation_bonus": 0.1
},
"correct": false
},
"trained": {
"raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.",
"action": "monitor",
"cited_log_id": "L1-0",
"rationale": "Behavior is anomalous but reversible; raising visibility.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-l-93002",
"category": "benign_noise",
"severity": "low",
"summary": "benign_noise: 4 event(s); first=net.beacon",
"host": "host-001",
"user": "user-001"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T04:30:00Z",
"source": "network",
"event_type": "net.beacon",
"fields": {
"dst_ip": "10.0.244.83",
"interval_s": 60
}
},
{
"log_id": "L1-1",
"timestamp": "2026-04-25T08:04:00Z",
"source": "network",
"event_type": "net.dns_query",
"fields": {
"domain": "internal.corp"
}
},
{
"log_id": "L1-2",
"timestamp": "2026-04-25T08:04:02Z",
"source": "network",
"event_type": "net.dns_query",
"fields": {
"domain": "internal.corp"
}
},
{
"log_id": "L1-3",
"timestamp": "2026-04-25T08:04:04Z",
"source": "identity",
"event_type": "auth.login_success",
"fields": {
"user": "alice",
"src_ip": "10.0.0.243"
}
}
],
"ground_truth": "monitor",
"triggering_log_id": "L1-0",
"stage": "stage4_adversarial",
"seed": 93002,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": -0.9,
"reward_breakdown": {
"missed_malicious": -1.0,
"correct_citation_bonus": 0.1
},
"correct": false
},
"trained": {
"raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.",
"action": "monitor",
"cited_log_id": "L1-0",
"rationale": "Behavior is anomalous but reversible; raising visibility.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-c-90014",
"category": "malware_execution",
"severity": "high",
"summary": "malware_execution: 2 event(s); first=proc.lolbin_use",
"host": "host-001",
"user": "user-001"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T01:39:00Z",
"source": "endpoint",
"event_type": "proc.lolbin_use",
"fields": {
"process": "powershell.exe",
"parent_process": "chrome.exe",
"cmd": "-enc AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
}
},
{
"log_id": "L1-1",
"timestamp": "2026-04-25T01:39:05Z",
"source": "endpoint",
"event_type": "file.write",
"fields": {
"path": "C:\\Users\\Public\\tmp.exe"
}
}
],
"ground_truth": "quarantine_host",
"triggering_log_id": "L1-0",
"stage": "stage1_basic",
"seed": 90014,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": -0.9,
"reward_breakdown": {
"missed_malicious": -1.0,
"correct_citation_bonus": 0.1
},
"correct": false
},
"trained": {
"raw_text": "Action: quarantine_host\nCitedLog: L1-0\nRationale: Host shows malware execution indicators; isolating.",
"action": "quarantine_host",
"cited_log_id": "L1-0",
"rationale": "Host shows malware execution indicators; isolating.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-l-93010",
"category": "brute_force",
"severity": "medium",
"summary": "brute_force: 10 event(s); first=net.dns_query",
"host": "host-001",
"user": "user-001"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T05:01:00Z",
"source": "network",
"event_type": "net.dns_query",
"fields": {
"domain": "github.com"
}
},
{
"log_id": "L1-1",
"timestamp": "2026-04-25T05:01:02Z",
"source": "network",
"event_type": "net.outbound_connection",
"fields": {
"dst_ip": "10.0.244.112",
"bytes_out": 61917
}
},
{
"log_id": "L1-2",
"timestamp": "2026-04-25T05:01:04Z",
"source": "identity",
"event_type": "auth.login_success",
"fields": {
"user": "alice",
"src_ip": "10.0.0.15"
}
},
{
"log_id": "L1-3",
"timestamp": "2026-04-25T20:05:00Z",
"source": "identity",
"event_type": "auth.login_failure",
"fields": {
"user": "admin",
"src_ip": "203.0.113.215"
}
},
{
"log_id": "L1-4",
"timestamp": "2026-04-25T20:05:05Z",
"source": "identity",
"event_type": "auth.login_failure",
"fields": {
"user": "admin",
"src_ip": "203.0.113.55"
}
},
{
"log_id": "L1-5",
"timestamp": "2026-04-25T20:05:10Z",
"source": "identity",
"event_type": "auth.login_failure",
"fields": {
"user": "admin",
"src_ip": "203.0.113.156"
}
},
{
"log_id": "L1-6",
"timestamp": "2026-04-25T20:05:15Z",
"source": "identity",
"event_type": "auth.login_failure",
"fields": {
"user": "admin",
"src_ip": "203.0.113.182"
}
},
{
"log_id": "L1-7",
"timestamp": "2026-04-25T20:05:20Z",
"source": "identity",
"event_type": "auth.login_failure",
"fields": {
"user": "admin",
"src_ip": "203.0.113.15"
}
},
{
"log_id": "L1-8",
"timestamp": "2026-04-25T20:05:25Z",
"source": "identity",
"event_type": "auth.login_failure",
"fields": {
"user": "admin",
"src_ip": "203.0.113.226"
}
},
{
"log_id": "L1-9",
"timestamp": "2026-04-25T20:05:30Z",
"source": "identity",
"event_type": "auth.login_failure",
"fields": {
"user": "admin",
"src_ip": "203.0.113.237"
}
}
],
"ground_truth": "monitor",
"triggering_log_id": "L1-3",
"stage": "stage4_adversarial",
"seed": 93010,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": -1.0,
"reward_breakdown": {
"missed_malicious": -1.0
},
"correct": false
},
"trained": {
"raw_text": "Action: monitor\nCitedLog: L1-3\nRationale: Behavior is anomalous but reversible; raising visibility.",
"action": "monitor",
"cited_log_id": "L1-3",
"rationale": "Behavior is anomalous but reversible; raising visibility.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-d-92032",
"category": "malware_execution",
"severity": "high",
"summary": "malware_execution: 3 event(s); first=net.dns_query",
"host": "host-001",
"user": "user-001"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T04:00:00Z",
"source": "network",
"event_type": "net.dns_query",
"fields": {
"domain": "internal.corp"
}
},
{
"log_id": "L1-1",
"timestamp": "2026-04-25T04:00:02Z",
"source": "identity",
"event_type": "auth.login_success",
"fields": {
"user": "dave",
"src_ip": "10.0.0.103"
}
},
{
"log_id": "L1-2",
"timestamp": "2026-04-25T15:15:00Z",
"source": "edr",
"event_type": "edr.behavior_match",
"fields": {
"severity": "high",
"rule_id": "EDR-7716",
"host": "host-030"
}
}
],
"ground_truth": "quarantine_host",
"triggering_log_id": "L1-2",
"stage": "stage3_mixed",
"seed": 92032,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": -1.0,
"reward_breakdown": {
"missed_malicious": -1.0
},
"correct": false
},
"trained": {
"raw_text": "Action: quarantine_host\nCitedLog: L1-2\nRationale: Host shows malware execution indicators; isolating.",
"action": "quarantine_host",
"cited_log_id": "L1-2",
"rationale": "Host shows malware execution indicators; isolating.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-l-93013",
"category": "malware_execution",
"severity": "high",
"summary": "malware_execution: 5 event(s); first=auth.login_success",
"host": "host-001",
"user": "carol"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T03:22:00Z",
"source": "identity",
"event_type": "auth.login_success",
"fields": {
"user": "carol",
"src_ip": "10.0.0.18"
}
},
{
"log_id": "L1-1",
"timestamp": "2026-04-25T03:22:02Z",
"source": "network",
"event_type": "net.outbound_connection",
"fields": {
"dst_ip": "10.0.22.98",
"bytes_out": 65184
}
},
{
"log_id": "L1-2",
"timestamp": "2026-04-25T03:22:04Z",
"source": "network",
"event_type": "net.dns_query",
"fields": {
"domain": "github.com"
}
},
{
"log_id": "L1-3",
"timestamp": "2026-04-25T18:32:00Z",
"source": "endpoint",
"event_type": "proc.lolbin_use",
"fields": {
"process": "powershell.exe",
"parent_process": "chrome.exe",
"cmd": "-enc AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
}
},
{
"log_id": "L1-4",
"timestamp": "2026-04-25T18:32:05Z",
"source": "endpoint",
"event_type": "file.write",
"fields": {
"path": "C:\\Users\\Public\\payload.exe"
}
}
],
"ground_truth": "quarantine_host",
"triggering_log_id": "L1-3",
"stage": "stage4_adversarial",
"seed": 93013,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": -1.0,
"reward_breakdown": {
"missed_malicious": -1.0
},
"correct": false
},
"trained": {
"raw_text": "Action: quarantine_host\nCitedLog: L1-3\nRationale: Host shows malware execution indicators; isolating.",
"action": "quarantine_host",
"cited_log_id": "L1-3",
"rationale": "Host shows malware execution indicators; isolating.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-d-92016",
"category": "phishing",
"severity": "high",
"summary": "phishing: 3 event(s); first=net.dns_query",
"host": "host-001",
"user": "user-001"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T01:35:00Z",
"source": "network",
"event_type": "net.dns_query",
"fields": {
"domain": "internal.corp"
}
},
{
"log_id": "L1-1",
"timestamp": "2026-04-25T01:35:02Z",
"source": "network",
"event_type": "net.dns_query",
"fields": {
"domain": "google.com"
}
},
{
"log_id": "L1-2",
"timestamp": "2026-04-25T07:28:00Z",
"source": "email",
"event_type": "email.link_clicked",
"fields": {
"url": "https://login-update.top/secure",
"user": "bob"
}
}
],
"ground_truth": "block_ip",
"triggering_log_id": "L1-2",
"stage": "stage3_mixed",
"seed": 92016,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": -1.0,
"reward_breakdown": {
"missed_malicious": -1.0
},
"correct": false
},
"trained": {
"raw_text": "Action: block_ip\nCitedLog: L1-2\nRationale: External IP is engaged in active brute-force; blocking.",
"action": "block_ip",
"cited_log_id": "L1-2",
"rationale": "External IP is engaged in active brute-force; blocking.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-d-92010",
"category": "brute_force",
"severity": "medium",
"summary": "brute_force: 8 event(s); first=auth.login_success",
"host": "host-001",
"user": "dave"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T13:35:00Z",
"source": "identity",
"event_type": "auth.login_success",
"fields": {
"user": "dave",
"src_ip": "10.0.0.212"
}
},
{
"log_id": "L1-1",
"timestamp": "2026-04-25T13:35:02Z",
"source": "identity",
"event_type": "auth.login_success",
"fields": {
"user": "carol",
"src_ip": "10.0.0.165"
}
},
{
"log_id": "L1-2",
"timestamp": "2026-04-25T14:59:00Z",
"source": "identity",
"event_type": "auth.login_failure",
"fields": {
"user": "root",
"src_ip": "203.0.113.7"
}
},
{
"log_id": "L1-3",
"timestamp": "2026-04-25T14:59:05Z",
"source": "identity",
"event_type": "auth.login_failure",
"fields": {
"user": "root",
"src_ip": "203.0.113.168"
}
},
{
"log_id": "L1-4",
"timestamp": "2026-04-25T14:59:10Z",
"source": "identity",
"event_type": "auth.login_failure",
"fields": {
"user": "root",
"src_ip": "203.0.113.196"
}
},
{
"log_id": "L1-5",
"timestamp": "2026-04-25T14:59:15Z",
"source": "identity",
"event_type": "auth.login_failure",
"fields": {
"user": "root",
"src_ip": "203.0.113.201"
}
},
{
"log_id": "L1-6",
"timestamp": "2026-04-25T14:59:20Z",
"source": "identity",
"event_type": "auth.login_failure",
"fields": {
"user": "root",
"src_ip": "203.0.113.134"
}
},
{
"log_id": "L1-7",
"timestamp": "2026-04-25T14:59:25Z",
"source": "identity",
"event_type": "auth.login_failure",
"fields": {
"user": "root",
"src_ip": "203.0.113.155"
}
}
],
"ground_truth": "monitor",
"triggering_log_id": "L1-2",
"stage": "stage3_mixed",
"seed": 92010,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": -1.0,
"reward_breakdown": {
"missed_malicious": -1.0
},
"correct": false
},
"trained": {
"raw_text": "Action: monitor\nCitedLog: L1-2\nRationale: Behavior is anomalous but reversible; raising visibility.",
"action": "monitor",
"cited_log_id": "L1-2",
"rationale": "Behavior is anomalous but reversible; raising visibility.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-l-93041",
"category": "malware_execution",
"severity": "critical",
"summary": "malware_execution: 6 event(s); first=proc.lolbin_use",
"host": "host-001",
"user": "user-001"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T13:22:00Z",
"source": "endpoint",
"event_type": "proc.lolbin_use",
"fields": {
"process": "powershell.exe",
"parent_process": "outlook.exe",
"cmd": "-enc BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB"
}
},
{
"log_id": "L1-1",
"timestamp": "2026-04-25T13:22:15Z",
"source": "network",
"event_type": "net.beacon",
"fields": {
"dst_ip": "203.0.113.5",
"interval_s": 30
}
},
{
"log_id": "L1-2",
"timestamp": "2026-04-25T13:22:30Z",
"source": "identity",
"event_type": "auth.privilege_grant",
"fields": {
"user": "bob",
"role": "admin"
}
},
{
"log_id": "L1-3",
"timestamp": "2026-04-25T14:00:00Z",
"source": "network",
"event_type": "net.outbound_connection",
"fields": {
"dst_ip": "10.0.1.241",
"bytes_out": 11167
}
},
{
"log_id": "L1-4",
"timestamp": "2026-04-25T14:00:02Z",
"source": "identity",
"event_type": "auth.login_success",
"fields": {
"user": "dave",
"src_ip": "10.0.0.122"
}
},
{
"log_id": "L1-5",
"timestamp": "2026-04-25T14:00:04Z",
"source": "identity",
"event_type": "auth.login_success",
"fields": {
"user": "carol",
"src_ip": "10.0.0.10"
}
}
],
"ground_truth": "escalate",
"triggering_log_id": "L1-0",
"stage": "stage4_adversarial",
"seed": 93041,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": -0.9,
"reward_breakdown": {
"missed_malicious": -1.0,
"correct_citation_bonus": 0.1
},
"correct": false
},
"trained": {
"raw_text": "Action: escalate\nCitedLog: L1-0\nRationale: Confirmed exfiltration scale exceeds tier-1 thresholds.",
"action": "escalate",
"cited_log_id": "L1-0",
"rationale": "Confirmed exfiltration scale exceeds tier-1 thresholds.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-c-90016",
"category": "benign_noise",
"severity": "medium",
"summary": "benign_noise: 1 event(s); first=net.port_scan_hit",
"host": "host-001",
"user": "user-001"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T04:36:00Z",
"source": "network",
"event_type": "net.port_scan_hit",
"fields": {
"src_ip": "203.0.113.245",
"port": 3389
}
}
],
"ground_truth": "monitor",
"triggering_log_id": "L1-0",
"stage": "stage1_basic",
"seed": 90016,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": -0.9,
"reward_breakdown": {
"missed_malicious": -1.0,
"correct_citation_bonus": 0.1
},
"correct": false
},
"trained": {
"raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.",
"action": "monitor",
"cited_log_id": "L1-0",
"rationale": "Behavior is anomalous but reversible; raising visibility.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-d-92011",
"category": "benign_noise",
"severity": "low",
"summary": "benign_noise: 3 event(s); first=net.dns_query",
"host": "host-001",
"user": "user-001"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T05:46:00Z",
"source": "network",
"event_type": "net.dns_query",
"fields": {
"domain": "google.com"
}
},
{
"log_id": "L1-1",
"timestamp": "2026-04-25T05:46:02Z",
"source": "identity",
"event_type": "auth.login_success",
"fields": {
"user": "carol",
"src_ip": "10.0.0.188"
}
},
{
"log_id": "L1-2",
"timestamp": "2026-04-25T07:31:00Z",
"source": "network",
"event_type": "net.beacon",
"fields": {
"dst_ip": "10.0.214.180",
"interval_s": 60
}
}
],
"ground_truth": "monitor",
"triggering_log_id": "L1-2",
"stage": "stage3_mixed",
"seed": 92011,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": -1.0,
"reward_breakdown": {
"missed_malicious": -1.0
},
"correct": false
},
"trained": {
"raw_text": "Action: monitor\nCitedLog: L1-2\nRationale: Behavior is anomalous but reversible; raising visibility.",
"action": "monitor",
"cited_log_id": "L1-2",
"rationale": "Behavior is anomalous but reversible; raising visibility.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-i-91021",
"category": "phishing",
"severity": "high",
"summary": "phishing: 2 event(s); first=email.link_clicked",
"host": "host-001",
"user": "bob"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T11:07:00Z",
"source": "email",
"event_type": "email.link_clicked",
"fields": {
"url": "https://login-update.cn/secure",
"user": "bob"
}
},
{
"log_id": "L1-1",
"timestamp": "2026-04-25T22:48:00Z",
"source": "identity",
"event_type": "auth.login_success",
"fields": {
"user": "alice",
"src_ip": "10.0.0.138"
}
}
],
"ground_truth": "block_ip",
"triggering_log_id": "L1-0",
"stage": "stage2_multi",
"seed": 91021,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": -0.9,
"reward_breakdown": {
"missed_malicious": -1.0,
"correct_citation_bonus": 0.1
},
"correct": false
},
"trained": {
"raw_text": "Action: block_ip\nCitedLog: L1-0\nRationale: External IP is engaged in active brute-force; blocking.",
"action": "block_ip",
"cited_log_id": "L1-0",
"rationale": "External IP is engaged in active brute-force; blocking.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-l-93046",
"category": "c2_beacon",
"severity": "high",
"summary": "c2_beacon: 4 event(s); first=net.beacon",
"host": "host-001",
"user": "user-001"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T11:55:00Z",
"source": "network",
"event_type": "net.beacon",
"fields": {
"dst_ip": "203.0.113.5",
"interval_s": 90
}
},
{
"log_id": "L1-1",
"timestamp": "2026-04-25T18:06:00Z",
"source": "identity",
"event_type": "auth.login_success",
"fields": {
"user": "carol",
"src_ip": "10.0.0.243"
}
},
{
"log_id": "L1-2",
"timestamp": "2026-04-25T18:06:02Z",
"source": "network",
"event_type": "net.dns_query",
"fields": {
"domain": "google.com"
}
},
{
"log_id": "L1-3",
"timestamp": "2026-04-25T18:06:04Z",
"source": "network",
"event_type": "net.dns_query",
"fields": {
"domain": "internal.corp"
}
}
],
"ground_truth": "block_ip",
"triggering_log_id": "L1-0",
"stage": "stage4_adversarial",
"seed": 93046,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": -0.9,
"reward_breakdown": {
"missed_malicious": -1.0,
"correct_citation_bonus": 0.1
},
"correct": false
},
"trained": {
"raw_text": "Action: block_ip\nCitedLog: L1-0\nRationale: External IP is engaged in active brute-force; blocking.",
"action": "block_ip",
"cited_log_id": "L1-0",
"rationale": "External IP is engaged in active brute-force; blocking.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-i-91028",
"category": "benign_noise",
"severity": "medium",
"summary": "benign_noise: 2 event(s); first=auth.login_success",
"host": "host-001",
"user": "carol"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T00:59:00Z",
"source": "identity",
"event_type": "auth.login_success",
"fields": {
"user": "carol",
"src_ip": "10.0.0.208"
}
},
{
"log_id": "L1-1",
"timestamp": "2026-04-25T13:57:00Z",
"source": "network",
"event_type": "net.port_scan_hit",
"fields": {
"src_ip": "203.0.113.15",
"port": 22
}
}
],
"ground_truth": "monitor",
"triggering_log_id": "L1-1",
"stage": "stage2_multi",
"seed": 91028,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": -1.0,
"reward_breakdown": {
"missed_malicious": -1.0
},
"correct": false
},
"trained": {
"raw_text": "Action: monitor\nCitedLog: L1-1\nRationale: Behavior is anomalous but reversible; raising visibility.",
"action": "monitor",
"cited_log_id": "L1-1",
"rationale": "Behavior is anomalous but reversible; raising visibility.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-i-91042",
"category": "benign_noise",
"severity": "low",
"summary": "benign_noise: 2 event(s); first=auth.login_success",
"host": "host-001",
"user": "dave"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T05:30:00Z",
"source": "identity",
"event_type": "auth.login_success",
"fields": {
"user": "dave",
"src_ip": "10.0.0.51"
}
},
{
"log_id": "L1-1",
"timestamp": "2026-04-25T10:03:00Z",
"source": "network",
"event_type": "net.outbound_connection",
"fields": {
"dst_ip": "10.0.41.43",
"bytes_out": 90697
}
}
],
"ground_truth": "dismiss",
"triggering_log_id": "L1-1",
"stage": "stage2_multi",
"seed": 91042,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": 1.0,
"reward_breakdown": {
"correct_action": 1.0
},
"correct": true
},
"trained": {
"raw_text": "Action: dismiss\nCitedLog: L1-1\nRationale: Indicators are consistent with normal user activity.",
"action": "dismiss",
"cited_log_id": "L1-1",
"rationale": "Indicators are consistent with normal user activity.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-l-93029",
"category": "c2_beacon",
"severity": "high",
"summary": "c2_beacon: 4 event(s); first=net.dns_query",
"host": "host-001",
"user": "user-001"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T13:00:00Z",
"source": "network",
"event_type": "net.dns_query",
"fields": {
"domain": "google.com"
}
},
{
"log_id": "L1-1",
"timestamp": "2026-04-25T13:00:02Z",
"source": "identity",
"event_type": "auth.login_success",
"fields": {
"user": "alice",
"src_ip": "10.0.0.234"
}
},
{
"log_id": "L1-2",
"timestamp": "2026-04-25T13:00:04Z",
"source": "network",
"event_type": "net.dns_query",
"fields": {
"domain": "internal.corp"
}
},
{
"log_id": "L1-3",
"timestamp": "2026-04-25T22:27:00Z",
"source": "network",
"event_type": "net.beacon",
"fields": {
"dst_ip": "185.220.101.7",
"interval_s": 90
}
}
],
"ground_truth": "block_ip",
"triggering_log_id": "L1-3",
"stage": "stage4_adversarial",
"seed": 93029,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": -1.0,
"reward_breakdown": {
"missed_malicious": -1.0
},
"correct": false
},
"trained": {
"raw_text": "Action: block_ip\nCitedLog: L1-3\nRationale: External IP is engaged in active brute-force; blocking.",
"action": "block_ip",
"cited_log_id": "L1-3",
"rationale": "External IP is engaged in active brute-force; blocking.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-c-90029",
"category": "phishing",
"severity": "high",
"summary": "phishing: 1 event(s); first=email.link_clicked",
"host": "host-001",
"user": "alice"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T01:21:00Z",
"source": "email",
"event_type": "email.link_clicked",
"fields": {
"url": "https://login-update.click/secure",
"user": "alice"
}
}
],
"ground_truth": "block_ip",
"triggering_log_id": "L1-0",
"stage": "stage1_basic",
"seed": 90029,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": -0.9,
"reward_breakdown": {
"missed_malicious": -1.0,
"correct_citation_bonus": 0.1
},
"correct": false
},
"trained": {
"raw_text": "Action: block_ip\nCitedLog: L1-0\nRationale: External IP is engaged in active brute-force; blocking.",
"action": "block_ip",
"cited_log_id": "L1-0",
"rationale": "External IP is engaged in active brute-force; blocking.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-d-92023",
"category": "phishing",
"severity": "high",
"summary": "phishing: 3 event(s); first=net.dns_query",
"host": "host-001",
"user": "user-001"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T08:46:00Z",
"source": "network",
"event_type": "net.dns_query",
"fields": {
"domain": "github.com"
}
},
{
"log_id": "L1-1",
"timestamp": "2026-04-25T08:46:02Z",
"source": "identity",
"event_type": "auth.login_success",
"fields": {
"user": "dave",
"src_ip": "10.0.0.224"
}
},
{
"log_id": "L1-2",
"timestamp": "2026-04-25T09:39:00Z",
"source": "email",
"event_type": "email.link_clicked",
"fields": {
"url": "https://login-update.click/secure",
"user": "alice"
}
}
],
"ground_truth": "block_ip",
"triggering_log_id": "L1-2",
"stage": "stage3_mixed",
"seed": 92023,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": -1.0,
"reward_breakdown": {
"missed_malicious": -1.0
},
"correct": false
},
"trained": {
"raw_text": "Action: block_ip\nCitedLog: L1-2\nRationale: External IP is engaged in active brute-force; blocking.",
"action": "block_ip",
"cited_log_id": "L1-2",
"rationale": "External IP is engaged in active brute-force; blocking.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-l-93039",
"category": "phishing",
"severity": "high",
"summary": "phishing: 4 event(s); first=net.outbound_connection",
"host": "host-001",
"user": "user-001"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T10:31:00Z",
"source": "network",
"event_type": "net.outbound_connection",
"fields": {
"dst_ip": "10.0.184.117",
"bytes_out": 54691
}
},
{
"log_id": "L1-1",
"timestamp": "2026-04-25T10:31:02Z",
"source": "network",
"event_type": "net.dns_query",
"fields": {
"domain": "github.com"
}
},
{
"log_id": "L1-2",
"timestamp": "2026-04-25T10:31:04Z",
"source": "network",
"event_type": "net.outbound_connection",
"fields": {
"dst_ip": "10.0.108.52",
"bytes_out": 60603
}
},
{
"log_id": "L1-3",
"timestamp": "2026-04-25T11:04:00Z",
"source": "email",
"event_type": "email.link_clicked",
"fields": {
"url": "https://login-update.ru/secure",
"user": "carol"
}
}
],
"ground_truth": "block_ip",
"triggering_log_id": "L1-3",
"stage": "stage4_adversarial",
"seed": 93039,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": -1.0,
"reward_breakdown": {
"missed_malicious": -1.0
},
"correct": false
},
"trained": {
"raw_text": "Action: block_ip\nCitedLog: L1-3\nRationale: External IP is engaged in active brute-force; blocking.",
"action": "block_ip",
"cited_log_id": "L1-3",
"rationale": "External IP is engaged in active brute-force; blocking.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-i-91017",
"category": "benign_noise",
"severity": "low",
"summary": "benign_noise: 2 event(s); first=auth.login_success",
"host": "host-001",
"user": "dave"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T04:36:00Z",
"source": "identity",
"event_type": "auth.login_success",
"fields": {
"user": "dave",
"src_ip": "10.0.0.4"
}
},
{
"log_id": "L1-1",
"timestamp": "2026-04-25T07:24:00Z",
"source": "network",
"event_type": "net.beacon",
"fields": {
"dst_ip": "10.0.216.132",
"interval_s": 30
}
}
],
"ground_truth": "monitor",
"triggering_log_id": "L1-1",
"stage": "stage2_multi",
"seed": 91017,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": -1.0,
"reward_breakdown": {
"missed_malicious": -1.0
},
"correct": false
},
"trained": {
"raw_text": "Action: monitor\nCitedLog: L1-1\nRationale: Behavior is anomalous but reversible; raising visibility.",
"action": "monitor",
"cited_log_id": "L1-1",
"rationale": "Behavior is anomalous but reversible; raising visibility.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
},
{
"alert": {
"alert_id": "A-EVAL-c-90010",
"category": "benign_noise",
"severity": "medium",
"summary": "benign_noise: 1 event(s); first=net.port_scan_hit",
"host": "host-001",
"user": "user-001"
},
"events": [
{
"log_id": "L1-0",
"timestamp": "2026-04-25T21:48:00Z",
"source": "network",
"event_type": "net.port_scan_hit",
"fields": {
"src_ip": "203.0.113.146",
"port": 445
}
}
],
"ground_truth": "monitor",
"triggering_log_id": "L1-0",
"stage": "stage1_basic",
"seed": 90010,
"baseline": {
"raw_text": "Action: dismiss\nCitedLog: L1-0\nRationale: Looks like routine activity, no clear malicious indicator.",
"action": "dismiss",
"cited_log_id": "L1-0",
"rationale": "Looks like routine activity, no clear malicious indicator.",
"format_ok": true,
"reward": -0.9,
"reward_breakdown": {
"missed_malicious": -1.0,
"correct_citation_bonus": 0.1
},
"correct": false
},
"trained": {
"raw_text": "Action: monitor\nCitedLog: L1-0\nRationale: Behavior is anomalous but reversible; raising visibility.",
"action": "monitor",
"cited_log_id": "L1-0",
"rationale": "Behavior is anomalous but reversible; raising visibility.",
"format_ok": true,
"reward": 1.1,
"reward_breakdown": {
"correct_action": 1.0,
"correct_citation_bonus": 0.1
},
"correct": true
}
}
]
}