---
# Identity metadata for the OpenSOC environment.
name: opensoc
# Quoted so the version is loaded as a string.
version: "1.0.0"
# Folded scalar: the wrapped lines below join into a single paragraph.
description: >
  OpenSOC is a self-play OpenEnv environment for training Security Operations
  Center (SOC) triage agents. An attacker LLM crafts synthetic security
  incidents from a constrained schema; a defender LLM (the trainee) reads the
  resulting alert and log window and decides how to triage it. Ground-truth
  triage labels are computed by a deterministic schema-side verifier — never
  read from attacker text — so the environment is fully RLVR-compatible and
  resistant to self-play reward hacking.

author: opensoc
# Free-form labels for this environment.
tags:
  - openenv
  - cybersecurity
  - soc
  - self-play
  - multi-agent
  - rlvr
  - threat-detection

# Curriculum stages, listed from easiest to hardest; an LLM trainee can move
# through them in order. Each task's `id` is the value passed as
# /reset?task=<id>. Difficulty is purely the parameter distribution; the
# action space and rewards are identical across stages (every stage below
# declares the same max_steps and the same reward_range as `reward.range`).
# NOTE(review): max_steps: 2 presumably means one attacker turn followed by
# one defender turn — confirm against the environment implementation.
tasks:
  # Stage 1: single-event, unambiguous incidents.
  - id: stage1_basic
    difficulty: easy
    description: >
      Single-event incidents drawn from a small set of unambiguous templates
      (one obviously benign, one obviously malicious per category). Used to
      bootstrap defender format learning.
    max_steps: 2
    reward_range: [-1.5, 1.1]

  # Stage 2: malicious signal spread over several events in the log window.
  - id: stage2_multi
    difficulty: medium
    description: >
      Multi-event incidents where the malicious signal is spread across a
      short log window. Tests temporal reasoning and rationale citation.
    max_steps: 2
    reward_range: [-1.5, 1.1]

  # Stage 3: benign and malicious events interleaved, with look-alike benign
  # templates.
  - id: stage3_mixed
    difficulty: hard
    description: >
      Incidents where benign events are interleaved with malicious ones, and
      some benign templates closely mimic malicious patterns. Tests
      false-positive suppression.
    max_steps: 2
    reward_range: [-1.5, 1.1]

  # Stage 4: attacker-driven or held-out adversarial incidents; the eval stage.
  - id: stage4_adversarial
    difficulty: adversarial
    description: >
      Attacker-controlled distribution (when run in self-play) or
      held-out adversarial set (when run with a fixed dataset). Used as the
      eval benchmark for trained agents.
    max_steps: 2
    reward_range: [-1.5, 1.1]

# Schema of the per-turn observation. Every field is written in block style so
# nested and leaf fields read the same way.
observation_space:
  type: object
  fields:
    role:
      type: string
      enum: [attacker, defender]
      description: Which side is expected to act on this turn.
    alert:
      type: object
      description: SIEM-style alert summary visible to the defender.
      fields:
        alert_id:
          type: string
        category:
          type: string
        severity:
          type: string
          enum: [info, low, medium, high, critical]
        summary:
          type: string
        host:
          type: string
        user:
          type: string
    # The event shape is given only in the description text; no `items` schema
    # is declared for this array.
    log_window:
      type: array
      description: >
        Ordered list of log events surrounding the alert. Each event is a dict
        with log_id, timestamp, source, event_type, and a fields object.
    attacker_brief:
      type: object
      description: >
        Only populated on the attacker turn; tells the attacker the target
        ground-truth label slot it should produce an incident for.
      fields:
        target_label:
          type: string
          enum: [dismiss, monitor, quarantine_host, block_ip, escalate]
        difficulty:
          type: string
          enum: [easy, medium, hard, adversarial]
        category_hint:
          type: string
    step:
      type: integer
    max_steps:
      type: integer
    last_action_feedback:
      type: string
    done:
      type: boolean

# Schema of the action payload: one attacker tool (`craft_incident`) and one
# defender tool (`submit_triage`). Every field is written in block style.
action_space:
  type: object
  description: >
    Exactly one of `craft_incident` (attacker turn) or `submit_triage`
    (defender turn) should be non-null per /step call. Tool names are
    deliberately non-reserved (no reset/step/state/close).
  fields:
    craft_incident:
      type: object
      description: >
        Attacker action. The attacker proposes incident parameters; the env
        validates them and computes the ground-truth label deterministically
        from the params (NOT from any text the attacker writes).
      fields:
        # Same label set as the defender's `submit_triage.action` enum.
        target_label:
          type: string
          enum: [dismiss, monitor, quarantine_host, block_ip, escalate]
        category:
          type: string
        events:
          type: array
          description: List of structured events to materialize.
          items:
            type: object
            fields:
              event_type:
                type: string
              fields:
                type: object
        narrative:
          type: string
          description: Free-text scratchpad; ignored by the verifier.
    submit_triage:
      type: object
      description: Defender action; choose one triage action and cite a log id.
      fields:
        action:
          type: string
          enum: [dismiss, monitor, quarantine_host, block_ip, escalate]
        cited_log_id:
          type: string
          description: ID of the log event that drove the decision.
        rationale:
          type: string

# Reward schema: a float whose bounds equal the reward_range declared on each
# entry under `tasks`.
# NOTE(review): the attacker terms listed in the description (+1.0 on success
# and a +0.2 novelty bonus) would total 1.2 if they stack, which exceeds the
# 1.1 upper bound; likewise no single listed term reaches the -1.5 lower
# bound. Confirm both bounds against the reward implementation.
reward:
  type: float
  range: [-1.5, 1.1]
  # Folded scalar (`>`): the more-indented term lines keep their line breaks,
  # so the per-term list is preserved in the loaded string.
  description: >
    Per-turn reward.

    Defender (per submit_triage):
      +1.0 if action matches the env-computed ground-truth label
      -1.0 if action == 'dismiss' but ground truth is malicious (missed-malicious)
      -0.3 if action ∈ {block_ip, quarantine_host} and ground truth is benign (over-react)
      -0.05 if action == 'escalate' and ground truth would have been a cheaper action
      +0.1 bonus if cited_log_id matches the schema-flagged triggering event id

    Attacker (per craft_incident, scored after the defender turn):
      +1.0 iff defender misclassified AND incident passed the plausibility check
      -0.5 if the schema validator rejected the params
      +0.2 novelty bonus on rare feature combos within the rolling batch
       0.0 if implausible (gibberish penalty)

# Routes of the environment's HTTP API, each written as "<METHOD> <path>".
# Method and path are separated by exactly one space: whitespace inside a
# plain scalar is part of the loaded string, so column-alignment padding would
# make some values "GET  /path" (two spaces) and others "GET /path".
endpoints:
  reset: POST /reset
  step: POST /step
  state: GET /state
  grade: POST /grade
  tasks: GET /tasks
  health: GET /health
  demo: GET /demo  # Gradio "before vs after" UI for human reviewers

# Container settings.
# NOTE(review): presumably the port the HTTP server listens on inside the
# container — confirm against the Dockerfile / server entry point.
docker:
  port: 7860

# Reference scores, one per task id declared under `tasks`.
# NOTE(review): the metric and the agent that produced these numbers are not
# stated in this file — confirm before comparing results against them.
baseline_scores:
  stage1_basic: 0.65
  stage2_multi: 0.45
  stage3_mixed: 0.30
  stage4_adversarial: 0.15