Spaces:

ApoorvaBadoni
/

SOC-env

Sleeping

App Files Files Community

SOC-env / server /SOC_env_environment.py

ApoorvaBadoni

Upload folder using huggingface_hub

4c8f878 verified about 2 months ago

raw

history blame contribute delete

12.2 kB

	from typing import List, Literal, Optional
	from uuid import uuid4
	import random

	from openenv.core.env_server.interfaces import Environment
	from openenv.core.env_server.types import State

	try:
	from models import SOCAction, SOCObservation
	from scenarios import EASY_SCENARIOS, HARD_SCENARIOS, MEDIUM_SCENARIOS, SCENARIOS
	except ImportError:
	from ..models import SOCAction, SOCObservation
	from ..scenarios import EASY_SCENARIOS, HARD_SCENARIOS, MEDIUM_SCENARIOS, SCENARIOS

	# Index every known scenario by its "id" field so a pinned scenario can be
	# looked up directly instead of scanning the SCENARIOS list.
	SCENARIO_BY_ID = {scenario["id"]: scenario for scenario in SCENARIOS}
	# Difficulty selectors accepted by the environment constructor.
	Difficulty = Literal["easy", "medium", "hard", "random"]

	# Decisions that end the episode as soon as they are taken.
	TERMINAL_ACTIONS = {
	    "ignore",
	    "escalate",
	    "patch_system",
	}

	# Step budget per scenario difficulty.
	MAX_STEPS = {
	    "easy": 5,
	    "medium": 8,
	    "hard": 12,
	}

	# Decisions that reveal additional investigation context.
	INVESTIGATION_ACTIONS = {
	    "investigate",
	    "query_logs",
	    "check_threat_intel",
	    "run_sandbox",
	}


	class SOCEnvironment(Environment):
	    """Alert-triage environment for a simulated security operations centre.

	    Each episode presents one scenario. The agent submits decisions through
	    ``step``; investigation decisions reveal extra context, and the episode
	    ends when a terminal decision (see ``TERMINAL_ACTIONS``) is taken or the
	    step budget for the scenario's difficulty (see ``MAX_STEPS``) is used up.

	    Args:
	        difficulty: Scenario pool to draw from. Any value other than
	            "easy", "medium" or "hard" draws from all scenarios.
	        pinned_scenario_id: When set and known to ``SCENARIO_BY_ID``, every
	            episode uses that scenario; an unknown id silently falls back
	            to the difficulty pool.
	    """

	    SUPPORTS_CONCURRENT_SESSIONS: bool = True

	    def __init__(self, difficulty: Difficulty = "random", pinned_scenario_id: Optional[str] = None):
	        self.difficulty = difficulty
	        self._pinned_scenario_id = pinned_scenario_id
	        self._state = State(episode_id=str(uuid4()), step_count=0)
	        self._scenario = None
	        self._actions_taken: List[str] = []
	        self._investigation_done = False
	        self._deep_investigation_done = False
	        self._cumulative_score = 0.0
	        self._done = False
	        # Start the first episode immediately so step() can be called
	        # without an explicit reset().
	        self.reset()

	    def reset(self) -> SOCObservation:
	        """Start a new episode and return its initial observation.

	        Clears all per-episode bookkeeping, assigns a fresh episode id and
	        picks the scenario for the episode.
	        """
	        self._state = State(episode_id=str(uuid4()), step_count=0)
	        self._actions_taken = []
	        self._investigation_done = False
	        self._deep_investigation_done = False
	        self._cumulative_score = 0.0
	        self._done = False
	        self._scenario = self._pick_scenario()

	        scenario = self._scenario
	        feedback = (
	            f"New Alert: {scenario['alert_type'].replace('_', ' ').title()}\n"
	            f"Severity: {scenario['severity'].upper()}\n"
	            f"Description: {scenario['description']}\n"
	            "Tip: Use investigate, query_logs, or check_threat_intel to gather context."
	        )
	        return SOCObservation(
	            alert_type=scenario["alert_type"],
	            severity=scenario["severity"],
	            signals=scenario["initial_signals"],
	            context={},
	            available_actions=self._get_available_actions(),
	            phase="detection",
	            feedback=feedback,
	            score=0.0,
	            step=0,
	            max_steps=self._max_steps(),
	            done=False,
	            reward=0.0,
	        )

	    def step(self, action: SOCAction) -> SOCObservation:
	        """Apply one decision and return the resulting observation.

	        Reads ``action.decision``, scores it, records it, and ends the
	        episode on a terminal decision or when the step budget is reached
	        (the latter costs an extra 0.2). Calling ``step`` after the episode
	        has ended returns a terminal observation with zero reward.
	        """
	        if self._scenario is None:
	            self.reset()

	        if self._done:
	            return self._terminal_obs("Episode already ended. Call reset().")

	        self._state.step_count += 1
	        decision = action.decision
	        max_steps = self._max_steps()

	        # Score BEFORE building context: _evaluate reads the investigation
	        # flags that _build_context updates for this same decision.
	        reward, feedback, phase = self._evaluate(decision)
	        self._cumulative_score += reward
	        self._actions_taken.append(decision)

	        done = False
	        if decision in TERMINAL_ACTIONS:
	            done = True
	        elif self._state.step_count >= max_steps:
	            done = True
	            reward -= 0.2
	            self._cumulative_score -= 0.2
	            feedback += f" Max steps ({max_steps}) reached — incident unresolved."
	        self._done = done

	        # Build context based on investigation depth
	        context = self._build_context(decision)

	        return SOCObservation(
	            alert_type=self._scenario["alert_type"],
	            severity=self._scenario["severity"],
	            signals=self._scenario["initial_signals"],
	            context=context,
	            available_actions=[] if done else self._get_available_actions(),
	            phase=phase,
	            feedback=feedback,
	            score=round(self._cumulative_score, 2),
	            step=self._state.step_count,
	            max_steps=max_steps,
	            done=done,
	            reward=round(reward, 2),
	        )

	    @staticmethod
	    def _context_copy(scenario: dict, key: str) -> dict:
	        """Return a shallow copy of ``scenario[key]`` (empty dict if absent).

	        Scenario definitions are module-level data shared by every episode
	        and session, so per-step annotations such as ``_source`` must be
	        written to a copy rather than to the definition itself.
	        """
	        return dict(scenario.get(key) or {})

	    def _build_context(self, decision: str) -> dict:
	        """
	        Reveal context progressively based on investigation depth.
	        - First investigate/query_logs: reveals basic investigation_context
	        - check_threat_intel: reveals threat_intel_context if available
	        - run_sandbox: reveals sandbox_context if available
	        - Second investigate: reveals deep_investigation_context if available

	        Updates the investigation flags as a side effect. The returned dict
	        is always a fresh object; the scenario definition is never modified.
	        """
	        scenario = self._scenario

	        if decision in ("investigate", "query_logs") and not self._investigation_done:
	            self._investigation_done = True
	            context = self._context_copy(scenario, "investigation_context")
	            if decision == "query_logs":
	                context["_source"] = "SIEM log query results"
	            elif scenario.get("deep_investigation_context"):
	                # Add a note if deeper investigation is possible
	                context["_hint"] = "More context available — try check_threat_intel or run_sandbox."
	            return context

	        if decision == "check_threat_intel":
	            context = self._context_copy(scenario, "threat_intel_context")
	            if context:
	                context["_source"] = "Threat intelligence platform"
	                return context
	            if self._investigation_done:
	                return {"_note": "No additional threat intel beyond what was already found."}
	            return {"_note": "Run investigate first to correlate threat intel."}

	        if decision == "run_sandbox":
	            context = self._context_copy(scenario, "sandbox_context")
	            if not context:
	                return {"_note": "No samples available for sandbox analysis."}
	            # A successful sandbox run also counts as the deep investigation.
	            self._deep_investigation_done = True
	            context["_source"] = "Dynamic sandbox analysis"
	            return context

	        if decision == "investigate":
	            # Basic investigation is already done here, so this is a deep dive.
	            context = self._context_copy(scenario, "deep_investigation_context")
	            if context and not self._deep_investigation_done:
	                self._deep_investigation_done = True
	                context["_source"] = "Deep-dive investigation"
	                return context
	            return {"_note": "No additional context found. Consider other actions."}

	        # Every other decision (including a late query_logs) reveals nothing.
	        return {}

	    @property
	    def state(self) -> State:
	        """Current episode state (episode id and step count)."""
	        return self._state

	    def _max_steps(self) -> int:
	        """Step budget for the current scenario; 8 when unknown or unset."""
	        if self._scenario is None:
	            return 8
	        return MAX_STEPS.get(self._scenario["difficulty"], 8)

	    def _pick_scenario(self):
	        """Choose the scenario for a new episode.

	        A pinned scenario id wins when it is known; otherwise a scenario is
	        drawn at random from the pool matching ``self.difficulty``.
	        """
	        if self._pinned_scenario_id:
	            pinned = SCENARIO_BY_ID.get(self._pinned_scenario_id)
	            if pinned:
	                return pinned
	        if self.difficulty == "easy":
	            pool = EASY_SCENARIOS
	        elif self.difficulty == "medium":
	            pool = MEDIUM_SCENARIOS
	        elif self.difficulty == "hard":
	            pool = HARD_SCENARIOS
	        else:
	            pool = SCENARIOS
	        return random.choice(pool)

	    def _get_available_actions(self):
	        """Return a fresh list of every decision the agent may submit."""
	        return [
	            "ignore", "monitor", "investigate", "query_logs",
	            "check_threat_intel", "run_sandbox", "block_ip",
	            "block_account", "isolate_device", "escalate",
	            "request_mfa", "patch_system", "collect_forensics",
	        ]

	    def _deep_dive_pending(self, decision: str) -> bool:
	        """True when a repeated ``investigate`` would reveal the deep-dive context."""
	        return (
	            decision == "investigate"
	            and self._investigation_done
	            and not self._deep_investigation_done
	            and bool(self._scenario.get("deep_investigation_context"))
	        )

	    def _evaluate_investigation(self, decision: str):
	        """Score one of the ``INVESTIGATION_ACTIONS``; same return shape as ``_evaluate``."""
	        scenario = self._scenario

	        if decision == "investigate":
	            if not self._investigation_done:
	                return 0.15, "Investigation initiated. Basic context now available.", "investigation"
	            if not self._deep_investigation_done and scenario.get("deep_investigation_context"):
	                return 0.10, "Deeper investigation complete. Additional context revealed.", "investigation"
	            return 0.05, "No new findings from further investigation.", "investigation"

	        if decision == "query_logs":
	            if not self._investigation_done:
	                return 0.15, "SIEM log query complete. Context now available.", "investigation"
	            return 0.05, "Logs already queried. Try correlating with threat intel.", "investigation"

	        if decision == "check_threat_intel":
	            if scenario.get("threat_intel_context"):
	                return 0.12, "Threat intel matched. IOCs and attribution context revealed.", "investigation"
	            return 0.05, "No threat intel match found for these indicators.", "investigation"

	        # Only run_sandbox remains.
	        if scenario.get("sandbox_context"):
	            return 0.12, "Sandbox detonation complete. Malware behavior confirmed.", "investigation"
	        return 0.05, "Nothing to sandbox — no file samples available.", "investigation"

	    def _evaluate(self, decision: str):
	        """Score a decision against the current scenario.

	        Must be called before ``_build_context`` for the same decision,
	        because it reads the investigation flags that call updates.

	        Returns:
	            A ``(reward, feedback, phase)`` tuple.
	        """
	        scenario = self._scenario
	        is_fp = scenario["false_positive"]
	        correct_seq = scenario["correct_sequence"]
	        optimal = scenario["optimal_terminal"]

	        # Repeating a decision is penalised, except for the one repeated
	        # "investigate" that unlocks the deep-dive context: that step reveals
	        # new information, so it is scored as an investigation instead.
	        if decision in self._actions_taken and not self._deep_dive_pending(decision):
	            return -0.1, f"Already chose '{decision}'. Try a different approach.", "investigation"

	        # Investigation actions — always somewhat useful
	        if decision in INVESTIGATION_ACTIONS:
	            return self._evaluate_investigation(decision)

	        # False positive handling
	        if is_fp:
	            if decision == "ignore":
	                return 0.8, "Correct! This was a false positive — alert closed.", "closed"
	            if decision in TERMINAL_ACTIONS:
	                return -0.3, "Over-reaction! This was a false positive — legitimate activity disrupted.", "closed"
	            return 0.0, f"'{decision}' noted but has no effect on a false positive.", "monitoring"

	        # Real threat handling
	        if decision == "ignore":
	            return -0.5, "Dangerous! This is a real threat — ignoring it is a critical mistake.", "detection"

	        if decision == optimal and decision in TERMINAL_ACTIONS:
	            return 1.0, f"Perfect! '{decision}' is exactly the right call. Incident contained.", "resolved"

	        if decision in correct_seq:
	            idx = correct_seq.index(decision)
	            # Small bonus only for the first action of the expected sequence.
	            seq_bonus = 0.05 if idx == 0 else 0.0
	            return 0.3 + seq_bonus, f"Good step! Part of correct response sequence ({idx+1}/{len(correct_seq)}).", "containment"

	        if decision == "escalate" and scenario["severity"] in ("low", "medium"):
	            return -0.2, "Premature escalation on low/medium severity — handle at Tier-1 first.", "investigation"

	        if decision in TERMINAL_ACTIONS:
	            return -0.3, f"Wrong terminal action. Optimal response was: '{optimal}'.", "closed"

	        if decision == "monitor":
	            if scenario["severity"] in ("critical", "high"):
	                return -0.1, "Passive monitoring on a high/critical severity alert wastes time.", "monitoring"
	            return 0.05, "Monitoring in progress — gather more context before acting.", "monitoring"

	        return 0.0, f"'{decision}' noted. No significant effect on this incident.", "investigation"

	    def _terminal_obs(self, msg: str) -> SOCObservation:
	        """Build the zero-reward observation returned once the episode is over."""
	        return SOCObservation(
	            alert_type=self._scenario["alert_type"] if self._scenario else "",
	            severity="", signals=[], context={}, available_actions=[],
	            phase="closed", feedback=msg,
	            score=round(self._cumulative_score, 2),
	            step=self._state.step_count,
	            max_steps=self._max_steps(),
	            done=True, reward=0.0,
	        )