Spaces:

Rayugacodes
/

Breach-OS

Sleeping

App Files Files Community

Breach-OS / models.py

subhdotsol

refactor(models): clean up imports and add module docstring

c651e61 about 2 months ago

raw

history blame

1.99 kB

	"""
	Shared Pydantic models and Enums for the RedTeamOS environment.
	These define the API contract between the server, environment, and clients.
	"""
	from pydantic import BaseModel, Field, field_validator
	from typing import Optional
	from enum import Enum


	class StrategyType(str, Enum):
	    """Closed set of strategy identifiers used by ``AttackAction.strategy_type``.

	    The ``str`` mixin makes every member compare equal to its plain string
	    value, so ``StrategyType.ROLEPLAY == "roleplay"`` holds and members can be
	    looked up from the raw value with ``StrategyType("roleplay")``.
	    """

	    # NOTE(review): the member names suggest prompt-attack techniques; their
	    # exact semantics are defined by whatever consumes them, not by this file.
	    ROLEPLAY = "roleplay"
	    HYPOTHETICAL = "hypothetical"
	    INJECTION = "injection"
	    PERSONA_SWITCH = "persona_switch"
	    AUTHORITY_CLAIM = "authority_claim"
	    ENCODING = "encoding"
	    MULTI_TURN = "multi_turn"


	class TargetCategory(str, Enum):
	    """Closed set of category identifiers used by ``AttackAction.target_category``.

	    Members are ``str`` subclasses, so each one compares equal to its raw
	    string value and can be recovered from it via ``TargetCategory(value)``.
	    """

	    PRIVACY = "privacy"
	    MISINFORMATION = "misinformation"
	    HARMFUL_INSTRUCTIONS = "harmful_instructions"
	    MANIPULATION = "manipulation"
	    ILLEGAL_ACTIVITY = "illegal_activity"


	class AttackAction(BaseModel):
	    """One attack action: strategy, target category, intensity and framing text.

	    Attributes:
	        strategy_type: The ``StrategyType`` member selected for this action.
	        target_category: The ``TargetCategory`` member selected for this action.
	        intensity: Float constrained to the closed interval [0.0, 1.0].
	        framing: Free text of at most 500 characters on input; it is passed
	            through ``sanitize_framing`` before being stored.
	    """

	    strategy_type: StrategyType
	    target_category: TargetCategory
	    intensity: float = Field(ge=0.0, le=1.0)
	    framing: str = Field(max_length=500)

	    @field_validator("framing")
	    @classmethod
	    def sanitize_framing(cls, v: str) -> str:
	        """Replace denylisted markup fragments and trim surrounding whitespace.

	        Each occurrence of ``<script>``, ``javascript:`` or ``onerror=`` is
	        replaced with the literal marker ``[FILTERED]``. Matching ignores
	        case, so variants such as ``<SCRIPT>`` or ``JavaScript:`` are caught
	        as well; a case-sensitive check would let them through.

	        Args:
	            v: The incoming ``framing`` value.

	        Returns:
	            The filtered string with leading and trailing whitespace removed.
	        """
	        # Function-scope import so this block does not depend on a
	        # module-level ``import re`` being present.
	        import re

	        # NOTE(review): if this validator runs after the ``max_length`` check
	        # (pydantic's default "after" mode), swapping an 8-character fragment
	        # for the 10-character marker can leave the stored value slightly
	        # longer than 500 characters - confirm whether that matters.
	        dangerous = ("<script>", "javascript:", "onerror=")
	        for pattern in dangerous:
	            # re.escape treats the fragment as a literal, not as regex syntax.
	            v = re.sub(re.escape(pattern), "[FILTERED]", v, flags=re.IGNORECASE)
	        return v.strip()


	class RedTeamObservation(BaseModel):
	    """Observation payload carried by ``StepResult`` and ``ResetResponse``.

	    Attributes:
	        defender_response: Text field; no length limit is declared here.
	        defense_score: Float constrained to [0.0, 1.0].
	        attack_success_estimate: Float constrained to [0.0, 1.0].
	        novelty_score: Float constrained to [0.0, 1.0].
	        turn: Non-negative integer (``ge=0``).
	        episode_done: Boolean flag.
	        feedback: Text field.
	        episode_id: String identifier of the episode.
	    """

	    defender_response: str

	    # All three scores share the same closed-interval bounds.
	    defense_score: float = Field(ge=0.0, le=1.0)
	    attack_success_estimate: float = Field(ge=0.0, le=1.0)
	    novelty_score: float = Field(ge=0.0, le=1.0)

	    turn: int = Field(ge=0)
	    episode_done: bool
	    feedback: str
	    episode_id: str


	class EpisodeState(BaseModel):
	    """Bookkeeping fields describing one episode.

	    All fields are required. Unlike ``RedTeamObservation.turn``, the integer
	    fields here declare no range constraints.

	    Attributes:
	        episode_id: String identifier of the episode.
	        turn: Integer turn counter.
	        max_turns: Integer turn limit.
	        attacks_so_far: Integer count of attacks.
	        is_active: Boolean flag.
	    """

	    episode_id: str
	    turn: int
	    max_turns: int
	    attacks_so_far: int
	    is_active: bool


	class StepResult(BaseModel):
	    """Pairs a ``RedTeamObservation`` with a float reward.

	    Attributes:
	        observation: The observation for this result.
	        reward: Float reward value; no bounds are declared.
	    """

	    observation: RedTeamObservation
	    reward: float


	class ResetResponse(BaseModel):
	    """Pairs a ``RedTeamObservation`` with an episode identifier.

	    Attributes:
	        observation: The observation included in the response.
	        episode_id: String identifier of the episode. The nested observation
	            also has its own ``episode_id`` field.
	    """

	    observation: RedTeamObservation
	    episode_id: str