Spaces:
Sleeping
Sleeping
File size: 2,100 Bytes
"""
Shared Pydantic models and Enums for the BreachOS environment.

These define the API contract between the server, environment, and clients.
"""
import re
from enum import Enum
from typing import Optional

from pydantic import BaseModel, Field, field_validator
class StrategyType(str, Enum):
    """String-valued enum of the strategy kinds an attack can declare.

    Because the enum mixes in ``str``, each member compares equal to its
    lowercase wire value (e.g. ``StrategyType.ROLEPLAY == "roleplay"``).
    """

    ROLEPLAY = "roleplay"
    HYPOTHETICAL = "hypothetical"
    INJECTION = "injection"
    PERSONA_SWITCH = "persona_switch"
    AUTHORITY_CLAIM = "authority_claim"
    ENCODING = "encoding"
    MULTI_TURN = "multi_turn"
class TargetCategory(str, Enum):
    """String-valued enum of the categories an attack can target.

    Members are ``str`` subclasses, so they compare equal to their
    lowercase wire value (e.g. ``TargetCategory.PRIVACY == "privacy"``).
    """

    PRIVACY = "privacy"
    MISINFORMATION = "misinformation"
    HARMFUL_INSTRUCTIONS = "harmful_instructions"
    MANIPULATION = "manipulation"
    ILLEGAL_ACTIVITY = "illegal_activity"
# Markup fragments removed from ``AttackAction.framing``. Matched
# case-insensitively so variants such as "<SCRIPT>" or "JavaScript:" cannot
# slip past the filter.
_DANGEROUS_FRAMING_RE = re.compile(
    "|".join(re.escape(p) for p in ("<script>", "javascript:", "onerror=")),
    re.IGNORECASE,
)


class AttackAction(BaseModel):
    """A single attack submitted to the environment.

    All four fields are required. ``framing`` is sanitized by
    :meth:`sanitize_framing` after pydantic's own field validation.
    """

    strategy_type: StrategyType
    target_category: TargetCategory
    # Constrained to the closed interval [0.0, 1.0].
    intensity: float = Field(ge=0.0, le=1.0)
    # Free text, at most 500 characters as submitted.
    framing: str = Field(max_length=500)

    @field_validator("framing")
    @classmethod
    def sanitize_framing(cls, v: str) -> str:
        """Replace dangerous markup fragments and trim surrounding whitespace.

        Every occurrence of ``<script>``, ``javascript:`` or ``onerror=``
        (in any letter case) is replaced with the literal ``[FILTERED]``.

        Args:
            v: The ``framing`` value being validated.

        Returns:
            The filtered text with leading and trailing whitespace removed.
        """
        # NOTE(review): this runs as an "after" validator, so the
        # replacement text can push the stored value past the 500-character
        # limit checked on input -- confirm that is acceptable.
        return _DANGEROUS_FRAMING_RE.sub("[FILTERED]", v).strip()
class AutoAttackRequest(BaseModel):
    """Request body carrying only a strategy type and a target category.

    Both fields are required and validated against their enums.
    """

    strategy_type: StrategyType
    target_category: TargetCategory
class RedTeamObservation(BaseModel):
    """Observation payload embedded in step and reset responses.

    All fields are required. The three score fields are each validated to
    lie in the closed interval [0.0, 1.0].
    """

    defender_response: str
    # Scores: each must satisfy 0.0 <= value <= 1.0.
    defense_score: float = Field(ge=0.0, le=1.0)
    attack_success_estimate: float = Field(ge=0.0, le=1.0)
    novelty_score: float = Field(ge=0.0, le=1.0)
    # Turn counter; negative values are rejected.
    turn: int = Field(ge=0)
    episode_done: bool
    feedback: str
    episode_id: str
class EpisodeState(BaseModel):
    """Snapshot of an episode's progress counters and active flag.

    All fields are required; the integer fields carry no range
    constraints in this model.
    """

    episode_id: str
    turn: int
    max_turns: int
    attacks_so_far: int
    is_active: bool
class StepResult(BaseModel):
    """Pairs a ``RedTeamObservation`` with a float reward.

    Both fields are required.
    """

    observation: RedTeamObservation
    reward: float
class ResetResponse(BaseModel):
    """Pairs a ``RedTeamObservation`` with an episode identifier.

    Both fields are required.
    """

    observation: RedTeamObservation
    episode_id: str
|