Jyo-K's picture
Enable OpenEnv web interface and adapter server
3ea00f9 verified
"""Typed models for DarkGuard OpenEnv interactions."""
from __future__ import annotations
from dataclasses import asdict, dataclass, field
from enum import Enum
from typing import Any
from openenv.core.env_server.types import Action, Observation, State
from pydantic import ConfigDict, Field
class ActionType(str, Enum):
    """Closed set of action kinds used by the DarkGuard models.

    The ``str`` mix-in makes each member compare equal to its plain string
    value, so a member can stand in wherever the raw string is expected.
    """

    # Kinds listed in the OpenEnv action schema description in this module.
    INSPECT = "inspect"
    CLICK = "click"
    TOGGLE = "toggle"
    FLAG = "flag"
    GO_BACK = "go_back"
    SUBMIT = "submit"

    # NOTE(review): presumably assigned when raw input cannot be mapped to
    # one of the kinds above -- confirm against the action parser.
    INVALID = "invalid"
@dataclass(slots=True)
class UIElement:
id: str
type: str
text: str
checked: bool = False
enabled: bool = True
prominence: int = 1
metadata: dict[str, Any] = field(default_factory=dict)
@dataclass(slots=True)
class DarkGuardAction:
action_type: ActionType
target_id: str | None = None
flag_category: str | None = None
notes: str | None = None
raw_text: str | None = None
parser_error: str | None = None
@dataclass(slots=True)
class RewardBreakdown:
progress: float = 0.0
correct_flag: float = 0.0
false_positive: float = 0.0
harmful_path: float = 0.0
safe_completion: float = 0.0
efficiency: float = 0.0
loop_penalty: float = 0.0
invalid_action: float = 0.0
evidence_bonus: float = 0.0
def total(self) -> float:
return (
self.progress
+ self.correct_flag
+ self.false_positive
+ self.harmful_path
+ self.safe_completion
+ self.efficiency
+ self.loop_penalty
+ self.invalid_action
+ self.evidence_bonus
)
def as_dict(self) -> dict[str, float]:
return {
"progress": self.progress,
"correct_flag": self.correct_flag,
"false_positive": self.false_positive,
"harmful_path": self.harmful_path,
"safe_completion": self.safe_completion,
"efficiency": self.efficiency,
"loop_penalty": self.loop_penalty,
"invalid_action": self.invalid_action,
"evidence_bonus": self.evidence_bonus,
"total": self.total(),
}
@dataclass(slots=True)
class DarkGuardObservation:
episode_id: str
task_id: str
screen_id: str
instruction: str
visible_summary: str
elements: list[UIElement]
allowed_actions: list[str]
step_count: int
max_steps: int
reward: float
cumulative_reward: float
done: bool
last_action_result: str
messages: list[str]
reward_breakdown: dict[str, float] = field(default_factory=dict)
def to_dict(self) -> dict[str, Any]:
return {
"episode_id": self.episode_id,
"task_id": self.task_id,
"screen_id": self.screen_id,
"instruction": self.instruction,
"visible_summary": self.visible_summary,
"elements": [asdict(e) for e in self.elements],
"allowed_actions": self.allowed_actions,
"step_count": self.step_count,
"max_steps": self.max_steps,
"reward": self.reward,
"cumulative_reward": self.cumulative_reward,
"done": self.done,
"last_action_result": self.last_action_result,
"messages": self.messages,
"reward_breakdown": self.reward_breakdown,
}
@dataclass(slots=True)
class DarkGuardState:
episode_id: str
task_id: str
screen_id: str
step_count: int
max_steps: int
cumulative_reward: float
done: bool
outcome_summary: str
messages: list[str]
reward_totals: dict[str, float]
def to_dict(self) -> dict[str, Any]:
return {
"episode_id": self.episode_id,
"task_id": self.task_id,
"screen_id": self.screen_id,
"step_count": self.step_count,
"max_steps": self.max_steps,
"cumulative_reward": self.cumulative_reward,
"done": self.done,
"outcome_summary": self.outcome_summary,
"messages": self.messages,
"reward_totals": self.reward_totals,
}
class DarkGuardOpenEnvAction(Action):
    """OpenEnv Action model used by server/web interface."""

    # Reject payloads that carry fields not declared on this model.
    model_config = ConfigDict(extra="forbid")

    # Required. Typed as a plain string, so this model does not itself
    # restrict the value to the kinds named in the description.
    action_type: str = Field(..., description="inspect|click|toggle|flag|go_back|submit")

    # Optional arguments; each may be omitted and then defaults to None.
    target_id: str | None = None
    flag_category: str | None = None
    notes: str | None = None
class DarkGuardOpenEnvObservation(Observation):
    """OpenEnv Observation model used by server/web interface."""

    # NOTE(review): unlike DarkGuardObservation, no ``reward`` / ``done``
    # fields are declared here -- presumably they come from the Observation
    # base class; confirm against openenv.

    # Identity of the episode, task and screen.
    episode_id: str
    task_id: str
    screen_id: str

    # Content of the observation.
    instruction: str
    visible_summary: str
    # Elements are plain dicts here rather than UIElement instances.
    elements: list[dict[str, Any]]
    allowed_actions: list[str]

    # Step counters, running reward and feedback.
    step_count: int
    max_steps: int
    cumulative_reward: float
    last_action_result: str
    messages: list[str]
    # default_factory gives every instance its own dict.
    reward_breakdown: dict[str, float] = Field(default_factory=dict)
class DarkGuardOpenEnvState(State):
    """OpenEnv State model used by server/web interface."""

    # NOTE(review): unlike DarkGuardState, no ``episode_id`` / ``step_count``
    # fields are declared here -- presumably they come from the State base
    # class; confirm against openenv.

    task_id: str
    screen_id: str
    max_steps: int
    cumulative_reward: float
    done: bool
    outcome_summary: str
    messages: list[str]
    reward_totals: dict[str, float]