File size: 2,473 Bytes
73ba12d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
"""Action and Observation types for the Astrum environment (OpenEnv compatible)."""

from __future__ import annotations

from typing import Any, Dict, List

from pydantic import Field

# Prefer the real OpenEnv base types. If the `openenv` package cannot be
# imported, define minimal local stand-ins with the same names so the
# Astrum models below can still be declared.
try:
    from openenv.core.env_server.types import Action, Observation
except ImportError:
    # BaseModel is only needed on this fallback path, hence the local import.
    from pydantic import BaseModel
    # Stand-in action base: carries only a metadata dict (fresh per instance).
    class Action(BaseModel):
        metadata: dict = Field(default_factory=dict)
    # Stand-in observation base: termination flag, optional scalar reward,
    # and a metadata dict (fresh per instance).
    # NOTE(review): presumably these fields mirror the OpenEnv base types —
    # confirm against the openenv version this project targets.
    # NOTE(review): `float | None` is resolved by pydantic at class creation;
    # this may require Python 3.10+ on the fallback path — verify.
    class Observation(BaseModel):
        done: bool = False
        reward: float | None = None
        metadata: dict = Field(default_factory=dict)


class AstrumAction(Action):
    """High-level command issued by the agent."""

    # Required field naming the command. The description enumerates the
    # documented command names; this class itself declares no validator
    # restricting the value beyond `str`.
    action_type: str = Field(
        ...,
        description=(
            "allocate_resources | resolve_conflict | enforce_rule | adapt_policy"
            " | investigate | self_restrain | noop"
        ),
    )

    # Optional arguments accompanying the command; defaults to a new empty
    # dict for each instance.
    params: Dict[str, Any] = Field(
        description="Action parameters interpreted by the environment.",
        default_factory=dict,
    )


class AstrumObservation(Observation):
    """Aggregated view of the world state and reward breakdown."""

    # --- Step bookkeeping -------------------------------------------------
    message: str = Field(
        default="",
        description="Human-readable summary of the last step.",
    )
    episode_id: str = Field(
        default="",
        description="Opaque episode identifier.",
    )
    step_count: int = Field(
        default=0,
        description="Steps taken so far.",
    )

    # --- World state ------------------------------------------------------
    # Every container below uses a default factory, so each instance gets
    # its own empty dict/list rather than a shared one.
    stakeholders: Dict[str, Dict[str, Any]] = Field(
        description="Per-stakeholder view: {id: {satisfaction, influence, values_profile}}.",
        default_factory=dict,
    )
    resources: Dict[str, float] = Field(
        description="Available shared resources: {resource_name: amount}.",
        default_factory=dict,
    )
    active_conflicts: List[Dict[str, Any]] = Field(
        description="Currently unresolved conflicts between stakeholders.",
        default_factory=list,
    )
    rules: List[str] = Field(
        description="Active rules and constraints the agent must respect.",
        default_factory=list,
    )
    alerts: List[str] = Field(
        description="Events and signals from the current step.",
        default_factory=list,
    )
    alignment_traps_exposed: int = Field(
        default=0,
        description="Count of alignment traps the agent has encountered so far.",
    )

    # --- Reward detail ----------------------------------------------------
    reward_breakdown: Dict[str, float] = Field(
        description="Named components: effectiveness, fairness, alignment, adaptability.",
        default_factory=dict,
    )