# people_test_space01 / server / k_persona_env_environment.py
# gbenaa's picture
# persona_env OpenEnv Docker Space
# cd7277c
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Dict
from uuid import uuid4
from openenv.core.env_server.interfaces import Environment
from openenv.core.env_server.types import State
from models import PersonaAction, PersonaObservation
@dataclass
class PersonaInternal:
    """Internal persona model: fixed traits plus mutable mood and interests.

    The three string fields are placeholder traits with fixed defaults.
    ``mood`` and ``interests`` are the values that ``clamp`` keeps bounded.
    """

    # Stable attributes (placeholders).
    star_sign: str = "Taurus"
    character_type: str = "Sanguine-melancholic"
    background: str = "Arts-adjacent, lower-middle class"

    # Evolving state.
    mood: float = 0.1  # clamp() keeps this within [-1, 1]
    # Topic name -> interest weight; clamp() keeps each weight within [0, 1].
    # A factory is used so every instance owns its own dictionary.
    interests: Dict[str, float] = field(
        default_factory=lambda: dict(
            animal_welfare=0.7,
            interior_design=0.5,
            politics=0.3,
        )
    )

    def clamp(self) -> None:
        """Force ``mood`` into [-1, 1] and every interest weight into [0, 1].

        The ``interests`` dictionary is updated in place, so existing
        references to it observe the bounded values.
        """
        self.mood = max(-1.0, min(1.0, self.mood))
        weights = self.interests
        # The comprehension is fully evaluated before update() writes back,
        # and only existing keys are assigned, so key order is unchanged.
        weights.update(
            {topic: max(0.0, min(1.0, weight)) for topic, weight in weights.items()}
        )
class PersonaEnvironment(Environment):
    """Deterministic toy environment tracking one persona's mood and interests.

    Each ``step`` applies one action to the internal ``PersonaInternal``
    model, clamps the model, and returns a ``PersonaObservation`` holding a
    textual reaction plus a snapshot of the current mood and interests.
    Episodes never terminate on their own (``done`` is always ``False``) and
    the reward is always ``0.0``.
    """

    def __init__(self):
        """Create the environment with a fresh episode and a default persona."""
        # Run the base-class initialiser so any attributes it manages exist
        # on this instance before the environment is used.
        super().__init__()
        self._start_episode()

    def reset(self) -> PersonaObservation:
        """Start a new episode and return the initial observation.

        Returns:
            An observation with the reaction text ``"Persona initialised."``
            and the default persona's mood and interests.
        """
        self._start_episode()
        return self._observe("Persona initialised.")

    def step(self, action: PersonaAction) -> PersonaObservation:
        """Apply ``action`` to the persona and return the resulting observation.

        Recognised ``action.kind`` values are ``"show_content"``,
        ``"ask_question"`` and ``"advance_time"``. Any other kind leaves the
        persona untouched and yields the reaction ``"Action rejected."``.
        The step counter is incremented for every call, including rejected
        actions.

        Args:
            action: The action to apply. Missing (falsy) optional fields fall
                back to ``"unknown"`` / ``"neutral"`` / ``""`` / ``0``.

        Returns:
            An observation reflecting the persona state after clamping.
        """
        self._state.step_count += 1

        if action.kind == "show_content":
            reaction = self._apply_content(
                topic=action.topic or "unknown",
                source=action.source or "unknown",
                valence=action.valence or "neutral",
            )
        elif action.kind == "ask_question":
            reaction = self._answer_question(action.question or "")
        elif action.kind == "advance_time":
            reaction = self._advance_time(action.hours or 0)
        else:
            reaction = "Action rejected."

        # Keep mood and interests inside their valid ranges before reporting.
        self._p.clamp()
        return self._observe(reaction)

    @property
    def state(self) -> State:
        """Return the current episode state (episode id and step count)."""
        return self._state

    # Internal logic (simple, deterministic).

    def _start_episode(self) -> None:
        """Install a new episode state (fresh id, zero steps) and a default persona."""
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self._p = PersonaInternal()

    def _observe(self, reaction_text: str) -> PersonaObservation:
        """Build an observation from ``reaction_text`` and the current persona."""
        return PersonaObservation(
            reaction_text=reaction_text,
            mood=self._p.mood,
            # Copy so callers cannot mutate the persona's own dictionary.
            interests=dict(self._p.interests),
            done=False,
            reward=0.0,
        )

    def _apply_content(self, topic: str, source: str, valence: str) -> str:
        """Update mood and the interest in ``topic`` after content is shown.

        Positive content raises mood, negative content lowers it, and any
        other valence leaves it unchanged; all three raise interest in the
        topic, by decreasing amounts. The source then nudges mood further:
        ``"tabloid"``/``"ragebait"`` lower it, ``"charity"``/``"trusted"``
        raise it. A topic not seen before starts from an interest of 0.2.

        Returns:
            A sentence describing the content and the mood change applied
            (before clamping).
        """
        base = 0.02
        if valence == "positive":
            mood_delta = +0.05
            interest_delta = +base
        elif valence == "negative":
            mood_delta = -0.05
            interest_delta = +base / 2
        else:
            mood_delta = 0.0
            interest_delta = +base / 4

        if source in {"tabloid", "ragebait"}:
            mood_delta -= 0.03
        elif source in {"charity", "trusted"}:
            mood_delta += 0.02

        self._p.mood += mood_delta
        self._p.interests[topic] = self._p.interests.get(topic, 0.2) + interest_delta
        return f"Consumed {valence} content on {topic} from {source}. Mood {mood_delta:+.2f}."

    def _answer_question(self, question: str) -> str:
        """Answer ``question`` through the persona's currently strongest interest.

        Returns:
            ``"No reaction."`` for a blank question; otherwise a sentence
            naming the highest-weighted interest (first one wins on ties)
            and echoing the stripped question.
        """
        cleaned = question.strip()
        if not cleaned:
            return "No reaction."
        interests = self._p.interests
        top_interest = max(interests, key=interests.get)
        return f"Answers via {top_interest}: '{cleaned}'"

    def _advance_time(self, hours: int) -> str:
        """Decay mood and interests towards zero as time passes.

        Negative ``hours`` are treated as zero. The mood decay fraction is
        ``hours / 240`` capped at 0.2; interests decay at one fifth of that.

        Returns:
            A sentence stating how many hours were advanced.
        """
        hours = max(0, hours)
        decay = min(0.2, hours / 240.0)
        self._p.mood *= (1.0 - decay)
        interest_factor = 1.0 - decay / 5.0
        # Only existing keys are reassigned, so iterating the dict is safe.
        for topic in self._p.interests:
            self._p.interests[topic] *= interest_factor
        return f"Advanced time by {hours}h."