people_test_space01 / server /persona_env_environment.py
gbenaa's picture
persona_env OpenEnv Docker Space
cd7277c
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Dict, Optional, Tuple
from uuid import uuid4
from openenv.core.env_server.interfaces import Environment
from openenv.core.env_server.types import State
from models import PersonaAction, PersonaObservation
@dataclass
class PersonaInternal:
    """Mutable inner state of the simulated persona.

    Holds a few fixed descriptive attributes, the evolving ``mood`` and
    per-topic ``interests`` levels, and a very small memory of the most
    recent question and topic.
    """

    # Stable attributes (placeholders)
    star_sign: str = "Taurus"
    character_type: str = "Sanguine-melancholic"
    background: str = "Arts-adjacent, lower-middle class"

    # Evolving state
    mood: float = 0.1  # clamp() keeps this within [-1, 1]
    interests: Dict[str, float] = field(
        default_factory=lambda: dict(
            animal_welfare=0.7,
            interior_design=0.5,
            politics=0.3,
        )
    )

    # Tiny memory (for continuity)
    last_question: Optional[str] = None
    last_topic: Optional[str] = None

    def clamp(self) -> None:
        """Force ``mood`` into [-1, 1] and every interest level into [0, 1]."""
        self.mood = max(-1.0, min(1.0, self.mood))
        # Compute all bounded levels first, then write them back into the
        # same dict object so existing references to it stay valid.
        bounded = {
            name: max(0.0, min(1.0, level))
            for name, level in self.interests.items()
        }
        self.interests.update(bounded)

    def bump_interest(self, topic: str, delta: float) -> None:
        """Add ``delta`` to the level of ``topic``.

        A topic that is not tracked yet starts from a baseline of 0.2.
        The result is not bounded here; call ``clamp()`` afterwards.
        """
        current_level = self.interests.get(topic, 0.2)
        self.interests[topic] = current_level + delta
class PersonaEnvironment(Environment):
    """Deterministic environment built around one simulated persona.

    Each ``step`` applies a single action (``show_content``,
    ``ask_question`` or ``advance_time``) to the persona's internal state
    and returns an observation carrying a textual reaction, the current
    mood and a copy of the interest levels.  Observations always report
    ``done=False`` and ``reward=0.0``.
    """

    # Keyword triggers for questions as (keywords, topic, mood delta),
    # checked in order; the first group with a hit wins.  Matching is by
    # substring on the lower-cased question, so "decor" also fires on
    # "decorate" (and "tax" on "syntax").
    _QUESTION_TRIGGERS = (
        (
            ("ethical", "cruelty", "welfare", "rescue", "animal"),
            "animal_welfare",
            +0.01,
        ),
        (
            ("decor", "interior", "furniture", "colour", "paint", "design"),
            "interior_design",
            +0.01,
        ),
        (
            # Politics questions tend to stress this persona slightly.
            ("election", "immigration", "tax", "government", "policy", "minister", "party"),
            "politics",
            -0.01,
        ),
    )

    def __init__(self):
        # Let the base class set up whatever state it owns before adding ours.
        # NOTE(review): assumes Environment.__init__ accepts a no-argument
        # call -- confirm against the installed openenv version.
        super().__init__()
        self._begin_episode()

    def reset(self) -> PersonaObservation:
        """Start a new episode with a fresh persona and return the first observation."""
        self._begin_episode()
        return self._observe("Persona initialised.")

    def step(self, action: PersonaAction) -> PersonaObservation:
        """Apply ``action`` to the persona and return the resulting observation.

        Missing action fields fall back to defaults ("unknown" topic/source,
        "neutral" valence, empty question, 0 hours).  An unrecognised
        ``action.kind`` changes nothing except the step counter and yields
        the reaction "Action rejected.".
        """
        self._state.step_count += 1

        if action.kind == "show_content":
            reaction = self._apply_content(
                topic=action.topic or "unknown",
                source=action.source or "unknown",
                valence=action.valence or "neutral",
            )
        elif action.kind == "ask_question":
            reaction = self._answer_question(action.question or "")
        elif action.kind == "advance_time":
            reaction = self._advance_time(action.hours or 0)
        else:
            reaction = "Action rejected."

        # Handlers adjust mood/interests without bounds; re-bound them once here.
        self._p.clamp()
        return self._observe(reaction)

    @property
    def state(self) -> State:
        """Episode bookkeeping (episode id and step count)."""
        return self._state

    # Internal logic (simple, deterministic)

    def _begin_episode(self) -> None:
        """Create a new episode id, zero the step counter and reset the persona."""
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self._p = PersonaInternal()

    def _observe(self, reaction: str) -> PersonaObservation:
        """Build an observation from the current persona state."""
        return PersonaObservation(
            reaction_text=reaction,
            mood=self._p.mood,
            # Copy so that callers cannot mutate the persona through the observation.
            interests=dict(self._p.interests),
            done=False,
            reward=0.0,
        )

    def _apply_content(self, topic: str, source: str, valence: str) -> str:
        """Let the persona consume one piece of content and describe the effect.

        Valence sets the base mood change (+0.05 positive, -0.05 negative,
        0 otherwise) and how much interest in ``topic`` grows (full, half or
        a quarter of the base step).  The source then shifts the mood change:
        -0.03 for "tabloid"/"ragebait", +0.02 for "charity"/"trusted".
        """
        base = 0.02
        if valence == "positive":
            mood_delta = +0.05
            interest_delta = base
        elif valence == "negative":
            mood_delta = -0.05
            interest_delta = base / 2
        else:
            # Any other valence string is treated as neutral.
            mood_delta = 0.0
            interest_delta = base / 4

        if source in {"tabloid", "ragebait"}:
            mood_delta -= 0.03
        elif source in {"charity", "trusted"}:
            mood_delta += 0.02

        self._p.mood += mood_delta
        self._p.bump_interest(topic, interest_delta)
        self._p.last_topic = topic
        return f"Consumed {valence} content on {topic} from {source}. Mood {mood_delta:+.2f}."

    def _answer_question(self, question: str) -> str:
        """Record a question, let it nudge the persona, and return the reaction.

        A blank question leaves the state untouched and returns
        "No reaction.".  Otherwise the question is remembered; if it matches
        a known topic, interest in that topic rises by 0.015 and it becomes
        the last topic.  The mood change for the question is applied either
        way.
        """
        q = question.strip()
        if not q:
            return "No reaction."

        self._p.last_question = q

        # Keyword -> topic mapping: a controllable, inspectable way to make
        # questions influence state.
        topic, mood_delta = self._infer_topic_and_mood_from_question(q)
        if topic is not None:
            # Questions increase attention to a topic a little.
            self._p.bump_interest(topic, 0.015)
            self._p.last_topic = topic
        self._p.mood += mood_delta

        # The answer is framed through the currently strongest interest
        # (first one in dict order on ties).
        top_interest = max(self._p.interests, key=self._p.interests.get)

        # Mild continuity: reference the last topic if there is one.
        if self._p.last_topic:
            continuity = f" (recently thinking about {self._p.last_topic})"
        else:
            continuity = ""
        return f"Answers via {top_interest}{continuity}: '{q}'"

    def _infer_topic_and_mood_from_question(self, q: str) -> Tuple[Optional[str], float]:
        """Map a question to ``(topic, mood_delta)`` via keyword triggers.

        Returns ``(None, 0.0)`` when no trigger keyword occurs in the
        lower-cased question.
        """
        ql = q.lower()
        for keywords, topic, mood_delta in self._QUESTION_TRIGGERS:
            if any(word in ql for word in keywords):
                return topic, mood_delta
        # Default: being asked something unrelated is mood-neutral.
        return None, 0.0

    def _advance_time(self, hours: int) -> str:
        """Let ``hours`` pass and return a short description.

        Negative values are treated as 0.  The decay fraction is
        ``hours / 240`` capped at 0.2 per call; mood shrinks by that
        fraction and each interest by a fifth of it.  From 24 hours upwards
        the last question is forgotten (the last topic is kept).
        """
        hours = max(0, hours)
        decay = min(0.2, hours / 240.0)

        # Mood drifts towards 0 with time.
        self._p.mood *= (1.0 - decay)

        # Interests slowly decay with time.
        interest_factor = 1.0 - decay / 5.0
        for name in self._p.interests:
            self._p.interests[name] *= interest_factor

        # Very light memory fade.
        if hours >= 24:
            self._p.last_question = None

        return f"Advanced time by {hours}h."