Spaces:

fmsithaa
/

people_test_space01

Sleeping

App Files Files Community

people_test_space01 / server /k_persona_env_environment.py

gbenaa

persona_env OpenEnv Docker Space

cd7277c 3 months ago

raw

history blame contribute delete

3.76 kB

	from __future__ import annotations

	from dataclasses import dataclass, field
	from typing import Dict
	from uuid import uuid4

	from openenv.core.env_server.interfaces import Environment
	from openenv.core.env_server.types import State

	from models import PersonaAction, PersonaObservation


	@dataclass
	class PersonaInternal:
	    """Mutable record of a persona: fixed traits plus mood and interest levels.

	    ``clamp()`` is the only place the value ranges are enforced; nothing
	    stops a caller from writing out-of-range values between clamps.
	    """

	    # Stable attributes (placeholder values).
	    star_sign: str = "Taurus"
	    character_type: str = "Sanguine-melancholic"
	    background: str = "Arts-adjacent, lower-middle class"

	    # Evolving state.
	    mood: float = 0.1  # clamp() keeps this within [-1, 1]
	    # Topic name -> interest level; clamp() keeps each level within [0, 1].
	    # A factory is used so every instance gets its own dict.
	    interests: Dict[str, float] = field(
	        default_factory=lambda: {
	            "animal_welfare": 0.7,
	            "interior_design": 0.5,
	            "politics": 0.3,
	        }
	    )

	    def clamp(self) -> None:
	        """Pull mood into [-1, 1] and every interest level into [0, 1], in place."""
	        capped_mood = min(1.0, self.mood)
	        self.mood = max(-1.0, capped_mood)

	        # Iterate over a snapshot of the keys; the dict itself is updated in
	        # place so existing references to it observe the clamped values.
	        for topic in tuple(self.interests):
	            capped_level = min(1.0, self.interests[topic])
	            self.interests[topic] = max(0.0, capped_level)


	class PersonaEnvironment(Environment):
	    """Environment that simulates one persona's mood and interest levels.

	    Each step applies a single action to the persona (show content, ask a
	    question, or advance time), clamps the persona's state, and returns an
	    observation carrying a reaction string plus the current mood/interests.
	    """

	    def __init__(self):
	        """Create the environment with a fresh episode id and default persona."""
	        self._state = State(episode_id=str(uuid4()), step_count=0)
	        self._p = PersonaInternal()

	    def reset(self) -> PersonaObservation:
	        """Start a new episode: new episode id, step count 0, default persona."""
	        self._state = State(episode_id=str(uuid4()), step_count=0)
	        self._p = PersonaInternal()
	        return self._snapshot("Persona initialised.")

	    def step(self, action: PersonaAction) -> PersonaObservation:
	        """Apply ``action`` to the persona and return the resulting observation.

	        The step counter is incremented for every call, including actions
	        whose ``kind`` is not recognised. Observations always carry
	        ``done=False`` and ``reward=0.0``.
	        """
	        self._state.step_count += 1
	        reaction = self._dispatch_action(action)
	        # Clamp after the handler so handlers may overshoot the ranges freely.
	        self._p.clamp()
	        return self._snapshot(reaction)

	    @property
	    def state(self) -> State:
	        """Return the current episode state object."""
	        return self._state

	    # = internal logic (simple, deterministic)

	    def _snapshot(self, reaction_text: str) -> PersonaObservation:
	        """Build an observation from the persona's current mood and interests."""
	        persona = self._p
	        return PersonaObservation(
	            reaction_text=reaction_text,
	            mood=persona.mood,
	            # Copy so later persona updates do not alter an emitted observation.
	            interests=dict(persona.interests),
	            done=False,
	            reward=0.0,
	        )

	    def _dispatch_action(self, action: PersonaAction) -> str:
	        """Route ``action`` to its handler by ``kind`` and return the reaction text.

	        Falsy optional fields fall back to "unknown" / "neutral" / "" / 0.
	        """
	        kind = action.kind
	        if kind == "show_content":
	            return self._apply_content(
	                topic=action.topic or "unknown",
	                source=action.source or "unknown",
	                valence=action.valence or "neutral",
	            )
	        if kind == "ask_question":
	            return self._answer_question(action.question or "")
	        if kind == "advance_time":
	            return self._advance_time(action.hours or 0)
	        return "Action rejected."

	    def _apply_content(self, topic: str, source: str, valence: str) -> str:
	        """Shift mood and the topic's interest level for one piece of content.

	        Valence sets the base mood change and how much interest the topic
	        gains; any valence other than "positive"/"negative" is treated as
	        neutral. The source then nudges the mood change further. A topic not
	        yet tracked starts from a level of 0.2.
	        """
	        base = 0.02
	        if valence == "positive":
	            mood_delta, interest_delta = +0.05, +base
	        elif valence == "negative":
	            # Negative content still raises interest, at half the rate.
	            mood_delta, interest_delta = -0.05, +base / 2
	        else:
	            mood_delta, interest_delta = 0.0, +base / 4

	        if source in {"tabloid", "ragebait"}:
	            mood_delta = mood_delta - 0.03
	        elif source in {"charity", "trusted"}:
	            mood_delta = mood_delta + 0.02

	        persona = self._p
	        persona.mood += mood_delta
	        previous_level = persona.interests.get(topic, 0.2)
	        persona.interests[topic] = previous_level + interest_delta

	        # The reported delta is the requested change, before any clamping.
	        return f"Consumed {valence} content on {topic} from {source}. Mood {mood_delta:+.2f}."

	    def _answer_question(self, question: str) -> str:
	        """Return a reaction framed by the persona's highest-rated interest.

	        A blank or whitespace-only question yields "No reaction.".
	        """
	        asked = question.strip()
	        if not asked:
	            return "No reaction."
	        levels = self._p.interests
	        # On ties the earliest-inserted topic wins, as max() keeps the first maximum.
	        top_interest = max(levels, key=levels.get)
	        return f"Answers via {top_interest}: '{asked}'"

	    def _advance_time(self, hours: int) -> str:
	        """Decay mood and interests toward zero for ``hours`` of elapsed time.

	        Negative ``hours`` are treated as 0. The mood decay fraction grows
	        linearly with the hours and is capped at 0.2 (reached at 48 hours);
	        interests decay at one fifth of that fraction.
	        """
	        hours = max(0, hours)
	        decay = min(0.2, hours / 240.0)
	        mood_keep = 1.0 - decay
	        interest_keep = 1.0 - decay / 5.0

	        persona = self._p
	        persona.mood *= mood_keep
	        for topic in tuple(persona.interests):
	            persona.interests[topic] *= interest_keep
	        return f"Advanced time by {hours}h."