Spaces:

fmsithaa
/

people_test_space01

Sleeping

App Files Files Community

people_test_space01 / server /persona_env_environment.py

gbenaa

persona_env OpenEnv Docker Space

cd7277c 3 months ago

raw

history blame contribute delete

5.69 kB

	from __future__ import annotations

	from dataclasses import dataclass, field
	from typing import Dict, Optional, Tuple
	from uuid import uuid4

	from openenv.core.env_server.interfaces import Environment
	from openenv.core.env_server.types import State

	from models import PersonaAction, PersonaObservation


	@dataclass
	class PersonaInternal:
	    """Mutable internal state of the simulated persona.

	    Holds a few fixed descriptive attributes, an evolving mood and
	    per-topic interest levels, and a small memory of the most recent
	    question and topic.
	    """

	    # Stable attributes (placeholders).
	    star_sign: str = "Taurus"
	    character_type: str = "Sanguine-melancholic"
	    background: str = "Arts-adjacent, lower-middle class"

	    # Evolving state.
	    mood: float = 0.1  # kept within [-1, 1] by clamp()
	    # Topic name -> interest level, kept within [0, 1] by clamp().
	    # A factory is used so each instance owns its own dict.
	    interests: Dict[str, float] = field(default_factory=lambda: {
	        "animal_welfare": 0.7,
	        "interior_design": 0.5,
	        "politics": 0.3,
	    })

	    # Tiny memory (for continuity).
	    last_question: Optional[str] = None
	    last_topic: Optional[str] = None

	    def clamp(self) -> None:
	        """Force mood into [-1, 1] and every interest level into [0, 1]."""
	        self.mood = max(-1.0, min(1.0, self.mood))
	        # Only values of existing keys are reassigned, so the dict keeps
	        # its identity and size while it is being iterated.
	        for topic, level in self.interests.items():
	            self.interests[topic] = max(0.0, min(1.0, level))

	    def bump_interest(self, topic: str, delta: float) -> None:
	        """Add ``delta`` to the interest level of ``topic``.

	        A topic that is not tracked yet starts from 0.2 before the delta
	        is applied. The result is not clamped here; call ``clamp()``
	        afterwards to bring it back into range.
	        """
	        self.interests[topic] = self.interests.get(topic, 0.2) + delta


	class PersonaEnvironment(Environment):
	    """Simple, deterministic persona simulation behind the Environment API.

	    Each step applies one action (show content, ask a question, or advance
	    time) to a ``PersonaInternal`` instance, clamps the persona's values
	    back into range, and reports the resulting mood and interests.
	    """

	    # Ordered question rules: (topic, mood delta, trigger substrings).
	    # The first rule with any trigger contained in the lower-cased
	    # question wins, so the order below is significant.
	    _QUESTION_RULES = (
	        (
	            "animal_welfare",
	            +0.01,
	            ("ethical", "cruelty", "welfare", "rescue", "animal"),
	        ),
	        (
	            "interior_design",
	            +0.01,
	            ("decor", "interior", "furniture", "colour", "paint", "design"),
	        ),
	        (
	            # Politics questions tend to stress this persona slightly.
	            "politics",
	            -0.01,
	            (
	                "election", "immigration", "tax", "government",
	                "policy", "minister", "party",
	            ),
	        ),
	    )

	    def __init__(self):
	        """Create a fresh episode state and a default persona."""
	        # Give the base class the chance to run its own initialisation.
	        super().__init__()
	        self._state = State(episode_id=str(uuid4()), step_count=0)
	        self._p = PersonaInternal()

	    def reset(self) -> PersonaObservation:
	        """Start a new episode with a new episode id and a default persona."""
	        self._state = State(episode_id=str(uuid4()), step_count=0)
	        self._p = PersonaInternal()
	        return self._observe("Persona initialised.")

	    def step(self, action: PersonaAction) -> PersonaObservation:
	        """Apply one action and return the persona's reaction.

	        Dispatches on ``action.kind``; missing optional fields fall back to
	        ``"unknown"`` / ``"neutral"`` / ``""`` / ``0``. Any other kind yields
	        the text ``"Action rejected."``. The step counter is incremented
	        for every call, including rejected ones.
	        """
	        self._state.step_count += 1

	        if action.kind == "show_content":
	            reaction = self._apply_content(
	                topic=action.topic or "unknown",
	                source=action.source or "unknown",
	                valence=action.valence or "neutral",
	            )
	        elif action.kind == "ask_question":
	            reaction = self._answer_question(action.question or "")
	        elif action.kind == "advance_time":
	            reaction = self._advance_time(action.hours or 0)
	        else:
	            reaction = "Action rejected."

	        # Handlers adjust values without bounds checks; normalise once here.
	        self._p.clamp()

	        return self._observe(reaction)

	    @property
	    def state(self) -> State:
	        """Current episode state (episode id and step count)."""
	        return self._state

	    # Internal logic (simple, deterministic).

	    def _observe(self, reaction_text: str) -> PersonaObservation:
	        """Build an observation from the persona's current mood and interests.

	        The interests dict is copied so the observation does not alias the
	        persona's internal dict. ``done`` is always False and ``reward`` is
	        always 0.0.
	        """
	        return PersonaObservation(
	            reaction_text=reaction_text,
	            mood=self._p.mood,
	            interests=dict(self._p.interests),
	            done=False,
	            reward=0.0,
	        )

	    def _apply_content(self, topic: str, source: str, valence: str) -> str:
	        """Update mood and interest in ``topic`` after consuming content.

	        Valence sets the base mood change (+0.05 positive, -0.05 negative,
	        0 otherwise) and how much interest grows (full, half, or a quarter
	        of the base step). The source then shifts the mood change further:
	        -0.03 for "tabloid"/"ragebait", +0.02 for "charity"/"trusted".
	        Returns a one-line summary that includes the total mood change.
	        """
	        base = 0.02
	        if valence == "positive":
	            mood_delta = +0.05
	            interest_delta = +base
	        elif valence == "negative":
	            mood_delta = -0.05
	            interest_delta = +base / 2
	        else:
	            mood_delta = 0.0
	            interest_delta = +base / 4

	        if source in {"tabloid", "ragebait"}:
	            mood_delta -= 0.03
	        elif source in {"charity", "trusted"}:
	            mood_delta += 0.02

	        self._p.mood += mood_delta
	        self._p.bump_interest(topic, interest_delta)
	        self._p.last_topic = topic

	        return f"Consumed {valence} content on {topic} from {source}. Mood {mood_delta:+.2f}."

	    def _answer_question(self, question: str) -> str:
	        """Record a question, let it nudge the persona, and describe the answer.

	        A blank question changes nothing and returns ``"No reaction."``.
	        Otherwise the question is remembered, a keyword-matched topic (if
	        any) gains a little interest and becomes the last topic, and the
	        mood shifts by the matched amount. The reply names the topic with
	        the highest interest level at that point.
	        """
	        q = question.strip()
	        if not q:
	            return "No reaction."

	        self._p.last_question = q

	        # Keyword -> topic mapping: a controllable, inspectable way to make
	        # questions influence state.
	        topic, mood_delta = self._infer_topic_and_mood_from_question(q)

	        if topic is not None:
	            # Questions increase attention to a topic a little.
	            self._p.bump_interest(topic, 0.015)
	            self._p.last_topic = topic

	        self._p.mood += mood_delta

	        top_interest = max(self._p.interests.items(), key=lambda kv: kv[1])[0]

	        # Mild continuity: reference the last topic if available. When this
	        # question matched a topic, that is the topic just stored above.
	        if self._p.last_topic:
	            continuity = f" (recently thinking about {self._p.last_topic})"
	        else:
	            continuity = ""

	        return f"Answers via {top_interest}{continuity}: '{q}'"

	    def _infer_topic_and_mood_from_question(self, q: str) -> Tuple[Optional[str], float]:
	        """Map a question to ``(topic, mood_delta)`` by substring triggers.

	        Matching is case-insensitive and substring-based. Returns
	        ``(None, 0.0)`` when no trigger is found.
	        """
	        ql = q.lower()

	        for topic, mood_delta, triggers in self._QUESTION_RULES:
	            if any(word in ql for word in triggers):
	                return topic, mood_delta

	        # Default: neutral mood change from being asked something.
	        return None, 0.0

	    def _advance_time(self, hours: int) -> str:
	        """Let ``hours`` pass: mood and interests decay, old questions fade.

	        Negative values are treated as 0. The decay fraction is
	        ``hours / 240`` capped at 0.2; mood is scaled by ``1 - decay`` and
	        every interest by ``1 - decay / 5``. After 24 hours or more the
	        last question is forgotten (the last topic is kept).
	        """
	        hours = max(0, hours)
	        decay = min(0.2, hours / 240.0)

	        # Mood drifts towards 0 with time.
	        self._p.mood *= (1.0 - decay)

	        # Interests slowly decay with time.
	        for k in list(self._p.interests.keys()):
	            self._p.interests[k] *= (1.0 - decay / 5.0)

	        # Very light memory fade.
	        if hours >= 24:
	            self._p.last_question = None

	        return f"Advanced time by {hours}h."