# NOTE: the original capture began with "Spaces: / Sleeping / Sleeping" —
# a Hugging Face Spaces status banner picked up during extraction,
# not part of the source code.
# Standard library first, third-party second (PEP 8 grouping).
from typing import Any, Dict, List, Literal, Optional

from pydantic import BaseModel, Field
# ACTION (Agent -> Environment)
class WorkSpaceAction(BaseModel):
    """A single action the agent sends to the environment.

    NOTE(review): class name casing ("WorkSpace…") differs from the sibling
    "Workspace…" models; kept as-is to avoid breaking callers.
    """

    # Which move the agent is making: message an expert, float a draft,
    # or submit the final answer.
    action_type: Literal["message_expert", "propose_draft", "submit_final"] = Field(description="the type of action the agent is taking")
    # Addressee of the message; defaults to None (per the description,
    # null when submitting the final answer).
    target: Optional[Literal["Security", "Finance", "UX", "All"]] = Field(
        default=None,
        description="Who the agent is talking to. Null if submitting final."
    )
    # Free-form payload: the message text, question, or draft body.
    content: str = Field(
        description="The actual message, question, or draft payload."
    )
# INTERNAL STATE (Environment's Brain - Hidden from Agent)
class ExpertState(BaseModel):
    """Per-expert hidden state tracked by the environment, never shown to the agent."""

    # Expert's display name — presumably matches a WorkSpaceAction target
    # ("Security"/"Finance"/"UX"); verify against the environment code.
    name: str
    # The secret requirement the agent must uncover through dialogue.
    hidden_constraint: str
    frustration_level: float = 0.0  # Goes up if the agent ignores them...
    constraint_discovered_by_agent: bool = False  # Used to give the +0.1 dense reward...
    # Whether this expert's constraint has shifted during the episode.
    constraint_shifted: bool = False
class WorkspaceState(BaseModel):
    """Full hidden environment state for one negotiation episode."""

    # Steps taken so far in the episode.
    turn_count: int = 0
    # Hard cap on episode length.
    max_turns: int = 15
    # Expert name -> that expert's hidden state (required at construction).
    experts: Dict[str, ExpertState]
    chat_history: List[Dict[str, str]]  # Full transcript of the negotiation
    # True once the episode has terminated.
    is_done: bool = False
# OBSERVATION (Environment -> Agent)
class WorkspaceObservation(BaseModel):
    """What the environment returns to the agent after each step."""

    feedback: str = Field(description="The reply from the expert(s) or system.")
    # Turn index this observation corresponds to.
    current_turn: int
    reward: float = Field(description="Step reward (e.g., +0.1) or Final Harmonic Mean.")
    # True when the episode is over and no further actions are accepted.
    done: bool