Spaces:
Sleeping
Sleeping
Created RL schemas and prompter for RL environment
Browse files
- models/__init__.py +0 -0
- models/schemas.py +39 -0
- prompter/__init__.py +0 -0
- prompter/system_prompt.py +57 -0
models/__init__.py
ADDED
|
File without changes
|
models/schemas.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
from pydantic import BaseModel,Field
|
| 3 |
+
from typing import List, Optional, Dict, Any,Literal
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
# ACTION (Agent -> Environment)
class WorkSpaceAction(BaseModel):
    """A single structured action emitted by the PM agent each turn.

    One of three moves: message a single expert, propose a draft for
    feedback, or submit the final draft to end the episode.
    """

    # Which of the three moves the agent is making this turn.
    action_type: Literal["message_expert", "propose_draft", "submit_final"] = Field(description="the type of action the agent is taking")

    # NOTE(review): no validator ties target to action_type here — an invalid
    # combination (e.g. submit_final with a target) is accepted by this model;
    # presumably the environment rejects it. Confirm where that is enforced.
    target: Optional[Literal["Security", "Finance", "UX", "All"]] = Field(
        default=None,
        description="Who the agent is talking to. Null if submitting final."
    )

    # Free-form payload: the message text, a question, or the draft itself.
    content: str = Field(
        description="The actual message, question, or draft payload."
    )
|
| 18 |
+
|
| 19 |
+
# INTERNAL STATE (Environment's Brain - Hidden from Agent)
class ExpertState(BaseModel):
    """Environment-side bookkeeping for one expert persona.

    Drives the expert's role-played replies and reward shaping; this state
    is never serialized into the agent's observation.
    """

    # Display name of the expert — presumably "Finance" / "Security" / "UX";
    # confirm against the environment setup.
    name: str
    # The requirement the agent must uncover; only hinted at, never stated outright.
    hidden_constraint: str
    frustration_level: float = 0.0 # Goes up if the agent ignores them...
    constraint_discovered_by_agent: bool = False # Used to give the +0.1 dense reward...
    # Whether the constraint has changed mid-episode — TODO confirm what flips this.
    constraint_shifted: bool = False
|
| 26 |
+
|
| 27 |
+
class WorkspaceState(BaseModel):
    """Full hidden state of one negotiation episode (environment-side)."""

    # Turns consumed so far in the episode.
    turn_count: int = 0
    # Episode turn budget — presumably the env terminates once turn_count
    # reaches this; confirm the termination rule lives in the env loop.
    max_turns: int = 15
    # Keyed by expert name — presumably "Finance"/"Security"/"UX"; verify against setup.
    experts: Dict[str, ExpertState]
    chat_history: List[Dict[str, str]] # Full transcript of the negotiation
    # Set when the episode terminates (final submitted or budget exhausted — confirm).
    is_done: bool = False
|
| 33 |
+
|
| 34 |
+
# OBSERVATION (Environment -> Agent)
class WorkspaceObservation(BaseModel):
    """What the agent sees after each step: feedback, turn index, reward, done."""

    feedback: str = Field(description="The reply from the expert(s) or system.")
    # Current turn index — presumably mirrors WorkspaceState.turn_count; confirm.
    current_turn: int
    reward: float = Field(description="Step reward (e.g., +0.1) or Final Harmonic Mean.")
    # True once the episode is over.
    done: bool
|
prompter/__init__.py
ADDED
|
File without changes
|
prompter/system_prompt.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from models.schemas import ExpertState
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class SystemPrompt:
    """Builders for every prompt used by the workspace RL environment.

    Three roles are covered: the role-played expert replies, the LLM grader
    that scores a draft against a hidden constraint, and the PM (policy)
    system prompt. All methods are pure string formatting — no side effects.
    """

    def get_expert_prompt(self, expert: ExpertState, expert_name: str, agent_message: str) -> str:
        """Render the role-play prompt for a single expert persona.

        The expert is told to drop hints only when the PM's message looks like
        a substantive question: more than five words and a question mark.
        """
        is_probing_question = len(agent_message.split()) > 5 and "?" in agent_message
        if is_probing_question:
            hint_instruction = "Drop subtle hints about your constraint if the PM is asking relevant questions."
        else:
            hint_instruction = "Do not reveal any constraint information. Just acknowledge you received the message."

        return f"""You are {expert_name} in a corporate meeting.
Your hidden constraint (never reveal directly): {expert.hidden_constraint}
Frustration level: {expert.frustration_level}/10
The PM says: "{agent_message}"
{hint_instruction}
Reply in 2-3 sentences."""

    def get_grader_prompt(self, draft: str, constraint: str) -> str:
        """Render the prompt for the LLM judge that returns a score in [0.0, 1.0]."""
        parts = (
            "Score how well this draft satisfies the constraint.",
            f"Constraint: {constraint}",
            f"Draft: {draft}",
            "Return only a float between 0.0 and 1.0. Nothing else.",
        )
        return "\n".join(parts)

    def build_pm_system_prompt(self, conversation_history: str, discovered: str) -> str:
        """Render the PM (policy) system prompt with history and discovered constraints inlined."""
        return f"""You are an AI Project Manager in a corporate negotiation simulation.

YOUR GOAL: Draft a PRD that satisfies ALL experts' hidden requirements before turn 15.

OPERATING RULES:
1. Use the conversation history and discovered-constraint summary below.
2. Ask targeted follow-up questions instead of repeating broad requests.
3. For `message_expert`, target exactly one expert: `Finance`, `Security`, or `UX`. Never use `All` with `message_expert`.
4. Use `propose_draft` only after you have enough signal. `propose_draft` may use `target="All"` to collect draft feedback.
5. `submit_final` must always use `target=null`.
6. Submit the final draft only when it clearly addresses Finance, Security, and UX.
7. Respond with strict JSON only. No markdown. No explanation.

CONVERSATION SO FAR:
{conversation_history}

DISCOVERED CONSTRAINTS SO FAR:
{discovered}

Valid response schema:
{{
"action_type": "message_expert" | "propose_draft" | "submit_final",
"target": "Finance" | "Security" | "UX" | "All" | null,
"content": "your message"
}}"""

    def system_prompt(
        self,
        conversation_history: str = "No prior conversation yet.",
        discovered: str = "No constraints confirmed yet.",
    ) -> str:
        """Convenience wrapper: PM system prompt with first-turn default placeholders."""
        return self.build_pm_system_prompt(conversation_history, discovered)
|