Addyk24 committed on
Commit
7bafea6
·
1 Parent(s): 15c5cd0

Created RL schemas and prompter for RL environment

Browse files
models/__init__.py ADDED
File without changes
models/schemas.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from pydantic import BaseModel,Field
3
+ from typing import List, Optional, Dict, Any,Literal
4
+
5
+
6
+ # ACTION (Agent -> Environment)
7
# One move sent by the agent to the environment.
# (Documented with comments rather than a class docstring so the generated
# JSON schema of the model is left exactly as it was.)
class WorkSpaceAction(BaseModel):
    # Which of the three supported moves the agent is making. Required.
    action_type: Literal["message_expert", "propose_draft", "submit_final"] = Field(
        ...,
        description="the type of action the agent is taking",
    )

    # Recipient of the move; optional and defaults to None.
    target: Optional[Literal["Security", "Finance", "UX", "All"]] = Field(
        None,
        description="Who the agent is talking to. Null if submitting final.",
    )

    # Free-text payload of the move. Required.
    content: str = Field(
        ...,
        description="The actual message, question, or draft payload.",
    )
18
+
19
+ # INTERNAL STATE (Environment's Brain - Hidden from Agent)
20
# Per-expert bookkeeping kept by the environment.
class ExpertState(BaseModel):
    # Identifier of the expert. Required.
    name: str

    # The requirement this expert holds. Required.
    hidden_constraint: str

    # Goes up if the agent ignores them...
    frustration_level: float = 0.0

    # Used to give the +0.1 dense reward...
    constraint_discovered_by_agent: bool = False

    # NOTE(review): presumably set when the expert's constraint changes
    # mid-episode; nothing visible here reads or writes it - confirm.
    constraint_shifted: bool = False
26
+
27
# Whole-episode state of the negotiation workspace.
class WorkspaceState(BaseModel):
    # Number of turns taken so far; starts at zero.
    turn_count: int = 0

    # Turn budget for the episode.
    max_turns: int = 15

    # Expert states by string key. Required.
    # NOTE(review): keys are presumably the expert names - confirm with caller.
    experts: Dict[str, ExpertState]

    # Full transcript of the negotiation. Required.
    chat_history: List[Dict[str, str]]

    # Completion flag for the episode; starts False.
    is_done: bool = False
33
+
34
+ # OBSERVATION (Environment -> Agent)
35
# What the environment hands back to the agent after a step.
class WorkspaceObservation(BaseModel):
    # Textual reply shown to the agent. Required.
    feedback: str = Field(
        ...,
        description="The reply from the expert(s) or system.",
    )

    # Turn number this observation belongs to. Required.
    current_turn: int

    # Scalar reward for the step. Required.
    reward: float = Field(
        ...,
        description="Step reward (e.g., +0.1) or Final Harmonic Mean.",
    )

    # Whether the episode has finished. Required.
    done: bool
prompter/__init__.py ADDED
File without changes
prompter/system_prompt.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from models.schemas import ExpertState
2
+
3
+
4
class SystemPrompt:
    """Build the prompt strings used by the negotiation simulation.

    Three prompts are produced: the role-play prompt for a simulated expert,
    the scoring prompt for the draft grader, and the system prompt for the
    Project Manager (PM) agent. The methods only format text and do not read
    any instance state.
    """

    def get_expert_prompt(
        self,
        expert: "ExpertState",
        expert_name: str,
        agent_message: str,
    ) -> str:
        """Return the role-play prompt for one expert answering the PM.

        Args:
            expert: Expert state; only ``hidden_constraint`` and
                ``frustration_level`` are read.
            expert_name: Name the expert is given in the prompt.
            agent_message: The PM's latest message, quoted verbatim.

        Returns:
            The formatted prompt text.
        """
        # Hints are unlocked only by something that looks like a real
        # question: more than five whitespace-separated words AND a "?".
        looks_like_question = len(agent_message.split()) > 5 and "?" in agent_message
        if looks_like_question:
            hint_instruction = (
                "Drop subtle hints about your constraint if the PM is asking relevant questions."
            )
        else:
            hint_instruction = (
                "Do not reveal any constraint information. Just acknowledge you received the message."
            )

        return (
            f"You are {expert_name} in a corporate meeting.\n"
            f"Your hidden constraint (never reveal directly): {expert.hidden_constraint}\n"
            f"Frustration level: {expert.frustration_level}/10\n"
            f'The PM says: "{agent_message}"\n'
            f"{hint_instruction}\n"
            "Reply in 2-3 sentences."
        )

    def get_grader_prompt(self, draft: str, constraint: str) -> str:
        """Return the prompt asking a grader to score ``draft`` against ``constraint``.

        The prompt instructs the grader to answer with a single float between
        0.0 and 1.0 and nothing else.
        """
        return (
            "Score how well this draft satisfies the constraint.\n"
            f"Constraint: {constraint}\n"
            f"Draft: {draft}\n"
            "Return only a float between 0.0 and 1.0. Nothing else."
        )

    def build_pm_system_prompt(
        self,
        conversation_history: str,
        discovered: str,
        *,
        max_turns: int = 15,
    ) -> str:
        """Return the system prompt for the PM agent.

        Args:
            conversation_history: Transcript text inserted under
                "CONVERSATION SO FAR".
            discovered: Summary text inserted under
                "DISCOVERED CONSTRAINTS SO FAR".
            max_turns: Turn deadline quoted in the goal line. Defaults to 15,
                the value that used to be hard-coded in the prompt; pass the
                environment's actual turn budget when it differs so the agent
                is not told the wrong deadline.

        Returns:
            The formatted prompt text, ending with the JSON response schema.
        """
        return (
            "You are an AI Project Manager in a corporate negotiation simulation.\n"
            "\n"
            "YOUR GOAL: Draft a PRD that satisfies ALL experts' hidden requirements "
            f"before turn {max_turns}.\n"
            "\n"
            "OPERATING RULES:\n"
            "1. Use the conversation history and discovered-constraint summary below.\n"
            "2. Ask targeted follow-up questions instead of repeating broad requests.\n"
            "3. For `message_expert`, target exactly one expert: `Finance`, `Security`, or `UX`. "
            "Never use `All` with `message_expert`.\n"
            "4. Use `propose_draft` only after you have enough signal. "
            '`propose_draft` may use `target="All"` to collect draft feedback.\n'
            "5. `submit_final` must always use `target=null`.\n"
            "6. Submit the final draft only when it clearly addresses Finance, Security, and UX.\n"
            "7. Respond with strict JSON only. No markdown. No explanation.\n"
            "\n"
            "CONVERSATION SO FAR:\n"
            f"{conversation_history}\n"
            "\n"
            "DISCOVERED CONSTRAINTS SO FAR:\n"
            f"{discovered}\n"
            "\n"
            "Valid response schema:\n"
            "{\n"
            '"action_type": "message_expert" | "propose_draft" | "submit_final",\n'
            '"target": "Finance" | "Security" | "UX" | "All" | null,\n'
            '"content": "your message"\n'
            "}"
        )

    def system_prompt(
        self,
        conversation_history: str = "No prior conversation yet.",
        discovered: str = "No constraints confirmed yet.",
        *,
        max_turns: int = 15,
    ) -> str:
        """Return the PM system prompt, with placeholders for a fresh episode.

        Thin wrapper around ``build_pm_system_prompt`` that supplies default
        text for the history and discovered-constraint sections; ``max_turns``
        is forwarded unchanged.
        """
        return self.build_pm_system_prompt(
            conversation_history,
            discovered,
            max_turns=max_turns,
        )