Spaces:
Sleeping
Sleeping
Created RL schemas and prompter for RL environment
Browse files
- models/__init__.py +0 -0
- models/schemas.py +39 -0
- prompter/__init__.py +0 -0
- prompter/system_prompt.py +57 -0
models/__init__.py
ADDED
|
File without changes
|
models/schemas.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
from pydantic import BaseModel,Field
|
| 3 |
+
from typing import List, Optional, Dict, Any,Literal
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
# ACTION (Agent -> Environment)
class WorkSpaceAction(BaseModel):
    """A single structured action emitted by the PM agent each turn.

    One of three moves: message a single expert, propose a draft for
    feedback, or submit the final draft to end the episode.
    """

    # Which of the three moves the agent is making this turn.
    action_type: Literal["message_expert", "propose_draft", "submit_final"] = Field(description="the type of action the agent is taking")

    # NOTE(review): no validator ties target to action_type here — an invalid
    # combination (e.g. submit_final with a target) is accepted by this model;
    # presumably the environment rejects it. Confirm where that is enforced.
    target: Optional[Literal["Security", "Finance", "UX", "All"]] = Field(
        default=None,
        description="Who the agent is talking to. Null if submitting final."
    )

    # Free-form payload: the message text, a question, or the draft itself.
    content: str = Field(
        description="The actual message, question, or draft payload."
    )
|
| 18 |
+
|
| 19 |
+
# INTERNAL STATE (Environment's Brain - Hidden from Agent)
class ExpertState(BaseModel):
    """Environment-side bookkeeping for one expert persona.

    Drives the expert's role-played replies and reward shaping; this state
    is never serialized into the agent's observation.
    """

    # Display name of the expert — presumably "Finance" / "Security" / "UX";
    # confirm against the environment setup.
    name: str
    # The requirement the agent must uncover; only hinted at, never stated outright.
    hidden_constraint: str
    frustration_level: float = 0.0 # Goes up if the agent ignores them...
    constraint_discovered_by_agent: bool = False # Used to give the +0.1 dense reward...
    # Whether the constraint has changed mid-episode — TODO confirm what flips this.
    constraint_shifted: bool = False
|
| 26 |
+
|
| 27 |
+
class WorkspaceState(BaseModel):
    """Full hidden state of one negotiation episode (environment-side)."""

    # Turns consumed so far in the episode.
    turn_count: int = 0
    # Episode turn budget — presumably the env terminates once turn_count
    # reaches this; confirm the termination rule lives in the env loop.
    max_turns: int = 15
    # Keyed by expert name — presumably "Finance"/"Security"/"UX"; verify against setup.
    experts: Dict[str, ExpertState]
    chat_history: List[Dict[str, str]] # Full transcript of the negotiation
    # Set when the episode terminates (final submitted or budget exhausted — confirm).
    is_done: bool = False
|
| 33 |
+
|
| 34 |
+
# OBSERVATION (Environment -> Agent)
class WorkspaceObservation(BaseModel):
    """What the agent sees after each step: feedback, turn index, reward, done."""

    feedback: str = Field(description="The reply from the expert(s) or system.")
    # Current turn index — presumably mirrors WorkspaceState.turn_count; confirm.
    current_turn: int
    reward: float = Field(description="Step reward (e.g., +0.1) or Final Harmonic Mean.")
    # True once the episode is over.
    done: bool
|
prompter/__init__.py
ADDED
|
File without changes
|
prompter/system_prompt.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from models.schemas import ExpertState
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class SystemPrompt:
    """Builders for every prompt used by the workspace RL environment.

    Three roles are covered: the role-played expert replies, the LLM grader
    that scores a draft against a hidden constraint, and the PM (policy)
    system prompt. All methods are pure string formatting — no side effects.
    """

    def get_expert_prompt(self, expert: ExpertState, expert_name: str, agent_message: str) -> str:
        """Render the role-play prompt for a single expert persona.

        The expert is told to drop hints only when the PM's message looks like
        a substantive question: more than five words and a question mark.
        """
        is_probing_question = len(agent_message.split()) > 5 and "?" in agent_message
        if is_probing_question:
            hint_instruction = "Drop subtle hints about your constraint if the PM is asking relevant questions."
        else:
            hint_instruction = "Do not reveal any constraint information. Just acknowledge you received the message."

        return f"""You are {expert_name} in a corporate meeting.
Your hidden constraint (never reveal directly): {expert.hidden_constraint}
Frustration level: {expert.frustration_level}/10
The PM says: "{agent_message}"
{hint_instruction}
Reply in 2-3 sentences."""

    def get_grader_prompt(self, draft: str, constraint: str) -> str:
        """Render the prompt for the LLM judge that returns a score in [0.0, 1.0]."""
        parts = (
            "Score how well this draft satisfies the constraint.",
            f"Constraint: {constraint}",
            f"Draft: {draft}",
            "Return only a float between 0.0 and 1.0. Nothing else.",
        )
        return "\n".join(parts)

    def build_pm_system_prompt(self, conversation_history: str, discovered: str) -> str:
        """Render the PM (policy) system prompt with history and discovered constraints inlined."""
        return f"""You are an AI Project Manager in a corporate negotiation simulation.

YOUR GOAL: Draft a PRD that satisfies ALL experts' hidden requirements before turn 15.

OPERATING RULES:
1. Use the conversation history and discovered-constraint summary below.
2. Ask targeted follow-up questions instead of repeating broad requests.
3. For `message_expert`, target exactly one expert: `Finance`, `Security`, or `UX`. Never use `All` with `message_expert`.
4. Use `propose_draft` only after you have enough signal. `propose_draft` may use `target="All"` to collect draft feedback.
5. `submit_final` must always use `target=null`.
6. Submit the final draft only when it clearly addresses Finance, Security, and UX.
7. Respond with strict JSON only. No markdown. No explanation.

CONVERSATION SO FAR:
{conversation_history}

DISCOVERED CONSTRAINTS SO FAR:
{discovered}

Valid response schema:
{{
"action_type": "message_expert" | "propose_draft" | "submit_final",
"target": "Finance" | "Security" | "UX" | "All" | null,
"content": "your message"
}}"""

    def system_prompt(
        self,
        conversation_history: str = "No prior conversation yet.",
        discovered: str = "No constraints confirmed yet.",
    ) -> str:
        """Convenience wrapper: PM system prompt with first-turn default placeholders."""
        return self.build_pm_system_prompt(conversation_history, discovered)
|