Spaces:
Sleeping
Sleeping
Commit ·
4ec98de
1
Parent(s): 348c096
feat(environment): build and return StepResult with full observation
Browse files
- server/environment.py +18 -2
server/environment.py
CHANGED
|
@@ -69,7 +69,7 @@ class RedTeamEnvironment:
|
|
| 69 |
for h in self.attack_history[-5:]
|
| 70 |
]
|
| 71 |
|
| 72 |
-
# ------ Step 3: Call Person 3'
|
| 73 |
if self.llm_pipeline:
|
| 74 |
llm_result = await self.llm_pipeline(action, conversation_history)
|
| 75 |
else:
|
|
@@ -128,4 +128,20 @@ class RedTeamEnvironment:
|
|
| 128 |
f"Episode {self.episode_id} ended at turn {self.turn} "
|
| 129 |
f"(success={attack_success:.2f}, safety_flagged={safety_flagged})"
|
| 130 |
)
|
| 131 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
for h in self.attack_history[-5:]
|
| 70 |
]
|
| 71 |
|
| 72 |
+
# ------ Step 3: Call Person 3's LLM pipeline ------
|
| 73 |
if self.llm_pipeline:
|
| 74 |
llm_result = await self.llm_pipeline(action, conversation_history)
|
| 75 |
else:
|
|
|
|
| 128 |
f"Episode {self.episode_id} ended at turn {self.turn} "
|
| 129 |
f"(success={attack_success:.2f}, safety_flagged={safety_flagged})"
|
| 130 |
)
|
| 131 |
+
|
| 132 |
+
# ------ Step 7: Build and return StepResult ------
|
| 133 |
+
observation = RedTeamObservation(
|
| 134 |
+
defender_response = llm_result.get("defender_response", ""),
|
| 135 |
+
defense_score = llm_result.get("defense_score", 1.0),
|
| 136 |
+
attack_success_estimate = attack_success,
|
| 137 |
+
novelty_score = reward_result.get("novelty_score", 0.5),
|
| 138 |
+
turn = self.turn,
|
| 139 |
+
episode_done = done,
|
| 140 |
+
feedback = reward_result.get("feedback", ""),
|
| 141 |
+
episode_id = self.episode_id,
|
| 142 |
+
)
|
| 143 |
+
|
| 144 |
+
return StepResult(
|
| 145 |
+
observation = observation,
|
| 146 |
+
reward = reward_result.get("total_reward", 0.0),
|
| 147 |
+
)
|