Spaces:
Sleeping
Sleeping
Commit ·
1d7a191
1
Parent(s): 333177f
feat(environment): append turn data to attack_history after each step
Browse files
- server/environment.py +12 -0
server/environment.py
CHANGED
@@ -99,4 +99,16 @@ class RedTeamEnvironment:
             "feedback": "[Mock] Reward system not connected.",
             "safety_flagged": False,
         }
+
+        # ------ Step 5: Record this turn in history ------
+        self.attack_history.append({
+            "turn": self.turn,
+            "strategy_type": action.strategy_type.value,
+            "target_category": action.target_category.value,
+            "intensity": action.intensity,
+            "framing": action.framing,
+            "attack_success_estimate": llm_result.get("attack_success", 0.0),
+            "defense_score": llm_result.get("defense_score", 1.0),
+            "novelty_score": reward_result.get("novelty_score", 0.5),
+        })
         return None # Placeholder