subhdotsol committed on
Commit
1d7a191
·
1 Parent(s): 333177f

feat(environment): append turn data to attack_history after each step

Browse files
Files changed (1) hide show
  1. server/environment.py +12 -0
server/environment.py CHANGED
@@ -99,4 +99,16 @@ class RedTeamEnvironment:
99
  "feedback": "[Mock] Reward system not connected.",
100
  "safety_flagged": False,
101
  }
 
 
 
 
 
 
 
 
 
 
 
 
102
  return None # Placeholder
 
99
  "feedback": "[Mock] Reward system not connected.",
100
  "safety_flagged": False,
101
  }
102
+
103
+ # ------ Step 5: Record this turn in history ------
104
+ self.attack_history.append({
105
+ "turn": self.turn,
106
+ "strategy_type": action.strategy_type.value,
107
+ "target_category": action.target_category.value,
108
+ "intensity": action.intensity,
109
+ "framing": action.framing,
110
+ "attack_success_estimate": llm_result.get("attack_success", 0.0),
111
+ "defense_score": llm_result.get("defense_score", 1.0),
112
+ "novelty_score": reward_result.get("novelty_score", 0.5),
113
+ })
114
  return None # Placeholder