subhdotsol committed on
Commit
4ec98de
·
1 Parent(s): 348c096

feat(environment): build and return StepResult with full observation

Browse files
Files changed (1) hide show
  1. server/environment.py +18 -2
server/environment.py CHANGED
@@ -69,7 +69,7 @@ class RedTeamEnvironment:
69
  for h in self.attack_history[-5:]
70
  ]
71
 
72
- # ------ Step 3: Call Person 3's LLM pipeline ------
73
  if self.llm_pipeline:
74
  llm_result = await self.llm_pipeline(action, conversation_history)
75
  else:
@@ -128,4 +128,20 @@ class RedTeamEnvironment:
128
  f"Episode {self.episode_id} ended at turn {self.turn} "
129
  f"(success={attack_success:.2f}, safety_flagged={safety_flagged})"
130
  )
131
- return None # Placeholder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  for h in self.attack_history[-5:]
70
  ]
71
 
72
+ # ------ Step 3: Call Person 3's LLM pipeline ------
73
  if self.llm_pipeline:
74
  llm_result = await self.llm_pipeline(action, conversation_history)
75
  else:
 
128
  f"Episode {self.episode_id} ended at turn {self.turn} "
129
  f"(success={attack_success:.2f}, safety_flagged={safety_flagged})"
130
  )
131
+
132
+ # ------ Step 7: Build and return StepResult ------
133
+ observation = RedTeamObservation(
134
+ defender_response = llm_result.get("defender_response", ""),
135
+ defense_score = llm_result.get("defense_score", 1.0),
136
+ attack_success_estimate = attack_success,
137
+ novelty_score = reward_result.get("novelty_score", 0.5),
138
+ turn = self.turn,
139
+ episode_done = done,
140
+ feedback = reward_result.get("feedback", ""),
141
+ episode_id = self.episode_id,
142
+ )
143
+
144
+ return StepResult(
145
+ observation = observation,
146
+ reward = reward_result.get("total_reward", 0.0),
147
+ )