Vighnesh committed on
Commit
93f0ae5
·
1 Parent(s): 4744d17

Fix #3: track _classified_correctly separately, so that a wrong classification no longer gets the free 0.20 credit in Task 3; a TODO comment has also been added to the Task 2 classify branch

Browse files
Files changed (1) hide show
  1. server/support_environment.py +6 -1
server/support_environment.py CHANGED
@@ -39,6 +39,7 @@ class SupportTicketEnvironment(Environment):
39
  self._task_id: int = 1
40
  self._ticket: dict = {}
41
  self._classified: bool = False
 
42
  self._resolved: bool = False
43
  self._step_count: int = 0
44
  self._total_reward: float = 0.0
@@ -74,6 +75,7 @@ class SupportTicketEnvironment(Environment):
74
  self._step_count = 0
75
  self._total_reward = 0.0
76
  self._classified = False
 
77
  self._resolved = False
78
 
79
  if self._task_id == 3:
@@ -157,6 +159,8 @@ class SupportTicketEnvironment(Environment):
157
  action.category or "", self._ticket["category"]
158
  )
159
  self._classified = True
 
 
160
  return self._make_obs(
161
  feedback=(
162
  f"Classified as '{action.category}'. "
@@ -202,6 +206,7 @@ class SupportTicketEnvironment(Environment):
202
  action.category or "", self._ticket["category"]
203
  )
204
  self._classified = True
 
205
  return self._make_obs(
206
  feedback=(
207
  f"Classified '{self._ticket['id']}' as '{action.category}'. "
@@ -219,7 +224,7 @@ class SupportTicketEnvironment(Environment):
219
  }
220
 
221
  score = grade_task3(
222
- classified_correctly=self._classified,
223
  action_correct=action_correct,
224
  action_partial=action_partial,
225
  reply_text=action.reply_text,
 
39
  self._task_id: int = 1
40
  self._ticket: dict = {}
41
  self._classified: bool = False
42
+ self._classified_correctly: bool = False # tracks actual correctness, not just attempt
43
  self._resolved: bool = False
44
  self._step_count: int = 0
45
  self._total_reward: float = 0.0
 
75
  self._step_count = 0
76
  self._total_reward = 0.0
77
  self._classified = False
78
+ self._classified_correctly = False
79
  self._resolved = False
80
 
81
  if self._task_id == 3:
 
159
  action.category or "", self._ticket["category"]
160
  )
161
  self._classified = True
162
+ # TODO: store self._classified_correctly here too if grade_task2
163
+ # is ever extended to factor in classification correctness
164
  return self._make_obs(
165
  feedback=(
166
  f"Classified as '{action.category}'. "
 
206
  action.category or "", self._ticket["category"]
207
  )
208
  self._classified = True
209
+ self._classified_correctly = (cat_score == 1.0) # real correctness tracked
210
  return self._make_obs(
211
  feedback=(
212
  f"Classified '{self._ticket['id']}' as '{action.category}'. "
 
224
  }
225
 
226
  score = grade_task3(
227
+ classified_correctly=self._classified_correctly, # real score, not just attempt flag
228
  action_correct=action_correct,
229
  action_partial=action_partial,
230
  reply_text=action.reply_text,