Spaces:
Sleeping
Sleeping
Vighnesh committed on
Commit ·
93f0ae5
1
Parent(s): 4744d17
Fix #3: track _classified_correctly separately — wrong classification no longer gets free 0.20 credit in Task 3; TODO comment added to Task 2 classify branch
Browse files
server/support_environment.py
CHANGED
|
@@ -39,6 +39,7 @@ class SupportTicketEnvironment(Environment):
|
|
| 39 |
self._task_id: int = 1
|
| 40 |
self._ticket: dict = {}
|
| 41 |
self._classified: bool = False
|
|
|
|
| 42 |
self._resolved: bool = False
|
| 43 |
self._step_count: int = 0
|
| 44 |
self._total_reward: float = 0.0
|
|
@@ -74,6 +75,7 @@ class SupportTicketEnvironment(Environment):
|
|
| 74 |
self._step_count = 0
|
| 75 |
self._total_reward = 0.0
|
| 76 |
self._classified = False
|
|
|
|
| 77 |
self._resolved = False
|
| 78 |
|
| 79 |
if self._task_id == 3:
|
|
@@ -157,6 +159,8 @@ class SupportTicketEnvironment(Environment):
|
|
| 157 |
action.category or "", self._ticket["category"]
|
| 158 |
)
|
| 159 |
self._classified = True
|
|
|
|
|
|
|
| 160 |
return self._make_obs(
|
| 161 |
feedback=(
|
| 162 |
f"Classified as '{action.category}'. "
|
|
@@ -202,6 +206,7 @@ class SupportTicketEnvironment(Environment):
|
|
| 202 |
action.category or "", self._ticket["category"]
|
| 203 |
)
|
| 204 |
self._classified = True
|
|
|
|
| 205 |
return self._make_obs(
|
| 206 |
feedback=(
|
| 207 |
f"Classified '{self._ticket['id']}' as '{action.category}'. "
|
|
@@ -219,7 +224,7 @@ class SupportTicketEnvironment(Environment):
|
|
| 219 |
}
|
| 220 |
|
| 221 |
score = grade_task3(
|
| 222 |
-
classified_correctly=self._classified,
|
| 223 |
action_correct=action_correct,
|
| 224 |
action_partial=action_partial,
|
| 225 |
reply_text=action.reply_text,
|
|
|
|
| 39 |
self._task_id: int = 1
|
| 40 |
self._ticket: dict = {}
|
| 41 |
self._classified: bool = False
|
| 42 |
+
self._classified_correctly: bool = False # tracks actual correctness, not just attempt
|
| 43 |
self._resolved: bool = False
|
| 44 |
self._step_count: int = 0
|
| 45 |
self._total_reward: float = 0.0
|
|
|
|
| 75 |
self._step_count = 0
|
| 76 |
self._total_reward = 0.0
|
| 77 |
self._classified = False
|
| 78 |
+
self._classified_correctly = False
|
| 79 |
self._resolved = False
|
| 80 |
|
| 81 |
if self._task_id == 3:
|
|
|
|
| 159 |
action.category or "", self._ticket["category"]
|
| 160 |
)
|
| 161 |
self._classified = True
|
| 162 |
+
# TODO: store self._classified_correctly here too if grade_task2
|
| 163 |
+
# is ever extended to factor in classification correctness
|
| 164 |
return self._make_obs(
|
| 165 |
feedback=(
|
| 166 |
f"Classified as '{action.category}'. "
|
|
|
|
| 206 |
action.category or "", self._ticket["category"]
|
| 207 |
)
|
| 208 |
self._classified = True
|
| 209 |
+
self._classified_correctly = (cat_score == 1.0) # real correctness tracked
|
| 210 |
return self._make_obs(
|
| 211 |
feedback=(
|
| 212 |
f"Classified '{self._ticket['id']}' as '{action.category}'. "
|
|
|
|
| 224 |
}
|
| 225 |
|
| 226 |
score = grade_task3(
|
| 227 |
+
classified_correctly=self._classified_correctly, # real score, not just attempt flag
|
| 228 |
action_correct=action_correct,
|
| 229 |
action_partial=action_partial,
|
| 230 |
reply_text=action.reply_text,
|