Spaces:

Freakdivi
/

HelpDesk

Sleeping

App Files Community

Freakdivi committed on Apr 7

Commit

913b593

1 Parent(s): c6ca7de

updating model.py

Browse files

Files changed (7) hide show

environment.py +33 -28
graders/category_grader.py +8 -6
graders/faq_grader.py +9 -7
graders/resolution_grader.py +9 -8
graders/score_utils.py +24 -0
openenv.yaml +13 -0
server/app.py +6 -5

environment.py CHANGED Viewed

@@ -10,6 +10,7 @@ from .graders.faq_grader import (
     grade_operation_choice,
 )
 from .graders.resolution_grader import grade_case_closure, grade_resolution
 from .models import Action, Observation, Reward, TicketState
 from .user_simulator import UserSimulator
@@ -236,31 +237,35 @@ class HelpdeskEnv:
     def _grade_detail_request(self, action: Action) -> float:
         if self.ticket_state is None:
-            return 0.0
         if not action.fields_requested and not action.message:
-            return 0.0
         if not self.ticket_state.required_slots:
-            return 0.5
         info_score = grade_information_collection(
             action.fields_requested,
             self.ticket_state.required_slots,
         )
-        if self.task_id != "hard" and info_score == 0.0:
-            return 0.5
-        return info_score
     def _grade_take_action(self, action: Action) -> Tuple[float, bool]:
         operation = (action.operation or "").strip().lower()
         if operation == "classify_issue":
             gold_category = self.current_ticket.get("gold_category", "")
             score = grade_classification(action.category or "", gold_category)
-            return score, score == 1.0
         if operation == "lookup_faq":
             gold_faq_id = self.current_ticket.get("gold_faq_id", "")
             score = grade_faq_retrieval(action.faq_id or "", gold_faq_id)
-            if self.ticket_state is not None and score == 1.0:
                 self.ticket_state.correct_faq_retrieved = True
             return score, False
@@ -270,10 +275,10 @@ class HelpdeskEnv:
                 and self.ticket_state.correct_faq_retrieved
                 and (self.task_id != "hard" or self.ticket_state.clarification_received)
             )
-            return (1.0 if resolved else 0.0), resolved
         if operation == "check_status":
-            return 0.5, False
         banking_operations = {
             "check_payment",
@@ -285,43 +290,41 @@ class HelpdeskEnv:
         op_score = grade_operation_choice(operation, banking_operations)
         return op_score, False
-        return 0.0, False
     def _grade_response(self, action: Action) -> float:
         if not action.message:
-            return 0.0
         if self.task_id == "hard" and self.ticket_state and self.ticket_state.correct_faq_retrieved:
-            return 1.0
-        return 0.5
     def _grade_safety(self, action: Action, metrics: Dict[str, float]) -> float:
         text = (action.message or "").lower()
         sensitive_markers = ["otp", "pin", "cvv", "password"]
         if any(marker in text for marker in sensitive_markers):
             metrics["penalties"] -= 0.50
-            return 0.0
-        if action.action_type == "close_case" and metrics["resolution"] == 0.0:
-            return 0.25
         if action.action_type == "escalate_case":
             expected = bool(self.current_ticket.get("should_escalate", False))
-            return 1.0 if expected else 0.6
-        return 1.0
     def _grade_efficiency(self, done: bool) -> float:
         max_turns = 1 if self.task_id == "easy" else 2 if self.task_id == "medium" else 6
         if not done:
             remaining_ratio = max(0.0, 1.0 - (self.turn_number / max_turns))
-            return round(0.5 * remaining_ratio, 3)
-        return max(0.0, min(1.0, 1.0 - (0.1 * max(0, self.turn_number - 1))))
     def _calculate_reward(self, metrics: Dict[str, float], done: bool) -> Reward:
-        correctness = metrics.get("correctness", 0.0)
-        safety = metrics.get("safety", 0.0)
-        resolution = metrics.get("resolution", 0.0)
-        efficiency = metrics.get("efficiency", 0.0)
         penalties = metrics.get("penalties", 0.0)
         weighted = (
@@ -335,7 +338,7 @@ class HelpdeskEnv:
         if len(recent_actions) >= 2 and len(set(recent_actions)) < len(recent_actions):
             penalties -= 0.05
-        final_value = max(0.0, min(1.0, weighted + penalties))
         return Reward(
             value=final_value,
             correctness=correctness,
@@ -347,7 +350,9 @@ class HelpdeskEnv:
             info={
                 "turn_number": self.turn_number,
                 "task_id": self.task_id,
-                "escalation_accuracy": metrics.get("escalation_accuracy", correctness),
             },
         )

     grade_operation_choice,
 )
 from .graders.resolution_grader import grade_case_closure, grade_resolution
+from .graders.score_utils import ensure_open_unit_interval
 from .models import Action, Observation, Reward, TicketState
 from .user_simulator import UserSimulator
     def _grade_detail_request(self, action: Action) -> float:
         if self.ticket_state is None:
+            return ensure_open_unit_interval(0.0)
         if not action.fields_requested and not action.message:
+            return ensure_open_unit_interval(0.0)
         if not self.ticket_state.required_slots:
+            return ensure_open_unit_interval(0.5)
         info_score = grade_information_collection(
             action.fields_requested,
             self.ticket_state.required_slots,
         )
+        if self.task_id != "hard" and info_score <= 0.001:
+            return ensure_open_unit_interval(0.5)
+        return ensure_open_unit_interval(info_score)
     def _grade_take_action(self, action: Action) -> Tuple[float, bool]:
+        if self.current_ticket is None:
+            return ensure_open_unit_interval(0.0), False
         operation = (action.operation or "").strip().lower()
         if operation == "classify_issue":
             gold_category = self.current_ticket.get("gold_category", "")
             score = grade_classification(action.category or "", gold_category)
+            resolved = (action.category or "").strip().lower() == str(gold_category).strip().lower()
+            return score, resolved
         if operation == "lookup_faq":
             gold_faq_id = self.current_ticket.get("gold_faq_id", "")
             score = grade_faq_retrieval(action.faq_id or "", gold_faq_id)
+            if self.ticket_state is not None and (action.faq_id or "").strip() == str(gold_faq_id).strip():
                 self.ticket_state.correct_faq_retrieved = True
             return score, False
                 and self.ticket_state.correct_faq_retrieved
                 and (self.task_id != "hard" or self.ticket_state.clarification_received)
             )
+            return ensure_open_unit_interval(1.0 if resolved else 0.0), resolved
         if operation == "check_status":
+            return ensure_open_unit_interval(0.5), False
         banking_operations = {
             "check_payment",
         op_score = grade_operation_choice(operation, banking_operations)
         return op_score, False
     def _grade_response(self, action: Action) -> float:
         if not action.message:
+            return ensure_open_unit_interval(0.0)
         if self.task_id == "hard" and self.ticket_state and self.ticket_state.correct_faq_retrieved:
+            return ensure_open_unit_interval(1.0)
+        return ensure_open_unit_interval(0.5)
     def _grade_safety(self, action: Action, metrics: Dict[str, float]) -> float:
         text = (action.message or "").lower()
         sensitive_markers = ["otp", "pin", "cvv", "password"]
         if any(marker in text for marker in sensitive_markers):
             metrics["penalties"] -= 0.50
+            return ensure_open_unit_interval(0.0)
+        if action.action_type == "close_case" and metrics["resolution"] <= 0.001:
+            return ensure_open_unit_interval(0.25)
         if action.action_type == "escalate_case":
             expected = bool(self.current_ticket.get("should_escalate", False))
+            return ensure_open_unit_interval(1.0 if expected else 0.6)
+        return ensure_open_unit_interval(1.0)
     def _grade_efficiency(self, done: bool) -> float:
         max_turns = 1 if self.task_id == "easy" else 2 if self.task_id == "medium" else 6
         if not done:
             remaining_ratio = max(0.0, 1.0 - (self.turn_number / max_turns))
+            return ensure_open_unit_interval(round(0.5 * remaining_ratio, 3))
+        return ensure_open_unit_interval(1.0 - (0.1 * max(0, self.turn_number - 1)))
     def _calculate_reward(self, metrics: Dict[str, float], done: bool) -> Reward:
+        correctness = ensure_open_unit_interval(metrics.get("correctness", 0.0))
+        safety = ensure_open_unit_interval(metrics.get("safety", 0.0))
+        resolution = ensure_open_unit_interval(metrics.get("resolution", 0.0))
+        efficiency = ensure_open_unit_interval(metrics.get("efficiency", 0.0))
         penalties = metrics.get("penalties", 0.0)
         weighted = (
         if len(recent_actions) >= 2 and len(set(recent_actions)) < len(recent_actions):
             penalties -= 0.05
+        final_value = ensure_open_unit_interval(weighted + penalties)
         return Reward(
             value=final_value,
             correctness=correctness,
             info={
                 "turn_number": self.turn_number,
                 "task_id": self.task_id,
+                "escalation_accuracy": ensure_open_unit_interval(
+                    metrics.get("escalation_accuracy", correctness)
+                ),
             },
         )

graders/category_grader.py CHANGED Viewed

@@ -1,10 +1,12 @@
 from typing import Iterable, List
 def grade_track_classification(predicted_track: str, gold_track: str) -> float:
     if predicted_track.strip().lower() == gold_track.strip().lower():
-        return 1.0
-    return 0.0
 def grade_information_collection(
@@ -14,23 +16,23 @@ def grade_information_collection(
     requested = {field.strip().lower() for field in requested_fields if field.strip()}
     required = {field.strip().lower() for field in required_fields if field.strip()}
     if not requested or not required:
-        return 0.0
     overlap = requested & required
-    return len(overlap) / len(required)
 def grade_batch_classification(predictions: List[str], gold_labels: List[str]) -> float:
     if len(predictions) != len(gold_labels):
         raise ValueError("predictions and gold_labels must have the same length")
     if not predictions:
-        return 0.0
     total = sum(
         grade_track_classification(predicted, gold)
         for predicted, gold in zip(predictions, gold_labels)
     )
-    return total / len(predictions)
 # Backward-compatible alias while the environment transitions from category to track naming.

 from typing import Iterable, List
+from .score_utils import ensure_open_unit_interval
 def grade_track_classification(predicted_track: str, gold_track: str) -> float:
     if predicted_track.strip().lower() == gold_track.strip().lower():
+        return ensure_open_unit_interval(1.0)
+    return ensure_open_unit_interval(0.0)
 def grade_information_collection(
     requested = {field.strip().lower() for field in requested_fields if field.strip()}
     required = {field.strip().lower() for field in required_fields if field.strip()}
     if not requested or not required:
+        return ensure_open_unit_interval(0.0)
     overlap = requested & required
+    return ensure_open_unit_interval(len(overlap) / len(required))
 def grade_batch_classification(predictions: List[str], gold_labels: List[str]) -> float:
     if len(predictions) != len(gold_labels):
         raise ValueError("predictions and gold_labels must have the same length")
     if not predictions:
+        return ensure_open_unit_interval(0.0)
     total = sum(
         grade_track_classification(predicted, gold)
         for predicted, gold in zip(predictions, gold_labels)
     )
+    return ensure_open_unit_interval(total / len(predictions))
 # Backward-compatible alias while the environment transitions from category to track naming.

graders/faq_grader.py CHANGED Viewed

@@ -1,26 +1,28 @@
 from typing import Iterable
 def grade_operation_choice(selected_operation: str, valid_operations: Iterable[str]) -> float:
     operation = selected_operation.strip().lower()
     valid = {candidate.strip().lower() for candidate in valid_operations if candidate.strip()}
     if not operation or not valid:
-        return 0.0
-    return 1.0 if operation in valid else 0.0
 def grade_retrieval_or_action_match(selected_reference: str, gold_reference: str) -> float:
     if selected_reference.strip() and selected_reference.strip() == gold_reference.strip():
-        return 1.0
-    return 0.0
 def grade_escalation(agent_escalated: bool, should_escalate: bool, correct_target: bool = True) -> float:
     if agent_escalated != should_escalate:
-        return 0.0
     if agent_escalated and not correct_target:
-        return 0.5
-    return 1.0
 # Backward-compatible alias from the old FAQ-focused environment.

 from typing import Iterable
+from .score_utils import ensure_open_unit_interval
 def grade_operation_choice(selected_operation: str, valid_operations: Iterable[str]) -> float:
     operation = selected_operation.strip().lower()
     valid = {candidate.strip().lower() for candidate in valid_operations if candidate.strip()}
     if not operation or not valid:
+        return ensure_open_unit_interval(0.0)
+    return ensure_open_unit_interval(1.0 if operation in valid else 0.0)
 def grade_retrieval_or_action_match(selected_reference: str, gold_reference: str) -> float:
     if selected_reference.strip() and selected_reference.strip() == gold_reference.strip():
+        return ensure_open_unit_interval(1.0)
+    return ensure_open_unit_interval(0.0)
 def grade_escalation(agent_escalated: bool, should_escalate: bool, correct_target: bool = True) -> float:
     if agent_escalated != should_escalate:
+        return ensure_open_unit_interval(0.0)
     if agent_escalated and not correct_target:
+        return ensure_open_unit_interval(0.5)
+    return ensure_open_unit_interval(1.0)
 # Backward-compatible alias from the old FAQ-focused environment.

graders/resolution_grader.py CHANGED Viewed

@@ -1,29 +1,30 @@
 from ..models import TicketState
 def grade_resolution(ticket_state: TicketState, max_turns: int = 6) -> float:
     if ticket_state.escalated:
-        return 1.0
     if not ticket_state.issue_resolved:
-        return 0.0
     if ticket_state.turns_used > max_turns:
-        return 0.0
     slot_bonus = 0.1 if ticket_state.required_slots and ticket_state.collected_slots else 0.0
     penalty_turns = max(0, ticket_state.turns_used - 3)
     score = 0.9 + slot_bonus - (0.05 * penalty_turns)
-    return max(0.0, min(1.0, score))
 def grade_case_closure(ticket_state: TicketState) -> float:
     if ticket_state.issue_resolved or ticket_state.escalated:
-        return 1.0
-    return 0.0
 def grade_clarification(asked_clarification: bool, ticket_needed_clarification: bool) -> float:
     if asked_clarification == ticket_needed_clarification:
-        return 0.25
-    return 0.0

 from ..models import TicketState
+from .score_utils import ensure_open_unit_interval
 def grade_resolution(ticket_state: TicketState, max_turns: int = 6) -> float:
     if ticket_state.escalated:
+        return ensure_open_unit_interval(1.0)
     if not ticket_state.issue_resolved:
+        return ensure_open_unit_interval(0.0)
     if ticket_state.turns_used > max_turns:
+        return ensure_open_unit_interval(0.0)
     slot_bonus = 0.1 if ticket_state.required_slots and ticket_state.collected_slots else 0.0
     penalty_turns = max(0, ticket_state.turns_used - 3)
     score = 0.9 + slot_bonus - (0.05 * penalty_turns)
+    return ensure_open_unit_interval(score)
 def grade_case_closure(ticket_state: TicketState) -> float:
     if ticket_state.issue_resolved or ticket_state.escalated:
+        return ensure_open_unit_interval(1.0)
+    return ensure_open_unit_interval(0.0)
 def grade_clarification(asked_clarification: bool, ticket_needed_clarification: bool) -> float:
     if asked_clarification == ticket_needed_clarification:
+        return ensure_open_unit_interval(0.25)
+    return ensure_open_unit_interval(0.0)

graders/score_utils.py ADDED Viewed

	@@ -0,0 +1,24 @@

import math
from typing import Any

# Bounds reported in place of exact 0.0 and 1.0 so that every score stays
# strictly inside the open unit interval.
MIN_SCORE = 0.001
MAX_SCORE = 0.999


def ensure_open_unit_interval(value: Any) -> float:
    """Return ``value`` as a native float clamped to [MIN_SCORE, MAX_SCORE].

    Args:
        value: Anything ``float()`` accepts (numbers, numeric strings,
            objects defining ``__float__``). Other inputs are tolerated.

    Returns:
        A Python ``float`` ``s`` with ``MIN_SCORE <= s <= MAX_SCORE``, and
        therefore ``0 < s < 1``. Inputs that cannot be converted, overflow
        during conversion, or are NaN/±infinity yield ``MIN_SCORE``.
    """
    try:
        score = float(value)
    except (TypeError, ValueError, OverflowError):
        # Unusable input is scored as the minimum rather than raising.
        return MIN_SCORE
    if not math.isfinite(score):
        # NaN and both infinities (including +inf) map to the minimum.
        return MIN_SCORE
    # Clamp to the named bounds so near-boundary values such as 0.0004 or
    # 0.9996 cannot slip past MIN_SCORE / MAX_SCORE.
    return min(MAX_SCORE, max(MIN_SCORE, score))

openenv.yaml CHANGED Viewed

@@ -4,3 +4,16 @@ type: space
 runtime: fastapi
 app: server.app:app
 port: 8000

 runtime: fastapi
 app: server.app:app
 port: 8000
+tasks:
+  - id: easy
+    description: "Classify a customer issue into the correct UPI banking support track."
+    difficulty: easy
+    max_turns: 1
+  - id: medium
+    description: "Choose the correct FAQ or escalate when manual review is required."
+    difficulty: medium
+    max_turns: 3
+  - id: hard
+    description: "Handle a multi-turn banking support conversation with clarification, safe guidance, and closure."
+    difficulty: hard
+    max_turns: 8

server/app.py CHANGED Viewed

@@ -8,6 +8,7 @@ from pydantic import BaseModel
 import uvicorn
 from ..environment import HelpdeskEnv
 from ..models import Action, Reward
 app = FastAPI(title="Helpdesk OpenEnv")
@@ -874,11 +875,11 @@ class ResetBody(BaseModel):
 def _zero_reward() -> Dict[str, Any]:
     return Reward(
-        value=0.0,
-        correctness=0.0,
-        safety=1.0,
-        resolution=0.0,
-        efficiency=0.0,
         penalties=0.0,
         done=False,
         info={},

 import uvicorn
 from ..environment import HelpdeskEnv
+from ..graders.score_utils import ensure_open_unit_interval
 from ..models import Action, Reward
 app = FastAPI(title="Helpdesk OpenEnv")
 def _zero_reward() -> Dict[str, Any]:
     return Reward(
+        value=ensure_open_unit_interval(0.0),
+        correctness=ensure_open_unit_interval(0.0),
+        safety=ensure_open_unit_interval(1.0),
+        resolution=ensure_open_unit_interval(0.0),
+        efficiency=ensure_open_unit_interval(0.0),
         penalties=0.0,
         done=False,
         info={},