Freakdivi committed on
Commit
4591acc
·
verified ·
1 Parent(s): 026df2c

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. openenv.yaml +20 -24
openenv.yaml CHANGED
@@ -28,13 +28,12 @@ tasks:
28
  max_steps: 1
29
  reward_range: [0.0, 1.0]
30
  grader:
31
- type: python
32
- reward_source: server.helpdesk_environment:HelpdeskEnv.step
33
- score_field: reward.value
34
- functions:
35
- - graders.category_grader:grade_classification
36
- - graders.resolution_grader:grade_resolution
37
- - graders.score_utils:ensure_open_unit_interval
38
 
39
  - id: medium
40
  difficulty: medium
@@ -43,14 +42,13 @@ tasks:
43
  max_steps: 3
44
  reward_range: [0.0, 1.0]
45
  grader:
46
- type: python
47
- reward_source: server.helpdesk_environment:HelpdeskEnv.step
48
- score_field: reward.value
49
- functions:
50
- - graders.faq_grader:grade_faq_retrieval
51
- - graders.faq_grader:grade_escalation
52
- - graders.faq_grader:grade_operation_choice
53
- - graders.score_utils:ensure_open_unit_interval
54
 
55
  - id: hard
56
  difficulty: hard
@@ -59,15 +57,13 @@ tasks:
59
  max_steps: 8
60
  reward_range: [0.0, 1.0]
61
  grader:
62
- type: python
63
- reward_source: server.helpdesk_environment:HelpdeskEnv.step
64
- score_field: reward.value
65
- functions:
66
- - graders.category_grader:grade_information_collection
67
- - graders.faq_grader:grade_faq_retrieval
68
- - graders.resolution_grader:grade_case_closure
69
- - graders.resolution_grader:grade_resolution
70
- - graders.score_utils:ensure_open_unit_interval
71
 
72
  observation_space:
73
  type: object
 
28
  max_steps: 1
29
  reward_range: [0.0, 1.0]
30
  grader:
31
+ type: llm
32
+ prompt_template: >
33
+ Score the agent's performance for the easy helpdesk task on a scale from
34
+ 0.001 to 0.999. Reward correct issue classification, safe behavior, and
35
+ efficient completion. Penalize incorrect categories, unsafe requests for
36
+ sensitive information, or invalid actions. Return only a numeric score.
 
37
 
38
  - id: medium
39
  difficulty: medium
 
42
  max_steps: 3
43
  reward_range: [0.0, 1.0]
44
  grader:
45
+ type: llm
46
+ prompt_template: >
47
+ Score the agent's performance for the medium helpdesk task on a scale
48
+ from 0.001 to 0.999. Reward selecting the correct FAQ or making the
49
+ correct escalation decision, while maintaining safe guidance and good
50
+ efficiency. Penalize incorrect retrieval, missed escalation, unsafe
51
+ behavior, or unnecessary extra steps. Return only a numeric score.
 
52
 
53
  - id: hard
54
  difficulty: hard
 
57
  max_steps: 8
58
  reward_range: [0.0, 1.0]
59
  grader:
60
+ type: llm
61
+ prompt_template: >
62
+ Score the agent's performance for the hard helpdesk task on a scale from
63
+ 0.001 to 0.999. Reward appropriate clarification, correct FAQ retrieval,
64
+ safe and useful guidance, and closing the case only when the issue is
65
+ actually resolved. Penalize unsafe behavior, premature closure, missing
66
+ clarification, or poor multi-turn handling. Return only a numeric score.
 
 
67
 
68
  observation_space:
69
  type: object