seatyyy committed on
Commit
192c450
·
1 Parent(s): 53692f5
Files changed (2) hide show
  1. models.py +1 -1
  2. server/skill_forge_environment.py +10 -7
models.py CHANGED
@@ -29,7 +29,7 @@ class SkillForgeObservation(Observation):
29
  task_id: str
30
  task_description: str
31
  snapshot_data: str #df.head(5).to_string()
32
- skill_library: List[Dict]
33
  context: str
34
  result_correct: bool
35
  result_output: str
 
29
  task_id: str
30
  task_description: str
31
  snapshot_data: str #df.head(5).to_string()
32
+ skill_library: dict
33
  context: str
34
  result_correct: bool
35
  result_output: str
server/skill_forge_environment.py CHANGED
@@ -10,13 +10,16 @@ Skill Forge Environment Implementation.
10
  An RL training environment where LLM Agents evolve from "reinventing the wheel" to "building a skill library."
11
  """
12
 
 
13
  from uuid import uuid4
14
 
 
 
15
  from openenv.core.env_server.interfaces import Environment
16
  from openenv.core.env_server.types import State
17
 
18
  from models import SkillForgeAction, SkillForgeObservation
19
- from data_generator import TASKS
20
 
21
  class SkillForgeEnvironment(Environment):
22
  """
@@ -71,13 +74,13 @@ class SkillForgeEnvironment(Environment):
71
  task_id=task["id"],
72
  task_description=task["description"],
73
  snapshot_data=task["dataframe"].head(5).to_string(),
74
- skill_library=self.current_state.skill_library,
75
  context="",
76
  step_count=0,
77
  total_tokens=0,
78
  result_correct=False,
79
  result_output="",
80
- expected_output=task["expected_output"],
81
  )
82
 
83
  def step(self, action: SkillForgeAction) -> SkillForgeObservation:
@@ -91,7 +94,7 @@ class SkillForgeEnvironment(Environment):
91
  reward = 0.0
92
 
93
  if action.action_type == "create_skill":
94
- self.current_state.skill_library[action.skill_name] = {
95
  "template": action.content,
96
  "description": action.reasoning,
97
  "used_count": 0,
@@ -101,11 +104,11 @@ class SkillForgeEnvironment(Environment):
101
  result_output = f"Skill {action.skill_name} created"
102
  else:
103
  if action.action_type == "use_skill":
104
- skill = self.current_state.skill_library.get(action.content)
105
  # TODO: if action_type is use_skill while we don't have the skill yet, read reasoning to understand why the skill should be used and create the skill accordingly
106
  if skill:
107
  exec_code = skill["template"].format(**(action.params or {}))
108
- self.current_state.skill_library[action.content]["used_count"] += 1
109
  else: # TODO: log this scenario
110
  exec_code = None
111
  else:
@@ -135,7 +138,7 @@ class SkillForgeEnvironment(Environment):
135
  total_tokens=0, # TODO P0: update this with the total tokens used
136
  result_correct=result_correct,
137
  result_output=result_output,
138
- expected_output=next_task["expected_output"],
139
  )
140
 
141
  def _evaluate(self, exec_code, dataframe, expected_output):
 
10
  An RL training environment where LLM Agents evolve from "reinventing the wheel" to "building a skill library."
11
  """
12
 
13
+ import traceback
14
  from uuid import uuid4
15
 
16
+ import pandas as pd
17
+
18
  from openenv.core.env_server.interfaces import Environment
19
  from openenv.core.env_server.types import State
20
 
21
  from models import SkillForgeAction, SkillForgeObservation
22
+ from .data_generator import TASKS
23
 
24
  class SkillForgeEnvironment(Environment):
25
  """
 
74
  task_id=task["id"],
75
  task_description=task["description"],
76
  snapshot_data=task["dataframe"].head(5).to_string(),
77
+ skill_library=self.skill_library,
78
  context="",
79
  step_count=0,
80
  total_tokens=0,
81
  result_correct=False,
82
  result_output="",
83
+ expected_output=str(task["expected_output"]),
84
  )
85
 
86
  def step(self, action: SkillForgeAction) -> SkillForgeObservation:
 
94
  reward = 0.0
95
 
96
  if action.action_type == "create_skill":
97
+ self.skill_library[action.skill_name] = {
98
  "template": action.content,
99
  "description": action.reasoning,
100
  "used_count": 0,
 
104
  result_output = f"Skill {action.skill_name} created"
105
  else:
106
  if action.action_type == "use_skill":
107
+ skill = self.skill_library.get(action.content)
108
  # TODO: if action_type is use_skill while we don't have the skill yet, read reasoning to understand why the skill should be used and create the skill accordingly
109
  if skill:
110
  exec_code = skill["template"].format(**(action.params or {}))
111
+ self.skill_library[action.content]["used_count"] += 1
112
  else: # TODO: log this scenario
113
  exec_code = None
114
  else:
 
138
  total_tokens=0, # TODO P0: update this with the total tokens used
139
  result_correct=result_correct,
140
  result_output=result_output,
141
+ expected_output=str(next_task["expected_output"]),
142
  )
143
 
144
  def _evaluate(self, exec_code, dataframe, expected_output):