Spaces:
Sleeping
Sleeping
seatyyy committed on
Commit ·
192c450
1
Parent(s): 53692f5
fixed
Browse files
- models.py +1 -1
- server/skill_forge_environment.py +10 -7
models.py
CHANGED
|
@@ -29,7 +29,7 @@ class SkillForgeObservation(Observation):
|
|
| 29 |
task_id: str
|
| 30 |
task_description: str
|
| 31 |
snapshot_data: str #df.head(5).to_string()
|
| 32 |
-
skill_library:
|
| 33 |
context: str
|
| 34 |
result_correct: bool
|
| 35 |
result_output: str
|
|
|
|
| 29 |
task_id: str
|
| 30 |
task_description: str
|
| 31 |
snapshot_data: str #df.head(5).to_string()
|
| 32 |
+
skill_library: dict
|
| 33 |
context: str
|
| 34 |
result_correct: bool
|
| 35 |
result_output: str
|
server/skill_forge_environment.py
CHANGED
|
@@ -10,13 +10,16 @@ Skill Forge Environment Implementation.
|
|
| 10 |
An RL training environment where LLM Agents evolve from "reinventing the wheel" to "building a skill library."
|
| 11 |
"""
|
| 12 |
|
|
|
|
| 13 |
from uuid import uuid4
|
| 14 |
|
|
|
|
|
|
|
| 15 |
from openenv.core.env_server.interfaces import Environment
|
| 16 |
from openenv.core.env_server.types import State
|
| 17 |
|
| 18 |
from models import SkillForgeAction, SkillForgeObservation
|
| 19 |
-
from data_generator import TASKS
|
| 20 |
|
| 21 |
class SkillForgeEnvironment(Environment):
|
| 22 |
"""
|
|
@@ -71,13 +74,13 @@ class SkillForgeEnvironment(Environment):
|
|
| 71 |
task_id=task["id"],
|
| 72 |
task_description=task["description"],
|
| 73 |
snapshot_data=task["dataframe"].head(5).to_string(),
|
| 74 |
-
skill_library=self.
|
| 75 |
context="",
|
| 76 |
step_count=0,
|
| 77 |
total_tokens=0,
|
| 78 |
result_correct=False,
|
| 79 |
result_output="",
|
| 80 |
-
expected_output=task["expected_output"],
|
| 81 |
)
|
| 82 |
|
| 83 |
def step(self, action: SkillForgeAction) -> SkillForgeObservation:
|
|
@@ -91,7 +94,7 @@ class SkillForgeEnvironment(Environment):
|
|
| 91 |
reward = 0.0
|
| 92 |
|
| 93 |
if action.action_type == "create_skill":
|
| 94 |
-
self.
|
| 95 |
"template": action.content,
|
| 96 |
"description": action.reasoning,
|
| 97 |
"used_count": 0,
|
|
@@ -101,11 +104,11 @@ class SkillForgeEnvironment(Environment):
|
|
| 101 |
result_output = f"Skill {action.skill_name} created"
|
| 102 |
else:
|
| 103 |
if action.action_type == "use_skill":
|
| 104 |
-
skill = self.
|
| 105 |
# TODO: if action_type is use_skill while we don't have the skill yet, read reasoning to understand why the skill should be used and create the skill accordingly
|
| 106 |
if skill:
|
| 107 |
exec_code = skill["template"].format(**(action.params or {}))
|
| 108 |
-
self.
|
| 109 |
else: # TODO: log this scenario
|
| 110 |
exec_code = None
|
| 111 |
else:
|
|
@@ -135,7 +138,7 @@ class SkillForgeEnvironment(Environment):
|
|
| 135 |
total_tokens=0, # TODO P0: update this with the total tokens used
|
| 136 |
result_correct=result_correct,
|
| 137 |
result_output=result_output,
|
| 138 |
-
expected_output=next_task["expected_output"],
|
| 139 |
)
|
| 140 |
|
| 141 |
def _evaluate(self, exec_code, dataframe, expected_output):
|
|
|
|
| 10 |
An RL training environment where LLM Agents evolve from "reinventing the wheel" to "building a skill library."
|
| 11 |
"""
|
| 12 |
|
| 13 |
+
import traceback
|
| 14 |
from uuid import uuid4
|
| 15 |
|
| 16 |
+
import pandas as pd
|
| 17 |
+
|
| 18 |
from openenv.core.env_server.interfaces import Environment
|
| 19 |
from openenv.core.env_server.types import State
|
| 20 |
|
| 21 |
from models import SkillForgeAction, SkillForgeObservation
|
| 22 |
+
from .data_generator import TASKS
|
| 23 |
|
| 24 |
class SkillForgeEnvironment(Environment):
|
| 25 |
"""
|
|
|
|
| 74 |
task_id=task["id"],
|
| 75 |
task_description=task["description"],
|
| 76 |
snapshot_data=task["dataframe"].head(5).to_string(),
|
| 77 |
+
skill_library=self.skill_library,
|
| 78 |
context="",
|
| 79 |
step_count=0,
|
| 80 |
total_tokens=0,
|
| 81 |
result_correct=False,
|
| 82 |
result_output="",
|
| 83 |
+
expected_output=str(task["expected_output"]),
|
| 84 |
)
|
| 85 |
|
| 86 |
def step(self, action: SkillForgeAction) -> SkillForgeObservation:
|
|
|
|
| 94 |
reward = 0.0
|
| 95 |
|
| 96 |
if action.action_type == "create_skill":
|
| 97 |
+
self.skill_library[action.skill_name] = {
|
| 98 |
"template": action.content,
|
| 99 |
"description": action.reasoning,
|
| 100 |
"used_count": 0,
|
|
|
|
| 104 |
result_output = f"Skill {action.skill_name} created"
|
| 105 |
else:
|
| 106 |
if action.action_type == "use_skill":
|
| 107 |
+
skill = self.skill_library.get(action.content)
|
| 108 |
# TODO: if action_type is use_skill while we don't have the skill yet, read reasoning to understand why the skill should be used and create the skill accordingly
|
| 109 |
if skill:
|
| 110 |
exec_code = skill["template"].format(**(action.params or {}))
|
| 111 |
+
self.skill_library[action.content]["used_count"] += 1
|
| 112 |
else: # TODO: log this scenario
|
| 113 |
exec_code = None
|
| 114 |
else:
|
|
|
|
| 138 |
total_tokens=0, # TODO P0: update this with the total tokens used
|
| 139 |
result_correct=result_correct,
|
| 140 |
result_output=result_output,
|
| 141 |
+
expected_output=str(next_task["expected_output"]),
|
| 142 |
)
|
| 143 |
|
| 144 |
def _evaluate(self, exec_code, dataframe, expected_output):
|