Update client.py: parse new observation fields and simplify docstrings
#5
by rsaibhargav - opened
client.py
CHANGED
|
@@ -4,7 +4,7 @@
|
|
| 4 |
# This source code is licensed under the BSD-style license found in the
|
| 5 |
# LICENSE file in the root directory of this source tree.
|
| 6 |
|
| 7 |
-
"""
|
| 8 |
|
| 9 |
from typing import Dict
|
| 10 |
|
|
@@ -19,61 +19,29 @@ class CodeAssessmentEnv(
|
|
| 19 |
EnvClient[CodeAssessmentAction, CodeAssessmentObservation, State]
|
| 20 |
):
|
| 21 |
"""
|
| 22 |
-
Client for the
|
| 23 |
-
|
| 24 |
-
This client maintains a persistent WebSocket connection to the environment server,
|
| 25 |
-
enabling efficient multi-step interactions with lower latency.
|
| 26 |
-
Each client instance has its own dedicated environment session on the server.
|
| 27 |
|
| 28 |
Example:
|
| 29 |
-
>>>
|
| 30 |
-
>>>
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
... print(result.observation.test_case_input)
|
| 34 |
-
...
|
| 35 |
-
... result = client.step(FirstRlProjAction(answer="8"))
|
| 36 |
-
... print(result.observation.is_correct)
|
| 37 |
-
|
| 38 |
-
Example with Docker:
|
| 39 |
-
>>> # Automatically start container and connect
|
| 40 |
-
>>> client = FirstRlProjEnv.from_docker_image("first_rl_proj:latest")
|
| 41 |
-
>>> try:
|
| 42 |
-
... result = client.reset()
|
| 43 |
-
... result = client.step(FirstRlProjAction(answer="8"))
|
| 44 |
-
... finally:
|
| 45 |
-
... client.close()
|
| 46 |
"""
|
| 47 |
|
| 48 |
def _step_payload(self, action: CodeAssessmentAction) -> Dict:
|
| 49 |
-
""
|
| 50 |
-
Convert CodeAssessmentAction to JSON payload for step message.
|
| 51 |
-
|
| 52 |
-
Args:
|
| 53 |
-
action: CodeAssessmentAction instance
|
| 54 |
-
|
| 55 |
-
Returns:
|
| 56 |
-
Dictionary representation suitable for JSON encoding
|
| 57 |
-
"""
|
| 58 |
-
return {
|
| 59 |
-
"answer": action.answer,
|
| 60 |
-
}
|
| 61 |
|
| 62 |
def _parse_result(self, payload: Dict) -> StepResult[CodeAssessmentObservation]:
|
| 63 |
-
"""
|
| 64 |
-
Parse server response into StepResult[CodeAssessmentObservation].
|
| 65 |
-
|
| 66 |
-
Args:
|
| 67 |
-
payload: JSON response data from server
|
| 68 |
-
|
| 69 |
-
Returns:
|
| 70 |
-
StepResult with CodeAssessmentObservation
|
| 71 |
-
"""
|
| 72 |
obs_data = payload.get("observation", {})
|
| 73 |
observation = CodeAssessmentObservation(
|
| 74 |
problem_description=obs_data.get("problem_description", ""),
|
| 75 |
difficulty=obs_data.get("difficulty", "easy"),
|
| 76 |
test_case_input=obs_data.get("test_case_input", ""),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
expected_output=obs_data.get("expected_output"),
|
| 78 |
feedback=obs_data.get("feedback", ""),
|
| 79 |
is_correct=obs_data.get("is_correct", False),
|
|
@@ -84,7 +52,6 @@ class CodeAssessmentEnv(
|
|
| 84 |
reward=payload.get("reward"),
|
| 85 |
metadata=obs_data.get("metadata", {}),
|
| 86 |
)
|
| 87 |
-
|
| 88 |
return StepResult(
|
| 89 |
observation=observation,
|
| 90 |
reward=payload.get("reward"),
|
|
@@ -92,15 +59,6 @@ class CodeAssessmentEnv(
|
|
| 92 |
)
|
| 93 |
|
| 94 |
def _parse_state(self, payload: Dict) -> State:
|
| 95 |
-
"""
|
| 96 |
-
Parse server response into State object.
|
| 97 |
-
|
| 98 |
-
Args:
|
| 99 |
-
payload: JSON response from state request
|
| 100 |
-
|
| 101 |
-
Returns:
|
| 102 |
-
State object with episode_id and step_count
|
| 103 |
-
"""
|
| 104 |
return State(
|
| 105 |
episode_id=payload.get("episode_id"),
|
| 106 |
step_count=payload.get("step_count", 0),
|
|
|
|
| 4 |
# This source code is licensed under the BSD-style license found in the
|
| 5 |
# LICENSE file in the root directory of this source tree.
|
| 6 |
|
| 7 |
+
"""AI Response Evaluation Environment Client."""
|
| 8 |
|
| 9 |
from typing import Dict
|
| 10 |
|
|
|
|
| 19 |
EnvClient[CodeAssessmentAction, CodeAssessmentObservation, State]
|
| 20 |
):
|
| 21 |
"""
|
| 22 |
+
Client for the AI Response Evaluation Environment.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
Example:
|
| 25 |
+
>>> env = await CodeAssessmentEnv.from_docker_image("code_assessment_env:latest")
|
| 26 |
+
>>> result = await env.reset()
|
| 27 |
+
>>> print(result.observation.task_type)
|
| 28 |
+
>>> result = await env.step(CodeAssessmentAction(answer="incorrect, factual-error"))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
"""
|
| 30 |
|
| 31 |
def _step_payload(self, action: CodeAssessmentAction) -> Dict:
|
| 32 |
+
return {"answer": action.answer}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
def _parse_result(self, payload: Dict) -> StepResult[CodeAssessmentObservation]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
obs_data = payload.get("observation", {})
|
| 36 |
observation = CodeAssessmentObservation(
|
| 37 |
problem_description=obs_data.get("problem_description", ""),
|
| 38 |
difficulty=obs_data.get("difficulty", "easy"),
|
| 39 |
test_case_input=obs_data.get("test_case_input", ""),
|
| 40 |
+
task_type=obs_data.get("task_type", "correctness_check"),
|
| 41 |
+
language=obs_data.get("language", "en"),
|
| 42 |
+
user_age=obs_data.get("user_age"),
|
| 43 |
+
user_mood=obs_data.get("user_mood"),
|
| 44 |
+
user_context=obs_data.get("user_context"),
|
| 45 |
expected_output=obs_data.get("expected_output"),
|
| 46 |
feedback=obs_data.get("feedback", ""),
|
| 47 |
is_correct=obs_data.get("is_correct", False),
|
|
|
|
| 52 |
reward=payload.get("reward"),
|
| 53 |
metadata=obs_data.get("metadata", {}),
|
| 54 |
)
|
|
|
|
| 55 |
return StepResult(
|
| 56 |
observation=observation,
|
| 57 |
reward=payload.get("reward"),
|
|
|
|
| 59 |
)
|
| 60 |
|
| 61 |
def _parse_state(self, payload: Dict) -> State:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
return State(
|
| 63 |
episode_id=payload.get("episode_id"),
|
| 64 |
step_count=payload.get("step_count", 0),
|