Spaces:
No application file
No application file
| from openenv.core.env_server import Environment as BaseEnv | |
| from .schemas import CalculateAction, SubmitAction, AccountantState, Action | |
class AccountantEng(BaseEnv):
    """Single-question accounting environment.

    The agent is shown a short text prompt and may either evaluate
    arithmetic expressions ("calculate" actions) or hand in a final
    answer ("submit" actions).  An episode ends when an answer is
    submitted or when ``max_steps`` actions have been taken.
    """

    def __init__(self):
        """Set up the fixed task prompt, its expected answer and the step budget."""
        # NOTE(review): BaseEnv.__init__ is not invoked here (same as the
        # original code) -- confirm whether the base class needs initialising.
        self.target_answer = 3000
        self.max_steps = 5
        self.messy_text = "Revenue was 5000, costs were 2000. What is the net profit?"
        self.current_step = 0

    def reset(self) -> AccountantState:
        """Start a new episode and return the initial state.

        Returns:
            An ``AccountantState`` carrying the task prompt with
            ``is_done=False``.
        """
        # Clear the step counter so the step budget applies per episode.
        self.current_step = 0
        return AccountantState(observation=self.messy_text, is_done=False)

    def step(self, action: Action) -> tuple[AccountantState, float, bool, dict]:
        """Apply one action and advance the episode by one step.

        Every step costs 0.1.  A failed calculation or a wrong submission
        costs a further 0.5; a correct submission earns 1.0.

        Args:
            action: Object with an ``action_type`` attribute.  For
                ``"calculate"`` its ``expression`` attribute is evaluated;
                for ``"submit"`` its ``net_profit`` attribute is compared
                with ``target_answer``.

        Returns:
            A ``(state, reward, done, info)`` tuple; ``info`` is always an
            empty dict.
        """
        self.current_step += 1
        reward = -0.1  # flat per-step cost
        done = False
        observation = ""

        if action.action_type == "calculate":
            try:
                # SECURITY: eval() executes arbitrary Python.  The expression
                # comes from the agent, so this must only run with trusted
                # agents or inside a sandbox.  Consider replacing it with a
                # restricted arithmetic evaluator.
                result = eval(action.expression)
            except Exception as exc:
                # Any failure while evaluating is penalised and reported back
                # so the agent can see what went wrong.
                reward -= 0.5
                observation = f"Calculation error: {exc}"
            else:
                observation = f"Calculation result: {result}"
        elif action.action_type == "submit":
            # Submitting always ends the episode, right or wrong.
            done = True
            if action.net_profit == self.target_answer:
                reward += 1.0
                observation = "Correct! Task complete."
            else:
                reward -= 0.5
                observation = f"Incorrect. The target was not {action.net_profit}."
        else:
            observation = f"Unknown action type: {action.action_type!r}."

        # Enforce the step budget.  A submission on the final step keeps its
        # own verdict message instead of being overwritten.
        if not done and self.current_step >= self.max_steps:
            done = True
            observation = "Max steps reached."

        state = AccountantState(observation=observation, is_done=done)
        return state, float(reward), done, {}