from openenv.core.env_server import Environment as BaseEnv

from .schemas import CalculateAction, SubmitAction, AccountantState, Action


class AccountantEng(BaseEnv):
    """Single-question accounting environment.

    The agent is shown a short text describing revenue and costs and must
    submit the net profit. It may issue ``calculate`` actions to evaluate
    expressions before issuing a ``submit`` action, within a fixed step
    budget.
    """

    def __init__(self):
        # NOTE(review): BaseEnv.__init__ is not invoked here; confirm the base
        # class does not rely on state set up in its own constructor.
        self.target_answer = 3000
        self.max_steps = 5
        self.messy_text = "Revenue was 5000, costs were 2000. What is the net profit?"
        self.current_step = 0

    def reset(self) -> AccountantState:
        """Start a new episode and return the initial state.

        Returns:
            A state whose observation is the task text and whose ``is_done``
            flag is False.
        """
        # Reset the step counter so the step budget applies per episode.
        self.current_step = 0
        return AccountantState(observation=self.messy_text, is_done=False)

    def step(self, action: Action) -> tuple[AccountantState, float, bool, dict]:
        """Apply one action and advance the episode by one step.

        Every step costs 0.1 reward. A ``calculate`` action evaluates
        ``action.expression`` and reports the result, with a further 0.5
        penalty if evaluation raises. A ``submit`` action ends the episode,
        adding 1.0 for the correct ``action.net_profit`` and subtracting 0.5
        otherwise. If the step budget is used up without a submission, the
        episode ends with a "Max steps reached." observation.

        Args:
            action: The action to apply; its ``action_type`` selects the
                branch taken.

        Returns:
            A ``(state, reward, done, info)`` tuple; ``info`` is always an
            empty dict.
        """
        self.current_step += 1
        reward = -0.1  # flat cost charged on every step
        done = False
        observation = ""

        if action.action_type == "calculate":
            try:
                # SECURITY: eval() executes arbitrary Python. If expressions
                # can come from an untrusted agent or client, replace this
                # with a restricted arithmetic evaluator.
                result = eval(action.expression)
            except Exception as e:
                reward -= 0.5
                # Tell the agent why the calculation was penalised.
                observation = f"Calculation error: {e}"
            else:
                observation = f"Calculation result: {result}"
        elif action.action_type == "submit":
            done = True
            if action.net_profit == self.target_answer:
                reward += 1.0
                observation = "Correct! Task complete."
            else:
                reward -= 0.5
                observation = f"Incorrect. The target was not {action.net_profit}."

        # Terminate when the budget is exhausted. A submission on the final
        # step keeps its own verdict instead of being overwritten.
        if not done and self.current_step >= self.max_steps:
            done = True
            observation = "Max steps reached."

        state = AccountantState(observation=observation, is_done=done)
        return state, float(reward), done, {}