"""Run a fixed, scripted episode against ``CodeReviewEnv`` and print a trace.

Each action payload is serialized to JSON text, passed through
``parse_action`` and ``map_to_env``, and handed to ``env.step``. After every
step the script prints selected observation, reward, and info fields, and it
stops early once the environment reports ``done``.
"""

import json

from training import parse_action, map_to_env
from environment import CodeReviewEnv

env = CodeReviewEnv()
obs = env.reset()

# Scripted payloads, replayed in order.
# NOTE(review): the embedded "fix" snippet uses a single space after each
# "\n" as written here; confirm that indentation is what the environment
# expects.
actions = [
    {"action_type": "inspect"},
    {"action_type": "run_tests"},
    {"action_type": "question", "content": "Why this fails?"},
    {
        "action_type": "fix",
        "content": "def fix(data):\n if not data:\n return 0\n return sum(data)/len(data)",
    },
    {"action_type": "done"},
]

# State right after reset; getattr defaults cover an observation that does
# not carry these fields.
print("initial author_response=", repr(getattr(obs, "author_response", "")))
print("initial last_action_type=", getattr(obs, "last_action_type", None))

for step_no, spec in enumerate(actions, start=1):
    # parse_action is given JSON text here, so encode the payload first.
    agent_action = parse_action(json.dumps(spec))
    mapped = map_to_env(agent_action)
    obs, reward, done, info = env.step(mapped)

    print(
        f"step={step_no} parsed={agent_action.action_type} "
        f"env_action={type(mapped).__name__} "
        f"last_action_type={obs.last_action_type} "
        f"reward={reward.value:.4f} done={done}"
    )
    print(" author_response=", repr(obs.author_response))
    print(" info_action_type=", info.get("action_type"))

    # Stop replaying as soon as the environment ends the episode.
    if done:
        break