import json
from training import parse_action, map_to_env
from environment import CodeReviewEnv

# Smoke test: push a fixed sequence of JSON-encoded actions through
# parse_action -> map_to_env -> env.step and print what comes back.
env = CodeReviewEnv()
obs = env.reset()

# Candidate patch submitted by the 'fix' action, written one source line per
# literal so it reads like the code it contains.
_FIX_SOURCE = (
    'def fix(data):\n'
    '    if not data:\n'
    '        return 0\n'
    '    return sum(data)/len(data)'
)

# Scripted episode, in the order the actions are sent to the environment.
actions = [
    dict(action_type='inspect'),
    dict(action_type='run_tests'),
    dict(action_type='question', content='Why this fails?'),
    dict(action_type='fix', content=_FIX_SOURCE),
    dict(action_type='done'),
]

# State straight after reset; getattr defaults cover an observation that
# lacks these attributes.
print('initial author_response=', repr(getattr(obs, 'author_response', '')))
print('initial last_action_type=', getattr(obs, 'last_action_type', None))

for step_no, spec in enumerate(actions, start=1):
    # Round-trip through JSON text so parse_action receives a string.
    raw_json = json.dumps(spec)
    parsed = parse_action(raw_json)
    env_action = map_to_env(parsed)

    obs, reward, done, info = env.step(env_action)

    print(
        f"step={step_no} parsed={parsed.action_type} "
        f"env_action={type(env_action).__name__} "
        f"last_action_type={obs.last_action_type} "
        f"reward={reward.value:.4f} done={done}"
    )
    print('  author_response=', repr(obs.author_response))
    print('  info_action_type=', info.get('action_type'))

    # Stop as soon as the environment reports the episode finished.
    if done:
        break