import os
import sys
import json
import numpy as np
from PIL import Image
from datetime import datetime
from env.thor_env import ThorEnv
from eval import Eval


class EvalSubgoals(Eval):
    '''
    evaluate subgoals by teacher-forcing expert demonstrations
    '''

    # subgoal types that can be evaluated in isolation
    ALL_SUBGOALS = ['GotoLocation', 'PickupObject', 'PutObject', 'CoolObject', 'HeatObject', 'CleanObject', 'SliceObject', 'ToggleObject']
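
    # Rough usage sketch (an assumption: the Eval base class in eval.py owns the
    # multiprocessing manager, fills the task queue, and spawns workers that call
    # run() below; the names here are illustrative, not a confirmed API):
    #
    #   evaluator = EvalSubgoals(args, manager)
    #   evaluator.spawn_threads()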

    @classmethod
    def run(cls, model, resnet, task_queue, args, lock, successes, failures, results):
        '''
        evaluation loop
        '''
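        # start THOR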
        env = ThorEnv()
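
        # make the list of subgoals to evaluate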
        subgoals_to_evaluate = cls.ALL_SUBGOALS if args.subgoals.lower() == "all" else args.subgoals.split(',')
        subgoals_to_evaluate = [sg for sg in subgoals_to_evaluate if sg in cls.ALL_SUBGOALS]
        print("Subgoals to evaluate: %s" % str(subgoals_to_evaluate))
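
        # create empty success/failure lists for each subgoal type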
        for sg in subgoals_to_evaluate:
            successes[sg] = list()
            failures[sg] = list()

        while True:
            if task_queue.qsize() == 0:
                break

            task = task_queue.get()
            try:
                traj = model.load_task_json(task)
                r_idx = task['repeat_idx']
                subgoal_idxs = [sg['high_idx'] for sg in traj['plan']['high_pddl'] if sg['discrete_action']['action'] in subgoals_to_evaluate]
                for eval_idx in subgoal_idxs:
                    print("No. of trajectories left: %d" % (task_queue.qsize()))
                    cls.evaluate(env, model, eval_idx, r_idx, resnet, traj, args, lock, successes, failures, results)
            except Exception as e:
                import traceback
                traceback.print_exc()
                print("Error: " + repr(e))
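
        # stop THOR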
        env.stop()

    @classmethod
    def evaluate(cls, env, model, eval_idx, r_idx, resnet, traj_data, args, lock, successes, failures, results):
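        # reset model state before evaluating a new subgoal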
        model.reset()
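
        # setup scene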
        reward_type = 'dense'
        cls.setup_scene(env, traj_data, r_idx, args, reward_type=reward_type)
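
        # expert demonstration up to (but not including) the evaluated subgoal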
        expert_init_actions = [a['discrete_action'] for a in traj_data['plan']['low_actions'] if a['high_idx'] < eval_idx]
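
        # subgoal info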
        subgoal_action = traj_data['plan']['high_pddl'][eval_idx]['discrete_action']['action']
        subgoal_instr = traj_data['turk_annotations']['anns'][r_idx]['high_descs'][eval_idx]
        print("Evaluating: %s\nSubgoal %s (%d)\nInstr: %s" % (traj_data['root'], subgoal_action, eval_idx, subgoal_instr))
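
        # extract language features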
        feat = model.featurize([traj_data], load_mask=False)
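
        # previous action; None at initialization, set during the expert unroll for teacher-forcing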
        prev_action = None

        done, subgoal_success = False, False
        fails = 0
        t = 0
        reward = 0
        while not done:
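            # break if max_steps is reached (on top of the expert prefix)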
            if t >= args.max_steps + len(expert_init_actions):
                break
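
            # extract visual features for the current frame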
            curr_image = Image.fromarray(np.uint8(env.last_event.frame))
            feat['frames'] = resnet.featurize([curr_image], batch=1).unsqueeze(0)
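
            # expert teacher-forcing up to the evaluated subgoal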
            if t < len(expert_init_actions):
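                # get the expert action and decompress its interaction mask (if any)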
                action = expert_init_actions[t]
                subgoal_completed = traj_data['plan']['low_actions'][t + 1]['high_idx'] != traj_data['plan']['low_actions'][t]['high_idx']
                compressed_mask = action['args']['mask'] if 'mask' in action['args'] else None
                mask = env.decompress_mask(compressed_mask) if compressed_mask is not None else None
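
                # forward the model alongside the expert unroll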
                if not args.skip_model_unroll_with_expert:
                    model.step(feat, prev_action=prev_action)
                    prev_action = action['action'] if not args.no_teacher_force_unroll_with_expert else None
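
                # execute the expert action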
                success, _, _, err, _ = env.va_interact(action['action'], interact_mask=mask, smooth_nav=args.smooth_nav, debug=args.debug)
                if not success:
                    print("expert initialization failed")
                    break

                # update transition reward state
                _, _ = env.get_transition_reward()
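
            # subgoal evaluation: the model acts on its own predictions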
            else:
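                # forward the model and extract predictions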
                m_out = model.step(feat, prev_action=prev_action)
                m_pred = model.extract_preds(m_out, [traj_data], feat, clean_special_tokens=False)
                m_pred = list(m_pred.values())[0]
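
                # get action and mask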
                action, mask = m_pred['action_low'], m_pred['action_low_mask'][0]
                mask = np.squeeze(mask, axis=0) if model.has_interaction(action) else None

                if args.debug:
                    print("Pred: ", action)
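
                # update previous action for the next model step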
                prev_action = str(action)

                if action not in cls.TERMINAL_TOKENS:
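                    # use the predicted action and mask (if available) to interact with the env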
                    t_success, _, _, err, _ = env.va_interact(action, interact_mask=mask, smooth_nav=args.smooth_nav, debug=args.debug)
                    if not t_success:
                        fails += 1
                        if fails >= args.max_fails:
                            print("Interact API failed %d times; latest error '%s'" % (fails, err))
                            break
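
                # next time-step: accumulate the transition reward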
                t_reward, t_done = env.get_transition_reward()
                reward += t_reward
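
                # check whether the evaluated subgoal was just completed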
                curr_subgoal_idx = env.get_subgoal_idx()
                if curr_subgoal_idx == eval_idx:
                    subgoal_success = True
                    break
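
                # stop when a terminal token is predicted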
                if action in cls.TERMINAL_TOKENS:
                    print("predicted %s" % action)
                    break
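
            # increment time index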
| | |
| | t += 1 |
| |
|
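
        # metrics: SPL-style success weighted by the ratio of the expert's
        # segment length to the model's rollout length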
        pl = float(t - len(expert_init_actions)) + 1  # +1 for the last action
        expert_pl = len([ll for ll in traj_data['plan']['low_actions'] if ll['high_idx'] == eval_idx])

        s_spl = (1 if subgoal_success else 0) * min(1., expert_pl / (pl + sys.float_info.epsilon))
        plw_s_spl = s_spl * expert_pl

        lock.acquire()
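
        # zero-initialize results for every subgoal type; entries with evals are recomputed below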
        for sg in cls.ALL_SUBGOALS:
            results[sg] = {
                'sr': 0.,
                'successes': 0.,
                'evals': 0.,
                'sr_plw': 0.
            }
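
        # log success/failure for this subgoal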
        log_entry = {'trial': traj_data['task_id'],
                     'type': traj_data['task_type'],
                     'repeat_idx': int(r_idx),
                     'subgoal_idx': int(eval_idx),
                     'subgoal_type': subgoal_action,
                     'subgoal_instr': subgoal_instr,
                     'subgoal_success_spl': float(s_spl),
                     'subgoal_path_len_weighted_success_spl': float(plw_s_spl),
                     'subgoal_path_len_weight': float(expert_pl),
                     'reward': float(reward)}
        if subgoal_success:
            # read-modify-write so the shared manager dict registers the update
            sg_successes = successes[subgoal_action]
            sg_successes.append(log_entry)
            successes[subgoal_action] = sg_successes
        else:
            sg_failures = failures[subgoal_action]
            sg_failures.append(log_entry)
            failures[subgoal_action] = sg_failures
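
        # recompute and print metrics across all subgoals evaluated so far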
        print("-------------")
        subgoals_to_evaluate = list(successes.keys())
        subgoals_to_evaluate.sort()
        for sg in subgoals_to_evaluate:
            num_successes, num_failures = len(successes[sg]), len(failures[sg])
            num_evals = num_successes + num_failures
            if num_evals > 0:
                sr = float(num_successes) / num_evals
                total_path_len_weight = sum([entry['subgoal_path_len_weight'] for entry in successes[sg]]) + \
                                        sum([entry['subgoal_path_len_weight'] for entry in failures[sg]])
                sr_plw = float(sum([entry['subgoal_path_len_weighted_success_spl'] for entry in successes[sg]]) +
                               sum([entry['subgoal_path_len_weighted_success_spl'] for entry in failures[sg]])) / total_path_len_weight

                results[sg] = {
                    'sr': sr,
                    'successes': num_successes,
                    'evals': num_evals,
                    'sr_plw': sr_plw
                }

                print("%s ==========" % sg)
                print("SR: %d/%d = %.3f" % (num_successes, num_evals, sr))
                print("PLW SR: %.3f" % (sr_plw))
                print("------------")

        lock.release()

    def create_stats(self):
        '''
        storage for success, failure, and results info
        '''
        self.successes, self.failures = self.manager.dict(), self.manager.dict()
        self.results = self.manager.dict()

    def save_results(self):
        results = {'successes': dict(self.successes),
                   'failures': dict(self.failures),
                   'results': dict(self.results)}
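
        # write results to a timestamped JSON file next to the model checkpoint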
        save_path = os.path.dirname(self.args.model_path)
        save_path = os.path.join(save_path, 'subgoal_results_' + datetime.now().strftime("%Y%m%d_%H%M%S_%f") + '.json')
        with open(save_path, 'w') as r:
            json.dump(results, r, indent=4, sort_keys=True)