| | import datetime |
| | import json |
| | import logging |
| | import os |
| | import time |
| | from wrapt_timeout_decorator import * |
| | from lib_results_logger import log_task_completion |
| |
|
| | logger = logging.getLogger("desktopenv.experiment") |
| |
|
| |
|
def run_single_example(agent, env, example, max_steps, instruction, args, example_result_dir, scores):
    """Run one benchmark example with a generic agent and record the trajectory.

    Resets the environment and agent, then loops up to ``max_steps`` asking the
    agent for actions and executing them via ``env.step``. Each executed action
    is appended to ``traj.jsonl`` together with a post-action screenshot. The
    final evaluation score is appended to ``scores`` and written to
    ``result.txt``; a screen recording of the episode is saved as
    ``recording.mp4``.
    """
    runtime_logger = setup_logger(example, example_result_dir)

    env.reset(task_config=example)

    # Some agents accept a runtime logger in reset(), others only the VM IP.
    # Fall back to the simpler signature, but log why the richer call failed
    # instead of silently swallowing the error.
    try:
        agent.reset(runtime_logger, vm_ip=env.vm_ip)
    except Exception as e:
        logger.warning("agent.reset(runtime_logger, vm_ip=...) failed (%s); "
                       "retrying without runtime logger.", e)
        agent.reset(vm_ip=env.vm_ip)

    time.sleep(60)  # give the freshly reset environment time to settle
    obs = env._get_obs()
    done = False
    step_idx = 0
    env.controller.start_recording()
    while not done and step_idx < max_steps:
        response, actions = agent.predict(
            instruction,
            obs
        )
        for action in actions:
            # Microsecond-resolution timestamp so screenshots from multiple
            # actions within the same second never collide.
            action_timestamp = datetime.datetime.now().strftime("%Y%m%d@%H%M%S%f")
            logger.info("Step %d: %s", step_idx + 1, action)
            obs, reward, done, info = env.step(action, args.sleep_after_execution)

            logger.info("Reward: %.2f", reward)
            logger.info("Done: %s", done)

            with open(os.path.join(example_result_dir, f"step_{step_idx + 1}_{action_timestamp}.png"),
                      "wb") as _f:
                _f.write(obs['screenshot'])
            with open(os.path.join(example_result_dir, "traj.jsonl"), "a") as f:
                f.write(json.dumps({
                    "step_num": step_idx + 1,
                    "action_timestamp": action_timestamp,
                    "action": action,
                    "response": response,
                    "reward": reward,
                    "done": done,
                    "info": info,
                    "screenshot_file": f"step_{step_idx + 1}_{action_timestamp}.png"
                }))
                f.write("\n")
            if done:
                logger.info("The episode is done.")
                break
        step_idx += 1
    time.sleep(20)  # let the final action's effects settle before evaluating
    result = env.evaluate()
    logger.info("Result: %.2f", result)
    scores.append(result)
    with open(os.path.join(example_result_dir, "result.txt"), "w", encoding="utf-8") as f:
        f.write(f"{result}\n")

    log_task_completion(example, result, example_result_dir, args)

    env.controller.end_recording(os.path.join(example_result_dir, "recording.mp4"))
| |
|
| |
|
def setup_logger(example, example_result_dir):
    """Return a per-example DEBUG logger that writes to ``runtime.log``.

    ``logging.getLogger`` returns the same logger object for the same name,
    so without a guard every call for a repeated example id would attach an
    additional FileHandler and duplicate every subsequent record. Only attach
    a handler if none for this file is already present.
    """
    runtime_logger = logging.getLogger(f"desktopenv.example.{example['id']}")
    runtime_logger.setLevel(logging.DEBUG)
    log_path = os.path.join(example_result_dir, "runtime.log")
    # FileHandler.baseFilename is stored as an absolute path; compare against that.
    already_attached = any(
        isinstance(h, logging.FileHandler) and h.baseFilename == os.path.abspath(log_path)
        for h in runtime_logger.handlers
    )
    if not already_attached:
        runtime_logger.addHandler(logging.FileHandler(log_path))
    return runtime_logger
| |
|
def run_single_example_human(env, example, max_steps, instruction, args, example_result_dir, scores):
    """Record the initial state of a task and evaluate it without an agent.

    Intended for human baselines: the environment is reset, the first
    screenshot and the instruction are saved, and the evaluator is run
    immediately on whatever state the operator produced. The score is
    appended to ``scores`` and written to ``result.txt``.
    """
    runtime_logger = setup_logger(example, example_result_dir)
    env.reset(task_config=example)
    time.sleep(60)

    observation = env._get_obs()

    # Persist the state before any (human) interaction takes place.
    screenshot_path = os.path.join(example_result_dir, "initial_state.png")
    with open(screenshot_path, "wb") as shot_file:
        shot_file.write(observation['screenshot'])

    record = {
        "instruction": instruction,
        "initial_state": "initial_state.png"
    }
    with open(os.path.join(example_result_dir, "traj.jsonl"), "a") as traj_file:
        traj_file.write(json.dumps(record) + "\n")

    result = env.evaluate()
    logger.info("Result: %.2f", result)
    scores.append(result)
    with open(os.path.join(example_result_dir, "result.txt"), "w", encoding="utf-8") as result_file:
        result_file.write(f"{result}\n")
| |
|
| |
|
def run_single_example_kimi(agent, env, example, max_steps, instruction, args, example_result_dir, scores):
    """Run one example with a Kimi-style agent.

    ``agent.predict`` returns ``(response, actions, info_dict)`` where
    ``info_dict`` may carry a natural-language description of the action. An
    empty or error-prefixed first action aborts the episode early. Each
    executed action is appended to ``traj.jsonl`` with a post-action
    screenshot; the final score goes to ``scores`` and ``result.txt``.
    """
    runtime_logger = setup_logger(example, example_result_dir)
    agent.reset(runtime_logger)
    env.reset(task_config=example)
    time.sleep(60)  # give the freshly reset environment time to settle
    obs = env._get_obs()
    done = False
    step_idx = 0
    env.controller.start_recording()
    while not done and step_idx < max_steps:
        response, actions, info_dict = agent.predict(instruction, obs)

        logger.info(f"Got Action: {actions}")

        # Abort on empty or explicit error output from the agent.
        if not actions or len(actions) == 0 or actions[0] == "" or actions[0].lower().startswith("error"):
            break

        for action in actions:
            # Microsecond resolution so screenshots from several actions in
            # the same second don't overwrite each other (matches the
            # filename scheme of run_single_example).
            action_timestamp = datetime.datetime.now().strftime("%Y%m%d@%H%M%S%f")
            logger.info("Step %d: %s", step_idx + 1, action)

            obs, reward, done, info = env.step(action, args.sleep_after_execution)

            logger.info(f"Action {action} executed, reward: {reward}, done: {done}")

            with open(os.path.join(example_result_dir, f"step_{step_idx + 1}_{action_timestamp}.png"),
                      "wb") as _f:
                _f.write(obs['screenshot'])

            with open(os.path.join(example_result_dir, "traj.jsonl"), "a", encoding="utf-8") as f:
                f.write(json.dumps({
                    "step_num": step_idx + 1,
                    "action": action,
                    "natural_language_action": info_dict.get("action"),
                    "action_timestamp": action_timestamp,
                    "response": response,
                    "reward": reward,
                    "done": done,
                    "info": info,
                    "screenshot_file": f"step_{step_idx + 1}_{action_timestamp}.png"
                }, ensure_ascii=False))
                f.write("\n")
            if done:
                logger.info("The episode is done.")
                break
        step_idx += 1

    time.sleep(30)  # let the final action's effects settle before evaluating
    result = env.evaluate()
    logger.info("Result: %.2f", result)
    scores.append(result)
    with open(os.path.join(example_result_dir, "result.txt"), "w", encoding="utf-8") as f:
        f.write(f"{result}\n")
    env.controller.end_recording(os.path.join(example_result_dir, "recording.mp4"))
| |
|
def run_single_example_agi(agent, env, example, max_steps, instruction, args, example_result_dir, scores):
    """Run one example with an AGI-style agent.

    Unlike the generic runner, the agent executes actions itself via
    ``agent.step(action)`` (whose extra ``step_info`` return value is
    discarded here). The episode ends when the step reports ``done``, when
    the agent's response marks ``state_correct`` falsy, or when ``max_steps``
    is reached.
    """
    runtime_logger = setup_logger(example, example_result_dir)
    agent.reset(runtime_logger)
    env.reset(task_config=example)
    time.sleep(60)  # give the freshly reset environment time to settle
    obs = env._get_obs()
    done = False
    step_idx = 0
    env.controller.start_recording()
    while not done and step_idx < max_steps:
        response, actions = agent.predict(
            instruction,
            obs
        )

        # Tentatively end the episode when the agent believes the state is no
        # longer correct. NOTE(review): agent.step() below overwrites `done`,
        # so this assignment only takes effect when `actions` is empty.
        done = not response.get('state_correct', False)

        for action in actions:
            # Second-resolution timestamp used in the screenshot filename.
            # NOTE(review): multiple actions within the same second would
            # share a filename (run_single_example uses %f to avoid this).
            action_timestamp = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
            logger.info("Step %d: %s", step_idx + 1, action)
            # The agent performs the action itself; step_info is unused.
            obs, reward, done, info, step_info = agent.step(action)

            # Re-apply the agent's own state judgement if the step itself did
            # not already finish the episode.
            if not done:
                if not response.get('state_correct', False):
                    done = True

            logger.info("Reward: %.2f", reward)
            logger.info("Done: %s", done)

            with open(os.path.join(example_result_dir, f"step_{step_idx + 1}_{action_timestamp}.png"),
                      "wb") as _f:
                _f.write(obs['screenshot'])

            # Actions are dict-like here; drop transient bookkeeping before
            # serialising the trajectory entry.
            if action.get('pending_checks', None):
                del action['pending_checks']

            with open(os.path.join(example_result_dir, "traj.jsonl"), "a") as f:
                f.write(json.dumps({
                    "step_num": step_idx + 1,
                    "action_timestamp": action_timestamp,
                    "action": action,
                    "reward": reward,
                    "done": done,
                    "info": info,
                    "screenshot_file": f"step_{step_idx + 1}_{action_timestamp}.png"
                }))
                f.write("\n")
            if done:
                logger.info("The episode is done.")
                break
        step_idx += 1
    result = env.evaluate()
    logger.info("Result: %.2f", result)
    scores.append(result)
    with open(os.path.join(example_result_dir, "result.txt"), "w", encoding="utf-8") as f:
        f.write(f"{result}\n")
    env.controller.end_recording(os.path.join(example_result_dir, "recording.mp4"))
| |
|
| |
|
def run_single_example_openaicua(agent, env, example, max_steps, instruction, args, example_result_dir, scores):
    """Run one example with an OpenAI-CUA-style agent.

    The agent executes actions itself through ``agent.step(action)``; the
    environment is only used for reset, observation, recording and
    evaluation. The extra ``step_info`` return of ``agent.step`` is unused.
    """
    runtime_logger = setup_logger(example, example_result_dir)
    agent.reset(runtime_logger)
    env.reset(task_config=example)
    time.sleep(60)  # give the freshly reset environment time to settle
    obs = env._get_obs()
    done = False
    step_idx = 0
    env.controller.start_recording()
    while not done and step_idx < max_steps:
        response, actions = agent.predict(
            instruction,
            obs
        )

        # Provisional termination flag from the agent's own state judgement.
        # NOTE(review): it is overwritten by agent.step() below, so it only
        # matters when the agent returned no actions this round.
        done = not response.get('state_correct', False)

        for action in actions:
            # NOTE(review): %H%M%S has second resolution — two actions in the
            # same second would share a screenshot filename.
            action_timestamp = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
            logger.info("Step %d: %s", step_idx + 1, action)
            # Agent-side execution; step_info is discarded.
            obs, reward, done, info, step_info = agent.step(action)

            # If the step did not finish the episode, fall back to the
            # agent's state_correct verdict.
            if not done:
                if not response.get('state_correct', False):
                    done = True

            logger.info("Reward: %.2f", reward)
            logger.info("Done: %s", done)

            with open(os.path.join(example_result_dir, f"step_{step_idx + 1}_{action_timestamp}.png"),
                      "wb") as _f:
                _f.write(obs['screenshot'])

            # Actions are dict-like; strip transient bookkeeping before the
            # trajectory entry is serialised.
            if action.get('pending_checks', None):
                del action['pending_checks']

            with open(os.path.join(example_result_dir, "traj.jsonl"), "a") as f:
                f.write(json.dumps({
                    "step_num": step_idx + 1,
                    "action_timestamp": action_timestamp,
                    "action": action,
                    "reward": reward,
                    "done": done,
                    "info": info,
                    "screenshot_file": f"step_{step_idx + 1}_{action_timestamp}.png"
                }))
                f.write("\n")
            if done:
                logger.info("The episode is done.")
                break
        step_idx += 1
    result = env.evaluate()
    logger.info("Result: %.2f", result)
    scores.append(result)
    with open(os.path.join(example_result_dir, "result.txt"), "w", encoding="utf-8") as f:
        f.write(f"{result}\n")
    env.controller.end_recording(os.path.join(example_result_dir, "recording.mp4"))
| |
|
def run_single_example_opencua(agent, env, example, max_steps, instruction, args, example_result_dir, scores):
    """Run one example with an OpenCUA-style agent.

    ``agent.predict`` returns ``(response, actions, info_dict)`` where
    ``info_dict`` may carry a natural-language description of the action.
    An empty or error-prefixed first action aborts the episode early.
    """
    runtime_logger = setup_logger(example, example_result_dir)
    agent.reset(runtime_logger)
    env.reset(task_config=example)
    time.sleep(60)  # give the freshly reset environment time to settle
    obs = env._get_obs()
    done = False
    step_idx = 0
    env.controller.start_recording()
    while not done and step_idx < max_steps:
        response, actions, info_dict = agent.predict(instruction, obs)

        logger.info(f"Got Action: {actions}")

        # Abort on empty or explicit error output from the agent.
        if not actions or len(actions)==0 or actions[0]=="" or actions[0].lower().startswith("error"):
            break

        for action in actions:
            # NOTE(review): second-resolution timestamp — several actions in
            # the same second share a screenshot filename (run_single_example
            # uses %f to avoid this).
            action_timestamp = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
            logger.info("Step %d: %s", step_idx + 1, action)

            obs, reward, done, info = env.step(action, args.sleep_after_execution)

            logger.info(f"Action {action} executed, reward: {reward}, done: {done}")

            with open(os.path.join(example_result_dir, f"step_{step_idx + 1}_{action_timestamp}.png"),
                      "wb") as _f:
                _f.write(obs['screenshot'])

            with open(os.path.join(example_result_dir, "traj.jsonl"), "a", encoding="utf-8") as f:
                f.write(json.dumps({
                    "step_num": step_idx + 1,
                    "action": action,
                    "natural_language_action": info_dict.get("action"),
                    "action_timestamp": action_timestamp,
                    "response": response,
                    "reward": reward,
                    "done": done,
                    "info": info,
                    "screenshot_file": f"step_{step_idx + 1}_{action_timestamp}.png"
                }, ensure_ascii=False))
                f.write("\n")
            if done:
                logger.info("The episode is done.")
                break
        step_idx += 1

    time.sleep(20)  # let the final action's effects settle before evaluating
    result = env.evaluate()
    logger.info("Result: %.2f", result)
    scores.append(result)
    with open(os.path.join(example_result_dir, "result.txt"), "w", encoding="utf-8") as f:
        f.write(f"{result}\n")
    env.controller.end_recording(os.path.join(example_result_dir, "recording.mp4"))
| |
|
def run_single_example_autoglm(agent, env, example, max_steps, instruction, args, example_result_dir, scores):
    """Run one example with an AutoGLM-style agent.

    Differences from the generic runner: when the agent returns no actions
    the observation is refreshed so the next predict() sees the current
    screen, and an explicit 'FAIL' is appended to the environment's action
    history if the episode ends without the agent declaring it done.
    """
    runtime_logger = setup_logger(example, example_result_dir)
    # Prefer handing the agent its runtime logger; fall back to a bare reset
    # for agents whose reset() takes no arguments — log the reason instead of
    # swallowing it silently.
    try:
        agent.reset(runtime_logger)
    except Exception as e:
        logger.warning("agent.reset(runtime_logger) failed (%s); retrying without arguments.", e)
        agent.reset()

    env.reset(task_config=example)

    time.sleep(60)  # give the freshly reset environment time to settle
    obs = env._get_obs()
    done = False
    step_idx = 0
    env.controller.start_recording()
    while not done and step_idx < max_steps:
        response, actions = agent.predict(
            instruction,
            obs
        )
        for action in actions:
            # NOTE(review): second-resolution timestamp — several actions in
            # the same second share a screenshot filename.
            action_timestamp = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
            logger.info("Step %d: %s", step_idx + 1, action)
            obs, reward, done, info = env.step(action, args.sleep_after_execution)

            logger.info("Reward: %.2f", reward)
            logger.info("Done: %s", done)

            with open(os.path.join(example_result_dir, f"step_{step_idx + 1}_{action_timestamp}.png"),
                      "wb") as _f:
                _f.write(obs['screenshot'])
            with open(os.path.join(example_result_dir, "traj.jsonl"), "a") as f:
                f.write(json.dumps({
                    "step_num": step_idx + 1,
                    "action_timestamp": action_timestamp,
                    "action": action,
                    "response": response,
                    "reward": reward,
                    "done": done,
                    "info": info,
                    "screenshot_file": f"step_{step_idx + 1}_{action_timestamp}.png"
                }))
                f.write("\n")

            if done:
                logger.info("The episode is done.")
                break

        # With no action executed this step, refresh the observation so the
        # agent predicts from the current screen next iteration.
        if not actions:
            obs = env._get_obs()

        step_idx += 1

    # The agent never declared success; record an explicit failure marker for
    # the evaluator.
    if not done:
        env.action_history.append('FAIL')

    result = env.evaluate()
    logger.info("Result: %.2f", result)
    scores.append(result)
    with open(os.path.join(example_result_dir, "result.txt"), "w", encoding="utf-8") as f:
        f.write(f"{result}\n")
    env.controller.end_recording(os.path.join(example_result_dir, "recording.mp4"))
|
def run_single_example_mano(agent, env, example, max_steps, instruction, args, example_result_dir, scores):
    """Run one example with the Mano agent.

    Saves the initial observation as ``step_0.png`` and rewrites a
    shift/ctrl 'hotkey' action followed by a click into a single
    keyDown/click/keyUp script before execution.
    """
    runtime_logger = setup_logger(example, example_result_dir)
    agent.reset(runtime_logger)
    env.reset(task_config=example)
    time.sleep(60)  # give the freshly reset environment time to settle
    obs = env._get_obs()
    done = False
    step_idx = 0
    env.controller.start_recording()

    # Record the state before any action is taken.
    with open(os.path.join(example_result_dir, f"step_0.png"),
              "wb") as _f:
        _f.write(obs['screenshot'])
    while not done and step_idx < max_steps:
        response, actions = agent.predict(
            instruction,
            obs
        )
        # Merge a modifier 'hotkey' action followed by a click into one
        # keyDown/click/keyUp script — presumably so the modifier stays held
        # during the click; pyautogui.hotkey alone would tap it. Only the
        # first two actions are inspected.
        if len(actions) > 1:
            if (("pyautogui.hotkey('shift')" in actions[0] or "pyautogui.hotkey('ctrl')" in actions[0])
                    and "pyautogui.click" in actions[1]):
                hotkey_type = 'shift' if "shift" in actions[0] else 'ctrl'
                action = f"pyautogui.keyDown('{hotkey_type}')\n{actions[1]}\npyautogui.keyUp('{hotkey_type}')"
                actions = [action]

        for action in actions:
            # NOTE(review): second-resolution timestamp — two actions within
            # the same second share a screenshot filename.
            action_timestamp = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
            logger.info("Step %d: %s", step_idx + 1, action)
            obs, reward, done, info = env.step(action, args.sleep_after_execution)

            logger.info("Reward: %.2f", reward)
            logger.info("Done: %s", done)

            with open(os.path.join(example_result_dir, f"step_{step_idx + 1}_{action_timestamp}.png"),
                      "wb") as _f:
                _f.write(obs['screenshot'])
            with open(os.path.join(example_result_dir, "traj.jsonl"), "a") as f:
                f.write(json.dumps({
                    "step_num": step_idx + 1,
                    "action_timestamp": action_timestamp,
                    "action": action,
                    "reward": reward,
                    "done": done,
                    "info": info,
                    "screenshot_file": f"step_{step_idx + 1}_{action_timestamp}.png",
                    "response":response
                }))
                f.write("\n")
            if done:
                logger.info("The episode is done.")
                break
        step_idx += 1
    result = env.evaluate()
    logger.info("Result: %.2f", result)
    scores.append(result)
    with open(os.path.join(example_result_dir, "result.txt"), "w", encoding="utf-8") as f:
        f.write(f"{result}\n")
    env.controller.end_recording(os.path.join(example_result_dir, "recording.mp4"))
| | |
def run_single_example_uipath(agent, env, example, max_steps, instruction, args, example_result_dir, scores):
    """Run one example with a UiPath-style agent.

    Identical to the generic runner except that ``agent.predict`` also
    receives ``args`` and the current ``step_idx``.
    """
    runtime_logger = setup_logger(example, example_result_dir)
    # Prefer handing the agent its runtime logger; fall back to a bare reset
    # for agents whose reset() takes no arguments — log the reason instead of
    # swallowing it silently.
    try:
        agent.reset(runtime_logger)
    except Exception as e:
        logger.warning("agent.reset(runtime_logger) failed (%s); retrying without arguments.", e)
        agent.reset()

    env.reset(task_config=example)

    time.sleep(60)  # give the freshly reset environment time to settle
    obs = env._get_obs()
    done = False
    step_idx = 0
    env.controller.start_recording()
    while not done and step_idx < max_steps:
        response, actions = agent.predict(
            instruction,
            obs,
            args,
            step_idx
        )
        for action in actions:
            # NOTE(review): second-resolution timestamp — several actions in
            # the same second share a screenshot filename.
            action_timestamp = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
            logger.info("Step %d: %s", step_idx + 1, action)
            obs, reward, done, info = env.step(action, args.sleep_after_execution)

            logger.info("Reward: %.2f", reward)
            logger.info("Done: %s", done)

            with open(os.path.join(example_result_dir, f"step_{step_idx + 1}_{action_timestamp}.png"),
                      "wb") as _f:
                _f.write(obs['screenshot'])
            with open(os.path.join(example_result_dir, "traj.jsonl"), "a") as f:
                f.write(json.dumps({
                    "step_num": step_idx + 1,
                    "action_timestamp": action_timestamp,
                    "action": action,
                    "response": response,
                    "reward": reward,
                    "done": done,
                    "info": info,
                    "screenshot_file": f"step_{step_idx + 1}_{action_timestamp}.png"
                }))
                f.write("\n")
            if done:
                logger.info("The episode is done.")
                break
        step_idx += 1
    result = env.evaluate()
    logger.info("Result: %.2f", result)
    scores.append(result)
    with open(os.path.join(example_result_dir, "result.txt"), "w", encoding="utf-8") as f:
        f.write(f"{result}\n")
    env.controller.end_recording(os.path.join(example_result_dir, "recording.mp4"))
| |
|
| |
|
| | from mm_agents.os_symphony.utils.common_utils import draw_coordinates |
| | from mm_agents.os_symphony.utils.process_context import set_current_result_dir |
| |
|
| |
|
| | logger = logging.getLogger("desktopenv.experiment") |
| |
|
def run_single_example_os_symphony(agent, env, example, max_steps, instruction, args, example_result_dir, scores):
    """Run one example with the OS-Symphony agent.

    Screenshots are saved per step (tagged ``_milestone`` when the agent's
    reflection marks one), predicted click coordinates are drawn onto an
    annotated copy, and the trajectory is written both cumulatively to
    ``traj.jsonl`` and per-step to ``traj_<n>.json``. Wall-clock episode time
    is written to ``time.txt``. No screen recording is made by this runner.
    """
    set_current_result_dir(example_result_dir)

    agent.reset(result_dir=example_result_dir)
    env.reset(task_config=example)
    time.sleep(30)  # give the freshly reset environment time to settle
    obs = env._get_obs()
    done = False
    step_idx = 0

    start_time = time.time()

    while not done and step_idx < max_steps:
        # The third argument flags the final allowed step so the agent can
        # wrap up.
        response, actions = agent.predict(
            instruction,
            obs,
            step_idx == max_steps - 1
        )
        for action in actions:
            # Tag milestone screenshots so they are easy to find afterwards.
            if "reflection" in response and response["reflection"].get("is_milestone"):
                img_name = f"step_{step_idx + 1}_milestone.png"
            else:
                img_name = f"step_{step_idx + 1}.png"

            # NOTE(review): this saves the PRE-action screenshot, unlike the
            # other runners in this file which save the post-action one.
            with open(os.path.join(example_result_dir, img_name),
                      "wb") as _f:
                _f.write(obs['screenshot'])
            if "coordinates" in response and response["coordinates"]:
                # Save an annotated copy showing the predicted coordinates.
                draw_coordinates(
                    image_bytes=obs['screenshot'],
                    coordinates=response["coordinates"],
                    save_path=os.path.join(example_result_dir, img_name[:-4] + "_draw.png")
                )

            logger.info("Step %d: %s", step_idx + 1, action)
            obs, reward, done, info = env.step(action, args.sleep_after_execution)
            logger.info("Done: %s", done)

            # NOTE(review): unlike the per-step dump below, this jsonl entry
            # does not pass ensure_ascii=False despite the utf-8 file mode.
            with open(os.path.join(example_result_dir, "traj.jsonl"), "a", encoding="utf-8") as f:
                f.write(json.dumps({
                    "instruction": instruction,
                    "step_num": step_idx + 1,
                    "action": action,
                    "response": response,
                    "done": done,
                    "info": info,
                    "screenshot_file": img_name
                }))
                f.write("\n")
            with open(os.path.join(example_result_dir, f"traj_{step_idx+1}.json"), "w", encoding="utf-8") as f:
                json.dump({
                    "step_num": step_idx + 1,
                    "action": action,
                    "response": response,
                    "done": done,
                    "info": info,
                    "screenshot_file": img_name
                }, f, indent=4, ensure_ascii=False)
            if done:
                logger.info("The episode is done.")
                time.sleep(60)  # let the final state settle before evaluation
                break
        step_idx += 1
    end_time = time.time()
    result = float(env.evaluate())
    logger.info("Result: %.2f", result)
    scores.append(result)
    with open(os.path.join(example_result_dir, "result.txt"), "w", encoding="utf-8") as f:
        f.write(f"{result}\n")

    with open(os.path.join(example_result_dir, "time.txt"), "w", encoding="utf-8") as f:
        f.write(f"{end_time-start_time:.2f}\n")
| |
|
| |
|
def run_single_example_evocua(agent, env, example, max_steps, instruction, args, example_result_dir, scores):
    """
    Unified run function for EvoCUAAgent (supporting both S1 and S2 modes).
    """
    runtime_logger = setup_logger(example, example_result_dir)

    env.reset(task_config=example)

    # Agents differ in reset() signatures; try the richest call first and
    # degrade gracefully to the simplest.
    try:
        agent.reset(runtime_logger, vm_ip=env.vm_ip)
    except Exception:
        try:
            agent.reset(runtime_logger)
        except Exception:
            agent.reset()

    time.sleep(60)  # give the freshly reset environment time to settle
    obs = env._get_obs()
    done = False
    step_idx = 0

    env.controller.start_recording()
    while not done and step_idx < max_steps:

        predict_res = agent.predict(instruction, obs)

        # Agents may return an extra info dict (presumably S2 mode); others
        # return just (response, actions).
        if len(predict_res) == 3:
            response, actions, info_dict = predict_res
        else:
            response, actions = predict_res
            info_dict = {}

        logger.info(f"Step {step_idx + 1} Actions: {actions}")

        # Stop when no usable action came back; terminal FAIL/DONE markers
        # are explicitly exempted so they still get executed below.
        if not actions or (len(actions) == 1 and (actions[0] == "" or "error" in actions[0].lower())):
            if not (actions and actions[0] in ["FAIL", "DONE"]):
                logger.warning("No valid actions returned. Breaking loop.")
                break

        for action in actions:
            # Microsecond timestamp keeps per-action screenshot names unique.
            action_timestamp = datetime.datetime.now().strftime("%Y%m%d@%H%M%S%f")
            logger.info("Executing action: %s", action)

            obs, reward, done, info = env.step(action, args.sleep_after_execution)

            logger.info("Reward: %.2f", reward)
            logger.info("Done: %s", done)

            screenshot_file = f"step_{step_idx + 1}_{action_timestamp}.png"
            with open(os.path.join(example_result_dir, screenshot_file), "wb") as _f:
                _f.write(obs['screenshot'])

            log_entry = {
                "step_num": step_idx + 1,
                "action_timestamp": action_timestamp,
                "action": action,
                "response": response,
                "reward": reward,
                "done": done,
                "info": info,
                "screenshot_file": screenshot_file
            }

            # Only recorded when the agent supplied an info dict.
            if info_dict:
                log_entry["natural_language_action"] = info_dict.get("action")

            with open(os.path.join(example_result_dir, "traj.jsonl"), "a", encoding="utf-8") as f:
                f.write(json.dumps(log_entry, ensure_ascii=False))
                f.write("\n")

            if done:
                logger.info("The episode is done.")
                break

        step_idx += 1

    time.sleep(20)  # let the final action's effects settle before evaluating
    result = env.evaluate()
    logger.info("Result: %.2f", result)
    scores.append(result)

    with open(os.path.join(example_result_dir, "result.txt"), "w", encoding="utf-8") as f:
        f.write(f"{result}\n")

    log_task_completion(example, result, example_result_dir, args)

    env.controller.end_recording(os.path.join(example_result_dir, "recording.mp4"))
| |
|
| |
|
def run_single_example_mobileagent_v3(agent, env, example, max_steps, instruction, args, example_result_dir, scores):
    """
    Specialized run function for MobileAgent V3.

    This agent has a different interface where it calls agent.step() directly
    which internally handles prediction and execution. When a step reports a
    failed status the episode is aborted and no evaluation is performed.
    """
    runtime_logger = setup_logger(example, example_result_dir)
    # Prefer handing the agent its runtime logger; fall back to a bare reset
    # for agents whose reset() takes no arguments. Catch Exception (not a
    # bare except, which would also swallow KeyboardInterrupt/SystemExit)
    # and log the reason.
    try:
        agent.reset(runtime_logger)
    except Exception as e:
        logger.warning("agent.reset(runtime_logger) failed (%s); retrying without arguments.", e)
        agent.reset()
    env.reset(task_config=example)
    time.sleep(60)  # give the freshly reset environment time to settle
    obs = env._get_obs()

    done = False
    step_idx = 0

    # Save the initial observation as step 0 before any action runs.
    action_timestamp = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
    with open(os.path.join(example_result_dir, f"step_{step_idx}_{action_timestamp}.png"), "wb") as _f:
        _f.write(obs['screenshot'])

    eval_flag = True
    env.controller.start_recording()
    while not done and step_idx < max_steps:
        # agent.step() both predicts and executes; global_state is unused here.
        global_state, action_code, step_status, reward, done = agent.step(instruction, env, args)
        action_timestamp = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")

        if step_status is False:
            # The agent failed internally: end the episode and skip evaluation.
            eval_flag = False
            done = True
            reward = None
        else:
            obs = env._get_obs()
            with open(os.path.join(example_result_dir, f"step_{step_idx + 1}_{action_timestamp}.png"),
                      "wb") as _f:
                _f.write(obs['screenshot'])
            with open(os.path.join(example_result_dir, "traj.jsonl"), "a") as f:
                f.write(json.dumps({
                    "step_num": step_idx + 1,
                    "step_status": step_status,
                    "action_timestamp": action_timestamp,
                    "action": action_code,
                    "reward": reward,
                    "done": done,
                    "screenshot_file": f"step_{step_idx + 1}_{action_timestamp}.png",
                }))
                f.write("\n")
        if done:
            logger.info("The episode is done.")
            break
        step_idx += 1

    if eval_flag:
        result = env.evaluate()
        logger.info("Result: %.2f", result)
        scores.append(result)
        with open(os.path.join(example_result_dir, "result.txt"), "w", encoding="utf-8") as f:
            f.write(f"{result}\n")
    env.controller.end_recording(os.path.join(example_result_dir, "recording.mp4"))
| |
|