| import os |
| import json |
| import requests |
| import time |
| from openai import OpenAI |
| from typing import Dict, List |
|
|
| |
# Base URL of the email-environment service that run_task talks to.
ENV_URL = "http://localhost:8000"

# API key taken from the process environment; the placeholder string is used
# when the OPENAI_API_KEY variable is not set.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "your_token_here")

# Shared OpenAI client built once at import time with the key above.
client = OpenAI(api_key=OPENAI_API_KEY)
|
|
def _post_json(path: str, payload: dict) -> dict:
    """POST *payload* as JSON to ``ENV_URL + path`` and return the decoded body.

    Raises:
        requests.RequestException: on connection failure, timeout, or an
            HTTP 4xx/5xx status.
        ValueError: if the response body is not valid JSON.
    """
    response = requests.post(f"{ENV_URL}{path}", json=payload, timeout=10)
    # Turn an HTTP error status into an exception here, rather than letting it
    # show up later as a confusing KeyError on the decoded payload.
    response.raise_for_status()
    return response.json()


def run_task(task_id: int) -> float:
    """Run the scripted baseline actions for one task and return the reward sum.

    The environment is reset for ``task_id`` via ``POST /reset``; each scripted
    action is then sent to ``POST /step``. Progress is printed as ``[START]``,
    ``[STEP]`` and ``[END]`` lines whose payload is JSON.

    Args:
        task_id: Identifier of the task to run. Ids without a scripted action
            list run zero steps.

    Returns:
        The sum of the rewards reported by the environment, as a float.
        ``0.0`` if the reset request fails. If a step request fails, the
        rewards accumulated before the failure are returned.
    """
    start_log = {
        "task_id": task_id,
        "timestamp": int(time.time()),
        "model": "EmailAssistant-Baseline",
    }
    print(f"[START] {json.dumps(start_log)}")

    try:
        reset_resp = _post_json("/reset", {"task_id": task_id})
        # Looking the key up here makes a malformed reset response fail fast.
        obs = reset_resp["observation"]
    except (requests.RequestException, KeyError, TypeError, ValueError) as e:
        print(f"Error resetting environment: {e}")
        return 0.0

    # Fixed action script per task id.
    task_actions = {
        1: [{"type": "MOVE", "email_id": 1, "target_folder": "Spam"}],
        2: [
            {"type": "MOVE", "email_id": 2, "target_folder": "Work"},
            {"type": "MOVE", "email_id": 4, "target_folder": "Archive"},
        ],
        3: [
            {
                "type": "SCHEDULE",
                "email_id": 3,
                "reply_text": "Meeting at 2 PM is perfect!",
            }
        ],
    }
    actions = task_actions.get(task_id, [])

    total_reward = 0.0
    for step_count, action_dict in enumerate(actions, start=1):
        try:
            step_resp = _post_json("/step", action_dict)
            reward = float(step_resp["reward"])
            obs = step_resp["observation"]
            done = step_resp["terminated"] or step_resp["truncated"]
        except (requests.RequestException, KeyError, TypeError, ValueError) as e:
            # Same policy as the reset call: report the failure instead of
            # crashing, stop stepping, and still emit the [END] record below.
            print(f"Error during step {step_count}: {e}")
            break

        total_reward += reward

        step_log = {
            "step": step_count,
            "action": action_dict["type"],
            "reward": round(reward, 4),
            "obs_inbox_count": obs.get("inbox_count", 0),
        }
        print(f"[STEP] {json.dumps(step_log)}")

        # The environment signalled the end of the episode; skip any
        # remaining scripted actions.
        if done:
            break

    end_log = {
        "task_id": task_id,
        "total_reward": round(float(total_reward), 4),
        "status": "success" if total_reward >= 0.5 else "incomplete",
    }
    print(f"[END] {json.dumps(end_log)}")
    return float(total_reward)
|
|
if __name__ == "__main__":
    # Run the three scripted tasks in order, pausing one second after each
    # run, then print the summed reward.
    scores = []
    for t_id in (1, 2, 3):
        task_score = run_task(t_id)
        scores.append(task_score)
        time.sleep(1)

    baseline_total = sum(scores)
    print(f"\n✅ All 3 tasks completed. Baseline Total Score: {baseline_total}")
|
|