| import os |
| from openai import OpenAI |
| from env import EmailEnv |
|
|
| env = EmailEnv() |
|
|
| def call_llm(text): |
| try: |
| client = OpenAI( |
| api_key=os.environ.get("API_KEY"), |
| base_url=os.environ.get("API_BASE_URL") |
| ) |
|
|
| response = client.chat.completions.create( |
| model="gpt-4o-mini", |
| messages=[ |
| {"role": "system", "content": "Classify email and suggest reply."}, |
| {"role": "user", "content": text} |
| ] |
| ) |
|
|
| return response.choices[0].message.content |
| except: |
| return "fallback" |
|
|
| try: |
| print("[START] task=easy_task", flush=True) |
|
|
| obs = env.reset() |
| text = obs["text"] |
|
|
| _ = call_llm(text) |
|
|
| |
| if "meeting" in text.lower(): |
| action = {"label": "important", "reply": "Sure, I will attend the meeting."} |
| elif "lottery" in text.lower(): |
| action = {"label": "spam", "reply": "This looks like spam."} |
| else: |
| action = {"label": "normal", "reply": "Sounds good!"} |
|
|
| reward = env.step(action) |
|
|
| print(f"[STEP] reward={reward}", flush=True) |
| print(f"[END] score={reward}", flush=True) |
|
|
| except Exception as e: |
| print("[ERROR]", str(e), flush=True) |