import sys
import os
import json
import io
# Re-wrap stdout as UTF-8; characters that cannot be encoded are replaced instead of raising.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")
# Put this file's directory first on the import path
# (presumably so the local `env` package imported below resolves - confirm).
sys.path.insert(0, os.path.dirname(__file__))
from dotenv import load_dotenv
# Load the .env file that sits next to this script before the os.getenv calls below run.
load_dotenv(os.path.join(os.path.dirname(__file__), ".env"))
from env.environment import CustomerSupportEnv
from env.models import Action
from env.grader import grade_task
# Endpoint and model used for chat completions; each falls back to the default shown when unset.
API_BASE_URL = os.getenv("API_BASE_URL", "https://api.openai.com/v1")
MODEL_NAME = os.getenv("MODEL_NAME", "gpt-4o-mini")
# Preferred API key for the client below; None when the variable is not set.
HF_TOKEN = os.getenv("HF_TOKEN")
LOCAL_IMAGE_NAME = os.getenv("LOCAL_IMAGE_NAME") # Optional for docker usage
from openai import OpenAI
# Shared client used by call_llm.
# API key precedence: HF_TOKEN, then OPENAI_API_KEY, then the placeholder "dummy_key".
client = OpenAI(
base_url=API_BASE_URL,
api_key=HF_TOKEN if HF_TOKEN else os.getenv("OPENAI_API_KEY", "dummy_key"),
)
# System message that opens every conversation built in run_llm. It restricts the
# model to JSON objects, which call_llm parses and passes to Action(**...).
SYSTEM_PROMPT = """You are an AI customer support agent inside an RL environment.
Read the ticket and respond with a JSON object ONLY. Pick one action:
{"action_type": "classify", "category": "<billing|technical|refund|account|abuse>"}
{"action_type": "reply", "content": "<your reply>"}
{"action_type": "escalate"}
{"action_type": "close"}
Strategy: classify first, reply next, escalate only if severe (legal threats / long-unresolved issues), then close."""
def obs_to_text(obs):
    """Render an observation as the plain-text block sent to the model.

    Args:
        obs: Object exposing ``ticket_id``, ``status``, ``customer_query``
            and an iterable ``history``.

    Returns:
        Newline-joined text: the ticket, status and query lines, followed by
        a "History:" section listing each history entry when ``history`` is
        non-empty.
    """
    header = (
        f"Ticket: {obs.ticket_id}",
        f"Status: {obs.status}",
        f"Query: {obs.customer_query}",
    )
    # No prior messages: the three header lines are the whole prompt.
    if not obs.history:
        return "\n".join(header)
    transcript = [f" {entry}" for entry in obs.history]
    return "\n".join([*header, "History:", *transcript])
def call_llm(obs, messages):
    """Ask the model for the next action given the current observation.

    The rendered observation is appended to ``messages`` as a user turn and,
    once the API answers, the raw reply is appended as an assistant turn, so
    the caller's list accumulates the conversation across steps.

    Args:
        obs: Observation to render with ``obs_to_text``.
        messages: Chat history sent to the model; mutated in place.

    Returns:
        The ``Action`` built from the model's JSON reply, or
        ``Action(action_type="close")`` when the request or the parsing of
        the reply fails.
    """
    messages.append({"role": "user", "content": obs_to_text(obs)})
    try:
        resp = client.chat.completions.create(
            model=MODEL_NAME,
            messages=messages,
            temperature=0.0,
            response_format={"type": "json_object"},
        )
        raw = resp.choices[0].message.content
        messages.append({"role": "assistant", "content": raw})
        return Action(**json.loads(raw))
    except Exception as exc:
        # Best-effort boundary: any API, JSON or validation failure ends the
        # episode with "close". Report it on stderr so the failure is visible
        # without disturbing the [START]/[STEP]/[END] lines on stdout.
        print(
            f"[WARN] call_llm failed, falling back to close: {exc!r}",
            file=sys.stderr,
            flush=True,
        )
        return Action(action_type="close")
def run_llm(task_id):
    """Run one episode of the given task with the LLM agent and grade it.

    Prints a ``[START]`` line, one ``[STEP]`` line per environment step and a
    final ``[END]`` line to stdout.

    Args:
        task_id: Identifier passed to ``CustomerSupportEnv.reset``.

    Returns:
        The score returned by ``grade_task`` for the actions taken.
    """
    env = CustomerSupportEnv()
    obs = env.reset(task_id=task_id)
    task = env.current_task
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    taken, rewards = [], []
    print(f"[START] task={task_id} env=customer-support model={MODEL_NAME}", flush=True)

    for step_no, _ in enumerate(range(task["max_steps"]), start=1):
        action = call_llm(obs, messages)
        obs, reward, done, _info = env.step(action)
        taken.append(action)
        rewards.append(reward)

        # Serialize the action, then strip every space so the value stays a
        # single whitespace-free token on the log line.
        if hasattr(action, 'model_dump_json'):
            serialized = action.model_dump_json()
        else:
            serialized = json.dumps(action.__dict__)
        compact = serialized.replace(" ", "")
        print(f"[STEP] step={step_no} action={compact} reward={reward.score:.2f} done={str(done).lower()} error=null", flush=True)

        if done:
            break

    score = grade_task(task, taken)
    reward_csv = ",".join(format(r.score, ".2f") for r in rewards)
    # An episode counts as successful from a score of 0.5 upwards.
    passed = score >= 0.5
    print(f"[END] success={str(passed).lower()} steps={len(taken)} score={score:.3f} rewards={reward_csv}", flush=True)
    return score
def main():
    """Run the agent on the "easy", "medium" and "hard" tasks, in that order."""
    task_ids = ("easy", "medium", "hard")
    for task_id in task_ids:
        run_llm(task_id)
# Run every task only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()