vivekvish2004 committed on
Commit
f67d4aa
·
1 Parent(s): 45f57da

Fix validator Step 1: Support POST on /reset and update API routing

Browse files
Files changed (2) hide show
  1. app/main.py +1 -1
  2. inference.py +99 -93
app/main.py CHANGED
@@ -28,7 +28,7 @@ env_instance = CustomerSupportEnv()
28
  def read_root():
29
  return {"message": "Welcome to OpenEnv Customer Support API"}
30
 
31
- @app.get("/reset", response_model=Observation)
32
  def reset_env():
33
  """Reset the environment and yield the initial observation."""
34
  return env_instance.reset()
 
28
  def read_root():
29
  return {"message": "Welcome to OpenEnv Customer Support API"}
30
 
31
@app.api_route("/reset", methods=["GET", "POST"], response_model=Observation)
def reset_env():
    """Reset the shared environment instance and return what `reset()` yields.

    Registered for both GET and POST on `/reset`; the response is serialised
    through the `Observation` response model.
    """
    initial_observation = env_instance.reset()
    return initial_observation
inference.py CHANGED
@@ -1,110 +1,116 @@
1
  import os
2
  import json
3
- import argparse
4
- from typing import Any
5
- from huggingface_hub import InferenceClient
 
 
6
  from app.env import CustomerSupportEnv
7
  from app.models import Action
8
 
9
- def evaluate_llm(task_id: str):
10
- """Deterministically evaluate a Hugging Face LLM agent against the support environment."""
11
- # Strict dynamic config parsing against Hugging Face requirements
12
- model_name = os.environ.get("MODEL_NAME", "meta-llama/Meta-Llama-3-8B-Instruct")
13
- hf_token = os.environ.get("HF_TOKEN", "")
14
-
15
- # Initialize Strict standard OpenEnv tracking log
16
- print(f"[START] task={task_id} env=customer-support-env model={model_name}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
- if not hf_token:
19
- print("Warning: HF_TOKEN is explicitly empty. Hugging Face Inference may fail or rate limit.")
20
 
21
- # Execute via Hugging Face Hub natively mapped to their architecture
22
- client = InferenceClient(
23
- model=model_name,
24
- token=hf_token
25
- )
26
 
 
 
 
 
 
 
27
  env = CustomerSupportEnv()
28
- obs = env.reset()
29
-
30
- rewards_history = []
31
 
32
- # Inject formal logic constraints prompting reliable JSON interactions
33
- system_prompt = """You are a highly structured AI customer support agent resolving a ticket pipeline.
34
- Available actions list:
35
- 1. classify_ticket (payload format: {"classification": "refund" | "general_inquiry" | "login_issue" | "feedback"})
36
- 2. assign_priority (payload format: {"priority": "low" | "medium" | "high"})
37
- 3. generate_response (payload format: {"response": "<text>"})
38
- 4. escalate (payload format: {})
39
- 5. resolve (payload format: {})
40
-
41
- You MUST return ONLY a fully valid JSON format mapping this dict schema:
42
- {
43
- "action_type": "<action_name>",
44
- "payload": { ... }
45
- }"""
46
-
47
- done = False
48
- step_count = 0
49
- conversation_messages = [
50
- {"role": "system", "content": system_prompt}
51
- ]
52
-
53
- # Interaction Loop
54
- while not done and step_count < env.max_steps:
55
- step_count += 1
56
-
57
- obs_stringified = json.dumps(obs.dict()["state"])
58
- conversation_messages.append({"role": "user", "content": f"Current Ticket State: {obs_stringified}\nProvide your next action strictly in JSON:"})
59
-
60
- error_msg = ""
61
- action_type = "unknown"
62
- reward_val = 0.0
63
 
64
- try:
65
- # Deterministic, reproducible call explicitly leveraging HF formats
66
- response = client.chat_completion(
67
- messages=conversation_messages,
68
- temperature=0.01, # Hugging Face often crashes on explicitly 0.0 depending on the endpoint model deployed
69
- max_tokens=256,
70
- response_format={"type": "json"} if hasattr(client, "chat_completion") else None
71
- # Note: Not all HF hosted models support automatic JSON constraints, but instructions prompt for it natively.
72
- )
73
-
74
- action_text = response.choices[0].message.content
75
- action_data = json.loads(action_text)
76
-
77
- action_type = action_data.get("action_type", "unknown")
78
- action = Action(**action_data)
79
-
80
- # Step the mathematical environment
81
- obs, reward, done, info = env.step(action)
82
- reward_val = reward.value
83
 
84
- # Provide reflection feedback to AI
85
- conversation_messages.append({"role": "assistant", "content": action_text})
86
- conversation_messages.append({"role": "system", "content": f"Action result mapping: Reward={reward_val}, Done={done}, Info={json.dumps(info)}"})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
- except Exception as e:
89
- error_msg = str(e).replace("\n", " ").strip()
90
- reward_val = -1.0
91
- done = True
92
 
93
- rewards_history.append(reward_val)
94
-
95
- # Output Explicit formatted log
96
- done_str = "true" if done else "false"
97
- print(f"[STEP] step={step_count} action={action_type} reward={reward_val:.2f} done={done_str} error={error_msg}")
98
 
99
- # Output Explicit formatted termination log
100
- # True metric determined by pipeline resolution logic
101
- success_str = "true" if (env.current_state and env.current_state.get("status") == "closed" and rewards_history and rewards_history[-1] > 0) else "false"
102
- r_mapped = ",".join(f"{r:.2f}" for r in rewards_history)
103
- print(f"[END] success={success_str} steps={step_count} rewards={r_mapped}")
 
 
 
 
104
 
105
  if __name__ == "__main__":
106
- parser = argparse.ArgumentParser()
107
- parser.add_argument("--task", type=str, default="task_hard_1", help="Task ID sequence to execute logic against.")
108
- args = parser.parse_args()
109
-
110
- evaluate_llm(args.task)
 
1
  import os
2
  import json
3
+ import textwrap
4
+ import asyncio
5
+ from typing import List, Optional
6
+ from openai import OpenAI
7
+
8
  from app.env import CustomerSupportEnv
9
  from app.models import Action
10
 
11
# Mandatory environment configuration.
# Each setting uses `os.getenv(...) or <fallback>` so that a variable that is
# set but empty is treated the same as one that is not set at all.
API_KEY = os.getenv("HF_TOKEN") or os.getenv("API_KEY")
API_BASE_URL = os.getenv("API_BASE_URL") or "https://router.huggingface.co/v1"
MODEL_NAME = os.getenv("MODEL_NAME") or "meta-llama/Meta-Llama-3-8B-Instruct"

# Benchmark configuration.
# TASK_NAME follows the same empty-means-unset rule as the settings above, so
# the [START] line never carries a blank task id.
TASK_NAME = os.getenv("TASK_NAME") or "task_hard_1"
BENCHMARK = "customer-support-enterprise"
MAX_STEPS = 15  # Total steps allowed across the queue
SUCCESS_SCORE_THRESHOLD = 0.1

# Max total reward: approx 1.0 per ticket * 3 tickets in queue.
# Used as the divisor when normalising the summed rewards into a 0..1 score.
MAX_TOTAL_REWARD = 3.0

# System message sent with every model request. It lists the actions the agent
# may emit and the JSON envelope each reply must use.
SYSTEM_PROMPT = textwrap.dedent(
    """
    You are an Enterprise AI Customer Support agent resolving a ticket pipeline.
    For each ticket, you must:
    1. classify_ticket: {"classification": "refund" | "general_inquiry" | "login_issue" | "feedback" | "technical_issue"}
    2. assign_priority: {"priority": "low" | "medium" | "high"}
    3. generate_response: {"response": "<empathetic_text>"}
    4. resolve: {}

    Your goal is to process the ticket efficiently and move to the next one in the queue.
    You MUST return ONLY a fully valid JSON object:
    {"action_type": "<name>", "payload": {...}}
    """
).strip()
39
 
40
def log_start(task: str, env: str, model: str) -> None:
    """Print the single `[START]` record that opens a run.

    Args:
        task: Task identifier placed in the `task=` field.
        env: Benchmark/environment name placed in the `env=` field.
        model: Model name placed in the `model=` field.
    """
    header = f"[START] task={task} env={env} model={model}"
    # Flushed immediately so the record is visible even if the run dies later.
    print(header, flush=True)
42
 
43
def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
    """Print one `[STEP]` record on exactly one line.

    Args:
        step: 1-based step number.
        action: Name of the action taken this step.
        reward: Reward for the step, printed with two decimals.
        done: Whether the episode finished; printed as `true`/`false`.
        error: Optional error text. Falsy values are printed as `null`.
    """
    error_val = "null"
    if error:
        # Error text (tracebacks, HTTP bodies) may span several lines; fold the
        # line breaks into spaces so the record stays a single line.
        flattened = " ".join(str(error).splitlines()).strip()
        # Text that was only whitespace/newlines collapses to nothing; report
        # it as `null` rather than printing an empty `error=` field.
        if flattened:
            error_val = flattened
    done_val = str(done).lower()
    print(
        f"[STEP] step={step} action={action} reward={reward:.2f} done={done_val} error={error_val}",
        flush=True,
    )
 
47
 
48
def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
    """Print the closing `[END]` record for a run.

    Args:
        success: Overall outcome; printed lower-cased (`true`/`false`).
        steps: Number of steps executed.
        score: Final score, printed with three decimals.
        rewards: Per-step rewards, printed comma-separated with two decimals
            each (an empty list yields an empty `rewards=` field).
    """
    success_flag = str(success).lower()
    formatted_rewards = [format(value, ".2f") for value in rewards]
    rewards_field = ",".join(formatted_rewards)
    summary = f"[END] success={success_flag} steps={steps} score={score:.3f} rewards={rewards_field}"
    print(summary, flush=True)
51
+
52
async def main():
    """Run one benchmark episode and emit `[START]`, `[STEP]` and `[END]` records.

    Each step sends the current observation state to the chat model, parses
    the reply into an `Action`, applies it to the environment and logs the
    result. The loop ends when the environment reports `done` or after
    `MAX_STEPS` steps. The final score is the summed reward divided by
    `MAX_TOTAL_REWARD`, clamped to [0, 1]; the run counts as a success when
    that score reaches `SUCCESS_SCORE_THRESHOLD`.

    The `[END]` record is always printed once `[START]` has been printed,
    including when setup or a step raises; the exception still propagates.
    """
    rewards: List[float] = []
    total_steps = 0
    score = 0.0
    success = False

    log_start(task=TASK_NAME, env=BENCHMARK, model=MODEL_NAME)

    try:
        # Client and environment are created inside the try block so that a
        # setup failure (for example client construction failing because no
        # API key is configured) still produces a matching [END] record.
        client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
        env = CustomerSupportEnv()

        # Reset current enterprise session (populates queue)
        obs = env.reset()

        for step in range(1, MAX_STEPS + 1):
            current_state = obs.dict()["state"]

            # Ask the model for the next action. Any failure here (request
            # error, invalid JSON, reply that does not fit `Action`) is
            # turned into a fallback action instead of aborting the episode.
            llm_error: Optional[str] = None
            try:
                completion = client.chat.completions.create(
                    model=MODEL_NAME,
                    messages=[
                        {"role": "system", "content": SYSTEM_PROMPT},
                        {"role": "user", "content": f"Current State: {json.dumps(current_state)}"},
                    ],
                    temperature=0.0,
                    response_format={"type": "json_object"},
                )
                action_text = completion.choices[0].message.content or "{}"
                action = Action(**json.loads(action_text))
                action_type = action.action_type
            except Exception as exc:
                # Keep the reason so it can be reported in the step record;
                # flattened to one line to keep the record on a single line.
                llm_error = " ".join(f"{type(exc).__name__}: {exc}".splitlines())
                action = Action(action_type="unknown", payload={})
                action_type = "error"

            # Step the environment
            obs, reward_obj, done, info = env.step(action)
            reward = reward_obj.value

            rewards.append(reward)
            total_steps = step

            # Prefer the environment's own error; otherwise surface the
            # model/parse failure captured above (None is logged as `null`).
            log_step(
                step=step,
                action=action_type,
                reward=reward,
                done=done,
                error=info.get("error") or llm_error,
            )

            if done:
                break

        # Normalise the summed rewards into a score between 0 and 1.
        score = sum(rewards) / MAX_TOTAL_REWARD if MAX_TOTAL_REWARD > 0 else 0.0
        score = min(max(score, 0.0), 1.0)
        success = score >= SUCCESS_SCORE_THRESHOLD

    finally:
        log_end(success=success, steps=total_steps, score=score, rewards=rewards)


if __name__ == "__main__":
    asyncio.run(main())