customer_support_env / inference.py
ravindrakapse's picture
Upload folder using huggingface_hub
385ccc1 verified
#!/usr/bin/env python3
"""
Inference Script for Customer Support Environment
===================================
MANDATORY
- Before submitting, ensure the following variables are defined in your environment configuration:
    API_BASE_URL   The API endpoint for the LLM.
    MODEL_NAME     The model identifier to use for inference.
    HF_TOKEN       Your Hugging Face / API key.
- The inference script must be named `inference.py` and placed in the root directory of the project.
- Participants must use the OpenAI client for all LLM calls, configured with the variables above.
"""
import os
import sys
import json
import time
from typing import Dict, List
from openai import OpenAI
# Import environment components
from server.customer_support_env_environment import CustomerSupportEnvironment
from models import CustomerSupportAction, CustomerSupportObservation
# ─── Required environment variables ───────────────────────────────────────────
# Base URL of the OpenAI-compatible endpoint; a default is used when unset.
API_BASE_URL = os.environ.get("API_BASE_URL", "https://router.huggingface.co/v1")
# Credential for the endpoint: HF_TOKEN is preferred, API_KEY is consulted
# when HF_TOKEN is unset or empty. May end up as None.
API_KEY = os.environ.get("HF_TOKEN") or os.environ.get("API_KEY")
# Model identifier sent with every chat completion request.
MODEL_NAME = os.environ.get("MODEL_NAME", "meta-llama/Llama-3.3-70B-Instruct")
# ─── Inference configuration ─────────────────────────────────────────────────
EPISODES_PER_TASK = 10  # episodes evaluated for each task difficulty
TEMPERATURE = 0.3  # sampling temperature passed to the completion call
MAX_TOKENS = 500  # max_tokens passed to the completion call
OUTPUT_DIR = "outputs"  # directory that receives the results JSON file
def get_openai_client() -> OpenAI:
    """Build the OpenAI client from the module-level API settings.

    Returns:
        A client configured with ``API_KEY`` and ``API_BASE_URL``.

    When ``API_KEY`` is empty or missing, setup instructions are printed and
    the process exits with status 1 instead of returning.
    """
    if API_KEY:
        return OpenAI(api_key=API_KEY, base_url=API_BASE_URL)

    # No credential available: tell the user how to provide one, then stop.
    print("Error: HF_TOKEN or API_KEY environment variable not set.")
    print("Set it with: export HF_TOKEN='your-token-here'")
    sys.exit(1)
def build_prompt(observation: CustomerSupportObservation, task_id: str) -> str:
    """Render the user prompt for a single support ticket.

    Args:
        observation: Ticket observation; its ticket, message and customer
            history attributes are interpolated into the prompt.
        task_id: Task difficulty. "easy" and "medium" have dedicated
            instructions; every other value gets the "hard" instructions.

    Returns:
        The prompt text: task instructions, ticket details, customer history
        and the JSON schema the model is asked to reply with.
    """
    hard_instructions = (
        "Fully handle this ticket: categorize, prioritize, route to the right team, "
        "draft a professional response, and decide whether to escalate."
    )
    instructions_by_task = {
        "easy": (
            "Categorize this support ticket into one of: billing, technical, account, shipping, general."
        ),
        "medium": (
            "Categorize the ticket, assign a priority (low/medium/high/critical), "
            "and route to the appropriate team (tier1/tier2/billing/technical/management)."
        ),
    }
    # Anything that is not "easy" or "medium" is treated as the hard task.
    task_instructions = instructions_by_task.get(task_id, hard_instructions)

    # The template lines start at column 0 on purpose: any leading whitespace
    # inside the triple-quoted string would become part of the prompt.
    return f"""You are a customer support AI assistant. {task_instructions}
TICKET INFORMATION:
- ID: {observation.ticket_id}
- Channel: {observation.channel}
- Timestamp: {observation.timestamp}
CUSTOMER MESSAGE:
{observation.customer_message}
CUSTOMER HISTORY:
- Account Age: {observation.account_age_days} days
- Total Tickets: {observation.total_tickets}
- Resolved Tickets: {observation.resolved_tickets}
- Satisfaction Score: {observation.satisfaction_score}/5.0
- Premium Customer: {"Yes" if observation.is_premium else "No"}
- Lifetime Value: ${observation.lifetime_value:.2f}
Based on this information, provide your response in JSON format with these fields:
{{
"category": "billing" | "technical" | "account" | "shipping" | "general",
"priority": "low" | "medium" | "high" | "critical",
"assigned_team": "tier1" | "tier2" | "billing" | "technical" | "management",
"response_draft": "Your professional response to the customer (minimum 20 characters)",
"internal_notes": "Brief internal notes for the team",
"escalate": true | false
}}
Respond with ONLY the JSON, no additional text."""
def parse_llm_response(content: str) -> Dict:
    """Extract the JSON object from an LLM reply.

    Candidates are tried in order and the first one that decodes to a JSON
    object is returned:

    1. the whole reply, stripped, so a bare JSON reply is parsed as-is even
       when one of its string values contains backticks;
    2. the body of the first Markdown code fence, wherever it appears in the
       reply, with an optional ``json`` language tag removed
       (case-insensitive);
    3. the span from the first ``{`` to the last ``}``, which covers replies
       that wrap the object in explanatory text.

    Args:
        content: Raw text returned by the model.

    Returns:
        The decoded JSON object.

    Raises:
        json.JSONDecodeError: No candidate decoded to an object and at least
            one candidate was not valid JSON.
        ValueError: Every candidate was valid JSON but none was an object
            (for example a list or a bare string). ``json.JSONDecodeError``
            is itself a ``ValueError``, so catching ``ValueError`` covers
            both cases.
    """
    text = content.strip()
    candidates = [text]

    fence_parts = text.split("```")
    if len(fence_parts) > 1:
        fenced = fence_parts[1].strip()
        # Drop the language tag that follows the opening fence ("json", "JSON").
        if fenced[:4].lower() == "json":
            fenced = fenced[4:].strip()
        candidates.append(fenced)

    first_brace, last_brace = text.find("{"), text.rfind("}")
    if 0 <= first_brace < last_brace:
        candidates.append(text[first_brace:last_brace + 1])

    decode_error = None
    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError as err:
            # Keep the first failure: it describes the reply as received.
            if decode_error is None:
                decode_error = err
            continue
        if isinstance(parsed, dict):
            return parsed

    if decode_error is not None:
        raise decode_error
    raise ValueError("LLM response is valid JSON but not a JSON object")
def get_action(
    client: OpenAI, observation: CustomerSupportObservation, task_id: str
) -> CustomerSupportAction:
    """Ask the model how to handle a ticket and turn its reply into an action.

    Args:
        client: OpenAI-compatible client used for the chat completion call.
        observation: Ticket observation the prompt is built from.
        task_id: Task difficulty forwarded to ``build_prompt``.

    Returns:
        The action described by the model's JSON reply, with defaults for
        keys the reply omits. If the request, the parsing or the action
        construction raises, a warning is printed and a generic fallback
        action is returned instead.
    """
    prompt = build_prompt(observation, task_id)
    messages = [
        {
            "role": "system",
            "content": "You are a customer support expert. Always respond with valid JSON only.",
        },
        {"role": "user", "content": prompt},
    ]
    try:
        completion = client.chat.completions.create(
            model=MODEL_NAME,
            messages=messages,
            temperature=TEMPERATURE,
            max_tokens=MAX_TOKENS,
        )
        raw_reply = completion.choices[0].message.content.strip()
        fields = parse_llm_response(raw_reply)
        return CustomerSupportAction(
            category=fields.get("category", "general"),
            priority=fields.get("priority", "medium"),
            assigned_team=fields.get("assigned_team", "tier1"),
            response_draft=fields.get(
                "response_draft",
                "Thank you for contacting support. We will review your request.",
            ),
            internal_notes=fields.get("internal_notes"),
            escalate=fields.get("escalate", False),
        )
    except Exception as e:
        # Deliberate best-effort: one failed call must not abort the episode.
        print(f" [WARN] LLM call failed: {e}")
        return CustomerSupportAction(
            category="general",
            priority="medium",
            assigned_team="tier1",
            response_draft="Thank you for contacting support. We will review your request and get back to you shortly.",
            escalate=False,
        )
def run_episode(
    env: CustomerSupportEnvironment,
    client: OpenAI,
    task_id: str,
    episode_num: int,
) -> Dict:
    """Play one single-step episode and collect its outcome.

    The environment is reset, the agent's action for the initial observation
    is obtained via ``get_action`` and applied with one ``env.step`` call.

    Args:
        env: Environment to reset and step.
        client: OpenAI-compatible client forwarded to ``get_action``.
        task_id: Task difficulty forwarded to ``get_action``.
        episode_num: Episode number recorded in the result.

    Returns:
        A dict with the episode number, the reward of the stepped
        observation, and the ``grader_score``, ``ground_truth`` and
        ``agent_action`` entries of its metadata.
    """
    first_obs = env.reset()
    chosen_action = get_action(client, first_obs, task_id)
    graded_obs = env.step(chosen_action)
    return {
        "episode": episode_num,
        "reward": graded_obs.reward,
        "grader_score": graded_obs.metadata["grader_score"],
        "ground_truth": graded_obs.metadata["ground_truth"],
        "agent_action": graded_obs.metadata["agent_action"],
    }
def evaluate_task(task_id: str, client: OpenAI, num_episodes: int) -> Dict:
    """Evaluate the agent on one task difficulty and summarize the scores.

    A fresh environment (seed 42) is created for the task, ``num_episodes``
    episodes are run through ``run_episode``, and progress is printed after
    each one.

    Args:
        task_id: Task difficulty to evaluate.
        client: OpenAI-compatible client forwarded to ``run_episode``.
        num_episodes: Number of episodes to run. Zero or a negative value
            runs nothing and yields a summary whose statistics are all 0.0
            instead of failing on a division by zero.

    Returns:
        A summary dict with the task id, the requested episode count,
        average/min/max grader score, average reward, success rate against
        the task's ``success_threshold``, and the per-episode results.
    """
    print(f"\n{'='*60}")
    print(f" Task: {task_id.upper()} | Episodes: {num_episodes}")
    print(f"{'='*60}")
    env = CustomerSupportEnvironment(task_id=task_id, seed=42)
    results: List[Dict] = []
    for ep in range(num_episodes):
        result = run_episode(env, client, task_id, ep + 1)
        results.append(result)
        print(
            f" Episode {ep + 1}/{num_episodes} "
            f"score={result['grader_score']:.3f} reward={result['reward']:.3f}"
        )
    scores = [r["grader_score"] for r in results]
    rewards = [r["reward"] for r in results]
    threshold = env.task_configs[task_id]["success_threshold"]

    episode_count = len(results)
    if episode_count:
        avg_score = sum(scores) / episode_count
        avg_reward = sum(rewards) / episode_count
        min_score = min(scores)
        max_score = max(scores)
        success_rate = sum(1 for s in scores if s >= threshold) / episode_count
    else:
        # Nothing was run: report neutral statistics rather than dividing by
        # zero or calling min()/max() on an empty list.
        avg_score = avg_reward = min_score = max_score = success_rate = 0.0

    summary = {
        "task_id": task_id,
        "num_episodes": num_episodes,
        "avg_score": avg_score,
        "avg_reward": avg_reward,
        "min_score": min_score,
        "max_score": max_score,
        "success_rate": success_rate,
        "success_threshold": threshold,
        "episodes": results,
    }
    print(f"\n Avg Score: {summary['avg_score']:.3f}")
    print(f" Success Rate: {summary['success_rate']:.1%} (threshold {threshold})")
    return summary
def main():
    """Run the evaluation for the easy, medium and hard tasks and save it.

    Prints the active configuration, makes sure the output directory exists,
    evaluates each task for ``EPISODES_PER_TASK`` episodes, prints a per-task
    summary with the total elapsed time, and writes all results to
    ``inference_results.json`` inside ``OUTPUT_DIR``.
    """
    print("=" * 60)
    print(" Customer Support Env — Inference Script")
    print(f" API_BASE_URL: {API_BASE_URL}")
    print(f" MODEL_NAME: {MODEL_NAME}")
    print("=" * 60)

    # Output directory first, then the client (which exits if no key is set).
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    client = get_openai_client()

    # Evaluate the tasks in order of difficulty, timing the whole run.
    started_at = time.time()
    all_results = {
        task_id: evaluate_task(task_id, client, EPISODES_PER_TASK)
        for task_id in ["easy", "medium", "hard"]
    }
    elapsed = time.time() - started_at

    # Per-task summary table.
    print(f"\n{'='*60}")
    print(" SUMMARY")
    print(f"{'='*60}")
    for task_id, result in all_results.items():
        print(
            f" {task_id.upper():8s} | Score: {result['avg_score']:.3f} | "
            f"Success: {result['success_rate']:.1%}"
        )
    print(f" Total time: {elapsed:.1f}s")
    print(f"{'='*60}")

    # Persist everything; values json cannot encode are written via str().
    output_path = os.path.join(OUTPUT_DIR, "inference_results.json")
    with open(output_path, "w") as results_file:
        json.dump(all_results, results_file, indent=2, default=str)
    print(f"\nResults saved to: {output_path}")
# Run the evaluation only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()