Spaces:

ravindrakapse
/

customer_support_env

Sleeping

File size: 8,998 Bytes

385ccc1

#!/usr/bin/env python3
"""
Inference Script for Customer Support Environment
===================================
MANDATORY
- Before submitting, ensure the following variables are defined in your environment configuration:
    API_BASE_URL   The API endpoint for the LLM.
    MODEL_NAME     The model identifier to use for inference.
    HF_TOKEN       Your Hugging Face / API key.

- The inference script must be named `inference.py` and placed in the root directory of the project.
- Participants must use the OpenAI client for all LLM calls, configured with the variables above.
"""

import os
import sys
import json
import time
from typing import Dict, List

from openai import OpenAI

# Import environment components
from server.customer_support_env_environment import CustomerSupportEnvironment
from models import CustomerSupportAction, CustomerSupportObservation

# ─── Required environment variables ───────────────────────────────────────────
# Base URL of the OpenAI-compatible endpoint; the Hugging Face router is used
# when API_BASE_URL is not set.
API_BASE_URL = os.environ.get("API_BASE_URL", "https://router.huggingface.co/v1")
# Credential for the endpoint: HF_TOKEN is preferred, API_KEY is consulted when
# HF_TOKEN is unset or empty. Stays None when neither is provided.
API_KEY = os.environ.get("HF_TOKEN") or os.environ.get("API_KEY")
# Model identifier sent with every chat-completion request.
MODEL_NAME = os.environ.get("MODEL_NAME", "meta-llama/Llama-3.3-70B-Instruct")

# ─── Inference configuration ─────────────────────────────────────────────────
EPISODES_PER_TASK = 10  # episodes evaluated for each task difficulty
TEMPERATURE = 0.3  # sampling temperature passed to the LLM
MAX_TOKENS = 500  # completion-length cap passed to the LLM
OUTPUT_DIR = "outputs"  # directory that receives the results JSON


def get_openai_client() -> OpenAI:
    """Build the OpenAI-compatible client used for the LLM calls.

    The client is configured from the module-level ``API_KEY`` and
    ``API_BASE_URL``. When no key is configured, setup instructions are
    printed and the process terminates with exit status 1.

    Returns:
        An ``OpenAI`` client bound to ``API_BASE_URL``.
    """
    if API_KEY:
        return OpenAI(api_key=API_KEY, base_url=API_BASE_URL)

    # No credential available: explain how to provide one, then abort.
    for message in (
        "Error: HF_TOKEN or API_KEY environment variable not set.",
        "Set it with: export HF_TOKEN='your-token-here'",
    ):
        print(message)
    sys.exit(1)


def build_prompt(observation: CustomerSupportObservation, task_id: str) -> str:
    """Render the user prompt describing one support ticket.

    The opening instruction depends on ``task_id``: ``"easy"`` asks only for
    a category, ``"medium"`` adds priority and team routing, and any other
    value is treated as the hard task (full handling, including a drafted
    reply and an escalation decision). The ticket details, customer history
    and requested JSON schema are the same for every task.

    Args:
        observation: Ticket observation supplying the ticket and customer
            fields interpolated into the prompt.
        task_id: Task difficulty identifier.

    Returns:
        The complete prompt text.
    """
    instructions_by_task = {
        "easy": (
            "Categorize this support ticket into one of: billing, technical, account, shipping, general."
        ),
        "medium": (
            "Categorize the ticket, assign a priority (low/medium/high/critical), "
            "and route to the appropriate team (tier1/tier2/billing/technical/management)."
        ),
    }
    # Anything that is not "easy" or "medium" gets the hard-task wording.
    hard_instructions = (
        "Fully handle this ticket: categorize, prioritize, route to the right team, "
        "draft a professional response, and decide whether to escalate."
    )
    task_instructions = instructions_by_task.get(task_id, hard_instructions)

    prompt_lines = [
        f"You are a customer support AI assistant. {task_instructions}",
        "",
        "TICKET INFORMATION:",
        f"- ID: {observation.ticket_id}",
        f"- Channel: {observation.channel}",
        f"- Timestamp: {observation.timestamp}",
        "",
        "CUSTOMER MESSAGE:",
        f"{observation.customer_message}",
        "",
        "CUSTOMER HISTORY:",
        f"- Account Age: {observation.account_age_days} days",
        f"- Total Tickets: {observation.total_tickets}",
        f"- Resolved Tickets: {observation.resolved_tickets}",
        f"- Satisfaction Score: {observation.satisfaction_score}/5.0",
        f"- Premium Customer: {'Yes' if observation.is_premium else 'No'}",
        f"- Lifetime Value: ${observation.lifetime_value:.2f}",
        "",
        "Based on this information, provide your response in JSON format with these fields:",
        "{",
        '  "category": "billing" | "technical" | "account" | "shipping" | "general",',
        '  "priority": "low" | "medium" | "high" | "critical",',
        '  "assigned_team": "tier1" | "tier2" | "billing" | "technical" | "management",',
        '  "response_draft": "Your professional response to the customer (minimum 20 characters)",',
        '  "internal_notes": "Brief internal notes for the team",',
        '  "escalate": true | false',
        "}",
        "",
        "Respond with ONLY the JSON, no additional text.",
    ]
    return "\n".join(prompt_lines)


def parse_llm_response(content: str) -> Dict:
    """Extract the JSON object from an LLM reply.

    The reply is first parsed as-is. If that fails, or yields something other
    than an object, the text is scanned for the first ``{`` from which a
    complete JSON object can be decoded. This covers replies wrapped in a
    markdown code fence (with any language tag, in any letter case) as well
    as replies with prose before or after the JSON.

    Args:
        content: Raw text returned by the model.

    Returns:
        The decoded JSON object as a dict.

    Raises:
        json.JSONDecodeError: If no JSON object can be decoded from
            ``content``.
    """
    text = content.strip()

    # Fast path: the reply is exactly the JSON document that was asked for.
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        parsed = None
    if isinstance(parsed, dict):
        return parsed

    # Fallback: tolerate fences and surrounding prose by decoding from each
    # opening brace in turn. raw_decode ignores anything after the object.
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            candidate, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            start = text.find("{", start + 1)
            continue
        # Decoding that starts at "{" can only produce a JSON object.
        return candidate

    raise json.JSONDecodeError("No JSON object found in LLM response", text, 0)


def get_action(
    client: OpenAI, observation: CustomerSupportObservation, task_id: str
) -> CustomerSupportAction:
    """Ask the LLM how to handle a ticket and convert its reply to an action.

    The prompt from ``build_prompt`` is sent through the OpenAI-compatible
    chat-completions API and the reply is decoded with
    ``parse_llm_response``. Fields that are missing from the reply, or
    present but null/empty, are replaced by per-field defaults so that a
    single absent value does not discard the rest of the model's answer.

    This is deliberately best-effort: any failure (API error, undecodable
    reply, rejected field value) is logged as a warning and a generic
    fallback action is returned, so one bad call does not abort the run.

    Args:
        client: OpenAI-compatible client used for the request.
        observation: Current ticket observation.
        task_id: Task difficulty identifier, forwarded to ``build_prompt``.

    Returns:
        The action built from the model's reply, or the fallback action.
    """
    prompt = build_prompt(observation, task_id)

    try:
        response = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {
                    "role": "system",
                    "content": "You are a customer support expert. Always respond with valid JSON only.",
                },
                {"role": "user", "content": prompt},
            ],
            temperature=TEMPERATURE,
            max_tokens=MAX_TOKENS,
        )

        # The API schema allows a null message content; treat it as an empty
        # reply so the failure is reported as a parse error, not an
        # AttributeError on None.
        content = (response.choices[0].message.content or "").strip()
        action_dict = parse_llm_response(content)

        # dict.get(key, default) only covers a *missing* key; `or` also
        # covers an explicit null or empty string from the model.
        escalate = action_dict.get("escalate")
        return CustomerSupportAction(
            category=action_dict.get("category") or "general",
            priority=action_dict.get("priority") or "medium",
            assigned_team=action_dict.get("assigned_team") or "tier1",
            response_draft=action_dict.get("response_draft")
            or "Thank you for contacting support. We will review your request.",
            internal_notes=action_dict.get("internal_notes"),
            # Only substitute the default for a missing/null flag; an
            # explicit false must be passed through unchanged.
            escalate=False if escalate is None else escalate,
        )

    except Exception as e:
        print(f"  [WARN] LLM call failed: {e}")
        # Return a reasonable fallback action
        return CustomerSupportAction(
            category="general",
            priority="medium",
            assigned_team="tier1",
            response_draft="Thank you for contacting support. We will review your request and get back to you shortly.",
            escalate=False,
        )


def run_episode(
    env: CustomerSupportEnvironment,
    client: OpenAI,
    task_id: str,
    episode_num: int,
) -> Dict:
    """Play one single-step episode and collect its outcome.

    The environment is reset, one action is obtained from ``get_action`` for
    the initial observation, and that action is applied with ``env.step``.

    Args:
        env: Environment to run the episode in.
        client: OpenAI-compatible client passed to ``get_action``.
        task_id: Task difficulty identifier passed to ``get_action``.
        episode_num: Episode number recorded in the result.

    Returns:
        A dict with the episode number, the reward of the step observation,
        and the ``grader_score``, ``ground_truth`` and ``agent_action``
        entries of that observation's metadata.
    """
    initial_obs = env.reset()
    final_obs = env.step(get_action(client, initial_obs, task_id))

    outcome = {"episode": episode_num, "reward": final_obs.reward}
    # The remaining fields are copied verbatim from the step metadata.
    for key in ("grader_score", "ground_truth", "agent_action"):
        outcome[key] = final_obs.metadata[key]
    return outcome


def evaluate_task(task_id: str, client: OpenAI, num_episodes: int) -> Dict:
    """Run ``num_episodes`` episodes of one task and summarize the scores.

    A fresh ``CustomerSupportEnvironment`` is created for the task with a
    fixed seed of 42 and reused for every episode. Per-episode progress and
    the final averages are printed to stdout.

    Args:
        task_id: Task difficulty identifier; also the key into the
            environment's ``task_configs``.
        client: OpenAI-compatible client passed to ``run_episode``.
        num_episodes: Number of episodes to run. With zero (or a negative
            number of) episodes, all aggregate statistics are reported as
            0.0 rather than failing.

    Returns:
        A summary dict with the task id, episode count, average/min/max
        grader score, average reward, success rate, the success threshold
        and the list of per-episode results.
    """
    print(f"\n{'='*60}")
    print(f"  Task: {task_id.upper()}  |  Episodes: {num_episodes}")
    print(f"{'='*60}")

    env = CustomerSupportEnvironment(task_id=task_id, seed=42)
    results: List[Dict] = []

    for ep in range(num_episodes):
        result = run_episode(env, client, task_id, ep + 1)
        results.append(result)
        print(
            f"  Episode {ep + 1}/{num_episodes}  "
            f"score={result['grader_score']:.3f}  reward={result['reward']:.3f}"
        )

    scores = [r["grader_score"] for r in results]
    rewards = [r["reward"] for r in results]
    threshold = env.task_configs[task_id]["success_threshold"]

    # Guard the aggregates: with no episodes the divisions would raise
    # ZeroDivisionError and min()/max() would raise ValueError.
    episode_count = len(results)
    if episode_count:
        avg_score = sum(scores) / episode_count
        avg_reward = sum(rewards) / episode_count
        success_rate = sum(1 for s in scores if s >= threshold) / episode_count
    else:
        avg_score = avg_reward = success_rate = 0.0

    summary = {
        "task_id": task_id,
        "num_episodes": num_episodes,
        "avg_score": avg_score,
        "avg_reward": avg_reward,
        "min_score": min(scores, default=0.0),
        "max_score": max(scores, default=0.0),
        "success_rate": success_rate,
        "success_threshold": threshold,
        "episodes": results,
    }

    print(f"\n  Avg Score:    {summary['avg_score']:.3f}")
    print(f"  Success Rate: {summary['success_rate']:.1%} (threshold {threshold})")
    return summary


def main():
    """Evaluate the agent on the easy, medium and hard tasks and save results.

    Prints a configuration banner, ensures ``OUTPUT_DIR`` exists, creates the
    LLM client, runs ``EPISODES_PER_TASK`` episodes for each task in order,
    prints a per-task summary with the total elapsed time, and writes all
    results to ``inference_results.json`` inside ``OUTPUT_DIR``.
    """
    rule = "=" * 60

    print(rule)
    print("  Customer Support Env — Inference Script")
    print(f"  API_BASE_URL: {API_BASE_URL}")
    print(f"  MODEL_NAME:   {MODEL_NAME}")
    print(rule)

    # The output directory is created before the client, so it exists even
    # when client creation exits for lack of credentials.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    client = get_openai_client()

    # Tasks run sequentially, in increasing difficulty; only the evaluation
    # itself is timed.
    started_at = time.time()
    all_results = {
        difficulty: evaluate_task(difficulty, client, EPISODES_PER_TASK)
        for difficulty in ("easy", "medium", "hard")
    }
    elapsed = time.time() - started_at

    print(f"\n{rule}")
    print("  SUMMARY")
    print(rule)
    for difficulty, summary in all_results.items():
        score_part = f"  {difficulty.upper():8s} | Score: {summary['avg_score']:.3f} | "
        success_part = f"Success: {summary['success_rate']:.1%}"
        print(score_part + success_part)
    print(f"  Total time: {elapsed:.1f}s")
    print(rule)

    # default=str stringifies any value json cannot serialize natively.
    output_path = os.path.join(OUTPUT_DIR, "inference_results.json")
    with open(output_path, "w") as results_file:
        json.dump(all_results, results_file, indent=2, default=str)
    print(f"\nResults saved to: {output_path}")


if __name__ == "__main__":
    main()