import requests
import sys
import time
import subprocess
import os
from typing import Dict, List, Any
# Make the current working directory importable so the function-level
# `from backend.env import ...` below can resolve.
# NOTE(review): assumes the script is launched from the directory that
# contains the `backend` package — confirm against how it is invoked.
sys.path.append(os.getcwd())
def test_internal_logic():
    """Check the environment's task list, task metadata and grader in-process.

    Three checks run in order and the first failure stops the run:
      1. ``get_tasks()`` returns at least 3 tasks.
      2. Every task has ``has_grader``, ``has_evaluator`` and ``grader``
         set to exactly ``True``.
      3. ``grade()`` runs on a mock trajectory for the first task and
         yields a score within [0.0, 1.0].

    Returns:
        bool: True when all checks pass; False otherwise (the reason is
        printed to stdout).
    """
    print("🔍 [TEST] Internal Logic & Task Enumeration...")
    try:
        # TASKS is not used below; importing it still makes this check fail
        # if the name is missing from backend.env.
        from backend.env import CustomerSupportEnv, TASKS  # noqa: F401
    except ImportError as e:
        print(f"❌ Error: Could not import environment components: {e}")
        return False

    env = CustomerSupportEnv()

    # 1. Check get_tasks exists and returns correct number
    tasks = env.get_tasks()
    print(f"✅ Found {len(tasks)} tasks via get_tasks().")
    if len(tasks) < 3:
        print(f"❌ Error: Only found {len(tasks)} tasks, expected at least 3.")
        return False

    # 2. Check each task has required metadata
    required_keys = ['has_grader', 'has_evaluator', 'grader']
    for task in tasks:
        task_id = task.get('id')
        for key in required_keys:
            # Identity check on purpose: truthy non-bool values do not count.
            if task.get(key) is not True:
                print(f"❌ Error: Task {task_id} {key} is NOT True.")
                return False

    # 3. Test Grading
    mock_state = {
        "classification": "refund",
        "priority": "high",
        "status": "closed",
        "response": "sorry",
        "sentiment": "angry",
    }
    ground_truth = {
        "expected_classification": "refund",
        "expected_priority": "high",
        "sentiment": "angry",
    }
    try:
        score = env.grade(tasks[0]['id'], [{"state": mock_state}], ground_truth)
        print(f"✅ Grading execution successful. Score: {score:.3f}")
        if not (0.0 <= score <= 1.0):
            print("❌ Error: Score out of range!")
            return False
    except Exception as e:
        # Harness boundary: any grader failure is reported, not propagated.
        print(f"❌ Error: grade() method failed: {e}")
        return False

    print("✅ Internal logic tests passed!\n")
    return True
def test_endpoints():
    """Start the API server in a subprocess and smoke-test its HTTP endpoints.

    Launches ``backend.main:app`` under uvicorn on port 7861, then checks:
      * ``GET /tasks`` answers 200 with a JSON body of at least 3 items.
      * ``GET /grader?task_id=<id of first task>`` answers 200 with a JSON
        body containing the key ``"score"``.

    The server process is always terminated and reaped before returning.

    Returns:
        bool: True when both endpoint checks pass; False otherwise (the
        reason is printed to stdout).
    """
    print("🔍 [TEST] API Endpoints...")
    base_url = "http://localhost:7861"
    request_timeout = 10  # seconds; requests has no timeout by default

    # Start the server
    cmd = [sys.executable, "-m", "uvicorn", "backend.main:app", "--host", "0.0.0.0", "--port", "7861"]
    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    try:
        # The wait lives inside the try so an interrupt here still stops the server.
        time.sleep(5)  # Wait for server
        if process.poll() is not None:
            print(f"❌ Error: server exited early with code {process.returncode}.")
            return False

        # Test /tasks
        r = requests.get(f"{base_url}/tasks", timeout=request_timeout)
        if r.status_code != 200:
            print("❌ Error: /tasks endpoint failed.")
            return False
        tasks = r.json()
        if len(tasks) < 3:
            print("❌ Error: /tasks endpoint failed.")
            return False

        # Test /grader
        task_id = tasks[0]["id"]
        # `params` URL-encodes the id instead of splicing it into the query string.
        r_grader = requests.get(
            f"{base_url}/grader",
            params={"task_id": task_id},
            timeout=request_timeout,
        )
        if r_grader.status_code != 200 or "score" not in r_grader.json():
            print("❌ Error: /grader endpoint failed.")
            return False

        print("✅ API endpoint tests passed!")
        return True
    except Exception as e:
        # Harness boundary: connection errors, bad JSON, etc. become a failed check.
        print(f"❌ Error during API test: {e}")
        return False
    finally:
        process.terminate()
        try:
            # communicate() drains both pipes and reaps the child.
            process.communicate(timeout=10)
        except subprocess.TimeoutExpired:
            process.kill()
            process.communicate()
def main():
    """Run all validation stages and exit with a status code.

    Runs the internal-logic checks first; the endpoint checks run only if
    those pass. Exits the process with status 0 on success and 1 on failure.
    """
    print("🚀 Starting consolidated validation...")
    if test_internal_logic() and test_endpoints():
        print("\n✨ ALL VALIDATION CHECKS PASSED!")
        sys.exit(0)
    else:
        print("\n❌ VALIDATION FAILED.")
        sys.exit(1)
# Run the validation only when executed as a script, not when imported.
if __name__ == "__main__":
    main()