| import requests |
| import sys |
| import time |
| import subprocess |
| import os |
| from typing import Dict, List, Any |
|
|
| |
# Make the current working directory importable so that the function-level
# ``from backend.env import ...`` resolves when this script is launched from
# the directory that contains the ``backend`` package.
sys.path.append(os.getcwd())
|
|
def test_internal_logic():
    """Validate the environment's task registry and grading logic in-process.

    The checks run in order and stop at the first failure:

    1. ``backend.env`` can be imported and exposes ``CustomerSupportEnv``
       and ``TASKS``.
    2. ``CustomerSupportEnv().get_tasks()`` reports at least three tasks.
    3. Every task has ``has_grader``, ``has_evaluator`` and ``grader`` set to
       exactly ``True``.
    4. ``grade()`` runs on a mock single-step episode for the first task and
       returns a score inside ``[0.0, 1.0]``.

    Returns:
        bool: ``True`` when every check passes; ``False`` otherwise, after a
        diagnostic line has been printed.
    """
    print("🔍 [TEST] Internal Logic & Task Enumeration...")
    try:
        # TASKS is not used below; the import is kept so that a module that
        # no longer exports it still fails this validation step.
        from backend.env import CustomerSupportEnv, TASKS  # noqa: F401
    except ImportError as e:
        print(f"❌ Error: Could not import environment components: {e}")
        return False

    env = CustomerSupportEnv()

    tasks = env.get_tasks()
    print(f"✅ Found {len(tasks)} tasks via get_tasks().")

    if len(tasks) < 3:
        print(f"❌ Error: Only found {len(tasks)} tasks, expected at least 3.")
        return False

    required_keys = ['has_grader', 'has_evaluator', 'grader']
    for task in tasks:
        task_id = task.get('id')
        for key in required_keys:
            # Identity check on purpose: truthy stand-ins such as 1 or "yes"
            # are rejected, only the literal True is accepted.
            if task.get(key) is not True:
                print(f"❌ Error: Task {task_id} {key} is NOT True.")
                return False

    # Mock episode whose final state agrees with the ground truth on the
    # fields both dictionaries share.
    mock_state = {
        "classification": "refund",
        "priority": "high",
        "status": "closed",
        "response": "sorry",
        "sentiment": "angry",
    }
    ground_truth = {
        "expected_classification": "refund",
        "expected_priority": "high",
        "sentiment": "angry",
    }
    try:
        score = env.grade(tasks[0]['id'], [{"state": mock_state}], ground_truth)
        print(f"✅ Grading execution successful. Score: {score:.3f}")
        if not (0.0 <= score <= 1.0):
            print("❌ Error: Score out of range!")
            return False
    except Exception as e:
        # Validation boundary: any grader failure (including a non-numeric
        # score that breaks the format spec above) is reported, not raised.
        print(f"❌ Error: grade() method failed: {e}")
        return False

    print("✅ Internal logic tests passed!\n")
    return True
|
|
def test_endpoints():
    """Start the API under uvicorn and smoke-test ``/tasks`` and ``/grader``.

    Launches ``backend.main:app`` on port 7861 in a child process, waits a
    fixed five seconds for it to come up, then checks that:

    * ``GET /tasks`` answers 200 with a JSON body of length >= 3;
    * ``GET /grader?task_id=<id of first task>`` answers 200 with a JSON body
      containing ``"score"``.

    The child process is always terminated and reaped before returning.

    Returns:
        bool: ``True`` when both endpoints pass; ``False`` on any failed
        check or on any exception raised while talking to the server.
    """
    print("🔍 [TEST] API Endpoints...")

    base_url = "http://localhost:7861"
    request_timeout = 10  # seconds; keeps a hung server from stalling the run
    cmd = [
        sys.executable, "-m", "uvicorn", "backend.main:app",
        "--host", "0.0.0.0", "--port", "7861",
    ]
    # Nothing ever reads the server's output, so it is discarded instead of
    # being captured: an unread PIPE can fill up and block the child process.
    process = subprocess.Popen(
        cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
    )

    try:
        # Inside the try block so that an interrupt during the start-up wait
        # still reaches the cleanup in ``finally``.
        time.sleep(5)

        r = requests.get(f"{base_url}/tasks", timeout=request_timeout)
        # The body is only parsed for a 200 response, and only once.
        tasks = r.json() if r.status_code == 200 else []
        if len(tasks) < 3:
            print("❌ Error: /tasks endpoint failed.")
            return False

        task_id = tasks[0]["id"]
        # ``params`` lets requests URL-encode the task id.
        r_grader = requests.get(
            f"{base_url}/grader",
            params={"task_id": task_id},
            timeout=request_timeout,
        )
        if r_grader.status_code != 200 or "score" not in r_grader.json():
            print("❌ Error: /grader endpoint failed.")
            return False

        print("✅ API endpoint tests passed!")
        return True
    except Exception as e:
        # Validation boundary: connection errors, bad JSON, unexpected
        # payload shapes, etc. are all reported as a failed check.
        print(f"❌ Error during API test: {e}")
        return False
    finally:
        # Terminate and reap the server so no zombie process or bound port is
        # left behind; escalate to kill() if it ignores the terminate request.
        process.terminate()
        try:
            process.wait(timeout=10)
        except subprocess.TimeoutExpired:
            process.kill()
            process.wait()
|
|
def main():
    """Run every validation stage and exit with a matching status code.

    ``test_internal_logic()`` runs first; ``test_endpoints()`` runs only if
    it returned a truthy value. The process exits with status 0 when both
    stages pass and with status 1 otherwise.

    Raises:
        SystemExit: always, carrying the exit status described above.
    """
    print("🚀 Starting consolidated validation...")
    if test_internal_logic() and test_endpoints():
        print("\n✨ ALL VALIDATION CHECKS PASSED!")
        sys.exit(0)
    else:
        print("\n❌ VALIDATION FAILED.")
        sys.exit(1)
|
|
# Run the validation only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
|