FairRelay / brain /test_workflow.py

Upload folder using huggingface_hub

fcf8749 verified 10 days ago

20.8 kB

	#!/usr/bin/env python
	"""
	Production QA Automation CLI for Fair Dispatch LangGraph Backend.

	Validates the ENTIRE LangGraph-migrated system:
	- 5 Agent Nodes (ML Effort, Route Planner, Fairness, Liaison, Explainability)
	- Gemini 3 Flash explanations
	- Phases 1-8 functionality
	- Performance requirements

	Usage:
	python test_workflow.py --help
	python test_workflow.py --full-e2e
	python test_workflow.py --ev-stress
	python test_workflow.py --recovery-stress
	python test_workflow.py --gemini-only
	python test_workflow.py --timeline-validate
	python test_workflow.py --all
	"""

	import argparse
	import asyncio
	import json
	import sys
	import time
	from datetime import date, datetime
	from typing import Dict, List, Any, Optional
	from uuid import uuid4

	try:
	import httpx
	except ImportError:
	print("Error: httpx not installed. Run: pip install httpx")
	sys.exit(1)


	# =============================================================================
	# CONFIGURATION
	# =============================================================================

	# Base URL of the versioned API; main() rebinds these three names when
	# a different server is selected on the command line.
	API_BASE_URL = "http://localhost:8000/api/v1"
	LANGGRAPH_ENDPOINT = API_BASE_URL + "/allocate/langgraph"
	ORIGINAL_ENDPOINT = API_BASE_URL + "/allocate"

	# Performance thresholds, in seconds
	MAX_DURATION_50_DRIVERS = 20.0
	MAX_DURATION_10_DRIVERS = 5.0


	# =============================================================================
	# TEST DATA FIXTURES
	# =============================================================================

	def generate_drivers(count: int, ev_ratio: float = 0.2, high_debt_ratio: float = 0.3) -> List[Dict]:
	    """Generate driver records for an allocation request payload.

	    The first ``int(count * ev_ratio)`` drivers receive the smaller 80 kg
	    capacity (this fixture's stand-in for an EV); the remainder receive
	    120 kg. Drivers at index 0, 3, 6, ... prefer Tamil ("ta"), all others
	    English ("en").

	    Args:
	        count: Number of drivers to generate. Zero or a negative value
	            yields an empty list.
	        ev_ratio: Fraction of drivers, taken from the head of the list,
	            that get the EV capacity.
	        high_debt_ratio: Accepted for backward compatibility only. The
	            generated payload has no debt field, so the value is unused.

	    Returns:
	        A list of dicts with the keys ``id``, ``name``,
	        ``vehicle_capacity_kg`` and ``preferred_language``.
	    """
	    # The cutoff does not depend on the loop index, so compute it once.
	    ev_cutoff = int(count * ev_ratio)

	    return [
	        {
	            "id": f"drv_{i+1:03d}",
	            "name": f"Driver {i+1}",
	            "vehicle_capacity_kg": 80.0 if i < ev_cutoff else 120.0,
	            "preferred_language": "ta" if i % 3 == 0 else "en",
	        }
	        for i in range(count)
	    ]


	def generate_packages(count: int, warehouse_lat: float = 13.0827, warehouse_lng: float = 80.2707) -> List[Dict]:
	    """Build ``count`` package records laid out on a grid around the warehouse.

	    Positions cycle through a 10 x 10 grid of 0.01-degree steps (offsets
	    from -0.05 to +0.04 on each axis) relative to the warehouse
	    coordinates. Weight, fragility and priority cycle deterministically
	    with the package index.

	    Args:
	        count: Number of packages to generate.
	        warehouse_lat: Latitude the grid is anchored to.
	        warehouse_lng: Longitude the grid is anchored to.

	    Returns:
	        A list of dicts with the keys ``id``, ``weight_kg``,
	        ``fragility_level``, ``address``, ``latitude``, ``longitude`` and
	        ``priority``.
	    """
	    priority_cycle = ["NORMAL", "NORMAL", "EXPRESS", "NORMAL", "HIGH"]

	    def build_package(index: int) -> Dict:
	        seq = index + 1
	        # Column within the grid drives latitude, row drives longitude.
	        lat_step = index % 10 - 5
	        lng_step = index // 10 % 10 - 5
	        return {
	            "id": f"pkg_{seq:04d}",
	            "weight_kg": 2.0 + (index % 10) * 0.5,
	            "fragility_level": (index % 5) + 1,
	            "address": f"Address {seq}, Chennai",
	            "latitude": warehouse_lat + lat_step * 0.01,
	            "longitude": warehouse_lng + lng_step * 0.01,
	            "priority": priority_cycle[index % 5],
	        }

	    return [build_package(index) for index in range(count)]


	def create_allocation_request(
	    num_drivers: int = 10,
	    num_packages: int = 50,
	    ev_ratio: float = 0.2,
	    allocation_date: Optional[str] = None,
	) -> Dict:
	    """Assemble a complete allocation request payload.

	    Args:
	        num_drivers: Number of driver records to generate.
	        num_packages: Number of package records to generate.
	        ev_ratio: Fraction of drivers generated with the EV capacity.
	        allocation_date: ISO-formatted date string for the run. When
	            ``None``, today's date is used.

	    Returns:
	        A dict with ``allocation_date``, ``drivers``, ``packages`` and
	        ``warehouse`` (``lat`` / ``lng``) entries.
	    """
	    if allocation_date is None:
	        allocation_date = date.today().isoformat()

	    # Single source of truth for the warehouse position: the packages are
	    # scattered around the same coordinates that the payload reports.
	    warehouse = {
	        "lat": 13.0827,
	        "lng": 80.2707,
	    }

	    return {
	        "allocation_date": allocation_date,
	        "drivers": generate_drivers(num_drivers, ev_ratio=ev_ratio),
	        "packages": generate_packages(
	            num_packages,
	            warehouse_lat=warehouse["lat"],
	            warehouse_lng=warehouse["lng"],
	        ),
	        "warehouse": warehouse,
	    }


	# Pre-defined test scenarios
	# Scenario name -> request payload. The payloads are built once, when the
	# module is loaded, from (name, drivers, packages, EV share) rows.
	TEST_INPUTS = {
	    scenario: create_allocation_request(
	        num_drivers=driver_count,
	        num_packages=package_count,
	        ev_ratio=ev_share,
	    )
	    for scenario, driver_count, package_count, ev_share in (
	        ("full_e2e", 50, 250, 0.2),
	        ("small", 5, 25, 0.2),
	        ("medium", 10, 50, 0.2),
	        ("ev_stress", 20, 100, 0.5),
	        ("recovery_stress", 15, 75, 0.1),
	    )
	}


	# =============================================================================
	# GOLDEN OUTPUTS (Expected Responses)
	# =============================================================================

	# Expected response characteristics per scenario:
	#   status_code     - HTTP status the endpoint must return
	#   gini_index      - inclusive min/max bounds for the fairness Gini value
	#   num_assignments - exact number of assignments expected
	#   max_duration_s  - upper bound on request duration, in seconds
	#   required_fields - top-level keys that must be present in the response
	#   timeline_agents - agent names expected for the scenario
	GOLDEN_OUTPUTS = dict(
	    full_e2e=dict(
	        status_code=200,
	        gini_index=dict(min=0.15, max=0.45),
	        num_assignments=50,
	        max_duration_s=25.0,
	        required_fields=["allocation_run_id", "allocation_date", "global_fairness", "assignments"],
	        timeline_agents=["ML_EFFORT", "ROUTE_PLANNER", "FAIRNESS_MANAGER"],
	    ),
	    small=dict(
	        status_code=200,
	        gini_index=dict(min=0.10, max=0.50),
	        num_assignments=5,
	        max_duration_s=5.0,
	    ),
	    medium=dict(
	        status_code=200,
	        gini_index=dict(min=0.10, max=0.50),
	        num_assignments=10,
	        max_duration_s=10.0,
	    ),
	)


	# =============================================================================
	# TEST RUNNER
	# =============================================================================

	class TestResult:
	    """Mutable record of a single test's outcome.

	    A result starts out as passed; calling :meth:`fail` flips it to failed
	    and records the reason, while :meth:`warn` records a note without
	    affecting the pass/fail state.
	    """

	    # The class name starts with "Test" and the module name starts with
	    # "test_", so pytest would try to collect it and warn about __init__.
	    # Opt out explicitly.
	    __test__ = False

	    def __init__(self, name: str):
	        """Create a passing result labelled ``name``."""
	        self.name = name
	        self.passed = True
	        self.errors: List[str] = []
	        self.warnings: List[str] = []
	        self.duration_s: float = 0
	        self.data: Dict = {}

	    def fail(self, message: str):
	        """Mark the test as failed and record ``message`` as an error."""
	        self.passed = False
	        self.errors.append(message)

	    def warn(self, message: str):
	        """Record ``message`` as a non-fatal warning."""
	        self.warnings.append(message)

	    def __str__(self):
	        """Render a status line followed by one line per error and warning.

	        For failed tests that stored an ``"error"`` entry in ``data``, the
	        first 300 characters of that entry are appended as well.
	        """
	        status = "✅ PASS" if self.passed else "❌ FAIL"
	        lines = [f"{status} {self.name} ({self.duration_s:.2f}s)"]
	        lines.extend(f" ❌ {err}" for err in self.errors)
	        lines.extend(f" ⚠️ {note}" for note in self.warnings)

	        # ``data`` may hold raw decoded JSON, so it is not guaranteed to be
	        # a dict, nor is its "error" entry guaranteed to be a string.
	        if not self.passed and isinstance(self.data, dict):
	            error_body = self.data.get("error")
	            if error_body:
	                lines.append(f" 📋 Response: {str(error_body)[:300]}")

	        return "\n".join(lines)



	def _validate_allocation_payload(result: "TestResult", data: Dict, golden: Dict, enable_gemini: bool) -> None:
	    """Check a decoded allocation response against the golden expectations."""
	    # Required fields check
	    for field in golden.get("required_fields", ["allocation_run_id", "assignments"]):
	        if field not in data:
	            result.fail(f"Missing required field: {field}")

	    # A null "assignments" value is treated like an empty list.
	    assignments = data.get("assignments") or []

	    # Assignments count check
	    if "num_assignments" in golden:
	        actual = len(assignments)
	        expected = golden["num_assignments"]
	        if actual != expected:
	            result.fail(f"Assignment count {actual}, expected {expected}")

	    # Gini index check (advisory only: out-of-range values warn, not fail)
	    if "gini_index" in golden:
	        bounds = golden["gini_index"]
	        fairness = data.get("global_fairness") or {}
	        gini = fairness.get("gini_index", 0)
	        if isinstance(gini, bool) or not isinstance(gini, (int, float)):
	            result.warn(f"Gini index is not numeric: {gini!r}")
	        elif not (bounds["min"] <= gini <= bounds["max"]):
	            result.warn(f"Gini {gini:.3f} outside expected range [{bounds['min']}, {bounds['max']}]")

	    # Performance check
	    if "max_duration_s" in golden:
	        if result.duration_s > golden["max_duration_s"]:
	            result.fail(f"Duration {result.duration_s:.2f}s exceeds max {golden['max_duration_s']}s")

	    # Gemini check: report only the first assignment with a short explanation
	    if enable_gemini:
	        for assignment in assignments:
	            explanation = assignment.get("explanation") or ""
	            if len(explanation) < 10:
	                result.warn(f"Short explanation for {assignment.get('driver_id')}")
	                break


	async def run_allocation_test(
	    test_name: str,
	    request_data: Dict,
	    golden: Dict,
	    endpoint: str = LANGGRAPH_ENDPOINT,
	    enable_gemini: bool = False,
	) -> TestResult:
	    """POST one allocation request and validate the response.

	    Args:
	        test_name: Label used for the returned result.
	        request_data: JSON-serialisable request payload.
	        golden: Expectations. Recognised keys are ``status_code``
	            (default 200), ``required_fields``, ``num_assignments``,
	            ``gini_index`` (``min`` / ``max``) and ``max_duration_s``.
	        endpoint: URL to POST to; ``enable_gemini`` is appended as a
	            query parameter.
	        enable_gemini: Whether to request Gemini explanations and check
	            their length.

	    Returns:
	        A ``TestResult``. Connection errors, timeouts and any other
	        exception are recorded as failures rather than raised.
	    """
	    result = TestResult(test_name)

	    url = f"{endpoint}?enable_gemini={str(enable_gemini).lower()}"
	    timeout_s = 60.0
	    # Resolve once so the comparison and the message agree even when the
	    # golden dict has no explicit status_code.
	    expected_status = golden.get("status_code", 200)

	    try:
	        async with httpx.AsyncClient(timeout=timeout_s) as client:
	            # perf_counter is monotonic, so clock adjustments cannot skew
	            # the measured duration.
	            start = time.perf_counter()
	            response = await client.post(url, json=request_data)
	            result.duration_s = time.perf_counter() - start

	        # Status code check
	        if response.status_code != expected_status:
	            result.fail(f"Status code {response.status_code}, expected {expected_status}")
	            result.data["error"] = response.text[:500]
	            return result

	        data = response.json()
	        result.data = data

	        if not isinstance(data, dict):
	            result.fail(f"Response body is not a JSON object: {type(data).__name__}")
	            return result

	        _validate_allocation_payload(result, data, golden, enable_gemini)

	    except httpx.ConnectError:
	        result.fail("Cannot connect to server. Is uvicorn running?")
	    except httpx.TimeoutException:
	        result.fail(f"Request timed out after {timeout_s:.0f}s")
	    except Exception as e:
	        result.fail(f"Exception: {str(e)[:200]}")

	    return result


	# =============================================================================
	# TEST SUITES
	# =============================================================================

	async def test_full_e2e() -> TestResult:
	    """Run the largest scenario: 50 drivers and 250 packages."""
	    print("\n🧪 Running Full E2E Test (50 drivers, 250 packages)...")
	    scenario = "full_e2e"
	    outcome = await run_allocation_test("Full E2E", TEST_INPUTS[scenario], GOLDEN_OUTPUTS[scenario])
	    return outcome


	async def test_small() -> TestResult:
	    """Run the 5-driver scenario as a fast sanity check."""
	    print("\n🧪 Running Small Test (5 drivers)...")
	    scenario = "small"
	    outcome = await run_allocation_test("Small", TEST_INPUTS[scenario], GOLDEN_OUTPUTS[scenario])
	    return outcome


	async def test_medium() -> TestResult:
	    """Run the 10-driver scenario."""
	    print("\n🧪 Running Medium Test (10 drivers)...")
	    scenario = "medium"
	    outcome = await run_allocation_test("Medium", TEST_INPUTS[scenario], GOLDEN_OUTPUTS[scenario])
	    return outcome


	async def test_ev_stress() -> TestResult:
	    """Run the scenario in which half of the 20 drivers are EVs."""
	    print("\n🧪 Running EV Stress Test (50% EV drivers)...")
	    # No stored golden entry for this scenario; expectations are inline.
	    expectations = {"status_code": 200, "num_assignments": 20, "max_duration_s": 15.0}
	    outcome = await run_allocation_test("EV Stress", TEST_INPUTS["ev_stress"], expectations)
	    return outcome


	async def test_recovery_stress() -> TestResult:
	    """Run the 15-driver "recovery_stress" scenario."""
	    print("\n🧪 Running Recovery Stress Test...")
	    # No stored golden entry for this scenario; expectations are inline.
	    expectations = {"status_code": 200, "num_assignments": 15, "max_duration_s": 15.0}
	    outcome = await run_allocation_test("Recovery Stress", TEST_INPUTS["recovery_stress"], expectations)
	    return outcome


	async def test_gemini_explanations() -> TestResult:
	    """Run the small scenario with Gemini enabled and report explanation languages.

	    After a passing run, every non-empty explanation is classified as
	    Tamil ("ta") when it contains a character from the Tamil Unicode
	    block and as English ("en") otherwise. The set of languages found is
	    stored under ``languages_detected`` in the result data and printed.
	    """
	    print("\n🧪 Running Gemini Explanations Test...")
	    expectations = {"status_code": 200, "num_assignments": 5, "max_duration_s": 30.0}
	    result = await run_allocation_test(
	        "Gemini Explanations",
	        TEST_INPUTS["small"],
	        expectations,
	        enable_gemini=True,
	    )

	    # Language detection only makes sense on a successful, non-empty response.
	    if not (result.passed and result.data):
	        return result

	    def contains_tamil(text) -> bool:
	        # U+0B80..U+0BFF is the Tamil Unicode block.
	        return any('\u0B80' <= ch <= '\u0BFF' for ch in text)

	    explanations = (
	        assignment.get("explanation", "")
	        for assignment in result.data.get("assignments", [])
	    )
	    languages_seen = {
	        "ta" if contains_tamil(text) else "en"
	        for text in explanations
	        if text
	    }

	    result.data["languages_detected"] = list(languages_seen)
	    print(f" Languages detected: {languages_seen}")
	    return result


	async def test_api_equivalence() -> TestResult:
	    """Send the small scenario to both endpoints and compare the responses.

	    The test fails when the two status codes differ, when both endpoints
	    return an error status, when either body is not a JSON object, or
	    when the assignment counts differ. Differing top-level keys only
	    produce a warning. The per-endpoint timings are stored in the result
	    data and their sum is the reported duration.
	    """
	    print("\n🧪 Running API Equivalence Test...")
	    result = TestResult("API Equivalence")

	    request = TEST_INPUTS["small"]

	    try:
	        async with httpx.AsyncClient(timeout=30.0) as client:
	            # LangGraph endpoint
	            start1 = time.perf_counter()
	            resp1 = await client.post(LANGGRAPH_ENDPOINT, json=request)
	            time1 = time.perf_counter() - start1

	            # Original endpoint
	            start2 = time.perf_counter()
	            resp2 = await client.post(ORIGINAL_ENDPOINT, json=request)
	            time2 = time.perf_counter() - start2

	        result.duration_s = time1 + time2

	        if resp1.status_code != resp2.status_code:
	            result.fail(f"Status mismatch: LangGraph={resp1.status_code}, Original={resp2.status_code}")
	            return result

	        # Two identical error responses are "equivalent" but prove nothing
	        # about the allocation itself, so do not let them pass.
	        if resp1.status_code >= 400:
	            result.fail(f"Both endpoints returned error status {resp1.status_code}")
	            result.data["error"] = resp1.text[:500]
	            return result

	        data1 = resp1.json()
	        data2 = resp2.json()

	        if not isinstance(data1, dict) or not isinstance(data2, dict):
	            result.fail("Response bodies are not JSON objects")
	            return result

	        # Compare structure
	        if set(data1.keys()) != set(data2.keys()):
	            result.warn(f"Response keys differ: {set(data1.keys())} vs {set(data2.keys())}")

	        # Compare assignment counts; a missing or null list counts as empty
	        # so the failure message can always be formatted.
	        count1 = len(data1.get("assignments") or [])
	        count2 = len(data2.get("assignments") or [])
	        if count1 != count2:
	            result.fail(f"Assignment count mismatch: {count1} vs {count2}")

	        print(f" LangGraph: {time1:.2f}s, Original: {time2:.2f}s")
	        result.data = {"langgraph_time": time1, "original_time": time2}

	    except Exception as e:
	        result.fail(f"Exception: {str(e)[:200]}")

	    return result


	async def test_timeline_validate() -> TestResult:
	    """Run a small allocation and inspect its decision timeline.

	    The allocation must return 200 and an ``allocation_run_id``;
	    otherwise the test fails. The timeline is then fetched from the
	    admin endpoint. A non-200 timeline response, or an expected agent
	    missing from the timeline, only produces a warning.
	    """
	    print("\n🧪 Running Timeline Validation Test...")
	    result = TestResult("Timeline Validation")

	    expected_agents = ["ML_EFFORT", "ROUTE_PLANNER", "FAIRNESS_MANAGER"]

	    try:
	        async with httpx.AsyncClient(timeout=30.0) as client:
	            start = time.perf_counter()
	            resp = await client.post(LANGGRAPH_ENDPOINT, json=TEST_INPUTS["small"])
	            result.duration_s = time.perf_counter() - start

	            if resp.status_code != 200:
	                result.fail(f"Allocation failed: {resp.status_code}")
	                return result

	            allocation_id = resp.json().get("allocation_run_id")
	            if not allocation_id:
	                # Without an id the timeline URL would be ".../runs/None/timeline",
	                # which would be misreported as an unavailable endpoint.
	                result.fail("Allocation response has no allocation_run_id; cannot query timeline")
	                return result

	            # Query timeline endpoint if available
	            timeline_resp = await client.get(f"{API_BASE_URL}/admin/runs/{allocation_id}/timeline")
	            if timeline_resp.status_code != 200:
	                result.warn("Timeline endpoint not available (may need admin access)")
	                return result

	            timeline = timeline_resp.json()
	            agents = [entry.get("agent_name") for entry in timeline.get("timeline", [])]

	            for agent in expected_agents:
	                if agent not in agents:
	                    result.warn(f"Expected agent {agent} not in timeline")

	            result.data = {"timeline_agents": agents}
	            print(f" Timeline agents: {agents}")

	    except Exception as e:
	        result.fail(f"Exception: {str(e)[:200]}")

	    return result


	async def test_health_check() -> TestResult:
	    """GET ``/health`` on the server root and require a 200 response.

	    The server root is ``API_BASE_URL`` without its trailing ``/api/v1``.
	    The decoded JSON body is stored in the result data; when the body is
	    not JSON, a truncated copy of the raw text is stored under ``raw``.
	    """
	    print("\n🧪 Running Health Check...")
	    result = TestResult("Health Check")

	    # Strip only the trailing API prefix; a plain replace() would also
	    # remove the same text if it appeared earlier in the URL.
	    api_suffix = "/api/v1"
	    if API_BASE_URL.endswith(api_suffix):
	        server_root = API_BASE_URL[: -len(api_suffix)]
	    else:
	        server_root = API_BASE_URL

	    try:
	        async with httpx.AsyncClient(timeout=5.0) as client:
	            start = time.perf_counter()
	            resp = await client.get(f"{server_root}/health")
	            result.duration_s = time.perf_counter() - start

	        if resp.status_code != 200:
	            result.fail(f"Health check failed: {resp.status_code}")
	        else:
	            try:
	                result.data = resp.json()
	            except ValueError:
	                # A 200 with a plain-text body still means the server is up.
	                result.data = {"raw": resp.text[:200]}
	            print(f" Status: {result.data}")

	    except httpx.ConnectError:
	        result.fail("Cannot connect to server. Is uvicorn running?")
	    except Exception as e:
	        result.fail(f"Exception: {str(e)}")

	    return result


	# =============================================================================
	# CLI INTERFACE
	# =============================================================================

	async def run_tests(args) -> int:
	    """Run the tests selected on the command line and print a summary.

	    The health check always runs first; when it fails, nothing else is
	    run. With ``--all`` the standard suite runs (small, medium,
	    equivalence, EV stress, recovery stress, timeline), plus the Gemini
	    and full E2E tests when ``--gemini`` / ``--full`` are also given.
	    Without ``--all`` each individually flagged test runs.

	    Args:
	        args: Parsed argparse namespace produced by ``main``.

	    Returns:
	        Process exit code: 0 when every executed test passed, 1 otherwise.
	    """
	    results: List[TestResult] = []

	    # Always run health check first
	    health = await test_health_check()
	    results.append(health)

	    if not health.passed:
	        print("\n❌ Health check failed. Is the server running?")
	        print(" Start with: uvicorn app.main:app --reload")
	        return 1

	    # Select tests
	    if args.all:
	        selected = [
	            test_small,
	            test_medium,
	            test_api_equivalence,
	            test_ev_stress,
	            # Part of "all": there is no separate opt-in flag for it.
	            test_recovery_stress,
	            test_timeline_validate,
	        ]
	        # The slower tests are opt-in even under --all.
	        if args.gemini:
	            selected.append(test_gemini_explanations)
	        if args.full:
	            selected.append(test_full_e2e)
	    else:
	        individual_tests = [
	            ("full_e2e", test_full_e2e),
	            ("small", test_small),
	            ("medium", test_medium),
	            ("ev_stress", test_ev_stress),
	            ("recovery_stress", test_recovery_stress),
	            ("gemini_only", test_gemini_explanations),
	            ("timeline_validate", test_timeline_validate),
	            ("equivalence", test_api_equivalence),
	        ]
	        selected = [test_fn for flag, test_fn in individual_tests if getattr(args, flag)]

	    # Run sequentially so timings are not distorted by concurrent requests.
	    for test_fn in selected:
	        results.append(await test_fn())

	    # Print summary
	    print("\n" + "=" * 60)
	    print("TEST RESULTS SUMMARY")
	    print("=" * 60)

	    for r in results:
	        print(r)

	    passed = sum(1 for r in results if r.passed)
	    failed = len(results) - passed

	    print("=" * 60)
	    print(f"Total: {len(results)} | Passed: {passed} | Failed: {failed}")

	    if failed == 0:
	        print("✅ All tests passed!")
	        return 0

	    print("❌ Some tests failed.")
	    return 1


	def main():
	    """Parse CLI arguments, point the module at the target server, run the tests.

	    Rebinds the module-level ``API_BASE_URL``, ``LANGGRAPH_ENDPOINT`` and
	    ``ORIGINAL_ENDPOINT`` from ``--url``, defaults to ``--small`` when no
	    test is selected, and exits the process with the code returned by
	    ``run_tests``.
	    """
	    parser = argparse.ArgumentParser(
	        description="Fair Dispatch LangGraph Backend QA Automation",
	        formatter_class=argparse.RawDescriptionHelpFormatter,
	        epilog="""
	Examples:
	python test_workflow.py --small # Quick 5-driver test
	python test_workflow.py --medium # 10-driver test
	python test_workflow.py --full-e2e # Full 50-driver test
	python test_workflow.py --all # Run all tests
	python test_workflow.py --gemini-only # Test Gemini explanations
	python test_workflow.py --equivalence # Compare LangGraph vs Original
	"""
	    )

	    parser.add_argument("--full-e2e", action="store_true", help="Full E2E test (50 drivers)")
	    parser.add_argument("--small", action="store_true", help="Quick sanity test (5 drivers)")
	    parser.add_argument("--medium", action="store_true", help="Medium test (10 drivers)")
	    parser.add_argument("--ev-stress", action="store_true", help="EV stress test (50%% EV)")
	    parser.add_argument("--recovery-stress", action="store_true", help="Recovery stress test")
	    parser.add_argument("--gemini-only", action="store_true", help="Test Gemini explanations")
	    parser.add_argument("--timeline-validate", action="store_true", help="Validate Phase 5 timeline")
	    parser.add_argument("--equivalence", action="store_true", help="Compare LangGraph vs Original")
	    parser.add_argument("--all", action="store_true", help="Run all tests")
	    parser.add_argument("--gemini", action="store_true", help="Include Gemini tests in --all")
	    parser.add_argument("--full", action="store_true", help="Include full E2E in --all")
	    parser.add_argument("--url", type=str, default="http://localhost:8000", help="API base URL")

	    args = parser.parse_args()

	    # Derive every endpoint from --url. Trailing slashes are dropped so
	    # that "http://host/" does not turn into "http://host//api/v1".
	    global API_BASE_URL, LANGGRAPH_ENDPOINT, ORIGINAL_ENDPOINT
	    server_root = args.url.rstrip("/")
	    API_BASE_URL = f"{server_root}/api/v1"
	    LANGGRAPH_ENDPOINT = f"{API_BASE_URL}/allocate/langgraph"
	    ORIGINAL_ENDPOINT = f"{API_BASE_URL}/allocate"

	    # --gemini and --full are modifiers of --all; say so instead of
	    # silently ignoring them.
	    if (args.gemini or args.full) and not args.all:
	        print("Note: --gemini and --full only take effect together with --all", file=sys.stderr)

	    # Default to --small if no tests specified
	    if not any([args.full_e2e, args.small, args.medium, args.ev_stress,
	                args.recovery_stress, args.gemini_only, args.timeline_validate,
	                args.equivalence, args.all]):
	        args.small = True

	    print("=" * 60)
	    print("Fair Dispatch LangGraph QA Automation")
	    print(f"Target: {API_BASE_URL}")
	    print("=" * 60)

	    exit_code = asyncio.run(run_tests(args))
	    sys.exit(exit_code)


	if __name__ == "__main__":
	    main()