hallucination-detector-project / comprehensive_server_test.py

Upload comprehensive_server_test.py with huggingface_hub

f31ad07 verified 8 months ago

5.58 kB

	#!/usr/bin/env python3
	"""
	Comprehensive server functionality test
	Tests all endpoints and core features
	"""
	import requests
	import json
	import time
	import sys

	def test_endpoint(name, url, method="GET", data=None, headers=None):
	    """Send one request to an endpoint and report the outcome on stdout.

	    Args:
	        name: Human-readable label used in the progress output.
	        url: Full URL to request.
	        method: "POST" sends ``data`` as a JSON body (30 s timeout); any
	            other value performs a GET (10 s timeout).
	        data: JSON-serialisable payload for POST requests.
	        headers: Optional HTTP headers sent with the request.

	    Returns:
	        A ``(success, payload)`` tuple. ``success`` is True only for an
	        HTTP 200 response. ``payload`` is the decoded JSON body on success,
	        the first 200 characters of the body when a 200 response is not
	        JSON, the full response text for any other status, or an error
	        message string when the request itself failed.
	    """
	    try:
	        print(f"\n🔍 Testing {name}...")
	        print(f" URL: {url}")

	        if method == "POST":
	            response = requests.post(url, json=data, headers=headers, timeout=30)
	        else:
	            # Forward caller-supplied headers on GET as well instead of
	            # silently dropping them.
	            response = requests.get(url, headers=headers, timeout=10)

	        print(f" Status: {response.status_code}")

	        if response.status_code != 200:
	            print(f" ❌ FAILED: {response.text[:200]}")
	            return False, response.text

	        try:
	            result = response.json()
	        except ValueError:
	            # JSON decoding errors are ValueError subclasses; HTML pages
	            # such as the dashboard land here. Anything else (including
	            # KeyboardInterrupt) must not be masked as a success.
	            print(" ✅ SUCCESS (Non-JSON response)")
	            return True, response.text[:200]

	        print(" ✅ SUCCESS")
	        return True, result

	    except requests.exceptions.ConnectionError:
	        print(" ❌ CONNECTION FAILED - Server not running?")
	        return False, "Connection failed"
	    except Exception as e:
	        # Top-level boundary: every failure is reported as a failed check
	        # so that one bad endpoint cannot abort the whole run.
	        print(f" ❌ ERROR: {str(e)}")
	        return False, str(e)

	def _print_prediction_result(data):
	    """Print the prediction fields of a successful prediction payload.

	    ``test_endpoint`` hands back a plain string when a 200 response is not
	    JSON, so anything that is not a dict is reported instead of being
	    passed to ``.get`` (which would raise AttributeError).
	    """
	    if isinstance(data, dict):
	        print(f" Result: is_hallucination={data.get('is_hallucination')}, confidence={data.get('confidence_score')}")
	    else:
	        print(" Result: response was not a JSON object")


	def comprehensive_server_test(base_url="http://localhost:8000"):
	    """Run every endpoint check against a server and print a summary.

	    A check passes when the endpoint answers with HTTP 200; response
	    contents are printed for the prediction endpoints but not validated.

	    Args:
	        base_url: Root URL of the server under test, without a trailing
	            slash.

	    Returns:
	        True when all checks pass or at least 80% of them pass, otherwise
	        False.
	    """
	    print("🚀 COMPREHENSIVE SERVER FUNCTIONALITY TEST")
	    print("=" * 60)

	    results = {}
	    json_headers = {"Content-Type": "application/json"}

	    # 1-5. Status endpoint and HTML pages (plain GET requests).
	    for key, label, path in (
	        ("health", "Health Check", "/api/status"),
	        ("main_page", "Main Page", "/"),
	        ("dashboard", "Dashboard", "/dashboard"),
	        ("api_docs", "API Docs", "/docs"),
	        ("playground", "Playground", "/playground"),
	    ):
	        results[key], _ = test_endpoint(label, f"{base_url}{path}")

	    # 6. Core prediction - response consistent with the prompt.
	    test_data_valid = {
	        "prompt": "iPhone 15 Pro: A17 Pro chip",
	        "response": "iPhone 15 Pro has A17 Pro chip",
	        "question": "iPhone 15 Pro: A17 Pro chip",
	    }
	    success, data = test_endpoint(
	        "Prediction API - Valid Response",
	        f"{base_url}/api/predict",
	        "POST",
	        test_data_valid,
	        json_headers,
	    )
	    results["prediction_valid"] = success
	    if success:
	        _print_prediction_result(data)

	    # 7. Core prediction - response contradicting the prompt.
	    test_data_invalid = {
	        "prompt": "iPhone 15 Pro: A17 Pro chip",
	        "response": "iPhone 15 Pro has M1 chip",  # Wrong chip = hallucination
	        "question": "iPhone 15 Pro: A17 Pro chip",
	    }
	    success, data = test_endpoint(
	        "Prediction API - Hallucination Detection",
	        f"{base_url}/api/predict",
	        "POST",
	        test_data_invalid,
	        json_headers,
	    )
	    results["prediction_hallucination"] = success
	    if success:
	        _print_prediction_result(data)

	    # 8. Batch prediction.
	    batch_data = {
	        "items": [
	            {
	                "prompt": "iPhone 15 Pro: A17 Pro chip",
	                "response": "iPhone 15 Pro has A17 Pro chip",
	                "question": "iPhone 15 Pro: A17 Pro chip",
	            },
	            {
	                "prompt": "iPhone 14: A16 Bionic chip",
	                "response": "iPhone 14 features A16 Bionic processor",
	                "question": "iPhone 14: A16 Bionic chip",
	            },
	        ]
	    }
	    results["batch_prediction"], _ = test_endpoint(
	        "Batch Prediction API",
	        f"{base_url}/api/batch_predict",
	        "POST",
	        batch_data,
	        json_headers,
	    )

	    # 9-10. Training status and monitoring metrics (plain GET requests).
	    for key, label, path in (
	        ("training_status", "Training Status", "/api/train/status"),
	        ("monitor", "Monitor/Metrics", "/api/monitor/metrics"),
	    ):
	        results[key], _ = test_endpoint(label, f"{base_url}{path}")

	    # Summary
	    print("\n" + "=" * 60)
	    print("📊 TEST RESULTS SUMMARY")
	    print("=" * 60)

	    passed = sum(1 for result in results.values() if result)
	    total = len(results)

	    for test_name, result in results.items():
	        status = "✅ PASS" if result else "❌ FAIL"
	        print(f"{test_name:25} : {status}")

	    print(f"\n🎯 OVERALL RESULT: {passed}/{total} tests passed")

	    if passed == total:
	        print("🎉 ALL TESTS PASSED - SERVER IS FULLY FUNCTIONAL!")
	        return True
	    if passed >= total * 0.8:  # 80% pass rate still counts as usable
	        print("⚠️ MOSTLY FUNCTIONAL - Some minor issues")
	        return True

	    print("❌ MAJOR ISSUES DETECTED - Server needs attention")
	    return False

	if __name__ == "__main__":
	    # Propagate the overall verdict as the process exit status so that
	    # shell scripts and CI jobs can detect a failing run; previously the
	    # return value was discarded and the script always exited with 0.
	    sys.exit(0 if comprehensive_server_test() else 1)