|
|
| """
|
| Debug the rule-based detection system
|
| """
|
|
|
| import requests
|
| import json
|
|
|
| BASE_URL = "http://localhost:8000"
|
|
|
def debug_rule_detection() -> None:
    """Compare raw and final predictions for a set of known-wrong answers.

    For each hard-coded test case the same payload is POSTed first to
    ``/api/debug-predict`` and then to ``/api/predict`` under ``BASE_URL``.
    The hallucination flag and confidence score of both responses are
    printed, and a confidence difference greater than 0.01 between the two
    is reported as rule assistance.

    Nothing is returned. A non-200 status from either endpoint, or any
    exception raised while talking to the service, is printed and the loop
    moves on to the next test case.
    """
    # NOTE(review): the status glyphs in the messages below were mis-decoded
    # in the reviewed copy (and one literal was split across two lines, which
    # is a syntax error). They are restored to the most likely original
    # emoji - confirm against the upstream file.
    print("🔍 DEBUGGING RULE-BASED DETECTION")
    print("=" * 50)

    test_cases = [
        {
            "name": "Geography - Wrong Capital",
            "prompt": "Question: What is the capital of France?",
            "response": "London is the capital of France",
            "question": "What is the capital of France?",
            "expected_rule": "Should detect geography contradiction",
        },
        {
            "name": "Science - Wrong Heart Chambers",
            "prompt": "Question: How many chambers does the human heart have?",
            "response": "The human heart has 6 chambers",
            "question": "How many chambers does the human heart have?",
            "expected_rule": "Should detect science contradiction",
        },
        {
            "name": "Physics - Wrong Planet Size",
            "prompt": "Question: Which is larger, Earth or Jupiter?",
            "response": "Earth is larger than Jupiter",
            "question": "Which is larger, Earth or Jupiter?",
            "expected_rule": "Should detect physics contradiction",
        },
    ]

    for i, test in enumerate(test_cases, 1):
        print(f"\n🧪 Debug Test {i}: {test['name']}")
        print(f"❓ Question: {test['question']}")
        print(f"📝 Response: {test['response']}")
        print(f"📋 Expected Rule: {test['expected_rule']}")

        payload = {
            "prompt": test["prompt"],
            "response": test["response"],
            "question": test["question"],
            # Ask the service not to serve a cached answer for this request.
            "use_cache": False,
        }

        # Broad catch on purpose: this is a best-effort debug loop, so any
        # failure for one case is reported and the remaining cases still run.
        try:
            response = requests.post(
                f"{BASE_URL}/api/debug-predict", json=payload, timeout=10
            )
            if response.status_code != 200:
                print(f"❌ Error: {response.status_code}")
                continue

            result = response.json()
            print(f"🤖 Raw Model Output: '{result.get('raw_model_output', '')}'")
            print(f"🔍 Is Hallucination: {result.get('is_hallucination', False)}")
            print(f"📊 Confidence: {result.get('confidence_score', 0):.1%}")

            response2 = requests.post(
                f"{BASE_URL}/api/predict", json=payload, timeout=10
            )
            if response2.status_code != 200:
                # Previously this failure produced no output at all.
                print(f"❌ Error from /api/predict: {response2.status_code}")
                continue

            result2 = response2.json()
            print(f"🔧 Final Is Hallucination: {result2.get('is_hallucination', False)}")
            print(f"🔧 Final Confidence: {result2.get('confidence_score', 0):.1%}")

            # A confidence shift of more than one percentage point between
            # the two endpoints is what this script reports as rule assistance.
            raw_confidence = result.get('confidence_score', 0)
            final_confidence = result2.get('confidence_score', 0)
            if abs(raw_confidence - final_confidence) > 0.01:
                print("✅ RULE ASSISTANCE DETECTED")
            else:
                print("❌ NO RULE ASSISTANCE")
        except Exception as e:
            print(f"❌ Error: {e}")
|
|
|
# Script entry point: run the debug session only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    debug_rule_detection()
|
|
|