# vqa-backend / test_vqa_enhancements.py
# Provenance: Hugging Face Space commit bb8f662 ("Deploy VQA Space with model downloader") by Deva8.
"""
Test Suite for VQA Enhancements
Tests LLM Reasoning Engine and Conversational VQA features
"""
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
def test_llm_reasoning():
    """Exercise the LLM reasoning engine on three fact-grounded questions.

    Returns True when every case passes, False when the suite fails or is
    skipped (a missing GROQ_API_KEY surfaces as ValueError from the factory).
    """
    print("=" * 80)
    print("🧪 TEST 1: LLM Reasoning Engine")
    print("=" * 80)
    try:
        from llm_reasoning_service import get_llm_reasoning_service

        service = get_llm_reasoning_service()
        print("✓ LLM Reasoning Service initialized\n")

        def _report(res, with_chain=True):
            # Pretty-print one reasoning result; the chain and status lines
            # are only dumped for the fully-inspected cases (1.1 and 1.2).
            print(f" Answer: {res['answer']}")
            if with_chain:
                print(f" Reasoning Chain:")
                for step_no, step in enumerate(res['reasoning_chain'], 1):
                    print(f" {step_no}. {step}")
            print(f" Confidence: {res['confidence']}")
            if with_chain:
                print(f" Status: {res['status']}")

        # Case 1.1: material facts should let the model infer meltability.
        print("📝 Test Case 1.1: Can a candle melt?")
        result = service.reason_with_facts(
            object_name="candle",
            facts={
                "materials": ["wax", "wick"],
                "categories": ["light source", "household item"],
            },
            question="Can this melt?",
        )
        _report(result)
        assert result['answer'], "Answer should not be empty"
        assert result['confidence'] > 0, "Confidence should be positive"
        print(" ✓ Test passed\n")

        # Case 1.2: counterfactual reasoning over physical properties.
        print("📝 Test Case 1.2: Would ice cream survive in the desert?")
        result = service.reason_with_facts(
            object_name="ice cream",
            facts={
                "materials": ["milk", "sugar", "cream"],
                "categories": ["frozen dessert", "food"],
                "properties": ["cold", "frozen"],
            },
            question="Would this survive in the desert?",
        )
        _report(result)
        assert result['answer'], "Answer should not be empty"
        print(" ✓ Test passed\n")

        # Case 1.3: simple category lookup; only answer/confidence printed.
        print("📝 Test Case 1.3: Is an apple edible?")
        result = service.reason_with_facts(
            object_name="apple",
            facts={
                "categories": ["fruit", "food"],
                "properties": ["nutritious", "healthy"],
            },
            question="Is this edible?",
        )
        _report(result, with_chain=False)
        print(" ✓ Test passed\n")

        print("✅ LLM Reasoning Engine: ALL TESTS PASSED\n")
        return True
    except ValueError as e:
        # The service factory raises ValueError when no API key is configured.
        print(f"⚠️ LLM Reasoning tests skipped: {e}")
        print(" (Set GROQ_API_KEY to run these tests)\n")
        return False
    except Exception as e:
        print(f"❌ LLM Reasoning tests failed: {e}\n")
        return False
def test_conversation_manager():
    """Exercise session lifecycle: pronoun resolution, context, history, delete.

    Returns True when all four cases pass, False on any failure (with a
    traceback printed for diagnosis).
    """
    print("=" * 80)
    print("🧪 TEST 2: Conversation Manager")
    print("=" * 80)
    try:
        from conversation_manager import ConversationManager

        mgr = ConversationManager(session_timeout_minutes=30)
        print("✓ Conversation Manager initialized\n")

        # Case 2.1: "it" in follow-up questions must resolve to the last object.
        print("📝 Test Case 2.1: Multi-turn conversation")
        sid = mgr.create_session("test_image.jpg")
        print(f" Created session: {sid}")
        mgr.add_turn(
            session_id=sid,
            question="What is this?",
            answer="apple",
            objects_detected=["apple"],
        )
        print(" Turn 1: 'What is this?' → 'apple'")
        sess = mgr.get_session(sid)
        q2 = "Is it healthy?"
        r2 = mgr.resolve_references(q2, sess)
        print(f" Turn 2: '{q2}' → Resolved: '{r2}'")
        assert "apple" in r2.lower() or r2 == "Is apple healthy?", \
            "Pronoun 'it' should be resolved to 'apple'"
        mgr.add_turn(
            session_id=sid,
            question=q2,
            answer="Yes, apples are healthy",
            objects_detected=["apple"],
        )
        q3 = "What color is it?"
        r3 = mgr.resolve_references(q3, sess)
        print(f" Turn 3: '{q3}' → Resolved: '{r3}'")
        assert "apple" in r3.lower(), \
            "Pronoun 'it' should still resolve to 'apple'"
        print(" ✓ Pronoun resolution working\n")

        # Case 2.2: context lookup should reflect turn count and seen objects.
        print("📝 Test Case 2.2: Context retrieval")
        ctx = mgr.get_context_for_question(sid, "Another question")
        print(f" Turn number: {ctx['turn_number']}")
        print(f" Previous objects: {ctx['previous_objects']}")
        print(f" Has context: {ctx['has_context']}")
        assert ctx['turn_number'] == 4, "Should be on turn 4"
        assert ctx['has_context'], "Should have context"
        assert "apple" in ctx['previous_objects'], "Should remember apple"
        print(" ✓ Context tracking working\n")

        # Case 2.3: full Q/A history is retrievable in order.
        print("📝 Test Case 2.3: Conversation history")
        history = mgr.get_history(sid)
        print(f" Total turns: {len(history)}")
        for turn_no, turn in enumerate(history, 1):
            print(f" Turn {turn_no}: Q: {turn['question']} | A: {turn['answer']}")
        assert len(history) == 3, "Should have 3 turns"
        print(" ✓ History retrieval working\n")

        # Case 2.4: deletion removes the session entirely.
        print("📝 Test Case 2.4: Session deletion")
        assert mgr.delete_session(sid), "Session should be deleted"
        assert mgr.get_session(sid) is None, "Session should not exist after deletion"
        print(" ✓ Session deletion working\n")

        print("✅ Conversation Manager: ALL TESTS PASSED\n")
        return True
    except Exception as e:
        print(f"❌ Conversation Manager tests failed: {e}\n")
        import traceback
        traceback.print_exc()
        return False
def test_integration():
    """Smoke-test that the enhanced components wire into both VQA pipelines.

    Only constructs/imports the pipelines (no inference — that would need
    model checkpoints). Returns True on success, False on failure.
    """
    print("=" * 80)
    print("🧪 TEST 3: Integration Tests")
    print("=" * 80)
    try:
        from semantic_neurosymbolic_vqa import SemanticNeurosymbolicVQA

        # Case 3.1: constructing on CPU should expose the llm_enabled flag.
        print("📝 Test Case 3.1: Semantic VQA with LLM reasoning")
        pipeline = SemanticNeurosymbolicVQA(device='cpu')
        if pipeline.llm_enabled:
            print(" ✓ LLM reasoning integrated into Semantic VQA")
        else:
            print(" ⚠️ LLM reasoning not available (fallback mode)")
        print()

        # Case 3.2: importability is the whole test here — a failed import
        # would raise and land in the except below.
        print("📝 Test Case 3.2: Ensemble VQA with conversation support")
        from ensemble_vqa_app import ProductionEnsembleVQA
        print(" ✓ Ensemble VQA imports successfully")
        print(" (Full test requires model checkpoints)\n")

        print("✅ Integration: TESTS PASSED\n")
        return True
    except Exception as e:
        print(f"❌ Integration tests failed: {e}\n")
        import traceback
        traceback.print_exc()
        return False
def run_all_tests():
    """Run every suite, print a summary table, and return True iff all passed."""
    print("\n" + "=" * 80)
    print("🚀 VQA ENHANCEMENT TEST SUITE")
    print("=" * 80)
    print()
    # Each suite runs even if an earlier one failed, so the summary is complete.
    suites = (
        ("LLM Reasoning", test_llm_reasoning),
        ("Conversation Manager", test_conversation_manager),
        ("Integration", test_integration),
    )
    results = [(label, runner()) for label, runner in suites]
    print("=" * 80)
    print("📊 TEST SUMMARY")
    print("=" * 80)
    for name, ok in results:
        print(f"{name}: {'✅ PASSED' if ok else '❌ FAILED'}")
    passed_count = sum(1 for _, ok in results if ok)
    print()
    print(f"Total: {passed_count}/{len(results)} test suites passed")
    print("=" * 80)
    return all(ok for _, ok in results)
if __name__ == "__main__":
    # Exit code 0 only when every suite passed, so CI can gate on this script.
    sys.exit(0 if run_all_tests() else 1)