Spaces:
Sleeping
Sleeping
| """ | |
| Test Suite for VQA Enhancements | |
| Tests LLM Reasoning Engine and Conversational VQA features | |
| """ | |
| import os | |
| import sys | |
| sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) | |
def test_llm_reasoning():
    """Exercise the LLM reasoning service on three fact-based questions.

    Returns:
        True when every case passes; False when the service is unavailable
        (no GROQ_API_KEY) or a case raises/fails an assertion.
    """
    # NOTE(review): emoji below were restored from mojibake in the scraped
    # source (UTF-8 emoji mis-decoded as a Greek codepage); the ambiguous
    # glyphs were disambiguated from context (pass=✅, fail=❌, case=📋).
    print("=" * 80)
    print("🧪 TEST 1: LLM Reasoning Engine")
    print("=" * 80)

    def _show(result, *, chain=True, status=True):
        # Shared pretty-printer for a reasoning-result dict; the original
        # repeated this print sequence for each test case.
        print(f"   Answer: {result['answer']}")
        if chain:
            print("   Reasoning Chain:")
            for i, step in enumerate(result['reasoning_chain'], 1):
                print(f"      {i}. {step}")
        print(f"   Confidence: {result['confidence']}")
        if status:
            print(f"   Status: {result['status']}")

    try:
        from llm_reasoning_service import get_llm_reasoning_service
        service = get_llm_reasoning_service()
        print("✅ LLM Reasoning Service initialized\n")

        print("📋 Test Case 1.1: Can a candle melt?")
        result = service.reason_with_facts(
            object_name="candle",
            facts={
                "materials": ["wax", "wick"],
                "categories": ["light source", "household item"],
            },
            question="Can this melt?",
        )
        _show(result)
        assert result['answer'], "Answer should not be empty"
        assert result['confidence'] > 0, "Confidence should be positive"
        print("   ✅ Test passed\n")

        print("📋 Test Case 1.2: Would ice cream survive in the desert?")
        result = service.reason_with_facts(
            object_name="ice cream",
            facts={
                "materials": ["milk", "sugar", "cream"],
                "categories": ["frozen dessert", "food"],
                "properties": ["cold", "frozen"],
            },
            question="Would this survive in the desert?",
        )
        _show(result)
        assert result['answer'], "Answer should not be empty"
        print("   ✅ Test passed\n")

        print("📋 Test Case 1.3: Is an apple edible?")
        result = service.reason_with_facts(
            object_name="apple",
            facts={
                "categories": ["fruit", "food"],
                "properties": ["nutritious", "healthy"],
            },
            question="Is this edible?",
        )
        # Case 1.3 only prints the answer and confidence in the original.
        _show(result, chain=False, status=False)
        print("   ✅ Test passed\n")

        print("✅ LLM Reasoning Engine: ALL TESTS PASSED\n")
        return True
    except ValueError as e:
        # Presumably raised by the service when GROQ_API_KEY is missing —
        # treated as "skipped", not "failed".
        print(f"⚠️ LLM Reasoning tests skipped: {e}")
        print("   (Set GROQ_API_KEY to run these tests)\n")
        return False
    except Exception as e:
        print(f"❌ LLM Reasoning tests failed: {e}\n")
        return False
def test_conversation_manager():
    """Exercise ConversationManager: turns, pronoun resolution, context,
    history, and session deletion.

    Returns:
        True when all cases pass, False on any exception or assertion failure.
    """
    print("=" * 80)
    print("🧪 TEST 2: Conversation Manager")
    print("=" * 80)
    try:
        from conversation_manager import ConversationManager
        manager = ConversationManager(session_timeout_minutes=30)
        print("✅ Conversation Manager initialized\n")

        print("📋 Test Case 2.1: Multi-turn conversation")
        session_id = manager.create_session("test_image.jpg")
        print(f"   Created session: {session_id}")
        manager.add_turn(
            session_id=session_id,
            question="What is this?",
            answer="apple",
            objects_detected=["apple"],
        )
        print("   Turn 1: 'What is this?' → 'apple'")

        # NOTE(review): the same session object is reused for turn 3 after
        # add_turn() below; this assumes get_session returns a live object
        # updated in place — confirm against ConversationManager.
        session = manager.get_session(session_id)
        question_2 = "Is it healthy?"
        resolved_2 = manager.resolve_references(question_2, session)
        print(f"   Turn 2: '{question_2}' → Resolved: '{resolved_2}'")
        assert "apple" in resolved_2.lower() or resolved_2 == "Is apple healthy?", \
            "Pronoun 'it' should be resolved to 'apple'"
        manager.add_turn(
            session_id=session_id,
            question=question_2,
            answer="Yes, apples are healthy",
            objects_detected=["apple"],
        )
        question_3 = "What color is it?"
        resolved_3 = manager.resolve_references(question_3, session)
        print(f"   Turn 3: '{question_3}' → Resolved: '{resolved_3}'")
        assert "apple" in resolved_3.lower(), \
            "Pronoun 'it' should still resolve to 'apple'"
        print("   ✅ Pronoun resolution working\n")

        print("📋 Test Case 2.2: Context retrieval")
        context = manager.get_context_for_question(session_id, "Another question")
        print(f"   Turn number: {context['turn_number']}")
        print(f"   Previous objects: {context['previous_objects']}")
        print(f"   Has context: {context['has_context']}")
        # Two turns were added above, so the upcoming question is turn... 4
        # per the manager's numbering (verified by the original assertion).
        assert context['turn_number'] == 4, "Should be on turn 4"
        assert context['has_context'], "Should have context"
        assert "apple" in context['previous_objects'], "Should remember apple"
        print("   ✅ Context tracking working\n")

        print("📋 Test Case 2.3: Conversation history")
        history = manager.get_history(session_id)
        print(f"   Total turns: {len(history)}")
        for i, turn in enumerate(history, 1):
            print(f"   Turn {i}: Q: {turn['question']} | A: {turn['answer']}")
        assert len(history) == 3, "Should have 3 turns"
        print("   ✅ History retrieval working\n")

        print("📋 Test Case 2.4: Session deletion")
        deleted = manager.delete_session(session_id)
        assert deleted, "Session should be deleted"
        session_after = manager.get_session(session_id)
        assert session_after is None, "Session should not exist after deletion"
        print("   ✅ Session deletion working\n")

        print("✅ Conversation Manager: ALL TESTS PASSED\n")
        return True
    except Exception as e:
        print(f"❌ Conversation Manager tests failed: {e}\n")
        import traceback
        traceback.print_exc()
        return False
def test_integration():
    """Smoke-test that the VQA integration points import and initialize.

    Only checks construction/import — a full end-to-end run would require
    model checkpoints that are not available here.

    Returns:
        True when both integration points load, False on any exception.
    """
    print("=" * 80)
    print("🧪 TEST 3: Integration Tests")
    print("=" * 80)
    try:
        from semantic_neurosymbolic_vqa import SemanticNeurosymbolicVQA
        print("📋 Test Case 3.1: Semantic VQA with LLM reasoning")
        vqa = SemanticNeurosymbolicVQA(device='cpu')
        if vqa.llm_enabled:
            print("   ✅ LLM reasoning integrated into Semantic VQA")
        else:
            # Missing API key or model — the pipeline still works without LLM.
            print("   ⚠️ LLM reasoning not available (fallback mode)")
        print()

        print("📋 Test Case 3.2: Ensemble VQA with conversation support")
        # Import alone is the test; constructing it needs checkpoints.
        from ensemble_vqa_app import ProductionEnsembleVQA
        print("   ✅ Ensemble VQA imports successfully")
        print("   (Full test requires model checkpoints)\n")

        print("✅ Integration: TESTS PASSED\n")
        return True
    except Exception as e:
        print(f"❌ Integration tests failed: {e}\n")
        import traceback
        traceback.print_exc()
        return False
def run_all_tests():
    """Run every test suite and print a pass/fail summary.

    Returns:
        True when all suites passed, False otherwise (used as exit status).
    """
    print("\n" + "=" * 80)
    print("🚀 VQA ENHANCEMENT TEST SUITE")
    print("=" * 80)
    print()

    # Each entry is (suite name, passed?) — run order matters for output only.
    results = []
    results.append(("LLM Reasoning", test_llm_reasoning()))
    results.append(("Conversation Manager", test_conversation_manager()))
    results.append(("Integration", test_integration()))

    print("=" * 80)
    print("📊 TEST SUMMARY")
    print("=" * 80)
    for name, passed in results:
        status = "✅ PASSED" if passed else "❌ FAILED"
        print(f"{name}: {status}")

    total_passed = sum(1 for _, passed in results if passed)
    total_tests = len(results)
    print()
    print(f"Total: {total_passed}/{total_tests} test suites passed")
    print("=" * 80)
    return all(passed for _, passed in results)
if __name__ == "__main__":
    # Exit 0 only when every suite passed, so CI can gate on this script.
    sys.exit(0 if run_all_tests() else 1)