Spaces:

jonathanagustin
/

video_analyzer

Runtime error

Claude commited on Dec 28, 2025

Commit

29b2eb2

unverified ·

1 Parent(s): 7da7ce7

test: Add integration tests for chatbot RAG functionality

New test classes:
- TestSessionState: Session creation, isolation, clear functionality
- TestRAGPipeline: Vector DB add/search, session isolation, multi-video search
- TestChatWithVideosIntegration: Context retrieval, model info, error handling
- TestHandleChat: URL detection, question handling, auth requirements
- TestGetKnowledgeStatsWithSession: Stats with session state

Total: 56 tests (20 new integration tests)

Files changed (1) hide show

tests/test_app.py +454 -3

tests/test_app.py CHANGED Viewed

@@ -390,7 +390,7 @@ class TestProcessYoutube:
         from app import process_youtube
         mock_progress = MagicMock()
-        result = process_youtube("https://youtube.com/watch?v=test", 5, None, mock_progress)
         assert "log in" in result.lower()
     def test_empty_url_returns_prompt(self):
@@ -398,7 +398,7 @@ class TestProcessYoutube:
         from app import process_youtube
         mock_progress = MagicMock()
-        result = process_youtube("", 5, MagicMock(), mock_progress)
         assert "enter" in result.lower()
     def test_invalid_url_returns_error(self):
@@ -406,5 +406,456 @@ class TestProcessYoutube:
         from app import process_youtube
         mock_progress = MagicMock()
-        result = process_youtube("not-a-url", 5, MagicMock(), mock_progress)
         assert "valid youtube url" in result.lower()

         from app import process_youtube
         mock_progress = MagicMock()
+        result = process_youtube("https://youtube.com/watch?v=test", 5, None, None, mock_progress)
         assert "log in" in result.lower()
     def test_empty_url_returns_prompt(self):
         from app import process_youtube
         mock_progress = MagicMock()
+        result = process_youtube("", 5, MagicMock(), None, mock_progress)
         assert "enter" in result.lower()
     def test_invalid_url_returns_error(self):
         from app import process_youtube
         mock_progress = MagicMock()
+        result = process_youtube("not-a-url", 5, MagicMock(), None, mock_progress)
         assert "valid youtube url" in result.lower()
+class TestSessionState:
+    """Tests for the SessionState class."""
+    def test_creates_collection_with_session_id(self):
+        """Test SessionState creates a collection with session ID."""
+        from app import SessionState
+        state = SessionState("test_session_123")
+        assert state.session_id == "test_session_123"
+        assert state.collection is not None
+    def test_auto_generates_session_id(self):
+        """Test SessionState generates session ID if not provided."""
+        from app import SessionState
+        state = SessionState()
+        assert state.session_id is not None
+        assert len(state.session_id) == 32  # UUID hex length
+    def test_clear_recreates_collection(self):
+        """Test clear() recreates the collection."""
+        from app import SessionState
+        state = SessionState("test_clear")
+        # Add some data
+        state.collection.add(
+            documents=["test doc"],
+            ids=["test_id"],
+        )
+        assert state.collection.count() == 1
+        # Clear and verify
+        state.clear()
+        assert state.collection.count() == 0
+    def test_create_session_state_with_profile(self):
+        """Test create_session_state uses profile name for consistent ID."""
+        from app import create_session_state
+        mock_profile = MagicMock()
+        mock_profile.name = "TestUser"
+        state1 = create_session_state(mock_profile)
+        state2 = create_session_state(mock_profile)
+        # Same profile should get same session ID
+        assert state1.session_id == state2.session_id
+    def test_create_session_state_without_profile(self):
+        """Test create_session_state generates random ID without profile."""
+        from app import create_session_state
+        state1 = create_session_state(None)
+        state2 = create_session_state(None)
+        # Different calls should get different IDs
+        assert state1.session_id != state2.session_id
+class TestRAGPipeline:
+    """Integration tests for the RAG (Retrieval Augmented Generation) pipeline."""
+    def test_add_and_search_knowledge(self):
+        """Test adding content and searching retrieves it."""
+        from app import SessionState, add_to_vector_db, search_knowledge
+        state = SessionState("test_rag_1")
+        # Add content
+        add_to_vector_db(
+            title="Test Video",
+            transcript="This is a test about machine learning and neural networks.",
+            visual_contexts=["A person standing at a whiteboard"],
+            session_state=state,
+        )
+        # Search should find relevant content
+        results = search_knowledge("machine learning", session_state=state)
+        assert len(results) > 0
+        assert any("machine learning" in r["content"].lower() for r in results)
+        assert results[0]["title"] == "Test Video"
+    def test_search_returns_empty_for_unrelated_query(self):
+        """Test search returns empty for completely unrelated queries."""
+        from app import SessionState, add_to_vector_db, search_knowledge
+        state = SessionState("test_rag_2")
+        # Add specific content
+        add_to_vector_db(
+            title="Cooking Show",
+            transcript="Today we will make a delicious pasta with tomato sauce.",
+            visual_contexts=["Chef in kitchen"],
+            session_state=state,
+        )
+        # Search for something unrelated - should still return results but with low relevance
+        results = search_knowledge("quantum physics equations", session_state=state)
+        # ChromaDB will still return results, but they won't be highly relevant
+        # The key test is that the system doesn't crash
+        assert isinstance(results, list)
+    def test_visual_contexts_are_searchable(self):
+        """Test that visual context descriptions are searchable."""
+        from app import SessionState, add_to_vector_db, search_knowledge
+        state = SessionState("test_rag_3")
+        # Add content with visual context
+        add_to_vector_db(
+            title="Nature Documentary",
+            transcript="",
+            visual_contexts=["A majestic elephant walking through the savanna"],
+            session_state=state,
+        )
+        # Search for visual content
+        results = search_knowledge("elephant savanna", session_state=state)
+        assert len(results) > 0
+        assert any("elephant" in r["content"].lower() for r in results)
+        assert results[0]["type"] == "visual"
+    def test_multiple_videos_searchable(self):
+        """Test that content from multiple videos is searchable."""
+        from app import SessionState, add_to_vector_db, search_knowledge
+        state = SessionState("test_rag_4")
+        # Add content from two videos
+        add_to_vector_db(
+            title="Python Tutorial",
+            transcript="Learn Python programming with functions and classes.",
+            visual_contexts=[],
+            session_state=state,
+        )
+        add_to_vector_db(
+            title="JavaScript Guide",
+            transcript="Master JavaScript with callbacks and promises.",
+            visual_contexts=[],
+            session_state=state,
+        )
+        # Search should find Python content
+        python_results = search_knowledge("Python functions", session_state=state)
+        assert any("Python" in r["title"] for r in python_results)
+        # Search should find JavaScript content
+        js_results = search_knowledge("JavaScript promises", session_state=state)
+        assert any("JavaScript" in r["title"] for r in js_results)
+    def test_session_isolation(self):
+        """Test that different sessions have isolated knowledge bases."""
+        from app import SessionState, add_to_vector_db, search_knowledge
+        state1 = SessionState("isolation_test_1")
+        state2 = SessionState("isolation_test_2")
+        # Add content only to state1
+        add_to_vector_db(
+            title="Session 1 Only",
+            transcript="Unique content about dragons and wizards.",
+            visual_contexts=[],
+            session_state=state1,
+        )
+        # State1 should find it
+        results1 = search_knowledge("dragons wizards", session_state=state1)
+        assert len(results1) > 0
+        # State2 should not find anything
+        results2 = search_knowledge("dragons wizards", session_state=state2)
+        assert len(results2) == 0
+class TestChatWithVideosIntegration:
+    """Integration tests for the chat_with_videos function with actual RAG."""
+    def test_chat_retrieves_relevant_context(self):
+        """Test that chat retrieves relevant context from knowledge base."""
+        from app import SessionState, add_to_vector_db, chat_with_videos
+        state = SessionState("chat_test_1")
+        # Add content
+        add_to_vector_db(
+            title="AI Lecture",
+            transcript="Artificial intelligence is transforming healthcare. Machine learning models can diagnose diseases.",
+            visual_contexts=["Professor presenting slides about AI"],
+            session_state=state,
+        )
+        mock_profile = MagicMock()
+        mock_token = MagicMock()
+        mock_token.token = "test_token"
+        # Mock the InferenceClient
+        with patch("app.InferenceClient") as mock_client:
+            mock_response = MagicMock()
+            mock_response.choices = [MagicMock()]
+            mock_response.choices[0].message.content = "AI is transforming healthcare by enabling better diagnosis."
+            mock_client.return_value.chat.completions.create.return_value = mock_response
+            result = chat_with_videos(
+                message="What is AI used for in healthcare?",
+                history=[],
+                profile=mock_profile,
+                oauth_token=mock_token,
+                session_state=state,
+            )
+            # Should get a response (not an error message)
+            assert "AI" in result or "healthcare" in result
+            assert "Sources:" in result
+            assert "AI Lecture" in result
+    def test_chat_includes_model_info(self):
+        """Test that chat response includes model information."""
+        from app import SessionState, add_to_vector_db, chat_with_videos
+        state = SessionState("chat_test_2")
+        add_to_vector_db(
+            title="Test Video",
+            transcript="Some test content here.",
+            visual_contexts=[],
+            session_state=state,
+        )
+        mock_profile = MagicMock()
+        mock_token = MagicMock()
+        mock_token.token = "test_token"
+        with patch("app.InferenceClient") as mock_client:
+            mock_response = MagicMock()
+            mock_response.choices = [MagicMock()]
+            mock_response.choices[0].message.content = "Test response."
+            mock_client.return_value.chat.completions.create.return_value = mock_response
+            result = chat_with_videos(
+                message="Tell me about the test content",
+                history=[],
+                profile=mock_profile,
+                oauth_token=mock_token,
+                session_state=state,
+            )
+            # Should include model info
+            assert "Model:" in result
+    def test_chat_handles_api_error(self):
+        """Test that chat handles API errors gracefully."""
+        from app import SessionState, add_to_vector_db, chat_with_videos
+        state = SessionState("chat_test_3")
+        add_to_vector_db(
+            title="Test Video",
+            transcript="Some content.",
+            visual_contexts=[],
+            session_state=state,
+        )
+        mock_profile = MagicMock()
+        mock_token = MagicMock()
+        mock_token.token = "test_token"
+        with patch("app.InferenceClient") as mock_client:
+            # Simulate API error for all models
+            mock_client.return_value.chat.completions.create.side_effect = Exception("503 Service Unavailable")
+            result = chat_with_videos(
+                message="Test question",
+                history=[],
+                profile=mock_profile,
+                oauth_token=mock_token,
+                session_state=state,
+            )
+            # Should return error message
+            assert "unavailable" in result.lower() or "error" in result.lower()
+class TestHandleChat:
+    """Integration tests for the unified handle_chat function."""
+    def test_detects_youtube_url(self):
+        """Test that handle_chat detects YouTube URLs."""
+        from app import SessionState, handle_chat
+        state = SessionState("handle_test_1")
+        mock_profile = MagicMock()
+        mock_token = MagicMock()
+        # The URL processing will fail (no actual video), but it should detect it as a URL
+        with patch("app._process_youtube_impl") as mock_process:
+            mock_process.return_value = "## Test Video\n\nTranscript here"
+            history, msg, new_state = handle_chat(
+                message="https://youtube.com/watch?v=test123",
+                history=[],
+                session_state=state,
+                profile=mock_profile,
+                oauth_token=mock_token,
+            )
+            # Should have called process_youtube
+            mock_process.assert_called_once()
+            # Should have added messages to history
+            assert len(history) >= 2  # User message + assistant response
+    def test_detects_question(self):
+        """Test that handle_chat detects questions (non-URLs)."""
+        from app import SessionState, handle_chat
+        state = SessionState("handle_test_2")
+        mock_profile = MagicMock()
+        mock_token = MagicMock()
+        mock_token.token = "test_token"
+        # Empty knowledge base - should prompt to add videos
+        history, msg, new_state = handle_chat(
+            message="What is this video about?",
+            history=[],
+            session_state=state,
+            profile=mock_profile,
+            oauth_token=mock_token,
+        )
+        # Should have response about no videos analyzed
+        assert len(history) >= 2
+        last_response = history[-1]["content"]
+        assert "don't have any videos" in last_response.lower() or "paste a youtube url" in last_response.lower()
+    def test_answers_question_with_knowledge(self):
+        """Test that handle_chat answers questions when knowledge base has content."""
+        from app import SessionState, add_to_vector_db, handle_chat
+        state = SessionState("handle_test_3")
+        # Pre-populate knowledge base
+        add_to_vector_db(
+            title="Cooking Video",
+            transcript="Today we make pasta. Boil water, add salt, cook for 10 minutes.",
+            visual_contexts=["Chef stirring pot"],
+            session_state=state,
+        )
+        mock_profile = MagicMock()
+        mock_token = MagicMock()
+        mock_token.token = "test_token"
+        with patch("app.InferenceClient") as mock_client:
+            mock_response = MagicMock()
+            mock_response.choices = [MagicMock()]
+            mock_response.choices[0].message.content = "To cook pasta, boil water and add salt."
+            mock_client.return_value.chat.completions.create.return_value = mock_response
+            history, msg, new_state = handle_chat(
+                message="How do I cook pasta?",
+                history=[],
+                session_state=state,
+                profile=mock_profile,
+                oauth_token=mock_token,
+            )
+            # Should have a meaningful response
+            assert len(history) >= 2
+            last_response = history[-1]["content"]
+            assert "pasta" in last_response.lower() or "cook" in last_response.lower()
+    def test_requires_login(self):
+        """Test that handle_chat requires login."""
+        from app import SessionState, handle_chat
+        state = SessionState("handle_test_4")
+        history, msg, new_state = handle_chat(
+            message="Hello",
+            history=[],
+            session_state=state,
+            profile=None,  # Not logged in
+            oauth_token=None,
+        )
+        # Should prompt to sign in
+        assert len(history) >= 2
+        last_response = history[-1]["content"]
+        assert "sign in" in last_response.lower()
+    def test_creates_session_if_none(self):
+        """Test that handle_chat creates session state if None."""
+        from app import handle_chat
+        mock_profile = MagicMock()
+        mock_profile.name = "TestUser"
+        history, msg, new_state = handle_chat(
+            message="Hello",
+            history=[],
+            session_state=None,  # No session
+            profile=mock_profile,
+            oauth_token=MagicMock(),
+        )
+        # Should have created a session
+        assert new_state is not None
+        assert new_state.session_id is not None
+class TestGetKnowledgeStatsWithSession:
+    """Tests for get_knowledge_stats with session state."""
+    def test_empty_session_knowledge_base(self):
+        """Test stats for empty session knowledge base."""
+        from app import SessionState, get_knowledge_stats
+        state = SessionState("stats_test_1")
+        result = get_knowledge_stats(state)
+        assert "empty" in result.lower()
+    def test_populated_session_knowledge_base(self):
+        """Test stats for populated session knowledge base."""
+        from app import SessionState, add_to_vector_db, get_knowledge_stats
+        state = SessionState("stats_test_2")
+        add_to_vector_db(
+            title="Test Video 1",
+            transcript="Some content here about testing.",
+            visual_contexts=["Test scene"],
+            session_state=state,
+        )
+        add_to_vector_db(
+            title="Test Video 2",
+            transcript="More content about different things.",
+            visual_contexts=[],
+            session_state=state,
+        )
+        result = get_knowledge_stats(state)
+        # Should show chunk count and video count
+        assert "chunks" in result.lower() or "2" in result
+        assert "Test Video" in result