Spaces:

jmzlx
/

dd-poc

Sleeping

File size: 11,914 Bytes

d1564d4

#!/usr/bin/env python3
"""
E2E Tests for Document Processing Workflow

Tests the core document processing functionality:
- Data room selection and processing
- Document upload and indexing
- Search functionality
- Error handling for document operations
"""

import pytest
import os
from playwright.sync_api import Page, expect
from .conftest import StreamlitPageHelpers


class TestDocumentProcessing:
    """Test document processing and data room functionality"""

    def test_data_room_selection_interface(self, page: Page, streamlit_helpers: StreamlitPageHelpers, sample_test_data):
        """Test that data room selection interface is functional"""
        streamlit_helpers.wait_for_streamlit_load()
        
        # Look for data room selection in sidebar
        sidebar = page.locator("[data-testid='stSidebar']")
        
        # Should have some way to select/configure data rooms
        data_room_elements = sidebar.locator("text=/.*[Dd]ata.*[Rr]oom.*|.*VDR.*|.*[Dd]ocument.*/")
        expect(data_room_elements.first).to_be_visible()

    def test_document_processing_workflow(self, page: Page, streamlit_helpers: StreamlitPageHelpers, sample_test_data):
        """Test the complete document processing workflow"""
        streamlit_helpers.wait_for_streamlit_load()
        
        # Navigate to document processing section
        # This might be in the main area or a specific tab
        
        # Look for document processing controls
        processing_elements = page.locator("text=/.*[Pp]rocess.*|.*[Aa]nalyze.*|.*[Bb]uild.*|.*[Ii]ndex.*/")
        
        if processing_elements.count() > 0:
            # Check if there's a processing button or similar
            process_button = page.locator("button:has-text(/.*[Pp]rocess.*|.*[Bb]uild.*|.*[Aa]nalyze.*/)")
            
            if process_button.count() > 0:
                # Click the process button (but don't wait for completion in basic test)
                process_button.first.click()
                
                # Should show some indication of processing starting
                # Could be a spinner, status message, etc.
                processing_indicators = page.locator(".stSpinner, [data-testid='stSpinner'], .stStatus, text=/.*[Pp]rocessing.*|.*[Ll]oading.*/")
                
                # Give it a moment to start processing
                page.wait_for_timeout(2000)

    def test_file_upload_interface(self, page: Page, streamlit_helpers: StreamlitPageHelpers):
        """Test file upload interface if available"""
        streamlit_helpers.wait_for_streamlit_load()
        
        # Look for file upload components
        file_uploaders = page.locator("input[type='file'], [data-testid='stFileUploader']")
        
        if file_uploaders.count() > 0:
            expect(file_uploaders.first).to_be_visible()
            
            # Test that file uploader accepts appropriate file types
            file_uploader = file_uploaders.first
            accept_attr = file_uploader.get_attribute("accept")
            
            # Should accept common document formats
            if accept_attr:
                assert any(fmt in accept_attr for fmt in [".pdf", ".md", ".txt", ".docx"]), \
                    f"File uploader should accept document formats, got: {accept_attr}"

    def test_search_functionality(self, page: Page, streamlit_helpers: StreamlitPageHelpers):
        """Test document search functionality"""
        streamlit_helpers.wait_for_streamlit_load()
        
        # Look for search interface
        search_elements = page.locator("input[placeholder*='search'], input[aria-label*='search'], text=/.*[Ss]earch.*/")
        
        if search_elements.count() > 0:
            search_input = search_elements.first
            
            # Test basic search functionality
            if search_input.get_attribute("type") != "file":  # Make sure it's not a file input
                search_input.fill("revenue")
                
                # Look for search button or trigger search
                search_button = page.locator("button:has-text(/.*[Ss]earch.*|.*[Ff]ind.*/)")
                if search_button.count() > 0:
                    search_button.first.click()
                else:
                    # Try pressing Enter
                    search_input.press("Enter")
                
                # Wait for search results or indication
                page.wait_for_timeout(2000)

    def test_document_status_display(self, page: Page, streamlit_helpers: StreamlitPageHelpers):
        """Test that document processing status is displayed"""
        streamlit_helpers.wait_for_streamlit_load()
        
        # Look for status indicators
        status_elements = page.locator("text=/.*[Ss]tatus.*|.*[Rr]eady.*|.*[Pp]rocessed.*|.*[Dd]ocuments.*found.*/")
        
        # Should have some indication of system state
        # This could be "No documents processed", "Ready", "X documents indexed", etc.
        if status_elements.count() > 0:
            expect(status_elements.first).to_be_visible()

    def test_error_handling_invalid_path(self, page: Page, streamlit_helpers: StreamlitPageHelpers):
        """Test error handling for invalid data room paths"""
        streamlit_helpers.wait_for_streamlit_load()
        
        # Look for path input fields
        path_inputs = page.locator("input[placeholder*='path'], input[aria-label*='path']")
        
        if path_inputs.count() > 0:
            path_input = path_inputs.first
            
            # Enter an invalid path
            path_input.fill("/nonexistent/path/to/documents")
            
            # Look for a button to submit/validate
            submit_buttons = page.locator("button:has-text(/.*[Ss]ubmit.*|.*[Cc]heck.*|.*[Vv]alidate.*|.*[Pp]rocess.*/)")
            
            if submit_buttons.count() > 0:
                submit_buttons.first.click()
                
                # Should show an error message
                error_elements = page.locator(".stError, [data-testid='stError'], text=/.*[Ee]rror.*|.*[Nn]ot found.*|.*[Ii]nvalid.*/")
                
                # Wait for error message to appear
                page.wait_for_timeout(3000)
                
                # Should have some error indication
                if error_elements.count() > 0:
                    expect(error_elements.first).to_be_visible()

    def test_processing_progress_indicators(self, page: Page, streamlit_helpers: StreamlitPageHelpers):
        """Test that processing shows appropriate progress indicators"""
        streamlit_helpers.wait_for_streamlit_load()
        
        # Look for any processing buttons
        process_buttons = page.locator("button:has-text(/.*[Pp]rocess.*|.*[Bb]uild.*|.*[Aa]nalyze.*|.*[Ii]ndex.*/)")
        
        if process_buttons.count() > 0:
            # Click a processing button
            process_buttons.first.click()
            
            # Should show progress indicators
            progress_elements = page.locator(".stSpinner, .stProgress, [data-testid='stSpinner'], [data-testid='stProgress']")
            
            # Give it a moment for progress indicators to appear
            page.wait_for_timeout(1000)
            
            # Note: We don't wait for completion as that could take too long for E2E tests

    def test_document_metadata_display(self, page: Page, streamlit_helpers: StreamlitPageHelpers):
        """Test that document metadata is displayed when available"""
        streamlit_helpers.wait_for_streamlit_load()
        
        # Look for metadata displays
        metadata_elements = page.locator("text=/.*[Dd]ocument.*[Cc]ount.*|.*[Ff]iles.*found.*|.*[Cc]hunks.*|.*[Ii]ndex.*size.*/")
        
        # Should show some document information if documents are processed
        # This could be document counts, index size, processing status, etc.
        
        # Navigate through tabs to see if any show document information
        tabs = page.locator("[data-testid='stTabs'] button, .stTabs button")
        
        if tabs.count() > 0:
            for i in range(min(tabs.count(), 3)):  # Check first 3 tabs
                tabs.nth(i).click()
                page.wait_for_timeout(1000)
                
                # Check for document-related information in this tab
                doc_info = page.locator("text=/.*[Dd]ocuments.*|.*[Ff]iles.*|.*[Cc]hunks.*|.*[Pp]rocessed.*/")
                if doc_info.count() > 0:
                    expect(doc_info.first).to_be_visible()
                    break

    def test_data_room_switching(self, page: Page, streamlit_helpers: StreamlitPageHelpers):
        """Test switching between different data rooms"""
        streamlit_helpers.wait_for_streamlit_load()
        
        # Look for data room selection dropdown or similar
        data_room_selectors = page.locator("select, [data-testid='stSelectbox']")
        
        if data_room_selectors.count() > 0:
            selector = data_room_selectors.first
            
            # Check if it has multiple options
            selector.click()
            page.wait_for_timeout(500)
            
            options = page.locator("[data-value], option")
            
            if options.count() > 1:
                # Select a different option
                options.nth(1).click()
                
                # Should trigger some update in the interface
                page.wait_for_timeout(2000)
                
                # Look for status updates or changes
                status_updates = page.locator("text=/.*[Ll]oading.*|.*[Ss]witching.*|.*[Pp]rocessing.*/")

    @pytest.mark.slow
    def test_full_processing_workflow(self, page_slow: Page, streamlit_helpers: StreamlitPageHelpers, sample_test_data):
        """Test the complete document processing workflow with real data (slower test)"""
        page = page_slow  # Use the slow page fixture
        streamlit_helpers.wait_for_streamlit_load()
        
        # This test would actually process documents if a test data room is available
        # Check if test VDR path exists
        vdr_path = sample_test_data["vdr_path"]
        
        if vdr_path.exists() and any(vdr_path.iterdir()):
            # Look for path configuration
            path_inputs = page.locator("input[placeholder*='path'], input[aria-label*='path']")
            
            if path_inputs.count() > 0:
                path_input = path_inputs.first
                path_input.fill(str(vdr_path))
                
                # Look for process button
                process_buttons = page.locator("button:has-text(/.*[Pp]rocess.*|.*[Bb]uild.*/)")
                
                if process_buttons.count() > 0:
                    process_buttons.first.click()
                    
                    # Wait for processing to complete or show progress
                    # Use the extended timeout for this slow operation
                    try:
                        streamlit_helpers.wait_for_processing(timeout=120000)  # 2 minutes
                        
                        # Check for success indicators
                        success_elements = page.locator(".stSuccess, text=/.*[Ss]uccess.*|.*[Cc]omplete.*|.*[Ff]inished.*/")
                        
                        page.wait_for_timeout(2000)
                        
                        # Verify that documents were processed
                        status_elements = page.locator("text=/.*documents.*processed.*|.*files.*indexed.*|.*chunks.*created.*/")
                        
                    except Exception as e:
                        # Processing might still be ongoing, that's okay for this test
                        print(f"Processing timeout or error: {e}")
        else:
            pytest.skip("No test VDR data available for full processing test")