File size: 11,914 Bytes
d1564d4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
#!/usr/bin/env python3
"""
E2E Tests for Document Processing Workflow

Tests the core document processing functionality:
- Data room selection and processing
- Document upload and indexing
- Search functionality
- Error handling for document operations
"""

import os
import re

import pytest
from playwright.sync_api import Page, expect

from .conftest import StreamlitPageHelpers


class TestDocumentProcessing:
    """End-to-end smoke tests for document processing and data room features.

    Most tests here are deliberately tolerant: when the UI element a test
    targets is not present on the page, the test returns without failing.
    Only ``test_data_room_selection_interface`` unconditionally requires an
    element to be visible.
    """

    @staticmethod
    def _buttons_with_text(page: Page, pattern: str):
        """Return a locator for ``<button>`` elements whose text matches *pattern*.

        The CSS ``:has-text()`` pseudo-class only accepts a quoted string, so
        a regular expression has to be passed through the ``has_text`` option
        of ``locator()`` instead of being embedded in the selector.

        Args:
            page: Playwright page to search.
            pattern: Regular expression source, matched with search semantics.

        Returns:
            A lazy Playwright locator; nothing is queried until it is used.
        """
        return page.locator("button", has_text=re.compile(pattern))

    def test_data_room_selection_interface(self, page: Page, streamlit_helpers: StreamlitPageHelpers, sample_test_data):
        """Check that the sidebar exposes some data room / document control."""
        streamlit_helpers.wait_for_streamlit_load()

        sidebar = page.locator("[data-testid='stSidebar']")

        # Any sidebar text mentioning data rooms, VDR or documents counts.
        data_room_elements = sidebar.locator("text=/.*[Dd]ata.*[Rr]oom.*|.*VDR.*|.*[Dd]ocument.*/")
        expect(data_room_elements.first).to_be_visible()

    def test_document_processing_workflow(self, page: Page, streamlit_helpers: StreamlitPageHelpers, sample_test_data):
        """Click the first processing button, if any, without awaiting completion.

        This is a smoke test: it passes as long as locating and clicking the
        button does not raise. It returns early when the page shows no
        processing-related text or no matching button.
        """
        streamlit_helpers.wait_for_streamlit_load()

        # The controls may live in the main area or in a specific tab, so the
        # whole page is searched for processing-related text first.
        processing_elements = page.locator("text=/.*[Pp]rocess.*|.*[Aa]nalyze.*|.*[Bb]uild.*|.*[Ii]ndex.*/")
        if processing_elements.count() == 0:
            return

        process_buttons = self._buttons_with_text(page, r"[Pp]rocess|[Bb]uild|[Aa]nalyze")
        if process_buttons.count() == 0:
            return

        process_buttons.first.click()

        # Give processing a moment to start; completion is not awaited here.
        page.wait_for_timeout(2000)

    def test_file_upload_interface(self, page: Page, streamlit_helpers: StreamlitPageHelpers):
        """Check the file uploader, if present, is visible and accepts documents."""
        streamlit_helpers.wait_for_streamlit_load()

        file_uploaders = page.locator("input[type='file'], [data-testid='stFileUploader']")
        if file_uploaders.count() == 0:
            return

        expect(file_uploaders.first).to_be_visible()

        # `accept` is an attribute of the <input type="file"> element itself,
        # so read it from the input rather than from whichever element (for
        # example a wrapper container) happened to match first above.
        file_inputs = page.locator("input[type='file']")
        if file_inputs.count() == 0:
            return

        accept_attr = file_inputs.first.get_attribute("accept")

        # An absent or empty `accept` means no restriction; nothing to verify.
        if accept_attr:
            assert any(fmt in accept_attr for fmt in [".pdf", ".md", ".txt", ".docx"]), \
                f"File uploader should accept document formats, got: {accept_attr}"

    def test_search_functionality(self, page: Page, streamlit_helpers: StreamlitPageHelpers):
        """Type a query into the search box, if present, and trigger a search."""
        streamlit_helpers.wait_for_streamlit_load()

        # Only <input> elements are targeted because the element is filled
        # below. CSS attribute matching is case-sensitive, so both
        # capitalisations of "search" are listed.
        search_inputs = page.locator(
            "input[placeholder*='search'], input[placeholder*='Search'], "
            "input[aria-label*='search'], input[aria-label*='Search']"
        )
        if search_inputs.count() == 0:
            return

        search_input = search_inputs.first

        # A file input cannot be typed into.
        if search_input.get_attribute("type") == "file":
            return

        search_input.fill("revenue")

        # Prefer an explicit search button; otherwise submit with Enter.
        search_buttons = self._buttons_with_text(page, r"[Ss]earch|[Ff]ind")
        if search_buttons.count() > 0:
            search_buttons.first.click()
        else:
            search_input.press("Enter")

        # Leave time for results (or any other reaction) to render.
        page.wait_for_timeout(2000)

    def test_document_status_display(self, page: Page, streamlit_helpers: StreamlitPageHelpers):
        """Check that any status text present on the page is visible."""
        streamlit_helpers.wait_for_streamlit_load()

        # Examples: "No documents processed", "Ready", "X documents found".
        status_elements = page.locator("text=/.*[Ss]tatus.*|.*[Rr]eady.*|.*[Pp]rocessed.*|.*[Dd]ocuments.*found.*/")
        if status_elements.count() == 0:
            return

        expect(status_elements.first).to_be_visible()

    def test_error_handling_invalid_path(self, page: Page, streamlit_helpers: StreamlitPageHelpers):
        """Submit a non-existent data room path and check any error is visible."""
        streamlit_helpers.wait_for_streamlit_load()

        path_inputs = page.locator("input[placeholder*='path'], input[aria-label*='path']")
        if path_inputs.count() == 0:
            return

        path_inputs.first.fill("/nonexistent/path/to/documents")

        submit_buttons = self._buttons_with_text(page, r"[Ss]ubmit|[Cc]heck|[Vv]alidate|[Pp]rocess")
        if submit_buttons.count() == 0:
            return

        submit_buttons.first.click()

        # CSS and text selectors cannot share one comma-separated selector
        # string, so the two kinds of error indicator are located separately.
        error_boxes = page.locator(".stError, [data-testid='stError']")
        error_text = page.locator("text=/.*[Ee]rror.*|.*[Nn]ot found.*|.*[Ii]nvalid.*/")

        # Wait for an error message to appear.
        page.wait_for_timeout(3000)

        # Best-effort: verify visibility only when an indicator exists.
        for candidates in (error_boxes, error_text):
            if candidates.count() > 0:
                expect(candidates.first).to_be_visible()
                break

    def test_processing_progress_indicators(self, page: Page, streamlit_helpers: StreamlitPageHelpers):
        """Click the first processing button, if any, and let the UI react.

        No progress indicator is asserted; the test only verifies that
        starting processing does not raise.
        """
        streamlit_helpers.wait_for_streamlit_load()

        process_buttons = self._buttons_with_text(page, r"[Pp]rocess|[Bb]uild|[Aa]nalyze|[Ii]ndex")
        if process_buttons.count() == 0:
            return

        process_buttons.first.click()

        # Give progress indicators a moment to appear. Completion is not
        # awaited as that could take too long for an E2E test.
        page.wait_for_timeout(1000)

    def test_document_metadata_display(self, page: Page, streamlit_helpers: StreamlitPageHelpers):
        """Look through the first tabs for visible document information."""
        streamlit_helpers.wait_for_streamlit_load()

        tabs = page.locator("[data-testid='stTabs'] button, .stTabs button")

        # Inspect at most the first three tabs; with no tabs the loop is empty.
        tabs_to_check = min(tabs.count(), 3)
        for index in range(tabs_to_check):
            tabs.nth(index).click()
            page.wait_for_timeout(1000)

            # Document counts, file lists, chunk totals and similar text.
            doc_info = page.locator("text=/.*[Dd]ocuments.*|.*[Ff]iles.*|.*[Cc]hunks.*|.*[Pp]rocessed.*/")
            if doc_info.count() > 0:
                expect(doc_info.first).to_be_visible()
                break

    def test_data_room_switching(self, page: Page, streamlit_helpers: StreamlitPageHelpers):
        """Open the first select box, if any, and pick its second option."""
        streamlit_helpers.wait_for_streamlit_load()

        data_room_selectors = page.locator("select, [data-testid='stSelectbox']")
        if data_room_selectors.count() == 0:
            return

        # Open the dropdown so that its options are rendered.
        data_room_selectors.first.click()
        page.wait_for_timeout(500)

        options = page.locator("[data-value], option")
        if options.count() <= 1:
            return

        # Choose an option other than the first one.
        options.nth(1).click()

        # Allow the interface to update after the switch.
        page.wait_for_timeout(2000)

    @pytest.mark.slow
    def test_full_processing_workflow(self, page_slow: Page, streamlit_helpers: StreamlitPageHelpers, sample_test_data):
        """Run document processing against the test VDR (slow test).

        Skipped when ``sample_test_data["vdr_path"]`` does not exist or is
        empty. Returns without failing when the page has no path input or no
        processing button. A timeout or error while waiting for processing is
        printed and tolerated.
        """
        # NOTE(review): `streamlit_helpers` may be bound to the regular `page`
        # fixture rather than `page_slow` -- confirm in conftest.
        page = page_slow
        streamlit_helpers.wait_for_streamlit_load()

        # `vdr_path` is used as a path object (`exists()` / `iterdir()`).
        vdr_path = sample_test_data["vdr_path"]
        if not (vdr_path.exists() and any(vdr_path.iterdir())):
            pytest.skip("No test VDR data available for full processing test")

        path_inputs = page.locator("input[placeholder*='path'], input[aria-label*='path']")
        if path_inputs.count() == 0:
            return

        path_inputs.first.fill(str(vdr_path))

        process_buttons = self._buttons_with_text(page, r"[Pp]rocess|[Bb]uild")
        if process_buttons.count() == 0:
            return

        process_buttons.first.click()

        try:
            # Extended timeout for this slow operation: 2 minutes.
            streamlit_helpers.wait_for_processing(timeout=120000)
        except Exception as e:
            # Processing might still be ongoing, that's okay for this test.
            print(f"Processing timeout or error: {e}")
        else:
            # Let the UI settle once processing has finished.
            page.wait_for_timeout(2000)