File size: 11,914 Bytes
d1564d4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 |
#!/usr/bin/env python3
"""
E2E Tests for Document Processing Workflow
Tests the core document processing functionality:
- Data room selection and processing
- Document upload and indexing
- Search functionality
- Error handling for document operations
"""
import pytest
import os
from playwright.sync_api import Page, expect
from .conftest import StreamlitPageHelpers
class TestDocumentProcessing:
"""Test document processing and data room functionality"""
def test_data_room_selection_interface(self, page: Page, streamlit_helpers: StreamlitPageHelpers, sample_test_data):
"""Test that data room selection interface is functional"""
streamlit_helpers.wait_for_streamlit_load()
# Look for data room selection in sidebar
sidebar = page.locator("[data-testid='stSidebar']")
# Should have some way to select/configure data rooms
data_room_elements = sidebar.locator("text=/.*[Dd]ata.*[Rr]oom.*|.*VDR.*|.*[Dd]ocument.*/")
expect(data_room_elements.first).to_be_visible()
def test_document_processing_workflow(self, page: Page, streamlit_helpers: StreamlitPageHelpers, sample_test_data):
"""Test the complete document processing workflow"""
streamlit_helpers.wait_for_streamlit_load()
# Navigate to document processing section
# This might be in the main area or a specific tab
# Look for document processing controls
processing_elements = page.locator("text=/.*[Pp]rocess.*|.*[Aa]nalyze.*|.*[Bb]uild.*|.*[Ii]ndex.*/")
if processing_elements.count() > 0:
# Check if there's a processing button or similar
process_button = page.locator("button:has-text(/.*[Pp]rocess.*|.*[Bb]uild.*|.*[Aa]nalyze.*/)")
if process_button.count() > 0:
# Click the process button (but don't wait for completion in basic test)
process_button.first.click()
# Should show some indication of processing starting
# Could be a spinner, status message, etc.
processing_indicators = page.locator(".stSpinner, [data-testid='stSpinner'], .stStatus, text=/.*[Pp]rocessing.*|.*[Ll]oading.*/")
# Give it a moment to start processing
page.wait_for_timeout(2000)
def test_file_upload_interface(self, page: Page, streamlit_helpers: StreamlitPageHelpers):
"""Test file upload interface if available"""
streamlit_helpers.wait_for_streamlit_load()
# Look for file upload components
file_uploaders = page.locator("input[type='file'], [data-testid='stFileUploader']")
if file_uploaders.count() > 0:
expect(file_uploaders.first).to_be_visible()
# Test that file uploader accepts appropriate file types
file_uploader = file_uploaders.first
accept_attr = file_uploader.get_attribute("accept")
# Should accept common document formats
if accept_attr:
assert any(fmt in accept_attr for fmt in [".pdf", ".md", ".txt", ".docx"]), \
f"File uploader should accept document formats, got: {accept_attr}"
def test_search_functionality(self, page: Page, streamlit_helpers: StreamlitPageHelpers):
"""Test document search functionality"""
streamlit_helpers.wait_for_streamlit_load()
# Look for search interface
search_elements = page.locator("input[placeholder*='search'], input[aria-label*='search'], text=/.*[Ss]earch.*/")
if search_elements.count() > 0:
search_input = search_elements.first
# Test basic search functionality
if search_input.get_attribute("type") != "file": # Make sure it's not a file input
search_input.fill("revenue")
# Look for search button or trigger search
search_button = page.locator("button:has-text(/.*[Ss]earch.*|.*[Ff]ind.*/)")
if search_button.count() > 0:
search_button.first.click()
else:
# Try pressing Enter
search_input.press("Enter")
# Wait for search results or indication
page.wait_for_timeout(2000)
def test_document_status_display(self, page: Page, streamlit_helpers: StreamlitPageHelpers):
"""Test that document processing status is displayed"""
streamlit_helpers.wait_for_streamlit_load()
# Look for status indicators
status_elements = page.locator("text=/.*[Ss]tatus.*|.*[Rr]eady.*|.*[Pp]rocessed.*|.*[Dd]ocuments.*found.*/")
# Should have some indication of system state
# This could be "No documents processed", "Ready", "X documents indexed", etc.
if status_elements.count() > 0:
expect(status_elements.first).to_be_visible()
def test_error_handling_invalid_path(self, page: Page, streamlit_helpers: StreamlitPageHelpers):
"""Test error handling for invalid data room paths"""
streamlit_helpers.wait_for_streamlit_load()
# Look for path input fields
path_inputs = page.locator("input[placeholder*='path'], input[aria-label*='path']")
if path_inputs.count() > 0:
path_input = path_inputs.first
# Enter an invalid path
path_input.fill("/nonexistent/path/to/documents")
# Look for a button to submit/validate
submit_buttons = page.locator("button:has-text(/.*[Ss]ubmit.*|.*[Cc]heck.*|.*[Vv]alidate.*|.*[Pp]rocess.*/)")
if submit_buttons.count() > 0:
submit_buttons.first.click()
# Should show an error message
error_elements = page.locator(".stError, [data-testid='stError'], text=/.*[Ee]rror.*|.*[Nn]ot found.*|.*[Ii]nvalid.*/")
# Wait for error message to appear
page.wait_for_timeout(3000)
# Should have some error indication
if error_elements.count() > 0:
expect(error_elements.first).to_be_visible()
def test_processing_progress_indicators(self, page: Page, streamlit_helpers: StreamlitPageHelpers):
"""Test that processing shows appropriate progress indicators"""
streamlit_helpers.wait_for_streamlit_load()
# Look for any processing buttons
process_buttons = page.locator("button:has-text(/.*[Pp]rocess.*|.*[Bb]uild.*|.*[Aa]nalyze.*|.*[Ii]ndex.*/)")
if process_buttons.count() > 0:
# Click a processing button
process_buttons.first.click()
# Should show progress indicators
progress_elements = page.locator(".stSpinner, .stProgress, [data-testid='stSpinner'], [data-testid='stProgress']")
# Give it a moment for progress indicators to appear
page.wait_for_timeout(1000)
# Note: We don't wait for completion as that could take too long for E2E tests
def test_document_metadata_display(self, page: Page, streamlit_helpers: StreamlitPageHelpers):
"""Test that document metadata is displayed when available"""
streamlit_helpers.wait_for_streamlit_load()
# Look for metadata displays
metadata_elements = page.locator("text=/.*[Dd]ocument.*[Cc]ount.*|.*[Ff]iles.*found.*|.*[Cc]hunks.*|.*[Ii]ndex.*size.*/")
# Should show some document information if documents are processed
# This could be document counts, index size, processing status, etc.
# Navigate through tabs to see if any show document information
tabs = page.locator("[data-testid='stTabs'] button, .stTabs button")
if tabs.count() > 0:
for i in range(min(tabs.count(), 3)): # Check first 3 tabs
tabs.nth(i).click()
page.wait_for_timeout(1000)
# Check for document-related information in this tab
doc_info = page.locator("text=/.*[Dd]ocuments.*|.*[Ff]iles.*|.*[Cc]hunks.*|.*[Pp]rocessed.*/")
if doc_info.count() > 0:
expect(doc_info.first).to_be_visible()
break
def test_data_room_switching(self, page: Page, streamlit_helpers: StreamlitPageHelpers):
"""Test switching between different data rooms"""
streamlit_helpers.wait_for_streamlit_load()
# Look for data room selection dropdown or similar
data_room_selectors = page.locator("select, [data-testid='stSelectbox']")
if data_room_selectors.count() > 0:
selector = data_room_selectors.first
# Check if it has multiple options
selector.click()
page.wait_for_timeout(500)
options = page.locator("[data-value], option")
if options.count() > 1:
# Select a different option
options.nth(1).click()
# Should trigger some update in the interface
page.wait_for_timeout(2000)
# Look for status updates or changes
status_updates = page.locator("text=/.*[Ll]oading.*|.*[Ss]witching.*|.*[Pp]rocessing.*/")
@pytest.mark.slow
def test_full_processing_workflow(self, page_slow: Page, streamlit_helpers: StreamlitPageHelpers, sample_test_data):
"""Test the complete document processing workflow with real data (slower test)"""
page = page_slow # Use the slow page fixture
streamlit_helpers.wait_for_streamlit_load()
# This test would actually process documents if a test data room is available
# Check if test VDR path exists
vdr_path = sample_test_data["vdr_path"]
if vdr_path.exists() and any(vdr_path.iterdir()):
# Look for path configuration
path_inputs = page.locator("input[placeholder*='path'], input[aria-label*='path']")
if path_inputs.count() > 0:
path_input = path_inputs.first
path_input.fill(str(vdr_path))
# Look for process button
process_buttons = page.locator("button:has-text(/.*[Pp]rocess.*|.*[Bb]uild.*/)")
if process_buttons.count() > 0:
process_buttons.first.click()
# Wait for processing to complete or show progress
# Use the extended timeout for this slow operation
try:
streamlit_helpers.wait_for_processing(timeout=120000) # 2 minutes
# Check for success indicators
success_elements = page.locator(".stSuccess, text=/.*[Ss]uccess.*|.*[Cc]omplete.*|.*[Ff]inished.*/")
page.wait_for_timeout(2000)
# Verify that documents were processed
status_elements = page.locator("text=/.*documents.*processed.*|.*files.*indexed.*|.*chunks.*created.*/")
except Exception as e:
# Processing might still be ongoing, that's okay for this test
print(f"Processing timeout or error: {e}")
else:
pytest.skip("No test VDR data available for full processing test")
|