Spaces:

Saving-Willy
/

saving-willy-dev

Sleeping

rmm

test: main full workflow test populates email unless debug mode

e550397 10 months ago

12.7 kB

	import pytest
	from unittest.mock import MagicMock, patch
	from streamlit.testing.v1 import AppTest
	import time

	from input.input_handling import spoof_metadata, load_debug_autopopulate
	from input.input_observation import InputObservation
	from input.input_handling import buffer_uploaded_files

	from streamlit.runtime.uploaded_file_manager import UploadedFile
	from numpy import ndarray

	from test_demo_multifile_upload import (
	mock_uploadedFile_List_ImageData, mock_uploadedFile,
	MockUploadedFile, )


	from test_demo_input_sidebar import (
	verify_initial_session_state, verify_session_state_after_processing_files,
	wrapped_buffer_uploaded_files_allowed_once)

	from test_demo_input_sidebar import _cprint, OKBLUE, OKGREEN, OKCYAN, FAIL, PURPLE

	# Passed as `default_timeout` to AppTest.from_file in the workflow test below
	# (presumably seconds -- confirm against the Streamlit AppTest documentation).
	TIMEOUT = 30
	# Path of the Streamlit script that AppTest.from_file loads in this module.
	SCRIPT_UNDER_TEST = "src/main.py"

	def debug_check_images(at:AppTest, msg:str=""):
	    """Print a short summary of the images currently held in session state.

	    The number of entries in `at.session_state.images` is reported first,
	    followed by one line per entry showing either the array shape (for
	    numpy arrays) or the object's type (for anything else), plus its key.

	    Args:
	        at: the AppTest instance whose session state is inspected.
	        msg: optional label inserted into the summary line, to identify
	            the point in the test at which the check was made.
	    """
	    images = at.session_state.images
	    _cprint(f"[I] num images in session state {msg}: {len(images)}", OKCYAN)
	    for idx, (name, image) in enumerate(images.items()):
	        # arrays are described by their shape; anything else by its type
	        detail = image.shape if isinstance(image, ndarray) else type(image)
	        print(f"image {idx}: {detail} [{name}]")

	def nooop(*args, **kwargs):
	    """Mock side effect that fails loudly when buffering is requested again.

	    Intended to be installed as a `side_effect` on a mock, which invokes it
	    with whatever positional and keyword arguments the mock was called
	    with. All arguments are accepted and ignored so that the failure raised
	    is always the RuntimeError below, never a TypeError from a signature
	    mismatch.

	    Raises:
	        RuntimeError: always.
	    """
	    _cprint("skipping the buffering -- shoul only happen once", FAIL)
	    raise RuntimeError("file buffering was requested more than once")

	def _assert_override_flags(at, idx_to_override, skip_check=False):
	    """Check that only one observation is flagged as manually overridden.

	    Each observation in `at.session_state.observations` is printed and
	    type-checked; unless `skip_check` is set, its `class_overriden` flag
	    must be True for the observation at position `idx_to_override` (in
	    iteration order) and False for every other one.

	    Args:
	        at: the AppTest instance whose session state is inspected.
	        idx_to_override: position of the observation expected to be overridden.
	        skip_check: when True, the flags are printed but not asserted.
	    """
	    for i, obs in enumerate(at.session_state.observations.values()):
	        _cprint(f"obs {i}: {obs.class_overriden} {obs.to_dict()}", OKBLUE)
	        assert isinstance(obs, InputObservation)
	        if not skip_check:
	            # explicit comparison to a bool, so an unset (e.g. None) flag fails
	            assert obs.class_overriden == (i == idx_to_override)


	@pytest.mark.end2end
	@pytest.mark.slow
	@patch("streamlit.file_uploader")
	def test_click_validate_after_data_entry(mock_file_rv: MagicMock, mock_uploadedFile_List_ImageData):
	    """Drive the app under test through its whole workflow, checking each stage.

	    `streamlit.file_uploader` is patched so that mocked files stand in for a
	    real upload. The stages exercised, in order, are:

	    1. start the app (state 'doing_data_entry')
	    2. provide files and an email (state 'data_entry_complete')
	    3. click validate (state 'data_entry_validated')
	    4. click infer (state 'ml_classification_completed')
	    5. manually override the class of one observation
	    6. confirm the predictions (state 'manual_inspection_completed')
	    7. upload the observations (state 'data_uploaded')

	    Args:
	        mock_file_rv: the mock replacing `streamlit.file_uploader`.
	        mock_uploadedFile_List_ImageData: fixture; called with `num_files`
	            to build the list of mocked uploaded files.
	    """
	    # 1. get app started

	    # first we need to upload >0 files
	    num_files = 2
	    mock_files = mock_uploadedFile_List_ImageData(num_files=num_files)
	    mock_file_rv.return_value = mock_files

	    t0 = time.time()
	    at = AppTest.from_file(SCRIPT_UNDER_TEST, default_timeout=TIMEOUT).run()
	    t1 = time.time()
	    _cprint(f"[T] time to load: {t1-t0:.2f}s", PURPLE)
	    verify_initial_session_state(at)

	    # 1-Test: at this initial state, we expect:
	    # - the workflow state is 'doing_data_entry'
	    # - the validate button is disabled
	    # - the infer button (on main tab) is disabled
	    # - note: props of the button: label, value, proto, disabled.
	    #   don't need to check others here

	    assert at.session_state.workflow_fsm.current_state == 'doing_data_entry'
	    assert at.sidebar.button[1].disabled == True
	    infer_button = at.tabs[0].button[0]
	    assert infer_button.disabled == True

	    # 2. upload files, enter email, and trigger the callback
	    if not load_debug_autopopulate():
	        # fill the text box with a dummy email
	        at.session_state.input_author_email = "person@organisation.org"

	    # put the mocked file_upload into session state, as if it were the result
	    # of a file upload, with the key 'file_uploader_data'
	    at.session_state["file_uploader_data"] = mock_files
	    # the side effect can't run until now (need file_uploader_data to be set)
	    if wrapped_buffer_uploaded_files_allowed_once.called == 0:
	        mock_file_rv.side_effect = wrapped_buffer_uploaded_files_allowed_once
	    else:
	        mock_file_rv.side_effect = nooop

	    _cprint(f"[I] buffering called {wrapped_buffer_uploaded_files_allowed_once.called} times", OKGREEN)

	    t2 = time.time()
	    at.run()
	    t3 = time.time()
	    _cprint(f"[T] time to run with file processing: {t3-t2:.2f}s", PURPLE)

	    # 2-Test: after uploading the files, we should have:
	    # - the workflow state moved on to 'data_entry_complete'
	    # - several changes applied to the session_state (handled by
	    #   verify_session_state_after_processing_files)
	    # - the validate button is enabled
	    # - the infer button is still disabled

	    verify_session_state_after_processing_files(at, num_files)
	    debug_check_images(at, "after processing files")
	    _cprint(f"[I] buffering called {wrapped_buffer_uploaded_files_allowed_once.called} times", OKGREEN)

	    assert at.session_state.workflow_fsm.current_state == 'data_entry_complete'

	    assert at.sidebar.button[1].disabled == False
	    infer_button = at.tabs[0].button[0]
	    assert infer_button.disabled == True

	    print(at.markdown[0])

	    # 3. data entry complete, click the validate button
	    at.sidebar.button[1].click().run()
	    t4 = time.time()
	    _cprint(f"[T] time to run step 3: {t4-t3:.2f}s", PURPLE)

	    # 3-Test: after validating the data, we should have:
	    # - the state (backend) should move to data_entry_validated
	    # - the UI should show the new state (in sidebar.markdown[0])
	    # - the infer button should now be enabled
	    # - the validate button should be disabled (not yet checked, see TODO)

	    assert at.session_state.workflow_fsm.current_state == 'data_entry_validated'
	    assert "data_entry_validated" in at.sidebar.markdown[0].value

	    # TODO: checking that the validate button is disabled currently fails,
	    # because the main code doesn't change the button; in this exec
	    # path/branch, the button is not rendered at all. So if we did at.run()
	    # after the click, the button is absent entirely! If we don't run, the
	    # button is still present in its old state (enabled).

	    # tabs are reached directly from `at`: they are only plausible in the
	    # main area (not supported in sidebar)
	    infer_button = at.tabs[0].button[0]
	    assert infer_button.disabled == False

	    debug_check_images(at, "after validation button")
	    _cprint(f"[I] buffering called {wrapped_buffer_uploaded_files_allowed_once.called} times", OKGREEN)

	    # check pre-ML click that we are ready for it.

	    debug_check_images(at, "before clicking infer. ")
	    _cprint(f"[I] buffering called {wrapped_buffer_uploaded_files_allowed_once.called} times", OKGREEN)
	    # local debug switches: run the ML stages / skip the override-flag asserts
	    TEST_ML = True
	    SKIP_CHECK_OVERRIDE = False
	    # 4. launch ML inference by clicking the button
	    if TEST_ML:
	        # now test the ML step
	        infer_button.click().run()
	        t5 = time.time()
	        _cprint(f"[T] time to run step 4: {t5-t4:.2f}s", PURPLE)

	        # 4-Test: after clicking the infer button, we should have:
	        # - workflow should have moved on to 'ml_classification_completed'
	        # - the main tab button should now have new text (confirm species predictions)
	        # - we should have the results presented on the main area
	        #   - 4 image elements per file (the source image + images of 3 predictions)
	        #   - 1 dropdown element per image + 1 for the page selector
	        # - all of the observations should have class_overriden == False

	        assert at.session_state.workflow_fsm.current_state == 'ml_classification_completed'
	        # check the observations
	        for i, obs in enumerate(at.session_state.observations.values()):
	            print(f"obs {i}: {obs}")
	            assert isinstance(obs, InputObservation)
	            assert obs.class_overriden == False

	        # check the visual elements
	        infer_tab = at.tabs[0]
	        print(f"tab: {infer_tab}")
	        img_elems = infer_tab.get("imgs")
	        print(f"imgs: {len(img_elems)}")
	        assert len(img_elems) == num_files*4

	        infer_button = infer_tab.button[0]
	        assert infer_button.disabled == False
	        assert 'Confirm species predictions' in infer_button.label

	        # we have 1 per file, and also one more to select the page of results being shown.
	        # - hmm, so we aren't going to see the right number if it goes multipage :(
	        # - but this test specifically uses 2 inputs.
	        assert len(infer_tab.selectbox) == num_files + 1

	        # 5. manually override the class of one of the observations
	        idx_to_override = 1  # don't forget, we also have the page selector first.
	        infer_tab.selectbox[idx_to_override + 1].select_index(20).run()  # FRAGILE!

	        # 5-TEST.
	        # - expect that all class_overriden are False, except for the one we just set
	        _assert_override_flags(at, idx_to_override, skip_check=SKIP_CHECK_OVERRIDE)

	        # 6. confirm the species predictions, get ready to allow upload
	        infer_tab = at.tabs[0]
	        confirm_button = infer_tab.button[0]
	        confirm_button.click().run()
	        t6 = time.time()
	        # t5 was taken at the end of step 4, so this interval covers steps 5 and 6
	        _cprint(f"[T] time to run steps 5-6: {t6-t5:.2f}s", PURPLE)

	        # 6-TEST. Now we expect to see:
	        # - the workflow state should be 'manual_inspection_completed'
	        # - the observations should be as per the previous step
	        # - the main tab button should now have new text (Upload all observations)
	        # - we should have 4n images
	        # - we should have only 1 select box (page), (passed stage for overriding class)

	        assert at.session_state.workflow_fsm.current_state == 'manual_inspection_completed'
	        _assert_override_flags(at, idx_to_override, skip_check=SKIP_CHECK_OVERRIDE)

	        # NOTE: no manual refresh (sleep / at.run()) is done here. Checks at
	        # various points that usually pass occasionally fail because elements
	        # haven't yet been drawn, maybe because the script is slow; it is not
	        # unique to this step. Whether AppTest offers an explicit wait
	        # mechanism has not been investigated.
	        infer_tab = at.tabs[0]
	        upload_button = infer_tab.button[0]
	        assert upload_button.disabled == False
	        assert 'Upload all observations' in upload_button.label

	        img_elems = infer_tab.get("imgs")
	        assert len(img_elems) == num_files*4

	        assert len(infer_tab.selectbox) == 1

	        # 7. upload the observations
	        upload_button.click().run()
	        t7 = time.time()
	        _cprint(f"[T] time to run step 7: {t7-t6:.2f}s", PURPLE)

	        # 7-TEST. Now we expect to see:
	        # - workflow state should be 'data_uploaded'
	        # - nothing else in the back end should have changed (is that a mistake?
	        #   should we add a boolean tracking if the observations have been uploaded?)
	        # - a toast presented for each observation uploaded
	        # - the images should still be there, and 1 select box (page)
	        # - no more button on the main area

	        assert at.session_state.workflow_fsm.current_state == 'data_uploaded'
	        assert len(at.toast) == num_files
	        infer_tab = at.tabs[0]

	        img_elems = infer_tab.get("imgs")
	        assert len(img_elems) == num_files*4
	        assert len(infer_tab.selectbox) == 1
	        assert len(infer_tab.button) == 0