Spaces:
Sleeping
feat: nearly complete input handling with stable state
Browse files
- The main bug was that every interaction with the UI caused the
file_uploader to be re-instantiated; all the inputs were then
re-parsed, the hashes recalculated, and the data lost.
- The solution is to use a callback, and to rely on the session state to
implicitly store the file_uploader return value under the widget's key
(this is not well documented).
- When the file_uploader state changes, we dynamically generate
the input elements used to supply the metadata, and process them inline.
- TODO: the data is stable in the session_state, but the UI loses the
per-file input elements on the next interaction: because the list of
uploaded files hasn't changed, the callback doesn't get triggered.
- Good: we don't overwrite our loaded data, so the ML/presentation
can continue, but...
- Bad: we don't redraw the elements. -> This will need more caching, I suppose.
- src/input/input_handling.py +199 -0
- src/input/input_observation.py +15 -1
- src/main.py +11 -1
|
@@ -1,8 +1,11 @@
|
|
|
|
|
| 1 |
import datetime
|
| 2 |
import logging
|
|
|
|
| 3 |
|
| 4 |
import streamlit as st
|
| 5 |
from streamlit.delta_generator import DeltaGenerator
|
|
|
|
| 6 |
|
| 7 |
import cv2
|
| 8 |
import numpy as np
|
|
@@ -31,6 +34,47 @@ spoof_metadata = {
|
|
| 31 |
}
|
| 32 |
|
| 33 |
def check_inputs_are_set(empty_ok:bool=False, debug:bool=False) -> bool:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
"""
|
| 35 |
Checks if all expected inputs have been entered
|
| 36 |
|
|
@@ -65,9 +109,164 @@ def check_inputs_are_set(empty_ok:bool=False, debug:bool=False) -> bool:
|
|
| 65 |
|
| 66 |
return all([v is not None for v in vals])
|
| 67 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
|
|
|
|
|
|
|
|
|
|
| 69 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
def setup_input(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
viewcontainer: DeltaGenerator=None,
|
| 72 |
_allowed_image_types: list=None, ) -> InputObservation:
|
| 73 |
"""
|
|
|
|
| 1 |
+
from typing import List, Tuple
|
| 2 |
import datetime
|
| 3 |
import logging
|
| 4 |
+
import hashlib
|
| 5 |
|
| 6 |
import streamlit as st
|
| 7 |
from streamlit.delta_generator import DeltaGenerator
|
| 8 |
+
from streamlit.runtime.uploaded_file_manager import UploadedFile
|
| 9 |
|
| 10 |
import cv2
|
| 11 |
import numpy as np
|
|
|
|
| 34 |
}
|
| 35 |
|
| 36 |
def check_inputs_are_set(empty_ok:bool=False, debug:bool=False) -> bool:
    """
    Report whether all expected inputs have been entered.

    Thin entry point: both arguments are forwarded unchanged to
    check_inputs_are_set_by_hash, and its result is returned as-is.

    Args:
        empty_ok (bool): forwarded to check_inputs_are_set_by_hash. Default is False.
        debug (bool): forwarded to check_inputs_are_set_by_hash. Default is False.
    Returns:
        bool: whatever check_inputs_are_set_by_hash returns.
    """
    all_set = check_inputs_are_set_by_hash(empty_ok=empty_ok, debug=debug)
    return all_set
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def check_inputs_are_set_by_hash(empty_ok:bool=False, debug:bool=False) -> bool:
    """
    Checks if all expected inputs have been entered

    Implementation: via the Streamlit session state. For every image hash
    listed under the "image_hashes" session key, each expected widget key
    ("<stub>_<image_hash>") must be present with a value that is not None.

    Args:
        empty_ok (bool): If True, returns True if no inputs are set. Default is False.
        debug (bool): If True, prints and logs the status of each expected input key. Default is False.
    Returns:
        bool: True if all expected input keys are set, False otherwise.
    """
    # The hash list is only written by the upload callback, so it may not
    # exist yet; treat a missing list the same as "no images uploaded".
    if "image_hashes" in st.session_state:
        image_hashes = st.session_state["image_hashes"]
    else:
        image_hashes = []
    if not image_hashes:
        return empty_ok

    exp_input_key_stubs = ["input_latitude", "input_longitude"]
    #exp_input_key_stubs = ["input_latitude", "input_longitude", "input_author_email", "input_date", "input_time", "input_image_selector"]

    all_set = True
    for image_hash in image_hashes:
        for stub in exp_input_key_stubs:
            key = f"{stub}_{image_hash}"
            val = st.session_state[key] if key in st.session_state else None
            is_set = val is not None
            if not is_set:
                # keep looping so that debug output still covers every key
                all_set = False
            if debug:
                # str() so the flag is shown as True/False; a bare bool under
                # a width spec is formatted like an int (1/0).
                msg = f"{key:15}, {str(is_set):8}, {val}"
                m_logger.debug(msg)
                print(msg)

    return all_set
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def check_inputs_are_set_by_fname(empty_ok:bool=False, debug:bool=False) -> bool:
|
| 78 |
"""
|
| 79 |
Checks if all expected inputs have been entered
|
| 80 |
|
|
|
|
| 109 |
|
| 110 |
return all([v is not None for v in vals])
|
| 111 |
|
| 112 |
+
|
| 113 |
+
def process_one_file(file:UploadedFile) -> Tuple[np.ndarray, str, str, InputObservation]:
    """
    Process one uploaded image: decode and hash it, add its metadata input
    widgets to the sidebar, and bundle the entered values into an observation.

    Widget keys are suffixed with the image's MD5 hash so that each uploaded
    image gets its own set of inputs in the session state.

    Args:
        file (UploadedFile): one entry from the file_uploader's value.

    Returns:
        tuple: (image, image_hash, filename, observation), where image is the
        result of cv2.imdecode on the file contents, image_hash is the hex MD5
        digest of those contents, filename is file.name, and observation is the
        InputObservation built from the widget values.
    """
    viewcontainer = st.sidebar
    exif_datetime_format = '%Y:%m:%d %H:%M:%S'

    # do all the non-UI calcs first
    ## get the bytes first, then convert into 1) image, 2) md5
    # getvalue() returns the full buffer without moving the stream position,
    # so later readers of the same file object still see all of the data and
    # re-processing the same object does not hash an exhausted stream.
    _bytes = file.getvalue()
    image_hash = hashlib.md5(_bytes).hexdigest()
    image: np.ndarray = cv2.imdecode(np.frombuffer(_bytes, dtype=np.uint8), 1)
    filename:str = file.name
    image_datetime = get_image_datetime(file)
    m_logger.debug(f"image date extracted as {image_datetime} (from {file})")

    author_email = st.session_state["input_author_email"]

    # add the UI elements
    viewcontainer.title(f"Metadata for {filename}")
    ukey = image_hash

    # 3. Latitude Entry Box
    latitude = viewcontainer.text_input(
        "Latitude for " + filename,
        spoof_metadata.get('latitude', ""),
        key=f"input_latitude_{ukey}")
    if latitude and not is_valid_number(latitude):
        viewcontainer.error("Please enter a valid latitude (numerical only).")
        m_logger.error(f"Invalid latitude entered: {latitude}.")

    # 4. Longitude Entry Box
    longitude = viewcontainer.text_input(
        "Longitude for " + filename,
        spoof_metadata.get('longitude', ""),
        key=f"input_longitude_{ukey}")
    if longitude and not is_valid_number(longitude):
        viewcontainer.error("Please enter a valid longitude (numerical only).")
        m_logger.error(f"Invalid longitude entered: {longitude}.")

    # 5. Date/time
    ## first from image metadata (parsed once, then split into date and time)
    parsed_datetime = None
    if image_datetime is not None:
        try:
            parsed_datetime = datetime.datetime.strptime(image_datetime, exif_datetime_format)
        except (ValueError, TypeError):
            # an unparseable timestamp should not abort the whole upload callback
            m_logger.warning(f"Could not parse image date {image_datetime!r}; defaulting to current date/time.")
    if parsed_datetime is None:
        # Default to the current date/time (sampled once so both parts agree)
        parsed_datetime = datetime.datetime.now()
    date_value = parsed_datetime.date()
    time_value = parsed_datetime.time()

    ## either way, give the user the option to adjust manually
    date_option = viewcontainer.date_input("Date for "+filename, value=date_value, key=f"input_date_{ukey}")
    time_option = viewcontainer.time_input("Time for "+filename, time_value, key=f"input_time_{ukey}")

    observation = InputObservation(image=file, latitude=latitude, longitude=longitude,
                                   author_email=author_email, date=image_datetime, time=None,
                                   date_option=date_option, time_option=time_option,
                                   uploaded_filename=file,
                                   )

    # TODO: pass in the hash to InputObservation, so it is done once only. (need to refactor the class a bit)

    return (image, image_hash, filename, observation)
|
| 182 |
+
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
|
| 186 |
+
#
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
|
| 190 |
+
|
| 191 |
+
def process_files():
    """
    Callback for the file uploader: rebuild the per-image session state.

    Reads the current uploads from st.session_state.file_uploader_data, runs
    process_one_file on each (which also adds that file's UI elements), and
    then stores the results in the session state.

    Note: the session-state entries are overwritten, not extended.
    """
    uploads = st.session_state.file_uploader_data

    # one (image, image_hash, filename, observation) tuple per uploaded file
    processed = [process_one_file(upload) for upload in uploads]

    images_by_hash = {img_hash: img for img, img_hash, _, _ in processed}
    observations_by_hash = {img_hash: obs for _, img_hash, _, obs in processed}
    hash_list = [img_hash for _, img_hash, _, _ in processed]
    name_list = [fname for _, _, fname, _ in processed]

    st.session_state.images = images_by_hash
    st.session_state.files = uploads
    st.session_state.observations = observations_by_hash
    st.session_state.image_hashes = hash_list
    st.session_state.image_filenames = name_list
|
| 222 |
+
|
| 223 |
|
| 224 |
+
|
| 225 |
+
|
| 226 |
+
|
| 227 |
|
| 228 |
+
|
| 229 |
+
def _setup_oneoff_inputs() -> None:
    '''
    Add the UI input elements of which there is exactly one each:
    the author email box and the image uploader.

    The uploader stores its value in the session state under the key
    "file_uploader_data" and calls process_files whenever that value changes.
    '''
    sidebar = st.sidebar
    sidebar.title("Input image and data")

    # 1. Author email entry, pre-filled from spoof_metadata when present
    default_email = spoof_metadata.get('author_email', "")
    email_entry = sidebar.text_input("Author Email", default_email,
                                     key="input_author_email")
    if email_entry:
        if not is_valid_email(email_entry):
            sidebar.error("Please enter a valid email address.")

    # 2. Image selector (multiple files allowed)
    accepted_types = ["png", 'jpg', 'jpeg', 'webp']
    st.file_uploader("Upload one or more images", type=accepted_types,
                     accept_multiple_files=True,
                     key="file_uploader_data",
                     on_change=process_files)
|
| 250 |
+
|
| 251 |
+
|
| 252 |
+
|
| 253 |
def setup_input(
    viewcontainer: DeltaGenerator=None,
    _allowed_image_types: list=None, ) -> None:
    '''
    Set up the input handling for the whale observation guidance tool

    Delegates to _setup_oneoff_inputs, which adds the one-off input elements
    including the file uploader and its callback.

    Args:
        viewcontainer (DeltaGenerator): not used by this implementation.
        _allowed_image_types (list): not used by this implementation.
    '''
    _setup_oneoff_inputs()
    # amazingly we just have to add the uploader and its callback, and the rest is dynamic.

    # # check if the inputs are set
    # if check_inputs_are_set(empty_ok=True):
    #     st.sidebar.success("All inputs are set.")
    # else:
    #     st.sidebar.warning("Please fill in all the required inputs.")
|
| 268 |
+
|
| 269 |
+
def setup_input_monolithic(
|
| 270 |
viewcontainer: DeltaGenerator=None,
|
| 271 |
_allowed_image_types: list=None, ) -> InputObservation:
|
| 272 |
"""
|
|
@@ -44,6 +44,9 @@ class InputObservation:
|
|
| 44 |
from_input(input):
|
| 45 |
Creates an observation from another input observation.
|
| 46 |
"""
|
|
|
|
|
|
|
|
|
|
| 47 |
def __init__(self, image=None, latitude=None, longitude=None,
|
| 48 |
author_email=None, date=None, time=None, date_option=None, time_option=None,
|
| 49 |
uploaded_filename=None):
|
|
@@ -56,8 +59,13 @@ class InputObservation:
|
|
| 56 |
self.date_option = date_option
|
| 57 |
self.time_option = time_option
|
| 58 |
self.uploaded_filename = uploaded_filename
|
|
|
|
| 59 |
self._top_predictions = []
|
| 60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
def set_top_predictions(self, top_predictions:list):
|
| 62 |
self._top_predictions = top_predictions
|
| 63 |
|
|
@@ -66,6 +74,11 @@ class InputObservation:
|
|
| 66 |
def top_predictions(self):
|
| 67 |
return self._top_predictions
|
| 68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
|
| 70 |
def __str__(self):
|
| 71 |
return f"Observation: {self.image}, {self.latitude}, {self.longitude}, {self.author_email}, {self.date}, {self.time}, {self.date_option}, {self.time_option}, {self.uploaded_filename}"
|
|
@@ -88,7 +101,8 @@ class InputObservation:
|
|
| 88 |
return {
|
| 89 |
#"image": self.image,
|
| 90 |
"image_filename": self.uploaded_filename.name if self.uploaded_filename else None,
|
| 91 |
-
"image_md5":
|
|
|
|
| 92 |
"latitude": self.latitude,
|
| 93 |
"longitude": self.longitude,
|
| 94 |
"author_email": self.author_email,
|
|
|
|
| 44 |
from_input(input):
|
| 45 |
Creates an observation from another input observation.
|
| 46 |
"""
|
| 47 |
+
|
| 48 |
+
_inst_count = 0
|
| 49 |
+
|
| 50 |
def __init__(self, image=None, latitude=None, longitude=None,
|
| 51 |
author_email=None, date=None, time=None, date_option=None, time_option=None,
|
| 52 |
uploaded_filename=None):
|
|
|
|
| 59 |
self.date_option = date_option
|
| 60 |
self.time_option = time_option
|
| 61 |
self.uploaded_filename = uploaded_filename
|
| 62 |
+
self._image_md5 = None
|
| 63 |
self._top_predictions = []
|
| 64 |
|
| 65 |
+
InputObservation._inst_count += 1
|
| 66 |
+
self._inst_id = InputObservation._inst_count
|
| 67 |
+
self.assign_image_md5()
|
| 68 |
+
|
| 69 |
def set_top_predictions(self, top_predictions:list):
|
| 70 |
self._top_predictions = top_predictions
|
| 71 |
|
|
|
|
| 74 |
def top_predictions(self):
|
| 75 |
return self._top_predictions
|
| 76 |
|
| 77 |
+
# add a method to assign the image_md5 only once
|
| 78 |
+
def assign_image_md5(self):
    """
    Set self._image_md5 once; later calls leave an existing value untouched.

    When self.uploaded_filename is set, the digest is the hex MD5 of the
    file's full contents. The stream is rewound before reading, so the digest
    does not depend on how much of the file an earlier reader consumed, and
    the original position is restored afterwards so this call does not
    exhaust the stream for later readers. Without an uploaded file,
    generate_random_md5() supplies the value.
    """
    if self._image_md5:
        return

    upload = self.uploaded_filename
    if not upload:
        self._image_md5 = generate_random_md5()
        return

    position = upload.tell()
    upload.seek(0)
    try:
        contents = upload.read()
    finally:
        upload.seek(position)
    self._image_md5 = hashlib.md5(contents).hexdigest()
|
| 81 |
+
|
| 82 |
|
| 83 |
def __str__(self):
|
| 84 |
return f"Observation: {self.image}, {self.latitude}, {self.longitude}, {self.author_email}, {self.date}, {self.time}, {self.date_option}, {self.time_option}, {self.uploaded_filename}"
|
|
|
|
| 101 |
return {
|
| 102 |
#"image": self.image,
|
| 103 |
"image_filename": self.uploaded_filename.name if self.uploaded_filename else None,
|
| 104 |
+
"image_md5": self._image_md5,
|
| 105 |
+
#"image_md5": hashlib.md5(self.uploaded_filename.read()).hexdigest() if self.uploaded_filename else generate_random_md5(),
|
| 106 |
"latitude": self.latitude,
|
| 107 |
"longitude": self.longitude,
|
| 108 |
"author_email": self.author_email,
|
|
@@ -97,6 +97,12 @@ if "progress" not in st.session_state:
|
|
| 97 |
st.sidebar.button("Refresh Progress", on_click=refresh_progress)
|
| 98 |
|
| 99 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
|
| 101 |
def main() -> None:
|
| 102 |
"""
|
|
@@ -134,7 +140,8 @@ def main() -> None:
|
|
| 134 |
refresh_progress()
|
| 135 |
|
| 136 |
# create a sidebar, and parse all the input (returned as `observations` object)
|
| 137 |
-
|
|
|
|
| 138 |
|
| 139 |
|
| 140 |
if 0:## WIP
|
|
@@ -250,6 +257,9 @@ def main() -> None:
|
|
| 250 |
# 6. manual validation done -> enable the upload buttons
|
| 251 |
#
|
| 252 |
with tab_inference:
|
|
|
|
|
|
|
|
|
|
| 253 |
add_classifier_header()
|
| 254 |
# if we are before data_entry_validated, show the button, disabled.
|
| 255 |
if not st.session_state.workflow_fsm.is_in_state_or_beyond('data_entry_validated'):
|
|
|
|
| 97 |
st.sidebar.button("Refresh Progress", on_click=refresh_progress)
|
| 98 |
|
| 99 |
|
| 100 |
+
def dbg_show_obs_hashes():
    """
    Debug helper: write the number of stored observations, then one bullet
    per observation giving its hash and how many top predictions it holds.
    """
    # a debug: we seem to be losing the whale classes?
    stored = st.session_state.observations
    st.write(f"[D] num observations: {len(stored)}")
    for obs_hash in stored.keys():
        n_predictions = len(st.session_state.observations[obs_hash].top_predictions)
        st.markdown(f"- [D] observation {obs_hash} has {n_predictions} predictions")
|
| 105 |
+
|
| 106 |
|
| 107 |
def main() -> None:
|
| 108 |
"""
|
|
|
|
| 140 |
refresh_progress()
|
| 141 |
|
| 142 |
# create a sidebar, and parse all the input (returned as `observations` object)
|
| 143 |
+
with st.sidebar:
|
| 144 |
+
setup_input(viewcontainer=st.sidebar)
|
| 145 |
|
| 146 |
|
| 147 |
if 0:## WIP
|
|
|
|
| 257 |
# 6. manual validation done -> enable the upload buttons
|
| 258 |
#
|
| 259 |
with tab_inference:
|
| 260 |
+
|
| 261 |
+
dbg_show_obs_hashes()
|
| 262 |
+
|
| 263 |
add_classifier_header()
|
| 264 |
# if we are before data_entry_validated, show the button, disabled.
|
| 265 |
if not st.session_state.workflow_fsm.is_in_state_or_beyond('data_entry_validated'):
|