Spaces:
Sleeping
Sleeping
rmm
committed on
Commit
·
8c4b1f7
1
Parent(s):
aa998c4
chore: tidy up of workflow and debug clutter
Browse files
- basically all phases seem OK; almost ready for validation
- src/classifier/classifier_image.py +14 -8
- src/hf_push_observations.py +18 -17
- src/input/input_observation.py +1 -7
- src/main.py +24 -21
src/classifier/classifier_image.py
CHANGED
|
@@ -39,9 +39,10 @@ def cetacean_just_classify(cetacean_classifier):
|
|
| 39 |
msg = f"[D]2 classify_whale_done for {hash}: {st.session_state.classify_whale_done[hash]}, whale_prediction1: {st.session_state.whale_prediction1[hash]}"
|
| 40 |
g_logger.info(msg)
|
| 41 |
|
| 42 |
-
#
|
| 43 |
st.session_state.public_observations[hash] = observation
|
| 44 |
-
|
|
|
|
| 45 |
|
| 46 |
|
| 47 |
# func to show results and allow review
|
|
@@ -70,7 +71,7 @@ def cetacean_show_results_and_review():
|
|
| 70 |
else:
|
| 71 |
pred1 = st.session_state.whale_prediction1[hash]
|
| 72 |
# get index of pred1 from WHALE_CLASSES, none if not present
|
| 73 |
-
print(f"[D] pred1: {pred1}")
|
| 74 |
ix = viewer.WHALE_CLASSES.index(pred1) if pred1 in viewer.WHALE_CLASSES else None
|
| 75 |
selected_class = st.selectbox(f"Species for observation {str(o)}", viewer.WHALE_CLASSES, index=ix)
|
| 76 |
|
|
@@ -79,7 +80,7 @@ def cetacean_show_results_and_review():
|
|
| 79 |
observation['class_overriden'] = selected_class # TODO: this should be boolean!
|
| 80 |
|
| 81 |
st.session_state.public_observations[hash] = observation
|
| 82 |
-
st.button(f"Upload observation {str(o)} to THE INTERNET!", on_click=push_observations)
|
| 83 |
# TODO: the metadata only fills properly if `validate` was clicked.
|
| 84 |
st.markdown(metadata2md(hash))
|
| 85 |
|
|
@@ -91,7 +92,7 @@ def cetacean_show_results_and_review():
|
|
| 91 |
whale_classes = observations[hash].top_predictions
|
| 92 |
# render images for the top 3 (that is what the model api returns)
|
| 93 |
n = len(whale_classes)
|
| 94 |
-
st.markdown(f"Top {n} Predictions for observation {str(o)}")
|
| 95 |
for i in range(n):
|
| 96 |
viewer.display_whale(whale_classes, i)
|
| 97 |
o += 1
|
|
@@ -134,9 +135,14 @@ def cetacean_show_results():
|
|
| 134 |
# observation['class_overriden'] = selected_class # TODO: this should be boolean!
|
| 135 |
|
| 136 |
# st.session_state.public_observation = observation
|
| 137 |
-
|
| 138 |
-
#
|
|
|
|
| 139 |
st.markdown(metadata2md(hash))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
st.markdown(f"- **hash**: {hash}")
|
| 141 |
|
| 142 |
msg = f"[D] full observation after inference: {observation}"
|
|
@@ -147,7 +153,7 @@ def cetacean_show_results():
|
|
| 147 |
whale_classes = observations[hash].top_predictions
|
| 148 |
# render images for the top 3 (that is what the model api returns)
|
| 149 |
n = len(whale_classes)
|
| 150 |
-
st.markdown(f"Top {n} Predictions for observation {str(o)}")
|
| 151 |
for i in range(n):
|
| 152 |
viewer.display_whale(whale_classes, i)
|
| 153 |
o += 1
|
|
|
|
| 39 |
msg = f"[D]2 classify_whale_done for {hash}: {st.session_state.classify_whale_done[hash]}, whale_prediction1: {st.session_state.whale_prediction1[hash]}"
|
| 40 |
g_logger.info(msg)
|
| 41 |
|
| 42 |
+
# store the elements of the observation that will be transmitted (not image)
|
| 43 |
st.session_state.public_observations[hash] = observation
|
| 44 |
+
if st.session_state.MODE_DEV_STATEFUL:
|
| 45 |
+
st.write(f"*[D] Observation {hash} classified as {st.session_state.whale_prediction1[hash]}*")
|
| 46 |
|
| 47 |
|
| 48 |
# func to show results and allow review
|
|
|
|
| 71 |
else:
|
| 72 |
pred1 = st.session_state.whale_prediction1[hash]
|
| 73 |
# get index of pred1 from WHALE_CLASSES, none if not present
|
| 74 |
+
print(f"[D] {o:3} pred1: {pred1:30} | {hash}")
|
| 75 |
ix = viewer.WHALE_CLASSES.index(pred1) if pred1 in viewer.WHALE_CLASSES else None
|
| 76 |
selected_class = st.selectbox(f"Species for observation {str(o)}", viewer.WHALE_CLASSES, index=ix)
|
| 77 |
|
|
|
|
| 80 |
observation['class_overriden'] = selected_class # TODO: this should be boolean!
|
| 81 |
|
| 82 |
st.session_state.public_observations[hash] = observation
|
| 83 |
+
#st.button(f"Upload observation {str(o)} to THE INTERNET!", on_click=push_observations)
|
| 84 |
# TODO: the metadata only fills properly if `validate` was clicked.
|
| 85 |
st.markdown(metadata2md(hash))
|
| 86 |
|
|
|
|
| 92 |
whale_classes = observations[hash].top_predictions
|
| 93 |
# render images for the top 3 (that is what the model api returns)
|
| 94 |
n = len(whale_classes)
|
| 95 |
+
st.markdown(f"**Top {n} Predictions for observation {str(o)}**")
|
| 96 |
for i in range(n):
|
| 97 |
viewer.display_whale(whale_classes, i)
|
| 98 |
o += 1
|
|
|
|
| 135 |
# observation['class_overriden'] = selected_class # TODO: this should be boolean!
|
| 136 |
|
| 137 |
# st.session_state.public_observation = observation
|
| 138 |
+
|
| 139 |
+
#st.button(f"Upload observation {str(o)} to THE INTERNET!", on_click=push_observations)
|
| 140 |
+
#
|
| 141 |
st.markdown(metadata2md(hash))
|
| 142 |
+
# TODO: FIXME: this is the data that will get pushed -- it DOESN'T reflect any adjustments
|
| 143 |
+
# # made via the dropdown on the last step!!!!
|
| 144 |
+
#st.markdown(f"- **selected species**: {observation['predicted_class']}")
|
| 145 |
+
st.markdown(f"- **selected species**: {st.session_state.whale_prediction1[hash]}")
|
| 146 |
st.markdown(f"- **hash**: {hash}")
|
| 147 |
|
| 148 |
msg = f"[D] full observation after inference: {observation}"
|
|
|
|
| 153 |
whale_classes = observations[hash].top_predictions
|
| 154 |
# render images for the top 3 (that is what the model api returns)
|
| 155 |
n = len(whale_classes)
|
| 156 |
+
st.markdown(f"**Top {n} Predictions for observation {str(o)}**")
|
| 157 |
for i in range(n):
|
| 158 |
viewer.display_whale(whale_classes, i)
|
| 159 |
o += 1
|
src/hf_push_observations.py
CHANGED
|
@@ -13,7 +13,7 @@ LOG_LEVEL = logging.DEBUG
|
|
| 13 |
g_logger = logging.getLogger(__name__)
|
| 14 |
g_logger.setLevel(LOG_LEVEL)
|
| 15 |
|
| 16 |
-
def push_observation(image_hash:str, api:HfApi) -> CommitInfo:
|
| 17 |
'''
|
| 18 |
push one observation to the Hugging Face dataset
|
| 19 |
|
|
@@ -38,32 +38,33 @@ def push_observation(image_hash:str, api:HfApi) -> CommitInfo:
|
|
| 38 |
f = tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False)
|
| 39 |
f.write(metadata_str)
|
| 40 |
f.close()
|
| 41 |
-
st.info(f"temp file: {f.name} with metadata written...")
|
| 42 |
|
| 43 |
-
# observation['author_email']
|
| 44 |
-
# observation['image_md5']
|
| 45 |
path_in_repo = f"metadata/{observation['author_email']}/{observation['image_md5']}.json"
|
| 46 |
|
| 47 |
msg = f"fname: {f.name} | path: {path_in_repo}"
|
| 48 |
print(msg)
|
| 49 |
st.warning(msg)
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
|
|
|
|
|
|
|
|
|
| 61 |
|
| 62 |
return rv
|
| 63 |
|
| 64 |
|
| 65 |
|
| 66 |
-
def push_all_observations():
|
| 67 |
'''
|
| 68 |
open an API connection to Hugging Face, and push all observations one by one
|
| 69 |
'''
|
|
@@ -74,7 +75,7 @@ def push_all_observations():
|
|
| 74 |
|
| 75 |
# iterate over the list of observations
|
| 76 |
for hash in st.session_state.public_observations.keys():
|
| 77 |
-
rv = push_observation(hash, api)
|
| 78 |
|
| 79 |
|
| 80 |
|
|
|
|
| 13 |
g_logger = logging.getLogger(__name__)
|
| 14 |
g_logger.setLevel(LOG_LEVEL)
|
| 15 |
|
| 16 |
+
def push_observation(image_hash:str, api:HfApi, enable_push:False) -> CommitInfo:
|
| 17 |
'''
|
| 18 |
push one observation to the Hugging Face dataset
|
| 19 |
|
|
|
|
| 38 |
f = tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False)
|
| 39 |
f.write(metadata_str)
|
| 40 |
f.close()
|
| 41 |
+
#st.info(f"temp file: {f.name} with metadata written...")
|
| 42 |
|
|
|
|
|
|
|
| 43 |
path_in_repo = f"metadata/{observation['author_email']}/{observation['image_md5']}.json"
|
| 44 |
|
| 45 |
msg = f"fname: {f.name} | path: {path_in_repo}"
|
| 46 |
print(msg)
|
| 47 |
st.warning(msg)
|
| 48 |
+
|
| 49 |
+
if enable_push:
|
| 50 |
+
rv = api.upload_file(
|
| 51 |
+
path_or_fileobj=f.name,
|
| 52 |
+
path_in_repo=path_in_repo,
|
| 53 |
+
repo_id="Saving-Willy/temp_dataset",
|
| 54 |
+
repo_type="dataset",
|
| 55 |
+
)
|
| 56 |
+
print(rv)
|
| 57 |
+
msg = f"observation attempted tx to repo happy walrus: {rv}"
|
| 58 |
+
g_logger.info(msg)
|
| 59 |
+
st.info(msg)
|
| 60 |
+
else:
|
| 61 |
+
rv = None # temp don't send anything
|
| 62 |
|
| 63 |
return rv
|
| 64 |
|
| 65 |
|
| 66 |
|
| 67 |
+
def push_all_observations(enable_push:bool=False):
|
| 68 |
'''
|
| 69 |
open an API connection to Hugging Face, and push all observations one by one
|
| 70 |
'''
|
|
|
|
| 75 |
|
| 76 |
# iterate over the list of observations
|
| 77 |
for hash in st.session_state.public_observations.keys():
|
| 78 |
+
rv = push_observation(hash, api, enable_push=enable_push)
|
| 79 |
|
| 80 |
|
| 81 |
|
src/input/input_observation.py
CHANGED
|
@@ -92,13 +92,7 @@ class InputObservation:
|
|
| 92 |
raise DeprecationWarning("This method is deprecated. hash is a required constructor argument.")
|
| 93 |
if not self.image_md5:
|
| 94 |
self.image_md5 = hashlib.md5(self.uploaded_file.read()).hexdigest() if self.uploaded_file else generate_random_md5()
|
| 95 |
-
|
| 96 |
-
# new comment / hybj hunk
|
| 97 |
-
self._cprint(f"[D] Assigned image md5: {self.image_md5} for {self.uploaded_file}")
|
| 98 |
-
|
| 99 |
-
def _cprint(self, msg:str, color:str=OKGREEN):
|
| 100 |
-
"""Print colored message"""
|
| 101 |
-
print(f"{color}{msg}{ENDC}")
|
| 102 |
|
| 103 |
def __str__(self):
|
| 104 |
_im_str = "None" if self.image is None else f"image dims: {self.image.shape}"
|
|
|
|
| 92 |
raise DeprecationWarning("This method is deprecated. hash is a required constructor argument.")
|
| 93 |
if not self.image_md5:
|
| 94 |
self.image_md5 = hashlib.md5(self.uploaded_file.read()).hexdigest() if self.uploaded_file else generate_random_md5()
|
| 95 |
+
m_logger.debug(f"[D] Assigned image md5: {self.image_md5} for {self.uploaded_file}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
|
| 97 |
def __str__(self):
|
| 98 |
_im_str = "None" if self.image is None else f"image dims: {self.image.shape}"
|
src/main.py
CHANGED
|
@@ -44,6 +44,11 @@ data_files = "data/train-00000-of-00001.parquet"
|
|
| 44 |
USE_BASIC_MAP = False
|
| 45 |
DEV_SIDEBAR_LIB = True
|
| 46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
# get a global var for logger accessor in this module
|
| 48 |
LOG_LEVEL = logging.DEBUG
|
| 49 |
g_logger = logging.getLogger(__name__)
|
|
@@ -249,7 +254,8 @@ def main() -> None:
|
|
| 249 |
#
|
| 250 |
with tab_inference:
|
| 251 |
|
| 252 |
-
|
|
|
|
| 253 |
|
| 254 |
add_classifier_header()
|
| 255 |
# if we are before data_entry_validated, show the button, disabled.
|
|
@@ -277,17 +283,16 @@ def main() -> None:
|
|
| 277 |
|
| 278 |
elif st.session_state.workflow_fsm.is_in_state('ml_classification_completed'):
|
| 279 |
# show the results, and allow manual validation
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
s
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
st.markdown(s)
|
| 289 |
# add a button to advance the state
|
| 290 |
-
if st.button("
|
| 291 |
st.session_state.workflow_fsm.complete_current_state()
|
| 292 |
# -> manual_inspection_completed
|
| 293 |
st.rerun()
|
|
@@ -296,27 +301,25 @@ def main() -> None:
|
|
| 296 |
|
| 297 |
elif st.session_state.workflow_fsm.is_in_state('manual_inspection_completed'):
|
| 298 |
# show the ML results, and allow the user to upload the observation
|
| 299 |
-
st.markdown("""
|
| 300 |
-
### Inference Results (after manual validation)
|
| 301 |
-
:construction: for now we just show the button.
|
| 302 |
-
""")
|
| 303 |
|
| 304 |
|
| 305 |
-
if st.button("
|
| 306 |
# let this go through to the push_all func, since it just reports to log for now.
|
| 307 |
-
push_all_observations()
|
| 308 |
st.session_state.workflow_fsm.complete_current_state()
|
| 309 |
# -> data_uploaded
|
|
|
|
| 310 |
|
| 311 |
cetacean_show_results()
|
| 312 |
|
| 313 |
elif st.session_state.workflow_fsm.is_in_state('data_uploaded'):
|
| 314 |
# the data has been sent. Lets show the observations again
|
| 315 |
# but no buttons to upload (or greyed out ok)
|
| 316 |
-
st.markdown("""
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
df = pd.DataFrame(st.session_state.observations, index=[0])
|
| 321 |
st.table(df)
|
| 322 |
|
|
|
|
| 44 |
USE_BASIC_MAP = False
|
| 45 |
DEV_SIDEBAR_LIB = True
|
| 46 |
|
| 47 |
+
# one toggle for all the extra debug text
|
| 48 |
+
if "MODE_DEV_STATEFUL" not in st.session_state:
|
| 49 |
+
st.session_state.MODE_DEV_STATEFUL = False
|
| 50 |
+
|
| 51 |
+
|
| 52 |
# get a global var for logger accessor in this module
|
| 53 |
LOG_LEVEL = logging.DEBUG
|
| 54 |
g_logger = logging.getLogger(__name__)
|
|
|
|
| 254 |
#
|
| 255 |
with tab_inference:
|
| 256 |
|
| 257 |
+
if st.session_state.MODE_DEV_STATEFUL:
|
| 258 |
+
dbg_show_obs_hashes()
|
| 259 |
|
| 260 |
add_classifier_header()
|
| 261 |
# if we are before data_entry_validated, show the button, disabled.
|
|
|
|
| 283 |
|
| 284 |
elif st.session_state.workflow_fsm.is_in_state('ml_classification_completed'):
|
| 285 |
# show the results, and allow manual validation
|
| 286 |
+
st.markdown("""### Inference results and manual validation/adjustment """)
|
| 287 |
+
if st.session_state.MODE_DEV_STATEFUL:
|
| 288 |
+
s = ""
|
| 289 |
+
for k, v in st.session_state.whale_prediction1.items():
|
| 290 |
+
s += f"* Image {k}: {v}\n"
|
| 291 |
+
|
| 292 |
+
st.markdown(s)
|
| 293 |
+
|
|
|
|
| 294 |
# add a button to advance the state
|
| 295 |
+
if st.button("Confirm species predictions", help="Confirm that all species are selected correctly"):
|
| 296 |
st.session_state.workflow_fsm.complete_current_state()
|
| 297 |
# -> manual_inspection_completed
|
| 298 |
st.rerun()
|
|
|
|
| 301 |
|
| 302 |
elif st.session_state.workflow_fsm.is_in_state('manual_inspection_completed'):
|
| 303 |
# show the ML results, and allow the user to upload the observation
|
| 304 |
+
st.markdown("""### Inference Results (after manual validation) """)
|
|
|
|
|
|
|
|
|
|
| 305 |
|
| 306 |
|
| 307 |
+
if st.button("Upload all observations to THE INTERNET!"):
|
| 308 |
# let this go through to the push_all func, since it just reports to log for now.
|
| 309 |
+
push_all_observations(enable_push=False)
|
| 310 |
st.session_state.workflow_fsm.complete_current_state()
|
| 311 |
# -> data_uploaded
|
| 312 |
+
st.rerun()
|
| 313 |
|
| 314 |
cetacean_show_results()
|
| 315 |
|
| 316 |
elif st.session_state.workflow_fsm.is_in_state('data_uploaded'):
|
| 317 |
# the data has been sent. Lets show the observations again
|
| 318 |
# but no buttons to upload (or greyed out ok)
|
| 319 |
+
st.markdown("""### Observation(s) uploaded - thank you!""")
|
| 320 |
+
cetacean_show_results()
|
| 321 |
+
|
| 322 |
+
st.divider()
|
| 323 |
df = pd.DataFrame(st.session_state.observations, index=[0])
|
| 324 |
st.table(df)
|
| 325 |
|