Spaces:
Sleeping
Sleeping
rmm
committed on
Commit
·
71dfd99
1
Parent(s):
8c4b1f7
feat: extended InputObservation to contain species/prediction info
Browse files- when manual validation is performed (dropdown selection among
species), it is written to the observations (and not the
dynamically-created dicts).
- TODO: decide if we need to retain public_observations in
session_state, or just generate the dict each time it is needed.
- src/classifier/classifier_image.py +15 -16
- src/input/input_observation.py +27 -1
- src/main.py +4 -2
- src/utils/metadata_handler.py +4 -1
src/classifier/classifier_image.py
CHANGED
|
@@ -10,6 +10,7 @@ import whale_viewer as viewer
|
|
| 10 |
from hf_push_observations import push_observations
|
| 11 |
from utils.grid_maker import gridder
|
| 12 |
from utils.metadata_handler import metadata2md
|
|
|
|
| 13 |
|
| 14 |
def add_header_text() -> None:
|
| 15 |
"""
|
|
@@ -24,12 +25,11 @@ def add_header_text() -> None:
|
|
| 24 |
def cetacean_just_classify(cetacean_classifier):
|
| 25 |
|
| 26 |
images = st.session_state.images
|
| 27 |
-
observations = st.session_state.observations
|
| 28 |
hashes = st.session_state.image_hashes
|
| 29 |
|
| 30 |
for hash in hashes:
|
| 31 |
image = images[hash]
|
| 32 |
-
observation = observations[hash].to_dict()
|
| 33 |
# run classifier model on `image`, and persistently store the output
|
| 34 |
out = cetacean_classifier(image) # get top 3 matches
|
| 35 |
st.session_state.whale_prediction1[hash] = out['predictions'][0]
|
|
@@ -39,8 +39,6 @@ def cetacean_just_classify(cetacean_classifier):
|
|
| 39 |
msg = f"[D]2 classify_whale_done for {hash}: {st.session_state.classify_whale_done[hash]}, whale_prediction1: {st.session_state.whale_prediction1[hash]}"
|
| 40 |
g_logger.info(msg)
|
| 41 |
|
| 42 |
-
# store the elements of the observation that will be transmitted (not image)
|
| 43 |
-
st.session_state.public_observations[hash] = observation
|
| 44 |
if st.session_state.MODE_DEV_STATEFUL:
|
| 45 |
st.write(f"*[D] Observation {hash} classified as {st.session_state.whale_prediction1[hash]}*")
|
| 46 |
|
|
@@ -58,7 +56,8 @@ def cetacean_show_results_and_review():
|
|
| 58 |
|
| 59 |
for hash in hashes:
|
| 60 |
image = images[hash]
|
| 61 |
-
observation = observations[hash].to_dict()
|
|
|
|
| 62 |
|
| 63 |
with grid[col]:
|
| 64 |
st.image(image, use_column_width=True)
|
|
@@ -75,14 +74,19 @@ def cetacean_show_results_and_review():
|
|
| 75 |
ix = viewer.WHALE_CLASSES.index(pred1) if pred1 in viewer.WHALE_CLASSES else None
|
| 76 |
selected_class = st.selectbox(f"Species for observation {str(o)}", viewer.WHALE_CLASSES, index=ix)
|
| 77 |
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
|
|
|
|
|
|
| 81 |
|
|
|
|
|
|
|
| 82 |
st.session_state.public_observations[hash] = observation
|
|
|
|
| 83 |
#st.button(f"Upload observation {str(o)} to THE INTERNET!", on_click=push_observations)
|
| 84 |
# TODO: the metadata only fills properly if `validate` was clicked.
|
| 85 |
-
st.markdown(metadata2md(hash))
|
| 86 |
|
| 87 |
msg = f"[D] full observation after inference: {observation}"
|
| 88 |
g_logger.debug(msg)
|
|
@@ -138,12 +142,7 @@ def cetacean_show_results():
|
|
| 138 |
|
| 139 |
#st.button(f"Upload observation {str(o)} to THE INTERNET!", on_click=push_observations)
|
| 140 |
#
|
| 141 |
-
st.markdown(metadata2md(hash))
|
| 142 |
-
# TODO: FIXME: this is the data that will get pushed -- it DOESN'T reflect any adjustments
|
| 143 |
-
# # made via the dropdown on the last step!!!!
|
| 144 |
-
#st.markdown(f"- **selected species**: {observation['predicted_class']}")
|
| 145 |
-
st.markdown(f"- **selected species**: {st.session_state.whale_prediction1[hash]}")
|
| 146 |
-
st.markdown(f"- **hash**: {hash}")
|
| 147 |
|
| 148 |
msg = f"[D] full observation after inference: {observation}"
|
| 149 |
g_logger.debug(msg)
|
|
@@ -223,4 +222,4 @@ def cetacean_classify_show_and_review(cetacean_classifier):
|
|
| 223 |
for i in range(len(whale_classes)):
|
| 224 |
viewer.display_whale(whale_classes, i)
|
| 225 |
o += 1
|
| 226 |
-
col = (col + 1) % row_size
|
|
|
|
| 10 |
from hf_push_observations import push_observations
|
| 11 |
from utils.grid_maker import gridder
|
| 12 |
from utils.metadata_handler import metadata2md
|
| 13 |
+
from input.input_observation import InputObservation
|
| 14 |
|
| 15 |
def add_header_text() -> None:
|
| 16 |
"""
|
|
|
|
| 25 |
def cetacean_just_classify(cetacean_classifier):
|
| 26 |
|
| 27 |
images = st.session_state.images
|
| 28 |
+
#observations = st.session_state.observations
|
| 29 |
hashes = st.session_state.image_hashes
|
| 30 |
|
| 31 |
for hash in hashes:
|
| 32 |
image = images[hash]
|
|
|
|
| 33 |
# run classifier model on `image`, and persistently store the output
|
| 34 |
out = cetacean_classifier(image) # get top 3 matches
|
| 35 |
st.session_state.whale_prediction1[hash] = out['predictions'][0]
|
|
|
|
| 39 |
msg = f"[D]2 classify_whale_done for {hash}: {st.session_state.classify_whale_done[hash]}, whale_prediction1: {st.session_state.whale_prediction1[hash]}"
|
| 40 |
g_logger.info(msg)
|
| 41 |
|
|
|
|
|
|
|
| 42 |
if st.session_state.MODE_DEV_STATEFUL:
|
| 43 |
st.write(f"*[D] Observation {hash} classified as {st.session_state.whale_prediction1[hash]}*")
|
| 44 |
|
|
|
|
| 56 |
|
| 57 |
for hash in hashes:
|
| 58 |
image = images[hash]
|
| 59 |
+
#observation = observations[hash].to_dict()
|
| 60 |
+
_observation:InputObservation = observations[hash]
|
| 61 |
|
| 62 |
with grid[col]:
|
| 63 |
st.image(image, use_column_width=True)
|
|
|
|
| 74 |
ix = viewer.WHALE_CLASSES.index(pred1) if pred1 in viewer.WHALE_CLASSES else None
|
| 75 |
selected_class = st.selectbox(f"Species for observation {str(o)}", viewer.WHALE_CLASSES, index=ix)
|
| 76 |
|
| 77 |
+
_observation.set_selected_class(selected_class)
|
| 78 |
+
#observation['predicted_class'] = selected_class
|
| 79 |
+
# this logic is now in the InputObservation class automatically
|
| 80 |
+
#if selected_class != st.session_state.whale_prediction1[hash]:
|
| 81 |
+
# observation['class_overriden'] = selected_class # TODO: this should be boolean!
|
| 82 |
|
| 83 |
+
# store the elements of the observation that will be transmitted (not image)
|
| 84 |
+
observation = _observation.to_dict()
|
| 85 |
st.session_state.public_observations[hash] = observation
|
| 86 |
+
|
| 87 |
#st.button(f"Upload observation {str(o)} to THE INTERNET!", on_click=push_observations)
|
| 88 |
# TODO: the metadata only fills properly if `validate` was clicked.
|
| 89 |
+
st.markdown(metadata2md(hash, debug=True))
|
| 90 |
|
| 91 |
msg = f"[D] full observation after inference: {observation}"
|
| 92 |
g_logger.debug(msg)
|
|
|
|
| 142 |
|
| 143 |
#st.button(f"Upload observation {str(o)} to THE INTERNET!", on_click=push_observations)
|
| 144 |
#
|
| 145 |
+
st.markdown(metadata2md(hash, debug=True))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
|
| 147 |
msg = f"[D] full observation after inference: {observation}"
|
| 148 |
g_logger.debug(msg)
|
|
|
|
| 222 |
for i in range(len(whale_classes)):
|
| 223 |
viewer.display_whale(whale_classes, i)
|
| 224 |
o += 1
|
| 225 |
+
col = (col + 1) % row_size
|
src/input/input_observation.py
CHANGED
|
@@ -68,7 +68,10 @@ class InputObservation:
|
|
| 68 |
self.time = time
|
| 69 |
self.uploaded_file = uploaded_file
|
| 70 |
self.image_md5 = image_md5
|
|
|
|
| 71 |
self._top_predictions = []
|
|
|
|
|
|
|
| 72 |
|
| 73 |
InputObservation._inst_count += 1
|
| 74 |
self._inst_id = InputObservation._inst_count
|
|
@@ -81,11 +84,30 @@ class InputObservation:
|
|
| 81 |
|
| 82 |
def set_top_predictions(self, top_predictions:list):
|
| 83 |
self._top_predictions = top_predictions
|
|
|
|
|
|
|
| 84 |
|
| 85 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
@property
|
| 87 |
def top_predictions(self):
|
| 88 |
return self._top_predictions
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
|
| 90 |
# add a method to assign the image_md5 only once
|
| 91 |
def assign_image_md5(self):
|
|
@@ -194,6 +216,10 @@ class InputObservation:
|
|
| 194 |
"image_datetime_raw": self.image_datetime_raw,
|
| 195 |
"date": str(self.date),
|
| 196 |
"time": str(self.time),
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
#"uploaded_file": self.uploaded_file # can't serialize this in json, not sent to dataset anyway.
|
| 198 |
}
|
| 199 |
|
|
|
|
| 68 |
self.time = time
|
| 69 |
self.uploaded_file = uploaded_file
|
| 70 |
self.image_md5 = image_md5
|
| 71 |
+
# attributes that get set after predictions/processing
|
| 72 |
self._top_predictions = []
|
| 73 |
+
self._selected_class = None
|
| 74 |
+
self._class_overriden = False
|
| 75 |
|
| 76 |
InputObservation._inst_count += 1
|
| 77 |
self._inst_id = InputObservation._inst_count
|
|
|
|
| 84 |
|
| 85 |
def set_top_predictions(self, top_predictions:list):
|
| 86 |
self._top_predictions = top_predictions
|
| 87 |
+
if len(top_predictions) > 0:
|
| 88 |
+
self.set_selected_class(top_predictions[0])
|
| 89 |
|
| 90 |
+
def set_selected_class(self, selected_class:str):
|
| 91 |
+
self._selected_class = selected_class
|
| 92 |
+
if selected_class != self._top_predictions[0]:
|
| 93 |
+
self.set_class_overriden(True)
|
| 94 |
+
|
| 95 |
+
def set_class_overriden(self, class_overriden:bool):
|
| 96 |
+
self._class_overriden = class_overriden
|
| 97 |
+
|
| 98 |
+
# add getters for the top_predictions, selected_class and class_overriden
|
| 99 |
@property
|
| 100 |
def top_predictions(self):
|
| 101 |
return self._top_predictions
|
| 102 |
+
|
| 103 |
+
@property
|
| 104 |
+
def selected_class(self):
|
| 105 |
+
return self._selected_class
|
| 106 |
+
|
| 107 |
+
@property
|
| 108 |
+
def class_overriden(self):
|
| 109 |
+
return self._class_overriden
|
| 110 |
+
|
| 111 |
|
| 112 |
# add a method to assign the image_md5 only once
|
| 113 |
def assign_image_md5(self):
|
|
|
|
| 216 |
"image_datetime_raw": self.image_datetime_raw,
|
| 217 |
"date": str(self.date),
|
| 218 |
"time": str(self.time),
|
| 219 |
+
"selected_class": self._selected_class,
|
| 220 |
+
"top_prediction": self._top_predictions[0] if len(self._top_predictions) else None,
|
| 221 |
+
"class_overriden": self._class_overriden,
|
| 222 |
+
|
| 223 |
#"uploaded_file": self.uploaded_file # can't serialize this in json, not sent to dataset anyway.
|
| 224 |
}
|
| 225 |
|
src/main.py
CHANGED
|
@@ -237,7 +237,8 @@ def main() -> None:
|
|
| 237 |
if st.sidebar.button(":white_check_mark:[**Validate**]"):
|
| 238 |
# create a dictionary with the submitted observation
|
| 239 |
tab_log.info(f"{st.session_state.observations}")
|
| 240 |
-
df = pd.DataFrame(st.session_state.observations, index=[0])
|
|
|
|
| 241 |
with tab_coords:
|
| 242 |
st.table(df)
|
| 243 |
# there doesn't seem to be any actual validation here?? TODO: find validator function (each element is validated by the input box, but is there something at the whole image level?)
|
|
@@ -320,7 +321,8 @@ def main() -> None:
|
|
| 320 |
cetacean_show_results()
|
| 321 |
|
| 322 |
st.divider()
|
| 323 |
-
df = pd.DataFrame(st.session_state.observations, index=[0])
|
|
|
|
| 324 |
st.table(df)
|
| 325 |
|
| 326 |
# didn't decide what the next state is here - I think we are in the terminal state.
|
|
|
|
| 237 |
if st.sidebar.button(":white_check_mark:[**Validate**]"):
|
| 238 |
# create a dictionary with the submitted observation
|
| 239 |
tab_log.info(f"{st.session_state.observations}")
|
| 240 |
+
df = pd.DataFrame([obs.to_dict() for obs in st.session_state.observations.values()])
|
| 241 |
+
#df = pd.DataFrame(st.session_state.observations, index=[0])
|
| 242 |
with tab_coords:
|
| 243 |
st.table(df)
|
| 244 |
# there doesn't seem to be any actual validation here?? TODO: find validator function (each element is validated by the input box, but is there something at the whole image level?)
|
|
|
|
| 321 |
cetacean_show_results()
|
| 322 |
|
| 323 |
st.divider()
|
| 324 |
+
#df = pd.DataFrame(st.session_state.observations, index=[0])
|
| 325 |
+
df = pd.DataFrame([obs.to_dict() for obs in st.session_state.observations.values()])
|
| 326 |
st.table(df)
|
| 327 |
|
| 328 |
# didn't decide what the next state is here - I think we are in the terminal state.
|
src/utils/metadata_handler.py
CHANGED
|
@@ -1,10 +1,11 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
|
| 3 |
-
def metadata2md(image_hash:str) -> str:
|
| 4 |
"""Get metadata from cache and return as markdown-formatted key-value list
|
| 5 |
|
| 6 |
Args:
|
| 7 |
image_hash (str): The hash of the image to get metadata for
|
|
|
|
| 8 |
|
| 9 |
Returns:
|
| 10 |
str: Markdown-formatted key-value list of metadata
|
|
@@ -12,6 +13,8 @@ def metadata2md(image_hash:str) -> str:
|
|
| 12 |
"""
|
| 13 |
markdown_str = "\n"
|
| 14 |
keys_to_print = ["author_email", "latitude", "longitude", "date", "time"]
|
|
|
|
|
|
|
| 15 |
|
| 16 |
observation = st.session_state.public_observations.get(image_hash, {})
|
| 17 |
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
|
| 3 |
+
def metadata2md(image_hash:str, debug:bool=False) -> str:
|
| 4 |
"""Get metadata from cache and return as markdown-formatted key-value list
|
| 5 |
|
| 6 |
Args:
|
| 7 |
image_hash (str): The hash of the image to get metadata for
|
| 8 |
+
debug (bool, optional): Whether to print additional fields.
|
| 9 |
|
| 10 |
Returns:
|
| 11 |
str: Markdown-formatted key-value list of metadata
|
|
|
|
| 13 |
"""
|
| 14 |
markdown_str = "\n"
|
| 15 |
keys_to_print = ["author_email", "latitude", "longitude", "date", "time"]
|
| 16 |
+
if debug:
|
| 17 |
+
keys_to_print += ["image_md5", "selected_class", "top_prediction", "class_overriden"]
|
| 18 |
|
| 19 |
observation = st.session_state.public_observations.get(image_hash, {})
|
| 20 |
|