Spaces:

Saving-Willy
/

saving-willy-dev

Sleeping

rmm committed on Jan 31, 2025

Commit

11550ac

1 Parent(s): bb4e7ad

feat: push observations functions for multi-file handling

- The current implementation opens the HF handle once, then prepares and
pushes each observation individually. It may be worth checking the docs
for a way to push multiple observations in one transaction.

- At present the `api.upload_file` call is commented out, so the functions
only produce log/visual info about the actions they would take.

Files changed (2) hide show

src/hf_push_observations.py +72 -5
src/main.py +6 -0

src/hf_push_observations.py CHANGED Viewed

@@ -1,15 +1,83 @@
-from streamlit.delta_generator import DeltaGenerator
-import streamlit as st
-from huggingface_hub import HfApi
 import json
 import tempfile
 import logging
 # get a global var for logger accessor in this module
 LOG_LEVEL = logging.DEBUG
 g_logger = logging.getLogger(__name__)
 g_logger.setLevel(LOG_LEVEL)
 def push_observations(tab_log:DeltaGenerator=None):
     """
     Push the observations to the Hugging Face dataset
@@ -30,7 +98,6 @@ def push_observations(tab_log:DeltaGenerator=None):
         tab_log.info(f"Uploading observations: {metadata_str}")
     # get huggingface api
-    import os
     token = os.environ.get("HF_TOKEN", None)
     api = HfApi(token=token)
@@ -53,4 +120,4 @@ def push_observations(tab_log:DeltaGenerator=None):
     # msg = f"observation attempted tx to repo happy walrus: {rv}"
     g_logger.info(msg)
     st.info(msg)

+import os
 import json
 import tempfile
 import logging
+from streamlit.delta_generator import DeltaGenerator
+import streamlit as st
+from huggingface_hub import HfApi, CommitInfo
 # get a global var for logger accessor in this module
 LOG_LEVEL = logging.DEBUG
 g_logger = logging.getLogger(__name__)
 g_logger.setLevel(LOG_LEVEL)
+def push_observation(image_hash:str, api:HfApi) -> CommitInfo:
+    '''
+    push one observation to the Hugging Face dataset
+    '''
+    # get the observation
+    observation = st.session_state.public_observations.get(image_hash)
+    if observation is None:
+        msg = f"Could not find observation with hash {image_hash}"
+        g_logger.error(msg)
+        st.error(msg)
+        return None
+    # convert to json
+    metadata_str = json.dumps(observation) # doesn't work yet, TODO
+    st.toast(f"Uploading observation: {metadata_str}", icon="🦭")
+    tab_log = st.session_state.tab_log
+    if tab_log is not None:
+        tab_log.info(f"Uploading observation: {metadata_str}")
+    # write to temp file so we can send it (why is this not using context mgr?)
+    f = tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False)
+    f.write(metadata_str)
+    f.close()
+    st.info(f"temp file: {f.name} with metadata written...")
+    # observation['author_email']
+    # observation['image_md5']
+    path_in_repo = f"metadata/{observation['author_email']}/{observation['image_md5']}.json"
+    msg = f"fname: {f.name} | path: {path_in_repo}"
+    print(msg)
+    st.warning(msg)
+    rv = None # temp don't send anything
+    # rv = api.upload_file(
+    #     path_or_fileobj=f.name,
+    #     path_in_repo=path_in_repo,
+    #     repo_id="Saving-Willy/temp_dataset",
+    #     repo_type="dataset",
+    # )
+    # print(rv)
+    # msg = f"observation attempted tx to repo happy walrus: {rv}"
+    g_logger.info(msg)
+    st.info(msg)
+    return rv
+def push_all_observations():
+    '''
+    open an API connection to Hugging Face, and push all observation one by one
+    '''
+    # get huggingface api
+    token = os.environ.get("HF_TOKEN", None)
+    api = HfApi(token=token)
+    # iterate over the list of observations
+    for hash in st.session_state.public_observations.keys():
+        rv = push_observation(hash, api)
 def push_observations(tab_log:DeltaGenerator=None):
     """
     Push the observations to the Hugging Face dataset
         tab_log.info(f"Uploading observations: {metadata_str}")
     # get huggingface api
     token = os.environ.get("HF_TOKEN", None)
     api = HfApi(token=token)
     # msg = f"observation attempted tx to repo happy walrus: {rv}"
     g_logger.info(msg)
     st.info(msg)

src/main.py CHANGED Viewed

@@ -24,6 +24,9 @@ from maps.obs_map import present_obs_map
 from utils.st_logs import setup_logging, parse_log_buffer
 from utils.workflow_state import WorkflowFSM, FSM_STATES
 from utils.workflow_ui import refresh_progress, init_workflow_viz
 #from classifier.classifier_image import cetacean_classify
 from classifier.classifier_image import cetacean_just_classify, cetacean_show_results_and_review, cetacean_show_results
@@ -287,6 +290,7 @@ def main() -> None:
             if st.button("mock: manual validation done."):
                 st.session_state.workflow_fsm.complete_current_state()
                 # -> manual_inspection_completed
             cetacean_show_results_and_review()
@@ -299,6 +303,8 @@ def main() -> None:
             if st.button("(nooop) Upload observation to THE INTERNET!"):
                 st.session_state.workflow_fsm.complete_current_state()
                 # -> data_uploaded

 from utils.st_logs import setup_logging, parse_log_buffer
 from utils.workflow_state import WorkflowFSM, FSM_STATES
 from utils.workflow_ui import refresh_progress, init_workflow_viz
+from hf_push_observations import push_all_observations
 #from classifier.classifier_image import cetacean_classify
 from classifier.classifier_image import cetacean_just_classify, cetacean_show_results_and_review, cetacean_show_results
             if st.button("mock: manual validation done."):
                 st.session_state.workflow_fsm.complete_current_state()
                 # -> manual_inspection_completed
+                st.rerun()
             cetacean_show_results_and_review()
             if st.button("(nooop) Upload observation to THE INTERNET!"):
+                # let this go through to the push_all func, since it just reports to log for now.
+                push_all_observations()
                 st.session_state.workflow_fsm.complete_current_state()
                 # -> data_uploaded