Spaces:

Saving-Willy
/

saving-willy-dev

Sleeping

App Files Files Community

vancauwe committed on Mar 19, 2025

Commit

268ccba

unverified ·

2 Parent(s): c961cff 88f66c3

Merge pull request #39 from sdsc-ordes/fix/nofail-on-missing-ext-resource

Browse files

Files changed (2) hide show

src/maps/obs_map.py +60 -7
tests/test_obs_map.py +65 -0

src/maps/obs_map.py CHANGED Viewed

@@ -3,6 +3,10 @@ import logging
 import pandas as pd
 from datasets import load_dataset
 import streamlit as st
 import folium
 from streamlit_folium import st_folium
@@ -62,6 +66,13 @@ _colors = [
 whale2color = {k: v for k, v in zip(viewer.WHALE_CLASSES, _colors)}
 def create_map(tile_name:str, location:Tuple[float], zoom_start: int = 7) -> folium.Map:
     """
     Create a folium map with the specified tile layer
@@ -113,6 +124,43 @@ def create_map(tile_name:str, location:Tuple[float], zoom_start: int = 7) -> fol
     #folium.LayerControl().add_to(m)
     return m
 def present_obs_map(dataset_id:str = "Saving-Willy/Happywhale-kaggle",
@@ -139,14 +187,19 @@ def present_obs_map(dataset_id:str = "Saving-Willy/Happywhale-kaggle",
     """
     # load/download data from huggingface dataset
-    metadata = load_dataset(dataset_id, data_files=data_files)
-    # make a pandas df that is compliant with folium/streamlit maps
-    _df = pd.DataFrame({
-        'lat': metadata["train"]["latitude"],
-        'lon': metadata["train"]["longitude"],
-        'species': metadata["train"]["predicted_class"],}
-    )
     if dbg_show_extra:
         # add a few samples to visualise colours
         _df.loc[len(_df)] = {'lat': 0, 'lon': 0, 'species': 'rough_toothed_dolphin'}

 import pandas as pd
 from datasets import load_dataset
+from datasets import DatasetDict, Dataset
+import time
 import streamlit as st
 import folium
 from streamlit_folium import st_folium
 whale2color = {k: v for k, v in zip(viewer.WHALE_CLASSES, _colors)}
+presentation_data_schema = {  # column name -> dtype string; present_obs_map passes it to DataFrame(columns=...).astype(...) to build an empty frame
+    'lat': 'float',  # filled from the dataset's "latitude" field in present_obs_map
+    'lon': 'float',  # filled from the dataset's "longitude" field in present_obs_map
+    'species': 'str',  # filled from the dataset's "predicted_class" field in present_obs_map
+}
 def create_map(tile_name:str, location:Tuple[float], zoom_start: int = 7) -> folium.Map:
     """
     Create a folium map with the specified tile layer
     #folium.LayerControl().add_to(m)
     return m
+def try_download_dataset(dataset_id:str, data_files:str) -> dict:
+    """
+    Attempts to download a dataset from Hugging Face, catching any errors that occur.
+    Args:
+        dataset_id (str): The ID of the dataset to download.
+        data_files (str): The data files associated with the dataset.
+    Returns:
+        dict: A dictionary containing the dataset metadata if the download is successful,
+              or an empty dictionary if an error occurs.
+    """
+    m_logger.info(f"Starting to download dataset {dataset_id} from Hugging Face")
+    t1 = time.time()
+    try:
+        metadata:DatasetDict = load_dataset(dataset_id, data_files=data_files)
+        t2 = time.time(); elap = t2 - t1
+    except ValueError as e:
+        t2 = time.time(); elap = t2 - t1
+        msg = f"Error downloading dataset: {e}.  (after {elap:.2f}s)."
+        st.error(msg)
+        m_logger.error(msg)
+        metadata = {}
+    except Exception as e:
+        # catch all (other) exceptions and log them, handle them once isolated
+        t2 = time.time(); elap = t2 - t1
+        msg = f"!!Unknown Error!! downloading dataset: {e}.  (after {elap:.2f}s)."
+        st.error(msg)
+        m_logger.error(msg)
+        metadata = {}
+    msg = f"Downloaded dataset: (after {elap:.2f}s). "
+    m_logger.info(msg)
+    st.write(msg)
+    return metadata
 def present_obs_map(dataset_id:str = "Saving-Willy/Happywhale-kaggle",
     """
     # load/download data from huggingface dataset
+    metadata = try_download_dataset(dataset_id, data_files)
+    if not metadata:
+        # create an empty, but compliant dataframe
+        _df = pd.DataFrame(columns=presentation_data_schema).astype(presentation_data_schema)
+    else:
+        # make a pandas df that is compliant with folium/streamlit maps
+        _df = pd.DataFrame({
+            'lat': metadata["train"]["latitude"],
+            'lon': metadata["train"]["longitude"],
+            'species': metadata["train"]["predicted_class"],}
+        )
     if dbg_show_extra:
         # add a few samples to visualise colours
         _df.loc[len(_df)] = {'lat': 0, 'lon': 0, 'species': 'rough_toothed_dolphin'}

tests/test_obs_map.py ADDED Viewed

	@@ -0,0 +1,65 @@

+import pytest
+from unittest.mock import patch, MagicMock
+from maps.obs_map import try_download_dataset
+# tests for try_download_dataset
+# - the main aim here is to mock the function load_dataset, which makes external HTTP requests,
+#   and to follow both the successful and the failing pathways.
+# - test templates were generated with Copilot; they check the text/messages too, but the core
+#   is the return value, which should have a similar form but changes depending on whether an exception was raised
+# since this function uses st and m_logger to keep track of the download status, we need to mock them too
+@patch('maps.obs_map.load_dataset')
+@patch('maps.obs_map.st')
+@patch('maps.obs_map.m_logger')
+def test_try_download_dataset_success(mock_logger, mock_st, mock_load_dataset):
+    # Mock the return value of load_dataset
+    mock_load_dataset.return_value = {'train': {'latitude': [1], 'longitude': [2], 'predicted_class': ['whale']}}
+    dataset_id = "test_dataset"
+    data_files = "test_file"
+    result = try_download_dataset(dataset_id, data_files)
+    # Assertions
+    mock_logger.info.assert_any_call(f"Starting to download dataset {dataset_id} from Hugging Face")
+    mock_load_dataset.assert_called_once_with(dataset_id, data_files=data_files)
+    assert result == {'train': {'latitude': [1], 'longitude': [2], 'predicted_class': ['whale']}}
+    mock_logger.info.assert_called_with("Downloaded dataset: (after 0.00s). ")
+    mock_st.write.assert_called_with("Downloaded dataset: (after 0.00s). ")
+@patch('maps.obs_map.load_dataset', side_effect=ValueError("Download failed"))
+@patch('maps.obs_map.st')
+@patch('maps.obs_map.m_logger')
+def test_try_download_dataset_failure_known(mock_logger, mock_st, mock_load_dataset):
+    # testing the case where we've found (can reproduce by removing network connection)
+    dataset_id = "test_dataset"
+    data_files = "test_file"
+    result = try_download_dataset(dataset_id, data_files)
+    # Assertions
+    mock_logger.info.assert_any_call(f"Starting to download dataset {dataset_id} from Hugging Face")
+    mock_load_dataset.assert_called_once_with(dataset_id, data_files=data_files)
+    mock_logger.error.assert_called_with("Error downloading dataset: Download failed.  (after 0.00s).")
+    mock_st.error.assert_called_with("Error downloading dataset: Download failed.  (after 0.00s).")
+    assert result == {}
+    mock_logger.info.assert_called_with("Downloaded dataset: (after 0.00s). ")
+    mock_st.write.assert_called_with("Downloaded dataset: (after 0.00s). ")
+@patch('maps.obs_map.load_dataset', side_effect=Exception("Download engine corrupt"))
+@patch('maps.obs_map.st')
+@patch('maps.obs_map.m_logger')
+def test_try_download_dataset_failure_unknown(mock_logger, mock_st, mock_load_dataset):
+    # the cases we haven't found, but should still be handled (maybe network error, etc)
+    dataset_id = "test_dataset"
+    data_files = "test_file"
+    result = try_download_dataset(dataset_id, data_files)
+    # Assertions
+    mock_logger.info.assert_any_call(f"Starting to download dataset {dataset_id} from Hugging Face")
+    mock_load_dataset.assert_called_once_with(dataset_id, data_files=data_files)
+    mock_logger.error.assert_called_with("!!Unknown Error!! downloading dataset: Download engine corrupt.  (after 0.00s).")
+    mock_st.error.assert_called_with("!!Unknown Error!! downloading dataset: Download engine corrupt.  (after 0.00s).")
+    assert result == {}
+    mock_logger.info.assert_called_with("Downloaded dataset: (after 0.00s). ")
+    mock_st.write.assert_called_with("Downloaded dataset: (after 0.00s). ")