Spaces:
Sleeping
Sleeping
Merge pull request #24 from sdsc-ordes/feat/image-batch
Browse files- Dockerfile +42 -0
- README.md +1 -1
- basic_map/app.py +0 -21
- basic_map/app1.py +0 -42
- basic_map/requirements.txt +0 -4
- docs/app.md +0 -5
- requirements.txt +1 -1
- src/classifier/classifier_hotdog.py +26 -0
- src/classifier/classifier_image.py +68 -0
- src/classifier_image.py +70 -0
- src/hf_push_observations.py +56 -0
- src/input/input_handling.py +118 -0
- src/{input_handling.py β input/input_observation.py} +4 -250
- src/input/input_validator.py +125 -0
- src/main.py +42 -165
- src/{alps_map.py β maps/alps_map.py} +0 -0
- src/{obs_map.py β maps/obs_map.py} +3 -3
- src/{fix_tabrender.py β utils/fix_tabrender.py} +0 -0
- src/utils/grid_maker.py +13 -0
- src/utils/metadata_handler.py +16 -0
- src/{st_logs.py β utils/st_logs.py} +0 -0
- src/whale_viewer.py +4 -4
- tests/test_input_handling.py +1 -1
Dockerfile
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
From ubuntu:latest
|
| 2 |
+
|
| 3 |
+
RUN apt-get update
|
| 4 |
+
RUN apt-get install python3 python3-pip -y
|
| 5 |
+
|
| 6 |
+
# https://stackoverflow.com/questions/75608323/how-do-i-solve-error-externally-managed-environment-every-time-i-use-pip-3
|
| 7 |
+
# https://veronneau.org/python-311-pip-and-breaking-system-packages.html
|
| 8 |
+
ENV PIP_BREAK_SYSTEM_PACKAGES 1
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
##################################################
|
| 12 |
+
# Ubuntu setup
|
| 13 |
+
##################################################
|
| 14 |
+
|
| 15 |
+
RUN apt-get update \
|
| 16 |
+
&& apt-get install -y wget \
|
| 17 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 18 |
+
|
| 19 |
+
RUN apt-get update && apt-get -y upgrade \
|
| 20 |
+
&& apt-get install -y --no-install-recommends \
|
| 21 |
+
unzip \
|
| 22 |
+
nano \
|
| 23 |
+
git \
|
| 24 |
+
g++ \
|
| 25 |
+
gcc \
|
| 26 |
+
htop \
|
| 27 |
+
zip \
|
| 28 |
+
ca-certificates \
|
| 29 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 30 |
+
|
| 31 |
+
##################################################
|
| 32 |
+
# ODTP setup
|
| 33 |
+
##################################################
|
| 34 |
+
|
| 35 |
+
RUN mkdir /app
|
| 36 |
+
COPY . /saving-willy
|
| 37 |
+
RUN pip3 install --upgrade setuptools
|
| 38 |
+
RUN pip3 install -r /saving-willy/requirements.txt
|
| 39 |
+
|
| 40 |
+
WORKDIR /saving-willy
|
| 41 |
+
|
| 42 |
+
ENTRYPOINT bash
|
README.md
CHANGED
|
@@ -28,7 +28,7 @@ pip install -r requirements.txt
|
|
| 28 |
```
|
| 29 |
|
| 30 |
```
|
| 31 |
-
streamlit run
|
| 32 |
```
|
| 33 |
|
| 34 |
|
|
|
|
| 28 |
```
|
| 29 |
|
| 30 |
```
|
| 31 |
+
streamlit run src/main.py
|
| 32 |
```
|
| 33 |
|
| 34 |
|
basic_map/app.py
DELETED
|
@@ -1,21 +0,0 @@
|
|
| 1 |
-
import pandas as pd
|
| 2 |
-
import streamlit as st
|
| 3 |
-
import folium
|
| 4 |
-
|
| 5 |
-
from streamlit_folium import st_folium
|
| 6 |
-
from streamlit_folium import folium_static
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
visp_loc = 46.295833, 7.883333
|
| 10 |
-
#m = folium.Map(location=visp_loc, zoom_start=9)
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
st.markdown("# :whale: :whale: Cetaceans :red[& friends] :balloon:")
|
| 14 |
-
|
| 15 |
-
m = folium.Map(location=visp_loc, zoom_start=9,
|
| 16 |
-
tiles='https://tile.opentopomap.org/{z}/{x}/{y}.png',
|
| 17 |
-
attr='<a href="https://opentopomap.org/">Open Topo Map</a>')
|
| 18 |
-
|
| 19 |
-
folium_static(m)
|
| 20 |
-
|
| 21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
basic_map/app1.py
DELETED
|
@@ -1,42 +0,0 @@
|
|
| 1 |
-
# lets try using map stuff without folium, maybe stlite doesnt support that.
|
| 2 |
-
|
| 3 |
-
import streamlit as st
|
| 4 |
-
import pandas as pd
|
| 5 |
-
|
| 6 |
-
# Load data
|
| 7 |
-
f = 'mountains_clr.csv'
|
| 8 |
-
df = pd.read_csv(f).dropna()
|
| 9 |
-
|
| 10 |
-
print(df)
|
| 11 |
-
|
| 12 |
-
st.markdown("# :whale: :whale: Cetaceans :red[& friends] :balloon:")
|
| 13 |
-
|
| 14 |
-
st.markdown("## :mountain: Mountains")
|
| 15 |
-
st.markdown(f"library version: **{st.__version__}**")
|
| 16 |
-
# not sure where my versions are getting pegged from, but we have a 1y spread :(
|
| 17 |
-
# https://github.com/streamlit/streamlit/blob/1.24.1/lib/streamlit/elements/map.py
|
| 18 |
-
# rather hard to find the docs for old versions, no selector unlike many libraries.
|
| 19 |
-
|
| 20 |
-
visp_loc = 46.295833, 7.883333
|
| 21 |
-
tile_xyz = 'https://tile.opentopomap.org/{z}/{x}/{y}.png'
|
| 22 |
-
tile_attr = '<a href="https://opentopomap.org/">Open Topo Map</a>'
|
| 23 |
-
st.map(df, latitude='lat', longitude='lon', color='color', size='size', zoom=7)
|
| 24 |
-
#, tiles=tile_xyz, attr=tile_attr)
|
| 25 |
-
|
| 26 |
-
#st.map(df)
|
| 27 |
-
|
| 28 |
-
#st.map(df, latitude="col1", longitude="col2", size="col3", color="col4")
|
| 29 |
-
|
| 30 |
-
import numpy as np
|
| 31 |
-
|
| 32 |
-
df2 = pd.DataFrame(
|
| 33 |
-
{
|
| 34 |
-
"col1": np.random.randn(1000) / 50 + 37.76,
|
| 35 |
-
"col2": np.random.randn(1000) / 50 + -122.4,
|
| 36 |
-
"col3": np.random.randn(1000) * 100,
|
| 37 |
-
"col4": np.random.rand(1000, 4).tolist(),
|
| 38 |
-
}
|
| 39 |
-
)
|
| 40 |
-
#st.map(df, latitude="col1", longitude="col2", size="col3", color="col4")
|
| 41 |
-
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
basic_map/requirements.txt
DELETED
|
@@ -1,4 +0,0 @@
|
|
| 1 |
-
streamlit
|
| 2 |
-
folium
|
| 3 |
-
streamlit-folium
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
docs/app.md
CHANGED
|
@@ -1,5 +0,0 @@
|
|
| 1 |
-
Here is the documentation for the app code generating the streamlit front-end.
|
| 2 |
-
|
| 3 |
-
# Streamlit App
|
| 4 |
-
|
| 5 |
-
::: basic_map.app
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
numpy==1.
|
| 2 |
pandas==2.2.3
|
| 3 |
|
| 4 |
|
|
|
|
| 1 |
+
numpy==1.26.4
|
| 2 |
pandas==2.2.3
|
| 3 |
|
| 4 |
|
src/classifier/classifier_hotdog.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import json
|
| 3 |
+
from PIL import Image
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def hotdog_classify(pipeline_hot_dog, tab_hotdogs):
|
| 7 |
+
col1, col2 = tab_hotdogs.columns(2)
|
| 8 |
+
for file in st.session_state.files:
|
| 9 |
+
image = st.session_state.images[file.name]
|
| 10 |
+
observation = st.session_state.observations[file.name].to_dict()
|
| 11 |
+
# display the image (use cached version, no need to reread)
|
| 12 |
+
col1.image(image, use_column_width=True)
|
| 13 |
+
# and then run inference on the image
|
| 14 |
+
hotdog_image = Image.fromarray(image)
|
| 15 |
+
predictions = pipeline_hot_dog(hotdog_image)
|
| 16 |
+
|
| 17 |
+
col2.header("Probabilities")
|
| 18 |
+
first = True
|
| 19 |
+
for p in predictions:
|
| 20 |
+
col2.subheader(f"{ p['label'] }: { round(p['score'] * 100, 1)}%")
|
| 21 |
+
if first:
|
| 22 |
+
observation['predicted_class'] = p['label']
|
| 23 |
+
observation['predicted_score'] = round(p['score'] * 100, 1)
|
| 24 |
+
first = False
|
| 25 |
+
|
| 26 |
+
tab_hotdogs.write(f"Session observation: {json.dumps(observation)}")
|
src/classifier/classifier_image.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import logging
|
| 3 |
+
|
| 4 |
+
# get a global var for logger accessor in this module
|
| 5 |
+
LOG_LEVEL = logging.DEBUG
|
| 6 |
+
g_logger = logging.getLogger(__name__)
|
| 7 |
+
g_logger.setLevel(LOG_LEVEL)
|
| 8 |
+
|
| 9 |
+
import whale_viewer as viewer
|
| 10 |
+
from hf_push_observations import push_observations
|
| 11 |
+
from utils.grid_maker import gridder
|
| 12 |
+
from utils.metadata_handler import metadata2md
|
| 13 |
+
|
| 14 |
+
def cetacean_classify(cetacean_classifier):
|
| 15 |
+
files = st.session_state.files
|
| 16 |
+
images = st.session_state.images
|
| 17 |
+
observations = st.session_state.observations
|
| 18 |
+
|
| 19 |
+
batch_size, row_size, page = gridder(files)
|
| 20 |
+
|
| 21 |
+
grid = st.columns(row_size)
|
| 22 |
+
col = 0
|
| 23 |
+
|
| 24 |
+
for file in files:
|
| 25 |
+
image = images[file.name]
|
| 26 |
+
|
| 27 |
+
with grid[col]:
|
| 28 |
+
st.image(image, use_column_width=True)
|
| 29 |
+
observation = observations[file.name].to_dict()
|
| 30 |
+
# run classifier model on `image`, and persistently store the output
|
| 31 |
+
out = cetacean_classifier(image) # get top 3 matches
|
| 32 |
+
st.session_state.whale_prediction1 = out['predictions'][0]
|
| 33 |
+
st.session_state.classify_whale_done = True
|
| 34 |
+
msg = f"[D]2 classify_whale_done: {st.session_state.classify_whale_done}, whale_prediction1: {st.session_state.whale_prediction1}"
|
| 35 |
+
g_logger.info(msg)
|
| 36 |
+
|
| 37 |
+
# dropdown for selecting/overriding the species prediction
|
| 38 |
+
if not st.session_state.classify_whale_done:
|
| 39 |
+
selected_class = st.sidebar.selectbox("Species", viewer.WHALE_CLASSES,
|
| 40 |
+
index=None, placeholder="Species not yet identified...",
|
| 41 |
+
disabled=True)
|
| 42 |
+
else:
|
| 43 |
+
pred1 = st.session_state.whale_prediction1
|
| 44 |
+
# get index of pred1 from WHALE_CLASSES, none if not present
|
| 45 |
+
print(f"[D] pred1: {pred1}")
|
| 46 |
+
ix = viewer.WHALE_CLASSES.index(pred1) if pred1 in viewer.WHALE_CLASSES else None
|
| 47 |
+
selected_class = st.selectbox(f"Species for {file.name}", viewer.WHALE_CLASSES, index=ix)
|
| 48 |
+
|
| 49 |
+
observation['predicted_class'] = selected_class
|
| 50 |
+
if selected_class != st.session_state.whale_prediction1:
|
| 51 |
+
observation['class_overriden'] = selected_class
|
| 52 |
+
|
| 53 |
+
st.session_state.public_observation = observation
|
| 54 |
+
st.button(f"Upload observation for {file.name} to THE INTERNET!", on_click=push_observations)
|
| 55 |
+
# TODO: the metadata only fills properly if `validate` was clicked.
|
| 56 |
+
st.markdown(metadata2md())
|
| 57 |
+
|
| 58 |
+
msg = f"[D] full observation after inference: {observation}"
|
| 59 |
+
g_logger.debug(msg)
|
| 60 |
+
print(msg)
|
| 61 |
+
# TODO: add a link to more info on the model, next to the button.
|
| 62 |
+
|
| 63 |
+
whale_classes = out['predictions'][:]
|
| 64 |
+
# render images for the top 3 (that is what the model api returns)
|
| 65 |
+
st.markdown(f"Top 3 Predictions for {file.name}")
|
| 66 |
+
for i in range(len(whale_classes)):
|
| 67 |
+
viewer.display_whale(whale_classes, i)
|
| 68 |
+
col = (col + 1) % row_size
|
src/classifier_image.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import logging
|
| 3 |
+
import os
|
| 4 |
+
|
| 5 |
+
# get a global var for logger accessor in this module
|
| 6 |
+
LOG_LEVEL = logging.DEBUG
|
| 7 |
+
g_logger = logging.getLogger(__name__)
|
| 8 |
+
g_logger.setLevel(LOG_LEVEL)
|
| 9 |
+
|
| 10 |
+
from grid_maker import gridder
|
| 11 |
+
import hf_push_observations as sw_push_obs
|
| 12 |
+
import utils.metadata_handler as meta_handler
|
| 13 |
+
import whale_viewer as sw_wv
|
| 14 |
+
|
| 15 |
+
def cetacean_classify(cetacean_classifier, tab_inference):
|
| 16 |
+
files = st.session_state.files
|
| 17 |
+
images = st.session_state.images
|
| 18 |
+
observations = st.session_state.observations
|
| 19 |
+
|
| 20 |
+
batch_size, row_size, page = gridder(files)
|
| 21 |
+
|
| 22 |
+
grid = st.columns(row_size)
|
| 23 |
+
col = 0
|
| 24 |
+
|
| 25 |
+
for file in files:
|
| 26 |
+
image = images[file.name]
|
| 27 |
+
|
| 28 |
+
with grid[col]:
|
| 29 |
+
st.image(image, use_column_width=True)
|
| 30 |
+
observation = observations[file.name].to_dict()
|
| 31 |
+
# run classifier model on `image`, and persistently store the output
|
| 32 |
+
out = cetacean_classifier(image) # get top 3 matches
|
| 33 |
+
st.session_state.whale_prediction1 = out['predictions'][0]
|
| 34 |
+
st.session_state.classify_whale_done = True
|
| 35 |
+
msg = f"[D]2 classify_whale_done: {st.session_state.classify_whale_done}, whale_prediction1: {st.session_state.whale_prediction1}"
|
| 36 |
+
g_logger.info(msg)
|
| 37 |
+
|
| 38 |
+
# dropdown for selecting/overriding the species prediction
|
| 39 |
+
if not st.session_state.classify_whale_done:
|
| 40 |
+
selected_class = st.sidebar.selectbox("Species", sw_wv.WHALE_CLASSES,
|
| 41 |
+
index=None, placeholder="Species not yet identified...",
|
| 42 |
+
disabled=True)
|
| 43 |
+
else:
|
| 44 |
+
pred1 = st.session_state.whale_prediction1
|
| 45 |
+
# get index of pred1 from WHALE_CLASSES, none if not present
|
| 46 |
+
print(f"[D] pred1: {pred1}")
|
| 47 |
+
ix = sw_wv.WHALE_CLASSES.index(pred1) if pred1 in sw_wv.WHALE_CLASSES else None
|
| 48 |
+
selected_class = tab_inference.selectbox("Species", sw_wv.WHALE_CLASSES, index=ix)
|
| 49 |
+
|
| 50 |
+
observation['predicted_class'] = selected_class
|
| 51 |
+
if selected_class != st.session_state.whale_prediction1:
|
| 52 |
+
observation['class_overriden'] = selected_class
|
| 53 |
+
|
| 54 |
+
st.session_state.public_observation = observation
|
| 55 |
+
st.button(f"Upload observation for {file.name} to THE INTERNET!", on_click=sw_push_obs.push_observations)
|
| 56 |
+
# TODO: the metadata only fills properly if `validate` was clicked.
|
| 57 |
+
st.markdown(meta_handler.metadata2md())
|
| 58 |
+
|
| 59 |
+
msg = f"[D] full observation after inference: {observation}"
|
| 60 |
+
g_logger.debug(msg)
|
| 61 |
+
print(msg)
|
| 62 |
+
# TODO: add a link to more info on the model, next to the button.
|
| 63 |
+
|
| 64 |
+
whale_classes = out['predictions'][:]
|
| 65 |
+
# render images for the top 3 (that is what the model api returns)
|
| 66 |
+
#with tab_inference:
|
| 67 |
+
st.title(f"Species detected for {file.name}")
|
| 68 |
+
for i in range(len(whale_classes)):
|
| 69 |
+
sw_wv.display_whale(whale_classes, i)
|
| 70 |
+
col = (col + 1) % row_size
|
src/hf_push_observations.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from streamlit.delta_generator import DeltaGenerator
|
| 2 |
+
import streamlit as st
|
| 3 |
+
from huggingface_hub import HfApi
|
| 4 |
+
import json
|
| 5 |
+
import tempfile
|
| 6 |
+
import logging
|
| 7 |
+
|
| 8 |
+
# get a global var for logger accessor in this module
|
| 9 |
+
LOG_LEVEL = logging.DEBUG
|
| 10 |
+
g_logger = logging.getLogger(__name__)
|
| 11 |
+
g_logger.setLevel(LOG_LEVEL)
|
| 12 |
+
|
| 13 |
+
def push_observations(tab_log:DeltaGenerator=None):
|
| 14 |
+
"""
|
| 15 |
+
Push the observations to the Hugging Face dataset
|
| 16 |
+
|
| 17 |
+
Args:
|
| 18 |
+
tab_log (streamlit.container): The container to log messages to. If not provided,
|
| 19 |
+
log messages are in any case written to the global logger (TODO: test - didn't
|
| 20 |
+
push any observation since generating the logger)
|
| 21 |
+
|
| 22 |
+
"""
|
| 23 |
+
# we get the observation from session state: 1 is the dict 2 is the image.
|
| 24 |
+
# first, lets do an info display (popup)
|
| 25 |
+
metadata_str = json.dumps(st.session_state.public_observation)
|
| 26 |
+
|
| 27 |
+
st.toast(f"Uploading observations: {metadata_str}", icon="π¦")
|
| 28 |
+
tab_log = st.session_state.tab_log
|
| 29 |
+
if tab_log is not None:
|
| 30 |
+
tab_log.info(f"Uploading observations: {metadata_str}")
|
| 31 |
+
|
| 32 |
+
# get huggingface api
|
| 33 |
+
import os
|
| 34 |
+
token = os.environ.get("HF_TOKEN", None)
|
| 35 |
+
api = HfApi(token=token)
|
| 36 |
+
|
| 37 |
+
f = tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False)
|
| 38 |
+
f.write(metadata_str)
|
| 39 |
+
f.close()
|
| 40 |
+
st.info(f"temp file: {f.name} with metadata written...")
|
| 41 |
+
|
| 42 |
+
path_in_repo= f"metadata/{st.session_state.public_observation['author_email']}/{st.session_state.public_observation['image_md5']}.json"
|
| 43 |
+
msg = f"fname: {f.name} | path: {path_in_repo}"
|
| 44 |
+
print(msg)
|
| 45 |
+
st.warning(msg)
|
| 46 |
+
# rv = api.upload_file(
|
| 47 |
+
# path_or_fileobj=f.name,
|
| 48 |
+
# path_in_repo=path_in_repo,
|
| 49 |
+
# repo_id="Saving-Willy/temp_dataset",
|
| 50 |
+
# repo_type="dataset",
|
| 51 |
+
# )
|
| 52 |
+
# print(rv)
|
| 53 |
+
# msg = f"observation attempted tx to repo happy walrus: {rv}"
|
| 54 |
+
g_logger.info(msg)
|
| 55 |
+
st.info(msg)
|
| 56 |
+
|
src/input/input_handling.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import datetime
|
| 2 |
+
import logging
|
| 3 |
+
|
| 4 |
+
import streamlit as st
|
| 5 |
+
from streamlit.delta_generator import DeltaGenerator
|
| 6 |
+
|
| 7 |
+
import cv2
|
| 8 |
+
import numpy as np
|
| 9 |
+
|
| 10 |
+
from input.input_observation import InputObservation
|
| 11 |
+
from input.input_validator import get_image_datetime, is_valid_email, is_valid_number
|
| 12 |
+
|
| 13 |
+
m_logger = logging.getLogger(__name__)
|
| 14 |
+
m_logger.setLevel(logging.INFO)
|
| 15 |
+
|
| 16 |
+
'''
|
| 17 |
+
A module to setup the input handling for the whale observation guidance tool
|
| 18 |
+
|
| 19 |
+
both the UI elements (setup_input_UI) and the validation functions.
|
| 20 |
+
'''
|
| 21 |
+
allowed_image_types = ['jpg', 'jpeg', 'png', 'webp']
|
| 22 |
+
|
| 23 |
+
# an arbitrary set of defaults so testing is less painful...
|
| 24 |
+
# ideally we add in some randomization to the defaults
|
| 25 |
+
spoof_metadata = {
|
| 26 |
+
"latitude": 23.5,
|
| 27 |
+
"longitude": 44,
|
| 28 |
+
"author_email": "super@whale.org",
|
| 29 |
+
"date": None,
|
| 30 |
+
"time": None,
|
| 31 |
+
}
|
| 32 |
+
|
| 33 |
+
def setup_input(
|
| 34 |
+
viewcontainer: DeltaGenerator=None,
|
| 35 |
+
_allowed_image_types: list=None, ) -> InputObservation:
|
| 36 |
+
"""
|
| 37 |
+
Sets up the input interface for uploading an image and entering metadata.
|
| 38 |
+
|
| 39 |
+
It provides input fields for an image upload, lat/lon, author email, and date-time.
|
| 40 |
+
In the ideal case, the image metadata will be used to populate location and datetime.
|
| 41 |
+
|
| 42 |
+
Parameters:
|
| 43 |
+
viewcontainer (DeltaGenerator, optional): The Streamlit container to use for the input interface. Defaults to st.sidebar.
|
| 44 |
+
_allowed_image_types (list, optional): List of allowed image file types for upload. Defaults to allowed_image_types.
|
| 45 |
+
|
| 46 |
+
Returns:
|
| 47 |
+
InputObservation: An object containing the uploaded image and entered metadata.
|
| 48 |
+
|
| 49 |
+
"""
|
| 50 |
+
|
| 51 |
+
if viewcontainer is None:
|
| 52 |
+
viewcontainer = st.sidebar
|
| 53 |
+
|
| 54 |
+
if _allowed_image_types is None:
|
| 55 |
+
_allowed_image_types = allowed_image_types
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
viewcontainer.title("Input image and data")
|
| 59 |
+
|
| 60 |
+
# 1. Input the author email
|
| 61 |
+
author_email = viewcontainer.text_input("Author Email", spoof_metadata.get('author_email', ""))
|
| 62 |
+
if author_email and not is_valid_email(author_email):
|
| 63 |
+
viewcontainer.error("Please enter a valid email address.")
|
| 64 |
+
|
| 65 |
+
# 2. Image Selector
|
| 66 |
+
uploaded_files = viewcontainer.file_uploader("Upload an image", type=allowed_image_types, accept_multiple_files=True)
|
| 67 |
+
observations = {}
|
| 68 |
+
images = {}
|
| 69 |
+
if uploaded_files is not None:
|
| 70 |
+
for file in uploaded_files:
|
| 71 |
+
|
| 72 |
+
viewcontainer.title(f"Metadata for {file.name}")
|
| 73 |
+
|
| 74 |
+
# Display the uploaded image
|
| 75 |
+
# load image using cv2 format, so it is compatible with the ML models
|
| 76 |
+
file_bytes = np.asarray(bytearray(file.read()), dtype=np.uint8)
|
| 77 |
+
filename = file.name
|
| 78 |
+
image = cv2.imdecode(file_bytes, 1)
|
| 79 |
+
# Extract and display image date-time
|
| 80 |
+
image_datetime = None # For storing date-time from image
|
| 81 |
+
image_datetime = get_image_datetime(file)
|
| 82 |
+
m_logger.debug(f"image date extracted as {image_datetime} (from {uploaded_files})")
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
# 3. Latitude Entry Box
|
| 86 |
+
latitude = viewcontainer.text_input("Latitude for "+filename, spoof_metadata.get('latitude', ""))
|
| 87 |
+
if latitude and not is_valid_number(latitude):
|
| 88 |
+
viewcontainer.error("Please enter a valid latitude (numerical only).")
|
| 89 |
+
m_logger.error(f"Invalid latitude entered: {latitude}.")
|
| 90 |
+
# 4. Longitude Entry Box
|
| 91 |
+
longitude = viewcontainer.text_input("Longitude for "+filename, spoof_metadata.get('longitude', ""))
|
| 92 |
+
if longitude and not is_valid_number(longitude):
|
| 93 |
+
viewcontainer.error("Please enter a valid longitude (numerical only).")
|
| 94 |
+
m_logger.error(f"Invalid latitude entered: {latitude}.")
|
| 95 |
+
# 5. Date/time
|
| 96 |
+
## first from image metadata
|
| 97 |
+
if image_datetime is not None:
|
| 98 |
+
time_value = datetime.datetime.strptime(image_datetime, '%Y:%m:%d %H:%M:%S').time()
|
| 99 |
+
date_value = datetime.datetime.strptime(image_datetime, '%Y:%m:%d %H:%M:%S').date()
|
| 100 |
+
else:
|
| 101 |
+
time_value = datetime.datetime.now().time() # Default to current time
|
| 102 |
+
date_value = datetime.datetime.now().date()
|
| 103 |
+
|
| 104 |
+
## if not, give user the option to enter manually
|
| 105 |
+
date_option = st.sidebar.date_input("Date for "+filename, value=date_value)
|
| 106 |
+
time_option = st.sidebar.time_input("Time for "+filename, time_value)
|
| 107 |
+
|
| 108 |
+
observation = InputObservation(image=file, latitude=latitude, longitude=longitude,
|
| 109 |
+
author_email=author_email, date=image_datetime, time=None,
|
| 110 |
+
date_option=date_option, time_option=time_option)
|
| 111 |
+
observations[file.name] = observation
|
| 112 |
+
images[file.name] = image
|
| 113 |
+
|
| 114 |
+
st.session_state.images = images
|
| 115 |
+
st.session_state.files = uploaded_files
|
| 116 |
+
|
| 117 |
+
return observations
|
| 118 |
+
|
src/{input_handling.py β input/input_observation.py}
RENAMED
|
@@ -1,48 +1,5 @@
|
|
| 1 |
-
from fractions import Fraction
|
| 2 |
-
from PIL import Image
|
| 3 |
-
from PIL import ExifTags
|
| 4 |
-
import re
|
| 5 |
-
import datetime
|
| 6 |
import hashlib
|
| 7 |
-
import
|
| 8 |
-
|
| 9 |
-
import streamlit as st
|
| 10 |
-
from streamlit.runtime.uploaded_file_manager import UploadedFile # for type hinting
|
| 11 |
-
from streamlit.delta_generator import DeltaGenerator
|
| 12 |
-
|
| 13 |
-
import cv2
|
| 14 |
-
import numpy as np
|
| 15 |
-
|
| 16 |
-
m_logger = logging.getLogger(__name__)
|
| 17 |
-
# we can set the log level locally for funcs in this module
|
| 18 |
-
#g_m_logger.setLevel(logging.DEBUG)
|
| 19 |
-
m_logger.setLevel(logging.INFO)
|
| 20 |
-
|
| 21 |
-
'''
|
| 22 |
-
A module to setup the input handling for the whale observation guidance tool
|
| 23 |
-
|
| 24 |
-
both the UI elements (setup_input_UI) and the validation functions.
|
| 25 |
-
'''
|
| 26 |
-
#allowed_image_types = ['webp']
|
| 27 |
-
allowed_image_types = ['jpg', 'jpeg', 'png', 'webp']
|
| 28 |
-
|
| 29 |
-
import random
|
| 30 |
-
import string
|
| 31 |
-
def generate_random_md5() -> str:
|
| 32 |
-
"""
|
| 33 |
-
Generates a random MD5 hash.
|
| 34 |
-
|
| 35 |
-
This function creates a random string of 16 alphanumeric characters,
|
| 36 |
-
encodes it, and then computes its MD5 hash.
|
| 37 |
-
|
| 38 |
-
Returns:
|
| 39 |
-
str: The MD5 hash of the generated random string.
|
| 40 |
-
"""
|
| 41 |
-
# Generate a random string
|
| 42 |
-
random_string = ''.join(random.choices(string.ascii_letters + string.digits, k=16))
|
| 43 |
-
# Encode the string and compute its MD5 hash
|
| 44 |
-
md5_hash = hashlib.md5(random_string.encode()).hexdigest()
|
| 45 |
-
return md5_hash
|
| 46 |
|
| 47 |
# autogenerated class to hold the input data
|
| 48 |
class InputObservation:
|
|
@@ -87,7 +44,9 @@ class InputObservation:
|
|
| 87 |
from_input(input):
|
| 88 |
Creates an observation from another input observation.
|
| 89 |
"""
|
| 90 |
-
def __init__(self, image=None, latitude=None, longitude=None,
|
|
|
|
|
|
|
| 91 |
self.image = image
|
| 92 |
self.latitude = latitude
|
| 93 |
self.longitude = longitude
|
|
@@ -125,8 +84,6 @@ class InputObservation:
|
|
| 125 |
"author_email": self.author_email,
|
| 126 |
"date": self.date,
|
| 127 |
"time": self.time,
|
| 128 |
-
# "date_option": self.date_option,
|
| 129 |
-
# "time_option": self.time_option,
|
| 130 |
"date_option": str(self.date_option),
|
| 131 |
"time_option": str(self.time_option),
|
| 132 |
"uploaded_filename": self.uploaded_filename
|
|
@@ -149,208 +106,5 @@ class InputObservation:
|
|
| 149 |
return InputObservation(data["image"], data["latitude"], data["longitude"], data["author_email"], data["date"], data["time"], data["date_option"], data["time_option"], data["uploaded_filename"])
|
| 150 |
|
| 151 |
|
| 152 |
-
def is_valid_number(number:str) -> bool:
|
| 153 |
-
"""
|
| 154 |
-
Check if the given string is a valid number (int or float, sign ok)
|
| 155 |
-
|
| 156 |
-
Args:
|
| 157 |
-
number (str): The string to be checked.
|
| 158 |
-
|
| 159 |
-
Returns:
|
| 160 |
-
bool: True if the string is a valid number, False otherwise.
|
| 161 |
-
"""
|
| 162 |
-
pattern = r'^[-+]?[0-9]*\.?[0-9]+$'
|
| 163 |
-
return re.match(pattern, number) is not None
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
# Function to validate email address
|
| 167 |
-
def is_valid_email(email:str) -> bool:
|
| 168 |
-
"""
|
| 169 |
-
Validates if the provided email address is in a correct format.
|
| 170 |
-
|
| 171 |
-
Args:
|
| 172 |
-
email (str): The email address to validate.
|
| 173 |
-
|
| 174 |
-
Returns:
|
| 175 |
-
bool: True if the email address is valid, False otherwise.
|
| 176 |
-
"""
|
| 177 |
-
pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
|
| 178 |
-
return re.match(pattern, email) is not None
|
| 179 |
-
|
| 180 |
-
# Function to extract date and time from image metadata
|
| 181 |
-
def get_image_datetime(image_file: UploadedFile) -> str | None:
|
| 182 |
-
"""
|
| 183 |
-
Extracts the original date and time from the EXIF metadata of an uploaded image file.
|
| 184 |
-
|
| 185 |
-
Args:
|
| 186 |
-
image_file (UploadedFile): The uploaded image file from which to extract the date and time.
|
| 187 |
-
|
| 188 |
-
Returns:
|
| 189 |
-
str: The original date and time as a string if available, otherwise None.
|
| 190 |
-
|
| 191 |
-
Raises:
|
| 192 |
-
Warning: If the date and time could not be extracted from the image metadata.
|
| 193 |
-
"""
|
| 194 |
-
try:
|
| 195 |
-
image = Image.open(image_file)
|
| 196 |
-
exif_data = image._getexif()
|
| 197 |
-
if exif_data is not None:
|
| 198 |
-
if ExifTags.Base.DateTimeOriginal in exif_data:
|
| 199 |
-
return exif_data.get(ExifTags.Base.DateTimeOriginal)
|
| 200 |
-
except Exception as e: # FIXME: what types of exception?
|
| 201 |
-
st.warning(f"Could not extract date from image metadata. (file: {image_file.name})")
|
| 202 |
-
# TODO: add to logger
|
| 203 |
-
return None
|
| 204 |
-
|
| 205 |
-
def decimal_coords(coords:tuple, ref:str) -> Fraction:
|
| 206 |
-
"""
|
| 207 |
-
Converts coordinates from degrees, minutes, and seconds to decimal degrees.
|
| 208 |
-
|
| 209 |
-
Args:
|
| 210 |
-
coords (tuple): A tuple containing three elements representing degrees, minutes, and seconds.
|
| 211 |
-
ref (str): A string representing the reference direction ('N', 'S', 'E', 'W').
|
| 212 |
-
|
| 213 |
-
Returns:
|
| 214 |
-
Fraction: The coordinates in decimal degrees. Negative if the reference is 'S' or 'W'.
|
| 215 |
-
|
| 216 |
-
Example:
|
| 217 |
-
decimal_coords((40, 26, 46), 'N') -> 40.44611111111111
|
| 218 |
-
decimal_coords((40, 26, 46), 'W') -> -40.44611111111111
|
| 219 |
-
"""
|
| 220 |
-
# https://stackoverflow.com/a/73267185
|
| 221 |
-
if ref not in ['N', 'S', 'E', 'W']:
|
| 222 |
-
raise ValueError("Invalid reference direction. Must be 'N', 'S', 'E', or 'W'.")
|
| 223 |
-
if len(coords) != 3:
|
| 224 |
-
raise ValueError("Coordinates must be a tuple of three elements (degrees, minutes, seconds).")
|
| 225 |
-
|
| 226 |
-
decimal_degrees = coords[0] + coords[1] / 60 + coords[2] / 3600
|
| 227 |
-
if ref == "S" or ref =='W':
|
| 228 |
-
decimal_degrees = -decimal_degrees
|
| 229 |
-
return decimal_degrees
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
def get_image_latlon(image_file: UploadedFile) -> tuple[float, float] | None:
|
| 233 |
-
"""
|
| 234 |
-
Extracts the latitude and longitude from the EXIF metadata of an uploaded image file.
|
| 235 |
-
|
| 236 |
-
Args:
|
| 237 |
-
image_file (UploadedFile): The uploaded image file from which to extract the latitude and longitude.
|
| 238 |
-
|
| 239 |
-
Returns:
|
| 240 |
-
tuple[float, float]: The latitude and longitude as a tuple if available, otherwise None.
|
| 241 |
-
|
| 242 |
-
Raises:
|
| 243 |
-
Warning: If the latitude and longitude could not be extracted from the image metadata.
|
| 244 |
-
"""
|
| 245 |
-
try:
|
| 246 |
-
image = Image.open(image_file)
|
| 247 |
-
exif_data = image._getexif()
|
| 248 |
-
if exif_data is not None:
|
| 249 |
-
if ExifTags.Base.GPSInfo in exif_data:
|
| 250 |
-
gps_ifd = exif_data.get(ExifTags.Base.GPSInfo)
|
| 251 |
-
|
| 252 |
-
lat = float(decimal_coords(gps_ifd[ExifTags.GPS.GPSLatitude], gps_ifd[ExifTags.GPS.GPSLatitudeRef]))
|
| 253 |
-
lon = float(decimal_coords(gps_ifd[ExifTags.GPS.GPSLongitude], gps_ifd[ExifTags.GPS.GPSLongitudeRef]))
|
| 254 |
-
|
| 255 |
-
return lat, lon
|
| 256 |
-
|
| 257 |
-
except Exception as e: # FIXME: what types of exception?
|
| 258 |
-
st.warning(f"Could not extract latitude and longitude from image metadata. (file: {str(image_file)}")
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
# an arbitrary set of defaults so testing is less painful...
|
| 262 |
-
# ideally we add in some randomization to the defaults
|
| 263 |
-
spoof_metadata = {
|
| 264 |
-
"latitude": 23.5,
|
| 265 |
-
"longitude": 44,
|
| 266 |
-
"author_email": "super@whale.org",
|
| 267 |
-
"date": None,
|
| 268 |
-
"time": None,
|
| 269 |
-
}
|
| 270 |
-
|
| 271 |
-
#def display_whale(whale_classes:List[str], i:int, viewcontainer=None):
|
| 272 |
-
def setup_input(
        viewcontainer: DeltaGenerator=None,
        _allowed_image_types: list=None, ) -> InputObservation:
    """
    Sets up the input interface for uploading an image and entering metadata.

    It provides input fields for an image upload, lat/lon, author email, and date-time.
    In the ideal case, the image metadata will be used to populate location and datetime.

    Parameters:
        viewcontainer (DeltaGenerator, optional): The Streamlit container to use for the input interface. Defaults to st.sidebar.
        _allowed_image_types (list, optional): List of allowed image file types for upload. Defaults to allowed_image_types.

    Returns:
        InputObservation: An object containing the uploaded image and entered metadata.

    """

    if viewcontainer is None:
        viewcontainer = st.sidebar

    if _allowed_image_types is None:
        _allowed_image_types = allowed_image_types


    viewcontainer.title("Input image and data")

    # 1. Image Selector
    # fixed: previously passed the module-level `allowed_image_types` here,
    # silently ignoring the `_allowed_image_types` parameter resolved above
    uploaded_filename = viewcontainer.file_uploader("Upload an image", type=_allowed_image_types)
    image_datetime = None  # For storing date-time from image

    if uploaded_filename is not None:
        # load image using cv2 format, so it is compatible with the ML models
        file_bytes = np.asarray(bytearray(uploaded_filename.read()), dtype=np.uint8)
        image = cv2.imdecode(file_bytes, 1)

        viewcontainer.image(image, caption='Uploaded Image.', use_column_width=True)
        # store the image in the session state
        st.session_state.image = image

        # Extract and display image date-time
        image_datetime = get_image_datetime(uploaded_filename)
        print(f"[D] image date extracted as {image_datetime}")
        m_logger.debug(f"image date extracted as {image_datetime} (from {uploaded_filename})")


    # 2. Latitude Entry Box
    latitude = viewcontainer.text_input("Latitude", spoof_metadata.get('latitude', ""))
    if latitude and not is_valid_number(latitude):
        viewcontainer.error("Please enter a valid latitude (numerical only).")
        m_logger.error(f"Invalid latitude entered: {latitude}.")
    # 3. Longitude Entry Box
    longitude = viewcontainer.text_input("Longitude", spoof_metadata.get('longitude', ""))
    if longitude and not is_valid_number(longitude):
        viewcontainer.error("Please enter a valid longitude (numerical only).")
        # fixed: copy-paste bug previously logged "Invalid latitude ... {latitude}" here
        m_logger.error(f"Invalid longitude entered: {longitude}.")

    # 4. Author Box with Email Address Validator
    author_email = viewcontainer.text_input("Author Email", spoof_metadata.get('author_email', ""))

    if author_email and not is_valid_email(author_email):
        viewcontainer.error("Please enter a valid email address.")

    # 5. date/time
    ## first from image metadata
    if image_datetime is not None:
        time_value = datetime.datetime.strptime(image_datetime, '%Y:%m:%d %H:%M:%S').time()
        date_value = datetime.datetime.strptime(image_datetime, '%Y:%m:%d %H:%M:%S').date()
    else:
        time_value = datetime.datetime.now().time()  # Default to current time
        date_value = datetime.datetime.now().date()

    ## if not, give user the option to enter manually
    date_option = st.sidebar.date_input("Date", value=date_value)
    time_option = st.sidebar.time_input("Time", time_value)

    observation = InputObservation(image=uploaded_filename, latitude=latitude, longitude=longitude,
                                   author_email=author_email, date=image_datetime, time=None,
                                   date_option=date_option, time_option=time_option)
    return observation
|
| 356 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import hashlib
|
| 2 |
+
from input.input_validator import generate_random_md5
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
# autogenerated class to hold the input data
|
| 5 |
class InputObservation:
|
|
|
|
| 44 |
from_input(input):
|
| 45 |
Creates an observation from another input observation.
|
| 46 |
"""
|
| 47 |
+
def __init__(self, image=None, latitude=None, longitude=None,
|
| 48 |
+
author_email=None, date=None, time=None, date_option=None, time_option=None,
|
| 49 |
+
uploaded_filename=None):
|
| 50 |
self.image = image
|
| 51 |
self.latitude = latitude
|
| 52 |
self.longitude = longitude
|
|
|
|
| 84 |
"author_email": self.author_email,
|
| 85 |
"date": self.date,
|
| 86 |
"time": self.time,
|
|
|
|
|
|
|
| 87 |
"date_option": str(self.date_option),
|
| 88 |
"time_option": str(self.time_option),
|
| 89 |
"uploaded_filename": self.uploaded_filename
|
|
|
|
| 106 |
return InputObservation(data["image"], data["latitude"], data["longitude"], data["author_email"], data["date"], data["time"], data["date_option"], data["time_option"], data["uploaded_filename"])
|
| 107 |
|
| 108 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
|
src/input/input_validator.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import random
|
| 2 |
+
import string
|
| 3 |
+
import hashlib
|
| 4 |
+
import re
|
| 5 |
+
import streamlit as st
|
| 6 |
+
from fractions import Fraction
|
| 7 |
+
|
| 8 |
+
from PIL import Image
|
| 9 |
+
from PIL import ExifTags
|
| 10 |
+
|
| 11 |
+
from streamlit.runtime.uploaded_file_manager import UploadedFile
|
| 12 |
+
|
| 13 |
+
def generate_random_md5() -> str:
    """Return the MD5 hex digest of a freshly generated random 16-character string.

    Used as a throwaway unique identifier; the digest is not tied to any
    file content — only to the random seed text generated here.
    """
    alphabet = string.ascii_letters + string.digits
    # build a 16-char random seed, then hash it into a 32-char hex identifier
    seed_text = ''.join(random.choice(alphabet) for _ in range(16))
    return hashlib.md5(seed_text.encode()).hexdigest()
|
| 19 |
+
|
| 20 |
+
def is_valid_number(number: str) -> bool:
    """
    Check if the given string is a valid number (int or float, sign ok)

    Args:
        number (str): The string to be checked.

    Returns:
        bool: True if the string is a valid number, False otherwise.
    """
    # optional sign, optional integer part, optional decimal point,
    # mandatory trailing digits (so "12", "-3.5", "+.7" pass; "1." fails)
    numeric_pattern = r'^[-+]?[0-9]*\.?[0-9]+$'
    return bool(re.match(numeric_pattern, number))
|
| 32 |
+
|
| 33 |
+
# Function to validate email address
|
| 34 |
+
def is_valid_email(email: str) -> bool:
    """
    Validates if the provided email address is in a correct format.

    Args:
        email (str): The email address to validate.

    Returns:
        bool: True if the email address is valid, False otherwise.
    """
    # local-part @ domain . TLD (2+ letters); a pragmatic check, not RFC 5322
    email_pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
    return bool(re.match(email_pattern, email))
|
| 46 |
+
|
| 47 |
+
# Function to extract date and time from image metadata
|
| 48 |
+
def get_image_datetime(image_file):
    """
    Extracts the original date and time from the EXIF metadata of an uploaded image file.

    Args:
        image_file (UploadedFile): The uploaded image file from which to extract the date and time.

    Returns:
        str: The original date and time as a string if available, otherwise None.

    Raises:
        Warning: If the date and time could not be extracted from the image metadata.
    """
    try:
        exif_data = Image.open(image_file)._getexif()
        # files without EXIF yield None; EXIF without a DateTimeOriginal tag
        # falls through to the final return below
        if exif_data is not None and ExifTags.Base.DateTimeOriginal in exif_data:
            return exif_data.get(ExifTags.Base.DateTimeOriginal)
    except Exception as e: # FIXME: what types of exception?
        st.warning(f"Could not extract date from image metadata. (file: {image_file.name})")
        # TODO: add to logger
    return None
|
| 71 |
+
|
| 72 |
+
def decimal_coords(coords: tuple, ref: str) -> float:
    """
    Converts coordinates from degrees, minutes, and seconds to decimal degrees.

    Args:
        coords (tuple): A tuple containing three elements representing degrees, minutes, and seconds.
        ref (str): A string representing the reference direction ('N', 'S', 'E', 'W').

    Returns:
        float: The coordinates in decimal degrees. Negative if the reference is 'S' or 'W'.

    Raises:
        ValueError: If `ref` is not one of 'N', 'S', 'E', 'W', or if `coords`
            does not contain exactly three elements.

    Example:
        decimal_coords((40, 26, 46), 'N') -> 40.44611111111111
        decimal_coords((40, 26, 46), 'W') -> -40.44611111111111
    """
    # https://stackoverflow.com/a/73267185
    # fixed: return annotation/docstring previously claimed `Fraction`, but the
    # arithmetic below yields a plain number (float for int inputs) and callers
    # wrap the result in float() anyway.
    if ref not in ['N', 'S', 'E', 'W']:
        raise ValueError("Invalid reference direction. Must be 'N', 'S', 'E', or 'W'.")
    if len(coords) != 3:
        raise ValueError("Coordinates must be a tuple of three elements (degrees, minutes, seconds).")

    decimal_degrees = coords[0] + coords[1] / 60 + coords[2] / 3600
    # southern / western hemispheres are negative by convention
    if ref == "S" or ref == 'W':
        decimal_degrees = -decimal_degrees
    return decimal_degrees
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
def get_image_latlon(image_file: UploadedFile) -> tuple[float, float] | None:
    """
    Extracts the latitude and longitude from the EXIF metadata of an uploaded image file.

    Args:
        image_file (UploadedFile): The uploaded image file from which to extract the latitude and longitude.

    Returns:
        tuple[float, float] | None: The latitude and longitude as a tuple if available, otherwise None.

    Raises:
        Warning: If the latitude and longitude could not be extracted from the image metadata.
    """
    try:
        image = Image.open(image_file)
        exif_data = image._getexif()
        if exif_data is not None:
            if ExifTags.Base.GPSInfo in exif_data:
                gps_ifd = exif_data.get(ExifTags.Base.GPSInfo)

                lat = float(decimal_coords(gps_ifd[ExifTags.GPS.GPSLatitude], gps_ifd[ExifTags.GPS.GPSLatitudeRef]))
                lon = float(decimal_coords(gps_ifd[ExifTags.GPS.GPSLongitude], gps_ifd[ExifTags.GPS.GPSLongitudeRef]))

                return lat, lon

    except Exception as e: # FIXME: what types of exception?
        # fixed: the message text previously lacked the closing parenthesis
        st.warning(f"Could not extract latitude and longitude from image metadata. (file: {str(image_file)})")
    # no EXIF / no GPS tag / extraction failed: make the None fall-through explicit
    return None
|
src/main.py
CHANGED
|
@@ -1,31 +1,25 @@
|
|
| 1 |
-
#import datetime
|
| 2 |
-
from PIL import Image
|
| 3 |
-
|
| 4 |
-
import json
|
| 5 |
import logging
|
| 6 |
import os
|
| 7 |
-
import tempfile
|
| 8 |
|
| 9 |
import pandas as pd
|
| 10 |
import streamlit as st
|
| 11 |
-
from streamlit.delta_generator import DeltaGenerator # for type hinting
|
| 12 |
import folium
|
| 13 |
from streamlit_folium import st_folium
|
| 14 |
-
|
| 15 |
from transformers import pipeline
|
| 16 |
from transformers import AutoModelForImageClassification
|
| 17 |
|
| 18 |
from datasets import disable_caching
|
| 19 |
disable_caching()
|
| 20 |
|
| 21 |
-
import
|
| 22 |
-
import
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
import
|
| 27 |
-
|
| 28 |
-
|
| 29 |
|
| 30 |
|
| 31 |
# setup for the ML model on huggingface (our wrapper)
|
|
@@ -45,96 +39,40 @@ g_logger = logging.getLogger(__name__)
|
|
| 45 |
g_logger.setLevel(LOG_LEVEL)
|
| 46 |
|
| 47 |
st.set_page_config(layout="wide")
|
| 48 |
-
#sw_logs.setup_logging(level=LOG_LEVEL, buffer_len=40)
|
| 49 |
-
|
| 50 |
-
|
| 51 |
|
| 52 |
# initialise various session state variables
|
| 53 |
if "handler" not in st.session_state:
|
| 54 |
-
st.session_state['handler'] =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
-
if "
|
| 57 |
-
st.session_state.
|
|
|
|
|
|
|
|
|
|
| 58 |
|
| 59 |
if "classify_whale_done" not in st.session_state:
|
| 60 |
st.session_state.classify_whale_done = False
|
| 61 |
|
| 62 |
if "whale_prediction1" not in st.session_state:
|
| 63 |
st.session_state.whale_prediction1 = None
|
| 64 |
-
|
| 65 |
-
if "image" not in st.session_state:
|
| 66 |
-
st.session_state.image = None
|
| 67 |
|
| 68 |
if "tab_log" not in st.session_state:
|
| 69 |
st.session_state.tab_log = None
|
| 70 |
|
| 71 |
|
| 72 |
-
def metadata2md() -> str:
|
| 73 |
-
"""Get metadata from cache and return as markdown-formatted key-value list
|
| 74 |
-
|
| 75 |
-
Returns:
|
| 76 |
-
str: Markdown-formatted key-value list of metadata
|
| 77 |
-
|
| 78 |
-
"""
|
| 79 |
-
markdown_str = "\n"
|
| 80 |
-
for key, value in st.session_state.full_data.items():
|
| 81 |
-
markdown_str += f"- **{key}**: {value}\n"
|
| 82 |
-
return markdown_str
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
def push_observation(tab_log:DeltaGenerator=None):
|
| 86 |
-
"""
|
| 87 |
-
Push the observation to the Hugging Face dataset
|
| 88 |
-
|
| 89 |
-
Args:
|
| 90 |
-
tab_log (streamlit.container): The container to log messages to. If not provided,
|
| 91 |
-
log messages are in any case written to the global logger (TODO: test - didn't
|
| 92 |
-
push any data since generating the logger)
|
| 93 |
-
|
| 94 |
-
"""
|
| 95 |
-
# we get the data from session state: 1 is the dict 2 is the image.
|
| 96 |
-
# first, lets do an info display (popup)
|
| 97 |
-
metadata_str = json.dumps(st.session_state.full_data)
|
| 98 |
-
|
| 99 |
-
st.toast(f"Uploading observation: {metadata_str}", icon="π¦")
|
| 100 |
-
tab_log = st.session_state.tab_log
|
| 101 |
-
if tab_log is not None:
|
| 102 |
-
tab_log.info(f"Uploading observation: {metadata_str}")
|
| 103 |
-
|
| 104 |
-
# get huggingface api
|
| 105 |
-
import os
|
| 106 |
-
token = os.environ.get("HF_TOKEN", None)
|
| 107 |
-
api = HfApi(token=token)
|
| 108 |
-
|
| 109 |
-
f = tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False)
|
| 110 |
-
f.write(metadata_str)
|
| 111 |
-
f.close()
|
| 112 |
-
st.info(f"temp file: {f.name} with metadata written...")
|
| 113 |
-
|
| 114 |
-
path_in_repo= f"metadata/{st.session_state.full_data['author_email']}/{st.session_state.full_data['image_md5']}.json"
|
| 115 |
-
msg = f"fname: {f.name} | path: {path_in_repo}"
|
| 116 |
-
print(msg)
|
| 117 |
-
st.warning(msg)
|
| 118 |
-
rv = api.upload_file(
|
| 119 |
-
path_or_fileobj=f.name,
|
| 120 |
-
path_in_repo=path_in_repo,
|
| 121 |
-
repo_id="Saving-Willy/temp_dataset",
|
| 122 |
-
repo_type="dataset",
|
| 123 |
-
)
|
| 124 |
-
print(rv)
|
| 125 |
-
msg = f"data attempted tx to repo happy walrus: {rv}"
|
| 126 |
-
g_logger.info(msg)
|
| 127 |
-
st.info(msg)
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
def main() -> None:
|
| 132 |
"""
|
| 133 |
Main entry point to set up the streamlit UI and run the application.
|
| 134 |
|
| 135 |
The organisation is as follows:
|
| 136 |
|
| 137 |
-
1.
|
| 138 |
2. the rest of the interface is organised in tabs:
|
| 139 |
|
| 140 |
- cetean classifier
|
|
@@ -156,26 +94,25 @@ def main() -> None:
|
|
| 156 |
#g_logger.warning("warning message")
|
| 157 |
|
| 158 |
# Streamlit app
|
| 159 |
-
#tab_gallery, tab_inference, tab_hotdogs, tab_map, tab_coords, tab_log = st.tabs(["Cetecean classifier", "Hotdog classifier", "Map", "Data", "Log", "Beautiful cetaceans"])
|
| 160 |
tab_inference, tab_hotdogs, tab_map, tab_coords, tab_log, tab_gallery = \
|
| 161 |
st.tabs(["Cetecean classifier", "Hotdog classifier", "Map", "*:gray[Dev:coordinates]*", "Log", "Beautiful cetaceans"])
|
| 162 |
st.session_state.tab_log = tab_log
|
| 163 |
|
| 164 |
|
| 165 |
-
# create a sidebar, and parse all the input (returned as `
|
| 166 |
-
|
| 167 |
|
| 168 |
|
| 169 |
if 0:## WIP
|
| 170 |
-
# goal of this code is to allow the user to override the ML prediction, before transmitting an
|
| 171 |
-
predicted_class = st.sidebar.selectbox("Predicted Class",
|
| 172 |
override_prediction = st.sidebar.checkbox("Override Prediction")
|
| 173 |
|
| 174 |
if override_prediction:
|
| 175 |
-
overridden_class = st.sidebar.selectbox("Override Class",
|
| 176 |
-
st.session_state.
|
| 177 |
else:
|
| 178 |
-
st.session_state.
|
| 179 |
|
| 180 |
|
| 181 |
with tab_map:
|
|
@@ -190,19 +127,19 @@ def main() -> None:
|
|
| 190 |
|
| 191 |
if show_db_points:
|
| 192 |
# show a nicer map, observations marked, tileset selectable.
|
| 193 |
-
|
| 194 |
dataset_id=dataset_id, data_files=data_files,
|
| 195 |
dbg_show_extra=dbg_show_extra)
|
| 196 |
|
| 197 |
else:
|
| 198 |
# development map.
|
| 199 |
-
|
| 200 |
|
| 201 |
|
| 202 |
with tab_log:
|
| 203 |
handler = st.session_state['handler']
|
| 204 |
if handler is not None:
|
| 205 |
-
records =
|
| 206 |
st.dataframe(records[::-1], use_container_width=True,)
|
| 207 |
st.info(f"Length of records: {len(records)}")
|
| 208 |
else:
|
|
@@ -236,22 +173,16 @@ def main() -> None:
|
|
| 236 |
# specific to the gallery (otherwise we get side effects)
|
| 237 |
tg_cont = st.container(key="swgallery")
|
| 238 |
with tg_cont:
|
| 239 |
-
|
| 240 |
|
| 241 |
|
| 242 |
-
# Display submitted
|
| 243 |
if st.sidebar.button("Validate"):
|
| 244 |
-
# create a dictionary with the submitted
|
| 245 |
-
submitted_data =
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
#full_data.update(**submitted_data)
|
| 249 |
-
for k, v in submitted_data.items():
|
| 250 |
-
st.session_state.full_data[k] = v
|
| 251 |
|
| 252 |
-
|
| 253 |
-
#tab_inference.info(f"{st.session_state.full_data}")
|
| 254 |
-
tab_log.info(f"{st.session_state.full_data}")
|
| 255 |
|
| 256 |
df = pd.DataFrame(submitted_data, index=[0])
|
| 257 |
with tab_coords:
|
|
@@ -278,49 +209,12 @@ def main() -> None:
|
|
| 278 |
trust_remote_code=True)
|
| 279 |
|
| 280 |
|
| 281 |
-
if st.session_state.
|
| 282 |
# TODO: cleaner design to disable the button until data input done?
|
| 283 |
st.info("Please upload an image first.")
|
| 284 |
else:
|
| 285 |
-
|
| 286 |
-
out = cetacean_classifier(st.session_state.image) # get top 3 matches
|
| 287 |
-
st.session_state.whale_prediction1 = out['predictions'][0]
|
| 288 |
-
st.session_state.classify_whale_done = True
|
| 289 |
-
msg = f"[D]2 classify_whale_done: {st.session_state.classify_whale_done}, whale_prediction1: {st.session_state.whale_prediction1}"
|
| 290 |
-
st.info(msg)
|
| 291 |
-
g_logger.info(msg)
|
| 292 |
-
|
| 293 |
-
# dropdown for selecting/overriding the species prediction
|
| 294 |
-
#st.info(f"[D] classify_whale_done: {st.session_state.classify_whale_done}, whale_prediction1: {st.session_state.whale_prediction1}")
|
| 295 |
-
if not st.session_state.classify_whale_done:
|
| 296 |
-
selected_class = tab_inference.sidebar.selectbox("Species", sw_wv.WHALE_CLASSES, index=None, placeholder="Species not yet identified...", disabled=True)
|
| 297 |
-
else:
|
| 298 |
-
pred1 = st.session_state.whale_prediction1
|
| 299 |
-
# get index of pred1 from WHALE_CLASSES, none if not present
|
| 300 |
-
print(f"[D] pred1: {pred1}")
|
| 301 |
-
ix = sw_wv.WHALE_CLASSES.index(pred1) if pred1 in sw_wv.WHALE_CLASSES else None
|
| 302 |
-
selected_class = tab_inference.selectbox("Species", sw_wv.WHALE_CLASSES, index=ix)
|
| 303 |
-
|
| 304 |
-
st.session_state.full_data['predicted_class'] = selected_class
|
| 305 |
-
if selected_class != st.session_state.whale_prediction1:
|
| 306 |
-
st.session_state.full_data['class_overriden'] = selected_class
|
| 307 |
|
| 308 |
-
btn = st.button("Upload observation to THE INTERNET!", on_click=push_observation)
|
| 309 |
-
# TODO: the metadata only fills properly if `validate` was clicked.
|
| 310 |
-
tab_inference.markdown(metadata2md())
|
| 311 |
-
|
| 312 |
-
msg = f"[D] full data after inference: {st.session_state.full_data}"
|
| 313 |
-
g_logger.debug(msg)
|
| 314 |
-
print(msg)
|
| 315 |
-
# TODO: add a link to more info on the model, next to the button.
|
| 316 |
-
|
| 317 |
-
whale_classes = out['predictions'][:]
|
| 318 |
-
# render images for the top 3 (that is what the model api returns)
|
| 319 |
-
with tab_inference:
|
| 320 |
-
st.markdown("## Species detected")
|
| 321 |
-
for i in range(len(whale_classes)):
|
| 322 |
-
sw_wv.display_whale(whale_classes, i)
|
| 323 |
-
|
| 324 |
|
| 325 |
|
| 326 |
|
|
@@ -340,27 +234,10 @@ def main() -> None:
|
|
| 340 |
|
| 341 |
if st.session_state.image is None:
|
| 342 |
st.info("Please upload an image first.")
|
| 343 |
-
st.info(str(
|
| 344 |
|
| 345 |
else:
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
# display the image (use cached version, no need to reread)
|
| 349 |
-
col1.image(st.session_state.image, use_column_width=True)
|
| 350 |
-
# and then run inference on the image
|
| 351 |
-
hotdog_image = Image.fromarray(st.session_state.image)
|
| 352 |
-
predictions = pipeline_hot_dog(hotdog_image)
|
| 353 |
-
|
| 354 |
-
col2.header("Probabilities")
|
| 355 |
-
first = True
|
| 356 |
-
for p in predictions:
|
| 357 |
-
col2.subheader(f"{ p['label'] }: { round(p['score'] * 100, 1)}%")
|
| 358 |
-
if first:
|
| 359 |
-
st.session_state.full_data['predicted_class'] = p['label']
|
| 360 |
-
st.session_state.full_data['predicted_score'] = round(p['score'] * 100, 1)
|
| 361 |
-
first = False
|
| 362 |
-
|
| 363 |
-
tab_hotdogs.write(f"Session Data: {json.dumps(st.session_state.full_data)}")
|
| 364 |
|
| 365 |
|
| 366 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import logging
|
| 2 |
import os
|
|
|
|
| 3 |
|
| 4 |
import pandas as pd
|
| 5 |
import streamlit as st
|
|
|
|
| 6 |
import folium
|
| 7 |
from streamlit_folium import st_folium
|
| 8 |
+
|
| 9 |
from transformers import pipeline
|
| 10 |
from transformers import AutoModelForImageClassification
|
| 11 |
|
| 12 |
from datasets import disable_caching
|
| 13 |
disable_caching()
|
| 14 |
|
| 15 |
+
import whale_gallery as gallery
|
| 16 |
+
import whale_viewer as viewer
|
| 17 |
+
from input.input_handling import setup_input
|
| 18 |
+
from maps.alps_map import present_alps_map
|
| 19 |
+
from maps.obs_map import present_obs_map
|
| 20 |
+
from utils.st_logs import setup_logging, parse_log_buffer
|
| 21 |
+
from classifier.classifier_image import cetacean_classify
|
| 22 |
+
from classifier.classifier_hotdog import hotdog_classify
|
| 23 |
|
| 24 |
|
| 25 |
# setup for the ML model on huggingface (our wrapper)
|
|
|
|
| 39 |
g_logger.setLevel(LOG_LEVEL)
|
| 40 |
|
| 41 |
st.set_page_config(layout="wide")
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
# initialise various session state variables
|
| 44 |
if "handler" not in st.session_state:
|
| 45 |
+
st.session_state['handler'] = setup_logging()
|
| 46 |
+
|
| 47 |
+
if "observations" not in st.session_state:
|
| 48 |
+
st.session_state.observations = {}
|
| 49 |
+
|
| 50 |
+
if "images" not in st.session_state:
|
| 51 |
+
st.session_state.images = {}
|
| 52 |
|
| 53 |
+
if "files" not in st.session_state:
|
| 54 |
+
st.session_state.files = {}
|
| 55 |
+
|
| 56 |
+
if "public_observation" not in st.session_state:
|
| 57 |
+
st.session_state.public_observation = {}
|
| 58 |
|
| 59 |
if "classify_whale_done" not in st.session_state:
|
| 60 |
st.session_state.classify_whale_done = False
|
| 61 |
|
| 62 |
if "whale_prediction1" not in st.session_state:
|
| 63 |
st.session_state.whale_prediction1 = None
|
|
|
|
|
|
|
|
|
|
| 64 |
|
| 65 |
if "tab_log" not in st.session_state:
|
| 66 |
st.session_state.tab_log = None
|
| 67 |
|
| 68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
def main() -> None:
|
| 70 |
"""
|
| 71 |
Main entry point to set up the streamlit UI and run the application.
|
| 72 |
|
| 73 |
The organisation is as follows:
|
| 74 |
|
| 75 |
+
1. observation input (a new observations) is handled in the sidebar
|
| 76 |
2. the rest of the interface is organised in tabs:
|
| 77 |
|
| 78 |
- cetean classifier
|
|
|
|
| 94 |
#g_logger.warning("warning message")
|
| 95 |
|
| 96 |
# Streamlit app
|
|
|
|
| 97 |
tab_inference, tab_hotdogs, tab_map, tab_coords, tab_log, tab_gallery = \
|
| 98 |
st.tabs(["Cetecean classifier", "Hotdog classifier", "Map", "*:gray[Dev:coordinates]*", "Log", "Beautiful cetaceans"])
|
| 99 |
st.session_state.tab_log = tab_log
|
| 100 |
|
| 101 |
|
| 102 |
+
# create a sidebar, and parse all the input (returned as `observations` object)
|
| 103 |
+
observations = setup_input(viewcontainer=st.sidebar)
|
| 104 |
|
| 105 |
|
| 106 |
if 0:## WIP
|
| 107 |
+
# goal of this code is to allow the user to override the ML prediction, before transmitting an observations
|
| 108 |
+
predicted_class = st.sidebar.selectbox("Predicted Class", viewer.WHALE_CLASSES)
|
| 109 |
override_prediction = st.sidebar.checkbox("Override Prediction")
|
| 110 |
|
| 111 |
if override_prediction:
|
| 112 |
+
overridden_class = st.sidebar.selectbox("Override Class", viewer.WHALE_CLASSES)
|
| 113 |
+
st.session_state.observations['class_overriden'] = overridden_class
|
| 114 |
else:
|
| 115 |
+
st.session_state.observations['class_overriden'] = None
|
| 116 |
|
| 117 |
|
| 118 |
with tab_map:
|
|
|
|
| 127 |
|
| 128 |
if show_db_points:
|
| 129 |
# show a nicer map, observations marked, tileset selectable.
|
| 130 |
+
st_observation = present_obs_map(
|
| 131 |
dataset_id=dataset_id, data_files=data_files,
|
| 132 |
dbg_show_extra=dbg_show_extra)
|
| 133 |
|
| 134 |
else:
|
| 135 |
# development map.
|
| 136 |
+
st_observation = present_alps_map()
|
| 137 |
|
| 138 |
|
| 139 |
with tab_log:
|
| 140 |
handler = st.session_state['handler']
|
| 141 |
if handler is not None:
|
| 142 |
+
records = parse_log_buffer(handler.buffer)
|
| 143 |
st.dataframe(records[::-1], use_container_width=True,)
|
| 144 |
st.info(f"Length of records: {len(records)}")
|
| 145 |
else:
|
|
|
|
| 173 |
# specific to the gallery (otherwise we get side effects)
|
| 174 |
tg_cont = st.container(key="swgallery")
|
| 175 |
with tg_cont:
|
| 176 |
+
gallery.render_whale_gallery(n_cols=4)
|
| 177 |
|
| 178 |
|
| 179 |
+
# Display submitted observation
|
| 180 |
if st.sidebar.button("Validate"):
|
| 181 |
+
# create a dictionary with the submitted observation
|
| 182 |
+
submitted_data = observations
|
| 183 |
+
st.session_state.observations = observations
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
|
| 185 |
+
tab_log.info(f"{st.session_state.observations}")
|
|
|
|
|
|
|
| 186 |
|
| 187 |
df = pd.DataFrame(submitted_data, index=[0])
|
| 188 |
with tab_coords:
|
|
|
|
| 209 |
trust_remote_code=True)
|
| 210 |
|
| 211 |
|
| 212 |
+
if st.session_state.images is None:
|
| 213 |
# TODO: cleaner design to disable the button until data input done?
|
| 214 |
st.info("Please upload an image first.")
|
| 215 |
else:
|
| 216 |
+
cetacean_classify(cetacean_classifier)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 218 |
|
| 219 |
|
| 220 |
|
|
|
|
| 234 |
|
| 235 |
if st.session_state.image is None:
|
| 236 |
st.info("Please upload an image first.")
|
| 237 |
+
#st.info(str(observations.to_dict()))
|
| 238 |
|
| 239 |
else:
|
| 240 |
+
hotdog_classify(pipeline_hot_dog, tab_hotdogs)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 241 |
|
| 242 |
|
| 243 |
|
src/{alps_map.py β maps/alps_map.py}
RENAMED
|
File without changes
|
src/{obs_map.py β maps/obs_map.py}
RENAMED
|
@@ -7,8 +7,8 @@ import streamlit as st
|
|
| 7 |
import folium
|
| 8 |
from streamlit_folium import st_folium
|
| 9 |
|
| 10 |
-
import whale_viewer as
|
| 11 |
-
from fix_tabrender import js_show_zeroheight_iframe
|
| 12 |
|
| 13 |
m_logger = logging.getLogger(__name__)
|
| 14 |
# we can set the log level locally for funcs in this module
|
|
@@ -60,7 +60,7 @@ _colors = [
|
|
| 60 |
"#778899" # Light Slate Gray
|
| 61 |
]
|
| 62 |
|
| 63 |
-
whale2color = {k: v for k, v in zip(
|
| 64 |
|
| 65 |
def create_map(tile_name:str, location:Tuple[float], zoom_start: int = 7) -> folium.Map:
|
| 66 |
"""
|
|
|
|
| 7 |
import folium
|
| 8 |
from streamlit_folium import st_folium
|
| 9 |
|
| 10 |
+
import whale_viewer as viewer
|
| 11 |
+
from utils.fix_tabrender import js_show_zeroheight_iframe
|
| 12 |
|
| 13 |
m_logger = logging.getLogger(__name__)
|
| 14 |
# we can set the log level locally for funcs in this module
|
|
|
|
| 60 |
"#778899" # Light Slate Gray
|
| 61 |
]
|
| 62 |
|
| 63 |
+
whale2color = {k: v for k, v in zip(viewer.WHALE_CLASSES, _colors)}
|
| 64 |
|
| 65 |
def create_map(tile_name:str, location:Tuple[float], zoom_start: int = 7) -> folium.Map:
|
| 66 |
"""
|
src/{fix_tabrender.py β utils/fix_tabrender.py}
RENAMED
|
File without changes
|
src/utils/grid_maker.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import math
|
| 3 |
+
|
| 4 |
+
def gridder(files):
    """Render pagination controls and return the user's choices.

    Draws three side-by-side widgets (batch size, row size, page selector)
    and derives the page count from the number of files.

    Args:
        files: Sized collection of files being paged through.

    Returns:
        tuple: (batch_size, row_size, page) as chosen in the UI.
    """
    size_col, row_col, page_col = st.columns(3)
    with size_col:
        batch_size = st.select_slider("Batch size:",range(10,110,10), value=10)
    with row_col:
        row_size = st.select_slider("Row size:", range(1,6), value = 5)
    # page count depends on the batch size chosen above
    num_batches = math.ceil(len(files)/batch_size)
    with page_col:
        page = st.selectbox("Page", range(1,num_batches+1))
    return batch_size, row_size, page
|
src/utils/metadata_handler.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
|
| 3 |
+
def metadata2md() -> str:
    """Build a markdown bullet list of selected observation metadata.

    Reads the cached observation from ``st.session_state.public_observation``
    and formats only the whitelisted keys as a markdown key-value list.

    Returns:
        str: Markdown-formatted key-value list of metadata
    """
    # Only these fields are surfaced to the UI; everything else stays private.
    wanted = ("latitude", "longitude", "author_email", "date", "time")
    entries = [
        f"- **{key}**: {value}\n"
        for key, value in st.session_state.public_observation.items()
        if key in wanted
    ]
    return "\n" + "".join(entries)
|
| 16 |
+
|
src/{st_logs.py → utils/st_logs.py}
RENAMED
|
File without changes
|
src/whale_viewer.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
from typing import List
|
|
|
|
| 2 |
from streamlit.delta_generator import DeltaGenerator
|
| 3 |
|
| 4 |
from PIL import Image
|
|
@@ -133,7 +134,7 @@ def display_whale(whale_classes:List[str], i:int, viewcontainer:DeltaGenerator=N
|
|
| 133 |
None
|
| 134 |
|
| 135 |
"""
|
| 136 |
-
|
| 137 |
if viewcontainer is None:
|
| 138 |
viewcontainer = st
|
| 139 |
|
|
@@ -147,11 +148,10 @@ def display_whale(whale_classes:List[str], i:int, viewcontainer:DeltaGenerator=N
|
|
| 147 |
|
| 148 |
|
| 149 |
viewcontainer.markdown(
|
| 150 |
-
"
|
| 151 |
)
|
| 152 |
current_dir = os.getcwd()
|
| 153 |
image_path = os.path.join(current_dir, "src/images/references/")
|
| 154 |
image = Image.open(image_path + df_whale_img_ref.loc[whale_classes[i], "WHALE_IMAGES"])
|
| 155 |
|
| 156 |
-
viewcontainer.image(image, caption=df_whale_img_ref.loc[whale_classes[i], "WHALE_REFERENCES"])
|
| 157 |
-
# link st.markdown(f"[{df.loc[whale_classes[i], 'WHALE_REFERENCES']}]({df.loc[whale_classes[i], 'WHALE_REFERENCES']})")
|
|
|
|
| 1 |
from typing import List
|
| 2 |
+
import streamlit as st
|
| 3 |
from streamlit.delta_generator import DeltaGenerator
|
| 4 |
|
| 5 |
from PIL import Image
|
|
|
|
| 134 |
None
|
| 135 |
|
| 136 |
"""
|
| 137 |
+
|
| 138 |
if viewcontainer is None:
|
| 139 |
viewcontainer = st
|
| 140 |
|
|
|
|
| 148 |
|
| 149 |
|
| 150 |
viewcontainer.markdown(
|
| 151 |
+
":whale: #" + str(i + 1) + ": " + format_whale_name(whale_classes[i])
|
| 152 |
)
|
| 153 |
current_dir = os.getcwd()
|
| 154 |
image_path = os.path.join(current_dir, "src/images/references/")
|
| 155 |
image = Image.open(image_path + df_whale_img_ref.loc[whale_classes[i], "WHALE_IMAGES"])
|
| 156 |
|
| 157 |
+
viewcontainer.image(image, caption=df_whale_img_ref.loc[whale_classes[i], "WHALE_REFERENCES"], use_column_width=True)
|
|
|
tests/test_input_handling.py
CHANGED
|
@@ -2,7 +2,7 @@ import pytest
|
|
| 2 |
from pathlib import Path
|
| 3 |
|
| 4 |
from input_handling import is_valid_email, is_valid_number
|
| 5 |
-
from
|
| 6 |
|
| 7 |
# generate tests for is_valid_email
|
| 8 |
# - test with valid email
|
|
|
|
| 2 |
from pathlib import Path
|
| 3 |
|
| 4 |
from input_handling import is_valid_email, is_valid_number
|
| 5 |
+
from input.input_validator import get_image_latlon, decimal_coords, get_image_datetime
|
| 6 |
|
| 7 |
# generate tests for is_valid_email
|
| 8 |
# - test with valid email
|