Moise Leance Sagbohan committed on
Commit
e80fa16
·
1 Parent(s): 5cb7864

first commit

Browse files
Files changed (4) hide show
  1. annotator.py +343 -0
  2. bioacoustics_logo_large2.gif +0 -0
  3. main.py +130 -0
  4. requirements.txt +7 -0
annotator.py ADDED
@@ -0,0 +1,343 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ import pandas as pd
4
+ import gspread
5
+ from google.oauth2 import service_account
6
+ import glob
7
+ import soundfile as sf
8
+ from maad import sound
9
+ from maad.util import power2dB
10
+ from skimage import transform
11
+ import logging
12
+ import zipfile
13
+ import tempfile
14
+ from datetime import datetime
15
+ import matplotlib.pyplot as plt
16
+
17
# Set up module-wide logging for the annotator.
# NOTE(review): DEBUG level is very verbose for a deployed Streamlit app — confirm intended.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
20
+
21
+
22
@st.cache_resource
def authorize_google_sheets():
    """Return a gspread client authorized from the service-account credentials
    stored in Streamlit secrets (cached for the lifetime of the app)."""
    scopes = [
        "https://www.googleapis.com/auth/spreadsheets",
        "https://www.googleapis.com/auth/drive",
    ]
    credentials = service_account.Credentials.from_service_account_info(
        st.secrets["gcp_service_account"], scopes=scopes)
    return gspread.authorize(credentials)
28
+
29
+
30
def get_google_sheet_data():
    """Fetch the 'rec1tes' worksheet of 'XP_final_annotations' as a DataFrame."""
    worksheet = authorize_google_sheets().open("XP_final_annotations").worksheet("rec1tes")
    records = worksheet.get_all_records()
    return pd.DataFrame(records)
36
+
37
+
38
def get_annotation_status():
    """Load the annotation-status sheet as a DataFrame.

    Guarantees the columns 'cluster_folder', 'user', 'status' and 'timestamp'
    exist (filled with '' when the sheet is missing them) so callers can rely
    on the schema.
    """
    client = authorize_google_sheets()
    sheet = client.open("XP_annotation_status").worksheet("status")
    df = pd.DataFrame(sheet.get_all_records())
    # Backfill any missing column with empty strings.
    for column in ('cluster_folder', 'user', 'status', 'timestamp'):
        if column not in df.columns:
            df[column] = ''
    return df
53
+
54
+
55
def update_annotation_status(cluster_folder, user, status):
    """Upsert the status row for *cluster_folder* in the XP_annotation_status sheet.

    Updates the existing row in place when the folder is already tracked,
    otherwise appends a new row. The timestamp is the current local time.
    """
    sheet = authorize_google_sheets().open("XP_annotation_status").worksheet("status")
    status_df = get_annotation_status()
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    matches = status_df[status_df['cluster_folder'] == cluster_folder].index
    if matches.empty:
        sheet.append_row([cluster_folder, user, status, stamp])
    else:
        # +2: one for the header row, one because sheet rows are 1-based.
        row = matches[0] + 2
        sheet.update_cell(row, 2, user)
        sheet.update_cell(row, 3, status)
        sheet.update_cell(row, 4, stamp)
66
+
67
+
68
@st.cache_data
def load_audio_files(folder):
    """Return the sorted list of WAV files directly inside *folder*.

    Fix: the original pattern matched only upper-case '*.WAV', so lower-case
    '.wav' files inside an uploaded ZIP were silently skipped on
    case-sensitive filesystems. Extensions are now matched case-insensitively.
    """
    audio_files = [
        path for path in glob.glob(os.path.join(folder, "*"))
        if path.lower().endswith(".wav")
    ]
    logger.debug(f"Audio files loaded from {folder}: {audio_files}")
    return sorted(audio_files)  # deterministic, alphabetical order
73
+
74
+
75
def plot_spec(file_path, cmap: str):
    """Render the spectrogram of the audio clip at *file_path* into the page.

    The figure size scales with clip duration so short ROIs are not stretched.
    The spectrogram is converted to dB (70 dB range) and downsampled by half
    before display.

    Fix: the image is now written to an in-memory buffer instead of the shared
    on-disk file 'temp_spectrogram.png', which was clobbered when several
    users (or Streamlit reruns) rendered spectrograms concurrently.
    """
    import io

    s, fs = sound.load(file_path)
    duration = len(s) / fs

    # Adjust figure size based on the duration of the audio file.
    if duration < 1:
        fig_size = (2, 2)
    elif duration < 2:
        fig_size = (2.5, 2)
    elif duration < 3:
        fig_size = (4, 2.5)
    else:
        fig_size = (5, 3.5)

    Sxx, tn, fn, ext = sound.spectrogram(s, fs, nperseg=1024, noverlap=512, flims=(0, fs // 2))
    Sxx_db = power2dB(Sxx, db_range=70)
    # Halve the image resolution to speed up rendering in the browser.
    Sxx_db = transform.rescale(Sxx_db, 0.5, anti_aliasing=True, channel_axis=None)

    fig, ax = plt.subplots(figsize=fig_size)
    img = ax.imshow(Sxx_db, aspect='auto', extent=ext, origin='lower', interpolation='bilinear', cmap=cmap)
    fig.colorbar(img, ax=ax, format="%+2.0f dB")
    ax.set(title='', xlabel='Time [s]', ylabel='Frequency [Hz]')
    plt.tight_layout()

    buffer = io.BytesIO()
    fig.savefig(buffer, format='png')
    plt.close(fig)  # release the figure to avoid matplotlib memory growth
    buffer.seek(0)
    st.image(buffer)
102
+
103
+
104
+
105
@st.cache_data
def spacing():
    """Insert a small vertical gap into the page layout."""
    gap_html = "<br></br>"
    st.markdown(gap_html, unsafe_allow_html=True)
108
+
109
+
110
def update_google_sheet(client, rec_name, annotations_df):
    """Replace the worksheet named *rec_name* with *annotations_df*.

    The sheet is cleared first, then rewritten with a header row followed by
    the DataFrame rows.
    """
    worksheet = client.open("XP_final_annotations").worksheet(rec_name)
    worksheet.clear()
    header = annotations_df.columns.values.tolist()
    rows = annotations_df.values.tolist()
    worksheet.update([header] + rows)
114
+
115
+
116
+
117
def plot_pie_chart(annotations_df):
    """Show a small pie chart of annotated vs. unannotated clusters.

    A cluster counts as annotated once any of its rows has 'validated_class'
    different from 0.

    Fix: when the sheet has no clusters yet, both wedge sizes are 0 and
    matplotlib renders a broken (NaN) pie — that case now shows a message
    instead.
    """
    total_clusters = len(annotations_df['cluster_number'].unique())
    annotated_clusters = annotations_df[annotations_df['validated_class'] != 0]['cluster_number'].nunique()
    remaining_clusters = total_clusters - annotated_clusters
    if total_clusters == 0:
        st.info("No clusters found for this recorder yet.")
        return
    labels = 'Annotated', 'Unannotated'
    sizes = [annotated_clusters, remaining_clusters]
    colors = ['#1fd655', '#ff9999']
    explode = (0.1, 0)  # pull the 'Annotated' slice out slightly
    fig1, ax1 = plt.subplots(figsize=(1, 1))
    ax1.pie(sizes, explode=explode, labels=labels, colors=colors, autopct='%1.0f%%',
            shadow=True, startangle=90)
    ax1.axis('equal')  # equal aspect ratio keeps the pie circular
    plt.rcParams['font.size'] = 9.0
    st.pyplot(fig1)
131
+
132
+
133
def iden():
    """Main annotation page.

    Flow: pick a recorder, upload ZIP(s) of cluster folders, review each
    ROI's spectrogram and audio, edit the suggested labels in a form, then
    save the validated annotations back to the per-recorder Google Sheet and
    a local CSV copy.
    """
    # Set up the credentials and client
    st.markdown('#####')
    st.header("Bamscape Clusters Annotator")
    client = authorize_google_sheets()

    # Select a recorder to analyze
    rec_name = st.selectbox('**:violet[Please, select a recorder to analyze]**',
                            options=['rec1dmu', 'rec3dmu', 'rec3dmu_v2', 'rec4dmu', 'rec4dmu_v2', 'rec6dmu', 'rec7dmu'])

    if rec_name:
        # Load the CSV files and Google Sheets
        sheet = client.open("XP_final_annotations").worksheet(f"{rec_name}")
        final_annotations = pd.DataFrame(sheet.get_all_records())
        st.session_state.final_annotations = final_annotations
        annotations_df = st.session_state.final_annotations
        csv_file = f'{rec_name}_all_CLUSTERS_COMBINED.csv'

        # Display the pie chart
        plot_pie_chart(annotations_df)

        # Filter out the annotated rows based on specific columns
        # (a 0 in any of these columns marks the row as not yet validated)
        unannotated_df = annotations_df[(annotations_df['validated_class'] == 0) |
                                        (annotations_df['validated_specie'] == 0) |
                                        (annotations_df['validator_name'] == 0)]

        # Load the initial state from the Google Sheet:
        # folders maps cluster number (str) -> list of its 'period' subfolders.
        if 'folders' not in st.session_state:
            folders = unannotated_df['cluster_number'].astype(str).unique()
            st.session_state.folders = {
                folder: unannotated_df[unannotated_df['cluster_number'] == int(folder)]['period'].astype(
                    str).unique().tolist() for folder in folders}

        # Get current annotation status
        # NOTE(review): annotation_status is fetched but never used below — confirm intent.
        annotation_status = get_annotation_status()

        # Check if user has previously uploaded files for the selected rec_name and store in session state
        if 'uploaded_files' not in st.session_state:
            st.session_state.uploaded_files = {}

        # Allow the user to upload a ZIP file
        uploaded_files = st.file_uploader(f"**:violet[Upload a ZIP file containing Clusters folders of {rec_name}]**", type=["zip"], accept_multiple_files=True)

        if uploaded_files:
            # Create a temporary directory to extract the ZIP file
            with tempfile.TemporaryDirectory() as tmpdir:
                for uploaded_file in uploaded_files:
                    with zipfile.ZipFile(uploaded_file, 'r') as zip_ref:
                        zip_ref.extractall(tmpdir)

                st.success(f"Clusters folders extracted successfully")

                # Log the extracted files and directories
                for root, dirs, files in os.walk(tmpdir):
                    logger.debug(f"Extracted root: {root}")
                    logger.debug(f"Extracted dirs: {dirs}")
                    logger.debug(f"Extracted files: {files}")

                # Use the extracted directory as the base path
                base_path = tmpdir

                # Three side-by-side selectors: cluster folder, subfolder, colormap.
                col1, col2, col3 = st.columns(3)
                selected_folder = None
                selected_subfolder = None
                with st.container():
                    with col1:
                        if st.session_state.folders:
                            selected_folder = st.selectbox("**:violet[Select a cluster folder to analyze]**",
                                                           list(st.session_state.folders.keys()))
                            logger.debug(f"Selected folder: {selected_folder}")
                        else:
                            st.success("Congratulations, all the clusters have been annotated! Please select another recorder to annotate.")
                    with col2:
                        if selected_folder:
                            subfolders = st.session_state.folders[selected_folder]
                            if subfolders:
                                selected_subfolder = st.selectbox("**:violet[Select a subfolder to analyze]**", subfolders)
                                logger.debug(f"Selected subfolder: {selected_subfolder}")
                    with col3:
                        selected_cmap = st.selectbox("**:violet[Choose a colormap to display spectrograms]**",
                                                     options=['jet', 'Greys', 'plasma', 'viridis', 'inferno'])

                if selected_folder and selected_subfolder:
                    subfolder_path = os.path.join(base_path, selected_folder, selected_subfolder)
                    logger.debug(f"Subfolder path: {subfolder_path}")

                    # List the files found in the selected subfolder (also shown in the UI).
                    for root, dirs, files in os.walk(subfolder_path, topdown=False):
                        targetfolder = files
                        logger.debug(f"Files in subfolder: {files}")
                    st.write(targetfolder)
                    st.markdown("---")

                    audio_files = load_audio_files(subfolder_path)
                    logger.debug(f"Audio files found: {audio_files}")

                    if audio_files:
                        # One form row per ROI: spectrogram, audio player and four editable fields.
                        form = st.form(key=f"user_form")
                        annotations = []  # Initialize annotations list here
                        with form:
                            for i, audio_file in enumerate(audio_files):
                                file_name = os.path.basename(audio_file)
                                cols = [1.70, 1, 1, 1, 1, 1]
                                col1, col2, col3, col4, col5, col6 = st.columns(cols)
                                with col1:
                                    # Spectrogram preview of the ROI.
                                    with st.spinner('Processing...'):
                                        st.markdown(
                                            f"<h6 style='text-align: center; color: green;'>ROI: {file_name} </h10>",
                                            unsafe_allow_html=True)
                                        plot_spec(audio_file, cmap=selected_cmap)
                                with col2:
                                    # Audio playback of the same clip.
                                    st.markdown(f"<h2 style='text-align: center; color: black;'></h10>",
                                                unsafe_allow_html=True)
                                    st.markdown('######')
                                    audio_data, audio_sr = sf.read(audio_file)
                                    st.audio(audio_data, format='audio/wav', sample_rate=audio_sr, )
                                with col3:
                                    # Group (class) field, pre-filled with the suggestion.
                                    st.markdown('#####')
                                    st.markdown(f"<h4 style='text-align: center; color: blue;'>Group</h5>",
                                                unsafe_allow_html=True)
                                    suggested_group = annotations_df.loc[
                                        annotations_df['filename_ts'] == file_name, 'suggested_class'].values[0]
                                    group_input = st.text_input(f"*(modify the text if needed)*", value=suggested_group,
                                                                key=f"group_{file_name}")
                                with col4:
                                    # Species field, pre-filled with the suggested label.
                                    st.markdown('#####')
                                    st.markdown(f"<h4 style='text-align: center; color: blue;'>Species</h5>",
                                                unsafe_allow_html=True)
                                    suggested_label = annotations_df.loc[
                                        annotations_df['filename_ts'] == file_name, 'suggested_label'].values[0]
                                    scientific_name_input = st.text_input("*(modify the text if needed)*",
                                                                          value=suggested_label,
                                                                          key=f"scientific_name_{file_name}")
                                with col5:
                                    # Validator name field.
                                    st.markdown('#####')
                                    st.markdown(f"<h4 style='text-align: center; color: blue;'>Validator</h5>",
                                                unsafe_allow_html=True)
                                    validator_name = annotations_df.loc[
                                        annotations_df['filename_ts'] == file_name, 'validator_name'].values[0]
                                    validator_name_input = st.text_input("*(please, enter your name)*",
                                                                         value=validator_name,
                                                                         key=f"validator_name_{file_name}")
                                with col6:
                                    # Free-text comment field.
                                    st.markdown('#####')
                                    st.markdown(f"<h4 style='text-align: center; color: blue;'>Comment</h5>",
                                                unsafe_allow_html=True)
                                    comment = annotations_df.loc[
                                        annotations_df['filename_ts'] == file_name, 'comment'].values[0]
                                    comment_input = st.text_input("*(feel free to tell something)*", value=comment,
                                                                  key=f"validator_comment_{file_name}")
                                annotations.append({
                                    'file_name': file_name,
                                    'group_input': group_input,
                                    'scientific_name_input': scientific_name_input,
                                    'validator_name_input': validator_name_input,
                                    'comment_input': comment_input
                                })
                        submitButton = form.form_submit_button(label="Submit annotations")
                        if submitButton:
                            with st.spinner('Saving annotations...'):
                                for annotation in annotations:
                                    file_name = annotation['file_name']
                                    group_input = annotation['group_input']
                                    scientific_name_input = annotation['scientific_name_input']
                                    validator_name_input = annotation['validator_name_input']
                                    comment_input = annotation['comment_input']
                                    # Update the annotations_df DataFrame with new annotations
                                    annotations_df.loc[
                                        annotations_df['filename_ts'] == file_name, 'validated_class'] = group_input
                                    annotations_df.loc[
                                        annotations_df[
                                            'filename_ts'] == file_name, 'validated_specie'] = scientific_name_input
                                    annotations_df.loc[
                                        annotations_df[
                                            'filename_ts'] == file_name, 'validator_name'] = validator_name_input
                                    annotations_df.loc[
                                        annotations_df['filename_ts'] == file_name, 'comment'] = comment_input
                                annotations_df['validated_class'] = annotations_df['validated_class'].astype(str)

                                # Save to CSV file
                                annotations_df.to_csv(csv_file, index=False)

                                # Update the Google Sheet
                                update_google_sheet(client, rec_name, annotations_df)

                            st.success("All annotations have been saved.")

                            # Remove the analyzed subfolder from the list
                            st.session_state.folders[selected_folder].remove(selected_subfolder)

                            # If no more subfolders in the main folder, remove the main folder as well
                            if not st.session_state.folders[selected_folder]:
                                del st.session_state.folders[selected_folder]

                            st.experimental_rerun()
                    else:
                        st.error("No audio files found in the selected subfolder.")

        spacing()

        # Display the DataFrame
        st.header("Annotated DataFrame")
        st.write(
            ":orange[Feel free to also access the dataframe on google sheet [link](https://docs.google.com/spreadsheets/d/119CGzxLv0kclMMb3SDYYwrULn2WY77OqDrzR6McEYO0/edit?gid=0#gid=0)]")
        df = get_google_sheet_data()
        df_display = df.astype(str)
        st.write(df_display)
        st.markdown('#####')
340
+
341
+
342
# Allow running the annotator page stand-alone; it is normally driven from main.py.
if __name__ == "__main__":
    iden()
bioacoustics_logo_large2.gif ADDED
main.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from streamlit_option_menu import option_menu
3
+ import pyrebase
4
+
5
+ import annotator
6
+
7
# APP PAGE SETTINGS

st.set_page_config(page_title="BAMSCAPE", page_icon=":bird:", layout='wide', initial_sidebar_state='auto')

# Pull the page content upwards.
# NOTE(review): the class name below is specific to a particular Streamlit build — may break on upgrade.
st.markdown("""
<style>
.css-o18uir.e16nr0p33 {
margin-top: -75px;
}
</style>
""", unsafe_allow_html=True)

# Global widget styling: button colors, page padding, and (attempted) styling
# of buttons living inside third-party component iframes.
st.markdown(
"""
<style>
div.stButton > button {background-color: #64B5F6;}
div.Controls_container__LTeAA > button {background: purple !important;}
.block-container {
padding-top: 2.5rem;
padding-bottom: 0rem;
padding-left: 5rem;
padding-right: 5rem;
}

iframe[title="streamlit_text_label.label_select"] .ant-btn {background: orange !important;}
iframe[title="streamlit_text_label.label_select"] .ant-btn-primary {background: orange !important;}
.ant-btn {background: violet !important;}
.ant-btn-primary {background: violet !important;}
button.ant-btn {background: violet !important;}
button.ant-btn-primary {background: violet !important;}
</style>
""", unsafe_allow_html=True
)

# MAIN PAGE HEADER

col1, col2 = st.columns((1, 10))
with col1:
    st.image('bioacoustics_logo_large2.gif', width=90)
with col2:
    st.header(':green[Brazilian Team] &mdash;' ' ' ':violet[_Bioacoustics_ :bird:]')

# FIREBASE KEYS AND AUTHENTICATION
# All Firebase project keys come from Streamlit secrets.

firebaseConfig = {
    'apiKey': st.secrets["config_firebase"]['apiKey'],
    'authDomain': st.secrets["config_firebase"]['authDomain'],
    'projectId': st.secrets["config_firebase"]['projectId'],
    'databaseURL': st.secrets["config_firebase"]['databaseURL'],
    'storageBucket': st.secrets["config_firebase"]['storageBucket'],
    'messagingSenderId': st.secrets["config_firebase"]['messagingSenderId'],
    'appId': st.secrets["config_firebase"]['appId'],
    'measurementId': st.secrets["config_firebase"]['measurementId'],
}

fire = pyrebase.initialize_app(firebaseConfig)
auth = fire.auth()

# DATABASE
# NOTE(review): db and storage are initialized but not visibly used in this file — verify callers.

db = fire.database()
storage = fire.storage()

# MAIN APP

# Default the logged-in user's e-mail for the first run of the session.
if 'useremail' not in st.session_state:
    st.session_state.useremail = ''
74
+
75
+
76
def f():
    """Login-button callback: authenticate with Firebase and mark the session signed in.

    Reads the module-level `email`/`password` text-input values and the
    module-level `auth` client; on success sets the session flags and stores
    the user's e-mail.

    Fix: narrowed the bare `except:` (which also swallowed SystemExit and
    KeyboardInterrupt) to `except Exception:`. Failure stays deliberately
    silent here because the Login button's own handler reports the error
    to the user.
    """
    try:
        auth.sign_in_with_email_and_password(email, password)
        st.session_state.signout = True
        st.session_state.signedout = True
        st.session_state.useremail = email
    except Exception:
        st.text(' ')
84
+
85
+
86
def t():
    """Sign-out callback: clear both session flags so the login form is shown again."""
    st.session_state.signedout = False
    st.session_state.signout = False
89
+
90
+
91
# Initialize the two auth flags on first run of the session.
if 'signedout' not in st.session_state:
    st.session_state['signedout'] = False

if 'signout' not in st.session_state:
    st.session_state['signout'] = False

# Login form, shown until the user signs in successfully.
if not st.session_state['signedout']:
    st.subheader(':orange[Please, login to the interface]')
    email = st.text_input(':blue[E-mail]', placeholder='Enter your e-mail')
    password = st.text_input(':blue[Password]', placeholder='Enter your password', type='password')

    # The button also has f() as on_click; this branch re-authenticates so a
    # failure can be reported to the user with a warning.
    if st.button('Login', on_click=f):
        try:
            auth.sign_in_with_email_and_password(email, password)
            st.session_state.useremail = email
            st.session_state.signout = True
            st.session_state.signedout = True

        except:
            st.warning('Login failed, please enter a valid email/password')

# Signed-in view: greeting, sign-out button and the main navigation menu.
if st.session_state.signout:
    username = st.session_state.useremail
    username = username.split('@')[0]
    st.subheader(f':blue[Hello]' + ' ' + f':gray[{username}]' + ' ' + '👋🏻')
    st.button('Sign out', on_click=t)
    st.markdown('#####')
    bio = option_menu(menu_title=None,
                      options=['Home', 'Database', 'Data analysis', 'Identification'],
                      icons=['house', 'volume-up', 'soundwave', 'layers'],
                      default_index=0,
                      styles={"nav-link": {"font-size": "17px", "text-align": "left", "margin": "0px"},
                              "nav-link-selected": {"background-color": "orange"}},
                      orientation='horizontal')

    # Only the 'Identification' tab is wired up; it opens the annotator page.
    if bio == 'Identification':
        annotator.iden()

    st.markdown('#')
130
+
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gspread==6.1.2
2
+ Pyrebase4==4.8.0
3
+ scikit-maad==1.4.2
4
+ setuptools==70.3.0
5
+ soundfile==0.12.1
6
+ streamlit==1.36.0
7
+ streamlit-option-menu==0.3.13