czyoung committed on
Commit
b3a9316
·
verified ·
1 Parent(s): 9154b30

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -14
app.py CHANGED
@@ -12,6 +12,7 @@ import datetime
12
  import tempfile
13
  import os
14
  import shutil
 
15
 
16
  PARQUET_DATASET_DIR = Path("parquet_dataset")
17
  PARQUET_DATASET_DIR.mkdir(parents=True,exist_ok=True)
@@ -19,6 +20,10 @@ PARQUET_DATASET_DIR.mkdir(parents=True,exist_ok=True)
19
 
20
  scheduler = ps.ParquetScheduler(repo_id="Sonogram/SampleDataset")
21
 
 
 
 
 
22
  def save_data(
23
  config_dict: Dict[str,str], audio_paths: List[str], userid: str,
24
  ) -> None:
@@ -42,6 +47,7 @@ def save_data(
42
  # Send to scheduler
43
  scheduler.append(data)
44
 
 
45
  st.title("Lecturer Support Tool")
46
 
47
  uploaded_file_paths = st.file_uploader("Upload an audio of classroom activity to analyze", accept_multiple_files=True)
@@ -68,7 +74,7 @@ if uploaded_file_paths is not None:
68
  file_paths.append(path)
69
  if len(valid_files) > 0:
70
  audio_tabs = st.tabs([f.name for f in valid_files])
71
-
72
  for j, tab in enumerate(audio_tabs):
73
  if tab.button("Analyze Audio",key=f"button_{j}"):
74
  if uploaded_file is None:
@@ -82,7 +88,12 @@ for j, tab in enumerate(audio_tabs):
82
 
83
  # RTTM load as filler
84
  speakerList, annotations = su.loadAudioRTTM("24F CHEM1402 Night Class Week 4.rttm")
 
 
 
85
 
 
 
86
  # Display breakdowns
87
  #--------------------------------------------------------------------------
88
 
@@ -144,14 +155,31 @@ for j, tab in enumerate(audio_tabs):
144
  f.set_figwidth(15)
145
 
146
  tab.pyplot(f)
147
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
  tab.write("Total length of audio: {}h:{:02d}m:{:02d}s".format(int(totalSeconds/3600),int((totalSeconds%3600)/60),int(totalSeconds%60)))
149
- tab.write("Lecturer spoke: {}h:{:02d}m:{:02d}s -> {:.2f}% of time".format(int(lecturer_speaker_times[0]/3600),
150
- int((lecturer_speaker_times[0]%3600)/60),int(lecturer_speaker_times[0]%60),
151
- 100*lecturer_speaker_times[0]/totalSeconds))
152
- tab.write("Audience spoke: {}h:{:02d}m:{:02d}s -> {:.2f}% of time".format(int(lecturer_speaker_times[1]/3600),
153
- int((lecturer_speaker_times[1]%3600)/60),int(lecturer_speaker_times[1]%60),
154
- 100*lecturer_speaker_times[1]/totalSeconds))
 
155
 
156
  # Experimental Speaker Breakdown
157
  #------------------------------------------------------------------------------
@@ -192,13 +220,24 @@ for j, tab in enumerate(audio_tabs):
192
 
193
  tab.pyplot(f)
194
 
 
 
 
 
 
 
 
 
 
195
  tab.write("Total length of audio: {}h:{:02d}m:{:02d}s".format(int(totalSeconds/3600),int((totalSeconds%3600)/60),int(totalSeconds%60)))
196
- for i,speaker in enumerate(all_speaker_times):
197
- tab.write("Speaker {} spoke: {}h:{:02d}m:{:02d}s -> {:.2f}% of time".format(i,
198
- int(speaker/3600),
199
- int((speaker%3600)/60),
200
- int(speaker%60),
201
- 100*speaker/totalSeconds))
 
 
202
 
203
 
204
  userid = st.text_input("user id:", "Guest")
 
12
  import tempfile
13
  import os
14
  import shutil
15
+ import pandas as pd
16
 
17
  PARQUET_DATASET_DIR = Path("parquet_dataset")
18
  PARQUET_DATASET_DIR.mkdir(parents=True,exist_ok=True)
 
20
 
21
  scheduler = ps.ParquetScheduler(repo_id="Sonogram/SampleDataset")
22
 
23
+ # Store results for viewing
24
+ if 'results' not in st.session_state:
25
+ st.session_state.results = []
26
+
27
  def save_data(
28
  config_dict: Dict[str,str], audio_paths: List[str], userid: str,
29
  ) -> None:
 
47
  # Send to scheduler
48
  scheduler.append(data)
49
 
50
+ st.set_page_config(layout="wide")
51
  st.title("Lecturer Support Tool")
52
 
53
  uploaded_file_paths = st.file_uploader("Upload an audio of classroom activity to analyze", accept_multiple_files=True)
 
74
  file_paths.append(path)
75
  if len(valid_files) > 0:
76
  audio_tabs = st.tabs([f.name for f in valid_files])
77
+ st.info(f'{len(valid_files)} valid files: {[fi.name for fi in valid_files]}')
78
  for j, tab in enumerate(audio_tabs):
79
  if tab.button("Analyze Audio",key=f"button_{j}"):
80
  if uploaded_file is None:
 
88
 
89
  # RTTM load as filler
90
  speakerList, annotations = su.loadAudioRTTM("24F CHEM1402 Night Class Week 4.rttm")
91
+ while (len(st.session_state.results) < j):
92
+ st.session_state.results.append([])
93
+ st.session_state.results[j] = (speakerList,annotations)
94
 
95
+ if len(st.session_state.results > j) and len(st.session_state.results[j])) > 0:
96
+ with st.spinner(text='Loading results...'):
97
  # Display breakdowns
98
  #--------------------------------------------------------------------------
99
 
 
155
  f.set_figwidth(15)
156
 
157
  tab.pyplot(f)
158
+
159
+ df = pd.DataFrame(
160
+ {
161
+ "Speaker": ["Lecturer", "Audience"],
162
+ "Time spoken": ["{}h:{:02d}m:{:02d}s".format(int(lecturer_speaker_times[0]/3600),
163
+ int((lecturer_speaker_times[0]%3600)/60),
164
+ int(lecturer_speaker_times[0]%60)),
165
+ "{}h:{:02d}m:{:02d}s".format(int(lecturer_speaker_times[1]/3600),
166
+ int((lecturer_speaker_times[1]%3600)/60),
167
+ int(lecturer_speaker_times[1]%60))],
168
+ "Percentage": [
169
+ "{:.2f}%".format(100*lecturer_speaker_times[0]/totalSeconds),
170
+ "{:.2f}%".format(100*lecturer_speaker_times[1]/totalSeconds),
171
+ ],
172
+ }
173
+ )
174
+
175
  tab.write("Total length of audio: {}h:{:02d}m:{:02d}s".format(int(totalSeconds/3600),int((totalSeconds%3600)/60),int(totalSeconds%60)))
176
+ st.table(df)
177
+ #tab.write("Lecturer spoke: {}h:{:02d}m:{:02d}s -> {:.2f}% of time".format(int(lecturer_speaker_times[0]/3600),
178
+ # int((lecturer_speaker_times[0]%3600)/60),int(lecturer_speaker_times[0]%60),
179
+ # 100*lecturer_speaker_times[0]/totalSeconds))
180
+ #tab.write("Audience spoke: {}h:{:02d}m:{:02d}s -> {:.2f}% of time".format(int(lecturer_speaker_times[1]/3600),
181
+ # int((lecturer_speaker_times[1]%3600)/60),int(lecturer_speaker_times[1]%60),
182
+ # 100*lecturer_speaker_times[1]/totalSeconds))
183
 
184
  # Experimental Speaker Breakdown
185
  #------------------------------------------------------------------------------
 
220
 
221
  tab.pyplot(f)
222
 
223
+ df = pd.DataFrame(
224
+ {
225
+ "Time spoken": ["{}h:{:02d}m:{:02d}s".format(int(sp/3600),
226
+ int((sp%3600)/60),
227
+ int(sp%60)) for sp in all_speaker_times],,
228
+ "Percentage": ["{:.2f}%".format(100*sp/totalSeconds) for sp in all_speaker_times],
229
+ }
230
+ )
231
+
232
  tab.write("Total length of audio: {}h:{:02d}m:{:02d}s".format(int(totalSeconds/3600),int((totalSeconds%3600)/60),int(totalSeconds%60)))
233
+ st.table(df)
234
+
235
+ #for i,speaker in enumerate(all_speaker_times):
236
+ # tab.write("Speaker {} spoke: {}h:{:02d}m:{:02d}s -> {:.2f}% of time".format(i,
237
+ # int(speaker/3600),
238
+ # int((speaker%3600)/60),
239
+ # int(speaker%60),
240
+ # 100*speaker/totalSeconds))
241
 
242
 
243
  userid = st.text_input("user id:", "Guest")