czyoung committed on
Commit
19b30cf
·
verified ·
1 Parent(s): 2e72d99

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -11
app.py CHANGED
@@ -17,12 +17,16 @@ import pandas as pd
17
  PARQUET_DATASET_DIR = Path("parquet_dataset")
18
  PARQUET_DATASET_DIR.mkdir(parents=True,exist_ok=True)
19
 
 
 
20
 
21
  scheduler = ps.ParquetScheduler(repo_id="Sonogram/SampleDataset")
22
 
23
- # Store results for viewing
24
  if 'results' not in st.session_state:
25
  st.session_state.results = []
 
 
26
 
27
  def save_data(
28
  config_dict: Dict[str,str], audio_paths: List[str], userid: str,
@@ -76,6 +80,7 @@ if uploaded_file_paths is not None:
76
  audio_tabs = st.tabs([f.name for f in valid_files])
77
  while (len(st.session_state.results) < len(valid_files)):
78
  st.session_state.results.append([])
 
79
  st.info(f'{len(valid_files)} valid files: {[fi.name for fi in valid_files]}')
80
  for j, tab in enumerate(audio_tabs):
81
  if tab.button("Analyze Audio",key=f"button_{j}"):
@@ -85,11 +90,11 @@ for j, tab in enumerate(audio_tabs):
85
  # Process
86
  # Pretend to take time as an example
87
  with st.spinner(text='NOT ACTUALLY ANALYZING, JUST A FILLER ANIMATION'):
88
- time.sleep(5)
89
  st.success('Done')
90
 
91
  # RTTM load as filler
92
- speakerList, annotations = su.loadAudioRTTM("24F CHEM1402 Night Class Week 4.rttm")
93
  st.session_state.results[j] = (speakerList,annotations)
94
 
95
  if len(st.session_state.results) > j and len(st.session_state.results[j]) > 0:
@@ -247,14 +252,40 @@ for j, tab in enumerate(audio_tabs):
247
  tab.write("Total length of audio: {}h:{:02d}m:{:02d}s".format(int(totalSeconds/3600),int((totalSeconds%3600)/60),int(totalSeconds%60)))
248
  tab.table(df)
249
 
250
- #for i,speaker in enumerate(all_speaker_times):
251
- # tab.write("Speaker {} spoke: {}h:{:02d}m:{:02d}s -> {:.2f}% of time".format(i,
252
- # int(speaker/3600),
253
- # int((speaker%3600)/60),
254
- # int(speaker%60),
255
- # 100*speaker/totalSeconds))
 
 
256
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
 
 
 
 
 
 
258
  userid = st.text_input("user id:", "Guest")
259
  colorPref = st.text_input("Favorite color?", "None")
260
  radio = st.radio('Pick one:', ['Left','Right'])
@@ -263,5 +294,4 @@ if st.button("Upload Files to Dataset"):
263
  save_data({"color":colorPref,"direction":radio,"number":selection},
264
  file_paths,
265
  userid)
266
- st.success('I think it worked!')
267
-
 
17
# Local directory for the parquet dataset; created at start-up (parents
# included, no error when it already exists).
PARQUET_DATASET_DIR = Path("parquet_dataset")
PARQUET_DATASET_DIR.mkdir(exist_ok=True, parents=True)

# RTTM files for weeks 1 through 10, loaded as filler results in place of a
# real analysis (presumably ground-truth annotations, going by the "_gt"
# suffix -- TODO confirm).
sample_data = [
    f"CHEM1402_gt/24F_CHEM1402_Night_Class_Week_{week}_gt.rttm"
    for week in range(1, 11)
]

scheduler = ps.ParquetScheduler(repo_id="Sonogram/SampleDataset")

# Per-session storage: 'results' holds what is shown per file, 'summaries'
# holds the per-file totals used for further processing. Each key is only
# initialised when absent, so existing session values are kept.
for state_key in ("results", "summaries"):
    if state_key not in st.session_state:
        st.session_state[state_key] = []
30
 
31
  def save_data(
32
  config_dict: Dict[str,str], audio_paths: List[str], userid: str,
 
80
  audio_tabs = st.tabs([f.name for f in valid_files])
81
  while (len(st.session_state.results) < len(valid_files)):
82
  st.session_state.results.append([])
83
+ st.session_state.summaries.append([])
84
  st.info(f'{len(valid_files)} valid files: {[fi.name for fi in valid_files]}')
85
  for j, tab in enumerate(audio_tabs):
86
  if tab.button("Analyze Audio",key=f"button_{j}"):
 
90
  # Process
91
  # Pretend to take time as an example
92
  with st.spinner(text='NOT ACTUALLY ANALYZING, JUST A FILLER ANIMATION'):
93
+ time.sleep(1)
94
  st.success('Done')
95
 
96
  # RTTM load as filler
97
+ speakerList, annotations = su.loadAudioRTTM(sample_data[j])
98
  st.session_state.results[j] = (speakerList,annotations)
99
 
100
  if len(st.session_state.results) > j and len(st.session_state.results[j]) > 0:
 
252
  tab.write("Total length of audio: {}h:{:02d}m:{:02d}s".format(int(totalSeconds/3600),int((totalSeconds%3600)/60),int(totalSeconds%60)))
253
  tab.table(df)
254
 
255
+ st.session_state.summaries[j] = [totalSeconds,lecturer_speaker_times,all_speaker_times]
256
+
257
with st.spinner(text='Processing summary results...'):
    # Each analysed file stores
    # [totalSeconds, lecturer_speaker_times, all_speaker_times] in
    # st.session_state.summaries; files that have not been analysed yet still
    # hold the empty placeholder list. Only filled entries are charted, so the
    # x column ('ds') and the y column always have the same length.
    #
    # Fixes relative to the previous version:
    #   * the comprehensions iterated enumerate(...), so each item was an
    #     (index, summary) pair: the divisor was the index (zero for the first
    #     file) and item[2] was always out of range;
    #   * the loop variable was named `su`, rebinding the name that is used
    #     elsewhere as a module (`su.loadAudioRTTM`).
    completed = [entry for entry in st.session_state.summaries if len(entry) > 0]
    summary_count = len(completed)

    def _percent_of(part, whole):
        """Return `part` as a percentage of `whole`; 0.0 when `whole` is zero."""
        return 100 * part / whole if whole else 0.0

    # NOTE(review): assumes lecturer_speaker_times[0] is the lecturer's time
    # and lecturer_speaker_times[1] the audience's, matching the indices the
    # original code used -- confirm against where the list is built.
    lecturer_data = pd.DataFrame({
        'ds': range(summary_count),
        'y': [_percent_of(times[0], total) for total, times, _ in completed],
    })
    audience_data = pd.DataFrame({
        'ds': range(summary_count),
        'y': [_percent_of(times[1], total) for total, times, _ in completed],
    })
    # One entry per distinct speaker, so the length is the speaker count.
    speaker_data = pd.DataFrame({
        'ds': range(summary_count),
        'y': [len(speakers) for _, _, speakers in completed],
    })

    # Lecturer vs. audience share of speaking time, one point per analysed file.
    fig_la = px.line(lecturer_data, x="ds", y="y", color_discrete_sequence=["#0514C0"], labels={'y': 'Lecturer Time'})
    fig_la.add_scatter(x=audience_data['ds'], y=audience_data['y'], mode='lines', name='Audience Time', line=dict(color='#4CC005'))
    fig_la.update_layout(title='Percentage of Time Speaking for Lecturer and Audience', xaxis_title='File', yaxis_title='Percent Time Spoken')
    st.plotly_chart(fig_la, use_container_width=True)

    # Number of distinct speakers per analysed file.
    fig_spc = px.line(speaker_data, x="ds", y="y", color_discrete_sequence=["#0514C0"], labels={'y': 'Speaker Count'})
    fig_spc.update_layout(title='Number of Distinct Speakers over Time', xaxis_title='File', yaxis_title='# of Distinct Speakers')
    st.plotly_chart(fig_spc, use_container_width=True)
287
+
288
+
289
  userid = st.text_input("user id:", "Guest")
290
  colorPref = st.text_input("Favorite color?", "None")
291
  radio = st.radio('Pick one:', ['Left','Right'])
 
294
  save_data({"color":colorPref,"direction":radio,"number":selection},
295
  file_paths,
296
  userid)
297
+ st.success('I think it worked!')