czyoung committed
Commit 094a4d0 · verified · 1 parent: f56554e

Updated Analysis and Charts

Files changed (1):
  1. app.py +480 -459
app.py CHANGED
@@ -23,50 +23,7 @@ from pyannote.core import Annotation, Segment, Timeline
  from df.enhance import enhance, init_df
  import datetime as dt

- torch.classes.__path__ = [os.path.join(torch.__path__[0], torch.classes.__file__)]
-
- PARQUET_DATASET_DIR = Path("parquet_dataset")
- PARQUET_DATASET_DIR.mkdir(parents=True,exist_ok=True)
-
- sample_data = [f"CHEM1402_gt/24F_CHEM1402_Night_Class_Week_{i}_gt.rttm" for i in range(1,11)]
-
- scheduler = ps.ParquetScheduler(repo_id="Sonogram/SampleDataset")
-
- secondDifference = 5
- gainWindow = 4
- minimumGain = -45
- maximumGain = -5
- attenLimDB = 3
-
- isGPU = False
-
- try:
-     raise(RuntimeError("Not an error"))
-     #device = xm.xla_device()
-     print("TPU is available.")
-     isGPU = True
- except RuntimeError as e:
-     print(f"TPU is not available: {e}")
-     # Fallback to CPU or other devices if needed
-     isGPU = torch.cuda.is_available()
-     device = torch.device("cuda" if isGPU else "cpu")
-     print(f"Using {device} instead.")
-     #device = xm.xla_device()
-
- # Instantiate and prepare model for training.
- dfModel, dfState, _ = init_df(model_base_dir="DeepFilterNet3")
- dfModel.to(device)  #torch.device("cuda"))
- pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1")
- pipeline.to(device)  #torch.device("cuda"))
-
- # Store results for viewing and further processing
- if 'results' not in st.session_state:
-     st.session_state.results = []
- if 'summaries' not in st.session_state:
-     st.session_state.summaries = []
-
  def save_data(
      config_dict: Dict[str,str], audio_paths: List[str], userid: str,
  ) -> None:
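Note on the block removed above (it reappears lower in the file): the `try: raise(RuntimeError("Not an error"))` is a deliberately short-circuited TPU probe, so control always falls into the `except` branch and the app selects CUDA when available, else CPU. A minimal sketch of that fallback, assuming `torch_xla` as the optional TPU backend (the original keeps the `xm.xla_device()` call commented out):

```python
# Hedged sketch of the device-selection fallback this block implements.
import torch

def pick_device() -> torch.device:
    try:
        # Optional TPU backend -- an assumption; the commit ships it disabled.
        import torch_xla.core.xla_model as xm
        return xm.xla_device()
    except (ImportError, RuntimeError) as e:
        print(f"TPU is not available: {e}")
        # Same logic as the app: CUDA when present, else CPU.
        return torch.device("cuda" if torch.cuda.is_available() else "cpu")
```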
@@ -109,136 +66,234 @@ def processFile(filePath):
      waveform_gain_adjusted = su.equalizeVolume()(waveformEnhanced,sampleRate,gainWindow,minimumGain,maximumGain)
      print("Audio Equalized")
      print("Detecting speakers")
-     time.sleep(10)
      annotations = pipeline({"waveform": waveformEnhanced, "sample_rate": sampleRate})
      print("Speakers Detected")
      speakerList = su.annotationToSpeakerList(annotations)
      return (speakerList, annotations, int(waveformEnhanced.shape[-1]/sampleRate))

- def removeOverlap(timeSegment,overlap):
-     times = []
-     if timeSegment.start < overlap.start:
-         times.append(Segment(timeSegment.start,min(overlap.start,timeSegment.end)))
-     if timeSegment.end > overlap.end:
-         times.append(Segment(max(timeSegment.start,overlap.end),timeSegment.end))
-     return times
-
- def checkForOverlap(time1, time2):
-     overlap = time1 & time2
-     if overlap:
-         return overlap
-     else:
-         return None
-
- def calcCategories(annotation,maxTime):
-     noVoice = [Segment(0,maxTime)]
-     oneVoice = []
-     multiVoice = []
-     # TBD Clean this up!!!
-     rawData = {}
-     for speakerName in annotation.labels():
-         if speakerName not in rawData.keys():
-             rawData[speakerName] = []
-         for segmentItem in annotation.label_support(speakerName):
-             rawData[speakerName].append(segmentItem)
-     for speaker in rawData.keys():
-         timesToProcess = []
-         for timeSlot in rawData[speaker]:
-             timesToProcess.append((speaker,timeSlot))
-         while len(timesToProcess) > 0:
-             currID, currTime = timesToProcess[0]
-             timesToProcess.remove(timesToProcess[0])
-             resetCheck = False
-             # Check in multi
-             for compareID,timeSlot in multiVoice:
-                 overlapTime = checkForOverlap(currTime,timeSlot)
-                 if overlapTime is None:
-                     continue
-                 else:
-                     compareID.append(currID)
-                     newTimes = removeOverlap(currTime,timeSlot)  #+removeOverlap(timeSlot,currTime)
-                     for i in range(len(newTimes)):
-                         newTimes[i] = (currID,newTimes[i])
-                     timesToProcess += newTimes
-                     resetCheck = True
-                     break
-             if resetCheck:
-                 continue
-             # Check in one voice
-             for timeSlot in oneVoice:
-                 tID = timeSlot[0]
-                 tTime = timeSlot[1]
-                 overlapTime = checkForOverlap(currTime,tTime)
-                 if overlapTime is None:
-                     continue
-                 else:
-                     oneVoice.remove(timeSlot)
-                     # Add back non overlap
-                     newTimes = removeOverlap(tTime,currTime)
-                     for i in range(len(newTimes)):
-                         newTimes[i] = (tID,newTimes[i])
-                     oneVoice += newTimes
-                     # Add overlap time to multivoice
-                     multiVoice.append(([tID,currID],overlapTime))
-                     # Add new times back to process
-                     newTimes = removeOverlap(currTime,tTime)
-                     for i in range(len(newTimes)):
-                         newTimes[i] = (currID,newTimes[i])
-                     timesToProcess += newTimes
-                     resetCheck = True
-                     break
-             if resetCheck:
-                 continue
-             # Add to one voice
-             oneVoice.append((currID,currTime))
-     for _,timeSlot in multiVoice:
-         copyOfNo = copy.deepcopy(noVoice)
-         for emptySlot in noVoice:
-             if checkForOverlap(timeSlot,emptySlot) is None:
-                 continue
-             else:
-                 copyOfNo.remove(emptySlot)
-                 copyOfNo += removeOverlap(emptySlot,timeSlot)
-         noVoice = copyOfNo
-     for _,timeSlot in oneVoice:
-         copyOfNo = copy.deepcopy(noVoice)
-         for emptySlot in noVoice:
-             if checkForOverlap(timeSlot,emptySlot) is None:
-                 continue
-             else:
-                 copyOfNo.remove(emptySlot)
-                 copyOfNo += removeOverlap(emptySlot,timeSlot)
-         noVoice = copyOfNo
-     return noVoice, oneVoice, multiVoice, rawData
-
- def sumTimes(timeList):
-     totalTime = 0
-     for timeSlot in timeList:
-         totalTime += timeSlot.duration
-     return totalTime
-
- def sumTimesPerSpeaker(timeSlotList):
-     speakerList = []
-     timeList = []
-     for speaker,timeSlot in timeSlotList:
-         if speaker not in speakerList:
-             speakerList.append(speaker)
-             timeList.append(0)
-         timeList[speakerList.index(speaker)] += timeSlot.duration
-     return speakerList, timeList
-
- def sumMultiTimesPerSpeaker(timeSlotList):
-     speakerList = []
-     timeList = []
-     sList,tList = sumTimesPerSpeaker(timeSlotList)
-     for i,speakerGroup in enumerate(sList):
-         for speaker in speakerGroup:
-             if speaker not in speakerList:
-                 speakerList.append(speaker)
-                 timeList.append(0)
-             timeList[speakerList.index(speaker)] += tList[i]
-     return speakerList, timeList
+ def addCategory():
+     newCategory = st.session_state.categoryInput
+     st.toast(f"Adding {newCategory}")
+     st.session_state[f'multiselect_{newCategory}'] = []
+     st.session_state.categories.append(newCategory)
+     st.session_state.categoryInput = ''
+     for resultGroup in st.session_state.categorySelect:
+         resultGroup.append([])
+
+ def removeCategory(index):
+     categoryName = st.session_state.categories[index]
+     st.toast(f"Removing {categoryName}")
+     del st.session_state[f'multiselect_{categoryName}']
+     del st.session_state[f'remove_{categoryName}']
+     del st.session_state.categories[index]
+     for resultGroup in st.session_state.categorySelect:
+         del resultGroup[index]
+
+ def updateCategoryOptions(resultIndex):
+     if st.session_state.resetResult:
+         #st.info(f"Skipping update of {resultIndex}")
+         return
+     #st.info(f"Updating result {resultIndex}")
+     #st.info(f"In update: {st.session_state.categorySelect}")
+     # Handle
+     currResults = st.session_state.results[resultIndex][1]
+     speakerNames = [sp for sp in currResults["speakers"].keys()]
+
+     # Handle speaker category sidebars
+     unusedSpeakers = copy.deepcopy(speakerNames)
+     # Remove used speakers
+     for i, category in enumerate(st.session_state['categories']):
+         category_choices = copy.deepcopy(st.session_state[f'multiselect_{category}'])
+         st.session_state["categorySelect"][resultIndex][i] = category_choices
+         for sp in category_choices:
+             try:
+                 unusedSpeakers.remove(sp)
+             except:
+                 continue
+     st.session_state.unusedSpeakers[resultIndex] = unusedSpeakers
+     #st.info(f"After update: {st.session_state.categorySelect}")
+
+ def updateMultiSelect():
+     currFileIndex = file_names.index(st.session_state["select_currFile"])
+     st.session_state.resetResult = True
+     for i, category in enumerate(st.session_state['categories']):
+         st.session_state[f'multiselect_{category}'] = st.session_state['categorySelect'][currFileIndex][i]
+
+ def analyze(inFileName):
+     try:
+         st.session_state.resetResult = False
+         currFileIndex = file_names.index(inFileName)
+         if len(st.session_state.results) > currFileIndex and len(st.session_state.summaries) > currFileIndex and len(st.session_state.results[currFileIndex]) > 0:
+             # Handle
+             currSpeakerList, currAnnotation, currTotalTime = st.session_state.results[currFileIndex]
+             speakerNames = currAnnotation.labels()
+
+             # Update other categories
+             unusedSpeakers = st.session_state.unusedSpeakers[currFileIndex]
+             categorySelections = st.session_state["categorySelect"][currFileIndex]
+
+             noVoice, oneVoice, multiVoice = su.calcSpeakingTypes(currAnnotation,currTotalTime)
+             noVoice.sort()
+             oneVoice.sort()
+             multiVoice.sort()
+
+             df3 = pd.DataFrame(
+                 {
+                     "values": [sumTimes(noVoice),
+                                sumTimes([n for _,n in oneVoice]),
+                                sumTimes([n for _,n in multiVoice])],
+                     "names": ["No Voice","One Voice","Multi Voice"],
+                 }
+             )
+             df3.name = "df3"
+             st.session_state.summaries[currFileIndex]["df3"] = df3
+
+             canRemoveMaybe = '''df4_dict = {}
+             nameList = st.session_state.categories
+             extraNames = []
+             valueList = [0 for i in range(len(nameList))]
+             extraValues = []
+
+             for sp in currResults["speakers"].keys():
+                 foundSp = False
+                 for i, categoryName in enumerate(nameList):
+                     if sp in categorySelections[i]:
+                         #st.info(categoryName)
+                         valueList[i] += sumTimes(currResults["speakers"][sp])
+                         foundSp = True
+                         break
+                 if foundSp:
+                     continue
+                 else:
+                     extraNames.append(sp)
+                     extraValues.append(sumTimes(currResults["speakers"][sp]))
+             df4_dict = {
+                 "values": valueList+extraValues,
+                 "names": nameList+extraNames,
+             }
+             df4 = pd.DataFrame(data=df4_dict)
+             df4.name = "df4"
+             st.session_state.summaries[currFileIndex]["df4"] = df4'''
+
+             speakerList,timeList = sumTimesPerSpeaker(oneVoice)
+             multiSpeakerList, multiTimeList = sumMultiTimesPerSpeaker(multiVoice)
+             summativeMultiSpeaker = sum(multiTimeList)
+             sumNoVoice = sumTimes(noVoice)
+             sumOneVoice = sumTimes([n for _,n in oneVoice])
+             sumMultiVoice = sumTimes([n for _,n in multiVoice])
+             basePercentiles = [sumNoVoice/currTotalTime,
+                                sumOneVoice/currTotalTime,
+                                sumMultiVoice/currTotalTime
+                                ]
+             df5 = pd.DataFrame(
+                 {
+                     "ids" : ["NV","OV","MV"]+[f"OV_{i}" for i in range(len(speakerList))]
+                             +[f"MV_{i}" for i in range(len(multiSpeakerList))],
+                     "labels" : ["No Voice","One Voice","Multi Voice"] + speakerList + multiSpeakerList,
+                     "parents" : ["","",""]+["OV" for i in range(len(speakerList))]
+                                 +["MV" for i in range(len(multiSpeakerList))],
+                     "parentNames" : ["Total","Total","Total"]+["One Voice" for i in range(len(speakerList))]
+                                     +["Multi Voice" for i in range(len(multiSpeakerList))],
+                     "values" : [sumNoVoice,
+                                 sumOneVoice,
+                                 sumMultiVoice,
+                                 ] + timeList + multiTimeList,
+                     "valueStrings" : [timeToString(sumNoVoice),
+                                       timeToString(sumOneVoice),
+                                       timeToString(sumMultiVoice),
+                                       ] + timeToString(timeList) + timeToString(multiTimeList),
+                     "percentiles" : [basePercentiles[0]*100,
+                                      basePercentiles[1]*100,
+                                      basePercentiles[2]*100] +
+                                     [(t*100) / sumOneVoice * basePercentiles[1] for t in timeList] +
+                                     [(t*100) / summativeMultiSpeaker * basePercentiles[2] for t in multiTimeList],
+                     "parentPercentiles" : [basePercentiles[0]*100,
+                                            basePercentiles[1]*100,
+                                            basePercentiles[2]*100] +
+                                           [(t*100) / sumOneVoice for t in timeList] +
+                                           [(t*100) / summativeMultiSpeaker for t in multiTimeList],
+                 }
+             )
+             df5.name = "df5"
+             st.session_state.summaries[currFileIndex]["df5"] = df5
+
+             speakers_dataFrame,speakers_times = su.annotationToDataFrame(currAnnotation)
+             st.session_state.summaries[currFileIndex]["speakers_dataFrame"] = speakers_dataFrame
+             st.session_state.summaries[currFileIndex]["speakers_times"] = speakers_times
+
+             df2_dict = {
+                 "values":[100*t/currResults["duration"] for t in df4_dict["values"]],
+                 "names":df4_dict["names"]
+             }
+             df2 = pd.DataFrame(df2_dict)
+             st.session_state.summaries[currFileIndex]["df2"] = df2
+     except ValueError:
+         pass
+
+ #----------------------------------------------------------------------------------------------------------------------
+
+ torch.classes.__path__ = [os.path.join(torch.__path__[0], torch.classes.__file__)]
+
+ PARQUET_DATASET_DIR = Path("parquet_dataset")
+ PARQUET_DATASET_DIR.mkdir(parents=True,exist_ok=True)
+
+ sample_data = [f"CHEM1402_gt/24F_CHEM1402_Night_Class_Week_{i}_gt.rttm" for i in range(1,11)]
+
+ scheduler = ps.ParquetScheduler(repo_id="Sonogram/SampleDataset")
+
+ secondDifference = 5
+ gainWindow = 4
+ minimumGain = -45
+ maximumGain = -5
+ attenLimDB = 3
+
+ isGPU = False
+
+ try:
+     raise(RuntimeError("Not an error"))
+     #device = xm.xla_device()
+     print("TPU is available.")
+     isGPU = True
+ except RuntimeError as e:
+     print(f"TPU is not available: {e}")
+     # Fallback to CPU or other devices if needed
+     isGPU = torch.cuda.is_available()
+     device = torch.device("cuda" if isGPU else "cpu")
+     print(f"Using {device} instead.")
+     #device = xm.xla_device()
+
+ # Instantiate and prepare model for training.
+ dfModel, dfState, _ = init_df(model_base_dir="DeepFilterNet3")
+ dfModel.to(device)  #torch.device("cuda"))
+ pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1")
+ pipeline.to(device)  #torch.device("cuda"))
+
+ # Store results for viewing and further processing
+ # Long-range usage
+ if 'results' not in st.session_state:
+     st.session_state.results = []
+ if 'summaries' not in st.session_state:
+     st.session_state.summaries = []
+ if 'categories' not in st.session_state:
+     st.session_state.categories = ["Lecturer","Audience"]
+     st.session_state.categorySelect = []
+ # Single Use
+ if 'removeCategory' not in st.session_state:
+     st.session_state.removeCategory = None
+ if 'resetResult' not in st.session_state:
+     st.session_state.resetResult = False
+ # Specific to target file
+ if 'unusedSpeakers' not in st.session_state:
+     st.session_state.unusedSpeakers = []
+ if 'file_names' not in st.session_state:
+     st.session_state.file_names = []

  #st.set_page_config(layout="wide")
  st.title("Lecturer Support Tool")
@@ -259,7 +314,7 @@ if uploaded_file_paths is not None:
      print("Found file paths")
      valid_files = []
      file_paths = []
-     audio_tabs = []
+     file_names = []
      # Reset valid_files?
      for uploaded_file in uploaded_file_paths:
          if not uploaded_file.name.endswith(supported_file_types):
@@ -273,311 +328,277 @@ if uploaded_file_paths is not None:
              f.write(uploaded_file.getvalue())
          valid_files.append(uploaded_file)
          file_paths.append(path)
+     # Save valid file names
      if len(valid_files) > 0:
-         audio_tabs = st.tabs([f.name for f in valid_files])
+         file_names = [f.name for f in valid_files]
          while (len(st.session_state.results) < len(valid_files)):
              st.session_state.results.append([])
          while (len(st.session_state.summaries) < len(valid_files)):
              st.session_state.summaries.append([])
+         while (len(st.session_state.unusedSpeakers) < len(valid_files)):
+             st.session_state.unusedSpeakers.append([])
+         while (len(st.session_state.categorySelect) < len(valid_files)):
+             tempCategories = [[] for cat in st.session_state.categories]
+             st.session_state.categorySelect.append(tempCategories)
+         while (len(st.session_state.summaries) < len(valid_files)):
+             st.session_state.summaries.append([])
+         # Clear replaced files
+         for i in range(len(valid_files)):
+             if len(st.session_state.results[i]) > 0 and st.session_state.results[i][0] != file_names[i]:
+                 st.session_state.results[i] = []
+                 st.session_state.summaries[i] = []
+         st.session_state.file_names = file_names

-     st.info(f'{len(valid_files)} valid files: {[fi.name for fi in valid_files]}')
-
-     if st.button("Analyze All Audio",key=f"button_all"):
-         if len(valid_files) == 0:
-             st.error('Upload file(s) first!')
-         else:
-             print("Start analyzing")
-             start_time = time.time()
-             totalFiles = len(valid_files)
-             for i in range(totalFiles):
-                 with st.spinner(text=f'Analyzing File {i+1} of {totalFiles}'):
-                     # Text files use sample data
-                     if file_paths[i].endswith('.txt'):
-                         time.sleep(1)
-                         # RTTM load as filler
-                         speakerList, annotations = su.loadAudioRTTM(sample_data[i])
-                         st.session_state.results[i] = (speakerList,annotations, 10000)
-                         st.session_state.summaries[i] = []
-                     else:
-                         st.info(file_paths[i])
-                         speakerList, annotations, totalSeconds = processFile(file_paths[i])
-                         st.session_state.results[i] = (speakerList,annotations, totalSeconds)
-                         st.session_state.summaries[i] = []
-             st.success(f"Took {time.time() - start_time} seconds to analyze {totalFiles} files!")
-
-     for i, tab in enumerate(audio_tabs):
-         if tab.button("Analyze Audio",key=f"button_{i}"):
-             start_time = time.time()
-             # Text files use sample data
-             if file_paths[i].endswith('.txt'):
-                 with st.spinner(text='NOT ACTUALLY ANALYZING, JUST A FILLER ANIMATION'):
-                     time.sleep(1)
-                     # RTTM load as filler
-                     speakerList, annotations = su.loadAudioRTTM(sample_data[i])
-                     st.session_state.results[i] = (speakerList,annotations,10000)
-                     st.session_state.summaries[i] = []
-             else:
-                 with st.spinner(text='Analyzing File'):
-                     st.session_state.results[i] = processFile(file_paths[i])
-                     st.session_state.summaries[i] = []
-             st.success(f"Took {time.time() - start_time} seconds to analyze!")
-
-         if len(st.session_state.results) > i and len(st.session_state.summaries) > i and len(st.session_state.results[i]) > 0:
-             with st.spinner(text='Loading results...'):
-                 # Display breakdowns
-                 #--------------------------------------------------------------------------
-
-                 speakerList, annotations, totalSeconds = st.session_state.results[i]
-
-                 # Prepare data
-                 sortedSpeakerList = sorted([[row for row in speaker if row[1] > 0.25] for speaker in speakerList if len([row for row in speaker if row[1] > 0.25]) > 0],
-                                            key=lambda e: min(e)[0])
-                 pred_count = len(sortedSpeakerList)
-                 lecturer_speaker_list,_ = su.twoClassExtendAnnotation(annotations)
-                 lecturer_pred_count = 2
-                 lecturer_speaker_times = []
-                 for j,speaker in enumerate(lecturer_speaker_list):
-                     lecturer_speaker_times.append(0)
-                     for timeSection in speaker:
-                         lecturer_speaker_times[j] += timeSection[1]
-
-                 lecturer_dataFrame = su.speakerListToDataFrame(lecturer_speaker_list)
-                 lecturer_dataFrame.loc[lecturer_dataFrame['Resource'] == "Speaker 1", 'Resource'] = "Lecturer"
-                 lecturer_dataFrame.loc[lecturer_dataFrame['Resource'] == "Speaker 2", 'Resource'] = "Audience"
-
-                 all_speaker_times = []
-                 for j,speaker in enumerate(sortedSpeakerList):
-                     all_speaker_times.append(0)
-                     for timeSection in speaker:
-                         all_speaker_times[j] += timeSection[1]
-
-                 all_dataFrame = su.speakerListToDataFrame(sortedSpeakerList)
-                 currDF = all_dataFrame
-
-                 # TBD CLEAN THIS UP!!!
-                 noVoice2, oneVoice2, multiVoice2, rawSample = calcCategories(annotations,totalSeconds)
-                 noVoice2.sort()
-                 oneVoice2.sort()
-                 multiVoice2.sort()
-                 sList,timeList = sumTimesPerSpeaker(oneVoice2)
-                 multiSpeakerList, multiTimeList = sumMultiTimesPerSpeaker(multiVoice2)
-
-                 multiVoice = annotations.get_overlap()
-                 singleVoice = annotations.extrude(multiVoice).get_timeline()
-                 noVoice = Timeline(segments=[Segment(0,totalSeconds)]).extrude(singleVoice).extrude(multiVoice)
-                 # Pie Categories
-                 #---------------------------------------------------------------------------
-                 df = pd.DataFrame(
-                     {
-                         "Duration": [noVoice.duration(),singleVoice.duration(),multiVoice.duration()],
-                         "Category": ["Silence", "Single Voice", "Multiple Voices"],
-                     }
-                 )
-                 fig = px.pie(df, values='Duration', names='Category', title='Types of Discussion')
-                 tab.plotly_chart(fig, use_container_width=True)
-
-                 df4: pd.DataFrame = pd.DataFrame(
-                     {
-                         "values": [sumTimes(rawSample[key]) for key in rawSample.keys()],
-                         "names": [key for key in rawSample.keys()]
-                     }
-                 )
-                 df4.name = "df4"
-
-                 df5: pd.DataFrame = pd.DataFrame(
-                     {
-                         "ids" : ["NV","OV","MV"]+[f"OV_{i}" for i in range(len(sList))]
-                                 +[f"MV_{i}" for i in range(len(multiSpeakerList))],
-                         "labels" : ["No Voice","One Voice","Multi Voice"] + sList + multiSpeakerList,
-                         "parents" : ["","",""]+["OV" for i in range(len(sList))]
-                                     +["MV" for i in range(len(multiSpeakerList))],
-                         "values" : [sumTimes(noVoice2),
-                                     sumTimes([n for _,n in oneVoice2]),
-                                     sumTimes([n for _,n in multiVoice2]),
-                                     ] + timeList + multiTimeList,
-                     }
-                 )
-                 df5.name = "df5"
-
-                 fig2 = make_subplots(rows=2, cols=1,
-                                      specs=[[{"type": "pie"}],[{"type": "treemap"}]],
-                                      shared_xaxes=True)
-                 fig2.add_trace(go.Pie(values=df4["values"],labels=df4["names"]),
-                                row=1, col=1)
-                 fig2.add_trace(go.Treemap(
-                     labels = df5["labels"],
-                     parents = df5["parents"],
-                     ids=df5["ids"],
-                     values = df5["values"]),
-                     row=2, col=1)
-                 tab.plotly_chart(fig2, use_container_width=True)
-
-                 # Lecturer vs. Audience
-                 #---------------------------------------------------------------------------
-                 fig_la = px.timeline(lecturer_dataFrame, x_start="Start", x_end="Finish", y="Resource", color="Resource")
-                 fig_la.update_yaxes(autorange="reversed")
-
-                 hMax = int(totalSeconds//3600)
-                 mMax = int(totalSeconds%3600//60)
-                 sMax = int(totalSeconds%60)
-                 msMax = int(totalSeconds*1000000%1000000)
-                 timeMax = dt.time(hMax,mMax,sMax,msMax)
-
-                 fig_la.update_layout(
-                     xaxis_tickformatstops = [
-                         dict(dtickrange=[None, 1000], value="%H:%M:%S.%L"),
-                         dict(dtickrange=[1000, None], value="%H:%M:%S")
-                     ],
-                     xaxis=dict(
-                         range=[dt.datetime.combine(dt.date.today(), dt.time.min),dt.datetime.combine(dt.date.today(), timeMax)]
-                     ),
-                     xaxis_title="Time",
-                     yaxis_title="Speaker",
-                     legend_title=None
-                 )
-
-                 tab.plotly_chart(fig_la, use_container_width=True)
-
-                 dataTimeList = []
-                 dataTimeList.append(dict(Task=0,x=lecturer_speaker_times[0]/totalSeconds*100,y=f'Lecturer'))
-                 dataTimeList.append(dict(Task=1,x=lecturer_speaker_times[1]/totalSeconds*100,y=f'Audience'))
-                 df2 = pd.DataFrame(dataTimeList)
-                 fig2_la = px.bar(dataTimeList, x="x", y="y", color="y", orientation='h')
-                 fig2_la.update_xaxes(ticksuffix="%")
-                 fig2_la.update_yaxes(autorange="reversed")
-                 fig2_la.update_layout(
-                     xaxis_title="Percentage Time Spoken",
-                     yaxis_title="Speaker",
-                     legend_title=None
-                 )
-                 tab.plotly_chart(fig2_la, use_container_width=True)
-                 df = pd.DataFrame(
-                     {
-                         "Time spoken": ["{}h:{:02d}m:{:02d}s".format(int(lecturer_speaker_times[0]/3600),
-                                                                      int((lecturer_speaker_times[0]%3600)/60),
-                                                                      int(lecturer_speaker_times[0]%60)),
-                                         "{}h:{:02d}m:{:02d}s".format(int(lecturer_speaker_times[1]/3600),
-                                                                      int((lecturer_speaker_times[1]%3600)/60),
-                                                                      int(lecturer_speaker_times[1]%60))],
-                         "Percentage": [
-                             "{:.2f}%".format(100*lecturer_speaker_times[0]/totalSeconds),
-                             "{:.2f}%".format(100*lecturer_speaker_times[1]/totalSeconds),
-                         ],
-                     }
-                 )
-                 df = df.style \
-                     .format_index(str.upper, axis=1) \
-                     .relabel_index(["Lecturer", "Audience"], axis=0) \
-                     #.set_properties(**{"background-color": "white"})
-                 tab.write("Total length of audio: {}h:{:02d}m:{:02d}s".format(int(totalSeconds/3600),int((totalSeconds%3600)/60),int(totalSeconds%60)))
-                 tab.table(df)
-                 #tab.write("Lecturer spoke: {}h:{:02d}m:{:02d}s -> {:.2f}% of time".format(int(lecturer_speaker_times[0]/3600),
-                 #          int((lecturer_speaker_times[0]%3600)/60),int(lecturer_speaker_times[0]%60),
-                 #          100*lecturer_speaker_times[0]/totalSeconds))
-                 #tab.write("Audience spoke: {}h:{:02d}m:{:02d}s -> {:.2f}% of time".format(int(lecturer_speaker_times[1]/3600),
-                 #          int((lecturer_speaker_times[1]%3600)/60),int(lecturer_speaker_times[1]%60),
-                 #          100*lecturer_speaker_times[1]/totalSeconds))
-
-                 # Experimental Speaker Breakdown
-                 #------------------------------------------------------------------------------
-                 fig_spc = px.timeline(all_dataFrame, x_start="Start", x_end="Finish", y="Resource", color="Resource")
-                 fig_spc.update_yaxes(autorange="reversed")
-
-                 hMax = int(totalSeconds//3600)
-                 mMax = int(totalSeconds%3600//60)
-                 sMax = int(totalSeconds%60)
-                 msMax = int(totalSeconds*1000000%1000000)
-                 timeMax = dt.time(hMax,mMax,sMax,msMax)
-
-                 fig_spc.update_layout(
-                     xaxis_tickformatstops = [
-                         dict(dtickrange=[None, 1000], value="%H:%M:%S.%L"),
-                         dict(dtickrange=[1000, None], value="%H:%M:%S")
-                     ],
-                     xaxis=dict(
-                         range=[dt.datetime.combine(dt.date.today(), dt.time.min),dt.datetime.combine(dt.date.today(), timeMax)]
-                     ),
-                     xaxis_title="Time",
-                     yaxis_title="Speaker",
-                     legend_title=None
-                 )
-
-                 tab.plotly_chart(fig_spc, use_container_width=True)
-
-                 dataTimeList = []
-                 for j, totalTime in enumerate(all_speaker_times):
-                     dataTimeList.append(dict(Task=j,x=totalTime/totalSeconds*100,y=f'Speaker {j+1}'))
-                 df2 = pd.DataFrame(dataTimeList)
-                 fig2_spc = px.bar(dataTimeList, x="x", y="y", color="y", orientation='h')
-                 fig2_spc.update_xaxes(ticksuffix="%")
-                 fig2_spc.update_yaxes(autorange="reversed")
-                 fig2_spc.update_layout(
-                     xaxis_title="Percentage Time Spoken",
-                     yaxis_title="Speaker",
-                     legend_title=None
-                 )
-                 tab.plotly_chart(fig2_spc, use_container_width=True)
-
-                 df = pd.DataFrame(
-                     {
-                         "Time spoken": ["{}h:{:02d}m:{:02d}s".format(int(sp/3600),
-                                                                      int((sp%3600)/60),
-                                                                      int(sp%60)) for sp in all_speaker_times],
-                         "Percentage": ["{:.2f}%".format(100*sp/totalSeconds) for sp in all_speaker_times],
-                     }
-                 )
-                 df = df.style \
-                     .format_index(str.upper, axis=1) \
-                     .relabel_index([f"speaker {sp}" for sp in range(len(all_speaker_times))], axis=0) \
-                     #.set_properties(**{"background-color": "white"})
-
-                 tab.write("Total length of audio: {}h:{:02d}m:{:02d}s".format(int(totalSeconds/3600),int((totalSeconds%3600)/60),int(totalSeconds%60)))
-                 tab.table(df)
-
-                 st.session_state.summaries[i] = [totalSeconds,lecturer_speaker_times,all_speaker_times]
-
-     with st.spinner(text='Processing summary results...'):
-         summary_count = 0
-         for su in st.session_state.summaries:
-             if len(su) > 0:
-                 summary_count += 1
-
-         if summary_count > 1:
-             valid_summaries = []
-             for ssm in st.session_state.summaries:
-                 if len(ssm) > 0:
-                     valid_summaries.append(ssm)
-             #st.info(f'{[ssm[0] for ssm in enumerate(valid_summaries)]}')
-             lecturer_data = pd.DataFrame({
-                 'ds' : range(summary_count),
-                 'y' : [100*ssm[1][0]/ssm[0] for ssm in valid_summaries]
-             })
-             audience_data = pd.DataFrame({
-                 'ds' : range(summary_count),
-                 'y' : [100*ssm[1][1]/ssm[0] for ssm in valid_summaries]
-             })
-             speaker_data = pd.DataFrame({
-                 'ds' : range(summary_count),
-                 'y' : [len(ssm[2]) for ssm in valid_summaries]
-             })
-             fig_la = px.line(lecturer_data, x="ds", y="y", color_discrete_sequence=["#0514C0"], labels={'y': 'Lecturer Time'})
-             fig_la.add_scatter(x=audience_data['ds'], y=audience_data['y'], mode='lines', name='Audience Time', line=dict(color='#4CC005'))
-             fig_la.update_layout(title='Percentage of Time Speaking for Lecturer and Audience', xaxis_title='File', yaxis_title='Percent Time Spoken')
-             st.plotly_chart(fig_la, use_container_width=True)
-
-             fig_spc = px.line(speaker_data, x="ds", y="y", color_discrete_sequence=["#0514C0"], labels={'y': 'Speaker Count'})
-             fig_spc.update_layout(title='Number of Distinct Speakers over Time', xaxis_title='File', yaxis_title='# of Distinct Speakers')
-             st.plotly_chart(fig_spc, use_container_width=True)
-
-     userid = st.text_input("user id:", "Guest")
+     file_names = st.session_state.file_names
+
+     currFile = st.sidebar.selectbox('Current File', file_names,on_change=updateMultiSelect,key="select_currFile")
+
+     if len(file_names) == 0:
+         st.text("Upload file(s) to enable analysis")
+     else:
+         if st.button("Analyze All New Audio",key=f"button_all"):
+             if len(valid_files) == 0:
+                 st.error('Upload file(s) first!')
+             else:
+                 print("Start analyzing")
+                 start_time = time.time()
+                 totalFiles = len(valid_files)
+                 for i in range(totalFiles):
+                     if len(st.session_state.results) > i and len(st.session_state.results[i]) > 0 and st.session_state.results[i][0] == file_names[i]:
+                         continue
+                     with st.spinner(text=f'Analyzing File {i+1} of {totalFiles}'):
+                         # Text files use sample data
+                         if file_paths[i].endswith('.txt'):
+                             time.sleep(1)
+                             # RTTM load as filler
+                             speakerList, annotations = su.loadAudioRTTM(sample_data[i])
+                             # Approximate total seconds
+                             totalSeconds = 0
+                             for segment in annotations.itersegments():
+                                 if segment.end > totalSeconds:
+                                     totalSeconds = segment.end
+                             st.session_state.results[i] = (speakerList,annotations, totalSeconds)
+                             st.session_state.summaries[i] = {}
+                             speakerNames = annotations.labels()
+                             st.session_state.unusedSpeakers[i] = speakerNames
+                         else:
+                             st.info(file_paths[i])
+                             speakerList, annotations, totalSeconds = processFile(file_paths[i])
+                             st.session_state.results[i] = (speakerList,annotations, totalSeconds)
+                             st.session_state.summaries[i] = {}
+                             speakerNames = annotations.labels()
+                             st.session_state.unusedSpeakers[i] = speakerNames
+                 st.success(f"Took {time.time() - start_time} seconds to analyze {totalFiles} files!")
+
+         if currFile is None:  #Do we need more? -> and len(st.session_state.results) > 0 and len(st.session_state.results[0]) > 0:
+             st.write("Select a file to view from the sidebar")
+         try:
+             st.session_state.resetResult = False
+             currFileIndex = file_names.index(currFile)
+             if len(st.session_state.results) > currFileIndex and len(st.session_state.summaries) > currFileIndex and len(st.session_state.results[currFileIndex]) > 0:
+                 # Handle
+                 currSpeakerList, currAnnotation, currTotalTime = st.session_state.results[currFileIndex]
+                 speakerNames = currAnnotation.labels()
+
+                 # Update other categories
+                 unusedSpeakers = st.session_state.unusedSpeakers[currFileIndex]
+                 categorySelections = st.session_state["categorySelect"][currFileIndex]
+                 for i,category in enumerate(st.session_state.categories):
+                     speakerSet = categorySelections[i]
+                     st.sidebar.multiselect(category,
+                                            speakerSet+unusedSpeakers,
+                                            default=speakerSet,
+                                            key=f"multiselect_{category}",
+                                            on_change=updateCategoryOptions,
+                                            args=(currFileIndex,))
+                     st.sidebar.button(f"Remove {category}",key=f"remove_{category}",on_click=removeCategory,args=(i,))
+
+                 newCategory = st.sidebar.text_input('Add category', key='categoryInput',on_change=addCategory)
+
+                 df4_dict = {}
+                 nameList = st.session_state.categories
+                 extraNames = []
+                 valueList = [0 for i in range(len(nameList))]
+                 extraValues = []
+
+                 for i,speakerSet in enumerate(categorySelections):
+                     valueList[i] += su.sumTimes(currAnnotation.subset(speakerSet))
+
+                 for sp in unusedSpeakers:
+                     extraNames.append(sp)
+                     extraValues.append(su.sumTimes(currAnnotation.subset([sp])))
+
+                 df4_dict = {
+                     "names": nameList+extraNames,
+                     "values": valueList+extraValues,
+                 }
+                 df4 = pd.DataFrame(data=df4_dict)
+                 df4.name = "df4"
+                 st.session_state.summaries[currFileIndex]["df4"] = df4
+
+                 df2 = st.session_state.summaries[currFileIndex]["df2"]
+                 df3 = st.session_state.summaries[currFileIndex]["df3"]
+                 df4 = st.session_state.summaries[currFileIndex]["df4"]
+                 df5 = st.session_state.summaries[currFileIndex]["df5"]
+                 speakers_dataFrame = st.session_state.summaries[currFileIndex]["speakers_dataFrame"]
+                 currDF = speakers_dataFrame
+                 speakers_times = st.session_state.summaries[currFileIndex]["speakers_times"]
+
+                 # generate plotting window
+                 fig1 = go.Figure()
+                 fig1.add_trace(go.Pie(values=df3["values"],labels=df3["names"]))
+                 fig2 = go.Figure()
+                 fig2.add_trace(go.Pie(values=df4["values"],labels=df4["names"]))
+                 fig3_1 = px.sunburst(df5,
+                                      branchvalues = 'total',
+                                      names = "labels",
+                                      ids = "ids",
+                                      parents = "parents",
+                                      values = "percentiles",
+                                      custom_data=['labels','valueStrings','percentiles','parentNames','parentPercentiles'],
+                                      color = 'labels',
+                                      )
+                 fig3_1.update_traces(
+                     hovertemplate="<br>".join([
+                         '<b>%{customdata[0]}</b>',
+                         'Duration: %{customdata[1]}s',
+                         'Percentage of Total: %{customdata[2]:.2f}%',
+                         'Parent: %{customdata[3]}',
+                         'Percentage of Parent: %{customdata[4]:.2f}%'
+                     ])
+                 )
+                 fig3 = px.treemap(df5,
+                                   branchvalues = "total",
+                                   names = "labels",
+                                   parents = "parents",
+                                   ids="ids",
+                                   values = "percentiles",
+                                   custom_data=['labels','valueStrings','percentiles','parentNames','parentPercentiles'],
+                                   color='labels',
+                                   )
+                 fig3.update_traces(
+                     hovertemplate="<br>".join([
+                         '<b>%{customdata[0]}</b>',
+                         'Duration: %{customdata[1]}s',
+                         'Percentage of Total: %{customdata[2]:.2f}%',
+                         'Parent: %{customdata[3]}',
+                         'Percentage of Parent: %{customdata[4]:.2f}%'
+                     ])
+                 )
+                 st.plotly_chart(fig1, use_container_width=True)
+                 st.plotly_chart(fig2, use_container_width=True)
+                 st.plotly_chart(fig3_1, use_container_width=True)
+                 st.plotly_chart(fig3, use_container_width=True)
+
+                 fig_la = px.timeline(speakers_dataFrame, x_start="Start", x_end="Finish", y="Resource", color="Resource")
+                 fig_la.update_yaxes(autorange="reversed")
+
+                 hMax = int(currTotalTime//3600)
+                 mMax = int(currTotalTime%3600//60)
+                 sMax = int(currTotalTime%60)
+                 msMax = int(currTotalTime*1000000%1000000)
+                 timeMax = dt.time(hMax,mMax,sMax,msMax)
+
+                 fig_la.update_layout(
+                     xaxis_tickformatstops = [
+                         dict(dtickrange=[None, 1000], value="%H:%M:%S.%L"),
+                         dict(dtickrange=[1000, None], value="%H:%M:%S")
+                     ],
+                     xaxis=dict(
+                         range=[dt.datetime.combine(dt.date.today(), dt.time.min),dt.datetime.combine(dt.date.today(), timeMax)]
+                     ),
+                     xaxis_title="Time",
+                     yaxis_title="Speaker",
+                     legend_title=None
+                 )
+
+                 st.plotly_chart(fig_la, use_container_width=True)
+
+                 fig2_la = px.bar(df2, x="values", y="names", color="names", orientation='h',
+                                  custom_data=["names","values"])
+                 fig2_la.update_xaxes(ticksuffix="%")
+                 fig2_la.update_yaxes(autorange="reversed")
+                 fig2_la.update_layout(
+                     xaxis_title="Percentage Time Spoken",
+                     yaxis_title="Speaker",
+                     legend_title=None
+                 )
+                 fig2_la.update_traces(
+                     hovertemplate="<br>".join([
+                         '<b>%{customdata[0]}</b>',
+                         'Percentage of Time: %{customdata[1]:.2f}%'
+                     ])
+                 )
+                 st.plotly_chart(fig2_la, use_container_width=True)
+         except ValueError:
+             pass
+
+         if len(st.session_state.results) > 0:
+             with st.spinner(text='Processing summary results...'):
+                 fileNames = []
+                 results = []
+                 indices = []
+                 for i, resultTuple in enumerate(st.session_state.results):
+                     if len(resultTuple) == 2:
+                         fileNames.append(resultTuple[0])
+                         results.append(resultTuple[1])
+                         indices.append(i)
+                 if len(indices) > 1:
+                     df6_dict = {
+                         "files":fileNames,
+                     }
+                     allCategories = copy.deepcopy(st.session_state.categories)
+                     for i in indices:
+                         currResult = st.session_state.results[i][1]
+                         categorySelections = st.session_state["categorySelect"][i]
+                         catSummary,extraCats = calcCategories(currResult["speakers"],categorySelections)
+                         st.session_state.summaries[i]["categories"] = (catSummary,extraCats)
+                         for extra in extraCats:
+                             df6_dict[extra] = []
+                             if extra not in allCategories:
+                                 allCategories.append(extra)
+
+                     for category in st.session_state.categories:
+                         df6_dict[category] = []
+                     for i in indices:
+                         summary, extras = st.session_state.summaries[i]["categories"]
+                         theseCategories = st.session_state.categories + extras
+                         for j, timeSlots in enumerate(summary):
+                             df6_dict[theseCategories[j]].append(sumTimes([t for _,t in timeSlots])/st.session_state.results[i][1]['duration'])
+                         for category in allCategories:
+                             if category not in theseCategories:
+                                 df6_dict[category].append(0)
+                     df6 = pd.DataFrame(df6_dict)
+                     summFig = px.bar(df6, x="files", y=allCategories)
+                     st.plotly_chart(summFig, use_container_width=True)
+
+                     voiceNames = ["No Voice","One Voice","Multi Voice"]
+                     df7_dict = {
+                         "files":[fileName for fileName,_ in st.session_state.results],
+                     }
+                     for category in voiceNames:
+                         df7_dict[category] = []
+                     for resultID,summary in enumerate(st.session_state.summaries):
+                         partialDf = summary["df5"]
+                         for i in range(len(voiceNames)):
+                             df7_dict[voiceNames[i]].append(partialDf["percentiles"][i])
+                     df7 = pd.DataFrame(df7_dict)
+                     sorted_df7 = df7.sort_values(by=['One Voice', 'Multi Voice'])
+                     summFig2 = px.bar(sorted_df7, x="files", y=["One Voice","Multi Voice","No Voice",])
+                     st.plotly_chart(summFig2, use_container_width=True)
+                     sorted_df7_3 = df7.sort_values(by=['Multi Voice','One Voice'])
+                     summFig3 = px.bar(sorted_df7_3, x="files", y=["One Voice","Multi Voice","No Voice",])
+                     st.plotly_chart(summFig3, use_container_width=True)
+                     sorted_df7_4 = df7.sort_values(by=['No Voice', 'Multi Voice'],ascending=False)
+                     summFig4 = px.bar(sorted_df7_4, x="files", y=["One Voice","Multi Voice","No Voice",])
+                     st.plotly_chart(summFig4, use_container_width=True)
+
+ old = '''userid = st.text_input("user id:", "Guest")
  colorPref = st.text_input("Favorite color?", "None")
  radio = st.radio('Pick one:', ['Left','Right'])
  selection = st.selectbox('Select', [1,2,3])
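The sidebar machinery added in this hunk leans on one Streamlit pattern throughout: every widget gets a stable `key`, and `on_change`/`on_click` callbacks mutate `st.session_state` before the script reruns. A self-contained sketch of the add-category flow, simplified from `addCategory` (the speaker labels are placeholders, not the app's real output):

```python
import streamlit as st

if "categories" not in st.session_state:
    st.session_state.categories = ["Lecturer", "Audience"]

def add_category():
    # Runs before the rerun: read the text box's state, then clear it.
    name = st.session_state.category_input
    if name and name not in st.session_state.categories:
        st.session_state.categories.append(name)
    st.session_state.category_input = ""

st.sidebar.text_input("Add category", key="category_input", on_change=add_category)
for cat in st.session_state.categories:
    # Placeholder speaker labels; the app fills these from pyannote output.
    st.sidebar.multiselect(cat, ["SPEAKER_00", "SPEAKER_01"], key=f"multiselect_{cat}")
```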
@@ -586,7 +607,7 @@ if st.button("Upload Files to Dataset"):
              file_paths,
              userid)
      st.success('I think it worked!')
-
+ '''
  @st.cache_data
  def convert_df(df):
      return df.to_csv(index=False).encode('utf-8')
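`convert_df` is wrapped in `st.cache_data`, so the CSV bytes are recomputed only when the DataFrame argument changes. A likely consumer is a download button; this usage is an assumption, since the widget itself sits outside this diff:

```python
# Hypothetical usage; `speakers_dataFrame` names a DataFrame built by this app.
csv_bytes = convert_df(speakers_dataFrame)  # cached per-DataFrame
st.download_button("Download CSV", data=csv_bytes,
                   file_name="speaker_times.csv", mime="text/csv")
```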
 