czyoung committed on
Commit
e0cfb45
·
verified ·
1 Parent(s): e6605eb

Slight visual improvement, reducing graph count on screen

Browse files
Files changed (1) hide show
  1. app.py +178 -160
app.py CHANGED
@@ -302,6 +302,8 @@ if 'unusedSpeakers' not in st.session_state:
302
  st.session_state.unusedSpeakers = []
303
  if 'file_names' not in st.session_state:
304
  st.session_state.file_names = []
 
 
305
 
306
 
307
 
@@ -315,6 +317,7 @@ if not isGPU:
315
  uploaded_file_paths = st.file_uploader("Upload an audio of classroom activity to analyze", accept_multiple_files=True)
316
 
317
  supported_file_types = ('.wav','.mp3','.mp4','.txt','.rttm')
 
318
 
319
  valid_files = []
320
  file_paths = []
@@ -416,14 +419,17 @@ else:
416
  print(f"Finished analyzing {file_paths[i]}")
417
  print(f"Took {time.time() - start_time} seconds to analyze {totalFiles} files!")
418
  st.success(f"Took {time.time() - start_time} seconds to analyze {totalFiles} files!")
419
-
 
420
  currFile = st.sidebar.selectbox('Current File', file_names,on_change=updateMultiSelect,key="select_currFile")
 
421
  if currFile is None and len(st.session_state.results) > 0 and len(st.session_state.results[0]) > 0:
422
  st.write("Select a file to view from the sidebar")
423
  try:
424
  st.session_state.resetResult = False
425
  currFileIndex = file_names.index(currFile)
426
  if len(st.session_state.results) > currFileIndex and len(st.session_state.summaries) > currFileIndex and len(st.session_state.results[currFileIndex]) > 0:
 
427
  # Handle
428
  currAnnotation, currTotalTime = st.session_state.results[currFileIndex]
429
  speakerNames = currAnnotation.labels()
@@ -466,175 +472,187 @@ try:
466
  df4.name = "df4"
467
  st.session_state.summaries[currFileIndex]["df4"] = df4
468
 
469
- df2 = st.session_state.summaries[currFileIndex]["df2"]
470
- df3 = st.session_state.summaries[currFileIndex]["df3"]
471
- df4 = st.session_state.summaries[currFileIndex]["df4"]
472
- df5 = st.session_state.summaries[currFileIndex]["df5"]
473
- speakers_dataFrame = st.session_state.summaries[currFileIndex]["speakers_dataFrame"]
474
- currDF = speakers_dataFrame
475
- speakers_times = st.session_state.summaries[currFileIndex]["speakers_times"]
476
-
477
- # generate plotting window
478
- fig1 = go.Figure()
479
- fig1.update_layout(
480
- title_text="Percentage of each Voice Category",
481
- )
482
- fig1.add_trace(go.Pie(values=df3["values"],labels=df3["names"]))
483
- fig2 = go.Figure()
484
- fig2.update_layout(
485
- title_text="Percentage of Speakers and Custom Categories",
486
- )
487
- fig2.add_trace(go.Pie(values=df4["values"],labels=df4["names"]))
488
- fig3_1 = px.sunburst(df5,
489
- branchvalues = 'total',
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
490
  names = "labels",
491
- ids = "ids",
492
  parents = "parents",
 
493
  values = "percentiles",
494
  custom_data=['labels','valueStrings','percentiles','parentNames','parentPercentiles'],
495
- color = 'labels',
496
- title="Percentage of each Voice Category with Speakers",
497
- )
498
- fig3_1.update_traces(
499
- hovertemplate="<br>".join([
500
- '<b>%{customdata[0]}</b>',
501
- 'Duration: %{customdata[1]}s',
502
- 'Percentage of Total: %{customdata[2]:.2f}%',
503
- 'Parent: %{customdata[3]}',
504
- 'Percentage of Parent: %{customdata[4]:.2f}%'
505
- ])
506
- )
507
- fig3 = px.treemap(df5,
508
- branchvalues = "total",
509
- names = "labels",
510
- parents = "parents",
511
- ids="ids",
512
- values = "percentiles",
513
- custom_data=['labels','valueStrings','percentiles','parentNames','parentPercentiles'],
514
- color='labels',
515
- title="Division of Speakers in each Voice Category",
516
- )
517
- fig3.update_traces(
518
- hovertemplate="<br>".join([
519
- '<b>%{customdata[0]}</b>',
520
- 'Duration: %{customdata[1]}s',
521
- 'Percentage of Total: %{customdata[2]:.2f}%',
522
- 'Parent: %{customdata[3]}',
523
- 'Percentage of Parent: %{customdata[4]:.2f}%'
524
- ])
525
- )
526
- st.plotly_chart(fig1, use_container_width=True)
527
- st.plotly_chart(fig2, use_container_width=True)
528
- st.plotly_chart(fig3_1, use_container_width=True)
529
- st.plotly_chart(fig3, use_container_width=True)
530
-
531
-
532
- fig_la = px.timeline(speakers_dataFrame, x_start="Start", x_end="Finish", y="Resource", color="Resource",title="Timeline of Audio with Speakers")
533
- fig_la.update_yaxes(autorange="reversed")
534
-
535
- hMax = int(currTotalTime//3600)
536
- mMax = int(currTotalTime%3600//60)
537
- sMax = int(currTotalTime%60)
538
- msMax = int(currTotalTime*1000000%1000000)
539
- timeMax = dt.time(hMax,mMax,sMax,msMax)
540
-
541
- fig_la.update_layout(
542
- xaxis_tickformatstops = [
543
- dict(dtickrange=[None, 1000], value="%H:%M:%S.%L"),
544
- dict(dtickrange=[1000, None], value="%H:%M:%S")
545
- ],
546
- xaxis=dict(
547
- range=[dt.datetime.combine(dt.date.today(), dt.time.min),dt.datetime.combine(dt.date.today(), timeMax)]
548
- ),
549
- xaxis_title="Time",
550
- yaxis_title="Speaker",
551
- legend_title=None
552
- )
553
-
554
- st.plotly_chart(fig_la, use_container_width=True)
555
-
556
- fig2_la = px.bar(df2, x="values", y="names", color="names", orientation='h',
557
- custom_data=["names","values"],title="Time Spoken by each Speaker")
558
- fig2_la.update_xaxes(ticksuffix="%")
559
- fig2_la.update_yaxes(autorange="reversed")
560
- fig2_la.update_layout(
561
- xaxis_title="Percentage Time Spoken",
562
- yaxis_title="Speaker",
563
- legend_title=None
564
 
565
- )
566
- fig2_la.update_traces(
567
- hovertemplate="<br>".join([
568
- '<b>%{customdata[0]}</b>',
569
- 'Percentage of Time: %{customdata[1]:.2f}%'
570
- ])
571
- )
572
- st.plotly_chart(fig2_la, use_container_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
573
 
574
  except ValueError:
575
  pass
576
-
577
  if len(st.session_state.results) > 0:
578
- with st.spinner(text='Processing summary results...'):
579
- fileNames = st.session_state.file_names
580
- results = []
581
- indices = []
582
- for i, resultTuple in enumerate(st.session_state.results):
583
- if len(resultTuple) == 2:
584
- results.append(resultTuple)
585
- indices.append(i)
586
- if len(indices) > 1:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
587
 
588
- df6_dict = {
589
- "files":fileNames,
590
- }
591
- allCategories = copy.deepcopy(st.session_state.categories)
592
- for i in indices:
593
- currAnnotation, currTotalTime = st.session_state.results[i]
594
- categorySelections = st.session_state["categorySelect"][i]
595
- catSummary,extraCats = su.calcCategories(currAnnotation,categorySelections)
596
- st.session_state.summaries[i]["categories"] = (catSummary,extraCats)
597
- for extra in extraCats:
598
- df6_dict[extra] = []
599
- if extra not in allCategories:
600
- allCategories.append(extra)
601
-
602
-
603
- for category in st.session_state.categories:
604
- df6_dict[category] = []
605
- for i in indices:
606
- summary, extras = st.session_state.summaries[i]["categories"]
607
- theseCategories = st.session_state.categories + extras
608
- for j, timeSlots in enumerate(summary):
609
- df6_dict[theseCategories[j]].append(sum([t.duration for _,t in timeSlots])/st.session_state.results[i][1])
610
- for category in allCategories:
611
- if category not in theseCategories:
612
- df6_dict[category].append(0)
613
- df6 = pd.DataFrame(df6_dict)
614
- summFig = px.bar(df6, x="files", y=allCategories,title="Time Spoken by Each Speaker in Each File")
615
- st.plotly_chart(summFig, use_container_width=True)
616
-
617
-
618
- voiceNames = ["No Voice","One Voice","Multi Voice"]
619
- df7_dict = {
620
- "files":fileNames,
621
- }
622
- for category in voiceNames:
623
- df7_dict[category] = []
624
- for resultID,summary in enumerate(st.session_state.summaries):
625
- partialDf = summary["df5"]
626
- for i in range(len(voiceNames)):
627
- df7_dict[voiceNames[i]].append(partialDf["percentiles"][i])
628
- df7 = pd.DataFrame(df7_dict)
629
- sorted_df7 = df7.sort_values(by=['One Voice', 'Multi Voice'])
630
- summFig2 = px.bar(sorted_df7, x="files", y=["One Voice","Multi Voice","No Voice",],title="Cross-file Voice Categories sorted for One Voice")
631
- st.plotly_chart(summFig2, use_container_width=True)
632
- sorted_df7_3 = df7.sort_values(by=['Multi Voice','One Voice'])
633
- summFig3 = px.bar(sorted_df7_3, x="files", y=["One Voice","Multi Voice","No Voice",],title="Cross-file Voice Categories sorted for Multi Voice")
634
- st.plotly_chart(summFig3, use_container_width=True)
635
- sorted_df7_4 = df7.sort_values(by=['No Voice', 'Multi Voice'],ascending=False)
636
- summFig4 = px.bar(sorted_df7_4, x="files", y=["One Voice","Multi Voice","No Voice",],title="Cross-file Voice Categories sorted for Any Voice")
637
- st.plotly_chart(summFig4, use_container_width=True)
638
 
639
 
640
 
 
302
  st.session_state.unusedSpeakers = []
303
  if 'file_names' not in st.session_state:
304
  st.session_state.file_names = []
305
+ if 'showSummary' not in st.session_state:
306
+ st.session_state.showSummary = 'No'
307
 
308
 
309
 
 
317
  uploaded_file_paths = st.file_uploader("Upload an audio of classroom activity to analyze", accept_multiple_files=True)
318
 
319
  supported_file_types = ('.wav','.mp3','.mp4','.txt','.rttm')
320
+ viewChoices = ["Voice Categories","Custom Categories","Detailed Voice Categories","Voice Category Treemap","Speaker Timeline","Time per Speaker"]
321
 
322
  valid_files = []
323
  file_paths = []
 
419
  print(f"Finished analyzing {file_paths[i]}")
420
  print(f"Took {time.time() - start_time} seconds to analyze {totalFiles} files!")
421
  st.success(f"Took {time.time() - start_time} seconds to analyze {totalFiles} files!")
422
+
423
+ summaryRadio = st.sidebar.empty()
424
  currFile = st.sidebar.selectbox('Current File', file_names,on_change=updateMultiSelect,key="select_currFile")
425
+ viewSelection = st.sidebar.selectbox('View', viewChoices)
426
  if currFile is None and len(st.session_state.results) > 0 and len(st.session_state.results[0]) > 0:
427
  st.write("Select a file to view from the sidebar")
428
  try:
429
  st.session_state.resetResult = False
430
  currFileIndex = file_names.index(currFile)
431
  if len(st.session_state.results) > currFileIndex and len(st.session_state.summaries) > currFileIndex and len(st.session_state.results[currFileIndex]) > 0:
432
+ st.header(f"Analysis of file {currFile}")
433
  # Handle
434
  currAnnotation, currTotalTime = st.session_state.results[currFileIndex]
435
  speakerNames = currAnnotation.labels()
 
472
  df4.name = "df4"
473
  st.session_state.summaries[currFileIndex]["df4"] = df4
474
 
475
+ viewSelection = [viewSelection]
476
+
477
+ if viewChoices[0] in viewSelection:
478
+ df3 = st.session_state.summaries[currFileIndex]["df3"]
479
+ fig1 = go.Figure()
480
+ fig1.update_layout(
481
+ title_text="Percentage of each Voice Category",
482
+ )
483
+ fig1.add_trace(go.Pie(values=df3["values"],labels=df3["names"]))
484
+ st.plotly_chart(fig1, use_container_width=True)
485
+ if viewChoices[1] in viewSelection:
486
+ df4 = st.session_state.summaries[currFileIndex]["df4"]
487
+ fig2 = go.Figure()
488
+ fig2.update_layout(
489
+ title_text="Percentage of Speakers and Custom Categories",
490
+ )
491
+ fig2.add_trace(go.Pie(values=df4["values"],labels=df4["names"]))
492
+ st.plotly_chart(fig2, use_container_width=True)
493
+ if viewChoices[2] in viewSelection:
494
+ df5 = st.session_state.summaries[currFileIndex]["df5"]
495
+ fig3_1 = px.sunburst(df5,
496
+ branchvalues = 'total',
497
+ names = "labels",
498
+ ids = "ids",
499
+ parents = "parents",
500
+ values = "percentiles",
501
+ custom_data=['labels','valueStrings','percentiles','parentNames','parentPercentiles'],
502
+ color = 'labels',
503
+ title="Percentage of each Voice Category with Speakers",
504
+ )
505
+ fig3_1.update_traces(
506
+ hovertemplate="<br>".join([
507
+ '<b>%{customdata[0]}</b>',
508
+ 'Duration: %{customdata[1]}s',
509
+ 'Percentage of Total: %{customdata[2]:.2f}%',
510
+ 'Parent: %{customdata[3]}',
511
+ 'Percentage of Parent: %{customdata[4]:.2f}%'
512
+ ])
513
+ )
514
+ st.plotly_chart(fig3_1, use_container_width=True)
515
+ if viewChoices[3] in viewSelection:
516
+ df5 = st.session_state.summaries[currFileIndex]["df5"]
517
+ fig3 = px.treemap(df5,
518
+ branchvalues = "total",
519
  names = "labels",
 
520
  parents = "parents",
521
+ ids="ids",
522
  values = "percentiles",
523
  custom_data=['labels','valueStrings','percentiles','parentNames','parentPercentiles'],
524
+ color='labels',
525
+ title="Division of Speakers in each Voice Category",
526
+ )
527
+ fig3.update_traces(
528
+ hovertemplate="<br>".join([
529
+ '<b>%{customdata[0]}</b>',
530
+ 'Duration: %{customdata[1]}s',
531
+ 'Percentage of Total: %{customdata[2]:.2f}%',
532
+ 'Parent: %{customdata[3]}',
533
+ 'Percentage of Parent: %{customdata[4]:.2f}%'
534
+ ])
535
+ )
536
+ st.plotly_chart(fig3, use_container_width=True)
537
+ if viewChoices[4] in viewSelection:
538
+ speakers_dataFrame = st.session_state.summaries[currFileIndex]["speakers_dataFrame"]
539
+ currDF = speakers_dataFrame
540
+ speakers_times = st.session_state.summaries[currFileIndex]["speakers_times"]
541
+
542
+ # generate plotting window
543
+
544
+
545
+ fig_la = px.timeline(speakers_dataFrame, x_start="Start", x_end="Finish", y="Resource", color="Resource",title="Timeline of Audio with Speakers")
546
+ fig_la.update_yaxes(autorange="reversed")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
547
 
548
+ hMax = int(currTotalTime//3600)
549
+ mMax = int(currTotalTime%3600//60)
550
+ sMax = int(currTotalTime%60)
551
+ msMax = int(currTotalTime*1000000%1000000)
552
+ timeMax = dt.time(hMax,mMax,sMax,msMax)
553
+
554
+ fig_la.update_layout(
555
+ xaxis_tickformatstops = [
556
+ dict(dtickrange=[None, 1000], value="%H:%M:%S.%L"),
557
+ dict(dtickrange=[1000, None], value="%H:%M:%S")
558
+ ],
559
+ xaxis=dict(
560
+ range=[dt.datetime.combine(dt.date.today(), dt.time.min),dt.datetime.combine(dt.date.today(), timeMax)]
561
+ ),
562
+ xaxis_title="Time",
563
+ yaxis_title="Speaker",
564
+ legend_title=None
565
+ )
566
+
567
+ st.plotly_chart(fig_la, use_container_width=True)
568
+ if viewChoices[5] in viewSelection:
569
+ df2 = st.session_state.summaries[currFileIndex]["df2"]
570
+ fig2_la = px.bar(df2, x="values", y="names", color="names", orientation='h',
571
+ custom_data=["names","values"],title="Time Spoken by each Speaker")
572
+ fig2_la.update_xaxes(ticksuffix="%")
573
+ fig2_la.update_yaxes(autorange="reversed")
574
+ fig2_la.update_layout(
575
+ xaxis_title="Percentage Time Spoken",
576
+ yaxis_title="Speaker",
577
+ legend_title=None
578
+
579
+ )
580
+ fig2_la.update_traces(
581
+ hovertemplate="<br>".join([
582
+ '<b>%{customdata[0]}</b>',
583
+ 'Percentage of Time: %{customdata[1]:.2f}%'
584
+ ])
585
+ )
586
+ st.plotly_chart(fig2_la, use_container_width=True)
587
 
588
  except ValueError:
589
  pass
590
+
591
  if len(st.session_state.results) > 0:
592
+ st.session_state.showSummary = st.radio('Display Multi-file Summary?',['Yes','No'])
593
+
594
+ if st.session_state.showSummary == 'Yes':
595
+ st.header("Multi-file Summary Data")
596
+ with st.spinner(text='Processing summary results...'):
597
+ fileNames = st.session_state.file_names
598
+ results = []
599
+ indices = []
600
+ for i, resultTuple in enumerate(st.session_state.results):
601
+ if len(resultTuple) == 2:
602
+ results.append(resultTuple)
603
+ indices.append(i)
604
+ if len(indices) > 1:
605
+
606
+ df6_dict = {
607
+ "files":fileNames,
608
+ }
609
+ allCategories = copy.deepcopy(st.session_state.categories)
610
+ for i in indices:
611
+ currAnnotation, currTotalTime = st.session_state.results[i]
612
+ categorySelections = st.session_state["categorySelect"][i]
613
+ catSummary,extraCats = su.calcCategories(currAnnotation,categorySelections)
614
+ st.session_state.summaries[i]["categories"] = (catSummary,extraCats)
615
+ for extra in extraCats:
616
+ df6_dict[extra] = []
617
+ if extra not in allCategories:
618
+ allCategories.append(extra)
619
+
620
 
621
+ for category in st.session_state.categories:
622
+ df6_dict[category] = []
623
+ for i in indices:
624
+ summary, extras = st.session_state.summaries[i]["categories"]
625
+ theseCategories = st.session_state.categories + extras
626
+ for j, timeSlots in enumerate(summary):
627
+ df6_dict[theseCategories[j]].append(sum([t.duration for _,t in timeSlots])/st.session_state.results[i][1])
628
+ for category in allCategories:
629
+ if category not in theseCategories:
630
+ df6_dict[category].append(0)
631
+ df6 = pd.DataFrame(df6_dict)
632
+ summFig = px.bar(df6, x="files", y=allCategories,title="Time Spoken by Each Speaker in Each File")
633
+ st.plotly_chart(summFig, use_container_width=True)
634
+
635
+
636
+ voiceNames = ["No Voice","One Voice","Multi Voice"]
637
+ df7_dict = {
638
+ "files":fileNames,
639
+ }
640
+ for category in voiceNames:
641
+ df7_dict[category] = []
642
+ for resultID,summary in enumerate(st.session_state.summaries):
643
+ partialDf = summary["df5"]
644
+ for i in range(len(voiceNames)):
645
+ df7_dict[voiceNames[i]].append(partialDf["percentiles"][i])
646
+ df7 = pd.DataFrame(df7_dict)
647
+ sorted_df7 = df7.sort_values(by=['One Voice', 'Multi Voice'])
648
+ summFig2 = px.bar(sorted_df7, x="files", y=["One Voice","Multi Voice","No Voice",],title="Cross-file Voice Categories sorted for One Voice")
649
+ st.plotly_chart(summFig2, use_container_width=True)
650
+ sorted_df7_3 = df7.sort_values(by=['Multi Voice','One Voice'])
651
+ summFig3 = px.bar(sorted_df7_3, x="files", y=["One Voice","Multi Voice","No Voice",],title="Cross-file Voice Categories sorted for Multi Voice")
652
+ st.plotly_chart(summFig3, use_container_width=True)
653
+ sorted_df7_4 = df7.sort_values(by=['No Voice', 'Multi Voice'],ascending=False)
654
+ summFig4 = px.bar(sorted_df7_4, x="files", y=["One Voice","Multi Voice","No Voice",],title="Cross-file Voice Categories sorted for Any Voice")
655
+ st.plotly_chart(summFig4, use_container_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
656
 
657
 
658