czyoung committed on
Commit
702041e
·
verified ·
1 Parent(s): 9b477b6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +233 -232
app.py CHANGED
@@ -382,7 +382,7 @@ else:
382
  speakerNames = annotations.labels()
383
  st.session_state.unusedSpeakers[i] = speakerNames
384
  else:
385
- st.info(file_paths[i])
386
  speakerList, annotations, totalSeconds = processFile(file_paths[i])
387
  st.session_state.results[i] = (speakerList,annotations, totalSeconds)
388
  st.session_state.summaries[i] = {}
@@ -393,243 +393,244 @@ else:
393
 
394
  if currFile is None and len(st.session_state.results) > 0 and len(st.session_state.results[0]) > 0:
395
  st.write("Select a file to view from the sidebar")
396
- try:
397
- st.session_state.resetResult = False
398
- currFileIndex = file_names.index(currFile)
399
- if len(st.session_state.results) > currFileIndex and len(st.session_state.summaries) > currFileIndex and len(st.session_state.results[currFileIndex]) > 0:
400
- st.info("Displaying current file results")
401
- # Handle
402
- currSpeakerList, currAnnotation, currTotalTime = st.session_state.results[currFileIndex]
403
- speakerNames = currAnnotation.labels()
404
-
405
- # Update other categories
406
- unusedSpeakers = st.session_state.unusedSpeakers[currFileIndex]
407
- categorySelections = st.session_state["categorySelect"][currFileIndex]
408
- for i,category in enumerate(st.session_state.categories):
409
- speakerSet = categorySelections[i]
410
- st.sidebar.multiselect(category,
411
- speakerSet+unusedSpeakers,
412
- default=speakerSet,
413
- key=f"multiselect_{category}",
414
- on_change=updateCategoryOptions,
415
- args=(currFileIndex,))
416
- st.sidebar.button(f"Remove {category}",key=f"remove_{category}",on_click=removeCategory,args=(i,))
417
- st.info("Displaying current file results1")
418
-
419
-
420
- newCategory = st.sidebar.text_input('Add category', key='categoryInput',on_change=addCategory)
421
-
422
- df4_dict = {}
423
- nameList = st.session_state.categories
424
- extraNames = []
425
- valueList = [0 for i in range(len(nameList))]
426
- extraValues = []
427
-
428
- for i,speakerSet in enumerate(categorySelections):
429
- valueList[i] += su.sumTimes(currAnnotation.subset(speakerSet))
430
-
431
- for sp in unusedSpeakers:
432
- extraNames.append(sp)
433
- extraValues.append(su.sumTimes(currAnnotation.subset([sp])))
434
- st.info("Displaying current file results2")
435
-
436
- df4_dict = {
437
- "names": nameList+extraNames,
438
- "values": valueList+extraValues,
439
- }
440
- df4 = pd.DataFrame(data=df4_dict)
441
- df4.name = "df4"
442
- st.session_state.summaries[currFileIndex]["df4"] = df4
443
- st.info(st.session_state.summaries)
444
- df2 = st.session_state.summaries[currFileIndex]["df2"]
445
- df3 = st.session_state.summaries[currFileIndex]["df3"]
446
- df4 = st.session_state.summaries[currFileIndex]["df4"]
447
- df5 = st.session_state.summaries[currFileIndex]["df5"]
448
- speakers_dataFrame = st.session_state.summaries[currFileIndex]["speakers_dataFrame"]
449
- currDF = speakers_dataFrame
450
- speakers_times = st.session_state.summaries[currFileIndex]["speakers_times"]
451
-
452
- # generate plotting window
453
- fig1 = go.Figure()
454
- fig1.add_trace(go.Pie(values=df3["values"],labels=df3["names"]))
455
- fig2 = go.Figure()
456
- fig2.add_trace(go.Pie(values=df4["values"],labels=df4["names"]))
457
- fig3_1 = px.sunburst(df5,
458
- branchvalues = 'total',
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
459
  names = "labels",
460
- ids = "ids",
461
  parents = "parents",
 
462
  values = "percentiles",
463
  custom_data=['labels','valueStrings','percentiles','parentNames','parentPercentiles'],
464
- color = 'labels',
465
  )
466
- fig3_1.update_traces(
467
- hovertemplate="<br>".join([
468
- '<b>%{customdata[0]}</b>',
469
- 'Duration: %{customdata[1]}s',
470
- 'Percentage of Total: %{customdata[2]:.2f}%',
471
- 'Parent: %{customdata[3]}',
472
- 'Percentage of Parent: %{customdata[4]:.2f}%'
473
- ])
474
- )
475
- fig3 = px.treemap(df5,
476
- branchvalues = "total",
477
- names = "labels",
478
- parents = "parents",
479
- ids="ids",
480
- values = "percentiles",
481
- custom_data=['labels','valueStrings','percentiles','parentNames','parentPercentiles'],
482
- color='labels',
483
- )
484
- fig3.update_traces(
485
- hovertemplate="<br>".join([
486
- '<b>%{customdata[0]}</b>',
487
- 'Duration: %{customdata[1]}s',
488
- 'Percentage of Total: %{customdata[2]:.2f}%',
489
- 'Parent: %{customdata[3]}',
490
- 'Percentage of Parent: %{customdata[4]:.2f}%'
491
- ])
492
- )
493
- st.plotly_chart(fig1, use_container_width=True)
494
- st.plotly_chart(fig2, use_container_width=True)
495
- st.plotly_chart(fig3_1, use_container_width=True)
496
- st.plotly_chart(fig3, use_container_width=True)
497
- st.info("Displaying current file results3")
498
-
499
- fig_la = px.timeline(speakers_dataFrame, x_start="Start", x_end="Finish", y="Resource", color="Resource")
500
- fig_la.update_yaxes(autorange="reversed")
501
-
502
- hMax = int(currTotalTime//3600)
503
- mMax = int(currTotalTime%3600//60)
504
- sMax = int(currTotalTime%60)
505
- msMax = int(currTotalTime*1000000%1000000)
506
- timeMax = dt.time(hMax,mMax,sMax,msMax)
 
 
 
 
 
 
507
 
508
- fig_la.update_layout(
509
- xaxis_tickformatstops = [
510
- dict(dtickrange=[None, 1000], value="%H:%M:%S.%L"),
511
- dict(dtickrange=[1000, None], value="%H:%M:%S")
512
- ],
513
- xaxis=dict(
514
- range=[dt.datetime.combine(dt.date.today(), dt.time.min),dt.datetime.combine(dt.date.today(), timeMax)]
515
- ),
516
- xaxis_title="Time",
517
- yaxis_title="Speaker",
518
- legend_title=None
519
- )
520
-
521
- st.plotly_chart(fig_la, use_container_width=True)
522
-
523
- fig2_la = px.bar(df2, x="values", y="names", color="names", orientation='h',
524
- custom_data=["names","values"])
525
- fig2_la.update_xaxes(ticksuffix="%")
526
- fig2_la.update_yaxes(autorange="reversed")
527
- fig2_la.update_layout(
528
- xaxis_title="Percentage Time Spoken",
529
- yaxis_title="Speaker",
530
- legend_title=None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
531
 
532
- )
533
- fig2_la.update_traces(
534
- hovertemplate="<br>".join([
535
- '<b>%{customdata[0]}</b>',
536
- 'Percentage of Time: %{customdata[1]:.2f}%'
537
- ])
538
- )
539
- st.plotly_chart(fig2_la, use_container_width=True)
540
- st.info("Displaying current file results4")
541
- except ValueError:
542
- pass
543
-
544
- if len(st.session_state.results) > 0:
545
- st.info("In full summary results")
546
- with st.spinner(text='Processing summary results...'):
547
- fileNames = st.session_state.file_names
548
- results = []
549
- indices = []
550
- for i, resultTuple in enumerate(st.session_state.results):
551
- if len(resultTuple) == 2:
552
- fileNames.append(resultTuple[0])
553
- results.append(resultTuple[1])
554
- indices.append(i)
555
- st.info("In full summary results1")
556
- if len(indices) > 1:
557
 
558
- df6_dict = {
559
- "files":fileNames,
560
- }
561
- allCategories = copy.deepcopy(st.session_state.categories)
562
- for i in indices:
563
- currSpeakerList, currAnnotation, currTotalTime = st.session_state.results[i]
564
- categorySelections = st.session_state["categorySelect"][i]
565
- catSummary,extraCats = calcCategories(currAnnotation,categorySelections)
566
- st.session_state.summaries[i]["categories"] = (catSummary,extraCats)
567
- for extra in extraCats:
568
- df6_dict[extra] = []
569
- if extra not in allCategories:
570
- allCategories.append(extra)
571
- st.info("In full summary results2")
572
-
573
- for category in st.session_state.categories:
574
- df6_dict[category] = []
575
- for i in indices:
576
- summary, extras = st.session_state.summaries[i]["categories"]
577
- theseCategories = st.session_state.categories + extras
578
- for j, timeSlots in enumerate(summary):
579
- df6_dict[theseCategories[j]].append(sumTimes([t for _,t in timeSlots])/st.session_state.results[i][2])
580
- for category in allCategories:
581
- if category not in theseCategories:
582
- df6_dict[category].append(0)
583
- df6 = pd.DataFrame(df6_dict)
584
- summFig = px.bar(df6, x="files", y=allCategories)
585
- st.plotly_chart(summFig, use_container_width=True)
586
- st.info("In full summary results3")
587
-
588
- voiceNames = ["No Voice","One Voice","Multi Voice"]
589
- df7_dict = {
590
- "files":fileNames,
591
- }
592
- for category in voiceNames:
593
- df7_dict[category] = []
594
- for resultID,summary in enumerate(st.session_state.summaries):
595
- partialDf = summary["df5"]
596
- for i in range(len(voiceNames)):
597
- df7_dict[voiceNames[i]].append(partialDf["percentiles"][i])
598
- df7 = pd.DataFrame(df7_dict)
599
- sorted_df7 = df7.sort_values(by=['One Voice', 'Multi Voice'])
600
- summFig2 = px.bar(sorted_df7, x="files", y=["One Voice","Multi Voice","No Voice",])
601
- st.plotly_chart(summFig2, use_container_width=True)
602
- sorted_df7_3 = df7.sort_values(by=['Multi Voice','One Voice'])
603
- summFig3 = px.bar(sorted_df7_3, x="files", y=["One Voice","Multi Voice","No Voice",])
604
- st.plotly_chart(summFig3, use_container_width=True)
605
- sorted_df7_4 = df7.sort_values(by=['No Voice', 'Multi Voice'],ascending=False)
606
- summFig4 = px.bar(sorted_df7_4, x="files", y=["One Voice","Multi Voice","No Voice",])
607
- st.plotly_chart(summFig4, use_container_width=True)
608
-
609
-
610
-
611
- old = '''userid = st.text_input("user id:", "Guest")
612
- colorPref = st.text_input("Favorite color?", "None")
613
- radio = st.radio('Pick one:', ['Left','Right'])
614
- selection = st.selectbox('Select', [1,2,3])
615
- if st.button("Upload Files to Dataset"):
616
- save_data({"color":colorPref,"direction":radio,"number":selection},
617
- file_paths,
618
- userid)
619
- st.success('I think it worked!')
620
- '''
621
- @st.cache_data
622
- def convert_df(df):
623
- return df.to_csv(index=False).encode('utf-8')
624
-
625
-
626
- if currDF is not None:
627
- csv = convert_df(currDF)
628
 
629
- st.download_button(
630
- "Press to Download analysis data",
631
- csv,
632
- "file.csv",
633
- "text/csv",
634
- key='download-csv'
635
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
382
  speakerNames = annotations.labels()
383
  st.session_state.unusedSpeakers[i] = speakerNames
384
  else:
385
+ #st.info(file_paths[i])
386
  speakerList, annotations, totalSeconds = processFile(file_paths[i])
387
  st.session_state.results[i] = (speakerList,annotations, totalSeconds)
388
  st.session_state.summaries[i] = {}
 
393
 
394
  if currFile is None and len(st.session_state.results) > 0 and len(st.session_state.results[0]) > 0:
395
  st.write("Select a file to view from the sidebar")
396
+ else:
397
+ try:
398
+ st.session_state.resetResult = False
399
+ currFileIndex = file_names.index(currFile)
400
+ if len(st.session_state.results) > currFileIndex and len(st.session_state.summaries) > currFileIndex and len(st.session_state.results[currFileIndex]) > 0:
401
+
402
+ # Handle
403
+ currSpeakerList, currAnnotation, currTotalTime = st.session_state.results[currFileIndex]
404
+ speakerNames = currAnnotation.labels()
405
+
406
+ # Update other categories
407
+ unusedSpeakers = st.session_state.unusedSpeakers[currFileIndex]
408
+ categorySelections = st.session_state["categorySelect"][currFileIndex]
409
+ for i,category in enumerate(st.session_state.categories):
410
+ speakerSet = categorySelections[i]
411
+ st.sidebar.multiselect(category,
412
+ speakerSet+unusedSpeakers,
413
+ default=speakerSet,
414
+ key=f"multiselect_{category}",
415
+ on_change=updateCategoryOptions,
416
+ args=(currFileIndex,))
417
+ st.sidebar.button(f"Remove {category}",key=f"remove_{category}",on_click=removeCategory,args=(i,))
418
+
419
+
420
+
421
+ newCategory = st.sidebar.text_input('Add category', key='categoryInput',on_change=addCategory)
422
+
423
+ df4_dict = {}
424
+ nameList = st.session_state.categories
425
+ extraNames = []
426
+ valueList = [0 for i in range(len(nameList))]
427
+ extraValues = []
428
+
429
+ for i,speakerSet in enumerate(categorySelections):
430
+ valueList[i] += su.sumTimes(currAnnotation.subset(speakerSet))
431
+
432
+ for sp in unusedSpeakers:
433
+ extraNames.append(sp)
434
+ extraValues.append(su.sumTimes(currAnnotation.subset([sp])))
435
+
436
+
437
+ df4_dict = {
438
+ "names": nameList+extraNames,
439
+ "values": valueList+extraValues,
440
+ }
441
+ df4 = pd.DataFrame(data=df4_dict)
442
+ df4.name = "df4"
443
+ st.session_state.summaries[currFileIndex]["df4"] = df4
444
+ st.info(st.session_state.summaries)
445
+ df2 = st.session_state.summaries[currFileIndex]["df2"]
446
+ df3 = st.session_state.summaries[currFileIndex]["df3"]
447
+ df4 = st.session_state.summaries[currFileIndex]["df4"]
448
+ df5 = st.session_state.summaries[currFileIndex]["df5"]
449
+ speakers_dataFrame = st.session_state.summaries[currFileIndex]["speakers_dataFrame"]
450
+ currDF = speakers_dataFrame
451
+ speakers_times = st.session_state.summaries[currFileIndex]["speakers_times"]
452
+
453
+ # generate plotting window
454
+ fig1 = go.Figure()
455
+ fig1.add_trace(go.Pie(values=df3["values"],labels=df3["names"]))
456
+ fig2 = go.Figure()
457
+ fig2.add_trace(go.Pie(values=df4["values"],labels=df4["names"]))
458
+ fig3_1 = px.sunburst(df5,
459
+ branchvalues = 'total',
460
+ names = "labels",
461
+ ids = "ids",
462
+ parents = "parents",
463
+ values = "percentiles",
464
+ custom_data=['labels','valueStrings','percentiles','parentNames','parentPercentiles'],
465
+ color = 'labels',
466
+ )
467
+ fig3_1.update_traces(
468
+ hovertemplate="<br>".join([
469
+ '<b>%{customdata[0]}</b>',
470
+ 'Duration: %{customdata[1]}s',
471
+ 'Percentage of Total: %{customdata[2]:.2f}%',
472
+ 'Parent: %{customdata[3]}',
473
+ 'Percentage of Parent: %{customdata[4]:.2f}%'
474
+ ])
475
+ )
476
+ fig3 = px.treemap(df5,
477
+ branchvalues = "total",
478
  names = "labels",
 
479
  parents = "parents",
480
+ ids="ids",
481
  values = "percentiles",
482
  custom_data=['labels','valueStrings','percentiles','parentNames','parentPercentiles'],
483
+ color='labels',
484
  )
485
+ fig3.update_traces(
486
+ hovertemplate="<br>".join([
487
+ '<b>%{customdata[0]}</b>',
488
+ 'Duration: %{customdata[1]}s',
489
+ 'Percentage of Total: %{customdata[2]:.2f}%',
490
+ 'Parent: %{customdata[3]}',
491
+ 'Percentage of Parent: %{customdata[4]:.2f}%'
492
+ ])
493
+ )
494
+ st.plotly_chart(fig1, use_container_width=True)
495
+ st.plotly_chart(fig2, use_container_width=True)
496
+ st.plotly_chart(fig3_1, use_container_width=True)
497
+ st.plotly_chart(fig3, use_container_width=True)
498
+
499
+
500
+ fig_la = px.timeline(speakers_dataFrame, x_start="Start", x_end="Finish", y="Resource", color="Resource")
501
+ fig_la.update_yaxes(autorange="reversed")
502
+
503
+ hMax = int(currTotalTime//3600)
504
+ mMax = int(currTotalTime%3600//60)
505
+ sMax = int(currTotalTime%60)
506
+ msMax = int(currTotalTime*1000000%1000000)
507
+ timeMax = dt.time(hMax,mMax,sMax,msMax)
508
+
509
+ fig_la.update_layout(
510
+ xaxis_tickformatstops = [
511
+ dict(dtickrange=[None, 1000], value="%H:%M:%S.%L"),
512
+ dict(dtickrange=[1000, None], value="%H:%M:%S")
513
+ ],
514
+ xaxis=dict(
515
+ range=[dt.datetime.combine(dt.date.today(), dt.time.min),dt.datetime.combine(dt.date.today(), timeMax)]
516
+ ),
517
+ xaxis_title="Time",
518
+ yaxis_title="Speaker",
519
+ legend_title=None
520
+ )
521
+
522
+ st.plotly_chart(fig_la, use_container_width=True)
523
+
524
+ fig2_la = px.bar(df2, x="values", y="names", color="names", orientation='h',
525
+ custom_data=["names","values"])
526
+ fig2_la.update_xaxes(ticksuffix="%")
527
+ fig2_la.update_yaxes(autorange="reversed")
528
+ fig2_la.update_layout(
529
+ xaxis_title="Percentage Time Spoken",
530
+ yaxis_title="Speaker",
531
+ legend_title=None
532
 
533
+ )
534
+ fig2_la.update_traces(
535
+ hovertemplate="<br>".join([
536
+ '<b>%{customdata[0]}</b>',
537
+ 'Percentage of Time: %{customdata[1]:.2f}%'
538
+ ])
539
+ )
540
+ st.plotly_chart(fig2_la, use_container_width=True)
541
+
542
+ except ValueError:
543
+ pass
544
+
545
+ if len(st.session_state.results) > 0:
546
+ st.info("In full summary results")
547
+ with st.spinner(text='Processing summary results...'):
548
+ fileNames = st.session_state.file_names
549
+ results = []
550
+ indices = []
551
+ for i, resultTuple in enumerate(st.session_state.results):
552
+ if len(resultTuple) == 2:
553
+ fileNames.append(resultTuple[0])
554
+ results.append(resultTuple[1])
555
+ indices.append(i)
556
+
557
+ if len(indices) > 1:
558
+
559
+ df6_dict = {
560
+ "files":fileNames,
561
+ }
562
+ allCategories = copy.deepcopy(st.session_state.categories)
563
+ for i in indices:
564
+ currSpeakerList, currAnnotation, currTotalTime = st.session_state.results[i]
565
+ categorySelections = st.session_state["categorySelect"][i]
566
+ catSummary,extraCats = calcCategories(currAnnotation,categorySelections)
567
+ st.session_state.summaries[i]["categories"] = (catSummary,extraCats)
568
+ for extra in extraCats:
569
+ df6_dict[extra] = []
570
+ if extra not in allCategories:
571
+ allCategories.append(extra)
572
+
573
 
574
+ for category in st.session_state.categories:
575
+ df6_dict[category] = []
576
+ for i in indices:
577
+ summary, extras = st.session_state.summaries[i]["categories"]
578
+ theseCategories = st.session_state.categories + extras
579
+ for j, timeSlots in enumerate(summary):
580
+ df6_dict[theseCategories[j]].append(sumTimes([t for _,t in timeSlots])/st.session_state.results[i][2])
581
+ for category in allCategories:
582
+ if category not in theseCategories:
583
+ df6_dict[category].append(0)
584
+ df6 = pd.DataFrame(df6_dict)
585
+ summFig = px.bar(df6, x="files", y=allCategories)
586
+ st.plotly_chart(summFig, use_container_width=True)
587
+
 
 
 
 
 
 
 
 
 
 
 
588
 
589
+ voiceNames = ["No Voice","One Voice","Multi Voice"]
590
+ df7_dict = {
591
+ "files":fileNames,
592
+ }
593
+ for category in voiceNames:
594
+ df7_dict[category] = []
595
+ for resultID,summary in enumerate(st.session_state.summaries):
596
+ partialDf = summary["df5"]
597
+ for i in range(len(voiceNames)):
598
+ df7_dict[voiceNames[i]].append(partialDf["percentiles"][i])
599
+ df7 = pd.DataFrame(df7_dict)
600
+ sorted_df7 = df7.sort_values(by=['One Voice', 'Multi Voice'])
601
+ summFig2 = px.bar(sorted_df7, x="files", y=["One Voice","Multi Voice","No Voice",])
602
+ st.plotly_chart(summFig2, use_container_width=True)
603
+ sorted_df7_3 = df7.sort_values(by=['Multi Voice','One Voice'])
604
+ summFig3 = px.bar(sorted_df7_3, x="files", y=["One Voice","Multi Voice","No Voice",])
605
+ st.plotly_chart(summFig3, use_container_width=True)
606
+ sorted_df7_4 = df7.sort_values(by=['No Voice', 'Multi Voice'],ascending=False)
607
+ summFig4 = px.bar(sorted_df7_4, x="files", y=["One Voice","Multi Voice","No Voice",])
608
+ st.plotly_chart(summFig4, use_container_width=True)
609
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
610
 
611
+
612
+ old = '''userid = st.text_input("user id:", "Guest")
613
+ colorPref = st.text_input("Favorite color?", "None")
614
+ radio = st.radio('Pick one:', ['Left','Right'])
615
+ selection = st.selectbox('Select', [1,2,3])
616
+ if st.button("Upload Files to Dataset"):
617
+ save_data({"color":colorPref,"direction":radio,"number":selection},
618
+ file_paths,
619
+ userid)
620
+ st.success('I think it worked!')
621
+ '''
622
+ @st.cache_data
623
+ def convert_df(df):
624
+ return df.to_csv(index=False).encode('utf-8')
625
+
626
+
627
+ if currDF is not None:
628
+ csv = convert_df(currDF)
629
+
630
+ st.download_button(
631
+ "Press to Download analysis data",
632
+ csv,
633
+ "file.csv",
634
+ "text/csv",
635
+ key='download-csv'
636
+ )