duongthienz committed on
Commit
f0a71c7
·
verified ·
1 Parent(s): da5ffe1

Update app.py

Browse files

add speaker name change

Files changed (1) hide show
  1. app.py +55 -7
app.py CHANGED
@@ -39,7 +39,22 @@ def printV(message,verbosityLevel):
39
  global verbosity
40
  if verbosity>=verbosityLevel:
41
  print(message)
 
 
 
 
 
 
 
42
 
 
 
 
 
 
 
 
 
43
  @st.cache_data
44
  def convert_df(df):
45
  return df.to_csv(index=False).encode('utf-8')
@@ -315,6 +330,8 @@ pipeline.to(device)#torch.device("cuda"))
315
  # Long-range usage
316
  if 'results' not in st.session_state:
317
  st.session_state.results = []
 
 
318
  if 'summaries' not in st.session_state:
319
  st.session_state.summaries = []
320
  if 'categories' not in st.session_state:
@@ -379,6 +396,8 @@ if uploaded_file_paths is not None:
379
  file_names = [f.name for f in valid_files]
380
  while (len(st.session_state.results) < len(valid_files)):
381
  st.session_state.results.append([])
 
 
382
  while (len(st.session_state.summaries) < len(valid_files)):
383
  st.session_state.summaries.append([])
384
  while (len(st.session_state.unusedSpeakers) < len(valid_files)):
@@ -494,6 +513,8 @@ if st.sidebar.button("Load Demo Example"):
494
  st.session_state.categorySelect.append(tempCategories)
495
  while (len(st.session_state.summaries) < len(valid_files)):
496
  st.session_state.summaries.append([])
 
 
497
 
498
  with st.spinner(text=f'Loading Demo Sample'):
499
  # RTTM load as filler
@@ -505,6 +526,7 @@ if st.sidebar.button("Load Demo Example"):
505
  totalSeconds = segment.end
506
  st.session_state.results = [(annotations, totalSeconds)]
507
  st.session_state.summaries = [{}]
 
508
  speakerNames = annotations.labels()
509
  st.session_state.unusedSpeakers = [speakerNames]
510
  with st.spinner(text=f'Analyzing Demo Data'):
@@ -554,6 +576,23 @@ try:
554
 
555
  newCategory = st.sidebar.text_input('Add category', key='categoryInput',on_change=addCategory)
556
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
557
  catTypeColors = su.colorsCSS(3)
558
  allColors = su.colorsCSS(len(speakerNames)+len(st.session_state.categories))
559
  speakerColors = allColors[:len(speakerNames)]
@@ -582,7 +621,8 @@ try:
582
  st.session_state.summaries[currFileIndex]["df4"] = df4
583
 
584
  with dataTab:
585
- csv = convert_df(currDF)
 
586
 
587
  st.download_button(
588
  "Press to Download analysis data",
@@ -592,7 +632,7 @@ try:
592
  key='download-csv',
593
  on_click="ignore",
594
  )
595
- st.dataframe(currDF)
596
  with pie1:
597
  printV("In Pie1",4)
598
  df3 = st.session_state.summaries[currFileIndex]["df3"]
@@ -639,13 +679,14 @@ try:
639
  printV("Pie1 post plotly",4)
640
 
641
  with pie2:
642
- df4 = st.session_state.summaries[currFileIndex]["df4"]
643
 
644
  # Some speakers may be missing, so fix colors
645
  figColors = []
646
  for n in df4["names"]:
647
  if n in speakerNames:
648
  figColors.append(speakerColors[speakerNames.index(n)])
 
649
  fig2 = go.Figure()
650
  fig2.update_layout(
651
  title_text="Percentage of Speakers and Custom Categories",
@@ -681,7 +722,9 @@ try:
681
  st.plotly_chart(fig2, use_container_width=True,config=config)
682
 
683
  with sunburst1:
684
- df5 = st.session_state.summaries[currFileIndex]["df5"]
 
 
685
  fig3_1 = px.sunburst(df5,
686
  branchvalues = 'total',
687
  names = "labels",
@@ -733,7 +776,9 @@ try:
733
  st.plotly_chart(fig3_1, use_container_width=True,config=config)
734
 
735
  with treemap1:
736
- df5 = st.session_state.summaries[currFileIndex]["df5"]
 
 
737
  fig3 = px.treemap(df5,
738
  branchvalues = "total",
739
  names = "labels",
@@ -788,7 +833,9 @@ try:
788
 
789
 
790
  with timeline:
791
- fig_la = px.timeline(speakers_dataFrame, x_start="Start", x_end="Finish", y="Resource", color="Resource",title="Timeline of Audio with Speakers",
 
 
792
  color_discrete_sequence=speakerColors)
793
  fig_la.update_yaxes(autorange="reversed")
794
 
@@ -841,7 +888,8 @@ try:
841
  st.plotly_chart(fig_la, use_container_width=True,config=config)
842
 
843
  with bar1:
844
- df2 = st.session_state.summaries[currFileIndex]["df2"]
 
845
  fig2_la = px.bar(df2, x="values", y="names", color="names", orientation='h',
846
  custom_data=["names","values"],title="Time Spoken by each Speaker",
847
  color_discrete_sequence=catColors+speakerColors)
 
39
  global verbosity
40
  if verbosity>=verbosityLevel:
41
  print(message)
42
+
43
+ def get_display_name(speaker, fileIndex):
44
+ """Return the user-assigned display name for a speaker, or the original label."""
45
+ renames = st.session_state.speakerRenames
46
+ if fileIndex < len(renames) and speaker in renames[fileIndex]:
47
+ return renames[fileIndex][speaker]
48
+ return speaker
49
 
50
+ def apply_speaker_renames_to_df(df, fileIndex, column="task"):
51
+ """Replace speaker_## labels in a DataFrame column with display names."""
52
+ if column not in df.columns:
53
+ return df
54
+ df = df.copy()
55
+ df[column] = df[column].apply(lambda s: get_display_name(s, fileIndex))
56
+ return df
57
+
58
  @st.cache_data
59
  def convert_df(df):
60
  return df.to_csv(index=False).encode('utf-8')
 
330
  # Long-range usage
331
  if 'results' not in st.session_state:
332
  st.session_state.results = []
333
+ if 'speakerRenames' not in st.session_state:
334
+ st.session_state.speakerRenames = []
335
  if 'summaries' not in st.session_state:
336
  st.session_state.summaries = []
337
  if 'categories' not in st.session_state:
 
396
  file_names = [f.name for f in valid_files]
397
  while (len(st.session_state.results) < len(valid_files)):
398
  st.session_state.results.append([])
399
+ while (len(st.session_state.speakerRenames) < len(valid_files)):
400
+ st.session_state.speakerRenames.append({})
401
  while (len(st.session_state.summaries) < len(valid_files)):
402
  st.session_state.summaries.append([])
403
  while (len(st.session_state.unusedSpeakers) < len(valid_files)):
 
513
  st.session_state.categorySelect.append(tempCategories)
514
  while (len(st.session_state.summaries) < len(valid_files)):
515
  st.session_state.summaries.append([])
516
+ while (len(st.session_state.speakerRenames) < len(valid_files)):
517
+ st.session_state.speakerRenames.append({})
518
 
519
  with st.spinner(text=f'Loading Demo Sample'):
520
  # RTTM load as filler
 
526
  totalSeconds = segment.end
527
  st.session_state.results = [(annotations, totalSeconds)]
528
  st.session_state.summaries = [{}]
529
+ st.session_state.speakerRenames = [{}]
530
  speakerNames = annotations.labels()
531
  st.session_state.unusedSpeakers = [speakerNames]
532
  with st.spinner(text=f'Analyzing Demo Data'):
 
576
 
577
  newCategory = st.sidebar.text_input('Add category', key='categoryInput',on_change=addCategory)
578
 
579
+ st.sidebar.divider()
580
+ st.sidebar.subheader("Rename Speakers")
581
+ st.sidebar.caption("Replace SPEAKER_## labels with real names.")
582
+ current_renames = st.session_state.speakerRenames[currFileIndex]
583
+ for sp in speakerNames:
584
+ current_label = current_renames.get(sp, "")
585
+ new_name = st.sidebar.text_input(
586
+ f"{sp}",
587
+ value=current_label,
588
+ placeholder=f"e.g. John",
589
+ key=f"rename_{currFileIndex}_{sp}"
590
+ )
591
+ if new_name.strip():
592
+ st.session_state.speakerRenames[currFileIndex][sp] = new_name.strip()
593
+ elif sp in st.session_state.speakerRenames[currFileIndex]:
594
+ del st.session_state.speakerRenames[currFileIndex][sp]
595
+
596
  catTypeColors = su.colorsCSS(3)
597
  allColors = su.colorsCSS(len(speakerNames)+len(st.session_state.categories))
598
  speakerColors = allColors[:len(speakerNames)]
 
621
  st.session_state.summaries[currFileIndex]["df4"] = df4
622
 
623
  with dataTab:
624
+ displayDF = apply_speaker_renames_to_df(currDF, currFileIndex, column="task")
625
+ csv = convert_df(displayDF)
626
 
627
  st.download_button(
628
  "Press to Download analysis data",
 
632
  key='download-csv',
633
  on_click="ignore",
634
  )
635
+ st.dataframe(displayDF)
636
  with pie1:
637
  printV("In Pie1",4)
638
  df3 = st.session_state.summaries[currFileIndex]["df3"]
 
679
  printV("Pie1 post plotly",4)
680
 
681
  with pie2:
682
+ df4 = st.session_state.summaries[currFileIndex]["df4"].copy()
683
 
684
  # Some speakers may be missing, so fix colors
685
  figColors = []
686
  for n in df4["names"]:
687
  if n in speakerNames:
688
  figColors.append(speakerColors[speakerNames.index(n)])
689
+ df4["names"] = df4["names"].apply(lambda s: get_display_name(s, currFileIndex))
690
  fig2 = go.Figure()
691
  fig2.update_layout(
692
  title_text="Percentage of Speakers and Custom Categories",
 
722
  st.plotly_chart(fig2, use_container_width=True,config=config)
723
 
724
  with sunburst1:
725
+ df5 = st.session_state.summaries[currFileIndex]["df5"].copy()
726
+ df5["labels"] = df5["labels"].apply(lambda s: get_display_name(s, currFileIndex))
727
+ df5["parentNames"] = df5["parentNames"].apply(lambda s: get_display_name(s, currFileIndex))
728
  fig3_1 = px.sunburst(df5,
729
  branchvalues = 'total',
730
  names = "labels",
 
776
  st.plotly_chart(fig3_1, use_container_width=True,config=config)
777
 
778
  with treemap1:
779
+ df5 = st.session_state.summaries[currFileIndex]["df5"].copy()
780
+ df5["labels"] = df5["labels"].apply(lambda s: get_display_name(s, currFileIndex))
781
+ df5["parentNames"] = df5["parentNames"].apply(lambda s: get_display_name(s, currFileIndex))
782
  fig3 = px.treemap(df5,
783
  branchvalues = "total",
784
  names = "labels",
 
833
 
834
 
835
  with timeline:
836
+ timeline_df = speakers_dataFrame.copy()
837
+ timeline_df["Resource"] = timeline_df["Resource"].apply(lambda s: get_display_name(s, currFileIndex))
838
+ fig_la = px.timeline(timeline_df, x_start="Start", x_end="Finish", y="Resource", color="Resource",title="Timeline of Audio with Speakers",
839
  color_discrete_sequence=speakerColors)
840
  fig_la.update_yaxes(autorange="reversed")
841
 
 
888
  st.plotly_chart(fig_la, use_container_width=True,config=config)
889
 
890
  with bar1:
891
+ df2 = st.session_state.summaries[currFileIndex]["df2"].copy()
892
+ df2["names"] = df2["names"].apply(lambda s: get_display_name(s, currFileIndex))
893
  fig2_la = px.bar(df2, x="values", y="names", color="names", orientation='h',
894
  custom_data=["names","values"],title="Time Spoken by each Speaker",
895
  color_discrete_sequence=catColors+speakerColors)