Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
Update app.py
Browse files - add speaker name change
app.py
CHANGED
|
@@ -39,7 +39,22 @@ def printV(message,verbosityLevel):
|
|
| 39 |
global verbosity
|
| 40 |
if verbosity>=verbosityLevel:
|
| 41 |
print(message)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
@st.cache_data
def convert_df(df):
    """Serialize ``df`` to CSV (index column omitted) as UTF-8 bytes.

    Decorated with ``st.cache_data``.
    """
    csv_text = df.to_csv(index=False)
    return csv_text.encode('utf-8')
|
|
@@ -315,6 +330,8 @@ pipeline.to(device)#torch.device("cuda"))
|
|
| 315 |
# Long-range usage
|
| 316 |
if 'results' not in st.session_state:
|
| 317 |
st.session_state.results = []
|
|
|
|
|
|
|
| 318 |
if 'summaries' not in st.session_state:
|
| 319 |
st.session_state.summaries = []
|
| 320 |
if 'categories' not in st.session_state:
|
|
@@ -379,6 +396,8 @@ if uploaded_file_paths is not None:
|
|
| 379 |
file_names = [f.name for f in valid_files]
|
| 380 |
while (len(st.session_state.results) < len(valid_files)):
|
| 381 |
st.session_state.results.append([])
|
|
|
|
|
|
|
| 382 |
while (len(st.session_state.summaries) < len(valid_files)):
|
| 383 |
st.session_state.summaries.append([])
|
| 384 |
while (len(st.session_state.unusedSpeakers) < len(valid_files)):
|
|
@@ -494,6 +513,8 @@ if st.sidebar.button("Load Demo Example"):
|
|
| 494 |
st.session_state.categorySelect.append(tempCategories)
|
| 495 |
while (len(st.session_state.summaries) < len(valid_files)):
|
| 496 |
st.session_state.summaries.append([])
|
|
|
|
|
|
|
| 497 |
|
| 498 |
with st.spinner(text=f'Loading Demo Sample'):
|
| 499 |
# RTTM load as filler
|
|
@@ -505,6 +526,7 @@ if st.sidebar.button("Load Demo Example"):
|
|
| 505 |
totalSeconds = segment.end
|
| 506 |
st.session_state.results = [(annotations, totalSeconds)]
|
| 507 |
st.session_state.summaries = [{}]
|
|
|
|
| 508 |
speakerNames = annotations.labels()
|
| 509 |
st.session_state.unusedSpeakers = [speakerNames]
|
| 510 |
with st.spinner(text=f'Analyzing Demo Data'):
|
|
@@ -554,6 +576,23 @@ try:
|
|
| 554 |
|
| 555 |
newCategory = st.sidebar.text_input('Add category', key='categoryInput',on_change=addCategory)
|
| 556 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 557 |
catTypeColors = su.colorsCSS(3)
|
| 558 |
allColors = su.colorsCSS(len(speakerNames)+len(st.session_state.categories))
|
| 559 |
speakerColors = allColors[:len(speakerNames)]
|
|
@@ -582,7 +621,8 @@ try:
|
|
| 582 |
st.session_state.summaries[currFileIndex]["df4"] = df4
|
| 583 |
|
| 584 |
with dataTab:
|
| 585 |
-
|
|
|
|
| 586 |
|
| 587 |
st.download_button(
|
| 588 |
"Press to Download analysis data",
|
|
@@ -592,7 +632,7 @@ try:
|
|
| 592 |
key='download-csv',
|
| 593 |
on_click="ignore",
|
| 594 |
)
|
| 595 |
-
st.dataframe(
|
| 596 |
with pie1:
|
| 597 |
printV("In Pie1",4)
|
| 598 |
df3 = st.session_state.summaries[currFileIndex]["df3"]
|
|
@@ -639,13 +679,14 @@ try:
|
|
| 639 |
printV("Pie1 post plotly",4)
|
| 640 |
|
| 641 |
with pie2:
|
| 642 |
-
df4 = st.session_state.summaries[currFileIndex]["df4"]
|
| 643 |
|
| 644 |
# Some speakers may be missing, so fix colors
|
| 645 |
figColors = []
|
| 646 |
for n in df4["names"]:
|
| 647 |
if n in speakerNames:
|
| 648 |
figColors.append(speakerColors[speakerNames.index(n)])
|
|
|
|
| 649 |
fig2 = go.Figure()
|
| 650 |
fig2.update_layout(
|
| 651 |
title_text="Percentage of Speakers and Custom Categories",
|
|
@@ -681,7 +722,9 @@ try:
|
|
| 681 |
st.plotly_chart(fig2, use_container_width=True,config=config)
|
| 682 |
|
| 683 |
with sunburst1:
|
| 684 |
-
df5 = st.session_state.summaries[currFileIndex]["df5"]
|
|
|
|
|
|
|
| 685 |
fig3_1 = px.sunburst(df5,
|
| 686 |
branchvalues = 'total',
|
| 687 |
names = "labels",
|
|
@@ -733,7 +776,9 @@ try:
|
|
| 733 |
st.plotly_chart(fig3_1, use_container_width=True,config=config)
|
| 734 |
|
| 735 |
with treemap1:
|
| 736 |
-
df5 = st.session_state.summaries[currFileIndex]["df5"]
|
|
|
|
|
|
|
| 737 |
fig3 = px.treemap(df5,
|
| 738 |
branchvalues = "total",
|
| 739 |
names = "labels",
|
|
@@ -788,7 +833,9 @@ try:
|
|
| 788 |
|
| 789 |
|
| 790 |
with timeline:
|
| 791 |
-
|
|
|
|
|
|
|
| 792 |
color_discrete_sequence=speakerColors)
|
| 793 |
fig_la.update_yaxes(autorange="reversed")
|
| 794 |
|
|
@@ -841,7 +888,8 @@ try:
|
|
| 841 |
st.plotly_chart(fig_la, use_container_width=True,config=config)
|
| 842 |
|
| 843 |
with bar1:
|
| 844 |
-
df2 = st.session_state.summaries[currFileIndex]["df2"]
|
|
|
|
| 845 |
fig2_la = px.bar(df2, x="values", y="names", color="names", orientation='h',
|
| 846 |
custom_data=["names","values"],title="Time Spoken by each Speaker",
|
| 847 |
color_discrete_sequence=catColors+speakerColors)
|
|
|
|
| 39 |
global verbosity
|
| 40 |
if verbosity>=verbosityLevel:
|
| 41 |
print(message)
|
| 42 |
+
|
| 43 |
+
def get_display_name(speaker, fileIndex):
    """Resolve the label to show for ``speaker`` in the file at ``fileIndex``.

    Looks in ``st.session_state.speakerRenames``, which holds one rename
    mapping per file. If ``fileIndex`` is below the number of stored
    mappings and ``speaker`` is a key of that file's mapping, the mapped
    name is returned; in every other case ``speaker`` is returned unchanged.
    """
    all_renames = st.session_state.speakerRenames
    # No mapping stored for this file index: keep the original label.
    if fileIndex >= len(all_renames):
        return speaker
    file_renames = all_renames[fileIndex]
    if speaker in file_renames:
        return file_renames[speaker]
    return speaker
|
| 49 |
|
| 50 |
+
def apply_speaker_renames_to_df(df, fileIndex, column="task"):
    """Return ``df`` with the values of ``column`` mapped to display names.

    Each value in ``column`` is passed through
    ``get_display_name(value, fileIndex)``. The substitution is done on a
    copy, so the caller's DataFrame is not modified. When ``column`` is not
    among ``df.columns`` the original ``df`` object is returned as-is
    (no copy is made).
    """
    if column in df.columns:
        renamed = df.copy()
        renamed[column] = renamed[column].apply(
            lambda label: get_display_name(label, fileIndex)
        )
        return renamed
    return df
|
| 57 |
+
|
| 58 |
@st.cache_data
def convert_df(df):
    """Serialize ``df`` to CSV (index column omitted) as UTF-8 bytes.

    Decorated with ``st.cache_data``.
    """
    csv_text = df.to_csv(index=False)
    return csv_text.encode('utf-8')
|
|
|
|
| 330 |
# Long-range usage
|
| 331 |
if 'results' not in st.session_state:
|
| 332 |
st.session_state.results = []
|
| 333 |
+
if 'speakerRenames' not in st.session_state:
|
| 334 |
+
st.session_state.speakerRenames = []
|
| 335 |
if 'summaries' not in st.session_state:
|
| 336 |
st.session_state.summaries = []
|
| 337 |
if 'categories' not in st.session_state:
|
|
|
|
| 396 |
file_names = [f.name for f in valid_files]
|
| 397 |
while (len(st.session_state.results) < len(valid_files)):
|
| 398 |
st.session_state.results.append([])
|
| 399 |
+
while (len(st.session_state.speakerRenames) < len(valid_files)):
|
| 400 |
+
st.session_state.speakerRenames.append({})
|
| 401 |
while (len(st.session_state.summaries) < len(valid_files)):
|
| 402 |
st.session_state.summaries.append([])
|
| 403 |
while (len(st.session_state.unusedSpeakers) < len(valid_files)):
|
|
|
|
| 513 |
st.session_state.categorySelect.append(tempCategories)
|
| 514 |
while (len(st.session_state.summaries) < len(valid_files)):
|
| 515 |
st.session_state.summaries.append([])
|
| 516 |
+
while (len(st.session_state.speakerRenames) < len(valid_files)):
|
| 517 |
+
st.session_state.speakerRenames.append({})
|
| 518 |
|
| 519 |
with st.spinner(text=f'Loading Demo Sample'):
|
| 520 |
# RTTM load as filler
|
|
|
|
| 526 |
totalSeconds = segment.end
|
| 527 |
st.session_state.results = [(annotations, totalSeconds)]
|
| 528 |
st.session_state.summaries = [{}]
|
| 529 |
+
st.session_state.speakerRenames = [{}]
|
| 530 |
speakerNames = annotations.labels()
|
| 531 |
st.session_state.unusedSpeakers = [speakerNames]
|
| 532 |
with st.spinner(text=f'Analyzing Demo Data'):
|
|
|
|
| 576 |
|
| 577 |
newCategory = st.sidebar.text_input('Add category', key='categoryInput',on_change=addCategory)
|
| 578 |
|
| 579 |
+
st.sidebar.divider()
|
| 580 |
+
st.sidebar.subheader("Rename Speakers")
|
| 581 |
+
st.sidebar.caption("Replace SPEAKER_## labels with real names.")
|
| 582 |
+
current_renames = st.session_state.speakerRenames[currFileIndex]
|
| 583 |
+
for sp in speakerNames:
|
| 584 |
+
current_label = current_renames.get(sp, "")
|
| 585 |
+
new_name = st.sidebar.text_input(
|
| 586 |
+
f"{sp}",
|
| 587 |
+
value=current_label,
|
| 588 |
+
placeholder=f"e.g. John",
|
| 589 |
+
key=f"rename_{currFileIndex}_{sp}"
|
| 590 |
+
)
|
| 591 |
+
if new_name.strip():
|
| 592 |
+
st.session_state.speakerRenames[currFileIndex][sp] = new_name.strip()
|
| 593 |
+
elif sp in st.session_state.speakerRenames[currFileIndex]:
|
| 594 |
+
del st.session_state.speakerRenames[currFileIndex][sp]
|
| 595 |
+
|
| 596 |
catTypeColors = su.colorsCSS(3)
|
| 597 |
allColors = su.colorsCSS(len(speakerNames)+len(st.session_state.categories))
|
| 598 |
speakerColors = allColors[:len(speakerNames)]
|
|
|
|
| 621 |
st.session_state.summaries[currFileIndex]["df4"] = df4
|
| 622 |
|
| 623 |
with dataTab:
|
| 624 |
+
displayDF = apply_speaker_renames_to_df(currDF, currFileIndex, column="task")
|
| 625 |
+
csv = convert_df(displayDF)
|
| 626 |
|
| 627 |
st.download_button(
|
| 628 |
"Press to Download analysis data",
|
|
|
|
| 632 |
key='download-csv',
|
| 633 |
on_click="ignore",
|
| 634 |
)
|
| 635 |
+
st.dataframe(displayDF)
|
| 636 |
with pie1:
|
| 637 |
printV("In Pie1",4)
|
| 638 |
df3 = st.session_state.summaries[currFileIndex]["df3"]
|
|
|
|
| 679 |
printV("Pie1 post plotly",4)
|
| 680 |
|
| 681 |
with pie2:
|
| 682 |
+
df4 = st.session_state.summaries[currFileIndex]["df4"].copy()
|
| 683 |
|
| 684 |
# Some speakers may be missing, so fix colors
|
| 685 |
figColors = []
|
| 686 |
for n in df4["names"]:
|
| 687 |
if n in speakerNames:
|
| 688 |
figColors.append(speakerColors[speakerNames.index(n)])
|
| 689 |
+
df4["names"] = df4["names"].apply(lambda s: get_display_name(s, currFileIndex))
|
| 690 |
fig2 = go.Figure()
|
| 691 |
fig2.update_layout(
|
| 692 |
title_text="Percentage of Speakers and Custom Categories",
|
|
|
|
| 722 |
st.plotly_chart(fig2, use_container_width=True,config=config)
|
| 723 |
|
| 724 |
with sunburst1:
|
| 725 |
+
df5 = st.session_state.summaries[currFileIndex]["df5"].copy()
|
| 726 |
+
df5["labels"] = df5["labels"].apply(lambda s: get_display_name(s, currFileIndex))
|
| 727 |
+
df5["parentNames"] = df5["parentNames"].apply(lambda s: get_display_name(s, currFileIndex))
|
| 728 |
fig3_1 = px.sunburst(df5,
|
| 729 |
branchvalues = 'total',
|
| 730 |
names = "labels",
|
|
|
|
| 776 |
st.plotly_chart(fig3_1, use_container_width=True,config=config)
|
| 777 |
|
| 778 |
with treemap1:
|
| 779 |
+
df5 = st.session_state.summaries[currFileIndex]["df5"].copy()
|
| 780 |
+
df5["labels"] = df5["labels"].apply(lambda s: get_display_name(s, currFileIndex))
|
| 781 |
+
df5["parentNames"] = df5["parentNames"].apply(lambda s: get_display_name(s, currFileIndex))
|
| 782 |
fig3 = px.treemap(df5,
|
| 783 |
branchvalues = "total",
|
| 784 |
names = "labels",
|
|
|
|
| 833 |
|
| 834 |
|
| 835 |
with timeline:
|
| 836 |
+
timeline_df = speakers_dataFrame.copy()
|
| 837 |
+
timeline_df["Resource"] = timeline_df["Resource"].apply(lambda s: get_display_name(s, currFileIndex))
|
| 838 |
+
fig_la = px.timeline(timeline_df, x_start="Start", x_end="Finish", y="Resource", color="Resource",title="Timeline of Audio with Speakers",
|
| 839 |
color_discrete_sequence=speakerColors)
|
| 840 |
fig_la.update_yaxes(autorange="reversed")
|
| 841 |
|
|
|
|
| 888 |
st.plotly_chart(fig_la, use_container_width=True,config=config)
|
| 889 |
|
| 890 |
with bar1:
|
| 891 |
+
df2 = st.session_state.summaries[currFileIndex]["df2"].copy()
|
| 892 |
+
df2["names"] = df2["names"].apply(lambda s: get_display_name(s, currFileIndex))
|
| 893 |
fig2_la = px.bar(df2, x="values", y="names", color="names", orientation='h',
|
| 894 |
custom_data=["names","values"],title="Time Spoken by each Speaker",
|
| 895 |
color_discrete_sequence=catColors+speakerColors)
|