Spaces:

Sonogram
/

Instructor-Support-Tool

Running on CPU Upgrade

App Files Files Community

duongthienz committed on Mar 23

Commit

f0a71c7

verified ·

1 Parent(s): da5ffe1

Update app.py

Browse files

add speaker name change

Files changed (1) hide show

app.py +55 -7

app.py CHANGED Viewed

@@ -39,7 +39,22 @@ def printV(message,verbosityLevel):
     global verbosity
     if verbosity>=verbosityLevel:
         print(message)
 @st.cache_data  # Streamlit data cache; per the st.cache_data docs, reruns with the same df should reuse the encoded result
 def convert_df(df):
    return df.to_csv(index=False).encode('utf-8')  # CSV text without the index column, encoded as UTF-8 bytes
@@ -315,6 +330,8 @@ pipeline.to(device)#torch.device("cuda"))
 # Long-range usage
 if 'results' not in st.session_state:
     st.session_state.results = []
 if 'summaries' not in st.session_state:
     st.session_state.summaries = []
 if 'categories' not in st.session_state:
@@ -379,6 +396,8 @@ if uploaded_file_paths is not None:
         file_names = [f.name for f in valid_files]
     while (len(st.session_state.results) < len(valid_files)):
         st.session_state.results.append([])
     while (len(st.session_state.summaries) < len(valid_files)):
         st.session_state.summaries.append([])
     while (len(st.session_state.unusedSpeakers) < len(valid_files)):
@@ -494,6 +513,8 @@ if st.sidebar.button("Load Demo Example"):
         st.session_state.categorySelect.append(tempCategories)
     while (len(st.session_state.summaries) < len(valid_files)):
         st.session_state.summaries.append([])
     with st.spinner(text=f'Loading Demo Sample'):
         # RTTM load as filler
@@ -505,6 +526,7 @@ if st.sidebar.button("Load Demo Example"):
                 totalSeconds = segment.end
         st.session_state.results = [(annotations, totalSeconds)]
         st.session_state.summaries = [{}]
         speakerNames = annotations.labels()
         st.session_state.unusedSpeakers = [speakerNames]
         with st.spinner(text=f'Analyzing Demo Data'):
@@ -554,6 +576,23 @@ try:
         newCategory = st.sidebar.text_input('Add category', key='categoryInput',on_change=addCategory)
         catTypeColors = su.colorsCSS(3)
         allColors = su.colorsCSS(len(speakerNames)+len(st.session_state.categories))
         speakerColors = allColors[:len(speakerNames)]
@@ -582,7 +621,8 @@ try:
         st.session_state.summaries[currFileIndex]["df4"] = df4
         with dataTab:
-            csv = convert_df(currDF)
             st.download_button(
                "Press to Download analysis data",
@@ -592,7 +632,7 @@ try:
                key='download-csv',
                on_click="ignore",
             )
-            st.dataframe(currDF)
         with pie1:
             printV("In Pie1",4)
             df3 = st.session_state.summaries[currFileIndex]["df3"]
@@ -639,13 +679,14 @@ try:
             printV("Pie1 post plotly",4)
         with pie2:
-            df4 = st.session_state.summaries[currFileIndex]["df4"]
             # Some speakers may be missing, so fix colors
             figColors = []
             for n in df4["names"]:
                 if n in speakerNames:
                     figColors.append(speakerColors[speakerNames.index(n)])
             fig2 = go.Figure()
             fig2.update_layout(
                 title_text="Percentage of Speakers and Custom Categories",
@@ -681,7 +722,9 @@ try:
             st.plotly_chart(fig2, use_container_width=True,config=config)
         with sunburst1:
-            df5 = st.session_state.summaries[currFileIndex]["df5"]
             fig3_1 = px.sunburst(df5,
                                 branchvalues = 'total',
                                 names = "labels",
@@ -733,7 +776,9 @@ try:
             st.plotly_chart(fig3_1, use_container_width=True,config=config)
         with treemap1:
-            df5 = st.session_state.summaries[currFileIndex]["df5"]
             fig3 = px.treemap(df5,
                             branchvalues = "total",
                             names = "labels",
@@ -788,7 +833,9 @@ try:
         with timeline:
-            fig_la = px.timeline(speakers_dataFrame, x_start="Start", x_end="Finish", y="Resource", color="Resource",title="Timeline of Audio with Speakers",
                                 color_discrete_sequence=speakerColors)
             fig_la.update_yaxes(autorange="reversed")
@@ -841,7 +888,8 @@ try:
             st.plotly_chart(fig_la, use_container_width=True,config=config)
         with bar1:
-            df2 = st.session_state.summaries[currFileIndex]["df2"]
             fig2_la = px.bar(df2, x="values", y="names", color="names", orientation='h',
                             custom_data=["names","values"],title="Time Spoken by each Speaker",
                             color_discrete_sequence=catColors+speakerColors)

     global verbosity
     if verbosity>=verbosityLevel:
         print(message)
+def get_display_name(speaker, fileIndex):
+    """Return the display name stored for `speaker` under file `fileIndex`, or `speaker` itself when no rename exists."""
+    renames = st.session_state.speakerRenames  # list with one dict per file: original label -> display name
+    if fileIndex < len(renames) and speaker in renames[fileIndex]:  # an index past the end means no rename dict exists for that file
+        return renames[fileIndex][speaker]
+    return speaker  # fall back to the unmodified label
+def apply_speaker_renames_to_df(df, fileIndex, column="task"):
+    """Return a copy of `df` whose `column` values are passed through get_display_name; return `df` itself if `column` is absent."""
+    if column not in df.columns:
+        return df  # nothing to rename; note this is the caller's object, not a copy
+    df = df.copy()  # rebind to a copy so the caller's DataFrame is left untouched
+    df[column] = df[column].apply(lambda s: get_display_name(s, fileIndex))  # values without a stored rename come back unchanged
+    return df
 @st.cache_data  # Streamlit data cache; per the st.cache_data docs, reruns with the same df should reuse the encoded result
 def convert_df(df):
    return df.to_csv(index=False).encode('utf-8')  # CSV text without the index column, encoded as UTF-8 bytes
 # Long-range usage
 if 'results' not in st.session_state:
     st.session_state.results = []
+if 'speakerRenames' not in st.session_state:
+    st.session_state.speakerRenames = []
 if 'summaries' not in st.session_state:
     st.session_state.summaries = []
 if 'categories' not in st.session_state:
         file_names = [f.name for f in valid_files]
     while (len(st.session_state.results) < len(valid_files)):
         st.session_state.results.append([])
+    while (len(st.session_state.speakerRenames) < len(valid_files)):
+        st.session_state.speakerRenames.append({})
     while (len(st.session_state.summaries) < len(valid_files)):
         st.session_state.summaries.append([])
     while (len(st.session_state.unusedSpeakers) < len(valid_files)):
         st.session_state.categorySelect.append(tempCategories)
     while (len(st.session_state.summaries) < len(valid_files)):
         st.session_state.summaries.append([])
+    while (len(st.session_state.speakerRenames) < len(valid_files)):
+        st.session_state.speakerRenames.append({})
     with st.spinner(text=f'Loading Demo Sample'):
         # RTTM load as filler
                 totalSeconds = segment.end
         st.session_state.results = [(annotations, totalSeconds)]
         st.session_state.summaries = [{}]
+        st.session_state.speakerRenames = [{}]
         speakerNames = annotations.labels()
         st.session_state.unusedSpeakers = [speakerNames]
         with st.spinner(text=f'Analyzing Demo Data'):
         newCategory = st.sidebar.text_input('Add category', key='categoryInput',on_change=addCategory)
+        st.sidebar.divider()
+        st.sidebar.subheader("Rename Speakers")
+        st.sidebar.caption("Replace SPEAKER_## labels with real names.")
+        current_renames = st.session_state.speakerRenames[currFileIndex]
+        for sp in speakerNames:
+            current_label = current_renames.get(sp, "")
+            new_name = st.sidebar.text_input(
+                f"{sp}",
+                value=current_label,
+                placeholder=f"e.g. John",
+                key=f"rename_{currFileIndex}_{sp}"
+            )
+            if new_name.strip():
+                st.session_state.speakerRenames[currFileIndex][sp] = new_name.strip()
+            elif sp in st.session_state.speakerRenames[currFileIndex]:
+                del st.session_state.speakerRenames[currFileIndex][sp]
         catTypeColors = su.colorsCSS(3)
         allColors = su.colorsCSS(len(speakerNames)+len(st.session_state.categories))
         speakerColors = allColors[:len(speakerNames)]
         st.session_state.summaries[currFileIndex]["df4"] = df4
         with dataTab:
+            displayDF = apply_speaker_renames_to_df(currDF, currFileIndex, column="task")
+            csv = convert_df(displayDF)
             st.download_button(
                "Press to Download analysis data",
                key='download-csv',
                on_click="ignore",
             )
+            st.dataframe(displayDF)
         with pie1:
             printV("In Pie1",4)
             df3 = st.session_state.summaries[currFileIndex]["df3"]
             printV("Pie1 post plotly",4)
         with pie2:
+            df4 = st.session_state.summaries[currFileIndex]["df4"].copy()
             # Some speakers may be missing, so fix colors
             figColors = []
             for n in df4["names"]:
                 if n in speakerNames:
                     figColors.append(speakerColors[speakerNames.index(n)])
+            df4["names"] = df4["names"].apply(lambda s: get_display_name(s, currFileIndex))
             fig2 = go.Figure()
             fig2.update_layout(
                 title_text="Percentage of Speakers and Custom Categories",
             st.plotly_chart(fig2, use_container_width=True,config=config)
         with sunburst1:
+            df5 = st.session_state.summaries[currFileIndex]["df5"].copy()
+            df5["labels"] = df5["labels"].apply(lambda s: get_display_name(s, currFileIndex))
+            df5["parentNames"] = df5["parentNames"].apply(lambda s: get_display_name(s, currFileIndex))
             fig3_1 = px.sunburst(df5,
                                 branchvalues = 'total',
                                 names = "labels",
             st.plotly_chart(fig3_1, use_container_width=True,config=config)
         with treemap1:
+            df5 = st.session_state.summaries[currFileIndex]["df5"].copy()
+            df5["labels"] = df5["labels"].apply(lambda s: get_display_name(s, currFileIndex))
+            df5["parentNames"] = df5["parentNames"].apply(lambda s: get_display_name(s, currFileIndex))
             fig3 = px.treemap(df5,
                             branchvalues = "total",
                             names = "labels",
         with timeline:
+            timeline_df = speakers_dataFrame.copy()
+            timeline_df["Resource"] = timeline_df["Resource"].apply(lambda s: get_display_name(s, currFileIndex))
+            fig_la = px.timeline(timeline_df, x_start="Start", x_end="Finish", y="Resource", color="Resource",title="Timeline of Audio with Speakers",
                                 color_discrete_sequence=speakerColors)
             fig_la.update_yaxes(autorange="reversed")
             st.plotly_chart(fig_la, use_container_width=True,config=config)
         with bar1:
+            df2 = st.session_state.summaries[currFileIndex]["df2"].copy()
+            df2["names"] = df2["names"].apply(lambda s: get_display_name(s, currFileIndex))
             fig2_la = px.bar(df2, x="values", y="names", color="names", orientation='h',
                             custom_data=["names","values"],title="Time Spoken by each Speaker",
                             color_discrete_sequence=catColors+speakerColors)