Spaces:

Sonogram
/

Instructor-Support-Tool

Running on CPU Upgrade

App Files Files Community

czyoung committed on May 30, 2025

Commit

e0cfb45

verified ·

1 Parent(s): e6605eb

Slight visual improvement, reducing graph count on screen

Browse files

Files changed (1) hide show

app.py +178 -160

app.py CHANGED Viewed

@@ -302,6 +302,8 @@ if 'unusedSpeakers' not in st.session_state:
     st.session_state.unusedSpeakers = []
 if 'file_names' not in st.session_state:
     st.session_state.file_names = []
@@ -315,6 +317,7 @@ if not isGPU:
 uploaded_file_paths = st.file_uploader("Upload an audio of classroom activity to analyze", accept_multiple_files=True)
 supported_file_types = ('.wav','.mp3','.mp4','.txt','.rttm')
 valid_files = []
 file_paths = []
@@ -416,14 +419,17 @@ else:
                     print(f"Finished analyzing {file_paths[i]}")
             print(f"Took {time.time() - start_time} seconds to analyze {totalFiles} files!")
             st.success(f"Took {time.time() - start_time} seconds to analyze {totalFiles} files!")
 currFile = st.sidebar.selectbox('Current File', file_names,on_change=updateMultiSelect,key="select_currFile")
 if currFile is None and len(st.session_state.results) > 0 and len(st.session_state.results[0]) > 0:
     st.write("Select a file to view from the sidebar")
 try:
     st.session_state.resetResult = False
     currFileIndex = file_names.index(currFile)
     if len(st.session_state.results) > currFileIndex and len(st.session_state.summaries) > currFileIndex and len(st.session_state.results[currFileIndex]) > 0:
         # Handle
         currAnnotation, currTotalTime = st.session_state.results[currFileIndex]
         speakerNames = currAnnotation.labels()
@@ -466,175 +472,187 @@ try:
         df4.name = "df4"
         st.session_state.summaries[currFileIndex]["df4"] = df4
-        df2 = st.session_state.summaries[currFileIndex]["df2"]
-        df3 = st.session_state.summaries[currFileIndex]["df3"]
-        df4 = st.session_state.summaries[currFileIndex]["df4"]
-        df5 = st.session_state.summaries[currFileIndex]["df5"]
-        speakers_dataFrame = st.session_state.summaries[currFileIndex]["speakers_dataFrame"]
-        currDF = speakers_dataFrame
-        speakers_times = st.session_state.summaries[currFileIndex]["speakers_times"]
-        # generate plotting window
-        fig1 = go.Figure()
-        fig1.update_layout(
-            title_text="Percentage of each Voice Category",
-        )
-        fig1.add_trace(go.Pie(values=df3["values"],labels=df3["names"]))
-        fig2 = go.Figure()
-        fig2.update_layout(
-            title_text="Percentage of Speakers and Custom Categories",
-        )
-        fig2.add_trace(go.Pie(values=df4["values"],labels=df4["names"]))
-        fig3_1 = px.sunburst(df5,
-                            branchvalues = 'total',
                             names = "labels",
-                            ids = "ids",
                             parents = "parents",
                             values = "percentiles",
                             custom_data=['labels','valueStrings','percentiles','parentNames','parentPercentiles'],
-                            color = 'labels',
-                            title="Percentage of each Voice Category with Speakers",
-                            )
-        fig3_1.update_traces(
-            hovertemplate="<br>".join([
-                '<b>%{customdata[0]}</b>',
-                'Duration: %{customdata[1]}s',
-                'Percentage of Total: %{customdata[2]:.2f}%',
-                'Parent: %{customdata[3]}',
-                'Percentage of Parent: %{customdata[4]:.2f}%'
-            ])
-        )
-        fig3 = px.treemap(df5,
-                        branchvalues = "total",
-                        names = "labels",
-                        parents = "parents",
-                        ids="ids",
-                        values = "percentiles",
-                        custom_data=['labels','valueStrings','percentiles','parentNames','parentPercentiles'],
-                        color='labels',
-                        title="Division of Speakers in each Voice Category",
-                         )
-        fig3.update_traces(
-            hovertemplate="<br>".join([
-                '<b>%{customdata[0]}</b>',
-                'Duration: %{customdata[1]}s',
-                'Percentage of Total: %{customdata[2]:.2f}%',
-                'Parent: %{customdata[3]}',
-                'Percentage of Parent: %{customdata[4]:.2f}%'
-            ])
-        )
-        st.plotly_chart(fig1, use_container_width=True)
-        st.plotly_chart(fig2, use_container_width=True)
-        st.plotly_chart(fig3_1, use_container_width=True)
-        st.plotly_chart(fig3, use_container_width=True)
-        fig_la = px.timeline(speakers_dataFrame, x_start="Start", x_end="Finish", y="Resource", color="Resource",title="Timeline of Audio with Speakers")
-        fig_la.update_yaxes(autorange="reversed")
-        hMax = int(currTotalTime//3600)
-        mMax = int(currTotalTime%3600//60)
-        sMax = int(currTotalTime%60)
-        msMax = int(currTotalTime*1000000%1000000)
-        timeMax = dt.time(hMax,mMax,sMax,msMax)
-        fig_la.update_layout(
-            xaxis_tickformatstops = [
-                dict(dtickrange=[None, 1000], value="%H:%M:%S.%L"),
-                dict(dtickrange=[1000, None], value="%H:%M:%S")
-            ],
-            xaxis=dict(
-                    range=[dt.datetime.combine(dt.date.today(), dt.time.min),dt.datetime.combine(dt.date.today(), timeMax)]
-                ),
-            xaxis_title="Time",
-            yaxis_title="Speaker",
-            legend_title=None
-        )
-        st.plotly_chart(fig_la, use_container_width=True)
-        fig2_la = px.bar(df2, x="values", y="names", color="names", orientation='h',
-                        custom_data=["names","values"],title="Time Spoken by each Speaker")
-        fig2_la.update_xaxes(ticksuffix="%")
-        fig2_la.update_yaxes(autorange="reversed")
-        fig2_la.update_layout(
-            xaxis_title="Percentage Time Spoken",
-            yaxis_title="Speaker",
-            legend_title=None
-        )
-        fig2_la.update_traces(
-            hovertemplate="<br>".join([
-                '<b>%{customdata[0]}</b>',
-                'Percentage of Time: %{customdata[1]:.2f}%'
-            ])
-        )
-        st.plotly_chart(fig2_la, use_container_width=True)
 except ValueError:
     pass
 if len(st.session_state.results) > 0:
-    with st.spinner(text='Processing summary results...'):
-        fileNames = st.session_state.file_names
-        results = []
-        indices = []
-        for i, resultTuple in enumerate(st.session_state.results):
-            if len(resultTuple) == 2:
-                results.append(resultTuple)
-                indices.append(i)
-        if len(indices) > 1:
-            df6_dict = {
-                "files":fileNames,
-            }
-            allCategories = copy.deepcopy(st.session_state.categories)
-            for i in indices:
-                currAnnotation, currTotalTime = st.session_state.results[i]
-                categorySelections = st.session_state["categorySelect"][i]
-                catSummary,extraCats = su.calcCategories(currAnnotation,categorySelections)
-                st.session_state.summaries[i]["categories"] = (catSummary,extraCats)
-                for extra in extraCats:
-                    df6_dict[extra] = []
-                    if extra not in allCategories:
-                        allCategories.append(extra)
-            for category in st.session_state.categories:
-                df6_dict[category] = []
-            for i in indices:
-                summary, extras = st.session_state.summaries[i]["categories"]
-                theseCategories = st.session_state.categories + extras
-                for j, timeSlots in enumerate(summary):
-                    df6_dict[theseCategories[j]].append(sum([t.duration for _,t in timeSlots])/st.session_state.results[i][1])
-                for category in allCategories:
-                    if category not in theseCategories:
-                        df6_dict[category].append(0)
-            df6 = pd.DataFrame(df6_dict)
-            summFig = px.bar(df6, x="files", y=allCategories,title="Time Spoken by Each Speaker in Each File")
-            st.plotly_chart(summFig, use_container_width=True)
-            voiceNames = ["No Voice","One Voice","Multi Voice"]
-            df7_dict = {
-                "files":fileNames,
-            }
-            for category in voiceNames:
-                df7_dict[category] = []
-            for resultID,summary in enumerate(st.session_state.summaries):
-                partialDf = summary["df5"]
-                for i in range(len(voiceNames)):
-                    df7_dict[voiceNames[i]].append(partialDf["percentiles"][i])
-            df7 = pd.DataFrame(df7_dict)
-            sorted_df7 = df7.sort_values(by=['One Voice', 'Multi Voice'])
-            summFig2 = px.bar(sorted_df7, x="files", y=["One Voice","Multi Voice","No Voice",],title="Cross-file Voice Categories sorted for One Voice")
-            st.plotly_chart(summFig2, use_container_width=True)
-            sorted_df7_3 = df7.sort_values(by=['Multi Voice','One Voice'])
-            summFig3 = px.bar(sorted_df7_3, x="files", y=["One Voice","Multi Voice","No Voice",],title="Cross-file Voice Categories sorted for Multi Voice")
-            st.plotly_chart(summFig3, use_container_width=True)
-            sorted_df7_4 = df7.sort_values(by=['No Voice', 'Multi Voice'],ascending=False)
-            summFig4 = px.bar(sorted_df7_4, x="files", y=["One Voice","Multi Voice","No Voice",],title="Cross-file Voice Categories sorted for Any Voice")
-            st.plotly_chart(summFig4, use_container_width=True)

     st.session_state.unusedSpeakers = []
 if 'file_names' not in st.session_state:
     st.session_state.file_names = []
+if 'showSummary' not in st.session_state:
+    st.session_state.showSummary = 'No'
 uploaded_file_paths = st.file_uploader("Upload an audio of classroom activity to analyze", accept_multiple_files=True)
 supported_file_types = ('.wav','.mp3','.mp4','.txt','.rttm')
+viewChoices = ["Voice Categories","Custom Categories","Detailed Voice Categories","Voice Category Treemap","Speaker Timeline","Time per Speaker"]
 valid_files = []
 file_paths = []
                     print(f"Finished analyzing {file_paths[i]}")
             print(f"Took {time.time() - start_time} seconds to analyze {totalFiles} files!")
             st.success(f"Took {time.time() - start_time} seconds to analyze {totalFiles} files!")
+summaryRadio = st.sidebar.empty()
 currFile = st.sidebar.selectbox('Current File', file_names,on_change=updateMultiSelect,key="select_currFile")
+viewSelection = st.sidebar.selectbox('View', viewChoices)
 if currFile is None and len(st.session_state.results) > 0 and len(st.session_state.results[0]) > 0:
     st.write("Select a file to view from the sidebar")
 try:
     st.session_state.resetResult = False
     currFileIndex = file_names.index(currFile)
     if len(st.session_state.results) > currFileIndex and len(st.session_state.summaries) > currFileIndex and len(st.session_state.results[currFileIndex]) > 0:
+        st.header(f"Analysis of file {currFile}")
         # Handle
         currAnnotation, currTotalTime = st.session_state.results[currFileIndex]
         speakerNames = currAnnotation.labels()
         df4.name = "df4"
         st.session_state.summaries[currFileIndex]["df4"] = df4
+        viewSelection = [viewSelection]
+        if viewChoices[0] in viewSelection:
+            df3 = st.session_state.summaries[currFileIndex]["df3"]
+            fig1 = go.Figure()
+            fig1.update_layout(
+                title_text="Percentage of each Voice Category",
+            )
+            fig1.add_trace(go.Pie(values=df3["values"],labels=df3["names"]))
+            st.plotly_chart(fig1, use_container_width=True)
+        if viewChoices[1] in viewSelection:
+            df4 = st.session_state.summaries[currFileIndex]["df4"]
+            fig2 = go.Figure()
+            fig2.update_layout(
+                title_text="Percentage of Speakers and Custom Categories",
+            )
+            fig2.add_trace(go.Pie(values=df4["values"],labels=df4["names"]))
+            st.plotly_chart(fig2, use_container_width=True)
+        if viewChoices[2] in viewSelection:
+            df5 = st.session_state.summaries[currFileIndex]["df5"]
+            fig3_1 = px.sunburst(df5,
+                                branchvalues = 'total',
+                                names = "labels",
+                                ids = "ids",
+                                parents = "parents",
+                                values = "percentiles",
+                                custom_data=['labels','valueStrings','percentiles','parentNames','parentPercentiles'],
+                                color = 'labels',
+                                title="Percentage of each Voice Category with Speakers",
+                                )
+            fig3_1.update_traces(
+                hovertemplate="<br>".join([
+                    '<b>%{customdata[0]}</b>',
+                    'Duration: %{customdata[1]}s',
+                    'Percentage of Total: %{customdata[2]:.2f}%',
+                    'Parent: %{customdata[3]}',
+                    'Percentage of Parent: %{customdata[4]:.2f}%'
+                ])
+            )
+            st.plotly_chart(fig3_1, use_container_width=True)
+        if viewChoices[3] in viewSelection:
+            df5 = st.session_state.summaries[currFileIndex]["df5"]
+            fig3 = px.treemap(df5,
+                            branchvalues = "total",
                             names = "labels",
                             parents = "parents",
+                            ids="ids",
                             values = "percentiles",
                             custom_data=['labels','valueStrings','percentiles','parentNames','parentPercentiles'],
+                            color='labels',
+                            title="Division of Speakers in each Voice Category",
+                             )
+            fig3.update_traces(
+                hovertemplate="<br>".join([
+                    '<b>%{customdata[0]}</b>',
+                    'Duration: %{customdata[1]}s',
+                    'Percentage of Total: %{customdata[2]:.2f}%',
+                    'Parent: %{customdata[3]}',
+                    'Percentage of Parent: %{customdata[4]:.2f}%'
+                ])
+            )
+            st.plotly_chart(fig3, use_container_width=True)
+        if viewChoices[4] in viewSelection:
+            speakers_dataFrame = st.session_state.summaries[currFileIndex]["speakers_dataFrame"]
+            currDF = speakers_dataFrame
+            speakers_times = st.session_state.summaries[currFileIndex]["speakers_times"]
+            # generate plotting window
+            fig_la = px.timeline(speakers_dataFrame, x_start="Start", x_end="Finish", y="Resource", color="Resource",title="Timeline of Audio with Speakers")
+            fig_la.update_yaxes(autorange="reversed")
+            hMax = int(currTotalTime//3600)
+            mMax = int(currTotalTime%3600//60)
+            sMax = int(currTotalTime%60)
+            msMax = int(currTotalTime*1000000%1000000)
+            timeMax = dt.time(hMax,mMax,sMax,msMax)
+            fig_la.update_layout(
+                xaxis_tickformatstops = [
+                    dict(dtickrange=[None, 1000], value="%H:%M:%S.%L"),
+                    dict(dtickrange=[1000, None], value="%H:%M:%S")
+                ],
+                xaxis=dict(
+                        range=[dt.datetime.combine(dt.date.today(), dt.time.min),dt.datetime.combine(dt.date.today(), timeMax)]
+                    ),
+                xaxis_title="Time",
+                yaxis_title="Speaker",
+                legend_title=None
+            )
+            st.plotly_chart(fig_la, use_container_width=True)
+        if viewChoices[5] in viewSelection:
+            df2 = st.session_state.summaries[currFileIndex]["df2"]
+            fig2_la = px.bar(df2, x="values", y="names", color="names", orientation='h',
+                            custom_data=["names","values"],title="Time Spoken by each Speaker")
+            fig2_la.update_xaxes(ticksuffix="%")
+            fig2_la.update_yaxes(autorange="reversed")
+            fig2_la.update_layout(
+                xaxis_title="Percentage Time Spoken",
+                yaxis_title="Speaker",
+                legend_title=None
+            )
+            fig2_la.update_traces(
+                hovertemplate="<br>".join([
+                    '<b>%{customdata[0]}</b>',
+                    'Percentage of Time: %{customdata[1]:.2f}%'
+                ])
+            )
+            st.plotly_chart(fig2_la, use_container_width=True)
 except ValueError:
     pass
 if len(st.session_state.results) > 0:
+    st.session_state.showSummary = st.radio('Display Multi-file Summary?',['Yes','No'])
+    if st.session_state.showSummary == 'Yes':
+        st.header("Multi-file Summary Data")
+        with st.spinner(text='Processing summary results...'):
+            fileNames = st.session_state.file_names
+            results = []
+            indices = []
+            for i, resultTuple in enumerate(st.session_state.results):
+                if len(resultTuple) == 2:
+                    results.append(resultTuple)
+                    indices.append(i)
+            if len(indices) > 1:
+                df6_dict = {
+                    "files":fileNames,
+                }
+                allCategories = copy.deepcopy(st.session_state.categories)
+                for i in indices:
+                    currAnnotation, currTotalTime = st.session_state.results[i]
+                    categorySelections = st.session_state["categorySelect"][i]
+                    catSummary,extraCats = su.calcCategories(currAnnotation,categorySelections)
+                    st.session_state.summaries[i]["categories"] = (catSummary,extraCats)
+                    for extra in extraCats:
+                        df6_dict[extra] = []
+                        if extra not in allCategories:
+                            allCategories.append(extra)
+                for category in st.session_state.categories:
+                    df6_dict[category] = []
+                for i in indices:
+                    summary, extras = st.session_state.summaries[i]["categories"]
+                    theseCategories = st.session_state.categories + extras
+                    for j, timeSlots in enumerate(summary):
+                        df6_dict[theseCategories[j]].append(sum([t.duration for _,t in timeSlots])/st.session_state.results[i][1])
+                    for category in allCategories:
+                        if category not in theseCategories:
+                            df6_dict[category].append(0)
+                df6 = pd.DataFrame(df6_dict)
+                summFig = px.bar(df6, x="files", y=allCategories,title="Time Spoken by Each Speaker in Each File")
+                st.plotly_chart(summFig, use_container_width=True)
+                voiceNames = ["No Voice","One Voice","Multi Voice"]
+                df7_dict = {
+                    "files":fileNames,
+                }
+                for category in voiceNames:
+                    df7_dict[category] = []
+                for resultID,summary in enumerate(st.session_state.summaries):
+                    partialDf = summary["df5"]
+                    for i in range(len(voiceNames)):
+                        df7_dict[voiceNames[i]].append(partialDf["percentiles"][i])
+                df7 = pd.DataFrame(df7_dict)
+                sorted_df7 = df7.sort_values(by=['One Voice', 'Multi Voice'])
+                summFig2 = px.bar(sorted_df7, x="files", y=["One Voice","Multi Voice","No Voice",],title="Cross-file Voice Categories sorted for One Voice")
+                st.plotly_chart(summFig2, use_container_width=True)
+                sorted_df7_3 = df7.sort_values(by=['Multi Voice','One Voice'])
+                summFig3 = px.bar(sorted_df7_3, x="files", y=["One Voice","Multi Voice","No Voice",],title="Cross-file Voice Categories sorted for Multi Voice")
+                st.plotly_chart(summFig3, use_container_width=True)
+                sorted_df7_4 = df7.sort_values(by=['No Voice', 'Multi Voice'],ascending=False)
+                summFig4 = px.bar(sorted_df7_4, x="files", y=["One Voice","Multi Voice","No Voice",],title="Cross-file Voice Categories sorted for Any Voice")
+                st.plotly_chart(summFig4, use_container_width=True)