Spaces:

Sonogram
/

Instructor-Support-Tool

Sleeping

App Files Files Community

czyoung committed on Feb 27, 2025

Commit

b3a9316

verified ·

1 Parent(s): 9154b30

Update app.py

Browse files

Files changed (1) hide show

app.py +53 -14

app.py CHANGED Viewed

@@ -12,6 +12,7 @@ import datetime
 import tempfile
 import os
 import shutil
 PARQUET_DATASET_DIR = Path("parquet_dataset")
 PARQUET_DATASET_DIR.mkdir(parents=True,exist_ok=True)
@@ -19,6 +20,10 @@ PARQUET_DATASET_DIR.mkdir(parents=True,exist_ok=True)
 scheduler = ps.ParquetScheduler(repo_id="Sonogram/SampleDataset")
 def save_data(
     config_dict: Dict[str,str], audio_paths: List[str], userid: str,
     ) -> None:
@@ -42,6 +47,7 @@ def save_data(
     # Send to scheduler
     scheduler.append(data)
 st.title("Lecturer Support Tool")
 uploaded_file_paths = st.file_uploader("Upload an audio of classroom activity to analyze", accept_multiple_files=True)
@@ -68,7 +74,7 @@ if uploaded_file_paths is not None:
                 file_paths.append(path)
     if len(valid_files) > 0:
         audio_tabs = st.tabs([f.name for f in valid_files])
 for j, tab in enumerate(audio_tabs):
     if tab.button("Analyze Audio",key=f"button_{j}"):
         if uploaded_file is None:
@@ -82,7 +88,12 @@ for j, tab in enumerate(audio_tabs):
             # RTTM load as filler
             speakerList, annotations = su.loadAudioRTTM("24F CHEM1402 Night Class Week 4.rttm")
             # Display breakdowns
             #--------------------------------------------------------------------------
@@ -144,14 +155,31 @@ for j, tab in enumerate(audio_tabs):
             f.set_figwidth(15)
             tab.pyplot(f)
             tab.write("Total length of audio: {}h:{:02d}m:{:02d}s".format(int(totalSeconds/3600),int((totalSeconds%3600)/60),int(totalSeconds%60)))
-            tab.write("Lecturer spoke: {}h:{:02d}m:{:02d}s -> {:.2f}% of time".format(int(lecturer_speaker_times[0]/3600),
-                                                                                     int((lecturer_speaker_times[0]%3600)/60),int(lecturer_speaker_times[0]%60),
-                                                                                    100*lecturer_speaker_times[0]/totalSeconds))
-            tab.write("Audience spoke: {}h:{:02d}m:{:02d}s -> {:.2f}% of time".format(int(lecturer_speaker_times[1]/3600),
-                                                                                     int((lecturer_speaker_times[1]%3600)/60),int(lecturer_speaker_times[1]%60),
-                                                                                    100*lecturer_speaker_times[1]/totalSeconds))
             # Experimental Speaker Breakdown
             #------------------------------------------------------------------------------
@@ -192,13 +220,24 @@ for j, tab in enumerate(audio_tabs):
             tab.pyplot(f)
             tab.write("Total length of audio: {}h:{:02d}m:{:02d}s".format(int(totalSeconds/3600),int((totalSeconds%3600)/60),int(totalSeconds%60)))
-            for i,speaker in enumerate(all_speaker_times):
-                tab.write("Speaker {} spoke: {}h:{:02d}m:{:02d}s -> {:.2f}% of time".format(i,
-                                                                                           int(speaker/3600),
-                                                                                           int((speaker%3600)/60),
-                                                                                           int(speaker%60),
-                                                                                           100*speaker/totalSeconds))
 userid = st.text_input("user id:", "Guest")

 import tempfile
 import os
 import shutil
+import pandas as pd
 PARQUET_DATASET_DIR = Path("parquet_dataset")
 PARQUET_DATASET_DIR.mkdir(parents=True,exist_ok=True)
 scheduler = ps.ParquetScheduler(repo_id="Sonogram/SampleDataset")
+# Store results for viewing
+if 'results' not in st.session_state:
+    st.session_state.results = []
 def save_data(
     config_dict: Dict[str,str], audio_paths: List[str], userid: str,
     ) -> None:
     # Send to scheduler
     scheduler.append(data)
+st.set_page_config(layout="wide")
 st.title("Lecturer Support Tool")
 uploaded_file_paths = st.file_uploader("Upload an audio of classroom activity to analyze", accept_multiple_files=True)
                 file_paths.append(path)
     if len(valid_files) > 0:
         audio_tabs = st.tabs([f.name for f in valid_files])
+    st.info(f'{len(valid_files)} valid files: {[fi.name for fi in valid_files]}')
 for j, tab in enumerate(audio_tabs):
     if tab.button("Analyze Audio",key=f"button_{j}"):
         if uploaded_file is None:
             # RTTM load as filler
             speakerList, annotations = su.loadAudioRTTM("24F CHEM1402 Night Class Week 4.rttm")
+            while (len(st.session_state.results) < j):
+                st.session_state.results.append([])
+            st.session_state.results[j] = (speakerList,annotations)
+    if len(st.session_state.results > j) and len(st.session_state.results[j])) > 0:
+        with st.spinner(text='Loading results...'):
             # Display breakdowns
             #--------------------------------------------------------------------------
             f.set_figwidth(15)
             tab.pyplot(f)
+            df = pd.DataFrame(
+                {
+                    "Speaker": ["Lecturer", "Audience"],
+                    "Time spoken": ["{}h:{:02d}m:{:02d}s".format(int(lecturer_speaker_times[0]/3600),
+                                                                int((lecturer_speaker_times[0]%3600)/60),
+                                                                 int(lecturer_speaker_times[0]%60)),
+                                    "{}h:{:02d}m:{:02d}s".format(int(lecturer_speaker_times[1]/3600),
+                                                                int((lecturer_speaker_times[1]%3600)/60),
+                                                                 int(lecturer_speaker_times[1]%60))],
+                    "Percentage": [
+                        "{:.2f}%".format(100*lecturer_speaker_times[0]/totalSeconds),
+                        "{:.2f}%".format(100*lecturer_speaker_times[1]/totalSeconds),
+                    ],
+                }
+            )
             tab.write("Total length of audio: {}h:{:02d}m:{:02d}s".format(int(totalSeconds/3600),int((totalSeconds%3600)/60),int(totalSeconds%60)))
+            st.table(df)
+            #tab.write("Lecturer spoke: {}h:{:02d}m:{:02d}s -> {:.2f}% of time".format(int(lecturer_speaker_times[0]/3600),
+            #                                                                         int((lecturer_speaker_times[0]%3600)/60),int(lecturer_speaker_times[0]%60),
+            #                                                                        100*lecturer_speaker_times[0]/totalSeconds))
+            #tab.write("Audience spoke: {}h:{:02d}m:{:02d}s -> {:.2f}% of time".format(int(lecturer_speaker_times[1]/3600),
+            #                                                                         int((lecturer_speaker_times[1]%3600)/60),int(lecturer_speaker_times[1]%60),
+            #                                                                        100*lecturer_speaker_times[1]/totalSeconds))
             # Experimental Speaker Breakdown
             #------------------------------------------------------------------------------
             tab.pyplot(f)
+            df = pd.DataFrame(
+                {
+                    "Time spoken": ["{}h:{:02d}m:{:02d}s".format(int(sp/3600),
+                                                                int((sp%3600)/60),
+                                                                 int(sp%60)) for sp in all_speaker_times],,
+                    "Percentage": ["{:.2f}%".format(100*sp/totalSeconds) for sp in all_speaker_times],
+                }
+            )
             tab.write("Total length of audio: {}h:{:02d}m:{:02d}s".format(int(totalSeconds/3600),int((totalSeconds%3600)/60),int(totalSeconds%60)))
+            st.table(df)
+            #for i,speaker in enumerate(all_speaker_times):
+            #    tab.write("Speaker {} spoke: {}h:{:02d}m:{:02d}s -> {:.2f}% of time".format(i,
+            #                                                                               int(speaker/3600),
+            #                                                                               int((speaker%3600)/60),
+            #                                                                               int(speaker%60),
+            #                                                                               100*speaker/totalSeconds))
 userid = st.text_input("user id:", "Guest")