Spaces:

Sonogram
/

Instructor-Support-Tool

Running on CPU Upgrade

App Files Files Community

czyoung committed on Feb 21, 2025

Commit

6a8a763

verified ·

1 Parent(s): e895a63

Updated to support multiple files and saving to dataset

Browse files

Files changed (1) hide show

app.py +189 -135

app.py CHANGED Viewed

@@ -4,145 +4,199 @@ import numpy as np
 import torchaudio
 import sonogram_utility as su
 import time
 st.title("Lecturer Support Tool")
-uploaded_file = st.file_uploader("Upload an audio of classroom activity to analyze")
-supported_file_types = ('.wav','.mp3','.mp4')
-if uploaded_file is not None:
-    if not uploaded_file.name.endswith(supported_file_types):
-        st.error('File must be of type: {}'.format(supported_file_types))
-        uploaded_file = st.empty()
-    else:
-        st.audio(uploaded_file)
-if st.button("Analyze Audio"):
-    if uploaded_file is None:
-        st.error('Upload a file first!')
-    else:
-        # Process
-        # Pretend to take time as an example
-        with st.spinner(text='NOT ACTUALLY ANALYZING, JUST A FILLER ANIMATION'):
-            time.sleep(5)
-            st.success('Done')
-        # RTTM load as filler
-        speakerList, annotations = su.loadAudioRTTM("24F CHEM1402 Night Class Week 4.rttm")
-        # Display breakdowns
-        #--------------------------------------------------------------------------
-        # Prepare data
-        sortedSpeakerList = sorted([[row for row in speaker if row[1] > 0.25] for speaker in speakerList if len([row for row in speaker if row[1] > 0.25]) > 0],
-               key=lambda e: min(e)[0])
-        pred_count = len(sortedSpeakerList)
-        lecturer_speaker_list,_ = su.twoClassExtendAnnotation(annotations)
-        lecturer_pred_count = 2
-        totalSeconds = 9049
-        lecturer_speaker_times = []
-        for i,speaker in enumerate(lecturer_speaker_list):
-            lecturer_speaker_times.append(0)
-            for timeSection in speaker:
-                lecturer_speaker_times[i] += timeSection[1]
-        all_speaker_times = []
-        for i,speaker in enumerate(sortedSpeakerList):
-            all_speaker_times.append(0)
-            for timeSection in speaker:
-                all_speaker_times[i] += timeSection[1]
-        # Lecturer vs. Audience
-        #---------------------------------------------------------------------------
-        f, ax1 =plt.subplots()
-        # Setting Y-axis limits
-        ax1.set_ylim(0, lecturer_pred_count*5 + 5)
-        # Setting X-axis limits
-        #gnt.set_xlim(0, 160)
-        # Setting labels for x-axis and y-axis
-        ax1.set_title('Recording Results')
-        ax1.set_xlabel('Minutes since start')
-        ax1.set_ylabel('Speaker ID')
-        ax1.spines.top.set_visible(False)
-        # Setting ticks on y-axis (5,10,15,...)
-        step = 5
-        ax1.set_yticks(list(range(step,(lecturer_pred_count+1)*step,step)))
-        # Labelling tickes of y-axis ('1','2','3',...)
-        pred_tick_list = [1,2]
-        ax1.set_yticklabels(["Lectuerer","Audience"])
-        #x_tick_list = range(0,6000,60)
-        #ax1.set_xticks(x_tick_list)
-        #ax1.set_xticklabels([str(int(element/60)) for element in x_tick_list])
-        ax1.tick_params(axis='x', labelrotation=90)
-        # Setting graph attribute
-        ax1.grid(True)
-        pred_colors = su.colors(lecturer_pred_count)
-        for j, row in enumerate(lecturer_speaker_list):
-            ax1.broken_barh(row, ((j+1)*5-1, 3), facecolors =(pred_colors[j]))
-        f.set_figheight(5)
-        f.set_figwidth(15)
-        st.pyplot(f)
-        st.write("Total length of audio: {}h:{:02d}m:{:02d}s".format(int(totalSeconds/3600),int((totalSeconds%3600)/60),int(totalSeconds%60)))
-        st.write("Lecturer spoke: {}h:{:02d}m:{:02d}s -> {:.2f}% of time".format(int(lecturer_speaker_times[0]/3600),
-                                                                                 int((lecturer_speaker_times[0]%3600)/60),int(lecturer_speaker_times[0]%60),
-                                                                                100*lecturer_speaker_times[0]/totalSeconds))
-        st.write("Audience spoke: {}h:{:02d}m:{:02d}s -> {:.2f}% of time".format(int(lecturer_speaker_times[1]/3600),
-                                                                                 int((lecturer_speaker_times[1]%3600)/60),int(lecturer_speaker_times[1]%60),
-                                                                                100*lecturer_speaker_times[1]/totalSeconds))
-        # Experimental Speaker Breakdown
-        #------------------------------------------------------------------------------
-        f, ax1 =plt.subplots()
-        # Setting Y-axis limits
-        ax1.set_ylim(0, pred_count*5 + 5)
-        # Setting X-axis limits
-        #gnt.set_xlim(0, 160)
-        # Setting labels for x-axis and y-axis
-        ax1.set_title('Recording Results')
-        ax1.set_xlabel('Minutes since start')
-        ax1.set_ylabel('Speaker ID')
-        # Setting ticks on y-axis (5,10,15,...)
-        step = 5
-        ax1.set_yticks(list(range(step,(pred_count+1)*step,step)))
-        # Labelling tickes of y-axis ('1','2','3',...)
-        pred_tick_list = range(1,pred_count+1)
-        ax1.set_yticklabels([str(element) for element in pred_tick_list])
-        x_tick_list = range(0,6000,60)
-        ax1.set_xticks(x_tick_list)
-        ax1.set_xticklabels([str(int(element/60)) for element in x_tick_list])
-        ax1.tick_params(axis='x', labelrotation=90)
-        # Setting graph attribute
-        ax1.grid(True)
-        pred_colors = su.colors(pred_count)
-        for j, row in enumerate(sortedSpeakerList):
-            ax1.broken_barh(row, ((j+1)*5-1, 3), facecolors =(pred_colors[j]))
-        f.set_figheight(5)
-        f.set_figwidth(15)
-        st.pyplot(f)
-        st.write("Total length of audio: {}h:{:02d}m:{:02d}s".format(int(totalSeconds/3600),int((totalSeconds%3600)/60),int(totalSeconds%60)))
-        for i,speaker in enumerate(all_speaker_times):
-            st.write("Speaker {} spoke: {}h:{:02d}m:{:02d}s -> {:.2f}% of time".format(i,
-                                                                                       int(speaker/3600),
-                                                                                       int((speaker%3600)/60),
-                                                                                       int(speaker%60),
-                                                                                       100*speaker/totalSeconds))

 import torchaudio
 import sonogram_utility as su
 import time
+import copy
+import datetime
+import shutil
+import tempfile
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Union
+import ParquetScheduler as ps
+# Local staging folder; save_data() copies audio into per-user subfolders of it.
+PARQUET_DATASET_DIR = Path("parquet_dataset")
+# Create the folder up front (no error if it already exists).
+PARQUET_DATASET_DIR.mkdir(parents=True,exist_ok=True)
+# Receives the rows built by save_data(); configured for the "Sonogram/SampleDataset" repo.
+# NOTE(review): upload cadence and authentication live in ParquetScheduler — confirm there.
+scheduler = ps.ParquetScheduler(repo_id="Sonogram/SampleDataset")
+def save_data(
+    config_dict: Dict[str,str], audio_path: List[str], userid: str,
+    ) -> None:
+        """Save data, i.e. move audio to a new folder and send paths+config to scheduler."""
+    save_dir = PARQUET_DATASET_DIR / f"{userid}"
+    save_dir.mkdir(parents=True, exist_ok=True)
+    data = copy.deepcopy(config_dict)
+    # Add timestamp
+    data["timestamp"] = datetime.datetime.utcnow().isoformat()
+    # Copy and add audio
+    for i,p in enumerate(audio_paths):
+        name = f"{i:03d}"
+        dst_path = save_dir / f"{name}{Path(p).suffix}"
+        shutil.copyfile(p, dst_path)
+        data[f"audio_{name}"] = dst_path
+    # Send to scheduler
+    scheduler.append(data)
 st.title("Lecturer Support Tool")
+uploaded_file_paths = st.file_uploader("Upload an audio of classroom activity to analyze", accept_multiple_files=True)
+supported_file_types = ('.wav','.mp3','.mp4','.txt')
+valid_files = []
+audio_tabs = []
+if uploaded_file_paths is not None:
+    # Reset valid_files?
+    for uploaded_file in uploaded_file_paths:
+        if not uploaded_file.name.endswith(supported_file_types):
+            st.error('File must be of type: {}'.format(supported_file_types))
+            uploaded_file = None
+        else:
+            if uploaded_file not in valid_files:
+                valid_files.append(uploaded_file)
+    audio_tabs = st.tabs([f.name for f in valid_files])
+for tab in audio_tabs:
+    if tab.button("Analyze Audio"):
+        if uploaded_file is None:
+            tab.error('Upload a file first!')
+        else:
+            # Process
+            # Pretend to take time as an example
+            with tab.spinner(text='NOT ACTUALLY ANALYZING, JUST A FILLER ANIMATION'):
+                time.sleep(5)
+                tab.success('Done')
+            # RTTM load as filler
+            speakerList, annotations = su.loadAudioRTTM("24F CHEM1402 Night Class Week 4.rttm")
+            # Display breakdowns
+            #--------------------------------------------------------------------------
+            # Prepare data
+            sortedSpeakerList = sorted([[row for row in speaker if row[1] > 0.25] for speaker in speakerList if len([row for row in speaker if row[1] > 0.25]) > 0],
+                   key=lambda e: min(e)[0])
+            pred_count = len(sortedSpeakerList)
+            lecturer_speaker_list,_ = su.twoClassExtendAnnotation(annotations)
+            lecturer_pred_count = 2
+            totalSeconds = 9049
+            lecturer_speaker_times = []
+            for i,speaker in enumerate(lecturer_speaker_list):
+                lecturer_speaker_times.append(0)
+                for timeSection in speaker:
+                    lecturer_speaker_times[i] += timeSection[1]
+            all_speaker_times = []
+            for i,speaker in enumerate(sortedSpeakerList):
+                all_speaker_times.append(0)
+                for timeSection in speaker:
+                    all_speaker_times[i] += timeSection[1]
+            # Lecturer vs. Audience
+            #---------------------------------------------------------------------------
+            f, ax1 =plt.subplots()
+            # Setting Y-axis limits
+            ax1.set_ylim(0, lecturer_pred_count*5 + 5)
+            # Setting X-axis limits
+            #gnt.set_xlim(0, 160)
+            # Setting labels for x-axis and y-axis
+            ax1.set_title('Recording Results')
+            ax1.set_xlabel('Minutes since start')
+            ax1.set_ylabel('Speaker ID')
+            ax1.spines.top.set_visible(False)
+            # Setting ticks on y-axis (5,10,15,...)
+            step = 5
+            ax1.set_yticks(list(range(step,(lecturer_pred_count+1)*step,step)))
+            # Labelling tickes of y-axis ('1','2','3',...)
+            pred_tick_list = [1,2]
+            ax1.set_yticklabels(["Lectuerer","Audience"])
+            #x_tick_list = range(0,6000,60)
+            #ax1.set_xticks(x_tick_list)
+            #ax1.set_xticklabels([str(int(element/60)) for element in x_tick_list])
+            ax1.tick_params(axis='x', labelrotation=90)
+            # Setting graph attribute
+            ax1.grid(True)
+            pred_colors = su.colors(lecturer_pred_count)
+            for j, row in enumerate(lecturer_speaker_list):
+                ax1.broken_barh(row, ((j+1)*5-1, 3), facecolors =(pred_colors[j]))
+            f.set_figheight(5)
+            f.set_figwidth(15)
+            tab.pyplot(f)
+            tab.write("Total length of audio: {}h:{:02d}m:{:02d}s".format(int(totalSeconds/3600),int((totalSeconds%3600)/60),int(totalSeconds%60)))
+            tab.write("Lecturer spoke: {}h:{:02d}m:{:02d}s -> {:.2f}% of time".format(int(lecturer_speaker_times[0]/3600),
+                                                                                     int((lecturer_speaker_times[0]%3600)/60),int(lecturer_speaker_times[0]%60),
+                                                                                    100*lecturer_speaker_times[0]/totalSeconds))
+            tab.write("Audience spoke: {}h:{:02d}m:{:02d}s -> {:.2f}% of time".format(int(lecturer_speaker_times[1]/3600),
+                                                                                     int((lecturer_speaker_times[1]%3600)/60),int(lecturer_speaker_times[1]%60),
+                                                                                    100*lecturer_speaker_times[1]/totalSeconds))
+            # Experimental Speaker Breakdown
+            #------------------------------------------------------------------------------
+            f, ax1 =plt.subplots()
+            # Setting Y-axis limits
+            ax1.set_ylim(0, pred_count*5 + 5)
+            # Setting X-axis limits
+            #gnt.set_xlim(0, 160)
+            # Setting labels for x-axis and y-axis
+            ax1.set_title('Recording Results')
+            ax1.set_xlabel('Minutes since start')
+            ax1.set_ylabel('Speaker ID')
+            # Setting ticks on y-axis (5,10,15,...)
+            step = 5
+            ax1.set_yticks(list(range(step,(pred_count+1)*step,step)))
+            # Labelling tickes of y-axis ('1','2','3',...)
+            pred_tick_list = range(1,pred_count+1)
+            ax1.set_yticklabels([str(element) for element in pred_tick_list])
+            x_tick_list = range(0,6000,60)
+            ax1.set_xticks(x_tick_list)
+            ax1.set_xticklabels([str(int(element/60)) for element in x_tick_list])
+            ax1.tick_params(axis='x', labelrotation=90)
+            # Setting graph attribute
+            ax1.grid(True)
+            pred_colors = su.colors(pred_count)
+            for j, row in enumerate(sortedSpeakerList):
+                ax1.broken_barh(row, ((j+1)*5-1, 3), facecolors =(pred_colors[j]))
+            f.set_figheight(5)
+            f.set_figwidth(15)
+            tab.pyplot(f)
+            tab.write("Total length of audio: {}h:{:02d}m:{:02d}s".format(int(totalSeconds/3600),int((totalSeconds%3600)/60),int(totalSeconds%60)))
+            for i,speaker in enumerate(all_speaker_times):
+                tab.write("Speaker {} spoke: {}h:{:02d}m:{:02d}s -> {:.2f}% of time".format(i,
+                                                                                           int(speaker/3600),
+                                                                                           int((speaker%3600)/60),
+                                                                                           int(speaker%60),
+                                                                                           100*speaker/totalSeconds))
+userid = st.text_input("user id:", "Guest")
+colorPref = st.text_input("Favorite color?", "None")
+radio = st.radio('Pick one:', ['Left','Right'])
+selection = st.selectbox('Select', [1,2,3])
+if st.button("Upload Files to Dataset"):
+    save_data({"color":colorPref,"direction":radio,"number":selection},
+             valid_files,
+             userid)
+    st.success('I think it worked!')