Spaces:

Sonogram
/

Instructor-Support-Tool

Running on CPU Upgrade

App Files Files Community

czyoung committed on Apr 21, 2025

Commit

dcaa601

verified ·

1 Parent(s): 35daf22

Update app.py

Browse files

Files changed (1) hide show

app.py +171 -0

app.py CHANGED Viewed

@@ -14,6 +14,8 @@ import os
 import shutil
 import pandas as pd
 import plotly.express as px
 import torch
 #import torch_xla.core.xla_model as xm
 from pyannote.audio import Pipeline
@@ -112,7 +114,132 @@ def processFile(filePath):
     print("Speakers Detected")
     speakerList = su.annotationToSpeakerList(annotations)
     return (speakerList, annotations, int(waveformEnhanced.shape[-1]/sampleRate))
 #st.set_page_config(layout="wide")
 st.title("Lecturer Support Tool")
 if not isGPU:
@@ -227,6 +354,14 @@ for i, tab in enumerate(audio_tabs):
             all_dataFrame = su.speakerListToDataFrame(sortedSpeakerList)
             currDF = all_dataFrame
             multiVoice = annotations.get_overlap()
             singleVoice = annotations.extrude(multiVoice).get_timeline()
             noVoice = Timeline(segments=[Segment(0,totalSeconds)]).extrude(singleVoice).extrude(multiVoice)
@@ -240,6 +375,42 @@ for i, tab in enumerate(audio_tabs):
             )
             fig = px.pie(df, values='Duration', names='Category', title='Types of Discussion')
             tab.plotly_chart(fig, use_container_width=True)
             # Lecturer vs. Audience
             #---------------------------------------------------------------------------

 import shutil
 import pandas as pd
 import plotly.express as px
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
 import torch
 #import torch_xla.core.xla_model as xm
 from pyannote.audio import Pipeline
     print("Speakers Detected")
     speakerList = su.annotationToSpeakerList(annotations)
     return (speakerList, annotations, int(waveformEnhanced.shape[-1]/sampleRate))
def removeOverlap(timeSegment, overlap):
    """Return the pieces of ``timeSegment`` that lie outside ``overlap``.

    Args:
        timeSegment: Object with ``start`` and ``end`` attributes to be cut.
        overlap: Object with ``start`` and ``end`` attributes marking the
            span to remove.

    Returns:
        A list holding zero, one or two ``Segment`` objects: the part of
        ``timeSegment`` before ``overlap.start`` (if any) followed by the
        part after ``overlap.end`` (if any).
    """
    leftovers = []

    # Leading piece: everything that starts before the overlap begins.
    startsEarlier = timeSegment.start < overlap.start
    if startsEarlier:
        headEnd = min(overlap.start, timeSegment.end)
        leftovers.append(Segment(timeSegment.start, headEnd))

    # Trailing piece: everything that continues after the overlap ends.
    endsLater = timeSegment.end > overlap.end
    if endsLater:
        tailStart = max(timeSegment.start, overlap.end)
        leftovers.append(Segment(tailStart, timeSegment.end))

    return leftovers
def checkForOverlap(time1, time2):
    """Intersect two time spans and report the shared part, if any.

    Args:
        time1: First operand; must support ``&`` with ``time2``.
        time2: Second operand.

    Returns:
        The result of ``time1 & time2`` when that result is truthy,
        otherwise ``None``.
    """
    shared = time1 & time2
    return shared if shared else None
def _findOverlap(entries, currTime):
    """Locate the first ``(ids, segment)`` entry whose segment intersects ``currTime``.

    Returns ``(index, overlap)`` for the first hit, or ``(None, None)`` when
    nothing in ``entries`` overlaps ``currTime``.
    """
    for idx, (_, timeSlot) in enumerate(entries):
        overlapTime = checkForOverlap(currTime, timeSlot)
        if overlapTime is not None:
            return idx, overlapTime
    return None, None


def _cutFromSlots(emptySlots, voicedSlot):
    """Return ``emptySlots`` with the time covered by ``voicedSlot`` removed."""
    remaining = []
    for emptySlot in emptySlots:
        if checkForOverlap(voicedSlot, emptySlot) is None:
            remaining.append(emptySlot)
        else:
            remaining.extend(removeOverlap(emptySlot, voicedSlot))
    return remaining


def calcCategories(annotation, maxTime):
    """Split ``[0, maxTime]`` into silent, single-speaker and multi-speaker time.

    Args:
        annotation: Diarization result. Only ``labels()`` and
            ``label_support(label)`` are used; presumably a
            ``pyannote.core.Annotation`` (confirm against the caller).
        maxTime: End of the recording; the silent timeline starts out as
            ``Segment(0, maxTime)``.

    Returns:
        A tuple ``(noVoice, oneVoice, multiVoice)`` where

        * ``noVoice`` is a list of segments in which nobody speaks,
        * ``oneVoice`` is a list of ``(speaker, segment)`` pairs,
        * ``multiVoice`` is a list of ``([speaker, ...], segment)`` pairs.

        The lists are not sorted.
    """
    from collections import deque

    oneVoice = []
    multiVoice = []

    # Read from the ``annotation`` argument (the previous code referenced an
    # undefined ``myAnnotation`` name and raised NameError).
    for speakerName in annotation.labels():
        pending = deque(
            (speakerName, segmentItem)
            for segmentItem in annotation.label_support(speakerName)
        )
        while pending:
            currID, currTime = pending.popleft()

            # Case 1: the segment hits a slot that already has several voices.
            # Split that slot so the new speaker is credited only for the
            # shared part, not for the whole slot.
            idx, overlapTime = _findOverlap(multiVoice, currTime)
            if idx is not None:
                groupIDs, groupTime = multiVoice.pop(idx)
                multiVoice.append((groupIDs + [currID], overlapTime))
                multiVoice.extend(
                    (list(groupIDs), rest)
                    for rest in removeOverlap(groupTime, currTime)
                )
                # Whatever is left of the current segment is re-examined.
                pending.extend(
                    (currID, rest)
                    for rest in removeOverlap(currTime, groupTime)
                )
                continue

            # Case 2: the segment hits a single-voice slot. The shared part
            # is promoted to multi-voice; the rest of the old slot stays
            # single-voice and the rest of the new segment is re-examined.
            idx, overlapTime = _findOverlap(oneVoice, currTime)
            if idx is not None:
                otherID, otherTime = oneVoice.pop(idx)
                oneVoice.extend(
                    (otherID, rest)
                    for rest in removeOverlap(otherTime, currTime)
                )
                multiVoice.append(([otherID, currID], overlapTime))
                pending.extend(
                    (currID, rest)
                    for rest in removeOverlap(currTime, otherTime)
                )
                continue

            # Case 3: nothing overlaps, so this is plain single-voice time.
            oneVoice.append((currID, currTime))

    # Silence is whatever remains of the full span once every voiced slot
    # has been carved out.
    noVoice = [Segment(0, maxTime)]
    for _, timeSlot in multiVoice + oneVoice:
        noVoice = _cutFromSlots(noVoice, timeSlot)

    return noVoice, oneVoice, multiVoice
def sumTimes(timeList):
    """Add up the ``duration`` of every entry in ``timeList``.

    Args:
        timeList: Iterable of objects exposing a ``duration`` attribute.

    Returns:
        The summed duration; ``0`` for an empty input.
    """
    return sum(timeSlot.duration for timeSlot in timeList)
def sumTimesPerSpeaker(timeSlotList):
    """Total the ``duration`` of the time slots belonging to each speaker key.

    Args:
        timeSlotList: Iterable of ``(speaker, timeSlot)`` pairs where
            ``timeSlot`` exposes ``duration``. The speaker key is compared
            with ``==`` only, so unhashable keys such as lists are accepted.

    Returns:
        A tuple ``(speakers, totals)`` of two parallel lists, with speakers
        in order of first appearance.
    """
    names = []
    totals = []
    for who, slot in timeSlotList:
        try:
            pos = names.index(who)
        except ValueError:
            # First time this key shows up: open a new running total.
            names.append(who)
            totals.append(0)
            pos = len(names) - 1
        totals[pos] += slot.duration
    return names, totals
def sumMultiTimesPerSpeaker(timeSlotList):
    """Total, per individual speaker, the time spent in multi-speaker slots.

    Args:
        timeSlotList: Iterable of ``(speakerGroup, timeSlot)`` pairs where
            ``speakerGroup`` is an iterable of speaker keys and ``timeSlot``
            exposes ``duration``.

    Returns:
        A tuple ``(speakers, totals)`` of two parallel lists. Each slot's
        duration is credited in full to every speaker in its group.
    """
    # First collapse identical groups, then fan each group total out to
    # its members.
    groups, groupTotals = sumTimesPerSpeaker(timeSlotList)

    names = []
    totals = []
    for members, groupTotal in zip(groups, groupTotals):
        for member in members:
            if member in names:
                pos = names.index(member)
            else:
                pos = len(names)
                names.append(member)
                totals.append(0)
            totals[pos] += groupTotal
    return names, totals
 #st.set_page_config(layout="wide")
 st.title("Lecturer Support Tool")
 if not isGPU:
             all_dataFrame = su.speakerListToDataFrame(sortedSpeakerList)
             currDF = all_dataFrame
+            # TBD CLEAN THIS UP!!!
+            noVoice2, oneVoice2, multiVoice2 = calcCategories(annotations,totalSeconds)
+            noVoice2.sort()
+            oneVoice2.sort()
+            multiVoice2.sort()
+            sList,timeList = sumTimesPerSpeaker(oneVoice)
+            multiSpeakerList, multiTimeList = sumMultiTimesPerSpeaker(multiVoice)
             multiVoice = annotations.get_overlap()
             singleVoice = annotations.extrude(multiVoice).get_timeline()
             noVoice = Timeline(segments=[Segment(0,totalSeconds)]).extrude(singleVoice).extrude(multiVoice)
             )
             fig = px.pie(df, values='Duration', names='Category', title='Types of Discussion')
             tab.plotly_chart(fig, use_container_width=True)
+            df4: pd.DataFrame = pd.DataFrame(
+                {
+                    "values": [sumTimes(rawSample["speaker 1"]),sumTimes(rawSample["speaker 2"]),sumTimes(rawSample["speaker 3"])],
+                    "names": ["speaker 1","speaker 2","speaker 3"]
+                }
+            )
+            df4.name = "df4"
+            df5: pd.DataFrame = pd.DataFrame(
+                {
+                    "ids" : ["NV","OV","MV"]+[f"OV_{i}" for i in range(len(sList))]
+                                +[f"MV_{i}" for i in range(len(multiSpeakerList))],
+                    "labels" : ["No Voice","One Voice","Multi Voice"] + sList + multiSpeakerList,
+                    "parents" : ["","",""]+["OV" for i in range(len(sList))]
+                                +["MV" for i in range(len(multiSpeakerList))],
+                    "values" : [sumTimes(noVoice),
+                                sumTimes([n for _,n in oneVoice]),
+                                sumTimes([n for _,n in multiVoice]),
+                                ] + timeList + multiTimeList,
+                }
+            )
+            df5.name = "df5"
+            fig2_spc = make_subplots(rows=2, cols=1,
+                specs=[[{"type": "pie"}],[{"type": "treemap"}]]
+                       , shared_xaxes=True)
+            fig2_spc.add_trace(go.Pie(values=df4["values"],labels=df4["names"]),
+              row=1, col=1)
+            fig2.add_trace(go.Treemap(
+                            labels = df5["labels"],
+                            parents = df5["parents"],
+                            ids=df5["ids"],
+                            values = df5["values"]),
+                            row=2, col=1)
+            tab.plotly_chart(fig2, use_container_width=True)
             # Lecturer vs. Audience
             #---------------------------------------------------------------------------