Spaces:

Sonogram
/

Instructor-Support-Tool

Running on CPU Upgrade

App Files Files Community

czyoung committed on May 14, 2025

Commit

e851785

verified ·

1 Parent(s): 1df2438

Fix calcCategories

Browse files

Files changed (1) hide show

sonogram_utility.py +15 -95

sonogram_utility.py CHANGED Viewed

@@ -211,87 +211,6 @@ def checkForOverlap(time1, time2):
         return None
-def calcCategories(annotation,maxTime):
-    noVoice = [Segment(0,maxTime)]
-    oneVoice = []
-    multiVoice = []
-    # TBD Clean this up!!!
-    rawData = {}
-    for speakerName in annotation.labels():
-        if speakerName not in rawData.keys():
-            rawData[speakerName] = []
-        for segmentItem in annotation.label_support(speakerName):
-            rawData[speakerName].append(segmentItem)
-    for speaker in rawData.keys():
-        timesToProcess = []
-        for timeSlot in rawData[speaker]:
-            timesToProcess.append((speaker,timeSlot))
-        while len(timesToProcess) > 0:
-            currID, currTime = timesToProcess[0]
-            timesToProcess.remove(timesToProcess[0])
-            resetCheck = False
-            # Check in multi
-            for compareID,timeSlot in multiVoice:
-                overlapTime = checkForOverlap(currTime,timeSlot)
-                if overlapTime is None:
-                    continue
-                else:
-                    compareID.append(currID)
-                    newTimes = removeOverlap(currTime,timeSlot)#+removeOverlap(timeSlot,currTime)
-                    for i in range(len(newTimes)):
-                        newTimes[i] = (currID,newTimes[i])
-                    timesToProcess += newTimes
-                    resetCheck = True
-                    break
-            if resetCheck:
-                continue
-            # Check in one voice
-            for timeSlot in oneVoice:
-                tID = timeSlot[0]
-                tTime = timeSlot[1]
-                overlapTime = checkForOverlap(currTime,tTime)
-                if overlapTime is None:
-                    continue
-                else:
-                    oneVoice.remove(timeSlot)
-                    # Add back non overlap
-                    newTimes = removeOverlap(tTime,currTime)
-                    for i in range(len(newTimes)):
-                        newTimes[i] = (tID,newTimes[i])
-                    oneVoice += newTimes
-                    # Add overlap time to multivoice
-                    multiVoice.append(([tID,currID],overlapTime))
-                    # Add new times back to process
-                    newTimes = removeOverlap(currTime,tTime)
-                    for i in range(len(newTimes)):
-                        newTimes[i] = (currID,newTimes[i])
-                    timesToProcess += newTimes
-                    resetCheck = True
-                    break
-            if resetCheck:
-                continue
-            # Add to one voice
-            oneVoice.append((currID,currTime))
-    for _,timeSlot in multiVoice:
-        copyOfNo = copy.deepcopy(noVoice)
-        for emptySlot in noVoice:
-            if checkForOverlap(timeSlot,emptySlot) is None:
-                continue
-            else:
-                copyOfNo.remove(emptySlot)
-                copyOfNo += removeOverlap(emptySlot,timeSlot)
-        noVoice = copyOfNo
-    for _,timeSlot in oneVoice:
-        copyOfNo = copy.deepcopy(noVoice)
-        for emptySlot in noVoice:
-            if checkForOverlap(timeSlot,emptySlot) is None:
-                continue
-            else:
-                copyOfNo.remove(emptySlot)
-                copyOfNo += removeOverlap(emptySlot,timeSlot)
-        noVoice = copyOfNo
-    return noVoice, oneVoice, multiVoice, rawData
 def sumTimes(annotation):
     return annotation.get_timeline(False).duration()
@@ -356,12 +275,12 @@ def annotationToDataFrame(myAnnotation):
-def calcCategories(rawData,categories):
     categorySlots = []
     extraCategories = []
     for category in categories:
         categorySlots.append([])
-    for speaker in rawData.keys():
         targetCategory = None
         for i, category in enumerate(categories):
             if speaker in category:
@@ -371,27 +290,28 @@ def calcCategories(rawData,categories):
             categorySlots.append([])
             extraCategories.append(speaker)
-        for timeSlot in rawData[speaker]:
-            categorySlots[targetCategory].append((speaker,timeSlot))
     # Clean up categories
     cleanCategories = []
     for category in categorySlots:
         newCategory = []
-        catSorted = copy.deepcopy(sorted(category,key=lambda slot: slot[1][0]))
-        currID, currTime = None, None
         if len(catSorted) > 0:
-            currID, currTime = catSorted[0]
-        for sp, timeSlot in catSorted[1:]:
-            overlapTime = checkForOverlap(currTime,timeSlot)
             if overlapTime is None:
-                newCategory.append((currID,currTime))
                 currID = sp
-                currTime = timeSlot
             else:
                 currID = currID + "+" + sp
-                currTime[1] = max(currTime[1],timeSlot[1])
-        if currTime is not None:
-            newCategory.append((currID,currTime))
         cleanCategories.append(newCategory)
     return cleanCategories,extraCategories

         return None
 def sumTimes(annotation):
     return annotation.get_timeline(False).duration()
+def calcCategories(myAnnotation,categories):
     categorySlots = []
     extraCategories = []
     for category in categories:
         categorySlots.append([])
+    for speaker in myAnnotation.labels():
         targetCategory = None
         for i, category in enumerate(categories):
             if speaker in category:
             categorySlots.append([])
             extraCategories.append(speaker)
+        for timeSegment in myAnnotation.subset[speaker].itersegments():
+            categorySlots[targetCategory].append((speaker,timeSegment))
     # Clean up categories
     cleanCategories = []
     for category in categorySlots:
         newCategory = []
+        catSorted = copy.deepcopy(sorted(category,key=lambda cSegment: cSegment[1].start))
+        currID, currSegment = None, None
         if len(catSorted) > 0:
+            currID, currSegment = catSorted[0]
+        for sp, segmentSlot in catSorted[1:]:
+            overlapTime = checkForOverlap(currSegment,segmentSlot)
             if overlapTime is None:
+                newCategory.append((currID,currSegment))
                 currID = sp
+                currTime = segmentSlot
             else:
                 currID = currID + "+" + sp
+                # Union of segments
+                currTime[1] = currSegment | segmentSlot
+        if currSegment is not None:
+            newCategory.append((currID,currSegment))
         cleanCategories.append(newCategory)
     return cleanCategories,extraCategories