czyoung commited on
Commit
460123d
·
verified ·
1 Parent(s): 989a9b7

Init sonogram_utility.py

Browse files

Initialize sonogram_utility.py code

Files changed (1) hide show
  1. sonogram_utility.py +96 -0
sonogram_utility.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import copy
import random

import cv2
import numpy as np

from pyannote.core import Annotation, Segment
5
+
6
def colors(n):
    '''
    Creates a list size n of distinctive colors.

    Hues are evenly spaced around the OpenCV hue circle (0-179) starting
    from a random offset, then converted HSV -> BGR.

    Args:
        n: number of colors to generate.

    Returns:
        List of n (b, g, r) tuples with each component scaled to [0, 1].
    '''
    # Original only special-cased n == 0; n < 0 also yields no colors.
    if n <= 0:
        return []
    ret = []
    h = int(random.random() * 180)  # random starting hue (OpenCV hue range is 0-179)
    step = 180 / n                  # even spacing around the hue circle
    for _ in range(n):
        # Advance the hue, keeping it an int within OpenCV's 0-179 range.
        h = int(h + step) % 180
        # Fixed saturation/value (200) keeps the colors bright and distinct.
        # BUG FIX: `np` was used here without numpy ever being imported,
        # so the original raised NameError on first call.
        hsv = np.uint8([[[h, 200, 200]]])
        bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
        ret.append((bgr[0][0][0].item() / 255,
                    bgr[0][0][1].item() / 255,
                    bgr[0][0][2].item() / 255))
    return ret
22
+
23
def extendSpeakers(mySpeakerList, fileLabel = 'NONE', maximumSecondDifference = 1, minimumSecondDuration = 0):
    '''
    Merge nearby speech sections per speaker and drop overly short ones.

    Assumes mySpeakerList is already split into Speaker/Audience: a list of
    per-speaker lists, each section a (start_seconds, duration_seconds)
    tuple. Sections of the same speaker separated by at most
    maximumSecondDifference seconds are merged, and merged sections shorter
    than minimumSecondDuration seconds are discarded.

    Args:
        mySpeakerList: list of per-speaker lists of (start, duration) tuples.
            NOTE: each inner list is sorted in place.
        fileLabel: uri recorded on the returned Annotation.
        maximumSecondDifference: largest gap (seconds) bridged when merging.
        minimumSecondDuration: minimum duration (seconds) a merged section
            must reach to be kept.

    Returns:
        (newSpeakerList, annotation): newSpeakerList mirrors the input
        structure with merged/filtered sections; annotation is the same
        data as a pyannote Annotation labelled by speaker index.
    '''
    mySpeakerAnnotations = Annotation(uri=fileLabel)
    # Generalized from a hard-coded two-speaker [[], []] (which raised
    # IndexError for >2 speakers); identical result for two-speaker input.
    newSpeakerList = [[] for _ in mySpeakerList]
    for i, speaker in enumerate(mySpeakerList):
        speaker.sort()  # process sections in start-time order (in place)
        lastEnd = -1
        tempSection = None
        for section in speaker:
            if lastEnd == -1:
                # First section: start a pending merged section.
                tempSection = copy.deepcopy(section)
                lastEnd = section[0] + section[1]
            else:
                if section[0] - lastEnd <= maximumSecondDifference:
                    # Close enough: extend the pending section to cover this
                    # one (max() guards against sections nested inside it).
                    tempSection = (tempSection[0], max(section[0] + section[1] - tempSection[0], tempSection[1]))
                    lastEnd = tempSection[0] + tempSection[1]
                else:
                    # Gap too large: flush the pending section if long enough...
                    if tempSection[1] >= minimumSecondDuration:
                        newSpeakerList[i].append(tempSection)
                        mySpeakerAnnotations[Segment(tempSection[0], lastEnd)] = i
                    # ...and start a new pending section from the current one.
                    tempSection = copy.deepcopy(section)
                    lastEnd = section[0] + section[1]
        if tempSection is not None:
            # Add the last section back in
            if tempSection[1] >= minimumSecondDuration:
                newSpeakerList[i].append(tempSection)
                mySpeakerAnnotations[Segment(tempSection[0], lastEnd)] = i
    return newSpeakerList, mySpeakerAnnotations
53
+
54
def twoClassExtendAnnotation(myAnnotation,maximumSecondDifference = 1, minimumSecondDuration = 0):
    '''
    Collapse an Annotation into lecturer/audience classes and extend them.

    The label whose support timeline has the most segments is treated as
    the lecturer (NOTE(review): selection is by segment *count*, not total
    speaking duration — confirm that is intended); every other label is
    pooled into the audience class. Both classes are then merged/filtered
    through extendSpeakers.

    Returns:
        The (list, Annotation) pair produced by extendSpeakers.
    '''
    # Identify the lecturer: the label with the most support segments.
    lecturerID = None
    bestSegmentCount = 0
    for label in myAnnotation.labels():
        segmentCount = len(myAnnotation.label_support(label))
        if segmentCount > bestSegmentCount:
            bestSegmentCount = segmentCount
            lecturerID = label

    # Rebuild as [[lecturer sections], [audience sections]] of
    # (start, duration) tuples.
    speakerSections = [[], []]
    for label in myAnnotation.labels():
        bucket = 0 if label == lecturerID else 1
        for seg in myAnnotation.label_support(label):
            speakerSections[bucket].append((seg.start, seg.duration))

    return extendSpeakers(speakerSections,
                          fileLabel=myAnnotation.uri,
                          maximumSecondDifference=maximumSecondDifference,
                          minimumSecondDuration=minimumSecondDuration)
78
+
79
def loadAudioRTTM(sampleRTTM):
    '''
    Load speaker diarization results from an RTTM file.

    Each RTTM line is whitespace-separated; field 3 is the start time,
    field 4 the duration, and field 7 the speaker name, whose last two
    characters are assumed to be the numeric speaker index
    (NOTE(review): e.g. names like 'speaker01' — confirm this convention
    holds for all producers of these files).

    Args:
        sampleRTTM: path to the RTTM file; also used as the Annotation uri.

    Returns:
        (speakerList, prediction):
          speakerList - list indexed by speaker id, each entry a list of
              (start, duration) tuples, convenient for plotting.
          prediction - the same data as a pyannote Annotation, convenient
              for error rate calculation.
    '''
    # Data in list form, for convenient plotting
    speakerList = []
    # Data in Annotation form, for convenient error rate calculation
    prediction = Annotation(uri=sampleRTTM)
    with open(sampleRTTM, "r") as rttm:
        for line in rttm:
            # BUG FIX: split() instead of split(' ') — tolerates repeated
            # spaces/tabs (column-aligned RTTM) and strips the trailing
            # newline from the last field.
            fields = line.split()
            if not fields:
                continue  # skip blank lines instead of raising IndexError
            index = int(fields[7][-2:])
            start = float(fields[3])
            duration = float(fields[4])
            # Grow the per-speaker list until speakerList[index] exists.
            while len(speakerList) < index + 1:
                speakerList.append([])
            speakerList[index].append((start, duration))
            prediction[Segment(start, start + duration)] = index

    return speakerList, prediction