import copy
import random

import cv2
import numpy as np
from pyannote.core import Annotation, Segment
| |
|
def colors(n):
    """Return a list of ``n`` visually distinct BGR colors.

    Hues are spaced evenly around OpenCV's hue wheel (0-179) starting
    from a random offset, then converted HSV -> BGR with fixed
    saturation and value of 200.

    Parameters
    ----------
    n : int
        Number of colors to generate.

    Returns
    -------
    list[tuple[float, float, float]]
        BGR triples with each channel normalized to [0, 1].
    """
    if n == 0:
        return []
    base = random.random() * 180  # random starting hue so palettes differ per run
    step = 180 / n                # even spacing around the hue wheel
    palette = []
    for i in range(n):
        # Compute each hue directly from the base instead of accumulating
        # (the original truncated to int each step, drifting off even spacing).
        hue = int(base + (i + 1) * step) % 180
        hsv = np.uint8([[[hue, 200, 200]]])
        bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
        palette.append((bgr[0][0][0].item() / 255,
                        bgr[0][0][1].item() / 255,
                        bgr[0][0][2].item() / 255))
    return palette
| |
|
def extendSpeakers(mySpeakerList, fileLabel='NONE', maximumSecondDifference=1, minimumSecondDuration=0):
    """Merge nearby speech sections per speaker and drop too-short ones.

    Assumes ``mySpeakerList`` is already split into per-speaker lists of
    ``(start, duration)`` pairs (e.g. Speaker/Audience).  Sections of the
    same speaker whose gap is at most ``maximumSecondDifference`` seconds
    are merged into one; merged sections shorter than
    ``minimumSecondDuration`` are discarded.

    Parameters
    ----------
    mySpeakerList : list[list[tuple[float, float]]]
        One list of (start, duration) sections per speaker.
    fileLabel : str
        URI recorded on the returned Annotation.
    maximumSecondDifference : float
        Maximum gap (seconds) between sections that still get merged.
    minimumSecondDuration : float
        Minimum duration (seconds) a merged section must have to be kept.

    Returns
    -------
    tuple[list[list[tuple[float, float]]], Annotation]
        The merged per-speaker section lists and the matching pyannote
        Annotation (labels are the speaker indices).
    """
    mySpeakerAnnotations = Annotation(uri=fileLabel)
    # One output bucket per input speaker.  The original hard-coded
    # [[], []], which raised IndexError for more than two speakers.
    newSpeakerList = [[] for _ in mySpeakerList]

    def _flush(i, section):
        # Record a finished section only if it is long enough.
        if section[1] >= minimumSecondDuration:
            newSpeakerList[i].append(section)
            mySpeakerAnnotations[Segment(section[0], section[0] + section[1])] = i

    for i, speaker in enumerate(mySpeakerList):
        speaker.sort()
        current = None  # (start, duration) of the section being grown
        for start, duration in speaker:
            if current is None:
                current = (start, duration)
            elif start - (current[0] + current[1]) <= maximumSecondDifference:
                # Close enough: extend the current section to cover this one
                # (max() keeps the longer end when sections overlap).
                current = (current[0], max(start + duration - current[0], current[1]))
            else:
                _flush(i, current)
                current = (start, duration)
        if current is not None:
            _flush(i, current)
    return newSpeakerList, mySpeakerAnnotations
| |
|
def twoClassExtendAnnotation(myAnnotation, maximumSecondDifference=1, minimumSecondDuration=0):
    """Reduce a diarization Annotation to two classes and merge sections.

    Splits the annotation into lecturer (index 0) and everyone else
    (index 1), then delegates to ``extendSpeakers`` to merge nearby
    sections and drop short ones.

    Parameters
    ----------
    myAnnotation : Annotation
        Diarization result with one label per detected speaker.
    maximumSecondDifference, minimumSecondDuration : float
        Passed through to ``extendSpeakers``.

    Returns
    -------
    tuple[list, Annotation]
        Output of ``extendSpeakers`` on the two-class split.
    """
    # Pick the "lecturer": the label whose support timeline holds the most
    # segments.  NOTE(review): this counts segments, not total speaking
    # time -- confirm segment count is the intended criterion.
    lecturerID = None
    bestCount = 0
    for label in myAnnotation.labels():
        segmentCount = len(myAnnotation.label_support(label))
        if segmentCount > bestCount:
            bestCount, lecturerID = segmentCount, label

    # Bucket 0 = lecturer, bucket 1 = audience, as (start, duration) pairs.
    tempSpeakerList = [[], []]
    for label in myAnnotation.labels():
        bucket = 0 if label == lecturerID else 1
        for seg in myAnnotation.label_support(label):
            tempSpeakerList[bucket].append((seg.start, seg.duration))

    return extendSpeakers(
        tempSpeakerList,
        fileLabel=myAnnotation.uri,
        maximumSecondDifference=maximumSecondDifference,
        minimumSecondDuration=minimumSecondDuration,
    )
| |
|
def loadAudioRTTM(sampleRTTM):
    """Load speaker sections from an RTTM diarization file.

    Parameters
    ----------
    sampleRTTM : str
        Path to the RTTM file; also used as the Annotation URI.

    Returns
    -------
    tuple[list[list[tuple[float, float]]], Annotation]
        ``speakerList[i]`` holds (start, duration) pairs for speaker
        index ``i``; ``prediction`` is the equivalent pyannote Annotation
        with integer speaker indices as labels.
    """
    speakerList = []
    prediction = Annotation(uri=sampleRTTM)
    with open(sampleRTTM, "r") as rttm:
        for line in rttm:
            # split() tolerates runs of whitespace; the original
            # split(' ') produced empty fields on aligned RTTM files,
            # shifting the indices below.
            fields = line.split()
            if not fields:
                continue  # skip blank lines
            # RTTM columns: type file chan start dur <NA> <NA> speaker ...
            start = float(fields[3])
            duration = float(fields[4])
            # Speaker index from the last two characters of the name,
            # e.g. "speaker_07" -> 7.  Assumes names end in two digits
            # -- TODO confirm against the diarizer's naming scheme.
            index = int(fields[7][-2:])
            while len(speakerList) <= index:
                speakerList.append([])
            speakerList[index].append((start, duration))
            prediction[Segment(start, start + duration)] = index
    return speakerList, prediction