czyoung's picture
Create sonogram
989a9b7 verified
import copy
import random

import cv2
import numpy as np
from pyannote.core import Annotation, Segment
def colors(n):
    '''
    Creates a list size n of distinctive colors

    Hues are spread evenly around the 0-179 hue circle used for 8-bit HSV
    images, starting from a random offset; saturation and value are fixed at
    200. Each HSV triple is converted with cv2.COLOR_HSV2BGR.

    n: number of colors wanted. Zero or a negative number gives an empty list.

    Returns a list of n 3-tuples of floats in [0, 1], in the channel order
    produced by the HSV2BGR conversion.
    '''
    if n <= 0:
        return []
    ret = []
    start = int(random.random() * 180)
    step = 180 / n
    for i in range(n):
        # Derive every hue from the untruncated offset; truncating a running
        # total each iteration loses the fractional part of step every time
        # and bunches the hues together.
        h = int(start + (i + 1) * step) % 180
        hsv = np.uint8([[[h,200,200]]])
        bgr = cv2.cvtColor(hsv,cv2.COLOR_HSV2BGR)
        # .item() turns the uint8 scalars into plain ints before scaling
        ret.append(tuple(channel.item() / 255 for channel in bgr[0][0]))
    return ret
def extendSpeakers(mySpeakerList, fileLabel = 'NONE', maximumSecondDifference = 1, minimumSecondDuration = 0):
    '''
    Merges neighbouring sections of each class and drops the short ones.

    Assumes mySpeakerList is already split into Speaker/Audience

    mySpeakerList: list of per-class lists; every section is indexable as
        (start, duration). The input lists are not modified.
    fileLabel: uri given to the returned Annotation.
    maximumSecondDifference: a section whose start is at most this far past
        the end of the running merged section is folded into it.
    minimumSecondDuration: merged sections shorter than this are discarded.

    Returns (newSpeakerList, mySpeakerAnnotations): the merged sections per
    class as (start, duration), and an Annotation holding the same sections
    as Segment(start, end) labelled with the class index.
    '''
    mySpeakerAnnotations = Annotation(uri=fileLabel)
    # Always hand back at least the two Speaker/Audience buckets, but grow the
    # result when more classes are supplied instead of failing on index 2.
    newSpeakerList = [[] for _ in range(max(2, len(mySpeakerList)))]

    def keepIfLongEnough(classIndex, section, end):
        # Record a finished merged section unless it is below the minimum duration
        if section[1] >= minimumSecondDuration:
            newSpeakerList[classIndex].append(section)
            mySpeakerAnnotations[Segment(section[0], end)] = classIndex

    for i, speaker in enumerate(mySpeakerList):
        # None marks "no section open yet"; a numeric sentinel could collide
        # with a genuine end time.
        tempSection = None
        lastEnd = None
        # sorted() rather than .sort() so the caller's lists stay untouched
        for section in sorted(speaker):
            if tempSection is not None and section[0] - lastEnd <= maximumSecondDifference:
                # Close enough: stretch the open section; max() keeps the
                # longer span when the new section ends inside the open one.
                tempSection = (tempSection[0], max(section[0] + section[1] - tempSection[0], tempSection[1]))
                lastEnd = tempSection[0] + tempSection[1]
                continue
            if tempSection is not None:
                keepIfLongEnough(i, tempSection, lastEnd)
            # Start a new open section from this one
            tempSection = copy.deepcopy(section)
            lastEnd = section[0] + section[1]
        if tempSection is not None:
            # Add the last section back in
            keepIfLongEnough(i, tempSection, lastEnd)
    return newSpeakerList,mySpeakerAnnotations
def twoClassExtendAnnotation(myAnnotation,maximumSecondDifference = 1, minimumSecondDuration = 0):
    '''
    Collapses an annotation into lecturer/audience classes and extends them.

    The lecturer is the label whose label_support has the greatest len()
    (presumably the number of support segments -- confirm against pyannote);
    ties go to the label seen first. Every other label counts as audience.
    The resulting [[lecturer sections],[audience sections]] list is handed to
    extendSpeakers together with the annotation's uri and both thresholds.

    Returns the (list, Annotation) pair produced by extendSpeakers.
    '''
    # Identify lecturer
    lecturerID = None
    bestCount = 0
    for label in myAnnotation.labels():
        count = len(myAnnotation.label_support(label))
        if count > bestCount:
            bestCount, lecturerID = count, label

    # Recreate speakerList as [[lecturer labels],[audience labels]]
    lecturerSections = []
    audienceSections = []
    for label in myAnnotation.labels():
        if label != lecturerID:
            bucket = audienceSections
        else:
            bucket = lecturerSections
        for seg in myAnnotation.label_support(label):
            bucket.append((seg.start, seg.duration))

    newList, newAnnotation = extendSpeakers(
        [lecturerSections, audienceSections],
        fileLabel = myAnnotation.uri,
        maximumSecondDifference = maximumSecondDifference,
        minimumSecondDuration = minimumSecondDuration,
    )
    return newList, newAnnotation
def loadAudioRTTM(sampleRTTM):
    '''
    Reads an RTTM file into both a plotting-friendly list and an Annotation.

    sampleRTTM: path of the RTTM file; also used as the Annotation's uri.

    Each non-blank line is split on whitespace. Field 3 is read as the start,
    field 4 as the duration, and the last two characters of field 7 as the
    integer speaker index (so names are expected to end in two digits).

    Returns (speakerList, prediction): speakerList[index] is a list of
    (start, duration) tuples, and prediction maps Segment(start, end) to the
    speaker index.
    '''
    # Read in prediction data
    # Data in list form, for convenient plotting
    speakerList = []
    # Data in Annotation form, for convenient error rate calculation
    prediction = Annotation(uri=sampleRTTM)
    with open(sampleRTTM, "r") as rttm:
        for line in rttm:
            # split() with no argument tolerates tabs and repeated spaces,
            # which would otherwise shift the field positions
            speakerResult = line.split()
            if not speakerResult:
                # Blank line (e.g. trailing newline at end of file)
                continue
            index = int(speakerResult[7][-2:])
            start = float(speakerResult[3])
            duration = float(speakerResult[4])
            end = start + duration
            # Grow the list so that speakerList[index] exists
            while len(speakerList) < index + 1:
                speakerList.append([])
            speakerList[index].append((start, duration))
            prediction[Segment(start,end)] = index
    return speakerList, prediction