Spaces:

Sonogram
/

Instructor-Support-Tool

Sleeping

App Files Files Community

Instructor-Support-Tool / sonogram

czyoung

Create sonogram

989a9b7 verified about 1 year ago

raw

history blame contribute delete

3.85 kB

	import copy
	import random

	import cv2
	import numpy as np
	from pyannote.core import Annotation, Segment

	def colors(n):
	    '''
	    Creates a list size n of distinctive colors.

	    Hues are spread evenly over OpenCV's 8-bit hue range (0-179), starting
	    from a random offset; saturation and value are fixed at 200.

	    n: number of colors wanted. Zero or a negative value gives an empty list.

	    Returns a list of n 3-tuples of floats in [0, 1]. The channels are in the
	    order produced by cv2.COLOR_HSV2BGR (blue, green, red).
	    '''
	    if n <= 0:
	        return []
	    ret = []
	    offset = int(random.random() * 180)
	    step = 180 / n
	    for i in range(1, n + 1):
	        # Derive every hue from the un-truncated position. Truncating a running
	        # total each iteration lets rounding error pile up, and freezes the hue
	        # completely once step drops below 1.
	        h = int(offset + i * step) % 180
	        hsv = np.uint8([[[h, 200, 200]]])
	        bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
	        pixel = bgr[0][0]
	        ret.append((pixel[0].item() / 255, pixel[1].item() / 255, pixel[2].item() / 255))
	    return ret

	def extendSpeakers(mySpeakerList, fileLabel = 'NONE', maximumSecondDifference = 1, minimumSecondDuration = 0):
	    '''
	    Merges each speaker's nearby sections and builds a matching Annotation.

	    Assumes mySpeakerList is already split into Speaker/Audience: one list per
	    speaker, each holding (start, duration) pairs.

	    mySpeakerList: per-speaker lists of (start, duration) pairs. Not modified.
	    fileLabel: uri given to the returned Annotation.
	    maximumSecondDifference: a section that starts no more than this long after
	        the end of the section being built is merged into it.
	    minimumSecondDuration: merged sections shorter than this are dropped.

	    Returns (newSpeakerList, mySpeakerAnnotations). newSpeakerList has one list
	    of kept sections per input speaker (at least two lists, as before), and the
	    Annotation holds one Segment per kept section, labelled with the speaker's
	    position in mySpeakerList.
	    '''
	    mySpeakerAnnotations = Annotation(uri=fileLabel)
	    newSpeakerList = [[],[]]
	    for i, speaker in enumerate(mySpeakerList):
	        # Keep the historical two-slot result, but grow it rather than fail
	        # with IndexError when more than two speakers are supplied.
	        while len(newSpeakerList) <= i:
	            newSpeakerList.append([])
	        tempSection = None
	        lastEnd = None
	        # sorted() works on a copy, so the caller's list keeps its order.
	        for section in sorted(speaker):
	            # "tempSection is None" marks the first section; a numeric sentinel
	            # could collide with a real end time.
	            if tempSection is not None and section[0] - lastEnd <= maximumSecondDifference:
	                # Close enough: stretch the current section to cover this one.
	                mergedDuration = max(section[0] + section[1] - tempSection[0], tempSection[1])
	                tempSection = (tempSection[0], mergedDuration)
	                lastEnd = tempSection[0] + tempSection[1]
	                continue
	            # Gap too large (or first section): emit what was built, start anew.
	            if tempSection is not None and tempSection[1] >= minimumSecondDuration:
	                newSpeakerList[i].append(tempSection)
	                mySpeakerAnnotations[Segment(tempSection[0],lastEnd)] = i
	            tempSection = copy.deepcopy(section)
	            lastEnd = section[0] + section[1]
	        # Add the last section back in
	        if tempSection is not None and tempSection[1] >= minimumSecondDuration:
	            newSpeakerList[i].append(tempSection)
	            mySpeakerAnnotations[Segment(tempSection[0],lastEnd)] = i
	    return newSpeakerList,mySpeakerAnnotations

	def twoClassExtendAnnotation(myAnnotation,maximumSecondDifference = 1, minimumSecondDuration = 0):
	    '''
	    Collapses an annotation into lecturer/audience and extends the sections.

	    The lecturer is the label whose label_support() has the greatest len();
	    ties keep the earliest label. Every other label is treated as audience.
	    NOTE(review): len() of the support is presumably a segment count rather
	    than total speaking time - confirm this is the intended criterion.

	    myAnnotation: annotation providing labels(), label_support() and uri.
	    maximumSecondDifference, minimumSecondDuration: passed to extendSpeakers.

	    Returns whatever extendSpeakers returns for [[lecturer], [audience]].
	    '''
	    allLabels = myAnnotation.labels()

	    # Identify lecturer
	    lecturerID, bestCount = None, 0
	    for label in allLabels:
	        count = len(myAnnotation.label_support(label))
	        if count > bestCount:
	            lecturerID, bestCount = label, count

	    # Slot 0 collects the lecturer's sections, slot 1 everyone else's
	    lecturerSections = []
	    audienceSections = []
	    for label in allLabels:
	        target = lecturerSections if label == lecturerID else audienceSections
	        target.extend(
	            (piece.start, piece.duration)
	            for piece in myAnnotation.label_support(label)
	        )

	    return extendSpeakers(
	        [lecturerSections, audienceSections],
	        fileLabel = myAnnotation.uri,
	        maximumSecondDifference = maximumSecondDifference,
	        minimumSecondDuration = minimumSecondDuration,
	    )

	def loadAudioRTTM(sampleRTTM):
	    '''
	    Reads prediction data from an RTTM file.

	    Each non-blank line is split on whitespace. Field 3 is read as the start
	    time, field 4 as the duration, and the trailing digits of field 7 (the
	    speaker name, e.g. "SPEAKER_00") as the speaker index.

	    sampleRTTM: path of the RTTM file; also used as the Annotation uri.

	    Returns (speakerList, prediction):
	        speakerList - list indexed by speaker, each entry a list of
	            (start, duration) tuples, for convenient plotting.
	        prediction - Annotation with one Segment(start, start + duration) per
	            line, labelled with the speaker index, for convenient error rate
	            calculation.

	    Raises ValueError if a speaker name does not end in digits or a time field
	    is not a number, and IndexError if a line has too few fields.
	    '''
	    # Data in list form, for convenient plotting
	    speakerList = []
	    # Data in Annotation form, for convenient error rate calculation
	    prediction = Annotation(uri=sampleRTTM)
	    with open(sampleRTTM, "r") as rttm:
	        for line in rttm:
	            # split() with no argument copes with tabs, repeated spaces and
	            # the trailing newline.
	            speakerResult = line.split()
	            if not speakerResult:
	                # Skip blank lines
	                continue
	            speakerName = speakerResult[7]
	            # Use the whole numeric suffix, not just the last two characters,
	            # so speaker 100+ is not folded onto 0-99 and "_5" still parses.
	            suffix = speakerName[len(speakerName.rstrip('0123456789')):]
	            index = int(suffix)
	            start = float(speakerResult[3])
	            duration = float(speakerResult[4])
	            end = start + duration
	            # Make sure a slot exists for this speaker index
	            while len(speakerList) < index + 1:
	                speakerList.append([])
	            speakerList[index].append((start,duration))
	            prediction[Segment(start,end)] = index

	    return speakerList, prediction