czyoung commited on
Commit
460123d
·
verified ·
1 Parent(s): 989a9b7

Init sonogram_utility.py

Browse files

Initialize sonogram_utility.py code

Files changed (1) hide show
  1. sonogram_utility.py +96 -0
sonogram_utility.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import copy
import random

import cv2
import numpy as np

from pyannote.core import Annotation, Segment
5
+
6
def colors(n):
    '''
    Creates a list size n of distinctive colors.

    Hues are evenly spaced around the OpenCV hue circle (0-179) starting
    from a random offset, then converted HSV -> BGR.

    Args:
        n: number of colors to generate.

    Returns:
        List of n (b, g, r) tuples with each component scaled to [0, 1].
    '''
    # Original only special-cased n == 0; n < 0 also yields no colors.
    if n <= 0:
        return []
    ret = []
    h = int(random.random() * 180)  # random starting hue (OpenCV hue range is 0-179)
    step = 180 / n                  # even spacing around the hue circle
    for _ in range(n):
        # Advance the hue, keeping it an int within OpenCV's 0-179 range.
        h = int(h + step) % 180
        # Fixed saturation/value (200) keeps the colors bright and distinct.
        # BUG FIX: `np` was used here without numpy ever being imported,
        # so the original raised NameError on first call.
        hsv = np.uint8([[[h, 200, 200]]])
        bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
        ret.append((bgr[0][0][0].item() / 255,
                    bgr[0][0][1].item() / 255,
                    bgr[0][0][2].item() / 255))
    return ret
22
+
23
def extendSpeakers(mySpeakerList, fileLabel = 'NONE', maximumSecondDifference = 1, minimumSecondDuration = 0):
    '''
    Merge nearby speech sections per speaker and drop overly short ones.

    Assumes mySpeakerList is already split into Speaker/Audience: a list of
    per-speaker lists, each section a (start_seconds, duration_seconds)
    tuple. Sections of the same speaker separated by at most
    maximumSecondDifference seconds are merged, and merged sections shorter
    than minimumSecondDuration seconds are discarded.

    Args:
        mySpeakerList: list of per-speaker lists of (start, duration) tuples.
            NOTE: each inner list is sorted in place.
        fileLabel: uri recorded on the returned Annotation.
        maximumSecondDifference: largest gap (seconds) bridged when merging.
        minimumSecondDuration: minimum duration (seconds) a merged section
            must reach to be kept.

    Returns:
        (newSpeakerList, annotation): newSpeakerList mirrors the input
        structure with merged/filtered sections; annotation is the same
        data as a pyannote Annotation labelled by speaker index.
    '''
    mySpeakerAnnotations = Annotation(uri=fileLabel)
    # Generalized from a hard-coded two-speaker [[], []] (which raised
    # IndexError for >2 speakers); identical result for two-speaker input.
    newSpeakerList = [[] for _ in mySpeakerList]
    for i, speaker in enumerate(mySpeakerList):
        speaker.sort()  # process sections in start-time order (in place)
        lastEnd = -1
        tempSection = None
        for section in speaker:
            if lastEnd == -1:
                # First section: start a pending merged section.
                tempSection = copy.deepcopy(section)
                lastEnd = section[0] + section[1]
            else:
                if section[0] - lastEnd <= maximumSecondDifference:
                    # Close enough: extend the pending section to cover this
                    # one (max() guards against sections nested inside it).
                    tempSection = (tempSection[0], max(section[0] + section[1] - tempSection[0], tempSection[1]))
                    lastEnd = tempSection[0] + tempSection[1]
                else:
                    # Gap too large: flush the pending section if long enough...
                    if tempSection[1] >= minimumSecondDuration:
                        newSpeakerList[i].append(tempSection)
                        mySpeakerAnnotations[Segment(tempSection[0], lastEnd)] = i
                    # ...and start a new pending section from the current one.
                    tempSection = copy.deepcopy(section)
                    lastEnd = section[0] + section[1]
        if tempSection is not None:
            # Add the last section back in
            if tempSection[1] >= minimumSecondDuration:
                newSpeakerList[i].append(tempSection)
                mySpeakerAnnotations[Segment(tempSection[0], lastEnd)] = i
    return newSpeakerList, mySpeakerAnnotations
53
+
54
def twoClassExtendAnnotation(myAnnotation,maximumSecondDifference = 1, minimumSecondDuration = 0):
    '''
    Collapse an Annotation into lecturer/audience classes and extend them.

    The label whose support timeline has the most segments is treated as
    the lecturer (NOTE(review): selection is by segment *count*, not total
    speaking duration — confirm that is intended); every other label is
    pooled into the audience class. Both classes are then merged/filtered
    through extendSpeakers.

    Returns:
        The (list, Annotation) pair produced by extendSpeakers.
    '''
    # Identify the lecturer: the label with the most support segments.
    lecturerID = None
    bestSegmentCount = 0
    for label in myAnnotation.labels():
        segmentCount = len(myAnnotation.label_support(label))
        if segmentCount > bestSegmentCount:
            bestSegmentCount = segmentCount
            lecturerID = label

    # Rebuild as [[lecturer sections], [audience sections]] of
    # (start, duration) tuples.
    speakerSections = [[], []]
    for label in myAnnotation.labels():
        bucket = 0 if label == lecturerID else 1
        for seg in myAnnotation.label_support(label):
            speakerSections[bucket].append((seg.start, seg.duration))

    return extendSpeakers(speakerSections,
                          fileLabel=myAnnotation.uri,
                          maximumSecondDifference=maximumSecondDifference,
                          minimumSecondDuration=minimumSecondDuration)
78
+
79
def loadAudioRTTM(sampleRTTM):
    '''
    Load speaker diarization results from an RTTM file.

    Each RTTM line is whitespace-separated; field 3 is the start time,
    field 4 the duration, and field 7 the speaker name, whose last two
    characters are assumed to be the numeric speaker index
    (NOTE(review): e.g. names like 'speaker01' — confirm this convention
    holds for all producers of these files).

    Args:
        sampleRTTM: path to the RTTM file; also used as the Annotation uri.

    Returns:
        (speakerList, prediction):
          speakerList - list indexed by speaker id, each entry a list of
              (start, duration) tuples, convenient for plotting.
          prediction - the same data as a pyannote Annotation, convenient
              for error rate calculation.
    '''
    # Data in list form, for convenient plotting
    speakerList = []
    # Data in Annotation form, for convenient error rate calculation
    prediction = Annotation(uri=sampleRTTM)
    with open(sampleRTTM, "r") as rttm:
        for line in rttm:
            # BUG FIX: split() instead of split(' ') — tolerates repeated
            # spaces/tabs (column-aligned RTTM) and strips the trailing
            # newline from the last field.
            fields = line.split()
            if not fields:
                continue  # skip blank lines instead of raising IndexError
            index = int(fields[7][-2:])
            start = float(fields[3])
            duration = float(fields[4])
            # Grow the per-speaker list until speakerList[index] exists.
            while len(speakerList) < index + 1:
                speakerList.append([])
            speakerList[index].append((start, duration))
            prediction[Segment(start, start + duration)] = index

    return speakerList, prediction