Spaces:
Sleeping
Sleeping
Fix calcCategories
Browse files- sonogram_utility.py +15 -95
sonogram_utility.py
CHANGED
|
@@ -211,87 +211,6 @@ def checkForOverlap(time1, time2):
|
|
| 211 |
return None
|
| 212 |
|
| 213 |
|
| 214 |
-
def calcCategories(annotation,maxTime):
|
| 215 |
-
noVoice = [Segment(0,maxTime)]
|
| 216 |
-
oneVoice = []
|
| 217 |
-
multiVoice = []
|
| 218 |
-
# TBD Clean this up!!!
|
| 219 |
-
rawData = {}
|
| 220 |
-
for speakerName in annotation.labels():
|
| 221 |
-
if speakerName not in rawData.keys():
|
| 222 |
-
rawData[speakerName] = []
|
| 223 |
-
for segmentItem in annotation.label_support(speakerName):
|
| 224 |
-
rawData[speakerName].append(segmentItem)
|
| 225 |
-
for speaker in rawData.keys():
|
| 226 |
-
timesToProcess = []
|
| 227 |
-
for timeSlot in rawData[speaker]:
|
| 228 |
-
timesToProcess.append((speaker,timeSlot))
|
| 229 |
-
while len(timesToProcess) > 0:
|
| 230 |
-
currID, currTime = timesToProcess[0]
|
| 231 |
-
timesToProcess.remove(timesToProcess[0])
|
| 232 |
-
resetCheck = False
|
| 233 |
-
# Check in multi
|
| 234 |
-
for compareID,timeSlot in multiVoice:
|
| 235 |
-
overlapTime = checkForOverlap(currTime,timeSlot)
|
| 236 |
-
if overlapTime is None:
|
| 237 |
-
continue
|
| 238 |
-
else:
|
| 239 |
-
compareID.append(currID)
|
| 240 |
-
newTimes = removeOverlap(currTime,timeSlot)#+removeOverlap(timeSlot,currTime)
|
| 241 |
-
for i in range(len(newTimes)):
|
| 242 |
-
newTimes[i] = (currID,newTimes[i])
|
| 243 |
-
timesToProcess += newTimes
|
| 244 |
-
resetCheck = True
|
| 245 |
-
break
|
| 246 |
-
if resetCheck:
|
| 247 |
-
continue
|
| 248 |
-
# Check in one voice
|
| 249 |
-
for timeSlot in oneVoice:
|
| 250 |
-
tID = timeSlot[0]
|
| 251 |
-
tTime = timeSlot[1]
|
| 252 |
-
overlapTime = checkForOverlap(currTime,tTime)
|
| 253 |
-
if overlapTime is None:
|
| 254 |
-
continue
|
| 255 |
-
else:
|
| 256 |
-
oneVoice.remove(timeSlot)
|
| 257 |
-
# Add back non overlap
|
| 258 |
-
newTimes = removeOverlap(tTime,currTime)
|
| 259 |
-
for i in range(len(newTimes)):
|
| 260 |
-
newTimes[i] = (tID,newTimes[i])
|
| 261 |
-
oneVoice += newTimes
|
| 262 |
-
# Add overlap time to multivoice
|
| 263 |
-
multiVoice.append(([tID,currID],overlapTime))
|
| 264 |
-
# Add new times back to process
|
| 265 |
-
newTimes = removeOverlap(currTime,tTime)
|
| 266 |
-
for i in range(len(newTimes)):
|
| 267 |
-
newTimes[i] = (currID,newTimes[i])
|
| 268 |
-
timesToProcess += newTimes
|
| 269 |
-
resetCheck = True
|
| 270 |
-
break
|
| 271 |
-
if resetCheck:
|
| 272 |
-
continue
|
| 273 |
-
# Add to one voice
|
| 274 |
-
oneVoice.append((currID,currTime))
|
| 275 |
-
for _,timeSlot in multiVoice:
|
| 276 |
-
copyOfNo = copy.deepcopy(noVoice)
|
| 277 |
-
for emptySlot in noVoice:
|
| 278 |
-
if checkForOverlap(timeSlot,emptySlot) is None:
|
| 279 |
-
continue
|
| 280 |
-
else:
|
| 281 |
-
copyOfNo.remove(emptySlot)
|
| 282 |
-
copyOfNo += removeOverlap(emptySlot,timeSlot)
|
| 283 |
-
noVoice = copyOfNo
|
| 284 |
-
for _,timeSlot in oneVoice:
|
| 285 |
-
copyOfNo = copy.deepcopy(noVoice)
|
| 286 |
-
for emptySlot in noVoice:
|
| 287 |
-
if checkForOverlap(timeSlot,emptySlot) is None:
|
| 288 |
-
continue
|
| 289 |
-
else:
|
| 290 |
-
copyOfNo.remove(emptySlot)
|
| 291 |
-
copyOfNo += removeOverlap(emptySlot,timeSlot)
|
| 292 |
-
noVoice = copyOfNo
|
| 293 |
-
return noVoice, oneVoice, multiVoice, rawData
|
| 294 |
-
|
| 295 |
def sumTimes(annotation):
|
| 296 |
return annotation.get_timeline(False).duration()
|
| 297 |
|
|
@@ -356,12 +275,12 @@ def annotationToDataFrame(myAnnotation):
|
|
| 356 |
|
| 357 |
|
| 358 |
|
| 359 |
-
def calcCategories(
|
| 360 |
categorySlots = []
|
| 361 |
extraCategories = []
|
| 362 |
for category in categories:
|
| 363 |
categorySlots.append([])
|
| 364 |
-
for speaker in
|
| 365 |
targetCategory = None
|
| 366 |
for i, category in enumerate(categories):
|
| 367 |
if speaker in category:
|
|
@@ -371,27 +290,28 @@ def calcCategories(rawData,categories):
|
|
| 371 |
categorySlots.append([])
|
| 372 |
extraCategories.append(speaker)
|
| 373 |
|
| 374 |
-
for
|
| 375 |
-
categorySlots[targetCategory].append((speaker,
|
| 376 |
# Clean up categories
|
| 377 |
cleanCategories = []
|
| 378 |
for category in categorySlots:
|
| 379 |
newCategory = []
|
| 380 |
-
catSorted = copy.deepcopy(sorted(category,key=lambda
|
| 381 |
-
currID,
|
| 382 |
if len(catSorted) > 0:
|
| 383 |
-
currID,
|
| 384 |
-
for sp,
|
| 385 |
-
overlapTime = checkForOverlap(
|
| 386 |
if overlapTime is None:
|
| 387 |
-
newCategory.append((currID,
|
| 388 |
currID = sp
|
| 389 |
-
currTime =
|
| 390 |
else:
|
| 391 |
currID = currID + "+" + sp
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
|
|
|
| 395 |
cleanCategories.append(newCategory)
|
| 396 |
return cleanCategories,extraCategories
|
| 397 |
|
|
|
|
| 211 |
return None
|
| 212 |
|
| 213 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 214 |
def sumTimes(annotation):
|
| 215 |
return annotation.get_timeline(False).duration()
|
| 216 |
|
|
|
|
| 275 |
|
| 276 |
|
| 277 |
|
| 278 |
+
def calcCategories(myAnnotation,categories):
|
| 279 |
categorySlots = []
|
| 280 |
extraCategories = []
|
| 281 |
for category in categories:
|
| 282 |
categorySlots.append([])
|
| 283 |
+
for speaker in myAnnotation.labels():
|
| 284 |
targetCategory = None
|
| 285 |
for i, category in enumerate(categories):
|
| 286 |
if speaker in category:
|
|
|
|
| 290 |
categorySlots.append([])
|
| 291 |
extraCategories.append(speaker)
|
| 292 |
|
| 293 |
+
for timeSegment in myAnnotation.subset[speaker].itersegments():
|
| 294 |
+
categorySlots[targetCategory].append((speaker,timeSegment))
|
| 295 |
# Clean up categories
|
| 296 |
cleanCategories = []
|
| 297 |
for category in categorySlots:
|
| 298 |
newCategory = []
|
| 299 |
+
catSorted = copy.deepcopy(sorted(category,key=lambda cSegment: cSegment[1].start))
|
| 300 |
+
currID, currSegment = None, None
|
| 301 |
if len(catSorted) > 0:
|
| 302 |
+
currID, currSegment = catSorted[0]
|
| 303 |
+
for sp, segmentSlot in catSorted[1:]:
|
| 304 |
+
overlapTime = checkForOverlap(currSegment,segmentSlot)
|
| 305 |
if overlapTime is None:
|
| 306 |
+
newCategory.append((currID,currSegment))
|
| 307 |
currID = sp
|
| 308 |
+
currTime = segmentSlot
|
| 309 |
else:
|
| 310 |
currID = currID + "+" + sp
|
| 311 |
+
# Union of segments
|
| 312 |
+
currTime[1] = currSegment | segmentSlot
|
| 313 |
+
if currSegment is not None:
|
| 314 |
+
newCategory.append((currID,currSegment))
|
| 315 |
cleanCategories.append(newCategory)
|
| 316 |
return cleanCategories,extraCategories
|
| 317 |
|