czyoung committed on
Commit
e851785
·
verified ·
1 Parent(s): 1df2438

Fix calcCategories

Browse files
Files changed (1) hide show
  1. sonogram_utility.py +15 -95
sonogram_utility.py CHANGED
@@ -211,87 +211,6 @@ def checkForOverlap(time1, time2):
211
  return None
212
 
213
 
214
def _withoutOccupied(freeSlots, occupiedTime):
    """Return a deep copy of freeSlots with occupiedTime carved out of it."""
    remaining = copy.deepcopy(freeSlots)
    for emptySlot in freeSlots:
        if checkForOverlap(occupiedTime, emptySlot) is not None:
            # Replace the touched slot by whatever removeOverlap hands back.
            remaining.remove(emptySlot)
            remaining.extend(removeOverlap(emptySlot, occupiedTime))
    return remaining


def calcCategories(annotation, maxTime):
    """Split an annotation's time into no-voice, one-voice and multi-voice slots.

    Parameters:
        annotation: object providing ``labels()`` and ``label_support(label)``.
        maxTime: end of the initial empty slot, ``Segment(0, maxTime)``.

    Returns:
        A 4-tuple ``(noVoice, oneVoice, multiVoice, rawData)``:
          * noVoice    - list of slots left after every oneVoice/multiVoice
                         time has been carved out of ``Segment(0, maxTime)``.
          * oneVoice   - list of ``(speakerID, time)`` tuples.
          * multiVoice - list of ``([speakerID, ...], time)`` tuples.
          * rawData    - dict mapping each label to the list of items yielded
                         by ``annotation.label_support(label)``.

    NOTE(review): relies on ``checkForOverlap`` returning ``None`` when two
    times do not overlap, and on ``removeOverlap(a, b)`` returning a list --
    presumably the parts of ``a`` outside ``b``; both are defined elsewhere,
    confirm there.
    """
    # TODO: still flagged for clean-up by the original author.
    rawData = {}
    for speakerName in annotation.labels():
        rawData.setdefault(speakerName, []).extend(
            annotation.label_support(speakerName)
        )

    oneVoice = []
    multiVoice = []
    for speaker, speakerTimes in rawData.items():
        # Work list of (speakerID, time) pieces still to be classified.
        pending = [(speaker, piece) for piece in speakerTimes]
        while pending:
            currID, currTime = pending.pop(0)

            # First try the slots that already hold several voices.
            for sharedIDs, sharedTime in multiVoice:
                if checkForOverlap(currTime, sharedTime) is None:
                    continue
                sharedIDs.append(currID)
                pending.extend(
                    (currID, rest) for rest in removeOverlap(currTime, sharedTime)
                )
                break
            else:
                # Then the slots that currently hold a single voice.
                for entry in oneVoice:
                    soloID, soloTime = entry
                    overlapTime = checkForOverlap(currTime, soloTime)
                    if overlapTime is None:
                        continue
                    oneVoice.remove(entry)
                    # Give the existing speaker back the non-overlapping part.
                    oneVoice.extend(
                        (soloID, rest) for rest in removeOverlap(soloTime, currTime)
                    )
                    # The shared part becomes a multi-voice slot.
                    multiVoice.append(([soloID, currID], overlapTime))
                    # Whatever is left of the current piece is re-queued.
                    pending.extend(
                        (currID, rest) for rest in removeOverlap(currTime, soloTime)
                    )
                    break
                else:
                    # No overlap anywhere: the piece is a one-voice slot.
                    oneVoice.append((currID, currTime))

    # Carve every occupied time (multi-voice first, then one-voice) out of
    # the single initial empty slot.
    noVoice = [Segment(0, maxTime)]
    for _, occupiedTime in multiVoice + oneVoice:
        noVoice = _withoutOccupied(noVoice, occupiedTime)

    return noVoice, oneVoice, multiVoice, rawData
294
-
295
  def sumTimes(annotation):
296
  return annotation.get_timeline(False).duration()
297
 
@@ -356,12 +275,12 @@ def annotationToDataFrame(myAnnotation):
356
 
357
 
358
 
359
- def calcCategories(rawData,categories):
360
  categorySlots = []
361
  extraCategories = []
362
  for category in categories:
363
  categorySlots.append([])
364
- for speaker in rawData.keys():
365
  targetCategory = None
366
  for i, category in enumerate(categories):
367
  if speaker in category:
@@ -371,27 +290,28 @@ def calcCategories(rawData,categories):
371
  categorySlots.append([])
372
  extraCategories.append(speaker)
373
 
374
- for timeSlot in rawData[speaker]:
375
- categorySlots[targetCategory].append((speaker,timeSlot))
376
  # Clean up categories
377
  cleanCategories = []
378
  for category in categorySlots:
379
  newCategory = []
380
- catSorted = copy.deepcopy(sorted(category,key=lambda slot: slot[1][0]))
381
- currID, currTime = None, None
382
  if len(catSorted) > 0:
383
- currID, currTime = catSorted[0]
384
- for sp, timeSlot in catSorted[1:]:
385
- overlapTime = checkForOverlap(currTime,timeSlot)
386
  if overlapTime is None:
387
- newCategory.append((currID,currTime))
388
  currID = sp
389
- currTime = timeSlot
390
  else:
391
  currID = currID + "+" + sp
392
- currTime[1] = max(currTime[1],timeSlot[1])
393
- if currTime is not None:
394
- newCategory.append((currID,currTime))
 
395
  cleanCategories.append(newCategory)
396
  return cleanCategories,extraCategories
397
 
 
211
  return None
212
 
213
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
  def sumTimes(annotation):
215
  return annotation.get_timeline(False).duration()
216
 
 
275
 
276
 
277
 
278
+ def calcCategories(myAnnotation,categories):
279
  categorySlots = []
280
  extraCategories = []
281
  for category in categories:
282
  categorySlots.append([])
283
+ for speaker in myAnnotation.labels():
284
  targetCategory = None
285
  for i, category in enumerate(categories):
286
  if speaker in category:
 
290
  categorySlots.append([])
291
  extraCategories.append(speaker)
292
 
293
+ for timeSegment in myAnnotation.subset[speaker].itersegments():
294
+ categorySlots[targetCategory].append((speaker,timeSegment))
295
  # Clean up categories
296
  cleanCategories = []
297
  for category in categorySlots:
298
  newCategory = []
299
+ catSorted = copy.deepcopy(sorted(category,key=lambda cSegment: cSegment[1].start))
300
+ currID, currSegment = None, None
301
  if len(catSorted) > 0:
302
+ currID, currSegment = catSorted[0]
303
+ for sp, segmentSlot in catSorted[1:]:
304
+ overlapTime = checkForOverlap(currSegment,segmentSlot)
305
  if overlapTime is None:
306
+ newCategory.append((currID,currSegment))
307
  currID = sp
308
+ currTime = segmentSlot
309
  else:
310
  currID = currID + "+" + sp
311
+ # Union of segments
312
+ currTime[1] = currSegment | segmentSlot
313
+ if currSegment is not None:
314
+ newCategory.append((currID,currSegment))
315
  cleanCategories.append(newCategory)
316
  return cleanCategories,extraCategories
317