czyoung committed on
Commit
dcaa601
·
verified ·
1 Parent(s): 35daf22

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +171 -0
app.py CHANGED
@@ -14,6 +14,8 @@ import os
14
  import shutil
15
  import pandas as pd
16
  import plotly.express as px
 
 
17
  import torch
18
  #import torch_xla.core.xla_model as xm
19
  from pyannote.audio import Pipeline
@@ -112,7 +114,132 @@ def processFile(filePath):
112
  print("Speakers Detected")
113
  speakerList = su.annotationToSpeakerList(annotations)
114
  return (speakerList, annotations, int(waveformEnhanced.shape[-1]/sampleRate))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  #st.set_page_config(layout="wide")
117
  st.title("Lecturer Support Tool")
118
  if not isGPU:
@@ -227,6 +354,14 @@ for i, tab in enumerate(audio_tabs):
227
  all_dataFrame = su.speakerListToDataFrame(sortedSpeakerList)
228
  currDF = all_dataFrame
229
 
 
 
 
 
 
 
 
 
230
  multiVoice = annotations.get_overlap()
231
  singleVoice = annotations.extrude(multiVoice).get_timeline()
232
  noVoice = Timeline(segments=[Segment(0,totalSeconds)]).extrude(singleVoice).extrude(multiVoice)
@@ -240,6 +375,42 @@ for i, tab in enumerate(audio_tabs):
240
  )
241
  fig = px.pie(df, values='Duration', names='Category', title='Types of Discussion')
242
  tab.plotly_chart(fig, use_container_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
243
 
244
  # Lecturer vs. Audience
245
  #---------------------------------------------------------------------------
 
14
  import shutil
15
  import pandas as pd
16
  import plotly.express as px
17
+ import plotly.graph_objects as go
18
+ from plotly.subplots import make_subplots
19
  import torch
20
  #import torch_xla.core.xla_model as xm
21
  from pyannote.audio import Pipeline
 
114
  print("Speakers Detected")
115
  speakerList = su.annotationToSpeakerList(annotations)
116
  return (speakerList, annotations, int(waveformEnhanced.shape[-1]/sampleRate))
117
+
118
def removeOverlap(timeSegment, overlap):
    """Return the pieces of ``timeSegment`` that fall outside ``overlap``.

    The result is a list of zero, one or two ``Segment`` objects: the piece
    of ``timeSegment`` that starts before ``overlap.start`` (if any),
    followed by the piece that ends after ``overlap.end`` (if any).
    """
    startsBefore = timeSegment.start < overlap.start
    endsAfter = timeSegment.end > overlap.end

    leftovers = []
    if startsBefore:
        # min() keeps the piece inside timeSegment when the two spans
        # do not actually touch.
        leftEnd = min(overlap.start, timeSegment.end)
        leftovers.append(Segment(timeSegment.start, leftEnd))
    if endsAfter:
        # max() mirrors the clamp above for the right-hand piece.
        rightStart = max(timeSegment.start, overlap.end)
        leftovers.append(Segment(rightStart, timeSegment.end))
    return leftovers
125
+
126
def checkForOverlap(time1, time2):
    """Return ``time1 & time2`` when that intersection is truthy, else None."""
    # `or None` normalises every falsy intersection to None.
    return (time1 & time2) or None
132
+
133
 
134
def calcCategories(annotation, maxTime):
    """Split ``[0, maxTime]`` into no-voice, one-voice and multi-voice spans.

    Args:
        annotation: object exposing ``labels()`` and ``label_support(label)``;
            each label is a speaker and each item yielded by its support is
            one time span for that speaker.
        maxTime: end of the overall span; the no-voice list starts out as the
            single segment ``Segment(0, maxTime)``.

    Returns:
        A ``(noVoice, oneVoice, multiVoice)`` tuple where
        ``noVoice`` is a list of segments,
        ``oneVoice`` is a list of ``(speaker, segment)`` pairs and
        ``multiVoice`` is a list of ``([speaker, ...], segment)`` pairs.
    """
    noVoice = [Segment(0, maxTime)]
    oneVoice = []
    multiVoice = []

    # Speakers are handled one after another; each speaker's queue is drained
    # (including the remainders it generates) before the next one starts.
    # The data is read from the `annotation` argument, not from a global.
    for speaker in annotation.labels():
        timesToProcess = [
            (speaker, segment) for segment in annotation.label_support(speaker)
        ]
        while timesToProcess:
            currID, currTime = timesToProcess.pop(0)

            # 1) Already-known multi-voice span: join it, requeue the rest.
            remainders = _absorbIntoMulti(currID, currTime, multiVoice)
            if remainders is None:
                # 2) Single-voice span of someone else: split it.
                remainders = _splitAgainstOneVoice(
                    currID, currTime, oneVoice, multiVoice
                )
            if remainders is None:
                # 3) No clash at all: this is plain single-voice time.
                oneVoice.append((currID, currTime))
                continue
            timesToProcess += [(currID, rest) for rest in remainders]

    # Whatever is voiced (by one or many speakers) is not silence.
    for _, timeSlot in multiVoice:
        noVoice = _carveOut(noVoice, timeSlot)
    for _, timeSlot in oneVoice:
        noVoice = _carveOut(noVoice, timeSlot)
    return noVoice, oneVoice, multiVoice


def _absorbIntoMulti(currID, currTime, multiVoice):
    """Add ``currID`` to the first multi-voice slot overlapping ``currTime``.

    Returns the parts of ``currTime`` outside that slot, or None when no
    multi-voice slot overlaps.
    """
    for speakerIDs, timeSlot in multiVoice:
        if checkForOverlap(currTime, timeSlot) is None:
            continue
        # NOTE(review): the speaker is attached to the whole slot even when
        # only part of it overlaps currTime - confirm this is intended.
        speakerIDs.append(currID)
        return removeOverlap(currTime, timeSlot)
    return None


def _splitAgainstOneVoice(currID, currTime, oneVoice, multiVoice):
    """Split the first one-voice entry overlapping ``currTime``.

    The overlapping part moves to ``multiVoice`` (tagged with both speakers),
    the rest of the existing entry stays in ``oneVoice``. Returns the parts of
    ``currTime`` outside the entry, or None when nothing overlaps.
    """
    for entry in oneVoice:
        tID, tTime = entry
        overlapTime = checkForOverlap(currTime, tTime)
        if overlapTime is None:
            continue
        # Mutating oneVoice is safe here because we return immediately after.
        oneVoice.remove(entry)
        oneVoice += [(tID, rest) for rest in removeOverlap(tTime, currTime)]
        multiVoice.append(([tID, currID], overlapTime))
        return removeOverlap(currTime, tTime)
    return None


def _carveOut(freeSlots, busySlot):
    """Return a new list equal to ``freeSlots`` with ``busySlot`` removed."""
    # A shallow copy is enough: the slot objects themselves are never
    # modified here, only the list membership changes.
    remaining = list(freeSlots)
    for emptySlot in freeSlots:
        if checkForOverlap(busySlot, emptySlot) is None:
            continue
        remaining.remove(emptySlot)
        remaining += removeOverlap(emptySlot, busySlot)
    return remaining
214
+
215
def sumTimes(timeList):
    """Return the sum of ``.duration`` over every item of ``timeList``.

    An empty ``timeList`` gives ``0``.
    """
    return sum(timeSlot.duration for timeSlot in timeList)
220
+
221
def sumTimesPerSpeaker(timeSlotList):
    """Total the ``.duration`` of each slot per speaker key.

    ``timeSlotList`` holds ``(speaker, timeSlot)`` pairs. Returns two parallel
    lists ``(speakers, totals)`` in first-seen order. Keys are matched by
    equality in a list (not hashed), so unhashable keys such as lists work.
    """
    speakers = []
    totals = []
    for speaker, timeSlot in timeSlotList:
        try:
            position = speakers.index(speaker)
        except ValueError:
            # First time this key shows up: open a new running total.
            position = len(speakers)
            speakers.append(speaker)
            totals.append(0)
        totals[position] += timeSlot.duration
    return speakers, totals
230
+
231
def sumMultiTimesPerSpeaker(timeSlotList):
    """Total multi-voice time per individual speaker.

    ``timeSlotList`` holds ``(speakerGroup, timeSlot)`` pairs where
    ``speakerGroup`` is an iterable of speakers. Time is first totalled per
    group via ``sumTimesPerSpeaker`` and every member of a group is then
    credited with that group's full total. Returns parallel lists
    ``(speakers, totals)`` in first-seen order.
    """
    groups, groupTotals = sumTimesPerSpeaker(timeSlotList)

    speakers = []
    totals = []
    for speakerGroup, groupTime in zip(groups, groupTotals):
        for member in speakerGroup:
            if member in speakers:
                slot = speakers.index(member)
            else:
                slot = len(speakers)
                speakers.append(member)
                totals.append(0)
            totals[slot] += groupTime
    return speakers, totals
242
+
243
  #st.set_page_config(layout="wide")
244
  st.title("Lecturer Support Tool")
245
  if not isGPU:
 
354
  all_dataFrame = su.speakerListToDataFrame(sortedSpeakerList)
355
  currDF = all_dataFrame
356
 
357
# Categorise the recording into no-voice / one-voice / multi-voice spans and
# total the talk time per speaker for the charts built further down.
# NOTE: per the diff hunk header this fragment lives inside the
# `for i, tab in enumerate(audio_tabs):` loop; indent it to match that body.
# TODO: clean this up.
noVoice2, oneVoice2, multiVoice2 = calcCategories(annotations, totalSeconds)
noVoice2.sort()
oneVoice2.sort()
multiVoice2.sort()
# Feed the lists computed just above; the un-suffixed `oneVoice` is not
# defined here and `multiVoice` is only assigned later, with a different type.
sList, timeList = sumTimesPerSpeaker(oneVoice2)
multiSpeakerList, multiTimeList = sumMultiTimesPerSpeaker(multiVoice2)
364
+
365
  multiVoice = annotations.get_overlap()
366
  singleVoice = annotations.extrude(multiVoice).get_timeline()
367
  noVoice = Timeline(segments=[Segment(0,totalSeconds)]).extrude(singleVoice).extrude(multiVoice)
 
375
  )
376
  fig = px.pie(df, values='Duration', names='Category', title='Types of Discussion')
377
  tab.plotly_chart(fig, use_container_width=True)
378
+
379
# NOTE: per the diff hunk header this fragment lives inside the
# `for i, tab in enumerate(audio_tabs):` loop; indent it to match that body.

# Pie data: total time per speaker.
# NOTE(review): `rawSample` and the three hard-coded speaker names are not
# defined anywhere in the visible change - confirm they exist, or derive the
# values from `annotations` instead.
df4: pd.DataFrame = pd.DataFrame(
    {
        "values": [
            sumTimes(rawSample["speaker 1"]),
            sumTimes(rawSample["speaker 2"]),
            sumTimes(rawSample["speaker 3"]),
        ],
        "names": ["speaker 1", "speaker 2", "speaker 3"],
    }
)
df4.name = "df4"

# Treemap data: three top-level categories, with one child per speaker under
# "One Voice" and under "Multi Voice". All values come from the
# calcCategories() results (noVoice2 / oneVoice2 / multiVoice2) so parents and
# children are computed from the same source.
df5: pd.DataFrame = pd.DataFrame(
    {
        "ids": ["NV", "OV", "MV"]
        + [f"OV_{i}" for i in range(len(sList))]
        + [f"MV_{i}" for i in range(len(multiSpeakerList))],
        "labels": ["No Voice", "One Voice", "Multi Voice"] + sList + multiSpeakerList,
        "parents": ["", "", ""]
        + ["OV" for _ in sList]
        + ["MV" for _ in multiSpeakerList],
        "values": [
            sumTimes(noVoice2),
            # oneVoice2 / multiVoice2 hold (speaker(s), segment) pairs.
            sumTimes([n for _, n in oneVoice2]),
            sumTimes([n for _, n in multiVoice2]),
        ]
        + timeList
        + multiTimeList,
    }
)
df5.name = "df5"

fig2_spc = make_subplots(
    rows=2,
    cols=1,
    specs=[[{"type": "pie"}], [{"type": "treemap"}]],
    shared_xaxes=True,
)
fig2_spc.add_trace(
    go.Pie(values=df4["values"], labels=df4["names"]),
    row=1,
    col=1,
)
# Add the treemap to (and render) the subplot figure created above; the
# previous code targeted an unrelated name, `fig2`.
fig2_spc.add_trace(
    go.Treemap(
        labels=df5["labels"],
        parents=df5["parents"],
        ids=df5["ids"],
        values=df5["values"],
    ),
    row=2,
    col=1,
)
tab.plotly_chart(fig2_spc, use_container_width=True)
414
 
415
  # Lecturer vs. Audience
416
  #---------------------------------------------------------------------------