Spaces:
Sleeping
Sleeping
Updated Analysis and Charts
Browse files
app.py
CHANGED
|
@@ -23,50 +23,7 @@ from pyannote.core import Annotation, Segment, Timeline
|
|
| 23 |
from df.enhance import enhance, init_df
|
| 24 |
import datetime as dt
|
| 25 |
|
| 26 |
-
torch.classes.__path__ = [os.path.join(torch.__path__[0], torch.classes.__file__)]
|
| 27 |
-
|
| 28 |
-
PARQUET_DATASET_DIR = Path("parquet_dataset")
|
| 29 |
-
PARQUET_DATASET_DIR.mkdir(parents=True,exist_ok=True)
|
| 30 |
-
|
| 31 |
-
sample_data = [f"CHEM1402_gt/24F_CHEM1402_Night_Class_Week_{i}_gt.rttm" for i in range(1,11)]
|
| 32 |
|
| 33 |
-
|
| 34 |
-
scheduler = ps.ParquetScheduler(repo_id="Sonogram/SampleDataset")
|
| 35 |
-
|
| 36 |
-
secondDifference = 5
|
| 37 |
-
gainWindow = 4
|
| 38 |
-
minimumGain = -45
|
| 39 |
-
maximumGain = -5
|
| 40 |
-
attenLimDB = 3
|
| 41 |
-
|
| 42 |
-
isGPU = False
|
| 43 |
-
|
| 44 |
-
try:
|
| 45 |
-
raise(RuntimeError("Not an error"))
|
| 46 |
-
#device = xm.xla_device()
|
| 47 |
-
print("TPU is available.")
|
| 48 |
-
isGPU = True
|
| 49 |
-
except RuntimeError as e:
|
| 50 |
-
print(f"TPU is not available: {e}")
|
| 51 |
-
# Fallback to CPU or other devices if needed
|
| 52 |
-
isGPU = torch.cuda.is_available()
|
| 53 |
-
device = torch.device("cuda" if isGPU else "cpu")
|
| 54 |
-
print(f"Using {device} instead.")
|
| 55 |
-
#device = xm.xla_device()
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
# Instantiate and prepare model for training.
|
| 59 |
-
dfModel, dfState, _ = init_df(model_base_dir="DeepFilterNet3")
|
| 60 |
-
dfModel.to(device)#torch.device("cuda"))
|
| 61 |
-
pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1")
|
| 62 |
-
pipeline.to(device)#torch.device("cuda"))
|
| 63 |
-
|
| 64 |
-
# Store results for viewing and further processing
|
| 65 |
-
if 'results' not in st.session_state:
|
| 66 |
-
st.session_state.results = []
|
| 67 |
-
if 'summaries' not in st.session_state:
|
| 68 |
-
st.session_state.summaries = []
|
| 69 |
-
|
| 70 |
def save_data(
|
| 71 |
config_dict: Dict[str,str], audio_paths: List[str], userid: str,
|
| 72 |
) -> None:
|
|
@@ -109,136 +66,234 @@ def processFile(filePath):
|
|
| 109 |
waveform_gain_adjusted = su.equalizeVolume()(waveformEnhanced,sampleRate,gainWindow,minimumGain,maximumGain)
|
| 110 |
print("Audio Equalized")
|
| 111 |
print("Detecting speakers")
|
| 112 |
-
time.sleep(10)
|
| 113 |
annotations = pipeline({"waveform": waveformEnhanced, "sample_rate": sampleRate})
|
| 114 |
print("Speakers Detected")
|
| 115 |
speakerList = su.annotationToSpeakerList(annotations)
|
| 116 |
return (speakerList, annotations, int(waveformEnhanced.shape[-1]/sampleRate))
|
| 117 |
-
|
| 118 |
-
def removeOverlap(timeSegment,overlap):
|
| 119 |
-
times = []
|
| 120 |
-
if timeSegment.start < overlap.start:
|
| 121 |
-
times.append(Segment(timeSegment.start,min(overlap.start,timeSegment.end)))
|
| 122 |
-
if timeSegment.end > overlap.end:
|
| 123 |
-
times.append(Segment(max(timeSegment.start,overlap.end),timeSegment.end))
|
| 124 |
-
return times
|
| 125 |
-
|
| 126 |
-
def checkForOverlap(time1, time2):
|
| 127 |
-
overlap = time1 & time2
|
| 128 |
-
if overlap:
|
| 129 |
-
return overlap
|
| 130 |
-
else:
|
| 131 |
-
return None
|
| 132 |
-
|
| 133 |
|
| 134 |
-
def
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
for
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
continue
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
continue
|
| 175 |
else:
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 220 |
|
| 221 |
-
|
| 222 |
-
speakerList = []
|
| 223 |
-
timeList = []
|
| 224 |
-
for speaker,timeSlot in timeSlotList:
|
| 225 |
-
if speaker not in speakerList:
|
| 226 |
-
speakerList.append(speaker)
|
| 227 |
-
timeList.append(0)
|
| 228 |
-
timeList[speakerList.index(speaker)] += timeSlot.duration
|
| 229 |
-
return speakerList, timeList
|
| 230 |
|
| 231 |
-
|
| 232 |
-
speakerList = []
|
| 233 |
-
timeList = []
|
| 234 |
-
sList,tList = sumTimesPerSpeaker(timeSlotList)
|
| 235 |
-
for i,speakerGroup in enumerate(sList):
|
| 236 |
-
for speaker in speakerGroup:
|
| 237 |
-
if speaker not in speakerList:
|
| 238 |
-
speakerList.append(speaker)
|
| 239 |
-
timeList.append(0)
|
| 240 |
-
timeList[speakerList.index(speaker)] += tList[i]
|
| 241 |
-
return speakerList, timeList
|
| 242 |
|
| 243 |
#st.set_page_config(layout="wide")
|
| 244 |
st.title("Lecturer Support Tool")
|
|
@@ -259,7 +314,7 @@ if uploaded_file_paths is not None:
|
|
| 259 |
print("Found file paths")
|
| 260 |
valid_files = []
|
| 261 |
file_paths = []
|
| 262 |
-
|
| 263 |
# Reset valid_files?
|
| 264 |
for uploaded_file in uploaded_file_paths:
|
| 265 |
if not uploaded_file.name.endswith(supported_file_types):
|
|
@@ -273,311 +328,277 @@ if uploaded_file_paths is not None:
|
|
| 273 |
f.write(uploaded_file.getvalue())
|
| 274 |
valid_files.append(uploaded_file)
|
| 275 |
file_paths.append(path)
|
|
|
|
| 276 |
if len(valid_files) > 0:
|
| 277 |
-
|
| 278 |
while (len(st.session_state.results) < len(valid_files)):
|
| 279 |
st.session_state.results.append([])
|
| 280 |
while (len(st.session_state.summaries) < len(valid_files)):
|
| 281 |
st.session_state.summaries.append([])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 282 |
|
| 283 |
-
|
|
|
|
|
|
|
| 284 |
|
| 285 |
-
if
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
totalFiles = len(valid_files)
|
| 292 |
-
for i in range(totalFiles):
|
| 293 |
-
with st.spinner(text=f'Analyzing File {i+1} of {totalFiles}'):
|
| 294 |
-
# Text files use sample data
|
| 295 |
-
if file_paths[i].endswith('.txt'):
|
| 296 |
-
time.sleep(1)
|
| 297 |
-
# RTTM load as filler
|
| 298 |
-
speakerList, annotations = su.loadAudioRTTM(sample_data[i])
|
| 299 |
-
st.session_state.results[i] = (speakerList,annotations, 10000)
|
| 300 |
-
st.session_state.summaries[i] = []
|
| 301 |
-
else:
|
| 302 |
-
st.info(file_paths[i])
|
| 303 |
-
speakerList, annotations, totalSeconds = processFile(file_paths[i])
|
| 304 |
-
st.session_state.results[i] = (speakerList,annotations, totalSeconds)
|
| 305 |
-
st.session_state.summaries[i] = []
|
| 306 |
-
st.success(f"Took {time.time() - start_time} seconds to analyze {totalFiles} files!")
|
| 307 |
-
|
| 308 |
-
for i, tab in enumerate(audio_tabs):
|
| 309 |
-
if tab.button("Analyze Audio",key=f"button_{i}"):
|
| 310 |
-
start_time = time.time()
|
| 311 |
-
# Text files use sample data
|
| 312 |
-
if file_paths[i].endswith('.txt'):
|
| 313 |
-
with st.spinner(text='NOT ACTUALLY ANALYZING, JUST A FILLER ANIMATION'):
|
| 314 |
-
time.sleep(1)
|
| 315 |
-
# RTTM load as filler
|
| 316 |
-
speakerList, annotations = su.loadAudioRTTM(sample_data[i])
|
| 317 |
-
st.session_state.results[i] = (speakerList,annotations,10000)
|
| 318 |
-
st.session_state.summaries[i] = []
|
| 319 |
else:
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
|
| 427 |
-
|
| 428 |
-
|
| 429 |
-
|
| 430 |
-
|
| 431 |
-
|
| 432 |
-
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
|
| 452 |
-
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
|
| 456 |
-
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
int((lecturer_speaker_times[0]%3600)/60),
|
| 460 |
-
int(lecturer_speaker_times[0]%60)),
|
| 461 |
-
"{}h:{:02d}m:{:02d}s".format(int(lecturer_speaker_times[1]/3600),
|
| 462 |
-
int((lecturer_speaker_times[1]%3600)/60),
|
| 463 |
-
int(lecturer_speaker_times[1]%60))],
|
| 464 |
-
"Percentage": [
|
| 465 |
-
"{:.2f}%".format(100*lecturer_speaker_times[0]/totalSeconds),
|
| 466 |
-
"{:.2f}%".format(100*lecturer_speaker_times[1]/totalSeconds),
|
| 467 |
-
],
|
| 468 |
-
}
|
| 469 |
-
)
|
| 470 |
-
df = df.style \
|
| 471 |
-
.format_index(str.upper, axis=1) \
|
| 472 |
-
.relabel_index(["Lecturer", "Audience"], axis=0) \
|
| 473 |
-
#.set_properties(**{"background-color": "white"})
|
| 474 |
-
tab.write("Total length of audio: {}h:{:02d}m:{:02d}s".format(int(totalSeconds/3600),int((totalSeconds%3600)/60),int(totalSeconds%60)))
|
| 475 |
-
tab.table(df)
|
| 476 |
-
#tab.write("Lecturer spoke: {}h:{:02d}m:{:02d}s -> {:.2f}% of time".format(int(lecturer_speaker_times[0]/3600),
|
| 477 |
-
# int((lecturer_speaker_times[0]%3600)/60),int(lecturer_speaker_times[0]%60),
|
| 478 |
-
# 100*lecturer_speaker_times[0]/totalSeconds))
|
| 479 |
-
#tab.write("Audience spoke: {}h:{:02d}m:{:02d}s -> {:.2f}% of time".format(int(lecturer_speaker_times[1]/3600),
|
| 480 |
-
# int((lecturer_speaker_times[1]%3600)/60),int(lecturer_speaker_times[1]%60),
|
| 481 |
-
# 100*lecturer_speaker_times[1]/totalSeconds))
|
| 482 |
-
|
| 483 |
-
# Experimental Speaker Breakdown
|
| 484 |
-
#------------------------------------------------------------------------------
|
| 485 |
-
fig_spc = px.timeline(all_dataFrame, x_start="Start", x_end="Finish", y="Resource", color="Resource")
|
| 486 |
-
fig_spc.update_yaxes(autorange="reversed")
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
hMax = int(totalSeconds//3600)
|
| 490 |
-
mMax = int(totalSeconds%3600//60)
|
| 491 |
-
sMax = int(totalSeconds%60)
|
| 492 |
-
msMax = int(totalSeconds*1000000%1000000)
|
| 493 |
-
timeMax = dt.time(hMax,mMax,sMax,msMax)
|
| 494 |
-
|
| 495 |
-
fig_spc.update_layout(
|
| 496 |
-
xaxis_tickformatstops = [
|
| 497 |
-
dict(dtickrange=[None, 1000], value="%H:%M:%S.%L"),
|
| 498 |
-
dict(dtickrange=[1000, None], value="%H:%M:%S")
|
| 499 |
-
],
|
| 500 |
-
xaxis=dict(
|
| 501 |
-
range=[dt.datetime.combine(dt.date.today(), dt.time.min),dt.datetime.combine(dt.date.today(), timeMax)]
|
| 502 |
-
),
|
| 503 |
-
xaxis_title="Time",
|
| 504 |
-
yaxis_title="Speaker",
|
| 505 |
-
legend_title=None
|
| 506 |
-
)
|
| 507 |
-
|
| 508 |
-
tab.plotly_chart(fig_spc, use_container_width=True)
|
| 509 |
-
|
| 510 |
-
dataTimeList = []
|
| 511 |
-
for j, totalTime in enumerate(all_speaker_times):
|
| 512 |
-
dataTimeList.append(dict(Task=j,x=totalTime/totalSeconds*100,y=f'Speaker {j+1}'))
|
| 513 |
-
df2 = pd.DataFrame(dataTimeList)
|
| 514 |
-
fig2_spc = px.bar(dataTimeList, x="x", y="y", color="y", orientation='h')
|
| 515 |
-
fig2_spc.update_xaxes(ticksuffix="%")
|
| 516 |
-
fig2_spc.update_yaxes(autorange="reversed")
|
| 517 |
-
fig2_spc.update_layout(
|
| 518 |
-
xaxis_title="Percentage Time Spoken",
|
| 519 |
-
yaxis_title="Speaker",
|
| 520 |
-
legend_title=None
|
| 521 |
|
| 522 |
-
|
| 523 |
-
|
| 524 |
-
|
| 525 |
-
|
| 526 |
-
|
| 527 |
-
|
| 528 |
-
|
| 529 |
-
|
| 530 |
-
|
| 531 |
-
|
| 532 |
-
|
| 533 |
-
|
| 534 |
-
|
| 535 |
-
|
| 536 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 537 |
|
| 538 |
-
|
| 539 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 540 |
|
| 541 |
-
|
| 542 |
-
|
| 543 |
-
|
| 544 |
-
|
| 545 |
-
|
| 546 |
-
|
| 547 |
-
|
| 548 |
-
|
| 549 |
-
|
| 550 |
-
|
| 551 |
-
|
| 552 |
-
|
| 553 |
-
|
| 554 |
-
|
| 555 |
-
|
| 556 |
-
|
| 557 |
-
|
| 558 |
-
|
| 559 |
-
|
| 560 |
-
|
| 561 |
-
|
| 562 |
-
|
| 563 |
-
|
| 564 |
-
|
| 565 |
-
|
| 566 |
-
|
| 567 |
-
|
| 568 |
-
|
| 569 |
-
|
| 570 |
-
|
| 571 |
-
|
| 572 |
-
|
| 573 |
-
|
| 574 |
-
|
| 575 |
-
|
| 576 |
-
|
| 577 |
-
|
| 578 |
-
|
| 579 |
-
|
| 580 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 581 |
colorPref = st.text_input("Favorite color?", "None")
|
| 582 |
radio = st.radio('Pick one:', ['Left','Right'])
|
| 583 |
selection = st.selectbox('Select', [1,2,3])
|
|
@@ -586,7 +607,7 @@ if st.button("Upload Files to Dataset"):
|
|
| 586 |
file_paths,
|
| 587 |
userid)
|
| 588 |
st.success('I think it worked!')
|
| 589 |
-
|
| 590 |
@st.cache_data
|
| 591 |
def convert_df(df):
|
| 592 |
return df.to_csv(index=False).encode('utf-8')
|
|
|
|
| 23 |
from df.enhance import enhance, init_df
|
| 24 |
import datetime as dt
|
| 25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
def save_data(
|
| 28 |
config_dict: Dict[str,str], audio_paths: List[str], userid: str,
|
| 29 |
) -> None:
|
|
|
|
| 66 |
waveform_gain_adjusted = su.equalizeVolume()(waveformEnhanced,sampleRate,gainWindow,minimumGain,maximumGain)
|
| 67 |
print("Audio Equalized")
|
| 68 |
print("Detecting speakers")
|
|
|
|
| 69 |
annotations = pipeline({"waveform": waveformEnhanced, "sample_rate": sampleRate})
|
| 70 |
print("Speakers Detected")
|
| 71 |
speakerList = su.annotationToSpeakerList(annotations)
|
| 72 |
return (speakerList, annotations, int(waveformEnhanced.shape[-1]/sampleRate))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
|
| 74 |
+
def addCategory():
|
| 75 |
+
newCategory = st.session_state.categoryInput
|
| 76 |
+
st.toast(f"Adding {newCategory}")
|
| 77 |
+
st.session_state[f'multiselect_{newCategory}'] = []
|
| 78 |
+
st.session_state.categories.append(newCategory)
|
| 79 |
+
st.session_state.categoryInput = ''
|
| 80 |
+
for resultGroup in st.session_state.categorySelect:
|
| 81 |
+
resultGroup.append([])
|
| 82 |
+
|
| 83 |
+
def removeCategory(index):
|
| 84 |
+
categoryName = st.session_state.categories[index]
|
| 85 |
+
st.toast(f"Removing {categoryName}")
|
| 86 |
+
del st.session_state[f'multiselect_{categoryName}']
|
| 87 |
+
del st.session_state[f'remove_{categoryName}']
|
| 88 |
+
del st.session_state.categories[index]
|
| 89 |
+
for resultGroup in st.session_state.categorySelect:
|
| 90 |
+
del resultGroup[index]
|
| 91 |
+
|
| 92 |
+
def updateCategoryOptions(resultIndex):
|
| 93 |
+
if st.session_state.resetResult:
|
| 94 |
+
#st.info(f"Skipping update of {resultIndex}")
|
| 95 |
+
return
|
| 96 |
+
#st.info(f"Updating result {resultIndex}")
|
| 97 |
+
#st.info(f"In update: {st.session_state.categorySelect}")
|
| 98 |
+
# Handle
|
| 99 |
+
currResults = st.session_state.results[resultIndex][1]
|
| 100 |
+
speakerNames = [sp for sp in currResults["speakers"].keys()]
|
| 101 |
+
|
| 102 |
+
# Handle speaker category sidebars
|
| 103 |
+
unusedSpeakers = copy.deepcopy(speakerNames)
|
| 104 |
+
# Remove used speakers
|
| 105 |
+
for i, category in enumerate(st.session_state['categories']):
|
| 106 |
+
category_choices = copy.deepcopy(st.session_state[f'multiselect_{category}'])
|
| 107 |
+
st.session_state["categorySelect"][resultIndex][i] = category_choices
|
| 108 |
+
for sp in category_choices:
|
| 109 |
+
try:
|
| 110 |
+
unusedSpeakers.remove(sp)
|
| 111 |
+
except:
|
| 112 |
continue
|
| 113 |
+
st.session_state.unusedSpeakers[resultIndex] = unusedSpeakers
|
| 114 |
+
#st.info(f"After update: {st.session_state.categorySelect}")
|
| 115 |
+
|
| 116 |
+
def updateMultiSelect():
|
| 117 |
+
currFileIndex = file_names.index(st.session_state["select_currFile"])
|
| 118 |
+
st.session_state.resetResult = True
|
| 119 |
+
for i, category in enumerate(st.session_state['categories']):
|
| 120 |
+
st.session_state[f'multiselect_{category}'] = st.session_state['categorySelect'][currFileIndex][i]
|
| 121 |
+
|
| 122 |
+
def analyze(inFileName):
|
| 123 |
+
try:
|
| 124 |
+
st.session_state.resetResult = False
|
| 125 |
+
currFileIndex = file_names.index(inFileName)
|
| 126 |
+
if len(st.session_state.results) > currFileIndex and len(st.session_state.summaries) > currFileIndex and len(st.session_state.results[currFileIndex]) > 0:
|
| 127 |
+
# Handle
|
| 128 |
+
currSpeakerList, currAnnotation, currTotalTime = st.session_state.results[currFileIndex]
|
| 129 |
+
speakerNames = currAnnotation.labels()
|
| 130 |
+
|
| 131 |
+
# Update other categories
|
| 132 |
+
unusedSpeakers = st.session_state.unusedSpeakers[currFileIndex]
|
| 133 |
+
categorySelections = st.session_state["categorySelect"][currFileIndex]
|
| 134 |
+
|
| 135 |
+
noVoice, oneVoice, multiVoice = su.calcSpeakingTypes(currAnnotation,currTotalTime)
|
| 136 |
+
noVoice.sort()
|
| 137 |
+
oneVoice.sort()
|
| 138 |
+
multiVoice.sort()
|
| 139 |
+
|
| 140 |
+
df3 = pd.DataFrame(
|
| 141 |
+
{
|
| 142 |
+
"values": [sumTimes(noVoice),
|
| 143 |
+
sumTimes([n for _,n in oneVoice]),
|
| 144 |
+
sumTimes([n for _,n in multiVoice])],
|
| 145 |
+
"names": ["No Voice","One Voice","Multi Voice"],
|
| 146 |
+
}
|
| 147 |
+
)
|
| 148 |
+
df3.name = "df3"
|
| 149 |
+
st.session_state.summaries[currFileIndex]["df3"] = df3
|
| 150 |
+
|
| 151 |
+
canRemoveMaybe = '''df4_dict = {}
|
| 152 |
+
nameList = st.session_state.categories
|
| 153 |
+
extraNames = []
|
| 154 |
+
valueList = [0 for i in range(len(nameList))]
|
| 155 |
+
extraValues = []
|
| 156 |
+
|
| 157 |
+
for sp in currResults["speakers"].keys():
|
| 158 |
+
foundSp = False
|
| 159 |
+
for i, categoryName in enumerate(nameList):
|
| 160 |
+
if sp in categorySelections[i]:
|
| 161 |
+
#st.info(categoryName)
|
| 162 |
+
valueList[i] += sumTimes(currResults["speakers"][sp])
|
| 163 |
+
foundSp = True
|
| 164 |
+
break
|
| 165 |
+
if foundSp:
|
| 166 |
continue
|
| 167 |
else:
|
| 168 |
+
extraNames.append(sp)
|
| 169 |
+
extraValues.append(sumTimes(currResults["speakers"][sp]))
|
| 170 |
+
df4_dict = {
|
| 171 |
+
"values": valueList+extraValues,
|
| 172 |
+
"names": nameList+extraNames,
|
| 173 |
+
}
|
| 174 |
+
df4 = pd.DataFrame(data=df4_dict)
|
| 175 |
+
df4.name = "df4"
|
| 176 |
+
st.session_state.summaries[currFileIndex]["df4"] = df4'''
|
| 177 |
+
|
| 178 |
+
speakerList,timeList = sumTimesPerSpeaker(oneVoice)
|
| 179 |
+
multiSpeakerList, multiTimeList = sumMultiTimesPerSpeaker(multiVoice)
|
| 180 |
+
summativeMultiSpeaker = sum(multiTimeList)
|
| 181 |
+
sumNoVoice = sumTimes(noVoice)
|
| 182 |
+
sumOneVoice = sumTimes([n for _,n in oneVoice])
|
| 183 |
+
sumMultiVoice = sumTimes([n for _,n in multiVoice])
|
| 184 |
+
basePercentiles = [sumNoVoice/currTotalTime,
|
| 185 |
+
sumOneVoice/currTotalTime,
|
| 186 |
+
sumMultiVoice/currTotalTime
|
| 187 |
+
]
|
| 188 |
+
df5 = pd.DataFrame(
|
| 189 |
+
{
|
| 190 |
+
"ids" : ["NV","OV","MV"]+[f"OV_{i}" for i in range(len(speakerList))]
|
| 191 |
+
+[f"MV_{i}" for i in range(len(multiSpeakerList))],
|
| 192 |
+
"labels" : ["No Voice","One Voice","Multi Voice"] + speakerList + multiSpeakerList,
|
| 193 |
+
"parents" : ["","",""]+["OV" for i in range(len(speakerList))]
|
| 194 |
+
+["MV" for i in range(len(multiSpeakerList))],
|
| 195 |
+
"parentNames" : ["Total","Total","Total"]+["One Voice" for i in range(len(speakerList))]
|
| 196 |
+
+["Multi Voice" for i in range(len(multiSpeakerList))],
|
| 197 |
+
"values" : [sumNoVoice,
|
| 198 |
+
sumOneVoice,
|
| 199 |
+
sumMultiVoice,
|
| 200 |
+
] + timeList + multiTimeList,
|
| 201 |
+
"valueStrings" : [timeToString(sumNoVoice),
|
| 202 |
+
timeToString(sumOneVoice),
|
| 203 |
+
timeToString(sumMultiVoice),
|
| 204 |
+
] + timeToString(timeList) + timeToString(multiTimeList),
|
| 205 |
+
"percentiles" : [basePercentiles[0]*100,
|
| 206 |
+
basePercentiles[1]*100,
|
| 207 |
+
basePercentiles[2]*100] +
|
| 208 |
+
[(t*100) / sumOneVoice * basePercentiles[1] for t in timeList] +
|
| 209 |
+
[(t*100) / summativeMultiSpeaker * basePercentiles[2] for t in multiTimeList],
|
| 210 |
+
"parentPercentiles" : [basePercentiles[0]*100,
|
| 211 |
+
basePercentiles[1]*100,
|
| 212 |
+
basePercentiles[2]*100] +
|
| 213 |
+
[(t*100) / sumOneVoice for t in timeList] +
|
| 214 |
+
[(t*100) / summativeMultiSpeaker for t in multiTimeList],
|
| 215 |
+
|
| 216 |
+
}
|
| 217 |
+
)
|
| 218 |
+
df5.name = "df5"
|
| 219 |
+
st.session_state.summaries[currFileIndex]["df5"] = df5
|
| 220 |
+
|
| 221 |
+
speakers_dataFrame,speakers_times = su.annotationToDataFrame(currAnnotation)
|
| 222 |
+
st.session_state.summaries[currFileIndex]["speakers_dataFrame"] = speakers_dataFrame
|
| 223 |
+
st.session_state.summaries[currFileIndex]["speakers_times"] = speakers_times
|
| 224 |
+
|
| 225 |
+
df2_dict = {
|
| 226 |
+
"values":[100*t/currResults["duration"] for t in df4_dict["values"]],
|
| 227 |
+
"names":df4_dict["names"]
|
| 228 |
+
}
|
| 229 |
+
df2 = pd.DataFrame(df2_dict)
|
| 230 |
+
st.session_state.summaries[currFileIndex]["df2"] = df2
|
| 231 |
+
except ValueError:
|
| 232 |
+
pass
|
| 233 |
+
|
| 234 |
+
#----------------------------------------------------------------------------------------------------------------------
|
| 235 |
+
|
| 236 |
+
torch.classes.__path__ = [os.path.join(torch.__path__[0], torch.classes.__file__)]
|
| 237 |
+
|
| 238 |
+
PARQUET_DATASET_DIR = Path("parquet_dataset")
|
| 239 |
+
PARQUET_DATASET_DIR.mkdir(parents=True,exist_ok=True)
|
| 240 |
+
|
| 241 |
+
sample_data = [f"CHEM1402_gt/24F_CHEM1402_Night_Class_Week_{i}_gt.rttm" for i in range(1,11)]
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
scheduler = ps.ParquetScheduler(repo_id="Sonogram/SampleDataset")
|
| 245 |
+
|
| 246 |
+
secondDifference = 5
|
| 247 |
+
gainWindow = 4
|
| 248 |
+
minimumGain = -45
|
| 249 |
+
maximumGain = -5
|
| 250 |
+
attenLimDB = 3
|
| 251 |
+
|
| 252 |
+
isGPU = False
|
| 253 |
+
|
| 254 |
+
try:
|
| 255 |
+
raise(RuntimeError("Not an error"))
|
| 256 |
+
#device = xm.xla_device()
|
| 257 |
+
print("TPU is available.")
|
| 258 |
+
isGPU = True
|
| 259 |
+
except RuntimeError as e:
|
| 260 |
+
print(f"TPU is not available: {e}")
|
| 261 |
+
# Fallback to CPU or other devices if needed
|
| 262 |
+
isGPU = torch.cuda.is_available()
|
| 263 |
+
device = torch.device("cuda" if isGPU else "cpu")
|
| 264 |
+
print(f"Using {device} instead.")
|
| 265 |
+
#device = xm.xla_device()
|
| 266 |
+
|
| 267 |
+
|
| 268 |
+
# Instantiate and prepare model for training.
|
| 269 |
+
dfModel, dfState, _ = init_df(model_base_dir="DeepFilterNet3")
|
| 270 |
+
dfModel.to(device)#torch.device("cuda"))
|
| 271 |
+
pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1")
|
| 272 |
+
pipeline.to(device)#torch.device("cuda"))
|
| 273 |
+
|
| 274 |
+
# Store results for viewing and further processing
|
| 275 |
+
# Long-range usage
|
| 276 |
+
if 'results' not in st.session_state:
|
| 277 |
+
st.session_state.results = []
|
| 278 |
+
if 'summaries' not in st.session_state:
|
| 279 |
+
st.session_state.summaries = []
|
| 280 |
+
if 'categories' not in st.session_state:
|
| 281 |
+
st.session_state.categories = ["Lecturer","Audience"]
|
| 282 |
+
st.session_state.categorySelect = []
|
| 283 |
+
# Single Use
|
| 284 |
+
if 'removeCategory' not in st.session_state:
|
| 285 |
+
st.session_state.removeCategory = None
|
| 286 |
+
if 'resetResult' not in st.session_state:
|
| 287 |
+
st.session_state.resetResult = False
|
| 288 |
+
# Specific to target file
|
| 289 |
+
if 'unusedSpeakers' not in st.session_state:
|
| 290 |
+
st.session_state.unusedSpeakers = []
|
| 291 |
+
if 'file_names' not in st.session_state:
|
| 292 |
+
st.session_state.file_names = []
|
| 293 |
|
| 294 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 295 |
|
| 296 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 297 |
|
| 298 |
#st.set_page_config(layout="wide")
# Page title shown at the top of the app.
st.title("Lecturer Support Tool")
|
|
|
|
| 314 |
print("Found file paths")
# Fresh accumulators for this rerun's upload pass.
valid_files, file_paths, file_names = [], [], []
|
| 318 |
# Reset valid_files?
|
| 319 |
for uploaded_file in uploaded_file_paths:
|
| 320 |
if not uploaded_file.name.endswith(supported_file_types):
|
|
|
|
| 328 |
f.write(uploaded_file.getvalue())
|
| 329 |
valid_files.append(uploaded_file)
|
| 330 |
file_paths.append(path)
|
| 331 |
+
# Save valid file names and grow the per-file session-state slots so every
# uploaded file has a results/summaries/unusedSpeakers/categorySelect entry.
if len(valid_files) > 0:
    file_names = [f.name for f in valid_files]
    while (len(st.session_state.results) < len(valid_files)):
        st.session_state.results.append([])
    # One padding pass is enough — the original repeated this exact loop a
    # second time further down, which was a no-op.
    while (len(st.session_state.summaries) < len(valid_files)):
        st.session_state.summaries.append([])
    while (len(st.session_state.unusedSpeakers) < len(valid_files)):
        st.session_state.unusedSpeakers.append([])
    while (len(st.session_state.categorySelect) < len(valid_files)):
        # One (initially empty) speaker list per known category.
        tempCategories = [[] for cat in st.session_state.categories]
        st.session_state.categorySelect.append(tempCategories)
    # Clear replaced files: drop cached analysis whose stored identifier no
    # longer matches the upload at the same position.
    # NOTE(review): after analysis, results[i][0] holds the speaker list, not
    # the file name — confirm this comparison matches the stored tuple format.
    for i in range(len(valid_files)):
        if len(st.session_state.results[i]) > 0 and st.session_state.results[i][0] != file_names[i]:
            st.session_state.results[i] = []
            st.session_state.summaries[i] = []
    st.session_state.file_names = file_names

# Fall back to the last remembered names when nothing was uploaded this run.
file_names = st.session_state.file_names
|
| 353 |
+
|
| 354 |
+
# Sidebar file picker; switching files refreshes the category multiselects.
currFile = st.sidebar.selectbox('Current File', file_names, on_change=updateMultiSelect, key="select_currFile")

if len(file_names) == 0:
    st.text("Upload file(s) to enable analysis")
else:
    # Plain string key — the original used an f-string with no placeholders.
    if st.button("Analyze All New Audio", key="button_all"):
        if len(valid_files) == 0:
            st.error('Upload file(s) first!')
        else:
            print("Start analyzing")
            start_time = time.time()
            totalFiles = len(valid_files)
            for i in range(totalFiles):
                # Skip files whose cached result already matches this name.
                if len(st.session_state.results) > i and len(st.session_state.results[i]) > 0 and st.session_state.results[i][0] == file_names[i]:
                    continue
                with st.spinner(text=f'Analyzing File {i+1} of {totalFiles}'):
                    if file_paths[i].endswith('.txt'):
                        # Text files are demo placeholders: load a pre-computed
                        # sample RTTM instead of running the pipeline.
                        time.sleep(1)
                        speakerList, annotations = su.loadAudioRTTM(sample_data[i])
                        # Approximate the total duration as the latest segment end.
                        totalSeconds = max((segment.end for segment in annotations.itersegments()), default=0)
                    else:
                        st.info(file_paths[i])
                        speakerList, annotations, totalSeconds = processFile(file_paths[i])
                    # Common bookkeeping for both branches (was duplicated).
                    st.session_state.results[i] = (speakerList, annotations, totalSeconds)
                    st.session_state.summaries[i] = {}
                    speakerNames = annotations.labels()
                    st.session_state.unusedSpeakers[i] = speakerNames
            st.success(f"Took {time.time() - start_time} seconds to analyze {totalFiles} files!")
|
| 392 |
+
|
| 393 |
+
# --- Per-file visualization ---------------------------------------------
# Renders category pickers and charts for the file selected in the sidebar.
if currFile is None: #Do we need more? -> and len(st.session_state.results) > 0 and len(st.session_state.results[0]) > 0:
    st.write("Select a file to view from the sidebar")
# NOTE(review): the bare `except ValueError: pass` below silences the
# file_names.index() miss, but it would also hide any other ValueError raised
# inside this whole block — consider narrowing the try body.
try:
    st.session_state.resetResult = False
    currFileIndex = file_names.index(currFile)
    # Only render once this file has analysis results.
    if len(st.session_state.results) > currFileIndex and len(st.session_state.summaries) > currFileIndex and len(st.session_state.results[currFileIndex]) > 0:
        # Handle
        currSpeakerList, currAnnotation, currTotalTime = st.session_state.results[currFileIndex]
        speakerNames = currAnnotation.labels()

        # Update other categories: one multiselect (+ remove button) per
        # category; speakers not yet assigned anywhere stay selectable.
        unusedSpeakers = st.session_state.unusedSpeakers[currFileIndex]
        categorySelections = st.session_state["categorySelect"][currFileIndex]
        for i,category in enumerate(st.session_state.categories):
            speakerSet = categorySelections[i]
            st.sidebar.multiselect(category,
                speakerSet+unusedSpeakers,
                default=speakerSet,
                key=f"multiselect_{category}",
                on_change=updateCategoryOptions,
                args=(currFileIndex,))
            st.sidebar.button(f"Remove {category}",key=f"remove_{category}",on_click=removeCategory,args=(i,))

        # Text input adds a new category via the addCategory callback.
        newCategory = st.sidebar.text_input('Add category', key='categoryInput',on_change=addCategory)

        # Build df4: total speaking time per category, plus one entry per
        # still-unassigned speaker.
        df4_dict = {}
        nameList = st.session_state.categories
        extraNames = []
        valueList = [0 for i in range(len(nameList))]
        extraValues = []

        for i,speakerSet in enumerate(categorySelections):
            valueList[i] += su.sumTimes(currAnnotation.subset(speakerSet))

        for sp in unusedSpeakers:
            extraNames.append(sp)
            extraValues.append(su.sumTimes(currAnnotation.subset([sp])))

        df4_dict = {
            "names": nameList+extraNames,
            "values": valueList+extraValues,
        }
        df4 = pd.DataFrame(data=df4_dict)
        df4.name = "df4"
        st.session_state.summaries[currFileIndex]["df4"] = df4

        # NOTE(review): df2/df3/df5/speakers_dataFrame/speakers_times are read
        # from the summary dict but only df4 is written here — presumably the
        # others are populated by processFile or code outside this view; a
        # missing key raises KeyError, which the except below does NOT catch.
        df2 = st.session_state.summaries[currFileIndex]["df2"]
        df3 = st.session_state.summaries[currFileIndex]["df3"]
        df4 = st.session_state.summaries[currFileIndex]["df4"]
        df5 = st.session_state.summaries[currFileIndex]["df5"]
        speakers_dataFrame = st.session_state.summaries[currFileIndex]["speakers_dataFrame"]
        currDF = speakers_dataFrame
        speakers_times = st.session_state.summaries[currFileIndex]["speakers_times"]

        # generate plotting window: pies for per-speaker (df3) and
        # per-category (df4) time, sunburst + treemap for the df5 hierarchy.
        fig1 = go.Figure()
        fig1.add_trace(go.Pie(values=df3["values"],labels=df3["names"]))
        fig2 = go.Figure()
        fig2.add_trace(go.Pie(values=df4["values"],labels=df4["names"]))
        fig3_1 = px.sunburst(df5,
            branchvalues = 'total',
            names = "labels",
            ids = "ids",
            parents = "parents",
            values = "percentiles",
            custom_data=['labels','valueStrings','percentiles','parentNames','parentPercentiles'],
            color = 'labels',
            )
        fig3_1.update_traces(
            hovertemplate="<br>".join([
                '<b>%{customdata[0]}</b>',
                'Duration: %{customdata[1]}s',
                'Percentage of Total: %{customdata[2]:.2f}%',
                'Parent: %{customdata[3]}',
                'Percentage of Parent: %{customdata[4]:.2f}%'
            ])
        )
        fig3 = px.treemap(df5,
            branchvalues = "total",
            names = "labels",
            parents = "parents",
            ids="ids",
            values = "percentiles",
            custom_data=['labels','valueStrings','percentiles','parentNames','parentPercentiles'],
            color='labels',
            )
        fig3.update_traces(
            hovertemplate="<br>".join([
                '<b>%{customdata[0]}</b>',
                'Duration: %{customdata[1]}s',
                'Percentage of Total: %{customdata[2]:.2f}%',
                'Parent: %{customdata[3]}',
                'Percentage of Parent: %{customdata[4]:.2f}%'
            ])
        )
        st.plotly_chart(fig1, use_container_width=True)
        st.plotly_chart(fig2, use_container_width=True)
        st.plotly_chart(fig3_1, use_container_width=True)
        st.plotly_chart(fig3, use_container_width=True)

        # Gantt-style timeline of who spoke when.
        fig_la = px.timeline(speakers_dataFrame, x_start="Start", x_end="Finish", y="Resource", color="Resource")
        fig_la.update_yaxes(autorange="reversed")

        # Convert the float duration (seconds) into a datetime.time for the
        # x-axis upper bound (microseconds from the fractional part).
        hMax = int(currTotalTime//3600)
        mMax = int(currTotalTime%3600//60)
        sMax = int(currTotalTime%60)
        msMax = int(currTotalTime*1000000%1000000)
        timeMax = dt.time(hMax,mMax,sMax,msMax)

        fig_la.update_layout(
            xaxis_tickformatstops = [
                dict(dtickrange=[None, 1000], value="%H:%M:%S.%L"),
                dict(dtickrange=[1000, None], value="%H:%M:%S")
            ],
            xaxis=dict(
                range=[dt.datetime.combine(dt.date.today(), dt.time.min),dt.datetime.combine(dt.date.today(), timeMax)]
            ),
            xaxis_title="Time",
            yaxis_title="Speaker",
            legend_title=None
        )

        st.plotly_chart(fig_la, use_container_width=True)

        # Horizontal bar chart: percentage of time spoken per speaker (df2).
        fig2_la = px.bar(df2, x="values", y="names", color="names", orientation='h',
            custom_data=["names","values"])
        fig2_la.update_xaxes(ticksuffix="%")
        fig2_la.update_yaxes(autorange="reversed")
        fig2_la.update_layout(
            xaxis_title="Percentage Time Spoken",
            yaxis_title="Speaker",
            legend_title=None
        )
        fig2_la.update_traces(
            hovertemplate="<br>".join([
                '<b>%{customdata[0]}</b>',
                'Percentage of Time: %{customdata[1]:.2f}%'
            ])
        )
        st.plotly_chart(fig2_la, use_container_width=True)
except ValueError:
    # currFile not found in file_names (e.g. stale selection) — render nothing.
    pass
|
| 537 |
+
|
| 538 |
+
# --- Cross-file summary charts ------------------------------------------
# Aggregates category and voice-activity percentages across all analyzed files.
if len(st.session_state.results) > 0:
    with st.spinner(text='Processing summary results...'):
        fileNames = []
        results = []
        indices = []
        # NOTE(review): this branch expects 2-tuples (fileName, result), but the
        # analysis loop above stores 3-tuples (speakerList, annotations,
        # totalSeconds) — confirm which tuple format is the current one; with
        # 3-tuples nothing is collected here.
        for i, resultTuple in enumerate(st.session_state.results):
            if len(resultTuple) == 2:
                fileNames.append(resultTuple[0])
                results.append(resultTuple[1])
                indices.append(i)
        # Summary charts only make sense with at least two files.
        if len(indices) > 1:

            df6_dict = {
                "files":fileNames,
            }
            # Deep-copy so extra per-file categories don't mutate the shared list.
            allCategories = copy.deepcopy(st.session_state.categories)
            for i in indices:
                currResult = st.session_state.results[i][1]
                categorySelections = st.session_state["categorySelect"][i]
                catSummary,extraCats = calcCategories(currResult["speakers"],categorySelections)
                st.session_state.summaries[i]["categories"] = (catSummary,extraCats)
                for extra in extraCats:
                    df6_dict[extra] = []
                    if extra not in allCategories:
                        allCategories.append(extra)

            # Per file: fraction of total duration spoken in each category;
            # categories missing for a file get 0 so columns stay equal length.
            for category in st.session_state.categories:
                df6_dict[category] = []
            for i in indices:
                summary, extras = st.session_state.summaries[i]["categories"]
                theseCategories = st.session_state.categories + extras
                for j, timeSlots in enumerate(summary):
                    df6_dict[theseCategories[j]].append(sumTimes([t for _,t in timeSlots])/st.session_state.results[i][1]['duration'])
                for category in allCategories:
                    if category not in theseCategories:
                        df6_dict[category].append(0)
            df6 = pd.DataFrame(df6_dict)
            summFig = px.bar(df6, x="files", y=allCategories)
            st.plotly_chart(summFig, use_container_width=True)

            # Voice-activity breakdown per file, pulled from each file's df5
            # (rows 0..2 are assumed to be No/One/Multi Voice — TODO confirm).
            voiceNames = ["No Voice","One Voice","Multi Voice"]
            df7_dict = {
                "files":[fileName for fileName,_ in st.session_state.results],
            }
            for category in voiceNames:
                df7_dict[category] = []
            for resultID,summary in enumerate(st.session_state.summaries):
                partialDf = summary["df5"]
                for i in range(len(voiceNames)):
                    df7_dict[voiceNames[i]].append(partialDf["percentiles"][i])
            df7 = pd.DataFrame(df7_dict)
            # Same data sorted three ways to expose different orderings.
            sorted_df7 = df7.sort_values(by=['One Voice', 'Multi Voice'])
            summFig2 = px.bar(sorted_df7, x="files", y=["One Voice","Multi Voice","No Voice",])
            st.plotly_chart(summFig2, use_container_width=True)
            sorted_df7_3 = df7.sort_values(by=['Multi Voice','One Voice'])
            summFig3 = px.bar(sorted_df7_3, x="files", y=["One Voice","Multi Voice","No Voice",])
            st.plotly_chart(summFig3, use_container_width=True)
            sorted_df7_4 = df7.sort_values(by=['No Voice', 'Multi Voice'],ascending=False)
            summFig4 = px.bar(sorted_df7_4, x="files", y=["One Voice","Multi Voice","No Voice",])
            st.plotly_chart(summFig4, use_container_width=True)
|
| 598 |
+
|
| 599 |
+
|
| 600 |
+
|
| 601 |
+
old = '''userid = st.text_input("user id:", "Guest")
|
| 602 |
colorPref = st.text_input("Favorite color?", "None")
|
| 603 |
radio = st.radio('Pick one:', ['Left','Right'])
|
| 604 |
selection = st.selectbox('Select', [1,2,3])
|
|
|
|
| 607 |
file_paths,
|
| 608 |
userid)
|
| 609 |
st.success('I think it worked!')
|
| 610 |
+
'''
|
| 611 |
@st.cache_data
def convert_df(df):
    """Render a DataFrame as UTF-8 encoded CSV bytes (no index column).

    Cached by Streamlit so repeated downloads of the same frame are free.
    """
    csv_text = df.to_csv(index=False)
    return csv_text.encode('utf-8')
|