"""
ui.py — Reusable Streamlit UI rendering helpers.
Keeps all layout/widget code that isn't tied to a single page out of app.py,
while keeping utils.py pure-Python and state.py focused on callbacks.
"""
import html
import logging
import os

import plotly.express as px
import streamlit as st

import utils
from state import (
    updateCategoryOptions, removeCategory, addCategory,
    addGlobalRename, removeGlobalRename,
    _global_rename_key, on_grename_change,
    randomize_speaker_clip, apply_inline_rename,
)
# ---------------------------------------------------------------------------
# Chart rendering
# ---------------------------------------------------------------------------
def render_chart(fig, tab, pdf_path, svg_path, pdf_name, svg_name, pdf_key, svg_key,
                 plotly_config=None):
    """Render a Plotly figure inside a tab with PDF/SVG download buttons.

    Args:
        fig: Plotly figure to display and export.
        tab: Streamlit container, used as a context manager, that hosts the chart.
        pdf_path: Path the PDF export is written to and read back from.
        svg_path: Path the SVG export is written to and read back from.
        pdf_name: File name offered to the browser for the PDF download.
        svg_name: File name offered to the browser for the SVG download.
        pdf_key: Streamlit widget key for the PDF download button.
        svg_key: Streamlit widget key for the SVG download button.
        plotly_config: Optional Plotly config dict; when falsy, a default
            with the mode bar enabled is used.

    Image export is best-effort: a failure never interrupts rendering. Each
    format is exported independently and failures are logged, so a broken PDF
    export neither hides itself nor prevents the SVG export. A download button
    is shown only when the corresponding file exists on disk.
    """
    cfg = plotly_config or {"displayModeBar": True, "modeBarButtonsToRemove": []}
    log = logging.getLogger(__name__)
    # (path, button label, download file name, MIME type, widget key)
    downloads = (
        (pdf_path, "Save As PDF", pdf_name, "application/pdf", pdf_key),
        (svg_path, "Save As SVG", svg_name, "image/svg+xml", svg_key),
    )

    with tab:
        st.plotly_chart(fig, use_container_width=True, config=cfg)
        columns = st.columns(2)

        for path, *_ in downloads:
            try:
                fig.write_image(path)
            except Exception:
                # Deliberately broad: export must stay optional, but the
                # failure is recorded instead of being silently discarded.
                log.warning("Could not export chart to %s", path, exc_info=True)

        for column, (path, label, file_name, mime, key) in zip(columns, downloads):
            with column:
                if os.path.exists(path):
                    with open(path, "rb") as f:
                        st.download_button(label, f, file_name, mime,
                                           key=key, on_click="ignore")
# ---------------------------------------------------------------------------
# Sidebar — categories section
# ---------------------------------------------------------------------------
def render_role_sidebar(display_speaker_tokens, token_display_map):
    """Render cross-file role multiselects in the sidebar.

    Args:
        display_speaker_tokens: list of display strings shown in the dropdown,
            e.g. ["file: John", "file: SPEAKER_01"].
        token_display_map: dict {display_label -> raw_token},
            e.g. {"file: John": "file: SPEAKER_00"}.

    The data model (``st.session_state.categorySelect``) stores raw tokens;
    this function converts raw tokens to display labels when seeding the
    widgets and hands ``token_display_map`` to the change callback so it can
    convert back.
    """
    raw_to_display = {raw: label for label, raw in token_display_map.items()}

    st.sidebar.subheader("Add Speaker's Role")
    st.sidebar.markdown(
        "<p style='font-size:0.85rem; color:gray; margin-bottom:2px;'>"
        "Add speaker(s) into the Instructor or Students role. You can also add a new role "
        "by typing in the \"Add custom role\" textbox.</p>",
        unsafe_allow_html=True,
    )
    st.sidebar.text(" ")

    valid_display = set(display_speaker_tokens)
    categories = st.session_state.categories
    selections = st.session_state.categorySelect

    # Sync each widget key from the data model (raw -> display), dropping any
    # stale labels that are no longer valid (e.g. speaker was just renamed).
    for i, category in enumerate(categories):
        raw_vals = selections[i] if i < len(selections) else []
        labels = [raw_to_display.get(token, token) for token in raw_vals]
        st.session_state[f"multiselect_{category}"] = [
            label for label in labels if label in valid_display
        ]

    # Display labels already claimed by any role, read from the widget keys.
    all_claimed = {
        label
        for category in categories
        for label in st.session_state.get(f"multiselect_{category}", [])
    }

    for i, category in enumerate(categories):
        ms_key = f"multiselect_{category}"
        label_col, trash_col = st.sidebar.columns([5, 1])
        # The role name can be typed by the user, so escape it before it is
        # embedded in raw HTML.
        label_col.markdown(
            f"<div style='padding-top:16px;'>"
            f"<strong>{html.escape(str(category))}</strong></div>",
            unsafe_allow_html=True,
        )
        trash_col.button(
            "🗑️", key=f"remove_{category}",
            on_click=removeCategory, args=(i,),
            help=f"Remove {category}",
        )

        # A speaker may be offered here only if no role has claimed it, or if
        # this role is the one that claimed it.
        this_role_display = set(st.session_state.get(ms_key, []))
        available_tokens = [
            token for token in display_speaker_tokens
            if token not in all_claimed or token in this_role_display
        ]
        st.sidebar.multiselect(
            category, available_tokens,
            key=ms_key,
            on_change=updateCategoryOptions,
            args=(token_display_map,),
            label_visibility="collapsed",
        )

    st.sidebar.text_input("Add custom role", key="categoryInput", on_change=addCategory)
# ---------------------------------------------------------------------------
# Sidebar — rename speakers section
# ---------------------------------------------------------------------------
def render_rename_sidebar(currFile, speakerNames, display_speaker_tokens, token_display_map):
    """Render the "Rename Cross-file Speakers" sidebar section.

    Args:
        currFile: Not used by this function; kept so existing callers work.
        speakerNames: Not used by this function; kept so existing callers work.
        display_speaker_tokens: list of display strings shown in the dropdown.
        token_display_map: dict {display_label -> raw_token}.

    The data model (``entry["speakers"]`` of each item in
    ``st.session_state.globalRenames``) stores raw tokens; widgets are seeded
    with the matching display labels.
    """
    raw_to_display = {raw: label for label, raw in token_display_map.items()}

    st.sidebar.divider()
    st.sidebar.subheader("Rename Cross-file Speakers")
    st.sidebar.markdown(
        "<p style='font-size:0.85rem; color:gray; margin-bottom:2px;'>"
        "This option works best for renaming multiple speakers across multiple files. "
        "For a simple rename for a single file, use the Rename Speaker tab in the center.</p>",
        unsafe_allow_html=True,
    )
    st.sidebar.text(" ")

    # Display labels claimed by any rename entry, built from the data model.
    all_claimed = {
        raw_to_display.get(token, token)
        for entry in st.session_state.globalRenames
        for token in entry["speakers"]
    }
    valid_display = set(display_speaker_tokens)

    for idx, entry in enumerate(st.session_state.globalRenames):
        grkey = _global_rename_key(idx)

        # Sync widget key from data model (raw -> display), dropping stale labels.
        entry_labels = [raw_to_display.get(token, token) for token in entry["speakers"]]
        st.session_state[grkey] = [
            label for label in entry_labels if label in valid_display
        ]

        label_col, trash_col = st.sidebar.columns([5, 1])
        # The rename target is typed by the user, so escape it before it is
        # embedded in raw HTML.
        label_col.markdown(
            f"<div style='padding-top:16px;'>"
            f"<strong>{html.escape(str(entry['name']))}</strong></div>",
            unsafe_allow_html=True,
        )
        trash_col.button(
            "🗑️", key=f"remove_grename_{idx}",
            on_click=removeGlobalRename, args=(idx,),
            help=f"Remove '{entry['name']}'",
        )

        # Offer a speaker only if it is unclaimed or claimed by this entry.
        this_entry_display = set(st.session_state[grkey])
        available_tokens = [
            token for token in display_speaker_tokens
            if token not in all_claimed or token in this_entry_display
        ]
        st.sidebar.multiselect(
            f"Speakers for {entry['name']}", options=available_tokens,
            key=grkey,
            on_change=on_grename_change,
            args=(idx, token_display_map),
            label_visibility="collapsed",
        )

        # Show one error if two speakers from the same file are in this entry.
        # The file name is the part of the raw token before the first ": ".
        files_in_entry = [token.split(": ", 1)[0] for token in entry["speakers"]]
        if len(set(files_in_entry)) < len(files_in_entry):
            st.sidebar.error(
                "Conflicted name for two speakers in one same file. Please remove one speaker."
            )

    st.sidebar.text_input(
        "Add rename", placeholder="e.g. John",
        key="globalRenameInput", on_change=addGlobalRename,
    )
# ---------------------------------------------------------------------------
# Rename Speaker tab — speaker / audio sample table
# ---------------------------------------------------------------------------
def render_speaker_samples_tab(speakerNames, raw_to_display, currFile):
    """Render the speaker table: Speaker | Role | Audio Sample | resample button.

    Args:
        speakerNames: Iterable of speaker labels to list, one row each.
        raw_to_display: dict mapping a speaker label to the name shown in the
            table; labels without an entry are shown as-is.
        currFile: Name of the file whose speakers are being listed; used to
            look up clips, segments, renames and to build widget keys.

    Each speaker cell either shows the name with an edit button, or (while
    editing) a history selectbox and/or free-text input with confirm, cancel
    and reset buttons. Confirm, cancel, reset, edit and resample all end with
    ``st.rerun()``.
    """
    state = st.session_state
    file_samples = state.speakerClips.get(currFile, {})
    has_waveform = currFile in state.speakerWaveforms
    if "inline_rename_active" not in state:
        state.inline_rename_active = {}

    # Rebuild the rename history on every render:
    #   1. names currently in globalRenames (sidebar rename entries)
    #   2. names actively used in speakerRenames (confirmed via the rename tab)
    # Earlier history entries survive only while they are in one of those two
    # sets, so names from deleted sidebar entries disappear automatically.
    global_names = {entry["name"] for entry in state.globalRenames}
    active_names = {
        name
        for renames in state.speakerRenames.values()
        for name in renames.values()
    }
    known_names = global_names | active_names
    rebuilt_history = [
        name for name in state.get("inline_rename_history", [])
        if name in known_names
    ]
    # Append new sidebar names in sidebar order so the dropdown order is stable
    # across reruns (iterating the set would give an arbitrary order).
    for entry in state.globalRenames:
        if entry["name"] not in rebuilt_history:
            rebuilt_history.append(entry["name"])
    state.inline_rename_history = rebuilt_history

    def _get_role(sp):
        """Return the role name assigned to this speaker, or empty string."""
        token = f"{currFile}: {sp}"
        # zip() stops at the shorter list, so a length mismatch between
        # categories and categorySelect cannot raise IndexError.
        for role, tokens in zip(state.categories, state.categorySelect):
            if token in tokens:
                return role
        return ""

    def _close_editor(edit_key, *widget_keys):
        """Leave edit mode for one row and discard its widget state."""
        state.inline_rename_active[edit_key] = False
        for widget_key in widget_keys:
            state.pop(widget_key, None)

    # Header
    header_cols = st.columns([3, 2, 2, 1])
    header_cols[0].markdown("**Speaker**")
    header_cols[1].markdown("**Role**")
    header_cols[2].markdown("**Audio Sample**")
    header_cols[3].markdown("** **", unsafe_allow_html=True)
    st.markdown("<hr style='margin-top:2px; margin-bottom:4px;'>", unsafe_allow_html=True)

    if not file_samples:
        st.info("Audio samples are only available for files analyzed from audio (not RTTM/CSV/TXT).")

    _NEW_OPTION = "＋ Enter a new name"

    for sp in speakerNames:
        display_name = raw_to_display.get(sp, sp)
        edit_key = (currFile, sp)
        is_editing = state.inline_rename_active.get(edit_key, False)
        row_cols = st.columns([3, 2, 2, 1])

        # --- Speaker cell ---
        if is_editing:
            select_key = f"inline_rename_select_{currFile}_{sp}"
            input_key = f"inline_rename_input_{currFile}_{sp}"
            form_key = f"inline_rename_form_{currFile}_{sp}"
            chosen_key = f"inline_rename_chosen_{currFile}_{sp}"
            history = state.inline_rename_history
            current_val = state.speakerRenames.get(currFile, {}).get(sp, "")

            with row_cols[0]:
                # The selectbox MUST live outside the form: inside a form,
                # widget state is only committed on submit, so choosing
                # _NEW_OPTION would never make the text input appear until
                # after an unrelated submit. The live choice is tracked under
                # chosen_key; only the text input and buttons sit in the form.
                show_input = False
                new_name = ""
                if history:
                    options = history + [_NEW_OPTION]
                    default_choice = current_val if current_val in history else _NEW_OPTION
                    # Seed the choice, or clamp it if history changed since
                    # the value was stored.
                    if chosen_key not in state or state[chosen_key] not in options:
                        state[chosen_key] = default_choice
                    st.selectbox(
                        "Rename",
                        options=options,
                        key=chosen_key,
                        label_visibility="collapsed",
                    )
                    chosen = state[chosen_key]
                    show_input = chosen == _NEW_OPTION
                    if not show_input:
                        new_name = chosen

                with st.form(key=form_key, border=False):
                    if not history:
                        # No history: a plain text input is the only field.
                        new_name = st.text_input(
                            "Rename",
                            value=current_val,
                            key=input_key,
                            label_visibility="collapsed",
                            placeholder=f"Rename {sp}…",
                        )
                    elif show_input:
                        # User picked "enter a new name": show free-text field.
                        new_name = st.text_input(
                            "New name",
                            value="",
                            key=input_key,
                            label_visibility="collapsed",
                            placeholder=f"Rename {sp}…",
                        )
                    _, btn_col1, btn_col2, btn_col3 = st.columns([4, 1, 1, 1])
                    confirmed_submit = btn_col1.form_submit_button(
                        "✓", help="Confirm rename (or press Enter)")
                    cancel_clicked = btn_col2.form_submit_button("✕", help="Cancel")
                    reset_clicked = btn_col3.form_submit_button(
                        "⟳", help="Reset to original speaker label")

            editor_keys = (select_key, chosen_key, input_key)
            if confirmed_submit:
                confirmed = (new_name or "").strip()
                apply_inline_rename(currFile, sp, confirmed)
                if confirmed and confirmed not in state.inline_rename_history:
                    state.inline_rename_history.append(confirmed)
                _close_editor(edit_key, *editor_keys)
                st.rerun()
            if cancel_clicked:
                _close_editor(edit_key, *editor_keys)
                st.rerun()
            if reset_clicked:
                # An empty name is what the reset action passes to the callback.
                apply_inline_rename(currFile, sp, "")
                _close_editor(edit_key, *editor_keys)
                st.rerun()
        else:
            with row_cols[0]:
                name_col, pencil_col = st.columns([4, 1])
                name_col.write(display_name)
                if pencil_col.button(
                    "✎", key=f"inline_rename_edit_{currFile}_{sp}",
                    help=f"Rename {sp}",
                ):
                    state.inline_rename_active[edit_key] = True
                    st.rerun()

        # --- Role cell ---
        row_cols[1].write(_get_role(sp))

        # --- Audio sample cell ---
        if sp in file_samples:
            row_cols[2].audio(file_samples[sp], format="audio/wav")
            sp_segs = state.speakerSegments.get(currFile, {}).get(sp, [])
            # Resampling needs both the file's waveform and this speaker's segments.
            if has_waveform and sp_segs:
                if row_cols[3].button(
                    "↺", key=f"sample_randomize_{currFile}_{sp}",
                    help="Try a different audio sample for this speaker",
                ):
                    randomize_speaker_clip(currFile, sp)
                    st.rerun()
        else:
            row_cols[2].write("—")
# ---------------------------------------------------------------------------
# Multi-file summary expander
# ---------------------------------------------------------------------------
def _render_sorted_bar_chart(df, y_cols, color_map, sort_options, select_label,
                             select_key, title_prefix, cfg):
    """Render a sort selectbox and the stacked per-file bar chart it orders.

    ``sort_options`` maps a selectbox label to ``(column, ascending)``. If the
    chosen column is missing from ``df``, the chart falls back to sorting by
    file name and the title says so.
    """
    selected = st.selectbox(select_label, options=list(sort_options), key=select_key)
    sort_col, ascending = sort_options[selected]
    if sort_col not in df.columns:
        selected, sort_col, ascending = "File Name", "files", True

    fig = px.bar(
        df.sort_values(by=sort_col, ascending=ascending),
        x="files", y=y_cols,
        title=f"{title_prefix} sorted by {selected}",
        labels={"files": "Files"},
        color_discrete_map=color_map,
        hover_data={"files": True},
    )
    fig.update_layout(yaxis_title="% of File Duration")
    # File names are available on hover; tick labels are hidden on the axis.
    fig.update_xaxes(showticklabels=False)
    st.plotly_chart(fig, use_container_width=True, config=cfg)


def render_multifile_summary(plotly_config=None):
    """Render the "Multi-file summary data" expander with three bar charts.

    Args:
        plotly_config: Optional Plotly config dict; when falsy, a default
            with the mode bar enabled is used.

    Nothing is rendered when ``st.session_state.results`` is empty. Charts
    are drawn only when more than one file has a two-item result entry;
    otherwise the expander contains just its header.
    """
    cfg = plotly_config or {"displayModeBar": True, "modeBarButtonsToRemove": []}
    state = st.session_state
    if not state.results:
        return

    with st.expander("Multi-file summary data"):
        st.header("Multi-file summary data")
        with st.spinner("Processing summary results..."):
            validNames = [
                fn for fn in state.file_names
                if fn in state.results and len(state.results[fn]) == 2
            ]
            if len(validNames) <= 1:
                return

            df6, allCategories = utils.build_multifile_category_df(
                validNames, state.results, state.summaries,
                state.categories, state.categorySelect,
                speakerRenames=state.speakerRenames,
            )
            df7, _ = utils.build_multifile_voice_df(validNames, state.summaries)

            # Voice category colors: fixed palette slots.
            palette = utils._PALETTE
            voice_color_map = {
                "Single Voice": palette[0],
                "Multi Voice": palette[9],
                "No Voice": palette[24],
                "Unassigned": palette[31] if len(palette) > 31 else "#777a7d",
            }
            # Role/speaker colors: cycle through _SPEAKER_PALETTE by position.
            speaker_palette = utils._SPEAKER_PALETTE
            role_color_map = {
                col: speaker_palette[i % len(speaker_palette)]
                for i, col in enumerate(allCategories)
            }

            # Chart 1: share of file duration per speaker/role.
            _render_sorted_bar_chart(
                df6, allCategories, role_color_map,
                sort_options={
                    "File Name": ("files", True),
                    "% of Instructor": ("Instructor", False),
                },
                select_label="Sort Percent of File Duration by:",
                select_key="multifile_role_sort",
                title_prefix="Percent of file duration spoken by each speaker/role",
                cfg=cfg,
            )

            # Chart 2: voice categories per file.
            _render_sorted_bar_chart(
                df7, ["Single Voice", "Multi Voice", "No Voice"], voice_color_map,
                sort_options={
                    "File Name": ("files", True),
                    "% of Single Voice": ("Single Voice", False),
                },
                select_label="Sort Cross-file Voice Categories by:",
                select_key="multifile_voice_sort",
                title_prefix="Cross-file voice categories",
                cfg=cfg,
            )

            st.divider()

            # Chart 3: combined role and voice breakdown.
            df8, role_voice_cols = utils.build_multifile_role_voice_df(
                validNames, state.results, state.summaries,
                state.categories, state.categorySelect,
                speakerRenames=state.speakerRenames,
            )
            # Voice columns keep their fixed colors; every other column cycles
            # through the speaker palette by its position in role_voice_cols.
            role_voice_color_map = {
                col: speaker_palette[i % len(speaker_palette)]
                for i, col in enumerate(role_voice_cols)
                if col not in voice_color_map
            }
            role_voice_color_map.update(voice_color_map)

            _render_sorted_bar_chart(
                df8, role_voice_cols, role_voice_color_map,
                sort_options={
                    "File Name": ("files", True),
                    # NOTE(review): ascending here, unlike the other percentage
                    # sorts which are descending; confirm this is intended.
                    "% of Multi Voice": ("Multi Voice", True),
                },
                select_label="Sort Cross-file Role & Voice Breakdown by:",
                select_key="multifile_role_voice_sort",
                title_prefix="Cross-file role and voice breakdown",
                cfg=cfg,
            )