duongthienz's picture
Update ui.py
e47db05 verified
Raw
History Blame Contribute Delete
22.3 kB
"""
ui.py — Reusable Streamlit UI rendering helpers.
Keeps all layout/widget code that isn't tied to a single page out of app.py,
while keeping utils.py pure-Python and state.py focused on callbacks.
"""
import html
import logging
import os

import plotly.express as px
import streamlit as st

import utils
from state import (
    updateCategoryOptions, removeCategory, addCategory,
    addGlobalRename, removeGlobalRename,
    _global_rename_key, on_grename_change,
    randomize_speaker_clip, apply_inline_rename,
)
# ---------------------------------------------------------------------------
# Chart rendering
# ---------------------------------------------------------------------------
def render_chart(fig, tab, pdf_path, svg_path, pdf_name, svg_name, pdf_key, svg_key,
                 plotly_config=None):
    """Render a Plotly figure inside a tab with PDF/SVG download buttons.

    The figure is drawn in ``tab`` and then exported to ``pdf_path`` and
    ``svg_path`` with ``fig.write_image``. Each format is exported
    independently, and a download button is only offered for a format whose
    export succeeded during this call, so a file left on disk by an earlier
    run is never served in place of the current chart.

    Parameters:
        fig: Plotly figure to display and export.
        tab: Streamlit container (used as a context manager) to render into.
        pdf_path, svg_path: Filesystem paths the exports are written to.
        pdf_name, svg_name: File names suggested to the browser on download.
        pdf_key, svg_key: Streamlit widget keys for the two download buttons.
        plotly_config: Optional Plotly config dict; a default that shows the
            mode bar is used when omitted or empty.
    """
    import logging
    import os

    cfg = plotly_config or {"displayModeBar": True, "modeBarButtonsToRemove": []}
    with tab:
        st.plotly_chart(fig, use_container_width=True, config=cfg)
        col_l, col_r = st.columns(2)
        exports = (
            (col_l, pdf_path, "Save As PDF", pdf_name, "application/pdf", pdf_key),
            (col_r, svg_path, "Save As SVG", svg_name, "image/svg+xml", svg_key),
        )
        for column, path, label, file_name, mime, key in exports:
            try:
                fig.write_image(path)
            except Exception:
                # Export is best-effort (the image backend may be missing or
                # the path unwritable): record the failure and skip this
                # button rather than breaking the page.
                logging.getLogger(__name__).warning(
                    "Could not export chart to %s", path, exc_info=True
                )
                continue
            if not os.path.exists(path):
                continue
            with column, open(path, "rb") as f:
                # NOTE(review): on_click="ignore" is passed through unchanged
                # from the original call; confirm the installed Streamlit
                # version accepts it.
                st.download_button(label, f, file_name, mime,
                                   key=key, on_click="ignore")
# ---------------------------------------------------------------------------
# Sidebar — categories section
# ---------------------------------------------------------------------------
def render_role_sidebar(display_speaker_tokens, token_display_map):
    """Render cross-file role multiselects in the sidebar.

    Parameters:
        display_speaker_tokens: list of display strings shown in the dropdown,
            e.g. ["file: John", "file: SPEAKER_01"].
        token_display_map: dict {display_label -> raw_token},
            e.g. {"file: John": "file: SPEAKER_00"}.

    The data model (``st.session_state.categorySelect``) stores raw tokens;
    this function converts raw tokens to display labels before seeding each
    widget, and passes ``token_display_map`` to the ``updateCategoryOptions``
    callback for the reverse direction.

    Role names come from the "Add custom role" text input, so they are
    HTML-escaped before being placed into markup rendered with
    ``unsafe_allow_html=True``.
    """
    import html

    raw_to_display = {raw: label for label, raw in token_display_map.items()}
    st.sidebar.subheader("Add Speaker's Role")
    st.sidebar.markdown(
        "<p style='font-size:0.85rem; color:gray; margin-bottom:2px;'>"
        "Add speaker(s) into the Instructor or Students role. You can also add a new role "
        "by typing in the \"Add custom role\" textbox.</p>",
        unsafe_allow_html=True,
    )
    st.sidebar.text(" ")

    valid_display = set(display_speaker_tokens)
    categories = st.session_state.categories
    selections = st.session_state.categorySelect

    # Seed every widget key from the data model (raw -> display) before any
    # widget is created, dropping labels that are no longer valid (e.g. the
    # speaker was just renamed).
    for i, category in enumerate(categories):
        raw_vals = selections[i] if i < len(selections) else []
        labels = (raw_to_display.get(token, token) for token in raw_vals)
        st.session_state[f"multiselect_{category}"] = [
            label for label in labels if label in valid_display
        ]

    # Display labels already assigned to any role.
    all_claimed = {
        label
        for category in categories
        for label in st.session_state.get(f"multiselect_{category}", [])
    }

    for i, category in enumerate(categories):
        ms_key = f"multiselect_{category}"
        label_col, trash_col = st.sidebar.columns([5, 1])
        label_col.markdown(
            f"<div style='padding-top:16px;'><strong>{html.escape(category)}</strong></div>",
            unsafe_allow_html=True,
        )
        trash_col.button(
            "🗑️", key=f"remove_{category}",
            on_click=removeCategory, args=(i,),
            help=f"Remove {category}",
        )
        # A speaker claimed by another role is hidden from this dropdown, but
        # this role's own selections must stay in its option list.
        this_role_display = set(st.session_state.get(ms_key, []))
        available_tokens = [
            label for label in display_speaker_tokens
            if label not in all_claimed or label in this_role_display
        ]
        st.sidebar.multiselect(
            category, available_tokens,
            key=ms_key,
            on_change=updateCategoryOptions,
            args=(token_display_map,),
            label_visibility="collapsed",
        )

    st.sidebar.text_input("Add custom role", key="categoryInput", on_change=addCategory)
# ---------------------------------------------------------------------------
# Sidebar — rename speakers section
# ---------------------------------------------------------------------------
def render_rename_sidebar(currFile, speakerNames, display_speaker_tokens, token_display_map):
    """Render the "Rename Cross-file Speakers" sidebar section.

    Parameters:
        currFile: Unused here; kept so existing callers keep working.
        speakerNames: Unused here; kept so existing callers keep working.
        display_speaker_tokens: list of display strings shown in the dropdown.
        token_display_map: dict {display_label -> raw_token}.

    The data model (``entry["speakers"]`` of each item in
    ``st.session_state.globalRenames``) stores raw tokens; they are converted
    to display labels before seeding each widget.

    Rename names come from the "Add rename" text input, so they are
    HTML-escaped before being placed into markup rendered with
    ``unsafe_allow_html=True``.
    """
    import html

    raw_to_display = {raw: label for label, raw in token_display_map.items()}
    st.sidebar.divider()
    st.sidebar.subheader("Rename Cross-file Speakers")
    st.sidebar.markdown(
        "<p style='font-size:0.85rem; color:gray; margin-bottom:2px;'>"
        "This option works best for renaming multiple speakers across multiple files. "
        "For a simple rename for a single file, use the Rename Speaker tab in the center.</p>",
        unsafe_allow_html=True,
    )
    st.sidebar.text(" ")

    # Display labels already used by any rename entry, taken from the data model.
    all_claimed = {
        raw_to_display.get(token, token)
        for entry in st.session_state.globalRenames
        for token in entry["speakers"]
    }
    valid_display = set(display_speaker_tokens)

    for idx, entry in enumerate(st.session_state.globalRenames):
        grkey = _global_rename_key(idx)
        # Seed the widget key from the data model (raw -> display), dropping
        # labels that are no longer valid.
        labels = (raw_to_display.get(token, token) for token in entry["speakers"])
        st.session_state[grkey] = [label for label in labels if label in valid_display]

        label_col, trash_col = st.sidebar.columns([5, 1])
        label_col.markdown(
            f"<div style='padding-top:16px;'><strong>{html.escape(entry['name'])}</strong></div>",
            unsafe_allow_html=True,
        )
        trash_col.button(
            "🗑️", key=f"remove_grename_{idx}",
            on_click=removeGlobalRename, args=(idx,),
            help=f"Remove '{entry['name']}'",
        )

        # Hide speakers claimed by other entries, but keep this entry's own.
        this_entry_display = set(st.session_state[grkey])
        available_tokens = [
            label for label in display_speaker_tokens
            if label not in all_claimed or label in this_entry_display
        ]
        st.sidebar.multiselect(
            f"Speakers for {entry['name']}", options=available_tokens,
            key=grkey,
            on_change=on_grename_change,
            args=(idx, token_display_map),
            label_visibility="collapsed",
        )

        # Show one error if two speakers from the same file are in this entry.
        # The file name is the part of the raw token before the first ": ".
        files_in_entry = [token.split(": ", 1)[0] for token in entry["speakers"]]
        if len(set(files_in_entry)) != len(files_in_entry):
            st.sidebar.error(
                "Conflicted name for two speakers in one same file. Please remove one speaker."
            )

    st.sidebar.text_input(
        "Add rename", placeholder="e.g. John",
        key="globalRenameInput", on_change=addGlobalRename,
    )
# ---------------------------------------------------------------------------
# Rename Speaker tab — speaker / audio sample table
# ---------------------------------------------------------------------------
def render_speaker_samples_tab(speakerNames, raw_to_display, currFile):
    """Render the speaker table: Speaker (inline ✎ rename) | Role | Audio Sample | ↺.

    Parameters:
        speakerNames: Iterable of raw speaker labels for ``currFile``.
        raw_to_display: dict {raw speaker label -> display name}; a label
            without an entry is shown as-is.
        currFile: Name of the file whose speakers are listed.

    Reads ``speakerClips``, ``speakerWaveforms``, ``speakerSegments``,
    ``speakerRenames``, ``globalRenames``, ``categories`` and
    ``categorySelect`` from ``st.session_state`` and maintains
    ``inline_rename_active`` / ``inline_rename_history`` there.
    """
    file_samples = st.session_state.speakerClips.get(currFile, {})
    has_waveform = currFile in st.session_state.speakerWaveforms
    has_samples = bool(file_samples)
    if "inline_rename_active" not in st.session_state:
        st.session_state.inline_rename_active = {}

    # Rebuild the rename history on every render. A previously stored name is
    # kept only while it is still a sidebar rename entry (globalRenames) or is
    # still in use in speakerRenames; anything else is dropped. Names from
    # globalRenames that are not yet in the history are then appended in
    # sidebar order, so the dropdown order is deterministic.
    global_names = [entry["name"] for entry in st.session_state.globalRenames]
    known_names = set(global_names)
    for renames in st.session_state.speakerRenames.values():
        known_names.update(renames.values())
    rebuilt_history = [
        name for name in st.session_state.get("inline_rename_history", [])
        if name in known_names
    ]
    for name in global_names:
        if name not in rebuilt_history:
            rebuilt_history.append(name)
    st.session_state.inline_rename_history = rebuilt_history

    new_option_label = "+ Enter a new name"

    def _get_role(sp):
        """Return the role name assigned to this speaker, or empty string."""
        token = f"{currFile}: {sp}"
        for i, tokens in enumerate(st.session_state.categorySelect):
            if token in tokens:
                return st.session_state.categories[i]
        return ""

    def _finish_edit(edit_key, *widget_keys):
        """Leave edit mode for one row and discard its widget state."""
        st.session_state.inline_rename_active[edit_key] = False
        for widget_key in widget_keys:
            st.session_state.pop(widget_key, None)

    # Header
    header_cols = st.columns([3, 2, 2, 1])
    header_cols[0].markdown("**Speaker**")
    header_cols[1].markdown("**Role**")
    header_cols[2].markdown("**Audio Sample**")
    header_cols[3].markdown("**&nbsp;**", unsafe_allow_html=True)
    st.markdown("<hr style='margin-top:2px; margin-bottom:4px;'>", unsafe_allow_html=True)
    if not has_samples:
        st.info("Audio samples are only available for files analyzed from audio (not RTTM/CSV/TXT).")

    for sp in speakerNames:
        display_name = raw_to_display.get(sp, sp)
        edit_key = (currFile, sp)
        is_editing = st.session_state.inline_rename_active.get(edit_key, False)
        row_cols = st.columns([3, 2, 2, 1])

        # --- Speaker cell ---
        if is_editing:
            # select_key is not used for any widget in this function; it is
            # still popped on exit, as before.
            # NOTE(review): confirm whether anything else sets this key.
            select_key = f"inline_rename_select_{currFile}_{sp}"
            input_key = f"inline_rename_input_{currFile}_{sp}"
            form_key = f"inline_rename_form_{currFile}_{sp}"
            chosen_key = f"inline_rename_chosen_{currFile}_{sp}"
            history = st.session_state.inline_rename_history
            current_val = st.session_state.speakerRenames.get(currFile, {}).get(sp, "")

            with row_cols[0]:
                # The selectbox MUST live outside the form: inside a form,
                # widget state is only committed on submit, so choosing the
                # "new name" option would never make the text input appear
                # until after an unrelated submit. The live choice is tracked
                # under chosen_key; only the text input and the buttons are
                # placed in the form.
                show_input = False
                new_name = ""
                if history:
                    options = history + [new_option_label]
                    default_idx = (
                        history.index(current_val) if current_val in history
                        else len(options) - 1
                    )
                    # Initialise the stored choice, and reset it if the
                    # history changed so that it is no longer a valid option.
                    if (chosen_key not in st.session_state
                            or st.session_state[chosen_key] not in options):
                        st.session_state[chosen_key] = options[default_idx]
                    st.selectbox(
                        "Rename",
                        options=options,
                        key=chosen_key,
                        label_visibility="collapsed",
                    )
                    chosen = st.session_state[chosen_key]
                    show_input = (chosen == new_option_label)
                    if not show_input:
                        new_name = chosen

                with st.form(key=form_key, border=False):
                    if not history:
                        # No history — plain text input is the only field
                        new_name = st.text_input(
                            "Rename",
                            value=current_val,
                            key=input_key,
                            label_visibility="collapsed",
                            placeholder=f"Rename {sp}…",
                        )
                    elif show_input:
                        # User picked "+ Enter a new name" — show free-text field
                        new_name = st.text_input(
                            "New name",
                            value="",
                            key=input_key,
                            label_visibility="collapsed",
                            placeholder=f"Rename {sp}…",
                        )
                    _, btn_col1, btn_col2, btn_col3 = st.columns([4, 1, 1, 1])
                    confirmed_submit = btn_col1.form_submit_button("✓", help="Confirm rename (or press Enter)")
                    cancel_clicked = btn_col2.form_submit_button("✕", help="Cancel")
                    reset_clicked = btn_col3.form_submit_button("⟳", help="Reset to original speaker label")

                if confirmed_submit:
                    confirmed = (new_name or "").strip()
                    apply_inline_rename(currFile, sp, confirmed)
                    if confirmed and confirmed not in st.session_state.inline_rename_history:
                        st.session_state.inline_rename_history.append(confirmed)
                    _finish_edit(edit_key, select_key, chosen_key, input_key)
                    st.rerun()
                if cancel_clicked:
                    _finish_edit(edit_key, select_key, chosen_key, input_key)
                    st.rerun()
                if reset_clicked:
                    # An empty name is passed to apply_inline_rename for the
                    # "reset to original label" action.
                    apply_inline_rename(currFile, sp, "")
                    _finish_edit(edit_key, select_key, chosen_key, input_key)
                    st.rerun()
        else:
            with row_cols[0]:
                name_col, pencil_col = st.columns([4, 1])
                name_col.write(display_name)
                if pencil_col.button(
                    "✎", key=f"inline_rename_edit_{currFile}_{sp}",
                    help=f"Rename {sp}",
                ):
                    st.session_state.inline_rename_active[edit_key] = True
                    st.rerun()

        # --- Role cell ---
        row_cols[1].write(_get_role(sp))

        # --- Audio sample cell ---
        if sp in file_samples:
            row_cols[2].audio(file_samples[sp], format="audio/wav")
            sp_segs = st.session_state.speakerSegments.get(currFile, {}).get(sp, [])
            # The ↺ button is only shown when the file has a stored waveform
            # and this speaker has segments.
            if has_waveform and sp_segs:
                if row_cols[3].button(
                    "↺", key=f"sample_randomize_{currFile}_{sp}",
                    help="Try a different audio sample for this speaker",
                ):
                    randomize_speaker_clip(currFile, sp)
                    st.rerun()
        else:
            row_cols[2].write("—")
# ---------------------------------------------------------------------------
# Multi-file summary expander
# ---------------------------------------------------------------------------
def _render_sorted_bar(df, y_cols, color_map, sort_options, select_label, select_key,
                       title_prefix, cfg):
    """Render a sort selectbox followed by a bar chart of ``df``.

    ``sort_options`` maps a selectbox label to ``(column, ascending)``. If the
    chosen column is missing from ``df``, the chart falls back to sorting by
    "files" ascending instead of raising in ``sort_values``.
    """
    selected = st.selectbox(select_label, options=list(sort_options), key=select_key)
    sort_col, ascending = sort_options[selected]
    if sort_col not in df.columns:
        sort_col, ascending = "files", True
    fig = px.bar(
        df.sort_values(by=sort_col, ascending=ascending),
        x="files", y=y_cols,
        title=f"{title_prefix} sorted by {selected}",
        labels={"files": "Files"},
        color_discrete_map=color_map,
        hover_data={"files": True},
    )
    fig.update_layout(yaxis_title="% of File Duration")
    fig.update_xaxes(showticklabels=False)
    st.plotly_chart(fig, use_container_width=True, config=cfg)


def render_multifile_summary(plotly_config=None):
    """Render the "Multi-file summary data" expander with three bar charts.

    Nothing is rendered when ``st.session_state.results`` is empty. The
    expander and its header are rendered but no charts are drawn unless more
    than one file in ``st.session_state.file_names`` has a results entry of
    length 2.

    Parameters:
        plotly_config: Optional Plotly config dict; a default that shows the
            mode bar is used when omitted or empty.
    """
    cfg = plotly_config or {"displayModeBar": True, "modeBarButtonsToRemove": []}
    if not st.session_state.results:
        return

    with st.expander("Multi-file summary data"):
        st.header("Multi-file summary data")
        with st.spinner("Processing summary results..."):
            results = st.session_state.results
            validNames = [
                fn for fn in st.session_state.file_names
                if fn in results and len(results[fn]) == 2
            ]
            if len(validNames) <= 1:
                return

            df6, allCategories = utils.build_multifile_category_df(
                validNames, st.session_state.results, st.session_state.summaries,
                st.session_state.categories, st.session_state.categorySelect,
                speakerRenames=st.session_state.speakerRenames,
            )
            df7, _ = utils.build_multifile_voice_df(validNames, st.session_state.summaries)

            # Voice category colors are fixed palette slots.
            voice_color_map = {
                "Single Voice": utils._PALETTE[0],
                "Multi Voice": utils._PALETTE[9],
                "No Voice": utils._PALETTE[24],
                "Unassigned": utils._PALETTE[31] if len(utils._PALETTE) > 31 else "#777a7d",
            }
            # Role/speaker colors cycle through _SPEAKER_PALETTE by position.
            speaker_palette = utils._SPEAKER_PALETTE
            role_color_map = {
                col: speaker_palette[i % len(speaker_palette)]
                for i, col in enumerate(allCategories)
            }

            # Chart 1: share of each file's duration per speaker/role.
            _render_sorted_bar(
                df6, allCategories, role_color_map,
                {
                    "File Name": ("files", True),
                    "% of Instructor": ("Instructor", False),
                },
                "Sort Percent of File Duration by:", "multifile_role_sort",
                "Percent of file duration spoken by each speaker/role", cfg,
            )

            # Chart 2: voice categories per file.
            _render_sorted_bar(
                df7, ["Single Voice", "Multi Voice", "No Voice"], voice_color_map,
                {
                    "File Name": ("files", True),
                    "% of Single Voice": ("Single Voice", False),
                },
                "Sort Cross-file Voice Categories by:", "multifile_voice_sort",
                "Cross-file voice categories", cfg,
            )
            st.divider()

            # Chart 3: combined role and voice breakdown per file.
            df8, role_voice_cols = utils.build_multifile_role_voice_df(
                validNames, st.session_state.results, st.session_state.summaries,
                st.session_state.categories, st.session_state.categorySelect,
                speakerRenames=st.session_state.speakerRenames,
            )
            # Positions are counted over all columns (including voice columns)
            # so role colors do not shift; the fixed voice colors are then
            # applied on top.
            role_voice_color_map = {
                col: speaker_palette[i % len(speaker_palette)]
                for i, col in enumerate(role_voice_cols)
                if col not in voice_color_map
            }
            role_voice_color_map.update(voice_color_map)
            _render_sorted_bar(
                df8, role_voice_cols, role_voice_color_map,
                {
                    "File Name": ("files", True),
                    # NOTE(review): ascending here, unlike the other "% of …"
                    # options which sort descending — confirm this is intended.
                    "% of Multi Voice": ("Multi Voice", True),
                },
                "Sort Cross-file Role & Voice Breakdown by:", "multifile_role_voice_sort",
                "Cross-file role and voice breakdown", cfg,
            )