duongthienz committed on
Commit
2d68ee3
·
verified ·
1 Parent(s): 06ce411

Added utils.py and state.py

Browse files

utils.py — everything with zero st.* calls: processFile, extract_clip_bytes, build_speaker_clips, get_randomized_clip, all four build_df* functions (build_df2–build_df5), all six build_fig_* functions, and the two multi-file summary DataFrame builders.
state.py — all callbacks (addCategory, removeCategory, updateCategoryOptions, updateMultiSelect, addGlobalRename, removeGlobalRename, applyGlobalRenames), session state init (init_session_state), and analyze()

Files changed (2) hide show
  1. state.py +314 -0
  2. utils.py +481 -0
state.py ADDED
@@ -0,0 +1,314 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ state.py — Streamlit session-state management and UI callbacks.
3
+
4
+ Covers:
5
+ - init_session_state()
6
+ - Speaker rename helpers (get_display_name, apply_speaker_renames_to_df)
7
+ - Category callbacks (addCategory, removeCategory, updateCategoryOptions)
8
+ - Global rename callbacks (addGlobalRename, removeGlobalRename, applyGlobalRenames)
9
+ - File-switch callback (updateMultiSelect)
10
+ - analyze() — builds and caches all DataFrames for a single file
11
+ - convert_df(), printV()
12
+ """
13
+
14
+ import copy
15
+ import traceback
16
+
17
+ import pandas as pd
18
+ import streamlit as st
19
+
20
+ import sonogram_utility as su
21
+ import utils
22
+
23
+
24
# ---------------------------------------------------------------------------
# Logging
# ---------------------------------------------------------------------------

verbosity = 4  # 0=None 1=Low 2=Medium 3=High 4=Debug


def printV(message, level):
    """Print *message* only when the module-level verbosity is at least *level*."""
    if verbosity < level:
        return
    print(message)
33
+
34
+
35
+ # ---------------------------------------------------------------------------
36
+ # Session state initialisation
37
+ # ---------------------------------------------------------------------------
38
+
39
def init_session_state():
    """Idempotently initialise every session-state key the app needs."""
    defaults = {
        "results": {},  # {filename: (annotations, totalSeconds)}
        "speakerRenames": {},  # {filename: {speaker: name}}
        "summaries": {},  # {filename: {df2, df3, ...}}
        "categories": [],
        "categorySelect": {},  # {filename: [[], [], ...]} one slot per category
        "removeCategory": None,
        "resetResult": False,  # set True on file switch; cleared by analyze()
        "unusedSpeakers": {},  # {filename: [speaker, ...]} speakers in no category
        "file_names": [],
        "valid_files": [],
        "file_paths": {},  # {filename: path}
        "showSummary": "No",
        "speakerClips": {},  # {filename: {speaker: wav_bytes}}
        "speakerSegments": {},  # {filename: {speaker: [(start,end), ...]}}
        "speakerWaveforms": {},  # {filename: (waveform_tensor, sample_rate)}
        "globalRenames": [],  # [{"name": str, "speakers": ["file: SPEAKER_##", ...]}]
        "analyzeAllToggle": False,
    }
    # Only fill in missing keys, so calling this on every rerun never
    # clobbers values the user has already changed.
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value
63
+
64
+
65
+ # ---------------------------------------------------------------------------
66
+ # Display-name helpers
67
+ # ---------------------------------------------------------------------------
68
+
69
def get_display_name(speaker, fileName):
    """Return the user-assigned display name for a speaker, or the original label."""
    renames_for_file = st.session_state.speakerRenames.get(fileName, {})
    return renames_for_file.get(speaker, speaker)
72
+
73
+
74
def apply_speaker_renames_to_df(df, fileName, column="task"):
    """Replace SPEAKER_## labels in a DataFrame column with display names.

    Returns the input unchanged when *column* is absent; otherwise a copy
    with the column mapped through get_display_name.
    """
    if column not in df.columns:
        return df
    renamed = df.copy()
    renamed[column] = renamed[column].map(lambda label: get_display_name(label, fileName))
    return renamed
81
+
82
+
83
@st.cache_data
def convert_df(df):
    """Serialise *df* to UTF-8 CSV bytes (no index); memoised by Streamlit."""
    return df.to_csv(index=False).encode("utf-8")
86
+
87
+
88
+ # ---------------------------------------------------------------------------
89
+ # Category callbacks
90
+ # ---------------------------------------------------------------------------
91
+
92
def addCategory():
    """Callback: append the category typed into ``categoryInput``.

    Creates empty multiselect widget state for the new category and adds an
    empty selection slot for it to every known file.
    """
    new = st.session_state.categoryInput
    st.toast(f"Adding {new}")
    # Widget state key is derived from the category name.
    st.session_state[f"multiselect_{new}"] = []
    st.session_state.categories.append(new)
    st.session_state.categoryInput = ""  # clear the text box for the next entry
    for fname in st.session_state.categorySelect:
        st.session_state.categorySelect[fname].append([])
100
+
101
+
102
def removeCategory(index):
    """Callback: delete category *index* plus its widget state.

    Also drops the corresponding selection slot from every file's
    categorySelect list so positional indices stay aligned with categories.
    """
    name = st.session_state.categories[index]
    st.toast(f"Removing {name}")
    # Widget state keys are derived from the category name.
    del st.session_state[f"multiselect_{name}"]
    del st.session_state[f"remove_{name}"]
    del st.session_state.categories[index]
    for fname in st.session_state.categorySelect:
        del st.session_state.categorySelect[fname][index]
110
+
111
+
112
def updateCategoryOptions(fileName):
    """Callback: sync category multiselect widgets into ``categorySelect``.

    Maps each widget's display names back to raw SPEAKER_## labels and records
    which speakers ended up in no category (``unusedSpeakers[fileName]``).
    """
    if st.session_state.resetResult:
        # A file switch is in progress; widget values are stale, skip syncing.
        return
    currAnnotation, _ = st.session_state.results[fileName]
    speakerNames = list(currAnnotation.labels())
    saved_renames = st.session_state.speakerRenames.get(fileName, {})
    # Invert {raw: display} so widget selections (display names) map back to raw.
    display_to_raw = {saved_renames.get(sp, sp): sp for sp in speakerNames}
    # A shallow copy suffices: labels are immutable strings (was copy.deepcopy).
    unusedSpeakers = list(speakerNames)
    for i, category in enumerate(st.session_state.categories):
        display_choices = list(st.session_state[f"multiselect_{category}"])
        raw_choices = [display_to_raw.get(d, d) for d in display_choices]
        st.session_state.categorySelect[fileName][i] = raw_choices
        for sp in raw_choices:
            try:
                unusedSpeakers.remove(sp)
            except ValueError:
                # Speaker already claimed by an earlier category; ignore.
                pass
    st.session_state.unusedSpeakers[fileName] = unusedSpeakers
130
+
131
+
132
+ # ---------------------------------------------------------------------------
133
+ # Global rename callbacks
134
+ # ---------------------------------------------------------------------------
135
+
136
+ def _global_rename_key(index):
137
+ return f"grename_speakers_{index}"
138
+
139
+
140
def applyGlobalRenames():
    """Re-write speakerRenames from globalRenames and refresh widget keys.

    globalRenames is the source of truth: per-file renames are cleared and
    rebuilt from every "<file>: <SPEAKER_##>" token, then the per-speaker
    rename text boxes of the currently selected file are refreshed to match.
    """
    # Start from a clean slate so removed entries actually disappear.
    for fname in st.session_state.speakerRenames:
        st.session_state.speakerRenames[fname] = {}
    for entry in st.session_state.globalRenames:
        display_name = entry["name"]
        for token in entry["speakers"]:
            # Tokens look like "<filename>: <raw speaker label>".
            if ": " not in token:
                continue
            fname, raw_sp = token.split(": ", 1)
            if fname in st.session_state.speakerRenames:
                st.session_state.speakerRenames[fname][raw_sp] = display_name
    # Sync the per-speaker rename widgets for the file currently on screen.
    curr = st.session_state.get("select_currFile")
    if curr and curr in st.session_state.speakerRenames:
        saved = st.session_state.speakerRenames[curr]
        results = st.session_state.results.get(curr)
        if results:
            for sp in results[0].labels():
                st.session_state[f"rename_{curr}_{sp}"] = saved.get(sp, "")
159
+
160
+
161
def addGlobalRename():
    """Callback: create a new (empty) global rename entry from the text input."""
    new_name = st.session_state.globalRenameInput.strip()
    if not new_name:
        return  # ignore blank submissions
    st.toast(f"Adding rename '{new_name}'")
    entries = st.session_state.globalRenames
    entries.append({"name": new_name, "speakers": []})
    # Seed the matching multiselect widget with an empty selection.
    st.session_state[_global_rename_key(len(entries) - 1)] = []
    st.session_state.globalRenameInput = ""
169
+
170
+
171
def removeGlobalRename(index):
    """Callback: delete global-rename entry *index* and re-sync state.

    Widget keys are positional (grename_speakers_<i>), so after the deletion
    every later entry's widget value is shifted down one slot before the
    renames are re-applied.
    """
    entry = st.session_state.globalRenames[index]
    st.toast(f"Removing rename '{entry['name']}'")
    del st.session_state.globalRenames[index]
    # Shift widget contents left so widget i shows entry i again.
    for i in range(index, len(st.session_state.globalRenames)):
        st.session_state[_global_rename_key(i)] = list(
            st.session_state.globalRenames[i]["speakers"]
        )
    applyGlobalRenames()
180
+
181
+
182
+ # ---------------------------------------------------------------------------
183
+ # File-switch callback
184
+ # ---------------------------------------------------------------------------
185
+
186
def updateMultiSelect():
    """Callback fired when the current-file selectbox changes.

    Repopulates the per-speaker rename text boxes and the per-category
    multiselects with the stored state of the newly selected file, converting
    stored raw speaker labels to their display names.
    """
    fileName = st.session_state["select_currFile"]
    # Flag checked by updateCategoryOptions so it skips syncing stale widget
    # values during this rerun; analyze() clears it.
    st.session_state.resetResult = True
    result = st.session_state.results.get(fileName)
    if not result:
        return
    currAnnotation, _ = result
    speakerNames = list(currAnnotation.labels())
    saved_renames = st.session_state.speakerRenames.get(fileName, {})
    raw_to_display = {}
    for sp in speakerNames:
        saved = saved_renames.get(sp, "")
        st.session_state[f"rename_{fileName}_{sp}"] = saved
        raw_to_display[sp] = saved if saved else sp
    for i, category in enumerate(st.session_state.categories):
        raw_choices = st.session_state.categorySelect[fileName][i]
        st.session_state[f"multiselect_{category}"] = [
            raw_to_display.get(sp, sp) for sp in raw_choices
        ]
205
+
206
+
207
+ # ---------------------------------------------------------------------------
208
+ # Speaker-clip session-state helpers
209
+ # ---------------------------------------------------------------------------
210
+
211
def store_speaker_clips(fname, annotations, waveform, sample_rate):
    """Generate clips & segments for *fname* and write them into session state."""
    clips, segments = utils.build_speaker_clips(annotations, waveform, sample_rate)
    state = st.session_state
    state.speakerClips[fname] = clips
    state.speakerSegments[fname] = segments
    # Keep the raw audio around so clips can be re-randomized later.
    state.speakerWaveforms[fname] = (waveform, sample_rate)
    print(f"Generated {len(clips)} speaker clips for {fname}")
218
+
219
+
220
def randomize_speaker_clip(file_index, speaker):
    """Replace a speaker's clip with a freshly randomized one."""
    waveform_data = st.session_state.speakerWaveforms.get(file_index)
    segs = st.session_state.speakerSegments.get(file_index, {}).get(speaker)
    if waveform_data is None or not segs:
        return  # nothing cached for this file/speaker yet
    waveform, sample_rate = waveform_data
    st.session_state.speakerClips[file_index][speaker] = utils.get_randomized_clip(
        waveform, sample_rate, segs
    )
    print(f"Randomized clip for {speaker} in {file_index}")
230
+
231
+
232
+ # ---------------------------------------------------------------------------
233
+ # Per-file registration helper (keeps Demo / upload code DRY)
234
+ # ---------------------------------------------------------------------------
235
+
236
def register_file(fname):
    """Ensure every per-file session-state dict has an entry for *fname*."""
    state = st.session_state
    state.results.setdefault(fname, [])
    state.summaries.setdefault(fname, {})
    state.unusedSpeakers.setdefault(fname, [])
    # One empty selection slot per already-existing category.
    state.categorySelect.setdefault(fname, [[] for _ in state.categories])
    state.speakerRenames.setdefault(fname, {})
    state.speakerClips.setdefault(fname, {})
    if fname not in state.file_names:
        state.file_names.append(fname)
248
+
249
+
250
+ # ---------------------------------------------------------------------------
251
+ # analyze() — build and cache all DataFrames for one file
252
+ # ---------------------------------------------------------------------------
253
+
254
def analyze(inFileName):
    """Compute and store all summary DataFrames for inFileName."""
    try:
        printV(f"Start analyzing {inFileName}", 4)
        st.session_state.resetResult = False

        # Bail out unless the file has finished diarization (non-empty
        # results entry) and has a summaries slot registered.
        if not (
            inFileName in st.session_state.results
            and inFileName in st.session_state.summaries
            and len(st.session_state.results[inFileName]) > 0
        ):
            return

        currAnnotation, currTotalTime = st.session_state.results[inFileName]
        speakerNames = currAnnotation.labels()
        categorySelections = st.session_state.categorySelect[inFileName]
        printV("Loaded results", 4)

        # Partition the timeline into silence / single-speaker / overlap.
        noVoice, oneVoice, multiVoice = su.calcSpeakingTypes(currAnnotation, currTotalTime)
        sumNoVoice = su.sumTimes(noVoice)
        sumOneVoice = su.sumTimes(oneVoice)
        sumMultiVoice = su.sumTimes(multiVoice)

        # df3: totals per voice category.
        df3 = utils.build_df3(noVoice, oneVoice, multiVoice)
        st.session_state.summaries[inFileName]["df3"] = df3
        printV("Set df3", 4)

        # df4: speaking time per category/speaker; also yields the name/value
        # lists reused for df2 below.
        df4, nameList, valueList, extraNames, extraValues = utils.build_df4(
            speakerNames, categorySelections, st.session_state.categories, currAnnotation
        )
        st.session_state.summaries[inFileName]["df4"] = df4
        printV("Set df4", 4)

        # df5: hierarchical breakdown for the sunburst/treemap tabs.
        df5 = utils.build_df5(
            oneVoice, multiVoice,
            sumNoVoice, sumOneVoice, sumMultiVoice,
            currTotalTime,
        )
        st.session_state.summaries[inFileName]["df5"] = df5
        printV("Set df5", 4)

        # Raw per-segment DataFrame (timeline tab) plus per-speaker times.
        speakers_dataFrame, speakers_times = su.annotationToDataFrame(currAnnotation)
        st.session_state.summaries[inFileName]["speakers_dataFrame"] = speakers_dataFrame
        st.session_state.summaries[inFileName]["speakers_times"] = speakers_times

        # df2: percentage-of-total per name (bar chart).
        df2 = utils.build_df2(
            nameList + extraNames,
            valueList + extraValues,
            currTotalTime,
        )
        st.session_state.summaries[inFileName]["df2"] = df2
        printV("Set df2", 4)

    except Exception as e:
        # Surface failures in the UI instead of dying silently in a callback.
        print(f"Error in analyze: {e}")
        traceback.print_exc()
        st.error(f"Debug - analyze() failed: {e}")
utils.py ADDED
@@ -0,0 +1,481 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ utils.py — pure logic helpers with no Streamlit dependency.
3
+
4
+ Covers:
5
+ - Audio processing (processFile, clip extraction, randomization)
6
+ - DataFrame builders (build_df2 … build_df5)
7
+ - Plotly figure builders (one function per chart tab)
8
+ - Multi-file summary DataFrame builders
9
+ """
10
+
11
+ import io
12
+ import random
13
+ import datetime as dt
14
+ import copy
15
+
16
+ import numpy as np
17
+ import pandas as pd
18
+ import soundfile as sf
19
+ import torch
20
+ import plotly.express as px
21
+ import plotly.graph_objects as go
22
+
23
+ import sonogram_utility as su
24
+
25
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
# Preview clips are between CLIP_MIN_S and CLIP_MAX_S seconds long; the
# loudest-window search slides in CLIP_SCAN_STEP_S-second increments.
CLIP_MIN_S = 3.0
CLIP_MAX_S = 5.0
CLIP_SCAN_STEP_S = 0.5

# Shared Plotly layout kwargs: fully transparent plot/paper backgrounds so
# charts blend into the surrounding page theme.
TRANSPARENT_BG = dict(
    plot_bgcolor="rgba(0,0,0,0)",
    paper_bgcolor="rgba(0,0,0,0)",
)
36
+
37
+ # ---------------------------------------------------------------------------
38
+ # Audio processing
39
+ # ---------------------------------------------------------------------------
40
+
41
def processFile(filePath, pipeline, enableDenoise, earlyCleanup,
                gainWindow, minimumGain, maximumGain,
                dfModel=None, dfState=None, attenLimDB=3):
    """Load, optionally denoise, equalize, and diarize an audio file.

    Parameters
    ----------
    filePath : audio file to analyse.
    pipeline : diarization pipeline, called with
        ``{"waveform": ..., "sample_rate": ...}``.
    enableDenoise : run DeepFilterNet enhancement on each segment.
    earlyCleanup : free intermediate waveforms as soon as possible to lower
        peak memory use.
    gainWindow, minimumGain, maximumGain : volume-equalisation parameters
        forwarded to ``su.equalizeVolume``.
    dfModel, dfState : DeepFilterNet model/state (required when denoising).
    attenLimDB : denoiser attenuation limit in dB.

    Returns
    -------
    (annotations, totalTimeInSeconds, waveform_tensor, sample_rate)
    """
    print("Loading file")
    # Work in <=600 s chunks so the denoiser never needs the whole file at once.
    waveformList, sampleRate = su.splitIntoTimeSegments(filePath, 600)
    print("File loaded")

    if enableDenoise:
        # Lazy import: DeepFilterNet is only needed (and may only be
        # installed) when denoising is on.  Hoisted out of the loop so it
        # runs once instead of once per segment; the original also
        # re-checked enableDenoise inside the loop redundantly.
        from df import enhance
        print("Denoising")
        enhancedWaveformList = [
            enhance(dfModel, dfState, w, atten_lim_db=attenLimDB).detach().cpu()
            for w in waveformList
        ]
        print("Audio denoised")
    else:
        enhancedWaveformList = list(waveformList)

    waveformEnhanced = su.combineWaveforms(enhancedWaveformList)
    if earlyCleanup:
        del enhancedWaveformList

    print("Equalizing Audio")
    waveform_gain_adjusted = su.equalizeVolume()(
        waveformEnhanced, sampleRate, gainWindow, minimumGain, maximumGain
    )
    if earlyCleanup:
        del waveformEnhanced
    print("Audio Equalized")

    print("Detecting speakers")
    diarization_output = pipeline({"waveform": waveform_gain_adjusted, "sample_rate": sampleRate})
    annotations = diarization_output.speaker_diarization
    print("Speakers Detected")

    totalTimeInSeconds = int(waveform_gain_adjusted.shape[-1] / sampleRate)
    return annotations, totalTimeInSeconds, waveform_gain_adjusted, sampleRate
84
+
85
+
86
+ # ---------------------------------------------------------------------------
87
+ # Speaker clip helpers
88
+ # ---------------------------------------------------------------------------
89
+
90
def extract_clip_bytes(waveform, sample_rate, seg_start, seg_end):
    """Return WAV bytes for the loudest CLIP_MIN–CLIP_MAX window in [seg_start, seg_end].

    Scans the segment in CLIP_SCAN_STEP_S strides, keeps the window with the
    highest RMS energy, and encodes it as 16-bit PCM WAV.

    Assumes *waveform* is a 2-D (channels, samples) torch tensor — TODO
    confirm against callers; seg_start/seg_end are in seconds.
    """
    total_samples = waveform.shape[-1]
    seg_start_s = int(seg_start * sample_rate)
    # Clamp to the end of the audio in case the segment overruns it.
    seg_end_s = min(int(seg_end * sample_rate), total_samples)

    seg_dur = (seg_end_s - seg_start_s) / sample_rate
    # Clip length: clamp into [CLIP_MIN_S, CLIP_MAX_S] but never longer than
    # the segment itself.
    clip_dur = min(max(min(seg_dur, CLIP_MAX_S), CLIP_MIN_S), seg_dur)
    clip_samples = int(clip_dur * sample_rate)

    best_start = seg_start_s
    best_rms = -1.0  # sentinel: any real window beats it
    step_samples = int(CLIP_SCAN_STEP_S * sample_rate)

    # Slide a clip-sized window over the segment and keep the loudest one.
    # If the segment is shorter than the clip, the loop body never runs and
    # the window stays anchored at seg_start_s.
    pos = seg_start_s
    while pos + clip_samples <= seg_end_s:
        window = waveform[:, pos: pos + clip_samples].float()
        rms = float(window.pow(2).mean().sqrt())
        if rms > best_rms:
            best_rms = rms
            best_start = pos
        pos += step_samples

    # soundfile expects (frames, channels), hence the transpose.
    clip_np = waveform[:, best_start: best_start + clip_samples].numpy().T
    buf = io.BytesIO()
    sf.write(buf, clip_np, sample_rate, format="WAV", subtype="PCM_16")
    buf.seek(0)
    return buf.read()
118
+
119
+
120
def build_speaker_clips(annotations, waveform, sample_rate):
    """Return (clips_dict, segments_dict) for all speakers in annotations.

    clips_dict    : {speaker: wav_bytes} — preview clip cut from the
                    speaker's longest segment.
    segments_dict : {speaker: [(start, end), ...]}
    """
    clips = {}
    segments = {}

    # Bucket every annotated segment by its speaker label in a single pass
    # (instead of re-scanning all tracks once per speaker).
    by_speaker = {}
    for seg, _, label in annotations.itertracks(yield_label=True):
        by_speaker.setdefault(label, []).append(seg)

    for speaker in annotations.labels():
        speaker_segments = by_speaker.get(speaker, [])
        if not speaker_segments:
            continue
        segments[speaker] = [(s.start, s.end) for s in speaker_segments]
        longest = max(speaker_segments, key=lambda s: s.duration)
        clips[speaker] = extract_clip_bytes(waveform, sample_rate, longest.start, longest.end)

    return clips, segments
142
+
143
+
144
def get_randomized_clip(waveform, sample_rate, segments):
    """Return WAV bytes for a random 3–5 s window drawn from a random segment.

    segments : [(start, end), ...] — all segments for one speaker.  A segment
    is picked with probability proportional to its duration, then a
    CLIP_MIN_S–CLIP_MAX_S window is placed at a uniform random offset inside
    it.

    Raises ValueError on an empty segment list (the original crashed with a
    less clear IndexError).
    """
    if not segments:
        raise ValueError("segments must be non-empty")

    # Floor each duration at 10 ms so degenerate segments still get weight.
    durations = [max(e - s, 0.01) for s, e in segments]
    # Duration-weighted draw; replaces a hand-rolled cumulative-sum loop.
    chosen_start, chosen_end = random.choices(segments, weights=durations, k=1)[0]

    seg_dur = chosen_end - chosen_start
    # Clamp the clip length into [CLIP_MIN_S, CLIP_MAX_S] but never longer
    # than the segment itself.
    clip_dur = min(max(min(seg_dur, CLIP_MAX_S), CLIP_MIN_S), seg_dur)
    offset = random.uniform(0.0, max(seg_dur - clip_dur, 0.0))
    clip_start = chosen_start + offset

    return extract_clip_bytes(waveform, sample_rate, clip_start, clip_start + clip_dur)
168
+
169
+
170
+ # ---------------------------------------------------------------------------
171
+ # DataFrame builders (called from analyze() in state.py)
172
+ # ---------------------------------------------------------------------------
173
+
174
def build_df3(noVoice, oneVoice, multiVoice):
    """Voice category totals DataFrame (columns: values, names)."""
    labelled = [
        ("No Voice", noVoice),
        ("One Voice", oneVoice),
        ("Multi Voice", multiVoice),
    ]
    return pd.DataFrame({
        "values": [su.sumTimes(times) for _, times in labelled],
        "names": [name for name, _ in labelled],
    })
180
+
181
+
182
def build_df4(speakerNames, categorySelections, categoryNames, currAnnotation):
    """Speaker-to-category time DataFrame.

    Each speaker's total speaking time is added to the first category whose
    selection contains it; speakers claimed by no category are listed
    individually, sorted by label.

    Returns (df4, nameList, valueList, extraNames, extraValues).
    """
    nameList = list(categoryNames)
    valueList = [0.0] * len(nameList)
    extraNames: list = []
    extraValues: list = []

    for sp in speakerNames:
        spoken = su.sumTimes(currAnnotation.subset([sp]))
        for i in range(len(nameList)):
            if sp in categorySelections[i]:
                valueList[i] += spoken
                break
        else:
            # No category claimed this speaker — report it on its own row.
            extraNames.append(sp)
            extraValues.append(spoken)

    if extraNames:
        # Deterministic ordering of the uncategorised speakers.
        pairs = sorted(zip(extraNames, extraValues), key=lambda p: p[0])
        extraNames, extraValues = map(list, zip(*pairs))

    df4 = pd.DataFrame({"values": valueList + extraValues, "names": nameList + extraNames})
    return df4, nameList, valueList, extraNames, extraValues
208
+
209
+
210
def build_df5(oneVoice, multiVoice, sumNoVoice, sumOneVoice, sumMultiVoice, currTotalTime):
    """Hierarchical voice-category DataFrame for sunburst / treemap.

    Rows: the three top-level categories (No/One/Multi Voice) followed by one
    row per speaker under One Voice and one per speaker-combination under
    Multi Voice.  'percentiles' are percentages of the whole recording;
    'parentPercentiles' are percentages of the row's parent category.
    """
    speakerList, timeList = su.sumTimesPerSpeaker(oneVoice)
    multiSpeakerList, multiTimeList = su.sumMultiTimesPerSpeaker(multiVoice)

    # Normalise possibly-falsy returns to plain lists.
    speakerList = list(speakerList) if speakerList else []
    timeList = list(timeList) if timeList else []
    multiSpeakerList = list(multiSpeakerList) if multiSpeakerList else []
    multiTimeList = list(multiTimeList) if multiTimeList else []

    # Guard the divisions below against zero denominators.
    summativeMulti = sum(multiTimeList) if multiTimeList else 1
    safeOneVoice = sumOneVoice if sumOneVoice > 0 else 1

    # Fractions of the total recording for each top-level category.
    base = [sumNoVoice / currTotalTime, sumOneVoice / currTotalTime, sumMultiVoice / currTotalTime]

    timeStrings = su.timeToString(timeList) if timeList else []
    multiTimeStrings = su.timeToString(multiTimeList) if multiTimeList else []
    # su.timeToString apparently returns a bare string for single inputs —
    # normalise to a list so concatenation below works.
    if isinstance(timeStrings, str):
        timeStrings = [timeStrings]
    if isinstance(multiTimeStrings, str):
        multiTimeStrings = [multiTimeStrings]

    n_ov = len(speakerList)
    n_mv = len(multiSpeakerList)

    return pd.DataFrame({
        # Stable positional ids so children can point at their parents.
        "ids": ["NV", "OV", "MV"] + [f"OV_{i}" for i in range(n_ov)] + [f"MV_{i}" for i in range(n_mv)],
        "labels": ["No Voice", "One Voice", "Multi Voice"] + speakerList + multiSpeakerList,
        "parents": ["", "", ""] + ["OV"] * n_ov + ["MV"] * n_mv,
        "parentNames": ["Total", "Total", "Total"] + ["One Voice"] * n_ov + ["Multi Voice"] * n_mv,
        "values": [sumNoVoice, sumOneVoice, sumMultiVoice] + timeList + multiTimeList,
        "valueStrings": [
            su.timeToString(sumNoVoice),
            su.timeToString(sumOneVoice),
            su.timeToString(sumMultiVoice),
        ] + timeStrings + multiTimeStrings,
        # Share of the whole recording: child share of parent x parent share of total.
        "percentiles": [b * 100 for b in base]
        + [(t * 100) / safeOneVoice * base[1] for t in timeList]
        + [(t * 100) / summativeMulti * base[2] for t in multiTimeList],
        # Share of the parent category only.
        "parentPercentiles": [b * 100 for b in base]
        + [(t * 100) / safeOneVoice for t in timeList]
        + [(t * 100) / summativeMulti for t in multiTimeList],
    })
253
+
254
+
255
def build_df2(df4_names, df4_values, currTotalTime):
    """Percentage-of-total DataFrame (used by the bar chart tab).

    Each value in *df4_values* is converted to a percentage of
    *currTotalTime*.
    """
    percentages = [value * 100 / currTotalTime for value in df4_values]
    return pd.DataFrame({"values": percentages, "names": list(df4_names)})
261
+
262
+
263
+ # ---------------------------------------------------------------------------
264
+ # Plotly figure builders
265
+ # ---------------------------------------------------------------------------
266
+
267
+ def _save_fig(fig, *paths):
268
+ """Try to write fig to each path; silently skip on failure."""
269
+ for path in paths:
270
+ try:
271
+ fig.write_image(path)
272
+ except Exception:
273
+ pass
274
+
275
+
276
def build_fig_pie1(df3, catTypeColors):
    """Voice category pie chart."""
    pie = go.Pie(values=df3["values"], labels=df3["names"], sort=False)
    fig = go.Figure(data=[pie])
    fig.update_layout(
        title_text="Percentage of each Voice Category",
        colorway=catTypeColors,
        **TRANSPARENT_BG,
    )
    return fig
286
+
287
+
288
def build_fig_pie2(df4, speakerNames, speakerColors, catColors, get_display_name_fn, currFile):
    """Speaker / category pie chart.

    Colorway: category colors first (matching the category rows at the top
    of df4), then one speaker color per speaker row, aligned by the
    speaker's position in *speakerNames*.
    """
    df4 = df4.copy()
    # Build the position lookup once instead of calling list(...).index()
    # for every row — the original was O(n^2) in the number of speakers.
    # Assumes speaker labels are unique (first-occurrence semantics match).
    speaker_index = {name: i for i, name in enumerate(speakerNames)}
    figColors = [
        speakerColors[speaker_index[n]]
        for n in df4["names"] if n in speaker_index
    ]
    df4["names"] = df4["names"].apply(lambda s: get_display_name_fn(s, currFile))
    fig = go.Figure()
    fig.update_layout(
        title_text="Percentage of Speakers and Custom Categories",
        colorway=catColors + figColors,
        **TRANSPARENT_BG,
    )
    fig.add_trace(go.Pie(values=df4["values"], labels=df4["names"], sort=False))
    return fig
304
+
305
+
306
def build_fig_sunburst(df5, catTypeColors, speakerColors, get_display_name_fn, currFile):
    """Sunburst voice-category chart."""
    data = df5.copy()
    for col in ("labels", "parentNames"):
        data[col] = data[col].apply(lambda s: get_display_name_fn(s, currFile))
    fig = px.sunburst(
        data,
        branchvalues="total",
        names="labels", ids="ids", parents="parents",
        values="percentiles",
        custom_data=["labels", "valueStrings", "percentiles", "parentNames", "parentPercentiles"],
        color="labels",
        title="Percentage of each Voice Category with Speakers",
        color_discrete_sequence=catTypeColors + speakerColors,
    )
    hover_lines = [
        "<b>%{customdata[0]}</b>",
        "Duration: %{customdata[1]}s",
        "Percentage of Total: %{customdata[2]:.2f}%",
        "Parent: %{customdata[3]}",
        "Percentage of Parent: %{customdata[4]:.2f}%",
    ]
    fig.update_traces(hovertemplate="<br>".join(hover_lines))
    fig.update_layout(**TRANSPARENT_BG)
    return fig
330
+
331
+
332
def build_fig_treemap(df5, catTypeColors, speakerColors, get_display_name_fn, currFile):
    """Treemap voice-category chart."""
    data = df5.copy()
    for col in ("labels", "parentNames"):
        data[col] = data[col].apply(lambda s: get_display_name_fn(s, currFile))
    fig = px.treemap(
        data,
        branchvalues="total",
        names="labels", parents="parents", ids="ids",
        values="percentiles",
        custom_data=["labels", "valueStrings", "percentiles", "parentNames", "parentPercentiles"],
        color="labels",
        title="Division of Speakers in each Voice Category",
        color_discrete_sequence=catTypeColors + speakerColors,
    )
    hover_lines = [
        "<b>%{customdata[0]}</b>",
        "Duration: %{customdata[1]}s",
        "Percentage of Total: %{customdata[2]:.2f}%",
        "Parent: %{customdata[3]}",
        "Percentage of Parent: %{customdata[4]:.2f}%",
    ]
    fig.update_traces(hovertemplate="<br>".join(hover_lines))
    fig.update_layout(**TRANSPARENT_BG)
    return fig
356
+
357
+
358
def build_fig_timeline(speakers_dataFrame, currTotalTime, speakerColors, get_display_name_fn, currFile):
    """Gantt-style speaker timeline.

    Plotly's timeline axis needs datetimes, so every start/finish offset (in
    seconds) is mapped onto today's date starting at midnight; tick formats
    then show only H:M:S.
    """
    df = speakers_dataFrame.copy()
    df["Resource"] = df["Resource"].apply(lambda s: get_display_name_fn(s, currFile))

    base = dt.datetime.combine(dt.date.today(), dt.time.min)

    def to_audio_dt(s):
        # Accept either an already-datetime value (take its time-of-day as
        # the offset) or a plain number of seconds.
        if isinstance(s, (dt.datetime, pd.Timestamp)):
            midnight = s.replace(hour=0, minute=0, second=0, microsecond=0)
            seconds = (s - midnight).total_seconds()
        else:
            seconds = float(s)
        return base + dt.timedelta(seconds=seconds)

    df["Start"] = df["Start"].apply(to_audio_dt)
    df["Finish"] = df["Finish"].apply(to_audio_dt)

    fig = px.timeline(
        df, x_start="Start", x_end="Finish", y="Resource", color="Resource",
        title="Timeline of Audio with Speakers",
        color_discrete_sequence=speakerColors,
    )
    fig.update_yaxes(autorange="reversed")

    # Decompose the total duration into H/M/S/microseconds for the axis limit.
    # NOTE(review): dt.time() raises ValueError at 24h or more — confirm
    # recordings stay shorter than a day.
    h = int(currTotalTime // 3600)
    m = int(currTotalTime % 3600 // 60)
    s = int(currTotalTime % 60)
    ms= int(currTotalTime * 1_000_000 % 1_000_000)
    time_max = dt.time(h, m, s, ms)

    fig.update_layout(
        # Show milliseconds only when zoomed below 1 s tick spacing.
        xaxis_tickformatstops=[
            dict(dtickrange=[None, 1000], value="%H:%M:%S.%L"),
            dict(dtickrange=[1000, None], value="%H:%M:%S"),
        ],
        xaxis=dict(range=[
            dt.datetime.combine(dt.date.today(), dt.time.min),
            dt.datetime.combine(dt.date.today(), time_max),
        ]),
        xaxis_title="Time",
        yaxis_title="Speaker",
        legend_title=None,
        legend={"traceorder": "reversed"},
        yaxis={"showticklabels": False},
        **TRANSPARENT_BG,
    )
    return fig
406
+
407
+
408
def build_fig_bar(df2, catColors, speakerColors, get_display_name_fn, currFile):
    """Horizontal bar chart — time spoken per speaker."""
    data = df2.copy()
    data["names"] = data["names"].apply(lambda s: get_display_name_fn(s, currFile))
    fig = px.bar(
        data, x="values", y="names", color="names", orientation="h",
        custom_data=["names", "values"],
        title="Time Spoken by each Speaker",
        color_discrete_sequence=catColors + speakerColors,
    )
    fig.update_xaxes(ticksuffix="%")
    fig.update_yaxes(autorange="reversed")
    layout_opts = dict(
        xaxis_title="Percentage Time Spoken",
        yaxis_title=None,
        showlegend=False,
        yaxis={"showticklabels": True},
    )
    fig.update_layout(**layout_opts, **TRANSPARENT_BG)
    hover_lines = [
        "<b>%{customdata[0]}</b>",
        "Percentage of Time: %{customdata[1]:.2f}%",
    ]
    fig.update_traces(hovertemplate="<br>".join(hover_lines))
    return fig
432
+
433
+
434
+ # ---------------------------------------------------------------------------
435
+ # Multi-file summary DataFrames
436
+ # ---------------------------------------------------------------------------
437
+
438
def build_multifile_category_df(validNames, results, summaries, categories, categorySelect):
    """Build df6 (category breakdown per file) for the multi-file expander.

    Side effect: caches each file's (catSummary, extraCats) tuple into
    ``summaries[fn]["categories"]``.

    Returns (df6, allCategories): the user-defined categories plus any extra
    per-file categories discovered along the way.
    """
    df6_dict = {"files": validNames}
    allCategories = copy.deepcopy(categories)

    # Pass 1: compute each file's category summary and register every column
    # (including per-file extra categories) before any rows are appended.
    for fn in validNames:
        currAnnotation, _ = results[fn]
        catSummary, extraCats = su.calcCategories(currAnnotation, categorySelect[fn])
        summaries[fn]["categories"] = (catSummary, extraCats)
        for extra in extraCats:
            df6_dict.setdefault(extra, [])
            if extra not in allCategories:
                allCategories.append(extra)

    for category in categories:
        df6_dict.setdefault(category, [])

    # Pass 2: one row per file — fraction of the file's total duration spent
    # in each category, 0 for categories the file does not have (keeps all
    # columns the same length for the DataFrame constructor).
    for fn in validNames:
        summary, extras = summaries[fn]["categories"]
        theseCategories = categories + extras
        for j, timeSlots in enumerate(summary):
            df6_dict[theseCategories[j]].append(
                sum(t.duration for _, t in timeSlots) / results[fn][1]
            )
        for category in allCategories:
            if category not in theseCategories:
                df6_dict[category].append(0)

    return pd.DataFrame(df6_dict), allCategories
467
+
468
+
469
def build_multifile_voice_df(validNames, summaries):
    """Build df7 (no/one/multi voice percentages per file) for the multi-file expander.

    Reads the first three rows of each file's cached df5 — the No/One/Multi
    Voice 'percentiles' totals.  Returns (df7, voiceNames).
    """
    voiceNames = ["No Voice", "One Voice", "Multi Voice"]
    df7_dict = {"files": validNames}
    for name in voiceNames:
        df7_dict[name] = []

    for fn in validNames:
        partial = summaries[fn]["df5"]
        for i, name in enumerate(voiceNames):
            # .iloc makes this positional on purpose: df5's first three rows
            # are the NV/OV/MV totals.  Plain [i] is label-based and would
            # break if df5 were ever re-indexed.
            df7_dict[name].append(partial["percentiles"].iloc[i])

    return pd.DataFrame(df7_dict), voiceNames