Clean up legacy Streamlit implementation
- Remove src/streamlit_app.py (legacy Streamlit application)
- Remove src/editing_sync.py (Streamlit editing helper)
- Update README.md to remove references to deleted files
- Update improved_diarization.py comment to remove Streamlit reference
- README.md +0 -2
- src/editing_sync.py +0 -65
- src/improved_diarization.py +1 -1
- src/streamlit_app.py +0 -1444
README.md
CHANGED
|
@@ -59,11 +59,9 @@ voxsum-studio/
|
|
| 59 |
│ ├── __init__.py # Makes src a Python package
|
| 60 |
│ ├── asr.py # Logic for Automatic Speech Recognition (ASR) transcription
|
| 61 |
│ ├── diarization.py # Speaker diarization functionality
|
| 62 |
-
│ ├── editing_sync.py # Audio editing and synchronization
|
| 63 |
│ ├── export_utils.py # Utilities for exporting transcripts and summaries
|
| 64 |
│ ├── improved_diarization.py # Enhanced diarization features
|
| 65 |
│ ├── podcast.py # Functions for podcast search, episode fetching, and audio downloading
|
| 66 |
-
│ ├── streamlit_app.py # Legacy Streamlit application (for reference)
|
| 67 |
│ ├── summarization.py # Logic for generating summaries using LLMs
|
| 68 |
│ ├── utils.py # Utility functions and model configurations
|
| 69 |
│ ├── server/ # FastAPI backend
|
|
|
|
| 59 |
│ ├── __init__.py # Makes src a Python package
|
| 60 |
│ ├── asr.py # Logic for Automatic Speech Recognition (ASR) transcription
|
| 61 |
│ ├── diarization.py # Speaker diarization functionality
|
|
|
|
| 62 |
│ ├── export_utils.py # Utilities for exporting transcripts and summaries
|
| 63 |
│ ├── improved_diarization.py # Enhanced diarization features
|
| 64 |
│ ├── podcast.py # Functions for podcast search, episode fetching, and audio downloading
|
|
|
|
| 65 |
│ ├── summarization.py # Logic for generating summaries using LLMs
|
| 66 |
│ ├── utils.py # Utility functions and model configurations
|
| 67 |
│ ├── server/ # FastAPI backend
|
src/editing_sync.py
DELETED
|
@@ -1,65 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Helper script to handle inline editing communication with Streamlit
|
| 3 |
-
"""
|
| 4 |
-
|
| 5 |
-
import streamlit as st
|
| 6 |
-
import json
|
| 7 |
-
|
| 8 |
-
def init_editing_communication():
    """Prepare session state and inject the browser-side editing helpers.

    Ensures ``st.session_state`` holds an ``editing_updates`` dict, then embeds
    a zero-height HTML component whose script:

    * listens for ``utteranceUpdate`` events and logs their ``detail``;
    * defines ``window.getEditedUtterances(playerId)``, which parses the
      ``'voxsum_edits_' + playerId`` entry from ``localStorage``;
    * defines ``window.clearEditedUtterances(playerId)``, which removes that
      entry.
    """
    # Create the container for edits only if it does not exist yet.
    if "editing_updates" not in st.session_state:
        st.session_state["editing_updates"] = {}

    # Browser-side helpers; nothing in this script posts data back to Python.
    bridge_script = """
    <script>
    // Listen for utterance updates
    window.addEventListener('utteranceUpdate', function(event) {
        const detail = event.detail;
        console.log('📝 Utterance update received:', detail);

        // Send update to Streamlit via session state
        // Note: This is a demonstration - in production, you'd use st.components for two-way communication
        // For now, we rely on localStorage and manual sync
    });

    // Function to get all edits for sync with Streamlit
    window.getEditedUtterances = function(playerId) {
        const editKey = 'voxsum_edits_' + playerId;
        return JSON.parse(localStorage.getItem(editKey) || '{}');
    };

    // Function to clear edits after sync
    window.clearEditedUtterances = function(playerId) {
        const editKey = 'voxsum_edits_' + playerId;
        localStorage.removeItem(editKey);
    };
    </script>
    """

    st.components.v1.html(bridge_script, height=0)
|
| 43 |
-
|
| 44 |
-
def check_for_editing_updates():
    """Render the "sync edits" button and report whether it was pressed.

    This is a placeholder: no edits are actually pulled from the browser.
    When the button is pressed it shows an informational message and, if
    ``st.session_state`` holds a non-empty ``utterances`` value, the number
    of utterances.

    Returns:
        bool: ``True`` when the button was pressed, ``False`` otherwise.
    """
    pressed = st.button(
        "🔄 Sync edits from transcript",
        help="Click to apply any edits made in the interactive transcript",
    )
    if not pressed:
        return False

    # In a real implementation, this would:
    # 1. Get edits from JavaScript via st.components
    # 2. Apply them to session state
    # 3. Update the utterances
    st.info("Edits would be synchronized here. For demonstration purposes, the localStorage-based editing is working in the transcript viewer.")

    # Use .get(): this module never initializes "utterances", and attribute
    # access on a missing session-state key raises AttributeError.
    utterances = st.session_state.get("utterances")
    if utterances:
        st.write(f"Current utterances: {len(utterances)}")

    return True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/improved_diarization.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
"""
|
| 2 |
Diarisation Améliorée avec Clustering Adaptatif et Validation de Qualité
|
| 3 |
-
Vendored copy
|
| 4 |
"""
|
| 5 |
|
| 6 |
import numpy as np
|
|
|
|
| 1 |
"""
|
| 2 |
Diarisation Améliorée avec Clustering Adaptatif et Validation de Qualité
|
| 3 |
+
Vendored copy for importability from src/.
|
| 4 |
"""
|
| 5 |
|
| 6 |
import numpy as np
|
src/streamlit_app.py
DELETED
|
@@ -1,1444 +0,0 @@
|
|
| 1 |
-
# frontend.py
|
| 2 |
-
import streamlit as st
|
| 3 |
-
from asr import transcribe_file
|
| 4 |
-
from summarization import summarize_transcript
|
| 5 |
-
from podcast import search_podcast_series, fetch_episodes, download_podcast_audio, fetch_audio
|
| 6 |
-
from utils import model_names, sensevoice_models, available_gguf_llms
|
| 7 |
-
from diarization import (
|
| 8 |
-
init_speaker_embedding_extractor, perform_speaker_diarization_on_utterances,
|
| 9 |
-
merge_transcription_with_diarization, merge_consecutive_utterances, format_speaker_transcript,
|
| 10 |
-
get_diarization_stats, get_speaker_color
|
| 11 |
-
)
|
| 12 |
-
from export_utils import (
|
| 13 |
-
SUBTITLE_FORMATS, TRANSCRIPT_FORMATS, SUMMARY_FORMATS,
|
| 14 |
-
export_to_srt, export_to_vtt, export_to_ass, export_to_transcript_json,
|
| 15 |
-
export_to_elan_eaf, export_plain_text, export_summary_markdown, export_summary_plain_text
|
| 16 |
-
)
|
| 17 |
-
import base64
|
| 18 |
-
import json
|
| 19 |
-
import hashlib
|
| 20 |
-
import os
|
| 21 |
-
import shutil
|
| 22 |
-
import uuid
|
| 23 |
-
import math
|
| 24 |
-
from pathlib import Path
|
| 25 |
-
from datetime import datetime
|
| 26 |
-
|
| 27 |
-
# === 1. Session State Initialization ===
|
| 28 |
-
def init_session_state():
    """Seed ``st.session_state`` with a default for every key listed below.

    A key that is already present is left untouched, so repeated calls do not
    overwrite values stored earlier.
    """
    defaults = {
        "transcript": "",
        "summary": "",
        "status": "Ready",
        "audio_path": None,
        "utterances": [],
        # Diarization results. This key used to be listed twice in the
        # literal; the duplicate entry has been removed.
        "utterances_with_speakers": [],
        "audio_base64": None,
        "prev_audio_path": None,
        "transcribing": False,
        "series_list": [],
        "episodes": [],
        "backend": "sensevoice",  # default ASR backend
        "sensevoice_model": list(sensevoice_models.keys())[0],  # first configured SenseVoice model
        "language": "auto",  # language setting for SenseVoice
        "textnorm": "withitn",  # text normalization for SenseVoice
        "current_page": 1,  # pagination
        "utterances_per_page": 100,  # pagination size
        "static_audio_url": None,  # static audio serving
        # Speaker diarization settings
        "enable_diarization": False,
        "num_speakers": -1,  # -1 = auto
        "cluster_threshold": 0.5,
        "diarization_stats": {},
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value
|
| 58 |
-
|
| 59 |
-
# === 1.1. Static Audio File Management ===
|
| 60 |
-
def cleanup_old_static_files():
    """Prune ``static/`` so that at most ten audio files remain.

    Looks for ``*.mp3``, ``*.wav`` and ``*.m4a`` entries directly inside the
    ``static`` directory (relative to the current working directory). When
    more than ten are found, the ones with the oldest modification time are
    deleted until ten are left.

    The function is best-effort and never raises: a file that cannot be
    deleted is reported and skipped, and any other failure is printed as a
    warning.
    """
    max_kept = 10
    try:
        static_dir = Path("static")
        if not static_dir.exists():
            return

        # Collect every audio file regardless of extension.
        audio_files = []
        for pattern in ("*.mp3", "*.wav", "*.m4a"):
            audio_files.extend(static_dir.glob(pattern))

        if len(audio_files) <= max_kept:
            return

        # Oldest first, so everything before the last `max_kept` is stale.
        audio_files.sort(key=lambda f: f.stat().st_mtime)
        for old_file in audio_files[:-max_kept]:
            try:
                old_file.unlink()
            except OSError as e:
                # Previously a bare `except: pass`; keep going, but say why
                # the file survived instead of hiding the failure.
                print(f"⚠️ Could not remove {old_file.name}: {e}")
            else:
                print(f"🧹 Cleaned up old audio file: {old_file.name}")
    except Exception as e:
        print(f"⚠️ Cleanup warning: {e}")
|
| 83 |
-
|
| 84 |
-
def setup_static_audio(audio_path):
    """Copy an audio file into ``static/`` and return a relative URL for it.

    Old static files are pruned first via ``cleanup_old_static_files()``. The
    copy gets a name of the form ``audio_<8 chars of a uuid4><suffix>``, where
    the suffix is taken from ``audio_path`` and falls back to ``.mp3``.

    Args:
        audio_path: Path of the audio file to copy.

    Returns:
        The string ``"./static/<filename>"`` on success, or ``None`` after
        showing a Streamlit warning when anything fails.
    """
    try:
        # Prune before adding another file.
        cleanup_old_static_files()

        target_dir = Path("static")
        target_dir.mkdir(exist_ok=True)

        suffix = Path(audio_path).suffix or '.mp3'
        short_id = str(uuid.uuid4())[:8]
        unique_name = f"audio_{short_id}{suffix}"

        shutil.copy2(audio_path, target_dir / unique_name)
    except PermissionError:
        st.warning("⚠️ Cannot access static directory. Using fallback method.")
        return None
    except Exception as e:
        st.warning(f"Static file setup failed: {e}. Using fallback method.")
        return None

    return f"./static/{unique_name}"
|
| 114 |
-
|
| 115 |
-
# === 2. UI Components ===
|
| 116 |
-
# In render_settings_sidebar function
|
| 117 |
-
def render_settings_sidebar():
    """Draw the settings sidebar and return the per-run option values.

    Backend, SenseVoice and diarization choices are written straight into
    ``st.session_state``; the remaining widget values are returned.

    NOTE(review): block nesting was reconstructed from a flattened listing.
    It assumes the SenseVoice language/normalization widgets belong to the
    ``else`` branch and that every widget sits inside ``st.sidebar`` - confirm
    against the original file.

    Returns:
        dict: keys ``vad_threshold``, ``model_name``, ``llm_model``,
        ``prompt_input`` and ``utterances_per_page``.
    """
    state = st.session_state
    language_options = ["auto", "zh", "en", "ja", "ko", "yue"]

    with st.sidebar:
        st.header("⚙️ Settings")

        # ASR backend
        backend_index = 0 if state.backend == "moonshine" else 1
        state.backend = st.radio(
            "ASR Backend",
            ["moonshine", "sensevoice"],
            index=backend_index,
        )

        # Model choice depends on the backend
        if state.backend == "moonshine":
            model_name = st.selectbox("Moonshine Model", model_names.keys())
        else:
            if state.sensevoice_model in sensevoice_models:
                sv_index = list(sensevoice_models.keys()).index(state.sensevoice_model)
            else:
                sv_index = 0
            state.sensevoice_model = st.selectbox(
                "SenseVoice Model",
                sensevoice_models.keys(),
                index=sv_index,
            )
            model_name = state.sensevoice_model

            # SenseVoice-only options
            if state.language in language_options:
                language_index = language_options.index(state.language)
            else:
                language_index = 0
            state.language = st.selectbox(
                "Language",
                language_options,
                index=language_index,
            )
            state.textnorm = st.radio(
                "Text Normalization",
                ["withitn", "noitn"],
                index=0 if state.textnorm == "withitn" else 1,
            )

        # Speaker diarization
        st.divider()
        st.subheader("🎭 Speaker Diarization")
        state.enable_diarization = st.checkbox(
            "Enable Speaker Diarization",
            value=state.enable_diarization,
            help="⚠️ This feature is time-consuming and will significantly increase processing time",
        )

        if state.enable_diarization:
            speakers_col, threshold_col = st.columns(2)
            with speakers_col:
                state.num_speakers = st.number_input(
                    "Number of Speakers",
                    min_value=-1,
                    max_value=10,
                    value=state.num_speakers,
                    help="-1 for auto-detection",
                )
            with threshold_col:
                state.cluster_threshold = st.slider(
                    "Clustering Threshold",
                    min_value=0.1,
                    max_value=1.0,
                    value=state.cluster_threshold,
                    step=0.05,
                    help="Lower = more speakers detected",
                )

            st.info("📝 **Note:** Speaker diarization requires downloading ~200MB of models on first use")

        # Remaining widgets, created in the same order as the returned keys.
        vad_threshold = st.slider("VAD Threshold", 0.1, 0.9, 0.5)
        llm_model = st.selectbox("LLM for Summarization", list(available_gguf_llms.keys()))
        prompt_input = st.text_area("Custom Prompt", value="Summarize the transcript below.")
        utterances_per_page = st.number_input(
            "Utterances per page",
            min_value=20,
            max_value=500,
            value=state.utterances_per_page,
            step=20,
            help="For large transcripts, adjust pagination size",
        )

        return {
            "vad_threshold": vad_threshold,
            "model_name": model_name,
            "llm_model": llm_model,
            "prompt_input": prompt_input,
            "utterances_per_page": utterances_per_page,
        }
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
def render_podcast_tab():
    """Render the podcast search / episode selection / download flow.

    Search results and episode lists are kept in ``st.session_state``
    (``series_list`` and ``episodes``); a downloaded episode updates
    ``audio_path`` and ``status``.

    NOTE(review): block nesting was reconstructed from a flattened listing.
    It assumes the series and episode sections are top-level siblings of the
    search button - confirm against the original file.
    """
    state = st.session_state

    def series_label(entry):
        # Label shown in the series dropdown and used to map a choice back.
        return f"{entry['title']} by {entry['artist']}"

    st.subheader("Search Podcast")
    query = st.text_input("Enter podcast name")

    col1, col2 = st.columns(2)
    with col1:
        if st.button("Search Series") and query:
            state.series_list = search_podcast_series(query)

    if state.series_list:
        labels = [series_label(entry) for entry in state.series_list]
        chosen_label = st.selectbox("Select Series", labels)
        series = next(
            (entry for entry in state.series_list if series_label(entry) == chosen_label),
            None,
        )

        if series:
            col1, col2 = st.columns([1, 3])
            with col1:
                st.image(series["thumbnail"], width=150)
            with col2:
                series_info = f"Title: {series['title']}\nArtist: {series['artist']}\nEpisodes: {series['episode_count']}"
                st.text_area("Series Info", value=series_info, disabled=True)

            if st.button("Load Episodes"):
                state.episodes = fetch_episodes(series["feed_url"])

    if state.episodes:
        titles = [item["title"] for item in state.episodes]
        chosen_title = st.selectbox("Select Episode", titles)
        episode = next(
            (item for item in state.episodes if item["title"] == chosen_title),
            None,
        )

        if episode:
            episode_info = f"Title: {episode['title']}\nPublished: {episode['published']}\nDuration: {episode['duration']}"
            st.text_area("Episode Info", value=episode_info, disabled=True)
            if st.button("Download Episode"):
                audio_path, status = download_podcast_audio(episode["audio_url"], episode["title"], state.status)
                state.audio_path = audio_path
                state.status = status
|
| 226 |
-
|
| 227 |
-
def render_audio_tab():
    """Render the "Upload or Fetch Audio" tab.

    Offers two sources: a YouTube URL fetched through ``fetch_audio`` and a
    local upload written to a temporary file. Either one stores the resulting
    path in ``st.session_state.audio_path`` and resets ``audio_base64``; the
    YouTube path also updates ``status``.
    """
    st.subheader("Upload or Fetch Audio")

    # YouTube section
    youtube_url = st.text_input("YouTube URL")
    if st.button("Fetch from YouTube") and youtube_url:
        audio_path, status = fetch_audio(youtube_url, st.session_state.status)
        st.session_state.audio_path = audio_path
        st.session_state.audio_base64 = None
        st.session_state.status = status

    # File upload section
    uploaded_file = st.file_uploader("Upload Audio", type=["mp3", "wav"])
    if uploaded_file:
        import tempfile
        try:
            # The context manager closes the handle even if the write fails;
            # delete=False keeps the file on disk for later processing.
            # NOTE(review): the suffix is always ".mp3" although ".wav"
            # uploads are accepted - confirm downstream code does not rely on
            # the extension before changing it.
            with tempfile.NamedTemporaryFile(prefix="voxsum_", suffix=".mp3", delete=False) as tmp:
                tmp.write(uploaded_file.getbuffer())
            st.session_state.audio_path = tmp.name
            st.session_state.audio_base64 = None
        except Exception as e:
            st.error(f"Failed to save uploaded file: {e}")
|
| 251 |
-
|
| 252 |
-
def create_efficient_sync_player(audio_path, utterances, utterances_with_speakers=None):
|
| 253 |
-
"""
|
| 254 |
-
Ultra-optimized player with inline editing for large audio files and long transcripts:
|
| 255 |
-
1. Base64 encoding with intelligent size limits
|
| 256 |
-
2. Virtual scrolling for 1000+ utterances
|
| 257 |
-
3. Binary search for O(log n) synchronization
|
| 258 |
-
4. Efficient DOM management
|
| 259 |
-
5. Debounced updates
|
| 260 |
-
6. Speaker color coding for diarization
|
| 261 |
-
7. Inline editing with auto-save to session state
|
| 262 |
-
"""
|
| 263 |
-
|
| 264 |
-
# Use speaker-aware utterances if available
|
| 265 |
-
display_utterances = utterances_with_speakers if utterances_with_speakers else utterances
|
| 266 |
-
has_speakers = utterances_with_speakers is not None
|
| 267 |
-
|
| 268 |
-
print(f"🎭 DEBUG Player: has_speakers={has_speakers}, display_utterances count={len(display_utterances)}")
|
| 269 |
-
if has_speakers and len(display_utterances) > 0:
|
| 270 |
-
sample = display_utterances[0]
|
| 271 |
-
print(f"🎭 DEBUG Player: Sample utterance format: {len(sample)} elements = {sample}")
|
| 272 |
-
|
| 273 |
-
file_size = os.path.getsize(audio_path)
|
| 274 |
-
|
| 275 |
-
# For now, use base64 for all files with intelligent limits
|
| 276 |
-
# TODO: Implement proper static file serving for production
|
| 277 |
-
if file_size > 100 * 1024 * 1024: # 100MB absolute limit
|
| 278 |
-
return f"""
|
| 279 |
-
<div style="padding: 20px; text-align: center; color: #d32f2f; background: #ffebee; border-radius: 8px;">
|
| 280 |
-
⚠️ Audio file too large ({file_size / 1024 / 1024:.1f}MB) for browser playback.
|
| 281 |
-
<br>Please use a smaller file (< 100MB) for optimal performance.
|
| 282 |
-
<br><small>Large file support requires production deployment.</small>
|
| 283 |
-
</div>
|
| 284 |
-
"""
|
| 285 |
-
|
| 286 |
-
# Read and encode file as base64 - most reliable method
|
| 287 |
-
try:
|
| 288 |
-
with open(audio_path, "rb") as f:
|
| 289 |
-
audio_bytes = f.read()
|
| 290 |
-
|
| 291 |
-
# Check if base64 will be too large for DOM
|
| 292 |
-
base64_size = len(audio_bytes) * 4 // 3 # Approximate base64 size
|
| 293 |
-
if base64_size > 100 * 1024 * 1024: # 100MB base64 limit
|
| 294 |
-
return f"""
|
| 295 |
-
<div style="padding: 20px; text-align: center; color: #d32f2f; background: #ffebee; border-radius: 8px;">
|
| 296 |
-
⚠️ Audio file creates {base64_size / 1024 / 1024:.1f}MB base64 string - too large for DOM.
|
| 297 |
-
<br>Please use a smaller file (< 75MB original size).
|
| 298 |
-
</div>
|
| 299 |
-
"""
|
| 300 |
-
|
| 301 |
-
audio_url = f"data:audio/mp3;base64,{base64.b64encode(audio_bytes).decode('utf-8')}"
|
| 302 |
-
|
| 303 |
-
# Warning for larger files
|
| 304 |
-
audio_warning = ""
|
| 305 |
-
if file_size > 10 * 1024 * 1024: # > 10MB
|
| 306 |
-
audio_warning = f"""
|
| 307 |
-
<div style="padding: 8px; background: #fff3e0; border-left: 4px solid #ff9800; margin-bottom: 10px; border-radius: 4px;">
|
| 308 |
-
📡 Loading {file_size / 1024 / 1024:.1f}MB file ({base64_size / 1024 / 1024:.1f}MB encoded)... This may take a moment.
|
| 309 |
-
</div>
|
| 310 |
-
"""
|
| 311 |
-
except Exception as e:
|
| 312 |
-
return f"""
|
| 313 |
-
<div style="padding: 20px; text-align: center; color: #d32f2f;">
|
| 314 |
-
❌ Failed to load audio file: {str(e)}
|
| 315 |
-
</div>
|
| 316 |
-
"""
|
| 317 |
-
|
| 318 |
-
# Generate unique ID for this player instance
|
| 319 |
-
player_id = hashlib.md5((audio_path + str(len(display_utterances))).encode()).hexdigest()[:8]
|
| 320 |
-
|
| 321 |
-
# Determine if we need virtualization
|
| 322 |
-
use_virtualization = len(display_utterances) > 200
|
| 323 |
-
max_visible_items = 50 if use_virtualization else len(display_utterances)
|
| 324 |
-
|
| 325 |
-
# Prepare utterances data and speaker colors
|
| 326 |
-
utterances_json = json.dumps(display_utterances)
|
| 327 |
-
|
| 328 |
-
# Generate speaker color mapping for JavaScript
|
| 329 |
-
speaker_colors = {}
|
| 330 |
-
if has_speakers:
|
| 331 |
-
unique_speakers = set()
|
| 332 |
-
for utt in display_utterances:
|
| 333 |
-
if len(utt) >= 4: # (start, end, text, speaker_id)
|
| 334 |
-
unique_speakers.add(utt[3])
|
| 335 |
-
for speaker_id in unique_speakers:
|
| 336 |
-
speaker_colors[speaker_id] = get_speaker_color(speaker_id)
|
| 337 |
-
|
| 338 |
-
speaker_colors_json = json.dumps(speaker_colors)
|
| 339 |
-
|
| 340 |
-
html_content = f"""
|
| 341 |
-
<!DOCTYPE html>
|
| 342 |
-
<html>
|
| 343 |
-
<head>
|
| 344 |
-
<meta charset="UTF-8">
|
| 345 |
-
<style>
|
| 346 |
-
body {{
|
| 347 |
-
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
|
| 348 |
-
margin: 0; padding: 10px; background: #fafafa;
|
| 349 |
-
}}
|
| 350 |
-
#audio-container-{player_id} {{
|
| 351 |
-
margin-bottom: 15px;
|
| 352 |
-
background: white;
|
| 353 |
-
border-radius: 8px;
|
| 354 |
-
padding: 10px;
|
| 355 |
-
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
| 356 |
-
}}
|
| 357 |
-
#transcript-container-{player_id} {{
|
| 358 |
-
max-height: 600px;
|
| 359 |
-
overflow-y: auto;
|
| 360 |
-
border: 1px solid #e0e0e0;
|
| 361 |
-
border-radius: 8px;
|
| 362 |
-
background: white;
|
| 363 |
-
position: relative;
|
| 364 |
-
}}
|
| 365 |
-
#virtual-content-{player_id} {{
|
| 366 |
-
padding: 8px;
|
| 367 |
-
position: relative;
|
| 368 |
-
}}
|
| 369 |
-
.utterance-{player_id} {{
|
| 370 |
-
padding: 8px 12px;
|
| 371 |
-
margin: 2px 0;
|
| 372 |
-
border-radius: 6px;
|
| 373 |
-
cursor: pointer;
|
| 374 |
-
transition: all 0.15s ease;
|
| 375 |
-
border-left: 3px solid transparent;
|
| 376 |
-
font-size: 0.95em;
|
| 377 |
-
line-height: 1.5;
|
| 378 |
-
background: #fdfdfd;
|
| 379 |
-
}}
|
| 380 |
-
.utterance-{player_id}:hover {{
|
| 381 |
-
background-color: #f0f8ff;
|
| 382 |
-
transform: translateX(3px);
|
| 383 |
-
box-shadow: 0 2px 8px rgba(33, 150, 243, 0.2);
|
| 384 |
-
}}
|
| 385 |
-
.current-{player_id} {{
|
| 386 |
-
background: linear-gradient(135deg, #e3f2fd 0%, #f3e5f5 100%) !important;
|
| 387 |
-
border-left: 3px solid #2196f3 !important;
|
| 388 |
-
font-weight: 500;
|
| 389 |
-
box-shadow: 0 3px 12px rgba(33, 150, 243, 0.3);
|
| 390 |
-
transform: translateX(3px);
|
| 391 |
-
}}
|
| 392 |
-
.timestamp-{player_id} {{
|
| 393 |
-
font-size: 0.8em;
|
| 394 |
-
color: #666;
|
| 395 |
-
margin-right: 8px;
|
| 396 |
-
font-weight: 600;
|
| 397 |
-
background: #f5f5f5;
|
| 398 |
-
padding: 2px 6px;
|
| 399 |
-
border-radius: 3px;
|
| 400 |
-
}}
|
| 401 |
-
.pagination-{player_id} {{
|
| 402 |
-
display: flex;
|
| 403 |
-
justify-content: center;
|
| 404 |
-
align-items: center;
|
| 405 |
-
padding: 10px;
|
| 406 |
-
background: #f8f9fa;
|
| 407 |
-
border-top: 1px solid #e0e0e0;
|
| 408 |
-
gap: 10px;
|
| 409 |
-
}}
|
| 410 |
-
.pagination-{player_id} button {{
|
| 411 |
-
padding: 6px 12px;
|
| 412 |
-
border: 1px solid #ddd;
|
| 413 |
-
background: white;
|
| 414 |
-
border-radius: 4px;
|
| 415 |
-
cursor: pointer;
|
| 416 |
-
transition: all 0.2s;
|
| 417 |
-
}}
|
| 418 |
-
.pagination-{player_id} button:hover {{
|
| 419 |
-
background: #e3f2fd;
|
| 420 |
-
border-color: #2196f3;
|
| 421 |
-
}}
|
| 422 |
-
.pagination-{player_id} button:disabled {{
|
| 423 |
-
opacity: 0.5;
|
| 424 |
-
cursor: not-allowed;
|
| 425 |
-
}}
|
| 426 |
-
.stats-{player_id} {{
|
| 427 |
-
font-size: 0.85em;
|
| 428 |
-
color: #666;
|
| 429 |
-
text-align: center;
|
| 430 |
-
padding: 5px;
|
| 431 |
-
background: #f8f9fa;
|
| 432 |
-
}}
|
| 433 |
-
|
| 434 |
-
/* Inline editing styles */
|
| 435 |
-
.edit-mode-{player_id} {{
|
| 436 |
-
background: #fff8e1 !important;
|
| 437 |
-
border: 2px solid #ff9800 !important;
|
| 438 |
-
border-radius: 8px;
|
| 439 |
-
}}
|
| 440 |
-
|
| 441 |
-
.edit-controls-{player_id} {{
|
| 442 |
-
display: flex;
|
| 443 |
-
align-items: center;
|
| 444 |
-
gap: 8px;
|
| 445 |
-
margin-top: 8px;
|
| 446 |
-
padding-top: 8px;
|
| 447 |
-
border-top: 1px solid #e0e0e0;
|
| 448 |
-
}}
|
| 449 |
-
|
| 450 |
-
.edit-textarea-{player_id} {{
|
| 451 |
-
width: 100%;
|
| 452 |
-
border: 1px solid #ddd;
|
| 453 |
-
border-radius: 4px;
|
| 454 |
-
padding: 8px;
|
| 455 |
-
font-size: 0.95em;
|
| 456 |
-
line-height: 1.5;
|
| 457 |
-
resize: vertical;
|
| 458 |
-
min-height: 60px;
|
| 459 |
-
font-family: inherit;
|
| 460 |
-
}}
|
| 461 |
-
|
| 462 |
-
.edit-btn-{player_id} {{
|
| 463 |
-
padding: 4px 8px;
|
| 464 |
-
border: 1px solid #ddd;
|
| 465 |
-
border-radius: 4px;
|
| 466 |
-
background: white;
|
| 467 |
-
cursor: pointer;
|
| 468 |
-
font-size: 0.8em;
|
| 469 |
-
transition: all 0.2s;
|
| 470 |
-
}}
|
| 471 |
-
|
| 472 |
-
.edit-btn-{player_id}.save {{
|
| 473 |
-
background: #4caf50;
|
| 474 |
-
color: white;
|
| 475 |
-
border-color: #4caf50;
|
| 476 |
-
}}
|
| 477 |
-
|
| 478 |
-
.edit-btn-{player_id}.cancel {{
|
| 479 |
-
background: #f44336;
|
| 480 |
-
color: white;
|
| 481 |
-
border-color: #f44336;
|
| 482 |
-
}}
|
| 483 |
-
|
| 484 |
-
.edit-btn-{player_id}:hover {{
|
| 485 |
-
opacity: 0.8;
|
| 486 |
-
}}
|
| 487 |
-
|
| 488 |
-
.edit-icon-{player_id} {{
|
| 489 |
-
position: absolute;
|
| 490 |
-
top: 8px;
|
| 491 |
-
right: 8px;
|
| 492 |
-
background: rgba(255, 152, 0, 0.1);
|
| 493 |
-
border: 1px solid #ff9800;
|
| 494 |
-
border-radius: 50%;
|
| 495 |
-
width: 24px;
|
| 496 |
-
height: 24px;
|
| 497 |
-
display: flex;
|
| 498 |
-
align-items: center;
|
| 499 |
-
justify-content: center;
|
| 500 |
-
cursor: pointer;
|
| 501 |
-
font-size: 12px;
|
| 502 |
-
opacity: 0;
|
| 503 |
-
transition: opacity 0.2s;
|
| 504 |
-
}}
|
| 505 |
-
|
| 506 |
-
.utterance-{player_id}:hover .edit-icon-{player_id} {{
|
| 507 |
-
opacity: 1;
|
| 508 |
-
}}
|
| 509 |
-
|
| 510 |
-
.utterance-text-{player_id} {{
|
| 511 |
-
position: relative;
|
| 512 |
-
padding-right: 30px;
|
| 513 |
-
}}
|
| 514 |
-
</style>
|
| 515 |
-
</head>
|
| 516 |
-
<body>
|
| 517 |
-
{audio_warning}
|
| 518 |
-
<div id="audio-container-{player_id}">
|
| 519 |
-
<audio id="audio-{player_id}" controls preload="auto" style="width: 100%;">
|
| 520 |
-
<source src="{audio_url}" type="audio/mp3">
|
| 521 |
-
<source src="{audio_url}" type="audio/mpeg">
|
| 522 |
-
<source src="{audio_url}" type="audio/wav">
|
| 523 |
-
Your browser does not support the audio element.
|
| 524 |
-
</audio>
|
| 525 |
-
</div>
|
| 526 |
-
|
| 527 |
-
<div class="stats-{player_id}">
|
| 528 |
-
📊 {len(display_utterances)} utterances • ⏱️ {display_utterances[-1][1]:.1f}s duration
|
| 529 |
-
{' • 🔄 Virtual scrolling enabled' if use_virtualization else ''}
|
| 530 |
-
{' • 🎭 Speaker diarization active' if has_speakers else ''}
|
| 531 |
-
</div>
|
| 532 |
-
|
| 533 |
-
<div id="transcript-container-{player_id}">
|
| 534 |
-
<div id="virtual-content-{player_id}"></div>
|
| 535 |
-
</div>
|
| 536 |
-
|
| 537 |
-
{"<div class='pagination-" + player_id + "' id='pagination-" + player_id + "'></div>" if use_virtualization else ""}
|
| 538 |
-
|
| 539 |
-
<script>
|
| 540 |
-
(function() {{
|
| 541 |
-
const playerId = '{player_id}';
|
| 542 |
-
const player = document.getElementById('audio-' + playerId);
|
| 543 |
-
const container = document.getElementById('transcript-container-' + playerId);
|
| 544 |
-
const virtualContent = document.getElementById('virtual-content-' + playerId);
|
| 545 |
-
const utterances = {utterances_json};
|
| 546 |
-
const useVirtualization = {str(use_virtualization).lower()};
|
| 547 |
-
const maxVisibleItems = {max_visible_items};
|
| 548 |
-
const hasSpeakers = {str(has_speakers).lower()};
|
| 549 |
-
const speakerColors = {speaker_colors_json};
|
| 550 |
-
|
| 551 |
-
let currentHighlight = null;
|
| 552 |
-
let isSeeking = false;
|
| 553 |
-
let lastUpdateTime = 0;
|
| 554 |
-
let currentPage = 1;
|
| 555 |
-
let itemsPerPage = maxVisibleItems;
|
| 556 |
-
let totalPages = Math.ceil(utterances.length / itemsPerPage);
|
| 557 |
-
|
| 558 |
-
// Binary search for efficient utterance finding - O(log n)
|
| 559 |
-
function findActiveUtterance(currentTime) {{
|
| 560 |
-
let left = 0, right = utterances.length - 1;
|
| 561 |
-
let result = -1;
|
| 562 |
-
|
| 563 |
-
while (left <= right) {{
|
| 564 |
-
const mid = Math.floor((left + right) / 2);
|
| 565 |
-
const [start, end] = utterances[mid];
|
| 566 |
-
|
| 567 |
-
if (currentTime >= start && currentTime < end) {{
|
| 568 |
-
return mid;
|
| 569 |
-
}} else if (currentTime < start) {{
|
| 570 |
-
right = mid - 1;
|
| 571 |
-
}} else {{
|
| 572 |
-
left = mid + 1;
|
| 573 |
-
if (currentTime >= start) result = mid; // Keep track of closest
|
| 574 |
-
}}
|
| 575 |
-
}}
|
| 576 |
-
return result;
|
| 577 |
-
}}
|
| 578 |
-
|
| 579 |
-
// Efficient DOM builder with virtual scrolling
|
| 580 |
-
function buildTranscript(page = 1) {{
|
| 581 |
-
virtualContent.innerHTML = '';
|
| 582 |
-
|
| 583 |
-
let startIdx, endIdx;
|
| 584 |
-
if (useVirtualization) {{
|
| 585 |
-
startIdx = (page - 1) * itemsPerPage;
|
| 586 |
-
endIdx = Math.min(startIdx + itemsPerPage, utterances.length);
|
| 587 |
-
}} else {{
|
| 588 |
-
startIdx = 0;
|
| 589 |
-
endIdx = utterances.length;
|
| 590 |
-
}}
|
| 591 |
-
|
| 592 |
-
// Create document fragment for efficient DOM insertion
|
| 593 |
-
const fragment = document.createDocumentFragment();
|
| 594 |
-
|
| 595 |
-
for (let i = startIdx; i < endIdx; i++) {{
|
| 596 |
-
const utt = utterances[i];
|
| 597 |
-
if (utt.length < 3) continue;
|
| 598 |
-
|
| 599 |
-
const [start, end, text] = utt;
|
| 600 |
-
const speakerId = hasSpeakers && utt.length >= 4 ? utt[3] : null;
|
| 601 |
-
|
| 602 |
-
const div = document.createElement('div');
|
| 603 |
-
div.className = 'utterance-' + playerId;
|
| 604 |
-
div.dataset.start = start;
|
| 605 |
-
div.dataset.end = end;
|
| 606 |
-
div.dataset.index = i;
|
| 607 |
-
|
| 608 |
-
// Apply speaker color if available
|
| 609 |
-
if (speakerId !== null && speakerColors[speakerId]) {{
|
| 610 |
-
div.style.borderLeftColor = speakerColors[speakerId];
|
| 611 |
-
div.style.backgroundColor = speakerColors[speakerId] + '15'; // 15% opacity
|
| 612 |
-
}}
|
| 613 |
-
|
| 614 |
-
const minutes = Math.floor(start / 60);
|
| 615 |
-
const seconds = Math.floor(start % 60).toString().padStart(2, '0');
|
| 616 |
-
|
| 617 |
-
// Build content with optional speaker label and edit controls
|
| 618 |
-
let content = `<span class="timestamp-${{playerId}}">[${{minutes}}:${{seconds}}]</span>`;
|
| 619 |
-
if (speakerId !== null) {{
|
| 620 |
-
content += ` <span class="speaker-label-${{playerId}}" style="background: ${{speakerColors[speakerId] || '#ccc'}}; color: white; padding: 2px 6px; border-radius: 3px; font-size: 0.8em; margin-right: 6px;">S${{speakerId + 1}}</span>`;
|
| 621 |
-
}}
|
| 622 |
-
|
| 623 |
-
// Wrap text in a container for editing
|
| 624 |
-
content += `<div class="utterance-text-${{playerId}}">
|
| 625 |
-
<span class="text-display-${{playerId}}">${{text}}</span>
|
| 626 |
-
<div class="edit-icon-${{playerId}}" onclick="startEdit(${{i}})" title="Edit this utterance">✏️</div>
|
| 627 |
-
<div class="edit-mode-container-${{playerId}}" style="display: none;">
|
| 628 |
-
<textarea class="edit-textarea-${{playerId}}">${{text}}</textarea>
|
| 629 |
-
<div class="edit-controls-${{playerId}}">
|
| 630 |
-
<button class="edit-btn-${{playerId}} save" onclick="saveEdit(${{i}})">💾 Save</button>
|
| 631 |
-
<button class="edit-btn-${{playerId}} cancel" onclick="cancelEdit(${{i}})">❌ Cancel</button>
|
| 632 |
-
</div>
|
| 633 |
-
</div>
|
| 634 |
-
</div>`;
|
| 635 |
-
|
| 636 |
-
div.innerHTML = content;
|
| 637 |
-
|
| 638 |
-
// Optimized click handler
|
| 639 |
-
div.addEventListener('click', (e) => {{
|
| 640 |
-
e.stopPropagation();
|
| 641 |
-
isSeeking = true;
|
| 642 |
-
player.currentTime = start;
|
| 643 |
-
player.play().catch(() => {{}});
|
| 644 |
-
setTimeout(() => isSeeking = false, 150);
|
| 645 |
-
}});
|
| 646 |
-
|
| 647 |
-
fragment.appendChild(div);
|
| 648 |
-
}}
|
| 649 |
-
|
| 650 |
-
virtualContent.appendChild(fragment);
|
| 651 |
-
updatePagination();
|
| 652 |
-
}}
|
| 653 |
-
|
| 654 |
-
// Pagination controls
|
| 655 |
-
function updatePagination() {{
|
| 656 |
-
if (!useVirtualization) return;
|
| 657 |
-
|
| 658 |
-
const pagination = document.getElementById('pagination-' + playerId);
|
| 659 |
-
if (!pagination) return;
|
| 660 |
-
|
| 661 |
-
pagination.innerHTML = `
|
| 662 |
-
<button onclick="window.transcriptPlayers_${{playerId}}.goToPage(1)"
|
| 663 |
-
${{currentPage === 1 ? 'disabled' : ''}}>⏮️</button>
|
| 664 |
-
<button onclick="window.transcriptPlayers_${{playerId}}.goToPage(${{Math.max(1, currentPage - 1)}})"
|
| 665 |
-
${{currentPage === 1 ? 'disabled' : ''}}>⏪</button>
|
| 666 |
-
<span>Page ${{currentPage}} of ${{totalPages}}</span>
|
| 667 |
-
<button onclick="window.transcriptPlayers_${{playerId}}.goToPage(${{Math.min(totalPages, currentPage + 1)}})"
|
| 668 |
-
${{currentPage === totalPages ? 'disabled' : ''}}>⏩</button>
|
| 669 |
-
<button onclick="window.transcriptPlayers_${{playerId}}.goToPage(${{totalPages}})"
|
| 670 |
-
${{currentPage === totalPages ? 'disabled' : ''}}>⏭️</button>
|
| 671 |
-
`;
|
| 672 |
-
}}
|
| 673 |
-
|
| 674 |
-
// Page navigation
|
| 675 |
-
function goToPage(page) {{
|
| 676 |
-
if (page < 1 || page > totalPages) return;
|
| 677 |
-
currentPage = page;
|
| 678 |
-
buildTranscript(currentPage);
|
| 679 |
-
}}
|
| 680 |
-
|
| 681 |
-
// Auto-navigate to page containing active utterance
|
| 682 |
-
function navigateToActiveUtterance(utteranceIndex) {{
|
| 683 |
-
if (!useVirtualization || utteranceIndex === -1) return;
|
| 684 |
-
|
| 685 |
-
const targetPage = Math.ceil((utteranceIndex + 1) / itemsPerPage);
|
| 686 |
-
if (targetPage !== currentPage) {{
|
| 687 |
-
currentPage = targetPage;
|
| 688 |
-
buildTranscript(currentPage);
|
| 689 |
-
}}
|
| 690 |
-
}}
|
| 691 |
-
|
| 692 |
-
// Optimized highlighting with debouncing - max 20fps for better performance
|
| 693 |
-
function updateHighlight() {{
|
| 694 |
-
const now = Date.now();
|
| 695 |
-
if (now - lastUpdateTime < 50) return; // 20fps max
|
| 696 |
-
lastUpdateTime = now;
|
| 697 |
-
|
| 698 |
-
if (isSeeking) return;
|
| 699 |
-
|
| 700 |
-
const time = player.currentTime;
|
| 701 |
-
const activeUtteranceIndex = findActiveUtterance(time);
|
| 702 |
-
|
| 703 |
-
// Auto-navigate to correct page if needed
|
| 704 |
-
navigateToActiveUtterance(activeUtteranceIndex);
|
| 705 |
-
|
| 706 |
-
// Find active div in current page
|
| 707 |
-
const divs = virtualContent.querySelectorAll('.utterance-' + playerId);
|
| 708 |
-
let activeDiv = null;
|
| 709 |
-
|
| 710 |
-
for (const div of divs) {{
|
| 711 |
-
const index = parseInt(div.dataset.index);
|
| 712 |
-
if (index === activeUtteranceIndex) {{
|
| 713 |
-
activeDiv = div;
|
| 714 |
-
break;
|
| 715 |
-
}}
|
| 716 |
-
}}
|
| 717 |
-
|
| 718 |
-
// Update highlight with smooth transition
|
| 719 |
-
if (activeDiv !== currentHighlight) {{
|
| 720 |
-
if (currentHighlight) {{
|
| 721 |
-
currentHighlight.classList.remove('current-' + playerId);
|
| 722 |
-
}}
|
| 723 |
-
if (activeDiv) {{
|
| 724 |
-
activeDiv.classList.add('current-' + playerId);
|
| 725 |
-
// Smooth scroll with animation
|
| 726 |
-
activeDiv.scrollIntoView({{
|
| 727 |
-
behavior: 'smooth',
|
| 728 |
-
block: 'center',
|
| 729 |
-
inline: 'nearest'
|
| 730 |
-
}});
|
| 731 |
-
}}
|
| 732 |
-
currentHighlight = activeDiv;
|
| 733 |
-
}}
|
| 734 |
-
}}
|
| 735 |
-
|
| 736 |
-
// Global API for pagination
|
| 737 |
-
window.transcriptPlayers_{player_id} = {{ goToPage }};
|
| 738 |
-
|
| 739 |
-
// Initialize
|
| 740 |
-
buildTranscript(1);
|
| 741 |
-
player.addEventListener('timeupdate', updateHighlight);
|
| 742 |
-
|
| 743 |
-
// Enhanced audio loading diagnostics with UI feedback
|
| 744 |
-
player.addEventListener('loadstart', () => {{
|
| 745 |
-
console.log('🔄 Audio loading started');
|
| 746 |
-
const container = document.getElementById('audio-container-' + playerId);
|
| 747 |
-
const statusDiv = document.createElement('div');
|
| 748 |
-
statusDiv.id = 'loading-status-' + playerId;
|
| 749 |
-
statusDiv.style.cssText = 'padding: 5px; background: #e3f2fd; color: #1976d2; border-radius: 4px; margin-top: 5px; font-size: 0.9em;';
|
| 750 |
-
statusDiv.innerHTML = '🔄 Loading audio...';
|
| 751 |
-
container.appendChild(statusDiv);
|
| 752 |
-
}});
|
| 753 |
-
|
| 754 |
-
player.addEventListener('loadedmetadata', () => {{
|
| 755 |
-
console.log('✅ Audio metadata loaded');
|
| 756 |
-
const statusDiv = document.getElementById('loading-status-' + playerId);
|
| 757 |
-
if (statusDiv) statusDiv.innerHTML = '✅ Metadata loaded';
|
| 758 |
-
}});
|
| 759 |
-
|
| 760 |
-
player.addEventListener('loadeddata', () => {{
|
| 761 |
-
console.log('✅ Audio data loaded');
|
| 762 |
-
const statusDiv = document.getElementById('loading-status-' + playerId);
|
| 763 |
-
if (statusDiv) statusDiv.innerHTML = '✅ Audio data ready';
|
| 764 |
-
}});
|
| 765 |
-
|
| 766 |
-
player.addEventListener('canplay', () => {{
|
| 767 |
-
console.log('▶️ Audio can start playing');
|
| 768 |
-
const statusDiv = document.getElementById('loading-status-' + playerId);
|
| 769 |
-
if (statusDiv) {{
|
| 770 |
-
statusDiv.innerHTML = '🎵 Ready to play';
|
| 771 |
-
setTimeout(() => statusDiv.remove(), 2000);
|
| 772 |
-
}}
|
| 773 |
-
}});
|
| 774 |
-
|
| 775 |
-
player.addEventListener('canplaythrough', () => {{
|
| 776 |
-
console.log('🚀 Audio can play through');
|
| 777 |
-
}});
|
| 778 |
-
|
| 779 |
-
player.addEventListener('error', (e) => {{
|
| 780 |
-
console.error('❌ Audio error:', e, player.error);
|
| 781 |
-
const statusDiv = document.getElementById('loading-status-' + playerId);
|
| 782 |
-
if (statusDiv) statusDiv.remove();
|
| 783 |
-
|
| 784 |
-
const errorDiv = document.createElement('div');
|
| 785 |
-
errorDiv.style.cssText = 'padding: 10px; background: #ffebee; color: #c62828; border-radius: 4px; margin-top: 10px; border-left: 4px solid #f44336;';
|
| 786 |
-
|
| 787 |
-
let errorMessage = '❌ Audio loading failed. ';
|
| 788 |
-
if (player.error) {{
|
| 789 |
-
switch(player.error.code) {{
|
| 790 |
-
case 1: errorMessage += 'Network error - check your connection.'; break;
|
| 791 |
-
case 2: errorMessage += 'File format not supported.'; break;
|
| 792 |
-
case 3: errorMessage += 'Audio decoding failed.'; break;
|
| 793 |
-
case 4: errorMessage += 'Audio source not usable.'; break;
|
| 794 |
-
default: errorMessage += 'Unknown error occurred.';
|
| 795 |
-
}}
|
| 796 |
-
}} else {{
|
| 797 |
-
errorMessage += 'Please check the file format and try again.';
|
| 798 |
-
}}
|
| 799 |
-
|
| 800 |
-
errorDiv.innerHTML = errorMessage;
|
| 801 |
-
document.getElementById('audio-container-' + playerId).appendChild(errorDiv);
|
| 802 |
-
}});
|
| 803 |
-
|
| 804 |
-
// Timeout fallback - if no canplay event after 30 seconds
|
| 805 |
-
setTimeout(() => {{
|
| 806 |
-
if (player.readyState === 0) {{
|
| 807 |
-
console.warn('⚠️ Audio loading timeout');
|
| 808 |
-
const container = document.getElementById('audio-container-' + playerId);
|
| 809 |
-
const timeoutDiv = document.createElement('div');
|
| 810 |
-
timeoutDiv.style.cssText = 'padding: 8px; background: #fff3e0; color: #f57c00; border-radius: 4px; margin-top: 5px;';
|
| 811 |
-
timeoutDiv.innerHTML = '⚠️ Audio loading is taking longer than expected. Large file or slow connection?';
|
| 812 |
-
container.appendChild(timeoutDiv);
|
| 813 |
-
}}
|
| 814 |
-
}}, 30000);
|
| 815 |
-
|
| 816 |
-
// Handle seek events
|
| 817 |
-
player.addEventListener('seeking', () => isSeeking = true);
|
| 818 |
-
player.addEventListener('seeked', () => {{
|
| 819 |
-
setTimeout(() => isSeeking = false, 100);
|
| 820 |
-
}});
|
| 821 |
-
|
| 822 |
-
// Keyboard navigation
|
| 823 |
-
document.addEventListener('keydown', (e) => {{
|
| 824 |
-
if (!useVirtualization) return;
|
| 825 |
-
if (e.target.tagName === 'INPUT' || e.target.tagName === 'TEXTAREA') return;
|
| 826 |
-
|
| 827 |
-
if (e.key === 'ArrowLeft' && currentPage > 1) {{
|
| 828 |
-
e.preventDefault();
|
| 829 |
-
goToPage(currentPage - 1);
|
| 830 |
-
}} else if (e.key === 'ArrowRight' && currentPage < totalPages) {{
|
| 831 |
-
e.preventDefault();
|
| 832 |
-
goToPage(currentPage + 1);
|
| 833 |
-
}}
|
| 834 |
-
}});
|
| 835 |
-
|
| 836 |
-
// Inline editing functions
|
| 837 |
-
window.startEdit = function(index) {{
|
| 838 |
-
const div = document.querySelector(`[data-index="${{index}}"]`);
|
| 839 |
-
if (!div) return;
|
| 840 |
-
|
| 841 |
-
const textDisplay = div.querySelector('.text-display-' + playerId);
|
| 842 |
-
const editContainer = div.querySelector('.edit-mode-container-' + playerId);
|
| 843 |
-
const textarea = div.querySelector('.edit-textarea-' + playerId);
|
| 844 |
-
|
| 845 |
-
if (!textDisplay || !editContainer || !textarea) return;
|
| 846 |
-
|
| 847 |
-
// Store original text for cancel
|
| 848 |
-
textarea.dataset.originalText = textDisplay.textContent;
|
| 849 |
-
|
| 850 |
-
// Switch to edit mode
|
| 851 |
-
textDisplay.style.display = 'none';
|
| 852 |
-
editContainer.style.display = 'block';
|
| 853 |
-
div.classList.add('edit-mode-' + playerId);
|
| 854 |
-
|
| 855 |
-
// Focus and select all text
|
| 856 |
-
textarea.focus();
|
| 857 |
-
textarea.select();
|
| 858 |
-
}};
|
| 859 |
-
|
| 860 |
-
window.saveEdit = function(index) {{
|
| 861 |
-
const div = document.querySelector(`[data-index="${{index}}"]`);
|
| 862 |
-
if (!div) return;
|
| 863 |
-
|
| 864 |
-
const textDisplay = div.querySelector('.text-display-' + playerId);
|
| 865 |
-
const editContainer = div.querySelector('.edit-mode-container-' + playerId);
|
| 866 |
-
const textarea = div.querySelector('.edit-textarea-' + playerId);
|
| 867 |
-
|
| 868 |
-
if (!textDisplay || !editContainer || !textarea) return;
|
| 869 |
-
|
| 870 |
-
const newText = textarea.value.trim();
|
| 871 |
-
if (!newText) {{
|
| 872 |
-
alert('Text cannot be empty');
|
| 873 |
-
return;
|
| 874 |
-
}}
|
| 875 |
-
|
| 876 |
-
// Update display text
|
| 877 |
-
textDisplay.textContent = newText;
|
| 878 |
-
|
| 879 |
-
// Update utterances data
|
| 880 |
-
utterances[index][2] = newText;
|
| 881 |
-
|
| 882 |
-
// Send update to Streamlit (via session state simulation)
|
| 883 |
-
try {{
|
| 884 |
-
// Create a custom event to notify Streamlit about the change
|
| 885 |
-
const updateEvent = new CustomEvent('utteranceUpdate', {{
|
| 886 |
-
detail: {{
|
| 887 |
-
index: index,
|
| 888 |
-
text: newText,
|
| 889 |
-
playerId: playerId
|
| 890 |
-
}}
|
| 891 |
-
}});
|
| 892 |
-
window.dispatchEvent(updateEvent);
|
| 893 |
-
|
| 894 |
-
// Store in localStorage as backup
|
| 895 |
-
const editKey = 'voxsum_edits_' + playerId;
|
| 896 |
-
let edits = JSON.parse(localStorage.getItem(editKey) || '{{}}');
|
| 897 |
-
edits[index] = newText;
|
| 898 |
-
localStorage.setItem(editKey, JSON.stringify(edits));
|
| 899 |
-
|
| 900 |
-
console.log('💾 Utterance updated:', index, newText);
|
| 901 |
-
}} catch (e) {{
|
| 902 |
-
console.warn('⚠️ Could not save to session state:', e);
|
| 903 |
-
}}
|
| 904 |
-
|
| 905 |
-
// Exit edit mode
|
| 906 |
-
cancelEdit(index, false);
|
| 907 |
-
|
| 908 |
-
// Show success feedback
|
| 909 |
-
showSuccessMessage(div, 'Saved!');
|
| 910 |
-
}};
|
| 911 |
-
|
| 912 |
-
window.cancelEdit = function(index, restoreText = true) {{
|
| 913 |
-
const div = document.querySelector(`[data-index="${{index}}"]`);
|
| 914 |
-
if (!div) return;
|
| 915 |
-
|
| 916 |
-
const textDisplay = div.querySelector('.text-display-' + playerId);
|
| 917 |
-
const editContainer = div.querySelector('.edit-mode-container-' + playerId);
|
| 918 |
-
const textarea = div.querySelector('.edit-textarea-' + playerId);
|
| 919 |
-
|
| 920 |
-
if (!textDisplay || !editContainer || !textarea) return;
|
| 921 |
-
|
| 922 |
-
// Restore original text if cancelling
|
| 923 |
-
if (restoreText && textarea.dataset.originalText) {{
|
| 924 |
-
textarea.value = textarea.dataset.originalText;
|
| 925 |
-
}}
|
| 926 |
-
|
| 927 |
-
// Exit edit mode
|
| 928 |
-
textDisplay.style.display = 'inline';
|
| 929 |
-
editContainer.style.display = 'none';
|
| 930 |
-
div.classList.remove('edit-mode-' + playerId);
|
| 931 |
-
}};
|
| 932 |
-
|
| 933 |
-
// Helper function to show success message
|
| 934 |
-
function showSuccessMessage(div, message) {{
|
| 935 |
-
const successDiv = document.createElement('div');
|
| 936 |
-
successDiv.style.cssText = `
|
| 937 |
-
position: absolute;
|
| 938 |
-
top: -30px;
|
| 939 |
-
right: 10px;
|
| 940 |
-
background: #4caf50;
|
| 941 |
-
color: white;
|
| 942 |
-
padding: 4px 8px;
|
| 943 |
-
border-radius: 4px;
|
| 944 |
-
font-size: 0.8em;
|
| 945 |
-
pointer-events: none;
|
| 946 |
-
z-index: 1000;
|
| 947 |
-
`;
|
| 948 |
-
successDiv.textContent = message;
|
| 949 |
-
|
| 950 |
-
div.style.position = 'relative';
|
| 951 |
-
div.appendChild(successDiv);
|
| 952 |
-
|
| 953 |
-
setTimeout(() => {{
|
| 954 |
-
if (successDiv.parentNode) {{
|
| 955 |
-
successDiv.parentNode.removeChild(successDiv);
|
| 956 |
-
}}
|
| 957 |
-
}}, 2000);
|
| 958 |
-
}}
|
| 959 |
-
|
| 960 |
-
// Load saved edits from localStorage
|
| 961 |
-
const editKey = 'voxsum_edits_' + playerId;
|
| 962 |
-
const savedEdits = JSON.parse(localStorage.getItem(editKey) || '{{}}');
|
| 963 |
-
for (const [index, text] of Object.entries(savedEdits)) {{
|
| 964 |
-
if (utterances[index]) {{
|
| 965 |
-
utterances[index][2] = text;
|
| 966 |
-
}}
|
| 967 |
-
}}
|
| 968 |
-
}})();
|
| 969 |
-
</script>
|
| 970 |
-
</body>
|
| 971 |
-
</html>
|
| 972 |
-
"""
|
| 973 |
-
return html_content
|
| 974 |
-
|
| 975 |
-
def create_export_interface():
    """Render the Streamlit export UI for the transcript and the summary.

    Nothing is rendered when the session holds neither utterances nor a
    summary. Otherwise two tabs are shown:

    * Transcript: offers ``TRANSCRIPT_FORMATS`` when speaker-labelled
      utterances exist in the session, ``SUBTITLE_FORMATS`` otherwise.
    * Summary: offers ``SUMMARY_FORMATS`` plus optional metadata (title,
      date, speaker count, duration).

    Each format entry is read through its ``"function"``, ``"extension"``
    and ``"mime_type"`` keys. Exporter failures are reported to the user
    with ``st.error`` instead of propagating.
    """
    if not st.session_state.utterances and not st.session_state.summary:
        return

    st.markdown("### 📥 Export Options")

    export_tab1, export_tab2 = st.tabs(["📝 Transcript", "📄 Summary"])

    with export_tab1:
        if st.session_state.utterances:
            # Choose format family based on whether speaker diarization produced results
            if st.session_state.utterances_with_speakers:
                st.markdown("**Speaker diarization detected - Transcript formats available:**")
                format_options = TRANSCRIPT_FORMATS
            else:
                st.markdown("**No speaker diarization - Subtitle formats available:**")
                format_options = SUBTITLE_FORMATS

            # Format selection
            format_name = st.selectbox(
                "Export format",
                list(format_options.keys()),
                key="transcript_export_format"
            )

            format_info = format_options[format_name]

            # Export button and download
            if st.button(f"📥 Export as {format_name}", key="export_transcript"):
                has_speakers = bool(st.session_state.utterances_with_speakers)

                # Normalise to 4-tuples; speaker 0 is a placeholder when no
                # diarization result is available.
                if has_speakers:
                    utterances_data = st.session_state.utterances_with_speakers
                else:
                    utterances_data = [
                        (start, end, text, 0)
                        for start, end, text in st.session_state.utterances
                    ]

                # UI boundary: any exporter failure is shown to the user.
                try:
                    # Subtitle and transcript exporters are invoked the same way:
                    # plain (start, end, text) triples first, then the
                    # speaker-aware list (or None when there are no speakers).
                    plain_utterances = [
                        (start, end, text) for start, end, text, _ in utterances_data
                    ]
                    content = format_info["function"](
                        plain_utterances,
                        utterances_data if has_speakers else None
                    )

                    # Create download button
                    filename = f"transcript_{datetime.now().strftime('%Y%m%d_%H%M%S')}{format_info['extension']}"
                    st.download_button(
                        label=f"💾 Download {filename}",
                        data=content,
                        file_name=filename,
                        mime=format_info["mime_type"]
                    )

                except Exception as e:
                    st.error(f"Export failed: {str(e)}")
        else:
            st.info("No transcript available for export")

    with export_tab2:
        if st.session_state.summary:
            # Summary export formats
            format_name = st.selectbox(
                "Summary format",
                list(SUMMARY_FORMATS.keys()),
                key="summary_export_format"
            )

            format_info = SUMMARY_FORMATS[format_name]

            # Metadata for summary
            with st.expander("📋 Add metadata (optional)"):
                metadata = {}
                metadata["title"] = st.text_input("Title", key="summary_title")
                metadata["date"] = st.date_input("Date", value=datetime.now().date(), key="summary_date").isoformat()
                if st.session_state.utterances_with_speakers:
                    num_speakers = len(
                        {speaker for _, _, _, speaker in st.session_state.utterances_with_speakers}
                    )
                    metadata["speakers"] = f"{num_speakers} speakers detected"
                if st.session_state.audio_path and st.session_state.utterances:
                    # Best-effort duration taken from the end time of the last
                    # utterance; malformed utterance data simply omits the field.
                    try:
                        duration_sec = st.session_state.utterances[-1][1]  # end time
                        duration_min = int(duration_sec // 60)
                        duration_sec_remainder = int(duration_sec % 60)
                        metadata["duration"] = f"{duration_min}m {duration_sec_remainder}s"
                    except (IndexError, KeyError, TypeError, ValueError, OverflowError):
                        pass

            # Drop metadata fields the user left empty
            metadata = {k: v for k, v in metadata.items() if v}

            # Export button
            if st.button(f"📥 Export summary as {format_name}", key="export_summary"):
                try:
                    content = format_info["function"](st.session_state.summary, metadata if metadata else None)

                    filename = f"summary_{datetime.now().strftime('%Y%m%d_%H%M%S')}{format_info['extension']}"
                    st.download_button(
                        label=f"💾 Download {filename}",
                        data=content,
                        file_name=filename,
                        mime=format_info["mime_type"]
                    )

                except Exception as e:
                    st.error(f"Export failed: {str(e)}")
        else:
            st.info("No summary available for export")
|
| 1089 |
-
|
| 1090 |
-
def render_results_tab(settings):
|
| 1091 |
-
st.subheader("🎤 Transcription & Summary")
|
| 1092 |
-
status_placeholder = st.empty()
|
| 1093 |
-
transcript_display = st.empty()
|
| 1094 |
-
summary_container = st.container()
|
| 1095 |
-
|
| 1096 |
-
# Update pagination settings
|
| 1097 |
-
st.session_state.utterances_per_page = settings.get("utterances_per_page", 100)
|
| 1098 |
-
|
| 1099 |
-
# Handle audio base64 encoding
|
| 1100 |
-
if (st.session_state.audio_path and
|
| 1101 |
-
st.session_state.get("prev_audio_path") != st.session_state.audio_path):
|
| 1102 |
-
st.session_state.audio_base64 = None
|
| 1103 |
-
st.session_state.prev_audio_path = st.session_state.audio_path
|
| 1104 |
-
st.session_state.static_audio_url = None # Reset static URL
|
| 1105 |
-
|
| 1106 |
-
# Transcription Process
|
| 1107 |
-
if st.button("🎙️ Transcribe Audio"):
|
| 1108 |
-
if st.session_state.audio_path:
|
| 1109 |
-
status_placeholder.info("🔊 Transcribing audio... Please wait.")
|
| 1110 |
-
st.session_state.utterances = []
|
| 1111 |
-
st.session_state.transcript = ""
|
| 1112 |
-
st.session_state.transcribing = True
|
| 1113 |
-
|
| 1114 |
-
with transcript_display.container():
|
| 1115 |
-
st.markdown("### 📝 Live Transcript (Streaming)")
|
| 1116 |
-
live_placeholder = st.empty()
|
| 1117 |
-
progress_bar = st.progress(0)
|
| 1118 |
-
utterance_counter = st.empty()
|
| 1119 |
-
|
| 1120 |
-
try:
|
| 1121 |
-
# Determine model name and backend-specific parameters
|
| 1122 |
-
if st.session_state.backend == "moonshine":
|
| 1123 |
-
model_key = model_names[settings["model_name"]]
|
| 1124 |
-
else:
|
| 1125 |
-
model_key = sensevoice_models[settings["model_name"]]
|
| 1126 |
-
|
| 1127 |
-
gen = transcribe_file(
|
| 1128 |
-
st.session_state.audio_path,
|
| 1129 |
-
settings["vad_threshold"],
|
| 1130 |
-
model_key,
|
| 1131 |
-
backend=st.session_state.backend,
|
| 1132 |
-
language=st.session_state.language if st.session_state.backend == "sensevoice" else "auto",
|
| 1133 |
-
textnorm=st.session_state.textnorm if st.session_state.backend == "sensevoice" else "withitn"
|
| 1134 |
-
)
|
| 1135 |
-
|
| 1136 |
-
# Estimate total duration for progress
|
| 1137 |
-
try:
|
| 1138 |
-
import soundfile as sf
|
| 1139 |
-
audio_info = sf.info(st.session_state.audio_path)
|
| 1140 |
-
total_duration = audio_info.duration
|
| 1141 |
-
except:
|
| 1142 |
-
total_duration = None
|
| 1143 |
-
|
| 1144 |
-
utterance_count = 0
|
| 1145 |
-
for current_utterance, all_utts in gen:
|
| 1146 |
-
st.session_state.utterances = list(all_utts) if all_utts else []
|
| 1147 |
-
utterance_count = len(st.session_state.utterances)
|
| 1148 |
-
|
| 1149 |
-
# Update progress if we have duration info
|
| 1150 |
-
if total_duration and current_utterance:
|
| 1151 |
-
progress = min(1.0, current_utterance[1] / total_duration)
|
| 1152 |
-
progress_bar.progress(progress)
|
| 1153 |
-
|
| 1154 |
-
# Efficient transcript display for streaming
|
| 1155 |
-
if utterance_count <= 200:
|
| 1156 |
-
# For smaller transcripts, show full text
|
| 1157 |
-
st.session_state.transcript = "\n".join(
|
| 1158 |
-
text for start, end, text in st.session_state.utterances
|
| 1159 |
-
)
|
| 1160 |
-
live_placeholder.markdown(st.session_state.transcript)
|
| 1161 |
-
else:
|
| 1162 |
-
# For large transcripts, show last few utterances only
|
| 1163 |
-
recent_utterances = st.session_state.utterances[-10:]
|
| 1164 |
-
recent_text = "\n".join(
|
| 1165 |
-
f"[{int(start//60)}:{int(start%60):02d}] {text}"
|
| 1166 |
-
for start, end, text in recent_utterances
|
| 1167 |
-
)
|
| 1168 |
-
live_placeholder.markdown(f"**Recent utterances (last 10):**\n{recent_text}")
|
| 1169 |
-
|
| 1170 |
-
utterance_counter.info(f"📊 {utterance_count} utterances processed")
|
| 1171 |
-
|
| 1172 |
-
st.session_state.transcribing = False
|
| 1173 |
-
progress_bar.progress(1.0)
|
| 1174 |
-
status_placeholder.success(f"✅ Transcription completed! {utterance_count} utterances generated.")
|
| 1175 |
-
|
| 1176 |
-
# Perform speaker diarization if enabled
|
| 1177 |
-
print(f"🔍 DEBUG Diarization Check: enable_diarization={st.session_state.enable_diarization}, utterances_count={len(st.session_state.utterances)}")
|
| 1178 |
-
if st.session_state.enable_diarization and st.session_state.utterances:
|
| 1179 |
-
print("✅ DEBUG: Starting diarization process...")
|
| 1180 |
-
status_placeholder.info("🎭 Performing speaker diarization... This may take a few minutes.")
|
| 1181 |
-
diarization_progress = st.progress(0)
|
| 1182 |
-
|
| 1183 |
-
try:
|
| 1184 |
-
# Initialize embedding extractor (lighter than full diarization system)
|
| 1185 |
-
print("🔍 DEBUG: Initializing embedding extractor...")
|
| 1186 |
-
extractor_result = init_speaker_embedding_extractor(
|
| 1187 |
-
cluster_threshold=st.session_state.cluster_threshold,
|
| 1188 |
-
num_speakers=st.session_state.num_speakers
|
| 1189 |
-
)
|
| 1190 |
-
|
| 1191 |
-
if extractor_result:
|
| 1192 |
-
print("✅ DEBUG: Embedding extractor initialized successfully")
|
| 1193 |
-
embedding_extractor, config_dict = extractor_result
|
| 1194 |
-
|
| 1195 |
-
# Load audio for diarization (needs to be 16kHz)
|
| 1196 |
-
import soundfile as sf
|
| 1197 |
-
import scipy.signal
|
| 1198 |
-
|
| 1199 |
-
audio, sample_rate = sf.read(st.session_state.audio_path, dtype='float32')
|
| 1200 |
-
|
| 1201 |
-
# Resample to 16kHz if needed (reusing existing resampling logic)
|
| 1202 |
-
if sample_rate != 16000:
|
| 1203 |
-
audio = scipy.signal.resample(audio, int(len(audio) * 16000 / sample_rate))
|
| 1204 |
-
sample_rate = 16000
|
| 1205 |
-
|
| 1206 |
-
# Ensure mono
|
| 1207 |
-
if len(audio.shape) > 1:
|
| 1208 |
-
audio = audio.mean(axis=1)
|
| 1209 |
-
|
| 1210 |
-
# Progress callback for diarization
|
| 1211 |
-
def diarization_progress_callback(progress):
|
| 1212 |
-
diarization_progress.progress(min(1.0, progress))
|
| 1213 |
-
|
| 1214 |
-
# Perform diarization using existing ASR utterance segments
|
| 1215 |
-
print(f"🔍 DEBUG: Starting diarization with {len(st.session_state.utterances)} utterances")
|
| 1216 |
-
diarization_result = perform_speaker_diarization_on_utterances(
|
| 1217 |
-
audio, sample_rate, st.session_state.utterances,
|
| 1218 |
-
embedding_extractor, config_dict, diarization_progress_callback
|
| 1219 |
-
)
|
| 1220 |
-
print(f"🔍 DEBUG: Diarization returned {len(diarization_result) if diarization_result else 0} results")
|
| 1221 |
-
|
| 1222 |
-
if diarization_result:
|
| 1223 |
-
print("✅ DEBUG: Merging transcription with diarization...")
|
| 1224 |
-
# Merge transcription with diarization
|
| 1225 |
-
merged_utterances = merge_transcription_with_diarization(
|
| 1226 |
-
st.session_state.utterances, diarization_result
|
| 1227 |
-
)
|
| 1228 |
-
|
| 1229 |
-
# Merge consecutive utterances from the same speaker
|
| 1230 |
-
st.session_state.utterances_with_speakers = merge_consecutive_utterances(
|
| 1231 |
-
merged_utterances, max_gap=1.0
|
| 1232 |
-
)
|
| 1233 |
-
print(f"✅ DEBUG: Merged result has {len(st.session_state.utterances_with_speakers)} utterances with speakers")
|
| 1234 |
-
|
| 1235 |
-
# Calculate statistics
|
| 1236 |
-
st.session_state.diarization_stats = get_diarization_stats(
|
| 1237 |
-
st.session_state.utterances_with_speakers
|
| 1238 |
-
)
|
| 1239 |
-
|
| 1240 |
-
diarization_progress.progress(1.0)
|
| 1241 |
-
num_speakers = st.session_state.diarization_stats.get("total_speakers", 0)
|
| 1242 |
-
status_placeholder.success(f"✅ Speaker diarization completed! {num_speakers} speakers detected.")
|
| 1243 |
-
else:
|
| 1244 |
-
print("❌ DEBUG: Diarization returned empty result")
|
| 1245 |
-
status_placeholder.error("❌ Speaker diarization failed.")
|
| 1246 |
-
st.session_state.utterances_with_speakers = []
|
| 1247 |
-
else:
|
| 1248 |
-
print("❌ DEBUG: Failed to initialize embedding extractor")
|
| 1249 |
-
status_placeholder.error("❌ Failed to initialize speaker diarization.")
|
| 1250 |
-
st.session_state.utterances_with_speakers = []
|
| 1251 |
-
|
| 1252 |
-
except Exception as e:
|
| 1253 |
-
print(f"❌ DEBUG: Exception in diarization: {str(e)}")
|
| 1254 |
-
status_placeholder.error(f"❌ Speaker diarization error: {str(e)}")
|
| 1255 |
-
st.session_state.utterances_with_speakers = []
|
| 1256 |
-
else:
|
| 1257 |
-
# No diarization requested - clear previous results
|
| 1258 |
-
print(f"❌ DEBUG: Diarization not executed - enable_diarization={st.session_state.enable_diarization}, has_utterances={bool(st.session_state.utterances)}")
|
| 1259 |
-
st.session_state.utterances_with_speakers = []
|
| 1260 |
-
st.session_state.diarization_stats = {}
|
| 1261 |
-
|
| 1262 |
-
st.rerun()
|
| 1263 |
-
except Exception as e:
|
| 1264 |
-
status_placeholder.error(f"Transcription error: {str(e)}")
|
| 1265 |
-
st.session_state.transcribing = False
|
| 1266 |
-
else:
|
| 1267 |
-
status_placeholder.warning("⚠️ No audio file available")
|
| 1268 |
-
|
| 1269 |
-
# Summarization Process
|
| 1270 |
-
if st.button("📝 Generate Summary"):
|
| 1271 |
-
if st.session_state.transcript:
|
| 1272 |
-
status_placeholder.info("🧠 Generating summary...")
|
| 1273 |
-
st.session_state.summary = ""
|
| 1274 |
-
summary_container.empty()
|
| 1275 |
-
|
| 1276 |
-
# Show transcript during summarization
|
| 1277 |
-
with transcript_display.container():
|
| 1278 |
-
if st.session_state.audio_path and st.session_state.utterances:
|
| 1279 |
-
# Use efficient player for summarization view with speaker colors if available
|
| 1280 |
-
utterances_display = st.session_state.utterances_with_speakers if st.session_state.utterances_with_speakers else None
|
| 1281 |
-
html = create_efficient_sync_player(
|
| 1282 |
-
st.session_state.audio_path,
|
| 1283 |
-
st.session_state.utterances,
|
| 1284 |
-
utterances_display
|
| 1285 |
-
)
|
| 1286 |
-
# Dynamic height calculation with better scaling - increased for more visibility
|
| 1287 |
-
base_height = 300
|
| 1288 |
-
content_height = min(800, max(base_height, len(st.session_state.utterances) * 15 + 200))
|
| 1289 |
-
st.components.v1.html(html, height=content_height, scrolling=True)
|
| 1290 |
-
elif st.session_state.utterances:
|
| 1291 |
-
st.markdown("### 📝 Transcript")
|
| 1292 |
-
# For very long transcripts, show summary info
|
| 1293 |
-
if len(st.session_state.utterances) > 500:
|
| 1294 |
-
st.info(f"📊 Large transcript: {len(st.session_state.utterances)} utterances")
|
| 1295 |
-
with st.expander("View full transcript"):
|
| 1296 |
-
st.markdown(st.session_state.transcript)
|
| 1297 |
-
else:
|
| 1298 |
-
st.markdown(st.session_state.transcript)
|
| 1299 |
-
else:
|
| 1300 |
-
st.info("No transcript available.")
|
| 1301 |
-
|
| 1302 |
-
# Live summary display
|
| 1303 |
-
live_summary_area = st.empty()
|
| 1304 |
-
with live_summary_area.container():
|
| 1305 |
-
st.markdown("### 📝 Live Summary (In Progress)")
|
| 1306 |
-
progress_placeholder = st.empty()
|
| 1307 |
-
|
| 1308 |
-
summary_gen = summarize_transcript(
|
| 1309 |
-
st.session_state.transcript,
|
| 1310 |
-
settings["llm_model"],
|
| 1311 |
-
settings["prompt_input"]
|
| 1312 |
-
)
|
| 1313 |
-
|
| 1314 |
-
for accumulated_summary in summary_gen:
|
| 1315 |
-
st.session_state.summary = accumulated_summary
|
| 1316 |
-
progress_placeholder.markdown(accumulated_summary)
|
| 1317 |
-
|
| 1318 |
-
live_summary_area.empty()
|
| 1319 |
-
st.rerun()
|
| 1320 |
-
else:
|
| 1321 |
-
status_placeholder.warning("⚠️ No transcript available")
|
| 1322 |
-
|
| 1323 |
-
# Display final results
|
| 1324 |
-
if st.session_state.audio_path and st.session_state.utterances and not st.session_state.transcribing:
|
| 1325 |
-
# Show speaker diarization statistics if available
|
| 1326 |
-
if st.session_state.diarization_stats and st.session_state.diarization_stats.get("total_speakers", 0) > 0:
|
| 1327 |
-
st.markdown("### 🎭 Speaker Analysis")
|
| 1328 |
-
stats = st.session_state.diarization_stats
|
| 1329 |
-
|
| 1330 |
-
col1, col2 = st.columns([2, 1])
|
| 1331 |
-
with col1:
|
| 1332 |
-
# Speaker breakdown
|
| 1333 |
-
speaker_data = []
|
| 1334 |
-
for speaker_id, speaker_stats in stats["speakers"].items():
|
| 1335 |
-
speaker_data.append({
|
| 1336 |
-
"Speaker": f"Speaker {speaker_id + 1}",
|
| 1337 |
-
"Speaking Time": f"{speaker_stats['speaking_time']:.1f}s",
|
| 1338 |
-
"Percentage": f"{speaker_stats['percentage']:.1f}%",
|
| 1339 |
-
"Utterances": speaker_stats['utterances'],
|
| 1340 |
-
"Avg Length": f"{speaker_stats['avg_utterance_length']:.1f}s"
|
| 1341 |
-
})
|
| 1342 |
-
|
| 1343 |
-
import pandas as pd
|
| 1344 |
-
df = pd.DataFrame(speaker_data)
|
| 1345 |
-
st.dataframe(df, use_container_width=True)
|
| 1346 |
-
|
| 1347 |
-
with col2:
|
| 1348 |
-
st.metric("Total Speakers", stats["total_speakers"])
|
| 1349 |
-
st.metric("Total Duration", f"{stats['total_duration']:.1f}s")
|
| 1350 |
-
|
| 1351 |
-
# Performance optimization: show stats for large transcripts
|
| 1352 |
-
if len(st.session_state.utterances) > 100:
|
| 1353 |
-
col1, col2, col3 = st.columns(3)
|
| 1354 |
-
with col1:
|
| 1355 |
-
st.metric("📊 Utterances", len(st.session_state.utterances))
|
| 1356 |
-
with col2:
|
| 1357 |
-
duration = st.session_state.utterances[-1][1] if st.session_state.utterances else 0
|
| 1358 |
-
st.metric("⏱️ Duration", f"{duration/60:.1f} min")
|
| 1359 |
-
with col3:
|
| 1360 |
-
avg_length = sum(len(text) for _, _, text in st.session_state.utterances) / len(st.session_state.utterances)
|
| 1361 |
-
st.metric("📝 Avg Length", f"{avg_length:.0f} chars")
|
| 1362 |
-
|
| 1363 |
-
# Use efficient player for final results with speaker colors if available
|
| 1364 |
-
utterances_display = st.session_state.utterances_with_speakers if st.session_state.utterances_with_speakers else None
|
| 1365 |
-
|
| 1366 |
-
# DEBUG: Print information about diarization
|
| 1367 |
-
if utterances_display:
|
| 1368 |
-
print(f"🎭 DEBUG: Using diarized utterances - {len(utterances_display)} segments with speakers")
|
| 1369 |
-
for i, (start, end, text, speaker) in enumerate(utterances_display[:3]): # Show first 3
|
| 1370 |
-
print(f" Sample {i+1}: [{start:.1f}-{end:.1f}s] Speaker {speaker}: '{text[:30]}...'")
|
| 1371 |
-
else:
|
| 1372 |
-
print(f"📝 DEBUG: Using regular utterances - {len(st.session_state.utterances)} segments without speakers")
|
| 1373 |
-
|
| 1374 |
-
html = create_efficient_sync_player(
|
| 1375 |
-
st.session_state.audio_path,
|
| 1376 |
-
st.session_state.utterances,
|
| 1377 |
-
utterances_display
|
| 1378 |
-
)
|
| 1379 |
-
# Improved height calculation for better UX - increased for more transcript visibility
|
| 1380 |
-
base_height = 350
|
| 1381 |
-
content_height = min(900, max(base_height, len(st.session_state.utterances) * 12 + 250))
|
| 1382 |
-
|
| 1383 |
-
with transcript_display.container():
|
| 1384 |
-
st.components.v1.html(html, height=content_height, scrolling=True)
|
| 1385 |
-
|
| 1386 |
-
# Show formatted transcript with speakers if diarization was performed
|
| 1387 |
-
if st.session_state.utterances_with_speakers:
|
| 1388 |
-
with st.expander("📄 Speaker-Labeled Transcript", expanded=False):
|
| 1389 |
-
formatted_transcript = format_speaker_transcript(st.session_state.utterances_with_speakers)
|
| 1390 |
-
st.markdown(formatted_transcript)
|
| 1391 |
-
|
| 1392 |
-
# Add export interface (editing is now inline)
|
| 1393 |
-
st.markdown("---")
|
| 1394 |
-
create_export_interface()
|
| 1395 |
-
|
| 1396 |
-
elif not st.session_state.utterances and not st.session_state.transcribing:
|
| 1397 |
-
with transcript_display.container():
|
| 1398 |
-
st.info("No transcript available. Click 'Transcribe Audio' to generate one.")
|
| 1399 |
-
|
| 1400 |
-
if st.session_state.summary:
|
| 1401 |
-
with summary_container:
|
| 1402 |
-
st.markdown("### 📝 Final Summary")
|
| 1403 |
-
st.markdown(st.session_state.summary)
|
| 1404 |
-
|
| 1405 |
-
# === 3. Main App ===
|
| 1406 |
-
def main():
    """Build the Streamlit page: config, optional Spaces banner, sidebar, tabs.

    Calls ``init_session_state()`` first, then configures the page, renders
    the title and settings sidebar, and finally lays out the three tabs
    (Podcast, Audio Input, Results). The settings returned by
    ``render_settings_sidebar()`` are passed on to the Results tab only.
    """
    init_session_state()

    # Optimized page config for HF Spaces and large files
    help_menu = {
        'Get Help': 'https://github.com/your-repo/issues',
        'Report a bug': 'https://github.com/your-repo/issues',
        'About': "VoxSum Studio - Optimized for large audio files",
    }
    st.set_page_config(
        page_title="🎙️ ASR + LLM",
        layout="wide",
        initial_sidebar_state="expanded",
        menu_items=help_menu,
    )

    # HF Spaces specific banner: shown only when SPACE_ID is set and non-empty
    if os.getenv('SPACE_ID'):
        spaces_banner = """
        <div style='background: linear-gradient(90deg, #1f77b4, #ff7f0e); padding: 8px; border-radius: 6px; margin-bottom: 15px;'>
        <p style='color: white; margin: 0; text-align: center; font-weight: 500;'>
        🚀 Running on Hugging Face Spaces - Optimized for large audio files
        </p>
        </div>
        """
        st.markdown(spaces_banner, unsafe_allow_html=True)

    st.title("🎙️ Speech Summarization with Moonshine & SenseVoice ASR")

    settings = render_settings_sidebar()
    podcast_tab, audio_tab, results_tab = st.tabs(
        ["📻 Podcast", "🎵 Audio Input", "📄 Results"]
    )

    with podcast_tab:
        render_podcast_tab()

    with audio_tab:
        render_audio_tab()

    with results_tab:
        render_results_tab(settings)
|
| 1442 |
-
|
| 1443 |
-
# Run the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|