Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,923 +1,923 @@
|
|
| 1 |
-
# Specific conda environment : Kabyle_streamlit
|
| 2 |
-
|
| 3 |
-
# Streamlit TTS Reader - Taqbaylit TTS Sɣer Adlis
|
| 4 |
-
# Bouaziz Ait Driss October 2025
|
| 5 |
-
|
| 6 |
-
import streamlit as st
|
| 7 |
-
import fitz # PyMuPDF
|
| 8 |
-
import re
|
| 9 |
-
import numpy as np
|
| 10 |
-
import torch
|
| 11 |
-
from scipy.io.wavfile import write as wav_write
|
| 12 |
-
import tempfile
|
| 13 |
-
import os
|
| 14 |
-
import base64
|
| 15 |
-
import json
|
| 16 |
-
import io
|
| 17 |
-
import threading
|
| 18 |
-
import queue
|
| 19 |
-
import time
|
| 20 |
-
import pdfplumber
|
| 21 |
-
|
| 22 |
-
# Configure Streamlit page.
# Wide layout + an expanded sidebar so the document pane and the reading
# pane can sit side by side; must run before any other st.* call.
st.set_page_config(
    page_title="Kabyle TTS Document Reader",
    page_icon="🎙️",
    layout="wide",
    initial_sidebar_state="expanded",
)
|
| 29 |
-
|
| 30 |
-
# Try to import transformers.
# VitsModel is the class used by the facebook/mms-tts-* checkpoints; older
# transformers releases don't export it, so fall back to AutoModel under the
# same name. If transformers is missing entirely, TRANSFORMERS_AVAILABLE
# gates the TTS path (load_model then returns None and dummy audio is used).
try:
    from transformers import VitsModel, AutoTokenizer
    TRANSFORMERS_AVAILABLE = True
except ImportError:
    try:
        from transformers import AutoModel, AutoTokenizer
        TRANSFORMERS_AVAILABLE = True
        VitsModel = AutoModel
    except ImportError:
        TRANSFORMERS_AVAILABLE = False
|
| 41 |
-
|
| 42 |
-
# --- Model Loading ---
@st.cache_resource
def load_model():
    """Load the Kabyle MMS-TTS model and tokenizer once per server process.

    Returns a ``(model, tokenizer, device)`` triple. On any failure —
    transformers unavailable, download error, etc. — the triple is
    ``(None, None, "cpu")`` so callers can fall back to dummy audio.
    """
    if not TRANSFORMERS_AVAILABLE:
        return None, None, "cpu"
    try:
        device = "cuda" if torch.cuda.is_available() else "cpu"
        tts_model = VitsModel.from_pretrained("facebook/mms-tts-kab").to(device)
        tts_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-kab")
        return tts_model, tts_tokenizer, device
    except Exception as e:
        st.error(f"Error loading model: {e}")
        return None, None, "cpu"
|
| 56 |
-
|
| 57 |
-
# Load the shared TTS resources at import time; @st.cache_resource makes
# this survive Streamlit reruns. model/tokenizer are None when loading
# failed, in which case the app generates placeholder audio instead.
model, tokenizer, device = load_model()
|
| 58 |
-
|
| 59 |
-
# --- Text Processing ---
def clean_text(text):
    """Collapse runs of spaces and excess blank lines, then trim.

    Falsy input (``None`` / ``""``) is returned unchanged.
    """
    if not text:
        return text
    collapsed = re.sub(r' +', ' ', text)
    collapsed = re.sub(r'\n\s*\n', '\n\n', collapsed)
    return collapsed.strip()
|
| 66 |
-
|
| 67 |
-
def smart_split_paragraphs(text, initial_paragraphs=5, initial_word_target=50, normal_word_target=200):
    """
    Smart paragraph splitting strategy:
    - First 'initial_paragraphs' paragraphs: ~initial_word_target words each (for quick startup)
    - Remaining paragraphs: ~normal_word_target words each (for better timing)
    """
    if not text:
        return []

    # Break on blank-line boundaries first.
    blocks = [b.strip() for b in re.split(r'\n\s*\n', text) if b.strip()]
    if not blocks:
        return []

    # Flatten every block into a single sentence stream.
    # Sentence boundary: . ! ? : ; followed by whitespace.
    sentence_stream = []
    for block in blocks:
        sentence_stream.extend(
            s.strip() for s in re.split(r'(?<=[.!?:;])\s+', block) if s.strip()
        )
    if not sentence_stream:
        return []

    chunks = []
    pending = []
    pending_words = 0
    for sentence in sentence_stream:
        n_words = len(sentence.split())
        # Early chunks stay short so the first audio is ready quickly.
        limit = initial_word_target if len(chunks) < initial_paragraphs else normal_word_target
        if pending and pending_words + n_words > limit:
            # Flush the accumulated sentences as one paragraph.
            chunks.append(' '.join(pending))
            pending = [sentence]
            pending_words = n_words
        else:
            pending.append(sentence)
            pending_words += n_words

    # Flush whatever is left over.
    if pending:
        chunks.append(' '.join(pending))
    return chunks
|
| 125 |
-
|
| 126 |
-
def split_paragraph_into_phrases(paragraph, max_phrase_length=150):
    """Group a paragraph's sentences into phrases of roughly at most
    ``max_phrase_length`` characters.

    A single sentence longer than the limit still becomes one phrase —
    sentences are never cut mid-way.
    """
    if not paragraph:
        return []

    result = []
    buffer = ""
    for sentence in re.split(r'(?<=[.!?])\s+', paragraph):
        if buffer and len(buffer) + len(sentence) > max_phrase_length:
            # Adding this sentence would overflow — flush the buffer first.
            result.append(buffer.strip())
            buffer = sentence
        else:
            buffer = f"{buffer} {sentence}" if buffer else sentence

    if buffer.strip():
        result.append(buffer.strip())
    return result
|
| 145 |
-
|
| 146 |
-
def generate_audio_with_precise_timing(paragraph):
    """Generate audio with precise phrase-level timing using per-phrase generation.

    Returns ``(audio, sampling_rate, timing_info)`` where ``timing_info`` is a
    list of ``{'text', 'start', 'end', 'duration'}`` dicts, one per phrase, used
    by the front-end to highlight the phrase currently being spoken. Falls back
    to a synthetic tone (and an empty timing list) whenever the model is
    unavailable or generation fails.
    """
    # Fall back to the dummy tone when the TTS model could not be loaded.
    if model is None or tokenizer is None:
        return create_dummy_audio_for_paragraph(paragraph), 16000, []

    try:
        phrases = split_paragraph_into_phrases(paragraph)
        if not phrases:
            return create_dummy_audio_for_paragraph(paragraph), 16000, []

        all_audio = []
        phrase_durations = []
        # NOTE(review): 16 kHz is hard-coded rather than read from
        # model.config.sampling_rate — confirm if the checkpoint changes.
        sampling_rate = 16000

        # Generate audio for each phrase separately to get exact timing
        for i, phrase in enumerate(phrases):
            if not phrase.strip():
                continue

            # Generate audio for this specific phrase
            inputs = tokenizer(phrase, return_tensors="pt").to(device)
            with torch.no_grad():
                output = model(**inputs)

            # Extract audio data (VitsModel exposes .waveform; other model
            # classes may return a tuple whose first element is the audio).
            if hasattr(output, 'waveform'):
                audio_chunk = output.waveform.cpu().numpy().squeeze()
            else:
                audio_chunk = output[0].cpu().numpy().squeeze() if len(output) > 0 else None

            if audio_chunk is not None:
                # Calculate exact duration for this phrase
                phrase_duration = len(audio_chunk) / sampling_rate
                phrase_durations.append(phrase_duration)

                # Normalize and store (peak-scale to 0.8 to avoid clipping)
                max_val = np.max(np.abs(audio_chunk))
                if max_val > 0:
                    audio_chunk = audio_chunk / max_val * 0.8
                all_audio.append(audio_chunk)
            else:
                # Fallback: estimate duration if audio generation failed
                word_count = len(phrase.split())
                estimated_duration = word_count * 0.4  # ~0.4 s per word heuristic
                phrase_durations.append(estimated_duration)

        if all_audio:
            # Concatenate all phrase audio
            full_audio = np.concatenate(all_audio)
            total_duration = len(full_audio) / sampling_rate

            # Build precise timing info using actual phrase durations
            timing_info = []
            current_time = 0.0

            for i, (phrase, duration) in enumerate(zip(phrases, phrase_durations)):
                timing_info.append({
                    'text': phrase,
                    'start': current_time,
                    'end': current_time + duration,
                    'duration': duration
                })
                current_time += duration

            return full_audio, sampling_rate, timing_info
        else:
            return create_dummy_audio_for_paragraph(paragraph), 16000, []

    except Exception as e:
        # Best-effort: any TTS failure degrades to the placeholder tone.
        return create_dummy_audio_for_paragraph(paragraph), 16000, []
|
| 217 |
-
|
| 218 |
-
def create_dummy_audio_for_paragraph(paragraph):
    """Synthesize a placeholder 220 Hz tone sized at ~0.4 s per word.

    Used when the real TTS model is unavailable so the UI flow (player,
    navigation) still works. Returns a float ndarray at 16 kHz.
    """
    sampling_rate = 16000
    total_duration = len(paragraph.split()) * 0.4
    sample_times = np.linspace(0, total_duration, int(sampling_rate * total_duration))
    return 0.3 * np.sin(2 * np.pi * 220 * sample_times)
|
| 225 |
-
|
| 226 |
-
# Add a fallback to pdfplumber for reading diacritic letters
def read_document(file_path):
    """Extract text from a PDF or UTF-8 .txt file.

    Returns ``(text, [])`` on success; on failure the first element is a
    human-readable error string (callers display it as the document text).
    PDFs are read with PyMuPDF first (fast for digital PDFs); if that yields
    no text containing accented characters, pdfplumber is tried, which copes
    better with diacritic-heavy PDFs.
    """
    text = ""
    try:
        if file_path.lower().endswith('.pdf'):
            # FIRST TRY: PyMuPDF (faster for digital PDFs)
            pymupdf_text = ""
            try:
                pdf_document = fitz.open(file_path)
                for page_num in range(len(pdf_document)):
                    pymupdf_text += pdf_document[page_num].get_text() + "\n"
                pdf_document.close()

                # Accept the PyMuPDF result when it already carries accented
                # characters (a proxy for diacritics surviving extraction).
                # NOTE(review): sentinel list covers French/Greek letters, not
                # Kabyle ɣ/ḍ/ṭ — confirm these are the intended markers.
                if pymupdf_text.strip() and any(
                    ch in pymupdf_text for ch in ['é', 'è', 'à', 'ù', 'ï', 'ö', 'α', 'β', 'γ']
                ):
                    return pymupdf_text, []
            except Exception:
                pymupdf_text = ""  # PyMuPDF failed entirely; rely on pdfplumber

            # FALLBACK: pdfplumber (better for scanned/diacritic PDFs)
            try:
                import pdfplumber
                with pdfplumber.open(file_path) as pdf:
                    text = ""
                    for page in pdf.pages:
                        text += (page.extract_text() or "") + "\n"
                # BUG FIX: previously the PyMuPDF text was always discarded
                # here; if pdfplumber extracts nothing, keep the non-empty
                # PyMuPDF result instead of returning an empty document.
                if not text.strip() and pymupdf_text.strip():
                    text = pymupdf_text
            except ImportError:
                if pymupdf_text.strip():
                    text = pymupdf_text
                else:
                    return "pdfplumber not available", []
            except Exception as e:
                if pymupdf_text.strip():
                    text = pymupdf_text
                else:
                    return f"Both PDF methods failed: {e}", []

        elif file_path.lower().endswith('.txt'):
            with open(file_path, 'r', encoding='utf-8') as f:
                text = f.read()
        else:
            return "Unsupported file format", []

    except Exception as e:
        return f"Error reading document: {e}", []

    return text, []
|
| 269 |
-
|
| 270 |
-
def normalize_audio(audio_data):
    """Return float32 audio peak-scaled to 0.9; silence passes through."""
    samples = audio_data if audio_data.dtype == np.float32 else audio_data.astype(np.float32)
    peak = np.max(np.abs(samples))
    return samples / peak * 0.9 if peak > 0 else samples
|
| 277 |
-
|
| 278 |
-
def get_audio_bytes(audio_data):
    """Encode a float waveform as 16 kHz 16-bit PCM WAV and return the bytes."""
    normalized = normalize_audio(audio_data)
    # Scale to int16 range, clipping any stray overshoot.
    pcm16 = np.clip(normalized * 32767, -32768, 32767).astype(np.int16)
    buffer = io.BytesIO()
    wav_write(buffer, 16000, pcm16)
    buffer.seek(0)
    return buffer.getvalue()
|
| 285 |
-
|
| 286 |
-
# --- Background Audio Generation ---
def background_audio_worker(paragraphs, audio_queue, start_index=0):
    """Background worker to generate audio for paragraphs - NO SESSION STATE ACCESS.

    Results reach the main thread exclusively through ``audio_queue``
    (Streamlit session state is not safe to touch from a worker thread).
    ``start_index`` offsets the paragraph indices in the emitted payloads.
    """
    try:
        for offset, paragraph in enumerate(paragraphs):
            # Generate audio with precise phrase timing for this paragraph.
            audio_data, sampling_rate, timing_info = generate_audio_with_precise_timing(paragraph)

            # Hand the finished payload to the main thread via the queue ONLY.
            audio_queue.put({
                'paragraph_index': start_index + offset,
                'paragraph_text': paragraph,
                'audio_data': audio_data,
                'timing_info': timing_info,
                'audio_duration': len(audio_data) / sampling_rate,
                'audio_bytes': get_audio_bytes(audio_data),
                'word_count': len(paragraph.split()),
            })
    except Exception:
        # Deliberate best-effort: a crash in the worker must not take down
        # the app; the UI keeps polling for whatever was produced.
        pass
|
| 310 |
-
|
| 311 |
-
# --- Queue Processing ---
def process_audio_queue(audio_queue):
    """Process all available items in the audio queue - called from main thread only.

    Drains the queue without blocking, storing each payload into
    ``st.session_state.paragraphs_data`` keyed by paragraph index, and
    returns how many items were consumed.
    """
    drained = 0
    while True:
        try:
            item = audio_queue.get_nowait()
        except queue.Empty:
            break
        # SAFE: only the main thread mutates session state.
        st.session_state.paragraphs_data[item['paragraph_index']] = item
        drained += 1
    return drained
|
| 328 |
-
|
| 329 |
-
# Initialize session state.
# Each key gets its default only on the first run; later reruns keep the
# existing values. Factories (not values) so mutable defaults like the
# queue are only constructed when actually missing.
_SESSION_DEFAULTS = [
    ('processed', lambda: False),
    ('current_paragraph_index', lambda: 0),
    ('total_paragraphs', lambda: 0),
    ('paragraphs_data', dict),
    ('audio_ready', lambda: False),
    ('background_worker_started', lambda: False),
    ('reading_status', lambda: "Ready to start reading"),
    ('current_document_id', lambda: None),
    ('audio_queue', queue.Queue),
    ('playback_speed', lambda: 1.0),  # default: normal speed
    ('full_document_text', lambda: ""),
    ('smart_splitting_expanded', lambda: False),
]
for _key, _factory in _SESSION_DEFAULTS:
    if _key not in st.session_state:
        st.session_state[_key] = _factory()
|
| 354 |
-
|
| 355 |
-
# --- Streamlit UI ---
|
| 356 |
-
def main():
|
| 357 |
-
# Custom CSS for better styling - FIXED FONT HARMONIZATION
|
| 358 |
-
st.markdown("""
|
| 359 |
-
<style>
|
| 360 |
-
.main-title {
|
| 361 |
-
font-size: 1.5rem !important;
|
| 362 |
-
font-weight: bold !important;
|
| 363 |
-
margin-bottom: -1rem !important;
|
| 364 |
-
margin-top: -1rem !important; /* MAXIMUM REDUCED top margin */
|
| 365 |
-
color: #1f77b4;
|
| 366 |
-
}
|
| 367 |
-
.section-title {
|
| 368 |
-
font-size: 1.3rem !important;
|
| 369 |
-
font-weight: bold !important;
|
| 370 |
-
margin-bottom: 0.3rem !important;
|
| 371 |
-
margin-top: 0rem !important;
|
| 372 |
-
color: #2e86ab;
|
| 373 |
-
}
|
| 374 |
-
.document-viewer {
|
| 375 |
-
max-height: 70vh;
|
| 376 |
-
overflow-y: auto;
|
| 377 |
-
border: 1px solid #e1e1e1;
|
| 378 |
-
border-radius: 8px;
|
| 379 |
-
padding: 15px;
|
| 380 |
-
background: #fafafa;
|
| 381 |
-
font-family: Arial, sans-serif;
|
| 382 |
-
line-height: 1.6;
|
| 383 |
-
scrollbar-width: thin;
|
| 384 |
-
scrollbar-color: #888 #f1f1f1;
|
| 385 |
-
color: #333333; /* ADD THIS LINE - ensures dark text */
|
| 386 |
-
}
|
| 387 |
-
.document-viewer::-webkit-scrollbar {
|
| 388 |
-
width: 8px;
|
| 389 |
-
}
|
| 390 |
-
.document-viewer::-webkit-scrollbar-track {
|
| 391 |
-
background: #f1f1f1;
|
| 392 |
-
border-radius: 4px;
|
| 393 |
-
}
|
| 394 |
-
.document-viewer::-webkit-scrollbar-thumb {
|
| 395 |
-
background: #888;
|
| 396 |
-
border-radius: 4px;
|
| 397 |
-
}
|
| 398 |
-
.document-viewer::-webkit-scrollbar-thumb:hover {
|
| 399 |
-
background: #555;
|
| 400 |
-
}
|
| 401 |
-
.current-paragraph-highlight {
|
| 402 |
-
background: linear-gradient(120deg, #e3f2fd, #bbdefb);
|
| 403 |
-
padding: 8px 12px;
|
| 404 |
-
margin: 8px 0;
|
| 405 |
-
border-left: 4px solid #2196f3;
|
| 406 |
-
border-radius: 4px;
|
| 407 |
-
box-shadow: 0 2px 4px rgba(33, 150, 243, 0.2);
|
| 408 |
-
}
|
| 409 |
-
.reading-content {
|
| 410 |
-
margin-bottom: 10px;
|
| 411 |
-
}
|
| 412 |
-
.controls-section {
|
| 413 |
-
margin-top: 15px;
|
| 414 |
-
}
|
| 415 |
-
/* Reduce sidebar header spacing */
|
| 416 |
-
.sidebar .sidebar-content {
|
| 417 |
-
padding-top: 1rem;
|
| 418 |
-
}
|
| 419 |
-
/* FIXED: Make expander headers consistent with sidebar titles */
|
| 420 |
-
.streamlit-expanderHeader {
|
| 421 |
-
font-size: 1.1rem !important;
|
| 422 |
-
font-weight: 600 !important;
|
| 423 |
-
color: inherit !important;
|
| 424 |
-
}
|
| 425 |
-
/* FIXED: Ensure consistent styling for smart splitting expander */
|
| 426 |
-
div[data-testid="stExpander"] details summary p {
|
| 427 |
-
font-size: 1.1rem !important;
|
| 428 |
-
font-weight: 600 !important;
|
| 429 |
-
}
|
| 430 |
-
/* Style for the smart splitting section specifically */
|
| 431 |
-
.smart-splitting-header {
|
| 432 |
-
font-size: 1.1rem !important;
|
| 433 |
-
font-weight: 600 !important;
|
| 434 |
-
}
|
| 435 |
-
</style>
|
| 436 |
-
""", unsafe_allow_html=True)
|
| 437 |
-
|
| 438 |
-
# Main title with LARGER font and MAXIMUM REDUCED TOP SPACE
|
| 439 |
-
st.markdown('<div class="main-title">🎙️Taqbaylit TTS Sɣer Adlis</div>', unsafe_allow_html=True)
|
| 440 |
-
|
| 441 |
-
if model is None:
|
| 442 |
-
st.warning("⚠️ Using test audio (TTS model not available)")
|
| 443 |
-
|
| 444 |
-
# Process audio queue on every run
|
| 445 |
-
if st.session_state.get('background_worker_started', False):
|
| 446 |
-
processed_count = process_audio_queue(st.session_state.audio_queue)
|
| 447 |
-
if processed_count > 0:
|
| 448 |
-
st.success(f"📥 Loaded {processed_count} paragraph(s)")
|
| 449 |
-
|
| 450 |
-
# Sidebar with KABYLE TRANSLATIONS
|
| 451 |
-
with st.sidebar:
|
| 452 |
-
# Document Settings with reduced spacing
|
| 453 |
-
st.header("📁 Tawila n Tɣuri", anchor=False)
|
| 454 |
-
uploaded_file = st.file_uploader("Sali-d Aḍris - jbed sers afaylu", type=['pdf', 'txt'],
|
| 455 |
-
help="Ulac OCR ara yerren afaylu PDF n tugna ɣer txt deg teqaylit. Ma ur iwulem ara ɛiwed-as tamuɣli.")
|
| 456 |
-
|
| 457 |
-
# Clear document button - MOVED UP immediately under file uploader
|
| 458 |
-
if st.session_state.get('processed'):
|
| 459 |
-
if st.button("🗑️ Sfeḍ Afaylu-a", type="secondary", use_container_width=True):
|
| 460 |
-
# Reset all document-related session state
|
| 461 |
-
st.session_state.processed = False
|
| 462 |
-
st.session_state.current_paragraph_index = 0
|
| 463 |
-
st.session_state.total_paragraphs = 0
|
| 464 |
-
st.session_state.paragraphs_data = {}
|
| 465 |
-
st.session_state.audio_ready = False
|
| 466 |
-
st.session_state.background_worker_started = False
|
| 467 |
-
st.session_state.reading_status = "Ready to start reading"
|
| 468 |
-
st.session_state.current_document_id = None
|
| 469 |
-
st.session_state.audio_queue = queue.Queue()
|
| 470 |
-
st.session_state.full_document_text = ""
|
| 471 |
-
st.rerun()
|
| 472 |
-
|
| 473 |
-
# Playback Speed Control
|
| 474 |
-
st.markdown("---")
|
| 475 |
-
st.markdown("### 🎵 Arured n Tɣuri")
|
| 476 |
-
|
| 477 |
-
# Define the speed options with labels
|
| 478 |
-
speed_options = [0.5, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.5]
|
| 479 |
-
speed_labels = {
|
| 480 |
-
0.5: "0.5x (Ddac ddac ugar)",
|
| 481 |
-
0.8: "0.8x (Ddac ddac)",
|
| 482 |
-
0.9: "0.9x (Ddac ddac cwiṭ)",
|
| 483 |
-
1.0: "1.0x (Amagnu)",
|
| 484 |
-
1.1: "1.1x (Ɣiwel cwiṭ)",
|
| 485 |
-
1.2: "1.2x (Ɣiwel)",
|
| 486 |
-
1.3: "1.3x (Ɣiwel aṭas)",
|
| 487 |
-
1.5: "1.5x (Ɣiwel aṭas ugar)"
|
| 488 |
-
}
|
| 489 |
-
|
| 490 |
-
# Create a select slider for playback speed
|
| 491 |
-
current_speed = st.select_slider(
|
| 492 |
-
"Asenfel n urured n tɣuri",
|
| 493 |
-
options=speed_options,
|
| 494 |
-
value=st.session_state.playback_speed,
|
| 495 |
-
format_func=lambda x: speed_labels[x],
|
| 496 |
-
help="Senfel arured n tɣuri i yal taseddaṛt"
|
| 497 |
-
)
|
| 498 |
-
|
| 499 |
-
# Update session state if speed changed
|
| 500 |
-
if current_speed != st.session_state.playback_speed:
|
| 501 |
-
st.session_state.playback_speed = current_speed
|
| 502 |
-
# st.success(f"🎵 Arured n tɣuri yettwasenfel ɣer {speed_labels[current_speed]}")
|
| 503 |
-
|
| 504 |
-
# Show current speed effect
|
| 505 |
-
speed_effect = {
|
| 506 |
-
0.5: "⏳ Ugar n ukud i tmesliwt",
|
| 507 |
-
0.8: "🐢 Fessus i uḍfaṛ",
|
| 508 |
-
0.9: "📝 S ttawil i usishel n tigzi",
|
| 509 |
-
1.0: "✅ Arured amagnu n tmeslayt",
|
| 510 |
-
1.1: "⚡ Taɣuri s cwiṭ n uɣiwel",
|
| 511 |
-
1.2: "🚀 Taɣuri s uɣiwel",
|
| 512 |
-
1.3: "🎯 Taɣuri s uɣiwel d tmellit",
|
| 513 |
-
1.5: "🔥 Taɣuri s uɣiwel yuzzlen - i yimazzayen"
|
| 514 |
-
}
|
| 515 |
-
st.caption(f"**Asemdu:** {speed_effect[current_speed]}")
|
| 516 |
-
|
| 517 |
-
# Smart splitting configuration - FIXED FONT STYLING
|
| 518 |
-
st.markdown("---")
|
| 519 |
-
|
| 520 |
-
# Collapsible section for Smart Splitting with PROPER FONT HARMONIZATION
|
| 521 |
-
with st.expander("🎯 Agzam Amegzu", expanded=st.session_state.smart_splitting_expanded):
|
| 522 |
-
initial_paragraphs = st.slider("Tiseddaṛin deg tazwara", 3, 10, 5,
|
| 523 |
-
help="Amḍan n tseddaṛin wezzilen deg tazwara n tɣuri")
|
| 524 |
-
initial_word_target = st.slider("Awalen deg tseddaṛt n tazwara", 30, 100, 50,
|
| 525 |
-
help="Amḍan n wawalen deg tseddaṛin n tazwara")
|
| 526 |
-
normal_word_target = st.slider("Awalen deg tseddaṛt tamagnut", 50, 300, 100,
|
| 527 |
-
help="Amḍan n wawalen deg tseddaṛin tineggura")
|
| 528 |
-
|
| 529 |
-
# Main content
|
| 530 |
-
# col_doc, col_reading = st.columns([2, 3]) # 40% document, 60% reading
|
| 531 |
-
col_reading, col_doc = st.columns([3, 2]) # 60% reading, 40% document
|
| 532 |
-
|
| 533 |
-
with col_doc:
|
| 534 |
-
# Kabyle title for document overview with SMALLER font
|
| 535 |
-
st.markdown('<div class="section-title">📄 Tamuɣli Ɣer Uḍris</div>', unsafe_allow_html=True)
|
| 536 |
-
|
| 537 |
-
if st.session_state.get('full_document_text'):
|
| 538 |
-
# Display the full document in a scrollable container
|
| 539 |
-
document_html = "<div class='document-viewer'>"
|
| 540 |
-
paragraphs = st.session_state.get('paragraphs_list', [])
|
| 541 |
-
current_index = st.session_state.current_paragraph_index
|
| 542 |
-
|
| 543 |
-
for i, paragraph in enumerate(paragraphs):
|
| 544 |
-
if i == current_index:
|
| 545 |
-
document_html += f"<div class='current-paragraph-highlight'><strong>📍 Taseddaṛt {i+1}</strong><br>{paragraph}</div>"
|
| 546 |
-
else:
|
| 547 |
-
document_html += f"<div><strong>Taseddaṛt {i+1}</strong><br>{paragraph}</div>"
|
| 548 |
-
if i < len(paragraphs) - 1:
|
| 549 |
-
document_html += "<hr style='margin: 10px 0;'>"
|
| 550 |
-
|
| 551 |
-
document_html += "</div>"
|
| 552 |
-
st.markdown(document_html, unsafe_allow_html=True)
|
| 553 |
-
|
| 554 |
-
st.caption(f"📋 Aḍris: {len(paragraphs)} n tseddarin | 📍 Imir-a: Taseddaṛt {current_index + 1}")
|
| 555 |
-
else:
|
| 556 |
-
st.info("📁 Sali-d afaylu akken ad d-ikad da")
|
| 557 |
-
if uploaded_file and st.session_state.get('processed'):
|
| 558 |
-
st.warning("⚠️ Aḍris yettwasleḍ maca ulac-it. Ttxil-k, ɛiwed-as tikelt-nniḍen.")
|
| 559 |
-
|
| 560 |
-
with col_reading:
|
| 561 |
-
# Kabyle title for audio reading with SMALLER font
|
| 562 |
-
st.markdown('<div class="section-title">🎵 Ɣer - Sel
|
| 563 |
-
|
| 564 |
-
if st.session_state.get('processed') and st.session_state.get('audio_ready', False):
|
| 565 |
-
current_index = st.session_state.current_paragraph_index
|
| 566 |
-
|
| 567 |
-
# Check if current paragraph data exists
|
| 568 |
-
if current_index not in st.session_state.paragraphs_data:
|
| 569 |
-
st.warning(f"⏳ Taseddaṛt {current_index + 1} mazal d-tuli...")
|
| 570 |
-
st.info("Ameslaw n tseddaṛt-a mazal yemmid. Rǧu cwiṭ.")
|
| 571 |
-
# Auto-refresh to check for new data
|
| 572 |
-
time.sleep(2)
|
| 573 |
-
st.rerun()
|
| 574 |
-
return
|
| 575 |
-
|
| 576 |
-
current_data = st.session_state.paragraphs_data[current_index]
|
| 577 |
-
|
| 578 |
-
# NAVIGATION LAYOUT: Back + Audio + Next in one row
|
| 579 |
-
col_back, col_audio, col_next = st.columns([1, 2, 1])
|
| 580 |
-
|
| 581 |
-
with col_back:
|
| 582 |
-
# BACK BUTTON
|
| 583 |
-
if current_index > 0:
|
| 584 |
-
if st.button("⏮️ Deffir",
|
| 585 |
-
use_container_width=True,
|
| 586 |
-
type="secondary",
|
| 587 |
-
key=f"back_btn_top_{current_index}"):
|
| 588 |
-
prev_index = current_index - 1
|
| 589 |
-
st.session_state.current_paragraph_index = prev_index
|
| 590 |
-
st.session_state.reading_status = f"Taɣuri n tseddaṛt {prev_index + 1}/{st.session_state.total_paragraphs}"
|
| 591 |
-
st.rerun()
|
| 592 |
-
else:
|
| 593 |
-
st.button("⏮️ Deffir", disabled=True, use_container_width=True)
|
| 594 |
-
|
| 595 |
-
with col_audio:
|
| 596 |
-
# Audio player status placeholder - will be in the HTML
|
| 597 |
-
pass
|
| 598 |
-
|
| 599 |
-
with col_next:
|
| 600 |
-
# NEXT BUTTON
|
| 601 |
-
if current_index < st.session_state.total_paragraphs - 1:
|
| 602 |
-
next_index = current_index + 1
|
| 603 |
-
|
| 604 |
-
if st.button("⏭️ Sdat",
|
| 605 |
-
type="primary",
|
| 606 |
-
use_container_width=True,
|
| 607 |
-
key=f"next_btn_top_{current_index}"):
|
| 608 |
-
|
| 609 |
-
st.session_state.current_paragraph_index = next_index
|
| 610 |
-
st.session_state.reading_status = f"Taɣuri n tseddaṛt {next_index + 1}/{st.session_state.total_paragraphs}"
|
| 611 |
-
st.rerun()
|
| 612 |
-
|
| 613 |
-
# Status indicator (informational only)
|
| 614 |
-
next_ready = next_index in st.session_state.paragraphs_data
|
| 615 |
-
ready_count = len(st.session_state.paragraphs_data)
|
| 616 |
-
total_count = st.session_state.total_paragraphs
|
| 617 |
-
if not next_ready:
|
| 618 |
-
st.caption(f"⏳ Yesselkat... ({ready_count}/{total_count})")
|
| 619 |
-
else:
|
| 620 |
-
st.caption(f"✅ Yemmed ({ready_count}/{total_count})")
|
| 621 |
-
|
| 622 |
-
else:
|
| 623 |
-
st.button("⏭️ Sdat", disabled=True, use_container_width=True)
|
| 624 |
-
st.caption("🎉 Temmed tɣuri!")
|
| 625 |
-
|
| 626 |
-
# HTML with real-time text highlighting
|
| 627 |
-
timing_json = json.dumps(current_data['timing_info'])
|
| 628 |
-
full_text = current_data['paragraph_text'].replace('`', '\\`').replace('${', '\\${')
|
| 629 |
-
audio_b64 = base64.b64encode(current_data['audio_bytes']).decode()
|
| 630 |
-
playback_speed = st.session_state.playback_speed
|
| 631 |
-
|
| 632 |
-
complete_html = f"""
|
| 633 |
-
<!DOCTYPE html>
|
| 634 |
-
<html>
|
| 635 |
-
<head>
|
| 636 |
-
<style>
|
| 637 |
-
.phrase-highlight {{
|
| 638 |
-
background: linear-gradient(120deg, #ffeb3b, #ffd54f);
|
| 639 |
-
padding: 4px 8px;
|
| 640 |
-
margin: 2px 1px;
|
| 641 |
-
border-radius: 6px;
|
| 642 |
-
box-shadow: 0 2px 4px rgba(255, 235, 59, 0.3);
|
| 643 |
-
font-weight: bold;
|
| 644 |
-
transition: all 0.3s ease;
|
| 645 |
-
}}
|
| 646 |
-
.phrase-text {{
|
| 647 |
-
padding: 2px 4px;
|
| 648 |
-
margin: 1px 0px;
|
| 649 |
-
border-radius: 4px;
|
| 650 |
-
transition: all 0.3s ease;
|
| 651 |
-
}}
|
| 652 |
-
.reading-container {{
|
| 653 |
-
max-height: 40vh;
|
| 654 |
-
overflow-y: auto;
|
| 655 |
-
padding: 20px;
|
| 656 |
-
border: 2px solid #e1e1e1;
|
| 657 |
-
border-radius: 12px;
|
| 658 |
-
background: #fafafa;
|
| 659 |
-
margin: 5px 0;
|
| 660 |
-
line-height: 1.8;
|
| 661 |
-
font-size: 16px;
|
| 662 |
-
font-family: Arial, sans-serif;
|
| 663 |
-
}}
|
| 664 |
-
.status-bar {{
|
| 665 |
-
background: #e3f2fd;
|
| 666 |
-
padding: 8px;
|
| 667 |
-
border-radius: 8px;
|
| 668 |
-
margin: 8px 0;
|
| 669 |
-
font-size: 14px;
|
| 670 |
-
}}
|
| 671 |
-
.audio-player {{
|
| 672 |
-
width: 100%;
|
| 673 |
-
margin: 8px 0;
|
| 674 |
-
text-align: center;
|
| 675 |
-
}}
|
| 676 |
-
.audio-controls {{
|
| 677 |
-
display: flex;
|
| 678 |
-
justify-content: center;
|
| 679 |
-
align-items: center;
|
| 680 |
-
gap: 10px;
|
| 681 |
-
margin-bottom: 10px;
|
| 682 |
-
}}
|
| 683 |
-
</style>
|
| 684 |
-
</head>
|
| 685 |
-
<body>
|
| 686 |
-
<div class="audio-player">
|
| 687 |
-
<div class="audio-controls">
|
| 688 |
-
<audio id="mainAudio" controls playbackRate={playback_speed} style="min-width: 250px;">
|
| 689 |
-
<source src="data:audio/wav;base64,{audio_b64}" type="audio/wav">
|
| 690 |
-
</audio>
|
| 691 |
-
</div>
|
| 692 |
-
<div style="margin-top: 5px; font-size: 14px; color: #666;">
|
| 693 |
-
🎵 Seɣbel, tekki ɣef ▶️ afella i tɣuri s urured: <strong>{playback_speed}x</strong>
|
| 694 |
-
{"" if playback_speed == 1.0 else " - " + ("ddac ddac" if playback_speed < 1.0 else "aɣiwel")}
|
| 695 |
-
</div>
|
| 696 |
-
</div>
|
| 697 |
-
|
| 698 |
-
|
| 699 |
-
|
| 700 |
-
<div class="reading-container" id="readingContainer">
|
| 701 |
-
{full_text}
|
| 702 |
-
</div>
|
| 703 |
-
|
| 704 |
-
<script>
|
| 705 |
-
const timingInfo = {timing_json};
|
| 706 |
-
const fullText = `{full_text}`;
|
| 707 |
-
const playbackSpeed = {playback_speed};
|
| 708 |
-
let currentHighlightIndex = -1;
|
| 709 |
-
let phraseElements = [];
|
| 710 |
-
|
| 711 |
-
function initializeHighlighting() {{
|
| 712 |
-
// Create phrase elements by wrapping text
|
| 713 |
-
let container = document.getElementById('readingContainer');
|
| 714 |
-
let workingText = fullText;
|
| 715 |
-
|
| 716 |
-
timingInfo.forEach((phrase, index) => {{
|
| 717 |
-
const cleanPhrase = phrase.text.trim();
|
| 718 |
-
if (workingText.includes(cleanPhrase)) {{
|
| 719 |
-
const spanId = 'phrase_' + index;
|
| 720 |
-
const spanHtml = '<span id="' + spanId + '" class="phrase-text">' + cleanPhrase + '</span>';
|
| 721 |
-
workingText = workingText.replace(cleanPhrase, spanHtml);
|
| 722 |
-
}}
|
| 723 |
-
}});
|
| 724 |
-
|
| 725 |
-
container.innerHTML = workingText;
|
| 726 |
-
|
| 727 |
-
// Store references to all phrase elements
|
| 728 |
-
timingInfo.forEach((phrase, index) => {{
|
| 729 |
-
const element = document.getElementById('phrase_' + index);
|
| 730 |
-
if (element) {{
|
| 731 |
-
phraseElements.push(element);
|
| 732 |
-
}}
|
| 733 |
-
}});
|
| 734 |
-
|
| 735 |
-
updateDebugInfo("Agzam amegzu yemmed! " + phraseElements.length + " n tefyar s " + playbackSpeed + "x arured");
|
| 736 |
-
}}
|
| 737 |
-
|
| 738 |
-
function updateDebugInfo(message) {{
|
| 739 |
-
const debugEl = document.getElementById('debugInfo');
|
| 740 |
-
if (debugEl) debugEl.textContent = message;
|
| 741 |
-
}}
|
| 742 |
-
|
| 743 |
-
function highlightCurrentPhrase(currentTime) {{
|
| 744 |
-
let newIndex = -1;
|
| 745 |
-
for (let i = 0; i < timingInfo.length; i++) {{
|
| 746 |
-
if (currentTime >= timingInfo[i].start && currentTime < timingInfo[i].end) {{
|
| 747 |
-
newIndex = i;
|
| 748 |
-
break;
|
| 749 |
-
}}
|
| 750 |
-
}}
|
| 751 |
-
|
| 752 |
-
if (newIndex !== currentHighlightIndex) {{
|
| 753 |
-
currentHighlightIndex = newIndex;
|
| 754 |
-
updateHighlightDisplay();
|
| 755 |
-
if (newIndex >= 0) {{
|
| 756 |
-
updateDebugInfo("Akud: " + currentTime.toFixed(2) + "s | Tafyirt: " + (newIndex + 1) + "/" + timingInfo.length + " | Arured: " + playbackSpeed + "x");
|
| 757 |
-
}}
|
| 758 |
-
}}
|
| 759 |
-
}}
|
| 760 |
-
|
| 761 |
-
function updateHighlightDisplay() {{
|
| 762 |
-
const currentPhraseSpan = document.getElementById('currentPhrase');
|
| 763 |
-
|
| 764 |
-
// Remove all highlights
|
| 765 |
-
phraseElements.forEach(element => {{
|
| 766 |
-
element.className = 'phrase-text';
|
| 767 |
-
}});
|
| 768 |
-
|
| 769 |
-
// Highlight current phrase
|
| 770 |
-
if (currentHighlightIndex >= 0 && currentHighlightIndex < phraseElements.length) {{
|
| 771 |
-
const element = phraseElements[currentHighlightIndex];
|
| 772 |
-
if (element) {{
|
| 773 |
-
element.className = 'phrase-highlight';
|
| 774 |
-
element.scrollIntoView({{ behavior: 'smooth', block: 'center' }});
|
| 775 |
-
}}
|
| 776 |
-
|
| 777 |
-
if (currentPhraseSpan && timingInfo[currentHighlightIndex]) {{
|
| 778 |
-
currentPhraseSpan.textContent = timingInfo[currentHighlightIndex].text.substring(0, 100) +
|
| 779 |
-
(timingInfo[currentHighlightIndex].text.length > 100 ? '...' : '');
|
| 780 |
-
}}
|
| 781 |
-
}} else {{
|
| 782 |
-
if (currentPhraseSpan) {{
|
| 783 |
-
currentPhraseSpan.textContent = 'Araǧu amesli...';
|
| 784 |
-
}}
|
| 785 |
-
}}
|
| 786 |
-
}}
|
| 787 |
-
|
| 788 |
-
// Set up audio event listeners
|
| 789 |
-
function setupAudioListeners() {{
|
| 790 |
-
const audioElement = document.getElementById('mainAudio');
|
| 791 |
-
if (audioElement) {{
|
| 792 |
-
// Set playback rate
|
| 793 |
-
audioElement.playbackRate = playbackSpeed;
|
| 794 |
-
|
| 795 |
-
audioElement.addEventListener('timeupdate', function() {{
|
| 796 |
-
highlightCurrentPhrase(this.currentTime);
|
| 797 |
-
}});
|
| 798 |
-
|
| 799 |
-
audioElement.addEventListener('play', function() {{
|
| 800 |
-
updateDebugInfo("🎵 Taɣuri... aseḍfeṛ n tira iteddu s " + playbackSpeed + "x arured");
|
| 801 |
-
}});
|
| 802 |
-
|
| 803 |
-
audioElement.addEventListener('ended', function() {{
|
| 804 |
-
currentHighlightIndex = -1;
|
| 805 |
-
updateHighlightDisplay();
|
| 806 |
-
updateDebugInfo("✅ Taɣuri tekfa s " + playbackSpeed + "x arured");
|
| 807 |
-
}});
|
| 808 |
-
|
| 809 |
-
}} else {{
|
| 810 |
-
setTimeout(setupAudioListeners, 100);
|
| 811 |
-
}}
|
| 812 |
-
}}
|
| 813 |
-
|
| 814 |
-
// Initialize everything when page loads
|
| 815 |
-
document.addEventListener('DOMContentLoaded', function() {{
|
| 816 |
-
initializeHighlighting();
|
| 817 |
-
setupAudioListeners();
|
| 818 |
-
}});
|
| 819 |
-
</script>
|
| 820 |
-
</body>
|
| 821 |
-
</html>
|
| 822 |
-
"""
|
| 823 |
-
|
| 824 |
-
# Display the complete reading content
|
| 825 |
-
st.components.v1.html(complete_html, height=300, scrolling=True)
|
| 826 |
-
|
| 827 |
-
# Place the remaining controls BELOW the reading content
|
| 828 |
-
st.markdown('<div class="controls-section">', unsafe_allow_html=True)
|
| 829 |
-
|
| 830 |
-
# Show paragraph info
|
| 831 |
-
word_count = current_data.get('word_count', len(current_data['paragraph_text'].split()))
|
| 832 |
-
st.markdown(f"**Taseddaṛt {current_index + 1}/{st.session_state.total_paragraphs}**")
|
| 833 |
-
st.caption(f"📊 {word_count} n wawalen | ⏱️ {current_data['audio_duration']:.1f}s | 🎵 {st.session_state.playback_speed}x arured")
|
| 834 |
-
|
| 835 |
-
# Display progress
|
| 836 |
-
ready_count = len(st.session_state.paragraphs_data)
|
| 837 |
-
total_count = st.session_state.total_paragraphs
|
| 838 |
-
progress = ready_count / total_count if total_count > 0 else 0
|
| 839 |
-
st.progress(progress)
|
| 840 |
-
st.caption(f"📊 Asekker: {ready_count}/{total_count} n tseddarin mmedent ({progress:.0%})")
|
| 841 |
-
|
| 842 |
-
# Download button for current paragraph
|
| 843 |
-
audio_bytes = current_data['audio_bytes']
|
| 844 |
-
st.download_button(
|
| 845 |
-
"📥 Zdem ameslaw n tseddaṛt-a",
|
| 846 |
-
audio_bytes,
|
| 847 |
-
f"Taseddaṛt_{current_index + 1}.wav",
|
| 848 |
-
"audio/wav",
|
| 849 |
-
use_container_width=True
|
| 850 |
-
)
|
| 851 |
-
|
| 852 |
-
st.markdown('</div>', unsafe_allow_html=True)
|
| 853 |
-
|
| 854 |
-
elif uploaded_file and not st.session_state.processed:
|
| 855 |
-
if st.button("🔄 Selket Aḍris", type="primary"):
|
| 856 |
-
# Process document when button is clicked
|
| 857 |
-
with st.spinner("Asekker n uḍris s ugzam amegzu n tseddaṛin..."):
|
| 858 |
-
temp_path = os.path.join(tempfile.gettempdir(), uploaded_file.name)
|
| 859 |
-
with open(temp_path, "wb") as f:
|
| 860 |
-
f.write(uploaded_file.getvalue())
|
| 861 |
-
|
| 862 |
-
text, error = read_document(temp_path)
|
| 863 |
-
if error:
|
| 864 |
-
st.error(error)
|
| 865 |
-
else:
|
| 866 |
-
cleaned_text = clean_text(text)
|
| 867 |
-
st.session_state.full_document_text = cleaned_text
|
| 868 |
-
|
| 869 |
-
# Use smart splitting strategy
|
| 870 |
-
paragraphs = smart_split_paragraphs(
|
| 871 |
-
cleaned_text,
|
| 872 |
-
initial_paragraphs=initial_paragraphs,
|
| 873 |
-
initial_word_target=initial_word_target,
|
| 874 |
-
normal_word_target=normal_word_target
|
| 875 |
-
)
|
| 876 |
-
|
| 877 |
-
if not paragraphs:
|
| 878 |
-
st.error("Ulac agbur i tɣuri.")
|
| 879 |
-
return
|
| 880 |
-
|
| 881 |
-
# Initialize processing state
|
| 882 |
-
st.session_state.total_paragraphs = len(paragraphs)
|
| 883 |
-
st.session_state.current_paragraph_index = 0
|
| 884 |
-
st.session_state.paragraphs_data = {}
|
| 885 |
-
st.session_state.paragraphs_list = paragraphs
|
| 886 |
-
st.session_state.processed = True
|
| 887 |
-
|
| 888 |
-
# Generate first paragraph immediately in main thread
|
| 889 |
-
first_paragraph = paragraphs[0]
|
| 890 |
-
audio_data, sampling_rate, timing_info = generate_audio_with_precise_timing(first_paragraph)
|
| 891 |
-
|
| 892 |
-
st.session_state.paragraphs_data[0] = {
|
| 893 |
-
'paragraph_text': first_paragraph,
|
| 894 |
-
'audio_data': audio_data,
|
| 895 |
-
'timing_info': timing_info,
|
| 896 |
-
'audio_duration': len(audio_data) / sampling_rate,
|
| 897 |
-
'audio_bytes': get_audio_bytes(audio_data),
|
| 898 |
-
'word_count': len(first_paragraph.split())
|
| 899 |
-
}
|
| 900 |
-
st.session_state.audio_ready = True
|
| 901 |
-
|
| 902 |
-
# Start background worker for ALL remaining paragraphs
|
| 903 |
-
if len(paragraphs) > 1:
|
| 904 |
-
remaining_paragraphs = paragraphs[1:]
|
| 905 |
-
|
| 906 |
-
# Use queue-based background worker
|
| 907 |
-
thread = threading.Thread(
|
| 908 |
-
target=background_audio_worker,
|
| 909 |
-
args=(remaining_paragraphs, st.session_state.audio_queue, 1)
|
| 910 |
-
)
|
| 911 |
-
thread.daemon = True
|
| 912 |
-
thread.start()
|
| 913 |
-
|
| 914 |
-
st.session_state.background_worker_started = True
|
| 915 |
-
|
| 916 |
-
st.rerun()
|
| 917 |
-
else:
|
| 918 |
-
st.info("🔄 Seɣbel, tekki ɣef 'Selket Aḍris' iwakken ad yettwasleḍ u ad yeddu seg tira ɣer umeslaw")
|
| 919 |
-
else:
|
| 920 |
-
st.info("👆 Sali-d afaylu iwakken ad tedduḍ ɣer tɣuri")
|
| 921 |
-
|
| 922 |
-
if __name__ == "__main__":
|
| 923 |
main()
|
|
|
|
| 1 |
+
# Specific conda environment : Kabyle_streamlit
|
| 2 |
+
|
| 3 |
+
# Streamlit TTS Reader - Taqbaylit TTS Sɣer Adlis
|
| 4 |
+
# Bouaziz Ait Driss October 2025
|
| 5 |
+
|
| 6 |
+
import streamlit as st
|
| 7 |
+
import fitz # PyMuPDF
|
| 8 |
+
import re
|
| 9 |
+
import numpy as np
|
| 10 |
+
import torch
|
| 11 |
+
from scipy.io.wavfile import write as wav_write
|
| 12 |
+
import tempfile
|
| 13 |
+
import os
|
| 14 |
+
import base64
|
| 15 |
+
import json
|
| 16 |
+
import io
|
| 17 |
+
import threading
|
| 18 |
+
import queue
|
| 19 |
+
import time
|
| 20 |
+
import pdfplumber
|
| 21 |
+
|
| 22 |
+
# Configure Streamlit page
# Wide layout is used so the document overview and the reader can sit
# side by side; the sidebar (settings/speed controls) starts expanded.
st.set_page_config(
    page_title="Kabyle TTS Document Reader",
    page_icon="🎙️",
    layout="wide",
    initial_sidebar_state="expanded",
)
|
| 29 |
+
|
| 30 |
+
# Try to import transformers
|
| 31 |
+
try:
|
| 32 |
+
from transformers import VitsModel, AutoTokenizer
|
| 33 |
+
TRANSFORMERS_AVAILABLE = True
|
| 34 |
+
except ImportError:
|
| 35 |
+
try:
|
| 36 |
+
from transformers import AutoModel, AutoTokenizer
|
| 37 |
+
TRANSFORMERS_AVAILABLE = True
|
| 38 |
+
VitsModel = AutoModel
|
| 39 |
+
except ImportError:
|
| 40 |
+
TRANSFORMERS_AVAILABLE = False
|
| 41 |
+
|
| 42 |
+
# --- Model Loading ---
@st.cache_resource
def load_model():
    """Load the MMS Kabyle TTS model and tokenizer (cached across reruns).

    Returns:
        (model, tokenizer, device) on success, with the model moved onto
        CUDA when available; (None, None, "cpu") when transformers could
        not be imported or loading fails (the error is shown in the UI).
    """
    if not TRANSFORMERS_AVAILABLE:
        return None, None, "cpu"
    try:
        tts_model = VitsModel.from_pretrained("facebook/mms-tts-kab")
        tts_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-kab")
        target_device = "cuda" if torch.cuda.is_available() else "cpu"
        return tts_model.to(target_device), tts_tokenizer, target_device
    except Exception as e:
        st.error(f"Error loading model: {e}")
        return None, None, "cpu"

model, tokenizer, device = load_model()
|
| 58 |
+
|
| 59 |
+
# --- Text Processing ---
def clean_text(text):
    """Collapse runs of spaces and blank lines, then trim the result.

    Falsy input (None / empty string) is returned unchanged.
    """
    if not text:
        return text
    squeezed = re.sub(r' +', ' ', text)              # runs of spaces -> one space
    squeezed = re.sub(r'\n\s*\n', '\n\n', squeezed)  # multi-blank gaps -> one blank line
    return squeezed.strip()
|
| 66 |
+
|
| 67 |
+
def smart_split_paragraphs(text, initial_paragraphs=5, initial_word_target=50, normal_word_target=200):
    """
    Smart paragraph splitting strategy:
    - First 'initial_paragraphs' paragraphs: ~initial_word_target words each (for quick startup)
    - Remaining paragraphs: ~normal_word_target words each (for better timing)
    """
    if not text:
        return []

    # Break the raw text on blank-line boundaries first.
    blocks = [b.strip() for b in re.split(r'\n\s*\n', text) if b.strip()]
    if not blocks:
        return []

    # Flatten every block into sentences (split after . ! ? : ;).
    sentences = []
    for block in blocks:
        sentences.extend(s.strip() for s in re.split(r'(?<=[.!?:;])\s+', block) if s.strip())
    if not sentences:
        return []

    # Greedily pack sentences: short chunks first (fast audio startup),
    # then longer chunks for the rest of the document.
    chunks = []
    buffer = []
    buffered_words = 0
    for sentence in sentences:
        n_words = len(sentence.split())
        limit = initial_word_target if len(chunks) < initial_paragraphs else normal_word_target
        if buffer and buffered_words + n_words > limit:
            chunks.append(' '.join(buffer))
            buffer = [sentence]
            buffered_words = n_words
        else:
            buffer.append(sentence)
            buffered_words += n_words

    # Flush whatever is left in the buffer.
    if buffer:
        chunks.append(' '.join(buffer))

    return chunks
|
| 125 |
+
|
| 126 |
+
def split_paragraph_into_phrases(paragraph, max_phrase_length=150):
    """Break a paragraph into phrases of roughly max_phrase_length characters.

    Sentences (split after . ! ?) are greedily packed together; a single
    sentence longer than the limit still becomes its own phrase. Empty
    input yields an empty list.
    """
    if not paragraph:
        return []

    pieces = re.split(r'(?<=[.!?])\s+', paragraph)
    grouped = []
    pending = ""
    for piece in pieces:
        if pending and len(pending) + len(piece) > max_phrase_length:
            grouped.append(pending.strip())
            pending = piece
        else:
            pending = f"{pending} {piece}" if pending else piece

    if pending.strip():
        grouped.append(pending.strip())

    return grouped
|
| 145 |
+
|
| 146 |
+
def generate_audio_with_precise_timing(paragraph):
    """Synthesize audio for a paragraph, phrase by phrase.

    Each phrase is pushed through the TTS model on its own so the exact
    audio duration of every phrase is known, which makes the highlight
    timings precise. Returns (audio_array, sampling_rate, timing_info)
    where timing_info is a list of {'text', 'start', 'end', 'duration'}
    dicts. Falls back to a dummy tone (with empty timings) when the model
    is missing or synthesis fails.
    """
    if model is None or tokenizer is None:
        return create_dummy_audio_for_paragraph(paragraph), 16000, []

    try:
        phrases = split_paragraph_into_phrases(paragraph)
        if not phrases:
            return create_dummy_audio_for_paragraph(paragraph), 16000, []

        sampling_rate = 16000
        chunks = []
        durations = []

        for phrase in phrases:
            if not phrase.strip():
                continue

            # Synthesize this phrase on its own.
            encoded = tokenizer(phrase, return_tensors="pt").to(device)
            with torch.no_grad():
                result = model(**encoded)

            # VITS models expose the audio as .waveform; fall back to the
            # first tuple element for generic AutoModel outputs.
            if hasattr(result, 'waveform'):
                chunk = result.waveform.cpu().numpy().squeeze()
            else:
                chunk = result[0].cpu().numpy().squeeze() if len(result) > 0 else None

            if chunk is not None:
                durations.append(len(chunk) / sampling_rate)
                peak = np.max(np.abs(chunk))
                if peak > 0:
                    chunk = chunk / peak * 0.8  # normalize with headroom
                chunks.append(chunk)
            else:
                # No audio came back: estimate ~0.4 s per word so the
                # timing track stays usable.
                durations.append(len(phrase.split()) * 0.4)

        if not chunks:
            return create_dummy_audio_for_paragraph(paragraph), 16000, []

        full_audio = np.concatenate(chunks)

        # Build cumulative start/end times from the measured durations.
        timing_info = []
        elapsed = 0.0
        for phrase, length in zip(phrases, durations):
            timing_info.append({
                'text': phrase,
                'start': elapsed,
                'end': elapsed + length,
                'duration': length
            })
            elapsed += length

        return full_audio, sampling_rate, timing_info

    except Exception:
        # Best-effort: any synthesis failure degrades to the dummy tone.
        return create_dummy_audio_for_paragraph(paragraph), 16000, []
|
| 217 |
+
|
| 218 |
+
def create_dummy_audio_for_paragraph(paragraph):
    """Return a placeholder 220 Hz sine tone sized at ~0.4 s per word (16 kHz)."""
    seconds = len(paragraph.split()) * 0.4
    rate = 16000
    timeline = np.linspace(0, seconds, int(rate * seconds))
    return 0.3 * np.sin(2 * np.pi * 220 * timeline)
|
| 225 |
+
|
| 226 |
+
# Read a document with a pdfplumber fallback for PDFs whose PyMuPDF
# extraction loses diacritic letters.
def read_document(file_path):
    """Extract text from a .pdf or .txt file.

    Returns:
        (text, error) — on success ``error`` is falsy (``[]``); on failure
        the message is returned in the *error* slot.

    Bug fixed: previously every failure path returned the error message as
    the FIRST element with ``[]`` second, so the caller's ``if error:``
    check never fired and the error string was treated as document text
    (and read aloud).
    """
    text = ""
    try:
        if file_path.lower().endswith('.pdf'):
            # FIRST TRY: PyMuPDF (faster for digital PDFs)
            try:
                pdf_document = fitz.open(file_path)
                for page_num in range(len(pdf_document)):
                    text += pdf_document[page_num].get_text() + "\n"
                pdf_document.close()

                # Heuristic: accept this result only when diacritics
                # survived extraction; otherwise try pdfplumber below.
                if text.strip() and any(char in text for char in ['é', 'è', 'à', 'ù', 'ï', 'ö', 'α', 'β', 'γ']):
                    return text, []
            except Exception:
                text = ""  # reset and fall through to pdfplumber

            # FALLBACK: pdfplumber (better for scanned/diacritic PDFs)
            try:
                import pdfplumber
                with pdfplumber.open(file_path) as pdf:
                    text = ""
                    for page in pdf.pages:
                        text += (page.extract_text() or "") + "\n"
            except ImportError:
                return "", "pdfplumber not available"
            except Exception as e:
                return "", f"Both PDF methods failed: {e}"

        elif file_path.lower().endswith('.txt'):
            with open(file_path, 'r', encoding='utf-8') as f:
                text = f.read()
        else:
            return "", "Unsupported file format"

    except Exception as e:
        return "", f"Error reading document: {e}"

    return text, []
|
| 269 |
+
|
| 270 |
+
def normalize_audio(audio_data):
    """Return the signal as float32, scaled so its peak is 0.9 (silence untouched)."""
    samples = audio_data if audio_data.dtype == np.float32 else audio_data.astype(np.float32)
    peak = np.max(np.abs(samples))
    return samples / peak * 0.9 if peak > 0 else samples
|
| 277 |
+
|
| 278 |
+
def get_audio_bytes(audio_data):
    """Encode a float signal as 16 kHz 16-bit PCM WAV and return the raw bytes."""
    scaled = normalize_audio(audio_data)
    pcm = np.clip(scaled * 32767, -32768, 32767).astype(np.int16)
    buffer = io.BytesIO()
    wav_write(buffer, 16000, pcm)
    return buffer.getvalue()
|
| 285 |
+
|
| 286 |
+
# --- Background Audio Generation ---
def background_audio_worker(paragraphs, audio_queue, start_index=0):
    """Generate audio for each paragraph and push the results onto audio_queue.

    Runs on a worker thread, so it must never touch st.session_state — the
    main thread drains the queue instead. Any failure silently ends the
    worker (best-effort background fill).
    """
    try:
        for offset, paragraph in enumerate(paragraphs):
            audio, rate, timings = generate_audio_with_precise_timing(paragraph)
            audio_queue.put({
                'paragraph_index': start_index + offset,
                'paragraph_text': paragraph,
                'audio_data': audio,
                'timing_info': timings,
                'audio_duration': len(audio) / rate,
                'audio_bytes': get_audio_bytes(audio),
                'word_count': len(paragraph.split())
            })
    except Exception:
        pass
|
| 310 |
+
|
| 311 |
+
# --- Queue Processing ---
def process_audio_queue(audio_queue):
    """Drain finished paragraphs from the worker queue into session state.

    Must only run on the main thread (it writes st.session_state).
    Returns the number of items consumed.
    """
    drained = 0
    while True:
        try:
            item = audio_queue.get_nowait()  # non-blocking
        except queue.Empty:
            break
        # SAFE: main thread is the only writer of session state.
        st.session_state.paragraphs_data[item['paragraph_index']] = item
        drained += 1
    return drained
|
| 328 |
+
|
| 329 |
+
# Initialize session state: each key gets its default only when missing,
# so values survive Streamlit reruns. Factories avoid building objects
# (e.g. the Queue) unless they are actually needed.
_SESSION_DEFAULTS = (
    ('processed', lambda: False),
    ('current_paragraph_index', lambda: 0),
    ('total_paragraphs', lambda: 0),
    ('paragraphs_data', dict),
    ('audio_ready', lambda: False),
    ('background_worker_started', lambda: False),
    ('reading_status', lambda: "Ready to start reading"),
    ('current_document_id', lambda: None),
    ('audio_queue', queue.Queue),
    ('playback_speed', lambda: 1.0),  # default normal speed
    ('full_document_text', lambda: ""),
    ('smart_splitting_expanded', lambda: False),
)
for _key, _default in _SESSION_DEFAULTS:
    if _key not in st.session_state:
        st.session_state[_key] = _default()
|
| 354 |
+
|
| 355 |
+
# --- Streamlit UI ---
|
| 356 |
+
def main():
|
| 357 |
+
# Custom CSS for better styling - FIXED FONT HARMONIZATION
|
| 358 |
+
st.markdown("""
|
| 359 |
+
<style>
|
| 360 |
+
.main-title {
|
| 361 |
+
font-size: 1.5rem !important;
|
| 362 |
+
font-weight: bold !important;
|
| 363 |
+
margin-bottom: -1rem !important;
|
| 364 |
+
margin-top: -1rem !important; /* MAXIMUM REDUCED top margin */
|
| 365 |
+
color: #1f77b4;
|
| 366 |
+
}
|
| 367 |
+
.section-title {
|
| 368 |
+
font-size: 1.3rem !important;
|
| 369 |
+
font-weight: bold !important;
|
| 370 |
+
margin-bottom: 0.3rem !important;
|
| 371 |
+
margin-top: 0rem !important;
|
| 372 |
+
color: #2e86ab;
|
| 373 |
+
}
|
| 374 |
+
.document-viewer {
|
| 375 |
+
max-height: 70vh;
|
| 376 |
+
overflow-y: auto;
|
| 377 |
+
border: 1px solid #e1e1e1;
|
| 378 |
+
border-radius: 8px;
|
| 379 |
+
padding: 15px;
|
| 380 |
+
background: #fafafa;
|
| 381 |
+
font-family: Arial, sans-serif;
|
| 382 |
+
line-height: 1.6;
|
| 383 |
+
scrollbar-width: thin;
|
| 384 |
+
scrollbar-color: #888 #f1f1f1;
|
| 385 |
+
color: #333333; /* ADD THIS LINE - ensures dark text */
|
| 386 |
+
}
|
| 387 |
+
.document-viewer::-webkit-scrollbar {
|
| 388 |
+
width: 8px;
|
| 389 |
+
}
|
| 390 |
+
.document-viewer::-webkit-scrollbar-track {
|
| 391 |
+
background: #f1f1f1;
|
| 392 |
+
border-radius: 4px;
|
| 393 |
+
}
|
| 394 |
+
.document-viewer::-webkit-scrollbar-thumb {
|
| 395 |
+
background: #888;
|
| 396 |
+
border-radius: 4px;
|
| 397 |
+
}
|
| 398 |
+
.document-viewer::-webkit-scrollbar-thumb:hover {
|
| 399 |
+
background: #555;
|
| 400 |
+
}
|
| 401 |
+
.current-paragraph-highlight {
|
| 402 |
+
background: linear-gradient(120deg, #e3f2fd, #bbdefb);
|
| 403 |
+
padding: 8px 12px;
|
| 404 |
+
margin: 8px 0;
|
| 405 |
+
border-left: 4px solid #2196f3;
|
| 406 |
+
border-radius: 4px;
|
| 407 |
+
box-shadow: 0 2px 4px rgba(33, 150, 243, 0.2);
|
| 408 |
+
}
|
| 409 |
+
.reading-content {
|
| 410 |
+
margin-bottom: 10px;
|
| 411 |
+
}
|
| 412 |
+
.controls-section {
|
| 413 |
+
margin-top: 15px;
|
| 414 |
+
}
|
| 415 |
+
/* Reduce sidebar header spacing */
|
| 416 |
+
.sidebar .sidebar-content {
|
| 417 |
+
padding-top: 1rem;
|
| 418 |
+
}
|
| 419 |
+
/* FIXED: Make expander headers consistent with sidebar titles */
|
| 420 |
+
.streamlit-expanderHeader {
|
| 421 |
+
font-size: 1.1rem !important;
|
| 422 |
+
font-weight: 600 !important;
|
| 423 |
+
color: inherit !important;
|
| 424 |
+
}
|
| 425 |
+
/* FIXED: Ensure consistent styling for smart splitting expander */
|
| 426 |
+
div[data-testid="stExpander"] details summary p {
|
| 427 |
+
font-size: 1.1rem !important;
|
| 428 |
+
font-weight: 600 !important;
|
| 429 |
+
}
|
| 430 |
+
/* Style for the smart splitting section specifically */
|
| 431 |
+
.smart-splitting-header {
|
| 432 |
+
font-size: 1.1rem !important;
|
| 433 |
+
font-weight: 600 !important;
|
| 434 |
+
}
|
| 435 |
+
</style>
|
| 436 |
+
""", unsafe_allow_html=True)
|
| 437 |
+
|
| 438 |
+
# Main title with LARGER font and MAXIMUM REDUCED TOP SPACE
|
| 439 |
+
st.markdown('<div class="main-title">🎙️Taqbaylit TTS Sɣer Adlis</div>', unsafe_allow_html=True)
|
| 440 |
+
|
| 441 |
+
if model is None:
|
| 442 |
+
st.warning("⚠️ Using test audio (TTS model not available)")
|
| 443 |
+
|
| 444 |
+
# Process audio queue on every run
|
| 445 |
+
if st.session_state.get('background_worker_started', False):
|
| 446 |
+
processed_count = process_audio_queue(st.session_state.audio_queue)
|
| 447 |
+
if processed_count > 0:
|
| 448 |
+
st.success(f"📥 Loaded {processed_count} paragraph(s)")
|
| 449 |
+
|
| 450 |
+
# Sidebar with KABYLE TRANSLATIONS
|
| 451 |
+
with st.sidebar:
|
| 452 |
+
# Document Settings with reduced spacing
|
| 453 |
+
st.header("📁 Tawila n Tɣuri", anchor=False)
|
| 454 |
+
uploaded_file = st.file_uploader("Sali-d Aḍris - jbed sers afaylu", type=['pdf', 'txt'],
|
| 455 |
+
help="Ulac OCR ara yerren afaylu PDF n tugna ɣer txt deg teqaylit. Ma ur iwulem ara ɛiwed-as tamuɣli.")
|
| 456 |
+
|
| 457 |
+
# Clear document button - MOVED UP immediately under file uploader
|
| 458 |
+
if st.session_state.get('processed'):
|
| 459 |
+
if st.button("🗑️ Sfeḍ Afaylu-a", type="secondary", use_container_width=True):
|
| 460 |
+
# Reset all document-related session state
|
| 461 |
+
st.session_state.processed = False
|
| 462 |
+
st.session_state.current_paragraph_index = 0
|
| 463 |
+
st.session_state.total_paragraphs = 0
|
| 464 |
+
st.session_state.paragraphs_data = {}
|
| 465 |
+
st.session_state.audio_ready = False
|
| 466 |
+
st.session_state.background_worker_started = False
|
| 467 |
+
st.session_state.reading_status = "Ready to start reading"
|
| 468 |
+
st.session_state.current_document_id = None
|
| 469 |
+
st.session_state.audio_queue = queue.Queue()
|
| 470 |
+
st.session_state.full_document_text = ""
|
| 471 |
+
st.rerun()
|
| 472 |
+
|
| 473 |
+
# Playback Speed Control
|
| 474 |
+
st.markdown("---")
|
| 475 |
+
st.markdown("### 🎵 Arured n Tɣuri")
|
| 476 |
+
|
| 477 |
+
# Define the speed options with labels
|
| 478 |
+
speed_options = [0.5, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.5]
|
| 479 |
+
speed_labels = {
|
| 480 |
+
0.5: "0.5x (Ddac ddac ugar)",
|
| 481 |
+
0.8: "0.8x (Ddac ddac)",
|
| 482 |
+
0.9: "0.9x (Ddac ddac cwiṭ)",
|
| 483 |
+
1.0: "1.0x (Amagnu)",
|
| 484 |
+
1.1: "1.1x (Ɣiwel cwiṭ)",
|
| 485 |
+
1.2: "1.2x (Ɣiwel)",
|
| 486 |
+
1.3: "1.3x (Ɣiwel aṭas)",
|
| 487 |
+
1.5: "1.5x (Ɣiwel aṭas ugar)"
|
| 488 |
+
}
|
| 489 |
+
|
| 490 |
+
# Create a select slider for playback speed
|
| 491 |
+
current_speed = st.select_slider(
|
| 492 |
+
"Asenfel n urured n tɣuri",
|
| 493 |
+
options=speed_options,
|
| 494 |
+
value=st.session_state.playback_speed,
|
| 495 |
+
format_func=lambda x: speed_labels[x],
|
| 496 |
+
help="Senfel arured n tɣuri i yal taseddaṛt"
|
| 497 |
+
)
|
| 498 |
+
|
| 499 |
+
# Update session state if speed changed
|
| 500 |
+
if current_speed != st.session_state.playback_speed:
|
| 501 |
+
st.session_state.playback_speed = current_speed
|
| 502 |
+
# st.success(f"🎵 Arured n tɣuri yettwasenfel ɣer {speed_labels[current_speed]}")
|
| 503 |
+
|
| 504 |
+
# Show current speed effect
|
| 505 |
+
speed_effect = {
|
| 506 |
+
0.5: "⏳ Ugar n ukud i tmesliwt",
|
| 507 |
+
0.8: "🐢 Fessus i uḍfaṛ",
|
| 508 |
+
0.9: "📝 S ttawil i usishel n tigzi",
|
| 509 |
+
1.0: "✅ Arured amagnu n tmeslayt",
|
| 510 |
+
1.1: "⚡ Taɣuri s cwiṭ n uɣiwel",
|
| 511 |
+
1.2: "🚀 Taɣuri s uɣiwel",
|
| 512 |
+
1.3: "🎯 Taɣuri s uɣiwel d tmellit",
|
| 513 |
+
1.5: "🔥 Taɣuri s uɣiwel yuzzlen - i yimazzayen"
|
| 514 |
+
}
|
| 515 |
+
st.caption(f"**Asemdu:** {speed_effect[current_speed]}")
|
| 516 |
+
|
| 517 |
+
# Smart splitting configuration - FIXED FONT STYLING
|
| 518 |
+
st.markdown("---")
|
| 519 |
+
|
| 520 |
+
# Collapsible section for Smart Splitting with PROPER FONT HARMONIZATION
|
| 521 |
+
with st.expander("🎯 Agzam Amegzu", expanded=st.session_state.smart_splitting_expanded):
|
| 522 |
+
initial_paragraphs = st.slider("Tiseddaṛin deg tazwara", 3, 10, 5,
|
| 523 |
+
help="Amḍan n tseddaṛin wezzilen deg tazwara n tɣuri")
|
| 524 |
+
initial_word_target = st.slider("Awalen deg tseddaṛt n tazwara", 30, 100, 50,
|
| 525 |
+
help="Amḍan n wawalen deg tseddaṛin n tazwara")
|
| 526 |
+
normal_word_target = st.slider("Awalen deg tseddaṛt tamagnut", 50, 300, 100,
|
| 527 |
+
help="Amḍan n wawalen deg tseddaṛin tineggura")
|
| 528 |
+
|
| 529 |
+
# Main content
|
| 530 |
+
# col_doc, col_reading = st.columns([2, 3]) # 40% document, 60% reading
|
| 531 |
+
col_reading, col_doc = st.columns([3, 2]) # 60% reading, 40% document
|
| 532 |
+
|
| 533 |
+
with col_doc:
|
| 534 |
+
# Kabyle title for document overview with SMALLER font
|
| 535 |
+
st.markdown('<div class="section-title">📄 Tamuɣli Ɣer Uḍris</div>', unsafe_allow_html=True)
|
| 536 |
+
|
| 537 |
+
if st.session_state.get('full_document_text'):
|
| 538 |
+
# Display the full document in a scrollable container
|
| 539 |
+
document_html = "<div class='document-viewer'>"
|
| 540 |
+
paragraphs = st.session_state.get('paragraphs_list', [])
|
| 541 |
+
current_index = st.session_state.current_paragraph_index
|
| 542 |
+
|
| 543 |
+
for i, paragraph in enumerate(paragraphs):
|
| 544 |
+
if i == current_index:
|
| 545 |
+
document_html += f"<div class='current-paragraph-highlight'><strong>📍 Taseddaṛt {i+1}</strong><br>{paragraph}</div>"
|
| 546 |
+
else:
|
| 547 |
+
document_html += f"<div><strong>Taseddaṛt {i+1}</strong><br>{paragraph}</div>"
|
| 548 |
+
if i < len(paragraphs) - 1:
|
| 549 |
+
document_html += "<hr style='margin: 10px 0;'>"
|
| 550 |
+
|
| 551 |
+
document_html += "</div>"
|
| 552 |
+
st.markdown(document_html, unsafe_allow_html=True)
|
| 553 |
+
|
| 554 |
+
st.caption(f"📋 Aḍris: {len(paragraphs)} n tseddarin | 📍 Imir-a: Taseddaṛt {current_index + 1}")
|
| 555 |
+
else:
|
| 556 |
+
st.info("📁 Sali-d afaylu akken ad d-ikad da")
|
| 557 |
+
if uploaded_file and st.session_state.get('processed'):
|
| 558 |
+
st.warning("⚠️ Aḍris yettwasleḍ maca ulac-it. Ttxil-k, ɛiwed-as tikelt-nniḍen.")
|
| 559 |
+
|
| 560 |
+
with col_reading:
|
| 561 |
+
# Kabyle title for audio reading with SMALLER font
|
| 562 |
+
st.markdown('<div class="section-title">🎵 Ɣer - Sel Aḍris</div>', unsafe_allow_html=True)
|
| 563 |
+
|
| 564 |
+
if st.session_state.get('processed') and st.session_state.get('audio_ready', False):
|
| 565 |
+
current_index = st.session_state.current_paragraph_index
|
| 566 |
+
|
| 567 |
+
# Check if current paragraph data exists
|
| 568 |
+
if current_index not in st.session_state.paragraphs_data:
|
| 569 |
+
st.warning(f"⏳ Taseddaṛt {current_index + 1} mazal d-tuli...")
|
| 570 |
+
st.info("Ameslaw n tseddaṛt-a mazal yemmid. Rǧu cwiṭ.")
|
| 571 |
+
# Auto-refresh to check for new data
|
| 572 |
+
time.sleep(2)
|
| 573 |
+
st.rerun()
|
| 574 |
+
return
|
| 575 |
+
|
| 576 |
+
current_data = st.session_state.paragraphs_data[current_index]
|
| 577 |
+
|
| 578 |
+
# NAVIGATION LAYOUT: Back + Audio + Next in one row
|
| 579 |
+
col_back, col_audio, col_next = st.columns([1, 2, 1])
|
| 580 |
+
|
| 581 |
+
with col_back:
|
| 582 |
+
# BACK BUTTON
|
| 583 |
+
if current_index > 0:
|
| 584 |
+
if st.button("⏮️ Deffir",
|
| 585 |
+
use_container_width=True,
|
| 586 |
+
type="secondary",
|
| 587 |
+
key=f"back_btn_top_{current_index}"):
|
| 588 |
+
prev_index = current_index - 1
|
| 589 |
+
st.session_state.current_paragraph_index = prev_index
|
| 590 |
+
st.session_state.reading_status = f"Taɣuri n tseddaṛt {prev_index + 1}/{st.session_state.total_paragraphs}"
|
| 591 |
+
st.rerun()
|
| 592 |
+
else:
|
| 593 |
+
st.button("⏮️ Deffir", disabled=True, use_container_width=True)
|
| 594 |
+
|
| 595 |
+
with col_audio:
|
| 596 |
+
# Audio player status placeholder - will be in the HTML
|
| 597 |
+
pass
|
| 598 |
+
|
| 599 |
+
with col_next:
|
| 600 |
+
# NEXT BUTTON
|
| 601 |
+
if current_index < st.session_state.total_paragraphs - 1:
|
| 602 |
+
next_index = current_index + 1
|
| 603 |
+
|
| 604 |
+
if st.button("⏭️ Sdat",
|
| 605 |
+
type="primary",
|
| 606 |
+
use_container_width=True,
|
| 607 |
+
key=f"next_btn_top_{current_index}"):
|
| 608 |
+
|
| 609 |
+
st.session_state.current_paragraph_index = next_index
|
| 610 |
+
st.session_state.reading_status = f"Taɣuri n tseddaṛt {next_index + 1}/{st.session_state.total_paragraphs}"
|
| 611 |
+
st.rerun()
|
| 612 |
+
|
| 613 |
+
# Status indicator (informational only)
|
| 614 |
+
next_ready = next_index in st.session_state.paragraphs_data
|
| 615 |
+
ready_count = len(st.session_state.paragraphs_data)
|
| 616 |
+
total_count = st.session_state.total_paragraphs
|
| 617 |
+
if not next_ready:
|
| 618 |
+
st.caption(f"⏳ Yesselkat... ({ready_count}/{total_count})")
|
| 619 |
+
else:
|
| 620 |
+
st.caption(f"✅ Yemmed ({ready_count}/{total_count})")
|
| 621 |
+
|
| 622 |
+
else:
|
| 623 |
+
st.button("⏭️ Sdat", disabled=True, use_container_width=True)
|
| 624 |
+
st.caption("🎉 Temmed tɣuri!")
|
| 625 |
+
|
| 626 |
+
# HTML with real-time text highlighting
|
| 627 |
+
timing_json = json.dumps(current_data['timing_info'])
|
| 628 |
+
full_text = current_data['paragraph_text'].replace('`', '\\`').replace('${', '\\${')
|
| 629 |
+
audio_b64 = base64.b64encode(current_data['audio_bytes']).decode()
|
| 630 |
+
playback_speed = st.session_state.playback_speed
|
| 631 |
+
|
| 632 |
+
complete_html = f"""
|
| 633 |
+
<!DOCTYPE html>
|
| 634 |
+
<html>
|
| 635 |
+
<head>
|
| 636 |
+
<style>
|
| 637 |
+
.phrase-highlight {{
|
| 638 |
+
background: linear-gradient(120deg, #ffeb3b, #ffd54f);
|
| 639 |
+
padding: 4px 8px;
|
| 640 |
+
margin: 2px 1px;
|
| 641 |
+
border-radius: 6px;
|
| 642 |
+
box-shadow: 0 2px 4px rgba(255, 235, 59, 0.3);
|
| 643 |
+
font-weight: bold;
|
| 644 |
+
transition: all 0.3s ease;
|
| 645 |
+
}}
|
| 646 |
+
.phrase-text {{
|
| 647 |
+
padding: 2px 4px;
|
| 648 |
+
margin: 1px 0px;
|
| 649 |
+
border-radius: 4px;
|
| 650 |
+
transition: all 0.3s ease;
|
| 651 |
+
}}
|
| 652 |
+
.reading-container {{
|
| 653 |
+
max-height: 40vh;
|
| 654 |
+
overflow-y: auto;
|
| 655 |
+
padding: 20px;
|
| 656 |
+
border: 2px solid #e1e1e1;
|
| 657 |
+
border-radius: 12px;
|
| 658 |
+
background: #fafafa;
|
| 659 |
+
margin: 5px 0;
|
| 660 |
+
line-height: 1.8;
|
| 661 |
+
font-size: 16px;
|
| 662 |
+
font-family: Arial, sans-serif;
|
| 663 |
+
}}
|
| 664 |
+
.status-bar {{
|
| 665 |
+
background: #e3f2fd;
|
| 666 |
+
padding: 8px;
|
| 667 |
+
border-radius: 8px;
|
| 668 |
+
margin: 8px 0;
|
| 669 |
+
font-size: 14px;
|
| 670 |
+
}}
|
| 671 |
+
.audio-player {{
|
| 672 |
+
width: 100%;
|
| 673 |
+
margin: 8px 0;
|
| 674 |
+
text-align: center;
|
| 675 |
+
}}
|
| 676 |
+
.audio-controls {{
|
| 677 |
+
display: flex;
|
| 678 |
+
justify-content: center;
|
| 679 |
+
align-items: center;
|
| 680 |
+
gap: 10px;
|
| 681 |
+
margin-bottom: 10px;
|
| 682 |
+
}}
|
| 683 |
+
</style>
|
| 684 |
+
</head>
|
| 685 |
+
<body>
|
| 686 |
+
<div class="audio-player">
|
| 687 |
+
<div class="audio-controls">
|
| 688 |
+
<audio id="mainAudio" controls playbackRate={playback_speed} style="min-width: 250px;">
|
| 689 |
+
<source src="data:audio/wav;base64,{audio_b64}" type="audio/wav">
|
| 690 |
+
</audio>
|
| 691 |
+
</div>
|
| 692 |
+
<div style="margin-top: 5px; font-size: 14px; color: #666;">
|
| 693 |
+
🎵 Seɣbel, tekki ɣef ▶️ afella i tɣuri s urured: <strong>{playback_speed}x</strong>
|
| 694 |
+
{"" if playback_speed == 1.0 else " - " + ("ddac ddac" if playback_speed < 1.0 else "aɣiwel")}
|
| 695 |
+
</div>
|
| 696 |
+
</div>
|
| 697 |
+
|
| 698 |
+
|
| 699 |
+
|
| 700 |
+
<div class="reading-container" id="readingContainer">
|
| 701 |
+
{full_text}
|
| 702 |
+
</div>
|
| 703 |
+
|
| 704 |
+
<script>
|
| 705 |
+
const timingInfo = {timing_json};
|
| 706 |
+
const fullText = `{full_text}`;
|
| 707 |
+
const playbackSpeed = {playback_speed};
|
| 708 |
+
let currentHighlightIndex = -1;
|
| 709 |
+
let phraseElements = [];
|
| 710 |
+
|
| 711 |
+
function initializeHighlighting() {{
|
| 712 |
+
// Create phrase elements by wrapping text
|
| 713 |
+
let container = document.getElementById('readingContainer');
|
| 714 |
+
let workingText = fullText;
|
| 715 |
+
|
| 716 |
+
timingInfo.forEach((phrase, index) => {{
|
| 717 |
+
const cleanPhrase = phrase.text.trim();
|
| 718 |
+
if (workingText.includes(cleanPhrase)) {{
|
| 719 |
+
const spanId = 'phrase_' + index;
|
| 720 |
+
const spanHtml = '<span id="' + spanId + '" class="phrase-text">' + cleanPhrase + '</span>';
|
| 721 |
+
workingText = workingText.replace(cleanPhrase, spanHtml);
|
| 722 |
+
}}
|
| 723 |
+
}});
|
| 724 |
+
|
| 725 |
+
container.innerHTML = workingText;
|
| 726 |
+
|
| 727 |
+
// Store references to all phrase elements
|
| 728 |
+
timingInfo.forEach((phrase, index) => {{
|
| 729 |
+
const element = document.getElementById('phrase_' + index);
|
| 730 |
+
if (element) {{
|
| 731 |
+
phraseElements.push(element);
|
| 732 |
+
}}
|
| 733 |
+
}});
|
| 734 |
+
|
| 735 |
+
updateDebugInfo("Agzam amegzu yemmed! " + phraseElements.length + " n tefyar s " + playbackSpeed + "x arured");
|
| 736 |
+
}}
|
| 737 |
+
|
| 738 |
+
function updateDebugInfo(message) {{
|
| 739 |
+
const debugEl = document.getElementById('debugInfo');
|
| 740 |
+
if (debugEl) debugEl.textContent = message;
|
| 741 |
+
}}
|
| 742 |
+
|
| 743 |
+
function highlightCurrentPhrase(currentTime) {{
|
| 744 |
+
let newIndex = -1;
|
| 745 |
+
for (let i = 0; i < timingInfo.length; i++) {{
|
| 746 |
+
if (currentTime >= timingInfo[i].start && currentTime < timingInfo[i].end) {{
|
| 747 |
+
newIndex = i;
|
| 748 |
+
break;
|
| 749 |
+
}}
|
| 750 |
+
}}
|
| 751 |
+
|
| 752 |
+
if (newIndex !== currentHighlightIndex) {{
|
| 753 |
+
currentHighlightIndex = newIndex;
|
| 754 |
+
updateHighlightDisplay();
|
| 755 |
+
if (newIndex >= 0) {{
|
| 756 |
+
updateDebugInfo("Akud: " + currentTime.toFixed(2) + "s | Tafyirt: " + (newIndex + 1) + "/" + timingInfo.length + " | Arured: " + playbackSpeed + "x");
|
| 757 |
+
}}
|
| 758 |
+
}}
|
| 759 |
+
}}
|
| 760 |
+
|
| 761 |
+
function updateHighlightDisplay() {{
|
| 762 |
+
const currentPhraseSpan = document.getElementById('currentPhrase');
|
| 763 |
+
|
| 764 |
+
// Remove all highlights
|
| 765 |
+
phraseElements.forEach(element => {{
|
| 766 |
+
element.className = 'phrase-text';
|
| 767 |
+
}});
|
| 768 |
+
|
| 769 |
+
// Highlight current phrase
|
| 770 |
+
if (currentHighlightIndex >= 0 && currentHighlightIndex < phraseElements.length) {{
|
| 771 |
+
const element = phraseElements[currentHighlightIndex];
|
| 772 |
+
if (element) {{
|
| 773 |
+
element.className = 'phrase-highlight';
|
| 774 |
+
element.scrollIntoView({{ behavior: 'smooth', block: 'center' }});
|
| 775 |
+
}}
|
| 776 |
+
|
| 777 |
+
if (currentPhraseSpan && timingInfo[currentHighlightIndex]) {{
|
| 778 |
+
currentPhraseSpan.textContent = timingInfo[currentHighlightIndex].text.substring(0, 100) +
|
| 779 |
+
(timingInfo[currentHighlightIndex].text.length > 100 ? '...' : '');
|
| 780 |
+
}}
|
| 781 |
+
}} else {{
|
| 782 |
+
if (currentPhraseSpan) {{
|
| 783 |
+
currentPhraseSpan.textContent = 'Araǧu amesli...';
|
| 784 |
+
}}
|
| 785 |
+
}}
|
| 786 |
+
}}
|
| 787 |
+
|
| 788 |
+
// Set up audio event listeners
|
| 789 |
+
function setupAudioListeners() {{
|
| 790 |
+
const audioElement = document.getElementById('mainAudio');
|
| 791 |
+
if (audioElement) {{
|
| 792 |
+
// Set playback rate
|
| 793 |
+
audioElement.playbackRate = playbackSpeed;
|
| 794 |
+
|
| 795 |
+
audioElement.addEventListener('timeupdate', function() {{
|
| 796 |
+
highlightCurrentPhrase(this.currentTime);
|
| 797 |
+
}});
|
| 798 |
+
|
| 799 |
+
audioElement.addEventListener('play', function() {{
|
| 800 |
+
updateDebugInfo("🎵 Taɣuri... aseḍfeṛ n tira iteddu s " + playbackSpeed + "x arured");
|
| 801 |
+
}});
|
| 802 |
+
|
| 803 |
+
audioElement.addEventListener('ended', function() {{
|
| 804 |
+
currentHighlightIndex = -1;
|
| 805 |
+
updateHighlightDisplay();
|
| 806 |
+
updateDebugInfo("✅ Taɣuri tekfa s " + playbackSpeed + "x arured");
|
| 807 |
+
}});
|
| 808 |
+
|
| 809 |
+
}} else {{
|
| 810 |
+
setTimeout(setupAudioListeners, 100);
|
| 811 |
+
}}
|
| 812 |
+
}}
|
| 813 |
+
|
| 814 |
+
// Initialize everything when page loads
|
| 815 |
+
document.addEventListener('DOMContentLoaded', function() {{
|
| 816 |
+
initializeHighlighting();
|
| 817 |
+
setupAudioListeners();
|
| 818 |
+
}});
|
| 819 |
+
</script>
|
| 820 |
+
</body>
|
| 821 |
+
</html>
|
| 822 |
+
"""
|
| 823 |
+
|
| 824 |
+
# Display the complete reading content
|
| 825 |
+
st.components.v1.html(complete_html, height=300, scrolling=True)
|
| 826 |
+
|
| 827 |
+
# Place the remaining controls BELOW the reading content
|
| 828 |
+
st.markdown('<div class="controls-section">', unsafe_allow_html=True)
|
| 829 |
+
|
| 830 |
+
# Show paragraph info
|
| 831 |
+
word_count = current_data.get('word_count', len(current_data['paragraph_text'].split()))
|
| 832 |
+
st.markdown(f"**Taseddaṛt {current_index + 1}/{st.session_state.total_paragraphs}**")
|
| 833 |
+
st.caption(f"📊 {word_count} n wawalen | ⏱️ {current_data['audio_duration']:.1f}s | 🎵 {st.session_state.playback_speed}x arured")
|
| 834 |
+
|
| 835 |
+
# Display progress
|
| 836 |
+
ready_count = len(st.session_state.paragraphs_data)
|
| 837 |
+
total_count = st.session_state.total_paragraphs
|
| 838 |
+
progress = ready_count / total_count if total_count > 0 else 0
|
| 839 |
+
st.progress(progress)
|
| 840 |
+
st.caption(f"📊 Asekker: {ready_count}/{total_count} n tseddarin mmedent ({progress:.0%})")
|
| 841 |
+
|
| 842 |
+
# Download button for current paragraph
|
| 843 |
+
audio_bytes = current_data['audio_bytes']
|
| 844 |
+
st.download_button(
|
| 845 |
+
"📥 Zdem ameslaw n tseddaṛt-a",
|
| 846 |
+
audio_bytes,
|
| 847 |
+
f"Taseddaṛt_{current_index + 1}.wav",
|
| 848 |
+
"audio/wav",
|
| 849 |
+
use_container_width=True
|
| 850 |
+
)
|
| 851 |
+
|
| 852 |
+
st.markdown('</div>', unsafe_allow_html=True)
|
| 853 |
+
|
| 854 |
+
elif uploaded_file and not st.session_state.processed:
|
| 855 |
+
if st.button("🔄 Selket Aḍris", type="primary"):
|
| 856 |
+
# Process document when button is clicked
|
| 857 |
+
with st.spinner("Asekker n uḍris s ugzam amegzu n tseddaṛin..."):
|
| 858 |
+
temp_path = os.path.join(tempfile.gettempdir(), uploaded_file.name)
|
| 859 |
+
with open(temp_path, "wb") as f:
|
| 860 |
+
f.write(uploaded_file.getvalue())
|
| 861 |
+
|
| 862 |
+
text, error = read_document(temp_path)
|
| 863 |
+
if error:
|
| 864 |
+
st.error(error)
|
| 865 |
+
else:
|
| 866 |
+
cleaned_text = clean_text(text)
|
| 867 |
+
st.session_state.full_document_text = cleaned_text
|
| 868 |
+
|
| 869 |
+
# Use smart splitting strategy
|
| 870 |
+
paragraphs = smart_split_paragraphs(
|
| 871 |
+
cleaned_text,
|
| 872 |
+
initial_paragraphs=initial_paragraphs,
|
| 873 |
+
initial_word_target=initial_word_target,
|
| 874 |
+
normal_word_target=normal_word_target
|
| 875 |
+
)
|
| 876 |
+
|
| 877 |
+
if not paragraphs:
|
| 878 |
+
st.error("Ulac agbur i tɣuri.")
|
| 879 |
+
return
|
| 880 |
+
|
| 881 |
+
# Initialize processing state
|
| 882 |
+
st.session_state.total_paragraphs = len(paragraphs)
|
| 883 |
+
st.session_state.current_paragraph_index = 0
|
| 884 |
+
st.session_state.paragraphs_data = {}
|
| 885 |
+
st.session_state.paragraphs_list = paragraphs
|
| 886 |
+
st.session_state.processed = True
|
| 887 |
+
|
| 888 |
+
# Generate first paragraph immediately in main thread
|
| 889 |
+
first_paragraph = paragraphs[0]
|
| 890 |
+
audio_data, sampling_rate, timing_info = generate_audio_with_precise_timing(first_paragraph)
|
| 891 |
+
|
| 892 |
+
st.session_state.paragraphs_data[0] = {
|
| 893 |
+
'paragraph_text': first_paragraph,
|
| 894 |
+
'audio_data': audio_data,
|
| 895 |
+
'timing_info': timing_info,
|
| 896 |
+
'audio_duration': len(audio_data) / sampling_rate,
|
| 897 |
+
'audio_bytes': get_audio_bytes(audio_data),
|
| 898 |
+
'word_count': len(first_paragraph.split())
|
| 899 |
+
}
|
| 900 |
+
st.session_state.audio_ready = True
|
| 901 |
+
|
| 902 |
+
# Start background worker for ALL remaining paragraphs
|
| 903 |
+
if len(paragraphs) > 1:
|
| 904 |
+
remaining_paragraphs = paragraphs[1:]
|
| 905 |
+
|
| 906 |
+
# Use queue-based background worker
|
| 907 |
+
thread = threading.Thread(
|
| 908 |
+
target=background_audio_worker,
|
| 909 |
+
args=(remaining_paragraphs, st.session_state.audio_queue, 1)
|
| 910 |
+
)
|
| 911 |
+
thread.daemon = True
|
| 912 |
+
thread.start()
|
| 913 |
+
|
| 914 |
+
st.session_state.background_worker_started = True
|
| 915 |
+
|
| 916 |
+
st.rerun()
|
| 917 |
+
else:
|
| 918 |
+
st.info("🔄 Seɣbel, tekki ɣef 'Selket Aḍris' iwakken ad yettwasleḍ u ad yeddu seg tira ɣer umeslaw")
|
| 919 |
+
else:
|
| 920 |
+
st.info("👆 Sali-d afaylu iwakken ad tedduḍ ɣer tɣuri")
|
| 921 |
+
|
| 922 |
+
if __name__ == "__main__":
|
| 923 |
main()
|