AitBAD committed on
Commit
87cb055
·
verified ·
1 Parent(s): a9b5922

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +922 -922
app.py CHANGED
@@ -1,923 +1,923 @@
1
- # Specific conda environment : Kabyle_streamlit
2
-
3
- # Streamlit TTS Reader - Taqbaylit TTS Sɣer Adlis
4
- # Bouaziz Ait Driss October 2025
5
-
6
- import streamlit as st
7
- import fitz # PyMuPDF
8
- import re
9
- import numpy as np
10
- import torch
11
- from scipy.io.wavfile import write as wav_write
12
- import tempfile
13
- import os
14
- import base64
15
- import json
16
- import io
17
- import threading
18
- import queue
19
- import time
20
- import pdfplumber
21
-
22
# Configure the Streamlit page (title, icon, wide layout, expanded sidebar).
# NOTE(review): st.set_page_config is presumably the first st.* call in the
# script, as Streamlit expects — confirm nothing above it calls st.
st.set_page_config(
    page_title="Kabyle TTS Document Reader",
    page_icon="🎙️",
    layout="wide",
    initial_sidebar_state="expanded",
)
29
-
30
# Try to import transformers, degrading gracefully:
# prefer the dedicated VitsModel class, fall back to AutoModel under the
# same name, and finally mark transformers as unavailable altogether.
TRANSFORMERS_AVAILABLE = False
try:
    from transformers import VitsModel, AutoTokenizer
    TRANSFORMERS_AVAILABLE = True
except ImportError:
    try:
        from transformers import AutoModel as VitsModel, AutoTokenizer
        TRANSFORMERS_AVAILABLE = True
    except ImportError:
        pass
41
-
42
# --- Model Loading ---
@st.cache_resource
def load_model():
    """Load the facebook/mms-tts-kab VITS model and tokenizer.

    Cached by Streamlit so the weights are downloaded/loaded only once per
    server process. Returns (model, tokenizer, device); on any failure —
    transformers missing or weights unavailable — returns (None, None, "cpu")
    so the rest of the app can fall back to dummy audio.
    """
    if not TRANSFORMERS_AVAILABLE:
        return None, None, "cpu"
    try:
        model = VitsModel.from_pretrained("facebook/mms-tts-kab")
        tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-kab")
        # Prefer GPU when available; the model is moved there eagerly.
        device = "cuda" if torch.cuda.is_available() else "cpu"
        model = model.to(device)
        return model, tokenizer, device
    except Exception as e:
        st.error(f"Error loading model: {e}")
        return None, None, "cpu"

# Loaded once at import time; these module globals are read by the
# audio-generation helpers below.
model, tokenizer, device = load_model()
58
-
59
# --- Text Processing ---
def clean_text(text):
    """Normalize whitespace: collapse space runs, squeeze blank-line runs
    down to a single blank line, and strip leading/trailing whitespace.

    Falsy input (None, "") is returned unchanged.
    """
    if not text:
        return text
    normalized = re.sub(r' +', ' ', text)
    normalized = re.sub(r'\n\s*\n', '\n\n', normalized)
    return normalized.strip()
66
-
67
def smart_split_paragraphs(text, initial_paragraphs=5, initial_word_target=50, normal_word_target=200):
    """Split *text* into paragraphs sized for TTS streaming.

    Strategy: the first *initial_paragraphs* chunks aim for roughly
    *initial_word_target* words (quick playback startup); every later chunk
    aims for roughly *normal_word_target* words. Sentences are never split
    across chunks. Returns a list of paragraph strings ([] for empty input).
    """
    if not text:
        return []

    # Split on blank-line paragraph breaks first.
    blocks = [b.strip() for b in re.split(r'\n\s*\n', text) if b.strip()]
    if not blocks:
        return []

    # Flatten every block into sentences (split after . ! ? : ;).
    sentences = [
        s.strip()
        for block in blocks
        for s in re.split(r'(?<=[.!?:;])\s+', block)
        if s.strip()
    ]
    if not sentences:
        return []

    chunks = []
    pending = []
    pending_words = 0

    for sentence in sentences:
        n_words = len(sentence.split())

        # Early chunks use the smaller quick-start target.
        limit = initial_word_target if len(chunks) < initial_paragraphs else normal_word_target

        if pending and pending_words + n_words > limit:
            # Close the current chunk and start a new one with this sentence.
            chunks.append(' '.join(pending))
            pending = [sentence]
            pending_words = n_words
        else:
            pending.append(sentence)
            pending_words += n_words

    if pending:
        chunks.append(' '.join(pending))

    return chunks
125
-
126
def split_paragraph_into_phrases(paragraph, max_phrase_length=150):
    """Split *paragraph* into phrases of at most ~max_phrase_length chars.

    Sentences (split after . ! ?) are greedily packed into a phrase until
    adding the next sentence would exceed the limit; a single over-long
    sentence still becomes one phrase. Returns [] for empty input.
    """
    if not paragraph:
        return []

    chunks = []
    buffer = ""

    for sentence in re.split(r'(?<=[.!?])\s+', paragraph):
        if buffer and len(buffer) + len(sentence) > max_phrase_length:
            # Flush the accumulated phrase and start over with this sentence.
            chunks.append(buffer.strip())
            buffer = sentence
        elif buffer:
            buffer = buffer + " " + sentence
        else:
            buffer = sentence

    if buffer.strip():
        chunks.append(buffer.strip())

    return chunks
145
-
146
def generate_audio_with_precise_timing(paragraph):
    """Synthesize *paragraph* with exact per-phrase timing.

    Each phrase is generated separately so its duration in the final audio
    is known exactly. Returns (audio, sampling_rate, timing_info) where
    timing_info is a list of {'text', 'start', 'end', 'duration'} dicts.
    Falls back to a dummy tone (and empty timing) when the model is
    unavailable or synthesis fails.
    """
    if model is None or tokenizer is None:
        return create_dummy_audio_for_paragraph(paragraph), 16000, []

    try:
        phrases = split_paragraph_into_phrases(paragraph)
        if not phrases:
            return create_dummy_audio_for_paragraph(paragraph), 16000, []

        all_audio = []
        # (phrase, duration) pairs kept strictly in sync with all_audio.
        # FIX: the original zipped `phrases` against a separate durations
        # list; skipped/failed phrases made the two lists drift apart,
        # desynchronizing the highlighting timeline from the real audio.
        timed_phrases = []
        sampling_rate = 16000

        for phrase in phrases:
            if not phrase.strip():
                continue

            # Generate audio for this specific phrase.
            inputs = tokenizer(phrase, return_tensors="pt").to(device)
            with torch.no_grad():
                output = model(**inputs)

            # VITS models expose .waveform; fall back to positional output.
            if hasattr(output, 'waveform'):
                audio_chunk = output.waveform.cpu().numpy().squeeze()
            else:
                audio_chunk = output[0].cpu().numpy().squeeze() if len(output) > 0 else None

            if audio_chunk is None:
                # Synthesis produced nothing for this phrase: drop it from
                # the timeline entirely so timing stays aligned with audio.
                continue

            duration = len(audio_chunk) / sampling_rate

            # Normalize per phrase, leaving 20% headroom against clipping.
            max_val = np.max(np.abs(audio_chunk))
            if max_val > 0:
                audio_chunk = audio_chunk / max_val * 0.8

            all_audio.append(audio_chunk)
            timed_phrases.append((phrase, duration))

        if not all_audio:
            return create_dummy_audio_for_paragraph(paragraph), 16000, []

        full_audio = np.concatenate(all_audio)

        # Build the timeline from the actual per-phrase durations.
        timing_info = []
        current_time = 0.0
        for phrase, duration in timed_phrases:
            timing_info.append({
                'text': phrase,
                'start': current_time,
                'end': current_time + duration,
                'duration': duration,
            })
            current_time += duration

        return full_audio, sampling_rate, timing_info

    except Exception:
        # Best-effort: any synthesis failure degrades to the dummy tone.
        return create_dummy_audio_for_paragraph(paragraph), 16000, []
217
-
218
def create_dummy_audio_for_paragraph(paragraph):
    """Return a placeholder 220 Hz sine tone sized to the paragraph.

    Duration is estimated at 0.4 s per word, rendered at 16 kHz with a
    0.3 peak amplitude. An empty paragraph yields an empty array.
    """
    n_words = len(paragraph.split())
    duration = n_words * 0.4
    rate = 16000
    timeline = np.linspace(0, duration, int(rate * duration))
    return 0.3 * np.sin(2 * np.pi * 220 * timeline)
225
-
226
# PDF extraction with a pdfplumber fallback for diacritic-heavy documents.
def read_document(file_path):
    """Extract text from a .pdf or .txt file.

    Returns (text, error): on success *error* is None; on failure *text* is
    "" and *error* is a human-readable message.

    FIX: the original returned error messages in the *text* slot with []
    as the second element, so the caller's `if error:` check never fired
    and error messages were processed as document content.
    """
    text = ""
    try:
        if file_path.lower().endswith('.pdf'):
            # FIRST TRY: PyMuPDF (faster for digital PDFs).
            try:
                pdf_document = fitz.open(file_path)
                for page_num in range(len(pdf_document)):
                    page = pdf_document[page_num]
                    text += page.get_text() + "\n"
                pdf_document.close()

                # Accept the PyMuPDF result only if it appears to contain
                # diacritics — heuristic that character extraction worked.
                if text.strip() and any(char in text for char in ['é', 'è', 'à', 'ù', 'ï', 'ö', 'α', 'β', 'γ']):
                    return text, None

            except Exception:
                text = ""  # Reset and fall through to pdfplumber.

            # FALLBACK: pdfplumber (better for scanned/diacritic PDFs).
            try:
                import pdfplumber
                with pdfplumber.open(file_path) as pdf:
                    text = ""
                    for page in pdf.pages:
                        page_text = page.extract_text() or ""
                        text += page_text + "\n"
            except ImportError:
                return "", "pdfplumber not available"
            except Exception as e:
                return "", f"Both PDF methods failed: {e}"

        elif file_path.lower().endswith('.txt'):
            with open(file_path, 'r', encoding='utf-8') as f:
                text = f.read()
        else:
            return "", "Unsupported file format"

    except Exception as e:
        return "", f"Error reading document: {e}"

    return text, None
269
-
270
def normalize_audio(audio_data):
    """Return *audio_data* as float32, peak-normalized to 0.9.

    Silent input (all zeros) is returned unscaled to avoid division by zero.
    """
    samples = audio_data if audio_data.dtype == np.float32 else audio_data.astype(np.float32)
    peak = np.max(np.abs(samples))
    return samples / peak * 0.9 if peak > 0 else samples

def get_audio_bytes(audio_data):
    """Encode *audio_data* as a 16-bit, 16 kHz mono WAV and return raw bytes."""
    normalized = normalize_audio(audio_data)
    as_int16 = np.clip(normalized * 32767, -32768, 32767).astype(np.int16)
    buffer = io.BytesIO()
    wav_write(buffer, 16000, as_int16)
    return buffer.getvalue()
285
-
286
# --- Background Audio Generation ---
def background_audio_worker(paragraphs, audio_queue, start_index=0):
    """Generate audio for *paragraphs* in a background thread.

    Results are handed to the main thread exclusively through *audio_queue*
    — Streamlit session state must never be touched from a worker thread.
    *start_index* offsets the paragraph indices reported in each item.

    FIX: the original wrapped the whole loop in `except Exception: pass`,
    so one failing paragraph silently killed the rest of the run. Each
    paragraph is now generated under its own try/except and failures are
    skipped individually.
    """
    for offset, paragraph in enumerate(paragraphs):
        try:
            paragraph_index = start_index + offset
            word_count = len(paragraph.split())

            # Generate audio with precise per-phrase timing.
            audio_data, sampling_rate, timing_info = generate_audio_with_precise_timing(paragraph)

            # Send to the main thread via the queue ONLY.
            audio_queue.put({
                'paragraph_index': paragraph_index,
                'paragraph_text': paragraph,
                'audio_data': audio_data,
                'timing_info': timing_info,
                'audio_duration': len(audio_data) / sampling_rate,
                'audio_bytes': get_audio_bytes(audio_data),
                'word_count': word_count,
            })
        except Exception:
            # Best-effort: skip this paragraph, keep generating the rest.
            continue
310
-
311
# --- Queue Processing ---
def process_audio_queue(audio_queue):
    """Drain every ready item from *audio_queue* into session state.

    Must be called from the main thread only — it is the sole writer of
    st.session_state.paragraphs_data. Returns the number of items consumed.
    """
    drained = 0
    while True:
        try:
            item = audio_queue.get_nowait()  # Non-blocking check.
        except queue.Empty:
            break
        # SAFE: only the main thread mutates session state.
        st.session_state.paragraphs_data[item['paragraph_index']] = item
        drained += 1
    return drained
328
-
329
# Initialize session state. Streamlit reruns the script top-to-bottom on
# every interaction; these membership guards make each default survive
# reruns while preserving any value set by earlier interactions.
if 'processed' not in st.session_state:
    # True once a document has been split and its first paragraph synthesized.
    st.session_state.processed = False
if 'current_paragraph_index' not in st.session_state:
    st.session_state.current_paragraph_index = 0
if 'total_paragraphs' not in st.session_state:
    st.session_state.total_paragraphs = 0
if 'paragraphs_data' not in st.session_state:
    # Maps paragraph index -> dict with audio bytes, timing info, word count.
    st.session_state.paragraphs_data = {}
if 'audio_ready' not in st.session_state:
    st.session_state.audio_ready = False
if 'background_worker_started' not in st.session_state:
    st.session_state.background_worker_started = False
if 'reading_status' not in st.session_state:
    st.session_state.reading_status = "Ready to start reading"
if 'current_document_id' not in st.session_state:
    st.session_state.current_document_id = None
if 'audio_queue' not in st.session_state:
    # Hand-off channel from the background synthesis thread to the UI thread.
    st.session_state.audio_queue = queue.Queue()
if 'playback_speed' not in st.session_state:
    st.session_state.playback_speed = 1.0  # Default normal speed
if 'full_document_text' not in st.session_state:
    st.session_state.full_document_text = ""
if 'smart_splitting_expanded' not in st.session_state:
    st.session_state.smart_splitting_expanded = False
354
-
355
# --- Streamlit UI ---
def main():
    """Render the full Streamlit UI.

    Layout: a sidebar with upload/clear, playback-speed and smart-splitting
    controls; a two-column body with the audio reader (left, 60%) and the
    document overview (right, 40%). The reader embeds an HTML/JS component
    that plays the paragraph audio and highlights the current phrase using
    the precomputed per-phrase timing.
    """
    # Custom CSS for better styling - FIXED FONT HARMONIZATION
    st.markdown("""
    <style>
    .main-title {
        font-size: 1.5rem !important;
        font-weight: bold !important;
        margin-bottom: -1rem !important;
        margin-top: -1rem !important; /* MAXIMUM REDUCED top margin */
        color: #1f77b4;
    }
    .section-title {
        font-size: 1.3rem !important;
        font-weight: bold !important;
        margin-bottom: 0.3rem !important;
        margin-top: 0rem !important;
        color: #2e86ab;
    }
    .document-viewer {
        max-height: 70vh;
        overflow-y: auto;
        border: 1px solid #e1e1e1;
        border-radius: 8px;
        padding: 15px;
        background: #fafafa;
        font-family: Arial, sans-serif;
        line-height: 1.6;
        scrollbar-width: thin;
        scrollbar-color: #888 #f1f1f1;
        color: #333333; /* ADD THIS LINE - ensures dark text */
    }
    .document-viewer::-webkit-scrollbar {
        width: 8px;
    }
    .document-viewer::-webkit-scrollbar-track {
        background: #f1f1f1;
        border-radius: 4px;
    }
    .document-viewer::-webkit-scrollbar-thumb {
        background: #888;
        border-radius: 4px;
    }
    .document-viewer::-webkit-scrollbar-thumb:hover {
        background: #555;
    }
    .current-paragraph-highlight {
        background: linear-gradient(120deg, #e3f2fd, #bbdefb);
        padding: 8px 12px;
        margin: 8px 0;
        border-left: 4px solid #2196f3;
        border-radius: 4px;
        box-shadow: 0 2px 4px rgba(33, 150, 243, 0.2);
    }
    .reading-content {
        margin-bottom: 10px;
    }
    .controls-section {
        margin-top: 15px;
    }
    /* Reduce sidebar header spacing */
    .sidebar .sidebar-content {
        padding-top: 1rem;
    }
    /* FIXED: Make expander headers consistent with sidebar titles */
    .streamlit-expanderHeader {
        font-size: 1.1rem !important;
        font-weight: 600 !important;
        color: inherit !important;
    }
    /* FIXED: Ensure consistent styling for smart splitting expander */
    div[data-testid="stExpander"] details summary p {
        font-size: 1.1rem !important;
        font-weight: 600 !important;
    }
    /* Style for the smart splitting section specifically */
    .smart-splitting-header {
        font-size: 1.1rem !important;
        font-weight: 600 !important;
    }
    </style>
    """, unsafe_allow_html=True)

    # Main title with LARGER font and MAXIMUM REDUCED TOP SPACE
    st.markdown('<div class="main-title">🎙️Taqbaylit TTS Sɣer Adlis</div>', unsafe_allow_html=True)

    if model is None:
        st.warning("⚠️ Using test audio (TTS model not available)")

    # Process audio queue on every run: pull any paragraphs the background
    # worker finished since the last rerun into session state.
    if st.session_state.get('background_worker_started', False):
        processed_count = process_audio_queue(st.session_state.audio_queue)
        if processed_count > 0:
            st.success(f"📥 Loaded {processed_count} paragraph(s)")

    # Sidebar with KABYLE TRANSLATIONS
    with st.sidebar:
        # Document Settings with reduced spacing
        st.header("📁 Tawila n Tɣuri", anchor=False)
        uploaded_file = st.file_uploader("Sali-d Aḍris - jbed sers afaylu", type=['pdf', 'txt'],
                                         help="Ulac OCR ara yerren afaylu PDF n tugna ɣer txt deg teqaylit. Ma ur iwulem ara ɛiwed-as tamuɣli.")

        # Clear document button - MOVED UP immediately under file uploader
        if st.session_state.get('processed'):
            if st.button("🗑️ Sfeḍ Afaylu-a", type="secondary", use_container_width=True):
                # Reset all document-related session state
                st.session_state.processed = False
                st.session_state.current_paragraph_index = 0
                st.session_state.total_paragraphs = 0
                st.session_state.paragraphs_data = {}
                st.session_state.audio_ready = False
                st.session_state.background_worker_started = False
                st.session_state.reading_status = "Ready to start reading"
                st.session_state.current_document_id = None
                # Fresh queue: orphans any items still coming from an old worker.
                st.session_state.audio_queue = queue.Queue()
                st.session_state.full_document_text = ""
                st.rerun()

        # Playback Speed Control
        st.markdown("---")
        st.markdown("### 🎵 Arured n Tɣuri")

        # Define the speed options with labels
        speed_options = [0.5, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.5]
        speed_labels = {
            0.5: "0.5x (Ddac ddac ugar)",
            0.8: "0.8x (Ddac ddac)",
            0.9: "0.9x (Ddac ddac cwiṭ)",
            1.0: "1.0x (Amagnu)",
            1.1: "1.1x (Ɣiwel cwiṭ)",
            1.2: "1.2x (Ɣiwel)",
            1.3: "1.3x (Ɣiwel aṭas)",
            1.5: "1.5x (Ɣiwel aṭas ugar)"
        }

        # Create a select slider for playback speed
        current_speed = st.select_slider(
            "Asenfel n urured n tɣuri",
            options=speed_options,
            value=st.session_state.playback_speed,
            format_func=lambda x: speed_labels[x],
            help="Senfel arured n tɣuri i yal taseddaṛt"
        )

        # Update session state if speed changed
        if current_speed != st.session_state.playback_speed:
            st.session_state.playback_speed = current_speed
            # st.success(f"🎵 Arured n tɣuri yettwasenfel ɣer {speed_labels[current_speed]}")

        # Show current speed effect
        speed_effect = {
            0.5: "⏳ Ugar n ukud i tmesliwt",
            0.8: "🐢 Fessus i uḍfaṛ",
            0.9: "📝 S ttawil i usishel n tigzi",
            1.0: "✅ Arured amagnu n tmeslayt",
            1.1: "⚡ Taɣuri s cwiṭ n uɣiwel",
            1.2: "🚀 Taɣuri s uɣiwel",
            1.3: "🎯 Taɣuri s uɣiwel d tmellit",
            1.5: "🔥 Taɣuri s uɣiwel yuzzlen - i yimazzayen"
        }
        st.caption(f"**Asemdu:** {speed_effect[current_speed]}")

        # Smart splitting configuration - FIXED FONT STYLING
        st.markdown("---")

        # Collapsible section for Smart Splitting with PROPER FONT HARMONIZATION
        # NOTE: these three values are read by the document-processing branch
        # below; the sidebar always renders first, so they are always bound.
        with st.expander("🎯 Agzam Amegzu", expanded=st.session_state.smart_splitting_expanded):
            initial_paragraphs = st.slider("Tiseddaṛin deg tazwara", 3, 10, 5,
                                           help="Amḍan n tseddaṛin wezzilen deg tazwara n tɣuri")
            initial_word_target = st.slider("Awalen deg tseddaṛt n tazwara", 30, 100, 50,
                                            help="Amḍan n wawalen deg tseddaṛin n tazwara")
            normal_word_target = st.slider("Awalen deg tseddaṛt tamagnut", 50, 300, 100,
                                           help="Amḍan n wawalen deg tseddaṛin tineggura")

    # Main content
    # col_doc, col_reading = st.columns([2, 3])  # 40% document, 60% reading
    col_reading, col_doc = st.columns([3, 2])  # 60% reading, 40% document

    with col_doc:
        # Kabyle title for document overview with SMALLER font
        st.markdown('<div class="section-title">📄 Tamuɣli Ɣer Uḍris</div>', unsafe_allow_html=True)

        if st.session_state.get('full_document_text'):
            # Display the full document in a scrollable container, with the
            # paragraph currently being read visually highlighted.
            document_html = "<div class='document-viewer'>"
            paragraphs = st.session_state.get('paragraphs_list', [])
            current_index = st.session_state.current_paragraph_index

            for i, paragraph in enumerate(paragraphs):
                if i == current_index:
                    document_html += f"<div class='current-paragraph-highlight'><strong>📍 Taseddaṛt {i+1}</strong><br>{paragraph}</div>"
                else:
                    document_html += f"<div><strong>Taseddaṛt {i+1}</strong><br>{paragraph}</div>"
                if i < len(paragraphs) - 1:
                    document_html += "<hr style='margin: 10px 0;'>"

            document_html += "</div>"
            st.markdown(document_html, unsafe_allow_html=True)

            st.caption(f"📋 Aḍris: {len(paragraphs)} n tseddarin | 📍 Imir-a: Taseddaṛt {current_index + 1}")
        else:
            st.info("📁 Sali-d afaylu akken ad d-ikad da")
            if uploaded_file and st.session_state.get('processed'):
                st.warning("⚠️ Aḍris yettwasleḍ maca ulac-it. Ttxil-k, ɛiwed-as tikelt-nniḍen.")

    with col_reading:
        # Kabyle title for audio reading with SMALLER font
        st.markdown('<div class="section-title">🎵 Ɣer - Sel : Aḍris</div>', unsafe_allow_html=True)

        if st.session_state.get('processed') and st.session_state.get('audio_ready', False):
            current_index = st.session_state.current_paragraph_index

            # Check if current paragraph data exists; if the background
            # worker has not produced it yet, poll by sleeping and rerunning.
            if current_index not in st.session_state.paragraphs_data:
                st.warning(f"⏳ Taseddaṛt {current_index + 1} mazal d-tuli...")
                st.info("Ameslaw n tseddaṛt-a mazal yemmid. Rǧu cwiṭ.")
                # Auto-refresh to check for new data
                time.sleep(2)
                st.rerun()
                return

            current_data = st.session_state.paragraphs_data[current_index]

            # NAVIGATION LAYOUT: Back + Audio + Next in one row
            col_back, col_audio, col_next = st.columns([1, 2, 1])

            with col_back:
                # BACK BUTTON
                if current_index > 0:
                    if st.button("⏮️ Deffir",
                                 use_container_width=True,
                                 type="secondary",
                                 key=f"back_btn_top_{current_index}"):
                        prev_index = current_index - 1
                        st.session_state.current_paragraph_index = prev_index
                        st.session_state.reading_status = f"Taɣuri n tseddaṛt {prev_index + 1}/{st.session_state.total_paragraphs}"
                        st.rerun()
                else:
                    st.button("⏮️ Deffir", disabled=True, use_container_width=True)

            with col_audio:
                # Audio player status placeholder - will be in the HTML
                pass

            with col_next:
                # NEXT BUTTON
                if current_index < st.session_state.total_paragraphs - 1:
                    next_index = current_index + 1

                    if st.button("⏭️ Sdat",
                                 type="primary",
                                 use_container_width=True,
                                 key=f"next_btn_top_{current_index}"):

                        st.session_state.current_paragraph_index = next_index
                        st.session_state.reading_status = f"Taɣuri n tseddaṛt {next_index + 1}/{st.session_state.total_paragraphs}"
                        st.rerun()

                    # Status indicator (informational only)
                    next_ready = next_index in st.session_state.paragraphs_data
                    ready_count = len(st.session_state.paragraphs_data)
                    total_count = st.session_state.total_paragraphs
                    if not next_ready:
                        st.caption(f"⏳ Yesselkat... ({ready_count}/{total_count})")
                    else:
                        st.caption(f"✅ Yemmed ({ready_count}/{total_count})")

                else:
                    st.button("⏭️ Sdat", disabled=True, use_container_width=True)
                    st.caption("🎉 Temmed tɣuri!")

            # HTML with real-time text highlighting. Backticks and ${ are
            # escaped because the text is embedded in a JS template literal.
            timing_json = json.dumps(current_data['timing_info'])
            full_text = current_data['paragraph_text'].replace('`', '\\`').replace('${', '\\${')
            audio_b64 = base64.b64encode(current_data['audio_bytes']).decode()
            playback_speed = st.session_state.playback_speed

            complete_html = f"""
            <!DOCTYPE html>
            <html>
            <head>
            <style>
                .phrase-highlight {{
                    background: linear-gradient(120deg, #ffeb3b, #ffd54f);
                    padding: 4px 8px;
                    margin: 2px 1px;
                    border-radius: 6px;
                    box-shadow: 0 2px 4px rgba(255, 235, 59, 0.3);
                    font-weight: bold;
                    transition: all 0.3s ease;
                }}
                .phrase-text {{
                    padding: 2px 4px;
                    margin: 1px 0px;
                    border-radius: 4px;
                    transition: all 0.3s ease;
                }}
                .reading-container {{
                    max-height: 40vh;
                    overflow-y: auto;
                    padding: 20px;
                    border: 2px solid #e1e1e1;
                    border-radius: 12px;
                    background: #fafafa;
                    margin: 5px 0;
                    line-height: 1.8;
                    font-size: 16px;
                    font-family: Arial, sans-serif;
                }}
                .status-bar {{
                    background: #e3f2fd;
                    padding: 8px;
                    border-radius: 8px;
                    margin: 8px 0;
                    font-size: 14px;
                }}
                .audio-player {{
                    width: 100%;
                    margin: 8px 0;
                    text-align: center;
                }}
                .audio-controls {{
                    display: flex;
                    justify-content: center;
                    align-items: center;
                    gap: 10px;
                    margin-bottom: 10px;
                }}
            </style>
            </head>
            <body>
                <div class="audio-player">
                    <div class="audio-controls">
                        <audio id="mainAudio" controls playbackRate={playback_speed} style="min-width: 250px;">
                            <source src="data:audio/wav;base64,{audio_b64}" type="audio/wav">
                        </audio>
                    </div>
                    <div style="margin-top: 5px; font-size: 14px; color: #666;">
                        🎵 Seɣbel, tekki ɣef ▶️ afella i tɣuri s urured: <strong>{playback_speed}x</strong>
                        {"" if playback_speed == 1.0 else " - " + ("ddac ddac" if playback_speed < 1.0 else "aɣiwel")}
                    </div>
                </div>

                <div class="reading-container" id="readingContainer">
                    {full_text}
                </div>

                <script>
                    const timingInfo = {timing_json};
                    const fullText = `{full_text}`;
                    const playbackSpeed = {playback_speed};
                    let currentHighlightIndex = -1;
                    let phraseElements = [];

                    function initializeHighlighting() {{
                        // Create phrase elements by wrapping text
                        let container = document.getElementById('readingContainer');
                        let workingText = fullText;

                        timingInfo.forEach((phrase, index) => {{
                            const cleanPhrase = phrase.text.trim();
                            if (workingText.includes(cleanPhrase)) {{
                                const spanId = 'phrase_' + index;
                                const spanHtml = '<span id="' + spanId + '" class="phrase-text">' + cleanPhrase + '</span>';
                                workingText = workingText.replace(cleanPhrase, spanHtml);
                            }}
                        }});

                        container.innerHTML = workingText;

                        // Store references to all phrase elements
                        timingInfo.forEach((phrase, index) => {{
                            const element = document.getElementById('phrase_' + index);
                            if (element) {{
                                phraseElements.push(element);
                            }}
                        }});

                        updateDebugInfo("Agzam amegzu yemmed! " + phraseElements.length + " n tefyar s " + playbackSpeed + "x arured");
                    }}

                    function updateDebugInfo(message) {{
                        const debugEl = document.getElementById('debugInfo');
                        if (debugEl) debugEl.textContent = message;
                    }}

                    function highlightCurrentPhrase(currentTime) {{
                        let newIndex = -1;
                        for (let i = 0; i < timingInfo.length; i++) {{
                            if (currentTime >= timingInfo[i].start && currentTime < timingInfo[i].end) {{
                                newIndex = i;
                                break;
                            }}
                        }}

                        if (newIndex !== currentHighlightIndex) {{
                            currentHighlightIndex = newIndex;
                            updateHighlightDisplay();
                            if (newIndex >= 0) {{
                                updateDebugInfo("Akud: " + currentTime.toFixed(2) + "s | Tafyirt: " + (newIndex + 1) + "/" + timingInfo.length + " | Arured: " + playbackSpeed + "x");
                            }}
                        }}
                    }}

                    function updateHighlightDisplay() {{
                        const currentPhraseSpan = document.getElementById('currentPhrase');

                        // Remove all highlights
                        phraseElements.forEach(element => {{
                            element.className = 'phrase-text';
                        }});

                        // Highlight current phrase
                        if (currentHighlightIndex >= 0 && currentHighlightIndex < phraseElements.length) {{
                            const element = phraseElements[currentHighlightIndex];
                            if (element) {{
                                element.className = 'phrase-highlight';
                                element.scrollIntoView({{ behavior: 'smooth', block: 'center' }});
                            }}

                            if (currentPhraseSpan && timingInfo[currentHighlightIndex]) {{
                                currentPhraseSpan.textContent = timingInfo[currentHighlightIndex].text.substring(0, 100) +
                                    (timingInfo[currentHighlightIndex].text.length > 100 ? '...' : '');
                            }}
                        }} else {{
                            if (currentPhraseSpan) {{
                                currentPhraseSpan.textContent = 'Araǧu amesli...';
                            }}
                        }}
                    }}

                    // Set up audio event listeners
                    function setupAudioListeners() {{
                        const audioElement = document.getElementById('mainAudio');
                        if (audioElement) {{
                            // Set playback rate
                            audioElement.playbackRate = playbackSpeed;

                            audioElement.addEventListener('timeupdate', function() {{
                                highlightCurrentPhrase(this.currentTime);
                            }});

                            audioElement.addEventListener('play', function() {{
                                updateDebugInfo("🎵 Taɣuri... aseḍfeṛ n tira iteddu s " + playbackSpeed + "x arured");
                            }});

                            audioElement.addEventListener('ended', function() {{
                                currentHighlightIndex = -1;
                                updateHighlightDisplay();
                                updateDebugInfo("✅ Taɣuri tekfa s " + playbackSpeed + "x arured");
                            }});

                        }} else {{
                            setTimeout(setupAudioListeners, 100);
                        }}
                    }}

                    // Initialize everything when page loads
                    document.addEventListener('DOMContentLoaded', function() {{
                        initializeHighlighting();
                        setupAudioListeners();
                    }});
                </script>
            </body>
            </html>
            """

            # Display the complete reading content
            st.components.v1.html(complete_html, height=300, scrolling=True)

            # Place the remaining controls BELOW the reading content
            st.markdown('<div class="controls-section">', unsafe_allow_html=True)

            # Show paragraph info
            word_count = current_data.get('word_count', len(current_data['paragraph_text'].split()))
            st.markdown(f"**Taseddaṛt {current_index + 1}/{st.session_state.total_paragraphs}**")
            st.caption(f"📊 {word_count} n wawalen | ⏱️ {current_data['audio_duration']:.1f}s | 🎵 {st.session_state.playback_speed}x arured")

            # Display progress
            ready_count = len(st.session_state.paragraphs_data)
            total_count = st.session_state.total_paragraphs
            progress = ready_count / total_count if total_count > 0 else 0
            st.progress(progress)
            st.caption(f"📊 Asekker: {ready_count}/{total_count} n tseddarin mmedent ({progress:.0%})")

            # Download button for current paragraph
            audio_bytes = current_data['audio_bytes']
            st.download_button(
                "📥 Zdem ameslaw n tseddaṛt-a",
                audio_bytes,
                f"Taseddaṛt_{current_index + 1}.wav",
                "audio/wav",
                use_container_width=True
            )

            st.markdown('</div>', unsafe_allow_html=True)

        elif uploaded_file and not st.session_state.processed:
            if st.button("🔄 Selket Aḍris", type="primary"):
                # Process document when button is clicked
                with st.spinner("Asekker n uḍris s ugzam amegzu n tseddaṛin..."):
                    temp_path = os.path.join(tempfile.gettempdir(), uploaded_file.name)
                    with open(temp_path, "wb") as f:
                        f.write(uploaded_file.getvalue())

                    text, error = read_document(temp_path)
                    if error:
                        st.error(error)
                    else:
                        cleaned_text = clean_text(text)
                        st.session_state.full_document_text = cleaned_text

                        # Use smart splitting strategy
                        paragraphs = smart_split_paragraphs(
                            cleaned_text,
                            initial_paragraphs=initial_paragraphs,
                            initial_word_target=initial_word_target,
                            normal_word_target=normal_word_target
                        )

                        if not paragraphs:
                            st.error("Ulac agbur i tɣuri.")
                            return

                        # Initialize processing state
                        st.session_state.total_paragraphs = len(paragraphs)
                        st.session_state.current_paragraph_index = 0
                        st.session_state.paragraphs_data = {}
                        st.session_state.paragraphs_list = paragraphs
                        st.session_state.processed = True

                        # Generate first paragraph immediately in main thread
                        # so playback can start without waiting on the worker.
                        first_paragraph = paragraphs[0]
                        audio_data, sampling_rate, timing_info = generate_audio_with_precise_timing(first_paragraph)

                        st.session_state.paragraphs_data[0] = {
                            'paragraph_text': first_paragraph,
                            'audio_data': audio_data,
                            'timing_info': timing_info,
                            'audio_duration': len(audio_data) / sampling_rate,
                            'audio_bytes': get_audio_bytes(audio_data),
                            'word_count': len(first_paragraph.split())
                        }
                        st.session_state.audio_ready = True

                        # Start background worker for ALL remaining paragraphs
                        if len(paragraphs) > 1:
                            remaining_paragraphs = paragraphs[1:]

                            # Use queue-based background worker (never touches
                            # session state from the worker thread).
                            thread = threading.Thread(
                                target=background_audio_worker,
                                args=(remaining_paragraphs, st.session_state.audio_queue, 1)
                            )
                            thread.daemon = True
                            thread.start()

                            st.session_state.background_worker_started = True

                        st.rerun()
            else:
                st.info("🔄 Seɣbel, tekki ɣef 'Selket Aḍris' iwakken ad yettwasleḍ u ad yeddu seg tira ɣer umeslaw")
        else:
            st.info("👆 Sali-d afaylu iwakken ad tedduḍ ɣer tɣuri")
921
-
922
- if __name__ == "__main__":
923
  main()
 
1
# Specific conda environment : Kabyle_streamlit

# Streamlit TTS Reader - Taqbaylit TTS Sɣer Adlis
# Bouaziz Ait Driss October 2025

import streamlit as st
import fitz  # PyMuPDF
import re
import numpy as np
import torch
from scipy.io.wavfile import write as wav_write
import tempfile
import os
import base64
import json
import io
import threading
import queue
import time
import pdfplumber

# Configure Streamlit page
# NOTE: set_page_config must run before any other Streamlit call in the script.
st.set_page_config(
    page_title="Kabyle TTS Document Reader",
    page_icon="🎙️",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Try to import transformers.
# First prefer the dedicated VitsModel class; on older transformers versions
# fall back to AutoModel under the same name so the rest of the file can use
# `VitsModel` unconditionally. If neither import works, the app degrades to
# dummy-audio mode (see load_model / create_dummy_audio_for_paragraph).
try:
    from transformers import VitsModel, AutoTokenizer
    TRANSFORMERS_AVAILABLE = True
except ImportError:
    try:
        from transformers import AutoModel, AutoTokenizer
        TRANSFORMERS_AVAILABLE = True
        VitsModel = AutoModel
    except ImportError:
        TRANSFORMERS_AVAILABLE = False
41
+
42
# --- Model Loading ---
@st.cache_resource
def load_model():
    """Load the facebook/mms-tts-kab VITS model and tokenizer once per process.

    Returns:
        (model, tokenizer, device): the model is moved to CUDA when available;
        on any failure (transformers missing, download error) returns
        (None, None, "cpu") so callers can fall back to dummy audio.
    """
    if not TRANSFORMERS_AVAILABLE:
        return None, None, "cpu"
    try:
        model = VitsModel.from_pretrained("facebook/mms-tts-kab")
        tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-kab")
        device = "cuda" if torch.cuda.is_available() else "cpu"
        model = model.to(device)
        return model, tokenizer, device
    except Exception as e:
        st.error(f"Error loading model: {e}")
        return None, None, "cpu"

# Module-level globals used by the audio-generation helpers below.
model, tokenizer, device = load_model()
58
+
59
# --- Text Processing ---
def clean_text(text):
    """Normalize whitespace: collapse space runs and blank-line runs, trim ends.

    Falsy input (None, "") is returned unchanged.
    """
    if not text:
        return text
    normalized = re.sub(r' +', ' ', text)
    normalized = re.sub(r'\n\s*\n', '\n\n', normalized)
    return normalized.strip()
66
+
67
def smart_split_paragraphs(text, initial_paragraphs=5, initial_word_target=50, normal_word_target=200):
    """Split *text* into word-budgeted paragraphs for incremental TTS.

    The first *initial_paragraphs* chunks target ~initial_word_target words
    (fast time-to-first-audio); later chunks target ~normal_word_target words.
    Sentences are never split across chunks.
    """
    if not text:
        return []

    # Break on blank lines, keeping only non-empty paragraph blocks.
    blocks = [b.strip() for b in re.split(r'\n\s*\n', text) if b.strip()]
    if not blocks:
        return []

    # Flatten every block into a single sentence stream.
    sentences = []
    for block in blocks:
        sentences.extend(
            s.strip() for s in re.split(r'(?<=[.!?:;])\s+', block) if s.strip()
        )
    if not sentences:
        return []

    chunks = []
    buffer = []
    buffered_words = 0

    for sentence in sentences:
        n_words = len(sentence.split())
        # Early chunks use the small budget; the rest use the normal budget.
        limit = initial_word_target if len(chunks) < initial_paragraphs else normal_word_target

        if buffer and buffered_words + n_words > limit:
            # Budget exceeded: close the current chunk, start a fresh one.
            chunks.append(' '.join(buffer))
            buffer = [sentence]
            buffered_words = n_words
        else:
            buffer.append(sentence)
            buffered_words += n_words

    # Flush whatever remains in the buffer.
    if buffer:
        chunks.append(' '.join(buffer))

    return chunks
125
+
126
def split_paragraph_into_phrases(paragraph, max_phrase_length=150):
    """Group a paragraph's sentences into phrases of roughly
    *max_phrase_length* characters, for per-phrase TTS timing.
    """
    if not paragraph:
        return []

    chunks = []
    buffer = ""

    for sentence in re.split(r'(?<=[.!?])\s+', paragraph):
        if buffer and len(buffer) + len(sentence) > max_phrase_length:
            # Adding this sentence would overflow the budget: emit the buffer.
            chunks.append(buffer.strip())
            buffer = sentence
        elif buffer:
            buffer = buffer + " " + sentence
        else:
            buffer = sentence

    if buffer.strip():
        chunks.append(buffer.strip())

    return chunks
145
+
146
def generate_audio_with_precise_timing(paragraph):
    """Synthesize *paragraph* phrase-by-phrase and return exact phrase timings.

    Returns:
        (audio, sampling_rate, timing_info) where audio is a 1-D float numpy
        array at 16 kHz and timing_info is a list of dicts with 'text',
        'start', 'end', 'duration' (seconds) for each phrase.
        Falls back to a dummy tone (with empty timing_info) when the model is
        unavailable, the paragraph yields no phrases, or generation fails.
    """

    if model is None or tokenizer is None:
        return create_dummy_audio_for_paragraph(paragraph), 16000, []

    try:
        phrases = split_paragraph_into_phrases(paragraph)
        if not phrases:
            return create_dummy_audio_for_paragraph(paragraph), 16000, []

        all_audio = []
        phrase_durations = []
        sampling_rate = 16000  # MMS-TTS VITS outputs 16 kHz audio

        # Generate audio for each phrase separately to get exact timing
        for i, phrase in enumerate(phrases):
            if not phrase.strip():
                # NOTE(review): skipping here without appending a duration can
                # misalign the zip(phrases, phrase_durations) pass below, since
                # `phrases` still contains the skipped entry — in practice
                # split_paragraph_into_phrases never returns blank phrases.
                continue

            # Generate audio for this specific phrase
            inputs = tokenizer(phrase, return_tensors="pt").to(device)
            with torch.no_grad():
                output = model(**inputs)

            # Extract audio data (VitsModel exposes .waveform; the AutoModel
            # fallback may return a plain tuple/sequence instead)
            if hasattr(output, 'waveform'):
                audio_chunk = output.waveform.cpu().numpy().squeeze()
            else:
                audio_chunk = output[0].cpu().numpy().squeeze() if len(output) > 0 else None

            if audio_chunk is not None:
                # Calculate exact duration for this phrase
                phrase_duration = len(audio_chunk) / sampling_rate
                phrase_durations.append(phrase_duration)

                # Normalize and store (peak-normalize each phrase to 0.8)
                max_val = np.max(np.abs(audio_chunk))
                if max_val > 0:
                    audio_chunk = audio_chunk / max_val * 0.8
                all_audio.append(audio_chunk)
            else:
                # Fallback: estimate duration if audio generation failed
                # NOTE(review): the estimated duration is appended but no audio
                # is, so later phrase highlights would drift from the audio.
                word_count = len(phrase.split())
                estimated_duration = word_count * 0.4
                phrase_durations.append(estimated_duration)

        if all_audio:
            # Concatenate all phrase audio
            full_audio = np.concatenate(all_audio)
            total_duration = len(full_audio) / sampling_rate  # currently unused

            # Build precise timing info using actual phrase durations
            timing_info = []
            current_time = 0.0

            for i, (phrase, duration) in enumerate(zip(phrases, phrase_durations)):
                timing_info.append({
                    'text': phrase,
                    'start': current_time,
                    'end': current_time + duration,
                    'duration': duration
                })
                current_time += duration

            return full_audio, sampling_rate, timing_info
        else:
            return create_dummy_audio_for_paragraph(paragraph), 16000, []

    except Exception as e:
        # Any model/tokenizer failure degrades to the placeholder tone.
        return create_dummy_audio_for_paragraph(paragraph), 16000, []
217
+
218
def create_dummy_audio_for_paragraph(paragraph):
    """Return a placeholder 220 Hz tone sized at ~0.4 s per word.

    Used whenever real TTS output is unavailable. Guarantees a non-empty
    signal even for empty/whitespace text: downstream normalization
    (np.max over the array) raises on a zero-length array.
    """
    word_count = len(paragraph.split())
    # At least one "word" of duration so the array is never empty.
    total_duration = max(word_count, 1) * 0.4
    sampling_rate = 16000
    t = np.linspace(0, total_duration, int(sampling_rate * total_duration))
    audio = 0.3 * np.sin(2 * np.pi * 220 * t)
    return audio
225
+
226
# Add a fallback to plumber for reading diacritic letters
def read_document(file_path):
    """Extract plain text from a PDF or TXT file.

    For PDFs, tries PyMuPDF first (fast for digitally-authored files); if the
    result looks like it lost diacritics, falls back to pdfplumber.

    Returns:
        (text, error): on success error is None; on failure text is "" and
        error holds a human-readable message. (Previously error messages were
        returned in the *text* slot with error always [], so callers' error
        checks never fired and the message was read aloud as document text.)
    """
    text = ""
    try:
        if file_path.lower().endswith('.pdf'):
            # FIRST TRY: PyMuPDF (faster for digital PDFs)
            try:
                pdf_document = fitz.open(file_path)
                for page_num in range(len(pdf_document)):
                    page = pdf_document[page_num]
                    text += page.get_text() + "\n"
                pdf_document.close()

                # Heuristic: accept PyMuPDF output only when accented /
                # non-ASCII characters survived extraction.
                if text.strip() and any(char in text for char in ['é', 'è', 'à', 'ù', 'ï', 'ö', 'α', 'β', 'γ']):
                    return text, None

            except Exception:
                text = ""  # Reset text if PyMuPDF fails

            # FALLBACK: pdfplumber (better for scanned/diacritic PDFs);
            # imported at module level, so no local import needed here.
            try:
                with pdfplumber.open(file_path) as pdf:
                    text = ""
                    for page in pdf.pages:
                        page_text = page.extract_text() or ""
                        text += page_text + "\n"
            except Exception as e:
                return "", f"Both PDF methods failed: {e}"

        elif file_path.lower().endswith('.txt'):
            with open(file_path, 'r', encoding='utf-8') as f:
                text = f.read()
        else:
            return "", "Unsupported file format"

    except Exception as e:
        return "", f"Error reading document: {e}"

    return text, None
269
+
270
def normalize_audio(audio_data):
    """Peak-normalize a waveform to 0.9 amplitude as float32.

    Empty and all-zero signals are returned unchanged (as float32):
    np.max over an empty array raises ValueError, so guard before reducing.
    """
    if audio_data.dtype != np.float32:
        audio_data = audio_data.astype(np.float32)
    if audio_data.size == 0:
        return audio_data
    max_val = np.max(np.abs(audio_data))
    if max_val > 0:
        audio_data = audio_data / max_val * 0.9
    return audio_data
277
+
278
def get_audio_bytes(audio_data):
    """Encode a float waveform as 16-bit PCM WAV bytes at 16 kHz."""
    normalized = normalize_audio(audio_data)
    pcm = np.clip(normalized * 32767, -32768, 32767).astype(np.int16)
    buffer = io.BytesIO()
    wav_write(buffer, 16000, pcm)
    return buffer.getvalue()
285
+
286
# --- Background Audio Generation ---
def background_audio_worker(paragraphs, audio_queue, start_index=0):
    """Generate audio for *paragraphs* in a daemon thread.

    Results are delivered to the main thread exclusively through
    *audio_queue* — Streamlit session state must never be touched from a
    background thread. *start_index* offsets the paragraph indices so the
    main thread can slot results into its paragraph map.
    """
    try:
        for i, paragraph in enumerate(paragraphs):
            paragraph_index = start_index + i
            word_count = len(paragraph.split())

            # Generate audio with precise timing
            audio_data, sampling_rate, timing_info = generate_audio_with_precise_timing(paragraph)

            # Send to main thread via queue ONLY
            audio_queue.put({
                'paragraph_index': paragraph_index,
                'paragraph_text': paragraph,
                'audio_data': audio_data,
                'timing_info': timing_info,
                'audio_duration': len(audio_data) / sampling_rate,
                'audio_bytes': get_audio_bytes(audio_data),
                'word_count': word_count
            })

    except Exception as e:
        # Swallowed deliberately so a daemon thread never crashes the app.
        # NOTE(review): a failure here silently stops all remaining
        # paragraphs; consider pushing an error marker onto the queue so the
        # UI can surface it instead of waiting forever.
        pass
310
+
311
# --- Queue Processing ---
def process_audio_queue(audio_queue):
    """Drain every ready item from *audio_queue* into session state.

    Must only be called from the main (Streamlit) thread, since it writes to
    st.session_state. Returns the number of items absorbed.
    """
    processed_count = 0
    while True:
        try:
            # Non-blocking: stop as soon as the queue is empty.
            item = audio_queue.get_nowait()
        except queue.Empty:
            break
        # SAFE: main thread is the only writer of session state.
        st.session_state.paragraphs_data[item['paragraph_index']] = item
        processed_count += 1
    return processed_count
328
+
329
# Initialize session state.
# Runs on every script rerun; each guard seeds a key only on first use so
# user state survives Streamlit reruns.
if 'processed' not in st.session_state:
    st.session_state.processed = False          # document parsed & split
if 'current_paragraph_index' not in st.session_state:
    st.session_state.current_paragraph_index = 0
if 'total_paragraphs' not in st.session_state:
    st.session_state.total_paragraphs = 0
if 'paragraphs_data' not in st.session_state:
    st.session_state.paragraphs_data = {}       # index -> generated audio payload
if 'audio_ready' not in st.session_state:
    st.session_state.audio_ready = False        # first paragraph's audio exists
if 'background_worker_started' not in st.session_state:
    st.session_state.background_worker_started = False
if 'reading_status' not in st.session_state:
    st.session_state.reading_status = "Ready to start reading"
if 'current_document_id' not in st.session_state:
    st.session_state.current_document_id = None
if 'audio_queue' not in st.session_state:
    st.session_state.audio_queue = queue.Queue()  # worker -> main-thread handoff
if 'playback_speed' not in st.session_state:
    st.session_state.playback_speed = 1.0  # Default normal speed
if 'full_document_text' not in st.session_state:
    st.session_state.full_document_text = ""
if 'smart_splitting_expanded' not in st.session_state:
    st.session_state.smart_splitting_expanded = False
354
+
355
# --- Streamlit UI ---
def main():
    """Render the app: sidebar controls, document pane, and reading pane.

    Relies on the module-level session-state initialization above and on the
    background worker / queue pair for incremental per-paragraph audio.
    """
    # Custom CSS for better styling - FIXED FONT HARMONIZATION
    st.markdown("""
    <style>
    .main-title {
        font-size: 1.5rem !important;
        font-weight: bold !important;
        margin-bottom: -1rem !important;
        margin-top: -1rem !important; /* MAXIMUM REDUCED top margin */
        color: #1f77b4;
    }
    .section-title {
        font-size: 1.3rem !important;
        font-weight: bold !important;
        margin-bottom: 0.3rem !important;
        margin-top: 0rem !important;
        color: #2e86ab;
    }
    .document-viewer {
        max-height: 70vh;
        overflow-y: auto;
        border: 1px solid #e1e1e1;
        border-radius: 8px;
        padding: 15px;
        background: #fafafa;
        font-family: Arial, sans-serif;
        line-height: 1.6;
        scrollbar-width: thin;
        scrollbar-color: #888 #f1f1f1;
        color: #333333; /* ADD THIS LINE - ensures dark text */
    }
    .document-viewer::-webkit-scrollbar {
        width: 8px;
    }
    .document-viewer::-webkit-scrollbar-track {
        background: #f1f1f1;
        border-radius: 4px;
    }
    .document-viewer::-webkit-scrollbar-thumb {
        background: #888;
        border-radius: 4px;
    }
    .document-viewer::-webkit-scrollbar-thumb:hover {
        background: #555;
    }
    .current-paragraph-highlight {
        background: linear-gradient(120deg, #e3f2fd, #bbdefb);
        padding: 8px 12px;
        margin: 8px 0;
        border-left: 4px solid #2196f3;
        border-radius: 4px;
        box-shadow: 0 2px 4px rgba(33, 150, 243, 0.2);
    }
    .reading-content {
        margin-bottom: 10px;
    }
    .controls-section {
        margin-top: 15px;
    }
    /* Reduce sidebar header spacing */
    .sidebar .sidebar-content {
        padding-top: 1rem;
    }
    /* FIXED: Make expander headers consistent with sidebar titles */
    .streamlit-expanderHeader {
        font-size: 1.1rem !important;
        font-weight: 600 !important;
        color: inherit !important;
    }
    /* FIXED: Ensure consistent styling for smart splitting expander */
    div[data-testid="stExpander"] details summary p {
        font-size: 1.1rem !important;
        font-weight: 600 !important;
    }
    /* Style for the smart splitting section specifically */
    .smart-splitting-header {
        font-size: 1.1rem !important;
        font-weight: 600 !important;
    }
    </style>
    """, unsafe_allow_html=True)

    # Main title with LARGER font and MAXIMUM REDUCED TOP SPACE
    st.markdown('<div class="main-title">🎙️Taqbaylit TTS Sɣer Adlis</div>', unsafe_allow_html=True)

    if model is None:
        st.warning("⚠️ Using test audio (TTS model not available)")

    # Process audio queue on every run (drains finished background work)
    if st.session_state.get('background_worker_started', False):
        processed_count = process_audio_queue(st.session_state.audio_queue)
        if processed_count > 0:
            st.success(f"📥 Loaded {processed_count} paragraph(s)")

    # Sidebar with KABYLE TRANSLATIONS
    with st.sidebar:
        # Document Settings with reduced spacing
        st.header("📁 Tawila n Tɣuri", anchor=False)
        uploaded_file = st.file_uploader("Sali-d Aḍris - jbed sers afaylu", type=['pdf', 'txt'],
                                         help="Ulac OCR ara yerren afaylu PDF n tugna ɣer txt deg teqaylit. Ma ur iwulem ara ɛiwed-as tamuɣli.")

        # Clear document button - MOVED UP immediately under file uploader
        if st.session_state.get('processed'):
            if st.button("🗑️ Sfeḍ Afaylu-a", type="secondary", use_container_width=True):
                # Reset all document-related session state
                st.session_state.processed = False
                st.session_state.current_paragraph_index = 0
                st.session_state.total_paragraphs = 0
                st.session_state.paragraphs_data = {}
                st.session_state.audio_ready = False
                st.session_state.background_worker_started = False
                st.session_state.reading_status = "Ready to start reading"
                st.session_state.current_document_id = None
                # Fresh queue: any in-flight worker results are deliberately dropped
                st.session_state.audio_queue = queue.Queue()
                st.session_state.full_document_text = ""
                st.rerun()

        # Playback Speed Control
        st.markdown("---")
        st.markdown("### 🎵 Arured n Tɣuri")

        # Define the speed options with labels
        speed_options = [0.5, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.5]
        speed_labels = {
            0.5: "0.5x (Ddac ddac ugar)",
            0.8: "0.8x (Ddac ddac)",
            0.9: "0.9x (Ddac ddac cwiṭ)",
            1.0: "1.0x (Amagnu)",
            1.1: "1.1x (Ɣiwel cwiṭ)",
            1.2: "1.2x (Ɣiwel)",
            1.3: "1.3x (Ɣiwel aṭas)",
            1.5: "1.5x (Ɣiwel aṭas ugar)"
        }

        # Create a select slider for playback speed
        current_speed = st.select_slider(
            "Asenfel n urured n tɣuri",
            options=speed_options,
            value=st.session_state.playback_speed,
            format_func=lambda x: speed_labels[x],
            help="Senfel arured n tɣuri i yal taseddaṛt"
        )

        # Update session state if speed changed
        if current_speed != st.session_state.playback_speed:
            st.session_state.playback_speed = current_speed
            # st.success(f"🎵 Arured n tɣuri yettwasenfel ɣer {speed_labels[current_speed]}")

        # Show current speed effect
        speed_effect = {
            0.5: "⏳ Ugar n ukud i tmesliwt",
            0.8: "🐢 Fessus i uḍfaṛ",
            0.9: "📝 S ttawil i usishel n tigzi",
            1.0: "✅ Arured amagnu n tmeslayt",
            1.1: "⚡ Taɣuri s cwiṭ n uɣiwel",
            1.2: "🚀 Taɣuri s uɣiwel",
            1.3: "🎯 Taɣuri s uɣiwel d tmellit",
            1.5: "🔥 Taɣuri s uɣiwel yuzzlen - i yimazzayen"
        }
        st.caption(f"**Asemdu:** {speed_effect[current_speed]}")

        # Smart splitting configuration - FIXED FONT STYLING
        st.markdown("---")

        # Collapsible section for Smart Splitting with PROPER FONT HARMONIZATION
        # NOTE: these slider values are read later in the "process document" branch.
        with st.expander("🎯 Agzam Amegzu", expanded=st.session_state.smart_splitting_expanded):
            initial_paragraphs = st.slider("Tiseddaṛin deg tazwara", 3, 10, 5,
                                           help="Amḍan n tseddaṛin wezzilen deg tazwara n tɣuri")
            initial_word_target = st.slider("Awalen deg tseddaṛt n tazwara", 30, 100, 50,
                                            help="Amḍan n wawalen deg tseddaṛin n tazwara")
            normal_word_target = st.slider("Awalen deg tseddaṛt tamagnut", 50, 300, 100,
                                           help="Amḍan n wawalen deg tseddaṛin tineggura")

    # Main content
    # col_doc, col_reading = st.columns([2, 3])  # 40% document, 60% reading
    col_reading, col_doc = st.columns([3, 2])  # 60% reading, 40% document

    with col_doc:
        # Kabyle title for document overview with SMALLER font
        st.markdown('<div class="section-title">📄 Tamuɣli Ɣer Uḍris</div>', unsafe_allow_html=True)

        if st.session_state.get('full_document_text'):
            # Display the full document in a scrollable container,
            # highlighting the paragraph currently being read.
            document_html = "<div class='document-viewer'>"
            paragraphs = st.session_state.get('paragraphs_list', [])
            current_index = st.session_state.current_paragraph_index

            for i, paragraph in enumerate(paragraphs):
                if i == current_index:
                    document_html += f"<div class='current-paragraph-highlight'><strong>📍 Taseddaṛt {i+1}</strong><br>{paragraph}</div>"
                else:
                    document_html += f"<div><strong>Taseddaṛt {i+1}</strong><br>{paragraph}</div>"
                if i < len(paragraphs) - 1:
                    document_html += "<hr style='margin: 10px 0;'>"

            document_html += "</div>"
            st.markdown(document_html, unsafe_allow_html=True)

            st.caption(f"📋 Aḍris: {len(paragraphs)} n tseddarin | 📍 Imir-a: Taseddaṛt {current_index + 1}")
        else:
            st.info("📁 Sali-d afaylu akken ad d-ikad da")
            if uploaded_file and st.session_state.get('processed'):
                st.warning("⚠️ Aḍris yettwasleḍ maca ulac-it. Ttxil-k, ɛiwed-as tikelt-nniḍen.")

    with col_reading:
        # Kabyle title for audio reading with SMALLER font
        st.markdown('<div class="section-title">🎵 Ɣer - Sel Aḍris</div>', unsafe_allow_html=True)

        if st.session_state.get('processed') and st.session_state.get('audio_ready', False):
            current_index = st.session_state.current_paragraph_index

            # Check if current paragraph data exists
            if current_index not in st.session_state.paragraphs_data:
                st.warning(f"⏳ Taseddaṛt {current_index + 1} mazal d-tuli...")
                st.info("Ameslaw n tseddaṛt-a mazal yemmid. Rǧu cwiṭ.")
                # Auto-refresh to check for new data
                # NOTE(review): blocking the script thread with sleep+rerun is
                # a simple polling loop; it freezes the UI for 2 s per poll.
                time.sleep(2)
                st.rerun()
                return

            current_data = st.session_state.paragraphs_data[current_index]

            # NAVIGATION LAYOUT: Back + Audio + Next in one row
            col_back, col_audio, col_next = st.columns([1, 2, 1])

            with col_back:
                # BACK BUTTON
                if current_index > 0:
                    if st.button("⏮️ Deffir",
                                 use_container_width=True,
                                 type="secondary",
                                 key=f"back_btn_top_{current_index}"):
                        prev_index = current_index - 1
                        st.session_state.current_paragraph_index = prev_index
                        st.session_state.reading_status = f"Taɣuri n tseddaṛt {prev_index + 1}/{st.session_state.total_paragraphs}"
                        st.rerun()
                else:
                    st.button("⏮️ Deffir", disabled=True, use_container_width=True)

            with col_audio:
                # Audio player status placeholder - will be in the HTML
                pass

            with col_next:
                # NEXT BUTTON
                if current_index < st.session_state.total_paragraphs - 1:
                    next_index = current_index + 1

                    if st.button("⏭️ Sdat",
                                 type="primary",
                                 use_container_width=True,
                                 key=f"next_btn_top_{current_index}"):

                        st.session_state.current_paragraph_index = next_index
                        st.session_state.reading_status = f"Taɣuri n tseddaṛt {next_index + 1}/{st.session_state.total_paragraphs}"
                        st.rerun()

                    # Status indicator (informational only)
                    next_ready = next_index in st.session_state.paragraphs_data
                    ready_count = len(st.session_state.paragraphs_data)
                    total_count = st.session_state.total_paragraphs
                    if not next_ready:
                        st.caption(f"⏳ Yesselkat... ({ready_count}/{total_count})")
                    else:
                        st.caption(f"✅ Yemmed ({ready_count}/{total_count})")

                else:
                    st.button("⏭️ Sdat", disabled=True, use_container_width=True)
                    st.caption("🎉 Temmed tɣuri!")

            # HTML with real-time text highlighting.
            # full_text is escaped for embedding in a JS template literal.
            timing_json = json.dumps(current_data['timing_info'])
            full_text = current_data['paragraph_text'].replace('`', '\\`').replace('${', '\\${')
            audio_b64 = base64.b64encode(current_data['audio_bytes']).decode()
            playback_speed = st.session_state.playback_speed

            complete_html = f"""
            <!DOCTYPE html>
            <html>
            <head>
            <style>
                .phrase-highlight {{
                    background: linear-gradient(120deg, #ffeb3b, #ffd54f);
                    padding: 4px 8px;
                    margin: 2px 1px;
                    border-radius: 6px;
                    box-shadow: 0 2px 4px rgba(255, 235, 59, 0.3);
                    font-weight: bold;
                    transition: all 0.3s ease;
                }}
                .phrase-text {{
                    padding: 2px 4px;
                    margin: 1px 0px;
                    border-radius: 4px;
                    transition: all 0.3s ease;
                }}
                .reading-container {{
                    max-height: 40vh;
                    overflow-y: auto;
                    padding: 20px;
                    border: 2px solid #e1e1e1;
                    border-radius: 12px;
                    background: #fafafa;
                    margin: 5px 0;
                    line-height: 1.8;
                    font-size: 16px;
                    font-family: Arial, sans-serif;
                }}
                .status-bar {{
                    background: #e3f2fd;
                    padding: 8px;
                    border-radius: 8px;
                    margin: 8px 0;
                    font-size: 14px;
                }}
                .audio-player {{
                    width: 100%;
                    margin: 8px 0;
                    text-align: center;
                }}
                .audio-controls {{
                    display: flex;
                    justify-content: center;
                    align-items: center;
                    gap: 10px;
                    margin-bottom: 10px;
                }}
            </style>
            </head>
            <body>
                <div class="audio-player">
                    <div class="audio-controls">
                        <audio id="mainAudio" controls playbackRate={playback_speed} style="min-width: 250px;">
                            <source src="data:audio/wav;base64,{audio_b64}" type="audio/wav">
                        </audio>
                    </div>
                    <div style="margin-top: 5px; font-size: 14px; color: #666;">
                        🎵 Seɣbel, tekki ɣef ▶️ afella i tɣuri s urured: <strong>{playback_speed}x</strong>
                        {"" if playback_speed == 1.0 else " - " + ("ddac ddac" if playback_speed < 1.0 else "aɣiwel")}
                    </div>
                </div>



                <div class="reading-container" id="readingContainer">
                    {full_text}
                </div>

                <script>
                const timingInfo = {timing_json};
                const fullText = `{full_text}`;
                const playbackSpeed = {playback_speed};
                let currentHighlightIndex = -1;
                let phraseElements = [];

                function initializeHighlighting() {{
                    // Create phrase elements by wrapping text
                    let container = document.getElementById('readingContainer');
                    let workingText = fullText;

                    timingInfo.forEach((phrase, index) => {{
                        const cleanPhrase = phrase.text.trim();
                        if (workingText.includes(cleanPhrase)) {{
                            const spanId = 'phrase_' + index;
                            const spanHtml = '<span id="' + spanId + '" class="phrase-text">' + cleanPhrase + '</span>';
                            workingText = workingText.replace(cleanPhrase, spanHtml);
                        }}
                    }});

                    container.innerHTML = workingText;

                    // Store references to all phrase elements
                    timingInfo.forEach((phrase, index) => {{
                        const element = document.getElementById('phrase_' + index);
                        if (element) {{
                            phraseElements.push(element);
                        }}
                    }});

                    updateDebugInfo("Agzam amegzu yemmed! " + phraseElements.length + " n tefyar s " + playbackSpeed + "x arured");
                }}

                function updateDebugInfo(message) {{
                    const debugEl = document.getElementById('debugInfo');
                    if (debugEl) debugEl.textContent = message;
                }}

                function highlightCurrentPhrase(currentTime) {{
                    let newIndex = -1;
                    for (let i = 0; i < timingInfo.length; i++) {{
                        if (currentTime >= timingInfo[i].start && currentTime < timingInfo[i].end) {{
                            newIndex = i;
                            break;
                        }}
                    }}

                    if (newIndex !== currentHighlightIndex) {{
                        currentHighlightIndex = newIndex;
                        updateHighlightDisplay();
                        if (newIndex >= 0) {{
                            updateDebugInfo("Akud: " + currentTime.toFixed(2) + "s | Tafyirt: " + (newIndex + 1) + "/" + timingInfo.length + " | Arured: " + playbackSpeed + "x");
                        }}
                    }}
                }}

                function updateHighlightDisplay() {{
                    const currentPhraseSpan = document.getElementById('currentPhrase');

                    // Remove all highlights
                    phraseElements.forEach(element => {{
                        element.className = 'phrase-text';
                    }});

                    // Highlight current phrase
                    if (currentHighlightIndex >= 0 && currentHighlightIndex < phraseElements.length) {{
                        const element = phraseElements[currentHighlightIndex];
                        if (element) {{
                            element.className = 'phrase-highlight';
                            element.scrollIntoView({{ behavior: 'smooth', block: 'center' }});
                        }}

                        if (currentPhraseSpan && timingInfo[currentHighlightIndex]) {{
                            currentPhraseSpan.textContent = timingInfo[currentHighlightIndex].text.substring(0, 100) +
                                (timingInfo[currentHighlightIndex].text.length > 100 ? '...' : '');
                        }}
                    }} else {{
                        if (currentPhraseSpan) {{
                            currentPhraseSpan.textContent = 'Araǧu amesli...';
                        }}
                    }}
                }}

                // Set up audio event listeners
                function setupAudioListeners() {{
                    const audioElement = document.getElementById('mainAudio');
                    if (audioElement) {{
                        // Set playback rate
                        audioElement.playbackRate = playbackSpeed;

                        audioElement.addEventListener('timeupdate', function() {{
                            highlightCurrentPhrase(this.currentTime);
                        }});

                        audioElement.addEventListener('play', function() {{
                            updateDebugInfo("🎵 Taɣuri... aseḍfeṛ n tira iteddu s " + playbackSpeed + "x arured");
                        }});

                        audioElement.addEventListener('ended', function() {{
                            currentHighlightIndex = -1;
                            updateHighlightDisplay();
                            updateDebugInfo("✅ Taɣuri tekfa s " + playbackSpeed + "x arured");
                        }});

                    }} else {{
                        setTimeout(setupAudioListeners, 100);
                    }}
                }}

                // Initialize everything when page loads
                document.addEventListener('DOMContentLoaded', function() {{
                    initializeHighlighting();
                    setupAudioListeners();
                }});
                </script>
            </body>
            </html>
            """

            # Display the complete reading content
            st.components.v1.html(complete_html, height=300, scrolling=True)

            # Place the remaining controls BELOW the reading content
            st.markdown('<div class="controls-section">', unsafe_allow_html=True)

            # Show paragraph info
            word_count = current_data.get('word_count', len(current_data['paragraph_text'].split()))
            st.markdown(f"**Taseddaṛt {current_index + 1}/{st.session_state.total_paragraphs}**")
            st.caption(f"📊 {word_count} n wawalen | ⏱️ {current_data['audio_duration']:.1f}s | 🎵 {st.session_state.playback_speed}x arured")

            # Display progress
            ready_count = len(st.session_state.paragraphs_data)
            total_count = st.session_state.total_paragraphs
            progress = ready_count / total_count if total_count > 0 else 0
            st.progress(progress)
            st.caption(f"📊 Asekker: {ready_count}/{total_count} n tseddarin mmedent ({progress:.0%})")

            # Download button for current paragraph
            audio_bytes = current_data['audio_bytes']
            st.download_button(
                "📥 Zdem ameslaw n tseddaṛt-a",
                audio_bytes,
                f"Taseddaṛt_{current_index + 1}.wav",
                "audio/wav",
                use_container_width=True
            )

            st.markdown('</div>', unsafe_allow_html=True)

        elif uploaded_file and not st.session_state.processed:
            if st.button("🔄 Selket Aḍris", type="primary"):
                # Process document when button is clicked
                with st.spinner("Asekker n uḍris s ugzam amegzu n tseddaṛin..."):
                    temp_path = os.path.join(tempfile.gettempdir(), uploaded_file.name)
                    with open(temp_path, "wb") as f:
                        f.write(uploaded_file.getvalue())

                    text, error = read_document(temp_path)
                    if error:
                        st.error(error)
                    else:
                        cleaned_text = clean_text(text)
                        st.session_state.full_document_text = cleaned_text

                        # Use smart splitting strategy
                        paragraphs = smart_split_paragraphs(
                            cleaned_text,
                            initial_paragraphs=initial_paragraphs,
                            initial_word_target=initial_word_target,
                            normal_word_target=normal_word_target
                        )

                        if not paragraphs:
                            st.error("Ulac agbur i tɣuri.")
                            return

                        # Initialize processing state
                        st.session_state.total_paragraphs = len(paragraphs)
                        st.session_state.current_paragraph_index = 0
                        st.session_state.paragraphs_data = {}
                        st.session_state.paragraphs_list = paragraphs
                        st.session_state.processed = True

                        # Generate first paragraph immediately in main thread
                        # (keeps time-to-first-audio short; the rest arrive via queue)
                        first_paragraph = paragraphs[0]
                        audio_data, sampling_rate, timing_info = generate_audio_with_precise_timing(first_paragraph)

                        st.session_state.paragraphs_data[0] = {
                            'paragraph_text': first_paragraph,
                            'audio_data': audio_data,
                            'timing_info': timing_info,
                            'audio_duration': len(audio_data) / sampling_rate,
                            'audio_bytes': get_audio_bytes(audio_data),
                            'word_count': len(first_paragraph.split())
                        }
                        st.session_state.audio_ready = True

                        # Start background worker for ALL remaining paragraphs
                        if len(paragraphs) > 1:
                            remaining_paragraphs = paragraphs[1:]

                            # Use queue-based background worker
                            thread = threading.Thread(
                                target=background_audio_worker,
                                args=(remaining_paragraphs, st.session_state.audio_queue, 1)
                            )
                            thread.daemon = True
                            thread.start()

                            st.session_state.background_worker_started = True

                        st.rerun()
            else:
                st.info("🔄 Seɣbel, tekki ɣef 'Selket Aḍris' iwakken ad yettwasleḍ u ad yeddu seg tira ɣer umeslaw")
        else:
            st.info("👆 Sali-d afaylu iwakken ad tedduḍ ɣer tɣuri")

if __name__ == "__main__":
    main()