Spaces:
Running
on
Zero
Running
on
Zero
Add ghunnah/madd durations
Browse files
- config.py +21 -0
- recitation_engine/audio_pipeline.py +11 -3
- recitation_engine/segment_processor.py +6 -2
- ui/builder.py +201 -201
- ui/components/verse_selector.py +2 -2
- ui/styles.py +7 -0
config.py
CHANGED
|
@@ -25,6 +25,27 @@ VISIBLE_ITEMS_IN_SELECTOR = 4 # Number of items visible in scrollable selectors
|
|
| 25 |
IS_HF_SPACE = os.environ.get("SPACE_ID") is not None
|
| 26 |
DEV_TAB_VISIBLE = not IS_HF_SPACE # Hide Dev tab on HF Spaces, show locally
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
# =============================================================================
|
| 29 |
# Arabic text styling (UI)
|
| 30 |
# =============================================================================
|
|
|
|
| 25 |
IS_HF_SPACE = os.environ.get("SPACE_ID") is not None
|
| 26 |
DEV_TAB_VISIBLE = not IS_HF_SPACE # Hide Dev tab on HF Spaces, show locally
|
| 27 |
|
| 28 |
+
# =============================================================================
|
| 29 |
+
# Layout configuration
|
| 30 |
+
# =============================================================================
|
| 31 |
+
|
| 32 |
+
LEFT_COLUMN_SCALE = 3 # 30% width (right column is 10 - 3 = 7, i.e. 70%)
|
| 33 |
+
|
| 34 |
+
# Components in each column (for documentation, used by builder.py)
|
| 35 |
+
LEFT_COLUMN_COMPONENTS = [
|
| 36 |
+
"verse_selector", # Chapter/verse dropdowns
|
| 37 |
+
"control_buttons", # Random, Reset, Multi-models
|
| 38 |
+
"audio_input", # Microphone/upload
|
| 39 |
+
"analyze_btn", # Analyze button
|
| 40 |
+
"recitation_settings", # Accordion with madd/ghunnah settings
|
| 41 |
+
]
|
| 42 |
+
|
| 43 |
+
RIGHT_COLUMN_COMPONENTS = [
|
| 44 |
+
"arabic_display", # Selected verse text
|
| 45 |
+
"reference_audio", # Reference audio player
|
| 46 |
+
"analysis_tabs", # Error, Ghunnah, Madd, Dev tabs
|
| 47 |
+
]
|
| 48 |
+
|
| 49 |
# =============================================================================
|
| 50 |
# Arabic text styling (UI)
|
| 51 |
# =============================================================================
|
recitation_engine/audio_pipeline.py
CHANGED
|
@@ -366,8 +366,8 @@ class UnifiedVadProcessor(AudioProcessor):
|
|
| 366 |
parallel_time = time.time() - parallel_start
|
| 367 |
print(f"[PIPELINE] Parallel CPU+GPU completed in {parallel_time:.2f}s")
|
| 368 |
|
| 369 |
-
# Build segment infos from results
|
| 370 |
-
all_segment_infos, all_predicted_phonemes = build_segment_infos(
|
| 371 |
vad_result.vad_segments,
|
| 372 |
segment_audios,
|
| 373 |
match_results,
|
|
@@ -393,10 +393,13 @@ class UnifiedVadProcessor(AudioProcessor):
|
|
| 393 |
user_audio_clips,
|
| 394 |
)
|
| 395 |
|
|
|
|
|
|
|
|
|
|
| 396 |
# Apply pre-computed FA results to segment results (no separate GPU lease needed)
|
| 397 |
segment_results_with_duration = self._apply_fa_to_segment_results(
|
| 398 |
all_results, all_segment_infos, segment_audios, vad_result,
|
| 399 |
-
|
| 400 |
)
|
| 401 |
|
| 402 |
total_elapsed = time.time() - pipeline_start
|
|
@@ -625,6 +628,11 @@ class UnifiedVadProcessor(AudioProcessor):
|
|
| 625 |
if not all_results:
|
| 626 |
return []
|
| 627 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 628 |
from recitation_analysis.duration_analysis.fa_backend import compute_cvc_baseline
|
| 629 |
from recitation_analysis.duration_analysis.duration_calculator import (
|
| 630 |
compute_instance_durations,
|
|
|
|
| 366 |
parallel_time = time.time() - parallel_start
|
| 367 |
print(f"[PIPELINE] Parallel CPU+GPU completed in {parallel_time:.2f}s")
|
| 368 |
|
| 369 |
+
# Build segment infos from results (returns kept_indices for FA alignment)
|
| 370 |
+
all_segment_infos, all_predicted_phonemes, kept_indices = build_segment_infos(
|
| 371 |
vad_result.vad_segments,
|
| 372 |
segment_audios,
|
| 373 |
match_results,
|
|
|
|
| 393 |
user_audio_clips,
|
| 394 |
)
|
| 395 |
|
| 396 |
+
# Filter FA results to match kept segments (excludes Basmala/Isti'adha)
|
| 397 |
+
filtered_fa_results = [fa_results[i] for i in kept_indices] if fa_results else []
|
| 398 |
+
|
| 399 |
# Apply pre-computed FA results to segment results (no separate GPU lease needed)
|
| 400 |
segment_results_with_duration = self._apply_fa_to_segment_results(
|
| 401 |
all_results, all_segment_infos, segment_audios, vad_result,
|
| 402 |
+
filtered_fa_results, user_audio_clips
|
| 403 |
)
|
| 404 |
|
| 405 |
total_elapsed = time.time() - pipeline_start
|
|
|
|
| 628 |
if not all_results:
|
| 629 |
return []
|
| 630 |
|
| 631 |
+
# Defensive check: warn if lists are misaligned (indicates filtering bug)
|
| 632 |
+
if fa_results and len(all_results) != len(fa_results):
|
| 633 |
+
print(f"[SEGMENT FA] WARNING: Result count mismatch! "
|
| 634 |
+
f"all_results={len(all_results)}, fa_results={len(fa_results)}")
|
| 635 |
+
|
| 636 |
from recitation_analysis.duration_analysis.fa_backend import compute_cvc_baseline
|
| 637 |
from recitation_analysis.duration_analysis.duration_calculator import (
|
| 638 |
compute_instance_durations,
|
recitation_engine/segment_processor.py
CHANGED
|
@@ -1185,12 +1185,14 @@ def build_segment_infos(vad_segments, segment_audios, match_results, wav2vec_res
|
|
| 1185 |
wav2vec_results: List of phoneme transcriptions from Wav2Vec2
|
| 1186 |
|
| 1187 |
Returns:
|
| 1188 |
-
Tuple of (segment_infos, predicted_phonemes)
|
| 1189 |
- segment_infos: List of SegmentInfo objects (special segments filtered out)
|
| 1190 |
- predicted_phonemes: List of phoneme strings matching segment_infos
|
|
|
|
| 1191 |
"""
|
| 1192 |
segment_infos = []
|
| 1193 |
predicted_phonemes = []
|
|
|
|
| 1194 |
skipped_count = 0
|
| 1195 |
empty_phoneme_count = 0
|
| 1196 |
|
|
@@ -1222,6 +1224,7 @@ def build_segment_infos(vad_segments, segment_audios, match_results, wav2vec_res
|
|
| 1222 |
error=error_msg if error_msg else "Transcription failed"
|
| 1223 |
))
|
| 1224 |
predicted_phonemes.append("")
|
|
|
|
| 1225 |
else:
|
| 1226 |
# Valid match - get transcription by index
|
| 1227 |
word_start_idx, word_end_idx = match_result[0]
|
|
@@ -1251,6 +1254,7 @@ def build_segment_infos(vad_segments, segment_audios, match_results, wav2vec_res
|
|
| 1251 |
match_score=match_score,
|
| 1252 |
error=None
|
| 1253 |
))
|
|
|
|
| 1254 |
|
| 1255 |
if skipped_count > 0:
|
| 1256 |
print(f"[SEGMENT FILTER] Skipped {skipped_count} special segment(s) (Basmala/Isti'adha)")
|
|
@@ -1262,4 +1266,4 @@ def build_segment_infos(vad_segments, segment_audios, match_results, wav2vec_res
|
|
| 1262 |
if empty_phoneme_count > 0:
|
| 1263 |
print(f"[BUILD SEGMENT] WARNING: {empty_phoneme_count} segment(s) have no phoneme data")
|
| 1264 |
|
| 1265 |
-
return segment_infos, predicted_phonemes
|
|
|
|
| 1185 |
wav2vec_results: List of phoneme transcriptions from Wav2Vec2
|
| 1186 |
|
| 1187 |
Returns:
|
| 1188 |
+
Tuple of (segment_infos, predicted_phonemes, kept_indices)
|
| 1189 |
- segment_infos: List of SegmentInfo objects (special segments filtered out)
|
| 1190 |
- predicted_phonemes: List of phoneme strings matching segment_infos
|
| 1191 |
+
- kept_indices: List of original segment indices that were kept (for FA alignment)
|
| 1192 |
"""
|
| 1193 |
segment_infos = []
|
| 1194 |
predicted_phonemes = []
|
| 1195 |
+
kept_indices = [] # Track original indices for FA result alignment
|
| 1196 |
skipped_count = 0
|
| 1197 |
empty_phoneme_count = 0
|
| 1198 |
|
|
|
|
| 1224 |
error=error_msg if error_msg else "Transcription failed"
|
| 1225 |
))
|
| 1226 |
predicted_phonemes.append("")
|
| 1227 |
+
kept_indices.append(i) # Track for FA alignment
|
| 1228 |
else:
|
| 1229 |
# Valid match - get transcription by index
|
| 1230 |
word_start_idx, word_end_idx = match_result[0]
|
|
|
|
| 1254 |
match_score=match_score,
|
| 1255 |
error=None
|
| 1256 |
))
|
| 1257 |
+
kept_indices.append(i) # Track for FA alignment
|
| 1258 |
|
| 1259 |
if skipped_count > 0:
|
| 1260 |
print(f"[SEGMENT FILTER] Skipped {skipped_count} special segment(s) (Basmala/Isti'adha)")
|
|
|
|
| 1266 |
if empty_phoneme_count > 0:
|
| 1267 |
print(f"[BUILD SEGMENT] WARNING: {empty_phoneme_count} segment(s) have no phoneme data")
|
| 1268 |
|
| 1269 |
+
return segment_infos, predicted_phonemes, kept_indices
|
ui/builder.py
CHANGED
|
@@ -22,6 +22,7 @@ from config import (
|
|
| 22 |
DURATION_TOLERANCE_STEP,
|
| 23 |
JS_POLLING_ENABLED,
|
| 24 |
JS_MUTATION_OBSERVER_ENABLED,
|
|
|
|
| 25 |
)
|
| 26 |
from shared_state import get_model_bundles
|
| 27 |
from utils.phonemizer_utils import get_chapter_list
|
|
@@ -146,23 +147,7 @@ def build_interface() -> gr.Blocks:
|
|
| 146 |
components = {}
|
| 147 |
states = {}
|
| 148 |
|
| 149 |
-
#
|
| 150 |
-
verse_components, _ = create_verse_selector(
|
| 151 |
-
horizontal=True,
|
| 152 |
-
initial_selection=init["selection"],
|
| 153 |
-
chapters=chapters_list
|
| 154 |
-
)
|
| 155 |
-
components["from_chapter"] = verse_components["from_chapter"]
|
| 156 |
-
components["from_verse"] = verse_components["from_verse"]
|
| 157 |
-
components["to_verse"] = verse_components["to_verse"]
|
| 158 |
-
|
| 159 |
-
# Control buttons (segmentation is now auto-detected by VAD)
|
| 160 |
-
controls = create_control_buttons(multi_visible)
|
| 161 |
-
components["random_btn"] = controls["random_btn"]
|
| 162 |
-
components["reset_btn"] = controls["reset_btn"]
|
| 163 |
-
components["multi_models_cb"] = controls["multi_models_cb"]
|
| 164 |
-
|
| 165 |
-
# Hidden components to hold session values (avoids gr.State issues in some Gradio versions)
|
| 166 |
states["expected_phonemes"] = gr.Textbox(
|
| 167 |
value=init["phonemes"], visible=False, interactive=False
|
| 168 |
)
|
|
@@ -181,9 +166,6 @@ def build_interface() -> gr.Blocks:
|
|
| 181 |
)
|
| 182 |
|
| 183 |
# Hidden components for lazy audio loading API
|
| 184 |
-
# JavaScript triggers lazy_verse_key.change() to fetch audio on navigation
|
| 185 |
-
# NOTE: Using gr.HTML wrapper with inline CSS to hide, because visible=False
|
| 186 |
-
# prevents DOM rendering on HF Spaces
|
| 187 |
gr.HTML('<div id="lazy-audio-api-wrapper" style="display:none !important; height:0; overflow:hidden;"></div>')
|
| 188 |
states["lazy_verse_key"] = gr.Textbox(
|
| 189 |
value="",
|
|
@@ -200,7 +182,7 @@ def build_interface() -> gr.Blocks:
|
|
| 200 |
elem_classes=["lazy-audio-hidden"]
|
| 201 |
)
|
| 202 |
|
| 203 |
-
# Hidden components for segment clip lazy loading
|
| 204 |
states["segment_clip_request"] = gr.Textbox(
|
| 205 |
value="",
|
| 206 |
visible=True,
|
|
@@ -216,202 +198,220 @@ def build_interface() -> gr.Blocks:
|
|
| 216 |
elem_classes=["lazy-audio-hidden"]
|
| 217 |
)
|
| 218 |
|
| 219 |
-
#
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
init_audio_html=init["ref_audio_html"]
|
| 229 |
-
)
|
| 230 |
-
|
| 231 |
-
components["reference_audio"] = ref_components["audio_html"]
|
| 232 |
-
components["reference_dropdown_col"] = ref_components["dropdown_col"]
|
| 233 |
-
components["reference_dropdown"] = ref_components["dropdown"]
|
| 234 |
-
|
| 235 |
-
# Recitation settings (collapsed by default)
|
| 236 |
-
# Row 1: Madd settings, Row 2: Ghunnah settings + tolerance
|
| 237 |
-
with gr.Accordion("Recitation Settings", open=False):
|
| 238 |
-
# Row 1: All Madd length settings
|
| 239 |
-
with gr.Row():
|
| 240 |
-
components["madd_tabii_length"] = gr.Radio(
|
| 241 |
-
choices=[2],
|
| 242 |
-
value=MADD_TABII_LENGTH,
|
| 243 |
-
label="Tabi'i",
|
| 244 |
-
scale=1,
|
| 245 |
-
interactive=False
|
| 246 |
-
)
|
| 247 |
-
components["madd_wajib_length"] = gr.Radio(
|
| 248 |
-
choices=[4, 5],
|
| 249 |
-
value=MADD_WAJIB_MUTTASIL_LENGTH,
|
| 250 |
-
label="Wajib Muttasil",
|
| 251 |
-
scale=1
|
| 252 |
-
)
|
| 253 |
-
components["madd_jaiz_length"] = gr.Radio(
|
| 254 |
-
choices=[2, 4, 5],
|
| 255 |
-
value=MADD_JAIZ_MUNFASIL_LENGTH,
|
| 256 |
-
label="Ja'iz Munfasil",
|
| 257 |
-
scale=1
|
| 258 |
-
)
|
| 259 |
-
components["madd_lazim_length"] = gr.Radio(
|
| 260 |
-
choices=[6],
|
| 261 |
-
value=MADD_LAZIM_LENGTH,
|
| 262 |
-
label="Lazim",
|
| 263 |
-
scale=1,
|
| 264 |
-
interactive=False
|
| 265 |
-
)
|
| 266 |
-
components["madd_arid_length"] = gr.Radio(
|
| 267 |
-
choices=[2, 4, 6],
|
| 268 |
-
value=MADD_ARID_LISSUKUN_LENGTH,
|
| 269 |
-
label="'Arid Lissukun",
|
| 270 |
-
scale=1
|
| 271 |
)
|
| 272 |
-
components["
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 277 |
)
|
| 278 |
-
# Row 2: Ghunnah settings + tolerance slider
|
| 279 |
-
with gr.Row():
|
| 280 |
-
components["iqlab_ikhfaa_sound"] = gr.Radio(
|
| 281 |
-
choices=["meem ghunnah", "ikhfaa"],
|
| 282 |
-
value=IQLAB_IKHFAA_SHAFAWI_SOUND,
|
| 283 |
-
label="Iqlab/Ikhfaa Shafawi Sound",
|
| 284 |
-
scale=1
|
| 285 |
-
)
|
| 286 |
-
components["ghunnah_length"] = gr.Radio(
|
| 287 |
-
choices=[2, 3],
|
| 288 |
-
value=GHUNNAH_LENGTH,
|
| 289 |
-
label="Ghunnah Length",
|
| 290 |
-
scale=1
|
| 291 |
-
)
|
| 292 |
-
components["duration_tolerance"] = gr.Slider(
|
| 293 |
-
minimum=DURATION_TOLERANCE_MIN,
|
| 294 |
-
maximum=DURATION_TOLERANCE_MAX,
|
| 295 |
-
step=DURATION_TOLERANCE_STEP,
|
| 296 |
-
value=DURATION_TOLERANCE,
|
| 297 |
-
label="Duration Tolerance",
|
| 298 |
-
info="How much deviation from expected length to allow (harakat)",
|
| 299 |
-
scale=2
|
| 300 |
-
)
|
| 301 |
-
|
| 302 |
-
# Audio input
|
| 303 |
-
# Using type="filepath" to access original filename for auto-surah selection
|
| 304 |
-
components["audio_input"] = gr.Audio(
|
| 305 |
-
sources=["microphone", "upload"],
|
| 306 |
-
type="filepath",
|
| 307 |
-
label="Record or Upload Your Recitation"
|
| 308 |
-
)
|
| 309 |
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
)
|
| 314 |
-
|
| 315 |
-
# Analysis tabs
|
| 316 |
-
with gr.Tabs():
|
| 317 |
-
with gr.Tab("Error Analysis"):
|
| 318 |
-
components["error_display"] = gr.HTML(value="", label="Error Analysis")
|
| 319 |
-
with gr.Tab("Ghunnah Analysis"):
|
| 320 |
-
with gr.Row(elem_classes=["sort-row"]):
|
| 321 |
-
components["ghunnah_sort_dropdown"] = gr.Radio(
|
| 322 |
-
choices=[("Text Order", "text_order"), ("By Rule", "by_rule"), ("By Errors", "by_errors")],
|
| 323 |
-
value="text_order",
|
| 324 |
-
show_label=False,
|
| 325 |
-
interactive=True,
|
| 326 |
-
container=False,
|
| 327 |
-
visible=False,
|
| 328 |
-
elem_classes=["sort-toggle"],
|
| 329 |
-
)
|
| 330 |
-
components["ghunnah_display"] = gr.HTML(
|
| 331 |
-
value=init["ghunnah_html"], label="Ghunnah Analysis"
|
| 332 |
-
)
|
| 333 |
-
with gr.Tab("Madd Analysis"):
|
| 334 |
-
with gr.Row(elem_classes=["sort-row"]):
|
| 335 |
-
components["madd_sort_dropdown"] = gr.Radio(
|
| 336 |
-
choices=[("Text Order", "text_order"), ("By Rule", "by_rule"), ("By Errors", "by_errors")],
|
| 337 |
-
value="text_order",
|
| 338 |
-
show_label=False,
|
| 339 |
-
interactive=True,
|
| 340 |
-
container=False,
|
| 341 |
-
visible=False,
|
| 342 |
-
elem_classes=["sort-toggle"],
|
| 343 |
-
)
|
| 344 |
-
components["madd_display"] = gr.HTML(
|
| 345 |
-
value=init["madd_html"], label="Madd Analysis"
|
| 346 |
)
|
| 347 |
-
if DEV_TAB_VISIBLE:
|
| 348 |
-
with gr.Tab("Dev"):
|
| 349 |
|
|
|
|
|
|
|
|
|
|
| 350 |
with gr.Row():
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
)
|
| 358 |
-
with gr.Column(scale=1):
|
| 359 |
-
components["dev_detected_phonemes"] = gr.Textbox(
|
| 360 |
-
label="Detected Phonemes (edit to simulate errors)",
|
| 361 |
-
value=init["phonemes"],
|
| 362 |
-
lines=3,
|
| 363 |
-
interactive=True,
|
| 364 |
-
placeholder="Modify phonemes here to simulate detection errors...",
|
| 365 |
-
)
|
| 366 |
-
|
| 367 |
-
with gr.Row():
|
| 368 |
-
components["dev_simulate_btn"] = gr.Button(
|
| 369 |
-
"Simulate Errors", variant="primary"
|
| 370 |
)
|
| 371 |
-
components["
|
| 372 |
-
|
|
|
|
|
|
|
|
|
|
| 373 |
)
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
label="Test Name",
|
| 380 |
-
placeholder="Enter a name for this test case...",
|
| 381 |
-
scale=2,
|
| 382 |
)
|
| 383 |
-
components["
|
| 384 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 385 |
)
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 390 |
with gr.Row():
|
| 391 |
-
components["
|
| 392 |
-
choices=["
|
| 393 |
-
value=
|
| 394 |
-
label="
|
| 395 |
-
scale=
|
| 396 |
)
|
| 397 |
-
components["
|
| 398 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 399 |
)
|
| 400 |
|
| 401 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 402 |
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 407 |
)
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 415 |
|
| 416 |
# ----- Wire all events -----
|
| 417 |
wire_all_events(components, states, handlers)
|
|
|
|
| 22 |
DURATION_TOLERANCE_STEP,
|
| 23 |
JS_POLLING_ENABLED,
|
| 24 |
JS_MUTATION_OBSERVER_ENABLED,
|
| 25 |
+
LEFT_COLUMN_SCALE,
|
| 26 |
)
|
| 27 |
from shared_state import get_model_bundles
|
| 28 |
from utils.phonemizer_utils import get_chapter_list
|
|
|
|
| 147 |
components = {}
|
| 148 |
states = {}
|
| 149 |
|
| 150 |
+
# Hidden components to hold session values (must be outside Row/Column for event wiring)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
states["expected_phonemes"] = gr.Textbox(
|
| 152 |
value=init["phonemes"], visible=False, interactive=False
|
| 153 |
)
|
|
|
|
| 166 |
)
|
| 167 |
|
| 168 |
# Hidden components for lazy audio loading API
|
|
|
|
|
|
|
|
|
|
| 169 |
gr.HTML('<div id="lazy-audio-api-wrapper" style="display:none !important; height:0; overflow:hidden;"></div>')
|
| 170 |
states["lazy_verse_key"] = gr.Textbox(
|
| 171 |
value="",
|
|
|
|
| 182 |
elem_classes=["lazy-audio-hidden"]
|
| 183 |
)
|
| 184 |
|
| 185 |
+
# Hidden components for segment clip lazy loading
|
| 186 |
states["segment_clip_request"] = gr.Textbox(
|
| 187 |
value="",
|
| 188 |
visible=True,
|
|
|
|
| 198 |
elem_classes=["lazy-audio-hidden"]
|
| 199 |
)
|
| 200 |
|
| 201 |
+
# ----- Two-column layout -----
|
| 202 |
+
with gr.Row():
|
| 203 |
+
# ========== LEFT COLUMN ==========
|
| 204 |
+
with gr.Column(scale=LEFT_COLUMN_SCALE):
|
| 205 |
+
# Verse selector
|
| 206 |
+
verse_components, _ = create_verse_selector(
|
| 207 |
+
horizontal=True,
|
| 208 |
+
initial_selection=init["selection"],
|
| 209 |
+
chapters=chapters_list
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
)
|
| 211 |
+
components["from_chapter"] = verse_components["from_chapter"]
|
| 212 |
+
components["from_verse"] = verse_components["from_verse"]
|
| 213 |
+
components["to_verse"] = verse_components["to_verse"]
|
| 214 |
+
|
| 215 |
+
# Control buttons (segmentation is now auto-detected by VAD)
|
| 216 |
+
controls = create_control_buttons(multi_visible)
|
| 217 |
+
components["random_btn"] = controls["random_btn"]
|
| 218 |
+
components["reset_btn"] = controls["reset_btn"]
|
| 219 |
+
components["multi_models_cb"] = controls["multi_models_cb"]
|
| 220 |
+
|
| 221 |
+
# Audio input
|
| 222 |
+
components["audio_input"] = gr.Audio(
|
| 223 |
+
sources=["microphone", "upload"],
|
| 224 |
+
type="filepath",
|
| 225 |
+
label="Record or Upload Your Recitation"
|
| 226 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 227 |
|
| 228 |
+
# Analyze button
|
| 229 |
+
components["analyze_btn"] = gr.Button(
|
| 230 |
+
"Analyze", variant="primary", visible=False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
)
|
|
|
|
|
|
|
| 232 |
|
| 233 |
+
# Recitation settings (expanded by default)
|
| 234 |
+
with gr.Accordion("Recitation Settings", open=True):
|
| 235 |
+
# Row 1: All Madd length settings
|
| 236 |
with gr.Row():
|
| 237 |
+
components["madd_tabii_length"] = gr.Radio(
|
| 238 |
+
choices=[2],
|
| 239 |
+
value=MADD_TABII_LENGTH,
|
| 240 |
+
label="Tabi'i",
|
| 241 |
+
scale=1,
|
| 242 |
+
interactive=False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 243 |
)
|
| 244 |
+
components["madd_wajib_length"] = gr.Radio(
|
| 245 |
+
choices=[4, 5],
|
| 246 |
+
value=MADD_WAJIB_MUTTASIL_LENGTH,
|
| 247 |
+
label="Wajib Muttasil",
|
| 248 |
+
scale=1
|
| 249 |
)
|
| 250 |
+
components["madd_jaiz_length"] = gr.Radio(
|
| 251 |
+
choices=[2, 4, 5],
|
| 252 |
+
value=MADD_JAIZ_MUNFASIL_LENGTH,
|
| 253 |
+
label="Ja'iz Munfasil",
|
| 254 |
+
scale=1
|
|
|
|
|
|
|
|
|
|
| 255 |
)
|
| 256 |
+
components["madd_lazim_length"] = gr.Radio(
|
| 257 |
+
choices=[6],
|
| 258 |
+
value=MADD_LAZIM_LENGTH,
|
| 259 |
+
label="Lazim",
|
| 260 |
+
scale=1,
|
| 261 |
+
interactive=False
|
| 262 |
)
|
| 263 |
+
components["madd_arid_length"] = gr.Radio(
|
| 264 |
+
choices=[2, 4, 6],
|
| 265 |
+
value=MADD_ARID_LISSUKUN_LENGTH,
|
| 266 |
+
label="'Arid Lissukun",
|
| 267 |
+
scale=1
|
| 268 |
+
)
|
| 269 |
+
components["madd_leen_length"] = gr.Radio(
|
| 270 |
+
choices=[2, 4, 6],
|
| 271 |
+
value=MADD_LEEN_LENGTH,
|
| 272 |
+
label="Leen",
|
| 273 |
+
scale=1
|
| 274 |
+
)
|
| 275 |
+
# Row 2: Ghunnah settings + tolerance slider
|
| 276 |
with gr.Row():
|
| 277 |
+
components["iqlab_ikhfaa_sound"] = gr.Radio(
|
| 278 |
+
choices=["meem ghunnah", "ikhfaa"],
|
| 279 |
+
value=IQLAB_IKHFAA_SHAFAWI_SOUND,
|
| 280 |
+
label="Iqlab/Ikhfaa Shafawi Sound",
|
| 281 |
+
scale=1
|
| 282 |
)
|
| 283 |
+
components["ghunnah_length"] = gr.Radio(
|
| 284 |
+
choices=[2, 3],
|
| 285 |
+
value=GHUNNAH_LENGTH,
|
| 286 |
+
label="Ghunnah Length",
|
| 287 |
+
scale=1
|
| 288 |
+
)
|
| 289 |
+
components["duration_tolerance"] = gr.Slider(
|
| 290 |
+
minimum=DURATION_TOLERANCE_MIN,
|
| 291 |
+
maximum=DURATION_TOLERANCE_MAX,
|
| 292 |
+
step=DURATION_TOLERANCE_STEP,
|
| 293 |
+
value=DURATION_TOLERANCE,
|
| 294 |
+
label="Duration Tolerance",
|
| 295 |
+
info="How much deviation from expected length to allow (harakat)",
|
| 296 |
+
scale=2
|
| 297 |
)
|
| 298 |
|
| 299 |
+
# ========== RIGHT COLUMN ==========
|
| 300 |
+
with gr.Column(scale=10 - LEFT_COLUMN_SCALE, elem_classes=["right-column"]):
|
| 301 |
+
# Arabic display
|
| 302 |
+
components["arabic_display"] = gr.HTML(
|
| 303 |
+
value=init["arabic_html"] or format_no_verse_message(),
|
| 304 |
+
label="Selected Verse"
|
| 305 |
+
)
|
| 306 |
|
| 307 |
+
# Reference audio section
|
| 308 |
+
from ui.components import create_reference_audio_section
|
| 309 |
+
ref_components = create_reference_audio_section(
|
| 310 |
+
init_audio_html=init["ref_audio_html"]
|
| 311 |
+
)
|
| 312 |
+
components["reference_audio"] = ref_components["audio_html"]
|
| 313 |
+
components["reference_dropdown_col"] = ref_components["dropdown_col"]
|
| 314 |
+
components["reference_dropdown"] = ref_components["dropdown"]
|
| 315 |
+
|
| 316 |
+
# Analysis tabs
|
| 317 |
+
with gr.Tabs():
|
| 318 |
+
with gr.Tab("Error Analysis"):
|
| 319 |
+
components["error_display"] = gr.HTML(value="", label="Error Analysis")
|
| 320 |
+
with gr.Tab("Ghunnah Analysis"):
|
| 321 |
+
with gr.Row(elem_classes=["sort-row"]):
|
| 322 |
+
components["ghunnah_sort_dropdown"] = gr.Radio(
|
| 323 |
+
choices=[("Text Order", "text_order"), ("By Rule", "by_rule"), ("By Errors", "by_errors")],
|
| 324 |
+
value="text_order",
|
| 325 |
+
show_label=False,
|
| 326 |
+
interactive=True,
|
| 327 |
+
container=False,
|
| 328 |
+
visible=False,
|
| 329 |
+
elem_classes=["sort-toggle"],
|
| 330 |
+
)
|
| 331 |
+
components["ghunnah_display"] = gr.HTML(
|
| 332 |
+
value=init["ghunnah_html"], label="Ghunnah Analysis"
|
| 333 |
)
|
| 334 |
+
with gr.Tab("Madd Analysis"):
|
| 335 |
+
with gr.Row(elem_classes=["sort-row"]):
|
| 336 |
+
components["madd_sort_dropdown"] = gr.Radio(
|
| 337 |
+
choices=[("Text Order", "text_order"), ("By Rule", "by_rule"), ("By Errors", "by_errors")],
|
| 338 |
+
value="text_order",
|
| 339 |
+
show_label=False,
|
| 340 |
+
interactive=True,
|
| 341 |
+
container=False,
|
| 342 |
+
visible=False,
|
| 343 |
+
elem_classes=["sort-toggle"],
|
| 344 |
+
)
|
| 345 |
+
components["madd_display"] = gr.HTML(
|
| 346 |
+
value=init["madd_html"], label="Madd Analysis"
|
| 347 |
+
)
|
| 348 |
+
if DEV_TAB_VISIBLE:
|
| 349 |
+
with gr.Tab("Dev"):
|
| 350 |
+
with gr.Row():
|
| 351 |
+
with gr.Column(scale=1):
|
| 352 |
+
components["dev_canonical_phonemes"] = gr.Textbox(
|
| 353 |
+
label="Canonical Phonemes (from phonemizer)",
|
| 354 |
+
value=init["phonemes"],
|
| 355 |
+
lines=3,
|
| 356 |
+
interactive=False,
|
| 357 |
+
)
|
| 358 |
+
with gr.Column(scale=1):
|
| 359 |
+
components["dev_detected_phonemes"] = gr.Textbox(
|
| 360 |
+
label="Detected Phonemes (edit to simulate errors)",
|
| 361 |
+
value=init["phonemes"],
|
| 362 |
+
lines=3,
|
| 363 |
+
interactive=True,
|
| 364 |
+
placeholder="Modify phonemes here to simulate detection errors...",
|
| 365 |
+
)
|
| 366 |
+
|
| 367 |
+
with gr.Row():
|
| 368 |
+
components["dev_simulate_btn"] = gr.Button(
|
| 369 |
+
"Simulate Errors", variant="primary"
|
| 370 |
+
)
|
| 371 |
+
components["dev_reset_btn"] = gr.Button(
|
| 372 |
+
"Reset to Canonical", variant="secondary"
|
| 373 |
+
)
|
| 374 |
+
|
| 375 |
+
components["dev_simulation_output"] = gr.HTML(value="", label="Simulation Result")
|
| 376 |
+
|
| 377 |
+
with gr.Row():
|
| 378 |
+
components["dev_test_name"] = gr.Textbox(
|
| 379 |
+
label="Test Name",
|
| 380 |
+
placeholder="Enter a name for this test case...",
|
| 381 |
+
scale=2,
|
| 382 |
+
)
|
| 383 |
+
components["dev_save_btn"] = gr.Button(
|
| 384 |
+
"Save Test Case", variant="secondary", scale=1
|
| 385 |
+
)
|
| 386 |
+
|
| 387 |
+
components["dev_save_status"] = gr.HTML(value="", label="Save Status")
|
| 388 |
+
|
| 389 |
+
gr.Markdown("### Test Runner")
|
| 390 |
+
with gr.Row():
|
| 391 |
+
components["dev_test_source"] = gr.Radio(
|
| 392 |
+
choices=["MDD Tests (mdd_tests.yaml)", "Formal Tests (error_analysis/)"],
|
| 393 |
+
value="MDD Tests (mdd_tests.yaml)",
|
| 394 |
+
label="Test Source",
|
| 395 |
+
scale=2,
|
| 396 |
+
)
|
| 397 |
+
components["dev_load_btn"] = gr.Button(
|
| 398 |
+
"Load & Run Tests", variant="primary", scale=1
|
| 399 |
+
)
|
| 400 |
+
|
| 401 |
+
components["dev_tests_output"] = gr.HTML(value="", label="Test Results")
|
| 402 |
+
|
| 403 |
+
gr.Markdown("### Alignment Visualization")
|
| 404 |
+
with gr.Row():
|
| 405 |
+
components["dev_alignment_btn"] = gr.Button(
|
| 406 |
+
"Generate Alignment Plot", variant="secondary", scale=1
|
| 407 |
+
)
|
| 408 |
+
components["dev_alignment_status"] = gr.HTML(value="", label="Status")
|
| 409 |
+
components["dev_alignment_image"] = gr.Image(
|
| 410 |
+
value=None,
|
| 411 |
+
label="CTC Forced Alignment",
|
| 412 |
+
type="filepath",
|
| 413 |
+
show_download_button=True,
|
| 414 |
+
)
|
| 415 |
|
| 416 |
# ----- Wire all events -----
|
| 417 |
wire_all_events(components, states, handlers)
|
ui/components/verse_selector.py
CHANGED
|
@@ -200,7 +200,7 @@ def create_verse_selector(horizontal=False, initial_selection=None, chapters=Non
|
|
| 200 |
label="From Verse",
|
| 201 |
interactive=from_verse_interactive,
|
| 202 |
scale=1,
|
| 203 |
-
min_width=
|
| 204 |
allow_custom_value=True,
|
| 205 |
),
|
| 206 |
"to_verse": gr.Dropdown(
|
|
@@ -209,7 +209,7 @@ def create_verse_selector(horizontal=False, initial_selection=None, chapters=Non
|
|
| 209 |
label="To Verse",
|
| 210 |
interactive=to_verse_interactive,
|
| 211 |
scale=1,
|
| 212 |
-
min_width=
|
| 213 |
allow_custom_value=True,
|
| 214 |
),
|
| 215 |
}
|
|
|
|
| 200 |
label="From Verse",
|
| 201 |
interactive=from_verse_interactive,
|
| 202 |
scale=1,
|
| 203 |
+
min_width=80,
|
| 204 |
allow_custom_value=True,
|
| 205 |
),
|
| 206 |
"to_verse": gr.Dropdown(
|
|
|
|
| 209 |
label="To Verse",
|
| 210 |
interactive=to_verse_interactive,
|
| 211 |
scale=1,
|
| 212 |
+
min_width=80,
|
| 213 |
allow_custom_value=True,
|
| 214 |
),
|
| 215 |
}
|
ui/styles.py
CHANGED
|
@@ -204,6 +204,13 @@ def get_custom_css() -> str:
|
|
| 204 |
.sort-toggle input[type="radio"] {
|
| 205 |
display: none !important;
|
| 206 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
"""
|
| 208 |
|
| 209 |
return get_force_dark_mode_css() + get_digital_khatt_font_face(DIGITAL_KHATT_FONT_B64) + get_uthmanic_font_face() + toggle_button_css
|
|
|
|
| 204 |
.sort-toggle input[type="radio"] {
|
| 205 |
display: none !important;
|
| 206 |
}
|
| 207 |
+
/* Right column - reduce gap between arabic display and reference audio */
|
| 208 |
+
.right-column {
|
| 209 |
+
gap: 8px !important;
|
| 210 |
+
}
|
| 211 |
+
.right-column > div:first-child {
|
| 212 |
+
margin-bottom: 0 !important;
|
| 213 |
+
}
|
| 214 |
"""
|
| 215 |
|
| 216 |
return get_force_dark_mode_css() + get_digital_khatt_font_face(DIGITAL_KHATT_FONT_B64) + get_uthmanic_font_face() + toggle_button_css
|