Quran-multi-aligner

Running on Zero

App Files Files Community

hetchyy committed 23 days ago

Commit

1fc019e

verified ·

1 Parent(s): 7ac4996

feat: redesign audio input as Link/Upload/Record toggle; perf: use per-segment WAVs and skip audio I/O for API calls

Browse files

Files changed (7) hide show

config.py +1 -0
src/pipeline.py +23 -5
src/ui/event_wiring.py +93 -22
src/ui/handlers.py +50 -24
src/ui/interface.py +36 -11
src/ui/segments.py +11 -7
src/ui/styles.py +17 -0

config.py CHANGED Viewed

@@ -21,6 +21,7 @@ PORT = 6902
 RESAMPLE_TYPE = "soxr_lq"
 SEGMENT_AUDIO_DIR = Path("/tmp/segments")   # WAV files written here per request
 URL_DOWNLOAD_DIR = Path("/tmp/url_downloads")  # Audio downloaded from URLs via yt-dlp
 DELETE_CACHE_FREQUENCY = 3600*5             # Gradio cache cleanup interval (seconds)
 DELETE_CACHE_AGE = 3600*5                   # Delete cached files older than this (seconds)

 RESAMPLE_TYPE = "soxr_lq"
 SEGMENT_AUDIO_DIR = Path("/tmp/segments")   # WAV files written here per request
 URL_DOWNLOAD_DIR = Path("/tmp/url_downloads")  # Audio downloaded from URLs via yt-dlp
+DEFAULT_INPUT_MODE = "Upload"                  # "Link", "Upload", or "Record"
 DELETE_CACHE_FREQUENCY = 3600*5             # Gradio cache cleanup interval (seconds)
 DELETE_CACHE_AGE = 3600*5                   # Delete cached files older than this (seconds)

src/pipeline.py CHANGED Viewed

@@ -840,6 +840,10 @@ def _run_post_vad_pipeline(
     json_output = {"segments": segments_list}
     # Compute full audio URL (file written in background after render)
     full_path = segment_dir / "full.wav"
     full_audio_url = f"/gradio_api/file={full_path}"
@@ -858,18 +862,20 @@ def _run_post_vad_pipeline(
     print(f"[DIAG] Before render_segments: RSS={_rss:.0f}MB, segments={len(segments)}")
     t_render = time.time()
-    html = render_segments(segments, full_audio_url=full_audio_url)
     print(f"[PROFILE] render_segments: {time.time() - t_render:.3f}s ({len(segments)} segments, HTML={len(html)/1e6:.2f}MB)")
-    # Write full.wav in background thread from float32 audio
     # sf.write converts float32→PCM16 internally (no extra int16 copy in memory)
-    # File ready before user can click play (browser still rendering cards)
     import threading
     import soundfile as sf
     _audio_ref = audio  # prevent GC while thread runs
     _sr_ref = sample_rate
     _path_ref = str(full_path)
-    def _write_full_wav():
         import os
         # Diagnostics: memory + disk before write
         rss_mb = -1
@@ -894,7 +900,19 @@ def _run_post_vad_pipeline(
             print(f"[PROFILE] Full audio write (bg): {time.time() - t:.3f}s ({expected_mb:.0f}MB)")
         except Exception as e:
             print(f"[ERROR] Full audio write failed: {e}")
-    threading.Thread(target=_write_full_wav, daemon=True).start()
     print("[STAGE] Done!")

     json_output = {"segments": segments_list}
+    # API callers only need json_output; skip HTML render and audio file writes
+    if endpoint != "ui":
+        return "", json_output, str(segment_dir), log_row
     # Compute full audio URL (file written in background after render)
     full_path = segment_dir / "full.wav"
     full_audio_url = f"/gradio_api/file={full_path}"
     print(f"[DIAG] Before render_segments: RSS={_rss:.0f}MB, segments={len(segments)}")
     t_render = time.time()
+    html = render_segments(segments, full_audio_url=full_audio_url, segment_dir=str(segment_dir))
     print(f"[PROFILE] render_segments: {time.time() - t_render:.3f}s ({len(segments)} segments, HTML={len(html)/1e6:.2f}MB)")
+    # Write full.wav + per-segment WAVs in background thread
     # sf.write converts float32→PCM16 internally (no extra int16 copy in memory)
+    # Files ready before user can click play (browser still rendering cards)
     import threading
     import soundfile as sf
     _audio_ref = audio  # prevent GC while thread runs
     _sr_ref = sample_rate
     _path_ref = str(full_path)
+    _seg_dir_ref = str(segment_dir)
+    _segments_ref = segments
+    def _write_audio_files():
         import os
         # Diagnostics: memory + disk before write
         rss_mb = -1
             print(f"[PROFILE] Full audio write (bg): {time.time() - t:.3f}s ({expected_mb:.0f}MB)")
         except Exception as e:
             print(f"[ERROR] Full audio write failed: {e}")
+            return  # Can't write per-segment files without full.wav succeeding
+        # Per-segment WAVs (slices from float32 array, converted to PCM16 by soundfile)
+        t_segs = time.time()
+        try:
+            for i, seg in enumerate(_segments_ref):
+                start = int(seg.start_time * _sr_ref)
+                end = int(seg.end_time * _sr_ref)
+                sf.write(os.path.join(_seg_dir_ref, f"seg_{i}.wav"),
+                         _audio_ref[start:end], _sr_ref, format='WAV', subtype='PCM_16')
+            print(f"[PROFILE] Per-segment WAVs (bg): {time.time() - t_segs:.3f}s ({len(_segments_ref)} files)")
+        except Exception as e:
+            print(f"[ERROR] Per-segment WAV write failed: {e}")
+    threading.Thread(target=_write_audio_files, daemon=True).start()
     print("[STAGE] Done!")

src/ui/event_wiring.py CHANGED Viewed

@@ -18,7 +18,7 @@ from src.ui.progress_bar import pipeline_progress_bar_html
 from src.ui.handlers import (
     wire_presets, toggle_resegment_panel,
     on_mode_change, on_verse_toggle, restore_anim_settings,
-    download_url_audio,
 )
 _EMPTY_PLACEHOLDER = (
@@ -30,6 +30,7 @@ _EMPTY_PLACEHOLDER = (
 def wire_events(app, c):
     """Wire all event handlers to Gradio components."""
     _wire_preset_buttons(c)
     _wire_url_input(c)
     _wire_audio_input(c)
     _wire_extract_chain(c)
@@ -51,50 +52,115 @@ def _wire_preset_buttons(c):
                  c.rs_silence, c.rs_speech, c.rs_pad)
 def _wire_url_input(c):
-    """Wire URL textbox → yt-dlp download → populate audio component."""
     def _on_download(url):
-        # Yield 1: show loading state
         yield (
-            gr.update(),                                     # audio_input unchanged
-            gr.update(visible=False),                        # hide old info
             gr.update(
                 value='<div style="text-align:center;padding:8px;">Downloading audio...</div>',
                 visible=True,
-            ),                                               # url_status
-            gr.update(interactive=False),                    # disable button
         )
-        # Yield 2: download result
         try:
             wav_path, info_html = download_url_audio(url)
             yield (
-                wav_path,                                    # set audio_input
-                gr.update(value=info_html, visible=True),    # url_info_html
-                gr.update(visible=False),                    # hide status
-                gr.update(interactive=True),                 # re-enable button
             )
         except gr.Error:
             raise
         except Exception as e:
             yield (
                 gr.update(),
-                gr.update(visible=False),
                 gr.update(
-                    value=f'<div style="color:var(--error-text-color);padding:8px;">Error: {str(e)[:200]}</div>',
                     visible=True,
                 ),
                 gr.update(interactive=True),
             )
-    _url_outputs = [c.audio_input, c.url_info_html, c.url_status, c.url_download_btn]
     c.url_download_btn.click(
-        fn=_on_download, inputs=[c.url_input], outputs=_url_outputs,
-        api_name=False, show_progress="hidden",
-    )
-    c.url_input.submit(
-        fn=_on_download, inputs=[c.url_input], outputs=_url_outputs,
         api_name=False, show_progress="hidden",
     )
@@ -141,13 +207,18 @@ def _wire_audio_input(c):
         api_name=False, show_progress="hidden"
     )
     c.btn_ex_112.click(fn=lambda: ("data/112.mp3", "GPU", True), inputs=[], outputs=[c.audio_input, c.device_radio, c.is_preset], api_name=False)
     c.btn_ex_84.click(fn=lambda: ("data/84.mp3", "GPU", True), inputs=[], outputs=[c.audio_input, c.device_radio, c.is_preset], api_name=False)
     c.btn_ex_7.click(fn=lambda: ("data/7.mp3", "GPU", True), inputs=[], outputs=[c.audio_input, c.device_radio, c.is_preset], api_name=False)
     c.btn_ex_juz30.click(fn=lambda: ("data/Juz' 30.mp3", "GPU", True), inputs=[], outputs=[c.audio_input, c.device_radio, c.is_preset], api_name=False)
-    # Reset is_preset when user uploads/records their own audio (.input fires only on user interaction, not programmatic changes)
-    c.audio_input.input(fn=lambda: False, inputs=[], outputs=[c.is_preset], api_name=False, show_progress="hidden")
 def _wire_extract_chain(c):

 from src.ui.handlers import (
     wire_presets, toggle_resegment_panel,
     on_mode_change, on_verse_toggle, restore_anim_settings,
+    fetch_url_info, download_url_audio,
 )
 _EMPTY_PLACEHOLDER = (
 def wire_events(app, c):
     """Wire all event handlers to Gradio components."""
     _wire_preset_buttons(c)
+    _wire_input_mode_toggle(c)
     _wire_url_input(c)
     _wire_audio_input(c)
     _wire_extract_chain(c)
                  c.rs_silence, c.rs_speech, c.rs_pad)
def _wire_input_mode_toggle(c):
    """Attach click handlers to the Link / Upload / Record mode buttons.

    Clicking a mode button marks that button with the ``mode-active`` CSS
    class (and clears it from the other two), shows only the panel that
    belongs to the chosen mode, and shows the example row for every mode
    except "Link".

    Args:
        c: Component container exposing ``mode_link``, ``mode_upload``,
            ``mode_record``, ``link_panel``, ``upload_panel``,
            ``record_panel`` and ``example_row``.
    """
    mode_names = ("Link", "Upload", "Record")

    def _updates_for(selected):
        # Output order must match `targets` below:
        # three buttons, three panels, then the example row.
        button_updates = tuple(
            gr.update(elem_classes=["mode-active"] if name == selected else [])
            for name in mode_names
        )
        panel_updates = tuple(
            gr.update(visible=(name == selected)) for name in mode_names
        )
        example_row_update = gr.update(visible=(selected != "Link"))
        return (*button_updates, *panel_updates, example_row_update)

    def _handler_for(selected):
        # Factory binds `selected` per button, so the zero-argument click
        # callback does not suffer from late-binding of the loop variable.
        def _on_click():
            return _updates_for(selected)
        return _on_click

    mode_buttons = (c.mode_link, c.mode_upload, c.mode_record)
    targets = [
        *mode_buttons,
        c.link_panel, c.upload_panel, c.record_panel,
        c.example_row,
    ]
    for button, name in zip(mode_buttons, mode_names):
        button.click(fn=_handler_for(name), inputs=[], outputs=targets, api_name=False)
 def _wire_url_input(c):
+    """Wire URL paste → auto-fetch metadata → download button."""
+    def _on_url_change(url):
+        """Auto-fetch metadata when a URL is pasted."""
+        if not url or not url.strip():
+            return (
+                gr.update(visible=False),    # url_info_html
+                gr.update(visible=False),    # url_status
+                gr.update(visible=False),    # url_download_btn
+            )
+        # Show fetching status
+        yield (
+            gr.update(visible=False),
+            gr.update(value='<div style="text-align:center;padding:8px;opacity:0.7;">Fetching info...</div>', visible=True),
+            gr.update(visible=False),
+        )
+        try:
+            info_html = fetch_url_info(url)
+            if info_html is None:
+                yield (gr.update(visible=False), gr.update(visible=False), gr.update(visible=False))
+                return
+            yield (
+                gr.update(value=info_html, visible=True),
+                gr.update(visible=False),
+                gr.update(visible=True),     # show Download button
+            )
+        except gr.Error:
+            raise
+        except Exception as e:
+            yield (
+                gr.update(visible=False),
+                gr.update(
+                    value=f'<div style="color:var(--error-text-color);padding:8px;">Error: {str(e)[:200]}</div>',
+                    visible=True,
+                ),
+                gr.update(visible=False),
+            )
+    _fetch_outputs = [c.url_info_html, c.url_status, c.url_download_btn]
+    c.url_input.change(
+        fn=_on_url_change, inputs=[c.url_input], outputs=_fetch_outputs,
+        api_name=False, show_progress="hidden",
+    )
     def _on_download(url):
+        """Download audio after metadata was fetched."""
+        # Yield 1: loading state
         yield (
+            gr.update(),                     # audio_input
             gr.update(
                 value='<div style="text-align:center;padding:8px;">Downloading audio...</div>',
                 visible=True,
+            ),                               # url_status
+            gr.update(interactive=False),    # disable download btn
         )
+        # Yield 2: result
         try:
             wav_path, info_html = download_url_audio(url)
             yield (
+                wav_path,
+                gr.update(visible=False),
+                gr.update(interactive=True),
             )
         except gr.Error:
             raise
         except Exception as e:
             yield (
                 gr.update(),
                 gr.update(
+                    value=f'<div style="color:var(--error-text-color);padding:8px;">Download failed: {str(e)[:200]}</div>',
                     visible=True,
                 ),
                 gr.update(interactive=True),
             )
+    _dl_outputs = [c.audio_input, c.url_status, c.url_download_btn]
     c.url_download_btn.click(
+        fn=_on_download, inputs=[c.url_input], outputs=_dl_outputs,
         api_name=False, show_progress="hidden",
     )
         api_name=False, show_progress="hidden"
     )
+    # Bridge upload/record to hidden unified audio_input
+    c.audio_upload.change(fn=lambda x: x, inputs=[c.audio_upload], outputs=[c.audio_input], api_name=False, show_progress="hidden")
+    c.audio_record.change(fn=lambda x: x, inputs=[c.audio_record], outputs=[c.audio_input], api_name=False, show_progress="hidden")
     c.btn_ex_112.click(fn=lambda: ("data/112.mp3", "GPU", True), inputs=[], outputs=[c.audio_input, c.device_radio, c.is_preset], api_name=False)
     c.btn_ex_84.click(fn=lambda: ("data/84.mp3", "GPU", True), inputs=[], outputs=[c.audio_input, c.device_radio, c.is_preset], api_name=False)
     c.btn_ex_7.click(fn=lambda: ("data/7.mp3", "GPU", True), inputs=[], outputs=[c.audio_input, c.device_radio, c.is_preset], api_name=False)
     c.btn_ex_juz30.click(fn=lambda: ("data/Juz' 30.mp3", "GPU", True), inputs=[], outputs=[c.audio_input, c.device_radio, c.is_preset], api_name=False)
+    # Reset is_preset when user uploads/records their own audio
+    c.audio_upload.input(fn=lambda: False, inputs=[], outputs=[c.is_preset], api_name=False, show_progress="hidden")
+    c.audio_record.input(fn=lambda: False, inputs=[], outputs=[c.is_preset], api_name=False, show_progress="hidden")
 def _wire_extract_chain(c):

src/ui/handlers.py CHANGED Viewed

@@ -20,8 +20,53 @@ from config import (
 )
 def download_url_audio(url: str):
-    """Download audio from a URL using yt-dlp. Returns (wav_path, info_html)."""
     import yt_dlp
     if not url or not url.strip():
@@ -29,7 +74,6 @@ def download_url_audio(url: str):
     url = url.strip()
-    # Download audio as WAV (single extract_info call so PO token plugin can intercept)
     URL_DOWNLOAD_DIR.mkdir(parents=True, exist_ok=True)
     out_path = URL_DOWNLOAD_DIR / str(uuid.uuid4())
@@ -49,33 +93,15 @@ def download_url_audio(url: str):
         except Exception as e:
             raise gr.Error(f"Download failed: {str(e)[:200]}")
-    if info.get("_type") == "playlist":
-        raise gr.Error("Playlists are not supported. Please paste a single video/audio URL.")
-    duration = info.get("duration")
-    title = info.get("title", "Unknown")
-    thumbnail = info.get("thumbnail", "")
     wav_path = str(out_path) + ".wav"
     if not Path(wav_path).exists():
         raise gr.Error("Download completed but audio file was not created.")
-    # Build info card HTML
-    dur_str = f"{int(duration) // 60}:{int(duration) % 60:02d}" if duration else "unknown"
-    thumb_html = (
-        f'<img src="{thumbnail}" style="max-width:100%;max-height:120px;border-radius:8px;margin-bottom:4px;">'
-        if thumbnail else ""
-    )
-    info_html = (
-        f'<div style="padding:8px;border-radius:8px;background:var(--block-background-fill);'
-        f'border:1px solid var(--border-color-primary);">'
-        f'{thumb_html}'
-        f'<div style="font-weight:bold;font-size:14px;">{title}</div>'
-        f'<div style="font-size:12px;opacity:0.7;">Duration: {dur_str}</div>'
-        f'</div>'
-    )
-    return wav_path, info_html
 def create_segmentation_settings(id_suffix=""):

 )
+def _build_info_html(title, duration, thumbnail):
+    """Build HTML info card for a URL-sourced audio."""
+    dur_str = f"{int(duration) // 60}:{int(duration) % 60:02d}" if duration else "unknown"
+    thumb_html = (
+        f'<img src="{thumbnail}" style="max-width:100%;max-height:120px;border-radius:8px;margin-bottom:4px;">'
+        if thumbnail else ""
+    )
+    return (
+        f'<div style="padding:8px;border-radius:8px;background:var(--block-background-fill);'
+        f'border:1px solid var(--border-color-primary);">'
+        f'{thumb_html}'
+        f'<div style="font-weight:bold;font-size:14px;">{title}</div>'
+        f'<div style="font-size:12px;opacity:0.7;">Duration: {dur_str}</div>'
+        f'</div>'
+    )
def fetch_url_info(url: str):
    """Fetch metadata only (no download) and render it as an info card.

    Args:
        url: URL pasted by the user; surrounding whitespace is ignored.

    Returns:
        ``None`` when ``url`` is empty or blank, otherwise the HTML produced
        by ``_build_info_html`` for the resolved title, duration and
        thumbnail.

    Raises:
        gr.Error: If yt-dlp reports a download error, returns no metadata,
            the URL resolves to a playlist, or no duration is reported
            (treated as a live stream).
    """
    if not url or not url.strip():
        return None
    url = url.strip()

    # Deferred until after the blank-input guard so clearing the textbox does
    # not pay for importing yt-dlp.
    import yt_dlp

    with yt_dlp.YoutubeDL({"quiet": True, "no_warnings": True}) as ydl:
        try:
            info = ydl.extract_info(url, download=False)
        except yt_dlp.utils.DownloadError as e:
            # Chain the cause so the original traceback is kept in the logs.
            raise gr.Error(f"Could not fetch URL: {str(e)[:200]}") from e
    if not info:
        # NOTE(review): defensive — presumably extract_info can hand back an
        # empty result for some inputs; verify against the yt-dlp API docs.
        raise gr.Error("Could not fetch URL: no metadata was returned.")
    if info.get("_type") == "playlist":
        raise gr.Error("Playlists are not supported. Please paste a single video/audio URL.")
    duration = info.get("duration")
    if duration is None:
        raise gr.Error("Live streams are not supported. Please use a completed video/audio.")
    title = info.get("title", "Unknown")
    thumbnail = info.get("thumbnail", "")
    return _build_info_html(title, duration, thumbnail)
 def download_url_audio(url: str):
+    """Full download of audio from URL. Returns (wav_path, info_html)."""
     import yt_dlp
     if not url or not url.strip():
     url = url.strip()
     URL_DOWNLOAD_DIR.mkdir(parents=True, exist_ok=True)
     out_path = URL_DOWNLOAD_DIR / str(uuid.uuid4())
         except Exception as e:
             raise gr.Error(f"Download failed: {str(e)[:200]}")
     wav_path = str(out_path) + ".wav"
     if not Path(wav_path).exists():
         raise gr.Error("Download completed but audio file was not created.")
+    title = info.get("title", "Unknown")
+    duration = info.get("duration")
+    thumbnail = info.get("thumbnail", "")
+    return wav_path, _build_info_html(title, duration, thumbnail)
 def create_segmentation_settings(id_suffix=""):

src/ui/interface.py CHANGED Viewed

@@ -20,6 +20,7 @@ from config import (
     MEGA_TEXT_SIZE_MIN, MEGA_TEXT_SIZE_MAX, MEGA_TEXT_SIZE_STEP, MEGA_TEXT_SIZE_DEFAULT,
     MEGA_LINE_SPACING_MIN, MEGA_LINE_SPACING_MAX, MEGA_LINE_SPACING_STEP, MEGA_LINE_SPACING_DEFAULT,
     LEFT_COLUMN_SCALE, RIGHT_COLUMN_SCALE,
 )
 from src.ui.styles import build_css
 from src.ui.js_config import build_js_head
@@ -101,24 +102,48 @@ def build_interface():
 def _build_left_column(c):
     """Build the left input column."""
     with gr.Column(scale=LEFT_COLUMN_SCALE, elem_id="left-col"):
-        with gr.Group():
             c.url_input = gr.Textbox(
-                label="Or paste a URL (YouTube, SoundCloud, etc.)",
-                placeholder="https://youtube.com/watch?v=...",
                 lines=1,
             )
-            c.url_download_btn = gr.Button("Download Audio", size="sm", variant="secondary")
             c.url_status = gr.HTML(value="", visible=False)
             c.url_info_html = gr.HTML(value="", visible=False)
-        c.audio_input = gr.Audio(
-            label="Upload Recitation",
-            sources=["upload", "microphone"],
-            type="filepath"
-        )
-        # Example audio files
-        with gr.Row():
             c.btn_ex_112 = gr.Button("112", size="sm", min_width=0)
             c.btn_ex_84 = gr.Button("84", size="sm", min_width=0)
             c.btn_ex_7 = gr.Button("7", size="sm", min_width=0)

     MEGA_TEXT_SIZE_MIN, MEGA_TEXT_SIZE_MAX, MEGA_TEXT_SIZE_STEP, MEGA_TEXT_SIZE_DEFAULT,
     MEGA_LINE_SPACING_MIN, MEGA_LINE_SPACING_MAX, MEGA_LINE_SPACING_STEP, MEGA_LINE_SPACING_DEFAULT,
     LEFT_COLUMN_SCALE, RIGHT_COLUMN_SCALE,
+    DEFAULT_INPUT_MODE,
 )
 from src.ui.styles import build_css
 from src.ui.js_config import build_js_head
 def _build_left_column(c):
     """Build the left input column."""
     with gr.Column(scale=LEFT_COLUMN_SCALE, elem_id="left-col"):
+        _is_link = DEFAULT_INPUT_MODE == "Link"
+        _is_upload = DEFAULT_INPUT_MODE == "Upload"
+        _is_record = DEFAULT_INPUT_MODE == "Record"
+        # Input mode toggle
+        with gr.Row(elem_id="input-mode-row"):
+            c.mode_link = gr.Button("Link", size="sm", min_width=0,
+                                     elem_classes=["mode-active"] if _is_link else [])
+            c.mode_upload = gr.Button("Upload", size="sm", min_width=0,
+                                       elem_classes=["mode-active"] if _is_upload else [])
+            c.mode_record = gr.Button("Record", size="sm", min_width=0,
+                                       elem_classes=["mode-active"] if _is_record else [])
+        # Link panel
+        with gr.Group(visible=_is_link, elem_id="link-panel") as c.link_panel:
             c.url_input = gr.Textbox(
+                label="Paste a link",
+                placeholder="TikTok, SoundCloud, Archive.org, or direct audio link",
                 lines=1,
             )
             c.url_status = gr.HTML(value="", visible=False)
             c.url_info_html = gr.HTML(value="", visible=False)
+            c.url_download_btn = gr.Button("Download", size="sm", variant="primary", visible=False)
+            gr.Markdown(
+                "Supports [1800+ sites](https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md)"
+                " — TikTok, SoundCloud, Archive.org, direct links, and more",
+                elem_id="url-help",
+            )
+        # Upload panel
+        with gr.Group(visible=_is_upload, elem_id="upload-panel") as c.upload_panel:
+            c.audio_upload = gr.Audio(label="Upload Recitation", sources=["upload"], type="filepath")
+        # Record panel
+        with gr.Group(visible=_is_record, elem_id="record-panel") as c.record_panel:
+            c.audio_record = gr.Audio(label="Record Recitation", sources=["microphone"], type="filepath")
+        # Hidden unified audio (fed by upload, record, or URL download)
+        c.audio_input = gr.Audio(visible=False, type="filepath")
+        # Example audio files (hidden in Link mode)
+        with gr.Row(visible=not _is_link, elem_id="example-row") as c.example_row:
             c.btn_ex_112 = gr.Button("112", size="sm", min_width=0)
             c.btn_ex_84 = gr.Button("84", size="sm", min_width=0)
             c.btn_ex_7 = gr.Button("7", size="sm", min_width=0)

src/ui/segments.py CHANGED Viewed

@@ -213,7 +213,7 @@ def simplify_ref(ref: str) -> str:
     return ref
-def render_segment_card(seg: SegmentInfo, idx: int, full_audio_url: str = "", render_key: str = "") -> str:
     """Render a single segment as an HTML card with optional audio player."""
     is_special = seg.matched_ref in ALL_SPECIAL_REFS
     confidence_class = get_confidence_class(seg.match_score)
@@ -249,10 +249,13 @@ def render_segment_card(seg: SegmentInfo, idx: int, full_audio_url: str = "", re
     if seg.error:
         error_html = f'<div class="segment-error">{seg.error}</div>'
-    # Audio player HTML — uses media fragment of the full recording
     audio_html = ""
-    if full_audio_url:
-        audio_src = f"{full_audio_url}#t={seg.start_time:.3f},{seg.end_time:.3f}"
         # Add animate button only if segment has a Quran verse ref (word spans for animation).
         # Basmala/Isti'adha get animate because they have indexed word spans for MFA.
         # Transition segments (Amin, Takbir, Tahmeed) don't.
@@ -350,12 +353,13 @@ def render_segment_card(seg: SegmentInfo, idx: int, full_audio_url: str = "", re
     return html
-def render_segments(segments: list, full_audio_url: str = "") -> str:
     """Render all segments as HTML with optional audio players.
     Args:
         segments: List of SegmentInfo objects
-        full_audio_url: URL to full audio WAV (media fragments used for per-segment playback)
     """
     if not segments:
         return '<div class="no-segments">No segments detected</div>'
@@ -443,7 +447,7 @@ def render_segments(segments: list, full_audio_url: str = "") -> str:
     t_cards = time.time()
     for idx, seg in enumerate(segments):
-        html_parts.append(render_segment_card(seg, idx, full_audio_url, render_key))
     html_parts.append('</div>')
     print(f"[PROFILE] Segment cards: {time.time() - t_cards:.3f}s ({len(segments)} cards, HTML only)")

     return ref
+def render_segment_card(seg: SegmentInfo, idx: int, full_audio_url: str = "", render_key: str = "", segment_dir: str = "") -> str:
     """Render a single segment as an HTML card with optional audio player."""
     is_special = seg.matched_ref in ALL_SPECIAL_REFS
     confidence_class = get_confidence_class(seg.match_score)
     if seg.error:
         error_html = f'<div class="segment-error">{seg.error}</div>'
+    # Audio player HTML — per-segment WAV (preferred) or media fragment fallback
     audio_html = ""
+    if segment_dir or full_audio_url:
+        if segment_dir:
+            audio_src = f"/gradio_api/file={segment_dir}/seg_{idx}.wav"
+        else:
+            audio_src = f"{full_audio_url}#t={seg.start_time:.3f},{seg.end_time:.3f}"
         # Add animate button only if segment has a Quran verse ref (word spans for animation).
         # Basmala/Isti'adha get animate because they have indexed word spans for MFA.
         # Transition segments (Amin, Takbir, Tahmeed) don't.
     return html
+def render_segments(segments: list, full_audio_url: str = "", segment_dir: str = "") -> str:
     """Render all segments as HTML with optional audio players.
     Args:
         segments: List of SegmentInfo objects
+        full_audio_url: URL to full audio WAV (used by mega card / Animate All)
+        segment_dir: Path to segment directory containing per-segment WAV files
     """
     if not segments:
         return '<div class="no-segments">No segments detected</div>'
     t_cards = time.time()
     for idx, seg in enumerate(segments):
+        html_parts.append(render_segment_card(seg, idx, full_audio_url, render_key, segment_dir))
     html_parts.append('</div>')
     print(f"[PROFILE] Segment cards: {time.time() - t_cards:.3f}s ({len(segments)} cards, HTML only)")

src/ui/styles.py CHANGED Viewed

@@ -432,4 +432,21 @@ def build_css() -> str:
     .dark .segment-underseg {{ background: rgba(255, 140, 0, 0.2); border-color: #ff8c00; }}
     .dark .segment-special {{ background: rgba(92, 107, 192, 0.2); border-color: #5c6bc0; border-style: dashed; }}
     """

     .dark .segment-underseg {{ background: rgba(255, 140, 0, 0.2); border-color: #ff8c00; }}
     .dark .segment-special {{ background: rgba(92, 107, 192, 0.2); border-color: #5c6bc0; border-style: dashed; }}
+    /* Input mode toggle */
+    #input-mode-row {{ gap: 0 !important; }}
+    #input-mode-row button {{
+        border-radius: 0 !important;
+        border: 1px solid var(--border-color-primary) !important;
+    }}
+    #input-mode-row button:first-child {{ border-radius: 8px 0 0 8px !important; }}
+    #input-mode-row button:last-child {{ border-radius: 0 8px 8px 0 !important; }}
+    #input-mode-row button:not(:first-child) {{ border-left: none !important; }}
+    .mode-active {{
+        background: var(--button-primary-background-fill) !important;
+        color: var(--button-primary-text-color) !important;
+        border-color: var(--button-primary-background-fill) !important;
+    }}
+    #url-help {{ font-size: 12px; opacity: 0.7; margin-top: -8px; }}
+    #url-help a {{ color: var(--link-text-color); }}
     """