Spaces:
Running on Zero
Running on Zero
feat: redesign audio input as Link/Upload/Record toggle; perf: use per-segment WAVs and skip audio I/O for API calls
Browse files
- config.py +1 -0
- src/pipeline.py +23 -5
- src/ui/event_wiring.py +93 -22
- src/ui/handlers.py +50 -24
- src/ui/interface.py +36 -11
- src/ui/segments.py +11 -7
- src/ui/styles.py +17 -0
config.py
CHANGED
|
@@ -21,6 +21,7 @@ PORT = 6902
|
|
| 21 |
RESAMPLE_TYPE = "soxr_lq"
|
| 22 |
SEGMENT_AUDIO_DIR = Path("/tmp/segments") # WAV files written here per request
|
| 23 |
URL_DOWNLOAD_DIR = Path("/tmp/url_downloads") # Audio downloaded from URLs via yt-dlp
|
|
|
|
| 24 |
DELETE_CACHE_FREQUENCY = 3600*5 # Gradio cache cleanup interval (seconds)
|
| 25 |
DELETE_CACHE_AGE = 3600*5 # Delete cached files older than this (seconds)
|
| 26 |
|
|
|
|
| 21 |
RESAMPLE_TYPE = "soxr_lq"
|
| 22 |
SEGMENT_AUDIO_DIR = Path("/tmp/segments") # WAV files written here per request
|
| 23 |
URL_DOWNLOAD_DIR = Path("/tmp/url_downloads") # Audio downloaded from URLs via yt-dlp
|
| 24 |
+
DEFAULT_INPUT_MODE = "Upload" # "Link", "Upload", or "Record"
|
| 25 |
DELETE_CACHE_FREQUENCY = 3600*5 # Gradio cache cleanup interval (seconds)
|
| 26 |
DELETE_CACHE_AGE = 3600*5 # Delete cached files older than this (seconds)
|
| 27 |
|
src/pipeline.py
CHANGED
|
@@ -840,6 +840,10 @@ def _run_post_vad_pipeline(
|
|
| 840 |
|
| 841 |
json_output = {"segments": segments_list}
|
| 842 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 843 |
# Compute full audio URL (file written in background after render)
|
| 844 |
full_path = segment_dir / "full.wav"
|
| 845 |
full_audio_url = f"/gradio_api/file={full_path}"
|
|
@@ -858,18 +862,20 @@ def _run_post_vad_pipeline(
|
|
| 858 |
print(f"[DIAG] Before render_segments: RSS={_rss:.0f}MB, segments={len(segments)}")
|
| 859 |
|
| 860 |
t_render = time.time()
|
| 861 |
-
html = render_segments(segments, full_audio_url=full_audio_url)
|
| 862 |
print(f"[PROFILE] render_segments: {time.time() - t_render:.3f}s ({len(segments)} segments, HTML={len(html)/1e6:.2f}MB)")
|
| 863 |
|
| 864 |
-
# Write full.wav
|
| 865 |
# sf.write converts float32→PCM16 internally (no extra int16 copy in memory)
|
| 866 |
-
#
|
| 867 |
import threading
|
| 868 |
import soundfile as sf
|
| 869 |
_audio_ref = audio # prevent GC while thread runs
|
| 870 |
_sr_ref = sample_rate
|
| 871 |
_path_ref = str(full_path)
|
| 872 |
-
|
|
|
|
|
|
|
| 873 |
import os
|
| 874 |
# Diagnostics: memory + disk before write
|
| 875 |
rss_mb = -1
|
|
@@ -894,7 +900,19 @@ def _run_post_vad_pipeline(
|
|
| 894 |
print(f"[PROFILE] Full audio write (bg): {time.time() - t:.3f}s ({expected_mb:.0f}MB)")
|
| 895 |
except Exception as e:
|
| 896 |
print(f"[ERROR] Full audio write failed: {e}")
|
| 897 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 898 |
|
| 899 |
print("[STAGE] Done!")
|
| 900 |
|
|
|
|
| 840 |
|
| 841 |
json_output = {"segments": segments_list}
|
| 842 |
|
| 843 |
+
# API callers only need json_output; skip HTML render and audio file writes
|
| 844 |
+
if endpoint != "ui":
|
| 845 |
+
return "", json_output, str(segment_dir), log_row
|
| 846 |
+
|
| 847 |
# Compute full audio URL (file written in background after render)
|
| 848 |
full_path = segment_dir / "full.wav"
|
| 849 |
full_audio_url = f"/gradio_api/file={full_path}"
|
|
|
|
| 862 |
print(f"[DIAG] Before render_segments: RSS={_rss:.0f}MB, segments={len(segments)}")
|
| 863 |
|
| 864 |
t_render = time.time()
|
| 865 |
+
html = render_segments(segments, full_audio_url=full_audio_url, segment_dir=str(segment_dir))
|
| 866 |
print(f"[PROFILE] render_segments: {time.time() - t_render:.3f}s ({len(segments)} segments, HTML={len(html)/1e6:.2f}MB)")
|
| 867 |
|
| 868 |
+
# Write full.wav + per-segment WAVs in background thread
|
| 869 |
# sf.write converts float32→PCM16 internally (no extra int16 copy in memory)
|
| 870 |
+
# Files ready before user can click play (browser still rendering cards)
|
| 871 |
import threading
|
| 872 |
import soundfile as sf
|
| 873 |
_audio_ref = audio # prevent GC while thread runs
|
| 874 |
_sr_ref = sample_rate
|
| 875 |
_path_ref = str(full_path)
|
| 876 |
+
_seg_dir_ref = str(segment_dir)
|
| 877 |
+
_segments_ref = segments
|
| 878 |
+
def _write_audio_files():
|
| 879 |
import os
|
| 880 |
# Diagnostics: memory + disk before write
|
| 881 |
rss_mb = -1
|
|
|
|
| 900 |
print(f"[PROFILE] Full audio write (bg): {time.time() - t:.3f}s ({expected_mb:.0f}MB)")
|
| 901 |
except Exception as e:
|
| 902 |
print(f"[ERROR] Full audio write failed: {e}")
|
| 903 |
+
return # Can't write per-segment files without full.wav succeeding
|
| 904 |
+
# Per-segment WAVs (slices from float32 array, converted to PCM16 by soundfile)
|
| 905 |
+
t_segs = time.time()
|
| 906 |
+
try:
|
| 907 |
+
for i, seg in enumerate(_segments_ref):
|
| 908 |
+
start = int(seg.start_time * _sr_ref)
|
| 909 |
+
end = int(seg.end_time * _sr_ref)
|
| 910 |
+
sf.write(os.path.join(_seg_dir_ref, f"seg_{i}.wav"),
|
| 911 |
+
_audio_ref[start:end], _sr_ref, format='WAV', subtype='PCM_16')
|
| 912 |
+
print(f"[PROFILE] Per-segment WAVs (bg): {time.time() - t_segs:.3f}s ({len(_segments_ref)} files)")
|
| 913 |
+
except Exception as e:
|
| 914 |
+
print(f"[ERROR] Per-segment WAV write failed: {e}")
|
| 915 |
+
threading.Thread(target=_write_audio_files, daemon=True).start()
|
| 916 |
|
| 917 |
print("[STAGE] Done!")
|
| 918 |
|
src/ui/event_wiring.py
CHANGED
|
@@ -18,7 +18,7 @@ from src.ui.progress_bar import pipeline_progress_bar_html
|
|
| 18 |
from src.ui.handlers import (
|
| 19 |
wire_presets, toggle_resegment_panel,
|
| 20 |
on_mode_change, on_verse_toggle, restore_anim_settings,
|
| 21 |
-
download_url_audio,
|
| 22 |
)
|
| 23 |
|
| 24 |
_EMPTY_PLACEHOLDER = (
|
|
@@ -30,6 +30,7 @@ _EMPTY_PLACEHOLDER = (
|
|
| 30 |
def wire_events(app, c):
|
| 31 |
"""Wire all event handlers to Gradio components."""
|
| 32 |
_wire_preset_buttons(c)
|
|
|
|
| 33 |
_wire_url_input(c)
|
| 34 |
_wire_audio_input(c)
|
| 35 |
_wire_extract_chain(c)
|
|
@@ -51,50 +52,115 @@ def _wire_preset_buttons(c):
|
|
| 51 |
c.rs_silence, c.rs_speech, c.rs_pad)
|
| 52 |
|
| 53 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
def _wire_url_input(c):
|
| 55 |
-
"""Wire URL
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
def _on_download(url):
|
| 58 |
-
|
|
|
|
| 59 |
yield (
|
| 60 |
-
gr.update(),
|
| 61 |
-
gr.update(visible=False), # hide old info
|
| 62 |
gr.update(
|
| 63 |
value='<div style="text-align:center;padding:8px;">Downloading audio...</div>',
|
| 64 |
visible=True,
|
| 65 |
-
),
|
| 66 |
-
gr.update(interactive=False),
|
| 67 |
)
|
| 68 |
|
| 69 |
-
# Yield 2:
|
| 70 |
try:
|
| 71 |
wav_path, info_html = download_url_audio(url)
|
| 72 |
yield (
|
| 73 |
-
wav_path,
|
| 74 |
-
gr.update(
|
| 75 |
-
gr.update(
|
| 76 |
-
gr.update(interactive=True), # re-enable button
|
| 77 |
)
|
| 78 |
except gr.Error:
|
| 79 |
raise
|
| 80 |
except Exception as e:
|
| 81 |
yield (
|
| 82 |
gr.update(),
|
| 83 |
-
gr.update(visible=False),
|
| 84 |
gr.update(
|
| 85 |
-
value=f'<div style="color:var(--error-text-color);padding:8px;">
|
| 86 |
visible=True,
|
| 87 |
),
|
| 88 |
gr.update(interactive=True),
|
| 89 |
)
|
| 90 |
|
| 91 |
-
|
| 92 |
c.url_download_btn.click(
|
| 93 |
-
fn=_on_download, inputs=[c.url_input], outputs=
|
| 94 |
-
api_name=False, show_progress="hidden",
|
| 95 |
-
)
|
| 96 |
-
c.url_input.submit(
|
| 97 |
-
fn=_on_download, inputs=[c.url_input], outputs=_url_outputs,
|
| 98 |
api_name=False, show_progress="hidden",
|
| 99 |
)
|
| 100 |
|
|
@@ -141,13 +207,18 @@ def _wire_audio_input(c):
|
|
| 141 |
api_name=False, show_progress="hidden"
|
| 142 |
)
|
| 143 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
c.btn_ex_112.click(fn=lambda: ("data/112.mp3", "GPU", True), inputs=[], outputs=[c.audio_input, c.device_radio, c.is_preset], api_name=False)
|
| 145 |
c.btn_ex_84.click(fn=lambda: ("data/84.mp3", "GPU", True), inputs=[], outputs=[c.audio_input, c.device_radio, c.is_preset], api_name=False)
|
| 146 |
c.btn_ex_7.click(fn=lambda: ("data/7.mp3", "GPU", True), inputs=[], outputs=[c.audio_input, c.device_radio, c.is_preset], api_name=False)
|
| 147 |
c.btn_ex_juz30.click(fn=lambda: ("data/Juz' 30.mp3", "GPU", True), inputs=[], outputs=[c.audio_input, c.device_radio, c.is_preset], api_name=False)
|
| 148 |
|
| 149 |
-
# Reset is_preset when user uploads/records their own audio
|
| 150 |
-
c.
|
|
|
|
| 151 |
|
| 152 |
|
| 153 |
def _wire_extract_chain(c):
|
|
|
|
| 18 |
from src.ui.handlers import (
|
| 19 |
wire_presets, toggle_resegment_panel,
|
| 20 |
on_mode_change, on_verse_toggle, restore_anim_settings,
|
| 21 |
+
fetch_url_info, download_url_audio,
|
| 22 |
)
|
| 23 |
|
| 24 |
_EMPTY_PLACEHOLDER = (
|
|
|
|
| 30 |
def wire_events(app, c):
|
| 31 |
"""Wire all event handlers to Gradio components."""
|
| 32 |
_wire_preset_buttons(c)
|
| 33 |
+
_wire_input_mode_toggle(c)
|
| 34 |
_wire_url_input(c)
|
| 35 |
_wire_audio_input(c)
|
| 36 |
_wire_extract_chain(c)
|
|
|
|
| 52 |
c.rs_silence, c.rs_speech, c.rs_pad)
|
| 53 |
|
| 54 |
|
| 55 |
+
def _wire_input_mode_toggle(c):
|
| 56 |
+
"""Wire Link/Upload/Record toggle buttons."""
|
| 57 |
+
|
| 58 |
+
def _switch_to(mode):
|
| 59 |
+
is_link = mode == "Link"
|
| 60 |
+
is_upload = mode == "Upload"
|
| 61 |
+
is_record = mode == "Record"
|
| 62 |
+
return (
|
| 63 |
+
gr.update(elem_classes=["mode-active"] if is_link else []),
|
| 64 |
+
gr.update(elem_classes=["mode-active"] if is_upload else []),
|
| 65 |
+
gr.update(elem_classes=["mode-active"] if is_record else []),
|
| 66 |
+
gr.update(visible=is_link), # link_panel
|
| 67 |
+
gr.update(visible=is_upload), # upload_panel
|
| 68 |
+
gr.update(visible=is_record), # record_panel
|
| 69 |
+
gr.update(visible=not is_link), # example_row
|
| 70 |
+
)
|
| 71 |
+
|
| 72 |
+
_toggle_outputs = [
|
| 73 |
+
c.mode_link, c.mode_upload, c.mode_record,
|
| 74 |
+
c.link_panel, c.upload_panel, c.record_panel,
|
| 75 |
+
c.example_row,
|
| 76 |
+
]
|
| 77 |
+
c.mode_link.click(fn=lambda: _switch_to("Link"), inputs=[], outputs=_toggle_outputs, api_name=False)
|
| 78 |
+
c.mode_upload.click(fn=lambda: _switch_to("Upload"), inputs=[], outputs=_toggle_outputs, api_name=False)
|
| 79 |
+
c.mode_record.click(fn=lambda: _switch_to("Record"), inputs=[], outputs=_toggle_outputs, api_name=False)
|
| 80 |
+
|
| 81 |
+
|
| 82 |
def _wire_url_input(c):
|
| 83 |
+
"""Wire URL paste → auto-fetch metadata → download button."""
|
| 84 |
+
|
| 85 |
+
def _on_url_change(url):
    """Auto-fetch metadata when a URL is pasted.

    Generator event handler. Every yielded 3-tuple holds the updates for
    (url_info_html, url_status, url_download_btn), in that order.
    """
    if not url or not url.strip():
        # This function is a generator (it yields below), so a plain
        # `return <tuple>` would be discarded and the stale info card /
        # Download button would stay on screen. Yield the reset instead.
        yield (
            gr.update(visible=False),  # url_info_html
            gr.update(visible=False),  # url_status
            gr.update(visible=False),  # url_download_btn
        )
        return

    # Show fetching status
    yield (
        gr.update(visible=False),
        gr.update(value='<div style="text-align:center;padding:8px;opacity:0.7;">Fetching info...</div>', visible=True),
        gr.update(visible=False),
    )

    try:
        info_html = fetch_url_info(url)
        if info_html is None:
            # Nothing to show: hide everything again.
            yield (gr.update(visible=False), gr.update(visible=False), gr.update(visible=False))
            return
        yield (
            gr.update(value=info_html, visible=True),
            gr.update(visible=False),
            gr.update(visible=True),  # show Download button
        )
    except gr.Error:
        # Let Gradio surface its own user-facing errors unchanged.
        raise
    except Exception as e:
        yield (
            gr.update(visible=False),
            gr.update(
                value=f'<div style="color:var(--error-text-color);padding:8px;">Error: {str(e)[:200]}</div>',
                visible=True,
            ),
            gr.update(visible=False),
        )
|
| 122 |
+
|
| 123 |
+
_fetch_outputs = [c.url_info_html, c.url_status, c.url_download_btn]
|
| 124 |
+
c.url_input.change(
|
| 125 |
+
fn=_on_url_change, inputs=[c.url_input], outputs=_fetch_outputs,
|
| 126 |
+
api_name=False, show_progress="hidden",
|
| 127 |
+
)
|
| 128 |
|
| 129 |
def _on_download(url):
    """Fetch the audio behind *url*, streaming UI state as it goes.

    Generator event handler. Every yielded 3-tuple holds the updates for
    (audio_input, url_status, url_download_btn), in that order.
    """
    # Phase 1: keep the current audio, show a progress banner, lock the button.
    busy_banner = gr.update(
        value='<div style="text-align:center;padding:8px;">Downloading audio...</div>',
        visible=True,
    )
    yield gr.update(), busy_banner, gr.update(interactive=False)

    # Phase 2: publish the outcome and unlock the button again.
    try:
        wav_path, _info_html = download_url_audio(url)
        yield wav_path, gr.update(visible=False), gr.update(interactive=True)
    except gr.Error:
        # Gradio's own errors propagate untouched.
        raise
    except Exception as e:
        failure_banner = gr.update(
            value=f'<div style="color:var(--error-text-color);padding:8px;">Download failed: {str(e)[:200]}</div>',
            visible=True,
        )
        yield gr.update(), failure_banner, gr.update(interactive=True)
|
| 160 |
|
| 161 |
+
_dl_outputs = [c.audio_input, c.url_status, c.url_download_btn]
|
| 162 |
c.url_download_btn.click(
|
| 163 |
+
fn=_on_download, inputs=[c.url_input], outputs=_dl_outputs,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
api_name=False, show_progress="hidden",
|
| 165 |
)
|
| 166 |
|
|
|
|
| 207 |
api_name=False, show_progress="hidden"
|
| 208 |
)
|
| 209 |
|
| 210 |
+
# Bridge upload/record to hidden unified audio_input
|
| 211 |
+
c.audio_upload.change(fn=lambda x: x, inputs=[c.audio_upload], outputs=[c.audio_input], api_name=False, show_progress="hidden")
|
| 212 |
+
c.audio_record.change(fn=lambda x: x, inputs=[c.audio_record], outputs=[c.audio_input], api_name=False, show_progress="hidden")
|
| 213 |
+
|
| 214 |
c.btn_ex_112.click(fn=lambda: ("data/112.mp3", "GPU", True), inputs=[], outputs=[c.audio_input, c.device_radio, c.is_preset], api_name=False)
|
| 215 |
c.btn_ex_84.click(fn=lambda: ("data/84.mp3", "GPU", True), inputs=[], outputs=[c.audio_input, c.device_radio, c.is_preset], api_name=False)
|
| 216 |
c.btn_ex_7.click(fn=lambda: ("data/7.mp3", "GPU", True), inputs=[], outputs=[c.audio_input, c.device_radio, c.is_preset], api_name=False)
|
| 217 |
c.btn_ex_juz30.click(fn=lambda: ("data/Juz' 30.mp3", "GPU", True), inputs=[], outputs=[c.audio_input, c.device_radio, c.is_preset], api_name=False)
|
| 218 |
|
| 219 |
+
# Reset is_preset when user uploads/records their own audio
|
| 220 |
+
c.audio_upload.input(fn=lambda: False, inputs=[], outputs=[c.is_preset], api_name=False, show_progress="hidden")
|
| 221 |
+
c.audio_record.input(fn=lambda: False, inputs=[], outputs=[c.is_preset], api_name=False, show_progress="hidden")
|
| 222 |
|
| 223 |
|
| 224 |
def _wire_extract_chain(c):
|
src/ui/handlers.py
CHANGED
|
@@ -20,8 +20,53 @@ from config import (
|
|
| 20 |
)
|
| 21 |
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
def download_url_audio(url: str):
|
| 24 |
-
"""
|
| 25 |
import yt_dlp
|
| 26 |
|
| 27 |
if not url or not url.strip():
|
|
@@ -29,7 +74,6 @@ def download_url_audio(url: str):
|
|
| 29 |
|
| 30 |
url = url.strip()
|
| 31 |
|
| 32 |
-
# Download audio as WAV (single extract_info call so PO token plugin can intercept)
|
| 33 |
URL_DOWNLOAD_DIR.mkdir(parents=True, exist_ok=True)
|
| 34 |
out_path = URL_DOWNLOAD_DIR / str(uuid.uuid4())
|
| 35 |
|
|
@@ -49,33 +93,15 @@ def download_url_audio(url: str):
|
|
| 49 |
except Exception as e:
|
| 50 |
raise gr.Error(f"Download failed: {str(e)[:200]}")
|
| 51 |
|
| 52 |
-
if info.get("_type") == "playlist":
|
| 53 |
-
raise gr.Error("Playlists are not supported. Please paste a single video/audio URL.")
|
| 54 |
-
|
| 55 |
-
duration = info.get("duration")
|
| 56 |
-
title = info.get("title", "Unknown")
|
| 57 |
-
thumbnail = info.get("thumbnail", "")
|
| 58 |
-
|
| 59 |
wav_path = str(out_path) + ".wav"
|
| 60 |
if not Path(wav_path).exists():
|
| 61 |
raise gr.Error("Download completed but audio file was not created.")
|
| 62 |
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
f'<img src="{thumbnail}" style="max-width:100%;max-height:120px;border-radius:8px;margin-bottom:4px;">'
|
| 67 |
-
if thumbnail else ""
|
| 68 |
-
)
|
| 69 |
-
info_html = (
|
| 70 |
-
f'<div style="padding:8px;border-radius:8px;background:var(--block-background-fill);'
|
| 71 |
-
f'border:1px solid var(--border-color-primary);">'
|
| 72 |
-
f'{thumb_html}'
|
| 73 |
-
f'<div style="font-weight:bold;font-size:14px;">{title}</div>'
|
| 74 |
-
f'<div style="font-size:12px;opacity:0.7;">Duration: {dur_str}</div>'
|
| 75 |
-
f'</div>'
|
| 76 |
-
)
|
| 77 |
|
| 78 |
-
return wav_path,
|
| 79 |
|
| 80 |
|
| 81 |
def create_segmentation_settings(id_suffix=""):
|
|
|
|
| 20 |
)
|
| 21 |
|
| 22 |
|
| 23 |
+
def _build_info_html(title, duration, thumbnail):
|
| 24 |
+
"""Build HTML info card for a URL-sourced audio."""
|
| 25 |
+
dur_str = f"{int(duration) // 60}:{int(duration) % 60:02d}" if duration else "unknown"
|
| 26 |
+
thumb_html = (
|
| 27 |
+
f'<img src="{thumbnail}" style="max-width:100%;max-height:120px;border-radius:8px;margin-bottom:4px;">'
|
| 28 |
+
if thumbnail else ""
|
| 29 |
+
)
|
| 30 |
+
return (
|
| 31 |
+
f'<div style="padding:8px;border-radius:8px;background:var(--block-background-fill);'
|
| 32 |
+
f'border:1px solid var(--border-color-primary);">'
|
| 33 |
+
f'{thumb_html}'
|
| 34 |
+
f'<div style="font-weight:bold;font-size:14px;">{title}</div>'
|
| 35 |
+
f'<div style="font-size:12px;opacity:0.7;">Duration: {dur_str}</div>'
|
| 36 |
+
f'</div>'
|
| 37 |
+
)
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def fetch_url_info(url: str):
    """Fetch metadata only (no download).

    Args:
        url: Address pasted by the user; surrounding whitespace is ignored.

    Returns:
        The info-card HTML for the URL, or None when *url* is empty/blank.

    Raises:
        gr.Error: if the metadata lookup fails, the URL is a playlist, or
            no duration is reported.
    """
    if not url or not url.strip():
        return None

    # Imported after the empty-input guard so blank input never pays for
    # (or depends on) loading yt-dlp.
    import yt_dlp

    url = url.strip()

    with yt_dlp.YoutubeDL({"quiet": True, "no_warnings": True}) as ydl:
        try:
            info = ydl.extract_info(url, download=False)
        except yt_dlp.utils.DownloadError as e:
            # Chain the cause so the original traceback stays in the logs.
            raise gr.Error(f"Could not fetch URL: {str(e)[:200]}") from e

    if info.get("_type") == "playlist":
        raise gr.Error("Playlists are not supported. Please paste a single video/audio URL.")

    duration = info.get("duration")
    # NOTE(review): a missing duration is treated as "live stream" here;
    # confirm that direct audio links always report a duration.
    if duration is None:
        raise gr.Error("Live streams are not supported. Please use a completed video/audio.")

    title = info.get("title", "Unknown")
    thumbnail = info.get("thumbnail", "")

    return _build_info_html(title, duration, thumbnail)
|
| 66 |
+
|
| 67 |
+
|
| 68 |
def download_url_audio(url: str):
|
| 69 |
+
"""Full download of audio from URL. Returns (wav_path, info_html)."""
|
| 70 |
import yt_dlp
|
| 71 |
|
| 72 |
if not url or not url.strip():
|
|
|
|
| 74 |
|
| 75 |
url = url.strip()
|
| 76 |
|
|
|
|
| 77 |
URL_DOWNLOAD_DIR.mkdir(parents=True, exist_ok=True)
|
| 78 |
out_path = URL_DOWNLOAD_DIR / str(uuid.uuid4())
|
| 79 |
|
|
|
|
| 93 |
except Exception as e:
|
| 94 |
raise gr.Error(f"Download failed: {str(e)[:200]}")
|
| 95 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
wav_path = str(out_path) + ".wav"
|
| 97 |
if not Path(wav_path).exists():
|
| 98 |
raise gr.Error("Download completed but audio file was not created.")
|
| 99 |
|
| 100 |
+
title = info.get("title", "Unknown")
|
| 101 |
+
duration = info.get("duration")
|
| 102 |
+
thumbnail = info.get("thumbnail", "")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
|
| 104 |
+
return wav_path, _build_info_html(title, duration, thumbnail)
|
| 105 |
|
| 106 |
|
| 107 |
def create_segmentation_settings(id_suffix=""):
|
src/ui/interface.py
CHANGED
|
@@ -20,6 +20,7 @@ from config import (
|
|
| 20 |
MEGA_TEXT_SIZE_MIN, MEGA_TEXT_SIZE_MAX, MEGA_TEXT_SIZE_STEP, MEGA_TEXT_SIZE_DEFAULT,
|
| 21 |
MEGA_LINE_SPACING_MIN, MEGA_LINE_SPACING_MAX, MEGA_LINE_SPACING_STEP, MEGA_LINE_SPACING_DEFAULT,
|
| 22 |
LEFT_COLUMN_SCALE, RIGHT_COLUMN_SCALE,
|
|
|
|
| 23 |
)
|
| 24 |
from src.ui.styles import build_css
|
| 25 |
from src.ui.js_config import build_js_head
|
|
@@ -101,24 +102,48 @@ def build_interface():
|
|
| 101 |
def _build_left_column(c):
|
| 102 |
"""Build the left input column."""
|
| 103 |
with gr.Column(scale=LEFT_COLUMN_SCALE, elem_id="left-col"):
|
| 104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
c.url_input = gr.Textbox(
|
| 106 |
-
label="
|
| 107 |
-
placeholder="
|
| 108 |
lines=1,
|
| 109 |
)
|
| 110 |
-
c.url_download_btn = gr.Button("Download Audio", size="sm", variant="secondary")
|
| 111 |
c.url_status = gr.HTML(value="", visible=False)
|
| 112 |
c.url_info_html = gr.HTML(value="", visible=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
sources=["upload", "
|
| 117 |
-
type="filepath"
|
| 118 |
-
)
|
| 119 |
|
| 120 |
-
#
|
| 121 |
-
with gr.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
c.btn_ex_112 = gr.Button("112", size="sm", min_width=0)
|
| 123 |
c.btn_ex_84 = gr.Button("84", size="sm", min_width=0)
|
| 124 |
c.btn_ex_7 = gr.Button("7", size="sm", min_width=0)
|
|
|
|
| 20 |
MEGA_TEXT_SIZE_MIN, MEGA_TEXT_SIZE_MAX, MEGA_TEXT_SIZE_STEP, MEGA_TEXT_SIZE_DEFAULT,
|
| 21 |
MEGA_LINE_SPACING_MIN, MEGA_LINE_SPACING_MAX, MEGA_LINE_SPACING_STEP, MEGA_LINE_SPACING_DEFAULT,
|
| 22 |
LEFT_COLUMN_SCALE, RIGHT_COLUMN_SCALE,
|
| 23 |
+
DEFAULT_INPUT_MODE,
|
| 24 |
)
|
| 25 |
from src.ui.styles import build_css
|
| 26 |
from src.ui.js_config import build_js_head
|
|
|
|
| 102 |
def _build_left_column(c):
|
| 103 |
"""Build the left input column."""
|
| 104 |
with gr.Column(scale=LEFT_COLUMN_SCALE, elem_id="left-col"):
|
| 105 |
+
_is_link = DEFAULT_INPUT_MODE == "Link"
|
| 106 |
+
_is_upload = DEFAULT_INPUT_MODE == "Upload"
|
| 107 |
+
_is_record = DEFAULT_INPUT_MODE == "Record"
|
| 108 |
+
|
| 109 |
+
# Input mode toggle
|
| 110 |
+
with gr.Row(elem_id="input-mode-row"):
|
| 111 |
+
c.mode_link = gr.Button("Link", size="sm", min_width=0,
|
| 112 |
+
elem_classes=["mode-active"] if _is_link else [])
|
| 113 |
+
c.mode_upload = gr.Button("Upload", size="sm", min_width=0,
|
| 114 |
+
elem_classes=["mode-active"] if _is_upload else [])
|
| 115 |
+
c.mode_record = gr.Button("Record", size="sm", min_width=0,
|
| 116 |
+
elem_classes=["mode-active"] if _is_record else [])
|
| 117 |
+
|
| 118 |
+
# Link panel
|
| 119 |
+
with gr.Group(visible=_is_link, elem_id="link-panel") as c.link_panel:
|
| 120 |
c.url_input = gr.Textbox(
|
| 121 |
+
label="Paste a link",
|
| 122 |
+
placeholder="TikTok, SoundCloud, Archive.org, or direct audio link",
|
| 123 |
lines=1,
|
| 124 |
)
|
|
|
|
| 125 |
c.url_status = gr.HTML(value="", visible=False)
|
| 126 |
c.url_info_html = gr.HTML(value="", visible=False)
|
| 127 |
+
c.url_download_btn = gr.Button("Download", size="sm", variant="primary", visible=False)
|
| 128 |
+
gr.Markdown(
|
| 129 |
+
"Supports [1800+ sites](https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md)"
|
| 130 |
+
" — TikTok, SoundCloud, Archive.org, direct links, and more",
|
| 131 |
+
elem_id="url-help",
|
| 132 |
+
)
|
| 133 |
|
| 134 |
+
# Upload panel
|
| 135 |
+
with gr.Group(visible=_is_upload, elem_id="upload-panel") as c.upload_panel:
|
| 136 |
+
c.audio_upload = gr.Audio(label="Upload Recitation", sources=["upload"], type="filepath")
|
|
|
|
|
|
|
| 137 |
|
| 138 |
+
# Record panel
|
| 139 |
+
with gr.Group(visible=_is_record, elem_id="record-panel") as c.record_panel:
|
| 140 |
+
c.audio_record = gr.Audio(label="Record Recitation", sources=["microphone"], type="filepath")
|
| 141 |
+
|
| 142 |
+
# Hidden unified audio (fed by upload, record, or URL download)
|
| 143 |
+
c.audio_input = gr.Audio(visible=False, type="filepath")
|
| 144 |
+
|
| 145 |
+
# Example audio files (hidden in Link mode)
|
| 146 |
+
with gr.Row(visible=not _is_link, elem_id="example-row") as c.example_row:
|
| 147 |
c.btn_ex_112 = gr.Button("112", size="sm", min_width=0)
|
| 148 |
c.btn_ex_84 = gr.Button("84", size="sm", min_width=0)
|
| 149 |
c.btn_ex_7 = gr.Button("7", size="sm", min_width=0)
|
src/ui/segments.py
CHANGED
|
@@ -213,7 +213,7 @@ def simplify_ref(ref: str) -> str:
|
|
| 213 |
return ref
|
| 214 |
|
| 215 |
|
| 216 |
-
def render_segment_card(seg: SegmentInfo, idx: int, full_audio_url: str = "", render_key: str = "") -> str:
|
| 217 |
"""Render a single segment as an HTML card with optional audio player."""
|
| 218 |
is_special = seg.matched_ref in ALL_SPECIAL_REFS
|
| 219 |
confidence_class = get_confidence_class(seg.match_score)
|
|
@@ -249,10 +249,13 @@ def render_segment_card(seg: SegmentInfo, idx: int, full_audio_url: str = "", re
|
|
| 249 |
if seg.error:
|
| 250 |
error_html = f'<div class="segment-error">{seg.error}</div>'
|
| 251 |
|
| 252 |
-
# Audio player HTML —
|
| 253 |
audio_html = ""
|
| 254 |
-
if full_audio_url:
|
| 255 |
-
|
|
|
|
|
|
|
|
|
|
| 256 |
# Add animate button only if segment has a Quran verse ref (word spans for animation).
|
| 257 |
# Basmala/Isti'adha get animate because they have indexed word spans for MFA.
|
| 258 |
# Transition segments (Amin, Takbir, Tahmeed) don't.
|
|
@@ -350,12 +353,13 @@ def render_segment_card(seg: SegmentInfo, idx: int, full_audio_url: str = "", re
|
|
| 350 |
return html
|
| 351 |
|
| 352 |
|
| 353 |
-
def render_segments(segments: list, full_audio_url: str = "") -> str:
|
| 354 |
"""Render all segments as HTML with optional audio players.
|
| 355 |
|
| 356 |
Args:
|
| 357 |
segments: List of SegmentInfo objects
|
| 358 |
-
full_audio_url: URL to full audio WAV (
|
|
|
|
| 359 |
"""
|
| 360 |
if not segments:
|
| 361 |
return '<div class="no-segments">No segments detected</div>'
|
|
@@ -443,7 +447,7 @@ def render_segments(segments: list, full_audio_url: str = "") -> str:
|
|
| 443 |
|
| 444 |
t_cards = time.time()
|
| 445 |
for idx, seg in enumerate(segments):
|
| 446 |
-
html_parts.append(render_segment_card(seg, idx, full_audio_url, render_key))
|
| 447 |
|
| 448 |
html_parts.append('</div>')
|
| 449 |
print(f"[PROFILE] Segment cards: {time.time() - t_cards:.3f}s ({len(segments)} cards, HTML only)")
|
|
|
|
| 213 |
return ref
|
| 214 |
|
| 215 |
|
| 216 |
+
def render_segment_card(seg: SegmentInfo, idx: int, full_audio_url: str = "", render_key: str = "", segment_dir: str = "") -> str:
|
| 217 |
"""Render a single segment as an HTML card with optional audio player."""
|
| 218 |
is_special = seg.matched_ref in ALL_SPECIAL_REFS
|
| 219 |
confidence_class = get_confidence_class(seg.match_score)
|
|
|
|
| 249 |
if seg.error:
|
| 250 |
error_html = f'<div class="segment-error">{seg.error}</div>'
|
| 251 |
|
| 252 |
+
# Audio player HTML — per-segment WAV (preferred) or media fragment fallback
|
| 253 |
audio_html = ""
|
| 254 |
+
if segment_dir or full_audio_url:
|
| 255 |
+
if segment_dir:
|
| 256 |
+
audio_src = f"/gradio_api/file={segment_dir}/seg_{idx}.wav"
|
| 257 |
+
else:
|
| 258 |
+
audio_src = f"{full_audio_url}#t={seg.start_time:.3f},{seg.end_time:.3f}"
|
| 259 |
# Add animate button only if segment has a Quran verse ref (word spans for animation).
|
| 260 |
# Basmala/Isti'adha get animate because they have indexed word spans for MFA.
|
| 261 |
# Transition segments (Amin, Takbir, Tahmeed) don't.
|
|
|
|
| 353 |
return html
|
| 354 |
|
| 355 |
|
| 356 |
+
def render_segments(segments: list, full_audio_url: str = "", segment_dir: str = "") -> str:
|
| 357 |
"""Render all segments as HTML with optional audio players.
|
| 358 |
|
| 359 |
Args:
|
| 360 |
segments: List of SegmentInfo objects
|
| 361 |
+
full_audio_url: URL to full audio WAV (used by mega card / Animate All)
|
| 362 |
+
segment_dir: Path to segment directory containing per-segment WAV files
|
| 363 |
"""
|
| 364 |
if not segments:
|
| 365 |
return '<div class="no-segments">No segments detected</div>'
|
|
|
|
| 447 |
|
| 448 |
t_cards = time.time()
|
| 449 |
for idx, seg in enumerate(segments):
|
| 450 |
+
html_parts.append(render_segment_card(seg, idx, full_audio_url, render_key, segment_dir))
|
| 451 |
|
| 452 |
html_parts.append('</div>')
|
| 453 |
print(f"[PROFILE] Segment cards: {time.time() - t_cards:.3f}s ({len(segments)} cards, HTML only)")
|
src/ui/styles.py
CHANGED
|
@@ -432,4 +432,21 @@ def build_css() -> str:
|
|
| 432 |
.dark .segment-underseg {{ background: rgba(255, 140, 0, 0.2); border-color: #ff8c00; }}
|
| 433 |
.dark .segment-special {{ background: rgba(92, 107, 192, 0.2); border-color: #5c6bc0; border-style: dashed; }}
|
| 434 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 435 |
"""
|
|
|
|
| 432 |
.dark .segment-underseg {{ background: rgba(255, 140, 0, 0.2); border-color: #ff8c00; }}
|
| 433 |
.dark .segment-special {{ background: rgba(92, 107, 192, 0.2); border-color: #5c6bc0; border-style: dashed; }}
|
| 434 |
|
| 435 |
+
/* Input mode toggle */
|
| 436 |
+
#input-mode-row {{ gap: 0 !important; }}
|
| 437 |
+
#input-mode-row button {{
|
| 438 |
+
border-radius: 0 !important;
|
| 439 |
+
border: 1px solid var(--border-color-primary) !important;
|
| 440 |
+
}}
|
| 441 |
+
#input-mode-row button:first-child {{ border-radius: 8px 0 0 8px !important; }}
|
| 442 |
+
#input-mode-row button:last-child {{ border-radius: 0 8px 8px 0 !important; }}
|
| 443 |
+
#input-mode-row button:not(:first-child) {{ border-left: none !important; }}
|
| 444 |
+
.mode-active {{
|
| 445 |
+
background: var(--button-primary-background-fill) !important;
|
| 446 |
+
color: var(--button-primary-text-color) !important;
|
| 447 |
+
border-color: var(--button-primary-background-fill) !important;
|
| 448 |
+
}}
|
| 449 |
+
#url-help {{ font-size: 12px; opacity: 0.7; margin-top: -8px; }}
|
| 450 |
+
#url-help a {{ color: var(--link-text-color); }}
|
| 451 |
+
|
| 452 |
"""
|