hetchyy committed on
Commit
1fc019e
·
verified ·
1 Parent(s): 7ac4996

feat: redesign audio input as Link/Upload/Record toggle;perf: use per-segment WAVs and skip audio I/O for API calls

Browse files
config.py CHANGED
@@ -21,6 +21,7 @@ PORT = 6902
21
  RESAMPLE_TYPE = "soxr_lq"
22
  SEGMENT_AUDIO_DIR = Path("/tmp/segments") # WAV files written here per request
23
  URL_DOWNLOAD_DIR = Path("/tmp/url_downloads") # Audio downloaded from URLs via yt-dlp
 
24
  DELETE_CACHE_FREQUENCY = 3600*5 # Gradio cache cleanup interval (seconds)
25
  DELETE_CACHE_AGE = 3600*5 # Delete cached files older than this (seconds)
26
 
 
21
  RESAMPLE_TYPE = "soxr_lq"
22
  SEGMENT_AUDIO_DIR = Path("/tmp/segments") # WAV files written here per request
23
  URL_DOWNLOAD_DIR = Path("/tmp/url_downloads") # Audio downloaded from URLs via yt-dlp
24
+ DEFAULT_INPUT_MODE = "Upload" # "Link", "Upload", or "Record"
25
  DELETE_CACHE_FREQUENCY = 3600*5 # Gradio cache cleanup interval (seconds)
26
  DELETE_CACHE_AGE = 3600*5 # Delete cached files older than this (seconds)
27
 
src/pipeline.py CHANGED
@@ -840,6 +840,10 @@ def _run_post_vad_pipeline(
840
 
841
  json_output = {"segments": segments_list}
842
 
 
 
 
 
843
  # Compute full audio URL (file written in background after render)
844
  full_path = segment_dir / "full.wav"
845
  full_audio_url = f"/gradio_api/file={full_path}"
@@ -858,18 +862,20 @@ def _run_post_vad_pipeline(
858
  print(f"[DIAG] Before render_segments: RSS={_rss:.0f}MB, segments={len(segments)}")
859
 
860
  t_render = time.time()
861
- html = render_segments(segments, full_audio_url=full_audio_url)
862
  print(f"[PROFILE] render_segments: {time.time() - t_render:.3f}s ({len(segments)} segments, HTML={len(html)/1e6:.2f}MB)")
863
 
864
- # Write full.wav in background thread from float32 audio
865
  # sf.write converts float32→PCM16 internally (no extra int16 copy in memory)
866
- # File ready before user can click play (browser still rendering cards)
867
  import threading
868
  import soundfile as sf
869
  _audio_ref = audio # prevent GC while thread runs
870
  _sr_ref = sample_rate
871
  _path_ref = str(full_path)
872
- def _write_full_wav():
 
 
873
  import os
874
  # Diagnostics: memory + disk before write
875
  rss_mb = -1
@@ -894,7 +900,19 @@ def _run_post_vad_pipeline(
894
  print(f"[PROFILE] Full audio write (bg): {time.time() - t:.3f}s ({expected_mb:.0f}MB)")
895
  except Exception as e:
896
  print(f"[ERROR] Full audio write failed: {e}")
897
- threading.Thread(target=_write_full_wav, daemon=True).start()
 
 
 
 
 
 
 
 
 
 
 
 
898
 
899
  print("[STAGE] Done!")
900
 
 
840
 
841
  json_output = {"segments": segments_list}
842
 
843
+ # API callers only need json_output; skip HTML render and audio file writes
844
+ if endpoint != "ui":
845
+ return "", json_output, str(segment_dir), log_row
846
+
847
  # Compute full audio URL (file written in background after render)
848
  full_path = segment_dir / "full.wav"
849
  full_audio_url = f"/gradio_api/file={full_path}"
 
862
  print(f"[DIAG] Before render_segments: RSS={_rss:.0f}MB, segments={len(segments)}")
863
 
864
  t_render = time.time()
865
+ html = render_segments(segments, full_audio_url=full_audio_url, segment_dir=str(segment_dir))
866
  print(f"[PROFILE] render_segments: {time.time() - t_render:.3f}s ({len(segments)} segments, HTML={len(html)/1e6:.2f}MB)")
867
 
868
+ # Write full.wav + per-segment WAVs in background thread
869
  # sf.write converts float32→PCM16 internally (no extra int16 copy in memory)
870
+ # Files ready before user can click play (browser still rendering cards)
871
  import threading
872
  import soundfile as sf
873
  _audio_ref = audio # prevent GC while thread runs
874
  _sr_ref = sample_rate
875
  _path_ref = str(full_path)
876
+ _seg_dir_ref = str(segment_dir)
877
+ _segments_ref = segments
878
+ def _write_audio_files():
879
  import os
880
  # Diagnostics: memory + disk before write
881
  rss_mb = -1
 
900
  print(f"[PROFILE] Full audio write (bg): {time.time() - t:.3f}s ({expected_mb:.0f}MB)")
901
  except Exception as e:
902
  print(f"[ERROR] Full audio write failed: {e}")
903
+ return # Can't write per-segment files without full.wav succeeding
904
+ # Per-segment WAVs (slices from float32 array, converted to PCM16 by soundfile)
905
+ t_segs = time.time()
906
+ try:
907
+ for i, seg in enumerate(_segments_ref):
908
+ start = int(seg.start_time * _sr_ref)
909
+ end = int(seg.end_time * _sr_ref)
910
+ sf.write(os.path.join(_seg_dir_ref, f"seg_{i}.wav"),
911
+ _audio_ref[start:end], _sr_ref, format='WAV', subtype='PCM_16')
912
+ print(f"[PROFILE] Per-segment WAVs (bg): {time.time() - t_segs:.3f}s ({len(_segments_ref)} files)")
913
+ except Exception as e:
914
+ print(f"[ERROR] Per-segment WAV write failed: {e}")
915
+ threading.Thread(target=_write_audio_files, daemon=True).start()
916
 
917
  print("[STAGE] Done!")
918
 
src/ui/event_wiring.py CHANGED
@@ -18,7 +18,7 @@ from src.ui.progress_bar import pipeline_progress_bar_html
18
  from src.ui.handlers import (
19
  wire_presets, toggle_resegment_panel,
20
  on_mode_change, on_verse_toggle, restore_anim_settings,
21
- download_url_audio,
22
  )
23
 
24
  _EMPTY_PLACEHOLDER = (
@@ -30,6 +30,7 @@ _EMPTY_PLACEHOLDER = (
30
  def wire_events(app, c):
31
  """Wire all event handlers to Gradio components."""
32
  _wire_preset_buttons(c)
 
33
  _wire_url_input(c)
34
  _wire_audio_input(c)
35
  _wire_extract_chain(c)
@@ -51,50 +52,115 @@ def _wire_preset_buttons(c):
51
  c.rs_silence, c.rs_speech, c.rs_pad)
52
 
53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  def _wire_url_input(c):
55
- """Wire URL textbox → yt-dlp download → populate audio component."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
  def _on_download(url):
58
- # Yield 1: show loading state
 
59
  yield (
60
- gr.update(), # audio_input unchanged
61
- gr.update(visible=False), # hide old info
62
  gr.update(
63
  value='<div style="text-align:center;padding:8px;">Downloading audio...</div>',
64
  visible=True,
65
- ), # url_status
66
- gr.update(interactive=False), # disable button
67
  )
68
 
69
- # Yield 2: download result
70
  try:
71
  wav_path, info_html = download_url_audio(url)
72
  yield (
73
- wav_path, # set audio_input
74
- gr.update(value=info_html, visible=True), # url_info_html
75
- gr.update(visible=False), # hide status
76
- gr.update(interactive=True), # re-enable button
77
  )
78
  except gr.Error:
79
  raise
80
  except Exception as e:
81
  yield (
82
  gr.update(),
83
- gr.update(visible=False),
84
  gr.update(
85
- value=f'<div style="color:var(--error-text-color);padding:8px;">Error: {str(e)[:200]}</div>',
86
  visible=True,
87
  ),
88
  gr.update(interactive=True),
89
  )
90
 
91
- _url_outputs = [c.audio_input, c.url_info_html, c.url_status, c.url_download_btn]
92
  c.url_download_btn.click(
93
- fn=_on_download, inputs=[c.url_input], outputs=_url_outputs,
94
- api_name=False, show_progress="hidden",
95
- )
96
- c.url_input.submit(
97
- fn=_on_download, inputs=[c.url_input], outputs=_url_outputs,
98
  api_name=False, show_progress="hidden",
99
  )
100
 
@@ -141,13 +207,18 @@ def _wire_audio_input(c):
141
  api_name=False, show_progress="hidden"
142
  )
143
 
 
 
 
 
144
  c.btn_ex_112.click(fn=lambda: ("data/112.mp3", "GPU", True), inputs=[], outputs=[c.audio_input, c.device_radio, c.is_preset], api_name=False)
145
  c.btn_ex_84.click(fn=lambda: ("data/84.mp3", "GPU", True), inputs=[], outputs=[c.audio_input, c.device_radio, c.is_preset], api_name=False)
146
  c.btn_ex_7.click(fn=lambda: ("data/7.mp3", "GPU", True), inputs=[], outputs=[c.audio_input, c.device_radio, c.is_preset], api_name=False)
147
  c.btn_ex_juz30.click(fn=lambda: ("data/Juz' 30.mp3", "GPU", True), inputs=[], outputs=[c.audio_input, c.device_radio, c.is_preset], api_name=False)
148
 
149
- # Reset is_preset when user uploads/records their own audio (.input fires only on user interaction, not programmatic changes)
150
- c.audio_input.input(fn=lambda: False, inputs=[], outputs=[c.is_preset], api_name=False, show_progress="hidden")
 
151
 
152
 
153
  def _wire_extract_chain(c):
 
18
  from src.ui.handlers import (
19
  wire_presets, toggle_resegment_panel,
20
  on_mode_change, on_verse_toggle, restore_anim_settings,
21
+ fetch_url_info, download_url_audio,
22
  )
23
 
24
  _EMPTY_PLACEHOLDER = (
 
30
  def wire_events(app, c):
31
  """Wire all event handlers to Gradio components."""
32
  _wire_preset_buttons(c)
33
+ _wire_input_mode_toggle(c)
34
  _wire_url_input(c)
35
  _wire_audio_input(c)
36
  _wire_extract_chain(c)
 
52
  c.rs_silence, c.rs_speech, c.rs_pad)
53
 
54
 
55
def _wire_input_mode_toggle(c):
    """Connect the Link / Upload / Record buttons to the panel switcher.

    Each button click produces seven component updates: the three toggle
    buttons (only the selected one carries the ``mode-active`` class), the
    three input panels (only the selected one is visible), and the example
    row (visible in every mode except "Link").
    """

    def _active_classes(selected):
        # CSS class list for a toggle button: highlighted only when selected.
        if selected:
            return ["mode-active"]
        return []

    def _switch_to(mode):
        link_on = mode == "Link"
        upload_on = mode == "Upload"
        record_on = mode == "Record"

        link_btn = gr.update(elem_classes=_active_classes(link_on))
        upload_btn = gr.update(elem_classes=_active_classes(upload_on))
        record_btn = gr.update(elem_classes=_active_classes(record_on))
        link_panel = gr.update(visible=link_on)
        upload_panel = gr.update(visible=upload_on)
        record_panel = gr.update(visible=record_on)
        example_row = gr.update(visible=not link_on)

        # Order must match the outputs list wired below.
        return (
            link_btn, upload_btn, record_btn,
            link_panel, upload_panel, record_panel,
            example_row,
        )

    def _handler_for(mode):
        # Zero-argument callback bound to one specific mode.
        return lambda: _switch_to(mode)

    targets = [
        c.mode_link, c.mode_upload, c.mode_record,
        c.link_panel, c.upload_panel, c.record_panel,
        c.example_row,
    ]
    wiring = (
        (c.mode_link, "Link"),
        (c.mode_upload, "Upload"),
        (c.mode_record, "Record"),
    )
    for button, mode in wiring:
        button.click(fn=_handler_for(mode), inputs=[], outputs=targets, api_name=False)
80
+
81
+
82
def _wire_url_input(c):
    """Wire URL paste → auto-fetch metadata → download button.

    Registers two generator handlers on the components held by ``c``:

    * ``url_input.change`` fetches metadata and updates
      ``(url_info_html, url_status, url_download_btn)``.
    * ``url_download_btn.click`` downloads the audio and updates
      ``(audio_input, url_status, url_download_btn)``.
    """
    from html import escape

    def _error_html(prefix, exc):
        # Exception text can echo the pasted URL or remote content, so it is
        # escaped (after truncation, to avoid cutting an entity in half).
        return (
            '<div style="color:var(--error-text-color);padding:8px;">'
            f'{prefix}: {escape(str(exc)[:200])}</div>'
        )

    def _on_url_change(url):
        """Auto-fetch metadata when a URL is pasted."""
        if not url or not url.strip():
            # This function is a generator, so the update has to be yielded;
            # a plain ``return (...)`` would never reach the UI.
            yield (
                gr.update(visible=False),  # url_info_html
                gr.update(visible=False),  # url_status
                gr.update(visible=False),  # url_download_btn
            )
            return

        # Show fetching status
        yield (
            gr.update(visible=False),
            gr.update(value='<div style="text-align:center;padding:8px;opacity:0.7;">Fetching info...</div>', visible=True),
            gr.update(visible=False),
        )

        try:
            info_html = fetch_url_info(url)
            if info_html is None:
                yield (gr.update(visible=False), gr.update(visible=False), gr.update(visible=False))
                return
            yield (
                gr.update(value=info_html, visible=True),
                gr.update(visible=False),
                gr.update(visible=True),  # show Download button
            )
        except gr.Error:
            # Clear the "Fetching info..." banner before the error surfaces,
            # otherwise it stays on screen after the failure.
            yield (gr.update(visible=False), gr.update(visible=False), gr.update(visible=False))
            raise
        except Exception as e:
            yield (
                gr.update(visible=False),
                gr.update(value=_error_html("Error", e), visible=True),
                gr.update(visible=False),
            )

    _fetch_outputs = [c.url_info_html, c.url_status, c.url_download_btn]
    c.url_input.change(
        fn=_on_url_change, inputs=[c.url_input], outputs=_fetch_outputs,
        api_name=False, show_progress="hidden",
    )

    def _on_download(url):
        """Download audio after metadata was fetched."""
        # Yield 1: loading state
        yield (
            gr.update(),  # audio_input
            gr.update(
                value='<div style="text-align:center;padding:8px;">Downloading audio...</div>',
                visible=True,
            ),  # url_status
            gr.update(interactive=False),  # disable download btn
        )

        # Yield 2: result
        try:
            wav_path, _info_html = download_url_audio(url)
            yield (
                wav_path,
                gr.update(visible=False),
                gr.update(interactive=True),
            )
        except gr.Error:
            # Restore the button and hide the loading banner before the error
            # propagates; otherwise the button stays disabled after a failure.
            yield (
                gr.update(),
                gr.update(visible=False),
                gr.update(interactive=True),
            )
            raise
        except Exception as e:
            yield (
                gr.update(),
                gr.update(value=_error_html("Download failed", e), visible=True),
                gr.update(interactive=True),
            )

    _dl_outputs = [c.audio_input, c.url_status, c.url_download_btn]
    c.url_download_btn.click(
        fn=_on_download, inputs=[c.url_input], outputs=_dl_outputs,
        api_name=False, show_progress="hidden",
    )
166
 
 
207
  api_name=False, show_progress="hidden"
208
  )
209
 
210
+ # Bridge upload/record to hidden unified audio_input
211
+ c.audio_upload.change(fn=lambda x: x, inputs=[c.audio_upload], outputs=[c.audio_input], api_name=False, show_progress="hidden")
212
+ c.audio_record.change(fn=lambda x: x, inputs=[c.audio_record], outputs=[c.audio_input], api_name=False, show_progress="hidden")
213
+
214
  c.btn_ex_112.click(fn=lambda: ("data/112.mp3", "GPU", True), inputs=[], outputs=[c.audio_input, c.device_radio, c.is_preset], api_name=False)
215
  c.btn_ex_84.click(fn=lambda: ("data/84.mp3", "GPU", True), inputs=[], outputs=[c.audio_input, c.device_radio, c.is_preset], api_name=False)
216
  c.btn_ex_7.click(fn=lambda: ("data/7.mp3", "GPU", True), inputs=[], outputs=[c.audio_input, c.device_radio, c.is_preset], api_name=False)
217
  c.btn_ex_juz30.click(fn=lambda: ("data/Juz' 30.mp3", "GPU", True), inputs=[], outputs=[c.audio_input, c.device_radio, c.is_preset], api_name=False)
218
 
219
+ # Reset is_preset when user uploads/records their own audio
220
+ c.audio_upload.input(fn=lambda: False, inputs=[], outputs=[c.is_preset], api_name=False, show_progress="hidden")
221
+ c.audio_record.input(fn=lambda: False, inputs=[], outputs=[c.is_preset], api_name=False, show_progress="hidden")
222
 
223
 
224
  def _wire_extract_chain(c):
src/ui/handlers.py CHANGED
@@ -20,8 +20,53 @@ from config import (
20
  )
21
 
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  def download_url_audio(url: str):
24
- """Download audio from a URL using yt-dlp. Returns (wav_path, info_html)."""
25
  import yt_dlp
26
 
27
  if not url or not url.strip():
@@ -29,7 +74,6 @@ def download_url_audio(url: str):
29
 
30
  url = url.strip()
31
 
32
- # Download audio as WAV (single extract_info call so PO token plugin can intercept)
33
  URL_DOWNLOAD_DIR.mkdir(parents=True, exist_ok=True)
34
  out_path = URL_DOWNLOAD_DIR / str(uuid.uuid4())
35
 
@@ -49,33 +93,15 @@ def download_url_audio(url: str):
49
  except Exception as e:
50
  raise gr.Error(f"Download failed: {str(e)[:200]}")
51
 
52
- if info.get("_type") == "playlist":
53
- raise gr.Error("Playlists are not supported. Please paste a single video/audio URL.")
54
-
55
- duration = info.get("duration")
56
- title = info.get("title", "Unknown")
57
- thumbnail = info.get("thumbnail", "")
58
-
59
  wav_path = str(out_path) + ".wav"
60
  if not Path(wav_path).exists():
61
  raise gr.Error("Download completed but audio file was not created.")
62
 
63
- # Build info card HTML
64
- dur_str = f"{int(duration) // 60}:{int(duration) % 60:02d}" if duration else "unknown"
65
- thumb_html = (
66
- f'<img src="{thumbnail}" style="max-width:100%;max-height:120px;border-radius:8px;margin-bottom:4px;">'
67
- if thumbnail else ""
68
- )
69
- info_html = (
70
- f'<div style="padding:8px;border-radius:8px;background:var(--block-background-fill);'
71
- f'border:1px solid var(--border-color-primary);">'
72
- f'{thumb_html}'
73
- f'<div style="font-weight:bold;font-size:14px;">{title}</div>'
74
- f'<div style="font-size:12px;opacity:0.7;">Duration: {dur_str}</div>'
75
- f'</div>'
76
- )
77
 
78
- return wav_path, info_html
79
 
80
 
81
  def create_segmentation_settings(id_suffix=""):
 
20
  )
21
 
22
 
23
+ def _build_info_html(title, duration, thumbnail):
24
+ """Build HTML info card for a URL-sourced audio."""
25
+ dur_str = f"{int(duration) // 60}:{int(duration) % 60:02d}" if duration else "unknown"
26
+ thumb_html = (
27
+ f'<img src="{thumbnail}" style="max-width:100%;max-height:120px;border-radius:8px;margin-bottom:4px;">'
28
+ if thumbnail else ""
29
+ )
30
+ return (
31
+ f'<div style="padding:8px;border-radius:8px;background:var(--block-background-fill);'
32
+ f'border:1px solid var(--border-color-primary);">'
33
+ f'{thumb_html}'
34
+ f'<div style="font-weight:bold;font-size:14px;">{title}</div>'
35
+ f'<div style="font-size:12px;opacity:0.7;">Duration: {dur_str}</div>'
36
+ f'</div>'
37
+ )
38
+
39
+
40
def fetch_url_info(url: str):
    """Fetch metadata for ``url`` without downloading the media.

    Args:
        url: Link pasted by the user; surrounding whitespace is ignored.

    Returns:
        The HTML info card string, or ``None`` when ``url`` is empty or blank.

    Raises:
        gr.Error: If metadata extraction fails, the URL resolves to a
            playlist, or no duration is reported (treated as a live stream).
    """
    if not url or not url.strip():
        return None

    # Imported only after the blank-input guard so clearing the textbox does
    # not pay for (or depend on) loading yt-dlp.
    import yt_dlp

    url = url.strip()

    with yt_dlp.YoutubeDL({"quiet": True, "no_warnings": True}) as ydl:
        try:
            info = ydl.extract_info(url, download=False)
        except yt_dlp.utils.DownloadError as e:
            raise gr.Error(f"Could not fetch URL: {str(e)[:200]}") from e

    # Defensive: avoid an AttributeError on ``info.get`` if no info dict came back.
    if not info:
        raise gr.Error("Could not fetch URL: no metadata was returned.")

    if info.get("_type") == "playlist":
        raise gr.Error("Playlists are not supported. Please paste a single video/audio URL.")

    duration = info.get("duration")
    # NOTE(review): a missing duration is used as the live-stream signal; sources
    # that simply do not report a duration (possibly direct audio links) would be
    # rejected here too — confirm this is intended.
    if duration is None:
        raise gr.Error("Live streams are not supported. Please use a completed video/audio.")

    title = info.get("title", "Unknown")
    thumbnail = info.get("thumbnail", "")

    return _build_info_html(title, duration, thumbnail)
66
+
67
+
68
  def download_url_audio(url: str):
69
+ """Full download of audio from URL. Returns (wav_path, info_html)."""
70
  import yt_dlp
71
 
72
  if not url or not url.strip():
 
74
 
75
  url = url.strip()
76
 
 
77
  URL_DOWNLOAD_DIR.mkdir(parents=True, exist_ok=True)
78
  out_path = URL_DOWNLOAD_DIR / str(uuid.uuid4())
79
 
 
93
  except Exception as e:
94
  raise gr.Error(f"Download failed: {str(e)[:200]}")
95
 
 
 
 
 
 
 
 
96
  wav_path = str(out_path) + ".wav"
97
  if not Path(wav_path).exists():
98
  raise gr.Error("Download completed but audio file was not created.")
99
 
100
+ title = info.get("title", "Unknown")
101
+ duration = info.get("duration")
102
+ thumbnail = info.get("thumbnail", "")
 
 
 
 
 
 
 
 
 
 
 
103
 
104
+ return wav_path, _build_info_html(title, duration, thumbnail)
105
 
106
 
107
  def create_segmentation_settings(id_suffix=""):
src/ui/interface.py CHANGED
@@ -20,6 +20,7 @@ from config import (
20
  MEGA_TEXT_SIZE_MIN, MEGA_TEXT_SIZE_MAX, MEGA_TEXT_SIZE_STEP, MEGA_TEXT_SIZE_DEFAULT,
21
  MEGA_LINE_SPACING_MIN, MEGA_LINE_SPACING_MAX, MEGA_LINE_SPACING_STEP, MEGA_LINE_SPACING_DEFAULT,
22
  LEFT_COLUMN_SCALE, RIGHT_COLUMN_SCALE,
 
23
  )
24
  from src.ui.styles import build_css
25
  from src.ui.js_config import build_js_head
@@ -101,24 +102,48 @@ def build_interface():
101
  def _build_left_column(c):
102
  """Build the left input column."""
103
  with gr.Column(scale=LEFT_COLUMN_SCALE, elem_id="left-col"):
104
- with gr.Group():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  c.url_input = gr.Textbox(
106
- label="Or paste a URL (YouTube, SoundCloud, etc.)",
107
- placeholder="https://youtube.com/watch?v=...",
108
  lines=1,
109
  )
110
- c.url_download_btn = gr.Button("Download Audio", size="sm", variant="secondary")
111
  c.url_status = gr.HTML(value="", visible=False)
112
  c.url_info_html = gr.HTML(value="", visible=False)
 
 
 
 
 
 
113
 
114
- c.audio_input = gr.Audio(
115
- label="Upload Recitation",
116
- sources=["upload", "microphone"],
117
- type="filepath"
118
- )
119
 
120
- # Example audio files
121
- with gr.Row():
 
 
 
 
 
 
 
122
  c.btn_ex_112 = gr.Button("112", size="sm", min_width=0)
123
  c.btn_ex_84 = gr.Button("84", size="sm", min_width=0)
124
  c.btn_ex_7 = gr.Button("7", size="sm", min_width=0)
 
20
  MEGA_TEXT_SIZE_MIN, MEGA_TEXT_SIZE_MAX, MEGA_TEXT_SIZE_STEP, MEGA_TEXT_SIZE_DEFAULT,
21
  MEGA_LINE_SPACING_MIN, MEGA_LINE_SPACING_MAX, MEGA_LINE_SPACING_STEP, MEGA_LINE_SPACING_DEFAULT,
22
  LEFT_COLUMN_SCALE, RIGHT_COLUMN_SCALE,
23
+ DEFAULT_INPUT_MODE,
24
  )
25
  from src.ui.styles import build_css
26
  from src.ui.js_config import build_js_head
 
102
  def _build_left_column(c):
103
  """Build the left input column."""
104
  with gr.Column(scale=LEFT_COLUMN_SCALE, elem_id="left-col"):
105
+ _is_link = DEFAULT_INPUT_MODE == "Link"
106
+ _is_upload = DEFAULT_INPUT_MODE == "Upload"
107
+ _is_record = DEFAULT_INPUT_MODE == "Record"
108
+
109
+ # Input mode toggle
110
+ with gr.Row(elem_id="input-mode-row"):
111
+ c.mode_link = gr.Button("Link", size="sm", min_width=0,
112
+ elem_classes=["mode-active"] if _is_link else [])
113
+ c.mode_upload = gr.Button("Upload", size="sm", min_width=0,
114
+ elem_classes=["mode-active"] if _is_upload else [])
115
+ c.mode_record = gr.Button("Record", size="sm", min_width=0,
116
+ elem_classes=["mode-active"] if _is_record else [])
117
+
118
+ # Link panel
119
+ with gr.Group(visible=_is_link, elem_id="link-panel") as c.link_panel:
120
  c.url_input = gr.Textbox(
121
+ label="Paste a link",
122
+ placeholder="TikTok, SoundCloud, Archive.org, or direct audio link",
123
  lines=1,
124
  )
 
125
  c.url_status = gr.HTML(value="", visible=False)
126
  c.url_info_html = gr.HTML(value="", visible=False)
127
+ c.url_download_btn = gr.Button("Download", size="sm", variant="primary", visible=False)
128
+ gr.Markdown(
129
+ "Supports [1800+ sites](https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md)"
130
+ " — TikTok, SoundCloud, Archive.org, direct links, and more",
131
+ elem_id="url-help",
132
+ )
133
 
134
+ # Upload panel
135
+ with gr.Group(visible=_is_upload, elem_id="upload-panel") as c.upload_panel:
136
+ c.audio_upload = gr.Audio(label="Upload Recitation", sources=["upload"], type="filepath")
 
 
137
 
138
+ # Record panel
139
+ with gr.Group(visible=_is_record, elem_id="record-panel") as c.record_panel:
140
+ c.audio_record = gr.Audio(label="Record Recitation", sources=["microphone"], type="filepath")
141
+
142
+ # Hidden unified audio (fed by upload, record, or URL download)
143
+ c.audio_input = gr.Audio(visible=False, type="filepath")
144
+
145
+ # Example audio files (hidden in Link mode)
146
+ with gr.Row(visible=not _is_link, elem_id="example-row") as c.example_row:
147
  c.btn_ex_112 = gr.Button("112", size="sm", min_width=0)
148
  c.btn_ex_84 = gr.Button("84", size="sm", min_width=0)
149
  c.btn_ex_7 = gr.Button("7", size="sm", min_width=0)
src/ui/segments.py CHANGED
@@ -213,7 +213,7 @@ def simplify_ref(ref: str) -> str:
213
  return ref
214
 
215
 
216
- def render_segment_card(seg: SegmentInfo, idx: int, full_audio_url: str = "", render_key: str = "") -> str:
217
  """Render a single segment as an HTML card with optional audio player."""
218
  is_special = seg.matched_ref in ALL_SPECIAL_REFS
219
  confidence_class = get_confidence_class(seg.match_score)
@@ -249,10 +249,13 @@ def render_segment_card(seg: SegmentInfo, idx: int, full_audio_url: str = "", re
249
  if seg.error:
250
  error_html = f'<div class="segment-error">{seg.error}</div>'
251
 
252
- # Audio player HTML — uses media fragment of the full recording
253
  audio_html = ""
254
- if full_audio_url:
255
- audio_src = f"{full_audio_url}#t={seg.start_time:.3f},{seg.end_time:.3f}"
 
 
 
256
  # Add animate button only if segment has a Quran verse ref (word spans for animation).
257
  # Basmala/Isti'adha get animate because they have indexed word spans for MFA.
258
  # Transition segments (Amin, Takbir, Tahmeed) don't.
@@ -350,12 +353,13 @@ def render_segment_card(seg: SegmentInfo, idx: int, full_audio_url: str = "", re
350
  return html
351
 
352
 
353
- def render_segments(segments: list, full_audio_url: str = "") -> str:
354
  """Render all segments as HTML with optional audio players.
355
 
356
  Args:
357
  segments: List of SegmentInfo objects
358
- full_audio_url: URL to full audio WAV (media fragments used for per-segment playback)
 
359
  """
360
  if not segments:
361
  return '<div class="no-segments">No segments detected</div>'
@@ -443,7 +447,7 @@ def render_segments(segments: list, full_audio_url: str = "") -> str:
443
 
444
  t_cards = time.time()
445
  for idx, seg in enumerate(segments):
446
- html_parts.append(render_segment_card(seg, idx, full_audio_url, render_key))
447
 
448
  html_parts.append('</div>')
449
  print(f"[PROFILE] Segment cards: {time.time() - t_cards:.3f}s ({len(segments)} cards, HTML only)")
 
213
  return ref
214
 
215
 
216
+ def render_segment_card(seg: SegmentInfo, idx: int, full_audio_url: str = "", render_key: str = "", segment_dir: str = "") -> str:
217
  """Render a single segment as an HTML card with optional audio player."""
218
  is_special = seg.matched_ref in ALL_SPECIAL_REFS
219
  confidence_class = get_confidence_class(seg.match_score)
 
249
  if seg.error:
250
  error_html = f'<div class="segment-error">{seg.error}</div>'
251
 
252
+ # Audio player HTML — per-segment WAV (preferred) or media fragment fallback
253
  audio_html = ""
254
+ if segment_dir or full_audio_url:
255
+ if segment_dir:
256
+ audio_src = f"/gradio_api/file={segment_dir}/seg_{idx}.wav"
257
+ else:
258
+ audio_src = f"{full_audio_url}#t={seg.start_time:.3f},{seg.end_time:.3f}"
259
  # Add animate button only if segment has a Quran verse ref (word spans for animation).
260
  # Basmala/Isti'adha get animate because they have indexed word spans for MFA.
261
  # Transition segments (Amin, Takbir, Tahmeed) don't.
 
353
  return html
354
 
355
 
356
+ def render_segments(segments: list, full_audio_url: str = "", segment_dir: str = "") -> str:
357
  """Render all segments as HTML with optional audio players.
358
 
359
  Args:
360
  segments: List of SegmentInfo objects
361
+ full_audio_url: URL to full audio WAV (used by mega card / Animate All)
362
+ segment_dir: Path to segment directory containing per-segment WAV files
363
  """
364
  if not segments:
365
  return '<div class="no-segments">No segments detected</div>'
 
447
 
448
  t_cards = time.time()
449
  for idx, seg in enumerate(segments):
450
+ html_parts.append(render_segment_card(seg, idx, full_audio_url, render_key, segment_dir))
451
 
452
  html_parts.append('</div>')
453
  print(f"[PROFILE] Segment cards: {time.time() - t_cards:.3f}s ({len(segments)} cards, HTML only)")
src/ui/styles.py CHANGED
@@ -432,4 +432,21 @@ def build_css() -> str:
432
  .dark .segment-underseg {{ background: rgba(255, 140, 0, 0.2); border-color: #ff8c00; }}
433
  .dark .segment-special {{ background: rgba(92, 107, 192, 0.2); border-color: #5c6bc0; border-style: dashed; }}
434
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
435
  """
 
432
  .dark .segment-underseg {{ background: rgba(255, 140, 0, 0.2); border-color: #ff8c00; }}
433
  .dark .segment-special {{ background: rgba(92, 107, 192, 0.2); border-color: #5c6bc0; border-style: dashed; }}
434
 
435
+ /* Input mode toggle */
436
+ #input-mode-row {{ gap: 0 !important; }}
437
+ #input-mode-row button {{
438
+ border-radius: 0 !important;
439
+ border: 1px solid var(--border-color-primary) !important;
440
+ }}
441
+ #input-mode-row button:first-child {{ border-radius: 8px 0 0 8px !important; }}
442
+ #input-mode-row button:last-child {{ border-radius: 0 8px 8px 0 !important; }}
443
+ #input-mode-row button:not(:first-child) {{ border-left: none !important; }}
444
+ .mode-active {{
445
+ background: var(--button-primary-background-fill) !important;
446
+ color: var(--button-primary-text-color) !important;
447
+ border-color: var(--button-primary-background-fill) !important;
448
+ }}
449
+ #url-help {{ font-size: 12px; opacity: 0.7; margin-top: -8px; }}
450
+ #url-help a {{ color: var(--link-text-color); }}
451
+
452
  """