File size: 13,806 Bytes
a59d993
045ee7d
 
a59d993
045ee7d
 
 
 
 
881307e
045ee7d
 
 
 
 
 
 
 
 
 
 
 
1fc019e
045ee7d
410852a
6a5aa21
a59d993
 
045ee7d
 
 
 
 
 
 
 
a59d993
410852a
6a5aa21
045ee7d
 
a59d993
045ee7d
 
 
7046378
045ee7d
845c59a
8ad3617
045ee7d
a59d993
045ee7d
c836860
a59d993
c836860
045ee7d
f1bbb03
 
a6d19f4
f1bbb03
 
5df5324
 
 
 
 
 
 
 
 
 
 
 
045ee7d
 
a59d993
 
 
 
 
 
 
 
6ab628d
a59d993
 
6cdb091
 
 
 
 
 
 
 
 
2ce56b1
 
058f17e
 
e67922d
6cdb091
 
a59d993
045ee7d
a59d993
045ee7d
 
a59d993
 
 
1fc019e
 
 
 
 
 
 
 
 
 
 
 
 
 
e67922d
f340007
1fc019e
a3b9d0f
f340007
 
0e9d086
f1bbb03
1fc019e
 
e67922d
f1bbb03
 
 
 
 
1fc019e
 
 
e67922d
1fc019e
f340007
681991f
 
 
a59d993
 
 
 
a6d19f4
a59d993
 
 
 
 
2ce56b1
a59d993
 
 
 
 
 
 
045ee7d
a6d19f4
a59d993
 
 
 
 
 
 
 
 
ef9d3f2
a59d993
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5df5324
a59d993
5df5324
881307e
 
 
 
f1bbb03
7d8587f
881307e
 
 
 
 
 
 
 
 
 
 
 
 
a59d993
881307e
 
 
 
 
a59d993
881307e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
"""Gradio UI — layout orchestrator."""
import json
from pathlib import Path
from types import SimpleNamespace

import gradio as gr

from config import (
    DELETE_CACHE_FREQUENCY, DELETE_CACHE_AGE,
    DEV_TAB_VISIBLE,
    ANIM_WORD_COLOR, ANIM_STYLE_ROW_SCALES,
    ANIM_DISPLAY_MODES, ANIM_DISPLAY_MODE_DEFAULT,
    ANIM_OPACITY_PREV_DEFAULT, ANIM_OPACITY_AFTER_DEFAULT, ANIM_OPACITY_STEP,
    ANIM_PRESETS,
    ANIM_GRANULARITIES, ANIM_GRANULARITY_DEFAULT,
    ANIM_WINDOW_PREV_DEFAULT, ANIM_WINDOW_AFTER_DEFAULT,
    ANIM_WINDOW_PREV_MIN, ANIM_WINDOW_PREV_MAX,
    ANIM_WINDOW_AFTER_MIN, ANIM_WINDOW_AFTER_MAX,
    MEGA_WORD_SPACING_MIN, MEGA_WORD_SPACING_MAX, MEGA_WORD_SPACING_STEP, MEGA_WORD_SPACING_DEFAULT,
    MEGA_TEXT_SIZE_MIN, MEGA_TEXT_SIZE_MAX, MEGA_TEXT_SIZE_STEP, MEGA_TEXT_SIZE_DEFAULT,
    MEGA_LINE_SPACING_MIN, MEGA_LINE_SPACING_MAX, MEGA_LINE_SPACING_STEP, MEGA_LINE_SPACING_DEFAULT,
    LEFT_COLUMN_SCALE, RIGHT_COLUMN_SCALE,
    DEFAULT_INPUT_MODE,
)
from src.ui.styles import build_css
from src.ui.js_config import build_js_head
from src.ui.handlers import create_segmentation_settings
from src.ui.event_wiring import wire_events

# Load the surah-name ligature map once at import time.
# The encoding is pinned to UTF-8 (the JSON interchange encoding) so that
# parsing does not depend on the platform's default locale encoding.
with open(
    Path(__file__).parent.parent.parent / "data" / "ligatures.json",
    encoding="utf-8",
) as _f:
    _SURAH_LIGATURES = json.load(_f)


def build_interface():
    """Build the Gradio interface.

    Creates the top-level ``gr.Blocks`` app, renders the header text and the
    documentation accordions, lays out the main row (optionally wrapped in
    tabs together with a "Dev" tab when ``DEV_TAB_VISIBLE`` is truthy),
    registers the state and hidden API components, and finally hands the
    app plus the component namespace to ``wire_events``.

    Returns:
        The assembled ``gr.Blocks`` application.
    """
    # Namespace on which every builder stores the components it creates.
    c = SimpleNamespace()
    css = build_css()
    js = build_js_head(_SURAH_LIGATURES)
    docs_dir = Path(__file__).parent.parent.parent / "docs"

    with gr.Blocks(title="Quran Multi-Aligner", css=css, head=js, delete_cache=(DELETE_CACHE_FREQUENCY, DELETE_CACHE_AGE)) as app:
        gr.Markdown("# \U0001f399\ufe0f Quran Multi-Aligner")
        gr.Markdown("""
- Transcribe and split any recitation by pauses within 1-2 minutes
- Get precise pause-, verse-, word- and character-level timestamps, exportable as JSON
- GPU-powered <a href="https://github.com/Wider-Community/quranic-universal-audio/blob/main/quranic_universal_aligner/docs/client_api.md" target="_blank">API usage</a> with daily quotas, and unlimited CPU usage
- Reliable confidence system to flag uncertain segments and missed words — no silent errors
- Robust tolerance to noise, speaker variation and low audio quality, particularly with the large model
- <a href="https://github.com/Wider-Community/quranic-universal-audio/issues" target="_blank">Feedback/contributions are welcome</a>
""")

        # API Documentation accordion.
        # Read explicitly as UTF-8 so rendering does not depend on the
        # platform's default locale encoding.
        _api_doc = (docs_dir / "client_api.md").read_text(encoding="utf-8")
        with gr.Accordion("\U0001f4e1 API Usage", open=False):
            gr.Markdown(_api_doc)

        # Changelog accordion
        _changelog = (docs_dir / "CHANGELOG.md").read_text(encoding="utf-8")
        with gr.Accordion("📋 Changelog", open=False):
            gr.Markdown(_changelog)

        if DEV_TAB_VISIBLE:
            with gr.Tabs():
                with gr.Tab("Results"):
                    _build_main_row(c)
                with gr.Tab("Dev"):
                    _build_dev_tab(c)
        else:
            _build_main_row(c)

        # State components for caching VAD data between runs
        c.cached_speech_intervals = gr.State(value=None)
        c.cached_is_complete = gr.State(value=None)
        c.cached_audio = gr.State(value=None)
        c.cached_sample_rate = gr.State(value=None)
        c.cached_intervals = gr.State(value=None)
        c.cached_model_name = gr.State(value=None)
        c.cached_segment_dir = gr.State(value=None)
        c.cached_log_row = gr.State(value=None)
        c.is_preset = gr.State(value=False)
        c.resegment_panel_visible = gr.State(value=False)

        # Session API components (hidden, API-only)
        c.api_audio = gr.Audio(visible=False, type="numpy")
        c.api_audio_id = gr.Textbox(visible=False)
        c.api_silence = gr.Number(visible=False, precision=0)
        c.api_speech = gr.Number(visible=False, precision=0)
        c.api_pad = gr.Number(visible=False, precision=0)
        c.api_model = gr.Textbox(visible=False)
        c.api_device = gr.Textbox(visible=False)
        c.api_timestamps = gr.JSON(visible=False)
        c.api_mfa_segments = gr.JSON(visible=False)
        c.api_mfa_granularity = gr.Textbox(visible=False)
        c.api_estimate_endpoint = gr.Textbox(visible=False)
        c.api_estimate_audio_duration = gr.Number(visible=False)
        c.api_url = gr.Textbox(visible=False)
        c.api_result = gr.JSON(visible=False)

        # Wiring happens last, inside the Blocks context, once every
        # component referenced through ``c`` exists.
        wire_events(app, c)

    return app


def _build_main_row(c):
    """Lay out the main row holding the left and right columns."""
    with gr.Row(elem_id="main-row"):
        _build_left_column(c)
        _build_right_column(c)


def _build_left_column(c):
    """Build the left input column.

    Creates the input-mode toggle, the Link / Upload / Record panels (only
    the panel matching ``DEFAULT_INPUT_MODE`` starts visible), the animation
    settings, the model and segmentation settings accordions, and the JSON
    download slot.  Every created component is stored as an attribute on
    ``c``.

    Args:
        c: Namespace object that receives the created components.
    """
    with gr.Column(scale=LEFT_COLUMN_SCALE, elem_id="left-col"):
        _is_link = DEFAULT_INPUT_MODE == "Link"
        _is_upload = DEFAULT_INPUT_MODE == "Upload"
        _is_record = DEFAULT_INPUT_MODE == "Record"

        # Input mode toggle: the button for the default mode gets the
        # "mode-active" class.
        with gr.Row(elem_id="input-mode-row"):
            c.mode_link = gr.Button("Link", size="sm", min_width=0,
                                     elem_classes=["mode-active"] if _is_link else [])
            c.mode_upload = gr.Button("Upload", size="sm", min_width=0,
                                       elem_classes=["mode-active"] if _is_upload else [])
            c.mode_record = gr.Button("Record", size="sm", min_width=0,
                                       elem_classes=["mode-active"] if _is_record else [])

        # Link panel
        with gr.Column(visible=_is_link, elem_id="link-panel") as c.link_panel:
            c.url_input = gr.Textbox(
                label="Paste a link",
                info='e.g. TikTok · SoundCloud · [MP3Quran](https://www.mp3quran.net/) · [all supported sites](https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md)',
                lines=1,
            )
            # The download button starts disabled and the player hidden.
            c.url_download_btn = gr.Button("Download", size="sm", variant="secondary", interactive=False)
            c.url_audio_player = gr.Audio(label="Downloaded Audio", visible=False, interactive=False)

        # Upload panel (with example shortcut buttons above the uploader)
        with gr.Column(visible=_is_upload, elem_id="upload-panel") as c.upload_panel:
            with gr.Row(elem_id="example-row"):
                c.btn_ex_112 = gr.Button("112", size="sm", min_width=0)
                c.btn_ex_84 = gr.Button("84", size="sm", min_width=0)
                c.btn_ex_7 = gr.Button("7", size="sm", min_width=0)
                c.btn_ex_juz30 = gr.Button("Juz' 30", size="sm", min_width=0)
            c.audio_upload = gr.Audio(label="Upload Recitation", sources=["upload"], type="filepath")

        # Record panel
        with gr.Column(visible=_is_record, elem_id="record-panel") as c.record_panel:
            c.audio_record = gr.Audio(label="Record Recitation", sources=["microphone"], type="filepath")

        # Hidden unified audio state (fed by upload, record, or URL download)
        # gr.State avoids cascading .change events that gr.Audio would fire
        c.audio_input = gr.State(value=None)

        _build_animation_settings(c)

        c.anim_cached_settings = gr.JSON(value=None, visible=False)
        with gr.Accordion("Model Settings", open=True) as c.model_accordion:
            with gr.Row():
                c.model_radio = gr.Radio(
                    choices=["Base", "Large"],
                    value="Base",
                    label="ASR Model",
                    info="Large: more robust to noisy/non-studio recitations but slower"
                )
                c.device_radio = gr.Radio(
                    choices=["GPU", "CPU"],
                    value="GPU",
                    label="Device",
                    info="Daily GPU usage limits. Unlimited CPU usage but slower"
                )

        with gr.Accordion("Segmentation Settings", open=True) as c.seg_accordion:
            # create_segmentation_settings() returns three sliders followed
            # by three preset buttons, unpacked here in that order.
            c.min_silence_slider, c.min_speech_slider, c.pad_slider, \
                c.preset_mujawwad, c.preset_murattal, c.preset_fast = create_segmentation_settings()

        # JSON download appears here after extraction
        c.export_file = gr.File(label="\U0001f4e5 Download JSON", visible=True, interactive=False)


def _build_animation_settings(c):
    """Create the collapsed "Animation Settings" accordion.

    All controls are stored as attributes on ``c``.  The opacity and window
    sliders take their initial values from the preset of the default display
    mode (falling back to the configured defaults) and are only interactive
    when that default mode is "Custom".
    """
    row_scales = ANIM_STYLE_ROW_SCALES

    with gr.Accordion("Animation Settings", open=False, elem_id="anim-settings-accordion"):
        # Row 1: granularity, animation style, verse-only toggle, colour.
        with gr.Row(elem_id="anim-style-row"):
            c.anim_granularity_radio = gr.Radio(
                label="Granularity",
                choices=ANIM_GRANULARITIES,
                value=ANIM_GRANULARITY_DEFAULT,
                scale=row_scales[0],
            )
            c.anim_mode_radio = gr.Radio(
                label="Animation Style",
                choices=ANIM_DISPLAY_MODES,
                value=ANIM_DISPLAY_MODE_DEFAULT,
                scale=row_scales[1],
            )
            c.anim_verse_checkbox = gr.Checkbox(
                label="Verse Only",
                value=False,
                elem_id="anim-verse-mode",
                scale=row_scales[2],
                min_width=90,
            )
            c.anim_color_picker = gr.ColorPicker(
                label="Color",
                value=ANIM_WORD_COLOR,
                scale=row_scales[3],
            )

        custom_mode = ANIM_DISPLAY_MODE_DEFAULT == "Custom"
        preset_values = ANIM_PRESETS.get(ANIM_DISPLAY_MODE_DEFAULT, {})

        # Row 2: opacity before / after the current word.
        opacity_common = dict(
            minimum=0,
            maximum=1,
            step=ANIM_OPACITY_STEP,
            interactive=custom_mode,
        )
        with gr.Row():
            c.anim_opacity_prev_slider = gr.Slider(
                label="Before Opacity",
                elem_id="anim-opacity-prev",
                value=preset_values.get("prev_opacity", ANIM_OPACITY_PREV_DEFAULT),
                **opacity_common,
            )
            c.anim_opacity_after_slider = gr.Slider(
                label="After Opacity",
                elem_id="anim-opacity-after",
                value=preset_values.get("after_opacity", ANIM_OPACITY_AFTER_DEFAULT),
                **opacity_common,
            )

        # Row 3: number of words shown before / after.
        window_common = dict(step=1, interactive=custom_mode)
        with gr.Row():
            c.anim_window_prev_slider = gr.Slider(
                label="Before Words",
                elem_id="anim-window-prev",
                minimum=ANIM_WINDOW_PREV_MIN,
                maximum=ANIM_WINDOW_PREV_MAX,
                value=preset_values.get("prev_words", ANIM_WINDOW_PREV_DEFAULT),
                **window_common,
            )
            c.anim_window_after_slider = gr.Slider(
                label="After Words",
                elem_id="anim-window-after",
                minimum=ANIM_WINDOW_AFTER_MIN,
                maximum=ANIM_WINDOW_AFTER_MAX,
                value=preset_values.get("after_words", ANIM_WINDOW_AFTER_DEFAULT),
                **window_common,
            )

        # Row 4: text styling sliders (always use the MEGA_* defaults).
        with gr.Row(elem_id="mega-styling-row"):
            c.anim_word_spacing_slider = gr.Slider(
                label="Word Spacing",
                elem_id="anim-word-spacing",
                minimum=MEGA_WORD_SPACING_MIN,
                maximum=MEGA_WORD_SPACING_MAX,
                step=MEGA_WORD_SPACING_STEP,
                value=MEGA_WORD_SPACING_DEFAULT,
            )
            c.anim_text_size_slider = gr.Slider(
                label="Text Size",
                elem_id="anim-text-size",
                minimum=MEGA_TEXT_SIZE_MIN,
                maximum=MEGA_TEXT_SIZE_MAX,
                step=MEGA_TEXT_SIZE_STEP,
                value=MEGA_TEXT_SIZE_DEFAULT,
            )
            c.anim_line_spacing_slider = gr.Slider(
                label="Line Spacing",
                elem_id="anim-line-spacing",
                minimum=MEGA_LINE_SPACING_MIN,
                maximum=MEGA_LINE_SPACING_MAX,
                step=MEGA_LINE_SPACING_STEP,
                value=MEGA_LINE_SPACING_DEFAULT,
            )


def _build_right_column(c):
    """Create the right-hand output column and fill it with the results UI."""
    output_column = gr.Column(scale=RIGHT_COLUMN_SCALE)
    with output_column:
        _build_results_content(c)


def _build_results_content(c):
    """Create the results area: action buttons, resegment panel and outputs.

    Every component is stored as an attribute on ``c``.
    """
    # Keyword sets shared by several buttons below.
    large = dict(size="lg")
    hidden_primary = dict(variant="primary", visible=False, **large)

    resegment_hint = (
        "Uses cached data, skipping the heavy computation, so it's much faster. "
        "Useful if results are over-segmented or under-segmented"
    )
    placeholder_html = '<div style="text-align: center; color: #666; padding: 60px;">Upload audio and click "Extract Segments" to begin</div>'

    c.extract_btn = gr.Button("Extract Segments", variant="secondary", interactive=False, **large)
    c.pipeline_progress = gr.HTML(value="", visible=False)

    with gr.Row(elem_id="action-btns-row"):
        c.resegment_toggle_btn = gr.Button("Resegment with New Settings", **hidden_primary)
        c.retranscribe_btn = gr.Button("Retranscribe with Large Model", **hidden_primary)

    with gr.Row(elem_id="ts-row"):
        c.compute_ts_btn = gr.Button(
            "Compute Timestamps",
            variant="secondary",
            interactive=False,
            visible=False,
            **large,
        )
        c.compute_ts_progress = gr.HTML(value="", visible=False)
        c.animate_all_html = gr.HTML(value="", visible=False)

    # Initially hidden panel with a second set of segmentation controls.
    with gr.Column(visible=False) as c.resegment_panel:
        gr.Markdown(resegment_hint)
        rs_controls = create_segmentation_settings(id_suffix="-rs")
        (
            c.rs_silence,
            c.rs_speech,
            c.rs_pad,
            c.rs_btn_muj,
            c.rs_btn_mur,
            c.rs_btn_fast,
        ) = rs_controls
        c.resegment_btn = gr.Button("Resegment", variant="primary", **large)

    c.output_html = gr.HTML(value=placeholder_html, elem_classes=["output-html"])
    # Invisible JSON component kept for API consumers.
    c.output_json = gr.JSON(visible=False, label="JSON Output")


def _build_dev_tab(c):
    """Populate the Dev tab by delegating to ``src.ui.dev_tools``."""
    # Imported at call time, so the module is only loaded when this
    # function actually runs.
    from src.ui.dev_tools import build_dev_tab_ui as render_dev_tab

    render_dev_tab(c)