Spaces:
Running on Zero
Running on Zero
| """Gradio UI — layout orchestrator.""" | |
| import json | |
| from pathlib import Path | |
| from types import SimpleNamespace | |
| import gradio as gr | |
| from config import ( | |
| DELETE_CACHE_FREQUENCY, DELETE_CACHE_AGE, | |
| DEV_TAB_VISIBLE, | |
| ANIM_WORD_COLOR, ANIM_STYLE_ROW_SCALES, | |
| ANIM_DISPLAY_MODES, ANIM_DISPLAY_MODE_DEFAULT, | |
| ANIM_OPACITY_PREV_DEFAULT, ANIM_OPACITY_AFTER_DEFAULT, ANIM_OPACITY_STEP, | |
| ANIM_PRESETS, | |
| ANIM_GRANULARITIES, ANIM_GRANULARITY_DEFAULT, | |
| ANIM_WINDOW_PREV_DEFAULT, ANIM_WINDOW_AFTER_DEFAULT, | |
| ANIM_WINDOW_PREV_MIN, ANIM_WINDOW_PREV_MAX, | |
| ANIM_WINDOW_AFTER_MIN, ANIM_WINDOW_AFTER_MAX, | |
| MEGA_WORD_SPACING_MIN, MEGA_WORD_SPACING_MAX, MEGA_WORD_SPACING_STEP, MEGA_WORD_SPACING_DEFAULT, | |
| MEGA_TEXT_SIZE_MIN, MEGA_TEXT_SIZE_MAX, MEGA_TEXT_SIZE_STEP, MEGA_TEXT_SIZE_DEFAULT, | |
| MEGA_LINE_SPACING_MIN, MEGA_LINE_SPACING_MAX, MEGA_LINE_SPACING_STEP, MEGA_LINE_SPACING_DEFAULT, | |
| LEFT_COLUMN_SCALE, RIGHT_COLUMN_SCALE, | |
| DEFAULT_INPUT_MODE, | |
| ) | |
| from src.ui.styles import build_css | |
| from src.ui.js_config import build_js_head | |
| from src.ui.handlers import create_segmentation_settings | |
| from src.ui.event_wiring import wire_events | |
# Load surah name ligature map.
# The file is decoded explicitly as UTF-8 so the result does not depend on
# the host's locale encoding (which is not UTF-8 on every platform).
with open(
    Path(__file__).parent.parent.parent / "data" / "ligatures.json",
    encoding="utf-8",
) as _f:
    _SURAH_LIGATURES = json.load(_f)
def build_interface():
    """Build the Gradio interface.

    Creates a ``SimpleNamespace`` (``c``) that collects every component the
    layout helpers create, lays out the page inside a ``gr.Blocks`` context,
    and finally hands both the app and the namespace to ``wire_events``.

    Returns:
        The constructed ``gr.Blocks`` application.
    """
    c = SimpleNamespace()
    css = build_css()
    js = build_js_head(_SURAH_LIGATURES)

    with gr.Blocks(
        title="Quran Multi-Aligner",
        css=css,
        head=js,
        delete_cache=(DELETE_CACHE_FREQUENCY, DELETE_CACHE_AGE),
    ) as app:
        gr.Markdown("# \U0001f399\ufe0f Quran Multi-Aligner")
        gr.Markdown("""
- Transcribe and split any recitation by pauses within 1-2 minutes
- Get precise pause-, verse-, word- and character-level timestamps, exportable as JSON
- GPU-powered <a href="https://github.com/Wider-Community/quranic-universal-audio/blob/main/quranic_universal_aligner/docs/client_api.md" target="_blank">API usage</a> with daily quotas, and unlimited CPU usage
- Reliable confidence system to flag uncertain segments and missed words — no silent errors
- Robust tolerance to noise, speaker variation and low audio quality, particularly with the large model
- <a href="https://github.com/Wider-Community/quranic-universal-audio/issues" target="_blank">Feedback/contributions are welcome</a>
""")

        # API Documentation accordion
        _api_doc = _read_doc("client_api.md")
        with gr.Accordion("\U0001f4e1 API Usage", open=False):
            gr.Markdown(_api_doc)

        # Changelog accordion
        _changelog = _read_doc("CHANGELOG.md")
        with gr.Accordion("📋 Changelog", open=False):
            gr.Markdown(_changelog)

        # The main row is always built; the Dev tab (and the Tabs wrapper
        # around both) only exists when DEV_TAB_VISIBLE is set.
        if DEV_TAB_VISIBLE:
            with gr.Tabs():
                with gr.Tab("Results"):
                    _build_main_row(c)
                with gr.Tab("Dev"):
                    _build_dev_tab(c)
        else:
            _build_main_row(c)

        # State components for caching VAD data between runs
        c.cached_speech_intervals = gr.State(value=None)
        c.cached_is_complete = gr.State(value=None)
        c.cached_audio = gr.State(value=None)
        c.cached_sample_rate = gr.State(value=None)
        c.cached_intervals = gr.State(value=None)
        c.cached_model_name = gr.State(value=None)
        c.cached_segment_dir = gr.State(value=None)
        c.cached_log_row = gr.State(value=None)
        c.is_preset = gr.State(value=False)
        c.resegment_panel_visible = gr.State(value=False)

        # Session API components (hidden, API-only)
        c.api_audio = gr.Audio(visible=False, type="numpy")
        c.api_audio_id = gr.Textbox(visible=False)
        c.api_silence = gr.Number(visible=False, precision=0)
        c.api_speech = gr.Number(visible=False, precision=0)
        c.api_pad = gr.Number(visible=False, precision=0)
        c.api_model = gr.Textbox(visible=False)
        c.api_device = gr.Textbox(visible=False)
        c.api_timestamps = gr.JSON(visible=False)
        c.api_mfa_segments = gr.JSON(visible=False)
        c.api_mfa_granularity = gr.Textbox(visible=False)
        c.api_estimate_endpoint = gr.Textbox(visible=False)
        c.api_estimate_audio_duration = gr.Number(visible=False)
        c.api_url = gr.Textbox(visible=False)
        c.api_result = gr.JSON(visible=False)

        # Wiring happens last, once every component exists on ``c``.
        wire_events(app, c)

    return app


def _read_doc(filename):
    """Return the text of ``docs/<filename>``, decoded as UTF-8.

    The ``docs`` directory is resolved three levels above this file. The
    encoding is explicit so the read does not depend on the host locale.
    """
    docs_dir = Path(__file__).parent.parent.parent / "docs"
    return (docs_dir / filename).read_text(encoding="utf-8")


def _build_main_row(c):
    """Build the ``main-row`` holding the left and right columns."""
    with gr.Row(elem_id="main-row"):
        _build_left_column(c)
        _build_right_column(c)
def _build_left_column(c):
    """Build the left input column.

    Creates, in order: the Link/Upload/Record mode toggle, one panel per
    input mode (only the panel matching ``DEFAULT_INPUT_MODE`` starts
    visible), the unified audio state, the animation settings, the model and
    segmentation settings accordions, and the JSON download slot.

    Args:
        c: Namespace that receives every created component as an attribute.
    """
    with gr.Column(scale=LEFT_COLUMN_SCALE, elem_id="left-col"):
        _is_link = DEFAULT_INPUT_MODE == "Link"
        _is_upload = DEFAULT_INPUT_MODE == "Upload"
        _is_record = DEFAULT_INPUT_MODE == "Record"

        # Input mode toggle; the default mode's button gets "mode-active".
        with gr.Row(elem_id="input-mode-row"):
            c.mode_link = gr.Button(
                "Link", size="sm", min_width=0,
                elem_classes=["mode-active"] if _is_link else [],
            )
            c.mode_upload = gr.Button(
                "Upload", size="sm", min_width=0,
                elem_classes=["mode-active"] if _is_upload else [],
            )
            c.mode_record = gr.Button(
                "Record", size="sm", min_width=0,
                elem_classes=["mode-active"] if _is_record else [],
            )

        # Link panel
        with gr.Column(visible=_is_link, elem_id="link-panel") as c.link_panel:
            c.url_input = gr.Textbox(
                label="Paste a link",
                info='e.g. TikTok · SoundCloud · [MP3Quran](https://www.mp3quran.net/) · [all supported sites](https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md)',
                lines=1,
            )
            c.url_download_btn = gr.Button(
                "Download", size="sm", variant="secondary", interactive=False
            )
            c.url_audio_player = gr.Audio(
                label="Downloaded Audio", visible=False, interactive=False
            )

        # Upload panel
        with gr.Column(visible=_is_upload, elem_id="upload-panel") as c.upload_panel:
            with gr.Row(elem_id="example-row"):
                c.btn_ex_112 = gr.Button("112", size="sm", min_width=0)
                c.btn_ex_84 = gr.Button("84", size="sm", min_width=0)
                c.btn_ex_7 = gr.Button("7", size="sm", min_width=0)
                c.btn_ex_juz30 = gr.Button("Juz' 30", size="sm", min_width=0)
            c.audio_upload = gr.Audio(
                label="Upload Recitation", sources=["upload"], type="filepath"
            )

        # Record panel
        with gr.Column(visible=_is_record, elem_id="record-panel") as c.record_panel:
            c.audio_record = gr.Audio(
                label="Record Recitation", sources=["microphone"], type="filepath"
            )

        # Hidden unified audio state (fed by upload, record, or URL download)
        # gr.State avoids cascading .change events that gr.Audio would fire
        c.audio_input = gr.State(value=None)

        _build_animation_settings(c)
        c.anim_cached_settings = gr.JSON(value=None, visible=False)

        with gr.Accordion("Model Settings", open=True) as c.model_accordion:
            with gr.Row():
                c.model_radio = gr.Radio(
                    choices=["Base", "Large"],
                    value="Base",
                    label="ASR Model",
                    info="Large: more robust to noisy/non-studio recitations but slower",
                )
                c.device_radio = gr.Radio(
                    choices=["GPU", "CPU"],
                    value="GPU",
                    label="Device",
                    # Spelling fixed: was "Unlimitted".
                    info="Daily GPU usage limits. Unlimited CPU usage but slower",
                )

        with gr.Accordion("Segmentation Settings", open=True) as c.seg_accordion:
            (
                c.min_silence_slider, c.min_speech_slider, c.pad_slider,
                c.preset_mujawwad, c.preset_murattal, c.preset_fast,
            ) = create_segmentation_settings()

        # JSON download appears here after extraction
        c.export_file = gr.File(
            label="\U0001f4e5 Download JSON", visible=True, interactive=False
        )
def _build_animation_settings(c):
    """Build the animation settings accordion.

    The opacity and window sliders take their initial values from the preset
    registered for ``ANIM_DISPLAY_MODE_DEFAULT`` (falling back to the
    per-slider defaults) and are only editable when that default mode is
    "Custom". The three "mega" styling sliders are configured from their own
    constants.

    Args:
        c: Namespace that receives every created component as an attribute.
    """
    editable = ANIM_DISPLAY_MODE_DEFAULT == "Custom"
    preset_values = ANIM_PRESETS.get(ANIM_DISPLAY_MODE_DEFAULT, {})
    row_scales = ANIM_STYLE_ROW_SCALES

    # Keyword sets shared by each slider pair.
    opacity_kwargs = {
        "minimum": 0,
        "maximum": 1,
        "step": ANIM_OPACITY_STEP,
        "interactive": editable,
    }
    window_kwargs = {"step": 1, "interactive": editable}

    with gr.Accordion("Animation Settings", open=False, elem_id="anim-settings-accordion"):
        # Style row: granularity, display mode, verse-only toggle, colour.
        with gr.Row(elem_id="anim-style-row"):
            c.anim_granularity_radio = gr.Radio(
                label="Granularity",
                choices=ANIM_GRANULARITIES,
                value=ANIM_GRANULARITY_DEFAULT,
                scale=row_scales[0],
            )
            c.anim_mode_radio = gr.Radio(
                label="Animation Style",
                choices=ANIM_DISPLAY_MODES,
                value=ANIM_DISPLAY_MODE_DEFAULT,
                scale=row_scales[1],
            )
            c.anim_verse_checkbox = gr.Checkbox(
                label="Verse Only",
                value=False,
                elem_id="anim-verse-mode",
                scale=row_scales[2],
                min_width=90,
            )
            c.anim_color_picker = gr.ColorPicker(
                label="Color",
                value=ANIM_WORD_COLOR,
                scale=row_scales[3],
            )

        # Opacity of the text before / after the current position.
        with gr.Row():
            c.anim_opacity_prev_slider = gr.Slider(
                label="Before Opacity",
                elem_id="anim-opacity-prev",
                value=preset_values.get("prev_opacity", ANIM_OPACITY_PREV_DEFAULT),
                **opacity_kwargs,
            )
            c.anim_opacity_after_slider = gr.Slider(
                label="After Opacity",
                elem_id="anim-opacity-after",
                value=preset_values.get("after_opacity", ANIM_OPACITY_AFTER_DEFAULT),
                **opacity_kwargs,
            )

        # Number of words shown before / after the current position.
        with gr.Row():
            c.anim_window_prev_slider = gr.Slider(
                label="Before Words",
                elem_id="anim-window-prev",
                minimum=ANIM_WINDOW_PREV_MIN,
                maximum=ANIM_WINDOW_PREV_MAX,
                value=preset_values.get("prev_words", ANIM_WINDOW_PREV_DEFAULT),
                **window_kwargs,
            )
            c.anim_window_after_slider = gr.Slider(
                label="After Words",
                elem_id="anim-window-after",
                minimum=ANIM_WINDOW_AFTER_MIN,
                maximum=ANIM_WINDOW_AFTER_MAX,
                value=preset_values.get("after_words", ANIM_WINDOW_AFTER_DEFAULT),
                **window_kwargs,
            )

        # Text layout sliders; these are not tied to the display-mode preset.
        with gr.Row(elem_id="mega-styling-row"):
            c.anim_word_spacing_slider = gr.Slider(
                label="Word Spacing",
                elem_id="anim-word-spacing",
                minimum=MEGA_WORD_SPACING_MIN,
                maximum=MEGA_WORD_SPACING_MAX,
                step=MEGA_WORD_SPACING_STEP,
                value=MEGA_WORD_SPACING_DEFAULT,
            )
            c.anim_text_size_slider = gr.Slider(
                label="Text Size",
                elem_id="anim-text-size",
                minimum=MEGA_TEXT_SIZE_MIN,
                maximum=MEGA_TEXT_SIZE_MAX,
                step=MEGA_TEXT_SIZE_STEP,
                value=MEGA_TEXT_SIZE_DEFAULT,
            )
            c.anim_line_spacing_slider = gr.Slider(
                label="Line Spacing",
                elem_id="anim-line-spacing",
                minimum=MEGA_LINE_SPACING_MIN,
                maximum=MEGA_LINE_SPACING_MAX,
                step=MEGA_LINE_SPACING_STEP,
                value=MEGA_LINE_SPACING_DEFAULT,
            )
def _build_right_column(c):
    """Build the right output column.

    Opens a column scaled by ``RIGHT_COLUMN_SCALE`` and fills it with the
    results content.

    Args:
        c: Namespace that receives every created component as an attribute.
    """
    output_column = gr.Column(scale=RIGHT_COLUMN_SCALE)
    with output_column:
        _build_results_content(c)
def _build_results_content(c):
    """Build the main results content (extract/resegment/output).

    Creates the extract button and its progress slot, the follow-up action
    buttons (created hidden), the timestamp row, the resegment panel (created
    hidden) and the HTML / JSON output components.

    Args:
        c: Namespace that receives every created component as an attribute.
    """
    placeholder_html = (
        '<div style="text-align: center; color: #666; padding: 60px;">'
        'Upload audio and click "Extract Segments" to begin</div>'
    )

    c.extract_btn = gr.Button(
        "Extract Segments",
        variant="secondary",
        size="lg",
        interactive=False,
    )
    c.pipeline_progress = gr.HTML(value="", visible=False)

    with gr.Row(elem_id="action-btns-row"):
        c.resegment_toggle_btn = gr.Button(
            "Resegment with New Settings",
            variant="primary",
            size="lg",
            visible=False,
        )
        c.retranscribe_btn = gr.Button(
            "Retranscribe with Large Model",
            variant="primary",
            size="lg",
            visible=False,
        )

    with gr.Row(elem_id="ts-row"):
        c.compute_ts_btn = gr.Button(
            "Compute Timestamps",
            variant="secondary",
            size="lg",
            interactive=False,
            visible=False,
        )
        c.compute_ts_progress = gr.HTML(value="", visible=False)
        c.animate_all_html = gr.HTML(value="", visible=False)

    with gr.Column(visible=False) as c.resegment_panel:
        gr.Markdown(
            "Uses cached data, skipping the heavy computation, "
            "so it's much faster. Useful if results are over-segmented "
            "or under-segmented"
        )
        # Second set of segmentation controls, distinguished by the "-rs" suffix.
        (
            c.rs_silence, c.rs_speech, c.rs_pad,
            c.rs_btn_muj, c.rs_btn_mur, c.rs_btn_fast,
        ) = create_segmentation_settings(id_suffix="-rs")
        c.resegment_btn = gr.Button("Resegment", variant="primary", size="lg")

    c.output_html = gr.HTML(value=placeholder_html, elem_classes=["output-html"])

    # Hidden JSON output for API consumers
    c.output_json = gr.JSON(visible=False, label="JSON Output")
def _build_dev_tab(c):
    """Build the Dev tab UI (delegates to dev_tools module).

    The ``dev_tools`` import is done inside the function, so the module is
    only loaded when this builder is actually called.

    Args:
        c: Namespace passed through to ``build_dev_tab_ui``.
    """
    from src.ui.dev_tools import build_dev_tab_ui as _render_dev_tab

    _render_dev_tab(c)