Spaces:
Running
Running
| import gradio as gr | |
| import os | |
| import time | |
| import threading | |
| import traceback | |
| import queue | |
| from moonshine_voice import ( | |
| Transcriber, | |
| load_wav_file, | |
| TranscriptEventListener, | |
| get_model_for_language, | |
| string_to_model_arch, | |
| ModelArch, | |
| ) | |
# Human-readable labels for every bundled Moonshine checkpoint, keyed by the
# model identifier understood by ``string_to_model_arch``.
STREAMING_MODELS = {
    "medium-streaming": "Medium Streaming (245M params)",
    "small-streaming": "Small Streaming (123M params)",
    "tiny-streaming": "Tiny Streaming (34M params)",
}
NON_STREAMING_MODELS = {
    "base": "Base (61M params)",
    "tiny": "Tiny (39M params)",
}
# Gradio dropdowns take (label, value) pairs, so flip each mapping entry.
STREAMING_CHOICES = [(label, key) for key, label in STREAMING_MODELS.items()]
NON_STREAMING_CHOICES = [(label, key) for key, label in NON_STREAMING_MODELS.items()]
ALL_CHOICES = STREAMING_CHOICES + NON_STREAMING_CHOICES
# Streaming ids first, then non-streaming — order matters for the preload log.
ALL_MODELS = [*STREAMING_MODELS, *NON_STREAMING_MODELS]
# Preload every English model once at startup so individual requests never
# pay the model-load cost (important on a small shared CPU instance).
banner = "=" * 60
print(banner)
print(" Preloading all English Moonshine models...")
print(banner)
transcriber_cache: dict[str, Transcriber] = {}
for name in ALL_MODELS:
    print(f" -> Loading '{name}'...")
    # Resolve the architecture enum, then the on-disk weights for English.
    path, arch = get_model_for_language("en", string_to_model_arch(name))
    transcriber_cache[name] = Transcriber(model_path=path, model_arch=arch)
    print(f" OK '{name}' ready")
print(banner)
print(" All models loaded!")
print(banner)
# Bundled demo audio shipped next to this file.
ASSETS_DIR = os.path.join(os.path.dirname(__file__), "assets")
EXAMPLE_AUDIO = os.path.join(ASSETS_DIR, "Aiden.wav")
| # --------------------------------------------------------------------------- | |
| # Queue system β serializes transcription requests (critical for 2 vCPU) | |
| # --------------------------------------------------------------------------- | |
| transcription_queue: queue.Queue = queue.Queue() | |
| stop_event = threading.Event() # raised to cancel current job | |
| active_transcriber: Transcriber | None = None # so stop can call .stop() | |
| active_transcriber_lock = threading.Lock() | |
| queue_position_lock = threading.Lock() | |
| current_queue_size = 0 # approximate position indicator | |
| job_active = False # is a transcription currently running? | |
def request_generation_stop():
    """Signal a request to stop the current generation.

    Sets the shared ``stop_event`` (polled by the transcribe loop) and, if a
    transcriber is currently running, asks it directly to stop. Returns a
    Gradio update that disables the Stop button while cancellation settles.
    """
    stop_event.set()
    with active_transcriber_lock:
        current = active_transcriber
        if current is not None:
            try:
                current.stop()
            except Exception:
                # Best effort — the stop_event flag is the real cancellation path.
                pass
    return gr.update(interactive=False)
def update_model_choices(mode):
    """Return ONLY the relevant models for the selected mode."""
    streaming = mode == "Streaming"
    choices = STREAMING_CHOICES if streaming else NON_STREAMING_CHOICES
    default = "tiny-streaming" if streaming else "tiny"
    return gr.Dropdown(choices=choices, value=default)
def transcribe(audio_path, mode, model_name):
    """Transcribe ``audio_path`` with the selected Moonshine model.

    Generator used as a Gradio event handler: each ``yield`` streams a status
    or (partial) transcript string into the output textbox. Requests are
    serialized through a module-level FIFO so only one job runs at a time;
    a job waiting in line can be cancelled via the shared ``stop_event``.

    Args:
        audio_path: Filesystem path to the uploaded/recorded WAV file.
        mode: "Streaming" or "Non-Streaming".
        model_name: Key into ``transcriber_cache``.

    Yields:
        Human-readable status / transcription text.

    Raises:
        gr.Error: missing audio, unreadable file, unknown model, or any
            unexpected failure during transcription.
    """
    global active_transcriber, current_queue_size, job_active
    if audio_path is None:
        raise gr.Error("Please upload an audio file.")
    try:
        audio_data, sample_rate = load_wav_file(audio_path)
    except Exception as e:
        raise gr.Error(f"Error loading audio: {e}")
    transcriber = transcriber_cache.get(model_name)
    if transcriber is None:
        raise gr.Error(f"Model '{model_name}' not loaded.")
    # --- Queue gate: wait for our turn ---
    # BUGFIX: the ticket carries a "cancelled" flag. Previously a job that was
    # cancelled while queued left its bare Event in transcription_queue; the
    # finishing job then "released" that dead ticket and never woke a live
    # waiter, stranding every later request. The releaser in the finally block
    # below now skips cancelled tickets.
    ticket = {"event": threading.Event(), "cancelled": False}
    with queue_position_lock:
        current_queue_size += 1
        pos = current_queue_size
        if not job_active:
            # No one is running — we go immediately
            job_active = True
            ticket["event"].set()
        else:
            # Someone is running — queue up
            transcription_queue.put(ticket)
    if pos > 1 and not ticket["event"].is_set():
        yield f"β³ Queued β position {pos - 1} in line. Please wait..."
    # Block until it's our turn (polling so a queued job can be cancelled)
    while not ticket["event"].wait(timeout=0.5):
        if stop_event.is_set():
            with queue_position_lock:
                if ticket["event"].is_set():
                    # Released at the same instant the stop arrived — we are
                    # now the active job, so run normally.
                    break
                ticket["cancelled"] = True  # releaser will skip this ticket
                current_queue_size = max(0, current_queue_size - 1)
            yield "π Cancelled while queued."
            return
    # Reset stop event for this job (a stale stop must not kill a fresh run)
    stop_event.clear()
    with active_transcriber_lock:
        active_transcriber = transcriber  # lets the Stop button reach us
    is_streaming = mode == "Streaming"
    try:
        if not is_streaming:
            yield "β³ Transcribing..."
            if stop_event.is_set():
                yield "π Stopped."
                return
            transcript = transcriber.transcribe_without_streaming(
                audio_data, sample_rate=sample_rate, flags=0
            )
            if stop_event.is_set():
                yield "π Stopped."
                return
            lines = []
            for line in transcript.lines:
                end = line.start_time + line.duration
                lines.append(f"[{line.start_time:.2f}s β {end:.2f}s] {line.text}")
            yield "\n".join(lines) if lines else "No speech detected."
        else:
            yield "β³ Streaming..."
            transcriber.start()
            completed_lines: list[str] = []
            current_partial = ""

            class _Listener(TranscriptEventListener):
                # Accumulates finished lines and tracks the in-progress one.
                def on_line_started(self, event):
                    nonlocal current_partial
                    current_partial = (
                        f"βΊ {event.line.start_time:.2f}s: {event.line.text}"
                    )

                def on_line_text_changed(self, event):
                    nonlocal current_partial
                    current_partial = (
                        f"βΊ {event.line.start_time:.2f}s: {event.line.text}"
                    )

                def on_line_completed(self, event):
                    nonlocal current_partial
                    completed_lines.append(
                        f"β {event.line.start_time:.2f}s: {event.line.text}"
                    )
                    current_partial = ""

            listener = _Listener()
            transcriber.remove_all_listeners()  # drop listeners from earlier runs
            transcriber.add_listener(listener)
            # Feed audio in ~250 ms chunks to simulate a live microphone.
            chunk_duration = 0.25
            chunk_size = int(chunk_duration * sample_rate)
            for i in range(0, len(audio_data), chunk_size):
                if stop_event.is_set():
                    display = "\n".join(completed_lines)
                    display += "\nπ Stopped."
                    yield display
                    try:
                        transcriber.stop()
                    except Exception:
                        pass
                    break
                chunk = audio_data[i : i + chunk_size]
                transcriber.add_audio(chunk, sample_rate)
                time.sleep(0.05)  # pace the feed so partials render progressively
                display = "\n".join(completed_lines)
                if current_partial:
                    display += "\n" + current_partial
                yield display.strip() or "β³ Streaming..."
            else:
                # Normal completion (loop didn't break): flush trailing audio
                transcriber.stop()
                time.sleep(0.5)
                display = "\n".join(completed_lines)
                if current_partial:
                    display += "\n" + current_partial
                yield display.strip() or "No speech detected."
    except gr.Error:
        raise
    except Exception as e:
        full_error = traceback.format_exc()
        print(f"Unexpected error: {full_error}")
        raise gr.Error(f"An unexpected error occurred: {str(e)}")
    finally:
        with active_transcriber_lock:
            active_transcriber = None
        with queue_position_lock:
            current_queue_size = max(0, current_queue_size - 1)
        # Hand the slot to the next *live* waiter, skipping tickets that were
        # cancelled while queued; mark the line idle if none remain. The
        # cancelled flag is checked under queue_position_lock, the same lock a
        # waiter takes before cancelling, so a wake-up can never be lost.
        while True:
            try:
                next_ticket = transcription_queue.get_nowait()
            except queue.Empty:
                with queue_position_lock:
                    job_active = False
                break
            with queue_position_lock:
                if not next_ticket["cancelled"]:
                    next_ticket["event"].set()
                    break
            # cancelled ticket — discard it and look for another waiter
# Load the custom theme bundled in assets/, falling back to Gradio's stock
# Soft theme if the file is missing or unreadable.
try:
    theme = gr.Theme.load(os.path.join(ASSETS_DIR, "theme.json"))
except Exception as e:
    print(f"Warning: Could not load custom theme: {e}. Using default Soft theme.")
    theme = gr.themes.Soft()
# Page-level CSS: hides the Gradio footer, makes the app full-width, styles
# the header/links/disclaimer sections, and lays the mode radio out as a
# full-width horizontal segmented control.
css = """
footer {visibility: hidden}
.gradio-container {
    max-width: 100% !important;
    padding: 0 !important;
}
.header-section {
    text-align: left;
    margin-bottom: 0;
}
#app-header {
    margin: 0 !important;
    padding: 0 !important;
}
#app-header > div {
    margin: 0 !important;
    padding: 0 !important;
}
.logo-container {
    display: flex;
    justify-content: flex-start;
    align-items: center;
    gap: 8px;
    margin-bottom: 0;
}
.logo-img {
    height: 34px;
    border-radius: 8px;
}
.main-title {
    color: #2c8afa;
    font-weight: 800;
    font-size: 1.7rem;
    margin: 0;
}
.description {
    max-width: 900px;
    margin: 0;
    font-size: 0.9rem;
    line-height: 1.35;
    color: #4b5563;
}
.links-row {
    display: flex;
    flex-wrap: wrap;
    justify-content: flex-start;
    gap: 8px;
    margin: 0;
    font-size: 0.85rem;
}
.links-row a {
    color: #2c8afa;
    text-decoration: none;
    padding: 3px 12px;
    border: 1px solid #2c8afa;
    border-radius: 15px;
    transition: all 0.2s;
    white-space: nowrap;
}
.links-row a:hover {
    background-color: #2c8afa;
    color: white;
}
.disclaimer {
    text-align: center;
    font-size: 0.8rem;
    color: #9ca3af;
    margin-top: 30px;
    padding: 20px;
    border: 2px dashed #4b5563;
    border-radius: 12px;
}
#app-disclaimer {
    margin: 0 !important;
    padding: 0 !important;
}
#app-disclaimer > div {
    margin: 0 !important;
}
#app-disclaimer .html-container {
    margin: 0 !important;
    padding: 0 !important;
}
#app-disclaimer .html-container .disclaimer {
    margin-left: 0 !important;
}
.social-handles {
    display: flex;
    justify-content: center;
    gap: 20px;
    margin: 15px 0;
}
.social-icon {
    width: 28px;
    height: 28px;
    transition: all 0.3s ease;
}
.social-icon:hover {
    transform: scale(1.1) translateY(-3px);
}
#transcription-mode .wrap {
    display: flex !important;
    flex-direction: row !important;
    width: 100% !important;
}
#transcription-mode .wrap label {
    flex: 1 !important;
    justify-content: center !important;
    text-align: center !important;
}
"""
# UI definition. Layout: header HTML, then a two-column row (inputs on the
# left, transcript on the right), example rows, and a disclaimer footer.
# Event wiring at the bottom handles the transcribe/stop/clear lifecycle.
with gr.Blocks(css=css, theme=theme) as demo:
    with gr.Column(elem_classes="header-section"):
        gr.HTML("""
            <div style="gap: 12px; display: flex; flex-direction: column; align-items: flex-start;">
                <div class="logo-container">
                    <img src="https://raw.githubusercontent.com/moonshine-ai/moonshine/main/images/logo.png" class="logo-img" alt="Moonshine Web Logo">
                    <h1 class="main-title">Moonshine ASR</h1>
                </div>
                <div class="description">
                    <b>Fast, accurate, on-device speech recognition.</b><br>
                    Moonshine delivers real-time transcription on edge devices — from laptops to Raspberry Pi.
                </div>
                <div class="links-row">
                    <a href="https://github.com/moonshine-ai/moonshine" target="_blank">β Star on GitHub</a>
                </div>
            </div>
        """, elem_id="app-header")
    with gr.Row():
        # Left column: audio input plus mode/model selection and action buttons.
        with gr.Column(scale=1):
            audio_file = gr.Audio(
                sources=["upload", "microphone"],
                type="filepath",
                label="Upload Audio (.wav)",
            )
            mode_radio = gr.Radio(
                choices=["Streaming", "Non-Streaming"],
                value="Streaming",
                label="Transcription Mode",
                elem_id="transcription-mode",
            )
            model_dropdown = gr.Dropdown(
                choices=ALL_CHOICES,
                value="tiny-streaming",
                label="Select from Moonshine Models",
            )
            # Switching the mode narrows the dropdown to matching models.
            mode_radio.change(
                fn=update_model_choices,
                inputs=mode_radio,
                outputs=model_dropdown,
            )
            with gr.Row():
                clear_btn = gr.Button("ποΈ Clear", variant="secondary")
                transcribe_btn = gr.Button("β‘ Transcribe", variant="primary")
                # Stop is hidden while idle; shown only during a running job.
                stop_btn = gr.Button("π΄ Stop", variant="stop", visible=False)
        # Right column: streamed transcription output.
        with gr.Column(scale=1):
            output_text = gr.Textbox(label="Transcription Output", lines=6)
    # One clickable example per preloaded model, all using the bundled clip.
    gr.Examples(
        examples=[
            [EXAMPLE_AUDIO, "Streaming", "medium-streaming"],
            [EXAMPLE_AUDIO, "Streaming", "small-streaming"],
            [EXAMPLE_AUDIO, "Streaming", "tiny-streaming"],
            [EXAMPLE_AUDIO, "Non-Streaming", "base"],
            [EXAMPLE_AUDIO, "Non-Streaming", "tiny"],
        ],
        inputs=[audio_file, mode_radio, model_dropdown],
    )
    gr.HTML("""
        <div class="disclaimer">
            <div class="social-handles">
                <a href="https://github.com/D3vShoaib" target="_blank" style="color: inherit;" aria-label="GitHub">
                    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="24" height="24" fill="currentColor"><path d="M12 0c-6.626 0-12 5.373-12 12 0 5.302 3.438 9.8 8.207 11.387.599.111.793-.261.793-.577v-2.234c-3.338.726-4.033-1.416-4.033-1.416-.546-1.387-1.333-1.756-1.333-1.756-1.089-.745.083-.729.083-.729 1.205.084 1.839 1.237 1.839 1.237 1.07 1.834 2.807 1.304 3.492.997.107-.775.418-1.305.762-1.604-2.665-.305-5.467-1.334-5.467-5.931 0-1.311.469-2.381 1.236-3.221-.124-.303-.535-1.524.117-3.176 0 0 1.008-.322 3.301 1.23.957-.266 1.983-.399 3.003-.404 1.02.005 2.047.138 3.006.404 2.291-1.552 3.297-1.23 3.297-1.23.653 1.653.242 2.874.118 3.176.77.84 1.235 1.911 1.235 3.221 0 4.609-2.807 5.624-5.479 5.921.43.372.823 1.102.823 2.222v3.293c0 .319.192.694.801.576 4.765-1.589 8.199-6.086 8.199-11.386 0-6.627-5.373-12-12-12z"/></svg>
                </a>
                <a href="https://linkedin.com/in/D3vShoaib" target="_blank" style="color: inherit;" aria-label="LinkedIn">
                    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="24" height="24" fill="currentColor"><path d="M19 0h-14c-2.761 0-5 2.239-5 5v14c0 2.761 2.239 5 5 5h14c2.762 0 5-2.239 5-5v-14c0-2.761-2.238-5-5-5zm-11 19h-3v-11h3v11zm-1.5-12.268c-.966 0-1.75-.79-1.75-1.764s.784-1.764 1.75-1.764 1.75.79 1.75 1.764-.783 1.764-1.75 1.764zm13.5 12.268h-3v-5.604c0-3.368-4-3.113-4 0v5.604h-3v-11h3v1.765c1.396-2.586 7-2.777 7 2.476v6.759z"/></svg>
                </a>
                <a href="https://twitter.com/D3vShoaib" target="_blank" style="color: inherit;" aria-label="Twitter">
                    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="24" height="24" fill="currentColor"><path d="M13.682 10.621L20.216 3h-1.549l-5.674 6.624-4.53-6.624H2.433l6.85 10.007-6.85 7.993h1.549l6.014-7.022 4.811 7.022h6.03L13.68 10.62zm-2.091 2.441l-.683-.98L5.342 4.144H7.72l4.475 6.417.683.981 5.8 8.32h-2.378l-4.71-6.8z"/></svg>
                </a>
                <a href="https://instagram.com/d3vshoaib" target="_blank" style="color: inherit;" aria-label="Instagram">
                    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="24" height="24" fill="currentColor"><path d="M12 2.163c3.204 0 3.584.012 4.85.07 3.252.148 4.771 1.691 4.919 4.919.058 1.265.069 1.645.069 4.849 0 3.205-.012 3.584-.069 4.849-.149 3.225-1.664 4.771-4.919 4.919-1.266.058-1.644.07-4.85.07-3.204 0-3.584-.012-4.849-.07-3.26-.149-4.771-1.699-4.919-4.92-.058-1.265-.07-1.644-.07-4.849 0-3.204.013-3.583.07-4.849.149-3.227 1.664-4.771 4.919-4.919 1.266-.057 1.645-.069 4.849-.069zm0-2.163c-3.259 0-3.667.014-4.947.072-4.358.2-6.78 2.618-6.98 6.98-.059 1.281-.073 1.689-.073 4.948 0 3.259.014 3.668.072 4.948.2 4.358 2.618 6.78 6.98 6.98 1.281.058 1.689.072 4.948.072 3.259 0 3.668-.014 4.948-.072 4.354-.2 6.782-2.618 6.979-6.98.059-1.28.073-1.689.073-4.948 0-3.259-.014-3.667-.072-4.947-.196-4.354-2.617-6.78-6.979-6.98-1.281-.059-1.69-.073-4.949-.073zm0 5.838c-3.403 0-6.162 2.759-6.162 6.162s2.759 6.163 6.162 6.163 6.162-2.759 6.162-6.163c0-3.403-2.759-6.162-6.162-6.162zm0 10.162c-2.209 0-4-1.79-4-4 0-2.209 1.791-4 4-4s4 1.791 4 4c0 2.21-1.791 4-4 4zm6.406-11.845c-.796 0-1.441.645-1.441 1.44s.645 1.44 1.441 1.44c.795 0 1.439-.645 1.439-1.44s-.644-1.44-1.439-1.44z"/></svg>
                </a>
            </div>
            <p>Built with β€οΈ by <a href="https://github.com/D3vShoaib" style="color: #2c8afa; text-decoration: none; font-weight: 500;">D3vShoaib</a></p>
            <p>β οΈ I am not associated with Moonshine and this is only for demonstration purposes.</p>
        </div>
    """, elem_id="app-disclaimer")
    # UI state management functions
    def switch_to_generating_state():
        """Hide Transcribe and show an enabled Stop button while a job runs."""
        return (
            gr.update(visible=False),  # Hide transcribe button
            gr.update(visible=True, interactive=True),  # Show stop button
        )
    def switch_to_idle_state():
        """Restore the idle layout: Transcribe visible, Stop hidden."""
        return (
            gr.update(visible=True),  # Show transcribe button
            gr.update(visible=False),  # Hide stop button
        )
    # Event handlers: swap buttons, stream transcribe() output, swap back.
    transcribe_event = (
        transcribe_btn.click(
            fn=switch_to_generating_state, outputs=[transcribe_btn, stop_btn]
        )
        .then(
            fn=transcribe,
            inputs=[audio_file, mode_radio, model_dropdown],
            outputs=output_text,
        )
        .then(fn=switch_to_idle_state, outputs=[transcribe_btn, stop_btn])
    )
    # Stop button handler β cancels the Gradio event AND sets the Python stop flag
    stop_btn.click(
        fn=request_generation_stop, outputs=[stop_btn], cancels=[transcribe_event]
    ).then(fn=switch_to_idle_state, outputs=[transcribe_btn, stop_btn])
    # Clear button handler
    def perform_clear_action():
        """Reset every input and the output box to the app's defaults."""
        return (
            None,  # audio_file
            "Streaming",  # mode_radio
            "tiny-streaming",  # model_dropdown
            "",  # output_text
        )
    clear_btn.click(
        fn=perform_clear_action,
        outputs=[audio_file, mode_radio, model_dropdown, output_text],
    )
if __name__ == "__main__":
    # BUGFIX: ``theme`` and ``css`` are gr.Blocks() constructor arguments and
    # are already applied where ``demo`` is created; Blocks.launch() does not
    # accept them and would raise TypeError. Only launch-level options remain.
    demo.queue(default_concurrency_limit=1).launch(allowed_paths=[ASSETS_DIR])