Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from faster_whisper import WhisperModel | |
| import torch | |
| import os | |
| import warnings | |
# Silence the Hugging Face Hub warning about unauthenticated requests.
warnings.filterwarnings(
    "ignore",
    message="You are sending unauthenticated requests",
)

# --- Configuration ---
# Whisper checkpoint size. Valid choices include tiny, base, small, medium,
# large-v2 and large-v3; "tiny" was the size explicitly requested
# (equivalent to guillaumekln/faster-whisper-tiny).
MODEL_SIZE = "tiny"

# Pick the device and the matching compute type together: float16 is used
# only when CUDA is available, otherwise the CPU path runs with int8.
if torch.cuda.is_available():
    device = "cuda"
    compute_type = "float16"
else:
    device = "cpu"
    compute_type = "int8"

# Cached model instance; stays None until get_model() builds it.
model = None
def get_model():
    """Return the shared WhisperModel, building it on the first call.

    The instance is cached in the module-level ``model`` variable, so later
    calls return the same object without loading anything again. If building
    the model with the configured ``device`` / ``compute_type`` raises, the
    error is printed and the load is retried on CPU with int8.
    """
    global model

    # Fast path: an earlier call already built the model.
    if model is not None:
        return model

    print(f"Initializing Faster Whisper Model: {MODEL_SIZE}")
    print(f"Device: {device}, Compute Type: {compute_type}")
    try:
        loaded = WhisperModel(MODEL_SIZE, device=device, compute_type=compute_type)
    except Exception as load_error:
        # Best-effort fallback: report the failure and retry on CPU/int8.
        print(f"Error loading model: {load_error}")
        print("Attempting to load on CPU with int8...")
        loaded = WhisperModel(MODEL_SIZE, device="cpu", compute_type="int8")

    model = loaded
    return model
# --- Language Options ---
# (display name, language code) pairs for a selection of common languages
# supported by Whisper.
_LANGUAGE_CODES = (
    ("Bengali", "bn"),
    ("English", "en"),
    ("Hindi", "hi"),
    ("Chinese", "zh"),
    ("Spanish", "es"),
    ("French", "fr"),
    ("German", "de"),
    ("Japanese", "ja"),
    ("Russian", "ru"),
    ("Portuguese", "pt"),
    ("Arabic", "ar"),
    ("Urdu", "ur"),
    ("Italian", "it"),
    ("Korean", "ko"),
    ("Turkish", "tr"),
    ("Polish", "pl"),
    ("Dutch", "nl"),
    ("Thai", "th"),
    ("Vietnamese", "vi"),
    ("Indonesian", "id"),
)

# Choice labels: the "Auto-Detect" entry first, then one "Name (code)" label
# per pair, in the order listed above.
LANGUAGES = ["Auto-Detect"] + [
    f"{name} ({code})" for name, code in _LANGUAGE_CODES
]
def format_timestamp(seconds):
    """Format an offset in seconds as ``MM:SS.ss`` (hundredths of a second).

    The value is rounded to whole hundredths *before* it is split into
    minutes and seconds. Rounding afterwards would let a value such as
    59.999 render as the invalid ``00:60.00``; here it becomes ``01:00.00``.
    Minutes are not folded into hours, so they simply grow past two digits
    for offsets of 100 minutes or more.

    Args:
        seconds: Non-negative offset in seconds (int or float).

    Returns:
        The formatted timestamp string.
    """
    total_centis = round(seconds * 100)
    minutes, centis = divmod(total_centis, 6000)
    return f"{minutes:02d}:{centis / 100:05.2f}"
def transcribe_audio(audio_path, language, beam_size, vad_filter):
    """Transcribe an audio file with Faster Whisper, streaming the result.

    This is a generator. After every decoded segment it yields the transcript
    accumulated so far plus a status line, so a caller can show progress
    while decoding is still running. A final pair with a "Completed" status
    is yielded once all segments have been consumed.

    Args:
        audio_path: Path of the audio file. A falsy value (``None``, ``""``)
            yields a single prompt asking for audio and stops.
        language: A label such as ``"Bengali (bn)"``, or ``"Auto-Detect"`` /
            an empty value, in which case ``language=None`` is passed to the
            model.
        beam_size: Beam size; converted with ``int()`` before being passed on.
        vad_filter: Forwarded unchanged as the ``vad_filter`` argument.

    Yields:
        ``(transcript_text, status_text)`` tuples. If anything raises during
        model loading or transcription, a single
        ``("Error during transcription: ...", "Error")`` pair is yielded
        instead of propagating the exception.
    """
    if not audio_path:
        yield "Please upload or record an audio file first.", "Waiting..."
        return

    # Labels look like "Bengali (bn)": the code is the text after the last
    # "(" with the closing ")" stripped. Anything that is not a string label
    # falls back to None, as does the "Auto-Detect" choice.
    lang_code = None
    if isinstance(language, str) and language and language != "Auto-Detect":
        lang_code = language.split("(")[-1].strip(")")

    print(f"Transcribing {audio_path} with language={lang_code}, beam_size={beam_size}, vad={vad_filter}")

    try:
        # Get the model (loads only once).
        whisper_model = get_model()
        segments, info = whisper_model.transcribe(
            audio_path,
            language=lang_code,
            beam_size=int(beam_size),
            vad_filter=vad_filter,
        )
        detected_lang_info = f"Detected Language: {info.language} (Prob: {info.language_probability:.2f})"

        full_transcript = ""
        # The loop stays inside the try block so that errors raised while
        # iterating the segments are reported the same way as setup errors.
        for segment in segments:
            start_fmt = format_timestamp(segment.start)
            end_fmt = format_timestamp(segment.end)
            # Line format: [00:00.00 -> 00:05.00] Text
            full_transcript += f"[{start_fmt} -> {end_fmt}] {segment.text}" + "\n"
            # Emit the transcript so far together with a progress status.
            yield full_transcript, f"{detected_lang_info} | Processing segment endings at {end_fmt}s"

        yield full_transcript, f"{detected_lang_info} | Completed"
    except Exception as e:
        # Top-level UI boundary: surface the failure as text instead of
        # letting the generator raise.
        yield f"Error during transcription: {str(e)}", "Error"
# --- Gradio UI ---
# NOTE(review): `theme` is created here but is not referenced again in the
# visible code, so it appears to have no effect. Confirm whether it should be
# passed to gr.Blocks(...) or demo.launch(...) for the installed Gradio version.
theme = gr.themes.Soft(primary_hue="blue", neutral_hue="slate")
# Build the page: a header row, a two-column working area (inputs on one
# side, outputs on the other), the button wiring, and a footer note.
with gr.Blocks(title="Faster Whisper Tiny Demo") as demo:
    # Header with the bilingual (English / Bengali) title and description.
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown(
                """
                # 🎙️ Faster Whisper Tiny STT Demo
                ### Bengali & Multilingual Support | বাংলা এবং বহুভাষিক সমর্থন
                This Space uses the `faster-whisper` library with the **'tiny'** model for fast and efficient speech-to-text transcription.
                Run entirely on CPU/GPU seamlessly.
                """
            )
    with gr.Row():
        # Input column: audio source, decoding options, and the start button.
        with gr.Column(scale=1):
            # Audio Input: allow file upload and microphone.
            # type="filepath" hands transcribe_audio a path string.
            audio_input = gr.Audio(
                sources=["upload", "microphone"],
                type="filepath",
                label="Audio Input (Audio File or Microphone) | অডিও ইনপুট"
            )
            with gr.Accordion("Advanced Settings | উন্নত সেটিংস", open=True):
                # Choices come from LANGUAGES; transcribe_audio extracts the
                # code in parentheses from the selected label.
                language_dropdown = gr.Dropdown(
                    choices=LANGUAGES,
                    value="Auto-Detect",
                    label="Language | ভাষা",
                    info="Select 'Auto-Detect' or specify a language."
                )
                # Integer steps from 1 to 10, defaulting to 5.
                beam_size_slider = gr.Slider(
                    minimum=1,
                    maximum=10,
                    step=1,
                    value=5,
                    label="Beam Size",
                    info="Higher values search more paths (slower but potentially more accurate)."
                )
                # Enabled by default; passed through as vad_filter.
                vad_filter_checkbox = gr.Checkbox(
                    value=True,
                    label="VAD Filter",
                    info="Filter out silence using Voice Activity Detection."
                )
            transcribe_btn = gr.Button("Transcribe Audio | প্রতিলিপি করুন", variant="primary", size="lg")
        # Output column: read-only status line above the transcript box.
        with gr.Column(scale=1):
            status_output = gr.Textbox(label="Status | অবস্থা", interactive=False)
            transcript_output = gr.Textbox(
                label="Transcription Output | প্রতিলিপি ফলাফল",
                lines=20,
                max_lines=30,
                placeholder="Transcription will appear here..."
            )
    # Event Handlers
    # transcribe_audio is a generator yielding (transcript, status) pairs;
    # the order of `outputs` matches that tuple order.
    transcribe_btn.click(
        fn=transcribe_audio,
        inputs=[audio_input, language_dropdown, beam_size_slider, vad_filter_checkbox],
        outputs=[transcript_output, status_output]
    )
    # Footer note.
    gr.Markdown(
        """
        ---
        **Note:** The model downloads automatically on the first run.
        Powered by [faster-whisper](https://github.com/guillaumekln/faster-whisper) and Hugging Face Spaces.
        """
    )
# Launch the Gradio app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()