Hadi32 committed on
Commit
dca6b45
·
verified ·
1 Parent(s): 9612a6d

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +189 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import gradio as gr
from faster_whisper import WhisperModel
import torch
import os  # NOTE(review): appears unused in this file — confirm before removing

# --- Configuration ---
# You can change the model size here (tiny, base, small, medium, large-v2, large-v3)
# The user specifically requested "tiny" (guillaumekln/faster-whisper-tiny equivalent)
MODEL_SIZE = "tiny"

# Check for CUDA availability
device = "cuda" if torch.cuda.is_available() else "cpu"
# Use float16 only if on CUDA, otherwise int8 or float32 for CPU
compute_type = "float16" if device == "cuda" else "int8"

print(f"Initializing Faster Whisper Model: {MODEL_SIZE}")
print(f"Device: {device}, Compute Type: {compute_type}")

# Load the model.
# download_root is not specified, so it defaults to the user's cache directory
# (which persists in HF Spaces if caching is enabled, or redownloads if ephemeral)
try:
    model = WhisperModel(MODEL_SIZE, device=device, compute_type=compute_type)
except Exception as e:
    # GPU initialization can fail even when torch reports CUDA available
    # (e.g. missing cuDNN/cuBLAS in the runtime image); fall back to the
    # safest configuration instead of crashing at import time.
    print(f"Error loading model: {e}")
    print("Attempting to load on CPU with int8...")
    model = WhisperModel(MODEL_SIZE, device="cpu", compute_type="int8")
# --- Language Options ---
# A selection of common languages supported by Whisper.
# Label format is "Name (code)"; transcribe_audio() extracts the ISO-639-1
# code from inside the parentheses, and "Auto-Detect" maps to language=None.
LANGUAGES = [
    "Auto-Detect",
    "Bengali (bn)",
    "English (en)",
    "Hindi (hi)",
    "Chinese (zh)",
    "Spanish (es)",
    "French (fr)",
    "German (de)",
    "Japanese (ja)",
    "Russian (ru)",
    "Portuguese (pt)",
    "Arabic (ar)",
    "Urdu (ur)",
    "Italian (it)",
    "Korean (ko)",
    "Turkish (tr)",
    "Polish (pl)",
    "Dutch (nl)",
    "Thai (th)",
    "Vietnamese (vi)",
    "Indonesian (id)"
]
def format_timestamp(seconds):
    """Render a duration in seconds as an ``MM:SS.ss`` string.

    The fractional part is kept to two decimal places; minutes are not
    wrapped at 60, so inputs of an hour or more simply show >= 60 minutes.
    """
    whole_minutes, remainder = divmod(seconds, 60)
    return f"{int(whole_minutes):02d}:{remainder:05.2f}"
def transcribe_audio(audio_path, language, beam_size, vad_filter):
    """Transcribe an audio file with Faster Whisper, streaming partial results.

    Generator used as a Gradio event handler: it yields
    ``(transcript_so_far, status_message)`` tuples so the UI updates as each
    segment is decoded.

    Args:
        audio_path: Filesystem path to the audio file (Gradio ``type="filepath"``),
            or a falsy value when nothing was uploaded/recorded.
        language: A label from LANGUAGES, e.g. "English (en)" or "Auto-Detect".
        beam_size: Decoder beam width; coerced to int before use.
        vad_filter: Whether to apply voice-activity-detection silence filtering.

    Yields:
        (str, str): the accumulated timestamped transcript and a status line.
    """
    if not audio_path:
        yield "Please upload or record an audio file first.", "Waiting..."
        return

    # Extract the ISO code from a label like "Bengali (bn)" -> "bn".
    # FIX: the original wrapped this in a bare `except:` (which can never fire
    # for a str and would also swallow KeyboardInterrupt) and, for a label
    # without parentheses, passed the raw label through as a language code.
    # Now any malformed label cleanly falls back to auto-detect (None).
    lang_code = None
    if language and language != "Auto-Detect":
        open_paren = language.rfind("(")
        if open_paren != -1:
            lang_code = language[open_paren + 1:].strip(") ") or None

    print(f"Transcribing {audio_path} with language={lang_code}, beam_size={beam_size}, vad={vad_filter}")

    try:
        # transcribe() returns a lazy segment generator plus detection info.
        segments, info = model.transcribe(
            audio_path,
            language=lang_code,
            beam_size=int(beam_size),
            vad_filter=vad_filter
        )

        detected_lang_info = f"Detected Language: {info.language} (Prob: {info.language_probability:.2f})"

        full_transcript = ""

        # Iterate over segments generator; each iteration performs the actual
        # decoding work, so yielding here gives the UI a live-update effect.
        for segment in segments:
            start_fmt = format_timestamp(segment.start)
            end_fmt = format_timestamp(segment.end)

            # Format: [00:00.00 -> 00:05.00] Text
            segment_text = f"[{start_fmt} -> {end_fmt}] {segment.text}"
            full_transcript += segment_text + "\n"

            # Yielding the updated transcript and status
            yield full_transcript, f"{detected_lang_info} | Processing segment endings at {end_fmt}s"

        yield full_transcript, f"{detected_lang_info} | Completed"

    except Exception as e:
        # Boundary handler: surface the failure in the UI instead of raising
        # into Gradio's generic error page.
        yield f"Error during transcription: {str(e)}", "Error"
# --- Gradio UI ---
# Module-level Blocks construction; `demo` is the app object Spaces serves.
theme = gr.themes.Soft(primary_hue="blue", neutral_hue="slate")

with gr.Blocks(theme=theme, title="Faster Whisper Tiny Demo") as demo:

    # Header / description banner (English + Bengali).
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown(
                """
                # 🎙️ Faster Whisper Tiny STT Demo
                ### Bengali & Multilingual Support | বাংলা এবং বহুভাষিক সমর্থন

                This Space uses the `faster-whisper` library with the **'tiny'** model for fast and efficient speech-to-text transcription.
                Run entirely on CPU/GPU seamlessly.
                """
            )

    with gr.Row():
        # Left column: inputs and settings.
        with gr.Column(scale=1):
            # Audio Input: allow file upload and microphone
            audio_input = gr.Audio(
                sources=["upload", "microphone"],
                type="filepath",  # hands transcribe_audio a path, not raw samples
                label="Audio Input (Audio File or Microphone) | অডিও ইনপুট"
            )

            with gr.Accordion("Advanced Settings | উন্নত সেটিংস", open=True):
                language_dropdown = gr.Dropdown(
                    choices=LANGUAGES,
                    value="Auto-Detect",
                    label="Language | ভাষা",
                    info="Select 'Auto-Detect' or specify a language."
                )

                beam_size_slider = gr.Slider(
                    minimum=1,
                    maximum=10,
                    step=1,
                    value=5,
                    label="Beam Size",
                    info="Higher values search more paths (slower but potentially more accurate)."
                )

                vad_filter_checkbox = gr.Checkbox(
                    value=True,
                    label="VAD Filter",
                    info="Filter out silence using Voice Activity Detection."
                )

            transcribe_btn = gr.Button("Transcribe Audio | প্রতিলিপি করুন", variant="primary", size="lg")

        # Right column: streaming status and transcript output.
        with gr.Column(scale=1):
            status_output = gr.Textbox(label="Status | অবস্থা", interactive=False)
            transcript_output = gr.Textbox(
                label="Transcription Output | প্রতিলিপি ফলাফল",
                show_copy_button=True,
                lines=20,
                max_lines=30,
                placeholder="Transcription will appear here..."
            )

    # Event Handlers
    # transcribe_audio is a generator, so Gradio streams each yielded
    # (transcript, status) pair into the two outputs as decoding progresses.
    transcribe_btn.click(
        fn=transcribe_audio,
        inputs=[audio_input, language_dropdown, beam_size_slider, vad_filter_checkbox],
        outputs=[transcript_output, status_output]
    )

    gr.Markdown(
        """
        ---
        **Note:** The model downloads automatically on the first run.
        Powered by [faster-whisper](https://github.com/guillaumekln/faster-whisper) and Hugging Face Spaces.
        """
    )

if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio
2
+ faster-whisper
3
+ torch