Spaces:

sifujohn
/

transcribe

Sleeping

App Files Files Community

sifujohn committed on Jul 31, 2025

Commit

1a5e2a2

verified ·

1 Parent(s): 1e10d8f

Create app.py

Browse files

Files changed (1) hide show

app.py +58 -0

app.py ADDED Viewed

	@@ -0,0 +1,58 @@

+import gradio as gr
+import torch
+from transformers import pipeline
+from transformers.pipelines.audio_utils import ffmpeg_read
+import time
+import os
+# Choose an open-source model (English only or multilingual)
+MODEL_NAME = "openai/whisper-small"  # or try "distil-whisper/distil-small.en"
+BATCH_SIZE = 8
+YT_LENGTH_LIMIT_S = 3600
+device = 0 if torch.cuda.is_available() else "cpu"
+# Load open-source model
+pipe = pipeline(
+    task="automatic-speech-recognition",
+    model=MODEL_NAME,
+    chunk_length_s=30,
+    device=device,
+)
+# Transcribe function
+def transcribe(audio_path, task="transcribe"):
+    if audio_path is None or not os.path.exists(audio_path):
+        raise gr.Error("Invalid file path.")
+    # Read the audio file using ffmpeg_read
+    audio_array = ffmpeg_read(audio_path, pipe.feature_extractor.sampling_rate)
+    # Ensure the audio data is in the correct format
+    inputs = {"array": audio_array, "sampling_rate": pipe.feature_extractor.sampling_rate}
+    # Transcribe the audio
+    result = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)
+    return result["text"]
+# Wrapper for file uploads
+def handle_audio(audio_path, task):
+    try:
+        return transcribe(audio_path, task)
+    except Exception as e:
+        return f"❌ Error: {str(e)}"
+# Gradio UI
+with gr.Blocks() as demo:
+    gr.Markdown("# 🎙️ Free Whisper Speech-to-Text App\nPowered by Open Source Whisper from Hugging Face.")
+    with gr.Tabs():
+        with gr.Tab("🎧 Upload Audio"):
+            with gr.Row():
+                audio_input = gr.Audio(type="filepath", label="Upload audio file")
+                task_option = gr.Radio(["transcribe", "translate"], value="transcribe", label="Choose Task")
+            transcribe_btn = gr.Button("Transcribe")
+            result = gr.Textbox(label="📝 Transcription", lines=8)
+            transcribe_btn.click(handle_audio, inputs=[audio_input, task_option], outputs=result)
+demo.launch()