Spaces:

idofgaurav
/

audio

Sleeping

idofgaurav committed on Nov 16, 2025

Commit

b8a7969

verified ·

1 Parent(s): d04a88d

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,47 +1,43 @@
 import gradio as gr
 from transformers import pipeline
-# 1. Load the ASR pipeline (this will download the model the first time)
 asr_pipeline = pipeline(
     "automatic-speech-recognition",
-    model="facebook/wav2vec2-base-960h"
 )
-# 2. Define the transcription function
-def transcribe(audio):
-    if audio is None:
         return "No audio received."
-    sr, data = audio
-    result = asr_pipeline({"array": data, "sampling_rate": sr})
     return result["text"]
-# 3. Build the Gradio interface
 with gr.Blocks() as demo:
-    gr.Markdown("# 🎤 Browser ASR Demo\nRecord your voice and get the transcription.")
-    with gr.Row():
-        audio_input = gr.Audio(
-            sources=["microphone"],  # enables browser mic recording
-            type="numpy",           # gives (sample_rate, data) to the function
-            label="Record or upload audio"
-        )
-    transcribe_btn = gr.Button("Transcribe")
-    output_text = gr.Textbox(
-        label="Transcription",
-        placeholder="Your transcript will appear here..."
     )
-    # Link button to function
     transcribe_btn.click(
         fn=transcribe,
         inputs=audio_input,
         outputs=output_text
     )
-# 4. Launch app locally
 if __name__ == "__main__":
-    demo.launch(True)

 import gradio as gr
 from transformers import pipeline
+# 1. Build the ASR pipeline (English-only model)
 asr_pipeline = pipeline(
     "automatic-speech-recognition",
+    model="facebook/wav2vec2-base-960h"  # good English model
 )
+# 2. Transcription function using a file path
+def transcribe(audio_path):
+    """
+    audio_path: path to a .wav file recorded by Gradio
+    """
+    if audio_path is None:
         return "No audio received."
+    # pipeline can take a file path directly
+    result = asr_pipeline(audio_path)
     return result["text"]
+# 3. Gradio UI
 with gr.Blocks() as demo:
+    gr.Markdown("# 🎤 ASR Demo (Hugging Face Space)\nSpeak into your mic and get a transcript.")
+    audio_input = gr.Audio(
+        sources=["microphone"],
+        type="filepath",      # <-- IMPORTANT: send a file path, not numpy
+        format="wav",         # ensure WAV format (easier to decode)
+        label="Record your voice"
     )
+    transcribe_btn = gr.Button("Transcribe")
+    output_text = gr.Textbox(label="Transcription")
     transcribe_btn.click(
         fn=transcribe,
         inputs=audio_input,
         outputs=output_text
     )
 if __name__ == "__main__":
+    demo.launch()