File size: 1,190 Bytes
e55df08
 
 
b8a7969
e55df08
d04a88d
b8a7969
e55df08
 
b8a7969
 
 
 
 
 
e55df08
 
b8a7969
 
e55df08
 
b8a7969
e55df08
b8a7969
 
 
 
 
 
 
e55df08
 
b8a7969
 
 
e55df08
 
 
 
 
 
 
b8a7969
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import gradio as gr
from transformers import pipeline

# 1. ASR pipeline, constructed once at import time and reused by every request
#    (wav2vec2-base-960h is an English speech-recognition checkpoint).
asr_pipeline = pipeline(
    task="automatic-speech-recognition",
    model="facebook/wav2vec2-base-960h",
)

# 2. Transcription function using a file path
def transcribe(audio_path) -> str:
    """Transcribe a recorded audio file to text.

    Args:
        audio_path: Path to the audio file recorded by Gradio. ``None`` or an
            empty string means no recording is available.

    Returns:
        The ``"text"`` entry of the ASR pipeline result, or the placeholder
        message ``"No audio received."`` when there is no audio to process.
    """
    # Falsy check covers both None (nothing recorded) and an empty path;
    # neither can be opened by the pipeline, so answer with the placeholder
    # instead of letting the call fail.
    if not audio_path:
        return "No audio received."

    # The pipeline accepts a file path directly; the transcript is read from
    # the "text" key of its result.
    result = asr_pipeline(audio_path)
    return result["text"]

# 3. Gradio UI: a microphone recorder, a button, and a textbox for the result.
with gr.Blocks() as demo:
    gr.Markdown(
        "# 🎤 ASR Demo (Hugging Face Space)\n"
        "Speak into your mic and get a transcript."
    )

    # The recorder hands the callback a file path (not a numpy array), and the
    # recording is stored as WAV so it is easy to decode.
    audio_input = gr.Audio(
        label="Record your voice",
        sources=["microphone"],
        type="filepath",
        format="wav",
    )

    transcribe_btn = gr.Button("Transcribe")
    output_text = gr.Textbox(label="Transcription")

    # Clicking the button runs transcribe() on the recording and shows the
    # returned text in the textbox.
    transcribe_btn.click(transcribe, audio_input, output_text)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()