Spaces:

Futuresony
/

ASR

Sleeping

Futuresony committed on Feb 9, 2025

Commit

7e8eba3

verified ·

1 Parent(s): 070c6b0

Create app.py

Files changed (1) hide show

app.py ADDED Viewed

+import gradio as gr
+import torch
+import torchaudio
+from transformers import AutoModelForCTC, AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
+# Load MMS ASR model
+MODEL_NAME = "facebook/mms-1b-all"
+device = "cuda" if torch.cuda.is_available() else "cpu"
+processor = AutoProcessor.from_pretrained(MODEL_NAME)
+model = AutoModelForSpeechSeq2Seq.from_pretrained(MODEL_NAME).to(device)
+asr_pipeline = pipeline("automatic-speech-recognition", model=model, processor=processor, torch_dtype=torch.float16, device=0 if device == "cuda" else -1)
+# Speech-to-text function
+def transcribe(audio):
+    waveform, sr = torchaudio.load(audio)
+    waveform = torchaudio.transforms.Resample(sr, 16000)(waveform)  # Ensure 16kHz sample rate
+    text = asr_pipeline({"array": waveform.squeeze().numpy(), "sampling_rate": 16000})["text"]
+    return text
+# Gradio UI
+gr.Interface(
+    fn=transcribe,
+    inputs=gr.Audio(source="microphone", type="filepath"),
+    outputs=gr.Text(label="Transcription"),
+    title="Real-time Speech-to-Text",
+    description="Speak into your microphone and see the transcribed text.",
+).launch()