Spaces:
Sleeping
Sleeping
File size: 1,785 Bytes
1746625 dbc1dc3 1746625 7ea0519 1746625 1cf42e4 1746625 1cf42e4 1746625 3a893b4 1746625 f7003ba 1746625 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
import os
os.environ["TRANSFORMERS_NO_TF"] = "1"
from transformers import pipeline
import gradio as gr
from evaluate import load
# Word-error-rate metric, used to score a prediction against a reference.
wer_metric = load("wer")

# Model identifier handed to pipeline(model=...), keyed by the display name
# shown in the UI.
_ASR_MODEL_IDS = {
    "Wav2Vec2": "Devion333/wav2vec2-xls-r-300m-dv",
    "Whisper small": "Devion333/whisper-small-dv-syn",
}

# Every pipeline is constructed here, at module import, in the order listed.
models = {
    display_name: pipeline(task="automatic-speech-recognition", model=model_id)
    for display_name, model_id in _ASR_MODEL_IDS.items()
}
def transcribe(audio, chosen_models, reference):
    """Transcribe one audio clip with each selected ASR model.

    Args:
        audio: Audio input forwarded unchanged to each pipeline (the UI's
            audio component is configured with ``type="filepath"``). May be
            ``None`` when nothing was uploaded or recorded.
        chosen_models: Keys of the module-level ``models`` mapping to run.
            ``None`` or an empty selection yields an empty result.
        reference: Optional reference transcript. When it is ``None``, empty,
            or whitespace-only, no WER is computed.

    Returns:
        A dict mapping each selected model name to ``{"prediction": text}``,
        with an additional ``"WER"`` entry (rounded to 3 decimals) when a
        reference transcript was supplied.

    Raises:
        gr.Error: If at least one model is selected but no audio was given.
        KeyError: If a selected name is not a key of ``models``.
    """
    # Nothing selected means nothing to run, regardless of the other inputs.
    if not chosen_models:
        return {}

    # Fail with a readable message instead of letting the pipeline choke on
    # a missing input.
    if audio is None:
        raise gr.Error("Please upload or record audio before transcribing.")

    # Normalise the reference once; it is the same for every model. WER is
    # compared case-insensitively, so both sides are lower-cased.
    reference_text = (reference or "").strip().lower()

    results = {}
    for model_name in chosen_models:
        prediction = models[model_name](audio)["text"]
        entry = {"prediction": prediction}
        if reference_text:
            wer = wer_metric.compute(
                predictions=[prediction.lower()],
                references=[reference_text],
            )
            entry["WER"] = round(wer, 3)
        results[model_name] = entry
    return results
# Gradio UI. The three inputs are passed positionally to transcribe() as
# (audio, chosen_models, reference); the dict it returns is rendered as JSON.
_audio_input = gr.Audio(
    sources=["microphone", "upload"],
    type="filepath",
    label="Upload or Record Speech",
)
_model_picker = gr.CheckboxGroup(
    choices=list(models),
    value=["Wav2Vec2"],
    label="Choose Models to Compare",
)
_reference_box = gr.Textbox(label="Reference Transcript (optional)")

demo = gr.Interface(
    fn=transcribe,
    inputs=[_audio_input, _model_picker, _reference_box],
    outputs=gr.JSON(label="Transcriptions & Statistics"),
    title="ASR Model Comparison",
    description=(
        "Upload or record audio, select ASR models, and compare their "
        "transcriptions. Optionally, provide a reference transcript to "
        "calculate WER."
    ),
)
# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()