Devion333 committed on
Commit
1746625
·
verified ·
1 Parent(s): aad5e80

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -0
app.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ os.environ["TRANSFORMERS_NO_TF"] = "1"
3
+
4
+ from transformers import pipeline
5
+ import gradio as gr
6
+ from evaluate import load
7
+
8
# Word-error-rate scorer, created once at module level.
wer_metric = load("wer")

# Display label -> ASR pipeline, preloaded at module level so the models can
# be compared side by side. Dict order is the order shown in the UI.
models = {
    label: pipeline(task="automatic-speech-recognition", model=checkpoint)
    for label, checkpoint in (
        ("Wav2Vec2 (Devion333)", "Devion333/wav2vec2-xls-r-300m-dv"),
        ("Wav2Vec2 (Sammau)", "Sammau/wav2vec2-large-xls-r-300m-dv-ng"),
        ("Wav2Vec2 (Alyaan)", "shiimi/wav2vec2LM"),
    )
}
26
+
27
def transcribe(audio, chosen_models, reference):
    """Transcribe *audio* with each selected model and optionally score it.

    Args:
        audio: Audio input forwarded unchanged to each ASR pipeline (the
            interface in this file supplies a file path).
        chosen_models: Iterable of keys into the module-level ``models`` dict.
        reference: Optional ground-truth transcript. ``None``, an empty
            string, or whitespace only means no WER is computed; surrounding
            whitespace is ignored otherwise.

    Returns:
        A dict mapping each chosen model name to ``{"prediction": str}``,
        with an extra ``"WER"`` entry (rounded to 3 decimals) when a
        reference was given. Empty when no model is selected.

    Raises:
        ValueError: If at least one model is selected but no audio was given.
        KeyError: If a chosen name is not a key of ``models``.
    """
    if not chosen_models:
        return {}
    if audio is None:
        # Fail fast with a readable message instead of an opaque error raised
        # from inside the pipeline.
        raise ValueError("No audio provided: upload or record a clip first.")

    # Normalise the reference once, outside the loop; it may be None.
    target = (reference or "").strip().lower()

    results = {}
    for model_name in chosen_models:
        prediction = models[model_name](audio)["text"]
        entry = {"prediction": prediction}
        if target:
            # Both sides are lower-cased so the score is case-insensitive.
            score = wer_metric.compute(
                predictions=[prediction.lower()],
                references=[target],
            )
            entry["WER"] = round(score, 3)
        results[model_name] = entry
    return results
48
+
49
# Input widgets, listed in the positional order that `transcribe` expects.
_audio_in = gr.Audio(
    sources=["microphone", "upload"],
    type="filepath",
    label="Upload or Record Speech",
)
_model_picker = gr.CheckboxGroup(
    choices=list(models),
    value=["Wav2Vec2 (Devion333)"],
    label="Choose Models to Compare",
)
_reference_box = gr.Textbox(label="Reference Transcript (optional)")

# Single-page UI: the dict returned by `transcribe` is rendered as JSON.
demo = gr.Interface(
    fn=transcribe,
    inputs=[_audio_in, _model_picker, _reference_box],
    outputs=gr.JSON(label="Transcriptions & Statistics"),
    title="ASR Model Comparison",
    description="Upload or record audio, select ASR models, and compare their transcriptions. Optionally, provide a reference transcript to calculate WER.",
)
60
+
61
# Launch the app only when this file is run as a script, not when imported.
# The dunder names are required: a bare `name` is undefined and raises
# NameError as soon as the module is executed.
if __name__ == "__main__":
    demo.launch()