V10.0
app.py CHANGED
@@ -1,12 +1,9 @@
 import time
 import os
 import evaluate
+import gradio as gr
 from datasets import load_dataset
-from huggingface_hub import login
-from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
-
-# 🔑 Authenticate using HF_TOKEN secret
-login(token=os.environ.get("HF_TOKEN"))
+from transformers import pipeline
 
 # -----------------
 # Load evaluation metrics
@@ -15,15 +12,12 @@ cer_metric = evaluate.load("cer")
 
 # -----------------
 # Small sample dataset for Hindi
-# (free Spaces can't handle large test sets)
 test_ds = load_dataset("mozilla-foundation/common_voice_11_0", "hi", split="test[:3]")
 
 # Extract references + audio
 refs = [x["sentence"] for x in test_ds]
 audio_data = [x["audio"]["array"] for x in test_ds]
 
-results = {}
-
 # -----------------
 # Helper to evaluate model
 def evaluate_model(model_name, pipeline_kwargs=None):
@@ -45,7 +39,6 @@ def evaluate_model(model_name, pipeline_kwargs=None):
     rtf = (end - start) / sum(len(a) / 16000 for a in audio_data)
 
     return {
-        "Transcriptions": preds,
         "WER": wer_metric.compute(predictions=preds, references=refs),
         "CER": cer_metric.compute(predictions=preds, references=refs),
         "RTF": rtf
@@ -72,9 +65,23 @@ models = {
 }
 
 # -----------------
-#
-
-
-
+# Gradio interface
+def run_evaluations():
+    rows = []
+    for label, cfg in models.items():
+        res = evaluate_model(cfg["name"], cfg["pipeline_kwargs"])
+        if "Error" in res:
+            rows.append([label, res["Error"], "-", "-"])
+        else:
+            rows.append([label, f"{res['WER']:.3f}", f"{res['CER']:.3f}", f"{res['RTF']:.2f}"])
+    return rows
+
+with gr.Blocks() as demo:
+    gr.Markdown("## ASR Benchmark Comparison (Hindi Sample)\nEvaluating **WER, CER, RTF** across models.")
+    btn = gr.Button("Run Evaluation")
+    table = gr.Dataframe(headers=["Model", "WER", "CER", "RTF"], datatype=["str", "str", "str", "str"], interactive=False)
+
+    btn.click(fn=run_evaluations, outputs=table)
 
-
+if __name__ == "__main__":
+    demo.launch()
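
The body of `evaluate_model` (new-file lines 24-38) sits outside the hunks above; only its signature, the RTF line, and the return dict are visible. Below is a minimal sketch of a helper consistent with those pieces, assuming it wraps a `transformers` ASR pipeline and signals failure through the `"Error"` key that `run_evaluations` checks; everything else in it is an assumption, not code from this commit.

# Hypothetical reconstruction of the elided helper body; only the
# signature, the RTF line, and the return dict appear in the diff.
def evaluate_model(model_name, pipeline_kwargs=None):
    try:
        asr = pipeline(
            "automatic-speech-recognition",
            model=model_name,
            **(pipeline_kwargs or {}),
        )
        start = time.time()
        preds = [asr(a)["text"] for a in audio_data]
        end = time.time()
    except Exception as e:
        # run_evaluations renders this as an error row in the table
        return {"Error": str(e)}

    # Real-time factor: processing seconds per second of audio;
    # the diff's formula assumes the arrays are 16 kHz samples
    rtf = (end - start) / sum(len(a) / 16000 for a in audio_data)

    return {
        "WER": wer_metric.compute(predictions=preds, references=refs),
        "CER": cer_metric.compute(predictions=preds, references=refs),
        "RTF": rtf,
    }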
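
The `models` mapping is likewise elided (only its closing brace appears in the last hunk), but `run_evaluations` implies each entry pairs a display label with `"name"` and `"pipeline_kwargs"` keys. A hypothetical example of that shape, using placeholder checkpoints rather than the ones actually benchmarked:

# Placeholder entries illustrating the inferred shape of `models`;
# the real labels and checkpoints are not shown in this diff.
models = {
    "Whisper Tiny": {
        "name": "openai/whisper-tiny",
        "pipeline_kwargs": {"generate_kwargs": {"language": "hi", "task": "transcribe"}},
    },
    "Whisper Small": {
        "name": "openai/whisper-small",
        "pipeline_kwargs": None,
    },
}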