AvtnshM committed (verified)
Commit 4924252 · Parent(s): 64a9e0f
Files changed (1):
  1. app.py +21 -14
app.py CHANGED
@@ -1,12 +1,9 @@
 import time
 import os
 import evaluate
+import gradio as gr
 from datasets import load_dataset
-from huggingface_hub import login
-from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
-
-# 🔑 Authenticate using HF_TOKEN secret
-login(token=os.environ.get("HF_TOKEN"))
+from transformers import pipeline
 
 # -----------------
 # Load evaluation metrics
@@ -15,15 +12,12 @@ cer_metric = evaluate.load("cer")
 
 # -----------------
 # Small sample dataset for Hindi
-# (free Spaces can't handle large test sets)
 test_ds = load_dataset("mozilla-foundation/common_voice_11_0", "hi", split="test[:3]")
 
 # Extract references + audio
 refs = [x["sentence"] for x in test_ds]
 audio_data = [x["audio"]["array"] for x in test_ds]
 
-results = {}
-
 # -----------------
 # Helper to evaluate model
 def evaluate_model(model_name, pipeline_kwargs=None):
@@ -45,7 +39,6 @@ def evaluate_model(model_name, pipeline_kwargs=None):
     rtf = (end - start) / sum(len(a) / 16000 for a in audio_data)
 
     return {
-        "Transcriptions": preds,
         "WER": wer_metric.compute(predictions=preds, references=refs),
         "CER": cer_metric.compute(predictions=preds, references=refs),
         "RTF": rtf
@@ -72,9 +65,23 @@ models = {
 }
 
 # -----------------
-# Run evaluations
-for label, cfg in models.items():
-    print(f"Running {label}...")
-    results[label] = evaluate_model(cfg["name"], cfg["pipeline_kwargs"])
+# Gradio interface
+def run_evaluations():
+    rows = []
+    for label, cfg in models.items():
+        res = evaluate_model(cfg["name"], cfg["pipeline_kwargs"])
+        if "Error" in res:
+            rows.append([label, res["Error"], "-", "-"])
+        else:
+            rows.append([label, f"{res['WER']:.3f}", f"{res['CER']:.3f}", f"{res['RTF']:.2f}"])
+    return rows
+
+with gr.Blocks() as demo:
+    gr.Markdown("## ASR Benchmark Comparison (Hindi Sample)\nEvaluating **WER, CER, RTF** across models.")
+    btn = gr.Button("Run Evaluation")
+    table = gr.Dataframe(headers=["Model", "WER", "CER", "RTF"], datatype=["str", "str", "str", "str"], interactive=False)
+
+    btn.click(fn=run_evaluations, outputs=table)
 
-print(results)
+if __name__ == "__main__":
+    demo.launch()
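
Note: the hunks above only show context around the changed lines, so the body of evaluate_model and the contents of the models dict never appear in this diff. Below is a minimal sketch of how those hidden sections plausibly read, inferred from the visible context (the timing/RTF computation, the "Error" key that run_evaluations branches on, and the "name"/"pipeline_kwargs" fields); the model name and pipeline options are illustrative assumptions, not copied from the actual app.py.

# Sketch only: inferred from the diff's context lines, not the real file.
def evaluate_model(model_name, pipeline_kwargs=None):
    try:
        # Assumed: a transformers ASR pipeline; pipeline_kwargs would carry
        # per-model options (e.g. chunking settings).
        asr = pipeline("automatic-speech-recognition", model=model_name,
                       **(pipeline_kwargs or {}))
        start = time.time()
        preds = [asr(a)["text"] for a in audio_data]
        end = time.time()
    except Exception as e:
        # run_evaluations() checks for an "Error" key, so failures are
        # returned rather than raised.
        return {"Error": str(e)}

    # Real-time factor: wall-clock time over total audio duration (16 kHz assumed);
    # RTF < 1 means faster than real time.
    rtf = (end - start) / sum(len(a) / 16000 for a in audio_data)

    return {
        "WER": wer_metric.compute(predictions=preds, references=refs),
        "CER": cer_metric.compute(predictions=preds, references=refs),
        "RTF": rtf,
    }

# Only the closing brace of this dict is visible in the diff; the entry is illustrative.
models = {
    "Whisper small (example)": {"name": "openai/whisper-small", "pipeline_kwargs": None},
}

With those pieces in place, clicking "Run Evaluation" in the Gradio app produces one row per model, matching the four Dataframe headers (Model, WER, CER, RTF).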