Update app.py
app.py CHANGED
@@ -1,4 +1,4 @@
-# app.py
+# app.py (simplified: no WER/CER, minimal feedback)
 
 import os
 import json
@@ -12,14 +12,6 @@ import librosa  # pip install librosa
 # --- External logging: push to a HF Dataset repo on each submit (no local storage) ---
 from datasets import Dataset, Features, Value, Audio, load_dataset
 
-# Optional but recommended for better jiwer performance
-# pip install python-Levenshtein
-try:
-    from jiwer import compute_measures, wer as jiwer_wer, cer as jiwer_cer
-    HAS_JIWER = True
-except Exception:
-    HAS_JIWER = False
-
 # -------- CONFIG: Hub dataset target (no persistent storage needed) --------
 HF_DATASET_REPO = os.environ.get("HF_DATASET_REPO", "DarliAI/asr-feedback-logs")
 HF_TOKEN = os.environ.get("HF_TOKEN")
@@ -39,21 +31,12 @@ HF_FEATURES = Features({
     "decode_params": Value("string"),
 
     "transcript_hyp": Value("string"),
-    "reference_text": Value("string"),
     "corrected_text": Value("string"),
 
     "latency_ms": Value("int32"),
     "rtf": Value("float32"),
 
-    "wer": Value("float32"),
-    "cer": Value("float32"),
-    "subs": Value("int32"),
-    "ins": Value("int32"),
-    "dels": Value("int32"),
-
     "score_out_of_10": Value("int32"),
-    "feedback_text": Value("string"),
-    "tags": Value("string"),
     "share_publicly": Value("bool"),
 })
 
@@ -82,9 +65,9 @@ def _push_row_to_hf_dataset(row, audio_file_path):
         except Exception:
             return None
 
-    for k in ["latency_ms", "score_out_of_10", "sample_rate", "subs", "ins", "dels"]:
+    for k in ["latency_ms", "score_out_of_10", "sample_rate"]:
         example[k] = _to_int(example.get(k))
-    for k in ["rtf", "audio_duration_s", "wer", "cer"]:
+    for k in ["rtf", "audio_duration_s"]:
         example[k] = _to_float(example.get(k))
 
     ds = Dataset.from_list([example], features=HF_FEATURES)
@@ -138,7 +121,7 @@ language_models = {
     "Pidgin": "FarmerlineML/pidgin_nigerian",
     "Kikuyu": "FarmerlineML/w2v-bert-2.0_kikuyu",
     "Igbo": "FarmerlineML/w2v-bert-2.0_igbo_v1",
-
+    "Krio": "FarmerlineML/w2v-bert-2.0_krio_v3"
 }
 
 # -------- Lazy-load pipeline cache (Space-safe) --------
@@ -187,21 +170,6 @@ def _model_revision_from_pipeline(pipe) -> str:
     except Exception:
         return "unknown"
 
-def _compute_metrics(hyp: str, ref_or_corrected: str):
-    if not HAS_JIWER or not ref_or_corrected or not hyp:
-        return {"wer": None, "cer": None, "subs": None, "ins": None, "dels": None}
-    try:
-        measures = compute_measures(ref_or_corrected, hyp)
-        return {
-            "wer": measures.get("wer"),
-            "cer": jiwer_cer(ref_or_corrected, hyp),
-            "subs": measures.get("substitutions"),
-            "ins": measures.get("insertions"),
-            "dels": measures.get("deletions"),
-        }
-    except Exception:
-        return {"wer": None, "cer": None, "subs": None, "ins": None, "dels": None}
-
 # -------- Inference --------
 def transcribe(audio_path: str, language: str):
     """
@@ -241,40 +209,23 @@ def transcribe(audio_path: str, language: str):
     }
     return hyp_text, meta
 
-# -------- Feedback submit --------
-def submit_feedback(meta, reference_text, corrected_text, score, feedback_text,
-                    tags, store_audio, share_publicly, audio_file_path):
+# -------- Feedback submit (minimal) --------
+def submit_feedback(meta, corrected_text, score, store_audio, share_publicly, audio_file_path):
     """
-    …
-    No …
+    Push a minimal row to HF Dataset: model info, language, transcript, optional corrected text, score.
+    No WER/CER computations.
     """
     if not meta:
         return {"status": "No transcription metadata available. Please transcribe first."}
 
-    ref_for_metrics = (reference_text or "").strip()
-    corrected_text = (corrected_text or "").strip()
-    if not ref_for_metrics and corrected_text:
-        ref_for_metrics = corrected_text
-
-    metrics = _compute_metrics(meta.get("transcript_hyp", ""), ref_for_metrics)
-
     row = dict(meta)
     row.update({
-        "reference_text": ref_for_metrics or "",
-        "corrected_text": corrected_text or "",
-        "wer": metrics["wer"],
-        "cer": metrics["cer"],
-        "subs": metrics["subs"],
-        "ins": metrics["ins"],
-        "dels": metrics["dels"],
+        "corrected_text": (corrected_text or "").strip(),
         "score_out_of_10": int(score) if score is not None else None,
-        "feedback_text": feedback_text or "",
-        "tags": json.dumps({"labels": tags or []}),
         "share_publicly": bool(share_publicly),
     })
 
     try:
-        # Use the temporary upload path from Gradio iff the user consented
         audio_to_push = audio_file_path if store_audio else None
         hf_status = _push_row_to_hf_dataset(row, audio_to_push)
         status = f"Feedback saved. {hf_status}"
@@ -283,15 +234,11 @@ def submit_feedback(meta, reference_text, corrected_text, score, feedback_text,
 
     return {
         "status": status,
-        "wer": row["wer"],
-        "cer": row["cer"],
-        "subs": row["subs"],
-        "ins": row["ins"],
-        "dels": row["dels"],
         "latency_ms": row["latency_ms"],
         "rtf": row["rtf"],
         "model_id": row["model_id"],
-        "model_revision": row["model_revision"]
+        "model_revision": row["model_revision"],
+        "language": row["language_display"],
     }
 
 # -------- UI (original preserved; additions appended) --------
@@ -331,27 +278,18 @@ with gr.Blocks(title="🌐 Multilingual ASR Demo") as demo:
         # Pre-fill corrected with hypothesis for easy edits
         return hyp, meta, hyp
 
-    # --- Evaluation …
-    with gr.Accordion("Evaluation …
-        with gr.Row():
-            reference_tb = gr.Textbox(label="Reference text (optional)", lines=4, value="")
+    # --- Minimal Evaluation (score + optional corrected text) ---
+    with gr.Accordion("Evaluation", open=False):
         with gr.Row():
             corrected_tb = gr.Textbox(label="Corrected transcript (optional)", lines=4, value="")
         with gr.Row():
             score_slider = gr.Slider(minimum=0, maximum=10, step=1, label="Score out of 10", value=7)
-        with gr.Row():
-            feedback_tb = gr.Textbox(label="Feedback (what went right/wrong?)", lines=3, value="")
-        with gr.Row():
-            tags_cb = gr.CheckboxGroup(
-                ["noisy", "far-field", "code-switching", "numbers-heavy", "named-entities", "read-speech", "spontaneous", "call-center", "voicenote"],
-                label="Slice tags (select any that apply)"
-            )
         with gr.Row():
             store_audio_cb = gr.Checkbox(label="Allow storing my audio for research/eval", value=False)
             share_cb = gr.Checkbox(label="Allow sharing this example publicly", value=False)
 
-        submit_btn = gr.Button("Submit …
-        results_json = gr.JSON(label="…
+        submit_btn = gr.Button("Submit")
+        results_json = gr.JSON(label="Status")
 
     # Wire events
     btn.click(
@@ -364,11 +302,8 @@
         fn=submit_feedback,
         inputs=[
             meta_state,
-            reference_tb,
             corrected_tb,
             score_slider,
-            feedback_tb,
-            tags_cb,
             store_audio_cb,
             share_cb,
             audio  # raw file path from gr.Audio
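The diff leaves _push_row_to_hf_dataset itself untouched: each submit still builds one typed row and pushes it to HF_DATASET_REPO. For orientation, a minimal sketch of that append pattern with the datasets library follows; the function's real body is not shown in this diff, so push_row and everything other than HF_DATASET_REPO, HF_TOKEN, and HF_FEATURES is illustrative, and the token= keyword assumes a recent datasets release.

from datasets import Dataset, concatenate_datasets, load_dataset

def push_row(example: dict) -> str:
    # One-row dataset typed by the (now smaller) HF_FEATURES schema.
    new_ds = Dataset.from_list([example], features=HF_FEATURES)
    try:
        # Append to any existing rows by concatenating before the push.
        old_ds = load_dataset(HF_DATASET_REPO, split="train", token=HF_TOKEN)
        new_ds = concatenate_datasets([old_ds, new_ds])
    except Exception:
        pass  # first submit ever: push the single row as-is
    new_ds.push_to_hub(HF_DATASET_REPO, token=HF_TOKEN, private=True)
    return f"Pushed 1 row to {HF_DATASET_REPO}"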
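Dropping jiwer from the app does not discard evaluation signal: the schema still logs transcript_hyp and corrected_text, so WER/CER can be recomputed offline from the pushed rows. A sketch using jiwer's top-level wer/cer functions (the same library the deleted _compute_metrics wrapped), treating the user's correction as the reference the way the old fallback did; offline_metrics is an illustrative name, not part of the app.

from jiwer import cer, wer  # pip install jiwer

def offline_metrics(row: dict):
    # Use the human-corrected transcript as the reference, if one was submitted.
    ref = (row.get("corrected_text") or "").strip()
    hyp = (row.get("transcript_hyp") or "").strip()
    if not ref or not hyp:
        return None  # nothing to score against
    return {"wer": wer(ref, hyp), "cer": cer(ref, hyp)}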