unknown committed on
Commit ·
66849b2
1
Parent(s): b6be6a8
Dataset上传
Browse files
app.py
CHANGED
|
@@ -433,78 +433,82 @@
|
|
| 433 |
# demo = build_app()
|
| 434 |
# demo.launch()
|
| 435 |
|
| 436 |
-
import
|
| 437 |
import numpy as np
|
| 438 |
-
|
| 439 |
|
| 440 |
-
|
| 441 |
|
| 442 |
-
|
| 443 |
-
out = []
|
| 444 |
-
for i, s in enumerate(segments):
|
| 445 |
-
out.append({
|
| 446 |
-
"row_id": s.get("index", i),
|
| 447 |
-
"start": float(s.get("start", 0.0)),
|
| 448 |
-
"end": float(s.get("end", 0.0)),
|
| 449 |
-
"dur": float(s.get("end", 0.0)) - float(s.get("start", 0.0)),
|
| 450 |
-
"status": s.get("status", ""),
|
| 451 |
-
"speaker": s.get("speaker", ""),
|
| 452 |
-
"gender": s.get("gender", ""),
|
| 453 |
-
"age_group": s.get("age_group", ""),
|
| 454 |
-
"emotion": s.get("emotion", ""),
|
| 455 |
-
"text": s.get("text", "") or "",
|
| 456 |
-
})
|
| 457 |
-
return out
|
| 458 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 459 |
|
| 460 |
-
def slice_audio(audio, sr, start, end):
|
| 461 |
-
s = int(start * sr)
|
| 462 |
-
e = int(end * sr)
|
| 463 |
-
return sr, audio[s:e]
|
| 464 |
|
|
|
|
|
|
|
|
|
|
| 465 |
|
| 466 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 467 |
|
| 468 |
-
def load_dataset_meta(dataset_name, split):
|
| 469 |
-
ds = load_dataset(dataset_name, split=split)
|
| 470 |
-
return ds, len(ds)
|
| 471 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 472 |
|
| 473 |
-
|
| 474 |
-
|
|
|
|
| 475 |
|
| 476 |
-
|
|
|
|
|
|
|
|
|
|
| 477 |
|
| 478 |
-
# ----
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 485 |
|
| 486 |
-
|
| 487 |
-
if "segments" in sample:
|
| 488 |
-
segments = sample["segments"]
|
| 489 |
-
elif "transcript" in sample and "segments" in sample["transcript"]:
|
| 490 |
-
segments = sample["transcript"]["segments"]
|
| 491 |
-
else:
|
| 492 |
-
raise ValueError("未找到 segments")
|
| 493 |
|
| 494 |
-
segments = normalize_segments(segments)
|
| 495 |
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
"sr": sr,
|
| 499 |
-
"segments": segments,
|
| 500 |
-
"sample_id": sample.get("id", index),
|
| 501 |
-
}
|
| 502 |
|
| 503 |
|
| 504 |
-
# ==========
|
|
|
|
|
|
|
| 505 |
|
| 506 |
-
def
|
| 507 |
-
|
| 508 |
|
| 509 |
rows = [
|
| 510 |
[
|
|
@@ -512,17 +516,22 @@ def on_load_sample(dataset_name, split, index):
|
|
| 512 |
s["status"], s["speaker"], s["gender"],
|
| 513 |
s["age_group"], s["emotion"], s["text"]
|
| 514 |
]
|
| 515 |
-
for s in
|
| 516 |
]
|
| 517 |
|
| 518 |
info = (
|
| 519 |
-
f"**
|
| 520 |
-
f"**
|
| 521 |
-
f"**
|
| 522 |
-
f"**
|
| 523 |
-
f"**Sample rate**: {state['sr']} Hz"
|
| 524 |
)
|
| 525 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 526 |
return state, rows, info
|
| 527 |
|
| 528 |
|
|
@@ -530,9 +539,7 @@ def on_select_segment(evt: gr.SelectData, state):
|
|
| 530 |
row = evt.row_value
|
| 531 |
start, end = float(row[1]), float(row[2])
|
| 532 |
|
| 533 |
-
sr, audio_seg = slice_audio(
|
| 534 |
-
state["audio"], state["sr"], start, end
|
| 535 |
-
)
|
| 536 |
|
| 537 |
meta = (
|
| 538 |
f"- **speaker**: {row[5]}\n"
|
|
@@ -544,22 +551,27 @@ def on_select_segment(evt: gr.SelectData, state):
|
|
| 544 |
return (sr, audio_seg), meta, row[9]
|
| 545 |
|
| 546 |
|
| 547 |
-
# ==========
|
|
|
|
|
|
|
| 548 |
|
| 549 |
-
with gr.Blocks(title="
|
| 550 |
-
gr.Markdown(
|
|
|
|
|
|
|
|
|
|
| 551 |
|
| 552 |
state = gr.State()
|
| 553 |
|
| 554 |
-
|
| 555 |
-
|
| 556 |
-
|
| 557 |
-
|
| 558 |
-
|
| 559 |
-
|
| 560 |
-
|
| 561 |
-
|
| 562 |
-
load_btn = gr.Button("加载
|
| 563 |
info = gr.Markdown()
|
| 564 |
|
| 565 |
df = gr.Dataframe(
|
|
@@ -568,20 +580,20 @@ with gr.Blocks(title="HF Dataset Audio Segment Explorer") as demo:
|
|
| 568 |
"status", "speaker", "gender",
|
| 569 |
"age_group", "emotion", "text"
|
| 570 |
],
|
| 571 |
-
interactive=False,
|
| 572 |
wrap=True,
|
| 573 |
-
|
|
|
|
| 574 |
)
|
| 575 |
|
| 576 |
with gr.Row():
|
| 577 |
audio_out = gr.Audio(label="分段播放", type="numpy")
|
| 578 |
meta = gr.Markdown()
|
| 579 |
|
| 580 |
-
text = gr.Textbox(label="
|
| 581 |
|
| 582 |
load_btn.click(
|
| 583 |
-
|
| 584 |
-
inputs=
|
| 585 |
outputs=[state, df, info],
|
| 586 |
)
|
| 587 |
|
|
@@ -592,3 +604,4 @@ with gr.Blocks(title="HF Dataset Audio Segment Explorer") as demo:
|
|
| 592 |
)
|
| 593 |
|
| 594 |
demo.launch()
|
|
|
|
|
|
| 433 |
# demo = build_app()
|
| 434 |
# demo.launch()
|
| 435 |
|
| 436 |
+
import json
|
| 437 |
import numpy as np
|
| 438 |
+
import gradio as gr
|
| 439 |
|
| 440 |
+
from huggingface_hub import hf_hub_download, list_repo_files
|
| 441 |
|
| 442 |
+
import soundfile as sf
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 443 |
|
| 444 |
+
# =====================
# Fixed configuration (this app's data source)
# =====================
# Hugging Face dataset repo holding the audio and reference transcripts.
REPO_ID = "AlexTYJ/Multilingual-ASR-Benchmark"
# Repo folder containing the ARE audio files (.wav/.mp3/.flac).
AUDIO_DIR = "audio/testbatch/ARE"
# Repo folder with the per-file segment JSON (same basenames as the audio).
JSON_DIR = "text/ref/testbatch/ARE"
|
| 450 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 451 |
|
| 452 |
+
# =====================
|
| 453 |
+
# 工具函数
|
| 454 |
+
# =====================
|
| 455 |
|
| 456 |
+
def list_are_audio_files():
    """List the ARE audio files available in the dataset repo.

    Returns:
        list[str]: repo-relative paths under ``AUDIO_DIR`` whose extension is
        .wav/.mp3/.flac (case-insensitive), sorted lexicographically.
    """
    # repo_type="dataset" is required: list_repo_files defaults to the model
    # hub, where this dataset repo does not exist (RepositoryNotFoundError).
    files = list_repo_files(REPO_ID, repo_type="dataset")
    audio_files = [
        f for f in files
        if f.startswith(AUDIO_DIR) and f.lower().endswith((".wav", ".mp3", ".flac"))
    ]
    return sorted(audio_files)
|
| 464 |
|
|
|
|
|
|
|
|
|
|
| 465 |
|
| 466 |
+
def load_audio_and_json(audio_path):
    """Download one audio file plus its segment JSON and normalize both.

    Args:
        audio_path: repo-relative audio path (as returned by
            ``list_are_audio_files``).

    Returns:
        tuple: ``(audio, sr, segments, audio_name)`` where ``audio`` is a mono
        1-D sample array, ``sr`` the sample rate, ``segments`` a list of dicts
        with keys row_id/start/end/dur/status/speaker/gender/age_group/
        emotion/text, and ``audio_name`` the display name from the JSON
        (falls back to the file name).

    Raises:
        KeyError: if the JSON has no "segments" key or a segment lacks
            "start"/"end".
    """
    # ---- derive the JSON path ----
    # Strip only the LAST extension: chained .replace('.wav', ...) would also
    # rewrite a matching substring elsewhere in the name and silently misses
    # upper-case suffixes that the audio-file filter accepts.
    filename = audio_path.split("/")[-1]
    stem = filename.rsplit(".", 1)[0]
    json_path = f"{JSON_DIR}/{stem}.json"

    # ---- download (repo_type="dataset": default is the model hub) ----
    local_audio = hf_hub_download(REPO_ID, audio_path, repo_type="dataset")
    local_json = hf_hub_download(REPO_ID, json_path, repo_type="dataset")

    # ---- read audio, downmix stereo to mono ----
    audio, sr = sf.read(local_audio)
    if audio.ndim == 2:
        audio = audio.mean(axis=1)

    # ---- read segment metadata ----
    with open(local_json, "r", encoding="utf-8") as f:
        data = json.load(f)

    segments = []
    for i, s in enumerate(data["segments"]):
        segments.append({
            "row_id": s.get("index", i),
            "start": float(s["start"]),
            "end": float(s["end"]),
            "dur": float(s["end"] - s["start"]),
            "status": s.get("status", ""),
            "speaker": s.get("speaker", ""),
            "gender": s.get("gender", ""),
            "age_group": s.get("age_group", ""),
            "emotion": s.get("emotion", ""),
            # coalesce explicit null text to ""
            "text": s.get("text", "") or "",
        })

    return audio, sr, segments, data.get("audio_name", filename)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 500 |
|
|
|
|
| 501 |
|
| 502 |
+
def slice_audio(audio, sr, start, end):
    """Cut the sample window covering [start, end) seconds.

    Returns a ``(sr, samples)`` pair suitable for a numpy-typed gr.Audio.
    """
    first = int(start * sr)
    last = int(end * sr)
    return sr, audio[first:last]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 504 |
|
| 505 |
|
| 506 |
+
# =====================
|
| 507 |
+
# Gradio 交互逻辑
|
| 508 |
+
# =====================
|
| 509 |
|
| 510 |
+
def on_select_file(audio_path):
|
| 511 |
+
audio, sr, segments, audio_name = load_audio_and_json(audio_path)
|
| 512 |
|
| 513 |
rows = [
|
| 514 |
[
|
|
|
|
| 516 |
s["status"], s["speaker"], s["gender"],
|
| 517 |
s["age_group"], s["emotion"], s["text"]
|
| 518 |
]
|
| 519 |
+
for s in segments
|
| 520 |
]
|
| 521 |
|
| 522 |
info = (
|
| 523 |
+
f"**Repo**: `{REPO_ID}` \n"
|
| 524 |
+
f"**Audio**: `{audio_name}` \n"
|
| 525 |
+
f"**Segments**: {len(segments)} \n"
|
| 526 |
+
f"**Sample rate**: {sr} Hz"
|
|
|
|
| 527 |
)
|
| 528 |
|
| 529 |
+
state = {
|
| 530 |
+
"audio": audio,
|
| 531 |
+
"sr": sr,
|
| 532 |
+
"segments": segments
|
| 533 |
+
}
|
| 534 |
+
|
| 535 |
return state, rows, info
|
| 536 |
|
| 537 |
|
|
|
|
| 539 |
row = evt.row_value
|
| 540 |
start, end = float(row[1]), float(row[2])
|
| 541 |
|
| 542 |
+
sr, audio_seg = slice_audio(state["audio"], state["sr"], start, end)
|
|
|
|
|
|
|
| 543 |
|
| 544 |
meta = (
|
| 545 |
f"- **speaker**: {row[5]}\n"
|
|
|
|
| 551 |
return (sr, audio_seg), meta, row[9]
|
| 552 |
|
| 553 |
|
| 554 |
+
# =====================
|
| 555 |
+
# UI
|
| 556 |
+
# =====================
|
| 557 |
|
| 558 |
+
with gr.Blocks(title="ARE Audio Segment Explorer") as demo:
|
| 559 |
+
gr.Markdown(
|
| 560 |
+
"# 🎧 ARE 音频 & 字幕可视化(Hugging Face Dataset)\n"
|
| 561 |
+
"数据来源:`AlexTYJ/Multilingual-ASR-Benchmark`"
|
| 562 |
+
)
|
| 563 |
|
| 564 |
state = gr.State()
|
| 565 |
|
| 566 |
+
audio_files = list_are_audio_files()
|
| 567 |
+
|
| 568 |
+
audio_selector = gr.Dropdown(
|
| 569 |
+
choices=audio_files,
|
| 570 |
+
label="选择音频文件(ARE)",
|
| 571 |
+
value=audio_files[0] if audio_files else None
|
| 572 |
+
)
|
| 573 |
+
|
| 574 |
+
load_btn = gr.Button("加载", variant="primary")
|
| 575 |
info = gr.Markdown()
|
| 576 |
|
| 577 |
df = gr.Dataframe(
|
|
|
|
| 580 |
"status", "speaker", "gender",
|
| 581 |
"age_group", "emotion", "text"
|
| 582 |
],
|
|
|
|
| 583 |
wrap=True,
|
| 584 |
+
interactive=False,
|
| 585 |
+
max_height=420,
|
| 586 |
)
|
| 587 |
|
| 588 |
with gr.Row():
|
| 589 |
audio_out = gr.Audio(label="分段播放", type="numpy")
|
| 590 |
meta = gr.Markdown()
|
| 591 |
|
| 592 |
+
text = gr.Textbox(label="字幕文本", lines=4)
|
| 593 |
|
| 594 |
load_btn.click(
|
| 595 |
+
on_select_file,
|
| 596 |
+
inputs=audio_selector,
|
| 597 |
outputs=[state, df, info],
|
| 598 |
)
|
| 599 |
|
|
|
|
| 604 |
)
|
| 605 |
|
| 606 |
demo.launch()
|
| 607 |
+
|