unknown committed on
Commit
66849b2
·
1 Parent(s): b6be6a8

Dataset上传

Browse files
Files changed (1) hide show
  1. app.py +93 -80
app.py CHANGED
@@ -433,78 +433,82 @@
433
  # demo = build_app()
434
  # demo.launch()
435
 
436
- import gradio as gr
437
  import numpy as np
438
- from datasets import load_dataset
439
 
440
- # ========== 工具函数 ==========
441
 
442
- def normalize_segments(segments):
443
- out = []
444
- for i, s in enumerate(segments):
445
- out.append({
446
- "row_id": s.get("index", i),
447
- "start": float(s.get("start", 0.0)),
448
- "end": float(s.get("end", 0.0)),
449
- "dur": float(s.get("end", 0.0)) - float(s.get("start", 0.0)),
450
- "status": s.get("status", ""),
451
- "speaker": s.get("speaker", ""),
452
- "gender": s.get("gender", ""),
453
- "age_group": s.get("age_group", ""),
454
- "emotion": s.get("emotion", ""),
455
- "text": s.get("text", "") or "",
456
- })
457
- return out
458
 
 
 
 
 
 
 
459
 
460
- def slice_audio(audio, sr, start, end):
461
- s = int(start * sr)
462
- e = int(end * sr)
463
- return sr, audio[s:e]
464
 
 
 
 
465
 
466
- # ========== Dataset 相关 ==========
 
 
 
 
 
 
 
467
 
468
- def load_dataset_meta(dataset_name, split):
469
- ds = load_dataset(dataset_name, split=split)
470
- return ds, len(ds)
471
 
 
 
 
 
472
 
473
- def load_sample(dataset_name, split, index):
474
- ds = load_dataset(dataset_name, split=split)
 
475
 
476
- sample = ds[int(index)]
 
 
 
477
 
478
- # -------- audio --------
479
- audio = sample.get("audio")
480
- if isinstance(audio, dict) and "array" in audio:
481
- audio_array = np.asarray(audio["array"], dtype=np.float32)
482
- sr = audio["sampling_rate"]
483
- else:
484
- raise ValueError("audio 字段必须是 datasets Audio")
 
 
 
 
 
 
 
 
 
 
 
485
 
486
- # -------- segments --------
487
- if "segments" in sample:
488
- segments = sample["segments"]
489
- elif "transcript" in sample and "segments" in sample["transcript"]:
490
- segments = sample["transcript"]["segments"]
491
- else:
492
- raise ValueError("未找到 segments")
493
 
494
- segments = normalize_segments(segments)
495
 
496
- return {
497
- "audio": audio_array,
498
- "sr": sr,
499
- "segments": segments,
500
- "sample_id": sample.get("id", index),
501
- }
502
 
503
 
504
- # ========== Gradio 交互 ==========
 
 
505
 
506
- def on_load_sample(dataset_name, split, index):
507
- state = load_sample(dataset_name, split, index)
508
 
509
  rows = [
510
  [
@@ -512,17 +516,22 @@ def on_load_sample(dataset_name, split, index):
512
  s["status"], s["speaker"], s["gender"],
513
  s["age_group"], s["emotion"], s["text"]
514
  ]
515
- for s in state["segments"]
516
  ]
517
 
518
  info = (
519
- f"**Dataset**: `{dataset_name}` \n"
520
- f"**Split**: `{split}` \n"
521
- f"**Sample**: `{state['sample_id']}` \n"
522
- f"**Segments**: {len(state['segments'])} \n"
523
- f"**Sample rate**: {state['sr']} Hz"
524
  )
525
 
 
 
 
 
 
 
526
  return state, rows, info
527
 
528
 
@@ -530,9 +539,7 @@ def on_select_segment(evt: gr.SelectData, state):
530
  row = evt.row_value
531
  start, end = float(row[1]), float(row[2])
532
 
533
- sr, audio_seg = slice_audio(
534
- state["audio"], state["sr"], start, end
535
- )
536
 
537
  meta = (
538
  f"- **speaker**: {row[5]}\n"
@@ -544,22 +551,27 @@ def on_select_segment(evt: gr.SelectData, state):
544
  return (sr, audio_seg), meta, row[9]
545
 
546
 
547
- # ========== UI ==========
 
 
548
 
549
- with gr.Blocks(title="HF Dataset Audio Segment Explorer") as demo:
550
- gr.Markdown("# 🎧 Hugging Face Dataset 音频分段可视化")
 
 
 
551
 
552
  state = gr.State()
553
 
554
- with gr.Row():
555
- dataset_name = gr.Textbox(
556
- label="Dataset name",
557
- value="your-username/your-dataset"
558
- )
559
- split = gr.Textbox(label="Split", value="train")
560
- index = gr.Number(label="Sample index", value=0, precision=0)
561
-
562
- load_btn = gr.Button("加载 Sample", variant="primary")
563
  info = gr.Markdown()
564
 
565
  df = gr.Dataframe(
@@ -568,20 +580,20 @@ with gr.Blocks(title="HF Dataset Audio Segment Explorer") as demo:
568
  "status", "speaker", "gender",
569
  "age_group", "emotion", "text"
570
  ],
571
- interactive=False,
572
  wrap=True,
573
- max_height=400,
 
574
  )
575
 
576
  with gr.Row():
577
  audio_out = gr.Audio(label="分段播放", type="numpy")
578
  meta = gr.Markdown()
579
 
580
- text = gr.Textbox(label="转写文本", lines=4)
581
 
582
  load_btn.click(
583
- on_load_sample,
584
- inputs=[dataset_name, split, index],
585
  outputs=[state, df, info],
586
  )
587
 
@@ -592,3 +604,4 @@ with gr.Blocks(title="HF Dataset Audio Segment Explorer") as demo:
592
  )
593
 
594
  demo.launch()
 
 
433
  # demo = build_app()
434
  # demo.launch()
435
 
436
+ import json
437
  import numpy as np
438
+ import gradio as gr
439
 
440
+ from huggingface_hub import hf_hub_download, list_repo_files
441
 
442
+ import soundfile as sf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
443
 
444
+ # =====================
445
+ # 固定配置(你的数据)
446
+ # =====================
447
+ REPO_ID = "AlexTYJ/Multilingual-ASR-Benchmark"
448
+ AUDIO_DIR = "audio/testbatch/ARE"
449
+ JSON_DIR = "text/ref/testbatch/ARE"
450
 
 
 
 
 
451
 
452
+ # =====================
453
+ # 工具函数
454
+ # =====================
455
 
456
+ def list_are_audio_files():
457
+ files = list_repo_files(REPO_ID)
458
+ audio_files = [
459
+ f for f in files
460
+ if f.startswith(AUDIO_DIR) and f.lower().endswith((".wav", ".mp3", ".flac"))
461
+ ]
462
+ audio_files.sort()
463
+ return audio_files
464
 
 
 
 
465
 
466
+ def load_audio_and_json(audio_path):
467
+ # ---- 推导 json 路径 ----
468
+ filename = audio_path.split("/")[-1]
469
+ json_path = f"{JSON_DIR}/{filename.replace('.wav', '.json').replace('.mp3', '.json').replace('.flac', '.json')}"
470
 
471
+ # ---- 下载 ----
472
+ local_audio = hf_hub_download(REPO_ID, audio_path)
473
+ local_json = hf_hub_download(REPO_ID, json_path)
474
 
475
+ # ---- 读音频 ----
476
+ audio, sr = sf.read(local_audio)
477
+ if audio.ndim == 2:
478
+ audio = audio.mean(axis=1)
479
 
480
+ # ---- JSON ----
481
+ with open(local_json, "r", encoding="utf-8") as f:
482
+ data = json.load(f)
483
+
484
+ segments = []
485
+ for i, s in enumerate(data["segments"]):
486
+ segments.append({
487
+ "row_id": s.get("index", i),
488
+ "start": float(s["start"]),
489
+ "end": float(s["end"]),
490
+ "dur": float(s["end"] - s["start"]),
491
+ "status": s.get("status", ""),
492
+ "speaker": s.get("speaker", ""),
493
+ "gender": s.get("gender", ""),
494
+ "age_group": s.get("age_group", ""),
495
+ "emotion": s.get("emotion", ""),
496
+ "text": s.get("text", "") or "",
497
+ })
498
 
499
+ return audio, sr, segments, data.get("audio_name", filename)
 
 
 
 
 
 
500
 
 
501
 
502
+ def slice_audio(audio, sr, start, end):
503
+ return sr, audio[int(start * sr): int(end * sr)]
 
 
 
 
504
 
505
 
506
+ # =====================
507
+ # Gradio 交互逻辑
508
+ # =====================
509
 
510
+ def on_select_file(audio_path):
511
+ audio, sr, segments, audio_name = load_audio_and_json(audio_path)
512
 
513
  rows = [
514
  [
 
516
  s["status"], s["speaker"], s["gender"],
517
  s["age_group"], s["emotion"], s["text"]
518
  ]
519
+ for s in segments
520
  ]
521
 
522
  info = (
523
+ f"**Repo**: `{REPO_ID}` \n"
524
+ f"**Audio**: `{audio_name}` \n"
525
+ f"**Segments**: {len(segments)} \n"
526
+ f"**Sample rate**: {sr} Hz"
 
527
  )
528
 
529
+ state = {
530
+ "audio": audio,
531
+ "sr": sr,
532
+ "segments": segments
533
+ }
534
+
535
  return state, rows, info
536
 
537
 
 
539
  row = evt.row_value
540
  start, end = float(row[1]), float(row[2])
541
 
542
+ sr, audio_seg = slice_audio(state["audio"], state["sr"], start, end)
 
 
543
 
544
  meta = (
545
  f"- **speaker**: {row[5]}\n"
 
551
  return (sr, audio_seg), meta, row[9]
552
 
553
 
554
+ # =====================
555
+ # UI
556
+ # =====================
557
 
558
+ with gr.Blocks(title="ARE Audio Segment Explorer") as demo:
559
+ gr.Markdown(
560
+ "# 🎧 ARE 音频 & 字幕可视化(Hugging Face Dataset)\n"
561
+ "数据来源:`AlexTYJ/Multilingual-ASR-Benchmark`"
562
+ )
563
 
564
  state = gr.State()
565
 
566
+ audio_files = list_are_audio_files()
567
+
568
+ audio_selector = gr.Dropdown(
569
+ choices=audio_files,
570
+ label="选择音频文件(ARE)",
571
+ value=audio_files[0] if audio_files else None
572
+ )
573
+
574
+ load_btn = gr.Button("加载", variant="primary")
575
  info = gr.Markdown()
576
 
577
  df = gr.Dataframe(
 
580
  "status", "speaker", "gender",
581
  "age_group", "emotion", "text"
582
  ],
 
583
  wrap=True,
584
+ interactive=False,
585
+ max_height=420,
586
  )
587
 
588
  with gr.Row():
589
  audio_out = gr.Audio(label="分段播放", type="numpy")
590
  meta = gr.Markdown()
591
 
592
+ text = gr.Textbox(label="字幕文本", lines=4)
593
 
594
  load_btn.click(
595
+ on_select_file,
596
+ inputs=audio_selector,
597
  outputs=[state, df, info],
598
  )
599
 
 
604
  )
605
 
606
  demo.launch()
607
+