Spaces:
Sleeping
Sleeping
unknown commited on
Commit ·
fa56eeb
1
Parent(s): 50d1858
play19
Browse files
app.py
CHANGED
|
@@ -628,6 +628,388 @@
|
|
| 628 |
# if __name__ == "__main__":
|
| 629 |
# demo.launch()
|
| 630 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 631 |
import re
|
| 632 |
from dataclasses import dataclass
|
| 633 |
from typing import Any, Dict, List, Tuple, Optional
|
|
@@ -1009,4 +1391,3 @@ with gr.Blocks(title="双语音频字幕对齐(点击即播放)") as demo:
|
|
| 1009 |
|
| 1010 |
if __name__ == "__main__":
|
| 1011 |
demo.launch()
|
| 1012 |
-
|
|
|
|
| 628 |
# if __name__ == "__main__":
|
| 629 |
# demo.launch()
|
| 630 |
|
| 631 |
+
# import re
|
| 632 |
+
# from dataclasses import dataclass
|
| 633 |
+
# from typing import Any, Dict, List, Tuple, Optional
|
| 634 |
+
|
| 635 |
+
# import gradio as gr
|
| 636 |
+
# from huggingface_hub import list_repo_files, hf_hub_download
|
| 637 |
+
# from pydub import AudioSegment
|
| 638 |
+
# import numpy as np
|
| 639 |
+
|
| 640 |
+
# # =========================================================
|
| 641 |
+
# # Config
|
| 642 |
+
# # =========================================================
|
| 643 |
+
# MEDIA_EXTS = (".mp4", ".m4a", ".mp3", ".wav", ".flac", ".ogg", ".aac", ".mov", ".avi")
|
| 644 |
+
# VTT_EXTS = (".vtt",)
|
| 645 |
+
|
| 646 |
+
# DEFAULT_MAX_MID_DIFF = 1.5
|
| 647 |
+
|
| 648 |
+
# # Normalize audio for stable playback in browsers
|
| 649 |
+
# TARGET_SR = 48000
|
| 650 |
+
# TARGET_CH = 1 # mono
|
| 651 |
+
# TARGET_SW = 2 # 16-bit PCM
|
| 652 |
+
|
| 653 |
+
|
| 654 |
+
# # =========================================================
|
| 655 |
+
# # Data structures
|
| 656 |
+
# # =========================================================
|
| 657 |
+
# @dataclass
|
| 658 |
+
# class Cue:
|
| 659 |
+
# start: float
|
| 660 |
+
# end: float
|
| 661 |
+
# text: str
|
| 662 |
+
|
| 663 |
+
|
| 664 |
+
# # =========================================================
|
| 665 |
+
# # VTT parsing
|
| 666 |
+
# # =========================================================
|
| 667 |
+
# _TAG_RE = re.compile(r"</?[^>]+?>", re.IGNORECASE)
|
| 668 |
+
# _VTT_TIME_RE = re.compile(
|
| 669 |
+
# r"(?P<start>\d{2}:\d{2}:\d{2}\.\d{3}|\d{1,2}:\d{2}\.\d{3})\s*-->\s*"
|
| 670 |
+
# r"(?P<end>\d{2}:\d{2}:\d{2}\.\d{3}|\d{1,2}:\d{2}\.\d{3})"
|
| 671 |
+
# )
|
| 672 |
+
|
| 673 |
+
|
| 674 |
+
# def _strip_tags(text: str) -> str:
|
| 675 |
+
# return _TAG_RE.sub("", text).strip()
|
| 676 |
+
|
| 677 |
+
|
| 678 |
+
# def _time_to_seconds(t: str) -> float:
|
| 679 |
+
# parts = t.split(":")
|
| 680 |
+
# if len(parts) == 3:
|
| 681 |
+
# return int(parts[0]) * 3600 + int(parts[1]) * 60 + float(parts[2])
|
| 682 |
+
# if len(parts) == 2:
|
| 683 |
+
# return int(parts[0]) * 60 + float(parts[1])
|
| 684 |
+
# raise ValueError(f"Bad VTT timestamp: {t}")
|
| 685 |
+
|
| 686 |
+
|
| 687 |
+
# def parse_vtt_file(path: str) -> List[Cue]:
|
| 688 |
+
# with open(path, "r", encoding="utf-8") as f:
|
| 689 |
+
# content = f.read()
|
| 690 |
+
|
| 691 |
+
# # Remove BOM / WEBVTT header (if any)
|
| 692 |
+
# content = content.replace("\ufeff", "")
|
| 693 |
+
# content = re.sub(r"^\s*WEBVTT.*?\n", "", content, flags=re.IGNORECASE)
|
| 694 |
+
|
| 695 |
+
# blocks = re.split(r"\r?\n\r?\n", content.strip())
|
| 696 |
+
# cues: List[Cue] = []
|
| 697 |
+
|
| 698 |
+
# for block in blocks:
|
| 699 |
+
# lines = [l.strip() for l in block.splitlines() if l.strip()]
|
| 700 |
+
# if not lines:
|
| 701 |
+
# continue
|
| 702 |
+
|
| 703 |
+
# # Locate the timestamp line (must contain "-->")
|
| 704 |
+
# time_idx: Optional[int] = None
|
| 705 |
+
# for i, line in enumerate(lines):
|
| 706 |
+
# if "-->" in line:
|
| 707 |
+
# time_idx = i
|
| 708 |
+
# break
|
| 709 |
+
# if time_idx is None:
|
| 710 |
+
# continue
|
| 711 |
+
|
| 712 |
+
# m = _VTT_TIME_RE.search(lines[time_idx])
|
| 713 |
+
# if not m:
|
| 714 |
+
# continue
|
| 715 |
+
|
| 716 |
+
# start = _time_to_seconds(m.group("start"))
|
| 717 |
+
# end = _time_to_seconds(m.group("end"))
|
| 718 |
+
# if end <= start:
|
| 719 |
+
# continue
|
| 720 |
+
|
| 721 |
+
# # Only take lines after the timestamp line as subtitle text
|
| 722 |
+
# text_lines = lines[time_idx + 1 :]
|
| 723 |
+
# if not text_lines:
|
| 724 |
+
# continue
|
| 725 |
+
|
| 726 |
+
# text = _strip_tags("\n".join(text_lines))
|
| 727 |
+
# if text:
|
| 728 |
+
# cues.append(Cue(start=start, end=end, text=text))
|
| 729 |
+
|
| 730 |
+
# return sorted(cues, key=lambda x: x.start)
|
| 731 |
+
|
| 732 |
+
|
| 733 |
+
# # =========================================================
|
| 734 |
+
# # Alignment (match by mid time), preserve per-track windows
|
| 735 |
+
# # =========================================================
|
| 736 |
+
# def align_by_time(a: List[Cue], b: List[Cue], th: float) -> List[Dict[str, Any]]:
|
| 737 |
+
# out: List[Dict[str, Any]] = []
|
| 738 |
+
# i, j, idx = 0, 0, 1
|
| 739 |
+
|
| 740 |
+
# while i < len(a) and j < len(b):
|
| 741 |
+
# ma = (a[i].start + a[i].end) / 2
|
| 742 |
+
# mb = (b[j].start + b[j].end) / 2
|
| 743 |
+
|
| 744 |
+
# if abs(ma - mb) <= th:
|
| 745 |
+
# out.append(
|
| 746 |
+
# {
|
| 747 |
+
# "idx": idx,
|
| 748 |
+
# # Per-track time window (recommended for playback)
|
| 749 |
+
# "a_start": a[i].start,
|
| 750 |
+
# "a_end": a[i].end,
|
| 751 |
+
# "b_start": b[j].start,
|
| 752 |
+
# "b_end": b[j].end,
|
| 753 |
+
# # Optional global time window (for comparison/debug)
|
| 754 |
+
# "start": min(a[i].start, b[j].start),
|
| 755 |
+
# "end": max(a[i].end, b[j].end),
|
| 756 |
+
# "a_text": a[i].text,
|
| 757 |
+
# "b_text": b[j].text,
|
| 758 |
+
# }
|
| 759 |
+
# )
|
| 760 |
+
# idx += 1
|
| 761 |
+
# i += 1
|
| 762 |
+
# j += 1
|
| 763 |
+
# elif ma < mb:
|
| 764 |
+
# i += 1
|
| 765 |
+
# else:
|
| 766 |
+
# j += 1
|
| 767 |
+
|
| 768 |
+
# return out
|
| 769 |
+
|
| 770 |
+
|
| 771 |
+
# # =========================================================
|
| 772 |
+
# # Audio slicing -> return (sr, np.int16) for gr.Audio(type="numpy")
|
| 773 |
+
# # =========================================================
|
| 774 |
+
# def export_segment_numpy(audio: AudioSegment, start: float, end: float) -> Tuple[int, np.ndarray]:
|
| 775 |
+
# start = max(0.0, float(start))
|
| 776 |
+
# end = max(start + 0.05, float(end)) # avoid too-short segments
|
| 777 |
+
|
| 778 |
+
# seg = audio[int(start * 1000) : int(end * 1000)]
|
| 779 |
+
|
| 780 |
+
# # Normalize to mono/48k/int16 for stable browser playback
|
| 781 |
+
# seg = seg.set_channels(TARGET_CH).set_frame_rate(TARGET_SR).set_sample_width(TARGET_SW)
|
| 782 |
+
|
| 783 |
+
# arr = np.asarray(seg.get_array_of_samples())
|
| 784 |
+
# if arr.dtype != np.int16:
|
| 785 |
+
# arr = arr.astype(np.int16, copy=False)
|
| 786 |
+
|
| 787 |
+
# # Shape should be (n,) for mono
|
| 788 |
+
# arr = np.ascontiguousarray(arr)
|
| 789 |
+
# return TARGET_SR, arr
|
| 790 |
+
|
| 791 |
+
|
| 792 |
+
# # =========================================================
|
| 793 |
+
# # Helper: robustly read seg_idx from gr.Dataframe value
|
| 794 |
+
# # =========================================================
|
| 795 |
+
# def _get_seg_idx_from_df(df_value: Any, row: int) -> Optional[int]:
|
| 796 |
+
# if df_value is None:
|
| 797 |
+
# return None
|
| 798 |
+
|
| 799 |
+
# # pandas DataFrame in some Gradio versions
|
| 800 |
+
# try:
|
| 801 |
+
# import pandas as pd # type: ignore
|
| 802 |
+
# if isinstance(df_value, pd.DataFrame):
|
| 803 |
+
# if row < 0 or row >= len(df_value.index) or df_value.shape[1] < 1:
|
| 804 |
+
# return None
|
| 805 |
+
# return int(df_value.iloc[row, 0])
|
| 806 |
+
# except Exception:
|
| 807 |
+
# pass
|
| 808 |
+
|
| 809 |
+
# # list-of-lists
|
| 810 |
+
# try:
|
| 811 |
+
# if isinstance(df_value, list) and row >= 0 and row < len(df_value) and len(df_value[row]) >= 1:
|
| 812 |
+
# return int(df_value[row][0])
|
| 813 |
+
# except Exception:
|
| 814 |
+
# return None
|
| 815 |
+
|
| 816 |
+
# return None
|
| 817 |
+
|
| 818 |
+
|
| 819 |
+
# # =========================================================
|
| 820 |
+
# # Gradio callbacks
|
| 821 |
+
# # =========================================================
|
| 822 |
+
# def scan_dataset(repo_id: str, repo_type: str):
|
| 823 |
+
# if not repo_id:
|
| 824 |
+
# raise gr.Error("请填写 Dataset / Repo 名称(例如 org/dataset)。")
|
| 825 |
+
|
| 826 |
+
# files = list_repo_files(repo_id, repo_type=repo_type)
|
| 827 |
+
# media_files = sorted([f for f in files if f.lower().endswith(MEDIA_EXTS)])
|
| 828 |
+
# vtt_files = sorted([f for f in files if f.lower().endswith(VTT_EXTS)])
|
| 829 |
+
|
| 830 |
+
# if not media_files:
|
| 831 |
+
# raise gr.Error("未找到媒体文件(mp4/mp3/wav 等)。")
|
| 832 |
+
# if not vtt_files:
|
| 833 |
+
# raise gr.Error("未找到 VTT 字幕文件。")
|
| 834 |
+
|
| 835 |
+
# return (
|
| 836 |
+
# gr.update(choices=media_files, value=media_files[0]),
|
| 837 |
+
# gr.update(choices=media_files, value=media_files[0]),
|
| 838 |
+
# gr.update(choices=vtt_files, value=vtt_files[0]),
|
| 839 |
+
# gr.update(choices=vtt_files, value=vtt_files[0]),
|
| 840 |
+
# )
|
| 841 |
+
|
| 842 |
+
|
| 843 |
+
# def load_and_align(repo_id, repo_type, media_a, media_b, vtt_a, vtt_b, th):
|
| 844 |
+
# if not all([repo_id, repo_type, media_a, media_b, vtt_a, vtt_b]):
|
| 845 |
+
# raise gr.Error("请先选择 A/B 的媒体文件与 VTT 文件。")
|
| 846 |
+
|
| 847 |
+
# local_media_a = hf_hub_download(repo_id, media_a, repo_type=repo_type)
|
| 848 |
+
# local_media_b = hf_hub_download(repo_id, media_b, repo_type=repo_type)
|
| 849 |
+
# local_vtt_a = hf_hub_download(repo_id, vtt_a, repo_type=repo_type)
|
| 850 |
+
# local_vtt_b = hf_hub_download(repo_id, vtt_b, repo_type=repo_type)
|
| 851 |
+
|
| 852 |
+
# try:
|
| 853 |
+
# audio_a = AudioSegment.from_file(local_media_a)
|
| 854 |
+
# audio_b = AudioSegment.from_file(local_media_b)
|
| 855 |
+
# except Exception as e:
|
| 856 |
+
# raise gr.Error(
|
| 857 |
+
# "媒体解码失败。若是 mp4/m4a,通常需要 ffmpeg。\n"
|
| 858 |
+
# f"原始错误: {repr(e)}"
|
| 859 |
+
# )
|
| 860 |
+
|
| 861 |
+
# cues_a = parse_vtt_file(local_vtt_a)
|
| 862 |
+
# cues_b = parse_vtt_file(local_vtt_b)
|
| 863 |
+
# if not cues_a or not cues_b:
|
| 864 |
+
# raise gr.Error("VTT 解析为空,请检查字幕文件内容。")
|
| 865 |
+
|
| 866 |
+
# aligned = align_by_time(cues_a, cues_b, float(th))
|
| 867 |
+
# if not aligned:
|
| 868 |
+
# raise gr.Error("未对齐到任何字幕片段,请尝试增大对齐阈值。")
|
| 869 |
+
|
| 870 |
+
# rows = [
|
| 871 |
+
# [
|
| 872 |
+
# x["idx"],
|
| 873 |
+
# f'{x["a_start"]:.2f}-{x["a_end"]:.2f}',
|
| 874 |
+
# f'{x["b_start"]:.2f}-{x["b_end"]:.2f}',
|
| 875 |
+
# x["a_text"],
|
| 876 |
+
# x["b_text"],
|
| 877 |
+
# ]
|
| 878 |
+
# for x in aligned
|
| 879 |
+
# ]
|
| 880 |
+
|
| 881 |
+
# # Critical: build idx -> seg map to survive dataframe sorting/reordering
|
| 882 |
+
# idx_map = {int(x["idx"]): x for x in aligned}
|
| 883 |
+
|
| 884 |
+
# state = {
|
| 885 |
+
# "aligned": aligned,
|
| 886 |
+
# "idx_map": idx_map,
|
| 887 |
+
# "audio_a": audio_a,
|
| 888 |
+
# "audio_b": audio_b,
|
| 889 |
+
# }
|
| 890 |
+
|
| 891 |
+
# # Clear old playback outputs
|
| 892 |
+
# return rows, state, None, None, {}
|
| 893 |
+
|
| 894 |
+
|
| 895 |
+
# def play_on_select(evt: gr.SelectData, df_value, crop_mode, offset_a, offset_b, state):
|
| 896 |
+
# if not state or "aligned" not in state:
|
| 897 |
+
# raise gr.Error("请先加载并对齐。")
|
| 898 |
+
|
| 899 |
+
# # evt.index: int or (row, col)
|
| 900 |
+
# idx_raw = evt.index
|
| 901 |
+
# row = int(idx_raw[0] if isinstance(idx_raw, (tuple, list)) else idx_raw)
|
| 902 |
+
|
| 903 |
+
# offset_a = float(offset_a)
|
| 904 |
+
# offset_b = float(offset_b)
|
| 905 |
+
|
| 906 |
+
# # Prefer seg_idx from the clicked row's first column; then resolve via idx_map.
|
| 907 |
+
# seg_idx = _get_seg_idx_from_df(df_value, row)
|
| 908 |
+
# seg = None
|
| 909 |
+
# idx_map = state.get("idx_map", {}) or {}
|
| 910 |
+
# if seg_idx is not None and seg_idx in idx_map:
|
| 911 |
+
# seg = idx_map[seg_idx]
|
| 912 |
+
# else:
|
| 913 |
+
# # Fallback to row->aligned if idx missing (should be rare)
|
| 914 |
+
# aligned = state["aligned"]
|
| 915 |
+
# if row < 0 or row >= len(aligned):
|
| 916 |
+
# raise gr.Error("选中行越界,请重试或重新对齐。")
|
| 917 |
+
# seg = aligned[row]
|
| 918 |
+
# seg_idx = int(seg.get("idx", row + 1))
|
| 919 |
+
|
| 920 |
+
# if crop_mode == "global":
|
| 921 |
+
# a_start, a_end = seg["start"] + offset_a, seg["end"] + offset_a
|
| 922 |
+
# b_start, b_end = seg["start"] + offset_b, seg["end"] + offset_b
|
| 923 |
+
# else:
|
| 924 |
+
# # per_track playback (recommended)
|
| 925 |
+
# a_start, a_end = seg["a_start"] + offset_a, seg["a_end"] + offset_a
|
| 926 |
+
# b_start, b_end = seg["b_start"] + offset_b, seg["b_end"] + offset_b
|
| 927 |
+
|
| 928 |
+
# a_np = export_segment_numpy(state["audio_a"], a_start, a_end)
|
| 929 |
+
# b_np = export_segment_numpy(state["audio_b"], b_start, b_end)
|
| 930 |
+
|
| 931 |
+
# info = {
|
| 932 |
+
# "segment": seg_idx,
|
| 933 |
+
# "row": row,
|
| 934 |
+
# "crop_mode": crop_mode,
|
| 935 |
+
# "A_time": f"{a_start:.2f}-{a_end:.2f}",
|
| 936 |
+
# "B_time": f"{b_start:.2f}-{b_end:.2f}",
|
| 937 |
+
# }
|
| 938 |
+
# return a_np, b_np, info
|
| 939 |
+
|
| 940 |
+
|
| 941 |
+
# # =========================================================
|
| 942 |
+
# # UI
|
| 943 |
+
# # =========================================================
|
| 944 |
+
# with gr.Blocks(title="双语音频字幕对齐(点击即播放)") as demo:
|
| 945 |
+
# gr.Markdown(
|
| 946 |
+
# "# 双语音频字幕对齐(点击表格即播放)\n"
|
| 947 |
+
# "流程:扫描 Dataset → 选择 A/B 媒体与字幕 → 加载并对齐 → 点击表格任意单元格播放对应片段。\n"
|
| 948 |
+
# "若字幕与音频整体存在固定延迟,可用 Track A/B 偏移进行校正。"
|
| 949 |
+
# )
|
| 950 |
+
|
| 951 |
+
# state = gr.State()
|
| 952 |
+
|
| 953 |
+
# with gr.Row():
|
| 954 |
+
# repo_id = gr.Textbox(label="Dataset / Repo 名称", placeholder="org/dataset")
|
| 955 |
+
# repo_type = gr.Radio(["dataset", "model"], value="dataset", label="Repo 类型")
|
| 956 |
+
|
| 957 |
+
# btn_scan = gr.Button("扫描 Dataset", variant="primary")
|
| 958 |
+
|
| 959 |
+
# with gr.Row():
|
| 960 |
+
# media_a = gr.Dropdown(label="Track A 媒体")
|
| 961 |
+
# media_b = gr.Dropdown(label="Track B 媒体")
|
| 962 |
+
|
| 963 |
+
# with gr.Row():
|
| 964 |
+
# vtt_a = gr.Dropdown(label="Track A 字幕")
|
| 965 |
+
# vtt_b = gr.Dropdown(label="Track B 字幕")
|
| 966 |
+
|
| 967 |
+
# btn_scan.click(
|
| 968 |
+
# scan_dataset,
|
| 969 |
+
# inputs=[repo_id, repo_type],
|
| 970 |
+
# outputs=[media_a, media_b, vtt_a, vtt_b],
|
| 971 |
+
# )
|
| 972 |
+
|
| 973 |
+
# th = gr.Slider(0.3, 5.0, value=DEFAULT_MAX_MID_DIFF, step=0.1, label="对齐阈值(秒)")
|
| 974 |
+
# btn_align = gr.Button("加载并对齐", variant="primary")
|
| 975 |
+
|
| 976 |
+
# df = gr.Dataframe(
|
| 977 |
+
# headers=["#", "A Time", "B Time", "Track A", "Track B"],
|
| 978 |
+
# interactive=True, # can be sorted/edited; mapping is stable due to idx_map
|
| 979 |
+
# wrap=True,
|
| 980 |
+
# max_height=520,
|
| 981 |
+
# )
|
| 982 |
+
|
| 983 |
+
# with gr.Row():
|
| 984 |
+
# crop_mode = gr.Radio(
|
| 985 |
+
# choices=["per_track", "global"],
|
| 986 |
+
# value="per_track",
|
| 987 |
+
# label="裁剪方式(建议 per_track)",
|
| 988 |
+
# )
|
| 989 |
+
# offset_a = gr.Slider(-20, 20, value=0.0, step=0.05, label="Track A 时间偏移(s)")
|
| 990 |
+
# offset_b = gr.Slider(-20, 20, value=0.0, step=0.05, label="Track B 时间偏移(s)")
|
| 991 |
+
|
| 992 |
+
# with gr.Row():
|
| 993 |
+
# a_out = gr.Audio(label="Track A 片段", type="numpy")
|
| 994 |
+
# b_out = gr.Audio(label="Track B 片段", type="numpy")
|
| 995 |
+
|
| 996 |
+
# play_info = gr.JSON(label="当前片段")
|
| 997 |
+
|
| 998 |
+
# btn_align.click(
|
| 999 |
+
# load_and_align,
|
| 1000 |
+
# inputs=[repo_id, repo_type, media_a, media_b, vtt_a, vtt_b, th],
|
| 1001 |
+
# outputs=[df, state, a_out, b_out, play_info],
|
| 1002 |
+
# )
|
| 1003 |
+
|
| 1004 |
+
# df.select(
|
| 1005 |
+
# play_on_select,
|
| 1006 |
+
# inputs=[df, crop_mode, offset_a, offset_b, state],
|
| 1007 |
+
# outputs=[a_out, b_out, play_info],
|
| 1008 |
+
# )
|
| 1009 |
+
|
| 1010 |
+
# if __name__ == "__main__":
|
| 1011 |
+
# demo.launch()
|
| 1012 |
+
|
| 1013 |
import re
|
| 1014 |
from dataclasses import dataclass
|
| 1015 |
from typing import Any, Dict, List, Tuple, Optional
|
|
|
|
| 1391 |
|
| 1392 |
if __name__ == "__main__":
|
| 1393 |
demo.launch()
|
|
|