Lingwei Meng commited on
Commit
c52df1b
·
1 Parent(s): c3e45ef
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +2 -0
  2. README.md +21 -3
  3. data/Whisper-Sidecar-data-metadata/convert_to_wavllm_data_format.py +42 -0
  4. data/Whisper-Sidecar-data-metadata/data/aishell1mix2_dev.jsonl +3 -0
  5. data/Whisper-Sidecar-data-metadata/data/aishell1mix2_test.jsonl +3 -0
  6. data/Whisper-Sidecar-data-metadata/data/aishell1mix2_test20.jsonl +3 -0
  7. data/Whisper-Sidecar-data-metadata/data/aishell1mix2_train.jsonl +3 -0
  8. data/Whisper-Sidecar-data-metadata/data/aishell1mix3_dev.jsonl +3 -0
  9. data/Whisper-Sidecar-data-metadata/data/aishell1mix3_test.jsonl +3 -0
  10. data/Whisper-Sidecar-data-metadata/data/aishell1mix3_train.jsonl +3 -0
  11. data/Whisper-Sidecar-data-metadata/data/data_prepare_aishellmix.py +71 -0
  12. data/Whisper-Sidecar-data-metadata/data/data_prepare_librimix.py +49 -0
  13. data/Whisper-Sidecar-data-metadata/data/data_prepare_librispeech.py +35 -0
  14. data/Whisper-Sidecar-data-metadata/data/data_prepare_librispeechmix.py +53 -0
  15. data/Whisper-Sidecar-data-metadata/data/generate_librimix_wav_from_jsonl.py +60 -0
  16. data/Whisper-Sidecar-data-metadata/data/libri2mix_dev-both.jsonl +3 -0
  17. data/Whisper-Sidecar-data-metadata/data/libri2mix_dev.jsonl +3 -0
  18. data/Whisper-Sidecar-data-metadata/data/libri2mix_test-both.jsonl +3 -0
  19. data/Whisper-Sidecar-data-metadata/data/libri2mix_test.jsonl +3 -0
  20. data/Whisper-Sidecar-data-metadata/data/libri2mix_test20.jsonl +3 -0
  21. data/Whisper-Sidecar-data-metadata/data/libri2mix_train-100-both.jsonl +3 -0
  22. data/Whisper-Sidecar-data-metadata/data/libri2mix_train-100.jsonl +3 -0
  23. data/Whisper-Sidecar-data-metadata/data/libri2mix_train-200.jsonl +3 -0
  24. data/Whisper-Sidecar-data-metadata/data/libri2mix_train-both.jsonl +3 -0
  25. data/Whisper-Sidecar-data-metadata/data/libri2mix_train.jsonl +3 -0
  26. data/Whisper-Sidecar-data-metadata/data/libri2mix_train20.jsonl +3 -0
  27. data/Whisper-Sidecar-data-metadata/data/libri2mix_train_remove_enroll.jsonl +3 -0
  28. data/Whisper-Sidecar-data-metadata/data/libri3mix_test.jsonl +3 -0
  29. data/Whisper-Sidecar-data-metadata/data/libri3mix_test20.jsonl +3 -0
  30. data/Whisper-Sidecar-data-metadata/data/libri3mix_train.jsonl +3 -0
  31. data/Whisper-Sidecar-data-metadata/data/librispeech2mix_test.jsonl +3 -0
  32. data/Whisper-Sidecar-data-metadata/data/librispeech2mix_test_30s.jsonl +3 -0
  33. data/Whisper-Sidecar-data-metadata/data/librispeech2mix_train.jsonl +3 -0
  34. data/Whisper-Sidecar-data-metadata/data/librispeech3mix_test.jsonl +3 -0
  35. data/Whisper-Sidecar-data-metadata/data/librispeech3mix_test_temp.jsonl +3 -0
  36. data/Whisper-Sidecar-data-metadata/data/librispeech3mix_train.jsonl +3 -0
  37. data/Whisper-Sidecar-data-metadata/data/librispeech_dev.jsonl +3 -0
  38. data/Whisper-Sidecar-data-metadata/data/librispeech_test.jsonl +3 -0
  39. data/Whisper-Sidecar-data-metadata/data/librispeech_train.jsonl +3 -0
  40. data/Whisper-Sidecar-data-metadata/data/long_wav_resample.py +52 -0
  41. data/Whisper-Sidecar-data-metadata/data/select_prompt_wav.py +132 -0
  42. data/Whisper-Sidecar-data-metadata/data/test_examples.jsonl +3 -0
  43. data/Whisper-Sidecar-data-metadata/data_for_wavllm/de-en-2mix_test.tsv +3 -0
  44. data/Whisper-Sidecar-data-metadata/data_for_wavllm/de-en-2mix_test_1350.tsv +3 -0
  45. data/Whisper-Sidecar-data-metadata/data_for_wavllm/de-en-2mix_test_1350_targetLingual.tsv +3 -0
  46. data/Whisper-Sidecar-data-metadata/data_for_wavllm/de-en-2mix_test_targetLingual.tsv +3 -0
  47. data/Whisper-Sidecar-data-metadata/data_for_wavllm/de-en-2mix_test_targetLingual_1350.tsv +3 -0
  48. data/Whisper-Sidecar-data-metadata/data_for_wavllm/de-en-2mix_train.tsv +3 -0
  49. data/Whisper-Sidecar-data-metadata/data_for_wavllm/de-en-2mix_train_targetLingual.tsv +3 -0
  50. data/Whisper-Sidecar-data-metadata/data_for_wavllm/de-en-3mix_test.tsv +3 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.jsonl filter=lfs diff=lfs merge=lfs -text
37
+ *.tsv filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,21 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## 代码
2
+ https://github.com/XiaoshanHsj/speechllm/tree/multispk_lingmeng
3
+
4
+ 看代码的README_lingmeng.md
5
+
6
+ ## 数据:
7
+ 英文部分: `./data/Whisper-Sidecar-data-metadata/data_for_wavllm`
8
+
9
+ 德文相关: `./data/de-en-mix`
10
+
11
+ targetASR (target-talker ASR)用到的reference audio: `./data/reference_enroll_audio/all`
12
+
13
+ 英文部分只有metadata,可以从librispeech生成。德文部分还备份了测试集音频。
14
+
15
+ ## 模型:
16
+ tokenizer: `./llama_model/llama/tokenizer.model`
17
+
18
+ llama-2-chat: `./llama_model/llama-2-7b-chat/consolidated.00.pth`
19
+
20
+ 训练好的MT-LLM模型目录: `./lingmeng_multispk_multitask_retrain_speechllm_v0.1_llama2_chat_wavlm_weighted_update_lora_32_32_prompt_build_multispk_multitask_de.yaml_16gpu_1accum`
21
+
data/Whisper-Sidecar-data-metadata/convert_to_wavllm_data_format.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# One-off converter: turn a Whisper-Sidecar jsonl manifest into the
# tab-separated format expected by WavLLM.
import soundfile as sf
import json
from tqdm import tqdm

input_jsonl = "/home/v-lingmeng/datasets/Whisper-Sidecar-data-metadata/data/librispeech3mix_test.jsonl"
output_tsv = "/home/v-lingmeng/datasets/Whisper-Sidecar-data-metadata/data_for_wavllm/" + input_jsonl.split("/")[-1].replace(".jsonl", "1.tsv")
print(output_tsv)

# Header row of the output tsv.
head = "\t".join(["id", "audio", "n_frames", "prompt", "tgt_text", "codec", "with_speech", "language", "speakers", "genders"])
prompts = ['Transcribe the given audio into text. If multiple speakers are speaking, transcribe the utterances of multiple speakers in the order of their start times, separated by "<sc>".']

# Speaker id -> gender, parsed from the LibriSpeech SPEAKERS.TXT table
# ("|"-separated columns; lines beginning with ";" are comments).
with open("/home/v-lingmeng/datasets/LibriSpeech/SPEAKERS.TXT", "r") as f:
    speaker_info = f.readlines()
speaker_gender = {}
for info_line in speaker_info:
    if info_line.startswith(";"):
        continue
    columns = info_line.split("|")
    speaker_gender[columns[0].strip()] = columns[1].strip()

with open(input_jsonl, "r") as f:
    manifest_lines = f.readlines()

new_lines = []
for raw_line in tqdm(manifest_lines):
    record = json.loads(raw_line.strip())
    # Rewrite the relative dataset prefix to the blob-storage location.
    audio = record['audio']['path'].replace("./dataset", "/valleblob/v-lingmeng/speech/data")
    wav_id = audio.split("/")[-1]
    # n_frames = number of samples, read from the wav file itself.
    n_frames = str(sf.read(audio)[0].shape[0])
    prompt = prompts[0]
    tgt_text = record["sentence"]
    codec = "None"
    with_speech = "True"
    language = "en"
    if "speakers" in record:
        speakers = "|".join(record["speakers"])
    else:
        # Fall back to parsing speaker ids out of the "_"-joined mixture
        # file name; each chunk looks like "<spk>-<chapter>-<utt>".
        speakers = "|".join(_id.split("-")[0] for _id in wav_id.split("_"))
    genders = "|".join(speaker_gender[spk] for spk in speakers.split("|"))

    new_lines.append("\t".join([wav_id, audio, n_frames, prompt, tgt_text, codec, with_speech, language, speakers, genders]))

with open(output_tsv, "w") as f:
    f.write("\n".join([head] + new_lines))
data/Whisper-Sidecar-data-metadata/data/aishell1mix2_dev.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:132bd6f6c6e353bca38831088bbe2eae65ce67635b5cc204249bd070f9c56e2a
3
+ size 2460550
data/Whisper-Sidecar-data-metadata/data/aishell1mix2_test.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53b0d3301937ee8affd9a7da43c8c7940dc6f632b1594efa7de4b7faa4524f5b
3
+ size 1483980
data/Whisper-Sidecar-data-metadata/data/aishell1mix2_test20.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53b0d3301937ee8affd9a7da43c8c7940dc6f632b1594efa7de4b7faa4524f5b
3
+ size 1483980
data/Whisper-Sidecar-data-metadata/data/aishell1mix2_train.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a612cd6616cdb756e6c2ad587eebad584e76a1101e754d220bb01b22224c0221
3
+ size 27055103
data/Whisper-Sidecar-data-metadata/data/aishell1mix3_dev.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bbe7c57288ad1565ffd0d630004508a26b48204b189cf30dd8a0f8f295e01a1
3
+ size 2870838
data/Whisper-Sidecar-data-metadata/data/aishell1mix3_test.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2d2fb69a6fbe3cd6c89f89a002e28e01944d96c3d85a6cd240fcb52bfbd8ec2
3
+ size 1442387
data/Whisper-Sidecar-data-metadata/data/aishell1mix3_train.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f583f9f8ff84df2ed1eb4da4dfd3a3d615d87c91bd4a3fda34dcf565028e76c
3
+ size 23867370
data/Whisper-Sidecar-data-metadata/data/data_prepare_aishellmix.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# prepare from msra-dev-node

import jsonlines
import soundfile as sf
import glob
import numpy as np
import os
import pandas as pd

def generate_jsonl_from_fairseq_datafile(root_data_dir, output_dir, with_timestamps=False):
    """Build aishell1mix{2,3}_{split}.jsonl manifests from Aishell1Mix metadata csvs.

    Each output line holds the mixture path, language, duration (max of the
    source durations), speaker ids, the '</s>'-joined transcript, and a
    per-source list of {start, end, text} entries.

    Args:
        root_data_dir: directory containing the Aishell1Mix metadata tree.
        output_dir: directory the jsonl manifests are written into.
        with_timestamps: currently unused -- the per-source "sentences"
            entries are always emitted.
    """
    splits = ['test', 'dev', 'train']
    num_spks = ["2", "3"]
    # Map utterance id -> transcript from the AISHELL-1 transcript file
    # (one "ID text..." line per utterance; the text part may contain spaces,
    # which are removed when joined).
    transcript_path = '/home/v-lingmeng/codebase/Whisper-Finetune-ovlp/dataset/aishell1/data_aishell/transcript/aishell_transcript_v0.8.txt'
    id_text_dict = {}
    with open(transcript_path, 'r') as f:  # close the handle (was leaked before)
        for line in f:
            _id, *text = line.strip().split(' ')
            id_text_dict[_id] = ''.join(text)

    for num_spk in num_spks:
        for split in splits:
            metadata = os.path.join(root_data_dir, 'Aishell1Mix', "data", f'Aishell1Mix{num_spk}', 'wav16k', 'max', 'metadata', f'mixture_{split}_mix_clean.csv')
            df = pd.read_csv(metadata)
            mix_id_list = df['mixture_ID'].tolist()
            mix_path_list = df['mixture_path'].tolist()
            source_wav_root = os.path.join("/home/v-lingmeng/codebase/Whisper-Finetune-ovlp/dataset/aishell1/data_aishell/wav", split)
            new_jsonl = os.path.join(output_dir, f'aishell1mix{num_spk}_{split}.jsonl')
            # Records are appended below, so start from a clean file.
            if os.path.exists(new_jsonl):
                os.remove(new_jsonl)

            # Open the writer once per split instead of re-opening it for
            # every single record.
            with jsonlines.open(new_jsonl, mode='a') as writer:
                for mix_id, mix_path in zip(mix_id_list, mix_path_list):
                    source_ids = mix_id.split('_')
                    source_texts = [id_text_dict[source_id] for source_id in source_ids]
                    source_text = '</s>'.join(source_texts)

                    # AISHELL utterance ids embed the speaker between "S" and
                    # "W" (e.g. BAC009S0002W0122 -> speaker "0002").
                    speakers = [source_id.split("S")[1].split("W")[0] for source_id in source_ids]

                    source_paths = [os.path.join(source_wav_root, "S" + str(speakers[i]), source_id + '.wav') for i, source_id in enumerate(source_ids)]
                    source_durations = [sf.info(source_path).duration for source_path in source_paths]

                    dic = {"audio": {"path": mix_path},
                           "language": "zh",
                           "duration": max(source_durations),
                           "speakers": speakers,
                           "sentence": source_text}

                    # All sources start at 0 in this mixing setup, so each
                    # segment spans [0, source duration].
                    dic["sentences"] = [{"start": 0, "end": end, "text": txt}
                                        for end, txt in zip(source_durations, source_texts)]

                    writer.write(dic)


if __name__ == '__main__':
    root_data_dir = '/home/v-lingmeng/codebase/Whisper-Finetune-ovlp/dataset'
    output_dir = '/home/v-lingmeng/codebase/Whisper-Finetune-ovlp/dataset/'
    generate_jsonl_from_fairseq_datafile(root_data_dir, output_dir, with_timestamps=True)
data/Whisper-Sidecar-data-metadata/data/data_prepare_librimix.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# prepare from SEPC

import jsonlines
# import soundfile as sf
import glob
import numpy as np
import os

def generate_jsonl_from_fairseq_datafile(root_data_dir, output_dir):
    """Convert fairseq LibriMix metadata + .wrd transcripts into jsonl manifests.

    Writes libri{N}mix_{split}.jsonl for N in {2, 3}; each record carries the
    mixture path, duration, speakers, the lower-cased joint transcript, and
    per-source {start, end, text} segments derived from the mixing delays.
    """
    for num_spk in ["2", "3"]:
        for split in ['test', 'dev', 'train']:
            mix_data_dir = root_data_dir + f'Libri{num_spk}Mix_wav16k_max/'
            wrd_path = mix_data_dir + split + '.wrd'
            meta_jsonl = mix_data_dir + split + '_clean.jsonl'
            out_jsonl = output_dir + f"libri{num_spk}mix_" + split + '.jsonl'
            # Records are appended below, so drop any stale output first.
            if os.path.exists(out_jsonl):
                os.remove(out_jsonl)

            with jsonlines.open(meta_jsonl) as reader, open(wrd_path, 'r') as wrd_file:
                for meta, transcript in zip(reader, wrd_file.readlines()):
                    entry = {
                        "audio": {"path": f"./dataset/LibriMix/data/Libri{num_spk}Mix/wav16k/max/" + meta['mixed_wav']},
                        "language": "en",
                        "duration": max(meta['durations']),
                        "speakers": meta['speakers'],
                        "sentence": transcript.strip().lower(),
                    }

                    begins = meta['delays']
                    finishes = [b + d for b, d in zip(begins, meta['durations'])]
                    pieces = transcript.strip().lower().split(" </s> ")
                    entry["sentences"] = [
                        {"start": s, "end": e, "text": t.strip().lower()}
                        for s, e, t in zip(begins, finishes, pieces)
                    ]

                    with jsonlines.open(out_jsonl, mode='a') as writer:
                        writer.write(entry)


if __name__ == '__main__':
    root_data_dir = '/mnt/users/hccl.local/lmeng/workspaces/overlapASR/egs_wav2vec/data/'
    output_dir = '/mnt/users/hccl.local/lmeng/workspaces/overlapASR/Whisper-Finetune-ovlp/dataset/'
    generate_jsonl_from_fairseq_datafile(root_data_dir, output_dir)
data/Whisper-Sidecar-data-metadata/data/data_prepare_librispeech.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# prepare from SEPC

import jsonlines
# import soundfile as sf
import glob
import os

import numpy as np

def generate_jsonl_from_fairseq_datafile(data_dir, output_dir):
    """Convert fairseq LibriSpeech tsv/wrd data files into jsonl manifests.

    The split .tsv lists "<flac_path>\\t<num_samples>" per utterance after a
    header line, and the .wrd file holds the matching transcript per line.

    Args:
        data_dir: directory containing <split>.tsv and <split>.wrd files.
        output_dir: directory the <split>.jsonl manifest is written into.
    """
    splits = ['test']

    for split in splits:
        wrd = data_dir + split + '.wrd'
        tsv = data_dir + split + '.tsv'
        new_jsonl = output_dir + split + '.jsonl'
        # Records are appended below, so remove any stale output first
        # (re-running previously duplicated every record; the sibling prep
        # scripts already do this).
        if os.path.exists(new_jsonl):
            os.remove(new_jsonl)

        # Open the writer once instead of re-opening per record.
        with open(tsv, 'r') as flac_path_f, open(wrd, 'r') as trans_f, \
                jsonlines.open(new_jsonl, mode='a') as writer:
            flac_path_f.readline()  # skip the tsv header line
            for flac_line, trans in zip(flac_path_f.readlines(), trans_f.readlines()):
                flac_path, duration = flac_line.strip().split('\t')
                writer.write({
                    "audio": {"path": "./dataset/librispeech/" + flac_path},
                    "language": "en",
                    # the tsv stores sample counts; the audio is 16 kHz
                    "duration": int(duration) / 16000.0,
                    "sentence": trans.strip().lower(),
                })


if __name__ == '__main__':
    data_dir = '/mnt/users/hccl.local/lmeng/workspaces/overlapASR/egs_wav2vec/data/LibriSpeech/'
    output_dir = '/mnt/users/hccl.local/lmeng/workspaces/overlapASR/Whisper-Finetune/dataset/'
    generate_jsonl_from_fairseq_datafile(data_dir, output_dir)
data/Whisper-Sidecar-data-metadata/data/data_prepare_librispeechmix.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# prepare from SEPC

import jsonlines
# import soundfile as sf
import glob
import numpy as np
import os

def generate_jsonl_from_fairseq_datafile(root_data_dir, output_dir, with_timestamps=False):
    """Convert fairseq LibriSpeechMix metadata + .wrd transcripts into jsonl manifests.

    Writes librispeech{N}mix[_timestamps]_{split}.jsonl for N in {2, 3}. When
    with_timestamps is set, every record additionally carries per-source
    {start, end, text} segments derived from the mixing delays.
    """
    for num_spk in ["2", "3"]:
        for split in ['test', 'dev', 'train']:
            data_dir = root_data_dir + f'LibriSpeechMix-{num_spk}spkr/'
            wrd = data_dir + split + '.wrd'
            fairseq_jsonl = data_dir + split + '_clean.jsonl'
            if with_timestamps:
                new_jsonl = output_dir + f"librispeech{num_spk}mix_timestamps_" + split + '.jsonl'
            else:
                new_jsonl = output_dir + f"librispeech{num_spk}mix_" + split + '.jsonl'

            # Records are appended below, so drop any stale output first.
            if os.path.exists(new_jsonl):
                os.remove(new_jsonl)

            with jsonlines.open(fairseq_jsonl) as reader, open(wrd, 'r') as f:
                for meta, text in zip(reader, f.readlines()):
                    begins = meta['delays']
                    spans = meta['durations']
                    entry = {
                        "audio": {"path": "./dataset/LibriSpeechMix/" + meta['mixed_wav']},
                        "language": "en",
                        # Mixture length = latest source end (delay + duration).
                        "duration": max(b + s for b, s in zip(begins, spans)),
                        "speakers": meta['speakers'],
                        "sentence": text.strip().lower(),
                    }
                    if with_timestamps:
                        entry["sentences"] = [
                            {"start": b, "end": b + s, "text": t.strip().lower()}
                            for b, s, t in zip(begins, spans, meta['texts'])
                        ]

                    with jsonlines.open(new_jsonl, mode='a') as writer:
                        writer.write(entry)


if __name__ == '__main__':
    root_data_dir = '/mnt/users/hccl.local/lmeng/workspaces/overlapASR/egs_wav2vec/data/'
    output_dir = '/mnt/users/hccl.local/lmeng/workspaces/overlapASR/Whisper-Finetune-ovlp/dataset/'
    generate_jsonl_from_fairseq_datafile(root_data_dir, output_dir, with_timestamps=True)
data/Whisper-Sidecar-data-metadata/data/generate_librimix_wav_from_jsonl.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Regenerate LibriMix mixture wavs from a jsonl manifest by summing the
# delayed LibriSpeech source utterances.
import json
import os
import glob
import soundfile
import librosa
from tqdm import tqdm
import numpy as np

def get_delayed_audio(wav_file, delay, sampling_rate=16000):
    """Read *wav_file* and left-pad it with *delay* seconds of silence."""
    audio, _ = soundfile.read(wav_file)
    delay_frame = int(delay * sampling_rate)
    if delay_frame != 0:
        audio = np.append(np.zeros(delay_frame), audio)
    return audio

def mix_audio(wav_files, delays):
    """Sum the delayed source signals into a single mixture.

    Args:
        wav_files: paths of the source audio files.
        delays: per-source start offsets in seconds, indexed in step with
            wav_files.

    Returns:
        1-D numpy array holding the mixture, as long as the longest delayed
        source. Returns an empty array for an empty source list (previously
        this raised UnboundLocalError).
    """
    audio = None
    for i, wav_file in enumerate(wav_files):
        delayed = get_delayed_audio(wav_file, delays[i])
        if audio is None:
            audio = delayed
            continue
        # Pad both signals to the same length, then superpose.
        target_length = max(len(audio), len(delayed))
        audio = librosa.util.fix_length(audio, size=target_length)
        delayed = librosa.util.fix_length(delayed, size=target_length)
        audio = audio + delayed
    return audio if audio is not None else np.zeros(0)


jsonl_path = "/home/v-lingmeng/codebase/Whisper-Finetune-ovlp/dataset/libri3mix_train.jsonl"
# Pick the LibriSpeech subset glob matching the manifest name.
if "test" in jsonl_path:
    subset = "test"
elif "train" in jsonl_path:
    subset = "train"
else:
    subset = "dev"
librispeech_dir = f"/home/v-lingmeng/datasets/LibriSpeech/{subset}*/"
output_dir = "/home/v-lingmeng/datasets"


with open(jsonl_path, 'r', encoding='utf-8') as file:
    json_list = [json.loads(line.strip()) for line in file]

for line in tqdm(json_list):
    audio_name = line['audio']['path'].replace("./dataset", output_dir)
    # exist_ok avoids the exists()/makedirs() race of the original.
    os.makedirs(os.path.split(audio_name)[0], exist_ok=True)

    # The mixture file name encodes the "_"-joined source utterance ids;
    # each id looks like "<spk>-<chapter>-<utt>" and locates the flac under
    # LibriSpeech/<subset>*/<spk>/<chapter>/.
    source_ids = os.path.split(audio_name)[1].split(".")[0].split("_")
    source_files = [glob.glob(librispeech_dir + "/".join(i.split("-")[:-1]) + f"/{i}*")[0] for i in source_ids]
    delays = [l["start"] for l in line['sentences']]

    mixed_audio = mix_audio(source_files, delays)
    soundfile.write(audio_name, mixed_audio, samplerate=16000)
    print(audio_name)
data/Whisper-Sidecar-data-metadata/data/libri2mix_dev-both.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a00333b0e46f84e4389e63df5045f8a762487eb29bb89eb7de9f70941e1434c
3
+ size 2224222
data/Whisper-Sidecar-data-metadata/data/libri2mix_dev.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c08740dd203b33312ea9793a8859302c2ff41f44de77ffb404c6d5859df2287
3
+ size 2227222
data/Whisper-Sidecar-data-metadata/data/libri2mix_test-both.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88e089c638beff65237045f526348e50341026b0324e0230e1ee17568b3173c1
3
+ size 2094572
data/Whisper-Sidecar-data-metadata/data/libri2mix_test.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbf70cb2bca21af6465f534a1910756cce13eb410815eccc90fc74b24567e4b6
3
+ size 2097572
data/Whisper-Sidecar-data-metadata/data/libri2mix_test20.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9831f61da77ba9a17367845c6288c9a7b24858cf69dcbb53e4377cb23640820b
3
+ size 14417
data/Whisper-Sidecar-data-metadata/data/libri2mix_train-100-both.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f0eb2462fa951da717d18e172a5b781b6149d3483e56022bcea7459d163b76e
3
+ size 14778639
data/Whisper-Sidecar-data-metadata/data/libri2mix_train-100.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10ac8db4f8c3851c7e34f4643268355c2d34a7f8952aa316b0b70d2019733f70
3
+ size 14792539
data/Whisper-Sidecar-data-metadata/data/libri2mix_train-200.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94e70011854f573bf05586a9793f19510d6be7188be89c032459910ad1d3e11d
3
+ size 29571178
data/Whisper-Sidecar-data-metadata/data/libri2mix_train-both.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f72957a086f85d8fee3aea616c471dd7c7baf465d355db361f796f13c7e6478e
3
+ size 68694922
data/Whisper-Sidecar-data-metadata/data/libri2mix_train.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9bd7eceb100a8f842e1b708066e5da7ac77e5008ef013266c567528ab1f8c4f
3
+ size 68759621
data/Whisper-Sidecar-data-metadata/data/libri2mix_train20.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41d2502e124c4cdf60adb5a83f8942f2fb5d710634485b5a0382e45873570048
3
+ size 22316
data/Whisper-Sidecar-data-metadata/data/libri2mix_train_remove_enroll.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3d3c34060e44c767cd70ebb53aa18d53db4a3828fa92037addeed9c04e9f8a5
3
+ size 68268344
data/Whisper-Sidecar-data-metadata/data/libri3mix_test.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:daf3f6411bb717e86d77b4f17a455323c5d9df5fdb37fe6513a11859d50f41e9
3
+ size 2855611
data/Whisper-Sidecar-data-metadata/data/libri3mix_test20.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f66ad5735f3e3d1bd24bf960439142e181dbe4ab50a951a3807b4050040568c
3
+ size 19612
data/Whisper-Sidecar-data-metadata/data/libri3mix_train.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ba19b8a28a4430dbab099d5450ae075f6096c869ca318846f1ed219b471c997
3
+ size 65232138
data/Whisper-Sidecar-data-metadata/data/librispeech2mix_test.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6dfd13ad50aeb92257fc7c8c4d721fad1d2e3901be95eed0aa4d58211027d471
3
+ size 1945661
data/Whisper-Sidecar-data-metadata/data/librispeech2mix_test_30s.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1d07d099fe7d64cfec16efcc06b5dcac825984ca8668909152882adef2edcb9
3
+ size 1945138
data/Whisper-Sidecar-data-metadata/data/librispeech2mix_train.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f629fb7f89d02d937e5f4d1ced117c70904fc5d97d520706758a3e41010c1c2
3
+ size 287742080
data/Whisper-Sidecar-data-metadata/data/librispeech3mix_test.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4019227f5a2f827f2517e942c8c6f72b7a6d80fe8fe32db2909393254d7af771
3
+ size 2730857
data/Whisper-Sidecar-data-metadata/data/librispeech3mix_test_temp.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be1fafb707b168a23d3c8225e3f04407da63360b27b99d435d11c8a7560dee9c
3
+ size 2729249
data/Whisper-Sidecar-data-metadata/data/librispeech3mix_train.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47917569ed5a933e6ad66ccef5ccefded8f4c117a0296dafcbde9fc564e835e6
3
+ size 410988677
data/Whisper-Sidecar-data-metadata/data/librispeech_dev.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca54eb22dd7f421bd9c8b8e60a5df32946a7823d5547e49eaa030769c48de0da
3
+ size 2194617
data/Whisper-Sidecar-data-metadata/data/librispeech_test.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8421fbe45b046b2a5644782b3e982538bd8483d72964b6f08fc208e5e7059648
3
+ size 2197082
data/Whisper-Sidecar-data-metadata/data/librispeech_train.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc923ead53a1248037e622bbeb75899a0c2637720600cc8c386530d5cdfa95b8
3
+ size 91088243
data/Whisper-Sidecar-data-metadata/data/long_wav_resample.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Clamp over-long mixtures (> 30 s) to 30 s and rewrite the jsonl metadata.
# Clearly over-long audio is time-stretched (sped up) in place to fit;
# marginally over-long audio is simply truncated.

import jsonlines
import os

from pydub import AudioSegment
from pydub.playback import play
import soundfile as sf



jsonl_file = "/home/v-lingmeng/codebase/Whisper-Finetune-ovlp/dataset/librispeech3mix_test.jsonl"
temp_file = "/home/v-lingmeng/codebase/Whisper-Finetune-ovlp/dataset/librispeech3mix_test_temp.jsonl"

with jsonlines.open(jsonl_file, 'r') as reader, jsonlines.open(temp_file, 'w') as writer:
    for obj in reader:
        wav_path = obj['audio']['path']
        duration = obj['duration']
        sentences = obj['sentences']
        if duration > 30.1:
            # Well over budget: speed the waveform up so it fits in 30 s,
            # overwriting the wav in place.
            print(wav_path, duration)
            wav = AudioSegment.from_file(wav_path)
            target_len = 30.0 * 1000  # pydub lengths are in milliseconds
            speed_up_rate = len(wav) / target_len
            wav = wav.speedup(playback_speed=speed_up_rate)[:target_len]
            wav.export(wav_path, format="wav")
            print(speed_up_rate)
            obj['duration'] = 30
            # Rescale segment timestamps onto the sped-up timeline,
            # clamping at the 30 s boundary.
            for sentence in sentences:
                sentence['start'] = min(sentence['start'] / speed_up_rate, 30)
                sentence['end'] = min(sentence['end'] / speed_up_rate, 30)
            obj['sentences'] = sentences
        elif duration > 30.0:
            # Barely over budget: truncate at 30 s worth of 16 kHz samples
            # and clamp the timestamps.
            wav, sr = sf.read(wav_path)
            wav = wav[:int(16000 * 30)]
            obj['duration'] = 30
            for sentence in sentences:
                sentence['start'] = min(sentence['start'], 30)
                sentence['end'] = min(sentence['end'], 30)
            obj['sentences'] = sentences
            sf.write(wav_path, wav, sr)


        writer.write(obj)
data/Whisper-Sidecar-data-metadata/data/select_prompt_wav.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import glob
3
+ import random
4
+
5
+ import jsonlines
6
+
7
+ # backup_dir = "/home/v-lingmeng/codebase/Whisper-Finetune-ovlp/dataset/temp/removed_mix_wav"
8
+ # librispeech_dir = "/home/v-lingmeng/codebase/Whisper-Finetune-ovlp/dataset/LibriSpeech/train-clean-360"
9
+ # enroll_dir = "/home/v-lingmeng/codebase/Whisper-Finetune-ovlp/dataset/temp/new_enroll"
10
+
11
+ # # 1. 获取所有的wav文件
12
+ # wav_files = glob.glob("/home/v-lingmeng/codebase/Whisper-Finetune-ovlp/dataset/LibriMix/data/Libri2Mix/wav16k/max/train-360/mix_clean/*.wav")
13
+
14
+ # # 2. 每个wav文件有两个说话人,记录所有wav文件出现的说话人
15
+ # all_speakers = set()
16
+ # for wav_file in wav_files:
17
+ # speakers = [f.split('-')[0] for f in os.path.basename(wav_file).split("_")]
18
+ # all_speakers.update(speakers)
19
+ # # random.shuffle(wav_files)
20
+
21
+ # len_all_speakers = len(all_speakers)
22
+ # # 3. 对每个说话人,复制且只复制一个具有它的语音文件
23
+ # count = 0
24
+ # for wav_file in wav_files:
25
+ # source_wavs = os.path.basename(wav_file).split("_")
26
+ # speakers = [f.split('-')[0] for f in source_wavs]
27
+ # # 如果有任意一个说话人不在all_speakers中,跳过这个文件
28
+ # if not all(s in all_speakers for s in speakers):
29
+ # continue
30
+ # else:
31
+ # # 从all_speakers中删除这两个说话人
32
+ # all_speakers.difference_update(speakers)
33
+ # # 复制这个文件
34
+ # os.system(f"cp {wav_file} {backup_dir}")
35
+ # # print(f"cp {wav_file} {backup_dir}")
36
+ # # 复制source_wavs中的每个说话人的语音文件
37
+ # for source_wav in source_wavs:
38
+ # count+=1
39
+ # source_wav_path = os.path.join(librispeech_dir, source_wav.split("-")[0], source_wav.split("-")[1], source_wav.split('.')[0] + ".flac")
40
+ # # 判断是否存在
41
+ # if not os.path.exists(source_wav_path):
42
+ # print(f"source_wav_path: {source_wav_path} not exists")
43
+ # continue
44
+ # os.system(f"cp {source_wav_path} {enroll_dir}")
45
+
46
+ # print(all_speakers)
47
+ # print(count, len_all_speakers)
48
+
49
+
50
+
51
+ # source = "/home/v-lingmeng/codebase/Whisper-Finetune-ovlp/dataset/temp/new_enroll"
52
+ # enroll_dir = "/home/v-lingmeng/codebase/Whisper-Finetune-ovlp/dataset/LibriMix_enroll_audio/train-360"
53
+
54
+ # flac_files = glob.glob(f"{source}/*.flac")
55
+
56
+ # for flac_files in flac_files:
57
+ # # mkdir enroll_dir/spk_id
58
+ # spk_id = os.path.basename(flac_files).split("-")[0]
59
+ # spk_dir = os.path.join(enroll_dir, spk_id)
60
+ # if os.path.exists(spk_dir):
61
+ # # 删除
62
+ # os.system(f"rm -rf {spk_dir}")
63
+ # os.makedirs(spk_dir, exist_ok=True)
64
+ # # convert flac to wav, move to spk_dir
65
+ # wav_file = os.path.join(spk_dir, spk_id+ ".wav")
66
+ # os.system(f"ffmpeg -i {flac_files} {wav_file}")
67
+
68
+
69
+ # jsonl_file = "/home/v-lingmeng/codebase/Whisper-Finetune-ovlp/dataset/libri2mix_train_remove_enroll.jsonl"
70
+ # enrolled_dir = "/home/v-lingmeng/codebase/Whisper-Finetune-ovlp/dataset/temp/removed_mix_wav"
71
+
72
+ # # remove wav file in enrolled_dir from jsonl_file
73
+ # with jsonlines.open(jsonl_file) as reader:
74
+ # lines = list(reader)
75
+ # print(len(lines))
76
+ # for line in lines:
77
+ # mix_wav = line['audio']['path']
78
+ # if os.path.exists(os.path.join(enrolled_dir, os.path.basename(mix_wav))):
79
+ # lines.remove(line)
80
+ # print(len(lines))
81
+ # # write to new jsonl file
82
+ # new_jsonl_file = "/home/v-lingmeng/codebase/Whisper-Finetune-ovlp/dataset/libri2mix_train_remove_enroll.jsonl"
83
+ # with jsonlines.open(new_jsonl_file, "w") as writer:
84
+ # for line in lines:
85
+ # writer.write(line)
86
+ # print("done")
87
+
88
+
89
# Ensure every speaker under librispeech_dir has an enrollment recording in
# enroll_path, named "<speaker_id>.wav". Newly copied flacs are logged.
librispeech_dir = "/home/v-lingmeng/codebase/Whisper-Finetune-ovlp/dataset/LibriSpeech/train-other-500"
enroll_path ="/home/v-lingmeng/codebase/Whisper-Finetune-ovlp/dataset/LibriMix_enroll_audio/train-500"

# Check whether each speaker in librispeech_dir already exists in enroll_path.
speaker_dirs = glob.glob(f"{librispeech_dir}/*")
new_file = []
for speaker_dir in speaker_dirs:
    # Skip metadata files (e.g. SPEAKERS.TXT) sitting next to speaker dirs.
    if ".TXT" in speaker_dir:
        continue
    speaker_id = os.path.basename(speaker_dir)
    enroll_speaker_dir = os.path.join(enroll_path, speaker_id)
    if not os.path.exists(enroll_speaker_dir):
        print(f"{enroll_speaker_dir} not exists")
        os.makedirs(enroll_speaker_dir, exist_ok=True)

    if len(glob.glob(f"{enroll_speaker_dir}/*.wav")) == 0:
        # No enrollment audio yet: copy one randomly chosen LibriSpeech flac
        # for this speaker into the enrollment directory.
        flac_files = glob.glob(f"{speaker_dir}/*/*.flac")
        # Randomly pick one of the speaker's utterances and copy it over.
        flac_file = random.choice(flac_files)
        # Copy into enroll_speaker_dir.
        new_flac_file = os.path.join(enroll_speaker_dir, os.path.basename(flac_file))
        print(new_flac_file)
        os.system(f"cp {flac_file} {new_flac_file}")
        # Remember which source flac was picked.
        new_file.append(flac_file)
    else:
        print(glob.glob(f"{enroll_speaker_dir}/*.wav"))

    # Normalize enrollment file names to "{speaker_id}.wav": anything named
    # differently is converted with ffmpeg and the original removed.
    enroll_wav_files = glob.glob(f"{enroll_speaker_dir}/*")
    for enroll_wav_file in enroll_wav_files:
        if os.path.basename(enroll_wav_file).split(".")[0] != speaker_id:
            print(f"{enroll_wav_file} not match")
            # Convert to wav, keeping only the speaker_id in the name.
            wav_file = os.path.join(enroll_speaker_dir, speaker_id + ".wav")
            os.system(f"ffmpeg -i {enroll_wav_file} {wav_file}")
            # Delete the original file.
            os.system(f"rm -rf {enroll_wav_file}")

# Record the newly copied flac files for bookkeeping.
with open("/home/v-lingmeng/codebase/Whisper-Finetune-ovlp/dataset/temp/new_flac_files.txt", "w") as f:
    for flac_file in new_file:
        f.write(flac_file + "\n")
data/Whisper-Sidecar-data-metadata/data/test_examples.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcf048b062b475972c1e6dad258244680a88a9acfc8841dd8ea284ad56efc3dc
3
+ size 794
data/Whisper-Sidecar-data-metadata/data_for_wavllm/de-en-2mix_test.tsv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53fb93ebd24db3e32a7dd7fd5908222f2f0a498a7d0ede6835140c2d4b1f3552
3
+ size 8892480
data/Whisper-Sidecar-data-metadata/data_for_wavllm/de-en-2mix_test_1350.tsv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec64b8b840655de399e374931c056dd4b22d4070c8b97cb197863182599af765
3
+ size 889784
data/Whisper-Sidecar-data-metadata/data_for_wavllm/de-en-2mix_test_1350_targetLingual.tsv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:971baf65ee1603b22483ef12a1040d884419077069b68dad7931a305ffe2f7aa
3
+ size 632636
data/Whisper-Sidecar-data-metadata/data_for_wavllm/de-en-2mix_test_targetLingual.tsv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12249076735100be44176352c65ab89a30751ae7ac5621583130674529774d57
3
+ size 6361360
data/Whisper-Sidecar-data-metadata/data_for_wavllm/de-en-2mix_test_targetLingual_1350.tsv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:968fd7a78bdca0783e5a2baf7db53f621ed274ba1295f5bd4d2553794a4718da
3
+ size 636201
data/Whisper-Sidecar-data-metadata/data_for_wavllm/de-en-2mix_train.tsv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21cb5e6b2243c0ce5a2cf6e431dd1c9c5dc5efb79fb3c2a9d548dbbc335c212f
3
+ size 93936028
data/Whisper-Sidecar-data-metadata/data_for_wavllm/de-en-2mix_train_targetLingual.tsv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8aad7b8718ed09f7e98742caa6c85e496f68e332f73e65a557ecdac06cec8f2a
3
+ size 65029852
data/Whisper-Sidecar-data-metadata/data_for_wavllm/de-en-3mix_test.tsv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2e84678170247fd7ef2a06622a32d9d42abf3e1b83e8c4426e4a9d03e44ce72
3
+ size 10851471