HoneyTian committed on
Commit
68b5c74
·
1 Parent(s): d4b5819
examples/badcase_filter/bad_case_find.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ import argparse
4
+ import json
5
+ from pathlib import Path
6
+ import shutil
7
+
8
+ from gradio_client import Client, handle_file
9
+ from tqdm import tqdm
10
+
11
+ from project_settings import project_path
12
+
13
+
14
def get_args(argv=None):
    """Parse the command-line options for the bad-case scan.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is what the script entry point
            relies on.

    Returns:
        argparse.Namespace with one string attribute, ``data_dir``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--data_dir",
        default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\audio_lib_hkg_12\es-MX2",
        # default=(project_path / "data/calling/62/wav_segmented"),
        type=str,
        help="Directory that is searched recursively for active_media_r_*.wav files.",
    )
    return parser.parse_args(argv)
24
+
25
+
26
def main():
    """Send every matching recording to the local split endpoint.

    Collects ``active_media_r_*.wav`` files recursively below ``--data_dir``
    and calls the ``/when_click_split_button`` endpoint of the Gradio app at
    http://127.0.0.1:7864/ once per file.

    NOTE(review): the returned samples are only checked for emptiness; no
    file is moved, copied or reported yet. Presumably this script is still
    work in progress -- confirm what should happen with non-empty results.
    """
    args = get_args()

    data_dir = Path(args.data_dir)

    client = Client("http://127.0.0.1:7864/")

    # Materialise the glob first so tqdm knows the total and can show real
    # progress; wrapping a bare enumerate(generator) only shows a counter.
    filenames = list(data_dir.glob("**/active_media_r_*.wav"))

    for filename in tqdm(filenames):
        response = client.predict(
            audio_t=handle_file(filename.as_posix()),
            model_name="sound-2-ch16-cnn",
            label="voice",
            win_size=2,
            win_step=0.25,
            n_erode=2,
            n_dilate=2,
            api_name="/when_click_split_button"
        )
        samples = response["samples"]
        if not samples:
            # Nothing labelled "voice" was split out of this recording.
            continue
51
+
52
+
53
+ if __name__ == "__main__":
54
+ main()
examples/download_wav/step_3_split_two_second_wav.py CHANGED
@@ -15,21 +15,21 @@ def get_args():
15
 
16
  parser.add_argument(
17
  "--audio_dir",
18
- default=(project_path / "data/calling/62/wav_1ch").as_posix(),
19
  # default=(project_path / "data/calling/358/wav_1ch/finished/voicemail_annotation").as_posix(),
20
  # default=(project_path / "data/calling/358/wav_1ch/finished/voicemail_annotation").as_posix(),
21
- # default=r"D:\Users\tianx\HuggingSpaces\template_match_asr\data\wav\early_media\52\music",
22
  type=str
23
  )
24
  parser.add_argument(
25
  "--output_dir",
26
- default=(project_path / "data/calling/62/wav_segmented").as_posix(),
27
- # default=r"D:\Users\tianx\HuggingSpaces\template_match_asr\data\wav\early_media\52\music\wav_segmented",
28
  type=str
29
  )
30
  parser.add_argument(
31
  "--first_n_seconds",
32
- default=6,
33
  type=int
34
  )
35
  args = parser.parse_args()
@@ -43,7 +43,7 @@ def main():
43
  output_dir = Path(args.output_dir)
44
  output_dir.mkdir(parents=True, exist_ok=True)
45
 
46
- for filename in tqdm(list(audio_dir.glob("*.wav"))):
47
  splits = filename.stem.split("_")
48
  call_id = splits[3]
49
  language = splits[4]
@@ -64,7 +64,8 @@ def main():
64
  end = begin + sample_rate * 2
65
  sub_signal = signal[begin: end]
66
 
67
- to_filename = output_dir / f"active_media_r_{call_id}_{language}_{scene_id}_{begin}_from_mp3.wav"
 
68
  wavfile.write(
69
  to_filename.as_posix(),
70
  sample_rate,
 
15
 
16
  parser.add_argument(
17
  "--audio_dir",
18
+ # default=(project_path / "data/calling/62/wav_1ch").as_posix(),
19
  # default=(project_path / "data/calling/358/wav_1ch/finished/voicemail_annotation").as_posix(),
20
  # default=(project_path / "data/calling/358/wav_1ch/finished/voicemail_annotation").as_posix(),
21
+ default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\audio_lib_hkg_1\es-MX\keep",
22
  type=str
23
  )
24
  parser.add_argument(
25
  "--output_dir",
26
+ # default=(project_path / "data/calling/62/wav_segmented").as_posix(),
27
+ default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\es-MX-backup",
28
  type=str
29
  )
30
  parser.add_argument(
31
  "--first_n_seconds",
32
+ default=100,
33
  type=int
34
  )
35
  args = parser.parse_args()
 
43
  output_dir = Path(args.output_dir)
44
  output_dir.mkdir(parents=True, exist_ok=True)
45
 
46
+ for filename in tqdm(list(audio_dir.glob("active_media_r_*.wav"))):
47
  splits = filename.stem.split("_")
48
  call_id = splits[3]
49
  language = splits[4]
 
64
  end = begin + sample_rate * 2
65
  sub_signal = signal[begin: end]
66
 
67
+ # to_filename = output_dir / f"active_media_r_{call_id}_{language}_{scene_id}_{begin}_from_mp3.wav"
68
+ to_filename = output_dir / f"active_media_r_{call_id}_{language}_{scene_id}_{begin}.wav"
69
  wavfile.write(
70
  to_filename.as_posix(),
71
  sample_rate,
examples/sample_filter/bad_case_find.py CHANGED
@@ -15,19 +15,29 @@ def get_args():
15
  parser = argparse.ArgumentParser()
16
  parser.add_argument(
17
  "--data_dir",
18
- default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\backup",
 
19
  # default=(project_path / "data/calling/62/wav_segmented"),
20
  type=str
21
  )
22
  parser.add_argument(
23
- "--keep_dir",
24
- default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\backup\keep",
 
 
 
 
 
 
 
 
25
  # default=(project_path / "data/calling/62/wav_segmented/keep"),
26
  type=str
27
  )
28
  parser.add_argument(
29
  "--trash_dir",
30
- default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\backup\trash",
 
31
  # default=(project_path / "data/calling/62/wav_segmented/trash"),
32
  type=str
33
  )
@@ -39,16 +49,19 @@ def main():
39
  args = get_args()
40
 
41
  data_dir = Path(args.data_dir)
42
- keep_dir = Path(args.keep_dir)
43
- keep_dir.mkdir(parents=True, exist_ok=True)
 
 
44
  trash_dir = Path(args.trash_dir)
45
  trash_dir.mkdir(parents=True, exist_ok=True)
46
 
47
  client = Client("http://127.0.0.1:7864/")
48
 
49
- for idx, filename in tqdm(enumerate(data_dir.glob("**/*.wav"))):
50
- if idx < 9000:
51
- continue
 
52
  filename = filename.as_posix()
53
 
54
  outputs1 = client.predict(
@@ -72,14 +85,25 @@ def main():
72
  prob2 = outputs2["prob"]
73
 
74
  if label1 == "voicemail" and label2 in ("voicemail", "bell") and prob1 > 0.6:
75
- pass
 
 
 
76
  elif label1 == "non_voicemail" and label2 not in ("voicemail", "bell", "voice") and prob1 > 0.6:
77
- pass
 
 
 
 
 
 
 
 
78
  else:
79
  if label1 == "non_voicemail" and label2 in ("voice",):
80
- tgt_dir = trash_dir
81
  else:
82
- tgt_dir = keep_dir
83
  print(f"label1: {label1}, prob1: {prob1}, label2: {label2}, prob2: {prob2}")
84
  shutil.move(
85
  filename,
 
15
  parser = argparse.ArgumentParser()
16
  parser.add_argument(
17
  "--data_dir",
18
+ default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\es-MX-backup",
19
+ # default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\backup",
20
  # default=(project_path / "data/calling/62/wav_segmented"),
21
  type=str
22
  )
23
  parser.add_argument(
24
+ "--keep_dir1",
25
+ default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\es-MX-backup\keep1",
26
+ # default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\backup\keep1",
27
+ # default=(project_path / "data/calling/62/wav_segmented/keep"),
28
+ type=str
29
+ )
30
+ parser.add_argument(
31
+ "--keep_dir2",
32
+ default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\es-MX-backup\keep2",
33
+ # default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\backup\keep2",
34
  # default=(project_path / "data/calling/62/wav_segmented/keep"),
35
  type=str
36
  )
37
  parser.add_argument(
38
  "--trash_dir",
39
+ default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\es-MX-backup\trash",
40
+ # default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\backup\trash",
41
  # default=(project_path / "data/calling/62/wav_segmented/trash"),
42
  type=str
43
  )
 
49
  args = get_args()
50
 
51
  data_dir = Path(args.data_dir)
52
+ keep_dir1 = Path(args.keep_dir1)
53
+ keep_dir1.mkdir(parents=True, exist_ok=True)
54
+ keep_dir2 = Path(args.keep_dir2)
55
+ keep_dir2.mkdir(parents=True, exist_ok=True)
56
  trash_dir = Path(args.trash_dir)
57
  trash_dir.mkdir(parents=True, exist_ok=True)
58
 
59
  client = Client("http://127.0.0.1:7864/")
60
 
61
+ # for idx, filename in tqdm(enumerate(data_dir.glob("**/active_media_r_*.wav"))):
62
+ for idx, filename in tqdm(enumerate(data_dir.glob("*.wav"))):
63
+ # if idx < 9000:
64
+ # continue
65
  filename = filename.as_posix()
66
 
67
  outputs1 = client.predict(
 
85
  prob2 = outputs2["prob"]
86
 
87
  if label1 == "voicemail" and label2 in ("voicemail", "bell") and prob1 > 0.6:
88
+ shutil.move(
89
+ filename,
90
+ trash_dir.as_posix(),
91
+ )
92
  elif label1 == "non_voicemail" and label2 not in ("voicemail", "bell", "voice") and prob1 > 0.6:
93
+ shutil.move(
94
+ filename,
95
+ trash_dir.as_posix(),
96
+ )
97
+ elif label2 in ("mute", "white_noise"):
98
+ shutil.move(
99
+ filename,
100
+ trash_dir.as_posix(),
101
+ )
102
  else:
103
  if label1 == "non_voicemail" and label2 in ("voice",):
104
+ tgt_dir = keep_dir1
105
  else:
106
+ tgt_dir = keep_dir2
107
  print(f"label1: {label1}, prob1: {prob1}, label2: {label2}, prob2: {prob2}")
108
  shutil.move(
109
  filename,
examples/sample_filter/non_voicemail_filter.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ import argparse
4
+ import json
5
+ from pathlib import Path
6
+ import shutil
7
+
8
+ from gradio_client import Client, handle_file
9
+ from tqdm import tqdm
10
+
11
+ from project_settings import project_path
12
+
13
+
14
def get_args(argv=None):
    """Parse the command-line options for the non-voicemail filter.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is what the script entry point
            relies on.

    Returns:
        argparse.Namespace with the string attributes ``data_dir``,
        ``keep_dir`` and ``trash_dir``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--data_dir",
        default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\audio_lib_hkg_1\es-MX",
        type=str,
        help="Directory containing the active_media_r_*.wav files to classify.",
    )
    parser.add_argument(
        "--keep_dir",
        default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\audio_lib_hkg_1\es-MX\keep",
        type=str,
        help="Directory that receives copies of files with no voicemail segment.",
    )
    parser.add_argument(
        "--trash_dir",
        default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\audio_lib_hkg_1\es-MX\trash",
        type=str,
        help="Directory created alongside keep_dir for rejected files.",
    )
    return parser.parse_args(argv)
33
+
34
+
35
+ def main():
36
+ args = get_args()
37
+
38
+ data_dir = Path(args.data_dir)
39
+ keep_dir = Path(args.keep_dir)
40
+ keep_dir.mkdir(parents=True, exist_ok=True)
41
+ trash_dir = Path(args.trash_dir)
42
+ trash_dir.mkdir(parents=True, exist_ok=True)
43
+
44
+ client = Client("http://127.0.0.1:7864/")
45
+
46
+ for idx, filename in tqdm(enumerate(data_dir.glob("active_media_r_*.wav"))):
47
+ filename = filename.as_posix()
48
+
49
+ model_name = f"voicemail-es-mx-2-ch4-cnn"
50
+ labels = client.predict(
51
+ model_name=model_name,
52
+ api_name="/partial"
53
+ )
54
+ target_label = labels["value"]
55
+ # print(model_name)
56
+ # print(target_label)
57
+ outputs = client.predict(
58
+ audio_t=handle_file(filename),
59
+ model_name=model_name,
60
+ target_label="voice",
61
+ win_size=2,
62
+ win_step=2,
63
+ max_duration=4,
64
+ api_name="/when_click_event_button"
65
+ )
66
+ outputs = json.loads(outputs)
67
+ if len(outputs) == 0:
68
+ continue
69
+ for row in outputs:
70
+ label = row["label"]
71
+ if label in ("voicemail",):
72
+ break
73
+ else:
74
+ shutil.copy(
75
+ filename,
76
+ keep_dir.as_posix()
77
+ )
78
+ # exit(0)
79
+ return
80
+
81
+
82
+ if __name__ == "__main__":
83
+ main()
examples/sound_classification_by_cnn/run_batch.sh CHANGED
@@ -97,12 +97,12 @@
97
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
98
  #--label_plan 4 \
99
  #--config_file "yaml/conv2d-classifier-4-ch32.yaml"
100
- #
101
- #
102
- #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-8-ch32-cnn \
103
- #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
104
- #--label_plan 8 \
105
- #--config_file "yaml/conv2d-classifier-8-ch32.yaml"
106
 
107
 
108
  # pretrained voicemail
 
97
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
98
  #--label_plan 4 \
99
  #--config_file "yaml/conv2d-classifier-4-ch32.yaml"
100
+
101
+
102
+ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-8-ch32-cnn \
103
+ --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
104
+ --label_plan 8 \
105
+ --config_file "yaml/conv2d-classifier-8-ch32.yaml"
106
 
107
 
108
  # pretrained voicemail
tabs/split_tab.py CHANGED
@@ -112,6 +112,8 @@ def correct_labels(labels: List[str], target_label: str = "noise", n_erode: int
112
 
113
  def split_signal_by_labels(signal: np.ndarray, labels: List[str], target_label: str):
114
  l = len(labels)
 
 
115
 
116
  noise_list = list()
117
  begin = None
 
112
 
113
  def split_signal_by_labels(signal: np.ndarray, labels: List[str], target_label: str):
114
  l = len(labels)
115
+ if l == 0:
116
+ return list()
117
 
118
  noise_list = list()
119
  begin = None