Student0809 committed on
Commit
14e1dca
·
verified ·
1 Parent(s): fb79a6d

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. ms-swift/.ipynb_checkpoints/dataset_OSST-checkpoint.json +0 -0
  2. ms-swift/scripts/benchmark/exp_utils.py +391 -0
  3. ms-swift/scripts/utils/plot_loss.py +9 -0
  4. ms-swift/silence_overlaps/700/.ipynb_checkpoints/silence_isoverlap_train-checkpoint.json +1152 -0
  5. ms-swift/silence_overlaps/700/original/silence_isoverlaps.json +0 -0
  6. ms-swift/silence_overlaps/700/test/.ipynb_checkpoints/overlap5s_silence_segments_test-checkpoint.json +27 -0
  7. ms-swift/silence_overlaps/700/test/overlap5s_speaker_segments_test.json +27 -0
  8. ms-swift/silence_overlaps/700/test/silence_speaker_segments_test.json +27 -0
  9. ms-swift/silence_overlaps/700/train/overlap5s_transcriptions_train.json +0 -0
  10. ms-swift/silence_overlaps/700/train/silence_isoverlaps_train.json +0 -0
  11. ms-swift/silence_overlaps/silence_speaker_segments.json +0 -0
  12. ms-swift/silence_overlaps/test/.ipynb_checkpoints/test_train-checkpoint.json +963 -0
  13. ms-swift/swift/cli/__pycache__/main.cpython-310.pyc +0 -0
  14. ms-swift/swift/cli/_megatron/__init__.py +0 -0
  15. ms-swift/swift/cli/deploy.py +5 -0
  16. ms-swift/swift/cli/sample.py +5 -0
  17. ms-swift/swift/cli/sft.py +7 -0
  18. ms-swift/swift/cli/web_ui.py +5 -0
  19. ms-swift/swift/hub/__init__.py +1 -0
  20. ms-swift/swift/llm/__init__.py +85 -0
  21. ms-swift/swift/llm/__pycache__/base.cpython-310.pyc +0 -0
  22. ms-swift/swift/llm/app/build_ui.py +139 -0
  23. ms-swift/swift/llm/argument/__pycache__/app_args.cpython-310.pyc +0 -0
  24. ms-swift/swift/llm/argument/__pycache__/eval_args.cpython-310.pyc +0 -0
  25. ms-swift/swift/llm/argument/__pycache__/export_args.cpython-310.pyc +0 -0
  26. ms-swift/swift/llm/argument/__pycache__/rlhf_args.cpython-310.pyc +0 -0
  27. ms-swift/swift/llm/argument/__pycache__/sampling_args.cpython-310.pyc +0 -0
  28. ms-swift/swift/llm/argument/__pycache__/train_args.cpython-310.pyc +0 -0
  29. ms-swift/swift/llm/argument/__pycache__/tuner_args.cpython-310.pyc +0 -0
  30. ms-swift/swift/llm/argument/__pycache__/webui_args.cpython-310.pyc +0 -0
  31. ms-swift/swift/llm/argument/base_args/__pycache__/generation_args.cpython-310.pyc +0 -0
  32. ms-swift/swift/llm/argument/base_args/__pycache__/quant_args.cpython-310.pyc +0 -0
  33. ms-swift/swift/llm/argument/base_args/base_args.py +268 -0
  34. ms-swift/swift/llm/argument/base_args/model_args.py +178 -0
  35. ms-swift/swift/llm/argument/base_args/quant_args.py +91 -0
  36. ms-swift/swift/llm/argument/deploy_args.py +76 -0
  37. ms-swift/swift/llm/argument/export_args.py +107 -0
  38. ms-swift/swift/llm/argument/infer_args.py +179 -0
  39. ms-swift/swift/llm/argument/train_args.py +234 -0
  40. ms-swift/swift/llm/argument/tuner_args.py +222 -0
  41. ms-swift/swift/llm/dataset/__pycache__/__init__.cpython-310.pyc +0 -0
  42. ms-swift/swift/llm/dataset/dataset/mllm.py +1215 -0
  43. ms-swift/swift/llm/dataset/preprocessor/__pycache__/extra.cpython-310.pyc +0 -0
  44. ms-swift/swift/llm/dataset/preprocessor/core.py +529 -0
  45. ms-swift/swift/llm/ds_config/zero2_offload.json +35 -0
  46. ms-swift/swift/llm/export/merge_lora.py +44 -0
  47. ms-swift/swift/llm/export/quant.py +266 -0
  48. ms-swift/swift/llm/infer/__pycache__/__init__.cpython-310.pyc +0 -0
  49. ms-swift/swift/llm/infer/__pycache__/protocol.cpython-310.pyc +0 -0
  50. ms-swift/swift/llm/infer/infer_engine/__init__.py +35 -0
ms-swift/.ipynb_checkpoints/dataset_OSST-checkpoint.json ADDED
The diff for this file is too large to render. See raw diff
 
ms-swift/scripts/benchmark/exp_utils.py ADDED
@@ -0,0 +1,391 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ import subprocess
4
+ import time
5
+ from collections import deque
6
+ from copy import deepcopy
7
+ from dataclasses import asdict, dataclass, field
8
+ from typing import Any, Dict, List
9
+
10
+ import json
11
+ import torch
12
+
13
+ from swift.llm import ExportArguments
14
+ from swift.utils import find_free_port, get_device_count, get_logger
15
+
16
+ logger = get_logger()
17
+
18
+
19
+ @dataclass
20
+ class Experiment:
21
+
22
+ name: str
23
+
24
+ cmd: str
25
+
26
+ group: str
27
+
28
+ requirements: Dict = field(default_factory=dict)
29
+
30
+ eval_requirements: Dict = field(default_factory=dict)
31
+
32
+ eval_dataset: List = field(default_factory=list)
33
+
34
+ args: Dict = field(default_factory=dict)
35
+
36
+ env: Dict = field(default_factory=dict)
37
+
38
+ record: Dict = field(default_factory=dict)
39
+
40
+ create_time: float = None
41
+
42
+ runtime: Dict = field(default_factory=dict)
43
+
44
+ input_args: Any = None
45
+
46
+ do_eval = False
47
+
48
+ def __init__(self,
49
+ name,
50
+ cmd,
51
+ group,
52
+ requirements=None,
53
+ eval_requirements=None,
54
+ eval_dataset=None,
55
+ args=None,
56
+ input_args=None,
57
+ **kwargs):
58
+ self.name = name
59
+ self.cmd = cmd
60
+ self.group = group
61
+ self.requirements = requirements or {}
62
+ self.args = args or {}
63
+ self.record = {}
64
+ self.env = {}
65
+ self.runtime = {}
66
+ self.input_args = input_args
67
+ self.eval_requirements = eval_requirements or {}
68
+ self.eval_dataset = eval_dataset or []
69
+ if self.cmd == 'eval':
70
+ self.do_eval = True
71
+
72
+ def load(self, _json):
73
+ self.name = _json['name']
74
+ self.cmd = _json['cmd']
75
+ self.requirements = _json['requirements']
76
+ self.args = _json['args']
77
+ self.record = _json['record']
78
+ self.env = _json['env']
79
+ self.create_time = _json['create_time']
80
+
81
+ @property
82
+ def priority(self):
83
+ return self.requirements.get('gpu', 0)
84
+
85
+ def to_dict(self):
86
+ _dict = asdict(self)
87
+ _dict.pop('runtime')
88
+ _dict.pop('input_args')
89
+ return _dict
90
+
91
+
92
class ExpManager:
    """Schedules experiments as subprocesses, allocating free GPUs to each,
    polling for completion and persisting result records to disk."""

    RESULT_FILE = 'result.jsonl'

    def __init__(self):
        # Experiments whose subprocess is currently running.
        self.exps = []

    def assert_gpu_not_overlap(self):
        """Sanity check: no GPU may be assigned to two running experiments."""
        all_gpus = set()
        for exp in self.exps:
            gpus = exp.runtime['env']['CUDA_VISIBLE_DEVICES'].split(',')
            if all_gpus & set(gpus):
                raise ValueError(f'GPU overlap: {self.exps}!')
            all_gpus.update(gpus)

    def _launch(self, exp: Experiment, runtime: Dict, log_suffix: str):
        """Start the subprocess for ``exp`` and register it as running.

        Extracted from ``run``, which previously duplicated this launch code
        for the eval and the train/export paths.
        """
        envs = deepcopy(runtime.get('env', {}))
        envs.update(os.environ)
        logger.info(f'Running cmd: {runtime["running_cmd"]}, env: {runtime.get("env", {})}')
        os.makedirs('exp', exist_ok=True)
        log_file = os.path.join('exp', f'{exp.name}.{log_suffix}.log')
        exp.handler = subprocess.Popen(runtime['running_cmd'] + f' > {log_file} 2>&1', env=envs, shell=True)
        self.exps.append(exp)
        self.assert_gpu_not_overlap()

    def run(self, exp: Experiment):
        """Launch ``exp`` (or skip it when its record shows it already finished).

        Raises:
            AssertionError: when no free GPU is available or a parallel export
                is requested (the caller retries later).
            ValueError: on duplicate experiment names.
        """
        record_file = os.path.join(exp.input_args.save_dir, exp.name + '.json')
        if os.path.exists(record_file):
            with open(record_file, 'r', encoding='utf-8') as f:
                _json = json.load(f)
            if exp.eval_dataset and 'eval_result' not in _json['record']:
                # Training finished but eval result is missing: resume with
                # the eval phase only.
                if not exp.do_eval:
                    logger.info(f'Experiment {exp.name} need eval, load from file.')
                    exp.load(_json)
                    exp.do_eval = True
            else:
                # Fix: logger.warn is a deprecated alias of logger.warning.
                logger.warning(f'Experiment {exp.name} already done, skip')
                return

        if exp.do_eval:
            runtime = self._build_eval_cmd(exp)
            exp.runtime = runtime
            self._launch(exp, runtime, log_suffix='eval')
            return

        if any(exp.name == e.name for e in self.exps):
            raise ValueError(f'Why exp name duplicate? {exp.name}')
        elif exp.cmd == 'export' and any(e.cmd == 'export' for e in self.exps):
            # Export tasks are exclusive; only one may run at a time.
            raise AssertionError('Cannot run parallel export task.')
        else:
            exp.create_time = time.time()
            runtime = self._build_cmd(exp)
            exp.runtime = runtime
            self._launch(exp, runtime, log_suffix=exp.cmd)

    def _build_eval_cmd(self, exp: Experiment):
        """Compose the ``swift eval`` command line and its GPU allocation."""
        gpu = exp.eval_requirements.get('gpu', None)
        env = {}
        allocated = []
        if gpu:
            allocated = self._find_free_gpu(int(gpu))
            assert allocated, 'No free gpu for now!'
            allocated = [str(g) for g in allocated]
            env['CUDA_VISIBLE_DEVICES'] = ','.join(allocated)

        best_model_checkpoint = exp.record.get('best_model_checkpoint')
        eval_dataset = exp.eval_dataset
        if best_model_checkpoint is None:
            # Fix: previously fell through with ``cmd`` unbound, raising an
            # opaque UnboundLocalError at the return statement below.
            raise ValueError(f'Experiment {exp.name} has no best_model_checkpoint to evaluate.')
        if not os.path.exists(os.path.join(best_model_checkpoint, 'args.json')):
            cmd = f'swift eval --ckpt_dir {best_model_checkpoint} ' \
                + f'--infer_backend pt --train_type full --eval_dataset {" ".join(eval_dataset)}'
        else:
            cmd = f'swift eval --model {exp.args.get("model")} --infer_backend pt ' \
                f'--eval_dataset {" ".join(eval_dataset)}'

        return {
            'running_cmd': cmd,
            'gpu': allocated,
            'env': env,
        }

    def _build_cmd(self, exp: Experiment):
        """Compose the train/rlhf/export command line, GPU allocation and env."""
        gpu = exp.requirements.get('gpu', None)
        env = {}
        allocated = []
        if gpu:
            allocated = self._find_free_gpu(int(gpu))
            assert allocated, 'No free gpu for now!'
            allocated = [str(g) for g in allocated]
            env['CUDA_VISIBLE_DEVICES'] = ','.join(allocated)
        if int(exp.requirements.get('ddp', 1)) > 1:
            env['NPROC_PER_NODE'] = exp.requirements.get('ddp')
            env['MASTER_PORT'] = str(find_free_port())

        if exp.cmd == 'sft':
            from swift.llm import TrainArguments
            args = exp.args
            # Instantiate the argument class once to resolve the default
            # output/logging dirs, then pin them in the CLI args.
            sft_args = TrainArguments(**args)
            args['output_dir'] = sft_args.output_dir
            args['logging_dir'] = sft_args.logging_dir
            args['add_version'] = False
            os.makedirs(sft_args.output_dir, exist_ok=True)
            os.makedirs(sft_args.logging_dir, exist_ok=True)
            cmd = 'swift sft '
            for key, value in args.items():
                cmd += f' --{key} {value}'
        elif exp.cmd == 'rlhf':
            from swift.llm import RLHFArguments
            args = exp.args
            rlhf_args = RLHFArguments(**args)
            args['output_dir'] = rlhf_args.output_dir
            args['logging_dir'] = rlhf_args.logging_dir
            args['add_version'] = False
            os.makedirs(rlhf_args.output_dir, exist_ok=True)
            os.makedirs(rlhf_args.logging_dir, exist_ok=True)
            cmd = 'swift rlhf '
            for key, value in args.items():
                cmd += f' --{key} {value}'
        elif exp.cmd == 'export':
            args = exp.args
            cmd = 'swift export '
            for key, value in args.items():
                cmd += f' --{key} {value}'
        else:
            raise ValueError(f'Unsupported cmd type: {exp.cmd}')
        return {
            'running_cmd': cmd,
            'gpu': allocated,
            'env': env,
            'logging_dir': args.get('logging_dir'),
            'output_dir': args.get('output_dir', args.get('ckpt_dir'))
        }

    def _find_free_gpu(self, n):
        """Return ``n`` GPU indices not used by any running experiment, or None."""
        all_gpus = set()
        for exp in self.exps:
            all_gpus.update(exp.runtime.get('gpu', set()))
        all_gpus = {int(g) for g in all_gpus}
        free_gpu = set(range(get_device_count())) - all_gpus
        if len(free_gpu) < n:
            return None
        return list(free_gpu)[:n]

    def prepare_experiments(self, args: Any):
        """Parse all config files into Experiment objects.

        Per-experiment entries override the file-level ``args``/``requirements``/
        ``env`` defaults.
        """
        experiments = []
        for config_file in args.config:
            with open(config_file, 'r', encoding='utf-8') as f:
                # Group name is the config file name without the '.json' suffix.
                group = os.path.basename(config_file)
                group = group[:-5]
                content = json.load(f)
                exps = content['experiment']
                for exp in exps:
                    main_cfg = deepcopy(content)
                    name = exp['name']
                    cmd = main_cfg['cmd']
                    run_args = main_cfg['args']
                    env = main_cfg.get('env', {})
                    requirements = main_cfg.get('requirements', {})
                    eval_requirements = main_cfg.get('eval_requirements', {})
                    eval_dataset = main_cfg.get('eval_dataset', [])
                    if 'args' in exp:
                        run_args.update(exp['args'])
                    if 'requirements' in exp:
                        requirements.update(exp['requirements'])
                    if 'env' in exp:
                        env.update(exp['env'])
                    experiments.append(
                        Experiment(
                            group=group,
                            name=name,
                            cmd=cmd,
                            args=run_args,
                            env=env,
                            requirements=requirements,
                            eval_requirements=eval_requirements,
                            eval_dataset=eval_dataset,
                            input_args=args))
        return experiments

    @staticmethod
    def _get_metric(exp: Experiment):
        """Extract the result metric of a finished experiment, or None.

        - eval tasks: parse the 'Final report:' line from the eval log.
        - export tasks: move the produced model dir and record its path.
        - train tasks: read the last 'model_info' line from logging.jsonl.
        """
        if exp.do_eval:
            eval_log = os.path.join('exp', f'{exp.name}.eval.log')
            if os.path.isfile(eval_log):
                with open(eval_log, 'r', encoding='utf-8') as f:
                    for line in f:
                        if 'Final report:' in line:
                            # The report is a Python-repr dict; normalize quotes for json.
                            return json.loads(line.split('Final report:')[1].replace('\'', '"'))
        elif exp.cmd == 'export':
            exp_args = ExportArguments(**exp.args)
            if exp_args.quant_bits > 0:
                if exp_args.ckpt_dir is None:
                    path = f'{exp_args.model_type}-{exp_args.quant_method}-int{exp_args.quant_bits}'
                else:
                    ckpt_dir, ckpt_name = os.path.split(exp_args.ckpt_dir)
                    path = os.path.join(ckpt_dir, f'{ckpt_name}-{exp_args.quant_method}-int{exp_args.quant_bits}')
            else:
                ckpt_dir, ckpt_name = os.path.split(exp_args.ckpt_dir)
                path = os.path.join(ckpt_dir, f'{ckpt_name}-merged')
            if os.path.exists(path):
                # Move the exported model under a dir named after the experiment.
                shutil.rmtree(exp.name, ignore_errors=True)
                os.makedirs(exp.name, exist_ok=True)
                shutil.move(path, os.path.join(exp.name, path))
                return {
                    'best_model_checkpoint': os.path.join(exp.name, path),
                }
        else:
            logging_dir = exp.runtime.get('logging_dir')
            logging_file = os.path.join(logging_dir, '..', 'logging.jsonl')
            if os.path.isfile(logging_file):
                with open(logging_file, 'r', encoding='utf-8') as f:
                    for line in f:
                        if 'model_info' in line:
                            return json.loads(line)
        return None

    @staticmethod
    def write_record(exp: Experiment):
        """Persist the experiment record as <save_dir>/<name>.json."""
        target_dir = exp.input_args.save_dir
        file = os.path.join(target_dir, exp.name + '.json')
        with open(file, 'w', encoding='utf-8') as f:
            f.write(json.dumps(exp.to_dict()) + '\n')

    def _poll(self):
        """Block until at least one running experiment finishes, then harvest it."""
        while True:
            time.sleep(5)

            has_finished = False
            for exp in self.exps:
                rt = exp.handler.poll()
                if rt is None:
                    continue

                has_finished = True
                if rt == 0:
                    if not exp.do_eval:
                        all_metric = self._get_metric(exp)
                        if all_metric:
                            exp.record.update(all_metric)
                            if exp.eval_dataset:
                                # Requeue at the front so its eval runs next.
                                exp.do_eval = True
                                self.exp_queue.appendleft(exp)
                            self.write_record(exp)
                        else:
                            logger.error(f'Running {exp.name} task, but no result found')
                    else:
                        all_metric = self._get_metric(exp)
                        exp.record['eval_result'] = all_metric
                        if all_metric:
                            self.write_record(exp)
                        else:
                            logger.error(f'Running {exp.name} eval task, but no eval result found')
                logger.info(f'Running {exp.name} finished with return code: {rt}')

            if has_finished:
                # Keep only experiments whose process is still alive.
                self.exps = [exp for exp in self.exps if exp.handler.poll() is None]
                break

    def begin(self, args: Any):
        """Main scheduling loop: queue every experiment, launch as GPUs allow."""
        exps = self.prepare_experiments(args)
        logger.info(f'all exps: {exps}')
        exps.sort(key=lambda e: e.priority)
        self.exp_queue = deque()
        for exp in exps:
            self.exp_queue.append(exp)

        while len(self.exp_queue) or len(self.exps) > 0:
            while len(self.exp_queue):
                try:
                    logger.info(f'Running exp: {self.exp_queue[0].name}')
                    self.run(self.exp_queue[0])
                except Exception as e:
                    if not isinstance(e, AssertionError):
                        logger.error(f'Adding exp {self.exp_queue[0].name} error because of:')
                        logger.error(e)
                        self.exp_queue.popleft()
                    else:
                        # Fix: the message and exception were passed as two
                        # positional args, which logging treats as lazy
                        # %-format arguments and fails to render.
                        logger.info(f'Adding exp {self.exp_queue[0].name} error because of: {e}')
                        if 'no free gpu' in str(e).lower():
                            # Wait for a running experiment to free its GPUs.
                            break
                        else:
                            continue
                else:
                    self.exp_queue.popleft()
            self._poll()
        logger.info(f'Run task finished because of exp queue: {self.exp_queue} and exps: {self.exps}')
380
+
381
+
382
def find_all_config(dir_or_file: str):
    """Collect experiment config files.

    A file path is returned as a single-element list; a directory is walked
    recursively for ``.json`` files, skipping jupyter checkpoint dirs.
    """
    if os.path.isfile(dir_or_file):
        return [dir_or_file]
    found = []
    for root, _dirs, files in os.walk(dir_or_file):
        if 'ipynb' in root:
            continue
        found.extend(os.path.join(root, fname) for fname in files if fname.endswith('.json'))
    return found
ms-swift/scripts/utils/plot_loss.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
# Render the training-loss curve of a checkpoint from its tensorboard logs.
import os

from swift.utils import plot_images

# Edit this to the actual checkpoint directory before running.
ckpt_dir = 'output/xxx/vx-xxx'

if __name__ == '__main__':
    out_images_dir = os.path.join(ckpt_dir, 'images')
    tensorboard_dir = os.path.join(ckpt_dir, 'runs')
    # 0.9 is the smoothing factor applied to the plotted curve.
    plot_images(out_images_dir, tensorboard_dir, ['train/loss'], 0.9)
ms-swift/silence_overlaps/700/.ipynb_checkpoints/silence_isoverlap_train-checkpoint.json ADDED
@@ -0,0 +1,1152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1101857.wav",
4
+ "key": "SODA_PROCESSED--train--1101857",
5
+ "model_output": "No significant overlaps found."
6
+ },
7
+ {
8
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--508884.wav",
9
+ "key": "SODA_PROCESSED--train--508884",
10
+ "model_output": "No significant overlaps found."
11
+ },
12
+ {
13
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1113674.wav",
14
+ "key": "SODA_PROCESSED--train--1113674",
15
+ "model_output": "No significant overlaps found."
16
+ },
17
+ {
18
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--158293.wav",
19
+ "key": "SODA_PROCESSED--train--158293",
20
+ "model_output": "No significant overlaps found."
21
+ },
22
+ {
23
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--631363.wav",
24
+ "key": "SODA_PROCESSED--train--631363",
25
+ "model_output": "No significant overlaps found."
26
+ },
27
+ {
28
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--277322.wav",
29
+ "key": "SODA_PROCESSED--train--277322",
30
+ "model_output": "No significant overlaps found."
31
+ },
32
+ {
33
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1131940.wav",
34
+ "key": "SODA_PROCESSED--train--1131940",
35
+ "model_output": "No significant overlaps found."
36
+ },
37
+ {
38
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1108753.wav",
39
+ "key": "SODA_PROCESSED--train--1108753",
40
+ "model_output": "No significant overlaps found."
41
+ },
42
+ {
43
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--27924.wav",
44
+ "key": "SODA_PROCESSED--train--27924",
45
+ "model_output": "No significant overlaps found."
46
+ },
47
+ {
48
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--374749.wav",
49
+ "key": "SODA_PROCESSED--train--374749",
50
+ "model_output": "No significant overlaps found."
51
+ },
52
+ {
53
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--821468.wav",
54
+ "key": "SODA_PROCESSED--train--821468",
55
+ "model_output": "No significant overlaps found."
56
+ },
57
+ {
58
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--416516.wav",
59
+ "key": "SODA_PROCESSED--train--416516",
60
+ "model_output": "No significant overlaps found."
61
+ },
62
+ {
63
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1029082.wav",
64
+ "key": "SODA_PROCESSED--train--1029082",
65
+ "model_output": "No significant overlaps found."
66
+ },
67
+ {
68
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--992151.wav",
69
+ "key": "SODA_PROCESSED--train--992151",
70
+ "model_output": "No significant overlaps found."
71
+ },
72
+ {
73
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--744708.wav",
74
+ "key": "SODA_PROCESSED--train--744708",
75
+ "model_output": "No significant overlaps found."
76
+ },
77
+ {
78
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--11862.wav",
79
+ "key": "SODA_PROCESSED--train--11862",
80
+ "model_output": "No significant overlaps found."
81
+ },
82
+ {
83
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--419304.wav",
84
+ "key": "SODA_PROCESSED--train--419304",
85
+ "model_output": "No significant overlaps found."
86
+ },
87
+ {
88
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--98673.wav",
89
+ "key": "SODA_PROCESSED--train--98673",
90
+ "model_output": "No significant overlaps found."
91
+ },
92
+ {
93
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--22719.wav",
94
+ "key": "SODA_PROCESSED--train--22719",
95
+ "model_output": "No significant overlaps found."
96
+ },
97
+ {
98
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1028263.wav",
99
+ "key": "SODA_PROCESSED--train--1028263",
100
+ "model_output": "No significant overlaps found."
101
+ },
102
+ {
103
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--848051.wav",
104
+ "key": "SODA_PROCESSED--train--848051",
105
+ "model_output": "No significant overlaps found."
106
+ },
107
+ {
108
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--511668.wav",
109
+ "key": "SODA_PROCESSED--train--511668",
110
+ "model_output": "No significant overlaps found."
111
+ },
112
+ {
113
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--12047.wav",
114
+ "key": "SODA_PROCESSED--train--12047",
115
+ "model_output": "No significant overlaps found."
116
+ },
117
+ {
118
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--153751.wav",
119
+ "key": "SODA_PROCESSED--train--153751",
120
+ "model_output": "No significant overlaps found."
121
+ },
122
+ {
123
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--795559.wav",
124
+ "key": "SODA_PROCESSED--train--795559",
125
+ "model_output": "No significant overlaps found."
126
+ },
127
+ {
128
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--387024.wav",
129
+ "key": "SODA_PROCESSED--train--387024",
130
+ "model_output": "No significant overlaps found."
131
+ },
132
+ {
133
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1168213.wav",
134
+ "key": "SODA_PROCESSED--train--1168213",
135
+ "model_output": "No significant overlaps found."
136
+ },
137
+ {
138
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1123711.wav",
139
+ "key": "SODA_PROCESSED--train--1123711",
140
+ "model_output": "No significant overlaps found."
141
+ },
142
+ {
143
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--819618.wav",
144
+ "key": "SODA_PROCESSED--train--819618",
145
+ "model_output": "No significant overlaps found."
146
+ },
147
+ {
148
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--752118.wav",
149
+ "key": "SODA_PROCESSED--train--752118",
150
+ "model_output": "No significant overlaps found."
151
+ },
152
+ {
153
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--581770.wav",
154
+ "key": "SODA_PROCESSED--train--581770",
155
+ "model_output": "No significant overlaps found."
156
+ },
157
+ {
158
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--276032.wav",
159
+ "key": "SODA_PROCESSED--train--276032",
160
+ "model_output": "No significant overlaps found."
161
+ },
162
+ {
163
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--556475.wav",
164
+ "key": "SODA_PROCESSED--train--556475",
165
+ "model_output": "No significant overlaps found."
166
+ },
167
+ {
168
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--674667.wav",
169
+ "key": "SODA_PROCESSED--train--674667",
170
+ "model_output": "No significant overlaps found."
171
+ },
172
+ {
173
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--501206.wav",
174
+ "key": "SODA_PROCESSED--train--501206",
175
+ "model_output": "No significant overlaps found."
176
+ },
177
+ {
178
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--905725.wav",
179
+ "key": "SODA_PROCESSED--train--905725",
180
+ "model_output": "No significant overlaps found."
181
+ },
182
+ {
183
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--265829.wav",
184
+ "key": "SODA_PROCESSED--train--265829",
185
+ "model_output": "No significant overlaps found."
186
+ },
187
+ {
188
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--207527.wav",
189
+ "key": "SODA_PROCESSED--train--207527",
190
+ "model_output": "No significant overlaps found."
191
+ },
192
+ {
193
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--985415.wav",
194
+ "key": "SODA_PROCESSED--train--985415",
195
+ "model_output": "No significant overlaps found."
196
+ },
197
+ {
198
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--115102.wav",
199
+ "key": "SODA_PROCESSED--train--115102",
200
+ "model_output": "No significant overlaps found."
201
+ },
202
+ {
203
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--8820.wav",
204
+ "key": "SODA_PROCESSED--train--8820",
205
+ "model_output": "No significant overlaps found."
206
+ },
207
+ {
208
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--453454.wav",
209
+ "key": "SODA_PROCESSED--train--453454",
210
+ "model_output": "No significant overlaps found."
211
+ },
212
+ {
213
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--375003.wav",
214
+ "key": "SODA_PROCESSED--train--375003",
215
+ "model_output": "No significant overlaps found."
216
+ },
217
+ {
218
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--757426.wav",
219
+ "key": "SODA_PROCESSED--train--757426",
220
+ "model_output": "No significant overlaps found."
221
+ },
222
+ {
223
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--202914.wav",
224
+ "key": "SODA_PROCESSED--train--202914",
225
+ "model_output": "No significant overlaps found."
226
+ },
227
+ {
228
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1007416.wav",
229
+ "key": "SODA_PROCESSED--train--1007416",
230
+ "model_output": "No significant overlaps found."
231
+ },
232
+ {
233
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--193891.wav",
234
+ "key": "SODA_PROCESSED--train--193891",
235
+ "model_output": "No significant overlaps found."
236
+ },
237
+ {
238
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--96343.wav",
239
+ "key": "SODA_PROCESSED--train--96343",
240
+ "model_output": "No significant overlaps found."
241
+ },
242
+ {
243
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1031234.wav",
244
+ "key": "SODA_PROCESSED--train--1031234",
245
+ "model_output": "No significant overlaps found."
246
+ },
247
+ {
248
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--798455.wav",
249
+ "key": "SODA_PROCESSED--train--798455",
250
+ "model_output": "No significant overlaps found."
251
+ },
252
+ {
253
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--438636.wav",
254
+ "key": "SODA_PROCESSED--train--438636",
255
+ "model_output": "No significant overlaps found."
256
+ },
257
+ {
258
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--776766.wav",
259
+ "key": "SODA_PROCESSED--train--776766",
260
+ "model_output": "No significant overlaps found."
261
+ },
262
+ {
263
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--691830.wav",
264
+ "key": "SODA_PROCESSED--train--691830",
265
+ "model_output": "No significant overlaps found."
266
+ },
267
+ {
268
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--524306.wav",
269
+ "key": "SODA_PROCESSED--train--524306",
270
+ "model_output": "No significant overlaps found."
271
+ },
272
+ {
273
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--471264.wav",
274
+ "key": "SODA_PROCESSED--train--471264",
275
+ "model_output": "No significant overlaps found."
276
+ },
277
+ {
278
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--421778.wav",
279
+ "key": "SODA_PROCESSED--train--421778",
280
+ "model_output": "No significant overlaps found."
281
+ },
282
+ {
283
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--541347.wav",
284
+ "key": "SODA_PROCESSED--train--541347",
285
+ "model_output": "No significant overlaps found."
286
+ },
287
+ {
288
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1084325.wav",
289
+ "key": "SODA_PROCESSED--train--1084325",
290
+ "model_output": "No significant overlaps found."
291
+ },
292
+ {
293
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--29039.wav",
294
+ "key": "SODA_PROCESSED--train--29039",
295
+ "model_output": "No significant overlaps found."
296
+ },
297
+ {
298
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1182464.wav",
299
+ "key": "SODA_PROCESSED--train--1182464",
300
+ "model_output": "No significant overlaps found."
301
+ },
302
+ {
303
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--735517.wav",
304
+ "key": "SODA_PROCESSED--train--735517",
305
+ "model_output": "No significant overlaps found."
306
+ },
307
+ {
308
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--417260.wav",
309
+ "key": "SODA_PROCESSED--train--417260",
310
+ "model_output": "No significant overlaps found."
311
+ },
312
+ {
313
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--784738.wav",
314
+ "key": "SODA_PROCESSED--train--784738",
315
+ "model_output": "No significant overlaps found."
316
+ },
317
+ {
318
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--303363.wav",
319
+ "key": "SODA_PROCESSED--train--303363",
320
+ "model_output": "No significant overlaps found."
321
+ },
322
+ {
323
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--795181.wav",
324
+ "key": "SODA_PROCESSED--train--795181",
325
+ "model_output": "No significant overlaps found."
326
+ },
327
+ {
328
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--33760.wav",
329
+ "key": "SODA_PROCESSED--train--33760",
330
+ "model_output": "No significant overlaps found."
331
+ },
332
+ {
333
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--126878.wav",
334
+ "key": "SODA_PROCESSED--train--126878",
335
+ "model_output": "No significant overlaps found."
336
+ },
337
+ {
338
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--317167.wav",
339
+ "key": "SODA_PROCESSED--train--317167",
340
+ "model_output": "No significant overlaps found."
341
+ },
342
+ {
343
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--463322.wav",
344
+ "key": "SODA_PROCESSED--train--463322",
345
+ "model_output": "No significant overlaps found."
346
+ },
347
+ {
348
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--51285.wav",
349
+ "key": "SODA_PROCESSED--train--51285",
350
+ "model_output": "No significant overlaps found."
351
+ },
352
+ {
353
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1081079.wav",
354
+ "key": "SODA_PROCESSED--train--1081079",
355
+ "model_output": "No significant overlaps found."
356
+ },
357
+ {
358
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--58199.wav",
359
+ "key": "SODA_PROCESSED--train--58199",
360
+ "model_output": "No significant overlaps found."
361
+ },
362
+ {
363
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1017701.wav",
364
+ "key": "SODA_PROCESSED--train--1017701",
365
+ "model_output": "No significant overlaps found."
366
+ },
367
+ {
368
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--762267.wav",
369
+ "key": "SODA_PROCESSED--train--762267",
370
+ "model_output": "No significant overlaps found."
371
+ },
372
+ {
373
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--4948.wav",
374
+ "key": "SODA_PROCESSED--train--4948",
375
+ "model_output": "No significant overlaps found."
376
+ },
377
+ {
378
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--737676.wav",
379
+ "key": "SODA_PROCESSED--train--737676",
380
+ "model_output": "No significant overlaps found."
381
+ },
382
+ {
383
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--606362.wav",
384
+ "key": "SODA_PROCESSED--train--606362",
385
+ "model_output": "No significant overlaps found."
386
+ },
387
+ {
388
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--674832.wav",
389
+ "key": "SODA_PROCESSED--train--674832",
390
+ "model_output": "No significant overlaps found."
391
+ },
392
+ {
393
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--588465.wav",
394
+ "key": "SODA_PROCESSED--train--588465",
395
+ "model_output": "No significant overlaps found."
396
+ },
397
+ {
398
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--386163.wav",
399
+ "key": "SODA_PROCESSED--train--386163",
400
+ "model_output": "No significant overlaps found."
401
+ },
402
+ {
403
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--421624.wav",
404
+ "key": "SODA_PROCESSED--train--421624",
405
+ "model_output": "No significant overlaps found."
406
+ },
407
+ {
408
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--977126.wav",
409
+ "key": "SODA_PROCESSED--train--977126",
410
+ "model_output": "No significant overlaps found."
411
+ },
412
+ {
413
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--932676.wav",
414
+ "key": "SODA_PROCESSED--train--932676",
415
+ "model_output": "No significant overlaps found."
416
+ },
417
+ {
418
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--315768.wav",
419
+ "key": "SODA_PROCESSED--train--315768",
420
+ "model_output": "No significant overlaps found."
421
+ },
422
+ {
423
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--939669.wav",
424
+ "key": "SODA_PROCESSED--train--939669",
425
+ "model_output": "No significant overlaps found."
426
+ },
427
+ {
428
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1174912.wav",
429
+ "key": "SODA_PROCESSED--train--1174912",
430
+ "model_output": "No significant overlaps found."
431
+ },
432
+ {
433
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1023331.wav",
434
+ "key": "SODA_PROCESSED--train--1023331",
435
+ "model_output": "No significant overlaps found."
436
+ },
437
+ {
438
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--144310.wav",
439
+ "key": "SODA_PROCESSED--train--144310",
440
+ "model_output": "No significant overlaps found."
441
+ },
442
+ {
443
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1011922.wav",
444
+ "key": "SODA_PROCESSED--train--1011922",
445
+ "model_output": "No significant overlaps found."
446
+ },
447
+ {
448
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--713730.wav",
449
+ "key": "SODA_PROCESSED--train--713730",
450
+ "model_output": "No significant overlaps found."
451
+ },
452
+ {
453
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--708040.wav",
454
+ "key": "SODA_PROCESSED--train--708040",
455
+ "model_output": "No significant overlaps found."
456
+ },
457
+ {
458
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--860576.wav",
459
+ "key": "SODA_PROCESSED--train--860576",
460
+ "model_output": "No significant overlaps found."
461
+ },
462
+ {
463
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1001007.wav",
464
+ "key": "SODA_PROCESSED--train--1001007",
465
+ "model_output": "No significant overlaps found."
466
+ },
467
+ {
468
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1166623.wav",
469
+ "key": "SODA_PROCESSED--train--1166623",
470
+ "model_output": "No significant overlaps found."
471
+ },
472
+ {
473
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--372789.wav",
474
+ "key": "SODA_PROCESSED--train--372789",
475
+ "model_output": "No significant overlaps found."
476
+ },
477
+ {
478
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--468603.wav",
479
+ "key": "SODA_PROCESSED--train--468603",
480
+ "model_output": "No significant overlaps found."
481
+ },
482
+ {
483
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--233562.wav",
484
+ "key": "SODA_PROCESSED--train--233562",
485
+ "model_output": "No significant overlaps found."
486
+ },
487
+ {
488
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--476626.wav",
489
+ "key": "SODA_PROCESSED--train--476626",
490
+ "model_output": "No significant overlaps found."
491
+ },
492
+ {
493
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--49462.wav",
494
+ "key": "SODA_PROCESSED--train--49462",
495
+ "model_output": "No significant overlaps found."
496
+ },
497
+ {
498
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--303336.wav",
499
+ "key": "SODA_PROCESSED--train--303336",
500
+ "model_output": "No significant overlaps found."
501
+ },
502
+ {
503
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--221358.wav",
504
+ "key": "SODA_PROCESSED--train--221358",
505
+ "model_output": "No significant overlaps found."
506
+ },
507
+ {
508
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--843615.wav",
509
+ "key": "SODA_PROCESSED--train--843615",
510
+ "model_output": "No significant overlaps found."
511
+ },
512
+ {
513
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--873625.wav",
514
+ "key": "SODA_PROCESSED--train--873625",
515
+ "model_output": "No significant overlaps found."
516
+ },
517
+ {
518
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--4814.wav",
519
+ "key": "SODA_PROCESSED--train--4814",
520
+ "model_output": "No significant overlaps found."
521
+ },
522
+ {
523
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--280675.wav",
524
+ "key": "SODA_PROCESSED--train--280675",
525
+ "model_output": "No significant overlaps found."
526
+ },
527
+ {
528
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1132437.wav",
529
+ "key": "SODA_PROCESSED--train--1132437",
530
+ "model_output": "No significant overlaps found."
531
+ },
532
+ {
533
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--650705.wav",
534
+ "key": "SODA_PROCESSED--train--650705",
535
+ "model_output": "No significant overlaps found."
536
+ },
537
+ {
538
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1186756.wav",
539
+ "key": "SODA_PROCESSED--train--1186756",
540
+ "model_output": "No significant overlaps found."
541
+ },
542
+ {
543
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--108309.wav",
544
+ "key": "SODA_PROCESSED--train--108309",
545
+ "model_output": "No significant overlaps found."
546
+ },
547
+ {
548
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--82238.wav",
549
+ "key": "SODA_PROCESSED--train--82238",
550
+ "model_output": "No significant overlaps found."
551
+ },
552
+ {
553
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--616846.wav",
554
+ "key": "SODA_PROCESSED--train--616846",
555
+ "model_output": "No significant overlaps found."
556
+ },
557
+ {
558
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--61606.wav",
559
+ "key": "SODA_PROCESSED--train--61606",
560
+ "model_output": "No significant overlaps found."
561
+ },
562
+ {
563
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--370577.wav",
564
+ "key": "SODA_PROCESSED--train--370577",
565
+ "model_output": "No significant overlaps found."
566
+ },
567
+ {
568
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--69581.wav",
569
+ "key": "SODA_PROCESSED--train--69581",
570
+ "model_output": "No significant overlaps found."
571
+ },
572
+ {
573
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--180962.wav",
574
+ "key": "SODA_PROCESSED--train--180962",
575
+ "model_output": "No significant overlaps found."
576
+ },
577
+ {
578
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--578986.wav",
579
+ "key": "SODA_PROCESSED--train--578986",
580
+ "model_output": "No significant overlaps found."
581
+ },
582
+ {
583
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--132857.wav",
584
+ "key": "SODA_PROCESSED--train--132857",
585
+ "model_output": "No significant overlaps found."
586
+ },
587
+ {
588
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--188417.wav",
589
+ "key": "SODA_PROCESSED--train--188417",
590
+ "model_output": "No significant overlaps found."
591
+ },
592
+ {
593
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--771154.wav",
594
+ "key": "SODA_PROCESSED--train--771154",
595
+ "model_output": "No significant overlaps found."
596
+ },
597
+ {
598
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--720445.wav",
599
+ "key": "SODA_PROCESSED--train--720445",
600
+ "model_output": "No significant overlaps found."
601
+ },
602
+ {
603
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--514225.wav",
604
+ "key": "SODA_PROCESSED--train--514225",
605
+ "model_output": "No significant overlaps found."
606
+ },
607
+ {
608
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--815822.wav",
609
+ "key": "SODA_PROCESSED--train--815822",
610
+ "model_output": "No significant overlaps found."
611
+ },
612
+ {
613
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--761001.wav",
614
+ "key": "SODA_PROCESSED--train--761001",
615
+ "model_output": "No significant overlaps found."
616
+ },
617
+ {
618
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1061857.wav",
619
+ "key": "SODA_PROCESSED--train--1061857",
620
+ "model_output": "No significant overlaps found."
621
+ },
622
+ {
623
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--475793.wav",
624
+ "key": "SODA_PROCESSED--train--475793",
625
+ "model_output": "No significant overlaps found."
626
+ },
627
+ {
628
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--406352.wav",
629
+ "key": "SODA_PROCESSED--train--406352",
630
+ "model_output": "No significant overlaps found."
631
+ },
632
+ {
633
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--486716.wav",
634
+ "key": "SODA_PROCESSED--train--486716",
635
+ "model_output": "No significant overlaps found."
636
+ },
637
+ {
638
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--468879.wav",
639
+ "key": "SODA_PROCESSED--train--468879",
640
+ "model_output": "No significant overlaps found."
641
+ },
642
+ {
643
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--338832.wav",
644
+ "key": "SODA_PROCESSED--train--338832",
645
+ "model_output": "No significant overlaps found."
646
+ },
647
+ {
648
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--845126.wav",
649
+ "key": "SODA_PROCESSED--train--845126",
650
+ "model_output": "No significant overlaps found."
651
+ },
652
+ {
653
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--490986.wav",
654
+ "key": "SODA_PROCESSED--train--490986",
655
+ "model_output": "No significant overlaps found."
656
+ },
657
+ {
658
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1128813.wav",
659
+ "key": "SODA_PROCESSED--train--1128813",
660
+ "model_output": "No significant overlaps found."
661
+ },
662
+ {
663
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--193134.wav",
664
+ "key": "SODA_PROCESSED--train--193134",
665
+ "model_output": "No significant overlaps found."
666
+ },
667
+ {
668
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--258235.wav",
669
+ "key": "SODA_PROCESSED--train--258235",
670
+ "model_output": "No significant overlaps found."
671
+ },
672
+ {
673
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--895260.wav",
674
+ "key": "SODA_PROCESSED--train--895260",
675
+ "model_output": "No significant overlaps found."
676
+ },
677
+ {
678
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--119322.wav",
679
+ "key": "SODA_PROCESSED--train--119322",
680
+ "model_output": "No significant overlaps found."
681
+ },
682
+ {
683
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--413405.wav",
684
+ "key": "SODA_PROCESSED--train--413405",
685
+ "model_output": "No significant overlaps found."
686
+ },
687
+ {
688
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--346041.wav",
689
+ "key": "SODA_PROCESSED--train--346041",
690
+ "model_output": "No significant overlaps found."
691
+ },
692
+ {
693
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--718092.wav",
694
+ "key": "SODA_PROCESSED--train--718092",
695
+ "model_output": "No significant overlaps found."
696
+ },
697
+ {
698
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--218634.wav",
699
+ "key": "SODA_PROCESSED--train--218634",
700
+ "model_output": "No significant overlaps found."
701
+ },
702
+ {
703
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--835488.wav",
704
+ "key": "SODA_PROCESSED--train--835488",
705
+ "model_output": "No significant overlaps found."
706
+ },
707
+ {
708
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--113543.wav",
709
+ "key": "SODA_PROCESSED--train--113543",
710
+ "model_output": "No significant overlaps found."
711
+ },
712
+ {
713
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--869455.wav",
714
+ "key": "SODA_PROCESSED--train--869455",
715
+ "model_output": "No significant overlaps found."
716
+ },
717
+ {
718
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--330048.wav",
719
+ "key": "SODA_PROCESSED--train--330048",
720
+ "model_output": "No significant overlaps found."
721
+ },
722
+ {
723
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--766234.wav",
724
+ "key": "SODA_PROCESSED--train--766234",
725
+ "model_output": "No significant overlaps found."
726
+ },
727
+ {
728
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--535368.wav",
729
+ "key": "SODA_PROCESSED--train--535368",
730
+ "model_output": "No significant overlaps found."
731
+ },
732
+ {
733
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--908444.wav",
734
+ "key": "SODA_PROCESSED--train--908444",
735
+ "model_output": "No significant overlaps found."
736
+ },
737
+ {
738
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--748910.wav",
739
+ "key": "SODA_PROCESSED--train--748910",
740
+ "model_output": "No significant overlaps found."
741
+ },
742
+ {
743
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--525710.wav",
744
+ "key": "SODA_PROCESSED--train--525710",
745
+ "model_output": "No significant overlaps found."
746
+ },
747
+ {
748
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--399572.wav",
749
+ "key": "SODA_PROCESSED--train--399572",
750
+ "model_output": "No significant overlaps found."
751
+ },
752
+ {
753
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--737726.wav",
754
+ "key": "SODA_PROCESSED--train--737726",
755
+ "model_output": "No significant overlaps found."
756
+ },
757
+ {
758
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--44625.wav",
759
+ "key": "SODA_PROCESSED--train--44625",
760
+ "model_output": "No significant overlaps found."
761
+ },
762
+ {
763
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1095086.wav",
764
+ "key": "SODA_PROCESSED--train--1095086",
765
+ "model_output": "No significant overlaps found."
766
+ },
767
+ {
768
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--269886.wav",
769
+ "key": "SODA_PROCESSED--train--269886",
770
+ "model_output": "No significant overlaps found."
771
+ },
772
+ {
773
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--596068.wav",
774
+ "key": "SODA_PROCESSED--train--596068",
775
+ "model_output": "No significant overlaps found."
776
+ },
777
+ {
778
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--16779.wav",
779
+ "key": "SODA_PROCESSED--train--16779",
780
+ "model_output": "No significant overlaps found."
781
+ },
782
+ {
783
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--532510.wav",
784
+ "key": "SODA_PROCESSED--train--532510",
785
+ "model_output": "No significant overlaps found."
786
+ },
787
+ {
788
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--68508.wav",
789
+ "key": "SODA_PROCESSED--train--68508",
790
+ "model_output": "No significant overlaps found."
791
+ },
792
+ {
793
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--162106.wav",
794
+ "key": "SODA_PROCESSED--train--162106",
795
+ "model_output": "No significant overlaps found."
796
+ },
797
+ {
798
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--831005.wav",
799
+ "key": "SODA_PROCESSED--train--831005",
800
+ "model_output": "No significant overlaps found."
801
+ },
802
+ {
803
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--509788.wav",
804
+ "key": "SODA_PROCESSED--train--509788",
805
+ "model_output": "No significant overlaps found."
806
+ },
807
+ {
808
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--489519.wav",
809
+ "key": "SODA_PROCESSED--train--489519",
810
+ "model_output": "No significant overlaps found."
811
+ },
812
+ {
813
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1020087.wav",
814
+ "key": "SODA_PROCESSED--train--1020087",
815
+ "model_output": "No significant overlaps found."
816
+ },
817
+ {
818
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1050427.wav",
819
+ "key": "SODA_PROCESSED--train--1050427",
820
+ "model_output": "No significant overlaps found."
821
+ },
822
+ {
823
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--842885.wav",
824
+ "key": "SODA_PROCESSED--train--842885",
825
+ "model_output": "No significant overlaps found."
826
+ },
827
+ {
828
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--166191.wav",
829
+ "key": "SODA_PROCESSED--train--166191",
830
+ "model_output": "No significant overlaps found."
831
+ },
832
+ {
833
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--826028.wav",
834
+ "key": "SODA_PROCESSED--train--826028",
835
+ "model_output": "No significant overlaps found."
836
+ },
837
+ {
838
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--715956.wav",
839
+ "key": "SODA_PROCESSED--train--715956",
840
+ "model_output": "No significant overlaps found."
841
+ },
842
+ {
843
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--967872.wav",
844
+ "key": "SODA_PROCESSED--train--967872",
845
+ "model_output": "No significant overlaps found."
846
+ },
847
+ {
848
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--277060.wav",
849
+ "key": "SODA_PROCESSED--train--277060",
850
+ "model_output": "No significant overlaps found."
851
+ },
852
+ {
853
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--803822.wav",
854
+ "key": "SODA_PROCESSED--train--803822",
855
+ "model_output": "No significant overlaps found."
856
+ },
857
+ {
858
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--928982.wav",
859
+ "key": "SODA_PROCESSED--train--928982",
860
+ "model_output": "No significant overlaps found."
861
+ },
862
+ {
863
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--371354.wav",
864
+ "key": "SODA_PROCESSED--train--371354",
865
+ "model_output": "No significant overlaps found."
866
+ },
867
+ {
868
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--12295.wav",
869
+ "key": "SODA_PROCESSED--train--12295",
870
+ "model_output": "No significant overlaps found."
871
+ },
872
+ {
873
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1030451.wav",
874
+ "key": "SODA_PROCESSED--train--1030451",
875
+ "model_output": "No significant overlaps found."
876
+ },
877
+ {
878
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--168398.wav",
879
+ "key": "SODA_PROCESSED--train--168398",
880
+ "model_output": "No significant overlaps found."
881
+ },
882
+ {
883
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--556505.wav",
884
+ "key": "SODA_PROCESSED--train--556505",
885
+ "model_output": "No significant overlaps found."
886
+ },
887
+ {
888
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--123906.wav",
889
+ "key": "SODA_PROCESSED--train--123906",
890
+ "model_output": "No significant overlaps found."
891
+ },
892
+ {
893
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1120331.wav",
894
+ "key": "SODA_PROCESSED--train--1120331",
895
+ "model_output": "No significant overlaps found."
896
+ },
897
+ {
898
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--121129.wav",
899
+ "key": "SODA_PROCESSED--train--121129",
900
+ "model_output": "No significant overlaps found."
901
+ },
902
+ {
903
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--690063.wav",
904
+ "key": "SODA_PROCESSED--train--690063",
905
+ "model_output": "No significant overlaps found."
906
+ },
907
+ {
908
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--334902.wav",
909
+ "key": "SODA_PROCESSED--train--334902",
910
+ "model_output": "No significant overlaps found."
911
+ },
912
+ {
913
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--442672.wav",
914
+ "key": "SODA_PROCESSED--train--442672",
915
+ "model_output": "No significant overlaps found."
916
+ },
917
+ {
918
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--640494.wav",
919
+ "key": "SODA_PROCESSED--train--640494",
920
+ "model_output": "No significant overlaps found."
921
+ },
922
+ {
923
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--171463.wav",
924
+ "key": "SODA_PROCESSED--train--171463",
925
+ "model_output": "No significant overlaps found."
926
+ },
927
+ {
928
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--565809.wav",
929
+ "key": "SODA_PROCESSED--train--565809",
930
+ "model_output": "No significant overlaps found."
931
+ },
932
+ {
933
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--329396.wav",
934
+ "key": "SODA_PROCESSED--train--329396",
935
+ "model_output": "No significant overlaps found."
936
+ },
937
+ {
938
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1090942.wav",
939
+ "key": "SODA_PROCESSED--train--1090942",
940
+ "model_output": "No significant overlaps found."
941
+ },
942
+ {
943
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--980776.wav",
944
+ "key": "SODA_PROCESSED--train--980776",
945
+ "model_output": "No significant overlaps found."
946
+ },
947
+ {
948
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--29858.wav",
949
+ "key": "SODA_PROCESSED--train--29858",
950
+ "model_output": "No significant overlaps found."
951
+ },
952
+ {
953
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--596349.wav",
954
+ "key": "SODA_PROCESSED--train--596349",
955
+ "model_output": "No significant overlaps found."
956
+ },
957
+ {
958
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--604536.wav",
959
+ "key": "SODA_PROCESSED--train--604536",
960
+ "model_output": "No significant overlaps found."
961
+ },
962
+ {
963
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--500115.wav",
964
+ "key": "SODA_PROCESSED--train--500115",
965
+ "model_output": "No significant overlaps found."
966
+ },
967
+ {
968
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--605295.wav",
969
+ "key": "SODA_PROCESSED--train--605295",
970
+ "model_output": "No significant overlaps found."
971
+ },
972
+ {
973
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--310941.wav",
974
+ "key": "SODA_PROCESSED--train--310941",
975
+ "model_output": "No significant overlaps found."
976
+ },
977
+ {
978
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1051089.wav",
979
+ "key": "SODA_PROCESSED--train--1051089",
980
+ "model_output": "No significant overlaps found."
981
+ },
982
+ {
983
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--187351.wav",
984
+ "key": "SODA_PROCESSED--train--187351",
985
+ "model_output": "No significant overlaps found."
986
+ },
987
+ {
988
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--645254.wav",
989
+ "key": "SODA_PROCESSED--train--645254",
990
+ "model_output": "No significant overlaps found."
991
+ },
992
+ {
993
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1066203.wav",
994
+ "key": "SODA_PROCESSED--train--1066203",
995
+ "model_output": "No significant overlaps found."
996
+ },
997
+ {
998
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--913166.wav",
999
+ "key": "SODA_PROCESSED--train--913166",
1000
+ "model_output": "No significant overlaps found."
1001
+ },
1002
+ {
1003
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--583204.wav",
1004
+ "key": "SODA_PROCESSED--train--583204",
1005
+ "model_output": "No significant overlaps found."
1006
+ },
1007
+ {
1008
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--575640.wav",
1009
+ "key": "SODA_PROCESSED--train--575640",
1010
+ "model_output": "No significant overlaps found."
1011
+ },
1012
+ {
1013
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--109428.wav",
1014
+ "key": "SODA_PROCESSED--train--109428",
1015
+ "model_output": "No significant overlaps found."
1016
+ },
1017
+ {
1018
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--246434.wav",
1019
+ "key": "SODA_PROCESSED--train--246434",
1020
+ "model_output": "No significant overlaps found."
1021
+ },
1022
+ {
1023
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--977434.wav",
1024
+ "key": "SODA_PROCESSED--train--977434",
1025
+ "model_output": "No significant overlaps found."
1026
+ },
1027
+ {
1028
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--665430.wav",
1029
+ "key": "SODA_PROCESSED--train--665430",
1030
+ "model_output": "No significant overlaps found."
1031
+ },
1032
+ {
1033
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--960193.wav",
1034
+ "key": "SODA_PROCESSED--train--960193",
1035
+ "model_output": "No significant overlaps found."
1036
+ },
1037
+ {
1038
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--31287.wav",
1039
+ "key": "SODA_PROCESSED--train--31287",
1040
+ "model_output": "No significant overlaps found."
1041
+ },
1042
+ {
1043
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--254497.wav",
1044
+ "key": "SODA_PROCESSED--train--254497",
1045
+ "model_output": "No significant overlaps found."
1046
+ },
1047
+ {
1048
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--774546.wav",
1049
+ "key": "SODA_PROCESSED--train--774546",
1050
+ "model_output": "No significant overlaps found."
1051
+ },
1052
+ {
1053
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--273875.wav",
1054
+ "key": "SODA_PROCESSED--train--273875",
1055
+ "model_output": "No significant overlaps found."
1056
+ },
1057
+ {
1058
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--822773.wav",
1059
+ "key": "SODA_PROCESSED--train--822773",
1060
+ "model_output": "No significant overlaps found."
1061
+ },
1062
+ {
1063
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1052554.wav",
1064
+ "key": "SODA_PROCESSED--train--1052554",
1065
+ "model_output": "No significant overlaps found."
1066
+ },
1067
+ {
1068
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--179972.wav",
1069
+ "key": "SODA_PROCESSED--train--179972",
1070
+ "model_output": "No significant overlaps found."
1071
+ },
1072
+ {
1073
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1117467.wav",
1074
+ "key": "SODA_PROCESSED--train--1117467",
1075
+ "model_output": "No significant overlaps found."
1076
+ },
1077
+ {
1078
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--961025.wav",
1079
+ "key": "SODA_PROCESSED--train--961025",
1080
+ "model_output": "No significant overlaps found."
1081
+ },
1082
+ {
1083
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--923496.wav",
1084
+ "key": "SODA_PROCESSED--train--923496",
1085
+ "model_output": "No significant overlaps found."
1086
+ },
1087
+ {
1088
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--41171.wav",
1089
+ "key": "SODA_PROCESSED--train--41171",
1090
+ "model_output": "No significant overlaps found."
1091
+ },
1092
+ {
1093
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--679971.wav",
1094
+ "key": "SODA_PROCESSED--train--679971",
1095
+ "model_output": "No significant overlaps found."
1096
+ },
1097
+ {
1098
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--876910.wav",
1099
+ "key": "SODA_PROCESSED--train--876910",
1100
+ "model_output": "No significant overlaps found."
1101
+ },
1102
+ {
1103
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--617278.wav",
1104
+ "key": "SODA_PROCESSED--train--617278",
1105
+ "model_output": "No significant overlaps found."
1106
+ },
1107
+ {
1108
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--463700.wav",
1109
+ "key": "SODA_PROCESSED--train--463700",
1110
+ "model_output": "No significant overlaps found."
1111
+ },
1112
+ {
1113
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1186623.wav",
1114
+ "key": "SODA_PROCESSED--train--1186623",
1115
+ "model_output": "No significant overlaps found."
1116
+ },
1117
+ {
1118
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1076109.wav",
1119
+ "key": "SODA_PROCESSED--train--1076109",
1120
+ "model_output": "No significant overlaps found."
1121
+ },
1122
+ {
1123
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--414445.wav",
1124
+ "key": "SODA_PROCESSED--train--414445",
1125
+ "model_output": "No significant overlaps found."
1126
+ },
1127
+ {
1128
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--546350.wav",
1129
+ "key": "SODA_PROCESSED--train--546350",
1130
+ "model_output": "No significant overlaps found."
1131
+ },
1132
+ {
1133
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1144076.wav",
1134
+ "key": "SODA_PROCESSED--train--1144076",
1135
+ "model_output": "No significant overlaps found."
1136
+ },
1137
+ {
1138
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--104948.wav",
1139
+ "key": "SODA_PROCESSED--train--104948",
1140
+ "model_output": "No significant overlaps found."
1141
+ },
1142
+ {
1143
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--836740.wav",
1144
+ "key": "SODA_PROCESSED--train--836740",
1145
+ "model_output": "No significant overlaps found."
1146
+ },
1147
+ {
1148
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--437951.wav",
1149
+ "key": "SODA_PROCESSED--train--437951",
1150
+ "model_output": "No significant overlaps found."
1151
+ }
1152
+ ]
ms-swift/silence_overlaps/700/original/silence_isoverlaps.json ADDED
The diff for this file is too large to render. See raw diff
 
ms-swift/silence_overlaps/700/test/.ipynb_checkpoints/overlap5s_silence_segments_test-checkpoint.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "key": "SODA_PROCESSED--train--137471",
4
+ "audio_url": "/root/autodl-tmp/output_overlapslong/newoverlapjson/overlap5s700/SODA_PROCESSED--train--137471.wav",
5
+ "model_output": "No, there is no silence gap."
6
+ },
7
+ {
8
+ "key": "SODA_PROCESSED--train--201044",
9
+ "audio_url": "/root/autodl-tmp/output_overlapslong/newoverlapjson/overlap5s700/SODA_PROCESSED--train--201044.wav",
10
+ "model_output": "No, there is no silence gap."
11
+ },
12
+ {
13
+ "key": "SODA_PROCESSED--train--596349",
14
+ "audio_url": "/root/autodl-tmp/output_overlapslong/newoverlapjson/overlap5s700/SODA_PROCESSED--train--596349.wav",
15
+ "model_output": "No, there is no silence gap."
16
+ },
17
+ {
18
+ "key": "SODA_PROCESSED--train--956648",
19
+ "audio_url": "/root/autodl-tmp/output_overlapslong/newoverlapjson/overlap5s700/SODA_PROCESSED--train--956648.wav",
20
+ "model_output": "No, there is no silence gap."
21
+ },
22
+ {
23
+ "key": "SODA_PROCESSED--train--962210",
24
+ "audio_url": "/root/autodl-tmp/output_overlapslong/newoverlapjson/overlap5s700/SODA_PROCESSED--train--962210.wav",
25
+ "model_output": "No, there is no silence gap."
26
+ }
27
+ ]
ms-swift/silence_overlaps/700/test/overlap5s_speaker_segments_test.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "key": "SODA_PROCESSED--train--254497",
4
+ "audio_url": "/root/autodl-tmp/output_overlapslong/newoverlapjson/overlap5s700/SODA_PROCESSED--train--254497.wav",
5
+ "model_output": "Speaker A: 00:00-00:03, 00:06-00:14, 00:23-00:26, 00:31-00:35\nSpeaker B: 00:03-00:15, 00:16-00:22, 00:27-00:30"
6
+ },
7
+ {
8
+ "key": "SODA_PROCESSED--train--1185164",
9
+ "audio_url": "/root/autodl-tmp/output_overlapslong/newoverlapjson/overlap5s700/SODA_PROCESSED--train--1185164.wav",
10
+ "model_output": "Speaker A: 00:00-00:01, 00:04-00:20, 00:20-00:33, 00:41-00:46, 00:52-00:54\nSpeaker B: 00:02-00:03, 00:09-00:15, 00:33-00:40, 00:47-00:52"
11
+ },
12
+ {
13
+ "key": "SODA_PROCESSED--train--205413",
14
+ "audio_url": "/root/autodl-tmp/output_overlapslong/newoverlapjson/overlap5s700/SODA_PROCESSED--train--205413.wav",
15
+ "model_output": "Speaker A: 00:00-00:01, 00:03-00:05, 00:15-00:18, 00:28-00:31, 00:36-00:40\nSpeaker B: 00:02-00:09, 00:09-00:15, 00:19-00:28, 00:32-00:36, 00:40-00:41"
16
+ },
17
+ {
18
+ "key": "SODA_PROCESSED--train--585968",
19
+ "audio_url": "/root/autodl-tmp/output_overlapslong/newoverlapjson/overlap5s700/SODA_PROCESSED--train--585968.wav",
20
+ "model_output": "Speaker A: 00:00-00:15, 00:15-00:23, 00:29-00:33, 00:36-00:43, 00:51-00:57, 01:05-01:08, 01:13-01:16\nSpeaker B: 00:06-00:14, 00:24-00:28, 00:33-00:36, 00:43-00:51, 00:57-01:04, 01:09-01:12, 01:17-01:18"
21
+ },
22
+ {
23
+ "key": "SODA_PROCESSED--train--1079940",
24
+ "audio_url": "/root/autodl-tmp/output_overlapslong/newoverlapjson/overlap5s700/SODA_PROCESSED--train--1079940.wav",
25
+ "model_output": "Speaker A: 00:00-00:04, 00:07-00:12, 00:18-00:22, 00:32-00:37, 00:43-00:46\nSpeaker B: 00:04-00:13, 00:14-00:17, 00:23-00:32, 00:38-00:42"
26
+ }
27
+ ]
ms-swift/silence_overlaps/700/test/silence_speaker_segments_test.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "key": "SODA_PROCESSED--train--869455",
4
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--869455.wav",
5
+ "model_output": "Speaker A: 00:00-00:11, 00:15-00:25, 00:27-00:38, 00:41-00:53\nSpeaker B: 00:12-00:15, 00:25-00:27, 00:39-00:42"
6
+ },
7
+ {
8
+ "key": "SODA_PROCESSED--train--420178",
9
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--420178.wav",
10
+ "model_output": "Speaker A: 00:00-00:01, 00:04-00:08, 00:10-00:19, 00:31-00:46, 00:50-00:52\nSpeaker B: 00:01-00:03, 00:08-00:09, 00:18-00:26, 00:47-00:49"
11
+ },
12
+ {
13
+ "key": "SODA_PROCESSED--train--836740",
14
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--836740.wav",
15
+ "model_output": "Speaker A: 00:00-00:04, 00:07-00:15, 00:25-00:29, 00:43-00:48\nSpeaker B: 00:04-00:08, 00:20-00:24, 00:29-00:42, 00:48-00:54"
16
+ },
17
+ {
18
+ "key": "SODA_PROCESSED--train--64931",
19
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--64931.wav",
20
+ "model_output": "Speaker A: 00:00-00:02, 00:05-00:08, 00:23-00:28, 00:32-00:39\nSpeaker B: 00:03-00:06, 00:13-00:22, 00:29-00:32"
21
+ },
22
+ {
23
+ "key": "SODA_PROCESSED--train--193891",
24
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--193891.wav",
25
+ "model_output": "Speaker A: 00:00-00:01, 00:04-00:09, 00:14-00:21, 00:38-00:47, 00:56-01:05\nSpeaker B: 00:01-00:03, 00:09-00:15, 00:26-00:38, 00:48-00:56, 01:05-01:10"
26
+ }
27
+ ]
ms-swift/silence_overlaps/700/train/overlap5s_transcriptions_train.json ADDED
The diff for this file is too large to render. See raw diff
 
ms-swift/silence_overlaps/700/train/silence_isoverlaps_train.json ADDED
The diff for this file is too large to render. See raw diff
 
ms-swift/silence_overlaps/silence_speaker_segments.json ADDED
The diff for this file is too large to render. See raw diff
 
ms-swift/silence_overlaps/test/.ipynb_checkpoints/test_train-checkpoint.json ADDED
@@ -0,0 +1,963 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "SODA_PROCESSED--train--449689": {
3
+ "original_dialog_id": "",
4
+ "dialog_index": 449689,
5
+ "processed_dialogue": "A: Hey there. Mind if I lay down next to you? \nB: No, go ahead. \nA: Thanks. I needed a break from the sun. It's so hot today. \nB: Yeah, it is. I'm trying to get a tan, but I don't want to get too dehydrated, so I'm keeping a bottle of water close by and reapplying sunscreen every hour to avoid any skin damage. \nA: Burnt? Yeah, that's definitely a possibility out here. So what brings you to the beach today? Just wanting to relax? \nB: Yeah, pretty much. I just finished up my summer classes and needed some time to myself before starting my new job next week. \nA: That sounds rough. Are you excited for it? Or [interrupt] worried about how you'll balance everything with your personal life and other commitments you might have during this transitional period? \nB: Nervous? A little bit of both, honestly. But mostly excited. It should be a good experience. And the pay is great, so that's a plus. \nA: Definitely. Well, I hope you enjoy the rest of your day here. \nB: Thanks. You too.",
6
+ "clean_dialogue": "A: Hey there. Mind if I lay down next to you? \nB: No, go ahead. \nA: Thanks. I needed a break from the sun. It's so hot today. \nB: Yeah, it is. I'm trying to get a tan, but I don't want to get too dehydrated, so I'm keeping a bottle of water close by and reapplying sunscreen every hour to avoid any skin damage. \nA: Burnt? Yeah, that's definitely a possibility out here. So what brings you to the beach today? Just wanting to relax? \nB: Yeah, pretty much. I just finished up my summer classes and needed some time to myself before starting my new job next week. \nA:That sounds rough. Are you excited for it? Or worried about how you'll balance everything with your personal life and other commitments you might have during this transitional period?\nB: Nervous? A little bit of both, honestly. But mostly excited. It should be a good experience. And the pay is great, so that's a plus. \nA: Definitely. Well, I hope you enjoy the rest of your day here. \nB: Thanks. You too.",
7
+ "speaker_tracks": {
8
+ "A": "/root/autodl-tmp/output_mixedAudios/processed_soda_3_processed_dialogues_part_20/SODA_PROCESSED--train--449689/A_track.wav",
9
+ "B": "/root/autodl-tmp/output_mixedAudios/processed_soda_3_processed_dialogues_part_20/SODA_PROCESSED--train--449689/B_track.wav"
10
+ },
11
+ "error_type": "error_after_interrupt",
12
+ "stereo_audio": "/root/autodl-tmp/output_mixedAudios/processed_soda_3_processed_dialogues_part_20/SODA_PROCESSED--train--449689/stereo_dialogue.wav",
13
+ "total_duration": 50.09668934240363,
14
+ "segments": [
15
+ {
16
+ "speaker": "A",
17
+ "text": "Hey there. Mind if I lay down next to you?",
18
+ "original_text": "Hey there. Mind if I lay down next to you?",
19
+ "start_time": 0,
20
+ "end_time": 2.4961451247165534,
21
+ "audio_file": "/root/autodl-tmp/output_mixedAudios/processed_soda_3_processed_dialogues_part_20/SODA_PROCESSED--train--449689/temp/line_0_A.wav",
22
+ "silence_duration": 0,
23
+ "is_interrupted": false
24
+ },
25
+ {
26
+ "speaker": "B",
27
+ "text": "No, go ahead.",
28
+ "original_text": "No, go ahead.",
29
+ "start_time": 3.0616233505922237,
30
+ "end_time": 4.257451014991316,
31
+ "audio_file": "/root/autodl-tmp/output_mixedAudios/processed_soda_3_processed_dialogues_part_20/SODA_PROCESSED--train--449689/temp/line_1_B.wav",
32
+ "silence_duration": 0.5654782258756702,
33
+ "is_interrupted": false
34
+ },
35
+ {
36
+ "speaker": "A",
37
+ "text": "Thanks. I needed a break from the sun. It's so hot today.",
38
+ "original_text": "Thanks. I needed a break from the sun. It's so hot today.",
39
+ "start_time": 4.673061027457998,
40
+ "end_time": 8.666893227004483,
41
+ "audio_file": "/root/autodl-tmp/output_mixedAudios/processed_soda_3_processed_dialogues_part_20/SODA_PROCESSED--train--449689/temp/line_2_A.wav",
42
+ "silence_duration": 0.41561001246668183,
43
+ "is_interrupted": false
44
+ },
45
+ {
46
+ "speaker": "B",
47
+ "text": "Yeah, it is. I'm trying to get a tan, but I don't want to get too dehydrated, so I'm keeping a bottle of water close by and reapplying sunscreen every hour to avoid any skin damage.",
48
+ "original_text": "Yeah, it is. I'm trying to get a tan, but I don't want to get too dehydrated, so I'm keeping a bottle of water close by and reapplying sunscreen every hour to avoid any skin damage.",
49
+ "start_time": 9.128191918953855,
50
+ "end_time": 19.01989259922596,
51
+ "audio_file": "/root/autodl-tmp/output_mixedAudios/processed_soda_3_processed_dialogues_part_20/SODA_PROCESSED--train--449689/temp/line_3_B.wav",
52
+ "silence_duration": 0.46129869194937123,
53
+ "is_interrupted": false
54
+ },
55
+ {
56
+ "speaker": "A",
57
+ "text": "Burnt? Yeah, that's definitely a possibility out here. So what brings you to the beach today? Just wanting to relax?",
58
+ "original_text": "Burnt? Yeah, that's definitely a possibility out here. So what brings you to the beach today? Just wanting to relax?",
59
+ "start_time": 19.43691572474219,
60
+ "end_time": 27.215600531998426,
61
+ "audio_file": "/root/autodl-tmp/output_mixedAudios/processed_soda_3_processed_dialogues_part_20/SODA_PROCESSED--train--449689/temp/line_4_A.wav",
62
+ "silence_duration": 0.4170231255162265,
63
+ "is_interrupted": false
64
+ },
65
+ {
66
+ "speaker": "B",
67
+ "text": "Yeah, pretty much. I just finished up my summer classes and needed some time to myself before starting my new job next week.",
68
+ "original_text": "Yeah, pretty much. I just finished up my summer classes and needed some time to myself before starting my new job next week.",
69
+ "start_time": 27.73206790619358,
70
+ "end_time": 34.08272550256547,
71
+ "audio_file": "/root/autodl-tmp/output_mixedAudios/processed_soda_3_processed_dialogues_part_20/SODA_PROCESSED--train--449689/temp/line_5_B.wav",
72
+ "silence_duration": 0.5164673741951538,
73
+ "is_interrupted": false
74
+ },
75
+ {
76
+ "speaker": "A",
77
+ "text": "That sounds rough. Are you excited for it? Or",
78
+ "original_text": "That sounds rough. Are you excited for it? Or [interrupt] worried about how you'll balance everything with your personal life and other commitments you might have during this transitional period?",
79
+ "start_time": 34.40566150397062,
80
+ "end_time": 44.703711390591934,
81
+ "audio_file": "/root/autodl-tmp/output_mixedAudios/processed_soda_3_processed_dialogues_part_20/SODA_PROCESSED--train--449689/temp/line_6_A.wav",
82
+ "silence_duration": 0.3229360014051523,
83
+ "is_interrupted": true,
84
+ "text_after_interrupt": "worried about how you'll balance everything with your personal life and other commitments you might have during this transitional period?"
85
+ },
86
+ {
87
+ "speaker": "B",
88
+ "text": "Nervous? A little bit of both, honestly. But mostly excited. It should be a good experience. And the pay is great, so that's a plus.",
89
+ "original_text": "Nervous? A little bit of both, honestly. But mostly excited. It should be a good experience. And the pay is great, so that's a plus.",
90
+ "start_time": 37.1456161524967,
91
+ "end_time": 44.564391662700785,
92
+ "audio_file": "/root/autodl-tmp/output_mixedAudios/processed_soda_3_processed_dialogues_part_20/SODA_PROCESSED--train--449689/temp/line_7_B.wav",
93
+ "silence_duration": 0.36321869535217244,
94
+ "is_interrupted": false
95
+ },
96
+ {
97
+ "speaker": "A",
98
+ "text": "Definitely. Well, I hope you enjoy the rest of your day here.",
99
+ "original_text": "Definitely. Well, I hope you enjoy the rest of your day here.",
100
+ "start_time": 44.9023552612567,
101
+ "end_time": 48.78008768756056,
102
+ "audio_file": "/root/autodl-tmp/output_mixedAudios/processed_soda_3_processed_dialogues_part_20/SODA_PROCESSED--train--449689/temp/line_8_A.wav",
103
+ "silence_duration": 0.33796359855591646,
104
+ "is_interrupted": false
105
+ },
106
+ {
107
+ "speaker": "B",
108
+ "text": "Thanks. You too.",
109
+ "original_text": "Thanks. You too.",
110
+ "start_time": 49.1679089027611,
111
+ "end_time": 50.09670708870214,
112
+ "audio_file": "/root/autodl-tmp/output_mixedAudios/processed_soda_3_processed_dialogues_part_20/SODA_PROCESSED--train--449689/temp/line_9_B.wav",
113
+ "silence_duration": 0.38782121520053575,
114
+ "is_interrupted": false
115
+ }
116
+ ],
117
+ "gt_score": 1
118
+ },
119
+ "SODA_PROCESSED--train--787791": {
120
+ "original_dialog_id": "",
121
+ "dialog_index": 787791,
122
+ "processed_dialogue": "A: You're welcome. I'm just glad I was able to stop it from happening. \nB: Thank you so much for saving my life. I can't even begin to express how [interrupt] grateful I am for what you did. It means the world to me and I'll never forget your kindness and quick thinking in that moment. \nA: Sorry to jump in, but are you sure you're okay? I mean, physically and emotionally? \nB: I think so, but it's all still a bit of a blur. I don't know what would have happened if you hadn't been there. I'm just glad that you were in the right place at the right time. \nA: Yeah, me too. But seriously, if you need anything—someone to talk to or whatever—don't hesitate to reach out, okay? \nB: I really appreciate that. Thanks again, Antwain. \nA: No problem. Take care.",
123
+ "clean_dialogue": "A: You're welcome. I'm just glad I was able to stop it from happening. \nB:Thank you so much for saving my life. I can't even begin to express how grateful I am for what you did. It means the world to me and I'll never forget your kindness and quick thinking in that moment.\nA: Sorry to jump in, but are you sure you're okay? I mean, physically and emotionally? \nB: I think so, but it's all still a bit of a blur. I don't know what would have happened if you hadn't been there. I'm just glad that you were in the right place at the right time. \nA: Yeah, me too. But seriously, if you need anything—someone to talk to or whatever—don't hesitate to reach out, okay? \nB: I really appreciate that. Thanks again, Antwain. \nA: No problem. Take care.",
124
+ "speaker_tracks": {
125
+ "A": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_8/SODA_PROCESSED--train--787791/A_track.wav",
126
+ "B": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_8/SODA_PROCESSED--train--787791/B_track.wav"
127
+ },
128
+ "error_type": "error_after_interrupt",
129
+ "stereo_audio": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_8/SODA_PROCESSED--train--787791/stereo_dialogue.wav",
130
+ "total_duration": 37.52730158730159,
131
+ "segments": [
132
+ {
133
+ "speaker": "A",
134
+ "text": "You're welcome. I'm just glad I was able to stop it from happening.",
135
+ "original_text": "You're welcome. I'm just glad I was able to stop it from happening.",
136
+ "start_time": 0,
137
+ "end_time": 4.249251700680272,
138
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_8/SODA_PROCESSED--train--787791/temp/line_0_A.wav",
139
+ "silence_duration": 0,
140
+ "is_interrupted": false
141
+ },
142
+ {
143
+ "speaker": "B",
144
+ "text": "Thank you so much for saving my life. I can't even begin to express how",
145
+ "original_text": "Thank you so much for saving my life. I can't even begin to express how [interrupt] grateful I am for what you did. It means the world to me and I'll never forget your kindness and quick thinking in that moment.",
146
+ "start_time": 4.756366963799184,
147
+ "end_time": 14.694507553368345,
148
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_8/SODA_PROCESSED--train--787791/temp/line_1_B.wav",
149
+ "silence_duration": 0.5071152631189118,
150
+ "is_interrupted": true,
151
+ "text_after_interrupt": "grateful I am for what you did. It means the world to me and I'll never forget your kindness and quick thinking in that moment."
152
+ },
153
+ {
154
+ "speaker": "A",
155
+ "text": "Sorry to jump in, but are you sure you're okay? I mean, physically and emotionally?",
156
+ "original_text": "Sorry to jump in, but are you sure you're okay? I mean, physically and emotionally?",
157
+ "start_time": 8.726979208697143,
158
+ "end_time": 14.357818210964716,
159
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_8/SODA_PROCESSED--train--787791/temp/line_2_A.wav",
160
+ "silence_duration": 0.4049084459018305,
161
+ "is_interrupted": false
162
+ },
163
+ {
164
+ "speaker": "B",
165
+ "text": "I think so, but it's all still a bit of a blur. I don't know what would have happened if you hadn't been there. I'm just glad that you were in the right place at the right time.",
166
+ "original_text": "I think so, but it's all still a bit of a blur. I don't know what would have happened if you hadn't been there. I'm just glad that you were in the right place at the right time.",
167
+ "start_time": 14.861085984580113,
168
+ "end_time": 23.649838819047233,
169
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_8/SODA_PROCESSED--train--787791/temp/line_3_B.wav",
170
+ "silence_duration": 0.5032677736153957,
171
+ "is_interrupted": false
172
+ },
173
+ {
174
+ "speaker": "A",
175
+ "text": "Yeah, me too. But seriously, if you need anything—someone to talk to or whatever—don't hesitate to reach out, okay?",
176
+ "original_text": "Yeah, me too. But seriously, if you need anything—someone to talk to or whatever—don't hesitate to reach out, okay?",
177
+ "start_time": 24.145193415777634,
178
+ "end_time": 32.515987066571284,
179
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_8/SODA_PROCESSED--train--787791/temp/line_4_A.wav",
180
+ "silence_duration": 0.4953545967303996,
181
+ "is_interrupted": false
182
+ },
183
+ {
184
+ "speaker": "B",
185
+ "text": "I really appreciate that. Thanks again, Antwain.",
186
+ "original_text": "I really appreciate that. Thanks again, Antwain.",
187
+ "start_time": 32.97180815148517,
188
+ "end_time": 35.68854284536272,
189
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_8/SODA_PROCESSED--train--787791/temp/line_5_B.wav",
190
+ "silence_duration": 0.4558210849138826,
191
+ "is_interrupted": false
192
+ },
193
+ {
194
+ "speaker": "A",
195
+ "text": "No problem. Take care.",
196
+ "original_text": "No problem. Take care.",
197
+ "start_time": 35.99481454512998,
198
+ "end_time": 37.5273315519327,
199
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_8/SODA_PROCESSED--train--787791/temp/line_6_A.wav",
200
+ "silence_duration": 0.3062716997672569,
201
+ "is_interrupted": false
202
+ }
203
+ ],
204
+ "gt_score": 1
205
+ },
206
+ "SODA_PROCESSED--train--179972": {
207
+ "original_dialog_id": "",
208
+ "dialog_index": 179972,
209
+ "processed_dialogue": "A: So, how did you like the book? \nB: I loved it! The ending was so shocking, I couldn't believe what happened. \nA: Sorry to interrupt, but I just have to ask—did you see that twist with the protagonist coming? I was totally blindsided. \nB: No, I didn't see it coming at all! It was so unexpected. \nA: Yeah, I know. I couldn't put it down. \nB: Me neither. I'm so glad you wanted to read it. \nA: Yeah, I was curious about the protagonist's journey and how it would [interrupt] evolve, especially after that major setback when they had to completely rethink their entire approach to solving the central conflict. \nB: Oh, speaking of the journey, what did you think about that part where the protagonist had to make that impossible choice? It really stuck with me. \nA: It was definitely a rollercoaster ride. There were so many twists and turns. \nB: I know! I didn't see any of them coming. \nA: That's what made it so great. It kept you guessing the whole time. \nB: Definitely. It was a great book. Thanks for lending it to me.",
210
+ "clean_dialogue": "A: So, how did you like the book? \nB: I loved it! The ending was so shocking, I couldn't believe what happened. \nA: Sorry to interrupt, but I just have to ask—did you see that twist with the protagonist coming? I was totally blindsided. \nB: No, I didn't see it coming at all! It was so unexpected. \nA: Yeah, I know. I couldn't put it down. \nB: Me neither. I'm so glad you wanted to read it. \nA:Yeah, I was curious about the protagonist's journey and how it would evolve, especially after that major setback when they had to completely rethink their entire approach to solving the central conflict.\nB: Oh, speaking of the journey, what did you think about that part where the protagonist had to make that impossible choice? It really stuck with me. \nA: It was definitely a rollercoaster ride. There were so many twists and turns. \nB: I know! I didn't see any of them coming. \nA: That's what made it so great. It kept you guessing the whole time. \nB: Definitely. It was a great book. Thanks for lending it to me.",
211
+ "speaker_tracks": {
212
+ "A": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_7/SODA_PROCESSED--train--179972/A_track.wav",
213
+ "B": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_7/SODA_PROCESSED--train--179972/B_track.wav"
214
+ },
215
+ "error_type": "error_after_interrupt",
216
+ "stereo_audio": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_7/SODA_PROCESSED--train--179972/stereo_dialogue.wav",
217
+ "total_duration": 53.57845804988662,
218
+ "segments": [
219
+ {
220
+ "speaker": "A",
221
+ "text": "So, how did you like the book?",
222
+ "original_text": "So, how did you like the book?",
223
+ "start_time": 0,
224
+ "end_time": 1.6950566893424037,
225
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_7/SODA_PROCESSED--train--179972/temp/line_0_A.wav",
226
+ "silence_duration": 0,
227
+ "is_interrupted": false
228
+ },
229
+ {
230
+ "speaker": "B",
231
+ "text": "I loved it! The ending was so shocking, I couldn't believe what happened.",
232
+ "original_text": "I loved it! The ending was so shocking, I couldn't believe what happened.",
233
+ "start_time": 2.1792484824735485,
234
+ "end_time": 5.871221271589195,
235
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_7/SODA_PROCESSED--train--179972/temp/line_1_B.wav",
236
+ "silence_duration": 0.4841917931311449,
237
+ "is_interrupted": false
238
+ },
239
+ {
240
+ "speaker": "A",
241
+ "text": "Sorry to interrupt, but I just have to ask—did you see that twist with the protagonist coming? I was totally blindsided.",
242
+ "original_text": "Sorry to interrupt, but I just have to ask—did you see that twist with the protagonist coming? I was totally blindsided.",
243
+ "start_time": 6.47038511683308,
244
+ "end_time": 14.504489425223102,
245
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_7/SODA_PROCESSED--train--179972/temp/line_2_A.wav",
246
+ "silence_duration": 0.5991638452438857,
247
+ "is_interrupted": false
248
+ },
249
+ {
250
+ "speaker": "B",
251
+ "text": "No, I didn't see it coming at all! It was so unexpected.",
252
+ "original_text": "No, I didn't see it coming at all! It was so unexpected.",
253
+ "start_time": 15.012397119017507,
254
+ "end_time": 18.448950406999366,
255
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_7/SODA_PROCESSED--train--179972/temp/line_3_B.wav",
256
+ "silence_duration": 0.507907693794404,
257
+ "is_interrupted": false
258
+ },
259
+ {
260
+ "speaker": "A",
261
+ "text": "Yeah, I know. I couldn't put it down.",
262
+ "original_text": "Yeah, I know. I couldn't put it down.",
263
+ "start_time": 18.875209136594886,
264
+ "end_time": 21.847363331606225,
265
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_7/SODA_PROCESSED--train--179972/temp/line_4_A.wav",
266
+ "silence_duration": 0.42625872959552,
267
+ "is_interrupted": false
268
+ },
269
+ {
270
+ "speaker": "B",
271
+ "text": "Me neither. I'm so glad you wanted to read it.",
272
+ "original_text": "Me neither. I'm so glad you wanted to read it.",
273
+ "start_time": 22.440054691555087,
274
+ "end_time": 25.110349476135585,
275
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_7/SODA_PROCESSED--train--179972/temp/line_5_B.wav",
276
+ "silence_duration": 0.5926913599488615,
277
+ "is_interrupted": false
278
+ },
279
+ {
280
+ "speaker": "A",
281
+ "text": "Yeah, I was curious about the protagonist's journey and how it would",
282
+ "original_text": "Yeah, I was curious about the protagonist's journey and how it would [interrupt] evolve, especially after that major setback when they had to completely rethink their entire approach to solving the central conflict.",
283
+ "start_time": 25.51803755034393,
284
+ "end_time": 36.89581532812171,
285
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_7/SODA_PROCESSED--train--179972/temp/line_6_A.wav",
286
+ "silence_duration": 0.40768807420834613,
287
+ "is_interrupted": true,
288
+ "text_after_interrupt": "evolve, especially after that major setback when they had to completely rethink their entire approach to solving the central conflict."
289
+ },
290
+ {
291
+ "speaker": "B",
292
+ "text": "Oh, speaking of the journey, what did you think about that part where the protagonist had to make that impossible choice? It really stuck with me.",
293
+ "original_text": "Oh, speaking of the journey, what did you think about that part where the protagonist had to make that impossible choice? It really stuck with me.",
294
+ "start_time": 29.790509205672727,
295
+ "end_time": 37.429874285037805,
296
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_7/SODA_PROCESSED--train--179972/temp/line_7_B.wav",
297
+ "silence_duration": 0.32835611460902553,
298
+ "is_interrupted": false
299
+ },
300
+ {
301
+ "speaker": "A",
302
+ "text": "It was definitely a rollercoaster ride. There were so many twists and turns.",
303
+ "original_text": "It was definitely a rollercoaster ride. There were so many twists and turns.",
304
+ "start_time": 37.91219711578734,
305
+ "end_time": 42.405258340277136,
306
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_7/SODA_PROCESSED--train--179972/temp/line_8_A.wav",
307
+ "silence_duration": 0.4823228307495384,
308
+ "is_interrupted": false
309
+ },
310
+ {
311
+ "speaker": "B",
312
+ "text": "I know! I didn't see any of them coming.",
313
+ "original_text": "I know! I didn't see any of them coming.",
314
+ "start_time": 42.860468420817675,
315
+ "end_time": 45.08958406707618,
316
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_7/SODA_PROCESSED--train--179972/temp/line_9_B.wav",
317
+ "silence_duration": 0.4552100805405374,
318
+ "is_interrupted": false
319
+ },
320
+ {
321
+ "speaker": "A",
322
+ "text": "That's what made it so great. It kept you guessing the whole time.",
323
+ "original_text": "That's what made it so great. It kept you guessing the whole time.",
324
+ "start_time": 45.679186523390214,
325
+ "end_time": 49.394379267154385,
326
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_7/SODA_PROCESSED--train--179972/temp/line_10_A.wav",
327
+ "silence_duration": 0.5896024563140343,
328
+ "is_interrupted": false
329
+ },
330
+ {
331
+ "speaker": "B",
332
+ "text": "Definitely. It was a great book. Thanks for lending it to me.",
333
+ "original_text": "Definitely. It was a great book. Thanks for lending it to me.",
334
+ "start_time": 49.70074891577286,
335
+ "end_time": 53.57848134207672,
336
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_7/SODA_PROCESSED--train--179972/temp/line_11_B.wav",
337
+ "silence_duration": 0.3063696486184793,
338
+ "is_interrupted": false
339
+ }
340
+ ],
341
+ "gt_score": 1
342
+ },
343
+ "SODA_PROCESSED--train--715956": {
344
+ "original_dialog_id": "",
345
+ "dialog_index": 715956,
346
+ "processed_dialogue": "A: Look, I know that I messed up. I was caught with heroin and I'm facing some serious charges. But, I really need your [interrupt] help to find a way out of this because I'm terrified of what might happen if I don't get proper legal representation and support during this difficult time.\nB: Aadya, we've been over this already. The evidence against you is pretty damning. Plus, you have a history of drug use. I don't think there's much that can be done to help you at this point.\nA: But you're my lawyer! You're supposed to help me!\nB: Aadya, I'm doing everything that I can. But, realistically, the chances of you getting out of this are pretty slim. You need to prepare yourself for the possibility of a conviction and think about how you'll handle it.\nA: But I can't go to prison! I'll lose my job, my apartment, everything!\nB: Aadya, you need to calm down. Getting upset isn't going to help anything.\nA: Fine. But, you have to promise me that you'll do everything you can to help me.\nB: I promise.",
347
+ "clean_dialogue": "A:Look, I know that I messed up. I was caught with heroin and I'm facing some serious charges. But, I really need your help to find a way out of this because I'm terrified of what might happen if I don't get proper legal representation and support during this difficult time.\nB: Aadya, we've been over this already. The evidence against you is pretty damning. Plus, you have a history of drug use. I don't think there's much that can be done to help you at this point.\nA: But you're my lawyer! You're supposed to help me!\nB: Aadya, I'm doing everything that I can. But, realistically, the chances of you getting out of this are pretty slim. You need to prepare yourself for the possibility of a conviction and think about how you'll handle it.\nA: But I can't go to prison! I'll lose my job, my apartment, everything!\nB: Aadya, you need to calm down. Getting upset isn't going to help anything.\nA: Fine. But, you have to promise me that you'll do everything you can to help me.\nB: I promise.",
348
+ "speaker_tracks": {
349
+ "A": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_3/SODA_PROCESSED--train--715956/A_track.wav",
350
+ "B": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_3/SODA_PROCESSED--train--715956/B_track.wav"
351
+ },
352
+ "error_type": "error_after_interrupt",
353
+ "stereo_audio": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_3/SODA_PROCESSED--train--715956/stereo_dialogue.wav",
354
+ "total_duration": 49.52126984126984,
355
+ "segments": [
356
+ {
357
+ "speaker": "A",
358
+ "text": "Look, I know that I messed up. I was caught with heroin and I'm facing some serious charges. But, I really need your",
359
+ "original_text": "Look, I know that I messed up. I was caught with heroin and I'm facing some serious charges. But, I really need your [interrupt] help to find a way out of this because I'm terrified of what might happen if I don't get proper legal representation and support during this difficult time.",
360
+ "start_time": 0,
361
+ "end_time": 16.579047619047618,
362
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_3/SODA_PROCESSED--train--715956/temp/line_0_A.wav",
363
+ "silence_duration": 0,
364
+ "is_interrupted": true,
365
+ "text_after_interrupt": "help to find a way out of this because I'm terrified of what might happen if I don't get proper legal representation and support during this difficult time."
366
+ },
367
+ {
368
+ "speaker": "B",
369
+ "text": "Aadya, we've been over this already. The evidence against you is pretty damning. Plus, you have a history of drug use. I don't think there's much that can be done to help you at this point.",
370
+ "original_text": "Aadya, we've been over this already. The evidence against you is pretty damning. Plus, you have a history of drug use. I don't think there's much that can be done to help you at this point.",
371
+ "start_time": 8.510113378684807,
372
+ "end_time": 18.36698412698413,
373
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_3/SODA_PROCESSED--train--715956/temp/line_1_B.wav",
374
+ "silence_duration": 0.4899749375576017,
375
+ "is_interrupted": false
376
+ },
377
+ {
378
+ "speaker": "A",
379
+ "text": "But you're my lawyer! You're supposed to help me!",
380
+ "original_text": "But you're my lawyer! You're supposed to help me!",
381
+ "start_time": 18.846747434390966,
382
+ "end_time": 21.37772249108031,
383
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_3/SODA_PROCESSED--train--715956/temp/line_2_A.wav",
384
+ "silence_duration": 0.4797633074068387,
385
+ "is_interrupted": false
386
+ },
387
+ {
388
+ "speaker": "B",
389
+ "text": "Aadya, I'm doing everything that I can. But, realistically, the chances of you getting out of this are pretty slim. You need to prepare yourself for the possibility of a conviction and think about how you'll handle it.",
390
+ "original_text": "Aadya, I'm doing everything that I can. But, realistically, the chances of you getting out of this are pretty slim. You need to prepare yourself for the possibility of a conviction and think about how you'll handle it.",
391
+ "start_time": 21.881120947184385,
392
+ "end_time": 33.51431822609595,
393
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_3/SODA_PROCESSED--train--715956/temp/line_3_B.wav",
394
+ "silence_duration": 0.5033984561040751,
395
+ "is_interrupted": false
396
+ },
397
+ {
398
+ "speaker": "A",
399
+ "text": "But I can't go to prison! I'll lose my job, my apartment, everything!",
400
+ "original_text": "But I can't go to prison! I'll lose my job, my apartment, everything!",
401
+ "start_time": 34.047335561433606,
402
+ "end_time": 38.48234689930209,
403
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_3/SODA_PROCESSED--train--715956/temp/line_4_A.wav",
404
+ "silence_duration": 0.5330173353376504,
405
+ "is_interrupted": false
406
+ },
407
+ {
408
+ "speaker": "B",
409
+ "text": "Aadya, you need to calm down. Getting upset isn't going to help anything.",
410
+ "original_text": "Aadya, you need to calm down. Getting upset isn't going to help anything.",
411
+ "start_time": 38.89720479711025,
412
+ "end_time": 43.39026602160004,
413
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_3/SODA_PROCESSED--train--715956/temp/line_5_B.wav",
414
+ "silence_duration": 0.4148578978081613,
415
+ "is_interrupted": false
416
+ },
417
+ {
418
+ "speaker": "A",
419
+ "text": "Fine. But, you have to promise me that you'll do everything you can to help me.",
420
+ "original_text": "Fine. But, you have to promise me that you'll do everything you can to help me.",
421
+ "start_time": 43.92319932038778,
422
+ "end_time": 48.27694081698642,
423
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_3/SODA_PROCESSED--train--715956/temp/line_6_A.wav",
424
+ "silence_duration": 0.5329332987877419,
425
+ "is_interrupted": false
426
+ },
427
+ {
428
+ "speaker": "B",
429
+ "text": "I promise.",
430
+ "original_text": "I promise.",
431
+ "start_time": 48.62731544236006,
432
+ "end_time": 49.52128369632831,
433
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_3/SODA_PROCESSED--train--715956/temp/line_7_B.wav",
434
+ "silence_duration": 0.3503746253736393,
435
+ "is_interrupted": false
436
+ }
437
+ ],
438
+ "gt_score": 1
439
+ },
440
+ "SODA_PROCESSED--train--740576": {
441
+ "original_text": "A: Good morning, Mr. Nguyen! I hope you're doing well today.\nB: I'm doing well, thank you. How are you?\nA: I'm feeling great today! I have a lot of energy and I'm excited to [interrupt] tackle some new projects and challenges that will help us improve our workflow and achieve better results for our clients.\nB: Sorry to interrupt, but I wanted to ask if there's anything specific you're looking forward to today?\nA: I was going to say I'm excited to start my day. Actually, I'm looking forward to a team meeting we have later. I love working here. It's a great environment and the people are really supportive and collaborative, always willing to share their expertise and help each other grow professionally.\nB: I'm glad to hear that! Speaking of the team, do you think we should plan more team-building activities to maintain this positive environment?\nA: That's a great idea! We could definitely benefit from more team-building activities. We're happy to have you on our team.",
442
+ "cleaned_text": "A: Good morning, Mr. Nguyen! I hope you're doing well today.\nB: I'm doing well, thank you. How are you?\nA:I'm feeling great today! I have a lot of energy and I'm excited to tackle some new projects and challenges that will help us improve our workflow and achieve better results for our clients.\nB: Sorry to interrupt, but I wanted to ask if there's anything specific you're looking forward to today?\nA: I was going to say I'm excited to start my day. Actually, I'm looking forward to a team meeting we have later. I love working here. It's a great environment and the people are really supportive and collaborative, always willing to share their expertise and help each other grow professionally.\nB: I'm glad to hear that! Speaking of the team, do you think we should plan more team-building activities to maintain this positive environment?\nA: That's a great idea! We could definitely benefit from more team-building activities. We're happy to have you on our team.",
443
+ "total_duration": 49.437278911564626,
444
+ "stereo_audio": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--740576/stereo_dialogue.wav",
445
+ "speaker_tracks": {
446
+ "A": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--740576/A_track.wav",
447
+ "B": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--740576/B_track.wav"
448
+ },
449
+ "error_type": "error_after_interrupt",
450
+ "segments": [
451
+ {
452
+ "speaker": "A",
453
+ "text": "Good morning, Mr. Nguyen! I hope you're doing well today.",
454
+ "original_text": "Good morning, Mr. Nguyen! I hope you're doing well today.",
455
+ "start_time": 0,
456
+ "end_time": 3.332063492063492,
457
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--740576/temp/line_0_A.wav",
458
+ "silence_duration": 0,
459
+ "is_interrupted": false
460
+ },
461
+ {
462
+ "speaker": "B",
463
+ "text": "I'm doing well, thank you. How are you?",
464
+ "original_text": "I'm doing well, thank you. How are you?",
465
+ "start_time": 3.7838731632362803,
466
+ "end_time": 5.583419648497051,
467
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--740576/temp/line_1_B.wav",
468
+ "silence_duration": 0.4518096711727882,
469
+ "is_interrupted": false
470
+ },
471
+ {
472
+ "speaker": "A",
473
+ "text": "I'm feeling great today! I have a lot of energy and I'm excited to",
474
+ "original_text": "I'm feeling great today! I have a lot of energy and I'm excited to [interrupt] tackle some new projects and challenges that will help us improve our workflow and achieve better results for our clients.",
475
+ "start_time": 5.88797031081498,
476
+ "end_time": 16.96388867816192,
477
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--740576/temp/line_2_A.wav",
478
+ "silence_duration": 0.30455066231792893,
479
+ "is_interrupted": true,
480
+ "text_after_interrupt": "tackle some new projects and challenges that will help us improve our workflow and achieve better results for our clients."
481
+ },
482
+ {
483
+ "speaker": "B",
484
+ "text": "Sorry to interrupt, but I wanted to ask if there's anything specific you're looking forward to today?",
485
+ "original_text": "Sorry to interrupt, but I wanted to ask if there's anything specific you're looking forward to today?",
486
+ "start_time": 10.485521331223143,
487
+ "end_time": 16.104750356166456,
488
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--740576/temp/line_3_B.wav",
489
+ "silence_duration": 0.587489668114177,
490
+ "is_interrupted": false
491
+ },
492
+ {
493
+ "speaker": "A",
494
+ "text": "I was going to say I'm excited to start my day. Actually, I'm looking forward to a team meeting we have later. I love working here. It's a great environment and the people are really supportive and collaborative, always willing to share their expertise and help each other grow professionally.",
495
+ "original_text": "I was going to say I'm excited to start my day. Actually, I'm looking forward to a team meeting we have later. I love working here. It's a great environment and the people are really supportive and collaborative, always willing to share their expertise and help each other grow professionally.",
496
+ "start_time": 17.385624216961087,
497
+ "end_time": 33.94145188136018,
498
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--740576/temp/line_4_A.wav",
499
+ "silence_duration": 0.4217355387991674,
500
+ "is_interrupted": false
501
+ },
502
+ {
503
+ "speaker": "B",
504
+ "text": "I'm glad to hear that! Speaking of the team, do you think we should plan more team-building activities to maintain this positive environment?",
505
+ "original_text": "I'm glad to hear that! Speaking of the team, do you think we should plan more team-building activities to maintain this positive environment?",
506
+ "start_time": 34.39980783470558,
507
+ "end_time": 41.74892348096408,
508
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--740576/temp/line_5_B.wav",
509
+ "silence_duration": 0.4583559533453947,
510
+ "is_interrupted": false
511
+ },
512
+ {
513
+ "speaker": "A",
514
+ "text": "That's a great idea! We could definitely benefit from more team-building activities. We're happy to have you on our team.",
515
+ "original_text": "That's a great idea! We could definitely benefit from more team-building activities. We're happy to have you on our team.",
516
+ "start_time": 42.285572803275116,
517
+ "end_time": 49.437318835021145,
518
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--740576/temp/line_6_A.wav",
519
+ "silence_duration": 0.5366493223110326,
520
+ "is_interrupted": false
521
+ }
522
+ ]
523
+ },
524
+ "SODA_PROCESSED--train--836018": {
525
+ "original_text": "A: Hey Ceanna, I saw that you were doing the reports for the group project. Do you want me to help you with [interrupt] organizing the sections or proofreading? I've got some experience with formatting academic papers and making sure all the citations are properly aligned.\nB: Actually, I could use some help with the data analysis part. It's a bit overwhelming.\nA: Sure, I can take care of that. So what do you think of the project so far?\nB: It's interesting. I'm learning a lot about different cultures and how they influence people's daily lives, from their eating habits to their social interactions and even their work-life balance perspectives.\nA: Speaking of cultures, did you notice how the traditions vary even within the same country? It's amazing how diverse it can be.\nB: Yeah, definitely. It's fascinating.",
526
+ "cleaned_text": "A:Hey Ceanna, I saw that you were doing the reports for the group project. Do you want me to help you with organizing the sections or proofreading? I've got some experience with formatting academic papers and making sure all the citations are properly aligned.\nB: Actually, I could use some help with the data analysis part. It's a bit overwhelming.\nA: Sure, I can take care of that. So what do you think of the project so far?\nB: It's interesting. I'm learning a lot about different cultures and how they influence people's daily lives, from their eating habits to their social interactions and even their work-life balance perspectives.\nA: Speaking of cultures, did you notice how the traditions vary even within the same country? It's amazing how diverse it can be.\nB: Yeah, definitely. It's fascinating.",
527
+ "total_duration": 42.34984126984127,
528
+ "stereo_audio": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--836018/stereo_dialogue.wav",
529
+ "speaker_tracks": {
530
+ "A": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--836018/A_track.wav",
531
+ "B": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--836018/B_track.wav"
532
+ },
533
+ "error_type": "error_after_interrupt",
534
+ "segments": [
535
+ {
536
+ "speaker": "A",
537
+ "text": "Hey Ceanna, I saw that you were doing the reports for the group project. Do you want me to help you with",
538
+ "original_text": "Hey Ceanna, I saw that you were doing the reports for the group project. Do you want me to help you with [interrupt] organizing the sections or proofreading? I've got some experience with formatting academic papers and making sure all the citations are properly aligned.",
539
+ "start_time": 0,
540
+ "end_time": 15.011700680272108,
541
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--836018/temp/line_0_A.wav",
542
+ "silence_duration": 0,
543
+ "is_interrupted": true,
544
+ "text_after_interrupt": "organizing the sections or proofreading? I've got some experience with formatting academic papers and making sure all the citations are properly aligned."
545
+ },
546
+ {
547
+ "speaker": "B",
548
+ "text": "Actually, I could use some help with the data analysis part. It's a bit overwhelming.",
549
+ "original_text": "Actually, I could use some help with the data analysis part. It's a bit overwhelming.",
550
+ "start_time": 6.176507936507937,
551
+ "end_time": 11.250068027210885,
552
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--836018/temp/line_1_B.wav",
553
+ "silence_duration": 0.5190912573415952,
554
+ "is_interrupted": false
555
+ },
556
+ {
557
+ "speaker": "A",
558
+ "text": "Sure, I can take care of that. So what do you think of the project so far?",
559
+ "original_text": "Sure, I can take care of that. So what do you think of the project so far?",
560
+ "start_time": 15.60657282124108,
561
+ "end_time": 19.937094363191193,
562
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--836018/temp/line_2_A.wav",
563
+ "silence_duration": 0.5948721409689715,
564
+ "is_interrupted": false
565
+ },
566
+ {
567
+ "speaker": "B",
568
+ "text": "It's interesting. I'm learning a lot about different cultures and how they influence people's daily lives, from their eating habits to their social interactions and even their work-life balance perspectives.",
569
+ "original_text": "It's interesting. I'm learning a lot about different cultures and how they influence people's daily lives, from their eating habits to their social interactions and even their work-life balance perspectives.",
570
+ "start_time": 20.306213172030862,
571
+ "end_time": 30.476553308085286,
572
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--836018/temp/line_3_B.wav",
573
+ "silence_duration": 0.36911880883966963,
574
+ "is_interrupted": false
575
+ },
576
+ {
577
+ "speaker": "A",
578
+ "text": "Speaking of cultures, did you notice how the traditions vary even within the same country? It's amazing how diverse it can be.",
579
+ "original_text": "Speaking of cultures, did you notice how the traditions vary even within the same country? It's amazing how diverse it can be.",
580
+ "start_time": 30.848617682402736,
581
+ "end_time": 39.10331155995375,
582
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--836018/temp/line_4_A.wav",
583
+ "silence_duration": 0.3720643743174508,
584
+ "is_interrupted": false
585
+ },
586
+ {
587
+ "speaker": "B",
588
+ "text": "Yeah, definitely. It's fascinating.",
589
+ "original_text": "Yeah, definitely. It's fascinating.",
590
+ "start_time": 39.435776463870354,
591
+ "end_time": 42.34988077226038,
592
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--836018/temp/line_5_B.wav",
593
+ "silence_duration": 0.3324649039166007,
594
+ "is_interrupted": false
595
+ }
596
+ ]
597
+ },
598
+ "SODA_PROCESSED--train--771154": {
599
+ "original_text": "A: Hey, Mom. Can I go to Tim's house today? \nB: No, you can't go to Tim's house today. \nA: Why [interrupt] can't I go? I finished all my chores and even helped with the dishes after dinner last night, so I really think I deserve this. \nB: Because I said so. And before you ask again, you need to focus on your homework. \nA: But I don't want to do my homework. \nB: Well, you have to do it anyway. Now go and get started immediately because your teacher specifically mentioned you need to improve your math skills this semester. \nA: Wait, can't I just finish it later? I promise I'll get it done before bed. \nB: No, you need to do it now. Once it's done, then we can talk about other plans.",
600
+ "cleaned_text": "A: Hey, Mom. Can I go to Tim's house today? \nB: No, you can't go to Tim's house today. \nA:Why can't I go? I finished all my chores and even helped with the dishes after dinner last night, so I really think I deserve this.\nB: Because I said so. And before you ask again, you need to focus on your homework. \nA: But I don't want to do my homework. \nB: Well, you have to do it anyway. Now go and get started immediately because your teacher specifically mentioned you need to improve your math skills this semester. \nA: Wait, can't I just finish it later? I promise I'll get it done before bed. \nB: No, you need to do it now. Once it's done, then we can talk about other plans.",
601
+ "total_duration": 35.76784580498866,
602
+ "stereo_audio": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--771154/stereo_dialogue.wav",
603
+ "speaker_tracks": {
604
+ "A": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--771154/A_track.wav",
605
+ "B": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--771154/B_track.wav"
606
+ },
607
+ "error_type": "error_after_interrupt",
608
+ "segments": [
609
+ {
610
+ "speaker": "A",
611
+ "text": "Hey, Mom. Can I go to Tim's house today?",
612
+ "original_text": "Hey, Mom. Can I go to Tim's house today?",
613
+ "start_time": 0,
614
+ "end_time": 3.5294331065759637,
615
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--771154/temp/line_0_A.wav",
616
+ "silence_duration": 0,
617
+ "is_interrupted": false
618
+ },
619
+ {
620
+ "speaker": "B",
621
+ "text": "No, you can't go to Tim's house today.",
622
+ "original_text": "No, you can't go to Tim's house today.",
623
+ "start_time": 3.9899851353219105,
624
+ "end_time": 6.126220962986309,
625
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--771154/temp/line_1_B.wav",
626
+ "silence_duration": 0.4605520287459467,
627
+ "is_interrupted": false
628
+ },
629
+ {
630
+ "speaker": "A",
631
+ "text": "Why",
632
+ "original_text": "Why [interrupt] can't I go? I finished all my chores and even helped with the dishes after dinner last night, so I really think I deserve this.",
633
+ "start_time": 6.4787876256667465,
634
+ "end_time": 14.652211661947927,
635
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--771154/temp/line_2_A.wav",
636
+ "silence_duration": 0.3525666626804373,
637
+ "is_interrupted": true,
638
+ "text_after_interrupt": "can't I go? I finished all my chores and even helped with the dishes after dinner last night, so I really think I deserve this."
639
+ },
640
+ {
641
+ "speaker": "B",
642
+ "text": "Because I said so. And before you ask again, you need to focus on your homework.",
643
+ "original_text": "Because I said so. And before you ask again, you need to focus on your homework.",
644
+ "start_time": 7.210216197095318,
645
+ "end_time": 11.889037058773322,
646
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--771154/temp/line_3_B.wav",
647
+ "silence_duration": 0.4183677243140269,
648
+ "is_interrupted": false
649
+ },
650
+ {
651
+ "speaker": "A",
652
+ "text": "But I don't want to do my homework.",
653
+ "original_text": "But I don't want to do my homework.",
654
+ "start_time": 15.159162983353092,
655
+ "end_time": 17.074809241856492,
656
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--771154/temp/line_4_A.wav",
657
+ "silence_duration": 0.5069513214051653,
658
+ "is_interrupted": false
659
+ },
660
+ {
661
+ "speaker": "B",
662
+ "text": "Well, you have to do it anyway. Now go and get started immediately because your teacher specifically mentioned you need to improve your math skills this semester.",
663
+ "original_text": "Well, you have to do it anyway. Now go and get started immediately because your teacher specifically mentioned you need to improve your math skills this semester.",
664
+ "start_time": 17.6716136549098,
665
+ "end_time": 25.763767849921138,
666
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--771154/temp/line_5_B.wav",
667
+ "silence_duration": 0.5968044130533094,
668
+ "is_interrupted": false
669
+ },
670
+ {
671
+ "speaker": "A",
672
+ "text": "Wait, can't I just finish it later? I promise I'll get it done before bed.",
673
+ "original_text": "Wait, can't I just finish it later? I promise I'll get it done before bed.",
674
+ "start_time": 26.149694131743242,
675
+ "end_time": 31.02588460793372,
676
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--771154/temp/line_6_A.wav",
677
+ "silence_duration": 0.38592628182210614,
678
+ "is_interrupted": false
679
+ },
680
+ {
681
+ "speaker": "B",
682
+ "text": "No, you need to do it now. Once it's done, then we can talk about other plans.",
683
+ "original_text": "No, you need to do it now. Once it's done, then we can talk about other plans.",
684
+ "start_time": 31.518621255026567,
685
+ "end_time": 35.767872955706835,
686
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--771154/temp/line_7_B.wav",
687
+ "silence_duration": 0.49273664709284837,
688
+ "is_interrupted": false
689
+ }
690
+ ]
691
+ },
692
+ "SODA_PROCESSED--train--1794": {
693
+ "original_text": "A: Hey, Mom. \nB: Hey, Moriah. What's up? \nA: Not much. Just hanging out in my room. \nB: That's good. I'm glad you're keeping busy. \nA: Yeah, I'm just trying to stay out of [interrupt] everyone's way and focus on my own things because I've been feeling a bit overwhelmed with school and social stuff lately. \nB: Trouble? Is everything okay? I mean, you know you can always talk to me if something's bothering you. \nA: I'm just kind of going through some stuff right now. \nB: Well, your father and I were just talking about how we need to have a serious talk with you about some things that have been going on around the house and how you've been feeling lately because we've noticed some changes in your behavior and we're genuinely concerned about your well-being. \nA: Wait, is this about me staying out late last weekend? \nB: Not just that, but it's part of it. We've also noticed you've been acting a bit differently lately, and we're just wondering if everything is okay with you. \nA: I don't know, Mom. Like I said, I'm just dealing with some stuff. \nB: Okay. Well, if you ever want to talk about anything, we're here for you. We love you, Moriah. \nA: I love you too, Mom.",
694
+ "cleaned_text": "A: Hey, Mom. \nB: Hey, Moriah. What's up? \nA: Not much. Just hanging out in my room. \nB: That's good. I'm glad you're keeping busy. \nA:Yeah, I'm just trying to stay out of everyone's way and focus on my own things because I've been feeling a bit overwhelmed with school and social stuff lately.\nB: Trouble? Is everything okay? I mean, you know you can always talk to me if something's bothering you. \nA: I'm just kind of going through some stuff right now. \nB: Well, your father and I were just talking about how we need to have a serious talk with you about some things that have been going on around the house and how you've been feeling lately because we've noticed some changes in your behavior and we're genuinely concerned about your well-being. \nA: Wait, is this about me staying out late last weekend? \nB: Not just that, but it's part of it. We've also noticed you've been acting a bit differently lately, and we're just wondering if everything is okay with you. \nA: I don't know, Mom. Like I said, I'm just dealing with some stuff. \nB: Okay. Well, if you ever want to talk about anything, we're here for you. We love you, Moriah. \nA: I love you too, Mom.",
695
+ "total_duration": 57.99024943310658,
696
+ "stereo_audio": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1794/stereo_dialogue.wav",
697
+ "speaker_tracks": {
698
+ "A": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1794/A_track.wav",
699
+ "B": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1794/B_track.wav"
700
+ },
701
+ "error_type": "error_after_interrupt",
702
+ "segments": [
703
+ {
704
+ "speaker": "A",
705
+ "text": "Hey, Mom.",
706
+ "original_text": "Hey, Mom.",
707
+ "start_time": 0,
708
+ "end_time": 0.8591383219954648,
709
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1794/temp/line_0_A.wav",
710
+ "silence_duration": 0,
711
+ "is_interrupted": false
712
+ },
713
+ {
714
+ "speaker": "B",
715
+ "text": "Hey, Moriah. What's up?",
716
+ "original_text": "Hey, Moriah. What's up?",
717
+ "start_time": 1.2689805234753475,
718
+ "end_time": 2.7782775756295424,
719
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1794/temp/line_1_B.wav",
720
+ "silence_duration": 0.4098422014798827,
721
+ "is_interrupted": false
722
+ },
723
+ {
724
+ "speaker": "A",
725
+ "text": "Not much. Just hanging out in my room.",
726
+ "original_text": "Not much. Just hanging out in my room.",
727
+ "start_time": 3.2528527196865094,
728
+ "end_time": 5.505188320593539,
729
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1794/temp/line_2_A.wav",
730
+ "silence_duration": 0.47457514405696677,
731
+ "is_interrupted": false
732
+ },
733
+ {
734
+ "speaker": "B",
735
+ "text": "That's good. I'm glad you're keeping busy.",
736
+ "original_text": "That's good. I'm glad you're keeping busy.",
737
+ "start_time": 6.047417085120735,
738
+ "end_time": 8.520342255188762,
739
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1794/temp/line_3_B.wav",
740
+ "silence_duration": 0.5422287645271964,
741
+ "is_interrupted": false
742
+ },
743
+ {
744
+ "speaker": "A",
745
+ "text": "Yeah, I'm just trying to stay out of",
746
+ "original_text": "Yeah, I'm just trying to stay out of [interrupt] everyone's way and focus on my own things because I've been feeling a bit overwhelmed with school and social stuff lately.",
747
+ "start_time": 8.88750351109664,
748
+ "end_time": 18.059385597264438,
749
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1794/temp/line_4_A.wav",
750
+ "silence_duration": 0.3671612559078772,
751
+ "is_interrupted": true,
752
+ "text_after_interrupt": "everyone's way and focus on my own things because I've been feeling a bit overwhelmed with school and social stuff lately."
753
+ },
754
+ {
755
+ "speaker": "B",
756
+ "text": "Trouble? Is everything okay? I mean, you know you can always talk to me if something's bothering you.",
757
+ "original_text": "Trouble? Is everything okay? I mean, you know you can always talk to me if something's bothering you.",
758
+ "start_time": 11.697118023568294,
759
+ "end_time": 18.2915851437497,
760
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1794/temp/line_5_B.wav",
761
+ "silence_duration": 0.32519714638310315,
762
+ "is_interrupted": false
763
+ },
764
+ {
765
+ "speaker": "A",
766
+ "text": "I'm just kind of going through some stuff right now.",
767
+ "original_text": "I'm just kind of going through some stuff right now.",
768
+ "start_time": 18.62204195980515,
769
+ "end_time": 21.396826540304016,
770
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1794/temp/line_6_A.wav",
771
+ "silence_duration": 0.3304568160554501,
772
+ "is_interrupted": false
773
+ },
774
+ {
775
+ "speaker": "B",
776
+ "text": "Well, your father and I were just talking about how we need to have a serious talk with you about some things that have been going on around the house and how you've been feeling lately because we've noticed some changes in your behavior and we're genuinely concerned about your well-being.",
777
+ "original_text": "Well, your father and I were just talking about how we need to have a serious talk with you about some things that have been going on around the house and how you've been feeling lately because we've noticed some changes in your behavior and we're genuinely concerned about your well-being.",
778
+ "start_time": 21.697523952118004,
779
+ "end_time": 34.7355284872654,
780
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1794/temp/line_7_B.wav",
781
+ "silence_duration": 0.30069741181398774,
782
+ "is_interrupted": false
783
+ },
784
+ {
785
+ "speaker": "A",
786
+ "text": "Wait, is this about me staying out late last weekend?",
787
+ "original_text": "Wait, is this about me staying out late last weekend?",
788
+ "start_time": 35.29912687220732,
789
+ "end_time": 38.677630273567864,
790
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1794/temp/line_8_A.wav",
791
+ "silence_duration": 0.5635983849419206,
792
+ "is_interrupted": false
793
+ },
794
+ {
795
+ "speaker": "B",
796
+ "text": "Not just that, but it's part of it. We've also noticed you've been acting a bit differently lately, and we're just wondering if everything is okay with you.",
797
+ "original_text": "Not just that, but it's part of it. We've also noticed you've been acting a bit differently lately, and we're just wondering if everything is okay with you.",
798
+ "start_time": 39.09678068392148,
799
+ "end_time": 45.99310721453372,
800
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1794/temp/line_9_B.wav",
801
+ "silence_duration": 0.4191504103536184,
802
+ "is_interrupted": false
803
+ },
804
+ {
805
+ "speaker": "A",
806
+ "text": "I don't know, Mom. Like I said, I'm just dealing with some stuff.",
807
+ "original_text": "I don't know, Mom. Like I said, I'm just dealing with some stuff.",
808
+ "start_time": 46.3670775788443,
809
+ "end_time": 50.46539957430915,
810
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1794/temp/line_10_A.wav",
811
+ "silence_duration": 0.3739703643105766,
812
+ "is_interrupted": false
813
+ },
814
+ {
815
+ "speaker": "B",
816
+ "text": "Okay. Well, if you ever want to talk about anything, we're here for you. We love you, Moriah.",
817
+ "original_text": "Okay. Well, if you ever want to talk about anything, we're here for you. We love you, Moriah.",
818
+ "start_time": 50.99388055366539,
819
+ "end_time": 56.06744064436834,
820
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1794/temp/line_11_B.wav",
821
+ "silence_duration": 0.5284809793562373,
822
+ "is_interrupted": false
823
+ },
824
+ {
825
+ "speaker": "A",
826
+ "text": "I love you too, Mom.",
827
+ "original_text": "I love you too, Mom.",
828
+ "start_time": 56.55062063706958,
829
+ "end_time": 57.99025782527819,
830
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1794/temp/line_12_A.wav",
831
+ "silence_duration": 0.4831799927012399,
832
+ "is_interrupted": false
833
+ }
834
+ ]
835
+ },
836
+ "SODA_PROCESSED--train--1070688": {
837
+ "original_text": "A: Hi Karis, I'm so excited to have you over for dinner tonight. I've been planning the menu and setting the table all day. I hope you're [interrupt] ready for a cozy evening with some delicious food and great conversation about your recent travels through Europe that you mentioned last time we met.\nB: Oh, I just remembered—I have a slight allergy to shellfish. I know you usually avoid it, but I wanted to mention it just in case.\nA: No worries, there's no shellfish on the menu tonight. Well, let's get started then! For our first course, we'll be having a spinach and feta salad. The feta is from a local farm and the spinach is from my garden. For our main course, I've made chicken Parmesan with homemade tomato sauce and fresh mozzarella cheese. And for dessert, we'll be having tiramisu that I made from scratch this afternoon. I wanted it to be just right for tonight.\nB: Tiramisu? That's my favorite dessert! I'm so excited to try it. You really know how to make a meal special.\nA: I'm glad you're excited! I was about to say I made it this morning using a special family recipe that's been passed down through generations, so it's extra fresh and has that authentic Italian flavor you can't find in restaurants. I hope you enjoy everything!",
838
+ "cleaned_text": "A:Hi Karis, I'm so excited to have you over for dinner tonight. I've been planning the menu and setting the table all day. I hope you're ready for a cozy evening with some delicious food and great conversation about your recent travels through Europe that you mentioned last time we met.\nB: Oh, I just remembered—I have a slight allergy to shellfish. I know you usually avoid it, but I wanted to mention it just in case.\nA: No worries, there's no shellfish on the menu tonight. Well, let's get started then! For our first course, we'll be having a spinach and feta salad. The feta is from a local farm and the spinach is from my garden. For our main course, I've made chicken Parmesan with homemade tomato sauce and fresh mozzarella cheese. And for dessert, we'll be having tiramisu that I made from scratch this afternoon. I wanted it to be just right for tonight.\nB: Tiramisu? That's my favorite dessert! I'm so excited to try it. You really know how to make a meal special.\nA: I'm glad you're excited! I was about to say I made it this morning using a special family recipe that's been passed down through generations, so it's extra fresh and has that authentic Italian flavor you can't find in restaurants. I hope you enjoy everything!",
839
+ "total_duration": 66.58453514739229,
840
+ "stereo_audio": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1070688/stereo_dialogue.wav",
841
+ "speaker_tracks": {
842
+ "A": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1070688/A_track.wav",
843
+ "B": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1070688/B_track.wav"
844
+ },
845
+ "error_type": "error_after_interrupt",
846
+ "segments": [
847
+ {
848
+ "speaker": "A",
849
+ "text": "Hi Karis, I'm so excited to have you over for dinner tonight. I've been planning the menu and setting the table all day. I hope you're",
850
+ "original_text": "Hi Karis, I'm so excited to have you over for dinner tonight. I've been planning the menu and setting the table all day. I hope you're [interrupt] ready for a cozy evening with some delicious food and great conversation about your recent travels through Europe that you mentioned last time we met.",
851
+ "start_time": 0,
852
+ "end_time": 16.172698412698413,
853
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1070688/temp/line_0_A.wav",
854
+ "silence_duration": 0,
855
+ "is_interrupted": true,
856
+ "text_after_interrupt": "ready for a cozy evening with some delicious food and great conversation about your recent travels through Europe that you mentioned last time we met."
857
+ },
858
+ {
859
+ "speaker": "B",
860
+ "text": "Oh, I just remembered—I have a slight allergy to shellfish. I know you usually avoid it, but I wanted to mention it just in case.",
861
+ "original_text": "Oh, I just remembered—I have a slight allergy to shellfish. I know you usually avoid it, but I wanted to mention it just in case.",
862
+ "start_time": 8.719092970521542,
863
+ "end_time": 15.650249433106577,
864
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1070688/temp/line_1_B.wav",
865
+ "silence_duration": 0.42791712549357114,
866
+ "is_interrupted": false
867
+ },
+ {
868
+ "speaker": "A",
869
+ "text": "No worries, there's no shellfish on the menu tonight. Well, let's get started then! For our first course, we'll be having a spinach and feta salad. The feta is from a local farm and the spinach is from my garden. For our main course, I've made chicken Parmesan with homemade tomato sauce and fresh mozzarella cheese. And for dessert, we'll be having tiramisu that I made from scratch this afternoon. I wanted it to be just right for tonight.",
870
+ "original_text": "No worries, there's no shellfish on the menu tonight. Well, let's get started then! For our first course, we'll be having a spinach and feta salad. The feta is from a local farm and the spinach is from my garden. For our main course, I've made chicken Parmesan with homemade tomato sauce and fresh mozzarella cheese. And for dessert, we'll be having tiramisu that I made from scratch this afternoon. I wanted it to be just right for tonight.",
871
+ "start_time": 16.66087863834312,
872
+ "end_time": 43.38704643879663,
873
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1070688/temp/line_2_A.wav",
874
+ "silence_duration": 0.488180225644707,
875
+ "is_interrupted": false
876
+ },
877
+ {
878
+ "speaker": "B",
879
+ "text": "Tiramisu? That's my favorite dessert! I'm so excited to try it. You really know how to make a meal special.",
880
+ "original_text": "Tiramisu? That's my favorite dessert! I'm so excited to try it. You really know how to make a meal special.",
881
+ "start_time": 43.75020989775093,
882
+ "end_time": 49.926717834258866,
883
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1070688/temp/line_3_B.wav",
884
+ "silence_duration": 0.36316345895429397,
885
+ "is_interrupted": false
886
+ },
887
+ {
888
+ "speaker": "A",
889
+ "text": "I'm glad you're excited! I was about to say I made it this morning using a special family recipe that's been passed down through generations, so it's extra fresh and has that authentic Italian flavor you can't find in restaurants. I hope you enjoy everything!",
890
+ "original_text": "I'm glad you're excited! I was about to say I made it this morning using a special family recipe that's been passed down through generations, so it's extra fresh and has that authentic Italian flavor you can't find in restaurants. I hope you enjoy everything!",
891
+ "start_time": 50.49314394878711,
892
+ "end_time": 66.58457252021569,
893
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1070688/temp/line_4_A.wav",
894
+ "silence_duration": 0.5664261145282402,
895
+ "is_interrupted": false
896
+ }
897
+ ]
898
+ },
899
+ "SODA_PROCESSED--train--737676": {
900
+ "original_text": "A: Hey, Miraya. I'm sorry about what happened with the car last night. I was really angry and I didn't mean to take it out on your [interrupt] car like that, especially since it's your most valuable possession and you've always taken such good care of it. I know it was wrong, and I regret it deeply.\nB: I understand, Stephon. But what exactly made you so angry? Was it something specific about what happened earlier in the week?\nA: Yeah, it did. I was really mad at you for a while after that. But I know it wasn't your fault and I shouldn't have taken it out on your car like that.\nB: Well, since you're being honest and apologetic about it, I don't think there's anything else you need to do other than maybe just be more mindful in the future about how you express your emotions, especially when you're upset, because lashing out at objects or people never really solves the underlying issue and often makes things worse.\nA: Absolutely, I'll work on that. And I really appreciate you being so understanding about this. Thanks for giving me the chance to talk it out.",
901
+ "cleaned_text": "A:Hey, Miraya. I'm sorry about what happened with the car last night. I was really angry and I didn't mean to take it out on your car like that, especially since it's your most valuable possession and you've always taken such good care of it. I know it was wrong, and I regret it deeply.\nB: I understand, Stephon. But what exactly made you so angry? Was it something specific about what happened earlier in the week?\nA: Yeah, it did. I was really mad at you for a while after that. But I know it wasn't your fault and I shouldn't have taken it out on your car like that.\nB: Well, since you're being honest and apologetic about it, I don't think there's anything else you need to do other than maybe just be more mindful in the future about how you express your emotions, especially when you're upset, because lashing out at objects or people never really solves the underlying issue and often makes things worse.\nA: Absolutely, I'll work on that. And I really appreciate you being so understanding about this. Thanks for giving me the chance to talk it out.",
902
+ "total_duration": 52.89809523809524,
903
+ "stereo_audio": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--737676/stereo_dialogue.wav",
904
+ "speaker_tracks": {
905
+ "A": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--737676/A_track.wav",
906
+ "B": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--737676/B_track.wav"
907
+ },
908
+ "error_type": "error_after_interrupt",
909
+ "segments": [
910
+ {
911
+ "speaker": "A",
912
+ "text": "Hey, Miraya. I'm sorry about what happened with the car last night. I was really angry and I didn't mean to take it out on your",
913
+ "original_text": "Hey, Miraya. I'm sorry about what happened with the car last night. I was really angry and I didn't mean to take it out on your [interrupt] car like that, especially since it's your most valuable possession and you've always taken such good care of it. I know it was wrong, and I regret it deeply.",
914
+ "start_time": 0,
915
+ "end_time": 16.938956916099773,
916
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--737676/temp/line_0_A.wav",
917
+ "silence_duration": 0,
918
+ "is_interrupted": true,
919
+ "text_after_interrupt": "car like that, especially since it's your most valuable possession and you've always taken such good care of it. I know it was wrong, and I regret it deeply."
920
+ },
921
+ {
922
+ "speaker": "B",
923
+ "text": "I understand, Stephon. But what exactly made you so angry? Was it something specific about what happened earlier in the week?",
924
+ "original_text": "I understand, Stephon. But what exactly made you so angry? Was it something specific about what happened earlier in the week?",
925
+ "start_time": 8.753922902494331,
926
+ "end_time": 15.348390022675737,
927
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--737676/temp/line_1_B.wav",
928
+ "silence_duration": 0.5553895116856843,
929
+ "is_interrupted": false
930
+ },
931
+ {
932
+ "speaker": "A",
933
+ "text": "Yeah, it did. I was really mad at you for a while after that. But I know it wasn't your fault and I shouldn't have taken it out on your car like that.",
934
+ "original_text": "Yeah, it did. I was really mad at you for a while after that. But I know it wasn't your fault and I shouldn't have taken it out on your car like that.",
935
+ "start_time": 17.329799609194744,
936
+ "end_time": 26.582951536632386,
937
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--737676/temp/line_2_A.wav",
938
+ "silence_duration": 0.3908426930949695,
939
+ "is_interrupted": false
940
+ },
941
+ {
942
+ "speaker": "B",
943
+ "text": "Well, since you're being honest and apologetic about it, I don't think there's anything else you need to do other than maybe just be more mindful in the future about how you express your emotions, especially when you're upset, because lashing out at objects or people never really solves the underlying issue and often makes things worse.",
944
+ "original_text": "Well, since you're being honest and apologetic about it, I don't think there's anything else you need to do other than maybe just be more mindful in the future about how you express your emotions, especially when you're upset, because lashing out at objects or people never really solves the underlying issue and often makes things worse.",
945
+ "start_time": 26.900238001740547,
946
+ "end_time": 44.05978448700132,
947
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--737676/temp/line_3_B.wav",
948
+ "silence_duration": 0.3172864651081615,
949
+ "is_interrupted": false
950
+ },
951
+ {
952
+ "speaker": "A",
953
+ "text": "Absolutely, I'll work on that. And I really appreciate you being so understanding about this. Thanks for giving me the chance to talk it out.",
954
+ "original_text": "Absolutely, I'll work on that. And I really appreciate you being so understanding about this. Thanks for giving me the chance to talk it out.",
955
+ "start_time": 44.64342590433178,
956
+ "end_time": 52.8981197818828,
957
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--737676/temp/line_4_A.wav",
958
+ "silence_duration": 0.5836414173304574,
959
+ "is_interrupted": false
960
+ }
961
+ ]
962
+ }
963
+ }
ms-swift/swift/cli/__pycache__/main.cpython-310.pyc ADDED
Binary file (2.31 kB). View file
 
ms-swift/swift/cli/_megatron/__init__.py ADDED
File without changes
ms-swift/swift/cli/deploy.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
# Copyright (c) Alibaba, Inc. and its affiliates.
# CLI entry point for model deployment: delegates directly to `deploy_main`
# exported by the swift.llm package.
from swift.llm import deploy_main

if __name__ == '__main__':
    deploy_main()
ms-swift/swift/cli/sample.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
# Copyright (c) Alibaba, Inc. and its affiliates.
# CLI entry point for sampling: delegates directly to `sampling_main`
# from the swift.llm.sampling submodule.
from swift.llm.sampling import sampling_main

if __name__ == '__main__':
    sampling_main()
ms-swift/swift/cli/sft.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
# Copyright (c) Alibaba, Inc. and its affiliates.
# CLI entry point for supervised fine-tuning: delegates directly to
# `sft_main` exported by the swift.llm package.
# NOTE: the previous version imported `os` without using it; the dead
# import has been removed (this 7-line file was fully visible, so nothing
# else could have depended on it).
from swift.llm import sft_main

if __name__ == '__main__':
    sft_main()
ms-swift/swift/cli/web_ui.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
# Copyright (c) Alibaba, Inc. and its affiliates.
# CLI entry point for the web UI: delegates directly to `webui_main`
# exported by the swift.ui package.
from swift.ui import webui_main

if __name__ == '__main__':
    webui_main()
ms-swift/swift/hub/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
# Re-export the hub interface from the sibling `.hub` module: the `HFHub`
# and `MSHub` classes and the `get_hub` factory that selects between them.
from .hub import HFHub, MSHub, get_hub
ms-swift/swift/llm/__init__.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import TYPE_CHECKING

from swift.utils.import_utils import _LazyModule

# Lazy-import package bootstrap: at static-analysis time (TYPE_CHECKING) the
# real symbols are imported so IDEs and type checkers can resolve them; at
# runtime this module object is replaced in sys.modules by a _LazyModule that
# imports each submodule only on first attribute access.  The TYPE_CHECKING
# branch and `_import_structure` below must be kept in sync by hand.
if TYPE_CHECKING:
    # Recommend using `xxx_main`
    from .infer import (VllmEngine, RequestConfig, LmdeployEngine, PtEngine, InferEngine, infer_main, deploy_main,
                        InferClient, run_deploy, AdapterRequest, prepare_model_template, BaseInferEngine, rollout_main)
    from .export import (export_main, merge_lora, quantize_model, export_to_ollama)
    from .eval import eval_main
    from .app import app_main
    from .train import sft_main, pt_main, rlhf_main, get_multimodal_target_regex
    from .sampling import sampling_main
    from .argument import (EvalArguments, InferArguments, TrainArguments, ExportArguments, DeployArguments,
                           RLHFArguments, WebUIArguments, BaseArguments, AppArguments, SamplingArguments)
    from .template import (TEMPLATE_MAPPING, Template, Word, get_template, TemplateType, register_template,
                           TemplateInputs, TemplateMeta, get_template_meta, InferRequest, load_image, MaxLengthError,
                           load_file, draw_bbox)
    from .model import (register_model, MODEL_MAPPING, ModelType, get_model_tokenizer, safe_snapshot_download,
                        HfConfigFactory, ModelInfo, ModelMeta, ModelKeys, register_model_arch, MultiModelKeys,
                        ModelArch, get_model_arch, MODEL_ARCH_MAPPING, get_model_info_meta, get_model_name, ModelGroup,
                        Model, get_model_tokenizer_with_flash_attn, get_model_tokenizer_multimodal, load_by_unsloth,
                        git_clone_github, get_matched_model_meta)
    from .dataset import (AlpacaPreprocessor, ResponsePreprocessor, MessagesPreprocessor, AutoPreprocessor,
                          DATASET_MAPPING, MediaResource, register_dataset, register_dataset_info, EncodePreprocessor,
                          LazyLLMDataset, load_dataset, DATASET_TYPE, sample_dataset, RowPreprocessor, DatasetMeta,
                          HfDataset, SubsetDataset)
    from .utils import (deep_getattr, to_float_dtype, to_device, History, Messages, history_to_messages,
                        messages_to_history, Processor, save_checkpoint, ProcessorMixin,
                        get_temporary_cache_files_directory, get_cache_dir, is_moe_model)
    from .base import SwiftPipeline
    from .data_loader import DataLoaderDispatcher, DataLoaderShard, BatchSamplerShard
else:
    # Maps submodule name -> list of public names it provides; consumed by
    # _LazyModule to resolve attribute access to the right deferred import.
    _import_structure = {
        # NOTE(review): 'rlhf_main' appears both here under 'rlhf' and under
        # 'train' below, while the TYPE_CHECKING branch imports it only from
        # .train — confirm whether the 'rlhf' entry is still needed.
        'rlhf': ['rlhf_main'],
        'infer': [
            'deploy_main', 'VllmEngine', 'RequestConfig', 'LmdeployEngine', 'PtEngine', 'infer_main', 'InferClient',
            'run_deploy', 'InferEngine', 'AdapterRequest', 'prepare_model_template', 'BaseInferEngine', 'rollout_main'
        ],
        'export': ['export_main', 'merge_lora', 'quantize_model', 'export_to_ollama'],
        'app': ['app_main'],
        'eval': ['eval_main'],
        'train': ['sft_main', 'pt_main', 'rlhf_main', 'get_multimodal_target_regex'],
        'sampling': ['sampling_main'],
        'argument': [
            'EvalArguments', 'InferArguments', 'TrainArguments', 'ExportArguments', 'WebUIArguments', 'DeployArguments',
            'RLHFArguments', 'BaseArguments', 'AppArguments', 'SamplingArguments'
        ],
        'template': [
            'TEMPLATE_MAPPING', 'Template', 'Word', 'get_template', 'TemplateType', 'register_template',
            'TemplateInputs', 'TemplateMeta', 'get_template_meta', 'InferRequest', 'load_image', 'MaxLengthError',
            'load_file', 'draw_bbox'
        ],
        'model': [
            'MODEL_MAPPING', 'ModelType', 'get_model_tokenizer', 'safe_snapshot_download', 'HfConfigFactory',
            'ModelInfo', 'ModelMeta', 'ModelKeys', 'register_model_arch', 'MultiModelKeys', 'ModelArch',
            'MODEL_ARCH_MAPPING', 'get_model_arch', 'get_model_info_meta', 'get_model_name', 'register_model',
            'ModelGroup', 'Model', 'get_model_tokenizer_with_flash_attn', 'get_model_tokenizer_multimodal',
            'load_by_unsloth', 'git_clone_github', 'get_matched_model_meta'
        ],
        'dataset': [
            'AlpacaPreprocessor', 'MessagesPreprocessor', 'AutoPreprocessor', 'DATASET_MAPPING', 'MediaResource',
            'register_dataset', 'register_dataset_info', 'EncodePreprocessor', 'LazyLLMDataset', 'load_dataset',
            'DATASET_TYPE', 'sample_dataset', 'RowPreprocessor', 'ResponsePreprocessor', 'DatasetMeta', 'HfDataset',
            'SubsetDataset'
        ],
        'utils': [
            'deep_getattr', 'to_device', 'to_float_dtype', 'History', 'Messages', 'history_to_messages',
            'messages_to_history', 'Processor', 'save_checkpoint', 'ProcessorMixin',
            'get_temporary_cache_files_directory', 'get_cache_dir', 'is_moe_model'
        ],
        'base': ['SwiftPipeline'],
        'data_loader': ['DataLoaderDispatcher', 'DataLoaderShard', 'BatchSamplerShard'],
    }

    import sys

    # Replace this module object with the lazy proxy; attribute access on
    # `swift.llm` from here on triggers the deferred submodule imports.
    sys.modules[__name__] = _LazyModule(
        __name__,
        globals()['__file__'],
        _import_structure,
        module_spec=__spec__,
        extra_objects={},
    )
ms-swift/swift/llm/__pycache__/base.cpython-310.pyc ADDED
Binary file (2.35 kB). View file
 
ms-swift/swift/llm/app/build_ui.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ from functools import partial
3
+ from typing import Literal, Optional
4
+
5
+ import gradio as gr
6
+
7
+ from swift.utils import get_file_mm_type
8
+ from ..utils import History
9
+ from .locale import locale_mapping
10
+
11
+
12
+ def clear_session():
13
+ return '', [], []
14
+
15
+
16
+ def modify_system_session(system: str):
17
+ system = system or ''
18
+ return system, '', [], []
19
+
20
+
21
+ def _history_to_messages(history: History, system: Optional[str]):
22
+ messages = []
23
+ if system is not None:
24
+ messages.append({'role': 'system', 'content': system})
25
+ content = []
26
+ for h in history:
27
+ assert isinstance(h, (list, tuple))
28
+ if isinstance(h[0], tuple):
29
+ assert h[1] is None
30
+ file_path = h[0][0]
31
+ try:
32
+ mm_type = get_file_mm_type(file_path)
33
+ content.append({'type': mm_type, mm_type: file_path})
34
+ except ValueError:
35
+ with open(file_path, 'r', encoding='utf-8') as f:
36
+ content.append({'type': 'text', 'text': f.read()})
37
+ else:
38
+ content.append({'type': 'text', 'text': h[0]})
39
+ messages.append({'role': 'user', 'content': content})
40
+ if h[1] is not None:
41
+ messages.append({'role': 'assistant', 'content': h[1]})
42
+ content = []
43
+ return messages
44
+
45
+
46
+ def _parse_text(text: str) -> str:
47
+ mapping = {'<': '&lt;', '>': '&gt;', '*': '&ast;'}
48
+ for k, v in mapping.items():
49
+ text = text.replace(k, v)
50
+ return text
51
+
52
+
53
+ async def model_chat(history: History, real_history: History, system: Optional[str], *, client, model: str,
54
+ request_config: Optional['RequestConfig']):
55
+ if history:
56
+ from swift.llm import InferRequest
57
+
58
+ messages = _history_to_messages(real_history, system)
59
+ resp_or_gen = await client.infer_async(
60
+ InferRequest(messages=messages), request_config=request_config, model=model)
61
+ if request_config and request_config.stream:
62
+ response = ''
63
+ async for resp in resp_or_gen:
64
+ if resp is None:
65
+ continue
66
+ response += resp.choices[0].delta.content
67
+ history[-1][1] = _parse_text(response)
68
+ real_history[-1][-1] = response
69
+ yield history, real_history
70
+
71
+ else:
72
+ response = resp_or_gen.choices[0].message.content
73
+ history[-1][1] = _parse_text(response)
74
+ real_history[-1][-1] = response
75
+ yield history, real_history
76
+
77
+ else:
78
+ yield [], []
79
+
80
+
81
+ def add_text(history: History, real_history: History, query: str):
82
+ history = history or []
83
+ real_history = real_history or []
84
+ history.append([_parse_text(query), None])
85
+ real_history.append([query, None])
86
+ return history, real_history, ''
87
+
88
+
89
+ def add_file(history: History, real_history: History, file):
90
+ history = history or []
91
+ real_history = real_history or []
92
+ history.append([(file.name, ), None])
93
+ real_history.append([(file.name, ), None])
94
+ return history, real_history
95
+
96
+
97
+ def build_ui(base_url: str,
98
+ model: Optional[str] = None,
99
+ *,
100
+ request_config: Optional['RequestConfig'] = None,
101
+ is_multimodal: bool = True,
102
+ studio_title: Optional[str] = None,
103
+ lang: Literal['en', 'zh'] = 'en',
104
+ default_system: Optional[str] = None):
105
+ from swift.llm import InferClient
106
+ client = InferClient(base_url=base_url)
107
+ model = model or client.models[0]
108
+ studio_title = studio_title or model
109
+ with gr.Blocks() as demo:
110
+ gr.Markdown(f'<center><font size=8>{studio_title}</center>')
111
+ with gr.Row():
112
+ with gr.Column(scale=3):
113
+ system_input = gr.Textbox(value=default_system, lines=1, label='System')
114
+ with gr.Column(scale=1):
115
+ modify_system = gr.Button(locale_mapping['modify_system'][lang], scale=2)
116
+ chatbot = gr.Chatbot(label='Chatbot')
117
+ textbox = gr.Textbox(lines=1, label='Input')
118
+
119
+ with gr.Row():
120
+ upload = gr.UploadButton(locale_mapping['upload'][lang], visible=is_multimodal)
121
+ submit = gr.Button(locale_mapping['submit'][lang])
122
+ regenerate = gr.Button(locale_mapping['regenerate'][lang])
123
+ clear_history = gr.Button(locale_mapping['clear_history'][lang])
124
+
125
+ system_state = gr.State(value=default_system)
126
+ history_state = gr.State(value=[])
127
+ model_chat_ = partial(model_chat, client=client, model=model, request_config=request_config)
128
+
129
+ upload.upload(add_file, [chatbot, history_state, upload], [chatbot, history_state])
130
+ textbox.submit(add_text, [chatbot, history_state, textbox],
131
+ [chatbot, history_state, textbox]).then(model_chat_, [chatbot, history_state, system_state],
132
+ [chatbot, history_state])
133
+ submit.click(add_text, [chatbot, history_state, textbox],
134
+ [chatbot, history_state, textbox]).then(model_chat_, [chatbot, history_state, system_state],
135
+ [chatbot, history_state])
136
+ regenerate.click(model_chat_, [chatbot, history_state, system_state], [chatbot, history_state])
137
+ clear_history.click(clear_session, [], [textbox, chatbot, history_state])
138
+ modify_system.click(modify_system_session, [system_input], [system_state, textbox, chatbot, history_state])
139
+ return demo
ms-swift/swift/llm/argument/__pycache__/app_args.cpython-310.pyc ADDED
Binary file (1.55 kB). View file
 
ms-swift/swift/llm/argument/__pycache__/eval_args.cpython-310.pyc ADDED
Binary file (4.96 kB). View file
 
ms-swift/swift/llm/argument/__pycache__/export_args.cpython-310.pyc ADDED
Binary file (3.78 kB). View file
 
ms-swift/swift/llm/argument/__pycache__/rlhf_args.cpython-310.pyc ADDED
Binary file (11 kB). View file
 
ms-swift/swift/llm/argument/__pycache__/sampling_args.cpython-310.pyc ADDED
Binary file (2.79 kB). View file
 
ms-swift/swift/llm/argument/__pycache__/train_args.cpython-310.pyc ADDED
Binary file (8.79 kB). View file
 
ms-swift/swift/llm/argument/__pycache__/tuner_args.cpython-310.pyc ADDED
Binary file (10.6 kB). View file
 
ms-swift/swift/llm/argument/__pycache__/webui_args.cpython-310.pyc ADDED
Binary file (957 Bytes). View file
 
ms-swift/swift/llm/argument/base_args/__pycache__/generation_args.cpython-310.pyc ADDED
Binary file (2.25 kB). View file
 
ms-swift/swift/llm/argument/base_args/__pycache__/quant_args.cpython-310.pyc ADDED
Binary file (3.18 kB). View file
 
ms-swift/swift/llm/argument/base_args/base_args.py ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ import os
3
+ from dataclasses import dataclass, field, fields
4
+ from typing import Any, Dict, List, Literal, Optional, Union
5
+
6
+ import json
7
+
8
+ from swift.hub import get_hub
9
+ from swift.llm import Processor, Template, get_model_tokenizer, get_template, load_by_unsloth, safe_snapshot_download
10
+ from swift.llm.utils import get_ckpt_dir
11
+ from swift.plugin import extra_tuners
12
+ from swift.utils import (check_json_format, get_dist_setting, get_logger, import_external_file, is_dist, is_master,
13
+ set_device, use_hf_hub)
14
+ from .data_args import DataArguments
15
+ from .generation_args import GenerationArguments
16
+ from .model_args import ModelArguments
17
+ from .quant_args import QuantizeArguments
18
+ from .template_args import TemplateArguments
19
+
20
+ logger = get_logger()
21
+
22
+
23
+ def get_supported_tuners():
24
+ return {'lora', 'full', 'longlora', 'adalora', 'llamapro', 'adapter', 'vera', 'boft', 'fourierft', 'reft', 'bone'
25
+ } | set(extra_tuners.keys())
26
+
27
+
28
+ @dataclass
29
+ class CompatArguments:
30
+ ckpt_dir: Optional[str] = None
31
+ lora_modules: List[str] = field(default_factory=list)
32
+
33
+ def _handle_ckpt_dir(self: 'BaseArguments'):
34
+ assert os.path.isdir(self.ckpt_dir), f'self.ckpt_dir: {self.ckpt_dir}'
35
+ if (os.path.exists(os.path.join(self.ckpt_dir, 'adapter_config.json'))
36
+ or os.path.exists(os.path.join(self.ckpt_dir, 'default', 'adapter_config.json'))
37
+ or os.path.exists(os.path.join(self.ckpt_dir, 'reft'))):
38
+ if self.ckpt_dir in self.adapters:
39
+ return
40
+ self.adapters.insert(0, self.ckpt_dir)
41
+ else:
42
+ self.model = self.ckpt_dir
43
+ self.ckpt_dir = None
44
+
45
+ def __post_init__(self: 'BaseArguments'):
46
+ if self.ckpt_dir is not None:
47
+ self._handle_ckpt_dir()
48
+
49
+ if len(self.lora_modules) > 0:
50
+ self.adapters += self.lora_modules
51
+
52
+
53
+ @dataclass
54
+ class BaseArguments(CompatArguments, GenerationArguments, QuantizeArguments, DataArguments, TemplateArguments,
55
+ ModelArguments):
56
+ """
57
+ BaseArguments class is a dataclass that inherits from multiple argument classes:
58
+ GenerationArguments, QuantizeArguments, DataArguments, TemplateArguments, ModelArguments.
59
+
60
+ Args:
61
+ tuner_backend(str): Support peft or unsloth.
62
+ train_type(str): The training type, support all supported tuners and `full`.
63
+ seed (int): Random seed for reproducibility. Default is 42.
64
+ model_kwargs (Optional[str]): Additional keyword arguments for the model. Default is None.
65
+ load_data_args (bool): Flag to determine if dataset configuration should be loaded. Default is False.
66
+ use_hf (bool): Flag to determine if Hugging Face should be used. Default is False.
67
+ hub_token (Optional[str]): SDK token for authentication. Default is None.
68
+ custom_register_path (List[str]): Path to custom .py file for dataset registration. Default is None.
69
+ ignore_args_error (bool): Flag to ignore argument errors for notebook compatibility. Default is False.
70
+ use_swift_lora (bool): Use swift lora, a compatible argument
71
+ """
72
+ tuner_backend: Literal['peft', 'unsloth'] = 'peft'
73
+ train_type: str = field(default='lora', metadata={'help': f'train_type choices: {list(get_supported_tuners())}'})
74
+ adapters: List[str] = field(default_factory=list)
75
+ external_plugins: List[str] = field(default_factory=list)
76
+
77
+ seed: int = 42
78
+ model_kwargs: Optional[Union[dict, str]] = None
79
+ load_args: bool = True
80
+ load_data_args: bool = False
81
+
82
+ use_hf: bool = False
83
+ # None: use env var `MODELSCOPE_API_TOKEN`
84
+ hub_token: Optional[str] = field(
85
+ default=None, metadata={'help': 'SDK token can be found in https://modelscope.cn/my/myaccesstoken'})
86
+ custom_register_path: List[str] = field(default_factory=list) # .py
87
+
88
+ # extra
89
+ ignore_args_error: bool = False # True: notebook compatibility
90
+ use_swift_lora: bool = False # True for using tuner_backend == swift, don't specify this unless you know what you are doing # noqa
91
+
92
+ def _prepare_training_args(self, training_args: Dict[str, Any]) -> None:
93
+ pass
94
+
95
+ def _init_custom_register(self) -> None:
96
+ """Register custom .py file to datasets"""
97
+ if isinstance(self.custom_register_path, str):
98
+ self.custom_register_path = [self.custom_register_path]
99
+ if not self.custom_register_path:
100
+ return
101
+ for path in self.custom_register_path:
102
+ import_external_file(path)
103
+ logger.info(f'Successfully registered {self.custom_register_path}.')
104
+
105
+ def _import_external_plugins(self):
106
+ if isinstance(self.external_plugins, str):
107
+ self.external_plugins = [self.external_plugins]
108
+ if not self.external_plugins:
109
+ return
110
+ for external_plugin in self.external_plugins:
111
+ import_external_file(external_plugin)
112
+ logger.info(f'Successfully imported external_plugins: {self.external_plugins}.')
113
+
114
+ @staticmethod
115
+ def _check_is_adapter(adapter_dir: str) -> bool:
116
+ if (os.path.exists(os.path.join(adapter_dir, 'adapter_config.json'))
117
+ or os.path.exists(os.path.join(adapter_dir, 'default', 'adapter_config.json'))
118
+ or os.path.exists(os.path.join(adapter_dir, 'reft'))):
119
+ return True
120
+ return False
121
+
122
+ def _init_adapters(self):
123
+ if isinstance(self.adapters, str):
124
+ self.adapters = [self.adapters]
125
+ self.adapters = [
126
+ safe_snapshot_download(adapter, use_hf=self.use_hf, hub_token=self.hub_token) for adapter in self.adapters
127
+ ]
128
+
129
+ def __post_init__(self):
130
+ if self.use_hf or use_hf_hub():
131
+ self.use_hf = True
132
+ os.environ['USE_HF'] = '1'
133
+ CompatArguments.__post_init__(self)
134
+ self._init_adapters()
135
+ self._init_ckpt_dir()
136
+ self._init_custom_register()
137
+ self._import_external_plugins()
138
+ self._init_model_kwargs()
139
+ # The Seq2SeqTrainingArguments has a property called world_size, which cannot be assigned a value.
140
+ self.rank, self.local_rank, self.global_world_size, self.local_world_size = get_dist_setting()
141
+ logger.info(f'rank: {self.rank}, local_rank: {self.local_rank}, '
142
+ f'world_size: {self.global_world_size}, local_world_size: {self.local_world_size}')
143
+ if self.train_type not in extra_tuners:
144
+ for adapter in self.adapters:
145
+ assert self._check_is_adapter(adapter), (
146
+ f'`{adapter}` is not an adapter, please try using `--model` to pass it.')
147
+ ModelArguments.__post_init__(self)
148
+ QuantizeArguments.__post_init__(self)
149
+ TemplateArguments.__post_init__(self)
150
+ DataArguments.__post_init__(self)
151
+
152
+ self.hub = get_hub(self.use_hf)
153
+ if self.hub.try_login(self.hub_token):
154
+ logger.info('hub login successful!')
155
+
156
+ def _init_model_kwargs(self):
157
+ """Prepare model kwargs and set them to the env"""
158
+ self.model_kwargs: Dict[str, Any] = self.parse_to_dict(self.model_kwargs)
159
+ for k, v in self.model_kwargs.items():
160
+ k = k.upper()
161
+ os.environ[k] = str(v)
162
+
163
+ @property
164
+ def is_adapter(self) -> bool:
165
+ return self.train_type not in {'full'}
166
+
167
+ @property
168
+ def supported_tuners(self):
169
+ return get_supported_tuners()
170
+
171
+ @property
172
+ def adapters_can_be_merged(self):
173
+ return {'lora', 'longlora', 'llamapro', 'adalora'}
174
+
175
+ @classmethod
176
+ def from_pretrained(cls, checkpoint_dir: str):
177
+ self = super().__new__(cls)
178
+ self.load_data_args = True
179
+ self.ckpt_dir = checkpoint_dir
180
+ self.load_args_from_ckpt()
181
+ all_keys = list(f.name for f in fields(BaseArguments))
182
+ for key in all_keys:
183
+ if not hasattr(self, key):
184
+ setattr(self, key, None)
185
+ return self
186
+
187
+ def _init_ckpt_dir(self, adapters=None):
188
+ # compat megatron
189
+ model = self.model or getattr(self, 'mcore_model', None) or getattr(self, 'load', None)
190
+ self.ckpt_dir = get_ckpt_dir(model, adapters or self.adapters)
191
+ if self.ckpt_dir and self.load_args:
192
+ self.load_args_from_ckpt()
193
+
194
+ def load_args_from_ckpt(self) -> None:
195
+ from ..train_args import TrainArguments
196
+ args_path = os.path.join(self.ckpt_dir, 'args.json')
197
+ assert os.path.exists(args_path), f'args_path: {args_path}'
198
+ with open(args_path, 'r', encoding='utf-8') as f:
199
+ old_args = json.load(f)
200
+ all_keys = list(f.name for f in fields(BaseArguments))
201
+ data_keys = list(f.name for f in fields(DataArguments))
202
+ load_keys = [
203
+ # quant_args
204
+ 'bnb_4bit_quant_type',
205
+ 'bnb_4bit_use_double_quant',
206
+ # base_args
207
+ 'train_type',
208
+ 'tuner_backend',
209
+ 'use_swift_lora',
210
+ # data_args
211
+ 'model_name',
212
+ 'model_author',
213
+ 'split_dataset_ratio',
214
+ # template_args
215
+ 'use_chat_template',
216
+ ]
217
+ skip_keys = list(f.name for f in fields(GenerationArguments) + fields(CompatArguments)) + ['adapters']
218
+ if not isinstance(self, TrainArguments):
219
+ skip_keys += ['max_length']
220
+ all_keys = set(all_keys) - set(skip_keys)
221
+ for key, old_value in old_args.items():
222
+ if key not in all_keys or old_value is None:
223
+ continue
224
+ if not self.load_data_args and key in data_keys:
225
+ continue
226
+ value = getattr(self, key, None)
227
+ if value is None or isinstance(value, (list, tuple)) and len(value) == 0 or key in load_keys:
228
+ setattr(self, key, old_value)
229
+ logger.info(f'Successfully loaded {args_path}.')
230
+
231
+ def save_args(self, output_dir=None) -> None:
232
+ if is_master():
233
+ output_dir = output_dir or self.output_dir
234
+ os.makedirs(output_dir, exist_ok=True)
235
+ fpath = os.path.join(output_dir, 'args.json')
236
+ logger.info(f'The {self.__class__.__name__} will be saved in: {fpath}')
237
+ with open(fpath, 'w', encoding='utf-8') as f:
238
+ json.dump(check_json_format(self.__dict__), f, ensure_ascii=False, indent=2)
239
+
240
+ def _init_device(self):
241
+ if is_dist():
242
+ set_device()
243
+
244
+ def get_template(self, processor: 'Processor', template_type: Optional[str] = None) -> 'Template':
245
+ template_kwargs = self.get_template_kwargs()
246
+ template_type = template_type or self.template
247
+ template = get_template(template_type, processor, **template_kwargs)
248
+ return template
249
+
250
+ def get_model_processor(self,
251
+ *,
252
+ model=None,
253
+ model_type=None,
254
+ model_revision=None,
255
+ task_type=None,
256
+ num_labels=None,
257
+ **kwargs):
258
+ if self.tuner_backend == 'unsloth':
259
+ return load_by_unsloth(self)
260
+ kwargs.update(self.get_model_kwargs())
261
+ # compat rlhf
262
+ kwargs['model_id_or_path'] = model or self.model
263
+ kwargs['model_type'] = model_type or self.model_type
264
+ kwargs['model_revision'] = model_revision or self.model_revision
265
+ kwargs['task_type'] = task_type or self.task_type
266
+ kwargs['num_labels'] = num_labels or self.num_labels
267
+
268
+ return get_model_tokenizer(**kwargs)
ms-swift/swift/llm/argument/base_args/model_args.py ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ import ast
3
+ import math
4
+ import os
5
+ from dataclasses import dataclass, field
6
+ from typing import Any, Dict, Literal, Optional, Union
7
+
8
+ import json
9
+ import torch
10
+ from transformers.utils import is_torch_mps_available
11
+
12
+ from swift.llm import MODEL_MAPPING, HfConfigFactory, get_model_info_meta, get_model_name
13
+ from swift.utils import get_dist_setting, get_logger
14
+
15
+ logger = get_logger()
16
+
17
+
18
+ @dataclass
19
+ class ModelArguments:
20
+ """
21
+ ModelArguments class is a dataclass that holds various arguments related to model configuration and usage.
22
+
23
+ Args:
24
+ model (Optional[str]): model_id or model_path. Default is None.
25
+ model_type (Optional[str]): Type of the model group. Default is None.
26
+ model_revision (Optional[str]): Revision of the model. Default is None.
27
+ torch_dtype (Literal): Model parameter dtype. Default is None.
28
+ attn_impl (Literal): Attention implementation to use. Default is None.
29
+ num_labels (Optional[int]): Number of labels for classification tasks. Default is None.
30
+ rope_scaling (Literal): Type of rope scaling to use. Default is None.
31
+ device_map (Optional[str]): Configuration for device mapping. Default is None.
32
+ local_repo_path (Optional[str]): Path to the local github repository for model. Default is None.
33
+ init_strategy (Literal): Strategy to initialize all uninitialized parameters. Default is None.
34
+ """
35
+ model: Optional[str] = None # model id or model path
36
+ model_type: Optional[str] = field(
37
+ default=None, metadata={'help': f'model_type choices: {list(MODEL_MAPPING.keys())}'})
38
+ model_revision: Optional[str] = None
39
+ task_type: Literal['causal_lm', 'seq_cls', 'embedding'] = None
40
+
41
+ torch_dtype: Literal['bfloat16', 'float16', 'float32', None] = None
42
+ # flash_attn: It will automatically convert names based on the model.
43
+ # None: It will be automatically selected between sdpa and eager.
44
+ attn_impl: Literal['flash_attn', 'sdpa', 'eager', 'flex_attention', None] = None
45
+
46
+ num_labels: Optional[int] = None
47
+ problem_type: Literal['regression', 'single_label_classification', 'multi_label_classification'] = None
48
+ rope_scaling: Literal['linear', 'dynamic'] = None
49
+ device_map: Optional[Union[dict, str]] = None
50
+ max_memory: Optional[Union[dict, str]] = None
51
+ # When some model code needs to be downloaded from GitHub,
52
+ # this parameter specifies the path to the locally downloaded repository.
53
+ local_repo_path: Optional[str] = None
54
+ init_strategy: Literal['zero', 'uniform', 'normal', 'xavier_uniform', 'xavier_normal', 'kaiming_uniform',
55
+ 'kaiming_normal', 'orthogonal'] = None
56
+
57
+ @staticmethod
58
+ def parse_to_dict(value: Union[str, Dict, None], strict: bool = True) -> Union[str, Dict]:
59
+ """Convert a JSON string or JSON file into a dict"""
60
+ # If the value could potentially be a string, it is generally advisable to set strict to False.
61
+ if value is None:
62
+ value = {}
63
+ elif isinstance(value, str):
64
+ if os.path.exists(value): # local path
65
+ with open(value, 'r', encoding='utf-8') as f:
66
+ value = json.load(f)
67
+ else: # json str
68
+ try:
69
+ value = json.loads(value)
70
+ except json.JSONDecodeError:
71
+ if strict:
72
+ logger.error(f"Unable to parse string: '{value}'")
73
+ raise
74
+ return value
75
+
76
+ def _init_device_map(self):
77
+ """Prepare device map args"""
78
+ if self.device_map:
79
+ self.device_map: Union[str, Dict[str, Any], None] = self.parse_to_dict(self.device_map, strict=False)
80
+ # compat mp&ddp
81
+ _, local_rank, _, local_world_size = get_dist_setting()
82
+ if local_world_size > 1 and isinstance(self.device_map, dict) and local_rank > 0:
83
+ for k, v in self.device_map.items():
84
+ if isinstance(v, int):
85
+ self.device_map[k] += local_rank
86
+
87
+ def _init_max_memory(self):
88
+ if isinstance(self.max_memory, str):
89
+ try:
90
+ self.max_memory = ast.literal_eval(self.max_memory)
91
+ except Exception:
92
+ pass
93
+ self.max_memory = self.parse_to_dict(self.max_memory)
94
+ # compat mp&ddp
95
+ _, local_rank, _, local_world_size = get_dist_setting()
96
+ if local_world_size > 1 and isinstance(self.max_memory, dict) and local_rank > 0:
97
+ for k in list(self.max_memory.keys()):
98
+ if isinstance(k, int):
99
+ self.max_memory[k + local_rank] = self.max_memory.pop(k)
100
+
101
+ def _init_torch_dtype(self) -> None:
102
+ """"If torch_dtype is None, find a proper dtype by the train_type/GPU"""
103
+ from swift.llm import TrainArguments
104
+
105
+ self.torch_dtype: Optional[torch.dtype] = HfConfigFactory.to_torch_dtype(self.torch_dtype)
106
+ self.torch_dtype: torch.dtype = self._init_model_info()
107
+ # Mixed Precision Training
108
+ if isinstance(self, TrainArguments):
109
+ self._init_mixed_precision()
110
+
111
+ def _init_mixed_precision(self):
112
+ if is_torch_mps_available():
113
+ fp16, bf16 = False, False
114
+ elif self.torch_dtype in {torch.float16, torch.float32}:
115
+ fp16, bf16 = True, False
116
+ elif self.torch_dtype == torch.bfloat16:
117
+ fp16, bf16 = False, True
118
+ else:
119
+ raise ValueError(f'args.torch_dtype: {self.torch_dtype}')
120
+ if self.fp16 is None:
121
+ self.fp16 = fp16
122
+ if self.bf16 is None:
123
+ self.bf16 = bf16
124
+
125
+ def _init_rope_scaling(self):
126
+ assert self.max_length is not None, 'Use max_model_len together with rope_scaling'
127
+ rope_scaling = self.model_info.rope_scaling or {}
128
+ max_model_len = self.model_info.max_model_len
129
+ rope_scaling_factor = 1.0
130
+ if max_model_len:
131
+ rope_scaling_factor = max(float(math.ceil(self.max_length / max_model_len)), 1.0)
132
+ if rope_scaling:
133
+ rope_scaling_factor = max(rope_scaling.get('factor', -1), rope_scaling_factor)
134
+ rope_scaling['type'] = self.rope_scaling
135
+ rope_scaling['factor'] = rope_scaling_factor
136
+ else:
137
+ rope_scaling = {'type': self.rope_scaling, 'factor': rope_scaling_factor}
138
+ self.rope_scaling = rope_scaling
139
+ logger.info(f'rope_scaling is set to type: {self.rope_scaling}')
140
+
141
+ def _init_model_info(self) -> torch.dtype:
142
+ self.model_info, self.model_meta = get_model_info_meta(**self.get_model_kwargs())
143
+ self.task_type = self.model_info.task_type
144
+ self.num_labels = self.model_info.num_labels
145
+
146
+ self.model_dir = self.model_info.model_dir
147
+ self.model_type = self.model_info.model_type
148
+ if isinstance(self.rope_scaling, str):
149
+ self._init_rope_scaling()
150
+ return self.model_info.torch_dtype
151
+
152
+ def __post_init__(self):
153
+ if self.model is None:
154
+ raise ValueError(f'Please set --model <model_id_or_path>`, model: {self.model}')
155
+ self.model_suffix = get_model_name(self.model)
156
+ self._init_device_map()
157
+ self._init_max_memory()
158
+ self._init_torch_dtype()
159
+
160
+ def get_model_kwargs(self):
161
+ return {
162
+ 'model_id_or_path': self.model,
163
+ 'torch_dtype': self.torch_dtype,
164
+ 'model_type': self.model_type,
165
+ 'revision': self.model_revision,
166
+ 'use_hf': self.use_hf,
167
+ 'hub_token': self.hub_token,
168
+ 'local_repo_path': self.local_repo_path,
169
+ 'device_map': self.device_map,
170
+ 'max_memory': self.max_memory,
171
+ 'quantization_config': self.get_quantization_config(),
172
+ 'attn_impl': self.attn_impl,
173
+ 'rope_scaling': self.rope_scaling,
174
+ 'task_type': self.task_type,
175
+ 'num_labels': self.num_labels,
176
+ 'problem_type': self.problem_type,
177
+ 'init_strategy': self.init_strategy,
178
+ }
ms-swift/swift/llm/argument/base_args/quant_args.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Literal, Optional
5
+
6
+ import torch
7
+
8
+ from swift.llm import HfConfigFactory
9
+
10
+
11
+ @dataclass
12
+ class QuantizeArguments:
13
+ """
14
+ QuantizeArguments is a dataclass that holds the configuration for model quantization.
15
+
16
+ Args:
17
+ quant_method (Literal['bnb', 'hqq', 'eetq']): The quantization method to be used.
18
+ quant_bits (Literal[1, 2, 3, 4, 8]): The number of bits to use for quantization.
19
+ hqq_axis (Optional[int]): The axis for hqq quantization.
20
+ bnb_4bit_compute_dtype (Literal['float16', 'bfloat16', 'float32', None]):
21
+ The compute dtype for bnb 4-bit quantization.
22
+ bnb_4bit_quant_type (Literal['fp4', 'nf4']): The quantization type for bnb 4-bit quantization.
23
+ bnb_4bit_use_double_quant (bool): Whether to use double quantization for bnb 4-bit quantization.
24
+ bnb_4bit_quant_storage (Optional[str]): This sets the storage type to pack the quantized 4-bit params.
25
+ """
26
+ # awq, gptq, and aqlm need to be pre-quantized models.
27
+ # It can be detected automatically, without the need to pass in.
28
+ # while bnb, hqq, and eetq can be quantized during SFT using the original models.
29
+ quant_method: Literal['bnb', 'hqq', 'eetq', 'quanto'] = None
30
+ # bnb: 4,8; hqq: 1,2,3,4,8'; eetq: 8
31
+ # awq: 4; gptq: 2,3,4,8
32
+ quant_bits: Literal[1, 2, 3, 4, 8, 'float8'] = None
33
+ # hqq
34
+ hqq_axis: Optional[int] = None
35
+ # bnb
36
+ bnb_4bit_compute_dtype: Literal['float16', 'bfloat16', 'float32', None] = None
37
+ bnb_4bit_quant_type: Literal['fp4', 'nf4'] = 'nf4'
38
+ bnb_4bit_use_double_quant: bool = True
39
+ bnb_4bit_quant_storage: Optional[str] = None
40
+
41
+ def get_quantization_config(self):
42
+ if self.quant_method is None or self.quant_method in {'awq', 'gptq'}:
43
+ return None
44
+ assert self.quant_method in {'bnb', 'hqq', 'eetq', 'quanto'}
45
+ if self.quant_bits is None:
46
+ raise ValueError(f'Please set the quant_bits. args.quant_bits: {self.quant_bits}')
47
+ if self.quant_method == 'bnb':
48
+ if self.quant_bits == 4:
49
+ load_in_4bit, load_in_8bit = True, False
50
+ elif self.quant_bits == 8:
51
+ load_in_4bit, load_in_8bit = False, True
52
+ else:
53
+ raise ValueError(f'bnb not support quant_bits: {self.quant_bits}')
54
+
55
+ from transformers import BitsAndBytesConfig
56
+ quantization_config = BitsAndBytesConfig(
57
+ load_in_4bit=load_in_4bit,
58
+ load_in_8bit=load_in_8bit,
59
+ bnb_4bit_compute_dtype=self.bnb_4bit_compute_dtype,
60
+ bnb_4bit_quant_type=self.bnb_4bit_quant_type,
61
+ bnb_4bit_use_double_quant=self.bnb_4bit_use_double_quant,
62
+ bnb_4bit_quant_storage=self.bnb_4bit_quant_storage)
63
+ elif self.quant_method == 'hqq':
64
+ from transformers import HqqConfig
65
+ quantization_config = HqqConfig(nbits=self.quant_bits, axis=self.hqq_axis)
66
+ elif self.quant_method == 'quanto':
67
+ from transformers import QuantoConfig
68
+ if self.quant_bits == 8:
69
+ weights = 'int8'
70
+ elif self.quant_bits == 'float8':
71
+ weights = 'float8'
72
+ elif self.quant_bits == 4:
73
+ weights = 'int4'
74
+ elif self.quant_bits == 2:
75
+ weights = 'int2'
76
+ else:
77
+ raise ValueError('quanto quantization only support quant bits 2/4/8/float8')
78
+ quantization_config = QuantoConfig(weights=weights)
79
+ else: # 'eetq'
80
+ from transformers import EetqConfig
81
+ quantization_config = EetqConfig(f'int{self.quant_bits}')
82
+
83
+ return quantization_config
84
+
85
+ def __post_init__(self):
86
+ if self.bnb_4bit_compute_dtype is None:
87
+ if self.torch_dtype in {torch.float16, torch.float32}:
88
+ self.bnb_4bit_compute_dtype = torch.float32
89
+ elif self.torch_dtype == torch.bfloat16:
90
+ self.bnb_4bit_compute_dtype = torch.bfloat16
91
+ self.bnb_4bit_compute_dtype: torch.dtype = HfConfigFactory.to_torch_dtype(self.bnb_4bit_compute_dtype)
ms-swift/swift/llm/argument/deploy_args.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ from dataclasses import dataclass
3
+ from typing import Optional
4
+
5
+ from swift.llm import safe_snapshot_download
6
+ from swift.utils import find_free_port, get_logger
7
+ from .infer_args import InferArguments
8
+
9
+ logger = get_logger()
10
+
11
+
12
+ @dataclass
13
+ class DeployArguments(InferArguments):
14
+ """
15
+ DeployArguments is a dataclass that extends InferArguments and is used to define
16
+ the arguments required for deploying a model.
17
+
18
+ Args:
19
+ host (str): The host address to bind the server to. Default is '0.0.0.0'.
20
+ port (int): The port number to bind the server to. Default is 8000.
21
+ api_key (Optional[str]): The API key for authentication. Default is None.
22
+ ssl_keyfile (Optional[str]): The path to the SSL key file. Default is None.
23
+ ssl_certfile (Optional[str]): The path to the SSL certificate file. Default is None.
24
+ owned_by (str): The owner of the deployment. Default is 'swift'.
25
+ served_model_name (Optional[str]): The name of the model being served. Default is None.
26
+ verbose (bool): Whether to log request information. Default is True.
27
+ log_interval (int): The interval for printing global statistics. Default is 20.
28
+ max_logprobs(int): Max number of logprobs to return
29
+ """
30
+ host: str = '0.0.0.0'
31
+ port: int = 8000
32
+ api_key: Optional[str] = None
33
+ ssl_keyfile: Optional[str] = None
34
+ ssl_certfile: Optional[str] = None
35
+
36
+ owned_by: str = 'swift'
37
+ served_model_name: Optional[str] = None
38
+ verbose: bool = True # Whether to log request_info
39
+ log_interval: int = 20 # Interval for printing global statistics
40
+
41
+ max_logprobs: int = 20
42
+
43
+ def __post_init__(self):
44
+ super().__post_init__()
45
+ self.port = find_free_port(self.port)
46
+
47
+ def _init_adapters(self):
48
+ if isinstance(self.adapters, str):
49
+ self.adapters = [self.adapters]
50
+ self.adapter_mapping = {}
51
+ adapters = []
52
+ for i, adapter in enumerate(self.adapters):
53
+ adapter_path = adapter.split('=')
54
+ if len(adapter_path) == 1:
55
+ adapter_path = (None, adapter_path[0])
56
+ adapter_name, adapter_path = adapter_path
57
+ adapter_path = safe_snapshot_download(adapter_path, use_hf=self.use_hf, hub_token=self.hub_token)
58
+ if adapter_name is None:
59
+ adapters.append(adapter_path)
60
+ else:
61
+ self.adapter_mapping[adapter_name] = adapter_path
62
+ self.adapters = adapters
63
+
64
+ def _init_ckpt_dir(self, adapters=None):
65
+ return super()._init_ckpt_dir(self.adapters + list(self.adapter_mapping.values()))
66
+
67
+ def _init_stream(self):
68
+ pass
69
+
70
+ def _init_eval_human(self):
71
+ pass
72
+
73
+ def _init_result_path(self, folder_name: str) -> None:
74
+ if folder_name == 'infer_result':
75
+ folder_name = 'deploy_result'
76
+ return super()._init_result_path(folder_name)
ms-swift/swift/llm/argument/export_args.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ import os
3
+ from dataclasses import dataclass
4
+ from typing import Literal, Optional
5
+
6
+ import torch
7
+ import torch.distributed as dist
8
+
9
+ from swift.utils import get_logger, init_process_group, set_default_ddp_config
10
+ from .base_args import BaseArguments, to_abspath
11
+ from .merge_args import MergeArguments
12
+
13
+ logger = get_logger()
14
+
15
+
16
@dataclass
class ExportArguments(MergeArguments, BaseArguments):
    """
    ExportArguments is a dataclass that inherits from BaseArguments and MergeArguments.

    Args:
        output_dir (Optional[str]): Directory where the output will be saved.
        quant_method (Optional[str]): Quantization method: 'awq', 'gptq' or 'bnb'. Default is None.
        quant_n_samples (int): Number of samples for quantization.
        max_length (int): Sequence length for quantization.
        quant_batch_size (int): Batch size for quantization; -1 means no batching.
        group_size (int): Group size used by the quantizer.
        to_ollama (bool): Flag to indicate export model to ollama format.
        to_mcore (bool): Convert a checkpoint to Megatron-core format.
        to_hf (bool): Convert a Megatron-core checkpoint back to HF format.
        mcore_model (Optional[str]): Path to the Megatron-core model used in conversion.
        thread_count (Optional[int]): Thread count for the conversion.
        test_convert_precision (bool): Verify numerical precision after conversion.
        push_to_hub (bool): Flag to indicate if the output should be pushed to the model hub.
        hub_model_id (Optional[str]): Model ID for the hub.
        hub_private_repo (bool): Flag to indicate if the hub repository is private.
        commit_message (str): Commit message for pushing to the hub.
        to_peft_format (bool): Flag to indicate if the output should be in PEFT format.
            This argument is useless for now.
        exist_ok (bool): Allow `output_dir` to already exist instead of raising.
    """
    output_dir: Optional[str] = None

    # awq/gptq
    quant_method: Optional[Literal['awq', 'gptq', 'bnb']] = None
    quant_n_samples: int = 256
    max_length: int = 2048
    quant_batch_size: int = 1
    group_size: int = 128

    # ollama
    to_ollama: bool = False

    # megatron
    to_mcore: bool = False
    to_hf: bool = False
    mcore_model: Optional[str] = None
    thread_count: Optional[int] = None
    test_convert_precision: bool = False

    # push to ms hub
    push_to_hub: bool = False
    # 'user_name/repo_name' or 'repo_name'
    hub_model_id: Optional[str] = None
    hub_private_repo: bool = False
    commit_message: str = 'update files'
    # compat
    to_peft_format: bool = False
    exist_ok: bool = False

    def _init_output_dir(self):
        # Derive a default output directory next to the checkpoint, suffixed by
        # the kind of export being performed; an explicit --output_dir wins.
        if self.output_dir is None:
            ckpt_dir = self.ckpt_dir or f'./{self.model_suffix}'
            ckpt_dir, ckpt_name = os.path.split(ckpt_dir)
            if self.to_peft_format:
                suffix = 'peft'
            elif self.quant_method:
                # e.g. 'awq-int4'; quant_bits is validated in __post_init__.
                suffix = f'{self.quant_method}-int{self.quant_bits}'
            elif self.to_ollama:
                suffix = 'ollama'
            elif self.merge_lora:
                suffix = 'merged'
            elif self.to_mcore:
                suffix = 'mcore'
            elif self.to_hf:
                suffix = 'hf'
            else:
                # No export action selected: leave output_dir unset.
                return

            self.output_dir = os.path.join(ckpt_dir, f'{ckpt_name}-{suffix}')

        self.output_dir = to_abspath(self.output_dir)
        if not self.exist_ok and os.path.exists(self.output_dir):
            raise FileExistsError(f'args.output_dir: `{self.output_dir}` already exists.')
        logger.info(f'args.output_dir: `{self.output_dir}`')

    def __post_init__(self):
        # -1 is the CLI sentinel for "no batching".
        if self.quant_batch_size == -1:
            self.quant_batch_size = None
        # quant_bits / merge_lora / torch_dtype come from the parent argument
        # classes (presumably MergeArguments/BaseArguments — confirm there).
        if self.quant_bits and self.quant_method is None:
            raise ValueError('Please specify the quantization method using `--quant_method awq/gptq/bnb`.')
        if self.quant_method and self.quant_bits is None:
            raise ValueError('Please specify `--quant_bits`.')
        # awq/gptq quantizers run in fp16 unless the user chose a dtype.
        if self.quant_method in {'gptq', 'awq'} and self.torch_dtype is None:
            self.torch_dtype = torch.float16
        if self.to_mcore or self.to_hf:
            self.mcore_model = to_abspath(self.mcore_model, check_path_exist=True)
            # Megatron conversion runs under torch.distributed; set it up if needed.
            if not dist.is_initialized():
                set_default_ddp_config()
                init_process_group()

        BaseArguments.__post_init__(self)
        self._init_output_dir()
        # gptq/awq calibrate on real data, so a quant dataset is mandatory.
        if self.quant_method in {'gptq', 'awq'} and len(self.dataset) == 0:
            raise ValueError(f'self.dataset: {self.dataset}, Please input the quant dataset.')
ms-swift/swift/llm/argument/infer_args.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ import datetime as dt
3
+ import os
4
+ from dataclasses import dataclass
5
+ from typing import Literal, Optional, Union
6
+
7
+ import torch.distributed as dist
8
+
9
+ from swift.utils import get_logger, init_process_group, is_dist
10
+ from .base_args import BaseArguments, to_abspath
11
+ from .base_args.model_args import ModelArguments
12
+ from .merge_args import MergeArguments
13
+
14
+ logger = get_logger()
15
+
16
+
17
@dataclass
class LmdeployArguments:
    """Configuration options forwarded to the lmdeploy inference engine.

    Args:
        tp (int): Tensor parallelism size. Default is 1.
        session_len (Optional[int]): The session length. Default is None.
        cache_max_entry_count (float): Maximum entry count for cache. Default is 0.8.
        quant_policy (int): Quantization policy, e.g. 4, 8. Default is 0.
        vision_batch_size (int): `max_batch_size` in VisionConfig. Default is 1.
    """

    # lmdeploy
    tp: int = 1
    session_len: Optional[int] = None
    cache_max_entry_count: float = 0.8
    quant_policy: int = 0  # e.g. 4, 8
    vision_batch_size: int = 1  # max_batch_size in VisionConfig

    def get_lmdeploy_engine_kwargs(self):
        """Collect the engine keyword arguments as a plain dict."""
        engine_kwargs = dict(
            tp=self.tp,
            session_len=self.session_len,
            cache_max_entry_count=self.cache_max_entry_count,
            quant_policy=self.quant_policy,
            vision_batch_size=self.vision_batch_size,
        )
        # Pin the engine to this rank's device under torch.distributed.
        if dist.is_initialized():
            engine_kwargs['devices'] = [dist.get_rank()]
        return engine_kwargs
48
+
49
+
50
@dataclass
class VllmArguments:
    """Configuration options forwarded to the vLLM inference engine.

    Args:
        gpu_memory_utilization (float): GPU memory utilization. Default is 0.9.
        tensor_parallel_size (int): Tensor parallelism size. Default is 1.
        pipeline_parallel_size (int): Pipeline parallelism size. Default is 1.
        max_num_seqs (int): Maximum number of sequences. Default is 256.
        max_model_len (Optional[int]): Maximum model length. Default is None.
        disable_custom_all_reduce (bool): Flag to disable custom all-reduce. Default is False.
        enforce_eager (bool): Flag to enforce eager execution. Default is False.
        limit_mm_per_prompt (Optional[str]): Limit multimedia per prompt. Default is None.
        vllm_max_lora_rank (int): Maximum LoRA rank. Default is 16.
        enable_prefix_caching (bool): Flag to enable automatic prefix caching. Default is False.
    """
    # vllm
    gpu_memory_utilization: float = 0.9
    tensor_parallel_size: int = 1
    pipeline_parallel_size: int = 1
    max_num_seqs: int = 256
    max_model_len: Optional[int] = None
    disable_custom_all_reduce: bool = False
    enforce_eager: bool = False
    limit_mm_per_prompt: Optional[Union[dict, str]] = None  # '{"image": 5, "video": 2}'
    vllm_max_lora_rank: int = 16
    enable_prefix_caching: bool = False
    use_async_engine: bool = True
    data_parallel_size: int = 1
    log_level: Literal['critical', 'error', 'warning', 'info', 'debug', 'trace'] = 'info'
    vllm_quantization: Optional[str] = None

    def __post_init__(self):
        # Accept a JSON string from the CLI and normalize it to a dict.
        self.limit_mm_per_prompt = ModelArguments.parse_to_dict(self.limit_mm_per_prompt)

    def get_vllm_engine_kwargs(self):
        """Collect the engine keyword arguments as a plain dict.

        Relies on `self.adapters` (and, when present, `self.adapter_mapping`)
        being provided by the class this mixin is combined with.
        """
        lora_adapters = list(self.adapters)
        if hasattr(self, 'adapter_mapping'):
            lora_adapters.extend(self.adapter_mapping.values())
        engine_kwargs = {
            'gpu_memory_utilization': self.gpu_memory_utilization,
            'tensor_parallel_size': self.tensor_parallel_size,
            'pipeline_parallel_size': self.pipeline_parallel_size,
            'max_num_seqs': self.max_num_seqs,
            'max_model_len': self.max_model_len,
            'disable_custom_all_reduce': self.disable_custom_all_reduce,
            'enforce_eager': self.enforce_eager,
            'limit_mm_per_prompt': self.limit_mm_per_prompt,
            'max_lora_rank': self.vllm_max_lora_rank,
            'enable_lora': len(lora_adapters) > 0,
            'max_loras': max(len(lora_adapters), 1),
            'enable_prefix_caching': self.enable_prefix_caching,
            'quantization': self.vllm_quantization,
        }
        if dist.is_initialized():
            engine_kwargs['device'] = dist.get_rank()
        return engine_kwargs
108
+
109
+
110
@dataclass
class InferArguments(MergeArguments, VllmArguments, LmdeployArguments, BaseArguments):
    """
    InferArguments is a dataclass that extends BaseArguments, MergeArguments, VllmArguments, and LmdeployArguments.
    It is used to define the arguments required for model inference.

    Args:
        ckpt_dir (Optional[str]): Directory to the checkpoint. Default is None.
        infer_backend (Literal): Backend to use for inference. Default is 'pt'.
            Allowed values are 'vllm', 'pt', 'lmdeploy'.
        result_path (Optional[str]): Directory to store inference results. Default is None.
        metric (Optional[str]): Evaluation metric, 'acc' or 'rouge'. Default is None.
        max_batch_size (int): Maximum batch size for the pt engine. Default is 1.
        ddp_backend (Optional[str]): torch.distributed backend for multi-process inference. Default is None.
        val_dataset_sample (Optional[int]): Sample size for validation dataset. Default is None.
    """
    infer_backend: Literal['vllm', 'pt', 'lmdeploy'] = 'pt'

    result_path: Optional[str] = None
    metric: Optional[Literal['acc', 'rouge']] = None
    # for pt engine
    max_batch_size: int = 1
    ddp_backend: Optional[str] = None

    # only for inference
    val_dataset_sample: Optional[int] = None

    def _get_result_path(self, folder_name: str) -> str:
        # Results live under the checkpoint dir when available, otherwise
        # under ./result/<model>; one timestamped jsonl file per run.
        result_dir = self.ckpt_dir or f'result/{self.model_suffix}'
        os.makedirs(result_dir, exist_ok=True)
        result_dir = to_abspath(os.path.join(result_dir, folder_name))
        os.makedirs(result_dir, exist_ok=True)
        time = dt.datetime.now().strftime('%Y%m%d-%H%M%S')
        return os.path.join(result_dir, f'{time}.jsonl')

    def _init_result_path(self, folder_name: str) -> None:
        # An explicit --result_path wins; otherwise derive a timestamped one.
        if self.result_path is not None:
            self.result_path = to_abspath(self.result_path)
            return
        self.result_path = self._get_result_path(folder_name)
        logger.info(f'args.result_path: {self.result_path}')

    def _init_stream(self):
        # NOTE(review): __post_init__ calls _init_eval_human() before this
        # method, so this assignment overrides the value it computed — confirm
        # this precedence is intended.
        self.eval_human = not (self.dataset and self.split_dataset_ratio > 0 or self.val_dataset)

        # Beam search yields whole sequences, so token streaming is disabled.
        if self.stream and self.num_beams != 1:
            self.stream = False
            logger.info('Setting args.stream: False')

    def _init_ddp(self):
        if not is_dist():
            return
        # Interactive input and streaming output are incompatible with DDP.
        assert not self.eval_human and not self.stream, (
            f'args.eval_human: {self.eval_human}, args.stream: {self.stream}')
        self._init_device()
        init_process_group(self.ddp_backend)

    def __post_init__(self) -> None:
        BaseArguments.__post_init__(self)
        VllmArguments.__post_init__(self)
        self._init_result_path('infer_result')
        self._init_eval_human()
        self._init_stream()
        self._init_ddp()

    def _init_eval_human(self):
        # Fall back to interactive (human) evaluation when no dataset is given.
        if len(self.dataset) == 0 and len(self.val_dataset) == 0:
            eval_human = True
        else:
            eval_human = False
        self.eval_human = eval_human
        logger.info(f'Setting args.eval_human: {self.eval_human}')
ms-swift/swift/llm/argument/train_args.py ADDED
@@ -0,0 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ import os
3
+ from dataclasses import dataclass, field
4
+ from typing import Literal, Optional
5
+
6
+ from transformers import Seq2SeqTrainingArguments
7
+ from transformers.utils.versions import require_version
8
+
9
+ from swift.plugin import LOSS_MAPPING
10
+ from swift.trainers import TrainerFactory
11
+ from swift.trainers.arguments import TrainArgumentsMixin
12
+ from swift.utils import (add_version_to_work_dir, get_device_count, get_logger, get_pai_tensorboard_dir, is_master,
13
+ is_mp, is_pai_training_job, is_swanlab_available)
14
+ from .base_args import BaseArguments, to_abspath
15
+ from .tuner_args import TunerArguments
16
+
17
+ logger = get_logger()
18
+
19
+
20
+ @dataclass
21
+ class Seq2SeqTrainingOverrideArguments(TrainArgumentsMixin, Seq2SeqTrainingArguments):
22
+ """Override the default value in `Seq2SeqTrainingArguments`"""
23
+ output_dir: Optional[str] = None
24
+ learning_rate: Optional[float] = None
25
+ eval_strategy: Optional[str] = None # steps, epoch
26
+ fp16: Optional[bool] = None
27
+ bf16: Optional[bool] = None
28
+
29
+ def _init_output_dir(self):
30
+ if self.output_dir is None:
31
+ self.output_dir = f'output/{self.model_suffix}'
32
+ self.output_dir = to_abspath(self.output_dir)
33
+
34
+ def _init_eval_strategy(self):
35
+ if self.eval_strategy is None:
36
+ self.eval_strategy = self.save_strategy
37
+ if self.eval_strategy == 'no':
38
+ self.eval_steps = None
39
+ self.split_dataset_ratio = 0.
40
+ logger.info(f'Setting args.split_dataset_ratio: {self.split_dataset_ratio}')
41
+ elif self.eval_strategy == 'steps' and self.eval_steps is None:
42
+ self.eval_steps = self.save_steps
43
+ self.evaluation_strategy = self.eval_strategy
44
+
45
+ def _init_metric_for_best_model(self):
46
+ if self.metric_for_best_model is None:
47
+ self.metric_for_best_model = 'rouge-l' if self.predict_with_generate else 'loss'
48
+
49
+ def __post_init__(self):
50
+ self._init_output_dir()
51
+ self._init_metric_for_best_model()
52
+ if self.greater_is_better is None and self.metric_for_best_model is not None:
53
+ self.greater_is_better = 'loss' not in self.metric_for_best_model
54
+
55
+ if self.learning_rate is None:
56
+ if self.train_type == 'full':
57
+ self.learning_rate = 1e-5
58
+ else:
59
+ self.learning_rate = 1e-4
60
+ self._init_eval_strategy()
61
+
62
+
63
+ @dataclass
64
+ class SwanlabArguments:
65
+
66
+ swanlab_token: Optional[str] = None
67
+ swanlab_project: Optional[str] = None
68
+ swanlab_workspace: Optional[str] = None
69
+ swanlab_exp_name: Optional[str] = None
70
+ swanlab_mode: Literal['cloud', 'local'] = 'cloud'
71
+
72
+ def _init_swanlab(self):
73
+ if not is_swanlab_available():
74
+ raise ValueError('You are using swanlab as `report_to`, please install swanlab by ' '`pip install swanlab`')
75
+ if not self.swanlab_exp_name:
76
+ self.swanlab_exp_name = self.output_dir
77
+ from transformers.integrations import INTEGRATION_TO_CALLBACK
78
+ import swanlab
79
+ from swanlab.integration.transformers import SwanLabCallback
80
+ if self.swanlab_token:
81
+ swanlab.login(self.swanlab_token)
82
+ INTEGRATION_TO_CALLBACK['swanlab'] = SwanLabCallback(
83
+ project=self.swanlab_project,
84
+ workspace=self.swanlab_workspace,
85
+ experiment_name=self.swanlab_exp_name,
86
+ config={'UPPERFRAME': '🐦‍⬛ms-swift'},
87
+ mode=self.swanlab_mode,
88
+ )
89
+
90
+
91
+ @dataclass
92
+ class TrainArguments(SwanlabArguments, TunerArguments, Seq2SeqTrainingOverrideArguments, BaseArguments):
93
+ """
94
+ TrainArguments class is a dataclass that inherits from multiple argument classes:
95
+ TunerArguments, Seq2SeqTrainingOverrideArguments, and BaseArguments.
96
+
97
+ Args:
98
+ add_version (bool): Flag to add version information to output_dir. Default is True.
99
+ resume_only_model (bool): Flag to resume training only the model. Default is False.
100
+ loss_type (Optional[str]): Type of loss function to use. Default is None.
101
+ packing (bool): Flag to enable packing of datasets. Default is False.
102
+ lazy_tokenize (Optional[bool]): Flag to enable lazy tokenization. Default is None.
103
+ max_new_tokens (int): Maximum number of new tokens to generate. Default is 64.
104
+ temperature (float): Temperature for sampling. Default is 0.
105
+ optimizer (Optional[str]): Optimizer type to use, define it in the plugin package. Default is None.
106
+ metric (Optional[str]): Metric to use for evaluation, define it in the plugin package. Default is None.
107
+ """
108
+ add_version: bool = True
109
+ resume_only_model: bool = False
110
+ create_checkpoint_symlink: bool = False
111
+
112
+ # dataset
113
+ packing: bool = False
114
+ lazy_tokenize: Optional[bool] = None
115
+
116
+ # plugin
117
+ loss_type: Optional[str] = field(default=None, metadata={'help': f'loss_func choices: {list(LOSS_MAPPING.keys())}'})
118
+ optimizer: Optional[str] = None
119
+ metric: Optional[str] = None
120
+
121
+ # extra
122
+ max_new_tokens: int = 64
123
+ temperature: float = 0.
124
+ load_args: bool = False
125
+
126
+ # zero++
127
+ zero_hpz_partition_size: Optional[int] = None
128
+
129
+ def _init_lazy_tokenize(self):
130
+ if self.streaming and self.lazy_tokenize:
131
+ self.lazy_tokenize = False
132
+ logger.warning('Streaming and lazy_tokenize are incompatible. '
133
+ f'Setting args.lazy_tokenize: {self.lazy_tokenize}.')
134
+ if self.lazy_tokenize is None:
135
+ self.lazy_tokenize = self.model_meta.is_multimodal and not self.streaming
136
+ logger.info(f'Setting args.lazy_tokenize: {self.lazy_tokenize}')
137
+
138
+ def __post_init__(self) -> None:
139
+ if self.packing and self.attn_impl != 'flash_attn':
140
+ raise ValueError('The "packing" feature needs to be used in conjunction with "flash_attn". '
141
+ 'Please specify `--attn_impl flash_attn`.')
142
+ if self.resume_from_checkpoint:
143
+ self.resume_from_checkpoint = to_abspath(self.resume_from_checkpoint, True)
144
+ if self.resume_only_model:
145
+ if self.train_type == 'full':
146
+ self.model = self.resume_from_checkpoint
147
+ else:
148
+ self.adapters = [self.resume_from_checkpoint]
149
+ BaseArguments.__post_init__(self)
150
+ Seq2SeqTrainingOverrideArguments.__post_init__(self)
151
+ TunerArguments.__post_init__(self)
152
+
153
+ if self.optimizer is None:
154
+ if self.lorap_lr_ratio:
155
+ self.optimizer = 'lorap'
156
+ elif self.use_galore:
157
+ self.optimizer = 'galore'
158
+
159
+ if len(self.dataset) == 0:
160
+ raise ValueError(f'self.dataset: {self.dataset}, Please input the training dataset.')
161
+
162
+ self._handle_pai_compat()
163
+
164
+ self._init_deepspeed()
165
+ self._init_device()
166
+ self._init_lazy_tokenize()
167
+
168
+ if getattr(self, 'accelerator_config', None) is None:
169
+ self.accelerator_config = {'dispatch_batches': False}
170
+ self.training_args = TrainerFactory.get_training_args(self)
171
+ self.training_args.remove_unused_columns = False
172
+
173
+ self._add_version()
174
+
175
+ if 'swanlab' in self.report_to:
176
+ self._init_swanlab()
177
+
178
+ def _init_deepspeed(self):
179
+ if self.deepspeed:
180
+ require_version('deepspeed')
181
+ if is_mp():
182
+ raise ValueError('DeepSpeed is not compatible with `device_map`. '
183
+ f'n_gpu: {get_device_count()}, '
184
+ f'local_world_size: {self.local_world_size}.')
185
+
186
+ ds_config_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'ds_config'))
187
+ deepspeed_mapping = {
188
+ name: f'{name}.json'
189
+ for name in ['zero0', 'zero1', 'zero2', 'zero3', 'zero2_offload', 'zero3_offload']
190
+ }
191
+ for ds_name, ds_config in deepspeed_mapping.items():
192
+ if self.deepspeed == ds_name:
193
+ self.deepspeed = os.path.join(ds_config_folder, ds_config)
194
+ break
195
+
196
+ self.deepspeed = self.parse_to_dict(self.deepspeed)
197
+ if self.zero_hpz_partition_size is not None:
198
+ assert 'zero_optimization' in self.deepspeed
199
+ self.deepspeed['zero_optimization']['zero_hpz_partition_size'] = self.zero_hpz_partition_size
200
+ logger.warn('If `zero_hpz_partition_size`(ZeRO++) causes grad_norm NaN, please'
201
+ ' try `--torch_dtype float16`')
202
+ logger.info(f'Using deepspeed: {self.deepspeed}')
203
+
204
+ def _handle_pai_compat(self) -> None:
205
+ if not is_pai_training_job():
206
+ return
207
+
208
+ logger.info('Handle pai compat...')
209
+ pai_tensorboard_dir = get_pai_tensorboard_dir()
210
+ if self.logging_dir is None and pai_tensorboard_dir is not None:
211
+ self.logging_dir = pai_tensorboard_dir
212
+ logger.info(f'Setting args.logging_dir: {self.logging_dir}')
213
+ self.add_version = False
214
+ logger.info(f'Setting args.add_version: {self.add_version}')
215
+
216
+ def _add_version(self):
217
+ """Prepare the output_dir"""
218
+ if self.add_version:
219
+ self.output_dir = add_version_to_work_dir(self.output_dir)
220
+ logger.info(f'output_dir: {self.output_dir}')
221
+
222
+ if self.logging_dir is None:
223
+ self.logging_dir = f'{self.output_dir}/runs'
224
+
225
+ self.logging_dir = to_abspath(self.logging_dir)
226
+ if is_master():
227
+ os.makedirs(self.output_dir, exist_ok=True)
228
+
229
+ if self.run_name is None:
230
+ self.run_name = self.output_dir
231
+
232
+ self.training_args.output_dir = self.output_dir
233
+ self.training_args.run_name = self.run_name
234
+ self.training_args.logging_dir = self.logging_dir
ms-swift/swift/llm/argument/tuner_args.py ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ from dataclasses import dataclass, field
3
+ from typing import List, Literal, Optional
4
+
5
+ from transformers.utils import strtobool
6
+
7
+ from swift.llm import get_model_arch
8
+ from swift.utils import get_logger
9
+
10
+ logger = get_logger()
11
+
12
+
13
@dataclass
class TunerArguments:
    """
    TunerArguments is a dataclass that holds configuration for various tuners.

    Args:
        target_modules (List[str]): List of target modules for tuning. Default is ['all-linear'].
        target_regex (Optional[str]): Regular expression to match target modules. Default is None.
        modules_to_save (List[str]): List of modules to save. Default is an empty list.

        lora_rank (int): Rank for LoRA. Default is 8.
        lora_alpha (int): Alpha value for LoRA. Default is 32.
        lora_dropout (float): Dropout rate for LoRA. Default is 0.05.
        lora_bias (Literal): Bias handling for LoRA. Default is 'none'.
            Allowed values are 'none', 'all'.
        lora_dtype (Optional[Literal]): Data type for LoRA parameters. Default is None.
            Allowed values are 'float16', 'bfloat16', 'float32', None.
        lorap_lr_ratio (Optional[float]): Learning rate ratio for LoRA+. Default is None.
        use_rslora (bool): Flag to indicate if RSLora is used. Default is False.
        use_dora (bool): Flag to indicate if Dora is used. Default is False.
        init_weights (str): Initialization method for weights of supported tuners. Default is 'true'.
        lora_ga_batch_size (int): Batch size used for estimating gradients during initialization in LoRA-GA.
            Default value is 2.
        lora_ga_iters (int): Number of iterations for estimating gradients during initialization in LoRA-GA.
            Default value is 2.
        lora_ga_max_length (int): Maximum input length for estimating gradients during initialization in LoRA-GA.
            Default value is 1024.
        lora_ga_direction (str): Initial direction used for gradient estimation during initialization in LoRA-GA.
            Default value is `ArB2r`. Allowed: `ArBr`, `A2rBr`, `ArB2r`, and `random`.
        lora_ga_scale (str): The scaling method for initialization in LoRA-GA.
            Default value is `stable`. Allowed values are: `gd`, `unit`, `stable`, and `weightS`.
        lora_ga_stable_gamma (int): The gamma value when choosing `stable` scaling for initialization.
            Default value is 16.

        fourier_n_frequency (int): Number of frequencies for FourierFT. Default is 2000.
        fourier_scaling (float): Scaling factor for FourierFT. Default is 300.0.

        boft_block_size (int): Block size for BOFT. Default is 4.
        boft_block_num (int): Number of blocks for BOFT. Default is 0.
        boft_n_butterfly_factor (int): Butterfly factor for BOFT. Default is 1.
        boft_dropout (float): Dropout rate for BOFT. Default is 0.0.

        vera_rank (int): Rank for Vera. Default is 256.
        vera_projection_prng_key (int): PRNG key for Vera projection. Default is 0.
        vera_dropout (float): Dropout rate for Vera. Default is 0.0.
        vera_d_initial (float): Initial value for Vera D. Default is 0.1.

        adapter_act (str): Activation function for adapter. Default is 'gelu'.
        adapter_length (int): Length of the adapter. Default is 128.

        use_galore (bool): Flag to indicate if Galore is used. Default is False.
        galore_target_modules (Optional[List[str]]): List of target modules for Galore. Default is None.
        galore_rank (int): Rank for Galore. Default is 128.
        galore_update_proj_gap (int): Update projection gap for Galore. Default is 50.
        galore_scale (float): Scaling factor for Galore. Default is 1.0.
        galore_proj_type (str): Projection type for Galore. Default is 'std'.
        galore_optim_per_parameter (bool): Flag to indicate if optimization is per parameter for Galore.
            Default is False.
        galore_with_embedding (bool): Flag to indicate if embedding is used with Galore. Default is False.
        galore_quantization (bool): Flag to indicate if use Q-Galore. Default is False.
        galore_proj_quant (bool): Flag to indicate if projection quantization is used for Galore. Default is False.
        galore_proj_bits (int): Number of bits for projection quantization. Default is 4.
        galore_proj_group_size (int): Group size for projection quantization. Default is 256.
        galore_cos_threshold (float): Cosine threshold for projection quantization. Default is 0.4.
        galore_gamma_proj (int): Gamma for projection quantization. Default is 2.
        galore_queue_size (int): Queue size for projection quantization. Default is 5.

        adalora_target_r (int): Target rank for AdaLoRA. Default is 8.
        adalora_init_r (int): Initial rank for AdaLoRA. Default is 12.
        adalora_tinit (int): Initial T value for AdaLoRA. Default is 0.
        adalora_tfinal (int): Final T value for AdaLoRA. Default is 0.
        adalora_deltaT (int): Delta T value for AdaLoRA. Default is 1.
        adalora_beta1 (float): Beta1 value for AdaLoRA. Default is 0.85.
        adalora_beta2 (float): Beta2 value for AdaLoRA. Default is 0.85.
        adalora_orth_reg_weight (float): Orthogonal regularization weight for AdaLoRA. Default is 0.5.

        llamapro_num_new_blocks (int): Number of new blocks for LLaMAPro. Default is 4.
        llamapro_num_groups (Optional[int]): Number of groups for LLaMAPro. Default is None.

        lisa_activated_layers (int): Number of activated layers for LISA. Default is 0.
        lisa_step_interval (int): Step interval for LISA activation. Default is 20.

        reft_layer_key (Optional[str]): Key identifier for ReFT layer. Default is None.
        reft_layers (Optional[List[int]]): List of layers involved in ReFT. Default is None.
        reft_rank (int): Rank parameter for ReFT. Default is 4.
        reft_intervention_type (Literal): Type of intervention for ReFT. Default is 'LoreftIntervention'.
        reft_args (Optional[str]): Additional arguments for ReFT. Default is None.
    """
    # full
    freeze_parameters: List[str] = field(default_factory=list)
    freeze_parameters_regex: Optional[str] = None
    freeze_parameters_ratio: float = 0.  # 0 ~ 1
    trainable_parameters: List[str] = field(default_factory=list)
    trainable_parameters_regex: Optional[str] = None
    # lora or full
    freeze_llm: bool = False
    freeze_vit: bool = True
    freeze_aligner: bool = True
    # tuners
    target_modules: List[str] = field(default_factory=lambda: ['all-linear'])
    target_regex: Optional[str] = None
    # e.g. ['wte', 'ln_1', 'ln_2', 'ln_f', 'lm_head']
    modules_to_save: List[str] = field(default_factory=list)

    # lora
    lora_rank: int = 8
    lora_alpha: int = 32
    lora_dropout: float = 0.05
    lora_bias: Literal['none', 'all'] = 'none'
    lora_dtype: Literal['float16', 'bfloat16', 'float32', None] = None
    lorap_lr_ratio: Optional[float] = None
    use_rslora: bool = False
    use_dora: bool = False
    # Lora: Literal['gaussian', 'pissa', 'pissa_niter_[number of iters]', 'olora', 'loftq', 'true', 'false', 'lora-ga']
    lora_ga_batch_size: int = 2
    lora_ga_iters: int = 2
    lora_ga_max_length: int = 1024
    lora_ga_direction: str = 'ArB2r'
    lora_ga_scale: str = 'stable'
    lora_ga_stable_gamma: int = 16

    # Bone: Literal['bat', 'true', 'false']
    init_weights: str = 'true'

    # fourierft
    fourier_n_frequency: int = 2000
    fourier_scaling: float = 300.0

    # BOFT
    boft_block_size: int = 4
    boft_block_num: int = 0
    boft_n_butterfly_factor: int = 1
    boft_dropout: float = 0.0

    # Vera
    vera_rank: int = 256
    vera_projection_prng_key: int = 0
    vera_dropout: float = 0.0
    vera_d_initial: float = 0.1

    # adapter
    adapter_act: str = 'gelu'
    adapter_length: int = 128

    # galore
    use_galore: bool = False
    galore_target_modules: Optional[List[str]] = None
    galore_rank: int = 128
    galore_update_proj_gap: int = 50
    galore_scale: float = 1.0
    galore_proj_type: str = 'std'
    galore_optim_per_parameter: bool = False
    galore_with_embedding: bool = False
    galore_quantization: bool = False
    galore_proj_quant: bool = False
    galore_proj_bits: int = 4
    galore_proj_group_size: int = 256
    galore_cos_threshold: float = 0.4
    galore_gamma_proj: int = 2
    galore_queue_size: int = 5

    # adalora
    adalora_target_r: int = 8
    adalora_init_r: int = 12
    adalora_tinit: int = 0
    adalora_tfinal: int = 0
    adalora_deltaT: int = 1
    adalora_beta1: float = 0.85
    adalora_beta2: float = 0.85
    adalora_orth_reg_weight: float = 0.5

    # llamapro
    llamapro_num_new_blocks: int = 4
    llamapro_num_groups: Optional[int] = None

    # lisa
    lisa_activated_layers: int = 0
    lisa_step_interval: int = 20

    # reft
    reft_layer_key: Optional[str] = None
    reft_layers: Optional[List[int]] = None
    reft_rank: int = 4
    reft_intervention_type: Literal['NoreftIntervention', 'LoreftIntervention', 'ConsreftIntervention',
                                    'LobireftIntervention', 'DireftIntervention',
                                    'NodireftIntervention'] = 'LoreftIntervention'
    reft_args: Optional[str] = None

    def __post_init__(self):
        # 'true'/'false' from the CLI become real booleans; other strings
        # (e.g. 'gaussian', 'pissa', 'bat') pass through unchanged.
        if isinstance(self.init_weights, str) and self.init_weights.lower() in {'true', 'false'}:
            self.init_weights = bool(strtobool(self.init_weights))
        self._init_multimodal_full()
        # A regex, when given, takes precedence over the module name list.
        if self.target_regex:
            self.target_modules = self.target_regex

    def _init_multimodal_full(self):
        # Translate the freeze_llm/vit/aligner switches into concrete
        # parameter-name lists using the model architecture description.
        # `model_meta` is provided by the class this mixin is combined with.
        model_arch = get_model_arch(self.model_meta.model_arch)
        if not self.model_meta.is_multimodal or not model_arch:
            return
        if self.freeze_llm:
            self.freeze_parameters += model_arch.language_model
        if self.freeze_vit:
            self.freeze_parameters += model_arch.vision_tower
        if self.freeze_aligner:
            self.freeze_parameters += model_arch.aligner
        else:
            self.trainable_parameters += model_arch.aligner
        # The generator head is always frozen for multimodal full-training.
        self.freeze_parameters += model_arch.generator
        if self.freeze_parameters:
            logger.info(f'freeze_parameters: {self.freeze_parameters}')
        if self.trainable_parameters:
            logger.info(f'additional trainable_parameters: {self.trainable_parameters}')
ms-swift/swift/llm/dataset/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (1.41 kB). View file
 
ms-swift/swift/llm/dataset/dataset/mllm.py ADDED
@@ -0,0 +1,1215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ import ast
3
+ import os
4
+ from typing import Any, Dict, Optional
5
+
6
+ import numpy as np
7
+ from datasets import Dataset as HfDataset
8
+ from datasets import IterableDataset as HfIterableDataset
9
+ from tqdm import tqdm
10
+
11
+ from ..media import MediaResource
12
+ from ..preprocessor import GroundingMixin, MessagesPreprocessor, ResponsePreprocessor, RowPreprocessor
13
+ from ..register import DatasetMeta, SubsetDataset, register_dataset
14
+
15
+
16
class ShareGPT4oPreprocessor(MessagesPreprocessor):
    """Resolve ShareGPT-4o image references against the downloaded archive.

    Rows with no image reference, or whose image file is absent on disk,
    are dropped by returning None.
    """

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        row = super().preprocess(row)
        rel_path = row['images']
        if not rel_path:
            return None
        abs_path = os.path.join(self.prefix_path, rel_path)
        if not os.path.exists(abs_path):
            return None
        row['images'] = [abs_path]
        return row

    def prepare_dataset(self, dataset):
        # Download the image archive once and remember where the images live.
        url = ('https://www.modelscope.cn/api/v1/datasets/AI-ModelScope/ShareGPT-4o/repo?'
               'Revision=master&FilePath=images.zip')
        local_dir = MediaResource.download(url, 'sharegpt_4o_images')
        self.prefix_path = os.path.join(local_dir, 'mnt', 'petrelfs', 'wangwenhai', 'workspace_cef', '4o', 'image')
        return super().prepare_dataset(dataset)
35
+
36
+
37
# ShareGPT-4o: multimodal VQA/caption data; images resolved by the preprocessor.
register_dataset(
    DatasetMeta(
        ms_dataset_id='AI-ModelScope/ShareGPT-4o',
        hf_dataset_id='OpenGVLab/ShareGPT-4o',
        preprocess_func=ShareGPT4oPreprocessor(),
        subsets=['image_caption'],
        split=['images'],
        tags=['vqa', 'multi-modal'],
    ))
46
+
47
+
48
class GPT4vDataset(ResponsePreprocessor):
    """Caption dataset: every row is paired with a fixed caption question."""

    def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]:
        caption_query = 'What is the caption of this image?'
        row['query'] = caption_query
        return super().preprocess(row)
53
+
54
+
55
# laion gpt4v: English caption data (huge); RLAIF-V: DPO preference pairs.
register_dataset(
    DatasetMeta(
        ms_dataset_id='swift/gpt4v-dataset',
        hf_dataset_id='laion/gpt4v-dataset',
        preprocess_func=GPT4vDataset(columns={
            'link': 'images',
            'caption': 'response'
        }),
        split=['train'],
        tags=['en', 'caption', 'multi-modal', 'quality'],
        huge_dataset=True,
    ))

register_dataset(
    DatasetMeta(
        ms_dataset_id='swift/RLAIF-V-Dataset',
        hf_dataset_id='openbmb/RLAIF-V-Dataset',
        preprocess_func=ResponsePreprocessor(columns={
            'question': 'query',
            'chosen': 'response',
            'rejected': 'rejected_response'
        }),
        tags=['rlhf', 'dpo', 'multi-modal', 'en'],
    ))
79
+
80
+
81
class GarbagePreprocessor(ResponsePreprocessor):
    """Waste-classification rows: uses a fixed classification instruction as
    the query; the label column is mapped at registration time."""

    def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]:
        row['query'] = 'Task: Classify household waste.'
        return super().preprocess(row)
86
+
87
+
88
# garbage265: image classification dataset for household waste.
register_dataset(
    DatasetMeta(
        ms_dataset_id='tany0699/garbage265',
        preprocess_func=GarbagePreprocessor(columns={
            'category': 'label',
            'image:FILE': 'images'
        }),
        tags=['cls', '🔥', 'multi-modal'],
    ))
97
+
98
+
99
class SA1BPairedCaptionPreprocessor(RowPreprocessor):
    """Build a single-turn chat from an SA1B global caption, using a randomly
    chosen Chinese prompt as the user query."""

    def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]:
        prompt = ['图片中展示了什么', '讲述一下图片中内容', '告诉我里面有什么', '图片内容是啥']
        user_turn = {'role': 'user', 'content': np.random.choice(prompt)}
        assistant_turn = {'role': 'assistant', 'content': row['global_caption']}
        return {'messages': [user_turn, assistant_turn]}
114
+
115
+
116
# SA1B paired captions: Chinese image-caption VQA.
register_dataset(
    DatasetMeta(
        ms_dataset_id='Tongyi-DataEngine/SA1B-Paired-Captions-Images',
        preprocess_func=SA1BPairedCaptionPreprocessor(columns={
            'opensource_url': 'images',
        }),
        tags=['zh', 'multi-modal', 'vqa'],
    ))
124
+
125
+
126
class SA1BDenseCaptionPreprocessor(RowPreprocessor):
    """Parse the SA1B dense-caption field and emit a single-turn chat."""

    column_mapping = {
        'url': 'images',
    }

    def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]:
        prompt = ['图片中展示了什么', '讲述一下图片中内容', '告诉我里面有什么', '图片内容是啥']
        # 'cap_seg' is a stringified Python dict; literal_eval parses it safely.
        cap_seg = ast.literal_eval(row['cap_seg'])
        user_turn = {'role': 'user', 'content': np.random.choice(prompt)}
        assistant_turn = {'role': 'assistant', 'content': cap_seg.get('global_caption')}
        return {'messages': [user_turn, assistant_turn]}
145
+
146
+
147
# SA1B dense captions: Chinese VQA, very large dataset.
register_dataset(
    DatasetMeta(
        ms_dataset_id='Tongyi-DataEngine/SA1B-Dense-Caption',
        preprocess_func=SA1BDenseCaptionPreprocessor(columns={
            'url': 'images',
        }),
        tags=['zh', 'multi-modal', 'vqa'],
        huge_dataset=True,
    ))
156
+
157
+
158
class COCO2014Preprocess(ResponsePreprocessor):
    """COCO-2014 caption rows: keep only the first '&&'-separated caption and
    pair it with a fixed describe-the-image query."""

    def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]:
        # split('&&')[0] is a no-op when the delimiter is absent.
        row['response'] = row['caption'].split('&&')[0]
        row['query'] = 'please describe the image.'
        return super().preprocess(row)
168
+
169
+
170
# COCO 2014 caption dataset with train/validation subsets.
register_dataset(
    DatasetMeta(
        ms_dataset_id='modelscope/coco_2014_caption',
        preprocess_func=COCO2014Preprocess(),
        subsets=[
            SubsetDataset('train', 'coco_2014_caption', ['train']),
            SubsetDataset('validation', 'coco_2014_caption', ['validation']),
        ],
        tags=['chat', 'multi-modal', 'vision', '🔥'],
    ))
180
+
181
+
182
class MantisPreprocessor(MessagesPreprocessor):
    """Mantis-Instruct preprocessor: downloads the per-subset image archive
    and drops rows whose images are missing on disk."""

    def __init__(self, *, subset: str, columns: Optional[Dict[str, str]] = None) -> None:
        self.subset = subset
        super().__init__(columns=columns)

    def prepare_dataset(self, dataset: HfDataset) -> HfDataset:
        url = (f'https://www.modelscope.cn/api/v1/datasets/swift/Mantis-Instruct/repo?Revision='
               f'master&FilePath={self.subset}/train_images.zip')  # noqa
        self.local_dir = MediaResource.download(url, f'mantis_{self.subset}')
        return super().prepare_dataset(dataset)

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        paths = [os.path.join(self.local_dir, item['path']) for item in row['images']]
        # Keep the row only when it has images and every one exists locally.
        if not paths or not all(os.path.exists(path) for path in paths):
            return None
        row['images'] = paths
        return super().preprocess(row)
203
+
204
+
205
# One SubsetDataset (with its own image archive) per Mantis-Instruct subset.
mantis_subsets_name = [
    'birds-to-words', 'chartqa', 'coinstruct', 'contrastive_caption', 'docvqa', 'dreamsim', 'dvqa', 'iconqa',
    'imagecode', 'llava_665k_multi', 'lrv_multi', 'multi_vqa', 'nextqa', 'nlvr2', 'spot-the-diff', 'star',
    'visual_story_telling'
]

_mantis_subsets = []
for subset in mantis_subsets_name:
    _subset = SubsetDataset(subset=subset, split=['train'], preprocess_func=MantisPreprocessor(subset=subset))
    _mantis_subsets.append(_subset)

register_dataset(
    DatasetMeta(
        ms_dataset_id='swift/Mantis-Instruct',
        subsets=_mantis_subsets,
        tags=['chat', 'multi-modal', 'vision'],
    ))
222
+
223
+
224
class LLaVADataPreprocessor(MessagesPreprocessor):
    """llava-data preprocessor: downloads the referenced media collections and
    rewrites each row's image paths to local absolute paths."""

    def prepare_dataset(self, dataset):
        self.all_folders = {}
        for media_type in ['coco', 'gqa', 'ocr_vqa', 'textvqa', 'VG_100K', 'VG_100K_2']:
            self.all_folders[media_type] = MediaResource.download(media_type)
        return super().prepare_dataset(dataset)

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        if not row['images']:
            return
        row = super().preprocess(row)
        images = [p['path'] for p in row['images']]
        new_images = []
        for image in images:
            # Map each dataset-relative prefix onto its downloaded folder.
            # Note: ocr_vqa keeps its original relative path when joining.
            if 'coco/' in image:
                image = os.path.join(self.all_folders['coco'], image.replace('coco/', ''))
            elif 'gqa/' in image:
                image = os.path.join(self.all_folders['gqa'], image.replace('gqa/', ''))
            elif 'ocr_vqa/' in image:
                image = os.path.join(self.all_folders['ocr_vqa'], image)
            elif 'textvqa/' in image:
                image = os.path.join(self.all_folders['textvqa'], image.replace('textvqa/', ''))
            elif 'VG_100K/' in image:
                image = os.path.join(self.all_folders['VG_100K'], image.replace('vg/', ''))
            elif 'VG_100K_2/' in image:
                image = os.path.join(self.all_folders['VG_100K_2'], image.replace('vg/', ''))
            new_images.append(image)
        if all(os.path.exists(image) for image in new_images):
            row['images'] = new_images
        else:
            # NOTE(review): returns a row with images=None instead of dropping
            # it outright — presumably filtered downstream; confirm.
            return {'images': None}
        return row
257
+
258
+
259
# TIGER-Lab llava-data: SFT-quality multimodal instructions.
register_dataset(
    DatasetMeta(
        ms_dataset_id='swift/llava-data',
        hf_dataset_id='TIGER-Lab/llava-data',
        subsets=['llava_instruct'],
        preprocess_func=LLaVADataPreprocessor(),
        tags=['sft', 'multi-modal', 'quality'],
    ))
267
+
268
+
269
class PixelProsePreprocessor(RowPreprocessor):
    """PixelProse rows: strip the boilerplate caption prefix and pair the
    caption with a randomly chosen describe-the-image prompt."""

    def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]:
        caption_prompt = [
            'Give the description of this image.', 'Describe this picture', 'What is the proper title of this image?'
        ]
        prefix = 'This image displays:'
        vlm_caption = row['vlm_caption']
        if vlm_caption.startswith(prefix):
            vlm_caption = vlm_caption[len(prefix):].strip()
        user_msg = {'role': 'user', 'content': np.random.choice(caption_prompt)}
        assistant_msg = {'role': 'assistant', 'content': vlm_caption}
        return {'messages': [user_msg, assistant_msg], 'images': row['url']}
289
+
290
+
291
# PixelProse: large caption dataset with several source splits.
register_dataset(
    DatasetMeta(
        ms_dataset_id='swift/pixelprose',
        hf_dataset_id='tomg-group-umd/pixelprose',
        preprocess_func=PixelProsePreprocessor(),
        split=['train', 'cc12m', 'commonpool', 'redcaps'],
        tags=['caption', 'multi-modal', 'vision'],
        huge_dataset=True,
    ))
300
+
301
+
302
class AIShell1Preprocessor(ResponsePreprocessor):
    """AISHELL-1 ASR rows: fixed speech-to-text prompt; spaces are removed
    from the transcription label."""

    def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]:
        row['query'] = '语音转文本'
        row['response'] = row['Text:LABEL'].replace(' ', '')
        return super().preprocess(row)
308
+
309
+
310
# AISHELL-1 ASR dataset with train/validation/test subsets.
register_dataset(
    DatasetMeta(
        ms_dataset_id='speech_asr/speech_asr_aishell1_trainsets',
        subsets=[
            SubsetDataset('train', split=['train']),
            SubsetDataset('validation', split=['validation']),
            SubsetDataset('test', split=['test']),
        ],
        preprocess_func=AIShell1Preprocessor(columns={'Audio:FILE': 'audios'}),
        tags=['chat', 'multi-modal', 'audio'],
    ))
321
+
322
+
323
class EmoSchemaPreprocessor(ResponsePreprocessor):
    """EgoSchema multiple-choice video QA: downloads the video chunks and maps
    the numeric answer index to an option letter."""

    _OPTION_LETTERS = {'0': 'A', '1': 'B', '2': 'C', '3': 'D', '4': 'E'}

    def prepare_dataset(self, dataset: HfDataset) -> HfDataset:
        for i in range(1, 6):
            url = f'https://modelscope.cn/datasets/AI-ModelScope/egoschema/resolve/master/videos_chunked_0{i}.zip'
            local_dir = MediaResource.download(url, 'egoschema')

        self.local_dir = os.path.join(local_dir, 'videos')
        self.mp4_set = [file[:-4] for file in os.listdir(self.local_dir) if file.endswith('mp4')]
        return super().prepare_dataset(dataset)

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        # Skip rows whose video chunk was not downloaded.
        if row['video_idx'] not in self.mp4_set:
            return None
        query = row['query'] + '\n' + '\n'.join(row['option'])
        video_path = os.path.join(self.local_dir, f"{row['video_idx']}.mp4")
        return super().preprocess({
            'query': query,
            'response': self._OPTION_LETTERS[row['response']],
            'videos': [video_path],
        })
350
+
351
+
352
class EmoSchemaClsPreprocessor(EmoSchemaPreprocessor):
    """Classification variant: keeps the numeric answer as an integer label
    instead of mapping it to an option letter."""

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        if row['video_idx'] not in self.mp4_set:
            return None
        row = {
            'query': row['query'] + '\n' + '\n'.join(row['option']),
            'label': int(row['response']),
            'videos': [os.path.join(self.local_dir, f"{row['video_idx']}.mp4")],
        }
        # Deliberately bypasses the parent's letter-mapping preprocess.
        return ResponsePreprocessor.preprocess(self, row)
363
+
364
+
365
# EgoSchema: default (option-letter) and cls (integer-label) variants.
register_dataset(
    DatasetMeta(
        ms_dataset_id='AI-ModelScope/egoschema',
        hf_dataset_id='lmms-lab/egoschema',
        subsets=[
            SubsetDataset('default', 'Subset', preprocess_func=EmoSchemaPreprocessor()),
            SubsetDataset('cls', 'Subset', preprocess_func=EmoSchemaClsPreprocessor())
        ],
        split=['test'],
        tags=['chat', 'multi-modal', 'video'],
    ))
376
+
377
+
378
+ def _generate_url_list(_url, _range):
379
+ lst = []
380
+ for i in range(1, (_range + 1)):
381
+ lst.append(_url.replace('{}', str(i)))
382
+ return lst
383
+
384
+
385
class LLaVAVideo178KPreprocessor(MessagesPreprocessor):
    """LLaVA-Video-178K preprocessor: downloads the sharded video archives for
    one subset and resolves each row's video path against the local copy."""

    # Every subset follows the same URL pattern; only the shard count differs.
    video_resources = {
        name: _generate_url_list(
            'https://www.modelscope.cn/datasets/lmms-lab/LLaVA-Video-178K/resolve/master/'
            + name + '/' + name + '_videos_{}.tar.gz',
            count,
        )
        for name, count in [
            ('0_30_s_academic_v0_1', 8),
            ('0_30_s_youtube_v0_1', 19),
            ('1_2_m_academic_v0_1', 14),
            ('1_2_m_youtube_v0_1', 50),
            ('2_3_m_academic_v0_1', 18),
            ('2_3_m_youtube_v0_1', 98),
            ('30_60_s_academic_v0_1', 10),
            ('30_60_s_youtube_v0_1', 13),
        ]
    }

    def __init__(self, *, subset: str, columns: Optional[Dict[str, str]] = None) -> None:
        self.subset = subset
        super().__init__(columns=columns)

    def prepare_dataset(self, dataset: HfDataset) -> HfDataset:
        urls = self.video_resources[self.subset]
        self.local_dir = MediaResource.download(urls, f'llava_video_178k_{self.subset}', file_type='sharded')
        return super().prepare_dataset(dataset)

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        file_path = os.path.join(self.local_dir, f"{row['videos']}")
        # Drop rows whose video shard is not present locally.
        if not os.path.exists(file_path):
            return None
        return super().preprocess({'messages': row['messages'], 'videos': file_path})
452
+
453
+
454
# Build one SubsetDataset per duration/source bucket of LLaVA-Video-178K.
llava_video_subsets = []
for subset in [
        '0_30_s_academic_v0_1',
        '0_30_s_youtube_v0_1',
        '1_2_m_academic_v0_1',
        '1_2_m_youtube_v0_1',
        '2_3_m_academic_v0_1',
        '2_3_m_youtube_v0_1',
        '30_60_s_academic_v0_1',
        '30_60_s_youtube_v0_1',
]:
    subset = SubsetDataset(
        subset=subset,
        split=['caption', 'open_ended', 'multi_choice'],
        preprocess_func=LLaVAVideo178KPreprocessor(subset=subset),
    )
    llava_video_subsets.append(subset)

register_dataset(
    DatasetMeta(
        hf_dataset_id='lmms-lab/LLaVA-Video-178K', subsets=llava_video_subsets, tags=['chat', 'multi-modal', 'video']))
475
+
476
+
477
class MovieChat1KPreprocessor(ResponsePreprocessor):
    """MovieChat-1K test preprocessor: downloads the known video files one by
    one and answers the first 'global' question of each video."""

    def prepare_dataset(self, dataset: HfDataset) -> HfDataset:
        # (filename prefix, index range) pairs describing every available mp4.
        file_specs = [
            ('', range(1, 10)),
            ('', range(201, 240)),
            ('AWA-', range(1, 10)),
            ('AWB-', range(1, 16)),
            ('AWC-', range(1, 11)),
            ('AWD-', range(1, 8)),
            ('AWE-', range(1, 7)),
            ('AWG-', range(1, 12)),
            ('AWH-', range(1, 8)),
            ('BWA-', range(1, 7)),
            ('BWB-', range(1, 7)),
            ('BWD-', range(1, 6)),
            ('BWE-', range(1, 6)),
            ('BWG-', range(1, 6)),
            ('BWH-', range(1, 6)),
            ('TFS-', range(1, 13)),
            ('UWA-', range(1, 5)),
        ]
        mp4_set = [f'{prefix}{i}.mp4' for prefix, rng in file_specs for i in rng]
        mp4_set.append('UWA-6.mp4')
        for file in mp4_set:
            url = f'https://modelscope.cn/datasets/AI-ModelScope/MovieChat-1K-test/resolve/master/videos/{file}'
            self.local_dir = MediaResource.download(url, 'moviechat_1k_test', file_type='file')
        return super().prepare_dataset(dataset)

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        file_path = os.path.join(self.local_dir, f"{row['info']['video_path']}")
        if not os.path.exists(file_path):
            return None
        return super().preprocess({
            'query': row['global'][0]['question'],
            'response': row['global'][0]['answer'],
            'videos': file_path,
        })
511
+
512
+
513
# MovieChat-1K test split: long-video QA.
register_dataset(
    DatasetMeta(
        ms_dataset_id='AI-ModelScope/MovieChat-1K-test',
        hf_dataset_id='Enxin/MovieChat-1K-test',
        preprocess_func=MovieChat1KPreprocessor(),
        split=['train'],
        tags=['chat', 'multi-modal', 'video']))
520
+
521
+
522
class VideoChatGPTPreprocessor(ResponsePreprocessor):
    """VideoChatGPT preprocessor.

    Downloads the test videos once, then pairs each row with its local video
    file. The query is taken from the last non-empty of the 'query',
    'question_1', 'question_2' columns (matching the original precedence).
    """

    def prepare_dataset(self, dataset: HfDataset) -> HfDataset:
        url = 'https://modelscope.cn/datasets/swift/VideoChatGPT/resolve/master/videos.zip'
        local_dir = MediaResource.download(url, 'video_chatgpt')
        self.local_dir = os.path.join(local_dir, 'Test_Videos')
        # Fix: the directory was previously scanned with os.listdir on every
        # row; scan it once here instead. Only `.mp4` files are considered.
        self.mp4_set = {file[:-4] for file in os.listdir(self.local_dir) if file.endswith('mp4')}
        return super().prepare_dataset(dataset)

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        # Drop rows whose video is not present locally.
        if row['video_name'] not in self.mp4_set:
            return None
        row['videos'] = os.path.join(self.local_dir, f"{row['video_name']}.mp4")
        for key in ['query', 'question_1', 'question_2']:
            query = row.get(key)
            if query is None or query == 'None':
                continue
            row['query'] = query
        return super().preprocess(row)
542
+
543
+
544
# VideoChatGPT benchmark subsets (test split only).
register_dataset(
    DatasetMeta(
        ms_dataset_id='swift/VideoChatGPT',
        hf_dataset_id='lmms-lab/VideoChatGPT',
        subsets=['Generic', 'Temporal', 'Consistency'],
        preprocess_func=VideoChatGPTPreprocessor(),
        split=['test'],
        tags=['chat', 'multi-modal', 'video', '🔥'],
    ))
553
+
554
+
555
def preprocess_mind2web(dataset, **kwargs):
    """Convert Multimodal-Mind2Web rows into multi-turn agent conversations.

    Consecutive rows belonging to one task (a new task is detected by
    ``target_action_index == '0'``) are folded into one conversation with one
    screenshot per turn and a fixed CLICK/TYPE/SELECT tool list. Supports both
    map-style and iterable (streaming) datasets.
    """

    def preprocess_row(row: Dict[str, Any]) -> Dict[str, Any]:
        # Build query/response for one action step; the screenshot is saved to
        # a local file and its path kept on the row.
        raw_html = row['cleaned_html']
        screenshot = row['screenshot']
        row['screenshot'] = MediaResource.safe_save(screenshot, row['action_uid'] + '.jpg', 'mind2web')
        action = row['target_action_reprs']
        actions = action.split('->')
        row['query'] = f'The snapshot of screen:<image>\nThe html source code:{raw_html}\n'
        action = actions[-1]
        where = actions[0] if len(actions) > 1 else ''
        what = ''
        if ':' in action:
            action, what = action[:action.find(':')], action[action.find(':') + 1:]
        # NOTE(review): the f-string always appends ',' + what even when
        # `what` is empty, producing 'Action Input: <where>,' — confirm this
        # trailing comma matches the intended target format.
        row['response'] = f'Action: {action.strip()}\nAction Input: {where.strip()}{"," + what.strip()}'
        return row

    conversations = []
    # Tool schema attached to every conversation: CLICK / TYPE / SELECT.
    tools = [{
        'function': {
            'name': 'CLICK',
            'desc': 'Choose and click an element in the web page',
            'parameter': [{
                'element': 'string, the element in the web page to click'
            }]
        }
    }, {
        'function': {
            'name':
            'TYPE',
            'desc':
            'Input some text into a web element like <input> or <textbox>',
            'parameter': [{
                'element': 'string, the element in the web page to input to',
                'content': 'string, what content to input into the textbox element'
            }]
        }
    }, {
        'function': {
            'name':
            'SELECT',
            'desc':
            'Select an element from a combobox',
            'parameter': [{
                'element': 'string, the combobox or dropdown in the web page on which the select happens',
                'content': 'string, which choices to choose'
            }]
        }
    }]

    def history_to_messages(history):
        # Flatten [query, response] pairs into role-tagged chat messages.
        messages = []
        for h in history:
            messages.append({'role': 'user', 'content': h[0]})
            messages.append({'role': 'assistant', 'content': h[1]})
        return messages

    if isinstance(dataset, HfIterableDataset):

        def generate_example(dataset):
            # Streaming variant: yield one conversation per completed task.
            history = []
            images = []
            for row in dataset:
                target_action_index = row['target_action_index']
                row = preprocess_row(row)
                query = row['query']
                if target_action_index == '0':
                    # A new task begins: flush the accumulated conversation.
                    if history:
                        yield {'messages': history_to_messages(history), 'images': images, 'tools': tools}
                        images = []
                        history = []
                    # The first turn of a task carries the task description.
                    query = query + '\n' + row['confirmed_task']
                history.append([query, row['response']])
                images.append(row['screenshot'])

            if history:
                yield {'messages': history_to_messages(history), 'images': images, 'tools': tools}

        return HfIterableDataset.from_generator(generate_example, gen_kwargs={'dataset': dataset})

    # Map-style variant: same folding, accumulated into a list.
    history = []
    images = []
    for row in tqdm(dataset):
        target_action_index = row['target_action_index']
        row = preprocess_row(row)
        query = row['query']
        if target_action_index == '0':
            if history:
                conversations.append({'messages': history_to_messages(history), 'images': images, 'tools': tools})
                images = []
                history = []
            query = query + '\n' + row['confirmed_task']
        history.append([query, row['response']])
        images.append(row['screenshot'])

    if history:
        conversations.append({'messages': history_to_messages(history), 'images': images, 'tools': tools})

    return HfDataset.from_list(conversations)
654
+
655
+
656
# Multimodal-Mind2Web agent data and the M3IT multi-task collection.
register_dataset(
    DatasetMeta(
        ms_dataset_id='swift/Multimodal-Mind2Web',
        hf_dataset_id='osunlp/Multimodal-Mind2Web',
        preprocess_func=preprocess_mind2web,
        tags=['agent', 'multi-modal']))

register_dataset(
    DatasetMeta(
        ms_dataset_id='AI-ModelScope/M3IT',
        subsets=[
            'coco', 'vqa-v2', 'shapes', 'shapes-rephrased', 'coco-goi-rephrased', 'snli-ve', 'snli-ve-rephrased',
            'okvqa', 'a-okvqa', 'viquae', 'textcap', 'docvqa', 'science-qa', 'imagenet', 'imagenet-open-ended',
            'imagenet-rephrased', 'coco-goi', 'clevr', 'clevr-rephrased', 'nlvr', 'coco-itm', 'coco-itm-rephrased',
            'vsr', 'vsr-rephrased', 'mocheg', 'mocheg-rephrased', 'coco-text', 'fm-iqa', 'activitynet-qa', 'msrvtt',
            'ss', 'coco-cn', 'refcoco', 'refcoco-rephrased', 'multi30k', 'image-paragraph-captioning', 'visual-dialog',
            'visual-dialog-rephrased', 'iqa', 'vcr', 'visual-mrc', 'ivqa', 'msrvtt-qa', 'msvd-qa', 'gqa', 'text-vqa',
            'ocr-vqa', 'st-vqa', 'flickr8k-cn'
        ],
        preprocess_func=ResponsePreprocessor(columns={
            'instruction': 'system',
            'inputs': 'query',
            'image_base64_str': 'images',
            'outputs': 'response'
        }),
        split=['train'],
        huge_dataset=True,
        tags=['chat', 'multi-modal', 'vision']))
684
+
685
+
686
class ShareGPT4VPreprocessor(MessagesPreprocessor):
    """ShareGPT4V preprocessor: downloads the media collections required by
    the selected subset(s) and resolves each row's image to a local path."""

    def prepare_dataset(self, dataset):
        # When no config is selected, both subsets' media must be available.
        split = ['ShareGPT4V', 'ShareGPT4V-PT'] if dataset.config_name is None else dataset.config_name
        IMAGE_DATASET_REQUIREMENTS = {
            'ShareGPT4V': ['coco', 'sam', 'llava', 'wikiart', 'share_textvqa', 'web-celebrity', 'web-landmark'],
            'ShareGPT4V-PT': ['coco', 'sam', 'llava']
        }

        if isinstance(split, str):
            split = [split]
        self.all_folders = {}
        for sp in split:
            for media_type in IMAGE_DATASET_REQUIREMENTS[sp]:
                self.all_folders[media_type] = MediaResource.download(media_type)
        return super().prepare_dataset(dataset)

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        image = row['image']
        row.update(super().preprocess(row))
        # Rewrite the dataset-relative image path onto the downloaded folder;
        # each source uses a different on-disk layout.
        if 'coco/' in image:
            image = os.path.join(self.all_folders['coco'], image.replace('coco/', ''))
        elif 'sam/' in image:
            image = os.path.join(self.all_folders['sam'], image.replace('sam/images/', ''))
        elif 'llava/' in image:
            image = os.path.join(self.all_folders['llava'], image.replace('llava/llava_pretrain/images/', ''))
        elif 'wikiart/' in image:
            image = os.path.join(self.all_folders['wikiart'], image.replace('wikiart/images/', 'data/wikiart/images/'))
        elif 'share_textvqa/' in image:
            image = os.path.join(self.all_folders['share_textvqa'],
                                 image.replace('share_textvqa/images/', 'data/share_textvqa/images/'))
        elif 'web-celebrity/' in image:
            image = os.path.join(self.all_folders['web-celebrity'],
                                 image.replace('web-celebrity/images/', 'data/web-celebrity/images/'))
        elif 'web-landmark/' in image:
            image = os.path.join(self.all_folders['web-landmark'],
                                 image.replace('web-landmark/images/', 'data/web-landmark/images/'))
        # Rows whose image is missing locally are dropped.
        if os.path.exists(image):
            row['images'] = image
        else:
            return
        return row
728
+
729
+
730
# ShareGPT4V: high-quality caption data (PT = pretraining variant).
register_dataset(
    DatasetMeta(
        ms_dataset_id='AI-ModelScope/ShareGPT4V',
        subsets=['ShareGPT4V', 'ShareGPT4V-PT'],
        preprocess_func=ShareGPT4VPreprocessor(),
        huge_dataset=True,
        tags=['chat', 'multi-modal', 'vision']))
737
+
738
+
739
class TextCapsPreprocessor(ResponsePreprocessor):
    """TextCaps rows with a fixed caption query; rows whose local image file
    is missing are dropped."""

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        row['query'] = 'What is the caption of this image?'
        if not os.path.exists(row['images']['path']):
            return None
        return super().preprocess(row)
746
+
747
+
748
class TextCapsEmbPreprocessor(ResponsePreprocessor):
    """TextCaps variant with an empty query — presumably for embedding-style
    training (registered as the 'emb' subset); confirm against usage."""

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        row['query'] = ''
        if not os.path.exists(row['images']['path']):
            return None
        return super().preprocess(row)
755
+
756
+
757
# TextCaps: default caption subset plus the empty-query 'emb' subset.
register_dataset(
    DatasetMeta(
        ms_dataset_id='swift/TextCaps',
        hf_dataset_id='HuggingFaceM4/TextCaps',
        subsets=[
            SubsetDataset(
                name='default',
                preprocess_func=TextCapsPreprocessor(columns={'reference_strs': 'response'}),
                split=['train', 'validation'],
            ),
            SubsetDataset(
                name='emb',
                preprocess_func=TextCapsEmbPreprocessor(columns={'reference_strs': 'response'}),
                split=['train', 'validation'],
            ),
        ],
        huge_dataset=True,
        tags=['multi-modal', 'en', 'caption', 'quality']))
775
+
776
+
777
class RefCOCOPreprocessor(ResponsePreprocessor, GroundingMixin):
    """RefCOCO/RefCOCOg preprocessor: emits caption or grounding samples
    (selected by ``task_type``) with integer-rounded bounding boxes."""
    task_type = 'caption'

    def __init__(self, task_type, **kwargs):
        self.task_type = task_type
        super().__init__(**kwargs)

    def prepare_dataset(self, dataset):
        self.cache_dir = MediaResource.download(
            'https://www.modelscope.cn/api/v1/datasets/we_dont_produce_water/'
            'coco_res/repo?Revision=master&FilePath=coco_2014.zip', 'coco2014')
        return dataset

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        image_path = os.path.join(self.cache_dir, row['image_path'].replace('coco/train2014', 'train2014'))
        if not os.path.exists(image_path):
            return None

        bbox = row['bbox']
        # Round the box coordinates to integers in place.
        for idx, coord in enumerate(bbox):
            bbox[idx] = round(float(coord))

        query, response = self.construct_grounding_prompt()
        res = {
            'query': query,
            'response': response,
            'images': [image_path],
            'objects': {
                'ref': [row['captions'][0]],
                'bbox': [bbox],
            },
        }
        return super().preprocess(res)
809
+
810
+
811
# RefCOCO / RefCOCOg grounding datasets and the lnqa OCR-VQA dataset.
register_dataset(
    DatasetMeta(
        ms_dataset_id='swift/refcoco',
        hf_dataset_id='jxu124/refcoco',
        subsets=[
            SubsetDataset(
                name='caption',
                preprocess_func=RefCOCOPreprocessor('caption'),
            ),
            SubsetDataset(
                name='grounding',
                preprocess_func=RefCOCOPreprocessor('grounding'),
            )
        ],
        split=['train', 'validation'],
        tags=['multi-modal', 'en', 'grounding']))

register_dataset(
    DatasetMeta(
        ms_dataset_id='swift/refcocog',
        hf_dataset_id='jxu124/refcocog',
        subsets=[
            SubsetDataset(
                name='caption',
                preprocess_func=RefCOCOPreprocessor('caption'),
            ),
            SubsetDataset(
                name='grounding',
                preprocess_func=RefCOCOPreprocessor('grounding'),
            )
        ],
        split=['train', 'validation'],
        tags=['multi-modal', 'en', 'grounding']))

register_dataset(
    DatasetMeta(
        ms_dataset_id='swift/lnqa',
        hf_dataset_id='vikhyatk/lnqa',
        preprocess_func=MessagesPreprocessor(user_role='question', assistant_role='answer'),
        split=['train', 'validation'],
        huge_dataset=True,
        tags=['multi-modal', 'en', 'ocr-vqa', 'quality']))
853
+
854
+
855
class LLaVAInstructPreprocessor(MessagesPreprocessor):
    """Resolve LLaVA-Instruct image references to local files before message conversion."""

    def prepare_dataset(self, dataset):
        # Download every image source referenced by the dataset once, up front.
        media_types = ['coco', 'gqa', 'ocr_vqa', 'textvqa', 'VG_100K', 'VG_100K_2']
        self.all_folders = {media_type: MediaResource.download(media_type) for media_type in media_types}
        return super().prepare_dataset(dataset)

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """Rewrite the row's image reference to a local path; drop rows whose file is missing."""
        # (marker substring, downloaded folder key, prefix stripped from the stored path).
        # A None prefix keeps the stored path unchanged (the ocr_vqa cache keeps
        # the 'ocr_vqa/' folder inside it).
        routes = [
            ('coco/', 'coco', 'coco/'),
            ('gqa/', 'gqa', 'gqa/'),
            ('ocr_vqa/', 'ocr_vqa', None),
            ('textvqa/', 'textvqa', 'textvqa/'),
            ('VG_100K/', 'VG_100K', 'vg/'),
            ('VG_100K_2/', 'VG_100K_2', 'vg/'),
        ]
        image = row['images']
        for marker, folder, prefix in routes:
            if marker in image:
                relative = image if prefix is None else image.replace(prefix, '')
                image = os.path.join(self.all_folders[folder], relative)
                break
        if not os.path.exists(image):
            return
        row['images'] = image
        return super().preprocess(row)
883
+
884
+
885
# LLaVA-Instruct-150K, pinned to a fixed ModelScope revision.
register_dataset(
    DatasetMeta(
        ms_dataset_id='AI-ModelScope/LLaVA-Instruct-150K',
        ms_revision='d5db3806e395c60496630a206c336932e85a2d00',
        preprocess_func=LLaVAInstructPreprocessor(),
        split=['train'],
        tags=['chat', 'multi-modal', 'vision']))
892
+
893
+
894
class LLaVAPretrainPreprocessor(MessagesPreprocessor):
    """Preprocess LLaVA-Pretrain rows: convert messages and resolve the local image path.

    Rows are dropped (None returned) when the parent preprocessor rejects the
    conversation, when no image is referenced, or when the image file is
    missing from the downloaded archive.
    """

    def prepare_dataset(self, dataset):
        # Download and unpack the image archive once before row-level preprocessing.
        self.media_dir = MediaResource.download(
            ('https://www.modelscope.cn/api/v1/datasets/AI-ModelScope/LLaVA-Pretrain/repo?'
             'Revision=master&FilePath=images.zip'),  # noqa
            'llava_pretrain')
        return super().prepare_dataset(dataset)

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        processed = super().preprocess(row)
        if processed is None:
            # The parent preprocessor rejected the row (e.g. unparsable messages).
            # The original `row.update(super().preprocess(row))` raised TypeError here.
            return
        row.update(processed)
        if not row['image']:
            return
        file_path = os.path.join(self.media_dir, row['image'])
        if not os.path.exists(file_path):
            return
        # Keep the converted messages and attach the resolved image path.
        # The previous implementation returned only {'images': file_path},
        # silently discarding the conversation.
        row['images'] = file_path
        return row
914
+
915
+
916
# LLaVA-Pretrain, pinned to a fixed ModelScope revision.
register_dataset(
    DatasetMeta(
        ms_dataset_id='AI-ModelScope/LLaVA-Pretrain',
        ms_revision='e3a3f0bfaad05e90e46745152a32bf944e0f4a63',
        hf_dataset_id='liuhaotian/LLaVA-Pretrain',
        preprocess_func=LLaVAPretrainPreprocessor(),
        huge_dataset=True,
        tags=['chat', 'multi-modal', 'quality']))

# Midefics medical VQA: QA pairs nested under the 'data' key with
# 'question'/'answer' role names.
register_dataset(
    DatasetMeta(
        ms_dataset_id='swift/MideficsDataset',
        hf_dataset_id='WinterSchool/MideficsDataset',
        preprocess_func=MessagesPreprocessor(inner_key='data', user_role='question', assistant_role='answer'),
        tags=['medical', 'en', 'vqa']))

register_dataset(
    DatasetMeta(
        ms_dataset_id='swift/OK-VQA_train',
        hf_dataset_id='Multimodal-Fatima/OK-VQA_train',
        preprocess_func=ResponsePreprocessor(),
        tags=['multi-modal', 'en', 'vqa', 'quality']))

# A-OKVQA: the 'rationales' column is used as the response.
register_dataset(
    DatasetMeta(
        ms_dataset_id='swift/A-OKVQA',
        hf_dataset_id='HuggingFaceM4/A-OKVQA',
        split=['train', 'validation'],
        preprocess_func=ResponsePreprocessor(columns={'rationales': 'response'}),
        tags=['multi-modal', 'en', 'vqa', 'quality']))
946
+
947
+
948
class OcrvqaPreprocessor(RowPreprocessor):
    """Pick one question/answer pair per OCR-VQA row and emit it as a chat sample."""

    def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]:
        # Use the preprocessor's seeded RandomState (set in RowPreprocessor.__init__)
        # rather than the global numpy RNG, so sampling is reproducible and
        # consistent with ResponsePreprocessor's response sampling.
        idx = self.random_state.choice(range(len(row['questions'])))
        query = row['questions'][idx]
        response = row['answers'][idx]
        return {
            'messages': [{
                'role': 'user',
                'content': query
            }, {
                'role': 'assistant',
                'content': response
            }],
        }
963
+
964
+
965
# OCR-VQA: one randomly chosen QA pair per image (see OcrvqaPreprocessor).
register_dataset(
    DatasetMeta(
        ms_dataset_id='swift/OCR-VQA',
        hf_dataset_id='howard-hou/OCR-VQA',
        split=['train', 'validation'],
        preprocess_func=OcrvqaPreprocessor(),
        tags=['multi-modal', 'en', 'ocr-vqa']))
972
+
973
+
974
class ScienceQAPreprocessor(RowPreprocessor):
    """Turn a ScienceQA row into a chat sample whose answer states the rationale first."""

    def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]:
        # 'answer' is an index into the 'choices' list.
        answer_text = row['choices'][row['answer']]
        full_response = f"{row['solution']}\nSo the final answer is: {answer_text}"
        return {
            'messages': [
                {'role': 'user', 'content': row['question']},
                {'role': 'assistant', 'content': full_response},
            ]
        }
982
+
983
+
984
register_dataset(
    DatasetMeta(
        ms_dataset_id='swift/ScienceQA',
        hf_dataset_id='derek-thomas/ScienceQA',
        split=['train', 'validation'],
        preprocess_func=ScienceQAPreprocessor(),
        tags=['multi-modal', 'science', 'vqa', 'quality']))
991
+
992
+
993
class GritPreprocessor(RowPreprocessor, GroundingMixin):
    """Convert GRIT rows (caption + referring expressions) into grounding/caption/vqa samples.

    Rows with overlapping referring-expression character spans, or with no
    referring expressions at all, are dropped.
    """

    def __init__(self, task_type, **kwargs):
        # task_type: 'caption', 'grounding', or 'vqa' — selects the prompt style below.
        self.task_type = task_type
        super().__init__(**kwargs)

    @staticmethod
    def has_overlap(start_ends):
        """Return True if any adjacent (start, end) pairs overlap.

        Assumes `start_ends` is already sorted by start (see preprocess).
        """
        for i in range(1, len(start_ends)):
            if start_ends[i][0] < start_ends[i - 1][1]:
                return True
        return False

    @staticmethod
    def replace_intervals_with_tags(response, start_ends):
        """Replace each [start, end) character interval with '<ref-object><bbox>'.

        NOTE(review): not called anywhere in this class — presumably used by an
        external caller or kept for a caption variant; verify before removing.
        """
        result = []
        last_end = 0
        for start, end in start_ends:
            result.append(response[int(last_end):int(start)])
            result.append('<ref-object><bbox>')
            last_end = end
        result.append(response[int(last_end):])
        return ''.join(result)

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        images = row['images']
        caption = row['caption']
        # Each ref_exp: [start, end, x1, y1, x2, y2, confidence] — spans index
        # into `caption`, boxes are normalized to [0, 1] ('norm1').
        ref_exps = row['ref_exps']
        objects = {'ref': [], 'bbox': [], 'bbox_type': 'norm1'}
        start_end_pairs = []
        for ref_exp in ref_exps:
            start = ref_exp[0]
            end = ref_exp[1]
            # conf = ref_exp[6] TODO filter low confidence rows?
            start_end_pairs.append(ref_exp[0:2])

            object_part = caption[int(start):int(end)]
            objects['ref'].append(object_part)
            objects['bbox'].append(ref_exp[2:6])

        # Sort spans so the pairwise overlap check below is valid.
        start_end_pairs.sort(key=lambda x: (x[0], x[1]))
        if self.has_overlap(start_end_pairs) or not ref_exps:
            return

        if self.task_type in ('grounding', 'caption'):
            query, response = self.construct_grounding_prompt()
        else:
            # 'vqa': plain captioning question with the raw caption as answer.
            query = 'what is the proper caption of this image?'
            response = caption
        return {
            'messages': [{
                'role': 'user',
                'content': query
            }, {
                'role': 'assistant',
                'content': response
            }],
            'images': images,
            'objects': objects
        }
1053
+
1054
+
1055
# GRIT: three subsets sharing one preprocessor; the image is referenced by URL
# (the 'url' column is mapped to 'images').
register_dataset(
    DatasetMeta(
        ms_dataset_id='swift/GRIT',
        hf_dataset_id='zzliang/GRIT',
        subsets=[
            SubsetDataset(
                name='caption',
                preprocess_func=GritPreprocessor('caption', columns={'url': 'images'}),
            ),
            SubsetDataset(
                name='grounding',
                preprocess_func=GritPreprocessor('grounding', columns={'url': 'images'}),
            ),
            SubsetDataset(
                name='vqa',
                preprocess_func=GritPreprocessor('vqa', columns={'url': 'images'}),
            )
        ],
        huge_dataset=True,
        tags=['multi-modal', 'en', 'caption-grounding', 'vqa', 'quality']))
1075
+
1076
+
1077
class GQAPreprocessor(RowPreprocessor):
    """Convert GQA rows into chat samples with a local image path.

    Rows whose image file is missing from the downloaded cache are dropped.
    """

    def prepare_dataset(self, dataset):
        # Download the GQA image archive once before row-level preprocessing.
        self.local_cache = MediaResource.download('gqa')
        return super().prepare_dataset(dataset)

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        image_path = os.path.join(self.local_cache, 'images', row['imageId'] + '.jpg')
        # Bug fix: the original tested the truthiness of os.path.join(...),
        # which is always a non-empty string, so missing images were never
        # filtered out. Check file existence instead.
        if not os.path.exists(image_path):
            return
        return {
            'messages': [{
                'role': 'user',
                'content': row['question']
            }, {
                'role': 'assistant',
                'content': row['fullAnswer']
            }],
            'images': image_path,
        }
1098
+
1099
+
1100
# GQA (HF only, no ModelScope mirror registered).
register_dataset(
    DatasetMeta(
        hf_dataset_id='lmms-lab/GQA',
        split=['train_all_instructions'],
        preprocess_func=GQAPreprocessor(),
        huge_dataset=True,
        tags=['multi-modal', 'en', 'vqa', 'quality']))
1107
+
1108
+
1109
class CocoPreprocessor(ResponsePreprocessor):
    """Turn COCO detection rows into object-detection chat samples.

    The numeric category ids in `objects['category']` are resolved to the 80
    COCO class names below; the response holds one '<ref-object><bbox>' line
    per object.
    """

    # The 80 COCO object classes, indexed by category id.
    category = [
        'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
        'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
        'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
        'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
        'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
        'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
        'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
        'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
    ]

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        objects = row['objects']
        names = [self.category[category_id] for category_id in objects['category']]
        objects['ref'] = names
        row['query'] = 'Task: Object Detection'
        row['response'] = '\n'.join(['<ref-object><bbox>'] * len(names))
        return super().preprocess(row)
1127
+
1128
+
1129
register_dataset(
    DatasetMeta(
        ms_dataset_id='AI-ModelScope/coco',
        hf_dataset_id='detection-datasets/coco',
        preprocess_func=CocoPreprocessor(),
        huge_dataset=True,
        tags=['multi-modal', 'en', 'vqa', 'quality']))
1136
+
1137
+
1138
class LLaVAMixSFTPreprocessor(RowPreprocessor):
    """Flatten structured message content (text/image parts) into plain strings.

    Each image part becomes a literal '<image>' tag inside the message text.
    """

    def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]:

        def render(parts):
            # Concatenate text fragments; each image placeholder becomes '<image>'.
            pieces = []
            for part in parts:
                if part['type'] == 'text':
                    pieces.append(part['text'])
                elif part['type'] == 'image':
                    pieces.append('<image>')
            return ''.join(pieces)

        flattened = [{'role': msg['role'], 'content': render(msg['content'])} for msg in row['messages']]
        return {'messages': flattened}
1156
+
1157
+
1158
register_dataset(
    DatasetMeta(
        ms_dataset_id='swift/llava-instruct-mix-vsft',
        hf_dataset_id='HuggingFaceH4/llava-instruct-mix-vsft',
        split=['test'],
        preprocess_func=LLaVAMixSFTPreprocessor(),
        tags=['multi-modal', 'en', 'vqa', 'quality']))
1165
+
1166
+
1167
class LatexocrPreprocessor(ResponsePreprocessor):
    """Attach the fixed LaTeX-OCR instruction as the query for every row."""

    def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]:
        row.update(query='Using LaTeX to perform OCR on the image.')
        return super().preprocess(row)
1172
+
1173
+
1174
register_dataset(
    DatasetMeta(
        ms_dataset_id='AI-ModelScope/LaTeX_OCR',
        hf_dataset_id='linxy/LaTeX_OCR',
        subsets=['default', 'human_handwrite', 'human_handwrite_print', 'synthetic_handwrite', 'small'],
        preprocess_func=LatexocrPreprocessor(),
        split=['train', 'validation', 'test'],
        tags=['chat', 'ocr', 'multi-modal', 'vision'],
    ))
1183
+
1184
+
1185
class CapchaImagesPreprocessor(ResponsePreprocessor):
    """Attach the fixed captcha-recognition instruction as the query for every row."""

    def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]:
        row.update(query='recognize the content.')
        return super().preprocess(row)
1190
+
1191
+
1192
# captcha-images: the 'solution' column holds the response text.
register_dataset(
    DatasetMeta(
        ms_dataset_id='AI-ModelScope/captcha-images',
        split=['train', 'validation'],
        preprocess_func=CapchaImagesPreprocessor(columns={'solution': 'response'}),
        tags=['chat', 'multi-modal', 'vision']))
1198
+
1199
+
1200
class ClevrPreprocessor(ResponsePreprocessor):
    """Append the GRPO thinking/answer format instruction to every CLEVR query."""

    def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]:
        suffix = ('Output the thinking process in <think> </think> and '
                  'final answer (number) in <answer> </answer> tags.')
        row['query'] = f"{row.get('query', '')} {suffix}"
        return super().preprocess(row)
1208
+
1209
+
1210
register_dataset(
    DatasetMeta(
        ms_dataset_id='okwinds/clevr_cogen_a_train',
        hf_dataset_id='leonardPKU/clevr_cogen_a_train',
        preprocess_func=ClevrPreprocessor(),
        tags=['qa', 'math', 'vision', 'grpo']))
ms-swift/swift/llm/dataset/preprocessor/__pycache__/extra.cpython-310.pyc ADDED
Binary file (4.23 kB). View file
 
ms-swift/swift/llm/dataset/preprocessor/core.py ADDED
@@ -0,0 +1,529 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ import ast
3
+ import os
4
+ from collections import Counter
5
+ from contextlib import contextmanager
6
+ from typing import Any, Callable, Dict, List, Optional, Union
7
+
8
+ import numpy as np
9
+ from datasets import Dataset as HfDataset
10
+ from datasets import Image
11
+ from datasets import IterableDataset as HfIterableDataset
12
+ from datasets import Sequence, Value
13
+
14
+ from swift.llm import history_to_messages
15
+ from swift.utils import get_logger, is_dist, is_master, safe_ddp_context
16
+
17
+ DATASET_TYPE = Union[HfDataset, HfIterableDataset]
18
+
19
+ logger = get_logger()
20
+
21
+
22
class RowPreprocessor:
    """Base class for row-level dataset preprocessing.

    Subclasses implement `preprocess` to convert one raw row into the standard
    schema; `__call__` applies it over a (streaming or in-memory) HF dataset in
    batches, dropping rows whose preprocessing returns None or raises.
    """

    # Canonical output columns; all other columns are stripped by `remove_useless_columns`.
    standard_keys = ['messages', 'rejected_response', 'label', 'images', 'videos', 'audios', 'tools', 'objects']

    def __init__(self,
                 *,
                 columns: Optional[Dict[str, str]] = None,
                 dataset_sample: Optional[int] = None,
                 random_state: Union[np.random.RandomState, int, None] = 42,
                 traceback_limit: int = 10) -> None:
        """
        Args:
            columns: Mapping of raw column names to standard column names.
            dataset_sample: If set, sample this many rows before preprocessing.
            random_state: Seed or RandomState used for sampling.
            traceback_limit: Max number of per-row errors whose traceback gets logged.
        """
        self.columns = columns or {}
        self.origin_columns = self.columns.copy()  # Higher priority and raise Error
        # Register singular/plural aliases for the multimodal columns.
        images_keys = ['images', 'image']
        audios_keys = ['audios', 'audio']
        videos_keys = ['videos', 'video']
        for mm_type in ['images', 'audios', 'videos']:
            keys = locals()[f'{mm_type}_keys']
            for key in keys:
                self.columns[key] = mm_type

        self.traceback_limit = traceback_limit
        self._traceback_counter = 0
        self.dataset_sample = dataset_sample
        if not isinstance(random_state, np.random.RandomState):
            random_state = np.random.RandomState(random_state)
        self.random_state = random_state

    @staticmethod
    def _check_messages(row: Dict[str, Any]) -> None:
        """Validate roles/content of `row['messages']` and strip extra message keys."""
        if 'messages' not in row:
            return
        messages = row['messages']
        assert len(messages) > 0, f'messages: {messages}'
        # fix swift/SlimOrca
        for message in messages:
            keys = set(message.keys()) - {'role', 'content'}
            for key in keys:
                message.pop(key)

        for message in messages:
            role, content = message['role'], message['content']
            # The terms 'tool' and 'tool_response' have the same meaning, ensuring compatibility.
            assert role in {'system', 'user', 'tool_call', 'tool_response', 'tool', 'assistant'}, f'message: {message}'
            assert content is not None, f'message: {message}'

    @staticmethod
    def _cast_images(row: Dict[str, Any]) -> None:
        """Normalize `row['images']` to a list of {'bytes': ..., 'path': ...} dicts."""
        images = row.get('images')

        if isinstance(images, str) or isinstance(images, list) and images and isinstance(images[0], str):
            if isinstance(images, str):
                images = [images]
            for i, image in enumerate(images):
                images[i] = {'bytes': None, 'path': image}
            row['images'] = images
        elif isinstance(images, dict):
            row['images'] = [images]

    @staticmethod
    def _check_rejected_response(row: Dict[str, Any]) -> None:
        """Derive `rejected_response` from paired chosen/rejected messages and validate it."""
        if 'rejected_messages' in row:
            chosen_messages = row['messages']
            rejected_messages = row['rejected_messages']
            messages = []
            rejected_response = None
            # Walk user/assistant pairs in lockstep; the user turns must match,
            # and the first diverging assistant turn becomes the rejected response.
            for chosen_user, chosen_assistant, rejected_user, rejected_assistant in zip(
                    chosen_messages[::2], chosen_messages[1::2], rejected_messages[::2], rejected_messages[1::2]):
                assert chosen_user == rejected_user
                messages.append(chosen_user)
                messages.append(chosen_assistant)
                if chosen_assistant != rejected_assistant:
                    rejected_response = rejected_assistant['content']
            row['messages'] = messages
            row['rejected_response'] = rejected_response

        if 'rejected_response' in row:
            messages = row['messages']
            rejected_response = row['rejected_response']
            if rejected_response is None or rejected_response == messages[-1]['content']:
                raise ValueError(f'rejected_response: {rejected_response}')

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """Convert one raw row to the standard schema; return None to drop it."""
        raise NotImplementedError

    def prepare_dataset(self, dataset: DATASET_TYPE) -> DATASET_TYPE:
        """Hook run once before mapping; subclasses use it e.g. to download media."""
        return dataset

    @staticmethod
    def batched_to_rows(batched_row: Dict[str, Any]):
        """Transpose a columnar batch dict into a list of per-row dicts."""
        keys = list(batched_row.keys())
        batch_size = len(batched_row[keys[0]])
        return [{key: batched_row[key][i] for key in keys} for i in range(batch_size)]

    @staticmethod
    def rows_to_batched(rows: List[Dict[str, Any]]):
        """Transpose a list of row dicts back into a columnar batch, padding missing keys with None."""
        batched = {}
        for i, row in enumerate(rows):
            for k, v in row.items():
                if k not in batched:
                    # Key first seen at row i: backfill the earlier rows.
                    batched[k] = [None] * i
                batched[k].append(v)
            # Make all the lengths of v the same.
            for k in set(batched.keys()) - set(row.keys()):
                batched[k].append(None)
        return batched

    @staticmethod
    def _remove_prefix_keys(row, prefix: str):
        """Strip an internal key prefix (e.g. '__@', '__#'), keeping existing keys intact."""
        for k in list(row.keys()):
            if k.startswith(prefix):
                new_k = k[len(prefix):]
                new_v = row.pop(k)
                if new_k not in row:
                    row[new_k] = new_v

    @staticmethod
    def _check_objects(row):
        """Canonicalize `row['objects']` key order and fix inverted bbox corners."""
        objects = row.get('objects')
        if objects is None:
            return
        new_objects = {}
        # Ensure the order
        for k in ['ref', 'bbox', 'bbox_type', 'image_id']:
            if k in objects.keys():
                new_objects[k] = objects[k]
        row['objects'] = new_objects
        # NOTE(review): raises KeyError when 'bbox' is absent from objects — confirm
        # every producer always supplies it.
        bbox = new_objects['bbox']

        # check bbox
        for box in bbox:
            assert len(box) in {2, 4}, f'len(box): {len(box)}'
            if len(box) == 2:
                continue
            # Swap corners so that (x1, y1) <= (x2, y2).
            if box[0] > box[2]:
                box[0], box[2] = box[2], box[0]
            if box[1] > box[3]:
                box[1], box[3] = box[3], box[1]

    def batched_preprocess(self, batched_row: Dict[str, Any], *, strict: bool,
                           ignore_max_length_error: bool) -> Dict[str, Any]:
        """Run `preprocess` over a columnar batch, validating and filtering rows.

        Failed rows are dropped (logged up to `traceback_limit` times) unless
        `strict` is True, in which case the exception propagates.
        """
        from ...template import MaxLengthError
        batched_row = dict(batched_row)
        assert len(batched_row) > 0
        self._remove_prefix_keys(batched_row, '__@')  # compat streaming
        rows = self.batched_to_rows(batched_row)

        new_rows = []
        for row in rows:
            try:
                row = self.preprocess(row)
                # support [row1, row2, ...]
                if row is None:
                    row = []
                if isinstance(row, dict):
                    row = [row]
                for r in row:
                    self._check_objects(r)
                    self._check_messages(r)
                    self._check_rejected_response(r)
                    self._cast_images(r)
            except Exception as e:
                if strict:
                    logger.warning('To avoid errors, you can pass `strict=False`.')
                    raise
                if isinstance(e, MaxLengthError) and ignore_max_length_error:
                    pass
                elif self.traceback_limit is not None and self._traceback_counter < self.traceback_limit:
                    import traceback
                    logger.info(traceback.format_exc())
                    logger.warning('👆👆👆There are errors in the dataset, the data will be deleted')
                    self._traceback_counter += 1
                row = []
            new_rows += row
        res = self.rows_to_batched(new_rows)
        self._remove_prefix_keys(res, '__#')  # compat GRPO
        if len(res) == 0:
            # Whole batch was filtered out: keep a 'messages' column so the
            # resulting dataset schema stays valid.
            res['messages'] = []

        return res

    @staticmethod
    def get_features_dataset(dataset: DATASET_TYPE) -> DATASET_TYPE:
        """Ensure `dataset.features` is resolved (iterable datasets resolve lazily)."""
        if dataset.features is None:
            assert isinstance(dataset, HfIterableDataset)
            dataset = dataset._resolve_features()
        return dataset

    @staticmethod
    def safe_rename_columns(dataset, columns):
        """Case-insensitively rename columns, skipping ambiguous or identity mappings."""
        dataset = RowPreprocessor.get_features_dataset(dataset)
        columns_keys = {k.lower(): k for k in dataset.features.keys()}  # lower -> lower/upper
        safe_columns = {columns_keys[k.lower()]: v for k, v in columns.items() if k.lower() in columns_keys}

        counter = Counter(safe_columns.values())
        for k, new_k in list(safe_columns.items()):
            if counter[new_k] > 1:
                # For example, if "response" and "answer" match, then no processing is done.
                safe_columns.pop(k)
                continue

        # e.g. Keep {'query': 'query'} to ensure that the query has the highest priority.
        safe_columns = {k: v for k, v in safe_columns.items() if k != v}
        if safe_columns:
            dataset = dataset.rename_columns(safe_columns)

        return dataset

    def _rename_columns(self, dataset: DATASET_TYPE) -> DATASET_TYPE:
        """Apply origin columns first (higher priority), then the alias map."""
        dataset = self.safe_rename_columns(dataset, self.origin_columns)
        dataset = self.safe_rename_columns(dataset, self.columns)
        if isinstance(dataset, HfIterableDataset):
            # fix: https://github.com/huggingface/datasets/issues/6408
            columns = {k: f'__@{k}' for k in RowPreprocessor.standard_keys if k in dataset.features}
            if columns:
                dataset = dataset.rename_columns(columns)
        return dataset

    @staticmethod
    def remove_useless_columns(dataset: DATASET_TYPE) -> DATASET_TYPE:
        """Keep only the standard output columns."""
        dataset = RowPreprocessor.get_features_dataset(dataset)
        features = dataset.features
        k_list = [k for k in RowPreprocessor.standard_keys if k in features]
        if len(k_list) != len(features):
            dataset = dataset.select_columns(k_list)
        return dataset

    @staticmethod
    @contextmanager
    def _patch_arrow_writer():
        # fix AI-ModelScope/ms_agent_for_agentfabric:all
        # Force a fixed Arrow schema for messages/images/objects while mapping,
        # then restore the original ArrowWriter constructor.
        from datasets.arrow_writer import ArrowWriter

        def _new_init(self, schema=None, features=None, *args, **kwargs):

            if features is not None:
                features['messages'] = [{'role': Value(dtype='string'), 'content': Value(dtype='string')}]
                features['images'] = [{'bytes': Value(dtype='binary'), 'path': Value(dtype='string')}]
                features['objects'] = {
                    'ref': Sequence(feature=Value(dtype='string'), length=-1),
                    'bbox': Sequence(feature=Sequence(feature=Value(dtype='float64'), length=-1), length=-1)
                }
            ArrowWriter.__origin_init__(self, schema, features, *args, **kwargs)

        ArrowWriter.__origin_init__ = ArrowWriter.__init__
        ArrowWriter.__init__ = _new_init
        try:
            yield
        finally:
            ArrowWriter.__init__ = ArrowWriter.__origin_init__
            del ArrowWriter.__origin_init__

    def _cast_pil_image(self, dataset):
        """Disable PIL decoding of the 'images' column so raw bytes/paths are preserved."""
        features = dataset.features
        if 'images' in features and isinstance(features['images'], Image) and features['images'].decode:
            dataset = dataset.cast_column('images', Image(decode=False))
        return dataset

    def __call__(
        self,
        dataset: DATASET_TYPE,
        *,
        num_proc: int = 1,
        load_from_cache_file: bool = True,
        strict: bool = False,
        batch_size: Optional[int] = None,
    ) -> DATASET_TYPE:
        """Sample, rename, prepare and map the dataset through `batched_preprocess`."""
        from ..utils import sample_dataset
        if batch_size is None:
            # Streaming datasets use a smaller batch to limit memory.
            batch_size = 1000 if isinstance(dataset, HfDataset) else 16
        if self.dataset_sample is not None:
            dataset = sample_dataset(dataset, self.dataset_sample, True, self.random_state)

        map_kwargs = {'batched': True, 'batch_size': batch_size}
        if isinstance(dataset, HfDataset):
            if not load_from_cache_file and is_dist() and not is_master():
                # Non-master ranks must reuse the cache written by the master.
                load_from_cache_file = True
            map_kwargs.update({
                'num_proc': num_proc,
                'load_from_cache_file': load_from_cache_file,
            })
        # compat GRPO: The solution field will be retained.
        dataset = RowPreprocessor.get_features_dataset(dataset)
        if 'solution' in dataset.features:
            with safe_ddp_context(None, True):
                dataset = dataset.map(lambda x: {'__#solution': x['solution']}, **map_kwargs)
        dataset = self._rename_columns(dataset)
        dataset = self.prepare_dataset(dataset)
        dataset = self._cast_pil_image(dataset)

        # With multiple workers, MaxLengthError cannot be surfaced per-row; ignore it.
        ignore_max_length_error = True if isinstance(dataset, HfDataset) and num_proc > 1 else False
        with self._patch_arrow_writer(), safe_ddp_context(None, True):
            try:
                dataset_mapped = dataset.map(
                    self.batched_preprocess,
                    fn_kwargs={
                        'strict': strict,
                        'ignore_max_length_error': ignore_max_length_error
                    },
                    remove_columns=list(dataset.features.keys()),
                    **map_kwargs)
            except NotImplementedError:
                # NOTE(review): if this branch is taken, `dataset_mapped` is never
                # bound and the statements below raise NameError — confirm intent.
                pass
        if isinstance(dataset_mapped, HfDataset) and len(dataset) != len(dataset_mapped):
            logger.info(
                f'Dataset filtered, origin length: {len(dataset)}, filtered dataset length: {len(dataset_mapped)}')

        return dataset_mapped
328
+
329
+
330
class ResponsePreprocessor(RowPreprocessor):
    """Dataset compatible with older versions of ms-swift"""

    def __init__(self, *, columns: Optional[Dict[str, str]] = None, **kwargs) -> None:
        super().__init__(columns=columns, **kwargs)
        # Common aliases for the system/query/response columns; all are folded
        # onto the three standard names via the column-rename machinery.
        system_keys = ['system', 'system_prompt']
        query_keys = ['query', 'prompt', 'input', 'instruction', 'question', 'problem']
        response_keys = ['response', 'answer', 'output', 'targets', 'target', 'answer_key', 'answers', 'solution'
                         ] + ['text', 'completion', 'content']
        for key in system_keys:
            self.columns[key] = 'system'
        for key in query_keys:
            self.columns[key] = 'query'
        for key in response_keys:
            self.columns[key] = 'response'

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """Fold query/response/history/system into a standard `messages` list."""
        response = row.pop('response', None)
        if response is not None:
            if isinstance(response, (list, tuple)):
                from transformers.utils import strtobool
                # sometimes response is a list, pick one randomly
                if strtobool(os.environ.get('RANDOM_DATASET_RESPONSE', 'True')):
                    response = self.random_state.choice(response)
                else:
                    response = response[0]
        history = row.pop('history', None) or []
        query = row.pop('query', None)
        system = row.pop('system', None)
        if isinstance(history, str):  # e.g. "[['query1', 'response1']]"
            history = ast.literal_eval(history)
        # The current turn is appended as the last history entry before conversion.
        history.append([query, response])

        row.update({'messages': history_to_messages(history, system)})
        return row
365
+
366
+
367
class AlpacaPreprocessor(ResponsePreprocessor):
    """Convert Alpaca-format rows (instruction/input/output) to query/response form."""

    @classmethod
    def concat_inst_input(cls, instruction, input_):
        """Join instruction and input with a newline; fall back to whichever is non-empty."""
        if instruction and input_:
            query = f'{instruction}\n{input_}'
        else:
            query = instruction or input_
        assert isinstance(query, str), f'query: {query}'
        return query

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        instruction = row.pop('instruction', None)
        input_ = row.pop('input', None)
        output = row.pop('output', None)
        if output is not None:
            row['response'] = output
        row['query'] = self.concat_inst_input(instruction, input_)
        return super().preprocess(row)
387
+
388
def default_repair_messages(s: Union[str, Any]) -> Any:
    """Parse a stringified Python literal (e.g. "[{'from': ...}]"); pass other values through."""
    if not isinstance(s, str):
        return s
    return ast.literal_eval(s)
392
+
393
+
394
class MessagesPreprocessor(RowPreprocessor):
    """Normalize conversation rows (messages / ShareGPT style) to the standard schema.

    Handles both role/content message lists and ShareGPT-style
    {'user': ..., 'assistant': ...} turn dicts, mapping the various role and
    key aliases onto the standard 'role'/'content' names.
    """

    def __init__(
            self,
            *,
            # If set to None, automatic matching will be performed.
            role_key: Optional[str] = None,  # 'role', 'from'
            content_key: Optional[str] = None,  # 'content', 'value'
            user_role: Optional[str] = None,  # 'user', 'human'
            assistant_role: Optional[str] = None,  # 'assistant', 'gpt', 'bot'
            system_role: str = 'system',
            # 'conversation', 'conversations' -> 'messages'
            columns: Optional[Dict[str, str]] = None,
            repair_messages: Callable[[Union[str, List[Dict[str, str]]]],
                                      Optional[List[Dict[str, str]]]] = default_repair_messages,
            inner_key: Optional[str] = None,
            **kwargs):
        """
        Args:
            role_key/content_key: Explicit key names; None tries the common aliases.
            user_role/assistant_role: Explicit role names; None tries the common aliases.
            system_role: Role name treated as the system message.
            repair_messages: Callable to repair/parse the raw messages value.
            inner_key: When set, the messages list is nested under this key.
        """
        super().__init__(columns=columns, **kwargs)
        self.role_keys = ['role', 'from'] if role_key is None else [role_key]
        self.content_keys = ['content', 'value'] if content_key is None else [content_key]
        self.user_roles = ['user', 'human'] if user_role is None else [user_role]
        self.assistant_roles = ['assistant', 'gpt', 'bot'] if assistant_role is None else [assistant_role]
        self.tool_call_roles = ['function_call']
        self.tool_response_roles = ['function_response', 'observation', 'observations']

        self.system_role = system_role
        self.repair_messages = repair_messages
        self.inner_key = inner_key

        message_keys = ['messages', 'conversation', 'conversations']
        for key in message_keys:
            self.columns[key] = 'messages'
        # sharegptq
        system_keys = ['system', 'system_prompt']
        if system_role not in system_keys:
            system_keys.append(system_role)
        for key in system_keys:
            self.columns[key] = 'system'

    @staticmethod
    def _is_sharegpt_format(message: Dict[str, str]) -> bool:
        # ShareGPT turns have no 'role'/'content' keys; they map role name -> text.
        if 'role' in message or 'content' in message:
            return False
        return True

    def sharegpt_to_messages(self, messages: List[Dict[str, str]], system: Optional[str]) -> List[Dict[str, str]]:
        """Expand ShareGPT turn dicts into alternating user/assistant messages."""
        self._to_std_key(messages, 'user', self.user_roles)
        self._to_std_key(messages, 'assistant', self.assistant_roles)
        new_messages = []
        if system is not None:
            new_messages.append({'role': 'system', 'content': system})
        for message in messages:
            user_message = {'role': 'user', 'content': message['user']}
            assistant_message = {'role': 'assistant', 'content': message['assistant']}
            new_messages.append(user_message)
            new_messages.append(assistant_message)
        return new_messages

    def to_std_messages(self, messages: List[Dict[str, str]], system: Optional[str]) -> None:
        """In place: normalize role names and prepend the system message if needed."""
        if messages[0]['role'] == self.system_role:
            messages[0]['role'] = 'system'
        elif system is not None:
            messages.insert(0, {'role': 'system', 'content': system})
        for message in messages:
            role = message['role']
            if role in self.user_roles:
                message['role'] = 'user'
            elif role in self.assistant_roles:
                message['role'] = 'assistant'
            elif role.replace('-', '_') in self.tool_call_roles:
                message['role'] = 'tool_call'
            elif role.replace('-', '_') in self.tool_response_roles:
                message['role'] = 'tool_response'

    @staticmethod
    def _to_std_key(messages: List[Dict[str, str]], std_key: str, optional_keys: List[str]) -> None:
        """In place: rename the first matching alias key of each message to `std_key`."""
        for message in messages:
            for key in optional_keys:
                if key in message:
                    message[std_key] = message.pop(key)

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """Repair, unnest and normalize the row's messages; return None to drop the row."""
        if 'rejected_messages' in row:
            # Normalize the rejected conversation with the same pipeline.
            row['rejected_messages'] = MessagesPreprocessor.preprocess(
                self, {'messages': row['rejected_messages']})['messages']
        messages = row['messages']
        if self.inner_key is not None:
            messages = messages[self.inner_key]
        messages: Optional[List[Dict[str, str]]] = self.repair_messages(messages)
        if not messages or isinstance(messages, str):
            return
        self._to_std_key(messages, 'role', self.role_keys)
        self._to_std_key(messages, 'content', self.content_keys)
        system = row.pop('system', None)
        if self._is_sharegpt_format(messages[0]):
            messages = self.sharegpt_to_messages(messages, system)
        else:
            self.to_std_messages(messages, system)  # inplace
        row['messages'] = messages
        return row
494
+
495
+
496
class ClsPreprocessor(ResponsePreprocessor):
    """Response preprocessing plus coercion of the classification label to int."""

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        processed = super().preprocess(row)
        processed['label'] = int(processed['label'])
        return processed
502
+
503
+
504
class AutoPreprocessor:
    """Dispatch to the preprocessor matching the dataset's schema.

    Conversation-style columns select MessagesPreprocessor; Alpaca-style
    instruction/input columns select AlpacaPreprocessor; everything else falls
    back to the query/response compatibility ResponsePreprocessor.
    """

    def __init__(self, *, columns: Optional[Dict[str, str]] = None, **kwargs) -> None:
        self.columns = columns or {}
        self.kwargs = kwargs

    def _get_preprocessor(self, dataset: DATASET_TYPE) -> RowPreprocessor:
        """Inspect dataset features and build the appropriate row preprocessor."""
        features = dataset.features
        if any(key in features for key in ('conversation', 'conversations', 'messages')):
            return MessagesPreprocessor(**self.kwargs)
        if 'instruction' in features and 'input' in features:
            return AlpacaPreprocessor(**self.kwargs)
        return ResponsePreprocessor(**self.kwargs)

    def __call__(
        self,
        dataset: DATASET_TYPE,
        *,
        num_proc: int = 1,
        load_from_cache_file: bool = True,
        strict: bool = False,
    ) -> DATASET_TYPE:
        renamed = RowPreprocessor.safe_rename_columns(dataset, self.columns)
        preprocessor = self._get_preprocessor(renamed)
        return preprocessor(renamed, num_proc=num_proc, load_from_cache_file=load_from_cache_file, strict=strict)
ms-swift/swift/llm/ds_config/zero2_offload.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fp16": {
3
+ "enabled": "auto",
4
+ "loss_scale": 0,
5
+ "loss_scale_window": 1000,
6
+ "initial_scale_power": 16,
7
+ "hysteresis": 2,
8
+ "min_loss_scale": 1
9
+ },
10
+
11
+ "bf16": {
12
+ "enabled": "auto"
13
+ },
14
+
15
+ "zero_optimization": {
16
+ "stage": 2,
17
+ "offload_optimizer": {
18
+ "device": "cpu",
19
+ "pin_memory": true
20
+ },
21
+ "allgather_partitions": true,
22
+ "allgather_bucket_size": 2e8,
23
+ "overlap_comm": false,
24
+ "reduce_scatter": true,
25
+ "reduce_bucket_size": 2e8,
26
+ "contiguous_gradients": true
27
+ },
28
+
29
+ "gradient_accumulation_steps": "auto",
30
+ "gradient_clipping": "auto",
31
+ "steps_per_print": 2000,
32
+ "train_batch_size": "auto",
33
+ "train_micro_batch_size_per_gpu": "auto",
34
+ "wall_clock_breakdown": false
35
+ }
ms-swift/swift/llm/export/merge_lora.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ import os
3
+
4
+ from swift.llm import ExportArguments, prepare_model_template, save_checkpoint
5
+ from swift.tuners import Swift
6
+ from swift.utils import get_logger
7
+
8
+ logger = get_logger()
9
+
10
+
11
def merge_lora(args: ExportArguments, device_map=None, replace_if_exists=False) -> None:
    """Merge LoRA adapter weights into their base model and save the result.

    The merged checkpoint goes to ``args.output_dir`` when set, otherwise to
    ``'<first adapter>-merged'``. If that directory already exists and
    ``replace_if_exists`` is False, merging is skipped. In every case ``args``
    is mutated afterwards so that ``args.model``/``args.model_dir`` point at
    the merged directory and ``args.adapters`` is cleared.

    Args:
        args: Export configuration; read for adapters/device map and mutated
            in place at the end.
        device_map: Optional device map used only while merging; the original
            ``args.device_map`` is restored once saving finishes.
        replace_if_exists: Re-run the merge even when the output exists.
    """
    if replace_if_exists:
        logger.info(f'replace_if_exists: {replace_if_exists}')
    output_dir = getattr(args, 'output_dir', None) or f'{args.adapters[0]}-merged'
    if os.path.exists(output_dir) and not replace_if_exists:
        logger.info(f'The weight directory for the merged LoRA already exists in {output_dir}, '
                    'skipping the saving process.')
    else:
        # Temporarily override the device map for the merge; restore it below.
        saved_device_map = args.device_map
        if device_map:
            args.device_map = device_map
        logger.info(f'merge_device_map: {device_map}')
        model, template = prepare_model_template(args)
        quant_method = model.model_info.quant_method
        assert quant_method is None, (f'quant_method: {quant_method}, '
                                      'quantized model and does not support merge-lora.')
        logger.info('Merge LoRA...')
        Swift.merge_and_unload(model)
        # Unwrap to the inner (merged) model before saving.
        model = model.model
        logger.info('Saving merged weights...')
        save_checkpoint(
            model,
            template.processor,
            output_dir,
            safe_serialization=args.safe_serialization,
            model_dirs=args.adapters,
            max_shard_size=args.max_shard_size,
            additional_saved_files=model.model_meta.additional_saved_files)
        logger.info(f'Successfully merged LoRA and saved in {output_dir}.')
        args.device_map = saved_device_map

    # Point any subsequent pipeline steps at the merged checkpoint.
    args.model = output_dir
    args.model_dir = output_dir
    args.adapters = []
ms-swift/swift/llm/export/quant.py ADDED
@@ -0,0 +1,266 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ from collections import defaultdict
3
+ from contextlib import contextmanager
4
+ from typing import Dict, List, Optional
5
+
6
+ import torch
7
+ import torch.nn as nn
8
+ from tqdm import tqdm
9
+
10
+ from swift.llm import (ExportArguments, HfConfigFactory, MaxLengthError, ProcessorMixin, deep_getattr, get_model_arch,
11
+ is_moe_model, load_dataset, prepare_model_template, save_checkpoint, to_device)
12
+ from swift.utils import find_layers, get_logger, get_model_parameter_info
13
+
14
+ logger = get_logger()
15
+
16
+
17
class QuantEngine(ProcessorMixin):
    """Drives post-training quantization (AWQ / GPTQ / bnb) for an export run.

    The engine loads the model/template from ``ExportArguments``, builds a
    calibration dataset from the configured training data (for awq/gptq),
    runs the chosen quantization backend, and saves the quantized checkpoint
    plus processor files to ``args.output_dir``.
    """

    def __init__(self, args: ExportArguments):
        self.args = args
        kwargs = {}
        if args.quant_method == 'awq':
            # AWQ needs its own AutoModel class so that `self.model` exposes
            # `quantize()` / `save_quantized()`.
            from awq import AutoAWQForCausalLM
            kwargs['automodel_class'] = AutoAWQForCausalLM
        self.model, self.template = prepare_model_template(args, **kwargs)
        # 'train' mode so that template.encode produces labeled samples for calibration.
        self.template.set_mode('train')
        # Disable the KV cache both on the top-level config and (via the
        # factory helper) on any nested sub-configs.
        self.model.config.use_cache = False
        HfConfigFactory.set_model_config_attr(self.model, 'use_cache', False)
        self.processor = self.template.processor
        args.save_args()

    def quantize(self):
        """Run the configured quantization backend and save the result.

        Raises:
            ValueError: if ``quant_bits`` is unset or ``quant_method`` is not
                one of 'awq', 'gptq', 'bnb'.
        """
        args = self.args
        if args.quant_bits is None:
            raise ValueError(f'Please set the quant_bits. args.quant_bits: {args.quant_bits}')
        if args.quant_method == 'awq':
            # For AWQ, `self.model` is the AWQ wrapper; the template must see
            # the inner HF model.
            self.template.model = self.model.model
            self.awq_model_quantize()
            self.model.save_quantized(
                args.output_dir, safetensors=args.safe_serialization, shard_size=args.max_shard_size)
        elif args.quant_method == 'gptq':
            self.template.model = self.model
            gptq_quantizer = self.gptq_model_quantize()
            gptq_quantizer.save(
                self.model,
                args.output_dir,
                safe_serialization=args.safe_serialization,
                max_shard_size=args.max_shard_size)
        elif args.quant_method == 'bnb':
            # bnb models are already quantized at load time; just re-save.
            self.model.save_pretrained(
                args.output_dir, safe_serialization=args.safe_serialization, max_shard_size=args.max_shard_size)
        else:
            raise ValueError(f'args.quant_method: {args.quant_method}')

        logger.info(f'model: {self.model}')
        logger.info(f'model_parameter_info: {get_model_parameter_info(self.model)}')
        # model=None: weights were already written above; this only copies the
        # processor/config side files into the output directory.
        save_checkpoint(
            None,
            self.processor,
            args.output_dir,
            model_dirs=[args.model_dir],
            additional_saved_files=self.model.model_meta.additional_saved_files)
        logger.info(f'Successfully quantized the model and saved in {args.output_dir}.')

    @torch.inference_mode()
    def _prepare_gptq_dataset(self, examples: List[Dict[str, torch.LongTensor]], batch_size: int = 1, *args, **kwargs):
        """Collate calibration examples into batches for GPTQ (replaces
        optimum's `prepare_dataset` via the `_patch_gptq` context manager)."""
        res = []
        for start in tqdm(range(0, len(examples), batch_size)):
            batched_inputs = examples[start:start + batch_size]
            inputs = to_device(self.template.data_collator(batched_inputs), self.model.device)
            if self.model.model_meta.is_multimodal:
                # Multimodal templates inject vision features in the pre-forward hook.
                _, inputs = self.template.pre_forward_hook(self.model, None, inputs)
            # Keep prepared batches on CPU until the quantizer moves them.
            res.append(to_device(inputs, 'cpu'))
        return res

    @torch.inference_mode()
    def _get_quant_dataset(self, *args, **kwargs):
        """Build the calibration dataset (replaces awq/optimum's
        `get_calib_dataset`/`get_dataset` while patched in).

        For multimodal+gptq, returns a list of per-sample encoded dicts.
        Otherwise, token ids of all samples are concatenated and re-split
        into fixed `max_length` blocks.
        """
        args = self.args
        assert args.quant_method in {'awq', 'gptq'}
        template = self.template
        n_samples = args.quant_n_samples
        block_size = args.max_length

        # only use train_dataset
        dataset = load_dataset(
            args.dataset, split_dataset_ratio=0, shuffle=args.dataset_shuffle, **args.get_dataset_kwargs())[0]
        logger.info(f'quant_dataset: {dataset}')
        dataset = dataset.shuffle()

        samples = []
        i = 0
        prog_bar = tqdm(total=n_samples, dynamic_ncols=True)
        is_multimodal = self.model.model_meta.is_multimodal
        for data in dataset:
            try:
                inputs = template.encode(data)
            except MaxLengthError:
                # Skip rows that exceed max_length rather than truncating.
                continue
            if is_multimodal and args.quant_method == 'gptq':
                inputs.pop('labels', None)
                samples.append(inputs)
            else:
                input_ids = inputs['input_ids']
                # Note: `+=` extends, so all token ids end up in one flat list.
                samples += input_ids
            i += 1
            prog_bar.update()
            if i == n_samples:
                break
        if is_multimodal and args.quant_method == 'gptq':
            return samples
        # now concatenate all samples and split according to block size
        n_split = len(samples) // block_size
        logger.info(f'Split into {n_split} blocks')
        res = []
        for i in range(n_split):
            input_ids = samples[i * block_size:(i + 1) * block_size]
            if args.quant_method == 'gptq':
                res.append({'input_ids': input_ids})
            else:
                # AWQ expects a batched LongTensor of shape (1, block_size).
                res.append(torch.tensor(input_ids)[None])
        return res

    @staticmethod
    @contextmanager
    def _patch_awq_move_embed(awq_model):
        """Temporarily wrap `awq_model.move_embed` so embeddings managed by an
        accelerate hook (`_hf_hook`) are not moved off-CPU during quantization."""
        _origin_move_embed = awq_model.move_embed

        def _move_embed(model, device: str):
            if hasattr(model, '_hf_hook') and device != 'cpu':
                return
            _origin_move_embed(model, device)

        awq_model.move_embed = _move_embed
        try:
            yield
        finally:
            awq_model.move_embed = _origin_move_embed

    def get_awq_modules_to_not_convert(self):
        """Return names of Linear layers whose out_features equals the expert
        count (i.e. MoE router/gate layers), which AWQ should skip."""
        block_name = self.get_block_name_to_quantize(self.model)
        block = deep_getattr(self.model, block_name)[-1]
        prefix, experts = self._get_experts(block)
        num_experts = len(experts)

        def cond(name, module):
            if isinstance(module, nn.Linear) and module.out_features == num_experts:
                return True
            return False

        return find_layers(self.model, cond, min_name_len=2)  # min_name_len: fix Qwen3-MoE

    def awq_model_quantize(self) -> None:
        """Quantize `self.model` with AutoAWQ, substituting our calibration
        dataset builder for AWQ's default one while quantizing."""
        from awq.quantize import quantizer
        # NOTE(review): AwqConfig appears unused in this method — confirm
        # whether the import is needed for a registration side effect.
        from transformers import AwqConfig

        args = self.args
        logger.info(f'Quantization dataset: {args.dataset}')
        _origin_get_calib_dataset = quantizer.get_calib_dataset
        quantizer.get_calib_dataset = self._get_quant_dataset
        quant_config = {
            'zero_point': True,
            'q_group_size': args.group_size,
            'w_bit': args.quant_bits,
            'version': 'GEMM'
        }
        if is_moe_model(self.model):
            # Keep MoE gate layers unquantized.
            quant_config['modules_to_not_convert'] = self.get_awq_modules_to_not_convert()
        logger.info(f'quant_config: {quant_config}')
        logger.info('Start quantizing the model...')
        with self._patch_awq_move_embed(self.model):
            self.model.quantize(
                self.tokenizer, quant_config=quant_config, n_parallel_calib_samples=args.quant_batch_size)
        quantizer.get_calib_dataset = _origin_get_calib_dataset  # recover
        if self.model.quant_config.modules_to_not_convert:
            # Also exclude the LM head (name resolved from the model arch).
            model_arch = get_model_arch(args.model_meta.model_arch)
            lm_head_key = model_arch.lm_head or 'lm_head'
            self.model.quant_config.modules_to_not_convert.append(lm_head_key)

    @contextmanager
    def _patch_gptq(self):
        """Temporarily replace optimum's dataset helpers with our own so GPTQ
        calibrates on the template-encoded dataset; always restores on exit."""
        from optimum.gptq import quantizer
        _get_dataset_origin = quantizer.get_dataset
        _prepare_dataset_origin = quantizer.prepare_dataset
        quantizer.get_dataset = self._get_quant_dataset
        quantizer.prepare_dataset = self._prepare_gptq_dataset
        try:
            yield
        finally:
            quantizer.get_dataset = _get_dataset_origin
            quantizer.prepare_dataset = _prepare_dataset_origin

    @staticmethod
    def get_block_name_to_quantize(model: nn.Module) -> Optional[str]:
        """Heuristically find the attribute path of the transformer layer
        stack: the largest ModuleList/Sequential with >= 10 entries whose
        first element is not an MLP (excludes MoE expert lists).

        Returns None if no such container is found.
        """
        model_arch = get_model_arch(model.model_meta.model_arch)
        prefix = ''
        if hasattr(model_arch, 'language_model'):
            # For multimodal models, descend into the language sub-model first.
            assert len(model_arch.language_model) == 1, f'mllm_arch.language_model: {model_arch.language_model}'
            prefix = model_arch.language_model[0]
            model = deep_getattr(model, prefix)

        module_lists = []
        for n, m in model.named_modules():
            if (isinstance(m, (nn.ModuleList, nn.Sequential)) and len(m) >= 10
                    and 'mlp' not in m[0].__class__.__name__.lower()):  # fix moe
                module_lists.append((n, m))
        if module_lists:
            module_list = max(module_lists, key=lambda x: len(x[1]))
            # strip('.') handles the empty-prefix (non-multimodal) case.
            return f'{prefix}.{module_list[0]}'.strip('.')

    @staticmethod
    def _get_experts(block):
        """Return (name, container) of the first ModuleList/Sequential inside
        `block` — presumably the MoE expert list. Returns None if absent."""
        for n, m in block.named_modules():
            if isinstance(m, (nn.ModuleList, nn.Sequential)):
                return n, m

    @staticmethod
    def get_modules_in_block_to_quantize(model, block_name: str):
        """For MoE models, build optimum's `modules_in_block_to_quantize` list:
        expert layers grouped by suffix, gate/router layers excluded.

        Returns None for non-MoE models (optimum then uses its default).
        """
        if not is_moe_model(model):
            return
        from optimum.gptq.utils import get_layers
        # Do not quantize the gate part.
        block = deep_getattr(model, block_name)[-1]
        prefix, experts = QuantEngine._get_experts(block)
        num_experts = len(experts)

        layers = get_layers(block)
        res = []
        experts = defaultdict(list)
        experts_idx = None
        for name, layer in layers.items():
            if name.startswith(prefix):
                # Group parallel expert layers by their trailing name (e.g. 'gate_proj').
                suffix = name.rsplit('.', 1)[-1]
                experts[suffix].append(name)
                experts_idx = len(res)
            elif layer.out_features not in {1, num_experts}:
                # Skip router/gate-like layers (out_features == num_experts or 1).
                res.append([name])
        # Splice the expert groups back in at the position where they appeared.
        res[experts_idx:experts_idx] = experts.values()
        return res

    def gptq_model_quantize(self):
        """Quantize `self.model` via optimum's GPTQQuantizer and return the
        quantizer (the caller uses it to save the model)."""
        from optimum.gptq import GPTQQuantizer
        args = self.args
        logger.info(f'Quantization dataset: {args.dataset}')
        block_name_to_quantize = self.get_block_name_to_quantize(self.model)
        modules_in_block_to_quantize = self.get_modules_in_block_to_quantize(self.model, block_name_to_quantize)
        logger.info(f'block_name_to_quantize: {block_name_to_quantize}')
        logger.info(f'modules_in_block_to_quantize: {modules_in_block_to_quantize}')
        # The dataset patches must stay active for the whole quantize_model call.
        with self._patch_gptq():
            gptq_quantizer = GPTQQuantizer(
                bits=args.quant_bits,
                group_size=args.group_size,
                dataset=','.join(args.dataset),
                batch_size=args.quant_batch_size,
                block_name_to_quantize=block_name_to_quantize,
                modules_in_block_to_quantize=modules_in_block_to_quantize)
            # Persist our custom block name into the saved quantization config.
            gptq_quantizer.serialization_keys.append('block_name_to_quantize')
            logger.info('Start quantizing the model...')
            logger.warning('The process of packing the model takes a long time and there is no progress bar. '
                           'Please be patient and wait...')
            gptq_quantizer.quantize_model(self.model, self.tokenizer)
            # Drop the (non-serializable) dataset entry from the saved config.
            self.model.config.quantization_config.pop('dataset', None)
        return gptq_quantizer
263
+
264
+
265
def quantize_model(args: ExportArguments):
    """Entry point: build a QuantEngine from `args` and run quantization."""
    engine = QuantEngine(args)
    engine.quantize()
ms-swift/swift/llm/infer/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (1.09 kB). View file
 
ms-swift/swift/llm/infer/__pycache__/protocol.cpython-310.pyc ADDED
Binary file (14.2 kB). View file
 
ms-swift/swift/llm/infer/infer_engine/__init__.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Copyright (c) Alibaba, Inc. and its affiliates.
# Lazy-loading package init: heavy engine backends (vllm, lmdeploy, torch)
# are only imported when their attributes are first accessed.
from typing import TYPE_CHECKING

from swift.utils.import_utils import _LazyModule

if TYPE_CHECKING:
    # Static imports for type checkers / IDEs only; at runtime the submodules
    # are resolved lazily through _LazyModule below.
    from .vllm_engine import VllmEngine
    from .grpo_vllm_engine import GRPOVllmEngine
    from .lmdeploy_engine import LmdeployEngine
    from .pt_engine import PtEngine
    from .infer_client import InferClient
    from .infer_engine import InferEngine
    from .base import BaseInferEngine
    from .utils import prepare_generation_config, AdapterRequest, set_device_context, patch_vllm_memory_leak
else:
    # Maps submodule name -> public names it exports; must mirror the
    # TYPE_CHECKING imports above.
    _import_structure = {
        'vllm_engine': ['VllmEngine'],
        'grpo_vllm_engine': ['GRPOVllmEngine'],
        'lmdeploy_engine': ['LmdeployEngine'],
        'pt_engine': ['PtEngine'],
        'infer_client': ['InferClient'],
        'infer_engine': ['InferEngine'],
        'base': ['BaseInferEngine'],
        'utils': ['prepare_generation_config', 'AdapterRequest', 'set_device_context', 'patch_vllm_memory_leak'],
    }

    import sys

    # Replace this module object with a _LazyModule that defers submodule
    # imports until attribute access.
    sys.modules[__name__] = _LazyModule(
        __name__,
        globals()['__file__'],
        _import_structure,
        module_spec=__spec__,
        extra_objects={},
    )